1: <?php
2: /**
3: * This file contains the string utility class.
4: *
5: * @package Core
6: * @subpackage Util
7: * @author Murat Purc <murat@purc.de>
8: * @copyright four for business AG <www.4fb.de>
9: * @license http://www.contenido.org/license/LIZENZ.txt
10: * @link http://www.4fb.de
11: * @link http://www.contenido.org
12: */
13:
// Abort unless the CON_FRAMEWORK constant has been defined, i.e. the framework
// was initialized before this file got loaded.
if (!defined('CON_FRAMEWORK')) {
    die('Illegal call: Missing framework initialization - request aborted.');
}
15:
/**
 * String helper class.
 *
 * Collection of static string utilities: case-insensitive single
 * replacement, reverse search, slash handling, trimming, diacritic
 * replacement, charset conversion and URL cleaning.
 *
 * Unless a method states otherwise, positions and lengths are counted in
 * bytes (strlen/substr), not in multi-byte characters.
 *
 * @package Core
 * @subpackage Util
 */
class cString {

    /**
     * Replaces the first case-insensitive occurrence of a string.
     *
     * Caution: This function only takes strings as parameters, not arrays!
     *
     * @param string $find
     *         String to find
     * @param string $replace
     *         String to insert instead of the found one
     * @param string $subject
     *         String to process
     * @return string
     *         Processed string; $subject unchanged if $find does not occur
     */
    public static function iReplaceOnce($find, $replace, $subject) {
        $position = stripos($subject, (string) $find);

        if ($position === false) {
            return $subject;
        }

        // Swap exactly the matched bytes, everything else stays untouched.
        return substr_replace($subject, $replace, $position, strlen($find));
    }

    /**
     * Replaces the last case-insensitive occurrence of a string.
     *
     * The position is determined by posReverse(), see there for the exact
     * meaning of "last occurrence".
     *
     * Caution: This function only takes strings as parameters, not arrays!
     *
     * @param string $find
     *         String to find
     * @param string $replace
     *         String to insert instead of the found one
     * @param string $subject
     *         String to process
     * @return string
     *         Processed string; $subject unchanged if $find does not occur
     */
    public static function iReplaceOnceReverse($find, $replace, $subject) {
        // Search on lower-cased copies, replace in the original string.
        $position = self::posReverse(strtolower($subject), strtolower($find));

        if ($position === false) {
            return $subject;
        }

        return substr_replace($subject, $replace, $position, strlen($find));
    }

    /**
     * Finds the position of the last occurrence of a string.
     *
     * The haystack is scanned from left to right and the scan continues
     * behind each match, so occurrences are counted without overlap: for the
     * haystack "aaa" and the needle "aa" the result is 0.
     *
     * NOTE: The original strrpos-function of PHP4 only finds a single
     * character as needle.
     *
     * @param string $haystack
     *         String to search in
     * @param string $needle
     *         String to search for
     * @param int $start [optional]
     *         Offset at which the scan starts
     * @return int|bool
     *         Position of the last occurrence, or false if the needle is
     *         empty, the offset lies outside the haystack or nothing was
     *         found at or behind the offset
     */
    public static function posReverse($haystack, $needle, $start = 0) {
        $needleLength = strlen($needle);

        // An empty needle would never advance the scan offset (endless loop)
        // and an offset outside the haystack is rejected by strpos(); in
        // both cases there is nothing to find.
        if ($needleLength === 0 || abs($start) > strlen($haystack)) {
            return false;
        }

        $lastPosition = false;
        $offset = $start;

        while (($position = strpos($haystack, $needle, $offset)) !== false) {
            $lastPosition = $position;
            // Continue behind the current match
            $offset = $position + $needleLength;
        }

        return $lastPosition;
    }

    /**
     * Adds slashes to passed variable or array.
     *
     * Arrays are processed recursively, element by element.
     *
     * @param string|array $value
     *         Either a string or a multi-dimensional array of values
     * @return string|array
     */
    public static function addSlashes($value) {
        if (is_array($value)) {
            return array_map(array(__CLASS__, 'addSlashes'), $value);
        }

        return addslashes($value);
    }

    /**
     * Removes slashes from passed variable or array.
     *
     * Arrays are processed recursively, element by element.
     *
     * @param string|array $value
     *         Either a string or a multi-dimensional array of values
     * @return string|array
     */
    public static function stripSlashes($value) {
        if (is_array($value)) {
            return array_map(array(__CLASS__, 'stripSlashes'), $value);
        }

        return stripslashes($value);
    }

    /**
     * Checks if the string haystack ends with needle.
     *
     * An empty needle is considered to be the end of every haystack.
     *
     * @param string $haystack
     *         The string to check
     * @param string $needle
     *         The string with which it should end
     * @return bool
     */
    public static function endsWith($haystack, $needle) {
        $length = strlen($needle);
        if ($length == 0) {
            return true;
        }

        return substr($haystack, -$length) === $needle;
    }

    /**
     * Returns true if needle can be found in haystack.
     *
     * @param string $haystack
     *         String to be searched
     * @param string $needle
     *         String to search for
     * @return bool
     */
    public static function contains($haystack, $needle) {
        return strpos($haystack, $needle) !== false;
    }

    /**
     * Wrapper for PHP's strstr including the $beforeNeedle flag.
     *
     * @param string $haystack
     *         String to be searched
     * @param string $needle
     *         String to search for
     * @param bool $beforeNeedle [optional]
     *         If true, return everything BEFORE the first occurrence of
     *         needle, otherwise everything from needle to the end
     * @return string|bool
     *         The requested part of haystack, or false if needle was not
     *         found
     * @link http://php.net/manual/de/function.strstr.php
     */
    public static function strstr($haystack, $needle, $beforeNeedle = false) {
        // The needle has to be matched as one string. strtok() must not be
        // used here: it splits at any single character of the needle, skips
        // leading delimiters and never reports a missing needle.
        return strstr($haystack, $needle, (bool) $beforeNeedle);
    }

    /**
     * This function checks if a given format is accepted by php's date
     * function.
     *
     * The current time is formatted with $format and the result is parsed
     * back with the same format.
     *
     * @param string $format
     *         format according to date function specification
     * @return bool
     *         true if the formatted date could be parsed back, false
     *         otherwise
     */
    public static function validateDateFormat($format) {
        $formattedNow = date($format, time());

        return DateTime::createFromFormat($format, $formattedNow) !== false;
    }

    /**
     * Extract a number from a string.
     *
     * Removes every character except the digits 0-9. The passed variable
     * itself is modified, too.
     *
     * @param string $string
     *         String var by reference
     * @return string
     *         The remaining digits
     */
    public static function extractNumber(&$string) {
        $string = preg_replace('/[^0-9]/', '', $string);
        return $string;
    }

    /**
     * Returns whether a string is UTF-8 encoded or not.
     *
     * Checks the lead byte of every sequence and that it is followed by the
     * expected number of continuation bytes (10xxxxxx).
     *
     * @param string $input
     * @return bool
     */
    public static function isUtf8($input) {
        $len = strlen($input);

        for ($i = 0; $i < $len; $i++) {
            $char = ord($input[$i]);

            if ($char < 0x80) {
                // ASCII char
                continue;
            } elseif (($char & 0xE0) === 0xC0 && $char > 0xC1) {
                // 2 byte long char (0xC0 and 0xC1 are not accepted as lead bytes)
                $n = 1;
            } elseif (($char & 0xF0) === 0xE0) {
                // 3 byte long char
                $n = 2;
            } elseif (($char & 0xF8) === 0xF0 && $char < 0xF5) {
                // 4 byte long char (lead bytes from 0xF5 on are not accepted)
                $n = 3;
            } else {
                // Stray continuation byte or invalid lead byte
                return false;
            }

            // Consume the continuation bytes of the current sequence
            for ($j = 0; $j < $n; $j++) {
                $i++;

                if ($i == $len || (ord($input[$i]) & 0xC0) !== 0x80) {
                    return false;
                }
            }
        }

        return true;
    }

    /**
     * Checks if a value is alphanumeric.
     *
     * Accepted are the letters a-z in both cases, digits and the space
     * character; the empty string is not alphanumeric.
     *
     * @param mixed $test
     *         Value to test
     * @param bool $umlauts [optional]
     *         Use german umlauts
     * @return bool
     *         Value is alphanumeric
     */
    public static function isAlphanumeric($test, $umlauts = true) {
        if ($umlauts == true) {
            $match = "/^[a-z0-9ÄäÖöÜüß ]+$/i";
        } else {
            $match = "/^[a-z0-9 ]+$/i";
        }

        // preg_match() yields 1, 0 or false; report a real boolean
        return preg_match($match, $test) === 1;
    }

    /**
     * Trims a string to a given length and makes sure that all words up to
     * $maxlen are preserved, without exceeding $maxlen.
     *
     * Warning: Currently, this function uses a regular ASCII-Whitespace to do
     * the separation test. If you are using other characters to create
     * spaces, this function will fail.
     *
     * Example:
     * $string = "This is a simple test";
     * echo cString::trimAfterWord($string, 15);
     *
     * This would output "This is a", since this function respects word
     * boundaries and doesn't operate beyond the limit given by $maxlen.
     *
     * If the first $maxlen bytes contain no space at all, an empty string is
     * returned.
     *
     * @param string $string
     *         The string to operate on
     * @param int $maxlen
     *         The maximum number of characters
     * @return string
     *         The resulting string
     */
    public static function trimAfterWord($string, $maxlen) {
        // A string which fits into the maximum length (including one which
        // has exactly that length) needs no processing. Return it.
        if (strlen($string) <= $maxlen) {
            return $string;
        }

        // If the character after the $maxlen position is a space, we can
        // return the string until $maxlen.
        if (substr($string, $maxlen, 1) == ' ') {
            return substr($string, 0, $maxlen);
        }

        // Cut the string up to $maxlen so we can use strrpos (reverse str
        // position)
        $head = substr($string, 0, $maxlen);

        // Find the end of the last complete word
        $lastSpace = strrpos($head, ' ');
        if ($lastSpace === false) {
            // Not a single complete word fits into the limit
            return '';
        }

        return substr($head, 0, $lastSpace);
    }

    /**
     * Trims a string to a specific length.
     *
     * If the string is longer than $maxlen, it is cut and $fillup ("..." by
     * default) is appended, so that text plus fillup match $maxlen.
     *
     * Example:
     * $string = "This is a simple test";
     * echo cString::trimHard($string, 15);
     *
     * This would output "This is a si...", since the string is longer than
     * $maxlen and the resulting string matches 15 characters including the
     * dots.
     *
     * If $fillup is at least as long as $maxlen, the string is not cut and
     * $fillup is appended to the complete string.
     *
     * @param string $string
     *         The string to operate on
     * @param int $maxlen
     *         The maximum number of characters
     * @param string $fillup [optional]
     *         The string to append to a cut text
     * @return string
     *         The resulting string
     */
    public static function trimHard($string, $maxlen, $fillup = '...') {
        // A string which fits into the maximum length (including one which
        // has exactly that length) needs no processing. Return it.
        if (strlen($string) <= $maxlen) {
            return $string;
        }

        // Calculate the maximum text length
        $maximum_text_length = (int) $maxlen - strlen($fillup);

        // If text length is over zero cut it
        if ($maximum_text_length > 0) {
            // Try to cut by UTF-8 characters first; if the string can't be
            // treated as UTF-8, fall back to cutting by bytes.
            // NOTE(review): the patterns have no "s" modifier, so "." stops
            // at the first line break and the text is cut there - confirm
            // that this is intended.
            if (preg_match('/(*UTF8)^.{0,' . $maximum_text_length . '}/', $string, $result_array)) {
                $cutted_string = $result_array[0];
            } elseif (preg_match('/^.{0,' . $maximum_text_length . '}/u', $string, $result_array)) {
                $cutted_string = $result_array[0];
            } else {
                $cutted_string = substr($string, 0, $maximum_text_length);
            }
        } else {
            $cutted_string = $string;
        }

        // Append the fillup string
        return $cutted_string . $fillup;
    }

    /**
     * Trims a string to an approximate length preserving sentence boundaries.
     *
     * A sentence ends with a dot. The function looks for the last dot before
     * $approxlen and the first dot at or behind $approxlen and cuts the
     * string after the one which is nearer to $approxlen (on equal distance
     * after the previous one).
     *
     * Example:
     *
     * $string = "This contains two sentences. ";
     * $string .= "Lets play around with them. ";
     *
     * echo cString::trimSentence($string, 40);
     * echo cString::trimSentence($string, 50);
     *
     * The first example would only output the first sentence, the second
     * example both sentences.
     *
     * Explanation:
     *
     * The dots are located at the positions 27 and 55. Using the approximate
     * length of 40, the distance to the previous sentence end is 13
     * characters and to the next one 15 characters. Therefore, only the
     * first sentence is returned.
     *
     * Using the approximate length of 50, the distance to the next sentence
     * end is only 5 characters, but 23 to the previous one, so both
     * sentences are returned.
     *
     * If you specify the boolean flag "$hard", the limit parameter creates a
     * hard limit instead of calculating the distance.
     *
     * Without the hard limit this function ensures that at least one
     * sentence is returned.
     *
     * @param string $string
     *         The string to operate on
     * @param int $approxlen
     *         The approximate number of characters
     * @param bool $hard [optional]
     *         If true, use a hard limit for the number of characters
     * @return string
     *         The resulting string
     */
    public static function trimSentence($string, $approxlen, $hard = false) {
        // If the string is smaller than the maximum length, it makes no
        // sense to process it any further. Return it.
        if (strlen($string) < $approxlen) {
            return $string;
        }

        // Find out the end of the next sentence
        $next_sentence_start = strpos($string, '.', $approxlen);

        // If there's no next sentence (somebody forgot the dot?), set it to
        // the end of the string.
        if ($next_sentence_start === false) {
            $next_sentence_start = strlen($string);
        }

        // Cut the string at the limit so we can use strrpos to get the end
        // of the previous sentence
        $previous_sentence_cutted = substr($string, 0, $approxlen);
        $previous_sentence_start = strrpos($previous_sentence_cutted, '.');

        // If the cut part doesn't contain a dot, use the text start.
        if ($previous_sentence_start === false) {
            $previous_sentence_start = 0;
        }

        // If we have a hard limit, we only want to process everything before
        // $approxlen.
        // NOTE(review): if there is no dot before $approxlen, this returns
        // just the first byte of the string - confirm that this is intended.
        if (($hard == true) && ($next_sentence_start > $approxlen)) {
            return substr($string, 0, $previous_sentence_start + 1);
        }

        // Calculate next and previous sentence distances
        $distance_previous_sentence = $approxlen - $previous_sentence_start;
        $distance_next_sentence = $next_sentence_start - $approxlen;

        // Sanity: Return at least one sentence.
        $sanity = substr($string, 0, $previous_sentence_start + 1);

        if (strpos($sanity, '.') === false) {
            return substr($string, 0, $next_sentence_start + 1);
        }

        // Decide whether the next or previous sentence is nearer
        if ($distance_previous_sentence > $distance_next_sentence) {
            return substr($string, 0, $next_sentence_start + 1);
        }

        return substr($string, 0, $previous_sentence_start + 1);
    }

    /**
     * Converts diacritics to english characters whenever possible.
     *
     * For german umlauts, this function converts the umlauts to their ASCII
     * equivalents (e.g. ä => ae).
     *
     * For more information about diacritics, refer to
     * http://en.wikipedia.org/wiki/Diacritic
     *
     * For the other supported characters (acute, grave and circumflex
     * accents on a, e, i, o and u), the diacritic marks are removed.
     *
     * @param string $string
     *         The string to operate on
     * @param string $sourceEncoding [optional; default: UTF-8]
     *         The source encoding
     * @param string $targetEncoding [optional; default: UTF-8]
     *         The target encoding
     * @return string
     *         The resulting string
     */
    public static function replaceDiacritics($string, $sourceEncoding = 'UTF-8', $targetEncoding = 'UTF-8') {
        // The replacement table below is UTF-8, so work on UTF-8 internally
        if ($sourceEncoding != 'UTF-8') {
            $string = self::recodeString($string, $sourceEncoding, "UTF-8");
        }

        // Regular german umlauts and other common characters with
        // diacritics; built only once per request.
        static $map;
        if (!isset($map)) {
            $map = array(
                'Ä' => 'Ae',
                'Ö' => 'Oe',
                'Ü' => 'Ue',
                'ä' => 'ae',
                'ö' => 'oe',
                'ü' => 'ue',
                'ß' => 'ss',
                'Á' => 'A',
                'À' => 'A',
                'Â' => 'A',
                'á' => 'a',
                'à' => 'a',
                'â' => 'a',
                'É' => 'E',
                'È' => 'E',
                'Ê' => 'E',
                'é' => 'e',
                'è' => 'e',
                'ê' => 'e',
                'Í' => 'I',
                'Ì' => 'I',
                'Î' => 'I',
                'í' => 'i',
                'ì' => 'i',
                'î' => 'i',
                'Ó' => 'O',
                'Ò' => 'O',
                'Ô' => 'O',
                'ó' => 'o',
                'ò' => 'o',
                'ô' => 'o',
                'Ú' => 'U',
                'Ù' => 'U',
                'Û' => 'U',
                'ú' => 'u',
                'ù' => 'u',
                'û' => 'u'
            );
        }
        $string = strtr($string, $map);

        // TODO: Additional converting

        return self::recodeString($string, "UTF-8", $targetEncoding);
    }

    /**
     * Converts a string to another encoding.
     *
     * This function tries to detect which function to use (either recode or
     * iconv).
     *
     * If $sourceEncoding and $targetEncoding are the same (compared case
     * insensitively), this function returns immediately.
     *
     * For more information about encodings, refer to
     * http://en.wikipedia.org/wiki/Character_encoding
     *
     * For more information about the supported encodings in recode, refer to
     * http://www.delorie.com/gnu/docs/recode/recode_toc.html
     *
     * Note: depending on whether recode or iconv is used, the supported
     * charsets differ. The following ones are commonly used and are most
     * likely supported by both converters:
     *
     * - ISO-8859-1 to ISO-8859-15
     * - ASCII
     * - UTF-8
     *
     * @todo Check if the charset names are the same for both converters
     * @todo Implement a converter and charset checker to ensure compliance.
     * @param string $string
     *         The string to operate on
     * @param string $sourceEncoding
     *         The source encoding
     * @param string|bool $targetEncoding
     *         The target encoding (if false, use source encoding)
     * @return string
     *         The resulting string; the unchanged string if no converter is
     *         available
     */
    public static function recodeString($string, $sourceEncoding, $targetEncoding) {
        // No target encoding given, the source encoding is kept
        if ($targetEncoding === false) {
            return $string;
        }

        // If sourceEncoding and targetEncoding are the same, return
        if (strtolower($sourceEncoding) == strtolower($targetEncoding)) {
            return $string;
        }

        // Check for the "recode" support (test the function really called)
        if (function_exists('recode_string')) {
            return recode_string("$sourceEncoding..$targetEncoding", $string);
        }

        // Check for the "iconv" support
        if (function_exists('iconv')) {
            return iconv($sourceEncoding, $targetEncoding, $string);
        }

        // No charset converters found; return with warning
        cWarning(__FILE__, __LINE__, 'cString::recodeString could not find either recode or iconv to do charset conversion.');
        return $string;
    }

    /**
     * Removes or converts all "evil" URL characters.
     *
     * This function removes or converts all characters which can make an URL
     * invalid.
     *
     * Processing steps:
     * - Diacritics are replaced by their ASCII equivalents
     * - Space, "/", "&" and "+" are converted to "-"
     * - Every remaining byte which is neither alphanumeric (a-z, A-Z, 0-9)
     *   nor one of "_", "-" or "." is unclean. This includes control
     *   characters like line feeds.
     *
     * @param string $string
     *         The string to operate on
     * @param bool $replace [optional]
     *         If true, all "unclean" characters are replaced by "_",
     *         otherwise they are removed
     * @return string
     *         The resulting string
     */
    public static function cleanURLCharacters($string, $replace = false) {
        $string = self::replaceDiacritics($string);
        $string = str_replace(array(' ', '/', '&', '+'), '-', $string);

        $substitute = $replace ? '_' : '';

        // A negated character class is used on purpose: an anchored
        // per-character check like /^[a-z0-9]*$/ lets a line feed pass,
        // because "$" also matches in front of a trailing newline.
        return preg_replace('/[^a-z0-9_.\-]/i', $substitute, $string);
    }

    /**
     * Normalizes line endings in passed string.
     *
     * @param string $string
     * @param string $lineEnding [optional]
     *         Feasible values are "\n", "\r" or "\r\n"; any other value is
     *         treated like "\n"
     * @return string
     */
    public static function normalizeLineEndings($string, $lineEnding = "\n") {
        if ($lineEnding !== "\n" && $lineEnding !== "\r" && $lineEnding !== "\r\n") {
            $lineEnding = "\n";
        }

        // Unify to "\n" first ("\r\n" has to be handled before a single
        // "\r"), then switch to the requested line ending.
        $string = str_replace(array("\r\n", "\r"), "\n", $string);
        if ($lineEnding !== "\n") {
            $string = str_replace("\n", $lineEnding, $string);
        }

        return $string;
    }
}
707: