1: <?php
2: /**
3: * This file contains the string utility class.
4: *
5: * @package Core
6: * @subpackage Util
7: * @version SVN Revision $Rev:$
8: *
9: * @author Murat Purc <murat@purc.de>
10: * @copyright four for business AG <www.4fb.de>
11: * @license http://www.contenido.org/license/LIZENZ.txt
12: * @link http://www.4fb.de
13: * @link http://www.contenido.org
14: */
15:
// Abort immediately when this file is requested without the framework
// bootstrap having defined the CON_FRAMEWORK constant.
if (!defined('CON_FRAMEWORK')) {
    die('Illegal call: Missing framework initialization - request aborted.');
}
17:
/**
 * String helper class.
 *
 * Collection of static helpers for searching, replacing, trimming,
 * validating and re-encoding strings. Most helpers rely on PHP's
 * byte-oriented string functions; methods that are UTF-8 aware say so in
 * their own documentation.
 *
 * @package Core
 * @subpackage Util
 */
class cString {

    /**
     * Replaces the first case-insensitive occurrence of $find in $subject.
     *
     * Caution: This function only takes strings as parameters, not arrays!
     *
     * @param string $find
     *         String to find
     * @param string $replace
     *         String to insert instead of the match
     * @param string $subject
     *         String to process
     * @return string
     *         Processed string; $subject is returned unchanged when $find
     *         is empty or does not occur
     */
    public static function iReplaceOnce($find, $replace, $subject) {
        // Cast first: older PHP versions interpret an integer needle as a
        // character code instead of as text.
        $find = (string) $find;

        // An empty needle has no meaningful position.
        if ($find === '') {
            return $subject;
        }

        $start = stripos($subject, $find);
        if ($start === false) {
            return $subject;
        }

        return substr_replace($subject, $replace, $start, strlen($find));
    }

    /**
     * Replaces the last case-insensitive occurrence of $find in $subject.
     *
     * Caution: This function only takes strings as parameters, not arrays!
     *
     * @param string $find
     *         String to find
     * @param string $replace
     *         String to insert instead of the match
     * @param string $subject
     *         String to process
     * @return string
     *         Processed string; $subject is returned unchanged when $find
     *         is empty or does not occur
     */
    public static function iReplaceOnceReverse($find, $replace, $subject) {
        $find = (string) $find;

        if ($find === '') {
            return $subject;
        }

        $start = strripos($subject, $find);
        if ($start === false) {
            return $subject;
        }

        return substr_replace($subject, $replace, $start, strlen($find));
    }

    /**
     * Finds the position of the last occurrence of $needle in $haystack.
     *
     * Only the part of $haystack beginning at $start is searched. A
     * negative $start counts from the end of $haystack.
     *
     * @param string $haystack
     *         String to search in
     * @param string $needle
     *         String to search for
     * @param int $start [optional]
     *         Offset at which the searched region begins
     * @return int|false
     *         Zero-based position of the last occurrence, or false when
     *         $needle is empty or does not occur at or after $start
     */
    public static function posReverse($haystack, $needle, $start = 0) {
        $haystack = (string) $haystack;
        $needle = (string) $needle;
        $start = (int) $start;

        // An empty needle matches everywhere; report "not found" instead.
        if ($needle === '') {
            return false;
        }

        // strrpos() gives negative offsets a different meaning than strpos(),
        // so translate them into an absolute start position first.
        if ($start < 0) {
            $start = max(0, strlen($haystack) + $start);
        }

        // Nothing can match behind the end of the haystack.
        if ($start > strlen($haystack)) {
            return false;
        }

        return strrpos($haystack, $needle, $start);
    }

    /**
     * Adds slashes to passed variable or array.
     *
     * Arrays are processed recursively, every other value is passed to
     * addslashes().
     *
     * @param string|array $value
     *         Either a string or a multi-dimensional array of values
     * @return string|array
     */
    public static function addSlashes($value) {
        if (is_array($value)) {
            return array_map(array(__CLASS__, 'addSlashes'), $value);
        }

        return addslashes($value);
    }

    /**
     * Removes slashes from passed variable or array.
     *
     * Arrays are processed recursively, every other value is passed to
     * stripslashes().
     *
     * @param string|array $value
     *         Either a string or a multi-dimensional array of values
     * @return string|array
     */
    public static function stripSlashes($value) {
        if (is_array($value)) {
            return array_map(array(__CLASS__, 'stripSlashes'), $value);
        }

        return stripslashes($value);
    }

    /**
     * Checks if the string haystack ends with needle.
     *
     * @param string $haystack
     *         The string to check
     * @param string $needle
     *         The string with which it should end
     * @return bool
     *         Always true for an empty needle
     */
    public static function endsWith($haystack, $needle) {
        $haystack = (string) $haystack;
        $needle = (string) $needle;

        $length = strlen($needle);
        if ($length === 0) {
            return true;
        }

        return substr($haystack, -$length) === $needle;
    }

    /**
     * Returns true if needle can be found in haystack.
     *
     * @param string $haystack
     *         String to be searched
     * @param string $needle
     *         String to search for
     * @return bool
     */
    public static function contains($haystack, $needle) {
        return strpos($haystack, $needle) !== false;
    }

    /**
     * Wrapper around PHP's strstr() including its "before needle" mode.
     *
     * @param string $haystack
     *         String to be searched
     * @param string $needle
     *         String to search for
     * @param bool $beforeNeedle [optional]
     *         If true, return everything BEFORE the first occurrence of
     *         needle, otherwise everything from that occurrence on
     * @return string|false
     *         The requested part of haystack, or false when needle does
     *         not occur
     * @link http://php.net/manual/de/function.strstr.php
     */
    public static function strstr($haystack, $needle, $beforeNeedle = false) {
        // Inside a method an unqualified call resolves to the global function.
        return strstr($haystack, $needle, (bool) $beforeNeedle);
    }

    /**
     * Checks if a given format is accepted by PHP's date handling.
     *
     * The current time is formatted with date() and the result is parsed
     * back with DateTime::createFromFormat() using the same format.
     *
     * NOTE(review): format characters that date() understands but
     * createFromFormat() cannot parse are presumably reported as invalid -
     * confirm that this is intended.
     *
     * @param string $format
     *         Format according to date function specification
     * @return bool
     *         true if the formatted date could be parsed back, false
     *         otherwise
     */
    public static function validateDateFormat($format) {
        $formatted = date($format);

        return DateTime::createFromFormat($format, $formatted) !== false;
    }

    /**
     * Reduces a string to its digits.
     *
     * Every character except 0-9 is removed. The passed variable itself is
     * modified and the result is returned as well.
     *
     * @param string $string
     *         String var by reference
     * @return string
     *         The digits contained in the string
     */
    public static function extractNumber(&$string) {
        $string = preg_replace('/[^0-9]/', '', $string);

        return $string;
    }

    /**
     * Returns whether a string is valid UTF-8 or not.
     *
     * The check is delegated to PCRE: matching with the "u" modifier fails
     * for subjects that are not well-formed UTF-8, which also rejects
     * overlong sequences and surrogates. An empty string and plain ASCII
     * count as valid UTF-8.
     *
     * @param string $input
     * @return bool
     */
    public static function isUtf8($input) {
        return preg_match('//u', (string) $input) === 1;
    }

    /**
     * Checks if a value is alphanumeric.
     *
     * Accepted are the letters a-z in any case, digits and spaces and,
     * when $umlauts is set, german umlauts and the sharp s. An empty value
     * is not alphanumeric.
     *
     * NOTE(review): the pattern has no "u" modifier, so the umlauts are
     * compared byte-wise in whatever encoding this source file is saved
     * in - confirm that tested values use the same encoding.
     *
     * @param mixed $test
     *         Value to test
     * @param bool $umlauts [optional]
     *         Use german umlauts
     * @return bool
     *         Value is alphanumeric
     */
    public static function isAlphanumeric($test, $umlauts = true) {
        // Arrays, objects and null can never be alphanumeric.
        if (!is_scalar($test)) {
            return false;
        }

        // The D modifier stops "$" from matching in front of a trailing newline.
        if ($umlauts) {
            $pattern = '/^[a-z0-9ÄäÖöÜüß ]+$/iD';
        } else {
            $pattern = '/^[a-z0-9 ]+$/iD';
        }

        return preg_match($pattern, (string) $test) === 1;
    }

    /**
     * Trims a string to a given length and makes sure that all words up to
     * $maxlen are preserved, without exceeding $maxlen.
     *
     * Warning: Currently, this function uses a regular ASCII-Whitespace to
     * do the separation test. If you are using other characters to create
     * spaces, this function will fail. Lengths are counted in bytes.
     *
     * Example:
     * $string = "This is a simple test";
     * echo cString::trimAfterWord($string, 15);
     *
     * This would output "This is a", since this function respects word
     * boundaries and doesn't operate beyond the limit given by $maxlen.
     *
     * @param string $string
     *         The string to operate on
     * @param int $maxlen
     *         The maximum number of characters
     * @return string
     *         The resulting string; empty when not even the first word
     *         fits into $maxlen
     */
    public static function trimAfterWord($string, $maxlen) {
        // A string that already fits (including an exact fit) stays untouched.
        if (strlen($string) <= $maxlen) {
            return $string;
        }

        // A space right behind the limit means the limit is a word boundary.
        if (substr($string, $maxlen, 1) === ' ') {
            return substr($string, 0, $maxlen);
        }

        // Otherwise step back to the end of the last complete word.
        $head = substr($string, 0, $maxlen);
        $lastSpace = strrpos($head, ' ');

        if ($lastSpace === false) {
            // The first word alone is longer than the limit.
            return '';
        }

        return substr($head, 0, $lastSpace);
    }

    /**
     * Trims a string to a specific length.
     * If the string is longer than $maxlen, it is cut and $fillup (dots by
     * default) is appended so that the result is $maxlen characters long.
     *
     * Characters are counted UTF-8 aware when the string is valid UTF-8,
     * otherwise bytes are counted.
     *
     * Example:
     * $string = "This is a simple test";
     * echo cString::trimHard($string, 15);
     *
     * This would output "This is a si...", since the string is longer than
     * $maxlen and the resulting string matches 15 characters including the
     * dots.
     *
     * @param string $string
     *         The string to operate on
     * @param int $maxlen
     *         The maximum number of characters
     * @param string $fillup [optional]
     *         Marker appended to a shortened string
     * @return string
     *         The resulting string
     */
    public static function trimHard($string, $maxlen, $fillup = '...') {
        $maxlen = (int) $maxlen;

        // Fast path: a string that fits byte-wise fits character-wise too.
        if (strlen($string) <= $maxlen) {
            return $string;
        }

        // Multi-byte strings may still fit when counted in characters.
        if (self::_leadingChars($string, $maxlen) === $string) {
            return $string;
        }

        // Measure the fillup with the same unit that is used for cutting.
        $fillLength = preg_match_all('/./us', $fillup, $unused);
        if ($fillLength === false) {
            $fillLength = strlen($fillup);
        }

        $textLength = $maxlen - $fillLength;

        if ($textLength <= 0) {
            // No room for any text: the (shortened) fillup is all that fits.
            return self::_leadingChars($fillup, $maxlen);
        }

        return self::_leadingChars($string, $textLength) . $fillup;
    }

    /**
     * Returns the first $count characters of a string.
     *
     * Valid UTF-8 is cut on character boundaries (newlines included);
     * anything else falls back to a plain byte cut.
     *
     * @param string $string
     *         The string to cut
     * @param int $count
     *         Number of characters to keep
     * @return string
     */
    private static function _leadingChars($string, $count) {
        if ($count <= 0) {
            return '';
        }

        // PCRE limits repeat counts to 65535; larger counts use the byte cut.
        if ($count <= 65535 && preg_match('/^.{0,' . $count . '}/us', $string, $match) === 1) {
            return $match[0];
        }

        return (string) substr($string, 0, $count);
    }

    /**
     * Trims a string to an approximate length.
     * Sentence boundaries are preserved.
     *
     * The algorithm measures the distance from $approxlen to the end of
     * the previous sentence and to the end of the next sentence and cuts
     * at whichever is nearer. A sentence end is a dot ("."). Lengths are
     * counted in bytes.
     *
     * Example:
     *
     * $string = "This contains two sentences. ";
     * $string .= "Lets play around with them. ";
     *
     * echo cString::trimSentence($string, 40);
     * echo cString::trimSentence($string, 50);
     *
     * The first example would only output the first sentence, the second
     * example both sentences: at 40 the end of the first sentence is
     * nearer, at 50 the end of the second sentence is nearer.
     *
     * If you specify the boolean flag "$hard", the limit parameter creates
     * a hard limit instead of calculating the distance.
     *
     * Without the hard limit at least one sentence is returned.
     *
     * @param string $string
     *         The string to operate on
     * @param int $approxlen
     *         The approximate number of characters
     * @param bool $hard [optional]
     *         If true, use a hard limit for the number of characters
     * @return string
     *         The resulting string
     */
    public static function trimSentence($string, $approxlen, $hard = false) {
        // Shorter strings need no processing.
        if (strlen($string) < $approxlen) {
            return $string;
        }

        // End of the sentence that is running at $approxlen; without a dot
        // the end of the string is used.
        $nextEnd = strpos($string, '.', $approxlen);
        if ($nextEnd === false) {
            $nextEnd = strlen($string);
        }

        // End of the last sentence that is completed before $approxlen;
        // without a dot the start of the text is used.
        $previousEnd = strrpos(substr($string, 0, $approxlen), '.');
        if ($previousEnd === false) {
            $previousEnd = 0;
        }

        // Hard limit: never go beyond $approxlen.
        // NOTE(review): when no sentence ends before $approxlen this
        // returns only the first character of $string - confirm intent.
        if ($hard == true && $nextEnd > $approxlen) {
            return substr($string, 0, $previousEnd + 1);
        }

        // Sanity: return at least one sentence.
        if (strpos(substr($string, 0, $previousEnd + 1), '.') === false) {
            return substr($string, 0, $nextEnd + 1);
        }

        // Cut at the sentence end which is nearer to $approxlen.
        $distancePrevious = $approxlen - $previousEnd;
        $distanceNext = $nextEnd - $approxlen;

        if ($distancePrevious > $distanceNext) {
            return substr($string, 0, $nextEnd + 1);
        }

        return substr($string, 0, $previousEnd + 1);
    }

    /**
     * Converts diacritics to english characters whenever possible.
     *
     * German umlauts are converted to their ASCII equivalents
     * (e.g. ä => ae, ß => ss). For the vowels a, e, i, o and u the acute,
     * grave and circumflex marks are removed. All other characters are
     * left untouched.
     *
     * The string is converted to UTF-8 for the replacement and to
     * $targetEncoding afterwards.
     *
     * For more information about diacritics, refer to
     * http://en.wikipedia.org/wiki/Diacritic
     *
     * @param string $string
     *         The string to operate on
     * @param string $sourceEncoding [optional; default: UTF-8]
     *         The source encoding
     * @param string $targetEncoding [optional; default: UTF-8]
     *         The target encoding
     * @return string
     *         The resulting string
     */
    public static function replaceDiacritics($string, $sourceEncoding = 'UTF-8', $targetEncoding = 'UTF-8') {
        // Replacement table, built once per request.
        static $map = null;

        if ($map === null) {
            $map = array(
                'Ä' => 'Ae',
                'Ö' => 'Oe',
                'Ü' => 'Ue',
                'ä' => 'ae',
                'ö' => 'oe',
                'ü' => 'ue',
                'ß' => 'ss',
                'Á' => 'A',
                'À' => 'A',
                'Â' => 'A',
                'á' => 'a',
                'à' => 'a',
                'â' => 'a',
                'É' => 'E',
                'È' => 'E',
                'Ê' => 'E',
                'é' => 'e',
                'è' => 'e',
                'ê' => 'e',
                'Í' => 'I',
                'Ì' => 'I',
                'Î' => 'I',
                'í' => 'i',
                'ì' => 'i',
                'î' => 'i',
                'Ó' => 'O',
                'Ò' => 'O',
                'Ô' => 'O',
                'ó' => 'o',
                'ò' => 'o',
                'ô' => 'o',
                'Ú' => 'U',
                'Ù' => 'U',
                'Û' => 'U',
                'ú' => 'u',
                'ù' => 'u',
                'û' => 'u'
            );
        }

        // recodeString() returns at once when both encodings are the same.
        $string = self::recodeString($string, $sourceEncoding, 'UTF-8');
        $string = strtr($string, $map);

        // TODO: Additional converting

        return self::recodeString($string, 'UTF-8', $targetEncoding);
    }

    /**
     * Converts a string to another encoding.
     * This function tries to detect which function to use (either recode
     * or iconv).
     *
     * If $sourceEncoding and $targetEncoding are the same (compared case
     * insensitively), this function returns immediately.
     *
     * For more information about encodings, refer to
     * http://en.wikipedia.org/wiki/Character_encoding
     *
     * For more information about the supported encodings in recode, refer
     * to http://www.delorie.com/gnu/docs/recode/recode_toc.html
     *
     * Note: depending on whether recode or iconv is used, the supported
     * charsets differ. The following ones are commonly used and are most
     * likely supported by both converters:
     *
     * - ISO-8859-1 to ISO-8859-15
     * - ASCII
     * - UTF-8
     *
     * @todo Check if the charset names are the same for both converters
     * @todo Implement a converter and charset checker to ensure compliance.
     *
     * @param string $string
     *         The string to operate on
     * @param string $sourceEncoding
     *         The source encoding
     * @param string $targetEncoding
     *         The target encoding (if false or empty, the source encoding
     *         is kept and the string is returned unchanged)
     * @return string|false
     *         The resulting string, or whatever the converter returned
     *         when it was not able to convert the string
     */
    public static function recodeString($string, $sourceEncoding, $targetEncoding) {
        // No target encoding means "keep the source encoding".
        if ($targetEncoding === false || $targetEncoding === null || $targetEncoding === '') {
            return $string;
        }

        // Nothing to do when both encodings are the same.
        if (strcasecmp((string) $sourceEncoding, (string) $targetEncoding) === 0) {
            return $string;
        }

        // Prefer the "recode" extension; test for the function that is called.
        if (function_exists('recode_string')) {
            return recode_string($sourceEncoding . '..' . $targetEncoding, $string);
        }

        // Otherwise try "iconv".
        if (function_exists('iconv')) {
            return iconv($sourceEncoding, $targetEncoding, $string);
        }

        // No charset converters found; return with warning
        cWarning(__FILE__, __LINE__, 'cString::recodeString could not find either recode or iconv to do charset conversion.');

        return $string;
    }

    /**
     * Removes or converts all "evil" URL characters.
     * This function removes or converts all characters which can make an
     * URL invalid.
     *
     * Diacritics are converted first (see replaceDiacritics()), then
     * spaces, slashes, ampersands and plus signs are turned into dashes.
     * Of the result only a-z, A-Z, 0-9 and the characters _ - . are kept;
     * every other byte is dropped or, with $replace, turned into an
     * underscore.
     *
     * @param string $string
     *         The string to operate on
     * @param bool $replace [optional]
     *         If true, all "unclean" characters are replaced by "_"
     *         instead of being removed
     * @return string
     *         The resulting string
     */
    public static function cleanURLCharacters($string, $replace = false) {
        $string = self::replaceDiacritics($string);
        $string = str_replace(array(' ', '/', '&', '+'), '-', $string);

        // A negated character class also catches newlines, which an anchored
        // per-character test with "$" would let through.
        $substitute = $replace ? '_' : '';

        return preg_replace('/[^a-z0-9_.\-]/i', $substitute, $string);
    }

    /**
     * Normalizes line endings in passed string.
     *
     * All "\r\n" and "\r" are unified to "\n" first and converted to the
     * requested line ending afterwards.
     *
     * @param string $string
     * @param string $lineEnding [optional]
     *         Feasible values are "\n", "\r" or "\r\n"; any other value
     *         falls back to "\n"
     * @return string
     */
    public static function normalizeLineEndings($string, $lineEnding = "\n") {
        if (!in_array($lineEnding, array("\n", "\r", "\r\n"), true)) {
            $lineEnding = "\n";
        }

        // The order matters: "\r\n" has to be handled before the single "\r".
        $string = str_replace(array("\r\n", "\r"), "\n", $string);

        if ($lineEnding !== "\n") {
            $string = str_replace("\n", $lineEnding, $string);
        }

        return $string;
    }
}
710: