1: <?php
2: /**
3: * This file contains the string utility class.
4: *
5: * @package Core
6: * @subpackage Util
7: * @author Murat Purc <murat@purc.de>
8: * @copyright four for business AG <www.4fb.de>
9: * @license http://www.contenido.org/license/LIZENZ.txt
10: * @link http://www.4fb.de
11: * @link http://www.contenido.org
12: */
13:
// Abort immediately when the CON_FRAMEWORK constant has not been defined.
if (!defined('CON_FRAMEWORK')) {
    die('Illegal call: Missing framework initialization - request aborted.');
}
15:
16: /**
17: * String helper class.
18: *
19: * @package Core
20: * @subpackage Util
21: */
22: class cString extends cStringMultiByteWrapper {
23:
/**
 * Case-insensitively replaces the first occurrence of a string.
 *
 * The match position is looked up on lower-cased copies of $subject and
 * $find; the replacement itself is applied to the unmodified $subject.
 *
 * Caution: This function only takes strings as parameters, not arrays!
 *
 * @param string $find
 *         String to find
 * @param string $replace
 *         Replacement string
 * @param string $subject
 *         String to process
 * @return string
 *         $subject with the first match replaced, or $subject unchanged
 *         if $find does not occur in it
 */
public static function iReplaceOnce($find, $replace, $subject) {
    $lowerSubject = parent::toLowerCase($subject);
    $lowerFind = parent::toLowerCase($find);
    $matchPos = parent::findFirstPos($lowerSubject, $lowerFind);

    if ($matchPos !== false) {
        // Split the subject around the match and glue the replacement in between.
        $afterMatch = $matchPos + parent::getStringLength($find);
        $head = parent::getPartOfString($subject, 0, $matchPos);
        $tail = parent::getPartOfString($subject, $afterMatch, parent::getStringLength($subject) - $afterMatch);

        return $head . $replace . $tail;
    }

    // Nothing to replace.
    return $subject;
}
53:
/**
 * Case-insensitively replaces one occurrence of a string, searching in
 * reverse direction.
 *
 * The match position is determined by self::posReverse() on lower-cased
 * copies of $subject and $find; the replacement is applied to the
 * unmodified $subject.
 *
 * Caution: This function only takes strings as parameters, not arrays!
 *
 * @param string $find
 *         String to find
 * @param string $replace
 *         Replacement string
 * @param string $subject
 *         String to process
 * @return string
 *         $subject with the located match replaced, or $subject unchanged
 *         if $find does not occur in it
 */
public static function iReplaceOnceReverse($find, $replace, $subject) {
    $matchPos = self::posReverse(parent::toLowerCase($subject), parent::toLowerCase($find));

    // Without a match the subject is handed back untouched.
    if ($matchPos === false) {
        return $subject;
    }

    $matchEnd = $matchPos + parent::getStringLength($find);
    $remaining = parent::getStringLength($subject) - $matchEnd;

    return parent::getPartOfString($subject, 0, $matchPos)
        . $replace
        . parent::getPartOfString($subject, $matchEnd, $remaining);
}
84:
/**
 * Finds the position of the last occurrence of a string.
 *
 * NOTE: According to the original author, the reverse search function of
 * PHP4 only supported a single character as needle, hence this helper.
 *
 * The haystack is scanned forward with parent::findFirstPos(), starting at
 * $start, and the position of the last hit is remembered. After each hit
 * the scan resumes one character later, so overlapping occurrences are
 * found as well (e.g. "aa" in "aaa" yields 1).
 *
 * @param string $haystack
 *         String to search in
 * @param string $needle
 *         String to search for
 * @param int $start [optional]
 *         Offset at which the search begins
 * @return int|bool
 *         Position of the last occurrence at or after $start, or false if
 *         the needle is empty or does not occur
 */
public static function posReverse($haystack, $needle, $start = 0) {
    // An empty needle has no meaningful position; bail out early so the
    // scan below can never loop on a zero-length match.
    if (parent::getStringLength($needle) == 0) {
        return false;
    }

    $lastPos = false;
    $offset = $start;

    // Iterate instead of recursing: the depth no longer grows with the
    // number of occurrences, and a miss always yields false.
    while (($pos = parent::findFirstPos($haystack, $needle, $offset)) !== false) {
        $lastPos = $pos;
        $offset = $pos + 1;
    }

    return $lastPos;
}
116:
/**
 * Escapes a value with addslashes().
 *
 * Arrays are processed element by element; nested arrays are handled by
 * calling this method recursively.
 *
 * @param string|array $value
 *         Either a string or a multi-dimensional array of values
 * @return string|array
 *         The escaped string, or an array of the same shape with escaped
 *         values
 */
public static function addSlashes($value) {
    if (!is_array($value)) {
        return addslashes($value);
    }

    // Recurse into every element of the array.
    return array_map(array('cString', 'addSlashes'), $value);
}
128:
/**
 * Un-escapes a value with stripslashes().
 *
 * Arrays are processed element by element; nested arrays are handled by
 * calling this method recursively.
 *
 * @param string|array $value
 *         Either a string or a multi-dimensional array of values
 * @return string|array
 *         The un-escaped string, or an array of the same shape with
 *         un-escaped values
 */
public static function stripSlashes($value) {
    if (is_array($value)) {
        // Recurse into every element of the array.
        $result = array_map(array('cString', 'stripSlashes'), $value);
    } else {
        $result = stripslashes($value);
    }

    return $result;
}
140:
/**
 * Tells whether $haystack ends with $needle.
 *
 * The last characters of $haystack (as many as $needle is long) are
 * compared strictly with $needle. An empty $needle always matches.
 *
 * @param string $haystack
 *         The string to check
 * @param string $needle
 *         The expected ending
 * @return bool
 */
public static function endsWith($haystack, $needle) {
    $needleLength = parent::getStringLength($needle);

    // Short-circuits for an empty needle, otherwise compares the tail.
    return $needleLength == 0
        || parent::getPartOfString($haystack, -$needleLength) === $needle;
}
158:
/**
 * Tells whether $needle occurs anywhere in $haystack.
 *
 * @param string $haystack
 *         String to be searched
 * @param string $needle
 *         String to search for
 * @return bool
 *         true if parent::findFirstPos() reports a position, false
 *         otherwise
 */
public static function contains($haystack, $needle) {
    $position = parent::findFirstPos($haystack, $needle);

    // Position 0 is a valid hit, so compare strictly against false.
    return $position !== false;
}
171:
/**
 * Multibyte-aware strstr() with support for the $beforeNeedle flag.
 *
 * Uses mb_strstr() when self::_functionExists() reports it as available
 * and strstr() otherwise. In both modes $needle is searched as a whole
 * substring.
 *
 * @param string $haystack
 *         String to be searched
 * @param string $needle
 *         String to search for
 * @param bool $beforeNeedle [optional]
 *         If true, return everything BEFORE the first occurrence of the
 *         needle; otherwise return the part starting at the needle
 * @return string|bool
 *         The requested part of $haystack, or false if $needle is not
 *         found
 * @link http://php.net/manual/de/function.mb-strstr.php
 * @link http://php.net/manual/de/function.strstr.php
 */
public static function strstr($haystack, $needle, $beforeNeedle = false) {
    $beforeNeedle = (bool) $beforeNeedle;

    // Both native functions implement "before needle" themselves. The
    // previous strtok() based approach treated $needle as a set of
    // delimiter characters and also reset the global tokenizer state.
    if (self::_functionExists('mb_strstr')) {
        return mb_strstr($haystack, $needle, $beforeNeedle);
    }

    return strstr($haystack, $needle, $beforeNeedle);
}
197:
/**
 * Checks whether a date() format string round-trips through DateTime.
 *
 * The current time is rendered with date() using $format, and the result
 * is parsed back with DateTime::createFromFormat() using the same format.
 *
 * @param string $format
 *         Format according to the date() function specification
 * @return bool
 *         true if the rendered date could be parsed again, false otherwise
 */
public static function validateDateFormat($format) {
    $rendered = date($format, time());
    $parsed = DateTime::createFromFormat($format, $rendered);

    // createFromFormat() yields false when the string cannot be parsed.
    return $parsed !== false;
}
211:
/**
 * Strips every character that is not a digit (0-9) from a string.
 *
 * The passed variable is modified in place AND the stripped value is
 * returned.
 *
 * @param string $string
 *         String var by reference; holds the digits-only value afterwards
 * @return string
 *         The digits-only value
 */
public static function extractNumber(&$string) {
    $digitsOnly = preg_replace('/[^0-9]/', '', $string);

    // Write the result back through the reference as well.
    $string = $digitsOnly;

    return $digitsOnly;
}
223:
224:
/**
 * Returns whether a string is UTF-8 encoded or not.
 *
 * The input is walked byte by byte. Bytes below 0x80 are accepted as
 * ASCII; lead bytes 0xC2-0xDF, 0xE0-0xEF and 0xF0-0xF4 must be followed
 * by one, two or three continuation bytes (10xxxxxx) respectively. Any
 * other byte, or a truncated sequence, makes the check fail.
 *
 * Only the lead byte and the continuation-byte markers are inspected;
 * the value ranges of the continuation bytes are not checked further.
 *
 * @param string $input
 * @return bool
 */
public static function isUtf8($input) {
    // The loop indexes single BYTES, so the byte length is required here.
    // A character count would end the loop too early for multi-byte input
    // and make valid sequences look truncated.
    $len = strlen($input);

    for ($i = 0; $i < $len; $i++) {
        $char = ord($input[$i]);

        if ($char < 0x80) {
            // ASCII char
            continue;
        } else if (($char & 0xE0) === 0xC0 && $char > 0xC1) {
            // 2 byte long char (0xC0 and 0xC1 are never valid lead bytes)
            $n = 1;
        } else if (($char & 0xF0) === 0xE0) {
            // 3 byte long char
            $n = 2;
        } else if (($char & 0xF8) === 0xF0 && $char < 0xF5) {
            // 4 byte long char (lead bytes above 0xF4 are rejected)
            $n = 3;
        } else {
            // Stray continuation byte or invalid lead byte
            return false;
        }

        // Consume the expected number of continuation bytes.
        for ($j = 0; $j < $n; $j++) {
            $i++;

            if ($i == $len || (ord($input[$i]) & 0xC0) !== 0x80) {
                return false;
            }
        }
    }

    return true;
}
263:
264:
/**
 * Checks if a value is alphanumeric.
 *
 * Accepted are the letters a-z (case-insensitive), the digits 0-9 and the
 * space character; with $umlauts enabled also the German umlauts and the
 * sharp s. The value must consist of at least one such character.
 *
 * @param mixed $test
 *         Value to test
 * @param bool $umlauts [optional]
 *         Use german umlauts
 * @return bool
 *         Value is alphanumeric
 */
public static function isAlphanumeric($test, $umlauts = true) {
    // The D modifier makes "$" match only at the very end of the value;
    // without it a trailing line break would slip through.
    if ($umlauts == true) {
        $match = "/^[a-z0-9ÄäÖöÜüß ]+$/iD";
    } else {
        $match = "/^[a-z0-9 ]+$/iD";
    }

    // preg_match() yields 1, 0 or false; callers are promised a bool.
    return preg_match($match, $test) === 1;
}
284:
/**
 * Trims a string to a given length and makes sure that all words up to
 * $maxlen are preserved, without exceeding $maxlen.
 *
 * Warning: Currently, this function uses a regular ASCII whitespace to do
 * the separation test. If you are using '&nbsp;' to create spaces, this
 * function will fail.
 *
 * Example:
 * $string = "This is a simple test";
 * echo cString::trimAfterWord($string, 15);
 *
 * This would output "This is a", since this function respects word
 * boundaries and doesn't operate beyond the limit given by $maxlen.
 *
 * A string that is not longer than $maxlen is returned unchanged. If the
 * first $maxlen characters contain no space at all, no complete word fits
 * and an empty string is returned.
 *
 * @param string $string
 *         The string to operate on
 * @param int $maxlen
 *         The maximum number of characters
 * @return string
 *         The resulting string
 */
public static function trimAfterWord($string, $maxlen) {
    // A string that already fits needs no trimming. "<=" matters: a string
    // of exactly $maxlen characters must not lose its last word.
    if (parent::getStringLength($string) <= $maxlen) {
        return $string;
    }

    // If the character right after the limit is a space, the limit falls
    // exactly on a word boundary.
    if (parent::getPartOfString($string, $maxlen, 1) == ' ') {
        return parent::getPartOfString($string, 0, $maxlen);
    }

    // Cut at the limit and look for the last word boundary inside the cut.
    $cutString = parent::getPartOfString($string, 0, $maxlen);
    $lastSpacePos = self::findLastPos($cutString, ' ');

    if ($lastSpacePos === false) {
        // No boundary found: stated explicitly instead of passing false
        // on as a length.
        return '';
    }

    return parent::getPartOfString($cutString, 0, $lastSpacePos);
}
328:
/**
 * Trims a string to a specific length.
 *
 * If the string is longer than $maxlen, it is cut and $fillup ("..." by
 * default) is appended so that the result is at most $maxlen characters
 * long including the fill-up.
 *
 * Example:
 * $string = "This is a simple test";
 * echo cString::trimHard($string, 15);
 *
 * This would output "This is a si...", since the string is longer than
 * $maxlen and the resulting string matches 15 characters including the dots.
 *
 * If $maxlen leaves no room for any text in front of $fillup, the string
 * is cut to $maxlen characters and no fill-up is appended.
 *
 * NOTE: The cut is done with a regular expression in which "." does not
 * match line breaks, so the cut text ends at the first line break.
 *
 * @param string $string
 *         The string to operate on
 * @param int $maxlen
 *         The maximum number of characters
 * @param string $fillup [optional]
 *         String appended to a cut text
 * @return string
 *         The resulting string
 */
public static function trimHard($string, $maxlen, $fillup = '...') {
    // A string that already fits (including one of exactly $maxlen
    // characters) is returned untouched.
    if (parent::getStringLength($string) <= $maxlen) {
        return $string;
    }

    // Number of text characters that fit in front of the fill-up. The
    // cast keeps the value safe for embedding into the patterns below.
    $maxTextLength = (int) $maxlen - parent::getStringLength($fillup);

    if ($maxTextLength <= 0) {
        // No room for text plus fill-up. Appending the fill-up to the
        // whole string would make the result longer than the input.
        return parent::getPartOfString($string, 0, max(0, (int) $maxlen));
    }

    // Try the UTF-8 aware patterns first, fall back to the wrapper.
    if (preg_match('/(*UTF8)^.{0,' . $maxTextLength . '}/', $string, $matches)) {
        $cutString = $matches[0];
    } else if (preg_match('/^.{0,' . $maxTextLength . '}/u', $string, $matches)) {
        $cutString = $matches[0];
    } else {
        $cutString = parent::getPartOfString($string, 0, $maxTextLength);
    }

    return $cutString . $fillup;
}
378:
/**
 * Trims a string to an approximate length preserving sentence boundaries.
 *
 * A sentence end is a dot ("."). The function looks for the last dot
 * before $approxlen and the first dot at or after $approxlen and cuts the
 * string after whichever of the two is nearer to $approxlen.
 *
 * Example:
 *
 * $string = "This contains two sentences. ";
 * $string .= "Lets play around with them. ";
 *
 * echo cString::trimSentence($string, 40);
 * echo cString::trimSentence($string, 50);
 *
 * The first example would only output the first sentence, the second
 * example both sentences.
 *
 * Explanation:
 *
 * To match the given max length closely, the function calculates the
 * distance to the next and previous sentences. Using the maxlength of 40
 * characters, the previous sentence end is nearer than the next one.
 * Therefore, only the first sentence is displayed.
 *
 * The second example displays the second sentence also, since the
 * distance to the next sentence end is smaller than the distance to the
 * previous one.
 *
 * If you specify the boolean flag "$hard", the limit parameter creates a
 * hard limit instead of calculating the distance: the string is cut after
 * the previous sentence end whenever the next one lies beyond $approxlen.
 *
 * This function ensures that at least one sentence is returned: if no
 * sentence ends before $approxlen, the string is cut after the next
 * sentence end, also when $hard is set.
 *
 * @param string $string
 *         The string to operate on
 * @param int $approxlen
 *         The approximate number of characters
 * @param bool $hard [optional]
 *         If true, use a hard limit for the number of characters
 * @return string
 *         The resulting string
 */
public static function trimSentence($string, $approxlen, $hard = false) {
    // A string shorter than the requested length is returned as is.
    if (parent::getStringLength($string) < $approxlen) {
        return $string;
    }

    // End of the sentence that is running at $approxlen. Without a
    // further dot, the end of the string is used.
    $nextSentenceEnd = parent::findFirstPos($string, '.', $approxlen);
    if ($nextSentenceEnd === false) {
        $nextSentenceEnd = parent::getStringLength($string);
    }

    // End of the last sentence that is completed before $approxlen.
    $head = parent::getPartOfString($string, 0, $approxlen);
    $previousSentenceEnd = self::findLastPos($head, '.');

    // Guarantee at least one sentence. This check runs before the
    // hard-limit branch; otherwise a hard limit inside the first sentence
    // would return only the first character of the string.
    if ($previousSentenceEnd === false) {
        return parent::getPartOfString($string, 0, $nextSentenceEnd + 1);
    }

    // Hard limit: never go beyond $approxlen.
    if (($hard == true) && ($nextSentenceEnd > $approxlen)) {
        return parent::getPartOfString($string, 0, $previousSentenceEnd + 1);
    }

    // Otherwise cut after the sentence end that is nearer to $approxlen.
    $distancePrevious = $approxlen - $previousSentenceEnd;
    $distanceNext = $nextSentenceEnd - $approxlen;

    if ($distancePrevious > $distanceNext) {
        return parent::getPartOfString($string, 0, $nextSentenceEnd + 1);
    }

    return parent::getPartOfString($string, 0, $previousSentenceEnd + 1);
}
474:
/**
 * Converts diacritics to english characters whenever possible.
 *
 * German umlauts are converted to their ASCII equivalents (e.g. ä => ae,
 * ß => ss). For the other characters in the replacement table (acute,
 * grave and circumflex variants of a, e, i, o and u) the diacritic mark
 * is removed.
 *
 * For more information about diacritics, refer to
 * http://en.wikipedia.org/wiki/Diacritic
 *
 * The replacement works on UTF-8: a string in another source encoding is
 * recoded to UTF-8 first, and the result is recoded to $targetEncoding.
 *
 * @param string $string
 *         The string to operate on
 * @param string $sourceEncoding [optional; default: UTF-8]
 *         The source encoding
 * @param string $targetEncoding [optional; default: UTF-8]
 *         The target encoding
 *
 * @return string
 *         The resulting string
 * @throws cInvalidArgumentException
 */
public static function replaceDiacritics($string, $sourceEncoding = 'UTF-8', $targetEncoding = 'UTF-8') {
    // Search and replacement lists are built once and reused afterwards.
    static $search, $replace;
    if (!isset($search)) {
        $map = array(
            // german umlauts
            'Ä' => 'Ae',
            'Ö' => 'Oe',
            'Ü' => 'Ue',
            'ä' => 'ae',
            'ö' => 'oe',
            'ü' => 'ue',
            'ß' => 'ss',
            // a
            'Á' => 'A',
            'À' => 'A',
            'Â' => 'A',
            'á' => 'a',
            'à' => 'a',
            'â' => 'a',
            // e
            'É' => 'E',
            'È' => 'E',
            'Ê' => 'E',
            'é' => 'e',
            'è' => 'e',
            'ê' => 'e',
            // i
            'Í' => 'I',
            'Ì' => 'I',
            'Î' => 'I',
            'í' => 'i',
            'ì' => 'i',
            'î' => 'i',
            // o
            'Ó' => 'O',
            'Ò' => 'O',
            'Ô' => 'O',
            'ó' => 'o',
            'ò' => 'o',
            'ô' => 'o',
            // u
            'Ú' => 'U',
            'Ù' => 'U',
            'Û' => 'U',
            'ú' => 'u',
            'ù' => 'u',
            'û' => 'u'
        );
        $search = array_keys($map);
        $replace = array_values($map);
    }

    // The table above is UTF-8, so bring the input to UTF-8 first.
    if ($sourceEncoding != 'UTF-8') {
        $string = self::recodeString($string, $sourceEncoding, "UTF-8");
    }

    $converted = str_replace($search, $replace, $string);

    // TODO: Additional converting

    return self::recodeString($converted, "UTF-8", $targetEncoding);
}
591:
/**
 * Converts a string to another encoding.
 *
 * This function tries to detect which function to use (either
 * recode_string or iconv) and returns the result of that function as is.
 *
 * If $sourceEncoding and $targetEncoding are the same (compared
 * case-insensitively), or $targetEncoding is false, this function returns
 * the string immediately without converting it.
 *
 * For more information about encodings, refer to
 * http://en.wikipedia.org/wiki/Character_encoding
 *
 * For more information about the supported encodings in recode, refer to
 * http://www.delorie.com/gnu/docs/recode/recode_toc.html
 *
 * Note: depending on whether recode or iconv is used, the supported
 * charsets differ. The following ones are commonly used and are most likely
 * supported by both converters:
 *
 * - ISO-8859-1 to ISO-8859-15
 * - ASCII
 * - UTF-8
 *
 * @todo Check if the charset names are the same for both converters
 * @todo Implement a converter and charset checker to ensure compliance.
 *
 * @param string $string
 *         The string to operate on
 * @param string $sourceEncoding
 *         The source encoding
 * @param string|bool $targetEncoding
 *         The target encoding (if false, use source encoding)
 *
 * @return string
 *         The resulting string; the unconverted string if no converter
 *         is available (a warning is raised via cWarning in that case)
 * @throws cInvalidArgumentException
 */
public static function recodeString($string, $sourceEncoding, $targetEncoding) {
    // Documented contract: a false target means "use the source
    // encoding", so there is nothing to convert.
    if ($targetEncoding === false) {
        return $string;
    }

    // If sourceEncoding and targetEncoding are the same, return
    if (parent::toLowerCase($sourceEncoding) == parent::toLowerCase($targetEncoding)) {
        return $string;
    }

    // Check for "recode" support; test for the function that is called.
    if (function_exists('recode_string')) {
        return recode_string("$sourceEncoding..$targetEncoding", $string);
    }

    // Check for the "iconv" support
    if (function_exists('iconv')) {
        return iconv($sourceEncoding, $targetEncoding, $string);
    }

    // No charset converters found; return with warning
    cWarning(__FILE__, __LINE__, 'cString::recodeString could not find either recode or iconv to do charset conversion.');
    return $string;
}
651:
/**
 * Removes or converts all "evil" URL characters.
 *
 * This function removes or converts all characters which can make an URL
 * invalid.
 *
 * Processing steps:
 * - diacritics are converted via self::replaceDiacritics()
 * - space, "/", "&" and "+" are converted to "-"
 * - of the remaining characters only a-z, A-Z, 0-9, "-", "_" and "." are
 *   kept; every other character (including line breaks) is dropped, or
 *   replaced by "_" if $replace is true
 *
 * @param string $string
 *         The string to operate on
 * @param bool $replace [optional]
 *         If true, all "unclean" characters are replaced by "_" instead
 *         of being removed
 *
 * @return string
 *         The resulting string
 * @throws cInvalidArgumentException
 */
public static function cleanURLCharacters($string, $replace = false) {
    $string = self::replaceDiacritics($string);
    $string = str_replace(array(' ', '/', '&', '+'), '-', $string);

    $length = parent::getStringLength($string);
    $result = '';

    for ($i = 0; $i < $length; $i++) {
        $char = parent::getPartOfString($string, $i, 1);

        // \A and \z anchor the whole character. With "^...$" a lone line
        // break would match, because "$" also matches in front of a
        // trailing newline.
        if (preg_match('/\A[a-z0-9_.\-]\z/i', $char)) {
            $result .= $char;
        } else if ($replace == true) {
            $result .= '_';
        }
    }

    return $result;
}
696:
/**
 * Normalizes line endings in passed string.
 *
 * All "\r\n" and "\r" sequences are first converted to "\n"; afterwards
 * every "\n" is converted to the requested line ending.
 *
 * @param string $string
 * @param string $lineEnding [optional]
 *         Feasible values are "\n", "\r" or "\r\n"; any other value is
 *         treated as "\n"
 * @return string
 */
public static function normalizeLineEndings($string, $lineEnding = "\n") {
    // Fall back to "\n" for anything that is not a known line ending.
    if (!in_array($lineEnding, array("\n", "\r", "\r\n"), true)) {
        $lineEnding = "\n";
    }

    // Searches are applied in order: "\r\n" first, then any remaining "\r".
    $normalized = str_replace(array("\r\n", "\r"), "\n", $string);

    if ($lineEnding === "\n") {
        return $normalized;
    }

    return str_replace("\n", $lineEnding, $normalized);
}
718: }