File classes/class.string.php | CMS CONTENIDO 4.10.1

  1: <?php
  2: /**
  3:  * This file contains the string utility class.
  4:  *
  5:  * @package    Core
  6:  * @subpackage Util
  7:  * @author     Murat Purc <murat@purc.de>
  8:  * @copyright  four for business AG <www.4fb.de>
  9:  * @license    http://www.contenido.org/license/LIZENZ.txt
 10:  * @link       http://www.4fb.de
 11:  * @link       http://www.contenido.org
 12:  */
 13: 
 14: defined('CON_FRAMEWORK') || die('Illegal call: Missing framework initialization - request aborted.');
 15: 
 16: /**
 17:  * String helper class.
 18:  *
 19:  * @package Core
 20:  * @subpackage Util
 21:  */
 22: class cString extends cStringMultiByteWrapper {
 23: 
 24:     /**
 25:      * Replaces a string only once.
 26:      *
 27:      * Caution: This function only takes strings as parameters, not arrays!
 28:      *
 29:      * @param string $find
 30:      *         String to find
 31:      * @param string $replace
 32:      *         String to replace
 33:      * @param string $subject
 34:      *         String to process
 35:      * @return string
 36:      *         Processed string
 37:      */
 38:     public static function iReplaceOnce($find, $replace, $subject) {
 39:         $start = parent::findFirstPos(parent::toLowerCase($subject), parent::toLowerCase($find));
 40: 
 41:         if ($start === false) {
 42:             return $subject;
 43:         }
 44: 
 45:         $end = $start + parent::getStringLength($find);
 46:         $first = parent::getPartOfString($subject, 0, $start);
 47:         $last = parent::getPartOfString($subject, $end, parent::getStringLength($subject) - $end);
 48: 
 49:         $result = $first . $replace . $last;
 50: 
 51:         return $result;
 52:     }
 53: 
 54:     /**
 55:      * Replaces a string only once, in reverse direction.
 56:      *
 57:      * Caution: This function only takes strings as parameters, not arrays!
 58:      *
 59:      * @param string $find
 60:      *         String to find
 61:      * @param string $replace
 62:      *         String to replace
 63:      * @param string $subject
 64:      *         String to process
 65:      * @return string
 66:      *         Processed string
 67:      */
 68:     public static function iReplaceOnceReverse($find, $replace, $subject) {
 69:         $start = self::posReverse(parent::toLowerCase($subject), parent::toLowerCase($find));
 70: 
 71:         if ($start === false) {
 72:             return $subject;
 73:         }
 74: 
 75:         $end = $start + parent::getStringLength($find);
 76: 
 77:         $first = parent::getPartOfString($subject, 0, $start);
 78:         $last = parent::getPartOfString($subject, $end, parent::getStringLength($subject) - $end);
 79: 
 80:         $result = $first . $replace . $last;
 81: 
 82:         return $result;
 83:     }
 84: 
 85:     /**
 86:      * Finds a string position in reverse direction.
 87:      *
 88:      * NOTE: The native strrpos() function of PHP 4 only uses a single character
 89:      * as needle.
 90:      *
 91:      * @param string $haystack
 92:      *         String to search in
 93:      * @param string $needle
 94:      *         String to search for
 95:      * @param int $start [optional]
 96:      *         Offset
 97:      * @return int|false
 98:      *         String position, or false if the needle was not found
 99:      */
100:     public static function posReverse($haystack, $needle, $start = 0) {
101:         $tempPos = parent::findFirstPos($haystack, $needle, $start);
102: 
103:         if ($tempPos === false) {
104:             if ($start == 0) {
105:                 // Needle not in string at all
106:                 return false;
107:             } else {
108:                 // No more occurances found
109:                 return $start - parent::getStringLength($needle);
110:             }
111:         } else {
112:             // Find the next occurance
113:             return self::posReverse($haystack, $needle, $tempPos + parent::getStringLength($needle));
114:         }
115:     }
116: 
117:     /**
118:      * Adds slashes to passed variable or array.
119:      *
120:      * @param string|array $value
121:      *         Either a string or a multi-dimensional array of values
122:      * @return string|array
123:      */
124:     public static function addSlashes($value) {
125:         $value = is_array($value) ? array_map(array('cString', 'addSlashes'), $value) : addslashes($value);
126:         return $value;
127:     }
128: 
129:     /**
130:      * Removes slashes from passed variable or array.
131:      *
132:      * @param string|array $value
133:      *         Either a string or a multi-dimensional array of values
134:      * @return string|array
135:      */
136:     public static function stripSlashes($value) {
137:         $value = is_array($value) ? array_map(array('cString', 'stripSlashes'), $value) : stripslashes($value);
138:         return $value;
139:     }
140: 
141:     /**
142:      * Checks if the string haystack ends with needle.
143:      *
144:      * @param string $haystack
145:      *         The string to check
146:      * @param string $needle
147:      *         The string with which it should end
148:      * @return bool
149:      */
150:     public static function endsWith($haystack, $needle) {
151:         $length = parent::getStringLength($needle);
152:         if ($length == 0) {
153:             return true;
154:         }
155: 
156:         return parent::getPartOfString($haystack, -$length) === $needle;
157:     }
158: 
159:     /**
160:      * Returns true if needle can be found in haystack.
161:      *
162:      * @param string $haystack
163:      *         String to be searched
164:      * @param string $needle
165:      *         String to search for
166:      * @return bool
167:      */
168:     public static function contains($haystack, $needle) {
169:         return !(parent::findFirstPos($haystack, $needle) === false);
170:     }
171: 
172:     /**
173:      * Implementation of PHP 5.3's strstr with beforeNeedle.
174:      *
175:      * @param string $haystack
176:      *         String to be searched
177:      * @param string $needle
178:      *         String to search for
179:      * @param bool $beforeNeedle [optional]
180:      *         If true, return everything BEFORE needle
181:      * @return string
182:      * @link http://php.net/manual/de/function.mb-strstr.php
183:      * @link http://php.net/manual/de/function.strstr.php
184:      */
185:     public static function strstr($haystack, $needle, $beforeNeedle = false) {
186: 
187:         if (!$beforeNeedle) {
188:             if (self::_functionExists('mb_strstr')) {
189:                 return mb_strstr($haystack, $needle);
190:             } else {
191:                 return strstr($haystack, $needle);
192:             }
193:         } else {
194:             return strtok($haystack, $needle);
195:         }
196:     }
197: 
198:     /**
199:      * This function checks if a given format is accepted by php's date function.
200:      *
201:      * @param string $format
202:      *         format according to date function specification
203:      * @return bool
204:      *         true if format is correct, false otherwise
205:      */
206:     public static function validateDateFormat($format) {
207:         // try to create a DateTime instance based on php's date function format specification
208:         // return true if date is valid (no wrong format)
209:         return false !== DateTime::createFromFormat($format, date($format, time()));
210:     }
211: 
212:     /**
213:      * Extract a number from a string.
214:      *
215:      * @param string $string
216:      *         String var by reference
217:      * @return string
218:      */
219:     public static function extractNumber(&$string) {
220:         $string = preg_replace('/[^0-9]/', '', $string);
221:         return $string;
222:     }
223: 
224: 
225:     /**
226:      * Returns whether a string is UTF-8 encoded or not.
227:      *
228:      * @param string $input
229:      * @return bool
230:      */
231:     public static function isUtf8($input) {
232:         $len = parent::getStringLength($input);
233: 
234:         for ($i = 0; $i < $len; $i++) {
235:             $char = ord($input[$i]);
236: 
237:             if ($char < 0x80) {
238:                 // ASCII char
239:                 continue;
240:             } else if (($char & 0xE0) === 0xC0 && $char > 0xC1) {
241:                 // 2 byte long char
242:                 $n = 1;
243:             } else if (($char & 0xF0) === 0xE0) {
244:                 // 3 byte long char
245:                 $n = 2;
246:             } else if (($char & 0xF8) === 0xF0 && $char < 0xF5) {
247:                 // 4 byte long char
248:                 $n = 3;
249:             } else {
250:                 return false;
251:             }
252: 
253:             for ($j = 0; $j < $n; $j++) {
254:                 $i++;
255: 
256:                 if ($i == $len || (ord($input[$i]) & 0xC0) !== 0x80) {
257:                     return false;
258:                 }
259:             }
260:         }
261:         return true;
262:     }
263: 
264: 
265:     /**
266:      * Checks if a value is alphanumeric.
267:      *
268:      * @param mixed $test
269:      *         Value to test
270:      * @param bool $umlauts [optional]
271:      *         Use german umlauts
272:      * @return bool
273:      *         Value is alphanumeric
274:      */
275:     public static function isAlphanumeric($test, $umlauts = true) {
276:         if ($umlauts == true) {
277:             $match = "/^[a-z0-9ÄäÖöÜüß ]+$/i";
278:         } else {
279:             $match = "/^[a-z0-9 ]+$/i";
280:         }
281: 
282:         return preg_match($match, $test);
283:     }
284: 
285:     /**
286:      * Trims a string to a given length and makes sure that all words up to
287:      * $maxlen are preserved, without exceeding $maxlen.
288:      *
289:      * Warning: Currently, this function uses a regular ASCII-Whitespace to do
290:      * the separation test. If you are using '&nbsp' to create spaces, this
291:      * function will fail.
292:      *
293:      * Example:
294:      * $string = "This is a simple test";
295:      * echo cString::trimAfterWord($string, 15);
296:      *
297:      * This would output "This is a", since this function respects word
298:      * boundaries and doesn't operate beyond the limit given by $maxlen.
299:      *
300:      * @param string $string
301:      *         The string to operate on
302:      * @param int $maxlen
303:      *         The maximum number of characters
304:      * @return string
305:      *         The resulting string
306:      */
307:     public static function trimAfterWord($string, $maxlen) {
308:         // If the string is smaller than the maximum lenght, it makes no sense to
309:         // process it any further. Return it.
310:         if (parent::getStringLength($string) < $maxlen) {
311:             return $string;
312:         }
313: 
314:         // If the character after the $maxlen position is a space, we can return
315:         // the string until $maxlen.
316:         if (parent::getPartOfString($string, $maxlen, 1) == ' ') {
317:             return parent::getPartOfString($string, 0, $maxlen);
318:         }
319: 
320:         // Cut the string up to $maxlen so we can use cString::findLastPos (reverse str position)
321:         $cutted_string = parent::getPartOfString($string, 0, $maxlen);
322: 
323:         // Extract the end of the last word
324:         $last_word_position = cString::findLastPos($cutted_string, ' ');
325: 
326:         return parent::getPartOfString($cutted_string, 0, $last_word_position);
327:     }
328: 
329:     /**
330:      * Trims a string to a specific length.
331:      *
332:      * If the string is longer than $maxlen, dots are inserted ("...") right
333:      * before $maxlen.
334:      *
335:      * Example:
336:      * $string = "This is a simple test";
337:      * echo cString::trimHard ($string, 15);
338:      *
339:      * This would output "This is a si...", since the string is longer than
340:      * $maxlen and the resulting string matches 15 characters including the dots.
341:      *
342:      * @param string $string
343:      *         The string to operate on
344:      * @param int $maxlen
345:      *         The maximum number of characters
346:      * @param string $fillup [optional]
347:      * @return string
348:      *         The resulting string
349:      */
350:     public static function trimHard($string, $maxlen, $fillup = '...') {
351:         // If the string is smaller than the maximum lenght, it makes no sense to
352:         // process it any further. Return it.
353:         if (parent::getStringLength($string) < $maxlen) {
354:             return $string;
355:         }
356: 
357:         // Calculate the maximum text length
358:         $maximum_text_length = $maxlen - parent::getStringLength($fillup);
359: 
360:         // If text length is over zero cut it
361:         if ($maximum_text_length > 0) {
362:             if (preg_match('/(*UTF8)^.{0,' . $maximum_text_length . '}/', $string, $result_array)) {
363:                 $cutted_string = $result_array[0];
364:             } else if (preg_match('/^.{0,' . $maximum_text_length . '}/u', $string, $result_array)) {
365:                 $cutted_string = $result_array[0];
366:             } else {
367:                 $cutted_string = parent::getPartOfString($string, 0, $maximum_text_length);
368:             }
369:         } else {
370:             $cutted_string = $string;
371:         }
372: 
373:         // Append the fillup string
374:         $cutted_string .= $fillup;
375: 
376:         return $cutted_string;
377:     }
378: 
379:     /**
380:      * Trims a string to a approximate length preserving sentence boundaries.
381:      *
382:      * The algorithm inside calculates the sentence length to the previous and
383:      * next sentences. The distance to the next sentence which is smaller will
384:      * be taken to trim the string to match the approximate length parameter.
385:      *
386:      * Example:
387:      *
388:      * $string = "This contains two sentences. ";
389:      * $string .= "Lets play around with them. ";
390:      *
391:      * echo cString::trimSentence($string, 40);
392:      * echo cString::trimSentence($string, 50);
393:      *
394:      * The first example would only output the first sentence, the second
395:      * example both sentences.
396:      *
397:      * Explanation:
398:      *
399:      * To match the given max length closely, the function calculates the
400:      * distance to the next and previous sentences. Using the maxlength of 40
401:      * characters, the distance to the previous sentence would be 8 characters,
402:      * and to the next sentence it would be 19 characters. Therefore, only the
403:      * previous sentence is displayed.
404:      *
405:      * The second example displays the second sentence also, since the distance
406:      * to the next sentence is only 9 characters, but to the previous it is 18
407:      * characters.
408:      *
409:      * If you specify the boolean flag "$hard", the limit parameter creates a
410:      * hard limit instead of calculating the distance.
411:      *
412:      * This function ensures that at least one sentence is returned.
413:      *
414:      * @param string $string
415:      *         The string to operate on
416:      * @param int $approxlen
417:      *         The approximate number of characters
418:      * @param bool $hard [optional]
419:      *         If true, use a hard limit for the number of characters
420:      * @return string
421:      *         The resulting string
422:      */
423:     public static function trimSentence($string, $approxlen, $hard = false) {
424:         // If the string is smaller than the maximum lenght, it makes no sense to
425:         // process it any further. Return it.
426:         if (parent::getStringLength($string) < $approxlen) {
427:             return $string;
428:         }
429: 
430:         // Find out the start of the next sentence
431:         $next_sentence_start = parent::findFirstPos($string, '.', $approxlen);
432: 
433:         // If there's no next sentence (somebody forgot the dot?), set it to the end
434:         // of the string.
435:         if ($next_sentence_start === false) {
436:             $next_sentence_start = parent::getStringLength($string);
437:         }
438: 
439:         // Cut the previous sentence so we can use cString::findLastPos
440:         $previous_sentence_cutted = parent::getPartOfString($string, 0, $approxlen);
441: 
442:         // Get out the previous sentence start
443:         $previous_sentence_start = cString::findLastPos($previous_sentence_cutted, '.');
444: 
445:         // If the sentence doesn't contain a dot, use the text start.
446:         if ($previous_sentence_start === false) {
447:             $previous_sentence_start = 0;
448:         }
449: 
450:         // If we have a hard limit, we only want to process everything before
451:         // $approxlen
452:         if (($hard == true) && ($next_sentence_start > $approxlen)) {
453:             return parent::getPartOfString($string, 0, $previous_sentence_start + 1);
454:         }
455: 
456:         // Calculate next and previous sentence distances
457:         $distance_previous_sentence = $approxlen - $previous_sentence_start;
458:         $distance_next_sentence = $next_sentence_start - $approxlen;
459: 
460:         // Sanity: Return at least one sentence.
461:         $sanity = parent::getPartOfString($string, 0, $previous_sentence_start + 1);
462: 
463:         if (parent::findFirstPos($sanity, '.') === false) {
464:             return parent::getPartOfString($string, 0, $next_sentence_start + 1);
465:         }
466: 
467:         // Decide wether the next or previous sentence is nearer
468:         if ($distance_previous_sentence > $distance_next_sentence) {
469:             return parent::getPartOfString($string, 0, $next_sentence_start + 1);
470:         } else {
471:             return parent::getPartOfString($string, 0, $previous_sentence_start + 1);
472:         }
473:     }
474: 
475:     /**
476:      * Converts diacritics to English characters whenever possible.
477:      *
478:      * For german umlauts, this function converts the umlauts to their ASCII
479:      * equivalents (e.g. ä => ae).
480:      *
481:      * For more information about diacritics, refer to
482:      * http://en.wikipedia.org/wiki/Diacritic
483:      *
484:      * For other languages, the diacritic marks are removed, if possible.
485:      *
486:      * @param string $string
487:      *                               The string to operate on
488:      * @param string $sourceEncoding [optional; default: UTF-8]
489:      *                               The source encoding
490:      * @param string $targetEncoding [optional; default: UTF-8]
491:      *                               The target encoding
492:      *
493:      * @return string
494:      *         The resulting string
495:      * @throws cInvalidArgumentException
496:      */
497:     public static function replaceDiacritics($string, $sourceEncoding = 'UTF-8', $targetEncoding = 'UTF-8') {
498:         if ($sourceEncoding != 'UTF-8') {
499:             $string = self::recodeString($string, $sourceEncoding, "UTF-8");
500:         }
501: 
502:         // replace regular german umlauts and other common characters with
503:         // diacritics
504:         static $search, $replace;
505:         if (!isset($search)) {
506:             $search = array(
507:                 'Ä',
508:                 'Ö',
509:                 'Ü',
510:                 'ä',
511:                 'ö',
512:                 'ü',
513:                 'ß',
514:                 'Á',
515:                 'À',
516:                 'Â',
517:                 'á',
518:                 'à',
519:                 'â',
520:                 'É',
521:                 'È',
522:                 'Ê',
523:                 'é',
524:                 'è',
525:                 'ê',
526:                 'Í',
527:                 'Ì',
528:                 'Î',
529:                 'í',
530:                 'ì',
531:                 'î',
532:                 'Ó',
533:                 'Ò',
534:                 'Ô',
535:                 'ó',
536:                 'ò',
537:                 'ô',
538:                 'Ú',
539:                 'Ù',
540:                 'Û',
541:                 'ú',
542:                 'ù',
543:                 'û'
544:             );
545:             $replace = array(
546:                 'Ae',
547:                 'Oe',
548:                 'Ue',
549:                 'ae',
550:                 'oe',
551:                 'ue',
552:                 'ss',
553:                 'A',
554:                 'A',
555:                 'A',
556:                 'a',
557:                 'a',
558:                 'a',
559:                 'E',
560:                 'E',
561:                 'E',
562:                 'e',
563:                 'e',
564:                 'e',
565:                 'I',
566:                 'I',
567:                 'I',
568:                 'i',
569:                 'i',
570:                 'i',
571:                 'O',
572:                 'O',
573:                 'O',
574:                 'o',
575:                 'o',
576:                 'o',
577:                 'U',
578:                 'U',
579:                 'U',
580:                 'u',
581:                 'u',
582:                 'u'
583:             );
584:         }
585:         $string = str_replace($search, $replace, $string);
586: 
587:         // TODO: Additional converting
588: 
589:         return self::recodeString($string, "UTF-8", $targetEncoding);
590:     }
591: 
592:     /**
593:      * Converts a string to another encoding.
594:      *
595:      * This function tries to detect which function to use (either recode or
596:      * iconv).
597:      *
598:      * If $sourceEncoding and $targetEncoding are the same, this function
599:      * returns immediately.
600:      *
601:      * For more information about encodings, refer to
602:      * http://en.wikipedia.org/wiki/Character_encoding
603:      *
604:      * For more information about the supported encodings in recode, refer to
605:      * http://www.delorie.com/gnu/docs/recode/recode_toc.html
606:      *
607:      * Note: depending on whether recode or iconv is used, the supported
608:      * charsets differ. The following ones are commonly used and are most likely
609:      * supported by both converters:
610:      *
611:      * - ISO-8859-1 to ISO-8859-15
612:      * - ASCII
613:      * - UTF-8
614:      *
615:      * @todo Check if the charset names are the same for both converters
616:      * @todo Implement a converter and charset checker to ensure compliance.
617:      *
618:      * @param string $string
619:      *         The string to operate on
620:      * @param string $sourceEncoding
621:      *         The source encoding
622:      * @param string $targetEncoding
623:      *         The target encoding (if false, use source encoding)
624:      *
625:      * @return string
626:      *         The resulting string
627:      * @throws cInvalidArgumentException
628:      */
629:     public static function recodeString($string, $sourceEncoding, $targetEncoding) {
630:         // If sourceEncoding and targetEncoding are the same, return
631:         if (parent::toLowerCase($sourceEncoding) == parent::toLowerCase($targetEncoding)) {
632:             return $string;
633:         }
634: 
635:         // Check for the "recode" support
636:         if (function_exists('recode')) {
637:             $sResult = recode_string("$sourceEncoding..$targetEncoding", $string);
638:             return $sResult;
639:         }
640: 
641:         // Check for the "iconv" support
642:         if (function_exists('iconv')) {
643:             $sResult = iconv($sourceEncoding, $targetEncoding, $string);
644:             return $sResult;
645:         }
646: 
647:         // No charset converters found; return with warning
648:         cWarning(__FILE__, __LINE__, 'cString::recodeString could not find either recode or iconv to do charset conversion.');
649:         return $string;
650:     }
651: 
652:     /**
653:      * Removes or converts all "evil" URL characters.
654:      *
655:      * This function removes or converts all characters which can make an URL
656:      * invalid.
657:      *
658:      * Clean characters include:
659:      * - All characters between 32 and 126 which are not alphanumeric and
660:      * aren't one of the following: _-.
661:      *
662:      * @param string $string
663:      *                        The string to operate on
664:      * @param bool   $replace [optional]
665:      *                        If true, all "unclean" characters are replaced
666:      *
667:      * @return string
668:      *         The resulting string
669:      * @throws cInvalidArgumentException
670:      */
671:     public static function cleanURLCharacters($string, $replace = false) {
672:         $string = self::replaceDiacritics($string);
673:         $string = str_replace(' ', '-', $string);
674:         $string = str_replace('/', '-', $string);
675:         $string = str_replace('&', '-', $string);
676:         $string = str_replace('+', '-', $string);
677: 
678:         $iStrLen = parent::getStringLength($string);
679: 
680:         $sResultString = '';
681: 
682:         for ($i = 0; $i < $iStrLen; $i++) {
683:             $sChar = parent::getPartOfString($string, $i, 1);
684: 
685:             if (preg_match('/^[a-z0-9]*$/i', $sChar) || $sChar == '-' || $sChar == '_' || $sChar == '.') {
686:                 $sResultString .= $sChar;
687:             } else {
688:                 if ($replace == true) {
689:                     $sResultString .= '_';
690:                 }
691:             }
692:         }
693: 
694:         return $sResultString;
695:     }
696: 
697:     /**
698:      * Normalizes line endings in passed string.
699:      *
700:      * @param string $string
701:      * @param string $lineEnding [optional]
702:      *         Feasible values are "\n", "\r" or "\r\n"
703:      * @return string
704:      */
705:     public static function normalizeLineEndings($string, $lineEnding = "\n") {
706:         if ($lineEnding !== "\n" && $lineEnding !== "\r" && $lineEnding !== "\r\n") {
707:             $lineEnding = "\n";
708:         }
709: 
710:         $string = str_replace("\r\n", "\n", $string);
711:         $string = str_replace("\r", "\n", $string);
712:         if ($lineEnding !== "\n") {
713:             $string = str_replace("\n", $lineEnding, $string);
714:         }
715: 
716:         return $string;
717:     }
718: }
Packages

Classes