File classes/class.string.php | CMS CONTENIDO 4.9.8

  1: <?php
  2: /**
  3:  * This file contains the string utility class.
  4:  *
  5:  * @package    Core
  6:  * @subpackage Util
  7:  * @version    SVN Revision $Rev:$
  8:  *
  9:  * @author     Murat Purc <murat@purc.de>
 10:  * @copyright  four for business AG <www.4fb.de>
 11:  * @license    http://www.contenido.org/license/LIZENZ.txt
 12:  * @link       http://www.4fb.de
 13:  * @link       http://www.contenido.org
 14:  */
 15: 
 16: defined('CON_FRAMEWORK') || die('Illegal call: Missing framework initialization - request aborted.');
 17: 
 18: /**
 19:  * String helper class.
 20:  *
 21:  * @package Core
 22:  * @subpackage Util
 23:  */
 24: class cString {
 25: 
 26:     /**
 27:      * Replaces a string only once
 28:      *
 29:      * Caution: This function only takes strings as parameters, not arrays!
 30:      *
 31:      * @param string $find
 32:      *         String to find
 33:      * @param string $replace
 34:      *         String to replace
 35:      * @param string $subject
 36:      *         String to process
 37:      * @return string
 38:      *         Processed string
 39:      */
 40:     public static function iReplaceOnce($find, $replace, $subject) {
 41:         $start = strpos(strtolower($subject), strtolower($find));
 42: 
 43:         if ($start === false) {
 44:             return $subject;
 45:         }
 46: 
 47:         $end = $start + strlen($find);
 48:         $first = substr($subject, 0, $start);
 49:         $last = substr($subject, $end, strlen($subject) - $end);
 50: 
 51:         $result = $first . $replace . $last;
 52: 
 53:         return $result;
 54:     }
 55: 
 56:     /**
 57:      * Replaces a string only once, in reverse direction
 58:      *
 59:      * Caution: This function only takes strings as parameters, not arrays!
 60:      *
 61:      * @param string $find
 62:      *         String to find
 63:      * @param string $replace
 64:      *         String to replace
 65:      * @param string $subject
 66:      *         String to process
 67:      * @return string
 68:      *         Processed string
 69:      */
 70:     public static function iReplaceOnceReverse($find, $replace, $subject) {
 71:         $start = self::posReverse(strtolower($subject), strtolower($find));
 72: 
 73:         if ($start === false) {
 74:             return $subject;
 75:         }
 76: 
 77:         $end = $start + strlen($find);
 78: 
 79:         $first = substr($subject, 0, $start);
 80:         $last = substr($subject, $end, strlen($subject) - $end);
 81: 
 82:         $result = $first . $replace . $last;
 83: 
 84:         return $result;
 85:     }
 86: 
 87:     /**
 88:      * Finds a string position in reverse direction
 89:      *
 90:      * NOTE: The original strrpos-Function of PHP4 only finds a single character as needle.
 91:      *
 92:      * @param string $haystack
 93:      *         String to search in
 94:      * @param string $needle
 95:      *         String to search for
 96:      * @param int $start [optional]
 97:      *         Offset
 98:      * @return string
 99:      *         Processed string
100:      */
101:     public static function posReverse($haystack, $needle, $start = 0) {
102:         $tempPos = strpos($haystack, $needle, $start);
103: 
104:         if ($tempPos === false) {
105:             if ($start == 0) {
106:                 // Needle not in string at all
107:                 return false;
108:             } else {
109:                 // No more occurances found
110:                 return $start - strlen($needle);
111:             }
112:         } else {
113:             // Find the next occurance
114:             return self::posReverse($haystack, $needle, $tempPos + strlen($needle));
115:         }
116:     }
117: 
118:     /**
119:      * Adds slashes to passed variable or array.
120:      *
121:      * @param string|array $value
122:      *         Either a string or a multi-dimensional array of values
123:      * @return string|array
124:      */
125:     public static function addSlashes($value) {
126:         $value = is_array($value) ? array_map(array('cString', 'addSlashes'), $value) : addslashes($value);
127:         return $value;
128:     }
129: 
130:     /**
131:      * Removes slashes from passed variable or array.
132:      *
133:      * @param string|array  $value
134:      *         Either a string or a multi-dimensional array of values
135:      * @return string|array
136:      */
137:     public static function stripSlashes($value) {
138:         $value = is_array($value) ? array_map(array('cString', 'stripSlashes'), $value) : stripslashes($value);
139:         return $value;
140:     }
141: 
142:     /**
143:      * Checks if the string haystack ends with needle
144:      *
145:      * @param string $haystack
146:      *         The string to check
147:      * @param string $needle
148:      *         The string with which it should end
149:      * @return bool
150:      */
151:     public static function endsWith($haystack, $needle) {
152:         $length = strlen($needle);
153:         if ($length == 0) {
154:             return true;
155:         }
156: 
157:         return substr($haystack, -$length) === $needle;
158:     }
159: 
160:     /**
161:      * Returns true if needle can be found in haystack
162:      *
163:      * @param string $haystack
164:      *         String to be searched
165:      * @param string $needle
166:      *         String to search for
167:      * @return bool
168:      */
169:     public static function contains($haystack, $needle) {
170:         return !(strpos($haystack, $needle) === false);
171:     }
172: 
173:     /**
174:      * Implementation of PHP 5.3's strstr with beforeNeedle
175:      *
176:      * @param string $haystack
177:      *         String to be searched
178:      * @param string $needle
179:      *         String to search for
180:      * @param string $beforeNeedle [optional]
181:      *         If true, return everything BEFORE needle
182:      * @return string
183:      * @link http://php.net/manual/de/function.strstr.php
184:      */
185:     public static function strstr($haystack, $needle, $beforeNeedle = false) {
186:         if (!$beforeNeedle) {
187:             return strstr($haystack, $needle);
188:         } else {
189:             return strtok($haystack, $needle);
190:         }
191:     }
192: 
193:     /**
194:      * This function checks if a given format is accepted by php's date function
195:      * @param string $format
196:      *         format according to date function specification
197:      * @return bool
198:      *         true if format is correct, false otherwise
199:      */
200:     public static function validateDateFormat($format) {
201:         // try to create a DateTime instance based on php's date function format specification
202:         // return true if date is valid (no wrong format)
203:         return false !== DateTime::createFromFormat($format, date($format, time()));
204:     }
205: 
206:     /**
207:      * Extract a number from a string
208:      *
209:      * @param string $string
210:      *         String var by reference
211:      * @return string
212:      */
213:     public static function extractNumber(&$string) {
214:         $string = preg_replace('/[^0-9]/', '', $string);
215:         return $string;
216:     }
217: 
218: 
219:     /**
220:      * Returns whether a string is UTF-8 encoded or not
221:      *
222:      * @param string $input
223:      * @return bool
224:      */
225:     public static function isUtf8($input) {
226:         $len = strlen($input);
227: 
228:         for ($i = 0; $i < $len; $i++) {
229:             $char = ord($input[$i]);
230:             $n = 0;
231: 
232:             if ($char < 0x80) { // ASCII char
233:                 continue;
234:             } else if (($char & 0xE0) === 0xC0 && $char > 0xC1) { // 2 byte long
235:                 // char
236:                 $n = 1;
237:             } else if (($char & 0xF0) === 0xE0) { // 3 byte long char
238:                 $n = 2;
239:             } else if (($char & 0xF8) === 0xF0 && $char < 0xF5) { // 4 byte long
240:                 // char
241:                 $n = 3;
242:             } else {
243:                 return false;
244:             }
245: 
246:             for ($j = 0; $j < $n; $j++) {
247:                 $i++;
248: 
249:                 if ($i == $len || (ord($input[$i]) & 0xC0) !== 0x80) {
250:                     return false;
251:                 }
252:             }
253:         }
254:         return true;
255:     }
256: 
257: 
258:     /**
259:      * Checks if a value is alphanumeric
260:      *
261:      * @param mixed $test
262:      *         Value to test
263:      * @param bool $umlauts [optional]
264:      *         Use german umlauts
265:      * @return bool
266:      *         Value is alphanumeric
267:      */
268:     public static function isAlphanumeric($test, $umlauts = true) {
269:         if ($umlauts == true) {
270:             $match = "/^[a-z0-9ÄäÖöÜüß ]+$/i";
271:         } else {
272:             $match = "/^[a-z0-9 ]+$/i";
273:         }
274: 
275:         return preg_match($match, $test);
276:     }
277: 
278:     /**
279:      * Trims a string to a given length and makes sure that all words up to $maxlen
280:      * are preserved, without exceeding $maxlen.
281:      *
282:      * Warning: Currently, this function uses a regular ASCII-Whitespace to do the
283:      * separation test. If you are using '&nbsp' to create spaces, this function
284:      * will fail.
285:      *
286:      * Example:
287:      * $string = "This is a simple test";
288:      * echo cString::trimAfterWord ($string, 15);
289:      *
290:      * This would output "This is a", since this function respects word boundaries
291:      * and doesn't operate beyond the limit given by $maxlen.
292:      *
293:      * @param string $string
294:      *         The string to operate on
295:      * @param int $maxlen
296:      *         The maximum number of characters
297:      * @return string
298:      *         The resulting string
299:      */
300:     public static function trimAfterWord($string, $maxlen) {
301:         // If the string is smaller than the maximum lenght, it makes no sense to
302:         // process it any further. Return it.
303:         if (strlen($string) < $maxlen) {
304:             return $string;
305:         }
306: 
307:         // If the character after the $maxlen position is a space, we can return
308:         // the string until $maxlen.
309:         if (substr($string, $maxlen, 1) == ' ') {
310:             return substr($string, 0, $maxlen);
311:         }
312: 
313:         // Cut the string up to $maxlen so we can use strrpos (reverse str position)
314:         $cutted_string = substr($string, 0, $maxlen);
315: 
316:         // Extract the end of the last word
317:         $last_word_position = strrpos($cutted_string, ' ');
318: 
319:         return substr($cutted_string, 0, $last_word_position);
320:     }
321: 
322:     /**
323:      * Trims a string to a specific length.
324:      * If the string is longer than $maxlen,
325:      * dots are inserted ("...") right before $maxlen.
326:      *
327:      * Example:
328:      * $string = "This is a simple test";
329:      * echo cString::trimHard ($string, 15);
330:      *
331:      * This would output "This is a si...", since the string is longer than $maxlen
332:      * and the resulting string matches 15 characters including the dots.
333:      *
334:      * @param string $string
335:      *         The string to operate on
336:      * @param int $maxlen
337:      *         The maximum number of characters
338:      * @param string $fillup [optional]
339:      * @return string
340:      *         The resulting string
341:      */
342:     public static function trimHard($string, $maxlen, $fillup = '...') {
343:         // If the string is smaller than the maximum lenght, it makes no sense to
344:         // process it any further. Return it.
345:         if (strlen($string) < $maxlen) {
346:             return $string;
347:         }
348: 
349:         // Calculate the maximum text length
350:         $maximum_text_length = $maxlen - strlen($fillup);
351: 
352:         // If text length is over zero cut it
353:         if ($maximum_text_length > 0) {
354:             if (preg_match('/(*UTF8)^.{0,' . $maximum_text_length . '}/', $string, $result_array)) {
355:                 $cutted_string = $result_array[0];
356:             } else if (preg_match('/^.{0,' . $maximum_text_length . '}/u', $string, $result_array)) {
357:                 $cutted_string = $result_array[0];
358:             } else {
359:                 $cutted_string = substr($string, 0, $maximum_text_length);
360:             }
361:         } else {
362:             $cutted_string = $string;
363:         }
364: 
365:         // Append the fillup string
366:         $cutted_string .= $fillup;
367: 
368:         return $cutted_string;
369:     }
370: 
371:     /**
372:      * Trims a string to a approximate length.
373:      * Sentence boundaries are preserved.
374:      *
375:      * The algorythm inside calculates the sentence length to the previous and next
376:      * sentences. The distance to the next sentence which is smaller will be taken
377:      * to
378:      * trim the string to match the approximate length parameter.
379:      *
380:      * Example:
381:      *
382:      * $string = "This contains two sentences. ";
383:      * $string .= "Lets play around with them. ";
384:      *
385:      * echo cString::trimSentence($string, 40);
386:      * echo cString::trimSentence($string, 50);
387:      *
388:      * The first example would only output the first sentence, the second example
389:      * both
390:      * sentences.
391:      *
392:      * Explanation:
393:      *
394:      * To match the given max length closely, the function calculates the distance
395:      * to
396:      * the next and previous sentences. Using the maxlength of 40 characters, the
397:      * distance to the previous sentence would be 8 characters, and to the next
398:      * sentence
399:      * it would be 19 characters. Therefore, only the previous sentence is
400:      * displayed.
401:      *
402:      * The second example displays the second sentence also, since the distance to
403:      * the
404:      * next sentence is only 9 characters, but to the previous it is 18 characters.
405:      *
406:      * If you specify the boolean flag "$hard", the limit parameter creates a hard
407:      * limit
408:      * instead of calculating the distance.
409:      *
410:      * This function ensures that at least one sentence is returned.
411:      *
412:      * @param string $string
413:      *         The string to operate on
414:      * @param int $approxlen
415:      *         The approximate number of characters
416:      * @param bool $hard [optional]
417:      *         If true, use a hard limit for the number of characters
418:      * @return string
419:      *         The resulting string
420:      */
421:     public static function trimSentence($string, $approxlen, $hard = false) {
422:         // If the string is smaller than the maximum lenght, it makes no sense to
423:         // process it any further. Return it.
424:         if (strlen($string) < $approxlen) {
425:             return $string;
426:         }
427: 
428:         // Find out the start of the next sentence
429:         $next_sentence_start = strpos($string, '.', $approxlen);
430: 
431:         // If there's no next sentence (somebody forgot the dot?), set it to the end
432:         // of the string.
433:         if ($next_sentence_start === false) {
434:             $next_sentence_start = strlen($string);
435:         }
436: 
437:         // Cut the previous sentence so we can use strrpos
438:         $previous_sentence_cutted = substr($string, 0, $approxlen);
439: 
440:         // Get out the previous sentence start
441:         $previous_sentence_start = strrpos($previous_sentence_cutted, '.');
442: 
443:         // If the sentence doesn't contain a dot, use the text start.
444:         if ($previous_sentence_start === false) {
445:             $previous_sentence_start = 0;
446:         }
447: 
448:         // If we have a hard limit, we only want to process everything before
449:         // $approxlen
450:         if (($hard == true) && ($next_sentence_start > $approxlen)) {
451:             return substr($string, 0, $previous_sentence_start + 1);
452:         }
453: 
454:         // Calculate next and previous sentence distances
455:         $distance_previous_sentence = $approxlen - $previous_sentence_start;
456:         $distance_next_sentence = $next_sentence_start - $approxlen;
457: 
458:         // Sanity: Return at least one sentence.
459:         $sanity = substr($string, 0, $previous_sentence_start + 1);
460: 
461:         if (strpos($sanity, '.') === false) {
462:             return substr($string, 0, $next_sentence_start + 1);
463:         }
464: 
465:         // Decide wether the next or previous sentence is nearer
466:         if ($distance_previous_sentence > $distance_next_sentence) {
467:             return substr($string, 0, $next_sentence_start + 1);
468:         } else {
469:             return substr($string, 0, $previous_sentence_start + 1);
470:         }
471:     }
472: 
473:     /**
474:      * Converts diactritics to english characters whenever
475:      * possible.
476:      *
477:      * For german umlauts, this function converts the umlauts to their ASCII
478:      * equalients (e.g. ä => ae).
479:      *
480:      * For more information about diacritics, refer to
481:      * http://en.wikipedia.org/wiki/Diacritic
482:      *
483:      * For other languages, the diacritic marks are removed, if possible.
484:      *
485:      * @param string $string
486:      *         The string to operate on
487:      * @param string $sourceEncoding [optional; default: UTF-8]
488:      *         The source encoding
489:      * @param string $targetEncoding [optional; default: UTF-8]
490:      *         The target encoding
491:      * @return string
492:      *         The resulting string
493:      */
494:     public static function replaceDiacritics($string, $sourceEncoding = 'UTF-8', $targetEncoding = 'UTF-8') {
495:         if ($sourceEncoding != 'UTF-8') {
496:             $string = self::recodeString($string, $sourceEncoding, "UTF-8");
497:         }
498: 
499:         // replace regular german umlauts and other common characters with
500:         // diacritics
501:         static $search, $replace;
502:         if (!isset($search)) {
503:             $search = array(
504:                     'Ä',
505:                     'Ö',
506:                     'Ü',
507:                     'ä',
508:                     'ö',
509:                     'ü',
510:                     'ß',
511:                     'Á',
512:                     'À',
513:                     'Â',
514:                     'á',
515:                     'à',
516:                     'â',
517:                     'É',
518:                     'È',
519:                     'Ê',
520:                     'é',
521:                     'è',
522:                     'ê',
523:                     'Í',
524:                     'Ì',
525:                     'Î',
526:                     'í',
527:                     'ì',
528:                     'î',
529:                     'Ó',
530:                     'Ò',
531:                     'Ô',
532:                     'ó',
533:                     'ò',
534:                     'ô',
535:                     'Ú',
536:                     'Ù',
537:                     'Û',
538:                     'ú',
539:                     'ù',
540:                     'û'
541:             );
542:             $replace = array(
543:                     'Ae',
544:                     'Oe',
545:                     'Ue',
546:                     'ae',
547:                     'oe',
548:                     'ue',
549:                     'ss',
550:                     'A',
551:                     'A',
552:                     'A',
553:                     'a',
554:                     'a',
555:                     'a',
556:                     'E',
557:                     'E',
558:                     'E',
559:                     'e',
560:                     'e',
561:                     'e',
562:                     'I',
563:                     'I',
564:                     'I',
565:                     'i',
566:                     'i',
567:                     'i',
568:                     'O',
569:                     'O',
570:                     'O',
571:                     'o',
572:                     'o',
573:                     'o',
574:                     'U',
575:                     'U',
576:                     'U',
577:                     'u',
578:                     'u',
579:                     'u'
580:             );
581:         }
582:         $string = str_replace($search, $replace, $string);
583: 
584:         // TODO: Additional converting
585: 
586:         return self::recodeString($string, "UTF-8", $targetEncoding);
587:     }
588: 
589:     /**
590:      * Converts a string to another encoding.
591:      * This function tries to detect which function
592:      * to use (either recode or iconv).
593:      *
594:      * If $sourceEncoding and $targetEncoding are the same, this function returns
595:      * immediately.
596:      *
597:      * For more information about encodings, refer to
598:      * http://en.wikipedia.org/wiki/Character_encoding
599:      *
600:      * For more information about the supported encodings in recode, refer to
601:      * http://www.delorie.com/gnu/docs/recode/recode_toc.html
602:      *
603:      * Note: depending on whether recode or iconv is used, the supported charsets
604:      * differ. The following ones are commonly used and are most likely supported by
605:      * both converters:
606:      *
607:      * - ISO-8859-1 to ISO-8859-15
608:      * - ASCII
609:      * - UTF-8
610:      *
611:      * @todo Check if the charset names are the same for both converters
612:      * @todo Implement a converter and charset checker to ensure compilance.
613:      *
614:      * @param string $string
615:      *         The string to operate on
616:      * @param string $sourceEncoding
617:      *         The source encoding (default: ISO-8859-1)
618:      * @param string $targetEncoding
619:      *         The target encoding (if false, use source encoding)
620:      * @return string
621:      *         The resulting string
622:      */
623:     public static function recodeString($string, $sourceEncoding, $targetEncoding) {
624:         // If sourceEncoding and targetEncoding are the same, return
625:         if ($sourceEncoding == $targetEncoding) {
626:             return $string;
627:         }
628: 
629:         // Check for the "recode" support
630:         if (function_exists('recode')) {
631:             $sResult = recode_string("$sourceEncoding..$targetEncoding", $string);
632:             return $sResult;
633:         }
634: 
635:         // Check for the "iconv" support
636:         if (function_exists('iconv')) {
637:             $sResult = iconv($sourceEncoding, $targetEncoding, $string);
638:             return $sResult;
639:         }
640: 
641:         // No charset converters found; return with warning
642:         cWarning(__FILE__, __LINE__, 'cString::recodeString could not find either recode or iconv to do charset conversion.');
643:         return $string;
644:     }
645: 
646:     /**
647:      * Removes or converts all "evil" URL characters.
648:      * This function removes or converts
649:      * all characters which can make an URL invalid.
650:      *
651:      * Clean characters include:
652:      * - All characters between 32 and 126 which are not alphanumeric and
653:      * aren't one of the following: _-.
654:      *
655:      * @param string $string
656:      *         The string to operate on
657:      * @param bool $replace [optional]
658:      *         If true, all "unclean" characters are replaced
659:      * @return string
660:      *         The resulting string
661:      */
662:     public static function cleanURLCharacters($string, $replace = false) {
663:         $string = self::replaceDiacritics($string);
664:         $string = str_replace(' ', '-', $string);
665:         $string = str_replace('/', '-', $string);
666:         $string = str_replace('&', '-', $string);
667:         $string = str_replace('+', '-', $string);
668: 
669:         $iStrLen = strlen($string);
670: 
671:         $sResultString = '';
672: 
673:         for ($i = 0; $i < $iStrLen; $i++) {
674:             $sChar = substr($string, $i, 1);
675: 
676:             if (preg_match('/^[a-z0-9]*$/i', $sChar) || $sChar == '-' || $sChar == '_' || $sChar == '.') {
677:                 $sResultString .= $sChar;
678:             } else {
679:                 if ($replace == true) {
680:                     $sResultString .= '_';
681:                 }
682:             }
683:         }
684: 
685:         return $sResultString;
686:     }
687: 
688:     /**
689:      * Normalizes line endings in passed string.
690:      *
691:      * @param string $string
692:      * @param string $lineEnding [optional]
693:      *         Feasible values are "\n", "\r" or "\r\n"
694:      * @return string
695:      */
696:     public static function normalizeLineEndings($string, $lineEnding = "\n") {
697:         if ($lineEnding !== "\n" && $lineEnding !== "\r" && $lineEnding !== "\r\n") {
698:             $lineEnding = "\n";
699:         }
700: 
701:         $string = str_replace("\r\n", "\n", $string);
702:         $string = str_replace("\r", "\n", $string);
703:         if ($lineEnding !== "\n") {
704:             $string = str_replace("\n", $lineEnding, $string);
705:         }
706: 
707:         return $string;
708:     }
709: }
710:
Packages

Classes