File classes/class.string.php | CMS CONTENIDO 4.9.11

  1: <?php
  2: /**
  3:  * This file contains the string utility class.
  4:  *
  5:  * @package    Core
  6:  * @subpackage Util
  7:  * @author     Murat Purc <murat@purc.de>
  8:  * @copyright  four for business AG <www.4fb.de>
  9:  * @license    http://www.contenido.org/license/LIZENZ.txt
 10:  * @link       http://www.4fb.de
 11:  * @link       http://www.contenido.org
 12:  */
 13: 
 14: defined('CON_FRAMEWORK') || die('Illegal call: Missing framework initialization - request aborted.');
 15: 
 16: /**
 17:  * String helper class.
 18:  *
 19:  * @package Core
 20:  * @subpackage Util
 21:  */
 22: class cString {
 23: 
 24:     /**
 25:      * Replaces a string only once.
 26:      *
 27:      * Caution: This function only takes strings as parameters, not arrays!
 28:      *
 29:      * @param string $find
 30:      *         String to find
 31:      * @param string $replace
 32:      *         String to replace
 33:      * @param string $subject
 34:      *         String to process
 35:      * @return string
 36:      *         Processed string
 37:      */
 38:     public static function iReplaceOnce($find, $replace, $subject) {
 39:         $start = strpos(strtolower($subject), strtolower($find));
 40: 
 41:         if ($start === false) {
 42:             return $subject;
 43:         }
 44: 
 45:         $end = $start + strlen($find);
 46:         $first = substr($subject, 0, $start);
 47:         $last = substr($subject, $end, strlen($subject) - $end);
 48: 
 49:         $result = $first . $replace . $last;
 50: 
 51:         return $result;
 52:     }
 53: 
 54:     /**
 55:      * Replaces a string only once, in reverse direction.
 56:      *
 57:      * Caution: This function only takes strings as parameters, not arrays!
 58:      *
 59:      * @param string $find
 60:      *         String to find
 61:      * @param string $replace
 62:      *         String to replace
 63:      * @param string $subject
 64:      *         String to process
 65:      * @return string
 66:      *         Processed string
 67:      */
 68:     public static function iReplaceOnceReverse($find, $replace, $subject) {
 69:         $start = self::posReverse(strtolower($subject), strtolower($find));
 70: 
 71:         if ($start === false) {
 72:             return $subject;
 73:         }
 74: 
 75:         $end = $start + strlen($find);
 76: 
 77:         $first = substr($subject, 0, $start);
 78:         $last = substr($subject, $end, strlen($subject) - $end);
 79: 
 80:         $result = $first . $replace . $last;
 81: 
 82:         return $result;
 83:     }
 84: 
 85:     /**
 86:      * Finds a string position in reverse direction.
 87:      *
 88:      * NOTE: The original strrpos-function of PHP4 only finds a single character
 89:      * as needle.
 90:      *
 91:      * @param string $haystack
 92:      *         String to search in
 93:      * @param string $needle
 94:      *         String to search for
 95:      * @param int $start [optional]
 96:      *         Offset
 97:      * @return int
 98:      *         String position
 99:      */
100:     public static function posReverse($haystack, $needle, $start = 0) {
101:         $tempPos = strpos($haystack, $needle, $start);
102: 
103:         if ($tempPos === false) {
104:             if ($start == 0) {
105:                 // Needle not in string at all
106:                 return false;
107:             } else {
108:                 // No more occurances found
109:                 return $start - strlen($needle);
110:             }
111:         } else {
112:             // Find the next occurance
113:             return self::posReverse($haystack, $needle, $tempPos + strlen($needle));
114:         }
115:     }
116: 
117:     /**
118:      * Adds slashes to passed variable or array.
119:      *
120:      * @param string|array $value
121:      *         Either a string or a multi-dimensional array of values
122:      * @return string|array
123:      */
124:     public static function addSlashes($value) {
125:         $value = is_array($value) ? array_map(array('cString', 'addSlashes'), $value) : addslashes($value);
126:         return $value;
127:     }
128: 
129:     /**
130:      * Removes slashes from passed variable or array.
131:      *
132:      * @param string|array  $value
133:      *         Either a string or a multi-dimensional array of values
134:      * @return string|array
135:      */
136:     public static function stripSlashes($value) {
137:         $value = is_array($value) ? array_map(array('cString', 'stripSlashes'), $value) : stripslashes($value);
138:         return $value;
139:     }
140: 
141:     /**
142:      * Checks if the string haystack ends with needle.
143:      *
144:      * @param string $haystack
145:      *         The string to check
146:      * @param string $needle
147:      *         The string with which it should end
148:      * @return bool
149:      */
150:     public static function endsWith($haystack, $needle) {
151:         $length = strlen($needle);
152:         if ($length == 0) {
153:             return true;
154:         }
155: 
156:         return substr($haystack, -$length) === $needle;
157:     }
158: 
159:     /**
160:      * Returns true if needle can be found in haystack.
161:      *
162:      * @param string $haystack
163:      *         String to be searched
164:      * @param string $needle
165:      *         String to search for
166:      * @return bool
167:      */
168:     public static function contains($haystack, $needle) {
169:         return !(strpos($haystack, $needle) === false);
170:     }
171: 
172:     /**
173:      * Implementation of PHP 5.3's strstr with beforeNeedle.
174:      *
175:      * @param string $haystack
176:      *         String to be searched
177:      * @param string $needle
178:      *         String to search for
179:      * @param string $beforeNeedle [optional]
180:      *         If true, return everything BEFORE needle
181:      * @return string
182:      * @link http://php.net/manual/de/function.strstr.php
183:      */
184:     public static function strstr($haystack, $needle, $beforeNeedle = false) {
185:         if (!$beforeNeedle) {
186:             return strstr($haystack, $needle);
187:         } else {
188:             return strtok($haystack, $needle);
189:         }
190:     }
191: 
192:     /**
193:      * This function checks if a given format is accepted by php's date function.
194:      *
195:      * @param string $format
196:      *         format according to date function specification
197:      * @return bool
198:      *         true if format is correct, false otherwise
199:      */
200:     public static function validateDateFormat($format) {
201:         // try to create a DateTime instance based on php's date function format specification
202:         // return true if date is valid (no wrong format)
203:         return false !== DateTime::createFromFormat($format, date($format, time()));
204:     }
205: 
206:     /**
207:      * Extract a number from a string.
208:      *
209:      * @param string $string
210:      *         String var by reference
211:      * @return string
212:      */
213:     public static function extractNumber(&$string) {
214:         $string = preg_replace('/[^0-9]/', '', $string);
215:         return $string;
216:     }
217: 
218: 
219:     /**
220:      * Returns whether a string is UTF-8 encoded or not.
221:      *
222:      * @param string $input
223:      * @return bool
224:      */
225:     public static function isUtf8($input) {
226:         $len = strlen($input);
227: 
228:         for ($i = 0; $i < $len; $i++) {
229:             $char = ord($input[$i]);
230:             $n = 0;
231: 
232:             if ($char < 0x80) {
233:                 // ASCII char
234:                 continue;
235:             } else if (($char & 0xE0) === 0xC0 && $char > 0xC1) {
236:                 // 2 byte long char
237:                 $n = 1;
238:             } else if (($char & 0xF0) === 0xE0) {
239:                 // 3 byte long char
240:                 $n = 2;
241:             } else if (($char & 0xF8) === 0xF0 && $char < 0xF5) {
242:                 // 4 byte long char
243:                 $n = 3;
244:             } else {
245:                 return false;
246:             }
247: 
248:             for ($j = 0; $j < $n; $j++) {
249:                 $i++;
250: 
251:                 if ($i == $len || (ord($input[$i]) & 0xC0) !== 0x80) {
252:                     return false;
253:                 }
254:             }
255:         }
256:         return true;
257:     }
258: 
259: 
260:     /**
261:      * Checks if a value is alphanumeric.
262:      *
263:      * @param mixed $test
264:      *         Value to test
265:      * @param bool $umlauts [optional]
266:      *         Use german umlauts
267:      * @return bool
268:      *         Value is alphanumeric
269:      */
270:     public static function isAlphanumeric($test, $umlauts = true) {
271:         if ($umlauts == true) {
272:             $match = "/^[a-z0-9ÄäÖöÜüß ]+$/i";
273:         } else {
274:             $match = "/^[a-z0-9 ]+$/i";
275:         }
276: 
277:         return preg_match($match, $test);
278:     }
279: 
280:     /**
281:      * Trims a string to a given length and makes sure that all words up to
282:      * $maxlen are preserved, without exceeding $maxlen.
283:      *
284:      * Warning: Currently, this function uses a regular ASCII-Whitespace to do
285:      * the separation test. If you are using '&nbsp' to create spaces, this
286:      * function will fail.
287:      *
288:      * Example:
289:      * $string = "This is a simple test";
290:      * echo cString::trimAfterWord($string, 15);
291:      *
292:      * This would output "This is a", since this function respects word
293:      * boundaries and doesn't operate beyond the limit given by $maxlen.
294:      *
295:      * @param string $string
296:      *         The string to operate on
297:      * @param int $maxlen
298:      *         The maximum number of characters
299:      * @return string
300:      *         The resulting string
301:      */
302:     public static function trimAfterWord($string, $maxlen) {
303:         // If the string is smaller than the maximum lenght, it makes no sense to
304:         // process it any further. Return it.
305:         if (strlen($string) < $maxlen) {
306:             return $string;
307:         }
308: 
309:         // If the character after the $maxlen position is a space, we can return
310:         // the string until $maxlen.
311:         if (substr($string, $maxlen, 1) == ' ') {
312:             return substr($string, 0, $maxlen);
313:         }
314: 
315:         // Cut the string up to $maxlen so we can use strrpos (reverse str position)
316:         $cutted_string = substr($string, 0, $maxlen);
317: 
318:         // Extract the end of the last word
319:         $last_word_position = strrpos($cutted_string, ' ');
320: 
321:         return substr($cutted_string, 0, $last_word_position);
322:     }
323: 
324:     /**
325:      * Trims a string to a specific length.
326:      *
327:      * If the string is longer than $maxlen, dots are inserted ("...") right
328:      * before $maxlen.
329:      *
330:      * Example:
331:      * $string = "This is a simple test";
332:      * echo cString::trimHard ($string, 15);
333:      *
334:      * This would output "This is a si...", since the string is longer than
335:      * $maxlen and the resulting string matches 15 characters including the dots.
336:      *
337:      * @param string $string
338:      *         The string to operate on
339:      * @param int $maxlen
340:      *         The maximum number of characters
341:      * @param string $fillup [optional]
342:      * @return string
343:      *         The resulting string
344:      */
345:     public static function trimHard($string, $maxlen, $fillup = '...') {
346:         // If the string is smaller than the maximum lenght, it makes no sense to
347:         // process it any further. Return it.
348:         if (strlen($string) < $maxlen) {
349:             return $string;
350:         }
351: 
352:         // Calculate the maximum text length
353:         $maximum_text_length = $maxlen - strlen($fillup);
354: 
355:         // If text length is over zero cut it
356:         if ($maximum_text_length > 0) {
357:             if (preg_match('/(*UTF8)^.{0,' . $maximum_text_length . '}/', $string, $result_array)) {
358:                 $cutted_string = $result_array[0];
359:             } else if (preg_match('/^.{0,' . $maximum_text_length . '}/u', $string, $result_array)) {
360:                 $cutted_string = $result_array[0];
361:             } else {
362:                 $cutted_string = substr($string, 0, $maximum_text_length);
363:             }
364:         } else {
365:             $cutted_string = $string;
366:         }
367: 
368:         // Append the fillup string
369:         $cutted_string .= $fillup;
370: 
371:         return $cutted_string;
372:     }
373: 
374:     /**
375:      * Trims a string to a approximate length preserving sentence boundaries.
376:      *
377:      * The algorithm inside calculates the sentence length to the previous and
378:      * next sentences. The distance to the next sentence which is smaller will
379:      * be taken to trim the string to match the approximate length parameter.
380:      *
381:      * Example:
382:      *
383:      * $string = "This contains two sentences. ";
384:      * $string .= "Lets play around with them. ";
385:      *
386:      * echo cString::trimSentence($string, 40);
387:      * echo cString::trimSentence($string, 50);
388:      *
389:      * The first example would only output the first sentence, the second
390:      * example both sentences.
391:      *
392:      * Explanation:
393:      *
394:      * To match the given max length closely, the function calculates the
395:      * distance to the next and previous sentences. Using the maxlength of 40
396:      * characters, the distance to the previous sentence would be 8 characters,
397:      * and to the next sentence it would be 19 characters. Therefore, only the
398:      * previous sentence is displayed.
399:      *
400:      * The second example displays the second sentence also, since the distance
401:      * to the next sentence is only 9 characters, but to the previous it is 18
402:      * characters.
403:      *
404:      * If you specify the boolean flag "$hard", the limit parameter creates a
405:      * hard limit instead of calculating the distance.
406:      *
407:      * This function ensures that at least one sentence is returned.
408:      *
409:      * @param string $string
410:      *         The string to operate on
411:      * @param int $approxlen
412:      *         The approximate number of characters
413:      * @param bool $hard [optional]
414:      *         If true, use a hard limit for the number of characters
415:      * @return string
416:      *         The resulting string
417:      */
418:     public static function trimSentence($string, $approxlen, $hard = false) {
419:         // If the string is smaller than the maximum lenght, it makes no sense to
420:         // process it any further. Return it.
421:         if (strlen($string) < $approxlen) {
422:             return $string;
423:         }
424: 
425:         // Find out the start of the next sentence
426:         $next_sentence_start = strpos($string, '.', $approxlen);
427: 
428:         // If there's no next sentence (somebody forgot the dot?), set it to the end
429:         // of the string.
430:         if ($next_sentence_start === false) {
431:             $next_sentence_start = strlen($string);
432:         }
433: 
434:         // Cut the previous sentence so we can use strrpos
435:         $previous_sentence_cutted = substr($string, 0, $approxlen);
436: 
437:         // Get out the previous sentence start
438:         $previous_sentence_start = strrpos($previous_sentence_cutted, '.');
439: 
440:         // If the sentence doesn't contain a dot, use the text start.
441:         if ($previous_sentence_start === false) {
442:             $previous_sentence_start = 0;
443:         }
444: 
445:         // If we have a hard limit, we only want to process everything before
446:         // $approxlen
447:         if (($hard == true) && ($next_sentence_start > $approxlen)) {
448:             return substr($string, 0, $previous_sentence_start + 1);
449:         }
450: 
451:         // Calculate next and previous sentence distances
452:         $distance_previous_sentence = $approxlen - $previous_sentence_start;
453:         $distance_next_sentence = $next_sentence_start - $approxlen;
454: 
455:         // Sanity: Return at least one sentence.
456:         $sanity = substr($string, 0, $previous_sentence_start + 1);
457: 
458:         if (strpos($sanity, '.') === false) {
459:             return substr($string, 0, $next_sentence_start + 1);
460:         }
461: 
462:         // Decide wether the next or previous sentence is nearer
463:         if ($distance_previous_sentence > $distance_next_sentence) {
464:             return substr($string, 0, $next_sentence_start + 1);
465:         } else {
466:             return substr($string, 0, $previous_sentence_start + 1);
467:         }
468:     }
469: 
470:     /**
471:      * Converts diactritics to english characters whenever possible.
472:      *
473:      * For german umlauts, this function converts the umlauts to their ASCII
474:      * equivalents (e.g. ä => ae).
475:      *
476:      * For more information about diacritics, refer to
477:      * http://en.wikipedia.org/wiki/Diacritic
478:      *
479:      * For other languages, the diacritic marks are removed, if possible.
480:      *
481:      * @param string $string
482:      *         The string to operate on
483:      * @param string $sourceEncoding [optional; default: UTF-8]
484:      *         The source encoding
485:      * @param string $targetEncoding [optional; default: UTF-8]
486:      *         The target encoding
487:      * @return string
488:      *         The resulting string
489:      */
490:     public static function replaceDiacritics($string, $sourceEncoding = 'UTF-8', $targetEncoding = 'UTF-8') {
491:         if ($sourceEncoding != 'UTF-8') {
492:             $string = self::recodeString($string, $sourceEncoding, "UTF-8");
493:         }
494: 
495:         // replace regular german umlauts and other common characters with
496:         // diacritics
497:         static $search, $replace;
498:         if (!isset($search)) {
499:             $search = array(
500:                     'Ä',
501:                     'Ö',
502:                     'Ü',
503:                     'ä',
504:                     'ö',
505:                     'ü',
506:                     'ß',
507:                     'Á',
508:                     'À',
509:                     'Â',
510:                     'á',
511:                     'à',
512:                     'â',
513:                     'É',
514:                     'È',
515:                     'Ê',
516:                     'é',
517:                     'è',
518:                     'ê',
519:                     'Í',
520:                     'Ì',
521:                     'Î',
522:                     'í',
523:                     'ì',
524:                     'î',
525:                     'Ó',
526:                     'Ò',
527:                     'Ô',
528:                     'ó',
529:                     'ò',
530:                     'ô',
531:                     'Ú',
532:                     'Ù',
533:                     'Û',
534:                     'ú',
535:                     'ù',
536:                     'û'
537:             );
538:             $replace = array(
539:                     'Ae',
540:                     'Oe',
541:                     'Ue',
542:                     'ae',
543:                     'oe',
544:                     'ue',
545:                     'ss',
546:                     'A',
547:                     'A',
548:                     'A',
549:                     'a',
550:                     'a',
551:                     'a',
552:                     'E',
553:                     'E',
554:                     'E',
555:                     'e',
556:                     'e',
557:                     'e',
558:                     'I',
559:                     'I',
560:                     'I',
561:                     'i',
562:                     'i',
563:                     'i',
564:                     'O',
565:                     'O',
566:                     'O',
567:                     'o',
568:                     'o',
569:                     'o',
570:                     'U',
571:                     'U',
572:                     'U',
573:                     'u',
574:                     'u',
575:                     'u'
576:             );
577:         }
578:         $string = str_replace($search, $replace, $string);
579: 
580:         // TODO: Additional converting
581: 
582:         return self::recodeString($string, "UTF-8", $targetEncoding);
583:     }
584: 
585:     /**
586:      * Converts a string to another encoding.
587:      *
588:      * This function tries to detect which function to use (either recode or
589:      * iconv).
590:      *
591:      * If $sourceEncoding and $targetEncoding are the same, this function
592:      * returns immediately.
593:      *
594:      * For more information about encodings, refer to
595:      * http://en.wikipedia.org/wiki/Character_encoding
596:      *
597:      * For more information about the supported encodings in recode, refer to
598:      * http://www.delorie.com/gnu/docs/recode/recode_toc.html
599:      *
600:      * Note: depending on whether recode or iconv is used, the supported
601:      * charsets differ. The following ones are commonly used and are most likely
602:      * supported by both converters:
603:      *
604:      * - ISO-8859-1 to ISO-8859-15
605:      * - ASCII
606:      * - UTF-8
607:      *
608:      * @todo Check if the charset names are the same for both converters
609:      * @todo Implement a converter and charset checker to ensure compilance.
610:      * @param string $string
611:      *         The string to operate on
612:      * @param string $sourceEncoding
613:      *         The source encoding
614:      * @param string $targetEncoding
615:      *         The target encoding (if false, use source encoding)
616:      * @return string
617:      *         The resulting string
618:      */
619:     public static function recodeString($string, $sourceEncoding, $targetEncoding) {
620:         // If sourceEncoding and targetEncoding are the same, return
621:         if (strtolower($sourceEncoding) == strtolower($targetEncoding)) {
622:             return $string;
623:         }
624: 
625:         // Check for the "recode" support
626:         if (function_exists('recode')) {
627:             $sResult = recode_string("$sourceEncoding..$targetEncoding", $string);
628:             return $sResult;
629:         }
630: 
631:         // Check for the "iconv" support
632:         if (function_exists('iconv')) {
633:             $sResult = iconv($sourceEncoding, $targetEncoding, $string);
634:             return $sResult;
635:         }
636: 
637:         // No charset converters found; return with warning
638:         cWarning(__FILE__, __LINE__, 'cString::recodeString could not find either recode or iconv to do charset conversion.');
639:         return $string;
640:     }
641: 
642:     /**
643:      * Removes or converts all "evil" URL characters.
644:      *
645:      * This function removes or converts all characters which can make an URL
646:      * invalid.
647:      *
648:      * Clean characters include:
649:      * - All characters between 32 and 126 which are not alphanumeric and
650:      * aren't one of the following: _-.
651:      *
652:      * @param string $string
653:      *         The string to operate on
654:      * @param bool $replace [optional]
655:      *         If true, all "unclean" characters are replaced
656:      * @return string
657:      *         The resulting string
658:      */
659:     public static function cleanURLCharacters($string, $replace = false) {
660:         $string = self::replaceDiacritics($string);
661:         $string = str_replace(' ', '-', $string);
662:         $string = str_replace('/', '-', $string);
663:         $string = str_replace('&', '-', $string);
664:         $string = str_replace('+', '-', $string);
665: 
666:         $iStrLen = strlen($string);
667: 
668:         $sResultString = '';
669: 
670:         for ($i = 0; $i < $iStrLen; $i++) {
671:             $sChar = substr($string, $i, 1);
672: 
673:             if (preg_match('/^[a-z0-9]*$/i', $sChar) || $sChar == '-' || $sChar == '_' || $sChar == '.') {
674:                 $sResultString .= $sChar;
675:             } else {
676:                 if ($replace == true) {
677:                     $sResultString .= '_';
678:                 }
679:             }
680:         }
681: 
682:         return $sResultString;
683:     }
684: 
685:     /**
686:      * Normalizes line endings in passed string.
687:      *
688:      * @param string $string
689:      * @param string $lineEnding [optional]
690:      *         Feasible values are "\n", "\r" or "\r\n"
691:      * @return string
692:      */
693:     public static function normalizeLineEndings($string, $lineEnding = "\n") {
694:         if ($lineEnding !== "\n" && $lineEnding !== "\r" && $lineEnding !== "\r\n") {
695:             $lineEnding = "\n";
696:         }
697: 
698:         $string = str_replace("\r\n", "\n", $string);
699:         $string = str_replace("\r", "\n", $string);
700:         if ($lineEnding !== "\n") {
701:             $string = str_replace("\n", $lineEnding, $string);
702:         }
703: 
704:         return $string;
705:     }
706: }
707:
Packages

Classes