File includes/functions.api.string.php

  1: <?php
  2: /**
  3:  * This file contains CONTENIDO String API functions.
  4:  *
  5:  * If you are planning to add a function, please make sure that:
  6:  * 1.) The function is in the correct place
  7:  * 2.) The function is documented
  8:  * 3.) The function makes sense and is generically usable
  9:  *
 10:  * @package Core
 11:  * @subpackage Backend
 12:  * @version SVN Revision $Rev:$
 13:  *
 14:  * @author Timo Hummel
 15:  * @copyright four for business AG <www.4fb.de>
 16:  * @license http://www.contenido.org/license/LIZENZ.txt
 17:  * @link http://www.4fb.de
 18:  * @link http://www.contenido.org
 19:  */
 20: 
 21: defined('CON_FRAMEWORK') || die('Illegal call: Missing framework initialization - request aborted.');
 22: 
 23: /**
 24:  * Trims a string to a given length and makes sure that all words up to $maxlen
 25:  * are preserved, without exceeding $maxlen.
 26:  *
 27:  * Warning: Currently, this function uses a regular ASCII-Whitespace to do the
 28:  * separation test. If you are using '&nbsp' to create spaces, this function
 29:  * will fail.
 30:  *
 31:  * Example:
 32:  * $string = "This is a simple test";
 33:  * echo cApiStrTrimAfterWord ($string, 15);
 34:  *
 35:  * This would output "This is a", since this function respects word boundaries
 36:  * and doesn't operate beyond the limit given by $maxlen.
 37:  *
 38:  * @param string $string The string to operate on
 39:  * @param int $maxlen The maximum number of characters
 40:  * @return string The resulting string
 41:  */
 42: function cApiStrTrimAfterWord($string, $maxlen) {
 43:     // If the string is smaller than the maximum lenght, it makes no sense to
 44:     // process it any further. Return it.
 45:     if (strlen($string) < $maxlen) {
 46:         return $string;
 47:     }
 48: 
 49:     // If the character after the $maxlen position is a space, we can return
 50:     // the string until $maxlen.
 51:     if (substr($string, $maxlen, 1) == ' ') {
 52:         return substr($string, 0, $maxlen);
 53:     }
 54: 
 55:     // Cut the string up to $maxlen so we can use strrpos (reverse str position)
 56:     $cutted_string = substr($string, 0, $maxlen);
 57: 
 58:     // Extract the end of the last word
 59:     $last_word_position = strrpos($cutted_string, ' ');
 60: 
 61:     return (substr($cutted_string, 0, $last_word_position));
 62: }
 63: 
 64: /**
 65:  * Trims a string to a specific length.
 66:  * If the string is longer than $maxlen,
 67:  * dots are inserted ("...") right before $maxlen.
 68:  *
 69:  * Example:
 70:  * $string = "This is a simple test";
 71:  * echo cApiStrTrimHard ($string, 15);
 72:  *
 73:  * This would output "This is a si...", since the string is longer than $maxlen
 74:  * and the resulting string matches 15 characters including the dots.
 75:  *
 76:  * @param string $string The string to operate on
 77:  * @param int $maxlen The maximum number of characters
 78:  * @return string The resulting string
 79:  */
 80: function cApiStrTrimHard($string, $maxlen, $fillup = '...') {
 81:     // If the string is smaller than the maximum lenght, it makes no sense to
 82:     // process it any further. Return it.
 83:     if (strlen($string) < $maxlen) {
 84:         return $string;
 85:     }
 86: 
 87:     // Calculate the maximum text length
 88:     $maximum_text_length = $maxlen - strlen($fillup);
 89: 
 90:     // If text length is over zero cut it
 91:     if ($maximum_text_length > 0) {
 92:         if (preg_match('/(*UTF8)^.{0,' . $maximum_text_length . '}/', $string, $result_array)) {
 93:             $cutted_string = $result_array[0];
 94:         } else if (preg_match('/^.{0,' . $maximum_text_length . '}/u', $string, $result_array)) {
 95:             $cutted_string = $result_array[0];
 96:         } else {
 97:             $cutted_string = substr($string, 0, $maximum_text_length);
 98:         }
 99:     } else {
100:         $cutted_string = $string;
101:     }
102: 
103:     // Append the fillup string
104:     $cutted_string .= $fillup;
105: 
106:     return ($cutted_string);
107: }
108: 
/**
 * Trims a string to an approximate length.
 * Sentence boundaries are preserved.
 *
 * The algorithm looks for the sentence end (a dot) before and after the
 * position given by $approxlen and cuts the string at whichever of the two is
 * closer to that position. If both are equally close, the previous sentence
 * end is used.
 *
 * Example:
 *
 * $string = "This contains two sentences. ";
 * $string .= "Lets play around with them. ";
 *
 * echo cApiStrTrimSentence($string, 40);
 * echo cApiStrTrimSentence($string, 50);
 *
 * The first example would only output the first sentence, the second example
 * both sentences.
 *
 * Explanation:
 *
 * The dots are located at the positions 27 and 55. Using an approximate length
 * of 40 characters, the distance to the previous sentence end is 13 characters
 * and the distance to the next sentence end is 15 characters. Therefore, only
 * the first sentence is displayed.
 *
 * Using 50 characters, the distance to the next sentence end is only 5
 * characters, but to the previous one it is 23 characters. Therefore, both
 * sentences are displayed.
 *
 * If you specify the boolean flag "$hard", the limit parameter creates a hard
 * limit instead of calculating the distance: only sentences which end before
 * $approxlen are returned.
 *
 * This function ensures that at least one sentence is returned. If no sentence
 * ends before $approxlen, the first sentence is returned completely, even if
 * it is longer than $approxlen and even if $hard is set.
 *
 * Lengths and positions are measured in bytes.
 *
 * @param string $string The string to operate on
 * @param int $approxlen The approximate number of characters
 * @param bool $hard If true, use a hard limit for the number of characters
 * @return string The resulting string
 */
function cApiStrTrimSentence($string, $approxlen, $hard = false) {
    $string = (string) $string;

    // A negative length makes no sense as search offset; treat it as zero
    $approxlen = max(0, (int) $approxlen);

    // If the string fits into the limit, it makes no sense to process it any
    // further. Return it.
    if (strlen($string) <= $approxlen) {
        return $string;
    }

    // Find the end of the sentence which crosses the limit
    $next_sentence_end = strpos($string, '.', $approxlen);

    // If there's no next sentence end (somebody forgot the dot?), use the end
    // of the string.
    if ($next_sentence_end === false) {
        $next_sentence_end = strlen($string);
    }

    // Find the end of the last sentence which is completed before the limit
    $previous_sentence_end = strrpos(substr($string, 0, $approxlen), '.');

    // Sanity: Return at least one sentence. Without a completed sentence
    // before the limit, the first sentence is the only sensible result.
    if ($previous_sentence_end === false) {
        return substr($string, 0, $next_sentence_end + 1);
    }

    // The next sentence end is never located before $approxlen, so including
    // it always exceeds a hard limit.
    if ($hard) {
        return substr($string, 0, $previous_sentence_end + 1);
    }

    // Calculate next and previous sentence distances
    $distance_previous_sentence = $approxlen - $previous_sentence_end;
    $distance_next_sentence = $next_sentence_end - $approxlen;

    // Decide whether the next or previous sentence end is nearer
    if ($distance_previous_sentence > $distance_next_sentence) {
        return substr($string, 0, $next_sentence_end + 1);
    }

    return substr($string, 0, $previous_sentence_end + 1);
}
206: 
/**
 * Converts characters with diacritics to plain english characters whenever
 * possible.
 *
 * German umlauts are converted to their two-letter ASCII equivalents
 * (e.g. ä => ae, ß => ss). For the other supported characters (acute, grave
 * and circumflex accents on vowels), the diacritic mark is removed
 * (e.g. é => e).
 *
 * For more information about diacritics, refer to
 * http://en.wikipedia.org/wiki/Diacritic
 *
 * The replacement itself is done on UTF-8 data: the string is recoded from
 * $sourceEncoding to UTF-8 before and from UTF-8 to $targetEncoding after the
 * replacement.
 *
 * @param string $sString The string to operate on
 * @param string $sourceEncoding The source encoding (default: UTF-8)
 * @param string $targetEncoding The target encoding (default: UTF-8)
 * @return string The resulting string
 */
function cApiStrReplaceDiacritics($sString, $sourceEncoding = 'UTF-8', $targetEncoding = 'UTF-8') {
    // The lookup tables are built only once per request
    static $aSearch, $aReplace;

    // The lookup tables below contain UTF-8 characters
    if ($sourceEncoding != 'UTF-8') {
        $sString = cApiStrRecodeString($sString, $sourceEncoding, 'UTF-8');
    }

    if (!isset($aSearch)) {
        // character with diacritic => ASCII replacement
        $aTransliterations = array(
            'Ä' => 'Ae', 'Ö' => 'Oe', 'Ü' => 'Ue',
            'ä' => 'ae', 'ö' => 'oe', 'ü' => 'ue',
            'ß' => 'ss',
            'Á' => 'A', 'À' => 'A', 'Â' => 'A',
            'á' => 'a', 'à' => 'a', 'â' => 'a',
            'É' => 'E', 'È' => 'E', 'Ê' => 'E',
            'é' => 'e', 'è' => 'e', 'ê' => 'e',
            'Í' => 'I', 'Ì' => 'I', 'Î' => 'I',
            'í' => 'i', 'ì' => 'i', 'î' => 'i',
            'Ó' => 'O', 'Ò' => 'O', 'Ô' => 'O',
            'ó' => 'o', 'ò' => 'o', 'ô' => 'o',
            'Ú' => 'U', 'Ù' => 'U', 'Û' => 'U',
            'ú' => 'u', 'ù' => 'u', 'û' => 'u'
        );

        $aSearch = array_keys($aTransliterations);
        $aReplace = array_values($aTransliterations);
    }

    $sTransliterated = str_replace($aSearch, $aReplace, $sString);

    // TODO: Additional converting

    return cApiStrRecodeString($sTransliterated, 'UTF-8', $targetEncoding);
}
318: 
/**
 * Converts a string to another encoding.
 * This function tries to detect which function to use (either recode_string
 * or iconv).
 *
 * If $sourceEncoding and $targetEncoding are the same, this function returns
 * the string immediately.
 *
 * If a converter fails, the next available converter is tried. If no
 * converter is available or all of them fail, a warning is issued via
 * cWarning() and the unconverted string is returned, so this function never
 * returns false.
 *
 * For more information about encodings, refer to
 * http://en.wikipedia.org/wiki/Character_encoding
 *
 * For more information about the supported encodings in recode, refer to
 * http://www.delorie.com/gnu/docs/recode/recode_toc.html
 *
 * Note: depending on whether recode or iconv is used, the supported charsets
 * differ. The following ones are commonly used and are most likely supported by
 * both converters:
 *
 * - ISO-8859-1 to ISO-8859-15
 * - ASCII
 * - UTF-8
 *
 * @todo Check if the charset names are the same for both converters
 * @todo Implement a converter and charset checker to ensure compliance.
 *
 * @param string $sString The string to operate on
 * @param string $sourceEncoding The source encoding
 * @param string $targetEncoding The target encoding
 * @return string The resulting string
 */
function cApiStrRecodeString($sString, $sourceEncoding, $targetEncoding) {
    // If sourceEncoding and targetEncoding are the same, return
    if ($sourceEncoding == $targetEncoding) {
        return $sString;
    }

    $bConverterFound = false;

    // Check for the "recode" support. Test for the function which is
    // actually called.
    if (function_exists('recode_string')) {
        $bConverterFound = true;
        $sResult = recode_string("$sourceEncoding..$targetEncoding", $sString);
        if ($sResult !== false) {
            return $sResult;
        }
    }

    // Check for the "iconv" support
    if (function_exists('iconv')) {
        $bConverterFound = true;
        $sResult = iconv($sourceEncoding, $targetEncoding, $sString);
        if ($sResult !== false) {
            return $sResult;
        }
    }

    // Conversion was not possible; return the unconverted string with warning
    if ($bConverterFound) {
        cWarning(__FILE__, __LINE__, 'cApiStrRecodeString could not convert the string from ' . $sourceEncoding . ' to ' . $targetEncoding . '.');
    } else {
        cWarning(__FILE__, __LINE__, 'cApiStrRecodeString could not find either recode or iconv to do charset conversion.');
    }

    return $sString;
}
372: 
/**
 * Removes or converts all "evil" URL characters.
 * This function removes or converts all characters which can make an URL
 * invalid.
 *
 * Processing steps:
 * - Characters with diacritics are converted by cApiStrReplaceDiacritics()
 * - Spaces and the characters "/", "&" and "+" are converted to "-"
 * - Every remaining byte which is not an ASCII letter, a digit or one of
 *   the characters "-", "_" and "." is unclean. Unclean bytes (this includes
 *   line breaks and other control characters) are removed, or replaced by
 *   "_" if $bReplace is set.
 *
 * @param string $sString The string to operate on
 * @param bool $bReplace If true, all "unclean" characters are replaced by
 *            "_" instead of being removed
 * @return string The resulting string
 */
function cApiStrCleanURLCharacters($sString, $bReplace = false) {
    $sString = cApiStrReplaceDiacritics($sString);
    $sString = str_replace(array(' ', '/', '&', '+'), '-', $sString);

    $sReplacement = $bReplace ? '_' : '';

    // A negated character class is used on purpose: an anchored pattern like
    // /^[a-z0-9]*$/ also accepts a single line break, because "$" matches
    // in front of a trailing newline.
    return (string) preg_replace('/[^a-zA-Z0-9_.\-]/', $sReplacement, $sString);
}
411: 
/**
 * Normalizes line endings in passed string.
 *
 * All Windows ("\r\n") and old Mac ("\r") line endings are unified to "\n"
 * first, afterwards every "\n" is converted to the requested line ending.
 *
 * @param string $sString The string to operate on
 * @param string $sLineEnding Feasible values are "\n", "\r" or "\r\n", any
 *            other value is treated like "\n"
 * @return string The string with normalized line endings
 */
function cApiStrNormalizeLineEndings($sString, $sLineEnding = "\n") {
    // Fall back to the default for unsupported line endings
    $bSupported = in_array($sLineEnding, array("\n", "\r", "\r\n"), true);
    $sTarget = $bSupported ? $sLineEnding : "\n";

    // Unify everything to "\n"; "\r\n" has to be handled before a single "\r"
    $sUnified = str_replace(array("\r\n", "\r"), "\n", $sString);

    if ($sTarget === "\n") {
        return $sUnified;
    }

    return str_replace("\n", $sTarget, $sUnified);
}
432:
Packages

Classes

Interfaces