1: <?php
2: /**
3: * This file contains CONTENIDO String API functions.
4: *
5: * If you are planning to add a function, please make sure that:
6: * 1.) The function is in the correct place
7: * 2.) The function is documented
8: * 3.) The function makes sense and is generically usable
9: *
10: * @package Core
11: * @subpackage Backend
12: * @version SVN Revision $Rev:$
13: *
14: * @author Timo Hummel
15: * @copyright four for business AG <www.4fb.de>
16: * @license http://www.contenido.org/license/LIZENZ.txt
17: * @link http://www.4fb.de
18: * @link http://www.contenido.org
19: */
20:
// Guard against loading this file without the framework: unless the
// CON_FRAMEWORK constant has already been defined, stop the script right here.
if (!defined('CON_FRAMEWORK')) {
    die('Illegal call: Missing framework initialization - request aborted.');
}
22:
/**
 * Trims a string to a given length without cutting a word in half.
 *
 * The result never exceeds $maxlen and always ends at a word boundary. A
 * string that already fits into $maxlen is returned unchanged.
 *
 * Notes:
 * - Only the regular ASCII space is treated as a word separator. Tabs, line
 *   breaks, non-breaking spaces or "&nbsp;" entities are not recognized.
 * - Lengths are measured in bytes (strlen/substr), not in characters.
 * - If the first $maxlen bytes contain no space at all (the first word alone
 *   is longer than $maxlen), an empty string is returned.
 *
 * Example:
 * $string = "This is a simple test";
 * echo cApiStrTrimAfterWord($string, 15);
 *
 * This would output "This is a", since this function respects word boundaries
 * and doesn't operate beyond the limit given by $maxlen.
 *
 * @param string $string The string to operate on
 * @param int $maxlen The maximum length of the result
 * @return string The resulting string
 */
function cApiStrTrimAfterWord($string, $maxlen) {
    // A string that fits needs no processing. The comparison has to be "<=":
    // with "<" a string of exactly $maxlen bytes would fall through and lose
    // its last word although nothing needs to be trimmed.
    if (strlen($string) <= $maxlen) {
        return $string;
    }

    // If the limit falls exactly on the end of a word (the next character is
    // a space), everything up to the limit can be returned as is.
    if (substr($string, $maxlen, 1) == ' ') {
        return substr($string, 0, $maxlen);
    }

    // Otherwise cut at the limit and step back to the last space before it.
    $cutted_string = substr($string, 0, $maxlen);
    $last_word_position = strrpos($cutted_string, ' ');

    // No space inside the limit: not even the first word fits.
    if ($last_word_position === false) {
        return '';
    }

    return substr($cutted_string, 0, $last_word_position);
}
63:
/**
 * Trims a string to a specific length.
 *
 * If the string is longer than $maxlen, it is cut and $fillup (by default
 * "...") is appended so that text plus $fillup together match $maxlen.
 * A string that already fits into $maxlen is returned unchanged.
 *
 * Example:
 * $string = "This is a simple test";
 * echo cApiStrTrimHard($string, 15);
 *
 * This would output "This is a si...", since the string is longer than $maxlen
 * and the resulting string matches 15 characters including the dots.
 *
 * Notes:
 * - The "does it fit" check is done in bytes (strlen). The cut itself is made
 *   on UTF-8 character boundaries; if the string is not valid UTF-8 the cut
 *   falls back to a plain byte-wise substr().
 * - Line breaks inside the string count as ordinary characters.
 * - If $maxlen leaves no room for any text ($maxlen <= strlen($fillup)), the
 *   complete string followed by $fillup is returned, i.e. the result is longer
 *   than $maxlen. This legacy behaviour is kept for backward compatibility.
 *
 * @param string $string The string to operate on
 * @param int $maxlen The maximum number of characters
 * @param string $fillup The marker appended to a shortened string
 * @return string The resulting string
 */
function cApiStrTrimHard($string, $maxlen, $fillup = '...') {
    // A string that fits needs no processing. "<=" (not "<"), otherwise a
    // string of exactly $maxlen would be shortened for no reason.
    if (strlen($string) <= $maxlen) {
        return $string;
    }

    // Room left for the text itself once the fillup is accounted for. The cast
    // keeps the value safe to embed into the regular expression below.
    $maximum_text_length = (int) $maxlen - strlen($fillup);

    if ($maximum_text_length > 0) {
        // "u" cuts on UTF-8 character boundaries, "s" lets "." match line
        // breaks too (without it the cut would stop at the first newline).
        // PCRE rejects repeat counts above 65535, so such lengths go straight
        // to the byte-wise fallback, as do strings that are not valid UTF-8
        // (preg_match() returns false for them).
        if ($maximum_text_length <= 65535
            && preg_match('/^.{0,' . $maximum_text_length . '}/su', $string, $result_array)) {
            $cutted_string = $result_array[0];
        } else {
            $cutted_string = substr($string, 0, $maximum_text_length);
        }
    } else {
        // No room for any text (see the note in the function documentation).
        $cutted_string = $string;
    }

    // Append the fillup string
    return $cutted_string . $fillup;
}
108:
/**
 * Trims a string to an approximate length while preserving sentence
 * boundaries.
 *
 * A sentence is considered to end at a dot ("."). The function looks for the
 * last dot before $approxlen and the first dot at or after $approxlen and cuts
 * the string at whichever of the two is closer to $approxlen. If both are
 * equally far away, the earlier one is used.
 *
 * Example:
 *
 * $string = "This contains two sentences. ";
 * $string .= "Lets play around with them. ";
 *
 * echo cApiStrTrimSentence($string, 40);
 * echo cApiStrTrimSentence($string, 50);
 *
 * The first example would only output the first sentence, the second example
 * both sentences.
 *
 * Explanation:
 *
 * Using an approximate length of 40, the end of the previous sentence is 13
 * characters away and the end of the next sentence 15 characters. Therefore,
 * only the first sentence is returned.
 *
 * With an approximate length of 50, the end of the next sentence is only 5
 * characters away, but the end of the previous one 23 characters. Therefore,
 * both sentences are returned.
 *
 * If you specify the boolean flag "$hard", $approxlen becomes a hard limit:
 * the string is always cut at the last sentence end before $approxlen.
 *
 * This function ensures that at least one sentence is returned. If there is no
 * sentence end before $approxlen, the text up to the first sentence end after
 * it (or the whole string, if it contains no further dot) is returned - even
 * in hard mode, where the result is then longer than $approxlen.
 *
 * Lengths and positions are measured in bytes.
 *
 * @param string $string The string to operate on
 * @param int $approxlen The approximate number of characters
 * @param bool $hard If true, use a hard limit for the number of characters
 * @return string The resulting string
 */
function cApiStrTrimSentence($string, $approxlen, $hard = false) {
    // Negative or non-integer limits would be misread as offsets counted from
    // the end of the string by strpos()/substr(); normalize them.
    $approxlen = max(0, (int) $approxlen);

    // If the string fits into the limit there is nothing to trim.
    $string_length = strlen($string);
    if ($string_length <= $approxlen) {
        return $string;
    }

    // Position of the first sentence end at or after the limit. If there is
    // none (somebody forgot the dot?), use the end of the string.
    $next_sentence_start = strpos($string, '.', $approxlen);
    if ($next_sentence_start === false) {
        $next_sentence_start = $string_length;
    }

    // Position of the last sentence end before the limit.
    $previous_sentence_start = strrpos(substr($string, 0, $approxlen), '.');

    // Sanity: return at least one sentence. Without a sentence end before the
    // limit, the only complete sentence is the one ending after it. This has
    // to be checked before the hard limit is applied, otherwise hard mode
    // would return nothing but the first character of the string.
    if ($previous_sentence_start === false) {
        return substr($string, 0, $next_sentence_start + 1);
    }

    // Hard limit: a sentence ending at or after position $approxlen is at
    // least $approxlen + 1 long, so only the previous sentence end qualifies.
    if ($hard == true) {
        return substr($string, 0, $previous_sentence_start + 1);
    }

    // Calculate next and previous sentence distances
    $distance_previous_sentence = $approxlen - $previous_sentence_start;
    $distance_next_sentence = $next_sentence_start - $approxlen;

    // Decide whether the next or the previous sentence end is nearer
    if ($distance_previous_sentence > $distance_next_sentence) {
        return substr($string, 0, $next_sentence_start + 1);
    }

    return substr($string, 0, $previous_sentence_start + 1);
}
206:
/**
 * Replaces characters carrying diacritics with plain ASCII equivalents.
 *
 * German umlauts and the sharp s are transliterated (e.g. ä => ae, ß => ss).
 * The vowels a, e, i, o and u with an acute, grave or circumflex accent are
 * reduced to their base letter. Characters that are not part of the internal
 * replacement table are left untouched.
 *
 * The replacement itself is done on UTF-8 data: the input is converted from
 * $sourceEncoding to UTF-8 first (if necessary) and the result is converted
 * from UTF-8 to $targetEncoding, both via cApiStrRecodeString().
 *
 * For more information about diacritics, refer to
 * http://en.wikipedia.org/wiki/Diacritic
 *
 * @param string $sString The string to operate on
 * @param string $sourceEncoding The source encoding (default: UTF-8)
 * @param string $targetEncoding The target encoding (default: UTF-8)
 * @return string The resulting string
 */
function cApiStrReplaceDiacritics($sString, $sourceEncoding = 'UTF-8', $targetEncoding = 'UTF-8') {
    // Replacement table (character => ASCII equivalent), built on first use
    // only and kept in a static variable for subsequent calls.
    static $aDiacriticMap = null;
    if ($aDiacriticMap === null) {
        $aDiacriticMap = array(
            // german umlauts and sharp s
            'Ä' => 'Ae',
            'Ö' => 'Oe',
            'Ü' => 'Ue',
            'ä' => 'ae',
            'ö' => 'oe',
            'ü' => 'ue',
            'ß' => 'ss',
            // a with acute, grave, circumflex
            'Á' => 'A',
            'À' => 'A',
            'Â' => 'A',
            'á' => 'a',
            'à' => 'a',
            'â' => 'a',
            // e with acute, grave, circumflex
            'É' => 'E',
            'È' => 'E',
            'Ê' => 'E',
            'é' => 'e',
            'è' => 'e',
            'ê' => 'e',
            // i with acute, grave, circumflex
            'Í' => 'I',
            'Ì' => 'I',
            'Î' => 'I',
            'í' => 'i',
            'ì' => 'i',
            'î' => 'i',
            // o with acute, grave, circumflex
            'Ó' => 'O',
            'Ò' => 'O',
            'Ô' => 'O',
            'ó' => 'o',
            'ò' => 'o',
            'ô' => 'o',
            // u with acute, grave, circumflex
            'Ú' => 'U',
            'Ù' => 'U',
            'Û' => 'U',
            'ú' => 'u',
            'ù' => 'u',
            'û' => 'u'
        );
    }

    // The table above is matched against UTF-8 data, so bring the input into
    // UTF-8 first.
    if ($sourceEncoding != 'UTF-8') {
        $sString = cApiStrRecodeString($sString, $sourceEncoding, "UTF-8");
    }

    $sString = str_replace(array_keys($aDiacriticMap), array_values($aDiacriticMap), $sString);

    // TODO: Additional converting

    return cApiStrRecodeString($sString, "UTF-8", $targetEncoding);
}
318:
/**
 * Converts a string to another encoding.
 *
 * The conversion is done with recode_string() if the recode extension is
 * available, otherwise with iconv(). If neither is available, or if the
 * converter reports a failure, a warning is raised via cWarning() and the
 * string is returned unconverted.
 *
 * If $sourceEncoding and $targetEncoding are the same (compared
 * case-insensitively), this function returns immediately.
 *
 * For more information about encodings, refer to
 * http://en.wikipedia.org/wiki/Character_encoding
 *
 * For more information about the supported encodings in recode, refer to
 * http://www.delorie.com/gnu/docs/recode/recode_toc.html
 *
 * Note: depending on whether recode or iconv is used, the supported charsets
 * differ. The following ones are commonly used and are most likely supported by
 * both converters:
 *
 * - ISO-8859-1 to ISO-8859-15
 * - ASCII
 * - UTF-8
 *
 * @todo Check if the charset names are the same for both converters
 * @todo Implement a converter and charset checker to ensure compliance.
 *
 * @param string $sString The string to operate on
 * @param string $sourceEncoding The source encoding
 * @param string|bool $targetEncoding The target encoding (if false, use source
 *        encoding, i.e. the string is returned unchanged)
 * @return string The resulting string
 */
function cApiStrRecodeString($sString, $sourceEncoding, $targetEncoding) {
    // "false" as target means "keep the source encoding": nothing to convert.
    if ($targetEncoding === false) {
        return $sString;
    }

    // Identical encodings need no conversion. Charset names are not
    // case-sensitive, so "utf-8" and "UTF-8" count as the same encoding.
    if ($sourceEncoding == $targetEncoding
        || strcasecmp((string) $sourceEncoding, (string) $targetEncoding) === 0) {
        return $sString;
    }

    if (function_exists('recode_string')) {
        // Probe for the very function that is called here.
        $sResult = recode_string($sourceEncoding . '..' . $targetEncoding, $sString);
    } elseif (function_exists('iconv')) {
        $sResult = iconv($sourceEncoding, $targetEncoding, $sString);
    } else {
        // No charset converters found; return with warning
        cWarning(__FILE__, __LINE__, 'cApiStrRecodeString could not find either recode or iconv to do charset conversion.');
        return $sString;
    }

    // Both converters return false on failure (e.g. unknown charset or
    // illegal input). Hand back the unconverted string, as in the
    // "no converter" case, instead of leaking false to callers expecting a
    // string.
    if ($sResult === false) {
        cWarning(__FILE__, __LINE__, 'cApiStrRecodeString failed to convert the string from ' . $sourceEncoding . ' to ' . $targetEncoding . '.');
        return $sString;
    }

    return $sResult;
}
372:
/**
 * Removes or converts all "evil" URL characters, i.e. all characters which
 * can make an URL invalid.
 *
 * Processing steps:
 * 1. Diacritics are replaced via cApiStrReplaceDiacritics().
 * 2. Spaces, slashes, ampersands and plus signs are converted to "-".
 * 3. Every remaining byte which is not an ASCII letter (a-z, A-Z), a digit
 *    (0-9) or one of the characters "-", "_" and "." is considered unclean.
 *    Unclean bytes are removed, or replaced by "_" if $bReplace is true.
 *
 * The string is processed byte by byte, so a multi-byte character that
 * survives step 1 yields one "_" per byte when $bReplace is true.
 *
 * @param string $sString The string to operate on
 * @param bool $bReplace If true, all "unclean" characters are replaced by "_"
 *        instead of being removed
 * @return string The resulting string
 */
function cApiStrCleanURLCharacters($sString, $bReplace = false) {
    $sString = cApiStrReplaceDiacritics($sString);
    $sString = str_replace(array(' ', '/', '&', '+'), '-', $sString);

    $sReplacement = ($bReplace == true) ? '_' : '';

    // A negated character class handles all unclean bytes in one pass. Unlike
    // a per-character test against /^[a-z0-9]*$/i it also catches line feeds:
    // "$" matches before a trailing newline, so "\n" would pass that test and
    // end up in the result.
    return preg_replace('/[^a-zA-Z0-9._-]/', $sReplacement, $sString);
}
411:
/**
 * Converts all line endings of the passed string to one uniform line ending.
 *
 * Windows ("\r\n"), old Mac ("\r") and Unix ("\n") line endings are all
 * recognized in the input.
 *
 * @param string $sString The string to operate on
 * @param string $sLineEnding The line ending to use in the result. Feasible
 *        values are "\n", "\r" or "\r\n"; any other value falls back to "\n".
 * @return string The string with uniform line endings
 */
function cApiStrNormalizeLineEndings($sString, $sLineEnding = "\n") {
    $aFeasible = array("\n", "\r", "\r\n");
    $sTarget = in_array($sLineEnding, $aFeasible, true) ? $sLineEnding : "\n";

    // First bring everything down to "\n". The pairs are processed in order,
    // so "\r\n" is collapsed before the remaining single "\r" are converted.
    $sUnified = str_replace(array("\r\n", "\r"), "\n", $sString);

    if ($sTarget === "\n") {
        return $sUnified;
    }

    return str_replace("\n", $sTarget, $sUnified);
}
432: