1: <?php
2: /**
3: * This file contains CONTENIDO String API functions.
4: *
5: * If you are planning to add a function, please make sure that:
6: * 1.) The function is in the correct place
7: * 2.) The function is documented
8: * 3.) The function makes sense and is generically usable
9: *
10: * @package Core
11: * @subpackage Backend
12: * @version SVN Revision $Rev:$
13: *
14: * @author Timo Hummel
15: * @copyright four for business AG <www.4fb.de>
16: * @license http://www.contenido.org/license/LIZENZ.txt
17: * @link http://www.4fb.de
18: * @link http://www.contenido.org
19: */
20:
// Refuse to run when the file is requested directly, i.e. when the
// CON_FRAMEWORK constant has not been defined.
if (!defined('CON_FRAMEWORK')) {
    die('Illegal call: Missing framework initialization - request aborted.');
}
22:
/**
 * Trims a string to a given length and makes sure that all words up to $maxlen
 * are preserved, without exceeding $maxlen.
 *
 * Warning: This function only treats the regular ASCII space (0x20) as a word
 * separator. If the text uses '&nbsp;' (or any other kind of whitespace) to
 * separate words, those positions are not recognized as word boundaries.
 *
 * Lengths and positions are measured in bytes. Because the result always ends
 * directly in front of an ASCII space, a multi-byte character is never cut in
 * half.
 *
 * Example:
 * $string = "This is a simple test";
 * echo cApiStrTrimAfterWord($string, 15);
 *
 * This would output "This is a", since this function respects word boundaries
 * and doesn't operate beyond the limit given by $maxlen.
 *
 * @param string $string The string to operate on
 * @param int $maxlen The maximum number of characters
 * @return string The resulting string; an empty string if not even the first
 *                word fits into $maxlen
 */
function cApiStrTrimAfterWord($string, $maxlen) {
    // A string that already fits (including one that is exactly $maxlen long)
    // needs no processing.
    if (strlen($string) <= $maxlen) {
        return $string;
    }

    // If the character right after the $maxlen position is a space, the cut
    // falls exactly on a word boundary.
    if (substr($string, $maxlen, 1) == ' ') {
        return substr($string, 0, $maxlen);
    }

    // Cut the string at $maxlen so strrpos can locate the last word boundary
    // inside the allowed range.
    $cutted_string = substr($string, 0, $maxlen);
    $last_word_position = strrpos($cutted_string, ' ');

    // No space inside the allowed range: the first word alone is longer than
    // $maxlen, so no complete word can be returned.
    if ($last_word_position === false) {
        return '';
    }

    return substr($cutted_string, 0, $last_word_position);
}
62:
/**
 * Trims a string to a specific length. If the string is longer than $maxlen,
 * it is cut and $fillup ("..." by default) is appended so that the result is
 * $maxlen characters long, including the fillup.
 *
 * Lengths are counted in characters when the input is valid UTF-8 and in bytes
 * otherwise. Line breaks count as ordinary characters.
 *
 * Example:
 * $string = "This is a simple test";
 * echo cApiStrTrimHard($string, 15);
 *
 * This would output "This is a si...", since the string is longer than $maxlen
 * and the resulting string matches 15 characters including the dots.
 *
 * If $maxlen is smaller than the length of $fillup, no text is kept and only
 * $fillup is returned.
 *
 * @param string $string The string to operate on
 * @param int $maxlen The maximum number of characters
 * @param string $fillup The text appended to a shortened string
 * @return string The resulting string
 */
function cApiStrTrimHard($string, $maxlen, $fillup = '...') {
    $string = (string) $string;
    $fillup = (string) $fillup;
    $maxlen = (int) $maxlen;

    // Decide once whether we can count characters (valid UTF-8) or have to
    // fall back to bytes. An empty pattern with the "u" modifier only matches
    // when the subject is valid UTF-8.
    $is_utf8 = (preg_match('//u', $string . $fillup) === 1);

    if ($is_utf8) {
        $string_length = preg_match_all('/./su', $string, $unused);
        $fillup_length = preg_match_all('/./su', $fillup, $unused);
    } else {
        $string_length = strlen($string);
        $fillup_length = strlen($fillup);
    }

    // A string that already fits (including one that is exactly $maxlen long)
    // is returned untouched.
    if ($string_length <= $maxlen) {
        return $string;
    }

    // Number of characters of the original text we may keep. Never negative,
    // otherwise the quantifier below would be invalid.
    $maximum_text_length = max(0, $maxlen - $fillup_length);

    // Cut on character boundaries; the "s" modifier lets "." match line
    // breaks so the cut does not stop at the first newline. The byte-wise
    // substr is used for non UTF-8 input or if the pattern fails.
    if ($is_utf8 && preg_match('/^.{0,' . $maximum_text_length . '}/su', $string, $result_array)) {
        $cutted_string = $result_array[0];
    } else {
        $cutted_string = substr($string, 0, $maximum_text_length);
    }

    // Append the fillup string
    return $cutted_string . $fillup;
}
102:
/**
 * Trims a string to an approximate length. Sentence boundaries are preserved.
 *
 * A sentence boundary is a dot ("."). The algorithm looks for the last dot
 * before $approxlen and the first dot at or after $approxlen, and cuts the
 * string at whichever of the two is nearer to $approxlen. If both are equally
 * near, the earlier one is used. Positions are measured in bytes.
 *
 * Example:
 *
 * $string = "This contains two sentences. ";
 * $string .= "Lets play around with them. ";
 *
 * echo cApiStrTrimSentence($string, 40);
 * echo cApiStrTrimSentence($string, 50);
 *
 * The first example would only output the first sentence, the second example both
 * sentences.
 *
 * Explanation:
 *
 * To match the given max length closely, the function calculates the distance to
 * the next and previous sentence ends. Using the max length of 40 characters, the
 * distance to the previous sentence end is 13 characters, and to the next sentence
 * end it is 15 characters. Therefore, only the first sentence is returned.
 *
 * The second example returns the second sentence as well, since the distance to
 * the next sentence end is only 5 characters, but to the previous it is 23
 * characters.
 *
 * If you specify the boolean flag "$hard", the limit parameter creates a hard limit
 * instead of calculating the distance: the string is always cut at the last
 * sentence end before $approxlen.
 *
 * This function ensures that at least one sentence is returned. If no sentence
 * ends before $approxlen, the complete first sentence is returned, even if it
 * is longer than $approxlen and even if $hard is set.
 *
 * @param string $string The string to operate on
 * @param int $approxlen The approximate number of characters
 * @param bool $hard If true, use a hard limit for the number of characters
 * @return string The resulting string
 */
function cApiStrTrimSentence($string, $approxlen, $hard = false) {
    // A negative limit is treated like 0; it would otherwise be interpreted
    // as an offset from the end of the string by strpos.
    $approxlen = max(0, (int) $approxlen);

    // A string that already fits needs no processing.
    if (strlen($string) <= $approxlen) {
        return $string;
    }

    // End of the sentence that is still running at $approxlen. If there is no
    // further dot (somebody forgot it?), the sentence runs to the string end.
    $next_sentence_start = strpos($string, '.', $approxlen);
    if ($next_sentence_start === false) {
        $next_sentence_start = strlen($string);
    }

    // End of the last sentence that is completed before $approxlen
    $previous_sentence_start = strrpos(substr($string, 0, $approxlen), '.');

    // No sentence ends before the limit: return the first sentence as a whole
    // so that at least one sentence is returned.
    if ($previous_sentence_start === false) {
        return substr($string, 0, $next_sentence_start + 1);
    }

    // With a hard limit only sentences ending before $approxlen are allowed.
    // The next sentence end is located at or after $approxlen, so including
    // it would always exceed the limit.
    if ($hard) {
        return substr($string, 0, $previous_sentence_start + 1);
    }

    // Calculate next and previous sentence distances
    $distance_previous_sentence = $approxlen - $previous_sentence_start;
    $distance_next_sentence = $next_sentence_start - $approxlen;

    // Cut at whichever sentence end is nearer to the requested length
    if ($distance_previous_sentence > $distance_next_sentence) {
        return substr($string, 0, $next_sentence_start + 1);
    }

    return substr($string, 0, $previous_sentence_start + 1);
}
191:
/**
 * Converts characters with diacritics to plain English characters whenever
 * possible.
 *
 * German umlauts and the sharp s are converted to their ASCII equivalents
 * (e.g. ä => ae, ß => ss). For the other characters in the replacement table
 * (acute, grave and circumflex accents on a, e, i, o, u) the diacritic mark
 * is simply removed. Characters that are not in the table are left untouched.
 *
 * For more information about diacritics, refer to
 * http://en.wikipedia.org/wiki/Diacritic
 *
 * The replacement itself works on UTF-8: the input is recoded to UTF-8 first
 * if $sourceEncoding differs, and the result is recoded to $targetEncoding.
 *
 * @param string $sString The string to operate on
 * @param string $sourceEncoding The source encoding (default: UTF-8)
 * @param string $targetEncoding The target encoding (default: UTF-8)
 * @return string The resulting string
 */
function cApiStrReplaceDiacritics($sString, $sourceEncoding = 'UTF-8', $targetEncoding = 'UTF-8') {
    // Replacement table, built only once per request
    static $aSearch, $aReplace;

    if ($aSearch === null) {
        $aMap = array(
            'Ä' => 'Ae', 'Ö' => 'Oe', 'Ü' => 'Ue',
            'ä' => 'ae', 'ö' => 'oe', 'ü' => 'ue',
            'ß' => 'ss',
            'Á' => 'A', 'À' => 'A', 'Â' => 'A',
            'á' => 'a', 'à' => 'a', 'â' => 'a',
            'É' => 'E', 'È' => 'E', 'Ê' => 'E',
            'é' => 'e', 'è' => 'e', 'ê' => 'e',
            'Í' => 'I', 'Ì' => 'I', 'Î' => 'I',
            'í' => 'i', 'ì' => 'i', 'î' => 'i',
            'Ó' => 'O', 'Ò' => 'O', 'Ô' => 'O',
            'ó' => 'o', 'ò' => 'o', 'ô' => 'o',
            'Ú' => 'U', 'Ù' => 'U', 'Û' => 'U',
            'ú' => 'u', 'ù' => 'u', 'û' => 'u'
        );
        $aSearch = array_keys($aMap);
        $aReplace = array_values($aMap);
    }

    // The table above is UTF-8, so bring the input into UTF-8 first
    $sUtf8 = $sString;
    if ($sourceEncoding != 'UTF-8') {
        $sUtf8 = cApiStrRecodeString($sUtf8, $sourceEncoding, "UTF-8");
    }

    $sPlain = str_replace($aSearch, $aReplace, $sUtf8);

    // TODO: Additional converting

    return cApiStrRecodeString($sPlain, "UTF-8", $targetEncoding);
}
225:
/**
 * Converts a string to another encoding. This function tries to detect which
 * converter to use (recode first, then iconv).
 *
 * If $sourceEncoding and $targetEncoding name the same encoding (compared
 * case-insensitively), the string is returned immediately without conversion.
 *
 * If a converter is available but fails, the next converter is tried. If no
 * converter is available or all of them fail, a warning is raised via
 * cWarning() and the unconverted string is returned.
 *
 * For more information about encodings, refer to
 * http://en.wikipedia.org/wiki/Character_encoding
 *
 * For more information about the supported encodings in recode, refer to
 * http://www.delorie.com/gnu/docs/recode/recode_toc.html
 *
 * Note: depending on whether recode or iconv is used, the supported charsets
 * differ. The following ones are commonly used and are most likely supported by
 * both converters:
 *
 * - ISO-8859-1 to ISO-8859-15
 * - ASCII
 * - UTF-8
 *
 * @todo Check if the charset names are the same for both converters
 * @todo Implement a converter and charset checker to ensure compliance.
 *
 * @param string $sString The string to operate on
 * @param string $sourceEncoding The encoding $sString is currently in
 * @param string $targetEncoding The encoding to convert $sString to
 * @return string The converted string, or the unchanged input if no
 *                conversion was necessary or possible
 */
function cApiStrRecodeString($sString, $sourceEncoding, $targetEncoding) {
    // If sourceEncoding and targetEncoding are the same, return.
    // Charset names are case-insensitive ("utf-8" and "UTF-8" are the same).
    if ($sourceEncoding == $targetEncoding
        || strcasecmp((string) $sourceEncoding, (string) $targetEncoding) === 0) {
        return $sString;
    }

    $bConverterAvailable = false;

    // Check for the "recode" support. Test for the function that is actually
    // called below.
    if (function_exists('recode_string')) {
        $bConverterAvailable = true;
        $sResult = recode_string("$sourceEncoding..$targetEncoding", $sString);
        if ($sResult !== false) {
            return $sResult;
        }
    }

    // Check for the "iconv" support
    if (function_exists('iconv')) {
        $bConverterAvailable = true;
        $sResult = iconv($sourceEncoding, $targetEncoding, $sString);
        if ($sResult !== false) {
            return $sResult;
        }
    }

    if ($bConverterAvailable) {
        // A converter exists but could not handle this conversion. Do not
        // hand "false" to callers that expect a string.
        cWarning(__FILE__, __LINE__, 'cApiStrRecodeString failed to convert the string from ' . $sourceEncoding . ' to ' . $targetEncoding . '.');
    } else {
        // No charset converters found; return with warning
        cWarning(__FILE__, __LINE__, 'cApiStrRecodeString could not find either recode or iconv to do charset conversion.');
    }

    return $sString;
}
276:
/**
 * Removes or converts all "evil" URL characters. This function removes or converts
 * all characters which can make an URL invalid.
 *
 * Processing steps:
 * - Diacritics are replaced via cApiStrReplaceDiacritics().
 * - Spaces, "/", "&" and "+" are converted to "-".
 * - Every remaining byte that is not an ASCII letter, an ASCII digit or one
 *   of "_", "-", "." is considered unclean. This includes control characters
 *   such as line breaks. Unclean bytes are removed, or replaced by "_" if
 *   $bReplace is set.
 *
 * The string is processed byte by byte, so a multi-byte character that is
 * still present after the diacritics replacement results in one "_" per byte
 * when $bReplace is set.
 *
 * @param string $sString The string to operate on
 * @param bool $bReplace If true, all "unclean" characters are replaced by "_"
 *                       instead of being removed
 * @return string The resulting string
 */
function cApiStrCleanURLCharacters($sString, $bReplace = false) {
    $sString = cApiStrReplaceDiacritics($sString);
    $sString = str_replace(array(' ', '/', '&', '+'), '-', $sString);

    $sReplacement = ($bReplace == true) ? '_' : '';

    // A negated character class is used on purpose: an anchored pattern like
    // /^[a-z0-9]*$/ also matches a lone "\n", because "$" matches in front of
    // a trailing line break, and would let line breaks slip into the URL.
    return (string) preg_replace('/[^a-z0-9._-]/i', $sReplacement, $sString);
}
314:
/**
 * Normalizes line endings in passed string.
 *
 * All "\r\n" and "\r" line endings are first unified to "\n"; afterwards
 * every "\n" is converted to the requested line ending.
 *
 * @param string $sString The string to operate on
 * @param string $sLineEnding Feasible values are "\n", "\r" or "\r\n";
 *                            any other value is treated as "\n"
 * @return string The string with normalized line endings
 */
function cApiStrNormalizeLineEndings($sString, $sLineEnding = "\n") {
    // Fall back to "\n" for anything that is not a known line ending
    $aFeasible = array("\n", "\r", "\r\n");
    $sTarget = in_array($sLineEnding, $aFeasible, true) ? $sLineEnding : "\n";

    // "\r\n" has to be handled before the single "\r"
    $sUnified = str_replace(array("\r\n", "\r"), "\n", $sString);

    if ($sTarget === "\n") {
        return $sUnified;
    }

    return str_replace("\n", $sTarget, $sUnified);
}
334: