File includes/functions.api.string.php

  1: <?php
  2: /**
  3:  * This file contains CONTENIDO String API functions.
  4:  *
  5:  * If you are planning to add a function, please make sure that:
  6:  * 1.) The function is in the correct place
  7:  * 2.) The function is documented
  8:  * 3.) The function makes sense and is generically usable
  9:  *
 10:  * @package          Core
 11:  * @subpackage       Backend
 12:  * @version          SVN Revision $Rev:$
 13:  *
 14:  * @author           Timo Hummel
 15:  * @copyright        four for business AG <www.4fb.de>
 16:  * @license          http://www.contenido.org/license/LIZENZ.txt
 17:  * @link             http://www.4fb.de
 18:  * @link             http://www.contenido.org
 19:  */
 20: 
 21: defined('CON_FRAMEWORK') || die('Illegal call: Missing framework initialization - request aborted.');
 22: 
 23: /**
 24:  * Trims a string to a given length and makes sure that all words up to $maxlen
 25:  * are preserved, without exceeding $maxlen.
 26:  *
 27:  * Warning: Currently, this function uses a regular ASCII-Whitespace to do the
 28:  * seperation test. If you are using '&nbsp' to create spaces, this function will fail.
 29:  *
 30:  * Example:
 31:  * $string = "This is a simple test";
 32:  * echo cApiStrTrimAfterWord ($string, 15);
 33:  *
 34:  * This would output "This is a", since this function respects word boundaries
 35:  * and doesn't operate beyond the limit given by $maxlen.
 36:  *
 37:  * @param   string  $string  The string to operate on
 38:  * @param   int     $maxlen  The maximum number of characters
 39:  * @return  string  The resulting string
 40:  */
 41: function cApiStrTrimAfterWord($string, $maxlen) {
 42:     // If the string is smaller than the maximum lenght, it makes no sense to
 43:     // process it any further. Return it.
 44:     if (strlen($string) < $maxlen) {
 45:         return $string;
 46:     }
 47: 
 48:     // If the character after the $maxlen position is a space, we can return
 49:     // the string until $maxlen.
 50:     if (substr($string, $maxlen, 1) == ' ') {
 51:         return substr($string, 0, $maxlen);
 52:     }
 53: 
 54:     // Cut the string up to $maxlen so we can use strrpos (reverse str position)
 55:     $cutted_string = substr($string, 0, $maxlen);
 56: 
 57:     // Extract the end of the last word
 58:     $last_word_position = strrpos($cutted_string, ' ');
 59: 
 60:     return (substr($cutted_string, 0, $last_word_position));
 61: }
 62: 
 63: /**
 64:  * Trims a string to a specific length. If the string is longer than $maxlen,
 65:  * dots are inserted ("...") right before $maxlen.
 66:  *
 67:  * Example:
 68:  * $string = "This is a simple test";
 69:  * echo cApiStrTrimHard ($string, 15);
 70:  *
 71:  * This would output "This is a si...", since the string is longer than $maxlen
 72:  * and the resulting string matches 15 characters including the dots.
 73:  *
 74:  * @param   string  $string  The string to operate on
 75:  * @param   int     $maxlen  The maximum number of characters
 76:  * @return  string  The resulting string
 77:  */
 78: function cApiStrTrimHard($string, $maxlen, $fillup = '...') {
 79:     // If the string is smaller than the maximum lenght, it makes no sense to
 80:     // process it any further. Return it.
 81:     if (strlen($string) < $maxlen) {
 82:         return $string;
 83:     }
 84: 
 85:     // Calculate the maximum text length
 86:     $maximum_text_length = $maxlen - strlen($fillup);
 87: 
 88:     // Cut it
 89:     if (preg_match('/(*UTF8)^.{0,' . $maximum_text_length . '}/', $string ,$result_array)) {
 90:         $cutted_string = $result_array[0];
 91:     } else if (preg_match('/^.{0,' . $maximum_text_length . '}/u', $string ,$result_array)) {
 92:         $cutted_string = $result_array[0];
 93:     } else {
 94:          $cutted_string = substr($string, 0, $maximum_text_length);
 95:     }
 96: 
 97:     // Append the fillup string
 98:     $cutted_string .= $fillup;
 99: 
100:     return ($cutted_string);
101: }
102: 
/**
 * Trims a string to an approximate length. Sentence boundaries are preserved.
 *
 * A sentence is considered to end at a dot ("."). The function looks for the
 * last sentence end before $approxlen and the first sentence end at or after
 * $approxlen, and cuts the string at whichever of the two is nearer to
 * $approxlen (on a tie, the earlier one is used).
 *
 * Example:
 *
 * $string  = "This contains two sentences. ";
 * $string .= "Lets play around with them. ";
 *
 * echo cApiStrTrimSentence($string, 40);
 * echo cApiStrTrimSentence($string, 50);
 *
 * The first example would only output the first sentence, the second example both
 * sentences.
 *
 * Explanation:
 *
 * The sentences end at positions 27 and 55. Using an approximate length of 40,
 * the distance to the previous sentence end is 13 characters and to the next
 * one 15 characters. Therefore, only the first sentence is returned.
 *
 * Using an approximate length of 50, the distance to the next sentence end is
 * only 5 characters, but to the previous one it is 23 characters. Therefore,
 * both sentences are returned.
 *
 * If you specify the boolean flag "$hard", $approxlen is a hard limit: the
 * string is always cut at the last sentence end before $approxlen.
 *
 * This function ensures that at least one sentence is returned: if no sentence
 * ends before $approxlen, the whole first sentence is returned even if it is
 * longer than $approxlen (also in hard mode). If the string contains no dot at
 * all, the whole string is returned.
 *
 * Lengths and positions are measured in bytes.
 *
 * @param  string  $string     The string to operate on
 * @param  int     $approxlen  The approximate number of characters
 * @param  bool    $hard       If true, use a hard limit for the number of characters
 * @return string  The resulting string
 */
function cApiStrTrimSentence($string, $approxlen, $hard = false) {
    // A negative length makes no sense and would be treated as an offset
    // from the end of the string by strpos().
    $approxlen = max(0, (int) $approxlen);
    $string_length = strlen($string);

    // If the string fits into the given length, it makes no sense to process
    // it any further. Return it.
    if ($string_length <= $approxlen) {
        return $string;
    }

    // End of the first sentence that ends at or behind $approxlen. If there is
    // none (somebody forgot the dot?), use the end of the string.
    $next_sentence_end = strpos($string, '.', $approxlen);
    if ($next_sentence_end === false) {
        $next_sentence_end = $string_length;
    }

    // End of the last sentence that ends before $approxlen
    $previous_sentence_end = strrpos(substr($string, 0, $approxlen), '.');

    // Sanity: no complete sentence before $approxlen, so return the first
    // sentence as a whole instead of a fragment.
    if ($previous_sentence_end === false) {
        return substr($string, 0, $next_sentence_end + 1);
    }

    // With a hard limit, only sentences ending before $approxlen may be used
    if ($hard) {
        return substr($string, 0, $previous_sentence_end + 1);
    }

    // Decide whether the next or the previous sentence end is nearer
    $distance_previous_sentence = $approxlen - $previous_sentence_end;
    $distance_next_sentence = $next_sentence_end - $approxlen;

    if ($distance_previous_sentence > $distance_next_sentence) {
        return substr($string, 0, $next_sentence_end + 1);
    }

    return substr($string, 0, $previous_sentence_end + 1);
}
191: 
/**
 * Replaces characters carrying diacritics with plain ASCII characters.
 *
 * German umlauts and the sharp s are replaced with their two-letter ASCII
 * equivalents (e.g. ä => ae, ß => ss). For the vowels a, e, i, o and u with an
 * acute, grave or circumflex accent the accent is removed (e.g. é => e).
 * All other characters are left untouched.
 *
 * The replacement itself works on UTF-8: the string is converted from
 * $sourceEncoding to UTF-8 first (if necessary) and to $targetEncoding
 * afterwards, both using cApiStrRecodeString().
 *
 * For more information about diacritics, refer to
 * http://en.wikipedia.org/wiki/Diacritic
 *
 * @param  string  $sString         The string to operate on
 * @param  string  $sourceEncoding  The source encoding (default: UTF-8)
 * @param  string  $targetEncoding  The target encoding (default: UTF-8)
 * @return string  The resulting string
 */
function cApiStrReplaceDiacritics($sString, $sourceEncoding = 'UTF-8', $targetEncoding = 'UTF-8') {
    // Character => ASCII replacement, kept in a static so the table is only
    // set up once.
    static $aTransliterations = array(
        // german umlauts
        'Ä' => 'Ae', 'Ö' => 'Oe', 'Ü' => 'Ue',
        'ä' => 'ae', 'ö' => 'oe', 'ü' => 'ue',
        'ß' => 'ss',
        // accented vowels
        'Á' => 'A', 'À' => 'A', 'Â' => 'A',
        'á' => 'a', 'à' => 'a', 'â' => 'a',
        'É' => 'E', 'È' => 'E', 'Ê' => 'E',
        'é' => 'e', 'è' => 'e', 'ê' => 'e',
        'Í' => 'I', 'Ì' => 'I', 'Î' => 'I',
        'í' => 'i', 'ì' => 'i', 'î' => 'i',
        'Ó' => 'O', 'Ò' => 'O', 'Ô' => 'O',
        'ó' => 'o', 'ò' => 'o', 'ô' => 'o',
        'Ú' => 'U', 'Ù' => 'U', 'Û' => 'U',
        'ú' => 'u', 'ù' => 'u', 'û' => 'u'
    );

    // The table above is UTF-8, so bring the input into UTF-8 first
    if ($sourceEncoding != 'UTF-8') {
        $sString = cApiStrRecodeString($sString, $sourceEncoding, "UTF-8");
    }

    $sConverted = str_replace(
        array_keys($aTransliterations),
        array_values($aTransliterations),
        $sString
    );

    // TODO: Additional converting

    return cApiStrRecodeString($sConverted, "UTF-8", $targetEncoding);
}
225: 
/**
 * Converts a string to another encoding. This function tries the available
 * converters in this order: recode (recode_string), then iconv.
 *
 * If $sourceEncoding and $targetEncoding are the same (ignoring case), this
 * function returns the string immediately.
 *
 * If a converter is available but fails, the next one is tried. If no
 * converter is available or all of them fail, a warning is raised via
 * cWarning() and the unconverted string is returned.
 *
 * For more information about encodings, refer to
 * http://en.wikipedia.org/wiki/Character_encoding
 *
 * For more information about the supported encodings in recode, refer to
 * http://www.delorie.com/gnu/docs/recode/recode_toc.html
 *
 * Note: depending on whether recode or iconv is used, the supported charsets
 * differ. The following ones are commonly used and are most likely supported by
 * both converters:
 *
 * - ISO-8859-1 to ISO-8859-15
 * - ASCII
 * - UTF-8
 *
 * @todo Check if the charset names are the same for both converters
 * @todo Implement a converter and charset checker to ensure compliance.
 *
 * @param   string  $sString         The string to operate on
 * @param   string  $sourceEncoding  The encoding of $sString
 * @param   string  $targetEncoding  The encoding to convert to
 * @return  string  The converted string, or the unchanged string if it
 *                  could not be converted
 */
function cApiStrRecodeString($sString, $sourceEncoding, $targetEncoding) {
    // If sourceEncoding and targetEncoding are the same, return
    if ($sourceEncoding == $targetEncoding
        || strcasecmp((string) $sourceEncoding, (string) $targetEncoding) === 0) {
        return $sString;
    }

    $bConverterAvailable = false;

    // Check for the "recode" support. Test for the function which is actually
    // called below.
    if (function_exists('recode_string')) {
        $bConverterAvailable = true;
        $sResult = recode_string($sourceEncoding . '..' . $targetEncoding, $sString);
        if ($sResult !== false) {
            return $sResult;
        }
    }

    // Check for the "iconv" support
    if (function_exists('iconv')) {
        $bConverterAvailable = true;
        $sResult = iconv($sourceEncoding, $targetEncoding, $sString);
        if ($sResult !== false) {
            return $sResult;
        }
    }

    // Nothing worked; return the unconverted string with a warning, so that
    // callers always get a string back.
    if ($bConverterAvailable) {
        cWarning(__FILE__, __LINE__, 'cApiStrRecodeString could not convert the string from ' . $sourceEncoding . ' to ' . $targetEncoding . '.');
    } else {
        cWarning(__FILE__, __LINE__, 'cApiStrRecodeString could not find either recode or iconv to do charset conversion.');
    }

    return $sString;
}
276: 
/**
 * Removes or converts all "evil" URL characters. This function removes or converts
 * all characters which can make an URL invalid.
 *
 * The string is processed in three steps:
 * - Diacritics are replaced using cApiStrReplaceDiacritics()
 * - Spaces, "/", "&" and "+" are converted to "-"
 * - Every remaining byte which is not an ASCII letter, a digit or one of
 *   "-", "_" and "." is either removed or, if $bReplace is true, replaced
 *   with "_". This includes control characters such as line breaks.
 *
 * The last step works on bytes, so a multi-byte character results in one "_"
 * per byte when $bReplace is true.
 *
 * @param   string  $sString   The string to operate on
 * @param   bool    $bReplace  If true, all "unclean" characters are replaced
 *                             with "_" instead of being removed
 * @return  string  The resulting string
 */
function cApiStrCleanURLCharacters($sString, $bReplace = false) {
    $sString = cApiStrReplaceDiacritics($sString);

    // Common separators become a dash
    $sString = str_replace(array(' ', '/', '&', '+'), '-', $sString);

    // A negated character class is used instead of testing each character
    // against /^[a-z0-9]*$/i, since "$" also matches in front of a trailing
    // newline and therefore let line breaks slip through.
    $sReplacement = $bReplace ? '_' : '';

    return (string) preg_replace('/[^a-zA-Z0-9_.-]/', $sReplacement, $sString);
}
314: 
/**
 * Converts all line endings in the passed string ("\r\n", "\r" and "\n") to
 * one single line ending style.
 *
 * @param  string  $sString      The string to operate on
 * @param  string  $sLineEnding  The line ending to use. Feasible values are
 *                               "\n", "\r" or "\r\n", any other value is
 *                               treated as "\n".
 * @return string  The string with normalized line endings
 */
function cApiStrNormalizeLineEndings($sString, $sLineEnding = "\n") {
    // Fall back to the unix line ending for unsupported values
    if (!in_array($sLineEnding, array("\n", "\r", "\r\n"), true)) {
        $sLineEnding = "\n";
    }

    // strtr() tries the longest key first and never rescans replaced text, so
    // a "\r\n" pair is handled as one line ending and not as two.
    return strtr($sString, array(
        "\r\n" => $sLineEnding,
        "\r"   => $sLineEnding,
        "\n"   => $sLineEnding
    ));
}
334:
Packages

Classes

Functions