1: <?php
2: /**
3: * This file contains various classes for content search.
4: * API to index a CONTENIDO article
5: * API to search in the index structure
6: * API to display the searchresults
7: *
8: * @package Core
9: * @subpackage Frontend_Search
10: * @version SVN Revision $Rev:$
11: *
12: * @author Willi Man
13: * @copyright four for business AG <www.4fb.de>
14: * @license http://www.contenido.org/license/LIZENZ.txt
15: * @link http://www.4fb.de
16: * @link http://www.contenido.org
17: */
18: defined('CON_FRAMEWORK') || die('Illegal call: Missing framework initialization - request aborted.');
19:
20: cInclude('includes', 'functions.encoding.php');
21:
22: /**
23: * Abstract base search class.
24: * Provides general properties and functions
25: * for child implementations.
26: *
27: * @author Murat Purc <murat@purc.de>
28: *
29: * @package Core
30: * @subpackage Frontend_Search
31: */
abstract class cSearchBaseAbstract {

    /**
     * CONTENIDO database object.
     *
     * Legacy name: the class itself works with $db. This property is kept
     * for child classes that may still read it and always references the
     * same instance as $db.
     *
     * @var cDb
     */
    protected $oDB;

    /**
     * CONTENIDO database object used for all queries.
     *
     * @var cDb
     */
    protected $db;

    /**
     * CONTENIDO configuration data
     *
     * @var array
     */
    protected $cfg;

    /**
     * Language id of a client
     *
     * @var int
     */
    protected $lang;

    /**
     * Client id
     *
     * @var int
     */
    protected $client;

    /**
     * Debug flag as passed to the constructor.
     * It is only stored; _debug() does not evaluate it.
     *
     * @var bool
     */
    protected $bDebug = false;

    /**
     * Initialises the properties shared by all search classes.
     *
     * Configuration, language id and client id are copied from the globals
     * $cfg, $lang and $client.
     *
     * @param cDb $oDB Optional database instance. Anything that is not an
     *        object is ignored and cRegistry::getDb() is used instead.
     * @param bool $bDebug Optional, flag to enable debugging (no longer needed)
     */
    protected function __construct($oDB = NULL, $bDebug = false) {
        global $cfg, $lang, $client;

        $this->cfg = $cfg;
        $this->lang = $lang;
        $this->client = $client;

        $this->bDebug = $bDebug;

        // NULL is not an object either, so one check covers both cases
        $this->db = is_object($oDB) ? $oDB : cRegistry::getDb();

        // keep the legacy property in sync instead of leaving it unset
        $this->oDB = $this->db;
    }

    /**
     * Main debug function: builds "<msg>: <value>" and passes the text to
     * cDebug::out(). Arrays and objects are rendered with print_r().
     *
     * @param string $msg Some text
     * @param mixed $var The variable to dump
     */
    protected function _debug($msg, $var) {
        $dump = $msg . ': ';
        if (is_array($var) || is_object($var)) {
            $dump .= print_r($var, true);
        } else {
            $dump .= $var;
        }
        cDebug::out($dump);
    }
}
100:
101: /**
102: * CONTENIDO API - Search Index Object
103: *
104: * This object creates an index of an article
105: *
106: * Create object with
107: * $oIndex = new cSearchIndex($db); # where $db is the global CONTENIDO database
108: * object.
109: * Start indexing with
110: * $oIndex->start($idart, $aContent);
111: * where $aContent is the complete content of an article specified by its
112: * content types.
113: * It looks like
114: * Array (
115: * [CMS_HTMLHEAD] => Array (
116: * [1] => Herzlich Willkommen...
117: * [2] => ...auf Ihrer Website!
118: * )
119: * [CMS_HTML] => Array (
120: * [1] => Die Inhalte auf dieser Website ...
121: *
122: * The index for keyword 'willkommen' would look like '&12=1(CMS_HTMLHEAD-1)'
123: * which means the keyword 'willkommen' occurs 1 times in article with articleId
124: * 12 and content type CMS_HTMLHEAD[1].
125: *
126: * TODO: The basic idea of the indexing process is to take the complete content
127: * of an article and to generate normalized index terms
128: * from the content and to store a specific index structure in the relation
129: * 'con_keywords'.
130: * To take the complete content is not very flexible. It would be better to
131: * differentiate by specific content types or by any content.
132: * The &, =, () and - separated string is not easy to parse to compute the
133: * search result set.
134: * It would be a better idea (and a lot of work) to extend the relation
135: * 'con_keywords' to store keywords by articleId (or content source identifier)
136: * and content type.
137: * The functions removeSpecialChars, setStopwords, setContentTypes and
138: * setCmsOptions should be sourced out into a new helper-class.
139: * Keep in mind that the classes cSearch and cSearchResult use an instance of
140: * cSearchIndex.
141: *
142: * @package Core
143: * @subpackage Frontend_Search
144: */
class cSearchIndex extends cSearchBaseAbstract {

    /**
     * the content of the cms-types of an article
     *
     * @var array
     */
    protected $_keycode = array();

    /**
     * the list of keywords of an article
     * (keyword => space separated list of "TYPE-typeid" locations)
     *
     * @var array
     */
    protected $_keywords = array();

    /**
     * the words, which should not be indexed
     *
     * @var array
     */
    protected $_stopwords = array();

    /**
     * the keywords of an article stored in the DB
     * (keyword => stored index string of the current place)
     *
     * @var array
     */
    protected $_keywordsOld = array();

    /**
     * the keywords to be deleted
     *
     * @var array
     */
    protected $_keywordsDel = array();

    /**
     * 'auto' or 'self'
     * The field 'auto' in table con_keywords is used for automatic indexing.
     * The value is a string like "&12=2(CMS_HTMLHEAD-1,CMS_HTML-1)", which
     * means a keyword occurs 2 times in article with $idart 12
     * and can be found in CMS_HTMLHEAD[1] and CMS_HTML[1].
     * The field 'self' can be used in the article properties to index the
     * article manually.
     *
     * @var string
     */
    protected $_place = 'auto';

    /**
     * array of cms types which are treated special
     * (start() skips them, see checkCmsType())
     *
     * @var array
     */
    protected $_cmsOptions = array();

    /**
     * array of all available cms types (type name => idtype)
     *
     * htmlhead - HTML Headline
     * html - HTML Text
     * head - Headline (no HTML)
     * text - Text (no HTML)
     * img - Upload id of the element
     * imgdescr - Image description
     * link - Link (URL)
     * linktarget - Linktarget (_self, _blank, _top ...)
     * linkdescr - Linkdescription
     * swf - Upload id of the element
     * etc.
     *
     * @var array
     */
    protected $_cmsType = array();

    /**
     * the suffix of all available cms types (idtype => type name without
     * its first four characters)
     *
     * @var array
     */
    protected $_cmsTypeSuffix = array();

    /**
     * Id of the article which is currently indexed, set by start().
     *
     * @var int
     */
    protected $idart;

    /**
     * Constructor, set object properties
     *
     * @param cDb $db CONTENIDO Database object
     * @return void
     */
    public function __construct($db = NULL) {
        parent::__construct($db);

        $this->setContentTypes();
    }

    /**
     * Start indexing the article.
     *
     * Builds the keyword list from the content, loads the stored index
     * entries, writes the new entries and finally removes the article from
     * keywords it no longer contains.
     *
     * @param int $idart Article Id. Non numeric or negative values abort
     *        the indexing.
     * @param array $aContent The complete content of an article specified by
     *        its content types.
     *        It looks like
     *        Array (
     *            [CMS_HTMLHEAD] => Array (
     *                [1] => Herzlich Willkommen...
     *                [2] => ...auf Ihrer Website!
     *            )
     *            [CMS_HTML] => Array (
     *                [1] => Die Inhalte auf dieser Website ...
     *
     * @param string $place The field where to store the index information in
     *        db ('auto' or 'self'). Any other value falls back to 'auto'.
     * @param array $cms_options One can specify explicitly cms types which
     *        should not be indexed.
     * @param array $aStopwords Array with words which should not be indexed.
     */
    public function start($idart, $aContent, $place = 'auto', $cms_options = array(), $aStopwords = array()) {
        // the former test "!is_int((int) $idart)" could never be true
        if (!is_numeric($idart) || (int) $idart < 0) {
            return;
        }
        // the id ends up in SQL and in regular expressions, so force an int
        $this->idart = (int) $idart;

        // the value is used as a column name and must never be free text
        $place = strtolower((string) $place);
        $this->_place = ($place === 'self') ? 'self' : 'auto';

        // a reused instance must not mix the keywords of different articles
        $this->_keywords = array();
        $this->_keywordsOld = array();
        $this->_keywordsDel = array();

        $this->_keycode = $aContent;
        $this->setStopwords($aStopwords);
        $this->setCmsOptions($cms_options);

        $this->createKeywords();

        $this->getKeywords();

        $this->saveKeywords();

        $new_keys = array_keys($this->_keywords);
        $old_keys = array_keys($this->_keywordsOld);

        $this->_keywordsDel = array_diff($old_keys, $new_keys);

        if (count($this->_keywordsDel) > 0) {
            $this->deleteKeywords();
        }
    }

    /**
     * for each cms-type create index structure.
     * it looks like
     * Array (
     *     [die] => CMS_HTML-1
     *     [inhalte] => CMS_HTML-1
     *     [auf] => CMS_HTML-1 CMS_HTMLHEAD-2
     *     [dieser] => CMS_HTML-1
     *     [website] => CMS_HTML-1 CMS_HTML-1 CMS_HTMLHEAD-2
     * )
     */
    public function createKeywords() {
        // Only create keycodes, if some are available
        if (is_array($this->_keycode)) {
            foreach ($this->_keycode as $idtype => $data) {
                if (!is_array($data) || !$this->checkCmsType($idtype)) {
                    continue;
                }

                foreach ($data as $typeid => $code) {
                    if (!is_scalar($code)) {
                        // nothing that could be split into words
                        continue;
                    }
                    $this->_debug('code', $code);

                    // remove backslash
                    $code = stripslashes((string) $code);
                    // replace HTML line breaks with newlines, otherwise
                    // strip_tags() glues the neighbouring words together
                    $code = str_ireplace(array(
                        '<br>',
                        '<br/>',
                        '<br />'
                    ), "\n", $code);
                    // remove html tags
                    $code = strip_tags($code);
                    if (strlen($code) > 0) {
                        $code = conHtmlEntityDecode($code);
                    }
                    $this->_debug('code', $code);

                    // split content by any number of commas or space
                    // characters
                    $tmp_keys = preg_split('/[\s,]+/', trim($code));
                    $this->_debug('tmp_keys', $tmp_keys);

                    foreach ($tmp_keys as $value) {
                        // index terms are stored with lower case; working on
                        // the entity encoded form lets strtolower() also
                        // lower-case the entity names of accented letters
                        $value = htmlentities($value, ENT_COMPAT, 'UTF-8');
                        $value = trim(strtolower($value));
                        $value = html_entity_decode($value, ENT_COMPAT, 'UTF-8');

                        // eliminate stopwords
                        if (in_array($value, $this->_stopwords)) {
                            continue;
                        }

                        $value = $this->removeSpecialChars($value);

                        // do not index single characters
                        if (strlen($value) <= 1) {
                            continue;
                        }

                        // a stopword followed by punctuation ("the.") is
                        // only recognisable after the clean up
                        if (in_array($value, $this->_stopwords)) {
                            continue;
                        }

                        $location = $idtype . '-' . $typeid . ' ';
                        if (isset($this->_keywords[$value])) {
                            $this->_keywords[$value] .= $location;
                        } else {
                            $this->_keywords[$value] = $location;
                        }
                    }
                }
            }
        }

        $this->_debug('keywords', $this->_keywords);
    }

    /**
     * generate index_string from index structure and save keywords
     * The index_string looks like "&12=2(CMS_HTMLHEAD-1,CMS_HTML-1)"
     */
    public function saveKeywords() {
        $idart = cSecurity::toInteger($this->idart);
        $idlang = cSecurity::toInteger($this->lang);
        $table = $this->cfg['tab']['keywords'];

        // The hyphen has to be the last character of the class: "[\w-,]" is
        // rejected by PCRE2 (PHP >= 7.3) as an invalid range.
        $entryPattern = '/&' . $idart . '=[0-9]+\([\w,-]+\)/';

        foreach ($this->_keywords as $keyword => $count) {
            // purely numeric keywords are converted to int array keys
            $keyword = (string) $keyword;

            $tmp_count = preg_split('/[\s]/', trim($count));
            $this->_debug('tmp_count', $tmp_count);

            $occurrence = count($tmp_count);
            $cms_types = implode(',', array_unique($tmp_count));
            $index_string = '&' . $idart . '=' . $occurrence . '(' . $cms_types . ')';

            if (!array_key_exists($keyword, $this->_keywordsOld)) {
                // if keyword is new, save index information
                $sql = "INSERT INTO " . $table . "
                            (keyword, " . $this->_place . ", idlang)
                        VALUES
                            ('" . $this->db->escape($keyword) . "', '" . $this->db->escape($index_string) . "', " . $idlang . ")";
            } else {
                // if keyword already exists, create new index_string
                $stored = (string) $this->_keywordsOld[$keyword];
                if (preg_match('/&' . $idart . '=/', $stored)) {
                    $index_string = preg_replace($entryPattern, $index_string, $stored);
                } else {
                    $index_string = $stored . $index_string;
                }

                $sql = "UPDATE " . $table . "
                        SET " . $this->_place . " = '" . $this->db->escape($index_string) . "'
                        WHERE idlang='" . $idlang . "' AND keyword='" . $this->db->escape($keyword) . "'";
            }
            $this->_debug('sql', $sql);
            $this->db->query($sql);
        }
    }

    /**
     * if keywords don't occur in the article anymore, update index_string and
     * delete keyword if necessary
     */
    public function deleteKeywords() {
        $idart = cSecurity::toInteger($this->idart);
        $idlang = cSecurity::toInteger($this->lang);
        $table = $this->cfg['tab']['keywords'];

        // hyphen last, see saveKeywords()
        $entryPattern = '/&' . $idart . '=[0-9]+\([\w,-]+\)/';

        foreach ($this->_keywordsDel as $key_del) {
            $key_del = (string) $key_del;
            $index_string = preg_replace($entryPattern, '', (string) $this->_keywordsOld[$key_del]);

            if (strlen($index_string) == 0) {
                // keyword is not referenced by any article
                $sql = "DELETE FROM " . $table . "
                        WHERE idlang = " . $idlang . " AND keyword = '" . $this->db->escape($key_del) . "'";
            } else {
                $sql = "UPDATE " . $table . "
                        SET " . $this->_place . " = '" . $this->db->escape($index_string) . "'
                        WHERE idlang = " . $idlang . " AND keyword = '" . $this->db->escape($key_del) . "'";
            }
            $this->_debug('sql', $sql);
            $this->db->query($sql);
        }
    }

    /**
     * get the keywords of an article
     *
     * Loads every stored keyword that is either part of the current keyword
     * list or already references the current article and remembers its
     * index string in $_keywordsOld.
     */
    public function getKeywords() {
        $quoted = array();
        foreach (array_keys($this->_keywords) as $keyword) {
            $quoted[] = "'" . $this->db->escape((string) $keyword) . "'";
        }
        // keep the statement valid when there is no keyword at all
        $keys = (count($quoted) > 0) ? implode(',', $quoted) : "''";

        $sql = "SELECT
                    keyword, auto, self
                FROM
                    " . $this->cfg['tab']['keywords'] . "
                WHERE
                    idlang=" . cSecurity::toInteger($this->lang) . " AND
                    (keyword IN (" . $keys . ") OR " . $this->_place . " REGEXP '&" . cSecurity::toInteger($this->idart) . "=')";

        $this->_debug('sql', $sql);

        $this->db->query($sql);

        $place = $this->_place;

        while ($this->db->nextRecord()) {
            $this->_keywordsOld[$this->db->f('keyword')] = $this->db->f($place);
        }
    }

    /**
     * remove special characters from index term
     *
     * @param string $key Keyword
     * @return mixed
     */
    public function removeSpecialChars($key) {
        // the hyphen is intentionally not part of the list
        $aSpecialChars = array(
            "_",
            "'",
            ".",
            "!",
            "\"",
            "#",
            "$",
            "%",
            "&",
            "(",
            ")",
            "*",
            "+",
            ",",
            "/",
            ":",
            ";",
            "<",
            "=",
            ">",
            "?",
            "@",
            "[",
            "\\",
            "]",
            "^",
            "`",
            "{",
            "|",
            "}",
            "~",
            "„"
        );

        // TODO: The transformation of accented characters must depend on the
        // selected encoding of the language of
        // a client and should not be treated in this method.
        // modified 2007-10-01, H. Librenz - added as hotfix for encoding
        // problems (doesn't find any words with
        // umlaut vowels in it since you turn on UTF-8 as language encoding)
        $sEncoding = getEncodingByLanguage($this->db, $this->lang);

        if (strtolower($sEncoding) != 'iso-8859-2') {
            $key = conHtmlentities($key, NULL, $sEncoding);
        } else {
            $key = htmlentities_iso88592($key);
        }

        $key = conHtmlEntityDecode($key);
        $key = str_replace($aSpecialChars, '', $key);

        return $key;
    }

    /**
     * Replaces the transliterations ue, ae, oe and ss by the lower case
     * umlauts / sharp s.
     *
     * @param string $key Keyword
     * @return string
     */
    public function addSpecialUmlauts($key) {
        $key = conHtmlentities($key, NULL, getEncodingByLanguage($this->db, $this->lang));

        // The former map listed every key twice (upper and lower case
        // umlaut); PHP keeps only the last value of a duplicate key, so
        // these are the entries which were effectively used.
        $aUmlautMap = array(
            'ue' => 'ü',
            'ae' => 'ä',
            'oe' => 'ö',
            'ss' => 'ß'
        );

        foreach ($aUmlautMap as $sUmlaut => $sMapped) {
            $key = str_replace($sUmlaut, $sMapped, $key);
        }

        $key = conHtmlEntityDecode($key);
        return $key;
    }

    /**
     * set the array of stopwords which should not be indexed
     *
     * An empty or non array value leaves the current stopwords untouched.
     *
     * @param array $aStopwords
     */
    public function setStopwords($aStopwords) {
        if (is_array($aStopwords) && count($aStopwords) > 0) {
            $this->_stopwords = $aStopwords;
        }
    }

    /**
     * set the cms types
     *
     * Reads all types from the type table and fills $_cmsType
     * (type => idtype) and $_cmsTypeSuffix (idtype => type without its
     * first four characters).
     */
    public function setContentTypes() {
        $sql = "SELECT type, idtype FROM " . $this->cfg['tab']['type'] . ' ';
        $this->_debug('sql', $sql);
        $this->db->query($sql);
        while ($this->db->nextRecord()) {
            $type = $this->db->f('type');
            $idtype = $this->db->f('idtype');

            $this->_cmsType[$type] = $idtype;
            $this->_cmsTypeSuffix[$idtype] = substr($type, 4);
        }
    }

    /**
     * set the cms_options array of cms types which should be treated special
     *
     * The given list replaces the current selection. Entries may be full
     * type names ("CMS_HTML") or suffixes ("html"); unknown types are
     * ignored. An empty or non array value clears the selection.
     *
     * @param mixed $cms_options
     */
    public function setCmsOptions($cms_options) {
        // This is a setter: types of an earlier call must not survive,
        // otherwise a selection could never be narrowed again.
        $this->_cmsOptions = array();

        if (!is_array($cms_options)) {
            return;
        }

        foreach ($cms_options as $opt) {
            $opt = strtoupper($opt);

            if (strlen($opt) == 0) {
                continue;
            }

            if (stripos($opt, 'cms_') === false) {
                // suffix only, e.g. "HTML"
                if (in_array($opt, $this->_cmsTypeSuffix)) {
                    $this->_cmsOptions[$opt] = 'CMS_' . $opt;
                }
            } elseif (array_key_exists($opt, $this->_cmsType)) {
                $this->_cmsOptions[$opt] = $opt;
            }
        }
    }

    /**
     * check if the current cms type is in the cms_options array
     *
     * @param string $idtype
     * @return boolean false if the type is part of the cms_options,
     *         otherwise true
     */
    public function checkCmsType($idtype) {
        $idtype = strtoupper($idtype);
        return !in_array($idtype, $this->_cmsOptions);
    }

    /**
     *
     * @return array the _cmsType property
     */
    public function getCmsType() {
        return $this->_cmsType;
    }

    /**
     *
     * @return array the _cmsTypeSuffix property
     */
    public function getCmsTypeSuffix() {
        return $this->_cmsTypeSuffix;
    }
}
629:
630: /**
631: * CONTENIDO API - Search Object
632: *
633: * This object starts a indexed fulltext search
634: *
635: * TODO:
636: * The way to set the search options could be done much better!
637: * The computation of the set of searchable articles should not be treated in
638: * this class.
639: * It is better to compute the array of searchable articles from the outside and
640: * to pass the array of searchable articles as parameter.
641: * Avoid foreach loops.
642: *
643: * Use object with
644: *
645: * $options = array('db' => 'regexp', // use db function regexp
646: * 'combine' => 'or'); // combine searchwords with or
647: *
648: * The range of searchable articles is by default the complete content which is
649: * online and not protected.
650: *
651: * With option 'searchable_articles' you can define your own set of searchable
652: * articles.
653: * If parameter 'searchable_articles' is set the options 'cat_tree',
654: * 'categories', 'articles', 'exclude', 'artspecs',
655: * 'protected', 'dontshowofflinearticles' don't have any effect.
656: *
657: * $options = array('db' => 'regexp', // use db function regexp
658: * 'combine' => 'or', // combine searchwords with or
659: * 'searchable_articles' => array(5, 6, 9, 13));
660: *
661: * One can define the range of searchable articles by setting the parameter
662: * 'exclude' to false which means the range of categories
663: * defined by parameter 'cat_tree' or 'categories' and the range of articles
664: * defined by parameter 'articles' is included.
665: *
666: * $options = array('db' => 'regexp', // use db function regexp
667: * 'combine' => 'or', // combine searchwords with or
668: * 'exclude' => false, // => searchrange specified in 'cat_tree', 'categories'
669: * and 'articles' is included
670: * 'cat_tree' => array(12), // tree with root 12 included
671: * 'categories' => array(100,111), // categories 100, 111 included
672: * 'articles' => array(33), // article 33 included
673: * 'artspecs' => array(2, 3), // array of article specifications => search only
674: * articles with these artspecs
675: * 'res_per_page' => 2, // results per page
676: * 'protected' => true, // => do not search articles or articles in categories
677: * which are offline or protected
678: * 'dontshowofflinearticles' => false); // => search offline articles or
679: * articles in categories which are offline
680: *
681: * You can build the complement of the range of searchable articles by setting
682: * the parameter 'exclude' to true which means the range of categories
683: * defined by parameter 'cat_tree' or 'categories' and the range of articles
684: * defined by parameter 'articles' is excluded from search.
685: *
686: * $options = array('db' => 'regexp', // use db function regexp
687: * 'combine' => 'or', // combine searchwords with or
688: * 'exclude' => true, // => searchrange specified in 'cat_tree', 'categories'
689: * and 'articles' is excluded
690: * 'cat_tree' => array(12), // tree with root 12 excluded
691: * 'categories' => array(100,111), // categories 100, 111 excluded
692: * 'articles' => array(33), // article 33 excluded
693: * 'artspecs' => array(2, 3), // array of article specifications => search only
694: * articles with these artspecs
695: * 'res_per_page' => 2, // results per page
696: * 'protected' => true, // => do not search articles or articles in categories
697: * which are offline or protected
698: * 'dontshowofflinearticles' => false); // => search offline articles or
699: * articles in categories which are offline
700: *
701: * $search = new cSearch($options);
702: *
703: * $cms_options = array("htmlhead", "html", "head", "text", "imgdescr", "link",
704: * "linkdescr");
705: * search only in these cms-types
706: * $search->setCmsOptions($cms_options);
707: *
708: * $search_result = $search->searchIndex($searchword, $searchwordex); // start
709: * search
710: *
711: * The search result structure has following form
712: * Array (
713: * [20] => Array (
714: * [CMS_HTML] => Array (
715: * [0] => 1
716: * [1] => 1
717: * [2] => 1
718: * )
719: * [keyword] => Array (
720: * [0] => content
721: * [1] => contenido
722: * [2] => wwwcontenidoorg
723: * )
724: * [search] => Array (
725: * [0] => con
726: * [1] => con
727: * [2] => con
728: * )
729: * [occurence] => Array (
730: * [0] => 1
731: * [1] => 5
732: * [2] => 1
733: * )
734: * [similarity] => 60
735: * )
736: * )
737: *
738: * The keys of the array are the article ID's found by search.
739: *
740: * Searching 'con' matches keywords 'content', 'contenido' and 'wwwcontenidoorg'
741: * in article with ID 20 in content type CMS_HTML[1].
742: * The search term occurs 7 times.
743: * The maximum similarity between searchterm and matching keyword is 60%.
744: *
745: * with $oSearchResults = new cSearchResult($search_result, 10);
746: * one can rank and display the results
747: *
748: * @package Core
749: * @subpackage Frontend_Search
750: */
751: class cSearch extends cSearchBaseAbstract {
752:
753: /**
754: * Instance of class Index
755: *
756: * @var object
757: */
758: protected $_index;
759:
760: /**
761: * array of available cms types
762: *
763: * @var array
764: */
765: protected $_cmsType = array();
766:
767: /**
768: * suffix of available cms types
769: *
770: * @var array
771: */
772: protected $_cmsTypeSuffix = array();
773:
774: /**
775: * the search words
776: *
777: * @var array
778: */
779: protected $_searchWords = array();
780:
781: /**
782: * the words which should be excluded from search
783: *
784: * @var array
785: */
786: protected $_searchWordsExclude = array();
787:
788: /**
789: * type of db search
790: * like => 'sql like', regexp => 'sql regexp'
791: *
792: * @var string
793: */
794: protected $_searchOption;
795:
796: /**
797: * logical combination of searchwords (and, or)
798: *
799: * @var string
800: */
801: protected $_searchCombination;
802:
803: /**
804: * array of searchable articles
805: *
806: * @var array
807: */
808: protected $_searchableArts = array();
809:
810: /**
811: * article specifications
812: *
813: * @var array
814: */
815: protected $_articleSpecs = array();
816:
817: /**
818: * If $protected = true => do not search articles which are offline or
819: * articles in categories which are offline (protected)
820: *
821: * @var boolean
822: */
823: protected $_protected;
824:
825: /**
826: * If $dontshowofflinearticles = false => search offline articles or
827: * articles in categories which are offline
828: *
829: * @var boolean
830: */
831: protected $_dontshowofflinearticles;
832:
833: /**
834: * If $exclude = true => the specified search range is excluded from search,
835: * otherwise included
836: *
837: * @var boolean
838: */
839: protected $_exclude;
840:
841: /**
842: * Array of article id's with information about cms-types, occurence of
843: * keyword/searchword, similarity .
844: *
845: *
846: *
847: * @var array
848: */
849: protected $_searchResult = array();
850:
851: /**
852: * Constructor
853: *
854: * @param array $options $options['db'] 'regexp' => DB search with REGEXP;
855: * 'like' => DB search with LIKE; 'exact' => exact match;
856: * $options['combine'] 'and', 'or' Combination of search words with
857: * AND, OR
858: * $options['exclude'] 'true' => searchrange specified in 'cat_tree',
859: * 'categories' and 'articles' is excluded; 'false' =>
860: * searchrange specified in 'cat_tree', 'categories' and
861: * 'articles' is included
862: * $options['cat_tree'] e.g. array(8) => The complete tree with root
863: * 8 is in/excluded from search
864: * $options['categories'] e.g. array(10, 12) => Categories 10, 12
865: * in/excluded
866: * $options['articles'] e.g. array(23) => Article 33 in/excluded
867: * $options['artspecs'] => e.g. array(2, 3) => search only articles
868: * with certain article specifications
869: * $options['protected'] 'true' => do not search articles which are
870: * offline (locked) or articles in catgeories which are offline
871: * (protected)
872: * $options['dontshowofflinearticles'] 'false' => search offline
873: * articles or articles in categories which are offline
874: * $options['searchable_articles'] array of article ID's which should
875: * be searchable
876: * @param cDb $db Optional database instance
877: */
878: public function __construct($options, $db = NULL) {
879: parent::__construct($db);
880:
881: $this->_index = new cSearchIndex($db);
882:
883: $this->_cmsType = $this->_index->cms_type;
884: $this->_cmsTypeSuffix = $this->_index->cms_type_suffix;
885:
886: $this->_searchOption = (array_key_exists('db', $options)) ? strtolower($options['db']) : 'regexp';
887: $this->_searchCombination = (array_key_exists('combine', $options)) ? strtolower($options['combine']) : 'or';
888: $this->_protected = (array_key_exists('protected', $options)) ? $options['protected'] : true;
889: $this->_dontshowofflinearticles = (array_key_exists('dontshowofflinearticles', $options)) ? $options['dontshowofflinearticles'] : false;
890: $this->_exclude = (array_key_exists('exclude', $options)) ? $options['exclude'] : true;
891: $this->_articleSpecs = (array_key_exists('artspecs', $options) && is_array($options['artspecs'])) ? $options['artspecs'] : array();
892: $this->_index->setCmsOptions($this->_cmsTypeSuffix);
893:
894: if (array_key_exists('searchable_articles', $options) && is_array($options['searchable_articles'])) {
895: $this->_searchableArts = $options['searchable_articles'];
896: } else {
897: $this->_searchableArts = $this->getSearchableArticles($options);
898: }
899:
900: // minimum similarity between searchword and keyword in percent
901: $this->intMinimumSimilarity = 50;
902: }
903:
904: /**
905: * indexed fulltext search
906: *
907: * @param string $searchwords The search words
908: * @param string $searchwords_exclude The words, which should be excluded
909: * from search
910: * @return boolean multitype:
911: */
912: public function searchIndex($searchwords, $searchwords_exclude = '') {
913: if (strlen(trim($searchwords)) > 0) {
914: $this->_searchWords = $this->stripWords($searchwords);
915: } else {
916: return false;
917: }
918:
919: if (strlen(trim($searchwords_exclude)) > 0) {
920: $this->_searchWordsExclude = $this->stripWords($searchwords_exclude);
921: }
922:
923: $tmp_searchwords = array();
924: foreach ($this->_searchWords as $word) {
925: $wordEscaped = $this->db->escape($word);
926: if ($this->_searchOption == 'like') {
927: $wordEscaped = "'%" . $wordEscaped . "%'";
928: } elseif ($this->_searchOption == 'exact') {
929: $wordEscaped = "'" . $wordEscaped . "'";
930: }
931: $tmp_searchwords[] = $word;
932: }
933:
934: if (count($this->_searchWordsExclude) > 0) {
935: foreach ($this->_searchWordsExclude as $word) {
936: $wordEscaped = $this->db->escape($word);
937: if ($this->_searchOption == 'like') {
938: $wordEscaped = "'%" . $wordEscaped . "%'";
939: } elseif ($this->_searchOption == 'exact') {
940: $wordEscaped = "'" . $wordEscaped . "'";
941: }
942: $tmp_searchwords[] = $wordEscaped;
943: $this->_searchWords[] = $word;
944: }
945: }
946:
947: if ($this->_searchOption == 'regexp') {
948: // regexp search
949: $kwSql = "keyword REGEXP '" . implode('|', $tmp_searchwords) . "'";
950: } elseif ($this->_searchOption == 'like') {
951: // like search
952: $search_like = implode(" OR keyword LIKE ", $tmp_searchwords);
953: $kwSql = "keyword LIKE '" . $search_like;
954: } elseif ($this->_searchOption == 'exact') {
955: // exact match
956: $search_exact = implode(" OR keyword = ", $tmp_searchwords);
957: $kwSql = "keyword LIKE '" . $search_exact;
958: }
959:
960: $sql = "SELECT keyword, auto FROM " . $this->cfg['tab']['keywords'] . " WHERE idlang=" . cSecurity::toInteger($this->lang) . " AND " . $kwSql . " ";
961: $this->_debug('sql', $sql);
962: $this->db->query($sql);
963:
964: while ($this->db->nextRecord()) {
965:
966: $tmp_index_string = preg_split('/&/', $this->db->f('auto'), -1, PREG_SPLIT_NO_EMPTY);
967:
968: $this->_debug('index', $this->db->f('auto'));
969:
970: $tmp_index = array();
971: foreach ($tmp_index_string as $string) {
972: $tmp_string = preg_replace('/[=\(\)]/', ' ', $string);
973: $tmp_index[] = preg_split('/\s/', $tmp_string, -1, PREG_SPLIT_NO_EMPTY);
974: }
975: $this->_debug('tmp_index', $tmp_index);
976:
977: foreach ($tmp_index as $string) {
978: $artid = $string[0];
979:
980: // filter nonsearchable articles
981: if (in_array($artid, $this->_searchableArts)) {
982:
983: $cms_place = $string[2];
984: $keyword = $this->db->f('keyword');
985: $percent = 0;
986: $similarity = 0;
987: foreach ($this->_searchWords as $word) {
988: // computes similarity between searchword and keyword in
989: // percent
990: similar_text($word, $keyword, $percent);
991: if ($percent > $similarity) {
992: $similarity = $percent;
993: $searchword = $word;
994: }
995: }
996:
997: $tmp_cmstype = preg_split('/[,]/', $cms_place, -1, PREG_SPLIT_NO_EMPTY);
998: $this->_debug('tmp_cmstype', $tmp_cmstype);
999:
1000: $tmp_cmstype2 = array();
1001: foreach ($tmp_cmstype as $type) {
1002: $tmp_cmstype2[] = preg_split('/-/', $type, -1, PREG_SPLIT_NO_EMPTY);
1003: }
1004: $this->_debug('tmp_cmstype2', $tmp_cmstype2);
1005:
1006: foreach ($tmp_cmstype2 as $type) {
1007: if (!$this->_index->checkCmsType($type[0])) {
1008: // search for specified cms-types
1009: if ($similarity >= $this->intMinimumSimilarity) {
1010: // include article into searchresult set only if
1011: // similarity between searchword and keyword is
1012: // big enough
1013: $this->_searchResult[$artid][$type[0]][] = $type[1];
1014: $this->_searchResult[$artid]['keyword'][] = $this->db->f('keyword');
1015: $this->_searchResult[$artid]['search'][] = $searchword;
1016: $this->_searchResult[$artid]['occurence'][] = $string[1];
1017: $this->_searchResult[$artid]['debug_similarity'][] = $percent;
1018: if ($similarity > $this->_searchResult[$artid]['similarity']) {
1019: $this->_searchResult[$artid]['similarity'] = $similarity;
1020: }
1021: }
1022: }
1023: }
1024: }
1025: }
1026: }
1027:
1028: if ($this->_searchCombination == 'and') {
1029: // all search words must appear in the article
1030: foreach ($this->_searchResult as $article => $val) {
1031: if (!count(array_diff($this->_searchWords, $val['search'])) == 0) {
1032: // $this->rank_structure[$article] = $rank[$article];
1033: unset($this->_searchResult[$article]);
1034: }
1035: }
1036: }
1037:
1038: if (count($this->_searchWordsExclude) > 0) {
1039: // search words to be excluded must not appear in article
1040: foreach ($this->_searchResult as $article => $val) {
1041: if (!count(array_intersect($this->_searchWordsExclude, $val['search'])) == 0) {
1042: // $this->rank_structure[$article] = $rank[$article];
1043: unset($this->_searchResult[$article]);
1044: }
1045: }
1046: }
1047:
1048: $this->_debug('$this->search_result', $this->_searchResult);
1049: $this->_debug('$this->searchable_arts', $this->_searchableArts);
1050:
1051: return $this->_searchResult;
1052: }
1053:
/**
 * Hands the cms-types that should explicitly be searched over to the index.
 *
 * Only a non-empty array is forwarded; every other value is ignored and
 * the index is not touched.
 *
 * @param mixed $cms_options The cms-types (htmlhead, html, ...) which
 *        should explicitly be searched
 */
public function setCmsOptions($cms_options) {
    // nothing to forward unless we got a list with at least one entry
    if (!is_array($cms_options) || count($cms_options) < 1) {
        return;
    }

    $this->_index->setCmsOptions($cms_options);
}
1065:
/**
 * Breaks a search phrase into a list of normalised search words.
 *
 * Backslashes and HTML tags are removed from the phrase, it is split at
 * commas and whitespace, and every word is lower-cased. Words that are
 * shorter than two bytes (including empty fragments) are dropped.
 *
 * @param string $searchwords The search-words
 * @return array Stripped search-words without duplicates; array_unique()
 *         keeps the key of the first occurrence, so keys may have gaps
 */
public function stripWords($searchwords) {
    // remove backslashes and html tags, then surrounding whitespace
    $phrase = stripslashes($searchwords);
    $phrase = strip_tags($phrase);
    $phrase = trim($phrase);

    $accepted = array();

    // split the phrase by any number of commas or space characters
    foreach (preg_split('/[\s,]+/', $phrase) as $candidate) {
        // The word is lower-cased while it is HTML-entity encoded and is
        // decoded again afterwards (presumably so that characters which
        // have a named entity are lower-cased as well).
        $encoded = htmlentities($candidate, ENT_COMPAT, 'UTF-8');
        $lowered = trim(strtolower($encoded));
        $candidate = html_entity_decode($lowered, ENT_COMPAT, 'UTF-8');

        if (strlen($candidate) <= 1) {
            continue;
        }

        $accepted[] = $candidate;
    }

    return array_unique($accepted);
}
1094:
/**
 * Returns the ids of a category and of the categories below it.
 *
 * All categories of the current client and language are read ordered by
 * idtree. Starting with $cat_start, a category is added to the result as
 * soon as its parent is already part of the result. A category whose
 * parent shows up later in the record set is therefore not recognised
 * (presumably idtree lists parents before their children).
 *
 * @param int $cat_start Root of a category tree
 * @return array Category Tree: $cat_start followed by the ids of the
 *         recognised descendants
 * @todo This is not the job for search, should be outsourced ...
 */
public function getSubTree($cat_start) {
    $sql = "SELECT
B.idcat, B.parentid
FROM
" . $this->cfg['tab']['cat_tree'] . " AS A,
" . $this->cfg['tab']['cat'] . " AS B,
" . $this->cfg['tab']['cat_lang'] . " AS C
WHERE
A.idcat = B.idcat AND
B.idcat = C.idcat AND
C.idlang = '" . cSecurity::toInteger($this->lang) . "' AND
B.idclient = '" . cSecurity::toInteger($this->client) . "'
ORDER BY
idtree";
    $this->_debug('sql', $sql);
    $this->db->query($sql);

    // the start category itself is always part of the sub tree
    $aRecognized = array($cat_start);

    while ($this->db->nextRecord()) {
        // skip categories whose parent is not a recognised descendant
        if (!in_array($this->db->f('parentid'), $aRecognized)) {
            continue;
        }

        // add the category unless it is already known
        // (happens with $cat_start)
        $iCurrentCat = $this->db->f('idcat');
        if (!in_array($iCurrentCat, $aRecognized)) {
            $aRecognized[] = $iCurrentCat;
        }
    }

    return $aRecognized;
}
1153:
/**
 * Returns list of searchable article ids.
 *
 * The search range may contain the following keys, each holding an array:
 * - 'cat_tree':   category ids; each is expanded with getSubTree()
 * - 'categories': category ids that are used as given
 * - 'articles':   article ids
 *
 * Depending on $this->_exclude the range is either excluded from or
 * required for the result. $this->_protected and
 * $this->_dontshowofflinearticles add conditions on the public, visible
 * and online flags, and $this->_articleSpecs restricts the article
 * specification if it is not empty.
 *
 * All ids are cast to integers before they are put into the statement.
 *
 * @param array $search_range
 * @return array Articles in specified search range
 */
public function getSearchableArticles($search_range) {
    // tolerate a missing/invalid range instead of failing on key lookups
    if (!is_array($search_range)) {
        $search_range = array();
    }

    // collect the category range: expanded sub trees plus plain categories
    $aCatRange = array();
    if (isset($search_range['cat_tree']) && is_array($search_range['cat_tree'])) {
        foreach ($search_range['cat_tree'] as $cat) {
            $aCatRange = array_merge($aCatRange, $this->getSubTree($cat));
        }
    }
    if (isset($search_range['categories']) && is_array($search_range['categories'])) {
        $aCatRange = array_merge($aCatRange, $search_range['categories']);
    }
    $aCatRange = array_unique(array_map('intval', $aCatRange));
    $sCatRange = implode("','", $aCatRange);

    // the article range is optional, so it needs a defined default
    $sArtRange = '';
    if (isset($search_range['articles']) && is_array($search_range['articles'])) {
        $sArtRange = implode("','", array_map('intval', $search_range['articles']));
    }

    if ($this->_protected == true) {
        $sProtected = " C.public = 1 AND C.visible = 1 AND B.online = 1 ";
    } elseif ($this->_dontshowofflinearticles == true) {
        $sProtected = " C.visible = 1 AND B.online = 1 ";
    } else {
        $sProtected = " 1 ";
    }

    if ($this->_exclude == true) {
        // exclude searchrange
        $sSearchRange = " A.idcat NOT IN ('" . $sCatRange . "') AND B.idart NOT IN ('" . $sArtRange . "') AND ";
    } elseif (strlen($sArtRange) > 0) {
        // include searchrange, restricted to the given articles
        $sSearchRange = " A.idcat IN ('" . $sCatRange . "') AND B.idart IN ('" . $sArtRange . "') AND ";
    } else {
        // include searchrange
        // NOTE(review): an empty category range yields IN (''), which
        // presumably matches no article - confirm that this is intended.
        $sSearchRange = " A.idcat IN ('" . $sCatRange . "') AND ";
    }

    $sArtSpecs = '';
    if (is_array($this->_articleSpecs) && count($this->_articleSpecs) > 0) {
        $sArtSpecs = " B.artspec IN ('" . implode("','", array_map('intval', $this->_articleSpecs)) . "') AND ";
    }

    $sql = "SELECT
A.idart
FROM
" . $this->cfg["tab"]["cat_art"] . " as A,
" . $this->cfg["tab"]["art_lang"] . " as B,
" . $this->cfg["tab"]["cat_lang"] . " as C
WHERE
" . $sSearchRange . "
B.idlang = '" . cSecurity::toInteger($this->lang) . "' AND
C.idlang = '" . cSecurity::toInteger($this->lang) . "' AND
A.idart = B.idart AND
B.searchable = 1 AND
A.idcat = C.idcat AND
" . $sArtSpecs . "
" . $sProtected . " ";
    $this->_debug('sql', $sql);
    $this->db->query($sql);

    $aIdArts = array();
    while ($this->db->nextRecord()) {
        $aIdArts[] = $this->db->f('idart');
    }
    return $aIdArts;
}
1239:
/**
 * Reads the ids of all article specifications of the current client and
 * language that are flagged as online.
 *
 * @return array Array of article specification Ids
 */
public function getArticleSpecifications() {
    $sTable = $this->cfg['tab']['art_spec'];
    $iClient = cSecurity::toInteger($this->client);
    $iLang = cSecurity::toInteger($this->lang);

    $sql = "SELECT
idartspec
FROM
" . $sTable . "
WHERE
client = " . $iClient . " AND
lang = " . $iLang . " AND
online = 1 ";
    $this->_debug('sql', $sql);
    $this->db->query($sql);

    // collect the id of every returned record
    $aIds = array();
    while ($this->db->nextRecord()) {
        $aIds[] = $this->db->f('idartspec');
    }

    return $aIds;
}
1262:
/**
 * Adds an article specification id to the list of article specifications
 * the search is restricted to.
 *
 * The id is cast to an integer because getSearchableArticles() embeds the
 * collected ids in its SQL statement.
 *
 * @param int $iArtspecID
 */
public function setArticleSpecification($iArtspecID) {
    $this->_articleSpecs[] = cSecurity::toInteger($iArtspecID);
}
1271:
/**
 * Add all article specifications matching name of article specification
 * (client dependent but language independent).
 *
 * The ids of all specifications of the current client that carry the
 * given name are appended to the list of article specifications.
 *
 * @param string $sArtSpecName
 * @return bool|null false if no name was given; otherwise nothing is
 *         returned
 */
public function addArticleSpecificationsByName($sArtSpecName) {
    $bHasName = isset($sArtSpecName) && strlen($sArtSpecName) > 0;
    if (!$bHasName) {
        return false;
    }

    $sTable = $this->cfg['tab']['art_spec'];
    $iClient = cSecurity::toInteger($this->client);
    $sEscapedName = $this->db->escape($sArtSpecName);

    $sql = "SELECT
idartspec
FROM
" . $sTable . "
WHERE
client = " . $iClient . " AND
artspec = '" . $sEscapedName . "' ";
    $this->_debug('sql', $sql);
    $this->db->query($sql);

    // remember every matching specification
    while ($this->db->nextRecord()) {
        $iSpecId = $this->db->f('idartspec');
        $this->_articleSpecs[] = $iSpecId;
    }
}
1297: }
1298:
/**
 * CONTENIDO API - SearchResult Object
 *
 * This object ranks and displays the result of the indexed fulltext search.
 * If you are not comfortable with this API feel free to use your own methods to
 * display the search results.
 * The search result is basically an array with article ID's.
 *
 * If $search_result = $search->searchIndex($searchword, $searchwordex);
 *
 * use object with
 *
 * $oSearchResults = new cSearchResult($search_result, 10);
 *
 * // html-tags to emphasize the located searchwords
 * $oSearchResults->setReplacement('<span style="color:red">', '</span>');
 *
 * $num_res = $oSearchResults->getNumberOfResults();
 * $num_pages = $oSearchResults->getNumberOfPages();
 * $res_page = $oSearchResults->getSearchResultPage(1); // first result page
 * foreach ($res_page as $key => $val) {
 *     $headline = $oSearchResults->getSearchContent($key, 'HTMLHEAD');
 *     $first_headline = $headline[0];
 *     $text = $oSearchResults->getSearchContent($key, 'HTML');
 *     $first_text = $text[0];
 *     $similarity = $oSearchResults->getSimilarity($key);
 *     $iOccurrence = $oSearchResults->getOccurrence($key);
 * }
 *
 * @package Core
 * @subpackage Frontend_Search
 *
 */
class cSearchResult extends cSearchBaseAbstract {

    /**
     * Instance of class Index
     *
     * @var object
     */
    protected $_index;

    /**
     * Number of results
     *
     * @var int
     */
    protected $_results;

    /**
     * Number of result pages
     *
     * @var int
     */
    protected $_pages;

    /**
     * Current result page
     *
     * @var int
     */
    protected $_resultPage;

    /**
     * Results per page to display
     *
     * @var int
     */
    protected $_resultPerPage;

    /**
     * Array of html-tags to emphasize the searchwords.
     * Either empty or array(opening tag, closing tag).
     *
     * @var array
     */
    protected $_replacement = array();

    /**
     * Array of article id's with ranking information
     *
     * @var array
     */
    protected $_rankStructure = array();

    /**
     * Array of result-pages with array's of article id's
     *
     * @var array
     */
    protected $_orderedSearchResult = array();

    /**
     * Array of article id's with information about cms-types, occurrence of
     * keyword/searchword, similarity.
     *
     * @var array
     */
    protected $_searchResult = array();

    /**
     * Compute ranking factor for each search result and order the search
     * results by ranking factor
     * NOTE: The ranking factor is the sum of occurrences of matching
     * searchterms weighted by similarity (in %) between searchword
     * and matching word in the article.
     * TODO: One can think of more sophisticated ranking strategies. One could
     * use the content type information for example
     * because a matching word in the headline (CMS_HEADLINE[1]) could be
     * weighted more than a matching word in the text (CMS_HTML[1]).
     *
     * @param array $search_result List of article ids; any non-array value
     *        is treated as an empty result
     * @param int $result_per_page Number of items per page
     * @param cDb $oDB Optional db instance
     * @param bool $bDebug Optional flag to enable debugging
     */
    public function __construct($search_result, $result_per_page, $oDB = NULL, $bDebug = false) {
        parent::__construct($oDB, $bDebug);

        $this->_index = new cSearchIndex($oDB);

        // a failed search must not break counting and iterating below
        $this->_searchResult = is_array($search_result) ? $search_result : array();
        $this->_debug('$this->search_result', $this->_searchResult);

        $this->_resultPerPage = $result_per_page;
        $this->_results = count($this->_searchResult);

        // compute ranking factor for each search result
        foreach ($this->_searchResult as $article => $val) {
            $this->_rankStructure[$article] = $this->getOccurrence($article) * ($this->getSimilarity($article) / 100);
        }
        $this->_debug('$this->rank_structure', $this->_rankStructure);

        $this->setOrderedSearchResult($this->_rankStructure, $this->_resultPerPage);
        $this->_pages = count($this->_orderedSearchResult);
        $this->_debug('$this->ordered_search_result', $this->_orderedSearchResult);
    }

    /**
     * Orders the ranked articles by descending ranking factor and splits
     * them into result pages.
     *
     * The previously stored pages are replaced. If $result_per_page is not
     * a number greater than 0, all articles are put on one single page.
     *
     * @param array $ranked_search Ranking factor by article id
     * @param int $result_per_page
     */
    public function setOrderedSearchResult($ranked_search, $result_per_page) {
        if (!is_array($ranked_search)) {
            $ranked_search = array();
        }

        asort($ranked_search);
        $sorted_rank = array_reverse($ranked_search, true);

        $iPerPage = is_numeric($result_per_page) ? (int) $result_per_page : 0;
        if ($iPerPage > 0) {
            $this->_orderedSearchResult = array_chunk($sorted_rank, $iPerPage, true);
        } else {
            // assign instead of append, so repeated calls do not pile up pages
            $this->_orderedSearchResult = array($sorted_rank);
        }
    }

    /**
     * Loads the article in the current language and returns the content
     * delivered by cApiArticleLanguage::getContent().
     *
     * @param int $art_id Id of an article
     * @param string $cms_type
     * @param int $id
     * @return string Content of an article, specified by it's content type
     */
    public function getContent($art_id, $cms_type, $id = 0) {
        $article = new cApiArticleLanguage();
        $article->loadByArticleAndLanguageId($art_id, $this->lang, true);
        return $article->getContent($cms_type, $id);
    }

    /**
     * Returns the tag-stripped content of an article for a content type.
     *
     * If the search found words in the given content type and replacement
     * tags were set with setReplacement(), the located search words are
     * emphasized. With a numeric $cms_nr only that content entry is
     * returned, otherwise one entry per content number found by the search
     * (or every entry of the type if the search found nothing in it).
     *
     * @param int $art_id Id of an article
     * @param string $cms_type Content type, with or without leading "CMS_"
     * @param int $cms_nr
     * @return array Contents of an article in search result, specified by
     *         its type; empty array for an unknown "CMS_" type
     */
    public function getSearchContent($art_id, $cms_type, $cms_nr = NULL) {
        $cms_type = strtoupper($cms_type);
        if (strlen($cms_type) > 0) {
            if (!stristr($cms_type, 'cms_')) {
                // complete a known suffix (e.g. HTML) to its type name
                if (in_array($cms_type, $this->_index->getCmsTypeSuffix())) {
                    $cms_type = 'CMS_' . $cms_type;
                }
            } else {
                if (!array_key_exists($cms_type, $this->_index->getCmsType())) {
                    return array();
                }
            }
        }

        $article = new cApiArticleLanguage();
        $article->loadByArticleAndLanguageId($art_id, $this->lang, true);

        $bHasNumber = isset($cms_nr) && is_numeric($cms_nr);
        $content = array();

        if (!isset($this->_searchResult[$art_id][$cms_type])) {
            // searchword was not found in cms_type
            if ($bHasNumber) {
                $content[] = strip_tags($article->getContent($cms_type, $cms_nr));
                return $content;
            }

            $art_content = $article->getContent($cms_type);
            if (is_array($art_content)) {
                foreach ($art_content as $val) {
                    $content[] = strip_tags($val);
                }
            }
            return $content;
        }

        // searchword occurs in cms_type
        $search_words = array();
        if (isset($this->_searchResult[$art_id]['search']) && is_array($this->_searchResult[$art_id]['search'])) {
            $search_words = array_unique($this->_searchResult[$art_id]['search']);
        }
        $bEmphasize = count($this->_replacement) == 2;

        if ($bHasNumber) {
            // get content of cms_type[cms_nr]
            // build consistent escaped string(Timo Trautmann) 2008-04-17
            $cms_content = conHtmlentities(conHtmlEntityDecode(strip_tags($article->getContent($cms_type, $cms_nr))));
            if ($bEmphasize) {
                foreach ($search_words as $word) {
                    // build consistent escaped string, replace ae ue ..
                    // with original html entities (Timo Trautmann)
                    // 2008-04-17
                    $word = conHtmlentities(conHtmlEntityDecode($this->_index->addSpecialUmlauts($word)));
                    $cms_content = $this->_emphasizeSearchWord($cms_content, $word);
                }
            }
            $content[] = htmlspecialchars_decode($cms_content);
            return $content;
        }

        // get content of cms_type[$id], where $id are the cms_type
        // numbers found in search
        $id_type = array_unique($this->_searchResult[$art_id][$cms_type]);
        foreach ($id_type as $id) {
            $cms_content = strip_tags($article->getContent($cms_type, $id));
            if ($bEmphasize) {
                foreach ($search_words as $word) {
                    $cms_content = $this->_emphasizeSearchWord($cms_content, $word);
                }
            }
            $content[] = $cms_content;
        }

        return $content;
    }

    /**
     * Wraps every case-insensitive occurrence of a search word in the
     * replacement tags.
     *
     * The word is matched literally (it is quoted for the regular
     * expression) and each occurrence keeps its own spelling. The tags are
     * escaped, so "$" and "\" in them are inserted as they are.
     *
     * @param string $content Text to emphasize the word in
     * @param string $word Search word
     * @return string Text with emphasized word; the unchanged text if no
     *         replacement tags are set, the word is empty or matching fails
     */
    protected function _emphasizeSearchWord($content, $word) {
        if (count($this->_replacement) != 2 || strlen($word) == 0) {
            return $content;
        }

        $pattern = '/' . preg_quote($word, '/') . '/i';
        // ${0} is the matched text; braces keep it apart from a following digit
        $replacement = addcslashes($this->_replacement[0], '\\$') . '${0}' . addcslashes($this->_replacement[1], '\\$');

        $emphasized = preg_replace($pattern, $replacement, $content);

        // preg_replace() returns NULL on error, keep the text in that case
        return ($emphasized === NULL) ? $content : $emphasized;
    }

    /**
     * Returns articles in page and remembers the page as current page.
     *
     * @param int $page_id Number of the page, starting with 1
     * @return array Articles in page $page_id, an empty array if the page
     *         does not exist
     */
    public function getSearchResultPage($page_id) {
        $this->_resultPage = $page_id;

        $iIndex = (int) $page_id - 1;
        if (!isset($this->_orderedSearchResult[$iIndex])) {
            return array();
        }

        return $this->_orderedSearchResult[$iIndex];
    }

    /**
     * Returns number of result pages
     *
     * @return int
     */
    public function getNumberOfPages() {
        return $this->_pages;
    }

    /**
     * Returns number of results
     *
     * @return int
     */
    public function getNumberOfResults() {
        return $this->_results;
    }

    /**
     * Returns the similarity stored for an article in the search result.
     *
     * @param int $art_id Id of an article
     * @return int Similarity between searchword and matching word in
     *         article, 0 if no similarity is stored for the article
     */
    public function getSimilarity($art_id) {
        if (!isset($this->_searchResult[$art_id]['similarity'])) {
            return 0;
        }

        return $this->_searchResult[$art_id]['similarity'];
    }

    /**
     * Sums up the occurrences stored for an article in the search result.
     *
     * @param int $art_id Id of an article
     * @return int Number of matching searchwords found in article, 0 if no
     *         occurrences are stored for the article
     */
    public function getOccurrence($art_id) {
        if (!isset($this->_searchResult[$art_id]['occurence']) || !is_array($this->_searchResult[$art_id]['occurence'])) {
            return 0;
        }

        return array_sum($this->_searchResult[$art_id]['occurence']);
    }

    /**
     * Sets the html-tags that enclose located searchwords. Tags set
     * before are replaced. Nothing is changed if one of the tags is empty
     * or consists of whitespace only.
     *
     * @param string $rep1 The opening html-tag to emphasize the searchword e.g.
     *        '<b>'
     * @param string $rep2 The closing html-tag e.g. '</b>'
     * @return void
     */
    public function setReplacement($rep1, $rep2) {
        if (strlen(trim($rep1)) > 0 && strlen(trim($rep2)) > 0) {
            // always exactly two entries, emphasizing depends on that
            $this->_replacement = array($rep1, $rep2);
        }
    }

    /**
     * Reads the category of an article from the first record the category
     * article table delivers for it.
     *
     * @param int $artid
     * @return int|null Category Id, NULL if no record was found
     * @todo Is not job of search, should be outsourced!
     */
    public function getArtCat($artid) {
        $sql = "SELECT idcat FROM " . $this->cfg['tab']['cat_art'] . "
WHERE idart = " . cSecurity::toInteger($artid) . " ";
        $this->db->query($sql);
        if ($this->db->nextRecord()) {
            return $this->db->f('idcat');
        }

        return NULL;
    }
}
1645: