1: <?php
2: /**
3: * This file contains various classes for content search.
4: * API to index a CONTENIDO article
5: * API to search in the index structure
6: * API to display the searchresults
7: *
8: * @package Core
9: * @subpackage Frontend_Search
10: * @version SVN Revision $Rev:$
11: *
12: * @author Willi Man
13: * @copyright four for business AG <www.4fb.de>
14: * @license http://www.contenido.org/license/LIZENZ.txt
15: * @link http://www.4fb.de
16: * @link http://www.contenido.org
17: */
18: defined('CON_FRAMEWORK') || die('Illegal call: Missing framework initialization - request aborted.');
19:
20: cInclude('includes', 'functions.encoding.php');
21:
22: /**
23: * Abstract base search class.
24: * Provides general properties and functions
25: * for child implementations.
26: *
27: * @author Murat Purc <murat@purc.de>
28: *
29: * @package Core
30: * @subpackage Frontend_Search
31: */
abstract class cSearchBaseAbstract {

    /**
     * CONTENIDO database object.
     *
     * Kept for backward compatibility; it refers to the same instance
     * as $db.
     *
     * @var cDb
     */
    protected $oDB;

    /**
     * Database object used by this class and its children for all queries.
     *
     * @var cDb
     */
    protected $db;

    /**
     * Debug flag as passed to the constructor. It is only stored here;
     * _debug() does not evaluate it.
     *
     * @var bool
     */
    protected $bDebug = false;

    /**
     * CONTENIDO configuration data
     *
     * @var array
     */
    protected $cfg;

    /**
     * Language id of a client
     *
     * @var int
     */
    protected $lang;

    /**
     * Client id
     *
     * @var int
     */
    protected $client;

    /**
     * Initialises the properties from the global CONTENIDO state.
     *
     * @param cDb $oDB Optional database instance; when no object is passed
     *        the connection is taken from cRegistry::getDb()
     * @param bool $bDebug Optional, flag to enable debugging (no longer needed)
     */
    protected function __construct($oDB = NULL, $bDebug = false) {
        global $cfg, $lang, $client;

        $this->cfg = $cfg;
        $this->lang = $lang;
        $this->client = $client;

        $this->bDebug = $bDebug;

        // Anything that is not an object (including NULL) falls back to the
        // registry connection.
        $this->db = is_object($oDB) ? $oDB : cRegistry::getDb();

        // Keep the historically declared property usable as well.
        $this->oDB = $this->db;
    }

    /**
     * Main debug function: builds "<msg>: <value>" and hands it to
     * cDebug::out(). Arrays and objects are rendered with print_r().
     *
     * @param string $msg Some text
     * @param mixed $var The variable to dump
     */
    protected function _debug($msg, $var) {
        $dump = $msg . ': ';
        if (is_array($var) || is_object($var)) {
            $dump .= print_r($var, true);
        } else {
            $dump .= $var;
        }
        cDebug::out($dump);
    }
}
100:
101: /**
102: * CONTENIDO API - Search Index Object
103: *
104: * This object creates an index of an article
105: *
106: * Create object with
107: * $oIndex = new SearchIndex($db); # where $db is the global CONTENIDO database
108: * object.
109: * Start indexing with
110: * $oIndex->start($idart, $aContent);
111: * where $aContent is the complete content of an article specified by its
112: * content types.
113: * It looks like
114: * Array (
115: * [CMS_HTMLHEAD] => Array (
116: * [1] => Herzlich Willkommen...
117: * [2] => ...auf Ihrer Website!
118: * )
119: * [CMS_HTML] => Array (
120: * [1] => Die Inhalte auf dieser Website ...
121: *
122: * The index for keyword 'willkommen' would look like '&12=1(CMS_HTMLHEAD-1)'
123: * which means the keyword 'willkommen' occurs 1 times in article with articleId
124: * 12 and content type CMS_HTMLHEAD[1].
125: *
126: * TODO: The basic idea of the indexing process is to take the complete content
127: * of an article and to generate normalized index terms
128: * from the content and to store a specific index structure in the relation
129: * 'con_keywords'.
130: * To take the complete content is not very flexible. It would be better to
131: * differentiate by specific content types or by any content.
 * The string separated by &, =, () and - is not easy to parse when computing
 * the search result set.
134: * It would be a better idea (and a lot of work) to extend the relation
135: * 'con_keywords' to store keywords by articleId (or content source identifier)
136: * and content type.
137: * The functions removeSpecialChars, setStopwords, setContentTypes and
138: * setCmsOptions should be sourced out into a new helper-class.
139: * Keep in mind that class Search and SearchResult uses an instance of object
140: * Index.
141: *
142: * @package Core
143: * @subpackage Frontend_Search
144: */
145: class cSearchIndex extends cSearchBaseAbstract {
146:
147: /**
148: * the content of the cms-types of an article
149: *
150: * @var array
151: */
152: protected $_keycode = array();
153:
154: /**
155: * the list of keywords of an article
156: *
157: * @var array
158: */
159: protected $_keywords = array();
160:
161: /**
162: * the words, which should not be indexed
163: *
164: * @var array
165: */
166: protected $_stopwords = array();
167:
168: /**
169: * the keywords of an article stored in the DB
170: *
171: * @var array
172: */
173: protected $_keywordsOld = array();
174:
175: /**
176: * the keywords to be deleted
177: *
178: * @var array
179: */
180: protected $_keywordsDel = array();
181:
182: /**
183: * 'auto' or 'self'
184: * The field 'auto' in table con_keywords is used for automatic indexing.
185: * The value is a string like "&12=2(CMS_HTMLHEAD-1,CMS_HTML-1)", which
186: * means a keyword occurs 2 times in article with $idart 12
187: * and can be found in CMS_HTMLHEAD[1] and CMS_HTML[1].
188: * The field 'self' can be used in the article properties to index the
189: * article manually.
190: *
191: * @var string
192: */
193: protected $_place;
194:
195: /**
196: * array of cms types
197: *
198: * @var array
199: */
200: protected $_cmsOptions = array();
201:
202: /**
203: * array of all available cms types
204: *
205: * htmlhead - HTML Headline
206: * html - HTML Text
207: * head - Headline (no HTML)
208: * text - Text (no HTML)
209: * img - Upload id of the element
210: * imgdescr - Image description
211: * link - Link (URL)
212: * linktarget - Linktarget (_self, _blank, _top ...)
213: * linkdescr - Linkdescription
214: * swf - Upload id of the element
215: * etc.
216: *
217: * @var array
218: */
219: protected $_cmsType = array();
220:
221: /**
222: * the suffix of all available cms types
223: *
224: * @var array
225: */
226: protected $_cmsTypeSuffix = array();
227:
/**
 * Creates the indexer: lets the parent constructor set up the shared
 * properties and then loads the content types via setContentTypes().
 *
 * @param cDb $db Optional CONTENIDO database object, handed to the parent
 *        constructor unchanged
 */
public function __construct($db = NULL) {
    parent::__construct($db);

    // fills $_cmsType and $_cmsTypeSuffix from the type table
    $this->setContentTypes();
}
238:
/**
 * Id of the article handled by the current start() run.
 *
 * @var int
 */
protected $idart;

/**
 * Start indexing the article.
 *
 * Builds the keyword list from $aContent, loads the keywords already stored
 * for the article, writes new/changed index strings and finally removes the
 * article from keywords that no longer occur in it.
 *
 * The per-article state ($_keywords, $_keywordsOld, $_keywordsDel) is reset
 * on every call so that one instance can index several articles in a row.
 *
 * @param int $idart Article Id; non-numeric or negative values abort the
 *        run without touching the index
 * @param array $aContent The complete content of an article specified by
 *        its content types.
 *        It looks like
 *        Array (
 *            [CMS_HTMLHEAD] => Array (
 *                [1] => Herzlich Willkommen...
 *                [2] => ...auf Ihrer Website!
 *            )
 *            [CMS_HTML] => Array (
 *                [1] => Die Inhalte auf dieser Website ...
 *
 * @param string $place The field where to store the index information in
 *        db ('auto' or 'self').
 * @param array $cms_options One can specify explicitly cms types which
 *        should not be indexed.
 * @param array $aStopwords Array with words which should not be indexed.
 */
public function start($idart, $aContent, $place = 'auto', $cms_options = array(), $aStopwords = array()) {
    // is_int((int) $idart) is always true, so the raw value has to be
    // validated instead of the casted one.
    if (!is_numeric($idart) || (int) $idart < 0) {
        return;
    }
    $this->idart = (int) $idart;

    // Forget everything collected for a previously indexed article.
    $this->_keywords = array();
    $this->_keywordsOld = array();
    $this->_keywordsDel = array();

    $this->_place = $place;
    $this->_keycode = $aContent;
    $this->setStopwords($aStopwords);
    $this->setCmsOptions($cms_options);

    $this->createKeywords();

    $this->getKeywords();

    $this->saveKeywords();

    // Keywords stored for this article that the new content no longer has.
    $new_keys = array_keys($this->_keywords);
    $old_keys = array_keys($this->_keywordsOld);

    $this->_keywordsDel = array_diff($old_keys, $new_keys);

    if (count($this->_keywordsDel) > 0) {
        $this->deleteKeywords();
    }
}
287:
/**
 * Creates the index structure for every cms type that passes
 * checkCmsType() and stores it in $_keywords.
 * It looks like
 * Array (
 *     [die] => CMS_HTML-1
 *     [inhalte] => CMS_HTML-1
 *     [auf] => CMS_HTML-1 CMS_HTMLHEAD-2
 *     [dieser] => CMS_HTML-1
 *     [website] => CMS_HTML-1 CMS_HTML-1 CMS_HTMLHEAD-2
 * )
 *
 * Each occurrence of a term appends "<type>-<typeid> " to its entry, so the
 * number of space separated tokens equals the number of occurrences.
 */
public function createKeywords() {
    // Only create keycodes, if some are available
    if (is_array($this->_keycode)) {
        foreach ($this->_keycode as $idtype => $data) {
            // skip excluded types and anything that is not a list of contents
            if (!$this->checkCmsType($idtype) || !is_array($data)) {
                continue;
            }

            foreach ($data as $typeid => $code) {
                $this->_debug('code', $code);

                // remove backslash
                $code = stripslashes($code);
                // replace HTML line breaks with newlines so that the words
                // around them are not glued together by strip_tags()
                $code = str_ireplace(array(
                    '<br>',
                    '<br/>',
                    '<br />'
                ), "\n", $code);
                // remove html tags
                $code = strip_tags($code);
                if (strlen($code) > 0) {
                    $code = conHtmlEntityDecode($code);
                }
                $this->_debug('code', $code);

                // split content by any number of commas or space characters
                $tmp_keys = preg_split('/[\s,]+/', trim($code));
                $this->_debug('tmp_keys', $tmp_keys);

                foreach ($tmp_keys as $value) {
                    // index terms are stored with lower case; the entity
                    // round trip keeps strtolower() away from raw non-ASCII
                    // bytes
                    $value = conHtmlentities($value);
                    $value = trim(strtolower($value));
                    $value = conHtmlEntityDecode($value);

                    // eliminate stopwords
                    if (in_array($value, $this->_stopwords)) {
                        continue;
                    }

                    $value = $this->removeSpecialChars($value);

                    // do not index single characters
                    if (strlen($value) > 1) {
                        // first occurrence: start with an empty entry instead
                        // of reading an undefined index
                        if (!isset($this->_keywords[$value])) {
                            $this->_keywords[$value] = '';
                        }
                        $this->_keywords[$value] .= $idtype . '-' . $typeid . ' ';
                    }
                }
            }
        }
    }

    $this->_debug('keywords', $this->_keywords);
}
355:
/**
 * Generates the index_string for every keyword in $_keywords and saves it.
 * The index_string looks like "&12=2(CMS_HTMLHEAD-1,CMS_HTML-1)".
 *
 * New keywords are inserted. For keywords already known (see
 * $_keywordsOld) the entry of the current article is replaced inside the
 * stored string, or appended if the article was not referenced yet.
 */
public function saveKeywords() {
    $idart = cSecurity::toInteger($this->idart);
    $idlang = cSecurity::toInteger($this->lang);
    $table = $this->cfg['tab']['keywords'];

    // The hyphen has to be the last character of the class: "[\w-,]" is
    // rejected as an invalid range by PCRE2 (PHP >= 7.3), which makes
    // preg_replace() return NULL.
    $entryPattern = '/&' . $idart . '=[0-9]+\([\w,-]+\)/';

    foreach ($this->_keywords as $keyword => $count) {
        $tmp_count = preg_split('/[\s]/', trim($count));
        $this->_debug('tmp_count', $tmp_count);

        // every token is one occurrence, the distinct tokens are the places
        $occurrence = count($tmp_count);
        $cms_types = implode(',', array_unique($tmp_count));
        $index_string = '&' . $idart . '=' . $occurrence . '(' . $cms_types . ')';

        if (!array_key_exists($keyword, $this->_keywordsOld)) {
            // if keyword is new, save index information
            $sql = "INSERT INTO " . $table . "
                        (keyword, " . $this->_place . ", idlang)
                    VALUES
                        ('" . $this->db->escape($keyword) . "', '" . $this->db->escape($index_string) . "', " . $idlang . ")";
        } else {
            // if keyword already exists, create new index_string
            $stored = (string) $this->_keywordsOld[$keyword];

            if (preg_match('/&' . $idart . '=/', $stored)) {
                // "$" and "\" would be read as references in the replacement
                $replacement = addcslashes($index_string, '\\$');
                $index_string = preg_replace($entryPattern, $replacement, $stored);

                if ($index_string === NULL) {
                    // regex failure: keep the stored value instead of
                    // overwriting it with an empty string
                    $this->_debug('preg_replace failed for keyword', $keyword);
                    continue;
                }
            } else {
                $index_string = $stored . $index_string;
            }

            $sql = "UPDATE " . $table . "
                    SET " . $this->_place . " = '" . $this->db->escape($index_string) . "'
                    WHERE idlang='" . $idlang . "' AND keyword='" . $this->db->escape($keyword) . "'";
        }
        $this->_debug('sql', $sql);
        $this->db->query($sql);
    }
}
395:
/**
 * For every keyword in $_keywordsDel (keywords that don't occur in the
 * article anymore) the entry of the current article is cut out of the stored
 * index_string. If nothing is left the keyword row is deleted, otherwise
 * the shortened string is written back.
 */
public function deleteKeywords() {
    $idart = cSecurity::toInteger($this->idart);
    $idlang = cSecurity::toInteger($this->lang);
    $table = $this->cfg['tab']['keywords'];

    // Hyphen last: "[\w-,]" is an invalid range for PCRE2 (PHP >= 7.3).
    $entryPattern = '/&' . $idart . '=[0-9]+\([\w,-]+\)/';

    foreach ($this->_keywordsDel as $key_del) {
        $stored = isset($this->_keywordsOld[$key_del]) ? (string) $this->_keywordsOld[$key_del] : '';
        $index_string = preg_replace($entryPattern, '', $stored);

        if ($index_string === NULL) {
            // regex failure: NULL must not be mistaken for "no references
            // left", that would delete the keyword row
            $this->_debug('preg_replace failed for keyword', $key_del);
            continue;
        }

        if (strlen($index_string) == 0) {
            // keyword is not referenced by any article
            $sql = "DELETE FROM " . $table . "
                    WHERE idlang = " . $idlang . " AND keyword = '" . $this->db->escape($key_del) . "'";
        } else {
            $sql = "UPDATE " . $table . "
                    SET " . $this->_place . " = '" . $this->db->escape($index_string) . "'
                    WHERE idlang = " . $idlang . " AND keyword = '" . $this->db->escape($key_del) . "'";
        }
        $this->_debug('sql', $sql);
        $this->db->query($sql);
    }
}
417:
/**
 * Loads the stored index strings that are relevant for the current article
 * into $_keywordsOld (keyword => content of the $_place column).
 *
 * A row is relevant if its keyword is one of the keywords just created or
 * if its index string already references the current article.
 */
public function getKeywords() {
    // escape every keyword before it goes into the IN (...) list
    $escaped = array();
    foreach (array_keys($this->_keywords) as $keyword) {
        $escaped[] = $this->db->escape((string) $keyword);
    }
    $keys = implode("','", $escaped);

    $sql = "SELECT
                keyword, auto, self
            FROM
                " . $this->cfg['tab']['keywords'] . "
            WHERE
                idlang=" . cSecurity::toInteger($this->lang) . " AND
                (keyword IN ('" . $keys . "') OR " . $this->_place . " REGEXP '&" . cSecurity::toInteger($this->idart) . "=')";

    $this->_debug('sql', $sql);

    $this->db->query($sql);

    $place = $this->_place;

    while ($this->db->nextRecord()) {
        $this->_keywordsOld[$this->db->f('keyword')] = $this->db->f($place);
    }
}
442:
/**
 * Removes special characters from an index term.
 *
 * The term is first passed through an entity encode/decode round trip
 * using the encoding returned by getEncodingByLanguage() for the current
 * language (ISO-8859-2 uses its own encoder), afterwards every character
 * of the list below is deleted. The hyphen is deliberately not part of the
 * list.
 *
 * TODO: The transformation of accented characters should depend on the
 * encoding of the client language and should not be handled here. The
 * round trip was added 2007-10-01 (H. Librenz) as a hotfix because words
 * with umlauts were not found with UTF-8 as language encoding.
 *
 * @param string $key Keyword
 * @return mixed The keyword without the listed characters
 */
public function removeSpecialChars($key) {
    $charset = getEncodingByLanguage($this->db, $this->lang);

    if (strtolower($charset) == 'iso-8859-2') {
        $encoded = htmlentities_iso88592($key);
    } else {
        $encoded = conHtmlentities($key, NULL, $charset);
    }
    $decoded = conHtmlEntityDecode($encoded);

    // characters that never belong to an index term
    $unwanted = array(
        "_", "'", ".", "!", "\"", "#", "$", "%",
        "&", "(", ")", "*", "+", ",", "/", ":",
        ";", "<", "=", ">", "?", "@", "[", "\\",
        "]", "^", "`", "{", "|", "}", "~", "„"
    );

    return str_replace($unwanted, '', $decoded);
}
524:
/**
 * Turns the two-letter transcriptions Ue/ue, Ae/ae, Oe/oe and ss into the
 * corresponding umlaut characters / sharp s.
 *
 * The keyword is entity encoded with the encoding of the current language
 * before the replacement and entity decoded afterwards. The replacements
 * are applied one after another in the listed order.
 *
 * @param string $key Keyword
 * @return string
 */
public function addSpecialUmlauts($key) {
    $transcriptions = array('Ue', 'ue', 'Ae', 'ae', 'Oe', 'oe', 'ss');
    $umlauts = array('Ü', 'ü', 'Ä', 'ä', 'Ö', 'ö', 'ß');

    $encoded = conHtmlentities($key, NULL, getEncodingByLanguage($this->db, $this->lang));

    // str_replace() with two arrays works pair by pair, in order
    $replaced = str_replace($transcriptions, $umlauts, $encoded);

    return conHtmlEntityDecode($replaced);
}
549:
/**
 * Sets the words which should not be indexed.
 *
 * Anything that is not a non-empty array is ignored, i.e. the list that
 * is currently set stays in place.
 *
 * @param array $aStopwords
 */
public function setStopwords($aStopwords) {
    if (!is_array($aStopwords) || count($aStopwords) == 0) {
        return;
    }

    $this->_stopwords = $aStopwords;
}
560:
/**
 * Reads all content types from the type table and fills
 * $_cmsType (type => idtype) and
 * $_cmsTypeSuffix (idtype => type without its first four characters).
 */
public function setContentTypes() {
    $query = "SELECT type, idtype FROM " . $this->cfg['tab']['type'] . ' ';
    $this->_debug('sql', $query);
    $this->db->query($query);

    while ($this->db->nextRecord()) {
        $typeName = $this->db->f('type');
        $typeId = $this->db->f('idtype');

        $this->_cmsType[$typeName] = $typeId;
        // presumably strips the "CMS_" prefix - the code just drops 4 chars
        $this->_cmsTypeSuffix[$typeId] = substr($typeName, 4);
    }
}
573:
/**
 * Sets the cms_options array of cms types which should be treated special.
 *
 * The given list replaces any earlier selection; anything that is not a
 * non-empty array clears it. Entries are matched case-insensitively and may
 * be given with prefix ("cms_html") or as suffix only ("html"). Entries
 * that are not known from $_cmsType / $_cmsTypeSuffix are dropped.
 *
 * @param mixed $cms_options
 */
public function setCmsOptions($cms_options) {
    // Always start from scratch: without the reset a second call would only
    // add to the first selection, although an empty list already resets it.
    $this->_cmsOptions = array();

    if (!is_array($cms_options)) {
        return;
    }

    foreach ($cms_options as $opt) {
        $opt = strtoupper($opt);

        if (strlen($opt) == 0) {
            continue;
        }

        if (!stristr($opt, 'cms_')) {
            // given as suffix only
            if (in_array($opt, $this->_cmsTypeSuffix)) {
                $this->_cmsOptions[$opt] = 'CMS_' . $opt;
            }
        } elseif (array_key_exists($opt, $this->_cmsType)) {
            // given as full type name
            $this->_cmsOptions[$opt] = $opt;
        }
    }
}
600:
/**
 * Tells whether a cms type is absent from the cms_options array.
 *
 * The type name is compared in upper case.
 *
 * @param string $idtype
 * @return boolean false if the type is listed in $_cmsOptions, true
 *         otherwise
 */
public function checkCmsType($idtype) {
    $listed = in_array(strtoupper($idtype), $this->_cmsOptions);

    return !$listed;
}
611:
/**
 * Returns the map of content types as filled by setContentTypes()
 * (type => idtype).
 *
 * @return array the _cmsType property
 */
public function getCmsType() {
    return $this->_cmsType;
}
619:
/**
 * Returns the map of content type suffixes as filled by setContentTypes()
 * (idtype => type name without its first four characters).
 *
 * @return array the _cmsTypeSuffix property
 */
public function getCmsTypeSuffix() {
    return $this->_cmsTypeSuffix;
}
627: }
628:
629: /**
630: * CONTENIDO API - Search Object
631: *
632: * This object starts a indexed fulltext search
633: *
634: * TODO:
 * The way the search options are set could be done much better!
636: * The computation of the set of searchable articles should not be treated in
637: * this class.
638: * It is better to compute the array of searchable articles from the outside and
639: * to pass the array of searchable articles as parameter.
640: * Avoid foreach loops.
641: *
642: * Use object with
643: *
644: * $options = array('db' => 'regexp', // use db function regexp
645: * 'combine' => 'or'); // combine searchwords with or
646: *
647: * The range of searchable articles is by default the complete content which is
648: * online and not protected.
649: *
650: * With option 'searchable_articles' you can define your own set of searchable
651: * articles.
652: * If parameter 'searchable_articles' is set the options 'cat_tree',
653: * 'categories', 'articles', 'exclude', 'artspecs',
654: * 'protected', 'dontshowofflinearticles' don't have any effect.
655: *
656: * $options = array('db' => 'regexp', // use db function regexp
657: * 'combine' => 'or', // combine searchwords with or
658: * 'searchable_articles' => array(5, 6, 9, 13));
659: *
660: * One can define the range of searchable articles by setting the parameter
661: * 'exclude' to false which means the range of categories
662: * defined by parameter 'cat_tree' or 'categories' and the range of articles
663: * defined by parameter 'articles' is included.
664: *
665: * $options = array('db' => 'regexp', // use db function regexp
666: * 'combine' => 'or', // combine searchwords with or
667: * 'exclude' => false, // => searchrange specified in 'cat_tree', 'categories'
668: * and 'articles' is included
669: * 'cat_tree' => array(12), // tree with root 12 included
670: * 'categories' => array(100,111), // categories 100, 111 included
671: * 'articles' => array(33), // article 33 included
672: * 'artspecs' => array(2, 3), // array of article specifications => search only
673: * articles with these artspecs
674: * 'res_per_page' => 2, // results per page
675: * 'protected' => true); // => do not search articles or articles in categories
676: * which are offline or protected
677: * 'dontshowofflinearticles' => false); // => search offline articles or
678: * articles in categories which are offline
679: *
680: * You can build the complement of the range of searchable articles by setting
681: * the parameter 'exclude' to true which means the range of categories
682: * defined by parameter 'cat_tree' or 'categories' and the range of articles
683: * defined by parameter 'articles' is excluded from search.
684: *
685: * $options = array('db' => 'regexp', // use db function regexp
686: * 'combine' => 'or', // combine searchwords with or
687: * 'exclude' => true, // => searchrange specified in 'cat_tree', 'categories'
688: * and 'articles' is excluded
689: * 'cat_tree' => array(12), // tree with root 12 excluded
690: * 'categories' => array(100,111), // categories 100, 111 excluded
691: * 'articles' => array(33), // article 33 excluded
692: * 'artspecs' => array(2, 3), // array of article specifications => search only
693: * articles with these artspecs
694: * 'res_per_page' => 2, // results per page
695: * 'protected' => true); // => do not search articles or articles in categories
696: * which are offline or protected
697: * 'dontshowofflinearticles' => false); // => search offline articles or
698: * articles in categories which are offline
699: *
700: * $search = new Search($options);
701: *
702: * $cms_options = array("htmlhead", "html", "head", "text", "imgdescr", "link",
703: * "linkdescr");
704: * search only in these cms-types
705: * $search->setCmsOptions($cms_options);
706: *
707: * $search_result = $search->searchIndex($searchword, $searchwordex); // start
708: * search
709: *
710: * The search result structure has following form
711: * Array (
712: * [20] => Array (
713: * [CMS_HTML] => Array (
714: * [0] => 1
715: * [1] => 1
716: * [2] => 1
717: * )
718: * [keyword] => Array (
719: * [0] => content
720: * [1] => contenido
721: * [2] => wwwcontenidoorg
722: * )
723: * [search] => Array (
724: * [0] => con
725: * [1] => con
726: * [2] => con
727: * )
728: * [occurence] => Array (
729: * [0] => 1
730: * [1] => 5
731: * [2] => 1
732: * )
733: * [similarity] => 60
734: * )
735: * )
736: *
737: * The keys of the array are the article ID's found by search.
738: *
739: * Searching 'con' matches keywords 'content', 'contenido' and 'wwwcontenidoorg'
740: * in article with ID 20 in content type CMS_HTML[1].
741: * The search term occurs 7 times.
742: * The maximum similarity between searchterm and matching keyword is 60%.
743: *
744: * with $oSearchResults = new cSearchResult($search_result, 10);
745: * one can rank and display the results
746: *
747: * @package Core
748: * @subpackage Frontend_Search
749: */
750: class cSearch extends cSearchBaseAbstract {
751:
752: /**
753: * Instance of class Index
754: *
755: * @var object
756: */
757: protected $_index;
758:
759: /**
760: * array of available cms types
761: *
762: * @var array
763: */
764: protected $_cmsType = array();
765:
766: /**
767: * suffix of available cms types
768: *
769: * @var array
770: */
771: protected $_cmsTypeSuffix = array();
772:
773: /**
774: * the search words
775: *
776: * @var array
777: */
778: protected $_searchWords = array();
779:
780: /**
781: * the words which should be excluded from search
782: *
783: * @var array
784: */
785: protected $_searchWordsExclude = array();
786:
787: /**
788: * type of db search
789: * like => 'sql like', regexp => 'sql regexp'
790: *
791: * @var string
792: */
793: protected $_searchOption;
794:
795: /**
796: * logical combination of searchwords (and, or)
797: *
798: * @var string
799: */
800: protected $_searchCombination;
801:
802: /**
803: * array of searchable articles
804: *
805: * @var array
806: */
807: protected $_searchableArts = array();
808:
809: /**
810: * article specifications
811: *
812: * @var array
813: */
814: protected $_articleSpecs = array();
815:
816: /**
817: * If $protected = true => do not search articles which are offline or
818: * articles in catgeories which are offline (protected)
819: *
820: * @var boolean
821: */
822: protected $_protected;
823:
824: /**
825: * If $dontshowofflinearticles = false => search offline articles or
826: * articles in categories which are offline
827: *
828: * @var boolean
829: */
830: protected $_dontshowofflinearticles;
831:
832: /**
833: * If $exclude = true => the specified search range is excluded from search,
834: * otherwise included
835: *
836: * @var boolean
837: */
838: protected $_exclude;
839:
840: /**
841: * Array of article id's with information about cms-types, occurence of
842: * keyword/searchword, similarity .
843: *
844: *
845: *
846: * @var array
847: */
848: protected $_searchResult = array();
849:
/**
 * Constructor
 *
 * Creates the internal cSearchIndex, copies its content type maps, enables
 * all known content types for searching and determines the set of
 * searchable articles.
 *
 * @param array $options $options['db'] 'regexp' => DB search with REGEXP;
 *        'like' => DB search with LIKE; 'exact' => exact match;
 *        $options['combine'] 'and', 'or' Combination of search words with
 *        AND, OR
 *        $options['exclude'] 'true' => searchrange specified in 'cat_tree',
 *        'categories' and 'articles' is excluded; 'false' =>
 *        searchrange specified in 'cat_tree', 'categories' and
 *        'articles' is included
 *        $options['cat_tree'] e.g. array(8) => The complete tree with root
 *        8 is in/excluded from search
 *        $options['categories'] e.g. array(10, 12) => Categories 10, 12
 *        in/excluded
 *        $options['articles'] e.g. array(23) => Article 23 in/excluded
 *        $options['artspecs'] => e.g. array(2, 3) => search only articles
 *        with certain article specifications
 *        $options['protected'] 'true' => do not search articles which are
 *        offline (locked) or articles in categories which are offline
 *        (protected)
 *        $options['dontshowofflinearticles'] 'false' => search offline
 *        articles or articles in categories which are offline
 *        $options['searchable_articles'] array of article ID's which should
 *        be searchable
 *        Anything that is not an array is treated as "no options".
 * @param cDb $db Optional database instance
 */
public function __construct($options, $db = NULL) {
    parent::__construct($db);

    if (!is_array($options)) {
        $options = array();
    }

    $this->_index = new cSearchIndex($db);

    // The type maps of cSearchIndex are protected ($_cmsType and
    // $_cmsTypeSuffix); "cms_type" / "cms_type_suffix" do not exist, so the
    // maps have to be read through the getters.
    $this->_cmsType = $this->_index->getCmsType();
    $this->_cmsTypeSuffix = $this->_index->getCmsTypeSuffix();

    $this->_searchOption = (array_key_exists('db', $options)) ? strtolower($options['db']) : 'regexp';
    $this->_searchCombination = (array_key_exists('combine', $options)) ? strtolower($options['combine']) : 'or';
    $this->_protected = (array_key_exists('protected', $options)) ? $options['protected'] : true;
    $this->_dontshowofflinearticles = (array_key_exists('dontshowofflinearticles', $options)) ? $options['dontshowofflinearticles'] : false;
    $this->_exclude = (array_key_exists('exclude', $options)) ? $options['exclude'] : true;
    $this->_articleSpecs = (array_key_exists('artspecs', $options) && is_array($options['artspecs'])) ? $options['artspecs'] : array();

    // by default every known content type takes part in the search
    $this->_index->setCmsOptions($this->_cmsTypeSuffix);

    if (array_key_exists('searchable_articles', $options) && is_array($options['searchable_articles'])) {
        $this->_searchableArts = $options['searchable_articles'];
    } else {
        $this->_searchableArts = $this->getSearchableArticles($options);
    }

    // minimum similarity between searchword and keyword in percent
    // NOTE(review): intMinimumSimilarity is not declared in the visible
    // part of this class - confirm and declare it with the other properties.
    $this->intMinimumSimilarity = 50;
}
902:
/**
 * Indexed fulltext search.
 *
 * Looks up all keywords of the current language that match one of the
 * search words (REGEXP, LIKE or exact match, depending on the 'db'
 * option; unknown values are treated as 'regexp'), resolves the stored
 * index strings to articles and collects per article the matching content
 * types, keywords, search words, occurrences and the best similarity.
 * Only searchable articles, selected content types and keywords whose
 * similarity reaches the minimum are taken into the result.
 *
 * Words to exclude are searched as well; every article that matched one of
 * them is removed from the result afterwards. With combination 'and' an
 * article is only kept if each (non-excluded) search word was recorded for
 * it.
 *
 * The result of a previous call is discarded. Every search is reported to
 * cApiSearchTrackingCollection::trackSearch().
 *
 * @param string $searchwords The search words
 * @param string $searchwords_exclude The words, which should be excluded
 *        from search
 * @return bool|array false if $searchwords is empty, otherwise the search
 *         result keyed by article id
 */
public function searchIndex($searchwords, $searchwords_exclude = '') {
    if (strlen(trim($searchwords)) == 0) {
        return false;
    }

    // never mix results or exclude words of an earlier call into this one
    $this->_searchResult = array();
    $this->_searchWordsExclude = array();

    $includeWords = $this->stripWords($searchwords);
    if (strlen(trim($searchwords_exclude)) > 0) {
        $this->_searchWordsExclude = $this->stripWords($searchwords_exclude);
    }

    // excluded words are looked up too, so that articles containing them
    // can be identified and dropped below
    $this->_searchWords = array_merge($includeWords, $this->_searchWordsExclude);

    $option = $this->_searchOption;
    if ($option != 'like' && $option != 'exact') {
        $option = 'regexp';
    }

    // build one SQL term per word - always from the escaped word
    $terms = array();
    foreach ($this->_searchWords as $word) {
        $wordEscaped = $this->db->escape($word);
        if ($option == 'like') {
            $terms[] = "keyword LIKE '%" . $wordEscaped . "%'";
        } elseif ($option == 'exact') {
            $terms[] = "keyword = '" . $wordEscaped . "'";
        } else {
            // NOTE(review): regexp metacharacters typed by the user are
            // passed on unchanged - confirm whether that is intended
            $terms[] = $wordEscaped;
        }
    }

    if (count($terms) > 0) {
        if ($option == 'regexp') {
            $kwSql = "keyword REGEXP '" . implode('|', $terms) . "'";
        } else {
            // parentheses keep the OR chain inside the idlang filter
            $kwSql = '(' . implode(' OR ', $terms) . ')';
        }

        $sql = "SELECT keyword, auto FROM " . $this->cfg['tab']['keywords'] . " WHERE idlang=" . cSecurity::toInteger($this->lang) . " AND " . $kwSql . " ";
        $this->_debug('sql', $sql);
        $this->db->query($sql);

        while ($this->db->nextRecord()) {
            $keyword = $this->db->f('keyword');
            $auto = (string) $this->db->f('auto');
            $this->_debug('index', $auto);

            // best similarity (in percent) between the keyword and any
            // search word; it does not depend on the article
            $percent = 0;
            $similarity = 0;
            $searchword = '';
            foreach ($this->_searchWords as $word) {
                similar_text($word, $keyword, $percent);
                if ($percent > $similarity) {
                    $similarity = $percent;
                    $searchword = $word;
                }
            }

            // include articles into the result set only if the similarity
            // between searchword and keyword is big enough
            if ($similarity < $this->intMinimumSimilarity) {
                continue;
            }

            // "&12=2(CMS_HTML-1,CMS_HEAD-2)&13=..." => one entry per article
            $tmp_index = array();
            foreach (preg_split('/&/', $auto, -1, PREG_SPLIT_NO_EMPTY) as $string) {
                $tmp_string = preg_replace('/[=\(\)]/', ' ', $string);
                $tmp_index[] = preg_split('/\s/', $tmp_string, -1, PREG_SPLIT_NO_EMPTY);
            }
            $this->_debug('tmp_index', $tmp_index);

            foreach ($tmp_index as $string) {
                // expected: array(idart, occurrence, "TYPE-id,TYPE-id")
                if (count($string) < 3) {
                    continue;
                }
                $artid = $string[0];

                // filter nonsearchable articles
                if (!in_array($artid, $this->_searchableArts)) {
                    continue;
                }

                $tmp_cmstype = preg_split('/[,]/', $string[2], -1, PREG_SPLIT_NO_EMPTY);
                $this->_debug('tmp_cmstype', $tmp_cmstype);

                foreach ($tmp_cmstype as $typeString) {
                    $type = preg_split('/-/', $typeString, -1, PREG_SPLIT_NO_EMPTY);

                    // search only the selected cms-types (checkCmsType()
                    // returns false for types listed in the options)
                    if (count($type) < 2 || $this->_index->checkCmsType($type[0])) {
                        continue;
                    }

                    $this->_searchResult[$artid][$type[0]][] = $type[1];
                    $this->_searchResult[$artid]['keyword'][] = $keyword;
                    $this->_searchResult[$artid]['search'][] = $searchword;
                    $this->_searchResult[$artid]['occurence'][] = $string[1];
                    $this->_searchResult[$artid]['debug_similarity'][] = $percent;
                    if (!isset($this->_searchResult[$artid]['similarity']) || $similarity > $this->_searchResult[$artid]['similarity']) {
                        $this->_searchResult[$artid]['similarity'] = $similarity;
                    }
                }
            }
        }
    }

    if ($this->_searchCombination == 'and') {
        // all search words must appear in the article; the excluded words
        // are not part of this requirement
        foreach ($this->_searchResult as $article => $val) {
            if (count(array_diff($includeWords, $val['search'])) > 0) {
                unset($this->_searchResult[$article]);
            }
        }
    }

    if (count($this->_searchWordsExclude) > 0) {
        // search words to be excluded must not appear in article
        foreach ($this->_searchResult as $article => $val) {
            if (count(array_intersect($this->_searchWordsExclude, $val['search'])) > 0) {
                unset($this->_searchResult[$article]);
            }
        }
    }

    $this->_debug('$this->search_result', $this->_searchResult);
    $this->_debug('$this->searchable_arts', $this->_searchableArts);

    $searchTracking = new cApiSearchTrackingCollection();
    $searchTracking->trackSearch($searchwords, count($this->_searchResult));

    return $this->_searchResult;
}
1055:
/**
 * Sets the cms-types which should explicitly be searched.
 *
 * @param mixed $cms_options The cms-types (htmlhead, html, ...) which
 *        should explicitly be searched; anything but a non-empty array
 *        is ignored
 */
public function setCmsOptions($cms_options) {
    // Only a non-empty array is handed over to the index; any other input
    // is ignored and the index keeps whatever options it already has.
    if (!is_array($cms_options) || count($cms_options) === 0) {
        return;
    }

    $this->_index->setCmsOptions($cms_options);
}
1067:
1068: /**
1069: *
1070: * @param string $searchwords The search-words
1071: * @return array of stripped search-words
1072: */
public function stripWords($searchwords) {
    // Drop backslashes and html tags, then cut the phrase at any run of
    // whitespace and/or commas.
    $phrase = trim(strip_tags(stripslashes($searchwords)));

    $normalized = array();
    foreach (preg_split('/[\s,]+/', $phrase) as $token) {
        // Lower-casing happens on the entity-encoded form, so the entity
        // names are lower-cased too (e.g. &Uuml; becomes &uuml;) before the
        // token is decoded back to UTF-8.
        $encoded = htmlentities($token, ENT_COMPAT, 'UTF-8');
        $token = html_entity_decode(trim(strtolower($encoded)), ENT_COMPAT, 'UTF-8');

        // Tokens of one byte or less (including empty ones) are discarded.
        if (strlen($token) <= 1) {
            continue;
        }

        $normalized[] = $token;
    }

    // Duplicates are removed; the keys of the first occurrences are kept.
    return array_unique($normalized);
}
1096:
1097: /**
1098: * Returns the category tree array.
1099: *
1100: * @param int $cat_start Root of a category tree
1101: * @return array Category Tree
1102: * @todo This is not the job for search, should be outsourced ...
1103: */
public function getSubTree($cat_start) {
    // Load idcat/parentid pairs of all categories of the current client and
    // language, ordered by their position in the category tree.
    $sql = "SELECT
                B.idcat, B.parentid
            FROM
                " . $this->cfg['tab']['cat_tree'] . " AS A,
                " . $this->cfg['tab']['cat'] . " AS B,
                " . $this->cfg['tab']['cat_lang'] . " AS C
            WHERE
                A.idcat = B.idcat AND
                B.idcat = C.idcat AND
                C.idlang = '" . cSecurity::toInteger($this->lang) . "' AND
                B.idclient = '" . cSecurity::toInteger($this->client) . "'
            ORDER BY
                idtree";
    $this->_debug('sql', $sql);
    $this->db->query($sql);

    // $aSubCats is the ordered result list, $aKnown mirrors it as a keyed
    // set so membership checks do not rescan the whole list for every row.
    $aSubCats = array(
        $cat_start
    );
    $aKnown = array(
        $cat_start => true
    );

    // NOTE(review): a category is only picked up if its parent was seen in
    // an earlier row, so this relies on the idtree ordering delivering
    // parents before their children - confirm against the cat_tree table.
    while ($this->db->nextRecord()) {
        $parentId = $this->db->f('parentid');
        $catId = $this->db->f('idcat');

        // omit if cat is no child of any recognized descendant
        if (!isset($aKnown[$parentId])) {
            continue;
        }
        // omit if cat is already recognized (happens with $cat_start)
        if (isset($aKnown[$catId])) {
            continue;
        }

        // add cat as recognized descendant
        $aKnown[$catId] = true;
        $aSubCats[] = $catId;
    }

    return $aSubCats;
}
1155:
1156: /**
1157: * Returns list of searchable article ids.
1158: *
1159: * @param array $search_range
1160: * @return array Articles in specified search range
1161: */
public function getSearchableArticles($search_range) {
    // Builds the inner part of a quoted SQL list ("1','2','3") from a list
    // of ids. Every value is forced to an integer before it reaches the
    // query, so caller supplied ranges cannot inject SQL. An empty list
    // yields '' which keeps the surrounding "IN ('')" syntactically valid,
    // exactly as before.
    $toIdList = function ($ids) {
        $ids = array_unique(array_map(array('cSecurity', 'toInteger'), $ids));
        return implode("','", $ids);
    };

    // categories: whole subtrees ('cat_tree') plus single categories
    $aCatRange = array();
    if (isset($search_range['cat_tree']) && is_array($search_range['cat_tree'])) {
        foreach ($search_range['cat_tree'] as $cat) {
            $aCatRange = array_merge($aCatRange, $this->getSubTree($cat));
        }
    }
    if (isset($search_range['categories']) && is_array($search_range['categories'])) {
        $aCatRange = array_merge($aCatRange, $search_range['categories']);
    }
    $sCatRange = $toIdList($aCatRange);

    // articles: stays an empty string if no article range was given
    // (previously the variable was undefined in that case)
    $sArtRange = '';
    if (isset($search_range['articles']) && is_array($search_range['articles'])) {
        $sArtRange = $toIdList($search_range['articles']);
    }

    // visibility restrictions
    if ($this->_protected == true) {
        $sProtected = " C.public = 1 AND C.visible = 1 AND B.online = 1 ";
    } elseif ($this->_dontshowofflinearticles == true) {
        $sProtected = " C.visible = 1 AND B.online = 1 ";
    } else {
        $sProtected = " 1 ";
    }

    if ($this->_exclude == true) {
        // exclude searchrange
        $sSearchRange = " A.idcat NOT IN ('" . $sCatRange . "') AND B.idart NOT IN ('" . $sArtRange . "') AND ";
    } elseif (strlen($sArtRange) > 0) {
        // include searchrange, restricted to the given articles
        $sSearchRange = " A.idcat IN ('" . $sCatRange . "') AND B.idart IN ('" . $sArtRange . "') AND ";
    } else {
        // include searchrange, categories only
        $sSearchRange = " A.idcat IN ('" . $sCatRange . "') AND ";
    }

    // optional restriction to the registered article specifications
    if (count($this->_articleSpecs) > 0) {
        $sArtSpecs = " B.artspec IN ('" . $toIdList($this->_articleSpecs) . "') AND ";
    } else {
        $sArtSpecs = '';
    }

    $sql = "SELECT
                A.idart
            FROM
                " . $this->cfg["tab"]["cat_art"] . " as A,
                " . $this->cfg["tab"]["art_lang"] . " as B,
                " . $this->cfg["tab"]["cat_lang"] . " as C
            WHERE
                " . $sSearchRange . "
                B.idlang = '" . cSecurity::toInteger($this->lang) . "' AND
                C.idlang = '" . cSecurity::toInteger($this->lang) . "' AND
                A.idart = B.idart AND
                B.searchable = 1 AND
                A.idcat = C.idcat AND
                " . $sArtSpecs . "
                " . $sProtected . " ";
    $this->_debug('sql', $sql);
    $this->db->query($sql);

    $aIdArts = array();
    while ($this->db->nextRecord()) {
        $aIdArts[] = $this->db->f('idart');
    }
    return $aIdArts;
}
1241:
/**
 * Fetches the ids of all online article specifications of the current
 * client and language.
 *
 * @return array Array of article specification ids
 */
public function getArticleSpecifications() {
    // Restrict to the current client and language, online entries only.
    $iClient = cSecurity::toInteger($this->client);
    $iLang = cSecurity::toInteger($this->lang);

    $sql = "SELECT\nidartspec\nFROM\n" . $this->cfg['tab']['art_spec']
        . "\nWHERE\nclient = " . $iClient
        . " AND\nlang = " . $iLang
        . " AND\nonline = 1 ";
    $this->_debug('sql', $sql);
    $this->db->query($sql);

    // Collect the id of every returned record.
    $aIds = array();
    while ($this->db->nextRecord()) {
        $aIds[] = $this->db->f('idartspec');
    }

    return $aIds;
}
1264:
1265: /**
1266: * Set article specification
1267: *
1268: * @param int $iArtspecID
1269: */
public function setArticleSpecification($iArtspecID) {
    // Despite the "set" in its name this appends: the id is added to the
    // list of article specifications, stored exactly as passed in.
    $this->_articleSpecs[] = $iArtspecID;
}
1273:
1274: /**
1275: * Add all article specifications matching name of article specification
1276: * (client dependent but language independent)
1277: *
1278: * @param string $sArtSpecName
1279: * @return boolean
1280: */
public function addArticleSpecificationsByName($sArtSpecName) {
    // Nothing to look up without a name.
    if (!isset($sArtSpecName) || strlen($sArtSpecName) == 0) {
        return false;
    }

    // Matches by client and name only, so specifications of every language
    // carrying that name are picked up.
    $sql = "SELECT
                idartspec
            FROM
                " . $this->cfg['tab']['art_spec'] . "
            WHERE
                client = " . cSecurity::toInteger($this->client) . " AND
                artspec = '" . $this->db->escape($sArtSpecName) . "' ";
    $this->_debug('sql', $sql);
    $this->db->query($sql);
    while ($this->db->nextRecord()) {
        $this->_articleSpecs[] = $this->db->f('idartspec');
    }

    // The lookup was performed (it may have matched no specification at
    // all); previously the method fell off the end here and returned
    // nothing, contradicting its documented boolean return value.
    return true;
}
1299: }
1300:
1301: /**
1302: * CONTENIDO API - SearchResult Object
1303: *
1304: * This object ranks and displays the result of the indexed fulltext search.
1305: * If you are not comfortable with this API feel free to use your own methods to
1306: * display the search results.
1307: * The search result is basically an array with article ID's.
1308: *
1309: * If $search_result = $search->searchIndex($searchword, $searchwordex);
1310: *
1311: * use object with
1312: *
1313: * $oSearchResults = new cSearchResult($search_result, 10);
1314: *
* // html-tags to emphasize the located searchwords
* $oSearchResults->setReplacement('<span style="color:red">', '</span>');
1317: *
1318: * $num_res = $oSearchResults->getNumberOfResults();
1319: * $num_pages = $oSearchResults->getNumberOfPages();
1320: * $res_page = $oSearchResults->getSearchResultPage(1); // first result page
1321: * foreach ($res_page as $key => $val) {
1322: * $headline = $oSearchResults->getSearchContent($key, 'HTMLHEAD');
1323: * $first_headline = $headline[0];
1324: * $text = $oSearchResults->getSearchContent($key, 'HTML');
1325: * $first_text = $text[0];
1326: * $similarity = $oSearchResults->getSimilarity($key);
1327: * $iOccurrence = $oSearchResults->getOccurrence($key);
1328: * }
1329: *
1330: * @package Core
1331: * @subpackage Frontend_Search
1332: *
1333: */
class cSearchResult extends cSearchBaseAbstract {

    /**
     * Search index instance, used to resolve and validate cms-type names
     * and to convert umlauts in search words.
     *
     * @var cSearchIndex
     */
    protected $_index;

    /**
     * Number of results
     *
     * @var int
     */
    protected $_results;

    /**
     * Number of result pages
     *
     * @var int
     */
    protected $_pages;

    /**
     * Result page requested last via getSearchResultPage()
     *
     * @var int
     */
    protected $_resultPage;

    /**
     * Results per page to display
     *
     * @var int
     */
    protected $_resultPerPage;

    /**
     * Html-tags to emphasize the searchwords: either empty or exactly two
     * entries (opening tag, closing tag).
     *
     * @var array
     */
    protected $_replacement = array();

    /**
     * Ranking factor per article id
     *
     * @var array
     */
    protected $_rankStructure = array();

    /**
     * List of result pages, each page being an array of
     * article id => ranking factor
     *
     * @var array
     */
    protected $_orderedSearchResult = array();

    /**
     * Array of article ids with information about cms-types, occurrence of
     * keyword/searchword and similarity.
     *
     * @var array
     */
    protected $_searchResult = array();

    /**
     * Computes the ranking factor for each search result and orders the
     * search results by ranking factor.
     *
     * NOTE: The ranking factor is the sum of occurrences of matching
     * searchterms weighted by similarity (in %) between searchword and
     * matching word in the article.
     * TODO: One can think of more sophisticated ranking strategies. One could
     * use the content type information for example because a matching word
     * in the headline (CMS_HEADLINE[1]) could be weighted more than a
     * matching word in the text (CMS_HTML[1]).
     *
     * @param array $search_result Search result keyed by article id; any
     *        non-array value is treated as an empty result
     * @param int $result_per_page Number of items per page, a value of 0 or
     *        less puts all results on one page
     * @param cDb $oDB Optional db instance
     * @param bool $bDebug Optional flag to enable debugging
     */
    public function __construct($search_result, $result_per_page, $oDB = NULL, $bDebug = false) {
        parent::__construct($oDB, $bDebug);

        $this->_index = new cSearchIndex($oDB);

        // count() and foreach below need an array, so a failed search
        // (NULL/false) is handled like "nothing found"
        $this->_searchResult = is_array($search_result) ? $search_result : array();
        $this->_debug('$this->search_result', $this->_searchResult);

        $this->_resultPerPage = $result_per_page;
        $this->_results = count($this->_searchResult);

        // compute ranking factor for each search result
        foreach (array_keys($this->_searchResult) as $article) {
            $this->_rankStructure[$article] = $this->getOccurrence($article) * ($this->getSimilarity($article) / 100);
        }
        $this->_debug('$this->rank_structure', $this->_rankStructure);

        $this->setOrderedSearchResult($this->_rankStructure, $this->_resultPerPage);
        $this->_pages = count($this->_orderedSearchResult);
        $this->_debug('$this->ordered_search_result', $this->_orderedSearchResult);
    }

    /**
     * Orders the ranked articles by descending ranking factor and splits
     * them into result pages. Replaces any previously stored pages and
     * updates the number of pages.
     *
     * @param array $ranked_search Ranking factor per article id
     * @param int $result_per_page Number of items per page, a value of 0 or
     *        less puts all results on one page
     */
    public function setOrderedSearchResult($ranked_search, $result_per_page) {
        asort($ranked_search);
        $sorted_rank = array_reverse($ranked_search, true);

        $perPage = (int) $result_per_page;
        if ($perPage > 0) {
            $this->_orderedSearchResult = array_chunk($sorted_rank, $perPage, true);
        } else {
            // assign instead of append, otherwise every further call would
            // add another page on top of the existing ones
            $this->_orderedSearchResult = array(
                $sorted_rank
            );
        }

        $this->_pages = count($this->_orderedSearchResult);
    }

    /**
     * Loads the article in the current language and returns the requested
     * content.
     *
     * @param int $art_id Id of an article
     * @param string $cms_type
     * @param int $id
     * @return mixed Whatever cApiArticleLanguage::getContent() delivers for
     *         the given content type and id
     */
    public function getContent($art_id, $cms_type, $id = 0) {
        $article = new cApiArticleLanguage();
        $article->loadByArticleAndLanguageId($art_id, $this->lang, true);
        return $article->getContent($cms_type, $id);
    }

    /**
     * Returns the tag-stripped content of an article for one content type.
     * If the article was found through that content type and replacement
     * tags are set, the located searchwords are emphasized.
     *
     * @param int $art_id Id of an article
     * @param string $cms_type Content type, with or without 'CMS_' prefix
     * @param int $cms_nr Optional number of the content entry; if omitted
     *        the entries found by the search (or all entries, if the search
     *        did not hit this type) are returned
     * @return array List of content strings, empty for an unknown 'CMS_'
     *         content type
     */
    public function getSearchContent($art_id, $cms_type, $cms_nr = NULL) {
        $cms_type = strtoupper($cms_type);
        if (strlen($cms_type) > 0) {
            if (stripos($cms_type, 'cms_') === false) {
                // short form given: add the prefix if the suffix is known
                if (in_array($cms_type, $this->_index->getCmsTypeSuffix())) {
                    $cms_type = 'CMS_' . $cms_type;
                }
            } elseif (!array_key_exists($cms_type, $this->_index->getCmsType())) {
                return array();
            }
        }

        $article = new cApiArticleLanguage();
        $article->loadByArticleAndLanguageId($art_id, $this->lang, true);

        $hasNumber = isset($cms_nr) && is_numeric($cms_nr);
        $content = array();

        if (!isset($this->_searchResult[$art_id][$cms_type])) {
            // searchword was not found in cms_type: plain content only
            if ($hasNumber) {
                $content[] = strip_tags($article->getContent($cms_type, $cms_nr));
            } else {
                $art_content = $article->getContent($cms_type);
                if (is_array($art_content)) {
                    foreach ($art_content as $val) {
                        $content[] = strip_tags($val);
                    }
                }
            }
            return $content;
        }

        // searchword occurs in cms_type
        $search_words = array();
        if (isset($this->_searchResult[$art_id]['search'])) {
            $search_words = array_unique((array) $this->_searchResult[$art_id]['search']);
        }
        $emphasize = count($this->_replacement) == 2;

        if ($hasNumber) {
            // get content of cms_type[cms_nr]
            // build consistent escaped string (Timo Trautmann) 2008-04-17
            $cms_content = conHtmlentities(conHtmlEntityDecode(strip_tags($article->getContent($cms_type, $cms_nr))));
            if ($emphasize) {
                // bring the words into the same escaped form as the
                // content, replace ae ue .. with original html entities
                $escaped_words = array();
                foreach ($search_words as $word) {
                    $escaped_words[] = conHtmlentities(conHtmlEntityDecode($this->_index->addSpecialUmlauts($word)));
                }
                $cms_content = $this->_emphasizeSearchWords($cms_content, $escaped_words);
            }
            $content[] = htmlspecialchars_decode($cms_content);
            return $content;
        }

        // get content of cms_type[$id], where $id are the cms_type numbers
        // found in search
        foreach (array_unique($this->_searchResult[$art_id][$cms_type]) as $id) {
            $cms_content = strip_tags($article->getContent($cms_type, $id));
            if ($emphasize) {
                $cms_content = $this->_emphasizeSearchWords($cms_content, $search_words);
            }
            $content[] = $cms_content;
        }

        return $content;
    }

    /**
     * Wraps every case-insensitive occurrence of the given words into the
     * replacement tags set via setReplacement().
     *
     * All words are matched in one single pass, so a word can never match
     * inside the markup inserted for another word, and each hit keeps its
     * own spelling. Words are quoted for the regular expression, hence
     * characters like '/', '+' or '(' are matched literally.
     *
     * @param string $content Text to emphasize the words in
     * @param array $words Words to emphasize
     * @return string Text with emphasized words, the unchanged text if there
     *         is nothing to emphasize or the replacement failed
     */
    protected function _emphasizeSearchWords($content, array $words) {
        if (count($this->_replacement) != 2) {
            return $content;
        }

        $alternatives = array();
        foreach ($words as $word) {
            $word = (string) $word;
            // an empty alternative would match at every position
            if ($word !== '') {
                $alternatives[$word] = preg_quote($word, '/');
            }
        }
        if (count($alternatives) == 0) {
            return $content;
        }

        // longest alternative first, so that 'foobar' wins over 'foo'
        usort($alternatives, function ($a, $b) {
            return strlen($b) - strlen($a);
        });

        $open = $this->_replacement[0];
        $close = $this->_replacement[1];

        // a callback is used so that '$' or '\' inside the tags or the
        // matched text is never interpreted as a back reference
        $result = preg_replace_callback('/' . implode('|', $alternatives) . '/i', function ($match) use ($open, $close) {
            return $open . $match[0] . $close;
        }, $content);

        return ($result === NULL) ? $content : $result;
    }

    /**
     * Returns articles in page.
     *
     * @param int $page_id Number of the page, starting with 1
     * @return array Articles in page $page_id (article id => ranking
     *         factor), an empty array if the page does not exist
     */
    public function getSearchResultPage($page_id) {
        $this->_resultPage = $page_id;

        $index = (int) $page_id - 1;
        if (!isset($this->_orderedSearchResult[$index])) {
            return array();
        }

        return $this->_orderedSearchResult[$index];
    }

    /**
     * Returns number of result pages
     *
     * @return int
     */
    public function getNumberOfPages() {
        return $this->_pages;
    }

    /**
     * Returns number of results
     *
     * @return int
     */
    public function getNumberOfResults() {
        return $this->_results;
    }

    /**
     * Returns the similarity stored for an article.
     *
     * @param int $art_id Id of an article
     * @return int Similarity between searchword and matching word in
     *         article, 0 if no similarity is stored for the article
     */
    public function getSimilarity($art_id) {
        if (!isset($this->_searchResult[$art_id]['similarity'])) {
            return 0;
        }

        return $this->_searchResult[$art_id]['similarity'];
    }

    /**
     * Returns the summed up occurrences stored for an article.
     *
     * @param int $art_id Id of an article
     * @return int Number of matching searchwords found in article, 0 if no
     *         occurrences are stored for the article
     */
    public function getOccurrence($art_id) {
        // the key really is spelled 'occurence' in the search result
        if (!isset($this->_searchResult[$art_id]['occurence']) || !is_array($this->_searchResult[$art_id]['occurence'])) {
            return 0;
        }

        return array_sum($this->_searchResult[$art_id]['occurence']);
    }

    /**
     * Sets the html-tags used to emphasize located searchwords. The call is
     * ignored if one of the tags is empty; a valid pair replaces a
     * previously set pair.
     *
     * @param string $rep1 The opening html-tag to emphasize the searchword
     *        e.g. '<b>'
     * @param string $rep2 The closing html-tag e.g. '</b>'
     */
    public function setReplacement($rep1, $rep2) {
        if (strlen(trim($rep1)) > 0 && strlen(trim($rep2)) > 0) {
            // assign instead of append: getSearchContent() only emphasizes
            // with exactly two entries, a second call used to disable it
            $this->_replacement = array(
                $rep1,
                $rep2
            );
        }
    }

    /**
     * Returns the category of an article, taken from the first matching
     * record of the category-article table.
     *
     * @param int $artid
     * @return mixed Category id as delivered by the database, NULL if no
     *         record exists for the article
     * @todo Is not job of search, should be outsourced!
     */
    public function getArtCat($artid) {
        $sql = "SELECT idcat FROM " . $this->cfg['tab']['cat_art'] . "
                WHERE idart = " . cSecurity::toInteger($artid) . " ";
        $this->db->query($sql);
        if ($this->db->nextRecord()) {
            return $this->db->f('idcat');
        }

        return NULL;
    }
}
1646: