1: <?php
2:
3: /**
4: * This file contains the class for content search results.
5: *
6: * @package Core
7: * @subpackage Frontend_Search
8: * @author Willi Man
9: * @copyright four for business AG <www.4fb.de>
10: * @license http://www.contenido.org/license/LIZENZ.txt
11: * @link http://www.4fb.de
12: * @link http://www.contenido.org
13: */
14:
15: defined('CON_FRAMEWORK') || die('Illegal call: Missing framework initialization - request aborted.');
16:
17: cInclude('includes', 'functions.encoding.php');
18:
19:
20: /**
21: * CONTENIDO API - SearchResult Object
22: *
23: * This object ranks and displays the result of the indexed fulltext
24: * search.
25: * If you are not comfortable with this API feel free to use your own
26: * methods to display the search results.
27: * The search result is basically an array with article ID's.
28: *
29: * If $search_result = $search->searchIndex($searchword, $searchwordex);
30: *
31: * use object with
32: *
33: * $oSearchResults = new cSearchResult($search_result, 10);
34: *
35: * // html-tags to emphasize the located searchwords
36: * $oSearchResults->setReplacement('<span style="color:red">', '</span>');
37: *
38: * $num_res = $oSearchResults->getNumberOfResults();
39: * $num_pages = $oSearchResults->getNumberOfPages();
40: * // first result page
41: * $res_page = $oSearchResults->getSearchResultPage(1);
42: *
43: * foreach ($res_page as $key => $val) {
44: * $headline = $oSearchResults->getSearchContent($key, 'HTMLHEAD');
45: * $first_headline = $headline[0];
46: * $text = $oSearchResults->getSearchContent($key, 'HTML');
47: * $first_text = $text[0];
48: * $similarity = $oSearchResults->getSimilarity($key);
49: * $iOccurrence = $oSearchResults->getOccurrence($key);
50: * }
51: *
52: * @package Core
53: * @subpackage Frontend_Search
54: */
class cSearchResult extends cSearchBaseAbstract {

    /**
     * Search index helper. It is used to look up the known content
     * types / type suffixes and to normalize umlauts in search words.
     *
     * @var cSearchIndex
     */
    protected $_index;

    /**
     * Number of results
     *
     * @var int
     */
    protected $_results;

    /**
     * Number of result pages
     *
     * @var int
     */
    protected $_pages;

    /**
     * Current result page (1-based), set by getSearchResultPage()
     *
     * @var int
     */
    protected $_resultPage;

    /**
     * Results per page to display
     *
     * @var int
     */
    protected $_resultPerPage;

    /**
     * HTML tags used to emphasize the search words: either empty or
     * exactly two entries (opening tag, closing tag).
     *
     * @var array
     */
    protected $_replacement = array();

    /**
     * Ranking factor per article id
     *
     * @var array
     */
    protected $_rankStructure = array();

    /**
     * List of result pages, each page being an array of
     * article id => ranking factor
     *
     * @var array
     */
    protected $_orderedSearchResult = array();

    /**
     * Array of article ids with information about cms-types, occurrence
     * of keyword/searchword and similarity.
     *
     * @var array
     */
    protected $_searchResult = array();

    /**
     * Constructor to create an instance of this class.
     *
     * Computes the ranking factor for each search result and orders the
     * search results by ranking factor.
     *
     * NOTE: The ranking factor is the sum of occurrences of matching
     * search terms weighted by similarity (in %) between search word
     * and matching word in the article.
     *
     * TODO: One can think of more sophisticated ranking strategies.
     * The content type information could be used for example because a
     * matching word in the headline (CMS_HEADLINE[1]) could be weighted
     * more than a matching word in the text (CMS_HTML[1]).
     *
     * @param array $search_result
     *         search result keyed by article id; any non-array value is
     *         treated as an empty result
     * @param int $result_per_page
     *         number of items per page
     * @param cDb $oDB [optional]
     *         CONTENIDO database object
     * @param bool $bDebug [optional]
     *         flag to enable debugging
     *
     * @throws cInvalidArgumentException
     */
    public function __construct($search_result, $result_per_page, $oDB = NULL, $bDebug = false) {
        parent::__construct($oDB, $bDebug);

        $this->_index = new cSearchIndex($oDB);

        // A failed search may hand over something that is not an array;
        // treat that as "no results" so count() and foreach stay safe.
        $this->_searchResult = is_array($search_result) ? $search_result : array();
        $this->_debug('$this->search_result', $this->_searchResult);

        $this->_resultPerPage = $result_per_page;
        $this->_results = count($this->_searchResult);

        // compute ranking factor for each search result
        foreach (array_keys($this->_searchResult) as $article) {
            $this->_rankStructure[$article] = $this->getOccurrence($article) * ($this->getSimilarity($article) / 100);
        }
        $this->_debug('$this->rank_structure', $this->_rankStructure);

        $this->setOrderedSearchResult($this->_rankStructure, $this->_resultPerPage);
        $this->_pages = count($this->_orderedSearchResult);
        $this->_debug('$this->ordered_search_result', $this->_orderedSearchResult);
    }

    /**
     * Sorts the ranked articles by descending ranking factor and splits
     * them into result pages.
     *
     * Any previously stored pages are replaced. If $result_per_page is
     * not a positive number all results are put on one single page.
     *
     * @param array $ranked_search
     *         ranking factor per article id
     * @param int $result_per_page
     *         number of items per page
     */
    public function setOrderedSearchResult($ranked_search, $result_per_page) {
        asort($ranked_search);
        $sorted_rank = array_reverse($ranked_search, true);

        // array_chunk() needs a positive integer size
        $per_page = is_numeric($result_per_page) ? (int) $result_per_page : 0;

        if ($per_page > 0) {
            $this->_orderedSearchResult = array_chunk($sorted_rank, $per_page, true);
        } else {
            // replace instead of append, consistent with the paging branch
            $this->_orderedSearchResult = array($sorted_rank);
        }
    }

    /**
     * Returns the content of an article for the language of this search.
     *
     * @param int $art_id
     *         Id of an article
     * @param string $cms_type
     *         Content type
     * @param int $id [optional]
     *         passed on unchanged to cApiArticleLanguage::getContent()
     *
     * @return string
     *         Content of an article, specified by its content type
     *
     * @throws cDbException
     * @throws cException
     */
    public function getContent($art_id, $cms_type, $id = 0) {
        $article = new cApiArticleLanguage();
        $article->loadByArticleAndLanguageId($art_id, $this->lang);

        return $article->getContent($cms_type, $id);
    }

    /**
     * Returns the tag-stripped content of one content type of an article.
     *
     * If the search words were found in the given content type and
     * replacement tags have been set via setReplacement(), every
     * occurrence of a search word is wrapped in these tags.
     *
     * $cms_type may be given with or without the 'CMS_' prefix. A type
     * containing the prefix that is not known to the search index
     * yields an empty array.
     *
     * @param int $art_id
     *         Id of an article
     * @param string $cms_type
     *         Content type
     * @param int $cms_nr [optional]
     *         number of the content entry; if omitted, all entries of
     *         the type are returned (only the entries with hits when
     *         the search words were found in this type)
     *
     * @return array
     *         Content of an article in search result, specified by its type
     *
     * @throws cDbException
     * @throws cException
     */
    public function getSearchContent($art_id, $cms_type, $cms_nr = NULL) {
        $cms_type = cString::toUpperCase($cms_type);
        if (cString::getStringLength($cms_type) > 0) {
            // Compare against false explicitly: the prefix is normally
            // found at offset 0, which a plain negation would mistake
            // for "not found".
            // NOTE(review): assumes findFirstOccurrenceCI() returns
            // int|false like stripos() - confirm against cString.
            if (cString::findFirstOccurrenceCI($cms_type, 'cms_') === false) {
                if (in_array($cms_type, $this->_index->getCmsTypeSuffix())) {
                    $cms_type = 'CMS_' . $cms_type;
                }
            } elseif (!array_key_exists($cms_type, $this->_index->getCmsType())) {
                return array();
            }
        }

        $article = new cApiArticleLanguage();
        $article->loadByArticleAndLanguageId($art_id, $this->lang);

        $use_number = isset($cms_nr) && is_numeric($cms_nr);
        $emphasize = count($this->_replacement) == 2;
        $content = array();

        if (isset($this->_searchResult[$art_id][$cms_type])) {
            // search word occurs in cms_type
            $search_words = isset($this->_searchResult[$art_id]['search'])
                ? array_unique((array) $this->_searchResult[$art_id]['search'])
                : array();

            if ($use_number) {
                // get content of cms_type[cms_nr]
                // build consistent escaped string (Timo Trautmann) 2008-04-17
                $cms_content = conHtmlentities(conHtmlEntityDecode(strip_tags($article->getContent($cms_type, $cms_nr))));
                if ($emphasize) {
                    // bring the search words into the same escaped form
                    // as the content before looking for them
                    $escaped_words = array();
                    foreach ($search_words as $word) {
                        $escaped_words[] = conHtmlentities(conHtmlEntityDecode($this->_index->addSpecialUmlauts($word)));
                    }
                    $cms_content = $this->_emphasizeSearchWords($cms_content, $escaped_words);
                }
                $content[] = htmlspecialchars_decode($cms_content);
            } else {
                // get content of cms_type[$id], where $id are the
                // cms_type numbers found in search
                $type_ids = array_unique((array) $this->_searchResult[$art_id][$cms_type]);
                foreach ($type_ids as $id) {
                    $cms_content = strip_tags($article->getContent($cms_type, $id));
                    if ($emphasize) {
                        $cms_content = $this->_emphasizeSearchWords($cms_content, $search_words);
                    }
                    $content[] = $cms_content;
                }
            }
        } elseif ($use_number) {
            // search word was not found in cms_type
            $content[] = strip_tags($article->getContent($cms_type, $cms_nr));
        } else {
            // search word was not found in cms_type
            $art_content = $article->getContent($cms_type);
            // only iterate when a list of entries came back
            if (is_array($art_content)) {
                foreach ($art_content as $val) {
                    $content[] = strip_tags($val);
                }
            }
        }

        return $content;
    }

    /**
     * Wraps every case-insensitive occurrence of the given words in the
     * replacement tags.
     *
     * The words are matched literally (regex meta characters are
     * escaped) and each occurrence keeps its own spelling. Must only be
     * called when both replacement tags are set.
     *
     * @param string $content
     *         text to search in
     * @param array $words
     *         words to emphasize
     *
     * @return string
     *         text with emphasized words
     */
    protected function _emphasizeSearchWords($content, array $words) {
        $open = $this->_replacement[0];
        $close = $this->_replacement[1];

        foreach ($words as $word) {
            $word = (string) $word;
            if ($word === '') {
                // an empty pattern would match between all characters
                continue;
            }

            // A callback is used so that neither the tags nor the matched
            // text are parsed for backreferences ($1, \1).
            $emphasized = preg_replace_callback(
                '/' . preg_quote($word, '/') . '/i',
                function ($match) use ($open, $close) {
                    return $open . $match[0] . $close;
                },
                $content
            );

            // NULL signals a PCRE error; keep the text unchanged then
            if ($emphasized !== NULL) {
                $content = $emphasized;
            }
        }

        return $content;
    }

    /**
     * Returns articles in page and remembers the page as current page.
     *
     * @param int $page_id
     *         number of the page, starting with 1
     * @return array
     *         Articles in page $page_id, empty array for an unknown page
     */
    public function getSearchResultPage($page_id) {
        if (!isset($this->_orderedSearchResult[$page_id - 1])) {
            return array();
        }

        $this->_resultPage = $page_id;

        return $this->_orderedSearchResult[$page_id - 1];
    }

    /**
     * Returns number of result pages.
     *
     * @return int
     */
    public function getNumberOfPages() {
        return $this->_pages;
    }

    /**
     * Returns number of results.
     *
     * @return int
     */
    public function getNumberOfResults() {
        return $this->_results;
    }

    /**
     * Returns the similarity stored for an article.
     *
     * @param int $art_id
     *         Id of an article
     * @return int
     *         Similarity between search word and matching word in
     *         article, 0 if none is stored
     */
    public function getSimilarity($art_id) {
        if (isset($this->_searchResult[$art_id]['similarity'])) {
            return $this->_searchResult[$art_id]['similarity'];
        }

        return 0;
    }

    /**
     * Returns the summed occurrences stored for an article.
     *
     * @param int $art_id
     *         Id of an article
     * @return int
     *         number of matching search words found in article, 0 if
     *         no occurrences are stored
     */
    public function getOccurrence($art_id) {
        // NOTE: 'occurence' (sic) is the key used in the search result
        if (!isset($this->_searchResult[$art_id]['occurence'])
            || !is_array($this->_searchResult[$art_id]['occurence'])) {
            return 0;
        }

        // array_sum() does not depend on sequential 0-based keys
        return array_sum($this->_searchResult[$art_id]['occurence']);
    }

    /**
     * Sets the HTML tags used to emphasize the search words. The call
     * is ignored if one of the tags is empty after trimming.
     *
     * @param string $rep1
     *         The opening html-tag to emphasize the searchword e.g. '<b>'
     * @param string $rep2
     *         The closing html-tag e.g. '</b>'
     */
    public function setReplacement($rep1, $rep2) {
        if (cString::getStringLength(trim($rep1)) > 0 && cString::getStringLength(trim($rep2)) > 0) {
            // Replace the pair instead of appending to it: with more
            // than two entries the emphasizing would be switched off.
            $this->_replacement = array($rep1, $rep2);
        }
    }

    /**
     * Returns the category id of the first category assignment found
     * for an article.
     *
     * @todo refactor this because it shouldn't be the Search's job
     *
     * @param int $artid
     *
     * @return int|NULL
     *         Category Id or NULL if the article has no category
     * @throws cDbException
     */
    public function getArtCat($artid) {
        $sql = "SELECT idcat FROM " . $this->cfg['tab']['cat_art'] . "
                WHERE idart = " . cSecurity::toInteger($artid) . " ";
        $this->db->query($sql);
        if ($this->db->nextRecord()) {
            return $this->db->f('idcat');
        }

        return NULL;
    }
}
399: