1: <?php
2:
3: /**
4: * This file contains the class for content search results.
5: *
6: * @package Core
7: * @subpackage Frontend_Search
8: * @version SVN Revision $Rev:$
9: *
10: * @author Willi Man
11: * @copyright four for business AG <www.4fb.de>
12: * @license http://www.contenido.org/license/LIZENZ.txt
13: * @link http://www.4fb.de
14: * @link http://www.contenido.org
15: */
16:
17: defined('CON_FRAMEWORK') || die('Illegal call: Missing framework initialization - request aborted.');
18:
19: cInclude('includes', 'functions.encoding.php');
20:
21:
22: /**
23: * CONTENIDO API - SearchResult Object
24: *
25: * This object ranks and displays the result of the indexed fulltext search.
26: * If you are not comfortable with this API feel free to use your own methods to
27: * display the search results.
28: * The search result is basically an array with article ID's.
29: *
30: * If $search_result = $search->searchIndex($searchword, $searchwordex);
31: *
32: * use object with
33: *
34: * $oSearchResults = new cSearchResult($search_result, 10);
35: *
36: * $oSearchResults->setReplacement('<span style="color:red">', '</span>'); //
37: * html-tags to emphasize the located searchwords
38: *
39: * $num_res = $oSearchResults->getNumberOfResults();
40: * $num_pages = $oSearchResults->getNumberOfPages();
41: * $res_page = $oSearchResults->getSearchResultPage(1); // first result page
42: * foreach ($res_page as $key => $val) {
43: * $headline = $oSearchResults->getSearchContent($key, 'HTMLHEAD');
44: * $first_headline = $headline[0];
45: * $text = $oSearchResults->getSearchContent($key, 'HTML');
46: * $first_text = $text[0];
47: * $similarity = $oSearchResults->getSimilarity($key);
48: * $iOccurrence = $oSearchResults->getOccurrence($key);
49: * }
50: *
51: * @package Core
52: * @subpackage Frontend_Search
53: *
54: */
class cSearchResult extends cSearchBaseAbstract {

    /**
     * Search index instance. Used here to resolve content type names and to
     * map umlauts in search words before highlighting.
     *
     * @var cSearchIndex
     */
    protected $_index;

    /**
     * Number of results
     *
     * @var int
     */
    protected $_results;

    /**
     * Number of result pages
     *
     * @var int
     */
    protected $_pages;

    /**
     * Result page requested by the last call of getSearchResultPage()
     *
     * @var int
     */
    protected $_resultPage;

    /**
     * Results per page to display
     *
     * @var int
     */
    protected $_resultPerPage;

    /**
     * Opening and closing html-tag used to emphasize the searchwords.
     * Either empty (no emphasizing) or exactly two entries.
     *
     * @var array
     */
    protected $_replacement = array();

    /**
     * Ranking factor per article id
     *
     * @var array
     */
    protected $_rankStructure = array();

    /**
     * List of result pages, each page is an array of ranking factors keyed
     * by article id
     *
     * @var array
     */
    protected $_orderedSearchResult = array();

    /**
     * Raw search result keyed by article id. This class reads the entries
     * 'search' (matching searchwords), 'occurence' (list of occurrence
     * counts), 'similarity' and one entry per content type (e.g. CMS_HTML)
     * holding the content type numbers with a match.
     *
     * @var array
     */
    protected $_searchResult = array();

    /**
     * Computes the ranking factor for each search result and orders the
     * search results by ranking factor.
     *
     * NOTE: The ranking factor is the sum of occurrences of matching
     * searchterms weighted by similarity (in %) between searchword and
     * matching word in the article.
     * TODO: One can think of more sophisticated ranking strategies. One could
     * use the content type information for example because a matching word
     * in the headline (CMS_HEADLINE[1]) could be weighted more than a
     * matching word in the text (CMS_HTML[1]).
     *
     * @param array $search_result
     *         Search result keyed by article id
     * @param int $result_per_page
     *         Number of items per page
     * @param cDb $oDB [optional]
     *         db instance
     * @param bool $bDebug [optional]
     *         Optional flag to enable debugging
     */
    public function __construct($search_result, $result_per_page, $oDB = NULL, $bDebug = false) {
        parent::__construct($oDB, $bDebug);

        $this->_index = new cSearchIndex($oDB);

        // A missing or invalid result set is treated as "no results" so that
        // count() and foreach below stay safe.
        $this->_searchResult = is_array($search_result) ? $search_result : array();
        $this->_debug('$this->search_result', $this->_searchResult);

        $this->_resultPerPage = $result_per_page;
        $this->_results = count($this->_searchResult);

        // compute ranking factor for each search result
        foreach (array_keys($this->_searchResult) as $article) {
            $this->_rankStructure[$article] = $this->getOccurrence($article) * ($this->getSimilarity($article) / 100);
        }
        $this->_debug('$this->rank_structure', $this->_rankStructure);

        $this->setOrderedSearchResult($this->_rankStructure, $this->_resultPerPage);
        $this->_pages = count($this->_orderedSearchResult);
        $this->_debug('$this->ordered_search_result', $this->_orderedSearchResult);
    }

    /**
     * Sorts the ranked articles by descending ranking factor and splits them
     * into result pages. Replaces any previously stored pages and refreshes
     * the number of pages.
     *
     * @param array $ranked_search
     *         Ranking factors keyed by article id
     * @param int $result_per_page
     *         Number of items per page, values lower than 1 put all results
     *         on a single page
     */
    public function setOrderedSearchResult($ranked_search, $result_per_page) {
        if (!is_array($ranked_search)) {
            $ranked_search = array();
        }

        // ascending sort + reverse keeps the established order of equally
        // ranked articles
        asort($ranked_search);
        $sorted_rank = array_reverse($ranked_search, true);

        // array_chunk() needs an integer size of at least 1
        $per_page = (int) $result_per_page;
        if ($per_page > 0) {
            $this->_orderedSearchResult = array_chunk($sorted_rank, $per_page, true);
        } else {
            // assign instead of append, otherwise repeated calls pile up pages
            $this->_orderedSearchResult = array($sorted_rank);
        }

        $this->_pages = count($this->_orderedSearchResult);
    }

    /**
     * Loads the article in the current language and returns the requested
     * content.
     *
     * @param int $art_id
     *         Id of an article
     * @param string $cms_type
     *         Content type
     * @param int $id [optional]
     *         Number of the content type entry
     * @return string
     *         Content of an article, specified by its content type
     */
    public function getContent($art_id, $cms_type, $id = 0) {
        $article = new cApiArticleLanguage();
        $article->loadByArticleAndLanguageId($art_id, $this->lang, true);
        return $article->getContent($cms_type, $id);
    }

    /**
     * Returns the tag-stripped content of an article for one content type.
     * If a replacement was set via setReplacement() and the searchwords were
     * found in that content type, the located searchwords are emphasized.
     *
     * @param int $art_id
     *         Id of an article
     * @param string $cms_type
     *         Content type, with or without the leading "CMS_"
     * @param int $cms_nr [optional]
     *         Number of the content type entry. If omitted, all entries with
     *         a match (or all entries if there was no match) are returned.
     * @return array
     *         List of content strings, empty for an unknown "CMS_" type
     */
    public function getSearchContent($art_id, $cms_type, $cms_nr = NULL) {
        $cms_type = strtoupper($cms_type);
        if (strlen($cms_type) > 0) {
            if (!stristr($cms_type, 'cms_')) {
                // short form given, e.g. 'HTML' => 'CMS_HTML'
                if (in_array($cms_type, $this->_index->getCmsTypeSuffix())) {
                    $cms_type = 'CMS_' . $cms_type;
                }
            } elseif (!array_key_exists($cms_type, $this->_index->getCmsType())) {
                return array();
            }
        }

        $article = new cApiArticleLanguage();
        $article->loadByArticleAndLanguageId($art_id, $this->lang, true);

        $has_number = isset($cms_nr) && is_numeric($cms_nr);
        $content = array();

        if (!isset($this->_searchResult[$art_id][$cms_type])) {
            // searchword was not found in cms_type
            if ($has_number) {
                $content[] = strip_tags($article->getContent($cms_type, $cms_nr));
            } else {
                $art_content = $article->getContent($cms_type);
                // count() on a non-array is an error in PHP 8, so check the
                // type instead of the size
                if (is_array($art_content)) {
                    foreach ($art_content as $val) {
                        $content[] = strip_tags($val);
                    }
                }
            }
            return $content;
        }

        // searchword occurs in cms_type
        $search_words = array();
        if (isset($this->_searchResult[$art_id]['search']) && is_array($this->_searchResult[$art_id]['search'])) {
            $search_words = array_unique($this->_searchResult[$art_id]['search']);
        }
        $emphasize = count($this->_replacement) == 2;

        if ($has_number) {
            // get content of cms_type[cms_nr]
            // build consistent escaped string (Timo Trautmann) 2008-04-17
            $cms_content = conHtmlentities(conHtmlEntityDecode(strip_tags($article->getContent($cms_type, $cms_nr))));
            if ($emphasize) {
                $escaped_words = array();
                foreach ($search_words as $word) {
                    // build consistent escaped string, replace ae ue ..
                    // with original html entities (Timo Trautmann) 2008-04-17
                    $escaped_words[] = conHtmlentities(conHtmlEntityDecode($this->_index->addSpecialUmlauts($word)));
                }
                $cms_content = $this->_emphasizeWords($cms_content, $escaped_words);
            }
            $content[] = htmlspecialchars_decode($cms_content);
            return $content;
        }

        // get content of cms_type[$id], where $id are the cms_type numbers
        // found in search
        $id_type = array_unique((array) $this->_searchResult[$art_id][$cms_type]);
        foreach ($id_type as $id) {
            $cms_content = strip_tags($article->getContent($cms_type, $id));
            if ($emphasize) {
                $cms_content = $this->_emphasizeWords($cms_content, $search_words);
            }
            $content[] = $cms_content;
        }

        return $content;
    }

    /**
     * Wraps every case-insensitive occurrence of the given words with the
     * configured replacement tags.
     *
     * All words are matched in one pass, so a word can never match inside
     * the markup that was inserted for another word, and every occurrence
     * keeps its own casing. Words are matched literally.
     * NOTE(review): if callers ever store regular expressions in the
     * 'search' entry of the search result they are no longer interpreted as
     * patterns here - confirm against the code filling the search result.
     *
     * @param string $content
     *         Text to emphasize the words in
     * @param array $words
     *         Words to emphasize, empty words are ignored
     * @return string
     *         Text with emphasized words, the unchanged text if there is
     *         nothing to emphasize or the pattern could not be applied
     */
    protected function _emphasizeWords($content, array $words) {
        if (count($this->_replacement) != 2) {
            return $content;
        }

        $alternatives = array();
        foreach ($words as $word) {
            $word = (string) $word;
            // an empty alternative would match at every position
            if ($word !== '') {
                $alternatives[] = preg_quote($word, '/');
            }
        }
        if (count($alternatives) == 0) {
            return $content;
        }

        // With exactly one capturing group every odd element of the split
        // result is a matched word, every even element is the text between.
        $pattern = '/(' . implode('|', $alternatives) . ')/i';
        $parts = preg_split($pattern, (string) $content, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $content;
        }

        foreach ($parts as $pos => $part) {
            if ($pos % 2 == 1) {
                $parts[$pos] = $this->_replacement[0] . $part . $this->_replacement[1];
            }
        }

        return implode('', $parts);
    }

    /**
     * Returns articles in page.
     *
     * @param int $page_id
     *         Number of the result page, starting with 1
     * @return array
     *         Articles in page $page_id, empty array if the page does not
     *         exist
     */
    public function getSearchResultPage($page_id) {
        $this->_resultPage = $page_id;

        $index = (int) $page_id - 1;
        if (!isset($this->_orderedSearchResult[$index])) {
            return array();
        }

        return $this->_orderedSearchResult[$index];
    }

    /**
     * Returns number of result pages
     *
     * @return int
     */
    public function getNumberOfPages() {
        return $this->_pages;
    }

    /**
     * Returns number of results
     *
     * @return int
     */
    public function getNumberOfResults() {
        return $this->_results;
    }

    /**
     * Returns the similarity stored for an article in the search result.
     *
     * @param int $art_id
     *         Id of an article
     * @return int
     *         Similarity between searchword and matching word in article,
     *         0 if the article is not part of the search result
     */
    public function getSimilarity($art_id) {
        if (!isset($this->_searchResult[$art_id]['similarity'])) {
            return 0;
        }

        return $this->_searchResult[$art_id]['similarity'];
    }

    /**
     * Sums up the occurrence counts stored for an article in the search
     * result.
     *
     * @param int $art_id
     *         Id of an article
     * @return int
     *         number of matching searchwords found in article, 0 if the
     *         article is not part of the search result
     */
    public function getOccurrence($art_id) {
        // 'occurence' (sic) is the key used by the search result structure
        if (!isset($this->_searchResult[$art_id]['occurence']) || !is_array($this->_searchResult[$art_id]['occurence'])) {
            return 0;
        }

        return array_sum($this->_searchResult[$art_id]['occurence']);
    }

    /**
     * Sets the html-tags used to emphasize the located searchwords. Both
     * tags have to be non-empty, otherwise the call is ignored. A later call
     * replaces the tags of an earlier one.
     *
     * @param string $rep1
     *         The opening html-tag to emphasize the searchword e.g. '<b>'
     * @param string $rep2
     *         The closing html-tag e.g. '</b>'
     */
    public function setReplacement($rep1, $rep2) {
        if (strlen(trim($rep1)) > 0 && strlen(trim($rep2)) > 0) {
            // assign instead of append: emphasizing requires exactly two
            // entries, appending on a second call would disable it
            $this->_replacement = array($rep1, $rep2);
        }
    }

    /**
     * Returns the id of a category the article is assigned to (first record
     * delivered by the database).
     *
     * @todo Is not job of search, should be outsourced!
     * @param int $artid
     *         Id of an article
     * @return int|null
     *         Category Id, NULL if no category was found
     */
    public function getArtCat($artid) {
        $sql = "SELECT idcat FROM " . $this->cfg['tab']['cat_art'] . "
                WHERE idart = " . cSecurity::toInteger($artid) . " ";
        $this->db->query($sql);

        if ($this->db->nextRecord()) {
            return $this->db->f('idcat');
        }

        return NULL;
    }
}
382: