1: <?php
2:
3: /**
4: * This file contains the class for content search results.
5: *
6: * @package Core
7: * @subpackage Frontend_Search
8: * @author Willi Man
9: * @copyright four for business AG <www.4fb.de>
10: * @license http://www.contenido.org/license/LIZENZ.txt
11: * @link http://www.4fb.de
12: * @link http://www.contenido.org
13: */
14:
15: defined('CON_FRAMEWORK') || die('Illegal call: Missing framework initialization - request aborted.');
16:
17: cInclude('includes', 'functions.encoding.php');
18:
19:
20: /**
21: * CONTENIDO API - SearchResult Object
22: *
23: * This object ranks and displays the result of the indexed fulltext
24: * search.
25: * If you are not comfortable with this API feel free to use your own
26: * methods to display the search results.
27: * The search result is basically an array with article ID's.
28: *
29: * If $search_result = $search->searchIndex($searchword, $searchwordex);
30: *
31: * use object with
32: *
33: * $oSearchResults = new cSearchResult($search_result, 10);
34: *
35: * // html-tags to emphasize the located searchwords
36: * $oSearchResults->setReplacement('<span style="color:red">', '</span>');
37: *
38: * $num_res = $oSearchResults->getNumberOfResults();
39: * $num_pages = $oSearchResults->getNumberOfPages();
40: * // first result page
41: * $res_page = $oSearchResults->getSearchResultPage(1);
42: *
43: * foreach ($res_page as $key => $val) {
44: * $headline = $oSearchResults->getSearchContent($key, 'HTMLHEAD');
45: * $first_headline = $headline[0];
46: * $text = $oSearchResults->getSearchContent($key, 'HTML');
47: * $first_text = $text[0];
48: * $similarity = $oSearchResults->getSimilarity($key);
49: * $iOccurrence = $oSearchResults->getOccurrence($key);
50: * }
51: *
52: * @package Core
53: * @subpackage Frontend_Search
54: */
class cSearchResult extends cSearchBaseAbstract {

    /**
     * Search index instance. It is used to look up the known content
     * types (getCmsType(), getCmsTypeSuffix()) and to convert search
     * words with addSpecialUmlauts() before they are highlighted.
     *
     * @var cSearchIndex
     */
    protected $_index;

    /**
     * Number of results, i.e. number of entries in the search result.
     *
     * @var int
     */
    protected $_results;

    /**
     * Number of result pages.
     *
     * @var int
     */
    protected $_pages;

    /**
     * Result page requested last via getSearchResultPage().
     *
     * @var int
     */
    protected $_resultPage;

    /**
     * Number of results to display per page.
     *
     * @var int
     */
    protected $_resultPerPage;

    /**
     * Opening and closing HTML tag used to emphasize located search
     * words. Either empty (no highlighting) or exactly two entries.
     *
     * @var array
     */
    protected $_replacement = array();

    /**
     * Ranking factor per article, indexed by article id.
     *
     * @var array
     */
    protected $_rankStructure = array();

    /**
     * List of result pages. Each page is an array of ranking factors
     * indexed by article id, ordered by descending ranking factor.
     *
     * @var array
     */
    protected $_orderedSearchResult = array();

    /**
     * Search result as passed to the constructor: indexed by article id,
     * each entry holding information about the content types in which
     * the search words were found, their occurrence and similarity.
     *
     * @var array
     */
    protected $_searchResult = array();

    /**
     * Constructor to create an instance of this class.
     *
     * Computes the ranking factor for each search result and orders the
     * search results by ranking factor.
     *
     * NOTE: The ranking factor is the sum of occurrences of matching
     * search terms weighted by the similarity (in %) between search word
     * and matching word in the article.
     *
     * TODO: One can think of more sophisticated ranking strategies.
     * The content type information could be used for example because a
     * matching word in the headline (CMS_HEADLINE[1]) could be weighted
     * more than a matching word in the text (CMS_HTML[1]).
     *
     * @param array $search_result
     *         Search result indexed by article id
     * @param int $result_per_page
     *         Number of items per page; values below 1 put all results
     *         on a single page
     * @param cDb $oDB [optional]
     *         db instance
     * @param bool $bDebug [optional]
     *         Optional flag to enable debugging
     */
    public function __construct($search_result, $result_per_page, $oDB = NULL, $bDebug = false) {
        parent::__construct($oDB, $bDebug);

        $this->_index = new cSearchIndex($oDB);

        // Guard against a non-array value (e.g. false or NULL) so that
        // count() and foreach below cannot fail.
        $this->_searchResult = is_array($search_result) ? $search_result : array();
        $this->_debug('$this->search_result', $this->_searchResult);

        $this->_resultPerPage = $result_per_page;
        $this->_results = count($this->_searchResult);

        // compute ranking factor for each search result
        foreach (array_keys($this->_searchResult) as $article) {
            $this->_rankStructure[$article] = $this->getOccurrence($article) * ($this->getSimilarity($article) / 100);
        }
        $this->_debug('$this->rank_structure', $this->_rankStructure);

        $this->setOrderedSearchResult($this->_rankStructure, $this->_resultPerPage);
        $this->_pages = count($this->_orderedSearchResult);
        $this->_debug('$this->ordered_search_result', $this->_orderedSearchResult);
    }

    /**
     * Orders the ranked results by descending ranking factor and splits
     * them into result pages. Any previously stored pages are replaced
     * and the number of pages is updated accordingly.
     *
     * @param array $ranked_search
     *         Ranking factors indexed by article id
     * @param int $result_per_page
     *         Number of items per page; values below 1 put all results
     *         on a single page
     */
    public function setOrderedSearchResult($ranked_search, $result_per_page) {
        if (!is_array($ranked_search)) {
            $ranked_search = array();
        }

        asort($ranked_search);
        $sorted_rank = array_reverse($ranked_search, true);

        // Start from scratch so that repeated calls do not pile up
        // pages of an earlier call.
        $this->_orderedSearchResult = array();

        // An empty result has no pages at all, with or without paging.
        if (count($sorted_rank) > 0) {
            $per_page = (int) $result_per_page;
            if ($per_page > 0) {
                $this->_orderedSearchResult = array_chunk($sorted_rank, $per_page, true);
            } else {
                $this->_orderedSearchResult[] = $sorted_rank;
            }
        }

        $this->_pages = count($this->_orderedSearchResult);
    }

    /**
     * Loads the article in the current language and returns its content
     * of the given type.
     *
     * @param int $art_id
     *         Id of an article
     * @param string $cms_type
     *         Content type
     * @param int $id [optional]
     *         Number of the content entry, passed on to the article
     * @return mixed
     *         Whatever cApiArticleLanguage::getContent() returns for the
     *         given type and id
     */
    public function getContent($art_id, $cms_type, $id = 0) {
        $article = new cApiArticleLanguage();
        $article->loadByArticleAndLanguageId($art_id, $this->lang);
        return $article->getContent($cms_type, $id);
    }

    /**
     * Returns the tag-stripped content of an article for one content
     * type. If the search words were found in that content type and
     * emphasis tags were set via setReplacement(), the located search
     * words are wrapped in these tags.
     *
     * @param int $art_id
     *         Id of an article
     * @param string $cms_type
     *         Content type, with or without the leading 'CMS_'
     * @param int $cms_nr [optional]
     *         Number of the content entry; if omitted, all entries in
     *         which the search words were found are returned (or all
     *         entries of the type if nothing was found in it)
     * @return array
     *         List of content strings; empty for an unknown 'CMS_' type
     */
    public function getSearchContent($art_id, $cms_type, $cms_nr = NULL) {
        $cms_type = strtoupper((string) $cms_type);
        if (strlen($cms_type) > 0) {
            if (stripos($cms_type, 'cms_') === false) {
                // only a suffix like 'HTML' was given, complete it
                if (in_array($cms_type, $this->_index->getCmsTypeSuffix())) {
                    $cms_type = 'CMS_' . $cms_type;
                }
            } elseif (!array_key_exists($cms_type, $this->_index->getCmsType())) {
                return array();
            }
        }

        $article = new cApiArticleLanguage();
        $article->loadByArticleAndLanguageId($art_id, $this->lang);

        $single_entry = isset($cms_nr) && is_numeric($cms_nr);
        $content = array();

        if (!isset($this->_searchResult[$art_id][$cms_type])) {
            // search word was not found in cms_type, return plain text
            if ($single_entry) {
                $content[] = strip_tags($article->getContent($cms_type, $cms_nr));
            } else {
                $art_content = $article->getContent($cms_type);
                // count()/foreach on a non-array would fail, so only
                // iterate when a list of entries was returned
                if (is_array($art_content)) {
                    foreach ($art_content as $val) {
                        $content[] = strip_tags($val);
                    }
                }
            }
            return $content;
        }

        // search word occurs in cms_type
        $search_words = array();
        if (isset($this->_searchResult[$art_id]['search'])) {
            $search_words = array_unique((array) $this->_searchResult[$art_id]['search']);
        }
        $emphasize = count($this->_replacement) == 2;

        if ($single_entry) {
            // get content of cms_type[cms_nr] as consistently escaped string
            $cms_content = conHtmlentities(conHtmlEntityDecode(strip_tags($article->getContent($cms_type, $cms_nr))));
            if ($emphasize) {
                // escape the search words the same way as the content so
                // that both can be compared
                $escaped_words = array();
                foreach ($search_words as $word) {
                    $escaped_words[] = conHtmlentities(conHtmlEntityDecode($this->_index->addSpecialUmlauts($word)));
                }
                $cms_content = $this->_emphasizeSearchWords($cms_content, $escaped_words);
            }
            $content[] = htmlspecialchars_decode($cms_content);
        } else {
            // get content of cms_type[$id], where $id are the cms_type
            // numbers found in search
            $id_type = array_unique((array) $this->_searchResult[$art_id][$cms_type]);
            foreach ($id_type as $id) {
                $cms_content = strip_tags($article->getContent($cms_type, $id));
                if ($emphasize) {
                    $cms_content = $this->_emphasizeSearchWords($cms_content, $search_words);
                }
                $content[] = $cms_content;
            }
        }

        return $content;
    }

    /**
     * Wraps every case-insensitive occurrence of the given words in the
     * emphasis tags stored in $_replacement.
     *
     * The words are matched literally (regex meta characters are
     * escaped), each occurrence keeps its own spelling, and all words
     * are handled in one pass so that the inserted tags themselves are
     * never searched.
     *
     * @param string $content
     *         Text to search in
     * @param array $words
     *         Search words; empty words are ignored
     * @return string
     *         Text with emphasized search words
     */
    protected function _emphasizeSearchWords($content, array $words) {
        $content = (string) $content;

        $alternatives = array();
        foreach ($words as $word) {
            $word = (string) $word;
            // an empty alternative would match at every position
            if ($word !== '') {
                $alternatives[] = preg_quote($word, '/');
            }
        }
        if (count($alternatives) == 0 || count($this->_replacement) < 2) {
            return $content;
        }

        // With a single capturing group, the split result alternates
        // between plain text (even index) and matched word (odd index).
        $pattern = '/(' . implode('|', $alternatives) . ')/i';
        $parts = preg_split($pattern, $content, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $content;
        }

        $emphasized = '';
        foreach ($parts as $pos => $part) {
            if ($pos % 2 == 1) {
                $emphasized .= $this->_replacement[0] . $part . $this->_replacement[1];
            } else {
                $emphasized .= $part;
            }
        }

        return $emphasized;
    }

    /**
     * Returns articles in page.
     *
     * @param int $page_id
     *         Number of the result page, starting with 1
     * @return array
     *         Ranking factors of the articles in page $page_id, indexed
     *         by article id; empty array if the page does not exist
     */
    public function getSearchResultPage($page_id) {
        $this->_resultPage = $page_id;

        $page_index = (int) $page_id - 1;
        if (!isset($this->_orderedSearchResult[$page_index])) {
            return array();
        }

        return $this->_orderedSearchResult[$page_index];
    }

    /**
     * Returns number of result pages.
     *
     * @return int
     */
    public function getNumberOfPages() {
        return $this->_pages;
    }

    /**
     * Returns number of results.
     *
     * @return int
     */
    public function getNumberOfResults() {
        return $this->_results;
    }

    /**
     * Returns the similarity stored for an article in the search result.
     *
     * @param int $art_id
     *         Id of an article
     * @return int
     *         Similarity between searchword and matching word in article,
     *         0 if the article has no similarity information
     */
    public function getSimilarity($art_id) {
        if (!isset($this->_searchResult[$art_id]['similarity'])) {
            return 0;
        }

        return $this->_searchResult[$art_id]['similarity'];
    }

    /**
     * Returns the summed up occurrences stored for an article in the
     * search result.
     *
     * @param int $art_id
     *         Id of an article
     * @return int
     *         number of matching searchwords found in article, 0 if the
     *         article has no occurrence information
     */
    public function getOccurrence($art_id) {
        // the key really is spelled 'occurence' in the search result
        if (!isset($this->_searchResult[$art_id]['occurence'])) {
            return 0;
        }

        return array_sum((array) $this->_searchResult[$art_id]['occurence']);
    }

    /**
     * Sets the HTML tags used to emphasize located search words. A
     * later call replaces the tags of an earlier call; a call with an
     * empty tag is ignored.
     *
     * @param string $rep1
     *         The opening html-tag to emphasize the searchword e.g. '<b>'
     * @param string $rep2
     *         The closing html-tag e.g. '</b>'
     */
    public function setReplacement($rep1, $rep2) {
        if (strlen(trim($rep1)) > 0 && strlen(trim($rep2)) > 0) {
            // replace instead of append, highlighting relies on
            // exactly two entries
            $this->_replacement = array($rep1, $rep2);
        }
    }

    /**
     * Returns the id of a category the given article is assigned to
     * (first record found in the cat_art table).
     *
     * @todo refactor this because it shouldn't be the Search's job
     * @param int $artid
     *         Id of an article
     * @return int|null
     *         Category Id or NULL if no record was found
     */
    public function getArtCat($artid) {
        $sql = "SELECT idcat FROM " . $this->cfg['tab']['cat_art'] . "
                WHERE idart = " . cSecurity::toInteger($artid) . " ";
        $this->db->query($sql);
        if ($this->db->nextRecord()) {
            return $this->db->f('idcat');
        }

        return NULL;
    }
}
383: