1: <?php
2: /**
3: * This file contains the class for content search results.
4: *
5: * @package Core
6: * @subpackage Frontend_Search
7: * @version SVN Revision $Rev:$
8: *
9: * @author Willi Man
10: * @copyright four for business AG <www.4fb.de>
11: * @license http://www.contenido.org/license/LIZENZ.txt
12: * @link http://www.4fb.de
13: * @link http://www.contenido.org
14: */
// Refuse to run when the file is requested directly, i.e. without the
// framework having defined its marker constant first.
if (!defined('CON_FRAMEWORK')) {
    die('Illegal call: Missing framework initialization - request aborted.');
}

// Pull in the encoding helper functions file from the "includes" location.
cInclude('includes', 'functions.encoding.php');
18:
19:
20: /**
21: * CONTENIDO API - SearchResult Object
22: *
23: * This object ranks and displays the result of the indexed fulltext search.
24: * If you are not comfortable with this API feel free to use your own methods to
25: * display the search results.
 * The search result is basically an array keyed by article ID.
27: *
28: * If $search_result = $search->searchIndex($searchword, $searchwordex);
29: *
30: * use object with
31: *
32: * $oSearchResults = new cSearchResult($search_result, 10);
33: *
 * // HTML tags used to emphasize the located search words
 * $oSearchResults->setReplacement('<span style="color:red">', '</span>');
36: *
37: * $num_res = $oSearchResults->getNumberOfResults();
38: * $num_pages = $oSearchResults->getNumberOfPages();
39: * $res_page = $oSearchResults->getSearchResultPage(1); // first result page
40: * foreach ($res_page as $key => $val) {
41: * $headline = $oSearchResults->getSearchContent($key, 'HTMLHEAD');
42: * $first_headline = $headline[0];
43: * $text = $oSearchResults->getSearchContent($key, 'HTML');
44: * $first_text = $text[0];
45: * $similarity = $oSearchResults->getSimilarity($key);
46: * $iOccurrence = $oSearchResults->getOccurrence($key);
47: * }
48: *
49: * @package Core
50: * @subpackage Frontend_Search
51: *
52: */
class cSearchResult extends cSearchBaseAbstract {

    /**
     * Search index instance, used to resolve CMS type names and to map
     * umlaut transcriptions in search words.
     *
     * @var cSearchIndex
     */
    protected $_index;

    /**
     * Number of results
     *
     * @var int
     */
    protected $_results;

    /**
     * Number of result pages
     *
     * @var int
     */
    protected $_pages;

    /**
     * Result page requested last via getSearchResultPage()
     *
     * @var int
     */
    protected $_resultPage;

    /**
     * Results per page to display
     *
     * @var int
     */
    protected $_resultPerPage;

    /**
     * Opening and closing html tag used to emphasize the search words.
     * Either empty (no highlighting) or exactly two entries.
     *
     * @var array
     */
    protected $_replacement = array();

    /**
     * Ranking factor per article id
     *
     * @var array
     */
    protected $_rankStructure = array();

    /**
     * List of result pages, each page being an array of
     * article id => ranking factor in descending rank order
     *
     * @var array
     */
    protected $_orderedSearchResult = array();

    /**
     * Raw search result: per article id the entries read by this class are
     * 'search' (matched words), 'occurence' (hit counts), 'similarity' and
     * one entry per cms type holding the type numbers with a hit.
     *
     * @var array
     */
    protected $_searchResult = array();

    /**
     * Computes the ranking factor for each search result and orders the
     * search results by that factor.
     *
     * The ranking factor is the sum of occurrences of matching search terms
     * weighted by the similarity (in %) between search word and matching
     * word in the article.
     *
     * TODO: One can think of more sophisticated ranking strategies, e.g.
     * weighting a match in the headline (CMS_HEADLINE[1]) higher than a
     * match in the text (CMS_HTML[1]).
     *
     * @param array $search_result Search result keyed by article id
     * @param int $result_per_page Number of items per page
     * @param cDb $oDB Optional db instance
     * @param bool $bDebug Optional flag to enable debugging
     */
    public function __construct($search_result, $result_per_page, $oDB = NULL, $bDebug = false) {
        parent::__construct($oDB, $bDebug);

        $this->_index = new cSearchIndex($oDB);

        // Anything but an array is treated as "nothing found"; count() and
        // foreach below would otherwise fail on it.
        $this->_searchResult = is_array($search_result) ? $search_result : array();
        $this->_debug('$this->search_result', $this->_searchResult);

        $this->_resultPerPage = $result_per_page;
        $this->_results = count($this->_searchResult);

        // compute ranking factor for each search result
        foreach (array_keys($this->_searchResult) as $article) {
            $this->_rankStructure[$article] = $this->getOccurrence($article) * ($this->getSimilarity($article) / 100);
        }
        $this->_debug('$this->rank_structure', $this->_rankStructure);

        $this->setOrderedSearchResult($this->_rankStructure, $this->_resultPerPage);
        $this->_pages = count($this->_orderedSearchResult);
        $this->_debug('$this->ordered_search_result', $this->_orderedSearchResult);
    }

    /**
     * Sorts the ranked articles by descending ranking factor and splits them
     * into result pages. Replaces any previously stored pages.
     *
     * @param array $ranked_search Ranking factor per article id
     * @param int $result_per_page Page size; a value <= 0 puts all results
     *        on a single page
     */
    public function setOrderedSearchResult($ranked_search, $result_per_page) {
        asort($ranked_search);
        $sorted_rank = array_reverse($ranked_search, true);

        $per_page = isset($result_per_page) ? (int) $result_per_page : 0;
        if ($per_page > 0) {
            $this->_orderedSearchResult = array_chunk($sorted_rank, $per_page, true);
        } else {
            // Assign instead of append, so calling this method again does
            // not pile up stale pages.
            $this->_orderedSearchResult = array($sorted_rank);
        }

        // Keep the page counter in step when called from outside the
        // constructor.
        $this->_pages = count($this->_orderedSearchResult);
    }

    /**
     * Loads the article in the current language and returns the requested
     * content.
     *
     * @param int $art_id Id of an article
     * @param string $cms_type Content type
     * @param int $id Number of the content type entry
     * @return mixed Whatever cApiArticleLanguage::getContent() delivers for
     *         the given type and number
     */
    public function getContent($art_id, $cms_type, $id = 0) {
        $article = new cApiArticleLanguage();
        $article->loadByArticleAndLanguageId($art_id, $this->lang, true);
        return $article->getContent($cms_type, $id);
    }

    /**
     * Returns the tag-stripped content of an article for one content type,
     * with the matched search words emphasized if setReplacement() was
     * called before.
     *
     * @param int $art_id Id of an article
     * @param string $cms_type Content type, with or without 'CMS_' prefix
     * @param int $cms_nr Optional number of the content type entry; if
     *        omitted, all entries with a hit (or all entries of the type if
     *        there was no hit) are returned
     * @return array List of content strings; empty for an unknown CMS_ type
     */
    public function getSearchContent($art_id, $cms_type, $cms_nr = NULL) {
        $cms_type = strtoupper($cms_type);
        if (strlen($cms_type) > 0) {
            if (stripos($cms_type, 'cms_') === false) {
                // short form like 'HTML': complete it to 'CMS_HTML'
                if (in_array($cms_type, $this->_index->getCmsTypeSuffix())) {
                    $cms_type = 'CMS_' . $cms_type;
                }
            } elseif (!array_key_exists($cms_type, $this->_index->getCmsType())) {
                return array();
            }
        }

        $article = new cApiArticleLanguage();
        $article->loadByArticleAndLanguageId($art_id, $this->lang, true);

        $has_cms_nr = isset($cms_nr) && is_numeric($cms_nr);
        $highlight = count($this->_replacement) == 2;
        $content = array();

        if (isset($this->_searchResult[$art_id][$cms_type])) {
            // search word occurs in cms_type
            $search_words = array();
            if (isset($this->_searchResult[$art_id]['search'])) {
                $search_words = array_unique((array) $this->_searchResult[$art_id]['search']);
            }

            if ($has_cms_nr) {
                // get content of cms_type[cms_nr] as a consistently escaped
                // string
                $cms_content = conHtmlentities(conHtmlEntityDecode(strip_tags((string) $article->getContent($cms_type, $cms_nr))));
                if ($highlight) {
                    // bring the words into the same escaped form as the
                    // content, replacing ae, ue, .. via the index helper
                    $escaped_words = array();
                    foreach ($search_words as $word) {
                        $escaped_words[] = conHtmlentities(conHtmlEntityDecode($this->_index->addSpecialUmlauts($word)));
                    }
                    $cms_content = $this->_emphasizeSearchWords($cms_content, $escaped_words);
                }
                $content[] = htmlspecialchars_decode($cms_content);
            } else {
                // get content of cms_type[$id], where $id are the cms_type
                // numbers found in search
                $id_type = array_unique((array) $this->_searchResult[$art_id][$cms_type]);
                foreach ($id_type as $id) {
                    $cms_content = strip_tags((string) $article->getContent($cms_type, $id));
                    if ($highlight) {
                        $cms_content = $this->_emphasizeSearchWords($cms_content, $search_words);
                    }
                    $content[] = $cms_content;
                }
            }
        } elseif ($has_cms_nr) {
            // search word was not found in cms_type
            $content[] = strip_tags((string) $article->getContent($cms_type, $cms_nr));
        } else {
            // search word was not found in cms_type: deliver every entry
            $art_content = $article->getContent($cms_type);
            if (is_array($art_content)) {
                foreach ($art_content as $val) {
                    $content[] = strip_tags((string) $val);
                }
            }
        }

        return $content;
    }

    /**
     * Wraps every case-insensitive occurrence of the given words in the
     * configured replacement tags.
     *
     * All words are matched literally in one pass, so regex meta characters
     * in a word cannot break the pattern, the inserted tags are never
     * searched themselves and each hit keeps its own casing.
     * NOTE(review): assumes search words are plain text, not regex
     * fragments - confirm against cSearchIndex.
     *
     * @param string $content Text to highlight in
     * @param array $words Literal words to emphasize
     * @return string Highlighted text, or $content unchanged if no
     *         replacement is configured, no usable word was given or the
     *         regex engine reported an error
     */
    protected function _emphasizeSearchWords($content, array $words) {
        if (count($this->_replacement) != 2) {
            return $content;
        }

        $needles = array();
        foreach ($words as $word) {
            $word = (string) $word;
            // an empty alternative would match at every position
            if ($word !== '') {
                $needles[] = $word;
            }
        }
        if (count($needles) == 0) {
            return $content;
        }

        // Longest word first, so that "testing" wins over "test" at the
        // same position.
        usort($needles, function ($a, $b) {
            return strlen($b) - strlen($a);
        });

        $alternatives = array();
        foreach ($needles as $needle) {
            $alternatives[] = preg_quote($needle, '/');
        }

        $open = $this->_replacement[0];
        $close = $this->_replacement[1];
        $result = preg_replace_callback(
            '/' . implode('|', $alternatives) . '/i',
            function ($match) use ($open, $close) {
                return $open . $match[0] . $close;
            },
            (string) $content
        );

        return $result === NULL ? $content : $result;
    }

    /**
     * Returns the articles of one result page and remembers the page as the
     * current one.
     *
     * @param int $page_id Page number, starting with 1
     * @return array Article id => ranking factor of page $page_id; empty
     *         array if the page does not exist
     */
    public function getSearchResultPage($page_id) {
        $this->_resultPage = $page_id;

        $index = (int) $page_id - 1;
        if (!isset($this->_orderedSearchResult[$index])) {
            return array();
        }

        return $this->_orderedSearchResult[$index];
    }

    /**
     * Returns number of result pages
     *
     * @return int
     */
    public function getNumberOfPages() {
        return $this->_pages;
    }

    /**
     * Returns number of results
     *
     * @return int
     */
    public function getNumberOfResults() {
        return $this->_results;
    }

    /**
     * Returns the similarity stored for an article.
     *
     * @param int $art_id Id of an article
     * @return int Similarity between search word and matching word in
     *         article; 0 if the article is not part of the search result
     */
    public function getSimilarity($art_id) {
        if (!isset($this->_searchResult[$art_id]['similarity'])) {
            return 0;
        }

        return $this->_searchResult[$art_id]['similarity'];
    }

    /**
     * Sums up the occurrence counts stored for an article.
     *
     * @param int $art_id Id of an article
     * @return int|float Number of matching search words found in article;
     *         0 if the article carries no occurrence list
     */
    public function getOccurrence($art_id) {
        // 'occurence' (sic) is the key used by the search result array
        if (!isset($this->_searchResult[$art_id]['occurence'])) {
            return 0;
        }

        $occurrences = $this->_searchResult[$art_id]['occurence'];

        // array_sum() does not depend on the list being indexed 0..n-1
        return is_array($occurrences) ? array_sum($occurrences) : 0;
    }

    /**
     * Sets the html tags which are put around located search words.
     * A later call replaces the tags of an earlier one; a call with an
     * empty tag is ignored.
     *
     * @param string $rep1 The opening html-tag to emphasize the search word
     *        e.g. '<b>'
     * @param string $rep2 The closing html-tag e.g. '</b>'
     */
    public function setReplacement($rep1, $rep2) {
        if (strlen(trim($rep1)) > 0 && strlen(trim($rep2)) > 0) {
            // Assign instead of append: the highlighting code requires
            // exactly two entries.
            $this->_replacement = array($rep1, $rep2);
        }
    }

    /**
     * Looks up a category the article is assigned to. Only the first record
     * delivered by the database is evaluated.
     *
     * @param int $artid
     * @return mixed Category id as delivered by the db layer, NULL if the
     *         article has no category assignment
     * @todo Is not job of search, should be outsourced!
     */
    public function getArtCat($artid) {
        $sql = "SELECT idcat FROM " . $this->cfg['tab']['cat_art'] . " WHERE idart = " . cSecurity::toInteger($artid) . " ";
        $this->db->query($sql);
        if ($this->db->nextRecord()) {
            return $this->db->f('idcat');
        }

        return NULL;
    }
}
365:
366: