Overview

Packages

  • Core
    • Authentication
    • Backend
    • Cache
    • CEC
    • Chain
    • ContentType
    • Database
    • Datatype
    • Debug
    • Exception
    • Frontend
      • Search
      • URI
      • Util
    • GenericDB
      • Model
    • GUI
      • HTML
    • I18N
    • LayoutHandler
    • Log
    • Security
    • Session
    • Util
    • Validation
    • Versioning
    • XML
  • Module
    • ContentSitemapHtml
    • ContentSitemapXml
    • ContentUserForum
    • NavigationTop
  • mpAutoloaderClassMap
  • None
  • Plugin
    • ContentAllocation
    • CronjobOverview
    • FormAssistant
    • FrontendLogic
    • FrontendUsers
    • Linkchecker
    • ModRewrite
    • Newsletter
    • Repository
      • FrontendNavigation
      • KeywordDensity
    • SearchSolr
    • SmartyWrapper
    • UrlShortener
    • UserForum
    • Workflow
  • PluginManager
  • Setup
    • Form
    • GUI
    • Helper
      • Environment
      • Filesystem
      • MySQL
      • PHP
    • UpgradeJob

Classes

  • cSearch
  • cSearchBaseAbstract
  • cSearchIndex
  • cSearchResult
  • Overview
  • Package
  • Class
  • Tree
  • Deprecated
  • Todo
  1: <?php
  2: /**
  3:  * This file contains the class for content search results.
  4:  *
  5:  * @package Core
  6:  * @subpackage Frontend_Search
  7:  * @version SVN Revision $Rev:$
  8:  *
  9:  * @author Willi Man
 10:  * @copyright four for business AG <www.4fb.de>
 11:  * @license http://www.contenido.org/license/LIZENZ.txt
 12:  * @link http://www.4fb.de
 13:  * @link http://www.contenido.org
 14:  */
 15: defined('CON_FRAMEWORK') || die('Illegal call: Missing framework initialization - request aborted.');
 16: 
 17: cInclude('includes', 'functions.encoding.php');
 18: 
 19: 
 20: /**
 21:  * CONTENIDO API - SearchResult Object
 22:  *
 23:  * This object ranks and displays the result of the indexed fulltext search.
 24:  * If you are not comfortable with this API feel free to use your own methods to
 25:  * display the search results.
 26:  * The search result is basically an array with article ID's.
 27:  *
 28:  * If $search_result = $search->searchIndex($searchword, $searchwordex);
 29:  *
 30:  * use object with
 31:  *
 32:  * $oSearchResults = new cSearchResult($search_result, 10);
 33:  *
 34:  * $oSearchResults->setReplacement('<span style="color:red">', '</span>'); //
 35:  * html-tags to emphasize the located searchwords
 36:  *
 37:  * $num_res = $oSearchResults->getNumberOfResults();
 38:  * $num_pages = $oSearchResults->getNumberOfPages();
 39:  * $res_page = $oSearchResults->getSearchResultPage(1); // first result page
 40:  * foreach ($res_page as $key => $val) {
 41:  * $headline = $oSearchResults->getSearchContent($key, 'HTMLHEAD');
 42:  * $first_headline = $headline[0];
 43:  * $text = $oSearchResults->getSearchContent($key, 'HTML');
 44:  * $first_text = $text[0];
 45:  * $similarity = $oSearchResults->getSimilarity($key);
 46:  * $iOccurrence = $oSearchResults->getOccurrence($key);
 47:  * }
 48:  *
 49:  * @package Core
 50:  * @subpackage Frontend_Search
 51:  *
 52:  */
 53: class cSearchResult extends cSearchBaseAbstract {
 54: 
 55:     /**
 56:      * Search index instance, used to resolve CMS type names and to map
 57:      * umlauts in searchwords.
 58:      *
 59:      * @var cSearchIndex
 60:      */
 61:     protected $_index;
 62: 
 63:     /**
 64:      * Number of results
 65:      *
 66:      * @var int
 67:      */
 68:     protected $_results;
 69: 
 70:     /**
 71:      * Number of result pages
 72:      *
 73:      * @var int
 74:      */
 75:     protected $_pages;
 76: 
 77:     /**
 78:      * Result page requested last via getSearchResultPage()
 79:      *
 80:      * @var int
 81:      */
 82:     protected $_resultPage;
 83: 
 84:     /**
 85:      * Results per page to display
 86:      *
 87:      * @var int
 88:      */
 89:     protected $_resultPerPage;
 90: 
 91:     /**
 92:      * Opening and closing html-tag to emphasize the searchwords; stays
 93:      * empty until setReplacement() accepted a pair of tags
 94:      *
 95:      * @var array
 96:      */
 97:     protected $_replacement = array();
 98: 
 99:     /**
100:      * Ranking factor of each article, indexed by article id
101:      *
102:      * @var array
103:      */
104:     protected $_rankStructure = array();
105: 
106:     /**
107:      * List of result pages, each page maps article ids to ranking factors
108:      *
109:      * @var array
110:      */
111:     protected $_orderedSearchResult = array();
112: 
113:     /**
114:      * Search result indexed by article id. This class reads the entries
115:      * 'search' (list of searchwords), 'occurence' (list of hit counts),
116:      * 'similarity' (in %) and one list of content numbers per CMS type.
117:      * The key really is spelled 'occurence' in the data, do not correct it.
118:      *
119:      * @var array
120:      */
121:     protected $_searchResult = array();
122: 
123:     /**
124:      * Stores the search result, computes the ranking factor of each article
125:      * and splits the ranked articles into result pages.
126:      *
127:      * NOTE: The ranking factor is the sum of occurrences of matching
128:      * searchterms weighted by similarity (in %) between searchword and
129:      * matching word in the article.
130:      * TODO: One can think of more sophisticated ranking strategies. One could
131:      * use the content type information for example because a matching word
132:      * in the headline (CMS_HEADLINE[1]) could be weighted more than a
133:      * matching word in the text (CMS_HTML[1]).
134:      *
135:      * @param array $search_result Search result indexed by article id;
136:      *        anything but an array is treated as an empty result
137:      * @param int $result_per_page Number of items per page
138:      * @param cDb $oDB Optional db instance
139:      * @param bool $bDebug Optional flag to enable debugging
140:      */
141:     public function __construct($search_result, $result_per_page, $oDB = NULL, $bDebug = false) {
142:         parent::__construct($oDB, $bDebug);
143: 
144:         $this->_index = new cSearchIndex($oDB);
145: 
146:         $this->_searchResult = is_array($search_result) ? $search_result : array();
147:         $this->_debug('$this->search_result', $this->_searchResult);
148: 
149:         $this->_resultPerPage = $result_per_page;
150:         $this->_results = count($this->_searchResult);
151: 
152:         // compute ranking factor for each search result
153:         foreach (array_keys($this->_searchResult) as $article) {
154:             $this->_rankStructure[$article] = $this->getOccurrence($article) * ($this->getSimilarity($article) / 100);
155:         }
156:         $this->_debug('$this->rank_structure', $this->_rankStructure);
157: 
158:         $this->setOrderedSearchResult($this->_rankStructure, $this->_resultPerPage);
159:         $this->_pages = count($this->_orderedSearchResult);
160:         $this->_debug('$this->ordered_search_result', $this->_orderedSearchResult);
161:     }
162: 
163:     /**
164:      * Orders the articles by descending ranking factor and splits them into
165:      * result pages. Also updates the number of result pages.
166:      *
167:      * @param array $ranked_search Ranking factors indexed by article id
168:      * @param int $result_per_page Number of items per page; all articles
169:      *        are put on one single page if this is not greater than 0
170:      */
171:     public function setOrderedSearchResult($ranked_search, $result_per_page) {
172:         asort($ranked_search);
173:         // highest ranking first, article ids (keys) have to be preserved
174:         $sorted_rank = array_reverse($ranked_search, true);
175: 
176:         $per_page = (int) $result_per_page;
177:         if ($per_page > 0) {
178:             $this->_orderedSearchResult = array_chunk($sorted_rank, $per_page, true);
179:         } else {
180:             // assign instead of append, otherwise every further call would
181:             // add one more page to the pages of the previous call
182:             $this->_orderedSearchResult = array($sorted_rank);
183:         }
184:         $this->_pages = count($this->_orderedSearchResult);
185:     }
186: 
187:     /**
188:      * Loads the article in the language of $this->lang and returns the
189:      * requested content.
190:      *
191:      * @param int $art_id Id of an article
192:      * @param string $cms_type Content type
193:      * @param int $id Number of the content element
194:      * @return string Content of an article, specified by its content type
195:      */
196:     public function getContent($art_id, $cms_type, $id = 0) {
197:         $article = new cApiArticleLanguage();
198:         $article->loadByArticleAndLanguageId($art_id, $this->lang, true);
199: 
200:         return $article->getContent($cms_type, $id);
201:     }
202: 
203:     /**
204:      * Returns the content of an article for one content type with all
205:      * html-tags stripped. If the searchword was found in that content type,
206:      * the searchwords are wrapped in the tags set by setReplacement().
207:      *
208:      * @param int $art_id Id of an article
209:      * @param string $cms_type Content type, with or without leading 'CMS_'
210:      * @param int $cms_nr Number of the content element; if omitted, the
211:      *        elements found by the search (or all elements if the searchword
212:      *        was not found in this content type) are returned
213:      * @return array List of content strings; empty if a 'CMS_' type is not
214:      *         known to the search index
215:      */
216:     public function getSearchContent($art_id, $cms_type, $cms_nr = NULL) {
217:         $cms_type = strtoupper($cms_type);
218:         if (strlen($cms_type) > 0) {
219:             if (!stristr($cms_type, 'cms_')) {
220:                 if (in_array($cms_type, $this->_index->getCmsTypeSuffix())) {
221:                     $cms_type = 'CMS_' . $cms_type;
222:                 }
223:             } else {
224:                 if (!array_key_exists($cms_type, $this->_index->getCmsType())) {
225:                     return array();
226:                 }
227:             }
228:         }
229: 
230:         $article = new cApiArticleLanguage();
231:         $article->loadByArticleAndLanguageId($art_id, $this->lang, true);
232:         $has_cms_nr = isset($cms_nr) && is_numeric($cms_nr);
233:         $content = array();
234: 
235:         if (!isset($this->_searchResult[$art_id][$cms_type])) {
236:             // searchword was not found in cms_type, nothing to emphasize
237:             if ($has_cms_nr) {
238:                 $content[] = strip_tags($article->getContent($cms_type, $cms_nr));
239:                 return $content;
240:             }
241: 
242:             $art_content = $article->getContent($cms_type);
243:             // do not count() or iterate a value that is no array
244:             if (is_array($art_content)) {
245:                 foreach ($art_content as $val) {
246:                     $content[] = strip_tags($val);
247:                 }
248:             }
249:             return $content;
250:         }
251: 
252:         // searchword occurs in cms_type
253:         $search_words = array();
254:         if (isset($this->_searchResult[$art_id]['search'])) {
255:             $search_words = array_unique((array) $this->_searchResult[$art_id]['search']);
256:         }
257: 
258:         if ($has_cms_nr) {
259:             // get content of cms_type[cms_nr]
260:             // build consistent escaped string (Timo Trautmann) 2008-04-17
261:             $cms_content = conHtmlentities(conHtmlEntityDecode(strip_tags($article->getContent($cms_type, $cms_nr))));
262:             if (count($this->_replacement) == 2) {
263:                 $escaped_words = array();
264:                 foreach ($search_words as $word) {
265:                     // build consistent escaped string, replace ae ue ..
266:                     // with original html entities (Timo Trautmann)
267:                     // 2008-04-17
268:                     $escaped_words[] = conHtmlentities(conHtmlEntityDecode($this->_index->addSpecialUmlauts($word)));
269:                 }
270:                 $cms_content = $this->_emphasizeSearchWords($cms_content, $escaped_words);
271:             }
272:             $content[] = htmlspecialchars_decode($cms_content);
273:             return $content;
274:         }
275: 
276:         // get content of cms_type[$id], where $id are the cms_type numbers
277:         // found in search
278:         $id_type = array_unique((array) $this->_searchResult[$art_id][$cms_type]);
279:         foreach ($id_type as $id) {
280:             $content[] = $this->_emphasizeSearchWords(strip_tags($article->getContent($cms_type, $id)), $search_words);
281:         }
282: 
283:         return $content;
284:     }
285: 
286:     /**
287:      * Wraps all occurrences of the given words in the tags set by
288:      * setReplacement(). The words are matched literally and
289:      * case-insensitively; the matched text keeps its own spelling.
290:      *
291:      * @param string $text Text to emphasize the words in
292:      * @param array $words Words to emphasize
293:      * @return string Text with emphasized words; the unchanged text if no
294:      *         tags are set, no usable word is given or the matching fails
295:      */
296:     protected function _emphasizeSearchWords($text, array $words) {
297:         if (count($this->_replacement) != 2) {
298:             return $text;
299:         }
300: 
301:         $patterns = array();
302:         foreach ($words as $word) {
303:             // an empty word would match between all characters of the text
304:             if (strlen($word) > 0) {
305:                 // words are plain text, keep regex meta characters literal
306:                 $patterns[] = preg_quote($word, '/');
307:             }
308:         }
309:         if (count($patterns) == 0) {
310:             return $text;
311:         }
312: 
313:         // '\' and '$' in the tags must not be read as backreferences
314:         $open = addcslashes($this->_replacement[0], '\\$');
315:         $close = addcslashes($this->_replacement[1], '\\$');
316: 
317:         // one pass for all words, so a word can never match inside a tag
318:         // that was inserted for another word
319:         $result = preg_replace('/' . implode('|', $patterns) . '/i', $open . '${0}' . $close, $text);
320: 
321:         return ($result === NULL) ? $text : $result;
322:     }
323: 
324:     /**
325:      * Returns articles in page.
326:      *
327:      * @param int $page_id Number of the result page, starting with 1
328:      * @return array Articles in page $page_id, empty if there is no such
329:      *         page
330:      */
331:     public function getSearchResultPage($page_id) {
332:         $this->_resultPage = $page_id;
333: 
334:         $page_index = (int) $page_id - 1;
335:         if (!isset($this->_orderedSearchResult[$page_index])) {
336:             return array();
337:         }
338: 
339:         return $this->_orderedSearchResult[$page_index];
340:     }
341: 
342:     /**
343:      * Returns number of result pages
344:      *
345:      * @return int
346:      */
347:     public function getNumberOfPages() {
348:         return $this->_pages;
349:     }
350: 
351:     /**
352:      * Returns number of results
353:      *
354:      * @return int
355:      */
356:     public function getNumberOfResults() {
357:         return $this->_results;
358:     }
359: 
360:     /**
361:      * Returns the similarity stored for an article.
362:      *
363:      * @param int $art_id Id of an article
364:      * @return int Similarity between searchword and matching word in
365:      *         article, 0 if none is stored for the article
366:      */
367:     public function getSimilarity($art_id) {
368:         if (!isset($this->_searchResult[$art_id]['similarity'])) {
369:             return 0;
370:         }
371: 
372:         return $this->_searchResult[$art_id]['similarity'];
373:     }
374: 
375:     /**
376:      * Sums up the hit counts stored for an article.
377:      *
378:      * @param int $art_id Id of an article
379:      * @return number Number of matching searchwords found in article, 0 if
380:      *         no hit counts are stored for the article
381:      */
382:     public function getOccurrence($art_id) {
383:         // the key is spelled 'occurence' in the search result data
384:         if (!isset($this->_searchResult[$art_id]['occurence'])) {
385:             return 0;
386:         }
387: 
388:         return array_sum((array) $this->_searchResult[$art_id]['occurence']);
389:     }
390: 
391:     /**
392:      * Sets the html-tags to emphasize the searchwords. A pair that was set
393:      * before is replaced. Nothing is changed if one of the tags is empty.
394:      *
395:      * @param string $rep1 The opening html-tag to emphasize the searchword e.g.
396:      *        '<b>'
397:      * @param string $rep2 The closing html-tag e.g. '</b>'
398:      */
399:     public function setReplacement($rep1, $rep2) {
400:         if (strlen(trim($rep1)) > 0 && strlen(trim($rep2)) > 0) {
401:             // assign instead of append: appending a second pair would leave
402:             // four entries and thereby switch off the emphasizing
403:             $this->_replacement = array($rep1, $rep2);
404:         }
405:     }
406: 
407:     /**
408:      * Returns the first category id found for an article.
409:      *
410:      * @param int $artid
411:      * @return int|null Category Id, NULL if no category was found
412:      * @todo Is not job of search, should be outsourced!
413:      */
414:     public function getArtCat($artid) {
415:         $sql = "SELECT idcat FROM " . $this->cfg['tab']['cat_art'] . "
416:                 WHERE idart = " . cSecurity::toInteger($artid) . " ";
417:         $this->db->query($sql);
418:         if ($this->db->nextRecord()) {
419:             return $this->db->f('idcat');
420:         }
421: 
422:         return NULL;
423:     }
424: }
425: 
426: 
CMS CONTENIDO 4.9.5 API documentation generated by ApiGen 2.8.0