Overview

Packages

  • CONTENIDO
  • Core
    • Authentication
    • Backend
    • Cache
    • CEC
    • Chain
    • ContentType
    • Database
    • Debug
    • Exception
    • Frontend
      • Search
      • URI
      • Util
    • GenericDB
      • Model
    • GUI
      • HTML
    • I18N
    • LayoutHandler
    • Log
    • Security
    • Session
    • Util
    • Validation
    • Versioning
    • XML
  • Module
    • ContentRssCreator
    • ContentSitemapHtml
    • ContentSitemapXml
    • ContentUserForum
    • NavigationTop
    • ScriptCookieDirective
  • mpAutoloaderClassMap
  • None
  • Plugin
    • ContentAllocation
    • CronjobOverview
    • FormAssistant
    • FrontendLogic
    • FrontendUsers
    • Linkchecker
    • ModRewrite
    • Newsletter
    • Repository
      • FrontendNavigation
      • KeywordDensity
    • SearchSolr
    • SmartyWrapper
    • UrlShortener
    • UserForum
    • Workflow
  • PluginManager
  • Setup
    • Form
    • GUI
    • Helper
      • Environment
      • Filesystem
      • MySQL
      • PHP
    • UpgradeJob

Classes

  • cSearch
  • cSearchBaseAbstract
  • cSearchIndex
  • cSearchResult
  • Overview
  • Package
  • Class
  • Tree
  • Deprecated
  • Todo
  1: <?php
  2: 
  3: /**
  4:  * This file contains the class for content search results.
  5:  *
  6:  * @package Core
  7:  * @subpackage Frontend_Search
  8:  * @version SVN Revision $Rev:$
  9:  *
 10:  * @author Willi Man
 11:  * @copyright four for business AG <www.4fb.de>
 12:  * @license http://www.contenido.org/license/LIZENZ.txt
 13:  * @link http://www.4fb.de
 14:  * @link http://www.contenido.org
 15:  */
 16: 
 17: defined('CON_FRAMEWORK') || die('Illegal call: Missing framework initialization - request aborted.');
 18: 
 19: cInclude('includes', 'functions.encoding.php');
 20: 
 21: 
 22: /**
 23:  * CONTENIDO API - SearchResult Object
 24:  *
 25:  * This object ranks and displays the result of the indexed fulltext search.
 26:  * If you are not comfortable with this API feel free to use your own methods to
 27:  * display the search results.
 28:  * The search result is basically an array with article ID's.
 29:  *
 30:  * If $search_result = $search->searchIndex($searchword, $searchwordex);
 31:  *
 32:  * use object with
 33:  *
 34:  * $oSearchResults = new cSearchResult($search_result, 10);
 35:  *
 36:  * $oSearchResults->setReplacement('<span style="color:red">', '</span>'); //
 37:  * html-tags to emphasize the located searchwords
 38:  *
 39:  * $num_res = $oSearchResults->getNumberOfResults();
 40:  * $num_pages = $oSearchResults->getNumberOfPages();
 41:  * $res_page = $oSearchResults->getSearchResultPage(1); // first result page
 42:  * foreach ($res_page as $key => $val) {
 43:  * $headline = $oSearchResults->getSearchContent($key, 'HTMLHEAD');
 44:  * $first_headline = $headline[0];
 45:  * $text = $oSearchResults->getSearchContent($key, 'HTML');
 46:  * $first_text = $text[0];
 47:  * $similarity = $oSearchResults->getSimilarity($key);
 48:  * $iOccurrence = $oSearchResults->getOccurrence($key);
 49:  * }
 50:  *
 51:  * @package Core
 52:  * @subpackage Frontend_Search
 53:  *
 54:  */
 55: class cSearchResult extends cSearchBaseAbstract {
 56: 
 57:     /**
 58:      * Instance of class Index
 59:      *
 60:      * @var object
 61:      */
 62:     protected $_index;
 63: 
 64:     /**
 65:      * Number of results
 66:      *
 67:      * @var int
 68:      */
 69:     protected $_results;
 70: 
 71:     /**
 72:      * Number of result pages
 73:      *
 74:      * @var int
 75:      */
 76:     protected $_pages;
 77: 
 78:     /**
 79:      * Current result page
 80:      *
 81:      * @var int
 82:      */
 83:     protected $_resultPage;
 84: 
 85:     /**
 86:      * Results per page to display
 87:      *
 88:      * @var int
 89:      */
 90:     protected $_resultPerPage;
 91: 
 92:     /**
 93:      * Array of html-tags to emphasize the searchwords
 94:      *
 95:      * @var array
 96:      */
 97:     protected $_replacement = array();
 98: 
 99:     /**
100:      * Array of article id's with ranking information
101:      *
102:      * @var array
103:      */
104:     protected $_rankStructure = array();
105: 
106:     /**
107:      * Array of result-pages with array's of article id's
108:      *
109:      * @var array
110:      */
111:     protected $_orderedSearchResult = array();
112: 
113:     /**
114:      * Array of article id's with information about cms-types, occurence of
115:      * keyword/searchword, similarity .
116:      *
117:      *
118:      *
119:      *
120:      * @var array
121:      */
122:     protected $_searchResult = array();
123: 
124:     /**
125:      * Compute ranking factor for each search result and order the search
126:      * results by ranking factor
127:      * NOTE: The ranking factor is the sum of occurences of matching searchterms
128:      * weighted by similarity (in %) between searchword
129:      * and matching word in the article.
130:      * TODO: One can think of more sophisticated ranking strategies. One could
131:      * use the content type information for example
132:      * because a matching word in the headline (CMS_HEADLINE[1]) could be
133:      * weighted more than a matching word in the text (CMS_HTML[1]).
134:      *
135:      * @param array $search_result
136:      *         List of article ids
137:      * @param int $result_per_page
138:      *         Number of items per page
139:      * @param cDb $oDB [optional]
140:      *         db instance
141:      * @param bool $bDebug [optional]
142:      *         Optional flag to enable debugging
143:      */
144:     public function __construct($search_result, $result_per_page, $oDB = NULL, $bDebug = false) {
145:         parent::__construct($oDB, $bDebug);
146: 
147:         $this->_index = new cSearchIndex($oDB);
148: 
149:         $this->_searchResult = $search_result;
150:         $this->_debug('$this->search_result', $this->_searchResult);
151: 
152:         $this->_resultPerPage = $result_per_page;
153:         $this->_results = count($this->_searchResult);
154: 
155:         // compute ranking factor for each search result
156:         foreach ($this->_searchResult as $article => $val) {
157:             $this->_rankStructure[$article] = $this->getOccurrence($article) * ($this->getSimilarity($article) / 100);
158:         }
159:         $this->_debug('$this->rank_structure', $this->_rankStructure);
160: 
161:         $this->setOrderedSearchResult($this->_rankStructure, $this->_resultPerPage);
162:         $this->_pages = count($this->_orderedSearchResult);
163:         $this->_debug('$this->ordered_search_result', $this->_orderedSearchResult);
164:     }
165: 
166:     /**
167:      *
168:      * @param array $ranked_search
169:      * @param int $result_per_page
170:      */
171:     public function setOrderedSearchResult($ranked_search, $result_per_page) {
172:         asort($ranked_search);
173: 
174:         $sorted_rank = array_reverse($ranked_search, true);
175: 
176:         if (isset($result_per_page) && $result_per_page > 0) {
177:             $split_result = array_chunk($sorted_rank, $result_per_page, true);
178:             $this->_orderedSearchResult = $split_result;
179:         } else {
180:             $this->_orderedSearchResult[] = $sorted_rank;
181:         }
182:     }
183: 
184:     /**
185:      *
186:      * @param int $art_id
187:      *         Id of an article
188:      * @param string $cms_type
189:      * @param int $id [optional]
190:      * @return string
191:      *         Content of an article, specified by it's content type
192:      */
193:     public function getContent($art_id, $cms_type, $id = 0) {
194:         $article = new cApiArticleLanguage();
195:         $article->loadByArticleAndLanguageId($art_id, $this->lang, true);
196:         return $article->getContent($cms_type, $id);
197:     }
198: 
199:     /**
200:      *
201:      * @param int $art_id
202:      *         Id of an article
203:      * @param string $cms_type
204:      *         Content type
205:      * @param int $cms_nr [optional]
206:      * @return string
207:      *         Content of an article in search result, specified by its type
208:      */
209:     public function getSearchContent($art_id, $cms_type, $cms_nr = NULL) {
210:         $cms_type = strtoupper($cms_type);
211:         if (strlen($cms_type) > 0) {
212:             if (!stristr($cms_type, 'cms_')) {
213:                 if (in_array($cms_type, $this->_index->getCmsTypeSuffix())) {
214:                     $cms_type = 'CMS_' . $cms_type;
215:                 }
216:             } else {
217:                 if (!array_key_exists($cms_type, $this->_index->getCmsType())) {
218:                     return array();
219:                 }
220:             }
221:         }
222: 
223:         $article = new cApiArticleLanguage();
224:         $article->loadByArticleAndLanguageId($art_id, $this->lang, true);
225:         $content = array();
226:         if (isset($this->_searchResult[$art_id][$cms_type])) {
227:             // if searchword occurs in cms_type
228:             $search_words = $this->_searchResult[$art_id]['search'];
229:             $search_words = array_unique($search_words);
230: 
231:             $id_type = $this->_searchResult[$art_id][$cms_type];
232:             $id_type = array_unique($id_type);
233: 
234:             if (isset($cms_nr) && is_numeric($cms_nr)) {
235:                 // get content of cms_type[cms_nr]
236:                 // build consistent escaped string(Timo Trautmann) 2008-04-17
237:                 $cms_content = conHtmlentities(conHtmlEntityDecode(strip_tags($article->getContent($cms_type, $cms_nr))));
238:                 if (count($this->_replacement) == 2) {
239:                     foreach ($search_words as $word) {
240:                         // build consistent escaped string, replace ae ue ..
241:                         // with original html entities (Timo Trautmann)
242:                         // 2008-04-17
243:                         $word = conHtmlentities(conHtmlEntityDecode($this->_index->addSpecialUmlauts($word)));
244:                         $match = array();
245:                         preg_match("/$word/i", $cms_content, $match);
246:                         if (isset($match[0])) {
247:                             $pattern = $match[0];
248:                             $replacement = $this->_replacement[0] . $pattern . $this->_replacement[1];
249:                             $cms_content = preg_replace("/$pattern/i", $replacement, $cms_content); // emphasize
250:                             // located
251:                             // searchwords
252:                         }
253:                     }
254:                 }
255:                 $content[] = htmlspecialchars_decode($cms_content);
256:             } else {
257:                 // get content of cms_type[$id], where $id are the cms_type
258:                 // numbers found in search
259:                 foreach ($id_type as $id) {
260:                     $cms_content = strip_tags($article->getContent($cms_type, $id));
261: 
262:                     if (count($this->_replacement) == 2) {
263:                         foreach ($search_words as $word) {
264:                             preg_match("/$word/i", $cms_content, $match);
265:                             if (isset($match[0])) {
266:                                 $pattern = $match[0];
267:                                 $replacement = $this->_replacement[0] . $pattern . $this->_replacement[1];
268:                                 $cms_content = preg_replace("/$pattern/i", $replacement, $cms_content); // emphasize
269:                                 // located
270:                                 // searchwords
271:                             }
272:                         }
273:                     }
274:                     $content[] = $cms_content;
275:                 }
276:             }
277:         } else {
278:             // searchword was not found in cms_type
279:             if (isset($cms_nr) && is_numeric($cms_nr)) {
280:                 $content[] = strip_tags($article->getContent($cms_type, $cms_nr));
281:             } else {
282:                 $art_content = $article->getContent($cms_type);
283:                 if (count($art_content) > 0) {
284:                     foreach ($art_content as $val) {
285:                         $content[] = strip_tags($val);
286:                     }
287:                 }
288:             }
289:         }
290:         return $content;
291:     }
292: 
293:     /**
294:      * Returns articles in page.
295:      *
296:      * @param int $page_id
297:      * @return array
298:      *         Articles in page $page_id
299:      */
300:     public function getSearchResultPage($page_id) {
301:         $this->_resultPage = $page_id;
302:         $result_page = $this->_orderedSearchResult[$page_id - 1];
303:         return $result_page;
304:     }
305: 
306:     /**
307:      * Returns number of result pages
308:      *
309:      * @return int
310:      */
311:     public function getNumberOfPages() {
312:         return $this->_pages;
313:     }
314: 
315:     /**
316:      * Returns number of results
317:      *
318:      * @return int
319:      */
320:     public function getNumberOfResults() {
321:         return $this->_results;
322:     }
323: 
324:     /**
325:      *
326:      * @param int $art_id
327:      *         Id of an article
328:      * @return int
329:      *         Similarity between searchword and matching word in article
330:      */
331:     public function getSimilarity($art_id) {
332:         return $this->_searchResult[$art_id]['similarity'];
333:     }
334: 
335:     /**
336:      *
337:      * @param int $art_id
338:      *         Id of an article
339:      * @return int
340:      *         number of matching searchwords found in article
341:      */
342:     public function getOccurrence($art_id) {
343:         $aOccurence = $this->_searchResult[$art_id]['occurence'];
344:         $iSumOfOccurence = 0;
345:         for ($i = 0; $i < count($aOccurence); $i++) {
346:             $iSumOfOccurence += $aOccurence[$i];
347:         }
348: 
349:         return $iSumOfOccurence;
350:     }
351: 
352:     /**
353:      *
354:      * @param string $rep1
355:      *         The opening html-tag to emphasize the searchword e.g. '<b>'
356:      * @param string $rep2
357:      *         The closing html-tag e.g. '</b>'
358:      */
359:     public function setReplacement($rep1, $rep2) {
360:         if (strlen(trim($rep1)) > 0 && strlen(trim($rep2)) > 0) {
361:             $this->_replacement[] = $rep1;
362:             $this->_replacement[] = $rep2;
363:         }
364:     }
365: 
366:     /**
367:      *
368:      * @todo Is not job of search, should be outsourced!
369:      * @param int $artid
370:      * @return int
371:      *         Category Id
372:      */
373:     public function getArtCat($artid) {
374:         $sql = "SELECT idcat FROM " . $this->cfg['tab']['cat_art'] . "
375:                 WHERE idart = " . cSecurity::toInteger($artid) . " ";
376:         $this->db->query($sql);
377:         if ($this->db->nextRecord()) {
378:             return $this->db->f('idcat');
379:         }
380:     }
381: }
382: 
CMS CONTENIDO 4.9.8 API documentation generated by ApiGen 2.8.0