Overview

Packages

  • Core
    • Authentication
    • Backend
    • Cache
    • CEC
    • Chain
    • ContentType
    • Database
    • Datatype
    • Debug
    • Exception
    • Frontend
      • Search
      • URI
      • Util
    • GenericDB
      • Model
    • GUI
      • HTML
    • I18N
    • LayoutHandler
    • Log
    • Security
    • Session
    • Util
    • Validation
    • Versioning
    • XML
  • Module
    • ContentSitemapHtml
    • ContentSitemapXml
    • ContentUserForum
    • NavigationTop
  • mpAutoloaderClassMap
  • None
  • Plugin
    • ContentAllocation
    • CronjobOverview
    • FormAssistant
    • FrontendLogic
    • FrontendUsers
    • Linkchecker
    • ModRewrite
    • Newsletter
    • Repository
      • FrontendNavigation
      • KeywordDensity
    • SearchSolr
    • SmartyWrapper
    • UrlShortener
    • UserForum
    • Workflow
  • PluginManager
  • Setup
    • Form
    • GUI
    • Helper
      • Environment
      • Filesystem
      • MySQL
      • PHP
    • UpgradeJob

Classes

  • cSearch
  • cSearchBaseAbstract
  • cSearchIndex
  • cSearchResult
  • Overview
  • Package
  • Class
  • Tree
  • Deprecated
  • Todo
   1: <?php
   2: /**
   3:  * This file contains various classes for content search.
   4:  * API to index a CONTENIDO article
   5:  * API to search in the index structure
   6:  * API to display the searchresults
   7:  *
   8:  * @package Core
   9:  * @subpackage Frontend_Search
  10:  * @version SVN Revision $Rev:$
  11:  *
  12:  * @author Willi Man
  13:  * @copyright four for business AG <www.4fb.de>
  14:  * @license http://www.contenido.org/license/LIZENZ.txt
  15:  * @link http://www.4fb.de
  16:  * @link http://www.contenido.org
  17:  */
  18: defined('CON_FRAMEWORK') || die('Illegal call: Missing framework initialization - request aborted.');
  19: 
  20: cInclude('includes', 'functions.encoding.php');
  21: 
  22: /**
  23:  * Abstract base search class.
  24:  * Provides general properties and functions
  25:  * for child implementations.
  26:  *
  27:  * @author Murat Purc <murat@purc.de>
  28:  *
  29:  * @package Core
  30:  * @subpackage Frontend_Search
  31:  */
  32: abstract class cSearchBaseAbstract {
  33: 
  34:     /**
  35:      * CONTENIDO database object
  36:      *
  37:      * @var cDb
  38:      */
  39:     protected $oDB;
  40: 
  41:     /**
  42:      * CONTENIDO configuration data
  43:      *
  44:      * @var array
  45:      */
  46:     protected $cfg;
  47: 
  48:     /**
  49:      * Language id of a client
  50:      *
  51:      * @var int
  52:      */
  53:     protected $lang;
  54: 
  55:     /**
  56:      * Client id
  57:      *
  58:      * @var int
  59:      */
  60:     protected $client;
  61: 
  62:     /**
  63:      * Initialises some properties
  64:      *
  65:      * @param cDb $oDB Optional database instance
  66:      * @param bool $bDebug Optional, flag to enable debugging (no longer needed)
  67:      */
  68:     protected function __construct($oDB = NULL, $bDebug = false) {
  69:         global $cfg, $lang, $client;
  70: 
  71:         $this->cfg = $cfg;
  72:         $this->lang = $lang;
  73:         $this->client = $client;
  74: 
  75:         $this->bDebug = $bDebug;
  76: 
  77:         if ($oDB == NULL || !is_object($oDB)) {
  78:             $this->db = cRegistry::getDb();
  79:         } else {
  80:             $this->db = $oDB;
  81:         }
  82:     }
  83: 
  84:     /**
  85:      * Main debug function, prints dumps parameter if debugging is enabled
  86:      *
  87:      * @param string $msg Some text
  88:      * @param mixed $var The variable to dump
  89:      */
  90:     protected function _debug($msg, $var) {
  91:         $dump = $msg . ': ';
  92:         if (is_array($var) || is_object($var)) {
  93:             $dump .= print_r($var, true);
  94:         } else {
  95:             $dump .= $var;
  96:         }
  97:         cDebug::out($dump);
  98:     }
  99: }
 100: 
 101: /**
 102:  * CONTENIDO API - Search Index Object
 103:  *
 104:  * This object creates an index of an article
 105:  *
 106:  * Create object with
 107:  * $oIndex = new cSearchIndex($db); # where $db is the global CONTENIDO database
 108:  * object.
 109:  * Start indexing with
 110:  * $oIndex->start($idart, $aContent);
 111:  * where $aContent is the complete content of an article specified by its
 112:  * content types.
 113:  * It looks like
 114:  * Array (
 115:  * [CMS_HTMLHEAD] => Array (
 116:  * [1] => Herzlich Willkommen...
 117:  * [2] => ...auf Ihrer Website!
 118:  * )
 119:  * [CMS_HTML] => Array (
 120:  * [1] => Die Inhalte auf dieser Website ...
 121:  *
 122:  * The index for keyword 'willkommen' would look like '&12=1(CMS_HTMLHEAD-1)'
 123:  * which means the keyword 'willkommen' occurs once in the article with articleId
 124:  * 12 and content type CMS_HTMLHEAD[1].
 125:  *
 126:  * TODO: The basic idea of the indexing process is to take the complete content
 127:  * of an article and to generate normalized index terms
 128:  * from the content and to store a specific index structure in the relation
 129:  * 'con_keywords'.
 130:  * To take the complete content is not very flexible. It would be better to
 131:  * differentiate by specific content types or by any content.
 132:  * The &, =, () and - separated string is not easy to parse to compute the
 133:  * search result set.
 134:  * It would be a better idea (and a lot of work) to extend the relation
 135:  * 'con_keywords' to store keywords by articleId (or content source identifier)
 136:  * and content type.
 137:  * The functions removeSpecialChars, setStopwords, setContentTypes and
 138:  * setCmsOptions should be sourced out into a new helper-class.
 139:  * Keep in mind that the classes cSearch and cSearchResult use an instance of
 140:  * cSearchIndex.
 141:  *
 142:  * @package Core
 143:  * @subpackage Frontend_Search
 144:  */
 145: class cSearchIndex extends cSearchBaseAbstract {
 146: 
 147:     /**
 148:      * the content of the cms-types of an article
 149:      *
 150:      * @var array
 151:      */
 152:     protected $_keycode = array();
 153: 
 154:     /**
 155:      * the list of keywords of an article
 156:      *
 157:      * @var array
 158:      */
 159:     protected $_keywords = array();
 160: 
 161:     /**
 162:      * the words, which should not be indexed
 163:      *
 164:      * @var array
 165:      */
 166:     protected $_stopwords = array();
 167: 
 168:     /**
 169:      * the keywords of an article stored in the DB
 170:      *
 171:      * @var array
 172:      */
 173:     protected $_keywordsOld = array();
 174: 
 175:     /**
 176:      * the keywords to be deleted
 177:      *
 178:      * @var array
 179:      */
 180:     protected $_keywordsDel = array();
 181: 
 182:     /**
 183:      * 'auto' or 'self'
 184:      * The field 'auto' in table con_keywords is used for automatic indexing.
 185:      * The value is a string like "&12=2(CMS_HTMLHEAD-1,CMS_HTML-1)", which
 186:      * means a keyword occurs 2 times in article with $idart 12
 187:      * and can be found in CMS_HTMLHEAD[1] and CMS_HTML[1].
 188:      * The field 'self' can be used in the article properties to index the
 189:      * article manually.
 190:      *
 191:      * @var string
 192:      */
 193:     protected $_place;
 194: 
 195:     /**
 196:      * array of cms types
 197:      *
 198:      * @var array
 199:      */
 200:     protected $_cmsOptions = array();
 201: 
 202:     /**
 203:      * array of all available cms types
 204:      *
 205:      * htmlhead - HTML Headline
 206:      * html - HTML Text
 207:      * head - Headline (no HTML)
 208:      * text - Text (no HTML)
 209:      * img - Upload id of the element
 210:      * imgdescr - Image description
 211:      * link - Link (URL)
 212:      * linktarget - Linktarget (_self, _blank, _top ...)
 213:      * linkdescr - Linkdescription
 214:      * swf - Upload id of the element
 215:      * etc.
 216:      *
 217:      * @var array
 218:      */
 219:     protected $_cmsType = array();
 220: 
 221:     /**
 222:      * the suffix of all available cms types
 223:      *
 224:      * @var array
 225:      */
 226:     protected $_cmsTypeSuffix = array();
 227: 
 228:     /**
 229:      * Constructor, set object properties
 230:      *
 231:      * @param cDb $db CONTENIDO Database object
 232:      */
 233:     public function __construct($db = NULL) {
 234:         parent::__construct($db);
 235: 
 236:         $this->setContentTypes();
 237:     }
 238: 
 239:     /**
 240:      * Start indexing the article.
 241:      *
 242:      * @param int $idart Article Id
 243:      * @param array $aContent The complete content of an article specified by
 244:      *        its content types.
 245:      *        It looks like
 246:      *        Array (
 247:      *        [CMS_HTMLHEAD] => Array (
 248:      *        [1] => Herzlich Willkommen...
 249:      *        [2] => ...auf Ihrer Website!
 250:      *        )
 251:      *        [CMS_HTML] => Array (
 252:      *        [1] => Die Inhalte auf dieser Website ...
 253:      *
 254:      * @param string $place The field where to store the index information in
 255:      *        db.
 256:      * @param array $cms_options One can specify explicitly cms types which
 257:      *        should not be indexed.
 258:      * @param array $aStopwords Array with words which should not be indexed.
 259:      */
 260:     public function start($idart, $aContent, $place = 'auto', $cms_options = array(), $aStopwords = array()) {
 261:         if (!is_int((int) $idart) || $idart < 0) {
 262:             return;
 263:         } else {
 264:             $this->idart = $idart;
 265:         }
 266: 
 267:         $this->_place = $place;
 268:         $this->_keycode = $aContent;
 269:         $this->setStopwords($aStopwords);
 270:         $this->setCmsOptions($cms_options);
 271: 
 272:         $this->createKeywords();
 273: 
 274:         $this->getKeywords();
 275: 
 276:         $this->saveKeywords();
 277: 
 278:         $new_keys = array_keys($this->_keywords);
 279:         $old_keys = array_keys($this->_keywordsOld);
 280: 
 281:         $this->_keywordsDel = array_diff($old_keys, $new_keys);
 282: 
 283:         if (count($this->_keywordsDel) > 0) {
 284:             $this->deleteKeywords();
 285:         }
 286:     }
 287: 
 288:     /**
 289:      * for each cms-type create index structure.
 290:      * it looks like
 291:      * Array (
 292:      * [die] => CMS_HTML-1
 293:      * [inhalte] => CMS_HTML-1
 294:      * [auf] => CMS_HTML-1 CMS_HTMLHEAD-2
 295:      * [dieser] => CMS_HTML-1
 296:      * [website] => CMS_HTML-1 CMS_HTML-1 CMS_HTMLHEAD-2
 297:      * )
 298:      */
 299:     public function createKeywords() {
 300:         $tmp_keys = array();
 301: 
 302:         // Only create keycodes, if some are available
 303:         if (is_array($this->_keycode)) {
 304:             foreach ($this->_keycode as $idtype => $data) {
 305:                 if ($this->checkCmsType($idtype)) {
 306:                     foreach ($data as $typeid => $code) {
 307:                         $this->_debug('code', $code);
 308: 
 309:                         // remove backslash
 310:                         $code = stripslashes($code);
 311:                         // replace HTML line breaks with newlines
 312:                         $code = str_ireplace(array(
 313:                             '<br>',
 314:                             '<br />'
 315:                         ), "\n", $code);
 316:                         // remove html tags
 317:                         $code = strip_tags($code);
 318:                         if (strlen($code) > 0) {
 319:                             $code = conHtmlEntityDecode($code);
 320:                         }
 321:                         $this->_debug('code', $code);
 322: 
 323:                         // split content by any number of commas or space
 324:                         // characters
 325:                         $tmp_keys = preg_split('/[\s,]+/', trim($code));
 326:                         $this->_debug('tmp_keys', $tmp_keys);
 327: 
 328:                         foreach ($tmp_keys as $value) {
 329:                             // index terms are stored with lower case
 330:                             // $value = strtolower($value);
 331: 
 332:                             $value = conHtmlentities($value);
 333:                             $value = trim(strtolower($value));
 334:                             $value = conHtmlEntityDecode($value);
 335: 
 336:                             if (!in_array($value, $this->_stopwords)) {
 337:                                 // eliminate stopwords
 338:                                 $value = $this->removeSpecialChars($value);
 339: 
 340:                                 if (strlen($value) > 1) {
 341:                                     // do not index single characters
 342:                                     $this->_keywords[$value] = $this->_keywords[$value] . $idtype . '-' . $typeid . ' ';
 343:                                 }
 344:                             }
 345:                         }
 346:                     }
 347:                 }
 348: 
 349:                 unset($tmp_keys);
 350:             }
 351:         }
 352: 
 353:         $this->_debug('keywords', $this->_keywords);
 354:     }
 355: 
 356:     /**
 357:      * generate index_string from index structure and save keywords
 358:      * The index_string looks like "&12=2(CMS_HTMLHEAD-1,CMS_HTML-1)"
 359:      */
 360:     public function saveKeywords() {
 361:         $tmp_count = array();
 362: 
 363:         foreach ($this->_keywords as $keyword => $count) {
 364:             $tmp_count = preg_split('/[\s]/', trim($count));
 365:             $this->_debug('tmp_count', $tmp_count);
 366: 
 367:             $occurrence = count($tmp_count);
 368:             $tmp_count = array_unique($tmp_count);
 369:             $cms_types = implode(',', $tmp_count);
 370:             $index_string = '&' . $this->idart . '=' . $occurrence . '(' . $cms_types . ')';
 371: 
 372:             if (!array_key_exists($keyword, $this->_keywordsOld)) {
 373:                 // if keyword is new, save index information
 374:                 // $nextid = $this->db->nextid($this->cfg['tab']['keywords']);
 375:                 $sql = "INSERT INTO " . $this->cfg['tab']['keywords'] . "
 376:                             (keyword, " . $this->_place . ", idlang)
 377:                         VALUES
 378:                             ('" . $this->db->escape($keyword) . "', '" . $this->db->escape($index_string) . "', " . cSecurity::toInteger($this->lang) . ")";
 379:             } else {
 380:                 // if keyword allready exists, create new index_string
 381:                 if (preg_match("/&$this->idart=/", $this->_keywordsOld[$keyword])) {
 382:                     $index_string = preg_replace("/&$this->idart=[0-9]+\([\w-,]+\)/", $index_string, $this->_keywordsOld[$keyword]);
 383:                 } else {
 384:                     $index_string = $this->_keywordsOld[$keyword] . $index_string;
 385:                 }
 386: 
 387:                 $sql = "UPDATE " . $this->cfg['tab']['keywords'] . "
 388:                         SET " . $this->_place . " = '" . $index_string . "'
 389:                         WHERE idlang='" . cSecurity::toInteger($this->lang) . "' AND keyword='" . $this->db->escape($keyword) . "'";
 390:             }
 391:             $this->_debug('sql', $sql);
 392:             $this->db->query($sql);
 393:         }
 394:     }
 395: 
 396:     /**
 397:      * if keywords don't occur in the article anymore, update index_string and
 398:      * delete keyword if necessary
 399:      */
 400:     public function deleteKeywords() {
 401:         foreach ($this->_keywordsDel as $key_del) {
 402:             $index_string = preg_replace("/&$this->idart=[0-9]+\([\w-,]+\)/", "", $this->_keywordsOld[$key_del]);
 403: 
 404:             if (strlen($index_string) == 0) {
 405:                 // keyword is not referenced by any article
 406:                 $sql = "DELETE FROM " . $this->cfg['tab']['keywords'] . "
 407:                     WHERE idlang = " . cSecurity::toInteger($this->lang) . " AND keyword = '" . $this->db->escape($key_del) . "'";
 408:             } else {
 409:                 $sql = "UPDATE " . $this->cfg['tab']['keywords'] . "
 410:                     SET " . $this->_place . " = '" . $index_string . "'
 411:                     WHERE idlang = " . cSecurity::toInteger($this->lang) . " AND keyword = '" . $this->db->escape($key_del) . "'";
 412:             }
 413:             $this->_debug('sql', $sql);
 414:             $this->db->query($sql);
 415:         }
 416:     }
 417: 
 418:     /**
 419:      * get the keywords of an article
 420:      */
 421:     public function getKeywords() {
 422:         $keys = implode("','", array_keys($this->_keywords));
 423: 
 424:         $sql = "SELECT
 425:                     keyword, auto, self
 426:                 FROM
 427:                     " . $this->cfg['tab']['keywords'] . "
 428:                 WHERE
 429:                     idlang=" . cSecurity::toInteger($this->lang) . "  AND
 430:                     (keyword IN ('" . $keys . "')  OR " . $this->_place . " REGEXP '&" . cSecurity::toInteger($this->idart) . "=')";
 431: 
 432:         $this->_debug('sql', $sql);
 433: 
 434:         $this->db->query($sql);
 435: 
 436:         $place = $this->_place;
 437: 
 438:         while ($this->db->nextRecord()) {
 439:             $this->_keywordsOld[$this->db->f('keyword')] = $this->db->f($place);
 440:         }
 441:     }
 442: 
 443:     /**
 444:      * remove special characters from index term
 445:      *
 446:      * @param string $key Keyword
 447:      * @return mixed
 448:      */
 449:     public function removeSpecialChars($key) {
 450:         $aSpecialChars = array(
 451:             /*"-",*/
 452:             "_",
 453:             "'",
 454:             ".",
 455:             "!",
 456:             "\"",
 457:             "#",
 458:             "$",
 459:             "%",
 460:             "&",
 461:             "(",
 462:             ")",
 463:             "*",
 464:             "+",
 465:             ",",
 466:             "/",
 467:             ":",
 468:             ";",
 469:             "<",
 470:             "=",
 471:             ">",
 472:             "?",
 473:             "@",
 474:             "[",
 475:             "\\",
 476:             "]",
 477:             "^",
 478:             "`",
 479:             "{",
 480:             "|",
 481:             "}",
 482:             "~",
 483:             "„"
 484:         );
 485: 
 486:         // for ($i = 127; $i < 192; $i++) {
 487:         // some other special characters
 488:         // $aSpecialChars[] = chr($i);
 489:         // }
 490: 
 491:         // TODO: The transformation of accented characters must depend on the
 492:         // selected encoding of the language of
 493:         // a client and should not be treated in this method.
 494:         // modified 2007-10-01, H. Librenz - added as hotfix for encoding
 495:         // problems (doesn't find any words with
 496:         // umlaut vowels in it since you turn on UTF-8 as language encoding)
 497:         $sEncoding = getEncodingByLanguage($this->db, $this->lang);
 498: 
 499:         if (strtolower($sEncoding) != 'iso-8859-2') {
 500:             $key = conHtmlentities($key, NULL, $sEncoding);
 501:         } else {
 502:             $key = htmlentities_iso88592($key);
 503:         }
 504: 
 505:         // $aUmlautMap = array(
 506:         // '&Uuml;' => 'ue',
 507:         // '&uuml;' => 'ue',
 508:         // '&Auml;' => 'ae',
 509:         // '&auml;' => 'ae',
 510:         // '&Ouml;' => 'oe',
 511:         // '&ouml;' => 'oe',
 512:         // '&szlig;' => 'ss'
 513:         // );
 514: 
 515:         // foreach ($aUmlautMap as $sUmlaut => $sMapped) {
 516:         // $key = str_replace($sUmlaut, $sMapped, $key);
 517:         // }
 518: 
 519:         $key = conHtmlEntityDecode($key);
 520:         $key = str_replace($aSpecialChars, '', $key);
 521: 
 522:         return $key;
 523:     }
 524: 
 525:     /**
 526:      *
 527:      * @param string $key Keyword
 528:      * @return string
 529:      */
 530:     public function addSpecialUmlauts($key) {
 531:         $key = conHtmlentities($key, NULL, getEncodingByLanguage($this->db, $this->lang));
 532:         $aUmlautMap = array(
 533:             'Ue' => '&Uuml;',
 534:             'ue' => '&uuml;',
 535:             'Ae' => '&Auml;',
 536:             'ae' => '&auml;',
 537:             'Oe' => '&Ouml;',
 538:             'oe' => '&ouml;',
 539:             'ss' => '&szlig;'
 540:         );
 541: 
 542:         foreach ($aUmlautMap as $sUmlaut => $sMapped) {
 543:             $key = str_replace($sUmlaut, $sMapped, $key);
 544:         }
 545: 
 546:         $key = conHtmlEntityDecode($key);
 547:         return $key;
 548:     }
 549: 
 550:     /**
 551:      * set the array of stopwords which should not be indexed
 552:      *
 553:      * @param array $aStopwords
 554:      */
 555:     public function setStopwords($aStopwords) {
 556:         if (is_array($aStopwords) && count($aStopwords) > 0) {
 557:             $this->_stopwords = $aStopwords;
 558:         }
 559:     }
 560: 
 561:     /**
 562:      * set the cms types
 563:      */
 564:     public function setContentTypes() {
 565:         $sql = "SELECT type, idtype FROM " . $this->cfg['tab']['type'] . ' ';
 566:         $this->_debug('sql', $sql);
 567:         $this->db->query($sql);
 568:         while ($this->db->nextRecord()) {
 569:             $this->_cmsType[$this->db->f('type')] = $this->db->f('idtype');
 570:             $this->_cmsTypeSuffix[$this->db->f('idtype')] = substr($this->db->f('type'), 4, strlen($this->db->f('type')));
 571:         }
 572:     }
 573: 
 574:     /**
 575:      * set the cms_options array of cms types which should be treated special
 576:      *
 577:      * @param mixed $cms_options
 578:      */
 579:     public function setCmsOptions($cms_options) {
 580:         if (is_array($cms_options) && count($cms_options) > 0) {
 581:             foreach ($cms_options as $opt) {
 582:                 $opt = strtoupper($opt);
 583: 
 584:                 if (strlen($opt) > 0) {
 585:                     if (!stristr($opt, 'cms_')) {
 586:                         if (in_array($opt, $this->_cmsTypeSuffix)) {
 587:                             $this->_cmsOptions[$opt] = 'CMS_' . $opt;
 588:                         }
 589:                     } else {
 590:                         if (array_key_exists($opt, $this->_cmsType)) {
 591:                             $this->_cmsOptions[$opt] = $opt;
 592:                         }
 593:                     }
 594:                 }
 595:             }
 596:         } else {
 597:             $this->_cmsOptions = array();
 598:         }
 599:     }
 600: 
 601:     /**
 602:      * check if the current cms type is in the cms_options array
 603:      *
 604:      * @param string $idtype
 605:      * @return boolean
 606:      */
 607:     public function checkCmsType($idtype) {
 608:         $idtype = strtoupper($idtype);
 609:         return (in_array($idtype, $this->_cmsOptions)) ? false : true;
 610:     }
 611: 
 612:     /**
 613:      *
 614:      * @return array the _cmsType property
 615:      */
 616:     public function getCmsType() {
 617:         return $this->_cmsType;
 618:     }
 619: 
 620:     /**
 621:      *
 622:      * @return array the _cmsTypeSuffix property
 623:      */
 624:     public function getCmsTypeSuffix() {
 625:         return $this->_cmsTypeSuffix;
 626:     }
 627: }
 628: 
 629: /**
 630:  * CONTENIDO API - Search Object
 631:  *
 632:  * This object starts a indexed fulltext search
 633:  *
 634:  * TODO:
 635:  * The way to set the search options could be done much better!
 636:  * The computation of the set of searchable articles should not be treated in
 637:  * this class.
 638:  * It is better to compute the array of searchable articles from the outside and
 639:  * to pass the array of searchable articles as parameter.
 640:  * Avoid foreach loops.
 641:  *
 642:  * Use object with
 643:  *
 644:  * $options = array('db' => 'regexp', // use db function regexp
 645:  * 'combine' => 'or'); // combine searchwords with or
 646:  *
 647:  * The range of searchable articles is by default the complete content which is
 648:  * online and not protected.
 649:  *
 650:  * With option 'searchable_articles' you can define your own set of searchable
 651:  * articles.
 652:  * If parameter 'searchable_articles' is set the options 'cat_tree',
 653:  * 'categories', 'articles', 'exclude', 'artspecs',
 654:  * 'protected', 'dontshowofflinearticles' don't have any effect.
 655:  *
 656:  * $options = array('db' => 'regexp', // use db function regexp
 657:  * 'combine' => 'or', // combine searchwords with or
 658:  * 'searchable_articles' => array(5, 6, 9, 13));
 659:  *
 660:  * One can define the range of searchable articles by setting the parameter
 661:  * 'exclude' to false which means the range of categories
 662:  * defined by parameter 'cat_tree' or 'categories' and the range of articles
 663:  * defined by parameter 'articles' is included.
 664:  *
 665:  * $options = array('db' => 'regexp', // use db function regexp
 666:  * 'combine' => 'or', // combine searchwords with or
 667:  * 'exclude' => false, // => searchrange specified in 'cat_tree', 'categories'
 668:  * and 'articles' is included
 669:  * 'cat_tree' => array(12), // tree with root 12 included
 670:  * 'categories' => array(100,111), // categories 100, 111 included
 671:  * 'articles' => array(33), // article 33 included
 672:  * 'artspecs' => array(2, 3), // array of article specifications => search only
 673:  * articles with these artspecs
 674:  * 'res_per_page' => 2, // results per page
 675:  * 'protected' => true, // => do not search articles or articles in categories
 676:  * which are offline or protected
 677:  * 'dontshowofflinearticles' => false); // => search offline articles or
 678:  * articles in categories which are offline
 679:  *
 680:  * You can build the complement of the range of searchable articles by setting
 681:  * the parameter 'exclude' to true which means the range of categories
 682:  * defined by parameter 'cat_tree' or 'categories' and the range of articles
 683:  * defined by parameter 'articles' is excluded from search.
 684:  *
 685:  * $options = array('db' => 'regexp', // use db function regexp
 686:  * 'combine' => 'or', // combine searchwords with or
 687:  * 'exclude' => true, // => searchrange specified in 'cat_tree', 'categories'
 688:  * and 'articles' is excluded
 689:  * 'cat_tree' => array(12), // tree with root 12 excluded
 690:  * 'categories' => array(100,111), // categories 100, 111 excluded
 691:  * 'articles' => array(33), // article 33 excluded
 692:  * 'artspecs' => array(2, 3), // array of article specifications => search only
 693:  * articles with these artspecs
 694:  * 'res_per_page' => 2, // results per page
 695:  * 'protected' => true, // => do not search articles or articles in categories
 696:  * which are offline or protected
 697:  * 'dontshowofflinearticles' => false); // => search offline articles or
 698:  * articles in categories which are offline
 699:  *
 700:  * $search = new cSearch($options);
 701:  *
 702:  * $cms_options = array("htmlhead", "html", "head", "text", "imgdescr", "link",
 703:  * "linkdescr");
 704:  * search only in these cms-types
 705:  * $search->setCmsOptions($cms_options);
 706:  *
 707:  * $search_result = $search->searchIndex($searchword, $searchwordex); // start
 708:  * search
 709:  *
 710:  * The search result structure has following form
 711:  * Array (
 712:  * [20] => Array (
 713:  * [CMS_HTML] => Array (
 714:  * [0] => 1
 715:  * [1] => 1
 716:  * [2] => 1
 717:  * )
 718:  * [keyword] => Array (
 719:  * [0] => content
 720:  * [1] => contenido
 721:  * [2] => wwwcontenidoorg
 722:  * )
 723:  * [search] => Array (
 724:  * [0] => con
 725:  * [1] => con
 726:  * [2] => con
 727:  * )
 728:  * [occurence] => Array (
 729:  * [0] => 1
 730:  * [1] => 5
 731:  * [2] => 1
 732:  * )
 733:  * [similarity] => 60
 734:  * )
 735:  * )
 736:  *
 737:  * The keys of the array are the article ID's found by search.
 738:  *
 739:  * Searching 'con' matches keywords 'content', 'contenido' and 'wwwcontenidoorg'
 740:  * in article with ID 20 in content type CMS_HTML[1].
 741:  * The search term occurs 7 times.
 742:  * The maximum similarity between searchterm and matching keyword is 60%.
 743:  *
 744:  * with $oSearchResults = new cSearchResult($search_result, 10);
 745:  * one can rank and display the results
 746:  *
 747:  * @package Core
 748:  * @subpackage Frontend_Search
 749:  */
 750: class cSearch extends cSearchBaseAbstract {
 751: 
 752:     /**
 753:      * Instance of class Index
 754:      *
 755:      * @var object
 756:      */
 757:     protected $_index;
 758: 
 759:     /**
 760:      * array of available cms types
 761:      *
 762:      * @var array
 763:      */
 764:     protected $_cmsType = array();
 765: 
 766:     /**
 767:      * suffix of available cms types
 768:      *
 769:      * @var array
 770:      */
 771:     protected $_cmsTypeSuffix = array();
 772: 
 773:     /**
 774:      * the search words
 775:      *
 776:      * @var array
 777:      */
 778:     protected $_searchWords = array();
 779: 
 780:     /**
 781:      * the words which should be excluded from search
 782:      *
 783:      * @var array
 784:      */
 785:     protected $_searchWordsExclude = array();
 786: 
 787:     /**
 788:      * type of db search
 789:      * like => 'sql like', regexp => 'sql regexp'
 790:      *
 791:      * @var string
 792:      */
 793:     protected $_searchOption;
 794: 
 795:     /**
 796:      * logical combination of searchwords (and, or)
 797:      *
 798:      * @var string
 799:      */
 800:     protected $_searchCombination;
 801: 
 802:     /**
 803:      * array of searchable articles
 804:      *
 805:      * @var array
 806:      */
 807:     protected $_searchableArts = array();
 808: 
 809:     /**
 810:      * article specifications
 811:      *
 812:      * @var array
 813:      */
 814:     protected $_articleSpecs = array();
 815: 
 816:     /**
 817:      * If $protected = true => do not search articles which are offline or
 818:      * articles in categories which are offline (protected)
 819:      *
 820:      * @var boolean
 821:      */
 822:     protected $_protected;
 823: 
 824:     /**
 825:      * If $dontshowofflinearticles = false => search offline articles or
 826:      * articles in categories which are offline
 827:      *
 828:      * @var boolean
 829:      */
 830:     protected $_dontshowofflinearticles;
 831: 
 832:     /**
 833:      * If $exclude = true => the specified search range is excluded from search,
 834:      * otherwise included
 835:      *
 836:      * @var boolean
 837:      */
 838:     protected $_exclude;
 839: 
 840:     /**
 841:      * Array of article id's with information about cms-types, occurence of
 842:      * keyword/searchword, similarity .
 843:      *
 844:      *
 845:      *
 846:      * @var array
 847:      */
 848:     protected $_searchResult = array();
 849: 
 850:     /**
 851:      * Constructor
 852:      *
 853:      * @param array $options $options['db'] 'regexp' => DB search with REGEXP;
 854:      *        'like' => DB search with LIKE; 'exact' => exact match;
 855:      *        $options['combine'] 'and', 'or' Combination of search words with
 856:      *        AND, OR
 857:      *        $options['exclude'] 'true' => searchrange specified in 'cat_tree',
 858:      *        'categories' and 'articles' is excluded; 'false' =>
 859:      *        searchrange specified in 'cat_tree', 'categories' and
 860:      *        'articles' is included
 861:      *        $options['cat_tree'] e.g. array(8) => The complete tree with root
 862:      *        8 is in/excluded from search
 863:      *        $options['categories'] e.g. array(10, 12) => Categories 10, 12
 864:      *        in/excluded
 865:      *        $options['articles'] e.g. array(23) => Article 33 in/excluded
 866:      *        $options['artspecs'] => e.g. array(2, 3) => search only articles
 867:      *        with certain article specifications
 868:      *        $options['protected'] 'true' => do not search articles which are
 869:      *        offline (locked) or articles in catgeories which are offline
 870:      *        (protected)
 871:      *        $options['dontshowofflinearticles'] 'false' => search offline
 872:      *        articles or articles in categories which are offline
 873:      *        $options['searchable_articles'] array of article ID's which should
 874:      *        be searchable
 875:      * @param cDb $db Optional database instance
 876:      */
 877:     public function __construct($options, $db = NULL) {
 878:         parent::__construct($db);
 879: 
 880:         $this->_index = new cSearchIndex($db);
 881: 
 882:         $this->_cmsType = $this->_index->cms_type;
 883:         $this->_cmsTypeSuffix = $this->_index->cms_type_suffix;
 884: 
 885:         $this->_searchOption = (array_key_exists('db', $options)) ? strtolower($options['db']) : 'regexp';
 886:         $this->_searchCombination = (array_key_exists('combine', $options)) ? strtolower($options['combine']) : 'or';
 887:         $this->_protected = (array_key_exists('protected', $options)) ? $options['protected'] : true;
 888:         $this->_dontshowofflinearticles = (array_key_exists('dontshowofflinearticles', $options)) ? $options['dontshowofflinearticles'] : false;
 889:         $this->_exclude = (array_key_exists('exclude', $options)) ? $options['exclude'] : true;
 890:         $this->_articleSpecs = (array_key_exists('artspecs', $options) && is_array($options['artspecs'])) ? $options['artspecs'] : array();
 891:         $this->_index->setCmsOptions($this->_cmsTypeSuffix);
 892: 
 893:         if (array_key_exists('searchable_articles', $options) && is_array($options['searchable_articles'])) {
 894:             $this->_searchableArts = $options['searchable_articles'];
 895:         } else {
 896:             $this->_searchableArts = $this->getSearchableArticles($options);
 897:         }
 898: 
 899:         // minimum similarity between searchword and keyword in percent
 900:         $this->intMinimumSimilarity = 50;
 901:     }
 902: 
 903:     /**
 904:      * indexed fulltext search
 905:      *
 906:      * @param string $searchwords The search words
 907:      * @param string $searchwords_exclude The words, which should be excluded
 908:      *        from search
 909:      * @return boolean multitype:
 910:      */
 911:     public function searchIndex($searchwords, $searchwords_exclude = '') {
 912:         if (strlen(trim($searchwords)) > 0) {
 913:             $this->_searchWords = $this->stripWords($searchwords);
 914:         } else {
 915:             return false;
 916:         }
 917: 
 918:         if (strlen(trim($searchwords_exclude)) > 0) {
 919:             $this->_searchWordsExclude = $this->stripWords($searchwords_exclude);
 920:         }
 921: 
 922:         $tmp_searchwords = array();
 923:         foreach ($this->_searchWords as $word) {
 924:             $wordEscaped = $this->db->escape($word);
 925:             if ($this->_searchOption == 'like') {
 926:                 $wordEscaped = "'%" . $wordEscaped . "%'";
 927:             } elseif ($this->_searchOption == 'exact') {
 928:                 $wordEscaped = "'" . $wordEscaped . "'";
 929:             }
 930:             $tmp_searchwords[] = $word;
 931:         }
 932: 
 933:         if (count($this->_searchWordsExclude) > 0) {
 934:             foreach ($this->_searchWordsExclude as $word) {
 935:                 $wordEscaped = $this->db->escape($word);
 936:                 if ($this->_searchOption == 'like') {
 937:                     $wordEscaped = "'%" . $wordEscaped . "%'";
 938:                 } elseif ($this->_searchOption == 'exact') {
 939:                     $wordEscaped = "'" . $wordEscaped . "'";
 940:                 }
 941:                 $tmp_searchwords[] = $wordEscaped;
 942:                 $this->_searchWords[] = $word;
 943:             }
 944:         }
 945: 
 946:         if ($this->_searchOption == 'regexp') {
 947:             // regexp search
 948:             $kwSql = "keyword REGEXP '" . implode('|', $tmp_searchwords) . "'";
 949:         } elseif ($this->_searchOption == 'like') {
 950:             // like search
 951:             $search_like = implode(" OR keyword LIKE ", $tmp_searchwords);
 952:             $kwSql = "keyword LIKE '" . $search_like;
 953:         } elseif ($this->_searchOption == 'exact') {
 954:             // exact match
 955:             $search_exact = implode(" OR keyword = ", $tmp_searchwords);
 956:             $kwSql = "keyword LIKE '" . $search_exact;
 957:         }
 958: 
 959:         $sql = "SELECT keyword, auto FROM " . $this->cfg['tab']['keywords'] . " WHERE idlang=" . cSecurity::toInteger($this->lang) . " AND " . $kwSql . " ";
 960:         $this->_debug('sql', $sql);
 961:         $this->db->query($sql);
 962: 
 963:         while ($this->db->nextRecord()) {
 964: 
 965:             $tmp_index_string = preg_split('/&/', $this->db->f('auto'), -1, PREG_SPLIT_NO_EMPTY);
 966: 
 967:             $this->_debug('index', $this->db->f('auto'));
 968: 
 969:             $tmp_index = array();
 970:             foreach ($tmp_index_string as $string) {
 971:                 $tmp_string = preg_replace('/[=\(\)]/', ' ', $string);
 972:                 $tmp_index[] = preg_split('/\s/', $tmp_string, -1, PREG_SPLIT_NO_EMPTY);
 973:             }
 974:             $this->_debug('tmp_index', $tmp_index);
 975: 
 976:             foreach ($tmp_index as $string) {
 977:                 $artid = $string[0];
 978: 
 979:                 // filter nonsearchable articles
 980:                 if (in_array($artid, $this->_searchableArts)) {
 981: 
 982:                     $cms_place = $string[2];
 983:                     $keyword = $this->db->f('keyword');
 984:                     $percent = 0;
 985:                     $similarity = 0;
 986:                     foreach ($this->_searchWords as $word) {
 987:                         // computes similarity between searchword and keyword in
 988:                         // percent
 989:                         similar_text($word, $keyword, $percent);
 990:                         if ($percent > $similarity) {
 991:                             $similarity = $percent;
 992:                             $searchword = $word;
 993:                         }
 994:                     }
 995: 
 996:                     $tmp_cmstype = preg_split('/[,]/', $cms_place, -1, PREG_SPLIT_NO_EMPTY);
 997:                     $this->_debug('tmp_cmstype', $tmp_cmstype);
 998: 
 999:                     $tmp_cmstype2 = array();
1000:                     foreach ($tmp_cmstype as $type) {
1001:                         $tmp_cmstype2[] = preg_split('/-/', $type, -1, PREG_SPLIT_NO_EMPTY);
1002:                     }
1003:                     $this->_debug('tmp_cmstype2', $tmp_cmstype2);
1004: 
1005:                     foreach ($tmp_cmstype2 as $type) {
1006:                         if (!$this->_index->checkCmsType($type[0])) {
1007:                             // search for specified cms-types
1008:                             if ($similarity >= $this->intMinimumSimilarity) {
1009:                                 // include article into searchresult set only if
1010:                                 // similarity between searchword and keyword is
1011:                                 // big enough
1012:                                 $this->_searchResult[$artid][$type[0]][] = $type[1];
1013:                                 $this->_searchResult[$artid]['keyword'][] = $this->db->f('keyword');
1014:                                 $this->_searchResult[$artid]['search'][] = $searchword;
1015:                                 $this->_searchResult[$artid]['occurence'][] = $string[1];
1016:                                 $this->_searchResult[$artid]['debug_similarity'][] = $percent;
1017:                                 if ($similarity > $this->_searchResult[$artid]['similarity']) {
1018:                                     $this->_searchResult[$artid]['similarity'] = $similarity;
1019:                                 }
1020:                             }
1021:                         }
1022:                     }
1023:                 }
1024:             }
1025:         }
1026: 
1027:         if ($this->_searchCombination == 'and') {
1028:             // all search words must appear in the article
1029:             foreach ($this->_searchResult as $article => $val) {
1030:                 if (!count(array_diff($this->_searchWords, $val['search'])) == 0) {
1031:                     // $this->rank_structure[$article] = $rank[$article];
1032:                     unset($this->_searchResult[$article]);
1033:                 }
1034:             }
1035:         }
1036: 
1037:         if (count($this->_searchWordsExclude) > 0) {
1038:             // search words to be excluded must not appear in article
1039:             foreach ($this->_searchResult as $article => $val) {
1040:                 if (!count(array_intersect($this->_searchWordsExclude, $val['search'])) == 0) {
1041:                     // $this->rank_structure[$article] = $rank[$article];
1042:                     unset($this->_searchResult[$article]);
1043:                 }
1044:             }
1045:         }
1046: 
1047:         $this->_debug('$this->search_result', $this->_searchResult);
1048:         $this->_debug('$this->searchable_arts', $this->_searchableArts);
1049: 
1050:         $searchTracking = new cApiSearchTrackingCollection();
1051:         $searchTracking->trackSearch($searchwords, count($this->_searchResult));
1052: 
1053:         return $this->_searchResult;
1054:     }
1055: 
1056:     /**
1057:      *
1058:      * @param mixed $cms_options The cms-types (htmlhead, html, ...) which
1059:      *            should
1060:      *        explicitly be searched
1061:      */
1062:     public function setCmsOptions($cms_options) {
1063:         if (is_array($cms_options) && count($cms_options) > 0) {
1064:             $this->_index->setCmsOptions($cms_options);
1065:         }
1066:     }
1067: 
1068:     /**
1069:      *
1070:      * @param string $searchwords The search-words
1071:      * @return array of stripped search-words
1072:      */
1073:     public function stripWords($searchwords) {
1074:         // remove backslash and html tags
1075:         $searchwords = trim(strip_tags(stripslashes($searchwords)));
1076: 
1077:         // split the phrase by any number of commas or space characters
1078:         $tmp_words = preg_split('/[\s,]+/', $searchwords);
1079: 
1080:         $tmp_searchwords = array();
1081: 
1082:         foreach ($tmp_words as $word) {
1083: 
1084:             $word = htmlentities($word, ENT_COMPAT, 'UTF-8');
1085:             $word = (trim(strtolower($word)));
1086:             $word = html_entity_decode($word, ENT_COMPAT, 'UTF-8');
1087: 
1088:             // $word =(trim(strtolower($word)));
1089:             if (strlen($word) > 1) {
1090:                 $tmp_searchwords[] = $word;
1091:             }
1092:         }
1093: 
1094:         return array_unique($tmp_searchwords);
1095:     }
1096: 
1097:     /**
1098:      * Returns the category tree array.
1099:      *
1100:      * @param int $cat_start Root of a category tree
1101:      * @return array Category Tree
1102:      * @todo This is not the job for search, should be outsourced ...
1103:      */
1104:     public function getSubTree($cat_start) {
1105:         $sql = "SELECT
1106:                 B.idcat, B.parentid
1107:             FROM
1108:                 " . $this->cfg['tab']['cat_tree'] . " AS A,
1109:                 " . $this->cfg['tab']['cat'] . " AS B,
1110:                 " . $this->cfg['tab']['cat_lang'] . " AS C
1111:             WHERE
1112:                 A.idcat  = B.idcat AND
1113:                 B.idcat  = C.idcat AND
1114:                 C.idlang = '" . cSecurity::toInteger($this->lang) . "' AND
1115:                 B.idclient = '" . cSecurity::toInteger($this->client) . "'
1116:             ORDER BY
1117:                 idtree";
1118:         $this->_debug('sql', $sql);
1119:         $this->db->query($sql);
1120: 
1121:         // $aSubCats = array();
1122:         // $i = false;
1123:         // while ($this->db->nextRecord()) {
1124:         // if ($this->db->f('parentid') < $cat_start) {
1125:         // // ending part of tree
1126:         // $i = false;
1127:         // }
1128:         // if ($this->db->f('idcat') == $cat_start) {
1129:         // // starting part of tree
1130:         // $i = true;
1131:         // }
1132:         // if ($i == true) {
1133:         // $aSubCats[] = $this->db->f('idcat');
1134:         // }
1135:         // }
1136: 
1137:         $aSubCats = array(
1138:             $cat_start
1139:         );
1140:         while ($this->db->nextRecord()) {
1141:             // ommit if cat is no child of any recognized descendant
1142:             if (!in_array($this->db->f('parentid'), $aSubCats)) {
1143:                 continue;
1144:             }
1145:             // ommit if cat is already recognized (happens with $cat_start)
1146:             if (in_array($this->db->f('idcat'), $aSubCats)) {
1147:                 continue;
1148:             }
1149:             // add cat as recognized descendant
1150:             $aSubCats[] = $this->db->f('idcat');
1151:         }
1152: 
1153:         return $aSubCats;
1154:     }
1155: 
1156:     /**
1157:      * Returns list of searchable article ids.
1158:      *
1159:      * @param array $search_range
1160:      * @return array Articles in specified search range
1161:      */
1162:     public function getSearchableArticles($search_range) {
1163:         $aCatRange = array();
1164:         if (array_key_exists('cat_tree', $search_range) && is_array($search_range['cat_tree'])) {
1165:             if (count($search_range['cat_tree']) > 0) {
1166:                 foreach ($search_range['cat_tree'] as $cat) {
1167:                     $aCatRange = array_merge($aCatRange, $this->getSubTree($cat));
1168:                 }
1169:             }
1170:         }
1171: 
1172:         if (array_key_exists('categories', $search_range) && is_array($search_range['categories'])) {
1173:             if (count($search_range['categories']) > 0) {
1174:                 $aCatRange = array_merge($aCatRange, $search_range['categories']);
1175:             }
1176:         }
1177: 
1178:         $aCatRange = array_unique($aCatRange);
1179:         $sCatRange = implode("','", $aCatRange);
1180: 
1181:         if (array_key_exists('articles', $search_range) && is_array($search_range['articles'])) {
1182:             if (count($search_range['articles']) > 0) {
1183:                 $sArtRange = implode("','", $search_range['articles']);
1184:             } else {
1185:                 $sArtRange = '';
1186:             }
1187:         }
1188: 
1189:         if ($this->_protected == true) {
1190:             $sProtected = " C.public = 1 AND C.visible = 1 AND B.online = 1 ";
1191:         } else {
1192:             if ($this->_dontshowofflinearticles == true) {
1193:                 $sProtected = " C.visible = 1 AND B.online = 1 ";
1194:             } else {
1195:                 $sProtected = " 1 ";
1196:             }
1197:         }
1198: 
1199:         if ($this->_exclude == true) {
1200:             // exclude searchrange
1201:             $sSearchRange = " A.idcat NOT IN ('" . $sCatRange . "') AND B.idart NOT IN ('" . $sArtRange . "') AND ";
1202:         } else {
1203:             // include searchrange
1204:             if (strlen($sArtRange) > 0) {
1205:                 $sSearchRange = " A.idcat IN ('" . $sCatRange . "') AND B.idart IN ('" . $sArtRange . "') AND ";
1206:             } else {
1207:                 $sSearchRange = " A.idcat IN ('" . $sCatRange . "') AND ";
1208:             }
1209:         }
1210: 
1211:         if (count($this->_articleSpecs) > 0) {
1212:             $sArtSpecs = " B.artspec IN ('" . implode("','", $this->_articleSpecs) . "') AND ";
1213:         } else {
1214:             $sArtSpecs = '';
1215:         }
1216: 
1217:         $sql = "SELECT
1218:                     A.idart
1219:                 FROM
1220:                     " . $this->cfg["tab"]["cat_art"] . " as A,
1221:                     " . $this->cfg["tab"]["art_lang"] . " as B,
1222:                     " . $this->cfg["tab"]["cat_lang"] . " as C
1223:                 WHERE
1224:                     " . $sSearchRange . "
1225:                     B.idlang = '" . cSecurity::toInteger($this->lang) . "' AND
1226:                     C.idlang = '" . cSecurity::toInteger($this->lang) . "' AND
1227:                     A.idart = B.idart AND
1228:                     B.searchable = 1  AND
1229:                     A.idcat = C.idcat AND
1230:                     " . $sArtSpecs . "
1231:                     " . $sProtected . " ";
1232:         $this->_debug('sql', $sql);
1233:         $this->db->query($sql);
1234: 
1235:         $aIdArts = array();
1236:         while ($this->db->nextRecord()) {
1237:             $aIdArts[] = $this->db->f('idart');
1238:         }
1239:         return $aIdArts;
1240:     }
1241: 
1242:     /**
1243:      * Fetch all article specifications which are online,
1244:      *
1245:      * @return array Array of article specification Ids
1246:      */
1247:     public function getArticleSpecifications() {
1248:         $sql = "SELECT
1249:                     idartspec
1250:                 FROM
1251:                     " . $this->cfg['tab']['art_spec'] . "
1252:                 WHERE
1253:                     client = " . cSecurity::toInteger($this->client) . " AND
1254:                     lang = " . cSecurity::toInteger($this->lang) . " AND
1255:                     online = 1 ";
1256:         $this->_debug('sql', $sql);
1257:         $this->db->query($sql);
1258:         $aArtspec = array();
1259:         while ($this->db->nextRecord()) {
1260:             $aArtspec[] = $this->db->f('idartspec');
1261:         }
1262:         return $aArtspec;
1263:     }
1264: 
1265:     /**
1266:      * Set article specification
1267:      *
1268:      * @param int $iArtspecID
1269:      */
1270:     public function setArticleSpecification($iArtspecID) {
1271:         $this->_articleSpecs[] = $iArtspecID;
1272:     }
1273: 
1274:     /**
1275:      * Add all article specifications matching name of article specification
1276:      * (client dependent but language independent)
1277:      *
1278:      * @param string $sArtSpecName
1279:      * @return boolean
1280:      */
1281:     public function addArticleSpecificationsByName($sArtSpecName) {
1282:         if (!isset($sArtSpecName) || strlen($sArtSpecName) == 0) {
1283:             return false;
1284:         }
1285: 
1286:         $sql = "SELECT
1287:                     idartspec
1288:                 FROM
1289:                     " . $this->cfg['tab']['art_spec'] . "
1290:                 WHERE
1291:                     client = " . cSecurity::toInteger($this->client) . " AND
1292:                     artspec = '" . $this->db->escape($sArtSpecName) . "' ";
1293:         $this->_debug('sql', $sql);
1294:         $this->db->query($sql);
1295:         while ($this->db->nextRecord()) {
1296:             $this->_articleSpecs[] = $this->db->f('idartspec');
1297:         }
1298:     }
1299: }
1300: 
1301: /**
1302:  * CONTENIDO API - SearchResult Object
1303:  *
1304:  * This object ranks and displays the result of the indexed fulltext search.
1305:  * If you are not comfortable with this API feel free to use your own methods to
1306:  * display the search results.
1307:  * The search result is basically an array with article ID's.
1308:  *
1309:  * If $search_result = $search->searchIndex($searchword, $searchwordex);
1310:  *
1311:  * use object with
1312:  *
1313:  * $oSearchResults = new cSearchResult($search_result, 10);
1314:  *
1315:  * $oSearchResults->setReplacement('<span style="color:red">', '</span>'); //
1316:  * html-tags to emphasize the located searchwords
1317:  *
1318:  * $num_res = $oSearchResults->getNumberOfResults();
1319:  * $num_pages = $oSearchResults->getNumberOfPages();
1320:  * $res_page = $oSearchResults->getSearchResultPage(1); // first result page
1321:  * foreach ($res_page as $key => $val) {
1322:  * $headline = $oSearchResults->getSearchContent($key, 'HTMLHEAD');
1323:  * $first_headline = $headline[0];
1324:  * $text = $oSearchResults->getSearchContent($key, 'HTML');
1325:  * $first_text = $text[0];
1326:  * $similarity = $oSearchResults->getSimilarity($key);
1327:  * $iOccurrence = $oSearchResults->getOccurrence($key);
1328:  * }
1329:  *
1330:  * @package Core
1331:  * @subpackage Frontend_Search
1332:  *
1333:  */
class cSearchResult extends cSearchBaseAbstract {

    /**
     * Instance of class cSearchIndex
     *
     * @var cSearchIndex
     */
    protected $_index;

    /**
     * Number of results
     *
     * @var int
     */
    protected $_results;

    /**
     * Number of result pages
     *
     * @var int
     */
    protected $_pages;

    /**
     * Result page requested last via getSearchResultPage()
     *
     * @var int
     */
    protected $_resultPage;

    /**
     * Results per page to display
     *
     * @var int
     */
    protected $_resultPerPage;

    /**
     * Opening and closing html-tag to emphasize the searchwords; either
     * empty or exactly two entries
     *
     * @var array
     */
    protected $_replacement = array();

    /**
     * Ranking factor per article id
     *
     * @var array
     */
    protected $_rankStructure = array();

    /**
     * List of result pages, each an array of ranking factors keyed by
     * article id
     *
     * @var array
     */
    protected $_orderedSearchResult = array();

    /**
     * Array of article id's with information about cms-types, occurrence of
     * keyword/searchword, similarity.
     *
     * @var array
     */
    protected $_searchResult = array();

    /**
     * Compute ranking factor for each search result and order the search
     * results by ranking factor.
     * NOTE: The ranking factor is the sum of occurrences of matching
     * searchterms weighted by similarity (in %) between searchword and
     * matching word in the article.
     * TODO: One can think of more sophisticated ranking strategies. One could
     * use the content type information for example because a matching word
     * in the headline (CMS_HEADLINE[1]) could be weighted more than a
     * matching word in the text (CMS_HTML[1]).
     *
     * @param array $search_result Search result keyed by article id; any
     *        non-array value (e.g. false) is treated as an empty result
     * @param int $result_per_page Number of items per page
     * @param cDb $oDB Optional db instance
     * @param bool $bDebug Optional flag to enable debugging
     */
    public function __construct($search_result, $result_per_page, $oDB = NULL, $bDebug = false) {
        parent::__construct($oDB, $bDebug);

        $this->_index = new cSearchIndex($oDB);

        // a search without usable words yields false instead of an array
        $this->_searchResult = is_array($search_result) ? $search_result : array();
        $this->_debug('$this->search_result', $this->_searchResult);

        $this->_resultPerPage = $result_per_page;
        $this->_results = count($this->_searchResult);

        // compute ranking factor for each search result
        foreach ($this->_searchResult as $article => $val) {
            $this->_rankStructure[$article] = $this->getOccurrence($article) * ($this->getSimilarity($article) / 100);
        }
        $this->_debug('$this->rank_structure', $this->_rankStructure);

        $this->setOrderedSearchResult($this->_rankStructure, $this->_resultPerPage);
        $this->_pages = count($this->_orderedSearchResult);
        $this->_debug('$this->ordered_search_result', $this->_orderedSearchResult);
    }

    /**
     * Orders the ranked articles descending by ranking factor and splits
     * them into result pages.
     *
     * @param array $ranked_search Ranking factor per article id
     * @param int $result_per_page Items per page; without a positive value
     *        all articles are put on a single page
     */
    public function setOrderedSearchResult($ranked_search, $result_per_page) {
        asort($ranked_search);
        $sorted_rank = array_reverse($ranked_search, true);

        if (isset($result_per_page) && $result_per_page > 0) {
            $this->_orderedSearchResult = array_chunk($sorted_rank, $result_per_page, true);
        } else {
            // replace instead of append, like the paged branch does
            $this->_orderedSearchResult = array($sorted_rank);
        }
    }

    /**
     * Loads an article in the current language and returns its content.
     *
     * @param int $art_id Id of an article
     * @param string $cms_type
     * @param int $id
     * @return string Content of an article, specified by it's content type
     */
    public function getContent($art_id, $cms_type, $id = 0) {
        $article = new cApiArticleLanguage();
        $article->loadByArticleAndLanguageId($art_id, $this->lang, true);
        return $article->getContent($cms_type, $id);
    }

    /**
     * Returns the tag-stripped content of an article for a content type.
     * If the article was found through this content type and replacement
     * tags were set, the located search words are emphasized.
     *
     * @param int $art_id Id of an article
     * @param string $cms_type Content type, with or without 'CMS_' prefix
     * @param int $cms_nr Optional number of the content entry; without it
     *        all entries found by the search (or all entries of the type if
     *        the search did not match this type) are returned
     * @return array Content of an article in search result, specified by
     *         its type; empty for an unknown 'CMS_' type
     */
    public function getSearchContent($art_id, $cms_type, $cms_nr = NULL) {
        $cms_type = strtoupper($cms_type);
        if (strlen($cms_type) > 0) {
            if (!stristr($cms_type, 'cms_')) {
                if (in_array($cms_type, $this->_index->getCmsTypeSuffix())) {
                    $cms_type = 'CMS_' . $cms_type;
                }
            } elseif (!array_key_exists($cms_type, $this->_index->getCmsType())) {
                return array();
            }
        }

        $article = new cApiArticleLanguage();
        $article->loadByArticleAndLanguageId($art_id, $this->lang, true);

        $hasNumber = isset($cms_nr) && is_numeric($cms_nr);
        $content = array();

        if (!isset($this->_searchResult[$art_id][$cms_type])) {
            // searchword was not found in cms_type
            if ($hasNumber) {
                $content[] = strip_tags($article->getContent($cms_type, $cms_nr));
            } else {
                $art_content = $article->getContent($cms_type);
                if (is_array($art_content)) {
                    foreach ($art_content as $val) {
                        $content[] = strip_tags($val);
                    }
                }
            }
            return $content;
        }

        // searchword occurs in cms_type
        $search_words = array_unique($this->_searchResult[$art_id]['search']);

        if ($hasNumber) {
            // get content of cms_type[cms_nr] as consistently escaped string
            $cms_content = conHtmlentities(conHtmlEntityDecode(strip_tags($article->getContent($cms_type, $cms_nr))));
            if (count($this->_replacement) == 2) {
                // bring the words into the same escaped form as the content
                $words = array();
                foreach ($search_words as $word) {
                    $words[] = conHtmlentities(conHtmlEntityDecode($this->_index->addSpecialUmlauts($word)));
                }
                $cms_content = $this->_highlightSearchWords($cms_content, $words);
            }
            $content[] = htmlspecialchars_decode($cms_content);
            return $content;
        }

        // get content of cms_type[$id], where $id are the cms_type numbers
        // found in search
        $id_type = array_unique($this->_searchResult[$art_id][$cms_type]);
        foreach ($id_type as $id) {
            $cms_content = strip_tags($article->getContent($cms_type, $id));
            $content[] = $this->_highlightSearchWords($cms_content, $search_words);
        }

        return $content;
    }

    /**
     * Wraps every occurrence of the given words in the replacement tags.
     * The words are matched literally and case-insensitively; all
     * occurrences of a word get the spelling of its first occurrence.
     *
     * @param string $content Text to emphasize the words in
     * @param array $words Words to emphasize
     * @return string The text, unchanged if no replacement tags are set
     */
    protected function _highlightSearchWords($content, $words) {
        if (count($this->_replacement) != 2) {
            return $content;
        }

        foreach ($words as $word) {
            if (strlen($word) == 0) {
                continue;
            }
            // quote the word, it is user input and no regular expression
            $match = array();
            preg_match('/' . preg_quote($word, '/') . '/i', $content, $match);
            if (isset($match[0]) && strlen($match[0]) > 0) {
                $replacement = $this->_replacement[0] . $match[0] . $this->_replacement[1];
                // literal replace: neither the match nor the tags may be
                // interpreted as pattern or backreference
                $content = str_ireplace($match[0], $replacement, $content);
            }
        }

        return $content;
    }

    /**
     * Returns articles in page.
     *
     * @param int $page_id Number of the page, starting with 1
     * @return array Articles in page $page_id, empty if there is no such
     *         page
     */
    public function getSearchResultPage($page_id) {
        $this->_resultPage = $page_id;
        if (!isset($this->_orderedSearchResult[$page_id - 1])) {
            return array();
        }
        return $this->_orderedSearchResult[$page_id - 1];
    }

    /**
     * Returns number of result pages
     *
     * @return int
     */
    public function getNumberOfPages() {
        return $this->_pages;
    }

    /**
     * Returns number of results
     *
     * @return int
     */
    public function getNumberOfResults() {
        return $this->_results;
    }

    /**
     * Returns the similarity stored for an article in the search result.
     *
     * @param int $art_id Id of an article
     * @return int Similarity between searchword and matching word in
     *         article, 0 if none is stored for the article
     */
    public function getSimilarity($art_id) {
        if (!isset($this->_searchResult[$art_id]['similarity'])) {
            return 0;
        }
        return $this->_searchResult[$art_id]['similarity'];
    }

    /**
     * Sums up the occurrences stored for an article in the search result.
     *
     * @param int $art_id Id of an article
     * @return number of matching searchwords found in article, 0 if none
     *         are stored for the article
     */
    public function getOccurrence($art_id) {
        if (!isset($this->_searchResult[$art_id]['occurence']) || !is_array($this->_searchResult[$art_id]['occurence'])) {
            return 0;
        }
        return array_sum($this->_searchResult[$art_id]['occurence']);
    }

    /**
     * Sets the html-tags used to emphasize located searchwords. Both tags
     * have to be non-empty, otherwise the call is ignored. A repeated call
     * replaces the tags set before.
     *
     * @param string $rep1 The opening html-tag to emphasize the searchword
     *        e.g. '<b>'
     * @param string $rep2 The closing html-tag e.g. '</b>'
     */
    public function setReplacement($rep1, $rep2) {
        if (strlen(trim($rep1)) > 0 && strlen(trim($rep2)) > 0) {
            // assign instead of append: getSearchContent() only emphasizes
            // if there are exactly two entries
            $this->_replacement = array($rep1, $rep2);
        }
    }

    /**
     * Returns the id of the first category found for an article.
     *
     * @param int $artid
     * @return int Category Id; nothing is returned if the article is not
     *         assigned to a category
     * @todo Is not job of search, should be outsourced!
     */
    public function getArtCat($artid) {
        $sql = "SELECT idcat FROM " . $this->cfg['tab']['cat_art'] . "
                WHERE idart = " . cSecurity::toInteger($artid) . " ";
        $this->db->query($sql);
        if ($this->db->nextRecord()) {
            return $this->db->f('idcat');
        }
    }
}
1646: 
CMS CONTENIDO 4.9.3 API documentation generated by ApiGen 2.8.0