Overview

Packages

  • CONTENIDO
  • Core
    • Authentication
    • Backend
    • Cache
    • CEC
    • Chain
    • ContentType
    • Database
    • Debug
    • Exception
    • Frontend
      • Search
      • URI
      • Util
    • GenericDB
      • Model
    • GUI
      • HTML
    • I18N
    • LayoutHandler
    • Log
    • Security
    • Session
    • Util
    • Validation
    • Versioning
    • XML
  • Module
    • ContentRssCreator
    • ContentSitemapHtml
    • ContentSitemapXml
    • ContentUserForum
    • NavigationTop
    • ScriptCookieDirective
  • mpAutoloaderClassMap
  • None
  • Plugin
    • ContentAllocation
    • CronjobOverview
    • FormAssistant
    • FrontendLogic
    • FrontendUsers
    • Linkchecker
    • ModRewrite
    • Newsletter
    • Repository
      • FrontendNavigation
      • KeywordDensity
    • SearchSolr
    • SmartyWrapper
    • UrlShortener
    • UserForum
    • Workflow
  • PluginManager
  • Setup
    • Form
    • GUI
    • Helper
      • Environment
      • Filesystem
      • MySQL
      • PHP
    • UpgradeJob

Classes

  • Solr
  • SolrIndexer
  • SolrSearcherAbstract
  • SolrSearcherSimple
  • SolrSearchModule

Exceptions

  • SolrException
  • Overview
  • Package
  • Class
  • Tree
  • Deprecated
  • Todo
  1: <?php
  2: 
  3: /**
  4:  *
  5:  * @package Plugin
  6:  * @subpackage SearchSolr
  7:  * @author Marcus Gnaß <marcus.gnass@4fb.de>
  8:  * @copyright four for business AG
  9:  * @link http://www.4fb.de
 10:  */
 11: 
 12: // assert CONTENIDO framework
 13: defined('CON_FRAMEWORK') || die('Illegal call: Missing framework initialization - request aborted.');
 14: 
 15: /**
 16:  * This class sends update requests to a Solr core.
 17:  * If the request failed an
 18:  * exception is thrown. This class allows handling of more than one article at
 19:  * once.
 20:  *
 21:  * <server>:8080/solr/admin/cores?action=STATUS
 22:  * <server>:8080/solr/admin/cores?action=RENAME&core=collection1&other=contenido
 23:  * <server>:8080/solr/admin/cores?action=RELOAD&core=contenido
 24:  *
 25:  * @author Marcus Gnaß <marcus.gnass@4fb.de>
 26:  */
 27: class SolrIndexer {
 28: 
 29:     /**
 30:      * @var bool
 31:      */
 32:     const DBG = false;
 33: 
 34:     /**
 35:      * Prefix to be used for Solr <uniqueKey> in order to distinguish docuemnts
 36:      * from different sources.
 37:      *
 38:      * @var string
 39:      */
 40:     const ID_PREFIX = 'contenido_article_';
 41: 
 42:     /**
 43:      *
 44:      * @var array of SolrClient
 45:      */
 46:     private $_solrClients = NULL;
 47: 
 48:     /**
 49:      * IDs of articles to be updated / added / deleted.
 50:      *
 51:      * @var array
 52:      */
 53:     private $_articleIds = array();
 54: 
 55:     /**
 56:      * CEC chain function for updating an article in the Solr core (index).
 57:      *
 58:      * This function is intended to be called after storing an article.
 59:      * This function will delete and eventually add the given article from/to
 60:      * the SOLR index. Adding will only be performed when the article should be
 61:      * indexed. Removal will always be performed, even when the article is not
 62:      * indexable, but it might have been indexed before!
 63:      *
 64:      * include.con_editcontent.php
 65:      *
 66:      * @param int $idartlang of article to be updated
 67:      */
 68:     public static function handleStoringOfArticle(array $newData, array $oldData) {
 69: 
 70:         // get IDs of given article language
 71:         if (cRegistry::getArticleLanguageId() == $newData['idartlang']) {
 72:             // quite easy if given article is current article
 73:             $idclient = cRegistry::getClientId();
 74:             $idlang = cRegistry::getLanguageId();
 75:             $idcat = cRegistry::getCategoryId();
 76:             $idart = cRegistry::getArticleId();
 77:             $idcatlang = cRegistry::getCategoryLanguageId();
 78:             $idartlang = cRegistry::getArticleLanguageId();
 79:         } else {
 80:             // == for other articles these infos have to be read from DB
 81:             // get idclient by idart
 82:             $article = new cApiArticle($newData['idart']);
 83:             if ($article->isLoaded()) {
 84:                 $idclient = $article->get('idclient');
 85:             }
 86:             // get idlang by idartlang
 87:             $articleLanguage = new cApiArticleLanguage($newData['idartlang']);
 88:             if ($articleLanguage->isLoaded()) {
 89:                 $idlang = $articleLanguage->get('idlang');
 90:             }
 91:             // get first idcat by idart
 92:             $coll = new cApiCategoryArticleCollection();
 93:             $idcat = array_shift($coll->getCategoryIdsByArticleId($newData['idart']));
 94:             // get idcatlang by idcat & idlang
 95:             $categoryLanguage = new cApiCategoryLanguage();
 96:             $categoryLanguage->loadByCategoryIdAndLanguageId($idcat, $idlang);
 97:             if ($categoryLanguage->isLoaded()) {
 98:                 $idcatlang = $articleLanguage->get('idlang');
 99:             }
100:         }
101: 
102:         self::handleStoringOfContentEntry(array(
103:             'idclient' => $idclient,
104:             'idlang' => $idlang,
105:             'idcat' => $idcat,
106:             'idcatlang' => $idcatlang,
107:             'idart' => $idart,
108:             'idartlang' => $idartlang
109:         ));
110:     }
111: 
112:     /**
113:      * CEC chain function for updating an article in the Solr core (index).
114:      *
115:      * This function is intended to be called after storing an article.
116:      * This function will delete and eventually add the given article from/to
117:      * the SOLR index. Adding will only be performed when the article should be
118:      * indexed. Removal will always be performed, even when the article is not
119:      * indexable, but it might have been indexed before!
120:      *
121:      * include.con_editcontent.php
122:      *
123:      * @param int $idartlang of article to be updated
124:      */
125:     public static function handleStoringOfContentEntry(array $articleIds) {
126:         try {
127:             // build indexer instance
128:             $indexer = new self(array(
129:                 $articleIds
130:             ));
131:             // update given articles
132:             $indexer->updateArticles();
133:         } catch (cException $e) {
134:             $lvl = $e instanceof SolrWarning? cGuiNotification::LEVEL_WARNING : cGuiNotification::LEVEL_ERROR;
135:             $note = new cGuiNotification();
136:             $note->displayNotification($lvl, $e->getMessage());
137:         }
138: 
139:         // destroy indexer to free mem
140:         unset($indexer);
141:     }
142: 
143:     /**
144:      * Create client instance (connect to Apache Solr) and aggregate it.
145:      *
146:      * @param array $articleIds IDs of articles to be handled
147:      */
148:     public function __construct(array $articleIds) {
149:         $this->_articleIds = $articleIds;
150:     }
151: 
152:     /**
153:      * Destroy aggregated client instance.
154:      *
155:      * Destroys Solr client to free memory. Is this really necessary?
156:      * As SolClient has a method __destruct() this seems to be correct.
157:      */
158:     public function __destruct() {
159:         foreach ($this->_solrClients as $key => $client) {
160:             unset($this->_solrClients[$key]);
161:         }
162:     }
163: 
164:     /**
165:      *
166:      * @param int $idclient
167:      * @param int $idlang
168:      * @return SolrClient
169:      */
170:     private function _getSolrClient($idclient, $idlang) {
171: 
172:         if (!isset($this->_solrClients[$idclient][$idlang])) {
173:             $opt = Solr::getClientOptions($idclient, $idlang);
174:             Solr::validateClientOptions($opt);
175:             $this->_solrClients[$idclient][$idlang] = new SolrClient($opt);
176:         }
177: 
178:         return $this->_solrClients[$idclient][$idlang];
179:     }
180: 
181:     /**
182:      * If the current articles are indexable for each article a new index
183:      * document will be created and filled with its content and eventually
184:      * be added to the index.
185:      *
186:      * @throws cException if Solr add request failed
187:      */
188:     public function addArticles() {
189: 
190:         $toAdd = array();
191:         foreach ($this->_articleIds as $articleIds) {
192: 
193:             // skip if article should not be indexed
194:             if (!$this->_isIndexable($articleIds['idartlang'])) {
195:                 continue;
196:             }
197: 
198:             if (!isset($toAdd[$articleIds['idlang']])) {
199:                 $toAdd[$articleIds['idlang']] = array(
200:                     'idclient' => $articleIds['idclient'],
201:                     'documents' => array()
202:                 );
203:             }
204: 
205:             // get article content to be indexed
206:             $articleContent = $this->_getContent($articleIds['idartlang']);
207: 
208:             // create input document
209:             $solrInputDocument = new SolrInputDocument();
210:             $solrInputDocument->addField('source', 'contenido_article');
211:             $solrInputDocument->addField('url', cUri::getInstance()->build(array(
212:                 'idart' => $articleIds['idart'],
213:                 'lang' => $articleIds['idlang']
214:             )));
215:             $solrInputDocument->addField('id', self::ID_PREFIX . $articleIds['idartlang']);
216:             // $solrInputDocument->addField('raise_exception', 'uncomment this
217:             // to raise an exception');
218:             // add IDs
219:             $solrInputDocument->addField('id_client', $articleIds['idclient']);
220:             $solrInputDocument->addField('id_lang', $articleIds['idlang']);
221:             $solrInputDocument->addField('id_cat', $articleIds['idcat']);
222:             $solrInputDocument->addField('id_art', $articleIds['idart']);
223:             $solrInputDocument->addField('id_cat_lang', $articleIds['idcatlang']);
224:             $solrInputDocument->addField('id_art_lang', $articleIds['idartlang']);
225: 
226:             // add content one by one
227:             foreach ($articleContent as $type => $typeContent) {
228: 
229:                 // field names in Solr should always be lowercase!
230:                 $type = strtolower($type);
231: 
232:                 // == sort content of a certain content type by their typeids
233:                 // This is important so that the most prominent headline can be
234:                 // displayed first.
235:                 ksort($typeContent);
236: 
237:                 // add each content entry seperatly (content type fields are
238:                 // defined as multiValued)
239:                 foreach ($typeContent as $typeid => $contentEntry) {
240:                     $contentEntry = trim($contentEntry);
241:                     if (0 < strlen($contentEntry)) {
242:                         $solrInputDocument->addField($type, $contentEntry);
243:                     }
244:                 }
245:             }
246: 
247:             if (isset($articleContent['CMS_IMGEDITOR'])) {
248:                 foreach ($articleContent['CMS_IMGEDITOR'] as $typeid => $idupl) {
249:                     if (0 == strlen($idupl)) {
250:                         continue;
251:                     }
252:                     $image = $this->_getImageUrlByIdupl($idupl);
253:                     if (false === $image) {
254:                         //Util::log("skipped \$idupl: $idupl");
255:                         continue;
256:                     }
257:                     $solrInputDocument->addField('images', $image);
258:                 }
259:             }
260: 
261:             array_push($toAdd[$articleIds['idlang']]['documents'], $solrInputDocument);
262: 
263:         }
264: 
265:         // add and commit documents and then optimze index
266:         foreach ($toAdd as $idlang => $data) {
267:             try {
268:                 $solrClient = $this->_getSolrClient($data['idclient'], $idlang);
269:                 if (self::DBG) {
270:                     error_log('# addArticles #');
271:                     error_log('idclient: ' . $data['idclient']);
272:                     error_log('idlang: ' . $idlang);
273:                     error_log('config: ' . print_r($solrClient->getOptions(), 1));
274:                     error_log('#documents: ' . count($data['documents']));
275:                 } else {
276:                     @$solrClient->addDocuments($data['documents']);
277:                     // @$solrClient->commit();
278:                     // @$solrClient->optimize();
279:                 }
280:             } catch (Exception $e) {
281:                 // log exception
282:                 Solr::log($e);
283:                 // rethrow as cException
284:                 throw new cException('article could not be deleted from index', 0, $e);
285:             }
286:         }
287: 
288:     }
289: 
290:     /**
291:      * Gets path to upload.
292:      *
293:      * @param int $idupl
294:      */
295:     private function _getImageUrlByIdupl($idupl) {
296:         $upload = new cApiUpload($idupl);
297: 
298:         if (false === $upload->isLoaded()) {
299:             return false;
300:         }
301: 
302:         $idclient = $upload->get('idclient');
303:         $dirname = $upload->get('dirname');
304:         $filename = $upload->get('filename');
305: 
306:         $clientConfig = cRegistry::getClientConfig($idclient);
307:         $image = $clientConfig['upl']['htmlpath'] . $dirname . $filename;
308: 
309:         return $image;
310:     }
311: 
312:     /**
313:      * Delete all CONTENIDO article documents that are aggregated as
314:      * $this->_articleIds.
315:      *
316:      * @throws SolrClientException if Solr delete request failed
317:      */
318:     public function deleteArticles() {
319:         $toDelete = array();
320:         foreach ($this->_articleIds as $articleIds) {
321:             if (!isset($toDelete[$articleIds['idlang']])) {
322:                 $toDelete[$articleIds['idlang']] = array(
323:                     'idclient' => $articleIds['idclient'],
324:                     'idartlangs' => array()
325:                 );
326:             }
327:             $key = self::ID_PREFIX . strval($articleIds['idartlang']);
328:             array_push($toDelete[$articleIds['idlang']]['idartlangs'], $key);
329:         }
330:         foreach ($toDelete as $idlang => $data) {
331:             try {
332:                 $solrClient = $this->_getSolrClient($data['idclient'], $idlang);
333:                 if (self::DBG) {
334:                     error_log('# deleteArticles #');
335:                     error_log('idclient: ' . $data['idclient']);
336:                     error_log('idlang: ' . $idlang);
337:                     error_log('config: ' . print_r($solrClient->getOptions(), 1));
338:                     error_log('#idartlangs: ' . count($data['idartlangs']));
339:                     error_log('idartlangs: ' . print_r($data['idartlangs'], 1));
340:                 } else {
341:                     $solrClient->deleteByIds($data['idartlangs']);
342:                     // @$solrClient->commit();
343:                 }
344:             } catch (Exception $e) {
345:                 // log exception
346:                 Solr::log($e);
347:                 // rethrow as cException
348:                 throw new cException('article could not be deleted from index', 0, $e);
349:             }
350:         }
351:     }
352: 
353:     /**
354:      *
355:      * @throws cException if Solr delete request failed
356:      */
357:     public function updateArticles() {
358: 
359:         // Always delete articles from index, even if article should not be
360:         // indexed it might have been indexed before
361:         // What happens if an article could not be deleted cause it was not
362:         // indexed before? does this throw an exception? if yes an article
363:         // could never been indexed!
364:         try {
365:             $this->deleteArticles();
366:         } catch (cException $e) {
367:             // ignore exception so that articles can be indexed nonetheless
368:         }
369: 
370:         // add articles to index
371:         // will be skipped if article is not indexable
372:         $this->addArticles();
373:     }
374: 
375:     /**
376:      * An article is indexable if it is online and searchable.
377:      *
378:      * Articles that are hidden due to a protected category are indexable. The
379:      * searcher is responsible for making sure these aticles are only displayed
380:      * to privileged users.
381:      *
382:      * @param int $idartlang of article to be checked
383:      * @return bool
384:      */
385:     private function _isIndexable($idartlang) {
386: 
387:         // What about time managment?
388:         $articleLanguage = new cApiArticleLanguage($idartlang);
389:         if (!$articleLanguage->isLoaded()) {
390:             return false;
391:         } else if (1 != $articleLanguage->get('online')) {
392:             return false;
393:         } else if (1 != $articleLanguage->get('searchable')) {
394:             return false;
395:         } else {
396:             return true;
397:         }
398:     }
399: 
400:     /**
401:      *
402:      * @param int $idartlang of article to be read
403:      * @return array
404:      */
405:     private function _getContent($idartlang) {
406: 
407:         // 'CMS_IMG', 'CMS_LINK', 'CMS_LINKTARGET', 'CMS_SWF'
408:         $cms = "'CMS_HTMLHEAD','CMS_HTML','CMS_TEXT','CMS_IMGDESCR',"
409:             . "'CMS_LINKDESCR','CMS_HEAD','CMS_LINKTITLE','CMS_LINKEDIT',"
410:             . "'CMS_RAWLINK','CMS_IMGEDIT','CMS_IMGTITLE','CMS_SIMPLELINKEDIT',"
411:             . "'CMS_HTMLTEXT','CMS_EASYIMGEDIT','CMS_DATE','CMS_TEASER',"
412:             . "'CMS_FILELIST','CMS_IMGEDITOR','CMS_LINKEDITOR','CMS_PIFAFORM'";
413: 
414:         // exclude certain content types from indexing
415:         // like in conMakeArticleIndex & conGenerateKeywords
416:         $db = cRegistry::getDb();
417:         $db->query("-- SolrIndexer->_getContent()
418:             SELECT
419:                 con_type.type
420:                 , con_content.typeid
421:                 , con_content.value
422:             FROM
423:                 con_content
424:             INNER JOIN
425:                 con_type
426:             ON
427:                 con_content.idtype = con_type.idtype
428:             WHERE
429:                 con_content.idartlang = $idartlang
430:                 AND con_type.type IN ($cms)
431:             ORDER BY
432:                 con_content.idtype
433:                 , con_content.typeid
434:             ;");
435: 
436:         $content = array();
437:         while (false !== $db->nextRecord()) {
438:             $value = $db->f('value');
439:             //$value = utf8_encode($value);
440:             $value = strip_tags($value);
441:             //$value = html_entity_decode($value);
442:             $value = html_entity_decode($value, ENT_QUOTES, 'UTF-8');
443:             $value = trim($value);
444: 
445:             $content[$db->f('type')][$db->f('typeid')] = $value;
446:         }
447: 
448:         // TODO check first alternative:
449:         // cInclude('includes', 'functions.con.php');
450:         // $content = conGetContentFromArticle($this->_idartlang);
451:         // TODO check second alternative:
452:         // $articleLanguage = new cApiArticleLanguage($this->_idartlang);
453:         // if (!$articleLanguage->isLoaded()) {
454:         // throw new cException('article could not be loaded');
455:         // }
456:         // $content = $articleLanguage->getContent();
457: 
458:         return $content;
459:     }
460: 
461:     /**
462:      *
463:      * @param SolrResponse $solrResponse
464:      * @throws cException if Solr update request failed
465:      */
466:     private function _checkResponse(SolrResponse $solrResponse, $msg = 'Solr update request failed') {
467:         $response = $solrResponse->getResponse();
468: 
469:         // SolrResponse::getDigestedResponse — Returns the XML response as
470:         // serialized PHP data
471:         // SolrResponse::getHttpStatus — Returns the HTTP status of the response
472:         // SolrResponse::getHttpStatusMessage — Returns more details on the HTTP
473:         // status
474:         // SolrResponse::getRawRequest — Returns the raw request sent to the
475:         // Solr server
476:         // SolrResponse::getRawRequestHeaders — Returns the raw request headers
477:         // sent to the Solr server
478:         // SolrResponse::getRawResponse — Returns the raw response from the
479:         // server
480:         // SolrResponse::getRawResponseHeaders — Returns the raw response
481:         // headers from the server
482:         // SolrResponse::getRequestUrl — Returns the full URL the request was
483:         // sent to
484:         // SolrResponse::getResponse — Returns a SolrObject representing the XML
485:         // response from the server
486:         // SolrResponse::setParseMode — Sets the parse mode
487:         // SolrResponse::success — Was the request a success
488: 
489:         if (0 != $response->status) {
490:             throw new cException($msg);
491:         }
492:     }
493: }
494: 
CMS CONTENIDO 4.9.11 API documentation generated by ApiGen 2.8.0