Overview

Packages

  • CONTENIDO
  • Core
    • Authentication
    • Backend
    • Cache
    • CEC
    • Chain
    • ContentType
    • Database
    • Debug
    • Exception
    • Frontend
      • Search
      • URI
      • Util
    • GenericDB
      • Model
    • GUI
      • HTML
    • I18N
    • LayoutHandler
    • Log
    • Security
    • Session
    • Util
    • Validation
    • Versioning
    • XML
  • Module
    • ContentRssCreator
    • ContentSitemapHtml
    • ContentSitemapXml
    • ContentUserForum
    • NavigationTop
    • ScriptCookieDirective
  • mpAutoloaderClassMap
  • None
  • Plugin
    • ContentAllocation
    • CronjobOverview
    • FormAssistant
    • FrontendLogic
    • FrontendUsers
    • Linkchecker
    • ModRewrite
    • Newsletter
    • Repository
      • FrontendNavigation
      • KeywordDensity
    • SearchSolr
    • SmartyWrapper
    • UrlShortener
    • UserForum
    • Workflow
  • PluginManager
  • Setup
    • Form
    • GUI
    • Helper
      • Environment
      • Filesystem
      • MySQL
      • PHP
    • UpgradeJob
  • Smarty
    • Cacher
    • Compiler
    • Config
    • Debug
    • PluginsBlock
    • PluginsFilter
    • PluginsFunction
    • PluginsInternal
    • PluginsModifier
    • PluginsModifierCompiler
    • PluginsShared
    • Security
    • Template
    • TemplateResources
  • Swift
    • ByteStream
    • CharacterStream
    • Encoder
    • Events
    • KeyCache
    • Mailer
    • Mime
    • Plugins
    • Transport

Classes

  • Swift_FailoverTransport
  • Swift_LoadBalancedTransport
  • Swift_MailTransport
  • Swift_Plugins_Loggers_ArrayLogger
  • Swift_Plugins_Loggers_EchoLogger
  • Swift_SendmailTransport
  • Swift_SmtpTransport
  • Swift_Transport_AbstractSmtpTransport
  • Swift_Transport_Esmtp_Auth_CramMd5Authenticator
  • Swift_Transport_Esmtp_Auth_LoginAuthenticator
  • Swift_Transport_Esmtp_Auth_PlainAuthenticator
  • Swift_Transport_Esmtp_AuthHandler
  • Swift_Transport_EsmtpTransport
  • Swift_Transport_FailoverTransport
  • Swift_Transport_LoadBalancedTransport
  • Swift_Transport_MailTransport
  • Swift_Transport_SendmailTransport
  • Swift_Transport_SimpleMailInvoker
  • Swift_Transport_StreamBuffer

Interfaces

  • Swift_Plugins_Logger
  • Swift_Plugins_Pop_Pop3Exception
  • Swift_Transport
  • Swift_Transport_Esmtp_Authenticator
  • Swift_Transport_EsmtpHandler
  • Swift_Transport_IoBuffer
  • Swift_Transport_MailInvoker
  • Swift_Transport_SmtpAgent
  • Swift_TransportException
  • Overview
  • Package
  • Function
  • Todo
  • Download
  1: <?php
  2: 
  3: /**
  4:  *
  5:  * @package Plugin
  6:  * @subpackage SearchSolr
  7:  * @version SVN Revision $Rev:$
  8:  * @author marcus.gnass
  9:  * @copyright four for business AG
 10:  * @link http://www.4fb.de
 11:  */
 12: 
 13: // assert CONTENIDO framework
 14: defined('CON_FRAMEWORK') || die('Illegal call: Missing framework initialization - request aborted.');
 15: 
 16: /**
 17:  * This class sends update requests to a Solr core.
 18:  * If the request failed an
 19:  * exception is thrown. This class allows handling of more than one article at
 20:  * once.
 21:  *
 22:  * <server>:8080/solr/admin/cores?action=STATUS
 23:  * <server>:8080/solr/admin/cores?action=RENAME&core=collection1&other=contenido
 24:  * <server>:8080/solr/admin/cores?action=RELOAD&core=contenido
 25:  *
 26:  * @author marcus.gnass
 27:  */
 28: class SolrIndexer {
 29: 
 30:     /**
 31:      * @var bool
 32:      */
 33:     const DBG = false;
 34: 
 35:     /**
 36:      * Prefix to be used for Solr <uniqueKey> in order to distinguish docuemnts
 37:      * from different sources.
 38:      *
 39:      * @var string
 40:      */
 41:     const ID_PREFIX = 'contenido_article_';
 42: 
 43:     /**
 44:      *
 45:      * @var array of SolrClient
 46:      */
 47:     private $_solrClients = NULL;
 48: 
 49:     /**
 50:      * IDs of articles to be updated / added / deleted.
 51:      *
 52:      * @var array
 53:      */
 54:     private $_articleIds = array();
 55: 
 56:     /**
 57:      * CEC chain function for updating an article in the Solr core (index).
 58:      *
 59:      * This function is intended to be called after storing an article.
 60:      * This function will delete and eventually add the given article from/to
 61:      * the SOLR index. Adding will only be performed when the article should be
 62:      * indexed. Removal will always be performed, even when the article is not
 63:      * indexable, but it might have been indexed before!
 64:      *
 65:      * include.con_editcontent.php
 66:      *
 67:      * @param int $idartlang of article to be updated
 68:      */
 69:     public static function handleStoringOfArticle(array $newData, array $oldData) {
 70: 
 71:         // get IDs of given article language
 72:         if (cRegistry::getArticleLanguageId() == $newData['idartlang']) {
 73:             // quite easy if given article is current article
 74:             $idclient = cRegistry::getClientId();
 75:             $idlang = cRegistry::getLanguageId();
 76:             $idcat = cRegistry::getCategoryId();
 77:             $idart = cRegistry::getArticleId();
 78:             $idcatlang = cRegistry::getCategoryLanguageId();
 79:             $idartlang = cRegistry::getArticleLanguageId();
 80:         } else {
 81:             // == for other articles these infos have to be read from DB
 82:             // get idclient by idart
 83:             $article = new cApiArticle($newData['idart']);
 84:             if ($article->isLoaded()) {
 85:                 $idclient = $article->get('idclient');
 86:             }
 87:             // get idlang by idartlang
 88:             $articleLanguage = new cApiArticleLanguage($newData['idartlang']);
 89:             if ($articleLanguage->isLoaded()) {
 90:                 $idlang = $articleLanguage->get('idlang');
 91:             }
 92:             // get first idcat by idart
 93:             $coll = new cApiCategoryArticleCollection();
 94:             $idcat = array_shift($coll->getCategoryIdsByArticleId($newData['idart']));
 95:             // get idcatlang by idcat & idlang
 96:             $categoryLanguage = new cApiCategoryLanguage();
 97:             $categoryLanguage->loadByCategoryIdAndLanguageId($idcat, $idlang);
 98:             if ($categoryLanguage->isLoaded()) {
 99:                 $idcatlang = $articleLanguage->get('idlang');
100:             }
101:         }
102: 
103:         self::handleStoringOfContentEntry(array(
104:             'idclient' => $idclient,
105:             'idlang' => $idlang,
106:             'idcat' => $idcat,
107:             'idcatlang' => $idcatlang,
108:             'idart' => $idart,
109:             'idartlang' => $idartlang
110:         ));
111:     }
112: 
113:     /**
114:      * CEC chain function for updating an article in the Solr core (index).
115:      *
116:      * This function is intended to be called after storing an article.
117:      * This function will delete and eventually add the given article from/to
118:      * the SOLR index. Adding will only be performed when the article should be
119:      * indexed. Removal will always be performed, even when the article is not
120:      * indexable, but it might have been indexed before!
121:      *
122:      * include.con_editcontent.php
123:      *
124:      * @param int $idartlang of article to be updated
125:      */
126:     public static function handleStoringOfContentEntry(array $articleIds) {
127:         try {
128:             // build indexer instance
129:             $indexer = new self(array(
130:                 $articleIds
131:             ));
132:             // update given articles
133:             $indexer->updateArticles();
134:         } catch (cException $e) {
135:             $lvl = $e instanceof SolrWarning? cGuiNotification::LEVEL_WARNING : cGuiNotification::LEVEL_ERROR;
136:             $note = new cGuiNotification();
137:             $note->displayNotification($lvl, $e->getMessage());
138:         }
139: 
140:         // destroy indexer to free mem
141:         unset($indexer);
142:     }
143: 
144:     /**
145:      * Create client instance (connect to Apache Solr) and aggregate it.
146:      *
147:      * @param array $articleIds IDs of articles to be handled
148:      */
149:     public function __construct(array $articleIds) {
150:         $this->_articleIds = $articleIds;
151:     }
152: 
153:     /**
154:      * Destroy aggregated client instance.
155:      *
156:      * Destroys Solr client to free memory. Is this really neccessary?
157:      * As SolClient has a method __destruct() this seems to be correct.
158:      */
159:     public function __destruct() {
160:         foreach ($this->_solrClients as $key => $client) {
161:             unset($this->_solrClients[$key]);
162:         }
163:     }
164: 
165:     /**
166:      *
167:      * @param int $idclient
168:      * @param int $idlang
169:      * @return SolrClient
170:      */
171:     private function _getSolrClient($idclient, $idlang) {
172: 
173:         if (!isset($this->_solrClients[$idclient][$idlang])) {
174:             $opt = Solr::getClientOptions($idclient, $idlang);
175:             Solr::validateClientOptions($opt);
176:             $this->_solrClients[$idclient][$idlang] = new SolrClient($opt);
177:         }
178: 
179:         return $this->_solrClients[$idclient][$idlang];
180:     }
181: 
182:     /**
183:      * If the current articles are indexable for each article a new index
184:      * document will be created and filled with its content and eventually
185:      * be added to the index.
186:      *
187:      * @throws cException if Solr add request failed
188:      */
189:     public function addArticles() {
190: 
191:         $toAdd = array();
192:         foreach ($this->_articleIds as $articleIds) {
193: 
194:             // skip if article should not be indexed
195:             if (!$this->_isIndexable($articleIds['idartlang'])) {
196:                 continue;
197:             }
198: 
199:             if (!isset($toAdd[$articleIds['idlang']])) {
200:                 $toAdd[$articleIds['idlang']] = array(
201:                     'idclient' => $articleIds['idclient'],
202:                     'documents' => array()
203:                 );
204:             }
205: 
206:             // get article content to be indexed
207:             $articleContent = $this->_getContent($articleIds['idartlang']);
208: 
209:             // create input document
210:             $solrInputDocument = new SolrInputDocument();
211:             $solrInputDocument->addField('source', 'contenido_article');
212:             $solrInputDocument->addField('url', cUri::getInstance()->build(array(
213:                 'idart' => $articleIds['idart'],
214:                 'lang' => $articleIds['idlang']
215:             )));
216:             $solrInputDocument->addField('id', self::ID_PREFIX . $articleIds['idartlang']);
217:             // $solrInputDocument->addField('raise_exception', 'uncomment this
218:             // to raise an exception');
219:             // add IDs
220:             $solrInputDocument->addField('id_client', $articleIds['idclient']);
221:             $solrInputDocument->addField('id_lang', $articleIds['idlang']);
222:             $solrInputDocument->addField('id_cat', $articleIds['idcat']);
223:             $solrInputDocument->addField('id_art', $articleIds['idart']);
224:             $solrInputDocument->addField('id_cat_lang', $articleIds['idcatlang']);
225:             $solrInputDocument->addField('id_art_lang', $articleIds['idartlang']);
226: 
227:             // add content one by one
228:             foreach ($articleContent as $type => $typeContent) {
229: 
230:                 // field names in Solr should always be lowercase!
231:                 $type = strtolower($type);
232: 
233:                 // == sort content of a certain content type by their typeids
234:                 // This is important so that the most prominent headline can be
235:                 // displayed first.
236:                 ksort($typeContent);
237: 
238:                 // add each content entry seperatly (content type fields are
239:                 // defined as multiValued)
240:                 foreach ($typeContent as $typeid => $contentEntry) {
241:                     $contentEntry = trim($contentEntry);
242:                     if (0 < strlen($contentEntry)) {
243:                         $solrInputDocument->addField($type, $contentEntry);
244:                     }
245:                 }
246:             }
247: 
248:             if (isset($articleContent['CMS_IMGEDITOR'])) {
249:                 foreach ($articleContent['CMS_IMGEDITOR'] as $typeid => $idupl) {
250:                     if (0 == strlen($idupl)) {
251:                         continue;
252:                     }
253:                     $image = $this->_getImageUrlByIdupl($idupl);
254:                     if (false === $image) {
255:                         //Util::log("skipped \$idupl: $idupl");
256:                         continue;
257:                     }
258:                     $solrInputDocument->addField('images', $image);
259:                 }
260:             }
261: 
262:             array_push($toAdd[$articleIds['idlang']]['documents'], $solrInputDocument);
263: 
264:         }
265: 
266:         // add and commit documents and then optimze index
267:         foreach ($toAdd as $idlang => $data) {
268:             try {
269:                 $solrClient = $this->_getSolrClient($data['idclient'], $idlang);
270:                 if (self::DBG) {
271:                     error_log('# addArticles #');
272:                     error_log('idclient: ' . $data['idclient']);
273:                     error_log('idlang: ' . $idlang);
274:                     error_log('config: ' . print_r($solrClient->getOptions(), 1));
275:                     error_log('#documents: ' . count($data['documents']));
276:                 } else {
277:                     @$solrClient->addDocuments($data['documents']);
278:                     // @$solrClient->commit();
279:                     // @$solrClient->optimize();
280:                 }
281:             } catch (Exception $e) {
282:                 // log exception
283:                 Solr::log($e);
284:                 // rethrow as cException
285:                 throw new cException('article could not be deleted from index', 0, $e);
286:             }
287:         }
288: 
289:     }
290: 
291:     /**
292:      */
293:     private function _getImageUrlByIdupl($idupl) {
294:         $upload = new cApiUpload($idupl);
295: 
296:         if (false === $upload->isLoaded()) {
297:             return false;
298:         }
299: 
300:         $idclient = $upload->get('idclient');
301:         $dirname = $upload->get('dirname');
302:         $filename = $upload->get('filename');
303: 
304:         $clientConfig = cRegistry::getClientConfig($idclient);
305:         $image = $clientConfig['upl']['htmlpath'] . $dirname . $filename;
306: 
307:         return $image;
308:     }
309: 
310:     /**
311:      * Delete all CONTENIDO article documents that are aggregated as
312:      * $this->_articleIds.
313:      *
314:      * @throws SolrClientException if Solr delete request failed
315:      */
316:     public function deleteArticles() {
317:         $toDelete = array();
318:         foreach ($this->_articleIds as $articleIds) {
319:             if (!isset($toDelete[$articleIds['idlang']])) {
320:                 $toDelete[$articleIds['idlang']] = array(
321:                     'idclient' => $articleIds['idclient'],
322:                     'idartlangs' => array()
323:                 );
324:             }
325:             $key = self::ID_PREFIX . strval($articleIds['idartlang']);
326:             array_push($toDelete[$articleIds['idlang']]['idartlangs'], $key);
327:         }
328:         foreach ($toDelete as $idlang => $data) {
329:             try {
330:                 $solrClient = $this->_getSolrClient($data['idclient'], $idlang);
331:                 if (self::DBG) {
332:                     error_log('# deleteArticles #');
333:                     error_log('idclient: ' . $data['idclient']);
334:                     error_log('idlang: ' . $idlang);
335:                     error_log('config: ' . print_r($solrClient->getOptions(), 1));
336:                     error_log('#idartlangs: ' . count($data['idartlangs']));
337:                     error_log('idartlangs: ' . print_r($data['idartlangs'], 1));
338:                 } else {
339:                     $solrClient->deleteByIds($data['idartlangs']);
340:                     // @$solrClient->commit();
341:                 }
342:             } catch (Exception $e) {
343:                 // log exception
344:                 Solr::log($e);
345:                 // rethrow as cException
346:                 throw new cException('article could not be deleted from index', 0, $e);
347:             }
348:         }
349:     }
350: 
351:     /**
352:      *
353:      * @throws cException if Solr delete request failed
354:      */
355:     public function updateArticles() {
356: 
357:         // Always delete articles from index, even if article should not be
358:         // indexed it might have been indexed before
359:         // What happens if an article could not be deleted cause it was not
360:         // indexed before? does this throw an exception? if yes an article
361:         // could never been indexed!
362:         try {
363:             $this->deleteArticles();
364:         } catch (cException $e) {
365:             // ignore exception so that articles can be indexed nonetheless
366:         }
367: 
368:         // add articles to index
369:         // will be skipped if article is not indexable
370:         $this->addArticles();
371:     }
372: 
373:     /**
374:      * An article is indexable if it is online and searchable.
375:      *
376:      * Articles that are hidden due to a protected category are indexable. The
377:      * searcher is responsible for making sure these aticles are only displayed
378:      * to privileged users.
379:      *
380:      * @param int $idartlang of article to be checked
381:      * @return bool
382:      */
383:     private function _isIndexable($idartlang) {
384: 
385:         // What about time managment?
386:         $articleLanguage = new cApiArticleLanguage($idartlang);
387:         if (!$articleLanguage->isLoaded()) {
388:             return false;
389:         } else if (1 != $articleLanguage->get('online')) {
390:             return false;
391:         } else if (1 != $articleLanguage->get('searchable')) {
392:             return false;
393:         } else {
394:             return true;
395:         }
396:     }
397: 
398:     /**
399:      *
400:      * @param int $idartlang of article to be read
401:      * @return array
402:      */
403:     private function _getContent($idartlang) {
404: 
405:         // 'CMS_IMG', 'CMS_LINK', 'CMS_LINKTARGET', 'CMS_SWF'
406:         $cms = "'CMS_HTMLHEAD','CMS_HTML','CMS_TEXT','CMS_IMGDESCR',"
407:             . "'CMS_LINKDESCR','CMS_HEAD','CMS_LINKTITLE','CMS_LINKEDIT',"
408:             . "'CMS_RAWLINK','CMS_IMGEDIT','CMS_IMGTITLE','CMS_SIMPLELINKEDIT',"
409:             . "'CMS_HTMLTEXT','CMS_EASYIMGEDIT','CMS_DATE','CMS_TEASER',"
410:             . "'CMS_FILELIST','CMS_IMGEDITOR','CMS_LINKEDITOR','CMS_PIFAFORM'";
411: 
412:         // exclude certain content types from indexing
413:         // like in conMakeArticleIndex & conGenerateKeywords
414:         $db = cRegistry::getDb();
415:         $db->query("-- SolrIndexer->_getContent()
416:             SELECT
417:                 con_type.type
418:                 , con_content.typeid
419:                 , con_content.value
420:             FROM
421:                 con_content
422:             INNER JOIN
423:                 con_type
424:             ON
425:                 con_content.idtype = con_type.idtype
426:             WHERE
427:                 con_content.idartlang = $idartlang
428:                 AND con_type.type IN ($cms)
429:             ORDER BY
430:                 con_content.idtype
431:                 , con_content.typeid
432:             ;");
433: 
434:         $content = array();
435:         while (false !== $db->nextRecord()) {
436:             $value = $db->f('value');
437:             //$value = utf8_encode($value);
438:             $value = strip_tags($value);
439:             //$value = html_entity_decode($value);
440:             $value = html_entity_decode($value, ENT_QUOTES, 'UTF-8');
441:             $value = trim($value);
442: 
443:             $content[$db->f('type')][$db->f('typeid')] = $value;
444:         }
445: 
446:         // TODO check first alternative:
447:         // cInclude('includes', 'functions.con.php');
448:         // $content = conGetContentFromArticle($this->_idartlang);
449:         // TODO check second alternative:
450:         // $articleLanguage = new cApiArticleLanguage($this->_idartlang);
451:         // if (!$articleLanguage->isLoaded()) {
452:         // throw new cException('article could not be loaded');
453:         // }
454:         // $content = $articleLanguage->getContent();
455: 
456:         return $content;
457:     }
458: 
459:     /**
460:      *
461:      * @param SolrResponse $solrResponse
462:      * @throws cException if Solr update request failed
463:      */
464:     private function _checkResponse(SolrResponse $solrResponse, $msg = 'Solr update request failed') {
465:         $response = $solrResponse->getResponse();
466: 
467:         // SolrResponse::getDigestedResponse — Returns the XML response as
468:         // serialized PHP data
469:         // SolrResponse::getHttpStatus — Returns the HTTP status of the response
470:         // SolrResponse::getHttpStatusMessage — Returns more details on the HTTP
471:         // status
472:         // SolrResponse::getRawRequest — Returns the raw request sent to the
473:         // Solr server
474:         // SolrResponse::getRawRequestHeaders — Returns the raw request headers
475:         // sent to the Solr server
476:         // SolrResponse::getRawResponse — Returns the raw response from the
477:         // server
478:         // SolrResponse::getRawResponseHeaders — Returns the raw response
479:         // headers from the server
480:         // SolrResponse::getRequestUrl — Returns the full URL the request was
481:         // sent to
482:         // SolrResponse::getResponse — Returns a SolrObject representing the XML
483:         // response from the server
484:         // SolrResponse::setParseMode — Sets the parse mode
485:         // SolrResponse::success — Was the request a success
486: 
487:         if (0 != $response->status) {
488:             throw new cException($msg);
489:         }
490:     }
491: }
492: 
CMS CONTENIDO 4.9.7 API documentation generated by ApiGen