Overview

Packages

  • Core
    • Authentication
    • Backend
    • Cache
    • CEC
    • Chain
    • ContentType
    • Database
    • Datatype
    • Debug
    • Exception
    • Frontend
      • Search
      • URI
      • Util
    • GenericDB
      • Model
    • GUI
      • HTML
    • I18N
    • LayoutHandler
    • Log
    • Security
    • Session
    • Util
    • Validation
    • Versioning
    • XML
  • Module
    • ContentSitemapHtml
    • ContentSitemapXml
    • ContentUserForum
    • NavigationMain
    • NavigationTop
  • mpAutoloaderClassMap
  • None
  • Plugin
    • ContentAllocation
    • CronjobOverview
    • FormAssistant
    • FrontendLogic
    • FrontendUsers
    • Linkchecker
    • ModRewrite
    • Newsletter
    • Repository
      • FrontendNavigation
      • KeywordDensity
    • SearchSolr
    • SmartyWrapper
    • UrlShortener
    • UserForum
    • Workflow
  • PluginManager
  • Setup
    • Form
    • GUI
    • Helper
      • Environment
      • Filesystem
      • MySQL
      • PHP
    • UpgradeJob

Classes

  • Solr
  • SolrIndexer
  • SolrSearcherAbstract
  • SolrSearcherSimple
  • SolrSearchModule
  • Overview
  • Package
  • Class
  • Tree
  • Deprecated
  • Todo
  1: <?php
  2: 
  3: /**
  4:  *
  5:  * @package Plugin
  6:  * @subpackage SearchSolr
  7:  * @version SVN Revision $Rev:$
  8:  * @author marcus.gnass
  9:  * @copyright four for business AG
 10:  * @link http://www.4fb.de
 11:  */
 12: 
 13: // assert CONTENIDO framework
 14: defined('CON_FRAMEWORK') || die('Illegal call: Missing framework initialization - request aborted.');
 15: 
 16: /**
 17:  * This class sends update requests to a Solr core.
 18:  * If the request failed an
 19:  * exception is thrown. This class allows handling of more than one article at
 20:  * once.
 21:  *
 22:  * http://client-uat70.4fb.net:8080/solr/admin/cores?action=STATUS
 23:  * http://client-uat70.4fb.net:8080/solr/admin/cores?action=RENAME&core=collection1&other=contenido
 24:  * http://client-uat70.4fb.net:8080/solr/admin/cores?action=RELOAD&core=contenido
 25:  *
 26:  * @author marcus.gnass
 27:  */
 28: class SolrIndexer {
 29: 
 30:     /**
 31:      * IDs of articles to be updated / added / deleted.
 32:      *
 33:      * @var array
 34:      */
 35:     private $_articleIds = array();
 36: 
 37:     /**
 38:      *
 39:      * @var SolrClient
 40:      */
 41:     private $_solrClient = NULL;
 42: 
 43:     /**
 44:      * CEC chain function for updating an article in the Solr core (index).
 45:      *
 46:      * This function is intended to be called after storing an article.
 47:      * This function will delete and eventually add the given article from/to
 48:      * the SOLR index. Adding will only be performed when the article should be
 49:      * indexed. Removal will always be performed, even when the article is not
 50:      * indexable, but it might have been indexed before!
 51:      *
 52:      * include.con_editcontent.php
 53:      *
 54:      * @param int $idartlang of article to be updated
 55:      */
 56:     public static function handleStoringOfArticle(array $newData, array $oldData) {
 57: 
 58:         // get IDs of given article langauge
 59:         if (cRegistry::getArticleLanguageId() == $newData['idartlang']) {
 60:             // quite easy if given article is current article
 61:             $idclient = cRegistry::getClientId();
 62:             $idlang = cRegistry::getLanguageId();
 63:             $idcat = cRegistry::getCategoryId();
 64:             $idart = cRegistry::getArticleId();
 65:             $idcatlang = cRegistry::getCategoryLanguageId();
 66:             $idartlang = cRegistry::getArticleLanguageId();
 67:         } else {
 68:             // == for other articles these infos have to be read from DB
 69:             // get idclient by idart
 70:             $article = new cApiArticle($idart);
 71:             if ($article->isLoaded()) {
 72:                 $idclient = $article->get('idclient');
 73:             }
 74:             // get idlang by idartlang
 75:             $articleLanguage = new cApiArticleLanguage($idartlang);
 76:             if ($articleLanguage->isLoaded()) {
 77:                 $idlang = $articleLanguage->get('idlang');
 78:             }
 79:             // get first idcat by idart
 80:             $coll = new cApiCategoryArticleCollection();
 81:             $idcat = array_shift($coll->getCategoryIdsByArticleId($idart));
 82:             // get idcatlang by idcat & idlang
 83:             $categoryLanguage = new cApiCategoryLanguage();
 84:             $categoryLanguage->loadByCategoryIdAndLanguageId($idcat, $idlang);
 85:             if ($categoryLanguage->isLoaded()) {
 86:                 $idcatlang = $articleLanguage->get('idlang');
 87:             }
 88:         }
 89: 
 90:         $articleIds = array(
 91:             'idclient' => $idclient,
 92:             'idlang' => $idlang,
 93:             'idcat' => $idcat,
 94:             'idcatlang' => $idcatlang,
 95:             'idart' => $idart,
 96:             'idartlang' => $idartlang
 97:         );
 98: 
 99:         self::handleStoringOfContentEntry($articleIds);
100:     }
101: 
102:     /**
103:      * CEC chain function for updating an article in the Solr core (index).
104:      *
105:      * This function is intended to be called after storing an article.
106:      * This function will delete and eventually add the given article from/to
107:      * the SOLR index. Adding will only be performed when the article should be
108:      * indexed. Removal will always be performed, even when the article is not
109:      * indexable, but it might have been indexed before!
110:      *
111:      * include.con_editcontent.php
112:      *
113:      * @param int $idartlang of article to be updated
114:      */
115:     public static function handleStoringOfContentEntry(array $articleIds) {
116:         try {
117:             // build indexer instance
118:             $indexer = new self(array(
119:                 $articleIds
120:             ));
121:             // update given articles
122:             $indexer->updateArticles();
123:         } catch (cException $e) {
124:             $lvl = $e instanceof SolrWarning? cGuiNotification::LEVEL_WARNING : cGuiNotification::LEVEL_ERROR;
125:             $note = new cGuiNotification();
126:             $note->displayNotification($lvl, $e->getMessage());
127:         }
128: 
129:         // destroy indexer to free mem
130:         unset($indexer);
131:     }
132: 
133:     /**
134:      * Create client instance (connect to Apache Solr) and aggregate it.
135:      *
136:      * @param array IDs of articles to be handled
137:      */
138:     public function __construct(array $articleIds) {
139:         $this->_articleIds = $articleIds;
140:         $options = Solr::getClientOptions();
141:         $options = Solr::validateClientOptions($options);
142:         $this->_solrClient = new SolrClient($options);
143:     }
144: 
145:     /**
146:      * Destroy aggregated client instance.
147:      */
148:     public function __destruct() {
149: 
150:         // destroy Solr client to free mem
151:         // really neccessary?
152:         // as SolClient has a method __destruct() this seems to be correct
153:         unset($this->_solrClient);
154:     }
155: 
156:     /**
157:      * If the current articles are indexable for each article a new index
158:      * document will be created and filled with its content and eventually
159:      * be added to the index.
160:      *
161:      * @throws cException if Solr add request failed
162:      */
163:     public function addArticles() {
164:         $documents = array();
165:         foreach ($this->_articleIds as $articleIds) {
166: 
167:             // skip if article should not be indexed
168:             if (!$this->_isIndexable($articleIds['idartlang'])) {
169:                 continue;
170:             }
171: 
172:             // get article content to be indexed
173:             $articleContent = $this->_getContent($articleIds['idartlang']);
174: 
175:             // create input document
176:             $solrInputDocument = new SolrInputDocument();
177:             // $solrInputDocument->addField('raise_exception', 'uncomment this
178:             // to raise an exception');
179:             // add IDs
180:             $solrInputDocument->addField('id_client', $articleIds['idclient']);
181:             $solrInputDocument->addField('id_lang', $articleIds['idlang']);
182:             $solrInputDocument->addField('id_cat', $articleIds['idcat']);
183:             $solrInputDocument->addField('id_art', $articleIds['idart']);
184:             $solrInputDocument->addField('id_cat_lang', $articleIds['idcatlang']);
185:             $solrInputDocument->addField('id_art_lang', $articleIds['idartlang']);
186: 
187:             // add content one by one
188:             foreach ($articleContent as $type => $typeContent) {
189: 
190:                 // field names in Solr should always be lowercase!
191:                 $type = strtolower($type);
192: 
193:                 // == sort content of a certain content type by their typeids
194:                 // This is important so that the most prominent headline can be
195:                 // displayed first.
196:                 ksort($typeContent);
197: 
198:                 // add each content entry seperatly (content type fields are
199:                 // defined as multiValued)
200:                 foreach ($typeContent as $typeid => $contentEntry) {
201:                     $contentEntry = trim($contentEntry);
202:                     if (0 < strlen($contentEntry)) {
203:                         $solrInputDocument->addField($type, $contentEntry);
204:                     }
205:                 }
206:             }
207: 
208:             array_push($documents, $solrInputDocument);
209:         }
210: 
211:         // add and commit documents and then optimze index
212:         try {
213:             @$this->_solrClient->addDocuments($documents);
214:             @$this->_solrClient->commit();
215:             @$this->_solrClient->optimize();
216:         } catch (Exception $e) {
217:             // log exception
218:             Solr::log($e);
219:             // rethrow as cException
220:             throw new cException('article could not be added to index', 0, $e);
221:         }
222:     }
223: 
224:     /**
225:      *
226:      * @throws SolrClientException if Solr delete request failed
227:      */
228:     public function deleteArticles() {
229: 
230:         function getIdartlang(array $array) {
231:             return $array['idartlang'];
232:         }
233: 
234:         $idartlangs = array_map('getIdartlang', $this->_articleIds);
235: 
236:         // delete document
237:         try {
238:             @$this->_solrClient->deleteByIds($idartlangs);
239:         } catch (Exception $e) {
240:             // log exception
241:             Solr::log($e);
242:             // rethrow as cException
243:             throw new cException('article could not be deleted from index', 0, $e);
244:         }
245:     }
246: 
247:     /**
248:      *
249:      * @throws cException if Solr delete request failed
250:      */
251:     public function updateArticles() {
252: 
253:         // Always delete articles from index, even if article should not be
254:         // indexed it might have been indexed before
255:         // What happens if an article could not be deleted cause it was not
256:         // indexed before? does this throw an exception? if yes an article
257:         // could never been indexed!
258:         try {
259:             $this->deleteArticles();
260:         } catch (cException $e) {
261:             // ignore exception so that articles can be indexed nonetheless
262:         }
263: 
264:         // add articles to index
265:         // will be skipped if article is not indexable
266:         $this->addArticles();
267:     }
268: 
269:     /**
270:      * An article is indexable if it is online and searchable.
271:      *
272:      * Articles that are hidden due to a protected category are indexable. The
273:      * searcher is responsible for making sure these aticles are only displayed
274:      * to privileged users.
275:      *
276:      * @param int $idartlang of article to be checked
277:      * @return bool
278:      */
279:     private function _isIndexable($idartlang) {
280: 
281:         // What about time managment?
282:         $articleLanguage = new cApiArticleLanguage($idartlang);
283:         if (!$articleLanguage->isLoaded()) {
284:             return false;
285:         } else if (1 != $articleLanguage->get('online')) {
286:             return false;
287:         } else if (1 != $articleLanguage->get('searchable')) {
288:             return false;
289:         } else {
290:             return true;
291:         }
292:     }
293: 
294:     /**
295:      *
296:      * @param int $idartlang of article to be read
297:      * @return array
298:      */
299:     private function _getContent($idartlang) {
300: 
301:         // exclude certain content types from indexing
302:         // like in conMakeArticleIndex & conGenerateKeywords
303:         $db = cRegistry::getDb();
304:         $db->query("-- SolrIndexer->_getContent()
305:             SELECT
306:                 con_type.type
307:                 , con_content.typeid
308:                 , con_content.value
309:             FROM
310:                 con_content
311:             INNER JOIN
312:                 con_type
313:             ON
314:                 con_content.idtype = con_type.idtype
315:             WHERE
316:                 con_content.idartlang = $idartlang
317:                 AND con_type.type NOT IN ('CMS_IMG', 'CMS_LINK', 'CMS_LINKTARGET', 'CMS_SWF')
318:             ORDER BY
319:                 con_content.idtype
320:                 , con_content.typeid
321:             ;");
322: 
323:         $content = array();
324:         while (false !== $db->nextRecord()) {
325:             $content[$db->f('type')][$db->f('typeid')] = $db->f('value');
326:         }
327: 
328:         // TODO check first alternative:
329:         // cInclude('includes', 'functions.con.php');
330:         // $content = conGetContentFromArticle($this->_idartlang);
331:         // TODO check second alternative:
332:         // $articleLanguage = new cApiArticleLanguage($this->_idartlang);
333:         // if (!$articleLanguage->isLoaded()) {
334:         // throw new cException('article could not be loaded');
335:         // }
336:         // $content = $articleLanguage->getContent();
337: 
338:         return $content;
339:     }
340: 
341:     /**
342:      *
343:      * @param SolrResponse $solrResponse
344:      * @throws cException if Solr update request failed
345:      */
346:     private function _checkResponse(SolrResponse $solrResponse, $msg = 'Solr update request failed') {
347:         $response = $solrResponse->getResponse();
348: 
349:         // SolrResponse::getDigestedResponse — Returns the XML response as
350:         // serialized PHP data
351:         // SolrResponse::getHttpStatus — Returns the HTTP status of the response
352:         // SolrResponse::getHttpStatusMessage — Returns more details on the HTTP
353:         // status
354:         // SolrResponse::getRawRequest — Returns the raw request sent to the
355:         // Solr server
356:         // SolrResponse::getRawRequestHeaders — Returns the raw request headers
357:         // sent to the Solr server
358:         // SolrResponse::getRawResponse — Returns the raw response from the
359:         // server
360:         // SolrResponse::getRawResponseHeaders — Returns the raw response
361:         // headers from the server
362:         // SolrResponse::getRequestUrl — Returns the full URL the request was
363:         // sent to
364:         // SolrResponse::getResponse — Returns a SolrObject representing the XML
365:         // response from the server
366:         // SolrResponse::setParseMode — Sets the parse mode
367:         // SolrResponse::success — Was the request a success
368: 
369:         if (0 != $response->status) {
370:             throw new cException($msg);
371:         }
372:     }
373: }
374: 
CMS CONTENIDO 4.9.0 API documentation generated by ApiGen 2.8.0