1: <?php
2:
3: /**
4: *
5: * @package Plugin
6: * @subpackage SearchSolr
7: * @version SVN Revision $Rev:$
8: * @author marcus.gnass
9: * @copyright four for business AG
10: * @link http://www.4fb.de
11: */
12:
// Refuse to run when this file is requested directly instead of being loaded
// by the CONTENIDO framework (which defines CON_FRAMEWORK during bootstrap).
if (!defined('CON_FRAMEWORK')) {
    die('Illegal call: Missing framework initialization - request aborted.');
}
15:
16: /**
17: * This class sends update requests to a Solr core.
18: * If the request failed an
19: * exception is thrown. This class allows handling of more than one article at
20: * once.
21: *
22: * http://client-uat70.4fb.net:8080/solr/admin/cores?action=STATUS
23: * http://client-uat70.4fb.net:8080/solr/admin/cores?action=RENAME&core=collection1&other=contenido
24: * http://client-uat70.4fb.net:8080/solr/admin/cores?action=RELOAD&core=contenido
25: *
26: * @author marcus.gnass
27: */
class SolrIndexer {

    /**
     * IDs of articles to be updated / added / deleted.
     *
     * Each entry is an associative array with the keys idclient, idlang,
     * idcat, idcatlang, idart and idartlang.
     *
     * @var array
     */
    private $_articleIds = array();

    /**
     * Client used to send requests to the Solr core.
     *
     * @var SolrClient
     */
    private $_solrClient = NULL;

    /**
     * CEC chain function for updating an article in the Solr core (index).
     *
     * This function is intended to be called after storing an article.
     * It resolves all IDs that belong to the stored article language and
     * hands them over to handleStoringOfContentEntry(), which deletes and
     * eventually re-adds the article from/to the Solr index.
     *
     * If the stored article language is the one currently registered in
     * cRegistry, the IDs are taken from the registry. Otherwise they are read
     * from the database, starting from $newData['idartlang']. IDs that cannot
     * be resolved are passed on as NULL so that the removal from the index is
     * still attempted.
     *
     * @param array $newData data of the stored article language; only the
     *        key 'idartlang' is read
     * @param array $oldData previous data of the article language (unused)
     */
    public static function handleStoringOfArticle(array $newData, array $oldData) {

        if (cRegistry::getArticleLanguageId() == $newData['idartlang']) {
            // quite easy if given article is current article
            $idclient = cRegistry::getClientId();
            $idlang = cRegistry::getLanguageId();
            $idcat = cRegistry::getCategoryId();
            $idart = cRegistry::getArticleId();
            $idcatlang = cRegistry::getCategoryLanguageId();
            $idartlang = cRegistry::getArticleLanguageId();
        } else {
            // for other articles these infos have to be read from DB
            $idartlang = $newData['idartlang'];
            $idclient = NULL;
            $idlang = NULL;
            $idcat = NULL;
            $idart = NULL;
            $idcatlang = NULL;

            // get idart & idlang by idartlang
            $articleLanguage = new cApiArticleLanguage($idartlang);
            if ($articleLanguage->isLoaded()) {
                $idart = $articleLanguage->get('idart');
                $idlang = $articleLanguage->get('idlang');
            }

            if (NULL !== $idart) {
                // get idclient by idart
                $article = new cApiArticle($idart);
                if ($article->isLoaded()) {
                    $idclient = $article->get('idclient');
                }

                // get first idcat by idart
                // (array_shift() needs a variable as it takes a reference)
                $coll = new cApiCategoryArticleCollection();
                $idcats = $coll->getCategoryIdsByArticleId($idart);
                if (is_array($idcats) && 0 < count($idcats)) {
                    $idcat = array_shift($idcats);
                }
            }

            // get idcatlang by idcat & idlang
            if (NULL !== $idcat && NULL !== $idlang) {
                $categoryLanguage = new cApiCategoryLanguage();
                $categoryLanguage->loadByCategoryIdAndLanguageId($idcat, $idlang);
                if ($categoryLanguage->isLoaded()) {
                    $idcatlang = $categoryLanguage->get('idcatlang');
                }
            }
        }

        $articleIds = array(
            'idclient' => $idclient,
            'idlang' => $idlang,
            'idcat' => $idcat,
            'idcatlang' => $idcatlang,
            'idart' => $idart,
            'idartlang' => $idartlang
        );

        self::handleStoringOfContentEntry($articleIds);
    }

    /**
     * CEC chain function for updating an article in the Solr core (index).
     *
     * This function is intended to be called after storing a content entry.
     * It builds an indexer for the given article and updates it in the index
     * (see updateArticles()). A cException raised while doing so is not
     * propagated but displayed as a GUI notification: as a warning if it is a
     * SolrWarning, as an error otherwise.
     *
     * @param array $articleIds IDs of the article to be updated (keys
     *        idclient, idlang, idcat, idcatlang, idart, idartlang)
     */
    public static function handleStoringOfContentEntry(array $articleIds) {
        try {
            // build indexer instance for this single article
            $indexer = new self(array(
                $articleIds
            ));
            // update given articles
            $indexer->updateArticles();
        } catch (cException $e) {
            $lvl = $e instanceof SolrWarning? cGuiNotification::LEVEL_WARNING : cGuiNotification::LEVEL_ERROR;
            $note = new cGuiNotification();
            $note->displayNotification($lvl, $e->getMessage());
        }

        // destroy indexer to free mem
        unset($indexer);
    }

    /**
     * Create client instance (connect to Apache Solr) and aggregate it.
     *
     * @param array $articleIds list of ID arrays of the articles to be
     *        handled
     */
    public function __construct(array $articleIds) {
        $this->_articleIds = $articleIds;
        $opt = Solr::getClientOptions();
        Solr::validateClientOptions($opt);
        $this->_solrClient = new SolrClient($opt);
    }

    /**
     * Destroy aggregated client instance.
     */
    public function __destruct() {
        // release the Solr client explicitly to free mem
        unset($this->_solrClient);
    }

    /**
     * For each of the current articles that is indexable a new index
     * document is created, filled with the article's IDs and content and
     * added to the index. Afterwards the index is committed and optimized.
     *
     * If none of the articles is indexable no add request is sent (an empty
     * document list is not a valid add request), but the commit is still
     * performed so that a preceding delete request takes effect.
     *
     * @throws cException if a Solr request failed
     */
    public function addArticles() {
        $documents = array();
        foreach ($this->_articleIds as $articleIds) {

            // skip if article should not be indexed
            if (!$this->_isIndexable($articleIds['idartlang'])) {
                continue;
            }

            // get article content to be indexed
            $articleContent = $this->_getContent($articleIds['idartlang']);

            // create input document and add IDs
            $solrInputDocument = new SolrInputDocument();
            $solrInputDocument->addField('id_client', $articleIds['idclient']);
            $solrInputDocument->addField('id_lang', $articleIds['idlang']);
            $solrInputDocument->addField('id_cat', $articleIds['idcat']);
            $solrInputDocument->addField('id_art', $articleIds['idart']);
            $solrInputDocument->addField('id_cat_lang', $articleIds['idcatlang']);
            $solrInputDocument->addField('id_art_lang', $articleIds['idartlang']);

            // add content one by one
            foreach ($articleContent as $type => $typeContent) {

                // field names in Solr should always be lowercase!
                $type = strtolower($type);

                // sort content of a certain content type by their typeids
                // This is important so that the most prominent headline can
                // be displayed first.
                ksort($typeContent);

                // add each non-empty content entry separately (content type
                // fields are defined as multiValued)
                foreach ($typeContent as $contentEntry) {
                    $contentEntry = trim((string) $contentEntry);
                    if ('' !== $contentEntry) {
                        $solrInputDocument->addField($type, $contentEntry);
                    }
                }
            }

            $documents[] = $solrInputDocument;
        }

        // add and commit documents and then optimize index
        try {
            if (0 < count($documents)) {
                @$this->_solrClient->addDocuments($documents);
            }
            @$this->_solrClient->commit();
            @$this->_solrClient->optimize();
        } catch (Exception $e) {
            // log exception
            Solr::log($e);
            // rethrow as cException
            throw new cException('article could not be added to index', 0, $e);
        }
    }

    /**
     * Sends a delete request for the current articles to the Solr core.
     * The idartlang of each article is used as document ID. The request is
     * not committed by this method.
     *
     * @throws cException if Solr delete request failed
     */
    public function deleteArticles() {

        // collect the document IDs; done with a plain loop as a named
        // function declared in here could not be declared a second time
        $idartlangs = array();
        foreach ($this->_articleIds as $articleIds) {
            $idartlangs[] = $articleIds['idartlang'];
        }

        // nothing to delete
        if (0 == count($idartlangs)) {
            return;
        }

        // delete documents
        try {
            @$this->_solrClient->deleteByIds($idartlangs);
        } catch (Exception $e) {
            // log exception
            Solr::log($e);
            // rethrow as cException
            throw new cException('article could not be deleted from index', 0, $e);
        }
    }

    /**
     * Deletes the current articles from the index and adds them again
     * afterwards (if they are indexable).
     *
     * A failing delete request is ignored so that the articles can be
     * indexed nonetheless.
     *
     * @throws cException if adding the articles failed
     */
    public function updateArticles() {

        // Always delete articles from index, even if article should not be
        // indexed it might have been indexed before.
        try {
            $this->deleteArticles();
        } catch (cException $e) {
            // ignore exception so that articles can be indexed nonetheless
        }

        // add articles to index
        // adding is skipped for articles that are not indexable
        $this->addArticles();
    }

    /**
     * An article is indexable if it is online and searchable.
     *
     * Articles that are hidden due to a protected category are indexable. The
     * searcher is responsible for making sure these articles are only
     * displayed to privileged users.
     *
     * Time management of articles is not considered.
     *
     * @param int $idartlang of article to be checked
     * @return bool
     */
    private function _isIndexable($idartlang) {
        $articleLanguage = new cApiArticleLanguage($idartlang);

        return $articleLanguage->isLoaded()
            && 1 == $articleLanguage->get('online')
            && 1 == $articleLanguage->get('searchable');
    }

    /**
     * Reads the content of the given article language from the database.
     *
     * Content of the types CMS_IMG, CMS_LINK, CMS_LINKTARGET and CMS_SWF is
     * excluded from indexing.
     *
     * @param int $idartlang of article to be read
     * @return array content values indexed by content type and typeid
     */
    private function _getContent($idartlang) {

        // the ID is embedded into the statement, so make sure it is a number
        $idartlang = (int) $idartlang;

        // exclude certain content types from indexing
        // like in conMakeArticleIndex & conGenerateKeywords
        $db = cRegistry::getDb();
        $db->query("-- SolrIndexer->_getContent()
            SELECT
                con_type.type
                , con_content.typeid
                , con_content.value
            FROM
                con_content
            INNER JOIN
                con_type
            ON
                con_content.idtype = con_type.idtype
            WHERE
                con_content.idartlang = $idartlang
                AND con_type.type NOT IN ('CMS_IMG', 'CMS_LINK', 'CMS_LINKTARGET', 'CMS_SWF')
            ORDER BY
                con_content.idtype
                , con_content.typeid
            ;");

        $content = array();
        while (false !== $db->nextRecord()) {
            $content[$db->f('type')][$db->f('typeid')] = $db->f('value');
        }

        // TODO check alternatives: conGetContentFromArticle() from
        // functions.con.php or cApiArticleLanguage::getContent()

        return $content;
    }

    /**
     * Checks the status of the given Solr response.
     *
     * NOTE(review): this method is not called anywhere in this class and it
     * reads the status directly from the response object - confirm that this
     * is where the Solr extension exposes it before using this method.
     *
     * @param SolrResponse $solrResponse
     * @param string $msg message of the exception to be thrown
     * @throws cException if Solr update request failed
     */
    private function _checkResponse(SolrResponse $solrResponse, $msg = 'Solr update request failed') {
        // SolrResponse offers further details if needed, e.g.
        // getHttpStatus(), getHttpStatusMessage(), getRawRequest(),
        // getRawResponse(), getRequestUrl() and success().
        $response = $solrResponse->getResponse();

        if (0 != $response->status) {
            throw new cException($msg);
        }
    }
}
374: