Overview

Packages

  • CONTENIDO
  • Core
    • Authentication
    • Backend
    • Cache
    • CEC
    • Chain
    • ContentType
    • Database
    • Debug
    • Exception
    • Frontend
      • Search
      • URI
      • Util
    • GenericDB
      • Model
    • GUI
      • HTML
    • I18N
    • LayoutHandler
    • Log
    • Security
    • Session
    • Util
    • Validation
    • Versioning
    • XML
  • Module
    • ContentSitemapHtml
    • ContentSitemapXml
    • ContentUserForum
    • NavigationTop
    • ScriptCookieDirective
  • mpAutoloaderClassMap
  • None
  • PHP
  • Plugin
    • ContentAllocation
    • CronjobOverview
    • FormAssistant
    • FrontendLogic
    • FrontendUsers
    • Linkchecker
    • ModRewrite
    • Newsletter
    • Repository
      • FrontendNavigation
      • KeywordDensity
    • SmartyWrapper
    • UrlShortener
    • UserForum
    • Workflow
  • PluginManager
  • Setup
    • Form
    • GUI
    • Helper
      • Environment
      • Filesystem
      • MySQL
      • PHP
    • UpgradeJob

Functions

  • addArticlesToSitemap
  • addUrl
  • checkJobRerun
  • iso8601Date
  • saveSitemap
  • Overview
  • Package
  • Function
  • Tree
  • Deprecated
  • Todo
  1: <?php
  2: 
  3: /**
  4:  * Generate an XML sitemap.
  5:  *
  6:  * The module configuration allows for the selection of a category which is used
  7:  * as root to determine articles that will be listed in the sitemap.
  8:  *
  9:  * An optional filename can be defined too. If no filename is given, the sitemap
 10:  * is displayed immediately. With a filename the sitemap is written to the given
 11:  * file. The filename has to be a basename (no path). The client's frontend path
 12:  * is used instead. In this case this module makes sure that the sitemap is
 13:  * generated only once each 23h.
 14:  *
 15:  * SETTING: content-sitemap-xml/cat-url-for-startart (default: true)
 16:  * If set to true, the URL of a start article is generated for its category
 17:  * instead of for the article itself.
 18:  * This should be done if the navigation produces category links, which is
 19:  * usually the case.
 20:  *
 21:  * @package Module
 22:  * @subpackage ContentSitemapXml
 23:  * @author simon.sprankel@4fb.de
 24:  * @author marcus.gnass@4fb.de
 25:  * @copyright four for business AG
 26:  * @link http://www.4fb.de
 27:  * @see http://www.sitemaps.org/
 28:  */
// Module entry point: builds the XML sitemap for the current client and either
// outputs it or writes it to a file. Only executed when no backend session id
// is available.
if(cRegistry::getBackendSessionId() === NULL) {
    $client = cRegistry::getClientId();
    $cfgClient = cRegistry::getClientConfig();

    // get idcat of category to generate sitemap from
    // NOTE(review): "CMS_VALUE[1]" / "CMS_VALUE[2]" are presumably replaced with
    // the configured module values before this code runs - confirm
    $idcatStart = "CMS_VALUE[1]";
    $idcatStart = cSecurity::toInteger($idcatStart);

    // get filename to save sitemap to (optional)
    $filename = "CMS_VALUE[2]";
    if (!empty($filename)) {
        // drop any path component, only the basename is kept
        $filename = basename($filename);
        // assert .xml extension
        if (cString::getPartOfString($filename, -4) !== '.xml') {
            $filename .= '.xml';
        }
    }

    try {

        // check if this is a rerun (a cException will then be thrown)
        // check is skipped when 'rerun' is forced
        // (the check only applies when the sitemap is written to a file)
        if (!empty($filename) && !array_key_exists('rerun', $_REQUEST)) {
            checkJobRerun('xml_sitemap_' . cRegistry::getClient()->get('name') . '_' . cRegistry::getLanguage()->get('name') . '_' . cRegistry::getArticleLanguageId());
        }

        // get all categories recursively
        $categoryCollection = new cApiCategoryCollection();
        $categoryIds = $categoryCollection->getAllCategoryIdsRecursive($idcatStart, $client);

        // empty <urlset> document that the <url> entries are appended to
        $xmlString = <<<EOD
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"></urlset>
EOD;

        $sitemap = new SimpleXMLElement($xmlString);

        // number of added URLs, one entry per processed language
        $itemCount = array();

        // loop all languages of current client
        $clientLanguageCollection = new cApiClientLanguageCollection();
        foreach ($clientLanguageCollection->getLanguagesByClient($client) as $currentIdlang) {

            // skip nonexistent or inactive languages
            $language = new cApiLanguage($currentIdlang);
            if (!$language->isLoaded() || '1' != $language->get('active')) {
                continue;
            }

            // create copy of category ids
            // (each language filters its own copy, $categoryIds stays untouched)
            $arrayObject = new ArrayObject($categoryIds);
            $currentCategoryIds = $arrayObject->getArrayCopy();

            // filter the categories - category must be visible and public!
            foreach ($currentCategoryIds as $key => $categoryId) {
                $categoryLanguage = new cApiCategoryLanguage();
                $categoryLanguage->loadByCategoryIdAndLanguageId($categoryId, $currentIdlang);
                if ($categoryLanguage->get('visible') == false || $categoryLanguage->get('public') == false) {
                    unset($currentCategoryIds[$key]);
                }
            }

            $itemCount[] = addArticlesToSitemap($sitemap, $currentCategoryIds, $currentIdlang);
        }

        // if there are items
        if (0 < array_sum($itemCount)) {
            // provide the possibility to alter the sitemap content
            $sitemap = cApiCecHook::executeAndReturn('Contenido.Content.XmlSitemapCreate', $sitemap);
        }

        // echo sitemap or write it to file with the specified filename
        saveSitemap($sitemap, $filename);
    } catch (cException $e) {
        // a cException (e.g. from the rerun check) is reported as a dated line
        // instead of aborting the page
        echo "\n\n[" . date('Y-m-d') . "] " . $e->getMessage() . "\n";
    }
}
/**
 * Guards a job against being executed more than once within 23 hours.
 *
 * The timestamp of the last run is read from the job file in the cronlog
 * directory. If that run is less than 23h ago a cException is thrown.
 * Otherwise the current timestamp is written to the job file.
 *
 * @param string $jobname name of the job, used as basename of the job file
 * @throws cException if job was already executed within last 23h
 */
function checkJobRerun($jobname) {
    // build path of the job file inside the cronlog directory
    $config = cRegistry::getConfig();
    $jobFile = $config['path']['contenido_cronlog'] . $jobname . '.job';

    if (cFileHandler::exists($jobFile)) {
        // the job file contains nothing but the timestamp of the last run
        $lastRun = cSecurity::toInteger(file_get_contents($jobFile));
        $earliestAllowed = strtotime('-23 hour');

        // abort if last run is less than 23h ago
        if ($lastRun > $earliestAllowed) {
            throw new cException('job was already executed within last 23h');
        }
    }

    // remember this run
    file_put_contents($jobFile, time());
}
131: 
/**
 * Adds all online articles of the given categories to the sitemap.
 *
 * Articles whose meta value 7 (presumably the robots meta tag - confirm)
 * contains "noindex" are skipped.
 *
 * If the setting content-sitemap-xml/cat-url-for-startart is 'true' (default),
 * the URL of a start article is built for its category instead of the article.
 *
 * @param SimpleXMLElement $sitemap sitemap the <url> nodes are appended to
 * @param array $categoryIds ids of the categories whose articles are listed
 * @param int $lang id of the language the articles are read for
 * @return int number of URLs that were added to the sitemap
 */
function addArticlesToSitemap(SimpleXMLElement $sitemap, array $categoryIds, $lang) {
    // nothing to do without categories
    if (0 === count($categoryIds)) {
        return 0;
    }

    $itemCount = 0;

    $cfg = cRegistry::getConfig();
    $tab = $cfg['tab'];
    $db = cRegistry::getDb();

    $useCategoryUrlsForStartArticles = 'true' == getEffectiveSetting('content-sitemap-xml', 'cat-url-for-startart', 'true');

    // ids are interpolated into the statement, so force them to be integers
    $categoryIds = implode(',', array_map('intval', $categoryIds));
    $idlang = (int) $lang;

    // get articles from DB
    $db->query("
        SELECT
            art_lang.idart
            , art_lang.idartlang
            , UNIX_TIMESTAMP(art_lang.lastmodified) as lastmod
            , art_lang.changefreq
            , art_lang.sitemapprio
            , cat_art.idcat
            , IF(art_lang.idartlang = cat_lang.startidartlang, 1, 0) AS is_start
        FROM
            `$tab[art_lang]` AS art_lang
            , `$tab[cat_art]` AS cat_art
            , `$tab[cat_lang]` AS cat_lang
        WHERE
            art_lang.idart = cat_art.idart
            AND art_lang.idlang = $idlang
            AND art_lang.online = 1
            AND cat_art.idcat = cat_lang.idcat
            AND cat_art.idcat IN ($categoryIds)
            AND cat_lang.idlang = $idlang
        ;");

    // construct the XML node
    while ($db->nextRecord()) {
        $indexState = conGetMetaValue($db->f('idartlang'), 7);

        // articles marked as "noindex" must not be listed
        if (preg_match('/noindex/', (string) $indexState)) {
            continue;
        }

        $params = array();
        $params['lang'] = $lang;
        $params['changelang'] = $lang;

        // if it is a startarticle the generated URL should be that of
        // the category (assuming the navigation contains category URLs)
        if (1 == $db->f('is_start') && $useCategoryUrlsForStartArticles) {
            $params['idcat'] = $db->f('idcat');
        } else {
            $params['idart'] = $db->f('idart');
        }

        $loc = cUri::getInstance()->build($params, true);
        // Escape only the XML special characters. htmlentities() would also
        // produce HTML named entities (e.g. &uuml;) which are undefined in XML
        // and would make the sitemap invalid.
        $loc = htmlspecialchars($loc);

        addUrl($sitemap, array(
            // construct the link
            'loc' => $loc,
            // last modification as UNIX timestamp (formatted by addUrl())
            'lastmod' => (int) $db->f('lastmod'),
            // get the sitemap change frequency
            'changefreq' => $db->f('changefreq'),
            // get the sitemap priority
            'priority' => $db->f('sitemapprio')
        ));
        $itemCount++;
    }

    return $itemCount;
}
216: 
/**
 * Appends a <url> node with the given data to the sitemap.
 *
 * Recognized keys of $data:
 * - loc: the (already escaped) URL, always written
 * - lastmod: UNIX timestamp of the last modification; if it is missing, empty,
 *   0 or '0000-00-00 00:00:00' the current time is used instead
 * - changefreq: change frequency, written only if not empty
 * - priority: sitemap priority, written only if it is numeric (0 included)
 *
 * @param SimpleXMLElement $sitemap sitemap the <url> node is appended to
 * @param array $data values of the child nodes, see above
 */
function addUrl(SimpleXMLElement $sitemap, array $data) {
    $url = $sitemap->addChild('url');

    $url->addChild('loc', $data['loc']);

    // empty() also catches the integer 0, which a loose comparison against ''
    // no longer does since PHP 8
    $lastmod = isset($data['lastmod']) ? $data['lastmod'] : 0;
    if (empty($lastmod) || $lastmod === '0000-00-00 00:00:00') {
        // time() instead of mktime(): calling mktime() without arguments is
        // an error since PHP 8
        $lastmod = time();
    }
    $url->addChild('lastmod', conHtmlSpecialChars(iso8601Date($lastmod)));

    if (!empty($data['changefreq'])) {
        $url->addChild('changefreq', $data['changefreq']);
    }

    // a priority of 0 is valid, an empty or missing one is not
    if (isset($data['priority']) && is_numeric($data['priority'])) {
        $url->addChild('priority', (string) $data['priority']);
    }
}
241: 
/**
 * Formats a date/time according to ISO 8601 (W3C datetime).
 *
 * The result has the form YYYY-MM-DDThh:mm:ssTZD in the default timezone,
 * e.g. 1997-07-16T19:20:30+01:00. Fractional seconds are not emitted.
 *
 * @param int $time a UNIX timestamp
 * @return string the formatted date string
 */
function iso8601Date($time) {
    // DATE_W3C is "Y-m-d\TH:i:sP": the built-in format already yields the
    // timezone offset with a colon, so no manual string handling is needed
    return date(DATE_W3C, $time);
}
258: 
/**
 * Outputs the sitemap or writes it to a file.
 *
 * Without a filename the XML is sent to the output with a text/xml content
 * type header. With a filename the XML is written to that file below the
 * frontend path and a success or failure message is printed.
 *
 * @todo How can I save this properly formatted?
 * @see http://stackoverflow.com/questions/1191167/format-output-of-simplexml-asxml
 * @param SimpleXMLElement $sitemap the XML structure of the sitemap
 * @param string $filename [optional] the filename to which the sitemap should
 *        be written
 */
function saveSitemap(SimpleXMLElement $sitemap, $filename = '') {
    // no target file: deliver the document directly
    if (empty($filename)) {
        header('Content-type: text/xml');
        echo $sitemap->asXML();
        return;
    }

    // asXML() with a path writes the file and reports success as boolean
    $target = cRegistry::getFrontendPath() . $filename;
    $message = $sitemap->asXML($target)
        ? mi18n("XML sitemap successfully written to %s", $filename)
        : mi18n("XML sitemap could not be written to %s", $filename);

    echo conHtmlSpecialChars($message);
}
279: 
280: ?>
CMS CONTENIDO 4.10.0 API documentation generated by ApiGen 2.8.0