Overview

Packages

  • CONTENIDO
  • Core
    • Authentication
    • Backend
    • Cache
    • CEC
    • Chain
    • ContentType
    • Database
    • Debug
    • Exception
    • Frontend
      • Search
      • URI
      • Util
    • GenericDB
      • Model
    • GUI
      • HTML
    • I18N
    • LayoutHandler
    • Log
    • Security
    • Session
    • Util
    • Validation
    • Versioning
    • XML
  • Module
    • ContentRssCreator
    • ContentSitemapHtml
    • ContentSitemapXml
    • ContentUserForum
    • NavigationTop
    • ScriptCookieDirective
  • mpAutoloaderClassMap
  • None
  • Plugin
    • ContentAllocation
    • CronjobOverview
    • FormAssistant
    • FrontendLogic
    • FrontendUsers
    • Linkchecker
    • ModRewrite
    • Newsletter
    • Repository
      • FrontendNavigation
      • KeywordDensity
    • SearchSolr
    • SmartyWrapper
    • UrlShortener
    • UserForum
    • Workflow
  • PluginManager
  • Setup
    • Form
    • GUI
    • Helper
      • Environment
      • Filesystem
      • MySQL
      • PHP
    • UpgradeJob
  • Smarty
    • Cacher
    • Compiler
    • Config
    • Debug
    • PluginsBlock
    • PluginsFilter
    • PluginsFunction
    • PluginsInternal
    • PluginsModifier
    • PluginsModifierCompiler
    • PluginsShared
    • Security
    • Template
    • TemplateResources
  • Swift
    • ByteStream
    • CharacterStream
    • Encoder
    • Events
    • KeyCache
    • Mailer
    • Mime
    • Plugins
    • Transport

Classes

  • Swift_FailoverTransport
  • Swift_LoadBalancedTransport
  • Swift_MailTransport
  • Swift_Plugins_Loggers_ArrayLogger
  • Swift_Plugins_Loggers_EchoLogger
  • Swift_SendmailTransport
  • Swift_SmtpTransport
  • Swift_Transport_AbstractSmtpTransport
  • Swift_Transport_Esmtp_Auth_CramMd5Authenticator
  • Swift_Transport_Esmtp_Auth_LoginAuthenticator
  • Swift_Transport_Esmtp_Auth_PlainAuthenticator
  • Swift_Transport_Esmtp_AuthHandler
  • Swift_Transport_EsmtpTransport
  • Swift_Transport_FailoverTransport
  • Swift_Transport_LoadBalancedTransport
  • Swift_Transport_MailTransport
  • Swift_Transport_SendmailTransport
  • Swift_Transport_SimpleMailInvoker
  • Swift_Transport_StreamBuffer

Interfaces

  • Swift_Plugins_Logger
  • Swift_Plugins_Pop_Pop3Exception
  • Swift_Transport
  • Swift_Transport_Esmtp_Authenticator
  • Swift_Transport_EsmtpHandler
  • Swift_Transport_IoBuffer
  • Swift_Transport_MailInvoker
  • Swift_Transport_SmtpAgent
  • Swift_TransportException
  • Overview
  • Package
  • Function
  • Todo
  • Download
  1: <?php
  2: /**
  3:  * This file contains the html parser class.
  4:  *
  5:  * @package Core
  6:  * @subpackage Backend
  7:  * @version SVN Revision $Rev:$
  8:  *
  9:  * @author Starnetsys, LLC.
 10:  * @copyright Starnetsys, LLC.
 11:  * @link http://starnetsys.com
 12:  */
 13: 
  14: defined('CON_FRAMEWORK') || die('Illegal call: Missing framework initialization - request aborted.'); // Guard: stop the script with this message unless the CON_FRAMEWORK constant has already been defined.
 15: 
 16: /**
 17:  * Class HtmlParser.
 18:  * To use, create an instance of the class passing
 19:  * HTML text. Then invoke parse() until it's false.
 20:  * When parse() returns true, $iNodeType, $iNodeName
 21:  * $iNodeValue and $iNodeAttributes are updated.
 22:  *
 23:  * Copyright (c) 2003 Starnetsys, LLC. All rights reserved.
 24:  * Redistribution of source must retain this copyright notice.
 25:  *
 26:  * Starnetsys, LLC (http://starnetsys.com) specializes in
 27:  * website design and software consulting
 28:  *
 29:  * @package Core
 30:  * @subpackage Backend
 31:  */
 32: class HtmlParser {
 33: 
 34:     /**
 35:      * node type ID for elements
 36:      *
 37:      * @var int
 38:      */
 39:     const NODE_TYPE_ELEMENT = 1;
 40: 
 41:     /**
 42:      * node type ID for endelements
 43:      *
 44:      * @var int
 45:      */
 46:     const NODE_TYPE_ENDELEMENT = 2;
 47: 
 48:     /**
 49:      * node type ID for texts
 50:      *
 51:      * @var int
 52:      */
 53:     const NODE_TYPE_TEXT = 3;
 54: 
 55:     /**
 56:      * node type ID for comments
 57:      *
 58:      * @var int
 59:      */
 60:     const NODE_TYPE_COMMENT = 4;
 61: 
 62:     /**
 63:      * node type ID when done
 64:      *
 65:      * @var int
 66:      */
 67:     const NODE_TYPE_DONE = 5;
 68: 
 69:     /**
 70:      * Field iNodeType.
 71:      * May be one of the NODE_TYPE_* constants above.
 72:      *
 73:      * @var int
 74:      */
 75:     var $iNodeType;
 76: 
 77:     /**
 78:      * Field iNodeName.
 79:      * For elements, it's the name of the element.
 80:      *
 81:      * @var string
 82:      */
 83:     var $iNodeName = "";
 84: 
 85:     /**
 86:      * Field iNodeValue.
 87:      * For text nodes, it's the text.
 88:      *
 89:      * @var string
 90:      */
 91:     var $iNodeValue = "";
 92: 
 93:     /**
 94:      * Field iNodeAttributes.
 95:      * A string-indexed array containing attribute values
 96:      * of the current node. Indexes are always lowercase.
 97:      *
 98:      * @var array
 99:      */
100:     var $iNodeAttributes;
101: 
102:     /**
103:      *
104:      * @var unknown_type
105:      * @todo should be private
106:      */
107:     var $iHtmlText;
108: 
109:     /**
110:      *
111:      * @var unknown_type
112:      * @todo should be private
113:      */
114:     var $iHtmlTextLength;
115: 
116:     /**
117:      *
118:      * @var unknown_type
119:      * @todo should be private
120:      */
121:     var $iHtmlTextIndex = 0;
122: 
123:     /**
124:      * Constructor.
125:      * Constructs an HtmlParser instance with
126:      * the HTML text given.
127:      *
128:      * @param string $aHtmlText
129:      */
130:     function HtmlParser($aHtmlText) {
131:         $this->iHtmlText = $aHtmlText;
132:         $this->iHtmlTextLength = strlen($aHtmlText);
133:     }
134: 
135:     /**
136:      * Method parse.
137:      * Parses the next node. Returns false only if the end of the HTML text has
138:      * been reached. Updates values of iNode* fields.
139:      *
140:      * @return boolean
141:      */
142:     function parse() {
143:         $text = $this->skipToElement();
144:         if ($text != "") {
145:             $this->iNodeType = self::NODE_TYPE_TEXT;
146:             $this->iNodeName = "Text";
147:             $this->iNodeValue = $text;
148:             return true;
149:         }
150:         return $this->readTag();
151:     }
152: 
153:     /**
154:      */
155:     function clearAttributes() {
156:         $this->iNodeAttributes = array();
157:     }
158: 
159:     /**
160:      *
161:      * @return boolean
162:      */
163:     function readTag() {
164:         if ($this->currentChar() != "<") {
165:             $this->iNodeType = self::NODE_TYPE_DONE;
166:             return false;
167:         }
168: 
169:         $this->skipInTag("<");
170:         $this->clearAttributes();
171:         $name = $this->skipToBlanksInTag();
172:         $pos = strpos($name, "/");
173: 
174:         if ($pos === 0) {
175:             $this->iNodeType = self::NODE_TYPE_ENDELEMENT;
176:             $this->iNodeName = substr($name, 1);
177:             $this->iNodeValue = "";
178:         } else {
179:             if (!$this->isValidTagIdentifier($name)) {
180:                 $comment = false;
181:                 if ($name == "!--") {
182:                     $rest = $this->skipToStringInTag("-->");
183:                     if ($rest != "") {
184:                         $this->iNodeType = self::NODE_TYPE_COMMENT;
185:                         $this->iNodeName = "Comment";
186:                         $this->iNodeValue = "<" . $name . $rest;
187:                         $comment = true;
188:                     }
189:                 }
190:                 if (!$comment) {
191:                     $this->iNodeType = self::NODE_TYPE_TEXT;
192:                     $this->iNodeName = "Text";
193:                     $this->iNodeValue = "<" . $name;
194:                 }
195:                 return true;
196:             } else {
197:                 $this->iNodeType = self::NODE_TYPE_ELEMENT;
198:                 $this->iNodeValue = "";
199:                 $nameLength = strlen($name);
200:                 if ($nameLength > 0 && substr($name, $nameLength - 1, 1) == "/") {
201:                     $this->iNodeName = substr($name, 0, $nameLength - 1);
202:                 } else {
203:                     $this->iNodeName = $name;
204:                 }
205:             }
206:         }
207: 
208:         while ($this->skipBlanksInTag()) {
209:             $attrName = $this->skipToBlanksOrEqualsInTag();
210:             if ($attrName != "") {
211:                 $this->skipBlanksInTag();
212:                 if ($this->currentChar() == "=") {
213:                     $this->skipEqualsInTag();
214:                     $this->skipBlanksInTag();
215:                     $value = $this->readValueInTag();
216:                     $this->iNodeAttributes[strtolower($attrName)] = $value;
217:                 } else {
218:                     $this->iNodeAttributes[strtolower($attrName)] = "";
219:                 }
220:             }
221:         }
222:         $this->skipEndOfTag();
223:         return true;
224:     }
225: 
226:     /**
227:      *
228:      * @param string $name
229:      * @return number
230:      */
231:     function isValidTagIdentifier($name) {
232:         return preg_match('/[A-Za-z0-9]+/', $name);
233:     }
234: 
235:     /**
236:      *
237:      * @return boolean
238:      */
239:     function skipBlanksInTag() {
240:         return "" != ($this->skipInTag(array(
241:             " ",
242:             "\t",
243:             "\r",
244:             "\n"
245:         )));
246:     }
247: 
248:     /**
249:      *
250:      * @return Ambigous <string, number, unknown_type>
251:      */
252:     function skipToBlanksOrEqualsInTag() {
253:         return $this->skipToInTag(array(
254:             " ",
255:             "\t",
256:             "\r",
257:             "\n",
258:             "="
259:         ));
260:     }
261: 
262:     /**
263:      *
264:      * @return Ambigous <string, number, unknown_type>
265:      */
266:     function skipToBlanksInTag() {
267:         return $this->skipToInTag(array(
268:             " ",
269:             "\t",
270:             "\r",
271:             "\n"
272:         ));
273:     }
274: 
275:     /**
276:      *
277:      * @return Ambigous <unknown, string, number, unknown_type>
278:      */
279:     function skipEqualsInTag() {
280:         return $this->skipInTag(array(
281:             "="
282:         ));
283:     }
284: 
285:     /**
286:      *
287:      * @return Ambigous <string, Ambigous, number, unknown_type>
288:      */
289:     function readValueInTag() {
290:         $ch = $this->currentChar();
291:         $value = "";
292: 
293:         if ($ch == "\"") {
294:             $this->skipInTag(array(
295:                 "\""
296:             ));
297:             $value = $this->skipToInTag(array(
298:                 "\""
299:             ));
300:             $this->skipInTag(array(
301:                 "\""
302:             ));
303:         } else if ($ch == "\'") {
304:             $this->skipInTag(array(
305:                 "\'"
306:             ));
307:             $value = $this->skipToInTag(array(
308:                 "\'"
309:             ));
310:             $this->skipInTag(array(
311:                 "\'"
312:             ));
313:         } else {
314:             $value = $this->skipToBlanksInTag();
315:         }
316: 
317:         return $value;
318:     }
319: 
320:     /**
321:      *
322:      * @return number string
323:      */
324:     function currentChar() {
325:         if ($this->iHtmlTextIndex >= $this->iHtmlTextLength) {
326:             return -1;
327:         }
328:         return $this->iHtmlText{$this->iHtmlTextIndex};
329:     }
330: 
331:     /**
332:      *
333:      * @return boolean
334:      */
335:     function moveNext() {
336:         if ($this->iHtmlTextIndex < $this->iHtmlTextLength) {
337:             $this->iHtmlTextIndex++;
338:             return true;
339:         } else {
340:             return false;
341:         }
342:     }
343: 
344:     /**
345:      *
346:      * @return string Ambigous number, number, unknown_type>
347:      */
348:     function skipEndOfTag() {
349:         $sb = "";
350:         if (($ch = $this->currentChar()) !== -1) {
351:             $match = ($ch == ">");
352:             if (!$match) {
353:                 return $sb;
354:             }
355:             $sb .= $ch;
356:             $this->moveNext();
357:         }
358:         return $sb;
359:     }
360: 
361:     /**
362:      *
363:      * @param string $chars
364:      * @return string Ambigous number, number, unknown_type>
365:      */
366:     function skipInTag($chars) {
367:         $sb = "";
368:         while (($ch = $this->currentChar()) !== -1) {
369:             if ($ch == ">") {
370:                 return $sb;
371:             } else {
372:                 $match = false;
373:                 for ($idx = 0; $idx < count($chars); $idx++) {
374:                     if ($ch == $chars[$idx]) {
375:                         $match = true;
376:                         break;
377:                     }
378:                 }
379:                 if (!$match) {
380:                     return $sb;
381:                 }
382:                 $sb .= $ch;
383:                 $this->moveNext();
384:             }
385:         }
386:         return $sb;
387:     }
388: 
389:     /**
390:      *
391:      * @param string $chars
392:      * @return string Ambigous number, number, unknown_type>
393:      */
394:     function skipToInTag($chars) {
395:         $sb = "";
396:         while (($ch = $this->currentChar()) !== -1) {
397:             $match = $ch == ">";
398:             if (!$match) {
399:                 for ($idx = 0; $idx < count($chars); $idx++) {
400:                     if ($ch == $chars[$idx]) {
401:                         $match = true;
402:                         break;
403:                     }
404:                 }
405:             }
406:             if ($match) {
407:                 return $sb;
408:             }
409:             $sb .= $ch;
410:             $this->moveNext();
411:         }
412:         return $sb;
413:     }
414: 
415:     /**
416:      *
417:      * @return string Ambigous number, number, unknown_type>
418:      */
419:     function skipToElement() {
420:         $sb = "";
421:         while (($ch = $this->currentChar()) !== -1) {
422:             if ($ch == "<") {
423:                 return $sb;
424:             }
425:             $sb .= $ch;
426:             $this->moveNext();
427:         }
428:         return $sb;
429:     }
430: 
431:     /**
432:      * Returns text between current position and $needle,
433:      * inclusive, or "" if not found.
434:      * The current index is moved to a point
435:      * after the location of $needle, or not moved at all
436:      * if nothing is found.
437:      *
438:      * @param string $needle
439:      * @return string
440:      */
441:     function skipToStringInTag($needle) {
442:         $pos = strpos($this->iHtmlText, $needle, $this->iHtmlTextIndex);
443:         if ($pos === false) {
444:             return "";
445:         }
446:         $top = $pos + strlen($needle);
447:         $retvalue = substr($this->iHtmlText, $this->iHtmlTextIndex, $top - $this->iHtmlTextIndex);
448:         $this->iHtmlTextIndex = $top;
449:         return $retvalue;
450:     }
451: }
452: 
CMS CONTENIDO 4.9.7 API documentation generated by ApiGen