1: <?php
2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12:
13:
// Abort the request unless the CON_FRAMEWORK constant has been defined,
// i.e. refuse to run this file when it is called directly.
if (!defined('CON_FRAMEWORK')) {
    die('Illegal call: Missing framework initialization - request aborted.');
}
15:
16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26: 27: 28: 29: 30: 31:
/**
 * Minimal forward-only HTML tokenizer.
 *
 * Each call to parse() reads the next node from the HTML text given to the
 * constructor and exposes it through the public iNode* properties. The
 * parser performs no validation and builds no tree.
 *
 * Typical usage:
 *
 *     $parser = new HtmlParser($html);
 *     while ($parser->parse()) {
 *         // inspect $parser->iNodeType, iNodeName, iNodeValue, iNodeAttributes
 *     }
 */
class HtmlParser {

    /**
     * Type of the node read by the last parse() call; one of the
     * NODE_TYPE_* constants.
     *
     * @var int
     */
    public $iNodeType;

    /**
     * Name of the current node: the tag name for (end) elements,
     * "Text" for text nodes and "Comment" for comments.
     *
     * @var string
     */
    public $iNodeName = "";

    /**
     * Value of the current node. Holds the raw text for text and comment
     * nodes and is an empty string for (end) elements.
     *
     * @var string
     */
    public $iNodeValue = "";

    /**
     * Attributes of the current element, keyed by lower-cased attribute
     * name. Attributes without a value map to an empty string.
     *
     * @var array
     */
    public $iNodeAttributes;

    /**
     * The HTML text being parsed.
     *
     * @var string
     */
    public $iHtmlText;

    /**
     * Length of $iHtmlText in bytes.
     *
     * @var int
     */
    public $iHtmlTextLength;

    /**
     * Current read position inside $iHtmlText.
     *
     * @var int
     */
    public $iHtmlTextIndex = 0;

    /** Node type: opening (or self-closing) element. */
    const NODE_TYPE_ELEMENT = 1;

    /** Node type: closing element. */
    const NODE_TYPE_ENDELEMENT = 2;

    /** Node type: text. */
    const NODE_TYPE_TEXT = 3;

    /** Node type: comment. */
    const NODE_TYPE_COMMENT = 4;

    /** Node type: end of input reached. */
    const NODE_TYPE_DONE = 5;

    /**
     * Creates a parser for the given HTML text.
     *
     * @param string $aHtmlText HTML text to parse
     */
    public function __construct($aHtmlText) {
        // Cast so that null or other scalars do not trip strlen().
        $this->iHtmlText = (string) $aHtmlText;
        $this->iHtmlTextLength = strlen($this->iHtmlText);
    }

    /**
     * Legacy PHP 4 style constructor name, kept so existing explicit calls
     * (e.g. from subclasses) keep working. Not used as a constructor because
     * __construct() is defined.
     *
     * @param string $aHtmlText HTML text to parse
     */
    public function HtmlParser($aHtmlText) {
        $this->__construct($aHtmlText);
    }

    /**
     * Reads the next node and stores it in the iNode* properties.
     *
     * @return bool true if a node was read, false when the input is exhausted
     */
    public function parse() {
        $text = $this->skipToElement();
        if ($text !== "") {
            $this->iNodeType = self::NODE_TYPE_TEXT;
            $this->iNodeName = "Text";
            $this->iNodeValue = $text;
            return true;
        }
        return $this->readTag();
    }

    /**
     * Resets the attribute list of the current node.
     */
    public function clearAttributes() {
        $this->iNodeAttributes = array();
    }

    /**
     * Reads a tag, comment or stray "<" starting at the current position.
     *
     * @return bool false (with node type DONE) if the current character is
     *              not "<", true otherwise
     */
    public function readTag() {
        if ($this->currentChar() !== "<") {
            $this->iNodeType = self::NODE_TYPE_DONE;
            return false;
        }

        $this->skipInTag(array("<"));
        $this->clearAttributes();
        $name = $this->skipToBlanksInTag();

        if (strpos($name, "/") === 0) {
            // Closing tag: strip the leading slash from the name.
            $this->iNodeType = self::NODE_TYPE_ENDELEMENT;
            $this->iNodeName = substr($name, 1);
            $this->iNodeValue = "";
        } elseif (!$this->isValidTagIdentifier($name)) {
            // Not a tag name: either a comment opener or literal text.
            $comment = false;
            if ($name === "!--") {
                $rest = $this->skipToStringInTag("-->");
                if ($rest !== "") {
                    $this->iNodeType = self::NODE_TYPE_COMMENT;
                    $this->iNodeName = "Comment";
                    $this->iNodeValue = "<" . $name . $rest;
                    $comment = true;
                }
            }
            if (!$comment) {
                $this->iNodeType = self::NODE_TYPE_TEXT;
                $this->iNodeName = "Text";
                $this->iNodeValue = "<" . $name;
            }
            return true;
        } else {
            $this->iNodeType = self::NODE_TYPE_ELEMENT;
            $this->iNodeValue = "";
            // Drop the trailing slash of a self-closing tag such as "br/".
            if (substr($name, -1) === "/") {
                $this->iNodeName = substr($name, 0, -1);
            } else {
                $this->iNodeName = $name;
            }
        }

        // Attributes: each one must be preceded by at least one blank.
        while ($this->skipBlanksInTag()) {
            $attrName = $this->skipToBlanksOrEqualsInTag();
            if ($attrName === "") {
                continue;
            }
            $key = strtolower($attrName);
            $this->skipBlanksInTag();
            if ($this->currentChar() === "=") {
                $this->skipEqualsInTag();
                $this->skipBlanksInTag();
                $this->iNodeAttributes[$key] = $this->readValueInTag();
            } else {
                $this->iNodeAttributes[$key] = "";
            }
        }
        $this->skipEndOfTag();
        return true;
    }

    /**
     * Checks whether the given name is accepted as a tag name.
     *
     * Note: the pattern is not anchored, so any name containing at least
     * one ASCII letter or digit is accepted.
     *
     * @param string $name candidate tag name
     * @return int|false result of preg_match(): 1 on match, 0 otherwise
     */
    public function isValidTagIdentifier($name) {
        return preg_match('/[A-Za-z0-9]+/', $name);
    }

    /**
     * Skips blanks (space, tab, CR, LF) at the current position.
     *
     * @return bool true if at least one blank was skipped
     */
    public function skipBlanksInTag() {
        return "" !== $this->skipInTag(array(" ", "\t", "\r", "\n"));
    }

    /**
     * Reads up to the next blank, "=" or ">".
     *
     * @return string the characters read
     */
    public function skipToBlanksOrEqualsInTag() {
        return $this->skipToInTag(array(" ", "\t", "\r", "\n", "="));
    }

    /**
     * Reads up to the next blank or ">".
     *
     * @return string the characters read
     */
    public function skipToBlanksInTag() {
        return $this->skipToInTag(array(" ", "\t", "\r", "\n"));
    }

    /**
     * Skips "=" characters at the current position.
     *
     * @return string the characters skipped
     */
    public function skipEqualsInTag() {
        return $this->skipInTag(array("="));
    }

    /**
     * Reads an attribute value at the current position.
     *
     * Values may be enclosed in double or single quotes; unquoted values
     * end at the next blank. In every case reading also stops at ">".
     *
     * @return string the attribute value without surrounding quotes
     */
    public function readValueInTag() {
        $quote = $this->currentChar();
        if ($quote !== "\"" && $quote !== "'") {
            return $this->skipToBlanksInTag();
        }

        // Consume exactly one opening quote so that empty values ("")
        // do not swallow the closing quote as well.
        $this->moveNext();
        $value = $this->skipToInTag(array($quote));
        if ($this->currentChar() === $quote) {
            $this->moveNext();
        }
        return $value;
    }

    /**
     * Returns the character at the current position.
     *
     * @return string|int the current character, or -1 at end of input
     */
    public function currentChar() {
        if ($this->iHtmlTextIndex >= $this->iHtmlTextLength) {
            return -1;
        }
        return $this->iHtmlText[$this->iHtmlTextIndex];
    }

    /**
     * Advances the read position by one character.
     *
     * @return bool false if the position was already at end of input
     */
    public function moveNext() {
        if ($this->iHtmlTextIndex < $this->iHtmlTextLength) {
            $this->iHtmlTextIndex++;
            return true;
        }
        return false;
    }

    /**
     * Consumes a ">" at the current position, if there is one.
     *
     * @return string ">" if it was consumed, otherwise an empty string
     */
    public function skipEndOfTag() {
        if ($this->currentChar() === ">") {
            $this->moveNext();
            return ">";
        }
        return "";
    }

    /**
     * Skips a run of the given characters; never skips past ">".
     *
     * @param array|string $chars characters to skip (a single string is
     *                            treated as one entry)
     * @return string the characters skipped
     */
    public function skipInTag($chars) {
        $chars = (array) $chars;
        $sb = "";
        while (($ch = $this->currentChar()) !== -1) {
            if ($ch === ">" || !in_array($ch, $chars, true)) {
                break;
            }
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /**
     * Reads characters until one of the given characters, ">" or the end
     * of input is reached. The stop character is not consumed.
     *
     * @param array|string $chars stop characters (a single string is
     *                            treated as one entry)
     * @return string the characters read
     */
    public function skipToInTag($chars) {
        $chars = (array) $chars;
        $sb = "";
        while (($ch = $this->currentChar()) !== -1) {
            if ($ch === ">" || in_array($ch, $chars, true)) {
                break;
            }
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /**
     * Reads characters until "<" or the end of input is reached.
     * The "<" is not consumed.
     *
     * @return string the characters read
     */
    public function skipToElement() {
        $sb = "";
        while (($ch = $this->currentChar()) !== -1) {
            if ($ch === "<") {
                break;
            }
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /**
     * Reads everything from the current position up to and including the
     * next occurrence of $needle and moves the position behind it.
     *
     * @param string $needle string to search for
     * @return string the text read including $needle, or an empty string
     *                (position unchanged) if $needle does not occur
     */
    public function skipToStringInTag($needle) {
        $pos = strpos($this->iHtmlText, $needle, $this->iHtmlTextIndex);
        if ($pos === false) {
            return "";
        }
        $top = $pos + strlen($needle);
        $retvalue = substr($this->iHtmlText, $this->iHtmlTextIndex, $top - $this->iHtmlTextIndex);
        $this->iHtmlTextIndex = $top;
        return $retvalue;
    }

}
311:
312: ?>