1: <?php
2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12:
13:
// Abort the request unless the CON_FRAMEWORK constant has been defined.
if (!defined('CON_FRAMEWORK')) {
    die('Illegal call: Missing framework initialization - request aborted.');
}
15:
16: 17: 18: 19: 20: 21:
/**
 * Detects paired HTML tags that were opened but never closed.
 *
 * validate() walks the document with HtmlParser, keeps one stack of open
 * nodes per tag name and afterwards publishes every node still open in
 * $missingNodes (list form) and $missingTags (line/column lookup).
 */
class cHTMLValidator {

    /**
     * Names of the tags that are tracked as open/close pairs.
     *
     * @var array
     */
    protected $_doubleTags = array(
        "form",
        "head",
        "body",
        "html",
        "td",
        "tr",
        "table",
        "a",
        "tbody",
        "title",
        "container",
        "span",
        "div"
    );

    /**
     * Unclosed nodes found by the last validate() call. Each entry is an
     * array with the keys "tag", "id", "name", "line" and "char".
     *
     * @var array
     */
    public $missingNodes = array();

    /**
     * Positions of the unclosed nodes found by the last validate() call,
     * stored as $missingTags[line][char] = true.
     *
     * @var array
     */
    public $missingTags = array();

    /**
     * Not read or written anywhere in this class; kept because it is part
     * of the public surface.
     *
     * @var mixed
     */
    public $iNodeName;

    /**
     * The cleaned HTML that was validated last.
     *
     * @var string
     */
    protected $_html;

    /**
     * Number of currently open nodes, indexed by tag name.
     *
     * @var array
     */
    protected $_nestingLevel = array();

    /**
     * Stack of currently open nodes, indexed by tag name and then by the
     * zero-based depth within that tag name.
     *
     * @var array
     */
    protected $_nestingNodes = array();

    /**
     * Every node name reported by the parser, in document order.
     *
     * @var array
     */
    protected $_existingTags = array();

    /**
     * Parses the given HTML and records every tracked tag that is still
     * open at the end of the document.
     *
     * The method returns nothing; results are available afterwards through
     * $missingNodes, $missingTags and tagExists().
     *
     * @param string $html HTML code to check
     */
    public function validate($html) {
        // Start from a clean slate so that a reused instance does not mix
        // in the results of a previously validated document.
        $this->_existingTags = array();
        $this->_nestingLevel = array();
        $this->_nestingNodes = array();
        $this->missingNodes = array();
        $this->missingTags = array();

        // Overall depth across all tracked tag names.
        $nestingLevel = 0;

        $this->_html = $this->_cleanHTML($html);

        $htmlParser = new HtmlParser($this->_html);

        while ($htmlParser->parse()) {
            $nodeName = $htmlParser->getNodeName();
            $this->_existingTags[] = $nodeName;

            if (!in_array($nodeName, $this->_doubleTags)) {
                continue;
            }

            if (!array_key_exists($nodeName, $this->_nestingLevel)) {
                $this->_nestingLevel[$nodeName] = 0;
            }

            if (!array_key_exists($nodeName, $this->_nestingNodes)) {
                $this->_nestingNodes[$nodeName] = array();
            }

            $nodeType = $htmlParser->getNodeType();

            if ($nodeType == HtmlParser::NODE_TYPE_ELEMENT) {
                // Opening tag: push it onto the stack of its tag name.
                $nestingLevel++;

                $depth = intval($this->_nestingLevel[$nodeName]);
                $this->_nestingNodes[$nodeName][$depth] = array(
                    "name" => $htmlParser->getNodeAttributes('name'),
                    "id" => $htmlParser->getNodeAttributes('id'),
                    "level" => $nestingLevel,
                    "char" => $htmlParser->getHtmlTextIndex()
                );
                $this->_nestingLevel[$nodeName]++;
            } elseif ($nodeType == HtmlParser::NODE_TYPE_ENDELEMENT
                && $this->_nestingLevel[$nodeName] > 0) {
                // Closing tag: pop the most recently opened node. The top
                // of the stack lives at index (count - 1), so the counter
                // has to be decremented before the entry is removed.
                // A closing tag without an open counterpart is ignored.
                $this->_nestingLevel[$nodeName]--;
                unset($this->_nestingNodes[$nodeName][$this->_nestingLevel[$nodeName]]);

                $nestingLevel--;
            }
        }

        // Whatever is still on a stack was never closed.
        foreach ($this->_nestingLevel as $tag => $openCount) {
            for ($i = 0; $i < $openCount; $i++) {
                $node = $this->_nestingNodes[$tag][$i];

                list($line, $char) = $this->_getLineAndCharPos($node["char"]);

                $this->missingNodes[] = array(
                    "tag" => $tag,
                    "id" => $node["id"],
                    "name" => $node["name"],
                    "line" => $line,
                    "char" => $char
                );

                $this->missingTags[$line][$char] = true;
            }
        }
    }

    /**
     * Tells whether the parser reported a node with the given name during
     * the last validate() call.
     *
     * @param string $tag node name to look for
     * @return bool
     */
    public function tagExists($tag) {
        return in_array($tag, $this->_existingTags);
    }

    /**
     * Removes "<? ... ?>" / "<?php ... ?>" blocks and normalizes all line
     * breaks to "\n".
     *
     * @param string $html raw HTML code
     * @return string cleaned HTML code
     */
    protected function _cleanHTML($html) {
        // ".*?" with the "s" modifier matches the same text as the former
        // "((.)|(\s))*?" but without a capturing alternation per character,
        // which could exhaust PCRE limits on large blocks.
        $resultingHTML = preg_replace('/<\?(php)?.*?\?>/is', '', $html);

        if ($resultingHTML === null) {
            // preg_replace() reports failure with null; keep the input
            // rather than silently validating an empty document.
            $resultingHTML = (string) $html;
        }

        // "\r\n" first, so that a Windows line break becomes one "\n".
        $resultingHTML = str_replace("\r\n", "\n", $resultingHTML);
        $resultingHTML = str_replace("\r", "\n", $resultingHTML);

        return $resultingHTML;
    }

    /**
     * Renders the validated HTML as an escaped listing inside "<pre>",
     * one numbered line per source line, with the positions stored in
     * $missingTags underlined and bold.
     *
     * A byte at zero-based offset $i is highlighted when column ($i + 2)
     * is flagged, i.e. the byte directly before the recorded text index.
     *
     * NOTE(review): the line is processed byte by byte, so multibyte
     * characters reach conHtmlSpecialChars() in pieces - confirm whether
     * that matters for the expected input.
     *
     * @return string HTML listing
     */
    protected function _returnErrorMap() {
        $html = "<pre>";

        $lines = explode("\n", (string) $this->_html);

        foreach ($lines as $index => $text) {
            $lineNumber = $index + 1;

            // Flagged columns of this line; empty when nothing is missing.
            $marks = array();
            if (isset($this->missingTags[$lineNumber]) && is_array($this->missingTags[$lineNumber])) {
                $marks = $this->missingTags[$lineNumber];
            }

            $html .= $lineNumber . " ";

            $length = strlen($text);
            for ($i = 0; $i < $length; $i++) {
                $char = conHtmlSpecialChars(substr($text, $i, 1));

                if (array_key_exists($i + 2, $marks)) {
                    $html .= "<u><b>" . $char . "</b></u>";
                } else {
                    $html .= $char;
                }
            }

            $html .= "<br>";
        }

        // Close the element opened at the top of the listing.
        $html .= "</pre>";

        return $html;
    }

    /**
     * Converts an offset into the cleaned HTML into a line number and a
     * column.
     *
     * The line number is 1-based. The column is the distance from the
     * line break preceding the offset; for the first line the line break
     * is assumed at position -1, so columns are computed the same way on
     * every line.
     *
     * @param int $charpos offset into the cleaned HTML
     * @return array array(line, column)
     */
    protected function _getLineAndCharPos($charpos) {
        $mangled = substr((string) $this->_html, 0, $charpos);

        $line = substr_count($mangled, "\n") + 1;

        $lastBreak = strrpos($mangled, "\n");
        if ($lastBreak === false) {
            // No preceding line break: treat it as sitting at -1 instead
            // of letting false be coerced to 0.
            $lastBreak = -1;
        }

        $char = $charpos - $lastBreak;

        return array($line, $char);
    }
}
241: