<?php
/*******************************************************************************
Version: 1.10 ($Rev: 166 $)
Website: http://sourceforge.net/projects/simplehtmldom/
Author: S.C. Chen <me578022@gmail.com>
Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
Contributions by:
    Yousuke Kumakura (Attribute filters)
    Vadim Voituk (Negative indexes supports of "find" method)
    Antcs (Constructor with automatically load contents either text or file/url)
Licensed under The MIT License
Redistributions of files must retain the above copyright notice.
*******************************************************************************/
|---|
| 14 | |
|---|
// node types (stored in simple_html_dom_node::$nodetype)
define('HDOM_TYPE_ELEMENT',  1);
define('HDOM_TYPE_COMMENT',  2);
define('HDOM_TYPE_TEXT',     3);
define('HDOM_TYPE_ENDTAG',   4);
define('HDOM_TYPE_ROOT',     5);
define('HDOM_TYPE_UNKNOWN',  6);

// quoting style recorded for each parsed attribute value
define('HDOM_QUOTE_DOUBLE',  0);
define('HDOM_QUOTE_SINGLE',  1);
define('HDOM_QUOTE_NO',      3);

// keys of the per-node bookkeeping array simple_html_dom_node::$_
define('HDOM_INFO_BEGIN',    0);
define('HDOM_INFO_END',      1);
define('HDOM_INFO_QUOTE',    2);
define('HDOM_INFO_SPACE',    3);
define('HDOM_INFO_TEXT',     4);
define('HDOM_INFO_INNER',    5);
define('HDOM_INFO_OUTER',    6);
define('HDOM_INFO_ENDSPACE', 7);
|---|
| 32 | |
|---|
// helper functions
// -----------------------------------------------------------------------------
// get html dom from file
// Every argument is forwarded unchanged to file_get_contents(); the fetched
// content is parsed with lowercase conversion enabled.
function file_get_html() {
    $contents = call_user_func_array('file_get_contents', func_get_args());
    $document = new simple_html_dom;
    $document->load($contents, true);
    return $document;
}
|---|
| 42 | |
|---|
// get html dom from string
// $str is the markup to parse; $lowercase is handed straight to
// simple_html_dom::load() (default true).
function str_get_html($str, $lowercase=true) {
    $document = new simple_html_dom();
    $document->load($str, $lowercase);

    return $document;
}
|---|
| 49 | |
|---|
// get dom from file (deprecated, use file_get_html instead)
// Kept for backward compatibility: forwards every argument to file_get_html()
// so the two entry points can never drift apart.
function file_get_dom() {
    $args = func_get_args();
    return call_user_func_array('file_get_html', $args);
}
|---|
| 57 | |
|---|
// get dom from string (deprecated, use str_get_html instead)
// Kept for backward compatibility: simply delegates to str_get_html().
function str_get_dom($str, $lowercase=true) {
    return str_get_html($str, $lowercase);
}
|---|
| 64 | |
|---|
// simple html dom node
// -----------------------------------------------------------------------------
// One node of the parsed document: an element, a text run, a comment, an
// unknown "<!...>" declaration or the document root.
class simple_html_dom_node {
    // one of the HDOM_TYPE_* constants
    public $nodetype = HDOM_TYPE_TEXT;
    // tag name; 'text', 'comment', 'unknown' and 'root' mark non-element nodes
    public $tag = 'text';
    // attribute name => value; true means "no value" (checked, nowrap...),
    // null/false means the attribute has been removed
    public $attr = array();
    // child elements only
    public $children = array();
    // every child node, text nodes included
    public $nodes = array();
    // parent node or null
    public $parent = null;
    // parser bookkeeping, indexed by the HDOM_INFO_* constants
    public $_ = array();
    // owning document
    private $dom = null;

    // registers the node in the owning document's flat node list
    function __construct($dom) {
        $this->dom = $dom;
        // objects are handles, a reference to $this is not needed
        $dom->nodes[] = $this;
    }

    function __destruct() {
        $this->clear();
    }

    function __toString() {
        return $this->outertext();
    }

    // clean up memory due to php5 circular references memory leak...
    function clear() {
        $this->dom = null;
        $this->nodes = null;
        $this->parent = null;
        $this->children = null;
    }

    // returns the parent of node
    function parent() {
        return $this->parent;
    }

    // returns children of node: the whole list when $idx is -1, otherwise the
    // child at $idx or null when there is none
    function children($idx=-1) {
        if ($idx===-1) return $this->children;
        if (isset($this->children[$idx])) return $this->children[$idx];
        return null;
    }

    // returns the first child of node, or null (also after clear())
    function first_child() {
        if (empty($this->children)) return null;
        return $this->children[0];
    }

    // returns the last child of node, or null (also after clear())
    function last_child() {
        if (empty($this->children)) return null;
        return $this->children[count($this->children)-1];
    }

    // returns the next sibling of node, or null when the node has no parent,
    // is not one of its parent's child elements, or is the last one
    function next_sibling() {
        if ($this->parent===null) return null;
        $siblings = $this->parent->children;
        // strict search compares object identity
        $idx = array_search($this, $siblings, true);
        if ($idx===false || !isset($siblings[$idx+1])) return null;
        return $siblings[$idx+1];
    }

    // returns the previous sibling of node, or null when the node has no
    // parent, is not one of its parent's child elements, or is the first one
    function prev_sibling() {
        if ($this->parent===null) return null;
        $siblings = $this->parent->children;
        $idx = array_search($this, $siblings, true);
        // a node missing from the child list (e.g. a text node) has no
        // element sibling; it must not fall through to the last child
        if ($idx===false || $idx<1) return null;
        return $siblings[$idx-1];
    }

    // get dom node's inner html
    function innertext() {
        // an explicitly assigned inner text wins over the parsed content
        if (isset($this->_[HDOM_INFO_INNER])) return $this->_[HDOM_INFO_INNER];
        if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);

        $ret = '';
        foreach($this->nodes as $n)
            $ret .= $n->outertext();
        return $ret;
    }

    // get dom node's outer text (with tag)
    function outertext() {
        if ($this->tag==='root') return $this->innertext();

        // trigger callback
        if ($this->dom->callback!==null)
            call_user_func_array($this->dom->callback, array($this));

        // an explicitly assigned outer text wins over the parsed content
        if (isset($this->_[HDOM_INFO_OUTER])) return $this->_[HDOM_INFO_OUTER];
        if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);

        // render begin tag
        $ret = $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]->makeup();

        // render inner text
        if (isset($this->_[HDOM_INFO_INNER]))
            $ret .= $this->_[HDOM_INFO_INNER];
        else {
            foreach($this->nodes as $n)
                $ret .= $n->outertext();
        }

        // render end tag (an end index of 0 means no closing tag was recorded)
        if(isset($this->_[HDOM_INFO_END]) && $this->_[HDOM_INFO_END]!=0)
            $ret .= '</'.$this->tag.'>';
        return $ret;
    }

    // get dom node's plain text
    function plaintext() {
        if (isset($this->_[HDOM_INFO_INNER])) return $this->_[HDOM_INFO_INNER];
        switch ($this->nodetype) {
            case HDOM_TYPE_TEXT: return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
            case HDOM_TYPE_COMMENT: return '';
            case HDOM_TYPE_UNKNOWN: return '';
        }
        // script and style content never counts as text
        if (strcasecmp($this->tag, 'script')===0) return '';
        if (strcasecmp($this->tag, 'style')===0) return '';

        $ret = '';
        foreach($this->nodes as $n)
            $ret .= $n->plaintext();
        return $ret;
    }

    // build node's text with tag
    function makeup() {
        // text, comment, unknown
        if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);

        $ret = '<'.$this->tag;
        // position of the attribute; the SPACE and QUOTE bookkeeping arrays
        // are indexed by it, so it advances for removed attributes as well
        $i = -1;

        foreach($this->attr as $key=>$val) {
            ++$i;

            // skip removed attribute
            if ($val===null || $val===false)
                continue;

            $ret .= $this->_[HDOM_INFO_SPACE][$i][0];
            //no value attr: nowrap, checked selected...
            if ($val===true)
                $ret .= $key;
            else {
                switch($this->_[HDOM_INFO_QUOTE][$i]) {
                    case HDOM_QUOTE_DOUBLE: $quote = '"'; break;
                    case HDOM_QUOTE_SINGLE: $quote = '\''; break;
                    default: $quote = '';
                }
                $ret .= $key.$this->_[HDOM_INFO_SPACE][$i][1].'='.$this->_[HDOM_INFO_SPACE][$i][2].$quote.$val.$quote;
            }
        }
        $ret = $this->dom->restore_noise($ret);
        return $ret . $this->_[HDOM_INFO_ENDSPACE] . '>';
    }

    // find elements by css selector
    // $selector may hold several comma separated groups, each a chain of
    // descendant selectors. With $idx === null an array of nodes (document
    // order) is returned; otherwise the node at $idx (negative values count
    // from the end) or null.
    function find($selector, $idx=null) {
        $selectors = $this->parse_selector($selector);

        if (($count=count($selectors))===0) return array();
        $found_keys = array();

        // find each selector
        for ($c=0; $c<$count; ++$c) {
            // depth of THIS group; groups may differ in length ("div, ul li")
            if (($level=count($selectors[$c]))===0) return array();
            if (!isset($this->_[HDOM_INFO_BEGIN])) return array();

            $head = array($this->_[HDOM_INFO_BEGIN]=>1);

            // handle descendant selectors, no recursive!
            for ($l=0; $l<$level; ++$l) {
                $ret = array();
                foreach($head as $k=>$v) {
                    // the root node carries the begin index -1
                    $n = ($k===-1) ? $this->dom->root : $this->dom->nodes[$k];
                    $n->seek($selectors[$c][$l], $ret);
                }
                $head = $ret;
            }

            foreach($head as $k=>$v) {
                if (!isset($found_keys[$k]))
                    $found_keys[$k] = 1;
            }
        }

        // sort keys
        ksort($found_keys);

        $found = array();
        foreach($found_keys as $k=>$v)
            $found[] = $this->dom->nodes[$k];

        // return nth-element or array
        if (is_null($idx)) return $found;
        else if ($idx<0) $idx = count($found) + $idx;
        return (isset($found[$idx])) ? $found[$idx] : null;
    }

    // seek for given conditions
    // $selector is one array($tag, $key, $val, $exp, $no_key) entry produced
    // by parse_selector(); indexes of matching nodes in the document's node
    // list are added as keys to $ret.
    protected function seek($selector, &$ret) {
        list($tag, $key, $val, $exp, $no_key) = $selector;

        $end = (!empty($this->_[HDOM_INFO_END])) ? $this->_[HDOM_INFO_END] : 0;
        if ($end==0) {
            // no own end index: borrow it from the nearest ancestor having one
            $parent = $this->parent;
            while ($parent!==null && !isset($parent->_[HDOM_INFO_END])) {
                $end -= 1;
                $parent = $parent->parent;
            }
            // without such an ancestor there is no range to scan
            if ($parent!==null)
                $end += $parent->_[HDOM_INFO_END];
        }

        for($i=$this->_[HDOM_INFO_BEGIN]+1; $i<$end; ++$i) {
            $node = $this->dom->nodes[$i];
            $pass = true;

            // a bare '*' selects direct child elements only
            if ($tag==='*' && !$key) {
                if (in_array($node, $this->children, true))
                    $ret[$i] = 1;
                continue;
            }

            // compare tag
            if ($tag && $tag!==$node->tag && $tag!=='*') {$pass=false;}
            // compare key
            if ($pass && $key) {
                if ($no_key) {
                    if (isset($node->attr[$key])) $pass=false;
                }
                else if (!isset($node->attr[$key])) $pass=false;
            }
            // compare value
            if ($pass && $key && $val && $val!=='*') {
                $check = $this->match($exp, $val, $node->attr[$key]);
                // handle multiple class
                if (!$check && strcasecmp($key, 'class')===0) {
                    foreach(explode(' ',$node->attr[$key]) as $k) {
                        $check = $this->match($exp, $val, $k);
                        if ($check) break;
                    }
                }
                if (!$check) $pass = false;
            }
            if ($pass) $ret[$i] = 1;
        }
    }

    // compares an attribute value with a selector pattern using the operator
    // $exp ('=', '!=', '^=', '$=' or '*='); unknown operators match
    protected function match($exp, $pattern, $value) {
        switch ($exp) {
            case '=':
                return ($value===$pattern);
            case '!=':
                return ($value!==$pattern);
            case '^=':
                return (bool)preg_match("/^".preg_quote($pattern,'/')."/", $value);
            case '$=':
                return (bool)preg_match("/".preg_quote($pattern,'/')."$/", $value);
            case '*=':
                // the "contains" operator is the only case-insensitive one
                return (bool)preg_match("/".preg_quote($pattern,'/')."/i", $value);
        }
        return true;
    }

    // splits a selector string into groups (separated by commas) of
    // array($tag, $key, $val, $exp, $no_key) entries (separated by blanks)
    protected function parse_selector($selector_string) {
        // pattern of CSS selectors, modified from mootools
        // the hyphen after \w is escaped: PCRE2 (PHP >= 7.3) rejects "\w-:" as
        // an invalid range in a character class
        $pattern = "/([\w\-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[(!?[\w-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([, ]+)/is";
        preg_match_all($pattern, trim($selector_string).' ', $matches, PREG_SET_ORDER);
        $selectors = array();
        $result = array();

        foreach ($matches as $m) {
            if (trim($m[0])==='') continue;

            list($tag, $key, $val, $exp, $no_key) = array($m[1], null, null, '=', false);
            if(!empty($m[2])) {$key='id'; $val=$m[2];}
            if(!empty($m[3])) {$key='class'; $val=$m[3];}
            if(!empty($m[4])) {$key=$m[4];}
            if(!empty($m[5])) {$exp=$m[5];}
            if(!empty($m[6])) {$val=$m[6];}

            // convert to lowercase
            if ($this->dom->lowercase) {
                $tag = strtolower($tag);
                // a selector without attribute part has no key to convert
                if ($key!==null) $key = strtolower($key);
            }
            //elements that do NOT have the specified attribute
            if (isset($key[0]) && $key[0]==='!') {$key=substr($key, 1); $no_key=true;}

            $result[] = array($tag, $key, $val, $exp, $no_key);
            // a comma closes the current group
            if (trim($m[7])===',') {
                $selectors[] = $result;
                $result = array();
            }
        }
        if (count($result)>0)
            $selectors[] = $result;

        return $selectors;
    }

    // magic read access: a set attribute wins, then the virtual properties
    // outertext/innertext/plaintext; anything else yields whether the
    // attribute key exists at all
    function __get($name) {
        if (isset($this->attr[$name])) return $this->attr[$name];
        switch($name) {
            case 'outertext': return $this->outertext();
            case 'innertext': return $this->innertext();
            case 'plaintext': return $this->plaintext();
            default: return array_key_exists($name, $this->attr);
        }
    }

    // magic write access: outertext/innertext replace the rendered text, any
    // other name sets an attribute
    function __set($name, $value) {
        switch($name) {
            case 'outertext': return $this->_[HDOM_INFO_OUTER] = $value;
            case 'innertext':
                if (isset($this->_[HDOM_INFO_TEXT])) return $this->_[HDOM_INFO_TEXT] = $value;
                return $this->_[HDOM_INFO_INNER] = $value;
        }
        // bookkeeping is added once per attribute slot; a removed attribute
        // (value null) still owns its slot, hence array_key_exists
        if (!array_key_exists($name, $this->attr)) {
            $this->_[HDOM_INFO_SPACE][] = array(' ', '', '');
            $this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE;
        }
        $this->attr[$name] = $value;
    }

    // the virtual text properties always exist; attributes exist when their
    // key is present, whatever the value
    function __isset($name) {
        switch($name) {
            case 'outertext': return true;
            case 'innertext': return true;
            case 'plaintext': return true;
        }
        //no value attr: nowrap, checked selected...
        return array_key_exists($name, $this->attr);
    }

    function __unset($name) {
        if (isset($this->attr[$name]))
            unset($this->attr[$name]);
    }

    // camel naming conventions
    function getAllAttributes() {return $this->attr;}
    function getAttribute($name) {return $this->__get($name);}
    function setAttribute($name, $value) {$this->__set($name, $value);}
    function hasAttribute($name) {return $this->__isset($name);}
    function removeAttribute($name) {$this->__set($name, null);}
    function getElementById($id) {return $this->find("#$id", 0);}
    function getElementsById($id, $idx=null) {return $this->find("#$id", $idx);}
    function getElementByTagName($name) {return $this->find($name, 0);}
    function getElementsByTagName($name, $idx=null) {return $this->find($name, $idx);}
    function parentNode() {return $this->parent();}
    function childNodes($idx=-1) {return $this->children($idx);}
    function firstChild() {return $this->first_child();}
    function lastChild() {return $this->last_child();}
    function nextSibling() {return $this->next_sibling();}
    function previousSibling() {return $this->prev_sibling();}
}
|---|
| 432 | |
|---|
| 433 | // simple html dom parser |
|---|
| 434 | // ----------------------------------------------------------------------------- |
|---|
| 435 | class simple_html_dom { |
|---|
| 436 | public $root = null; |
|---|
| 437 | public $nodes = array(); |
|---|
| 438 | public $callback = null; |
|---|
| 439 | public $lowercase = false; |
|---|
| 440 | protected $pos; |
|---|
| 441 | protected $doc; |
|---|
| 442 | protected $char; |
|---|
| 443 | protected $size; |
|---|
| 444 | protected $cursor; |
|---|
| 445 | protected $parent; |
|---|
| 446 | protected $noise = array(); |
|---|
| 447 | protected $token_blank = " \t\r\n"; |
|---|
| 448 | protected $token_equal = ' =/><'; |
|---|
| 449 | protected $token_slash = " />\r\n\t"; |
|---|
| 450 | protected $token_attr = ' >'; |
|---|
| 451 | // use isset instead of in_array, performance boost about 30%... |
|---|
| 452 | protected $self_closing_tags = array('img'=>1, 'br'=>1, 'input'=>1, 'meta'=>1, 'link'=>1, 'hr'=>1, 'base'=>1, 'embed'=>1, 'spacer'=>1); |
|---|
| 453 | protected $block_tags = array('root'=>1, 'body'=>1, 'form'=>1, 'div'=>1, 'span'=>1, 'table'=>1); |
|---|
| 454 | protected $optional_closing_tags = array( |
|---|
| 455 | 'tr'=>array('tr'=>1, 'td'=>1, 'th'=>1), |
|---|
| 456 | 'th'=>array('th'=>1), |
|---|
| 457 | 'td'=>array('td'=>1), |
|---|
| 458 | 'li'=>array('li'=>1), |
|---|
| 459 | 'dt'=>array('dt'=>1, 'dd'=>1), |
|---|
| 460 | 'dd'=>array('dd'=>1, 'dt'=>1), |
|---|
| 461 | 'dl'=>array('dd'=>1, 'dt'=>1), |
|---|
| 462 | 'p'=>array('p'=>1), |
|---|
| 463 | 'nobr'=>array('nobr'=>1), |
|---|
| 464 | ); |
|---|
| 465 | |
|---|
// Optionally loads content right away: a string that looks like an http(s)
// URL or names an existing file is fetched through load_file(); any other
// non-empty string is parsed as markup.
function __construct($str=null) {
    if ($str) {
        // accept https as well as http, otherwise an https URL would be
        // parsed as literal markup
        if (preg_match("/^https?:\/\//i",$str) || is_file($str))
            $this->load_file($str);
        else
            $this->load($str);
    }
}
|---|
| 474 | |
|---|
// drop every node reference when the document object goes away
function __destruct()
{
    $this->clear();
}
|---|
| 478 | |
|---|
// load html from string
// Resets the parser state, hands the "noise" sections to remove_noise() and
// then parses the document node by node.
function load($str, $lowercase=true) {
    // prepare
    $this->prepare($str, $lowercase);

    // sections handed to remove_noise() before parsing, in this order:
    // comments, <style> (with / without attributes), <script> (with / without
    // attributes), preformatted tags
    $noise_patterns = array(
        "'<!--(.*?)-->'is",
        "'<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>'is",
        "'<\s*style\s*>(.*?)<\s*/\s*style\s*>'is",
        "'<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>'is",
        "'<\s*script\s*>(.*?)<\s*/\s*script\s*>'is",
        "'<\s*(?:pre|code)[^>]*>(.*?)<\s*/\s*(?:pre|code)\s*>'is",
    );
    foreach ($noise_patterns as $noise_pattern) {
        $this->remove_noise($noise_pattern);
    }
    // server side scripts; the meaning of the second argument is defined by
    // remove_noise(), which is declared elsewhere in this class
    $this->remove_noise("'(<\?)(.*?)(\?>)'is", true);

    // parsing: parse() returns false once the input is exhausted
    while ($this->parse());

    // end
    $this->root->_[HDOM_INFO_END] = $this->cursor;
}
|---|
| 504 | |
|---|
// load html from file
// All arguments are forwarded to file_get_contents(); the result is parsed
// with lowercase conversion enabled.
function load_file() {
    $contents = call_user_func_array('file_get_contents', func_get_args());
    $this->load($contents, true);
}
|---|
| 510 | |
|---|
// set callback function
// The callback is invoked with the node as its only argument each time a
// node renders its outer text.
function set_callback($function_name)
{
    $this->callback = $function_name;
}
|---|
| 515 | |
|---|
// remove callback function
// After this call nodes render without triggering any callback.
function remove_callback()
{
    $this->callback = null;
}
|---|
| 520 | |
|---|
// save dom as string
// Returns the rendered document; when $filepath is not the empty string the
// same text is also written to that file.
function save($filepath='') {
    $html = $this->root->innertext();
    if ($filepath==='') {
        return $html;
    }
    file_put_contents($filepath, $html);
    return $html;
}
|---|
| 527 | |
|---|
// find dom node by css selector
// Searches the whole document by delegating to the root node.
function find($selector, $idx=null) {
    $root = $this->root;
    return $root->find($selector, $idx);
}
|---|
| 532 | |
|---|
// clean up memory due to php5 circular references memory leak...
// Clears every node, then drops the parser's own references to the current
// parent, the root, the document text and the noise table.
function clear() {
    foreach ($this->nodes as $node) {
        $node->clear();
    }
    unset($node);

    if (isset($this->parent)) {
        $this->parent->clear();
        unset($this->parent);
    }
    if (isset($this->root)) {
        $this->root->clear();
        unset($this->root);
    }

    unset($this->doc);
    unset($this->noise);
}
|---|
| 541 | |
|---|
// prepare HTML data and init everything
// Discards the previous document, stores $str as the text to parse and
// creates a fresh root node that becomes the current parent.
protected function prepare($str, $lowercase=true) {
    $this->clear();
    $this->doc = $str;
    $this->pos = 0;
    // index 0 of the node list is taken by the root created below
    $this->cursor = 1;
    $this->noise = array();
    $this->nodes = array();
    $this->lowercase = $lowercase;
    $this->root = new simple_html_dom_node($this);
    $this->root->tag = 'root';
    $this->root->_[HDOM_INFO_BEGIN] = -1;
    $this->root->nodetype = HDOM_TYPE_ROOT;
    $this->parent = $this->root;
    // set the length of content
    $this->size = strlen($str);
    // always reset the current character, so an empty document does not
    // inherit a stale one from a previously parsed document
    $this->char = ($this->size>0) ? $this->doc[0] : null;
}
|---|
| 560 | |
|---|
// parse html content
// Handles one step of the parse loop: text in front of the next '<' becomes
// a text node; when there is none the tag reader takes over and its result
// is returned.
protected function parse() {
    $text = $this->copy_until_char('<');
    if ($text==='') {
        return $this->read_tag();
    }

    // text
    $text_node = new simple_html_dom_node($this);
    ++$this->cursor;
    $text_node->_[HDOM_INFO_TEXT] = $text;
    $this->link_nodes($text_node, false);

    return true;
}
|---|
| 573 | |
|---|
// read tag info
// Consumes one tag at the current position and updates the tree: an end tag
// closes the matching open element (or is kept as text when nothing
// matches), "<!...>" becomes a comment/unknown node, anything that is not a
// valid tag name becomes text, and a begin tag becomes an element node with
// its attributes. Returns false when the current character is not '<'
// (nothing left to read), true otherwise.
protected function read_tag() {
    if ($this->char!=='<') {
        $this->root->_[HDOM_INFO_END] = $this->cursor;
        return false;
    }
    $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next

    // end tag
    if ($this->char==='/') {
        $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
        // the declared property is $token_blank; $token_blank_t is not among
        // the properties declared at the top of this class
        $this->skip($this->token_blank);
        $tag = $this->copy_until_char('>');

        // skip attributes in end tag
        if (($pos = strpos($tag, ' '))!==false)
            $tag = substr($tag, 0, $pos);

        $parent_lower = strtolower($this->parent->tag);
        $tag_lower = strtolower($tag);

        if ($parent_lower!==$tag_lower) {
            if (isset($this->optional_closing_tags[$parent_lower]) && isset($this->block_tags[$tag_lower])) {
                // the open element may be left unclosed: mark it so and look
                // for the element this end tag belongs to further up
                $this->parent->_[HDOM_INFO_END] = 0;
                $org_parent = $this->parent;

                while (($this->parent->parent) && strtolower($this->parent->tag)!==$tag_lower)
                    $this->parent = $this->parent->parent;

                if (strtolower($this->parent->tag)!==$tag_lower) {
                    $this->parent = $org_parent; // restore original parent
                    if ($this->parent->parent) $this->parent = $this->parent->parent;
                    $this->parent->_[HDOM_INFO_END] = $this->cursor;
                    return $this->as_text_node($tag);
                }
            }
            else if (($this->parent->parent) && strtolower($this->parent->parent->tag)===$tag_lower) {
                // the end tag closes the grandparent: the parent stays unclosed
                $this->parent->_[HDOM_INFO_END] = 0;
                $this->parent = $this->parent->parent;
            }
            else
                return $this->as_text_node($tag);
        }

        $this->parent->_[HDOM_INFO_END] = $this->cursor;
        if ($this->parent->parent) $this->parent = $this->parent->parent;

        $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
        return true;
    }

    $node = new simple_html_dom_node($this);
    $node->_[HDOM_INFO_BEGIN] = $this->cursor;
    ++$this->cursor;
    $tag = $this->copy_until($this->token_slash);

    // doctype, cdata & comments...
    if (isset($tag[0]) && $tag[0]==='!') {
        $node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until_char('>');

        if (isset($tag[2]) && $tag[1]==='-' && $tag[2]==='-') {
            $node->nodetype = HDOM_TYPE_COMMENT;
            $node->tag = 'comment';
        } else {
            $node->nodetype = HDOM_TYPE_UNKNOWN;
            $node->tag = 'unknown';
        }

        if ($this->char==='>') $node->_[HDOM_INFO_TEXT].='>';
        $this->link_nodes($node, false);
        $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
        return true;
    }

    // text
    // the hyphen is escaped: PCRE2 (PHP >= 7.3) rejects "\w-:" as an invalid
    // range in a character class
    if (!preg_match('/^[\w\-:]+$/', $tag)) {
        $node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until('<>');
        if ($this->char==='<') {
            $this->link_nodes($node, false);
            return true;
        }

        if ($this->char==='>') $node->_[HDOM_INFO_TEXT].='>';
        $this->link_nodes($node, false);
        $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
        return true;
    }

    // begin tag
    $node->nodetype = HDOM_TYPE_ELEMENT;
    $tag_lower = strtolower($tag);
    $node->tag = ($this->lowercase) ? $tag_lower : $tag;

    // handle optional closing tags
    if (isset($this->optional_closing_tags[$tag_lower]) ) {
        // climb out of every open element this begin tag closes implicitly
        while (isset($this->optional_closing_tags[$tag_lower][strtolower($this->parent->tag)])) {
            $this->parent->_[HDOM_INFO_END] = 0;
            $this->parent = $this->parent->parent;
        }
        $node->parent = $this->parent;
    }
    $this->link_nodes($node, true);

    $guard = 0; // prevent infinity loop
    $space = array($this->copy_skip($this->token_blank), '', '');

    // attributes
    do {
        if ($this->char!==null && $space[0]==='') break;
        $name = $this->copy_until($this->token_equal);

        // no progress since the last round: step over one character
        if($guard===$this->pos) {
            $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
            continue;
        }
        $guard = $this->pos;

        // handle endless '<'
        if($this->pos>=$this->size-1 && $this->char!=='>') {
            $node->nodetype = HDOM_TYPE_TEXT;
            $node->_[HDOM_INFO_END] = 0;
            $node->_[HDOM_INFO_TEXT] = '<'.$tag . $space[0] . $name;
            $node->tag = 'text';
            return true;
        }

        if ($name!=='/' && $name!=='') {
            $space[1] = $this->copy_skip($this->token_blank);
            if ($this->lowercase) $name = strtolower($name);
            if ($this->char==='=') {
                $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
                $this->parse_attr($node, $name, $space);
            }
            else {
                //no value attr: nowrap, checked selected...
                $node->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_NO;
                $node->attr[$name] = true;
                if ($this->char!='>') $this->char = $this->doc[--$this->pos]; // prev
            }
            $node->_[HDOM_INFO_SPACE][] = $space;
            $space = array($this->copy_skip($this->token_blank), '', '');
        }
        else
            break;
    } while($this->char!=='>' && $this->char!=='/');

    $node->_[HDOM_INFO_ENDSPACE] = $space[0];

    // check self closing
    if ($this->copy_until_char_escape('>')==='/') {
        $node->_[HDOM_INFO_ENDSPACE] .= '/';
        $node->_[HDOM_INFO_END] = 0;
    }
    else {
        // reset parent
        if (!isset($this->self_closing_tags[strtolower($node->tag)])) $this->parent = $node;
    }
    $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
    return true;
}
|---|
| 734 | |
|---|
| 735 | // parse attributes |
|---|
// Parse the value of attribute $name; the caller has already stepped past '='.
// - $space[2] receives the blanks skipped between '=' and the value.
// - The quoting style is appended to $node->_[HDOM_INFO_QUOTE].
// - The value, passed through restore_noise(), is stored in $node->attr[$name].
protected function parse_attr($node, $name, &$space) {
    $space[2] = $this->copy_skip($this->token_blank);

    $quote = $this->char;
    if ($quote!=='"' && $quote!=='\'') {
        // unquoted value: everything up to the next character in token_attr
        $node->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_NO;
        $node->attr[$name] = $this->restore_noise($this->copy_until($this->token_attr));
        return;
    }

    $node->_[HDOM_INFO_QUOTE][] = ($quote==='"') ? HDOM_QUOTE_DOUBLE : HDOM_QUOTE_SINGLE;

    // step over the opening quote
    ++$this->pos;
    $this->char = ($this->pos<$this->size) ? $this->doc[$this->pos] : null;

    // the value runs up to the matching quote that is not preceded by a backslash
    $node->attr[$name] = $this->restore_noise($this->copy_until_char_escape($quote));

    // step over the closing quote
    ++$this->pos;
    $this->char = ($this->pos<$this->size) ? $this->doc[$this->pos] : null;
}
|---|
| 756 | |
|---|
| 757 | // link node's parent |
|---|
// Attach $node to the current parent ($this->parent).
// The node is always appended (by reference) to the parent's nodes list and,
// when $is_child is true, to its children list as well.
protected function link_nodes(&$node, $is_child) {
    $node->parent = $this->parent;

    if ($is_child) {
        $this->parent->children[] = &$node;
    }

    $this->parent->nodes[] = &$node;
}
|---|
| 764 | |
|---|
| 765 | // as a text node |
|---|
// Keep a closing tag as literal text: create a node whose text is
// '</' . $tag . '>', link it under the current parent (nodes list only,
// not children) and move the cursor one character forward.
// Always returns true.
protected function as_text_node($tag) {
    $text_node = new simple_html_dom_node($this);
    ++$this->cursor;

    $text_node->_[HDOM_INFO_TEXT] = '</' . $tag . '>';
    $this->link_nodes($text_node, false);

    // next
    ++$this->pos;
    if ($this->pos<$this->size) {
        $this->char = $this->doc[$this->pos];
    } else {
        $this->char = null;
    }

    return true;
}
|---|
| 774 | |
|---|
// Move the cursor past the leading run of characters that belong to $chars
// and refresh $this->char (null once the cursor is at the end of the document).
protected function skip($chars) {
    $run = strspn($this->doc, $chars, $this->pos);
    $this->pos += $run;

    if ($this->pos<$this->size) {
        $this->char = $this->doc[$this->pos];
    } else {
        $this->char = null;
    }
}
|---|
| 779 | |
|---|
// Like skip(), but also returns what was skipped: the leading run of
// characters from $chars at the cursor ('' when there is none).
// The cursor ends up just after that run and $this->char is refreshed.
protected function copy_skip($chars) {
    $start = $this->pos;
    $run = strspn($this->doc, $chars, $start);

    $this->pos += $run;
    if ($this->pos<$this->size) {
        $this->char = $this->doc[$this->pos];
    } else {
        $this->char = null;
    }

    return ($run===0) ? '' : substr($this->doc, $start, $run);
}
|---|
| 788 | |
|---|
// Copy characters from the cursor up to (not including) the first character
// that appears in $chars, or up to the end of the document when none does.
// The cursor is left on that stop character and $this->char is refreshed
// (null at the end of the document).
// Returns the copied text, '' when nothing was consumed.
protected function copy_until($chars) {
    $pos = $this->pos;
    $len = strcspn($this->doc, $chars, $pos);
    $this->pos += $len;
    $this->char = ($this->pos<$this->size) ? $this->doc[$this->pos] : null; // next

    // Same guard as copy_skip(): on PHP < 8.0 substr() can return false rather
    // than '' when the start offset is at/past the end of the string, so an
    // empty run is answered directly to keep the return value a string.
    if ($len===0) return '';
    return substr($this->doc, $pos, $len);
}
|---|
| 796 | |
|---|
// Copy text from the cursor up to (not including) the next occurrence of $char.
// - Already at end of document ($this->char is null): returns ''.
// - $char not found: returns the rest of the document and parks the cursor
//   at the end ($this->char becomes null).
// - $char is at the cursor: returns '' and leaves the cursor untouched.
// - Otherwise the cursor is moved onto $char.
protected function copy_until_char($char) {
    if ($this->char===null) return '';

    $from = $this->pos;
    $hit = strpos($this->doc, $char, $from);

    if ($hit===false) {
        $this->char = null;
        $this->pos = $this->size;
        return substr($this->doc, $from, $this->size-$from);
    }

    if ($hit!==$from) {
        $this->char = $this->doc[$hit];
        $this->pos = $hit;
        return substr($this->doc, $from, $hit-$from);
    }

    return '';
}
|---|
| 813 | |
|---|
// Same contract as copy_until_char(), except that an occurrence of $char
// directly preceded by a backslash is treated as escaped and skipped over.
// - Already at end of document: returns ''.
// - $char is at the cursor: returns '' and leaves the cursor untouched.
// - No unescaped $char left: returns the rest of the document and parks the
//   cursor at the end ($this->char becomes null).
protected function copy_until_char_escape($char) {
    if ($this->char===null) return '';

    $from = $this->pos;
    $search = $from;

    while (($hit = strpos($this->doc, $char, $search))!==false) {
        if ($hit===$from) return '';

        if ($this->doc[$hit-1]!=='\\') {
            // unescaped match: stop here
            $this->char = $this->doc[$hit];
            $this->pos = $hit;
            return substr($this->doc, $from, $hit-$from);
        }

        // escaped match: keep looking after it
        $search = $hit+1;
    }

    $rest = substr($this->doc, $from, $this->size-$from);
    $this->char = null;
    $this->pos = $this->size;
    return $rest;
}
|---|
| 839 | |
|---|
| 840 | // remove noise from html content |
|---|
// Cut every match of $pattern out of the document, leaving a placeholder key
// in its place and remembering the removed text in $this->noise[key].
// With $remove_tag true the whole match (group 0) is replaced, otherwise only
// capture group 1. Afterwards $this->size is recomputed and $this->char is
// reset to the first character of a non-empty document.
// NOTE(review): the key is '___noise___' plus a number space-padded to width 3
// that starts at 100, and restore_noise() reads exactly 3 characters after the
// marker; a 4-digit number (900+ stored entries) would not round-trip - confirm.
protected function remove_noise($pattern, $remove_tag=false) {
    $total = preg_match_all($pattern, $this->doc, $found, PREG_SET_ORDER|PREG_OFFSET_CAPTURE);
    $group = ($remove_tag) ? 0 : 1;

    // walk the matches last-to-first so the recorded offsets of the earlier
    // matches are still valid after each replacement
    for ($i=$total-1; $i>=0; $i--) {
        $placeholder = '___noise___'.sprintf('% 3d', count($this->noise)+100);
        $removed = $found[$i][$group][0];
        $offset = $found[$i][$group][1];

        $this->noise[$placeholder] = $removed;
        $this->doc = substr_replace($this->doc, $placeholder, $offset, strlen($removed));
    }

    // reset the length of content
    $this->size = strlen($this->doc);
    if ($this->size>0) {
        $this->char = $this->doc[0];
    }
}
|---|
| 855 | |
|---|
| 856 | // restore noise to html content |
|---|
// Put removed noise back: every 14-character placeholder ('___noise___'
// followed by 3 characters) that is a key of $this->noise is replaced by the
// stored text. Returns the resulting text.
//
// A marker that is not followed by a known key (unknown number, or text that
// ends right after the marker) is left in the text as-is and skipped.
// Previously such a marker made the loop spin forever, because the text never
// changed and strpos() kept returning the same position.
function restore_noise($text) {
    $offset = 0;
    while (($pos=strpos($text, '___noise___', $offset))!==false) {
        // marker plus the 3 key characters; shorter when the text is truncated,
        // in which case it cannot match any stored key
        $key = substr($text, $pos, 14);

        if (isset($this->noise[$key])) {
            // $offset is deliberately not advanced: the restored text is
            // rescanned, so placeholders nested inside it are restored as well
            $text = substr($text, 0, $pos).$this->noise[$key].substr($text, $pos+14);
        }
        else {
            // not a placeholder we know: step past it so the loop terminates
            $offset = $pos+1;
        }
    }
    return $text;
}
|---|
| 865 | |
|---|
// String conversion: the document renders as the innertext() of its root node.
function __toString() {
    $root = $this->root;
    return $root->innertext();
}
|---|
| 869 | |
|---|
// Magic read access for three virtual properties, all served by the root node:
// 'outertext' and 'innertext' both give root->innertext(), 'plaintext' gives
// root->plaintext(). Any other name yields null.
function __get($name) {
    if ($name==='outertext' || $name==='innertext') {
        return $this->root->innertext();
    }
    if ($name==='plaintext') {
        return $this->root->plaintext();
    }
    return null;
}
|---|
| 877 | |
|---|
| 878 | // camel naming conventions |
|---|
// camelCase alias: forwards to the root node's childNodes($idx).
function childNodes($idx=-1) {
    $root = $this->root;
    return $root->childNodes($idx);
}
|---|
// camelCase alias: forwards to the root node's first_child().
function firstChild() {
    $root = $this->root;
    return $root->first_child();
}
|---|
// camelCase alias: forwards to the root node's last_child().
function lastChild() {
    $root = $this->root;
    return $root->last_child();
}
|---|
// Runs find() with the selector '#' followed by $id, asking for index 0.
function getElementById($id) {
    $selector = "#$id";
    return $this->find($selector, 0);
}
|---|
// Runs find() with the selector '#' followed by $id, passing $idx through
// (null by default).
function getElementsById($id, $idx=null) {
    $selector = "#$id";
    return $this->find($selector, $idx);
}
|---|
// Runs find() with $name as the selector, asking for index 0.
function getElementByTagName($name) {
    $match = $this->find($name, 0);
    return $match;
}
|---|
// Runs find() with $name as the selector, passing $idx through.
// NOTE(review): $idx defaults to -1 here but to null in getElementsById();
// confirm against find() that -1 really yields the intended result set.
function getElementsByTagName($name, $idx=-1) {
    $matches = $this->find($name, $idx);
    return $matches;
}
|---|
// Read a file/URL and load it into this DOM. All arguments are handed
// unchanged to file_get_contents(); its result is passed to load() with
// true as the second argument.
function loadFile() {
    $args = func_get_args();
    $contents = call_user_func_array('file_get_contents', $args);
    $this->load($contents, true);
}
|---|
| 887 | } |
|---|
| 888 | ?> |
|---|