From 1fc48b7f38abd10454ab82841fea381454d47dba Mon Sep 17 00:00:00 2001 From: Cameron Date: Tue, 5 Mar 2013 15:13:16 -0800 Subject: [PATCH] Fixes for the new parser. --- e107_handlers/e_parse_class.php | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/e107_handlers/e_parse_class.php b/e107_handlers/e_parse_class.php index 55972dd8e..5cbd71b4c 100644 --- a/e107_handlers/e_parse_class.php +++ b/e107_handlers/e_parse_class.php @@ -2459,20 +2459,25 @@ class e_parser * @param $html raw HTML * TODO Html5 tag support. */ - public function cleanHtml($html='') + public function cleanHtml($html='',$root='*') { if(!vartrue($html)){ return; } // $html = mb_convert_encoding($html, 'UTF-8'); - - $html = ''.$html.''; // Set it up for processing. + + $html = ''.$html.''; + + + // Set it up for processing. $doc = $this->domObj; $doc->loadHTML($html); $doc->encoding = 'UTF-8'; //FIXME // $doc->resolveExternals = true; - $tmp = $doc->getElementsByTagName('*'); + // $tmp = $doc->getElementsByTagName('*'); + + $tmp = $doc->getElementsByTagName($root); foreach($tmp as $node) { @@ -2510,12 +2515,10 @@ class e_parser continue; } - if(invalidAttributeVal( $value)) // Check value against whitelist. + if($this->invalidAttributeVal( $value)) // Check value against whitelist. { $node->removeAttribute($name); $node->setAttribute($name, '#---sanitized---#'); - $node->removeAttribute($name); - $node->setAttribute($name, '#---sanitized---#'); $this->removedList['sanitized'][] = $tag.'['.$name.']'; } } @@ -2571,10 +2574,14 @@ class e_parser */ function invalidAttributeVal($val) { + + foreach($this->badAttrValues as $v) // global list because a bad value is bad regardless of the attribute it's in. ;-) { - if(preg_match('/'.$v.'/i',$v)!==false) + if(preg_match('/'.$v.'/i',$val)==true) { + $this->removedList['blacklist'][] = "Match found for '{$v}' in '{$val}'"; + return true; } @@ -2597,6 +2604,7 @@ Internationalization Test: 日本語
简体中文
Test +A GOOD LINK: Some Link Test regex