diff --git a/e107_handlers/e_parse_class.php b/e107_handlers/e_parse_class.php index 05ac4152d..7c8d4c9ce 100644 --- a/e107_handlers/e_parse_class.php +++ b/e107_handlers/e_parse_class.php @@ -29,7 +29,7 @@ define('E_UTF8_PACK', e_HANDLER.'utf8/'); define("E_NL", chr(2)); -class e_parse +class e_parse extends e_parser { /** * Determine how to handle utf-8. @@ -240,6 +240,7 @@ class e_parse public function __construct() { // initialise the type of UTF-8 processing methods depending on PHP version and mb string extension + $this->init(); $this->initCharset(); // Preprocess the supermods to be useful default arrays with all values @@ -2342,14 +2343,19 @@ class e_parse * Start Fresh and Build on it over time to become eventual replacement to e_parse. * Cameron's DOM-based parser. */ -class e_parser extends e_parse +class e_parser { - private $domObj = null; + public $domObj = null; private $removedList = array(); private $nodesToDelete = array(); private $nodesToConvert = array(); private $pathList = array(); - private $allowedAttributes = array('id','href','src','style','class', 'alt', 'title'); // allow posting of data-* ? 
+ private $allowedAttributes = array( + 'default' => array('id', 'style', 'class'), + 'img' => array('id', 'src', 'style', 'class', 'alt', 'title', 'width', 'height'), + 'a' => array('id', 'href', 'style', 'class', 'title'), + ); + private $badAttrValues = array("javascript[\s]*?:","alert\(","vbscript[\s]*?:","data:text/html", "mhtml[\s]*?:", "data:[\s]*?image"); private $allowedTags = array('html', 'body','div','a','img','table','tr', 'td', 'th', 'tbody', 'thead', 'colgroup', 'b', 'i', 'pre','code', 'strong', 'u', 'em','ul','li','img','h1','h2','h3','h4','h5','h6','p', 'div','pre','section','article', 'blockquote','hgroup','aside','figure','span', 'video', 'br', @@ -2358,7 +2364,7 @@ class e_parser extends e_parse public function __construct() { - $this->domObj = new DOMDocument(); + $this->init(); /* $meths = get_class_methods('DomDocument'); @@ -2366,6 +2372,15 @@ class e_parser extends e_parse print_a($meths); */ } + + /** + * Used by e_parse to start + */ + function init() + { + $this->domObj = new DOMDocument(); + + } /** * Set Allowed Tags. @@ -2403,18 +2418,18 @@ class e_parser extends e_parse echo "
and Tags to Htmlentities.
- foreach($this->nodesToConvert as $node) //TODO Work on code processing and highlighting.
+ foreach($this->nodesToConvert as $node) //TODO Work on code processing and highlighting .
{
$value = $node->C14N();
+
$value = str_replace("
","",$value);
- $node->nodeValue = htmlentities($value);
+
+ if($node->nodeName == 'pre')
+ {
+ $value = substr($value,5);
+ $end = strrpos($value,"
");
+ $value = substr($value,0,$end);
+ }
+
+ if($node->nodeName == 'code')
+ {
+ $value = substr($value,6);
+ $end = strrpos($value,"
");
+ $value = substr($value,0,$end);
+ }
+
+ $value = htmlentities(htmlentities($value)); // Needed
+ $node->nodeValue = $value;
}
$cleaned = $doc->saveHTML();
- $cleaned = str_replace(array('','','','',''),'',$cleaned); // filter out tags.
-
+ $cleaned = str_replace(array('','','','','','',''),'',$cleaned); // filter out tags.
+
+ $cleaned = html_entity_decode($cleaned, ENT_QUOTES, 'UTF-8');
+
return $cleaned;
}
@@ -2529,12 +2571,9 @@ class e_parser extends e_parse
*/
function invalidAttributeVal($val)
{
- // FIXME default (strict) match and filters for certain attributes (e.g. src, href, etc)
- $invalid = array("javascript:","alert(","vbscript:","data:text/html", "mhtml:", "data:image");
-
- foreach($invalid as $v)
+ foreach($this->badAttrValues as $v) // global list because a bad value is bad regardless of the attribute it's in. ;-)
{
- if(stripos($val,$v)!==false) //TODO More reliable check.
+ if(preg_match('#'.$v.'#i', $val) === 1) // match the attribute value ($val), not the pattern against itself; preg_match() returns 0 on no-match, so only 1 means "bad"; '#' delimiter because some patterns contain '/'
{
return true;
}
@@ -2553,6 +2592,13 @@ class e_parser extends e_parse
{
$html = <<