1
0
mirror of https://github.com/e107inc/e107.git synced 2025-01-17 20:58:30 +01:00

Parser improvements.

This commit is contained in:
Cameron 2013-03-05 23:46:51 -08:00
parent 0b46b83e50
commit bf435ceb03

View File

@ -2402,6 +2402,34 @@ class e_parser
}
/**
 * Return the attributes of every matching tag found in an HTML fragment.
 *
 * The fragment is wrapped in <html><body>…</body></html> before parsing, so
 * requesting '*' (or 'html' / 'body') will also report those wrapper elements.
 *
 * @param string $html raw HTML fragment to inspect.
 * @param string|array $tags tag name(s) to look for: an array of names, or a
 *                           comma-separated string such as 'img,a'. '*' matches every element.
 * @return array map of tag name => list of attribute maps (attribute name => value),
 *               one entry per element in document order. Tags that are not
 *               found have no key. Empty array when nothing matches.
 */
public function getTag($html, $tags)
{
	$ret = array();

	$html = (string) $html;
	if(trim($html) === '')
	{
		return $ret;
	}

	// Accept both a list of names and a comma-separated string.
	$names = is_array($tags) ? $tags : explode(',', (string) $tags);

	// Use a private document so parsing here never disturbs the shared $this->domObj
	// (and never fails when it has not been initialised yet).
	$doc = new DOMDocument();

	// Fragments and HTML5 tags make libxml emit warnings; collect them quietly
	// and restore the caller's error-handling mode afterwards.
	$previous = libxml_use_internal_errors(true);
	$doc->loadHTML("<html><body>".$html."</body></html>");
	libxml_clear_errors();
	libxml_use_internal_errors($previous);

	foreach($names as $name)
	{
		// loadHTML() lower-cases element names, so normalise the lookup key too.
		$name = strtolower(trim((string) $name));
		if($name === '')
		{
			continue;
		}

		foreach($doc->getElementsByTagName($name) as $node)
		{
			$attributes = array();
			foreach($node->attributes as $attr)
			{
				$attributes[$attr->nodeName] = (string) $attr->nodeValue;
			}

			// Key by the real node name so a '*' lookup is still grouped per tag.
			$ret[$node->nodeName][] = $attributes;
		}
	}

	return $ret;
}
/**
* Perform and render XSS Test Comparison
*/
@ -2459,30 +2487,50 @@ class e_parser
* @param $html raw HTML
* TODO Html5 tag support.
*/
public function cleanHtml($html='',$root='*')
public function cleanHtml($html='')
{
if(!vartrue($html)){ return; }
// $html = mb_convert_encoding($html, 'UTF-8');
$html = '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE html><html><head><meta charset="utf-8"></head><body>'.$html.'</body></html>';
if(preg_match("/<body/i",$html)!==true) // HTML Fragment
{
$html = '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE html><html><head><meta charset="utf-8"></head><body>'.$html.'</body></html>';
}
else // Full HTML page.
{
// $this->allowedTags[] = 'head';
// $this->allowedTags[] = 'body';
// $this->allowedTags[] = 'title';
//$this->allowedTags[] = 'meta';
}
if(!is_object($this->domObj))
{
$this->init();
}
// Set it up for processing.
$doc = $this->domObj;
$doc->loadHTML($html);
@$doc->loadHTML($html);
$doc->encoding = 'UTF-8'; //FIXME
// $doc->resolveExternals = true;
// $tmp = $doc->getElementsByTagName('*');
$this->nodesToConvert = array(); // required.
$this->nodesToDelete = array(); // required.
$this->removedList = array();
$tmp = $doc->getElementsByTagName($root);
$tmp = $doc->getElementsByTagName('*');
foreach($tmp as $node)
{
$path = $node->getNodePath();
// echo "<br />Path = ".$path;
// $tag = strval(basename($path));
$tag = preg_replace('/([a-z0-9\[\]\/]*)?\/([\w]*)(\[(\d)*\])?$/i', "$2", $path);