1
0
mirror of https://github.com/e107inc/e107.git synced 2025-08-03 05:07:27 +02:00

Keep HTML abuse filter in line with 1.x.

Remove isutf8 flag - no longer relevant.
This commit is contained in:
SteveD
2013-01-05 09:42:34 +00:00
parent ceadfc9d16
commit 8c4432b4b2

View File

@@ -31,13 +31,6 @@ define("E_NL", chr(2));
class e_parse
{
/**
* Flag for global use indicates whether utf-8 character set
*
* @var boolean
*/
protected $isutf8 = FALSE;
/**
* Determine how to handle utf-8.
* 0 = 'do nothing'
@@ -275,7 +268,6 @@ class e_parse
// CHARSET is utf-8
// if(strtolower(CHARSET) == 'utf-8')
// {
$this->isutf8 = TRUE;
if(version_compare(PHP_VERSION, '6.0.0') < 1)
{
// Need to do something here
@@ -530,7 +522,8 @@ class e_parse
/**
* Check for HTML closing tag for input elements, without corresponding opening tag
* Check for unmatched 'dangerous' HTML tags
* (these can destroy page layout where users are able to post HTML)
*
* @param string $data
* @param string $tagList - if empty, uses default list of input tags. Otherwise a CSV list of tags to check (any type)
@@ -548,17 +541,41 @@ class e_parse
{
$checkTags = explode(',', $tagList);
}
$data = strtolower(preg_replace('#\[code.*?\[\/code\]#i', '', $data)); // Ignore code blocks. All lower case simplifies subsequent processing
foreach ($checkTags as $tag)
$tagArray = array_flip($checkTags);
foreach ($tagArray as &$v) { $v = 0; }; // Data fields become zero; keys are tag names.
$data = strtolower(preg_replace('#\[code\].*?\[\/code\]#i', '', $data)); // Ignore code blocks. All lower case simplifies the rest
$matches = array();
if (!preg_match_all('#<(\/|)([^<>]*?[^\/])>#', $data, $matches, PREG_SET_ORDER))
{
$aCount = substr_count($data, '<'.$tag); // Count opening tags
$bCount = substr_count($data, '</'.$tag); // Count closing tags
if ($aCount != $bCount)
{
return TRUE; // Potentially abusive HTML found - tags don't balance
//echo "No tags found<br />";
return TRUE; // No tags found; so all OK
}
//print_a($matches);
foreach ($matches as $m)
{
// $m[0] is the complete tag; $m[1] is '/' or empty; $m[2] is the tag and any attributes
list ($tag) = explode(' ', $m[2], 2);
if (!isset($tagArray[$tag])) continue; // Not a tag of interest
if ($m[1] == '/')
{ // Closing tag
if ($tagArray[$tag] == 0)
{
//echo "Close before open: {$tag}<br />";
return TRUE; // Closing tag before we've had an opening tag
}
$tagArray[$tag]--; // Obviously had at least one opening tag
}
else
{ // Opening tag
$tagArray[$tag]++;
}
}
return FALSE; // Nothing detected
//print_a($tagArray);
foreach ($tagArray as $t)
{
if ($t > 0) return TRUE; // More opening tags than closing tags
}
return FALSE; // OK now
}
@@ -1512,8 +1529,6 @@ class e_parse
}
else
{
// CHARSET is utf-8 - e_parse_class.php too
//$email_text = ($this->isutf8) ? "\\1\\2©\\3" : "\\1\\2&copy;\\3";
$email_text = '$1$2©$3';
// $sub_blk = preg_replace("#(^|[\s])([\w]+?://(?:[\w-%]+?)(?:\.[\w-%]+?)+.*?)(?=$|[\s()[\]<]|\.\s|\.$|,\s|,$)#is", "\\1<a href=\"\\2\" rel=\"external\">\\2</a>", $sub_blk);