diff --git a/e107_handlers/e_parse_class.php b/e107_handlers/e_parse_class.php
index 75d4ceeef..c8a3b457d 100644
--- a/e107_handlers/e_parse_class.php
+++ b/e107_handlers/e_parse_class.php
@@ -31,13 +31,6 @@ define("E_NL", chr(2));
class e_parse
{
- /**
- * Flag for global use indicates whether utf-8 character set
- *
- * @var boolean
- */
- protected $isutf8 = FALSE;
-
/**
* Determine how to handle utf-8.
* 0 = 'do nothing'
@@ -275,7 +268,6 @@ class e_parse
// CHARSET is utf-8
// if(strtolower(CHARSET) == 'utf-8')
// {
- $this->isutf8 = TRUE;
if(version_compare(PHP_VERSION, '6.0.0') < 1)
{
// Need to do something here
@@ -530,7 +522,8 @@ class e_parse
/**
- * Check for HTML closing tag for input elements, without corresponding opening tag
+ * Check for unmatched 'dangerous' HTML tags
+ * (these can destroy page layout where users are able to post HTML)
*
* @param string $data
* @param string $tagList - if empty, uses default list of input tags. Otherwise a CSV list of tags to check (any type)
@@ -548,17 +541,41 @@ class e_parse
{
$checkTags = explode(',', $tagList);
}
- $data = strtolower(preg_replace('#\[code.*?\[\/code\]#i', '', $data)); // Ignore code blocks. All lower case simplifies subsequent processing
- foreach ($checkTags as $tag)
+ $tagArray = array_flip($checkTags);
+ foreach ($tagArray as &$v) { $v = 0; }; // Data fields become zero; keys are tag names.
+ $data = strtolower(preg_replace('#\[code\].*?\[\/code\]#i', '', $data)); // Ignore code blocks. All lower case simplifies the rest
+ $matches = array();
+ if (!preg_match_all('#<(\/|)([^<>]*?[^\/])>#', $data, $matches, PREG_SET_ORDER))
{
- $aCount = substr_count($data, '<'.$tag); // Count opening tags
- $bCount = substr_count($data, ''.$tag); // Count closing tags
- if ($aCount != $bCount)
- {
- return TRUE; // Potentially abusive HTML found - tags don't balance
+ //echo "No tags found<br />";
+ return TRUE; // No tags found; so all OK
+ }
+ //print_a($matches);
+ foreach ($matches as $m)
+ {
+ // $m[0] is the complete tag; $m[1] is '/' or empty; $m[2] is the tag and any attributes
+ list ($tag) = explode(' ', $m[2], 2);
+ if (!isset($tagArray[$tag])) continue; // Not a tag of interest
+ if ($m[1] == '/')
+ { // Closing tag
+ if ($tagArray[$tag] == 0)
+ {
+ //echo "Close before open: {$tag}<br />";
+ return TRUE; // Closing tag before we've had an opening tag
+ }
+ $tagArray[$tag]--; // Obviously had at least one opening tag
+ }
+ else
+ { // Opening tag
+ $tagArray[$tag]++;
}
}
- return FALSE; // Nothing detected
+ //print_a($tagArray);
+ foreach ($tagArray as $t)
+ {
+ if ($t > 0) return TRUE; // More opening tags than closing tags
+ }
+ return FALSE; // OK now
}
@@ -1512,8 +1529,6 @@ class e_parse
}
else
{
- // CHARSET is utf-8 - e_parse_class.php too
- //$email_text = ($this->isutf8) ? "\\1\\2©\\3" : "\\1\\2©\\3";
$email_text = '$1$2©$3';
// $sub_blk = preg_replace("#(^|[\s])([\w]+?://(?:[\w-%]+?)(?:\.[\w-%]+?)+.*?)(?=$|[\s()[\]<]|\.\s|\.$|,\s|,$)#is", "\\1\\2", $sub_blk);