From 8c4432b4b2238ade25ef744d24230bd14cc8df0b Mon Sep 17 00:00:00 2001 From: SteveD Date: Sat, 5 Jan 2013 09:42:34 +0000 Subject: [PATCH] Keep HTML abuse filter in line with 1.x. Remove isutf8 flag - no longer relevant. --- e107_handlers/e_parse_class.php | 53 +++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/e107_handlers/e_parse_class.php b/e107_handlers/e_parse_class.php index 75d4ceeef..c8a3b457d 100644 --- a/e107_handlers/e_parse_class.php +++ b/e107_handlers/e_parse_class.php @@ -31,13 +31,6 @@ define("E_NL", chr(2)); class e_parse { - /** - * Flag for global use indicates whether utf-8 character set - * - * @var boolean - */ - protected $isutf8 = FALSE; - /** * Determine how to handle utf-8. * 0 = 'do nothing' @@ -275,7 +268,6 @@ class e_parse // CHARSET is utf-8 // if(strtolower(CHARSET) == 'utf-8') // { - $this->isutf8 = TRUE; if(version_compare(PHP_VERSION, '6.0.0') < 1) { // Need to do something here @@ -530,7 +522,8 @@ class e_parse /** - * Check for HTML closing tag for input elements, without corresponding opening tag + * Check for unmatched 'dangerous' HTML tags + * (these can destroy page layout where users are able to post HTML) * * @param string $data * @param string $tagList - if empty, uses default list of input tags. Otherwise a CSV list of tags to check (any type) @@ -548,17 +541,41 @@ class e_parse { $checkTags = explode(',', $tagList); } - $data = strtolower(preg_replace('#\[code.*?\[\/code\]#i', '', $data)); // Ignore code blocks. All lower case simplifies subsequent processing - foreach ($checkTags as $tag) + $tagArray = array_flip($checkTags); + foreach ($tagArray as &$v) { $v = 0; }; // Data fields become zero; keys are tag names. + $data = strtolower(preg_replace('#\[code\].*?\[\/code\]#i', '', $data)); // Ignore code blocks. 
All lower case simplifies the rest + $matches = array(); + if (!preg_match_all('#<(\/|)([^<>]*?[^\/])>#', $data, $matches, PREG_SET_ORDER)) { - $aCount = substr_count($data, '<'.$tag); // Count opening tags - $bCount = substr_count($data, '"; + return TRUE; // No tags found; so all OK (NOTE(review): every other path returns TRUE for a problem and FALSE for OK - confirm FALSE is not intended here) + } + //print_a($matches); + foreach ($matches as $m) + { + // $m[0] is the complete tag; $m[1] is '/' or empty; $m[2] is the tag and any attributes + list ($tag) = explode(' ', $m[2], 2); + if (!isset($tagArray[$tag])) continue; // Not a tag of interest + if ($m[1] == '/') + { // Closing tag + if ($tagArray[$tag] == 0) + { + //echo "Close before open: {$tag}
"; + return TRUE; // Closing tag before we've had an opening tag + } + $tagArray[$tag]--; // Obviously had at least one opening tag + } + else + { // Opening tag + $tagArray[$tag]++; } } - return FALSE; // Nothing detected + //print_a($tagArray); + foreach ($tagArray as $t) + { + if ($t > 0) return TRUE; // More opening tags than closing tags + } + return FALSE; // OK now } @@ -1512,8 +1529,6 @@ class e_parse } else { - // CHARSET is utf-8 - e_parse_class.php too - //$email_text = ($this->isutf8) ? "\\1\\2©\\3" : "\\1\\2©\\3"; $email_text = '$1$2©$3'; // $sub_blk = preg_replace("#(^|[\s])([\w]+?://(?:[\w-%]+?)(?:\.[\w-%]+?)+.*?)(?=$|[\s()[\]<]|\.\s|\.$|,\s|,$)#is", "\\1\\2", $sub_blk);