rror', '>', '"', ' & '); // Set to TRUE or FALSE once it has been calculated var $e_highlighting; // Highlight query var $e_query; public $thumbWidth = 100; public $thumbHeight = 0; // Set up the defaults var $e_optDefault = array( // default context: reflects legacy settings (many items enabled) 'context' => 'OLDDEFAULT', // 'fromadmin' => FALSE, // Enable emote display 'emotes' => TRUE, // Convert defines(constants) within text. 'defs' => FALSE, // replace all {e_XXX} constants with their e107 value - 'rel' or 'abs' 'constants' => FALSE, // Enable hooked parsers 'hook' => TRUE, // Allow scripts through (new for 0.8) 'scripts' => TRUE, // Make links clickable 'link_click' => TRUE, // Substitute on clickable links (only if link_click == TRUE) 'link_replace' => TRUE, // Parse shortcodes - TRUE enables parsing 'parse_sc' => FALSE, // remove HTML tags. 'no_tags' => FALSE, // Restore entity form of quotes and such to single characters - TRUE disables 'value' => FALSE, // Line break compression - TRUE removes newline characters 'nobreak' => FALSE, // Retain newlines - wraps to \n instead of
if TRUE (for non-HTML email text etc) 'retain_nl' => FALSE ); // Super modifiers override default option values var $e_SuperMods = array( //text is part of a title (e.g. news title) 'TITLE' => array( 'nobreak'=>TRUE, 'retain_nl'=>TRUE, 'link_click' => FALSE, 'emotes'=>FALSE, 'defs'=>TRUE, 'parse_sc'=>TRUE ), //text is user-entered (i.e. untrusted) and part of a title (e.g. forum title) 'USER_TITLE' => array( 'nobreak'=>TRUE, 'retain_nl'=>TRUE, 'link_click' => FALSE, 'scripts' => FALSE, 'emotes'=>FALSE, 'hook'=>FALSE ), // text is 'body' of email or similar - being sent 'off-site' so don't rely on server availability 'E_TITLE' => array( 'nobreak'=>TRUE, 'retain_nl'=>TRUE, 'defs'=>TRUE, 'parse_sc'=>TRUE, 'emotes'=>FALSE, 'scripts' => FALSE, 'link_click' => FALSE ), // text is part of the summary of a longer item (e.g. content summary) 'SUMMARY' => array( 'defs'=>TRUE, 'constants'=>'full', 'parse_sc'=>TRUE ), // text is the description of an item (e.g. download, link) 'DESCRIPTION' => array( 'defs'=>TRUE, 'constants'=>'full', 'parse_sc'=>TRUE ), // text is 'body' or 'bulk' text (e.g. custom page body, content body) 'BODY' => array( 'defs'=>TRUE, 'constants'=>'full', 'parse_sc'=>TRUE ), 'WYSIWYG' => array( 'defs'=>FALSE, 'constants'=>'full', 'parse_sc'=>FALSE, 'wysiwyg'=>TRUE ), // text is user-entered (i.e. untrusted)'body' or 'bulk' text (e.g. 
custom page body, content body) 'USER_BODY' => array( 'constants'=>'full', 'scripts' => FALSE ), // text is 'body' of email or similar - being sent 'off-site' so don't rely on server availability 'E_BODY' => array( 'defs'=>TRUE, 'constants'=>'full', 'parse_sc'=>TRUE, 'emotes'=>FALSE, 'scripts' => FALSE, 'link_click' => FALSE ), // text is text-only 'body' of email or similar - being sent 'off-site' so don't rely on server availability 'E_BODY_PLAIN' => array( 'defs'=>TRUE, 'constants'=>'full', 'parse_sc'=>TRUE, 'emotes'=>FALSE, 'scripts' => FALSE, 'link_click' => FALSE, 'retain_nl' => TRUE, 'no_tags' => TRUE ), // text is the 'content' of a link (A tag, etc) 'LINKTEXT' => array( 'nobreak'=>TRUE, 'retain_nl'=>TRUE, 'link_click' => FALSE, 'emotes'=>FALSE, 'hook'=>FALSE, 'defs'=>TRUE, 'parse_sc'=>TRUE ), // text is used (for admin edit) without fancy conversions or html. 'RAWTEXT' => array( 'nobreak'=>TRUE, 'retain_nl'=>TRUE, 'link_click' => FALSE, 'emotes'=>FALSE, 'hook'=>FALSE, 'no_tags'=>TRUE ) ); // Individual modifiers change the current context var $e_Modifiers = array( 'emotes_off' => array('emotes' => FALSE), 'emotes_on' => array('emotes' => TRUE), 'no_hook' => array('hook' => FALSE), 'do_hook' => array('hook' => TRUE), // New for 0.8 'scripts_off' => array('scripts' => FALSE), // New for 0.8 'scripts_on' => array('scripts' => TRUE), 'no_make_clickable' => array('link_click' => FALSE), 'make_clickable' => array('link_click' => TRUE), 'no_replace' => array('link_replace' => FALSE), // Replace text of clickable links (only if make_clickable option set) 'replace' => array('link_replace' => TRUE), // No path replacement 'consts_off' => array('constants' => FALSE), // Relative path replacement 'consts_rel' => array('constants' => 'rel'), // Absolute path replacement 'consts_abs' => array('constants' => 'abs'), // Full path replacement 'consts_full' => array('constants' => 'full'), // No shortcode parsing 'scparse_off' => array('parse_sc' => FALSE), 'scparse_on' => 
array('parse_sc' => TRUE), // Strip tags 'no_tags' => array('no_tags' => TRUE), // Leave tags 'do_tags' => array('no_tags' => FALSE), 'fromadmin' => array('fromadmin' => TRUE), 'notadmin' => array('fromadmin' => FALSE), // entity replacement 'er_off' => array('value' => FALSE), 'er_on' => array('value' => TRUE), // Decode constant if exists 'defs_off' => array('defs' => FALSE), 'defs_on' => array('defs' => TRUE), 'dobreak' => array('nobreak' => FALSE), 'nobreak' => array('nobreak' => TRUE), // Line break using \n 'lb_nl' => array('retain_nl' => TRUE), // Line break using
'lb_br' => array('retain_nl' => FALSE),

		// Legacy option names below here - discontinue later
		'retain_nl' => array('retain_nl' => TRUE),
		'defs' => array('defs' => TRUE),
		'parse_sc' => array('parse_sc' => TRUE),
		'constants' => array('constants' => 'rel'),
		'value' => array('value' => TRUE),
		'wysiwyg' => array('wysiwyg'=>TRUE)
	);

	/**
	 * Constructor - kept public for backward compatibility (there are still some `new e_parse()` calls in the core).
	 *
	 * Calls init() (defined outside this excerpt) and initCharset(), then expands every
	 * super modifier into a complete option set so toHTML() can use it directly.
	 *
	 * @return void
	 */
	public function __construct()
	{
		$this->init();

		// Decide which family of string functions the u*() helpers will use
		$this->initCharset();

		// Precalculate the super defaults: each super modifier becomes the default option
		// array overlaid with its own overrides, tagged with its own name as 'context'
		foreach ($this->e_SuperMods as $key => $val)
		{
			$this->e_SuperMods[$key] = array_merge($this->e_optDefault, $val);
			$this->e_SuperMods[$key]['context'] = $key;
		}
	}

	/**
	 * Initialise the type of UTF-8 processing used by the u*() helper methods.
	 *
	 * Sets $this->utfAction to:
	 *   0 - call the plain string functions (only chosen when mbstring overloads them, so they are already multibyte-aware)
	 *   1 - call the mb_*() functions
	 *   2 - mbstring is not available: load the bundled emulation library and call its utf8_*() functions
	 *
	 * The previous implementation only did this when PHP_VERSION was <= 6.0.0, presumably expecting
	 * later versions to handle unicode natively. PHP 7 and 8 do not, so the check has been removed;
	 * otherwise every u*() helper silently counts bytes instead of characters on current PHP versions.
	 *
	 * NOTE: can't be called until CHARSET is known but we all know that it is UTF-8 now
	 *
	 * @return void
	 */
	private function initCharset()
	{
		// 'Do nothing' is the simple option
		$this->utfAction = 0;

		if (extension_loaded('mbstring'))
		{
			// Check for function overloading. The ini setting and the MB_OVERLOAD_* constants
			// no longer exist in PHP 8, so treat both as 'not overloaded' when they are missing.
			$overload = (int) ini_get('mbstring.func_overload');
			$stringFlag = defined('MB_OVERLOAD_STRING') ? MB_OVERLOAD_STRING : 0;

			// Just check the string functions - will be non-zero if overloaded
			if (($overload & $stringFlag) == 0)
			{
				// Can use the mb_string routines
				$this->utfAction = 1;
			}

			// Set the default encoding, so we don't have to specify every time
			mb_internal_encoding('UTF-8');
		}
		else
		{
			// Must use emulation - will probably be slow!
			$this->utfAction = 2;
			require (E_UTF8_PACK.'utils/unicode.php');
			// Always load the core routines - bound to need some of them!
			require (E_UTF8_PACK.'native/core.php');
		}
	}

	/**
	 * Unicode (UTF-8) analogue of standard @link http://php.net/strlen strlen PHP function.
	 * Returns the length of the given string.
	 *
	 * @param string $str The UTF-8 encoded string being measured for length.
	 * @return integer The length (amount of UTF-8 characters) of the string on success, and 0 if the string is empty.
	 */
	public function ustrlen($str)
	{
		switch($this->utfAction)
		{
			case 0:
				return strlen($str);
			case 1:
				return mb_strlen($str);
		}
		// Default case shouldn't happen often.
		// Save a call - invoke the function directly: utf8_decode() yields one byte per character.
		return strlen(utf8_decode($str));
	}

	/**
	 * Unicode (UTF-8) analogue of standard @link http://php.net/strtolower strtolower PHP function.
	 * Make a string lowercase.
	 *
	 * @param string $str The UTF-8 encoded string to be lowercased.
	 * @return string Specified string with all alphabetic characters converted to lowercase.
	 */
	public function ustrtolower($str)
	{
		switch($this->utfAction)
		{
			case 0:
				return strtolower($str);
			case 1:
				return mb_strtolower($str);
		}
		// Default case shouldn't happen often
		return utf8_strtolower($str);
	}

	/**
	 * Unicode (UTF-8) analogue of standard @link http://php.net/strtoupper strtoupper PHP function.
	 * Make a string uppercase.
	 *
	 * @param string $str The UTF-8 encoded string to be uppercased.
	 * @return string Specified string with all alphabetic characters converted to uppercase.
	 */
	public function ustrtoupper($str)
	{
		switch($this->utfAction)
		{
			case 0:
				return strtoupper($str);
			case 1:
				return mb_strtoupper($str);
		}
		// Default case shouldn't happen often
		return utf8_strtoupper($str);
	}

	/**
	 * Unicode (UTF-8) analogue of standard @link http://php.net/strpos strpos PHP function.
	 * Find the position of the first occurrence of a case-sensitive UTF-8 encoded string.
	 * Returns the numeric position (offset in amount of UTF-8 characters)
	 * of the first occurrence of needle in the haystack string.
	 *
	 * @param string $haystack The UTF-8 encoded string being searched in.
* @param string $needle The UTF-8 encoded string being searched for.
	 * @param integer $offset [optional] The optional offset parameter allows you to specify which character in haystack to start searching.
	 * 				 The position returned is still relative to the beginning of haystack.
	 * @return integer|boolean Returns the position as an integer. If needle is not found, the function will return boolean FALSE.
	 */
	public function ustrpos($haystack, $needle, $offset = 0)
	{
		// utfAction selects the implementation: 0 = plain, 1 = mbstring, anything else = utf8_*() emulation
		if ($this->utfAction == 0)
		{
			return strpos($haystack, $needle, $offset);
		}

		if ($this->utfAction == 1)
		{
			return mb_strpos($haystack, $needle, $offset);
		}

		return utf8_strpos($haystack, $needle, $offset);
	}

	/**
	 * Unicode (UTF-8) analogue of standard @link http://php.net/strrpos strrpos PHP function.
	 * Find the position of the last occurrence of a case-sensitive UTF-8 encoded string.
	 * Returns the numeric position (offset in amount of UTF-8 characters)
	 * of the last occurrence of needle in the haystack string.
	 *
	 * @param string $haystack The UTF-8 encoded string being searched in.
	 * @param string $needle The UTF-8 encoded string being searched for.
	 * @param integer $offset [optional] - The optional offset parameter allows you to specify which character in haystack to start searching.
	 * 				 The position returned is still relative to the beginning of haystack.
	 * @return integer|boolean Returns the position as an integer. If needle is not found, the function will return boolean FALSE.
	 */
	public function ustrrpos($haystack, $needle, $offset = 0)
	{
		// Same dispatch as ustrpos(): 0 = plain, 1 = mbstring, anything else = utf8_*() emulation
		if ($this->utfAction == 0)
		{
			return strrpos($haystack, $needle, $offset);
		}

		if ($this->utfAction == 1)
		{
			return mb_strrpos($haystack, $needle, $offset);
		}

		return utf8_strrpos($haystack, $needle, $offset);
	}

	/**
	 * Unicode (UTF-8) analogue of standard @link http://php.net/substr substr PHP function.
	 * Returns the portion of string specified by the start and length parameters.
	 *
	 * NOTE: May be subtle differences in return values dependent on which routine is used.
	 *  Native substr() routine can return FALSE.
mb_substr() and utf8_substr() just return an empty string.
	 *
	 * @param string $str The UTF-8 encoded string.
	 * @param integer $start Start of portion to be returned. Position is counted in amount of UTF-8 characters from the beginning of str.
	 * 				First character's position is 0. Second character position is 1, and so on.
	 * @param integer $length [optional] If length is given, the string returned will contain at most length characters beginning from start
	 * 				(depending on the length of string). If length is omitted, the rest of string from start will be returned.
	 * @return string The extracted UTF-8 encoded part of input string.
	 */
	public function usubstr($str, $start, $length = NULL)
	{
		switch($this->utfAction)
		{
			case 0:
				// Before PHP 8.0 substr() treats an explicit NULL length as 0 and returns nothing,
				// so only pass the length on when the caller actually supplied one.
				if(is_null($length))
				{
					return substr($str, $start);
				}
				return substr($str, $start, $length);

			case 1:
				if(is_null($length))
				{
					return mb_substr($str, $start);
				}
				return mb_substr($str, $start, $length);
		}
		// Emulation library - called with the NULL length passed straight through, as before
		return utf8_substr($str, $start, $length);
	}

	/**
	 * Converts the supplied text (presumed to be from user input) to a format suitable for storing in a database table.
	 *
	 * @param string $data
	 * @param boolean $nostrip [optional] Assumes all data is GPC ($_GET, $_POST, $_COOKIE) unless indicated otherwise by setting this var to TRUE.
	 * 				If magic quotes is enabled on the server and you do not tell toDB() that the data is non GPC then slashes will be stripped when they should not be.
	 * @param boolean $no_encode [optional] This parameter should nearly always be FALSE. It is used by the save_prefs() function to preserve HTML content within prefs even when
	 * 				the save_prefs() function has been called by a non admin user / user without html posting permissions.
	 * @param boolean $mod [optional] The 'no_html' and 'no_php' modifiers blanket prevent HTML and PHP posting regardless of posting permissions. (used in logging)
	 *				The 'pReFs' value is for internal use only, when saving prefs, to prevent sanitisation of HTML.
* @param boolean $original_author [optional] * @return string * @todo complete the documentation of this essential method */ public function toDB($data, $nostrip = FALSE, $no_encode = FALSE, $mod = FALSE, $original_author = FALSE) { $core_pref = e107::getConfig(); if (is_array($data)) { foreach ($data as $key => $var) { //Fix - sanitize keys as well $ret[$this->toDB($key, $nostrip, $no_encode, $mod, $original_author)] = $this->toDB($var, $nostrip, $no_encode, $mod, $original_author); } return $ret; } if (MAGIC_QUOTES_GPC == TRUE && $nostrip == FALSE) { $data = stripslashes($data); } if ($mod != 'pReFs') { $data = $this->preFilter($data); if (!check_class($core_pref->get('post_html', e_UC_MAINADMIN)) || !check_class($core_pref->get('post_script', e_UC_MAINADMIN))) { $data = $this->dataFilter($data); } } if (/*$core_pref->is('post_html') && */check_class($core_pref->get('post_html'))) { $no_encode = TRUE; } if ($core_pref->get('html_abuse')) { if ($this->htmlAbuseFilter($data)) $no_encode = FALSE; } if (is_numeric($original_author) && !check_class($core_pref->get('post_html'), '', $original_author)) { $no_encode = FALSE; } if ($no_encode === TRUE && strpos($mod, 'no_html') === FALSE) { $search = array('$', '"', "'", '\\', 'get('php_bbcode'))) { $ret = preg_replace("#\[(php)#i", "[\\1", $ret); } return $ret; } /** * Check for umatched 'dangerous' HTML tags * (these can destroy page layout where users are able to post HTML) * * @param string $data * @param string $tagList - if empty, uses default list of input tags. Otherwise a CSV list of tags to check (any type) * * @return boolean TRUE if an unopened closing tag found * FALSE if nothing found */ function htmlAbuseFilter($data, $tagList = '') { if ($tagList == '') { $checkTags = array('textarea', 'input', 'td', 'tr', 'table'); } else { $checkTags = explode(',', $tagList); } $tagArray = array_flip($checkTags); foreach ($tagArray as &$v) { $v = 0; }; // Data fields become zero; keys are tag names. 
$data = strtolower(preg_replace('#\[code\].*?\[\/code\]#i', '', $data)); // Ignore code blocks. All lower case simplifies the rest $matches = array(); if (!preg_match_all('#<(\/|)([^<>]*?[^\/])>#', $data, $matches, PREG_SET_ORDER)) { //echo "No tags found
"; return TRUE; // No tags found; so all OK } //print_a($matches); foreach ($matches as $m) { // $m[0] is the complete tag; $m[1] is '/' or empty; $m[2] is the tag and any attributes list ($tag) = explode(' ', $m[2], 2); if (!isset($tagArray[$tag])) continue; // Not a tag of interest if ($m[1] == '/') { // Closing tag if ($tagArray[$tag] == 0) { //echo "Close before open: {$tag}
"; return TRUE; // Closing tag before we've had an opening tag } $tagArray[$tag]--; // Obviously had at least one opening tag } else { // Opening tag $tagArray[$tag]++; } } //print_a($tagArray); foreach ($tagArray as $t) { if ($t > 0) return TRUE; // More opening tags than closing tags } return FALSE; // OK now } /** * Checks a string for potentially dangerous HTML tags, including malformed tags * */ public function dataFilter($data, $mode='bbcode') { $ans = ''; $vetWords = array('dataFilter($decode)).$match[3]; } private function modTag($match) { $ans = ''; if (isset($match[1])) { $chop = intval(strlen($match[1]) / 2); $ans = substr($match[1], 0, $chop).'##xss##'.substr($match[1], $chop); } else { $ans = '?????'; } return '[sanitised]'.$ans.'[/sanitised]'; } /** * Processes data as needed before its written to the DB. * Currently gives bbcodes the opportunity to do something * * @param $data string - data about to be written to DB * @return string - modified data */ public function preFilter($data) { if (!is_object($this->e_bb)) { require_once(e_HANDLER.'bbcode_handler.php'); $this->e_bb = new e_bbcode; } $ret = $this->e_bb->parseBBCodes($data, USERID, 'default', 'PRE'); // $postID = logged in user here return $ret; } function toForm($text) { if(empty($text)) // fix - handle proper 0, Space etc values. { return $text; } $search = array('$', '"', '<', '>'); $replace = array('$', '"', '<', '>'); $text = str_replace($search, $replace, $text); if (e_WYSIWYG !== TRUE) { // fix for utf-8 issue with html_entity_decode(); ??? 
$text = str_replace(" ", " ", $text); } return $text; } function post_toForm($text) { if(is_array($text)) { foreach ($text as $key=>$value) { $text[$this->post_toForm($key)] = $this->post_toForm($value); } return $text; } if(MAGIC_QUOTES_GPC == TRUE) { $text = stripslashes($text); } return str_replace(array("'", '"', "<", ">"), array("'", """, "<", ">"), $text); } function post_toHTML($text, $original_author = FALSE, $extra = '', $mod = FALSE) { $text = $this->toDB($text, FALSE, FALSE, $mod, $original_author); return $this->toHTML($text, TRUE, $extra); } function parseTemplate($text, $parseSCFiles = TRUE, $extraCodes = null, $eVars = null) { return e107::getScParser()->parseCodes($text, $parseSCFiles, $extraCodes, $eVars); } /** * Simple parser * * @param string $template * @param e_vars $vars * @param string $replaceUnset string to be used if replace variable is not set, false - don't replace * @return string parsed content */ function simpleParse($template, e_vars $vars, $replaceUnset='') { $this->replaceVars = $vars; $this->replaceUnset = $replaceUnset; return preg_replace_callback("#\{([a-zA-Z0-9_]+)\}#", array($this, 'simpleReplace'), $template); } protected function simpleReplace($tmp) { $unset = ($this->replaceUnset !== false ? $this->replaceUnset : $tmp[0]); $key = $tmp[1]; if(is_array($this->replaceVars)) { return ($this->replaceVars[$key] !== null ? $this->replaceVars[$key]: $unset); } // return ($this->replaceVars->$tmp[1] !== null ? $this->replaceVars->$tmp[1] : $unset); // Doesn't work. } function htmlwrap($str, $width, $break = "\n", $nobreak = "a", $nobr = "pre", $utf = FALSE) { /* Pretty well complete rewrite to try and handle utf-8 properly. Breaks each utf-8 'word' every $width characters max. If possible, breaks after 'safe' characters. $break is the character inserted to flag the break. 
$nobreak is a list of tags within which word wrap is to be inactive
		*/
		//TODO handle htmlwrap somehow
		//return $str;

		// Don't wrap if non-numeric width
		$width = intval($width);
		// And trap stupid wrap counts
		if ($width < 6) return $str;

		// Transform protected element lists into arrays
		$nobreak = explode(" ", strtolower($nobreak));

		// Variable setup
		$innbk = array();		// Stack of currently open protected tags (most recent first)
		$drain = "";			// Accumulates the output

		// List of characters it is "safe" to insert line-breaks at
		// It is not necessary to add < and > as they are automatically implied
		$lbrks = "/?!%)-}]\\\"':;&";

		// Is $str a UTF8 string?
		if ($utf || strtolower(CHARSET) == 'utf-8')
		{
			// 0x1680, 0x180e, 0x2000-0x200a, 0x2028, 0x205f, 0x3000 are 'non-ASCII' Unicode UCS-4 codepoints - see http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
			// All convert to 3-byte utf-8 sequences:
			// 0x1680	0xe1	0x9a	0x80
			// 0x180e	0xe1	0xa0	0x8e
			// 0x2000	0xe2	0x80	0x80
			//   -
			// 0x200a	0xe2	0x80	0x8a
			// 0x2028	0xe2	0x80	0xa8
			// 0x205f	0xe2	0x81	0x9f
			// 0x3000	0xe3	0x80	0x80
			$utf8 = 'u';
			// Have to explicitly enumerate the whitespace chars, and use non-utf-8 mode, otherwise regex fails on badly formed utf-8
			$whiteSpace = '#([\x20|\x0c]|[\xe1][\x9a][\x80]|[\xe1][\xa0][\x8e]|[\xe2][\x80][\x80-\x8a,\xa8]|[\xe2][\x81][\x9f]|[\xe3][\x80][\x80]+)#';
		}
		else
		{
			$utf8 = '';
			// For non-utf-8, can use a simple match string
			$whiteSpace = '#(\s+)#';
		}

		// Start of the serious stuff - split into HTML tags and text between
		$content = preg_split('#(<.*?'.'>)#mis', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE );
		foreach($content as $value)
		{
			if ($value[0] == "<")
			{
				// We are within an HTML tag
				// Create a lowercase copy of this tag's contents
				$lvalue = strtolower(substr($value, 1, -1));
				if ($lvalue)
				{
					// Tag of non-zero length
					// If the first character is not a / then this is an opening tag
					if ($lvalue[0] != "/")
					{
						// Collect the tag name. Only look at the capture when the pattern actually
						// matched (it does not for e.g. '!-- comment --'), otherwise $t[1] is undefined.
						// If this is a protected element, activate the associated protection flag
						if (preg_match("/^(\w*?)(\s|$)/", $lvalue, $t) && in_array($t[1], $nobreak))
						{
							array_unshift($innbk, $t[1]);
						}
					}
					else
					{
						// Otherwise this is a closing tag
						// If this is a closing tag for a protected element, unset the flag
						$closing = substr($lvalue, 1);
						if (in_array($closing, $nobreak))
						{
							// Drop the most recently opened matching entry.
							// (foreach replaces the each() loop - each() no longer exists in PHP 8)
							foreach ($innbk as $key => $tag)
							{
								if ($closing == $tag)
								{
									unset($innbk[$key]);
									break;
								}
							}
							$innbk = array_values($innbk);
						}
					}
				}
				else
				{
					// Eliminate any empty tags altogether
					$value = '';
				}
			}
			// Else if we're outside any tags, and with non-zero length string...
			elseif ($value)
			{
				// If unprotected...
				if (!count($innbk))
				{
					// Use the ACK (006) ASCII symbol to replace all HTML entities temporarily
					$value = str_replace("\x06", "", $value);
					preg_match_all("/&([a-z\d]{2,7}|#\d{2,5});/i", $value, $ents);
					$value = preg_replace("/&([a-z\d]{2,7}|#\d{2,5});/i", "\x06", $value);

					// Split at spaces - note that this will fail if presented with invalid utf-8 when doing the regex whitespace search
					$split = preg_split($whiteSpace, $value, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE );
					$value = '';
					foreach ($split as $sp)
					{
						$loopCount = 0;
						while (strlen($sp) > $width)
						{
							// Enough characters that we may need to do something.
							$pulled = '';
							if ($utf8)
							{
								// Pull out a piece of the maximum permissible length
								if (preg_match('#^((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,'.$width.'})(.{0,1}).*#s',$sp,$matches) == 0)
								{
									// Make any problems obvious for now
									$value .= '[!invalid utf-8: '.$sp.'!]';
									$sp = '';
								}
								elseif ($matches[2] === '')
								{
									// utf-8 length is less than specified - treat as a special case.
									// (Strict comparison: empty() would also be TRUE when the next character is "0")
									$value .= $sp;
									$sp = '';
								}
								else
								{
									// Need to find somewhere to break the string
									for($i = strlen($matches[1]) - 1; $i >= 0; $i--)
									{
										if(strpos($lbrks, $matches[1][$i]) !== FALSE) break;
									}
									if($i < 0)
									{
										// No 'special' break character found - break at the word boundary
										$pulled = $matches[1];
									}
									else
									{
										$pulled = substr($sp, 0, $i + 1);
									}
								}
								$loopCount++;
								if ($loopCount > 20)
								{
									// Make any problems obvious for now
									$value .= '[!loop count exceeded: '.$sp.'!]';
									$sp = '';
								}
							}
							else
							{
								for ($i = min($width, strlen($sp)); $i > 0; $i--)
								{
									// No speed advantage to defining match character
									if (strpos($lbrks, $sp[$i-1]) !== FALSE) break;
								}
								if ($i == 0)
								{
									// No 'special' break boundary character found - break at the word boundary
									$pulled = substr($sp, 0, $width);
								}
								else
								{
									$pulled = substr($sp, 0, $i);
								}
							}
							if ($pulled)
							{
								$value .= $pulled.$break;
								// Shorten $sp by whatever we've processed (will work even for utf-8)
								$sp = substr($sp, strlen($pulled));
							}
						}
						// Add in any residue
						$value .= $sp;
					}
					// Put captured HTML entities back into the string
					foreach ($ents[0] as $ent) $value = preg_replace("/\x06/", $ent, $value, 1);
				}
			}
			// Send the modified segment down the drain
			$drain .= $value;
		}
		// Return contents of the drain
		return $drain;
	}

	/**
	 * CakePHP(tm) :  Rapid Development Framework (http://www.cakephp.org)
	 * Copyright 2005-2008, Cake Software Foundation, Inc.
(http://www.cakefoundation.org)
	 *
	 * Truncate a HTML string
	 *
	 * Cuts the visible text of $text to $length characters (tags are not counted, an HTML entity counts as one
	 * character), appends $ending, then closes any tags that were still open at the cut.
	 * Text whose visible length is already within $length is returned unchanged.
	 *
	 * @param string  $text String to truncate.
	 * @param integer $length Maximum number of visible characters to keep; $ending is appended in addition to these.
	 * @param string $ending It will be used as Ending and appended to the trimmed string.
	 * @param boolean $exact If false, $text will not be cut mid-word
	 * @return string Trimmed string.
	 */
	function html_truncate($text, $length = 100, $ending = '...', $exact = true)
	{
		if($this->ustrlen(preg_replace('/<.*?>/', '', $text)) <= $length)
		{
			return $text;
		}

		$entityPattern = '/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i';
		// Elements which never have a closing tag. Anchored so that e.g. 'abbr' is not mistaken for 'br'.
		$voidPattern = '/^(?:img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)$/i';

		$totalLength = 0;
		$openTags = array();
		$truncate = '';

		// Each match is an optional tag ($tag[1], name in $tag[2]) followed by the text after it ($tag[3])
		preg_match_all('/(<\/?([\w+]+)[^>]*>)?([^<>]*)/', $text, $tags, PREG_SET_ORDER);

		foreach($tags as $tag)
		{
			if(!$tag[2] || !preg_match($voidPattern, $tag[2]))
			{
				if(preg_match('/<[\w]+[^>]*>/s', $tag[0]))
				{
					// Opening tag - remember it, most recent first
					array_unshift($openTags, $tag[2]);
				}
				else if(preg_match('/<\/([\w]+)[^>]*>/s', $tag[0], $closeTag))
				{
					// Closing tag - forget the matching opening tag, if any
					$pos = array_search($closeTag[1], $openTags);
					if($pos !== false)
					{
						array_splice($openTags, $pos, 1);
					}
				}
			}
			$truncate .= $tag[1];

			// Visible length of the text part: every entity counts as a single character
			$contentLength = $this->ustrlen(preg_replace($entityPattern, ' ', $tag[3]));
			if($contentLength + $totalLength > $length)
			{
				// This chunk crosses the limit - keep only as much as still fits,
				// taking care not to cut through the middle of an entity
				$left = $length - $totalLength;
				$entitiesLength = 0;
				if(preg_match_all($entityPattern, $tag[3], $entities, PREG_OFFSET_CAPTURE))
				{
					foreach($entities[0] as $entity)
					{
						if($entity[1] + 1 - $entitiesLength <= $left)
						{
							$left--;
							$entitiesLength += $this->ustrlen($entity[0]);
						}
						else
						{
							break;
						}
					}
				}

				$truncate .= $this->usubstr($tag[3], 0, $left + $entitiesLength);
				break;
			}
			else
			{
				$truncate .= $tag[3];
				$totalLength += $contentLength;
			}
			if($totalLength >= $length)
			{
				break;
			}
		}

		if(!$exact)
		{
			$spacepos = $this->ustrrpos($truncate, ' ');
			// ustrrpos() returns FALSE when there is no space at all; in that case leave the text alone.
			// (The former isset() test was always TRUE, which emptied the whole string.)
			if($spacepos !== false)
			{
				// Closing tags in the part about to be dropped have to be emitted again later
				$bits = $this->usubstr($truncate, $spacepos);
				preg_match_all('/<\/([a-z]+)>/i', $bits, $droppedTags, PREG_SET_ORDER);
				foreach($droppedTags as $closingTag)
				{
					if(!in_array($closingTag[1], $openTags))
					{
						array_unshift($openTags, $closingTag[1]);
					}
				}
				$truncate = $this->usubstr($truncate, 0, $spacepos);
			}
		}

		$truncate .= $ending;

		// Close everything still open, innermost first
		foreach($openTags as $tag)
		{
			$truncate .= '</'.$tag.'>';
		}
		return $truncate;
	}

	/**
	 * Truncate a HTML string to a maximum length $len - append the string $more if it was truncated
	 *
	 * Works on bytes. Characters inside tags are not counted, and a numeric entity ('&#...;') counts as one character.
	 * If the limit is reached while a tag is open (or before its closing tag has been seen), the text is cut
	 * just in front of that tag instead.
	 *
	 * @param string $text String to process
	 * @param integer $len [optional] Length of characters to be truncated - default 200
	 * @param string $more [optional] String which will be added if truncation - default ' ... '
	 * @return string
	 */
	public function html_truncate_old ($text, $len = 200, $more = ' ... ')
	{
		$pos = 0;
		$curlen = 0;
		$tmp_pos = 0;
		$intag = FALSE;
		$closing_tag = FALSE;
		$textLen = strlen($text);

		// Bounded by $pos rather than by the visible count: $curlen does not advance inside a tag,
		// so an unclosed tag at the end of the text used to run past the end of the string for ever.
		while($curlen < $len && $pos < $textLen)
		{
			// Square bracket offsets - the former $text{...} syntax is a parse error in PHP 8
			$char = $text[$pos];
			$next = ($pos + 1 < $textLen) ? $text[$pos + 1] : '';
			$prev = ($pos > 0) ? $text[$pos - 1] : '';

			if($char == "<")
			{
				if($next == "/")
				{
					$closing_tag = TRUE;
				}
				$intag = TRUE;
				// Remember where to cut if truncation happens before this element is closed
				$tmp_pos = $pos - 1;
				$pos++;
			}
			elseif($char == ">")
			{
				if($prev == "/")
				{
					$closing_tag = TRUE;
				}
				if($closing_tag == TRUE)
				{
					// End of a closing or self-closing tag - nothing left open to cut in front of
					$tmp_pos = 0;
					$closing_tag = FALSE;
				}
				$intag = FALSE;
				$pos++;
			}
			else
			{
				// Ordinary character; a complete numeric entity is skipped as a single unit
				$step = 1;
				if($char == "&" && $next == "#")
				{
					$end = strpos(substr($text, $pos, 7), ";");
					if($end !== FALSE)
					{
						$step = $end + 1;
					}
				}
				$pos += $step;
				if(!$intag)
				{
					$curlen++;
				}
			}
		}

		$ret = ($tmp_pos > 0 ? substr($text, 0, $tmp_pos+1) : substr($text, 0, $pos));
		if($pos < strlen($text))
		{
			$ret = $ret.$more;
		}
		return $ret;
	}

	/**
	 * Truncate a string of text to a maximum length $len - append the string $more if it was truncated
	 * Uses the u*() helpers, so the length is in characters rather than bytes when they are UTF-8 aware
	 *
	 * @param string $text - string to process
	 * @param integer $len - length of characters to be truncated
	 * @param string $more - string which will be added if truncation
	 * @return string
	 */
	public function text_truncate($text, $len = 200, $more = ' ... ')
	{
		// Always valid
		if($this->ustrlen($text) <= $len)
		{
			return $text;
		}

		$ret = $this->usubstr($text, 0, $len);

		// search for possible broken html entities
		// - if an & is in the last 8 chars, removing it and whatever follows shouldn't hurt
		// it should work for any characters encoding
		// FIXME - INVESTIGATE this one, switch to utf8 aware methods
		$tailStart = max(0, strlen($ret) - 8);
		$leftAmp = strrpos(substr($ret, $tailStart), '&');
		// Strict test: offset 0 is a valid hit. $tailStart keeps the arithmetic right when $ret is shorter than 8 bytes.
		if($leftAmp !== FALSE)
		{
			$ret = substr($ret, 0, $tailStart + $leftAmp);
		}

		return $ret.$more;
	}

	/**
	 * Collapses triple newlines into double ones and word-wraps the text using htmlwrap().
	 *
	 * @param string $text
	 * @param integer $wrap [optional] maximum 'word' length passed to htmlwrap()
	 * @return string
	 */
	function textclean ($text, $wrap = 100)
	{
		$text = str_replace("\n\n\n", "\n\n", $text);
		$text = $this->htmlwrap($text, $wrap);
		// NOTE(review): as they stand all four literals below hold just a line break, which makes this call a no-op.
		// They look as if they have lost their original content (presumably line break tags with adjacent spaces)
		// - confirm against a pristine copy of this file.
		$text = str_replace(array('
', '
', '
'), '
', $text);
		/* we can remove any linebreaks added by htmlwrap function as any \n's will be converted later anyway */
		return $text;
	}

	/**
	 * Test for text highlighting, and determine the text highlighting transformation.
	 *
	 * The result is cached in $this->e_highlighting; when highlighting is active the search
	 * expression taken from the referrer's 'q' or 'p' parameter is stored in $this->e_query.
	 *
	 * @return boolean TRUE if highlighting is active for this page display
	 */
	function checkHighlighting()
	{
		global $pref;

		if (!defined('e_SELF'))
		{
			// Still in startup, so can't calculate highlighting
			return FALSE;
		}

		if(!isset($this->e_highlighting))
		{
			$this->e_highlighting = FALSE;
			$shr = (isset($_SERVER['HTTP_REFERER']) ? $_SERVER['HTTP_REFERER'] : "");
			if(!empty($pref['search_highlight']) && (strpos(e_SELF, 'search.php') === FALSE) && ((strpos($shr, 'q=') !== FALSE) || (strpos($shr, 'p=') !== FALSE)))
			{
				$this->e_highlighting = TRUE;
				if(!isset($this->e_query))
				{
					// Always matches here: the referrer is known to contain 'q=' or 'p='
					preg_match('#(q|p)=(.*?)(&|$)#', $shr, $matches);
					$this->e_query = str_replace(array('+', '*', '"', ' '), array('', '.*?', '', '\b|\b'), trim(urldecode($matches[2])));
				}
			}
		}
		return $this->e_highlighting;
	}

	/**
	 * Converts the text (presumably retrieved from the database) for HTML output.
	 *
	 * @param string $text
	 * @param boolean $parseBB [optional]
	 * @param string $modifiers [optional] TITLE|SUMMARY|DESCRIPTION|BODY|RAW|LINKTEXT etc.
	 *					Comma-separated list, no spaces allowed
	 *					first modifier must be a CONTEXT modifier, in UPPER CASE.
* subsequent modifiers are lower case - see $this->e_Modifiers for possible values * @param mixed $postID [optional] * @param boolean $wrap [optional] * @return string * @todo complete the documentation of this essential method */ public function toHTML($text, $parseBB = FALSE, $modifiers = '', $postID = '', $wrap = FALSE) { if($text == '') { return $text; } global $pref, $fromadmin; // Set default modifiers to start $opts = $this->e_optDefault; // Now process any modifiers that are specified if ($modifiers) { $aMods = explode(',', $modifiers); // If there's a supermodifier, it must be first, and in uppercase $psm = trim($aMods[0]); if (isset($this->e_SuperMods[$psm])) { // Supermodifier found - override default values where necessary $opts = array_merge($opts,$this->e_SuperMods[$psm]); $opts['context'] = $psm; unset($aMods[0]); } // Now find any regular modifiers; use them to modify the context // (there should only be one or two out of the list of possibles) foreach ($aMods as $mod) { // Slight concession to varying coding styles - stripping spaces is a waste of CPU cycles! $mod = trim($mod); if (isset($this->e_Modifiers[$mod])) { // This is probably quicker than array_merge // - especially as usually only one or two loops foreach ($this->e_Modifiers[$mod] as $k => $v) { // Update our context-specific options $opts[$k] = $v; } } } } // Turn off a few things if not enabled in options if(!varsettrue($pref['smiley_activate'])) { $opts['emotes'] = FALSE; } if(!varsettrue($pref['make_clickable'])) { $opts['link_click'] = FALSE; } if(!varsettrue($pref['link_replace'])) { $opts['link_replace'] = FALSE; } $fromadmin = $opts['fromadmin']; // Convert defines(constants) within text. eg. Lan_XXXX - must be the entire text string (i.e. 
not embedded) // The check for '::' is a workaround for a bug in the Zend Optimiser 3.3.0 and PHP 5.2.4 combination // - causes crashes if '::' in site name //TODO - marj - find a way to use language method here XOR remove the limit of 24 characters. if($opts['defs'] && (strlen($text) < 35) && ((strpos($text, '::') === FALSE) && defined(trim($text)))) { return constant(trim($text)); } if ($opts['no_tags']) { $text = strip_tags($text); } // Make sure we have a valid count for word wrapping if (!$wrap && $pref['main_wordwrap']) { $wrap = $pref['main_wordwrap']; } // $text = " ".$text; // Now get on with the parsing $ret_parser = ''; $last_bbcode = ''; // So we can change them on each loop $saveOpts = $opts; if ($parseBB == FALSE) { $content = array($text); } else { // Split each text block into bits which are either within one of the 'key' bbcodes, or outside them // (Because we have to match end words, the 'extra' capturing subpattern gets added to output array. We strip it later) $content = preg_split('#(\[(table|html|php|code|scode|hide).*?\[/(?:\\2)\])#mis', $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE ); } // Use $full_text variable so its available to special bbcodes if required foreach ($content as $full_text) { $proc_funcs = TRUE; $convertNL = TRUE; // We may have 'captured' a bbcode word - strip it if so if ($last_bbcode == $full_text) { $last_bbcode = ''; $proc_funcs = FALSE; $full_text = ''; } else { // Set the options for this pass $opts = $saveOpts; // Have to have a good test in case a 'non-key' bbcode starts the block // - so pull out the bbcode parameters while we're there if (($parseBB !== FALSE) && preg_match('#(^\[(table|html|php|code|scode|hide)(.*?)\])(.*?)(\[/\\2\]$)#is', $full_text, $matches )) { // It's one of the 'key' bbcodes // Usually don't want 'normal' processing if its a 'special' bbcode $proc_funcs = FALSE; // $matches[0] - complete block from opening bracket of opening tag to closing bracket of closing tag // $matches[1] 
- complete opening tag (inclusive of brackets) // $matches[2] - bbcode word // $matches[3] - parameter, including '=' // $matches[4] - bit between the tags (i.e. text to process) // $matches[5] - closing tag // In case we decide to load a file $bbPath = e_CORE.'bbcodes/'; $bbFile = strtolower(str_replace('_', '', $matches[2])); $bbcode = ''; $className = ''; $full_text = ''; $code_text = $matches[4]; $parm = $matches[3] ? substr($matches[3],1) : ''; $last_bbcode = $matches[2]; switch ($matches[2]) { case 'php' : // Probably run the output through the normal processing functions - but put here so the PHP code can disable if desired $proc_funcs = TRUE; // This is just the contents of the php.bb file pulled in - its short, so will be quicker // $search = array(""", "'", "$", '
', E_NL, "->", "<br />"); // $replace = array('"', "'", "$", "\n", "\n", "->", "
"); // Shouldn't have any parameter on this bbcode // Not sure whether checks are necessary now we've reorganised // if (!$matches[3]) $bbcode = str_replace($search, $replace, $matches[4]); // Because we're bypassing most of the initial parser processing, we should be able to just reverse the effects of toDB() and execute the code // [SecretR] - avoid php code injections, missing php.bb will completely disable user posted php blocks $bbcode = file_get_contents($bbPath.$bbFile.'.bb'); if (!$matches[3]) { $code_text = html_entity_decode($matches[4], ENT_QUOTES, 'UTF-8'); } break; case 'html' : // This overrides and deprecates html.bb $proc_funcs = TRUE; $noBreak = TRUE; // $code_text = str_replace("\r\n", " ", $code_text); $code_text = html_entity_decode($code_text, ENT_QUOTES, CHARSET); $html_start = ""; // markers for html-to-bbcode replacement. $html_end = ""; $full_text = str_replace(array("[html]","[/html]"), "",$code_text); // quick fix.. security issue? $full_text =$this->replaceConstants($full_text,'abs'); $full_text = $html_start.$full_text.$html_end; $full_text = $this->parseBBTags($full_text); // strip tags. $opts['nobreak'] = true; break; case 'table' : // strip
from end of $convertNL = FALSE; // break; case 'hide' : $proc_funcs = TRUE; default : // Most bbcodes will just execute their normal file // @todo should we cache these bbcodes? require_once should make class-related codes quite efficient if (file_exists($bbPath.'bb_'.$bbFile.'.php')) { // Its a bbcode class file require_once($bbPath.'bb_'.$bbFile.'.php'); //echo "Load: {$bbFile}.php
"; $className = 'bb_'.$code; $this->bbList[$code] = new $className(); } elseif (file_exists($bbPath.$bbFile.'.bb')) { $bbcode = file_get_contents($bbPath.$bbFile.'.bb'); } } // end - switch ($matches[2]) if ($className) { $tempCode = new $className(); $full_text = $tempCode->bbPreDisplay($matches[4], $parm); } elseif ($bbcode) { // Execute the file $full_text = eval($bbcode); // Require output of bbcode to be returned // added to remove possibility of nested bbcode exploits ... // (same as in bbcode_handler - is it right that it just operates on $bbcode_return and not on $bbcode_output? - QUERY XXX-02 } if(strpos($full_text, '[') !== FALSE) { $exp_search = array('eval', 'expression'); $exp_replace = array('eval', 'expression'); $bbcode_return = str_replace($exp_search, $exp_replace, $full_text); } } } // Do the 'normal' processing - in principle, as previously - but think about the order. if ($proc_funcs && !empty($full_text)) // some more speed { // Split out and ignore any scripts and style blocks. With just two choices we can match the closing tag in the regex $subcon = preg_split('#((?:]+>.*?|tyle[^>]+>.*?))#mis', $full_text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE ); foreach ($subcon as $sub_blk) { if(substr($sub_blk, 0, 7) == '"; } $ret_parser .= $sub_blk; } else { // Do 'normal' processing on a chunk // Could put tag stripping in here /* // Line break compression - filter white space after HTML tags - among other things, ensures HTML tables display properly // Hopefully now achieved by other means if ($convertNL && !$opts['nobreak']) { $sub_blk = preg_replace("#>\s*[\r]*\n[\r]*#", ">", $sub_blk); } */ // Link substitution // Convert URL's to clickable links, unless modifiers or prefs override if ($opts['link_click']) { if ($opts['link_replace']) { $_ext = ($pref['links_new_window'] ? 
" rel=\"external\"" : ""); // $sub_blk = preg_replace("#(^|[\s])([\w]+?://(?:[\w-%]+?)(?:\.[\w-%]+?)+.*?)(?=$|[\s()[\]<]|\.\s|\.$|,\s|,$)#is", "\\1".$pref['link_text']."", $sub_blk); // $sub_blk = preg_replace("#(^|[\s])((?:www|ftp)(?:\.[\w-%]+?){2}.*?)(?=$|[\s()[\]<]|\.\s|\.$|,\s|,$)#is", "\\1".$pref['link_text']."", $sub_blk); $sub_blk = preg_replace("#(^|[\s])([\w]+?://(?:[\w-%]+?)(?:\.[\w-%]+?)+.*?)(?=$|[\s[\]<]|\.\s|\.$|,\s|,$)#is", "\\1".$pref['link_text']."", $sub_blk); $sub_blk = preg_replace("#(^|[\s])((?:www|ftp)(?:\.[\w-%]+?){2}.*?)(?=$|[\s[\]<]|\.\s|\.$|,\s|,$)#is", "\\1".$pref['link_text']."", $sub_blk); $email_text = ($pref['email_text']) ? $this->replaceConstants($pref['email_text']) : LAN_EMAIL_SUBS; $sub_blk = preg_replace("#([\n ])([a-z0-9\-_.]+?)@([\w\-]+\.([\w\-\.]+\.)*[\w]+)#i", "\\1".$email_text."", $sub_blk); } else { $email_text = '$1$2©$3'; // $sub_blk = preg_replace("#(^|[\s])([\w]+?://(?:[\w-%]+?)(?:\.[\w-%]+?)+.*?)(?=$|[\s()[\]<]|\.\s|\.$|,\s|,$)#is", "\\1\\2", $sub_blk); // $sub_blk = preg_replace("#(^|[\s])((?:www|ftp)(?:\.[\w-%]+?){2}.*?)(?=$|[\s()[\]<]|\.\s|\.$|,\s|,$)#is", "\\1\\2", $sub_blk); $sub_blk = preg_replace("#(^|[\s])([\w]+?://(?:[\w-%]+?)(?:\.[\w-%]+?)+.*?)(?=$|[\s[\]<]|\.\s|\.$|,\s|,$)#is", "\\1\\2", $sub_blk); $sub_blk = preg_replace("#(^|[\s])((?:www|ftp)(?:\.[\w-%]+?){2}.*?)(?=$|[\s[\]<]|\.\s|\.$|,\s|,$)#is", "\\1\\2", $sub_blk); $sub_blk = preg_replace("#([\n ])([a-z0-9\-_.]+?)@([\w\-]+\.([\w\-\.]+\.)*[\w]+)#i", "\\1".$email_text."", $sub_blk); } } // Convert emoticons to graphical icons, if enabled if ($opts['emotes']) { if (!is_object($this->e_emote)) { require_once(e_HANDLER.'emote_filter.php'); $this->e_emote = new e_emoteFilter; } $sub_blk = $this->e_emote->filterEmotes($sub_blk); } // Reduce newlines in all forms to a single newline character (finds '\n', '\r\n', '\n\r') if (!$opts['nobreak']) { if ($convertNL && substr($sub_blk,0,6) != '[html]') //XXX Quick Fix, find a cleaner way. 
{ // We may need to convert to
later $sub_blk = preg_replace("#[\r]*\n[\r]*#", E_NL, $sub_blk); } else { // Not doing any more - its HTML so keep \n so HTML is formatted $sub_blk = preg_replace("#[\r]*\n[\r]*#", "\n", $sub_blk); } } // Entity conversion // Restore entity form of quotes and such to single characters, except for text destined for tag attributes or JS. if($opts['value']) { // output used for attribute values. $sub_blk = str_replace($this->replace, $this->search, $sub_blk); } else { // output not used for attribute values. $sub_blk = str_replace($this->search, $this->replace, $sub_blk); } // BBCode processing (other than the four already done, which shouldn't appear at all in the text) if ($parseBB !== FALSE) { if (!is_object($this->e_bb)) { require_once(e_HANDLER.'bbcode_handler.php'); $this->e_bb = new e_bbcode; } if ($parseBB === TRUE) { // 'Normal' or 'legacy' processing if($modifiers == "WYSIWYG") { $sub_blk = $this->e_bb->parseBBCodes($sub_blk, $postID, 'wysiwyg'); } else { $sub_blk = $this->e_bb->parseBBCodes($sub_blk, $postID); } } elseif ($parseBB === 'STRIP') { // Need to strip all BBCodes $sub_blk = $this->e_bb->parseBBCodes($sub_blk, $postID, 'default', TRUE); } else { // Need to strip just some BBCodes $sub_blk = $this->e_bb->parseBBCodes($sub_blk, $postID, 'default', $parseBB); } } // replace all {e_XXX} constants with their e107 value. 
modifier determines relative/absolute conversion // (Moved to after bbcode processing by Cameron) if ($opts['constants']) { $sub_blk = $this->replaceConstants($sub_blk, $opts['constants']); // Now decodes text values } // profanity filter if ($pref['profanity_filter']) { if (!is_object($this->e_pf)) { require_once(e_HANDLER."profanity_filter.php"); $this->e_pf = new e_profanityFilter; } $sub_blk = $this->e_pf->filterProfanities($sub_blk); } // Shortcodes // Optional short-code conversion if ($opts['parse_sc']) { $sub_blk = $this->parseTemplate($sub_blk, TRUE); } //Run any hooked in parsers if ($opts['hook']) { if ( varset($pref['tohtml_hook'])) { //Process the older tohtml_hook pref (deprecated) foreach(explode(",", $pref['tohtml_hook']) as $hook) { if (!is_object($this->e_hook[$hook])) { if(is_readable(e_PLUGIN.$hook."/".$hook.".php")) { require_once(e_PLUGIN.$hook."/".$hook.".php"); $hook_class = "e_".$hook; $this->e_hook[$hook] = new $hook_class; } } $sub_blk = $this->e_hook[$hook]->$hook($sub_blk,$opts['context']); } } if(isset($pref['e_tohtml_list']) && is_array($pref['e_tohtml_list'])) { foreach($pref['e_tohtml_list'] as $hook) { if (!is_object($this->e_hook[$hook])) { if(is_readable(e_PLUGIN.$hook."/e_tohtml.php")) { require_once(e_PLUGIN.$hook."/e_tohtml.php"); $hook_class = "e_tohtml_".$hook; $this->e_hook[$hook] = new $hook_class; } } $sub_blk = $this->e_hook[$hook]->to_html($sub_blk, $opts['context']); } } } // Word wrap if ($wrap && !$opts['nobreak']) { $sub_blk = $this->textclean($sub_blk, $wrap); } // Search highlighting if ($opts['emotes']) // Why?? { if ($this->checkHighlighting()) { $sub_blk = $this->e_highlight($sub_blk, $this->e_query); } } if ($convertNL) { // Default replaces all \n with
for HTML display $nl_replace = '
'; if ($opts['nobreak']) { $nl_replace = ''; } elseif ($opts['retain_nl']) { $nl_replace = "\n"; } $sub_blk = str_replace(E_NL, $nl_replace, $sub_blk); } $ret_parser .= $sub_blk; } // End of 'normal' processing for a block of text } // End of 'foreach() on each block of non-script text } // End of 'normal' parsing (non-script text) else { // Text block that needed no processing at all $ret_parser .= $full_text; } } return trim($ret_parser); } function toAttribute($text) { // URLs posted without HTML access may have an & in them. $text = str_replace('&', '&', $text); // Xhtml compliance. $text = htmlspecialchars($text, ENT_QUOTES, 'UTF-8'); if(!preg_match('/&#|\'|"|\(|\)|<|>/s', $text)) { $text = $this->replaceConstants($text); return $text; } else { return ''; } } /** * Convert text blocks which are to be embedded within JS * * @param string|array $stringarray * @return string */ public function toJS($stringarray) { $search = array("\r\n", "\r", "
", "'"); $replace = array("\\n", "", "\\n", "\'"); $stringarray = str_replace($search, $replace, $stringarray); $stringarray = strip_tags($stringarray); $trans_tbl = get_html_translation_table(HTML_ENTITIES); $trans_tbl = array_flip($trans_tbl); return strtr($stringarray, $trans_tbl); } /** * Convert Text for RSS/XML use. * * @param string $text * @param boolean $tags [optional] * @return string */ function toRss($text, $tags = FALSE) { if($tags != TRUE) { $text = $this -> toHTML($text, TRUE); $text = strip_tags($text); } $text = $this->toEmail($text); $search = array("&#039;", "&#036;", "'", "$"," & ", e_BASE, "href='request.php"); $replace = array("'", '$', "'", '$',' & ', SITEURL, "href='".SITEURL."request.php" ); $text = str_replace($search, $replace, $text); if($tags == TRUE && ($text)) { $text = ""; } return $text; } //Convert specific characters back to original form, for use in storing code (or regex) values in the db. function toText($text) { $search = array("&#039;", "&#036;", "'", "$", "\", "&#092;"); $replace = array("'", '$', "'", '$', "\\", "\\"); $text = str_replace($search, $replace, $text); return $text; } /** * @param $url * @param $options ?? * @param $raw ?? * @param $full */ public function thumbUrl($url, $options = array(), $raw = false, $full = false) { if(substr($url,0,3)=="{e_") // Fix for broken links that use {e_MEDIA} etc. { //$url = $this->replaceConstants($url,'abs'); // always switch to 'nice' urls when SC is used $url = str_replace($this->getUrlConstants('sc'), $this->getUrlConstants('raw'), $url); } if(!is_array($options)) { parse_str($options, $options); } if($raw) $url = $this->createConstants($url, 'mix'); $baseurl = ($full ? SITEURL : e_HTTP).'thumb.php?'; $thurl = 'src='.$url.'&'; if(vartrue($options['aw']) || vartrue($options['ah']) || $this->thumbCrop == 1) { if($this->thumbCrop == 1 && !vartrue($options['aw']) && !vartrue($options['ah'])) // Allow templates to determine dimensions. 
See {SETIMAGE} { $options['aw'] = $this->thumbWidth; $options['ah'] = $this->thumbHeight; } $thurl .= 'aw='.((integer) vartrue($options['aw'], 0)).'&ah='.((integer) vartrue($options['ah'], 0)); } else { if(!vartrue($options['w']) && !vartrue($options['h'])) // Allow templates to determine dimensions. See {SETIMAGE} { $options['w'] = $this->thumbWidth; $options['h'] = $this->thumbHeight; } $thurl .= 'w='.((integer) vartrue($options['w'], 0)).'&h='.((integer) vartrue($options['h'], 0)); } if(vartrue($options['x']))//base64 encode url { $thurl = 'id='.base64_encode($thurl); } // echo "

".$thurl; return $baseurl.$thurl; } /** * Help for converting to more safe URLs * e.g. {e_MEDIA_FILE}path/to/video.flv => e_MEDIA_FILE/path/to/video.flv * * @todo support for ALL URL shortcodes (replacement methods) * @param string $type sc|raw|rev|all * @return array */ public function getUrlConstants($type = 'sc') { // sub-folders first! static $array = array( 'e_MEDIA_FILE/' => '{e_MEDIA_FILE}', 'e_MEDIA_VIDEO/' => '{e_MEDIA_VIDEO}', 'e_MEDIA_IMAGE/' => '{e_MEDIA_IMAGE}', 'e_MEDIA_ICON/' => '{e_MEDIA_ICON}', 'e_AVATAR/' => '{e_AVATAR}', 'e_WEB_JS/' => '{e_WEB_JS}', 'e_WEB_CSS/' => '{e_WEB_CSS}', 'e_WEB_IMAGE/' => '{e_WEB_IMAGE}', // 'e_WEB_PACK/' => '{e_WEB_PACK}', 'e_BASE/' => '{e_BASE}', 'e_ADMIN/' => '{e_ADMIN}', 'e_IMAGE/' => '{e_IMAGE}', 'e_THEME/' => '{e_THEME}', 'e_PLUGIN/' => '{e_PLUGIN}', 'e_HANDLER/' => '{e_HANDLER}', // BC 'e_MEDIA/' => '{e_MEDIA}', 'e_WEB/' => '{e_ADMIN}', ); switch ($type) { case 'sc': return array_values($array); break; case 'raw': return array_keys($array); break; case 'rev': return array_reverse($array, true); break; case 'all': return $array; break; } return array(); } /** * Replace e107 path constants * Note: only an ADMIN user can convert {e_ADMIN} * TODO - runtime cache of search/replace arrays (object property) when $mode !== '' * @param string $text * @param string $mode [optional] abs|full "full" = produce absolute URL path, e.g. http://sitename.com/e107_plugins/etc * 'abs' = produce truncated URL path, e.g. e107plugins/etc * "" (default) = URL's get relative path e.g. ../e107_plugins/etc * @param mixed $all [optional] if TRUE, then when $mode is "full" or TRUE, USERID is also replaced... 
* when $mode is "" (default), ALL other e107 constants are replaced
 *
 * Two code paths:
 *  - $mode non-empty: a fixed list of {e_XXX} / {THEME} tokens is swapped positionally via str_replace().
 *    'abs' and 'full' use the URL lists; any other non-empty value uses the folder-name list.
 *  - $mode empty: tokens are matched by regex and resolved through doReplace(), then {THEME} and
 *    {THEME_ABS} are substituted (or blanked when THEME is not defined).
 * @return string
 */
public function replaceConstants($text, $mode = '', $all = FALSE)
{
	if($mode != "")
	{
		$e107 = e107::getInstance();

		// NOTE: the three replacement lists below are positional - each entry must stay index-aligned with $search.
		$replace_relative = array(
			$e107->getFolder('media_files'),
			$e107->getFolder('media_video'),
			$e107->getFolder('media_image'),
			$e107->getFolder('media_icon'),
			$e107->getFolder('avatars'),
			$e107->getFolder('web_js'),
			$e107->getFolder('web_css'),
			$e107->getFolder('web_image'),
			//$e107->getFolder('web_pack'),
			e_IMAGE_ABS,
			e_THEME_ABS,
			$e107->getFolder('images'),
			$e107->getFolder('plugins'),
			$e107->getFolder('files'),
			$e107->getFolder('themes'),
			// $e107->getFolder('downloads'),
			$e107->getFolder('handlers'),
			$e107->getFolder('media'),
			$e107->getFolder('web'),
			$e107->site_theme ? $e107->getFolder('themes').$e107->site_theme.'/' : '',
			defset('THEME_ABS'),
			(ADMIN ? $e107->getFolder('admin') : ''), // {e_ADMIN} is only resolved for admins
			'',
			$e107->getFolder('core'),
			$e107->getFolder('system'),
		);

		// Only filled for 'abs' / 'full'; initialised so the {USERID} append below never touches an undefined variable.
		$replace_absolute = array();

		switch ($mode)
		{
			case 'abs':
				$replace_absolute = array(
					e_MEDIA_FILE_ABS,
					e_MEDIA_VIDEO_ABS,
					e_MEDIA_IMAGE_ABS,
					e_MEDIA_ICON_ABS,
					e_AVATAR_ABS,
					e_JS_ABS,
					e_CSS_ABS,
					e_WEB_IMAGE_ABS,
					// e_PACK_ABS,
					e_IMAGE_ABS,
					e_THEME_ABS,
					e_IMAGE_ABS,
					e_PLUGIN_ABS,
					e_FILE_ABS,
					e_THEME_ABS,
					// e_DOWNLOAD_ABS, //impossible when download is done via php.
					'', // handlers - no ABS path available
					e_MEDIA_ABS,
					e_WEB_ABS,
					defset('THEME_ABS'),
					defset('THEME_ABS'),
					(ADMIN ? e_ADMIN_ABS : ''),
					$e107->server_path,
					'', // no e_CORE absolute path
					'', // no e_SYSTEM absolute path
				);
			break;

			case 'full':
				$replace_absolute = array(
					SITEURLBASE.e_MEDIA_FILE_ABS,
					SITEURLBASE.e_MEDIA_VIDEO_ABS,
					SITEURLBASE.e_MEDIA_IMAGE_ABS,
					SITEURLBASE.e_MEDIA_ICON_ABS,
					SITEURLBASE.e_AVATAR_ABS,
					SITEURLBASE.e_JS_ABS,
					SITEURLBASE.e_CSS_ABS,
					SITEURLBASE.e_WEB_IMAGE_ABS,
					// SITEURLBASE.e_PACK_ABS,
					SITEURLBASE.e_IMAGE_ABS,
					SITEURLBASE.e_THEME_ABS,
					SITEURLBASE.e_IMAGE_ABS,
					SITEURLBASE.e_PLUGIN_ABS,
					SITEURLBASE.e_FILE_ABS, // deprecated
					SITEURLBASE.e_THEME_ABS,
					//SITEURL.$e107->getFolder('downloads'),
					'', // handlers - no ABS path available
					SITEURLBASE.e_MEDIA_ABS,
					SITEURLBASE.e_WEB_ABS,
					defset('THEME_ABS') ? SITEURLBASE.THEME_ABS : '',
					defset('THEME_ABS') ? SITEURLBASE.THEME_ABS : '',
					(ADMIN ? SITEURLBASE.e_ADMIN_ABS : ''),
					SITEURL,
					'', // no e_CORE absolute path
					'', // no e_SYSTEM absolute path
				);
			break;
		}

		// sub-folders first!
		$search = array(
			'{e_MEDIA_FILE}',
			'{e_MEDIA_VIDEO}',
			'{e_MEDIA_IMAGE}',
			'{e_MEDIA_ICON}',
			'{e_AVATAR}',
			'{e_WEB_JS}',
			'{e_WEB_CSS}',
			'{e_WEB_IMAGE}',
			// '{e_WEB_PACK}',
			"{e_IMAGE_ABS}",
			"{e_THEME_ABS}",
			"{e_IMAGE}",
			"{e_PLUGIN}",
			"{e_FILE}",
			"{e_THEME}",
			//,"{e_DOWNLOAD}"
			"{e_HANDLER}",
			"{e_MEDIA}",
			"{e_WEB}",
			"{THEME}",
			"{THEME_ABS}",
			"{e_ADMIN}",
			"{e_BASE}",
			"{e_CORE}",
			"{e_SYSTEM}",
		);

		if ($all)
		{
			// {USERID} can only be replaced with a valid number for logged in users; guests get an empty string.
			$userId = USER ? USERID : '';
			$replace_relative[] = $userId;
			$replace_absolute[] = $userId;
			$search[] = "{USERID}";
		}

		$replace = ((string)$mode == "full" || (string)$mode=='abs' ) ? $replace_absolute : $replace_relative;
		return str_replace($search,$replace,$text);
	}

	// Default mode: resolve tokens through defined PHP constants.
	// With $all any {NAME} token is a candidate; otherwise only {e_XXX} tokens (optionally with one known suffix).
	$pattern = ($all ? '#\{([A-Za-z_0-9]*)\}#s' : '#\{(e_[A-Z]*(?:_IMAGE|_VIDEO|_FILE|_CONTENT|_ICON|_AVATAR|_JS|_CSS|_PACK|_DB|_ABS){0,1})\}#s');
	$text = preg_replace_callback($pattern, array($this, 'doReplace'), $text);

	if(!defined('THEME'))
	{
		// THEME not available: strip the theme tokens instead of leaving them in the output.
		return str_replace(array('{THEME}', '{THEME_ABS}'), '', $text);
	}

	return str_replace(array('{THEME}', '{THEME_ABS}'), array(THEME, THEME_ABS), $text);
}

/**
 * preg_replace_callback() handler used by replaceConstants().
 *
 * @param array $matches $matches[1] is the token name without its surrounding braces
 * @return mixed value of the constant named $matches[1] when it is defined and either deftrue('ADMIN') passes
 *               or the name does not contain 'ADMIN'; otherwise the bare name (the braces are NOT restored)
 */
function doReplace($matches)
{
	$constName = $matches[1];
	$adminOnly = (strpos($constName, 'ADMIN') !== FALSE);

	if(defined($constName) && (deftrue('ADMIN') || !$adminOnly))
	{
		return constant($constName);
	}
	return $constName;
}

/**
 * Create and substitute e107 constants in passed URL
 *
 * Only a match at the very start of $url is replaced, and only the first matching entry is applied.
 * Entries whose path value is empty are ignored - an empty prefix would otherwise "match" every URL and
 * the placeholder would be prepended to it.
 *
 * NOTE(review): mode 0 asks getFolder() for plural keys ('media_videos', 'media_images', 'media_icons',
 * 'web_images') while replaceConstants() uses the singular forms - confirm which keys getFolder() accepts.
 *
 * @param string $url
 * @param integer $mode 0-folders, 1-relative ('rel'), 2-absolute ('abs'), 3-full ('full') (with domain), 4-absolute & relative ('mix') (combination of 1,2,3), 5-'nice'
 * @return string
 */
public function createConstants($url, $mode = 0)
{
	//FIXME - create constants for absolute paths and site URL's

	// Translate the symbolic mode names to their numeric equivalents.
	if (!is_numeric($mode))
	{
		switch ($mode)
		{
			case 'rel' : $mode = 1; break;
			case 'abs' : $mode = 2; break;
			case 'full' : $mode = 3; break;
			case 'mix' : $mode = 4; break;
			case 'nice': $mode = 5; break;
		}
	}

	$e107 = e107::getInstance();

	switch($mode)
	{
		case 0: // folder name only.
			$tmp = array(
				'{e_MEDIA_FILE}'	=> $e107->getFolder('media_files'),
				'{e_MEDIA_VIDEO}'	=> $e107->getFolder('media_videos'),
				'{e_MEDIA_IMAGE}'	=> $e107->getFolder('media_images'),
				'{e_MEDIA_ICON}'	=> $e107->getFolder('media_icons'),
				'{e_AVATAR}'		=> $e107->getFolder('avatars'),
				'{e_WEB_JS}'		=> $e107->getFolder('web_js'),
				'{e_WEB_CSS}'		=> $e107->getFolder('web_css'),
				'{e_WEB_IMAGE}'		=> $e107->getFolder('web_images'),
				// '{e_WEB_PACK}'	=> $e107->getFolder('web_packs'),
				'{e_IMAGE}'			=> $e107->getFolder('images'),
				'{e_PLUGIN}'		=> $e107->getFolder('plugins'),
				'{e_FILE}'			=> $e107->getFolder('files'),
				'{e_THEME}'			=> $e107->getFolder('themes'),
				'{e_DOWNLOAD}'		=> $e107->getFolder('downloads'),
				'{e_ADMIN}'			=> $e107->getFolder('admin'),
				'{e_HANDLER}'		=> $e107->getFolder('handlers'),
				'{e_MEDIA}'			=> $e107->getFolder('media'),
				'{e_WEB}'			=> $e107->getFolder('web'),
				'{e_UPLOAD}'		=> $e107->getFolder('uploads'),
			);
		break;

		case 1: // relative path only
			$tmp = array(
				'{e_MEDIA_FILE}'	=> e_MEDIA_FILE,
				'{e_MEDIA_VIDEO}'	=> e_MEDIA_VIDEO,
				'{e_MEDIA_IMAGE}'	=> e_MEDIA_IMAGE,
				'{e_MEDIA_ICON}'	=> e_MEDIA_ICON,
				'{e_AVATAR}'		=> e_AVATAR,
				'{e_WEB_JS}'		=> e_WEB_JS,
				'{e_WEB_CSS}'		=> e_WEB_CSS,
				'{e_WEB_IMAGE}'		=> e_WEB_IMAGE,
				// '{e_WEB_PACK}'	=> e_WEB_PACK,
				'{e_IMAGE}'			=> e_IMAGE,
				'{e_PLUGIN}'		=> e_PLUGIN,
				'{e_FILE}'			=> e_FILE,
				'{e_THEME}'			=> e_THEME,
				'{e_DOWNLOAD}'		=> e_DOWNLOAD,
				'{e_ADMIN}'			=> e_ADMIN,
				'{e_HANDLER}'		=> e_HANDLER,
				'{e_MEDIA}'			=> e_MEDIA,
				'{e_WEB}'			=> e_WEB,
				'{e_UPLOAD}'		=> e_UPLOAD,
			);
		break;

		case 2: // absolute path only
			$tmp = array(
				'{e_MEDIA_FILE}'	=> e_MEDIA_FILE_ABS,
				'{e_MEDIA_VIDEO}'	=> e_MEDIA_VIDEO_ABS,
				'{e_MEDIA_IMAGE}'	=> e_MEDIA_IMAGE_ABS,
				'{e_MEDIA_ICON}'	=> e_MEDIA_ICON_ABS,
				'{e_AVATAR}'		=> e_AVATAR_ABS,
				'{e_WEB_JS}'		=> e_JS_ABS,
				'{e_WEB_CSS}'		=> e_CSS_ABS,
				'{e_WEB_IMAGE}'		=> e_WEB_IMAGE_ABS,
				// '{e_WEB_PACK}'	=> e_PACK_ABS,
				'{e_IMAGE}'			=> e_IMAGE_ABS,
				'{e_PLUGIN}'		=> e_PLUGIN_ABS,
				'{e_FILE}'			=> e_FILE_ABS, // deprecated
				'{e_THEME}'			=> e_THEME_ABS,
				'{e_DOWNLOAD}'		=> e_HTTP.'request.php?',// FIXME - we need solution!
				'{e_ADMIN}'			=> e_ADMIN_ABS,
				//'{e_HANDLER}'		=> e_HANDLER_ABS, - no ABS path available
				'{e_MEDIA}'			=> e_MEDIA_ABS,
				'{e_WEB}'			=> e_WEB_ABS,
				'{e_BASE}'			=> e_HTTP,
			);
		break;

		case 3: // full path (e.g http://domain.com/e107_images/)
			$tmp = array(
				'{e_MEDIA_FILE}'	=> SITEURLBASE.e_MEDIA_FILE_ABS,
				'{e_MEDIA_VIDEO}'	=> SITEURLBASE.e_MEDIA_VIDEO_ABS,
				'{e_MEDIA_IMAGE}'	=> SITEURLBASE.e_MEDIA_IMAGE_ABS,
				'{e_MEDIA_ICON}'	=> SITEURLBASE.e_MEDIA_ICON_ABS,
				'{e_AVATAR}'		=> SITEURLBASE.e_AVATAR_ABS,
				'{e_WEB_JS}'		=> SITEURLBASE.e_JS_ABS,
				'{e_WEB_CSS}'		=> SITEURLBASE.e_CSS_ABS,
				'{e_WEB_IMAGE}'		=> SITEURLBASE.e_WEB_IMAGE_ABS,
				// '{e_WEB_PACK}'	=> SITEURLBASE.e_PACK_ABS,
				'{e_IMAGE}'			=> SITEURLBASE.e_IMAGE_ABS,
				'{e_PLUGIN}'		=> SITEURLBASE.e_PLUGIN_ABS,
				'{e_FILE}'			=> SITEURLBASE.e_FILE_ABS, // deprecated
				'{e_THEME}'			=> SITEURLBASE.e_THEME_ABS,
				'{e_DOWNLOAD}'		=> SITEURLBASE.e_HTTP.'request.php?',// FIXME - we need solution!
				'{e_ADMIN}'			=> SITEURLBASE.e_ADMIN_ABS,
				//'{e_HANDLER}'		=> e_HANDLER_ABS, - no ABS path available
				'{e_MEDIA}'			=> SITEURLBASE.e_MEDIA_ABS,
				'{e_WEB}'			=> SITEURLBASE.e_WEB_ABS,
				'{e_BASE}'			=> SITEURL,
			);
		break;

		case 4: // absolute & relative paths - try full, then absolute, then relative
			$url = $this->createConstants($url, 3);
			$url = $this->createConstants($url, 2);
			$url = $this->createConstants($url, 1);
			return $url;

		case 5: // nice urls - e.g. e_MEDIA_VIDEO/mystream.flv
			$url = $this->createConstants($url, 4);
			return str_replace($this->getUrlConstants('sc'), $this->getUrlConstants('raw'), $url);

		default:
			$tmp = array();
		break;
	}

	foreach($tmp as $key=>$val)
	{
		$val = (string) $val;
		$len = strlen($val);

		if($len === 0)
		{
			// An empty (or missing) path is a prefix of everything - never treat it as a match.
			continue;
		}

		if(substr($url, 0, $len) === $val)
		{
			// replace the first instance only
			return substr_replace($url, $key, 0, $len);
		}
	}

	return $url;
}

//FIXME - $match not used?
/**
 * Apply the search-highlight substitution to the text that lies outside of HTML tags.
 *
 * Tags are lifted out (replaced by the placeholder "<|>"), the substitution runs on the remaining text,
 * and the tags are then put back in their original order. The tags are spliced back literally, so a tag
 * containing "$1", "\\1" or similar is restored unchanged rather than being read as a regex back-reference.
 *
 * NOTE(review): the highlight pattern is single-quoted, so $match is never interpolated, and the replacement
 * is just the matched text. As written the substitution step has no visible effect. It is left untouched
 * because neither the intended wrapper markup nor the expected form of $match (plain text or regex
 * fragment) can be seen from this method - confirm before changing.
 *
 * @param string $text HTML fragment
 * @param string $match search term (currently unused, see note)
 * @return string
 */
function e_highlight($text, $match)
{
	$tags = array();
	preg_match_all('#<[^>]+>#', $text, $tags);
	$text = preg_replace('#<[^>]+>#', '<|>', $text);
	$text = preg_replace('#(\b".$match."\b)#i', '\\1', $text);

	// Re-insert the tags in order: piece, tag, piece, tag, ...
	$pieces = explode('<|>', $text);
	$restored = array_shift($pieces);
	foreach ($pieces as $idx => $piece)
	{
		// More placeholders than tags should not happen; keep the placeholder visible if it does.
		$restored .= (isset($tags[0][$idx]) ? $tags[0][$idx] : '<|>').$piece;
	}
	return $restored;
}

/**
 * Prepare text for use in an email: optionally undo posted-data quoting, expand e107 constants to
 * full URLs (skipped when $mods is "rawtext", case-insensitive) and run the result through toHTML().
 *
 * @param string $text
 * @param mixed $posted only the exact value TRUE enables the posted-data handling
 * @param string $mods modifier string handed to toHTML()
 * @return string
 */
public function toEmail($text, $posted = "", $mods = "parse_sc, no_make_clickable")
{
	if ($posted === TRUE)
	{
		if (MAGIC_QUOTES_GPC)
		{
			$text = stripslashes($text);
		}
		// NOTE(review): as written this replaces "[php" with itself; the replacement was presumably meant
		// to neutralise the opening bracket - confirm the intended replacement text.
		$text = preg_replace('#\[(php)#i', '[\\1', $text);
	}

	if (strtolower($mods) != "rawtext")
	{
		$text = $this->replaceConstants($text, "full");
	}

	return $this->toHTML($text, TRUE, $mods);
}

/**
 * Given an email address, returns a link including js-based obfuscation
 *
 * NOTE(review): as written only $words is returned; $reassembled and the '?subject=' string are built
 * but never used, so no link is produced. The markup they were meant for is not present here - confirm
 * and restore it rather than removing the unused values.
 *
 * @param string $email address to obfuscate; an empty string is returned when it contains no '@'
 * @param string $words link text
 * @param string $subject optional subject
 * @return string
 */
function emailObfuscate($email, $words = '', $subject = '')
{
	if(strpos($email, '@') === FALSE)
	{
		return '';
	}

	if ($subject)
	{
		$subject = '?subject='.$subject;
	}

	// Split at the first '@' only, then rebuild the address as a JS string concatenation.
	list($name, $address) = explode('@', $email, 2);
	$reassembled = '"'.$name.'"+"@"+"'.$address.'"';

	return "".$words.'';
}

/**
 * Lazy loader for properties that are not set yet.
 * 'e_sc' is fetched from e107::getScParser() and stored on the object, so later reads bypass __get().
 * Any other name raises an E_USER_WARNING and yields NULL.
 *
 * @param string $name
 * @return mixed
 */
public function __get($name)
{
	switch($name)
	{
		case 'e_sc':
			$ret = e107::getScParser();
		break;

		default:
			trigger_error('$e107->$'.$name.' not defined', E_USER_WARNING);
			return NULL;
	}

	$this->$name = $ret;
	return $ret;
}
}

/**
 * Start Fresh and Build on it over time to become eventual replacement to e_parse.
 * Cameron's DOM-based parser.
*/
class e_parser
{
	// DOMDocument used by the parsing methods; created by init().
	public $domObj = null;

	// Report of what cleanHtml() / invalidAttributeVal() removed or sanitized.
	private $removedList = array();

	// Work lists filled while cleanHtml() walks the document.
	private $nodesToDelete = array();
	private $nodesToConvert = array();
	private $pathList = array();

	// Attribute whitelist per tag; 'default' applies to tags without their own entry.
	private $allowedAttributes = array(
		'default'	=> array('id', 'style', 'class'),
		'img'		=> array('id', 'src', 'style', 'class', 'alt', 'title', 'width', 'height'),
		'a'			=> array('id', 'href', 'style', 'class', 'title'),
	);

	// Regex fragments for forbidden attribute values. invalidAttributeVal() wraps each fragment as
	// '/'.$fragment.'/i', so a literal '/' inside a fragment has to be escaped - otherwise the pattern
	// fails to compile and the entry can never match.
	private $badAttrValues = array(
		'javascript[\s]*?:',
		'alert\(',
		'vbscript[\s]*?:',
		'data:text\/html',
		'mhtml[\s]*?:',
		'data:[\s]*?image',
	);

	// Tag whitelist used by cleanHtml().
	private $allowedTags = array(
		'html', 'body', 'div', 'a', 'img', 'table', 'tr', 'td', 'th', 'tbody', 'thead', 'colgroup',
		'b', 'i', 'pre', 'code', 'strong', 'u', 'em', 'ul', 'li',
		'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p',
		'section', 'article', 'blockquote', 'hgroup', 'aside', 'figure', 'span',
		'video', 'br', 'small', 'caption',
	);

	public function __construct()
	{
		$this->init();
	}

	/**
	 * Used by e_parse to start
	 * Creates a fresh DOMDocument and stores it in $domObj.
	 */
	function init()
	{
		$this->domObj = new DOMDocument();
	}

	/**
	 * Set Allowed Tags.
	 * Replaces the whole whitelist; calling it without an argument empties the list.
	 * @param $array list of tag names
	 */
	public function setAllowedTags($array=array())
	{
		$this->allowedTags = $array;
	}

	/**
	 * Set Allowed Attributes.
	 * Replaces the whole map. cleanHtml() reads the 'default' key for tags without their own entry,
	 * so a replacement map should keep that key.
	 * @param $array map of tag name => list of attribute names
	 */
	public function setAllowedAttributes($array=array())
	{
		$this->allowedAttributes = $array;
	}

	/**
	 * Add leading zeros to a number. eg. 3 might become 000003
	 * @param $num integer
	 * @param $numDigits - total number of digits
	 * @return number with leading zeros.
	 */
	public function leadingZeros($num,$numDigits)
	{
		return sprintf("%0".$numDigits."d",$num);
	}

	/**
	 * Return an Array of all specific tags found in an HTML document and their attributes.
	 * @param $html - raw html code
	 * @param $taglist - comma separated list of tags to search or '*' for all.
	 * @param $header - if the $html includes the html head or body tags - it should be set to true.
*/ public function getTags($html, $taglist='*', $header = false) { if($header == false) { $html = "".$html.""; } $doc = $this->domObj; $doc->preserveWhiteSpace = true; $doc->loadHTML($html); $tg = explode(",", $taglist); $ret = array(); foreach($tg as $find) { $tmp = $doc->getElementsByTagName($find); foreach($tmp as $k=>$node) { $tag = $node->nodeName; $inner = $node->C14N(); $inner = str_replace(" ","",$inner); foreach ($node->attributes as $attr) { $name = $attr->nodeName; $value = $attr->nodeValue; $ret[$tag][$k][$name] = $value; } $ret[$tag][$k]['@value'] = $inner; } } if($header == false) { unset($ret['html'],$ret['body']); } return $ret; } // Parse i_xxxx to bootstrap glyph format. public function glyph($text) { $text = preg_replace('/\[(i_[\w]*)\]/',"", $text); return $text; } /** * Parse new tags into bbcode output. * @param $retainTags : when you want to replace html and retain the tags wrapping it. * @return html */ function parseBBTags($text,$retainTags = false) { $bbcodes = $this->getTags($text, 'bbcode'); foreach($bbcodes as $v) { foreach($v as $val) { $tag = urldecode($val['alt']); $repl = ($retainTags == true) ? '$1'.$tag.'$2' : $tag; $text = preg_replace('/(]*>).*(<\/bbcode>)/s',$repl, $text); //FIXME - handle multiple instances of bbcodes. } } return $text; } /** * Perform and render XSS Test Comparison */ public function test() { // $tp = e107::getParser(); $sql = e107::getDb(); $html = $this->getXss(); echo "

Unprocessed XSS

"; // echo $html; // Remove Comment for a real mess! print_a($html); echo "

Standard v2 Parser

"; echo "

\$tp->dataFilter()

"; // echo $tp->dataFilter($html); // Remove Comment for a real mess! $sql->db_Mark_Time('------ Start Parser Test -------'); print_a($this->dataFilter($html)); $sql->db_Mark_Time('tp->dataFilter'); echo "

\$tp->toHtml()

"; // echo $tp->dataFilter($html); // Remove Comment for a real mess! print_a($this->tohtml($html)); $sql->db_Mark_Time('tp->toHtml'); echo "

\$tp->toDB()

"; // echo $tp->dataFilter($html); // Remove Comment for a real mess! print_a($this->toDB($html)); $sql->db_Mark_Time('tp->toDB'); echo "

New Parser

"; echo "

Processed

"; $cleaned = $this->cleanHtml($html); print_a($cleaned); $sql->db_Mark_Time('new Parser'); // $sql->db_Mark_Time('------ End Parser Test -------'); echo "

Processed & Rendered

"; echo $cleaned; echo "

New Parser - Data

"; echo "

Converted Paths

"; print_a($this->pathList); echo "

Removed Tags and Attributes

"; print_a($this->removedList); // print_a($p); } /** * Process and clean HTML from user input. * @param $html raw HTML * TODO Html5 tag support. */ public function cleanHtml($html='') { if(!vartrue($html)){ return; } // $html = mb_convert_encoding($html, 'UTF-8'); if(preg_match("/'; } else // Full HTML page. { // $this->allowedTags[] = 'head'; // $this->allowedTags[] = 'body'; // $this->allowedTags[] = 'title'; //$this->allowedTags[] = 'meta'; } if(!is_object($this->domObj)) { $this->init(); } // Set it up for processing. $doc = $this->domObj; @$doc->loadHTML($html); $doc->encoding = 'UTF-8'; //FIXME // $doc->resolveExternals = true; // $tmp = $doc->getElementsByTagName('*'); $this->nodesToConvert = array(); // required. $this->nodesToDelete = array(); // required. $this->removedList = array(); $tmp = $doc->getElementsByTagName('*'); foreach($tmp as $node) { $path = $node->getNodePath(); // echo "
Path = ".$path; // $tag = strval(basename($path)); $tag = preg_replace('/([a-z0-9\[\]\/]*)?\/([\w]*)(\[(\d)*\])?$/i', "$2", $path); if(!in_array($tag, $this->allowedTags)) { if(strpos($path,'/code/') !== false || strpos($path,'/pre/') !== false) // treat as html. { $this->pathList[] = $path; $this->nodesToConvert[] = $node->parentNode; // $node; continue; } $this->removedList['tags'][] = $tag; $this->nodesToDelete[] = $node; continue; } foreach ($node->attributes as $attr) { $name = $attr->nodeName; $value = $attr->nodeValue; $allow = varset($this->allowedAttributes[$tag], $this->allowedAttributes['default']); if(!in_array($name, $allow)) { $node->removeAttribute($name); $this->removedList['attributes'][] = $name. " from <".$tag.">"; continue; } if($this->invalidAttributeVal( $value)) // Check value against whitelist. { $node->removeAttribute($name); $node->setAttribute($name, '#---sanitized---#'); $this->removedList['sanitized'][] = $tag.'['.$name.']'; } } } // Remove some stuff. foreach($this->nodesToDelete as $node) { $node->parentNode->removeChild($node); } // Convert and
 Tags to Htmlentities. 
        foreach($this->nodesToConvert as $node)  
        {
            $value = $node->C14N();

            $value = str_replace("
","",$value);
            
            if($node->nodeName == 'pre')
            {
                $value = substr($value,5);
                $end = strrpos($value,"
"); $value = substr($value,0,$end); } if($node->nodeName == 'code') { $value = substr($value,6); $end = strrpos($value,"
"); $value = substr($value,0,$end); } $value = htmlentities(htmlentities($value)); // Needed $node->nodeValue = $value; } $cleaned = $doc->saveHTML(); $cleaned = str_replace(array('','','','','','',''),'',$cleaned); // filter out tags. $cleaned = html_entity_decode($cleaned, ENT_QUOTES, 'UTF-8'); return $cleaned; } /** * Check for Invalid Attribute Values * @param $val string * @return true/false */ function invalidAttributeVal($val) { foreach($this->badAttrValues as $v) // global list because a bad value is bad regardless of the attribute it's in. ;-) { if(preg_match('/'.$v.'/i',$val)==true) { $this->removedList['blacklist'][] = "Match found for '{$v}' in '{$val}'"; return true; } } return false; } /** * XSS HTML code to test against */ private function getXss() { $html = << 日本語
简体中文
Test A GOOD LINK: Some Link Test regex
Hi there
XXX alert(1)//0
< script>document.getElementById("div2").innerHTML = document.getElementById("div1").innerHTML; Some example text
This is bold
This is italic
Some small text
This is pre-formatted
        
        Bold Stuff
        
something
code BOLD function myfunction() { }
function myfunction() { } ><image xlink:href=" // O10.10↓, OM10.0↓, GC6↓, FF // IE6, O10.10↓, OM10.0↓ // IE6, O11.01↓, OM10.1↓
x
[A] "> "> "> [B] "> [C] [D] <% foo> some content without two new line \n\n Content-Type: multipart/related; boundary="******"some content without two new line --****** Content-Location: xss.html Content-Transfer-Encoding: base64 PGlmcmFtZSBuYW1lPWxvIHN0eWxlPWRpc3BsYXk6bm9uZT48L2lmcmFtZT4NCjxzY3JpcHQ+DQp1 cmw9bG9jYXRpb24uaHJlZjtkb2N1bWVudC5nZXRFbGVtZW50c0J5TmFtZSgnbG8nKVswXS5zcmM9 dXJsLnN1YnN0cmluZyg2LHVybC5pbmRleE9mKCcvJywxNSkpO3NldFRpbWVvdXQoImFsZXJ0KGZy YW1lc1snbG8nXS5kb2N1bWVudC5jb29raWUpIiwyMDAwKTsNCjwvc2NyaXB0PiAgICAg --******--
XXX `><img src=xx:x onerror=alert(1)></a> `><img src=xx:x onerror=alert(2)// `><img src=xx:x onerror=alert(3)// // Safari 5.0, Chrome 9, 10 // Safari 5.0 <% %></xmp><img src=xx:x onerror=alert(1)// %>/ alert(2) XXX -->{} *{color:red} X
XXX






...



Some text goes here
EOF; return $html; } }