diff --git a/e107_handlers/e_parse_class.php b/e107_handlers/e_parse_class.php index 26d426b7e..cec6e7298 100644 --- a/e107_handlers/e_parse_class.php +++ b/e107_handlers/e_parse_class.php @@ -1,20 +1,18 @@ rror', '>', "'", '"', ' & '); - var $e_highlighting; // Set to TRUE or FALSE once it has been calculated + var $e_highlighting; // Set to TRUE or FALSE once it has been calculated var $e_query; // Highlight query - // toHTML Action defaults. For now these match existing convention. - // Let's reverse the logic on the first set ASAP; too confusing! - var $e_optDefault = array( - 'context' => 'olddefault', // default context: all "opt-out" conversions :( + // Set up the defaults + var $e_optDefault = array( + 'context' => 'OLDDEFAULT', // default context: reflects legacy settings (many items enabled) 'fromadmin' => FALSE, - - // Enabled by Default + 'emotes' => TRUE, // Enable emote display + 'defs' => FALSE, // Convert defines(constants) within text. + 'constants' => FALSE, // replace all {e_XXX} constants with their e107 value - 'rel' or 'abs' + 'hook' => TRUE, // Enable hooked parsers + 'scripts' => TRUE, // Allow scripts through (new for 0.8) + 'link_click' => TRUE, // Make links clickable + 'link_replace' => TRUE, // Substitute on clickable links (only if link_click == TRUE) + 'parse_sc' => FALSE, // Parse shortcodes - TRUE enables parsing + 'no_tags' => FALSE, // remove HTML tags. 'value' => FALSE, // Restore entity form of quotes and such to single characters - TRUE disables 'nobreak' => FALSE, // Line break compression - TRUE removes multiple line breaks - 'retain_nl' => FALSE, // Retain newlines - wraps to \n instead of
if TRUE - - 'no_make_clickable' => FALSE, // URLs etc are clickable - TRUE disables - 'no_replace' => FALSE, // Replace clickable links - TRUE disables (only if no_make_clickable not set) - - 'emotes_off' => FALSE, // Convert emoticons to graphical icons - TRUE disables conversion - 'emotes_on' => FALSE, // FORCE conversion to emotes, even if syspref is disabled - - 'no_hook' => FALSE, // Hooked parsers (TRUE disables completely) (deprecated) - - // Disabled by Default - 'defs' => FALSE, // Convert defines(constants) within text. - 'constants' => FALSE, // replace all {e_XXX} constants with their e107 value - 'abs_links' => FALSE, // Convert constants to absolute paths if TRUE - 'parse_sc' => FALSE, // Parse shortcodes - TRUE enables parsing - 'no_tags' => FALSE // remove HTML tags. + 'retain_nl' => FALSE // Retain newlines - wraps to \n instead of
if TRUE ); - // Super modifiers adjust default option values - // First line of adjustments change default-ON options - // Second line changes default-OFF options + // Super modifiers override default option values var $e_SuperMods = array( 'TITLE' => //text is part of a title (e.g. news title) array( - 'nobreak'=>TRUE, 'retain_nl'=>TRUE, 'no_make_clickable'=>TRUE,'emotes_off'=>TRUE, - 'defs'=>TRUE,'parse_sc'=>TRUE), + 'nobreak'=>TRUE, 'retain_nl'=>TRUE, 'link_click' => FALSE, 'emotes'=>FALSE, 'defs'=>TRUE, 'parse_sc'=>TRUE + ), 'USER_TITLE' => //text is user-entered (i.e. untrusted) and part of a title (e.g. forum title) array( - 'nobreak'=>TRUE, 'retain_nl'=>TRUE, 'no_make_clickable'=>TRUE,'emotes_off'=>TRUE,'no_hook'=>TRUE + 'nobreak'=>TRUE, 'retain_nl'=>TRUE, 'link_click' => FALSE, 'scripts' => FALSE, 'emotes'=>FALSE, 'hook'=>FALSE + ), + + 'E_TITLE' => // text is 'body' of email or similar - being sent 'off-site' so don't rely on server availability + array( + 'nobreak'=>TRUE, 'retain_nl'=>TRUE, 'defs'=>TRUE, 'parse_sc'=>TRUE, 'emotes'=>FALSE, 'scripts' => FALSE, 'link_click' => FALSE ), 'SUMMARY' => // text is part of the summary of a longer item (e.g. content summary) array( - // no changes to default-on items - 'defs'=>TRUE, 'constants'=>TRUE, 'parse_sc'=>TRUE), + 'defs'=>TRUE, 'constants'=>'rel', 'parse_sc'=>TRUE + ), 'DESCRIPTION' => // text is the description of an item (e.g. download, link) array( - // no changes to default-on items - 'defs'=>TRUE, 'constants'=>TRUE, 'parse_sc'=>TRUE), + 'defs'=>TRUE, 'constants'=>'rel', 'parse_sc'=>TRUE + ), 'BODY' => // text is 'body' or 'bulk' text (e.g. custom page body, content body) array( - // no changes to default-on items - 'defs'=>TRUE, 'constants'=>TRUE, 'parse_sc'=>TRUE), + 'defs'=>TRUE, 'constants'=>'rel', 'parse_sc'=>TRUE + ), 'USER_BODY' => // text is user-entered (i.e. untrusted)'body' or 'bulk' text (e.g. custom page body, content body) array( - 'constants'=>TRUE + 'constants'=>TRUE, 'scripts' => FALSE + ), + + 'E_BODY' => // text is 'body' of email or similar - being sent 'off-site' so don't rely on server availability + array( + 'defs'=>TRUE, 'constants'=>'abs', 'parse_sc'=>TRUE, 'emotes'=>FALSE, 'scripts' => FALSE, 'link_click' => FALSE ), 'LINKTEXT' => // text is the 'content' of a link (A tag, etc) array( - 'nobreak'=>TRUE, 'retain_nl'=>TRUE, 'no_make_clickable'=>TRUE,'emotes_off'=>TRUE,'no_hook'=>TRUE, - 'defs'=>TRUE,'parse_sc'=>TRUE), + 'nobreak'=>TRUE, 'retain_nl'=>TRUE, 'link_click' => FALSE, 'emotes_on'=>FALSE, 'hook'=>FALSE, 'defs'=>TRUE, 'parse_sc'=>TRUE + ), 'RAWTEXT' => // text is used (for admin edit) without fancy conversions or html. array( - 'nobreak'=>TRUE, 'retain_nl'=>TRUE, 'no_make_clickable'=>TRUE,'emotes_off'=>TRUE,'no_hook'=>TRUE,'no_tags'=>TRUE - // leave opt-in options off + 'nobreak'=>TRUE, 'retain_nl'=>TRUE, 'link_click' => FALSE, 'emotes'=>FALSE, 'hook'=>FALSE, 'no_tags'=>TRUE ) ); + // Individual modifiers change the current context + var $e_Modifiers = array( + 'emotes_off' => array('emotes_on' => FALSE), + 'emotes_on' => array('emotes_on' => TRUE), + 'no_hook' => array('hook' => FALSE), + 'do_hook' => array('hook' => TRUE), + 'scripts_off' => array('scripts' => FALSE), // New for 0.8 + 'scripts_on' => array('scripts' => TRUE), // New for 0.8 + 'no_make_clickable' => array('link_click' => FALSE), + 'make_clickable' => array('link_click' => TRUE), + 'no_replace' => array('link_replace' => FALSE), + 'replace' => array('link_replace' => TRUE), // Replace text of clickable links (only if make_clickable option set) + 'consts_off' => array('constants' => FALSE), // No path replacement + 'consts_rel' => array('constants' => 'rel'), // Relative path replacement + 'consts_abs' => array('constants' => 'abs'), // Absolute path replacement + 'scparse_off' => array('parse_sc' => FALSE), // No shortcode parsing + 'scparse_on' => array('parse_sc' => TRUE), + 'no_tags' => array('no_tags' => TRUE), // Strip tags + 'do_tags' => array('no_tags' => FALSE), // Leave tags + 'fromadmin' => array('fromadmin' => TRUE), + 'notadmin' => array('fromadmin' => FALSE), + 'er_off' => array('value' => FALSE), // entity replacement + 'er_on' => array('value' => TRUE), + 'defs_off' => array('defs' => FALSE), // Decode constant if exists + 'defs_on' => array('defs' => TRUE), + + 'dobreak' => array('nobreak' => TRUE), + 'nobreak' => array('nobreak' => FALSE), + 'lb_nl' => array('retain_nl' => TRUE), // Line break using \n + 'lb_br' => array('retain_nl' => FALSE), // Line break using
+ + // Legacy option names below here - discontinue later + 'retain_nl' => array('retain_nl' => TRUE), + 'defs' => array('defs' => TRUE), + 'parse_sc' => array('parse_sc' => TRUE), + 'constants' => array('constants' => 'rel'), + 'value' => array('value' => TRUE) + ); + + function e_parse() { - // Preprocess the supermods to be useful default arrays with all values - foreach ($this->e_SuperMods as $key=>$val) - { - $this->e_SuperMods[$key] = array_merge($this->e_optDefault,$this->e_SuperMods[$key]); // precalculate super defaults - $this->e_SuperMods[$key]['context']=$key; - } + // Preprocess the supermods to be useful default arrays with all values + foreach ($this->e_SuperMods as $key=>$val) + { + $this->e_SuperMods[$key] = array_merge($this->e_optDefault,$this->e_SuperMods[$key]); // precalculate super defaults + $this->e_SuperMods[$key]['context']=$key; + } } @@ -125,22 +162,26 @@ class e_parse { // Start by working out what, if anything, we do about utf-8 handling. $this->utfAction = 0; // 'Do nothing' is the simple option - if ((strtolower(CHARSET) == 'utf-8') && (version_compare(PHP_VERSION, '6.0.0') < 1)) - { // Need to do something here - if(extension_loaded('mbstring')) - { - $temp = ini_get('mbstring.func_overload'); // Check for function overloading - if (($temp & MB_OVERLOAD_STRING) == 0) // Just check the string functions - will be non-zero if overloaded + if (strtolower(CHARSET) == 'utf-8') + { + $this->isutf8 = TRUE; + if (version_compare(PHP_VERSION, '6.0.0') < 1) + { // Need to do something here + if(extension_loaded('mbstring')) { - $this->utfAction = 1; // Can use the mb_string routines + $temp = ini_get('mbstring.func_overload'); // Check for function overloading + if (($temp & MB_OVERLOAD_STRING) == 0) // Just check the string functions - will be non-zero if overloaded + { + $this->utfAction = 1; // Can use the mb_string routines + } + mb_internal_encoding('UTF-8'); // Set the default encoding, so we don't have to specify every time + } + else + { + $this->utfAction = 2; // Must use emulation - will probably be slow! + require(E_UTF8_PACK.'utils/unicode.php'); + require(E_UTF8_PACK.'native/core.php'); // Always load the core routines - bound to need some of them! } - mb_internal_encoding('UTF-8'); // Set the default encoding, so we don't have to specify every time - } - else - { - $this->utfAction = 2; // Must use emulation - will probably be slow! - require(E_UTF8_PACK.'utils/unicode.php'); - require(E_UTF8_PACK.'native/core.php'); // Always load the core routines - bound to need some of them! } } } @@ -170,6 +211,7 @@ class e_parse return utf8_strtolower($str); } + function uStrToUpper($str) { switch ($this->utfAction) @@ -193,6 +235,7 @@ class e_parse utf8_strpos($haystack, $needle, $offset); } + function uStrrPos($haystack, $needle, $offset = 0) { switch ($this->utfAction) @@ -229,11 +272,11 @@ class e_parse // Initialise the shortcode handler - has to be done when $prefs valid, so can't be done in constructor ATM function sch_load() { - if (!is_object($this->e_sc)) - { - require_once(e_HANDLER."shortcode_handler.php"); - $this->e_sc = new e_shortcode; - } + if (!is_object($this->e_sc)) + { + require_once(e_HANDLER."shortcode_handler.php"); + $this->e_sc = new e_shortcode; + } } @@ -248,12 +291,17 @@ class e_parse * $mod: the 'no_html' and 'no_php' modifiers blanket prevent html and php posting regardless of posting permissions. (used in logging) */ global $pref; - if (is_array($data)) { - foreach ($data as $key => $var) { + if (is_array($data)) + { + foreach ($data as $key => $var) + { $ret[$key] = $this -> toDB($var, $nostrip, $no_encode, $mod, $original_author); } - } else { - if (MAGIC_QUOTES_GPC == true && $nostrip == false) { + } + else + { + if (MAGIC_QUOTES_GPC == true && $nostrip == false) + { $data = stripslashes($data); } if (isset($pref['post_html']) && check_class($pref['post_html'])) @@ -269,7 +317,9 @@ class e_parse $search = array('$', '"', "'", '\\', ''); $replace = array('$', '"', '<', '>'); $text = str_replace($search, $replace, $text); - if (e_WYSIWYG !== true){ + if (e_WYSIWYG !== true) + { $text = str_replace(" ", " ", $text); // fix for utf-8 issue with html_entity_decode(); } return $text; } - function post_toForm($text) { - if (MAGIC_QUOTES_GPC == true) { + function post_toForm($text) + { + if (MAGIC_QUOTES_GPC == true) + { $text = stripslashes($text); } return str_replace(array( "'", '"', "<", ">"), array("'", """, "<", ">"), $text); } - function post_toHTML($text, $original_author = false, $extra = '', $mod = false) { + function post_toHTML($text, $original_author = false, $extra = '', $mod = false) + { $text = $this -> toDB($text, false, false, $mod, $original_author); return $this -> toHTML($text, true, $extra); } - function parseTemplate($text, $parseSCFiles = TRUE, $extraCodes = "") { + function parseTemplate($text, $parseSCFiles = TRUE, $extraCodes = "") + { $this->sch_load(); return $this->e_sc->parseCodes($text, $parseSCFiles, $extraCodes); } @@ -328,173 +381,175 @@ class e_parse $nobreak is a list of tags within which word wrap is to be inactive */ - if (!ctype_digit($width)) return $str; // Don't wrap if non-numeric width - if ($width < 6) return $str; // Trap stupid wrap counts, as well + return $str; - // Transform protected element lists into arrays - $nobreak = explode(" ", strtolower($nobreak)); - - // Variable setup - $intag = false; - $innbk = array(); - $drain = ""; - - // List of characters it is "safe" to insert line-breaks at - // It is not necessary to add < and > as they are automatically implied - $lbrks = "/?!%)-}]\\\"':;&"; - - // Is $str a UTF8 string? - if ($utf || strtolower(CHARSET) == 'utf-8') - { // 0x1680, 0x180e, 0x2000-0x200a, 0x2028, 0x205f, 0x3000 are 'non-ASCII' Unicode UCS-4 codepoints - see http://www.unicode.org/Public/UNIDATA/UnicodeData.txt - // All convert to 3-byte utf-8 sequences: - // 0x1680 0xe1 0x9a 0x80 - // 0x180e 0xe1 0xa0 0x8e - // 0x2000 0xe2 0x80 0x80 - // - - // 0x200a 0xe2 0x80 0x8a - // 0x2028 0xe2 0x80 0xa8 - // 0x205f 0xe2 0x81 0x9f - // 0x3000 0xe3 0x80 0x80 - $utf8 = 'u'; - $whiteSpace = '#([\x20|\x0c]|[\xe1][\x9a][\x80]|[\xe1][\xa0][\x8e]|[\xe2][\x80][\x80-\x8a,\xa8]|[\xe2][\x81][\x9f]|[\xe3][\x80][\x80]+)#'; - // Have to explicitly enumerate the whitespace chars, and use non-utf-8 mode, otherwise regex fails on badly formed utf-8 - } - else - { - $utf8 = ''; - $whiteSpace = '#(\s+)#'; // For non-utf-8, can use a simple match string - } - - -// Start of the serious stuff - split into HTML tags and text between - $content = preg_split('#(<.*?>)#mis', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE ); - foreach($content as $value) - { - if ($value[0] == "<") - { // We are within an HTML tag - // Create a lowercase copy of this tag's contents - $lvalue = strtolower(substr($value,1,-1)); - if ($lvalue) - { // Tag of non-zero length - // If the first character is not a / then this is an opening tag - if ($lvalue[0] != "/") - { // Collect the tag name - preg_match("/^(\w*?)(\s|$)/", $lvalue, $t); - - // If this is a protected element, activate the associated protection flag - if (in_array($t[1], $nobreak)) array_unshift($innbk, $t[1]); - } - else - { // Otherwise this is a closing tag - // If this is a closing tag for a protected element, unset the flag - if (in_array(substr($lvalue, 1), $nobreak)) - { - reset($innbk); - while (list($key, $tag) = each($innbk)) - { - if (substr($lvalue, 1) == $tag) - { - unset($innbk[$key]); - break; - } - } - $innbk = array_values($innbk); - } - } - } - else - { - $value = ''; // Eliminate any empty tags altogether - } - // Else if we're outside any tags, and with non-zero length string... - } - elseif ($value) - { // If unprotected... - if (!count($innbk)) - { - // Use the ACK (006) ASCII symbol to replace all HTML entities temporarily - $value = str_replace("\x06", "", $value); - preg_match_all("/&([a-z\d]{2,7}|#\d{2,5});/i", $value, $ents); - $value = preg_replace("/&([a-z\d]{2,7}|#\d{2,5});/i", "\x06", $value); -// echo "Found block length ".strlen($value).': '.substr($value,20).'
'; - // Split at spaces - note that this will fail if presented with invalid utf-8 when doing the regex whitespace search -// $split = preg_split('#(\s)#'.$utf8, $value, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE ); - $split = preg_split($whiteSpace, $value, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE ); - $value = ''; - foreach ($split as $sp) - { -// echo "Split length ".strlen($sp).': '.substr($sp,20).'
'; - $loopCount = 0; - while (strlen($sp) > $width) - { // Enough characters that we may need to do something. - $pulled = ''; - if ($utf8) - { - // Pull out a piece of the maximum permissible length - if (preg_match('#^((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,'.$width.'})(.{0,1}).*#s',$sp,$matches) == 0) - { - $value .= '[!invalid utf-8: '.$sp.'!]'; // Make any problems obvious for now - $sp = ''; - } - elseif (empty($matches[2])) - { // utf-8 length is less than specified - treat as a special case - $value .= $sp; - $sp = ''; - } - else - { // Need to find somewhere to break the string - for ($i = strlen($matches[1])-1; $i >= 0; $i--) - { - if (strpos($lbrks,$matches[1][$i]) !== FALSE) break; - } - if ($i < 0) - { // No 'special' break character found - break at the word boundary - $pulled = $matches[1]; - } - else - { - $pulled = substr($sp,0,$i+1); - } - } - $loopCount++; - if ($loopCount > 20) - { - $value .= '[!loop count exceeded: '.$sp.'!]'; // Make any problems obvious for now - $sp = ''; - } + if (!ctype_digit($width)) return $str; // Don't wrap if non-numeric width + if ($width < 6) return $str; // Trap stupid wrap counts, as well + + // Transform protected element lists into arrays + $nobreak = explode(" ", strtolower($nobreak)); + + // Variable setup + $intag = false; + $innbk = array(); + $drain = ""; + + // List of characters it is "safe" to insert line-breaks at + // It is not necessary to add < and > as they are automatically implied + $lbrks = "/?!%)-}]\\\"':;&"; + + // Is $str a UTF8 string? + if ($utf || strtolower(CHARSET) == 'utf-8') + { // 0x1680, 0x180e, 0x2000-0x200a, 0x2028, 0x205f, 0x3000 are 'non-ASCII' Unicode UCS-4 codepoints - see http://www.unicode.org/Public/UNIDATA/UnicodeData.txt + // All convert to 3-byte utf-8 sequences: + // 0x1680 0xe1 0x9a 0x80 + // 0x180e 0xe1 0xa0 0x8e + // 0x2000 0xe2 0x80 0x80 + // - + // 0x200a 0xe2 0x80 0x8a + // 0x2028 0xe2 0x80 0xa8 + // 0x205f 0xe2 0x81 0x9f + // 0x3000 0xe3 0x80 0x80 + $utf8 = 'u'; + $whiteSpace = '#([\x20|\x0c]|[\xe1][\x9a][\x80]|[\xe1][\xa0][\x8e]|[\xe2][\x80][\x80-\x8a,\xa8]|[\xe2][\x81][\x9f]|[\xe3][\x80][\x80]+)#'; + // Have to explicitly enumerate the whitespace chars, and use non-utf-8 mode, otherwise regex fails on badly formed utf-8 + } + else + { + $utf8 = ''; + $whiteSpace = '#(\s+)#'; // For non-utf-8, can use a simple match string + } + + + // Start of the serious stuff - split into HTML tags and text between + $content = preg_split('#(<.*?>)#mis', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE ); + foreach($content as $value) + { + if ($value[0] == "<") + { // We are within an HTML tag + // Create a lowercase copy of this tag's contents + $lvalue = strtolower(substr($value,1,-1)); + if ($lvalue) + { // Tag of non-zero length + // If the first character is not a / then this is an opening tag + if ($lvalue[0] != "/") + { // Collect the tag name + preg_match("/^(\w*?)(\s|$)/", $lvalue, $t); + + // If this is a protected element, activate the associated protection flag + if (in_array($t[1], $nobreak)) array_unshift($innbk, $t[1]); } else - { - for ($i = min($width,strlen($sp)); $i > 0; $i--) + { // Otherwise this is a closing tag + // If this is a closing tag for a protected element, unset the flag + if (in_array(substr($lvalue, 1), $nobreak)) + { + reset($innbk); + while (list($key, $tag) = each($innbk)) { - if (strpos($lbrks,$sp[$i-1]) !== FALSE) break; // No speed advantage to defining match character + if (substr($lvalue, 1) == $tag) + { + unset($innbk[$key]); + break; + } } - if ($i == 0) - { // No 'special' break boundary character found - break at the word boundary - $pulled = substr($sp,0,$width); - } - else - { - $pulled = substr($sp,0,$i); - } - } - if ($pulled) - { - $value .= $pulled.$break; - $sp = substr($sp,strlen($pulled)); // Shorten $sp by whatever we've processed (will work even for utf-8) + $innbk = array_values($innbk); + } } } - $value .= $sp; // Add in any residue + else + { + $value = ''; // Eliminate any empty tags altogether + } + // Else if we're outside any tags, and with non-zero length string... } - // Put captured HTML entities back into the string - foreach ($ents[0] as $ent) $value = preg_replace("/\x06/", $ent, $value, 1); - } - } - // Send the modified segment down the drain - $drain .= $value; - } - // Return contents of the drain - return $drain; + elseif ($value) + { // If unprotected... + if (!count($innbk)) + { + // Use the ACK (006) ASCII symbol to replace all HTML entities temporarily + $value = str_replace("\x06", "", $value); + preg_match_all("/&([a-z\d]{2,7}|#\d{2,5});/i", $value, $ents); + $value = preg_replace("/&([a-z\d]{2,7}|#\d{2,5});/i", "\x06", $value); + // echo "Found block length ".strlen($value).': '.substr($value,20).'
'; + // Split at spaces - note that this will fail if presented with invalid utf-8 when doing the regex whitespace search + // $split = preg_split('#(\s)#'.$utf8, $value, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE ); + $split = preg_split($whiteSpace, $value, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE ); + $value = ''; + foreach ($split as $sp) + { + // echo "Split length ".strlen($sp).': '.substr($sp,20).'
'; + $loopCount = 0; + while (strlen($sp) > $width) + { // Enough characters that we may need to do something. + $pulled = ''; + if ($utf8) + { + // Pull out a piece of the maximum permissible length + if (preg_match('#^((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,'.$width.'})(.{0,1}).*#s',$sp,$matches) == 0) + { + $value .= '[!invalid utf-8: '.$sp.'!]'; // Make any problems obvious for now + $sp = ''; + } + elseif (empty($matches[2])) + { // utf-8 length is less than specified - treat as a special case + $value .= $sp; + $sp = ''; + } + else + { // Need to find somewhere to break the string + for ($i = strlen($matches[1])-1; $i >= 0; $i--) + { + if (strpos($lbrks,$matches[1][$i]) !== FALSE) break; + } + if ($i < 0) + { // No 'special' break character found - break at the word boundary + $pulled = $matches[1]; + } + else + { + $pulled = substr($sp,0,$i+1); + } + } + $loopCount++; + if ($loopCount > 20) + { + $value .= '[!loop count exceeded: '.$sp.'!]'; // Make any problems obvious for now + $sp = ''; + } + } + else + { + for ($i = min($width,strlen($sp)); $i > 0; $i--) + { + if (strpos($lbrks,$sp[$i-1]) !== FALSE) break; // No speed advantage to defining match character + } + if ($i == 0) + { // No 'special' break boundary character found - break at the word boundary + $pulled = substr($sp,0,$width); + } + else + { + $pulled = substr($sp,0,$i); + } + } + if ($pulled) + { + $value .= $pulled.$break; + $sp = substr($sp,strlen($pulled)); // Shorten $sp by whatever we've processed (will work even for utf-8) + } + } + $value .= $sp; // Add in any residue + } + // Put captured HTML entities back into the string + foreach ($ents[0] as $ent) $value = preg_replace("/\x06/", $ent, $value, 1); + } + } + // Send the modified segment down the drain + $drain .= $value; + } + // Return contents of the drain + return $drain; } @@ -571,41 +626,42 @@ class e_parse // Uses current CHARSET - for utf-8, returns $len characters rather than $len bytes function text_truncate($text, $len = 200, $more = "[more]") { - if (strlen($text) <= $len) return $text; // Always valid - if (strtolower(CHARSET) !== 'utf-8') - { - $ret = substr($text,0,$len); // Non-utf-8 - one byte per character - simple (unless there's an entity involved) - } - else - { // Its a utf-8 string here - don't know whether its longer than allowed length yet - preg_match('#^(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,0}'. + if (strlen($text) <= $len) return $text; // Always valid + if (strtolower(CHARSET) !== 'utf-8') + { + $ret = substr($text,0,$len); // Non-utf-8 - one byte per character - simple (unless there's an entity involved) + } + else + { // Its a utf-8 string here - don't know whether its longer than allowed length yet + preg_match('#^(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,0}'. '((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,'.$len.'})(.{0,1}).*#s',$text,$matches); - if (empty($matches[2])) return $text; // return if utf-8 length is less than max as well - $ret = $matches[1]; - } - // search for possible broken html entities - // - if an & is in the last 8 chars, removing it and whatever follows shouldn't hurt - // it should work for any characters encoding - $leftAmp = strrpos(substr($ret,-8), '&'); - if($leftAmp) $ret = substr($ret,0,strlen($ret)-8+$leftAmp); - return $ret.$more; + if (empty($matches[2])) return $text; // return if utf-8 length is less than max as well + $ret = $matches[1]; + } + // search for possible broken html entities + // - if an & is in the last 8 chars, removing it and whatever follows shouldn't hurt + // it should work for any characters encoding + $leftAmp = strrpos(substr($ret,-8), '&'); + if($leftAmp) $ret = substr($ret,0,strlen($ret)-8+$leftAmp); + return $ret.$more; } + function textclean ($text, $wrap=100) { $text = str_replace ("\n\n\n", "\n\n", $text); $text = $this->htmlwrap($text, $wrap); - $text = str_replace (array ("
", "
", "
"), "
", $text); + $text = str_replace (array ('
', '
', '
'), '
', $text); /* we can remove any linebreaks added by htmlwrap function as any \n's will be converted later anyway */ return $text; } - // + + // Test for text highlighting, and determine the text highlighting transformation // Returns TRUE if highlighting is active for this page display - // function checkHighlighting() { global $pref; @@ -633,398 +689,409 @@ class e_parse } + + function toHTML($text, $parseBB = FALSE, $modifiers = "", $postID = "", $wrap=FALSE) { - if ($text == '') return $text; + if ($text == '') return $text; - global $pref, $fromadmin; + global $pref, $fromadmin; - // Set default modifiers to start - $opts = $this->e_optDefault; - - // Now process any modifiers that are specified - if (strlen($modifiers)) - { - $aMods = explode( ',',$modifiers); - - // If there's a supermodifier, it must be first, and in uppercase - $psm = trim($aMods[0]); - if (isset($this->e_SuperMods[$psm])) - { - $opts = array_merge($this->e_optDefault,$this->e_SuperMods[$psm]); - $opts['context'] = $psm; - unset($aMods[0]); - } - else - { - // Set default modifiers + // Set default modifiers to start $opts = $this->e_optDefault; - } - // Now find any regular mods (could check each exists, but unnecessary processing really) - foreach ($aMods as $mod) + // Now process any modifiers that are specified + if ($modifiers) { - $opts[trim($mod)] = TRUE; // Change mods as spec'd + $aMods = explode(',',$modifiers); + + // If there's a supermodifier, it must be first, and in uppercase + $psm = trim($aMods[0]); + if (isset($this->e_SuperMods[$psm])) + { // Supermodifier found - it simply overrides the default + $opts = $this->e_SuperMods[$psm]; + $opts['context'] = $psm; + unset($aMods[0]); + } + + + // Now find any regular modifiers; use them to modify the context (there should only be one or two out of the list of possibles) + foreach ($aMods as $mod) + { + $mod = trim($mod); // Slight concession to varying coding styles + if (isset($this->e_Modifiers[$mod])) + { + foreach ($this->e_Modifiers[$mod] as $k => $v) // This is probably quicker than array_merge - especially as usually only one or two loops + { + $opts[$k] = $v; // Update our context-specific options + } + } + } } - } - $fromadmin = $opts['fromadmin']; + // Turn off a few things if not enabled in options + if (!varsettrue($pref['smiley_activate'])) $opts['emotes'] = FALSE; + if (!varsettrue($pref['make_clickable'])) $opts['link_click'] = FALSE; + if (!varsettrue($pref['link_replace'])) $opts['link_replace'] = FALSE; - // Convert defines(constants) within text. eg. Lan_XXXX - must be the entire text string (i.e. not embedded) - // The check for '::' is a workaround for a bug in the Zend Optimiser 3.3.0 and PHP 5.2.4 combination - causes crashes if '::' in site name - if ($opts['defs'] && (strlen($text) < 25) && ((strpos($text,'::') === FALSE) && defined(trim($text)))) - { - return constant(trim($text)); - } + $fromadmin = $opts['fromadmin']; + + // Convert defines(constants) within text. eg. Lan_XXXX - must be the entire text string (i.e. not embedded) + // The check for '::' is a workaround for a bug in the Zend Optimiser 3.3.0 and PHP 5.2.4 combination - causes crashes if '::' in site name + if ($opts['defs'] && (strlen($text) < 25) && ((strpos($text,'::') === FALSE) && defined(trim($text)))) + { + return constant(trim($text)); + } - if ($opts['no_tags']) - { - $text = strip_tags($text); - } + + if ($opts['no_tags']) + { + $text = strip_tags($text); + } - // Make sure we have a valid count for word wrapping - if(!$wrap && $pref['main_wordwrap']) $wrap = $pref['main_wordwrap']; - $text = " ".$text; + // Make sure we have a valid count for word wrapping + if (!$wrap && $pref['main_wordwrap']) + { + $wrap = $pref['main_wordwrap']; + } +// $text = " ".$text; // Now get on with the parsing - $ret_parser = ''; - $last_bbcode = ''; - if ($parseBB == FALSE) - { - $content = array($text); - } - else - { - // Split each text block into bits which are either within one of the 'key' bbcodes, or outside them - // (Because we have to match end words, the 'extra' capturing subpattern gets added to output array. We strip it later) - $content = preg_split('#(\[(php|code|scode|hide).*?\[/(?:\\2)\])#mis', $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE ); - } - - - // Use $full_text variable so its available to special bbcodes if required - foreach ($content as $full_text) - { - $proc_funcs = TRUE; - - // We may have 'captured' a bbcode word - strip it if so - if ($last_bbcode == $full_text) + $ret_parser = ''; + $last_bbcode = ''; + if ($parseBB == FALSE) { - $last_bbcode = ''; - $proc_funcs = FALSE; - $full_text = ''; + $content = array($text); } else { - // (Have to have a good test in case a 'non-key' bbcode starts the block - so pull out the bbcode parameters while we're there - if (($parseBB !== FALSE) && preg_match('#(^\[(php|code|scode|hide)(.*?)\])(.*?)(\[/\\2\]$)#is', $full_text, $matches )) - { // It's one of the 'key' bbcodes - $proc_funcs = FALSE; // Usually don't want 'normal' processing if its a 'special' bbcode - // $matches[0] - complete block from opening bracket of opening tag to closing bracket of closing tag - // $matches[1] - complete opening tag (inclusive of brackets) - // $matches[2] - bbcode word - // $matches[3] - parameter, including '=' - // $matches[4] - bit between the tags (i.e. text to process) - // $matches[5] - closing tag - $bbFile = e_FILE.'bbcode/'.strtolower(str_replace('_', '', $matches[2])).'.bb'; // In case we decide to load a file - $bbcode = ''; - $code_text = $matches[4]; - $parm = $matches[3] ? substr($matches[3],1) : ''; - $last_bbcode = $matches[2]; - switch ($matches[2]) - { - case 'php' : - if (DB_INF_SHOW) echo "PHP decode: ".htmlentities($matches[4])."

"; - $proc_funcs = TRUE; // Probably run the output through the normal processing functions - but put here so the PHP code can disable if desired - // This is just the contents of the php.bb file pulled in - its short, so will be quicker -// $search = array(""", "'", "$", '
', E_NL, "->", "<br />"); -// $replace = array('"', "'", "$", "\n", "\n", "->", "
"); - // Shouldn't have any parameter on this bbcode -// if (!$matches[3]) $bbcode = str_replace($search, $replace, $matches[4]); // Not sure whether checks are necessary now we've reorganised - // Because we're bypassing most of the initial parser processing, we should be able to just reverse the effects of toDB() and execute the code - if (!$matches[3]) $bbcode = html_entity_decode($matches[4], ENT_QUOTES, CHARSET); - if (DB_INF_SHOW) echo "PHP after decode: ".htmlentities($bbcode)."

"; - break; - case 'hide' : - $proc_funcs = TRUE; - default : // Most bbcodes will just execute their normal file - $bbcode = file_get_contents($bbFile); // Just read in the code file and execute it - } // end - switch ($matches[2]) - if ($bbcode) - { // Execute the file - ob_start(); - $bbcode_return = eval($bbcode); - $bbcode_output = ob_get_contents(); - ob_end_clean(); - // added to remove possibility of nested bbcode exploits ... - // (same as in bbcode_handler - is it right that it just operates on $bbcode_return and not on $bbcode_output? - QUERY XXX-02 - if(strpos($bbcode_return, "[") !== FALSE) - { - $exp_search = array("eval", "expression"); - $exp_replace = array("eval", "expression"); - $bbcode_return = str_replace($exp_search, $exp_replace, $bbcode_return); - } - $full_text = $bbcode_output.$bbcode_return; - } - } + // Split each text block into bits which are either within one of the 'key' bbcodes, or outside them + // (Because we have to match end words, the 'extra' capturing subpattern gets added to output array. We strip it later) + $content = preg_split('#(\[(php|code|scode|hide).*?\[/(?:\\2)\])#mis', $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE ); } + // Use $full_text variable so its available to special bbcodes if required + foreach ($content as $full_text) + { + $proc_funcs = TRUE; - if ($proc_funcs) - { // Do the 'normal' processing - in principle, as previously - but think about the order. - - // Split out and ignore any scripts and style blocks. With just two choices we can match the closing tag in the regex - $subcon = preg_split('#((?:]+>.*?|tyle[^>]+>.*?))#mis', $full_text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE ); - foreach ($subcon as $sub_blk) - { -// if (preg_match('#^<(script|style)[^>]+>#',$sub_blk)) // - if ((substr($sub_blk,0,7) == '"; - if (!$opts['nobreak']) - { - $sub_blk = preg_replace("#>\s*[\r]*\n[\r]*#", ">", $sub_blk); - } - $ret_parser .= $sub_blk; + // We may have 'captured' a bbcode word - strip it if so + if ($last_bbcode == $full_text) + { + $last_bbcode = ''; + $proc_funcs = FALSE; + $full_text = ''; } else { - // Do 'normal' processing on a chunk - - - // Could put tag stripping in here - - - // Line break compression (why?) - // Prepare for line-break compression. Avoid compressing newlines in embedded scripts and CSS - if (!$opts['nobreak']) - { - $sub_blk = preg_replace("#>\s*[\r]*\n[\r]*#", ">", $sub_blk); - } - - - // Link substitution - // Convert URL's to clickable links, unless modifiers or prefs override - if ($pref['make_clickable'] && !$opts['no_make_clickable']) - { - if ($pref['link_replace'] && !$opts['no_replace']) - { - $_ext = ($pref['links_new_window'] ? " rel=\"external\"" : ""); -// $sub_blk = preg_replace("#(^|[\n ])([\w]+?://[^ \"\n\r\t<,]*)#is", "\\1".$pref['link_text']."", $sub_blk); - $sub_blk = preg_replace("#(^|[\s])([\w]+?://(?:[\w-%]+?)(?:\.[\w-%]+?)+.*?)(?=$|[\s()[\]<]|\.\s|\.$|,\s|,$)#is", "\\1".$pref['link_text']."", $sub_blk); -// $sub_blk = preg_replace("#(^|[\n \]])((www|ftp)\.[\w+-]+?\.[\w+\-.]*(?(?=/)(/.+?(?=\s|,\s))|(?=\W)))#is", "\\1".$pref['link_text']."", $sub_blk); - $sub_blk = preg_replace("#(^|[\s])((?:www|ftp)(?:\.[\w-%]+?){2}.*?)(?=$|[\s()[\]<]|\.\s|\.$|,\s|,$)#is", "\\1".$pref['link_text']."", $sub_blk); - if(CHARSET != "utf-8" && CHARSET != "UTF-8") - { - $email_text = ($pref['email_text']) ? $this->replaceConstants($pref['email_text']) : "\\1\\2©\\3"; - } - else - { - $email_text = ($pref['email_text']) ? $this->replaceConstants($pref['email_text']) : "\\1\\2©\\3"; - } - $sub_blk = preg_replace("#([\n ])([a-z0-9\-_.]+?)@([\w\-]+\.([\w\-\.]+\.)*[\w]+)#i", "\\1".$email_text."", $sub_blk); - } - else - { -// $sub_blk = preg_replace("#(^|[\n ])([\w]+?://[^ \"\n\r\t<,]*)#is", "\\1\\2", $sub_blk); - $sub_blk = preg_replace("#(^|[\s])([\w]+?://(?:[\w-%]+?)(?:\.[\w-%]+?)+.*?)(?=$|[\s()[\]<]|\.\s|\.$|,\s|,$)#is", "\\1\\2", $sub_blk); -// $sub_blk = preg_replace("#(^|[\n \]])((www|ftp)\.[\w+-]+?\.[\w+\-.]*(?(?=/)(/.+?(?=\s|,\s))|(?=\W)))#is", "\\1\\2", $sub_blk); - $sub_blk = preg_replace("#(^|[\s])((?:www|ftp)(?:\.[\w-%]+?){2}.*?)(?=$|[\s()[\]<]|\.\s|\.$|,\s|,$)#is", "\\1\\2", $sub_blk); - $sub_blk = preg_replace("#([\n ])([a-z0-9\-_.]+?)@([\w\-]+\.([\w\-\.]+\.)*[\w]+)#i", "\\1".LAN_EMAIL_SUBS."", $sub_blk); - } - } - - - // Emoticons - // Convert emoticons to graphical icons, unless modifiers override - if (!$opts['emotes_off'] && ($pref['smiley_activate'] || $opts['emotes_on'])) - { - if (!is_object($this->e_emote)) - { - require_once(e_HANDLER.'emote_filter.php'); - $this->e_emote = new e_emoteFilter; - } - $sub_blk = $this->e_emote->filterEmotes($sub_blk); - } - - - - // Newline processing (more) - // Reduce multiple newlines in all forms to a single newline character, except for embedded scripts and CSS - if (!$opts['nobreak']) - { - $sub_blk = preg_replace("#[\r]*\n[\r]*#", E_NL, $sub_blk); - } - - - - // Entity conversion - // Restore entity form of quotes and such to single characters, except for text destined for tag attributes or JS. - if (!$opts['value']) - { // output not used for attribute values. - $sub_blk = str_replace($this -> search, $this -> replace, $sub_blk); - } - else - { // output used for attribute values. - $sub_blk = str_replace($this -> replace, $this -> search, $sub_blk); - } - - - // BBCode processing (other than the four already done, which shouldn't appear at all in the text) - // Start parse [bb][/bb] codes - if ($parseBB !== FALSE) - { - if (!is_object($this->e_bb)) - { - require_once(e_HANDLER.'bbcode_handler.php'); - $this->e_bb = new e_bbcode; - } - if ($parseBB === TRUE) - { - $sub_blk = $this->e_bb->parseBBCodes($sub_blk, $postID); // 'Normal' or 'legacy' processing - } - elseif ($parseBB === 'STRIP') - { - $sub_blk = $this->e_bb->parseBBCodes($sub_blk, $postID, 'default', TRUE); // Need to strip all BBCodes - } - else - { - $sub_blk = $this->e_bb->parseBBCodes($sub_blk, $postID, 'default', $parseBB); // Need to strip just some BBCodes - } - } - // End parse [bb][/bb] codes - - - - // replace all {e_XXX} constants with their e107 value. modifier determines relative/absolute conversion - // (Moved to after bbcode processing by Cameron) - if ($opts['constants']) - { - $sub_blk = $this->replaceConstants($sub_blk, ($opts['abs_links'] ? 'full' : '')); - } - - - - // profanity filter - if ($pref['profanity_filter']) - { - if (!is_object($this->e_pf)) - { - require_once(e_HANDLER."profanity_filter.php"); - $this->e_pf = new e_profanityFilter; - } - $sub_blk = $this->e_pf->filterProfanities($sub_blk); - } - - - // Shortcodes - // Optional short-code conversion - if ($opts['parse_sc']) - { - $sub_blk = $this->parseTemplate($sub_blk, TRUE); - } - - - - //Run any hooked in parsers - if (!$opts['no_hook']) - { - if ( varset($pref['tohtml_hook'])) - { //Process the older tohtml_hook pref (deprecated) - foreach(explode(",",$pref['tohtml_hook']) as $hook) + // (Have to have a good test in case a 'non-key' bbcode starts the block - so pull out the bbcode parameters while we're there + if (($parseBB !== FALSE) && preg_match('#(^\[(php|code|scode|hide)(.*?)\])(.*?)(\[/\\2\]$)#is', $full_text, $matches )) + { // It's one of the 'key' bbcodes + $proc_funcs = FALSE; // Usually don't want 'normal' processing if its a 'special' bbcode + // $matches[0] - complete block from opening bracket of opening tag to closing bracket of closing tag + // $matches[1] - complete opening tag (inclusive of brackets) + // $matches[2] - bbcode word + // $matches[3] - parameter, including '=' + // $matches[4] - bit between the tags (i.e. text to process) + // $matches[5] - closing tag + $bbFile = e_FILE.'bbcode/'.strtolower(str_replace('_', '', $matches[2])).'.bb'; // In case we decide to load a file + $bbcode = ''; + $code_text = $matches[4]; + $parm = $matches[3] ? substr($matches[3],1) : ''; + $last_bbcode = $matches[2]; + switch ($matches[2]) { - if (!is_object($this->e_hook[$hook])) + case 'php' : + if (DB_INF_SHOW) echo "PHP decode: ".htmlentities($matches[4])."

"; + $proc_funcs = TRUE; // Probably run the output through the normal processing functions - but put here so the PHP code can disable if desired + // This is just the contents of the php.bb file pulled in - its short, so will be quicker + // $search = array(""", "'", "$", '
', E_NL, "->", "<br />"); + // $replace = array('"', "'", "$", "\n", "\n", "->", "
"); + // Shouldn't have any parameter on this bbcode + // if (!$matches[3]) $bbcode = str_replace($search, $replace, $matches[4]); // Not sure whether checks are necessary now we've reorganised + // Because we're bypassing most of the initial parser processing, we should be able to just reverse the effects of toDB() and execute the code + if (!$matches[3]) $bbcode = html_entity_decode($matches[4], ENT_QUOTES, CHARSET); + if (DB_INF_SHOW) echo "PHP after decode: ".htmlentities($bbcode)."

"; + break; + case 'hide' : + $proc_funcs = TRUE; + default : // Most bbcodes will just execute their normal file + $bbcode = file_get_contents($bbFile); // Just read in the code file and execute it + } // end - switch ($matches[2]) + if ($bbcode) + { // Execute the file + ob_start(); + $bbcode_return = eval($bbcode); + $bbcode_output = ob_get_contents(); + ob_end_clean(); + // added to remove possibility of nested bbcode exploits ... + // (same as in bbcode_handler - is it right that it just operates on $bbcode_return and not on $bbcode_output? - QUERY XXX-02 + if(strpos($bbcode_return, "[") !== FALSE) { - require_once(e_PLUGIN.$hook."/".$hook.".php"); - $hook_class = "e_".$hook; - $this->e_hook[$hook] = new $hook_class; + $exp_search = array("eval", "expression"); + $exp_replace = array("eval", "expression"); + $bbcode_return = str_replace($exp_search, $exp_replace, $bbcode_return); } - $sub_blk = $this->e_hook[$hook]->$hook($sub_blk,$opts['context']); + $full_text = $bbcode_output.$bbcode_return; } } + } - if(isset($pref['e_tohtml_list']) && is_array($pref['e_tohtml_list'])) + + + if ($proc_funcs) + { // Do the 'normal' processing - in principle, as previously - but think about the order. + + // Split out and ignore any scripts and style blocks. With just two choices we can match the closing tag in the regex + $subcon = preg_split('#((?:]+>.*?|tyle[^>]+>.*?))#mis', $full_text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE ); + foreach ($subcon as $sub_blk) { - foreach($pref['e_tohtml_list'] as $hook) - { - if (!is_object($this->e_hook[$hook])) + if (substr($sub_blk,0,7) == 'e_hook[$hook] = new $hook_class; + if ($opts['scripts']) + { + $ret_parser .= $sub_blk; // Strip scripts unless permitted + } } - $sub_blk = $this->e_hook[$hook]->to_html($sub_blk, $opts['context']); - } - } - } + elseif (substr($sub_blk,0,6) == '"; + $ret_parser .= $sub_blk; + } + else + { + // Do 'normal' processing on a chunk + + + // Could put tag stripping in here + +/* + // Line break compression - filter white space after HTML tags + if (!$opts['nobreak']) + { + $sub_blk = preg_replace("#>\s*[\r]*\n[\r]*#", ">", $sub_blk); + } +*/ + + // Link substitution + // Convert URL's to clickable links, unless modifiers or prefs override + if ($opts['link_click']) + { + if ($opts['link_replace']) + { + $_ext = ($pref['links_new_window'] ? " rel=\"external\"" : ""); + $sub_blk = preg_replace("#(^|[\s])([\w]+?://(?:[\w-%]+?)(?:\.[\w-%]+?)+.*?)(?=$|[\s()[\]<]|\.\s|\.$|,\s|,$)#is", "\\1".$pref['link_text']."", $sub_blk); + $sub_blk = preg_replace("#(^|[\s])((?:www|ftp)(?:\.[\w-%]+?){2}.*?)(?=$|[\s()[\]<]|\.\s|\.$|,\s|,$)#is", "\\1".$pref['link_text']."", $sub_blk); + if ($this->isutf8) + { + $email_text = ($pref['email_text']) ? $this->replaceConstants($pref['email_text']) : "\\1\\2©\\3"; + } + else + { + $email_text = ($pref['email_text']) ? $this->replaceConstants($pref['email_text']) : "\\1\\2©\\3"; + } + $sub_blk = preg_replace("#([\n ])([a-z0-9\-_.]+?)@([\w\-]+\.([\w\-\.]+\.)*[\w]+)#i", "\\1".$email_text."", $sub_blk); + } + else + { + $sub_blk = preg_replace("#(^|[\s])([\w]+?://(?:[\w-%]+?)(?:\.[\w-%]+?)+.*?)(?=$|[\s()[\]<]|\.\s|\.$|,\s|,$)#is", "\\1\\2", $sub_blk); + $sub_blk = preg_replace("#(^|[\s])((?:www|ftp)(?:\.[\w-%]+?){2}.*?)(?=$|[\s()[\]<]|\.\s|\.$|,\s|,$)#is", "\\1\\2", $sub_blk); + $sub_blk = preg_replace("#([\n ])([a-z0-9\-_.]+?)@([\w\-]+\.([\w\-\.]+\.)*[\w]+)#i", "\\1".LAN_EMAIL_SUBS."", $sub_blk); + } + } + + + // Convert emoticons to graphical icons, if enabled + if ($opts['emotes']) + { + if (!is_object($this->e_emote)) + { + require_once(e_HANDLER.'emote_filter.php'); + $this->e_emote = new e_emoteFilter; + } + $sub_blk = $this->e_emote->filterEmotes($sub_blk); + } - // Word wrap - if (!$opts['nobreak']) - { - $sub_blk = $this -> textclean($sub_blk, $wrap); - } + // Reduce newlines in all forms to a single newline character (finds '\n', '\r\n', '\n\r') + if (!$opts['nobreak']) + { + $sub_blk = preg_replace("#[\r]*\n[\r]*#", E_NL, $sub_blk); + } + + + + // Entity conversion + // Restore entity form of quotes and such to single characters, except for text destined for tag attributes or JS. + if ($opts['value']) + { // output used for attribute values. + $sub_blk = str_replace($this -> replace, $this -> search, $sub_blk); + } + else + { // output not used for attribute values. + $sub_blk = str_replace($this -> search, $this -> replace, $sub_blk); + } - // Search highlighting - // Search Highlight - if (!$opts['emotes_off']) - { - if ($this->checkHighlighting()) - { - $sub_blk = $this -> e_highlight($sub_blk, $this -> e_query); - } - } + // BBCode processing (other than the four already done, which shouldn't appear at all in the text) + if ($parseBB !== FALSE) + { + if (!is_object($this->e_bb)) + { + require_once(e_HANDLER.'bbcode_handler.php'); + $this->e_bb = new e_bbcode; + } + if ($parseBB === TRUE) + { + $sub_blk = $this->e_bb->parseBBCodes($sub_blk, $postID); // 'Normal' or 'legacy' processing + } + elseif ($parseBB === 'STRIP') + { + $sub_blk = $this->e_bb->parseBBCodes($sub_blk, $postID, 'default', TRUE); // Need to strip all BBCodes + } + else + { + $sub_blk = $this->e_bb->parseBBCodes($sub_blk, $postID, 'default', $parseBB); // Need to strip just some BBCodes + } + } - // Purpose of this block? - $nl_replace = "
"; - if ($opts['nobreak']) - { - $nl_replace = ''; - } - elseif ($opts['retain_nl']) - { - $nl_replace = "\n"; - } - $sub_blk = str_replace(E_NL, $nl_replace, $sub_blk); + + // replace all {e_XXX} constants with their e107 value. modifier determines relative/absolute conversion + // (Moved to after bbcode processing by Cameron) + if ($opts['constants']) + { + $sub_blk = $this->replaceConstants($sub_blk, ($opts['constants'] == 'abs' ? 'full' : '')); + } + + + + // profanity filter + if ($pref['profanity_filter']) + { + if (!is_object($this->e_pf)) + { + require_once(e_HANDLER."profanity_filter.php"); + $this->e_pf = new e_profanityFilter; + } + $sub_blk = $this->e_pf->filterProfanities($sub_blk); + } + + + // Shortcodes + // Optional short-code conversion + if ($opts['parse_sc']) + { + $sub_blk = $this->parseTemplate($sub_blk, TRUE); + } + - $ret_parser .= $sub_blk; - } // End of 'normal' processing for a block of text - - } // End of 'foreach() on each block of non-script text + //Run any hooked in parsers + if ($opts['hook']) + { + if ( varset($pref['tohtml_hook'])) + { //Process the older tohtml_hook pref (deprecated) + foreach(explode(",",$pref['tohtml_hook']) as $hook) + { + if (!is_object($this->e_hook[$hook])) + { + require_once(e_PLUGIN.$hook."/".$hook.".php"); + $hook_class = "e_".$hook; + $this->e_hook[$hook] = new $hook_class; + } + $sub_blk = $this->e_hook[$hook]->$hook($sub_blk,$opts['context']); + } + } + + if(isset($pref['e_tohtml_list']) && is_array($pref['e_tohtml_list'])) + { + foreach($pref['e_tohtml_list'] as $hook) + { + if (!is_object($this->e_hook[$hook])) + { + require_once(e_PLUGIN.$hook."/e_tohtml.php"); + $hook_class = "e_tohtml_".$hook; + $this->e_hook[$hook] = new $hook_class; + } + $sub_blk = $this->e_hook[$hook]->to_html($sub_blk, $opts['context']); + } + } + } - } // End of 'normal' parsing (non-script text) - else - { - $ret_parser .= $full_text; // Text block that needed no processing at all + // Word wrap + if ($wrap && !$opts['nobreak']) + { + $sub_blk = $this -> textclean($sub_blk, $wrap); + } + + + + // Search highlighting + if ($opts['emotes']) // Why?? + { + if ($this->checkHighlighting()) + { + $sub_blk = $this -> e_highlight($sub_blk, $this -> e_query); + } + } + + + $nl_replace = '
'; // Default replaces all \n with
for HTML display + if ($opts['nobreak']) + { + $nl_replace = ''; + } + elseif ($opts['retain_nl']) + { + $nl_replace = "\n"; + } + $sub_blk = str_replace(E_NL, $nl_replace, $sub_blk); + + + $ret_parser .= $sub_blk; + } // End of 'normal' processing for a block of text + + } // End of 'foreach() on each block of non-script text + + } // End of 'normal' parsing (non-script text) + else + { + $ret_parser .= $full_text; // Text block that needed no processing at all + } } - } - return trim($ret_parser); + return trim($ret_parser); } - function toAttribute($text) { + + function toAttribute($text) + { $text = str_replace("&","&",$text); // URLs posted without HTML access may have an & in them. $text = htmlspecialchars($text, ENT_QUOTES, CHARSET); // Xhtml compliance. if (!preg_match('/&#|\'|"|\(|\)|<|>/s', $text)) { - $text = $this->replaceConstants($text); - return $text; - } else { + $text = $this->replaceConstants($text); + return $text; + } + else + { return ''; } } - function toJS($stringarray) { + + // Convert text blocks which are to be embedded within JS + function toJS($stringarray) + { $search = array("\r\n","\r","
","'"); $replace = array("\\n","","\\n","\'"); $stringarray = str_replace($search, $replace, $stringarray); @@ -1036,9 +1103,10 @@ class e_parse return strtr ($stringarray, $trans_tbl); } + + function toRss($text,$tags=FALSE) { - if($tags != TRUE) { $text = $this -> toHTML($text,TRUE); @@ -1101,23 +1169,25 @@ class e_parse SITEURL.$THEMES_DIRECTORY, SITEURL.$DOWNLOADS_DIRECTORY); $search = array("{e_BASE}","{e_IMAGE_ABS}","{e_THEME_ABS}","{e_IMAGE}","{e_PLUGIN}","{e_FILE}","{e_THEME}","{e_DOWNLOAD}"); - if (ADMIN) { + if (ADMIN) + { $replace_relative[] = $ADMIN_DIRECTORY; $replace_absolute[] = SITEURL.$ADMIN_DIRECTORY; $search[] = "{e_ADMIN}"; } - if ($all) { - if (USER) - { // Can only replace with valid number for logged in users - $replace_relative[] = USERID; - $replace_absolute[] = USERID; - } - else - { - $replace_relative[] = ''; - $replace_absolute[] = ''; - } - $search[] = "{USERID}"; + if ($all) + { + if (USER) + { // Can only replace with valid number for logged in users + $replace_relative[] = USERID; + $replace_absolute[] = USERID; + } + else + { + $replace_relative[] = ''; + $replace_absolute[] = ''; + } + $search[] = "{USERID}"; } $replace = ((string)$nonrelative == "full" ) ? $replace_absolute : $replace_relative; return str_replace($search,$replace,$text); @@ -1131,6 +1201,7 @@ class e_parse return $text; } + function doReplace($matches) { if(defined($matches[1]) && ($matches[1] != 'e_ADMIN' || ADMIN)) @@ -1178,11 +1249,13 @@ class e_parse } - function e_highlight($text, $match) { + function e_highlight($text, $match) + { preg_match_all("#<[^>]+>#", $text, $tags); $text = preg_replace("#<[^>]+>#", "<|>", $text); $text = preg_replace("#(\b".$match."\b)#i", "\\1", $text); - foreach ($tags[0] as $tag) { + foreach ($tags[0] as $tag) + { $text = preg_replace("#<\|>#", $tag, $text, 1); } return $text;