rror', '>', '"', ' & ');
// Cached result of checkHighlighting(): unset until first calculated, then TRUE or FALSE.
protected $e_highlighting;
// Highlight query - built by checkHighlighting() from the q= / p= parameter of the HTTP referer.
protected $e_query;
// Thumbnail settings. NOTE(review): presumably 0 means "not set" for height/crop/encode - confirm against the thumbnail code.
private $thumbWidth = 100;
private $thumbHeight = 0;
private $thumbCrop = 0;
private $thumbEncode = 0;
// NOTE(review): staticCount / staticUrl are not used in this part of the file; presumably related to static URL handling - confirm.
private $staticCount = 0;
protected $staticUrl;
/** @var array Stored relative paths - used by replaceConstants() */
private $relativePaths = array();
// BBcodes that contain preformatted code.
private $preformatted = array('html', 'markdown');
// NOTE(review): starts empty; the code that fills it is outside this part of the file.
private $bbList = array();
// Default value of every parsing option. Super modifiers and individual modifiers
// (declared separately in this class) override these per call.
private $e_optDefault = array(
// default context: reflects legacy settings (many items enabled)
'context' => 'OLDDEFAULT',
// Toggled by the 'fromadmin' / 'notadmin' modifiers. NOTE(review): exact effect is not visible here - confirm where it is read.
'fromadmin' => false,
// Enable emote display
'emotes' => true,
// Convert defines(constants) within text.
'defs' => false,
// replace all {e_XXX} constants with their e107 value - 'rel', 'abs' or 'full' (FALSE = no replacement)
'constants' => false,
// Enable hooked parsers
'hook' => true,
// Allow scripts through (new for 0.8)
'scripts' => true,
// Make links clickable
'link_click' => true,
// Substitute on clickable links (only if link_click == TRUE)
'link_replace' => true,
// Parse shortcodes - TRUE enables parsing
'parse_sc' => false,
// remove HTML tags.
'no_tags' => false,
// Restore entity form of quotes and such to single characters - TRUE disables
'value' => false,
// Line break compression - TRUE removes newline characters
'nobreak' => false,
// Retain newlines - wraps to \n instead of <br /> if TRUE (for non-HTML email text etc)
'retain_nl' => false
);
// Super modifiers override default option values.
// Each entry lists only the options that differ from the defaults for that kind of text.
private $e_SuperMods = array(
//text is part of a title (e.g. news title)
'TITLE' =>
array(
'nobreak' => true, 'retain_nl' => true, 'link_click' => false, 'emotes' => false, 'defs' => true, 'parse_sc' => true
),
// same as TITLE, but HTML tags are also removed
'TITLE_PLAIN' =>
array(
'nobreak' => true, 'retain_nl' => true, 'link_click' => false, 'emotes' => false, 'defs' => true, 'parse_sc' => true, 'no_tags' => true
),
//text is user-entered (i.e. untrusted) and part of a title (e.g. forum title)
'USER_TITLE' =>
array(
'nobreak' => true, 'retain_nl' => true, 'link_click' => false, 'scripts' => false, 'emotes' => false, 'hook' => false
),
// text is the 'title' of an email or similar - being sent 'off-site' so don't rely on server availability
'E_TITLE' =>
array(
'nobreak' => true, 'retain_nl' => true, 'defs' => true, 'parse_sc' => true, 'emotes' => false, 'scripts' => false, 'link_click' => false
),
// text is part of the summary of a longer item (e.g. content summary)
'SUMMARY' =>
array(
'defs' => true, 'constants' => 'full', 'parse_sc' => true
),
// text is the description of an item (e.g. download, link)
'DESCRIPTION' =>
array(
'defs' => true, 'constants' => 'full', 'parse_sc' => true
),
// text is 'body' or 'bulk' text (e.g. custom page body, content body)
'BODY' =>
array(
'defs' => true, 'constants' => 'full', 'parse_sc' => true
),
// text is parsed by the Wysiwyg editor. eg. TinyMce
'WYSIWYG' =>
array(
'hook' => false, 'link_click' => false, 'link_replace' => false, 'retain_nl' => true
),
// text is user-entered (i.e. untrusted) 'body' or 'bulk' text (e.g. custom page body, content body)
// NOTE(review): 'nostrip' is not one of the keys of the default option table - confirm it is still read anywhere.
'USER_BODY' =>
array(
'constants' => 'full', 'scripts' => false, 'nostrip' => false
),
// text is 'body' of email or similar - being sent 'off-site' so don't rely on server availability
'E_BODY' =>
array(
'defs' => true, 'constants' => 'full', 'parse_sc' => true, 'emotes' => false, 'scripts' => false, 'link_click' => false
),
// text is text-only 'body' of email or similar - being sent 'off-site' so don't rely on server availability
'E_BODY_PLAIN' =>
array(
'defs' => true, 'constants' => 'full', 'parse_sc' => true, 'emotes' => false, 'scripts' => false, 'link_click' => false, 'retain_nl' => true, 'no_tags' => true
),
// text is the 'content' of a link (A tag, etc)
'LINKTEXT' =>
array(
'nobreak' => true, 'retain_nl' => true, 'link_click' => false, 'emotes' => false, 'hook' => false, 'defs' => true, 'parse_sc' => true
),
// text is used (for admin edit) without fancy conversions or html.
'RAWTEXT' =>
array(
'nobreak' => true, 'retain_nl' => true, 'link_click' => false, 'emotes' => false, 'hook' => false, 'no_tags' => true
),
// every option explicitly switched off
'NODEFAULT' =>
array ('context' => false, 'fromadmin' => false, 'emotes' => false, 'defs' => false, 'constants' => false, 'hook' => false,
'scripts' => false, 'link_click' => false, 'link_replace' => false, 'parse_sc' => false, 'no_tags' => false, 'value' => false,
'nobreak' => false, 'retain_nl' => false
)
);
// Individual modifiers change the current context.
// Each modifier name maps to the single option it sets and the value it sets it to.
private $e_Modifiers = array(
'emotes_off' => array('emotes' => false),
'emotes_on' => array('emotes' => true),
'no_hook' => array('hook' => false),
'do_hook' => array('hook' => true),
// New for 0.8
'scripts_off' => array('scripts' => false),
// New for 0.8
'scripts_on' => array('scripts' => true),
'no_make_clickable' => array('link_click' => false),
'make_clickable' => array('link_click' => true),
'no_replace' => array('link_replace' => false),
// Replace text of clickable links (only if make_clickable option set)
'replace' => array('link_replace' => true),
// No path replacement
'consts_off' => array('constants' => false),
// Relative path replacement
'consts_rel' => array('constants' => 'rel'),
// Absolute path replacement
'consts_abs' => array('constants' => 'abs'),
// Full path replacement
'consts_full' => array('constants' => 'full'),
// No shortcode parsing
'scparse_off' => array('parse_sc' => false),
'scparse_on' => array('parse_sc' => true),
// Strip tags
'no_tags' => array('no_tags' => true),
// Leave tags
'do_tags' => array('no_tags' => false),
'fromadmin' => array('fromadmin' => true),
'notadmin' => array('fromadmin' => false),
// entity replacement
'er_off' => array('value' => false),
'er_on' => array('value' => true),
// Decode constant if exists
'defs_off' => array('defs' => false),
'defs_on' => array('defs' => true),
'dobreak' => array('nobreak' => false),
'nobreak' => array('nobreak' => true),
// Line break using \n
'lb_nl' => array('retain_nl' => true),
// Line break using <br />
'lb_br' => array('retain_nl' => false),
// Legacy option names below here - discontinue later
'retain_nl' => array('retain_nl' => true),
'defs' => array('defs' => true),
'parse_sc' => array('parse_sc' => true),
'constants' => array('constants' => 'rel'),
'value' => array('value' => true),
// NOTE(review): 'wysiwyg' is not one of the keys of the default option table - confirm where it is read.
'wysiwyg' => array('wysiwyg' => true)
);
/**
* DOM document created in the constructor (version '1.0', encoding 'utf-8').
* @var DOMDocument
*/
private $domObj;
// Set to TRUE by toDB() when the submitted data is detected as HTML.
private $isHtml = false;
// NOTE(review): the next three are not assigned in this part of the file; presumably framework/feature flags - confirm.
private $bootstrap;
private $fontawesome;
private $convertToWebP = false;
// Working lists. NOTE(review): presumably filled while HTML is being cleaned - the code that uses them is outside this part of the file.
private $removedList = array();
private $nodesToDelete = array();
private $nodesToConvert = array();
private $nodesToDisableSC = array();
private $pathList = array();
// Attribute whitelist keyed by tag name; 'default' holds the attributes listed for every tag.
// NOTE(review): how 'default' is merged into the per-tag lists is not visible here (see compileAttributeDefaults(), called from the constructor).
private $allowedAttributes = array(
'default' => array('id', 'style', 'class', 'title', 'lang', 'accesskey'),
'img' => array('src', 'alt', 'width', 'height'),
'a' => array('href', 'target', 'rel'),
'script' => array('type', 'src', 'language', 'async'),
'iframe' => array('src', 'frameborder', 'width', 'height'),
'input' => array('type', 'name', 'value'),
'form' => array('action', 'method', 'target'),
'audio' => array('src', 'controls', 'autoplay', 'loop', 'muted', 'preload'),
'video' => array('autoplay', 'controls', 'height', 'loop', 'muted', 'poster', 'preload', 'src', 'width'),
'td' => array('colspan', 'rowspan'),
'th' => array('colspan', 'rowspan'),
'col' => array('span'),
'embed' => array('src', 'wmode', 'type', 'width', 'height'),
'x-bbcode' => array('alt'),
'label' => array('for'),
'source' => array('media', 'sizes', 'src', 'srcset', 'type'),
);
// Regular-expression fragments describing attribute values that are treated as dangerous.
private $badAttrValues = array('javascript[\s]*?:', 'alert\(', 'vbscript[\s]*?:', 'data:text\/html', 'mhtml[\s]*?:', 'data:[\s]*?image');
private $replaceAttrValues = array(
'default' => array()
);
// Tag whitelist. Note that 'img', 'div', 'pre' and 'section' each appear twice in this list.
private $allowedTags = array('html', 'body', 'div', 'a', 'img', 'table', 'tr', 'td', 'th', 'tbody', 'thead', 'colgroup', 'b',
'i', 'pre', 'code', 'strong', 'u', 'em', 'ul', 'ol', 'li', 'img', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p',
'div', 'pre', 'section', 'article', 'blockquote', 'hgroup', 'aside', 'figure', 'figcaption', 'abbr', 'span', 'audio', 'video', 'source', 'br',
'small', 'caption', 'noscript', 'hr', 'section', 'iframe', 'sub', 'sup', 'cite', 'x-bbcode', 'label'
);
private $scriptTags = array('script', 'applet', 'form', 'input', 'button', 'embed', 'object', 'ins', 'select', 'textarea'); //allowed when $pref['post_script'] is enabled.
// Event-handler attributes. NOTE(review): presumably permitted only together with the script tags above - confirm.
private $scriptAttributes = array('onclick', 'onchange', 'onblur', 'onload', 'onfocus', 'onkeydown', 'onkeypress', 'onkeyup',
'ondblclick', 'onmousedown', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup', 'onmousewheel',
'onwheel', 'oncopy', 'oncut', 'onpaste'
);
private $blockTags = array('pre', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'blockquote'); // element includes its own line-break.
private $scriptAccess = false; // nobody.
/**
 * Builds the parser instance.
 *
 * Creates the UTF-8 DOM document, then runs init() followed by
 * compileAttributeDefaults(). The constructor stays public for backward
 * compatibility: parts of the core still call "new e_parse()".
 */
public function __construct()
{
	$document = new DOMDocument('1.0', 'utf-8');
	$this->domObj = $document;

	// Same sequence as before: DOM object first, then the two set-up routines.
	$this->init();
	$this->compileAttributeDefaults();
}
/**
 * Returns one of the two modifier tables.
 *
 * @param string $type 'super' selects the super-modifier table; any other value selects the individual modifiers
 * @return array
 */
public function getModifierList($type = '')
{
	return ($type === 'super') ? $this->e_SuperMods : $this->e_Modifiers;
}
/**
 * Switches the multibyte (mbstring) string helpers on or off.
 *
 * Passing boolean FALSE turns them off. Any other value turns them on, but
 * only when the mbstring extension is loaded; in that case the internal
 * encoding is also set to UTF-8. Without mbstring the current setting is
 * left untouched.
 *
 * @param mixed $bool strictly FALSE to disable, anything else to request multibyte handling
 * @return null
 */
public function setMultibyte($bool)
{
	if($bool !== false)
	{
		if(extension_loaded('mbstring'))
		{
			$this->multibyte = true;
			mb_internal_encoding('UTF-8');
		}

		return;
	}

	$this->multibyte = false;
}
/**
 * Length of a string - UTF-8 aware counterpart of strlen().
 *
 * Counts characters via mb_strlen() when multibyte handling is active,
 * otherwise counts bytes via strlen().
 *
 * @param string $str The string to measure.
 * @return integer Number of characters (or bytes, without multibyte handling); 0 for an empty string.
 */
public function ustrlen($str)
{
	return $this->multibyte ? mb_strlen($str) : strlen($str);
}
/**
 * Lowercases a string - UTF-8 aware counterpart of strtolower().
 *
 * Uses mb_strtolower() when multibyte handling is active, strtolower() otherwise.
 *
 * @param string $str The string to convert.
 * @return string The lowercased string.
 */
public function ustrtolower($str)
{
	return $this->multibyte ? mb_strtolower($str) : strtolower($str);
}
/**
 * Uppercases a string - UTF-8 aware counterpart of strtoupper().
 *
 * Uses mb_strtoupper() when multibyte handling is active, strtoupper() otherwise.
 *
 * @param string $str The string to convert.
 * @return string The uppercased string.
 */
public function ustrtoupper($str)
{
	return $this->multibyte ? mb_strtoupper($str) : strtoupper($str);
}
/**
 * Position of the first occurrence of a substring - UTF-8 aware counterpart of strpos().
 *
 * With multibyte handling active the search is done by mb_strpos() and the
 * result is a character offset; otherwise strpos() is used and the result
 * is a byte offset. The search is case-sensitive.
 *
 * @param string $haystack The string being searched in.
 * @param string $needle The string being searched for.
 * @param integer $offset [optional] Where in the haystack to start searching.
 *                        The returned position is still relative to the beginning of the haystack.
 * @return integer|boolean The position, or boolean FALSE when the needle is not found.
 */
public function ustrpos($haystack, $needle, $offset = 0)
{
	return $this->multibyte
		? mb_strpos($haystack, $needle, $offset)
		: strpos($haystack, $needle, $offset);
}
/**
 * Position of the last occurrence of a substring - UTF-8 aware counterpart of strrpos().
 *
 * With multibyte handling active the search is done by mb_strrpos() and the
 * result is a character offset; otherwise strrpos() is used and the result
 * is a byte offset. The search is case-sensitive.
 *
 * @param string $haystack The string being searched in.
 * @param string $needle The string being searched for.
 * @param integer $offset [optional] Where in the haystack to start searching.
 *                        The returned position is still relative to the beginning of the haystack.
 * @return integer|boolean The position, or boolean FALSE when the needle is not found.
 */
public function ustrrpos($haystack, $needle, $offset = 0)
{
	return $this->multibyte
		? mb_strrpos($haystack, $needle, $offset)
		: strrpos($haystack, $needle, $offset);
}
/**
 * Case-insensitive "rest of string from first match" - UTF-8 aware counterpart of stristr().
 *
 * Delegates to mb_stristr() when multibyte handling is active, stristr() otherwise.
 *
 * @param string $haystack The string to search in.
 * @param mixed $needle The value to search for.
 * @param bool $before_needle [optional] If TRUE, returns the part of the haystack before the
 *                            first occurrence of the needle (excluding the needle).
 * @return string|boolean The matched portion, or FALSE when the needle is not found.
 */
public function ustristr($haystack, $needle, $before_needle = false)
{
	return $this->multibyte
		? mb_stristr($haystack, $needle, $before_needle)
		: stristr($haystack, $needle, $before_needle);
}
/**
 * Returns the portion of a string specified by the start and length parameters.
 * UTF-8 aware counterpart of substr().
 *
 * With multibyte handling active, positions are counted in characters (mb_substr());
 * otherwise they are counted in bytes (substr()).
 *
 * NOTE: There may be subtle differences in return values depending on which routine is used.
 * Before PHP 8.0 the native substr() can return FALSE; mb_substr() returns an empty string.
 *
 * @param string $str The input string.
 * @param integer $start Start of the portion to be returned; the first position is 0.
 * @param integer|null $length [optional] Maximum length of the returned portion.
 *                             When omitted (NULL) the rest of the string from $start is returned.
 * @return string The extracted part of the input string.
 */
public function usubstr($str, $start, $length = null)
{
	if($this->multibyte)
	{
		return ($length === null) ? mb_substr($str, $start) : mb_substr($str, $start, $length);
	}

	// Before PHP 8.0 substr() treats a NULL length as 0 and returns an empty string,
	// so the "rest of the string" case must omit the argument - same as the mb_ branch.
	return ($length === null) ? substr($str, $start) : substr($str, $start, $length);
}
/**
* Converts the supplied text (presumed to be from user input) to a format suitable for storing in a database table.
*
* Outline of what the body does:
*  - values that are neither string nor array, and the string '0', are returned unchanged;
*  - arrays are processed recursively, and their keys are passed through filter_var();
*  - slashes are stripped when MAGIC_QUOTES_GPC is TRUE and $nostrip is FALSE;
*  - unless $mod is 'pReFs', the text goes through preFilter() and cleanHtml(), and tags are stripped
*    when the current user is not in the 'post_html' class;
*  - the text is then either lightly escaped ($no_encode) or fully entity-encoded;
*  - e_parse plugin hooks run only when $mod is 'model'.
*
* @param mixed $data
* @param boolean $nostrip [optional] Assumes all data is GPC ($_GET, $_POST, $_COOKIE) unless indicate otherwise by setting this var to TRUE.
* If magic quotes is enabled on the server and you do not tell toDB() that the data is non GPC then slashes will be stripped when they should not be.
* @param boolean $no_encode [optional] This parameter should nearly always be FALSE. It is used by the save_prefs() function to preserve HTML content within prefs even when
* the save_prefs() function has been called by a non admin user / user without html posting permissions.
* @param boolean|string $mod [optional] model = admin-ui usage. The 'no_html' and 'no_php' modifiers blanket prevent HTML and PHP posting regardless of posting permissions. (used in logging)
* The 'pReFs' value is for internal use only, when saving prefs, to prevent sanitisation of HTML.
* @param mixed $parm [optional] Used in two ways below: a numeric value is passed as third argument to check_class();
* when hooks run it is read as an array with 'type' and 'field' keys.
* @return mixed
* @todo complete the documentation of this essential method
*/
public function toDB($data = null, $nostrip = false, $no_encode = false, $mod = false, $parm = null)
{
$variableType = gettype($data);
// Only strings and arrays are processed; the string '0' is also passed straight through.
if(($variableType !== 'string' && $variableType !== 'array') || $data === '0')
{
return $data;
}
if($variableType === 'array')
{
$ret = array();
foreach($data as $key => $var)
{
//Fix - sanitize keys as well
$key = filter_var($key, FILTER_SANITIZE_STRING);
$ret[$key] = $this->toDB($var, $nostrip, $no_encode, $mod, $parm);
}
return $ret;
}
if(MAGIC_QUOTES_GPC === true && $nostrip === false)
{
$data = stripslashes($data);
}
$core_pref = e107::getConfig();
if($mod !== 'pReFs') //XXX We're not saving prefs.
{
$data = $this->preFilter($data); // used by bb_xxx.php toDB() functions. bb_code.php toDB() allows us to properly bypass HTML cleaning below.
$data = $this->cleanHtml($data); // clean it regardless of if it is text or html. (html could have missing closing tags)
if(($this->isHtml($data)) && strpos($mod, 'no_html') === false)
{
$this->isHtml = true;
// $data = $this->cleanHtml($data); // sanitize all html. (moved above to include everything)
$data = str_replace(array('%7B', '%7D'), array('{', '}'), $data); // fix for {e_XXX} paths.
}
// else // caused double-encoding of '&'
// (the braces below are what remains of that disabled else-branch; they form an empty block)
{
// $data = str_replace('&','&',$data);
// $data = str_replace('<','<',$data);
// $data = str_replace('>','>',$data);
// $data = str_replace('&','&',$data);
}
if(!check_class($core_pref->get('post_html', e_UC_MAINADMIN)))
{
$data = strip_tags($data); // remove tags from cleaned html.
$data = str_replace(array('[html]', '[/html]'), '', $data);
}
// $data = html_entity_decode($data, ENT_QUOTES, 'utf-8'); // Prevent double-entities. Fix for [code] - see bb_code.php toDB();
}
// Users in the 'post_html' class get the light escaping below instead of full entity encoding.
if(check_class($core_pref->get('post_html'))) /*$core_pref->is('post_html') && XXX preformecd by cleanHtml() */
{
$no_encode = true;
}
// A numeric $parm is passed to check_class() as its third argument; if that check fails, full encoding is forced.
if($parm !== null && is_numeric($parm) && !check_class($core_pref->get('post_html'), '', $parm))
{
$no_encode = false;
}
if($no_encode === true && strpos($mod, 'no_html') === false)
{
// NOTE(review): the literals on the next two lines look damaged (the replacement values appear to have
// lost their HTML-entity form and the quoting does not balance) - restore them from a known-good copy.
$search = array('$', '"', "'", '\\', '');
$replace = array('$', '"', ''', '\', '<?');
$ret = str_replace($search, $replace, $data);
}
else // add entities for everything. we want to save the code.
{
// NOTE(review): the literals in this branch also look damaged (search and replace are identical,
// and the backslash replacement on the str_replace line is not a valid string) - confirm against a known-good copy.
$search = array('>', '<');
$replace = array('>', '<');
$data = str_replace($search, $replace, $data); // prevent > etc.
$data = htmlspecialchars($data, ENT_QUOTES, 'UTF-8');
$data = str_replace('\\', '\', $data);
$ret = preg_replace("/&#(\d*?);/", "\\1;", $data);
}
// XXX - php_bbcode has been deprecated.
if((strpos($mod, 'no_php') !== false) || !check_class($core_pref->get('php_bbcode')))
{
$ret = preg_replace("#\[(php)#i", "[\\1", $ret);
}
// Don't allow hooks to mess with prefs.
// Hooks only run for $mod === 'model'; every other caller gets the result here.
if($mod !== 'model')
{
return $ret;
}
/**
* e_parse hook
*/
$eParseList = $core_pref->get('e_parse_list');
if(!empty($eParseList))
{
// NOTE(review): $parm is indexed as an array here although it defaults to NULL - confirm callers using 'model' always pass 'type' and 'field'.
$opts = array(
'nostrip' => $nostrip,
'noencode' => $no_encode,
'type' => $parm['type'],
'field' => $parm['field']
);
foreach($eParseList as $plugin)
{
$hookObj = e107::getAddon($plugin, 'e_parse');
// A hook result that evaluates to false is ignored and the previous text is kept.
if($tmp = e107::callMethod($hookObj, 'toDB', $ret, $opts))
{
$ret = $tmp;
}
}
}
return $ret;
}
/**
 * Deprecated no-op, formerly a check for unmatched 'dangerous' HTML tags.
 *
 * Raises an E_USER_WARNING pointing callers at $tp->cleanHtml() and hands
 * the data back untouched.
 *
 * @param string $data
 * @param string $tagList - no longer used
 * @return string the unmodified $data
 * @deprecated Use $tp->cleanHtml() instead.
 */
public function htmlAbuseFilter($data, $tagList = '')
{
	$notice = __METHOD__ . ' is deprecated. Use $tp->cleanHtml() instead.'; // NO LAN
	trigger_error($notice, E_USER_WARNING);

	return $data;
}
/**
 * Deprecated no-op, formerly a check for potentially dangerous HTML tags.
 *
 * Raises an E_USER_WARNING pointing callers at $tp->filter() and returns the
 * data untouched.
 *
 * @param mixed $data
 * @param string $mode - no longer used
 * @return mixed the unmodified $data
 * @deprecated Use $tp->filter() instead.
 */
public function dataFilter($data, $mode = 'bbcode')
{
	// The message must name this method (dataFilter) so the warning can be traced to its caller.
	trigger_error('$tp->dataFilter() is deprecated. Use $tp->filter() instead.', E_USER_WARNING);

	return $data;
}
/**
 * Pre-processing step applied to data before it is written to the DB.
 *
 * Text for which isBBcode() reports no bbcode is returned as it is; otherwise
 * the bbcode parser is run in 'PRE' mode so bbcodes can act on the data.
 *
 * @param string $data - data about to be written to DB
 * @return string - modified data
 */
public function preFilter($data)
{
	if($this->isBBcode($data))
	{
		// second argument: the logged-in user's ID
		return e107::getBB()->parseBBCodes($data, defset('USERID'), 'default', 'PRE');
	}

	return $data;
}
/**
* Prepares stored text for display inside a form field.
*
* Empty values are returned unchanged. Strings starting with '[html]' first go through an
* extra search/replace pass; all text then goes through a second search/replace pass and,
* when e107::wysiwyg() is not TRUE, through urldecode().
*
* @param mixed $text
* @return mixed
*/
public function toForm($text)
{
if(empty($text)) // fix - handle proper 0, Space etc values.
{
return $text;
}
if(is_string($text) && strpos($text, '[html]') === 0)
{
// $text = $this->toHTML($text,true);
// NOTE(review): the $search literal below looks damaged (its entries appear to have lost their
// HTML-entity form and the quoting does not balance) - restore it from a known-good copy.
$search = array('"', ''', '\', '&',); // '&' must be last.
$replace = array('"', "'", "\\", '&');
// return htmlspecialchars_decode($text);
$text = str_replace($search, $replace, $text);
// return $text;
//$text = htmlentities($text,ENT_NOQUOTES, "UTF-8");
// return $text;
}
// return htmlentities($text);
// NOTE(review): as written, only '+' => '%2B' differs between $search and $replace; the other
// replacement values were presumably entities originally - confirm against a known-good copy.
$search = array('$', '"', '<', '>', '+');
$replace = array('$', '"', '<', '>', '%2B');
$text = str_replace($search, $replace, $text);
if(is_string($text) && e107::wysiwyg() !== true)
{
// fix for utf-8 issue with html_entity_decode(); ???
// (the '+' => '%2B' substitution above keeps literal plus signs intact through this call)
$text = urldecode($text);
// $text = str_replace(" ", " ", $text);
}
return $text;
}
/**
* Prepares posted data for redisplay in a form.
*
* Arrays are handled recursively (both keys and values). Strings have slashes stripped
* when MAGIC_QUOTES_GPC is enabled and then have quotes and angle brackets replaced.
*
* @param string|array $text
* @return array|string
*/
public function post_toForm($text)
{
if(is_array($text))
{
$arr = array();
foreach($text as $key => $value)
{
// keys get the same treatment as values
$key = $this->post_toForm($key);
$arr[$key] = $this->post_toForm($value);
}
return $arr;
}
if(MAGIC_QUOTES_GPC == true)
{
$text = stripslashes($text);
}
// NOTE(review): the replacement list below looks damaged (its entries appear to have lost their
// HTML-entity form and the quoting does not balance) - restore it from a known-good copy.
return str_replace(array("'", '"', '<', '>'), array(''', '"', '<', '>'), $text);
}
/**
 * Runs posted text through toDB() and then straight through toHTML().
 *
 * @param mixed $text
 * @param mixed $original_author forwarded to toDB() as its $parm argument
 * @param string $extra forwarded to toHTML() as its third argument
 * @param boolean|string $mod forwarded to toDB() as its $mod argument
 * @return mixed result of toHTML()
 */
public function post_toHTML($text, $original_author = false, $extra = '', $mod = false)
{
	$stored = $this->toDB($text, false, false, $mod, $original_author);

	return $this->toHTML($stored, true, $extra);
}
/**
 * Parses a template by delegating to the shortcode parser.
 *
 * A non-boolean $parseSCFiles only raises a notice; parsing still goes ahead.
 *
 * @param string $text - template to parse.
 * @param boolean $parseSCFiles - parse core 'single' shortcodes
 * @param object|array $extraCodes - shortcode class containing sc_xxxxx methods or an array of key/value pairs or legacy shortcode content (eg. content within .sc)
 * @param object $eVars - XXX more info needed.
 * @return string
 */
public function parseTemplate($text, $parseSCFiles = true, $extraCodes = null, $eVars = null)
{
	$flagIsBoolean = is_bool($parseSCFiles);
	if($flagIsBoolean === false)
	{
		trigger_error('$parseSCFiles in parseTemplate() was given incorrect data');
	}

	$parser = e107::getScParser();

	return $parser->parseCodes($text, $parseSCFiles, $extraCodes, $eVars);
}
/**
 * Simple parser: replaces every {WORD} placeholder in a template.
 *
 * The variables and the fallback are stored on the instance, then each
 * placeholder match is handed to simpleReplace().
 *
 * @param string $template
 * @param e_vars|array $vars
 * @param string $replaceUnset string to be used if replace variable is not set, false - don't replace
 * @return string parsed content
 */
public function simpleParse($template, $vars, $replaceUnset = '')
{
	$this->replaceVars = $vars;
	$this->replaceUnset = $replaceUnset;

	$callback = array($this, 'simpleReplace');

	return preg_replace_callback("#\{([\w]+)\}#", $callback, $template);
}
/**
 * Callback for simpleParse(): resolves one placeholder match.
 *
 * @param array $tmp regex match - [0] is the whole placeholder, [1] the name inside the braces
 * @return mixed the variable's value, or the fallback when it is not set
 */
protected function simpleReplace($tmp)
{
	// Fallback: the configured replacement, or the untouched placeholder when that is FALSE.
	$fallback = $tmp[0];
	if($this->replaceUnset !== false)
	{
		$fallback = $this->replaceUnset;
	}

	// Plain arrays are wrapped once so values can be read as object properties.
	if(is_array($this->replaceVars))
	{
		$this->replaceVars = new e_vars($this->replaceVars);
	}

	$key = $tmp[1]; // PHP7 fix.

	if(empty($this->replaceVars) || $this->replaceVars->$key === null)
	{
		return $fallback;
	}

	return $this->replaceVars->$key;
}
/**
* Inserts $break into over-long 'words' of an HTML string, leaving the tags themselves untouched.
*
* The string is split into tags and the text between them. Text that is not inside one of the
* $nobreak elements is split at whitespace, and any piece longer than $width is broken up,
* preferably just after one of the 'safe' characters listed in $lbrks.
* Note that piece lengths are measured with strlen(), i.e. in bytes.
*
* @param string $str text (may contain HTML) to process
* @param int $width maximum piece length; values below 6 return $str unchanged
* @param string $break string inserted at each break point
* @param string $nobreak space-separated list of tag names inside which no wrapping is done
* @param string $nobr not referenced in the function body
* @param false $utf TRUE forces the UTF-8 code path; otherwise it is used when CHARSET is 'utf-8'
* @return string
* @todo find a modern replacement
*/
public function htmlwrap($str, $width, $break = "\n", $nobreak = 'a', $nobr = 'pre', $utf = false)
{
/*
Pretty well complete rewrite to try and handle utf-8 properly.
Breaks each utf-8 'word' every $width characters max. If possible, breaks after 'safe' characters.
$break is the character inserted to flag the break.
$nobreak is a list of tags within which word wrap is to be inactive
*/
//TODO handle htmlwrap somehow
//return $str;
// Don't wrap if non-numeric width
$width = (int) $width;
// And trap stupid wrap counts
if($width < 6)
{
return $str;
}
// Transform protected element lists into arrays
$nobreak = explode(' ', strtolower($nobreak));
// Variable setup
// $innbk: stack of currently open protected tags; $drain: the output being built
$innbk = array();
$drain = '';
// List of characters it is "safe" to insert line-breaks at
// It is not necessary to add < and > as they are automatically implied
$lbrks = "/?!%)-}]\\\"':;&";
// Is $str a UTF8 string?
if($utf || strtolower(CHARSET) === 'utf-8')
{
// 0x1680, 0x180e, 0x2000-0x200a, 0x2028, 0x205f, 0x3000 are 'non-ASCII' Unicode UCS-4 codepoints - see http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
// All convert to 3-byte utf-8 sequences:
// 0x1680 0xe1 0x9a 0x80
// 0x180e 0xe1 0xa0 0x8e
// 0x2000 0xe2 0x80 0x80
// -
// 0x200a 0xe2 0x80 0x8a
// 0x2028 0xe2 0x80 0xa8
// 0x205f 0xe2 0x81 0x9f
// 0x3000 0xe3 0x80 0x80
// $utf8 is used below only as a truthy flag selecting the UTF-8 branch
$utf8 = 'u';
$whiteSpace = '#([\x20|\x0c]|[\xe1][\x9a][\x80]|[\xe1][\xa0][\x8e]|[\xe2][\x80][\x80-\x8a,\xa8]|[\xe2][\x81][\x9f]|[\xe3][\x80][\x80]+)#';
// Have to explicitly enumerate the whitespace chars, and use non-utf-8 mode, otherwise regex fails on badly formed utf-8
}
else
{
$utf8 = '';
// For non-utf-8, can use a simple match string
$whiteSpace = '#(\s+)#';
}
// Start of the serious stuff - split into HTML tags and text between
$content = preg_split('#(<.*?' . '>)#mis', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
foreach($content as $value)
{
if($value[0] === '<')
{
// We are within an HTML tag
// Create a lowercase copy of this tag's contents
$lvalue = strtolower(substr($value, 1, -1));
if($lvalue)
{
// Tag of non-zero length
// If the first character is not a / then this is an opening tag
if($lvalue[0] !== '/')
{
// Collect the tag name
// NOTE(review): $t[1] is read without checking that the match succeeded - confirm behaviour for tags such as comments.
preg_match("/^(\w*?)(\s|$)/", $lvalue, $t);
// If this is a protected element, activate the associated protection flag
if(in_array($t[1], $nobreak))
{
array_unshift($innbk, $t[1]);
}
}
else
{
// Otherwise this is a closing tag
// If this is a closing tag for a protected element, unset the flag
if(in_array(substr($lvalue, 1), $nobreak))
{
reset($innbk);
foreach($innbk as $key => $tag)
{
if(substr($lvalue, 1) == $tag)
{
unset($innbk[$key]);
break;
}
}
// re-index after removing the matching entry
$innbk = array_values($innbk);
}
}
}
else
{
// Eliminate any empty tags altogether
$value = '';
}
// Else if we're outside any tags, and with non-zero length string...
}
elseif($value)
{
// If unprotected...
if(!count($innbk))
{
// Use the ACK (006) ASCII symbol to replace all HTML entities temporarily
// (any ACK already present is removed first so the placeholders stay unambiguous)
$value = str_replace("\x06", '', $value);
preg_match_all("/&([a-z\d]{2,7}|#\d{2,5});/i", $value, $ents);
$value = preg_replace("/&([a-z\d]{2,7}|#\d{2,5});/i", "\x06", $value);
// echo "Found block length ".strlen($value).': '.substr($value,20).' ';
// Split at spaces - note that this will fail if presented with invalid utf-8 when doing the regex whitespace search
// $split = preg_split('#(\s)#'.$utf8, $value, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE );
$split = preg_split($whiteSpace, $value, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
$value = '';
foreach($split as $sp)
{
// echo "Split length ".strlen($sp).': '.substr($sp,20).' ';
// $loopCount guards the UTF-8 branch against endless looping (limit: 20 iterations per piece)
$loopCount = 0;
while(strlen($sp) > $width)
{
// Enough characters that we may need to do something.
$pulled = '';
if($utf8)
{
// Pull out a piece of the maximum permissible length
if(preg_match('#^((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,' . $width . '})(.?).*#s', $sp, $matches) == 0)
{
// Make any problems obvious for now
$value .= '[!invalid utf-8: ' . $sp . '!]';
$sp = '';
}
elseif(empty($matches[2]))
{
// utf-8 length is less than specified - treat as a special case
$value .= $sp;
$sp = '';
}
else
{
// Need to find somewhere to break the string
// (scan backwards through the pulled piece for the last 'safe' break character)
for($i = strlen($matches[1]) - 1; $i >= 0; $i--)
{
if(strpos($lbrks, $matches[1][$i]) !== false)
{
break;
}
}
if($i < 0)
{
// No 'special' break character found - break at the word boundary
$pulled = $matches[1];
}
else
{
$pulled = substr($sp, 0, $i + 1);
}
}
$loopCount++;
if($loopCount > 20)
{
// Make any problems obvious for now
$value .= '[!loop count exceeded: ' . $sp . '!]';
$sp = '';
}
}
else
{
// Byte-wise variant of the same backwards scan
for($i = min($width, strlen($sp)); $i > 0; $i--)
{
// No speed advantage to defining match character
if(strpos($lbrks, $sp[$i - 1]) !== false)
{
break;
}
}
if($i == 0)
{
// No 'special' break boundary character found - break at the word boundary
$pulled = substr($sp, 0, $width);
}
else
{
$pulled = substr($sp, 0, $i);
}
}
if($pulled)
{
$value .= $pulled . $break;
// Shorten $sp by whatever we've processed (will work even for utf-8)
$sp = substr($sp, strlen($pulled));
}
}
// Add in any residue
$value .= $sp;
}
// Put captured HTML entities back into the string
// (one placeholder per captured entity, in their original order)
foreach($ents[0] as $ent)
{
$value = preg_replace("/\x06/", $ent, $value, 1);
}
}
}
// Send the modified segment down the drain
$drain .= $value;
}
// Return contents of the drain
return $drain;
}
/**
 * CakePHP(tm) : Rapid Development Framework (http://www.cakephp.org)
 * Copyright 2005-2008, Cake Software Foundation, Inc. (http://www.cakefoundation.org)
 *
 * Truncate a HTML string
 *
 * Cuts the visible text of $text to $length characters and appends $ending if the text is longer
 * than $length. Tags do not count towards the length, a character entity counts as one character,
 * and every element still open at the cut point is closed after $ending.
 *
 * @param string $text String to truncate.
 * @param integer $length Maximum number of visible characters kept (the ending is appended on top of this).
 * @param string $ending It will be used as Ending and appended to the trimmed string.
 * @param boolean $exact If false, $text will not be cut mid-word (it is cut back to the last space, when there is one)
 * @return string Trimmed string.
 */
public function html_truncate($text, $length = 100, $ending = '...', $exact = true)
{
	// Nothing to do when the tag-free text already fits.
	if($this->ustrlen(preg_replace('/<.*?>/', '', $text)) <= $length)
	{
		return $text;
	}

	// Named, decimal and hexadecimal character references.
	$entityPattern = '/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i';
	// Elements that never take a closing tag. Anchored so that e.g. 'abbr' is not mistaken for 'br'.
	$voidPattern = '/^(?:img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)$/i';

	$totalLength = 0;
	$openTags = array();
	$truncate = '';

	// Each match: [1] optional tag, [2] its name, [3] the text that follows it.
	preg_match_all('/(<\/?([\w+]+)[^>]*>)?([^<>]*)/', $text, $tags, PREG_SET_ORDER);

	foreach($tags as $tag)
	{
		if(!$tag[2] || !preg_match($voidPattern, $tag[2]))
		{
			if(preg_match('/<[\w]+[^>]*>/', $tag[0]))
			{
				// opening tag: remember it, most recent first
				array_unshift($openTags, $tag[2]);
			}
			elseif(preg_match('/<\/([\w]+)[^>]*>/', $tag[0], $closeTag))
			{
				// closing tag: drop the matching open tag, if any
				$pos = array_search($closeTag[1], $openTags, true);
				if($pos !== false)
				{
					array_splice($openTags, $pos, 1);
				}
			}
		}

		$truncate .= $tag[1];

		// An entity counts as a single visible character.
		$contentLength = $this->ustrlen(preg_replace($entityPattern, ' ', $tag[3]));

		if($contentLength + $totalLength > $length)
		{
			$left = $length - $totalLength;
			$entitiesLength = 0;

			// Entities starting inside the remaining window are kept whole.
			if(preg_match_all($entityPattern, $tag[3], $entities, PREG_OFFSET_CAPTURE))
			{
				foreach($entities[0] as $entity)
				{
					if($entity[1] + 1 - $entitiesLength > $left)
					{
						break;
					}

					$left--;
					$entitiesLength += $this->ustrlen($entity[0]);
				}
			}

			$truncate .= $this->usubstr($tag[3], 0, $left + $entitiesLength);
			break;
		}

		$truncate .= $tag[3];
		$totalLength += $contentLength;

		if($totalLength >= $length)
		{
			break;
		}
	}

	if(!$exact)
	{
		$spacepos = $this->ustrrpos($truncate, ' ');

		// ustrrpos() returns FALSE when there is no space; in that case keep the exact cut
		// rather than discarding all of the text.
		if($spacepos !== false)
		{
			// Closing tags that fall after the last space are about to be cut off,
			// so their elements have to be closed again at the end.
			$bits = $this->usubstr($truncate, $spacepos);
			preg_match_all('/<\/([a-z]+)>/i', $bits, $droppedTags, PREG_SET_ORDER);

			foreach($droppedTags as $closingTag)
			{
				if(!in_array($closingTag[1], $openTags, true))
				{
					array_unshift($openTags, $closingTag[1]);
				}
			}

			$truncate = $this->usubstr($truncate, 0, $spacepos);
		}
	}

	$truncate .= $ending;

	// Close whatever is still open, innermost element first.
	foreach($openTags as $tag)
	{
		$truncate .= '</' . $tag . '>';
	}

	return $truncate;
}
/**
 * Shortens text to at most $len and appends $more when something was cut off.
 *
 * Text that already fits is returned untouched. Otherwise HTML entities are
 * decoded first; mb_strimwidth() does the trimming when it is available, and
 * a manual fallback is used when it is not.
 *
 * @param string $text string to process
 * @param integer $len length of characters to be truncated
 * @param string $more string which will be added if truncation
 * @return string
 */
public function text_truncate($text, $len = 200, $more = ' ... ')
{
	if($this->ustrlen($text) > $len)
	{
		$decoded = html_entity_decode($text, ENT_QUOTES, 'utf-8');

		if(function_exists('mb_strimwidth'))
		{
			return mb_strimwidth($decoded, 0, $len, $more);
		}

		$cut = $this->usubstr($decoded, 0, $len);

		// Look for a possibly broken html entity: an & within the last 8 characters
		// is removed together with whatever follows it.
		$tail = $this->usubstr($cut, -8);
		$ampOffset = $this->ustrrpos($tail, '&');
		if($ampOffset)
		{
			$keep = $this->ustrlen($cut) - 8 + $ampOffset;
			$cut = $this->usubstr($cut, 0, $keep);
		}

		return $cut . $more;
	}

	// Always valid
	return $text;
}
/**
 * Tidy a block of text: collapse triple newlines, wrap it via htmlwrap(), then normalise
 * the listed spacing variants to a single space.
 *
 * NOTE(review): the search strings in the final replacement are whitespace-only as written;
 * the accompanying remark suggests they were meant to target line breaks added by htmlwrap() - confirm.
 *
 * @param string $text text to clean
 * @param integer $wrap width handed on to htmlwrap()
 * @return string
 */
public function textclean($text, $wrap = 100)
{
	// One pass only: each run of three newlines becomes two
	$collapsed = str_replace("\n\n\n", "\n\n", $text);

	$wrapped = $this->htmlwrap($collapsed, $wrap);

	/* any linebreaks added by the htmlwrap function can be removed, as any \n's will be converted later anyway */
	return str_replace(array(' ', ' ', ' '), ' ', $wrapped);
}
/**
 * Test for text highlighting, and determine the text highlighting transformation.
 *
 * The result is calculated once and cached in $this->e_highlighting. Highlighting is active when
 * the 'search_highlight' pref is set, the current script (e_SELF) is not search.php, and the
 * HTTP referer carries a 'q=' or 'p=' parameter. In that case $this->e_query is built from the
 * referer value: '+' and '"' are removed, '*' becomes '.*?' and spaces become '\b|\b'.
 *
 * NOTE(review): the referer value is not passed through preg_quote() here; presumably consumers
 * use e_query as a regex fragment - confirm they cope with other metacharacters.
 *
 * @return boolean TRUE if highlighting is active for this page display
 */
public function checkHighlighting()
{
	global $pref;

	if(!defined('e_SELF'))
	{
		// Still in startup, so can't calculate highlighting (and nothing is cached yet)
		return false;
	}

	if(isset($this->e_highlighting))
	{
		// Already worked out during this request
		return $this->e_highlighting;
	}

	$this->e_highlighting = false;

	// empty() tolerates a missing pref (or $pref not being loaded at all) without a warning
	if(empty($pref['search_highlight']) || (strpos(e_SELF, 'search.php') !== false))
	{
		return false;
	}

	$shr = (isset($_SERVER['HTTP_REFERER']) ? $_SERVER['HTTP_REFERER'] : '');

	if((strpos($shr, 'q=') === false) && (strpos($shr, 'p=') === false))
	{
		return false;
	}

	$this->e_highlighting = true;

	if(!isset($this->e_query))
	{
		// Fall back to an empty query if the pattern unexpectedly fails to match
		$found = preg_match('#(q|p)=(.*?)(&|$)#', $shr, $matches);
		$rawQuery = ($found && isset($matches[2])) ? $matches[2] : '';

		$this->e_query = str_replace(array('+', '*', '"', ' '), array('', '.*?', '', '\b|\b'), trim(urldecode($rawQuery)));
	}

	return true;
}
/**
 * Replace text representation of website urls and email addresses with clickable equivalents.
 *
 * For type 'email' (also the fallback for unknown types) every address found is replaced by the
 * output of emailObfuscate(). For type 'url', addresses with a scheme (xxx://), addresses
 * starting with 'www.' and addresses starting with 'ftp.' are wrapped in an anchor; the latter
 * two get 'http://' and 'ftp://' prepended in the href.
 *
 * @param string $text
 * @param string $type email|url
 * @param array $opts options. (see below)
 * @param string $opts ['sub'] substitute text within links; a value ending in '.glyph' is converted with toGlyph()
 * @param bool $opts ['ext'] load link in new window (not for email)
 * @return string the processed text, or an empty string when $text is empty
 */
public function makeClickable($text = '', $type = 'email', $opts = array())
{
	if(empty($text))
	{
		return '';
	}

	$textReplace = (!empty($opts['sub'])) ? $opts['sub'] : '';

	if(substr($textReplace, -6) === '.glyph')
	{
		$textReplace = $this->toGlyph($textReplace, '');
	}

	switch($type)
	{
		default:
		case 'email':

			preg_match_all("#(?:[\n\r ]|^)?([a-z0-9\-_.]+?)@([\w\-]+\.([\w\-\.]+\.)*[\w]+)#i", $text, $match);

			if(!empty($match[0]))
			{
				$srch = array();
				$repl = array();

				foreach($match[0] as $eml)
				{
					// The pattern may have swallowed a leading whitespace character
					$email = trim($eml);
					$srch[] = $email;
					$repl[] = $this->emailObfuscate($email, $textReplace);
				}

				$text = str_replace($srch, $repl, $text);
			}
			break;

		case 'url':

			// Link text defaults to the matched address itself ($3)
			$linktext = (!empty($textReplace)) ? $textReplace : '$3';
			$external = (!empty($opts['ext'])) ? 'target="_blank"' : '';

			// $1 = leading delimiter, $2 = any word characters before the address, $3 = the address.
			// Each replacement wraps the address in an anchor so that it really becomes clickable
			// and the 'ext' option takes effect.
			// 1) addresses that carry a scheme
			$text = preg_replace("/(^|[\n \(])([\w]*?)([\w]*?:\/\/[\w]+[^ \,\"\n\r\t<]*)/is", '$1$2<a class="e-url" href="$3" ' . $external . '>' . $linktext . '</a>', $text);
			// 2) bare www. addresses - assume http
			$text = preg_replace("/(^|[\n \(])([\w]*?)((www)\.[^ \,\"\t\n\r\)<]*)/is", '$1$2<a class="e-url" href="http://$3" ' . $external . '>' . $linktext . '</a>', $text);
			// 3) bare ftp. addresses - $4 is the literal 'ftp', reused as the scheme
			$text = preg_replace("/(^|[\n ])([\w]*?)((ftp)\.[^ \,\"\t\n\r<]*)/is", '$1$2<a class="e-url" href="$4://$3" ' . $external . '>' . $linktext . '</a>', $text);

			break;
	}

	return $text;
}
/**
 * Convenience wrapper: hand the text to the BBCode handler returned by e107::getBB().
 *
 * @param string $text text to process
 * @param mixed $postID passed through unchanged to the handler
 * @return mixed whatever the handler's parseBBCodes() returns
 */
public function parseBBCodes($text, $postID)
{
	$bbHandler = e107::getBB();

	return $bbHandler->parseBBCodes($text, $postID);
}
/**
* Converts the text (presumably retrieved from the database) for HTML output.
*
* @param string $text
* @param boolean $parseBB [optional]
* @param string $modifiers [optional] TITLE|SUMMARY|DESCRIPTION|BODY|RAW|LINKTEXT etc.
* Comma-separated list, no spaces allowed
* first modifier must be a CONTEXT modifier, in UPPER CASE.
* subsequent modifiers are lower case - see $this->e_Modifiers for possible values
* @param mixed $postID [optional]
* @param boolean $wrap [optional]
* @return string
* @todo complete the documentation of this essential method
*/
public function toHTML($text, $parseBB = false, $modifiers = '', $postID = '', $wrap = false)
{
if(empty($text) || !is_string($text))
{
return $text;
}
if(empty($this->pref)) // cache the prefs.
{
$prefsUsed = array('smiley_activate', 'make_clickable', 'link_replace', 'main_wordwrap', 'link_text',
'email_text', 'links_new_window', 'profanity_filter', 'tohtml_hook', 'e_tohtml_list', 'e_parse_list'
);
$cfg = e107::getConfig();
foreach($prefsUsed as $v)
{
$this->pref[$v] = $cfg->get($v);
}
}
global $fromadmin;
// Set default modifiers to start
$opts = $this->getModifiers($modifiers);
if($this->isHtml($text)) //BC FIx for when HTML is saved without [html][/html]
{
$opts['nobreak'] = true;
$text = trim($text);
if(strpos($text, '[center]') === 0) // quick bc fix TODO Find a better solution. [center][/center] containing HTML.
{
$text = str_replace(array('[center]', '[/center]'), array("
", '
'), $text);
}
}
$fromadmin = $opts['fromadmin'];
// Convert defines(constants) within text. eg. Lan_XXXX - must be the entire text string (i.e. not embedded)
// The check for '::' is a workaround for a bug in the Zend Optimiser 3.3.0 and PHP 5.2.4 combination
// - causes crashes if '::' in site name
if($opts['defs'] && (strlen($text) < 35) && ((strpos($text, '::') === false) && defined(trim($text))))
{
$text = constant(trim($text)); // don't return yet, words could be hooked with linkwords etc.
}
if($opts['no_tags'])
{
$text = strip_tags($text);
}
/*
if(MAGIC_QUOTES_GPC === true) // precaution for badly saved data.
{
$text = stripslashes($text);
}
*/
// Make sure we have a valid count for word wrapping
if(!$wrap && !empty($this->pref['main_wordwrap']))
{
$wrap = $this->pref['main_wordwrap'];
}
// $text = " ".$text;
// Now get on with the parsing
$ret_parser = '';
$last_bbcode = '';
// So we can change them on each loop
$saveOpts = $opts;
if($parseBB == false)
{
$content = array($text);
}
else
{
// Split each text block into bits which are either within one of the 'key' bbcodes, or outside them
// (Because we have to match end words, the 'extra' capturing subpattern gets added to output array. We strip it later)
$content = preg_split('#(\[(table|html|php|code|scode|hide).*?\[/(?:\\2)\])#mis', $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
}
// Use $full_text variable so its available to special bbcodes if required
foreach($content as $full_text)
{
$proc_funcs = true;
$convertNL = true;
// We may have 'captured' a bbcode word - strip it if so
if($last_bbcode == $full_text)
{
$last_bbcode = '';
$proc_funcs = false;
$full_text = '';
}
else
{
// Set the options for this pass
$opts = $saveOpts;
// Have to have a good test in case a 'non-key' bbcode starts the block
// - so pull out the bbcode parameters while we're there
if(($parseBB !== false) && preg_match('#(^\[(table|html|php|code|scode|hide)(.*?)\])(.*?)(\[/\\2\]$)#is', $full_text, $matches))
{
// It's one of the 'key' bbcodes
// Usually don't want 'normal' processing if its a 'special' bbcode
$proc_funcs = false;
// $matches[0] - complete block from opening bracket of opening tag to closing bracket of closing tag
// $matches[1] - complete opening tag (inclusive of brackets)
// $matches[2] - bbcode word
// $matches[3] - parameter, including '='
// $matches[4] - bit between the tags (i.e. text to process)
// $matches[5] - closing tag
// In case we decide to load a file
// $bbPath = e_CORE . 'bbcodes/';
// $bbFile = strtolower(str_replace('_', '', $matches[2]));
// $bbcode = '';
// $className = '';
$full_text = '';
$code_text = $matches[4];
// $parm = $matches[3] ? substr($matches[3], 1) : '';
$last_bbcode = $matches[2];
switch($matches[2])
{
case 'php' :
$proc_funcs = false;
$code_text = '';
break;
case 'html' : // This overrides and deprecates html.bb
$proc_funcs = true;
// $code_text = str_replace("\r\n", " ", $code_text);
// $code_text = html_entity_decode($code_text, ENT_QUOTES, CHARSET);
// $code_text = str_replace('&','&',$code_text); // validation safe.
$html_start = ''; // markers for html-to-bbcode replacement.
$html_end = '';
$full_text = str_replace(array('[html]', '[/html]'), '', $code_text); // quick fix.. security issue?
$full_text = $this->parseBBCodes($full_text, $postID); // parse any embedded bbcodes eg. [img]
$full_text = $this->replaceConstants($full_text, 'abs'); // parse any other paths using {e_....
$full_text = $html_start . $full_text . $html_end;
$full_text = $this->parseBBTags($full_text); // strip tags.
$opts['nobreak'] = true;
$parseBB = false; // prevent further bbcode processing.
break;
case 'table' : // strip from inside of