rror', '>', '"', ' & ');
// Highlighting flag: unset until it has been calculated, then TRUE or FALSE
protected $e_highlighting;
// Query used for highlighting
protected $e_query;
// Thumbnail settings (width starts at 100, the others at 0)
private $thumbWidth = 100;
private $thumbHeight = 0;
private $thumbCrop = 0;
private $thumbEncode = 0;
// Static URL bookkeeping
private $staticCount = 0;
protected $staticUrl;
protected $staticUrlMap = [];
/** @var array Relative paths kept for replaceConstants() */
private $relativePaths = [];
/** @var array Names of the BBcodes that contain preformatted code */
private $preformatted = ['html', 'markdown'];
/**
 * Default value of every parsing option.
 * Super modifiers and individual modifiers override entries of this table.
 *
 * @var array
 */
private $e_optDefault = [
    // default context: reflects legacy settings (many items enabled)
    'context'      => 'OLDDEFAULT',
    // text originates from the admin area
    'fromadmin'    => false,
    // Enable emote display
    'emotes'       => true,
    // Convert defines (constants) within text
    'defs'         => false,
    // Replace all {e_XXX} constants with their e107 value - 'rel' or 'abs'
    'constants'    => false,
    // Enable hooked parsers
    'hook'         => true,
    // Allow scripts through (new for 0.8)
    'scripts'      => true,
    // Make links clickable
    'link_click'   => true,
    // Substitute on clickable links (only if link_click == TRUE)
    'link_replace' => true,
    // Parse shortcodes - TRUE enables parsing
    'parse_sc'     => false,
    // Remove HTML tags
    'no_tags'      => false,
    // Restore entity form of quotes and such to single characters - TRUE disables
    'value'        => false,
    // Line break compression - TRUE removes newline characters
    'nobreak'      => false,
    // Retain newlines - wraps to \n instead of <br /> if TRUE (for non-HTML email text etc)
    'retain_nl'    => false
];
/**
 * Super modifiers: each one is a named bundle of option overrides
 * applied on top of the default option values.
 *
 * @var array
 */
private $e_SuperMods = [
    // Text is part of a title (e.g. news title)
    'TITLE'        => ['nobreak' => true, 'retain_nl' => true, 'link_click' => false, 'emotes' => false, 'defs' => true, 'parse_sc' => true],
    // Same as TITLE, with HTML tags removed as well
    'TITLE_PLAIN'  => ['nobreak' => true, 'retain_nl' => true, 'link_click' => false, 'emotes' => false, 'defs' => true, 'parse_sc' => true, 'no_tags' => true],
    // Text is user-entered (i.e. untrusted) and part of a title (e.g. forum title)
    'USER_TITLE'   => ['nobreak' => true, 'retain_nl' => true, 'link_click' => false, 'scripts' => false, 'emotes' => false, 'hook' => false],
    // NOTE(review): the previous comment here was a copy of the E_BODY one;
    // presumably this is the title of an email or similar sent off-site - confirm
    'E_TITLE'      => ['nobreak' => true, 'retain_nl' => true, 'defs' => true, 'parse_sc' => true, 'emotes' => false, 'scripts' => false, 'link_click' => false],
    // Text is part of the summary of a longer item (e.g. content summary)
    'SUMMARY'      => ['defs' => true, 'constants' => 'full', 'parse_sc' => true],
    // Text is the description of an item (e.g. download, link)
    'DESCRIPTION'  => ['defs' => true, 'constants' => 'full', 'parse_sc' => true],
    // Text is 'body' or 'bulk' text (e.g. custom page body, content body)
    'BODY'         => ['defs' => true, 'constants' => 'full', 'parse_sc' => true],
    // Text is parsed by the Wysiwyg editor, e.g. TinyMce
    'WYSIWYG'      => ['hook' => false, 'link_click' => false, 'link_replace' => false, 'retain_nl' => true],
    // Text is user-entered (i.e. untrusted) 'body' or 'bulk' text (e.g. custom page body, content body)
    'USER_BODY'    => ['constants' => 'full', 'scripts' => false, 'nostrip' => false],
    // Text is 'body' of email or similar - being sent 'off-site' so don't rely on server availability
    'E_BODY'       => ['defs' => true, 'constants' => 'full', 'parse_sc' => true, 'emotes' => false, 'scripts' => false, 'link_click' => false],
    // Text is text-only 'body' of email or similar - being sent 'off-site' so don't rely on server availability
    'E_BODY_PLAIN' => ['defs' => true, 'constants' => 'full', 'parse_sc' => true, 'emotes' => false, 'scripts' => false, 'link_click' => false, 'retain_nl' => true, 'no_tags' => true],
    // Text is the 'content' of a link (A tag, etc)
    'LINKTEXT'     => ['nobreak' => true, 'retain_nl' => true, 'link_click' => false, 'emotes' => false, 'hook' => false, 'defs' => true, 'parse_sc' => true],
    // Text is used (for admin edit) without fancy conversions or html
    'RAWTEXT'      => ['nobreak' => true, 'retain_nl' => true, 'link_click' => false, 'emotes' => false, 'hook' => false, 'no_tags' => true],
    // Every option switched off
    'NODEFAULT'    => [
        'context' => false, 'fromadmin' => false, 'emotes' => false, 'defs' => false, 'constants' => false, 'hook' => false,
        'scripts' => false, 'link_click' => false, 'link_replace' => false, 'parse_sc' => false, 'no_tags' => false, 'value' => false,
        'nobreak' => false, 'retain_nl' => false
    ]
];
/**
 * Individual modifiers: each name maps to the option value(s) it sets
 * in the current context.
 *
 * @var array
 */
private $e_Modifiers = [
    // Emotes
    'emotes_off'        => ['emotes' => false],
    'emotes_on'         => ['emotes' => true],
    // Hooked parsers
    'no_hook'           => ['hook' => false],
    'do_hook'           => ['hook' => true],
    // Scripts (new for 0.8)
    'scripts_off'       => ['scripts' => false],
    'scripts_on'        => ['scripts' => true],
    // Clickable links
    'no_make_clickable' => ['link_click' => false],
    'make_clickable'    => ['link_click' => true],
    'no_replace'        => ['link_replace' => false],
    // Replace text of clickable links (only if make_clickable option set)
    'replace'           => ['link_replace' => true],
    // Path replacement: none / relative / absolute / full
    'consts_off'        => ['constants' => false],
    'consts_rel'        => ['constants' => 'rel'],
    'consts_abs'        => ['constants' => 'abs'],
    'consts_full'       => ['constants' => 'full'],
    // Shortcode parsing
    'scparse_off'       => ['parse_sc' => false],
    'scparse_on'        => ['parse_sc' => true],
    // Strip tags / leave tags
    'no_tags'           => ['no_tags' => true],
    'do_tags'           => ['no_tags' => false],
    // Admin origin flag
    'fromadmin'         => ['fromadmin' => true],
    'notadmin'          => ['fromadmin' => false],
    // Entity replacement
    'er_off'            => ['value' => false],
    'er_on'             => ['value' => true],
    // Decode constant if exists
    'defs_off'          => ['defs' => false],
    'defs_on'           => ['defs' => true],
    // Line break compression
    'dobreak'           => ['nobreak' => false],
    'nobreak'           => ['nobreak' => true],
    // Line break using \n
    'lb_nl'             => ['retain_nl' => true],
    // Line break using <br />
    'lb_br'             => ['retain_nl' => false],
    // Legacy option names below here - discontinue later
    'retain_nl'         => ['retain_nl' => true],
    'defs'              => ['defs' => true],
    'parse_sc'          => ['parse_sc' => true],
    'constants'         => ['constants' => 'rel'],
    'value'             => ['value' => true],
    'wysiwyg'           => ['wysiwyg' => true]
];
/** @var DOMDocument Document instance created by the constructor */
private $domObj;
// Flag raised by toDB() when the data being stored was detected as HTML
private $isHtml = false;
private $bootstrap;
private $fontawesome;
private $modRewriteMedia;
// Working lists, all initially empty
private $removedList = [];
private $nodesToDelete = [];
private $nodesToConvert = [];
private $nodesToDisableSC = [];
private $pathList = [];
// Attribute / tag tables, all initially empty
private $allowedAttributes = [];
private $badAttrValues = [];
private $replaceAttrValues = [];
private $allowedTags = [];
private $scriptTags = [];
private $scriptAttributes = [];
private $blockTags = [];
// Script access: FALSE means nobody
private $scriptAccess = false;
// State shared between simpleParse() and simpleReplace()
private $replaceVars;
private $replaceUnset;
/**
* Constructor - kept public for backward compatibility,
* because some core code still calls new e_parse() directly.
*
* Creates the DOMDocument held in $this->domObj, then runs init()
* and compileAttributeDefaults(), in that order.
*/
public function __construct()
{
// Create the DOM document (XML version 1.0, utf-8 encoding)
$this->domObj = new DOMDocument('1.0', 'utf-8');
// Remaining set-up steps
$this->init();
$this->compileAttributeDefaults();
}
/**
 * Returns one of the two modifier tables.
 *
 * @param string $type 'super' selects the super-modifier table; any other value selects the individual modifiers.
 * @return array
 */
public function getModifierList($type = '')
{
    return ($type === 'super') ? $this->e_SuperMods : $this->e_Modifiers;
}
/**
 * Switches the multibyte (mbstring) string routines on or off.
 *
 * Passing exactly FALSE turns multibyte handling off. Any other value turns it on,
 * but only when the mbstring extension is loaded; in that case the internal
 * encoding is also set to UTF-8. Without mbstring the flag is left untouched.
 *
 * @param bool $bool FALSE to disable, anything else to (try to) enable.
 * @return void|null
 */
public function setMultibyte($bool)
{
    if ($bool !== false)
    {
        if (extension_loaded('mbstring'))
        {
            $this->multibyte = true;
            mb_internal_encoding('UTF-8');
        }

        return;
    }

    $this->multibyte = false;

    return null;
}
/**
 * Length of a string.
 * Uses mb_strlen() when multibyte handling is enabled, otherwise the byte-wise strlen().
 *
 * @param string $str The string being measured.
 * @return integer Length of the string (0 for an empty string).
 */
public function ustrlen($str)
{
    return $this->multibyte ? mb_strlen($str) : strlen($str);
}
/**
 * Lowercases a string.
 * Uses mb_strtolower() when multibyte handling is enabled, otherwise strtolower().
 *
 * @param string $str The string to be lowercased.
 * @return string The lowercased string.
 */
public function ustrtolower($str)
{
    return $this->multibyte ? mb_strtolower($str) : strtolower($str);
}
/**
 * Uppercases a string.
 * Uses mb_strtoupper() when multibyte handling is enabled, otherwise strtoupper().
 *
 * @param string $str The string to be uppercased.
 * @return string The uppercased string.
 */
public function ustrtoupper($str)
{
    return $this->multibyte ? mb_strtoupper($str) : strtoupper($str);
}
/**
 * Position of the first occurrence of $needle in $haystack (case-sensitive).
 * Uses mb_strpos() when multibyte handling is enabled, otherwise strpos().
 *
 * @param string  $haystack The string being searched in.
 * @param string  $needle   The string being searched for.
 * @param integer $offset   [optional] Where in $haystack to start searching.
 *                          The returned position is still relative to the beginning of $haystack.
 * @return integer|boolean The position, or FALSE when $needle is not found.
 */
public function ustrpos($haystack, $needle, $offset = 0)
{
    if (!$this->multibyte)
    {
        return strpos($haystack, $needle, $offset);
    }

    return mb_strpos($haystack, $needle, $offset);
}
/**
 * Position of the last occurrence of $needle in $haystack (case-sensitive).
 * Uses mb_strrpos() when multibyte handling is enabled, otherwise strrpos().
 *
 * @param string  $haystack The string being searched in.
 * @param string  $needle   The string being searched for.
 * @param integer $offset   [optional] Where in $haystack to start searching.
 *                          The returned position is still relative to the beginning of $haystack.
 * @return integer|boolean The position, or FALSE when $needle is not found.
 */
public function ustrrpos($haystack, $needle, $offset = 0)
{
    if (!$this->multibyte)
    {
        return strrpos($haystack, $needle, $offset);
    }

    return mb_strrpos($haystack, $needle, $offset);
}
/**
 * Case-insensitive search returning part of $haystack around the first occurrence of $needle.
 * Uses mb_stristr() when multibyte handling is enabled, otherwise stristr().
 *
 * @param string $haystack      The string to search in.
 * @param mixed  $needle        What to search for.
 * @param bool   $before_needle [optional] If TRUE, returns the part of $haystack before the first
 *                              occurrence of $needle (excluding $needle) instead of the part from it onwards.
 * @return string|false The matched substring, or FALSE when $needle is not found.
 */
public function ustristr($haystack, $needle, $before_needle = false)
{
    if (!$this->multibyte)
    {
        return stristr($haystack, $needle, $before_needle);
    }

    return mb_stristr($haystack, $needle, $before_needle);
}
/**
 * Returns the portion of a string specified by the start and length parameters.
 * Uses mb_substr() when multibyte handling is enabled, otherwise the byte-wise substr().
 *
 * NOTE: there may be subtle differences in return values depending on which routine is used
 * (native substr() can return FALSE on older PHP versions, mb_substr() returns an empty string).
 *
 * @param string       $str    The input string.
 * @param integer      $start  Start of the portion to be returned; the first character is at position 0.
 * @param integer|null $length [optional] Maximum length of the returned portion.
 *                             When omitted (NULL), the rest of the string from $start is returned.
 * @return string The extracted part of the input string.
 */
public function usubstr($str, $start, $length = null)
{
    if ($this->multibyte)
    {
        return ($length === null) ? mb_substr($str, $start) : mb_substr($str, $start, $length);
    }

    // Do not hand NULL to substr(): before PHP 8.0 a NULL length is treated as 0
    // and yields an empty string instead of "the rest of the string".
    return ($length === null) ? substr($str, $start) : substr($str, $start, $length);
}
/**
 * Converts the supplied text (presumed to be from user input) to a format suitable for storing in a database table.
 *
 * Processing steps, in order:
 *  - values that are neither string nor array, and the string '0', are returned unchanged;
 *  - arrays are processed recursively (quotes in keys are turned into entities);
 *  - slashes are stripped when MAGIC_QUOTES_GPC is TRUE and $nostrip is FALSE;
 *  - unless $mod is 'pReFs': preFilter() and cleanHtml() are applied, and tags plus [html] markers are removed
 *    when the user is not in the 'post_html' class;
 *  - the text is either lightly encoded (selected characters only) or fully encoded with htmlspecialchars();
 *  - '[php' is neutralised when $mod contains 'no_php' or the user is not in the 'php_bbcode' class;
 *  - when $mod is 'model', the toDB() method of every plugin listed in the 'e_parse_list' pref is called.
 *
 * @param mixed          $data
 * @param boolean        $nostrip   [optional] Assumes all data is GPC ($_GET, $_POST, $_COOKIE) unless indicated otherwise by setting this to TRUE.
 *                                  If magic quotes is enabled and this is FALSE, slashes are stripped.
 * @param boolean        $no_encode [optional] Should nearly always be FALSE. TRUE requests the light encoding; it is forced to TRUE
 *                                  when the current user is in the 'post_html' class.
 * @param boolean|string $mod       [optional] 'model' = admin-ui usage (enables the e_parse hooks). The 'no_html' and 'no_php' modifiers
 *                                  blanket prevent HTML and PHP posting regardless of posting permissions.
 *                                  The 'pReFs' value is for internal use only, when saving prefs, to prevent sanitisation of HTML.
 * @param mixed          $parm      [optional] When numeric, it is passed to check_class() for the 'post_html' check.
 *                                  When an array, its 'type' and 'field' entries are forwarded to the e_parse hooks.
 * @return mixed
 */
public function toDB($data = null, $nostrip = false, $no_encode = false, $mod = false, $parm = null)
{
    $variableType = gettype($data);

    // Only strings and arrays are processed; the literal string '0' is returned untouched.
    if (($variableType !== 'string' && $variableType !== 'array') || $data === '0')
    {
        return $data;
    }

    if ($variableType === 'array')
    {
        $ret = array();

        foreach ($data as $key => $var)
        {
            // Sanitize keys as well: quotes become their entity form.
            $key = str_replace(array('"', "'"), array('&quot;', '&#039;'), $key);
            $ret[$key] = $this->toDB($var, $nostrip, $no_encode, $mod, $parm);
        }

        return $ret;
    }

    if (MAGIC_QUOTES_GPC === true && $nostrip === false)
    {
        $data = stripslashes($data);
    }

    $core_pref = e107::getConfig();

    if ($mod !== 'pReFs') // We're not saving prefs.
    {
        $data = $this->preFilter($data); // used by bb_xxx.php toDB() functions. bb_code.php toDB() allows us to properly bypass HTML cleaning below.
        $data = $this->cleanHtml($data); // clean it regardless of if it is text or html. (html could have missing closing tags)

        if (($this->isHtml($data)) && strpos($mod, 'no_html') === false)
        {
            $this->isHtml = true;
            $data = str_replace(array('%7B', '%7D'), array('{', '}'), $data); // fix for {e_XXX} paths.
        }

        if (!check_class($core_pref->get('post_html', e_UC_MAINADMIN)))
        {
            $data = strip_tags($data); // remove tags from cleaned html.
            $data = str_replace(array('[html]', '[/html]'), '', $data);
        }
    }

    if (check_class($core_pref->get('post_html')))
    {
        $no_encode = true;
    }

    if ($parm !== null && is_numeric($parm) && !check_class($core_pref->get('post_html'), '', $parm))
    {
        $no_encode = false;
    }

    if ($no_encode === true && strpos($mod, 'no_html') === false)
    {
        // Light encoding: only the characters that are dangerous for storage / later evaluation.
        $search = array('$', '"', "'", '\\', '<?');
        $replace = array('&#036;', '&quot;', '&#039;', '&#092;', '&lt;?');
        $ret = str_replace($search, $replace, $data);
    }
    else // add entities for everything. we want to save the code.
    {
        // Decode already-encoded angle brackets first, to prevent &amp;gt; etc. after htmlspecialchars().
        $search = array('&gt;', '&lt;');
        $replace = array('>', '<');
        $data = str_replace($search, $replace, $data);

        $data = htmlspecialchars($data, ENT_QUOTES, 'UTF-8');
        $data = str_replace('\\', '&#092;', $data);

        // Undo the double-encoding of numeric entities (&amp;#123; -> &#123;).
        $ret = preg_replace("/&amp;#(\d*?);/", "&#\\1;", $data);
    }

    // XXX - php_bbcode has been deprecated.
    if ((strpos($mod, 'no_php') !== false) || !check_class($core_pref->get('php_bbcode')))
    {
        // Replace the opening bracket of [php with its entity so the bbcode is no longer recognised.
        $ret = preg_replace("#\[(php)#i", "&#91;\\1", $ret);
    }

    // Don't allow hooks to mess with prefs: hooks only run for admin-ui ('model') usage.
    if ($mod !== 'model')
    {
        return $ret;
    }

    /**
     * e_parse hook
     */
    $eParseList = $core_pref->get('e_parse_list');

    if (!empty($eParseList))
    {
        // $parm is optional and may be NULL or numeric; only read its entries when it is an array.
        $parmIsArray = is_array($parm);

        $opts = array(
            'nostrip'  => $nostrip,
            'noencode' => $no_encode,
            'type'     => ($parmIsArray && isset($parm['type'])) ? $parm['type'] : null,
            'field'    => ($parmIsArray && isset($parm['field'])) ? $parm['field'] : null
        );

        foreach ($eParseList as $plugin)
        {
            $hookObj = e107::getAddon($plugin, 'e_parse');

            // A hook's result is only used when it is truthy.
            if ($tmp = e107::callMethod($hookObj, 'toDB', $ret, $opts))
            {
                $ret = $tmp;
            }
        }
    }

    return $ret;
}
/**
 * Deprecated no-op kept for backward compatibility.
 * Raises an E_USER_WARNING pointing to $tp->cleanHtml() and hands the data back untouched.
 *
 * @param string $data
 * @param string $tagList - no longer used
 *
 * @return string the unmodified $data
 * @deprecated
 */
public function htmlAbuseFilter($data, $tagList = '')
{
    $notice = __METHOD__ . ' is deprecated. Use $tp->cleanHtml() instead.'; // NO LAN
    trigger_error($notice, E_USER_WARNING);

    return $data;
}
/**
 * Deprecated no-op kept for backward compatibility.
 * Raises an E_USER_WARNING pointing to $tp->filter() and returns the data unmodified.
 *
 * @param mixed  $data
 * @param string $mode - no longer used
 * @return mixed the unmodified $data
 * @deprecated
 */
public function dataFilter($data, $mode = 'bbcode')
{
    // The message names this method (it previously referred to a non-existent "dateFilter()").
    trigger_error('$tp->dataFilter() is deprecated. Use $tp->filter() instead.', E_USER_WARNING);

    return $data;
}
/**
 * Pre-processing hook for data about to be written to the DB.
 * Data containing bbcode is passed through the bbcode parser in 'PRE' mode;
 * anything else is returned as it came in.
 *
 * @param $data string - data about to be written to DB
 * @return string - modified data
 */
public function preFilter($data)
{
    if ($this->isBBcode($data))
    {
        // The post ID handed to the bbcode parser is the logged-in user's ID
        return e107::getBB()->parseBBCodes($data, defset('USERID'), 'default', 'PRE');
    }

    return $data;
}
/**
 * Takes a multi-dimensional array and converts the keys to a list of routing paths.
 * Each path ('key/subkey/...') is a key of the result; its value is the top-most key of that path.
 *
 * @param array $array
 * @return array path => top-most key; an empty array when there is nothing to convert
 */
public function toRoute($array)
{
    // Nothing to convert: without this guard the explode() below would yield ['' => ''].
    if (empty($array) || !is_array($array))
    {
        return [];
    }

    $res = $this->_processRoute($array);

    if ($res === '')
    {
        return [];
    }

    $ret = [];

    foreach (explode("_#_", $res) as $v)
    {
        // The first path segment is the top-most key.
        list($k) = explode('/', $v);
        $ret[$v] = $k;
    }

    return $ret;
}
/**
 * Recursively flattens the keys of a nested array into '/'-separated paths,
 * joined into a single string with the '_#_' separator.
 *
 * A key whose value is a non-empty array contributes the paths of its children;
 * any other key contributes its own path. The full ancestor path is passed down
 * so that every descendant carries the complete prefix (previously only the first
 * of several siblings received it once the array was three or more levels deep).
 *
 * @param array  $array
 * @param string $prefix Path of the ancestors, including the trailing '/'.
 * @return string
 */
private function _processRoute($array, $prefix = '')
{
    if (!is_array($array))
    {
        return '';
    }

    $text = [];

    foreach ($array as $key => $val)
    {
        $path = $prefix . $key;
        $children = $this->_processRoute($val, $path . '/');

        // Leaf (scalar value or empty array): the key's own path is the route.
        $text[] = ($children !== '') ? $children : $path;
    }

    return implode('_#_', $text);
}
/**
 * Prepares stored text for display inside a form field.
 *
 * Empty values are returned unchanged. Text starting with '[html]' first has its quote and
 * backslash entities decoded and its ampersands encoded. Then '$' and '"' entities are decoded,
 * angle brackets are encoded, '+' is percent-encoded, and - for strings, when the WYSIWYG editor
 * is not active - the result is passed through urldecode().
 *
 * @param string $text
 * @return array|string|string[]
 */
public function toForm($text)
{
    if (empty($text)) // handle proper 0, Space etc values.
    {
        return $text;
    }

    if (is_string($text) && strpos($text, '[html]') === 0)
    {
        $search = array('&quot;', '&#039;', '&#092;', '&'); // '&' must be last.
        $replace = array('"', "'", "\\", '&amp;');
        $text = str_replace($search, $replace, $text);
    }

    $search = array('&#036;', '&quot;', '<', '>', '+');
    $replace = array('$', '"', '&lt;', '&gt;', '%2B');
    $text = str_replace($search, $replace, $text);

    if (is_string($text) && e107::wysiwyg() !== true)
    {
        // fix for utf-8 issue with html_entity_decode(); also turns the '%2B' from above back into '+'
        $text = urldecode($text);
    }

    return $text;
}
/**
 * Prepares posted data for re-display inside a form field.
 *
 * Arrays are processed recursively (keys as well as values). Scalars are cast to string,
 * un-slashed when MAGIC_QUOTES_GPC is enabled, and have quotes and angle brackets
 * replaced by their HTML entities.
 *
 * @param mixed $text
 * @return array|string
 */
public function post_toForm($text)
{
    if (is_array($text))
    {
        $arr = array();

        foreach ($text as $key => $value)
        {
            $key = $this->post_toForm($key);
            $arr[$key] = $this->post_toForm($value);
        }

        return $arr;
    }

    $text = (string) $text;

    if (MAGIC_QUOTES_GPC == true)
    {
        $text = stripslashes($text);
    }

    return str_replace(array("'", '"', '<', '>'), array('&#039;', '&quot;', '&lt;', '&gt;'), $text);
}
/**
 * Runs posted text through toDB() and then through toHTML().
 *
 * @param string $text
 * @param mixed  $original_author forwarded to toDB() as its $parm argument
 * @param string $extra           forwarded to toHTML() as its third argument
 * @param bool   $mod             forwarded to toDB() as its $mod argument
 * @return string
 */
public function post_toHTML($text, $original_author = false, $extra = '', $mod = false)
{
    $stored = $this->toDB($text, false, false, $mod, $original_author);

    return $this->toHTML($stored, true, $extra);
}
/**
 * Parses a template by delegating to the shortcode parser's parseCodes().
 * A notice is triggered when $parseSCFiles is not a boolean; parsing continues regardless.
 *
 * @param string       $text         - template to parse.
 * @param boolean      $parseSCFiles - parse core 'single' shortcodes
 * @param object|array $extraCodes   - shortcode class containing sc_xxxxx methods or an array of key/value pairs or legacy shortcode content (eg. content within .sc)
 * @param object       $eVars        - passed through to the shortcode parser unchanged
 * @return string
 */
public function parseTemplate($text, $parseSCFiles = true, $extraCodes = null, $eVars = null)
{
    $flagIsBoolean = is_bool($parseSCFiles);

    if ($flagIsBoolean === false)
    {
        trigger_error('$parseSCFiles in parseTemplate() was given incorrect data');
    }

    $scParser = e107::getScParser();

    return $scParser->parseCodes($text, $parseSCFiles, $extraCodes, $eVars);
}
/**
 * Parses a JSON schema template, processes placeholders, and reconstructs the JSON with optional main entity and extra codes.
 *
 * Every string containing '{' is run through the shortcode parser (in 'schema' mode), entity-decoded and stripped of tags.
 * When $mainEntity is an array and an item of the template has a 'mainEntity' list, the first element of that list is used
 * as a per-row template: it is duplicated and parsed once for every row of $mainEntity.
 *
 * The shortcode parser is always switched back to 'default' mode, also when parsing throws, and is never
 * switched to 'schema' mode when the input is not valid JSON.
 *
 * @param string      $text         The JSON schema template to be parsed.
 * @param bool        $parseSCFiles Whether to enable the parsing of shortcode files. Defaults to true.
 * @param object|null $extraCodes   Optional extra codes object for placeholder parsing; its setVars() method, when present, is called with each row.
 * @param array|null  $mainEntity   Optional data array to replace the 'mainEntity' structure in the schema.
 * @return string|false The processed JSON schema string on success, or false if the input JSON is invalid.
 */
public function parseSchemaTemplate($text, $parseSCFiles = true, $extraCodes = null, $mainEntity = null)
{
    // Step 1: Decode and validate the JSON input before touching the shared parser,
    // so that an invalid template cannot leave the parser stuck in 'schema' mode.
    $jsonArray = json_decode($text, true);

    if (json_last_error() !== JSON_ERROR_NONE)
    {
        error_log('Invalid JSON: ' . json_last_error_msg());

        return false;
    }

    // Step 2: Initialize the parser
    $parse = e107::getScParser();
    $parse->setMode('schema'); // Set parsing mode for schema

    // method_exists() rejects NULL on PHP 8, so check for an object first.
    $canSetVars = is_object($extraCodes) && method_exists($extraCodes, 'setVars');

    // Step 3: Recursive function to process the JSON structure
    $processItems = function (&$item) use (&$processItems, $parse, $parseSCFiles, $extraCodes, $mainEntity, $canSetVars)
    {
        if (is_array($item))
        {
            // 'mainEntity' is expanded only when row data was supplied and the template provides
            // a usable first element; otherwise it is processed like any other part of the structure.
            $expandMainEntity = is_array($mainEntity)
                && isset($item['mainEntity'][0])
                && is_array($item['mainEntity'][0]);

            if ($expandMainEntity)
            {
                // Get the first template item from the 'mainEntity' array to use as the structure
                $schemaTemplate = $item['mainEntity'][0];
                $item['mainEntity'] = []; // Reset the 'mainEntity' array to prevent duplication

                foreach ($mainEntity as $dataRow)
                {
                    // Create a fresh copy of the schema template for this specific dataRow
                    $duplicatedItem = json_decode(json_encode($schemaTemplate), true);

                    // Update the extraCodes for the current data row
                    if ($canSetVars)
                    {
                        $extraCodes->setVars($dataRow); // Inject new placeholders from this row
                    }

                    // Process placeholders in the duplicated item
                    foreach ($duplicatedItem as &$value)
                    {
                        if (is_string($value) && strpos($value, '{') !== false)
                        {
                            // Parse placeholders for current dataRow
                            $value = $parse->parseCodes($value, $parseSCFiles, $extraCodes);
                            $value = html_entity_decode($value, ENT_QUOTES | ENT_HTML5, 'UTF-8');
                            $value = strip_tags($value);
                        }
                        elseif (is_array($value))
                        {
                            // Recursively process arrays (e.g., nested structures)
                            $processItems($value);
                        }
                    }
                    unset($value); // drop the dangling reference before the array is copied

                    // Append the processed item to the 'mainEntity' array
                    $item['mainEntity'][] = $duplicatedItem;
                }
            }
            else
            {
                // Recursively process other parts of the JSON structure
                foreach ($item as &$value)
                {
                    $processItems($value);
                }
                unset($value);
            }
        }
        elseif (is_string($item))
        {
            // Parse string placeholders, if any
            if (strpos($item, '{') !== false)
            {
                $item = $parse->parseCodes($item, $parseSCFiles, $extraCodes);
                $item = str_replace('&amp;', '&', $item); // undo double-encoded ampersands before decoding
                $item = html_entity_decode($item, ENT_QUOTES | ENT_HTML5, 'UTF-8');
                $item = strip_tags($item);
            }
        }
    };

    // Step 4: Initiate processing for the entire JSON structure
    try
    {
        $processItems($jsonArray);
    }
    finally
    {
        // Reset the parse mode after processing, whatever happened
        $parse->setMode('default');
    }

    // Step 5: Encode the final result back into JSON
    return json_encode($jsonArray, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT);
}
/**
 * Simple parser: replaces every {WORD} placeholder (word characters only) in the template
 * through simpleReplace(), using the supplied variables.
 *
 * @param string       $template
 * @param e_vars|array $vars
 * @param string       $replaceUnset string to be used if replace variable is not set, false - don't replace
 * @return string parsed content
 */
public function simpleParse($template, $vars, $replaceUnset = '')
{
    // Stash the inputs where the replacement callback can reach them
    $this->replaceVars = $vars;
    $this->replaceUnset = $replaceUnset;

    $callback = array($this, 'simpleReplace');

    return preg_replace_callback("#\{([\w]+)\}#", $callback, $template);
}
/**
 * preg_replace_callback() handler used by simpleParse().
 *
 * Looks the captured placeholder name up in the stored variables (an array is wrapped
 * in an e_vars object first). When there are no variables or the value is NULL, the
 * "unset" replacement is returned instead - or the original placeholder text when the
 * "unset" replacement is FALSE.
 *
 * @param array $tmp regex match: [0] is the whole placeholder, [1] the name inside the braces
 * @return mixed|string|null
 */
protected function simpleReplace($tmp)
{
    if (is_array($this->replaceVars))
    {
        $this->replaceVars = new e_vars($this->replaceVars);
    }

    if ($this->replaceUnset === false)
    {
        $fallback = $tmp[0];
    }
    else
    {
        $fallback = $this->replaceUnset;
    }

    $name = $tmp[1];

    if (empty($this->replaceVars) || $this->replaceVars->$name === null)
    {
        return $fallback;
    }

    return $this->replaceVars->$name;
}
/**
 * Inserts $break into over-long 'words' of an HTML string so that no run of
 * non-whitespace text exceeds $width. HTML tags are never split, entities are
 * treated as a single character, and text inside the elements listed in $nobreak
 * is left alone. Where possible the break is placed after a 'safe' character.
 *
 * Widths below 6 (including non-numeric widths, which cast to 0) disable wrapping.
 *
 * @param string $str     HTML/text to wrap
 * @param int    $width   maximum length of an unbroken run
 * @param string $break   string inserted to flag the break
 * @param string $nobreak space-separated list of tags within which word wrap is inactive
 * @param string $nobr    not used by this implementation
 * @param false  $utf     TRUE forces utf-8 handling; otherwise it is used when CHARSET is 'utf-8'
 * @return string
 * @todo find a modern replacement
 */
public function htmlwrap($str, $width, $break = "\n", $nobreak = 'a', $nobr = 'pre', $utf = false)
{
    // Don't wrap if non-numeric width
    $width = (int) $width;

    // And trap stupid wrap counts
    if ($width < 6)
    {
        return $str;
    }

    // Transform protected element list into an array
    $nobreak = explode(' ', strtolower($nobreak));

    // Variable setup: $innbk is the stack of currently open protected elements
    $innbk = array();
    $drain = '';

    // List of characters it is "safe" to insert line-breaks at
    // It is not necessary to add < and > as they are automatically implied
    $lbrks = "/?!%)-}]\\\"':;&";

    // Is $str a UTF8 string?
    if ($utf || strtolower(CHARSET) === 'utf-8')
    {
        // 0x1680, 0x180e, 0x2000-0x200a, 0x2028, 0x205f, 0x3000 are 'non-ASCII' Unicode UCS-4 codepoints - see http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
        // All convert to 3-byte utf-8 sequences:
        // 0x1680   0xe1 0x9a 0x80
        // 0x180e   0xe1 0xa0 0x8e
        // 0x2000   0xe2 0x80 0x80
        //   -
        // 0x200a   0xe2 0x80 0x8a
        // 0x2028   0xe2 0x80 0xa8
        // 0x205f   0xe2 0x81 0x9f
        // 0x3000   0xe3 0x80 0x80
        $utf8 = 'u'; // only used as a "utf-8 mode" flag below
        // Have to explicitly enumerate the whitespace chars, and use non-utf-8 mode, otherwise regex fails on badly formed utf-8
        $whiteSpace = '#([\x20|\x0c]|[\xe1][\x9a][\x80]|[\xe1][\xa0][\x8e]|[\xe2][\x80][\x80-\x8a,\xa8]|[\xe2][\x81][\x9f]|[\xe3][\x80][\x80]+)#';
    }
    else
    {
        $utf8 = '';
        // For non-utf-8, can use a simple match string
        $whiteSpace = '#(\s+)#';
    }

    // Start of the serious stuff - split into HTML tags and text between
    $content = preg_split('#(<.*?' . '>)#mis', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);

    foreach ($content as $value)
    {
        if ($value[0] === '<')
        {
            // We are within an HTML tag
            // Create a lowercase copy of this tag's contents
            $lvalue = strtolower(substr($value, 1, -1));

            if ($lvalue)
            {
                // Tag of non-zero length
                // If the first character is not a / then this is an opening tag
                if ($lvalue[0] !== '/')
                {
                    // Collect the tag name
                    preg_match("/^(\w*?)(\s|$)/", $lvalue, $t);

                    // If this is a protected element, activate the associated protection flag.
                    // The match fails for things like comments or tags with attributes glued to
                    // the name, in which case there is no tag name to look up.
                    if (isset($t[1]) && in_array($t[1], $nobreak))
                    {
                        array_unshift($innbk, $t[1]);
                    }
                }
                else
                {
                    // Otherwise this is a closing tag
                    // If this is a closing tag for a protected element, unset the flag
                    if (in_array(substr($lvalue, 1), $nobreak))
                    {
                        reset($innbk);

                        foreach ($innbk as $key => $tag)
                        {
                            if (substr($lvalue, 1) == $tag)
                            {
                                unset($innbk[$key]);
                                break;
                            }
                        }

                        $innbk = array_values($innbk);
                    }
                }
            }
            else
            {
                // Eliminate any empty tags altogether
                $value = '';
            }
            // Else if we're outside any tags, and with non-zero length string...
        }
        elseif ($value)
        {
            // If unprotected...
            if (!count($innbk))
            {
                // Use the ACK (006) ASCII symbol to replace all HTML entities temporarily
                $value = str_replace("\x06", '', $value);
                preg_match_all("/&([a-z\d]{2,7}|#\d{2,5});/i", $value, $ents);
                $value = preg_replace("/&([a-z\d]{2,7}|#\d{2,5});/i", "\x06", $value);

                // Split at spaces - note that this will fail if presented with invalid utf-8 when doing the regex whitespace search
                $split = preg_split($whiteSpace, $value, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);

                $value = '';

                foreach ($split as $sp)
                {
                    $loopCount = 0;

                    while (strlen($sp) > $width)
                    {
                        // Enough characters that we may need to do something.
                        $pulled = '';

                        if ($utf8)
                        {
                            // Pull out a piece of the maximum permissible length
                            if (preg_match('#^((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,' . $width . '})(.?).*#s', $sp, $matches) == 0)
                            {
                                // Make any problems obvious for now
                                $value .= '[!invalid utf-8: ' . $sp . '!]';
                                $sp = '';
                            }
                            elseif (empty($matches[2]))
                            {
                                // utf-8 length is less than specified - treat as a special case
                                $value .= $sp;
                                $sp = '';
                            }
                            else
                            {
                                // Need to find somewhere to break the string
                                for ($i = strlen($matches[1]) - 1; $i >= 0; $i--)
                                {
                                    if (strpos($lbrks, $matches[1][$i]) !== false)
                                    {
                                        break;
                                    }
                                }

                                if ($i < 0)
                                {
                                    // No 'special' break character found - break at the word boundary
                                    $pulled = $matches[1];
                                }
                                else
                                {
                                    $pulled = substr($sp, 0, $i + 1);
                                }
                            }

                            // Safety net against an endless loop on a single word
                            $loopCount++;

                            if ($loopCount > 20)
                            {
                                // Make any problems obvious for now
                                $value .= '[!loop count exceeded: ' . $sp . '!]';
                                $sp = '';
                            }
                        }
                        else
                        {
                            for ($i = min($width, strlen($sp)); $i > 0; $i--)
                            {
                                // No speed advantage to defining match character
                                if (strpos($lbrks, $sp[$i - 1]) !== false)
                                {
                                    break;
                                }
                            }

                            if ($i == 0)
                            {
                                // No 'special' break boundary character found - break at the word boundary
                                $pulled = substr($sp, 0, $width);
                            }
                            else
                            {
                                $pulled = substr($sp, 0, $i);
                            }
                        }

                        if ($pulled)
                        {
                            $value .= $pulled . $break;
                            // Shorten $sp by whatever we've processed (will work even for utf-8)
                            $sp = substr($sp, strlen($pulled));
                        }
                    }

                    // Add in any residue
                    $value .= $sp;
                }

                // Put captured HTML entities back into the string
                foreach ($ents[0] as $ent)
                {
                    $value = preg_replace("/\x06/", $ent, $value, 1);
                }
            }
        }

        // Send the modified segment down the drain
        $drain .= $value;
    }

    // Return contents of the drain
    return $drain;
}
/**
 * Universal text/bbcode/html truncate method.
 * new in v2.3.1
 *
 * HTML is handed to html_truncate(). Anything else goes to text_truncate(),
 * after being converted with toText() when it contains bbcode.
 *
 * @param string $text
 * @param int    $length
 * @param string $ending
 * @return string
 */
public function truncate($text, $length = 100, $ending = '...')
{
    if ($this->isHtml($text))
    {
        return $this->html_truncate($text, $length, $ending);
    }

    $plain = $this->isBBcode($text) ? $this->toText($text) : $text;

    return $this->text_truncate($plain, $length, $ending);
}
/**
 * Truncate a HTML string.
 *
 * Cuts the visible text to $length characters (tags are not counted, an HTML entity counts
 * as one character), appends $ending, and closes every element that is still open at the cut.
 * Text that is already short enough is returned unchanged.
 *
 * Based on CakePHP(tm) : Rapid Development Framework (http://www.cakephp.org)
 * Copyright 2005-2008, Cake Software Foundation, Inc. (http://www.cakefoundation.org)
 *
 * @param string  $text   String to truncate.
 * @param integer $length Maximum length of the visible text that is kept.
 * @param string  $ending Appended to the trimmed string.
 * @param boolean $exact  If false, $text will not be cut mid-word (it is cut at the last space instead).
 * @return string Trimmed string.
 * @deprecated Soon to be made private. Use $tp->truncate() instead.
 */
public function html_truncate($text, $length = 100, $ending = '...', $exact = true)
{
    if ($this->ustrlen(preg_replace('/<.*?>/', '', $text)) <= $length)
    {
        return $text;
    }

    // Named, decimal and hexadecimal character references
    $entityPattern = '/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i';
    // Elements without a closing tag. Anchored so that e.g. 'abbr' or 'colgroup'
    // are not mistaken for 'br' / 'col' and left unclosed.
    $voidPattern = '/^(?:img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)$/i';

    $totalLength = 0;
    $openTags = array();
    $truncate = '';

    // Each match: [1] = optional tag, [2] = its name, [3] = the text following it
    preg_match_all('/(<\/?([\w+]+)[^>]*>)?([^<>]*)/', $text, $tags, PREG_SET_ORDER);

    foreach ($tags as $tag)
    {
        if (!$tag[2] || !preg_match($voidPattern, $tag[2]))
        {
            if (preg_match('/<[\w]+[^>]*>/', $tag[0]))
            {
                // Opening tag: remember it, most recent first
                array_unshift($openTags, $tag[2]);
            }
            elseif (preg_match('/<\/([\w]+)[^>]*>/', $tag[0], $closeTag))
            {
                // Closing tag: forget the matching open tag
                $pos = array_search($closeTag[1], $openTags);

                if ($pos !== false)
                {
                    array_splice($openTags, $pos, 1);
                }
            }
        }

        $truncate .= $tag[1];

        // Length of the text with every entity counted as a single character
        $contentLength = $this->ustrlen(preg_replace($entityPattern, ' ', $tag[3]));

        if ($contentLength + $totalLength > $length)
        {
            $left = $length - $totalLength;
            $entitiesLength = 0;

            if (preg_match_all($entityPattern, $tag[3], $entities, PREG_OFFSET_CAPTURE))
            {
                foreach ($entities[0] as $entity)
                {
                    // Entities that start within the remaining budget are kept whole
                    if ($entity[1] + 1 - $entitiesLength <= $left)
                    {
                        $left--;
                        $entitiesLength += $this->ustrlen($entity[0]);
                    }
                    else
                    {
                        break;
                    }
                }
            }

            $truncate .= $this->usubstr($tag[3], 0, $left + $entitiesLength);
            break;
        }

        $truncate .= $tag[3];
        $totalLength += $contentLength;

        if ($totalLength >= $length)
        {
            break;
        }
    }

    if (!$exact)
    {
        $spacepos = $this->ustrrpos($truncate, ' ');

        // ustrrpos() returns FALSE when there is no space; only cut back when one was found.
        if ($spacepos !== false)
        {
            // Closing tags that are about to be cut off have to be re-added at the end
            $bits = $this->usubstr($truncate, $spacepos);
            preg_match_all('/<\/([a-z]+)>/i', $bits, $droppedTags, PREG_SET_ORDER);

            if (!empty($droppedTags))
            {
                foreach ($droppedTags as $closingTag)
                {
                    if (!in_array($closingTag[1], $openTags))
                    {
                        array_unshift($openTags, $closingTag[1]);
                    }
                }
            }

            $truncate = $this->usubstr($truncate, 0, $spacepos);
        }
    }

    $truncate .= $ending;

    // Close whatever is still open, innermost element first
    foreach ($openTags as $tag)
    {
        $truncate .= '</' . $tag . '>';
    }

    return $truncate;
}
/**
 * Truncate a string of text to a maximum length $len and append the string $more if it was truncated.
 *
 * Text that is already no longer than $len characters is returned unchanged.
 * Otherwise BBcode/HTML input is first converted with toText(), entities are decoded
 * as UTF-8, and the result is shortened:
 *  - with mb_strimwidth() when the mbstring extension provides it (there $len is a
 *    display width and already includes $more);
 *  - otherwise by character count, dropping a trailing fragment that starts with '&'
 *    within the last 8 characters so that no broken entity is left behind.
 *
 * @param string  $text string to process
 * @param integer $len  length of characters to be truncated
 * @param string  $more string which will be added if truncation
 * @return string Always returns text.
 * @deprecated for public use. Will be made private. Use $tp->truncate() instead.
 */
public function text_truncate($text, $len = 200, $more = ' ... ')
{
    if ($this->ustrlen($text) <= $len)
    {
        return $text;
    }

    if ($this->isBBcode($text) || $this->isHtml($text))
    {
        $text = $this->toText($text);
    }

    $text = html_entity_decode($text, ENT_QUOTES, 'utf-8');

    if (function_exists('mb_strimwidth'))
    {
        return mb_strimwidth($text, 0, $len, $more);
    }

    $ret = $this->usubstr($text, 0, $len);

    // Search for a possibly broken html entity: if an & is in the last 8 chars,
    // removing it and whatever follows shouldn't hurt.
    // The tail start is clamped at 0 so strings shorter than 8 characters are measured
    // correctly, and offset 0 of the tail counts as a hit (strict integer check).
    $tailStart = max(0, $this->ustrlen($ret) - 8);
    $leftAmp = $this->ustrrpos($this->usubstr($ret, $tailStart), '&');

    if (is_int($leftAmp))
    {
        $ret = $this->usubstr($ret, 0, $tailStart + $leftAmp);
    }

    return $ret . $more;
}
/**
 * Tidy line breaks in $text and wrap it via htmlwrap().
 *
 * Steps, in order: every run of three newlines is replaced by two (single pass),
 * the text is wrapped with htmlwrap() at $wrap, then three break variants are
 * normalised to one replacement string.
 *
 * @param string $text Text to clean.
 * @param int    $wrap Width passed straight to htmlwrap().
 * @return string
 */
public function textclean($text, $wrap = 100)
{
// Single pass only: six consecutive newlines end up as four, not two.
$text = str_replace("\n\n\n", "\n\n", $text);
$text = $this->htmlwrap($text, $wrap);
// NOTE(review): as they stand in this copy, the three search strings and the replacement
// below are each a bare line break, which would make this call a no-op. The literals may
// have lost markup (possibly line-break tags with surrounding spaces) - confirm before editing.
$text = str_replace(array('
', '
', '
'), '
', $text);
/* we can remove any linebreaks added by htmlwrap function as any \n's will be converted later anyway */
return $text;
}
/**
 * Test for text highlighting, and determine the text highlighting transformation.
 *
 * The decision is computed once and cached in $this->e_highlighting. Highlighting is
 * active when the 'search_highlight' pref is set, the current script (e_SELF) is not
 * search.php, and the HTTP referer carries a "q=" or "p=" parameter. In that case
 * $this->e_query is derived from that parameter unless it has already been set.
 *
 * @return bool Returns TRUE if highlighting is active for this page display
 */
public function checkHighlighting()
{
    global $pref;

    if (!defined('e_SELF'))
    {
        // Still in startup, so can't calculate highlighting
        return false;
    }

    if (isset($this->e_highlighting))
    {
        // Already calculated for this request.
        return $this->e_highlighting;
    }

    $this->e_highlighting = false;

    $shr = isset($_SERVER['HTTP_REFERER']) ? $_SERVER['HTTP_REFERER'] : '';
    $hasQuery = (strpos($shr, 'q=') !== false) || (strpos($shr, 'p=') !== false);

    // !empty() keeps the original truthiness test but tolerates a missing pref (or an unset $pref).
    if (!empty($pref['search_highlight']) && (strpos(e_SELF, 'search.php') === false) && $hasQuery)
    {
        $this->e_highlighting = true;

        // Only touch $matches when the pattern really matched.
        if (!isset($this->e_query) && preg_match('#(q|p)=(.*?)(&|$)#', $shr, $matches))
        {
            // NOTE(review): the query comes from the Referer header and is turned into what looks
            // like a regex fragment ('*' => '.*?', ' ' => '\b|\b') without further escaping -
            // confirm the consumer copes with arbitrary regex metacharacters.
            $this->e_query = str_replace(array('+', '*', '"', ' '), array('', '.*?', '', '\b|\b'), trim(urldecode($matches[2])));
        }
    }

    return $this->e_highlighting;
}
/**
 * Replace text representation of website urls and email addresses with clickable equivalents.
 *
 * For $type 'url' every "scheme://...", "www...." and "ftp...." occurrence that starts the
 * text or follows a newline or space (for the first two also an opening parenthesis) is
 * wrapped in an anchor. Bare "www." hosts get an "http://" href and bare "ftp." hosts an
 * "ftp://" href, so the link is never treated as a relative URL.
 * For any other $type (default 'email') each address found is replaced by the result of
 * emailObfuscate().
 *
 * @param string $text
 * @param string $type email|url
 * @param array $opts options.
 * $opts = [
 * 'sub' => (string) substitute text within links (a value ending in ".glyph" is converted with toGlyph())
 * 'ext' => (bool) load link in new window (not for email)
 * ]
 * @return string Empty string when $text is empty, otherwise the processed text.
 */
public function makeClickable($text = '', $type = 'email', $opts = array())
{
    if (empty($text))
    {
        return '';
    }

    $textReplace = (!empty($opts['sub'])) ? $opts['sub'] : '';

    if (substr($textReplace, -6) === '.glyph')
    {
        $textReplace = $this->toGlyph($textReplace, '');
    }

    switch ($type)
    {
        case 'url':
            // Without a substitute the link text is the matched URL itself (back-reference $3).
            // The substitute ends up inside a preg_replace() replacement string, so "$n" / "\n"
            // sequences in it are interpreted as back-references.
            $linktext = (!empty($textReplace)) ? $textReplace : '$3';
            $external = (!empty($opts['ext'])) ? 'target="_blank"' : '';

            $anchorOpen = '$1$2<a class="e-url" href="';
            $anchorRest = '" ' . $external . '>' . $linktext . '</a>';

            // Fully qualified URLs: scheme://host...
            $text = preg_replace("/(^|[\n \(])([\w]*?)([\w]*?:\/\/[\w]+[^ \,\"\n\r\t<]*)/is", $anchorOpen . '$3' . $anchorRest, $text);
            // Bare www. hosts.
            $text = preg_replace("/(^|[\n \(])([\w]*?)((www)\.[^ \,\"\t\n\r\)<]*)/is", $anchorOpen . 'http://$3' . $anchorRest, $text);
            // Bare ftp. hosts.
            $text = preg_replace("/(^|[\n ])([\w]*?)((ftp)\.[^ \,\"\t\n\r<]*)/is", $anchorOpen . 'ftp://$3' . $anchorRest, $text);
            break;

        case 'email':
        default:
            preg_match_all("#(?:[\n\r ]|^)?([a-z0-9\-_.]+?)@([\w\-]+\.([\w\-\.]+\.)*[\w]+)#i", $text, $match);

            if (!empty($match[0]))
            {
                $srch = array();
                $repl = array();

                foreach ($match[0] as $eml)
                {
                    // The pattern may capture a leading whitespace character; drop it before replacing.
                    $email = trim($eml);
                    $srch[] = $email;
                    $repl[] = $this->emailObfuscate($email, $textReplace);
                }

                $text = str_replace($srch, $repl, $text);
            }
            break;
    }

    return $text;
}
/**
 * Convenience wrapper: hands $text to the BBcode handler returned by e107::getBB().
 *
 * @param string $text   Text that may contain BBcode.
 * @param mixed  $postID Forwarded unchanged as the second argument.
 * @return string
 */
public function parseBBCodes($text, $postID)
{
    $bbHandler = e107::getBB();

    return $bbHandler->parseBBCodes($text, $postID);
}
/**
 * Strips block tags from html.
 *
 * Only tags that are listed in $this->allowedTags and not in $this->blockTags are
 * kept; everything else is removed by strip_tags().
 *
 * @param string $html
 * @return string
 */
public function stripBlockTags($html)
{
    $keep = array_diff($this->allowedTags, $this->blockTags);

    // strip_tags() expects its allow-list as one string of the form "<a><b>...".
    $parm = $keep ? '<' . implode('><', $keep) . '>' : '';

    return strip_tags($html, $parm);
}

/**
 * Remove attributes from the tags in $s, keeping only those named in $allowedattr.
 *
 * Attribute names are compared in lower case. Only quoted attributes (name="v" or
 * name='v') are recognised; the attribute part of each tag is rebuilt from the kept
 * ones, so anything else in that part is dropped as well.
 *
 * @param string $s           Markup to process.
 * @param array  $allowedattr Lower-case attribute names to keep.
 * @return string $s itself when no tag with an attribute part is found.
 */
public function stripAttributes($s, $allowedattr = array())
{
    if (!preg_match_all("/<[^>]*\\s([^>]*)\\/*>/msiU", $s, $res, PREG_SET_ORDER))
    {
        return $s;
    }

    foreach ($res as $r)
    {
        $tag = $r[0];

        // Split the attribute part into its quoted name=value pairs.
        preg_match_all("/\\s.*=(['\"]).*\\1/msiU", " " . $r[1], $split, PREG_SET_ORDER);

        $newattrs = array();

        foreach ($split as $spl)
        {
            $a = $spl[0];
            $tmp = explode("=", $a);

            $keepIt = (trim($a) == "")
                || (isset($tmp[1]) && (trim($tmp[0]) == "" || in_array(strtolower(trim($tmp[0])), $allowedattr)));

            if ($keepIt)
            {
                $newattrs[] = $a;
            }
        }

        // Swap the old attribute part for the filtered one, then the old tag for the new tag.
        $rpl = str_replace($r[1], implode(" ", $newattrs), $tag);
        $s = str_replace($tag, $rpl, $s);
    }

    return $s;
}

/**
 * Converts the text (presumably retrieved from the database) for HTML output.
 *
 * @param string $text
 * @param boolean $parseBB [optional]
 * @param string $modifiers [optional] TITLE|SUMMARY|DESCRIPTION|BODY|RAW|LINKTEXT etc.
 * Comma-separated list, no spaces allowed
 * first modifier must be a CONTEXT modifier, in UPPER CASE.
 * subsequent modifiers are lower case - see $this->e_Modifiers for possible values
 * @param mixed $postID [optional]
 * @param boolean $wrap [optional]
 * @return string
 * @todo complete the documentation of this essential method
 */
public function toHTML($text, $parseBB = false, $modifiers = '', $postID = '', $wrap = false)
{
    if (empty($text) || !is_string($text))
    {
        return $text;
    }
    if (empty($this->pref)) // cache the prefs.
{ $prefsUsed = array('smiley_activate', 'make_clickable', 'link_replace', 'main_wordwrap', 'link_text', 'email_text', 'links_new_window', 'profanity_filter', 'tohtml_hook', 'e_tohtml_list', 'e_parse_list' ); $cfg = e107::getConfig(); foreach ($prefsUsed as $v) { $this->pref[$v] = $cfg->get($v); } } global $fromadmin; // Set default modifiers to start $opts = $this->getModifiers($modifiers); if ($this->isHtml($text)) //BC FIx for when HTML is saved without [html][/html] { $opts['nobreak'] = true; $text = trim($text); if (strpos($text, '[center]') === 0) // quick bc fix TODO Find a better solution. [center][/center] containing HTML. { $text = str_replace(array('[center]', '[/center]'), array("
", ''), $text); } } $fromadmin = $opts['fromadmin']; // Convert defines(constants) within text. eg. Lan_XXXX - must be the entire text string (i.e. not embedded) // The check for '::' is a workaround for a bug in the Zend Optimiser 3.3.0 and PHP 5.2.4 combination // - causes crashes if '::' in site name if ($opts['defs'] && (strlen($text) < 35) && ((strpos($text, '::') === false) && defined(trim($text)))) { $text = constant(trim($text)); // don't return yet, words could be hooked with linkwords etc. } if ($opts['no_tags']) { $text = strip_tags($text); } /* if(MAGIC_QUOTES_GPC === true) // precaution for badly saved data. { $text = stripslashes($text); } */ // $text = " ".$text; // Now get on with the parsing $ret_parser = ''; $last_bbcode = ''; // So we can change them on each loop $saveOpts = $opts; if ($parseBB == false) { $content = array($text); } else { // Split each text block into bits which are either within one of the 'key' bbcodes, or outside them // (Because we have to match end words, the 'extra' capturing subpattern gets added to output array. We strip it later) $content = preg_split('#(\[(table|html|php|code|scode|hide).*?\[\/(?:\\2)\])#mis', $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE); } // Use $full_text variable so its available to special bbcodes if required foreach ($content as $full_text) { $proc_funcs = true; $convertNL = true; // We may have 'captured' a bbcode word - strip it if so if ($last_bbcode == $full_text) { $last_bbcode = ''; $proc_funcs = false; $full_text = ''; } else { // Set the options for this pass $opts = $saveOpts; // Have to have a good test in case a 'non-key' bbcode starts the block // - so pull out the bbcode parameters while we're there if (($parseBB !== false) && preg_match('#(^\[(table|html|php|code|scode|hide)(.*?)\])(.*?)(\[/\\2\]$)#is', $full_text, $matches)) { $proc_funcs = false; $full_text = ''; $code_text = $matches[4]; // $parm = $matches[3] ? 
substr($matches[3], 1) : ''; $last_bbcode = $matches[2]; switch ($matches[2]) { case 'php' : $proc_funcs = false; $code_text = ''; break; case 'html' : // This overrides and deprecates html.bb $proc_funcs = true; // $code_text = str_replace("\r\n", " ", $code_text); // $code_text = html_entity_decode($code_text, ENT_QUOTES, CHARSET); // $code_text = str_replace('&','&',$code_text); // validation safe. $html_start = ''; // markers for html-to-bbcode replacement. $html_end = ''; $full_text = str_replace(array('[html]', '[/html]'), '', $code_text); // quick fix.. security issue? $full_text = $this->parseBBCodes($full_text, $postID); // parse any embedded bbcodes eg. [img] $full_text = $this->replaceConstants($full_text, 'abs'); // parse any other paths using {e_.... $full_text = $html_start . $full_text . $html_end; $full_text = $this->parseBBTags($full_text); // striptags. $opts['nobreak'] = true; $parseBB = false; // prevent further bbcode processing. break; case 'table' : // strip
from inside of$convertNL = false; // break; case 'hide' : $proc_funcs = true; case 'scode': case 'code' : $full_text = $this->parseBBCodes($matches[0], $postID); break; } } } // Do the 'normal' processing - in principle, as previously - but think about the order. if ($proc_funcs && !empty($full_text)) // some more speed { // Split out and ignore any scripts and style blocks. With just two choices we can match the closing tag in the regex $subcon = preg_split('#((?:
]+>.*?|tyle[^>]+>.*?))#mis', $full_text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE); foreach ($subcon as $sub_blk) { if (strpos($sub_blk, '') !== false)) { $lan1 = defset('LAN_NO_SCRIPT_ACCESS', "You don't have permission to use [script] tags."); $lan2 = defset('', "If you believe this is an error, please ask the main administrator to grant you script access via [b]Preferences > Content Filters[/b]"); $srch = ['[', ']']; $repl = ['<', '>']; e107::getMessage()->addWarning(str_replace($srch,$repl,$lan1)); e107::getMessage()->addWarning(e107::getParser()->toHTML($lan2,true)); } // Set it up for processing. libxml_use_internal_errors(true); $html = mb_encode_numericentity($html, [0x80, 0xffff, 0, 0xffff], 'UTF-8'); // $fragment = $doc->createDocumentFragment(); // $fragment->appendXML($html); // $doc->appendChild($fragment); // $doc->encoding = 'utf-8'; $doc = $this->domObj; $opts = defined('LIBXML_HTML_NOIMPLIED') ? LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD : 0; $doc->loadHTML($html, $opts); $this->nodesToConvert = array(); // required. $this->nodesToDelete = array(); // required. $this->removedList = array(); $tmp = $doc->getElementsByTagName('*'); /** @var DOMElement $node */ foreach ($tmp as $node) { $path = $node->getNodePath(); // echo "
Path = ".$path; // $tag = strval(basename($path)); if (strpos($path, '/code') !== false || strpos($path, '/pre') !== false) // treat as html. { $this->pathList[] = $path; // $this->nodesToConvert[] = $node->parentNode; // $node; $this->nodesToDisableSC[] = $node; continue; } $tag = preg_replace('/([a-z0-9\[\]\/]*)?\/([\w\-]*)(\[(\d)*\])?$/i', '$2', $path); if (!in_array($tag, $this->allowedTags)) { $this->removedList['tags'][] = $tag; $this->nodesToDelete[] = $node; continue; } $removeAttributes = array(); foreach ($node->attributes as $attr) { $name = $attr->nodeName; $value = $attr->nodeValue; $allow = isset($this->allowedAttributes[$tag]) ? $this->allowedAttributes[$tag] : $this->allowedAttributes['default']; if (!in_array($name, $allow)) { if ($this->scriptAccess == true && strpos($name, 'data-') === 0) { continue; } $removeAttributes[] = $name; //$node->removeAttribute($name); $this->removedList['attributes'][] = $name . ' from <' . $tag . '>'; continue; } if ($this->invalidAttributeValue($value)) // Check value against blacklisted values. { //$node->removeAttribute($name); $node->setAttribute($name, '#---sanitized---#'); $this->removedList['sanitized'][] = $tag . '[' . $name . ']'; } else { $_value = $this->secureAttributeValue($name, $value); $node->setAttribute($name, $_value); if ($_value !== $value) { $this->removedList['sanitized'][] = $tag . '[' . $name . '] converted "' . $value . '" -> "' . $_value . '"'; } } } // required - removing attributes in a loop breaks the loop if (!empty($removeAttributes)) { foreach ($removeAttributes as $name) { $node->removeAttribute($name); } } } // Remove some stuff. 
foreach ($this->nodesToDelete as $node) { $node->parentNode->removeChild($node); } // Disable Shortcodes in pre/code foreach ($this->nodesToDisableSC as $key => $node) { $value = $node->C14N(); if (empty($value)) { continue; } $value = str_replace(' ', "\r", $value); if ($node->nodeName === 'pre') { $value = preg_replace('/^]*>/', '', $value); $value = str_replace(array('', '
'), array('', '__E_PARSER_CLEAN_HTML_LINE_BREAK__'), $value); } elseif ($node->nodeName === 'code') { $value = preg_replace('/^]*>/', '', $value); $value = str_replace(array('
', '
'), array('', '__E_PARSER_CLEAN_HTML_LINE_BREAK__'), $value); } // temporarily change {e_XXX} to {{{e_XXX}}} $value = str_replace(array('__E_PARSER_CLEAN_HTML_CURLY_OPEN__', '__E_PARSER_CLEAN_HTML_CURLY_CLOSED__'), array('{{{', '}}}'), $value); // temporarily change {e_XXX} to {{{e_XXX}}} $newNode = $doc->createElement($node->nodeName); $newNode->nodeValue = $value; if ($class = $node->getAttribute('class')) { $newNode->setAttribute('class', $class); } if ($style = $node->getAttribute('style')) { $newNode->setAttribute('style', $style); } $node->parentNode->replaceChild($newNode, $node); } // Convertand
"); $value = substr($value,0,$end); } $value = htmlentities(htmlentities($value)); // Needed $node->nodeValue = $value; } */ $cleaned = $doc->saveHTML($doc->documentElement); // $doc->documentElement fixes utf-8/entities issue. @see http://stackoverflow.com/questions/8218230/php-domdocument-loadhtml-not-encoding-utf-8-correctly $cleaned = str_replace( array("\n", '__E_PARSER_CLEAN_HTML_LINE_BREAK__', '__E_PARSER_CLEAN_HTML_NON_BREAKING_SPACE__', '{{{', '}}}', '__E_PARSER_CLEAN_HTML_CURLY_OPEN__', '__E_PARSER_CLEAN_HTML_CURLY_CLOSED__', '', '', '', ''), array('', "\n", ' ', '{', '}', '{', '}', '', '', '', ''), $cleaned ); // filter out tags. return trim($cleaned); } /** * @param $attribute * @param $value * @return array|mixed|string|string[] */ public function secureAttributeValue($attribute, $value) { $search = isset($this->replaceAttrValues[$attribute]) ? $this->replaceAttrValues[$attribute] : $this->replaceAttrValues['default']; if (!empty($search)) { $value = str_replace($search, '', $value); } return $value; } /** * Check for Invalid Attribute Values * * @param $value string * @return bool true/false */ public function invalidAttributeValue($value) { foreach ($this->badAttrValues as $v) // global list because a bad value is bad regardless of the attribute it's in. ;-) { if (preg_match('/' . $v . 
'/i', $value) == true) { $this->removedList['blacklist'][] = "Match found for '{$v}' in '{$value}'"; return true; } } return false; } /** * @param $modifiers * @return array */ private function getModifiers($modifiers) { $opts = $this->e_optDefault; if (strpos($modifiers, 'defaults_off') !== false) { $opts = $this->e_SuperMods['NODEFAULT']; } // Now process any modifiers that are specified $aMods = explode(',', $modifiers); // If there's a supermodifier, it must be first, and in uppercase $psm = trim($aMods[0]); if (isset($this->e_SuperMods[$psm])) { // Supermodifier found - override default values where necessary $opts = array_merge($opts, $this->e_SuperMods[$psm]); $opts['context'] = $psm; unset($aMods[0]); } // Now find any regular modifiers; use them to modify the context // (there should only be one or two out of the list of possibles) foreach ($aMods as $mod) { // Slight concession to varying coding styles - stripping spaces is a waste of CPU cycles! $mod = trim($mod); if (isset($this->e_Modifiers[$mod])) { // This is probably quicker than array_merge // - especially as usually only one or two loops foreach ($this->e_Modifiers[$mod] as $k => $v) { // Update our context-specific options $opts[$k] = $v; } } } // Turn off a few things if not enabled in options if (empty($this->pref['smiley_activate'])) { $opts['emotes'] = false; } if (empty($this->pref['make_clickable'])) { $opts['link_click'] = false; } if (empty($this->pref['link_replace'])) { $opts['link_replace'] = false; } return $opts; } /** * @param array $opts * @param string $text * @param bool $convertNL * @param bool|string $parseBB * @param $modifiers * @param int $postID * @return array|bool|mixed|string|null */ private function processModifiers($opts, $text, $convertNL, $parseBB, $modifiers, $postID) { if ($opts['link_click']) { if ($opts['link_replace'] && defset('ADMIN_AREA') !== true) { $link_text = $this->pref['link_text']; $email_text = ($this->pref['email_text']) ? 
$this->replaceConstants($this->pref['email_text']) : LAN_EMAIL_SUBS; $text = $this->makeClickable($text, 'url', array('sub' => $link_text, 'ext' => $this->pref['links_new_window'])); $text = $this->makeClickable($text, 'email', array('sub' => $email_text)); } else { $text = $this->makeClickable($text, 'url', array('ext' => true)); $text = $this->makeClickable($text, 'email'); } } // Convert emoticons to graphical icons, if enabled if ($opts['emotes']) { $text = e107::getEmote()->filterEmotes($text); } // Reduce newlines in all forms to a single newline character (finds '\n', '\r\n', '\n\r') if (!$opts['nobreak']) { if ($convertNL && ($this->preformatted($text) === false)) // eg. html or markdown { // We may need to convert toTags to Htmlentities. /* TODO XXX Still necessary? Perhaps using bbcodes only? foreach($this->nodesToConvert as $node) { $value = $node->C14N(); $value = str_replace(" ","",$value); // print_a("WOWOWO"); if($node->nodeName == 'pre') { $value = substr($value,5); $end = strrpos($value,""); $value = substr($value,0,$end); } if($node->nodeName == 'code') { $value = substr($value,6); $end = strrpos($value,"
later $text = preg_replace("#[\r]*\n[\r]*#", E_NL, $text); } else { // Not doing any more - its HTML or Markdown so keep it as is. $text = preg_replace("#[\r]*\n[\r]*#", "\n", $text); } } // Entity conversion // Restore entity form of quotes and such to single characters, except for text destined for tag attributes or JS. if ($opts['value']) { // output used for attribute values. $text = str_replace($this->replace, $this->search, $text); } else { // output not used for attribute values. $text = str_replace($this->search, $this->replace, $text); } // BBCode processing (other than the four already done, which shouldn't appear at all in the text) if ($parseBB !== false) { if ($parseBB === true) { // 'Normal' or 'legacy' processing if ($modifiers === 'WYSIWYG') { $text = e107::getBB()->parseBBCodes($text, $postID, 'wysiwyg'); } else { $text = e107::getBB()->parseBBCodes($text, $postID); } } elseif ($parseBB === 'STRIP') // Need to strip all BBCodes { $text = e107::getBB()->parseBBCodes($text, $postID, 'default', true); } else // Need to strip just some BBCodes { $text = e107::getBB()->parseBBCodes($text, $postID, 'default', $parseBB); } } // replace all {e_XXX} constants with their e107 value. modifier determines relative/absolute conversion // (Moved to after bbcode processing by Cameron) if ($opts['constants']) { $text = $this->replaceConstants($text, $opts['constants']); // Now decodes text values } // profanity filter if ($this->pref['profanity_filter']) { $text = e107::getProfanity()->filterProfanities($text); } // Optional short-code conversion if ($opts['parse_sc']) { $text = $this->parseTemplate($text, true); } /** * / @deprecated */ if ($opts['hook']) //Run any hooked in parsers { if (!empty($this->pref['tohtml_hook'])) { // trigger_error('tohtml_hook is deprecated. 
Use e_parse.php instead.', E_USER_DEPRECATED); // NO LAN //Process the older tohtml_hook pref (deprecated) foreach (explode(',', $this->pref['tohtml_hook']) as $hook) { if (!is_object($this->e_hook[$hook]) && is_readable(e_PLUGIN . $hook . '/' . $hook . '.php')) { require_once(e_PLUGIN . $hook . '/' . $hook . '.php'); $hook_class = 'e_' . $hook; $this->e_hook[$hook] = new $hook_class; } if (is_object($this->e_hook[$hook])) // precaution for old plugins. { $text = $this->e_hook[$hook]->$hook($text, $opts['context']); } } } /** * / @deprecated */ if (isset($this->pref['e_tohtml_list']) && is_array($this->pref['e_tohtml_list'])) { foreach ($this->pref['e_tohtml_list'] as $hook) { if (empty($hook)) { continue; } if (empty($this->e_hook[$hook]) && is_readable(e_PLUGIN . $hook . '/e_tohtml.php') /*&& !is_object($this->e_hook[$hook])*/) { require_once(e_PLUGIN . $hook . '/e_tohtml.php'); $hook_class = 'e_tohtml_' . $hook; if (class_exists($hook_class)) { $this->e_hook[$hook] = new $hook_class; } } if (isset($this->e_hook[$hook]) && is_object($this->e_hook[$hook])) { /** @var e_tohtml_linkwords $deprecatedHook */ $deprecatedHook = $this->e_hook[$hook]; $text = $deprecatedHook->to_html($text, $opts['context']); } } } /** * / Preferred 'hook' */ if (!empty($this->pref['e_parse_list'])) { foreach ($this->pref['e_parse_list'] as $plugin) { $hookObj = e107::getAddon($plugin, 'e_parse'); if ($tmp = e107::callMethod($hookObj, 'toHTML', $text, $opts['context'])) { $text = $tmp; } } } } // Word wrap if (!empty($this->pref['main_wordwrap']) && !$opts['nobreak']) { $text = $this->textclean($text, $this->pref['main_wordwrap']); } // Search highlighting if ($opts['emotes'] && $this->checkHighlighting()) // Why?? { $text = $this->e_highlight($text, $this->e_query); } if ($convertNL == true) { // Default replaces all \n with
for HTML display $nl_replace = '
'; if ($opts['nobreak']) { $nl_replace = ''; } elseif ($opts['retain_nl']) { $nl_replace = "\n"; } $text = str_replace(E_NL, $nl_replace, $text); } return $text; } }