$lan1 = defset('LAN_NO_SCRIPT_ACCESS', "You don't have permission to use [script] tags.");
$lan2 = defset('', "If you believe this is an error, please ask the main administrator to grant you script access via [b]Preferences > Content Filters[/b]");
$srch = ['[', ']'];
$repl = ['<', '>'];
// Set it up for processing.
if (function_exists('mb_convert_encoding'))
$html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');
// $fragment = $doc->createDocumentFragment();
// $fragment->appendXML($html);
// $doc->appendChild($fragment);
// $doc->encoding = 'utf-8';
$doc = $this->domObj;
$doc->loadHTML($html, $opts);
$this->nodesToConvert = array(); // required.
$this->nodesToDelete = array(); // required.
$this->removedList = array();
$tmp = $doc->getElementsByTagName('*');
/** @var DOMElement $node */
foreach ($tmp as $node)
$path = $node->getNodePath();
// echo "
Path = ".$path;
// $tag = strval(basename($path));
if (strpos($path, '/code') !== false || strpos($path, '/pre') !== false) // treat as html.
$this->pathList[] = $path;
// $this->nodesToConvert[] = $node->parentNode; // $node;
$this->nodesToDisableSC[] = $node;
$tag = preg_replace('/([a-z0-9\[\]\/]*)?\/([\w\-]*)(\[(\d)*\])?$/i', '$2', $path);
if (!in_array($tag, $this->allowedTags))
$this->removedList['tags'][] = $tag;
$this->nodesToDelete[] = $node;
$removeAttributes = array();
foreach ($node->attributes as $attr)
$name = $attr->nodeName;
$value = $attr->nodeValue;
$allow = isset($this->allowedAttributes[$tag]) ? $this->allowedAttributes[$tag] : $this->allowedAttributes['default'];
if (!in_array($name, $allow))
if ($this->scriptAccess == true && strpos($name, 'data-') === 0)
$removeAttributes[] = $name;
$this->removedList['attributes'][] = $name . ' from <' . $tag . '>';
if ($this->invalidAttributeValue($value)) // Check value against blacklisted values.
$node->setAttribute($name, '#---sanitized---#');
$this->removedList['sanitized'][] = $tag . '[' . $name . ']';
$_value = $this->secureAttributeValue($name, $value);
$node->setAttribute($name, $_value);
if ($_value !== $value)
$this->removedList['sanitized'][] = $tag . '[' . $name . '] converted "' . $value . '" -> "' . $_value . '"';
// required - removing attributes in a loop breaks the loop
if (!empty($removeAttributes))
foreach ($removeAttributes as $name)
// Remove some stuff.
foreach ($this->nodesToDelete as $node)
// Disable Shortcodes in pre/code
foreach ($this->nodesToDisableSC as $key => $node)
$value = $node->C14N();
if (empty($value))
$value = str_replace('
', "\r", $value);
if ($node->nodeName === 'pre')
$value = preg_replace('/^]*>/', '', $value);
$value = str_replace(array('
', '
'), array('', '__E_PARSER_CLEAN_HTML_LINE_BREAK__'), $value);
elseif ($node->nodeName === 'code')
$value = preg_replace('/^]*>/', '', $value);
$value = str_replace(array('
', '
'), array('', '__E_PARSER_CLEAN_HTML_LINE_BREAK__'), $value);
// temporarily change {e_XXX} to {{{e_XXX}}}
$value = str_replace(array('__E_PARSER_CLEAN_HTML_CURLY_OPEN__', '__E_PARSER_CLEAN_HTML_CURLY_CLOSED__'), array('{{{', '}}}'), $value); // temporarily change {e_XXX} to {{{e_XXX}}}
$newNode = $doc->createElement($node->nodeName);
$newNode->nodeValue = $value;
if ($class = $node->getAttribute('class'))
$newNode->setAttribute('class', $class);
if ($style = $node->getAttribute('style'))
$newNode->setAttribute('style', $style);
$node->parentNode->replaceChild($newNode, $node);
// Convert <code> and <pre> tags to HTML entities.
/* TODO XXX Still necessary? Perhaps using bbcodes only?
foreach($this->nodesToConvert as $node)
$value = $node->C14N();
$value = str_replace("
// print_a("WOWOWO");
if($node->nodeName == 'pre')
$value = substr($value,5);
$end = strrpos($value,"
$value = substr($value,0,$end);
if($node->nodeName == 'code')
$value = substr($value,6);
$end = strrpos($value,"
$value = substr($value,0,$end);
$value = htmlentities(htmlentities($value)); // Needed
$node->nodeValue = $value;
$cleaned = $doc->saveHTML($doc->documentElement); // $doc->documentElement fixes utf-8/entities issue. @see http://stackoverflow.com/questions/8218230/php-domdocument-loadhtml-not-encoding-utf-8-correctly
$cleaned = str_replace(
array('', "\n", ' ', '{', '}', '{', '}', '', '', '', ''),
); // filter out tags.
return trim($cleaned);
/**
 * Strip unwanted fragments from an attribute value.
 *
 * The fragment list is $this->replaceAttrValues[$attribute] when that entry
 * exists, otherwise $this->replaceAttrValues['default']. Every listed fragment
 * is replaced with an empty string, and the replacement is repeated until the
 * value no longer changes.
 *
 * @param string $attribute Attribute name used to select the fragment list.
 * @param array|string $value Attribute value to clean.
 * @return array|string The value with all listed fragments removed.
 */
public function secureAttributeValue($attribute, $value)
{
	if (isset($this->replaceAttrValues[$attribute]))
	{
		$search = $this->replaceAttrValues[$attribute];
	}
	elseif (isset($this->replaceAttrValues['default']))
	{
		$search = $this->replaceAttrValues['default'];
	}
	else
	{
		// No list configured at all: nothing to strip (avoids an undefined-index notice).
		$search = array();
	}

	if (empty($search))
	{
		return $value;
	}

	// A single str_replace() pass is not enough: removing the inner copy of a
	// nested fragment ("abABCc" minus "ABC") re-creates the fragment ("abc").
	// Each changing pass shortens the value, so the loop always terminates.
	do
	{
		$previous = $value;
		$value = str_replace($search, '', $value);
	}
	while ($value !== $previous);

	return $value;
}
/**
 * Check for Invalid Attribute Values
 *
 * Each entry of $this->badAttrValues is used as a case-insensitive regular
 * expression. The first entry that matches is logged to
 * $this->removedList['blacklist'] and ends the search.
 *
 * @param string $value Attribute value to test.
 * @return bool true when a blacklisted pattern matches, false otherwise.
 */
public function invalidAttributeValue($value)
{
	// One global list: a bad value is bad regardless of the attribute it's in.
	foreach ($this->badAttrValues as $v)
	{
		$regex = '/' . $v . '/i';

		if (!preg_match($regex, $value))
		{
			continue;
		}

		$this->removedList['blacklist'][] = "Match found for '{$v}' in '{$value}'";

		return true;
	}

	return false;
}
/**
 * Resolve a comma-separated modifier string into an options array.
 *
 * The starting point is $this->e_optDefault, or the 'NODEFAULT' super-modifier
 * set when the string contains 'defaults_off'. A super-modifier in first
 * position is merged in and recorded as the 'context'; every entry that is a
 * known regular modifier then overrides individual options. Finally, options
 * whose matching site preference is empty are switched off.
 *
 * @param string $modifiers Comma-separated modifier names.
 * @return array The resolved options.
 */
private function getModifiers($modifiers)
{
	$opts = (strpos($modifiers, 'defaults_off') === false)
		? $this->e_optDefault
		: $this->e_SuperMods['NODEFAULT'];

	// Split the list once and strip surrounding spaces from every entry.
	$modList = array_map('trim', explode(',', $modifiers));

	// A supermodifier, if any, has to come first and be in uppercase.
	$superMod = $modList[0];
	if (isset($this->e_SuperMods[$superMod]))
	{
		// Supermodifier found - its values override the defaults where given.
		$opts = array_merge($opts, $this->e_SuperMods[$superMod]);
		$opts['context'] = $superMod;
	}

	// Apply the regular modifiers (normally only one or two of them).
	foreach ($modList as $mod)
	{
		if (!isset($this->e_Modifiers[$mod]))
		{
			continue;
		}

		// Copy key by key into the context-specific options.
		foreach ($this->e_Modifiers[$mod] as $optKey => $optValue)
		{
			$opts[$optKey] = $optValue;
		}
	}

	// Preference => option that must be off while that preference is empty.
	$prefSwitches = array(
		'smiley_activate' => 'emotes',
		'make_clickable'  => 'link_click',
		'link_replace'    => 'link_replace',
	);

	foreach ($prefSwitches as $prefKey => $optKey)
	{
		if (empty($this->pref[$prefKey]))
		{
			$opts[$optKey] = false;
		}
	}

	return $opts;
}
/**
 * Apply the option-driven processing steps to a piece of text.
 *
 * Steps, in order: clickable links, emoticons, newline normalisation, entity
 * conversion, BBCode parsing, {e_XXX} constant replacement, profanity filter,
 * shortcode parsing, parser hooks, word wrap, search highlighting and the
 * final newline replacement.
 *
 * @param array $opts Options as produced by getModifiers().
 * @param string $text Text to process.
 * @param bool $convertNL Whether newlines are converted for display.
 * @param bool|string $parseBB false = skip BBCode, true = parse, 'STRIP' = strip
 *                             all BBCodes, any other value is passed to
 *                             parseBBCodes() as its fourth argument.
 * @param string $modifiers Raw modifier string; only compared against 'WYSIWYG' here.
 * @param int $postID Passed through to the BBCode parser.
 * @return array|bool|mixed|string|null The processed text.
 */
private function processModifiers($opts, $text, $convertNL, $parseBB, $modifiers, $postID)
{
	if ($opts['link_click'])
	{
		if ($opts['link_replace'] && defset('ADMIN_AREA') !== true)
		{
			$link_text = $this->pref['link_text'];
			$email_text = !empty($this->pref['email_text']) ? $this->replaceConstants($this->pref['email_text']) : LAN_EMAIL_SUBS;

			$text = $this->makeClickable($text, 'url', array('sub' => $link_text, 'ext' => $this->pref['links_new_window']));
			$text = $this->makeClickable($text, 'email', array('sub' => $email_text));
		}
		else
		{
			// Either variant or the other: running both would process the links twice.
			$text = $this->makeClickable($text, 'url', array('ext' => true));
			$text = $this->makeClickable($text, 'email');
		}
	}

	// Convert emoticons to graphical icons, if enabled
	if ($opts['emotes'])
	{
		$text = e107::getEmote()->filterEmotes($text);
	}

	// Reduce newlines in all forms to a single newline character (finds '\n', '\r\n', '\n\r')
	if (!$opts['nobreak'])
	{
		if ($convertNL && ($this->preformatted($text) === false)) // eg. html or markdown
		{
			// Mark the line breaks with E_NL; they are replaced at the end of this method.
			$text = preg_replace("#[\r]*\n[\r]*#", E_NL, $text);
		}
		else
		{
			// Not doing any more - its HTML or Markdown so keep it as is.
			$text = preg_replace("#[\r]*\n[\r]*#", "\n", $text);
		}
	}

	// Entity conversion
	// Restore entity form of quotes and such to single characters, except for text destined for tag attributes or JS.
	if ($opts['value'])
	{
		// output used for attribute values.
		$text = str_replace($this->replace, $this->search, $text);
	}
	else
	{
		// output not used for attribute values.
		$text = str_replace($this->search, $this->replace, $text);
	}

	// BBCode processing (other than the four already done, which shouldn't appear at all in the text)
	if ($parseBB !== false)
	{
		if ($parseBB === true)
		{
			// 'Normal' or 'legacy' processing
			if ($modifiers === 'WYSIWYG')
			{
				$text = e107::getBB()->parseBBCodes($text, $postID, 'wysiwyg');
			}
			else
			{
				$text = e107::getBB()->parseBBCodes($text, $postID);
			}
		}
		elseif ($parseBB === 'STRIP') // Need to strip all BBCodes
		{
			$text = e107::getBB()->parseBBCodes($text, $postID, 'default', true);
		}
		else // Need to strip just some BBCodes
		{
			$text = e107::getBB()->parseBBCodes($text, $postID, 'default', $parseBB);
		}
	}

	// replace all {e_XXX} constants with their e107 value. modifier determines relative/absolute conversion
	// (Moved to after bbcode processing by Cameron)
	if ($opts['constants'])
	{
		$text = $this->replaceConstants($text, $opts['constants']); // Now decodes text values
	}

	// profanity filter
	if (!empty($this->pref['profanity_filter']))
	{
		$text = e107::getProfanity()->filterProfanities($text);
	}

	// Optional short-code conversion
	if ($opts['parse_sc'])
	{
		$text = $this->parseTemplate($text, true);
	}

	// NOTE(review): all three hook loops are assumed to sit inside this condition - confirm.
	if ($opts['hook']) //Run any hooked in parsers
	{
		// @deprecated - the older comma-separated tohtml_hook pref.
		if (!empty($this->pref['tohtml_hook']))
		{
			// trigger_error('tohtml_hook is deprecated. Use e_parse.php instead.', E_USER_DEPRECATED); // NO LAN
			foreach (explode(',', $this->pref['tohtml_hook']) as $hook)
			{
				$loaded = isset($this->e_hook[$hook]) && is_object($this->e_hook[$hook]);

				if (!$loaded && is_readable(e_PLUGIN . $hook . '/' . $hook . '.php'))
				{
					require_once(e_PLUGIN . $hook . '/' . $hook . '.php');
					$hook_class = 'e_' . $hook;

					// Same guard as the e_tohtml loop below: a missing class must not be fatal.
					if (class_exists($hook_class))
					{
						$this->e_hook[$hook] = new $hook_class;
					}
				}

				if (isset($this->e_hook[$hook]) && is_object($this->e_hook[$hook])) // precaution for old plugins.
				{
					$text = $this->e_hook[$hook]->$hook($text, $opts['context']);
				}
			}
		}

		// @deprecated - e_tohtml.php addons.
		if (isset($this->pref['e_tohtml_list']) && is_array($this->pref['e_tohtml_list']))
		{
			foreach ($this->pref['e_tohtml_list'] as $hook)
			{
				if (empty($hook))
				{
					// NOTE(review): skipping empty entries is the assumed intent of this check - confirm.
					continue;
				}

				if (empty($this->e_hook[$hook]) && is_readable(e_PLUGIN . $hook . '/e_tohtml.php') /*&& !is_object($this->e_hook[$hook])*/)
				{
					require_once(e_PLUGIN . $hook . '/e_tohtml.php');

					$hook_class = 'e_tohtml_' . $hook;
					if (class_exists($hook_class))
					{
						$this->e_hook[$hook] = new $hook_class;
					}
				}

				if (isset($this->e_hook[$hook]) && is_object($this->e_hook[$hook]))
				{
					/** @var e_tohtml_linkwords $deprecatedHook */
					$deprecatedHook = $this->e_hook[$hook];
					$text = $deprecatedHook->to_html($text, $opts['context']);
				}
			}
		}

		// Preferred 'hook' - e_parse addons.
		if (!empty($this->pref['e_parse_list']))
		{
			foreach ($this->pref['e_parse_list'] as $plugin)
			{
				$hookObj = e107::getAddon($plugin, 'e_parse');

				// Only a non-empty result replaces the text.
				if ($tmp = e107::callMethod($hookObj, 'toHTML', $text, $opts['context']))
				{
					$text = $tmp;
				}
			}
		}
	}

	// Word wrap
	if (!empty($this->pref['main_wordwrap']) && !$opts['nobreak'])
	{
		$text = $this->textclean($text, $this->pref['main_wordwrap']);
	}

	// Search highlighting
	if ($opts['emotes'] && $this->checkHighlighting()) // Why??
	{
		$text = $this->e_highlight($text, $this->e_query);
	}

	if ($convertNL == true)
	{
		// Default replaces every E_NL marker with a line-break tag for HTML display.
		// NOTE(review): literal restored as '<br />' - confirm against upstream.
		$nl_replace = '<br />';

		if ($opts['nobreak'])
		{
			$nl_replace = '';
		}
		elseif ($opts['retain_nl'])
		{
			$nl_replace = "\n";
		}

		$text = str_replace(E_NL, $nl_replace, $text);
	}

	return $text;
}