mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-07-31 03:10:09 +02:00
[2.1.0] True emoticon < fix.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1260 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
@@ -3,6 +3,16 @@
|
||||
require_once 'HTMLPurifier/Lexer.php';
|
||||
require_once 'HTMLPurifier/TokenFactory.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'AggressivelyFixLt', false, 'bool', '
|
||||
This directive enables aggressive pre-filter fixes HTML Purifier can
|
||||
perform in order to ensure that open angled-brackets do not get killed
|
||||
during parsing stage. Enabling this will result in two preg_replace_callback
|
||||
calls and one preg_replace call for every bit of HTML passed through here.
|
||||
It is not necessary and will have no effect for PHP 4.
|
||||
This directive has been available since 2.1.0.
|
||||
');
|
||||
|
||||
/**
|
||||
* Parser that uses PHP 5's DOM extension (part of the core).
|
||||
*
|
||||
@@ -42,6 +52,16 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
|
||||
$html = $this->normalize($html, $config, $context);
|
||||
|
||||
// attempt to armor stray angled brackets that cannot possibly
|
||||
// form tags and thus are probably being used as emoticons
|
||||
if ($config->get('Core', 'AggressivelyFixLt')) {
|
||||
$char = '[^a-z!\/]';
|
||||
$comment = "/<!--(.*?)(-->|\z)/is";
|
||||
$html = preg_replace_callback($comment, array('HTMLPurifier_Lexer_DOMLex', 'callbackArmorCommentEntities'), $html);
|
||||
$html = preg_replace("/<($char)/i", '<\\1', $html);
|
||||
$html = preg_replace_callback($comment, array('HTMLPurifier_Lexer_DOMLex', 'callbackUndoCommentSubst'), $html); // fix comments
|
||||
}
|
||||
|
||||
// preprocess html, essential for UTF-8
|
||||
$html =
|
||||
'<!DOCTYPE html '.
|
||||
@@ -151,5 +171,21 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
*/
|
||||
public function muteErrorHandler($errno, $errstr) {}
|
||||
|
||||
/**
|
||||
* Callback function for undoing escaping of stray angled brackets
|
||||
* in comments
|
||||
*/
|
||||
function callbackUndoCommentSubst($matches) {
|
||||
return '<!--' . strtr($matches[1], array('&'=>'&','<'=>'<')) . $matches[2];
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback function that entity-izes ampersands in comments so that
|
||||
* callbackUndoCommentSubst doesn't clobber them
|
||||
*/
|
||||
function callbackArmorCommentEntities($matches) {
|
||||
return '<!--' . str_replace('&', '&', $matches[1]) . $matches[2];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@@ -204,7 +204,8 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
// Check leading character is alnum, if not, we may
|
||||
// have accidently grabbed an emoticon. Translate into
|
||||
// text and go our merry way
|
||||
if (!ctype_alnum($segment[0])) {
|
||||
if (!ctype_alpha($segment[0])) {
|
||||
// XML: $segment[0] !== '_' && $segment[0] !== ':'
|
||||
if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt');
|
||||
$token = new
|
||||
HTMLPurifier_Token_Text(
|
||||
|
Reference in New Issue
Block a user