diff --git a/NEWS b/NEWS index beef6b20..de383ae1 100644 --- a/NEWS +++ b/NEWS @@ -36,6 +36,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier decoding entities that are missing trailing semicolon. To get old behavior, set %Core.LegacyEntityDecoder to true. (#119) +- Workaround libxml bug when HTML tags are embedded inside + script tags. To disable workaround set %Core.AggressivelyRemoveScript + to false. (#83) # By default, when a link has a target attribute associated with it, we now also add rel="noopener" in order to prevent the new window from being able to overwrite diff --git a/configdoc/usage.xml b/configdoc/usage.xml index 49bddaa5..de395b8d 100644 --- a/configdoc/usage.xml +++ b/configdoc/usage.xml @@ -6,7 +6,7 @@ 85 - 322 + 326 67 @@ -124,7 +124,7 @@ 122 - 304 + 308 @@ -172,7 +172,8 @@ 234 - 309 + 313 + 351 37 @@ -260,14 +261,25 @@ 62 + + + 215 + 337 + + - 320 + 324 - 343 + 347 + + + + + 351 diff --git a/library/HTMLPurifier/ConfigSchema/schema.ser b/library/HTMLPurifier/ConfigSchema/schema.ser index df8c5c46..371e948f 100644 Binary files a/library/HTMLPurifier/ConfigSchema/schema.ser and b/library/HTMLPurifier/ConfigSchema/schema.ser differ diff --git a/library/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyRemoveScript.txt b/library/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyRemoveScript.txt new file mode 100644 index 00000000..b2b6ab14 --- /dev/null +++ b/library/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyRemoveScript.txt @@ -0,0 +1,16 @@ +Core.AggressivelyRemoveScript +TYPE: bool +VERSION: 4.9.0 +DEFAULT: true +--DESCRIPTION-- +

+ This directive enables aggressive pre-filter removal of + script tags. This is not necessary for security, + but it can help work around a bug in libxml where embedded + HTML elements inside script sections cause the parser to + choke. To revert to pre-4.9.0 behavior, set this to false. + This directive has no effect if %HTML.Trusted is true, + %Core.RemoveScriptContents is false, or %Core.HiddenElements + does not contain script. +

+--# vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php index 37174eae..0fc048f6 100644 --- a/library/HTMLPurifier/Lexer.php +++ b/library/HTMLPurifier/Lexer.php @@ -348,6 +348,12 @@ class HTMLPurifier_Lexer $html = preg_replace('#<\?.+?\?>#s', '', $html); } + if ($config->get('Core.AggressivelyRemoveScript') && + !($config->get('HTML.Trusted') || !$config->get('Core.RemoveScriptContents') + || empty($config->get('Core.HiddenElements')["script"]))) { + $html = preg_replace('#<script[^>]*>.*?</script>#i', '', $html); + } + return $html; }