diff --git a/NEWS b/NEWS
index f08163d0..617d6a17 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
==========================
4.1.2, unknown release date
+! Added %Core.RemoveProcessingInstructions, which lets you remove
+ <? ... ?> statements.
- Fix improper handling of Internet Explorer conditional comments
by parser. Thanks zmonteca for reporting.
diff --git a/configdoc/usage.xml b/configdoc/usage.xml
index ec5b4166..444c3ad5 100644
--- a/configdoc/usage.xml
+++ b/configdoc/usage.xml
@@ -6,7 +6,7 @@
81
- 269
+ 282
53
@@ -149,7 +149,7 @@
202
- 258
+ 269
27
@@ -211,7 +211,12 @@
- 267
+ 280
+
+
+
+
+ 301
diff --git a/library/HTMLPurifier/ConfigSchema/schema.ser b/library/HTMLPurifier/ConfigSchema/schema.ser
index 22b8d54a..ac93a0c4 100644
Binary files a/library/HTMLPurifier/ConfigSchema/schema.ser and b/library/HTMLPurifier/ConfigSchema/schema.ser differ
diff --git a/library/HTMLPurifier/ConfigSchema/schema/Core.RemoveProcessingInstructions.txt b/library/HTMLPurifier/ConfigSchema/schema/Core.RemoveProcessingInstructions.txt
new file mode 100644
index 00000000..0a6d4ec1
--- /dev/null
+++ b/library/HTMLPurifier/ConfigSchema/schema/Core.RemoveProcessingInstructions.txt
@@ -0,0 +1,11 @@
+Core.RemoveProcessingInstructions
+TYPE: bool
+VERSION: 4.1.2
+DEFAULT: false
+--DESCRIPTION--
+Instead of escaping processing instructions in the form <code>&lt;? ...
+?&gt;</code>, remove them outright.  This may be useful if the HTML
+you are validating contains XML processing instruction gunk; however,
+it can also be user-unfriendly for people attempting to post PHP
+snippets.
+--# vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php
index e3522009..f853421a 100644
--- a/library/HTMLPurifier/Lexer.php
+++ b/library/HTMLPurifier/Lexer.php
@@ -297,6 +297,11 @@ class HTMLPurifier_Lexer
// represent non-SGML characters (horror, horror!)
$html = HTMLPurifier_Encoder::cleanUTF8($html);
+ // if processing instructions are to be removed, remove them now
+ if ($config->get('Core.RemoveProcessingInstructions')) {
+ $html = preg_replace('#<\?.+?\?>#s', '', $html);
+ }
+
return $html;
}
diff --git a/tests/HTMLPurifier/LexerTest.php b/tests/HTMLPurifier/LexerTest.php
index e6b0e0fb..0cb95155 100644
--- a/tests/HTMLPurifier/LexerTest.php
+++ b/tests/HTMLPurifier/LexerTest.php
@@ -717,6 +717,14 @@ div {}
);
}
+ function test_tokenizeHTML_removeProcessingInstruction() { // checks that an enabled Core.RemoveProcessingInstructions drops a PI completely
+ $this->config->set('Core.RemoveProcessingInstructions', true);
+ $this->assertTokenization(
+ '<?php </foo> ?>', // non-empty PI (with tag-like content inside) so the removal is actually exercised
+ array() // expected token list: empty, since the whole input is a processing instruction
+ );
+ }
+
/*
function test_tokenizeHTML_() {