diff --git a/library/HTMLPurifier/Encoder.php b/library/HTMLPurifier/Encoder.php index fef9b589..054986ea 100644 --- a/library/HTMLPurifier/Encoder.php +++ b/library/HTMLPurifier/Encoder.php @@ -255,7 +255,7 @@ class HTMLPurifier_Encoder // 7F-9F is not strictly prohibited by XML, // but it is non-SGML, and thus we don't allow it (0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) || - (0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4) + (0xE000 <= $mUcs4 && 0x10FFFF >= $mUcs4) ) ) { $out .= $char; diff --git a/tests/HTMLPurifier/EncoderTest.php b/tests/HTMLPurifier/EncoderTest.php index 819d4b11..c43e4240 100644 --- a/tests/HTMLPurifier/EncoderTest.php +++ b/tests/HTMLPurifier/EncoderTest.php @@ -23,6 +23,7 @@ class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness $this->assertCleanUTF8('Normal string.'); $this->assertCleanUTF8("Test\tAllowed\nControl\rCharacters"); $this->assertCleanUTF8("null byte: \0", 'null byte: '); + $this->assertCleanUTF8("あ(い)う(え)お\0", "あ(い)う(え)お"); // test for issue #122 $this->assertCleanUTF8("\1\2\3\4\5\6\7", ''); $this->assertCleanUTF8("\x7F", ''); // one byte invalid SGML char $this->assertCleanUTF8("\xC2\x80", ''); // two byte invalid SGML