1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-22 13:42:53 +02:00

[2.1.5] [MFH] Fix Shift_JIS encoding wonkiness with yen symbols and whatnot, as well as other patches

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1791 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2008-06-11 18:49:56 +00:00
parent 369a69d533
commit 450fc6649d
9 changed files with 274 additions and 132 deletions

View File

@@ -21,7 +21,20 @@ class HTMLPurifier_AttrDef_CSS_FontFamilyTest extends HTMLPurifier_AttrDefHarnes
$this->assertDef($d = "'\xE5\xAE\x8B\xE4\xBD\x93'");
$this->assertDef("\xE5\xAE\x8B\xE4\xBD\x93", $d);
$this->assertDef("'\\','f'", "'\\\\', f");
$this->assertDef("'\\01'", "''");
$this->assertDef("'\\20'", "' '");
$this->assertDef("\\0020", "'\\\\0020'");
$this->assertDef("'\\000045'", "E");
$this->assertDef("','", false);
$this->assertDef("',' foobar','", "' foobar'");
$this->assertDef("'\\27'", "'\''");
$this->assertDef('"\\22"', "'\"'");
$this->assertDef('"\\""', "'\"'");
$this->assertDef('"\'"', "'\\''");
$this->assertDef("'\\000045a'", "Ea");
$this->assertDef("'\\00045 a'", "Ea");
$this->assertDef("'\\00045 a'", "'E a'");
$this->assertDef("'\\\nf'", "f");
}
}

View File

@@ -11,7 +11,7 @@ class HTMLPurifier_AttrDef_TextTest extends HTMLPurifier_AttrDefHarness
$this->def = new HTMLPurifier_AttrDef_Text();
$this->assertDef('This is spiffy text!');
$this->assertDef(" Casual\tCDATA parse\ncheck. ", 'Casual CDATA parsecheck.');
$this->assertDef(" Casual\tCDATA parse\ncheck. ", 'Casual CDATA parse check.');
}

View File

@@ -12,8 +12,7 @@ class HTMLPurifier_AttrDefTest extends HTMLPurifier_Harness
$this->assertIdentical('', $def->parseCDATA(''));
$this->assertIdentical('', $def->parseCDATA("\t\n\r \t\t"));
$this->assertIdentical('foo', $def->parseCDATA("\t\n\r foo\t\t"));
$this->assertIdentical('ignorelinefeeds', $def->parseCDATA("ignore\nline\nfeeds"));
$this->assertIdentical('translate to space', $def->parseCDATA("translate\rto\tspace"));
$this->assertIdentical('translate to space', $def->parseCDATA("translate\nto\tspace"));
}

View File

@@ -9,6 +9,7 @@ class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness
function setUp() {
$this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
parent::setUp();
}
function assertCleanUTF8($string, $expect = null) {
@@ -28,91 +29,86 @@ class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness
$this->assertCleanUTF8("\xDF\xFF", ''); // malformed UTF8
}
function test_convertToUTF8() {
$config = HTMLPurifier_Config::createDefault();
$context = new HTMLPurifier_Context();
function test_convertToUTF8_noConvert() {
// UTF-8 means that we don't touch it
$this->assertIdentical(
HTMLPurifier_Encoder::convertToUTF8("\xF6", $config, $context),
HTMLPurifier_Encoder::convertToUTF8("\xF6", $this->config, $this->context),
"\xF6" // this is invalid
);
$this->assertNoErrors();
$config = HTMLPurifier_Config::create(array(
'Core.Encoding' => 'ISO-8859-1'
));
// Now it gets converted
}
function test_convertToUTF8_iso8859_1() {
$this->config->set('Core', 'Encoding', 'ISO-8859-1');
$this->assertIdentical(
HTMLPurifier_Encoder::convertToUTF8("\xF6", $config, $context),
HTMLPurifier_Encoder::convertToUTF8("\xF6", $this->config, $this->context),
"\xC3\xB6"
);
$config = HTMLPurifier_Config::create(array(
'Core.Encoding' => 'ISO-8859-1',
'Test.ForceNoIconv' => true
));
}
function test_convertToUTF8_withoutIconv() {
$this->config->set('Core', 'Encoding', 'ISO-8859-1');
$this->config->set('Test', 'ForceNoIconv', true);
$this->assertIdentical(
HTMLPurifier_Encoder::convertToUTF8("\xF6", $config, $context),
HTMLPurifier_Encoder::convertToUTF8("\xF6", $this->config, $this->context),
"\xC3\xB6"
);
}
function test_convertFromUTF8() {
$config = HTMLPurifier_Config::createDefault();
$context = new HTMLPurifier_Context();
// zhong-wen
$chinese = "\xE4\xB8\xAD\xE6\x96\x87 (Chinese)";
function getZhongWen() {
return "\xE4\xB8\xAD\xE6\x96\x87 (Chinese)";
}
function test_convertFromUTF8_utf8() {
// UTF-8 means that we don't touch it
$this->assertIdentical(
HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $config, $context),
HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $this->config, $this->context),
"\xC3\xB6"
);
$config = HTMLPurifier_Config::create(array(
'Core.Encoding' => 'ISO-8859-1'
));
// Now it gets converted
}
function test_convertFromUTF8_iso8859_1() {
$this->config->set('Core', 'Encoding', 'ISO-8859-1');
$this->assertIdentical(
HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $config, $context),
HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $this->config, $this->context),
"\xF6"
);
if (function_exists('iconv')) {
// iconv has it's own way
$this->assertIdentical(
HTMLPurifier_Encoder::convertFromUTF8($chinese, $config, $context),
" (Chinese)"
);
}
}
function test_convertFromUTF8_iconvNoChars() {
if (!function_exists('iconv')) return;
$this->config->set('Core', 'Encoding', 'ISO-8859-1');
$this->assertIdentical(
HTMLPurifier_Encoder::convertFromUTF8($this->getZhongWen(), $this->config, $this->context),
" (Chinese)"
);
}
function test_convertFromUTF8_phpNormal() {
// Plain PHP implementation has slightly different behavior
$config = HTMLPurifier_Config::create(array(
'Core.Encoding' => 'ISO-8859-1',
'Test.ForceNoIconv' => true
));
$this->config->set('Core', 'Encoding', 'ISO-8859-1');
$this->config->set('Test', 'ForceNoIconv', true);
$this->assertIdentical(
HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $config, $context),
HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $this->config, $this->context),
"\xF6"
);
}
function test_convertFromUTF8_phpNoChars() {
$this->config->set('Core', 'Encoding', 'ISO-8859-1');
$this->config->set('Test', 'ForceNoIconv', true);
$this->assertIdentical(
HTMLPurifier_Encoder::convertFromUTF8($chinese, $config, $context),
HTMLPurifier_Encoder::convertFromUTF8($this->getZhongWen(), $this->config, $this->context),
"?? (Chinese)"
);
}
function test_convertFromUTF8_withProtection() {
// Preserve the characters!
$config = HTMLPurifier_Config::create(array(
'Core.Encoding' => 'ISO-8859-1',
'Core.EscapeNonASCIICharacters' => true
));
$this->config->set('Core', 'Encoding', 'ISO-8859-1');
$this->config->set('Core', 'EscapeNonASCIICharacters', true);
$this->assertIdentical(
HTMLPurifier_Encoder::convertFromUTF8($chinese, $config, $context),
HTMLPurifier_Encoder::convertFromUTF8($this->getZhongWen(), $this->config, $this->context),
"中文 (Chinese)"
);
@@ -139,5 +135,39 @@ class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness
}
function assertASCIISupportCheck($enc, $ret) {
$test = HTMLPurifier_Encoder::testEncodingSupportsASCII($enc, true);
if ($test === false) return;
$this->assertIdentical(
HTMLPurifier_Encoder::testEncodingSupportsASCII($enc),
$ret
);
$this->assertIdentical(
HTMLPurifier_Encoder::testEncodingSupportsASCII($enc, true),
$ret
);
}
function test_testEncodingSupportsASCII() {
$this->assertASCIISupportCheck('Shift_JIS', array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~'));
$this->assertASCIISupportCheck('JOHAB', array("\xE2\x82\xA9" => '\\'));
$this->assertASCIISupportCheck('ISO-8859-1', array());
$this->assertASCIISupportCheck('dontexist', array()); // canary
}
function testShiftJIS() {
if (!function_exists('iconv')) return;
$this->config->set('Core', 'Encoding', 'Shift_JIS');
// This actually looks like a Yen, but we're going to treat it differently
$this->assertIdentical(
HTMLPurifier_Encoder::convertFromUTF8('\\~', $this->config, $this->context),
'\\~'
);
$this->assertIdentical(
HTMLPurifier_Encoder::convertToUTF8('\\~', $this->config, $this->context),
'\\~'
);
}
}

View File

@@ -12,13 +12,24 @@ class HTMLPurifier_Harness extends UnitTestCase
parent::UnitTestCase();
}
var $config, $context;
var $config, $context, $purifier;
/**
* Generates easily accessible default config/context
* Generates easily accessible default config/context, as well as
* a convenience purifier for integration testing.
*/
function setUp() {
list($this->config, $this->context) = $this->createCommon();
$this->purifier = new HTMLPurifier();
}
/**
* Asserts a purification. Good for integration testing.
*/
function assertPurification($input, $expect = null) {
if ($expect === null) $expect = $input;
$result = $this->purifier->purify($input, $this->config);
$this->assertIdentical($expect, $result);
}
/**

View File

@@ -2,30 +2,15 @@
require_once 'HTMLPurifier.php';
// integration test
class HTMLPurifierTest extends HTMLPurifier_Harness
{
var $purifier;
function setUp() {
$this->purifier = new HTMLPurifier();
}
function assertPurification($input, $expect = null, $config = array()) {
if ($expect === null) $expect = $input;
$result = $this->purifier->purify($input, $config);
$this->assertIdentical($expect, $result);
}
function testNull() {
$this->assertPurification("Null byte\0", "Null byte");
}
function testStrict() {
$config = HTMLPurifier_Config::createDefault();
$config->set('HTML', 'Strict', true);
$this->purifier = new HTMLPurifier( $config ); // verbose syntax
$this->config->set('HTML', 'Strict', true);
$this->assertPurification(
'<u>Illegal underline</u>',
@@ -41,10 +26,8 @@ class HTMLPurifierTest extends HTMLPurifier_Harness
function testDifferentAllowedElements() {
$this->purifier = new HTMLPurifier(array(
'HTML.AllowedElements' => array('b', 'i', 'p', 'a'),
'HTML.AllowedAttributes' => array('a.href', '*.id')
));
$this->config->set('HTML', 'AllowedElements', array('b', 'i', 'p', 'a'));
$this->config->set('HTML', 'AllowedAttributes', array('a.href', '*.id'));
$this->assertPurification(
'<p>Par.</p><p>Para<a href="http://google.com/">gr</a>aph</p>Text<b>Bol<i>d</i></b>'
@@ -59,7 +42,7 @@ class HTMLPurifierTest extends HTMLPurifier_Harness
function testDisableURI() {
$this->purifier = new HTMLPurifier( array('Attr.DisableURI' => true) );
$this->config->set('URI', 'Disable', true);
$this->assertPurification(
'<img src="foobar"/>',
@@ -70,8 +53,6 @@ class HTMLPurifierTest extends HTMLPurifier_Harness
function test_purifyArray() {
$this->purifier = new HTMLPurifier();
$this->assertIdentical(
$this->purifier->purifyArray(
array('Good', '<b>Sketchy', 'foo' => '<script>bad</script>')
@@ -83,23 +64,24 @@ class HTMLPurifierTest extends HTMLPurifier_Harness
}
function testEnableAttrID() {
$this->purifier = new HTMLPurifier();
function testAttrIDDisabledByDefault() {
$this->assertPurification(
'<span id="moon">foobar</span>',
'<span>foobar</span>'
);
$this->purifier = new HTMLPurifier(array('HTML.EnableAttrID' => true));
}
function testEnableAttrID() {
$this->config->set('Attr', 'EnableID', true);
$this->assertPurification('<span id="moon">foobar</span>');
$this->assertPurification('<img id="folly" src="folly.png" alt="Omigosh!" />');
}
function testScript() {
$this->purifier = new HTMLPurifier(array('HTML.Trusted' => true));
$this->config->set('HTML', 'Trusted', true);
$ideal = '<script type="text/javascript"><!--//--><![CDATA[//><!--
alert("<This is compatible with XHTML>");
//--><!]]></script>';
@@ -140,13 +122,29 @@ alert("<This is compatible with XHTML>");
}
function testMakeAbsolute() {
$this->config->set('URI', 'Base', 'http://example.com/bar/baz.php');
$this->config->set('URI', 'MakeAbsolute', true);
$this->assertPurification(
'<a href="foo.txt">Foobar</a>',
'<a href="http://example.com/bar/foo.txt">Foobar</a>',
array(
'URI.Base' => 'http://example.com/bar/baz.php',
'URI.MakeAbsolute' => true
)
'<a href="http://example.com/bar/foo.txt">Foobar</a>'
);
}
function test_shiftJis() {
if (!function_exists('iconv')) return;
$this->config->set('Core', 'Encoding', 'Shift_JIS');
$this->config->set('Core', 'EscapeNonASCIICharacters', true);
$this->assertPurification(
"<b style=\"font-family:'&#165;';\">111</b>"
);
}
function test_shiftJisWorstCase() {
if (!function_exists('iconv')) return;
$this->config->set('Core', 'Encoding', 'Shift_JIS');
$this->assertPurification( // Notice how Yen disappears
"<b style=\"font-family:'&#165;';\">111</b>",
"<b style=\"font-family:'';\">111</b>"
);
}