mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-10-24 10:06:14 +02:00
[2.1.5] [MFH] Fix Shift_JIS encoding wonkiness with yen symbols and whatnot, as well as other patches
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1791 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
@@ -21,7 +21,20 @@ class HTMLPurifier_AttrDef_CSS_FontFamilyTest extends HTMLPurifier_AttrDefHarnes
|
||||
$this->assertDef($d = "'\xE5\xAE\x8B\xE4\xBD\x93'");
|
||||
$this->assertDef("\xE5\xAE\x8B\xE4\xBD\x93", $d);
|
||||
$this->assertDef("'\\','f'", "'\\\\', f");
|
||||
|
||||
$this->assertDef("'\\01'", "''");
|
||||
$this->assertDef("'\\20'", "' '");
|
||||
$this->assertDef("\\0020", "'\\\\0020'");
|
||||
$this->assertDef("'\\000045'", "E");
|
||||
$this->assertDef("','", false);
|
||||
$this->assertDef("',' foobar','", "' foobar'");
|
||||
$this->assertDef("'\\27'", "'\''");
|
||||
$this->assertDef('"\\22"', "'\"'");
|
||||
$this->assertDef('"\\""', "'\"'");
|
||||
$this->assertDef('"\'"', "'\\''");
|
||||
$this->assertDef("'\\000045a'", "Ea");
|
||||
$this->assertDef("'\\00045 a'", "Ea");
|
||||
$this->assertDef("'\\00045 a'", "'E a'");
|
||||
$this->assertDef("'\\\nf'", "f");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@ class HTMLPurifier_AttrDef_TextTest extends HTMLPurifier_AttrDefHarness
|
||||
$this->def = new HTMLPurifier_AttrDef_Text();
|
||||
|
||||
$this->assertDef('This is spiffy text!');
|
||||
$this->assertDef(" Casual\tCDATA parse\ncheck. ", 'Casual CDATA parsecheck.');
|
||||
$this->assertDef(" Casual\tCDATA parse\ncheck. ", 'Casual CDATA parse check.');
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -12,8 +12,7 @@ class HTMLPurifier_AttrDefTest extends HTMLPurifier_Harness
|
||||
$this->assertIdentical('', $def->parseCDATA(''));
|
||||
$this->assertIdentical('', $def->parseCDATA("\t\n\r \t\t"));
|
||||
$this->assertIdentical('foo', $def->parseCDATA("\t\n\r foo\t\t"));
|
||||
$this->assertIdentical('ignorelinefeeds', $def->parseCDATA("ignore\nline\nfeeds"));
|
||||
$this->assertIdentical('translate to space', $def->parseCDATA("translate\rto\tspace"));
|
||||
$this->assertIdentical('translate to space', $def->parseCDATA("translate\nto\tspace"));
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness
|
||||
|
||||
function setUp() {
|
||||
$this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
|
||||
parent::setUp();
|
||||
}
|
||||
|
||||
function assertCleanUTF8($string, $expect = null) {
|
||||
@@ -28,91 +29,86 @@ class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness
|
||||
$this->assertCleanUTF8("\xDF\xFF", ''); // malformed UTF8
|
||||
}
|
||||
|
||||
function test_convertToUTF8() {
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$context = new HTMLPurifier_Context();
|
||||
|
||||
function test_convertToUTF8_noConvert() {
|
||||
// UTF-8 means that we don't touch it
|
||||
$this->assertIdentical(
|
||||
HTMLPurifier_Encoder::convertToUTF8("\xF6", $config, $context),
|
||||
HTMLPurifier_Encoder::convertToUTF8("\xF6", $this->config, $this->context),
|
||||
"\xF6" // this is invalid
|
||||
);
|
||||
$this->assertNoErrors();
|
||||
|
||||
$config = HTMLPurifier_Config::create(array(
|
||||
'Core.Encoding' => 'ISO-8859-1'
|
||||
));
|
||||
|
||||
// Now it gets converted
|
||||
}
|
||||
|
||||
function test_convertToUTF8_iso8859_1() {
|
||||
$this->config->set('Core', 'Encoding', 'ISO-8859-1');
|
||||
$this->assertIdentical(
|
||||
HTMLPurifier_Encoder::convertToUTF8("\xF6", $config, $context),
|
||||
HTMLPurifier_Encoder::convertToUTF8("\xF6", $this->config, $this->context),
|
||||
"\xC3\xB6"
|
||||
);
|
||||
|
||||
$config = HTMLPurifier_Config::create(array(
|
||||
'Core.Encoding' => 'ISO-8859-1',
|
||||
'Test.ForceNoIconv' => true
|
||||
));
|
||||
}
|
||||
|
||||
function test_convertToUTF8_withoutIconv() {
|
||||
$this->config->set('Core', 'Encoding', 'ISO-8859-1');
|
||||
$this->config->set('Test', 'ForceNoIconv', true);
|
||||
$this->assertIdentical(
|
||||
HTMLPurifier_Encoder::convertToUTF8("\xF6", $config, $context),
|
||||
HTMLPurifier_Encoder::convertToUTF8("\xF6", $this->config, $this->context),
|
||||
"\xC3\xB6"
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
function test_convertFromUTF8() {
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$context = new HTMLPurifier_Context();
|
||||
|
||||
// zhong-wen
|
||||
$chinese = "\xE4\xB8\xAD\xE6\x96\x87 (Chinese)";
|
||||
|
||||
function getZhongWen() {
|
||||
return "\xE4\xB8\xAD\xE6\x96\x87 (Chinese)";
|
||||
}
|
||||
|
||||
function test_convertFromUTF8_utf8() {
|
||||
// UTF-8 means that we don't touch it
|
||||
$this->assertIdentical(
|
||||
HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $config, $context),
|
||||
HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $this->config, $this->context),
|
||||
"\xC3\xB6"
|
||||
);
|
||||
|
||||
$config = HTMLPurifier_Config::create(array(
|
||||
'Core.Encoding' => 'ISO-8859-1'
|
||||
));
|
||||
|
||||
// Now it gets converted
|
||||
}
|
||||
|
||||
function test_convertFromUTF8_iso8859_1() {
|
||||
$this->config->set('Core', 'Encoding', 'ISO-8859-1');
|
||||
$this->assertIdentical(
|
||||
HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $config, $context),
|
||||
HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $this->config, $this->context),
|
||||
"\xF6"
|
||||
);
|
||||
|
||||
if (function_exists('iconv')) {
|
||||
// iconv has its own way
|
||||
$this->assertIdentical(
|
||||
HTMLPurifier_Encoder::convertFromUTF8($chinese, $config, $context),
|
||||
" (Chinese)"
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
function test_convertFromUTF8_iconvNoChars() {
|
||||
if (!function_exists('iconv')) return;
|
||||
$this->config->set('Core', 'Encoding', 'ISO-8859-1');
|
||||
$this->assertIdentical(
|
||||
HTMLPurifier_Encoder::convertFromUTF8($this->getZhongWen(), $this->config, $this->context),
|
||||
" (Chinese)"
|
||||
);
|
||||
}
|
||||
|
||||
function test_convertFromUTF8_phpNormal() {
|
||||
// Plain PHP implementation has slightly different behavior
|
||||
$config = HTMLPurifier_Config::create(array(
|
||||
'Core.Encoding' => 'ISO-8859-1',
|
||||
'Test.ForceNoIconv' => true
|
||||
));
|
||||
$this->config->set('Core', 'Encoding', 'ISO-8859-1');
|
||||
$this->config->set('Test', 'ForceNoIconv', true);
|
||||
$this->assertIdentical(
|
||||
HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $config, $context),
|
||||
HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $this->config, $this->context),
|
||||
"\xF6"
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
function test_convertFromUTF8_phpNoChars() {
|
||||
$this->config->set('Core', 'Encoding', 'ISO-8859-1');
|
||||
$this->config->set('Test', 'ForceNoIconv', true);
|
||||
$this->assertIdentical(
|
||||
HTMLPurifier_Encoder::convertFromUTF8($chinese, $config, $context),
|
||||
HTMLPurifier_Encoder::convertFromUTF8($this->getZhongWen(), $this->config, $this->context),
|
||||
"?? (Chinese)"
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
function test_convertFromUTF8_withProtection() {
|
||||
// Preserve the characters!
|
||||
$config = HTMLPurifier_Config::create(array(
|
||||
'Core.Encoding' => 'ISO-8859-1',
|
||||
'Core.EscapeNonASCIICharacters' => true
|
||||
));
|
||||
$this->config->set('Core', 'Encoding', 'ISO-8859-1');
|
||||
$this->config->set('Core', 'EscapeNonASCIICharacters', true);
|
||||
$this->assertIdentical(
|
||||
HTMLPurifier_Encoder::convertFromUTF8($chinese, $config, $context),
|
||||
HTMLPurifier_Encoder::convertFromUTF8($this->getZhongWen(), $this->config, $this->context),
|
||||
"&#20013;&#25991; (Chinese)"
|
||||
);
|
||||
|
||||
@@ -139,5 +135,39 @@ class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness
|
||||
|
||||
}
|
||||
|
||||
function assertASCIISupportCheck($enc, $ret) {
|
||||
$test = HTMLPurifier_Encoder::testEncodingSupportsASCII($enc, true);
|
||||
if ($test === false) return;
|
||||
$this->assertIdentical(
|
||||
HTMLPurifier_Encoder::testEncodingSupportsASCII($enc),
|
||||
$ret
|
||||
);
|
||||
$this->assertIdentical(
|
||||
HTMLPurifier_Encoder::testEncodingSupportsASCII($enc, true),
|
||||
$ret
|
||||
);
|
||||
}
|
||||
|
||||
function test_testEncodingSupportsASCII() {
|
||||
$this->assertASCIISupportCheck('Shift_JIS', array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~'));
|
||||
$this->assertASCIISupportCheck('JOHAB', array("\xE2\x82\xA9" => '\\'));
|
||||
$this->assertASCIISupportCheck('ISO-8859-1', array());
|
||||
$this->assertASCIISupportCheck('dontexist', array()); // canary
|
||||
}
|
||||
|
||||
function testShiftJIS() {
|
||||
if (!function_exists('iconv')) return;
|
||||
$this->config->set('Core', 'Encoding', 'Shift_JIS');
|
||||
// This actually looks like a Yen, but we're going to treat it differently
|
||||
$this->assertIdentical(
|
||||
HTMLPurifier_Encoder::convertFromUTF8('\\~', $this->config, $this->context),
|
||||
'\\~'
|
||||
);
|
||||
$this->assertIdentical(
|
||||
HTMLPurifier_Encoder::convertToUTF8('\\~', $this->config, $this->context),
|
||||
'\\~'
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -12,13 +12,24 @@ class HTMLPurifier_Harness extends UnitTestCase
|
||||
parent::UnitTestCase();
|
||||
}
|
||||
|
||||
var $config, $context;
|
||||
var $config, $context, $purifier;
|
||||
|
||||
/**
|
||||
* Generates easily accessible default config/context
|
||||
* Generates easily accessible default config/context, as well as
|
||||
* a convenience purifier for integration testing.
|
||||
*/
|
||||
function setUp() {
|
||||
list($this->config, $this->context) = $this->createCommon();
|
||||
$this->purifier = new HTMLPurifier();
|
||||
}
|
||||
|
||||
/**
|
||||
* Asserts a purification. Good for integration testing.
|
||||
*/
|
||||
function assertPurification($input, $expect = null) {
|
||||
if ($expect === null) $expect = $input;
|
||||
$result = $this->purifier->purify($input, $this->config);
|
||||
$this->assertIdentical($expect, $result);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user