mirror of
				https://github.com/ezyang/htmlpurifier.git
				synced 2025-10-24 18:16:19 +02:00 
			
		
		
		
	git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1793 48356398-32a2-884e-a903-53898d9a118a
		
			
				
	
	
		
			176 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			176 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| <?php
 | |
| 
 | |
| require_once 'HTMLPurifier/Encoder.php';
 | |
| 
 | |
/**
 * Test case for the static helpers of HTMLPurifier_Encoder: cleanUTF8(),
 * convertToUTF8(), convertFromUTF8(), convertToASCIIDumbLossless() and
 * testEncodingSupportsASCII().
 *
 * $this->config and $this->context are not defined in this class; they are
 * presumably prepared by HTMLPurifier_Harness::setUp() -- confirm against the
 * harness.
 */
class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness
{
    
    /** Instance returned by HTMLPurifier_EntityLookup::instance(), set in setUp(). */
    var $_entity_lookup;
    
    /**
     * Stores the entity lookup instance and then runs the harness's own setUp().
     */
    function setUp() {
        $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
        parent::setUp();
    }
    
    /**
     * Asserts that HTMLPurifier_Encoder::cleanUTF8() turns $string into $expect,
     * once with default arguments (failure label "iconv") and once with the
     * second argument set to true (failure label "PHP").
     *
     * @param string      $string Input handed to cleanUTF8().
     * @param string|null $expect Expected result; null means "unchanged",
     *                            i.e. identical to $string.
     */
    function assertCleanUTF8($string, $expect = null) {
        if ($expect === null) $expect = $string;
        $this->assertIdentical(HTMLPurifier_Encoder::cleanUTF8($string), $expect, 'iconv: %s');
        $this->assertIdentical(HTMLPurifier_Encoder::cleanUTF8($string, true), $expect, 'PHP: %s');
    }
    
    /**
     * cleanUTF8() keeps plain text, tab, newline and carriage return, and
     * strips the remaining control bytes, malformed sequences and invalid
     * codepoints listed below.
     */
    function test_cleanUTF8() {
        $this->assertCleanUTF8('Normal string.');
        $this->assertCleanUTF8("Test\tAllowed\nControl\rCharacters");
        $this->assertCleanUTF8("null byte: \0", 'null byte: ');
        $this->assertCleanUTF8("\1\2\3\4\5\6\7", '');
        $this->assertCleanUTF8("\x7F", ''); // one byte invalid SGML char
        $this->assertCleanUTF8("\xC2\x80", ''); // two byte invalid SGML
        $this->assertCleanUTF8("\xF3\xBF\xBF\xBF"); // valid four byte
        $this->assertCleanUTF8("\xDF\xFF", ''); // malformed UTF8
        // invalid codepoints
        $this->assertCleanUTF8("\xED\xB0\x80", '');
    }
    
    /**
     * With no Core.Encoding override, convertToUTF8() must hand the input
     * back untouched.
     */
    function test_convertToUTF8_noConvert() {
        // UTF-8 means that we don't touch it
        $this->assertIdentical(
            HTMLPurifier_Encoder::convertToUTF8("\xF6", $this->config, $this->context),
            "\xF6" // this is invalid
        );
    }
    
    /**
     * ISO-8859-1 byte 0xF6 must become the two-byte UTF-8 sequence C3 B6.
     */
    function test_convertToUTF8_iso8859_1() {
        $this->config->set('Core', 'Encoding', 'ISO-8859-1');
        $this->assertIdentical(
            HTMLPurifier_Encoder::convertToUTF8("\xF6", $this->config, $this->context),
            "\xC3\xB6"
        );
    }
    
    /**
     * Same conversion as test_convertToUTF8_iso8859_1(), but with
     * Test.ForceNoIconv enabled.
     */
    function test_convertToUTF8_withoutIconv() {
        $this->config->set('Core', 'Encoding', 'ISO-8859-1');
        $this->config->set('Test', 'ForceNoIconv', true);
        $this->assertIdentical(
            HTMLPurifier_Encoder::convertToUTF8("\xF6", $this->config, $this->context),
            "\xC3\xB6"
        );
    }
    
    /**
     * Fixture: two three-byte UTF-8 characters (U+4E2D, U+6587) followed by
     * the ASCII text " (Chinese)".
     *
     * @return string
     */
    function getZhongWen() {
        return "\xE4\xB8\xAD\xE6\x96\x87 (Chinese)";
    }
    
    /**
     * With no Core.Encoding override, convertFromUTF8() must hand the input
     * back untouched.
     */
    function test_convertFromUTF8_utf8() {
        // UTF-8 means that we don't touch it
        $this->assertIdentical(
            HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $this->config, $this->context),
            "\xC3\xB6"
        );
    }
    
    /**
     * UTF-8 sequence C3 B6 must become the single ISO-8859-1 byte 0xF6.
     */
    function test_convertFromUTF8_iso8859_1() {
        $this->config->set('Core', 'Encoding', 'ISO-8859-1');
        $this->assertIdentical(
            HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $this->config, $this->context),
            "\xF6"
        );
    }
    
    /**
     * When iconv is available, characters that do not exist in ISO-8859-1
     * are expected to be dropped from the output.
     */
    function test_convertFromUTF8_iconvNoChars() {
        // Nothing to check when the iconv extension is missing
        if (!function_exists('iconv')) return;
        $this->config->set('Core', 'Encoding', 'ISO-8859-1');
        $this->assertIdentical(
            HTMLPurifier_Encoder::convertFromUTF8($this->getZhongWen(), $this->config, $this->context),
            " (Chinese)"
        );
    }
    
    /**
     * With Test.ForceNoIconv enabled, a representable character still
     * converts to its ISO-8859-1 byte.
     */
    function test_convertFromUTF8_phpNormal() {
        // Plain PHP implementation has slightly different behavior
        $this->config->set('Core', 'Encoding', 'ISO-8859-1');
        $this->config->set('Test', 'ForceNoIconv', true);
        $this->assertIdentical(
            HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $this->config, $this->context),
            "\xF6"
        );
    }
    
    /**
     * With Test.ForceNoIconv enabled, each unrepresentable character is
     * expected to come back as a question mark.
     */
    function test_convertFromUTF8_phpNoChars() {
        $this->config->set('Core', 'Encoding', 'ISO-8859-1');
        $this->config->set('Test', 'ForceNoIconv', true);
        $this->assertIdentical(
            HTMLPurifier_Encoder::convertFromUTF8($this->getZhongWen(), $this->config, $this->context),
            "?? (Chinese)"
        );
    }
    
    /**
     * With Core.EscapeNonASCIICharacters enabled, unrepresentable characters
     * must survive as decimal numeric character references.
     */
    function test_convertFromUTF8_withProtection() {
        // Preserve the characters!
        $this->config->set('Core', 'Encoding', 'ISO-8859-1');
        $this->config->set('Core', 'EscapeNonASCIICharacters', true);
        $this->assertIdentical(
            HTMLPurifier_Encoder::convertFromUTF8($this->getZhongWen(), $this->config, $this->context),
            // Pure ASCII on purpose: U+4E2D = 20013, U+6587 = 25991. Raw UTF-8
            // characters here would describe the unescaped input instead.
            "&#20013;&#25991; (Chinese)"
        );
    }
    
    /**
     * convertToASCIIDumbLossless() must replace every non-ASCII character
     * with a decimal numeric character reference and leave ASCII alone.
     */
    function test_convertToASCIIDumbLossless() {
        
        // Uppercase thorn letter (U+00DE = 222); the expectation is ASCII only
        $this->assertIdentical(
            HTMLPurifier_Encoder::convertToASCIIDumbLossless("\xC3\x9Eorn"),
            "&#222;orn"
        );
        
        // Plain ASCII passes through unchanged
        $this->assertIdentical(
            HTMLPurifier_Encoder::convertToASCIIDumbLossless("an"),
            "an"
        );
        
        // test up to four bytes: F3 A0 80 A0 decodes to U+E0020 = 917536
        $this->assertIdentical(
            HTMLPurifier_Encoder::convertToASCIIDumbLossless("\xF3\xA0\x80\xA0"),
            "&#917536;"
        );
        
    }
    
    /**
     * Asserts that testEncodingSupportsASCII($enc) returns $ret, both without
     * the second argument and with it set to true.
     *
     * If the call with the second argument set to true returns false, the
     * check is skipped without asserting anything (presumably the encoding
     * is not available on this system -- confirm against the Encoder).
     *
     * @param string $enc Encoding name to probe.
     * @param array  $ret Expected return value.
     */
    function assertASCIISupportCheck($enc, $ret) {
        $test = HTMLPurifier_Encoder::testEncodingSupportsASCII($enc, true);
        if ($test === false) return;
        $this->assertIdentical(
            HTMLPurifier_Encoder::testEncodingSupportsASCII($enc),
            $ret
        );
        $this->assertIdentical(
            HTMLPurifier_Encoder::testEncodingSupportsASCII($enc, true),
            $ret
        );
    }
    
    /**
     * Probes several encodings; the expected arrays map a UTF-8 character to
     * the ASCII character associated with it for that encoding.
     */
    function test_testEncodingSupportsASCII() {
        $this->assertASCIISupportCheck('Shift_JIS', array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~'));
        $this->assertASCIISupportCheck('JOHAB', array("\xE2\x82\xA9" => '\\'));
        $this->assertASCIISupportCheck('ISO-8859-1', array());
        $this->assertASCIISupportCheck('dontexist', array()); // canary
    }
    
    /**
     * Backslash and tilde must round-trip unchanged through Shift_JIS in
     * both conversion directions.
     */
    function testShiftJIS() {
        // Nothing to check when the iconv extension is missing
        if (!function_exists('iconv')) return;
        $this->config->set('Core', 'Encoding', 'Shift_JIS');
        // This actually looks like a Yen, but we're going to treat it differently
        $this->assertIdentical(
            HTMLPurifier_Encoder::convertFromUTF8('\\~', $this->config, $this->context),
            '\\~'
        );
        $this->assertIdentical(
            HTMLPurifier_Encoder::convertToUTF8('\\~', $this->config, $this->context),
            '\\~'
        );
    }
    
}
 | |
| 
 |