diff --git a/library/HTMLPurifier/AttrDef.php b/library/HTMLPurifier/AttrDef.php index e94ee713..5417270e 100644 --- a/library/HTMLPurifier/AttrDef.php +++ b/library/HTMLPurifier/AttrDef.php @@ -54,18 +54,15 @@ class HTMLPurifier_AttrDef * * @warning This processing is inconsistent with XML's whitespace handling * as specified by section 3.3.3 and referenced XHTML 1.0 section - * 4.7. Compliant processing requires all line breaks normalized - * to "\n", so the fix is not as simple as fixing it in this - * function. Trim and whitespace collapsing are supposed to only - * occur in NMTOKENs. However, note that we are NOT necessarily - * parsing XML, thus, this behavior may still be correct. + * 4.7. However, note that we are NOT necessarily + * parsing XML, thus, this behavior may still be correct. We + * assume that newlines have been normalized. * * @public */ function parseCDATA($string) { $string = trim($string); - $string = str_replace("\n", '', $string); - $string = str_replace(array("\r", "\t"), ' ', $string); + $string = str_replace(array("\n", "\t", "\r"), ' ', $string); return $string; } diff --git a/library/HTMLPurifier/AttrDef/CSS/FontFamily.php b/library/HTMLPurifier/AttrDef/CSS/FontFamily.php index 9b18edca..d64d728e 100644 --- a/library/HTMLPurifier/AttrDef/CSS/FontFamily.php +++ b/library/HTMLPurifier/AttrDef/CSS/FontFamily.php @@ -19,10 +19,10 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef 'cursive' => true ); - $string = $this->parseCDATA($string); // assume that no font names contain commas in them $fonts = explode(',', $string); $final = ''; + $non_sgml = HTMLPurifier_Encoder::getNonSgmlCharacters(); foreach($fonts as $font) { $font = trim($font); if ($font === '') continue; @@ -38,11 +38,33 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef $quote = $font[0]; if ($font[$length - 1] !== $quote) continue; $font = substr($font, 1, $length - 2); - // double-backslash processing is buggy. 
Namely, it doesn't allow - // fonts that contain an adjacent quote, backslash, or comma - $font = str_replace("\\$quote", $quote, $font); // de-escape quote - $font = str_replace("\\\n", '', $font); // de-escape newlines - $font = str_replace("\\\\", "\\", $font); // de-escape double backslashes + + $new_font = ''; + for ($i = 0, $c = strlen($font); $i < $c; $i++) { + if ($font[$i] === '\\') { + $i++; + if ($i >= $c) { + $new_font .= '\\'; + break; + } + if (ctype_xdigit($font[$i])) { + $code = $font[$i]; + for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) { + if (!ctype_xdigit($font[$i])) break; + $code .= $font[$i]; + } + $char = HTMLPurifier_Encoder::unichr(hexdec($code)); + if (isset($non_sgml[$char])) continue; + $new_font .= $char; + if ($i < $c && trim($font[$i]) !== '') $i--; + continue; + } + if ($font[$i] === "\n") continue; + } + $new_font .= $font[$i]; + } + + $font = $new_font; } // $font is a pure representation of the font name diff --git a/library/HTMLPurifier/Encoder.php b/library/HTMLPurifier/Encoder.php index 31ebb785..7f535229 100644 --- a/library/HTMLPurifier/Encoder.php +++ b/library/HTMLPurifier/Encoder.php @@ -67,6 +67,25 @@ class HTMLPurifier_Encoder */ function muteErrorHandler() {} + /** + * Returns a lookup of UTF-8 character byte sequences that are non-SGML. 
+ */ + function getNonSgmlCharacters() { + static $nonSgmlCharacters; + if (empty($nonSgmlCharacters)) { + for ($i = 0; $i <= 31; $i++) { + // non-SGML ASCII chars + // save \r, \t and \n + if ($i == 9 || $i == 13 || $i == 10) continue; + $nonSgmlCharacters[chr($i)] = ''; + } + for ($i = 127; $i <= 159; $i++) { + $nonSgmlCharacters[HTMLPurifier_Encoder::unichr($i)] = ''; + } + } + return $nonSgmlCharacters; + } + /** * Cleans a UTF-8 string for well-formedness and SGML validity * @@ -95,18 +114,7 @@ class HTMLPurifier_Encoder */ function cleanUTF8($str, $force_php = false) { - static $non_sgml_chars = array(); - if (empty($non_sgml_chars)) { - for ($i = 0; $i <= 31; $i++) { - // non-SGML ASCII chars - // save \r, \t and \n - if ($i == 9 || $i == 13 || $i == 10) continue; - $non_sgml_chars[chr($i)] = ''; - } - for ($i = 127; $i <= 159; $i++) { - $non_sgml_chars[HTMLPurifier_Encoder::unichr($i)] = ''; - } - } + $non_sgml = HTMLPurifier_Encoder::getNonSgmlCharacters(); static $iconv = null; if ($iconv === null) $iconv = function_exists('iconv'); @@ -115,7 +123,7 @@ class HTMLPurifier_Encoder // This is an optimization: if the string is already valid UTF-8, no // need to do iconv/php stuff. 99% of the time, this will be the case. 
if (preg_match('/^.{1}/us', $str)) { - return strtr($str, $non_sgml_chars); + return strtr($str, $non_sgml); } if ($iconv && !$force_php) { @@ -123,7 +131,7 @@ class HTMLPurifier_Encoder set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); $str = iconv('UTF-8', 'UTF-8//IGNORE', $str); restore_error_handler(); - return strtr($str, $non_sgml_chars); + return strtr($str, $non_sgml); } $mState = 0; // cached expected number of octets after the current octet @@ -327,14 +335,24 @@ class HTMLPurifier_Encoder * @static */ function convertToUTF8($str, $config, &$context) { - static $iconv = null; - if ($iconv === null) $iconv = function_exists('iconv'); $encoding = $config->get('Core', 'Encoding'); if ($encoding === 'utf-8') return $str; + static $iconv = null; + if ($iconv === null) $iconv = function_exists('iconv'); + set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); if ($iconv && !$config->get('Test', 'ForceNoIconv')) { - return @iconv($encoding, 'utf-8//IGNORE', $str); + $str = iconv($encoding, 'utf-8//IGNORE', $str); + // If the string is bjorked by Shift_JIS or a similar encoding + // that doesn't support all of ASCII, convert the naughty + // characters to their true byte-wise ASCII/UTF-8 equivalents. + $ascii_fix = HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding); if (!empty($ascii_fix)) $str = strtr($str, $ascii_fix); /* the lookup is false when iconv rejects the encoding */ + restore_error_handler(); + return $str; } elseif ($encoding === 'iso-8859-1') { - return @utf8_encode($str); + $str = utf8_encode($str); + restore_error_handler(); + return $str; } + restore_error_handler(); /* unmute first, otherwise the error below is swallowed and the mute handler leaks */ trigger_error('Encoding not supported', E_USER_ERROR); } @@ -346,17 +364,32 @@ class HTMLPurifier_Encoder * characters being omitted. 
*/ function convertFromUTF8($str, $config, &$context) { - static $iconv = null; - if ($iconv === null) $iconv = function_exists('iconv'); $encoding = $config->get('Core', 'Encoding'); if ($encoding === 'utf-8') return $str; - if ($config->get('Core', 'EscapeNonASCIICharacters')) { + static $iconv = null; + if ($iconv === null) $iconv = function_exists('iconv'); + if ($escape = $config->get('Core', 'EscapeNonASCIICharacters')) { $str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str); } + set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); if ($iconv && !$config->get('Test', 'ForceNoIconv')) { - return @iconv('utf-8', $encoding . '//IGNORE', $str); + // Undo our previous fix in convertToUTF8, otherwise iconv will barf + $ascii_fix = HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding); + if (!$escape && !empty($ascii_fix)) { + $clear_fix = array(); + foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = ''; + $str = strtr($str, $clear_fix); + } + if (!empty($ascii_fix)) $str = strtr($str, array_flip($ascii_fix)); /* the lookup is false when iconv rejects the encoding */ + // Normal stuff + $str = iconv('utf-8', $encoding . '//IGNORE', $str); + restore_error_handler(); + return $str; } elseif ($encoding === 'iso-8859-1') { - return @utf8_decode($str); + $str = utf8_decode($str); + restore_error_handler(); + return $str; } + restore_error_handler(); /* unmute first, otherwise the error below is swallowed and the mute handler leaks */ trigger_error('Encoding not supported', E_USER_ERROR); } @@ -409,6 +442,47 @@ class HTMLPurifier_Encoder return $result; } + /** + * This expensive function tests whether or not a given character + * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will + * fail this test, and require special processing. Variable width + * encodings shouldn't ever fail. + * + * @param string $encoding Encoding name to test, as per iconv format + * @param bool $bypass Whether or not to bypass the precompiled arrays. + * @return Array of UTF-8 characters to their corresponding ASCII, + * which can be used to "undo" any overzealous iconv action. 
+ */ + function testEncodingSupportsASCII($encoding, $bypass = false) { + static $encodings = array(); + if (!$bypass) { + if (isset($encodings[$encoding])) return $encodings[$encoding]; + $lenc = strtolower($encoding); + switch ($lenc) { + case 'shift_jis': + return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~'); + case 'johab': + return array("\xE2\x82\xA9" => '\\'); + } + if (strpos($lenc, 'iso-8859-') === 0) return array(); + } + $ret = array(); + set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); + if (iconv('UTF-8', $encoding, 'a') === false) { restore_error_handler(); return false; } /* unknown encoding: unmute before bailing out so the mute handler does not leak */ + for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars + $c = chr($i); + if (iconv('UTF-8', "$encoding//IGNORE", $c) === '') { + // Reverse engineer: what's the UTF-8 equiv of this byte + // sequence? This assumes that there's no variable width + // encoding that doesn't support ASCII. + $ret[iconv($encoding, 'UTF-8//IGNORE', $c)] = $c; + } + } + restore_error_handler(); + $encodings[$encoding] = $ret; + return $ret; + } + } diff --git a/tests/HTMLPurifier/AttrDef/CSS/FontFamilyTest.php b/tests/HTMLPurifier/AttrDef/CSS/FontFamilyTest.php index b57d074f..3be02749 100644 --- a/tests/HTMLPurifier/AttrDef/CSS/FontFamilyTest.php +++ b/tests/HTMLPurifier/AttrDef/CSS/FontFamilyTest.php @@ -21,7 +21,20 @@ class HTMLPurifier_AttrDef_CSS_FontFamilyTest extends HTMLPurifier_AttrDefHarnes $this->assertDef($d = "'\xE5\xAE\x8B\xE4\xBD\x93'"); $this->assertDef("\xE5\xAE\x8B\xE4\xBD\x93", $d); $this->assertDef("'\\','f'", "'\\\\', f"); - + $this->assertDef("'\\01'", "''"); + $this->assertDef("'\\20'", "' '"); + $this->assertDef("\\0020", "'\\\\0020'"); + $this->assertDef("'\\000045'", "E"); + $this->assertDef("','", false); + $this->assertDef("',' foobar','", "' foobar'"); + $this->assertDef("'\\27'", "'\''"); + $this->assertDef('"\\22"', "'\"'"); + $this->assertDef('"\\""', "'\"'"); + $this->assertDef('"\'"', "'\\''"); + $this->assertDef("'\\000045a'", "Ea"); + $this->assertDef("'\\00045 
a'", "Ea"); + $this->assertDef("'\\00045 a'", "'E a'"); + $this->assertDef("'\\\nf'", "f"); } } diff --git a/tests/HTMLPurifier/AttrDef/TextTest.php b/tests/HTMLPurifier/AttrDef/TextTest.php index 4977edcd..6eec1d56 100644 --- a/tests/HTMLPurifier/AttrDef/TextTest.php +++ b/tests/HTMLPurifier/AttrDef/TextTest.php @@ -11,7 +11,7 @@ class HTMLPurifier_AttrDef_TextTest extends HTMLPurifier_AttrDefHarness $this->def = new HTMLPurifier_AttrDef_Text(); $this->assertDef('This is spiffy text!'); - $this->assertDef(" Casual\tCDATA parse\ncheck. ", 'Casual CDATA parsecheck.'); + $this->assertDef(" Casual\tCDATA parse\ncheck. ", 'Casual CDATA parse check.'); } diff --git a/tests/HTMLPurifier/AttrDefTest.php b/tests/HTMLPurifier/AttrDefTest.php index 84889bb3..526048ca 100644 --- a/tests/HTMLPurifier/AttrDefTest.php +++ b/tests/HTMLPurifier/AttrDefTest.php @@ -12,8 +12,7 @@ class HTMLPurifier_AttrDefTest extends HTMLPurifier_Harness $this->assertIdentical('', $def->parseCDATA('')); $this->assertIdentical('', $def->parseCDATA("\t\n\r \t\t")); $this->assertIdentical('foo', $def->parseCDATA("\t\n\r foo\t\t")); - $this->assertIdentical('ignorelinefeeds', $def->parseCDATA("ignore\nline\nfeeds")); - $this->assertIdentical('translate to space', $def->parseCDATA("translate\rto\tspace")); + $this->assertIdentical('translate to space', $def->parseCDATA("translate\nto\tspace")); } diff --git a/tests/HTMLPurifier/EncoderTest.php b/tests/HTMLPurifier/EncoderTest.php index 6007bf6a..205fb8b6 100644 --- a/tests/HTMLPurifier/EncoderTest.php +++ b/tests/HTMLPurifier/EncoderTest.php @@ -9,6 +9,7 @@ class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness function setUp() { $this->_entity_lookup = HTMLPurifier_EntityLookup::instance(); + parent::setUp(); } function assertCleanUTF8($string, $expect = null) { @@ -28,91 +29,86 @@ class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness $this->assertCleanUTF8("\xDF\xFF", ''); // malformed UTF8 } - function test_convertToUTF8() { - $config = 
HTMLPurifier_Config::createDefault(); - $context = new HTMLPurifier_Context(); - + function test_convertToUTF8_noConvert() { // UTF-8 means that we don't touch it $this->assertIdentical( - HTMLPurifier_Encoder::convertToUTF8("\xF6", $config, $context), + HTMLPurifier_Encoder::convertToUTF8("\xF6", $this->config, $this->context), "\xF6" // this is invalid ); - $this->assertNoErrors(); - - $config = HTMLPurifier_Config::create(array( - 'Core.Encoding' => 'ISO-8859-1' - )); - - // Now it gets converted + } + + function test_convertToUTF8_iso8859_1() { + $this->config->set('Core', 'Encoding', 'ISO-8859-1'); $this->assertIdentical( - HTMLPurifier_Encoder::convertToUTF8("\xF6", $config, $context), + HTMLPurifier_Encoder::convertToUTF8("\xF6", $this->config, $this->context), "\xC3\xB6" ); - - $config = HTMLPurifier_Config::create(array( - 'Core.Encoding' => 'ISO-8859-1', - 'Test.ForceNoIconv' => true - )); + } + + function test_convertToUTF8_withoutIconv() { + $this->config->set('Core', 'Encoding', 'ISO-8859-1'); + $this->config->set('Test', 'ForceNoIconv', true); $this->assertIdentical( - HTMLPurifier_Encoder::convertToUTF8("\xF6", $config, $context), + HTMLPurifier_Encoder::convertToUTF8("\xF6", $this->config, $this->context), "\xC3\xB6" ); } - function test_convertFromUTF8() { - $config = HTMLPurifier_Config::createDefault(); - $context = new HTMLPurifier_Context(); - - // zhong-wen - $chinese = "\xE4\xB8\xAD\xE6\x96\x87 (Chinese)"; - + function getZhongWen() { + return "\xE4\xB8\xAD\xE6\x96\x87 (Chinese)"; + } + + function test_convertFromUTF8_utf8() { // UTF-8 means that we don't touch it $this->assertIdentical( - HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $config, $context), + HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $this->config, $this->context), "\xC3\xB6" ); - - $config = HTMLPurifier_Config::create(array( - 'Core.Encoding' => 'ISO-8859-1' - )); - - // Now it gets converted + } + + function test_convertFromUTF8_iso8859_1() { + 
$this->config->set('Core', 'Encoding', 'ISO-8859-1'); $this->assertIdentical( - HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $config, $context), + HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $this->config, $this->context), "\xF6" ); - - if (function_exists('iconv')) { - // iconv has it's own way - $this->assertIdentical( - HTMLPurifier_Encoder::convertFromUTF8($chinese, $config, $context), - " (Chinese)" - ); - } - + } + + function test_convertFromUTF8_iconvNoChars() { + if (!function_exists('iconv')) return; + $this->config->set('Core', 'Encoding', 'ISO-8859-1'); + $this->assertIdentical( + HTMLPurifier_Encoder::convertFromUTF8($this->getZhongWen(), $this->config, $this->context), + " (Chinese)" + ); + } + + function test_convertFromUTF8_phpNormal() { // Plain PHP implementation has slightly different behavior - $config = HTMLPurifier_Config::create(array( - 'Core.Encoding' => 'ISO-8859-1', - 'Test.ForceNoIconv' => true - )); + $this->config->set('Core', 'Encoding', 'ISO-8859-1'); + $this->config->set('Test', 'ForceNoIconv', true); $this->assertIdentical( - HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $config, $context), + HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $this->config, $this->context), "\xF6" ); - + } + + function test_convertFromUTF8_phpNoChars() { + $this->config->set('Core', 'Encoding', 'ISO-8859-1'); + $this->config->set('Test', 'ForceNoIconv', true); $this->assertIdentical( - HTMLPurifier_Encoder::convertFromUTF8($chinese, $config, $context), + HTMLPurifier_Encoder::convertFromUTF8($this->getZhongWen(), $this->config, $this->context), "?? (Chinese)" ); - + } + + function test_convertFromUTF8_withProtection() { // Preserve the characters! 
- $config = HTMLPurifier_Config::create(array( - 'Core.Encoding' => 'ISO-8859-1', - 'Core.EscapeNonASCIICharacters' => true - )); + $this->config->set('Core', 'Encoding', 'ISO-8859-1'); + $this->config->set('Core', 'EscapeNonASCIICharacters', true); $this->assertIdentical( - HTMLPurifier_Encoder::convertFromUTF8($chinese, $config, $context), + HTMLPurifier_Encoder::convertFromUTF8($this->getZhongWen(), $this->config, $this->context), "中文 (Chinese)" ); @@ -139,5 +135,39 @@ class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness } + function assertASCIISupportCheck($enc, $ret) { + $test = HTMLPurifier_Encoder::testEncodingSupportsASCII($enc, true); + if ($test === false) return; + $this->assertIdentical( + HTMLPurifier_Encoder::testEncodingSupportsASCII($enc), + $ret + ); + $this->assertIdentical( + HTMLPurifier_Encoder::testEncodingSupportsASCII($enc, true), + $ret + ); + } + + function test_testEncodingSupportsASCII() { + $this->assertASCIISupportCheck('Shift_JIS', array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~')); + $this->assertASCIISupportCheck('JOHAB', array("\xE2\x82\xA9" => '\\')); + $this->assertASCIISupportCheck('ISO-8859-1', array()); + $this->assertASCIISupportCheck('dontexist', array()); // canary + } + + function testShiftJIS() { + if (!function_exists('iconv')) return; + $this->config->set('Core', 'Encoding', 'Shift_JIS'); + // This actually looks like a Yen, but we're going to treat it differently + $this->assertIdentical( + HTMLPurifier_Encoder::convertFromUTF8('\\~', $this->config, $this->context), + '\\~' + ); + $this->assertIdentical( + HTMLPurifier_Encoder::convertToUTF8('\\~', $this->config, $this->context), + '\\~' + ); + } + } diff --git a/tests/HTMLPurifier/Harness.php b/tests/HTMLPurifier/Harness.php index 4af4384b..0e33e9b0 100644 --- a/tests/HTMLPurifier/Harness.php +++ b/tests/HTMLPurifier/Harness.php @@ -12,13 +12,24 @@ class HTMLPurifier_Harness extends UnitTestCase parent::UnitTestCase(); } - var $config, $context; + var $config, 
$context, $purifier; /** - * Generates easily accessible default config/context + * Generates easily accessible default config/context, as well as + * a convenience purifier for integration testing. */ function setUp() { list($this->config, $this->context) = $this->createCommon(); + $this->purifier = new HTMLPurifier(); + } + + /** + * Asserts a purification. Good for integration testing. + */ + function assertPurification($input, $expect = null) { + if ($expect === null) $expect = $input; + $result = $this->purifier->purify($input, $this->config); + $this->assertIdentical($expect, $result); } /** diff --git a/tests/HTMLPurifierTest.php b/tests/HTMLPurifierTest.php index 6a221b24..6800d521 100644 --- a/tests/HTMLPurifierTest.php +++ b/tests/HTMLPurifierTest.php @@ -2,30 +2,15 @@ require_once 'HTMLPurifier.php'; -// integration test - class HTMLPurifierTest extends HTMLPurifier_Harness { - var $purifier; - - function setUp() { - $this->purifier = new HTMLPurifier(); - } - - function assertPurification($input, $expect = null, $config = array()) { - if ($expect === null) $expect = $input; - $result = $this->purifier->purify($input, $config); - $this->assertIdentical($expect, $result); - } function testNull() { $this->assertPurification("Null byte\0", "Null byte"); } function testStrict() { - $config = HTMLPurifier_Config::createDefault(); - $config->set('HTML', 'Strict', true); - $this->purifier = new HTMLPurifier( $config ); // verbose syntax + $this->config->set('HTML', 'Strict', true); $this->assertPurification( 'Illegal underline', @@ -41,10 +26,8 @@ class HTMLPurifierTest extends HTMLPurifier_Harness function testDifferentAllowedElements() { - $this->purifier = new HTMLPurifier(array( - 'HTML.AllowedElements' => array('b', 'i', 'p', 'a'), - 'HTML.AllowedAttributes' => array('a.href', '*.id') - )); + $this->config->set('HTML', 'AllowedElements', array('b', 'i', 'p', 'a')); + $this->config->set('HTML', 'AllowedAttributes', array('a.href', '*.id')); 
$this->assertPurification( '

Par.

Paragraph

TextBold' @@ -59,7 +42,7 @@ class HTMLPurifierTest extends HTMLPurifier_Harness function testDisableURI() { - $this->purifier = new HTMLPurifier( array('Attr.DisableURI' => true) ); + $this->config->set('URI', 'Disable', true); $this->assertPurification( '', @@ -70,8 +53,6 @@ class HTMLPurifierTest extends HTMLPurifier_Harness function test_purifyArray() { - $this->purifier = new HTMLPurifier(); - $this->assertIdentical( $this->purifier->purifyArray( array('Good', 'Sketchy', 'foo' => '') @@ -83,23 +64,24 @@ class HTMLPurifierTest extends HTMLPurifier_Harness } - function testEnableAttrID() { - - $this->purifier = new HTMLPurifier(); + function testAttrIDDisabledByDefault() { $this->assertPurification( 'foobar', 'foobar' ); - $this->purifier = new HTMLPurifier(array('HTML.EnableAttrID' => true)); + } + + function testEnableAttrID() { + $this->config->set('Attr', 'EnableID', true); $this->assertPurification('foobar'); $this->assertPurification('Omigosh!'); - } function testScript() { - $this->purifier = new HTMLPurifier(array('HTML.Trusted' => true)); + $this->config->set('HTML', 'Trusted', true); + $ideal = ''; @@ -140,13 +122,29 @@ alert(""); } function testMakeAbsolute() { + $this->config->set('URI', 'Base', 'http://example.com/bar/baz.php'); + $this->config->set('URI', 'MakeAbsolute', true); $this->assertPurification( 'Foobar', - 'Foobar', - array( - 'URI.Base' => 'http://example.com/bar/baz.php', - 'URI.MakeAbsolute' => true - ) + 'Foobar' + ); + } + + function test_shiftJis() { + if (!function_exists('iconv')) return; + $this->config->set('Core', 'Encoding', 'Shift_JIS'); + $this->config->set('Core', 'EscapeNonASCIICharacters', true); + $this->assertPurification( + "111" + ); + } + + function test_shiftJisWorstCase() { + if (!function_exists('iconv')) return; + $this->config->set('Core', 'Encoding', 'Shift_JIS'); + $this->assertPurification( // Notice how Yen disappears + "111", + "111" ); }