[2.1.5] [MFH] Fix Shift_JIS encoding wonkiness with yen symbols and whatnot, as well as other patches

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1791 48356398-32a2-884e-a903-53898d9a118a
2025-10-22 17:16:34 +02:00 · 2008-06-11 18:49:56 +00:00
parent 369a69d533
commit 450fc6649d
9 changed files with 274 additions and 132 deletions
--- a/library/HTMLPurifier/AttrDef.php
+++ b/library/HTMLPurifier/AttrDef.php
@@ -54,18 +54,15 @@ class HTMLPurifier_AttrDef
     * 
     * @warning This processing is inconsistent with XML's whitespace handling
     *          as specified by section 3.3.3 and referenced XHTML 1.0 section
-     *          4.7.  Compliant processing requires all line breaks normalized
+     *          4.7.  However, note that we are NOT necessarily
-     *          to "\n", so the fix is not as simple as fixing it in this
+     *          parsing XML, thus, this behavior may still be correct. We
-     *          function.  Trim and whitespace collapsing are supposed to only
+     *          assume that newlines have been normalized.
     *          occur in NMTOKENs.  However, note that we are NOT necessarily
     *          parsing XML, thus, this behavior may still be correct.
     * 
     * @public
     */
    function parseCDATA($string) {
        $string = trim($string);
-        $string = str_replace("\n", '', $string);
+        $string = str_replace(array("\n", "\t", "\r"), ' ', $string);
        $string = str_replace(array("\r", "\t"), ' ', $string);
        return $string;
    }
--- a/library/HTMLPurifier/AttrDef/CSS/FontFamily.php
+++ b/library/HTMLPurifier/AttrDef/CSS/FontFamily.php
@@ -19,10 +19,10 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
            'cursive' => true
        );
        $string = $this->parseCDATA($string);
        // assume that no font names contain commas in them
        $fonts = explode(',', $string);
        $final = '';
        $non_sgml = HTMLPurifier_Encoder::getNonSgmlCharacters();
        foreach($fonts as $font) {
            $font = trim($font);
            if ($font === '') continue;
@@ -38,11 +38,33 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
                $quote = $font[0];
                if ($font[$length - 1] !== $quote) continue;
                $font = substr($font, 1, $length - 2);
-                // double-backslash processing is buggy. Namely, it doesn't allow
+                
-                // fonts that contain an adjacent quote, backslash, or comma
+                $new_font = '';
-                $font = str_replace("\\$quote", $quote, $font); // de-escape quote
+                for ($i = 0, $c = strlen($font); $i < $c; $i++) {
-                $font = str_replace("\\\n", '', $font);       // de-escape newlines
+                    if ($font[$i] === '\\') {
-                $font = str_replace("\\\\", "\\", $font);       // de-escape double backslashes
+                        $i++;
                        if ($i >= $c) {
                            $new_font .= '\\';
                            break;
                        }
                        if (ctype_xdigit($font[$i])) {
                            $code = $font[$i];
                            for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
                                if (!ctype_xdigit($font[$i])) break;
                                $code .= $font[$i];
                            }
                            $char = HTMLPurifier_Encoder::unichr(hexdec($code));
                            if (isset($non_sgml[$char])) continue;
                            $new_font .= $char;
                            if ($i < $c && trim($font[$i]) !== '') $i--;
                            continue;
                        }
                        if ($font[$i] === "\n") continue;
                    }
                    $new_font .= $font[$i];
                }
                $font = $new_font;
            }
            // $font is a pure representation of the font name
--- a/library/HTMLPurifier/Encoder.php
+++ b/library/HTMLPurifier/Encoder.php
@@ -67,6 +67,25 @@ class HTMLPurifier_Encoder
     */
    function muteErrorHandler() {}
    /**
     * Builds (once per request) and returns the set of characters that are
     * not valid SGML, keyed by their UTF-8 byte sequence.
     *
     * The set consists of code points 0-31, except tab (9), line feed (10)
     * and carriage return (13) which are kept, plus code points 127-159.
     * Every value is the empty string, so the array can be passed straight
     * to strtr() to strip those characters.
     *
     * @return Array mapping each non-SGML character to ''
     */
    function getNonSgmlCharacters() {
        static $lookup = null;
        // Already built on an earlier call: reuse it.
        if (!empty($lookup)) return $lookup;
        $lookup = array();
        // Whitespace controls that must survive: \t, \n, \r
        $keep = array(9 => true, 10 => true, 13 => true);
        foreach (range(0, 31) as $ord) {
            if (isset($keep[$ord])) continue;
            $lookup[chr($ord)] = '';
        }
        // Code points 127-159 are multi-byte in UTF-8 from 128 up, hence unichr()
        foreach (range(127, 159) as $ord) {
            $lookup[HTMLPurifier_Encoder::unichr($ord)] = '';
        }
        return $lookup;
    }
    /**
     * Cleans a UTF-8 string for well-formedness and SGML validity
     * 
@@ -95,18 +114,7 @@ class HTMLPurifier_Encoder
     */
    function cleanUTF8($str, $force_php = false) {
-        static $non_sgml_chars = array();
+        $non_sgml = HTMLPurifier_Encoder::getNonSgmlCharacters();
        if (empty($non_sgml_chars)) {
            for ($i = 0; $i <= 31; $i++) {
                // non-SGML ASCII chars
                // save \r, \t and \n
                if ($i == 9 || $i == 13 || $i == 10) continue;
                $non_sgml_chars[chr($i)] = '';
            }
            for ($i = 127; $i <= 159; $i++) {
                $non_sgml_chars[HTMLPurifier_Encoder::unichr($i)] = '';
            }
        }
        static $iconv = null;
        if ($iconv === null) $iconv = function_exists('iconv');
@@ -115,7 +123,7 @@ class HTMLPurifier_Encoder
        // This is an optimization: if the string is already valid UTF-8, no
        // need to do iconv/php stuff. 99% of the time, this will be the case.
        if (preg_match('/^.{1}/us', $str)) {
-            return strtr($str, $non_sgml_chars);
+            return strtr($str, $non_sgml);
        }
        if ($iconv && !$force_php) {
@@ -123,7 +131,7 @@ class HTMLPurifier_Encoder
            set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
            $str = iconv('UTF-8', 'UTF-8//IGNORE', $str);
            restore_error_handler();
-            return strtr($str, $non_sgml_chars);
+            return strtr($str, $non_sgml);
        }
        $mState = 0; // cached expected number of octets after the current octet
@@ -327,14 +335,23 @@ class HTMLPurifier_Encoder
     * @static
     */
    function convertToUTF8($str, $config, &$context) {
        static $iconv = null;
        if ($iconv === null) $iconv = function_exists('iconv');
        $encoding = $config->get('Core', 'Encoding');
        if ($encoding === 'utf-8') return $str;
        static $iconv = null;
        if ($iconv === null) $iconv = function_exists('iconv');
        set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
        if ($iconv && !$config->get('Test', 'ForceNoIconv')) {
-            return @iconv($encoding, 'utf-8//IGNORE', $str);
+            $str = iconv($encoding, 'utf-8//IGNORE', $str);
            // If the string is bjorked by Shift_JIS or a similar encoding
            // that doesn't support all of ASCII, convert the naughty
            // characters to their true byte-wise ASCII/UTF-8 equivalents.
            $str = strtr($str, HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding));
            restore_error_handler();
            return $str;
        } elseif ($encoding === 'iso-8859-1') {
-            return @utf8_encode($str);
+            $str = utf8_encode($str);
            restore_error_handler();
            return $str;
        }
        trigger_error('Encoding not supported', E_USER_ERROR);
    }
@@ -346,17 +363,31 @@ class HTMLPurifier_Encoder
     *       characters being omitted.
     */
    function convertFromUTF8($str, $config, &$context) {
        static $iconv = null;
        if ($iconv === null) $iconv = function_exists('iconv');
        $encoding = $config->get('Core', 'Encoding');
        if ($encoding === 'utf-8') return $str;
-        if ($config->get('Core', 'EscapeNonASCIICharacters')) {
+        static $iconv = null;
        if ($iconv === null) $iconv = function_exists('iconv');
        if ($escape = $config->get('Core', 'EscapeNonASCIICharacters')) {
            $str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str);
        }
        set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
        if ($iconv && !$config->get('Test', 'ForceNoIconv')) {
-            return @iconv('utf-8', $encoding . '//IGNORE', $str);
+            // Undo our previous fix in convertToUTF8, otherwise iconv will barf
            $ascii_fix = HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding);
            if (!$escape && !empty($ascii_fix)) {
                $clear_fix = array();
                foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = '';
                $str = strtr($str, $clear_fix);
            }
            $str = strtr($str, array_flip($ascii_fix));
            // Normal stuff
            $str = iconv('utf-8', $encoding . '//IGNORE', $str);
            restore_error_handler();
            return $str;
        } elseif ($encoding === 'iso-8859-1') {
-            return @utf8_decode($str);
+            $str = utf8_decode($str);
            restore_error_handler();
            return $str;
        }
        trigger_error('Encoding not supported', E_USER_ERROR);
    }
@@ -409,6 +440,47 @@ class HTMLPurifier_Encoder
        return $result;
    }
    /**
     * This expensive function tests whether or not a given character
     * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
     * fail this test, and require special processing. Variable width
     * encodings shouldn't ever fail.
     *
     * Unless $bypass is set, the result comes from the per-request cache,
     * from the precompiled tables for Shift_JIS and JOHAB, or is assumed
     * to be empty for any iso-8859-* encoding, without calling iconv.
     *
     * @param string $encoding Encoding name to test, as per iconv format
     * @param bool $bypass Whether or not to bypass the precompiled arrays.
     * @return Array of UTF-8 characters to their corresponding ASCII,
     *      which can be used to "undo" any overzealous iconv action, or
     *      false when iconv cannot convert to $encoding at all.
     */
    function testEncodingSupportsASCII($encoding, $bypass = false) {
        static $encodings = array();
        if (!$bypass) {
            if (isset($encodings[$encoding])) return $encodings[$encoding];
            $lenc = strtolower($encoding);
            switch ($lenc) {
                case 'shift_jis':
                    return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~');
                case 'johab':
                    return array("\xE2\x82\xA9" => '\\');
            }
            if (strpos($lenc, 'iso-8859-') === 0) return array();
        }
        $ret = array();
        set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
        if (iconv('UTF-8', $encoding, 'a') === false) {
            // Unknown/unsupported encoding. The muting handler must be
            // removed on this path too, otherwise it stays installed and
            // silences every later error raised by the caller.
            restore_error_handler();
            return false;
        }
        for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
            $c = chr($i);
            if (iconv('UTF-8', "$encoding//IGNORE", $c) === '') {
                // Reverse engineer: what's the UTF-8 equiv of this byte
                // sequence? This assumes that there's no variable width
                // encoding that doesn't support ASCII.
                $utf8 = iconv($encoding, 'UTF-8//IGNORE', $c);
                // A failed reverse conversion yields false (cast to array
                // key 0, i.e. the character "0") or ''; neither is a usable
                // strtr() key, so such bytes are left out of the map.
                if (is_string($utf8) && $utf8 !== '') $ret[$utf8] = $c;
            }
        }
        restore_error_handler();
        $encodings[$encoding] = $ret;
        return $ret;
    }
 }
--- a/tests/HTMLPurifier/AttrDef/CSS/FontFamilyTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/FontFamilyTest.php
@@ -21,7 +21,20 @@ class HTMLPurifier_AttrDef_CSS_FontFamilyTest extends HTMLPurifier_AttrDefHarnes
        $this->assertDef($d = "'\xE5\xAE\x8B\xE4\xBD\x93'");
        $this->assertDef("\xE5\xAE\x8B\xE4\xBD\x93", $d);
        $this->assertDef("'\\','f'", "'\\\\', f");
-        
+        $this->assertDef("'\\01'", "''");
        $this->assertDef("'\\20'", "' '");
        $this->assertDef("\\0020", "'\\\\0020'");
        $this->assertDef("'\\000045'", "E");
        $this->assertDef("','", false);
        $this->assertDef("',' foobar','", "' foobar'");
        $this->assertDef("'\\27'", "'\''");
        $this->assertDef('"\\22"', "'\"'");
        $this->assertDef('"\\""', "'\"'");
        $this->assertDef('"\'"', "'\\''");
        $this->assertDef("'\\000045a'", "Ea");
        $this->assertDef("'\\00045 a'", "Ea");
        $this->assertDef("'\\00045  a'", "'E a'");
        $this->assertDef("'\\\nf'", "f");
    }
 }
--- a/tests/HTMLPurifier/AttrDef/TextTest.php
+++ b/tests/HTMLPurifier/AttrDef/TextTest.php
@@ -11,7 +11,7 @@ class HTMLPurifier_AttrDef_TextTest extends HTMLPurifier_AttrDefHarness
        $this->def = new HTMLPurifier_AttrDef_Text();
        $this->assertDef('This is spiffy text!');
-        $this->assertDef(" Casual\tCDATA parse\ncheck. ", 'Casual CDATA parsecheck.');
+        $this->assertDef(" Casual\tCDATA parse\ncheck. ", 'Casual CDATA parse check.');
    }
--- a/tests/HTMLPurifier/AttrDefTest.php
+++ b/tests/HTMLPurifier/AttrDefTest.php
@@ -12,8 +12,7 @@ class HTMLPurifier_AttrDefTest extends HTMLPurifier_Harness
        $this->assertIdentical('', $def->parseCDATA(''));
        $this->assertIdentical('', $def->parseCDATA("\t\n\r \t\t"));
        $this->assertIdentical('foo', $def->parseCDATA("\t\n\r foo\t\t"));
-        $this->assertIdentical('ignorelinefeeds', $def->parseCDATA("ignore\nline\nfeeds"));
+        $this->assertIdentical('translate to space', $def->parseCDATA("translate\nto\tspace"));
        $this->assertIdentical('translate to space', $def->parseCDATA("translate\rto\tspace"));
    }
--- a/tests/HTMLPurifier/EncoderTest.php
+++ b/tests/HTMLPurifier/EncoderTest.php
@@ -9,6 +9,7 @@ class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness
    /**
     * Per-test fixture: stores the entity lookup instance on the test case,
     * then runs the parent harness setUp().
     */
    function setUp() {
        $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
        parent::setUp();
    }
    function assertCleanUTF8($string, $expect = null) {
@@ -28,91 +29,86 @@ class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness
        $this->assertCleanUTF8("\xDF\xFF", ''); // malformed UTF8
    }
-    function test_convertToUTF8() {
+    function test_convertToUTF8_noConvert() {
        $config = HTMLPurifier_Config::createDefault();
        $context = new HTMLPurifier_Context();
        // UTF-8 means that we don't touch it
        $this->assertIdentical(
-            HTMLPurifier_Encoder::convertToUTF8("\xF6", $config, $context),
+            HTMLPurifier_Encoder::convertToUTF8("\xF6", $this->config, $this->context),
            "\xF6" // this is invalid
        );
-        $this->assertNoErrors();
+    }
-        
+    
-        $config = HTMLPurifier_Config::create(array(
+    function test_convertToUTF8_iso8859_1() {
-            'Core.Encoding' => 'ISO-8859-1'
+        $this->config->set('Core', 'Encoding', 'ISO-8859-1');
        ));
        // Now it gets converted
        $this->assertIdentical(
-            HTMLPurifier_Encoder::convertToUTF8("\xF6", $config, $context),
+            HTMLPurifier_Encoder::convertToUTF8("\xF6", $this->config, $this->context),
            "\xC3\xB6"
        );
-        
+    }
-        $config = HTMLPurifier_Config::create(array(
+    
-            'Core.Encoding' => 'ISO-8859-1',
+    function test_convertToUTF8_withoutIconv() {
-            'Test.ForceNoIconv' => true
+        $this->config->set('Core', 'Encoding', 'ISO-8859-1');
-        ));
+        $this->config->set('Test', 'ForceNoIconv', true);
        $this->assertIdentical(
-            HTMLPurifier_Encoder::convertToUTF8("\xF6", $config, $context),
+            HTMLPurifier_Encoder::convertToUTF8("\xF6", $this->config, $this->context),
            "\xC3\xB6"
        );
    }
-    function test_convertFromUTF8() {
+    function getZhongWen() {
-        $config = HTMLPurifier_Config::createDefault();
+        return "\xE4\xB8\xAD\xE6\x96\x87 (Chinese)";
-        $context = new HTMLPurifier_Context();
+    }
-        
+    
-        // zhong-wen
+    function test_convertFromUTF8_utf8() {
        $chinese = "\xE4\xB8\xAD\xE6\x96\x87 (Chinese)";
        // UTF-8 means that we don't touch it
        $this->assertIdentical(
-            HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $config, $context),
+            HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $this->config, $this->context),
            "\xC3\xB6"
        );
-        
+    }
-        $config = HTMLPurifier_Config::create(array(
+    
-            'Core.Encoding' => 'ISO-8859-1'
+    function test_convertFromUTF8_iso8859_1() {
-        ));
+        $this->config->set('Core', 'Encoding', 'ISO-8859-1');
        // Now it gets converted
        $this->assertIdentical(
-            HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $config, $context),
+            HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $this->config, $this->context),
            "\xF6"
        );
-        
+    }
-        if (function_exists('iconv')) {
+    
-            // iconv has it's own way
+    function test_convertFromUTF8_iconvNoChars() {
-            $this->assertIdentical(
+        if (!function_exists('iconv')) return;
-                HTMLPurifier_Encoder::convertFromUTF8($chinese, $config, $context),
+        $this->config->set('Core', 'Encoding', 'ISO-8859-1');
-                " (Chinese)"
+        $this->assertIdentical(
-            );
+            HTMLPurifier_Encoder::convertFromUTF8($this->getZhongWen(), $this->config, $this->context),
-        }
+            " (Chinese)"
-        
+        );
    }
    function test_convertFromUTF8_phpNormal() {
        // Plain PHP implementation has slightly different behavior
-        $config = HTMLPurifier_Config::create(array(
+        $this->config->set('Core', 'Encoding', 'ISO-8859-1');
-            'Core.Encoding' => 'ISO-8859-1',
+        $this->config->set('Test', 'ForceNoIconv', true);
            'Test.ForceNoIconv' => true
        ));
        $this->assertIdentical(
-            HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $config, $context),
+            HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $this->config, $this->context),
            "\xF6"
        );
-        
+    }
    function test_convertFromUTF8_phpNoChars() {
        $this->config->set('Core', 'Encoding', 'ISO-8859-1');
        $this->config->set('Test', 'ForceNoIconv', true);
        $this->assertIdentical(
-            HTMLPurifier_Encoder::convertFromUTF8($chinese, $config, $context),
+            HTMLPurifier_Encoder::convertFromUTF8($this->getZhongWen(), $this->config, $this->context),
            "?? (Chinese)"
        );
-        
+    }
    function test_convertFromUTF8_withProtection() {
        // Preserve the characters!
-        $config = HTMLPurifier_Config::create(array(
+        $this->config->set('Core', 'Encoding', 'ISO-8859-1');
-            'Core.Encoding' => 'ISO-8859-1',
+        $this->config->set('Core', 'EscapeNonASCIICharacters', true);
            'Core.EscapeNonASCIICharacters' => true
        ));
        $this->assertIdentical(
-            HTMLPurifier_Encoder::convertFromUTF8($chinese, $config, $context),
+            HTMLPurifier_Encoder::convertFromUTF8($this->getZhongWen(), $this->config, $this->context),
            "&#20013;&#25991; (Chinese)"
        );
@@ -139,5 +135,39 @@ class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness
    }
    /**
     * Asserts that testEncodingSupportsASCII() yields $ret for $enc both
     * through its normal path and with the precompiled tables bypassed.
     * Does nothing when the bypassed probe returns false.
     *
     * @param string $enc Encoding name to check
     * @param array $ret Expected UTF-8 => ASCII map
     */
    function assertASCIISupportCheck($enc, $ret) {
        // A false probe result means there is nothing to verify.
        if (HTMLPurifier_Encoder::testEncodingSupportsASCII($enc, true) === false) {
            return;
        }
        // Normal path first, then the bypassed path.
        foreach (array(false, true) as $bypass) {
            $this->assertIdentical(
                HTMLPurifier_Encoder::testEncodingSupportsASCII($enc, $bypass),
                $ret
            );
        }
    }
    /**
     * Checks the ASCII-support map for a handful of encodings, in a fixed
     * order, via assertASCIISupportCheck().
     */
    function test_testEncodingSupportsASCII() {
        $expectations = array(
            'Shift_JIS'  => array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~'),
            'JOHAB'      => array("\xE2\x82\xA9" => '\\'),
            'ISO-8859-1' => array(),
            'dontexist'  => array(), // canary
        );
        foreach ($expectations as $encoding => $map) {
            $this->assertASCIISupportCheck($encoding, $map);
        }
    }
    /**
     * With Core.Encoding set to Shift_JIS, a backslash and a tilde must come
     * through both conversion directions unchanged. Skipped without iconv.
     */
    function testShiftJIS() {
        if (!function_exists('iconv')) return;
        $this->config->set('Core', 'Encoding', 'Shift_JIS');
        // This actually looks like a Yen, but we're going to treat it differently
        $ascii = '\\~';
        $encoded = HTMLPurifier_Encoder::convertFromUTF8($ascii, $this->config, $this->context);
        $this->assertIdentical($encoded, '\\~');
        $decoded = HTMLPurifier_Encoder::convertToUTF8($ascii, $this->config, $this->context);
        $this->assertIdentical($decoded, '\\~');
    }
 }
--- a/tests/HTMLPurifier/Harness.php
+++ b/tests/HTMLPurifier/Harness.php
@@ -12,13 +12,24 @@ class HTMLPurifier_Harness extends UnitTestCase
        parent::UnitTestCase();
    }
-    var $config, $context;
+    var $config, $context, $purifier;
    /**
-     * Generates easily accessible default config/context
+     * Generates easily accessible default config/context, as well as
     * a convenience purifier for integration testing.
     */
    function setUp() {
        // createCommon() (defined outside this view) supplies two values,
        // unpacked here into the config and context shared by each test.
        list($this->config, $this->context) = $this->createCommon();
        // Purifier instance used by assertPurification().
        $this->purifier = new HTMLPurifier();
    }
    /**
     * Purifies $input with the harness purifier and config, then asserts
     * the result. When $expect is omitted (null), the input is expected to
     * come back unchanged. Good for integration testing.
     */
    function assertPurification($input, $expect = null) {
        $expected = ($expect === null) ? $input : $expect;
        $actual = $this->purifier->purify($input, $this->config);
        $this->assertIdentical($expected, $actual);
    }
    /**
--- a/tests/HTMLPurifierTest.php
+++ b/tests/HTMLPurifierTest.php
@@ -2,30 +2,15 @@
 require_once 'HTMLPurifier.php';
 // integration test
 class HTMLPurifierTest extends HTMLPurifier_Harness
 {
    var $purifier;
    function setUp() {
        $this->purifier = new HTMLPurifier();
    }
    function assertPurification($input, $expect = null, $config = array()) {
        if ($expect === null) $expect = $input;
        $result = $this->purifier->purify($input, $config);
        $this->assertIdentical($expect, $result);
    }
    function testNull() {
        $this->assertPurification("Null byte\0", "Null byte");
    }
    function testStrict() {
-        $config = HTMLPurifier_Config::createDefault();
+        $this->config->set('HTML', 'Strict', true);
        $config->set('HTML', 'Strict', true);
        $this->purifier = new HTMLPurifier( $config ); // verbose syntax
        $this->assertPurification(
            '<u>Illegal underline</u>',
@@ -41,10 +26,8 @@ class HTMLPurifierTest extends HTMLPurifier_Harness
    function testDifferentAllowedElements() {
-        $this->purifier = new HTMLPurifier(array(
+        $this->config->set('HTML', 'AllowedElements', array('b', 'i', 'p', 'a'));
-            'HTML.AllowedElements' => array('b', 'i', 'p', 'a'),
+        $this->config->set('HTML', 'AllowedAttributes', array('a.href', '*.id'));
            'HTML.AllowedAttributes' => array('a.href', '*.id')
        ));
        $this->assertPurification(
            '<p>Par.</p><p>Para<a href="http://google.com/">gr</a>aph</p>Text<b>Bol<i>d</i></b>'
@@ -59,7 +42,7 @@ class HTMLPurifierTest extends HTMLPurifier_Harness
    function testDisableURI() {
-        $this->purifier = new HTMLPurifier( array('Attr.DisableURI' => true) );
+        $this->config->set('URI', 'Disable', true);
        $this->assertPurification(
            '<img src="foobar"/>',
@@ -70,8 +53,6 @@ class HTMLPurifierTest extends HTMLPurifier_Harness
    function test_purifyArray() {
        $this->purifier = new HTMLPurifier();
        $this->assertIdentical(
            $this->purifier->purifyArray(
                array('Good', '<b>Sketchy', 'foo' => '<script>bad</script>')
@@ -83,23 +64,24 @@ class HTMLPurifierTest extends HTMLPurifier_Harness
    }
-    function testEnableAttrID() {
+    function testAttrIDDisabledByDefault() {
        $this->purifier = new HTMLPurifier();
        $this->assertPurification(
            '<span id="moon">foobar</span>',
            '<span>foobar</span>'
        );
-        $this->purifier = new HTMLPurifier(array('HTML.EnableAttrID' => true));
+    }
    /**
     * With Attr.EnableID switched on, markup carrying id attributes must
     * pass through purification unchanged.
     */
    function testEnableAttrID() {
        $this->config->set('Attr', 'EnableID', true);
        $unchanged = array(
            '<span id="moon">foobar</span>',
            '<img id="folly" src="folly.png" alt="Omigosh!" />'
        );
        foreach ($unchanged as $html) {
            $this->assertPurification($html);
        }
    }
    function testScript() {
-        $this->purifier = new HTMLPurifier(array('HTML.Trusted' => true));
+        $this->config->set('HTML', 'Trusted', true);
        $ideal = '<script type="text/javascript"><!--//--><![CDATA[//><!--
 alert("<This is compatible with XHTML>");
 //--><!]]></script>';
@@ -140,13 +122,29 @@ alert("<This is compatible with XHTML>");
    }
    function testMakeAbsolute() {
        $this->config->set('URI', 'Base', 'http://example.com/bar/baz.php');
        $this->config->set('URI', 'MakeAbsolute', true);
        $this->assertPurification(
            '<a href="foo.txt">Foobar</a>',
-            '<a href="http://example.com/bar/foo.txt">Foobar</a>',
+            '<a href="http://example.com/bar/foo.txt">Foobar</a>'
-            array(
+        );
-                'URI.Base' => 'http://example.com/bar/baz.php',
+    }
-                'URI.MakeAbsolute' => true
+    
-            )
+    function test_shiftJis() {
        if (!function_exists('iconv')) return;
        $this->config->set('Core', 'Encoding', 'Shift_JIS');
        $this->config->set('Core', 'EscapeNonASCIICharacters', true);
        $this->assertPurification(
            "<b style=\"font-family:'&#165;';\">111</b>"
        );
    }
    /**
     * Shift_JIS output with no other option set: the yen entity in the
     * font-family value is expected to be dropped. Skipped without iconv.
     */
    function test_shiftJisWorstCase() {
        if (!function_exists('iconv')) return;
        $this->config->set('Core', 'Encoding', 'Shift_JIS');
        $withYen = "<b style=\"font-family:'&#165;';\">111</b>";
        $withoutYen = "<b style=\"font-family:'';\">111</b>";
        // Notice how Yen disappears
        $this->assertPurification($withYen, $withoutYen);
    }