Implement lang and xml:lang. Fixed a bunch of bugs too.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@162 48356398-32a2-884e-a903-53898d9a118a
2025-10-17 06:56:06 +02:00 · 2006-08-05 01:50:13 +00:00
parent 1945ddca5c
commit 8a23710405
11 changed files with 312 additions and 7 deletions
--- a/tests/HTMLPurifier/AttrDef/LangTest.php
+++ b/tests/HTMLPurifier/AttrDef/LangTest.php
@@ -0,0 +1,83 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDefHarness.php';
+require_once 'HTMLPurifier/AttrDef/Lang.php';
+
+class HTMLPurifier_AttrDef_LangTest extends HTMLPurifier_AttrDefHarness
+{
+    
+    function test() {
+        
+        $this->def = new HTMLPurifier_AttrDef_Lang();
+        
+        // basic good uses
+        $this->assertDef('en');
+        $this->assertDef('en-us');
+        
+        $this->assertDef(' en ', 'en'); // trim
+        $this->assertDef('EN', 'en'); // case insensitivity
+        
+        $this->assertDef('fr en', false); // multiple languages
+        $this->assertDef('%', false); // bad character
+        
+        // test overlong language according to syntax
+        $this->assertDef('thisistoolongsoitgetscut', false);
+        
+        // primary subtag rules
+            // I'm somewhat hesitant to allow x and i as primary language codes,
+            // because they are rarely used on their own in real life. However,
+            // theoretically speaking, having them alone is permissible, so
+            // I'll be lenient. No XML parser is going to complain anyway.
+        $this->assertDef('x');
+        $this->assertDef('i');
+            // real world use-cases
+        $this->assertDef('x-klingon');
+        $this->assertDef('i-mingo');
+            // because the RFC only defines two and three letter primary codes,
+            // anything with a length of four or greater is invalid, despite
+            // the syntax stipulation of 1 to 8 characters. Because the RFC
+            // specifically states that this reservation is in order to allow
+            // for future versions to expand, the adoption of a new RFC will
+            // require these test cases to be rewritten, even if backwards-
+            // compatibility is largely retained (i.e. this is not forwards
+            // compatible)
+        $this->assertDef('four', false);
+            // for similar reasons, disallow any other one character language
+        $this->assertDef('f', false);
+        
+        // second subtag rules
+            // one letter subtags prohibited until revision. This is, however,
+            // less volatile than the restrictions on the primary subtags.
+            // Also note that this test-case tests fix-behavior: chop
+            // off subtags until you get a valid language code.
+        $this->assertDef('en-a', 'en');
+            // 2-8 chars are permitted, but have special meaning that cannot
+            // be checked without maintaining country code lookup tables (for
+            // two characters) or special registration tables (for all above).
+        $this->assertDef('en-uk', true);
+        
+        // further subtag rules: only syntactic constraints
+        $this->assertDef('en-us-edison');
+        $this->assertDef('en-us-toolonghaha', 'en-us');
+        $this->assertDef('en-us-a-silly-long-one');
+        
+        // RFC 3066 stipulates that if a three letter and a two letter code
+        // are available, the two letter one MUST be used. Without a language
+        // code lookup table, we cannot implement this functionality.
+        
+        // although the HTML specification, technically speaking, allows you to
+        // omit language tags, this implicitly means that the parent element's
+        // language is the one applicable, which, in some cases, is incorrect.
+        // Thus, we allow und, only slightly defying the RFC's SHOULD NOT
+        // designation.
+        $this->assertDef('und');
+        
+        // because the attribute accepts only a single language (never a list),
+        // mul is allowed: the RFC's SHOULD NOT applies only where lists are possible.
+        $this->assertDef('mul');
+        
+    }
+    
+}
+
+?>
--- a/tests/HTMLPurifier/AttrTransform/LangTest.php
+++ b/tests/HTMLPurifier/AttrTransform/LangTest.php
@@ -0,0 +1,60 @@
+<?php
+
+require_once 'HTMLPurifier/Token.php';
+require_once 'HTMLPurifier/AttrTransform/Lang.php';
+
+class HTMLPurifier_AttrTransform_LangTest extends UnitTestCase
+{
+    
+    function test() {
+        
+        $transform = new HTMLPurifier_AttrTransform_Lang();
+        
+        $inputs = array();
+        $expect = array();
+        
+        // leave non-lang'ed elements alone
+        $inputs[0] = new HTMLPurifier_Token_Start('b');
+        $expect[0] = $inputs[0];
+        
+        // copy lang to xml:lang
+        $inputs[1] = new HTMLPurifier_Token_Start('span',
+                        array('lang' => 'en'));
+        $expect[1] = new HTMLPurifier_Token_Start('span',
+                        array('lang' => 'en',
+                              'xml:lang' => 'en'));
+        
+        // empty tags must work too, also test attribute preservation
+        $inputs[2] = new HTMLPurifier_Token_Empty('img',
+                        array('src' => 'seine.png',
+                              'lang' => 'fr'));
+        $expect[2] = new HTMLPurifier_Token_Empty('img',
+                        array('src' => 'seine.png',
+                              'lang' => 'fr',
+                              'xml:lang' => 'fr'));
+        
+        // copy xml:lang to lang
+        $inputs[3] = new HTMLPurifier_Token_Start('span',
+                        array('xml:lang' => 'en'));
+        $expect[3] = new HTMLPurifier_Token_Start('span',
+                        array('lang' => 'en',
+                              'xml:lang' => 'en'));
+        
+        // both set, override lang with xml:lang
+        $inputs[4] = new HTMLPurifier_Token_Start('span',
+                        array('lang' => 'fr',
+                              'xml:lang' => 'de'));
+        $expect[4] = new HTMLPurifier_Token_Start('span',
+                        array('lang' => 'de',
+                              'xml:lang' => 'de'));
+        
+        foreach ($inputs as $i => $input) {
+            $result = $transform->transform($input);
+            $this->assertEqual($expect[$i], $result, "Test $i: %s");
+        }
+        
+    }
+    
+}
+
+?>
--- a/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php
+++ b/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php
@@ -59,6 +59,10 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends
        $inputs[10] = '<acronym title="PHP: Hypertext Preprocessor">PHP</acronym>';
        $expect[10] = $inputs[10];
        
+        // test lang (NEEDS CORRECTION!)
+        $inputs[11] = '<span lang="fr">La soupe.</span>';
+        $expect[11] = '<span lang="fr" xml:lang="fr">La soupe.</span>';
+        
        $this->assertStrategyWorks($strategy, $inputs, $expect, $config);
        
    }