1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-06 06:07:26 +02:00

Implement lang and xml:lang. Fixed a bunch of bugs too.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@162 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2006-08-05 01:50:13 +00:00
parent 1945ddca5c
commit 8a23710405
11 changed files with 312 additions and 7 deletions

View File

@@ -0,0 +1,83 @@
<?php
require_once 'HTMLPurifier/AttrDefHarness.php';
require_once 'HTMLPurifier/AttrDef/Lang.php';
class HTMLPurifier_AttrDef_LangTest extends HTMLPurifier_AttrDefHarness
{

    /**
     * Feeds a series of language tags to HTMLPurifier_AttrDef_Lang.
     *
     * assertDef() comes from the harness and is not visible here. Judging
     * from the calls below, a lone argument means "accepted as-is", a string
     * second argument is the expected cleaned-up value, and false means the
     * tag is rejected -- confirm against HTMLPurifier_AttrDefHarness.
     */
    function test() {

        $this->def = new HTMLPurifier_AttrDef_Lang();

        // ---- everyday, well-formed usage ----
        foreach (array('en', 'en-us') as $tag) {
            $this->assertDef($tag);
        }
        $this->assertDef(' en ', 'en');   // surrounding whitespace is trimmed
        $this->assertDef('EN', 'en');     // matching is case-insensitive
        $this->assertDef('fr en', false); // more than one language
        $this->assertDef('%', false);     // illegal character

        // a tag that is too long for the syntax
        $this->assertDef('thisistoolongsoitgetscut', false);

        // ---- primary subtag ----

        // Accepting a bare "x" or "i" as the primary language code is a
        // lenient choice: they are practically never seen on their own in
        // real documents, but in theory a lone one is permissible, and no
        // XML parser will object to it. The last two entries are the forms
        // that actually occur in the wild.
        $specialPrimaries = array('x', 'i', 'x-klingon', 'i-mingo');
        foreach ($specialPrimaries as $tag) {
            $this->assertDef($tag);
        }

        // The RFC defines only two- and three-letter primary codes, so
        // anything of length four or more is invalid even though the syntax
        // allows 1 to 8 characters. The RFC says explicitly that the rest is
        // reserved so future versions can expand; adopting a new RFC will
        // therefore mean rewriting these cases even if backwards
        // compatibility is largely kept (i.e. this is not forwards
        // compatible).
        $this->assertDef('four', false);

        // By the same reasoning, every other single-character primary code
        // is refused.
        $this->assertDef('f', false);

        // ---- second subtag ----

        // Single-letter second subtags are prohibited until the RFC is
        // revised, although this is less volatile than the restrictions on
        // primary subtags. This case also exercises the fix-up behaviour:
        // subtags are chopped off until a valid language code remains.
        $this->assertDef('en-a', 'en');

        // Two to eight characters are permitted. They carry special meaning,
        // but checking it would need country-code lookup tables (for two
        // characters) or special registration tables (for anything longer).
        $this->assertDef('en-uk', true);

        // ---- third and later subtags: syntactic constraints only ----
        $this->assertDef('en-us-edison');
        $this->assertDef('en-us-toolonghaha', 'en-us');
        $this->assertDef('en-us-a-silly-long-one');

        // RFC 3066 stipulates that when both a three-letter and a two-letter
        // code exist, the two-letter one MUST be used. That cannot be
        // implemented without a language code lookup table.

        // Technically the HTML protocol lets you omit the language tag, but
        // omission implicitly means that the parent element's language
        // applies, which is incorrect in some cases. "und" is therefore
        // allowed, only slightly defying the RFC's SHOULD NOT designation.
        $this->assertDef('und');

        // Attributes only allow one language, so "mul" is allowed; this
        // complies with the RFC's SHOULD NOT designation.
        $this->assertDef('mul');

    }

}
?>

View File

@@ -0,0 +1,60 @@
<?php
require_once 'HTMLPurifier/Token.php';
require_once 'HTMLPurifier/AttrTransform/Lang.php';
class HTMLPurifier_AttrTransform_LangTest extends UnitTestCase
{

    /**
     * Checks that HTMLPurifier_AttrTransform_Lang keeps the lang and
     * xml:lang attributes of a token in sync.
     *
     * Each case pairs an input token with an independently constructed
     * expected token; the transform's return value is compared against the
     * expectation with assertEqual().
     */
    function test() {

        $transform = new HTMLPurifier_AttrTransform_Lang();

        $inputs = array();
        $expect = array();

        // leave non-lang'ed elements alone
        // The expectation is built as a separate token rather than assigned
        // from $inputs[0]: under PHP 5 object-handle semantics an assignment
        // would make both slots refer to the same object, so a transform that
        // mutated its argument could never fail this case.
        $inputs[0] = new HTMLPurifier_Token_Start('b');
        $expect[0] = new HTMLPurifier_Token_Start('b');

        // copy lang to xml:lang
        $inputs[1] = new HTMLPurifier_Token_Start('span',
                                                  array('lang' => 'en'));
        $expect[1] = new HTMLPurifier_Token_Start('span',
                                                  array('lang' => 'en',
                                                        'xml:lang' => 'en'));

        // empty tags must work too, also test attribute preservation
        $inputs[2] = new HTMLPurifier_Token_Empty('img',
                                                  array('src' => 'seine.png',
                                                        'lang' => 'fr'));
        $expect[2] = new HTMLPurifier_Token_Empty('img',
                                                  array('src' => 'seine.png',
                                                        'lang' => 'fr',
                                                        'xml:lang' => 'fr'));

        // copy xml:lang to lang
        $inputs[3] = new HTMLPurifier_Token_Start('span',
                                                  array('xml:lang' => 'en'));
        $expect[3] = new HTMLPurifier_Token_Start('span',
                                                  array('lang' => 'en',
                                                        'xml:lang' => 'en'));

        // both set, override lang with xml:lang
        $inputs[4] = new HTMLPurifier_Token_Start('span',
                                                  array('lang' => 'fr',
                                                        'xml:lang' => 'de'));
        $expect[4] = new HTMLPurifier_Token_Start('span',
                                                  array('lang' => 'de',
                                                        'xml:lang' => 'de'));

        // run every case; the message prefix identifies the failing index
        foreach ($inputs as $i => $input) {
            $result = $transform->transform($input);
            $this->assertEqual($expect[$i], $result, "Test $i: %s");
        }

    }

}
?>

View File

@@ -59,6 +59,10 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends
$inputs[10] = '<acronym title="PHP: Hypertext Preprocessor">PHP</acronym>';
$expect[10] = $inputs[10];
// test lang (NEEDS CORRECTION!)
$inputs[11] = '<span lang="fr">La soupe.</span>';
$expect[11] = '<span lang="fr" xml:lang="fr">La soupe.</span>';
$this->assertStrategyWorks($strategy, $inputs, $expect, $config);
}