diff --git a/library/HTMLPurifier/AttrDef/Class.php b/library/HTMLPurifier/AttrDef/Class.php index c6cbe916..2dec50dd 100644 --- a/library/HTMLPurifier/AttrDef/Class.php +++ b/library/HTMLPurifier/AttrDef/Class.php @@ -6,11 +6,11 @@ require_once 'HTMLPurifier/Config.php'; class HTMLPurifier_AttrDef_Class extends HTMLPurifier_AttrDef { - function validate($raw_string, $config = null) { + function validate($string, $config = null) { if (!$config) $config = HTMLPurifier_Config::createDefault(); - $string = trim($raw_string); + $string = trim($string); // early abort: '' and '0' (strings that convert to false) are invalid if (!$string) return false; @@ -23,18 +23,20 @@ class HTMLPurifier_AttrDef_Class extends HTMLPurifier_AttrDef // and plus it would complicate optimization efforts (you never // see that anyway). $matches = array(); - $pattern = '/(?:\s|\A)'. - '((?:-?[A-Za-z_]|--)[A-Za-z_\-0-9]*)'. - '(?:\s|\z)/'; + $pattern = '/(?:(?<=\s)|\A)'. + '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'. + '(?:(?=\s)|\z)/'; preg_match_all($pattern, $string, $matches); + if (empty($matches[1])) return false; + $new_string = ''; foreach ($matches[1] as $class_names) { $new_string .= $class_names . ' '; } $new_string = rtrim($new_string); - return ($new_string == $raw_string) ? true : $new_string ? $new_string : false; + return $new_string; } diff --git a/library/HTMLPurifier/AttrDef/Enum.php b/library/HTMLPurifier/AttrDef/Enum.php index c199a057..fa31e21a 100644 --- a/library/HTMLPurifier/AttrDef/Enum.php +++ b/library/HTMLPurifier/AttrDef/Enum.php @@ -14,16 +14,14 @@ class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef $this->case_sensitive = $case_sensitive; } - function validate($raw_string) { - $string = trim($raw_string); + function validate($string) { + $string = trim($string); if (!$this->case_sensitive) { $string = ctype_lower($string) ? $string : strtolower($string); } $result = isset($this->valid_values[$string]); - // if strings equal, return result, otherwise, return - // the new string on a good result and false on a bad one - return ($string == $raw_string) ? $result : $result ? $string : false; + return $result ? $string : false; } } diff --git a/library/HTMLPurifier/AttrDef/ID.php b/library/HTMLPurifier/AttrDef/ID.php index 2c90e279..4d777ee0 100644 --- a/library/HTMLPurifier/AttrDef/ID.php +++ b/library/HTMLPurifier/AttrDef/ID.php @@ -12,9 +12,9 @@ require_once 'HTMLPurifier/IDAccumulator.php'; class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef { - function validate($old_id, $config, &$accumulator) { + function validate($id, $config, &$accumulator) { - $id = trim($old_id); // trim it first + $id = trim($id); // trim it first if ($id === '') return false; if (isset($accumulator->ids[$id])) return false; @@ -37,7 +37,7 @@ class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef // if no change was made to the ID, return the result // else, return the new id if stripping whitespace made it // valid, or return false. - return ($id == $old_id) ? $result : ($result ? $id : false); + return $result ? $id : false; } diff --git a/library/HTMLPurifier/AttrDef/Text.php b/library/HTMLPurifier/AttrDef/Text.php index 5d9a3669..e489250f 100644 --- a/library/HTMLPurifier/AttrDef/Text.php +++ b/library/HTMLPurifier/AttrDef/Text.php @@ -6,8 +6,7 @@ class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef { function validate($string) { - $new_string = $this->parseCDATA($string); - return ($string == $new_string) ? true : $new_string; + return $this->parseCDATA($string); } } diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php index 2de62b3d..5ead680b 100644 --- a/library/HTMLPurifier/Config.php +++ b/library/HTMLPurifier/Config.php @@ -26,6 +26,15 @@ class HTMLPurifier_Config var $attr_class_blacklist = array(); var $attr_class_whitelist = array(); + // designate whether or not to allow numerals in language code subtags + // RFC 1766, the current standard referenced by XML, does not permit + // numbers, but, + // RFC 3066, the superseding best practice standard since January 2001, + // permits them. + // we allow numbers by default, although you generally never see them + // at all. + var $attr_lang_alpha = false; + function createDefault() { $config = new HTMLPurifier_Config(); return $config; diff --git a/tests/HTMLPurifier/AttrDef/ClassTest.php b/tests/HTMLPurifier/AttrDef/ClassTest.php index 6de74351..d2b53376 100644 --- a/tests/HTMLPurifier/AttrDef/ClassTest.php +++ b/tests/HTMLPurifier/AttrDef/ClassTest.php @@ -1,29 +1,30 @@ def = new HTMLPurifier_AttrDef_Class(); - $this->assertTrue($def->validate('valid')); - $this->assertTrue($def->validate('a0-_')); - $this->assertTrue($def->validate('-valid')); - $this->assertTrue($def->validate('_valid')); - $this->assertTrue($def->validate('double valid')); + $this->assertDef('valid'); + $this->assertDef('a0-_'); + $this->assertDef('-valid'); + $this->assertDef('_valid'); + $this->assertDef('double valid'); - $this->assertFalse($def->validate('0invalid')); - $this->assertFalse($def->validate('-0')); + $this->assertDef('0invalid', false); + $this->assertDef('-0', false); // test conditional replacement - $this->assertEqual('validassoc', $def->validate('validassoc 0invalid')); + $this->assertDef('validassoc 0invalid', 'validassoc'); // test whitespace leniency - $this->assertTrue('double valid', $def->validate(" double\nvalid\r")); + $this->assertDef(" double\nvalid\r", 'double valid'); } diff --git a/tests/HTMLPurifier/AttrDef/EnumTest.php b/tests/HTMLPurifier/AttrDef/EnumTest.php index b130c919..2842b214 100644 --- a/tests/HTMLPurifier/AttrDef/EnumTest.php +++ b/tests/HTMLPurifier/AttrDef/EnumTest.php @@ -1,33 +1,34 @@ def = new HTMLPurifier_AttrDef_Enum(array('one', 'two')); - $this->assertTrue($def->validate('one')); - $this->assertTrue($def->validate('ONE')); + $this->assertDef('one'); + $this->assertDef('ONE', 'one'); } function testCaseSensitive() { - $def = new HTMLPurifier_AttrDef_Enum(array('one', 'two'), true); + $this->def = new HTMLPurifier_AttrDef_Enum(array('one', 'two'), true); - $this->assertTrue($def->validate('one')); - $this->assertFalse($def->validate('ONE')); + $this->assertDef('one'); + $this->assertDef('ONE', false); } function testFixing() { - $def = new HTMLPurifier_AttrDef_Enum(array('one')); + $this->def = new HTMLPurifier_AttrDef_Enum(array('one')); - $this->assertEqual('one', $def->validate(' one ')); + $this->assertDef(' one ', 'one'); } diff --git a/tests/HTMLPurifier/AttrDef/IDTest.php b/tests/HTMLPurifier/AttrDef/IDTest.php index beabc53f..4279ee02 100644 --- a/tests/HTMLPurifier/AttrDef/IDTest.php +++ b/tests/HTMLPurifier/AttrDef/IDTest.php @@ -1,37 +1,33 @@ id_accumulator = new HTMLPurifier_IDAccumulator(); + $this->def = new HTMLPurifier_AttrDef_ID(); // valid ID names - $this->assertTrue($def->validate('alpha', $config, $acc)); - $this->assertTrue($def->validate('al_ha', $config, $acc)); - $this->assertTrue($def->validate('a0-:.', $config, $acc)); - $this->assertTrue($def->validate('a' , $config, $acc)); + $this->assertDef('alpha'); + $this->assertDef('al_ha'); + $this->assertDef('a0-:.'); + $this->assertDef('a'); // invalid ID names - $this->assertFalse($def->validate('assertFalse($def->validate('0123', $config, $acc)); - $this->assertFalse($def->validate('.asa', $config, $acc)); + $this->assertDef('assertDef('0123', false); + $this->assertDef('.asa', false); // test duplicate detection - $this->assertFalse($def->validate('a' , $config, $acc)); + $this->assertDef('a', false); // valid once whitespace stripped, but needs to be amended - $this->assertEqual('whee', $def->validate(' whee ', $config, $acc)); + $this->assertDef(' whee ', 'whee'); } diff --git a/tests/HTMLPurifier/AttrDef/TextTest.php b/tests/HTMLPurifier/AttrDef/TextTest.php index 8d8c2694..2aa4128d 100644 --- a/tests/HTMLPurifier/AttrDef/TextTest.php +++ b/tests/HTMLPurifier/AttrDef/TextTest.php @@ -1,17 +1,17 @@ def = new HTMLPurifier_AttrDef_Text(); - $this->assertTrue($def->validate('This is spiffy text!')); - $this->assertEqual('Casual CDATA parsecheck.', - $def->validate(" Casual\tCDATA parse\ncheck. ")); + $this->assertDef('This is spiffy text!'); + $this->assertDef(" Casual\tCDATA parse\ncheck. ", 'Casual CDATA parsecheck.'); } diff --git a/tests/HTMLPurifier/AttrDefHarness.php b/tests/HTMLPurifier/AttrDefHarness.php new file mode 100644 index 00000000..79e4390f --- /dev/null +++ b/tests/HTMLPurifier/AttrDefHarness.php @@ -0,0 +1,24 @@ +config) $this->config = HTMLPurifier_Config::createDefault(); + $result = $this->def->validate($string, $this->config, $this->id_accumulator); + if ($expect === true) { + $this->assertIdentical($string, $result); + } else { + $this->assertIdentical($expect, $result); + } + } + +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/Strategy/CoreTest.php b/tests/HTMLPurifier/Strategy/CoreTest.php index da29743c..0cbff05e 100644 --- a/tests/HTMLPurifier/Strategy/CoreTest.php +++ b/tests/HTMLPurifier/Strategy/CoreTest.php @@ -1,10 +1,10 @@ UnitTestCase(); // we can't use the DOM lexer since it does too much stuff