1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-16 21:48:14 +01:00

[1.7.0] ChildDef_Custom's regex generation has been improved, removing several false positives

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1173 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-06-20 15:54:50 +00:00
parent cf7a50163c
commit 8bbb73e47d
4 changed files with 64 additions and 6 deletions

2
NEWS
View File

@ -46,6 +46,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
- StrictBlockquote child definition refrains from wrapping whitespace
in tags now.
- Bug resulting from tag transforms to non-allowed elements fixed
- ChildDef_Custom's regex generation has been improved, removing several
false positives
. Unit test for ElementDef created, ElementDef behavior modified to
be more flexible
. Added convenience functions for HTMLModule constructors

View File

@ -3,5 +3,5 @@ The first is Tidy, which enables HTML Purifier to both natively support
deprecated elements and also convert them to standards-compliant
alternatives. The second is the Advanced API, which enables users to
create new elements and attributes with ease. Keeping in line with a
commitment to high quality, there are also four esoteric bug-fixes and a
commitment to high quality, there are also five esoteric bug-fixes and a
plethora of subtle improvements that enhance the library.

View File

@ -38,8 +38,21 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
if ($raw{0} != '(') {
$raw = "($raw)";
}
$reg = str_replace(',', ',?', $raw);
$reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
$el = '[#a-zA-Z0-9_.-]+';
$reg = $raw;
// COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M
// DOING! Seriously: if there are problems, please report them.
// setup all elements as parentheticals with leading commas
$reg = preg_replace("/$el/", '(,\\0)', $reg);
// remove commas when they were not solicited
$reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg);
// remove all non-parenthetical commas: they are handled by first regex
$reg = preg_replace("/,\(/", '(', $reg);
$this->_pcre_regex = $reg;
}
function validateChildren($tokens_of_children, $config, &$context) {
@ -60,11 +73,11 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
$list_of_children .= $token->name . ',';
}
}
$list_of_children = rtrim($list_of_children, ',');
// add leading comma to deal with stray comma declarations
$list_of_children = ',' . rtrim($list_of_children, ',');
$okay =
preg_match(
'/^'.$this->_pcre_regex.'$/',
'/^,?'.$this->_pcre_regex.'$/',
$list_of_children
);

View File

@ -19,6 +19,49 @@ class HTMLPurifier_ChildDef_CustomTest extends HTMLPurifier_ChildDefHarness
}
// Exercises a repeated group that contains a nested alternation:
// '(a,b,(c|d))+'.
// NOTE(review): assertResult() is provided by the test harness, which is not
// visible here; a second argument of false presumably means the children are
// expected to be rejected, and omitting it that they are accepted unchanged
// -- confirm against HTMLPurifier_ChildDefHarness.
function testNesting() {
$this->obj = new HTMLPurifier_ChildDef_Custom('(a,b,(c|d))+');
// no children at all: the '+' quantifier asks for at least one group
$this->assertResult('', false);
// two complete groups, the first ending in c and the second in d
$this->assertResult('<a /><b /><c /><a /><b /><d />');
// c immediately followed by d: d cannot begin a new a,b,(c|d) group
$this->assertResult('<a /><b /><c /><d />', false);
}
// Exercises an alternation nested inside another alternation, preceded by a
// mandatory element: 'b,(a|(c|d))+'.
// NOTE(review): assertResult() is provided by the test harness, which is not
// visible here; a second argument of false presumably means the children are
// expected to be rejected -- confirm against HTMLPurifier_ChildDefHarness.
function testNestedEitherOr() {
$this->obj = new HTMLPurifier_ChildDef_Custom('b,(a|(c|d))+');
// no children at all: the leading b is missing
$this->assertResult('', false);
// b followed by one of each alternative
$this->assertResult('<b /><a /><c /><d />');
// alternatives in a different order, with a repeated
$this->assertResult('<b /><d /><a /><a />');
// b followed by a single alternative
$this->assertResult('<b /><a />');
// a single element named "acd", not the three elements a, c and d
$this->assertResult('<acd />', false);
}
// Exercises a quantifier nested inside a quantified group: '(b,c+)*'.
// NOTE(review): assertResult() is provided by the test harness, which is not
// visible here; a second argument of false presumably means the children are
// expected to be rejected -- confirm against HTMLPurifier_ChildDefHarness.
function testNestedQuantifier() {
$this->obj = new HTMLPurifier_ChildDef_Custom('(b,c+)*');
// no children at all: expected to pass, since the outer '*' allows zero groups
$this->assertResult('');
// one group with a single c
$this->assertResult('<b /><c />');
// one group with several c elements
$this->assertResult('<b /><c /><c /><c />');
// two complete groups
$this->assertResult('<b /><c /><b /><c />');
// trailing b with no c after it: the second group is incomplete
$this->assertResult('<b /><c /><b />', false);
}
// Exercises a bare top-level alternation with no surrounding parentheses:
// 'a|b'.
// NOTE(review): assertResult() is provided by the test harness, which is not
// visible here; a second argument of false presumably means the children are
// expected to be rejected -- confirm against HTMLPurifier_ChildDefHarness.
function testEitherOr() {
$this->obj = new HTMLPurifier_ChildDef_Custom('a|b');
// no children at all: exactly one of a or b is required
$this->assertResult('', false);
// either alternative on its own
$this->assertResult('<a />');
$this->assertResult('<b />');
// both alternatives together: only one is allowed
$this->assertResult('<a /><b />', false);
}
// Exercises a plain comma-separated sequence: 'a,b'.
// NOTE(review): assertResult() is provided by the test harness, which is not
// visible here; a second argument of false presumably means the children are
// expected to be rejected -- confirm against HTMLPurifier_ChildDefHarness.
function testCommafication() {
$this->obj = new HTMLPurifier_ChildDef_Custom('a,b');
// a followed by b, as the sequence requires
$this->assertResult('<a /><b />');
// a single element named "ab", not the two elements a and b
$this->assertResult('<ab />', false);
}
}
?>