From 7a4c7b3777c73d81a83cfd3aa1429cfeacfd02b5 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Fri, 17 Nov 2006 01:05:41 +0000 Subject: [PATCH] [1.2.0] [BC] ID attributes now disabled by default. New directives: + %HTML.EnableAttrID - restores old behavior by allowing IDs + %Attr.IDPrefix - %Attr.IDBlacklist alternative that munges all user IDs so that they don't collide with your IDs + %Attr.IDPrefixLocal - Same as above, but for when there are multiple instances of user content on the page + Profuse documentation on how to use these available in id.txt git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@526 48356398-32a2-884e-a903-53898d9a118a --- NEWS | 8 ++ TODO | 2 + docs/id.txt | 124 ++++++++++++++++++ library/HTMLPurifier/AttrDef/ID.php | 34 +++++ library/HTMLPurifier/HTMLDefinition.php | 18 ++- tests/HTMLPurifier/AttrDef/IDTest.php | 46 ++++++- .../Strategy/ValidateAttributesTest.php | 25 +++- 7 files changed, 248 insertions(+), 9 deletions(-) create mode 100644 docs/id.txt diff --git a/NEWS b/NEWS index 92cb6c5c..dd6ea426 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| = KEY ==================== + # Breaks back-compat ! Feature - Bugfix + Sub-comment @@ -9,6 +10,13 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier ========================== 1.2.0, unknown projected release date +# ID attributes now disabled by default. New directives: + + %HTML.EnableAttrID - restores old behavior by allowing IDs + + %Attr.IDPrefix - %Attr.IDBlacklist alternative that munges all user IDs + so that they don't collide with your IDs + + %Attr.IDPrefixLocal - Same as above, but for when there are multiple + instances of user content on the page + + Profuse documentation on how to use these available in id.txt ! Added MODx plugin ! Added percent encoding normalization ! XSS attacks smoketest given facelift diff --git a/TODO b/TODO index b24b6292..be9feb46 100644 --- a/TODO +++ b/TODO @@ -48,6 +48,8 @@ Unknown release (on a scratch-an-itch basis) empty-cells:show is applied to have compatibility with Internet Explorer - Convert RTL/LTR override characters to tags, or vice versa on demand. Also, enable disabling of directionality + - Append something to duplicate IDs so they're still usable (impl. note: the + dupe detector would also need to detect the suffix as well) Encoding workarounds - Non-lossy dumb alternate character encoding transformations, achieved by diff --git a/docs/id.txt b/docs/id.txt new file mode 100644 index 00000000..f8f26584 --- /dev/null +++ b/docs/id.txt @@ -0,0 +1,124 @@ + +IDs + What they are, why you should(n't) wear them, and how to deal with it + +Prior to HTML Purifier 1.2.0, this library blithely accepted user input that +looked like this: + + Anchor + +...presenting an attractive vector for those that would destroy standards +compliance: simply set the ID to one that is already used elsewhere in the +document and voila: validation breaks. There was a half-hearted attempt to +prevent this by allowing users to blacklist IDs, but I suspect that no one +really bothered, and thus, with the release of 1.2.0, IDs are now *removed* +by default. + +IDs, however, are quite useful functionality to have, so if users start +complaining about broken anchors you'll probably want to turn them back on +with %HTML.EnableAttrID. But before you go mucking around with the config +object, it's probably worth to take some precautions to keep your page +validating. Why? + +1. Standards-compliant pages are good +2. Duplicated IDs interfere with anchors. If there are two id="foobar"s in a + document, which spot does a browser presented with the fragment #foobar go + to? Most browsers opt for the first appearing ID, making it impossible + to references the second section. Similarly, duplicated IDs can hijack + client-side scripting that relies on the IDs of elements. + +You have (currently) four ways of dealing with the problem. + + + +Road #1: Blacklisting IDs + Good for pages with single content source and stable templates + +Keeping in terms with the KISS (Keep It Simple, Stupid) principle, let us +deal with the most obvious solution: preventing users from using any IDs that +appear elsewhere on the document. The method is simple: + + $config->set('HTML', 'EnableAttrID', true); + $config->set('Attr', 'IDBlacklist' array( + 'list', 'of', 'attributes', 'that', 'are', 'forbidden' + )); + +That being said, there are some notable drawbacks. First of all, you have to +know precisely which IDs are being used by the HTML surrounding the user code. +This is easier said than done: quite often the page designer and the system +coder work separately, so the designer has to constantly be talking with the +coder whenever he decides to add a new anchor. Miss one and you open yourself +to possible standards-compliance issues. + +Furthermore, this position becomes untenable when a single web page must hold +multiple portions of user-submitted content. Since there's obviously no way +to find out before-hand what IDs users will use, the blacklist is helpless. +And even since HTML Purifier validates each segment seperately, perhaps doing +so at different times, it would be extremely difficult to dynamically update +the blacklist inbetween runs. + +Finally, simply destroying the ID is extremely un-userfriendly behavior: after +all, they might have simply specified a duplicate ID by accident. + +Thus, we get to our second method. + + + +Road #2: Namespacing IDs + Lazy developer's way, but needs user education + +This method, too, is quite simple: add a prefix to all user IDs. With this +code: + + $config->set('HTML', 'EnableAttrID', true); + $config->set('Attr', 'IDPrefix', 'user_'); + +...this: + + Anchor! + +...turns into: + + Anchor! + +As long as you don't have any IDs that start with user_, collisions are +guaranteed not to happen. The drawback is obvious: if a user submits +id="foobar", they probably expect to be able to reference their page with +#foobar. You'll have to tell them, "No, that doesn't work, you have to add +user_ to the beginning." + +And yes, things get hairier. Even with a nice prefix, we still have done +nothing about multiple HTML Purifier outputs on one page. Thus, we have +a second configuration value to piggy-back off of: %Attr.IDPrefixLocal: + + $config->set('Attr', 'IDPrefixLocal', 'comment' . $id . '_'); + +This new attributes does nothing but append on to regular IDPrefix, but is +special in that it is volatile: it's value is determined at run-time and +cannot possibly be cordoned into, say, a .ini config file. As for what to +put into the directive, is up to you, but I would recommend the ID number +the text has been assigned in the database. Whatever you pick, however, it +has to be unique and stable for the text you are validating. Note, however, +that we require that %Attr.IDPrefix be set before you use this directive. + +And also remember: the user has to know what this prefix is too! + + + +Path #3: Abstinence + +You may not want to bother. That's okay too, just don't enable IDs. + +Personally, I would take this road whenever user-submitted content would be +possibly be shown together on one page. Why a blog comment would need to use +anchors is beyond me. + + + +Path #4: Denial + +To revert back to pre-1.2.0 behavior, simply: + + $config->set('HTML', 'EnableAttrID', true); + +Don't come crying to me when your page mysteriously stops validating, though. diff --git a/library/HTMLPurifier/AttrDef/ID.php b/library/HTMLPurifier/AttrDef/ID.php index 03d80b46..09c277ca 100644 --- a/library/HTMLPurifier/AttrDef/ID.php +++ b/library/HTMLPurifier/AttrDef/ID.php @@ -3,6 +3,30 @@ require_once 'HTMLPurifier/AttrDef.php'; require_once 'HTMLPurifier/IDAccumulator.php'; +HTMLPurifier_ConfigSchema::define( + 'Attr', 'IDPrefix', '', 'string', + 'String to prefix to IDs. If you have no idea what IDs your pages '. + 'may use, you may opt to simply add a prefix to all user-submitted ID '. + 'attributes so that they are still usable, but will not conflict with '. + 'core page IDs. Example: setting the directive to \'user_\' will result in '. + 'a user submitted \'foo\' to become \'user_foo\' Be sure to set '. + '%HTML.EnableAttrID to true before using '. + 'this. This directive was available since 1.2.0.' +); + +HTMLPurifier_ConfigSchema::define( + 'Attr', 'IDPrefixLocal', '', 'string', + 'Temporary prefix for IDs used in conjunction with %Attr.IDPrefix. If '. + 'you need to allow multiple sets of '. + 'user content on web page, you may need to have a seperate prefix that '. + 'changes with each iteration. This way, seperately submitted user content '. + 'displayed on the same page doesn\'t clobber each other. Ideal values '. + 'are unique identifiers for the content it represents (i.e. the id of '. + 'the row in the database). Be sure to add a seperator (like an underscore) '. + 'at the end. Warning: this directive will not work unless %Attr.IDPrefix '. + 'is set to a non-empty value! This directive was available since 1.2.0.' +); + /** * Validates the HTML attribute ID. * @warning Even though this is the id processor, it @@ -21,6 +45,16 @@ class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef if ($id === '') return false; + $prefix = $config->get('Attr', 'IDPrefix'); + if ($prefix !== '') { + $prefix .= $config->get('Attr', 'IDPrefixLocal'); + // prevent re-appending the prefix + if (strpos($id, $prefix) !== 0) $id = $prefix . $id; + } elseif ($config->get('Attr', 'IDPrefixLocal') !== '') { + trigger_error('%Attr.IDPrefixLocal cannot be used unless '. + '%Attr.IDPrefix is set', E_USER_WARNING); + } + $id_accumulator =& $context->get('IDAccumulator'); if (isset($id_accumulator->ids[$id])) return false; diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php index 8914ad70..096b919f 100644 --- a/library/HTMLPurifier/HTMLDefinition.php +++ b/library/HTMLPurifier/HTMLDefinition.php @@ -22,6 +22,19 @@ require_once 'HTMLPurifier/Generator.php'; require_once 'HTMLPurifier/Token.php'; require_once 'HTMLPurifier/TagTransform.php'; +HTMLPurifier_ConfigSchema::define( + 'HTML', 'EnableAttrID', false, 'bool', + 'Allows the ID attribute in HTML. This is disabled by default '. + 'due to the fact that without proper configuration user input can '. + 'easily break the validation of a webpage by specifying an ID that is '. + 'already on the surrounding HTML. If you don\'t mind throwing caution to '. + 'the wind, enable this directive, but I strongly recommend you also '. + 'consider blacklisting IDs you use (%Attr.IDBlacklist) or prefixing all '. + 'user supplied IDs (%Attr.IDPrefix). This directive has been available '. + 'since 1.2.0, and when set to true reverts to the behavior of pre-1.2.0 '. + 'versions.' +); + /** * Defines the purified HTML type with large amounts of objects. * @@ -271,7 +284,6 @@ class HTMLPurifier_HTMLDefinition // which manually override these in their local definitions $this->info_global_attr = array( // core attrs - 'id' => new HTMLPurifier_AttrDef_ID(), 'class' => new HTMLPurifier_AttrDef_Class(), 'title' => $e_Text, 'style' => new HTMLPurifier_AttrDef_CSS(), @@ -281,6 +293,10 @@ class HTMLPurifier_HTMLDefinition 'xml:lang' => new HTMLPurifier_AttrDef_Lang(), ); + if ($config->get('HTML', 'EnableAttrID')) { + $this->info_global_attr['id'] = new HTMLPurifier_AttrDef_ID(); + } + // required attribute stipulation handled in attribute transformation $this->info['bdo']->attr = array(); // nothing else diff --git a/tests/HTMLPurifier/AttrDef/IDTest.php b/tests/HTMLPurifier/AttrDef/IDTest.php index eebb3f08..6e0e54be 100644 --- a/tests/HTMLPurifier/AttrDef/IDTest.php +++ b/tests/HTMLPurifier/AttrDef/IDTest.php @@ -7,13 +7,17 @@ require_once 'HTMLPurifier/IDAccumulator.php'; class HTMLPurifier_AttrDef_IDTest extends HTMLPurifier_AttrDefHarness { - function test() { + function setUp() { + parent::setUp(); - $this->context = new HTMLPurifier_Context(); $id_accumulator = new HTMLPurifier_IDAccumulator(); $this->context->register('IDAccumulator', $id_accumulator); $this->def = new HTMLPurifier_AttrDef_ID(); + } + + function test() { + // valid ID names $this->assertDef('alpha'); $this->assertDef('al_ha'); @@ -34,6 +38,44 @@ class HTMLPurifier_AttrDef_IDTest extends HTMLPurifier_AttrDefHarness } + function testPrefix() { + + $this->config->set('Attr', 'IDPrefix', 'user_'); + + $this->assertDef('alpha', 'user_alpha'); + $this->assertDef('assertDef('once', 'user_once'); + $this->assertDef('once', false); + + // if already prefixed, leave alone + $this->assertDef('user_alas'); + $this->assertDef('user_user_alas'); // how to bypass + + } + + function testTwoPrefixes() { + + $this->config->set('Attr', 'IDPrefix', 'user_'); + $this->config->set('Attr', 'IDPrefixLocal', 'story95_'); + + $this->assertDef('alpha', 'user_story95_alpha'); + $this->assertDef('assertDef('once', 'user_story95_once'); + $this->assertDef('once', false); + + $this->assertDef('user_story95_alas'); + $this->assertDef('user_alas', 'user_story95_user_alas'); // ! + + $this->config->set('Attr', 'IDPrefix', ''); + $this->assertDef('amherst'); // no affect when IDPrefix isn't set + $this->assertError('%Attr.IDPrefixLocal cannot be used unless '. + '%Attr.IDPrefix is set'); + // SimpleTest has a bug and throws a sprintf error + // $this->assertNoErrors(); + $this->swallowErrors(); + + } + } ?> \ No newline at end of file diff --git a/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php b/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php index 60183558..5f499706 100644 --- a/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php +++ b/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php @@ -21,17 +21,25 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends $this->assertResult(''); // test ids - $this->assertResult('
Preserve the ID.
'); + $this->assertResult( + '
Kill the ID.
', + '
Kill the ID.
' + ); + + $this->assertResult('
Preserve the ID.
', true, + array('HTML.EnableAttrID' => true)); $this->assertResult( '
Kill the ID.
', - '
Kill the ID.
' + '
Kill the ID.
', + array('HTML.EnableAttrID' => true) ); // test id accumulator $this->assertResult( '
Valid
Invalid
', - '
Valid
Invalid
' + '
Valid
Invalid
', + array('HTML.EnableAttrID' => true) ); $this->assertResult( @@ -42,20 +50,25 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends // test attribute key case sensitivity $this->assertResult( '
Convert ID to lowercase.
', - '
Convert ID to lowercase.
' + '
Convert ID to lowercase.
', + array('HTML.EnableAttrID' => true) ); // test simple attribute substitution $this->assertResult( '
Trim whitespace.
', - '
Trim whitespace.
' + '
Trim whitespace.
', + array('HTML.EnableAttrID' => true) ); // test configuration id blacklist $this->assertResult( '
Invalid
', '
Invalid
', - array('Attr.IDBlacklist' => array('invalid')) + array( + 'Attr.IDBlacklist' => array('invalid'), + 'HTML.EnableAttrID' => true + ) ); // test classes