From baf053b016b118ff26554d819130ba4746c693d8 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Mon, 25 May 2009 21:55:44 -0400 Subject: [PATCH] Implement %Attr.AllowedClasses and %Attr.ForbiddenClasses. Signed-off-by: Edward Z. Yang --- NEWS | 3 ++ TODO | 3 +- library/HTMLPurifier.includes.php | 3 +- library/HTMLPurifier.safe-includes.php | 3 +- library/HTMLPurifier/AttrDef/HTML/Class.php | 22 +++++++++++ .../HTMLPurifier/AttrDef/HTML/Nmtokens.php | 36 ++++++++++-------- library/HTMLPurifier/AttrTypes.php | 3 ++ library/HTMLPurifier/ConfigSchema/schema.ser | Bin 12719 -> 12912 bytes .../schema/Attr.AllowedClasses.txt | 8 ++++ .../schema/Attr.ForbiddenClasses.txt | 8 ++++ .../HTMLModule/CommonAttributes.php | 3 +- tests/HTMLPurifier/AttrDef/HTML/ClassTest.php | 21 ++++++++++ .../AttrDef/HTML/NmtokensTest.php | 7 +++- 13 files changed, 98 insertions(+), 22 deletions(-) create mode 100644 library/HTMLPurifier/AttrDef/HTML/Class.php create mode 100644 library/HTMLPurifier/ConfigSchema/schema/Attr.AllowedClasses.txt create mode 100644 library/HTMLPurifier/ConfigSchema/schema/Attr.ForbiddenClasses.txt create mode 100644 tests/HTMLPurifier/AttrDef/HTML/ClassTest.php diff --git a/NEWS b/NEWS index 5c5272c4..d017d399 100644 --- a/NEWS +++ b/NEWS @@ -31,6 +31,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier tags that contain non-breaking spaces as well other whitespace. You can also modify which tags should have   maintained with %AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions. +! Implement %Attr.AllowedClasses, which allows administrators to restrict + classes users can use to a specified finite set of classes, and + %Attr.ForbiddenClasses, which is the logical inverse. . Created script maintenance/rename-config.php for renaming a configuration directive while maintaining its alias. This script does not change source code. 
diff --git a/TODO b/TODO index 71fbc1f3..03a70ebf 100644 --- a/TODO +++ b/TODO @@ -17,10 +17,11 @@ afraid to cast your vote for the next feature to be implemented! - Incorporate data: support as implemented here: http://htmlpurifier.org/phorum/read.php?3,3491,3548 - Fix ImgRequired to handle data correctly -- Provide callback/lookup table of allowed class attributes - Think about allowing explicit order of operations hooks for transforms - Allow more relaxed "class" definition than NMTOKENS for appropriate doctypes +- Lock when configuring Definition objects so we CAN'T access configuration + directives outside of what dependency has been registered. FUTURE VERSIONS --------------- diff --git a/library/HTMLPurifier.includes.php b/library/HTMLPurifier.includes.php index 5ee92d80..662e1184 100644 --- a/library/HTMLPurifier.includes.php +++ b/library/HTMLPurifier.includes.php @@ -98,6 +98,8 @@ require 'HTMLPurifier/AttrDef/CSS/Percentage.php'; require 'HTMLPurifier/AttrDef/CSS/TextDecoration.php'; require 'HTMLPurifier/AttrDef/CSS/URI.php'; require 'HTMLPurifier/AttrDef/HTML/Bool.php'; +require 'HTMLPurifier/AttrDef/HTML/Nmtokens.php'; +require 'HTMLPurifier/AttrDef/HTML/Class.php'; require 'HTMLPurifier/AttrDef/HTML/Color.php'; require 'HTMLPurifier/AttrDef/HTML/FrameTarget.php'; require 'HTMLPurifier/AttrDef/HTML/ID.php'; @@ -105,7 +107,6 @@ require 'HTMLPurifier/AttrDef/HTML/Pixels.php'; require 'HTMLPurifier/AttrDef/HTML/Length.php'; require 'HTMLPurifier/AttrDef/HTML/LinkTypes.php'; require 'HTMLPurifier/AttrDef/HTML/MultiLength.php'; -require 'HTMLPurifier/AttrDef/HTML/Nmtokens.php'; require 'HTMLPurifier/AttrDef/URI/Email.php'; require 'HTMLPurifier/AttrDef/URI/Host.php'; require 'HTMLPurifier/AttrDef/URI/IPv4.php'; diff --git a/library/HTMLPurifier.safe-includes.php b/library/HTMLPurifier.safe-includes.php index 052ddcc3..cf2c1d61 100644 --- a/library/HTMLPurifier.safe-includes.php +++ b/library/HTMLPurifier.safe-includes.php @@ -92,6 +92,8 @@ require_once 
$__dir . '/HTMLPurifier/AttrDef/CSS/Percentage.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/TextDecoration.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/URI.php'; require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Bool.php'; +require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Nmtokens.php'; +require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Class.php'; require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Color.php'; require_once $__dir . '/HTMLPurifier/AttrDef/HTML/FrameTarget.php'; require_once $__dir . '/HTMLPurifier/AttrDef/HTML/ID.php'; @@ -99,7 +101,6 @@ require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Pixels.php'; require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Length.php'; require_once $__dir . '/HTMLPurifier/AttrDef/HTML/LinkTypes.php'; require_once $__dir . '/HTMLPurifier/AttrDef/HTML/MultiLength.php'; -require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Nmtokens.php'; require_once $__dir . '/HTMLPurifier/AttrDef/URI/Email.php'; require_once $__dir . '/HTMLPurifier/AttrDef/URI/Host.php'; require_once $__dir . '/HTMLPurifier/AttrDef/URI/IPv4.php'; diff --git a/library/HTMLPurifier/AttrDef/HTML/Class.php b/library/HTMLPurifier/AttrDef/HTML/Class.php new file mode 100644 index 00000000..a2f4a98a --- /dev/null +++ b/library/HTMLPurifier/AttrDef/HTML/Class.php @@ -0,0 +1,22 @@ +<?php + +/** + * Implements special behavior for class attribute (normally NMTOKENS) + */ +class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens +{ + protected function filter($tokens, $config, $context) { + $allowed = $config->get('Attr.AllowedClasses'); + $forbidden = $config->get('Attr.ForbiddenClasses'); + $ret = array(); + foreach ($tokens as $token) { + if ( + ($allowed === null || isset($allowed[$token])) && + !isset($forbidden[$token]) + ) { + $ret[] = $token; + } + } + return $ret; + } +} diff --git a/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php b/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php index 55035c4d..7dab1fea 100644 --- a/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php +++ b/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php @@ -2,10 +2,6 @@ /** * Validates contents based on NMTOKENS attribute type. 
- * @note The only current use for this is the class attribute in HTML - * @note Could have some functionality factored out into Nmtoken class - * @warning We cannot assume this class will be used only for 'class' - * attributes. Not sure how to hook in magic behavior, then. */ class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef { @@ -17,6 +13,17 @@ class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef // early abort: '' and '0' (strings that convert to false) are invalid if (!$string) return false; + $tokens = $this->split($string); + $tokens = $this->filter($tokens, $config, $context); + if (empty($tokens)) return false; + return implode(' ', $tokens); + + } + + /** + * Splits a space separated list of tokens into its constituent parts. + */ + protected function split($string) { // OPTIMIZABLE! // do the preg_match, capture all subpatterns for reformulation @@ -24,23 +31,20 @@ class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef // escaping because I don't know how to do that with regexps // and plus it would complicate optimization efforts (you never // see that anyway). - $matches = array(); $pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'. '(?:(?=\s)|\z)/'; // look ahead for space or string end preg_match_all($pattern, $string, $matches); + return $matches[1]; + } - if (empty($matches[1])) return false; - - // reconstruct string - $new_string = ''; - foreach ($matches[1] as $token) { - $new_string .= $token . ' '; - } - $new_string = rtrim($new_string); - - return $new_string; - + /** + * Template method for removing certain tokens based on arbitrary criteria. + * @note If we wanted to be really functional, we'd do an array_filter + * with a callback. But... we're not. 
+ */ + protected function filter($tokens, $config, $context) { + return $tokens; } } diff --git a/library/HTMLPurifier/AttrTypes.php b/library/HTMLPurifier/AttrTypes.php index 6c624bb0..fc2ea4e5 100644 --- a/library/HTMLPurifier/AttrTypes.php +++ b/library/HTMLPurifier/AttrTypes.php @@ -36,6 +36,9 @@ class HTMLPurifier_AttrTypes $this->info['Charsets'] = new HTMLPurifier_AttrDef_Text(); $this->info['Character'] = new HTMLPurifier_AttrDef_Text(); + // "proprietary" types + $this->info['Class'] = new HTMLPurifier_AttrDef_HTML_Class(); + // number is really a positive integer (one or more digits) // FIXME: ^^ not always, see start and value of list items $this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true); diff --git a/library/HTMLPurifier/ConfigSchema/schema.ser b/library/HTMLPurifier/ConfigSchema/schema.ser index 81185b589a6f101218ae0610c1a49abeade4f48a..b534c3f128024cd9d46764e283de7f297860f92b 100644 GIT binary patch delta 247 zcmZ3V{2^t6nS!}hb+MJ9rInInNlB5OV@^(fd1{JtPGWI!YO#{FpY`TKepyCEBSWYH zxBQ}{%#@VWJh+lXD+8dEK$6*dRUcQbDO IuTUrm04FXO0RR91 diff --git a/library/HTMLPurifier/ConfigSchema/schema/Attr.AllowedClasses.txt b/library/HTMLPurifier/ConfigSchema/schema/Attr.AllowedClasses.txt new file mode 100644 index 00000000..0517fed0 --- /dev/null +++ b/library/HTMLPurifier/ConfigSchema/schema/Attr.AllowedClasses.txt @@ -0,0 +1,8 @@ +Attr.AllowedClasses +TYPE: lookup/null +VERSION: 4.0.0 +DEFAULT: null +--DESCRIPTION-- +List of allowed class values in the class attribute. By default, this is null, +which means all classes are allowed. 
+--# vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/ConfigSchema/schema/Attr.ForbiddenClasses.txt b/library/HTMLPurifier/ConfigSchema/schema/Attr.ForbiddenClasses.txt new file mode 100644 index 00000000..f31d226f --- /dev/null +++ b/library/HTMLPurifier/ConfigSchema/schema/Attr.ForbiddenClasses.txt @@ -0,0 +1,8 @@ +Attr.ForbiddenClasses +TYPE: lookup +VERSION: 4.0.0 +DEFAULT: array() +--DESCRIPTION-- +List of forbidden class values in the class attribute. By default, this is +empty, which means that no classes are forbidden. See also %Attr.AllowedClasses. +--# vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/CommonAttributes.php b/library/HTMLPurifier/HTMLModule/CommonAttributes.php index fdf7b323..7c15da84 100644 --- a/library/HTMLPurifier/HTMLModule/CommonAttributes.php +++ b/library/HTMLPurifier/HTMLModule/CommonAttributes.php @@ -8,7 +8,7 @@ class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule 'Core' => array( 0 => array('Style'), // 'xml:space' => false, - 'class' => 'NMTOKENS', + 'class' => 'Class', 'id' => 'ID', 'title' => 'CDATA', ), @@ -20,6 +20,7 @@ class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule 0 => array('Core', 'I18N') ) ); + } // vim: et sw=4 sts=4 diff --git a/tests/HTMLPurifier/AttrDef/HTML/ClassTest.php b/tests/HTMLPurifier/AttrDef/HTML/ClassTest.php new file mode 100644 index 00000000..0e959b5e --- /dev/null +++ b/tests/HTMLPurifier/AttrDef/HTML/ClassTest.php @@ -0,0 +1,21 @@ +<?php + +class HTMLPurifier_AttrDef_HTML_ClassTest extends HTMLPurifier_AttrDef_HTML_NmtokensTest +{ + function setUp() { + parent::setUp(); + $this->def = new HTMLPurifier_AttrDef_HTML_Class(); + } + function testAllowedClasses() { + $this->config->set('Attr.AllowedClasses', array('foo')); + $this->assertDef('foo'); + $this->assertDef('bar', false); + $this->assertDef('foo bar', 'foo'); + } + function testForbiddenClasses() { + $this->config->set('Attr.ForbiddenClasses', array('bar')); + $this->assertDef('foo'); + $this->assertDef('bar', false); + $this->assertDef('foo bar', 'foo'); + } +} diff --git
a/tests/HTMLPurifier/AttrDef/HTML/NmtokensTest.php b/tests/HTMLPurifier/AttrDef/HTML/NmtokensTest.php index 4af98ee0..bb64ff6e 100644 --- a/tests/HTMLPurifier/AttrDef/HTML/NmtokensTest.php +++ b/tests/HTMLPurifier/AttrDef/HTML/NmtokensTest.php @@ -3,9 +3,12 @@ class HTMLPurifier_AttrDef_HTML_NmtokensTest extends HTMLPurifier_AttrDefHarness { - function testDefault() { - + function setUp() { + parent::setUp(); $this->def = new HTMLPurifier_AttrDef_HTML_Nmtokens(); + } + + function testDefault() { $this->assertDef('valid'); $this->assertDef('a0-_');