From 12f73605a3ab0379e711960dd17b5cccbf35cb47 Mon Sep 17 00:00:00 2001
From: "Edward Z. Yang"
Date: Tue, 29 May 2007 21:26:43 +0000
Subject: [PATCH] [1.7.0] Implement HTML.Allowed, a TinyMCE style whitelist
format.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1119 48356398-32a2-884e-a903-53898d9a118a
---
NEWS | 2 +
TODO | 11 ++--
configdoc/generate.php | 1 +
library/HTMLPurifier/HTMLDefinition.php | 67 ++++++++++++++++++++++-
tests/HTMLPurifier/HTMLDefinitionTest.php | 57 +++++++++++++++++++
tests/test_files.php | 1 +
6 files changed, 132 insertions(+), 7 deletions(-)
create mode 100644 tests/HTMLPurifier/HTMLDefinitionTest.php
diff --git a/NEWS b/NEWS
index 06f6721b..f280ddbc 100644
--- a/NEWS
+++ b/NEWS
@@ -30,6 +30,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
format: key1:value1,key2:value2
! ConfigDoc now factored into OOP design
! All deprecated elements now natively supported
+! Implement TinyMCE styled whitelist specification format in
+ %HTML.Allowed
- Deprecated and removed EnableRedundantUTF8Cleaning. It didn't even work!
. Unit test for ElementDef created, ElementDef behavior modified to
be more flexible
diff --git a/TODO b/TODO
index 5b6f6f42..1189c929 100644
--- a/TODO
+++ b/TODO
@@ -10,10 +10,7 @@ TODO List
1.7 release [Advanced API]
# Complete advanced API, and fully document it
- Add framework for unsafe attributes
- - Reorganize configuration directives
- Set up anonymous module management by HTMLDefinition (Advanced API)
- - Get all AttrTypes into string form
- - Parse TinyMCE-style whitelist into our %HTML.Allow* whitelists
1.8 release [Refactor, refactor!]
# URI validation routines tighter (see docs/dev-code-quality.html) (COMPLEX)
@@ -48,6 +45,9 @@ TODO List
- Append something to duplicate IDs so they're still usable (impl. note: the
dupe detector would also need to detect the suffix as well)
+1.11 release [It's All About Trust] (floating)
+ # Implement untrusted, dangerous elements/attributes
+
2.0 release [Beyond HTML]
# Legit token based CSS parsing (will require revamping almost every
AttrDef class)
@@ -81,8 +81,6 @@ Ongoing
- eFiction
- more! (look for ones that use WYSIWYGs)
- Complete basic smoketests
- - Reorganize Unit Tests
- - Refactor loop tests (esp. AttrDef_URI)
Unknown release (on a scratch-an-itch basis)
? Semi-lossy dumb alternate character encoding transfor
@@ -91,6 +89,9 @@ Unknown release (on a scratch-an-itch basis)
- Explain how to use HTML Purifier in non-PHP languages
- Abstract ChildDef_BlockQuote to work with all elements that only
allow blocks in them, required or optional
+ - Reorganize Unit Tests
+ - Refactor loop tests (esp. AttrDef_URI)
+ - Reorganize configuration directives (Create more namespaces! Get messy!)
Requested
? Native content compression, whitespace stripping (don't rely on Tidy, make
diff --git a/configdoc/generate.php b/configdoc/generate.php
index 4bf68d6a..80eacd76 100644
--- a/configdoc/generate.php
+++ b/configdoc/generate.php
@@ -12,6 +12,7 @@ TODO:
- allow generation of packaged docs that can be easily moved
- multipage documentation
- determine how to multilingualize
+- add blurbs to ToC
*/
if (version_compare('5', PHP_VERSION, '>')) exit('Requires PHP 5 or higher.');
diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php
index 4d27771e..ec6a9c0d 100644
--- a/library/HTMLPurifier/HTMLDefinition.php
+++ b/library/HTMLPurifier/HTMLDefinition.php
@@ -65,6 +65,25 @@ HTMLPurifier_ConfigSchema::define(
');
+HTMLPurifier_ConfigSchema::define(
+ 'HTML', 'Allowed', null, 'string/null', '
+
+ This is a convenience directive that rolls the functionality of
+ %HTML.AllowedElements and %HTML.AllowedAttributes into one directive.
+ Specify elements and attributes that are allowed using:
+    <code>element1[attr1|attr2],element2...</code>.
+
+
+ Warning:
+ All of the constraints on the component directives are still enforced.
+ The syntax is a subset of TinyMCE\'s valid_elements
+ whitelist: directly copy-pasting it here will probably result in
+ broken whitelists. If %HTML.AllowedElements or %HTML.AllowedAttributes
+ are set, this directive has no effect.
+ This directive has been available since 1.7.0.
+
+');
+
/**
* Definition of the purified HTML that describes allowed children,
* attributes, and many other things.
@@ -233,8 +252,18 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
$support = "(for information on implementing this, see the ".
"support forums) ";
- // setup allowed elements, SubtractiveWhitelist module(?)
+ // setup allowed elements
+
$allowed_elements = $config->get('HTML', 'AllowedElements');
+ $allowed_attributes = $config->get('HTML', 'AllowedAttributes');
+
+ if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
+ $allowed = $config->get('HTML', 'Allowed');
+ if (is_string($allowed)) {
+ list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
+ }
+ }
+
if (is_array($allowed_elements)) {
foreach ($this->info as $name => $d) {
if(!isset($allowed_elements[$name])) unset($this->info[$name]);
@@ -247,7 +276,6 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
}
}
- $allowed_attributes = $config->get('HTML', 'AllowedAttributes');
$allowed_attributes_mutable = $allowed_attributes; // by copy!
if (is_array($allowed_attributes)) {
foreach ($this->info_global_attr as $attr_key => $info) {
@@ -289,6 +317,41 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
}
+    /**
+     * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
+     * separate lookup arrays. Format is element[attr1|attr2],element2...
+     * Whitespace is ignored; empty chunks and attribute names are skipped.
+     * @warning Only a subset of TinyMCE's valid_elements syntax is
+     *          understood, so you'll probably have to modify your lists
+     * @param $list String list to parse
+     * @return array($allowed_elements, $allowed_attributes)
+     */
+    function parseTinyMCEAllowedList($list) {
+        $elements = array();
+        $attributes = array();
+        
+        // whitespace carries no meaning in the format, so drop all of it
+        $list = str_replace(array(' ', "\t", "\n", "\r"), '', $list);
+        foreach (explode(',', $list) as $chunk) {
+            if ($chunk === '') continue; // empty list, doubled/trailing comma
+            // compare strictly: a '[' at offset 0 must not read as "not found"
+            if (strpos($chunk, '[') === false) {
+                $element = $chunk;
+                $attr = '';
+            } else {
+                list($element, $attr) = explode('[', $chunk);
+            }
+            if ($element === '') continue; // "[attr]" names no element at all
+            if ($element !== '*') $elements[$element] = true; // '*' is global
+            // strip the closing bracket only when it is actually there
+            if (substr($attr, -1) === ']') $attr = substr($attr, 0, strlen($attr) - 1);
+            foreach (explode('|', $attr) as $key) {
+                if ($key !== '') $attributes["$element.$key"] = true;
+            }
+        }
+        return array($elements, $attributes);
+    }
+
}
diff --git a/tests/HTMLPurifier/HTMLDefinitionTest.php b/tests/HTMLPurifier/HTMLDefinitionTest.php
new file mode 100644
index 00000000..b9c77e07
--- /dev/null
+++ b/tests/HTMLPurifier/HTMLDefinitionTest.php
@@ -0,0 +1,57 @@
+assertEqual(
+ $def->parseTinyMCEAllowedList('a,b,c'),
+ array(array('a' => true, 'b' => true, 'c' => true), array())
+ );
+
+ $this->assertEqual(
+ $def->parseTinyMCEAllowedList('a[x|y|z]'),
+ array(array('a' => true), array('a.x' => true, 'a.y' => true, 'a.z' => true))
+ );
+
+ $this->assertEqual(
+ $def->parseTinyMCEAllowedList('*[id]'),
+ array(array(), array('*.id' => true))
+ );
+
+ $this->assertEqual(
+ $def->parseTinyMCEAllowedList('a[*]'),
+ array(array('a' => true), array('a.*' => true))
+ );
+
+ $this->assertEqual(
+ $def->parseTinyMCEAllowedList('span[style],strong,a[href|title]'),
+ array(array('span' => true, 'strong' => true, 'a' => true),
+ array('span.style' => true, 'a.href' => true, 'a.title' => true))
+ );
+
+ }
+
+    /** %HTML.Allowed must yield the same definition as the split directives. */
+    function test_Allowed() {
+        // reference: whitelist spelled out through the component directives
+        $split = HTMLPurifier_Config::create(array(
+            'HTML.AllowedElements' => array('b', 'i', 'p', 'a'),
+            'HTML.AllowedAttributes' => array('a.href', '*.id')
+        ));
+        // the same whitelist written in the combined TinyMCE-style notation
+        $combined = HTMLPurifier_Config::create(array('HTML.Allowed' => 'b,i,p,a[href],*[id]'));
+        $expected = $split->getHTMLDefinition();
+        $actual = $combined->getHTMLDefinition();
+        $this->assertEqual($expected, $actual);
+        
+    }
+
+}
+
+?>
\ No newline at end of file
diff --git a/tests/test_files.php b/tests/test_files.php
index a6549d9f..58a1d127 100644
--- a/tests/test_files.php
+++ b/tests/test_files.php
@@ -74,6 +74,7 @@ $test_files[] = 'HTMLPurifier/EncoderTest.php';
$test_files[] = 'HTMLPurifier/EntityLookupTest.php';
$test_files[] = 'HTMLPurifier/EntityParserTest.php';
$test_files[] = 'HTMLPurifier/GeneratorTest.php';
+$test_files[] = 'HTMLPurifier/HTMLDefinitionTest.php';
$test_files[] = 'HTMLPurifier/HTMLModuleManagerTest.php';
$test_files[] = 'HTMLPurifier/HTMLModuleTest.php';
$test_files[] = 'HTMLPurifier/HTMLModule/ScriptingTest.php';