diff --git a/NEWS b/NEWS index 8e05466a..9a180fc2 100644 --- a/NEWS +++ b/NEWS @@ -11,6 +11,15 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier 1.3.0, unknown release date (major feature release) +! (X)HTML Strict now supported + + Transparently handles inline elements in block context (blockquote) +! Added GET method to demo for easier validation, added 50kb max input size +! New directive %HTML.BlockWrapper, for block-ifying inline elements +! New directive %HTML.Parent, allows you to only allow inline content +- Added missing type to ChildDef_Chameleon +. ChildDef_Required guards against empty tags +. Lookup table HTMLDefinition->info_flow_elements added +. Added peace-of-mind variable initialization to Strategy_FixNesting 1.2.1, unknown release date (bugfix/minor feature release, may be dropped if 1.2.0 is stable) diff --git a/docs/examples/demo.php b/docs/examples/demo.php index 35a47986..d5b3a5b1 100644 --- a/docs/examples/demo.php +++ b/docs/examples/demo.php @@ -1,11 +1,30 @@ +'; + +function getFormMethod() { + return (isset($_REQUEST['post'])) ? 'post' : 'get'; +} + +if (empty($_REQUEST['strict'])) { +?> - + + + +
Request exceeds maximum allowed text size of 50kb.
+ set('Core', 'TidyFormat', !empty($_POST['tidy'])); + $config->set('Core', 'TidyFormat', !empty($_REQUEST['tidy'])); + $config->set('HTML', 'Strict', !empty($_REQUEST['strict'])); $purifier = new HTMLPurifier($config); $pure_html = $purifier->purify($html); @@ -43,7 +68,17 @@ echo htmlspecialchars($pure_html, ENT_COMPAT, 'UTF-8'); ?> +If you would like to validate the code with +W3C's +validator, copy and paste the entire demo page's source.
+ @@ -54,12 +89,13 @@ will filter it. } ?> - -Return to HTMLPurifier's home page.
+Return to HTMLPurifier's home page. +Try the form in GET and POST request +flavors (GET is easy to validate, but POST allows larger inputs).
+ + + \ No newline at end of file diff --git a/docs/ref-loose-vs-strict.txt b/docs/ref-loose-vs-strict.txt index 110bda37..39d51aa2 100644 --- a/docs/ref-loose-vs-strict.txt +++ b/docs/ref-loose-vs-strict.txt @@ -7,11 +7,11 @@ to HTML Purifier, though, so let's take a look: == Major incompatibilities == -BLOCKQUOTE changes from 'flow' to 'block' +[done] BLOCKQUOTE changes from 'flow' to 'block' behavior: inline inner contents should not be nuked, paragraph as necessary -U, S, STRIKE cut +[partially-done] U, S, STRIKE cut behavior: replace with appropriate inline span + CSS -ADDRESS from potpourri to Inline (removes p tags) +[partially-done] ADDRESS from potpourri to Inline (removes p tags) (lower importance) behavior: p tags silently dropped or replaced with something (<blockquote>Foo</blockquote>
'.
+ 'would become <blockquote><p>Foo</p></blockquote>
. The '.
+ '<p>
tags can be replaced '.
+ 'with whatever you desire, as long as it is a block level element.'
+);
+
+HTMLPurifier_ConfigSchema::define(
+ 'HTML', 'Parent', 'div', 'string',
+ 'String name of element that HTML fragment passed to library will be '.
+ 'inserted in. An interesting variation would be using span as the '.
+ 'parent element, meaning that only inline tags would be allowed.'
+);
+
/**
* Defines the purified HTML type with large amounts of objects.
*
@@ -79,11 +97,17 @@ class HTMLPurifier_HTMLDefinition
/**
* String name of parent element HTML will be going into.
- * @todo Allow this to be overloaded by user config
* @public
*/
var $info_parent = 'div';
+ /**
+ * String name of the block element (from %HTML.BlockWrapper) used to wrap inline elements in block context
+ * @note This is rarely used except for BLOCKQUOTEs in strict mode
+ * @public
+ */
+ var $info_block_wrapper = 'p';
+
/**
* Associative array of deprecated tag name to HTMLPurifier_TagTransform
* @public
@@ -102,6 +126,11 @@ class HTMLPurifier_HTMLDefinition
*/
var $info_attr_transform_post = array();
+ /**
+ * Lookup table of flow elements (name => true), copied from $e_Flow->elements
+ */
+ var $info_flow_elements = array();
+
/**
* Initializes the definition, the meat of the class.
*/
@@ -164,11 +193,9 @@ class HTMLPurifier_HTMLDefinition
$e_phrase_basic = 'em | strong | dfn | code | q | samp | kbd | var'.
' | cite | abbr | acronym';
$e_phrase = "$e_phrase_basic | $e_phrase_extra";
- $e_inline_forms = ''; // humor the dtd
$e_misc_inline = 'ins | del';
$e_misc = "$e_misc_inline";
- $e_inline = "a | $e_special | $e_fontstyle | $e_phrase".
- " | $e_inline_forms";
+ $e_inline = "a | $e_special | $e_fontstyle | $e_phrase";
// pseudo-property we created for convenience, see later on
$e__inline = "#PCDATA | $e_inline | $e_misc_inline";
// note the casing
@@ -181,11 +208,10 @@ class HTMLPurifier_HTMLDefinition
$e__flow = "#PCDATA | $e_block | $e_inline | $e_misc";
$e_Flow = new HTMLPurifier_ChildDef_Optional($e__flow);
$e_a_content = new HTMLPurifier_ChildDef_Optional("#PCDATA".
- " | $e_special | $e_fontstyle | $e_phrase | $e_inline_forms".
- " | $e_misc_inline");
+ " | $e_special | $e_fontstyle | $e_phrase | $e_misc_inline");
$e_pre_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | a".
" | $e_special_basic | $e_fontstyle_basic | $e_phrase_basic".
- " | $e_inline_forms | $e_misc_inline");
+ " | $e_misc_inline");
$e_form_content = new HTMLPurifier_ChildDef_Optional('');//unused
$e_form_button_content = new HTMLPurifier_ChildDef_Optional('');//unused
@@ -198,7 +224,7 @@ class HTMLPurifier_HTMLDefinition
$this->info['div']->child = $e_Flow;
if ($config->get('HTML', 'Strict')) {
- $this->info['blockquote']->child = $e_Block;
+ $this->info['blockquote']->child = new HTMLPurifier_ChildDef_StrictBlockquote();
} else {
$this->info['blockquote']->child = $e_Flow;
}
@@ -276,7 +302,7 @@ class HTMLPurifier_HTMLDefinition
// reuses $e_Inline and $e_Block
foreach ($e_Inline->elements as $name => $bool) {
- if ($name == '#PCDATA' || $name == '') continue;
+ if ($name == '#PCDATA') continue;
$this->info[$name]->type = 'inline';
}
@@ -284,6 +310,10 @@ class HTMLPurifier_HTMLDefinition
$this->info[$name]->type = 'block';
}
+ foreach ($e_Flow->elements as $name => $bool) {
+ $this->info_flow_elements[$name] = true;
+ }
+
//////////////////////////////////////////////////////////////////////
// info[]->excludes : defines elements that aren't allowed in here
@@ -447,6 +477,28 @@ class HTMLPurifier_HTMLDefinition
}
}
+ //////////////////////////////////////////////////////////////////////
+ // info_block_wrapper : wraps inline elements in block context
+
+ $block_wrapper = $config->get('HTML', 'BlockWrapper');
+ if (isset($e_Block->elements[$block_wrapper])) {
+ $this->info_block_wrapper = $block_wrapper;
+ } else {
+ trigger_error('Cannot use non-block element as block wrapper.',
+ E_USER_ERROR);
+ }
+
+ //////////////////////////////////////////////////////////////////////
+ // info_parent : parent element of the HTML fragment
+
+ $parent = $config->get('HTML', 'Parent');
+ if (isset($this->info[$parent])) {
+ $this->info_parent = $parent;
+ } else {
+ trigger_error('Cannot use unrecognized element as parent.',
+ E_USER_ERROR);
+ }
+
}
function setAttrForTableElements($attr, $def) {
diff --git a/library/HTMLPurifier/Strategy/FixNesting.php b/library/HTMLPurifier/Strategy/FixNesting.php
index ca6f1a33..3357937e 100644
--- a/library/HTMLPurifier/Strategy/FixNesting.php
+++ b/library/HTMLPurifier/Strategy/FixNesting.php
@@ -141,6 +141,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
if ($excluded) {
// there is an exclusion, remove the entire node
$result = false;
+ $excludes = array(); // not used, but good to initialize anyway
} else {
// DEFINITION CALL
$def = $definition->info[$tokens[$i]->name];
diff --git a/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php b/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php
new file mode 100644
index 00000000..a3e2cb14
--- /dev/null
+++ b/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php
@@ -0,0 +1,50 @@
+obj = new HTMLPurifier_ChildDef_StrictBlockquote();
+
+ $this->assertResult('');
+ $this->assertResult('Valid
'); + $this->assertResult('Needs wrap
'); + $this->assertResult( + 'Wrap'. 'Do not wrap
', + 'Wrap
Do not wrap
' + ); + $this->assertResult( + 'Do not
'.'Wrap', + 'Do not
Wrap
' + ); + $this->assertResult( + 'Hmm.
', + 'Not allowedParagraph.
Hmm.
' + ); + $this->assertResult( + $var = 'He said$var
" + ); + $this->assertResult( + 'Fools!
', + 'Bar'. 'PeopleConniving.
Fools!
' + ); + $this->assertResult('Needs wrap', 'Needs wrap
', + array('HTML.BlockWrapper' => 'dav')); + $this->assertError('Cannot use non-block element as block wrapper.'); + $this->assertNoErrors(); + + } + +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/Strategy/FixNestingTest.php b/tests/HTMLPurifier/Strategy/FixNestingTest.php index ff88cc09..a395cf07 100644 --- a/tests/HTMLPurifier/Strategy/FixNestingTest.php +++ b/tests/HTMLPurifier/Strategy/FixNestingTest.php @@ -83,6 +83,20 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness '' ); + // test inline parent + $this->assertResult( + 'Bold', true, array('HTML.Parent' => 'span') + ); + $this->assertResult( + 'Illegal contents', - '' + '
' ); } diff --git a/tests/index.php b/tests/index.php index 36b58433..92c845fe 100644 --- a/tests/index.php +++ b/tests/index.php @@ -49,6 +49,7 @@ $test_files[] = 'ChildDef/OptionalTest.php'; $test_files[] = 'ChildDef/ChameleonTest.php'; $test_files[] = 'ChildDef/CustomTest.php'; $test_files[] = 'ChildDef/TableTest.php'; +$test_files[] = 'ChildDef/StrictBlockquoteTest.php'; $test_files[] = 'GeneratorTest.php'; $test_files[] = 'EntityLookupTest.php'; $test_files[] = 'Strategy/RemoveForeignElementsTest.php';Illegal contents