diff --git a/NEWS b/NEWS index 86520f65..2dce9d37 100644 --- a/NEWS +++ b/NEWS @@ -19,6 +19,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier has 'id' attribute fixed, thanks NykO18 for reporting - Fix warning emitted when a non-supported URI scheme is passed to the MakeAbsolute URIFilter, thanks NykO18 (again) +- Further refine AutoParagraph injector. Behavior inside of elements + allowing paragraph tags clarified: only inline content delimited by + double newlines (not block elements) is paragraphed. . %Core.AcceptFullDocuments renamed to %Core.ConvertDocumentToFragment to better communicate its purpose diff --git a/library/HTMLPurifier/Injector/AutoParagraph.php b/library/HTMLPurifier/Injector/AutoParagraph.php index 6e0a6a3e..56a6a268 100644 --- a/library/HTMLPurifier/Injector/AutoParagraph.php +++ b/library/HTMLPurifier/Injector/AutoParagraph.php @@ -6,20 +6,28 @@ HTMLPurifier_ConfigSchema::define( 'AutoFormat', 'AutoParagraph', false, 'bool', '
This directive turns on auto-paragraphing, where double newlines are - converted in to paragraphs whenever possible. Auto-paragraphing - applies when: + converted into paragraphs whenever possible. Auto-paragraphing:
p
tags must be allowed for this directive to take effect.
We do not use br
tags for paragraphing, as that is
semantically incorrect.
+ To prevent auto-paragraphing as a content-producer, refrain from using
+ double-newlines except to specify a new paragraph or in contexts where
+ it has special meaning (whitespace usually has no meaning except in
+ tags like pre
, so this should not be difficult.) To prevent
+ the paragraphing of inline text adjacent to block elements, wrap them
+ in div
tags (the behavior is slightly different outside of
+ the root node.)
+
This directive has been available since 2.0.1.
@@ -62,19 +70,27 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector $ok = false; // test if up-coming tokens are either block or have // a double newline in them + $nesting = 0; for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) { if ($this->inputTokens[$i]->type == 'start'){ if (!$this->_isInline($this->inputTokens[$i])) { - $ok = true; + // we haven't found a double-newline, and + // we've hit a block element, so don't paragraph + $ok = false; + break; } - break; + $nesting++; + } + if ($this->inputTokens[$i]->type == 'end') { + if ($nesting <= 0) break; + $nesting--; } - if ($this->inputTokens[$i]->type == 'end') break; if ($this->inputTokens[$i]->type == 'text') { + // found it! if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) { $ok = true; + break; } - if (!$this->inputTokens[$i]->is_whitespace) break; } } if ($ok) { diff --git a/library/HTMLPurifier/Strategy/MakeWellFormed.php b/library/HTMLPurifier/Strategy/MakeWellFormed.php index b8173f6d..930bfca1 100644 --- a/library/HTMLPurifier/Strategy/MakeWellFormed.php +++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php @@ -105,6 +105,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // if all goes well, this token will be passed through unharmed $token = $tokens[$this->inputIndex]; + //printTokens($tokens, $this->inputIndex); + foreach ($this->injectors as $i => $x) { if ($x->skip > 0) $this->injectors[$i]->skip--; } diff --git a/tests/HTMLPurifier/Injector/AutoParagraphTest.php b/tests/HTMLPurifier/Injector/AutoParagraphTest.php index 23743dff..5c726a11 100644 --- a/tests/HTMLPurifier/Injector/AutoParagraphTest.php +++ b/tests/HTMLPurifier/Injector/AutoParagraphTest.php @@ -194,10 +194,7 @@ Bar', } function testNoParagraphSingleInlineNodeInBlockNode() { - $this->assertResult( -'Par1 -
bar
mmm asdf
bar
mmm
+
+asdf
bar
mmm
asdf
bar
mmm
+
+asdf
asdf bar
mmm
asdf