diff --git a/NEWS b/NEWS index 7ee2944c..a013b482 100644 --- a/NEWS +++ b/NEWS @@ -12,6 +12,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier 3.1.2, unknown release date # Using %Core.CollectErrors forces line number/column tracking on, whereas previously you could theoretically turn it off. +# HTMLPurifier_Injector->notifyEnd() is formally deprecated, and has changed + behavior slightly (end tokens it sees are not guaranteed to exist). Please + use handleEnd() instead. ! %Output.AttrSort for when you need your attributes in alphabetical order to deal with a bug in FCKEditor. Requested by frank farmer. ! Enable HTML comments when %HTML.Trusted is on. Requested by Waldo Jaquith. @@ -36,6 +39,11 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier ! Track column numbers when maintain line numbers is on ! Proprietary 'background' attribute on table-related elements converted into corresponding CSS. Thanks Fusemail for sponsoring this feature! +! Add forward(), forwardUntilEndToken(), backward() and current() to Injector + supertype. +! HTMLPurifier_Injector->handleEnd() permits modification to end tokens. The + time of operation varies slightly from notifyEnd() as *all* end tokens are + processed by the injector before they are subject to the well-formedness rules. - Fix two bugs in %URI.MakeAbsolute; one involving empty paths in base URLs, the other involving an undefined $is_folder error. - Throw error when %Core.Encoding is set to a spurious value. Previously, diff --git a/library/HTMLPurifier/Injector.php b/library/HTMLPurifier/Injector.php index ee10934a..cf3609e5 100644 --- a/library/HTMLPurifier/Injector.php +++ b/library/HTMLPurifier/Injector.php @@ -140,6 +140,69 @@ abstract class HTMLPurifier_Injector return true; } + /** + * Iterator function, which starts with the next token and continues until + * you reach the end of the input tokens. + * @warning Please prevent previous references from interfering with this + * function by setting $i = null beforehand! 
+ * @param &$i Current integer index variable for inputTokens + * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference + */ + protected function forward(&$i, &$current) { + if ($i === null) $i = $this->inputIndex + 1; + else $i++; + if (!isset($this->inputTokens[$i])) return false; + $current = $this->inputTokens[$i]; + return true; + } + + /** + * Similar to forward(), but accepts a third parameter $nesting (which + * should be initialized at 0) and stops when we hit the end tag + * for the node $this->inputIndex starts in. + */ + protected function forwardUntilEndToken(&$i, &$current, &$nesting) { + $result = $this->forward($i, $current); + if (!$result) return false; + if ($nesting === null) $nesting = 0; + if ($current instanceof HTMLPurifier_Token_Start) $nesting++; + elseif ($current instanceof HTMLPurifier_Token_End) { + if ($nesting <= 0) return false; + $nesting--; + } + return true; + } + + /** + * Iterator function, starts with the previous token and continues until + * you reach the beginning of input tokens. + * @warning Please prevent previous references from interfering with this + * function by setting $i = null beforehand! + * @param &$i Current integer index variable for inputTokens + * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference + */ + protected function backward(&$i, &$current) { + if ($i === null) $i = $this->inputIndex - 1; + else $i--; + if ($i < 0) return false; + $current = $this->inputTokens[$i]; + return true; + } + + /** + * Initializes the iterator at the current position. Use in a do {} while; + * loop to force the forward() and backward() functions to start at the + * current location. + * @warning Please prevent previous references from interfering with this + * function by setting $i = null beforehand! + * @param &$i Current integer index variable for inputTokens + * @param &$current Current token variable. 
Do NOT use $token, as that variable is also a reference + */ + protected function current(&$i, &$current) { + if ($i === null) $i = $this->inputIndex; + $current = $this->inputTokens[$i]; + } + /** * Handler that is called when a text token is processed */ @@ -150,6 +213,13 @@ abstract class HTMLPurifier_Injector */ public function handleElement(&$token) {} + /** + * Handler that is called when an end token is processed + */ + public function handleEnd(&$token) { + $this->notifyEnd($token); + } + /** * Notifier that is called when an end token is processed * @note This differs from handlers in that the token is read-only diff --git a/library/HTMLPurifier/Injector/AutoParagraph.php b/library/HTMLPurifier/Injector/AutoParagraph.php index a722313b..3a766624 100644 --- a/library/HTMLPurifier/Injector/AutoParagraph.php +++ b/library/HTMLPurifier/Injector/AutoParagraph.php @@ -29,7 +29,7 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector // if it doesn't, see the next if-block if you're in the document. $i = $nesting = null; - if (!$this->_forwardUntilEndToken($i, $current, $nesting) && $token->is_whitespace) { + if (!$this->forwardUntilEndToken($i, $current, $nesting) && $token->is_whitespace) { // State 1.1: ... 
^ (whitespace, then document end) // ---- // This is a degenerate case @@ -101,7 +101,7 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector // Check if this token is adjacent to the parent token // (seek backwards until token isn't whitespace) $i = null; - $this->_backward($i, $prev); + $this->backward($i, $prev); if (!$prev instanceof HTMLPurifier_Token_Start) { // Token wasn't adjacent @@ -160,7 +160,7 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector } $i = null; - if ($this->_backward($i, $prev)) { + if ($this->backward($i, $prev)) { if ( !$prev instanceof HTMLPurifier_Token_Text ) { @@ -296,11 +296,11 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector * to insert a
tag. */ private function _pLookAhead() { - $this->_current($i, $current); + $this->current($i, $current); if ($current instanceof HTMLPurifier_Token_Start) $nesting = 1; else $nesting = 0; $ok = false; - while ($this->_forwardUntilEndToken($i, $current, $nesting)) { + while ($this->forwardUntilEndToken($i, $current, $nesting)) { $result = $this->_checkNeedsP($current); if ($result !== null) { $ok = $result; @@ -310,69 +310,6 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector return $ok; } - /** - * Iterator function, which starts with the next token and continues until - * you reach the end of the input tokens. - * @warning Please prevent previous references from interfering with this - * functions by setting $i = null beforehand! - * @param &$i Current integer index variable for inputTokens - * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference - */ - private function _forward(&$i, &$current) { - if ($i === null) $i = $this->inputIndex + 1; - else $i++; - if (!isset($this->inputTokens[$i])) return false; - $current = $this->inputTokens[$i]; - return true; - } - - /** - * Similar to _forward, but accepts a third parameter $nesting (which - * should be initialized at 0) and stops when we hit the end tag - * for the node $this->inputIndex starts in. - */ - private function _forwardUntilEndToken(&$i, &$current, &$nesting) { - $result = $this->_forward($i, $current); - if (!$result) return false; - if ($nesting === null) $nesting = 0; - if ($current instanceof HTMLPurifier_Token_Start) $nesting++; - elseif ($current instanceof HTMLPurifier_Token_End) { - if ($nesting <= 0) return false; - $nesting--; - } - return true; - } - - /** - * Iterator function, starts with the previous token and continues until - * you reach the beginning of input tokens. - * @warning Please prevent previous references from interfering with this - * functions by setting $i = null beforehand! 
- * @param &$i Current integer index variable for inputTokens - * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference - */ - private function _backward(&$i, &$current) { - if ($i === null) $i = $this->inputIndex - 1; - else $i--; - if ($i < 0) return false; - $current = $this->inputTokens[$i]; - return true; - } - - /** - * Initializes the iterator at the current position. Use in a do {} while; - * loop to force the _forward and _backward functions to start at the - * current location. - * @warning Please prevent previous references from interfering with this - * functions by setting $i = null beforehand! - * @param &$i Current integer index variable for inputTokens - * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference - */ - private function _current(&$i, &$current) { - if ($i === null) $i = $this->inputIndex; - $current = $this->inputTokens[$i]; - } - /** * Determines if a particular token requires an earlier inline token * to get a paragraph. This should be used with _forwardUntilEndToken diff --git a/library/HTMLPurifier/Injector/SafeObject.php b/library/HTMLPurifier/Injector/SafeObject.php index f3794068..1ac6c359 100644 --- a/library/HTMLPurifier/Injector/SafeObject.php +++ b/library/HTMLPurifier/Injector/SafeObject.php @@ -72,7 +72,10 @@ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector } } - public function notifyEnd($token) { + public function handleEnd(&$token) { + // This is the WRONG way of handling the object and param stacks; + // we should be inserting them directly on the relevant object tokens + // so that the global stack handling handles it. 
if ($token->name == 'object') { array_pop($this->objectStack); array_pop($this->paramStack); diff --git a/library/HTMLPurifier/Strategy/MakeWellFormed.php b/library/HTMLPurifier/Strategy/MakeWellFormed.php index cf68f6d8..87b22d5e 100644 --- a/library/HTMLPurifier/Strategy/MakeWellFormed.php +++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php @@ -25,6 +25,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $this->currentNesting = array(); $this->inputIndex = false; $this->inputTokens =& $tokens; + $this->config = $config; + $this->context = $context; // context variables $context->register('CurrentNesting', $this->currentNesting); @@ -81,7 +83,37 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $context->register('CurrentToken', $token); // isset is in loop because $tokens size changes during loop exec - for ($this->inputIndex = 0; isset($tokens[$this->inputIndex]); $this->inputIndex++) { + for ( + $this->inputIndex = 0; + $this->inputIndex == 0 || isset($tokens[$this->inputIndex - 1]); + $this->inputIndex++ + ) { + + foreach ($this->injectors as $injector) { + if ($injector->skip > 0) $injector->skip--; + } + + // handle case of document end + if (!isset($tokens[$this->inputIndex])) { + // we're at the end now, fix all still unclosed tags (this is + // duplicated from the end of the loop with some slight modifications) + // not using $skipped_tags since it would invariably be all of them + if (!empty($this->currentNesting)) { + $top_nesting = array_pop($this->currentNesting); + // please don't redefine $i! 
+ if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) { + $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting); + } + // instead of splice, since we know this is the end + $new_token = new HTMLPurifier_Token_End($top_nesting->name); + $tokens[] = $new_token; + $this->currentNesting[] = $top_nesting; + --$this->inputIndex; + // punt to the regular code to handle the new token + continue; + } + break; + } // if all goes well, this token will be passed through unharmed $token = $tokens[$this->inputIndex]; @@ -90,10 +122,6 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy //printTokens($tokens, $this->inputIndex); //var_dump($this->currentNesting); - foreach ($this->injectors as $injector) { - if ($injector->skip > 0) $injector->skip--; - } - // quick-check: if it's not a tag, no need to process if (empty( $token->is_tag )) { if ($token instanceof HTMLPurifier_Token_Text) { @@ -194,13 +222,12 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy continue; } + if (!$this->handleEnd($token)) continue; + // first, check for the simplest case: everything closes neatly $current_parent = array_pop($this->currentNesting); if ($current_parent->name == $token->name) { $token->start = $current_parent; - foreach ($this->injectors as $i => $injector) { - $injector->notifyEnd($token); - } continue; } @@ -249,9 +276,6 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $this->insertAfter($new_token); //printTokens($tokens, $this->inputIndex); //var_dump($this->currentNesting); - foreach ($this->injectors as $injector) { - $injector->notifyEnd($new_token); - } } } @@ -260,24 +284,6 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $context->destroy('InputIndex'); $context->destroy('CurrentToken'); - // we're at the end now, fix all still unclosed tags (this is - // duplicated from the end of the loop with some slight modifications) - 
// not using $skipped_tags since it would invariably be all of them - if (!empty($this->currentNesting)) { - for ($i = count($this->currentNesting) - 1; $i >= 0; $i--) { - // please don't redefine $i! - if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) { - $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]); - } - // instead of splice, since we know this is the end - $tokens[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name); - $new_token->start = $this->currentNesting[$i]; - foreach ($this->injectors as $injector) { - $injector->notifyEnd($new_token); - } - } - } - unset($this->outputTokens, $this->injectors, $this->currentInjector, $this->currentNesting, $this->inputTokens, $this->inputIndex); return $tokens; @@ -323,7 +329,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy * * If $token is false, the current token is deleted. */ - protected function processToken($token, $config, $context) { + protected function processToken($token, $config, $context, $is_end = false) { if (is_array($token) || is_int($token)) { // the original token was overloaded by an injector, time // to some fancy acrobatics @@ -333,26 +339,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy array_splice($this->inputTokens, $this->inputIndex, $token, array()); } if ($this->injectors) { - $rewind = $this->injectors[$this->currentInjector]->getRewind(); - if ($rewind < 0) $rewind = 0; - if ($rewind !== false) { - $offset = $this->inputIndex - $rewind; - if ($this->injectors) { - foreach ($this->injectors as $i => $injector) { - if ($i == $this->currentInjector) { - $injector->skip = 0; - } else { - $injector->skip += $offset; - } - } - } - for ($this->inputIndex--; $this->inputIndex >= $rewind; $this->inputIndex--) { - $prev = $this->inputTokens[$this->inputIndex]; - if ($prev instanceof HTMLPurifier_Token_Start) 
array_pop($this->currentNesting); - elseif ($prev instanceof HTMLPurifier_Token_End) $this->currentNesting[] = $prev->start; - } - $this->inputIndex++; - } else { + if (!$this->checkRewind()) { // adjust the injector skips based on the array substitution $offset = is_array($token) ? count($token) : 0; for ($i = 0; $i <= $this->currentInjector; $i++) { @@ -370,18 +357,66 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy --$this->inputIndex; } elseif ($token) { - // regular case - $this->swap($token); - if ($token instanceof HTMLPurifier_Token_Start) { - $this->currentNesting[] = $token; - } elseif ($token instanceof HTMLPurifier_Token_End) { - // not actually used - $token->start = array_pop($this->currentNesting); + if ($is_end) { + $this->swap($token); + if (!$token instanceof HTMLPurifier_Token_End) { + --$this->inputIndex; + } + } else { + // regular case + $this->swap($token); + if ($token instanceof HTMLPurifier_Token_Start) { + $this->currentNesting[] = $token; + } elseif ($token instanceof HTMLPurifier_Token_End) { + // not actually used + $token->start = array_pop($this->currentNesting); + } } } else { $this->remove(); } } + /** + * Checks for a rewind, adjusts the input index and skips accordingly. 
+ */ + protected function checkRewind() { + $rewind = $this->injectors[$this->currentInjector]->getRewind(); + if ($rewind < 0) $rewind = 0; + if (is_int($rewind)) { + $offset = $this->inputIndex - $rewind; + if ($this->injectors) { + foreach ($this->injectors as $i => $injector) { + if ($i == $this->currentInjector) { + $injector->skip = 0; + } else { + $injector->skip += $offset; + } + } + } + for ($this->inputIndex--; $this->inputIndex >= $rewind; $this->inputIndex--) { + $prev = $this->inputTokens[$this->inputIndex]; + if ($prev instanceof HTMLPurifier_Token_Start) array_pop($this->currentNesting); + elseif ($prev instanceof HTMLPurifier_Token_End) $this->currentNesting[] = $prev->start; + } + $this->inputIndex++; + return true; + } else { + return false; + } + } + + protected function handleEnd($token) { + foreach ($this->injectors as $i => $injector) { + if (!$injector->skip) $injector->handleEnd($token); + if (is_array($token) || is_int($token)) { + $this->currentInjector = $i; + break; + } + } + $this->processToken($token, $this->config, $this->context, true); + return $token instanceof HTMLPurifier_Token_End; + } + } diff --git a/tests/HTMLPurifier/Strategy/MakeWellFormed_InjectorTest.php b/tests/HTMLPurifier/Strategy/MakeWellFormed_InjectorTest.php index 8a2ebb83..f16d9347 100644 --- a/tests/HTMLPurifier/Strategy/MakeWellFormed_InjectorTest.php +++ b/tests/HTMLPurifier/Strategy/MakeWellFormed_InjectorTest.php @@ -12,16 +12,15 @@ class HTMLPurifier_Strategy_MakeWellFormed_InjectorTest extends HTMLPurifier_Str generate_mock_once('HTMLPurifier_Injector'); } - function testEndNotification() { + function testEndHandler() { $mock = new HTMLPurifier_InjectorMock(); $mock->skip = false; $b = new HTMLPurifier_Token_End('b'); - $b->start = new HTMLPurifier_Token_Start('b'); - $mock->expectAt(0, 'notifyEnd', array($b)); + $mock->expectAt(0, 'handleEnd', array($b)); $i = new HTMLPurifier_Token_End('i'); - $i->start = new HTMLPurifier_Token_Start('i'); - 
$mock->expectAt(1, 'notifyEnd', array($i)); - $mock->expectCallCount('notifyEnd', 2); + $mock->expectAt(1, 'handleEnd', array($i)); + $mock->expectCallCount('handleEnd', 2); + $mock->setReturnValue('getRewind', false); $this->config->set('AutoFormat', 'AutoParagraph', false); $this->config->set('AutoFormat', 'Linkify', false); $this->config->set('AutoFormat', 'Custom', array($mock));