1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-04 05:07:55 +02:00

Implement Injector->handleEnd, with lots of refactoring for injector.

The previous design of injector streaming allowed only start, empty and text
tokens to be edited, because they could be safely modified without causing
well-formedness errors.  By modifying notifyEnd to operate before
MakeWellFormed's safeguards take effect, it can be converted into a handle
function, allowing for arbitrary modification of end tags.

This change involved quite a bit of restructuring of the MakeWellFormed code:
the handling of end-of-document tags was moved inside the loop, so that
rewinding on those tags works; code that inserts end tags now reuses the end
tag codepath (as the tags could be changed out from under you); and
processToken gained an extra parameter to force re-processing of a token if
the original token was an end token.

We're not exactly sure if handleEnd works at this point, but the important
talking point about this refactoring is that nothing else broke. Also, a number
of convenience functions were moved from AutoParagraph to the Injector
supertype (specifically: forward, forwardToEndToken, backward, and current).

Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
This commit is contained in:
Edward Z. Yang
2008-10-01 00:54:51 -04:00
parent d0fdcc103e
commit fa413e96ac
6 changed files with 184 additions and 132 deletions

View File

@@ -25,6 +25,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$this->currentNesting = array();
$this->inputIndex = false;
$this->inputTokens =& $tokens;
$this->config = $config;
$this->context = $context;
// context variables
$context->register('CurrentNesting', $this->currentNesting);
@@ -81,7 +83,37 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$context->register('CurrentToken', $token);
// isset is in loop because $tokens size changes during loop exec
for ($this->inputIndex = 0; isset($tokens[$this->inputIndex]); $this->inputIndex++) {
for (
$this->inputIndex = 0;
$this->inputIndex == 0 || isset($tokens[$this->inputIndex - 1]);
$this->inputIndex++
) {
foreach ($this->injectors as $injector) {
if ($injector->skip > 0) $injector->skip--;
}
// handle case of document end
if (!isset($tokens[$this->inputIndex])) {
// we're at the end now, fix all still unclosed tags (this is
// duplicated from the end of the loop with some slight modifications)
// not using $skipped_tags since it would invariably be all of them
if (!empty($this->currentNesting)) {
$top_nesting = array_pop($this->currentNesting);
// please don't redefine $i!
if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
}
// instead of splice, since we know this is the end
$new_token = new HTMLPurifier_Token_End($top_nesting->name);
$tokens[] = $new_token;
$this->currentNesting[] = $top_nesting;
--$this->inputIndex;
// punt to the regular code to handle the new token
continue;
}
break;
}
// if all goes well, this token will be passed through unharmed
$token = $tokens[$this->inputIndex];
@@ -90,10 +122,6 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
//printTokens($tokens, $this->inputIndex);
//var_dump($this->currentNesting);
foreach ($this->injectors as $injector) {
if ($injector->skip > 0) $injector->skip--;
}
// quick-check: if it's not a tag, no need to process
if (empty( $token->is_tag )) {
if ($token instanceof HTMLPurifier_Token_Text) {
@@ -194,13 +222,12 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
continue;
}
if (!$this->handleEnd($token)) continue;
// first, check for the simplest case: everything closes neatly
$current_parent = array_pop($this->currentNesting);
if ($current_parent->name == $token->name) {
$token->start = $current_parent;
foreach ($this->injectors as $i => $injector) {
$injector->notifyEnd($token);
}
continue;
}
@@ -249,9 +276,6 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$this->insertAfter($new_token);
//printTokens($tokens, $this->inputIndex);
//var_dump($this->currentNesting);
foreach ($this->injectors as $injector) {
$injector->notifyEnd($new_token);
}
}
}
@@ -260,24 +284,6 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$context->destroy('InputIndex');
$context->destroy('CurrentToken');
// we're at the end now, fix all still unclosed tags (this is
// duplicated from the end of the loop with some slight modifications)
// not using $skipped_tags since it would invariably be all of them
if (!empty($this->currentNesting)) {
for ($i = count($this->currentNesting) - 1; $i >= 0; $i--) {
// please don't redefine $i!
if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
}
// instead of splice, since we know this is the end
$tokens[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
$new_token->start = $this->currentNesting[$i];
foreach ($this->injectors as $injector) {
$injector->notifyEnd($new_token);
}
}
}
unset($this->outputTokens, $this->injectors, $this->currentInjector,
$this->currentNesting, $this->inputTokens, $this->inputIndex);
return $tokens;
@@ -323,7 +329,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
*
* If $token is false, the current token is deleted.
*/
protected function processToken($token, $config, $context) {
protected function processToken($token, $config, $context, $is_end = false) {
if (is_array($token) || is_int($token)) {
// the original token was overloaded by an injector, time
// to do some fancy acrobatics
@@ -333,26 +339,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
array_splice($this->inputTokens, $this->inputIndex, $token, array());
}
if ($this->injectors) {
$rewind = $this->injectors[$this->currentInjector]->getRewind();
if ($rewind < 0) $rewind = 0;
if ($rewind !== false) {
$offset = $this->inputIndex - $rewind;
if ($this->injectors) {
foreach ($this->injectors as $i => $injector) {
if ($i == $this->currentInjector) {
$injector->skip = 0;
} else {
$injector->skip += $offset;
}
}
}
for ($this->inputIndex--; $this->inputIndex >= $rewind; $this->inputIndex--) {
$prev = $this->inputTokens[$this->inputIndex];
if ($prev instanceof HTMLPurifier_Token_Start) array_pop($this->currentNesting);
elseif ($prev instanceof HTMLPurifier_Token_End) $this->currentNesting[] = $prev->start;
}
$this->inputIndex++;
} else {
if (!$this->checkRewind()) {
// adjust the injector skips based on the array substitution
$offset = is_array($token) ? count($token) : 0;
for ($i = 0; $i <= $this->currentInjector; $i++) {
@@ -370,18 +357,66 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
--$this->inputIndex;
} elseif ($token) {
// regular case
$this->swap($token);
if ($token instanceof HTMLPurifier_Token_Start) {
$this->currentNesting[] = $token;
} elseif ($token instanceof HTMLPurifier_Token_End) {
// not actually used
$token->start = array_pop($this->currentNesting);
if ($is_end) {
$this->swap($token);
if (!$token instanceof HTMLPurifier_Token_End) {
--$this->inputIndex;
}
} else {
// regular case
$this->swap($token);
if ($token instanceof HTMLPurifier_Token_Start) {
$this->currentNesting[] = $token;
} elseif ($token instanceof HTMLPurifier_Token_End) {
// not actually used
$token->start = array_pop($this->currentNesting);
}
}
} else {
$this->remove();
}
}
/**
 * Asks the current injector whether it wants to rewind and, if so, moves
 * the input index back and repairs the bookkeeping that depends on it.
 *
 * The rewind target comes from getRewind() on the injector selected by
 * $this->currentInjector; a negative value is clamped to zero. When the
 * target is an integer:
 *  - the current injector's skip counter is reset to zero and every other
 *    injector's skip counter grows by the distance between the input index
 *    and the target;
 *  - the input index is stepped backwards down to the target, popping
 *    currentNesting for each start token passed and pushing back the
 *    ->start of each end token passed.
 *
 * @return bool True if a rewind was performed, false if the injector did
 *              not supply an integer rewind target.
 */
protected function checkRewind() {
    $target = $this->injectors[$this->currentInjector]->getRewind();
    if ($target < 0) {
        $target = 0;
    }
    if (!is_int($target)) {
        // no rewind requested; the caller handles skip adjustment itself
        return false;
    }
    $distance = $this->inputIndex - $target;
    if ($this->injectors) {
        foreach ($this->injectors as $key => $inj) {
            if ($key == $this->currentInjector) {
                // the rewinding injector gets to see every token again
                $inj->skip = 0;
                continue;
            }
            $inj->skip += $distance;
        }
    }
    // walk backwards, undoing the nesting changes of each token we pass
    $this->inputIndex--;
    while ($this->inputIndex >= $target) {
        $passed = $this->inputTokens[$this->inputIndex];
        if ($passed instanceof HTMLPurifier_Token_Start) {
            array_pop($this->currentNesting);
        } elseif ($passed instanceof HTMLPurifier_Token_End) {
            $this->currentNesting[] = $passed->start;
        }
        $this->inputIndex--;
    }
    // the loop leaves the index one position too low; step forward again
    $this->inputIndex++;
    return true;
}
/**
 * Offers an end token to the injectors, then hands the (possibly
 * replaced) token to processToken() in end-token mode.
 *
 * Each injector whose skip counter is zero has its handleEnd() called with
 * the token. As soon as $token is an array or an integer, that injector is
 * recorded in $this->currentInjector and no further injectors are
 * consulted.
 * NOTE(review): this only has an effect if the injector's handleEnd()
 * receives $token by reference -- confirm against the Injector class.
 *
 * @param mixed $token The end token being processed.
 * @return bool True if $token is still an HTMLPurifier_Token_End after the
 *              injectors and processToken() have run.
 */
protected function handleEnd($token) {
    foreach ($this->injectors as $index => $handler) {
        if (!$handler->skip) {
            $handler->handleEnd($token);
        }
        $overloaded = is_array($token) || is_int($token);
        if (!$overloaded) {
            continue;
        }
        // remember who overloaded the token so rewinds and skips
        // are attributed to the right injector
        $this->currentInjector = $index;
        break;
    }
    $this->processToken($token, $this->config, $this->context, true);
    $still_end = $token instanceof HTMLPurifier_Token_End;
    return $still_end;
}
}