1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-05 13:47:24 +02:00

Fix quadratic behavior in DOMLex due to array_shift.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
This commit is contained in:
Edward Z. Yang
2013-09-17 00:48:42 -07:00
parent cf44f399f8
commit 412bae13b5
6 changed files with 188 additions and 128 deletions

View File

@@ -92,11 +92,11 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
protected function tokenizeDOM($node, &$tokens)
{
$level = 0;
$nodes = array($level => array($node));
$nodes = array($level => new HTMLPurifier_Queue(array($node)));
$closingNodes = array();
do {
while (!empty($nodes[$level])) {
$node = array_shift($nodes[$level]); // FIFO
while (!$nodes[$level]->isEmpty()) {
$node = $nodes[$level]->shift(); // FIFO
$collect = $level > 0 ? true : false;
$needEndingTag = $this->createStartNode($node, $tokens, $collect);
if ($needEndingTag) {
@@ -104,9 +104,9 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
}
if ($node->childNodes && $node->childNodes->length) {
$level++;
$nodes[$level] = array();
$nodes[$level] = new HTMLPurifier_Queue();
foreach ($node->childNodes as $childNode) {
array_push($nodes[$level], $childNode);
$nodes[$level]->push($childNode);
}
}
}

View File

@@ -0,0 +1,56 @@
<?php
/**
* A simple array-backed queue, based off of the classic Okasaki
* persistent amortized queue. The basic idea is to maintain two
* stacks: an input stack and an output stack. When the output
* stack runs out, reverse the input stack and use it as the output
* stack.
*
* We don't use the SPL implementation because it's only supported
* on PHP 5.3 and later.
*
* Exercise: Prove that push/pop on this queue take amortized O(1) time.
*
* Exercise: Extend this queue to be a deque, while preserving amortized
* O(1) time. Some care must be taken on rebalancing to avoid quadratic
* behaviour caused by repeatedly shuffling data from the input stack
* to the output stack and back.
*/
class HTMLPurifier_Queue {
private $input;
private $output;
public function __construct($input = array()) {
$this->input = $input;
$this->output = array();
}
/**
* Shifts an element off the front of the queue.
*/
public function shift() {
if (empty($this->output)) {
$this->output = array_reverse($this->input);
$this->input = array();
}
if (empty($this->output)) {
return NULL;
}
return array_pop($this->output);
}
/**
* Pushes an element onto the front of the queue.
*/
public function push($x) {
array_push($this->input, $x);
}
/**
* Checks if it's empty.
*/
public function isEmpty() {
return empty($this->input) && empty($this->output);
}
}