From 5690c9e0a2994a8064ba6fa264319799a15f4d55 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Tue, 15 Aug 2006 21:19:45 +0000 Subject: [PATCH] Further optimization: 20% - 12%. Also fixed broken benchmarks. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@266 48356398-32a2-884e-a903-53898d9a118a --- benchmarks/Lexer.php | 2 ++ benchmarks/ProfileDirectLex.php | 2 ++ library/HTMLPurifier/Lexer/DOMLex.php | 27 +++++++++++++++------------ 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/benchmarks/Lexer.php b/benchmarks/Lexer.php index 5fe3a55b..da112fe4 100644 --- a/benchmarks/Lexer.php +++ b/benchmarks/Lexer.php @@ -3,6 +3,8 @@ // emulates inserting a dir called HTMLPurifier into your class dir set_include_path(get_include_path() . PATH_SEPARATOR . '../library/'); +require_once 'HTMLPurifier/ConfigDef.php'; +require_once 'HTMLPurifier/Config.php'; require_once 'HTMLPurifier/Lexer/DirectLex.php'; require_once 'HTMLPurifier/Lexer/PEARSax3.php'; diff --git a/benchmarks/ProfileDirectLex.php b/benchmarks/ProfileDirectLex.php index 11781dcb..175d2894 100644 --- a/benchmarks/ProfileDirectLex.php +++ b/benchmarks/ProfileDirectLex.php @@ -2,6 +2,8 @@ set_include_path(get_include_path() . PATH_SEPARATOR . 
'../library/'); +require_once 'HTMLPurifier/ConfigDef.php'; +require_once 'HTMLPurifier/Config.php'; require_once 'HTMLPurifier/Lexer/DirectLex.php'; $input = file_get_contents('samples/Lexer/4.html'); diff --git a/library/HTMLPurifier/Lexer/DOMLex.php b/library/HTMLPurifier/Lexer/DOMLex.php index 07b12d22..adf10636 100644 --- a/library/HTMLPurifier/Lexer/DOMLex.php +++ b/library/HTMLPurifier/Lexer/DOMLex.php @@ -81,19 +81,19 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer // intercept non element nodes - if ( !($node instanceof DOMElement) ) { - if ($node instanceof DOMComment) { - $tokens[] = $this->factory->createComment($node->data); - } elseif ($node instanceof DOMText || - $node instanceof DOMCharacterData) { + if ( isset($node->data) ) { + if ($node->nodeType === XML_TEXT_NODE || + $node->nodeType === XML_CDATA_SECTION_NODE) { $tokens[] = $this->factory->createText($node->data); + } elseif ($node->nodeType === XML_COMMENT_NODE) { + $tokens[] = $this->factory->createComment($node->data); } // quite possibly, the object wasn't handled, that's fine return; } // We still have to make sure that the element actually IS empty - if (!$node->hasChildNodes()) { + if (!$node->childNodes->length) { if ($collect) { $tokens[] = $this->factory->createEmpty( $node->tagName, @@ -125,13 +125,16 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer * @param $attribute_list DOMNamedNodeMap of DOMAttr objects. * @returns Associative array of attributes. 
*/ - protected function transformAttrToAssoc($attribute_list) { - $attribute_array = array(); - // undocumented behavior - foreach ($attribute_list as $key => $attr) { - $attribute_array[$key] = $attr->value; + protected function transformAttrToAssoc($node_map) { + // NamedNodeMap is not documented very well, so we're using undocumented + // features, namely, the fact that it implements Iterator and + // has a ->length attribute + if ($node_map->length === 0) return array(); + $array = array(); + foreach ($node_map as $attr) { + $array[$attr->name] = $attr->value; } - return $attribute_array; + return $array; } }