Commit various optimizations to the Lexer, and add a stub file for profiling the Lexer.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@92 48356398-32a2-884e-a903-53898d9a118a
2025-10-17 06:56:06 +02:00 · 2006-07-22 22:48:07 +00:00
parent de5ab5e6a0
commit ca1aefe271
5 changed files with 141 additions and 92 deletions
--- a/library/HTMLPurifier/Token.php
+++ b/library/HTMLPurifier/Token.php
@@ -8,36 +8,26 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
 {
    var $is_tag = true;
    var $name;
-    function HTMLPurifier_Token_Tag($name) {
-        // watch out, actually XML is case-sensitive, while HTML
-        // is case insensitive, which means we can't use this for XML
-        $this->name = strtolower($name); // for some reason, the SAX parser
-                                         // uses uppercase. Investigate?
-    }
-}
-
-// a rich tag has attributes
-class HTMLPurifier_Token_RichTag extends HTMLPurifier_Token_Tag // abstract
-{
    var $attributes = array();
-    function HTMLPurifier_Token_RichTag($name, $attributes = array()) {
-        $this->HTMLPurifier_Token_Tag($name);
+    function HTMLPurifier_Token_Tag($name, $attributes = array()) {
+        $this->name = ctype_lower($name) ? $name : strtolower($name);
        $this->attributes = $attributes;
    }
 }

 // start CONCRETE ones

-class HTMLPurifier_Token_Start extends HTMLPurifier_Token_RichTag
+class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
 {
    var $type = 'start';
 }

-class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_RichTag
+class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
 {
    var $type = 'empty';
 }

+// end tags cannot carry attributes, but this class still inherits the attribute-accepting constructor, for optimization reasons
 class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
 {
    var $type = 'end';
@@ -51,7 +41,7 @@ class HTMLPurifier_Token_Text extends HTMLPurifier_Token
    var $is_whitespace = false;
    function HTMLPurifier_Token_Text($data) {
        $this->data = $data;
-        if (trim($data, " \n\r\t") === '') $this->is_whitespace = true;
+        if (ctype_space($data)) $this->is_whitespace = true;
    }
    function append($text) {
        return new HTMLPurifier_Token_Text($this->data . $text->data);