From f80de908bd100664dc451a880875cada88ee20d6 Mon Sep 17 00:00:00 2001
From: "Edward Z. Yang" <edwardzyang@thewritingpot.com>
Date: Tue, 31 Jul 2007 01:04:38 +0000
Subject: [PATCH] [2.1.0] Optimize ConfigSchema to only perform safety checks
 when HTMLPURIFIER_SCHEMA_STRICT is true - Remove useless ->revision check in
 Config.php - Add simple trace file to benchmarks folder

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1319 48356398-32a2-884e-a903-53898d9a118a
---
 NEWS                                  |   2 +
 benchmarks/Trace.php                  |  12 ++
 configdoc/generate.php                |   2 +
 library/HTMLPurifier/Config.php       |   1 -
 library/HTMLPurifier/ConfigSchema.php | 185 ++++++++++++++------------
 tests/index.php                       |   1 +
 6 files changed, 120 insertions(+), 83 deletions(-)
 create mode 100644 benchmarks/Trace.php

diff --git a/NEWS b/NEWS
index 7e48c753..f5f89d32 100644
--- a/NEWS
+++ b/NEWS
@@ -41,6 +41,8 @@ NEWS ( CHANGELOG and HISTORY )                                     HTMLPurifier
 . Custom ChildDef added to default include list
 . URIScheme reflection improved: will not attempt to include file if class
   already exists. May clobber autoload, so I need to keep an eye on it
+. ConfigSchema heavily optimized, will only collect information and validate
+  definitions when HTMLPURIFIER_SCHEMA_STRICT is true.
 
 2.0.1, released 2007-06-27
 ! Tag auto-closing now based on a ChildDef heuristic rather than a
diff --git a/benchmarks/Trace.php b/benchmarks/Trace.php
new file mode 100644
index 00000000..fa98ffac
--- /dev/null
+++ b/benchmarks/Trace.php
@@ -0,0 +1,12 @@
+<?php
+
+ini_set('xdebug.trace_format', 1);
+ini_set('xdebug.show_mem_delta', true);
+
+xdebug_start_trace(dirname(__FILE__) . '/Trace');
+require_once '../library/HTMLPurifier.auto.php';
+
+$purifier = new HTMLPurifier();
+
+$data = $purifier->purify(file_get_contents('samples/Lexer/4.html'));
+xdebug_stop_trace();
diff --git a/configdoc/generate.php b/configdoc/generate.php
index 97e96433..9e73f4c7 100644
--- a/configdoc/generate.php
+++ b/configdoc/generate.php
@@ -18,6 +18,8 @@ TODO:
 if (version_compare('5', PHP_VERSION, '>')) exit('Requires PHP 5 or higher.');
 error_reporting(E_ALL); // probably not possible to use E_STRICT
 
+define('HTMLPURIFIER_SCHEMA_STRICT', true); // description data needs to be collected
+
 // load dual-libraries
 require_once '../library/HTMLPurifier.auto.php';
 require_once 'library/ConfigDoc.auto.php';
diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php
index 02594db4..2b811fa9 100644
--- a/library/HTMLPurifier/Config.php
+++ b/library/HTMLPurifier/Config.php
@@ -106,7 +106,6 @@ class HTMLPurifier_Config
         $ret = HTMLPurifier_Config::createDefault();
         if (is_string($config)) $ret->loadIni($config);
         elseif (is_array($config)) $ret->loadArray($config);
-        if (isset($revision)) $ret->revision = $revision;
         return $ret;
     }
     
diff --git a/library/HTMLPurifier/ConfigSchema.php b/library/HTMLPurifier/ConfigSchema.php
index e17a3f07..d6700e6e 100644
--- a/library/HTMLPurifier/ConfigSchema.php
+++ b/library/HTMLPurifier/ConfigSchema.php
@@ -6,6 +6,8 @@ require_once 'HTMLPurifier/ConfigDef/Namespace.php';
 require_once 'HTMLPurifier/ConfigDef/Directive.php';
 require_once 'HTMLPurifier/ConfigDef/DirectiveAlias.php';
 
+if (!defined('HTMLPURIFIER_SCHEMA_STRICT')) define('HTMLPURIFIER_SCHEMA_STRICT', false);
+
 /**
  * Configuration definition, defines directives and their defaults.
  * @note If you update this, please update Printer_ConfigForm
@@ -102,27 +104,30 @@ class HTMLPurifier_ConfigSchema {
      *      HTMLPurifier_DirectiveDef::$type for allowed values
      * @param $description Description of directive for documentation
      */
-    function define(
-        $namespace, $name, $default, $type, 
-        $description
-    ) {
+    function define($namespace, $name, $default, $type, $description) {
         $def =& HTMLPurifier_ConfigSchema::instance();
-        if (!isset($def->info[$namespace])) {
-            trigger_error('Cannot define directive for undefined namespace',
-                E_USER_ERROR);
-            return;
-        }
-        if (!ctype_alnum($name)) {
-            trigger_error('Directive name must be alphanumeric',
-                E_USER_ERROR);
-            return;
-        }
-        if (empty($description)) {
-            trigger_error('Description must be non-empty',
-                E_USER_ERROR);
-            return;
+        
+        // basic sanity checks
+        if (HTMLPURIFIER_SCHEMA_STRICT) {
+            if (!isset($def->info[$namespace])) {
+                trigger_error('Cannot define directive for undefined namespace',
+                    E_USER_ERROR);
+                return;
+            }
+            if (!ctype_alnum($name)) {
+                trigger_error('Directive name must be alphanumeric',
+                    E_USER_ERROR);
+                return;
+            }
+            if (empty($description)) {
+                trigger_error('Description must be non-empty',
+                    E_USER_ERROR);
+                return;
+            }
         }
+        
         if (isset($def->info[$namespace][$name])) {
+            // already defined
             if (
                 $def->info[$namespace][$name]->type !== $type ||
                 $def->defaults[$namespace][$name]   !== $default
@@ -131,29 +136,35 @@ class HTMLPurifier_ConfigSchema {
                 return;
             }
         } else {
-            // process modifiers
+            // needs defining
+            
+            // process modifiers (OPTIMIZE!)
             $type_values = explode('/', $type, 2);
             $type = $type_values[0];
             $modifier = isset($type_values[1]) ? $type_values[1] : false;
             $allow_null = ($modifier === 'null');
             
-            if (!isset($def->types[$type])) {
-                trigger_error('Invalid type for configuration directive',
-                    E_USER_ERROR);
-                return;
-            }
-            $default = $def->validate($default, $type, $allow_null);
-            if ($def->isError($default)) {
-                trigger_error('Default value does not match directive type',
-                    E_USER_ERROR);
-                return;
+            if (HTMLPURIFIER_SCHEMA_STRICT) {
+                if (!isset($def->types[$type])) {
+                    trigger_error('Invalid type for configuration directive',
+                        E_USER_ERROR);
+                    return;
+                }
+                $default = $def->validate($default, $type, $allow_null);
+                if ($def->isError($default)) {
+                    trigger_error('Default value does not match directive type',
+                        E_USER_ERROR);
+                    return;
+                }
             }
+            
             $def->info[$namespace][$name] =
                 new HTMLPurifier_ConfigDef_Directive();
             $def->info[$namespace][$name]->type = $type;
             $def->info[$namespace][$name]->allow_null = $allow_null;
             $def->defaults[$namespace][$name]   = $default;
         }
+        if (!HTMLPURIFIER_SCHEMA_STRICT) return;
         $backtrace = debug_backtrace();
         $file = $def->mungeFilename($backtrace[0]['file']);
         $line = $backtrace[0]['line'];
@@ -168,19 +179,21 @@ class HTMLPurifier_ConfigSchema {
      */
     function defineNamespace($namespace, $description) {
         $def =& HTMLPurifier_ConfigSchema::instance();
-        if (isset($def->info[$namespace])) {
-            trigger_error('Cannot redefine namespace', E_USER_ERROR);
-            return;
-        }
-        if (!ctype_alnum($namespace)) {
-            trigger_error('Namespace name must be alphanumeric',
-                E_USER_ERROR);
-            return;
-        }
-        if (empty($description)) {
-            trigger_error('Description must be non-empty',
-                E_USER_ERROR);
-            return;
+        if (HTMLPURIFIER_SCHEMA_STRICT) {
+            if (isset($def->info[$namespace])) {
+                trigger_error('Cannot redefine namespace', E_USER_ERROR);
+                return;
+            }
+            if (!ctype_alnum($namespace)) {
+                trigger_error('Namespace name must be alphanumeric',
+                    E_USER_ERROR);
+                return;
+            }
+            if (empty($description)) {
+                trigger_error('Description must be non-empty',
+                    E_USER_ERROR);
+                return;
+            }
         }
         $def->info[$namespace] = array();
         $def->info_namespace[$namespace] = new HTMLPurifier_ConfigDef_Namespace();
@@ -201,23 +214,25 @@ class HTMLPurifier_ConfigSchema {
      */
     function defineValueAliases($namespace, $name, $aliases) {
         $def =& HTMLPurifier_ConfigSchema::instance();
-        if (!isset($def->info[$namespace][$name])) {
+        if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) {
             trigger_error('Cannot set value alias for non-existant directive',
                 E_USER_ERROR);
             return;
         }
         foreach ($aliases as $alias => $real) {
-            if (!$def->info[$namespace][$name] !== true &&
-                !isset($def->info[$namespace][$name]->allowed[$real])
-            ) {
-                trigger_error('Cannot define alias to value that is not allowed',
-                    E_USER_ERROR);
-                return;
-            }
-            if (isset($def->info[$namespace][$name]->allowed[$alias])) {
-                trigger_error('Cannot define alias over allowed value',
-                    E_USER_ERROR);
-                return;
+            if (HTMLPURIFIER_SCHEMA_STRICT) {
+                if (!$def->info[$namespace][$name] !== true &&
+                    !isset($def->info[$namespace][$name]->allowed[$real])
+                ) {
+                    trigger_error('Cannot define alias to value that is not allowed',
+                        E_USER_ERROR);
+                    return;
+                }
+                if (isset($def->info[$namespace][$name]->allowed[$alias])) {
+                    trigger_error('Cannot define alias over allowed value',
+                        E_USER_ERROR);
+                    return;
+                }
             }
             $def->info[$namespace][$name]->aliases[$alias] = $real;
         }
@@ -232,14 +247,14 @@ class HTMLPurifier_ConfigSchema {
      */
     function defineAllowedValues($namespace, $name, $allowed_values) {
         $def =& HTMLPurifier_ConfigSchema::instance();
-        if (!isset($def->info[$namespace][$name])) {
+        if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) {
             trigger_error('Cannot define allowed values for undefined directive',
                 E_USER_ERROR);
             return;
         }
         $directive =& $def->info[$namespace][$name];
         $type = $directive->type;
-        if ($type != 'string' && $type != 'istring') {
+        if (HTMLPURIFIER_SCHEMA_STRICT && $type != 'string' && $type != 'istring') {
             trigger_error('Cannot define allowed values for directive whose type is not string',
                 E_USER_ERROR);
             return;
@@ -250,8 +265,11 @@ class HTMLPurifier_ConfigSchema {
         foreach ($allowed_values as $value) {
             $directive->allowed[$value] = true;
         }
-        if ($def->defaults[$namespace][$name] !== null &&
-            !isset($directive->allowed[$def->defaults[$namespace][$name]])) {
+        if (
+            HTMLPURIFIER_SCHEMA_STRICT &&
+            $def->defaults[$namespace][$name] !== null &&
+            !isset($directive->allowed[$def->defaults[$namespace][$name]])
+        ) {
             trigger_error('Default value must be in allowed range of variables',
                 E_USER_ERROR);
             $directive->allowed = true; // undo undo!
@@ -269,30 +287,32 @@ class HTMLPurifier_ConfigSchema {
      */
     function defineAlias($namespace, $name, $new_namespace, $new_name) {
         $def =& HTMLPurifier_ConfigSchema::instance();
-        if (!isset($def->info[$namespace])) {
-            trigger_error('Cannot define directive alias in undefined namespace',
-                E_USER_ERROR);
-            return;
-        }
-        if (!ctype_alnum($name)) {
-            trigger_error('Directive name must be alphanumeric',
-                E_USER_ERROR);
-            return;
-        }
-        if (isset($def->info[$namespace][$name])) {
-            trigger_error('Cannot define alias over directive',
-                E_USER_ERROR);
-            return;
-        }
-        if (!isset($def->info[$new_namespace][$new_name])) {
-            trigger_error('Cannot define alias to undefined directive',
-                E_USER_ERROR);
-            return;
-        }
-        if ($def->info[$new_namespace][$new_name]->class == 'alias') {
-            trigger_error('Cannot define alias to alias',
-                E_USER_ERROR);
-            return;
+        if (HTMLPURIFIER_SCHEMA_STRICT) {
+            if (!isset($def->info[$namespace])) {
+                trigger_error('Cannot define directive alias in undefined namespace',
+                    E_USER_ERROR);
+                return;
+            }
+            if (!ctype_alnum($name)) {
+                trigger_error('Directive name must be alphanumeric',
+                    E_USER_ERROR);
+                return;
+            }
+            if (isset($def->info[$namespace][$name])) {
+                trigger_error('Cannot define alias over directive',
+                    E_USER_ERROR);
+                return;
+            }
+            if (!isset($def->info[$new_namespace][$new_name])) {
+                trigger_error('Cannot define alias to undefined directive',
+                    E_USER_ERROR);
+                return;
+            }
+            if ($def->info[$new_namespace][$new_name]->class == 'alias') {
+                trigger_error('Cannot define alias to alias',
+                    E_USER_ERROR);
+                return;
+            }
         }
         $def->info[$namespace][$name] =
             new HTMLPurifier_ConfigDef_DirectiveAlias(
@@ -396,6 +416,7 @@ class HTMLPurifier_ConfigSchema {
      * Takes an absolute path and munges it into a more manageable relative path
      */
     function mungeFilename($filename) {
+        if (!HTMLPURIFIER_SCHEMA_STRICT) return $filename;
         $offset = strrpos($filename, 'HTMLPurifier');
         $filename = substr($filename, $offset);
         $filename = str_replace('\\', '/', $filename);
diff --git a/tests/index.php b/tests/index.php
index d5841c50..1034e44f 100644
--- a/tests/index.php
+++ b/tests/index.php
@@ -5,6 +5,7 @@
 
 error_reporting(E_ALL);
 define('HTMLPurifierTest', 1);
+define('HTMLPURIFIER_SCHEMA_STRICT', true);
 
 // wishlist: automated calling of this file from multiple PHP versions so we
 // don't have to constantly switch around