diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php
index 3e2b09a7..c94e01f6 100644
--- a/library/HTMLPurifier/Config.php
+++ b/library/HTMLPurifier/Config.php
@@ -149,6 +149,13 @@ class HTMLPurifier_Config
return;
}
$this->conf[$namespace][$key] = $value;
+ if ($namespace == 'HTML' || $namespace == 'Attr') {
+ // reset HTML definition if relevant attributes changed
+ $this->html_definition = null;
+ }
+ if ($namespace == 'CSS') {
+ $this->css_definition = null;
+ }
}
/**
@@ -157,20 +164,14 @@ class HTMLPurifier_Config
* called before it's been setup, otherwise won't work.
*/
function &getHTMLDefinition($raw = false) {
- if ($this->html_definition === null) {
+ if (
+ empty($this->html_definition) || // nothing cached: never built, or reset to null after an HTML/Attr directive changed
+ ($raw && $this->html_definition->setup) // raw requested but the cached copy is already set up: build a fresh one
+ ) {
$this->html_definition = new HTMLPurifier_HTMLDefinition($this);
- if ($raw) {
- return $this->html_definition; // no setup!
- }
- $this->html_definition->setup($this);
- }
- if ($raw && $this->html_definition->setup) {
- trigger_error('HTMLDefinition already setup, overwriting old '.
- 'definition (set $config->definition manually to null '.
- 'if this is desired behavior).', E_USER_NOTICE);
- $this->html_definition = new HTMLPurifier_HTMLDefinition($this);
- return $this->html_definition;
+ if ($raw) return $this->html_definition; // raw: returned without running setup()
}
+ if (!$this->html_definition->setup) $this->html_definition->setup(); // finish setup lazily, e.g. for a raw definition handed out earlier
return $this->html_definition;
}
diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php
index a14e6e9b..f39e43ff 100644
--- a/library/HTMLPurifier/HTMLDefinition.php
+++ b/library/HTMLPurifier/HTMLDefinition.php
@@ -38,6 +38,8 @@ require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php';
require_once 'HTMLPurifier/HTMLModule/TransformToStrict.php';
require_once 'HTMLPurifier/HTMLModule/Legacy.php';
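+// this definition and its modules MUST NOT define configuration directives
+// outside of the HTML or Attr namespaces: HTMLPurifier_Config only discards
+// its cached HTML definition when a directive in one of those two
+// namespaces is changed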
HTMLPurifier_ConfigSchema::define(
'HTML', 'EnableAttrID', false, 'bool',
'Allows the ID attribute in HTML. This is disabled by default '.
@@ -112,9 +114,6 @@ HTMLPurifier_ConfigSchema::define(
* Definition of the purified HTML that describes allowed children,
* attributes, and many other things.
*
- * @note This is the next-gen definition that will be renamed to
- * HTMLDefinition soon!
- *
* Conventions:
*
* All member variables that are prefixed with info
@@ -127,7 +126,6 @@ HTMLPurifier_ConfigSchema::define(
* internally by the HTMLDefinition and MUST NOT be used by other HTML
* Purifier internals. Many of them, however, are public, and may be
* edited by userspace code to tweak the behavior of HTMLDefinition.
- * In practice, there will not be too many of them.
*
* HTMLPurifier_Printer_HTMLDefinition is a notable exception to this
* rule: in the interest of comprehensiveness, it will sniff everything.
@@ -205,11 +203,25 @@ class HTMLPurifier_HTMLDefinition
var $strict;
/**
- * Array of HTMLPurifier_Module instances, indexed by module name
+ * Array of HTMLPurifier_HTMLModule instances, indexed by module's lowercased class name
* @public
*/
var $modules = array();
+ /**
+ * Associative array of module class name to module order keywords or
+ * numbers (keyword is preferred, all keywords are resolved at beginning
+ * of setup())
+ * @public
+ */
+ var $modules_order = array();
+
+ /**
+ * List of prefixes HTML Purifier should try to resolve short names to.
+ * @public
+ */
+ var $module_prefixes = array('HTMLPurifier_HTMLModule_');
+
/**
* Instance of HTMLPurifier_AttrTypes
* @public
@@ -223,7 +235,7 @@ class HTMLPurifier_HTMLDefinition
var $attr_collections;
/**
- * Is setup?
+ * Has setup() been called yet?
* @public
*/
var $setup = false;
@@ -234,45 +246,89 @@ class HTMLPurifier_HTMLDefinition
*/
var $content_sets;
+ /**
+ * Lookup table of module order "names" and an integer index
+ * @public
+ */
+ var $order_keywords = array(
+ 'setup' => 10,
+ 'early' => 20,
+ 'main' => 30,
+ 'late' => 40,
+ 'cleanup' => 50,
+ );
+
+ /**
+ * Temporary instance of HTMLPurifier_Config for convenience reasons,
+ * is removed after setup().
+ * @public
+ */
+ var $config;
+
/**
* Performs low-cost, preliminary initialization.
* @param $config Instance of HTMLPurifier_Config
*/
- function HTMLPurifier_HTMLDefinition($config) {
+ function HTMLPurifier_HTMLDefinition(&$config) {
- // setup some cached config variables
- // this will eventually influence module loading
- $this->strict = $config->get('HTML', 'Strict');
-
- // order is important!
- $this->modules['Text'] = new HTMLPurifier_HTMLModule_Text();
- $this->modules['Hypertext'] = new HTMLPurifier_HTMLModule_Hypertext();
- $this->modules['List'] = new HTMLPurifier_HTMLModule_List();
- $this->modules['Presentation'] = new HTMLPurifier_HTMLModule_Presentation();
- $this->modules['Edit'] = new HTMLPurifier_HTMLModule_Edit();
- $this->modules['Bdo'] = new HTMLPurifier_HTMLModule_Bdo();
- $this->modules['Tables'] = new HTMLPurifier_HTMLModule_Tables();
- $this->modules['Image'] = new HTMLPurifier_HTMLModule_Image();
- $this->modules['StyleAttribute']= new HTMLPurifier_HTMLModule_StyleAttribute();
-
- $this->modules['TransformToStrict'] = new HTMLPurifier_HTMLModule_TransformToStrict($config);
- if (!$this->strict) $this->modules['Legacy'] = new HTMLPurifier_HTMLModule_Legacy($config);
+ $this->config =& $config; // held by reference; setup() unsets it once it has finished
+ // set up public internals (cached config value and the shared registries)
+ $this->strict = $config->get('HTML', 'Strict');
$this->attr_types = new HTMLPurifier_AttrTypes();
$this->attr_collections = new HTMLPurifier_AttrCollections();
$this->content_sets = new HTMLPurifier_ContentSets();
- // some compat stuff, will be factored to modules
+ // register modules under an order keyword; processModules() sorts on it
- // remove ID module
+ // main: the core module set
+ $main_modules = array('Text', 'Hypertext', 'List', 'Presentation',
+ 'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute');
+ foreach ($main_modules as $module) $this->addModule($module, 'main');
+
+ // late: Legacy is only registered when HTML.Strict is off
+ if (!$this->strict) $this->addModule('Legacy', 'late');
+
+ // cleanup: registered with the highest order keyword
+ $this->addModule('TransformToStrict', 'cleanup');
+
+ // disable the id attribute unless HTML.EnableAttrID is on (refactor to module)
if (!$config->get('HTML', 'EnableAttrID')) {
$this->attr_collections->info['Core']['id'] = false;
}
}
-
+    /**
+     * Adds a module to the ordered list.
+     * @param $module Mixed: string module name, with or without
+     *                HTMLPurifier_HTMLModule_ prefix, or instance of
+     *                subclass of HTMLPurifier_HTMLModule.
+     * @param $order  Order keyword (a key of $this->order_keywords) or an
+     *                integer position; defaults to 'main'.
+     * @note If the module name cannot be resolved, the module is not an
+     *       object, or the order keyword is unknown, an E_USER_ERROR is
+     *       triggered and nothing is registered.
+     */
+    function addModule($module, $order = 'main') {
+        if (is_string($module)) {
+            $original_module = $module;
+            if (!class_exists($module)) {
+                // not a full class name: try each known prefix in turn
+                foreach ($this->module_prefixes as $prefix) {
+                    $module = $prefix . $original_module;
+                    if (class_exists($module)) break;
+                }
+            }
+            if (!class_exists($module)) {
+                trigger_error($original_module . ' module does not exist', E_USER_ERROR);
+                return;
+            }
+            // module constructors receive the definition being built
+            $module = new $module($this);
+        }
+        if (!is_object($module)) {
+            // get_class() on a non-object would register under an empty key
+            trigger_error('Module must be a class name or an object', E_USER_ERROR);
+            return;
+        }
+        // integer orders are valid as well: processModules() leaves them
+        // untouched and only resolves keywords
+        if (!is_int($order) && !isset($this->order_keywords[$order])) {
+            trigger_error('Order keyword does not exist', E_USER_ERROR);
+            return;
+        }
+        // both registries are keyed by the lowercased class name
+        $name = strtolower(get_class($module));
+        $this->modules[$name] = $module;
+        $this->modules_order[$name] = $order;
+    }
/**
* Processes internals into form usable by HTMLPurifier internals.
@@ -280,33 +336,55 @@ class HTMLPurifier_HTMLDefinition
* be done.
- * @param $config Instance of HTMLPurifier_Config
*/
- function setup($config) {
+ function setup() {
// multiple call guard
if ($this->setup) {return;} else {$this->setup = true;}
- // would be nice if we could put each of these in their
- // own object, would make this hookable too!
- $this->processModules($config);
- $this->setupAttrTransform($config);
- $this->setupBlockWrapper($config);
- $this->setupParent($config);
- $this->setupCompat($config);
+ $this->processModules();
+ $this->setupAttrTransform();
+ $this->setupBlockWrapper(); // this step and the two after it read $this->config
+ $this->setupParent();
+ $this->setupCompat();
+
+ unset($this->config); // the config handle is only kept until setup has run
}
/**
* Processes the modules, setting up related info variables
- * @param $config Instance of HTMLPurifier_Config
*/
- function processModules($config) {
+ function processModules() {
+
+ // substitute out the order keywords
+ foreach ($this->modules_order as $name => $order) {
+ if (empty($this->modules[$name])) {
+ trigger_error('Orphan module order definition for module: ' . $name, E_USER_ERROR);
+ return;
+ }
+ if (is_int($order)) continue;
+ if (empty($this->order_keywords[$order])) {
+ trigger_error('Unknown order keyword: ' . $order, E_USER_ERROR);
+ return;
+ }
+ $this->modules_order[$name] = $this->order_keywords[$order];
+ }
+
+ // sort modules member variable
+ array_multisort(
+ $this->modules_order, SORT_ASC, SORT_NUMERIC,
+ $this->modules
+ );
+
+ // setup the global registries
$this->attr_collections->setup($this->attr_types, $this->modules);
$this->content_sets->setup($this->modules);
$this->info_content_sets = $this->content_sets->lookup;
+ // process the modules
foreach ($this->modules as $module_i => $module) {
- $module->preProcess($this, $config);
+ $module->preProcess($this);
// process element-wise definitions
foreach ($module->info as $name => $def) {
@@ -352,7 +430,7 @@ class HTMLPurifier_HTMLDefinition
foreach($module->info_attr_transform_pre as $k => $v) $this->info_attr_transform_pre[$k] = $v;
foreach($module->info_attr_transform_post as $k => $v) $this->info_attr_transform_post[$k]= $v;
- $module->postProcess($this, $config);
+ $module->postProcess($this);
}
@@ -360,18 +438,16 @@ class HTMLPurifier_HTMLDefinition
/**
* Sets up attribute transformations
- * @param $config Instance of HTMLPurifier_Config
*/
- function setupAttrTransform($config) {
+ function setupAttrTransform() { // appends the Lang transform to the post-transform list
$this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang();
}
/**
* Sets up block wrapper based on config
- * @param $config Instance of HTMLPurifier_Config
*/
- function setupBlockWrapper($config) {
- $block_wrapper = $config->get('HTML', 'BlockWrapper');
+ function setupBlockWrapper() {
+ $block_wrapper = $this->config->get('HTML', 'BlockWrapper');
if (isset($this->info_content_sets['Block'][$block_wrapper])) {
$this->info_block_wrapper = $block_wrapper;
} else {
@@ -382,10 +458,9 @@ class HTMLPurifier_HTMLDefinition
/**
* Sets up parent of fragment based on config
- * @param $config Instance of HTMLPurifier_Config
*/
- function setupParent($config) {
- $parent = $config->get('HTML', 'Parent');
+ function setupParent() {
+ $parent = $this->config->get('HTML', 'Parent');
if (isset($this->info[$parent])) {
$this->info_parent = $parent;
} else {
@@ -399,10 +474,10 @@ class HTMLPurifier_HTMLDefinition
* Sets up compat code from HTMLDefinition that has not been
* delegated to modules yet
*/
- function setupCompat($config) {
+ function setupCompat() {
// deprecated config setting, implement in DisableURI module
- if ($config->get('Attr', 'DisableURI')) {
+ if ($this->config->get('Attr', 'DisableURI')) {
$this->info['a']->attr['href'] =
$this->info['img']->attr['longdesc'] =
$this->info['del']->attr['cite'] =
@@ -413,13 +488,13 @@ class HTMLPurifier_HTMLDefinition
}
// setup allowed elements, SubtractiveWhitelist module
- $allowed_elements = $config->get('HTML', 'AllowedElements');
+ $allowed_elements = $this->config->get('HTML', 'AllowedElements');
if (is_array($allowed_elements)) {
foreach ($this->info as $name => $d) {
if(!isset($allowed_elements[$name])) unset($this->info[$name]);
}
}
- $allowed_attributes = $config->get('HTML', 'AllowedAttributes');
+ $allowed_attributes = $this->config->get('HTML', 'AllowedAttributes');
if (is_array($allowed_attributes)) {
foreach ($this->info_global_attr as $attr_key => $info) {
if (!isset($allowed_attributes["*.$attr_key"])) {
diff --git a/library/HTMLPurifier/HTMLModule.php b/library/HTMLPurifier/HTMLModule.php
index 6538952b..a8de3f11 100644
--- a/library/HTMLPurifier/HTMLModule.php
+++ b/library/HTMLPurifier/HTMLModule.php
@@ -95,17 +95,22 @@ class HTMLPurifier_HTMLModule
* Hook method that lets module perform arbitrary operations on
* HTMLPurifier_HTMLDefinition before the module gets processed.
* @param $definition Reference to HTMLDefinition being setup
- * @param $config Instance of HTMLPurifier_Config
*/
- function preProcess(&$definition, $config) {}
+ function preProcess(&$definition) {} // default hook does nothing
/**
* Hook method that lets module perform arbitrary operations
* on HTMLPurifier_HTMLDefinition after the module gets processed.
* @param $definition Reference to HTMLDefinition being setup
- * @param $config Instance of HTMLPurifier_Config
*/
- function postProcess(&$definition, $config) {}
+ function postProcess(&$definition) {} // default hook does nothing
+
+ /**
+ * Hook method that is called when a module gets registered to
+ * the definition. The default implementation does nothing.
+ * NOTE(review): HTMLPurifier_HTMLDefinition::addModule() does not
+ * visibly invoke this hook; confirm where it is meant to be called.
+ * @param $definition Reference to HTMLDefinition being setup
+ */
+ function setup(&$definition) {}
}
diff --git a/library/HTMLPurifier/HTMLModule/TransformToStrict.php b/library/HTMLPurifier/HTMLModule/TransformToStrict.php
index 6623b6fe..933f77fd 100644
--- a/library/HTMLPurifier/HTMLModule/TransformToStrict.php
+++ b/library/HTMLPurifier/HTMLModule/TransformToStrict.php
@@ -31,7 +31,9 @@ class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule
'lang' => false // placeholder
);
- function HTMLPurifier_HTMLModule_TransformToStrict($config) {
+ function HTMLPurifier_HTMLModule_TransformToStrict(&$definition) {
+ $config = $definition->config;
+
// deprecated tag transforms
$this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font();
$this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul');
diff --git a/tests/HTMLPurifier/ConfigTest.php b/tests/HTMLPurifier/ConfigTest.php
index e04ac416..7283700f 100644
--- a/tests/HTMLPurifier/ConfigTest.php
+++ b/tests/HTMLPurifier/ConfigTest.php
@@ -216,7 +216,7 @@ class HTMLPurifier_ConfigTest extends UnitTestCase
}
- function test_getDefinition() {
+ function test_getHTMLDefinition() {
// we actually want to use the old copy, because the definition
// generation routines have dependencies on configuration values
@@ -224,12 +224,41 @@ class HTMLPurifier_ConfigTest extends UnitTestCase
$this->old_copy = HTMLPurifier_ConfigSchema::instance($this->old_copy);
$config = HTMLPurifier_Config::createDefault();
- $def = $config->getHTMLDefinition();
- $this->assertIsA($def, 'HTMLPurifier_HTMLDefinition');
$def = $config->getCSSDefinition();
$this->assertIsA($def, 'HTMLPurifier_CSSDefinition');
+ $def = $config->getHTMLDefinition();
+ $def2 = $config->getHTMLDefinition();
+ $this->assertIsA($def, 'HTMLPurifier_HTMLDefinition');
+ $this->assertEqual($def, $def2);
+ $this->assertTrue($def->setup);
+
+ // test re-calculation if HTML changes
+ $config->set('HTML', 'Strict', true);
+ $def = $config->getHTMLDefinition();
+ $this->assertIsA($def, 'HTMLPurifier_HTMLDefinition');
+ $this->assertNotEqual($def, $def2);
+ $this->assertTrue($def->setup);
+
+ // test retrieval of raw definition
+ $def =& $config->getHTMLDefinition(true);
+ $this->assertNotEqual($def, $def2);
+ $this->assertFalse($def->setup);
+
+ // auto initialization
+ $config->getHTMLDefinition();
+ $this->assertTrue($def->setup);
+
+ }
+
+    function test_getCSSDefinition() {
+        // same schema swap as test_getHTMLDefinition(): use the old copy,
+        // since definition generation depends on configuration values
+        $this->old_copy = HTMLPurifier_ConfigSchema::instance($this->old_copy);
+
+        // a default config must hand back a CSS definition object
+        $default_config = HTMLPurifier_Config::createDefault();
+        $css_definition = $default_config->getCSSDefinition();
+        $this->assertIsA($css_definition, 'HTMLPurifier_CSSDefinition');
}
function test_loadArray() {