mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-07-10 01:06:20 +02:00
[1.7.0] Add versioning to serializer cache
- Make some AttrDef member-variables lazy-loading to save serialization space, clean up others
- Refactor get*Definition() methods

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1116 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
3
TODO
3
TODO
@ -13,8 +13,7 @@ TODO List
|
|||||||
- Reorganize configuration directives
|
- Reorganize configuration directives
|
||||||
- Set up anonymous module management by HTMLDefinition (Advanced API)
|
- Set up anonymous module management by HTMLDefinition (Advanced API)
|
||||||
- Get all AttrTypes into string form
|
- Get all AttrTypes into string form
|
||||||
# Clean up HTMLDefinition caching, need easy cache invalidation,
|
# Clean up HTMLDefinition caching, need easy cache invalidation.
|
||||||
versioning of caches, etc.
|
|
||||||
- Parse TinyMCE-style whitelist into our %HTML.Allow* whitelists
|
- Parse TinyMCE-style whitelist into our %HTML.Allow* whitelists
|
||||||
|
|
||||||
1.8 release [Refactor, refactor!]
|
1.8 release [Refactor, refactor!]
|
||||||
|
@ -15,13 +15,10 @@ class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
|
|||||||
*/
|
*/
|
||||||
var $ip4;
|
var $ip4;
|
||||||
|
|
||||||
function HTMLPurifier_AttrDef_URI_IPv4() {
|
|
||||||
$oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
|
|
||||||
$this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
|
|
||||||
}
|
|
||||||
|
|
||||||
function validate($aIP, $config, &$context) {
|
function validate($aIP, $config, &$context) {
|
||||||
|
|
||||||
|
if (!$this->ip4) $this->_loadRegex();
|
||||||
|
|
||||||
if (preg_match('#^' . $this->ip4 . '$#s', $aIP))
|
if (preg_match('#^' . $this->ip4 . '$#s', $aIP))
|
||||||
{
|
{
|
||||||
return $aIP;
|
return $aIP;
|
||||||
@ -31,6 +28,15 @@ class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Lazy load function to prevent regex from being stuffed in
|
||||||
|
* cache.
|
||||||
|
*/
|
||||||
|
function _loadRegex() {
|
||||||
|
$oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
|
||||||
|
$this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
@ -13,6 +13,8 @@ class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
|
|||||||
|
|
||||||
function validate($aIP, $config, &$context) {
|
function validate($aIP, $config, &$context) {
|
||||||
|
|
||||||
|
if (!$this->ip4) $this->_loadRegex();
|
||||||
|
|
||||||
$original = $aIP;
|
$original = $aIP;
|
||||||
|
|
||||||
$hex = '[0-9a-fA-F]';
|
$hex = '[0-9a-fA-F]';
|
||||||
|
@ -43,6 +43,13 @@ class HTMLPurifier_Config
|
|||||||
*/
|
*/
|
||||||
var $version = '1.6.1';
|
var $version = '1.6.1';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Integer key users can use to indicate they have manually
|
||||||
|
* overridden some internal behavior and would like the
|
||||||
|
* cache to invalidate itself.
|
||||||
|
*/
|
||||||
|
var $revision = 1;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Two-level associative array of configuration directives
|
* Two-level associative array of configuration directives
|
||||||
*/
|
*/
|
||||||
@ -54,14 +61,9 @@ class HTMLPurifier_Config
|
|||||||
var $def;
|
var $def;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Cached instance of HTMLPurifier_HTMLDefinition
|
* Indexed array of definitions
|
||||||
*/
|
*/
|
||||||
var $html_definition;
|
var $definitions;
|
||||||
|
|
||||||
/**
|
|
||||||
* Cached instance of HTMLPurifier_CSSDefinition
|
|
||||||
*/
|
|
||||||
var $css_definition;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Bool indicator whether or not config is finalized
|
* Bool indicator whether or not config is finalized
|
||||||
@ -205,10 +207,8 @@ class HTMLPurifier_Config
|
|||||||
// reset definitions if the directives they depend on changed
|
// reset definitions if the directives they depend on changed
|
||||||
// this is a very costly process, so it's discouraged
|
// this is a very costly process, so it's discouraged
|
||||||
// with finalization
|
// with finalization
|
||||||
if ($namespace == 'HTML') {
|
if ($namespace == 'HTML' || $namespace == 'CSS') {
|
||||||
$this->html_definition = null;
|
$this->definitions[$namespace] = null;
|
||||||
} elseif ($namespace == 'CSS') {
|
|
||||||
$this->css_definition = null;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -218,60 +218,61 @@ class HTMLPurifier_Config
|
|||||||
* called before it's been setup, otherwise won't work.
|
* called before it's been setup, otherwise won't work.
|
||||||
*/
|
*/
|
||||||
function &getHTMLDefinition($raw = false) {
|
function &getHTMLDefinition($raw = false) {
|
||||||
if (!$this->finalized && $this->autoFinalize) $this->finalize();
|
return $this->getDefinition('HTML', $raw);
|
||||||
$cache = HTMLPurifier_DefinitionCache::create('HTML', $this);
|
|
||||||
if($this->checkDefinition($this->html_definition, $cache, $raw)) {
|
|
||||||
return $this->html_definition;
|
|
||||||
}
|
|
||||||
return $this->createDefinition(
|
|
||||||
$this->html_definition,
|
|
||||||
$cache,
|
|
||||||
$raw,
|
|
||||||
new HTMLPurifier_HTMLDefinition()
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves reference to the CSS definition
|
* Retrieves reference to the CSS definition
|
||||||
*/
|
*/
|
||||||
function &getCSSDefinition($raw = false) {
|
function &getCSSDefinition($raw = false) {
|
||||||
|
return $this->getDefinition('CSS', $raw);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves a definition
|
||||||
|
* @param $type Type of definition: HTML, CSS, etc
|
||||||
|
* @param $raw Whether or not definition should be returned raw
|
||||||
|
*/
|
||||||
|
function &getDefinition($type, $raw = false) {
|
||||||
if (!$this->finalized && $this->autoFinalize) $this->finalize();
|
if (!$this->finalized && $this->autoFinalize) $this->finalize();
|
||||||
$cache = HTMLPurifier_DefinitionCache::create('CSS', $this);
|
$cache = HTMLPurifier_DefinitionCache::create($type, $this);
|
||||||
if($this->checkDefinition($this->css_definition, $cache, $raw)) {
|
if (!$raw) {
|
||||||
return $this->css_definition;
|
// see if we can quickly supply a definition
|
||||||
|
if (!empty($this->definitions[$type])) {
|
||||||
|
if (!$this->definitions[$type]->setup) {
|
||||||
|
$this->definitions[$type]->setup($this);
|
||||||
}
|
}
|
||||||
return $this->createDefinition(
|
return $this->definitions[$type];
|
||||||
$this->css_definition,
|
|
||||||
$cache,
|
|
||||||
$raw,
|
|
||||||
new HTMLPurifier_CSSDefinition()
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
// memory check missed, try cache
|
||||||
/**
|
$this->definitions[$type] = $cache->get($this);
|
||||||
* Checks the variable and cache for an easy-access definition,
|
if ($this->definitions[$type]) {
|
||||||
* sets def to variable and returns true if available
|
// definition in cache, return it
|
||||||
*/
|
return $this->definitions[$type];
|
||||||
function checkDefinition(&$var, $cache, $raw) {
|
|
||||||
if ($raw) return false;
|
|
||||||
if (!empty($var)) {
|
|
||||||
if (!$var->setup) $var->setup($this);
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
$var = $cache->get($this);
|
} elseif (
|
||||||
return (bool) $var;
|
!empty($this->definitions[$type]) &&
|
||||||
|
!$this->definitions[$type]->setup
|
||||||
|
) {
|
||||||
|
// raw requested, raw in memory, quick return
|
||||||
|
return $this->definitions[$type];
|
||||||
}
|
}
|
||||||
|
// quick checks failed, let's create the object
|
||||||
/**
|
if ($type == 'HTML') {
|
||||||
* Generates a new definition, possibly returning it raw, returns
|
$this->definitions[$type] = new HTMLPurifier_HTMLDefinition();
|
||||||
* reference to variable.
|
} elseif ($type == 'CSS') {
|
||||||
*/
|
$this->definitions[$type] = new HTMLPurifier_CSSDefinition();
|
||||||
function &createDefinition(&$var, $cache, $raw, $obj) {
|
} else {
|
||||||
$var = $obj;
|
trigger_error("Definition of $type type not supported");
|
||||||
if ($raw) return $var;
|
return false;
|
||||||
$var->setup($this);
|
}
|
||||||
$cache->set($var, $this);
|
// quick abort if raw
|
||||||
return $var;
|
if ($raw) return $this->definitions[$type];
|
||||||
|
// set it up
|
||||||
|
$this->definitions[$type]->setup($this);
|
||||||
|
// save in cache
|
||||||
|
$cache->set($this->definitions[$type], $this);
|
||||||
|
return $this->definitions[$type];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -33,7 +33,23 @@ class HTMLPurifier_DefinitionCache
|
|||||||
* @param Instance of HTMLPurifier_Config
|
* @param Instance of HTMLPurifier_Config
|
||||||
*/
|
*/
|
||||||
function generateKey($config) {
|
function generateKey($config) {
|
||||||
return md5(serialize($config->getBatch($this->type)));
|
$version = $config->version;
|
||||||
|
$revision = $config->revision;
|
||||||
|
return $version . '-' . $revision . '-' . md5(serialize($config->getBatch($this->type)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests whether or not a key is old with respect to the configuration's
|
||||||
|
* version and revision number.
|
||||||
|
* @param $key Key to test
|
||||||
|
* @param $config Instance of HTMLPurifier_Config to test against
|
||||||
|
*/
|
||||||
|
function isOld($key, $config) {
|
||||||
|
list($version, $revision, $hash) = explode('-', $key, 3);
|
||||||
|
$compare = version_compare($version, $config->version);
|
||||||
|
if ($compare > 0) return false;
|
||||||
|
if ($compare == 0 && $revision >= $config->revision) return false;
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -99,10 +115,16 @@ class HTMLPurifier_DefinitionCache
|
|||||||
/**
|
/**
|
||||||
* Clears all objects from cache
|
* Clears all objects from cache
|
||||||
*/
|
*/
|
||||||
function flush($config) {
|
function flush() {
|
||||||
trigger_error('Cannot call abstract method', E_USER_ERROR);
|
trigger_error('Cannot call abstract method', E_USER_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clears all expired (older version or revision) objects from cache
|
||||||
|
*/
|
||||||
|
function cleanup($config) {
|
||||||
|
trigger_error('Cannot call abstract method', E_USER_ERROR);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
@ -44,13 +44,21 @@ class HTMLPurifier_DefinitionCache_Serializer extends
|
|||||||
while (false !== ($filename = readdir($dh))) {
|
while (false !== ($filename = readdir($dh))) {
|
||||||
if (empty($filename)) continue;
|
if (empty($filename)) continue;
|
||||||
if ($filename[0] === '.') continue;
|
if ($filename[0] === '.') continue;
|
||||||
// optimization: md5 + .ser will always be 36 char long
|
|
||||||
// needs to be changed if we change the identifier
|
|
||||||
if (strlen($filename) !== 36) continue;
|
|
||||||
unlink($dir . '/' . $filename);
|
unlink($dir . '/' . $filename);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function cleanup($config) {
|
||||||
|
$dir = $this->generateDirectoryPath();
|
||||||
|
$dh = opendir($dir);
|
||||||
|
while (false !== ($filename = readdir($dh))) {
|
||||||
|
if (empty($filename)) continue;
|
||||||
|
if ($filename[0] === '.') continue;
|
||||||
|
$key = substr($filename, 0, strlen($filename) - 4);
|
||||||
|
if ($this->isOld($key, $config)) unlink($dir . '/' . $filename);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates the file path to the serial file corresponding to
|
* Generates the file path to the serial file corresponding to
|
||||||
* the configuration and definition name
|
* the configuration and definition name
|
||||||
|
@ -51,6 +51,8 @@ class HTMLPurifier_ElementDef
|
|||||||
* Abstract string representation of internal ChildDef rules. See
|
* Abstract string representation of internal ChildDef rules. See
|
||||||
* HTMLPurifier_ContentSets for how this is parsed and then transformed
|
* HTMLPurifier_ContentSets for how this is parsed and then transformed
|
||||||
* into an HTMLPurifier_ChildDef.
|
* into an HTMLPurifier_ChildDef.
|
||||||
|
* @warning This is a temporary variable that is not available after
|
||||||
|
* being processed by HTMLDefinition
|
||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
var $content_model;
|
var $content_model;
|
||||||
@ -59,6 +61,8 @@ class HTMLPurifier_ElementDef
|
|||||||
* Value of $child->type, used to determine which ChildDef to use,
|
* Value of $child->type, used to determine which ChildDef to use,
|
||||||
* used in combination with $content_model.
|
* used in combination with $content_model.
|
||||||
* @warning This must be lowercase
|
* @warning This must be lowercase
|
||||||
|
* @warning This is a temporary variable that is not available after
|
||||||
|
* being processed by HTMLDefinition
|
||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
var $content_model_type;
|
var $content_model_type;
|
||||||
|
@ -169,6 +169,12 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
|
|||||||
$this->processModules($config);
|
$this->processModules($config);
|
||||||
$this->setupConfigStuff($config);
|
$this->setupConfigStuff($config);
|
||||||
unset($this->manager);
|
unset($this->manager);
|
||||||
|
|
||||||
|
// cleanup some of the element definitions
|
||||||
|
foreach ($this->info as $k => $v) {
|
||||||
|
unset($this->info[$k]->content_model);
|
||||||
|
unset($this->info[$k]->content_model_type);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -53,7 +53,12 @@ class HTMLPurifier_DefinitionCache_SerializerTest extends HTMLPurifier_Definitio
|
|||||||
$cache = new HTMLPurifier_DefinitionCache_Serializer('Test');
|
$cache = new HTMLPurifier_DefinitionCache_Serializer('Test');
|
||||||
|
|
||||||
$config_array = array('Foo' => 'Bar');
|
$config_array = array('Foo' => 'Bar');
|
||||||
$config_md5 = md5(serialize($config_array));
|
|
||||||
|
$config = $this->generateConfigMock($config_array);
|
||||||
|
$config->version = '1.0.0';
|
||||||
|
$config->revision = 2;
|
||||||
|
|
||||||
|
$config_md5 = '1.0.0-' . $config->revision . '-' . md5(serialize($config_array));
|
||||||
|
|
||||||
$file = realpath(
|
$file = realpath(
|
||||||
$rel_file = dirname(__FILE__) .
|
$rel_file = dirname(__FILE__) .
|
||||||
@ -62,7 +67,6 @@ class HTMLPurifier_DefinitionCache_SerializerTest extends HTMLPurifier_Definitio
|
|||||||
);
|
);
|
||||||
if($file && file_exists($file)) unlink($file); // prevent previous failures from causing problems
|
if($file && file_exists($file)) unlink($file); // prevent previous failures from causing problems
|
||||||
|
|
||||||
$config = $this->generateConfigMock($config_array);
|
|
||||||
$this->assertIdentical($config_md5, $cache->generateKey($config));
|
$this->assertIdentical($config_md5, $cache->generateKey($config));
|
||||||
|
|
||||||
$def_original = $this->generateDefinition();
|
$def_original = $this->generateDefinition();
|
||||||
@ -150,6 +154,34 @@ class HTMLPurifier_DefinitionCache_SerializerTest extends HTMLPurifier_Definitio
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function testCleanup() {
|
||||||
|
|
||||||
|
$cache = new HTMLPurifier_DefinitionCache_Serializer('Test');
|
||||||
|
|
||||||
|
// in order of age, oldest first
|
||||||
|
// note that configurations are all identical, but version/revision
|
||||||
|
// are different
|
||||||
|
|
||||||
|
$config1 = $this->generateConfigMock();
|
||||||
|
$config1->version = '0.9.0';
|
||||||
|
$config1->revision = 574;
|
||||||
|
$def1 = $this->generateDefinition(array('info' => 1));
|
||||||
|
|
||||||
|
$config2 = $this->generateConfigMock();
|
||||||
|
$config2->version = '1.0.0beta';
|
||||||
|
$config2->revision = 1;
|
||||||
|
$def2 = $this->generateDefinition(array('info' => 3));
|
||||||
|
|
||||||
|
$cache->set($def1, $config1);
|
||||||
|
$cache->cleanup($config1);
|
||||||
|
$this->assertEqual($def1, $cache->get($config1)); // no change
|
||||||
|
|
||||||
|
$cache->cleanup($config2);
|
||||||
|
$this->assertFalse($cache->get($config1));
|
||||||
|
$this->assertFalse($cache->get($config2));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Asserts that a file exists, ignoring the stat cache
|
* Asserts that a file exists, ignoring the stat cache
|
||||||
*/
|
*/
|
||||||
|
@ -8,10 +8,12 @@ class HTMLPurifier_DefinitionCacheHarness extends UnitTestCase
|
|||||||
* to a getBatch() call
|
* to a getBatch() call
|
||||||
* @param $values Values to return when getBatch is invoked
|
* @param $values Values to return when getBatch is invoked
|
||||||
*/
|
*/
|
||||||
function generateConfigMock($values) {
|
function generateConfigMock($values = array()) {
|
||||||
generate_mock_once('HTMLPurifier_Config');
|
generate_mock_once('HTMLPurifier_Config');
|
||||||
$config = new HTMLPurifier_ConfigMock($this);
|
$config = new HTMLPurifier_ConfigMock($this);
|
||||||
$config->setReturnValue('getBatch', $values, array('Test'));
|
$config->setReturnValue('getBatch', $values, array('Test'));
|
||||||
|
$config->version = '1.0.0';
|
||||||
|
$config->revision = 1;
|
||||||
return $config;
|
return $config;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9,6 +9,22 @@ class HTMLPurifier_DefinitionCacheTest extends UnitTestCase
|
|||||||
$cache = HTMLPurifier_DefinitionCache::create('Test', $config);
|
$cache = HTMLPurifier_DefinitionCache::create('Test', $config);
|
||||||
$this->assertEqual($cache, new HTMLPurifier_DefinitionCache_Serializer('Test'));
|
$this->assertEqual($cache, new HTMLPurifier_DefinitionCache_Serializer('Test'));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function test_isOld() {
|
||||||
|
$cache = new HTMLPurifier_DefinitionCache('Test'); // non-functional
|
||||||
|
|
||||||
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$config->version = '1.0.0';
|
||||||
|
$config->revision = 10;
|
||||||
|
|
||||||
|
$this->assertIdentical($cache->isOld('1.0.0-10-hashstuffhere', $config), false);
|
||||||
|
$this->assertIdentical($cache->isOld('1.5.0-1-hashstuffhere', $config), false);
|
||||||
|
|
||||||
|
$this->assertIdentical($cache->isOld('0.9.0-1-hashstuffhere', $config), true);
|
||||||
|
$this->assertIdentical($cache->isOld('1.0.0-1-hashstuffhere', $config), true);
|
||||||
|
$this->assertIdentical($cache->isOld('1.0.0beta-11-hashstuffhere', $config), true);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
Reference in New Issue
Block a user