mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-07-09 16:56:20 +02:00
[1.7.0] Add versioning to serializer cache
- Make some AttrDef member-variables lazy-loading to save serialization space, clean up others - Refactor get*Definition() methods git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1116 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
3
TODO
3
TODO
@ -13,8 +13,7 @@ TODO List
|
||||
- Reorganize configuration directives
|
||||
- Set up anonymous module management by HTMLDefinition (Advanced API)
|
||||
- Get all AttrTypes into string form
|
||||
# Clean up HTMLDefinition caching, need easy cache invalidation,
|
||||
versioning of caches, etc.
|
||||
# Clean up HTMLDefinition caching, need easy cache invalidation.
|
||||
- Parse TinyMCE-style whitelist into our %HTML.Allow* whitelists
|
||||
|
||||
1.8 release [Refactor, refactor!]
|
||||
|
@ -15,13 +15,10 @@ class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
|
||||
*/
|
||||
var $ip4;
|
||||
|
||||
function HTMLPurifier_AttrDef_URI_IPv4() {
|
||||
$oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
|
||||
$this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
|
||||
}
|
||||
|
||||
function validate($aIP, $config, &$context) {
|
||||
|
||||
if (!$this->ip4) $this->_loadRegex();
|
||||
|
||||
if (preg_match('#^' . $this->ip4 . '$#s', $aIP))
|
||||
{
|
||||
return $aIP;
|
||||
@ -31,6 +28,15 @@ class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Lazy load function to prevent regex from being stuffed in
|
||||
* cache.
|
||||
*/
|
||||
function _loadRegex() {
|
||||
$oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
|
||||
$this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@ -13,6 +13,8 @@ class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
|
||||
|
||||
function validate($aIP, $config, &$context) {
|
||||
|
||||
if (!$this->ip4) $this->_loadRegex();
|
||||
|
||||
$original = $aIP;
|
||||
|
||||
$hex = '[0-9a-fA-F]';
|
||||
|
@ -43,6 +43,13 @@ class HTMLPurifier_Config
|
||||
*/
|
||||
var $version = '1.6.1';
|
||||
|
||||
/**
|
||||
* Integer key users can use to indicate they have manually
|
||||
* overridden some internal behavior and would like the
|
||||
* cache to invalidate itself.
|
||||
*/
|
||||
var $revision = 1;
|
||||
|
||||
/**
|
||||
* Two-level associative array of configuration directives
|
||||
*/
|
||||
@ -54,14 +61,9 @@ class HTMLPurifier_Config
|
||||
var $def;
|
||||
|
||||
/**
|
||||
* Cached instance of HTMLPurifier_HTMLDefinition
|
||||
* Indexed array of definitions
|
||||
*/
|
||||
var $html_definition;
|
||||
|
||||
/**
|
||||
* Cached instance of HTMLPurifier_CSSDefinition
|
||||
*/
|
||||
var $css_definition;
|
||||
var $definitions;
|
||||
|
||||
/**
|
||||
* Bool indicator whether or not config is finalized
|
||||
@ -205,10 +207,8 @@ class HTMLPurifier_Config
|
||||
// reset definitions if the directives they depend on changed
|
||||
// this is a very costly process, so it's discouraged
|
||||
// with finalization
|
||||
if ($namespace == 'HTML') {
|
||||
$this->html_definition = null;
|
||||
} elseif ($namespace == 'CSS') {
|
||||
$this->css_definition = null;
|
||||
if ($namespace == 'HTML' || $namespace == 'CSS') {
|
||||
$this->definitions[$namespace] = null;
|
||||
}
|
||||
}
|
||||
|
||||
@ -218,60 +218,61 @@ class HTMLPurifier_Config
|
||||
* called before it's been setup, otherwise won't work.
|
||||
*/
|
||||
function &getHTMLDefinition($raw = false) {
|
||||
if (!$this->finalized && $this->autoFinalize) $this->finalize();
|
||||
$cache = HTMLPurifier_DefinitionCache::create('HTML', $this);
|
||||
if($this->checkDefinition($this->html_definition, $cache, $raw)) {
|
||||
return $this->html_definition;
|
||||
}
|
||||
return $this->createDefinition(
|
||||
$this->html_definition,
|
||||
$cache,
|
||||
$raw,
|
||||
new HTMLPurifier_HTMLDefinition()
|
||||
);
|
||||
return $this->getDefinition('HTML', $raw);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves reference to the CSS definition
|
||||
*/
|
||||
function &getCSSDefinition($raw = false) {
|
||||
return $this->getDefinition('CSS', $raw);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a definition
|
||||
* @param $type Type of definition: HTML, CSS, etc
|
||||
* @param $raw Whether or not definition should be returned raw
|
||||
*/
|
||||
function &getDefinition($type, $raw = false) {
|
||||
if (!$this->finalized && $this->autoFinalize) $this->finalize();
|
||||
$cache = HTMLPurifier_DefinitionCache::create('CSS', $this);
|
||||
if($this->checkDefinition($this->css_definition, $cache, $raw)) {
|
||||
return $this->css_definition;
|
||||
$cache = HTMLPurifier_DefinitionCache::create($type, $this);
|
||||
if (!$raw) {
|
||||
// see if we can quickly supply a definition
|
||||
if (!empty($this->definitions[$type])) {
|
||||
if (!$this->definitions[$type]->setup) {
|
||||
$this->definitions[$type]->setup($this);
|
||||
}
|
||||
return $this->createDefinition(
|
||||
$this->css_definition,
|
||||
$cache,
|
||||
$raw,
|
||||
new HTMLPurifier_CSSDefinition()
|
||||
);
|
||||
return $this->definitions[$type];
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks the variable and cache for an easy-access definition,
|
||||
* sets def to variable and returns true if available
|
||||
*/
|
||||
function checkDefinition(&$var, $cache, $raw) {
|
||||
if ($raw) return false;
|
||||
if (!empty($var)) {
|
||||
if (!$var->setup) $var->setup($this);
|
||||
return true;
|
||||
// memory check missed, try cache
|
||||
$this->definitions[$type] = $cache->get($this);
|
||||
if ($this->definitions[$type]) {
|
||||
// definition in cache, return it
|
||||
return $this->definitions[$type];
|
||||
}
|
||||
$var = $cache->get($this);
|
||||
return (bool) $var;
|
||||
} elseif (
|
||||
!empty($this->definitions[$type]) &&
|
||||
!$this->definitions[$type]->setup
|
||||
) {
|
||||
// raw requested, raw in memory, quick return
|
||||
return $this->definitions[$type];
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a new definition, possibly returning it raw, returns
|
||||
* reference to variable.
|
||||
*/
|
||||
function &createDefinition(&$var, $cache, $raw, $obj) {
|
||||
$var = $obj;
|
||||
if ($raw) return $var;
|
||||
$var->setup($this);
|
||||
$cache->set($var, $this);
|
||||
return $var;
|
||||
// quick checks failed, let's create the object
|
||||
if ($type == 'HTML') {
|
||||
$this->definitions[$type] = new HTMLPurifier_HTMLDefinition();
|
||||
} elseif ($type == 'CSS') {
|
||||
$this->definitions[$type] = new HTMLPurifier_CSSDefinition();
|
||||
} else {
|
||||
trigger_error("Definition of $type type not supported");
|
||||
return false;
|
||||
}
|
||||
// quick abort if raw
|
||||
if ($raw) return $this->definitions[$type];
|
||||
// set it up
|
||||
$this->definitions[$type]->setup($this);
|
||||
// save in cache
|
||||
$cache->set($this->definitions[$type], $this);
|
||||
return $this->definitions[$type];
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -33,7 +33,23 @@ class HTMLPurifier_DefinitionCache
|
||||
* @param Instance of HTMLPurifier_Config
|
||||
*/
|
||||
function generateKey($config) {
|
||||
return md5(serialize($config->getBatch($this->type)));
|
||||
$version = $config->version;
|
||||
$revision = $config->revision;
|
||||
return $version . '-' . $revision . '-' . md5(serialize($config->getBatch($this->type)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests whether or not a key is old with respect to the configuration's
|
||||
* version and revision number.
|
||||
* @param $key Key to test
|
||||
* @param $config Instance of HTMLPurifier_Config to test against
|
||||
*/
|
||||
function isOld($key, $config) {
|
||||
list($version, $revision, $hash) = explode('-', $key, 3);
|
||||
$compare = version_compare($version, $config->version);
|
||||
if ($compare > 0) return false;
|
||||
if ($compare == 0 && $revision >= $config->revision) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -99,10 +115,16 @@ class HTMLPurifier_DefinitionCache
|
||||
/**
|
||||
* Clears all objects from cache
|
||||
*/
|
||||
function flush($config) {
|
||||
function flush() {
|
||||
trigger_error('Cannot call abstract method', E_USER_ERROR);
|
||||
}
|
||||
|
||||
/**
|
||||
* Clears all expired (older version or revision) objects from cache
|
||||
*/
|
||||
function cleanup($config) {
|
||||
trigger_error('Cannot call abstract method', E_USER_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
@ -44,13 +44,21 @@ class HTMLPurifier_DefinitionCache_Serializer extends
|
||||
while (false !== ($filename = readdir($dh))) {
|
||||
if (empty($filename)) continue;
|
||||
if ($filename[0] === '.') continue;
|
||||
// optimization: md5 + .ser will always be 36 char long
|
||||
// needs to be changed if we change the identifier
|
||||
if (strlen($filename) !== 36) continue;
|
||||
unlink($dir . '/' . $filename);
|
||||
}
|
||||
}
|
||||
|
||||
function cleanup($config) {
|
||||
$dir = $this->generateDirectoryPath();
|
||||
$dh = opendir($dir);
|
||||
while (false !== ($filename = readdir($dh))) {
|
||||
if (empty($filename)) continue;
|
||||
if ($filename[0] === '.') continue;
|
||||
$key = substr($filename, 0, strlen($filename) - 4);
|
||||
if ($this->isOld($key, $config)) unlink($dir . '/' . $filename);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates the file path to the serial file corresponding to
|
||||
* the configuration and definition name
|
||||
|
@ -51,6 +51,8 @@ class HTMLPurifier_ElementDef
|
||||
* Abstract string representation of internal ChildDef rules. See
|
||||
* HTMLPurifier_ContentSets for how this is parsed and then transformed
|
||||
* into an HTMLPurifier_ChildDef.
|
||||
* @warning This is a temporary variable that is not available after
|
||||
* being processed by HTMLDefinition
|
||||
* @public
|
||||
*/
|
||||
var $content_model;
|
||||
@ -59,6 +61,8 @@ class HTMLPurifier_ElementDef
|
||||
* Value of $child->type, used to determine which ChildDef to use,
|
||||
* used in combination with $content_model.
|
||||
* @warning This must be lowercase
|
||||
* @warning This is a temporary variable that is not available after
|
||||
* being processed by HTMLDefinition
|
||||
* @public
|
||||
*/
|
||||
var $content_model_type;
|
||||
|
@ -169,6 +169,12 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
|
||||
$this->processModules($config);
|
||||
$this->setupConfigStuff($config);
|
||||
unset($this->manager);
|
||||
|
||||
// cleanup some of the element definitions
|
||||
foreach ($this->info as $k => $v) {
|
||||
unset($this->info[$k]->content_model);
|
||||
unset($this->info[$k]->content_model_type);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -53,7 +53,12 @@ class HTMLPurifier_DefinitionCache_SerializerTest extends HTMLPurifier_Definitio
|
||||
$cache = new HTMLPurifier_DefinitionCache_Serializer('Test');
|
||||
|
||||
$config_array = array('Foo' => 'Bar');
|
||||
$config_md5 = md5(serialize($config_array));
|
||||
|
||||
$config = $this->generateConfigMock($config_array);
|
||||
$config->version = '1.0.0';
|
||||
$config->revision = 2;
|
||||
|
||||
$config_md5 = '1.0.0-' . $config->revision . '-' . md5(serialize($config_array));
|
||||
|
||||
$file = realpath(
|
||||
$rel_file = dirname(__FILE__) .
|
||||
@ -62,7 +67,6 @@ class HTMLPurifier_DefinitionCache_SerializerTest extends HTMLPurifier_Definitio
|
||||
);
|
||||
if($file && file_exists($file)) unlink($file); // prevent previous failures from causing problems
|
||||
|
||||
$config = $this->generateConfigMock($config_array);
|
||||
$this->assertIdentical($config_md5, $cache->generateKey($config));
|
||||
|
||||
$def_original = $this->generateDefinition();
|
||||
@ -150,6 +154,34 @@ class HTMLPurifier_DefinitionCache_SerializerTest extends HTMLPurifier_Definitio
|
||||
|
||||
}
|
||||
|
||||
function testCleanup() {
|
||||
|
||||
$cache = new HTMLPurifier_DefinitionCache_Serializer('Test');
|
||||
|
||||
// in order of age, oldest first
|
||||
// note that configurations are all identical, but version/revision
|
||||
// are different
|
||||
|
||||
$config1 = $this->generateConfigMock();
|
||||
$config1->version = '0.9.0';
|
||||
$config1->revision = 574;
|
||||
$def1 = $this->generateDefinition(array('info' => 1));
|
||||
|
||||
$config2 = $this->generateConfigMock();
|
||||
$config2->version = '1.0.0beta';
|
||||
$config2->revision = 1;
|
||||
$def2 = $this->generateDefinition(array('info' => 3));
|
||||
|
||||
$cache->set($def1, $config1);
|
||||
$cache->cleanup($config1);
|
||||
$this->assertEqual($def1, $cache->get($config1)); // no change
|
||||
|
||||
$cache->cleanup($config2);
|
||||
$this->assertFalse($cache->get($config1));
|
||||
$this->assertFalse($cache->get($config2));
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Asserts that a file exists, ignoring the stat cache
|
||||
*/
|
||||
|
@ -8,10 +8,12 @@ class HTMLPurifier_DefinitionCacheHarness extends UnitTestCase
|
||||
* to a getBatch() call
|
||||
* @param $values Values to return when getBatch is invoked
|
||||
*/
|
||||
function generateConfigMock($values) {
|
||||
function generateConfigMock($values = array()) {
|
||||
generate_mock_once('HTMLPurifier_Config');
|
||||
$config = new HTMLPurifier_ConfigMock($this);
|
||||
$config->setReturnValue('getBatch', $values, array('Test'));
|
||||
$config->version = '1.0.0';
|
||||
$config->revision = 1;
|
||||
return $config;
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,22 @@ class HTMLPurifier_DefinitionCacheTest extends UnitTestCase
|
||||
$cache = HTMLPurifier_DefinitionCache::create('Test', $config);
|
||||
$this->assertEqual($cache, new HTMLPurifier_DefinitionCache_Serializer('Test'));
|
||||
}
|
||||
|
||||
function test_isOld() {
|
||||
$cache = new HTMLPurifier_DefinitionCache('Test'); // non-functional
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->version = '1.0.0';
|
||||
$config->revision = 10;
|
||||
|
||||
$this->assertIdentical($cache->isOld('1.0.0-10-hashstuffhere', $config), false);
|
||||
$this->assertIdentical($cache->isOld('1.5.0-1-hashstuffhere', $config), false);
|
||||
|
||||
$this->assertIdentical($cache->isOld('0.9.0-1-hashstuffhere', $config), true);
|
||||
$this->assertIdentical($cache->isOld('1.0.0-1-hashstuffhere', $config), true);
|
||||
$this->assertIdentical($cache->isOld('1.0.0beta-11-hashstuffhere', $config), true);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
Reference in New Issue
Block a user