1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-07-10 01:06:20 +02:00

[1.7.0] Add versioning to serializer cache

- Make some AttrDef member-variables lazy-loading to save serialization space, clean up others
- Refactor get*Definition() methods

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1116 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2007-05-29 18:19:42 +00:00
parent 426fbd1f97
commit d1187ed331
11 changed files with 169 additions and 71 deletions

3
TODO
View File

@ -13,8 +13,7 @@ TODO List
- Reorganize configuration directives
- Set up anonymous module management by HTMLDefinition (Advanced API)
- Get all AttrTypes into string form
# Clean up HTMLDefinition caching, need easy cache invalidation,
versioning of caches, etc.
# Clean up HTMLDefinition caching, need easy cache invalidation.
- Parse TinyMCE-style whitelist into our %HTML.Allow* whitelists
1.8 release [Refactor, refactor!]

View File

@ -15,13 +15,10 @@ class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
*/
var $ip4;
function HTMLPurifier_AttrDef_URI_IPv4() {
$oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
$this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
}
function validate($aIP, $config, &$context) {
if (!$this->ip4) $this->_loadRegex();
if (preg_match('#^' . $this->ip4 . '$#s', $aIP))
{
return $aIP;
@ -31,6 +28,15 @@ class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
}
/**
* Lazy load function to prevent regex from being stuffed in
* cache.
*/
function _loadRegex() {
$oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
$this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
}
}
?>

View File

@ -13,6 +13,8 @@ class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
function validate($aIP, $config, &$context) {
if (!$this->ip4) $this->_loadRegex();
$original = $aIP;
$hex = '[0-9a-fA-F]';

View File

@ -43,6 +43,13 @@ class HTMLPurifier_Config
*/
var $version = '1.6.1';
/**
* Integer key users can use to indicate they have manually
* overridden some internal behavior and would like the
* cache to invalidate itself.
*/
var $revision = 1;
/**
* Two-level associative array of configuration directives
*/
@ -54,14 +61,9 @@ class HTMLPurifier_Config
var $def;
/**
* Cached instance of HTMLPurifier_HTMLDefinition
* Indexed array of definitions
*/
var $html_definition;
/**
* Cached instance of HTMLPurifier_CSSDefinition
*/
var $css_definition;
var $definitions;
/**
* Bool indicator whether or not config is finalized
@ -205,10 +207,8 @@ class HTMLPurifier_Config
// reset definitions if the directives they depend on changed
// this is a very costly process, so it's discouraged
// with finalization
if ($namespace == 'HTML') {
$this->html_definition = null;
} elseif ($namespace == 'CSS') {
$this->css_definition = null;
if ($namespace == 'HTML' || $namespace == 'CSS') {
$this->definitions[$namespace] = null;
}
}
@ -218,60 +218,61 @@ class HTMLPurifier_Config
* called before it's been setup, otherwise won't work.
*/
function &getHTMLDefinition($raw = false) {
if (!$this->finalized && $this->autoFinalize) $this->finalize();
$cache = HTMLPurifier_DefinitionCache::create('HTML', $this);
if($this->checkDefinition($this->html_definition, $cache, $raw)) {
return $this->html_definition;
}
return $this->createDefinition(
$this->html_definition,
$cache,
$raw,
new HTMLPurifier_HTMLDefinition()
);
return $this->getDefinition('HTML', $raw);
}
/**
* Retrieves reference to the CSS definition
*/
function &getCSSDefinition($raw = false) {
return $this->getDefinition('CSS', $raw);
}
/**
* Retrieves a definition
* @param $type Type of definition: HTML, CSS, etc
* @param $raw Whether or not definition should be returned raw
*/
function &getDefinition($type, $raw = false) {
if (!$this->finalized && $this->autoFinalize) $this->finalize();
$cache = HTMLPurifier_DefinitionCache::create('CSS', $this);
if($this->checkDefinition($this->css_definition, $cache, $raw)) {
return $this->css_definition;
$cache = HTMLPurifier_DefinitionCache::create($type, $this);
if (!$raw) {
// see if we can quickly supply a definition
if (!empty($this->definitions[$type])) {
if (!$this->definitions[$type]->setup) {
$this->definitions[$type]->setup($this);
}
return $this->definitions[$type];
}
// memory check missed, try cache
$this->definitions[$type] = $cache->get($this);
if ($this->definitions[$type]) {
// definition in cache, return it
return $this->definitions[$type];
}
} elseif (
!empty($this->definitions[$type]) &&
!$this->definitions[$type]->setup
) {
// raw requested, raw in memory, quick return
return $this->definitions[$type];
}
return $this->createDefinition(
$this->css_definition,
$cache,
$raw,
new HTMLPurifier_CSSDefinition()
);
}
/**
* Checks the variable and cache for an easy-access definition,
* sets def to variable and returns true if available
*/
function checkDefinition(&$var, $cache, $raw) {
if ($raw) return false;
if (!empty($var)) {
if (!$var->setup) $var->setup($this);
return true;
// quick checks failed, let's create the object
if ($type == 'HTML') {
$this->definitions[$type] = new HTMLPurifier_HTMLDefinition();
} elseif ($type == 'CSS') {
$this->definitions[$type] = new HTMLPurifier_CSSDefinition();
} else {
trigger_error("Definition of $type type not supported");
return false;
}
$var = $cache->get($this);
return (bool) $var;
}
/**
* Generates a new definition, possibly returning it raw, returns
* reference to variable.
*/
function &createDefinition(&$var, $cache, $raw, $obj) {
$var = $obj;
if ($raw) return $var;
$var->setup($this);
$cache->set($var, $this);
return $var;
// quick abort if raw
if ($raw) return $this->definitions[$type];
// set it up
$this->definitions[$type]->setup($this);
// save in cache
$cache->set($this->definitions[$type], $this);
return $this->definitions[$type];
}
/**

View File

@ -33,7 +33,23 @@ class HTMLPurifier_DefinitionCache
* @param Instance of HTMLPurifier_Config
*/
function generateKey($config) {
return md5(serialize($config->getBatch($this->type)));
$version = $config->version;
$revision = $config->revision;
return $version . '-' . $revision . '-' . md5(serialize($config->getBatch($this->type)));
}
/**
* Tests whether or not a key is old with respect to the configuration's
* version and revision number.
* @param $key Key to test
* @param $config Instance of HTMLPurifier_Config to test against
*/
function isOld($key, $config) {
list($version, $revision, $hash) = explode('-', $key, 3);
$compare = version_compare($version, $config->version);
if ($compare > 0) return false;
if ($compare == 0 && $revision >= $config->revision) return false;
return true;
}
/**
@ -99,10 +115,16 @@ class HTMLPurifier_DefinitionCache
/**
* Clears all objects from cache
*/
function flush($config) {
function flush() {
trigger_error('Cannot call abstract method', E_USER_ERROR);
}
/**
* Clears all expired (older version or revision) objects from cache
*/
function cleanup($config) {
trigger_error('Cannot call abstract method', E_USER_ERROR);
}
}
?>

View File

@ -44,13 +44,21 @@ class HTMLPurifier_DefinitionCache_Serializer extends
while (false !== ($filename = readdir($dh))) {
if (empty($filename)) continue;
if ($filename[0] === '.') continue;
// optimization: md5 + .ser will always be 36 char long
// needs to be changed if we change the identifier
if (strlen($filename) !== 36) continue;
unlink($dir . '/' . $filename);
}
}
function cleanup($config) {
$dir = $this->generateDirectoryPath();
$dh = opendir($dir);
while (false !== ($filename = readdir($dh))) {
if (empty($filename)) continue;
if ($filename[0] === '.') continue;
$key = substr($filename, 0, strlen($filename) - 4);
if ($this->isOld($key, $config)) unlink($dir . '/' . $filename);
}
}
/**
* Generates the file path to the serial file corresponding to
* the configuration and definition name

View File

@ -51,6 +51,8 @@ class HTMLPurifier_ElementDef
* Abstract string representation of internal ChildDef rules. See
* HTMLPurifier_ContentSets for how this is parsed and then transformed
* into an HTMLPurifier_ChildDef.
* @warning This is a temporary variable that is not available after
* being processed by HTMLDefinition
* @public
*/
var $content_model;
@ -59,6 +61,8 @@ class HTMLPurifier_ElementDef
* Value of $child->type, used to determine which ChildDef to use,
* used in combination with $content_model.
* @warning This must be lowercase
* @warning This is a temporary variable that is not available after
* being processed by HTMLDefinition
* @public
*/
var $content_model_type;

View File

@ -169,6 +169,12 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
$this->processModules($config);
$this->setupConfigStuff($config);
unset($this->manager);
// cleanup some of the element definitions
foreach ($this->info as $k => $v) {
unset($this->info[$k]->content_model);
unset($this->info[$k]->content_model_type);
}
}
/**

View File

@ -53,7 +53,12 @@ class HTMLPurifier_DefinitionCache_SerializerTest extends HTMLPurifier_Definitio
$cache = new HTMLPurifier_DefinitionCache_Serializer('Test');
$config_array = array('Foo' => 'Bar');
$config_md5 = md5(serialize($config_array));
$config = $this->generateConfigMock($config_array);
$config->version = '1.0.0';
$config->revision = 2;
$config_md5 = '1.0.0-' . $config->revision . '-' . md5(serialize($config_array));
$file = realpath(
$rel_file = dirname(__FILE__) .
@ -62,7 +67,6 @@ class HTMLPurifier_DefinitionCache_SerializerTest extends HTMLPurifier_Definitio
);
if($file && file_exists($file)) unlink($file); // prevent previous failures from causing problems
$config = $this->generateConfigMock($config_array);
$this->assertIdentical($config_md5, $cache->generateKey($config));
$def_original = $this->generateDefinition();
@ -150,6 +154,34 @@ class HTMLPurifier_DefinitionCache_SerializerTest extends HTMLPurifier_Definitio
}
function testCleanup() {
$cache = new HTMLPurifier_DefinitionCache_Serializer('Test');
// in order of age, oldest first
// note that configurations are all identical, but version/revision
// are different
$config1 = $this->generateConfigMock();
$config1->version = '0.9.0';
$config1->revision = 574;
$def1 = $this->generateDefinition(array('info' => 1));
$config2 = $this->generateConfigMock();
$config2->version = '1.0.0beta';
$config2->revision = 1;
$def2 = $this->generateDefinition(array('info' => 3));
$cache->set($def1, $config1);
$cache->cleanup($config1);
$this->assertEqual($def1, $cache->get($config1)); // no change
$cache->cleanup($config2);
$this->assertFalse($cache->get($config1));
$this->assertFalse($cache->get($config2));
}
/**
* Asserts that a file exists, ignoring the stat cache
*/

View File

@ -8,10 +8,12 @@ class HTMLPurifier_DefinitionCacheHarness extends UnitTestCase
* to a getBatch() call
* @param $values Values to return when getBatch is invoked
*/
function generateConfigMock($values) {
function generateConfigMock($values = array()) {
generate_mock_once('HTMLPurifier_Config');
$config = new HTMLPurifier_ConfigMock($this);
$config->setReturnValue('getBatch', $values, array('Test'));
$config->version = '1.0.0';
$config->revision = 1;
return $config;
}

View File

@ -9,6 +9,22 @@ class HTMLPurifier_DefinitionCacheTest extends UnitTestCase
$cache = HTMLPurifier_DefinitionCache::create('Test', $config);
$this->assertEqual($cache, new HTMLPurifier_DefinitionCache_Serializer('Test'));
}
function test_isOld() {
$cache = new HTMLPurifier_DefinitionCache('Test'); // non-functional
$config = HTMLPurifier_Config::createDefault();
$config->version = '1.0.0';
$config->revision = 10;
$this->assertIdentical($cache->isOld('1.0.0-10-hashstuffhere', $config), false);
$this->assertIdentical($cache->isOld('1.5.0-1-hashstuffhere', $config), false);
$this->assertIdentical($cache->isOld('0.9.0-1-hashstuffhere', $config), true);
$this->assertIdentical($cache->isOld('1.0.0-1-hashstuffhere', $config), true);
$this->assertIdentical($cache->isOld('1.0.0beta-11-hashstuffhere', $config), true);
}
}
?>