1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-02 20:27:40 +02:00

[1.7.0] Add versioning to serializer cache

- Make some AttrDef member-variables lazy-loading to save serialization space, clean up others
- Refactor get*Definition() methods

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1116 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2007-05-29 18:19:42 +00:00
parent 426fbd1f97
commit d1187ed331
11 changed files with 169 additions and 71 deletions

View File

@@ -15,13 +15,10 @@ class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
*/
var $ip4;
function HTMLPurifier_AttrDef_URI_IPv4() {
$oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
$this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
}
function validate($aIP, $config, &$context) {
if (!$this->ip4) $this->_loadRegex();
if (preg_match('#^' . $this->ip4 . '$#s', $aIP))
{
return $aIP;
@@ -31,6 +28,15 @@ class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
}
/**
 * Builds the IPv4 matching pattern on demand and stores it in $this->ip4.
 * Done lazily so the pattern string is not stuffed into the serialized
 * cache along with the object.
 */
function _loadRegex() {
    // a single decimal octet, 0-255
    $octet = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])';
    // four octets separated by literal dots, inside a non-capturing group
    $this->ip4 = '(?:' . implode('\\.', array_fill(0, 4, $octet)) . ')';
}
}
?>

View File

@@ -13,6 +13,8 @@ class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
function validate($aIP, $config, &$context) {
if (!$this->ip4) $this->_loadRegex();
$original = $aIP;
$hex = '[0-9a-fA-F]';

View File

@@ -43,6 +43,13 @@ class HTMLPurifier_Config
*/
var $version = '1.6.1';
/**
* Integer key users can use to indicate they have manually
* overridden some internal behavior and would like the
* cache to invalidate itself.
*/
var $revision = 1;
/**
* Two-level associative array of configuration directives
*/
@@ -54,14 +61,9 @@ class HTMLPurifier_Config
var $def;
/**
* Cached instance of HTMLPurifier_HTMLDefinition
* Associative array of definitions, keyed by definition type (e.g. 'HTML', 'CSS')
*/
var $html_definition;
/**
* Cached instance of HTMLPurifier_CSSDefinition
*/
var $css_definition;
var $definitions;
/**
* Bool indicator whether or not config is finalized
@@ -205,10 +207,8 @@ class HTMLPurifier_Config
// reset definitions if the directives they depend on changed
// this is a very costly process, so it's discouraged
// with finalization
if ($namespace == 'HTML') {
$this->html_definition = null;
} elseif ($namespace == 'CSS') {
$this->css_definition = null;
if ($namespace == 'HTML' || $namespace == 'CSS') {
$this->definitions[$namespace] = null;
}
}
@@ -218,60 +218,61 @@ class HTMLPurifier_Config
* called before it's been setup, otherwise won't work.
*/
function &getHTMLDefinition($raw = false) {
if (!$this->finalized && $this->autoFinalize) $this->finalize();
$cache = HTMLPurifier_DefinitionCache::create('HTML', $this);
if($this->checkDefinition($this->html_definition, $cache, $raw)) {
return $this->html_definition;
}
return $this->createDefinition(
$this->html_definition,
$cache,
$raw,
new HTMLPurifier_HTMLDefinition()
);
return $this->getDefinition('HTML', $raw);
}
/**
* Retrieves reference to the CSS definition.
* Thin wrapper that delegates to getDefinition() with the type 'CSS'.
* @param $raw Passed through unchanged to getDefinition(); defaults to false
* @return Whatever getDefinition('CSS', $raw) returns
*/
function &getCSSDefinition($raw = false) {
return $this->getDefinition('CSS', $raw);
}
/**
* Retrieves a definition
* @param $type Type of definition: HTML, CSS, etc
* @param $raw Whether or not definition should be returned raw
*/
function &getDefinition($type, $raw = false) {
if (!$this->finalized && $this->autoFinalize) $this->finalize();
$cache = HTMLPurifier_DefinitionCache::create('CSS', $this);
if($this->checkDefinition($this->css_definition, $cache, $raw)) {
return $this->css_definition;
$cache = HTMLPurifier_DefinitionCache::create($type, $this);
if (!$raw) {
// see if we can quickly supply a definition
if (!empty($this->definitions[$type])) {
if (!$this->definitions[$type]->setup) {
$this->definitions[$type]->setup($this);
}
return $this->definitions[$type];
}
// memory check missed, try cache
$this->definitions[$type] = $cache->get($this);
if ($this->definitions[$type]) {
// definition in cache, return it
return $this->definitions[$type];
}
} elseif (
!empty($this->definitions[$type]) &&
!$this->definitions[$type]->setup
) {
// raw requested, raw in memory, quick return
return $this->definitions[$type];
}
return $this->createDefinition(
$this->css_definition,
$cache,
$raw,
new HTMLPurifier_CSSDefinition()
);
}
/**
* Checks the variable and cache for an easy-access definition,
* sets def to variable and returns true if available
*/
function checkDefinition(&$var, $cache, $raw) {
if ($raw) return false;
if (!empty($var)) {
if (!$var->setup) $var->setup($this);
return true;
// quick checks failed, let's create the object
if ($type == 'HTML') {
$this->definitions[$type] = new HTMLPurifier_HTMLDefinition();
} elseif ($type == 'CSS') {
$this->definitions[$type] = new HTMLPurifier_CSSDefinition();
} else {
trigger_error("Definition of $type type not supported");
return false;
}
$var = $cache->get($this);
return (bool) $var;
}
/**
* Generates a new definition, possibly returning it raw, returns
* reference to variable.
*/
function &createDefinition(&$var, $cache, $raw, $obj) {
$var = $obj;
if ($raw) return $var;
$var->setup($this);
$cache->set($var, $this);
return $var;
// quick abort if raw
if ($raw) return $this->definitions[$type];
// set it up
$this->definitions[$type]->setup($this);
// save in cache
$cache->set($this->definitions[$type], $this);
return $this->definitions[$type];
}
/**

View File

@@ -33,7 +33,23 @@ class HTMLPurifier_DefinitionCache
* @param Instance of HTMLPurifier_Config
*/
function generateKey($config) {
return md5(serialize($config->getBatch($this->type)));
$version = $config->version;
$revision = $config->revision;
return $version . '-' . $revision . '-' . md5(serialize($config->getBatch($this->type)));
}
/**
 * Tests whether or not a key is old with respect to the configuration's
 * version and revision number.
 *
 * Keys are expected in the "<version>-<revision>-<hash>" layout that
 * generateKey() produces. A key that does not have all three parts
 * (for instance a bare md5 hash) cannot be matched against the current
 * version and is therefore reported as old.
 *
 * @param $key Key to test
 * @param $config Instance of HTMLPurifier_Config to test against
 * @return true if the key is malformed, was written by an older version,
 *         or was written by the same version with a lower revision;
 *         false otherwise
 */
function isOld($key, $config) {
    // fewer than two separators means the key lacks a version/revision
    // prefix; unpacking it would hit undefined offsets and compare a
    // hash against a version number
    if (substr_count($key, '-') < 2) return true;
    // the hash part is not needed for the age check, so only the first
    // two pieces are unpacked
    list($version, $revision) = explode('-', $key, 3);
    $compare = version_compare($version, $config->version);
    // written by a newer version than the one running: not old
    if ($compare > 0) return false;
    // same version: compare revisions numerically, not as strings
    if ($compare == 0 && (int) $revision >= $config->revision) return false;
    return true;
}
/**
@@ -99,10 +115,16 @@ class HTMLPurifier_DefinitionCache
/**
* Clears all objects from cache
*/
function flush($config) {
function flush() {
trigger_error('Cannot call abstract method', E_USER_ERROR);
}
/**
 * Abstract hook: clears all expired (older version or revision) objects
 * from cache. This base implementation only raises an E_USER_ERROR.
 * @param $config Configuration object (not used by this base implementation)
 */
function cleanup($config) {
    $message = 'Cannot call abstract method';
    trigger_error($message, E_USER_ERROR);
}
}
?>

View File

@@ -44,13 +44,21 @@ class HTMLPurifier_DefinitionCache_Serializer extends
while (false !== ($filename = readdir($dh))) {
if (empty($filename)) continue;
if ($filename[0] === '.') continue;
// optimization: md5 + .ser will always be 36 char long
// needs to be changed if we change the identifier
if (strlen($filename) !== 36) continue;
unlink($dir . '/' . $filename);
}
}
/**
 * Deletes every file in the cache directory whose key isOld() reports as
 * expired for the given configuration.
 * @param $config Configuration object handed to isOld() for comparison
 */
function cleanup($config) {
    $dir = $this->generateDirectoryPath();
    $dh = opendir($dir);
    // opendir() has already raised its own warning on failure; without a
    // handle there is nothing to iterate and readdir() must not be called
    if (false === $dh) return;
    while (false !== ($filename = readdir($dh))) {
        if (empty($filename)) continue;
        // skip ".", ".." and any other dot-file
        if ($filename[0] === '.') continue;
        // drop the last four characters to recover the key
        // (presumably the ".ser" extension -- confirm against the writer)
        $key = substr($filename, 0, strlen($filename) - 4);
        if ($this->isOld($key, $config)) unlink($dir . '/' . $filename);
    }
    // release the directory handle rather than holding it until script end
    closedir($dh);
}
/**
* Generates the file path to the serial file corresponding to
* the configuration and definition name

View File

@@ -51,6 +51,8 @@ class HTMLPurifier_ElementDef
* Abstract string representation of internal ChildDef rules. See
* HTMLPurifier_ContentSets for how this is parsed and then transformed
* into an HTMLPurifier_ChildDef.
* @warning This is a temporary variable that is not available after
* being processed by HTMLDefinition
* @public
*/
var $content_model;
@@ -59,6 +61,8 @@ class HTMLPurifier_ElementDef
* Value of $child->type, used to determine which ChildDef to use,
* used in combination with $content_model.
* @warning This must be lowercase
* @warning This is a temporary variable that is not available after
* being processed by HTMLDefinition
* @public
*/
var $content_model_type;

View File

@@ -169,6 +169,12 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
$this->processModules($config);
$this->setupConfigStuff($config);
unset($this->manager);
// cleanup some of the element definitions
foreach ($this->info as $k => $v) {
unset($this->info[$k]->content_model);
unset($this->info[$k]->content_model_type);
}
}
/**