1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-07-30 19:00:10 +02:00

[1.5.0] Implement TransformToStrict proprietary module

- Factored out strictblockquote from the common definition
- Text module now follows "strict" rules by default
- attr_transform_* are now indexed with string keys, to allow overloading
- Implement ElementDef merging, and add a standalone class variable to ElementDef to prevent half-baked element definitions from masquerading as full ones
- Implement merging global attributes from modules, namely info_attr_transform_post, info_attr_transform_pre and info_tag_transform
- Rename setupInfo() to processModules()
- Fix typo in HTMLModule/Bdo.php

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@731 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2007-02-10 23:35:21 +00:00
parent bd544ad038
commit 54a68a1713
8 changed files with 156 additions and 56 deletions

View File

@@ -5,7 +5,6 @@ require_once 'HTMLPurifier/ChildDef.php';
require_once 'HTMLPurifier/ChildDef/Empty.php'; require_once 'HTMLPurifier/ChildDef/Empty.php';
require_once 'HTMLPurifier/ChildDef/Required.php'; require_once 'HTMLPurifier/ChildDef/Required.php';
require_once 'HTMLPurifier/ChildDef/Optional.php'; require_once 'HTMLPurifier/ChildDef/Optional.php';
require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php'; // transform
class HTMLPurifier_ContentSets class HTMLPurifier_ContentSets
{ {
@@ -111,8 +110,6 @@ class HTMLPurifier_ContentSets
return new HTMLPurifier_ChildDef_Optional($value); return new HTMLPurifier_ChildDef_Optional($value);
case 'empty': case 'empty':
return new HTMLPurifier_ChildDef_Empty(); return new HTMLPurifier_ChildDef_Empty();
case 'strictblockquote':
return new HTMLPurifier_ChildDef_StrictBlockquote($value);
case 'custom': case 'custom':
return new HTMLPurifier_ChildDef_Custom($value); return new HTMLPurifier_ChildDef_Custom($value);
} }

View File

@@ -7,6 +7,12 @@
class HTMLPurifier_ElementDef class HTMLPurifier_ElementDef
{ {
/**
* Does the definition work by itself, or is it created solely
* for the purpose of merging into another definition?
*/
var $standalone = true;
/** /**
* Associative array of attribute name to HTMLPurifier_AttrDef * Associative array of attribute name to HTMLPurifier_AttrDef
* @note Before being processed by HTMLPurifier_AttrCollections * @note Before being processed by HTMLPurifier_AttrCollections
@@ -22,13 +28,13 @@ class HTMLPurifier_ElementDef
var $attr = array(); var $attr = array();
/** /**
* List of tag's HTMLPurifier_AttrTransform to be done before validation * Indexed list of tag's HTMLPurifier_AttrTransform to be done before validation
* @public * @public
*/ */
var $attr_transform_pre = array(); var $attr_transform_pre = array();
/** /**
* List of tag's HTMLPurifier_AttrTransform to be done after validation * Indexed list of tag's HTMLPurifier_AttrTransform to be done after validation
* @public * @public
*/ */
var $attr_transform_post = array(); var $attr_transform_post = array();
@@ -79,6 +85,27 @@ class HTMLPurifier_ElementDef
*/ */
var $excludes = array(); var $excludes = array();
/**
 * Merges the values of another element definition into this one.
 * Values from the new element def take precedence if a value is
 * not mergeable.
 *
 * Keyed collections (attr, attr_transform_pre, attr_transform_post,
 * auto_close, excludes) are merged entry by entry, so an entry in $def
 * replaces the entry stored here under the same key. The content model
 * is extended with ' | ' rather than replaced. The standalone flag is
 * not touched by this method.
 *
 * @param $def Element definition whose values are merged into this one
 */
function mergeIn($def) {
    // keyed collections: entries from $def overwrite same-keyed entries
    $keyed = array(
        'attr',
        'attr_transform_pre',
        'attr_transform_post',
        'auto_close',
        'excludes'
    );
    foreach ($keyed as $property) {
        foreach ($def->$property as $k => $v) {
            $this->{$property}[$k] = $v;
        }
    }

    // scalar-like values: taken from $def only when $def actually sets them
    if (!is_null($def->child)) {
        $this->child = $def->child;
    }
    if (!empty($def->content_model)) {
        if (is_string($this->content_model) && $this->content_model !== '') {
            $this->content_model .= ' | ' . $def->content_model;
        } else {
            // nothing to extend: appending here would leave a dangling
            // leading ' | ' separator in the resulting model string
            $this->content_model = $def->content_model;
        }
    }
    if (!empty($def->content_model_type)) {
        $this->content_model_type = $def->content_model_type;
    }
    if (!is_null($def->descendants_are_inline)) {
        $this->descendants_are_inline = $def->descendants_are_inline;
    }
}
} }
?> ?>

View File

@@ -34,6 +34,9 @@ require_once 'HTMLPurifier/HTMLModule/Tables.php';
require_once 'HTMLPurifier/HTMLModule/Image.php'; require_once 'HTMLPurifier/HTMLModule/Image.php';
require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php'; require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php';
// compat modules
require_once 'HTMLPurifier/HTMLModule/TransformToStrict.php';
HTMLPurifier_ConfigSchema::define( HTMLPurifier_ConfigSchema::define(
'HTML', 'EnableAttrID', false, 'bool', 'HTML', 'EnableAttrID', false, 'bool',
'Allows the ID attribute in HTML. This is disabled by default '. 'Allows the ID attribute in HTML. This is disabled by default '.
@@ -168,19 +171,19 @@ class HTMLPurifier_HTMLDefinition
/** /**
* Associative array of deprecated tag name to HTMLPurifier_TagTransform * Associative array of deprecated tag name to HTMLPurifier_TagTransform
* @public * @public
*/ // use + operator */
var $info_tag_transform = array(); var $info_tag_transform = array();
/** /**
* List of HTMLPurifier_AttrTransform to be performed before validation. * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
* @public * @public
*/ // use array_merge or a foreach loop */
var $info_attr_transform_pre = array(); var $info_attr_transform_pre = array();
/** /**
* List of HTMLPurifier_AttrTransform to be performed after validation. * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
* @public * @public
*/ // use array_merge or a foreach loop */
var $info_attr_transform_post = array(); var $info_attr_transform_post = array();
/** /**
@@ -241,6 +244,7 @@ class HTMLPurifier_HTMLDefinition
// this will eventually influence module loading // this will eventually influence module loading
$this->strict = $config->get('HTML', 'Strict'); $this->strict = $config->get('HTML', 'Strict');
// order is important!
$this->modules['Text'] = new HTMLPurifier_HTMLModule_Text(); $this->modules['Text'] = new HTMLPurifier_HTMLModule_Text();
$this->modules['Hypertext'] = new HTMLPurifier_HTMLModule_Hypertext(); $this->modules['Hypertext'] = new HTMLPurifier_HTMLModule_Hypertext();
$this->modules['List'] = new HTMLPurifier_HTMLModule_List(); $this->modules['List'] = new HTMLPurifier_HTMLModule_List();
@@ -251,6 +255,8 @@ class HTMLPurifier_HTMLDefinition
$this->modules['Image'] = new HTMLPurifier_HTMLModule_Image(); $this->modules['Image'] = new HTMLPurifier_HTMLModule_Image();
$this->modules['StyleAttribute']= new HTMLPurifier_HTMLModule_StyleAttribute(); $this->modules['StyleAttribute']= new HTMLPurifier_HTMLModule_StyleAttribute();
$this->modules['TransformToStrict'] = new HTMLPurifier_HTMLModule_TransformToStrict($config);
$this->attr_types = new HTMLPurifier_AttrTypes(); $this->attr_types = new HTMLPurifier_AttrTypes();
$this->attr_collections = new HTMLPurifier_AttrCollections(); $this->attr_collections = new HTMLPurifier_AttrCollections();
$this->content_sets = new HTMLPurifier_ContentSets(); $this->content_sets = new HTMLPurifier_ContentSets();
@@ -279,7 +285,7 @@ class HTMLPurifier_HTMLDefinition
// would be nice if we could put each of these in their // would be nice if we could put each of these in their
// own object, would make this hookable too! // own object, would make this hookable too!
$this->setupInfo($config); $this->processModules($config);
$this->setupAttrTransform($config); $this->setupAttrTransform($config);
$this->setupBlockWrapper($config); $this->setupBlockWrapper($config);
$this->setupParent($config); $this->setupParent($config);
@@ -288,24 +294,39 @@ class HTMLPurifier_HTMLDefinition
} }
/** /**
* Sets up the info array. * Processes the modules, setting up related info variables
* @param $config Instance of HTMLPurifier_Config * @param $config Instance of HTMLPurifier_Config
*/ */
function setupInfo($config) { function processModules($config) {
$this->attr_collections->setup($this->attr_types, $this->modules); $this->attr_collections->setup($this->attr_types, $this->modules);
$this->content_sets->setup($this->modules); $this->content_sets->setup($this->modules);
$this->info_content_sets = $this->content_sets->lookup; $this->info_content_sets = $this->content_sets->lookup;
foreach ($this->modules as $module_i => $module) { foreach ($this->modules as $module_i => $module) {
// process element-wise definitions
foreach ($module->info as $name => $def) { foreach ($module->info as $name => $def) {
$def =& $this->modules[$module_i]->info[$name]; // setup info
if (!isset($this->info[$name])) {
if ($def->standalone) {
$this->info[$name] = $this->modules[$module_i]->info[$name];
} else {
// attempting to merge into an element that doesn't
// exist, ignore it
continue;
}
} else {
$this->info[$name]->mergeIn($this->modules[$module_i]->info[$name]);
}
// process info
$def = $this->info[$name];
// attribute value expansions // attribute value expansions
$this->attr_collections->performInclusions($def->attr); $this->attr_collections->performInclusions($def->attr);
$this->attr_collections->expandIdentifiers( $this->attr_collections->expandIdentifiers(
$def->attr, $this->attr_types); $def->attr, $this->attr_types);
// chameleon data, set descendants_are_inline // descendants_are_inline, for ChildDef_Chameleon
if (is_string($def->content_model) && if (is_string($def->content_model) &&
strpos($def->content_model, 'Inline') !== false) { strpos($def->content_model, 'Inline') !== false) {
if ($name != 'del' && $name != 'ins') { if ($name != 'del' && $name != 'ins') {
@@ -317,13 +338,16 @@ class HTMLPurifier_HTMLDefinition
// set child def from content model // set child def from content model
$this->content_sets->generateChildDef($def, $module); $this->content_sets->generateChildDef($def, $module);
// setup info
$this->info[$name] = $def; $this->info[$name] = $def;
if ($this->info_parent == $name) {
$this->info_parent_def = $this->info[$name];
}
} }
// merge in global info variables from module
foreach($module->info_tag_transform as $k => $v) $this->info_tag_transform[$k] = $v;
foreach($module->info_attr_transform_pre as $k => $v) $this->info_attr_transform_pre[$k] = $v;
foreach($module->info_attr_transform_post as $k => $v) $this->info_attr_transform_post[$k]= $v;
} }
} }
/** /**
@@ -369,17 +393,13 @@ class HTMLPurifier_HTMLDefinition
*/ */
function setupCompat($config) { function setupCompat($config) {
// convenience for compat
$e_Inline = new HTMLPurifier_ChildDef_Optional( $e_Inline = new HTMLPurifier_ChildDef_Optional(
$this->info_content_sets['Inline'] + $this->info_content_sets['Inline'] +
array('#PCDATA' => true)); array('#PCDATA' => true));
// blockquote changes, implement in TransformStrict and Legacy // blockquote alt child def, implement in Legacy
if ($this->strict) { if (!$this->strict) {
$this->info['blockquote']->child =
new HTMLPurifier_ChildDef_StrictBlockquote(
$this->info_content_sets['Block'] +
array('#PCDATA' => true));
} else {
$this->info['blockquote']->child = $this->info['blockquote']->child =
new HTMLPurifier_ChildDef_Optional( new HTMLPurifier_ChildDef_Optional(
$this->info_content_sets['Flow'] + $this->info_content_sets['Flow'] +
@@ -409,8 +429,7 @@ class HTMLPurifier_HTMLDefinition
array('#PCDATA' => true, 'p' => true)); array('#PCDATA' => true, 'p' => true));
} }
// custom, not sure where to implement, because it's not // deprecated config setting, implement in DisableURI module
// just /one/ module
if ($config->get('Attr', 'DisableURI')) { if ($config->get('Attr', 'DisableURI')) {
$this->info['a']->attr['href'] = $this->info['a']->attr['href'] =
$this->info['img']->attr['longdesc'] = $this->info['img']->attr['longdesc'] =
@@ -427,28 +446,7 @@ class HTMLPurifier_HTMLDefinition
$this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer(); $this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
} }
// deprecated elements transforms, implement in TransformToStrict // setup allowed elements, SubtractiveWhitelist module
$this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font();
$this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul');
$this->info_tag_transform['dir'] = new HTMLPurifier_TagTransform_Simple('ul');
$this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center();
// deprecated attribute transforms, implement in TransformToStrict
$this->info['h1']->attr_transform_pre[] =
$this->info['h2']->attr_transform_pre[] =
$this->info['h3']->attr_transform_pre[] =
$this->info['h4']->attr_transform_pre[] =
$this->info['h5']->attr_transform_pre[] =
$this->info['h6']->attr_transform_pre[] =
$this->info['p'] ->attr_transform_pre[] =
new HTMLPurifier_AttrTransform_TextAlign();
// xml:lang <=> lang mirroring, implement in TransformToStrict?
$this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang();
$this->info_global_attr['lang'] = new HTMLPurifier_AttrDef_Lang();
// setup allowed elements, obsoleted by Modules? (does offer
// different functionality)
$allowed_elements = $config->get('HTML', 'AllowedElements'); $allowed_elements = $config->get('HTML', 'AllowedElements');
if (is_array($allowed_elements)) { if (is_array($allowed_elements)) {
foreach ($this->info as $name => $d) { foreach ($this->info as $name => $d) {

View File

@@ -53,6 +53,24 @@ class HTMLPurifier_HTMLModule
*/ */
var $attr_collections = array(); var $attr_collections = array();
/**
 * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
 * Entries are copied key-by-key into the definition's own
 * info_tag_transform when modules are processed.
 * @public
 */
var $info_tag_transform = array();
/**
 * Indexed list of HTMLPurifier_AttrTransform to be performed before
 * validation. Entries are merged by key, so a later module can replace
 * an earlier module's entry by reusing its key.
 * @public
 */
var $info_attr_transform_pre = array();
/**
 * Indexed list of HTMLPurifier_AttrTransform to be performed after
 * validation. Entries are merged by key, so a later module can replace
 * an earlier module's entry by reusing its key.
 * @public
 */
var $info_attr_transform_post = array();
/** /**
* Boolean flag that indicates whether or not getChildDef is implemented. * Boolean flag that indicates whether or not getChildDef is implemented.
* For optimization reasons: may save a call to a function. Be sure * For optimization reasons: may save a call to a function. Be sure
@@ -72,6 +90,7 @@ class HTMLPurifier_HTMLModule
* @public * @public
*/ */
function getChildDef($def) {return false;} function getChildDef($def) {return false;}
} }
?> ?>

View File

@@ -13,13 +13,13 @@ class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule
var $elements = array('bdo'); var $elements = array('bdo');
var $info = array(); var $info = array();
var $content_sets = array('Inline' => 'bdo'); var $content_sets = array('Inline' => 'bdo');
var $attr_collections_info = array( var $attr_collections = array(
'I18N' => array('dir' => false) 'I18N' => array('dir' => false)
); );
function HTMLPurifier_HTMLModule_Bdo() { function HTMLPurifier_HTMLModule_Bdo() {
$dir = new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false); $dir = new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false);
$this->attr_collections_info['I18N']['dir'] = $dir; $this->attr_collections['I18N']['dir'] = $dir;
$this->info['bdo'] = new HTMLPurifier_ElementDef(); $this->info['bdo'] = new HTMLPurifier_ElementDef();
$this->info['bdo']->attr = array( $this->info['bdo']->attr = array(
0 => array('Core', 'Lang'), 0 => array('Core', 'Lang'),
@@ -33,7 +33,7 @@ class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule
$this->info['bdo']->content_model = '#PCDATA | Inline'; $this->info['bdo']->content_model = '#PCDATA | Inline';
$this->info['bdo']->content_model_type = 'optional'; $this->info['bdo']->content_model_type = 'optional';
// provides fallback behavior if dir's missing (dir is required) // provides fallback behavior if dir's missing (dir is required)
$this->info['bdo']->attr_transform_post[] = $this->info['bdo']->attr_transform_post['required-dir'] =
new HTMLPurifier_AttrTransform_BdoDir(); new HTMLPurifier_AttrTransform_BdoDir();
} }

View File

@@ -45,7 +45,7 @@ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
$this->info[$element]->content_model_type = 'empty'; $this->info[$element]->content_model_type = 'empty';
} elseif ($element == 'blockquote') { } elseif ($element == 'blockquote') {
$this->info[$element]->content_model = 'Heading | Block | List'; $this->info[$element]->content_model = 'Heading | Block | List';
$this->info[$element]->content_model_type = 'strictblockquote'; $this->info[$element]->content_model_type = 'optional';
} elseif ($element == 'div') { } elseif ($element == 'div') {
$this->info[$element]->content_model = '#PCDATA | Flow'; $this->info[$element]->content_model = '#PCDATA | Flow';
$this->info[$element]->content_model_type = 'optional'; $this->info[$element]->content_model_type = 'optional';

View File

@@ -1,5 +1,7 @@
<?php <?php
require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
/** /**
* Proprietary module that transforms deprecated elements into Strict * Proprietary module that transforms deprecated elements into Strict
* HTML (see HTML 4.01 and XHTML 1.0) when possible. * HTML (see HTML 4.01 and XHTML 1.0) when possible.
@@ -8,7 +10,62 @@
class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule
{ {
// unimplemented // we're actually modifying these elements, not defining them
var $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'blockquote');
var $info_tag_transform = array(
// placeholders, see constructor for definitions
'font' => false,
'menu' => false,
'dir' => false,
'center'=> false
);
var $attr_collections = array(
'Lang' => array(
'lang' => false // placeholder
)
);
/**
 * Fills in the placeholders declared on the class: tag transforms for
 * the deprecated elements, merge-only element definitions carrying the
 * align transform, and the lang attribute handling.
 *
 * @param $config Configuration object; HTML.Strict is read from it to
 *                decide whether blockquote gets the strict child
 *                definition
 */
function HTMLPurifier_HTMLModule_TransformToStrict($config) {

    // deprecated elements and the transforms that replace them
    $this->info_tag_transform['font']   = new HTMLPurifier_TagTransform_Font();
    $this->info_tag_transform['menu']   = new HTMLPurifier_TagTransform_Simple('ul');
    $this->info_tag_transform['dir']    = new HTMLPurifier_TagTransform_Simple('ul');
    $this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center();

    // these definitions only exist to be merged into real ones, so
    // flag each of them as not standalone
    foreach ($this->elements as $element_name) {
        $this->info[$element_name] = new HTMLPurifier_ElementDef();
        $this->info[$element_name]->standalone = false;
    }

    // deprecated align attribute: one transform shared by headings and p
    $text_align = new HTMLPurifier_AttrTransform_TextAlign();
    $aligned = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p');
    foreach ($aligned as $element_name) {
        $this->info[$element_name]->attr_transform_pre['align'] = $text_align;
    }

    // xml:lang <=> lang mirroring; keyed as 'lang' so that another
    // module (TransformToXHTML11) can override it
    $this->info_attr_transform_post['lang'] = new HTMLPurifier_AttrTransform_Lang();
    $this->attr_collections['Lang']['lang'] = new HTMLPurifier_AttrDef_Lang();

    $is_strict = $config->get('HTML', 'Strict');
    if ($is_strict) {
        $this->info['blockquote']->content_model_type = 'strictblockquote';
        // false (rather than null) so the merge replaces any existing
        // child def and it gets recalculated
        $this->info['blockquote']->child = false;
    }

}
var $defines_child_def = true;
/**
 * Supplies the child definition for the 'strictblockquote' content
 * model type.
 *
 * @param $def Element definition whose content_model_type is inspected
 * @return HTMLPurifier_ChildDef_StrictBlockquote built from the
 *         definition's content model, or false for any other type
 */
function getChildDef($def) {
    if ($def->content_model_type == 'strictblockquote') {
        return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
    }
    // not a type this module handles
    return false;
}
} }

View File

@@ -130,7 +130,9 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
$elements = array(); $elements = array();
$attr = array(); $attr = array();
if (isset($def->elements)) { if (isset($def->elements)) {
if ($def->type == 'strictblockquote') $def->validateChildren(array(), $this->config, $context); if ($def->type == 'strictblockquote') {
$def->validateChildren(array(), $this->config, $context);
}
$elements = $def->elements; $elements = $def->elements;
} elseif ($def->type == 'chameleon') { } elseif ($def->type == 'chameleon') {
$attr['rowspan'] = 2; $attr['rowspan'] = 2;