1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-05 21:57:26 +02:00

Make the definition format much more logical. Begin migrating specification docs to their respective classes.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@133 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2006-07-30 19:11:18 +00:00
parent 70bd80e66a
commit 558c49a92d
6 changed files with 86 additions and 93 deletions

View File

@@ -34,6 +34,7 @@ class HTMLPurifier_Definition
'table' => true,
'ul' => true
);
var $info_global_attr = array();
function instance() {
static $instance = null;
@@ -49,6 +50,20 @@ class HTMLPurifier_Definition
function setup() {
// emulates the structure of the DTD
$allowed_tags =
array(
'ins', 'del', 'blockquote', 'dd', 'li', 'div', 'em', 'strong',
'dfn', 'code', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym',
'q', 'sub', 'tt', 'sup', 'i', 'b', 'big', 'small', 'u', 's',
'strike', 'bdo', 'span', 'dt', 'p', 'h1', 'h2', 'h3', 'h4',
'h5', 'h6', 'ol', 'ul', 'dl', 'address', 'img', 'br', 'hr',
'pre', 'a'
);
foreach ($allowed_tags as $tag) {
$this->info[$tag] = new HTMLPurifier_ElementDef();
}
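// entities: each DTD entity is held in a variable prefixed with e_, with any
// "." in the entity name replaced by "_" (e.g. misc.inline => $e_misc_inline)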
// we don't use an array because that complicates interpolation
// strings are used instead of arrays because if you use arrays,
@@ -96,73 +111,67 @@ class HTMLPurifier_Definition
$e_form_content = new HTMLPurifier_ChildDef_Optional(''); //unused
$e_form_button_content = new HTMLPurifier_ChildDef_Optional(''); // unused
$this->info['child'] = array();
$this->info['ins']->child =
$this->info['del']->child =
$this->info['blockquote']->child =
$this->info['dd']->child =
$this->info['li']->child =
$this->info['div']->child = $e_Flow;
$this->info['child']['ins'] =
$this->info['child']['del'] =
$this->info['child']['blockquote'] =
$this->info['child']['dd'] =
$this->info['child']['li'] =
$this->info['child']['div'] = $e_Flow;
$this->info['em']->child =
$this->info['strong']->child =
$this->info['dfn']->child =
$this->info['code']->child =
$this->info['samp']->child =
$this->info['kbd']->child =
$this->info['var']->child =
$this->info['cite']->child =
$this->info['abbr']->child =
$this->info['acronym']->child =
$this->info['q']->child =
$this->info['sub']->child =
$this->info['tt']->child =
$this->info['sup']->child =
$this->info['i']->child =
$this->info['b']->child =
$this->info['big']->child =
$this->info['small']->child =
$this->info['u']->child =
$this->info['s']->child =
$this->info['strike']->child =
$this->info['bdo']->child =
$this->info['span']->child =
$this->info['dt']->child =
$this->info['p']->child =
$this->info['h1']->child =
$this->info['h2']->child =
$this->info['h3']->child =
$this->info['h4']->child =
$this->info['h5']->child =
$this->info['h6']->child = $e_Inline;
$this->info['child']['em'] =
$this->info['child']['strong'] =
$this->info['child']['dfn'] =
$this->info['child']['code'] =
$this->info['child']['samp'] =
$this->info['child']['kbd'] =
$this->info['child']['var'] =
$this->info['child']['code'] =
$this->info['child']['samp'] =
$this->info['child']['kbd'] =
$this->info['child']['var'] =
$this->info['child']['cite'] =
$this->info['child']['abbr'] =
$this->info['child']['acronym'] =
$this->info['child']['q'] =
$this->info['child']['sub'] =
$this->info['child']['tt'] =
$this->info['child']['sup'] =
$this->info['child']['i'] =
$this->info['child']['b'] =
$this->info['child']['big'] =
$this->info['child']['small'] =
$this->info['child']['u'] =
$this->info['child']['s'] =
$this->info['child']['strike'] =
$this->info['child']['bdo'] =
$this->info['child']['span'] =
$this->info['child']['dt'] =
$this->info['child']['p'] =
$this->info['child']['h1'] =
$this->info['child']['h2'] =
$this->info['child']['h3'] =
$this->info['child']['h4'] =
$this->info['child']['h5'] =
$this->info['child']['h6'] = $e_Inline;
$this->info['ol']->child =
$this->info['ul']->child = new HTMLPurifier_ChildDef_Required('li');
$this->info['child']['ol'] =
$this->info['child']['ul'] = new HTMLPurifier_ChildDef_Required('li');
$this->info['child']['dl'] = new HTMLPurifier_ChildDef_Required('dt|dd');
$this->info['child']['address'] =
$this->info['dl']->child = new HTMLPurifier_ChildDef_Required('dt|dd');
$this->info['address']->child =
new HTMLPurifier_ChildDef_Optional("#PCDATA | p | $e_inline".
" | $e_misc_inline");
$this->info['child']['img'] =
$this->info['child']['br'] =
$this->info['child']['hr'] = new HTMLPurifier_ChildDef_Empty();
$this->info['img']->child =
$this->info['br']->child =
$this->info['hr']->child = new HTMLPurifier_ChildDef_Empty();
$this->info['child']['pre'] = $e_pre_content;
$this->info['pre']->child = $e_pre_content;
$this->info['child']['a'] = $e_a_content;
$this->info['a']->child = $e_a_content;
// attribute info
// this doesn't include REQUIRED declarations, those are handled
// by the transform classes
// attrs, included in almost every single one except for a few
$this->info['attr']['*'] = array(
$this->info_global_attr = array(
// core attrs
'id' => new HTMLPurifier_AttrDef_ID(),
// i18n
@@ -176,13 +185,8 @@ class HTMLPurifier_Definition
class HTMLPurifier_ElementDef
{
var $child_def;
var $attr_def = array();
function HTMLPurifier_ElementDef($child_def, $attr_def = array()) {
$this->child_def = $child_def;
$this->attr_def = $attr_def;
}
var $child;
var $attr = array();
}

View File

@@ -38,8 +38,11 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// $i is index of start token
// $j is index of end token
// DEFINITION CALL
$child_def = $this->definition->info[$tokens[$i]->name]->child;
// have DTD child def validate children
$child_def = $this->definition->info['child'][$tokens[$i]->name];
$result = $child_def->validateChildren($child_tokens);
// process result

View File

@@ -23,7 +23,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$result[] = $token;
continue;
}
$info = $this->definition->info['child'][$token->name]; // assumption but valid
// DEFINITION CALL
$info = $this->definition->info[$token->name]->child;
// test if it claims to be a start tag but is empty
if ($info->type == 'empty' &&

View File

@@ -4,6 +4,13 @@ require_once 'HTMLPurifier/Strategy.php';
require_once 'HTMLPurifier/Definition.php';
require_once 'HTMLPurifier/Generator.php';
/**
* Removes all unrecognized tags from the list of tokens.
*
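* This strategy iterates through all the tokens; any tag whose name is not
* present in the definition is replaced by a text token containing the HTML
* generated for that tag, so it no longer acts as markup.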
*/
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{
@@ -19,7 +26,8 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
$result = array();
foreach($tokens as $token) {
if (!empty( $token->is_tag )) {
if (!isset($this->definition->info['child'][$token->name])) {
// DEFINITION CALL
if (!isset($this->definition->info[$token->name])) {
// invalid tag, generate HTML and insert in
$token = new HTMLPurifier_Token_Text(
$this->generator->generateFromToken($token)

View File

@@ -15,13 +15,14 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
function execute($tokens) {
$accumulator = new HTMLPurifier_IDAccumulator();
$d_defs = $this->definition->info['attr']['*'];
$d_defs = $this->definition->info_global_attr;
foreach ($tokens as $key => $token) {
if ($token->type !== 'start' && $token->type !== 'end') continue;
$name = $token->name;
// DEFINITION CALL
$defs = $this->definition->info[$token->name]->attr;
$attr = $token->attributes;
$defs = isset($this->definition->info['attr'][$name]) ?
$this->definition->attr[$name] : array();
$changed = false;
foreach ($attr as $attr_key => $value) {
if ( isset($defs[$attr_key]) ) {