1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-10-17 06:56:06 +02:00

Split out code in Definition.php.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@73 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2006-07-22 00:02:48 +00:00
parent f00a80c561
commit 025b648c99
6 changed files with 13 additions and 1618 deletions

View File

@@ -85,7 +85,7 @@ class HTMLPurifier_Definition
$this->info['blockquote'] =
$this->info['dd'] =
$this->info['li'] =
$this->info['div'] = new HTMLDTD_Element($e_Flow);
$this->info['div'] = new HTMLPurifier_ElementDef($e_Flow);
$this->info['em'] =
$this->info['strong'] =
@@ -121,31 +121,31 @@ class HTMLPurifier_Definition
$this->info['h3'] =
$this->info['h4'] =
$this->info['h5'] =
$this->info['h6'] = new HTMLDTD_Element($e_Inline);
$this->info['h6'] = new HTMLPurifier_ElementDef($e_Inline);
$this->info['ol'] =
$this->info['ul'] =
new HTMLDTD_Element(
new HTMLPurifier_ElementDef(
new HTMLPurifier_ChildDef_Required('li')
);
$this->info['dl'] =
new HTMLDTD_Element(
new HTMLPurifier_ElementDef(
new HTMLPurifier_ChildDef_Required('dt|dd')
);
$this->info['address'] =
new HTMLDTD_Element(
new HTMLPurifier_ElementDef(
new HTMLPurifier_ChildDef_Optional("#PCDATA | p | $e_inline".
" | $e_misc_inline")
);
$this->info['img'] =
$this->info['br'] =
$this->info['hr'] = new HTMLDTD_Element(new HTMLPurifier_ChildDef_Empty());
$this->info['hr'] = new HTMLPurifier_ElementDef(new HTMLPurifier_ChildDef_Empty());
$this->info['pre'] = new HTMLDTD_Element($e_pre_content);
$this->info['pre'] = new HTMLPurifier_ElementDef($e_pre_content);
$this->info['a'] = new HTMLDTD_Element($e_a_content);
$this->info['a'] = new HTMLPurifier_ElementDef($e_a_content);
}
@@ -424,191 +424,17 @@ class HTMLPurifier_Definition
}
// Definition of a single element: holds the child definition and the
// attribute definitions handed to the constructor.
// NOTE(review): this span appears to be an unmarked diff rendering -- the
// class line and the constructor line each appear twice, once with the old
// name (HTMLDTD_Element) and once with the new name (HTMLPurifier_ElementDef).
// Presumably only one of each pair exists in the actual file; confirm
// against the repository before treating this as compilable code.
class HTMLDTD_Element
class HTMLPurifier_ElementDef
{
// Child definition supplied to the constructor.
var $child_def;
// Attribute definitions supplied to the constructor; defaults to an empty array.
var $attr_def = array();
// Stores both constructor arguments on the instance unchanged.
function HTMLDTD_Element($child_def, $attr_def = array()) {
function HTMLPurifier_ElementDef($child_def, $attr_def = array()) {
$this->child_def = $child_def;
$this->attr_def = $attr_def;
}
}
// HTMLPurifier_ChildDef and its subclasses have three types of output:
//   true       = leave nodes as is
//   false      = delete parent node and all children
//   array(...) = replace children nodes with these
//                (this is the hardest one to implement)
// We'll use fancy regexp tricks. Right now, we only expect it to return
// TRUE or FALSE (it won't attempt to fix the tree).
// We may end up writing custom code for each HTML case in order to make it
// self-correcting.
/**
 * Validates the direct children of an element against a DTD-style content
 * model by translating the model into a PCRE pattern and matching it
 * against a comma-separated list of the children's names.
 *
 * This generic implementation only answers true (children are acceptable)
 * or false (they are not); it never returns a replacement token array.
 */
class HTMLPurifier_ChildDef
{
    /** Identifies the kind of child definition. */
    var $type = 'custom';

    /** The DTD content model exactly as supplied to the constructor. */
    var $dtd_regex;

    /** PCRE pattern body (no delimiters or anchors) built by _compileRegex(). */
    var $_pcre_regex;

    /**
     * @param string $dtd_regex DTD content model, e.g. "a, b" or "(a|b)*"
     */
    function __construct($dtd_regex) {
        $this->dtd_regex = $dtd_regex;
        $this->_compileRegex();
    }

    /**
     * Legacy class-named constructor. PHP 4 treats it as the constructor;
     * PHP 8 no longer does, so the real work lives in __construct() and
     * this only forwards to it for older callers.
     *
     * @param string $dtd_regex DTD content model
     */
    function HTMLPurifier_ChildDef($dtd_regex) {
        $this->__construct($dtd_regex);
    }

    /**
     * Converts $this->dtd_regex into the PCRE pattern body stored in
     * $this->_pcre_regex.
     */
    function _compileRegex() {
        $raw = str_replace(' ', '', $this->dtd_regex);
        // Square-bracket offset: the curly-brace form ($raw{0}) is a parse
        // error on PHP 8. The empty-string check avoids reading an offset
        // that does not exist.
        if ($raw === '' || $raw[0] != '(') {
            $raw = "($raw)";
        }
        // Separators between names become optional ...
        $reg = str_replace(',', ',?', $raw);
        // ... and every name may itself be preceded by a separator, so the
        // pattern lines up with the "a,b,c" list built in validateChildren().
        $reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
        $this->_pcre_regex = $reg;
    }

    /**
     * Checks whether the direct children satisfy the content model.
     *
     * @param array $tokens_of_children Token objects; the code reads
     *        ->type ('start' / 'end' adjust nesting), ->name and the
     *        optional ->is_whitespace flag
     * @return bool true when the list of direct children matches
     */
    function validateChildren($tokens_of_children) {
        $list_of_children = '';
        $nesting = 0; // depth into the nest
        foreach ($tokens_of_children as $token) {
            // whitespace tokens never take part in the match
            if (!empty($token->is_whitespace)) {
                continue;
            }

            // must be decided before this token changes the depth
            $is_child = ($nesting == 0);

            if ($token->type == 'start') {
                $nesting++;
            } elseif ($token->type == 'end') {
                $nesting--;
            }

            if ($is_child) {
                $list_of_children .= $token->name . ',';
            }
        }
        $list_of_children = rtrim($list_of_children, ',');

        $okay = preg_match(
            '/^' . $this->_pcre_regex . '$/',
            $list_of_children
        );

        return (bool) $okay;
    }
}
/**
 * Base class for child definitions described by a plain set of allowed
 * names instead of a DTD regular expression. It does not validate anything
 * itself; subclasses provide validateChildren().
 */
class HTMLPurifier_ChildDef_Simple extends HTMLPurifier_ChildDef
{
    /** Lookup table of allowed names: name => true. */
    var $elements = array();

    /**
     * HTMLPurifier_Generator instance created in the constructor.
     * Declared explicitly so it is not created as a dynamic property.
     */
    var $gen;

    /**
     * @param mixed $elements Either an array of names or a string of names
     *        separated by '|' (spaces are ignored), e.g. "dt | dd"
     */
    function __construct($elements) {
        if (is_string($elements)) {
            $elements = str_replace(' ', '', $elements);
            $elements = explode('|', $elements);
        }
        // turn the list of names into a name => true lookup table
        $elements = array_flip($elements);
        foreach ($elements as $i => $x) {
            $elements[$i] = true;
        }
        $this->elements = $elements;
        $this->gen = new HTMLPurifier_Generator();
    }

    /**
     * Legacy class-named constructor. PHP 4 treats it as the constructor;
     * PHP 8 no longer does, so it only forwards to __construct().
     *
     * @param mixed $elements See __construct()
     */
    function HTMLPurifier_ChildDef_Simple($elements) {
        $this->__construct($elements);
    }

    /**
     * Stand-in for an abstract method: always raises E_USER_ERROR.
     *
     * The parameter is declared so the signature matches the parent's and
     * the overriding subclass's; it is not used.
     *
     * @param array $tokens_of_children Unused
     */
    function validateChildren($tokens_of_children) {
        trigger_error('Cannot call abstract function!', E_USER_ERROR);
    }
}
/**
 * Child definition that filters children against the set of allowed names
 * in $this->elements and rejects the parent when nothing usable is left.
 */
class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef_Simple
{
    var $type = 'required';

    /**
     * Filters the children of a node.
     *
     * Whitespace tokens are always kept. A direct child whose name is not
     * in $this->elements is dropped together with everything nested in it;
     * when '#PCDATA' is an allowed name, each dropped token is instead
     * replaced by a text token holding the generator's output for it.
     *
     * @param array $tokens_of_children Token objects; the code reads
     *        ->type, ->name and the optional ->is_whitespace flag
     * @return mixed false when the input is empty, nothing was kept, or
     *         only whitespace was seen; true when the kept tokens compare
     *         equal (==) to the input; otherwise the array of kept tokens
     */
    function validateChildren($tokens_of_children) {
        // no tokens at all: delete the parent node
        if (empty($tokens_of_children)) {
            return false;
        }

        $kept = array();        // the new set of children
        $depth = 0;             // current depth into the nest
        $dropping = false;      // inside a direct child that is not allowed
        $saw_content = false;   // becomes true on the first non-whitespace token

        // decides whether a rejected token is escaped into text or
        // silently dropped
        $escape_dropped = isset($this->elements['#PCDATA']);

        foreach ($tokens_of_children as $token) {
            if (!empty($token->is_whitespace)) {
                $kept[] = $token;
                continue;
            }
            $saw_content = true;

            // must be decided before this token changes the depth
            $at_top_level = ($depth == 0);

            if ($token->type == 'start') {
                $depth++;
            } elseif ($token->type == 'end') {
                $depth--;
            }

            // only a direct child can start or stop a dropped region;
            // nested tokens inherit the state of their enclosing child
            if ($at_top_level) {
                $dropping = !isset($this->elements[$token->name]);
            }

            if (!$dropping) {
                $kept[] = $token;
            } elseif ($escape_dropped) {
                $kept[] = new HTMLPurifier_Token_Text(
                    $this->gen->generateFromToken($token)
                );
            }
            // otherwise: drop silently
        }

        if (empty($kept) || !$saw_content) {
            return false;
        }

        return ($tokens_of_children == $kept) ? true : $kept;
    }
}
// only altered behavior is that it returns an empty array
// instead of a false (to delete the node)
/**
 * Same filtering as HTMLPurifier_ChildDef_Required, except that a false
 * result from the parent is turned into an empty array.
 */
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
{
    var $type = 'optional';

    /**
     * @param array $tokens_of_children Tokens passed through to the parent
     * @return mixed array() when the parent returns false; otherwise the
     *         parent's result unchanged (true or an array of tokens)
     */
    function validateChildren($tokens_of_children) {
        $outcome = parent::validateChildren($tokens_of_children);
        return ($outcome === false) ? array() : $outcome;
    }
}
// placeholder
/**
 * Placeholder child definition whose validateChildren() always returns
 * false.
 * NOTE(review): the rest of this file does not show how callers treat
 * $type == 'empty' or this false result -- confirm before relying on it.
 */
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
{
    var $type = 'empty';

    /**
     * Intentionally empty: takes no argument and compiles no regex.
     * Defined as __construct() so that on PHP 8, where class-named methods
     * are no longer constructors, instantiation does not fall through to
     * the parent's constructor.
     */
    function __construct() {}

    /** Legacy class-named constructor, kept for PHP 4; does nothing. */
    function HTMLPurifier_ChildDef_Empty() {}

    /**
     * The parameter is accepted (and optional) so the signature stays
     * compatible with the parent's while argument-less calls keep working.
     *
     * @param array $tokens_of_children Ignored
     * @return bool Always false
     */
    function validateChildren($tokens_of_children = array()) {
        return false;
    }
}
/**
 * Holder for an attribute definition value.
 */
class HTMLPurifier_AttrDef
{
    /** The definition passed to the constructor, stored unchanged. */
    var $def;

    /**
     * @param mixed $def Attribute definition to store
     */
    function __construct($def) {
        $this->def = $def;
    }

    /**
     * Legacy class-named constructor. PHP 4 treats it as the constructor;
     * PHP 8 no longer does, so it only forwards to __construct().
     *
     * @param mixed $def Attribute definition to store
     */
    function HTMLPurifier_AttrDef($def) {
        $this->__construct($def);
    }
}
?>