1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-29 12:38:11 +01:00

Initial implementation of XHTMLDefinition, you can see it in action at the smoketest printDefinition.php?x (add the x at the end).

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@707 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-02-04 00:07:52 +00:00
parent 806901cfd2
commit f6b50d4bfd
6 changed files with 299 additions and 1 deletions

View File

@ -0,0 +1,80 @@
<?php
require_once 'HTMLPurifier/AttrTypes.php';
require_once 'HTMLPurifier/AttrDef/Lang.php';
/**
* Defines common attribute collections that modules reference
*/
class HTMLPurifier_AttrCollection
{

    /**
     * Attribute collections, keyed by collection name.
     *
     * Each collection maps an attribute name to either a string type
     * identifier (resolved to an object by setup()) or a ready-made
     * attribute definition object. The special integer key 0, when
     * present, holds a list of other collection names whose attributes
     * are to be merged in by performInclusions().
     */
    var $info = array(
        'Core' => array(
            // 'xml:space' => false,
            'class' => 'NMTOKENS',
            'id' => 'ID',
            'title' => 'CDATA',
        ),
        'I18N' => array(
            'xml:lang' => false, // see constructor
            'lang' => false, // see constructor
        ),
        'Events' => array(),
        'Style' => array(),
        'Common' => array(
            0 => array('Core', 'Events', 'I18N', 'Style')
        )
    );

    /**
     * Fills in the attributes that cannot be expressed as a string type
     * identifier in the property initializer above.
     */
    function HTMLPurifier_AttrCollection() {
        // setup direct objects; both language attributes share one validator
        $this->info['I18N']['xml:lang'] =
        $this->info['I18N']['lang'] = new HTMLPurifier_AttrDef_Lang();
    }

    /**
     * Resolves string type identifiers to attribute objects and flattens
     * collections that include other collections.
     *
     * @param $attr_types Object whose ->info array maps type identifiers
     *                    (e.g. 'ID') to attribute definition objects
     * @param $modules    Not used by this method
     */
    function setup($attr_types, $modules) {
        $info =& $this->info;
        // replace string identifiers with actual attribute objects
        foreach ($info as $collection_i => $collection) {
            foreach ($collection as $attr_i => $attr) {
                if ($attr_i === 0) continue; // inclusion list, not an attribute
                if (!is_string($attr)) continue; // already an object (or placeholder)
                if (isset($attr_types->info[$attr])) {
                    $info[$collection_i][$attr_i] = $attr_types->info[$attr];
                } else {
                    // unknown type identifier: drop the attribute
                    unset($info[$collection_i][$attr_i]);
                }
            }
        }
        // merge attribute collections that include others
        foreach ($info as $name => $attr) {
            $this->performInclusions($info[$name]);
        }
    }

    /**
     * Expands the inclusion list stored under key 0 of an attribute array.
     *
     * Every attribute of each named collection is copied into $attr unless
     * $attr already defines that key. Collections that themselves carry an
     * inclusion list have that list appended to the work queue, so nested
     * inclusions are followed. Key 0 is removed from $attr afterwards.
     *
     * @param $attr Attribute array to expand, modified in place. Nothing
     *              happens if it has no key 0.
     */
    function performInclusions(&$attr) {
        if (!isset($attr[0])) return;
        $merge = $attr[0];
        // collection names already processed; prevents endless growth of
        // $merge when two collections include each other
        $seen = array();
        // loop through all the inclusions; $merge may grow while we iterate
        for ($i = 0; isset($merge[$i]); $i++) {
            $name = $merge[$i];
            if (isset($seen[$name])) continue;
            $seen[$name] = true;
            // silently ignore references to collections that do not exist
            if (!isset($this->info[$name])) continue;
            // foreach attribute of the inclusion, copy it over
            foreach ($this->info[$name] as $key => $value) {
                // existing keys win; this also skips the included
                // collection's own key 0, since $attr[0] is still set
                if (isset($attr[$key])) continue;
                $attr[$key] = $value;
            }
            if (isset($this->info[$name][0])) {
                // nested inclusion: queue the included collection's own list
                $merge = array_merge($merge, $this->info[$name][0]);
            }
        }
        unset($attr[0]);
    }

}
?>

View File

@ -0,0 +1,21 @@
<?php
require_once 'HTMLPurifier/AttrDef/Nmtokens.php';
require_once 'HTMLPurifier/AttrDef/Text.php';
require_once 'HTMLPurifier/AttrDef/ID.php';
/**
* Provides lookup array of attribute types to HTMLPurifier_AttrDef objects
*/
class HTMLPurifier_AttrTypes
{

    /**
     * Lookup table: attribute type identifier => attribute definition object.
     */
    var $info = array();

    /**
     * Registers the attribute types known to this lookup.
     *
     * NOTE(review): HTMLPurifier_AttrDef_URI has no matching require_once
     * among the includes visible at the top of this file; presumably it is
     * loaded elsewhere before this constructor runs -- confirm.
     */
    function HTMLPurifier_AttrTypes() {
        $this->info = array(
            'NMTOKENS' => new HTMLPurifier_AttrDef_Nmtokens(),
            'CDATA'    => new HTMLPurifier_AttrDef_Text(),
            'ID'       => new HTMLPurifier_AttrDef_ID(),
            'URI'      => new HTMLPurifier_AttrDef_URI(),
        );
    }

}
?>

View File

@ -0,0 +1,27 @@
<?php
/**
* Represents an XHTML 1.1 module, with information on elements, tags
* and attributes.
* @note Even though this is technically XHTML 1.1, it is also used for
* regular HTML parsing. We are using modularization as a convenient
* way to represent the internals of HTMLDefinition, and our
* implementation is by no means conforming and does not directly
* use the normative DTDs or XML schemas.
*/
class HTMLPurifier_HTMLModule
{
// List of element names this module defines. Empty in the base class;
// subclasses such as HTMLPurifier_HTMLModule_Text override it.
var $elements = array();
// Element name => HTMLPurifier_HTMLModuleElement describing that element.
// HTMLPurifier_XHTMLDefinition::setup() iterates this array to build its
// own element definitions.
var $info = array();
// Content set name => ' | '-separated string of element and/or content
// set names (e.g. 'Flow' => 'Heading | Block | Inline'). Sets with the same
// name from different modules are joined together by
// HTMLPurifier_XHTMLDefinition::setup().
var $content_sets = array();
}
class HTMLPurifier_HTMLModuleElement
{
// Attribute name => type identifier (e.g. 'cite' => 'URI'). The optional
// integer key 0 holds a list of attribute collection names to merge in
// (e.g. array('Common')).
var $attr = array();
// Content model of the element; left unset by default. Where the Text
// module sets it, it is a string such as '#PCDATA | Inline' in which
// content set names are later substituted.
var $content_model;
// Selects how $content_model is interpreted. Values handled by
// HTMLPurifier_XHTMLDefinition::getChildDef() are 'required', 'optional',
// 'empty', 'strictblockquote', 'table', 'chameleon' and 'custom'.
var $content_model_type = 'optional';
}
?>

View File

@ -0,0 +1,51 @@
<?php
require_once 'HTMLPurifier/HTMLModule.php';
/**
* XHTML 1.1 Text Module, defines basic text containers. Core module.
*/
class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
{

    /** Names of every element this module defines. */
    var $elements = array(
        'abbr', 'acronym', 'address', 'blockquote',
        'br', 'cite', 'code', 'dfn', 'div', 'em',
        'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'kbd', 'p', 'pre', 'q', 'samp', 'span', 'strong',
        'var'
    );

    /** Element name => HTMLPurifier_HTMLModuleElement, built by the constructor. */
    var $info = array();

    /** Named groups of elements that content models may refer to. */
    var $content_sets = array(
        'Heading' => 'h1 | h2 | h3 | h4 | h5 | h6',
        'Block' => 'address | blockquote | div | p | pre',
        'Inline' => 'abbr | acronym | br | cite | code | dfn | em | kbd | q | samp | span | strong | var',
        'Flow' => 'Heading | Block | Inline'
    );

    /**
     * Builds one element definition per entry of $elements, choosing the
     * attribute set and the content model according to the element name.
     */
    function HTMLPurifier_HTMLModule_Text() {
        foreach ($this->elements as $name) {
            $def = new HTMLPurifier_HTMLModuleElement();

            switch ($name) {
                case 'br':
                    // line break: core attributes only, no children
                    $def->attr = array(0 => array('Core'));
                    $def->content_model_type = 'empty';
                    break;
                case 'blockquote':
                    // quotation: common attributes plus a cite URI
                    $def->attr = array(0 => array('Common'), 'cite' => 'URI');
                    $def->content_model_type = 'strictblockquote';
                    break;
                case 'q':
                    // inline quotation: cite URI, inline children
                    $def->attr = array(0 => array('Common'), 'cite' => 'URI');
                    $def->content_model = '#PCDATA | Inline';
                    break;
                case 'div':
                    // generic container: anything in the Flow set
                    $def->attr = array(0 => array('Common'));
                    $def->content_model = '#PCDATA | Flow';
                    break;
                default:
                    // everything else: common attributes, inline children
                    $def->attr = array(0 => array('Common'));
                    $def->content_model = '#PCDATA | Inline';
                    break;
            }

            $this->info[$name] = $def;
        }
    }

}
?>

View File

@ -1,6 +1,7 @@
<?php
require_once 'HTMLPurifier/Printer.php';
require_once 'HTMLPurifier/XHTMLDefinition.php';
class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
{
@ -13,7 +14,14 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
function render($config) {
$ret = '';
$this->config =& $config;
$this->def = $config->getHTMLDefinition();
if (isset($_GET['x'])) { // hidden settings
$this->def = new HTMLPurifier_XHTMLDefinition();
$this->def->initialize($config);
$this->def->setup($config);
} else {
$this->def = $config->getHTMLDefinition();
}
$def =& $this->def;
$ret .= $this->start('div', array('class' => 'HTMLPurifier_Printer'));

View File

@ -0,0 +1,111 @@
<?php
require_once 'HTMLPurifier/HTMLDefinition.php';
require_once 'HTMLPurifier/AttrTypes.php';
require_once 'HTMLPurifier/AttrCollection.php';
require_once 'HTMLPurifier/HTMLModule.php';
require_once 'HTMLPurifier/HTMLModule/Text.php';
/**
* Next-generation HTML definition that will supplant HTMLPurifier_HTMLDefinition
*/
class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition
{

    /** Module name => module object (see initialize()). */
    var $modules = array();

    /** HTMLPurifier_AttrTypes instance, created by initialize(). */
    var $attr_types;

    /** HTMLPurifier_AttrCollection instance, created by initialize(). */
    var $attr_collection;

    /**
     * Creates the module, attribute type and attribute collection objects
     * that setup() works on.
     *
     * @param $config Not used by this method
     */
    function initialize($config) {
        $this->modules['Text'] = new HTMLPurifier_HTMLModule_Text();
        $this->attr_types = new HTMLPurifier_AttrTypes();
        $this->attr_collection = new HTMLPurifier_AttrCollection();
    }

    /**
     * Builds $this->info from the registered modules: resolves attribute
     * collections, merges and expands content sets, then creates one
     * HTMLPurifier_ElementDef per module element.
     *
     * Writes $this->info, and $this->info_parent_def when an element name
     * matches $this->info_parent (neither property is declared in this
     * class; presumably both are inherited from HTMLPurifier_HTMLDefinition).
     *
     * @param $config Not used by this method
     */
    function setup($config) {

        // perform attribute collection substitutions
        $this->attr_collection->setup($this->attr_types, $this->modules);

        // populate content_sets based on module hints
        $content_sets = array();
        foreach ($this->modules as $module_i => $module) {
            foreach ($module->content_sets as $key => $value) {
                if (isset($content_sets[$key])) {
                    // add it into the existing content set
                    $content_sets[$key] = $content_sets[$key] . ' | ' . $value;
                } else {
                    $content_sets[$key] = $value;
                }
            }
        }

        // perform content_set expansions
        foreach ($content_sets as $i => $set) {
            // only performed once, so infinite recursion is not
            // a problem, you'll just have a stray $Set lying around
            // at the end
            $content_sets[$i] =
                str_replace(
                    array_keys($content_sets),
                    array_values($content_sets),
                    $set);
        }
        $content_sets_keys   = array_keys($content_sets);
        $content_sets_values = array_values($content_sets);

        foreach ($this->modules as $module_i => $module) {
            // Iterate over the keys only. Using the reference variable as
            // the foreach value variable would make each new iteration
            // assign through the reference left over from the previous
            // one, overwriting the previous element in the module.
            foreach (array_keys($module->info) as $element_i) {
                $element =& $this->modules[$module_i]->info[$element_i];

                // attribute value expansions
                $this->attr_collection->performInclusions($element->attr);

                // perform content model expansions
                $content_model = $element->content_model;
                if (is_string($content_model)) {
                    $element->content_model = str_replace(
                        $content_sets_keys, $content_sets_values, $content_model);
                }

                // setup info
                $this->info[$element_i] = new HTMLPurifier_ElementDef();
                $this->info[$element_i]->attr = $element->attr;
                $this->info[$element_i]->child = $this->getChildDef($element);
                if ($this->info_parent == $element_i) {
                    $this->info_parent_def = $this->info[$element_i];
                }
            }
        }
        // drop the reference so later use of the name cannot touch a module
        unset($element);

    }

    /**
     * Creates the child definition object matching an element's content
     * model type.
     *
     * @param $element Object exposing ->content_model and
     *                 ->content_model_type
     * @return Child definition object; an empty child definition when the
     *         type is not one of the recognised values
     */
    function getChildDef($element) {
        $value = $element->content_model;
        $type  = $element->content_model_type;
        switch ($type) {
            case 'required':
                return new HTMLPurifier_ChildDef_Required($value);
            case 'optional':
                return new HTMLPurifier_ChildDef_Optional($value);
            case 'empty':
                return new HTMLPurifier_ChildDef_Empty();
            case 'strictblockquote':
                return new HTMLPurifier_ChildDef_StrictBlockquote();
            case 'table':
                return new HTMLPurifier_ChildDef_Table();
            case 'chameleon':
                // $value is expected to be a two-item array here
                return new HTMLPurifier_ChildDef_Chameleon($value[0], $value[1]);
            case 'custom':
                return new HTMLPurifier_ChildDef_Custom($value);
        }
        // unrecognised type: fall back to allowing no children
        // (the original omitted `new`, which called an undefined function)
        return new HTMLPurifier_ChildDef_Empty();
    }

}
?>