1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-07-13 02:36:19 +02:00

Release 1.5.0, merged in r688-867.

- LanguageFactory::instance() declared static
- HTMLModuleManagerTest pass by reference bug fixed, merge back into trunk scheduled

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@869 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2007-03-24 01:04:06 +00:00
parent cec7a1c087
commit dd2fd06591
130 changed files with 4324 additions and 1385 deletions

View File

@ -0,0 +1,43 @@
<?php
require_once 'HTMLPurifier/HTMLModule.php';
require_once 'HTMLPurifier/AttrTransform/BdoDir.php';
/**
* XHTML 1.1 Bi-directional Text Module, defines elements that
* declare directionality of content. Text Extension Module.
*/
class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule
{
var $name = 'Bdo';
var $elements = array('bdo');
var $info = array();
var $content_sets = array('Inline' => 'bdo');
var $attr_collections = array(
'I18N' => array('dir' => false)
);
function HTMLPurifier_HTMLModule_Bdo() {
$dir = new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false);
$this->attr_collections['I18N']['dir'] = $dir;
$this->info['bdo'] = new HTMLPurifier_ElementDef();
$this->info['bdo']->attr = array(
0 => array('Core', 'Lang'),
'dir' => $dir, // required
// The Abstract Module specification has the attribute
// inclusions wrong for bdo: bdo allows
// xml:lang too (and we'll toss in lang for good measure,
// though it is not allowed for XHTML 1.1, this will
// be managed with a global attribute transform)
);
$this->info['bdo']->content_model = '#PCDATA | Inline';
$this->info['bdo']->content_model_type = 'optional';
// provides fallback behavior if dir's missing (dir is required)
$this->info['bdo']->attr_transform_post['required-dir'] =
new HTMLPurifier_AttrTransform_BdoDir();
}
}
?>

View File

@ -0,0 +1,31 @@
<?php
class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule
{
var $name = 'CommonAttributes';
var $attr_collections = array(
'Core' => array(
0 => array('Style'),
// 'xml:space' => false,
'class' => 'NMTOKENS',
'id' => 'ID',
'title' => 'CDATA',
),
'Lang' => array(
'xml:lang' => false, // see constructor
),
'I18N' => array(
0 => array('Lang'), // proprietary, for xml:lang/lang
),
'Common' => array(
0 => array('Core', 'I18N')
)
);
function HTMLPurifier_HTMLModule_CommonAttributes() {
$this->attr_collections['Lang']['xml:lang'] = new HTMLPurifier_AttrDef_Lang();
}
}
?>

View File

@ -0,0 +1,46 @@
<?php
require_once 'HTMLPurifier/HTMLModule.php';
require_once 'HTMLPurifier/ChildDef/Chameleon.php';
/**
* XHTML 1.1 Edit Module, defines editing-related elements. Text Extension
* Module.
*/
class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule
{
var $name = 'Edit';
var $elements = array('del', 'ins');
var $info = array();
var $content_sets = array('Inline' => 'del | ins');
function HTMLPurifier_HTMLModule_Edit() {
foreach ($this->elements as $element) {
$this->info[$element] = new HTMLPurifier_ElementDef();
$this->info[$element]->attr = array(
0 => array('Common'),
'cite' => 'URI',
// 'datetime' => 'Datetime' // Datetime not implemented
);
// Inline context ! Block context (exclamation mark is
// separator, see getChildDef for parsing)
$this->info[$element]->content_model =
'#PCDATA | Inline ! #PCDATA | Flow';
// HTML 4.01 specifies that ins/del must not contain block
// elements when used in an inline context, chameleon is
// a complicated workaround to acheive this effect
$this->info[$element]->content_model_type = 'chameleon';
}
}
var $defines_child_def = true;
function getChildDef($def) {
if ($def->content_model_type != 'chameleon') return false;
$value = explode('!', $def->content_model);
return new HTMLPurifier_ChildDef_Chameleon($value[0], $value[1]);
}
}
?>

View File

@ -0,0 +1,36 @@
<?php
require_once 'HTMLPurifier/HTMLModule.php';
/**
* XHTML 1.1 Hypertext Module, defines hypertext links. Core Module.
*/
class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule
{
var $name = 'Hypertext';
var $elements = array('a');
var $info = array();
var $content_sets = array('Inline' => 'a');
function HTMLPurifier_HTMLModule_Hypertext() {
$this->info['a'] = new HTMLPurifier_ElementDef();
$this->info['a']->attr = array(
0 => array('Common'),
// 'accesskey' => 'Character',
// 'charset' => 'Charset',
'href' => 'URI',
//'hreflang' => 'LanguageCode',
//'rel' => 'LinkTypes',
//'rev' => 'LinkTypes',
//'tabindex' => 'Number',
//'type' => 'ContentType',
);
$this->info['a']->content_model = '#PCDATA | Inline';
$this->info['a']->content_model_type = 'optional';
$this->info['a']->excludes = array('a' => true);
}
}
?>

View File

@ -0,0 +1,38 @@
<?php
require_once 'HTMLPurifier/HTMLModule.php';
require_once 'HTMLPurifier/AttrDef/URI.php';
require_once 'HTMLPurifier/AttrTransform/ImgRequired.php';
/**
* XHTML 1.1 Image Module provides basic image embedding.
* @note There is specialized code for removing empty images in
* HTMLPurifier_Strategy_RemoveForeignElements
*/
class HTMLPurifier_HTMLModule_Image extends HTMLPurifier_HTMLModule
{
var $name = 'Image';
var $elements = array('img');
var $info = array();
var $content_sets = array('Inline' => 'img');
function HTMLPurifier_HTMLModule_Image() {
$this->info['img'] = new HTMLPurifier_ElementDef();
$this->info['img']->attr = array(
0 => array('Common'),
'alt' => 'Text',
'height' => 'Length',
'longdesc' => 'URI',
'src' => new HTMLPurifier_AttrDef_URI(true), // embedded
'width' => 'Length'
);
$this->info['img']->content_model_type = 'empty';
$this->info['img']->attr_transform_post[] =
new HTMLPurifier_AttrTransform_ImgRequired();
}
}
?>

View File

@ -0,0 +1,60 @@
<?php
/**
* XHTML 1.1 Legacy module defines elements that were previously
* deprecated.
*
* @note Not all legacy elements have been implemented yet, which
* is a bit of a reverse problem as compared to browsers! In
* addition, this legacy module may implement a bit more than
* mandated by XHTML 1.1.
*
* This module can be used in combination with TransformToStrict in order
* to transform as many deprecated elements as possible, but retain
* questionably deprecated elements that do not have good alternatives
* as well as transform elements that don't have an implementation.
* See docs/ref-strictness.txt for more details.
*/
class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
{
// incomplete
var $name = 'Legacy';
var $elements = array('u', 's', 'strike');
var $non_standalone_elements = array('li', 'ol', 'address', 'blockquote');
function HTMLPurifier_HTMLModule_Legacy() {
// setup new elements
foreach ($this->elements as $name) {
$this->info[$name] = new HTMLPurifier_ElementDef();
// for u, s, strike, as more elements get added, add
// conditionals as necessary
$this->info[$name]->content_model = 'Inline | #PCDATA';
$this->info[$name]->content_model_type = 'optional';
$this->info[$name]->attr[0] = array('Common');
}
// setup modifications to old elements
foreach ($this->non_standalone_elements as $name) {
$this->info[$name] = new HTMLPurifier_ElementDef();
$this->info[$name]->standalone = false;
}
$this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
$this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
$this->info['address']->content_model = 'Inline | #PCDATA | p';
$this->info['address']->content_model_type = 'optional';
$this->info['address']->child = false;
$this->info['blockquote']->content_model = 'Flow | #PCDATA';
$this->info['blockquote']->content_model_type = 'optional';
$this->info['blockquote']->child = false;
}
}
?>

View File

@ -0,0 +1,46 @@
<?php
require_once 'HTMLPurifier/HTMLModule.php';
/**
* XHTML 1.1 List Module, defines list-oriented elements. Core Module.
*/
class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
{
var $name = 'List';
var $elements = array('dl', 'dt', 'dd', 'ol', 'ul', 'li');
var $info = array();
// According to the abstract schema, the List content set is a fully formed
// one or more expr, but it invariably occurs in an optional declaration
// so we're not going to do that subtlety. It might cause trouble
// if a user defines "List" and expects that multiple lists are
// allowed to be specified, but then again, that's not very intuitive.
// Furthermore, the actual XML Schema may disagree. Regardless,
// we don't have support for such nested expressions without using
// the incredibly inefficient and draconic Custom ChildDef.
var $content_sets = array('List' => 'dl | ol | ul', 'Flow' => 'List');
function HTMLPurifier_HTMLModule_List() {
foreach ($this->elements as $element) {
$this->info[$element] = new HTMLPurifier_ElementDef();
$this->info[$element]->attr = array(0 => array('Common'));
if ($element == 'li' || $element == 'dd') {
$this->info[$element]->content_model = '#PCDATA | Flow';
$this->info[$element]->content_model_type = 'optional';
} elseif ($element == 'ol' || $element == 'ul') {
$this->info[$element]->content_model = 'li';
$this->info[$element]->content_model_type = 'required';
}
}
$this->info['dt']->content_model = '#PCDATA | Inline';
$this->info['dt']->content_model_type = 'optional';
$this->info['dl']->content_model = 'dt | dd';
$this->info['dl']->content_model_type = 'required';
// this could be a LOT more robust
$this->info['li']->auto_close = array('li' => true);
}
}
?>

View File

@ -0,0 +1,41 @@
<?php
require_once 'HTMLPurifier/HTMLModule.php';
/**
* XHTML 1.1 Presentation Module, defines simple presentation-related
* markup. Text Extension Module.
* @note The official XML Schema and DTD specs further divide this into
* two modules:
* - Block Presentation (hr)
* - Inline Presentation (b, big, i, small, sub, sup, tt)
* We have chosen not to heed this distinction, as content_sets
* provides satisfactory disambiguation.
*/
class HTMLPurifier_HTMLModule_Presentation extends HTMLPurifier_HTMLModule
{
var $name = 'Presentation';
var $elements = array('b', 'big', 'hr', 'i', 'small', 'sub', 'sup', 'tt');
var $info = array();
var $content_sets = array(
'Block' => 'hr',
'Inline' => 'b | big | i | small | sub | sup | tt'
);
function HTMLPurifier_HTMLModule_Presentation() {
foreach ($this->elements as $element) {
$this->info[$element] = new HTMLPurifier_ElementDef();
$this->info[$element]->attr = array(0 => array('Common'));
if ($element == 'hr') {
$this->info[$element]->content_model_type = 'empty';
} else {
$this->info[$element]->content_model = '#PCDATA | Inline';
$this->info[$element]->content_model_type = 'optional';
}
}
}
}
?>

View File

@ -0,0 +1,27 @@
<?php
require_once 'HTMLPurifier/HTMLModule.php';
require_once 'HTMLPurifier/AttrDef/CSS.php';
/**
* XHTML 1.1 Edit Module, defines editing-related elements. Text Extension
* Module.
*/
class HTMLPurifier_HTMLModule_StyleAttribute extends HTMLPurifier_HTMLModule
{
var $name = 'StyleAttribute';
var $attr_collections = array(
// The inclusion routine differs from the Abstract Modules but
// is in line with the DTD and XML Schemas.
'Style' => array('style' => false), // see constructor
'Core' => array(0 => array('Style'))
);
function HTMLPurifier_HTMLModule_StyleAttribute() {
$this->attr_collections['Style']['style'] = new HTMLPurifier_AttrDef_CSS();
}
}
?>

View File

@ -0,0 +1,88 @@
<?php
require_once 'HTMLPurifier/HTMLModule.php';
require_once 'HTMLPurifier/ChildDef/Table.php';
/**
* XHTML 1.1 Tables Module, fully defines accessible table elements.
*/
class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule
{
var $name = 'Tables';
var $elements = array('caption', 'table', 'td', 'th', 'tr', 'col',
'colgroup', 'tbody', 'thead', 'tfoot');
var $info = array();
var $content_sets = array('Block' => 'table');
function HTMLPurifier_HTMLModule_Tables() {
foreach ($this->elements as $e) {
$this->info[$e] = new HTMLPurifier_ElementDef();
$this->info[$e]->attr = array(0 => array('Common'));
$attr =& $this->info[$e]->attr;
if ($e == 'caption') continue;
if ($e == 'table'){
$attr['border'] = 'Pixels';
$attr['cellpadding'] = 'Length';
$attr['cellspacing'] = 'Length';
$attr['frame'] = new HTMLPurifier_AttrDef_Enum(array(
'void', 'above', 'below', 'hsides', 'lhs', 'rhs',
'vsides', 'box', 'border'
), false);
$attr['rules'] = new HTMLPurifier_AttrDef_Enum(array(
'none', 'groups', 'rows', 'cols', 'all'
), false);
$attr['summary'] = 'Text';
$attr['width'] = 'Length';
continue;
}
if ($e == 'col' || $e == 'colgroup') {
$attr['span'] = 'Number';
$attr['width'] = 'MultiLength';
}
if ($e == 'td' || $e == 'th') {
$attr['abbr'] = 'Text';
$attr['colspan'] = 'Number';
$attr['rowspan'] = 'Number';
}
$attr['align'] = new HTMLPurifier_AttrDef_Enum(array(
'left', 'center', 'right', 'justify', 'char'
), false);
$attr['valign'] = new HTMLPurifier_AttrDef_Enum(array(
'top', 'middle', 'bottom', 'baseline'
), false);
$attr['charoff'] = 'Length';
}
$this->info['caption']->content_model = '#PCDATA | Inline';
$this->info['caption']->content_model_type = 'optional';
// Is done directly because it doesn't leverage substitution
// mechanisms. True model is:
// 'caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))'
$this->info['table']->child = new HTMLPurifier_ChildDef_Table();
$this->info['td']->content_model =
$this->info['th']->content_model = '#PCDATA | Flow';
$this->info['td']->content_model_type =
$this->info['th']->content_model_type = 'optional';
$this->info['tr']->content_model = 'td | th';
$this->info['tr']->content_model_type = 'required';
$this->info['col']->content_model_type = 'empty';
$this->info['colgroup']->content_model = 'col';
$this->info['colgroup']->content_model_type = 'optional';
$this->info['tbody']->content_model =
$this->info['thead']->content_model =
$this->info['tfoot']->content_model = 'tr';
$this->info['tbody']->content_model_type =
$this->info['thead']->content_model_type =
$this->info['tfoot']->content_model_type = 'required';
}
}
?>

View File

@ -0,0 +1,78 @@
<?php
require_once 'HTMLPurifier/HTMLModule.php';
/**
* XHTML 1.1 Text Module, defines basic text containers. Core Module.
* @note In the normative XML Schema specification, this module
* is further abstracted into the following modules:
* - Block Phrasal (address, blockquote, pre, h1, h2, h3, h4, h5, h6)
* - Block Structural (div, p)
* - Inline Phrasal (abbr, acronym, cite, code, dfn, em, kbd, q, samp, strong, var)
* - Inline Structural (br, span)
* We have elected not to follow suite, but this may change.
*/
class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
{
var $name = 'Text';
var $elements = array('abbr', 'acronym', 'address', 'blockquote',
'br', 'cite', 'code', 'dfn', 'div', 'em', 'h1', 'h2', 'h3',
'h4', 'h5', 'h6', 'kbd', 'p', 'pre', 'q', 'samp', 'span', 'strong',
'var');
var $info = array();
var $content_sets = array(
'Heading' => 'h1 | h2 | h3 | h4 | h5 | h6',
'Block' => 'address | blockquote | div | p | pre',
'Inline' => 'abbr | acronym | br | cite | code | dfn | em | kbd | q | samp | span | strong | var',
'Flow' => 'Heading | Block | Inline'
);
function HTMLPurifier_HTMLModule_Text() {
foreach ($this->elements as $element) {
$this->info[$element] = new HTMLPurifier_ElementDef();
// attributes
if ($element == 'br') {
$this->info[$element]->attr = array(0 => array('Core'));
} elseif ($element == 'blockquote' || $element == 'q') {
$this->info[$element]->attr = array(0 => array('Common'), 'cite' => 'URI');
} else {
$this->info[$element]->attr = array(0 => array('Common'));
}
// content models
if ($element == 'br') {
$this->info[$element]->content_model_type = 'empty';
} elseif ($element == 'blockquote') {
$this->info[$element]->content_model = 'Heading | Block | List';
$this->info[$element]->content_model_type = 'optional';
} elseif ($element == 'div') {
$this->info[$element]->content_model = '#PCDATA | Flow';
$this->info[$element]->content_model_type = 'optional';
} else {
$this->info[$element]->content_model = '#PCDATA | Inline';
$this->info[$element]->content_model_type = 'optional';
}
}
// SGML permits exclusions for all descendants, but this is
// not possible with DTDs or XML Schemas. W3C has elected to
// use complicated compositions of content_models to simulate
// exclusion for children, but we go the simpler, SGML-style
// route of flat-out exclusions. Note that the Abstract Module
// is blithely unaware of such distinctions.
$this->info['pre']->excludes = array_flip(array(
'img', 'big', 'small',
'object', 'applet', 'font', 'basefont' // generally not allowed
));
$this->info['p']->auto_close = array_flip(array(
'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt',
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'p', 'pre',
'table', 'ul'
));
}
}
?>

View File

@ -0,0 +1,86 @@
<?php
require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
require_once 'HTMLPurifier/TagTransform/Simple.php';
require_once 'HTMLPurifier/TagTransform/Center.php';
require_once 'HTMLPurifier/TagTransform/Font.php';
require_once 'HTMLPurifier/AttrTransform/Lang.php';
require_once 'HTMLPurifier/AttrTransform/TextAlign.php';
/**
* Proprietary module that transforms deprecated elements into Strict
* HTML (see HTML 4.01 and XHTML 1.0) when possible.
*/
class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule
{
var $name = 'TransformToStrict';
// we're actually modifying these elements, not defining them
var $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'blockquote');
var $info_tag_transform = array(
// placeholders, see constructor for definitions
'font' => false,
'menu' => false,
'dir' => false,
'center'=> false
);
var $attr_collections = array(
'Lang' => array(
'lang' => false // placeholder
)
);
var $info_attr_transform_post = array(
'lang' => false // placeholder
);
function HTMLPurifier_HTMLModule_TransformToStrict() {
// deprecated tag transforms
$this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font();
$this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul');
$this->info_tag_transform['dir'] = new HTMLPurifier_TagTransform_Simple('ul');
$this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center();
foreach ($this->elements as $name) {
$this->info[$name] = new HTMLPurifier_ElementDef();
$this->info[$name]->standalone = false;
}
// deprecated attribute transforms
$this->info['h1']->attr_transform_pre['align'] =
$this->info['h2']->attr_transform_pre['align'] =
$this->info['h3']->attr_transform_pre['align'] =
$this->info['h4']->attr_transform_pre['align'] =
$this->info['h5']->attr_transform_pre['align'] =
$this->info['h6']->attr_transform_pre['align'] =
$this->info['p'] ->attr_transform_pre['align'] =
new HTMLPurifier_AttrTransform_TextAlign();
// xml:lang <=> lang mirroring, implement in TransformToStrict,
// this is overridden in TransformToXHTML11
$this->info_attr_transform_post['lang'] = new HTMLPurifier_AttrTransform_Lang();
$this->attr_collections['Lang']['lang'] = new HTMLPurifier_AttrDef_Lang();
// this should not be applied to XHTML 1.0 Transitional, ONLY
// XHTML 1.0 Strict. We may need three classes
$this->info['blockquote']->content_model_type = 'strictblockquote';
$this->info['blockquote']->child = false; // recalculate please!
}
var $defines_child_def = true;
function getChildDef($def) {
if ($def->content_model_type != 'strictblockquote') return false;
return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
}
}
?>

View File

@ -0,0 +1,30 @@
<?php
/**
* Proprietary module that transforms XHTML 1.0 deprecated aspects into
* XHTML 1.1 compliant ones, when possible. For maximum effectiveness,
* HTMLPurifier_HTMLModule_TransformToStrict must also be loaded
* (otherwise, elements that were deprecated from Transitional to Strict
* will not be transformed).
*
* XHTML 1.1 compliant document are automatically XHTML 1.0 compliant too,
* although they may not be as friendly to legacy browsers.
*/
class HTMLPurifier_HTMLModule_TransformToXHTML11 extends HTMLPurifier_HTMLModule
{
var $name = 'TransformToXHTML11';
var $attr_collections = array(
'Lang' => array(
'lang' => false // remove it
)
);
var $info_attr_transform_post = array(
'lang' => false // remove it
);
}
?>