1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-07-10 01:06:20 +02:00

[1.7.0] Fix bug in HTMLPurifier_Harness that causes certain aspects of $input to change after parsing

- Add makeLookup() convenience function to HTMLModule
- Relocate SGML exclusion comment
- Add preliminary Bdo module test

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1049 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2007-05-13 03:42:09 +00:00
parent bda9167423
commit da92cb9ff4
8 changed files with 145 additions and 49 deletions

View File

@ -10,65 +10,61 @@ require_once 'HTMLPurifier/HTMLModule.php';
* - Block Structural (div, p)
* - Inline Phrasal (abbr, acronym, cite, code, dfn, em, kbd, q, samp, strong, var)
* - Inline Structural (br, span)
* We have elected not to follow suite, but this may change.
* This module, functionally, does not distinguish between these
* sub-modules, but the code is internally structured to reflect
* these distinctions.
*/
class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
{
var $name = 'Text';
var $elements = array('abbr', 'acronym', 'address', 'blockquote',
'br', 'cite', 'code', 'dfn', 'div', 'em', 'h1', 'h2', 'h3',
'h4', 'h5', 'h6', 'kbd', 'p', 'pre', 'q', 'samp', 'span', 'strong',
'var');
var $content_sets = array(
'Heading' => 'h1 | h2 | h3 | h4 | h5 | h6',
'Block' => 'address | blockquote | div | p | pre',
'Inline' => 'abbr | acronym | br | cite | code | dfn | em | kbd | q | samp | span | strong | var',
'Flow' => 'Heading | Block | Inline'
);
function HTMLPurifier_HTMLModule_Text() {
foreach ($this->elements as $element) {
$this->info[$element] = new HTMLPurifier_ElementDef();
// attributes
if ($element == 'br') {
$this->info[$element]->attr = array(0 => array('Core'));
} elseif ($element == 'blockquote' || $element == 'q') {
$this->info[$element]->attr = array(0 => array('Common'), 'cite' => 'URI');
} else {
$this->info[$element]->attr = array(0 => array('Common'));
}
// content models
if ($element == 'br') {
$this->info[$element]->content_model_type = 'empty';
} elseif ($element == 'blockquote') {
$this->info[$element]->content_model = 'Heading | Block | List';
$this->info[$element]->content_model_type = 'optional';
} elseif ($element == 'div') {
$this->info[$element]->content_model = '#PCDATA | Flow';
$this->info[$element]->content_model_type = 'optional';
} else {
$this->info[$element]->content_model = '#PCDATA | Inline';
$this->info[$element]->content_model_type = 'optional';
}
}
// SGML permits exclusions for all descendants, but this is
// not possible with DTDs or XML Schemas. W3C has elected to
// use complicated compositions of content_models to simulate
// exclusion for children, but we go the simpler, SGML-style
// route of flat-out exclusions. Note that the Abstract Module
// is blithely unaware of such distinctions.
$this->info['pre']->excludes = array_flip(array(
'img', 'big', 'small',
'object', 'applet', 'font', 'basefont' // generally not allowed
));
$this->info['p']->auto_close = array_flip(array(
// Inline Phrasal -------------------------------------------------
$this->addElement('abbr', true, 'Inline', 'Inline', 'Common');
$this->addElement('acronym', true, 'Inline', 'Inline', 'Common');
$this->addElement('cite', true, 'Inline', 'Inline', 'Common');
$this->addElement('code', true, 'Inline', 'Inline', 'Common');
$this->addElement('dfn', true, 'Inline', 'Inline', 'Common');
$this->addElement('em', true, 'Inline', 'Inline', 'Common');
$this->addElement('kbd', true, 'Inline', 'Inline', 'Common');
$this->addElement('q', true, 'Inline', 'Inline', 'Common', array('cite' => 'URI'));
$this->addElement('samp', true, 'Inline', 'Inline', 'Common');
$this->addElement('strong', true, 'Inline', 'Inline', 'Common');
$this->addElement('var', true, 'Inline', 'Inline', 'Common');
// Inline Structural ----------------------------------------------
$this->addElement('span', true, 'Inline', 'Inline', 'Common');
$this->addElement('br', true, 'Inline', 'Empty', 'Core');
// Block Phrasal --------------------------------------------------
$this->addElement('address', true, 'Block', 'Inline', 'Common');
$this->addElement('blockquote', true, 'Block',
'Optional: Heading | Block | List', 'Common', array('cite' => 'URI') );
$pre =& $this->addElement('pre', true, 'Block', 'Inline', 'Common');
$pre->excludes = $this->makeLookup(
'img', 'big', 'small', 'object', 'applet', 'font', 'basefont' );
$this->addElement('h1', true, 'Heading', 'Inline', 'Common');
$this->addElement('h2', true, 'Heading', 'Inline', 'Common');
$this->addElement('h3', true, 'Heading', 'Inline', 'Common');
$this->addElement('h4', true, 'Heading', 'Inline', 'Common');
$this->addElement('h5', true, 'Heading', 'Inline', 'Common');
$this->addElement('h6', true, 'Heading', 'Inline', 'Common');
// Block Structural -----------------------------------------------
$p =& $this->addElement('p', true, 'Block', 'Inline', 'Common');
// this seems really ad hoc: implementing some general
// heuristics would probably be better
$p->auto_close = $this->makeLookup(
'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt',
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'p', 'pre',
'table', 'ul'
));
'table', 'ul' );
$this->addElement('div', true, 'Block', 'Flow', 'Common');
}
}