mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-03 12:47:56 +02:00
Compare commits
41 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
64d8ca9831 | ||
|
d7642b8c70 | ||
|
3b30c2ca5b | ||
|
f43616f72d | ||
|
6740ba61af | ||
|
6a33945499 | ||
|
4660791682 | ||
|
b5c69d8ca5 | ||
|
e440f25bce | ||
|
665e80d223 | ||
|
69747ede8a | ||
|
49b3832ebf | ||
|
a365d4c688 | ||
|
7038fad788 | ||
|
50b272d75e | ||
|
bfb642d32c | ||
|
edb39601c7 | ||
|
694139d3bb | ||
|
81721ded5c | ||
|
371fb7c3d2 | ||
|
9e6953e619 | ||
|
2299f0c831 | ||
|
9dd4dcb27a | ||
|
aa0838492e | ||
|
df075c96e0 | ||
|
fbaa909d25 | ||
|
967f40fc11 | ||
|
5ee6ffe20f | ||
|
10d41d7130 | ||
|
65a628bcb7 | ||
|
a5b4ed2126 | ||
|
d20bbd8db3 | ||
|
b99573223d | ||
|
c6cfb68713 | ||
|
2259bfa40e | ||
|
de3b2b70fb | ||
|
4f0a5c0e22 | ||
|
fdd583253c | ||
|
a4be6ffe4d | ||
|
6de42d8d1d | ||
|
e9a519e589 |
4
Doxyfile
4
Doxyfile
@@ -3,8 +3,8 @@
|
||||
#---------------------------------------------------------------------------
|
||||
# Project related configuration options
|
||||
#---------------------------------------------------------------------------
|
||||
PROJECT_NAME = HTMLPurifier
|
||||
PROJECT_NUMBER = trunk
|
||||
PROJECT_NAME = HTML Purifier
|
||||
PROJECT_NUMBER = 1.0.0
|
||||
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
||||
CREATE_SUBDIRS = NO
|
||||
OUTPUT_LANGUAGE = English
|
||||
|
69
INSTALL
69
INSTALL
@@ -17,7 +17,7 @@ these versions:
|
||||
- 4.3.9, 4.3.11
|
||||
- 4.4.0, 4.4.4
|
||||
- 5.0.0, 5.0.4
|
||||
- 5.1.0, 5.1.5
|
||||
- 5.1.0, 5.1.6
|
||||
|
||||
And can confidently say that HTML Purifier should work in all versions
|
||||
between and afterwards. HTML Purifier definitely does not support PHP 4.2,
|
||||
@@ -26,7 +26,7 @@ any earlier versions.
|
||||
|
||||
I have been unable to get PHP 5.0.5 working on my computer, so if someone
|
||||
wants to test that, be my guest. All tests were done on Windows XP Home,
|
||||
but operating system is quite irrelevant in this particular case.
|
||||
but operating system should not be a major factor in the library.
|
||||
|
||||
|
||||
|
||||
@@ -35,7 +35,8 @@ but operating system is quite irrelevant in this particular case.
|
||||
The library/ directory must be added to your path: HTML Purifier will not be
|
||||
able to find the necessary includes otherwise. This is as simple as:
|
||||
|
||||
set_include_path('/path/to/htmlpurifier/library' . PATH_SEPARATOR . get_include_path());
|
||||
set_include_path('/path/to/htmlpurifier/library' . PATH_SEPARATOR .
|
||||
get_include_path() );
|
||||
|
||||
...replacing /path/to/htmlpurifier with the actual location of the folder. Don't
|
||||
worry, HTML Purifier is namespaced so unless you have another file named
|
||||
@@ -58,19 +59,46 @@ is a (short) checklist:
|
||||
* Have I specified XHTML 1.0 Transitional as the doctype?
|
||||
* Have I specified UTF-8 as the character encoding?
|
||||
|
||||
To find out what these are, browse to your website and view its source code.
|
||||
You can figure out the doctype from a declaration that looks like
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
or no doctype. You can figure out the character encoding by looking for
|
||||
<meta http-equiv="Content-type" content="text/html;charset=ENCODING">
|
||||
|
||||
I cannot stress the importance of these two bullets enough. Omitting either
|
||||
of them could have dire consequences not only for security but for plain
|
||||
old usability. You can find a more in-depth discussion of why this is needed
|
||||
in docs/security.txt; in the meantime, try to change your output so this is
|
||||
the case.
|
||||
the case. If you can't, well, we might be able to accommodate you (read
|
||||
section 3).
|
||||
|
||||
|
||||
|
||||
3. Configuring HTML Purifier
|
||||
|
||||
HTML Purifier is designed to run out-of-the-box, but occasionally HTML
|
||||
Purifier needs to be told what to do.
|
||||
|
||||
If, for some reason, you are unable to switch to UTF-8 immediately, you can
|
||||
switch HTML Purifier's encoding. Note that the availability of encodings is
|
||||
dependent on iconv, and you'll be missing characters if the charset you
|
||||
choose doesn't have them.
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Core', 'Encoding', $encoding);
|
||||
$config->set('Core', 'Encoding', /* put your encoding here */);
|
||||
|
||||
An example usage for Latin-1 websites:
|
||||
|
||||
$config->set('Core', 'Encoding', 'ISO-8859-1');
|
||||
|
||||
For those of you stuck using HTML 4.01 Transitional, you can disable
|
||||
XHTML output like this:
|
||||
|
||||
$config->set('Core', 'XHTML', false);
|
||||
|
||||
However, I strongly recommend that you use XHTML. Currently, we can only
|
||||
guarantee transitional-compliant output; future versions will also allow strict
|
||||
output.
|
||||
|
||||
|
||||
|
||||
@@ -88,3 +116,32 @@ Or, if you're using the configuration object:
|
||||
|
||||
That's it. For more examples, check out docs/examples/. Also, SLOW gives
|
||||
advice on what to do if HTML Purifier is slowing down your application.
|
||||
|
||||
|
||||
|
||||
4. Quick install
|
||||
|
||||
If your website is in UTF-8 and XHTML Transitional, use this code:
|
||||
|
||||
<?php
|
||||
set_include_path('/path/to/htmlpurifier/library'
|
||||
. PATH_SEPARATOR . get_include_path() );
|
||||
require_once 'HTMLPurifier.php';
|
||||
$purifier = new HTMLPurifier();
|
||||
|
||||
$clean_html = $purifier->purify($dirty_html);
|
||||
|
||||
If your website is in a different encoding or doctype, use this code:
|
||||
|
||||
<?php
|
||||
set_include_path('/path/to/htmlpurifier/library'
|
||||
. PATH_SEPARATOR . get_include_path() );
|
||||
require_once 'HTMLPurifier.php';
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Core', 'Encoding', 'ISO-8859-1'); //replace with your encoding
|
||||
$config->set('Core', 'XHTML', true); //replace with false if HTML 4.01
|
||||
$purifier = new HTMLPurifier($config);
|
||||
|
||||
$clean_html = $purifier->purify($dirty_html);
|
||||
?>
|
22
NEWS
22
NEWS
@@ -1,6 +1,28 @@
|
||||
NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||
|
||||
1.2.0, unknown projected release date
|
||||
(major feature release)
|
||||
|
||||
1.1.1, unknown projected release date
|
||||
(bugfix release)
|
||||
|
||||
1.1.0, released 2006-09-16
|
||||
- Made URI validator more forgiving: will ignore leading and trailing
|
||||
quotes, apostrophes and less than or greater than signs.
|
||||
- Enforce alphanumeric namespace and directive names for configuration.
|
||||
- Directive documentation generation using XSLT
|
||||
- Table child definition made more flexible, will fix up poorly ordered elements
|
||||
- XHTML generation can now be turned off, allowing things like <br>
|
||||
- Renamed ConfigDef to ConfigSchema
|
||||
|
||||
1.0.1, released 2006-09-04
|
||||
- Fixed slight bug in DOMLex attribute parsing
|
||||
- Fixed rejection of case-insensitive configuration values when there is a
|
||||
set of allowed values. This manifested in %Core.Encoding.
|
||||
- Fixed rejection of inline style declarations that had lots of extra
|
||||
space in them. This manifested in TinyMCE.
|
||||
|
||||
1.0.0, released 2006-09-01
|
||||
- Fixed broken numeric entity conversion
|
||||
- Malformed UTF-8 and non-SGML character detection and cleaning implemented
|
||||
|
10
TODO
10
TODO
@@ -5,17 +5,15 @@ Ongoing
|
||||
- Lots of profiling, make it faster!
|
||||
- Plugins for major CMSes (very tricky issue)
|
||||
|
||||
1.1 release
|
||||
- Directive documentation generation
|
||||
- Rewrite table's child definition to be faster, smart, and regexp free
|
||||
- Allow HTML 4.01 output (cosmetic changes to the generator)
|
||||
|
||||
1.2 release
|
||||
- Additional support for poorly written HTML
|
||||
- Implement all non-essential attribute transforms
|
||||
- Microsoft Word HTML cleaning (i.e. MsoNormal)
|
||||
|
||||
1.3 release
|
||||
- Formatters for plaintext
|
||||
- Auto-paragraphing (be sure to leverage fact that we know when things
|
||||
shouldn't be paragraphed, such as lists and tables).
|
||||
- Make URI validation routines tighter (especially mailto)
|
||||
- More extensive URI filtering schemes
|
||||
- Allow for background-image and list-style-image (see above)
|
||||
@@ -30,8 +28,6 @@ Ongoing
|
||||
- Extended HTML capabilities based on namespacing and tag transforms
|
||||
- Hooks for adding custom processors to custom namespaced tags and
|
||||
attributes, offer default implementation
|
||||
- Auto-paragraphing (be sure to leverage fact that we know when things
|
||||
shouldn't be paragraphed, such as lists and tables).
|
||||
- Lots of documentation and samples
|
||||
|
||||
Unknown release (on a scratch-an-itch basis)
|
||||
|
7
WYSIWYG
7
WYSIWYG
@@ -1,6 +1,6 @@
|
||||
|
||||
WYSIWYG - What You See Is What You Get
|
||||
HTMLPurifier: A Pretty Good Fit for TinyMCE and FCKeditor
|
||||
HTML Purifier: A Pretty Good Fit for TinyMCE and FCKeditor
|
||||
|
||||
Javascript-based WYSIWYG editors, simply stated, are quite amazing. But I've
|
||||
always been wary about using them due to security issues: they handle the
|
||||
@@ -13,6 +13,9 @@ other markup languages still reign supreme. Put simply: filtering HTML is
|
||||
hard work, and these WYSIWYG authors don't offer anything to alleviate that
|
||||
trouble. Therein lies the solution:
|
||||
|
||||
HTMLPurifier is perfect for filtering pure-HTML input from WYSIWYG editors.
|
||||
HTML Purifier is perfect for filtering pure-HTML input from WYSIWYG editors.
|
||||
|
||||
Enough said.
|
||||
|
||||
There is a proof-of-concept integration of HTML Purifier with the Mantis
|
||||
bugtracker at http://hp.jpsband.org/mantis/
|
||||
|
214
configdoc/generate.php
Normal file
214
configdoc/generate.php
Normal file
@@ -0,0 +1,214 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Generates XML and HTML documents describing configuration.
|
||||
*/
|
||||
|
||||
/*
|
||||
TODO:
|
||||
- make XML format richer (see below)
|
||||
- extend XSLT transformation (see the corresponding XSLT file)
|
||||
- allow generation of packaged docs that can be easily moved
|
||||
- multipage documentation
|
||||
- determine how to multilingualize
|
||||
- factor out code into classes
|
||||
- generate a table of contents
|
||||
*/
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Check and configure environment
|
||||
|
||||
if (version_compare('5', PHP_VERSION, '>')) exit('Requires PHP 5 or higher.');
|
||||
error_reporting(E_ALL);
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Include HTML Purifier library
|
||||
|
||||
set_include_path('../library' . PATH_SEPARATOR . get_include_path());
|
||||
require_once 'HTMLPurifier.php';
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Setup convenience functions
|
||||
|
||||
function appendHTMLDiv($document, $node, $html) {
|
||||
global $purifier;
|
||||
$html = $purifier->purify($html);
|
||||
$dom_html = $document->createDocumentFragment();
|
||||
$dom_html->appendXML($html);
|
||||
|
||||
$dom_div = $document->createElement('div');
|
||||
$dom_div->setAttribute('xmlns', 'http://www.w3.org/1999/xhtml');
|
||||
$dom_div->appendChild($dom_html);
|
||||
|
||||
$node->appendChild($dom_div);
|
||||
}
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Load copies of HTMLPurifier_ConfigDef and HTMLPurifier
|
||||
|
||||
$definition = HTMLPurifier_ConfigDef::instance();
|
||||
$purifier = new HTMLPurifier();
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Generate types.xml, a document describing the constraint "type"
|
||||
|
||||
$types_document = new DOMDocument('1.0', 'UTF-8');
|
||||
$types_root = $types_document->createElement('types');
|
||||
$types_document->appendChild($types_root);
|
||||
$types_document->formatOutput = true;
|
||||
foreach ($definition->types as $name => $expanded_name) {
|
||||
$types_type = $types_document->createElement('type', $expanded_name);
|
||||
$types_type->setAttribute('id', $name);
|
||||
$types_root->appendChild($types_type);
|
||||
}
|
||||
$types_document->save('types.xml');
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Generate configdoc.xml, a document documenting configuration directives
|
||||
|
||||
$dom_document = new DOMDocument('1.0', 'UTF-8');
|
||||
$dom_root = $dom_document->createElement('configdoc');
|
||||
$dom_document->appendChild($dom_root);
|
||||
$dom_document->formatOutput = true;
|
||||
|
||||
// add the name of the application
|
||||
$dom_root->appendChild($dom_document->createElement('title', 'HTML Purifier'));
|
||||
|
||||
/*
|
||||
TODO for XML format:
|
||||
- namespace descriptions
|
||||
- enumerated values
|
||||
- default values
|
||||
- create a definition (DTD or other) once interface stabilizes
|
||||
*/
|
||||
|
||||
foreach($definition->info as $namespace_name => $namespace_info) {
|
||||
|
||||
$dom_namespace = $dom_document->createElement('namespace');
|
||||
$dom_root->appendChild($dom_namespace);
|
||||
|
||||
$dom_namespace->setAttribute('id', $namespace_name);
|
||||
$dom_namespace->appendChild(
|
||||
$dom_document->createElement('name', $namespace_name)
|
||||
);
|
||||
$dom_namespace_description = $dom_document->createElement('description');
|
||||
$dom_namespace->appendChild($dom_namespace_description);
|
||||
appendHTMLDiv($dom_document, $dom_namespace_description,
|
||||
$definition->info_namespace[$namespace_name]->description);
|
||||
|
||||
foreach ($namespace_info as $name => $info) {
|
||||
|
||||
$dom_directive = $dom_document->createElement('directive');
|
||||
$dom_namespace->appendChild($dom_directive);
|
||||
|
||||
$dom_directive->setAttribute('id', $namespace_name . '.' . $name);
|
||||
$dom_directive->appendChild(
|
||||
$dom_document->createElement('name', $name)
|
||||
);
|
||||
|
||||
$dom_constraints = $dom_document->createElement('constraints');
|
||||
$dom_directive->appendChild($dom_constraints);
|
||||
|
||||
$dom_constraints->appendChild(
|
||||
$dom_document->createElement('type', $info->type)
|
||||
);
|
||||
if ($info->allowed !== true) {
|
||||
$dom_allowed = $dom_document->createElement('allowed');
|
||||
$dom_constraints->appendChild($dom_allowed);
|
||||
foreach ($info->allowed as $allowed => $bool) {
|
||||
$dom_allowed->appendChild(
|
||||
$dom_document->createElement('value', $allowed)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
$raw_default = $definition->defaults[$namespace_name][$name];
|
||||
if (is_bool($raw_default)) {
|
||||
$default = $raw_default ? 'true' : 'false';
|
||||
} elseif (is_string($raw_default)) {
|
||||
$default = "\"$raw_default\"";
|
||||
} else {
|
||||
$default = print_r(
|
||||
$definition->defaults[$namespace_name][$name], true
|
||||
);
|
||||
}
|
||||
$dom_constraints->appendChild(
|
||||
$dom_document->createElement('default', $default)
|
||||
);
|
||||
|
||||
$dom_descriptions = $dom_document->createElement('descriptions');
|
||||
$dom_directive->appendChild($dom_descriptions);
|
||||
|
||||
foreach ($info->descriptions as $file => $file_descriptions) {
|
||||
foreach ($file_descriptions as $line => $description) {
|
||||
$dom_description = $dom_document->createElement('description');
|
||||
$dom_description->setAttribute('file', $file);
|
||||
$dom_description->setAttribute('line', $line);
|
||||
appendHTMLDiv($dom_document, $dom_description, $description);
|
||||
$dom_descriptions->appendChild($dom_description);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// print_r($dom_document->saveXML());
|
||||
|
||||
// save a copy of the raw XML
|
||||
$dom_document->save('configdoc.xml');
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Generate final output using XSLT
|
||||
|
||||
// load the stylesheet
|
||||
$xsl_stylesheet_name = 'plain';
|
||||
$xsl_stylesheet = "styles/$xsl_stylesheet_name.xsl";
|
||||
$xsl_dom_stylesheet = new DOMDocument();
|
||||
$xsl_dom_stylesheet->load($xsl_stylesheet);
|
||||
|
||||
// setup the XSLT processor
|
||||
$xsl_processor = new XSLTProcessor();
|
||||
|
||||
// perform the transformation
|
||||
$xsl_processor->importStylesheet($xsl_dom_stylesheet);
|
||||
$html_output = $xsl_processor->transformToXML($dom_document);
|
||||
|
||||
// some slight fudges to preserve backwards compatibility
|
||||
$html_output = str_replace('/>', ' />', $html_output); // <br /> not <br>
|
||||
$html_output = str_replace(' xmlns=""', '', $html_output); // rm unnecessary xmlns
|
||||
|
||||
if (class_exists('Tidy')) {
|
||||
// cleanup output
|
||||
$config = array(
|
||||
'indent' => true,
|
||||
'output-xhtml' => true,
|
||||
'wrap' => 80
|
||||
);
|
||||
$tidy = new Tidy;
|
||||
$tidy->parseString($html_output, $config, 'utf8');
|
||||
$tidy->cleanRepair();
|
||||
$html_output = (string) $tidy;
|
||||
}
|
||||
|
||||
// write it to a file (todo: parse into separate pages)
|
||||
file_put_contents("$xsl_stylesheet_name.html", $html_output);
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Output for instant feedback
|
||||
|
||||
if (php_sapi_name() != 'cli') {
|
||||
echo $html_output;
|
||||
} else {
|
||||
echo 'Files generated successfully.';
|
||||
}
|
||||
|
||||
?>
|
7
configdoc/styles/plain.css
Normal file
7
configdoc/styles/plain.css
Normal file
@@ -0,0 +1,7 @@
|
||||
table {border-collapse:collapse;}
|
||||
table td, table th {padding:0.2em;}
|
||||
|
||||
table.constraints {margin:0 0 1em;}
|
||||
table.constraints th {text-align:left;padding-left:0.4em;}
|
||||
table.constraints td {padding-right:0.4em;}
|
||||
table.constraints td pre {margin:0;}
|
105
configdoc/styles/plain.xsl
Normal file
105
configdoc/styles/plain.xsl
Normal file
@@ -0,0 +1,105 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<xsl:stylesheet
|
||||
version = "1.0"
|
||||
xmlns = "http://www.w3.org/1999/xhtml"
|
||||
xmlns:xsl = "http://www.w3.org/1999/XSL/Transform"
|
||||
>
|
||||
<xsl:output
|
||||
method = "xml"
|
||||
encoding = "UTF-8"
|
||||
doctype-public = "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
doctype-system = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
|
||||
indent = "no"
|
||||
media-type = "text/html"
|
||||
/>
|
||||
|
||||
<xsl:variable name="typeLookup" select="document('../types.xml')" />
|
||||
|
||||
<xsl:template match="/">
|
||||
<html lang="en" xml:lang="en">
|
||||
<head>
|
||||
<title><xsl:value-of select="/configdoc/title" /> Configuration Documentation</title>
|
||||
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8" />
|
||||
<link rel="stylesheet" type="text/css" href="styles/plain.css" />
|
||||
</head>
|
||||
<body>
|
||||
<xsl:apply-templates />
|
||||
</body>
|
||||
</html>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="title">
|
||||
<h1><xsl:value-of select="/configdoc/title" /> Configuration Documentation</h1>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="namespace">
|
||||
<xsl:apply-templates />
|
||||
<xsl:if test="count(child::directive)=0">
|
||||
<p>No configuration directives defined for this namespace.</p>
|
||||
</xsl:if>
|
||||
</xsl:template>
|
||||
<xsl:template match="namespace/name">
|
||||
<h2 id="{../@id}"><xsl:value-of select="text()" /></h2>
|
||||
</xsl:template>
|
||||
<xsl:template match="namespace/description">
|
||||
<div class="description">
|
||||
<xsl:copy-of select="div/node()" />
|
||||
</div>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="directive">
|
||||
<xsl:apply-templates />
|
||||
</xsl:template>
|
||||
<xsl:template match="directive/name">
|
||||
<h3 id="{../@id}"><xsl:value-of select="text()" /></h3>
|
||||
</xsl:template>
|
||||
<xsl:template match="directive/constraints">
|
||||
<table class="constraints">
|
||||
<xsl:apply-templates />
|
||||
<!-- Calculated other values -->
|
||||
<tr>
|
||||
<th>Used by:</th>
|
||||
<td>
|
||||
<xsl:for-each select="../descriptions/description">
|
||||
<xsl:if test="position()>1">, </xsl:if>
|
||||
<xsl:value-of select="@file" />
|
||||
</xsl:for-each>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</xsl:template>
|
||||
<xsl:template match="directive//description">
|
||||
<div class="description">
|
||||
<xsl:copy-of select="div/node()" />
|
||||
</div>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="constraints/type">
|
||||
<tr>
|
||||
<th>Type:</th>
|
||||
<td>
|
||||
<xsl:variable name="type" select="text()" />
|
||||
<xsl:attribute name="class">type type-<xsl:value-of select="$type" /></xsl:attribute>
|
||||
<xsl:value-of select="$typeLookup/types/type[@id=$type]/text()" />
|
||||
</td>
|
||||
</tr>
|
||||
</xsl:template>
|
||||
<xsl:template match="constraints/allowed">
|
||||
<tr>
|
||||
<th>Allowed values:</th>
|
||||
<td>
|
||||
<xsl:for-each select="value"><!--
|
||||
--><xsl:if test="position()>1">, </xsl:if>
|
||||
"<xsl:value-of select="." />"<!--
|
||||
--></xsl:for-each>
|
||||
</td>
|
||||
</tr>
|
||||
</xsl:template>
|
||||
<xsl:template match="constraints/default">
|
||||
<tr>
|
||||
<th>Default:</th>
|
||||
<td><pre><xsl:value-of select="." xml:space="preserve" /></pre></td>
|
||||
</tr>
|
||||
</xsl:template>
|
||||
|
||||
</xsl:stylesheet>
|
@@ -24,8 +24,7 @@ AttrDef
|
||||
Number - constructor interface is inconsistent with Integer
|
||||
AttrTransform - doesn't accept AttrContext, non-validating
|
||||
ChildDef - not-allowed nodes translated to text, likely invalid handling
|
||||
Config - "load configuration" hooks missing, rich set* accessors missing,
|
||||
needs redefined relationship with the definitions
|
||||
Config - "load configuration" hooks missing, rich set* accessors missing
|
||||
Strategy
|
||||
FixNesting - cannot bubble nodes out of structures
|
||||
MakeWellFormed - insufficient automatic closing definitions (check HTML
|
||||
|
@@ -17,18 +17,9 @@ are passed. These classes are: HTMLPurifier::*, Generator::generateFromTokens
|
||||
and Lexer::tokenizeHTML. However, whenever a valid configuration object
|
||||
is defined, that object should be used.
|
||||
|
||||
-- the following is projected changes to the configuration system --
|
||||
|
||||
In relation to HTMLDefinition and CSSDefinition, there are going to be some
|
||||
major structural changes to enable the easy configuration of these objects.
|
||||
Due to the intricacy of these objects, it's not feasible to ask an average
|
||||
user to twiddle around with an element and its 20 other dependencies. However,
|
||||
these objects are the only possible point where change could occur in the
|
||||
context of configuration.
|
||||
|
||||
The solution is to introduce a special class of directives that influence the
|
||||
*construction* of the Definition object. A standard call pattern would look
|
||||
like:
|
||||
In relation to HTMLDefinition and CSSDefinition, there is a special class
|
||||
of directives that influence the *construction* of the Definition object.
|
||||
A standard call pattern would look like:
|
||||
|
||||
1. Client calls Config->getHTMLDefinition()
|
||||
2. Config calls HTMLDefinition->createNew(this)
|
||||
|
@@ -1,272 +0,0 @@
|
||||
<!-- Transform %TextAlign to align:value in style -->
|
||||
|
||||
<!-- text alignment for p, div, h1-h6. The default is
|
||||
align="left" for ltr headings, "right" for rtl
|
||||
|
||||
Move to style! -->
|
||||
<!ENTITY % TextAlign "DEPRECATED align (left|center|right|justify) #IMPLIED">
|
||||
|
||||
<!-- type and start should have CSS equivalents, but they'll need to
|
||||
be translated intelligently -->
|
||||
<!ENTITY % ULStyle "(disc|square|circle)">
|
||||
<!-- Ordered list numbering style
|
||||
|
||||
1 arabic numbers 1, 2, 3, ...
|
||||
a lower alpha a, b, c, ...
|
||||
A upper alpha A, B, C, ...
|
||||
i lower roman i, ii, iii, ...
|
||||
I upper roman I, II, III, ...
|
||||
|
||||
The style is applied to the sequence number which by default
|
||||
is reset to 1 for the first list item in an ordered list.
|
||||
-->
|
||||
<!ENTITY % OLStyle "CDATA">
|
||||
<!-- LIStyle is constrained to: "(%ULStyle;|%OLStyle;)" -->
|
||||
<!ENTITY % LIStyle "CDATA">
|
||||
|
||||
<!ATTLIST ol
|
||||
%attrs;
|
||||
DEPRECATED type %OLStyle; #IMPLIED
|
||||
DEPRECATED start %Number; #IMPLIED
|
||||
>
|
||||
|
||||
<!ATTLIST li
|
||||
%attrs;
|
||||
DEPRECATED type %LIStyle; #IMPLIED
|
||||
DEPRECATED value %Number; #IMPLIED
|
||||
>
|
||||
|
||||
<!ATTLIST hr
|
||||
%attrs;
|
||||
DEPRECATED align (left|center|right) #IMPLIED
|
||||
DEPRECATED size %Pixels; #IMPLIED
|
||||
DEPRECATED width %Length; #IMPLIED
|
||||
>
|
||||
|
||||
<!ATTLIST pre
|
||||
%attrs;
|
||||
DEPRECATED width %Number; #IMPLIED
|
||||
>
|
||||
|
||||
<!ATTLIST blockquote
|
||||
%attrs;
|
||||
cite %URI; #IMPLIED
|
||||
>
|
||||
|
||||
<!ATTLIST ins
|
||||
%attrs;
|
||||
cite %URI; #IMPLIED
|
||||
datetime %Datetime; #IMPLIED
|
||||
>
|
||||
<!ATTLIST del
|
||||
%attrs;
|
||||
cite %URI; #IMPLIED
|
||||
datetime %Datetime; #IMPLIED
|
||||
>
|
||||
|
||||
<!ATTLIST a
|
||||
%attrs;
|
||||
name NMTOKEN #IMPLIED // ID
|
||||
href %URI; #IMPLIED
|
||||
rel %LinkTypes; #IMPLIED // needs policing
|
||||
rev %LinkTypes; #IMPLIED // see rel
|
||||
target %FrameTarget; #IMPLIED // usually not used, but might be
|
||||
>
|
||||
|
||||
<!ATTLIST bdo
|
||||
%coreattrs; // !#!
|
||||
lang %LanguageCode; #IMPLIED
|
||||
xml:lang %LanguageCode; #IMPLIED
|
||||
dir (ltr|rtl) #REQUIRED
|
||||
>
|
||||
|
||||
<!ATTLIST br
|
||||
%coreattrs; // !#!
|
||||
DEPRECATED clear (left|all|right|none) "none"
|
||||
>
|
||||
|
||||
<!ELEMENT q %Inline;> <!-- inlined quote -->
|
||||
<!ATTLIST q
|
||||
%attrs;
|
||||
cite %URI; #IMPLIED
|
||||
>
|
||||
|
||||
<!ATTLIST img
|
||||
%attrs;
|
||||
src %URI; #REQUIRED
|
||||
alt %Text; #REQUIRED
|
||||
DEPRECATED name NMTOKEN #IMPLIED // ID
|
||||
longdesc %URI; #IMPLIED
|
||||
height %Length; #IMPLIED // dubious, but we'll allow
|
||||
width %Length; #IMPLIED //
|
||||
DEPRECATED align %ImgAlign; #IMPLIED
|
||||
DEPRECATED border %Length; #IMPLIED
|
||||
DEPRECATED hspace %Pixels; #IMPLIED // left/right margin
|
||||
DEPRECATED vspace %Pixels; #IMPLIED // up/down margin
|
||||
>
|
||||
|
||||
<!--
|
||||
The border attribute sets the thickness of the frame around the
|
||||
table. The default units are screen pixels.
|
||||
|
||||
The frame attribute specifies which parts of the frame around
|
||||
the table should be rendered. The values are not the same as
|
||||
CALS to avoid a name clash with the valign attribute.
|
||||
-->
|
||||
<!ENTITY % TFrame "(void|above|below|hsides|lhs|rhs|vsides|box|border)">
|
||||
|
||||
<!--
|
||||
The rules attribute defines which rules to draw between cells:
|
||||
|
||||
If rules is absent then assume:
|
||||
"none" if border is absent or border="0" otherwise "all"
|
||||
-->
|
||||
|
||||
<!ENTITY % TRules "(none | groups | rows | cols | all)">
|
||||
|
||||
<!-- horizontal placement of table relative to document -->
|
||||
<!ENTITY % TAlign "(left|center|right)">
|
||||
|
||||
<!-- horizontal alignment attributes for cell contents
|
||||
|
||||
char alignment char, e.g. char=':'
|
||||
charoff offset for alignment char
|
||||
-->
|
||||
<!ENTITY % cellhalign
|
||||
"align (left|center|right|justify|char) #IMPLIED
|
||||
char %Character; #IMPLIED
|
||||
charoff %Length; #IMPLIED"
|
||||
>
|
||||
|
||||
<!-- vertical alignment attributes for cell contents -->
|
||||
<!ENTITY % cellvalign
|
||||
"valign (top|middle|bottom|baseline) #IMPLIED"
|
||||
>
|
||||
|
||||
<!-- we may want to convert some of these nonetheless -->
|
||||
<!ATTLIST table
|
||||
%attrs;
|
||||
summary %Text; #IMPLIED
|
||||
width %Length; #IMPLIED
|
||||
border %Pixels; #IMPLIED
|
||||
frame %TFrame; #IMPLIED
|
||||
rules %TRules; #IMPLIED
|
||||
cellspacing %Length; #IMPLIED
|
||||
cellpadding %Length; #IMPLIED
|
||||
DEPRECATED align %TAlign; #IMPLIED
|
||||
DEPRECATED bgcolor %Color; #IMPLIED
|
||||
>
|
||||
|
||||
<!ENTITY % CAlign "(top|bottom|left|right)">
|
||||
|
||||
<!ATTLIST caption
|
||||
%attrs;
|
||||
DEPRECATED align %CAlign; #IMPLIED // watch, it's a special set
|
||||
>
|
||||
|
||||
<!--
|
||||
colgroup groups a set of col elements. It allows you to group
|
||||
several semantically related columns together.
|
||||
-->
|
||||
<!ATTLIST colgroup
|
||||
%attrs;
|
||||
span %Number; "1"
|
||||
width %MultiLength; #IMPLIED
|
||||
%cellhalign; // very interesting
|
||||
%cellvalign;
|
||||
>
|
||||
|
||||
<!--
|
||||
col elements define the alignment properties for cells in
|
||||
one or more columns.
|
||||
|
||||
The width attribute specifies the width of the columns, e.g.
|
||||
|
||||
width=64 width in screen pixels
|
||||
width=0.5* relative width of 0.5
|
||||
|
||||
The span attribute causes the attributes of one
|
||||
col element to apply to more than one column.
|
||||
-->
|
||||
<!ATTLIST col
|
||||
%attrs;
|
||||
span %Number; "1"
|
||||
width %MultiLength; #IMPLIED
|
||||
%cellhalign;
|
||||
%cellvalign;
|
||||
>
|
||||
|
||||
<!--
|
||||
Use thead to duplicate headers when breaking table
|
||||
across page boundaries, or for static headers when
|
||||
tbody sections are rendered in scrolling panel.
|
||||
|
||||
Use tfoot to duplicate footers when breaking table
|
||||
across page boundaries, or for static footers when
|
||||
tbody sections are rendered in scrolling panel.
|
||||
|
||||
Use multiple tbody sections when rules are needed
|
||||
between groups of table rows.
|
||||
-->
|
||||
<!ATTLIST thead
|
||||
%attrs;
|
||||
%cellhalign;
|
||||
%cellvalign;
|
||||
>
|
||||
|
||||
<!ATTLIST tfoot
|
||||
%attrs;
|
||||
%cellhalign;
|
||||
%cellvalign;
|
||||
>
|
||||
|
||||
<!ATTLIST tbody
|
||||
%attrs;
|
||||
%cellhalign;
|
||||
%cellvalign;
|
||||
>
|
||||
|
||||
<!ATTLIST tr
|
||||
%attrs;
|
||||
%cellhalign;
|
||||
%cellvalign;
|
||||
DEPRECATED bgcolor %Color; #IMPLIED
|
||||
>
|
||||
|
||||
<!-- Scope is simpler than headers attribute for common tables -->
|
||||
<!ENTITY % Scope "(row|col|rowgroup|colgroup)">
|
||||
|
||||
<!-- th is for headers, td for data and for cells acting as both -->
|
||||
|
||||
<!ATTLIST th
|
||||
%attrs;
|
||||
abbr %Text; #IMPLIED
|
||||
axis CDATA #IMPLIED
|
||||
headers IDREFS #IMPLIED
|
||||
scope %Scope; #IMPLIED
|
||||
rowspan %Number; "1"
|
||||
colspan %Number; "1"
|
||||
%cellhalign;
|
||||
%cellvalign;
|
||||
DEPRECATED nowrap (nowrap) #IMPLIED
|
||||
DEPRECATED bgcolor %Color; #IMPLIED
|
||||
DEPRECATED width %Length; #IMPLIED
|
||||
DEPRECATED height %Length; #IMPLIED
|
||||
>
|
||||
|
||||
<!ATTLIST td
|
||||
%attrs;
|
||||
abbr %Text; #IMPLIED
|
||||
axis CDATA #IMPLIED
|
||||
headers IDREFS #IMPLIED
|
||||
scope %Scope; #IMPLIED
|
||||
rowspan %Number; "1"
|
||||
colspan %Number; "1"
|
||||
%cellhalign;
|
||||
%cellvalign;
|
||||
DEPRECATED nowrap (nowrap) #IMPLIED
|
||||
DEPRECATED bgcolor %Color; #IMPLIED
|
||||
DEPRECATED width %Length; #IMPLIED
|
||||
DEPRECATED height %Length; #IMPLIED
|
||||
>
|
||||
|
@@ -4,7 +4,7 @@ Optimization
|
||||
Here are some possible optimization techniques we can apply to code sections if
|
||||
they turn out to be slow. Be sure not to prematurely optimize though!
|
||||
|
||||
- Make Tokens Flyweights
|
||||
- Make Tokens Flyweights (may prove problematic, probably not worth it)
|
||||
- Rewrite regexps into PHP code
|
||||
- Serialize the Definition object
|
||||
- Batch regexp validation (do as many per function call as possible)
|
||||
|
@@ -12,8 +12,6 @@ character encoding explicitly stated" or UTF-7. If you're not using UTF-8 as
|
||||
your character encoding, you should switch. Now. Make sure any input is
|
||||
properly converted to UTF-8, or the parser will mangle it badly
|
||||
(although it won't be a security risk if you're outputting it as UTF-8).
|
||||
We will be adding out-of-the-box support for the other major character
|
||||
encodings shortly.
|
||||
|
||||
2. XHTML 1.0 Transitional. This is what the parser is outputting. For the most
|
||||
part, it's compatible with HTML 4.01, but XHTML enforces some very nice things
|
||||
@@ -37,4 +35,5 @@ to protect your pages from being attacked by garish colors and plain old
|
||||
bad taste. A neat feature would be the ability to define acceptable colors
|
||||
in a document, but that's not likely to be implemented for a while. In the
|
||||
meantime, be sure to make sure that floated elements (permitted, since they
|
||||
can be quite useful) can't mess up your layout.
|
||||
can be quite useful) can't mess up your layout. Once again, we may want to
|
||||
disable this by default to protect lazy developers.
|
||||
|
@@ -54,4 +54,4 @@ HTML Purifier is best suited for documents that require a rich array of
|
||||
HTML tags. Things like blog comments are, in all likelihood, most appropriately
|
||||
written in an extremely restrictive set of markup that doesn't require
|
||||
all this functionality (or not written in HTML at all), although this may
|
||||
be changing in the future.
|
||||
be changing in the future with the addition of levels of filtering.
|
||||
|
@@ -18,7 +18,7 @@
|
||||
* However, most users will only need to interface with the HTMLPurifier
|
||||
* class, so this massive amount of infrastructure is usually concealed.
|
||||
* If you plan on working with the internals, be sure to include
|
||||
* HTMLPurifier_ConfigDef and HTMLPurifier_Config.
|
||||
* HTMLPurifier_ConfigSchema and HTMLPurifier_Config.
|
||||
*/
|
||||
|
||||
/*
|
||||
@@ -39,11 +39,13 @@
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
require_once 'HTMLPurifier/ConfigDef.php';
|
||||
|
||||
// almost every class has an undocumented dependency to these, so make sure
|
||||
// they get included
|
||||
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||
require_once 'HTMLPurifier/Config.php';
|
||||
|
||||
require_once 'HTMLPurifier/Lexer.php';
|
||||
require_once 'HTMLPurifier/HTMLDefinition.php';
|
||||
require_once 'HTMLPurifier/Generator.php';
|
||||
require_once 'HTMLPurifier/Strategy/Core.php';
|
||||
require_once 'HTMLPurifier/Encoder.php';
|
||||
|
@@ -15,6 +15,12 @@ require_once 'HTMLPurifier/AttrContext.php';
|
||||
class HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Tells us whether or not an HTML attribute is minimized. Only the
|
||||
* boolean attribute vapourware would use this.
|
||||
*/
|
||||
var $minimized = false;
|
||||
|
||||
/**
|
||||
* Abstract function defined for functions that validate and clean strings.
|
||||
*
|
||||
|
@@ -28,6 +28,8 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
|
||||
if (!$declaration) continue;
|
||||
if (!strpos($declaration, ':')) continue;
|
||||
list($property, $value) = explode(':', $declaration, 2);
|
||||
$property = trim($property);
|
||||
$value = trim($value);
|
||||
if (!isset($definition->info[$property])) continue;
|
||||
// inefficient call, since the validator will do this again
|
||||
if (strtolower(trim($value)) !== 'inherit') {
|
||||
|
@@ -5,7 +5,7 @@ require_once 'HTMLPurifier/URIScheme.php';
|
||||
require_once 'HTMLPurifier/URISchemeRegistry.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Host.php';
|
||||
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DefaultScheme', 'http', 'string',
|
||||
'Defines through what scheme the output will be served, in order to '.
|
||||
'select the proper object validator when no scheme information is present.'
|
||||
@@ -36,13 +36,13 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
// for HTTP and thus won't work for our generic URI parsing
|
||||
|
||||
// according to the RFC... (but this cuts corners, i.e. non-validating)
|
||||
$r_URI = '!^'.
|
||||
'(([^:/?#<>]+):)?'. // 2. Scheme
|
||||
'(//([^/?#<>]*))?'. // 4. Authority
|
||||
'([^?#<>]*)'. // 5. Path
|
||||
'(\?([^#<>]*))?'. // 7. Query
|
||||
'(#([^<>]*))?'. // 8. Fragment
|
||||
'$!';
|
||||
$r_URI = '!'.
|
||||
'(([^:/?#<>\'"]+):)?'. // 2. Scheme
|
||||
'(//([^/?#<>\'"]*))?'. // 4. Authority
|
||||
'([^?#<>\'"]*)'. // 5. Path
|
||||
'(\?([^#<>\'"]*))?'. // 7. Query
|
||||
'(#([^<>\'"]*))?'. // 8. Fragment
|
||||
'!';
|
||||
|
||||
$matches = array();
|
||||
$result = preg_match($r_URI, $uri, $matches);
|
||||
|
@@ -4,13 +4,13 @@ require_once 'HTMLPurifier/AttrTransform.php';
|
||||
|
||||
// this MUST be placed in post, as it assumes that any value in dir is valid
|
||||
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'DefaultTextDir', 'ltr', 'string',
|
||||
'Defines the default text direction (ltr or rtl) of the document '.
|
||||
'being parsed. This generally is the same as the value of the dir '.
|
||||
'attribute in HTML, or ltr if that is not specified.'
|
||||
);
|
||||
HTMLPurifier_ConfigDef::defineAllowedValues(
|
||||
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||
'Attr', 'DefaultTextDir', array( 'ltr', 'rtl' )
|
||||
);
|
||||
|
||||
|
@@ -4,7 +4,7 @@ require_once 'HTMLPurifier/AttrTransform.php';
|
||||
|
||||
// must be called POST validation
|
||||
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'DefaultInvalidImage', '', 'string',
|
||||
'This is the default image an img tag will be pointed to if it does '.
|
||||
'not have a valid src attribute. In future versions, we may allow the '.
|
||||
@@ -12,7 +12,7 @@ HTMLPurifier_ConfigDef::define(
|
||||
'not possible right now.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'DefaultInvalidImageAlt', 'Invalid image', 'string',
|
||||
'This is the content of the alt tag of an invalid image if the user '.
|
||||
'had not previously specified an alt attribute. It has no effect when the '.
|
||||
|
@@ -5,14 +5,7 @@
|
||||
// false = delete parent node and all children
|
||||
// array(...) = replace children nodes with these
|
||||
|
||||
// this is the hardest one to implement. We'll use fancy regexp tricks
|
||||
// right now, we only expect it to return TRUE or FALSE (it won't attempt
|
||||
// to fix the tree)
|
||||
|
||||
// we may end up writing custom code for each HTML case
|
||||
// in order to make it self correcting
|
||||
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'EscapeInvalidChildren', false, 'bool',
|
||||
'When true, a child is found that is not allowed in the context of the '.
|
||||
'parent element will be transformed into text as if it were ASCII. When '.
|
||||
@@ -62,9 +55,7 @@ class HTMLPurifier_ChildDef
|
||||
* Custom validation class, accepts DTD child definitions
|
||||
*
|
||||
* @warning Currently this class is an all or nothing proposition, that is,
|
||||
* it will only give a bool return value. Table is the only
|
||||
* child definition that uses this class, and we ought to give
|
||||
* it a dedicated one.
|
||||
* it will only give a bool return value.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
|
||||
{
|
||||
@@ -307,4 +298,129 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Definition for tables
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||
{
|
||||
var $allow_empty = false;
|
||||
var $type = 'table';
|
||||
function HTMLPurifier_ChildDef_Table() {}
|
||||
function validateChildren($tokens_of_children, $config, $context) {
|
||||
if (empty($tokens_of_children)) return false;
|
||||
|
||||
// this ensures that the loop gets run one last time before closing
|
||||
// up. It's a little bit of a hack, but it works! Just make sure you
|
||||
// get rid of the token later.
|
||||
$tokens_of_children[] = false;
|
||||
|
||||
// only one of these elements is allowed in a table
|
||||
$caption = false;
|
||||
$thead = false;
|
||||
$tfoot = false;
|
||||
|
||||
// as many of these as you want
|
||||
$cols = array();
|
||||
$content = array();
|
||||
|
||||
$nesting = 0; // current depth so we can determine nodes
|
||||
$is_collecting = false; // are we globbing together tokens to package
|
||||
// into one of the collectors?
|
||||
$collection = array(); // collected nodes
|
||||
|
||||
foreach ($tokens_of_children as $token) {
|
||||
$is_child = ($nesting == 0);
|
||||
|
||||
if ($token === false) {
|
||||
// terminating sequence started
|
||||
} elseif ($token->type == 'start') {
|
||||
$nesting++;
|
||||
} elseif ($token->type == 'end') {
|
||||
$nesting--;
|
||||
}
|
||||
|
||||
// handle node collection
|
||||
if ($is_collecting) {
|
||||
if ($is_child) {
|
||||
// okay, let's stash the tokens away
|
||||
// first token tells us the type of the collection
|
||||
switch ($collection[0]->name) {
|
||||
case 'tr':
|
||||
case 'tbody':
|
||||
$content[] = $collection;
|
||||
break;
|
||||
case 'caption':
|
||||
if ($caption !== false) break;
|
||||
$caption = $collection;
|
||||
break;
|
||||
case 'thead':
|
||||
case 'tfoot':
|
||||
// access the appropriate variable, $thead or $tfoot
|
||||
$var = $collection[0]->name;
|
||||
if ($$var === false) {
|
||||
$$var = $collection;
|
||||
} else {
|
||||
// transmute the first and last entries into
|
||||
// tbody tags, and then put into content
|
||||
$collection[0]->name = 'tbody';
|
||||
$collection[count($collection)-1]->name = 'tbody';
|
||||
$content[] = $collection;
|
||||
}
|
||||
break;
|
||||
case 'colgroup':
|
||||
$cols[] = $collection;
|
||||
break;
|
||||
}
|
||||
$collection = array();
|
||||
$is_collecting = false;
|
||||
} else {
|
||||
// add the node to the collection
|
||||
$collection[] = $token;
|
||||
}
|
||||
}
|
||||
|
||||
// terminate
|
||||
if ($token === false) break;
|
||||
|
||||
if ($is_child) {
|
||||
// determine what we're dealing with
|
||||
if ($token->name == 'col') {
|
||||
// the only empty tag in the posse, we can handle it
|
||||
// immediately
|
||||
$cols[] = array($token);
|
||||
continue;
|
||||
}
|
||||
switch($token->name) {
|
||||
case 'caption':
|
||||
case 'colgroup':
|
||||
case 'thead':
|
||||
case 'tfoot':
|
||||
case 'tbody':
|
||||
case 'tr':
|
||||
$is_collecting = true;
|
||||
$collection[] = $token;
|
||||
continue;
|
||||
default:
|
||||
// unrecognized, drop silently
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (empty($content)) return false;
|
||||
|
||||
$ret = array();
|
||||
if ($caption !== false) $ret = array_merge($ret, $caption);
|
||||
if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
|
||||
if ($thead !== false) $ret = array_merge($ret, $thead);
|
||||
if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
|
||||
foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
|
||||
|
||||
array_pop($tokens_of_children); // remove phantom token
|
||||
|
||||
return ($ret === $tokens_of_children) ? true : $ret;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
@@ -21,7 +21,7 @@ class HTMLPurifier_Config
|
||||
var $conf;
|
||||
|
||||
/**
|
||||
* Reference HTMLPurifier_ConfigDef for value checking
|
||||
* Reference HTMLPurifier_ConfigSchema for value checking
|
||||
*/
|
||||
var $def;
|
||||
|
||||
@@ -36,7 +36,7 @@ class HTMLPurifier_Config
|
||||
var $css_definition;
|
||||
|
||||
/**
|
||||
* @param $definition HTMLPurifier_ConfigDef that defines what directives
|
||||
* @param $definition HTMLPurifier_ConfigSchema that defines what directives
|
||||
* are allowed.
|
||||
*/
|
||||
function HTMLPurifier_Config(&$definition) {
|
||||
@@ -49,7 +49,7 @@ class HTMLPurifier_Config
|
||||
* @return Default HTMLPurifier_Config object.
|
||||
*/
|
||||
function createDefault() {
|
||||
$definition =& HTMLPurifier_ConfigDef::instance();
|
||||
$definition =& HTMLPurifier_ConfigSchema::instance();
|
||||
$config = new HTMLPurifier_Config($definition);
|
||||
return $config;
|
||||
}
|
||||
@@ -80,6 +80,8 @@ class HTMLPurifier_Config
|
||||
E_USER_WARNING);
|
||||
return;
|
||||
}
|
||||
$value = $this->def->validate($value,
|
||||
$this->def->info[$namespace][$key]->type);
|
||||
if (is_string($value)) {
|
||||
// resolve value alias if defined
|
||||
if (isset($this->def->info[$namespace][$key]->aliases[$value])) {
|
||||
@@ -93,8 +95,6 @@ class HTMLPurifier_Config
|
||||
}
|
||||
}
|
||||
}
|
||||
$value = $this->def->validate($value,
|
||||
$this->def->info[$namespace][$key]->type);
|
||||
if ($value === null) {
|
||||
trigger_error('Value is of invalid type', E_USER_WARNING);
|
||||
return;
|
||||
|
@@ -3,8 +3,23 @@
|
||||
/**
|
||||
* Configuration definition, defines directives and their defaults.
|
||||
* @todo Build documentation generation capabilities.
|
||||
* @todo The ability to define things multiple times is confusing and should
|
||||
* be factored out to its own function named registerDependency() or
|
||||
* addNote(), where only the namespace.name and an extra description
|
||||
* documenting the nature of the dependency are needed. Since it's
|
||||
* possible that the dependency is registered before the configuration
|
||||
* is defined, deferring it to some sort of cache until it actually
|
||||
* gets defined would be wise, keeping it opaque until it does get
|
||||
* defined. We could add a finalize() method which would cause it to
|
||||
* error out if we get a dangling dependency. It's difficult, however,
|
||||
* to know whether or not it's a dependency, or a codependency, that is
|
||||
* neither of them fully depends on it. Where does the configuration go
|
||||
* then? This could be partially resolved by allowing blanket definitions
|
||||
* and then splitting them up into finer-grained versions, however, there
|
||||
* might be implementation difficulties in ini files regarding order of
|
||||
* execution.
|
||||
*/
|
||||
class HTMLPurifier_ConfigDef {
|
||||
class HTMLPurifier_ConfigSchema {
|
||||
|
||||
/**
|
||||
* Defaults of the directives and namespaces.
|
||||
@@ -24,17 +39,18 @@ class HTMLPurifier_ConfigDef {
|
||||
|
||||
/**
|
||||
* Lookup table of allowed types.
|
||||
* @todo Add descriptions
|
||||
*/
|
||||
var $types = array(
|
||||
'string' => true,
|
||||
'istring' => true,
|
||||
'int' => true,
|
||||
'float' => true,
|
||||
'bool' => true,
|
||||
'lookup' => true,
|
||||
'list' => true,
|
||||
'hash' => true,
|
||||
'mixed' => true
|
||||
'string' => 'String',
|
||||
'istring' => 'Case-insensitive string',
|
||||
'int' => 'Integer',
|
||||
'float' => 'Float',
|
||||
'bool' => 'Boolean',
|
||||
'lookup' => 'Lookup array',
|
||||
'list' => 'Array list',
|
||||
'hash' => 'Associative array',
|
||||
'mixed' => 'Mixed'
|
||||
);
|
||||
|
||||
/**
|
||||
@@ -46,6 +62,7 @@ class HTMLPurifier_ConfigDef {
|
||||
$this->defineNamespace('URI', 'Features regarding Uniform Resource Identifiers.');
|
||||
$this->defineNamespace('HTML', 'Configuration regarding allowed HTML.');
|
||||
$this->defineNamespace('CSS', 'Configuration regarding allowed CSS.');
|
||||
$this->defineNamespace('Test', 'Developer testing configuration for our unit tests.');
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -56,7 +73,7 @@ class HTMLPurifier_ConfigDef {
|
||||
if ($prototype !== null) {
|
||||
$instance = $prototype;
|
||||
} elseif ($instance === null || $prototype === true) {
|
||||
$instance = new HTMLPurifier_ConfigDef();
|
||||
$instance = new HTMLPurifier_ConfigSchema();
|
||||
$instance->initialize();
|
||||
}
|
||||
return $instance;
|
||||
@@ -79,12 +96,17 @@ class HTMLPurifier_ConfigDef {
|
||||
$namespace, $name, $default, $type,
|
||||
$description
|
||||
) {
|
||||
$def =& HTMLPurifier_ConfigDef::instance();
|
||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||
if (!isset($def->info[$namespace])) {
|
||||
trigger_error('Cannot define directive for undefined namespace',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (!ctype_alnum($name)) {
|
||||
trigger_error('Directive name must be alphanumeric',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (isset($def->info[$namespace][$name])) {
|
||||
if (
|
||||
$def->info[$namespace][$name]->type !== $type ||
|
||||
@@ -121,17 +143,19 @@ class HTMLPurifier_ConfigDef {
|
||||
* @param $description Description of the namespace
|
||||
*/
|
||||
function defineNamespace($namespace, $description) {
|
||||
$def =& HTMLPurifier_ConfigDef::instance();
|
||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||
if (isset($def->info[$namespace])) {
|
||||
trigger_error('Cannot redefine namespace', E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (!ctype_alnum($namespace)) {
|
||||
trigger_error('Namespace name must be alphanumeric',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
$def->info[$namespace] = array();
|
||||
$def->info_namespace[$namespace] = new HTMLPurifier_ConfigEntity_Namespace();
|
||||
$backtrace = debug_backtrace();
|
||||
$file = $def->mungeFilename($backtrace[0]['file']);
|
||||
$line = $backtrace[0]['line'];
|
||||
$def->info_namespace[$namespace]->addDescription($file,$line,$description);
|
||||
$def->info_namespace[$namespace]->description = $description;
|
||||
$def->defaults[$namespace] = array();
|
||||
}
|
||||
|
||||
@@ -146,7 +170,7 @@ class HTMLPurifier_ConfigDef {
|
||||
* @param $real Value aliased value will be converted into
|
||||
*/
|
||||
function defineValueAliases($namespace, $name, $aliases) {
|
||||
$def =& HTMLPurifier_ConfigDef::instance();
|
||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||
if (!isset($def->info[$namespace][$name])) {
|
||||
trigger_error('Cannot set value alias for non-existant directive',
|
||||
E_USER_ERROR);
|
||||
@@ -176,7 +200,7 @@ class HTMLPurifier_ConfigDef {
|
||||
* @param $allowed_values Arraylist of allowed values
|
||||
*/
|
||||
function defineAllowedValues($namespace, $name, $allowed_values) {
|
||||
$def =& HTMLPurifier_ConfigDef::instance();
|
||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||
if (!isset($def->info[$namespace][$name])) {
|
||||
trigger_error('Cannot define allowed values for undefined directive',
|
||||
E_USER_ERROR);
|
||||
@@ -255,27 +279,19 @@ class HTMLPurifier_ConfigDef {
|
||||
/**
|
||||
* Base class for configuration entity
|
||||
*/
|
||||
class HTMLPurifier_ConfigEntity
|
||||
{
|
||||
/**
|
||||
* Plaintext descriptions of the configuration entity is. Organized by
|
||||
* file and line number, so multiple descriptions are allowed.
|
||||
*/
|
||||
var $descriptions = array();
|
||||
|
||||
/**
|
||||
* Adds a description to the array
|
||||
*/
|
||||
function addDescription($file, $line, $description) {
|
||||
if (!isset($this->descriptions[$file])) $this->descriptions[$file] = array();
|
||||
$this->descriptions[$file][$line] = $description;
|
||||
}
|
||||
}
|
||||
class HTMLPurifier_ConfigEntity {}
|
||||
|
||||
/**
|
||||
* Structure object describing a namespace
|
||||
*/
|
||||
class HTMLPurifier_ConfigEntity_Namespace extends HTMLPurifier_ConfigEntity {}
|
||||
class HTMLPurifier_ConfigEntity_Namespace extends HTMLPurifier_ConfigEntity {
|
||||
|
||||
/**
|
||||
* String description of what kinds of directives go in this namespace.
|
||||
*/
|
||||
var $description;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Structure object containing definition of a directive.
|
||||
@@ -307,6 +323,19 @@ class HTMLPurifier_ConfigEntity_Directive extends HTMLPurifier_ConfigEntity
|
||||
* - mixed (anything goes)
|
||||
*/
|
||||
var $type = 'mixed';
|
||||
/**
|
||||
* Plaintext descriptions of what the configuration entity is. Organized by
|
||||
* file and line number, so multiple descriptions are allowed.
|
||||
*/
|
||||
var $descriptions = array();
|
||||
|
||||
/**
|
||||
* Adds a description to the array
|
||||
*/
|
||||
function addDescription($file, $line, $description) {
|
||||
if (!isset($this->descriptions[$file])) $this->descriptions[$file] = array();
|
||||
$this->descriptions[$file][$line] = $description;
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -2,7 +2,7 @@
|
||||
|
||||
require_once 'HTMLPurifier/EntityLookup.php';
|
||||
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'Encoding', 'utf-8', 'istring',
|
||||
'If for some reason you are unable to convert all webpages to UTF-8, '.
|
||||
'you can use this directive as a stop-gap compatibility change to '.
|
||||
@@ -17,14 +17,25 @@ HTMLPurifier_ConfigDef::define(
|
||||
|
||||
if ( !function_exists('iconv') ) {
|
||||
// only encodings with native PHP support
|
||||
HTMLPurifier_ConfigDef::defineAllowedValues(
|
||||
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||
'Core', 'Encoding', array(
|
||||
'utf-8',
|
||||
'iso-8859-1'
|
||||
)
|
||||
);
|
||||
HTMLPurifier_ConfigSchema::defineValueAliases(
|
||||
'Core', 'Encoding', array(
|
||||
'iso8859-1' => 'iso-8859-1'
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Test', 'ForceNoIconv', false, 'bool',
|
||||
'When set to true, HTMLPurifier_Encoder will act as if iconv does not '.
|
||||
'exist and use only pure PHP implementations.'
|
||||
);
|
||||
|
||||
/**
|
||||
* A UTF-8 specific character encoder that handles cleaning and transforming.
|
||||
*/
|
||||
@@ -260,9 +271,9 @@ class HTMLPurifier_Encoder
|
||||
if ($iconv === null) $iconv = function_exists('iconv');
|
||||
$encoding = $config->get('Core', 'Encoding');
|
||||
if ($encoding === 'utf-8') return $str;
|
||||
if ($iconv) {
|
||||
if ($iconv && !$config->get('Test', 'ForceNoIconv')) {
|
||||
return @iconv($encoding, 'utf-8//IGNORE', $str);
|
||||
} elseif ($encoding === 'iso-8895-1') {
|
||||
} elseif ($encoding === 'iso-8859-1') {
|
||||
return @utf8_encode($str);
|
||||
}
|
||||
}
|
||||
@@ -277,10 +288,10 @@ class HTMLPurifier_Encoder
|
||||
if ($iconv === null) $iconv = function_exists('iconv');
|
||||
$encoding = $config->get('Core', 'Encoding');
|
||||
if ($encoding === 'utf-8') return $str;
|
||||
if ($iconv) {
|
||||
if ($iconv && !$config->get('Test', 'ForceNoIconv')) {
|
||||
return @iconv('utf-8', $encoding . '//IGNORE', $str);
|
||||
} elseif ($encoding === 'iso-8895-1') {
|
||||
return @utf8_encode($str);
|
||||
} elseif ($encoding === 'iso-8859-1') {
|
||||
return @utf8_decode($str);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -4,7 +4,7 @@
|
||||
|
||||
require_once 'HTMLPurifier/Lexer.php';
|
||||
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'CleanUTF8DuringGeneration', false, 'bool',
|
||||
'When true, HTMLPurifier_Generator will also check all strings it '.
|
||||
'escapes for UTF-8 well-formedness as a defense in depth measure. '.
|
||||
@@ -15,6 +15,14 @@ HTMLPurifier_ConfigDef::define(
|
||||
'generateFromTokens.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'XHTML', true, 'bool',
|
||||
'Determines whether or not output is XHTML or not. When disabled, HTML '.
|
||||
'Purifier goes into HTML 4.01 removes XHTML-specific markup constructs, '.
|
||||
'such as boolean attribute expansion and trailing slashes in empty tags. '.
|
||||
'This directive was available since 1.1.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Generates HTML from tokens.
|
||||
*/
|
||||
@@ -22,11 +30,16 @@ class HTMLPurifier_Generator
|
||||
{
|
||||
|
||||
/**
|
||||
* Bool cache of the CleanUTF8DuringGeneration directive.
|
||||
* Bool cache of %Core.CleanUTF8DuringGeneration
|
||||
* @private
|
||||
*/
|
||||
var $_clean_utf8 = false;
|
||||
|
||||
/**
|
||||
* Bool cache of %Core.XHTML
|
||||
*/
|
||||
var $_xhtml = true;
|
||||
|
||||
/**
|
||||
* Generates HTML from an array of tokens.
|
||||
* @param $tokens Array of HTMLPurifier_Token
|
||||
@@ -38,6 +51,7 @@ class HTMLPurifier_Generator
|
||||
$html = '';
|
||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||
$this->_clean_utf8 = $config->get('Core', 'CleanUTF8DuringGeneration');
|
||||
$this->_xhtml = $config->get('Core', 'XHTML');
|
||||
if (!$tokens) return '';
|
||||
foreach ($tokens as $token) {
|
||||
$html .= $this->generateFromToken($token);
|
||||
@@ -61,7 +75,9 @@ class HTMLPurifier_Generator
|
||||
|
||||
} elseif ($token->type == 'empty') {
|
||||
$attr = $this->generateAttributes($token->attributes);
|
||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr . ' />';
|
||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
|
||||
( $this->_xhtml ? ' /': '' )
|
||||
. '>';
|
||||
|
||||
} elseif ($token->type == 'text') {
|
||||
return $this->escape($token->data);
|
||||
@@ -80,6 +96,11 @@ class HTMLPurifier_Generator
|
||||
function generateAttributes($assoc_array_of_attributes) {
|
||||
$html = '';
|
||||
foreach ($assoc_array_of_attributes as $key => $value) {
|
||||
if (!$this->_xhtml) {
|
||||
// remove namespaced attributes
|
||||
if (strpos($key, ':') !== false) continue;
|
||||
// also needed: check for attribute minimization
|
||||
}
|
||||
$html .= $key.'="'.$this->escape($value).'" ';
|
||||
}
|
||||
return rtrim($html);
|
||||
|
@@ -209,8 +209,7 @@ class HTMLPurifier_HTMLDefinition
|
||||
|
||||
$this->info['a']->child = $e_a_content;
|
||||
|
||||
$this->info['table']->child = new HTMLPurifier_ChildDef_Custom(
|
||||
'(caption?, (col*|colgroup*), thead?, tfoot?, (tbody+|tr+))');
|
||||
$this->info['table']->child = new HTMLPurifier_ChildDef_Table();
|
||||
|
||||
// not a real entity, watch the double underscore
|
||||
$e__row = new HTMLPurifier_ChildDef_Required('tr');
|
||||
|
@@ -4,12 +4,11 @@ require_once 'HTMLPurifier/Token.php';
|
||||
require_once 'HTMLPurifier/Encoder.php';
|
||||
require_once 'HTMLPurifier/EntityParser.php';
|
||||
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'AcceptFullDocuments', true, 'bool',
|
||||
'This parameter determines whether or not the filter should accept full '.
|
||||
'HTML documents, not just HTML fragments. When on, it will '.
|
||||
'drop all sections except the content between body. Depending on '.
|
||||
'the implementation in use, this may speed up document parse times.'
|
||||
'drop all sections except the content between body.'
|
||||
);
|
||||
|
||||
/**
|
||||
|
@@ -87,19 +87,20 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
return;
|
||||
}
|
||||
|
||||
$attr = $node->hasAttributes() ?
|
||||
$this->transformAttrToAssoc($node->attributes) :
|
||||
array();
|
||||
|
||||
// We still have to make sure that the element actually IS empty
|
||||
if (!$node->childNodes->length) {
|
||||
if ($collect) {
|
||||
$tokens[] = $this->factory->createEmpty(
|
||||
$node->tagName,
|
||||
$this->transformAttrToAssoc($node->attributes)
|
||||
);
|
||||
$tokens[] = $this->factory->createEmpty($node->tagName, $attr);
|
||||
}
|
||||
} else {
|
||||
if ($collect) { // don't wrap on first iteration
|
||||
$tokens[] = $this->factory->createStart(
|
||||
$tag_name = $node->tagName, // somehow, it gets dropped
|
||||
$this->transformAttrToAssoc($node->attributes)
|
||||
$attr
|
||||
);
|
||||
}
|
||||
foreach ($node->childNodes as $node) {
|
||||
|
@@ -8,7 +8,7 @@
|
||||
* features, such as custom tags, custom parsing of text, etc.
|
||||
*/
|
||||
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'EscapeInvalidTags', false, 'bool',
|
||||
'When true, invalid tags will be written back to the document as plain '.
|
||||
'text. Otherwise, they are silently dropped.'
|
||||
|
@@ -187,6 +187,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
if (!$parent_def->child->allow_empty) {
|
||||
// we need to do a double-check
|
||||
$i = $parent_index;
|
||||
array_pop($stack);
|
||||
}
|
||||
|
||||
// PROJECTED OPTIMIZATION: Process all children elements before
|
||||
@@ -255,4 +256,4 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
|
||||
}
|
||||
|
||||
?>
|
||||
?>
|
||||
|
@@ -3,10 +3,10 @@
|
||||
require_once 'HTMLPurifier/Strategy.php';
|
||||
require_once 'HTMLPurifier/HTMLDefinition.php';
|
||||
require_once 'HTMLPurifier/IDAccumulator.php';
|
||||
require_once 'HTMLPurifier/ConfigDef.php';
|
||||
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||
require_once 'HTMLPurifier/AttrContext.php';
|
||||
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'IDBlacklist', array(), 'list',
|
||||
'Array of IDs not allowed in the document.');
|
||||
|
||||
|
@@ -1,6 +1,6 @@
|
||||
<?php
|
||||
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'AllowedSchemes', array(
|
||||
'http' => true, // "Hypertext Transfer Protocol", nuf' said
|
||||
'https' => true, // HTTP over SSL (Secure Socket Layer)
|
||||
@@ -16,7 +16,7 @@ HTMLPurifier_ConfigDef::define(
|
||||
'prevents XSS attacks from using pseudo-schemes like javascript or mocha.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'OverrideAllowedSchemes', true, 'bool',
|
||||
'If this is set to true (which it is by default), you can override '.
|
||||
'%URI.AllowedSchemes by simply registering a HTMLPurifier_URIScheme '.
|
||||
|
@@ -16,7 +16,7 @@
|
||||
[Parse Data]
|
||||
;; title of all the documentation
|
||||
;; legal values: any string
|
||||
title = HTMLPurifier API Documentation
|
||||
title = HTML Purifier API Documentation
|
||||
|
||||
;; parse files that start with a . like .bash_profile
|
||||
;; legal values: true, false
|
||||
|
@@ -92,6 +92,10 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness
|
||||
$this->assertDef('position:absolute;', false);
|
||||
$this->assertDef('background-image:url(javascript:alert\(\));', false);
|
||||
|
||||
// airy input
|
||||
$this->assertDef(' font-weight : bold; color : #ff0000',
|
||||
'font-weight:bold;color:#ff0000;');
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -158,9 +158,15 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||
$uri[18] = '/a/b';
|
||||
$components[18] = array(null, null, null, '/a/b', null);
|
||||
|
||||
// it's not allowed, so generic URI should get it
|
||||
$uri[19] = '<';
|
||||
$expect_uri[19] = false;
|
||||
// result of malformed tag, gracefully handle error
|
||||
$uri[19] = 'http://www.google.com/\'>"';
|
||||
$components[19] = array(null, 'www.google.com', null, '/', null);
|
||||
$expect_uri[19] = 'http://www.google.com/';
|
||||
|
||||
// test empty
|
||||
$uri[20] = '';
|
||||
$components[20] = array(null, null, null, '', null);
|
||||
$expect_uri[20] = '';
|
||||
|
||||
foreach ($uri as $i => $value) {
|
||||
|
||||
|
@@ -42,28 +42,62 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
|
||||
|
||||
function test_custom() {
|
||||
|
||||
// the table definition
|
||||
$this->def = new HTMLPurifier_ChildDef_Custom(
|
||||
'(caption?, (col*|colgroup*), thead?, tfoot?, (tbody+|tr+))');
|
||||
'(a, b?, c*, d+, (a, b)*)');
|
||||
|
||||
$inputs[0] = '';
|
||||
$expect[0] = false;
|
||||
|
||||
$inputs[1] = '<a /><b /><c /><d /><a /><b />';
|
||||
$expect[1] = true;
|
||||
|
||||
$inputs[2] = '<a /><d>Dob</d><a /><b>foo</b><a href="moo"><b>foo</b>';
|
||||
$expect[2] = true;
|
||||
|
||||
$inputs[3] = '<a /><a />';
|
||||
$expect[3] = false;
|
||||
|
||||
}
|
||||
|
||||
function test_table() {
|
||||
|
||||
// currently inactive, awaiting augmentation
|
||||
|
||||
// the table definition
|
||||
$this->def = new HTMLPurifier_ChildDef_Table();
|
||||
|
||||
$inputs = $expect = $config = array();
|
||||
|
||||
$inputs[0] = '';
|
||||
$expect[0] = false;
|
||||
|
||||
// we really don't care what's inside, because if it turns out
|
||||
// this tr is illegal, we'll end up re-evaluating the parent node
|
||||
// anyway.
|
||||
$inputs[1] = '<tr></tr>';
|
||||
// we're using empty tags to compact the tests: under real circumstances
|
||||
// there would be contents in them
|
||||
|
||||
$inputs[1] = '<tr />';
|
||||
$expect[1] = true;
|
||||
|
||||
$inputs[2] = '<caption></caption><col></col><thead></thead>' .
|
||||
'<tfoot></tfoot><tbody></tbody>';
|
||||
$inputs[2] = '<caption /><col /><thead /><tfoot /><tbody>'.
|
||||
'<tr><td>asdf</td></tr></tbody>';
|
||||
$expect[2] = true;
|
||||
|
||||
$inputs[3] = '<col></col><col></col><col></col><tr></tr>';
|
||||
$inputs[3] = '<col /><col /><col /><tr />';
|
||||
$expect[3] = true;
|
||||
|
||||
// mixed up order
|
||||
$inputs[4] = '<col /><colgroup /><tbody /><tfoot /><thead /><tr>1</tr><caption /><tr />';
|
||||
$expect[4] = '<caption /><col /><colgroup /><thead /><tfoot /><tbody /><tr>1</tr><tr />';
|
||||
|
||||
// duplicates of singles
|
||||
// - first caption serves
|
||||
// - trailing tfoots/theads get turned into tbodys
|
||||
$inputs[5] = '<caption>1</caption><caption /><tbody /><tbody /><tfoot>1</tfoot><tfoot />';
|
||||
$expect[5] = '<caption>1</caption><tfoot>1</tfoot><tbody /><tbody /><tbody />';
|
||||
|
||||
// errant text dropped (until bubbling is implemented)
|
||||
$inputs[6] = 'foo';
|
||||
$expect[6] = false;
|
||||
|
||||
$this->assertSeries($inputs, $expect, $config);
|
||||
|
||||
}
|
||||
|
@@ -1,8 +1,8 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ConfigDef.php';
|
||||
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||
|
||||
class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
||||
class HTMLPurifier_ConfigSchemaTest extends UnitTestCase
|
||||
{
|
||||
|
||||
var $old_copy;
|
||||
@@ -13,16 +13,16 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
||||
// you pay for using Singletons. Good thing we can overload it.
|
||||
|
||||
// first, let's get a clean copy to do tests
|
||||
$our_copy = new HTMLPurifier_ConfigDef();
|
||||
$our_copy = new HTMLPurifier_ConfigSchema();
|
||||
// get the old copy
|
||||
$this->old_copy = HTMLPurifier_ConfigDef::instance();
|
||||
$this->old_copy = HTMLPurifier_ConfigSchema::instance();
|
||||
// put in our copy, and reassign to the REAL reference
|
||||
$this->our_copy =& HTMLPurifier_ConfigDef::instance($our_copy);
|
||||
$this->our_copy =& HTMLPurifier_ConfigSchema::instance($our_copy);
|
||||
}
|
||||
|
||||
function tearDown() {
|
||||
// testing is done, restore the old copy
|
||||
HTMLPurifier_ConfigDef::instance($this->old_copy);
|
||||
HTMLPurifier_ConfigSchema::instance($this->old_copy);
|
||||
}
|
||||
|
||||
function testNormal() {
|
||||
@@ -31,9 +31,9 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
||||
|
||||
// define a namespace
|
||||
$description = 'Configuration that is always available.';
|
||||
HTMLPurifier_ConfigDef::defineNamespace(
|
||||
HTMLPurifier_ConfigSchema::defineNamespace(
|
||||
'Core', $description
|
||||
); $line = __LINE__;
|
||||
);
|
||||
$this->assertIdentical($this->our_copy->defaults, array(
|
||||
'Core' => array()
|
||||
));
|
||||
@@ -41,7 +41,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
||||
'Core' => array()
|
||||
));
|
||||
$namespace = new HTMLPurifier_ConfigEntity_Namespace();
|
||||
$namespace->addDescription($file, $line, $description);
|
||||
$namespace->description = $description;
|
||||
$this->assertIdentical($this->our_copy->info_namespace, array(
|
||||
'Core' => $namespace
|
||||
));
|
||||
@@ -50,7 +50,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
||||
|
||||
// define a directive
|
||||
$description = 'This is a description of the directive.';
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'Name', 'default value', 'string',
|
||||
$description
|
||||
); $line = __LINE__;
|
||||
@@ -71,7 +71,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
||||
|
||||
|
||||
// define a directive in an undefined namespace
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Extension', 'Name', false, 'bool',
|
||||
'This is for an extension, but we have not defined its namespace!'
|
||||
);
|
||||
@@ -83,7 +83,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
||||
|
||||
// redefine a value in a valid manner
|
||||
$description = 'Alternative configuration definition';
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'Name', 'default value', 'string',
|
||||
$description
|
||||
); $line = __LINE__;
|
||||
@@ -98,7 +98,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
||||
|
||||
|
||||
// redefine a directive in an invalid manner
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'Name', 'different default', 'string',
|
||||
'Inconsistent default or type, cannot redefine'
|
||||
);
|
||||
@@ -109,7 +109,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
||||
|
||||
|
||||
// make an enumeration
|
||||
HTMLPurifier_ConfigDef::defineAllowedValues(
|
||||
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||
'Core', 'Name', array(
|
||||
'Real Value',
|
||||
'Real Value 2'
|
||||
@@ -128,7 +128,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
||||
|
||||
|
||||
// redefinition of enumeration is cumulative
|
||||
HTMLPurifier_ConfigDef::defineAllowedValues(
|
||||
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||
'Core', 'Name', array(
|
||||
'Real Value 3',
|
||||
)
|
||||
@@ -143,7 +143,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
||||
|
||||
|
||||
// cannot define enumeration for undefined directive
|
||||
HTMLPurifier_ConfigDef::defineAllowedValues(
|
||||
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||
'Core', 'Foobar', array(
|
||||
'Real Value 9',
|
||||
)
|
||||
@@ -155,7 +155,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
||||
|
||||
|
||||
// test defining value aliases for an enumerated value
|
||||
HTMLPurifier_ConfigDef::defineValueAliases(
|
||||
HTMLPurifier_ConfigSchema::defineValueAliases(
|
||||
'Core', 'Name', array(
|
||||
'Aliased Value' => 'Real Value'
|
||||
)
|
||||
@@ -170,7 +170,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
||||
|
||||
|
||||
// redefine should be cumulative
|
||||
HTMLPurifier_ConfigDef::defineValueAliases(
|
||||
HTMLPurifier_ConfigSchema::defineValueAliases(
|
||||
'Core', 'Name', array(
|
||||
'Aliased Value 2' => 'Real Value 2'
|
||||
)
|
||||
@@ -185,7 +185,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
||||
|
||||
|
||||
// cannot create alias to not-allowed value
|
||||
HTMLPurifier_ConfigDef::defineValueAliases(
|
||||
HTMLPurifier_ConfigSchema::defineValueAliases(
|
||||
'Core', 'Name', array(
|
||||
'Aliased Value 3' => 'Invalid Value'
|
||||
)
|
||||
@@ -197,7 +197,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
||||
|
||||
|
||||
// cannot create alias for already allowed value
|
||||
HTMLPurifier_ConfigDef::defineValueAliases(
|
||||
HTMLPurifier_ConfigSchema::defineValueAliases(
|
||||
'Core', 'Name', array(
|
||||
'Real Value' => 'Real Value 2'
|
||||
)
|
||||
@@ -209,7 +209,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
||||
|
||||
|
||||
// define a directive with an invalid type
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'Foobar', false, 'omen',
|
||||
'Omen is not a valid type, so we reject this.'
|
||||
);
|
||||
@@ -221,7 +221,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
||||
|
||||
|
||||
// define a directive with inconsistent type
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'Foobaz', 10, 'string',
|
||||
'If we say string, we should mean it, not integer 10.'
|
||||
);
|
||||
@@ -231,6 +231,24 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
||||
$this->swallowErrors();
|
||||
|
||||
|
||||
// define a directive with bad characters
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'Core.Attr', 10, 'int',
|
||||
'No periods! >:-('
|
||||
);
|
||||
|
||||
$this->assertError('Directive name must be alphanumeric');
|
||||
$this->assertNoErrors();
|
||||
$this->swallowErrors();
|
||||
|
||||
// define a namespace with bad characters
|
||||
HTMLPurifier_ConfigSchema::defineNamespace(
|
||||
'Foobar&Gromit', $description
|
||||
);
|
||||
|
||||
$this->assertError('Namespace name must be alphanumeric');
|
||||
$this->assertNoErrors();
|
||||
$this->swallowErrors();
|
||||
|
||||
}
|
||||
|
@@ -8,37 +8,43 @@ class HTMLPurifier_ConfigTest extends UnitTestCase
|
||||
var $our_copy, $old_copy;
|
||||
|
||||
function setUp() {
|
||||
$our_copy = new HTMLPurifier_ConfigDef();
|
||||
$this->old_copy = HTMLPurifier_ConfigDef::instance();
|
||||
$this->our_copy =& HTMLPurifier_ConfigDef::instance($our_copy);
|
||||
$our_copy = new HTMLPurifier_ConfigSchema();
|
||||
$this->old_copy = HTMLPurifier_ConfigSchema::instance();
|
||||
$this->our_copy =& HTMLPurifier_ConfigSchema::instance($our_copy);
|
||||
}
|
||||
|
||||
function tearDown() {
|
||||
HTMLPurifier_ConfigDef::instance($this->old_copy);
|
||||
HTMLPurifier_ConfigSchema::instance($this->old_copy);
|
||||
}
|
||||
|
||||
function test() {
|
||||
|
||||
HTMLPurifier_ConfigDef::defineNamespace('Core', 'Corestuff');
|
||||
HTMLPurifier_ConfigDef::defineNamespace('Attr', 'Attributes');
|
||||
HTMLPurifier_ConfigDef::defineNamespace('Extension', 'Extensible');
|
||||
HTMLPurifier_ConfigSchema::defineNamespace('Core', 'Corestuff');
|
||||
HTMLPurifier_ConfigSchema::defineNamespace('Attr', 'Attributes');
|
||||
HTMLPurifier_ConfigSchema::defineNamespace('Extension', 'Extensible');
|
||||
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'Key', false, 'bool', 'A boolean directive.'
|
||||
);
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'Key', 42, 'int', 'An integer directive.'
|
||||
);
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Extension', 'Pert', 'foo', 'string', 'A string directive.'
|
||||
);
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'Encoding', 'utf-8', 'istring', 'Case insensitivity!'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigDef::defineAllowedValues(
|
||||
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||
'Extension', 'Pert', array('foo', 'moo')
|
||||
);
|
||||
HTMLPurifier_ConfigDef::defineValueAliases(
|
||||
HTMLPurifier_ConfigSchema::defineValueAliases(
|
||||
'Extension', 'Pert', array('cow' => 'moo')
|
||||
);
|
||||
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||
'Core', 'Encoding', array('utf-8', 'iso-8859-1')
|
||||
);
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
|
||||
@@ -80,6 +86,11 @@ class HTMLPurifier_ConfigTest extends UnitTestCase
|
||||
$this->assertNoErrors();
|
||||
$this->assertIdentical($config->get('Extension', 'Pert'), 'moo');
|
||||
|
||||
// case-insensitive attempt to set value that is allowed
|
||||
$config->set('Core', 'Encoding', 'ISO-8859-1');
|
||||
$this->assertNoErrors();
|
||||
$this->assertIdentical($config->get('Core', 'Encoding'), 'iso-8859-1');
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -14,8 +14,8 @@ class HTMLPurifier_EncoderTest extends UnitTestCase
|
||||
|
||||
function assertCleanUTF8($string, $expect = null) {
|
||||
if ($expect === null) $expect = $string;
|
||||
$this->assertIdentical($this->Encoder->cleanUTF8($string), $expect);
|
||||
$this->assertIdentical($this->Encoder->cleanUTF8($string, true), $expect);
|
||||
$this->assertIdentical($this->Encoder->cleanUTF8($string), $expect, 'iconv: %s');
|
||||
$this->assertIdentical($this->Encoder->cleanUTF8($string, true), $expect, 'PHP: %s');
|
||||
}
|
||||
|
||||
function test_cleanUTF8() {
|
||||
@@ -46,6 +46,14 @@ class HTMLPurifier_EncoderTest extends UnitTestCase
|
||||
$this->Encoder->convertToUTF8("\xF6", $config),
|
||||
"\xC3\xB6"
|
||||
);
|
||||
|
||||
$config->set('Test', 'ForceNoIconv', true);
|
||||
|
||||
$this->assertIdentical(
|
||||
$this->Encoder->convertToUTF8("\xF6", $config),
|
||||
"\xC3\xB6"
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
function test_convertFromUTF8() {
|
||||
@@ -64,6 +72,14 @@ class HTMLPurifier_EncoderTest extends UnitTestCase
|
||||
$this->Encoder->convertFromUTF8("\xC3\xB6", $config),
|
||||
"\xF6"
|
||||
);
|
||||
|
||||
$config->set('Test', 'ForceNoIconv', true);
|
||||
|
||||
$this->assertIdentical(
|
||||
$this->Encoder->convertFromUTF8("\xC3\xB6", $config),
|
||||
"\xF6"
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -52,10 +52,8 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
|
||||
$inputs[7] = new HTMLPurifier_Token_Text($theta_char);
|
||||
$expect[7] = $theta_char;
|
||||
|
||||
$default_config = HTMLPurifier_Config::createDefault();
|
||||
foreach ($inputs as $i => $input) {
|
||||
if (!isset($config[$i])) $config[$i] = $default_config;
|
||||
$result = $this->gen->generateFromToken($input, $config[$i]);
|
||||
$result = $this->gen->generateFromToken($input);
|
||||
$this->assertEqual($result, $expect[$i]);
|
||||
paintIf($result, $result != $expect[$i]);
|
||||
}
|
||||
@@ -122,6 +120,34 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
|
||||
|
||||
}
|
||||
|
||||
var $config;
|
||||
function assertGeneration($tokens, $expect) {
|
||||
$result = $this->gen->generateFromTokens($tokens, $this->config);
|
||||
$this->assertEqual($expect, $result);
|
||||
}
|
||||
|
||||
function test_generateFromTokens_XHTMLoff() {
|
||||
$this->config = HTMLPurifier_Config::createDefault();
|
||||
$this->config->set('Core', 'XHTML', false);
|
||||
|
||||
// omit trailing slash
|
||||
$this->assertGeneration(
|
||||
array( new HTMLPurifier_Token_Empty('br') ),
|
||||
'<br>'
|
||||
);
|
||||
|
||||
// there should be a test for attribute minimization, but it is
|
||||
// impossible for something like that to happen due to our current
|
||||
// definitions! fix it later
|
||||
|
||||
// namespaced attributes must be dropped
|
||||
$this->assertGeneration(
|
||||
array( new HTMLPurifier_Token_Start('p', array('xml:lang'=>'fr')) ),
|
||||
'<p>'
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
24
tests/HTMLPurifier/Test.php
Normal file
24
tests/HTMLPurifier/Test.php
Normal file
@@ -0,0 +1,24 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier.php';
|
||||
|
||||
// integration test
|
||||
|
||||
class HTMLPurifier_Test extends UnitTestCase
|
||||
{
|
||||
var $purifier;
|
||||
|
||||
function assertPurification($input, $expect = null) {
|
||||
if ($expect === null) $expect = $input;
|
||||
$result = $this->purifier->purify($input);
|
||||
$this->assertIdentical($expect, $result);
|
||||
}
|
||||
|
||||
function test() {
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$this->purifier = new HTMLPurifier($config);
|
||||
$this->assertPurification("Null byte\0", "Null byte");
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
@@ -40,7 +40,7 @@ require_once 'HTMLPurifier.php';
|
||||
// define callable test files
|
||||
$test_files = array();
|
||||
$test_files[] = 'ConfigTest.php';
|
||||
$test_files[] = 'ConfigDefTest.php';
|
||||
$test_files[] = 'ConfigSchemaTest.php';
|
||||
$test_files[] = 'LexerTest.php';
|
||||
$test_files[] = 'Lexer/DirectLexTest.php';
|
||||
$test_files[] = 'TokenTest.php';
|
||||
@@ -88,6 +88,7 @@ $test_files[] = 'URISchemeRegistryTest.php';
|
||||
$test_files[] = 'URISchemeTest.php';
|
||||
$test_files[] = 'EncoderTest.php';
|
||||
$test_files[] = 'EntityParserTest.php';
|
||||
$test_files[] = 'Test.php';
|
||||
|
||||
if (version_compare(PHP_VERSION, '5', '>=')) {
|
||||
$test_files[] = 'TokenFactoryTest.php';
|
||||
|
Reference in New Issue
Block a user