diff --git a/library/HTMLPurifier.php b/library/HTMLPurifier.php
index fc4c51e0..7596b0ed 100644
--- a/library/HTMLPurifier.php
+++ b/library/HTMLPurifier.php
@@ -3,7 +3,7 @@
/*!
* @mainpage
*
- * HTMLPurifier is a purification class that will take an arbitrary snippet of
+ * HTMLPurifier is an HTML filter that will take an arbitrary snippet of
* HTML and rigorously test, validate and filter it into a version that
* is safe for output onto webpages. It achieves this by:
*
@@ -15,7 +15,10 @@
* -# Validating attributes of the nodes; and
* -# Generating HTML from the purified tokens.
*
- * See /docs/spec.txt for more details.
+ * However, most users will only need to interface with the HTMLPurifier
+ * class, so this massive amount of infrastructure is usually concealed.
+ * If you plan on working with the internals, be sure to include
+ * HTMLPurifier_ConfigDef and HTMLPurifier_Config.
*/
require_once 'HTMLPurifier/ConfigDef.php';
@@ -37,29 +40,37 @@ class HTMLPurifier
var $config;
+ var $lexer, $strategy, $generator;
+
/**
* Initializes the purifier.
- * @param $config Configuration for all instances of the purifier
+ * @param $config Optional HTMLPurifier_Config object used by default for
+ * every purify() call on this instance; if omitted, a default
+ * configuration is supplied.
*/
function HTMLPurifier($config = null) {
$this->config = $config ? $config : HTMLPurifier_Config::createDefault();
+
+ $this->lexer = HTMLPurifier_Lexer::create();
+ $this->strategy = new HTMLPurifier_Strategy_Core();
+ $this->generator = new HTMLPurifier_Generator();
}
/**
- * Purifies HTML.
+ * Filters an HTML snippet/document to be XSS-free and standards-compliant.
*
* @param $html String of HTML to purify
- * @param $config HTMLPurifier_Config object for this specific round
+ * @param $config HTMLPurifier_Config object for this operation; if omitted,
+ * defaults to the config object specified during this
+ * object's construction.
* @return Purified HTML
*/
function purify($html, $config = null) {
$config = $config ? $config : $this->config;
- $lexer = HTMLPurifier_Lexer::create();
- $strategy = new HTMLPurifier_Strategy_Core();
- $generator = new HTMLPurifier_Generator();
- return $generator->generateFromTokens(
- $strategy->execute(
- $lexer->tokenizeHTML($html, $config),
+ return
+ $this->generator->generateFromTokens(
+ $this->strategy->execute(
+ $this->lexer->tokenizeHTML($html, $config),
$config
),
$config
diff --git a/library/HTMLPurifier/AttrContext.php b/library/HTMLPurifier/AttrContext.php
index 2dae4ad3..c3316737 100644
--- a/library/HTMLPurifier/AttrContext.php
+++ b/library/HTMLPurifier/AttrContext.php
@@ -5,10 +5,21 @@
*
* All it is is a data-structure that holds objects that accumulate state, like
* HTMLPurifier_IDAccumulator.
+ *
+ * @note Many functions that accept this object take it as a mandatory
+ * parameter, even when they have no use for it. This is partly for
+ * the same reasons that HTMLPurifier_Config is a mandatory parameter,
+ * and partly because you cannot assign a default value to a
+ * parameter passed by reference (passing by reference is essential
+ * for context to work in PHP 4).
*/
class HTMLPurifier_AttrContext
{
+ /**
+ * Contains an HTMLPurifier_IDAccumulator, which keeps track of used IDs.
+ * @public
+ */
var $id_accumulator;
}
diff --git a/library/HTMLPurifier/AttrDef.php b/library/HTMLPurifier/AttrDef.php
index 348c07fe..56d5101e 100644
--- a/library/HTMLPurifier/AttrDef.php
+++ b/library/HTMLPurifier/AttrDef.php
@@ -2,15 +2,50 @@
require_once 'HTMLPurifier/AttrContext.php';
-// AttrDef = Attribute Definition
+/**
+ * Base class for all validating attribute definitions.
+ *
+ * This family of classes forms the core for not only HTML attribute validation,
+ * but also any sort of string that needs to be validated or cleaned (which
+ * means CSS properties and composite definitions are defined here too).
+ * Besides defining (through code) what precisely makes the string valid,
+ * subclasses are also responsible for cleaning the code if possible.
+ */
+
class HTMLPurifier_AttrDef
{
- function HTMLPurifier_AttrDef() {}
+ /**
+ * Abstract method, overridden by subclasses, that validates and cleans a string.
+ *
+ * This function forms the basis for all the subclasses: they must
+ * define this method.
+ *
+ * @public
+ * @param $string String to be validated and cleaned.
+ * @param $config Mandatory HTMLPurifier_Config object.
+ * @param $context Mandatory HTMLPurifier_AttrContext object.
+ */
function validate($string, $config, &$context) {
trigger_error('Cannot call abstract function', E_USER_ERROR);
}
+ /**
+ * Convenience method that parses a string as if it were CDATA.
+ *
+ * This method processes a string in the manner specified at
+ * http://www.w3.org/TR/html4/types.html#h-6.2 by removing
+ * leading and trailing whitespace, ignoring line feeds, and replacing
+ * carriage returns and tabs with spaces. While most useful for HTML
+ * attributes specified as CDATA, it can also be applied to most CSS
+ * values.
+ *
+ * @note This method is not entirely standards compliant, as trim() removes
+ * more types of whitespace than specified in the spec. In practice,
+ * this is rarely a problem.
+ *
+ * @public
+ */
function parseCDATA($string) {
$string = trim($string);
$string = str_replace("\n", '', $string);
diff --git a/library/HTMLPurifier/AttrTransform.php b/library/HTMLPurifier/AttrTransform.php
index 8df5d3d2..0b68df68 100644
--- a/library/HTMLPurifier/AttrTransform.php
+++ b/library/HTMLPurifier/AttrTransform.php
@@ -1,12 +1,31 @@
get('Attr', 'DefaultTextDir');
- return $attributes;
+ /**
+ * Ensures the attribute array carries a 'dir' entry.
+ *
+ * @param $attr Associative array of attributes to inspect.
+ * @param $config Configuration object; its get('Attr', 'DefaultTextDir')
+ * value is used when 'dir' is absent.
+ * @return The attribute array, with 'dir' filled in if it was missing.
+ */
+ function transform($attr, $config) {
+ // An explicit dir wins: hand the attributes back untouched. This must
+ // return $attr -- the parameter was renamed, so $attributes is undefined
+ // here and returning it would discard every attribute.
+ if (isset($attr['dir'])) return $attr;
+ $attr['dir'] = $config->get('Attr', 'DefaultTextDir');
+ return $attr;
}
}
diff --git a/library/HTMLPurifier/AttrTransform/ImgRequired.php b/library/HTMLPurifier/AttrTransform/ImgRequired.php
index 7fff1c78..5c47c084 100644
--- a/library/HTMLPurifier/AttrTransform/ImgRequired.php
+++ b/library/HTMLPurifier/AttrTransform/ImgRequired.php
@@ -22,23 +22,23 @@ HTMLPurifier_ConfigDef::define(
class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
{
- function transform($attributes, $config) {
+ function transform($attr, $config) {
$src = true;
- if (!isset($attributes['src'])) {
- $attributes['src'] = $config->get('Attr', 'DefaultInvalidImage');
+ if (!isset($attr['src'])) {
+ $attr['src'] = $config->get('Attr', 'DefaultInvalidImage');
$src = false;
}
- if (!isset($attributes['alt'])) {
+ if (!isset($attr['alt'])) {
if ($src) {
- $attributes['alt'] = basename($attributes['src']);
+ $attr['alt'] = basename($attr['src']);
} else {
- $attributes['alt'] = $config->get('Attr', 'DefaultInvalidImageAlt');
+ $attr['alt'] = $config->get('Attr', 'DefaultInvalidImageAlt');
}
}
- return $attributes;
+ return $attr;
}
diff --git a/library/HTMLPurifier/ChildDef.php b/library/HTMLPurifier/ChildDef.php
index 28d9b9d2..970de2f7 100644
--- a/library/HTMLPurifier/ChildDef.php
+++ b/library/HTMLPurifier/ChildDef.php
@@ -21,6 +21,9 @@ HTMLPurifier_ConfigDef::define(
'preserving child nodes.'
);
+/**
+ * Class that defines allowed child nodes and validates tokens against them.
+ */
class HTMLPurifier_ChildDef
{
var $type;
diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php
index 6eb43227..e27978cd 100644
--- a/library/HTMLPurifier/Config.php
+++ b/library/HTMLPurifier/Config.php
@@ -1,6 +1,14 @@