diff --git a/library/HTMLPurifier.php b/library/HTMLPurifier.php index fc4c51e0..7596b0ed 100644 --- a/library/HTMLPurifier.php +++ b/library/HTMLPurifier.php @@ -3,7 +3,7 @@ /*! * @mainpage * - * HTMLPurifier is a purification class that will take an arbitrary snippet of + * HTMLPurifier is an HTML filter that will take an arbitrary snippet of * HTML and rigorously test, validate and filter it into a version that * is safe for output onto webpages. It achieves this by: * @@ -15,7 +15,10 @@ * -# Validating attributes of the nodes; and * -# Generating HTML from the purified tokens. * - * See /docs/spec.txt for more details. + * However, most users will only need to interface with the HTMLPurifier + * class, so this massive amount of infrastructure is usually concealed. + * If you plan on working with the internals, be sure to include + * HTMLPurifier_ConfigDef and HTMLPurifier_Config. */ require_once 'HTMLPurifier/ConfigDef.php'; @@ -37,29 +40,37 @@ class HTMLPurifier var $config; + var $lexer, $strategy, $generator; + /** * Initializes the purifier. - * @param $config Configuration for all instances of the purifier + * @param $config Optional HTMLPurifier_Config object for all instances of + * the purifier, if omitted, a default configuration is + * supplied. */ function HTMLPurifier($config = null) { $this->config = $config ? $config : HTMLPurifier_Config::createDefault(); + + $this->lexer = HTMLPurifier_Lexer::create(); + $this->strategy = new HTMLPurifier_Strategy_Core(); + $this->generator = new HTMLPurifier_Generator(); } /** - * Purifies HTML. + * Filters an HTML snippet/document to be XSS-free and standards-compliant. * * @param $html String of HTML to purify - * @param $config HTMLPurifier_Config object for this specific round + * @param $config HTMLPurifier_Config object for this operation, if omitted, + * defaults to the config object specified during this + * object's construction. 
* @return Purified HTML */ function purify($html, $config = null) { $config = $config ? $config : $this->config; - $lexer = HTMLPurifier_Lexer::create(); - $strategy = new HTMLPurifier_Strategy_Core(); - $generator = new HTMLPurifier_Generator(); - return $generator->generateFromTokens( - $strategy->execute( - $lexer->tokenizeHTML($html, $config), + return + $this->generator->generateFromTokens( + $this->strategy->execute( + $this->lexer->tokenizeHTML($html, $config), $config ), $config diff --git a/library/HTMLPurifier/AttrContext.php b/library/HTMLPurifier/AttrContext.php index 2dae4ad3..c3316737 100644 --- a/library/HTMLPurifier/AttrContext.php +++ b/library/HTMLPurifier/AttrContext.php @@ -5,10 +5,21 @@ * * All it is is a data-structure that holds objects that accumulate state, like * HTMLPurifier_IDAccumulator. + * + * @note Many functions that accept this object have it as a mandatory + * parameter, even when there is no use for it. Though this is + * for the same reasons as why HTMLPurifier_Config is a mandatory + * parameter, it is also because you cannot assign a default value + * to a parameter passed by reference (passing by reference is essential + * for context to work in PHP 4). */ class HTMLPurifier_AttrContext { + /** + * Contains an HTMLPurifier_IDAccumulator, which keeps track of used IDs. + * @public + */ var $id_accumulator; } diff --git a/library/HTMLPurifier/AttrDef.php b/library/HTMLPurifier/AttrDef.php index 348c07fe..56d5101e 100644 --- a/library/HTMLPurifier/AttrDef.php +++ b/library/HTMLPurifier/AttrDef.php @@ -2,15 +2,50 @@ require_once 'HTMLPurifier/AttrContext.php'; -// AttrDef = Attribute Definition +/** + * Base class for all validating attribute definitions. + * + * This family of classes forms the core for not only HTML attribute validation, + * but also any sort of string that needs to be validated or cleaned (which + * means CSS properties and composite definitions are defined here too). 
+ * Besides defining (through code) what precisely makes the string valid, + * subclasses are also responsible for cleaning the code if possible. + */ + class HTMLPurifier_AttrDef { - function HTMLPurifier_AttrDef() {} + /** + * Abstract function defined for functions that validate and clean strings. + * + * This function forms the basis for all the subclasses: they must + * define this method. + * + * @public + * @param $string String to be validated and cleaned. + * @param $config Mandatory HTMLPurifier_Config object. + * @param $context Mandatory HTMLPurifier_AttrContext object. + */ function validate($string, $config, &$context) { trigger_error('Cannot call abstract function', E_USER_ERROR); } + /** + * Convenience method that parses a string as if it were CDATA. + * + * This method processes a string in the manner specified at + * http://www.w3.org/TR/html4/types.html#h-6.2 by removing + * leading and trailing whitespace, ignoring line feeds, and replacing + * carriage returns and tabs with spaces. While most useful for HTML + * attributes specified as CDATA, it can also be applied to most CSS + * values. + * + * @note This method is not entirely standards compliant, as trim() removes + * more types of whitespace than specified in the spec. In practice, + * this is rarely a problem. 
+ * + * @public + */ function parseCDATA($string) { $string = trim($string); $string = str_replace("\n", '', $string); diff --git a/library/HTMLPurifier/AttrTransform.php b/library/HTMLPurifier/AttrTransform.php index 8df5d3d2..0b68df68 100644 --- a/library/HTMLPurifier/AttrTransform.php +++ b/library/HTMLPurifier/AttrTransform.php @@ -1,12 +1,31 @@ get('Attr', 'DefaultTextDir'); - return $attributes; + function transform($attr, $config) { + if (isset($attr['dir'])) return $attr; /* dir already set: hand the attributes back unchanged */ + $attr['dir'] = $config->get('Attr', 'DefaultTextDir'); + return $attr; } } diff --git a/library/HTMLPurifier/AttrTransform/ImgRequired.php b/library/HTMLPurifier/AttrTransform/ImgRequired.php index 7fff1c78..5c47c084 100644 --- a/library/HTMLPurifier/AttrTransform/ImgRequired.php +++ b/library/HTMLPurifier/AttrTransform/ImgRequired.php @@ -22,23 +22,23 @@ HTMLPurifier_ConfigDef::define( class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform { - function transform($attributes, $config) { + function transform($attr, $config) { $src = true; - if (!isset($attributes['src'])) { - $attributes['src'] = $config->get('Attr', 'DefaultInvalidImage'); + if (!isset($attr['src'])) { + $attr['src'] = $config->get('Attr', 'DefaultInvalidImage'); $src = false; } - if (!isset($attributes['alt'])) { + if (!isset($attr['alt'])) { if ($src) { - $attributes['alt'] = basename($attributes['src']); + $attr['alt'] = basename($attr['src']); } else { - $attributes['alt'] = $config->get('Attr', 'DefaultInvalidImageAlt'); + $attr['alt'] = $config->get('Attr', 'DefaultInvalidImageAlt'); } } - return $attributes; + return $attr; } diff --git a/library/HTMLPurifier/ChildDef.php b/library/HTMLPurifier/ChildDef.php index 28d9b9d2..970de2f7 100644 --- a/library/HTMLPurifier/ChildDef.php +++ b/library/HTMLPurifier/ChildDef.php @@ -21,6 +21,9 @@ HTMLPurifier_ConfigDef::define( 'preserving child nodes.' ); +/** + * Class that defines allowed child nodes and validates tokens against this. 
+ */ class HTMLPurifier_ChildDef { var $type; diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php index 6eb43227..e27978cd 100644 --- a/library/HTMLPurifier/Config.php +++ b/library/HTMLPurifier/Config.php @@ -1,6 +1,14 @@