Begin adding Doxygen documentation.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@98 48356398-32a2-884e-a903-53898d9a118a
2025-08-11 00:24:03 +02:00 · 2006-07-23 03:43:53 +00:00
parent 14f481bcf6
commit 728848c4c7
3 changed files with 339 additions and 10 deletions
--- a/library/HTMLPurifier.php
+++ b/library/HTMLPurifier.php
@@ -1,22 +1,62 @@
 <?php

+/*!
+ * @mainpage
+ * 
+ * HTMLPurifier is a purification class that will take an arbitrary snippet of
+ * HTML and rigorously test, validate and filter it into a version that
+ * is safe for output onto webpages. It achieves this by:
+ * 
+ *  -# Lexing (parsing into tokens) the document,
+ *  -# Removing all elements not in the whitelist,
+ *  -# Making the tokens well-formed,
+ *  -# Fixing the nesting of the nodes,
+ *  -# Validating attributes of the nodes, and
+ *  -# Generating HTML from the purified tokens.
+ * 
+ * See /docs/spec.txt for more details.
+ */
+
 require_once 'HTMLPurifier/Lexer.php';
 require_once 'HTMLPurifier/Definition.php';
 require_once 'HTMLPurifier/Generator.php';

+/**
+ * Main library execution class.
+ * 
+ * Facade that performs calls to the HTMLPurifier_Lexer,
+ * HTMLPurifier_Definition and HTMLPurifier_Generator subsystems in order to
+ * purify HTML.
+ */
 class HTMLPurifier
 {
    
-    var $lexer;
-    var $definition;
-    var $generator;
+    var $lexer;         /*!< @brief Instance of HTMLPurifier_Lexer concrete
+                                    implementation. */
+    var $definition;    /*!< @brief Instance of HTMLPurifier_Definition. */
+    var $generator;     /*!< @brief Instance of HTMLPurifier_Generator. */
    
+    /**
+     * Initializes the purifier.
+     * 
+     * The constructor instantiates all necessary sub-objects to do the job,
+     * because creating some of them (esp. HTMLPurifier_Definition) can be
+     * expensive.
+     * 
+     * @todo Accept Policy object to define configuration.
+     */
    function HTMLPurifier() {
-        $this->lexer        = new HTMLPurifier_Lexer();
+        $this->lexer        = HTMLPurifier_Lexer::create(); // factory call; the base Lexer is abstract
        $this->definition   = new HTMLPurifier_Definition();
        $this->generator    = new HTMLPurifier_Generator();
    }
    
+    /**
+     * Purifies HTML.
+     * 
+     * @param $html String of HTML to purify
+     * @return Purified HTML
+     */
    function purify($html) {
        $tokens = $this->lexer->tokenizeHTML($html);
        $tokens = $this->definition->purifyTokens($tokens);
--- a/library/HTMLPurifier/Lexer.php
+++ b/library/HTMLPurifier/Lexer.php
@@ -1,21 +1,74 @@
 <?php

-/**
- * Forgivingly lexes HTML (not XML, since it doesn't adhere to spec exactly)
- */
-
 require_once 'HTMLPurifier/Token.php';

+/**
+ * Forgivingly lexes HTML (SGML-style) markup into tokens.
+ * 
+ * The lexer parses a string of SGML-style markup and converts it into
+ * corresponding tokens.  It doesn't check for well-formedness, although its
+ * internal mechanism may make this automatic (as is the case with
+ * HTMLPurifier_Lexer_DOMLex).  There are several implementations to choose
+ * from.
+ * 
+ * The lexer is HTML-oriented: it might work with XML, but it's not
+ * recommended, as we adhere to a subset of the specification for optimization
+ * reasons.
+ * 
+ * This class cannot be directly instantiated, but you may use create() to
+ * retrieve a default copy of the lexer.
+ * 
+ * @note
+ * We use tokens rather than create a DOM representation because DOM would:
+ * 
+ * @note
+ *  -# Require more processing power to create,
+ *  -# Require recursion to iterate,
+ *  -# Need to be compatible with PHP 5's DOM (otherwise duplication),
+ *  -# Contain the entire document structure (html and body not needed), and
+ *  -# Offer an unknown readability improvement.
+ * 
+ * @note
+ * What the last item means is that the functions for manipulating tokens are
+ * already fairly compact, and when well-commented, more abstraction may not
+ * be needed.
+ * 
+ * @see HTMLPurifier_Token
+ */
 class HTMLPurifier_Lexer
 {
    
+    /**
+     * Lexes an HTML string into tokens.
+     * 
+     * @param $string String HTML.
+     * @return HTMLPurifier_Token array representation of HTML.
+     */
    function tokenizeHTML($string) {
        trigger_error('Call to abstract class', E_USER_ERROR);
    }
    
-    // we don't really care if it's a reference or a copy
-    
+    /**
+     * Retrieves or sets the default Lexer as a Prototype Factory.
+     * 
+     * Depending on what PHP version you are running, the abstract base
+     * Lexer class will determine which concrete Lexer is best for you:
+     * HTMLPurifier_Lexer_DirectLex for PHP 4, and HTMLPurifier_Lexer_DOMLex
+     * for PHP 5 and beyond.
+     * 
+     * Passing the optional prototype lexer parameter will override the
+     * default with your own implementation.  A copy/reference of the prototype
+     * lexer will now be returned when you request a new lexer.
+     * 
+     * @note
+     * Though it is possible to call this factory method from subclasses,
+     * such usage is not recommended.
+     * 
+     * @param $prototype Optional prototype lexer.
+     * @return Concrete lexer.
+     */
    function create($prototype = null) {
+        // we don't really care if it's a reference or a copy
        static $lexer = null;
        if ($prototype) {
            $lexer = $prototype;