Outfit a bunch of other classes so they can accept a configuration object. Put in basic scaffolding for extractBody() functionality.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@257 48356398-32a2-884e-a903-53898d9a118a
2025-10-16 14:36:04 +02:00 · 2006-08-15 00:31:12 +00:00
parent 24c64dbbac
commit d7140f2e05
14 changed files with 75 additions and 26 deletions
--- a/library/HTMLPurifier/ChildDef.php
+++ b/library/HTMLPurifier/ChildDef.php
@@ -137,7 +137,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
                    $is_deleting = true;
                    if ($pcdata_allowed && $escape_invalid_children) {
                        $result[] = new HTMLPurifier_Token_Text(
-                            $this->gen->generateFromToken($token)
+                            $this->gen->generateFromToken($token, $config)
                        );
                    }
                    continue;
@@ -148,7 +148,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
            } elseif ($pcdata_allowed && $escape_invalid_children) {
                $result[] =
                    new HTMLPurifier_Token_Text(
-                        $this->gen->generateFromToken( $token )
+                        $this->gen->generateFromToken( $token, $config )
                    );
            } else {
                // drop silently
--- a/library/HTMLPurifier/Generator.php
+++ b/library/HTMLPurifier/Generator.php
@@ -5,26 +5,28 @@
 class HTMLPurifier_Generator
 {
    
-    function generateFromTokens($tokens) {
+    // only unit tests may omit configuration: internals MUST pass config
+    function generateFromTokens($tokens, $config = null) {
        $html = '';
+        if (!$config) $config = HTMLPurifier_Config::createDefault();
        if (!$tokens) return '';
        foreach ($tokens as $token) {
-            $html .= $this->generateFromToken($token);
+            $html .= $this->generateFromToken($token, $config);
        }
        return $html;
    }
    
-    function generateFromToken($token) {
+    function generateFromToken($token, $config) {
        if (!isset($token->type)) return '';
        if ($token->type == 'start') {
-            $attr = $this->generateAttributes($token->attributes);
+            $attr = $this->generateAttributes($token->attributes, $config);
            return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
            
        } elseif ($token->type == 'end') {
            return '</' . $token->name . '>';
            
        } elseif ($token->type == 'empty') {
-            $attr = $this->generateAttributes($token->attributes);
+            $attr = $this->generateAttributes($token->attributes, $config);
             return '<' . $token->name . ($attr ? ' ' : '') . $attr . ' />';
            
        } elseif ($token->type == 'text') {
@@ -36,7 +38,7 @@ class HTMLPurifier_Generator
        }
    }
    
-    function generateAttributes($assoc_array_of_attributes) {
+    function generateAttributes($assoc_array_of_attributes, $config) {
        $html = '';
        foreach ($assoc_array_of_attributes as $key => $value) {
            $html .= $key.'="'.htmlspecialchars($value, ENT_COMPAT, 'UTF-8').'" ';
--- a/library/HTMLPurifier/Lexer.php
+++ b/library/HTMLPurifier/Lexer.php
@@ -2,6 +2,14 @@

 require_once 'HTMLPurifier/Token.php';

+HTMLPurifier_ConfigDef::define(
+    'Core', 'AcceptFullDocuments', true,
+    'This parameter determines whether or not the filter should accept full '.
+    'HTML documents, not just HTML fragments.  When on, it will '.
+    'drop all sections except the content between body.  Depending on '.
+    'the implementation in use, this may speed up document parse times.'
+);
+
 /**
 * Forgivingly lexes HTML (SGML-style) markup into tokens.
 * 
@@ -52,7 +60,7 @@ class HTMLPurifier_Lexer
     * @param $string String HTML.
     * @return HTMLPurifier_Token array representation of HTML.
     */
-    function tokenizeHTML($string) {
+    function tokenizeHTML($string, $config = null) {
        trigger_error('Call to abstract class', E_USER_ERROR);
    }
    
@@ -228,6 +236,14 @@ class HTMLPurifier_Lexer
        return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8');
    }
    
+    /**
+     * Takes a string of HTML (fragment or document) and returns the content
+     */
+    function extractBody($html) {
+        if (strpos($html, '<html') === false) return $html; // already fragment
+        // ...
+    }
+    
 }

 ?>
--- a/library/HTMLPurifier/Lexer/DOMLex.php
+++ b/library/HTMLPurifier/Lexer/DOMLex.php
@@ -25,7 +25,9 @@ require_once 'HTMLPurifier/Lexer.php';
 class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
 {
    
-    public function tokenizeHTML($string) {
+    public function tokenizeHTML($string, $config = null) {
+        if (!$config) $config = HTMLPurifier_Config::createDefault();
+        
        $doc = new DOMDocument();
        $doc->encoding = 'UTF-8'; // technically does nothing, but comprehensive
        
--- a/library/HTMLPurifier/Lexer/DirectLex.php
+++ b/library/HTMLPurifier/Lexer/DirectLex.php
@@ -106,7 +106,9 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
        }
    }
    
-    function tokenizeHTML($string) {
+    function tokenizeHTML($string, $config = null) {
+        
+        if (!$config) $config = HTMLPurifier_Config::createDefault();
        
        // some quick checking (if empty, return empty)
        $string = @ (string) $string;
--- a/library/HTMLPurifier/Lexer/PEARSax3.php
+++ b/library/HTMLPurifier/Lexer/PEARSax3.php
@@ -29,7 +29,8 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
     */
    var $tokens = array();
    
-    function tokenizeHTML($html) {
+    function tokenizeHTML($html, $config = null) {
+        if (!$config) $config = HTMLPurifier_Config::createDefault();
        $html = $this->escapeCDATA($html);
        $html = $this->substituteNonSpecialEntities($html);
        $parser=& new XML_HTMLSax3();
--- a/library/HTMLPurifier/Strategy/MakeWellFormed.php
+++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php
@@ -87,7 +87,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
            // make sure that we have something open
            if (empty($current_nesting)) {
                $result[] = new HTMLPurifier_Token_Text(
-                    $this->generator->generateFromToken($token)
+                    $this->generator->generateFromToken($token, $config)
                );
                continue;
            }
@@ -122,7 +122,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
            // we still didn't find the tag, so translate to text
            if ($skipped_tags === false) {
                $result[] = new HTMLPurifier_Token_Text(
-                    $this->generator->generateFromToken($token)
+                    $this->generator->generateFromToken($token, $config)
                );
                continue;
            }
--- a/library/HTMLPurifier/Strategy/RemoveForeignElements.php
+++ b/library/HTMLPurifier/Strategy/RemoveForeignElements.php
@@ -43,7 +43,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
                } else {
                    // invalid tag, generate HTML and insert in
                    $token = new HTMLPurifier_Token_Text(
-                        $this->generator->generateFromToken($token)
+                        $this->generator->generateFromToken($token, $config)
                    );
                }
            } elseif ($token->type == 'comment') {