diff --git a/TODO b/TODO index 59ae74bf..268a45ef 100644 --- a/TODO +++ b/TODO @@ -12,7 +12,6 @@ TODO List - Add framework for unsafe attributes - Wire in modes (configuration, module and manager wise) - Reorganize configuration directives - . New namespace: Output for Generator - Determine handling for complex/cascading configuration directives - Reorganize transformation modules - Set up anonymous module management by HTMLDefinition diff --git a/library/HTMLPurifier/ConfigSchema.php b/library/HTMLPurifier/ConfigSchema.php index 940e8e61..1e58f196 100644 --- a/library/HTMLPurifier/ConfigSchema.php +++ b/library/HTMLPurifier/ConfigSchema.php @@ -66,6 +66,7 @@ class HTMLPurifier_ConfigSchema { $this->defineNamespace('URI', 'Features regarding Uniform Resource Identifiers.'); $this->defineNamespace('HTML', 'Configuration regarding allowed HTML.'); $this->defineNamespace('CSS', 'Configuration regarding allowed CSS.'); + $this->defineNamespace('Output', 'Configuration relating to the generation of (X)HTML.'); $this->defineNamespace('Test', 'Developer testing configuration for our unit tests.'); } diff --git a/library/HTMLPurifier/DoctypeRegistry.php b/library/HTMLPurifier/DoctypeRegistry.php index 0697f89a..34ed4736 100644 --- a/library/HTMLPurifier/DoctypeRegistry.php +++ b/library/HTMLPurifier/DoctypeRegistry.php @@ -2,6 +2,22 @@ require_once 'HTMLPurifier/Doctype.php'; +// Legacy directives for doctype specification +HTMLPurifier_ConfigSchema::define( + 'HTML', 'Strict', false, 'bool', + 'Determines whether or not to use Transitional (loose) or Strict rulesets. '. + 'This directive is deprecated in favor of %HTML.Doctype. '. + 'This directive has been available since 1.3.0.' +); + +HTMLPurifier_ConfigSchema::define( + 'HTML', 'XHTML', true, 'bool', + 'Determines whether or not output is XHTML 1.0 or HTML 4.01 flavor. '. + 'This directive is deprecated in favor of %HTML.Doctype. '. + 'This directive was available since 1.1.' 
+); +HTMLPurifier_ConfigSchema::defineAlias('Core', 'XHTML', 'HTML', 'XHTML'); + class HTMLPurifier_DoctypeRegistry { @@ -92,7 +108,7 @@ class HTMLPurifier_DoctypeRegistry return $doctype; } // backwards-compatibility - if ($config->get('Core', 'XHTML')) { + if ($config->get('HTML', 'XHTML')) { $doctype = 'XHTML 1.0'; } else { $doctype = 'HTML 4.01'; diff --git a/library/HTMLPurifier/Generator.php b/library/HTMLPurifier/Generator.php index d717332e..502be982 100644 --- a/library/HTMLPurifier/Generator.php +++ b/library/HTMLPurifier/Generator.php @@ -3,7 +3,7 @@ require_once 'HTMLPurifier/Lexer.php'; HTMLPurifier_ConfigSchema::define( - 'Core', 'CleanUTF8DuringGeneration', false, 'bool', + 'Output', 'EnableRedundantUTF8Cleaning', false, 'bool', 'When true, HTMLPurifier_Generator will also check all strings it '. 'escapes for UTF-8 well-formedness as a defense in depth measure. '. 'This could cause a considerable performance impact, and is not '. @@ -12,57 +12,64 @@ HTMLPurifier_ConfigSchema::define( 'the configuration value is only read at the beginning of '. 'generateFromTokens.' ); +HTMLPurifier_ConfigSchema::defineAlias('Core', 'CleanUTF8DuringGeneration', 'Output', 'EnableRedundantUTF8Cleaning'); HTMLPurifier_ConfigSchema::define( - 'Core', 'XHTML', true, 'bool', - 'Determines whether or not output is XHTML or not. When disabled, HTML '. - 'Purifier goes into HTML 4.01 removes XHTML-specific markup constructs, '. - 'such as boolean attribute expansion and trailing slashes in empty tags. '. - 'This directive was available since 1.1.' -); - -HTMLPurifier_ConfigSchema::define( - 'Core', 'CommentScriptContents', true, 'bool', + 'Output', 'CommentScriptContents', true, 'bool', 'Determines whether or not HTML Purifier should attempt to fix up '. 'the contents of script tags for legacy browsers with comments. This '. 'directive was available since 1.7.' 
); +HTMLPurifier_ConfigSchema::defineAlias('Core', 'CommentScriptContents', 'Output', 'CommentScriptContents'); // extension constraints could be factored into ConfigSchema HTMLPurifier_ConfigSchema::define( - 'Core', 'TidyFormat', false, 'bool', - '<p>Determines whether or not to run Tidy on the final output for pretty '. - 'formatting reasons, such as indentation and wrap.</p><p>This can greatly '. - 'improve readability for editors who are hand-editing the HTML, but is '. - 'by no means necessary as HTML Purifier has already fixed all major '. - 'errors the HTML may have had. Tidy is a non-default extension, and this directive '. - 'will silently fail if Tidy is not available.</p><p>If you are looking to make '. - 'the overall look of your page\'s source better, I recommend running Tidy '. - 'on the entire page rather than just user-content (after all, the '. - 'indentation relative to the containing blocks will be incorrect).</p><p>This '. - 'directive was available since 1.1.1.</p>' + 'Output', 'TidyFormat', false, 'bool', <<<HTML +<p> + Determines whether or not to run Tidy on the final output for pretty + formatting reasons, such as indentation and wrap. +</p> +<p> + This can greatly improve readability for editors who are hand-editing + the HTML, but is by no means necessary as HTML Purifier has already + fixed all major errors the HTML may have had. Tidy is a non-default + extension, and this directive will silently fail if Tidy is not + available. +</p> +<p> + If you are looking to make the overall look of your page's source + better, I recommend running Tidy on the entire page rather than just + user-content (after all, the indentation relative to the containing + blocks will be incorrect). +</p> +<p> + This directive was available since 1.1.1. +</p>
+HTML ); +HTMLPurifier_ConfigSchema::defineAlias('Core', 'TidyFormat', 'Output', 'TidyFormat'); /** * Generates HTML from tokens. + * @todo Create a configuration-wide instance that all objects retrieve */ class HTMLPurifier_Generator { /** - * Bool cache of %Core.CleanUTF8DuringGeneration + * Bool cache of %Output.EnableRedundantUTF8Cleaning * @private */ var $_clean_utf8 = false; /** - * Bool cache of %Core.XHTML + * Bool cache of %HTML.XHTML * @private */ var $_xhtml = true; /** - * Bool cache of %Core.CommentScriptContents + * Bool cache of %Output.CommentScriptContents * @private */ var $_scriptFix = false; @@ -76,9 +83,13 @@ class HTMLPurifier_Generator function generateFromTokens($tokens, $config, &$context) { $html = ''; if (!$config) $config = HTMLPurifier_Config::createDefault(); - $this->_clean_utf8 = $config->get('Core', 'CleanUTF8DuringGeneration'); - $this->_xhtml = $config->get('Core', 'XHTML'); - $this->_scriptFix = $config->get('Core', 'CommentScriptContents'); + $this->_clean_utf8 = $config->get('Output', 'EnableRedundantUTF8Cleaning'); + + // this should be replaced with a query to the Doctype object in + // config to determine whether or not this is an XML-based language + $this->_xhtml = $config->get('HTML', 'XHTML'); + + $this->_scriptFix = $config->get('Output', 'CommentScriptContents'); if (!$tokens) return ''; for ($i = 0, $size = count($tokens); $i < $size; $i++) { if ($this->_scriptFix && $tokens[$i]->name === 'script') { @@ -91,7 +102,7 @@ class HTMLPurifier_Generator } $html .= $this->generateFromToken($tokens[$i]); } - if ($config->get('Core', 'TidyFormat') && extension_loaded('tidy')) { + if ($config->get('Output', 'TidyFormat') && extension_loaded('tidy')) { $tidy_options = array( 'indent'=> true, diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php index 8aa3bbb7..0a99f58a 100644 --- a/library/HTMLPurifier/HTMLDefinition.php +++ b/library/HTMLPurifier/HTMLDefinition.php @@ -6,13 +6,6 @@ 
require_once 'HTMLPurifier/HTMLModuleManager.php'; // this definition and its modules MUST NOT define configuration directives // outside of the HTML or Attr namespaces -// will be superceded by more accurate doctype declaration schemes -HTMLPurifier_ConfigSchema::define( - 'HTML', 'Strict', false, 'bool', - 'Determines whether or not to use Transitional (loose) or Strict rulesets. '. - 'This directive has been available since 1.3.0.' -); - HTMLPurifier_ConfigSchema::define( 'HTML', 'BlockWrapper', 'p', 'string', 'String name of element to wrap inline elements that are inside a block '.