diff --git a/TODO b/TODO
index aad4c467..79c32c89 100644
--- a/TODO
+++ b/TODO
@@ -43,9 +43,9 @@ Unknown release (on a scratch-an-itch basis)
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
- Automatically add non-breaking spaces to empty table cells when
empty-cells:show is applied to have compatibility with Internet Explorer
- - Pretty-printing HTML (adds dependency of Generator to HTMLDefinition)
- Non-lossy dumb alternate character encoding transformations, achieved by
numerically encoding all non-ASCII characters
Wontfix
- Non-lossy smart alternate character encoding transformations
+ - Pretty-printing HTML; users can instead run Tidy on the output or on the entire page
diff --git a/docs/examples/demo.php b/docs/examples/demo.php
index 07630078..35a47986 100644
--- a/docs/examples/demo.php
+++ b/docs/examples/demo.php
@@ -21,7 +21,9 @@ if (!empty($_POST['html'])) {
$html = get_magic_quotes_gpc() ? stripslashes($_POST['html']) : $_POST['html'];
- $purifier = new HTMLPurifier();
+ $config = HTMLPurifier_Config::createDefault();
+ $config->set('Core', 'TidyFormat', !empty($_POST['tidy']));
+ $purifier = new HTMLPurifier($config);
$pure_html = $purifier->purify($html);
?>
@@ -65,6 +67,8 @@ if (isset($html)) {
HTMLPurifier_Encoder::cleanUTF8($html), ENT_COMPAT, 'UTF-8');
}
?>
+
Nicely format output with Tidy? />
diff --git a/library/HTMLPurifier/Generator.php b/library/HTMLPurifier/Generator.php
index 613ea965..7adfa81a 100644
--- a/library/HTMLPurifier/Generator.php
+++ b/library/HTMLPurifier/Generator.php
@@ -23,6 +23,19 @@ HTMLPurifier_ConfigSchema::define(
'This directive was available since 1.1.'
);
+// extension constraints could be factored into ConfigSchema
+HTMLPurifier_ConfigSchema::define(
+ 'Core', 'TidyFormat', false, 'bool',
+ 'Determines whether or not to run Tidy on the final output for pretty '.
+ 'formatting reasons, such as indentation and wrap. This can greatly '.
+ 'improve readability for editors who are hand-editing the HTML, but is '.
+ 'by no means necessary as HTML Purifier has already fixed all major '.
+ 'errors the HTML may have had and could potentially result in data loss '.
+ 'due to bugs in Tidy. Tidy is a non-default extension, and this directive '.
+ 'will silently fail if Tidy is not available. This '.
+ 'directive was available since 1.1.1.'
+);
+
/**
* Generates HTML from tokens.
*/
@@ -56,6 +69,30 @@ class HTMLPurifier_Generator
foreach ($tokens as $token) {
$html .= $this->generateFromToken($token);
}
+ if ($config->get('Core', 'TidyFormat') && extension_loaded('tidy')) {
+
+ $tidy_options = array(
+ 'indent'=> true,
+ 'output-xhtml' => $this->_xhtml,
+ 'show-body-only' => true,
+ 'indent-spaces' => 2,
+ 'wrap' => 68,
+ );
+ if (version_compare(PHP_VERSION, '5', '<')) {
+ tidy_set_encoding('utf8');
+ foreach ($tidy_options as $key => $value) {
+ tidy_setopt($key, $value);
+ }
+ tidy_parse_string($html);
+ tidy_clean_repair();
+ $html = tidy_get_output();
+ } else {
+ $tidy = new Tidy;
+ $tidy->parseString($html, $tidy_options, 'utf8');
+ $tidy->cleanRepair();
+ $html = (string) $tidy;
+ }
+ }
return $html;
}
diff --git a/tests/HTMLPurifier/GeneratorTest.php b/tests/HTMLPurifier/GeneratorTest.php
index 6b85a9ca..a6ca4043 100644
--- a/tests/HTMLPurifier/GeneratorTest.php
+++ b/tests/HTMLPurifier/GeneratorTest.php
@@ -123,6 +123,9 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
var $config;
function assertGeneration($tokens, $expect) {
$result = $this->gen->generateFromTokens($tokens, $this->config);
+ // normalize newlines; this should probably be done somewhere else
+ $result = str_replace("\r\n", "\n", $result);
+ $result = str_replace("\r", "\n", $result);
$this->assertEqual($expect, $result);
}
@@ -148,6 +151,25 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
}
+ function test_generateFromTokens_TidyFormat() {
+ // abort test if tidy isn't loaded
+ if (!extension_loaded('tidy')) return;
+
+ $this->config = HTMLPurifier_Config::createDefault();
+ $this->config->set('Core', 'TidyFormat', true);
+
+ // nice wrapping please
+ $this->assertGeneration(
+ array(
+ new HTMLPurifier_Token_Start('div'),
+ new HTMLPurifier_Token_Text('Text'),
+ new HTMLPurifier_Token_End('div')
+ ),
+ "