diff --git a/NEWS b/NEWS index e5c368b6..0d777cfd 100644 --- a/NEWS +++ b/NEWS @@ -5,7 +5,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier (major feature release) 1.1.1, unknown projected release date -(bugfix release) +- Various documentation updates +- Fixed parse error in configuration documentation script 1.1.0, released 2006-09-16 - Made URI validator more forgiving: will ignore leading and trailing diff --git a/SLOW b/SLOW index 00f372fa..9813c0f6 100644 --- a/SLOW +++ b/SLOW @@ -17,18 +17,23 @@ second tacked on to the load time probably isn't going to be that huge of a problem. Then, displaying the content is a simple a manner of outputting it directly from your database/filesystem. The trouble with this method is that your user loses the original text, and when doing edits, will be -handling the filtered text. Of course, maybe that's a good thing. If you -don't mind a little extra complexity, you can try... +handling the filtered text. While this may be a good thing, especially if +you're using a WYSIWYG editor, it can also result in data-loss if a user +expects certain markup to be available but it isn't. 2. Caching the filtered output - accept the submitted text and put it unaltered into the database, but then also generate a filtered version and stash that in the database. Serve the filtered version to readers, and the unaltered version to editors. If need be, you can invalidate the cache and have the cached filtered version be regenerated on the first page view. Pros? -Full data retention. Cons? It's more complicated. +Full data retention. Cons? It's more complicated, and opens other editors +up to XSS if they are using a WYSIWYG editor (to fix that, they'd have to +be able to get their hands on the *really* original text served in plaintext +mode). In short, inbound filtering is almost as simple as outbound filtering, but it has some drawbacks which cannot be fixed unless you save both the original and the filtered versions. 
-There is a third option: profile and optimize HTMLPurifier yourself. ;-) +There is a third option: profile and optimize HTMLPurifier yourself. Be sure +to tell me if you decide to do that! ;-) diff --git a/configdoc/generate.php b/configdoc/generate.php index ebef6777..a1231ba2 100644 --- a/configdoc/generate.php +++ b/configdoc/generate.php @@ -50,7 +50,7 @@ function appendHTMLDiv($document, $node, $html) { // --------------------------------------------------------------------------- // Load copies of HTMLPurifier_ConfigDef and HTMLPurifier -$definition = HTMLPurifier_ConfigDef::instance(); +$schema = HTMLPurifier_ConfigSchema::instance(); $purifier = new HTMLPurifier(); @@ -61,7 +61,7 @@ $types_document = new DOMDocument('1.0', 'UTF-8'); $types_root = $types_document->createElement('types'); $types_document->appendChild($types_root); $types_document->formatOutput = true; -foreach ($definition->types as $name => $expanded_name) { +foreach ($schema->types as $name => $expanded_name) { $types_type = $types_document->createElement('type', $expanded_name); $types_type->setAttribute('id', $name); $types_root->appendChild($types_type); @@ -88,7 +88,7 @@ TODO for XML format: - create a definition (DTD or other) once interface stabilizes */ -foreach($definition->info as $namespace_name => $namespace_info) { +foreach($schema->info as $namespace_name => $namespace_info) { $dom_namespace = $dom_document->createElement('namespace'); $dom_root->appendChild($dom_namespace); @@ -100,7 +100,7 @@ foreach($definition->info as $namespace_name => $namespace_info) { $dom_namespace_description = $dom_document->createElement('description'); $dom_namespace->appendChild($dom_namespace_description); appendHTMLDiv($dom_document, $dom_namespace_description, - $definition->info_namespace[$namespace_name]->description); + $schema->info_namespace[$namespace_name]->description); foreach ($namespace_info as $name => $info) { @@ -128,14 +128,14 @@ foreach($definition->info as $namespace_name => 
$namespace_info) { } } - $raw_default = $definition->defaults[$namespace_name][$name]; + $raw_default = $schema->defaults[$namespace_name][$name]; if (is_bool($raw_default)) { $default = $raw_default ? 'true' : 'false'; } elseif (is_string($raw_default)) { $default = "\"$raw_default\""; } else { $default = print_r( - $definition->defaults[$namespace_name][$name], true + $schema->defaults[$namespace_name][$name], true ); } $dom_constraints->appendChild( diff --git a/library/HTMLPurifier/Lexer/DOMLex.php b/library/HTMLPurifier/Lexer/DOMLex.php index a24d1014..fbdecb8f 100644 --- a/library/HTMLPurifier/Lexer/DOMLex.php +++ b/library/HTMLPurifier/Lexer/DOMLex.php @@ -18,6 +18,13 @@ require_once 'HTMLPurifier/TokenFactory.php'; * * @note PHP's DOM extension does not actually parse any entities, we use * our own function to do that. + * + * @warning DOM tends to drop whitespace, which may wreak havoc on indenting. + * If this is a huge problem, due to the fact that HTML is hand + * edited and you are unable to get a parser cache that caches the + * output of HTML Purifier while keeping the original HTML lying + * around, you may want to run Tidy on the resulting output or use + * HTMLPurifier_DirectLex */ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer