Release 4.1.1.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
Fix undefined index warnings in maintenance scripts.
2025-08-03 12:47:56 +02:00 · 2010-05-31 20:17:31 -07:00 · 2010-05-31 20:07:27 -07:00 · 2010-05-31 19:44:18 -07:00 · 2010-05-31 18:45:21 -07:00 · 2010-05-20 21:50:44 -04:00
763 changed files with 13024 additions and 7880 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,9 +1,20 @@
+tags
 conf/
+test-settings.php
+config-schema.php
 library/HTMLPurifier/DefinitionCache/Serializer/*/
 library/standalone/
 library/HTMLPurifier.standalone.php
+library/HTMLPurifier*.tgz
+library/package*.xml
+smoketests/test-schema.html
+configdoc/*.html
+configdoc/configdoc.xml
+docs/doxygen*
 *.phpt.diff
 *.phpt.exp
 *.phpt.log
 *.phpt.out
 *.phpt.php
+*.phpt.skip.php
+*.htmlt.ini
--- a/2
+++ b/2
@@ -5,3 +5,5 @@ Almost everything written by Edward Z. Yang (Ambush Commander).  Lots of thanks
 to the DevNetwork Community for their help (see docs/ref-devnetwork.html for
 more details), Feyd especially (namely IPv6 and optimization).  Thanks to RSnake
 for letting me package his fantastic XSS cheatsheet for a smoketest.
+
+    vim: et sw=4 sts=4
--- a/4
+++ b/4
@@ -31,7 +31,7 @@ PROJECT_NAME           = HTMLPurifier
 # This could be handy for archiving the generated documentation or
 # if some version control system is used.

-PROJECT_NUMBER         = 3.1.1
+PROJECT_NUMBER         = 4.1.1

 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
 # base path where the generated documentation will be put.
@@ -1313,3 +1313,5 @@ DOT_CLEANUP            = YES
 # used. If set to NO the values of all tags below this one will be ignored.

 SEARCHENGINE           = NO
+
+# vim: et sw=4 sts=4
--- a/27
+++ b/27
@@ -18,8 +18,6 @@ with these contents.
 HTML Purifier is PHP 5 only, and is actively tested from PHP 5.0.5 and
 up. It has no core dependencies with other libraries. PHP
 4 support was deprecated on December 31, 2007 with HTML Purifier 3.0.0.
-Essential security fixes will be issued for the 2.1.x branch until 
-August 8, 2008. 

 These optional extensions can enhance the capabilities of HTML Purifier:

@@ -233,12 +231,12 @@ HTML Purifier uses iconv to support other character encodings, as such,
 any encoding that iconv supports <http://www.gnu.org/software/libiconv/>
 HTML Purifier supports with this code:

-    $config->set('Core', 'Encoding', /* put your encoding here */);
+    $config->set('Core.Encoding', /* put your encoding here */);

 An example usage for Latin-1 websites (the most common encoding for English
 websites):

-    $config->set('Core', 'Encoding', 'ISO-8859-1');
+    $config->set('Core.Encoding', 'ISO-8859-1');

 Note that HTML Purifier's support for non-Unicode encodings is crippled by the
 fact that any character not supported by that encoding will be silently
@@ -253,7 +251,7 @@ reason, I do not include the solution in this document).
 For those of you using HTML 4.01 Transitional, you can disable
 XHTML output like this:

-    $config->set('HTML', 'Doctype', 'HTML 4.01 Transitional');
+    $config->set('HTML.Doctype', 'HTML 4.01 Transitional');

 Other supported doctypes include:

@@ -279,14 +277,14 @@ are, respectively, %HTML.Allowed, %URI.MakeAbsolute and %URI.Base, and
 %AutoFormat.AutoParagraph. The %Namespace.Directive naming convention
 translates to:

-    $config->set('Namespace', 'Directive', $value);
+    $config->set('Namespace.Directive', $value);

 E.g.

-    $config->set('HTML', 'Allowed', 'p,b,a[href],i');
-    $config->set('URI', 'Base', 'http://www.example.com');
-    $config->set('URI', 'MakeAbsolute', true);
-    $config->set('AutoFormat', 'AutoParagraph', true);
+    $config->set('HTML.Allowed', 'p,b,a[href],i');
+    $config->set('URI.Base', 'http://www.example.com');
+    $config->set('URI.MakeAbsolute', true);
+    $config->set('AutoFormat.AutoParagraph', true);


 ---------------------------------------------------------------------------
@@ -320,11 +318,11 @@ If you are unable or unwilling to give write permissions to the cache
 directory, you can either disable the cache (and suffer a performance
 hit):

-    $config->set('Core', 'DefinitionCache', null);
+    $config->set('Core.DefinitionCache', null);

 Or move the cache directory somewhere else (no trailing slash):

-    $config->set('Cache', 'SerializerPath', '/home/user/absolute/path');
+    $config->set('Cache.SerializerPath', '/home/user/absolute/path');


 ---------------------------------------------------------------------------
@@ -365,10 +363,11 @@ If your website is in a different encoding or doctype, use this code:
    require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';

    $config = HTMLPurifier_Config::createDefault();
-    $config->set('Core', 'Encoding', 'ISO-8859-1'); // replace with your encoding
-    $config->set('HTML', 'Doctype', 'HTML 4.01 Transitional'); // replace with your doctype
+    $config->set('Core.Encoding', 'ISO-8859-1'); // replace with your encoding
+    $config->set('HTML.Doctype', 'HTML 4.01 Transitional'); // replace with your doctype
    $purifier = new HTMLPurifier($config);

    $clean_html = $purifier->purify($dirty_html);
 ?>

+    vim: et sw=4 sts=4
--- a/INSTALL.fr.utf8
+++ b/INSTALL.fr.utf8
@@ -65,3 +65,5 @@ Sinon, utilisez:

    $html_propre = $purificateur->purify($html_salle);
 ?>
+
+    vim: et sw=4 sts=4
--- a/2
+++ b/2
@@ -501,4 +501,4 @@ necessary.  Here is a sample; alter the names:

 That's all there is to it!

-
+    vim: et sw=4 sts=4
--- a/171
+++ b/171
@@ -9,11 +9,176 @@ NEWS ( CHANGELOG and HISTORY )                                     HTMLPurifier
    . Internal change
 ==========================

-3.2.0, unknown release date
+4.1.1, released 2010-05-31
+- Fix undefined index warnings in maintenance scripts.
+- Fix bug in DirectLex for parsing elements with a single attribute
+  with entities.
+- Rewrite CSS output logic for font-family and url().  Thanks Mario
+  Heiderich <mario.heiderich@googlemail.com> for reporting and Takeshi
+  Terada <t-terada@violet.plala.or.jp> for suggesting the fix.
+- Emit an error for CollectErrors if a body is extracted
+- Fix bug in background-position for center keyword handling.
+- Fix infinite loop when a wrapper element is inserted in a context
+  where it's not allowed.  Thanks Lars <lars@renoz.dk> for reporting.
+- Remove +x bit and shebang from index.php; only supported mode is to
+  explicitly call it with php.
+- Make test script less chatty when log_errors is on.

-3.1.2, unknown release date
+4.1.0, released 2010-04-26
+! Support proprietary height attribute on table element
+! Support YouTube slideshows that contain /cp/ in their URL.
+! Support for data: URI scheme; not enabled by default, add it using
+  %URI.AllowedSchemes
+! Support flashvars when using %HTML.SafeObject and %HTML.SafeEmbed.
+! Support for Internet Explorer compatibility with %HTML.SafeObject
+  using %Output.FlashCompat.
+! Handle <ol><ol> properly, by inserting the necessary <li> tag.
+- Always quote the insides of url(...) in CSS.
+
+4.0.0, released 2009-07-07
+# APIs for ConfigSchema subsystem have substantially changed. See
+  docs/dev-config-bcbreaks.txt for details; in essence, anything that
+  had both namespace and directive now have a single unified key.
+# Some configuration directives were renamed, specifically:
+    %AutoFormatParam.PurifierLinkifyDocURL -> %AutoFormat.PurifierLinkify.DocURL
+    %FilterParam.ExtractStyleBlocksEscaping -> %Filter.ExtractStyleBlocks.Escaping
+    %FilterParam.ExtractStyleBlocksScope -> %Filter.ExtractStyleBlocks.Scope
+    %FilterParam.ExtractStyleBlocksTidyImpl -> %Filter.ExtractStyleBlocks.TidyImpl
+  As usual, the old directive names will still work, but will throw E_NOTICE
+  errors.
+# The allowed values for class have been relaxed to allow all of CDATA for
+  doctypes that are not XHTML 1.1 or XHTML 2.0.  For old behavior, set
+  %Attr.ClassUseCDATA to false.
+# Instead of appending the content model to an old content model, a blank
+  element will replace the old content model.  You can use #SUPER to get
+  the old content model.
+! More robust support for name="" and id=""
+! HTMLPurifier_Config::inherit($config) allows you to inherit one
+  configuration, and have changes to that configuration be propagated
+  to all of its children.
+! Implement %HTML.Attr.Name.UseCDATA, which relaxes validation rules on
+  the name attribute when set. Use with care. Thanks Ian Cook for
+  sponsoring.
+! Implement %AutoFormat.RemoveEmpty.RemoveNbsp, which removes empty
+  tags that contain non-breaking spaces as well as other whitespace. You
+  can also modify which tags should have &nbsp; maintained with
+  %AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.
+! Implement %Attr.AllowedClasses, which allows administrators to restrict
+  classes users can use to a specified finite set of classes, and
+  %Attr.ForbiddenClasses, which is the logical inverse.
+! You can now maintain your own configuration schema directories by
+  creating a config-schema.php file or passing an extra argument. Check
+  docs/dev-config-schema.html for more details.
+! Added HTMLPurifier_Config->serialize() method, which lets you save away
+  your configuration in a compact serial file, which you can unserialize
+  and use directly without having to go through the overhead of setup.
+- Fix bug where URIDefinition would not get cleared if its directives got
+  changed.
+- Fix fatal error in HTMLPurifier_Encoder on certain platforms (probably NetBSD 5.0)
+- Fix bug in Linkify autoformatter involving <a><span>http://foo</span></a>
+- Make %URI.Munge not apply to links that have the same host as your host.
+- Prevent stray </body> tag from truncating output, if a second </body>
+  is present.
+. Created script maintenance/rename-config.php for renaming a configuration
+  directive while maintaining its alias.  This script does not change source code.
+. Implement namespace locking for definition construction, to prevent
+  bugs where a directive is used for definition construction but is not
+  used to construct the cache hash.
+
+3.3.0, released 2009-02-16
+! Implement CSS property 'overflow' when %CSS.AllowTricky is true.
+! Implement generic property list classes
+- Fix bug with testEncodingSupportsASCII() algorithm when iconv() implementation
+  does not do the "right thing" with characters not supported in the output
+  set.
+- Spellcheck UTF-8: The Secret To Character Encoding
+- Fix improper removal of the contents of elements with only whitespace. Thanks
+  Eric Wald for reporting.
+- Fix broken test suite in versions of PHP without spl_autoload_register()
+- Fix degenerate case with YouTube filter involving double hyphens.
+  Thanks Pierre Attar for reporting.
+- Fix YouTube rendering problem on certain versions of Firefox.
+- Fix CSSDefinition Printer problems with decorators
+- Add text parameter to unit tests, forces text output
+. Add verbose mode to command line test runner, use (--verbose)
+. Turn on unit tests for UnitConverter
+. Fix missing version number in configuration %Attr.DefaultImageAlt (added 3.2.0)
+. Fix newline errors that caused spurious failures when CRLF HTML Purifier was
+  tested on Linux.
+. Removed trailing whitespace from all text files, see
+  remove-trailing-whitespace.php maintenance script.
+. Convert configuration to use property list backend.
+
+3.2.0, released 2008-10-31
+# Using %Core.CollectErrors forces line number/column tracking on, whereas
+  previously you could theoretically turn it off.
+# HTMLPurifier_Injector->notifyEnd() is formally deprecated. Please
+  use handleEnd() instead.
 ! %Output.AttrSort for when you need your attributes in alphabetical order to
  deal with a bug in FCKEditor. Requested by frank farmer.
+! Enable HTML comments when %HTML.Trusted is on. Requested by Waldo Jaquith.
+! Proper support for name attribute. It is now allowed and equivalent to the id
+  attribute in a and img tags, and is only converted to id when %HTML.TidyLevel
+  is heavy (for all doctypes).
+! %AutoFormat.RemoveEmpty to remove some empty tags from documents. Please don't
+  use on hand-written HTML.
+! Add error-cases for unsupported elements in MakeWellFormed. This enables
+  the strategy to be used, standalone, on untrusted input.
+! %Core.AggressivelyFixLt is on by default. This causes more sensible
+  processing of left angled brackets in smileys and other whatnot.
+! Test scripts now have a 'type' parameter, which lets you say 'htmlpurifier',
+  'phpt', 'vtest', etc. in order to only execute those tests. This supersedes
+  the --only-phpt parameter, although for backwards-compatibility the flag
+  will still work.
+! AutoParagraph auto-formatter will now preserve double-newlines upon output.
+  To users who are not performing inbound filtering, this may seem a little
+  useless, but as a bonus, the test suite and handling of edge cases is also
+  improved.
+! Experimental implementation of forms for %HTML.Trusted
+! Track column numbers when maintain line numbers is on
+! Proprietary 'background' attribute on table-related elements converted into
+  corresponding CSS.  Thanks Fusemail for sponsoring this feature!
+! Add forward(), forwardUntilEndToken(), backward() and current() to Injector
+  supertype.
+! HTMLPurifier_Injector->handleEnd() permits modification to end tokens. The
+  time of operation varies slightly from notifyEnd() as *all* end tokens are
+  processed by the injector before they are subject to the well-formedness rules.
+! %Attr.DefaultImageAlt allows overriding default behavior of setting alt to
+  basename of image when not present.
+! %AutoFormat.DisplayLinkURI neuters <a> tags into plain text URLs.
+- Fix two bugs in %URI.MakeAbsolute; one involving empty paths in base URLs,
+  the other involving an undefined $is_folder error.
+- Throw error when %Core.Encoding is set to a spurious value. Previously,
+  this errored silently and returned false.
+- Redirected stderr to stdout for flush error output.
+- %URI.DisableExternal will now use the host in %URI.Base if %URI.Host is not
+  available.
+- Do not re-munge URL if the output URL has the same host as the input URL.
+  Requested by Chris.
+- Fix error in documentation regarding %Filter.ExtractStyleBlocks
+- Prevent <![CDATA[<body></body>]]> from triggering %Core.ConvertDocumentToFragment
+- Fix bug with inline elements in blockquotes conflicting with strict doctype
+- Detect if HTML support is disabled for DOM by checking for loadHTML() method.
+- Fix bug where dots and double-dots in absolute URLs without hostname were
+  not collapsed by URIFilter_MakeAbsolute.
+- Fix bug with anonymous modules operating on SafeEmbed or SafeObject elements
+  by reordering their addition.
+- Will now throw exception on many error conditions during lexer creation; also
+  throw an exception when MaintainLineNumbers is true, but a non-tracksLineNumbers
+  is being used.
+- Detect if domxml extension is loaded, and use DirectLex accordingly.
+- Improve handling of big numbers with floating point arithmetic in UnitConverter.
+  Reported by David Morton.
+. Strategy_MakeWellFormed now operates in-place, saving memory and allowing
+  for more interesting filter-backtracking
+. New HTMLPurifier_Injector->rewind() functionality, allows injectors to rewind
+  index to reprocess tokens.
+. StringHashParser now allows for multiline sections with "empty" content;
+  previously the section would remain undefined.
+. Added --quick option to multitest.php, which tests only the most recent
+  release for each series.
+. Added --distro option to multitest.php, which accepts either 'normal' or
+  'standalone'. This supersedes --exclude-normal and --exclude-standalone

 3.1.1, released 2008-06-19
 # %URI.Munge now, by default, does not munge resources (for example, <img src="">)
@@ -749,3 +914,5 @@ NEWS ( CHANGELOG and HISTORY )                                     HTMLPurifier
  + Shorthand CSS properties
  + Table CSS properties
  + Deprecated attribute transformations
+
+    vim: et sw=4 sts=4
--- a/2
+++ b/2
@@ -20,3 +20,5 @@ Places to go:
 * See WYSIWYG for information on editors like TinyMCE and FCKeditor

 HTML Purifier can be found on the web at: http://htmlpurifier.org/
+
+    vim: et sw=4 sts=4
--- a/101
+++ b/101
@@ -11,56 +11,60 @@ If no interest is expressed for a feature that may require a considerable
 amount of effort to implement, it may get endlessly delayed. Do not be
 afraid to cast your vote for the next feature to be implemented!

- Investigate how early internal structures can be accessed; this would
-  prevent structures from being parsed and serialized multiple times.
- Built-in support for target="_blank" on all external links
- Gitify the repository
+Things to do as soon as possible:
+
+ - Think about allowing explicit order of operations hooks for transforms
+ - Inputs don't do the right thing with submit
+ - Fix "<.<" bug (trailing < is removed if not EOD)
+ - Build in better internal state dumps and debugging tools for remote
+   debugging
+ - Allowed/Allowed* have strange interactions when both set
+ - Transform lone embeds into object tags

 FUTURE VERSIONS
 ---------------

-3.2 release [It's All About Trust] (floating)
+4.2 release [OMG CONFIG PONIES]
+ ! Fix Printer. It's from the old days when we didn't have decent XML classes
+ ! Factor demo.php into a set of Printer classes, and then create a stub
+   file for users here (inside the actual HTML Purifier library)
+ - Fix error handling with form construction
+ - Do encoding validation in Printers, or at least, where user data comes in
+ - Config: Add examples to everything (make built-in which also automatically
+   gives output)
+ - Add "register" field to config schemas to eliminate dependence on
+   naming conventions (try to remember why we ultimately decided on this)
+
+5.0 release [HTML 5]
+ # Swap out code to use html5lib tokenizer and tree-builder
+ ! Allow turning off of FixNesting and required attribute insertion
+
+5.1 release [It's All About Trust] (floating)
 # Implement untrusted, dangerous elements/attributes
-    - Forms are especially wanted
 # Implement IDREF support (harder than it seems, since you cannot have
   IDREFs to non-existent IDs)
+     - Implement <area> (client and server side image maps are blocking
+       on IDREF support)
 # Frameset XHTML 1.0 and HTML 4.01 doctypes
- - Implement <area>
 - Figure out how to simultaneously set %CSS.Trusted and %HTML.Trusted (?)

-3.3 release [Error'ed]
+5.2 release [Error'ed]
 # Error logging for filtering/cleanup procedures
- - XSS-attempt detection--certain errors are flagged XSS-like
-
-3.4 release [Do What I Mean, Not What I Say]
 # Additional support for poorly written HTML
    - Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!)
    - Friendly strict handling of <address> (block -> <br>)
- ? Remove redundant tags, ex. <u><u>Underlined</u></u>. Implementation notes:
-    1. Analyzing which tags to remove duplicants
-    2. Ensure attributes are merged into the parent tag
-    3. Extend the tag exclusion system to specify whether or not the
-    contents should be dropped or not (currently, there's code that could do
-    something like this if it didn't drop the inner text too.)
- - Remove <span> tags that don't do anything (no attributes)
- - Remove empty inline tags<i></i>
+ - XSS-attempt detection--certain errors are flagged XSS-like
 - Append something to duplicate IDs so they're still usable (impl. note: the
   dupe detector would also need to detect the suffix as well)
- - Externalize inline CSS to promote clean HTML, proposed by Sander Tekelenburg

-4.0 release [Beyond HTML]
+6.0 release [Beyond HTML]
 # Legit token based CSS parsing (will require revamping almost every
-   AttrDef class). Probably will use CSSTidy class?
+   AttrDef class). Probably will use CSSTidy
 # More control over allowed CSS properties using a modularization
- # HTML 5 support
- # IRI support
+ # IRI support (this includes IDN)
 - Standardize token armor for all areas of processing
- - Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
-   Also, enable disabling of directionality

-5.0 release [To XML and Beyond]
- - AllowedAttributes and ForbiddenAttributes step on the toes of XML by
-   using periods; this needs to be changed.
+7.0 release [To XML and Beyond]
 - Extended HTML capabilities based on namespacing and tag transforms (COMPLEX)
    - Hooks for adding custom processors to custom namespaced tags and
      attributes, offer default implementation
@@ -71,27 +75,14 @@ Ongoing
 - Refactor unit tests into lots of test methods
 - Plugins for major CMSes (COMPLEX)
    - phpBB
-    - Drupal needs loving!
-    - Phorum need loving!
-    - more! (look for ones that use WYSIWYGs)
-    - Also, maybe a FAQ for extension writers with HTML Purifier
+    - Also, a FAQ for extension writers with HTML Purifier

 AutoFormat
 - Smileys
 - Syntax highlighting (with GeSHi) with <pre> and possibly <?php
 - Look at http://drupal.org/project/Modules/category/63 for ideas

-Optimizations
- - Reduce size of internal data-structures (esp. HTMLDefinition)
- - Research memory usage of objects versus arrays
- - Combine multiple strategies into a single, single-pass strategy
- - Get PH5P working with the latest versions of DOM, which have much more
-   stringent error checking procedures. Maybe convert straight to tokens.
- - Get rid of set_include_path(). Save this for another major release.
-
 Neat feature related
- ! Factor demo.php into a set of Printer classes, and then create a stub
-   file for users here (inside the actual HTML Purifier library)
 ! Support exporting configuration, so users can easily tweak settings
   in the demo, and then copy-paste into their own setup
 - Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
@@ -108,14 +99,28 @@ Neat feature related
 - Full set of color keywords. Also, a way to add onto them without
   finalizing the configuration object.
 - Write a var_export and memcached DefinitionCache - Denis
- - Allow restriction of allowed class values
+ - Built-in support for target="_blank" on all external links
+ - Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
+   Also, enable disabling of directionality
+ ? Externalize inline CSS to promote clean HTML, proposed by Sander Tekelenburg
+ ? Remove redundant tags, ex. <u><u>Underlined</u></u>. Implementation notes:
+    1. Analyzing which tags to remove duplicates
+    2. Ensure attributes are merged into the parent tag
+    3. Extend the tag exclusion system to specify whether or not the
+    contents should be dropped or not (currently, there's code that could do
+    something like this if it didn't drop the inner text too.)

 Maintenance related (slightly boring)
 # CHMOD install script for PEAR installs
 ! Factor out command line parser into its own class, and unit test it
- ! Nested configuration namespaces
- - Distinguish between default settings and explicitly set settings, so
-   configurations can be merged
+ - Reduce size of internal data-structures (esp. HTMLDefinition)
+ - Allow merging configurations.  Thus,
+        a -> b -> default
+        c -> d -> default
+   becomes
+        a -> b -> c -> d -> default
+   Maybe allow more fine-grained tuning of this behavior. Alternatively,
+   encourage people to use short plist depths before building them up.
 - Time PHPT tests

 ChildDef related (very boring)
@@ -129,3 +134,5 @@ Wontfix
 - Pretty-printing HTML: users can use Tidy on the output on entire page
 - Native content compression, whitespace stripping: use gzip if this is
   really important
+
+    vim: et sw=4 sts=4
--- a/2
+++ b/2
@@ -1 +1 @@
-3.1.1
+4.1.1
--- a/13
+++ b/13
@@ -1,8 +1,5 @@
-HTML Purifier 3.1.1 is a security and bugfix release. This release addresses
-two security vulnerabilities, both related to CSS, and one of which only
-applies to users using Shift_JIS as their output encoding. There is also
-a security improvement regarding the imagecrash attack. There is a backwards
-incompatible change in which resources are no longer munged
-by default; please enable using %URI.MungeResources. Besides this, there
-are numerous improvements to URI munging, esp. with the addition of
-%URI.MungeSecretKey, as well as an experimental %HTML.SafeObject and %HTML.SafeEmbed.
+HTML Purifier 4.1.1 is a major security and bugfix release that
+improves on 4.1's fix for an XSS vulnerability exploitable on Internet
+Explorer.  It also contains a number of important bugfixes, including
+the removal of improper logic that could result in infinite loops and
+fixed parsing for single-attributes with entities with DirectLex.
--- a/2
+++ b/2
@@ -16,3 +16,5 @@ trouble.  Therein lies the solution:
 HTML Purifier is perfect for filtering pure-HTML input from WYSIWYG editors.

 Enough said.
+
+    vim: et sw=4 sts=4
--- a/benchmarks/ConfigSchema.php
+++ b/benchmarks/ConfigSchema.php
@@ -12,3 +12,5 @@ $begin = xdebug_memory_usage();
 $schema = HTMLPurifier_ConfigSchema::makeFromSerial();

 echo xdebug_memory_usage() - $begin;
+
+// vim: et sw=4 sts=4
--- a/benchmarks/Lexer.php
+++ b/benchmarks/Lexer.php
@@ -153,3 +153,6 @@ echo '<div>Random input was: ' .


 </body></html>
+<?php
+
+// vim: et sw=4 sts=4
--- a/benchmarks/Trace.php
+++ b/benchmarks/Trace.php
@@ -17,3 +17,5 @@ $data = $purifier->purify(file_get_contents('samples/Lexer/4.html'));
 xdebug_stop_trace();

 echo "Trace finished.";
+
+// vim: et sw=4 sts=4
--- a/benchmarks/samples/Lexer/1.html
+++ b/benchmarks/samples/Lexer/1.html
@@ -51,3 +51,6 @@
 <div style="text-align:center;">Click on photo to see HR version</div></div>
 </body>
 </html>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/benchmarks/samples/Lexer/2.html
+++ b/benchmarks/samples/Lexer/2.html
@@ -15,3 +15,6 @@ function rwt(el,ct,cd,sg){var e = window.encodeURIComponent ? encodeURIComponent
 function qs(el) {if (window.RegExp && window.encodeURIComponent) {var ue=el.href;var qe=encodeURIComponent(document.f.q.value);if(ue.indexOf("q=")!=-1){el.href=ue.replace(new RegExp("q=[^&$]*"),"q="+qe);}else{el.href=ue+"&q="+qe;}}return 1;}
 // -->
 </script><table border=0 cellspacing=0 cellpadding=4><tr><td nowrap><font size=-1><b>Web</b>&nbsp;&nbsp;&nbsp;&nbsp;<a id=1a class=q href="/imghp?hl=en&tab=wi" onClick="return qs(this);">Images</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=2a class=q href="http://groups.google.com/grphp?hl=en&tab=wg" onClick="return qs(this);">Groups</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=4a class=q href="http://news.google.com/nwshp?hl=en&tab=wn" onClick="return qs(this);">News</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=5a class=q href="http://froogle.google.com/frghp?hl=en&tab=wf" onClick="return qs(this);">Froogle</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=8a class=q href="/lochp?hl=en&tab=wl" onClick="return qs(this);">Local</a>&nbsp;&nbsp;&nbsp;&nbsp;<b><a href="/intl/en/options/" class=q>more&nbsp;&raquo;</a></b></font></td></tr></table><table cellspacing=0 cellpadding=0><tr><td width=25%>&nbsp;</td><td align=center><input type=hidden name=hl value=en><input maxlength=2048 size=55 name=q value="" title="Google Search"><br><input type=submit value="Google Search" name=btnG><input type=submit value="I'm Feeling Lucky" name=btnI></td><td valign=top nowrap width=25%><font size=-2>&nbsp;&nbsp;<a href=/advanced_search?hl=en>Advanced Search</a><br>&nbsp;&nbsp;<a href=/preferences?hl=en>Preferences</a><br>&nbsp;&nbsp;<a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><br><font size=-1><a href="/ads/">Advertising&nbsp;Programs</a> - <a href=/services/>Business Solutions</a> - <a href=/about.html>About Google</a></font><p><font size=-2>&copy;2006 Google</font></p></center></body></html>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/benchmarks/samples/Lexer/3.html
+++ b/benchmarks/samples/Lexer/3.html
@@ -126,3 +126,6 @@ if (objAdMgr.isSlotAvailable("leaderboard")) {
 </body>

 </html>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/benchmarks/samples/Lexer/4.html
+++ b/benchmarks/samples/Lexer/4.html
@@ -538,3 +538,6 @@ Retrieved from "<a href="http://en.wikipedia.org/wiki/Tai_Chi_Chuan">http://en.w

 <!-- Served by srv25 in 0.089 secs. -->
 </body></html>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/benchmarks/samples/Lexer/DISCLAIMER.txt
+++ b/benchmarks/samples/Lexer/DISCLAIMER.txt
@@ -3,3 +3,5 @@ Disclaimer:
 The HTML used in these samples are taken from random websites.  I claim
 no copyright over these and assert that I may use them like this under
 fair use.
+
+    vim: et sw=4 sts=4
--- a/configdoc/generate.php
+++ b/configdoc/generate.php
@@ -18,22 +18,24 @@ TODO:
 if (version_compare(PHP_VERSION, '5.2', '<')) exit('PHP 5.2+ required.');
 error_reporting(E_ALL | E_STRICT);

-chdir(dirname(__FILE__));
-
 // load dual-libraries
-require_once '../extras/HTMLPurifierExtras.auto.php';
-require_once '../library/HTMLPurifier.auto.php';
+require_once dirname(__FILE__) . '/../extras/HTMLPurifierExtras.auto.php';
+require_once dirname(__FILE__) . '/../library/HTMLPurifier.auto.php';

 // setup HTML Purifier singleton
 HTMLPurifier::getInstance(array(
    'AutoFormat.PurifierLinkify' => true
 ));

-$interchange = HTMLPurifier_ConfigSchema_InterchangeBuilder::buildFromDirectory();
+$builder = new HTMLPurifier_ConfigSchema_InterchangeBuilder();
+$interchange = new HTMLPurifier_ConfigSchema_Interchange();
+$builder->buildDir($interchange);
+$loader = dirname(__FILE__) . '/../config-schema.php';
+if (file_exists($loader)) include $loader;
 $interchange->validate();

 $style = 'plain'; // use $_GET in the future, careful to validate!
-$configdoc_xml = 'configdoc.xml';
+$configdoc_xml = dirname(__FILE__) . '/configdoc.xml';

 $xml_builder = new HTMLPurifier_ConfigSchema_Builder_Xml();
 $xml_builder->openURI($configdoc_xml);
@@ -50,12 +52,13 @@ if (!$output) {
 }

 // write out
-file_put_contents("$style.html", $output);
+file_put_contents(dirname(__FILE__) . "/$style.html", $output);

 if (php_sapi_name() != 'cli') {
    // output (instant feedback if it's a browser)
    echo $output;
 } else {
-    echo 'Files generated successfully.';
+    echo "Files generated successfully.\n";
 }

+// vim: et sw=4 sts=4
--- a/configdoc/styles/plain.css
+++ b/configdoc/styles/plain.css
@@ -40,3 +40,5 @@ h4 {font-family:sans-serif; font-size:0.9em; font-weight:bold; }
 .deprecated {color: #CCC;}
 .deprecated table.constraints th {background:#FFF;}
 .deprecated-notice {color: #000; text-align:center; margin-bottom: 1em;}
+
+/* vim: et sw=4 sts=4 */
--- a/configdoc/styles/plain.xsl
+++ b/configdoc/styles/plain.xsl
@@ -231,3 +231,6 @@
    </xsl:template>

 </xsl:stylesheet>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/configdoc/types.xml
+++ b/configdoc/types.xml
@@ -12,3 +12,6 @@
  <type id="hash">Associative array</type>
  <type id="mixed">Mixed</type>
 </types>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/configdoc/usage.xml
+++ b/configdoc/usage.xml
@@ -5,15 +5,16 @@
   <line>131</line>
  </file>
  <file name="HTMLPurifier/Lexer.php">
-   <line>85</line>
+   <line>81</line>
+   <line>269</line>
  </file>
  <file name="HTMLPurifier/Lexer/DirectLex.php">
-   <line>50</line>
-   <line>62</line>
-   <line>327</line>
+   <line>53</line>
+   <line>73</line>
+   <line>348</line>
  </file>
  <file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
-   <line>44</line>
+   <line>47</line>
  </file>
 </directive>
 <directive id="CSS.MaxImgLength">
@@ -38,7 +39,7 @@
 </directive>
 <directive id="CSS.AllowedProperties">
  <file name="HTMLPurifier/CSSDefinition.php">
-   <line>274</line>
+   <line>275</line>
  </file>
 </directive>
 <directive id="Cache.DefinitionImpl">
@@ -69,49 +70,43 @@
 <directive id="Core.Encoding">
  <file name="HTMLPurifier/Encoder.php">
   <line>267</line>
-   <line>294</line>
+   <line>300</line>
  </file>
 </directive>
 <directive id="Test.ForceNoIconv">
  <file name="HTMLPurifier/Encoder.php">
   <line>272</line>
-   <line>302</line>
+   <line>308</line>
  </file>
 </directive>
 <directive id="Core.EscapeNonASCIICharacters">
  <file name="HTMLPurifier/Encoder.php">
-   <line>298</line>
-  </file>
- </directive>
- <directive id="Core.MaintainLineNumbers">
-  <file name="HTMLPurifier/ErrorCollector.php">
-   <line>81</line>
-  </file>
-  <file name="HTMLPurifier/Lexer.php">
-   <line>82</line>
-  </file>
-  <file name="HTMLPurifier/Lexer/DirectLex.php">
-   <line>45</line>
+   <line>304</line>
  </file>
 </directive>
 <directive id="Output.CommentScriptContents">
  <file name="HTMLPurifier/Generator.php">
-   <line>45</line>
+   <line>56</line>
  </file>
 </directive>
 <directive id="Output.SortAttr">
  <file name="HTMLPurifier/Generator.php">
-   <line>46</line>
+   <line>57</line>
+  </file>
+ </directive>
+ <directive id="Output.FlashCompat">
+  <file name="HTMLPurifier/Generator.php">
+   <line>58</line>
  </file>
 </directive>
 <directive id="Output.TidyFormat">
  <file name="HTMLPurifier/Generator.php">
-   <line>75</line>
+   <line>87</line>
  </file>
 </directive>
 <directive id="Output.Newline">
  <file name="HTMLPurifier/Generator.php">
-   <line>89</line>
+   <line>101</line>
  </file>
 </directive>
 <directive id="HTML.BlockWrapper">
@@ -151,41 +146,44 @@
 </directive>
 <directive id="HTML.Trusted">
  <file name="HTMLPurifier/HTMLModuleManager.php">
-   <line>198</line>
+   <line>202</line>
  </file>
  <file name="HTMLPurifier/Lexer.php">
-   <line>238</line>
+   <line>258</line>
  </file>
  <file name="HTMLPurifier/HTMLModule/Image.php">
   <line>27</line>
  </file>
  <file name="HTMLPurifier/Lexer/DirectLex.php">
-   <line>34</line>
+   <line>36</line>
+  </file>
+  <file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
+   <line>23</line>
  </file>
 </directive>
 <directive id="HTML.AllowedModules">
  <file name="HTMLPurifier/HTMLModuleManager.php">
-   <line>205</line>
+   <line>209</line>
  </file>
 </directive>
 <directive id="HTML.CoreModules">
  <file name="HTMLPurifier/HTMLModuleManager.php">
-   <line>206</line>
+   <line>210</line>
  </file>
 </directive>
 <directive id="HTML.Proprietary">
  <file name="HTMLPurifier/HTMLModuleManager.php">
-   <line>220</line>
+   <line>221</line>
  </file>
 </directive>
 <directive id="HTML.SafeObject">
  <file name="HTMLPurifier/HTMLModuleManager.php">
-   <line>225</line>
+   <line>226</line>
  </file>
 </directive>
 <directive id="HTML.SafeEmbed">
  <file name="HTMLPurifier/HTMLModuleManager.php">
-   <line>228</line>
+   <line>229</line>
  </file>
 </directive>
 <directive id="Attr.IDBlacklist">
@@ -200,21 +198,34 @@
 </directive>
 <directive id="Core.LexerImpl">
  <file name="HTMLPurifier/Lexer.php">
-   <line>70</line>
+   <line>76</line>
+  </file>
+ </directive>
+ <directive id="Core.MaintainLineNumbers">
+  <file name="HTMLPurifier/Lexer.php">
+   <line>80</line>
+  </file>
+  <file name="HTMLPurifier/Lexer/DirectLex.php">
+   <line>48</line>
  </file>
 </directive>
 <directive id="Core.ConvertDocumentToFragment">
  <file name="HTMLPurifier/Lexer.php">
-   <line>230</line>
+   <line>267</line>
+  </file>
+ </directive>
+ <directive id="URI.">
+  <file name="HTMLPurifier/URIDefinition.php">
+   <line>55</line>
+  </file>
+  <file name="HTMLPurifier/URIFilter/Munge.php">
+   <line>12</line>
  </file>
 </directive>
 <directive id="URI.Host">
  <file name="HTMLPurifier/URIDefinition.php">
   <line>64</line>
  </file>
-  <file name="HTMLPurifier/URIFilter/DisableExternal.php">
-   <line>8</line>
-  </file>
 </directive>
 <directive id="URI.Base">
  <file name="HTMLPurifier/URIDefinition.php">
@@ -228,12 +239,12 @@
 </directive>
 <directive id="URI.AllowedSchemes">
  <file name="HTMLPurifier/URISchemeRegistry.php">
-   <line>42</line>
+   <line>41</line>
  </file>
 </directive>
 <directive id="URI.OverrideAllowedSchemes">
  <file name="HTMLPurifier/URISchemeRegistry.php">
-   <line>43</line>
+   <line>42</line>
  </file>
 </directive>
 <directive id="URI.Disable">
@@ -249,6 +260,16 @@
   <line>12</line>
  </file>
 </directive>
+ <directive id="Attr.AllowedClasses">
+  <file name="HTMLPurifier/AttrDef/HTML/Class.php">
+   <line>18</line>
+  </file>
+ </directive>
+ <directive id="Attr.ForbiddenClasses">
+  <file name="HTMLPurifier/AttrDef/HTML/Class.php">
+   <line>19</line>
+  </file>
+ </directive>
 <directive id="Attr.AllowedFrameTargets">
  <file name="HTMLPurifier/AttrDef/HTML/FrameTarget.php">
   <line>15</line>
@@ -275,6 +296,11 @@
   <line>54</line>
  </file>
 </directive>
+ <directive id="Attr.">
+  <file name="HTMLPurifier/AttrDef/HTML/LinkTypes.php">
+   <line>30</line>
+  </file>
+ </directive>
 <directive id="Attr.DefaultTextDir">
  <file name="HTMLPurifier/AttrTransform/BdoDir.php">
   <line>13</line>
@@ -293,14 +319,27 @@
   <line>19</line>
  </file>
 </directive>
+ <directive id="Attr.DefaultImageAlt">
+  <file name="HTMLPurifier/AttrTransform/ImgRequired.php">
+   <line>25</line>
+  </file>
+ </directive>
 <directive id="Attr.DefaultInvalidImageAlt">
  <file name="HTMLPurifier/AttrTransform/ImgRequired.php">
-   <line>27</line>
+   <line>33</line>
+  </file>
+ </directive>
+ <directive id="HTML.Attr.Name.UseCDATA">
+  <file name="HTMLPurifier/AttrTransform/Name.php">
+   <line>11</line>
+  </file>
+  <file name="HTMLPurifier/HTMLModule/Name.php">
+   <line>13</line>
  </file>
 </directive>
 <directive id="Core.EscapeInvalidChildren">
  <file name="HTMLPurifier/ChildDef/Required.php">
-   <line>55</line>
+   <line>62</line>
  </file>
 </directive>
 <directive id="Cache.SerializerPath">
@@ -308,17 +347,17 @@
   <line>91</line>
  </file>
 </directive>
- <directive id="FilterParam.ExtractStyleBlocksTidyImpl">
+ <directive id="Filter.ExtractStyleBlocks.TidyImpl">
  <file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
   <line>41</line>
  </file>
 </directive>
- <directive id="FilterParam.ExtractStyleBlocksScope">
+ <directive id="Filter.ExtractStyleBlocks.Scope">
  <file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
   <line>65</line>
  </file>
 </directive>
- <directive id="FilterParam.ExtractStyleBlocksEscaping">
+ <directive id="Filter.ExtractStyleBlocks.Escaping">
  <file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
   <line>123</line>
  </file>
@@ -349,11 +388,21 @@
   <line>50</line>
  </file>
 </directive>
- <directive id="AutoFormatParam.PurifierLinkifyDocURL">
+ <directive id="AutoFormat.PurifierLinkify.DocURL">
  <file name="HTMLPurifier/Injector/PurifierLinkify.php">
   <line>15</line>
  </file>
 </directive>
+ <directive id="AutoFormat.RemoveEmpty.RemoveNbsp">
+  <file name="HTMLPurifier/Injector/RemoveEmpty.php">
+   <line>12</line>
+  </file>
+ </directive>
+ <directive id="AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions">
+  <file name="HTMLPurifier/Injector/RemoveEmpty.php">
+   <line>13</line>
+  </file>
+ </directive>
 <directive id="Core.AggressivelyFixLt">
  <file name="HTMLPurifier/Lexer/DOMLex.php">
   <line>44</line>
@@ -361,12 +410,12 @@
 </directive>
 <directive id="Core.DirectLexLineNumberSyncInterval">
  <file name="HTMLPurifier/Lexer/DirectLex.php">
-   <line>59</line>
+   <line>70</line>
  </file>
 </directive>
 <directive id="Core.EscapeInvalidTags">
  <file name="HTMLPurifier/Strategy/MakeWellFormed.php">
-   <line>22</line>
+   <line>45</line>
  </file>
  <file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
   <line>19</line>
@@ -374,12 +423,12 @@
 </directive>
 <directive id="Core.RemoveScriptContents">
  <file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
-   <line>22</line>
+   <line>25</line>
  </file>
 </directive>
 <directive id="Core.HiddenElements">
  <file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
-   <line>23</line>
+   <line>26</line>
  </file>
 </directive>
 <directive id="URI.HostBlacklist">
--- a/docs/dev-advanced-api.html
+++ b/docs/dev-advanced-api.html
@@ -17,202 +17,10 @@
 <div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>

 <p>
-  <strong>Warning:</strong> This document may be out-of-date. When in doubt,
-  consult the source code documentation.
+  Please see <a href="enduser-customize.html">Customize!</a>
 </p>

-<p>HTML Purifier currently natively supports only a subset of HTML's
-allowed elements, attributes, and behavior; specifically, this subset
-is the set of elements that are safe for untrusted users to use.
-However, HTML Purifier is often utilized to ensure standards-compliance
-from input that is trusted (making it a sort of Tidy substitute),
-and often users need to define new elements or attributes. The
-advanced API is oriented specifically for these use-cases.</p>
-
-<p>Our goals are to let the user:</p>
-
-<dl>
-    <dt>Select</dt>
-    <dd><ul>
-        <li>Doctype</li>
-        <!-- <li>Filterset</li> -->
-        <li>Elements / Attributes / Modules</li>
-        <li>Tidy</li>
-    </ul></dd>
-    <dt>Customize</dt>
-    <dd><ul>
-        <li>Attributes</li>
-        <li>Elements</li>
-        <!--<li>Doctypes</li>-->
-    </ul></dd>
-</dl>
-
-<h2>Select</h2>
-
-<p>For basic use, the user will have to specify some basic parameters. This
-is not strictly necessary, as HTML Purifier's default setting will always
-output safe code, but is required for standards-compliant output.</p>
-
-<h3>Selecting a Doctype</h3>
-
-<p>The first thing to select is the <strong>doctype</strong>. This
-is essential for standards-compliant output.</p>
-
-<p class="technical">This identifier is based
-on the name the W3C has given to the document type and <em>not</em>
-the DTD identifier.</p>
-
-<p>This parameter is set via the configuration object:</p>
-
-<pre>$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');</pre>
-
-<p>Due to historical reasons, the default doctype is XHTML 1.0
-Transitional, however, we really shouldn't be guessing what the user's
-doctype is. Fortunantely, people who can't be bothered to set this won't
-be bothered when their pages stop validating.</p>
-
-<h3>Selecting Elements / Attributes / Modules</h3>
-
-<p>HTML Purifier will, by default, allow as many elements and attributes
-as possible. However, a user may decide to roll their own filterset by
-selecting modules, elements and attributes to allow for their own
-specific use-case. This can be done using %HTML.Allowed:</p>
-
-<pre>$config->set('HTML', 'Allowed', 'a[href|title],em,p,blockquote');</pre>
-
-<p class="technical">The directive %HTML.Allowed is a convenience feature
-that may be fully expressed with the legacy interface.</p>
-
-<p>We currently support another interface from older versions:</p>
-
-<pre>$config->set('HTML', 'AllowedElements', 'a,em,p,blockquote');
-$config->set('HTML', 'AllowedAttributes', 'a.href,a.title');</pre>
-
-<p>A user may also choose to allow modules using a specialized
-directive:</p>
-
-<pre>$config->set('HTML', 'AllowedModules', 'Hypertext,Text,Lists');</pre>
-
-<p>But it is not expected that this feature will be widely used.</p>
-
-<p class="technical">Module selection will work slightly differently
-from the other AllowedElements and AllowedAttributes directives by
-directly modifying the doctype you are operating in, in the spirit of
-XHTML 1.1's modularization. We stop users from shooting themselves in the
-foot by mandating the modules in %HTML.CoreModules be used.</p>
-
-<p class="technical">Modules are distinguished from regular elements by the
-case of their first letter. While XML distinguishes between and allows
-lower and uppercase letters in element names, XHTML uses only lower-case
-element names for sake of consistency.</p>
-
-<h3>Selecting Tidy</h3>
-
-<p>The name of this segment of functionality is inspired off of Dave
-Ragget's program HTML Tidy, which purported to help clean up HTML. In
-HTML Purifier, Tidy functionality involves turning unsupported and
-deprecated elements into standards-compliant ones, maintaining
-backwards compatibility, and enforcing best practices.</p>
-
-<p>This is a complicated feature, and is explained more in depth at
-<a href="enduser-tidy.html">the Tidy documentation page</a>.</p>
-
-<!--
-<h3>Unified selector</h3>
-
-<p>Because selecting each and every one of these configuration options
-is a chore, we may wish to offer a specialized configuration method
-for selecting a filterset. Possibility:</p>
-
-<pre>function selectFilter($doctype, $filterset, $tidy)</pre>
-
-<p>...which is simply a light wrapper over the individual configuration
-calls. A custom config file format or text format could also be adopted.</p>
-->
-
-<h2>Customize</h2>
-
-<p>By reviewing topic posts in the support forum, we determined that
-there were two primarily demanded customization features people wanted:
-to add an attribute to an existing element, and to add an element.
-Thus, we'll want to create convenience functions for these common
-use-cases.</p>
-
-<p>Note that the functions described here are only available if
-a raw copy of <code>HTMLPurifier_HTMLDefinition</code> was retrieved.
-Furthermore, caching may prevent your changes from immediately
-being seen: consult <a href="enduser-customize.html">enduser-customize.html</a> on how
-to work around this.</p>
-
-<h3>Attributes</h3>
-
-<p>An attribute is bound to an element by a name and has a specific
-<code>AttrDef</code> that validates it. The interface is therefore:</p>
-
-<pre>function addAttribute($element, $attribute, $attribute_def);</pre>
-
-<p>Example of the functionality in action:</p>
-
-<pre>$def->addAttribute('a', 'rel', 'Enum#nofollow');</pre>
-
-<p>The <code>$attribute_def</code> value is flexible,
-to make things simpler. It can be a literal object or:</p>
-
-<ul>
-    <!--<li>Class name: We'll instantiate it for you</li>
-    <li>Function name: We'll create an <code>HTMLPurifier_AttrDef_Anonymous</code>
-        class with that function registered as a callback.</li>-->
-    <li>String attribute type: We'll use <code>HTMLPurifier_AttrTypes</code>
-        to resolve it for you. Any data that follows a hash mark (#) will
-        be used to customize the attribute type: in the example above, 
-        we specify which values for Enum to allow.</li>
-</ul>
-
-<h3>Elements</h3>
-
-<p>An element requires certain information as specified by
-<code>HTMLPurifier_ElementDef</code>. However, not all of it is necessary,
-the usual things required are:</p>
-
-<ul>
-    <li>Attributes</li>
-    <li>Content model/type</li>
-    <li>Registration in a content set</li>
-</ul>
-
-<p>This suggests an API like this:</p>
-
-<pre>function addElement($element, $type, $contents,
-    $attr_collections = array(); $attributes = array());</pre>
-
-<p>Each parameter explained in depth:</p>
-
-<dl>
-    <dt><code>$element</code></dt>
-    <dd>Element name, ex. 'label'</dd>
-    <dt><code>$type</code></dt>
-    <dd>Content set to register in, ex. 'Inline' or 'Flow'</dd>
-    <dt><code>$contents</code></dt>
-    <dd>Description of allowed children. This is a merged form of
-        <code>HTMLPurifier_ElementDef</code>'s member variables
-        <code>$content_model</code> and <code>$content_model_type</code>,
-        where the form is <q>Type: Model</q>, ex. 'Optional: Inline'.
-        There are also a number of predefined templates one may use.</dd>
-    <dt><code>$attr_collections</code></dt>
-    <dd>Array (or string if only one) of attribute collection(s) to
-        merge into the attributes array.</dd>
-    <dt><code>$attributes</code></dt>
-    <dd>Array of attribute names to attribute definitions, much like
-        the above-described attribute customization.</dd>
-</dl>
-
-<p>A possible usage:</p>
-
-<pre>$def->addElement('font', 'Inline', 'Optional: Inline', 'Common',
-    array('color' => 'Color'));</pre>
-
-<p>See <code>HTMLPurifier/HTMLModule.php</code> for details.</p>
-
-<div id="version">$Id$</div>
-
 </body></html>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/docs/dev-code-quality.txt
+++ b/docs/dev-code-quality.txt
@@ -25,3 +25,5 @@ URIScheme - needs to have callable generic checks
    mailto - doesn't validate emails, doesn't validate querystring
    news - doesn't validate opaque path
    nntp - doesn't constrain path
+
+    vim: et sw=4 sts=4
--- a/docs/dev-config-bcbreaks.txt
+++ b/docs/dev-config-bcbreaks.txt
@@ -0,0 +1,79 @@
+
+Configuration Backwards-Compatibility Breaks
+
+In version 4.0.0, the configuration subsystem (composed of the outwards
+facing Config class, as well as the ConfigSchema and ConfigSchema_Interchange
+subsystems), was significantly revamped to make use of property lists.
+While most of the changes are internal, some internal APIs were changed for the
+sake of clarity. HTMLPurifier_Config was kept completely backwards compatible,
+although some of the functions were retrofitted with an unambiguous alternate
+syntax. Both of these changes are discussed in this document.
+
+
+
+1. Outwards Facing Changes
+--------------------------------------------------------------------------------
+
+The HTMLPurifier_Config class now takes an alternate syntax. The general rule
+is:
+
+    If you passed $namespace, $directive, pass "$namespace.$directive"
+    instead.
+
+An example:
+
+    $config->set('HTML', 'Allowed', 'p');
+
+becomes:
+
+    $config->set('HTML.Allowed', 'p');
+
+New configuration options may have more than one namespace; they might
+look something like %Filter.YouTube.Blacklist. While you could technically
+set it with ('Filter', 'YouTube.Blacklist'), the logical extension
+('Filter', 'YouTube', 'Blacklist') does not work.
+
+The old API will still work, but will emit E_USER_NOTICEs.
+
+
+
+2. Internal API Changes
+--------------------------------------------------------------------------------
+
+Some overarching notes: we've completely eliminated the notion of namespace;
+it's now an informal construct for organizing related configuration directives.
+
+Also, the validation routines for keys (formerly "$namespace.$directive")
+have been completely relaxed. I don't think such validation is really necessary.
+
+2.1 HTMLPurifier_ConfigSchema
+
+First off, if you're interfacing with this class, you really shouldn't.
+HTMLPurifier_ConfigSchema_Builder_ConfigSchema is really the only class that
+should ever be creating HTMLPurifier_ConfigSchema, and HTMLPurifier_Config the
+only class that should be reading it.
+
+All namespace related methods were removed; they are completely unnecessary
+now. Any $namespace, $name arguments must be replaced with $key (where
+$key == "$namespace.$name"), including for addAlias().
+
+The $info and $defaults member variables are no longer indexed as
+[$namespace][$name]; they are now indexed as ["$namespace.$name"].
+
+All deprecated methods were finally removed, after having yelled at you as
+an E_USER_NOTICE for a while now.
+
+2.2 HTMLPurifier_ConfigSchema_Interchange
+
+Member variable $namespaces was removed.
+
+2.3 HTMLPurifier_ConfigSchema_Interchange_Id
+
+Member variable $namespace and $directive removed; member variable $key added.
+Any method that took $namespace, $directive now takes $key.
+
+2.4 HTMLPurifier_ConfigSchema_Interchange_Namespace
+
+Removed.
+
+    vim: et sw=4 sts=4
--- a/docs/dev-config-naming.txt
+++ b/docs/dev-config-naming.txt
@@ -0,0 +1,164 @@
+Configuration naming
+
+HTML Purifier 4.0.0 features a new configuration naming system that
+allows arbitrary nesting of namespaces.  While there are certain cases
+in which using two namespaces is obviously better (the canonical example
+is where we were using AutoFormatParam to contain directives for AutoFormat
+parameters), it is unclear whether a general migration to highly
+namespaced directives is a good idea.
+
+== Case studies ==
+
+=== Attr.* ===
+
+We have a dead duck HTML.Attr.Name.UseCDATA which migrated before we decided
+to think this out thoroughly.
+
+We currently have a large number of directives in the Attr.* namespace.
+These directives tweak the behavior of some HTML attributes.  They have
+the properties:
+
+* While they apply to only one attribute at a time, the attribute can
+  span over multiple elements (not necessarily all elements, either).
+  The information of which elements it impacts is either omitted or
+  informally stated (EnableID applies to all elements, DefaultImageAlt
+  applies to <img> tags, AllowedRev doesn't say but only applies to <a> tags).
+
+* There is a certain degree of clustering that could be applied, especially
+  to the ID directives.  The clustering could be done with respect to
+  what element/attribute was used, i.e.
+
+    *.id -> EnableID, IDBlacklistRegexp, IDBlacklist, IDPrefixLocal, IDPrefix
+    img.src -> DefaultInvalidImage
+    img.alt -> DefaultImageAlt, DefaultInvalidImageAlt
+    bdo.dir -> DefaultTextDir
+    a.rel -> AllowedRel
+    a.rev -> AllowedRev
+    a.target -> AllowedFrameTargets
+    a.name -> Name.UseCDATA
+
+* The directives often reference generic attribute types that were specified
+  in the DTD/specification.  However, some of the behavior specifically relies
+  on the fact that other use cases of the attribute are not, at current,
+  supported by HTML Purifier.
+
+    AllowedRel, AllowedRev -> heavily <a> specific; if <link> ends up being
+        allowed, we will also have to give users specificity there (we also
+        want to preserve generality). DTD %Linktypes; HTML5 distinguishes
+        between <link> and <a>/<area>
+    AllowedFrameTargets -> heavily <a> specific, but also used by <area>
+        and <form>. Transitional DTD %FrameTarget, not present in strict,
+        HTML5 calls them "browsing contexts"
+    Default*Image* -> as a default parameter, is almost entirely exclusive
+        to <img>
+    EnableID -> global attribute
+    Name.UseCDATA -> heavily <a> specific, but has heavy other usage by
+        many things
+
+== AutoFormat.* ==
+
+These have the fairly normal pluggable architecture that lends itself to
+large amounts of namespaces (pluggability may be the key to figuring
+out when gratuitous namespacing is good.)  Properties:
+
+* Boolean directives are fair game for being namespaced: for example,
+  RemoveEmpty.RemoveNbsp triggers RemoveEmpty.RemoveNbsp.Exceptions,
+  the latter of which only makes sense when RemoveEmpty.RemoveNbsp
+  is set to true. (The same applies to RemoveNbsp too)
+
+The AutoFormat string is a bit long, but is the only bit of repeated
+context.
+
+== Core.* ==
+
+Core is the potpourri of directives, mostly regarding some minor behavioral
+tweaks for HTML handling abilities.
+
+    AggressivelyFixLt
+    ConvertDocumentToFragment
+    DirectLexLineNumberSyncInterval
+    LexerImpl
+    MaintainLineNumbers
+        Lexer
+    CollectErrors
+    Language
+        Error handling (Language is ostensibly a little more general, but
+        it's only used for error handling right now)
+    ColorKeywords
+        CSS and HTML
+    Encoding
+    EscapeNonASCIICharacters
+        Character encoding
+    EscapeInvalidChildren
+    EscapeInvalidTags
+    HiddenElements
+    RemoveInvalidImg
+        Lexing/Output
+    RemoveScriptContents
+        Deprecated
+
+== HTML.* ==
+
+    AllowedAttributes
+    AllowedElements
+    AllowedModules
+    Allowed
+    ForbiddenAttributes
+    ForbiddenElements
+        Element set tuning
+    BlockWrapper
+        Child def advanced twiddle
+    CoreModules
+    CustomDoctype
+        Advanced HTMLModuleManager twiddles
+    DefinitionID
+    DefinitionRev
+        Caching
+    Doctype
+    Parent
+    Strict
+    XHTML
+        Global environment
+    MaxImgLength
+        Attribute twiddle? (applies to two attributes)
+    Proprietary
+    SafeEmbed
+    SafeObject
+    Trusted
+        Extra functionality/tagsets
+    TidyAdd
+    TidyLevel
+    TidyRemove
+        Tidy
+
+== Output.* ==
+
+These directly affect the output of Generator. These are all advanced
+twiddles.
+
+== URI.* ==
+
+    AllowedSchemes
+    OverrideAllowedSchemes
+        Scheme tuning
+    Base
+    DefaultScheme
+    Host
+        Global environment
+    DefinitionID
+    DefinitionRev
+        Caching
+    DisableExternalResources
+    DisableExternal
+    DisableResources
+    Disable
+        Contextual/authority tuning
+    HostBlacklist
+        Authority tuning
+    MakeAbsolute
+    MungeResources
+    MungeSecretKey
+    Munge
+        Transformation behavior (munge can be grouped)
+
+
--- a/docs/dev-config-schema.html
+++ b/docs/dev-config-schema.html
@@ -114,7 +114,7 @@ Test.Example</pre>
        </tr>
        <tr>
          <td>VALUE-ALIASES</td>
-          <td>'baz' => 'bar'</td>
+          <td>'baz' =&gt; 'bar'</td>
          <td><em>Optional</em>. Mapping of one value to another, and
          should be a comma separated list of keypair duples. This
          is only allowed string, istring, text and itext TYPEs.</td>
@@ -213,7 +213,7 @@ Test.Example</pre>
        </tr>
        <tr>
          <td>lookup</td>
-          <td>array('key' => true)</td>
+          <td>array('key' =&gt; true)</td>
          <td>Lookup array, used with <code>isset($var[$key])</code></td>
        </tr>
        <tr>
@@ -223,7 +223,7 @@ Test.Example</pre>
        </tr>
        <tr>
          <td>hash</td>
-          <td>array('key' => 'val')</td>
+          <td>array('key' =&gt; 'val')</td>
          <td>Associative array of keys to values</td>
        </tr>
        <tr>
@@ -239,15 +239,15 @@ Test.Example</pre>
      object; users have a little bit of leeway when setting configuration
      values (for example, a lookup value can be specified as a list;
      HTML Purifier will flip it as necessary.) These types are defined
-      in <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/VarParser.php">
+      in <a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/VarParser.php">
      library/HTMLPurifier/VarParser.php</a>.
    </p>

    <p>
      For more information on what values are allowed, and how they are parsed,
-      consult <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php">
+      consult <a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php">
      library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php</a>, as well
-      as <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/ConfigSchema/Interchange/Directive.php">
+      as <a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ConfigSchema/Interchange/Directive.php">
      library/HTMLPurifier/ConfigSchema/Interchange/Directive.php</a> for
      the semantics of the parsed values.
    </p>
@@ -267,12 +267,47 @@ Test.Example</pre>
      If you ever make changes to your configuration directives, you
      will need to run this script again.
    </p>
+    <h2>Adding in-house schema definitions</h2>
+
+    <p>
+      Placing stuff directly in HTML Purifier's source tree is generally not a
+      good idea, so HTML Purifier 4.0.0+ has some facilities in place to make your
+      life easier.
+    </p>
+
+    <p>
+      The first is to pass an extra parameter to <code>maintenance/generate-schema-cache.php</code>
+      with the location of your directory (relative or absolute path will do). For example,
+      if I'm storing my custom definitions in <em>/var/htmlpurifier/myschema</em>, run:
+      <code>php maintenance/generate-schema-cache.php /var/htmlpurifier/myschema</code>.
+    </p>
+
+    <p>
+      Alternatively, you can create a small loader PHP file in the HTML Purifier base
+      directory named <code>config-schema.php</code> (this is the same directory
+      you would place a <code>test-settings.php</code> file).  In this file, add
+      the following line for each directory you want to load:
+    </p>
+
+<pre>$builder-&gt;buildDir($interchange, '/var/htmlpurifier/myschema');</pre>
+
+    <p>You can even load a single file using:</p>
+
+<pre>$builder-&gt;buildFile($interchange, '/var/htmlpurifier/myschema/MyApp.Directive.txt');</pre>
+
+    <p>Storing custom definitions that you don't plan on sending back upstream in
+    a separate directory is <em>definitely</em> a good idea! Additionally, picking
+    a good namespace can go a long way to saving you grief if you want to use
+    someone else's change, but they picked the same name, or if HTML Purifier
+    decides to add support for a configuration directive that has the same name.</p>
+
+    <!-- TODO: how to name directives that rely on naming conventions -->

    <h2>Errors</h2>

    <p>
      All directive files go through a rigorous validation process
-      through <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/ConfigSchema/">
+      through <a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ConfigSchema/Validator.php">
      library/HTMLPurifier/ConfigSchema/Validator.php</a>, as well
      as some basic checks during building. While
      listing every error out here is out-of-scope for this document, we
@@ -339,7 +374,7 @@ Test.Example</pre>
      The most difficult part is translating the Interchange member variable (valueAliases)
      into a directive file key (VALUE-ALIASES), but there's a one-to-one
      correspondence currently. If the two formats diverge, any discrepancies
-      will be described in <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php">
+      will be described in <a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php">
      library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php</a>.
    </p>

@@ -370,7 +405,8 @@ Test.Example</pre>
      data. There is also an XML serializer, which is used to build documentation.
    </p>

-    <div id="version">$Id$</div>
-
  </body>
 </html>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/docs/dev-flush.html
+++ b/docs/dev-flush.html
@@ -62,6 +62,7 @@
    do.
 </p>

-<div id="version">$Id$</div>
-
 </body></html>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/docs/dev-includes.txt
+++ b/docs/dev-includes.txt
@@ -277,3 +277,5 @@ DEPRECATED-VERSION: If the directive was deprecated, when was it deprecated?
 DEPRECATED-USE: If the directive was deprecated, what should the user use now?
 REQUIRES: What classes does this configuration directive require, but are
    not part of the HTML Purifier core?
+
+    vim: et sw=4 sts=4
--- a/docs/dev-naming.html
+++ b/docs/dev-naming.html
@@ -77,6 +77,7 @@ help you find the correct functionality more quickly.  Here they are:</p>

 </dl>

-<div id="version">$Id$</div>
-
 </body></html>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/docs/dev-optimization.html
+++ b/docs/dev-optimization.html
@@ -27,6 +27,7 @@ that itch, put it here!</p>
    <li>Parallelize strategies</li>
 </ul>

-<div id="version">$Id$</div>
-
 </body></html>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/docs/dev-progress.html
+++ b/docs/dev-progress.html
@@ -303,6 +303,7 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>

 </table>

-<div id="version">$Id$</div>
-
 </body></html>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/docs/enduser-customize.html
+++ b/docs/enduser-customize.html
@@ -18,12 +18,11 @@
 <div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>

 <p>
-  You may have heard of the <a href="dev-advanced-api.html">Advanced API</a>.
-  If you're interested in reading dry prose and boring functional
-  specifications, feel free to click that link to get a no-nonsense overview
-  on the Advanced API. For the rest of us, there's this tutorial. By the time
-  you're finished reading this, you should have a pretty good idea on
-  how to implement custom tags and attributes that HTML Purifier may not have.
+  HTML Purifier has this quirk where if you try to allow certain elements or
+  attributes, HTML Purifier will tell you that it's not supported, and that
+  you should go to the forums to find out how to implement it. Well, this
+  document explains how to implement elements and attributes which HTML Purifier
+  doesn't support out of the box.
 </p>

 <h2>Is it necessary?</h2>
@@ -84,17 +83,6 @@
  limited to translations) above or below other corresponding text.
 </p>

-<h3>XHTML 2.0</h3>
-
-<p>
-  <a href="http://www.w3.org/TR/xhtml2/">XHTML 2.0</a> is still a
-  working draft, so any elements introduced in the
-  specification have not been implemented and will not be implemented
-  until we get a recommendation or proposal.  Because XHTML 2.0 is
-  an entirely new markup language, implementing rules for it will be
-  no easy task.
-</p>
-
 <h3>HTML 5</h3>

 <p>
@@ -156,9 +144,9 @@
 </p>

 <pre>$config = HTMLPurifier_Config::createDefault();
-$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
-$config->set('HTML', 'DefinitionRev', 1);
-$def = $config->getHTMLDefinition(true);</pre>
+$config-&gt;set('HTML.DefinitionID', 'enduser-customize.html tutorial');
+$config-&gt;set('HTML.DefinitionRev', 1);
+$def = $config-&gt;getHTMLDefinition(true);</pre>

 <p>
  Assuming that HTML Purifier has already been properly loaded (hint:
@@ -211,10 +199,10 @@ $def = $config->getHTMLDefinition(true);</pre>
 </p>

 <pre>$config = HTMLPurifier_Config::createDefault();
-$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
-$config->set('HTML', 'DefinitionRev', 1);
-<strong>$config->set('Core', 'DefinitionCache', null); // remove this later!</strong>
-$def = $config->getHTMLDefinition(true);</pre>
+$config-&gt;set('HTML.DefinitionID', 'enduser-customize.html tutorial');
+$config-&gt;set('HTML.DefinitionRev', 1);
+<strong>$config-&gt;set('Cache.DefinitionImpl', null); // TODO: remove this later!</strong>
+$def = $config-&gt;getHTMLDefinition(true);</pre>

 <p>
  A few things should be mentioned about the caching mechanism before
@@ -267,10 +255,10 @@ $def = $config->getHTMLDefinition(true);</pre>
 </p>

 <pre>$config = HTMLPurifier_Config::createDefault();
-$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
-$config->set('HTML', 'DefinitionRev', 1);
-$config->set('Core', 'DefinitionCache', null); // remove this later!
-$def = $config->getHTMLDefinition(true);
+$config-&gt;set('HTML.DefinitionID', 'enduser-customize.html tutorial');
+$config-&gt;set('HTML.DefinitionRev', 1);
+$config-&gt;set('Cache.DefinitionImpl', null); // remove this later!
+$def = $config-&gt;getHTMLDefinition(true);
 <strong>$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');</strong></pre>

 <p>
@@ -372,10 +360,10 @@ $def = $config->getHTMLDefinition(true);

 <p>
  For a complete list, consult
-  <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/AttrTypes.php"><code>library/HTMLPurifier/AttrTypes.php</code></a>;
+  <a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/AttrTypes.php"><code>library/HTMLPurifier/AttrTypes.php</code></a>;
  more information on attributes that accept parameters can be found on their
  respective includes in
-  <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/AttrDef/"><code>library/HTMLPurifier/AttrDef</code></a>.
+  <a href="http://repo.or.cz/w/htmlpurifier.git?a=tree;hb=HEAD;f=library/HTMLPurifier/AttrDef"><code>library/HTMLPurifier/AttrDef</code></a>.
 </p>

 <p>
@@ -385,11 +373,11 @@ $def = $config->getHTMLDefinition(true);
 </p>

 <pre>$config = HTMLPurifier_Config::createDefault();
-$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
-$config->set('HTML', 'DefinitionRev', 1);
-$config->set('Core', 'DefinitionCache', null); // remove this later!
-$def = $config->getHTMLDefinition(true);
-<strong>$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
+$config-&gt;set('HTML.DefinitionID', 'enduser-customize.html tutorial');
+$config-&gt;set('HTML.DefinitionRev', 1);
+$config-&gt;set('Cache.DefinitionImpl', null); // remove this later!
+$def = $config-&gt;getHTMLDefinition(true);
+<strong>$def-&gt;addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
  array('_blank','_self','_target','_top')
 ));</strong></pre>

@@ -724,7 +712,7 @@ $def = $config->getHTMLDefinition(true);
    or more flow elements, but no nested <code>form</code>s</strong></li>
  <li>What attributes does the element allow that are general? <strong>Common</strong></li>
  <li>What attributes does the element allow that are specific to this element? <strong>A whole bunch, see ATTLIST;
-    we're going to the vital ones: <code>action</code>, <code>method</code> and <code>name</code></strong></li>
+    we're going to do the vital ones: <code>action</code>, <code>method</code> and <code>name</code></strong></li>
 </ol>

 <p>
@@ -732,14 +720,14 @@ $def = $config->getHTMLDefinition(true);
 </p>

 <pre>$config = HTMLPurifier_Config::createDefault();
-$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
-$config->set('HTML', 'DefinitionRev', 1);
-$config->set('Core', 'DefinitionCache', null); // remove this later!
-$def = $config->getHTMLDefinition(true);
-$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
+$config-&gt;set('HTML.DefinitionID', 'enduser-customize.html tutorial');
+$config-&gt;set('HTML.DefinitionRev', 1);
+$config-&gt;set('Cache.DefinitionImpl', null); // remove this later!
+$def = $config-&gt;getHTMLDefinition(true);
+$def-&gt;addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
  array('_blank','_self','_target','_top')
 ));
-<strong>$form = $def->addElement(
+<strong>$form = $def-&gt;addElement(
  'form',   // name
  'Block',  // content set
  'Flow', // allowed children
@@ -750,7 +738,7 @@ $def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
    'name' => 'ID'
  )
 );
-$form->excludes = array('form' => true);</strong></pre>
+$form-&gt;excludes = array('form' => true);</strong></pre>

 <p>
  Each of the parameters corresponds to one of the questions we asked.
@@ -764,7 +752,7 @@ $form->excludes = array('form' => true);</strong></pre>
 <p>
  And that's all there is to it! Implementing the rest of the form
  module is left as an exercise to the user; to see more examples
-  check the <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/HTMLModule/"><code>library/HTMLPurifier/HTMLModule/</code></a> directory
+  check the <a href="http://repo.or.cz/w/htmlpurifier.git?a=tree;hb=HEAD;f=library/HTMLPurifier/HTMLModule"><code>library/HTMLPurifier/HTMLModule/</code></a> directory
  in your local HTML Purifier installation.
 </p>

@@ -789,10 +777,11 @@ $form->excludes = array('form' => true);</strong></pre>
 </p>

 <ul>
-  <li><a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/HTMLModule.php"><code>library/HTMLPurifier/HTMLModule.php</code></a></li>
-  <li><a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/ElementDef.php"><code>library/HTMLPurifier/ElementDef.php</code></a></li>
+  <li><a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/HTMLModule.php"><code>library/HTMLPurifier/HTMLModule.php</code></a></li>
+  <li><a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ElementDef.php"><code>library/HTMLPurifier/ElementDef.php</code></a></li>
 </ul>

-<div id="version">$Id: enduser-tidy.html 1158 2007-06-18 19:26:29Z Edward $</div>
-
 </body></html>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/docs/enduser-id.html
+++ b/docs/enduser-id.html
@@ -31,7 +31,7 @@ by default.</p>

 <p>IDs, however, are quite useful functionality to have, so if users start
 complaining about broken anchors you'll probably want to turn them back on
-with %HTML.EnableAttrID. But before you go mucking around with the config
+with %Attr.EnableID. But before you go mucking around with the config
 object, it's probably worth to take some precautions to keep your page
 validating. Why?</p>

@@ -56,8 +56,8 @@ validating. Why?</p>
 deal with the most obvious solution: preventing users from using any IDs that
 appear elsewhere on the document.  The method is simple:</p>

-<pre>$config->set('HTML', 'EnableAttrID', true);
-$config->set('Attr', 'IDBlacklist' array(
+<pre>$config-&gt;set('Attr.EnableID', true);
+$config-&gt;set('Attr.IDBlacklist', array(
    'list', 'of', 'attribute', 'values', 'that', 'are', 'forbidden'
 ));</pre>

@@ -88,8 +88,8 @@ all, they might have simply specified a duplicate ID by accident.</p>
 <p>This method, too, is quite simple: add a prefix to all user IDs. With this
 code:</p>

-<pre>$config->set('HTML', 'EnableAttrID', true);
-$config->set('Attr', 'IDPrefix', 'user_');</pre>
+<pre>$config-&gt;set('Attr.EnableID', true);
+$config-&gt;set('Attr.IDPrefix', 'user_');</pre>

 <p>...this:</p>

@@ -109,7 +109,7 @@ user_ to the beginning.&quot;</p>
 nothing about multiple HTML Purifier outputs on one page.  Thus, we have
 a second configuration value to piggy-back off of: %Attr.IDPrefixLocal:</p>

-<pre>$config->set('Attr', 'IDPrefixLocal', 'comment' . $id . '_');</pre>
+<pre>$config-&gt;set('Attr.IDPrefixLocal', 'comment' . $id . '_');</pre>

 <p>This new attributes does nothing but append on to regular IDPrefix, but is
 special in that it is volatile: it's value is determined at run-time and
@@ -137,11 +137,12 @@ anchors is beyond me.</p>

 <p>To revert back to pre-1.2.0 behavior, simply:</p>

-<pre>$config->set('HTML', 'EnableAttrID', true);</pre>
+<pre>$config-&gt;set('Attr.EnableID', true);</pre>

 <p>Don't come crying to me when your page mysteriously stops validating, though.</p>

-<div id="version">$Id$</div>
-
 </body>
 </html>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/docs/enduser-overview.txt
+++ b/docs/enduser-overview.txt
@@ -55,3 +55,5 @@ HTML tags.  Things like blog comments are, in all likelihood, most appropriately
 written in an extremely restrictive set of markup that doesn't require
 all this functionality (or not written in HTML at all), although this may
 be changing in the future with the addition of levels of filtering.
+
+    vim: et sw=4 sts=4
--- a/docs/enduser-security.txt
+++ b/docs/enduser-security.txt
@@ -14,3 +14,5 @@ to be effective. Things to remember:

 4. CSS: document pending
 Explain which CSS styles we blocked and why.
+
+    vim: et sw=4 sts=4
--- a/docs/enduser-slow.html
+++ b/docs/enduser-slow.html
@@ -115,3 +115,6 @@ if you decide to do that! Especially if you port HTML Purifier to C++.

 </body>
 </html>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/docs/enduser-tidy.html
+++ b/docs/enduser-tidy.html
@@ -76,7 +76,7 @@ associated with it, although it may change depending on your doctype.</p>
 change the level of cleaning by setting the %HTML.TidyLevel configuration
 directive:</p>

-<pre>$config->set('HTML', 'TidyLevel', 'heavy'); // burn baby burn!</pre>
+<pre>$config-&gt;set('HTML.TidyLevel', 'heavy'); // burn baby burn!</pre>

 <h2>Is the light level really light?</h2>

@@ -165,17 +165,17 @@ smoketest</a>.</p>
 so happy about the br@clear implementation. That's perfectly fine!
 HTML Purifier will make accomodations:</p>

-<pre>$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');
-$config->set('HTML', 'TidyLevel', 'heavy'); // all changes, minus...
-<strong>$config->set('HTML', 'TidyRemove', 'br@clear');</strong></pre>
+<pre>$config-&gt;set('HTML.Doctype', 'XHTML 1.0 Transitional');
+$config-&gt;set('HTML.TidyLevel', 'heavy'); // all changes, minus...
+<strong>$config-&gt;set('HTML.TidyRemove', 'br@clear');</strong></pre>

 <p>That third line does the magic, removing the br@clear fix
 from the module, ensuring that <code>&lt;br clear="both" /&gt;</code>
 will pass through unharmed. The reverse is possible too:</p>

-<pre>$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');
-$config->set('HTML', 'TidyLevel', 'none'); // no changes, plus...
-<strong>$config->set('HTML', 'TidyAdd', 'p@align');</strong></pre>
+<pre>$config-&gt;set('HTML.Doctype', 'XHTML 1.0 Transitional');
+$config-&gt;set('HTML.TidyLevel', 'none'); // no changes, plus...
+<strong>$config-&gt;set('HTML.TidyAdd', 'p@align');</strong></pre>

 <p>In this case, all transformations are shut off, except for the p@align
 one, which you found handy.</p>
@@ -225,6 +225,7 @@ and if that still doesn't satisfy your appetite, do some fine-tuning.
 Other than that, don't worry about it: this all works silently and
 effectively in the background.</p>

-<div id="version">$Id$</div>
-
 </body></html>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/docs/enduser-uri-filter.html
+++ b/docs/enduser-uri-filter.html
@@ -160,27 +160,14 @@
 </p>

 <pre>$uri = $config->getDefinition('URI');
-$uri->addFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>());</pre>
+$uri->addFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>(), $config);</pre>

 <p>
-  If you want to be really fancy, you can define a configuration directive
-  for your filter and have HTML Purifier automatically manage whether or
-  not your filter gets loaded or not (this is how internal filters manage
-  things):
+    After adding a filter, you won't be able to set configuration directives.
+    Structure your code accordingly.
 </p>

-<pre>HTMLPurifier_ConfigSchema::define(
-    'URI', '<strong>NameOfFilter</strong>', false, 'bool',
-    '<strong>What your filter does.</strong>'
-);
-$uri = $config->getDefinition('URI', true);
-$uri->registerFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>());
-</pre>
-
-<p>
-  Now, your filter will only be called when %URI.<strong>NameOfFilter</strong>
-  is set to true.
-</p>
+<!-- XXX: link to new documentation system -->

 <h2>Post-filter</h2>

@@ -205,12 +192,13 @@ $uri->registerFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>())

 <p>
  Check the
-  <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/URIFilter/">URIFilter</a>
-  directory for more implementation examples, and see <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/docs/proposal-new-directives.txt">the
+  <a href="http://repo.or.cz/w/htmlpurifier.git?a=tree;hb=HEAD;f=library/HTMLPurifier/URIFilter">URIFilter</a>
+  directory for more implementation examples, and see <a href="proposal-new-directives.txt">the
  new directives proposal document</a> for ideas on what could be implemented
  as a filter.
 </p>

-<div id="version">$Id$</div>
-
 </body></html>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/docs/enduser-utf8.html
+++ b/docs/enduser-utf8.html
@@ -481,7 +481,7 @@ if we don't know it's character encoding? And how do we figure out
 the character encoding, if we don't know the contents of the
 <code>META</code> tag?</p>

-<p>Fortunantely for us, the characters we need to write the
+<p>Fortunately for us, the characters we need to write the
 <code>META</code> are in ASCII, which is pretty much universal
 over every character encoding that is in common use today. So,
 all the web-browser has to do is parse all the way down until
@@ -526,7 +526,7 @@ you don't have to use those user-unfriendly entities.</p>

 <h3 id="whyutf8-user">User-friendly</h3>

-<p>Websites encoded in Latin-1 (ISO-8859-1) which ocassionally need
+<p>Websites encoded in Latin-1 (ISO-8859-1) which occasionally need
 a special character outside of their scope often will use a character
 entity reference to achieve the desired effect. For instance, &theta; can be
 written <code>&amp;theta;</code>, regardless of the character encoding's
@@ -584,13 +584,15 @@ disappeared off the web, so I am linking to the Web Archive copy.)</p>
 <h4 id="whyutf8-forms-urlencoded"><code>application/x-www-form-urlencoded</code></h4>

 <p>This is the Content-Type that GET requests must use, and POST requests
-use by default. It involves the ubiquituous percent encoding format that
+use by default. It involves the ubiquitous percent encoding format that
 looks something like: <code>%C3%86</code>. There is no official way of
 determining the character encoding of such a request, since the percent
 encoding operates on a byte level, so it is usually assumed that it
 is the same as the encoding the page containing the form was submitted
-in. You'll run into very few problems if you only use characters in
-the character encoding you chose.</p>
+in. (<a href="http://tools.ietf.org/html/rfc3986#section-2.5">RFC 3986</a>
+recommends that textual identifiers be translated to UTF-8; however, browser
+compliance is spotty.) You'll run into very few problems
+if you only use characters in the character encoding you chose.</p>

 <p>However, once you start adding characters outside of your encoding
 (and this is a lot more common than you may think: take curly
@@ -672,7 +674,7 @@ it up to the module iconv to do the dirty work.</p>
 <p>This approach, however, is not perfect. iconv is blithely unaware
 of HTML character entities. HTML Purifier, in order to
 protect against sophisticated escaping schemes, normalizes all character
-and numeric entitie references before processing the text. This leads to
+and numeric entity references before processing the text. This leads to
 one important ramification:</p>

 <p><strong>Any character that is not supported by the target character
@@ -768,7 +770,7 @@ the text when you try to convert it to UTF-8. You'll have to convert
 it to a binary field, convert it to a Shift-JIS field (the real encoding),
 and then finally to UTF-8. Many a website had pages irreversibly mangled
 because they didn't realize that they'd been deluding themselves about
-the character encoding all along, don't become the next victim.</p>
+the character encoding all along; don't become the next victim.</p>

 <p>For <a href="http://www.postgresql.org/docs/8.2/static/multibyte.html">PostgreSQL</a>, there appears to be no direct way to change the
 encoding of a database (as of 8.2). You will have to dump the data, and then reimport
@@ -788,7 +790,7 @@ usually supported).</p>

 <h4 id="migrate-db-binary">Binary</h4>

-<p>Due to the abovementioned compatibility issues, a more interoperable
+<p>Due to the aforementioned compatibility issues, a more interoperable
 way of storing UTF-8 text is to stuff it in a binary datatype.
 <code>CHAR</code> becomes <code>BINARY</code>, <code>VARCHAR</code> becomes
 <code>VARBINARY</code> and <code>TEXT</code> becomes <code>BLOB</code>.
@@ -915,8 +917,8 @@ anyway. So we'll deal with the other two edge cases.</p>
 would like to read your website but get heaps of question marks or
 other meaningless characters. Fixing this problem requires the
 installation of a font or language pack which is often highly
-dependent on what the language is. <a href="http://bn.wikipedia.org/wiki/%E0%A6%89%E0%A6%87%E0%A6%95%E0%A6%BF%E0%A6%AA%E0%A7%87%E0%A6%A1%E0%A6%BF%E0%A6%AF%E0%A6%BC%E0%A6%BE:Bangla_script_display_help">Here is an example</a>
-of such a help file for the Bengali language, I am sure there are
+dependent on what the language is. <a href="http://bn.wikipedia.org/wiki/%E0%A6%89%E0%A6%87%E0%A6%95%E0%A6%BF%E0%A6%AA%E0%A7%87%E0%A6%A1%E0%A6%BF%E0%A6%AF%E0%A6%BC%E0%A6%BE:Bangla_script_display_and_input_help">Here is an example</a>
+of such a help file for the Bengali language; I am sure there are
 others out there too. You just have to point users to the appropriate
 help file.</p>

@@ -926,7 +928,7 @@ help file.</p>
 characters embedded in what otherwise would be very bland ASCII are
 letters of the
 <a href="http://en.wikipedia.org/wiki/International_Phonetic_Alphabet">International
-Phonetic Alphabet (IPA)</a>, use to designate pronounciations in a very standard
+Phonetic Alphabet (IPA)</a>, used to designate pronunciations in a very standard
 manner (you probably see them all the time in your dictionary). Your
 average font probably won't have support for all of the IPA characters
 like &#664; (bilabial click) or &#658; (voiced postalveolar fricative).
@@ -939,11 +941,11 @@ most widely used browser in the entire world? Microsoft IE 6
 is not smart enough to borrow from other fonts when a character isn't
 present, so more often than not you'll be slapped with a nice big &#65533;.
 To get things to work, MSIE 6 needs a little nudge. You could configure it
-to use a different font to render the text, but you can acheive the same
+to use a different font to render the text, but you can achieve the same
 effect by selectively changing the font for blocks of special characters
 to known good Unicode fonts.</p>

-<p>Fortunantely, the folks over at Wikipedia have already done all the
+<p>Fortunately, the folks over at Wikipedia have already done all the
 heavy lifting for you. Get the CSS from the horses mouth here:
 <a href="http://en.wikipedia.org/wiki/MediaWiki:Common.css">Common.css</a>,
 and search for &quot;.IPA&quot; There are also a smattering of
@@ -970,7 +972,7 @@ users.</p>
 <h3 id="migrate-variablewidth">Dealing with variable width in functions</h3>

 <p>When people claim that PHP6 will solve all our Unicode problems, they're
-misinformed. It will not fix any of the abovementioned troubles. It will,
+misinformed. It will not fix any of the aforementioned troubles. It will,
 however, fix the problem we are about to discuss: processing UTF-8 text
 in PHP.</p>

@@ -1033,7 +1035,7 @@ directory.</p>
 <p>Well, that's it. Hopefully this document has served as a very
 practical springboard into knowledge of how UTF-8 works.  You may have
 decided that you don't want to migrate yet: that's fine, just know
-what will happen to your output and what bug reports you may recieve.</p>
+what will happen to your output and what bug reports you may receive.</p>

 <p>Many other developers have already discussed the subject of Unicode,
 UTF-8 and internationalization, and I would like to defer to them for
@@ -1053,3 +1055,6 @@ a more in-depth look into character sets and encodings.</p>

 </body>
 </html>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/docs/enduser-youtube.html
+++ b/docs/enduser-youtube.html
@@ -67,15 +67,15 @@ into your documents. YouTube's code goes like this:</p>
 </ol>

 <p>What point 2 means is that if we have code like <code>&lt;span
-class=&quot;embed-youtube&quot;&gt;AyPzM5WK8ys&lt;/span&gt;</code> your
+class=&quot;youtube-embed&quot;&gt;AyPzM5WK8ys&lt;/span&gt;</code> your
 application can reconstruct the full object from this small snippet that
 passes through HTML Purifier <em>unharmed</em>.
-<a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/Filter/YouTube.php">Show me the code!</a></p>
+<a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/Filter/YouTube.php">Show me the code!</a></p>

 <p>And the corresponding usage:</p>

 <pre>&lt;?php
-    $config->set('Filter', 'YouTube', true);
+    $config-&gt;set('Filter.YouTube', true);
 ?&gt;</pre>

 <p>There is a bit going in the two code snippets, so let's explain.</p>
@@ -148,3 +148,6 @@ with the core!</p>

 </body>
 </html>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/docs/examples/basic.php
+++ b/docs/examples/basic.php
@@ -8,8 +8,8 @@ require_once '../../library/HTMLPurifier.auto.php';
 $config = HTMLPurifier_Config::createDefault();

 // configuration goes here:
-$config->set('Core', 'Encoding', 'UTF-8'); // replace with your encoding
-$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional'); // replace with your doctype
+$config->set('Core.Encoding', 'UTF-8'); // replace with your encoding
+$config->set('HTML.Doctype', 'XHTML 1.0 Transitional'); // replace with your doctype

 $purifier = new HTMLPurifier($config);

@@ -20,3 +20,4 @@ $pure_html = $purifier->purify($html);

 echo '<pre>' . htmlspecialchars($pure_html) . '</pre>';

+// vim: et sw=4 sts=4
--- a/docs/fixquotes.htc
+++ b/docs/fixquotes.htc
@@ -4,3 +4,6 @@ function init() {
  element.innerHTML = '&#8220;'+element.innerHTML+'&#8221;';
 }
 </script>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/docs/index.html
+++ b/docs/index.html
@@ -98,8 +98,8 @@ the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
 <table class="table">

 <thead><tr>
-    <th width="10%">Type</th>
-    <th width="20%">Name</th>
+    <th style="width:10%">Type</th>
+    <th style="width:20%">Name</th>
    <th>Description</th>
 </tr></thead>

@@ -117,6 +117,12 @@ the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
    <td>Common security issues that may still arise (half-baked).</td>
 </tr>

+<tr>
+    <td>Development</td>
+    <td><a href="dev-config-bcbreaks.txt">Config BC Breaks</a></td>
+    <td>Backwards-incompatible changes in HTML Purifier 4.0.0</td>
+</tr>
+
 <tr>
    <td>Development</td>
    <td><a href="dev-code-quality.txt">Code Quality Issues</a></td>
@@ -175,6 +181,8 @@ the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>

 </table>

-<div id="version">$Id$</div>
 </body>
 </html>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/docs/proposal-colors.html
+++ b/docs/proposal-colors.html
@@ -42,7 +42,8 @@ into the mix.</li>
 something like that?</li>
 </ol>

-<div id="version">$Id$</div>
-
 </body>
 </html>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/docs/proposal-config.txt
+++ b/docs/proposal-config.txt
@@ -19,3 +19,5 @@ Definition objects are complex datatypes influenced by their respective
 directive namespaces (HTMLDefinition with HTML and CSSDefinition with CSS).
 If any of these directives is updated, HTML Purifier forces the definition
 to be regenerated.
+
+    vim: et sw=4 sts=4
--- a/docs/proposal-css-extraction.txt
+++ b/docs/proposal-css-extraction.txt
@@ -30,3 +30,5 @@ Beyond that, HTML Purifier can magically merge common CSS values together,
 and a whole manner of other heuristic things.  HTML Purifier should also
 make it easy for an admin to re-style the HTML semantically. Speed is not
 an issue. Also, better WYSIWYG editors are needed.
+
+    vim: et sw=4 sts=4
--- a/docs/proposal-errors.txt
+++ b/docs/proposal-errors.txt
@@ -0,0 +1,211 @@
+Considerations for ErrorCollection
+
+Presently, HTML Purifier takes a code-execution centric approach to handling
+errors. Errors are organized and grouped according to which segment of the
+code triggers them, not necessarily the portion of the input document that
+triggered the error. This means that errors are pseudo-sorted by category,
+rather than location in the document.
+
+One easy way to "fix" this problem would be to re-sort according to line number.
+However, the "category" style information we derive from naively following
+program execution is still useful. After all, each of the strategies which
+can report errors still process the document mostly linearly. Furthermore,
+not only do they process linearly, but the way they pass off operations to
+sub-systems mirrors that of the document. For example, AttrValidator will
+linearly proceed through elements, and on each element will use AttrDef to
+validate those contents. From there, the attribute might have more
+sub-components, which have execution passed off accordingly.
+
+In fact, each strategy handles a very specific class of "error."
+
+RemoveForeignElements   - element tokens
+MakeWellFormed          - element token ordering
+FixNesting              - element token ordering
+ValidateAttributes      - attributes of elements
+
+The crucial point is that while we care about the hierarchy governing these
+different errors, we *don't* care about any other information about what actually
+happens to the elements. This brings up another point: if HTML Purifier fixes
+something, this is not really a notice/warning/error; it's really a suggestion
+of a way to fix the aforementioned defects.
+
+In short, the refactoring to take this into account kinda sucks.
+
+Errors should not be recorded in the order that they are reported. Instead, they
+should be bound to the line (and preferably element) in which they were found.
+This means we need some way to uniquely identify every element in the document,
+which doesn't presently exist. An easy way of adding this would be to track
+line columns. An important ramification of this is that we *must* use the
+DirectLex implementation.
+
+    1. Implement column numbers for DirectLex [DONE!]
+    2. Disable error collection when not using DirectLex [DONE!]
+
+Next, we need to re-orient all of the error declarations to place CurrentToken
+of utmost importance. Since this is passed via Context, it's not always clear
+if that's available. ErrorCollector should complain HARD if it isn't available.
+There are some locations where we don't have a token available. These include:
+
+    * Lexing - this can actually have a row and column, but NOT correspond to
+      a token
+    * End of document errors - bump this to the end
+
+Actually, we *don't* have to complain if CurrentToken isn't available; we just
+set it as a document-wide error. And actually, nothing needs to be done here.
+
+Something interesting to consider is whether or not we care about the locations
+of attributes and CSS properties, i.e. the sub-objects that compose these things.
+In terms of consistency, at the very least attributes should have column/line
+numbers attached to them. However, this may be overkill, as attributes are
+uniquely identifiable. You could go even further, with CSS, but they are also
+uniquely identifiable.
+
+Bottom-line is, however, this information must be available, in the form of the
+CurrentAttribute and CurrentCssProperty (theoretical) context variables, and
+it must be used to organize the errors that the sub-processes may throw.
+There is also a hierarchy of sorts that would make merging this into one context
+variable sensible, were it not for HTML's reasonably rigid structure.
+A CSS property will never contain an HTML attribute. So we won't ever get
+recursive relations, and having multiple depths won't ever make sense. Leave
+this be.
+
+We already have this information, and consequently, using start and end is
+*unnecessary*, so long as the context variables are set appropriately. We don't
+care if an error was thrown by an attribute transform or an attribute definition;
+to the end user these are the same (for a developer, they are different, but
+they're better off with a stack trace (which we should add support for) in such
+cases).
+
+    3. Remove start()/end() code. Don't get rid of recursion, though [DONE]
+    4. Setup ErrorCollector to use context information to setup hierarchies.
+       This may require a different internal format. Use objects if it gets
+       complex. [DONE]
+
+       ASIDE
+            More on this topic: since we are now binding errors to lines
+            and columns, a particular error can have three relationships to that
+            specific location:
+
+            1. The token at that location directly
+                RemoveForeignElements
+                AttrValidator (transforms)
+                MakeWellFormed
+            2. A "component" of that token (i.e. attribute)
+                AttrValidator (removals)
+            3. A modification to that node (i.e. contents from start to end
+               token) as a whole
+                FixNesting
+
+            This needs to be marked accordingly. In the presentation, it might
+            make sense to keep (3) separate and have (2) be a sublist of (1). (1) can
+            be a closing tag, in which case (3) makes no sense at all, OR it
+            should be related with its opening tag (this may not necessarily
+            be possible before MakeWellFormed is run).
+
+            So, the line and column counts as our identifier, so:
+
+            $errors[$line][$col] = ...
+
+            Then, we need to identify case 1, 2 or 3. They are identified as
+            such:
+
+            1. Need some sort of semaphore in RemoveForeignElements, etc.
+            2. If CurrentAttr/CurrentCssProperty is non-null
+            3. Default (FixNesting, MakeWellFormed)
+
+            One consideration about (1) is that it usually is actually a
+            (3) modification, but we have no way of knowing about that because
+            of various optimizations. However, they can probably be treated
+            the same. The other difficulty is that (3) is never a line and
+            column; rather, it is a range (i.e. a duple) and telling the user
+            the very start of the range may confuse them. For example,
+
+            <b>Foo<div>bar</div></b>
+            ^     ^
+
+            The node being operated on is <b>, so the error would be assigned
+            to the first caret, with a "node reorganized" error. Then, the
+            ChildDef would have submitted its own suggestions and errors with
+            regard to what's going in the internals.  So I suppose this is
+            ok. :-)
+
+            Now, the structure of the earlier mentioned ... would be something
+            like this:
+
+            object {
+                type = (token|attr|property),
+                value, // appropriate for type
+                errors => array(),
+                sub-errors = [recursive],
+            }
+
+            This helps us keep things agnostic. It is also complex
+            enough to warrant an object.
+
+So, more wanking about the object format is in order. The way HTML Purifier is
+currently set up, the only possible hierarchy is:
+
+    token -> attr -> css property
+
+These relations do not exist all of the time; a comment or end token would not
+ever have any attributes, and non-style attributes would never have CSS properties
+associated with them.
+
+I believe that it is worth supporting multiple paths. At some point, we might
+have a hierarchy like:
+
+    * -> syntax
+      -> token -> attr -> css property
+                       -> url
+               -> css stylesheet <style>
+
+et cetera. Now, one of the practical implications of this is that every "node"
+on our tree is well-defined, so in theory it should be possible to either 1.
+create a separate class for each error struct, or 2. embed this information
+directly into HTML Purifier's token stream.  Embedding the information in the
+token stream is not a terribly good idea, since tokens can be removed, etc.
+So that leaves us with 1... and if we use a generic interface we can cut down
+on a lot of code we might need. So let's leave it like this.
+
+~~~~
+
+Then we setup suggestions.
+
+    5. Setup a separate error class which tells the user any modifications
+       HTML Purifier made.
+
+Some information about this:
+
+Our current paradigm is to tell the user what HTML Purifier did to the HTML.
+This is the most natural mode of operation, since that's what HTML Purifier
+is all about; it was not meant to be a validator.
+
+However, most other people have experience dealing with a validator. In cases
+where HTML Purifier unambiguously does the right thing, simply giving the user
+the correct version isn't a bad idea, but problems arise when:
+
+- The user has such bad HTML we do something odd, when we should have just
+  flagged the HTML as an error. Such examples are when we do things like
+  remove text from directly inside a <table> tag. It was probably meant to
+  be in a <td> tag or be outside the table, but we're not smart enough to
+  realize this so we just remove it. In such a case, we should tell the user
+  that there was foreign data in the table, but then we shouldn't "demand"
+  the user remove the data; it's more of a "here's a possible way of
+  rectifying the problem"
+
+- Giving line context for input is hard enough, but feasible; giving output
+  line context will be extremely difficult due to shifting lines; we'd probably
+  have to track what the tokens are and then find the appropriate out context
+  and it's not guaranteed to work etc etc etc.
+
+````````````
+
+Don't forget to spruce up output.
+
+    6. Output needs to automatically give line and column numbers, basically
+       "at line" on steroids. Look at W3C's output; it's ok. [PARTIALLY DONE]
+
+       - We need a standard CSS to apply (check demo.css for some starting
+         styling; some buttons would also be hip)
+
+    vim: et sw=4 sts=4
--- a/docs/proposal-filter-levels.txt
+++ b/docs/proposal-filter-levels.txt
@@ -133,3 +133,5 @@ Dramatic - border, list-style-position (list-style), margin, padding,

 Dramatic elements substantially change the look of text in ways that should
 probably have been reserved to other areas.
+
+    vim: et sw=4 sts=4
--- a/docs/proposal-language.txt
+++ b/docs/proposal-language.txt
@@ -60,3 +60,5 @@ Neat functionality:
 -  Roman numeral formatting

 Items marked with a + likely need to be addressed by HTML Purifier
+
+    vim: et sw=4 sts=4
--- a/docs/proposal-new-directives.txt
+++ b/docs/proposal-new-directives.txt
@@ -41,3 +41,4 @@ implemented, give us a ring, and we'll move it up the priority chain.
    absolute DNS.  While this is actually the preferred method according to
    the RFC, most people opt to use a relative domain name relative to . (root).

+    vim: et sw=4 sts=4
--- a/docs/proposal-plists.txt
+++ b/docs/proposal-plists.txt
@@ -0,0 +1,218 @@
+THE UNIVERSAL DESIGN PATTERN: PROPERTIES
+Steve Yegge
+
+Implementation:
+    get(name)
+    put(name, value)
+    has(name)
+    remove(name)
+    iteration, with filtering [this will be our namespaces]
+    parent
+
+Representations:
+    - Keys are strings
+    - It's nice to not need to quote keys (if we formulate our own language,
+      consider this)
+    - Property not present representation (key missing)
+    - Frequent removal/re-add may have null help. If null is valid, use
+      another value. (PHP semantics are weird here)
+
+Data structures:
+    - LinkedHashMap is wonderful (O(1) access and maintains order)
+    - Using a special property that points to the parent is usual
+    - Multiple inheritance possible, need rules for which to lookup first
+    - Iterative inheritance is best
+    - Consider performance!
+
+Deletion
+    - Tricky problem with inheritance
+    - Distinguish between "not found" and "look in my parent for the property"
+    [Maybe HTML Purifier won't allow deletion]
+
+Read/write asymmetry (it's correct!)
+
+Read-only plists
+    - Allow ability to freeze [this is what we have already]
+    - Don't overuse it
+
+Performance:
+    - Intern strings (PHP does this already)
+    - Don't be case-insensitive
+    - If all properties in a plist are known a-priori, you can use a "perfect"
+      hash function. Often overkill.
+    - Copy-on-read caching "plundering" reduces lookup, but uses memory and can
+      grow stale. Use as last resort.
+    - Refactoring to fields. Watch for API compatibility, system complexity,
+      and lack of flexibility.
+    - Refrigerator: external data-structure to hold plists
+
+Transient properties:
+    [Don't need to worry about this]
+    - Use a separate plist for transient properties
+    - Non-numeric override; numeric should ADD
+    - Deletion: removeTransientProperty() and transientlyRemoveProperty()
+
+Persistence:
+    - XML/JSON are good
+    - Text-based is good for readability, maintainability and bootstrapping
+    - Compressed binary format for network transport [not necessary]
+    - RDBMS or XML database
+
+Querying: [not relevant]
+    - XML database is nice for XPath/XQuery
+    - jQuery for JSON
+    - Just load it all into a program
+
+Backfills/Data integrity:
+    - Use usual methods
+    - Lazy backfill is a nice hack
+
+Type systems:
+    - Flags: ReadOnly, Permanent, DontEnum
+    - Typed properties aren't that useful [It's also Not-PHP]
+    - Separate meta-list of directive properties IS useful
+    - Duck typing is useful for systems designed fully around properties pattern
+
+Trade-off:
+    + Flexibility
+    + Extensibility
+    + Unit-testing/prototype-speed
+    - Performance
+    - Data integrity
+    - Navigability/Query-ability
+    - Reversibility (hard to go back)
+
+HTML Purifier
+
+We are not happy with our current system of defining configuration directives,
+because it has become clear that things will get a lot nicer if we allow
+multiple namespaces, and there are some features that naturally lend themselves
+to inheritance, which we do not really support well.
+
+One of the considered implementation changes would be to go from a structure
+like:
+
+array(
+    'Namespace' => array(
+        'Directive' => 'val1',
+        'Directive2' => 'val2',
+    )
+)
+
+to:
+
+array(
+    'Namespace.Directive' => 'val1',
+    'Namespace.Directive2' => 'val2',
+)
+
+The latter implementation takes more memory, however, and it makes it a bit
+complicated to grab all values from a namespace.
+
+The alternate implementation choice is to allow nested plists. This keeps
+iteration easy, but is problematic for inheritance (it would be difficult
+to distinguish a plist from an array) and retrieval (when specifying multiple
+namespaces we would need some multiple de-referencing).
+
+----
+
+We can bite the performance hit, and just do iteration with filter
+(the strncmp call should be relatively cheap). Then, users should be able
+to optimize doing something like:
+
+$config = HTMLPurifier_Config::createDefault();
+if (!file_exists('config.php')) {
+    // set up $config
+    $config->save('config.php');
+} else {
+    $config->load('config.php');
+}
+
+Or maybe memcache, or something. This means that "// set up $config" must
+not have any dynamic parts, or the user has to invalidate the cache when
+they do update it. We have to think about this a little more carefully; the
+file call might be more expensive.
+
+----
+
+This might get expensive, however, when we actually care about iterating
+over the configuration and want the actual values. So what about nesting the
+lists?
+
+"ns.sub.directive" => values['ns']['sub']['directive']
+
+We can distinguish between plists and arrays by using ArrayObjects for the
+plists, and regular arrays for the arrays? Alternatively, use ArrayObjects
+for the arrays, and regular arrays for the plists.
+
+----
+
+Implementation demands, and what has caused them:
+
+1. DefinitionCache, the HTML, CSS and URI namespaces have caches attached to them
+   Results:
+    - getBatchSerial()
+        - getBatch() : in general, the ability to traverse just a namespace
+
+2. AutoFormat/Filter, this is a plugin architecture, directives not hard-coded
+    - getBatch()
+
+3. Configuration form
+    - Namespaces used to organize directives
+
+Other than that, we have a pure plist. PERHAPS we should maintain separate things
+for these different demands.
+
+Issue 2: Directives for configuring the plugins are regular plists, but
+when enabling them, while it's "plist-ish", what you're really doing is adding
+them to an array of "autoformatters"/"filters" to enable. We can setup
+magic BC as well as in the new interface, but there should also be an
+add('AutoFormat', 'AutoParagraph'); which does the right thing.
+
+One thing to consider is whether or not inheritance rules will apply to these.
+I'd say yes. That means that they're still plisty, in fact, the underlying
+implementation will probably be a plist. However, they will get their OWN
+plists, and will NOT support nesting.
+
+Issue 1: Our current implementation is generally not efficient; md5(serialize($foo))
+is pretty expensive. So, I don't think there will be any problems if it
+gets "less" efficient, as long as we give users a properly fast alternative;
+DefinitionRev gives us a way to do this, by simply telling the user they must
+update it whenever they update Configuration directives as well. (There are
+obvious BC concerns here).
+
+In such a case, we simply iterate over our plist (performing full retrievals
+for each value), grab the entries we care about, and then serialize and hash.
+It's going to be slow either way, due to the ability of plists to inherit.
+If we ksort(), we don't have to traverse the entire array, however, the
+cost of a ksort() call may not be worth it.
+
+At this point, last time, I started worrying about the performance implications
+of allowing inheritance, and wondering whether or not I wanted to squash
+the plist. At first blush, our code might be under the assumption that
+accessing properties is cheap; but actually we prefer to copy out the value
+into a member variable if it's going to be used many times. With this in mind
+I don't think CPU consumption from a few nested function calls is going to
+be a problem. We *are* going to enforce a function only interface.
+
+The next issue at hand is how we're going to manage the "special" plists,
+which should still be able to be inherited. Basically, it means that multiple
+plists would be attached to the configuration object, which is not the
+best for memory performance. The alternative is to keep them all in one
+big plist, and then eat the one-time cost of traversing the entire plist
+to grab the appropriate values.
+
+I think at this point we can write the generic interface, and then set up separate
+plists if that ends up being necessary for performance (it probably won't.) Now
+let's code our generic plist implementation.
+
+----
+
+Iterating over the plist presents some problems. The way we've chosen to solve
+this is to squash all of the parents.
+
+----
+
+But I don't need iteration.
+
+    vim: et sw=4 sts=4
--- a/docs/ref-content-models.txt
+++ b/docs/ref-content-models.txt
@@ -46,3 +46,5 @@ is eliminated completely, in the latter case, the text of the node
 would is preserved (as the parent node does allow PCDATA).  Custom
 content model implementations probably are not the best way of handling
 these cases, instead, node bubbling should be implemented instead.
+
+    vim: et sw=4 sts=4
--- a/docs/ref-css-length.txt
+++ b/docs/ref-css-length.txt
@@ -26,3 +26,5 @@ Watch out: font-sizes can also be nested to get successively larger
 (although I do not relish having to keep track of context font-sizes,
 this may be necessary, especially for some of the more advanced features
 for preventing things like white on white).
+
+    vim: et sw=4 sts=4
--- a/docs/ref-devnetwork.html
+++ b/docs/ref-devnetwork.html
@@ -40,6 +40,8 @@ the development of this library in these forum threads:</p>

 <p>...as well as any I may have forgotten.</p>

-<div id="version">$Id$</div>
 </body>
 </html>
+
+<!-- vim: et sw=4 sts=4
+-->
--- a/docs/ref-html-modularization.txt
+++ b/docs/ref-html-modularization.txt
@@ -162,3 +162,5 @@ array of content set names to content set contents. If the content set
 already exists, your values are appended on to it (great for, say,
 registering the font tag as an inline element), otherwise it is
 created. They are substituted into content_model.
+
+    vim: et sw=4 sts=4
--- a/docs/ref-proprietary-tags.txt
+++ b/docs/ref-proprietary-tags.txt
@@ -22,3 +22,5 @@ HTML Purifier context.

 These should be put into their own Tidy module, not loaded by default(?). These
 all qualify as "lenient" transforms.
+
+    vim: et sw=4 sts=4
--- a/docs/ref-whatwg.txt
+++ b/docs/ref-whatwg.txt
@@ -22,3 +22,5 @@ another DirectLex parser, other parsers like ph5p
 <http://jero.net/lab/ph5p/> can be adapted to DOMLex to support much more
 flexible HTML parsing (a cool feature I've seen is how they resolve
 <b>bold<i>both</b>italic</i>).
+
+    vim: et sw=4 sts=4
--- a/docs/specimens/LICENSE
+++ b/docs/specimens/LICENSE
@@ -6,3 +6,5 @@ windows-live-mail-desktop-beta.html - donated by laacz, public domain
 img.png - LGPL, from <http://commons.wikimedia.org/wiki/Image:Pastille_chrome.png>

 All other files are by me, and are licensed under LGPL.
+
+    vim: et sw=4 sts=4
--- a/docs/style.css
+++ b/docs/style.css
@@ -72,3 +72,5 @@ q:after {
 .fixme:before {content:"Fix me: "; font-weight:bold; color:#C00; }

 #applicability {margin: 1em 5%; font-style:italic;}
+
+/* vim: et sw=4 sts=4 */
--- a/extras/ConfigDoc/HTMLXSLTProcessor.php
+++ b/extras/ConfigDoc/HTMLXSLTProcessor.php
@@ -83,3 +83,4 @@ class ConfigDoc_HTMLXSLTProcessor

 }

+// vim: et sw=4 sts=4
--- a/extras/FSTools.php
+++ b/extras/FSTools.php
@@ -153,3 +153,5 @@ class FSTools
    }

 }
+
+// vim: et sw=4 sts=4
--- a/extras/FSTools/File.php
+++ b/extras/FSTools/File.php
@@ -122,3 +122,5 @@ class FSTools_File
    }

 }
+
+// vim: et sw=4 sts=4
--- a/extras/HTMLPurifierExtras.auto.php
+++ b/extras/HTMLPurifierExtras.auto.php
@@ -7,3 +7,5 @@
 set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
 require_once 'HTMLPurifierExtras.php';
 require_once 'HTMLPurifierExtras.autoload.php';
+
+// vim: et sw=4 sts=4
--- a/extras/HTMLPurifierExtras.autoload.php
+++ b/extras/HTMLPurifierExtras.autoload.php
@@ -21,3 +21,5 @@ if (function_exists('spl_autoload_register')) {
        return HTMLPurifierExtras::autoload($class);
    }
 }
+
+// vim: et sw=4 sts=4
--- a/extras/HTMLPurifierExtras.php
+++ b/extras/HTMLPurifierExtras.php
@@ -25,3 +25,5 @@ class HTMLPurifierExtras
    }

 }
+
+// vim: et sw=4 sts=4
--- a/extras/README
+++ b/extras/README
@@ -28,3 +28,5 @@ the filesystem. It currently consists of two classes:
  method imaginable one would need.

 Check the files themselves for more information.
+
+    vim: et sw=4 sts=4
--- a/library/HTMLPurifier.auto.php
+++ b/library/HTMLPurifier.auto.php
@@ -7,3 +7,5 @@
 set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
 require_once 'HTMLPurifier/Bootstrap.php';
 require_once 'HTMLPurifier.autoload.php';
+
+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier.autoload.php
+++ b/library/HTMLPurifier.autoload.php
@@ -17,3 +17,5 @@ if (function_exists('spl_autoload_register') && function_exists('spl_autoload_un
        return HTMLPurifier_Bootstrap::autoload($class);
    }
 }
+
+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier.func.php
+++ b/library/HTMLPurifier.func.php
@@ -20,3 +20,4 @@ function HTMLPurifier($html, $config = null) {
    return $purifier->purify($html, $config);
 }

+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier.includes.php
+++ b/library/HTMLPurifier.includes.php
@@ -7,7 +7,7 @@
 * primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
 * FILE, changes will be overwritten the next time the script is run.
 *
- * @version 3.1.1
+ * @version 4.1.1
 *
 * @warning
 *      You must *not* include any other HTML Purifier files before this file,
@@ -41,6 +41,7 @@ require 'HTMLPurifier/Encoder.php';
 require 'HTMLPurifier/EntityLookup.php';
 require 'HTMLPurifier/EntityParser.php';
 require 'HTMLPurifier/ErrorCollector.php';
+require 'HTMLPurifier/ErrorStruct.php';
 require 'HTMLPurifier/Exception.php';
 require 'HTMLPurifier/Filter.php';
 require 'HTMLPurifier/Generator.php';
@@ -54,6 +55,8 @@ require 'HTMLPurifier/LanguageFactory.php';
 require 'HTMLPurifier/Length.php';
 require 'HTMLPurifier/Lexer.php';
 require 'HTMLPurifier/PercentEncoder.php';
+require 'HTMLPurifier/PropertyList.php';
+require 'HTMLPurifier/PropertyListIterator.php';
 require 'HTMLPurifier/Strategy.php';
 require 'HTMLPurifier/StringHash.php';
 require 'HTMLPurifier/StringHashParser.php';
@@ -95,6 +98,8 @@ require 'HTMLPurifier/AttrDef/CSS/Percentage.php';
 require 'HTMLPurifier/AttrDef/CSS/TextDecoration.php';
 require 'HTMLPurifier/AttrDef/CSS/URI.php';
 require 'HTMLPurifier/AttrDef/HTML/Bool.php';
+require 'HTMLPurifier/AttrDef/HTML/Nmtokens.php';
+require 'HTMLPurifier/AttrDef/HTML/Class.php';
 require 'HTMLPurifier/AttrDef/HTML/Color.php';
 require 'HTMLPurifier/AttrDef/HTML/FrameTarget.php';
 require 'HTMLPurifier/AttrDef/HTML/ID.php';
@@ -102,12 +107,12 @@ require 'HTMLPurifier/AttrDef/HTML/Pixels.php';
 require 'HTMLPurifier/AttrDef/HTML/Length.php';
 require 'HTMLPurifier/AttrDef/HTML/LinkTypes.php';
 require 'HTMLPurifier/AttrDef/HTML/MultiLength.php';
-require 'HTMLPurifier/AttrDef/HTML/Nmtokens.php';
 require 'HTMLPurifier/AttrDef/URI/Email.php';
 require 'HTMLPurifier/AttrDef/URI/Host.php';
 require 'HTMLPurifier/AttrDef/URI/IPv4.php';
 require 'HTMLPurifier/AttrDef/URI/IPv6.php';
 require 'HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
+require 'HTMLPurifier/AttrTransform/Background.php';
 require 'HTMLPurifier/AttrTransform/BdoDir.php';
 require 'HTMLPurifier/AttrTransform/BgColor.php';
 require 'HTMLPurifier/AttrTransform/BoolToCSS.php';
@@ -115,13 +120,16 @@ require 'HTMLPurifier/AttrTransform/Border.php';
 require 'HTMLPurifier/AttrTransform/EnumToCSS.php';
 require 'HTMLPurifier/AttrTransform/ImgRequired.php';
 require 'HTMLPurifier/AttrTransform/ImgSpace.php';
+require 'HTMLPurifier/AttrTransform/Input.php';
 require 'HTMLPurifier/AttrTransform/Lang.php';
 require 'HTMLPurifier/AttrTransform/Length.php';
 require 'HTMLPurifier/AttrTransform/Name.php';
+require 'HTMLPurifier/AttrTransform/NameSync.php';
 require 'HTMLPurifier/AttrTransform/SafeEmbed.php';
 require 'HTMLPurifier/AttrTransform/SafeObject.php';
 require 'HTMLPurifier/AttrTransform/SafeParam.php';
 require 'HTMLPurifier/AttrTransform/ScriptRequired.php';
+require 'HTMLPurifier/AttrTransform/Textarea.php';
 require 'HTMLPurifier/ChildDef/Chameleon.php';
 require 'HTMLPurifier/ChildDef/Custom.php';
 require 'HTMLPurifier/ChildDef/Empty.php';
@@ -137,10 +145,12 @@ require 'HTMLPurifier/DefinitionCache/Decorator/Memory.php';
 require 'HTMLPurifier/HTMLModule/Bdo.php';
 require 'HTMLPurifier/HTMLModule/CommonAttributes.php';
 require 'HTMLPurifier/HTMLModule/Edit.php';
+require 'HTMLPurifier/HTMLModule/Forms.php';
 require 'HTMLPurifier/HTMLModule/Hypertext.php';
 require 'HTMLPurifier/HTMLModule/Image.php';
 require 'HTMLPurifier/HTMLModule/Legacy.php';
 require 'HTMLPurifier/HTMLModule/List.php';
+require 'HTMLPurifier/HTMLModule/Name.php';
 require 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
 require 'HTMLPurifier/HTMLModule/Object.php';
 require 'HTMLPurifier/HTMLModule/Presentation.php';
@@ -155,14 +165,18 @@ require 'HTMLPurifier/HTMLModule/Target.php';
 require 'HTMLPurifier/HTMLModule/Text.php';
 require 'HTMLPurifier/HTMLModule/Tidy.php';
 require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
+require 'HTMLPurifier/HTMLModule/Tidy/Name.php';
 require 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
 require 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
 require 'HTMLPurifier/HTMLModule/Tidy/Strict.php';
 require 'HTMLPurifier/HTMLModule/Tidy/Transitional.php';
 require 'HTMLPurifier/HTMLModule/Tidy/XHTML.php';
 require 'HTMLPurifier/Injector/AutoParagraph.php';
+require 'HTMLPurifier/Injector/DisplayLinkURI.php';
 require 'HTMLPurifier/Injector/Linkify.php';
 require 'HTMLPurifier/Injector/PurifierLinkify.php';
+require 'HTMLPurifier/Injector/RemoveEmpty.php';
+require 'HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
 require 'HTMLPurifier/Injector/SafeObject.php';
 require 'HTMLPurifier/Lexer/DOMLex.php';
 require 'HTMLPurifier/Lexer/DirectLex.php';
@@ -185,6 +199,7 @@ require 'HTMLPurifier/URIFilter/DisableExternalResources.php';
 require 'HTMLPurifier/URIFilter/HostBlacklist.php';
 require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
 require 'HTMLPurifier/URIFilter/Munge.php';
+require 'HTMLPurifier/URIScheme/data.php';
 require 'HTMLPurifier/URIScheme/ftp.php';
 require 'HTMLPurifier/URIScheme/http.php';
 require 'HTMLPurifier/URIScheme/https.php';
--- a/library/HTMLPurifier.kses.php
+++ b/library/HTMLPurifier.kses.php
@@ -17,12 +17,14 @@ function kses($string, $allowed_html, $allowed_protocols = null) {
            $allowed_attributes["$element.$attribute"] = true;
        }
    }
-    $config->set('HTML', 'AllowedElements', $allowed_elements);
-    $config->set('HTML', 'AllowedAttributes', $allowed_attributes);
+    $config->set('HTML.AllowedElements', $allowed_elements);
+    $config->set('HTML.AllowedAttributes', $allowed_attributes);
    $allowed_schemes = array();
    if ($allowed_protocols !== null) {
-        $config->set('URI', 'AllowedSchemes', $allowed_protocols);
+        $config->set('URI.AllowedSchemes', $allowed_protocols);
    }
    $purifier = new HTMLPurifier($config);
    return $purifier->purify($string);
 }
+
+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier.path.php
+++ b/library/HTMLPurifier.path.php
@@ -7,3 +7,5 @@
 */

 set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
+
+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier.php
+++ b/library/HTMLPurifier.php
@@ -19,7 +19,7 @@
 */

 /*
-    HTML Purifier 3.1.1 - Standards Compliant HTML Filtering
+    HTML Purifier 4.1.1 - Standards Compliant HTML Filtering
    Copyright (C) 2006-2008 Edward Z. Yang

    This library is free software; you can redistribute it and/or
@@ -55,10 +55,10 @@ class HTMLPurifier
 {

    /** Version of HTML Purifier */
-    public $version = '3.1.1';
+    public $version = '4.1.1';

    /** Constant with version of HTML Purifier */
-    const VERSION = '3.1.1';
+    const VERSION = '4.1.1';

    /** Global configuration object */
    public $config;
@@ -128,7 +128,7 @@ class HTMLPurifier
        $context->register('Generator', $this->generator);

        // set up global context variables
-        if ($config->get('Core', 'CollectErrors')) {
+        if ($config->get('Core.CollectErrors')) {
            // may get moved out if other facilities use it
            $language_factory = HTMLPurifier_LanguageFactory::instance();
            $language = $language_factory->create($config, $context);
@@ -152,6 +152,7 @@ class HTMLPurifier
        $filters = array();
        foreach ($filter_flags as $filter => $flag) {
            if (!$flag) continue;
+            if (strpos($filter, '.') !== false) continue;
            $class = "HTMLPurifier_Filter_$filter";
            $filters[] = new $class;
        }
@@ -232,3 +233,5 @@ class HTMLPurifier
    }

 }
+
+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier.safe-includes.php
+++ b/library/HTMLPurifier.safe-includes.php
@@ -35,6 +35,7 @@ require_once $__dir . '/HTMLPurifier/Encoder.php';
 require_once $__dir . '/HTMLPurifier/EntityLookup.php';
 require_once $__dir . '/HTMLPurifier/EntityParser.php';
 require_once $__dir . '/HTMLPurifier/ErrorCollector.php';
+require_once $__dir . '/HTMLPurifier/ErrorStruct.php';
 require_once $__dir . '/HTMLPurifier/Exception.php';
 require_once $__dir . '/HTMLPurifier/Filter.php';
 require_once $__dir . '/HTMLPurifier/Generator.php';
@@ -48,6 +49,8 @@ require_once $__dir . '/HTMLPurifier/LanguageFactory.php';
 require_once $__dir . '/HTMLPurifier/Length.php';
 require_once $__dir . '/HTMLPurifier/Lexer.php';
 require_once $__dir . '/HTMLPurifier/PercentEncoder.php';
+require_once $__dir . '/HTMLPurifier/PropertyList.php';
+require_once $__dir . '/HTMLPurifier/PropertyListIterator.php';
 require_once $__dir . '/HTMLPurifier/Strategy.php';
 require_once $__dir . '/HTMLPurifier/StringHash.php';
 require_once $__dir . '/HTMLPurifier/StringHashParser.php';
@@ -89,6 +92,8 @@ require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Percentage.php';
 require_once $__dir . '/HTMLPurifier/AttrDef/CSS/TextDecoration.php';
 require_once $__dir . '/HTMLPurifier/AttrDef/CSS/URI.php';
 require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Bool.php';
+require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Nmtokens.php';
+require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Class.php';
 require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Color.php';
 require_once $__dir . '/HTMLPurifier/AttrDef/HTML/FrameTarget.php';
 require_once $__dir . '/HTMLPurifier/AttrDef/HTML/ID.php';
@@ -96,12 +101,12 @@ require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Pixels.php';
 require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Length.php';
 require_once $__dir . '/HTMLPurifier/AttrDef/HTML/LinkTypes.php';
 require_once $__dir . '/HTMLPurifier/AttrDef/HTML/MultiLength.php';
-require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Nmtokens.php';
 require_once $__dir . '/HTMLPurifier/AttrDef/URI/Email.php';
 require_once $__dir . '/HTMLPurifier/AttrDef/URI/Host.php';
 require_once $__dir . '/HTMLPurifier/AttrDef/URI/IPv4.php';
 require_once $__dir . '/HTMLPurifier/AttrDef/URI/IPv6.php';
 require_once $__dir . '/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
+require_once $__dir . '/HTMLPurifier/AttrTransform/Background.php';
 require_once $__dir . '/HTMLPurifier/AttrTransform/BdoDir.php';
 require_once $__dir . '/HTMLPurifier/AttrTransform/BgColor.php';
 require_once $__dir . '/HTMLPurifier/AttrTransform/BoolToCSS.php';
@@ -109,13 +114,16 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/Border.php';
 require_once $__dir . '/HTMLPurifier/AttrTransform/EnumToCSS.php';
 require_once $__dir . '/HTMLPurifier/AttrTransform/ImgRequired.php';
 require_once $__dir . '/HTMLPurifier/AttrTransform/ImgSpace.php';
+require_once $__dir . '/HTMLPurifier/AttrTransform/Input.php';
 require_once $__dir . '/HTMLPurifier/AttrTransform/Lang.php';
 require_once $__dir . '/HTMLPurifier/AttrTransform/Length.php';
 require_once $__dir . '/HTMLPurifier/AttrTransform/Name.php';
+require_once $__dir . '/HTMLPurifier/AttrTransform/NameSync.php';
 require_once $__dir . '/HTMLPurifier/AttrTransform/SafeEmbed.php';
 require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php';
 require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php';
 require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php';
+require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
 require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
 require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
 require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';
@@ -131,10 +139,12 @@ require_once $__dir . '/HTMLPurifier/DefinitionCache/Decorator/Memory.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/Bdo.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/CommonAttributes.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/Edit.php';
+require_once $__dir . '/HTMLPurifier/HTMLModule/Forms.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/Hypertext.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/List.php';
+require_once $__dir . '/HTMLPurifier/HTMLModule/Name.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/Object.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/Presentation.php';
@@ -149,14 +159,18 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
+require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Name.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Strict.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Transitional.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/XHTML.php';
 require_once $__dir . '/HTMLPurifier/Injector/AutoParagraph.php';
+require_once $__dir . '/HTMLPurifier/Injector/DisplayLinkURI.php';
 require_once $__dir . '/HTMLPurifier/Injector/Linkify.php';
 require_once $__dir . '/HTMLPurifier/Injector/PurifierLinkify.php';
+require_once $__dir . '/HTMLPurifier/Injector/RemoveEmpty.php';
+require_once $__dir . '/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
 require_once $__dir . '/HTMLPurifier/Injector/SafeObject.php';
 require_once $__dir . '/HTMLPurifier/Lexer/DOMLex.php';
 require_once $__dir . '/HTMLPurifier/Lexer/DirectLex.php';
@@ -179,6 +193,7 @@ require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternalResources.php';
 require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
 require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
 require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
+require_once $__dir . '/HTMLPurifier/URIScheme/data.php';
 require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';
 require_once $__dir . '/HTMLPurifier/URIScheme/http.php';
 require_once $__dir . '/HTMLPurifier/URIScheme/https.php';
--- a/library/HTMLPurifier/AttrCollections.php
+++ b/library/HTMLPurifier/AttrCollections.php
@@ -125,3 +125,4 @@ class HTMLPurifier_AttrCollections

 }

+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier/AttrDef.php
+++ b/library/HTMLPurifier/AttrDef.php
@@ -82,5 +82,42 @@ abstract class HTMLPurifier_AttrDef
        return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
    }

+    /**
+     * Expands backslash escapes in a possibly escaped CSS string and
+     * returns the "pure" version of it.
+     */
+    protected function expandCSSEscape($string) {
+        // flexibly parse it
+        $ret = '';
+        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
+            if ($string[$i] === '\\') {
+                $i++;
+                if ($i >= $c) {
+                    $ret .= '\\'; // lone trailing backslash is kept as-is
+                    break;
+                }
+                if (ctype_xdigit($string[$i])) {
+                    $code = $string[$i]; // collect up to six hex digits
+                    for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
+                        if (!ctype_xdigit($string[$i])) break;
+                        $code .= $string[$i];
+                    }
+                    // We have to be extremely careful when adding new
+                    // characters, to make sure we're not breaking the encoding.
+                    $char = HTMLPurifier_Encoder::unichr(hexdec($code));
+                    if (HTMLPurifier_Encoder::cleanUTF8($char) !== '') $ret .= $char;
+                    // Only a whitespace terminator belongs to the escape; step back
+                    // for anything else, even if the code point was discarded above.
+                    if ($i < $c && trim($string[$i]) !== '') $i--;
+                    continue;
+                }
+                if ($string[$i] === "\n") continue; // escaped newline is dropped
+            }
+            $ret .= $string[$i];
+        }
+        return $ret;
+    }
+
 }

+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier/AttrDef/CSS.php
+++ b/library/HTMLPurifier/AttrDef/CSS.php
@@ -84,3 +84,4 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef

 }

+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier/AttrDef/CSS/AlphaValue.php
+++ b/library/HTMLPurifier/AttrDef/CSS/AlphaValue.php
@@ -17,3 +17,5 @@ class HTMLPurifier_AttrDef_CSS_AlphaValue extends HTMLPurifier_AttrDef_CSS_Numbe
    }

 }
+
+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier/AttrDef/CSS/Background.php
+++ b/library/HTMLPurifier/AttrDef/CSS/Background.php
@@ -84,3 +84,4 @@ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef

 }

+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php
+++ b/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php
@@ -59,7 +59,8 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
        $keywords = array();
        $keywords['h'] = false; // left, right
        $keywords['v'] = false; // top, bottom
-        $keywords['c'] = false; // center
+        $keywords['ch'] = false; // center (first word)
+        $keywords['cv'] = false; // center (second word)
        $measures = array();

        $i = 0;
@@ -79,6 +80,13 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
            $lbit = ctype_lower($bit) ? $bit : strtolower($bit);
            if (isset($lookup[$lbit])) {
                $status = $lookup[$lbit];
+                if ($status == 'c') {
+                    if ($i == 0) {
+                        $status = 'ch';
+                    } else {
+                        $status = 'cv';
+                    }
+                }
                $keywords[$status] = $lbit;
                $i++;
            }
@@ -101,20 +109,19 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef

        if (!$i) return false; // no valid values were caught

-        
        $ret = array();

        // first keyword
        if     ($keywords['h'])     $ret[] = $keywords['h'];
-        elseif (count($measures))   $ret[] = array_shift($measures);
-        elseif ($keywords['c']) {
-            $ret[] = $keywords['c'];
-            $keywords['c'] = false; // prevent re-use: center = center center
+        elseif ($keywords['ch']) {
+            $ret[] = $keywords['ch'];
+            $keywords['cv'] = false; // prevent re-use: center = center center
        }
+        elseif (count($measures))   $ret[] = array_shift($measures);

        if     ($keywords['v'])     $ret[] = $keywords['v'];
+        elseif ($keywords['cv'])    $ret[] = $keywords['cv'];
        elseif (count($measures))   $ret[] = array_shift($measures);
-        elseif ($keywords['c'])     $ret[] = $keywords['c'];

        if (empty($ret)) return false;
        return implode(' ', $ret);
@@ -123,3 +130,4 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef

 }

+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier/AttrDef/CSS/Border.php
+++ b/library/HTMLPurifier/AttrDef/CSS/Border.php
@@ -40,3 +40,4 @@ class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef

 }

+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier/AttrDef/CSS/Color.php
+++ b/library/HTMLPurifier/AttrDef/CSS/Color.php
@@ -9,7 +9,7 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
    public function validate($color, $config, $context) {

        static $colors = null;
-        if ($colors === null) $colors = $config->get('Core', 'ColorKeywords');
+        if ($colors === null) $colors = $config->get('Core.ColorKeywords');

        $color = trim($color);
        if ($color === '') return false;
@@ -75,3 +75,4 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef

 }

+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier/AttrDef/CSS/Composite.php
+++ b/library/HTMLPurifier/AttrDef/CSS/Composite.php
@@ -35,3 +35,4 @@ class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef

 }

+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php
+++ b/library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php
@@ -5,7 +5,7 @@
 */
 class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef
 {
-    protected $def, $element;
+    public $def, $element;

    /**
     * @param $def Definition to wrap
@@ -24,3 +24,5 @@ class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef
        return $this->def->validate($string, $config, $context);
    }
 }
+
+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier/AttrDef/CSS/Filter.php
+++ b/library/HTMLPurifier/AttrDef/CSS/Filter.php
@@ -50,3 +50,5 @@ class HTMLPurifier_AttrDef_CSS_Filter extends HTMLPurifier_AttrDef
    }

 }
+
+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier/AttrDef/CSS/Font.php
+++ b/library/HTMLPurifier/AttrDef/CSS/Font.php
@@ -146,3 +146,4 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef

 }

+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier/AttrDef/CSS/FontFamily.php
+++ b/library/HTMLPurifier/AttrDef/CSS/FontFamily.php
@@ -34,37 +34,10 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
                $quote = $font[0];
                if ($font[$length - 1] !== $quote) continue;
                $font = substr($font, 1, $length - 2);
-                
-                $new_font = '';
-                for ($i = 0, $c = strlen($font); $i < $c; $i++) {
-                    if ($font[$i] === '\\') {
-                        $i++;
-                        if ($i >= $c) {
-                            $new_font .= '\\';
-                            break;
-                        }
-                        if (ctype_xdigit($font[$i])) {
-                            $code = $font[$i];
-                            for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
-                                if (!ctype_xdigit($font[$i])) break;
-                                $code .= $font[$i];
-                            }
-                            // We have to be extremely careful when adding
-                            // new characters, to make sure we're not breaking
-                            // the encoding.
-                            $char = HTMLPurifier_Encoder::unichr(hexdec($code));
-                            if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
-                            $new_font .= $char;
-                            if ($i < $c && trim($font[$i]) !== '') $i--;
-                            continue;
-                        }
-                        if ($font[$i] === "\n") continue;
-                    }
-                    $new_font .= $font[$i];
-                }
-                
-                $font = $new_font;
            }
+
+            $font = $this->expandCSSEscape($font);
+
            // $font is a pure representation of the font name

            if (ctype_alnum($font) && $font !== '') {
@@ -73,12 +46,21 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
                continue;
            }

-            // complicated font, requires quoting
+            // flatten exotic whitespace into plain spaces.  form feed (0C)
+            // really shouldn't show up regardless
+            $font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);

-            // armor single quotes and new lines
-            $font = str_replace("\\", "\\\\", $font);
-            $font = str_replace("'", "\\'", $font);
-            $final .= "'$font', ";
+            // These ugly transforms don't pose a security
+            // risk (as \\ and \" might).  We could try to be clever and
+            // use single-quote wrapping when there is a double quote
+            // present, but I have chosen not to implement that.
+            // (warning: this code relies on the selection of quotation
+            // mark below)
+            $font = str_replace('\\', '\\5C ', $font);
+            $font = str_replace('"',  '\\22 ', $font);
+
+            // complicated font, requires quoting
+            $final .= "\"$font\", "; // note that this will later get turned into &quot;
        }
        $final = rtrim($final, ', ');
        if ($final === '') return false;
@@ -87,3 +69,4 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef

 }

+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php
+++ b/library/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php
@@ -5,7 +5,7 @@
 */
 class HTMLPurifier_AttrDef_CSS_ImportantDecorator extends HTMLPurifier_AttrDef
 {
-    protected $def, $allow;
+    public $def, $allow;

    /**
     * @param $def Definition to wrap
@@ -36,3 +36,5 @@ class HTMLPurifier_AttrDef_CSS_ImportantDecorator extends HTMLPurifier_AttrDef
        return $string;
    }
 }
+
+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier/AttrDef/CSS/Length.php
+++ b/library/HTMLPurifier/AttrDef/CSS/Length.php
@@ -44,3 +44,4 @@ class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef

 }

+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier/AttrDef/CSS/ListStyle.php
+++ b/library/HTMLPurifier/AttrDef/CSS/ListStyle.php
@@ -75,3 +75,4 @@ class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef

 }

+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier/AttrDef/CSS/Multiple.php
+++ b/library/HTMLPurifier/AttrDef/CSS/Multiple.php
@@ -55,3 +55,4 @@ class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef

 }

+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier/AttrDef/CSS/Number.php
+++ b/library/HTMLPurifier/AttrDef/CSS/Number.php
@@ -66,3 +66,4 @@ class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef

 }

+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier/AttrDef/CSS/Percentage.php
+++ b/library/HTMLPurifier/AttrDef/CSS/Percentage.php
@@ -37,3 +37,4 @@ class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef

 }

+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier/AttrDef/CSS/TextDecoration.php
+++ b/library/HTMLPurifier/AttrDef/CSS/TextDecoration.php
@@ -35,3 +35,4 @@ class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef

 }

+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier/AttrDef/CSS/URI.php
+++ b/library/HTMLPurifier/AttrDef/CSS/URI.php
@@ -34,22 +34,19 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
            $uri = substr($uri, 1, $new_length - 1);
        }

-        $keys   = array(  '(',   ')',   ',',   ' ',   '"',   "'");
-        $values = array('\\(', '\\)', '\\,', '\\ ', '\\"', "\\'");
-        $uri = str_replace($values, $keys, $uri);
+        $uri = $this->expandCSSEscape($uri);

        $result = parent::validate($uri, $config, $context);

        if ($result === false) return false;

-        // escape necessary characters according to CSS spec
-        // except for the comma, none of these should appear in the
-        // URI at all
-        $result = str_replace($keys, $values, $result);
+        // extra sanity check; should have been done by URI
+        $result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);

-        return "url($result)";
+        return "url(\"$result\")";

    }

 }

+// vim: et sw=4 sts=4
--- a/library/HTMLPurifier/AttrDef/Enum.php
+++ b/library/HTMLPurifier/AttrDef/Enum.php
@@ -62,3 +62,4 @@ class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef

 }

+// vim: et sw=4 sts=4
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .1.1
 .1.1