From 5fa575f8ac284e060a8893c83c03a82bc55f57c1 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Thu, 15 May 2008 05:16:36 +0000 Subject: [PATCH] [2.1.4] [MFH] Encoder optimization and shut-up operator bugfix from r1680 git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1718 48356398-32a2-884e-a903-53898d9a118a --- NEWS | 2 ++ library/HTMLPurifier/Encoder.php | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 7ba715d6..627afd1d 100644 --- a/NEWS +++ b/NEWS @@ -17,6 +17,7 @@ ERRATA 2.1.4, unknown release date ! DefinitionCacheFactory now can register new implementations ! CSS properties are now case-insensitive +! Encoder optimized with valid UTF-8 input - Colors missing # but in hex form will be corrected - CSS Number algorithm improved - Autoclose now operates iteratively, i.e.
<span><span><div> now has @@ -26,6 +27,7 @@ ERRATA - Fix bug with rgb(0, 1, 2) color syntax with spaces inside shorthand syntax - HTMLPurifier_HTMLDefinition->addAttribute can now be called multiple times on the same element without emitting errors. +- Iconv uses set_error_handler instead of shut-up operator 2.1.3, released 2007-11-05 ! tests/multitest.php allows you to test multiple versions by running diff --git a/library/HTMLPurifier/Encoder.php b/library/HTMLPurifier/Encoder.php index e5adf83f..31ebb785 100644 --- a/library/HTMLPurifier/Encoder.php +++ b/library/HTMLPurifier/Encoder.php @@ -62,6 +62,11 @@ class HTMLPurifier_Encoder trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR); } + /** + * Error-handler that mutes errors, alternative to shut-up operator. + */ + function muteErrorHandler() {} + /** * Cleans a UTF-8 string for well-formedness and SGML validity * @@ -106,9 +111,18 @@ class HTMLPurifier_Encoder static $iconv = null; if ($iconv === null) $iconv = function_exists('iconv'); + // UTF-8 validity is checked since PHP 4.3.5 + // This is an optimization: if the string is already valid UTF-8, no + // need to do iconv/php stuff. 99% of the time, this will be the case. + if (preg_match('/^.{1}/us', $str)) { + return strtr($str, $non_sgml_chars); + } + if ($iconv && !$force_php) { // do the shortcut way - $str = @iconv('UTF-8', 'UTF-8//IGNORE', $str); + set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); + $str = iconv('UTF-8', 'UTF-8//IGNORE', $str); + restore_error_handler(); return strtr($str, $non_sgml_chars); }