1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-03 12:47:56 +02:00

Compare commits

...

30 Commits

Author SHA1 Message Date
Edward Z. Yang
e9f529e78f Release 3.3.0.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-02-16 17:18:31 -05:00
Edward Z. Yang
e802065b65 Punt Lexer test entirely for 5.0.5.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-02-16 17:18:30 -05:00
Edward Z. Yang
1d70929eba Add text parameter to unit tests, forces text output.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-02-16 17:18:30 -05:00
Edward Z. Yang
77f57aa264 Fix CSSDefinition Printer problems with important decorator.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-02-15 14:11:22 -05:00
Edward Z. Yang
db218c7b2b Fix YouTube rendering problem on versions of Firefox.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-02-15 14:11:21 -05:00
Edward Z. Yang
762c089431 Ignore generated test-schema.html file in smoketests.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-02-15 14:11:20 -05:00
Edward Z. Yang
07ed1bbf8c Fix broken trusted comments functionality.
This fix is slightly hackish, as we simply treat comments as whitespace.
This should largely be correct, and breaks no current test cases,
although it could result in noncompliant behavior.

Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-02-05 18:04:10 -05:00
Edward Z. Yang
b9094d5ec8 Convert HTMLPurifier_Config to use property list backend.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-02-02 18:42:23 -05:00
Edward Z. Yang
b31f280d41 Ignore htmlt.ini files.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-02-02 18:18:10 -05:00
Edward Z. Yang
1b962e68f0 Downgrade directory not found and permissions errors to warnings.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-02-02 10:08:37 -05:00
Edward Z. Yang
0c9dc02d4a Use default configuration when resetting; prevents zombie defaults for encodings from carrying over.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-01-30 17:42:41 -05:00
Edward Z. Yang
bfe474042f Implement "carryover" functionality, requested by Kinderlehrer <bitweaver@7doves.com>
This commit is a limited implementation of the "active formatting
elements" algorithm implemented in HTML5, which preserves certain
formatting elements such as <a> and <b> when exiting or entering nodes.

Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2008-12-20 13:06:00 -05:00
Edward Z. Yang
119ebcda71 Implement user-friendly links to test-cases on web tester.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2008-12-20 13:01:20 -05:00
Edward Z. Yang
3dfcd016d3 Fix standards-compliance issue with YouTube filter with double hyphens.
Thanks Pierre Attar for reporting.

Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2008-12-12 16:27:23 -05:00
Edward Z. Yang
0c9dfc6c3d Don't add vimline to auto-generated files.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2008-12-12 15:44:13 -05:00
Edward Z. Yang
33a873f5cb Fix missing numbers when pass/fail count is zero.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2008-12-06 16:08:09 -05:00
Edward Z. Yang
12b811d749 Add vim modelines to all files.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2008-12-06 04:24:59 -05:00
Edward Z. Yang
781f9a4084 Update PH5P.patch, and add NEWS entry for trailing whitespace purge.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2008-12-06 02:30:52 -05:00
Edward Z. Yang
2c955af135 Remove trailing whitespace.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2008-12-06 02:28:20 -05:00
Edward Z. Yang
3a6b63dff1 Generic implementation of property-lists.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2008-12-06 00:43:42 -05:00
Edward Z. Yang
90110a4e3a Fix broken test-suite in early versions of PHP.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2008-12-05 15:50:59 -05:00
Edward Z. Yang
d67e17a69c Remove unnecessary svn wrapper include.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2008-12-03 12:45:31 -05:00
Edward Z. Yang
5cfecebb33 Fix bug involving whitespace-only nodes. Thanks Eric Wald for reporting.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2008-12-02 20:13:47 -05:00
Edward Z. Yang
f5cd2c07ea Implement 'overflow' CSS property.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2008-11-27 16:14:50 -05:00
Edward Z. Yang
6691676666 Fix newline issues in tests.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2008-11-26 15:30:59 -05:00
Edward Z. Yang
e128c09132 Fix bug with testEncodingSupportsASCII() with strange iconv
implementations.

Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2008-11-26 15:17:09 -05:00
Edward Z. Yang
527f154d3d Add verbose mode to command line test runner.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2008-11-23 20:45:21 -05:00
Edward Z. Yang
778ddf7c96 Turn on unit tests for UnitConverter.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2008-11-23 20:43:58 -05:00
Edward Z. Yang
c5d4b1ec93 Fix missing version number in config directive, and add TODO item.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2008-11-05 02:56:13 -05:00
Edward Z. Yang
6fe6cc8901 Update gitignore with post-release files, new NEWS entry and spellcheck UTF-8.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2008-11-01 01:51:51 -04:00
711 changed files with 7215 additions and 5779 deletions

5
.gitignore vendored
View File

@@ -3,11 +3,16 @@ test-settings.php
library/HTMLPurifier/DefinitionCache/Serializer/*/
library/standalone/
library/HTMLPurifier.standalone.php
library/HTMLPurifier*.tgz
library/package*.xml
smoketests/test-schema.html
configdoc/*.html
configdoc/configdoc.xml
docs/doxygen*
*.phpt.diff
*.phpt.exp
*.phpt.log
*.phpt.out
*.phpt.php
*.phpt.skip.php
*.htmlt.ini

View File

@@ -5,3 +5,5 @@ Almost everything written by Edward Z. Yang (Ambush Commander). Lots of thanks
to the DevNetwork Community for their help (see docs/ref-devnetwork.html for
more details), Feyd especially (namely IPv6 and optimization). Thanks to RSnake
for letting me package his fantastic XSS cheatsheet for a smoketest.
vim: et sw=4 sts=4

View File

@@ -31,7 +31,7 @@ PROJECT_NAME = HTMLPurifier
# This could be handy for archiving the generated documentation or
# if some version control system is used.
PROJECT_NUMBER = 3.2.0
PROJECT_NUMBER = 3.3.0
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.
@@ -1313,3 +1313,5 @@ DOT_CLEANUP = YES
# used. If set to NO the values of all tags below this one will be ignored.
SEARCHENGINE = NO
# vim: et sw=4 sts=4

2
FOCUS
View File

@@ -1,4 +1,4 @@
5 - Major feature enhancements
7 - Major bugfixes
[ Appendix A: Release focus IDs ]
0 - N/A

View File

@@ -18,8 +18,6 @@ with these contents.
HTML Purifier is PHP 5 only, and is actively tested from PHP 5.0.5 and
up. It has no core dependencies with other libraries. PHP
4 support was deprecated on December 31, 2007 with HTML Purifier 3.0.0.
Essential security fixes will be issued for the 2.1.x branch until
August 8, 2008.
These optional extensions can enhance the capabilities of HTML Purifier:
@@ -372,3 +370,4 @@ If your website is in a different encoding or doctype, use this code:
$clean_html = $purifier->purify($dirty_html);
?>
vim: et sw=4 sts=4

View File

@@ -65,3 +65,5 @@ Sinon, utilisez:
$html_propre = $purificateur->purify($html_salle);
?>
vim: et sw=4 sts=4

View File

@@ -501,4 +501,4 @@ necessary. Here is a sample; alter the names:
That's all there is to it!
vim: et sw=4 sts=4

26
NEWS
View File

@@ -9,6 +9,30 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Internal change
==========================
3.3.0, released 2009-02-16
! Implement CSS property 'overflow' when %CSS.AllowTricky is true.
! Implement generic property list classes
- Fix bug with testEncodingSupportsASCII() algorithm when iconv() implementation
does not do the "right thing" with characters not supported in the output
set.
- Spellcheck UTF-8: The Secret To Character Encoding
- Fix improper removal of the contents of elements with only whitespace. Thanks
Eric Wald for reporting.
- Fix broken test suite in versions of PHP without spl_autoload_register()
- Fix degenerate case with YouTube filter involving double hyphens.
Thanks Pierre Attar for reporting.
- Fix YouTube rendering problem on certain versions of Firefox.
- Fix CSSDefinition Printer problems with decorators
- Add text parameter to unit tests, forces text output
. Add verbose mode to command line test runner, use (--verbose)
. Turn on unit tests for UnitConverter
. Fix missing version number in configuration %Attr.DefaultImageAlt (added 3.2.0)
. Fix newline errors that caused spurious failures when CRLF HTML Purifier was
tested on Linux.
. Removed trailing whitespace from all text files, see
remove-trailing-whitespace.php maintenance script.
. Convert configuration to use property list backend.
3.2.0, released 2008-10-31
# Using %Core.CollectErrors forces line number/column tracking on, whereas
previously you could theoretically turn it off.
@@ -814,3 +838,5 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
+ Shorthand CSS properties
+ Table CSS properties
+ Deprecated attribute transformations
vim: et sw=4 sts=4

2
README
View File

@@ -20,3 +20,5 @@ Places to go:
* See WYSIWYG for information on editors like TinyMCE and FCKeditor
HTML Purifier can be found on the web at: http://htmlpurifier.org/
vim: et sw=4 sts=4

15
TODO
View File

@@ -15,12 +15,13 @@ afraid to cast your vote for the next feature to be implemented!
prevent structures from being parsed and serialized multiple times.
- Built-in support for target="_blank" on all external links
- Allow <a id="asdf" name="asdf">
- Implement overflow CSS property (as per jlp09550)
- Convert configuration to allow an arbitrary number of namespaces;
then rename as appropriate.
FUTURE VERSIONS
---------------
3.3 release [It's All About Trust] (floating)
4.1 release [It's All About Trust] (floating)
# Implement untrusted, dangerous elements/attributes
# Implement IDREF support (harder than it seems, since you cannot have
IDREFs to non-existent IDs)
@@ -28,11 +29,11 @@ FUTURE VERSIONS
- Implement <area>
- Figure out how to simultaneously set %CSS.Trusted and %HTML.Trusted (?)
3.4 release [Error'ed]
4.2 release [Error'ed]
# Error logging for filtering/cleanup procedures
- XSS-attempt detection--certain errors are flagged XSS-like
3.5 release [Do What I Mean, Not What I Say]
4.3 release [Do What I Mean, Not What I Say]
# Additional support for poorly written HTML
- Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!)
- Friendly strict handling of <address> (block -> <br>)
@@ -47,7 +48,7 @@ FUTURE VERSIONS
dupe detector would also need to detect the suffix as well)
- Externalize inline CSS to promote clean HTML, proposed by Sander Tekelenburg
4.0 release [Beyond HTML]
5.0 release [Beyond HTML]
# Legit token based CSS parsing (will require revamping almost every
AttrDef class). Probably will use CSSTidy class?
# More control over allowed CSS properties using a modularization
@@ -57,7 +58,7 @@ FUTURE VERSIONS
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
Also, enable disabling of directionality
5.0 release [To XML and Beyond]
6.0 release [To XML and Beyond]
- Extended HTML capabilities based on namespacing and tag transforms (COMPLEX)
- Hooks for adding custom processors to custom namespaced tags and
attributes, offer default implementation
@@ -126,3 +127,5 @@ Wontfix
- Pretty-printing HTML: users can use Tidy on the output on entire page
- Native content compression, whitespace stripping: use gzip if this is
really important
vim: et sw=4 sts=4

View File

@@ -1 +1 @@
3.2.0
3.3.0

View File

@@ -1,6 +1,6 @@
HTML Purifier 3.2.0 is an amalgamation of new features and fixes that
have accumulated over a four month period. Some notable features
include %AutoFormat.RemoveEmpty, column tracking for tokens,
%AutoFormat.DisplayLinkURI and %Attr.DefaultImageAlt. There were also
major improvements to the test suite interface, error collection output
and the auto-formatter framework.
HTML Purifier 3.3.0 fixes a number of obscure bugs reported and fixed
over a four month period. It is probably the last release in the 3.x
series. Notable new features include support for the overflow CSS
property; notable bugfixes include fixed YouTube rendering in certain
versions of Firefox, CSSDefinition Printer, improved early PHP support
and bugs in iconv.

View File

@@ -16,3 +16,5 @@ trouble. Therein lies the solution:
HTML Purifier is perfect for filtering pure-HTML input from WYSIWYG editors.
Enough said.
vim: et sw=4 sts=4

View File

@@ -12,3 +12,5 @@ $begin = xdebug_memory_usage();
$schema = HTMLPurifier_ConfigSchema::makeFromSerial();
echo xdebug_memory_usage() - $begin;
// vim: et sw=4 sts=4

View File

@@ -153,3 +153,6 @@ echo '<div>Random input was: ' .
</body></html>
<?php
// vim: et sw=4 sts=4

View File

@@ -17,3 +17,5 @@ $data = $purifier->purify(file_get_contents('samples/Lexer/4.html'));
xdebug_stop_trace();
echo "Trace finished.";
// vim: et sw=4 sts=4

View File

@@ -51,3 +51,5 @@
<div style="text-align:center;">Click on photo to see HR version</div></div>
</body>
</html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -15,3 +15,5 @@ function rwt(el,ct,cd,sg){var e = window.encodeURIComponent ? encodeURIComponent
function qs(el) {if (window.RegExp && window.encodeURIComponent) {var ue=el.href;var qe=encodeURIComponent(document.f.q.value);if(ue.indexOf("q=")!=-1){el.href=ue.replace(new RegExp("q=[^&$]*"),"q="+qe);}else{el.href=ue+"&q="+qe;}}return 1;}
// -->
</script><table border=0 cellspacing=0 cellpadding=4><tr><td nowrap><font size=-1><b>Web</b>&nbsp;&nbsp;&nbsp;&nbsp;<a id=1a class=q href="/imghp?hl=en&tab=wi" onClick="return qs(this);">Images</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=2a class=q href="http://groups.google.com/grphp?hl=en&tab=wg" onClick="return qs(this);">Groups</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=4a class=q href="http://news.google.com/nwshp?hl=en&tab=wn" onClick="return qs(this);">News</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=5a class=q href="http://froogle.google.com/frghp?hl=en&tab=wf" onClick="return qs(this);">Froogle</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=8a class=q href="/lochp?hl=en&tab=wl" onClick="return qs(this);">Local</a>&nbsp;&nbsp;&nbsp;&nbsp;<b><a href="/intl/en/options/" class=q>more&nbsp;&raquo;</a></b></font></td></tr></table><table cellspacing=0 cellpadding=0><tr><td width=25%>&nbsp;</td><td align=center><input type=hidden name=hl value=en><input maxlength=2048 size=55 name=q value="" title="Google Search"><br><input type=submit value="Google Search" name=btnG><input type=submit value="I'm Feeling Lucky" name=btnI></td><td valign=top nowrap width=25%><font size=-2>&nbsp;&nbsp;<a href=/advanced_search?hl=en>Advanced Search</a><br>&nbsp;&nbsp;<a href=/preferences?hl=en>Preferences</a><br>&nbsp;&nbsp;<a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><br><font size=-1><a href="/ads/">Advertising&nbsp;Programs</a> - <a href=/services/>Business Solutions</a> - <a href=/about.html>About Google</a></font><p><font size=-2>&copy;2006 Google</font></p></center></body></html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -126,3 +126,5 @@ if (objAdMgr.isSlotAvailable("leaderboard")) {
</body>
</html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -538,3 +538,5 @@ Retrieved from "<a href="http://en.wikipedia.org/wiki/Tai_Chi_Chuan">http://en.w
<!-- Served by srv25 in 0.089 secs. -->
</body></html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -3,3 +3,5 @@ Disclaimer:
The HTML used in these samples are taken from random websites. I claim
no copyright over these and assert that I may use them like this under
fair use.
vim: et sw=4 sts=4

View File

@@ -59,3 +59,4 @@ if (php_sapi_name() != 'cli') {
echo 'Files generated successfully.';
}
// vim: et sw=4 sts=4

View File

@@ -40,3 +40,5 @@ h4 {font-family:sans-serif; font-size:0.9em; font-weight:bold; }
.deprecated {color: #CCC;}
.deprecated table.constraints th {background:#FFF;}
.deprecated-notice {color: #000; text-align:center; margin-bottom: 1em;}
/* vim: et sw=4 sts=4 */

View File

@@ -231,3 +231,5 @@
</xsl:template>
</xsl:stylesheet>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -12,3 +12,5 @@
<type id="hash">Associative array</type>
<type id="mixed">Mixed</type>
</types>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -38,7 +38,7 @@
</directive>
<directive id="CSS.AllowedProperties">
<file name="HTMLPurifier/CSSDefinition.php">
<line>274</line>
<line>275</line>
</file>
</directive>
<directive id="Cache.DefinitionImpl">
@@ -302,7 +302,7 @@
</directive>
<directive id="Core.EscapeInvalidChildren">
<file name="HTMLPurifier/ChildDef/Required.php">
<line>55</line>
<line>62</line>
</file>
</directive>
<directive id="Cache.SerializerPath">

View File

@@ -214,3 +214,5 @@ the usual things required are:</p>
<p>See <code>HTMLPurifier/HTMLModule.php</code> for details.</p>
</body></html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -25,3 +25,5 @@ URIScheme - needs to have callable generic checks
mailto - doesn't validate emails, doesn't validate querystring
news - doesn't validate opaque path
nntp - doesn't constrain path
vim: et sw=4 sts=4

View File

@@ -372,3 +372,5 @@ Test.Example</pre>
</body>
</html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -63,3 +63,5 @@
</p>
</body></html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -277,3 +277,5 @@ DEPRECATED-VERSION: If the directive was deprecated, when was it deprecated?
DEPRECATED-USE: If the directive was deprecated, what should the user use now?
REQUIRES: What classes does this configuration directive require, but are
not part of the HTML Purifier core?
vim: et sw=4 sts=4

View File

@@ -78,3 +78,5 @@ help you find the correct functionality more quickly. Here they are:</p>
</dl>
</body></html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -28,3 +28,5 @@ that itch, put it here!</p>
</ul>
</body></html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -304,3 +304,5 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
</table>
</body></html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -794,3 +794,5 @@ $form->excludes = array('form' => true);</strong></pre>
</ul>
</body></html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -143,3 +143,5 @@ anchors is beyond me.</p>
</body>
</html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -55,3 +55,5 @@ HTML tags. Things like blog comments are, in all likelihood, most appropriately
written in an extremely restrictive set of markup that doesn't require
all this functionality (or not written in HTML at all), although this may
be changing in the future with the addition of levels of filtering.
vim: et sw=4 sts=4

View File

@@ -14,3 +14,5 @@ to be effective. Things to remember:
4. CSS: document pending
Explain which CSS styles we blocked and why.
vim: et sw=4 sts=4

View File

@@ -115,3 +115,5 @@ if you decide to do that! Especially if you port HTML Purifier to C++.
</body>
</html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -226,3 +226,5 @@ Other than that, don't worry about it: this all works silently and
effectively in the background.</p>
</body></html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -212,3 +212,5 @@ $uri->registerFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>())
</p>
</body></html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -481,7 +481,7 @@ if we don't know it's character encoding? And how do we figure out
the character encoding, if we don't know the contents of the
<code>META</code> tag?</p>
<p>Fortunantely for us, the characters we need to write the
<p>Fortunately for us, the characters we need to write the
<code>META</code> are in ASCII, which is pretty much universal
over every character encoding that is in common use today. So,
all the web-browser has to do is parse all the way down until
@@ -526,7 +526,7 @@ you don't have to use those user-unfriendly entities.</p>
<h3 id="whyutf8-user">User-friendly</h3>
<p>Websites encoded in Latin-1 (ISO-8859-1) which ocassionally need
<p>Websites encoded in Latin-1 (ISO-8859-1) which occasionally need
a special character outside of their scope often will use a character
entity reference to achieve the desired effect. For instance, &theta; can be
written <code>&amp;theta;</code>, regardless of the character encoding's
@@ -584,7 +584,7 @@ disappeared off the web, so I am linking to the Web Archive copy.)</p>
<h4 id="whyutf8-forms-urlencoded"><code>application/x-www-form-urlencoded</code></h4>
<p>This is the Content-Type that GET requests must use, and POST requests
use by default. It involves the ubiquituous percent encoding format that
use by default. It involves the ubiquitous percent encoding format that
looks something like: <code>%C3%86</code>. There is no official way of
determining the character encoding of such a request, since the percent
encoding operates on a byte level, so it is usually assumed that it
@@ -674,7 +674,7 @@ it up to the module iconv to do the dirty work.</p>
<p>This approach, however, is not perfect. iconv is blithely unaware
of HTML character entities. HTML Purifier, in order to
protect against sophisticated escaping schemes, normalizes all character
and numeric entitie references before processing the text. This leads to
and numeric entity references before processing the text. This leads to
one important ramification:</p>
<p><strong>Any character that is not supported by the target character
@@ -770,7 +770,7 @@ the text when you try to convert it to UTF-8. You'll have to convert
it to a binary field, convert it to a Shift-JIS field (the real encoding),
and then finally to UTF-8. Many a website had pages irreversibly mangled
because they didn't realize that they'd been deluding themselves about
the character encoding all along, don't become the next victim.</p>
the character encoding all along; don't become the next victim.</p>
<p>For <a href="http://www.postgresql.org/docs/8.2/static/multibyte.html">PostgreSQL</a>, there appears to be no direct way to change the
encoding of a database (as of 8.2). You will have to dump the data, and then reimport
@@ -790,7 +790,7 @@ usually supported).</p>
<h4 id="migrate-db-binary">Binary</h4>
<p>Due to the abovementioned compatibility issues, a more interoperable
<p>Due to the aforementioned compatibility issues, a more interoperable
way of storing UTF-8 text is to stuff it in a binary datatype.
<code>CHAR</code> becomes <code>BINARY</code>, <code>VARCHAR</code> becomes
<code>VARBINARY</code> and <code>TEXT</code> becomes <code>BLOB</code>.
@@ -917,8 +917,8 @@ anyway. So we'll deal with the other two edge cases.</p>
would like to read your website but get heaps of question marks or
other meaningless characters. Fixing this problem requires the
installation of a font or language pack which is often highly
dependent on what the language is. <a href="http://bn.wikipedia.org/wiki/%E0%A6%89%E0%A6%87%E0%A6%95%E0%A6%BF%E0%A6%AA%E0%A7%87%E0%A6%A1%E0%A6%BF%E0%A6%AF%E0%A6%BC%E0%A6%BE:Bangla_script_display_help">Here is an example</a>
of such a help file for the Bengali language, I am sure there are
dependent on what the language is. <a href="http://bn.wikipedia.org/wiki/%E0%A6%89%E0%A6%87%E0%A6%95%E0%A6%BF%E0%A6%AA%E0%A7%87%E0%A6%A1%E0%A6%BF%E0%A6%AF%E0%A6%BC%E0%A6%BE:Bangla_script_display_and_input_help">Here is an example</a>
of such a help file for the Bengali language; I am sure there are
others out there too. You just have to point users to the appropriate
help file.</p>
@@ -928,7 +928,7 @@ help file.</p>
characters embedded in what otherwise would be very bland ASCII are
letters of the
<a href="http://en.wikipedia.org/wiki/International_Phonetic_Alphabet">International
Phonetic Alphabet (IPA)</a>, use to designate pronounciations in a very standard
Phonetic Alphabet (IPA)</a>, used to designate pronunciations in a very standard
manner (you probably see them all the time in your dictionary). Your
average font probably won't have support for all of the IPA characters
like &#664; (bilabial click) or &#658; (voiced postalveolar fricative).
@@ -941,11 +941,11 @@ most widely used browser in the entire world? Microsoft IE 6
is not smart enough to borrow from other fonts when a character isn't
present, so more often than not you'll be slapped with a nice big &#65533;.
To get things to work, MSIE 6 needs a little nudge. You could configure it
to use a different font to render the text, but you can acheive the same
to use a different font to render the text, but you can achieve the same
effect by selectively changing the font for blocks of special characters
to known good Unicode fonts.</p>
<p>Fortunantely, the folks over at Wikipedia have already done all the
<p>Fortunately, the folks over at Wikipedia have already done all the
heavy lifting for you. Get the CSS from the horse's mouth here:
<a href="http://en.wikipedia.org/wiki/MediaWiki:Common.css">Common.css</a>,
and search for &quot;.IPA&quot; There are also a smattering of
@@ -972,7 +972,7 @@ users.</p>
<h3 id="migrate-variablewidth">Dealing with variable width in functions</h3>
<p>When people claim that PHP6 will solve all our Unicode problems, they're
misinformed. It will not fix any of the abovementioned troubles. It will,
misinformed. It will not fix any of the aforementioned troubles. It will,
however, fix the problem we are about to discuss: processing UTF-8 text
in PHP.</p>
@@ -1035,7 +1035,7 @@ directory.</p>
<p>Well, that's it. Hopefully this document has served as a very
practical springboard into knowledge of how UTF-8 works. You may have
decided that you don't want to migrate yet: that's fine, just know
what will happen to your output and what bug reports you may recieve.</p>
what will happen to your output and what bug reports you may receive.</p>
<p>Many other developers have already discussed the subject of Unicode,
UTF-8 and internationalization, and I would like to defer to them for
@@ -1055,3 +1055,5 @@ a more in-depth look into character sets and encodings.</p>
</body>
</html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -148,3 +148,5 @@ with the core!</p>
</body>
</html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -20,3 +20,4 @@ $pure_html = $purifier->purify($html);
echo '<pre>' . htmlspecialchars($pure_html) . '</pre>';
// vim: et sw=4 sts=4

View File

@@ -4,3 +4,5 @@ function init() {
element.innerHTML = '&#8220;'+element.innerHTML+'&#8221;';
}
</script>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -177,3 +177,5 @@ the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
</body>
</html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -44,3 +44,5 @@ something like that?</li>
</body>
</html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -19,3 +19,5 @@ Definition objects are complex datatypes influenced by their respective
directive namespaces (HTMLDefinition with HTML and CSSDefinition with CSS).
If any of these directives is updated, HTML Purifier forces the definition
to be regenerated.
vim: et sw=4 sts=4

View File

@@ -30,3 +30,5 @@ Beyond that, HTML Purifier can magically merge common CSS values together,
and a whole manner of other heuristic things. HTML Purifier should also
make it easy for an admin to re-style the HTML semantically. Speed is not
an issue. Also, better WYSIWYG editors are needed.
vim: et sw=4 sts=4

View File

@@ -207,3 +207,5 @@ Don't forget to spruce up output.
- We need a standard CSS to apply (check demo.css for some starting
styling; some buttons would also be hip)
vim: et sw=4 sts=4

View File

@@ -133,3 +133,5 @@ Dramatic - border, list-style-position (list-style), margin, padding,
Dramatic elements substantially change the look of text in ways that should
probably have been reserved to other areas.
vim: et sw=4 sts=4

View File

@@ -60,3 +60,5 @@ Neat functionality:
- Roman numeral formatting
Items marked with a + likely need to be addressed by HTML Purifier
vim: et sw=4 sts=4

View File

@@ -41,3 +41,4 @@ implemented, give us a ring, and we'll move it up the priority chain.
absolute DNS. While this is actually the preferred method according to
the RFC, most people opt to use a relative domain name relative to . (root).
vim: et sw=4 sts=4

View File

@@ -46,3 +46,5 @@ is eliminated completely, in the latter case, the text of the node
would be preserved (as the parent node does allow PCDATA). Custom
content model implementations probably are not the best way of handling
these cases, instead, node bubbling should be implemented instead.
vim: et sw=4 sts=4

View File

@@ -26,3 +26,5 @@ Watch out: font-sizes can also be nested to get successively larger
(although I do not relish having to keep track of context font-sizes,
this may be necessary, especially for some of the more advanced features
for preventing things like white on white).
vim: et sw=4 sts=4

View File

@@ -42,3 +42,5 @@ the development of this library in these forum threads:</p>
</body>
</html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -162,3 +162,5 @@ array of content set names to content set contents. If the content set
already exists, your values are appended on to it (great for, say,
registering the font tag as an inline element), otherwise it is
created. They are substituted into content_model.
vim: et sw=4 sts=4

View File

@@ -22,3 +22,5 @@ HTML Purifier context.
These should be put into their own Tidy module, not loaded by default(?). These
all qualify as "lenient" transforms.
vim: et sw=4 sts=4

View File

@@ -22,3 +22,5 @@ another DirectLex parser, other parsers like ph5p
<http://jero.net/lab/ph5p/> can be adapted to DOMLex to support much more
flexible HTML parsing (a cool feature I've seen is how they resolve
<b>bold<i>both</b>italic</i>).
vim: et sw=4 sts=4

View File

@@ -6,3 +6,5 @@ windows-live-mail-desktop-beta.html - donated by laacz, public domain
img.png - LGPL, from <http://commons.wikimedia.org/wiki/Image:Pastille_chrome.png>
All other files are by me, and are licensed under LGPL.
vim: et sw=4 sts=4

View File

@@ -163,3 +163,5 @@ div.segment {width:250px; float:left; margin-top:1em;}
</body>
</html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -127,3 +127,5 @@ style='color:black'>www.example.com/disclaimer</span></a><o:p></o:p></span></p>
</body>
</html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -72,3 +72,5 @@ title="Join Windows Live to share photos using Windows Live Photo E-mail.">Onlin
pictures are available for 30 days. <A style="COLOR: #0088e4"
href="http://g.msn.com/5meen_us/175">Get Windows Live Mail desktop to create
your own photo e-mails. </A></SPAN></NOBR></DIV></BODY></HTML>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -72,3 +72,5 @@ q:after {
.fixme:before {content:"Fix me: "; font-weight:bold; color:#C00; }
#applicability {margin: 1em 5%; font-style:italic;}
/* vim: et sw=4 sts=4 */

View File

@@ -83,3 +83,4 @@ class ConfigDoc_HTMLXSLTProcessor
}
// vim: et sw=4 sts=4

View File

@@ -153,3 +153,5 @@ class FSTools
}
}
// vim: et sw=4 sts=4

View File

@@ -122,3 +122,5 @@ class FSTools_File
}
}
// vim: et sw=4 sts=4

View File

@@ -7,3 +7,5 @@
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
require_once 'HTMLPurifierExtras.php';
require_once 'HTMLPurifierExtras.autoload.php';
// vim: et sw=4 sts=4

View File

@@ -21,3 +21,5 @@ if (function_exists('spl_autoload_register')) {
return HTMLPurifierExtras::autoload($class);
}
}
// vim: et sw=4 sts=4

View File

@@ -25,3 +25,5 @@ class HTMLPurifierExtras
}
}
// vim: et sw=4 sts=4

View File

@@ -28,3 +28,5 @@ the filesystem. It currently consists of two classes:
method imaginable one would need.
Check the files themselves for more information.
vim: et sw=4 sts=4

View File

@@ -7,3 +7,5 @@
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
require_once 'HTMLPurifier/Bootstrap.php';
require_once 'HTMLPurifier.autoload.php';
// vim: et sw=4 sts=4

View File

@@ -17,3 +17,5 @@ if (function_exists('spl_autoload_register') && function_exists('spl_autoload_un
return HTMLPurifier_Bootstrap::autoload($class);
}
}
// vim: et sw=4 sts=4

View File

@@ -20,3 +20,4 @@ function HTMLPurifier($html, $config = null) {
return $purifier->purify($html, $config);
}
// vim: et sw=4 sts=4

View File

@@ -7,7 +7,7 @@
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
* FILE, changes will be overwritten the next time the script is run.
*
* @version 3.2.0
* @version 3.3.0
*
* @warning
* You must *not* include any other HTML Purifier files before this file,
@@ -55,6 +55,8 @@ require 'HTMLPurifier/LanguageFactory.php';
require 'HTMLPurifier/Length.php';
require 'HTMLPurifier/Lexer.php';
require 'HTMLPurifier/PercentEncoder.php';
require 'HTMLPurifier/PropertyList.php';
require 'HTMLPurifier/PropertyListIterator.php';
require 'HTMLPurifier/Strategy.php';
require 'HTMLPurifier/StringHash.php';
require 'HTMLPurifier/StringHashParser.php';

View File

@@ -26,3 +26,5 @@ function kses($string, $allowed_html, $allowed_protocols = null) {
$purifier = new HTMLPurifier($config);
return $purifier->purify($string);
}
// vim: et sw=4 sts=4

View File

@@ -7,3 +7,5 @@
*/
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
// vim: et sw=4 sts=4

View File

@@ -19,7 +19,7 @@
*/
/*
HTML Purifier 3.2.0 - Standards Compliant HTML Filtering
HTML Purifier 3.3.0 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or
@@ -55,10 +55,10 @@ class HTMLPurifier
{
/** Version of HTML Purifier */
public $version = '3.2.0';
public $version = '3.3.0';
/** Constant with version of HTML Purifier */
const VERSION = '3.2.0';
const VERSION = '3.3.0';
/** Global configuration object */
public $config;
@@ -232,3 +232,5 @@ class HTMLPurifier
}
}
// vim: et sw=4 sts=4

View File

@@ -49,6 +49,8 @@ require_once $__dir . '/HTMLPurifier/LanguageFactory.php';
require_once $__dir . '/HTMLPurifier/Length.php';
require_once $__dir . '/HTMLPurifier/Lexer.php';
require_once $__dir . '/HTMLPurifier/PercentEncoder.php';
require_once $__dir . '/HTMLPurifier/PropertyList.php';
require_once $__dir . '/HTMLPurifier/PropertyListIterator.php';
require_once $__dir . '/HTMLPurifier/Strategy.php';
require_once $__dir . '/HTMLPurifier/StringHash.php';
require_once $__dir . '/HTMLPurifier/StringHashParser.php';

View File

@@ -125,3 +125,4 @@ class HTMLPurifier_AttrCollections
}
// vim: et sw=4 sts=4

View File

@@ -84,3 +84,4 @@ abstract class HTMLPurifier_AttrDef
}
// vim: et sw=4 sts=4

View File

@@ -84,3 +84,4 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
}
// vim: et sw=4 sts=4

View File

@@ -17,3 +17,5 @@ class HTMLPurifier_AttrDef_CSS_AlphaValue extends HTMLPurifier_AttrDef_CSS_Numbe
}
}
// vim: et sw=4 sts=4

View File

@@ -84,3 +84,4 @@ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
}
// vim: et sw=4 sts=4

View File

@@ -123,3 +123,4 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
}
// vim: et sw=4 sts=4

View File

@@ -40,3 +40,4 @@ class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
}
// vim: et sw=4 sts=4

View File

@@ -75,3 +75,4 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
}
// vim: et sw=4 sts=4

View File

@@ -35,3 +35,4 @@ class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
}
// vim: et sw=4 sts=4

View File

@@ -5,7 +5,7 @@
*/
class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef
{
protected $def, $element;
public $def, $element;
/**
* @param $def Definition to wrap
@@ -24,3 +24,5 @@ class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef
return $this->def->validate($string, $config, $context);
}
}
// vim: et sw=4 sts=4

View File

@@ -50,3 +50,5 @@ class HTMLPurifier_AttrDef_CSS_Filter extends HTMLPurifier_AttrDef
}
}
// vim: et sw=4 sts=4

View File

@@ -146,3 +146,4 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
}
// vim: et sw=4 sts=4

View File

@@ -87,3 +87,4 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
}
// vim: et sw=4 sts=4

View File

@@ -5,7 +5,7 @@
*/
class HTMLPurifier_AttrDef_CSS_ImportantDecorator extends HTMLPurifier_AttrDef
{
protected $def, $allow;
public $def, $allow;
/**
* @param $def Definition to wrap
@@ -36,3 +36,5 @@ class HTMLPurifier_AttrDef_CSS_ImportantDecorator extends HTMLPurifier_AttrDef
return $string;
}
}
// vim: et sw=4 sts=4

View File

@@ -44,3 +44,4 @@ class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
}
// vim: et sw=4 sts=4

View File

@@ -75,3 +75,4 @@ class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
}
// vim: et sw=4 sts=4

View File

@@ -55,3 +55,4 @@ class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
}
// vim: et sw=4 sts=4

View File

@@ -66,3 +66,4 @@ class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
}
// vim: et sw=4 sts=4

View File

@@ -37,3 +37,4 @@ class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
}
// vim: et sw=4 sts=4

View File

@@ -35,3 +35,4 @@ class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
}
// vim: et sw=4 sts=4

View File

@@ -53,3 +53,4 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
}
// vim: et sw=4 sts=4

Some files were not shown because too many files have changed in this diff Show More