mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-03 04:37:39 +02:00
Compare commits
54 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
280211f70b | ||
|
3fd51d527c | ||
|
0e6e2c4edf | ||
|
22d24e6b04 | ||
|
3a2fd0b5db | ||
|
25fa53c15b | ||
|
0b6ae1c3c1 | ||
|
ab263a0bf1 | ||
|
c5b18d345c | ||
|
d26418ca3a | ||
|
d304c5c976 | ||
|
f7bc0b0875 | ||
|
70515dd48f | ||
|
1555cb617f | ||
|
cd4500457e | ||
|
fa413e96ac | ||
|
d0fdcc103e | ||
|
6a06b92f0c | ||
|
3184fee468 | ||
|
ed7983b559 | ||
|
92df9e5b28 | ||
|
2f41bd07fa | ||
|
c6914dce51 | ||
|
9977350143 | ||
|
d9e60350d3 | ||
|
c807ed5fe2 | ||
|
c9b6f125aa | ||
|
dc28346677 | ||
|
8423daef05 | ||
|
617f70a8ac | ||
|
0423985b45 | ||
|
e013bc9126 | ||
|
1d90bb2397 | ||
|
03dabec2c0 | ||
|
85090520f1 | ||
|
3b6aa10592 | ||
|
3a4b92da81 | ||
|
0ec9731184 | ||
|
e05bd77344 | ||
|
334ffac5b4 | ||
|
a227cb483a | ||
|
aa0fdeee30 | ||
|
ba418a1f19 | ||
|
c845f0bb78 | ||
|
594268ca3b | ||
|
965be3bd73 | ||
|
700d5bcbfc | ||
|
fd384129bf | ||
|
f8b47c64dd | ||
|
a5ceb1e22a | ||
|
636e2883df | ||
|
dba3ed7770 | ||
|
de9869d942 | ||
|
cfcdce0db8 |
4
.gitignore
vendored
4
.gitignore
vendored
@@ -1,9 +1,13 @@
|
||||
conf/
|
||||
test-settings.php
|
||||
library/HTMLPurifier/DefinitionCache/Serializer/*/
|
||||
library/standalone/
|
||||
library/HTMLPurifier.standalone.php
|
||||
configdoc/*.html
|
||||
configdoc/configdoc.xml
|
||||
*.phpt.diff
|
||||
*.phpt.exp
|
||||
*.phpt.log
|
||||
*.phpt.out
|
||||
*.phpt.php
|
||||
*.phpt.skip.php
|
||||
|
2
Doxyfile
2
Doxyfile
@@ -31,7 +31,7 @@ PROJECT_NAME = HTMLPurifier
|
||||
# This could be handy for archiving the generated documentation or
|
||||
# if some version control system is used.
|
||||
|
||||
PROJECT_NUMBER = 3.1.1
|
||||
PROJECT_NUMBER = 3.2.0
|
||||
|
||||
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
||||
# base path where the generated documentation will be put.
|
||||
|
2
FOCUS
2
FOCUS
@@ -1,4 +1,4 @@
|
||||
9 - Major security fixes
|
||||
5 - Major feature enhancements
|
||||
|
||||
[ Appendix A: Release focus IDs ]
|
||||
0 - N/A
|
||||
|
71
NEWS
71
NEWS
@@ -9,11 +9,76 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
. Internal change
|
||||
==========================
|
||||
|
||||
3.2.0, unknown release date
|
||||
|
||||
3.1.2, unknown release date
|
||||
3.2.0, released 2008-10-31
|
||||
# Using %Core.CollectErrors forces line number/column tracking on, whereas
|
||||
previously you could theoretically turn it off.
|
||||
# HTMLPurifier_Injector->notifyEnd() is formally deprecated. Please
|
||||
use handleEnd() instead.
|
||||
! %Output.AttrSort for when you need your attributes in alphabetical order to
|
||||
deal with a bug in FCKEditor. Requested by frank farmer.
|
||||
! Enable HTML comments when %HTML.Trusted is on. Requested by Waldo Jaquith.
|
||||
! Proper support for name attribute. It is now allowed and equivalent to the id
|
||||
attribute in a and img tags, and is only converted to id when %HTML.TidyLevel
|
||||
is heavy (for all doctypes).
|
||||
! %AutoFormat.RemoveEmpty to remove some empty tags from documents. Please don't
|
||||
use on hand-written HTML.
|
||||
! Add error-cases for unsupported elements in MakeWellFormed. This enables
|
||||
the strategy to be used, standalone, on untrusted input.
|
||||
! %Core.AggressivelyFixLt is on by default. This causes more sensible
|
||||
processing of left angled brackets in smileys and other whatnot.
|
||||
! Test scripts now have a 'type' parameter, which lets you say 'htmlpurifier',
|
||||
'phpt', 'vtest', etc. in order to only execute those tests. This supersedes
|
||||
the --only-phpt parameter, although for backwards-compatibility the flag
|
||||
will still work.
|
||||
! AutoParagraph auto-formatter will now preserve double-newlines upon output.
|
||||
For users who are not performing inbound filtering, this may seem a little
|
||||
useless, but as a bonus, the test suite and handling of edge cases is also
|
||||
improved.
|
||||
! Experimental implementation of forms for %HTML.Trusted
|
||||
! Track column numbers when maintain line numbers is on
|
||||
! Proprietary 'background' attribute on table-related elements converted into
|
||||
corresponding CSS. Thanks Fusemail for sponsoring this feature!
|
||||
! Add forward(), forwardUntilEndToken(), backward() and current() to Injector
|
||||
supertype.
|
||||
! HTMLPurifier_Injector->handleEnd() permits modification to end tokens. The
|
||||
time of operation varies slightly from notifyEnd() as *all* end tokens are
|
||||
processed by the injector before they are subject to the well-formedness rules.
|
||||
! %Attr.DefaultImageAlt allows overriding default behavior of setting alt to
|
||||
basename of image when not present.
|
||||
! %AutoFormat.DisplayLinkURI neuters <a> tags into plain text URLs.
|
||||
- Fix two bugs in %URI.MakeAbsolute; one involving empty paths in base URLs,
|
||||
the other involving an undefined $is_folder error.
|
||||
- Throw error when %Core.Encoding is set to a spurious value. Previously,
|
||||
this errored silently and returned false.
|
||||
- Redirected stderr to stdout for flush error output.
|
||||
- %URI.DisableExternal will now use the host in %URI.Base if %URI.Host is not
|
||||
available.
|
||||
- Do not re-munge URL if the output URL has the same host as the input URL.
|
||||
Requested by Chris.
|
||||
- Fix error in documentation regarding %Filter.ExtractStyleBlocks
|
||||
- Prevent <![CDATA[<body></body>]]> from triggering %Core.ConvertDocumentToFragment
|
||||
- Fix bug with inline elements in blockquotes conflicting with strict doctype
|
||||
- Detect if HTML support is disabled for DOM by checking for loadHTML() method.
|
||||
- Fix bug where dots and double-dots in absolute URLs without hostname were
|
||||
not collapsed by URIFilter_MakeAbsolute.
|
||||
- Fix bug with anonymous modules operating on SafeEmbed or SafeObject elements
|
||||
by reordering their addition.
|
||||
- Will now throw exception on many error conditions during lexer creation; also
|
||||
throw an exception when MaintainLineNumbers is true, but a non-tracksLineNumbers
|
||||
lexer is being used.
|
||||
- Detect if domxml extension is loaded, and use DirectLex accordingly.
|
||||
- Improve handling of big numbers with floating point arithmetic in UnitConverter.
|
||||
Reported by David Morton.
|
||||
. Strategy_MakeWellFormed now operates in-place, saving memory and allowing
|
||||
for more interesting filter-backtracking
|
||||
. New HTMLPurifier_Injector->rewind() functionality, allows injectors to rewind
|
||||
index to reprocess tokens.
|
||||
. StringHashParser now allows for multiline sections with "empty" content;
|
||||
previously the section would remain undefined.
|
||||
. Added --quick option to multitest.php, which tests only the most recent
|
||||
release for each series.
|
||||
. Added --distro option to multitest.php, which accepts either 'normal' or
|
||||
'standalone'. This supersedes --exclude-normal and --exclude-standalone
|
||||
|
||||
3.1.1, released 2008-06-19
|
||||
# %URI.Munge now, by default, does not munge resources (for example, <img src="">)
|
||||
|
15
TODO
15
TODO
@@ -14,25 +14,25 @@ afraid to cast your vote for the next feature to be implemented!
|
||||
- Investigate how early internal structures can be accessed; this would
|
||||
prevent structures from being parsed and serialized multiple times.
|
||||
- Built-in support for target="_blank" on all external links
|
||||
- Gitify the repository
|
||||
- Allow <a id="asdf" name="asdf">
|
||||
- Implement overflow CSS property (as per jlp09550)
|
||||
|
||||
FUTURE VERSIONS
|
||||
---------------
|
||||
|
||||
3.2 release [It's All About Trust] (floating)
|
||||
3.3 release [It's All About Trust] (floating)
|
||||
# Implement untrusted, dangerous elements/attributes
|
||||
- Forms are especially wanted
|
||||
# Implement IDREF support (harder than it seems, since you cannot have
|
||||
IDREFs to non-existent IDs)
|
||||
# Frameset XHTML 1.0 and HTML 4.01 doctypes
|
||||
- Implement <area>
|
||||
- Figure out how to simultaneously set %CSS.Trusted and %HTML.Trusted (?)
|
||||
|
||||
3.3 release [Error'ed]
|
||||
3.4 release [Error'ed]
|
||||
# Error logging for filtering/cleanup procedures
|
||||
- XSS-attempt detection--certain errors are flagged XSS-like
|
||||
|
||||
3.4 release [Do What I Mean, Not What I Say]
|
||||
3.5 release [Do What I Mean, Not What I Say]
|
||||
# Additional support for poorly written HTML
|
||||
- Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!)
|
||||
- Friendly strict handling of <address> (block -> <br>)
|
||||
@@ -43,7 +43,6 @@ FUTURE VERSIONS
|
||||
contents should be dropped or not (currently, there's code that could do
|
||||
something like this if it didn't drop the inner text too.)
|
||||
- Remove <span> tags that don't do anything (no attributes)
|
||||
- Remove empty inline tags<i></i>
|
||||
- Append something to duplicate IDs so they're still usable (impl. note: the
|
||||
dupe detector would also need to detect the suffix as well)
|
||||
- Externalize inline CSS to promote clean HTML, proposed by Sander Tekelenburg
|
||||
@@ -53,14 +52,12 @@ FUTURE VERSIONS
|
||||
AttrDef class). Probably will use CSSTidy class?
|
||||
# More control over allowed CSS properties using a modularization
|
||||
# HTML 5 support
|
||||
# IRI support
|
||||
# IRI support (this includes IDN)
|
||||
- Standardize token armor for all areas of processing
|
||||
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
|
||||
Also, enable disabling of directionality
|
||||
|
||||
5.0 release [To XML and Beyond]
|
||||
- AllowedAttributes and ForbiddenAttributes step on the toes of XML by
|
||||
using periods; this needs to be changed.
|
||||
- Extended HTML capabilities based on namespacing and tag transforms (COMPLEX)
|
||||
- Hooks for adding custom processors to custom namespaced tags and
|
||||
attributes, offer default implementation
|
||||
|
14
WHATSNEW
14
WHATSNEW
@@ -1,8 +1,6 @@
|
||||
HTML Purifier 3.1.1 is a security and bugfix release. This release addresses
|
||||
two security vulnerabilities, both related to CSS, and one of which only
|
||||
applies to users using Shift_JIS as their output encoding. There is also
|
||||
a security improvement regarding the imagecrash attack. There is a backwards
|
||||
incompatible change in which resources are no longer munged
|
||||
by default; please enable using %URI.MungeResources. Besides this, there
|
||||
are numerous improvements to URI munging, esp. with the addition of
|
||||
%URI.MungeSecretKey, as well as an experimental %HTML.SafeObject and %HTML.SafeEmbed.
|
||||
HTML Purifier 3.2.0 is an amalgamation of new features and fixes that
|
||||
have accumulated over a four month period. Some notable features
|
||||
include %AutoFormat.RemoveEmpty, column tracking for tokens,
|
||||
%AutoFormat.DisplayLinkURI and %Attr.DefaultImageAlt. There were also
|
||||
major improvements to the test suite interface, error collection output
|
||||
and the auto-formatter framework.
|
||||
|
@@ -5,15 +5,15 @@
|
||||
<line>131</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>85</line>
|
||||
<line>81</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Lexer/DirectLex.php">
|
||||
<line>50</line>
|
||||
<line>62</line>
|
||||
<line>327</line>
|
||||
<line>53</line>
|
||||
<line>73</line>
|
||||
<line>348</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>44</line>
|
||||
<line>47</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="CSS.MaxImgLength">
|
||||
@@ -69,29 +69,18 @@
|
||||
<directive id="Core.Encoding">
|
||||
<file name="HTMLPurifier/Encoder.php">
|
||||
<line>267</line>
|
||||
<line>294</line>
|
||||
<line>300</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Test.ForceNoIconv">
|
||||
<file name="HTMLPurifier/Encoder.php">
|
||||
<line>272</line>
|
||||
<line>302</line>
|
||||
<line>308</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.EscapeNonASCIICharacters">
|
||||
<file name="HTMLPurifier/Encoder.php">
|
||||
<line>298</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.MaintainLineNumbers">
|
||||
<file name="HTMLPurifier/ErrorCollector.php">
|
||||
<line>81</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>82</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Lexer/DirectLex.php">
|
||||
<line>45</line>
|
||||
<line>304</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Output.CommentScriptContents">
|
||||
@@ -151,41 +140,44 @@
|
||||
</directive>
|
||||
<directive id="HTML.Trusted">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>198</line>
|
||||
<line>202</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>238</line>
|
||||
<line>258</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/HTMLModule/Image.php">
|
||||
<line>27</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Lexer/DirectLex.php">
|
||||
<line>34</line>
|
||||
<line>36</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>23</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.AllowedModules">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>205</line>
|
||||
<line>209</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.CoreModules">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>206</line>
|
||||
<line>210</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.Proprietary">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>220</line>
|
||||
<line>221</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.SafeObject">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>225</line>
|
||||
<line>226</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.SafeEmbed">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>228</line>
|
||||
<line>229</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.IDBlacklist">
|
||||
@@ -200,21 +192,26 @@
|
||||
</directive>
|
||||
<directive id="Core.LexerImpl">
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>70</line>
|
||||
<line>76</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.MaintainLineNumbers">
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>80</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Lexer/DirectLex.php">
|
||||
<line>48</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.ConvertDocumentToFragment">
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>230</line>
|
||||
<line>267</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.Host">
|
||||
<file name="HTMLPurifier/URIDefinition.php">
|
||||
<line>64</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/URIFilter/DisableExternal.php">
|
||||
<line>8</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.Base">
|
||||
<file name="HTMLPurifier/URIDefinition.php">
|
||||
@@ -293,9 +290,14 @@
|
||||
<line>19</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.DefaultImageAlt">
|
||||
<file name="HTMLPurifier/AttrTransform/ImgRequired.php">
|
||||
<line>25</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.DefaultInvalidImageAlt">
|
||||
<file name="HTMLPurifier/AttrTransform/ImgRequired.php">
|
||||
<line>27</line>
|
||||
<line>32</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.EscapeInvalidChildren">
|
||||
@@ -361,12 +363,12 @@
|
||||
</directive>
|
||||
<directive id="Core.DirectLexLineNumberSyncInterval">
|
||||
<file name="HTMLPurifier/Lexer/DirectLex.php">
|
||||
<line>59</line>
|
||||
<line>70</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.EscapeInvalidTags">
|
||||
<file name="HTMLPurifier/Strategy/MakeWellFormed.php">
|
||||
<line>22</line>
|
||||
<line>45</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>19</line>
|
||||
@@ -374,12 +376,12 @@
|
||||
</directive>
|
||||
<directive id="Core.RemoveScriptContents">
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>22</line>
|
||||
<line>25</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.HiddenElements">
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>23</line>
|
||||
<line>26</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.HostBlacklist">
|
||||
|
@@ -213,6 +213,4 @@ the usual things required are:</p>
|
||||
|
||||
<p>See <code>HTMLPurifier/HTMLModule.php</code> for details.</p>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
||||
|
@@ -239,15 +239,15 @@ Test.Example</pre>
|
||||
object; users have a little bit of leeway when setting configuration
|
||||
values (for example, a lookup value can be specified as a list;
|
||||
HTML Purifier will flip it as necessary.) These types are defined
|
||||
in <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/VarParser.php">
|
||||
in <a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/VarParser.php">
|
||||
library/HTMLPurifier/VarParser.php</a>.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
For more information on what values are allowed, and how they are parsed,
|
||||
consult <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php">
|
||||
consult <a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php">
|
||||
library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php</a>, as well
|
||||
as <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/ConfigSchema/Interchange/Directive.php">
|
||||
as <a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ConfigSchema/Interchange/Directive.php">
|
||||
library/HTMLPurifier/ConfigSchema/Interchange/Directive.php</a> for
|
||||
the semantics of the parsed values.
|
||||
</p>
|
||||
@@ -272,7 +272,7 @@ Test.Example</pre>
|
||||
|
||||
<p>
|
||||
All directive files go through a rigorous validation process
|
||||
through <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/ConfigSchema/">
|
||||
through <a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ConfigSchema/Validator.php">
|
||||
library/HTMLPurifier/ConfigSchema/Validator.php</a>, as well
|
||||
as some basic checks during building. While
|
||||
listing every error out here is out-of-scope for this document, we
|
||||
@@ -339,7 +339,7 @@ Test.Example</pre>
|
||||
The most difficult part is translating the Interchange member variable (valueAliases)
|
||||
into a directive file key (VALUE-ALIASES), but there's a one-to-one
|
||||
correspondence currently. If the two formats diverge, any discrepancies
|
||||
will be described in <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php">
|
||||
will be described in <a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php">
|
||||
library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php</a>.
|
||||
</p>
|
||||
|
||||
@@ -369,8 +369,6 @@ Test.Example</pre>
|
||||
which <code>HTMLPurifier_Config</code> uses to validate its incoming
|
||||
data. There is also an XML serializer, which is used to build documentation.
|
||||
</p>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
@@ -62,6 +62,4 @@
|
||||
do.
|
||||
</p>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
||||
|
@@ -77,6 +77,4 @@ help you find the correct functionality more quickly. Here they are:</p>
|
||||
|
||||
</dl>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
||||
|
@@ -27,6 +27,4 @@ that itch, put it here!</p>
|
||||
<li>Parallelize strategies</li>
|
||||
</ul>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
||||
|
@@ -303,6 +303,4 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
||||
|
||||
</table>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
||||
|
@@ -213,7 +213,7 @@ $def = $config->getHTMLDefinition(true);</pre>
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML', 'DefinitionRev', 1);
|
||||
<strong>$config->set('Core', 'DefinitionCache', null); // remove this later!</strong>
|
||||
<strong>$config->set('Cache', 'DefinitionImpl', null); // remove this later!</strong>
|
||||
$def = $config->getHTMLDefinition(true);</pre>
|
||||
|
||||
<p>
|
||||
@@ -269,7 +269,7 @@ $def = $config->getHTMLDefinition(true);</pre>
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML', 'DefinitionRev', 1);
|
||||
$config->set('Core', 'DefinitionCache', null); // remove this later!
|
||||
$config->set('Cache', 'DefinitionImpl', null); // remove this later!
|
||||
$def = $config->getHTMLDefinition(true);
|
||||
<strong>$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');</strong></pre>
|
||||
|
||||
@@ -372,10 +372,10 @@ $def = $config->getHTMLDefinition(true);
|
||||
|
||||
<p>
|
||||
For a complete list, consult
|
||||
<a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/AttrTypes.php"><code>library/HTMLPurifier/AttrTypes.php</code></a>;
|
||||
<a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/AttrTypes.php"><code>library/HTMLPurifier/AttrTypes.php</code></a>;
|
||||
more information on attributes that accept parameters can be found on their
|
||||
respective includes in
|
||||
<a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/AttrDef/"><code>library/HTMLPurifier/AttrDef</code></a>.
|
||||
<a href="http://repo.or.cz/w/htmlpurifier.git?a=tree;hb=HEAD;f=library/HTMLPurifier/AttrDef"><code>library/HTMLPurifier/AttrDef</code></a>.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
@@ -387,7 +387,7 @@ $def = $config->getHTMLDefinition(true);
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML', 'DefinitionRev', 1);
|
||||
$config->set('Core', 'DefinitionCache', null); // remove this later!
|
||||
$config->set('Cache', 'DefinitionImpl', null); // remove this later!
|
||||
$def = $config->getHTMLDefinition(true);
|
||||
<strong>$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
|
||||
array('_blank','_self','_target','_top')
|
||||
@@ -734,7 +734,7 @@ $def = $config->getHTMLDefinition(true);
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML', 'DefinitionRev', 1);
|
||||
$config->set('Core', 'DefinitionCache', null); // remove this later!
|
||||
$config->set('Cache', 'DefinitionImpl', null); // remove this later!
|
||||
$def = $config->getHTMLDefinition(true);
|
||||
$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
|
||||
array('_blank','_self','_target','_top')
|
||||
@@ -764,7 +764,7 @@ $form->excludes = array('form' => true);</strong></pre>
|
||||
<p>
|
||||
And that's all there is to it! Implementing the rest of the form
|
||||
module is left as an exercise to the user; to see more examples
|
||||
check the <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/HTMLModule/"><code>library/HTMLPurifier/HTMLModule/</code></a> directory
|
||||
check the <a href="http://repo.or.cz/w/htmlpurifier.git?a=tree;hb=HEAD;f=library/HTMLPurifier/HTMLModule"><code>library/HTMLPurifier/HTMLModule/</code></a> directory
|
||||
in your local HTML Purifier installation.
|
||||
</p>
|
||||
|
||||
@@ -789,10 +789,8 @@ $form->excludes = array('form' => true);</strong></pre>
|
||||
</p>
|
||||
|
||||
<ul>
|
||||
<li><a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/HTMLModule.php"><code>library/HTMLPurifier/HTMLModule.php</code></a></li>
|
||||
<li><a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/ElementDef.php"><code>library/HTMLPurifier/ElementDef.php</code></a></li>
|
||||
<li><a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/HTMLModule.php"><code>library/HTMLPurifier/HTMLModule.php</code></a></li>
|
||||
<li><a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ElementDef.php"><code>library/HTMLPurifier/ElementDef.php</code></a></li>
|
||||
</ul>
|
||||
|
||||
<div id="version">$Id: enduser-tidy.html 1158 2007-06-18 19:26:29Z Edward $</div>
|
||||
|
||||
</body></html>
|
||||
|
@@ -141,7 +141,5 @@ anchors is beyond me.</p>
|
||||
|
||||
<p>Don't come crying to me when your page mysteriously stops validating, though.</p>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
@@ -225,6 +225,4 @@ and if that still doesn't satisfy your appetite, do some fine-tuning.
|
||||
Other than that, don't worry about it: this all works silently and
|
||||
effectively in the background.</p>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
||||
|
@@ -205,12 +205,10 @@ $uri->registerFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>())
|
||||
|
||||
<p>
|
||||
Check the
|
||||
<a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/URIFilter/">URIFilter</a>
|
||||
directory for more implementation examples, and see <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/docs/proposal-new-directives.txt">the
|
||||
<a href="http://repo.or.cz/w/htmlpurifier.git?a=tree;hb=HEAD;f=library/HTMLPurifier/URIFilter">URIFilter</a>
|
||||
directory for more implementation examples, and see <a href="proposal-new-directives.txt">the
|
||||
new directives proposal document</a> for ideas on what could be implemented
|
||||
as a filter.
|
||||
</p>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
||||
|
@@ -589,8 +589,10 @@ looks something like: <code>%C3%86</code>. There is no official way of
|
||||
determining the character encoding of such a request, since the percent
|
||||
encoding operates on a byte level, so it is usually assumed that it
|
||||
is the same as the encoding the page containing the form was submitted
|
||||
in. You'll run into very few problems if you only use characters in
|
||||
the character encoding you chose.</p>
|
||||
in. (<a href="http://tools.ietf.org/html/rfc3986#section-2.5">RFC 3986</a>
|
||||
recommends that textual identifiers be translated to UTF-8; however, browser
|
||||
compliance is spotty.) You'll run into very few problems
|
||||
if you only use characters in the character encoding you chose.</p>
|
||||
|
||||
<p>However, once you start adding characters outside of your encoding
|
||||
(and this is a lot more common than you may think: take curly
|
||||
|
@@ -70,7 +70,7 @@ into your documents. YouTube's code goes like this:</p>
|
||||
class="embed-youtube">AyPzM5WK8ys</span></code> your
|
||||
application can reconstruct the full object from this small snippet that
|
||||
passes through HTML Purifier <em>unharmed</em>.
|
||||
<a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/Filter/YouTube.php">Show me the code!</a></p>
|
||||
<a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/Filter/YouTube.php">Show me the code!</a></p>
|
||||
|
||||
<p>And the corresponding usage:</p>
|
||||
|
||||
|
@@ -98,8 +98,8 @@ the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
|
||||
<table class="table">
|
||||
|
||||
<thead><tr>
|
||||
<th width="10%">Type</th>
|
||||
<th width="20%">Name</th>
|
||||
<th style="width:10%">Type</th>
|
||||
<th style="width:20%">Name</th>
|
||||
<th>Description</th>
|
||||
</tr></thead>
|
||||
|
||||
@@ -175,6 +175,5 @@ the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
|
||||
|
||||
</table>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
</body>
|
||||
</html>
|
||||
|
@@ -42,7 +42,5 @@ into the mix.</li>
|
||||
something like that?</li>
|
||||
</ol>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
209
docs/proposal-errors.txt
Normal file
209
docs/proposal-errors.txt
Normal file
@@ -0,0 +1,209 @@
|
||||
Considerations for ErrorCollection
|
||||
|
||||
Presently, HTML Purifier takes a code-execution centric approach to handling
|
||||
errors. Errors are organized and grouped according to which segment of the
|
||||
code triggers them, not necessarily the portion of the input document that
|
||||
triggered the error. This means that errors are pseudo-sorted by category,
|
||||
rather than location in the document.
|
||||
|
||||
One easy way to "fix" this problem would be to re-sort according to line number.
|
||||
However, the "category" style information we derive from naively following
|
||||
program execution is still useful. After all, each of the strategies which
|
||||
can report errors still process the document mostly linearly. Furthermore,
|
||||
not only do they process linearly, but the way they pass off operations to
|
||||
sub-systems mirrors that of the document. For example, AttrValidator will
|
||||
linearly proceed through elements, and on each element will use AttrDef to
|
||||
validate those contents. From there, the attribute might have more
|
||||
sub-components, which have execution passed off accordingly.
|
||||
|
||||
In fact, each strategy handles a very specific class of "error."
|
||||
|
||||
RemoveForeignElements - element tokens
|
||||
MakeWellFormed - element token ordering
|
||||
FixNesting - element token ordering
|
||||
ValidateAttributes - attributes of elements
|
||||
|
||||
The crucial point is that while we care about the hierarchy governing these
|
||||
different errors, we *don't* care about any other information about what actually
|
||||
happens to the elements. This brings up another point: if HTML Purifier fixes
|
||||
something, this is not really a notice/warning/error; it's really a suggestion
|
||||
of a way to fix the aforementioned defects.
|
||||
|
||||
In short, the refactoring to take this into account kinda sucks.
|
||||
|
||||
Errors should not be recorded in the order that they are reported. Instead, they
|
||||
should be bound to the line (and preferably element) in which they were found.
|
||||
This means we need some way to uniquely identify every element in the document,
|
||||
which doesn't presently exist. An easy way of adding this would be to track
|
||||
line columns. An important ramification of this is that we *must* use the
|
||||
DirectLex implementation.
|
||||
|
||||
1. Implement column numbers for DirectLex [DONE!]
|
||||
2. Disable error collection when not using DirectLex [DONE!]
|
||||
|
||||
Next, we need to re-orient all of the error declarations to place CurrentToken
|
||||
at utmost importance. Since this is passed via Context, it's not always clear
|
||||
if that's available. ErrorCollector should complain HARD if it isn't available.
|
||||
There are some locations where we don't have a token available. These include:
|
||||
|
||||
* Lexing - this can actually have a row and column, but NOT correspond to
|
||||
a token
|
||||
* End of document errors - bump this to the end
|
||||
|
||||
Actually, we *don't* have to complain if CurrentToken isn't available; we just
|
||||
set it as a document-wide error. And actually, nothing needs to be done here.
|
||||
|
||||
Something interesting to consider is whether or not we care about the locations
|
||||
of attributes and CSS properties, i.e. the sub-objects that compose these things.
|
||||
In terms of consistency, at the very least attributes should have column/line
|
||||
numbers attached to them. However, this may be overkill, as attributes are
|
||||
uniquely identifiable. You could go even further, with CSS, but they are also
|
||||
uniquely identifiable.
|
||||
|
||||
The bottom line, however, is that this information must be available, in the form of the
|
||||
CurrentAttribute and CurrentCssProperty (theoretical) context variables, and
|
||||
it must be used to organize the errors that the sub-processes may throw.
|
||||
There is also a hierarchy of sorts that may make merging this into one context
|
||||
variable more sensible, if it hadn't been for HTML's reasonably rigid structure.
|
||||
A CSS property will never contain an HTML attribute. So we won't ever get
|
||||
recursive relations, and having multiple depths won't ever make sense. Leave
|
||||
this be.
|
||||
|
||||
We already have this information, and consequently, using start and end is
|
||||
*unnecessary*, so long as the context variables are set appropriately. We don't
|
||||
care if an error was thrown by an attribute transform or an attribute definition;
|
||||
to the end user these are the same (for a developer, they are different, but
|
||||
they're better off with a stack trace (which we should add support for) in such
|
||||
cases).
|
||||
|
||||
3. Remove start()/end() code. Don't get rid of recursion, though [DONE]
|
||||
4. Setup ErrorCollector to use context information to setup hierarchies.
|
||||
This may require a different internal format. Use objects if it gets
|
||||
complex. [DONE]
|
||||
|
||||
ASIDE
|
||||
More on this topic: since we are now binding errors to lines
|
||||
and columns, a particular error can have three relationships to that
|
||||
specific location:
|
||||
|
||||
1. The token at that location directly
|
||||
RemoveForeignElements
|
||||
AttrValidator (transforms)
|
||||
MakeWellFormed
|
||||
2. A "component" of that token (i.e. attribute)
|
||||
AttrValidator (removals)
|
||||
3. A modification to that node (i.e. contents from start to end
|
||||
token) as a whole
|
||||
FixNesting
|
||||
|
||||
This needs to be marked accordingly. In the presentation, it might
|
||||
make sense to keep (3) separate, and have (2) as a sublist of (1). (1) can
|
||||
be a closing tag, in which case (3) makes no sense at all, OR it
|
||||
should be related with its opening tag (this may not necessarily
|
||||
be possible before MakeWellFormed is run).
|
||||
|
||||
So, the line and column count as our identifier, so:
|
||||
|
||||
$errors[$line][$col] = ...
|
||||
|
||||
Then, we need to identify case 1, 2 or 3. They are identified as
|
||||
such:
|
||||
|
||||
1. Need some sort of semaphore in RemoveForeignElements, etc.
|
||||
2. If CurrentAttr/CurrentCssProperty is non-null
|
||||
3. Default (FixNesting, MakeWellFormed)
|
||||
|
||||
One consideration about (1) is that it usually is actually a
|
||||
(3) modification, but we have no way of knowing about that because
|
||||
of various optimizations. However, they can probably be treated
|
||||
the same. The other difficulty is that (3) is never a line and
|
||||
column; rather, it is a range (i.e. a duple) and telling the user
|
||||
the very start of the range may confuse them. For example,
|
||||
|
||||
<b>Foo<div>bar</div></b>
|
||||
^ ^
|
||||
|
||||
The node being operated on is <b>, so the error would be assigned
|
||||
to the first caret, with a "node reorganized" error. Then, the
|
||||
ChildDef would have submitted its own suggestions and errors with
|
||||
regard to what's going on in the internals. So I suppose this is
|
||||
ok. :-)
|
||||
|
||||
Now, the structure of the earlier mentioned ... would be something
|
||||
like this:
|
||||
|
||||
object {
|
||||
type = (token|attr|property),
|
||||
value, // appropriate for type
|
||||
errors => array(),
|
||||
sub-errors = [recursive],
|
||||
}
|
||||
|
||||
This helps us keep things agnostic. It is also sufficiently complex
|
||||
to warrant an object.
|
||||
|
||||
So, more wanking about the object format is in order. The way HTML Purifier is
|
||||
currently setup, the only possible hierarchy is:
|
||||
|
||||
token -> attr -> css property
|
||||
|
||||
These relations do not exist all of the time; a comment or end token would not
|
||||
ever have any attributes, and non-style attributes would never have CSS properties
|
||||
associated with them.
|
||||
|
||||
I believe that it is worth supporting multiple paths. At some point, we might
|
||||
have a hierarchy like:
|
||||
|
||||
* -> syntax
|
||||
-> token -> attr -> css property
|
||||
-> url
|
||||
-> css stylesheet <style>
|
||||
|
||||
et cetera. Now, one of the practical implications of this is that every "node"
|
||||
on our tree is well-defined, so in theory it should be possible to either 1.
|
||||
create a separate class for each error struct, or 2. embed this information
|
||||
directly into HTML Purifier's token stream. Embedding the information in the
|
||||
token stream is not a terribly good idea, since tokens can be removed, etc.
|
||||
So that leaves us with 1... and if we use a generic interface we can cut down
|
||||
on a lot of code we might need. So let's leave it like this.
|
||||
|
||||
~~~~
|
||||
|
||||
Then we setup suggestions.
|
||||
|
||||
5. Setup a separate error class which tells the user any modifications
|
||||
HTML Purifier made.
|
||||
|
||||
Some information about this:
|
||||
|
||||
Our current paradigm is to tell the user what HTML Purifier did to the HTML.
|
||||
This is the most natural mode of operation, since that's what HTML Purifier
|
||||
is all about; it was not meant to be a validator.
|
||||
|
||||
However, most other people have experience dealing with a validator. In cases
|
||||
where HTML Purifier unambiguously does the right thing, simply giving the user
|
||||
the correct version isn't a bad idea, but problems arise when:
|
||||
|
||||
- The user has such bad HTML we do something odd, when we should have just
|
||||
flagged the HTML as an error. Such examples are when we do things like
|
||||
remove text from directly inside a <table> tag. It was probably meant to
|
||||
be in a <td> tag or be outside the table, but we're not smart enough to
|
||||
realize this so we just remove it. In such a case, we should tell the user
|
||||
that there was foreign data in the table, but then we shouldn't "demand"
|
||||
the user remove the data; it's more of a "here's a possible way of
|
||||
rectifying the problem"
|
||||
|
||||
- Giving line context for input is hard enough, but feasible; giving output
|
||||
line context will be extremely difficult due to shifting lines; we'd probably
|
||||
have to track what the tokens are and then find the appropriate out context
|
||||
and it's not guaranteed to work etc etc etc.
|
||||
|
||||
````````````
|
||||
|
||||
Don't forget to spruce up output.
|
||||
|
||||
6. Output needs to automatically give line and column numbers, basically
|
||||
"at line" on steroids. Look at W3C's output; it's ok. [PARTIALLY DONE]
|
||||
|
||||
- We need a standard CSS to apply (check demo.css for some starting
|
||||
styling; some buttons would also be hip)
|
@@ -40,6 +40,5 @@ the development of this library in these forum threads:</p>
|
||||
|
||||
<p>...as well as any I may have forgotten.</p>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
</body>
|
||||
</html>
|
||||
|
@@ -7,7 +7,7 @@
|
||||
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
|
||||
* FILE, changes will be overwritten the next time the script is run.
|
||||
*
|
||||
* @version 3.1.1
|
||||
* @version 3.2.0
|
||||
*
|
||||
* @warning
|
||||
* You must *not* include any other HTML Purifier files before this file,
|
||||
@@ -41,6 +41,7 @@ require 'HTMLPurifier/Encoder.php';
|
||||
require 'HTMLPurifier/EntityLookup.php';
|
||||
require 'HTMLPurifier/EntityParser.php';
|
||||
require 'HTMLPurifier/ErrorCollector.php';
|
||||
require 'HTMLPurifier/ErrorStruct.php';
|
||||
require 'HTMLPurifier/Exception.php';
|
||||
require 'HTMLPurifier/Filter.php';
|
||||
require 'HTMLPurifier/Generator.php';
|
||||
@@ -108,6 +109,7 @@ require 'HTMLPurifier/AttrDef/URI/Host.php';
|
||||
require 'HTMLPurifier/AttrDef/URI/IPv4.php';
|
||||
require 'HTMLPurifier/AttrDef/URI/IPv6.php';
|
||||
require 'HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
|
||||
require 'HTMLPurifier/AttrTransform/Background.php';
|
||||
require 'HTMLPurifier/AttrTransform/BdoDir.php';
|
||||
require 'HTMLPurifier/AttrTransform/BgColor.php';
|
||||
require 'HTMLPurifier/AttrTransform/BoolToCSS.php';
|
||||
@@ -115,6 +117,7 @@ require 'HTMLPurifier/AttrTransform/Border.php';
|
||||
require 'HTMLPurifier/AttrTransform/EnumToCSS.php';
|
||||
require 'HTMLPurifier/AttrTransform/ImgRequired.php';
|
||||
require 'HTMLPurifier/AttrTransform/ImgSpace.php';
|
||||
require 'HTMLPurifier/AttrTransform/Input.php';
|
||||
require 'HTMLPurifier/AttrTransform/Lang.php';
|
||||
require 'HTMLPurifier/AttrTransform/Length.php';
|
||||
require 'HTMLPurifier/AttrTransform/Name.php';
|
||||
@@ -122,6 +125,7 @@ require 'HTMLPurifier/AttrTransform/SafeEmbed.php';
|
||||
require 'HTMLPurifier/AttrTransform/SafeObject.php';
|
||||
require 'HTMLPurifier/AttrTransform/SafeParam.php';
|
||||
require 'HTMLPurifier/AttrTransform/ScriptRequired.php';
|
||||
require 'HTMLPurifier/AttrTransform/Textarea.php';
|
||||
require 'HTMLPurifier/ChildDef/Chameleon.php';
|
||||
require 'HTMLPurifier/ChildDef/Custom.php';
|
||||
require 'HTMLPurifier/ChildDef/Empty.php';
|
||||
@@ -137,10 +141,12 @@ require 'HTMLPurifier/DefinitionCache/Decorator/Memory.php';
|
||||
require 'HTMLPurifier/HTMLModule/Bdo.php';
|
||||
require 'HTMLPurifier/HTMLModule/CommonAttributes.php';
|
||||
require 'HTMLPurifier/HTMLModule/Edit.php';
|
||||
require 'HTMLPurifier/HTMLModule/Forms.php';
|
||||
require 'HTMLPurifier/HTMLModule/Hypertext.php';
|
||||
require 'HTMLPurifier/HTMLModule/Image.php';
|
||||
require 'HTMLPurifier/HTMLModule/Legacy.php';
|
||||
require 'HTMLPurifier/HTMLModule/List.php';
|
||||
require 'HTMLPurifier/HTMLModule/Name.php';
|
||||
require 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
|
||||
require 'HTMLPurifier/HTMLModule/Object.php';
|
||||
require 'HTMLPurifier/HTMLModule/Presentation.php';
|
||||
@@ -155,14 +161,17 @@ require 'HTMLPurifier/HTMLModule/Target.php';
|
||||
require 'HTMLPurifier/HTMLModule/Text.php';
|
||||
require 'HTMLPurifier/HTMLModule/Tidy.php';
|
||||
require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
|
||||
require 'HTMLPurifier/HTMLModule/Tidy/Name.php';
|
||||
require 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
|
||||
require 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
|
||||
require 'HTMLPurifier/HTMLModule/Tidy/Strict.php';
|
||||
require 'HTMLPurifier/HTMLModule/Tidy/Transitional.php';
|
||||
require 'HTMLPurifier/HTMLModule/Tidy/XHTML.php';
|
||||
require 'HTMLPurifier/Injector/AutoParagraph.php';
|
||||
require 'HTMLPurifier/Injector/DisplayLinkURI.php';
|
||||
require 'HTMLPurifier/Injector/Linkify.php';
|
||||
require 'HTMLPurifier/Injector/PurifierLinkify.php';
|
||||
require 'HTMLPurifier/Injector/RemoveEmpty.php';
|
||||
require 'HTMLPurifier/Injector/SafeObject.php';
|
||||
require 'HTMLPurifier/Lexer/DOMLex.php';
|
||||
require 'HTMLPurifier/Lexer/DirectLex.php';
|
||||
|
@@ -19,7 +19,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
HTML Purifier 3.1.1 - Standards Compliant HTML Filtering
|
||||
HTML Purifier 3.2.0 - Standards Compliant HTML Filtering
|
||||
Copyright (C) 2006-2008 Edward Z. Yang
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
@@ -55,10 +55,10 @@ class HTMLPurifier
|
||||
{
|
||||
|
||||
/** Version of HTML Purifier */
|
||||
public $version = '3.1.1';
|
||||
public $version = '3.2.0';
|
||||
|
||||
/** Constant with version of HTML Purifier */
|
||||
const VERSION = '3.1.1';
|
||||
const VERSION = '3.2.0';
|
||||
|
||||
/** Global configuration object */
|
||||
public $config;
|
||||
|
@@ -35,6 +35,7 @@ require_once $__dir . '/HTMLPurifier/Encoder.php';
|
||||
require_once $__dir . '/HTMLPurifier/EntityLookup.php';
|
||||
require_once $__dir . '/HTMLPurifier/EntityParser.php';
|
||||
require_once $__dir . '/HTMLPurifier/ErrorCollector.php';
|
||||
require_once $__dir . '/HTMLPurifier/ErrorStruct.php';
|
||||
require_once $__dir . '/HTMLPurifier/Exception.php';
|
||||
require_once $__dir . '/HTMLPurifier/Filter.php';
|
||||
require_once $__dir . '/HTMLPurifier/Generator.php';
|
||||
@@ -102,6 +103,7 @@ require_once $__dir . '/HTMLPurifier/AttrDef/URI/Host.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/URI/IPv4.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/URI/IPv6.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Background.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/BdoDir.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/BgColor.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/BoolToCSS.php';
|
||||
@@ -109,6 +111,7 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/Border.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/EnumToCSS.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/ImgRequired.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/ImgSpace.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Input.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Lang.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Length.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Name.php';
|
||||
@@ -116,6 +119,7 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/SafeEmbed.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';
|
||||
@@ -131,10 +135,12 @@ require_once $__dir . '/HTMLPurifier/DefinitionCache/Decorator/Memory.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Bdo.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/CommonAttributes.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Edit.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Forms.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Hypertext.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/List.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Name.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Object.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Presentation.php';
|
||||
@@ -149,14 +155,17 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Name.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Strict.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Transitional.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/XHTML.php';
|
||||
require_once $__dir . '/HTMLPurifier/Injector/AutoParagraph.php';
|
||||
require_once $__dir . '/HTMLPurifier/Injector/DisplayLinkURI.php';
|
||||
require_once $__dir . '/HTMLPurifier/Injector/Linkify.php';
|
||||
require_once $__dir . '/HTMLPurifier/Injector/PurifierLinkify.php';
|
||||
require_once $__dir . '/HTMLPurifier/Injector/RemoveEmpty.php';
|
||||
require_once $__dir . '/HTMLPurifier/Injector/SafeObject.php';
|
||||
require_once $__dir . '/HTMLPurifier/Lexer/DOMLex.php';
|
||||
require_once $__dir . '/HTMLPurifier/Lexer/DirectLex.php';
|
||||
|
@@ -42,10 +42,7 @@ class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
|
||||
}
|
||||
|
||||
if (empty($ret_lookup)) return false;
|
||||
|
||||
$ret_array = array();
|
||||
foreach ($ret_lookup as $part => $bool) $ret_array[] = $part;
|
||||
$string = implode(' ', $ret_array);
|
||||
$string = implode(' ', array_keys($ret_lookup));
|
||||
|
||||
return $string;
|
||||
|
||||
|
22
library/HTMLPurifier/AttrTransform/Background.php
Normal file
22
library/HTMLPurifier/AttrTransform/Background.php
Normal file
@@ -0,0 +1,22 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Pre-transform that changes proprietary background attribute to CSS.
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_Background extends HTMLPurifier_AttrTransform {
|
||||
|
||||
public function transform($attr, $config, $context) {
|
||||
|
||||
if (!isset($attr['background'])) return $attr;
|
||||
|
||||
$background = $this->confiscateAttr($attr, 'background');
|
||||
// some validation should happen here
|
||||
|
||||
$this->prependCSS($attr, "background-image:url($background);");
|
||||
|
||||
return $attr;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -22,7 +22,12 @@ class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
|
||||
|
||||
if (!isset($attr['alt'])) {
|
||||
if ($src) {
|
||||
$attr['alt'] = basename($attr['src']);
|
||||
$alt = $config->get('Attr', 'DefaultImageAlt');
|
||||
if ($alt === null) {
|
||||
$attr['alt'] = basename($attr['src']);
|
||||
} else {
|
||||
$attr['alt'] = $alt;
|
||||
}
|
||||
} else {
|
||||
$attr['alt'] = $config->get('Attr', 'DefaultInvalidImageAlt');
|
||||
}
|
||||
|
39
library/HTMLPurifier/AttrTransform/Input.php
Normal file
39
library/HTMLPurifier/AttrTransform/Input.php
Normal file
@@ -0,0 +1,39 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Performs miscellaneous cross attribute validation and filtering for
|
||||
* input elements. This is meant to be a post-transform.
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_Input extends HTMLPurifier_AttrTransform {
|
||||
|
||||
protected $pixels;
|
||||
|
||||
public function __construct() {
|
||||
$this->pixels = new HTMLPurifier_AttrDef_HTML_Pixels();
|
||||
}
|
||||
|
||||
public function transform($attr, $config, $context) {
|
||||
if (!isset($attr['type'])) $t = 'text';
|
||||
else $t = strtolower($attr['type']);
|
||||
if (isset($attr['checked']) && $t !== 'radio' && $t !== 'checkbox') {
|
||||
unset($attr['checked']);
|
||||
}
|
||||
if (isset($attr['maxlength']) && $t !== 'text' && $t !== 'password') {
|
||||
unset($attr['maxlength']);
|
||||
}
|
||||
if (isset($attr['size']) && $t !== 'text' && $t !== 'password') {
|
||||
$result = $this->pixels->validate($attr['size'], $config, $context);
|
||||
if ($result === false) unset($attr['size']);
|
||||
else $attr['size'] = $result;
|
||||
}
|
||||
if (isset($attr['src']) && $t !== 'image') {
|
||||
unset($attr['src']);
|
||||
}
|
||||
if (!isset($attr['value']) && ($t === 'radio' || $t === 'checkbox')) {
|
||||
$attr['value'] = '';
|
||||
}
|
||||
return $attr;
|
||||
}
|
||||
|
||||
}
|
||||
|
16
library/HTMLPurifier/AttrTransform/Textarea.php
Normal file
16
library/HTMLPurifier/AttrTransform/Textarea.php
Normal file
@@ -0,0 +1,16 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Sets height/width defaults for <textarea>
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_Textarea extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
public function transform($attr, $config, $context) {
|
||||
// Calculated from Firefox
|
||||
if (!isset($attr['cols'])) $attr['cols'] = '22';
|
||||
if (!isset($attr['rows'])) $attr['rows'] = '3';
|
||||
return $attr;
|
||||
}
|
||||
|
||||
}
|
@@ -32,6 +32,9 @@ class HTMLPurifier_AttrTypes
|
||||
|
||||
// unimplemented aliases
|
||||
$this->info['ContentType'] = new HTMLPurifier_AttrDef_Text();
|
||||
$this->info['ContentTypes'] = new HTMLPurifier_AttrDef_Text();
|
||||
$this->info['Charsets'] = new HTMLPurifier_AttrDef_Text();
|
||||
$this->info['Character'] = new HTMLPurifier_AttrDef_Text();
|
||||
|
||||
// number is really a positive integer (one or more digits)
|
||||
// FIXME: ^^ not always, see start and value of list items
|
||||
|
@@ -35,8 +35,8 @@ class HTMLPurifier_AttrValidator
|
||||
if (!$current_token) $context->register('CurrentToken', $token);
|
||||
|
||||
if (
|
||||
!$token instanceof HTMLPurifier_Token_Start &&
|
||||
!$token instanceof HTMLPurifier_Token_Empty
|
||||
!$token instanceof HTMLPurifier_Token_Start &&
|
||||
!$token instanceof HTMLPurifier_Token_Empty
|
||||
) return $token;
|
||||
|
||||
// create alias to global definition array, see also $defs
|
||||
@@ -50,14 +50,18 @@ class HTMLPurifier_AttrValidator
|
||||
// nothing currently utilizes this
|
||||
foreach ($definition->info_attr_transform_pre as $transform) {
|
||||
$attr = $transform->transform($o = $attr, $config, $context);
|
||||
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
|
||||
if ($e) {
|
||||
if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
|
||||
}
|
||||
}
|
||||
|
||||
// do local transformations only applicable to this element (pre)
|
||||
// ex. <p align="right"> to <p style="text-align:right;">
|
||||
foreach ($definition->info[$token->name]->attr_transform_pre as $transform) {
|
||||
$attr = $transform->transform($o = $attr, $config, $context);
|
||||
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
|
||||
if ($e) {
|
||||
if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
|
||||
}
|
||||
}
|
||||
|
||||
// create alias to this element's attribute definition array, see
|
||||
@@ -114,6 +118,8 @@ class HTMLPurifier_AttrValidator
|
||||
|
||||
// simple substitution
|
||||
$attr[$attr_key] = $result;
|
||||
} else {
|
||||
// nothing happens
|
||||
}
|
||||
|
||||
// we'd also want slightly more complicated substitution
|
||||
@@ -130,13 +136,17 @@ class HTMLPurifier_AttrValidator
|
||||
// global (error reporting untested)
|
||||
foreach ($definition->info_attr_transform_post as $transform) {
|
||||
$attr = $transform->transform($o = $attr, $config, $context);
|
||||
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
|
||||
if ($e) {
|
||||
if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
|
||||
}
|
||||
}
|
||||
|
||||
// local (error reporting untested)
|
||||
foreach ($definition->info[$token->name]->attr_transform_post as $transform) {
|
||||
$attr = $transform->transform($o = $attr, $config, $context);
|
||||
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
|
||||
if ($e) {
|
||||
if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
|
||||
}
|
||||
}
|
||||
|
||||
$token->attr = $attr;
|
||||
|
@@ -24,6 +24,14 @@ abstract class HTMLPurifier_ChildDef
|
||||
*/
|
||||
public $elements = array();
|
||||
|
||||
/**
|
||||
* Get lookup of tag names that should not close this element automatically.
|
||||
* All other elements will do so.
|
||||
*/
|
||||
public function getNonAutoCloseElements($config) {
|
||||
return $this->elements;
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates nodes according to definition and returns modification.
|
||||
*
|
||||
|
@@ -5,8 +5,6 @@
|
||||
*
|
||||
* @warning Currently this class is an all or nothing proposition, that is,
|
||||
* it will only give a bool return value.
|
||||
* @note This class is currently not used by any code, although it is unit
|
||||
* tested.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
|
||||
{
|
||||
|
@@ -10,16 +10,19 @@ class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Requi
|
||||
public $allow_empty = true;
|
||||
public $type = 'strictblockquote';
|
||||
protected $init = false;
|
||||
|
||||
/**
|
||||
* @note We don't want MakeWellFormed to auto-close inline elements since
|
||||
* they might be allowed.
|
||||
*/
|
||||
public function getNonAutoCloseElements($config) {
|
||||
$this->init($config);
|
||||
return $this->fake_elements;
|
||||
}
|
||||
|
||||
public function validateChildren($tokens_of_children, $config, $context) {
|
||||
|
||||
$def = $config->getHTMLDefinition();
|
||||
if (!$this->init) {
|
||||
// allow all inline elements
|
||||
$this->real_elements = $this->elements;
|
||||
$this->fake_elements = $def->info_content_sets['Flow'];
|
||||
$this->fake_elements['#PCDATA'] = true;
|
||||
$this->init = true;
|
||||
}
|
||||
$this->init($config);
|
||||
|
||||
// trick the parent class into thinking it allows more
|
||||
$this->elements = $this->fake_elements;
|
||||
@@ -29,6 +32,7 @@ class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Requi
|
||||
if ($result === false) return array();
|
||||
if ($result === true) $result = $tokens_of_children;
|
||||
|
||||
$def = $config->getHTMLDefinition();
|
||||
$block_wrap_start = new HTMLPurifier_Token_Start($def->info_block_wrapper);
|
||||
$block_wrap_end = new HTMLPurifier_Token_End( $def->info_block_wrapper);
|
||||
$is_inline = false;
|
||||
@@ -68,5 +72,16 @@ class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Requi
|
||||
if ($is_inline) $ret[] = $block_wrap_end;
|
||||
return $ret;
|
||||
}
|
||||
|
||||
private function init($config) {
|
||||
if (!$this->init) {
|
||||
$def = $config->getHTMLDefinition();
|
||||
// allow all inline elements
|
||||
$this->real_elements = $this->elements;
|
||||
$this->fake_elements = $def->info_content_sets['Flow'];
|
||||
$this->fake_elements['#PCDATA'] = true;
|
||||
$this->init = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -20,7 +20,7 @@ class HTMLPurifier_Config
|
||||
/**
|
||||
* HTML Purifier's version
|
||||
*/
|
||||
public $version = '3.1.1';
|
||||
public $version = '3.2.0';
|
||||
|
||||
/**
|
||||
* Bool indicator whether or not to automatically finalize
|
||||
|
File diff suppressed because one or more lines are too long
@@ -0,0 +1,9 @@
|
||||
Attr.DefaultImageAlt
|
||||
TYPE: string/null
|
||||
DEFAULT: null
|
||||
--DESCRIPTION--
|
||||
This is the content of the alt attribute of an image if the user had not
|
||||
previously specified an alt attribute. This applies to all images without
|
||||
a valid alt attribute, as opposed to %Attr.DefaultInvalidImageAlt, which
|
||||
only applies to invalid images, and overrides in the case of an invalid image.
|
||||
Default behavior with null is to use the basename of the src attribute for the alt.
|
@@ -0,0 +1,10 @@
|
||||
AutoFormat.DisplayLinkURI
|
||||
TYPE: bool
|
||||
VERSION: 3.2.0
|
||||
DEFAULT: false
|
||||
--DESCRIPTION--
|
||||
<p>
|
||||
This directive turns on the in-text display of URIs in <a> tags, and disables
|
||||
those links. For example, <a href="http://example.com">example</a> becomes
|
||||
example (<a>http://example.com</a>).
|
||||
</p>
|
@@ -0,0 +1,44 @@
|
||||
AutoFormat.RemoveEmpty
|
||||
TYPE: bool
|
||||
VERSION: 3.2.0
|
||||
DEFAULT: false
|
||||
--DESCRIPTION--
|
||||
<p>
|
||||
When enabled, HTML Purifier will attempt to remove empty elements that
|
||||
contribute no semantic information to the document. The following types
|
||||
of nodes will be removed:
|
||||
</p>
|
||||
<ul><li>
|
||||
Tags with no attributes and no content, and that are not empty
|
||||
elements (remove <code><a></a></code> but not
|
||||
<code><br /></code>), and
|
||||
</li>
|
||||
<li>
|
||||
Tags with no content, except for:<ul>
|
||||
<li>The <code>colgroup</code> element, or</li>
|
||||
<li>
|
||||
Elements with the <code>id</code> or <code>name</code> attribute,
|
||||
when those attributes are permitted on those elements.
|
||||
</li>
|
||||
</ul></li>
|
||||
</ul>
|
||||
<p>
|
||||
Please be very careful when using this functionality; while it may not
|
||||
seem that empty elements contain useful information, they can alter the
|
||||
layout of a document given appropriate styling. This directive is most
|
||||
useful when you are processing machine-generated HTML; please avoid using
|
||||
it on regular user HTML.
|
||||
</p>
|
||||
<p>
|
||||
Elements that contain only whitespace will be treated as empty. Non-breaking
|
||||
spaces, however, do not count as whitespace.
|
||||
</p>
|
||||
<p>
|
||||
This algorithm is not perfect; you may still notice some empty tags,
|
||||
particularly if a node had elements, but those elements were later removed
|
||||
because they were not permitted in that context, or tags that, after
|
||||
being auto-closed by another tag, were empty. This is for safety reasons
|
||||
to prevent clever code from breaking validation. The general rule of thumb:
|
||||
if a tag looked empty on the way in, it will get removed; if HTML Purifier
|
||||
made it empty, it will stay.
|
||||
</p>
|
@@ -1,13 +1,17 @@
|
||||
Core.AggressivelyFixLt
|
||||
TYPE: bool
|
||||
VERSION: 2.1.0
|
||||
DEFAULT: false
|
||||
DEFAULT: true
|
||||
--DESCRIPTION--
|
||||
|
||||
This directive enables aggressive pre-filter fixes HTML Purifier can
|
||||
perform in order to ensure that open angled-brackets do not get killed
|
||||
during parsing stage. Enabling this will result in two preg_replace_callback
|
||||
calls and one preg_replace call for every bit of HTML passed through here.
|
||||
It is not necessary and will have no effect for PHP 4.
|
||||
|
||||
|
||||
<p>
|
||||
This directive enables aggressive pre-filter fixes HTML Purifier can
|
||||
perform in order to ensure that open angled-brackets do not get killed
|
||||
during parsing stage. Enabling this will result in two preg_replace_callback
|
||||
calls and at least two preg_replace calls for every HTML document parsed;
|
||||
if your users make very well-formed HTML, you can set this directive false.
|
||||
This has no effect when DirectLex is used.
|
||||
</p>
|
||||
<p>
|
||||
<strong>Notice:</strong> This directive's default turned from false to true
|
||||
in HTML Purifier 3.2.0.
|
||||
</p>
|
@@ -14,13 +14,49 @@ EXTERNAL: CSSTidy
|
||||
<p>
|
||||
Sample usage:
|
||||
</p>
|
||||
<pre><![CDATA[$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Filter', 'ExtractStyleBlocks', true);
|
||||
$purifier = new HTMLPurifier($config);
|
||||
$styles = $purifier->context->get('StyleBlocks');
|
||||
foreach ($styles as $style) {
|
||||
echo '<style type="text/css">' . $style . "</style>\n";
|
||||
}]]></pre>
|
||||
<pre><![CDATA[
|
||||
<?php
|
||||
header('Content-type: text/html; charset=utf-8');
|
||||
echo '<?xml version="1.0" encoding="UTF-8"?>';
|
||||
?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
|
||||
<head>
|
||||
<title>Filter.ExtractStyleBlocks</title>
|
||||
<?php
|
||||
require_once '/path/to/library/HTMLPurifier.auto.php';
|
||||
require_once '/path/to/csstidy.class.php';
|
||||
|
||||
$dirty = '<style>body {color:#F00;}</style> Some text';
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Filter', 'ExtractStyleBlocks', true);
|
||||
$purifier = new HTMLPurifier($config);
|
||||
|
||||
$html = $purifier->purify($dirty);
|
||||
|
||||
// This implementation writes the stylesheets to the styles/ directory.
|
||||
// You can also echo the styles inside the document, but it's a bit
|
||||
// more difficult to make sure they get interpreted properly by
|
||||
// browsers; try the usual CSS armoring techniques.
|
||||
$styles = $purifier->context->get('StyleBlocks');
|
||||
$dir = 'styles/';
|
||||
if (!is_dir($dir)) mkdir($dir);
|
||||
$hash = sha1($_GET['html']);
|
||||
foreach ($styles as $i => $style) {
|
||||
file_put_contents($name = $dir . $hash . "_$i", $style);
|
||||
echo '<link rel="stylesheet" type="text/css" href="'.$name.'" />';
|
||||
}
|
||||
?>
|
||||
</head>
|
||||
<body>
|
||||
<div>
|
||||
<?php echo $html; ?>
|
||||
</div>
|
||||
</b]]><![CDATA[ody>
|
||||
</html>
|
||||
]]></pre>
|
||||
<p>
|
||||
<strong>Warning:</strong> It is possible for a user to mount an
|
||||
imagecrash attack using this CSS. Counter-measures are difficult;
|
||||
|
@@ -1,6 +1,6 @@
|
||||
Output.SortAttr
|
||||
TYPE: bool
|
||||
VERSION: 3.1.2
|
||||
VERSION: 3.2.0
|
||||
DEFAULT: false
|
||||
--DESCRIPTION--
|
||||
<p>
|
||||
|
@@ -37,33 +37,35 @@ class HTMLPurifier_ContentSets
|
||||
// sorry, no way of overloading
|
||||
foreach ($modules as $module_i => $module) {
|
||||
foreach ($module->content_sets as $key => $value) {
|
||||
if (isset($this->info[$key])) {
|
||||
$temp = $this->convertToLookup($value);
|
||||
if (isset($this->lookup[$key])) {
|
||||
// add it into the existing content set
|
||||
$this->info[$key] = $this->info[$key] . ' | ' . $value;
|
||||
$this->lookup[$key] = array_merge($this->lookup[$key], $temp);
|
||||
} else {
|
||||
$this->info[$key] = $value;
|
||||
$this->lookup[$key] = $temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
// perform content_set expansions
|
||||
$this->keys = array_keys($this->info);
|
||||
foreach ($this->info as $i => $set) {
|
||||
// only performed once, so infinite recursion is not
|
||||
// a problem
|
||||
$this->info[$i] =
|
||||
str_replace(
|
||||
$this->keys,
|
||||
// must be recalculated each time due to
|
||||
// changing substitutions
|
||||
array_values($this->info),
|
||||
$set);
|
||||
$old_lookup = false;
|
||||
while ($old_lookup !== $this->lookup) {
|
||||
$old_lookup = $this->lookup;
|
||||
foreach ($this->lookup as $i => $set) {
|
||||
$add = array();
|
||||
foreach ($set as $element => $x) {
|
||||
if (isset($this->lookup[$element])) {
|
||||
$add += $this->lookup[$element];
|
||||
unset($this->lookup[$i][$element]);
|
||||
}
|
||||
}
|
||||
$this->lookup[$i] += $add;
|
||||
}
|
||||
}
|
||||
$this->values = array_values($this->info);
|
||||
|
||||
// generate lookup tables
|
||||
foreach ($this->info as $name => $set) {
|
||||
$this->lookup[$name] = $this->convertToLookup($set);
|
||||
foreach ($this->lookup as $key => $lookup) {
|
||||
$this->info[$key] = implode(' | ', array_keys($lookup));
|
||||
}
|
||||
$this->keys = array_keys($this->info);
|
||||
$this->values = array_values($this->info);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -75,12 +77,22 @@ class HTMLPurifier_ContentSets
|
||||
if (!empty($def->child)) return; // already done!
|
||||
$content_model = $def->content_model;
|
||||
if (is_string($content_model)) {
|
||||
$def->content_model = str_replace(
|
||||
$this->keys, $this->values, $content_model);
|
||||
// Assume that $this->keys is alphanumeric
|
||||
$def->content_model = preg_replace_callback(
|
||||
'/\b(' . implode('|', $this->keys) . ')\b/',
|
||||
array($this, 'generateChildDefCallback'),
|
||||
$content_model
|
||||
);
|
||||
//$def->content_model = str_replace(
|
||||
// $this->keys, $this->values, $content_model);
|
||||
}
|
||||
$def->child = $this->getChildDef($def, $module);
|
||||
}
|
||||
|
||||
public function generateChildDefCallback($matches) {
|
||||
return $this->info[$matches[0]];
|
||||
}
|
||||
|
||||
/**
|
||||
* Instantiates a ChildDef based on content_model and content_model_type
|
||||
* member variables in HTMLPurifier_ElementDef
|
||||
|
@@ -271,6 +271,12 @@ class HTMLPurifier_Encoder
|
||||
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
|
||||
if ($iconv && !$config->get('Test', 'ForceNoIconv')) {
|
||||
$str = iconv($encoding, 'utf-8//IGNORE', $str);
|
||||
if ($str === false) {
|
||||
// $encoding is not a valid encoding
|
||||
restore_error_handler();
|
||||
trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
|
||||
return '';
|
||||
}
|
||||
// If the string is bjorked by Shift_JIS or a similar encoding
|
||||
// that doesn't support all of ASCII, convert the naughty
|
||||
// characters to their true byte-wise ASCII/UTF-8 equivalents.
|
||||
@@ -282,7 +288,7 @@ class HTMLPurifier_Encoder
|
||||
restore_error_handler();
|
||||
return $str;
|
||||
}
|
||||
trigger_error('Encoding not supported', E_USER_ERROR);
|
||||
trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -7,22 +7,37 @@
|
||||
class HTMLPurifier_ErrorCollector
|
||||
{
|
||||
|
||||
protected $errors = array();
|
||||
/**
|
||||
* Identifiers for the returned error array. These are purposely numeric
|
||||
* so list() can be used.
|
||||
*/
|
||||
const LINENO = 0;
|
||||
const SEVERITY = 1;
|
||||
const MESSAGE = 2;
|
||||
const CHILDREN = 3;
|
||||
|
||||
protected $errors;
|
||||
protected $_current;
|
||||
protected $_stacks = array(array());
|
||||
protected $locale;
|
||||
protected $generator;
|
||||
protected $context;
|
||||
|
||||
protected $lines = array();
|
||||
|
||||
public function __construct($context) {
|
||||
$this->locale =& $context->get('Locale');
|
||||
$this->generator =& $context->get('Generator');
|
||||
$this->context = $context;
|
||||
$this->_current =& $this->_stacks[0];
|
||||
$this->errors =& $this->_stacks[0];
|
||||
}
|
||||
|
||||
/**
|
||||
* Sends an error message to the collector for later use
|
||||
* @param $line Integer line number, or HTMLPurifier_Token that caused error
|
||||
* @param $severity int Error severity, PHP error style (don't use E_USER_)
|
||||
* @param $msg string Error message text
|
||||
* @param $subst1 string First substitution for $msg
|
||||
* @param $subst2 string ...
|
||||
*/
|
||||
public function send($severity, $msg) {
|
||||
|
||||
@@ -35,6 +50,7 @@ class HTMLPurifier_ErrorCollector
|
||||
|
||||
$token = $this->context->get('CurrentToken', true);
|
||||
$line = $token ? $token->line : $this->context->get('CurrentLine', true);
|
||||
$col = $token ? $token->col : $this->context->get('CurrentCol', true);
|
||||
$attr = $this->context->get('CurrentAttr', true);
|
||||
|
||||
// perform special substitutions, also add custom parameters
|
||||
@@ -55,13 +71,66 @@ class HTMLPurifier_ErrorCollector
|
||||
|
||||
if (!empty($subst)) $msg = strtr($msg, $subst);
|
||||
|
||||
$this->errors[] = array($line, $severity, $msg);
|
||||
// (numerically indexed)
|
||||
$error = array(
|
||||
self::LINENO => $line,
|
||||
self::SEVERITY => $severity,
|
||||
self::MESSAGE => $msg,
|
||||
self::CHILDREN => array()
|
||||
);
|
||||
$this->_current[] = $error;
|
||||
|
||||
|
||||
// NEW CODE BELOW ...
|
||||
|
||||
$struct = null;
|
||||
// Top-level errors are either:
|
||||
// TOKEN type, if $value is set appropriately, or
|
||||
// "syntax" type, if $value is null
|
||||
$new_struct = new HTMLPurifier_ErrorStruct();
|
||||
$new_struct->type = HTMLPurifier_ErrorStruct::TOKEN;
|
||||
if ($token) $new_struct->value = clone $token;
|
||||
if (is_int($line) && is_int($col)) {
|
||||
if (isset($this->lines[$line][$col])) {
|
||||
$struct = $this->lines[$line][$col];
|
||||
} else {
|
||||
$struct = $this->lines[$line][$col] = $new_struct;
|
||||
}
|
||||
// These ksorts may present a performance problem
|
||||
ksort($this->lines[$line], SORT_NUMERIC);
|
||||
} else {
|
||||
if (isset($this->lines[-1])) {
|
||||
$struct = $this->lines[-1];
|
||||
} else {
|
||||
$struct = $this->lines[-1] = $new_struct;
|
||||
}
|
||||
}
|
||||
ksort($this->lines, SORT_NUMERIC);
|
||||
|
||||
// Now, check if we need to operate on a lower structure
|
||||
if (!empty($attr)) {
|
||||
$struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr);
|
||||
if (!$struct->value) {
|
||||
$struct->value = array($attr, 'PUT VALUE HERE');
|
||||
}
|
||||
}
|
||||
if (!empty($cssprop)) {
|
||||
$struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop);
|
||||
if (!$struct->value) {
|
||||
// if we tokenize CSS this might be a little more difficult to do
|
||||
$struct->value = array($cssprop, 'PUT VALUE HERE');
|
||||
}
|
||||
}
|
||||
|
||||
// Ok, structs are all setup, now time to register the error
|
||||
$struct->addError($severity, $msg);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves raw error data for custom formatter to use
|
||||
* @param List of arrays in format of array(Error message text,
|
||||
* token that caused error, tokens surrounding token)
|
||||
* @return List of arrays in format of array(line of error,
|
||||
* error severity, error message,
|
||||
* recursive sub-errors array)
|
||||
*/
|
||||
public function getRaw() {
|
||||
return $this->errors;
|
||||
@@ -70,38 +139,25 @@ class HTMLPurifier_ErrorCollector
|
||||
/**
|
||||
* Default HTML formatting implementation for error messages
|
||||
* @param $config Configuration array, vital for HTML output nature
|
||||
* @param $errors Errors array to display; used for recursion.
|
||||
*/
|
||||
public function getHTMLFormatted($config) {
|
||||
public function getHTMLFormatted($config, $errors = null) {
|
||||
$ret = array();
|
||||
|
||||
$errors = $this->errors;
|
||||
$this->generator = new HTMLPurifier_Generator($config, $this->context);
|
||||
if ($errors === null) $errors = $this->errors;
|
||||
|
||||
// sort error array by line
|
||||
// line numbers are enabled if they aren't explicitly disabled
|
||||
if ($config->get('Core', 'MaintainLineNumbers') !== false) {
|
||||
$has_line = array();
|
||||
$lines = array();
|
||||
$original_order = array();
|
||||
foreach ($errors as $i => $error) {
|
||||
$has_line[] = (int) (bool) $error[0];
|
||||
$lines[] = $error[0];
|
||||
$original_order[] = $i;
|
||||
// 'At line' message needs to be removed
|
||||
|
||||
// generation code for new structure goes here. It needs to be recursive.
|
||||
foreach ($this->lines as $line => $col_array) {
|
||||
if ($line == -1) continue;
|
||||
foreach ($col_array as $col => $struct) {
|
||||
$this->_renderStruct($ret, $struct, $line, $col);
|
||||
}
|
||||
array_multisort($has_line, SORT_DESC, $lines, SORT_ASC, $original_order, SORT_ASC, $errors);
|
||||
}
|
||||
|
||||
foreach ($errors as $error) {
|
||||
list($line, $severity, $msg) = $error;
|
||||
$string = '';
|
||||
$string .= '<strong>' . $this->locale->getErrorName($severity) . '</strong>: ';
|
||||
$string .= $this->generator->escape($msg);
|
||||
if ($line) {
|
||||
// have javascript link generation that causes
|
||||
// textarea to skip to the specified line
|
||||
$string .= $this->locale->formatMessage(
|
||||
'ErrorCollector: At line', array('line' => $line));
|
||||
}
|
||||
$ret[] = $string;
|
||||
if (isset($this->lines[-1])) {
|
||||
$this->_renderStruct($ret, $this->lines[-1]);
|
||||
}
|
||||
|
||||
if (empty($errors)) {
|
||||
@@ -112,5 +168,41 @@ class HTMLPurifier_ErrorCollector
|
||||
|
||||
}
|
||||
|
||||
private function _renderStruct(&$ret, $struct, $line = null, $col = null) {
|
||||
$stack = array($struct);
|
||||
$context_stack = array(array());
|
||||
while ($current = array_pop($stack)) {
|
||||
$context = array_pop($context_stack);
|
||||
foreach ($current->errors as $error) {
|
||||
list($severity, $msg) = $error;
|
||||
$string = '';
|
||||
$string .= '<div>';
|
||||
// W3C uses an icon to indicate the severity of the error.
|
||||
$error = $this->locale->getErrorName($severity);
|
||||
$string .= "<span class=\"error e$severity\"><strong>$error</strong></span> ";
|
||||
if (!is_null($line) && !is_null($col)) {
|
||||
$string .= "<em class=\"location\">Line $line, Column $col: </em> ";
|
||||
} else {
|
||||
$string .= '<em class="location">End of Document: </em> ';
|
||||
}
|
||||
$string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> ';
|
||||
$string .= '</div>';
|
||||
// Here, have a marker for the character on the column appropriate.
|
||||
// Be sure to clip extremely long lines.
|
||||
//$string .= '<pre>';
|
||||
//$string .= '';
|
||||
//$string .= '</pre>';
|
||||
$ret[] = $string;
|
||||
}
|
||||
foreach ($current->children as $type => $array) {
|
||||
$context[] = $current;
|
||||
$stack = array_merge($stack, array_reverse($array, true));
|
||||
for ($i = count($array); $i > 0; $i--) {
|
||||
$context_stack[] = $context;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
58
library/HTMLPurifier/ErrorStruct.php
Normal file
58
library/HTMLPurifier/ErrorStruct.php
Normal file
@@ -0,0 +1,58 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Records errors for particular segments of an HTML document such as tokens,
|
||||
* attributes or CSS properties. They can contain error structs (which apply
|
||||
* to components of what they represent), but their main purpose is to hold
|
||||
* errors applying to whatever struct is being used.
|
||||
*/
|
||||
class HTMLPurifier_ErrorStruct
|
||||
{
|
||||
|
||||
/**
|
||||
* Possible values for $children first-key. Note that top-level structures
|
||||
* are automatically token-level.
|
||||
*/
|
||||
const TOKEN = 0;
|
||||
const ATTR = 1;
|
||||
const CSSPROP = 2;
|
||||
|
||||
/**
|
||||
* Type of this struct.
|
||||
*/
|
||||
public $type;
|
||||
|
||||
/**
|
||||
* Value of the struct we are recording errors for. There are various
|
||||
* values for this:
|
||||
* - TOKEN: Instance of HTMLPurifier_Token
|
||||
* - ATTR: array('attr-name', 'value')
|
||||
* - CSSPROP: array('prop-name', 'value')
|
||||
*/
|
||||
public $value;
|
||||
|
||||
/**
|
||||
* Errors registered for this structure.
|
||||
* Each entry is an array($severity, $message) pair, as appended by addError().
|
||||
*/
|
||||
public $errors = array();
|
||||
|
||||
/**
|
||||
* Child ErrorStructs that are from this structure. For example, a TOKEN
|
||||
* ErrorStruct would contain ATTR ErrorStructs. This is a multi-dimensional
|
||||
* array in structure: [TYPE]['identifier']
|
||||
*/
|
||||
public $children = array();
|
||||
|
||||
/**
* Returns the child struct stored at $children[$type][$id]. If none exists
* yet, a new HTMLPurifier_ErrorStruct is created there with its $type set
* to $type; its $value is left unset for the caller to fill in.
* @param $type First-level key into $children (one of the class constants)
* @param $id Second-level key identifying the child within that type
* @return HTMLPurifier_ErrorStruct stored at that position
*/
public function getChild($type, $id) {
|
||||
if (!isset($this->children[$type][$id])) {
|
||||
$this->children[$type][$id] = new HTMLPurifier_ErrorStruct();
|
||||
$this->children[$type][$id]->type = $type;
|
||||
}
|
||||
return $this->children[$type][$id];
|
||||
}
|
||||
|
||||
/**
* Appends an error to $errors as an array($severity, $message) pair.
* @param $severity Severity value stored as the first element
* @param $message Message stored as the second element
*/
public function addError($severity, $message) {
|
||||
$this->errors[] = array($severity, $message);
|
||||
}
|
||||
|
||||
}
|
117
library/HTMLPurifier/HTMLModule/Forms.php
Normal file
117
library/HTMLPurifier/HTMLModule/Forms.php
Normal file
@@ -0,0 +1,117 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* XHTML 1.1 Forms module, defines all form-related elements found in HTML 4.
|
||||
*/
|
||||
class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
public $name = 'Forms';
|
||||
// NOTE(review): flags the module as not safe; the exact effect of $safe is
// defined by the parent class / module manager -- confirm there.
public $safe = false;
|
||||
|
||||
// Contributes the 'Form' set to 'Block' and the 'Formctrl' set to 'Inline';
// these are the set names passed to addElement() in setup() below.
public $content_sets = array(
|
||||
'Block' => 'Form',
|
||||
'Inline' => 'Formctrl',
|
||||
);
|
||||
|
||||
/**
* Registers the form-related elements with addElement(): form, input,
* select, option, textarea, button, fieldset, label, legend and optgroup.
* It also sets the exclusions for form, button and label, and attaches the
* Input and Textarea attribute transforms.
* @param $config Not read anywhere in this method body.
*/
public function setup($config) {
|
||||
$form = $this->addElement('form', 'Form',
|
||||
'Required: Heading | List | Block | fieldset', 'Common', array(
|
||||
'accept' => 'ContentTypes',
|
||||
'accept-charset' => 'Charsets',
|
||||
'action*' => 'URI',
|
||||
'method' => 'Enum#get,post',
|
||||
// really ContentType, but these two are the only ones used today
|
||||
'enctype' => 'Enum#application/x-www-form-urlencoded,multipart/form-data',
|
||||
));
|
||||
$form->excludes = array('form' => true);
|
||||
|
||||
$input = $this->addElement('input', 'Formctrl', 'Empty', 'Common', array(
|
||||
'accept' => 'ContentTypes',
|
||||
'accesskey' => 'Character',
|
||||
'alt' => 'Text',
|
||||
'checked' => 'Bool#checked',
|
||||
'disabled' => 'Bool#disabled',
|
||||
'maxlength' => 'Number',
|
||||
'name' => 'CDATA',
|
||||
'readonly' => 'Bool#readonly',
|
||||
'size' => 'Number',
|
||||
'src' => 'URI#embeds',
|
||||
'tabindex' => 'Number',
|
||||
'type' => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image',
|
||||
'value' => 'CDATA',
|
||||
));
|
||||
$input->attr_transform_post[] = new HTMLPurifier_AttrTransform_Input();
|
||||
|
||||
$this->addElement('select', 'Formctrl', 'Required: optgroup | option', 'Common', array(
|
||||
'disabled' => 'Bool#disabled',
|
||||
'multiple' => 'Bool#multiple',
|
||||
'name' => 'CDATA',
|
||||
'size' => 'Number',
|
||||
'tabindex' => 'Number',
|
||||
));
|
||||
|
||||
$this->addElement('option', false, 'Optional: #PCDATA', 'Common', array(
|
||||
'disabled' => 'Bool#disabled',
|
||||
'label' => 'Text',
|
||||
'selected' => 'Bool#selected',
|
||||
'value' => 'CDATA',
|
||||
));
|
||||
// It's illegal for there to be more than one selected, but not
|
||||
// be multiple. Also, no selected means undefined behavior. This might
|
||||
// be difficult to implement; perhaps an injector, or a context variable.
|
||||
|
||||
$textarea = $this->addElement('textarea', 'Formctrl', 'Optional: #PCDATA', 'Common', array(
|
||||
'accesskey' => 'Character',
|
||||
'cols*' => 'Number',
|
||||
'disabled' => 'Bool#disabled',
|
||||
'name' => 'CDATA',
|
||||
'readonly' => 'Bool#readonly',
|
||||
'rows*' => 'Number',
|
||||
'tabindex' => 'Number',
|
||||
));
|
||||
$textarea->attr_transform_pre[] = new HTMLPurifier_AttrTransform_Textarea();
|
||||
|
||||
$button = $this->addElement('button', 'Formctrl', 'Optional: #PCDATA | Heading | List | Block | Inline', 'Common', array(
|
||||
'accesskey' => 'Character',
|
||||
'disabled' => 'Bool#disabled',
|
||||
'name' => 'CDATA',
|
||||
'tabindex' => 'Number',
|
||||
'type' => 'Enum#button,submit,reset',
|
||||
'value' => 'CDATA',
|
||||
));
|
||||
|
||||
// For exclusions, ideally we'd specify content sets, not literal elements
|
||||
$button->excludes = $this->makeLookup(
|
||||
'form', 'fieldset', // Form
|
||||
'input', 'select', 'textarea', 'label', 'button', // Formctrl
|
||||
'a' // as per HTML 4.01 spec, this is omitted by modularization
|
||||
);
|
||||
|
||||
// Extra exclusion: img usemap="" is not permitted within this element.
|
||||
// We'll omit this for now, since we don't have any good way of
|
||||
// indicating it yet.
|
||||
|
||||
// This is HIGHLY user-unfriendly; we need a custom child-def for this
|
||||
$this->addElement('fieldset', 'Form', 'Custom: (#WS?,legend,(Flow|#PCDATA)*)', 'Common');
|
||||
|
||||
$label = $this->addElement('label', 'Formctrl', 'Optional: #PCDATA | Inline', 'Common', array(
|
||||
'accesskey' => 'Character',
|
||||
// 'for' => 'IDREF', // IDREF not implemented, cannot allow
|
||||
));
|
||||
$label->excludes = array('label' => true);
|
||||
|
||||
$this->addElement('legend', false, 'Optional: #PCDATA | Inline', 'Common', array(
|
||||
'accesskey' => 'Character',
|
||||
));
|
||||
|
||||
$this->addElement('optgroup', false, 'Required: option', 'Common', array(
|
||||
'disabled' => 'Bool#disabled',
|
||||
'label*' => 'Text',
|
||||
));
|
||||
|
||||
// Don't forget an injector for <isindex>. This one's a little complex
|
||||
// because it maps to multiple elements.
|
||||
|
||||
}
|
||||
}
|
||||
|
16
library/HTMLPurifier/HTMLModule/Name.php
Normal file
16
library/HTMLPurifier/HTMLModule/Name.php
Normal file
@@ -0,0 +1,16 @@
|
||||
<?php
|
||||
|
||||
/**
* Module that declares a name attribute of type 'ID' on a fixed list of
* elements (see setup()).
*/
class HTMLPurifier_HTMLModule_Name extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
public $name = 'Name';
|
||||
|
||||
/**
* Calls addBlankElement() for each element name in the list below and sets
* the returned definition's 'name' attribute to the type string 'ID'.
* @param $config Not read anywhere in this method body.
*/
public function setup($config) {
|
||||
$elements = array('a', 'applet', 'form', 'frame', 'iframe', 'img', 'map');
|
||||
foreach ($elements as $name) {
|
||||
$element = $this->addBlankElement($name);
|
||||
$element->attr['name'] = 'ID';
|
||||
}
|
||||
}
|
||||
|
||||
}
|
23
library/HTMLPurifier/HTMLModule/Tidy/Name.php
Normal file
23
library/HTMLPurifier/HTMLModule/Tidy/Name.php
Normal file
@@ -0,0 +1,23 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Name is deprecated, but allowed in strict doctypes, so only transform it
* at the 'heavy' Tidy level.
|
||||
*/
|
||||
class HTMLPurifier_HTMLModule_Tidy_Name extends HTMLPurifier_HTMLModule_Tidy
|
||||
{
|
||||
public $name = 'Tidy_Name';
|
||||
public $defaultLevel = 'heavy';
|
||||
/**
* Builds the fix table for this module.
* @return Array with keys 'img@name' and 'a@name', both set by one chained
*         assignment to a single new HTMLPurifier_AttrTransform_Name.
*/
public function makeFixes() {
|
||||
|
||||
$r = array();
|
||||
|
||||
// @name for img, a -----------------------------------------------
|
||||
// Technically, it's allowed even on strict, so we allow authors to use
|
||||
// it. However, it's deprecated in future versions of XHTML.
|
||||
$r['img@name'] =
|
||||
$r['a@name'] = new HTMLPurifier_AttrTransform_Name();
|
||||
|
||||
return $r;
|
||||
}
|
||||
}
|
||||
|
@@ -7,7 +7,15 @@ class HTMLPurifier_HTMLModule_Tidy_Proprietary extends HTMLPurifier_HTMLModule_T
|
||||
public $defaultLevel = 'light';
|
||||
|
||||
public function makeFixes() {
|
||||
return array();
|
||||
$r = array();
|
||||
$r['table@background'] = new HTMLPurifier_AttrTransform_Background();
|
||||
$r['td@background'] = new HTMLPurifier_AttrTransform_Background();
|
||||
$r['th@background'] = new HTMLPurifier_AttrTransform_Background();
|
||||
$r['tr@background'] = new HTMLPurifier_AttrTransform_Background();
|
||||
$r['thead@background'] = new HTMLPurifier_AttrTransform_Background();
|
||||
$r['tfoot@background'] = new HTMLPurifier_AttrTransform_Background();
|
||||
$r['tbody@background'] = new HTMLPurifier_AttrTransform_Background();
|
||||
return $r;
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -103,10 +103,6 @@ class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends HTMLPurifier_HTMLModule
|
||||
// @hspace for img ------------------------------------------------
|
||||
$r['img@hspace'] = new HTMLPurifier_AttrTransform_ImgSpace('hspace');
|
||||
|
||||
// @name for img, a -----------------------------------------------
|
||||
$r['img@name'] =
|
||||
$r['a@name'] = new HTMLPurifier_AttrTransform_Name();
|
||||
|
||||
// @noshade for hr ------------------------------------------------
|
||||
// this transformation is not precise but often good enough.
|
||||
// different browsers use different styles to designate noshade
|
||||
|
@@ -63,7 +63,11 @@ class HTMLPurifier_HTMLModuleManager
|
||||
$common = array(
|
||||
'CommonAttributes', 'Text', 'Hypertext', 'List',
|
||||
'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
|
||||
'StyleAttribute', 'Scripting', 'Object'
|
||||
'StyleAttribute',
|
||||
// Unsafe:
|
||||
'Scripting', 'Object', 'Forms',
|
||||
// Sorta legacy, but present in strict:
|
||||
'Name',
|
||||
);
|
||||
$transitional = array('Legacy', 'Target');
|
||||
$xml = array('XMLCommonAttributes');
|
||||
@@ -82,7 +86,7 @@ class HTMLPurifier_HTMLModuleManager
|
||||
$this->doctypes->register(
|
||||
'HTML 4.01 Strict', false,
|
||||
array_merge($common, $non_xml),
|
||||
array('Tidy_Strict', 'Tidy_Proprietary'),
|
||||
array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
|
||||
array(),
|
||||
'-//W3C//DTD HTML 4.01//EN',
|
||||
'http://www.w3.org/TR/html4/strict.dtd'
|
||||
@@ -91,7 +95,7 @@ class HTMLPurifier_HTMLModuleManager
|
||||
$this->doctypes->register(
|
||||
'XHTML 1.0 Transitional', true,
|
||||
array_merge($common, $transitional, $xml, $non_xml),
|
||||
array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary'),
|
||||
array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
|
||||
array(),
|
||||
'-//W3C//DTD XHTML 1.0 Transitional//EN',
|
||||
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
|
||||
@@ -100,7 +104,7 @@ class HTMLPurifier_HTMLModuleManager
|
||||
$this->doctypes->register(
|
||||
'XHTML 1.0 Strict', true,
|
||||
array_merge($common, $xml, $non_xml),
|
||||
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary'),
|
||||
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
|
||||
array(),
|
||||
'-//W3C//DTD XHTML 1.0 Strict//EN',
|
||||
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
|
||||
@@ -109,7 +113,7 @@ class HTMLPurifier_HTMLModuleManager
|
||||
$this->doctypes->register(
|
||||
'XHTML 1.1', true,
|
||||
array_merge($common, $xml, array('Ruby')),
|
||||
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict'), // Tidy_XHTML1_1
|
||||
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
|
||||
array(),
|
||||
'-//W3C//DTD XHTML 1.1//EN',
|
||||
'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
|
||||
@@ -212,9 +216,6 @@ class HTMLPurifier_HTMLModuleManager
|
||||
}
|
||||
}
|
||||
|
||||
// merge in custom modules
|
||||
$modules = array_merge($modules, $this->userModules);
|
||||
|
||||
// add proprietary module (this gets special treatment because
|
||||
// it is completely removed from doctypes, etc.)
|
||||
if ($config->get('HTML', 'Proprietary')) {
|
||||
@@ -229,6 +230,9 @@ class HTMLPurifier_HTMLModuleManager
|
||||
$modules[] = 'SafeEmbed';
|
||||
}
|
||||
|
||||
// merge in custom modules
|
||||
$modules = array_merge($modules, $this->userModules);
|
||||
|
||||
foreach ($modules as $module) {
|
||||
$this->processModule($module);
|
||||
$this->modules[$module]->setup($config);
|
||||
@@ -378,7 +382,11 @@ class HTMLPurifier_HTMLModuleManager
|
||||
|
||||
$this->contentSets->generateChildDef($def, $module);
|
||||
}
|
||||
|
||||
|
||||
// This can occur if there is a blank definition, but no base to
|
||||
// mix it in with
|
||||
if (!$def) return false;
|
||||
|
||||
// add information on required attributes
|
||||
foreach ($def->attr as $attr_name => $attr_def) {
|
||||
if ($attr_def->required) {
|
||||
|
@@ -5,6 +5,11 @@
|
||||
* This enables "formatter-like" functionality such as auto-paragraphing,
|
||||
* smiley-ification and linkification to take place.
|
||||
*
|
||||
* A note on how handlers create changes; this is done by assigning a new
|
||||
* value to the $token reference. These values can take a variety of forms and
|
||||
* are best described in the HTMLPurifier_Strategy_MakeWellFormed->processToken()
|
||||
* documentation.
|
||||
*
|
||||
* @todo Allow injectors to request a re-run on their output. This
|
||||
* would help if an operation is recursive.
|
||||
*/
|
||||
@@ -16,13 +21,6 @@ abstract class HTMLPurifier_Injector
|
||||
*/
|
||||
public $name;
|
||||
|
||||
/**
|
||||
* Amount of tokens the injector needs to skip + 1. Because
|
||||
* the decrement is the first thing that happens, this needs to
|
||||
* be one greater than the "real" skip count.
|
||||
*/
|
||||
public $skip = 1;
|
||||
|
||||
/**
|
||||
* Instance of HTMLPurifier_HTMLDefinition
|
||||
*/
|
||||
@@ -54,6 +52,32 @@ abstract class HTMLPurifier_Injector
|
||||
*/
|
||||
public $needed = array();
|
||||
|
||||
/**
|
||||
* Index of inputTokens to rewind to.
|
||||
*/
|
||||
protected $rewind = false;
|
||||
|
||||
/**
|
||||
* Rewind to a spot to re-perform processing. This is useful if you
|
||||
* deleted a node, and now need to see if this change affected any
|
||||
* earlier nodes. Rewinding does not affect other injectors, and can
|
||||
* result in infinite loops if not used carefully.
|
||||
* @warning HTML Purifier will prevent you from fast-forwarding with this
|
||||
* function.
|
||||
*/
|
||||
public function rewind($index) {
|
||||
$this->rewind = $index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves rewind, and then unsets it.
|
||||
*/
|
||||
public function getRewind() {
|
||||
$r = $this->rewind;
|
||||
$this->rewind = false;
|
||||
return $r;
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepares the injector by giving it the config and context objects:
|
||||
* this allows references to important variables to be made within
|
||||
@@ -116,6 +140,69 @@ abstract class HTMLPurifier_Injector
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterator function, which starts with the next token and continues until
|
||||
* you reach the end of the input tokens.
|
||||
* @warning Please prevent previous references from interfering with this
|
||||
* function by setting $i = null beforehand!
|
||||
* @param &$i Current integer index variable for inputTokens
|
||||
* @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
|
||||
*/
|
||||
protected function forward(&$i, &$current) {
|
||||
if ($i === null) $i = $this->inputIndex + 1;
|
||||
else $i++;
|
||||
if (!isset($this->inputTokens[$i])) return false;
|
||||
$current = $this->inputTokens[$i];
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Similar to forward(), but accepts a third parameter $nesting (which
|
||||
* should be initialized at 0) and stops when we hit the end tag
|
||||
* for the node $this->inputIndex starts in.
|
||||
*/
|
||||
protected function forwardUntilEndToken(&$i, &$current, &$nesting) {
|
||||
$result = $this->forward($i, $current);
|
||||
if (!$result) return false;
|
||||
if ($nesting === null) $nesting = 0;
|
||||
if ($current instanceof HTMLPurifier_Token_Start) $nesting++;
|
||||
elseif ($current instanceof HTMLPurifier_Token_End) {
|
||||
if ($nesting <= 0) return false;
|
||||
$nesting--;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterator function, starts with the previous token and continues until
|
||||
* you reach the beginning of input tokens.
|
||||
* @warning Please prevent previous references from interfering with this
|
||||
* function by setting $i = null beforehand!
|
||||
* @param &$i Current integer index variable for inputTokens
|
||||
* @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
|
||||
*/
|
||||
protected function backward(&$i, &$current) {
    // A null index means iteration has not started yet: begin with the
    // token right before the one currently being processed. Otherwise,
    // step one token back.
    if ($i !== null) {
        --$i;
    } else {
        $i = $this->inputIndex - 1;
    }
    // Still inside the token list: hand the token back to the caller.
    if ($i >= 0) {
        $current = $this->inputTokens[$i];
        return true;
    }
    // Walked off the front of the input tokens.
    return false;
}
|
||||
|
||||
/**
|
||||
* Initializes the iterator at the current position. Use in a do {} while;
|
||||
* loop to force the forward() and backward() functions to start at the
|
||||
* current location.
|
||||
* @warning Please prevent previous references from interfering with this
|
||||
* function by setting $i = null beforehand!
|
||||
* @param &$i Current integer index variable for inputTokens
|
||||
* @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
|
||||
*/
|
||||
protected function current(&$i, &$current) {
    // Only seed the index when the caller has not positioned it yet;
    // an already-set index is respected as-is.
    if (is_null($i)) {
        $i = $this->inputIndex;
    }
    // No bounds check is performed here; nothing is returned, the
    // results are delivered through the two reference parameters.
    $current = $this->inputTokens[$i];
}
|
||||
|
||||
/**
|
||||
* Handler that is called when a text token is processed
|
||||
*/
|
||||
@@ -126,9 +213,17 @@ abstract class HTMLPurifier_Injector
|
||||
*/
|
||||
public function handleElement(&$token) {}
|
||||
|
||||
/**
|
||||
* Handler that is called when an end token is processed
|
||||
*/
|
||||
public function handleEnd(&$token) {
|
||||
$this->notifyEnd($token);
|
||||
}
|
||||
|
||||
/**
|
||||
* Notifier that is called when an end token is processed
|
||||
* @note This differs from handlers in that the token is read-only
|
||||
* @deprecated
|
||||
*/
|
||||
public function notifyEnd($token) {}
|
||||
|
||||
|
@@ -3,6 +3,8 @@
|
||||
/**
|
||||
* Injector that auto paragraphs text in the root node based on
|
||||
* double-spacing.
|
||||
* @todo Ensure all states are unit tested, including variations as well.
|
||||
* @todo Make a graph of the flow control for this Injector.
|
||||
*/
|
||||
class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
|
||||
{
|
||||
@@ -18,116 +20,177 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
|
||||
|
||||
public function handleText(&$token) {
|
||||
$text = $token->data;
|
||||
if (empty($this->currentNesting)) {
|
||||
if (!$this->allowsElement('p')) return;
|
||||
// case 1: we're in root node (and it allows paragraphs)
|
||||
$token = array($this->_pStart());
|
||||
$this->_splitText($text, $token);
|
||||
} elseif ($this->currentNesting[count($this->currentNesting)-1]->name == 'p') {
|
||||
// case 2: we're in a paragraph
|
||||
$token = array();
|
||||
$this->_splitText($text, $token);
|
||||
} elseif ($this->allowsElement('p')) {
|
||||
// case 3: we're in an element that allows paragraphs
|
||||
if (strpos($text, "\n\n") !== false) {
|
||||
// case 3.1: this text node has a double-newline
|
||||
$token = array($this->_pStart());
|
||||
$this->_splitText($text, $token);
|
||||
} else {
|
||||
$ok = false;
|
||||
// test if up-coming tokens are either block or have
|
||||
// a double newline in them
|
||||
$nesting = 0;
|
||||
for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
|
||||
if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Start){
|
||||
if (!$this->_isInline($this->inputTokens[$i])) {
|
||||
// we haven't found a double-newline, and
|
||||
// we've hit a block element, so don't paragraph
|
||||
$ok = false;
|
||||
break;
|
||||
}
|
||||
$nesting++;
|
||||
}
|
||||
if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_End) {
|
||||
if ($nesting <= 0) break;
|
||||
$nesting--;
|
||||
}
|
||||
if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Text) {
|
||||
// found it!
|
||||
if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) {
|
||||
$ok = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Does the current parent allow <p> tags?
|
||||
if ($this->allowsElement('p')) {
|
||||
if (empty($this->currentNesting) || strpos($text, "\n\n") !== false) {
|
||||
// Note that we have differing behavior when dealing with text
|
||||
// in the anonymous root node, or a node inside the document.
|
||||
// If the text has a double-newline, the treatment is the same;
|
||||
// if it doesn't, see the next if-block if you're in the document.
|
||||
|
||||
$i = $nesting = null;
|
||||
if (!$this->forwardUntilEndToken($i, $current, $nesting) && $token->is_whitespace) {
|
||||
// State 1.1: ... ^ (whitespace, then document end)
|
||||
// ----
|
||||
// This is a degenerate case
|
||||
} else {
|
||||
// State 1.2: PAR1
|
||||
// ----
|
||||
|
||||
// State 1.3: PAR1\n\nPAR2
|
||||
// ------------
|
||||
|
||||
// State 1.4: <div>PAR1\n\nPAR2 (see State 2)
|
||||
// ------------
|
||||
$token = array($this->_pStart());
|
||||
$this->_splitText($text, $token);
|
||||
}
|
||||
if ($ok) {
|
||||
// case 3.2: this text node is next to another node
|
||||
// that will start a paragraph
|
||||
} else {
|
||||
// State 2: <div>PAR1... (similar to 1.4)
|
||||
// ----
|
||||
|
||||
// We're in an element that allows paragraph tags, but we're not
|
||||
// sure if we're going to need them.
|
||||
if ($this->_pLookAhead()) {
|
||||
// State 2.1: <div>PAR1<b>PAR1\n\nPAR2
|
||||
// ----
|
||||
// Note: This will always be the first child, since any
|
||||
// previous inline element would have triggered this very
|
||||
// same routine, and found the double newline. One possible
|
||||
// exception would be a comment.
|
||||
$token = array($this->_pStart(), $token);
|
||||
} else {
|
||||
// State 2.2.1: <div>PAR1<div>
|
||||
// ----
|
||||
|
||||
// State 2.2.2: <div>PAR1<b>PAR1</b></div>
|
||||
// ----
|
||||
}
|
||||
}
|
||||
// Is the current parent a <p> tag?
|
||||
} elseif (
|
||||
!empty($this->currentNesting) &&
|
||||
$this->currentNesting[count($this->currentNesting)-1]->name == 'p'
|
||||
) {
|
||||
// State 3.1: ...<p>PAR1
|
||||
// ----
|
||||
|
||||
// State 3.2: ...<p>PAR1\n\nPAR2
|
||||
// ------------
|
||||
$token = array();
|
||||
$this->_splitText($text, $token);
|
||||
// Abort!
|
||||
} else {
|
||||
// State 4.1: ...<b>PAR1
|
||||
// ----
|
||||
|
||||
// State 4.2: ...<b>PAR1\n\nPAR2
|
||||
// ------------
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public function handleElement(&$token) {
|
||||
// check if we're inside a tag already
|
||||
if (!empty($this->currentNesting)) {
|
||||
if ($this->allowsElement('p')) {
|
||||
// special case: we're in an element that allows paragraphs
|
||||
|
||||
// this token is already paragraph, abort
|
||||
if ($token->name == 'p') return;
|
||||
|
||||
// this token is a block level, abort
|
||||
if (!$this->_isInline($token)) return;
|
||||
|
||||
// check if this token is adjacent to the parent token
|
||||
$prev = $this->inputTokens[$this->inputIndex - 1];
|
||||
if (!$prev instanceof HTMLPurifier_Token_Start) {
|
||||
// not adjacent, we can abort early
|
||||
// add lead paragraph tag if our token is inline
|
||||
// and the previous tag was an end paragraph
|
||||
if (
|
||||
$prev->name == 'p' && $prev instanceof HTMLPurifier_Token_End &&
|
||||
$this->_isInline($token)
|
||||
) {
|
||||
$token = array($this->_pStart(), $token);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// this token is the first child of the element that allows
|
||||
// paragraph. We have to peek ahead and see whether or not
|
||||
// there is anything inside that suggests that a paragraph
|
||||
// will be needed
|
||||
$ok = false;
|
||||
// maintain a mini-nesting counter, this lets us bail out
|
||||
// early if possible
|
||||
$j = 1; // current nesting, one is due to parent (we recalculate current token)
|
||||
for ($i = $this->inputIndex; isset($this->inputTokens[$i]); $i++) {
|
||||
if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Start) $j++;
|
||||
if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_End) $j--;
|
||||
if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Text) {
|
||||
if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) {
|
||||
$ok = true;
|
||||
break;
|
||||
// We don't have to check if we're already in a <p> tag for block
|
||||
// tokens, because the tag would have been autoclosed by MakeWellFormed.
|
||||
if ($this->allowsElement('p')) {
|
||||
if (!empty($this->currentNesting)) {
|
||||
if ($this->_isInline($token)) {
|
||||
// State 1: <div>...<b>
|
||||
// ---
|
||||
|
||||
// Check if this token is adjacent to the parent token
|
||||
// (seek backwards until token isn't whitespace)
|
||||
$i = null;
|
||||
$this->backward($i, $prev);
|
||||
|
||||
if (!$prev instanceof HTMLPurifier_Token_Start) {
|
||||
// Token wasn't adjacent
|
||||
|
||||
if (
|
||||
$prev instanceof HTMLPurifier_Token_Text &&
|
||||
substr($prev->data, -2) === "\n\n"
|
||||
) {
|
||||
// State 1.1.4: <div><p>PAR1</p>\n\n<b>
|
||||
// ---
|
||||
|
||||
// Quite frankly, this should be handled by splitText
|
||||
$token = array($this->_pStart(), $token);
|
||||
} else {
|
||||
// State 1.1.1: <div><p>PAR1</p><b>
|
||||
// ---
|
||||
|
||||
// State 1.1.2: <div><br /><b>
|
||||
// ---
|
||||
|
||||
// State 1.1.3: <div>PAR<b>
|
||||
// ---
|
||||
}
|
||||
|
||||
} else {
|
||||
// State 1.2.1: <div><b>
|
||||
// ---
|
||||
|
||||
// Lookahead to see if <p> is needed.
|
||||
if ($this->_pLookAhead()) {
|
||||
// State 1.3.1: <div><b>PAR1\n\nPAR2
|
||||
// ---
|
||||
$token = array($this->_pStart(), $token);
|
||||
} else {
|
||||
// State 1.3.2: <div><b>PAR1</b></div>
|
||||
// ---
|
||||
|
||||
// State 1.3.3: <div><b>PAR1</b><div></div>\n\n</div>
|
||||
// ---
|
||||
}
|
||||
}
|
||||
if ($j <= 0) break;
|
||||
} else {
|
||||
// State 2.3: ...<div>
|
||||
// -----
|
||||
}
|
||||
if ($ok) {
|
||||
} else {
|
||||
if ($this->_isInline($token)) {
|
||||
// State 3.1: <b>
|
||||
// ---
|
||||
// This is where the {p} tag is inserted, not reflected in
|
||||
// inputTokens yet, however.
|
||||
$token = array($this->_pStart(), $token);
|
||||
} else {
|
||||
// State 3.2: <div>
|
||||
// -----
|
||||
}
|
||||
|
||||
$i = null;
|
||||
if ($this->backward($i, $prev)) {
|
||||
if (
|
||||
!$prev instanceof HTMLPurifier_Token_Text
|
||||
) {
|
||||
// State 3.1.1: ...</p>{p}<b>
|
||||
// ---
|
||||
|
||||
// State 3.2.1: ...</p><div>
|
||||
// -----
|
||||
|
||||
if (!is_array($token)) $token = array($token);
|
||||
array_unshift($token, new HTMLPurifier_Token_Text("\n\n"));
|
||||
} else {
|
||||
// State 3.1.2: ...</p>\n\n{p}<b>
|
||||
// ---
|
||||
|
||||
// State 3.2.2: ...</p>\n\n<div>
|
||||
// -----
|
||||
|
||||
// Note: PAR<ELEM> cannot occur because PAR would have been
|
||||
// wrapped in <p> tags.
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
} else {
|
||||
// State 2.2: <ul><li>
|
||||
// ----
|
||||
|
||||
// State 2.4: <p><b>
|
||||
// ---
|
||||
}
|
||||
|
||||
// check if the start tag counts as a "block" element
|
||||
if (!$this->_isInline($token)) return;
|
||||
|
||||
// append a paragraph tag before the token
|
||||
$token = array($this->_pStart(), $token);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -142,96 +205,80 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
|
||||
*/
|
||||
private function _splitText($data, &$result) {
|
||||
$raw_paragraphs = explode("\n\n", $data);
|
||||
|
||||
// remove empty paragraphs
|
||||
$paragraphs = array();
|
||||
$paragraphs = array(); // without empty paragraphs
|
||||
$needs_start = false;
|
||||
$needs_end = false;
|
||||
|
||||
$c = count($raw_paragraphs);
|
||||
if ($c == 1) {
|
||||
// there were no double-newlines, abort quickly
|
||||
// There were no double-newlines, abort quickly. In theory this
|
||||
// should never happen.
|
||||
$result[] = new HTMLPurifier_Token_Text($data);
|
||||
return;
|
||||
}
|
||||
|
||||
for ($i = 0; $i < $c; $i++) {
|
||||
$par = $raw_paragraphs[$i];
|
||||
if (trim($par) !== '') {
|
||||
$paragraphs[] = $par;
|
||||
continue;
|
||||
}
|
||||
if ($i == 0 && empty($result)) {
|
||||
// The empty result indicates that the AutoParagraph
|
||||
// injector did not add any start paragraph tokens.
|
||||
// The fact that the first paragraph is empty indicates
|
||||
// that there was a double-newline at the start of the
|
||||
// data.
|
||||
// Combined together, this means that we are in a paragraph,
|
||||
// and the newline means we should start a new one.
|
||||
$result[] = new HTMLPurifier_Token_End('p');
|
||||
// However, the start token should only be added if
|
||||
// there is more processing to be done (i.e. there are
|
||||
// real paragraphs in here). If there are none, the
|
||||
// next start paragraph tag will be handled by the
|
||||
// next run-around the injector
|
||||
$needs_start = true;
|
||||
} elseif ($i + 1 == $c) {
|
||||
// a double-paragraph at the end indicates that
|
||||
// there is an overriding need to start a new paragraph
|
||||
// for the next section. This has no effect until
|
||||
// we've processed all of the other paragraphs though
|
||||
$needs_end = true;
|
||||
} else {
|
||||
if ($i == 0) {
|
||||
// Double newline at the front
|
||||
if (empty($result)) {
|
||||
// The empty result indicates that the AutoParagraph
|
||||
// injector did not add any start paragraph tokens.
|
||||
// This means that we have been in a paragraph for
|
||||
// a while, and the newline means we should start a new one.
|
||||
$result[] = new HTMLPurifier_Token_End('p');
|
||||
$result[] = new HTMLPurifier_Token_Text("\n\n");
|
||||
// However, the start token should only be added if
|
||||
// there is more processing to be done (i.e. there are
|
||||
// real paragraphs in here). If there are none, the
|
||||
// next start paragraph tag will be handled by the
|
||||
// next call to the injector
|
||||
$needs_start = true;
|
||||
} else {
|
||||
// We just started a new paragraph!
|
||||
// Reinstate a double-newline for presentation's sake, since
|
||||
// it was in the source code.
|
||||
array_unshift($result, new HTMLPurifier_Token_Text("\n\n"));
|
||||
}
|
||||
} elseif ($i + 1 == $c) {
|
||||
// Double newline at the end
|
||||
// There should be a trailing </p> when we're finally done.
|
||||
$needs_end = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check if there are no "real" paragraphs to be processed
|
||||
// Check if this was just a giant blob of whitespace. Move this earlier,
|
||||
// perhaps?
|
||||
if (empty($paragraphs)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// add a start tag if an end tag was added while processing
|
||||
// the raw paragraphs (that happens if there's a leading double
|
||||
// newline)
|
||||
if ($needs_start) $result[] = $this->_pStart();
|
||||
|
||||
// append the paragraphs onto the result
|
||||
foreach ($paragraphs as $par) {
|
||||
$result[] = new HTMLPurifier_Token_Text($par);
|
||||
$result[] = new HTMLPurifier_Token_End('p');
|
||||
// Add the start tag indicated by \n\n at the beginning of $data
|
||||
if ($needs_start) {
|
||||
$result[] = $this->_pStart();
|
||||
}
|
||||
|
||||
// remove trailing start token, if one is needed, it will
|
||||
// be handled the next time this injector is called
|
||||
array_pop($result);
|
||||
|
||||
// check the outside to determine whether or not the
|
||||
// end paragraph tag should be removed. It should be removed
|
||||
// unless the next non-whitespace token is a paragraph
|
||||
// or a block element.
|
||||
$remove_paragraph_end = true;
|
||||
|
||||
if (!$needs_end) {
|
||||
// Start of the checks one after the current token's index
|
||||
for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
|
||||
if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Start || $this->inputTokens[$i] instanceof HTMLPurifier_Token_Empty) {
|
||||
$remove_paragraph_end = $this->_isInline($this->inputTokens[$i]);
|
||||
}
|
||||
// check if we can abort early (whitespace means we carry-on!)
|
||||
if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Text && !$this->inputTokens[$i]->is_whitespace) break;
|
||||
// end tags will automatically be handled by MakeWellFormed,
|
||||
// so we don't have to worry about them
|
||||
if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_End) break;
|
||||
}
|
||||
} else {
|
||||
$remove_paragraph_end = false;
|
||||
// Append the paragraphs onto the result
|
||||
foreach ($paragraphs as $par) {
|
||||
$result[] = new HTMLPurifier_Token_Text($par);
|
||||
$result[] = new HTMLPurifier_Token_End('p');
|
||||
$result[] = new HTMLPurifier_Token_Text("\n\n");
|
||||
$result[] = $this->_pStart();
|
||||
}
|
||||
|
||||
// check the outside to determine whether or not the
|
||||
// end paragraph tag should be removed
|
||||
if ($remove_paragraph_end) {
|
||||
array_pop($result);
|
||||
// Remove trailing start token; Injector will handle this later if
|
||||
// it was indeed needed. This prevents from needing to do a lookahead,
|
||||
// at the cost of a lookbehind later.
|
||||
array_pop($result);
|
||||
|
||||
// If there is no need for an end tag, remove all of it and let
|
||||
// MakeWellFormed close it later.
|
||||
if (!$needs_end) {
|
||||
array_pop($result); // removes \n\n
|
||||
array_pop($result); // removes </p>
|
||||
}
|
||||
|
||||
}
|
||||
@@ -244,5 +291,49 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
|
||||
return isset($this->htmlDefinition->info['p']->child->elements[$token->name]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks ahead in the token list and determines whether or not we need
|
||||
* to insert a <p> tag.
|
||||
*/
|
||||
private function _pLookAhead() {
    // $i is not set yet (null), so current() places the cursor on the
    // token that is being processed right now.
    $this->current($i, $current);
    // Standing on a start tag means we are already one level deep, so
    // its matching end tag must not terminate the scan.
    $nesting = ($current instanceof HTMLPurifier_Token_Start) ? 1 : 0;
    while ($this->forwardUntilEndToken($i, $current, $nesting)) {
        $verdict = $this->_checkNeedsP($current);
        // null means "inconclusive, keep looking"; true/false is final.
        if ($verdict !== null) {
            return $verdict;
        }
    }
    // Ran out of tokens (or left the enclosing node) without a verdict.
    return false;
}
|
||||
|
||||
/**
|
||||
* Determines if a particular token requires an earlier inline token
|
||||
* to get a paragraph. This should be used with forwardUntilEndToken()
|
||||
*/
|
||||
private function _checkNeedsP($current) {
    if ($current instanceof HTMLPurifier_Token_Start) {
        // <div>PAR1<div>
        //          ----
        // A block element terminates the search with "no paragraph";
        // an inline element tells us nothing yet.
        return $this->_isInline($current) ? null : false;
    }
    if ($current instanceof HTMLPurifier_Token_Text) {
        // <div>PAR1<b>PAR1\n\nPAR2
        //              ----
        // A double newline means a paragraph is needed; text without
        // one (<div>PAR1<b>PAR1...) is inconclusive.
        return (strpos($current->data, "\n\n") !== false) ? true : null;
    }
    // Any other token type: no verdict.
    return null;
}
|
||||
|
||||
}
|
||||
|
||||
|
24
library/HTMLPurifier/Injector/DisplayLinkURI.php
Normal file
24
library/HTMLPurifier/Injector/DisplayLinkURI.php
Normal file
@@ -0,0 +1,24 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Injector that displays the URL of an anchor instead of linking to it, in addition to showing the text of the link.
|
||||
*/
|
||||
class HTMLPurifier_Injector_DisplayLinkURI extends HTMLPurifier_Injector
{

    public $name = 'DisplayLinkURI';
    public $needed = array('a');

    public function handleElement(&$token) {
        // Nothing to do for start/empty tags; the rewrite happens when
        // the end tag is handled.
    }

    /**
     * On an end tag whose start tag carries an href, strips the href from
     * the start tag and appends the URL as text, in parentheses, right
     * after the end tag.
     */
    public function handleEnd(&$token) {
        if (!isset($token->start->attr['href'])) {
            // nothing to display
            return;
        }
        $url = $token->start->attr['href'];
        unset($token->start->attr['href']);
        // Replace the end tag with itself followed by the visible URL.
        $token = array($token, new HTMLPurifier_Token_Text(" ($url)"));
    }
}
|
40
library/HTMLPurifier/Injector/RemoveEmpty.php
Normal file
40
library/HTMLPurifier/Injector/RemoveEmpty.php
Normal file
@@ -0,0 +1,40 @@
|
||||
<?php
|
||||
|
||||
class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector
{

    // $attrValidator is declared together with the other collaborators so
    // that prepare() does not create it as an undeclared dynamic property.
    private $context, $config, $attrValidator;

    /**
     * Runs the parent preparation, then keeps the config and context and
     * creates the attribute validator used by handleElement().
     */
    public function prepare($config, $context) {
        parent::prepare($config, $context);
        $this->config = $config;
        $this->context = $context;
        $this->attrValidator = new HTMLPurifier_AttrValidator();
    }

    /**
     * Removes a start tag that is immediately followed (ignoring
     * whitespace-only text) by its own end tag, unless the element is a
     * colgroup or still has an id/name attribute after validation.
     */
    public function handleElement(&$token) {
        if (!$token instanceof HTMLPurifier_Token_Start) return;

        // Find the next token that is not whitespace-only text. $next stays
        // false only when there is no token at all after the current one.
        $next = false;
        for ($i = $this->inputIndex + 1, $c = count($this->inputTokens); $i < $c; $i++) {
            $next = $this->inputTokens[$i];
            if ($next instanceof HTMLPurifier_Token_Text && $next->is_whitespace) continue;
            break;
        }

        if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) {
            // colgroup is never removed, even when empty.
            if ($token->name == 'colgroup') return;

            // Validate attributes now so the id/name check below sees the
            // validated attribute set. NOTE(review): the armor flag presumably
            // makes the later attribute-validation pass skip this token - confirm.
            $this->attrValidator->validateToken($token, $this->config, $this->context);
            $token->armor['ValidateAttributes'] = true;
            if (isset($token->attr['id']) || isset($token->attr['name'])) return;

            // Replace the token with an integer: the count of tokens from the
            // start tag through index $i inclusive. NOTE(review): an integer
            // here presumably tells the caller to delete that many tokens - confirm.
            $token = $i - $this->inputIndex + 1;

            // Walk back over whitespace-only text to the previous significant
            // token (index 0 is never inspected by this loop).
            for ($b = $this->inputIndex - 1; $b > 0; $b--) {
                $prev = $this->inputTokens[$b];
                if ($prev instanceof HTMLPurifier_Token_Text && $prev->is_whitespace) continue;
                break;
            }
            // This is safe because we removed the token that triggered this.
            $this->rewind($b - 1);
            return;
        }
    }

}
|
@@ -72,7 +72,10 @@ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector
|
||||
}
|
||||
}
|
||||
|
||||
public function notifyEnd($token) {
|
||||
public function handleEnd(&$token) {
|
||||
// This is the WRONG way of handling the object and param stacks;
|
||||
// we should be inserting them directly on the relevant object tokens
|
||||
// so that the global stack handling handles it.
|
||||
if ($token->name == 'object') {
|
||||
array_pop($this->objectStack);
|
||||
array_pop($this->paramStack);
|
||||
|
@@ -15,7 +15,8 @@ $messages = array(
|
||||
'Item separator last' => ' and ', // non-Harvard style
|
||||
|
||||
'ErrorCollector: No errors' => 'No errors detected. However, because error reporting is still incomplete, there may have been errors that the error collector was not notified of; please inspect the output HTML carefully.',
|
||||
'ErrorCollector: At line' => ' at line $line',
|
||||
'ErrorCollector: At line' => ' at line $line',
|
||||
'ErrorCollector: Incidental errors' => 'Incidental errors',
|
||||
|
||||
'Lexer: Unclosed comment' => 'Unclosed comment',
|
||||
'Lexer: Unescaped lt' => 'Unescaped less-than sign (<) should be <',
|
||||
@@ -30,6 +31,8 @@ $messages = array(
|
||||
'Strategy_RemoveForeignElements: Comment removed' => 'Comment containing "$CurrentToken.Data" removed',
|
||||
'Strategy_RemoveForeignElements: Foreign meta element removed' => 'Unrecognized $CurrentToken.Serialized meta tag and all descendants removed',
|
||||
'Strategy_RemoveForeignElements: Token removed to end' => 'Tags and text starting from $1 element where removed to end',
|
||||
'Strategy_RemoveForeignElements: Trailing hyphen in comment removed' => 'Trailing hyphen(s) in comment removed',
|
||||
'Strategy_RemoveForeignElements: Hyphens in comment collapsed' => 'Double hyphens in comments are not allowed, and were collapsed into single hyphens',
|
||||
|
||||
'Strategy_MakeWellFormed: Unnecessary end tag removed' => 'Unnecessary $CurrentToken.Serialized tag removed',
|
||||
'Strategy_MakeWellFormed: Unnecessary end tag to text' => 'Unnecessary $CurrentToken.Serialized tag converted to text',
|
||||
@@ -50,8 +53,8 @@ $messages = array(
|
||||
);
|
||||
|
||||
$errorNames = array(
|
||||
E_ERROR => 'Error',
|
||||
E_ERROR => 'Error',
|
||||
E_WARNING => 'Warning',
|
||||
E_NOTICE => 'Notice'
|
||||
E_NOTICE => 'Notice'
|
||||
);
|
||||
|
||||
|
@@ -42,6 +42,12 @@
|
||||
class HTMLPurifier_Lexer
|
||||
{
|
||||
|
||||
/**
|
||||
* Whether or not this lexer implements line-number/column-number tracking.
|
||||
* If it does, set to true.
|
||||
*/
|
||||
public $tracksLineNumbers = false;
|
||||
|
||||
// -- STATIC ----------------------------------------------------------
|
||||
|
||||
/**
|
||||
@@ -70,46 +76,65 @@ class HTMLPurifier_Lexer
|
||||
$lexer = $config->get('Core', 'LexerImpl');
|
||||
}
|
||||
|
||||
$needs_tracking =
|
||||
$config->get('Core', 'MaintainLineNumbers') ||
|
||||
$config->get('Core', 'CollectErrors');
|
||||
|
||||
$inst = null;
|
||||
if (is_object($lexer)) {
|
||||
return $lexer;
|
||||
$inst = $lexer;
|
||||
} else {
|
||||
|
||||
if (is_null($lexer)) { do {
|
||||
// auto-detection algorithm
|
||||
|
||||
if ($needs_tracking) {
|
||||
$lexer = 'DirectLex';
|
||||
break;
|
||||
}
|
||||
|
||||
if (
|
||||
class_exists('DOMDocument') &&
|
||||
method_exists('DOMDocument', 'loadHTML') &&
|
||||
!extension_loaded('domxml')
|
||||
) {
|
||||
// check for DOM support, because while it's part of the
|
||||
// core, it can be disabled compile time. Also, the PECL
|
||||
// domxml extension overrides the default DOM, and is evil
|
||||
// and nasty and we shan't bother to support it
|
||||
$lexer = 'DOMLex';
|
||||
} else {
|
||||
$lexer = 'DirectLex';
|
||||
}
|
||||
|
||||
} while(0); } // do..while so we can break
|
||||
|
||||
// instantiate recognized string names
|
||||
switch ($lexer) {
|
||||
case 'DOMLex':
|
||||
$inst = new HTMLPurifier_Lexer_DOMLex();
|
||||
break;
|
||||
case 'DirectLex':
|
||||
$inst = new HTMLPurifier_Lexer_DirectLex();
|
||||
break;
|
||||
case 'PH5P':
|
||||
$inst = new HTMLPurifier_Lexer_PH5P();
|
||||
break;
|
||||
default:
|
||||
throw new HTMLPurifier_Exception("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer));
|
||||
}
|
||||
}
|
||||
|
||||
if (is_null($lexer)) { do {
|
||||
// auto-detection algorithm
|
||||
|
||||
// once PHP DOM implements native line numbers, or we
|
||||
// hack out something using XSLT, remove this stipulation
|
||||
$line_numbers = $config->get('Core', 'MaintainLineNumbers');
|
||||
if (
|
||||
$line_numbers === true ||
|
||||
($line_numbers === null && $config->get('Core', 'CollectErrors'))
|
||||
) {
|
||||
$lexer = 'DirectLex';
|
||||
break;
|
||||
}
|
||||
|
||||
if (class_exists('DOMDocument')) {
|
||||
// check for DOM support, because, surprisingly enough,
|
||||
// it's *not* part of the core!
|
||||
$lexer = 'DOMLex';
|
||||
} else {
|
||||
$lexer = 'DirectLex';
|
||||
}
|
||||
|
||||
} while(0); } // do..while so we can break
|
||||
if (!$inst) throw new HTMLPurifier_Exception('No lexer was instantiated');
|
||||
|
||||
// instantiate recognized string names
|
||||
switch ($lexer) {
|
||||
case 'DOMLex':
|
||||
return new HTMLPurifier_Lexer_DOMLex();
|
||||
case 'DirectLex':
|
||||
return new HTMLPurifier_Lexer_DirectLex();
|
||||
case 'PH5P':
|
||||
return new HTMLPurifier_Lexer_PH5P();
|
||||
default:
|
||||
trigger_error("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer), E_USER_ERROR);
|
||||
// once PHP DOM implements native line numbers, or we
|
||||
// hack out something using XSLT, remove this stipulation
|
||||
if ($needs_tracking && !$inst->tracksLineNumbers) {
|
||||
throw new HTMLPurifier_Exception('Cannot use lexer that does not support line numbers with Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)');
|
||||
}
|
||||
|
||||
return $inst;
|
||||
|
||||
}
|
||||
|
||||
// -- CONVENIENCE MEMBERS ---------------------------------------------
|
||||
@@ -226,11 +251,6 @@ class HTMLPurifier_Lexer
|
||||
*/
|
||||
public function normalize($html, $config, $context) {
|
||||
|
||||
// extract body from document if applicable
|
||||
if ($config->get('Core', 'ConvertDocumentToFragment')) {
|
||||
$html = $this->extractBody($html);
|
||||
}
|
||||
|
||||
// normalize newlines to \n
|
||||
$html = str_replace("\r\n", "\n", $html);
|
||||
$html = str_replace("\r", "\n", $html);
|
||||
@@ -243,6 +263,11 @@ class HTMLPurifier_Lexer
|
||||
// escape CDATA
|
||||
$html = $this->escapeCDATA($html);
|
||||
|
||||
// extract body from document if applicable
|
||||
if ($config->get('Core', 'ConvertDocumentToFragment')) {
|
||||
$html = $this->extractBody($html);
|
||||
}
|
||||
|
||||
// expand entities that aren't the big five
|
||||
$html = $this->_entity_parser->substituteNonSpecialEntities($html);
|
||||
|
||||
|
@@ -45,7 +45,10 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
$char = '[^a-z!\/]';
|
||||
$comment = "/<!--(.*?)(-->|\z)/is";
|
||||
$html = preg_replace_callback($comment, array($this, 'callbackArmorCommentEntities'), $html);
|
||||
$html = preg_replace("/<($char)/i", '<\\1', $html);
|
||||
do {
|
||||
$old = $html;
|
||||
$html = preg_replace("/<($char)/i", '<\\1', $html);
|
||||
} while ($html !== $old);
|
||||
$html = preg_replace_callback($comment, array($this, 'callbackUndoCommentSubst'), $html); // fix comments
|
||||
}
|
||||
|
||||
|
@@ -13,6 +13,8 @@
|
||||
class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
{
|
||||
|
||||
public $tracksLineNumbers = true;
|
||||
|
||||
/**
|
||||
* Whitespace characters for str(c)spn.
|
||||
*/
|
||||
@@ -42,6 +44,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
$inside_tag = false; // whether or not we're parsing the inside of a tag
|
||||
$array = array(); // result array
|
||||
|
||||
// This is also treated to mean maintain *column* numbers too
|
||||
$maintain_line_numbers = $config->get('Core', 'MaintainLineNumbers');
|
||||
|
||||
if ($maintain_line_numbers === null) {
|
||||
@@ -50,9 +53,17 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
$maintain_line_numbers = $config->get('Core', 'CollectErrors');
|
||||
}
|
||||
|
||||
if ($maintain_line_numbers) $current_line = 1;
|
||||
else $current_line = false;
|
||||
if ($maintain_line_numbers) {
|
||||
$current_line = 1;
|
||||
$current_col = 0;
|
||||
$length = strlen($html);
|
||||
} else {
|
||||
$current_line = false;
|
||||
$current_col = false;
|
||||
$length = false;
|
||||
}
|
||||
$context->register('CurrentLine', $current_line);
|
||||
$context->register('CurrentCol', $current_col);
|
||||
$nl = "\n";
|
||||
// how often to manually recalculate. This will ALWAYS be right,
|
||||
// but it's pretty wasteful. Set to 0 to turn off
|
||||
@@ -68,14 +79,31 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
|
||||
while(++$loops) {
|
||||
|
||||
// recalculate lines
|
||||
if (
|
||||
$maintain_line_numbers && // line number tracking is on
|
||||
$synchronize_interval && // synchronization is on
|
||||
$cursor > 0 && // cursor is further than zero
|
||||
$loops % $synchronize_interval === 0 // time to synchronize!
|
||||
) {
|
||||
$current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
|
||||
// $cursor is either at the start of a token, or inside of
|
||||
// a tag (i.e. there was a < immediately before it), as indicated
|
||||
// by $inside_tag
|
||||
|
||||
if ($maintain_line_numbers) {
|
||||
|
||||
// $rcursor, however, is always at the start of a token.
|
||||
$rcursor = $cursor - (int) $inside_tag;
|
||||
|
||||
// Column number is cheap, so we calculate it every round.
|
||||
// We're interested at the *end* of the newline string, so
|
||||
// we need to add strlen($nl) == 1 to $nl_pos before subtracting it
|
||||
// from our "rcursor" position.
|
||||
$nl_pos = strrpos($html, $nl, $rcursor - $length);
|
||||
$current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1);
|
||||
|
||||
// recalculate lines
|
||||
if (
|
||||
$synchronize_interval && // synchronization is on
|
||||
$cursor > 0 && // cursor is further than zero
|
||||
$loops % $synchronize_interval === 0 // time to synchronize!
|
||||
) {
|
||||
$current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
$position_next_lt = strpos($html, '<', $cursor);
|
||||
@@ -99,7 +127,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
)
|
||||
);
|
||||
if ($maintain_line_numbers) {
|
||||
$token->line = $current_line;
|
||||
$token->rawPosition($current_line, $current_col);
|
||||
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor);
|
||||
}
|
||||
$array[] = $token;
|
||||
@@ -119,7 +147,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
)
|
||||
)
|
||||
);
|
||||
if ($maintain_line_numbers) $token->line = $current_line;
|
||||
if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
|
||||
$array[] = $token;
|
||||
break;
|
||||
} elseif ($inside_tag && $position_next_gt !== false) {
|
||||
@@ -167,7 +195,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
)
|
||||
);
|
||||
if ($maintain_line_numbers) {
|
||||
$token->line = $current_line;
|
||||
$token->rawPosition($current_line, $current_col);
|
||||
$current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
|
||||
}
|
||||
$array[] = $token;
|
||||
@@ -182,7 +210,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
$type = substr($segment, 1);
|
||||
$token = new HTMLPurifier_Token_End($type);
|
||||
if ($maintain_line_numbers) {
|
||||
$token->line = $current_line;
|
||||
$token->rawPosition($current_line, $current_col);
|
||||
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
|
||||
}
|
||||
$array[] = $token;
|
||||
@@ -197,20 +225,12 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
if (!ctype_alpha($segment[0])) {
|
||||
// XML: $segment[0] !== '_' && $segment[0] !== ':'
|
||||
if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt');
|
||||
$token = new
|
||||
HTMLPurifier_Token_Text(
|
||||
'<' .
|
||||
$this->parseData(
|
||||
$segment
|
||||
) .
|
||||
'>'
|
||||
);
|
||||
$token = new HTMLPurifier_Token_Text('<');
|
||||
if ($maintain_line_numbers) {
|
||||
$token->line = $current_line;
|
||||
$token->rawPosition($current_line, $current_col);
|
||||
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
|
||||
}
|
||||
$array[] = $token;
|
||||
$cursor = $position_next_gt + 1;
|
||||
$inside_tag = false;
|
||||
continue;
|
||||
}
|
||||
@@ -235,7 +255,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
$token = new HTMLPurifier_Token_Start($segment);
|
||||
}
|
||||
if ($maintain_line_numbers) {
|
||||
$token->line = $current_line;
|
||||
$token->rawPosition($current_line, $current_col);
|
||||
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
|
||||
}
|
||||
$array[] = $token;
|
||||
@@ -267,7 +287,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
$token = new HTMLPurifier_Token_Start($type, $attr);
|
||||
}
|
||||
if ($maintain_line_numbers) {
|
||||
$token->line = $current_line;
|
||||
$token->rawPosition($current_line, $current_col);
|
||||
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
|
||||
}
|
||||
$array[] = $token;
|
||||
@@ -284,7 +304,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
substr($html, $cursor)
|
||||
)
|
||||
);
|
||||
if ($maintain_line_numbers) $token->line = $current_line;
|
||||
if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
|
||||
// no cursor scroll? Hmm...
|
||||
$array[] = $token;
|
||||
break;
|
||||
@@ -293,6 +313,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
}
|
||||
|
||||
$context->destroy('CurrentLine');
|
||||
$context->destroy('CurrentCol');
|
||||
return $array;
|
||||
}
|
||||
|
||||
|
@@ -7,31 +7,61 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
{
|
||||
|
||||
/**
|
||||
* Locally shared variable references
|
||||
* Array stream of tokens being processed.
|
||||
*/
|
||||
protected $inputTokens, $inputIndex, $outputTokens, $currentNesting,
|
||||
$currentInjector, $injectors;
|
||||
protected $tokens;
|
||||
|
||||
/**
|
||||
* Current index in $tokens.
|
||||
*/
|
||||
protected $t;
|
||||
|
||||
/**
|
||||
* Current nesting of elements.
|
||||
*/
|
||||
protected $stack;
|
||||
|
||||
/**
|
||||
* Injectors active in this stream processing.
|
||||
*/
|
||||
protected $injectors;
|
||||
|
||||
/**
|
||||
* Current instance of HTMLPurifier_Config.
|
||||
*/
|
||||
protected $config;
|
||||
|
||||
/**
|
||||
* Current instance of HTMLPurifier_Context.
|
||||
*/
|
||||
protected $context;
|
||||
|
||||
public function execute($tokens, $config, $context) {
|
||||
|
||||
$definition = $config->getHTMLDefinition();
|
||||
|
||||
// local variables
|
||||
$result = array();
|
||||
$generator = new HTMLPurifier_Generator($config, $context);
|
||||
$escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
|
||||
$e = $context->get('ErrorCollector', true);
|
||||
$t = false; // token index
|
||||
$i = false; // injector index
|
||||
$token = false; // the current token
|
||||
$reprocess = false; // whether or not to reprocess the same token
|
||||
$stack = array();
|
||||
|
||||
// member variables
|
||||
$this->currentNesting = array();
|
||||
$this->inputIndex = false;
|
||||
$this->inputTokens =& $tokens;
|
||||
$this->outputTokens =& $result;
|
||||
$this->stack =& $stack;
|
||||
$this->t =& $t;
|
||||
$this->tokens =& $tokens;
|
||||
$this->config = $config;
|
||||
$this->context = $context;
|
||||
|
||||
// context variables
|
||||
$context->register('CurrentNesting', $this->currentNesting);
|
||||
$context->register('InputIndex', $this->inputIndex);
|
||||
$context->register('CurrentNesting', $stack);
|
||||
$context->register('InputIndex', $t);
|
||||
$context->register('InputTokens', $tokens);
|
||||
$context->register('CurrentToken', $token);
|
||||
|
||||
// -- begin INJECTOR --
|
||||
|
||||
@@ -58,73 +88,119 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
$this->injectors[] = $injector;
|
||||
}
|
||||
|
||||
// array index of the injector that resulted in an array
|
||||
// substitution. This enables processTokens() to know which
|
||||
// injectors are affected by the added tokens and which are
|
||||
// not (namely, the ones after the current injector are not
|
||||
// affected)
|
||||
$this->currentInjector = false;
|
||||
|
||||
// give the injectors references to the definition and context
|
||||
// variables for performance reasons
|
||||
foreach ($this->injectors as $i => $injector) {
|
||||
foreach ($this->injectors as $ix => $injector) {
|
||||
$error = $injector->prepare($config, $context);
|
||||
if (!$error) continue;
|
||||
array_splice($this->injectors, $i, 1); // rm the injector
|
||||
array_splice($this->injectors, $ix, 1); // rm the injector
|
||||
trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING);
|
||||
}
|
||||
|
||||
// warning: most foreach loops follow the convention $i => $injector.
|
||||
// Don't define these as loop-wide variables, please!
|
||||
|
||||
// -- end INJECTOR --
|
||||
|
||||
$token = false;
|
||||
$context->register('CurrentToken', $token);
|
||||
// a note on punting:
|
||||
// In order to reduce code duplication, whenever some code needs
|
||||
// to make HTML changes in order to make things "correct", the
|
||||
// new HTML gets sent through the purifier, regardless of its
|
||||
// status. This means that if we add a start token, because it
|
||||
// was totally necessary, we don't have to update nesting; we just
|
||||
// punt ($reprocess = true; continue;) and it does that for us.
|
||||
|
||||
// isset is in loop because $tokens size changes during loop exec
|
||||
for ($this->inputIndex = 0; isset($tokens[$this->inputIndex]); $this->inputIndex++) {
|
||||
for (
|
||||
$t = 0;
|
||||
$t == 0 || isset($tokens[$t - 1]);
|
||||
// only increment if we don't need to reprocess
|
||||
$reprocess ? $reprocess = false : $t++
|
||||
) {
|
||||
|
||||
// if all goes well, this token will be passed through unharmed
|
||||
$token = $tokens[$this->inputIndex];
|
||||
|
||||
//printTokens($tokens, $this->inputIndex);
|
||||
|
||||
foreach ($this->injectors as $injector) {
|
||||
if ($injector->skip > 0) $injector->skip--;
|
||||
// check for a rewind
|
||||
if (is_int($i) && $i >= 0) {
|
||||
// possibility: disable rewinding if the current token has a
|
||||
// rewind set on it already. This would offer protection from
|
||||
// infinite loop, but might hinder some advanced rewinding.
|
||||
$rewind_to = $this->injectors[$i]->getRewind();
|
||||
if (is_int($rewind_to) && $rewind_to < $t) {
|
||||
if ($rewind_to < 0) $rewind_to = 0;
|
||||
while ($t > $rewind_to) {
|
||||
$t--;
|
||||
$prev = $tokens[$t];
|
||||
// indicate that other injectors should not process this token,
|
||||
// but we need to reprocess it
|
||||
unset($prev->skip[$i]);
|
||||
$prev->rewind = $i;
|
||||
if ($prev instanceof HTMLPurifier_Token_Start) array_pop($this->stack);
|
||||
elseif ($prev instanceof HTMLPurifier_Token_End) $this->stack[] = $prev->start;
|
||||
}
|
||||
}
|
||||
$i = false;
|
||||
}
|
||||
|
||||
// quick-check: if it's not a tag, no need to process
|
||||
if (empty( $token->is_tag )) {
|
||||
if ($token instanceof HTMLPurifier_Token_Text) {
|
||||
// injector handler code; duplicated for performance reasons
|
||||
foreach ($this->injectors as $i => $injector) {
|
||||
if (!$injector->skip) $injector->handleText($token);
|
||||
if (is_array($token)) {
|
||||
$this->currentInjector = $i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// handle case of document end
|
||||
if (!isset($tokens[$t])) {
|
||||
// kill processing if stack is empty
|
||||
if (empty($this->stack)) break;
|
||||
|
||||
// peek
|
||||
$top_nesting = array_pop($this->stack);
|
||||
$this->stack[] = $top_nesting;
|
||||
|
||||
// send error
|
||||
if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
|
||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
|
||||
}
|
||||
$this->processToken($token, $config, $context);
|
||||
|
||||
// append, don't splice, since this is the end
|
||||
$tokens[] = new HTMLPurifier_Token_End($top_nesting->name);
|
||||
|
||||
// punt!
|
||||
$reprocess = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
$info = $definition->info[$token->name]->child;
|
||||
// if all goes well, this token will be passed through unharmed
|
||||
$token = $tokens[$t];
|
||||
|
||||
//echo '<hr>';
|
||||
//printTokens($tokens, $t);
|
||||
//var_dump($this->stack);
|
||||
|
||||
// quick-check: if it's not a tag, no need to process
|
||||
if (empty($token->is_tag)) {
|
||||
if ($token instanceof HTMLPurifier_Token_Text) {
|
||||
foreach ($this->injectors as $i => $injector) {
|
||||
if (isset($token->skip[$i])) continue;
|
||||
if ($token->rewind !== null && $token->rewind !== $i) continue;
|
||||
$injector->handleText($token);
|
||||
$this->processToken($token, $i);
|
||||
$reprocess = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// another possibility is a comment
|
||||
continue;
|
||||
}
|
||||
|
||||
if (isset($definition->info[$token->name])) {
|
||||
$type = $definition->info[$token->name]->child->type;
|
||||
} else {
|
||||
$type = false; // Type is unknown, treat accordingly
|
||||
}
|
||||
|
||||
// quick tag checks: anything that's *not* an end tag
|
||||
$ok = false;
|
||||
if ($info->type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
|
||||
// test if it claims to be a start tag but is empty
|
||||
if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
|
||||
// claims to be a start tag but is empty
|
||||
$token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
|
||||
$ok = true;
|
||||
} elseif ($info->type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
|
||||
} elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
|
||||
// claims to be empty but really is a start tag
|
||||
$token = array(
|
||||
new HTMLPurifier_Token_Start($token->name, $token->attr),
|
||||
new HTMLPurifier_Token_End($token->name)
|
||||
);
|
||||
$ok = true;
|
||||
$this->swap(new HTMLPurifier_Token_End($token->name));
|
||||
$this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr));
|
||||
// punt (since we had to modify the input stream in a non-trivial way)
|
||||
$reprocess = true;
|
||||
continue;
|
||||
} elseif ($token instanceof HTMLPurifier_Token_Empty) {
|
||||
// real empty token
|
||||
$ok = true;
|
||||
@@ -132,62 +208,88 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
// start tag
|
||||
|
||||
// ...unless they also have to close their parent
|
||||
if (!empty($this->currentNesting)) {
|
||||
if (!empty($this->stack)) {
|
||||
|
||||
$parent = array_pop($this->currentNesting);
|
||||
$parent_info = $definition->info[$parent->name];
|
||||
$parent = array_pop($this->stack);
|
||||
$this->stack[] = $parent;
|
||||
|
||||
// this can be replaced with a more general algorithm:
|
||||
// if the token is not allowed by the parent, auto-close
|
||||
// the parent
|
||||
if (!isset($parent_info->child->elements[$token->name])) {
|
||||
if (isset($definition->info[$parent->name])) {
|
||||
$elements = $definition->info[$parent->name]->child->getNonAutoCloseElements($config);
|
||||
$autoclose = !isset($elements[$token->name]);
|
||||
} else {
|
||||
$autoclose = false;
|
||||
}
|
||||
|
||||
if ($autoclose) {
|
||||
if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
|
||||
// close the parent, then re-loop to reprocess token
|
||||
$result[] = new HTMLPurifier_Token_End($parent->name);
|
||||
$this->inputIndex--;
|
||||
// insert parent end tag before this tag
|
||||
$new_token = new HTMLPurifier_Token_End($parent->name);
|
||||
$new_token->start = $parent;
|
||||
$this->insertBefore($new_token);
|
||||
$reprocess = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
$this->currentNesting[] = $parent; // undo the pop
|
||||
}
|
||||
$ok = true;
|
||||
}
|
||||
|
||||
// injector handler code; duplicated for performance reasons
|
||||
if ($ok) {
|
||||
foreach ($this->injectors as $i => $injector) {
|
||||
if (!$injector->skip) $injector->handleElement($token);
|
||||
if (is_array($token)) {
|
||||
$this->currentInjector = $i;
|
||||
break;
|
||||
if (isset($token->skip[$i])) continue;
|
||||
if ($token->rewind !== null && $token->rewind !== $i) continue;
|
||||
$injector->handleElement($token);
|
||||
$this->processToken($token, $i);
|
||||
$reprocess = true;
|
||||
break;
|
||||
}
|
||||
if (!$reprocess) {
|
||||
// ah, nothing interesting happened; do normal processing
|
||||
$this->swap($token);
|
||||
if ($token instanceof HTMLPurifier_Token_Start) {
|
||||
$this->stack[] = $token;
|
||||
} elseif ($token instanceof HTMLPurifier_Token_End) {
|
||||
throw new HTMLPurifier_Exception('Improper handling of end tag in start code; possible error in MakeWellFormed');
|
||||
}
|
||||
}
|
||||
$this->processToken($token, $config, $context);
|
||||
continue;
|
||||
}
|
||||
|
||||
// sanity check: we should be dealing with a closing tag
|
||||
if (!$token instanceof HTMLPurifier_Token_End) continue;
|
||||
if (!$token instanceof HTMLPurifier_Token_End) {
|
||||
throw new HTMLPurifier_Exception('Unaccounted for tag token in input stream, bug in HTML Purifier');
|
||||
}
|
||||
|
||||
// make sure that we have something open
|
||||
if (empty($this->currentNesting)) {
|
||||
if (empty($this->stack)) {
|
||||
if ($escape_invalid_tags) {
|
||||
if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
|
||||
$result[] = new HTMLPurifier_Token_Text(
|
||||
$this->swap(new HTMLPurifier_Token_Text(
|
||||
$generator->generateFromToken($token)
|
||||
);
|
||||
} elseif ($e) {
|
||||
$e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
|
||||
));
|
||||
} else {
|
||||
$this->remove();
|
||||
if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
|
||||
}
|
||||
$reprocess = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// first, check for the simplest case: everything closes neatly
|
||||
$current_parent = array_pop($this->currentNesting);
|
||||
// first, check for the simplest case: everything closes neatly.
|
||||
// Eventually, everything passes through here; if there are problems
|
||||
// we modify the input stream accordingly and then punt, so that
|
||||
// the tokens get processed again.
|
||||
$current_parent = array_pop($this->stack);
|
||||
if ($current_parent->name == $token->name) {
|
||||
$result[] = $token;
|
||||
$token->start = $current_parent;
|
||||
foreach ($this->injectors as $i => $injector) {
|
||||
$injector->notifyEnd($token);
|
||||
if (isset($token->skip[$i])) continue;
|
||||
if ($token->rewind !== null && $token->rewind !== $i) continue;
|
||||
$injector->handleEnd($token);
|
||||
$this->processToken($token, $i);
|
||||
$this->stack[] = $current_parent;
|
||||
$reprocess = true;
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
@@ -195,47 +297,56 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
// okay, so we're trying to close the wrong tag
|
||||
|
||||
// undo the previous pop
|
||||
$this->currentNesting[] = $current_parent;
|
||||
$this->stack[] = $current_parent;
|
||||
|
||||
// scroll back the entire nest, trying to find our tag.
|
||||
// (feature could be to specify how far you'd like to go)
|
||||
$size = count($this->currentNesting);
|
||||
$size = count($this->stack);
|
||||
// -2 because -1 is the last element, but we already checked that
|
||||
$skipped_tags = false;
|
||||
for ($i = $size - 2; $i >= 0; $i--) {
|
||||
if ($this->currentNesting[$i]->name == $token->name) {
|
||||
// current nesting is modified
|
||||
$skipped_tags = array_splice($this->currentNesting, $i);
|
||||
for ($j = $size - 2; $j >= 0; $j--) {
|
||||
if ($this->stack[$j]->name == $token->name) {
|
||||
$skipped_tags = array_slice($this->stack, $j);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// we still didn't find the tag, so remove
|
||||
// we didn't find the tag, so remove
|
||||
if ($skipped_tags === false) {
|
||||
if ($escape_invalid_tags) {
|
||||
$result[] = new HTMLPurifier_Token_Text(
|
||||
$this->swap(new HTMLPurifier_Token_Text(
|
||||
$generator->generateFromToken($token)
|
||||
);
|
||||
));
|
||||
if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
|
||||
} elseif ($e) {
|
||||
$e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
|
||||
} else {
|
||||
$this->remove();
|
||||
if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
|
||||
}
|
||||
$reprocess = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// okay, we found it, close all the skipped tags
|
||||
// note that skipped tags contains the element we need closed
|
||||
for ($i = count($skipped_tags) - 1; $i >= 0; $i--) {
|
||||
// please don't redefine $i!
|
||||
if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
|
||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
|
||||
}
|
||||
$result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
|
||||
foreach ($this->injectors as $injector) {
|
||||
$injector->notifyEnd($new_token);
|
||||
// do errors, in REVERSE $j order: a,b,c with </a></b></c>
|
||||
$c = count($skipped_tags);
|
||||
if ($e) {
|
||||
for ($j = $c - 1; $j > 0; $j--) {
|
||||
// notice we exclude $j == 0, i.e. the current ending tag, from
|
||||
// the errors...
|
||||
if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
|
||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// insert tags, in FORWARD $j order: c,b,a with </a></b></c>
|
||||
for ($j = 1; $j < $c; $j++) {
|
||||
// ...as well as from the insertions
|
||||
$new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name);
|
||||
$new_token->start = $skipped_tags[$j];
|
||||
$this->insertBefore($new_token);
|
||||
}
|
||||
$reprocess = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
$context->destroy('CurrentNesting');
|
||||
@@ -243,59 +354,77 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
$context->destroy('InputIndex');
|
||||
$context->destroy('CurrentToken');
|
||||
|
||||
// we're at the end now, fix all still unclosed tags (this is
|
||||
// duplicated from the end of the loop with some slight modifications)
|
||||
// not using $skipped_tags since it would invariably be all of them
|
||||
if (!empty($this->currentNesting)) {
|
||||
for ($i = count($this->currentNesting) - 1; $i >= 0; $i--) {
|
||||
// please don't redefine $i!
|
||||
if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
|
||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
|
||||
}
|
||||
$result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
|
||||
foreach ($this->injectors as $injector) {
|
||||
$injector->notifyEnd($new_token);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unset($this->outputTokens, $this->injectors, $this->currentInjector,
|
||||
$this->currentNesting, $this->inputTokens, $this->inputIndex);
|
||||
|
||||
return $result;
|
||||
unset($this->injectors, $this->stack, $this->tokens, $this->t);
|
||||
return $tokens;
|
||||
}
|
||||
|
||||
function processToken($token, $config, $context) {
|
||||
if (is_array($token)) {
|
||||
// the original token was overloaded by an injector, time
|
||||
// to some fancy acrobatics
|
||||
|
||||
// $this->inputIndex is decremented so that the entire set gets
|
||||
// re-processed
|
||||
array_splice($this->inputTokens, $this->inputIndex--, 1, $token);
|
||||
|
||||
// adjust the injector skips based on the array substitution
|
||||
if ($this->injectors) {
|
||||
$offset = count($token);
|
||||
for ($i = 0; $i <= $this->currentInjector; $i++) {
|
||||
// because of the skip back, we need to add one more
|
||||
// for uninitialized injectors. I'm not exactly
|
||||
// sure why this is the case, but I think it has to
|
||||
// do with the fact that we're decrementing skips
|
||||
// before re-checking text
|
||||
if (!$this->injectors[$i]->skip) $this->injectors[$i]->skip++;
|
||||
$this->injectors[$i]->skip += $offset;
|
||||
}
|
||||
}
|
||||
} elseif ($token) {
|
||||
// regular case
|
||||
$this->outputTokens[] = $token;
|
||||
if ($token instanceof HTMLPurifier_Token_Start) {
|
||||
$this->currentNesting[] = $token;
|
||||
} elseif ($token instanceof HTMLPurifier_Token_End) {
|
||||
array_pop($this->currentNesting); // not actually used
|
||||
/**
 * Applies a substitution value to the token stream at the current cursor
 * position ($this->t).
 *
 * Accepted forms of $token:
 *
 *  - a token object: replaces the current token;
 *  - an array of tokens: replaces the current token with the whole list.
 *    If the first element of the array is an integer, that many tokens
 *    (starting at the current one) are removed instead of just one;
 *  - an integer: that many tokens, starting at the current one, are
 *    deleted;
 *  - false: the current token is deleted.
 *
 * When the substitution was produced by an injector, every replacement
 * token inherits the skip lookup of the first removed token and is
 * additionally flagged to be skipped by that injector.
 *
 * @param $token Token substitution value (see the forms above)
 * @param $injector Index of the injector that performed the substitution;
 *        the default of -1 means this is not an injector related
 *        operation, and no skip flags are written.
 * @throws HTMLPurifier_Exception if $token has an unsupported type or
 *         requests the deletion of zero tokens.
 */
protected function processToken($token, $injector = -1) {

    // Bring every shorthand into the canonical list form:
    //      array(number of nodes to delete, new node 1, new node 2, ...)
    if (is_object($token)) {
        $token = array(1, $token);
    } elseif (is_int($token)) {
        $token = array($token);
    } elseif ($token === false) {
        $token = array(1);
    }
    if (!is_array($token)) {
        throw new HTMLPurifier_Exception('Invalid token type from injector');
    }
    if (!is_int($token[0])) {
        // a plain list of tokens replaces exactly one token
        array_unshift($token, 1);
    }
    if ($token[0] === 0) {
        throw new HTMLPurifier_Exception('Deleting zero tokens is not valid');
    }

    // peel off the count; what is left in $token are the replacements
    $delete = array_shift($token);
    $old = array_splice($this->tokens, $this->t, $delete, $token);

    // skip bookkeeping only applies to injector-driven substitutions
    if (!($injector > -1)) {
        return;
    }

    // replacements carry over the skips of the first removed token, plus
    // a skip for the injector that created them
    if (isset($old[0])) {
        $skips = $old[0]->skip;
    } else {
        $skips = array();
    }
    $skips[$injector] = true;
    foreach ($token as $replacement) {
        $replacement->skip = $skips;
    }

}
|
||||
|
||||
/**
 * Splices $token into the stream at the cursor without removing anything.
 * The cursor index itself is left untouched, so it now addresses the
 * inserted token and the former current token follows it.
 */
private function insertBefore($token) {
    $inserted = array($token);
    array_splice($this->tokens, $this->t, 0, $inserted);
}
|
||||
|
||||
/**
 * Deletes the token under the cursor. The cursor index is unchanged, so
 * it now addresses whichever token moved into the vacated slot.
 */
private function remove() {
    $cursor = $this->t;
    array_splice($this->tokens, $cursor, 1);
}
|
||||
|
||||
/**
 * Overwrites the token under the cursor with $token. The cursor index is
 * unchanged and now addresses the new token.
 */
private function swap($token) {
    $cursor = $this->t;
    $this->tokens[$cursor] = $token;
}
|
||||
|
||||
}
|
||||
|
@@ -19,6 +19,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
||||
$escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
|
||||
$remove_invalid_img = $config->get('Core', 'RemoveInvalidImg');
|
||||
|
||||
// currently only used to determine if comments should be kept
|
||||
$trusted = $config->get('HTML', 'Trusted');
|
||||
|
||||
$remove_script_contents = $config->get('Core', 'RemoveScriptContents');
|
||||
$hidden_elements = $config->get('Core', 'HiddenElements');
|
||||
|
||||
@@ -125,6 +128,23 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
||||
if ($textify_comments !== false) {
|
||||
$data = $token->data;
|
||||
$token = new HTMLPurifier_Token_Text($data);
|
||||
} elseif ($trusted) {
|
||||
// keep, but perform comment cleaning
|
||||
if ($e) {
|
||||
// perform check whether or not there's a trailing hyphen
|
||||
if (substr($token->data, -1) == '-') {
|
||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
|
||||
}
|
||||
}
|
||||
$token->data = rtrim($token->data, '-');
|
||||
$found_double_hyphen = false;
|
||||
while (strpos($token->data, '--') !== false) {
|
||||
if ($e && !$found_double_hyphen) {
|
||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
|
||||
}
|
||||
$found_double_hyphen = true; // prevent double-erroring
|
||||
$token->data = str_replace('--', '-', $token->data);
|
||||
}
|
||||
} else {
|
||||
// strip comments
|
||||
if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
|
||||
|
@@ -78,6 +78,7 @@ class HTMLPurifier_StringHashParser
|
||||
if (strncmp('--', $line, 2) === 0) {
|
||||
// Multiline declaration
|
||||
$state = trim($line, '- ');
|
||||
if (!isset($ret[$state])) $ret[$state] = '';
|
||||
continue;
|
||||
} elseif (!$state) {
|
||||
$single = true;
|
||||
@@ -94,7 +95,6 @@ class HTMLPurifier_StringHashParser
|
||||
$single = false;
|
||||
$state = false;
|
||||
} else {
|
||||
if (!isset($ret[$state])) $ret[$state] = '';
|
||||
$ret[$state] .= "$line\n";
|
||||
}
|
||||
} while (!feof($fh));
|
||||
|
@@ -5,6 +5,7 @@
|
||||
*/
|
||||
class HTMLPurifier_Token {
|
||||
public $line; /**< Line number node was on in source document. Null if unknown. */
|
||||
public $col; /**< Column of line node was on in source document. Null if unknown. */
|
||||
|
||||
/**
|
||||
* Lookup array of processing that this token is exempt from.
|
||||
@@ -13,17 +14,41 @@ class HTMLPurifier_Token {
|
||||
*/
|
||||
public $armor = array();
|
||||
|
||||
/**
|
||||
* Used during MakeWellFormed.
|
||||
*/
|
||||
public $skip;
|
||||
public $rewind;
|
||||
|
||||
/**
 * Backwards-compatibility shim for the deprecated "type" property.
 *
 * Reading $token->type raises an E_USER_NOTICE and yields a short string
 * derived from the concrete token class ('start', 'empty', 'end', 'text'
 * or 'comment'), or null for any other class. Reading any other
 * undefined property yields null without a notice.
 */
public function __get($n) {
    // only the legacy "type" property is emulated
    if ($n !== 'type') {
        return;
    }
    trigger_error('Deprecated type property called; use instanceof', E_USER_NOTICE);
    $legacy_types = array(
        'HTMLPurifier_Token_Start'   => 'start',
        'HTMLPurifier_Token_Empty'   => 'empty',
        'HTMLPurifier_Token_End'     => 'end',
        'HTMLPurifier_Token_Text'    => 'text',
        'HTMLPurifier_Token_Comment' => 'comment',
    );
    $class = get_class($this);
    return isset($legacy_types[$class]) ? $legacy_types[$class] : null;
}
|
||||
|
||||
/**
 * Records where this token sits in the source document.
 *
 * @param $l Line number, or null if unknown
 * @param $c Column number, or null if unknown
 */
public function position($l = null, $c = null) {
    $this->col  = $c;
    $this->line = $l;
}
|
||||
|
||||
/**
 * Convenience function for DirectLex to set the line/col position.
 *
 * Stores the values like position(), except that a column of exactly -1
 * bumps the stored line number by one; the column is stored unchanged.
 * NOTE(review): presumably -1 arises when the lexer's cursor sits on the
 * newline character itself -- confirm against DirectLex.
 *
 * @param $l Line number as tracked by the lexer
 * @param $c Column number as tracked by the lexer
 */
public function rawPosition($l, $c) {
    $this->col = $c;
    if ($c === -1) {
        $l++;
    }
    $this->line = $l;
}
|
||||
|
||||
}
|
||||
|
@@ -11,9 +11,10 @@ class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
|
||||
*
|
||||
* @param $data String comment data.
|
||||
*/
|
||||
public function __construct($data, $line = null) {
|
||||
public function __construct($data, $line = null, $col = null) {
|
||||
$this->data = $data;
|
||||
$this->line = $line;
|
||||
$this->col = $col;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -9,5 +9,9 @@
|
||||
*/
|
||||
class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
|
||||
{
|
||||
|
||||
/**
|
||||
* Start token that opened the element this end tag closes. Assigned by
* MakeWellFormed when it matches or synthesizes an end tag, and read
* back when it rewinds over an end tag to restore its nesting stack.
* Please
|
||||
* do not edit this!
|
||||
*/
|
||||
public $start;
|
||||
}
|
||||
|
@@ -33,7 +33,7 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token
|
||||
* @param $name String name.
|
||||
* @param $attr Associative array of attributes.
|
||||
*/
|
||||
public function __construct($name, $attr = array(), $line = null) {
|
||||
public function __construct($name, $attr = array(), $line = null, $col = null) {
|
||||
$this->name = ctype_lower($name) ? $name : strtolower($name);
|
||||
foreach ($attr as $key => $value) {
|
||||
// normalization only necessary when key is not lowercase
|
||||
@@ -49,5 +49,6 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token
|
||||
}
|
||||
$this->attr = $attr;
|
||||
$this->line = $line;
|
||||
$this->col = $col;
|
||||
}
|
||||
}
|
||||
|
@@ -21,10 +21,11 @@ class HTMLPurifier_Token_Text extends HTMLPurifier_Token
|
||||
*
|
||||
* @param $data String parsed character data.
|
||||
*/
|
||||
public function __construct($data, $line = null) {
|
||||
public function __construct($data, $line = null, $col = null) {
|
||||
$this->data = $data;
|
||||
$this->is_whitespace = ctype_space($data);
|
||||
$this->line = $line;
|
||||
$this->col = $col;
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -5,7 +5,7 @@ class HTMLPurifier_URIFilter_DisableExternal extends HTMLPurifier_URIFilter
|
||||
public $name = 'DisableExternal';
|
||||
protected $ourHostParts = false;
|
||||
public function prepare($config) {
|
||||
$our_host = $config->get('URI', 'Host');
|
||||
$our_host = $config->getDefinition('URI')->host;
|
||||
if ($our_host !== null) $this->ourHostParts = array_reverse(explode('.', $our_host));
|
||||
}
|
||||
public function filter(&$uri, $config, $context) {
|
||||
|
@@ -51,12 +51,18 @@ class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
|
||||
}
|
||||
if ($uri->path === '') {
|
||||
$uri->path = $this->base->path;
|
||||
}elseif ($uri->path[0] !== '/') {
|
||||
} elseif ($uri->path[0] !== '/') {
|
||||
// relative path, needs more complicated processing
|
||||
$stack = explode('/', $uri->path);
|
||||
$new_stack = array_merge($this->basePathStack, $stack);
|
||||
if ($new_stack[0] !== '' && !is_null($this->base->host)) {
|
||||
array_unshift($new_stack, '');
|
||||
}
|
||||
$new_stack = $this->_collapseStack($new_stack);
|
||||
$uri->path = implode('/', $new_stack);
|
||||
} else {
|
||||
// absolute path, but still we should collapse
|
||||
$uri->path = implode('/', $this->_collapseStack(explode('/', $uri->path)));
|
||||
}
|
||||
// re-combine
|
||||
$uri->scheme = $this->base->scheme;
|
||||
@@ -71,6 +77,7 @@ class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
|
||||
*/
|
||||
private function _collapseStack($stack) {
|
||||
$result = array();
|
||||
$is_folder = false;
|
||||
for ($i = 0; isset($stack[$i]); $i++) {
|
||||
$is_folder = false;
|
||||
// absorb an internally duplicated slash
|
||||
|
@@ -28,7 +28,11 @@ class HTMLPurifier_URIFilter_Munge extends HTMLPurifier_URIFilter
|
||||
$this->replace = array_map('rawurlencode', $this->replace);
|
||||
|
||||
$new_uri = strtr($this->target, $this->replace);
|
||||
$uri = $this->parser->parse($new_uri); // overwrite
|
||||
$new_uri = $this->parser->parse($new_uri);
|
||||
// don't redirect if the target host is the same as the
|
||||
// starting host
|
||||
if ($uri->host === $new_uri->host) return true;
|
||||
$uri = $new_uri; // overwrite
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@@ -234,6 +234,18 @@ class HTMLPurifier_UnitConverter
|
||||
* Scales a float to $scale digits right of decimal point, like BCMath.
|
||||
*/
|
||||
private function scale($r, $scale) {
|
||||
if ($scale < 0) {
|
||||
// The f sprintf type doesn't support negative numbers, so we
|
||||
// need to cludge things manually. First get the string.
|
||||
$r = sprintf('%.0f', (float) $r);
|
||||
// Due to floating point precision loss, $r will more than likely
|
||||
// look something like 4652999999999.9234. We grab one more digit
|
||||
// than we need to precise from $r and then use that to round
|
||||
// appropriately.
|
||||
$precise = (string) round(substr($r, 0, strlen($r) + $scale), -1);
|
||||
// Now we return it, truncating the zero that was rounded off.
|
||||
return substr($precise, 0, -1) . str_repeat('0', -$scale + 1);
|
||||
}
|
||||
return sprintf('%.' . $scale . 'f', (float) $r);
|
||||
}
|
||||
|
||||
|
2
plugins/phorum/.gitignore
vendored
Normal file
2
plugins/phorum/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
migrate.php
|
||||
htmlpurifier/*
|
@@ -104,5 +104,5 @@ file_put_contents('library/HTMLPurifier/Config.php', $config_c);
|
||||
|
||||
passthru('php maintenance/flush.php');
|
||||
|
||||
if ($is_dev) echo "Review changes, write something in WHATSNEW and FOCUS, and then SVN commit with log 'Release $version.'" . PHP_EOL;
|
||||
if ($is_dev) echo "Review changes, write something in WHATSNEW and FOCUS, and then commit with log 'Release $version.'" . PHP_EOL;
|
||||
else echo "Numbers updated to dev, no other modifications necessary!";
|
||||
|
39
tests/HTMLPurifier/AttrTransform/BackgroundTest.php
Normal file
39
tests/HTMLPurifier/AttrTransform/BackgroundTest.php
Normal file
@@ -0,0 +1,39 @@
|
||||
<?php
|
||||
|
||||
class HTMLPurifier_AttrTransform_BackgroundTest extends HTMLPurifier_AttrTransformHarness
|
||||
{
|
||||
|
||||
function setUp() {
|
||||
parent::setUp();
|
||||
$this->obj = new HTMLPurifier_AttrTransform_Background();
|
||||
}
|
||||
|
||||
function testEmptyInput() {
|
||||
$this->assertResult( array() );
|
||||
}
|
||||
|
||||
function testBasicTransform() {
|
||||
$this->assertResult(
|
||||
array('background' => 'logo.png'),
|
||||
array('style' => 'background-image:url(logo.png);')
|
||||
);
|
||||
}
|
||||
|
||||
function testPrependNewCSS() {
|
||||
$this->assertResult(
|
||||
array('background' => 'logo.png', 'style' => 'font-weight:bold'),
|
||||
array('style' => 'background-image:url(logo.png);font-weight:bold')
|
||||
);
|
||||
}
|
||||
|
||||
function testLenientTreatmentOfInvalidInput() {
|
||||
// notice that we rely on the CSS validator later to fix this invalid
|
||||
// stuff
|
||||
$this->assertResult(
|
||||
array('background' => 'logo.png);foo:('),
|
||||
array('style' => 'background-image:url(logo.png);foo:();')
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -19,6 +19,7 @@ class HTMLPurifier_AttrTransform_ImgRequiredTest extends HTMLPurifier_AttrTransf
|
||||
function testAlternateDefaults() {
|
||||
$this->config->set('Attr', 'DefaultInvalidImage', 'blank.png');
|
||||
$this->config->set('Attr', 'DefaultInvalidImageAlt', 'Pawned!');
|
||||
$this->config->set('Attr', 'DefaultImageAlt', 'not pawned');
|
||||
$this->config->set('Core', 'RemoveInvalidImg', false);
|
||||
$this->assertResult(
|
||||
array(),
|
||||
@@ -41,5 +42,13 @@ class HTMLPurifier_AttrTransform_ImgRequiredTest extends HTMLPurifier_AttrTransf
|
||||
);
|
||||
}
|
||||
|
||||
function testAddDefaultAlt() {
|
||||
$this->config->set('Attr', 'DefaultImageAlt', 'default');
|
||||
$this->assertResult(
|
||||
array('src' => ''),
|
||||
array('src' => '', 'alt' => 'default')
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
93
tests/HTMLPurifier/AttrTransform/InputTest.php
Normal file
93
tests/HTMLPurifier/AttrTransform/InputTest.php
Normal file
@@ -0,0 +1,93 @@
|
||||
<?php
|
||||
|
||||
class HTMLPurifier_AttrTransform_InputTest extends HTMLPurifier_AttrTransformHarness
|
||||
{
|
||||
|
||||
function setUp() {
|
||||
parent::setUp();
|
||||
$this->obj = new HTMLPurifier_AttrTransform_Input();
|
||||
}
|
||||
|
||||
function testEmptyInput() {
|
||||
$this->assertResult(array());
|
||||
}
|
||||
|
||||
function testInvalidCheckedWithEmpty() {
|
||||
$this->assertResult(array('checked' => 'checked'), array());
|
||||
}
|
||||
|
||||
function testInvalidCheckedWithPassword() {
|
||||
$this->assertResult(array(
|
||||
'checked' => 'checked',
|
||||
'type' => 'password'
|
||||
), array(
|
||||
'type' => 'password'
|
||||
));
|
||||
}
|
||||
|
||||
function testValidCheckedWithUcCheckbox() {
|
||||
$this->assertResult(array(
|
||||
'checked' => 'checked',
|
||||
'type' => 'CHECKBOX',
|
||||
'value' => 'bar',
|
||||
));
|
||||
}
|
||||
|
||||
function testInvalidMaxlength() {
|
||||
$this->assertResult(array(
|
||||
'maxlength' => '10',
|
||||
'type' => 'checkbox',
|
||||
'value' => 'foo',
|
||||
), array(
|
||||
'type' => 'checkbox',
|
||||
'value' => 'foo',
|
||||
));
|
||||
}
|
||||
|
||||
function testValidMaxLength() {
|
||||
$this->assertResult(array(
|
||||
'maxlength' => '10',
|
||||
));
|
||||
}
|
||||
|
||||
// these two are really bad test-cases
|
||||
|
||||
function testSizeWithCheckbox() {
|
||||
$this->assertResult(array(
|
||||
'type' => 'checkbox',
|
||||
'value' => 'foo',
|
||||
'size' => '100px',
|
||||
), array(
|
||||
'type' => 'checkbox',
|
||||
'value' => 'foo',
|
||||
'size' => '100',
|
||||
));
|
||||
}
|
||||
|
||||
function testSizeWithText() {
|
||||
$this->assertResult(array(
|
||||
'type' => 'password',
|
||||
'size' => '100px', // spurious value, to indicate no validation takes place
|
||||
), array(
|
||||
'type' => 'password',
|
||||
'size' => '100px',
|
||||
));
|
||||
}
|
||||
|
||||
function testInvalidSrc() {
|
||||
$this->assertResult(array(
|
||||
'src' => 'img.png',
|
||||
), array());
|
||||
}
|
||||
|
||||
function testMissingValue() {
|
||||
$this->assertResult(array(
|
||||
'type' => 'checkbox',
|
||||
), array(
|
||||
'type' => 'checkbox',
|
||||
'value' => '',
|
||||
));
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -3,6 +3,15 @@
|
||||
class HTMLPurifier_AttrValidator_ErrorsTest extends HTMLPurifier_ErrorsHarness
|
||||
{
|
||||
|
||||
public function setup() {
|
||||
parent::setup();
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$this->language = HTMLPurifier_LanguageFactory::instance()->create($config, $this->context);
|
||||
$this->context->register('Locale', $this->language);
|
||||
$this->collector = new HTMLPurifier_ErrorCollector($this->context);
|
||||
$this->context->register('Generator', new HTMLPurifier_Generator($config, $this->context));
|
||||
}
|
||||
|
||||
protected function invoke($input) {
|
||||
$validator = new HTMLPurifier_AttrValidator();
|
||||
$validator->validateToken($input, $this->config, $this->context);
|
||||
@@ -18,28 +27,40 @@ class HTMLPurifier_AttrValidator_ErrorsTest extends HTMLPurifier_ErrorsHarness
|
||||
$output = array('class' => 'value'); // must be valid
|
||||
$transform->setReturnValue('transform', $output, array($input, new AnythingExpectation(), new AnythingExpectation()));
|
||||
$def->info_attr_transform_pre[] = $transform;
|
||||
$this->expectErrorCollection(E_NOTICE, 'AttrValidator: Attributes transformed', $input, $output);
|
||||
|
||||
$token = new HTMLPurifier_Token_Start('span', $input, 1);
|
||||
$this->invoke($token);
|
||||
|
||||
$result = $this->collector->getRaw();
|
||||
$expect = array(
|
||||
array(1, E_NOTICE, 'Attributes on <span> transformed from original to class', array()),
|
||||
);
|
||||
$this->assertIdentical($result, $expect);
|
||||
}
|
||||
|
||||
function testAttributesTransformedLocalPre() {
|
||||
$this->config->set('HTML', 'TidyLevel', 'heavy');
|
||||
$input = array('align' => 'right');
|
||||
$output = array('style' => 'text-align:right;');
|
||||
$this->expectErrorCollection(E_NOTICE, 'AttrValidator: Attributes transformed', $input, $output);
|
||||
$token = new HTMLPurifier_Token_Start('p', $input, 1);
|
||||
$this->invoke($token);
|
||||
$result = $this->collector->getRaw();
|
||||
$expect = array(
|
||||
array(1, E_NOTICE, 'Attributes on <p> transformed from align to style', array()),
|
||||
);
|
||||
$this->assertIdentical($result, $expect);
|
||||
}
|
||||
|
||||
// too lazy to check for global post and global pre
|
||||
|
||||
function testAttributeRemoved() {
|
||||
$this->expectErrorCollection(E_ERROR, 'AttrValidator: Attribute removed');
|
||||
$this->expectContext('CurrentAttr', 'foobar');
|
||||
$token = new HTMLPurifier_Token_Start('p', array('foobar' => 'right'), 1);
|
||||
$this->expectContext('CurrentToken', $token);
|
||||
$this->invoke($token);
|
||||
$result = $this->collector->getRaw();
|
||||
$expect = array(
|
||||
array(1, E_ERROR, 'foobar attribute on <p> removed', array()),
|
||||
);
|
||||
$this->assertIdentical($result, $expect);
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -69,5 +69,20 @@ class HTMLPurifier_ChildDef_CustomTest extends HTMLPurifier_ChildDefHarness
|
||||
|
||||
}
|
||||
|
||||
function testPcdata() {
|
||||
$this->obj = new HTMLPurifier_ChildDef_Custom('#PCDATA,a');
|
||||
$this->assertEqual($this->obj->elements, array('#PCDATA' => true, 'a' => true));
|
||||
$this->assertResult('foo<a />');
|
||||
$this->assertResult('<a />', false);
|
||||
}
|
||||
|
||||
function testWhitespace() {
|
||||
$this->obj = new HTMLPurifier_ChildDef_Custom('a');
|
||||
$this->assertEqual($this->obj->elements, array('a' => true));
|
||||
$this->assertResult('foo<a />', false);
|
||||
$this->assertResult('<a />');
|
||||
$this->assertResult(' <a />');
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@@ -67,8 +67,6 @@ class HTMLPurifier_ChildDef_RequiredTest extends HTMLPurifier_ChildDefHarness
|
||||
'Out <b>Bold text</b><img />',
|
||||
'Out <b>Bold text</b><img />'
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@@ -35,16 +35,6 @@ class HTMLPurifier_ComplexHarness extends HTMLPurifier_Harness
|
||||
*/
|
||||
protected $lexer;
|
||||
|
||||
/**
|
||||
* Default config to fall back on if no config is available
|
||||
*/
|
||||
protected $config;
|
||||
|
||||
/**
|
||||
* Default context to fall back on if no context is available
|
||||
*/
|
||||
protected $context;
|
||||
|
||||
public function __construct() {
|
||||
$this->lexer = new HTMLPurifier_Lexer_DirectLex();
|
||||
parent::__construct();
|
||||
@@ -88,9 +78,12 @@ class HTMLPurifier_ComplexHarness extends HTMLPurifier_Harness
|
||||
$expect = $this->generate($expect);
|
||||
}
|
||||
}
|
||||
|
||||
$this->assertIdentical($expect, $result);
|
||||
|
||||
if ($expect !== $result) {
|
||||
echo '<pre>' . htmlspecialchars($result) . '</pre>';
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -38,6 +38,15 @@ class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness
|
||||
);
|
||||
}
|
||||
|
||||
function test_convertToUTF8_spuriousEncoding() {
|
||||
$this->config->set('Core', 'Encoding', 'utf99');
|
||||
$this->expectError('Invalid encoding utf99');
|
||||
$this->assertIdentical(
|
||||
HTMLPurifier_Encoder::convertToUTF8("\xF6", $this->config, $this->context),
|
||||
''
|
||||
);
|
||||
}
|
||||
|
||||
function test_convertToUTF8_iso8859_1() {
|
||||
$this->config->set('Core', 'Encoding', 'ISO-8859-1');
|
||||
$this->assertIdentical(
|
||||
|
@@ -1,134 +1,156 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @warning HTML output is in flux, but eventually needs to be stabilized.
|
||||
*/
|
||||
class HTMLPurifier_ErrorCollectorTest extends HTMLPurifier_Harness
|
||||
{
|
||||
|
||||
protected $language, $generator, $line;
|
||||
protected $collector;
|
||||
|
||||
public function setup() {
|
||||
generate_mock_once('HTMLPurifier_Language');
|
||||
generate_mock_once('HTMLPurifier_Generator');
|
||||
parent::setup();
|
||||
$this->language = new HTMLPurifier_LanguageMock();
|
||||
$this->language->setReturnValue('getErrorName', 'Error', array(E_ERROR));
|
||||
$this->language->setReturnValue('getErrorName', 'Warning', array(E_WARNING));
|
||||
$this->language->setReturnValue('getErrorName', 'Notice', array(E_NOTICE));
|
||||
// this might prove to be troublesome if we need to set config
|
||||
$this->generator = new HTMLPurifier_Generator($this->config, $this->context);
|
||||
$this->line = false;
|
||||
$this->context->register('Locale', $this->language);
|
||||
$this->context->register('CurrentLine', $this->line);
|
||||
$this->context->register('Generator', $this->generator);
|
||||
$this->collector = new HTMLPurifier_ErrorCollector($this->context);
|
||||
}
|
||||
|
||||
function test() {
|
||||
|
||||
$language = new HTMLPurifier_LanguageMock();
|
||||
$language->setReturnValue('getErrorName', 'Error', array(E_ERROR));
|
||||
$language->setReturnValue('getErrorName', 'Warning', array(E_WARNING));
|
||||
$language->setReturnValue('getMessage', 'Message 1', array('message-1'));
|
||||
$language->setReturnValue('formatMessage', 'Message 2', array('message-2', array(1 => 'param')));
|
||||
$language = $this->language;
|
||||
$language->setReturnValue('getMessage', 'Message 1', array('message-1'));
|
||||
$language->setReturnValue('formatMessage', 'Message 2', array('message-2', array(1 => 'param')));
|
||||
$language->setReturnValue('formatMessage', ' at line 23', array('ErrorCollector: At line', array('line' => 23)));
|
||||
$language->setReturnValue('formatMessage', ' at line 3', array('ErrorCollector: At line', array('line' => 3)));
|
||||
$language->setReturnValue('formatMessage', ' at line 3', array('ErrorCollector: At line', array('line' => 3)));
|
||||
|
||||
$line = false;
|
||||
$this->line = 23;
|
||||
$this->collector->send(E_ERROR, 'message-1');
|
||||
|
||||
$this->context->register('Locale', $language);
|
||||
$this->context->register('CurrentLine', $line);
|
||||
|
||||
$generator = new HTMLPurifier_Generator($this->config, $this->context);
|
||||
$this->context->register('Generator', $generator);
|
||||
|
||||
$collector = new HTMLPurifier_ErrorCollector($this->context);
|
||||
|
||||
$line = 23;
|
||||
$collector->send(E_ERROR, 'message-1');
|
||||
|
||||
$line = 3;
|
||||
$collector->send(E_WARNING, 'message-2', 'param');
|
||||
$this->line = 3;
|
||||
$this->collector->send(E_WARNING, 'message-2', 'param');
|
||||
|
||||
$result = array(
|
||||
0 => array(23, E_ERROR, 'Message 1'),
|
||||
1 => array(3, E_WARNING, 'Message 2')
|
||||
0 => array(23, E_ERROR, 'Message 1', array()),
|
||||
1 => array(3, E_WARNING, 'Message 2', array())
|
||||
);
|
||||
|
||||
$this->assertIdentical($collector->getRaw(), $result);
|
||||
$this->assertIdentical($this->collector->getRaw(), $result);
|
||||
|
||||
/*
|
||||
$formatted_result =
|
||||
'<ul><li><strong>Warning</strong>: Message 2 at line 3</li>'.
|
||||
'<li><strong>Error</strong>: Message 1 at line 23</li></ul>';
|
||||
|
||||
$config = HTMLPurifier_Config::create(array('Core.MaintainLineNumbers' => true));
|
||||
|
||||
$this->assertIdentical($collector->getHTMLFormatted($this->config), $formatted_result);
|
||||
$this->assertIdentical($this->collector->getHTMLFormatted($this->config), $formatted_result);
|
||||
*/
|
||||
|
||||
}
|
||||
|
||||
function testNoErrors() {
|
||||
$language = new HTMLPurifier_LanguageMock();
|
||||
$language->setReturnValue('getMessage', 'No errors', array('ErrorCollector: No errors'));
|
||||
$this->context->register('Locale', $language);
|
||||
$this->language->setReturnValue('getMessage', 'No errors', array('ErrorCollector: No errors'));
|
||||
|
||||
$generator = new HTMLPurifier_Generator($this->config, $this->context);
|
||||
$this->context->register('Generator', $generator);
|
||||
|
||||
$collector = new HTMLPurifier_ErrorCollector($this->context);
|
||||
$formatted_result = '<p>No errors</p>';
|
||||
$this->assertIdentical($collector->getHTMLFormatted($this->config), $formatted_result);
|
||||
$this->assertIdentical(
|
||||
$this->collector->getHTMLFormatted($this->config),
|
||||
$formatted_result
|
||||
);
|
||||
}
|
||||
|
||||
function testNoLineNumbers() {
|
||||
$language = new HTMLPurifier_LanguageMock();
|
||||
$language->setReturnValue('getMessage', 'Message 1', array('message-1'));
|
||||
$language->setReturnValue('getMessage', 'Message 2', array('message-2'));
|
||||
$language->setReturnValue('getErrorName', 'Error', array(E_ERROR));
|
||||
$this->context->register('Locale', $language);
|
||||
$this->language->setReturnValue('getMessage', 'Message 1', array('message-1'));
|
||||
$this->language->setReturnValue('getMessage', 'Message 2', array('message-2'));
|
||||
|
||||
$generator = new HTMLPurifier_Generator($this->config, $this->context);
|
||||
$this->context->register('Generator', $generator);
|
||||
|
||||
$collector = new HTMLPurifier_ErrorCollector($this->context);
|
||||
$collector->send(E_ERROR, 'message-1');
|
||||
$collector->send(E_ERROR, 'message-2');
|
||||
$this->collector->send(E_ERROR, 'message-1');
|
||||
$this->collector->send(E_ERROR, 'message-2');
|
||||
|
||||
$result = array(
|
||||
0 => array(null, E_ERROR, 'Message 1'),
|
||||
1 => array(null, E_ERROR, 'Message 2')
|
||||
0 => array(false, E_ERROR, 'Message 1', array()),
|
||||
1 => array(false, E_ERROR, 'Message 2', array())
|
||||
);
|
||||
$this->assertIdentical($collector->getRaw(), $result);
|
||||
$this->assertIdentical($this->collector->getRaw(), $result);
|
||||
|
||||
/*
|
||||
$formatted_result =
|
||||
'<ul><li><strong>Error</strong>: Message 1</li>'.
|
||||
'<li><strong>Error</strong>: Message 2</li></ul>';
|
||||
$this->assertIdentical($collector->getHTMLFormatted($this->config), $formatted_result);
|
||||
$this->assertIdentical($this->collector->getHTMLFormatted($this->config), $formatted_result);
|
||||
*/
|
||||
}
|
||||
|
||||
function testContextSubstitutions() {
|
||||
|
||||
$language = new HTMLPurifier_LanguageMock();
|
||||
$this->context->register('Locale', $language);
|
||||
|
||||
$generator = new HTMLPurifier_Generator($this->config, $this->context);
|
||||
$this->context->register('Generator', $generator);
|
||||
|
||||
$current_token = false;
|
||||
$this->context->register('CurrentToken', $current_token);
|
||||
|
||||
$collector = new HTMLPurifier_ErrorCollector($this->context);
|
||||
|
||||
// 0
|
||||
$current_token = new HTMLPurifier_Token_Start('a', array('href' => 'http://example.com'), 32);
|
||||
$language->setReturnValue('formatMessage', 'Token message',
|
||||
$this->language->setReturnValue('formatMessage', 'Token message',
|
||||
array('message-data-token', array('CurrentToken' => $current_token)));
|
||||
$collector->send(E_NOTICE, 'message-data-token');
|
||||
$this->collector->send(E_NOTICE, 'message-data-token');
|
||||
|
||||
$current_attr = 'href';
|
||||
$language->setReturnValue('formatMessage', '$CurrentAttr.Name => $CurrentAttr.Value',
|
||||
$this->language->setReturnValue('formatMessage', '$CurrentAttr.Name => $CurrentAttr.Value',
|
||||
array('message-attr', array('CurrentToken' => $current_token)));
|
||||
|
||||
// 1
|
||||
$collector->send(E_NOTICE, 'message-attr'); // test when context isn't available
|
||||
$this->collector->send(E_NOTICE, 'message-attr'); // test when context isn't available
|
||||
|
||||
// 2
|
||||
$this->context->register('CurrentAttr', $current_attr);
|
||||
$collector->send(E_NOTICE, 'message-attr');
|
||||
$this->collector->send(E_NOTICE, 'message-attr');
|
||||
|
||||
$result = array(
|
||||
0 => array(32, E_NOTICE, 'Token message'),
|
||||
1 => array(32, E_NOTICE, '$CurrentAttr.Name => $CurrentAttr.Value'),
|
||||
2 => array(32, E_NOTICE, 'href => http://example.com')
|
||||
0 => array(32, E_NOTICE, 'Token message', array()),
|
||||
1 => array(32, E_NOTICE, '$CurrentAttr.Name => $CurrentAttr.Value', array()),
|
||||
2 => array(32, E_NOTICE, 'href => http://example.com', array())
|
||||
);
|
||||
$this->assertIdentical($collector->getRaw(), $result);
|
||||
$this->assertIdentical($this->collector->getRaw(), $result);
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
function testNestedErrors() {
|
||||
$this->language->setReturnValue('getMessage', 'Message 1', array('message-1'));
|
||||
$this->language->setReturnValue('getMessage', 'Message 2', array('message-2'));
|
||||
$this->language->setReturnValue('formatMessage', 'End Message', array('end-message', array(1 => 'param')));
|
||||
$this->language->setReturnValue('formatMessage', ' at line 4', array('ErrorCollector: At line', array('line' => 4)));
|
||||
|
||||
$this->line = 4;
|
||||
$this->collector->start();
|
||||
$this->collector->send(E_WARNING, 'message-1');
|
||||
$this->collector->send(E_NOTICE, 'message-2');
|
||||
$this->collector->end(E_NOTICE, 'end-message', 'param');
|
||||
|
||||
$expect = array(
|
||||
0 => array(4, E_NOTICE, 'End Message', array(
|
||||
0 => array(4, E_WARNING, 'Message 1', array()),
|
||||
1 => array(4, E_NOTICE, 'Message 2', array()),
|
||||
)),
|
||||
);
|
||||
$result = $this->collector->getRaw();
|
||||
$this->assertIdentical($result, $expect);
|
||||
|
||||
$formatted_expect =
|
||||
'<ul><li><strong>Notice</strong>: End Message at line 4<ul>'.
|
||||
'<li><strong>Warning</strong>: Message 1 at line 4</li>'.
|
||||
'<li><strong>Notice</strong>: Message 2 at line 4</li></ul>'.
|
||||
'</li></ul>';
|
||||
$formatted_result = $this->collector->getHTMLFormatted($this->config);
|
||||
$this->assertIdentical($formatted_result, $formatted_expect);
|
||||
|
||||
}
|
||||
*/
|
||||
|
||||
}
|
||||
|
||||
|
137
tests/HTMLPurifier/HTMLModule/FormsTest.php
Normal file
137
tests/HTMLPurifier/HTMLModule/FormsTest.php
Normal file
@@ -0,0 +1,137 @@
|
||||
<?php
|
||||
|
||||
class HTMLPurifier_HTMLModule_FormsTest extends HTMLPurifier_HTMLModuleHarness
|
||||
{
|
||||
|
||||
function setUp() {
|
||||
parent::setUp();
|
||||
$this->config->set('HTML', 'Trusted', true);
|
||||
$this->config->set('Attr', 'EnableID', true);
|
||||
$this->config->set('Cache', 'DefinitionImpl', null);
|
||||
}
|
||||
|
||||
function testBasicUse() {
|
||||
$this->assertResult( // need support for label for later
|
||||
'
|
||||
<form action="http://somesite.com/prog/adduser" method="post">
|
||||
<p>
|
||||
<label>First name: </label>
|
||||
<input type="text" id="firstname" /><br />
|
||||
<label>Last name: </label>
|
||||
<input type="text" id="lastname" /><br />
|
||||
<label>email: </label>
|
||||
<input type="text" id="email" /><br />
|
||||
<input type="radio" name="sex" value="Male" /> Male<br />
|
||||
<input type="radio" name="sex" value="Female" /> Female<br />
|
||||
<input type="submit" value="Send" /> <input type="reset" />
|
||||
</p>
|
||||
</form>'
|
||||
);
|
||||
}
|
||||
|
||||
function testSelectOption() {
|
||||
$this->assertResult('
|
||||
<form action="http://somesite.com/prog/component-select" method="post">
|
||||
<p>
|
||||
<select multiple="multiple" size="4" name="component-select">
|
||||
<option selected="selected" value="Component_1_a">Component_1</option>
|
||||
<option selected="selected" value="Component_1_b">Component_2</option>
|
||||
<option>Component_3</option>
|
||||
<option>Component_4</option>
|
||||
<option>Component_5</option>
|
||||
<option>Component_6</option>
|
||||
<option>Component_7</option>
|
||||
</select>
|
||||
<input type="submit" value="Send" /><input type="reset" />
|
||||
</p>
|
||||
</form>
|
||||
');
|
||||
}
|
||||
|
||||
function testSelectOptgroup() {
|
||||
$this->assertResult('
|
||||
<form action="http://somesite.com/prog/someprog" method="post">
|
||||
<p>
|
||||
<select name="ComOS">
|
||||
<option selected="selected" label="none" value="none">None</option>
|
||||
<optgroup label="PortMaster 3">
|
||||
<option label="3.7.1" value="pm3_3.7.1">PortMaster 3 with ComOS 3.7.1</option>
|
||||
<option label="3.7" value="pm3_3.7">PortMaster 3 with ComOS 3.7</option>
|
||||
<option label="3.5" value="pm3_3.5">PortMaster 3 with ComOS 3.5</option>
|
||||
</optgroup>
|
||||
<optgroup label="PortMaster 2">
|
||||
<option label="3.7" value="pm2_3.7">PortMaster 2 with ComOS 3.7</option>
|
||||
<option label="3.5" value="pm2_3.5">PortMaster 2 with ComOS 3.5</option>
|
||||
</optgroup>
|
||||
<optgroup label="IRX">
|
||||
<option label="3.7R" value="IRX_3.7R">IRX with ComOS 3.7R</option>
|
||||
<option label="3.5R" value="IRX_3.5R">IRX with ComOS 3.5R</option>
|
||||
</optgroup>
|
||||
</select>
|
||||
</p>
|
||||
</form>
|
||||
');
|
||||
}
|
||||
|
||||
function testTextarea() {
|
||||
$this->assertResult('
|
||||
<form action="http://somesite.com/prog/text-read" method="post">
|
||||
<p>
|
||||
<textarea name="thetext" rows="20" cols="80">
|
||||
First line of initial text.
|
||||
Second line of initial text.
|
||||
</textarea>
|
||||
<input type="submit" value="Send" /><input type="reset" />
|
||||
</p>
|
||||
</form>
|
||||
');
|
||||
}
|
||||
|
||||
// label tests omitted
|
||||
|
||||
function testFieldset() {
|
||||
$this->assertResult('
|
||||
<form action="..." method="post">
|
||||
<fieldset>
|
||||
<legend>Personal Information</legend>
|
||||
Last Name: <input name="personal_lastname" type="text" tabindex="1" />
|
||||
First Name: <input name="personal_firstname" type="text" tabindex="2" />
|
||||
Address: <input name="personal_address" type="text" tabindex="3" />
|
||||
...more personal information...
|
||||
</fieldset>
|
||||
<fieldset>
|
||||
<legend>Medical History</legend>
|
||||
<input name="history_illness" type="checkbox" value="Smallpox" tabindex="20" />Smallpox
|
||||
<input name="history_illness" type="checkbox" value="Mumps" tabindex="21" /> Mumps
|
||||
<input name="history_illness" type="checkbox" value="Dizziness" tabindex="22" /> Dizziness
|
||||
<input name="history_illness" type="checkbox" value="Sneezing" tabindex="23" /> Sneezing
|
||||
...more medical history...
|
||||
</fieldset>
|
||||
<fieldset>
|
||||
<legend>Current Medication</legend>
|
||||
Are you currently taking any medication?
|
||||
<input name="medication_now" type="radio" value="Yes" tabindex="35" />Yes
|
||||
<input name="medication_now" type="radio" value="No" tabindex="35" />No
|
||||
|
||||
If you are currently taking medication, please indicate
|
||||
it in the space below:
|
||||
<textarea name="current_medication" rows="20" cols="50" tabindex="40"></textarea>
|
||||
</fieldset>
|
||||
</form>
|
||||
');
|
||||
}
|
||||
|
||||
function testInputTransform() {
|
||||
$this->assertResult('<input type="checkbox" />', '<input type="checkbox" value="" />');
|
||||
}
|
||||
|
||||
function testTextareaTransform() {
|
||||
$this->assertResult('<textarea></textarea>', '<textarea cols="22" rows="3"></textarea>');
|
||||
}
|
||||
|
||||
function testTextInFieldset() {
|
||||
$this->assertResult('<fieldset> <legend></legend>foo</fieldset>');
|
||||
}
|
||||
|
||||
}
|
||||
|
31
tests/HTMLPurifier/HTMLT.php
Normal file
31
tests/HTMLPurifier/HTMLT.php
Normal file
@@ -0,0 +1,31 @@
|
||||
<?php
|
||||
|
||||
class HTMLPurifier_HTMLT extends HTMLPurifier_Harness
|
||||
{
|
||||
protected $path;
|
||||
|
||||
public function __construct($path) {
|
||||
$this->path = $path;
|
||||
parent::__construct($path);
|
||||
}
|
||||
|
||||
public function testHtmlt() {
|
||||
$parser = new HTMLPurifier_StringHashParser();
|
||||
$hash = $parser->parseFile($this->path); // assume parser normalizes to "\n"
|
||||
if (isset($hash['SKIPIF'])) {
|
||||
if (eval($hash['SKIPIF'])) return;
|
||||
}
|
||||
$this->config->set('Output', 'Newline', "\n");
|
||||
if (isset($hash['INI'])) {
|
||||
// there should be a more efficient way than writing another
|
||||
// ini file every time... probably means building a parser for
|
||||
// ini (check out the yaml implementation we saw somewhere else)
|
||||
$ini_file = $this->path . '.ini';
|
||||
file_put_contents($ini_file, $hash['INI']);
|
||||
$this->config->loadIni($ini_file);
|
||||
}
|
||||
$expect = isset($hash['EXPECT']) ? $hash['EXPECT'] : $hash['HTML'];
|
||||
$this->assertPurification(rtrim($hash['HTML']), rtrim($expect));
|
||||
if (isset($hash['INI'])) unlink($ini_file);
|
||||
}
|
||||
}
|
7
tests/HTMLPurifier/HTMLT/allowed-preserve.htmlt
Normal file
7
tests/HTMLPurifier/HTMLT/allowed-preserve.htmlt
Normal file
@@ -0,0 +1,7 @@
|
||||
--INI--
|
||||
HTML.AllowedElements = b,i,p,a
|
||||
HTML.AllowedAttributes = a.href,*.id
|
||||
--HTML--
|
||||
<p>Par.</p>
|
||||
<p>Para<a href="http://google.com/">gr</a>aph</p>
|
||||
Text<b>Bol<i>d</i></b>
|
7
tests/HTMLPurifier/HTMLT/allowed-remove.htmlt
Normal file
7
tests/HTMLPurifier/HTMLT/allowed-remove.htmlt
Normal file
@@ -0,0 +1,7 @@
|
||||
--INI--
|
||||
HTML.AllowedElements = b,i,p,a
|
||||
HTML.AllowedAttributes = a.href,*.id
|
||||
--HTML--
|
||||
<span>Not allowed</span><a class="mef" id="foobar">Remove id too!</a>
|
||||
--EXPECT--
|
||||
Not allowed<a>Remove id too!</a>
|
4
tests/HTMLPurifier/HTMLT/basic.htmlt
Normal file
4
tests/HTMLPurifier/HTMLT/basic.htmlt
Normal file
@@ -0,0 +1,4 @@
|
||||
--HTML--
|
||||
<b>basic</b>
|
||||
--EXPECT--
|
||||
<b>basic</b>
|
5
tests/HTMLPurifier/HTMLT/blacklist-preserve.htmlt
Normal file
5
tests/HTMLPurifier/HTMLT/blacklist-preserve.htmlt
Normal file
@@ -0,0 +1,5 @@
|
||||
--INI--
|
||||
HTML.ForbiddenElements = b
|
||||
HTML.ForbiddenAttributes = a@href
|
||||
--HTML--
|
||||
<p>foo</p>
|
7
tests/HTMLPurifier/HTMLT/blacklist-remove.htmlt
Normal file
7
tests/HTMLPurifier/HTMLT/blacklist-remove.htmlt
Normal file
@@ -0,0 +1,7 @@
|
||||
--INI--
|
||||
HTML.ForbiddenElements = b
|
||||
HTML.ForbiddenAttributes = a@href
|
||||
--HTML--
|
||||
<b>Foo<a href="bar">bar</a></b>
|
||||
--EXPECT--
|
||||
Foo<a>bar</a>
|
4
tests/HTMLPurifier/HTMLT/css-allowed-preserve.htmlt
Normal file
4
tests/HTMLPurifier/HTMLT/css-allowed-preserve.htmlt
Normal file
@@ -0,0 +1,4 @@
|
||||
--INI--
|
||||
CSS.AllowedProperties = color,background-color
|
||||
--HTML--
|
||||
<div style="color:#f00;background-color:#ded;">red</div>
|
6
tests/HTMLPurifier/HTMLT/css-allowed-remove.htmlt
Normal file
6
tests/HTMLPurifier/HTMLT/css-allowed-remove.htmlt
Normal file
@@ -0,0 +1,6 @@
|
||||
--INI--
|
||||
CSS.AllowedProperties = color,background-color
|
||||
--HTML--
|
||||
<div style="color:#f00;border:1px solid #000">red</div>
|
||||
--EXPECT--
|
||||
<div style="color:#f00;">red</div>
|
5
tests/HTMLPurifier/HTMLT/disable-uri.htmlt
Normal file
5
tests/HTMLPurifier/HTMLT/disable-uri.htmlt
Normal file
@@ -0,0 +1,5 @@
|
||||
--INI--
|
||||
URI.Disable = true
|
||||
--HTML--
|
||||
<img src="foobar" />
|
||||
--EXPECT--
|
6
tests/HTMLPurifier/HTMLT/empty.htmlt
Normal file
6
tests/HTMLPurifier/HTMLT/empty.htmlt
Normal file
@@ -0,0 +1,6 @@
|
||||
--INI--
|
||||
|
||||
--HTML--
|
||||
|
||||
--EXPECT--
|
||||
|
4
tests/HTMLPurifier/HTMLT/id-default.htmlt
Normal file
4
tests/HTMLPurifier/HTMLT/id-default.htmlt
Normal file
@@ -0,0 +1,4 @@
|
||||
--HTML--
|
||||
<span id="moon">foobar</span>
|
||||
--EXPECT--
|
||||
<span>foobar</span>
|
5
tests/HTMLPurifier/HTMLT/id-enabled.htmlt
Normal file
5
tests/HTMLPurifier/HTMLT/id-enabled.htmlt
Normal file
@@ -0,0 +1,5 @@
|
||||
--INI--
|
||||
Attr.EnableID = true
|
||||
--HTML--
|
||||
<span id="moon">foobar</span>
|
||||
<img id="folly" src="folly.png" alt="Omigosh!" />
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user