1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-04 05:07:55 +02:00

Scratch PI or JASP from spec. Add extra comments about MarkupFragment's behavior to incoming data. More Todos for our home-brew lexer, and also fix empty tag support in the SAX parser adapter. Add test framework for marking SAX to be tested with the regular result, or a specialized result.

git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@22 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2006-04-15 02:19:27 +00:00
parent 181d544b61
commit dad395ac45
4 changed files with 77 additions and 29 deletions

View File

@@ -1,11 +1,14 @@
<?php
/*
Lexes SGML style documents, aka HTML, XML, XHMTML, you name it.
Forgivingly lexes SGML style documents, aka HTML, XML, XHTML, you name it.
TODO:
* Validate element names and attributes for correct composition
* Reread the XML spec and make sure I got everything right
* Add support for CDATA sections
* Have comments output with the leading and trailing --s
* Optimize and benchmark
* Check MF_Text behavior: shouldn't the info in there be raw (entities parsed?)
*/
@@ -193,6 +196,7 @@ class HTML_Lexer
}
// uses the PEAR class XML_HTMLSax3 to parse XML
// only shares the tokenizeHTML() function
class HTML_Lexer_Sax extends HTML_Lexer
{
@@ -209,12 +213,22 @@ class HTML_Lexer_Sax extends HTML_Lexer
return $this->tokens;
}
function openHandler(&$parser, $name, $attrs) {
$this->tokens[] = new MF_StartTag($name, $attrs);
/**
 * Opening-tag callback: appends one token for the tag to $this->tokens.
 *
 * @param object $parser  the parser raising the event (not used here)
 * @param string $name    element name, passed through to the token
 * @param array  $attrs   element attributes, passed through to the token
 * @param bool   $closed  truthy when the tag is self-closing
 * @return bool always true
 */
function openHandler(&$parser, $name, $attrs, $closed) {
    // A self-closing tag becomes an MF_EmptyTag; anything else an MF_StartTag.
    $token = $closed
        ? new MF_EmptyTag($name, $attrs)
        : new MF_StartTag($name, $attrs);
    $this->tokens[] = $token;
    return true;
}
/**
 * Closing-tag callback: appends an MF_EndTag for $name to $this->tokens,
 * unless the most recently emitted token is an MF_EmptyTag.
 *
 * @param object $parser  the parser raising the event (not used here)
 * @param string $name    element name, passed through to MF_EndTag
 * @return bool always true
 */
function closeHandler(&$parser, $name) {
    // HTMLSax3 seems to always send empty tags an extra close tag;
    // check for it and ignore it when seen.
    //
    // Only inspect the last token when there is one: indexing an empty
    // token list at -1 would raise an undefined-offset notice.
    //
    // [TESTME] NOTE(review): this check looks only at the most recent
    // token, so ANY close event that arrives while the last token is an
    // MF_EmptyTag is dropped -- possibly including the real end tag of an
    // enclosing element if nothing was emitted in between. Confirm with a
    // test that it doesn't overreach.
    $count = count($this->tokens);
    if ($count > 0 && is_a($this->tokens[$count - 1], 'MF_EmptyTag')) {
        return true;
    }
    $this->tokens[] = new MF_EndTag($name);
    return true;
}