From b267b0c202c8584c021ae59792f3c092c0291e0a Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Fri, 4 Aug 2006 02:59:15 +0000 Subject: [PATCH] Add an attribute entity parse test to Lexer and change PEARSax3 to a proof of concept. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@158 48356398-32a2-884e-a903-53898d9a118a --- library/HTMLPurifier/Lexer/PEARSax3.php | 5 +++-- tests/HTMLPurifier/LexerTest.php | 9 +++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/library/HTMLPurifier/Lexer/PEARSax3.php b/library/HTMLPurifier/Lexer/PEARSax3.php index 13fac96a..9eee7def 100644 --- a/library/HTMLPurifier/Lexer/PEARSax3.php +++ b/library/HTMLPurifier/Lexer/PEARSax3.php @@ -4,13 +4,14 @@ require_once 'XML/HTMLSax3.php'; // PEAR require_once 'HTMLPurifier/Lexer.php'; /** - * Lexer that uses the PEAR package XML_HTMLSax3 to parse + * Proof-of-concept lexer that uses the PEAR package XML_HTMLSax3 to parse HTML. * * PEAR, not suprisingly, also has a SAX parser for HTML. I don't know * very much about implementation, but it's fairly well written. However, that * abstraction comes at a price: performance. You need to have it installed, * and if the API changes, it might break our adapter. Not sure whether or not - * it's UTF-8 aware, but it has some entity parsing trouble. + * it's UTF-8 aware, but it has some entity parsing trouble (in all areas, + * text and attributes). * * Quite personally, I don't recommend using the PEAR class, and the defaults * don't use it. 
The unit tests do perform the tests on the SAX parser too, but diff --git a/tests/HTMLPurifier/LexerTest.php b/tests/HTMLPurifier/LexerTest.php index 543b111f..d91e535c 100644 --- a/tests/HTMLPurifier/LexerTest.php +++ b/tests/HTMLPurifier/LexerTest.php @@ -202,6 +202,15 @@ class HTMLPurifier_LexerTest extends UnitTestCase new HTMLPurifier_Token_Text('rarr;') ); + // test entity resolution in attributes + $input[16] = '<a href="index.php?title=foo&amp;id=bar">Link</a>'; + $expect[16] = array( + new HTMLPurifier_Token_Start('a',array('href' => 'index.php?title=foo&id=bar')) + ,new HTMLPurifier_Token_Text('Link') + ,new HTMLPurifier_Token_End('a') + ); + $sax_expect[16] = false; // PEARSax doesn't support it! + foreach($input as $i => $discard) { $result = $this->DirectLex->tokenizeHTML($input[$i]); $this->assertEqual($expect[$i], $result, 'DirectLexTest '.$i.': %s');