diff --git a/HTML_Purifier.php b/HTML_Purifier.php index 895d5cba..cdf7ef57 100644 --- a/HTML_Purifier.php +++ b/HTML_Purifier.php @@ -8,7 +8,7 @@ class HTML_Purifier var $generator; function HTML_Purifier() { - $this->lexer = new HTML_Lexer(); + $this->lexer = new HTMLPurifier_Lexer(); $this->definition = new PureHTMLDefinition(); $this->generator = new HTML_Generator(); } diff --git a/HTML_Lexer.php b/Lexer.php similarity index 96% rename from HTML_Lexer.php rename to Lexer.php index 1af3d5d0..b83b2458 100644 --- a/HTML_Lexer.php +++ b/Lexer.php @@ -12,13 +12,13 @@ TODO: */ -class HTML_Lexer +class HTMLPurifier_Lexer { // does this version of PHP support utf8 as entity function charset? var $_entity_utf8; - function HTML_Lexer() { + function HTMLPurifier_Lexer() { $this->_entity_utf8 = version_compare(PHP_VERSION, '5', '>='); } @@ -343,7 +343,7 @@ class HTML_Lexer // uses the PEAR class XML_HTMLSax3 to parse XML // only shares the tokenizeHTML() function -class HTML_Lexer_Sax extends HTML_Lexer +class HTMLPurifier_Lexer_Sax extends HTMLPurifier_Lexer { var $tokens = array(); diff --git a/benchmarks/HTML_Lexer.php b/benchmarks/HTML_Lexer.php index 0ea39dc9..52ce2ee9 100644 --- a/benchmarks/HTML_Lexer.php +++ b/benchmarks/HTML_Lexer.php @@ -9,8 +9,8 @@ require_once 'Benchmark/Timer.php'; require_once 'XML/HTMLSax3.php'; require_once 'Text/Password.php'; -require_once '../MarkupFragment.php'; -require_once '../HTML_Lexer.php'; +require_once '../Token.php'; +require_once '../Lexer.php'; class TinyTimer extends Benchmark_Timer { @@ -52,12 +52,12 @@ class TinyTimer extends Benchmark_Timer ?> -Benchmark: HTML_Lexer versus HTMLSax +Benchmark: HTMLPurifier_Lexer versus HTMLSax -

Benchmark: HTML_Lexer versus HTMLSax

+

Benchmark: HTMLPurifier_Lexer versus HTMLSax

- +start(); - $lexer = new HTML_Lexer(); + $lexer = new HTMLPurifier_Lexer(); $tokens = $lexer->tokenizeHTML($document); - $timer->setMarker('HTML_Lexer'); + $timer->setMarker('HTMLPurifier_Lexer'); - $lexer = new HTML_Lexer_Sax(); + $lexer = new HTMLPurifier_Lexer_Sax(); $sax_tokens = $lexer->tokenizeHTML($document); - $timer->setMarker('HTML_Lexer_Sax'); + $timer->setMarker('HTMLPurifier_Lexer_Sax'); $timer->stop(); $timer->display(); @@ -79,7 +79,7 @@ function do_benchmark($name, $document) { // sample of html pages -$dir = 'samples/HTML_Lexer'; +$dir = 'samples/Lexer'; $dh = opendir($dir); while (false !== ($filename = readdir($dh))) { diff --git a/benchmarks/samples/HTML_Lexer/1.html b/benchmarks/samples/Lexer/1.html similarity index 100% rename from benchmarks/samples/HTML_Lexer/1.html rename to benchmarks/samples/Lexer/1.html diff --git a/benchmarks/samples/HTML_Lexer/2.html b/benchmarks/samples/Lexer/2.html similarity index 100% rename from benchmarks/samples/HTML_Lexer/2.html rename to benchmarks/samples/Lexer/2.html diff --git a/benchmarks/samples/HTML_Lexer/3.html b/benchmarks/samples/Lexer/3.html similarity index 100% rename from benchmarks/samples/HTML_Lexer/3.html rename to benchmarks/samples/Lexer/3.html diff --git a/tester.php b/tester.php index dfaa5916..5f172223 100644 --- a/tester.php +++ b/tester.php @@ -5,7 +5,7 @@ load_simpletest(); // includes all relevant simpletest files require_once 'XML/HTMLSax3.php'; // optional PEAR class require_once 'HTML_Purifier.php'; -require_once 'HTML_Lexer.php'; +require_once 'Lexer.php'; require_once 'Token.php'; require_once 'PureHTMLDefinition.php'; require_once 'HTML_Generator.php'; @@ -14,7 +14,7 @@ $test = new GroupTest('HTML_Purifier'); chdir('tests/'); $test->addTestFile('HTML_Purifier.php'); -$test->addTestFile('HTML_Lexer.php'); +$test->addTestFile('Lexer.php'); //$test->addTestFile('Token.php'); $test->addTestFile('PureHTMLDefinition.php'); $test->addTestFile('HTML_Generator.php'); diff --git a/tests/HTML_Lexer.php b/tests/Lexer.php similarity index 89% rename from tests/HTML_Lexer.php rename to tests/Lexer.php index afa75fa1..ef1e95f3 100644 --- a/tests/HTML_Lexer.php +++ b/tests/Lexer.php @@ -4,19 +4,19 @@ * Benchmark the SAX parser with my homemade one */ -class Test_HTML_Lexer extends UnitTestCase +class Test_HTMLPurifier_Lexer extends UnitTestCase { - var $HTML_Lexer; - var $HTML_Lexer_Sax; + var $HTMLPurifier_Lexer; + var $HTMLPurifier_Lexer_Sax; function setUp() { - $this->HTML_Lexer =& new HTML_Lexer(); - $this->HTML_Lexer_Sax =& new HTML_Lexer_Sax(); + $this->HTMLPurifier_Lexer =& new HTMLPurifier_Lexer(); + $this->HTMLPurifier_Lexer_Sax =& new HTMLPurifier_Lexer_Sax(); } function test_nextWhiteSpace() { - $HP =& $this->HTML_Lexer; + $HP =& $this->HTMLPurifier_Lexer; $this->assertIdentical(false, $HP->nextWhiteSpace('asdf')); $this->assertIdentical(0, $HP->nextWhiteSpace(' asdf')); $this->assertIdentical(0, $HP->nextWhiteSpace("\nasdf")); @@ -26,7 +26,7 @@ class Test_HTML_Lexer extends UnitTestCase } function test_parseData() { - $HP =& $this->HTML_Lexer; + $HP =& $this->HTMLPurifier_Lexer; $this->assertIdentical('asdf', $HP->parseData('asdf')); $this->assertIdentical('&', $HP->parseData('&')); $this->assertIdentical('"', $HP->parseData('"')); @@ -144,12 +144,12 @@ class Test_HTML_Lexer extends UnitTestCase // SAX chokes on this? We do have entity parsing on, so it should work! foreach($input as $i => $discard) { - $result = $this->HTML_Lexer->tokenizeHTML($input[$i]); + $result = $this->HTMLPurifier_Lexer->tokenizeHTML($input[$i]); $this->assertEqual($expect[$i], $result); paintIf($result, $expect[$i] != $result); // assert unless I say otherwise - $sax_result = $this->HTML_Lexer_Sax->tokenizeHTML($input[$i]); + $sax_result = $this->HTMLPurifier_Lexer_Sax->tokenizeHTML($input[$i]); if (!isset($sax_expect[$i])) { // by default, assert with normal result $this->assertEqual($expect[$i], $sax_result); @@ -191,7 +191,7 @@ class Test_HTML_Lexer extends UnitTestCase $size = count($input); for($i = 0; $i < $size; $i++) { - $result = $this->HTML_Lexer->tokenizeAttributeString($input[$i]); + $result = $this->HTMLPurifier_Lexer->tokenizeAttributeString($input[$i]); $this->assertEqual($expect[$i], $result); paintIf($result, $expect[$i] != $result); } diff --git a/tests/PureHTMLDefinition.php b/tests/PureHTMLDefinition.php index d9b58a10..843100c8 100644 --- a/tests/PureHTMLDefinition.php +++ b/tests/PureHTMLDefinition.php @@ -7,7 +7,7 @@ class Test_HTMLDTD_ChildDef extends UnitTestCase var $gen; function Test_HTMLDTD_ChildDef() { - $this->lex = new HTML_Lexer(); + $this->lex = new HTMLPurifier_Lexer(); $this->gen = new HTML_Generator(); parent::UnitTestCase(); } @@ -134,7 +134,7 @@ class Test_PureHTMLDefinition extends UnitTestCase $this->UnitTestCase(); $this->def = new PureHTMLDefinition(); $this->def->loadData(); - $this->lex = new HTML_Lexer(); + $this->lex = new HTMLPurifier_Lexer(); } function test_removeForeignElements() {
CaseHTML_LexerHTML_Lexer_Sax
CaseHTMLPurifier_LexerHTMLPurifier_Lexer_Sax