1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-01 11:50:28 +02:00

Renamed HTML_Lexer to HTMLPurifier_Lexer. Some further refactoring still needs to be done (namely, making the lexer an interface).

Also fixed broken benchmarks.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@65 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2006-07-21 23:07:47 +00:00
parent cf4776cfbd
commit 6a6afaccc5
9 changed files with 28 additions and 28 deletions

View File

@@ -8,7 +8,7 @@ class HTML_Purifier
var $generator; var $generator;
function HTML_Purifier() { function HTML_Purifier() {
$this->lexer = new HTML_Lexer(); $this->lexer = new HTMLPurifier_Lexer();
$this->definition = new PureHTMLDefinition(); $this->definition = new PureHTMLDefinition();
$this->generator = new HTML_Generator(); $this->generator = new HTML_Generator();
} }

View File

@@ -12,13 +12,13 @@ TODO:
*/ */
class HTML_Lexer class HTMLPurifier_Lexer
{ {
// does this version of PHP support utf8 as entity function charset? // does this version of PHP support utf8 as entity function charset?
var $_entity_utf8; var $_entity_utf8;
function HTML_Lexer() { function HTMLPurifier_Lexer() {
$this->_entity_utf8 = version_compare(PHP_VERSION, '5', '>='); $this->_entity_utf8 = version_compare(PHP_VERSION, '5', '>=');
} }
@@ -343,7 +343,7 @@ class HTML_Lexer
// uses the PEAR class XML_HTMLSax3 to parse XML // uses the PEAR class XML_HTMLSax3 to parse XML
// only shares the tokenizeHTML() function // only shares the tokenizeHTML() function
class HTML_Lexer_Sax extends HTML_Lexer class HTMLPurifier_Lexer_Sax extends HTMLPurifier_Lexer
{ {
var $tokens = array(); var $tokens = array();

View File

@@ -9,8 +9,8 @@ require_once 'Benchmark/Timer.php';
require_once 'XML/HTMLSax3.php'; require_once 'XML/HTMLSax3.php';
require_once 'Text/Password.php'; require_once 'Text/Password.php';
require_once '../MarkupFragment.php'; require_once '../Token.php';
require_once '../HTML_Lexer.php'; require_once '../Lexer.php';
class TinyTimer extends Benchmark_Timer class TinyTimer extends Benchmark_Timer
{ {
@@ -52,12 +52,12 @@ class TinyTimer extends Benchmark_Timer
?> ?>
<html> <html>
<head> <head>
<title>Benchmark: HTML_Lexer versus HTMLSax</title> <title>Benchmark: HTMLPurifier_Lexer versus HTMLSax</title>
</head> </head>
<body> <body>
<h1>Benchmark: HTML_Lexer versus HTMLSax</h1> <h1>Benchmark: HTMLPurifier_Lexer versus HTMLSax</h1>
<table border="1"> <table border="1">
<tr><th>Case</th><th>HTML_Lexer</th><th>HTML_Lexer_Sax</th></tr> <tr><th>Case</th><th>HTMLPurifier_Lexer</th><th>HTMLPurifier_Lexer_Sax</th></tr>
<?php <?php
@@ -65,13 +65,13 @@ function do_benchmark($name, $document) {
$timer = new TinyTimer($name); $timer = new TinyTimer($name);
$timer->start(); $timer->start();
$lexer = new HTML_Lexer(); $lexer = new HTMLPurifier_Lexer();
$tokens = $lexer->tokenizeHTML($document); $tokens = $lexer->tokenizeHTML($document);
$timer->setMarker('HTML_Lexer'); $timer->setMarker('HTMLPurifier_Lexer');
$lexer = new HTML_Lexer_Sax(); $lexer = new HTMLPurifier_Lexer_Sax();
$sax_tokens = $lexer->tokenizeHTML($document); $sax_tokens = $lexer->tokenizeHTML($document);
$timer->setMarker('HTML_Lexer_Sax'); $timer->setMarker('HTMLPurifier_Lexer_Sax');
$timer->stop(); $timer->stop();
$timer->display(); $timer->display();
@@ -79,7 +79,7 @@ function do_benchmark($name, $document) {
// sample of html pages // sample of html pages
$dir = 'samples/HTML_Lexer'; $dir = 'samples/Lexer';
$dh = opendir($dir); $dh = opendir($dir);
while (false !== ($filename = readdir($dh))) { while (false !== ($filename = readdir($dh))) {

View File

@@ -5,7 +5,7 @@ load_simpletest(); // includes all relevant simpletest files
require_once 'XML/HTMLSax3.php'; // optional PEAR class require_once 'XML/HTMLSax3.php'; // optional PEAR class
require_once 'HTML_Purifier.php'; require_once 'HTML_Purifier.php';
require_once 'HTML_Lexer.php'; require_once 'Lexer.php';
require_once 'Token.php'; require_once 'Token.php';
require_once 'PureHTMLDefinition.php'; require_once 'PureHTMLDefinition.php';
require_once 'HTML_Generator.php'; require_once 'HTML_Generator.php';
@@ -14,7 +14,7 @@ $test = new GroupTest('HTML_Purifier');
chdir('tests/'); chdir('tests/');
$test->addTestFile('HTML_Purifier.php'); $test->addTestFile('HTML_Purifier.php');
$test->addTestFile('HTML_Lexer.php'); $test->addTestFile('Lexer.php');
//$test->addTestFile('Token.php'); //$test->addTestFile('Token.php');
$test->addTestFile('PureHTMLDefinition.php'); $test->addTestFile('PureHTMLDefinition.php');
$test->addTestFile('HTML_Generator.php'); $test->addTestFile('HTML_Generator.php');

View File

@@ -4,19 +4,19 @@
* Benchmark the SAX parser with my homemade one * Benchmark the SAX parser with my homemade one
*/ */
class Test_HTML_Lexer extends UnitTestCase class Test_HTMLPurifier_Lexer extends UnitTestCase
{ {
var $HTML_Lexer; var $HTMLPurifier_Lexer;
var $HTML_Lexer_Sax; var $HTMLPurifier_Lexer_Sax;
function setUp() { function setUp() {
$this->HTML_Lexer =& new HTML_Lexer(); $this->HTMLPurifier_Lexer =& new HTMLPurifier_Lexer();
$this->HTML_Lexer_Sax =& new HTML_Lexer_Sax(); $this->HTMLPurifier_Lexer_Sax =& new HTMLPurifier_Lexer_Sax();
} }
function test_nextWhiteSpace() { function test_nextWhiteSpace() {
$HP =& $this->HTML_Lexer; $HP =& $this->HTMLPurifier_Lexer;
$this->assertIdentical(false, $HP->nextWhiteSpace('asdf')); $this->assertIdentical(false, $HP->nextWhiteSpace('asdf'));
$this->assertIdentical(0, $HP->nextWhiteSpace(' asdf')); $this->assertIdentical(0, $HP->nextWhiteSpace(' asdf'));
$this->assertIdentical(0, $HP->nextWhiteSpace("\nasdf")); $this->assertIdentical(0, $HP->nextWhiteSpace("\nasdf"));
@@ -26,7 +26,7 @@ class Test_HTML_Lexer extends UnitTestCase
} }
function test_parseData() { function test_parseData() {
$HP =& $this->HTML_Lexer; $HP =& $this->HTMLPurifier_Lexer;
$this->assertIdentical('asdf', $HP->parseData('asdf')); $this->assertIdentical('asdf', $HP->parseData('asdf'));
$this->assertIdentical('&', $HP->parseData('&amp;')); $this->assertIdentical('&', $HP->parseData('&amp;'));
$this->assertIdentical('"', $HP->parseData('&quot;')); $this->assertIdentical('"', $HP->parseData('&quot;'));
@@ -144,12 +144,12 @@ class Test_HTML_Lexer extends UnitTestCase
// SAX chokes on this? We do have entity parsing on, so it should work! // SAX chokes on this? We do have entity parsing on, so it should work!
foreach($input as $i => $discard) { foreach($input as $i => $discard) {
$result = $this->HTML_Lexer->tokenizeHTML($input[$i]); $result = $this->HTMLPurifier_Lexer->tokenizeHTML($input[$i]);
$this->assertEqual($expect[$i], $result); $this->assertEqual($expect[$i], $result);
paintIf($result, $expect[$i] != $result); paintIf($result, $expect[$i] != $result);
// assert unless I say otherwise // assert unless I say otherwise
$sax_result = $this->HTML_Lexer_Sax->tokenizeHTML($input[$i]); $sax_result = $this->HTMLPurifier_Lexer_Sax->tokenizeHTML($input[$i]);
if (!isset($sax_expect[$i])) { if (!isset($sax_expect[$i])) {
// by default, assert with normal result // by default, assert with normal result
$this->assertEqual($expect[$i], $sax_result); $this->assertEqual($expect[$i], $sax_result);
@@ -191,7 +191,7 @@ class Test_HTML_Lexer extends UnitTestCase
$size = count($input); $size = count($input);
for($i = 0; $i < $size; $i++) { for($i = 0; $i < $size; $i++) {
$result = $this->HTML_Lexer->tokenizeAttributeString($input[$i]); $result = $this->HTMLPurifier_Lexer->tokenizeAttributeString($input[$i]);
$this->assertEqual($expect[$i], $result); $this->assertEqual($expect[$i], $result);
paintIf($result, $expect[$i] != $result); paintIf($result, $expect[$i] != $result);
} }

View File

@@ -7,7 +7,7 @@ class Test_HTMLDTD_ChildDef extends UnitTestCase
var $gen; var $gen;
function Test_HTMLDTD_ChildDef() { function Test_HTMLDTD_ChildDef() {
$this->lex = new HTML_Lexer(); $this->lex = new HTMLPurifier_Lexer();
$this->gen = new HTML_Generator(); $this->gen = new HTML_Generator();
parent::UnitTestCase(); parent::UnitTestCase();
} }
@@ -134,7 +134,7 @@ class Test_PureHTMLDefinition extends UnitTestCase
$this->UnitTestCase(); $this->UnitTestCase();
$this->def = new PureHTMLDefinition(); $this->def = new PureHTMLDefinition();
$this->def->loadData(); $this->def->loadData();
$this->lex = new HTML_Lexer(); $this->lex = new HTMLPurifier_Lexer();
} }
function test_removeForeignElements() { function test_removeForeignElements() {