mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-01 11:50:28 +02:00
Rename HTML_Lexer to HTMLPurifier_Lexer. However, some more refactoring still needs to be done (namely making the lexer an interface).
Also fixed broken benchmarks. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@65 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
@@ -8,7 +8,7 @@ class HTML_Purifier
|
|||||||
var $generator;
|
var $generator;
|
||||||
|
|
||||||
function HTML_Purifier() {
|
function HTML_Purifier() {
|
||||||
$this->lexer = new HTML_Lexer();
|
$this->lexer = new HTMLPurifier_Lexer();
|
||||||
$this->definition = new PureHTMLDefinition();
|
$this->definition = new PureHTMLDefinition();
|
||||||
$this->generator = new HTML_Generator();
|
$this->generator = new HTML_Generator();
|
||||||
}
|
}
|
||||||
|
@@ -12,13 +12,13 @@ TODO:
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class HTML_Lexer
|
class HTMLPurifier_Lexer
|
||||||
{
|
{
|
||||||
|
|
||||||
// does this version of PHP support utf8 as entity function charset?
|
// does this version of PHP support utf8 as entity function charset?
|
||||||
var $_entity_utf8;
|
var $_entity_utf8;
|
||||||
|
|
||||||
function HTML_Lexer() {
|
function HTMLPurifier_Lexer() {
|
||||||
$this->_entity_utf8 = version_compare(PHP_VERSION, '5', '>=');
|
$this->_entity_utf8 = version_compare(PHP_VERSION, '5', '>=');
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -343,7 +343,7 @@ class HTML_Lexer
|
|||||||
|
|
||||||
// uses the PEAR class XML_HTMLSax3 to parse XML
|
// uses the PEAR class XML_HTMLSax3 to parse XML
|
||||||
// only shares the tokenizeHTML() function
|
// only shares the tokenizeHTML() function
|
||||||
class HTML_Lexer_Sax extends HTML_Lexer
|
class HTMLPurifier_Lexer_Sax extends HTMLPurifier_Lexer
|
||||||
{
|
{
|
||||||
|
|
||||||
var $tokens = array();
|
var $tokens = array();
|
@@ -9,8 +9,8 @@ require_once 'Benchmark/Timer.php';
|
|||||||
require_once 'XML/HTMLSax3.php';
|
require_once 'XML/HTMLSax3.php';
|
||||||
require_once 'Text/Password.php';
|
require_once 'Text/Password.php';
|
||||||
|
|
||||||
require_once '../MarkupFragment.php';
|
require_once '../Token.php';
|
||||||
require_once '../HTML_Lexer.php';
|
require_once '../Lexer.php';
|
||||||
|
|
||||||
class TinyTimer extends Benchmark_Timer
|
class TinyTimer extends Benchmark_Timer
|
||||||
{
|
{
|
||||||
@@ -52,12 +52,12 @@ class TinyTimer extends Benchmark_Timer
|
|||||||
?>
|
?>
|
||||||
<html>
|
<html>
|
||||||
<head>
|
<head>
|
||||||
<title>Benchmark: HTML_Lexer versus HTMLSax</title>
|
<title>Benchmark: HTMLPurifier_Lexer versus HTMLSax</title>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<h1>Benchmark: HTML_Lexer versus HTMLSax</h1>
|
<h1>Benchmark: HTMLPurifier_Lexer versus HTMLSax</h1>
|
||||||
<table border="1">
|
<table border="1">
|
||||||
<tr><th>Case</th><th>HTML_Lexer</th><th>HTML_Lexer_Sax</th></tr>
|
<tr><th>Case</th><th>HTMLPurifier_Lexer</th><th>HTMLPurifier_Lexer_Sax</th></tr>
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
|
||||||
@@ -65,13 +65,13 @@ function do_benchmark($name, $document) {
|
|||||||
$timer = new TinyTimer($name);
|
$timer = new TinyTimer($name);
|
||||||
$timer->start();
|
$timer->start();
|
||||||
|
|
||||||
$lexer = new HTML_Lexer();
|
$lexer = new HTMLPurifier_Lexer();
|
||||||
$tokens = $lexer->tokenizeHTML($document);
|
$tokens = $lexer->tokenizeHTML($document);
|
||||||
$timer->setMarker('HTML_Lexer');
|
$timer->setMarker('HTMLPurifier_Lexer');
|
||||||
|
|
||||||
$lexer = new HTML_Lexer_Sax();
|
$lexer = new HTMLPurifier_Lexer_Sax();
|
||||||
$sax_tokens = $lexer->tokenizeHTML($document);
|
$sax_tokens = $lexer->tokenizeHTML($document);
|
||||||
$timer->setMarker('HTML_Lexer_Sax');
|
$timer->setMarker('HTMLPurifier_Lexer_Sax');
|
||||||
|
|
||||||
$timer->stop();
|
$timer->stop();
|
||||||
$timer->display();
|
$timer->display();
|
||||||
@@ -79,7 +79,7 @@ function do_benchmark($name, $document) {
|
|||||||
|
|
||||||
// sample of html pages
|
// sample of html pages
|
||||||
|
|
||||||
$dir = 'samples/HTML_Lexer';
|
$dir = 'samples/Lexer';
|
||||||
$dh = opendir($dir);
|
$dh = opendir($dir);
|
||||||
while (false !== ($filename = readdir($dh))) {
|
while (false !== ($filename = readdir($dh))) {
|
||||||
|
|
||||||
|
@@ -5,7 +5,7 @@ load_simpletest(); // includes all relevant simpletest files
|
|||||||
require_once 'XML/HTMLSax3.php'; // optional PEAR class
|
require_once 'XML/HTMLSax3.php'; // optional PEAR class
|
||||||
|
|
||||||
require_once 'HTML_Purifier.php';
|
require_once 'HTML_Purifier.php';
|
||||||
require_once 'HTML_Lexer.php';
|
require_once 'Lexer.php';
|
||||||
require_once 'Token.php';
|
require_once 'Token.php';
|
||||||
require_once 'PureHTMLDefinition.php';
|
require_once 'PureHTMLDefinition.php';
|
||||||
require_once 'HTML_Generator.php';
|
require_once 'HTML_Generator.php';
|
||||||
@@ -14,7 +14,7 @@ $test = new GroupTest('HTML_Purifier');
|
|||||||
|
|
||||||
chdir('tests/');
|
chdir('tests/');
|
||||||
$test->addTestFile('HTML_Purifier.php');
|
$test->addTestFile('HTML_Purifier.php');
|
||||||
$test->addTestFile('HTML_Lexer.php');
|
$test->addTestFile('Lexer.php');
|
||||||
//$test->addTestFile('Token.php');
|
//$test->addTestFile('Token.php');
|
||||||
$test->addTestFile('PureHTMLDefinition.php');
|
$test->addTestFile('PureHTMLDefinition.php');
|
||||||
$test->addTestFile('HTML_Generator.php');
|
$test->addTestFile('HTML_Generator.php');
|
||||||
|
@@ -4,19 +4,19 @@
|
|||||||
* Benchmark the SAX parser with my homemade one
|
* Benchmark the SAX parser with my homemade one
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class Test_HTML_Lexer extends UnitTestCase
|
class Test_HTMLPurifier_Lexer extends UnitTestCase
|
||||||
{
|
{
|
||||||
|
|
||||||
var $HTML_Lexer;
|
var $HTMLPurifier_Lexer;
|
||||||
var $HTML_Lexer_Sax;
|
var $HTMLPurifier_Lexer_Sax;
|
||||||
|
|
||||||
function setUp() {
|
function setUp() {
|
||||||
$this->HTML_Lexer =& new HTML_Lexer();
|
$this->HTMLPurifier_Lexer =& new HTMLPurifier_Lexer();
|
||||||
$this->HTML_Lexer_Sax =& new HTML_Lexer_Sax();
|
$this->HTMLPurifier_Lexer_Sax =& new HTMLPurifier_Lexer_Sax();
|
||||||
}
|
}
|
||||||
|
|
||||||
function test_nextWhiteSpace() {
|
function test_nextWhiteSpace() {
|
||||||
$HP =& $this->HTML_Lexer;
|
$HP =& $this->HTMLPurifier_Lexer;
|
||||||
$this->assertIdentical(false, $HP->nextWhiteSpace('asdf'));
|
$this->assertIdentical(false, $HP->nextWhiteSpace('asdf'));
|
||||||
$this->assertIdentical(0, $HP->nextWhiteSpace(' asdf'));
|
$this->assertIdentical(0, $HP->nextWhiteSpace(' asdf'));
|
||||||
$this->assertIdentical(0, $HP->nextWhiteSpace("\nasdf"));
|
$this->assertIdentical(0, $HP->nextWhiteSpace("\nasdf"));
|
||||||
@@ -26,7 +26,7 @@ class Test_HTML_Lexer extends UnitTestCase
|
|||||||
}
|
}
|
||||||
|
|
||||||
function test_parseData() {
|
function test_parseData() {
|
||||||
$HP =& $this->HTML_Lexer;
|
$HP =& $this->HTMLPurifier_Lexer;
|
||||||
$this->assertIdentical('asdf', $HP->parseData('asdf'));
|
$this->assertIdentical('asdf', $HP->parseData('asdf'));
|
||||||
$this->assertIdentical('&', $HP->parseData('&'));
|
$this->assertIdentical('&', $HP->parseData('&'));
|
||||||
$this->assertIdentical('"', $HP->parseData('"'));
|
$this->assertIdentical('"', $HP->parseData('"'));
|
||||||
@@ -144,12 +144,12 @@ class Test_HTML_Lexer extends UnitTestCase
|
|||||||
// SAX chokes on this? We do have entity parsing on, so it should work!
|
// SAX chokes on this? We do have entity parsing on, so it should work!
|
||||||
|
|
||||||
foreach($input as $i => $discard) {
|
foreach($input as $i => $discard) {
|
||||||
$result = $this->HTML_Lexer->tokenizeHTML($input[$i]);
|
$result = $this->HTMLPurifier_Lexer->tokenizeHTML($input[$i]);
|
||||||
$this->assertEqual($expect[$i], $result);
|
$this->assertEqual($expect[$i], $result);
|
||||||
paintIf($result, $expect[$i] != $result);
|
paintIf($result, $expect[$i] != $result);
|
||||||
|
|
||||||
// assert unless I say otherwise
|
// assert unless I say otherwise
|
||||||
$sax_result = $this->HTML_Lexer_Sax->tokenizeHTML($input[$i]);
|
$sax_result = $this->HTMLPurifier_Lexer_Sax->tokenizeHTML($input[$i]);
|
||||||
if (!isset($sax_expect[$i])) {
|
if (!isset($sax_expect[$i])) {
|
||||||
// by default, assert with normal result
|
// by default, assert with normal result
|
||||||
$this->assertEqual($expect[$i], $sax_result);
|
$this->assertEqual($expect[$i], $sax_result);
|
||||||
@@ -191,7 +191,7 @@ class Test_HTML_Lexer extends UnitTestCase
|
|||||||
|
|
||||||
$size = count($input);
|
$size = count($input);
|
||||||
for($i = 0; $i < $size; $i++) {
|
for($i = 0; $i < $size; $i++) {
|
||||||
$result = $this->HTML_Lexer->tokenizeAttributeString($input[$i]);
|
$result = $this->HTMLPurifier_Lexer->tokenizeAttributeString($input[$i]);
|
||||||
$this->assertEqual($expect[$i], $result);
|
$this->assertEqual($expect[$i], $result);
|
||||||
paintIf($result, $expect[$i] != $result);
|
paintIf($result, $expect[$i] != $result);
|
||||||
}
|
}
|
@@ -7,7 +7,7 @@ class Test_HTMLDTD_ChildDef extends UnitTestCase
|
|||||||
var $gen;
|
var $gen;
|
||||||
|
|
||||||
function Test_HTMLDTD_ChildDef() {
|
function Test_HTMLDTD_ChildDef() {
|
||||||
$this->lex = new HTML_Lexer();
|
$this->lex = new HTMLPurifier_Lexer();
|
||||||
$this->gen = new HTML_Generator();
|
$this->gen = new HTML_Generator();
|
||||||
parent::UnitTestCase();
|
parent::UnitTestCase();
|
||||||
}
|
}
|
||||||
@@ -134,7 +134,7 @@ class Test_PureHTMLDefinition extends UnitTestCase
|
|||||||
$this->UnitTestCase();
|
$this->UnitTestCase();
|
||||||
$this->def = new PureHTMLDefinition();
|
$this->def = new PureHTMLDefinition();
|
||||||
$this->def->loadData();
|
$this->def->loadData();
|
||||||
$this->lex = new HTML_Lexer();
|
$this->lex = new HTMLPurifier_Lexer();
|
||||||
}
|
}
|
||||||
|
|
||||||
function test_removeForeignElements() {
|
function test_removeForeignElements() {
|
||||||
|
Reference in New Issue
Block a user