diff --git a/HTML_Lexer.php b/HTML_Lexer.php
index ebcd0a33..ebcc655b 100644
--- a/HTML_Lexer.php
+++ b/HTML_Lexer.php
@@ -1,11 +1,14 @@
tokens;
}
- function openHandler(&$parser, $name, $attrs) {
- $this->tokens[] = new MF_StartTag($name, $attrs);
+ // SAX open-tag callback: appends exactly one token for the element.
+ // $parser - the parser instance that fired the event (not used here)
+ // $name - element name as reported by the parser
+ // $attrs - attributes, handed unchanged to the token constructor
+ // $closed - truthy when the parser reports the element as an empty tag;
+ // an MF_EmptyTag is stored then, otherwise an MF_StartTag. Defaults to
+ // false so callers using the former three-argument form keep working.
+ // Always returns true.
+ function openHandler(&$parser, $name, $attrs, $closed = false) {
+ if ($closed) {
+ $this->tokens[] = new MF_EmptyTag($name, $attrs);
+ } else {
+ $this->tokens[] = new MF_StartTag($name, $attrs);
+ }
return true;
}
function closeHandler(&$parser, $name) {
+ // SAX close-tag callback: appends an MF_EndTag for $name, always
+ // returning true.
+ // HTMLSax3 seems to always send empty tags an extra close tag.
+ // Ignore that event only when it names the MF_EmptyTag that was just
+ // emitted, so a genuine end tag directly following an empty tag
+ // (e.g. the </b> in '<b><br /></b>') is still tokenized.
+ // The $last >= 0 check avoids reading offset -1 on an empty token list.
+ // NOTE(review): assumes MF_EmptyTag keeps its lowercased name in ->name
+ // the way MF_Tag does - confirm.
+ $last = count($this->tokens) - 1;
+ if ($last >= 0 && is_a($this->tokens[$last], 'MF_EmptyTag')
+ && $this->tokens[$last]->name === strtolower($name)) {
+ return true;
+ }
$this->tokens[] = new MF_EndTag($name);
return true;
}
diff --git a/MarkupFragment.php b/MarkupFragment.php
index 5dd403a3..9b32e80f 100644
--- a/MarkupFragment.php
+++ b/MarkupFragment.php
@@ -9,7 +9,8 @@ class MF_Tag extends MF
{
var $name;
function MF_Tag($name) {
- $this->name = strtolower($name);
+ $this->name = strtolower($name); // store the name lowercased: the SAX
+ // parser was seen reporting uppercase names (cause not yet investigated)
}
}
@@ -31,6 +32,7 @@ class MF_Text extends MF
var $data;
function MF_Text($data) {
$this->data = trim($data); // fairly certain trimming it's okay
+ // but it's not default SAX behavior; text handed over by SAX loses its leading/trailing whitespace here
}
function append($mf_text) {
return new MF_Text($this->data . $mf_text->data);
diff --git a/docs/spec.txt b/docs/spec.txt
index 4ae43cfe..9c85668a 100644
--- a/docs/spec.txt
+++ b/docs/spec.txt
@@ -20,7 +20,7 @@ of little classes:
* Comment(text) is escapeHandler (has leading -)
* CharacterData(text) is escapeHandler (has leading [)
-Ignorable (although we probably want to output them raw):
+Ignorable / not implemented (although we probably want to output them raw):
* ProcessingInstructions(text) is piHandler
* JavaOrASPInstructions(text) is jaspHandler
diff --git a/tests/HTML_Lexer.php b/tests/HTML_Lexer.php
index 8a3563f3..a4bed417 100644
--- a/tests/HTML_Lexer.php
+++ b/tests/HTML_Lexer.php
@@ -1,5 +1,9 @@
bold text';
- $expect[] = array(
+ $input[2] = 'This is bold text';
+ $expect[2] = array(
new MF_Text('This is ')
,new MF_StartTag('b', array())
,new MF_Text('bold')
@@ -40,8 +48,8 @@ class TestCase_HTML_Lexer extends UnitTestCase
,new MF_Text(' text')
);
- $input[] = '
Totally rad dude. asdf
';
- $expect[] = array(
+ $input[3] = 'Totally rad dude. asdf
';
+ $expect[3] = array(
new MF_StartTag('DIV', array())
,new MF_Text('Totally rad dude. ')
,new MF_StartTag('b', array())
@@ -50,8 +58,8 @@ class TestCase_HTML_Lexer extends UnitTestCase
,new MF_EndTag('div')
);
- $input[] = '';
- $expect[] = array(
+ $input[4] = '';
+ $expect[4] = array(
new MF_StartTag('asdf')
,new MF_EndTag('asdf')
,new MF_StartTag('d')
@@ -63,8 +71,8 @@ class TestCase_HTML_Lexer extends UnitTestCase
,new MF_EndTag('ASDF')
);
- $input[] = 'Link to foobar';
- $expect[] = array(
+ $input[5] = 'Link to foobar';
+ $expect[5] = array(
new MF_StartTag('a',array('href'=>'foobar.php','title'=>'foo!'))
,new MF_Text('Link to ')
,new MF_StartTag('b',array('id'=>'asdf'))
@@ -73,32 +81,56 @@ class TestCase_HTML_Lexer extends UnitTestCase
,new MF_EndTag('a')
);
- $input[] = '
';
- $expect[] = array(
+ $input[6] = '
';
+ $expect[6] = array(
new MF_EmptyTag('br')
);
- $input[] = ' ';
- $expect[] = array(
+ // [INVALID] [RECOVERABLE]
+ $input[7] = ' ';
+ $expect[7] = array(
new MF_Comment(' Comment ')
,new MF_Text(' ')
,new MF_Comment(' not so well formed -')
);
+ $sax_expect[7] = false; // we need to figure out proper comment output
- $input[] = '''))
+ );
- $size = count($input);
- for($i = 0; $i < $size; $i++) {
+ $input[9] = '<b>';
+ $expect[9] = array(
+ new MF_Text('<b>')
+ );
+ // however, we may want to change both styles
+ // into parsed: ''. SAX has an option for this
+
+ foreach($input as $i => $discard) {
$result = $this->HTML_Lexer->tokenizeHTML($input[$i]);
$this->assertEqual($expect[$i], $result);
paintIf($result, $expect[$i] != $result);
- // since I didn't write the parser, I can't define its behavior
- // however, make sure that the class runs without any errors
- $exp_result = $this->HTML_Lexer_Sax->tokenizeHTML($input[$i]);
+ // assert unless I say otherwise
+ $sax_result = $this->HTML_Lexer_Sax->tokenizeHTML($input[$i]);
+ if (!isset($sax_expect[$i])) {
+ // by default, assert with normal result
+ $this->assertEqual($expect[$i], $sax_result);
+ paintIf($sax_result, $expect[$i] != $sax_result);
+ } elseif ($sax_expect[$i] === false) {
+ // assertions were turned off, optionally dump
+ // paintIf($sax_expect, $i == NUMBER);
+ } else {
+ // match with a custom SAX result array
+ $this->assertEqual($sax_expect[$i], $sax_result);
+ paintIf($sax_result, $sax_expect[$i] != $sax_result);
+ }
}
}