diff --git a/HTML_Lexer.php b/HTML_Lexer.php
index d69629d4..ebcd0a33 100644
--- a/HTML_Lexer.php
+++ b/HTML_Lexer.php
@@ -9,7 +9,7 @@ TODO:
 
 */
 
-class MarkupLexer
+class HTML_Lexer
 {
     
     function nextQuote($string, $offset = 0) {
@@ -98,7 +98,10 @@ class MarkupLexer
                     continue;
                 }
                 
-                // Check if it is self closing, if so, remove trailing slash
+                // Check if it is explicitly self closing, if so, remove
+                // trailing slash. Remember, we could have a tag like <br>, so
+                // any later token processing scripts must reclassify such
+                // StartTags (which are really empty elements) as EmptyTags.
                 $is_self_closing = (strpos($segment,'/') === strlen($segment) - 1);
                 if ($is_self_closing) {
                     $segment = substr($segment, 0, strlen($segment) - 1);
@@ -189,4 +192,45 @@ class MarkupLexer
     
 }
 
+// Lexer variant that hands the markup to the PEAR class XML_HTMLSax3 and builds the token list from its callbacks
+class HTML_Lexer_Sax extends HTML_Lexer
+{
+    // Tokens appended by the handler callbacks below; reset at the start of every tokenizeHTML() call.
+    var $tokens = array();
+    // Parses $html with a fresh XML_HTMLSax3 whose handlers are this object's methods; returns the collected tokens.
+    function tokenizeHTML($html) {
+        $this->tokens = array();
+        $parser=& new XML_HTMLSax3();
+        $parser->set_object($this);
+        $parser->set_element_handler('openHandler','closeHandler');
+        $parser->set_data_handler('dataHandler');
+        $parser->set_escape_handler('escapeHandler');
+        $parser->parse($html);
+        return $this->tokens;
+    }
+    // Element-open callback: appends an MF_StartTag built from the tag name and its attributes.
+    function openHandler(&$parser, $name, $attrs) {
+        $this->tokens[] = new MF_StartTag($name, $attrs);
+        return true;
+    }
+    // Element-close callback: appends an MF_EndTag for the tag name.
+    function closeHandler(&$parser, $name) {
+        $this->tokens[] = new MF_EndTag($name);
+        return true;
+    }
+    // Data callback: appends the received data as an MF_Text token.
+    function dataHandler(&$parser, $data) {
+        $this->tokens[] = new MF_Text($data);
+        return true;
+    }
+    // Escape callback: data beginning with '-' becomes an MF_Comment (passed as received); other escapes yield no token.
+    function escapeHandler(&$parser, $data) {
+        if (strpos($data, '-') === 0) {
+            $this->tokens[] = new MF_Comment($data);
+        }
+        return true;
+    }
+    
+}
+
 ?>
\ No newline at end of file
diff --git a/tests/HTML_Lexer.php b/tests/HTML_Lexer.php
index a4d9c35e..8a3563f3 100644
--- a/tests/HTML_Lexer.php
+++ b/tests/HTML_Lexer.php
@@ -1,16 +1,18 @@
 <?php
 
-class TestCase_MarkupLexer extends UnitTestCase
+class TestCase_HTML_Lexer extends UnitTestCase
 {
     
-    var $MarkupLexer;
+    var $HTML_Lexer;
+    var $HTML_Lexer_Sax;
     
     function setUp() {
-        $this->MarkupLexer =& new MarkupLexer();
+        $this->HTML_Lexer     =& new HTML_Lexer();
+        $this->HTML_Lexer_Sax =& new HTML_Lexer_Sax();
     }
     
     function test_nextWhiteSpace() {
-        $HP =& $this->MarkupLexer;
+        $HP =& $this->HTML_Lexer;
         $this->assertIdentical(false, $HP->nextWhiteSpace('asdf'));
         $this->assertIdentical(0, $HP->nextWhiteSpace(' asdf'));
         $this->assertIdentical(0, $HP->nextWhiteSpace("\nasdf"));
@@ -90,9 +92,13 @@ class TestCase_MarkupLexer extends UnitTestCase
         
         $size = count($input);
         for($i = 0; $i < $size; $i++) {
-            $result = $this->MarkupLexer->tokenizeHTML($input[$i]);
+            $result = $this->HTML_Lexer->tokenizeHTML($input[$i]);
             $this->assertEqual($expect[$i], $result);
             paintIf($result, $expect[$i] != $result);
+            
+            // The SAX lexer wraps a third-party parser, so its exact output is
+            // not asserted here; just make sure it runs without any errors.
+            $exp_result = $this->HTML_Lexer_Sax->tokenizeHTML($input[$i]);
         }
         
     }
@@ -116,7 +122,7 @@ class TestCase_MarkupLexer extends UnitTestCase
         
         $size = count($input);
         for($i = 0; $i < $size; $i++) {
-            $result = $this->MarkupLexer->tokenizeAttributeString($input[$i]);
+            $result = $this->HTML_Lexer->tokenizeAttributeString($input[$i]);
             $this->assertEqual($expect[$i], $result);
             paintIf($result, $expect[$i] != $result);
         }