mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-05 05:37:49 +02:00
Finish documenting PEARSax3, touch up the other docs. Nuke the original lexer.txt document.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@102 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
@@ -2,7 +2,25 @@
|
||||
|
||||
require_once 'HTMLPurifier/Lexer.php';
|
||||
|
||||
// PHP5 only!
|
||||
/**
|
||||
* Parser that uses PHP 5's DOM extension (part of the core).
|
||||
*
|
||||
* In PHP 5, the DOM XML extension was revamped into DOM and added to the core.
|
||||
* It gives us a forgiving HTML parser, which we use to transform the HTML
|
||||
* into a DOM, and then into the tokens. It is blazingly fast (for large
|
||||
* documents, it performs twenty times faster than
|
||||
* HTMLPurifier_Lexer_DirectLex), and is the default choice for PHP 5.
|
||||
*
|
||||
* @notice
|
||||
* Any empty elements will have empty tokens associated with them, even if
|
||||
* this is prohibited by the spec. This cannot be fixed until the spec
|
||||
* comes into play.
|
||||
*
|
||||
* @todo Determine DOM's entity parsing behavior, point to local entity files
|
||||
* if necessary.
|
||||
* @todo Make div access less fragile, and refrain from preprocessing when
|
||||
* HTML tag and friends are already present.
|
||||
*/
|
||||
|
||||
class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
{
|
||||
@@ -19,6 +37,16 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursive function that tokenizes a node, putting it into an accumulator.
|
||||
*
|
||||
* @param $node DOMNode to be tokenized.
|
||||
* @param $tokens Array-list of already tokenized tokens.
|
||||
* @param $collect Says whether or not start and close tokens are collected, set to
|
||||
* false at first recursion because it's the implicit DIV
|
||||
* tag you're dealing with.
|
||||
* @returns Tokens of node appended to previously passed tokens.
|
||||
*/
|
||||
protected function tokenizeDOM($node, $tokens = array(), $collect = false) {
|
||||
// recursive goodness!
|
||||
|
||||
@@ -63,6 +91,12 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a DOMNamedNodeMap of DOMAttr objects into an assoc array.
|
||||
*
|
||||
* @param $attribute_list DOMNamedNodeMap of DOMAttr objects.
|
||||
* @returns Associative array of attributes.
|
||||
*/
|
||||
protected function transformAttrToAssoc($attribute_list) {
|
||||
$attribute_array = array();
|
||||
// undocumented behavior
|
||||
|
Reference in New Issue
Block a user