mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-02 20:27:40 +02:00
Quality control: improve the documentation a little and fix UTF-8 unfriendliness in the Generator.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@138 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
@@ -13,6 +13,7 @@ class HTMLPurifier_Generator
|
|||||||
}
|
}
|
||||||
|
|
||||||
function generateFromToken($token) {
|
function generateFromToken($token) {
|
||||||
|
if (!isset($token->type)) return '';
|
||||||
if ($token->type == 'start') {
|
if ($token->type == 'start') {
|
||||||
$attr = $this->generateAttributes($token->attributes);
|
$attr = $this->generateAttributes($token->attributes);
|
||||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
|
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
|
||||||
@@ -25,7 +26,7 @@ class HTMLPurifier_Generator
|
|||||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr . ' />';
|
return '<' . $token->name . ($attr ? ' ' : '') . $attr . ' />';
|
||||||
|
|
||||||
} elseif ($token->type == 'text') {
|
} elseif ($token->type == 'text') {
|
||||||
return htmlentities($token->data, ENT_COMPAT, 'UTF-8');
|
return htmlspecialchars($token->data, ENT_COMPAT, 'UTF-8');
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
return '';
|
return '';
|
||||||
@@ -36,7 +37,7 @@ class HTMLPurifier_Generator
|
|||||||
function generateAttributes($assoc_array_of_attributes) {
|
function generateAttributes($assoc_array_of_attributes) {
|
||||||
$html = '';
|
$html = '';
|
||||||
foreach ($assoc_array_of_attributes as $key => $value) {
|
foreach ($assoc_array_of_attributes as $key => $value) {
|
||||||
$html .= $key.'="'.htmlentities($value, ENT_COMPAT, 'UTF-8').'" ';
|
$html .= $key.'="'.htmlspecialchars($value, ENT_COMPAT, 'UTF-8').'" ';
|
||||||
}
|
}
|
||||||
return rtrim($html);
|
return rtrim($html);
|
||||||
}
|
}
|
||||||
|
@@ -5,18 +5,20 @@ require_once 'HTMLPurifier/Token.php';
|
|||||||
/**
|
/**
|
||||||
* Forgivingly lexes HTML (SGML-style) markup into tokens.
|
* Forgivingly lexes HTML (SGML-style) markup into tokens.
|
||||||
*
|
*
|
||||||
* The lexer parses a string of SGML-style markup and converts them into
|
* A lexer parses a string of SGML-style markup and converts it into
|
||||||
* corresponding tokens. It doesn't check for well-formedness, although its
|
* corresponding tokens. It doesn't check for well-formedness, although its
|
||||||
* internal mechanism may make this automatic (such as the case of
|
* internal mechanism may make this automatic (such as the case of
|
||||||
* HTMLPurifier_Lexer_DOMLex). There are several implementations to choose
|
* HTMLPurifier_Lexer_DOMLex). There are several implementations to choose
|
||||||
* from.
|
* from.
|
||||||
*
|
*
|
||||||
* The lexer is HTML-oriented: it might work with XML, but it's not
|
* A lexer is HTML-oriented: it might work with XML, but it's not
|
||||||
* recommended, as we adhere to a subset of the specification for optimization
|
* recommended, as we adhere to a subset of the specification for optimization
|
||||||
* reasons.
|
* reasons.
|
||||||
*
|
*
|
||||||
* This class should not be directly instantiated, but you may use create() to
|
* This class should not be directly instantiated, but you may use create() to
|
||||||
* retrieve a default copy of the lexer.
|
* retrieve a default copy of the lexer. Being a supertype, this class
|
||||||
|
* does not actually define any implementation, but offers commonly used
|
||||||
|
* convenience functions for subclasses.
|
||||||
*
|
*
|
||||||
* @note The unit tests will instantiate this class for testing purposes, as
|
* @note The unit tests will instantiate this class for testing purposes, as
|
||||||
* many of the utility functions require a class to be instantiated.
|
* many of the utility functions require a class to be instantiated.
|
||||||
|
@@ -1,15 +1,18 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/Generator.php';
|
require_once 'HTMLPurifier/Generator.php';
|
||||||
|
require_once 'HTMLPurifier/EntityLookup.php';
|
||||||
|
|
||||||
class HTMLPurifier_GeneratorTest extends UnitTestCase
|
class HTMLPurifier_GeneratorTest extends UnitTestCase
|
||||||
{
|
{
|
||||||
|
|
||||||
var $gen;
|
var $gen;
|
||||||
|
var $_entity_lookup;
|
||||||
|
|
||||||
function HTMLPurifier_GeneratorTest() {
|
function HTMLPurifier_GeneratorTest() {
|
||||||
$this->UnitTestCase();
|
$this->UnitTestCase();
|
||||||
$this->gen = new HTMLPurifier_Generator();
|
$this->gen = new HTMLPurifier_Generator();
|
||||||
|
$this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
|
||||||
}
|
}
|
||||||
|
|
||||||
function test_generateFromToken() {
|
function test_generateFromToken() {
|
||||||
@@ -39,6 +42,15 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
|
|||||||
$inputs[5] = new HTMLPurifier_Token_Empty('br');
|
$inputs[5] = new HTMLPurifier_Token_Empty('br');
|
||||||
$expect[5] = '<br />';
|
$expect[5] = '<br />';
|
||||||
|
|
||||||
|
// test fault tolerance
|
||||||
|
$inputs[6] = null;
|
||||||
|
$expect[6] = '';
|
||||||
|
|
||||||
|
// don't convert non-special characters
|
||||||
|
$theta_char = $this->_entity_lookup->table['theta'];
|
||||||
|
$inputs[7] = new HTMLPurifier_Token_Text($theta_char);
|
||||||
|
$expect[7] = $theta_char;
|
||||||
|
|
||||||
foreach ($inputs as $i => $input) {
|
foreach ($inputs as $i => $input) {
|
||||||
$result = $this->gen->generateFromToken($input);
|
$result = $this->gen->generateFromToken($input);
|
||||||
$this->assertEqual($result, $expect[$i]);
|
$this->assertEqual($result, $expect[$i]);
|
||||||
@@ -64,6 +76,11 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
|
|||||||
$inputs[3] = array('src' => 'picture.jpg', 'alt' => 'Short & interesting');
|
$inputs[3] = array('src' => 'picture.jpg', 'alt' => 'Short & interesting');
|
||||||
$expect[3] = 'src="picture.jpg" alt="Short &amp; interesting"';
|
$expect[3] = 'src="picture.jpg" alt="Short &amp; interesting"';
|
||||||
|
|
||||||
|
// don't escape nonspecial characters
|
||||||
|
$theta_char = $this->_entity_lookup->table['theta'];
|
||||||
|
$inputs[4] = array('title' => 'Theta is ' . $theta_char);
|
||||||
|
$expect[4] = 'title="Theta is ' . $theta_char . '"';
|
||||||
|
|
||||||
foreach ($inputs as $i => $input) {
|
foreach ($inputs as $i => $input) {
|
||||||
$result = $this->gen->generateAttributes($input);
|
$result = $this->gen->generateAttributes($input);
|
||||||
$this->assertEqual($result, $expect[$i]);
|
$this->assertEqual($result, $expect[$i]);
|
||||||
|
Reference in New Issue
Block a user