mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-03 20:58:11 +02:00
Compare commits
43 Commits
v1.4.1-str
...
v1.4.0
Author | SHA1 | Date | |
---|---|---|---|
|
2d49299621 | ||
|
ab5c782c77 | ||
|
8893b87e04 | ||
|
aeef746060 | ||
|
da13c6ac87 | ||
|
ccae73c25a | ||
|
8d6bfa4037 | ||
|
712d81ebea | ||
|
f7f6fed86a | ||
|
2293c67eec | ||
|
108df87824 | ||
|
5e366b25f8 | ||
|
2e16c4a968 | ||
|
a8db22dfff | ||
|
fbe2c25f8a | ||
|
158be61def | ||
|
d693c4ea09 | ||
|
c24916e1d6 | ||
|
a68b6afda1 | ||
|
78cf7db82e | ||
|
9b375fdfb8 | ||
|
0dd866cc15 | ||
|
ad1169c711 | ||
|
2816ae535f | ||
|
462d3ab72f | ||
|
cf1d868782 | ||
|
c705e17a58 | ||
|
1cce367950 | ||
|
61f852d429 | ||
|
3a73c2cf04 | ||
|
e75b676656 | ||
|
b53370efbf | ||
|
d60f345cab | ||
|
aefda60696 | ||
|
2ffa5d3135 | ||
|
23d3490d49 | ||
|
582ffc4143 | ||
|
d52189a19d | ||
|
02006d6e64 | ||
|
dcaa374dae | ||
|
e2cc37724b | ||
|
3ad6239dc3 | ||
|
663fb4e1b2 |
2
Doxyfile
2
Doxyfile
@@ -4,7 +4,7 @@
|
||||
# Project related configuration options
|
||||
#---------------------------------------------------------------------------
|
||||
PROJECT_NAME = HTML Purifier
|
||||
PROJECT_NUMBER = 1.4.1
|
||||
PROJECT_NUMBER = 1.4.0
|
||||
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
||||
CREATE_SUBDIRS = NO
|
||||
OUTPUT_LANGUAGE = English
|
||||
|
4
NEWS
4
NEWS
@@ -9,10 +9,6 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
. Internal change
|
||||
==========================
|
||||
|
||||
1.4.1, released 2007-01-21
|
||||
! docs/enduser-youtube.html updated according to new functionality
|
||||
- YouTube IDs can have underscores and dashes
|
||||
|
||||
1.4.0, released 2007-01-21
|
||||
! Implemented list-style-image, URIs now allowed in list-style
|
||||
! Implemented background-image, background-repeat, background-attachment
|
||||
|
@@ -37,7 +37,7 @@ from a specific website, it probably is okay. If no amount of pleading will
|
||||
convince the people upstairs that they should settle for just linking
|
||||
to their movies, you may find this technique very useful.</p>
|
||||
|
||||
<h2>Looking in</h2>
|
||||
<h2>Sample</h2>
|
||||
|
||||
<p>Below is custom code that allows users to embed
|
||||
YouTube videos. This is not favoritism: this trick can easily be adapted for
|
||||
@@ -69,27 +69,55 @@ into your documents. YouTube's code goes like this:</p>
|
||||
<p>What point 2 means is that if we have code like <code><span
|
||||
class="embed-youtube">AyPzM5WK8ys</span></code> your
|
||||
application can reconstruct the full object from this small snippet that
|
||||
passes through HTML Purifier <em>unharmed</em>.
|
||||
<a href="http://hp.jpsband.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/Filter/YouTube.php">Show me the code!</a></p>
|
||||
passes through HTML Purifier <em>unharmed</em>.</p>
|
||||
|
||||
<p>And the corresponding usage:</p>
|
||||
<pre>
|
||||
<?php
|
||||
|
||||
<pre><?php
|
||||
// assuming $purifier is an instance of HTMLPurifier
|
||||
require_once 'HTMLPurifier/Filter/YouTube.php';
|
||||
$purifier->addFilter(new HTMLPurifier_Filter_YouTube());
|
||||
?></pre>
|
||||
class HTMLPurifierX_PreserveYouTube extends HTMLPurifier
|
||||
{
|
||||
function purify($html, $config = null) {
|
||||
$pre_regex = '#<object[^>]+>.+?'.
|
||||
'http://www.youtube.com/v/([A-Za-z0-9]+).+?</object>#';
|
||||
$pre_replace = '<span class="youtube-embed">\1</span>';
|
||||
$html = preg_replace($pre_regex, $pre_replace, $html);
|
||||
$html = parent::purify($html, $config);
|
||||
$post_regex = '#<span class="youtube-embed">([A-Za-z0-9]+)</span>#';
|
||||
$post_replace = '<object width="425" height="350" '.
|
||||
'data="http://www.youtube.com/v/\1">'.
|
||||
'<param name="movie" value="http://www.youtube.com/v/\1"></param>'.
|
||||
'<param name="wmode" value="transparent"></param>'.
|
||||
'<!--[if IE]>'.
|
||||
'<embed src="http://www.youtube.com/v/\1"'.
|
||||
'type="application/x-shockwave-flash"'.
|
||||
'wmode="transparent" width="425" height="350" />'.
|
||||
'<![endif]-->'.
|
||||
'</object>';
|
||||
$html = preg_replace($post_regex, $post_replace, $html);
|
||||
return $html;
|
||||
}
|
||||
}
|
||||
|
||||
<p>There is a bit going on in the two code snippets, so let's explain.</p>
|
||||
$purifier = new HTMLPurifierX_PreserveYouTube();
|
||||
$html_still_with_youtube = $purifier->purify($html_with_youtube);
|
||||
|
||||
?>
|
||||
</pre>
|
||||
|
||||
<p>There is a bit going on here, so let's explain.</p>
|
||||
|
||||
<ol>
|
||||
<li>This is a Filter object, which intercepts the HTML that is
|
||||
coming into and out of the purifier. You can add as many
|
||||
filter objects as you like. <code>preFilter()</code>
|
||||
processes the code before it gets purified, and <code>postFilter()</code>
|
||||
processes the code afterwards. So, we'll use <code>preFilter()</code> to
|
||||
replace the object tag with a <code>span</code>, and <code>postFilter()</code>
|
||||
to restore it.</li>
|
||||
<li>The class uses the prefix <code>HTMLPurifierX</code> because it's
|
||||
userspace code. Don't use <code>HTMLPurifier</code> in front of your
|
||||
class, since it might clobber another class in the library.</li>
|
||||
<li>In order to keep the interface compatible, we've extended HTMLPurifier
|
||||
into a new class that preserves the YouTube videos. This means that
|
||||
all you have to do is replace all instances of
|
||||
<code>new HTMLPurifier</code> with <code>new
|
||||
HTMLPurifierX_PreserveYouTube</code>. There are other ways to go about
|
||||
doing this: if you were calling a function that wrapped HTML Purifier,
|
||||
you could paste the PHP right there. If you wanted to be really
|
||||
fancy, you could make a decorator for HTMLPurifier.</li>
|
||||
<li>The first preg_replace call replaces any YouTube code users may have
|
||||
embedded into the benign span tag. Span is used because it is inline,
|
||||
and objects are inline too. We are very careful to be extremely
|
||||
@@ -137,16 +165,17 @@ it is important that you are cognizant of the risk.</p>
|
||||
|
||||
<p>This should go without saying, but if you're going to adapt this code
|
||||
for Google Video or the like, make sure you do it <em>right</em>. It's
|
||||
extremely easy to allow a character too many in <code>postFilter()</code> and
|
||||
extremely easy to allow a character too many in the final section and
|
||||
suddenly you're introducing XSS into HTML Purifier's XSS free output. HTML
|
||||
Purifier may be well written, but it cannot guard against vulnerabilities
|
||||
introduced after it has finished.</p>
|
||||
|
||||
<h2>Help out!</h2>
|
||||
<h2>Future plans</h2>
|
||||
|
||||
<p>If you write a filter for your favorite video destination (or anything
|
||||
like that, for that matter), send it over and it might get included
|
||||
with the core!</p>
|
||||
<p>This functionality is part of the core library, using the
|
||||
HTMLPurifier_Filter class to achieve the desired effect. Our implementation
|
||||
is slightly different, and this page will be updated to reflect that
|
||||
once 1.4.0 is released.</p>
|
||||
|
||||
</body>
|
||||
</html>
|
@@ -22,7 +22,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
HTML Purifier 1.4.1 - Standards Compliant HTML Filtering
|
||||
HTML Purifier 1.4.0 - Standards Compliant HTML Filtering
|
||||
Copyright (C) 2006 Edward Z. Yang
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
@@ -64,7 +64,7 @@ require_once 'HTMLPurifier/Encoder.php';
|
||||
class HTMLPurifier
|
||||
{
|
||||
|
||||
var $version = '1.4.1';
|
||||
var $version = '1.4.0';
|
||||
|
||||
var $config;
|
||||
var $filters;
|
||||
|
@@ -53,8 +53,8 @@ class HTMLPurifier_Config
|
||||
* or a string filename of an ini file.
|
||||
* @return Configured HTMLPurifier_Config object
|
||||
*/
|
||||
static function create($config) {
|
||||
if ($config instanceof HTMLPurifier_Config) return $config;
|
||||
function create($config) {
|
||||
if (is_a($config, 'HTMLPurifier_Config')) return $config;
|
||||
$ret = HTMLPurifier_Config::createDefault();
|
||||
if (is_string($config)) $ret->loadIni($config);
|
||||
elseif (is_array($config)) $ret->loadArray($config);
|
||||
@@ -66,7 +66,7 @@ class HTMLPurifier_Config
|
||||
* @static
|
||||
* @return Default HTMLPurifier_Config object.
|
||||
*/
|
||||
static function createDefault() {
|
||||
function createDefault() {
|
||||
$definition =& HTMLPurifier_ConfigSchema::instance();
|
||||
$config = new HTMLPurifier_Config($definition);
|
||||
return $config;
|
||||
|
@@ -69,7 +69,7 @@ class HTMLPurifier_ConfigSchema {
|
||||
* Retrieves an instance of the application-wide configuration definition.
|
||||
* @static
|
||||
*/
|
||||
static function &instance($prototype = null) {
|
||||
function &instance($prototype = null) {
|
||||
static $instance;
|
||||
if ($prototype !== null) {
|
||||
$instance = $prototype;
|
||||
@@ -91,7 +91,7 @@ class HTMLPurifier_ConfigSchema {
|
||||
* HTMLPurifier_DirectiveDef::$type for allowed values
|
||||
* @param $description Description of directive for documentation
|
||||
*/
|
||||
static function define(
|
||||
function define(
|
||||
$namespace, $name, $default, $type,
|
||||
$description
|
||||
) {
|
||||
@@ -155,7 +155,7 @@ class HTMLPurifier_ConfigSchema {
|
||||
* @param $namespace Namespace's name
|
||||
* @param $description Description of the namespace
|
||||
*/
|
||||
static function defineNamespace($namespace, $description) {
|
||||
function defineNamespace($namespace, $description) {
|
||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||
if (isset($def->info[$namespace])) {
|
||||
trigger_error('Cannot redefine namespace', E_USER_ERROR);
|
||||
@@ -188,7 +188,7 @@ class HTMLPurifier_ConfigSchema {
|
||||
* @param $alias Name of aliased value
|
||||
* @param $real Value aliased value will be converted into
|
||||
*/
|
||||
static function defineValueAliases($namespace, $name, $aliases) {
|
||||
function defineValueAliases($namespace, $name, $aliases) {
|
||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||
if (!isset($def->info[$namespace][$name])) {
|
||||
trigger_error('Cannot set value alias for non-existant directive',
|
||||
@@ -219,7 +219,7 @@ class HTMLPurifier_ConfigSchema {
|
||||
* @param $name Name of directive
|
||||
* @param $allowed_values Arraylist of allowed values
|
||||
*/
|
||||
static function defineAllowedValues($namespace, $name, $allowed_values) {
|
||||
function defineAllowedValues($namespace, $name, $allowed_values) {
|
||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||
if (!isset($def->info[$namespace][$name])) {
|
||||
trigger_error('Cannot define allowed values for undefined directive',
|
||||
@@ -256,7 +256,7 @@ class HTMLPurifier_ConfigSchema {
|
||||
* @param $new_namespace
|
||||
* @param $new_name Directive that the alias will be to
|
||||
*/
|
||||
static function defineAlias($namespace, $name, $new_namespace, $new_name) {
|
||||
function defineAlias($namespace, $name, $new_namespace, $new_name) {
|
||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||
if (!isset($def->info[$namespace])) {
|
||||
trigger_error('Cannot define directive alias in undefined namespace',
|
||||
@@ -374,7 +374,7 @@ class HTMLPurifier_ConfigSchema {
|
||||
*/
|
||||
function isError($var) {
|
||||
if (!is_object($var)) return false;
|
||||
if (!($var instanceof HTMLPurifier_Error)) return false;
|
||||
if (!is_a($var, 'HTMLPurifier_Error')) return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@@ -90,7 +90,7 @@ class HTMLPurifier_Encoder
|
||||
* would need that, and I'm probably not going to implement them.
|
||||
* Once again, PHP 6 should solve all our problems.
|
||||
*/
|
||||
static function cleanUTF8($str, $force_php = false) {
|
||||
function cleanUTF8($str, $force_php = false) {
|
||||
|
||||
static $non_sgml_chars = array();
|
||||
if (empty($non_sgml_chars)) {
|
||||
@@ -273,7 +273,7 @@ class HTMLPurifier_Encoder
|
||||
// | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
|
||||
// +----------+----------+----------+----------+
|
||||
|
||||
static function unichr($code) {
|
||||
function unichr($code) {
|
||||
if($code > 1114111 or $code < 0 or
|
||||
($code >= 55296 and $code <= 57343) ) {
|
||||
// bits are set outside the "valid" range as defined
|
||||
@@ -314,7 +314,7 @@ class HTMLPurifier_Encoder
|
||||
* Converts a string to UTF-8 based on configuration.
|
||||
* @static
|
||||
*/
|
||||
static function convertToUTF8($str, $config, &$context) {
|
||||
function convertToUTF8($str, $config, &$context) {
|
||||
static $iconv = null;
|
||||
if ($iconv === null) $iconv = function_exists('iconv');
|
||||
$encoding = $config->get('Core', 'Encoding');
|
||||
@@ -333,7 +333,7 @@ class HTMLPurifier_Encoder
|
||||
* @note Currently, this is a lossy conversion, with unexpressable
|
||||
* characters being omitted.
|
||||
*/
|
||||
static function convertFromUTF8($str, $config, &$context) {
|
||||
function convertFromUTF8($str, $config, &$context) {
|
||||
static $iconv = null;
|
||||
if ($iconv === null) $iconv = function_exists('iconv');
|
||||
$encoding = $config->get('Core', 'Encoding');
|
||||
@@ -366,7 +366,7 @@ class HTMLPurifier_Encoder
|
||||
* @note Sort of with cleanUTF8() but it assumes that $str is
|
||||
* well-formed UTF-8
|
||||
*/
|
||||
static function convertToASCIIDumbLossless($str) {
|
||||
function convertToASCIIDumbLossless($str) {
|
||||
$bytesleft = 0;
|
||||
$result = '';
|
||||
$working = 0;
|
||||
|
@@ -29,7 +29,7 @@ class HTMLPurifier_EntityLookup {
|
||||
* @static
|
||||
* @param Optional prototype of custom lookup table to overload with.
|
||||
*/
|
||||
static function instance($prototype = false) {
|
||||
function instance($prototype = false) {
|
||||
// no references, since PHP doesn't copy unless modified
|
||||
static $instance = null;
|
||||
if ($prototype) {
|
||||
|
@@ -9,13 +9,13 @@ class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
|
||||
|
||||
function preFilter($html, $config, &$context) {
|
||||
$pre_regex = '#<object[^>]+>.+?'.
|
||||
'http://www.youtube.com/v/([A-Za-z0-9\-_]+).+?</object>#';
|
||||
'http://www.youtube.com/v/([A-Za-z0-9]+).+?</object>#';
|
||||
$pre_replace = '<span class="youtube-embed">\1</span>';
|
||||
return preg_replace($pre_regex, $pre_replace, $html);
|
||||
}
|
||||
|
||||
function postFilter($html, $config, &$context) {
|
||||
$post_regex = '#<span class="youtube-embed">([A-Za-z0-9\-_]+)</span>#';
|
||||
$post_regex = '#<span class="youtube-embed">([A-Za-z0-9]+)</span>#';
|
||||
$post_replace = '<object width="425" height="350" '.
|
||||
'data="http://www.youtube.com/v/\1">'.
|
||||
'<param name="movie" value="http://www.youtube.com/v/\1"></param>'.
|
||||
|
@@ -536,7 +536,7 @@ class HTMLPurifier_HTMLDefinition
|
||||
|
||||
// protect against stdclasses floating around
|
||||
foreach ($this->info as $key => $obj) {
|
||||
if ($obj instanceof stdClass) {
|
||||
if (is_a($obj, 'stdclass')) {
|
||||
unset($this->info[$key]);
|
||||
}
|
||||
}
|
||||
|
@@ -144,7 +144,7 @@ class HTMLPurifier_Lexer
|
||||
* @param $prototype Optional prototype lexer.
|
||||
* @return Concrete lexer.
|
||||
*/
|
||||
static function create($prototype = null) {
|
||||
function create($prototype = null) {
|
||||
// we don't really care if it's a reference or a copy
|
||||
static $lexer = null;
|
||||
if ($prototype) {
|
||||
@@ -170,7 +170,7 @@ class HTMLPurifier_Lexer
|
||||
* @param $string HTML string to process.
|
||||
* @returns HTML with CDATA sections escaped.
|
||||
*/
|
||||
static function escapeCDATA($string) {
|
||||
function escapeCDATA($string) {
|
||||
return preg_replace_callback(
|
||||
'/<!\[CDATA\[(.+?)\]\]>/',
|
||||
array('HTMLPurifier_Lexer', 'CDATACallback'),
|
||||
@@ -188,7 +188,7 @@ class HTMLPurifier_Lexer
|
||||
* and 1 the inside of the CDATA section.
|
||||
* @returns Escaped internals of the CDATA section.
|
||||
*/
|
||||
static function CDATACallback($matches) {
|
||||
function CDATACallback($matches) {
|
||||
// not exactly sure why the character set is needed, but whatever
|
||||
return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8');
|
||||
}
|
||||
|
@@ -38,7 +38,7 @@ class HTMLPurifier_URISchemeRegistry
|
||||
* @note Pass a registry object $prototype with a compatible interface and
|
||||
* the function will copy it and return it all further times.
|
||||
*/
|
||||
static function &instance($prototype = null) {
|
||||
function &instance($prototype = null) {
|
||||
static $instance = null;
|
||||
if ($prototype !== null) {
|
||||
$instance = $prototype;
|
||||
|
@@ -3,7 +3,7 @@
|
||||
header('Content-type: text/html; charset=UTF-8');
|
||||
|
||||
require_once '../library/HTMLPurifier.auto.php';
|
||||
error_reporting(E_ALL | E_STRICT);
|
||||
error_reporting(E_ALL);
|
||||
|
||||
function escapeHTML($string) {
|
||||
$string = HTMLPurifier_Encoder::cleanUTF8($string);
|
||||
@@ -11,4 +11,4 @@ function escapeHTML($string) {
|
||||
return $string;
|
||||
}
|
||||
|
||||
?>
|
||||
?>
|
@@ -73,7 +73,7 @@ class Debugger
|
||||
/**
|
||||
* @static
|
||||
*/
|
||||
static function &instance() {
|
||||
function &instance() {
|
||||
static $soleInstance = false;
|
||||
if (!$soleInstance) $soleInstance = new Debugger();
|
||||
return $soleInstance;
|
||||
@@ -145,4 +145,4 @@ class Debugger
|
||||
|
||||
}
|
||||
|
||||
?>
|
||||
?>
|
@@ -16,8 +16,9 @@ class HTMLPurifier_LexerTest extends UnitTestCase
|
||||
|
||||
$this->DirectLex = new HTMLPurifier_Lexer_DirectLex();
|
||||
|
||||
// E_STRICT = 2048, int used for PHP4 compat
|
||||
if ( $GLOBALS['HTMLPurifierTest']['PEAR'] &&
|
||||
((error_reporting() & E_STRICT) != E_STRICT)
|
||||
((error_reporting() & 2048) != 2048)
|
||||
) {
|
||||
$this->_has_pear = true;
|
||||
require_once 'HTMLPurifier/Lexer/PEARSax3.php';
|
||||
@@ -326,4 +327,4 @@ class HTMLPurifier_LexerTest extends UnitTestCase
|
||||
|
||||
}
|
||||
|
||||
?>
|
||||
?>
|
@@ -3,7 +3,7 @@
|
||||
// call one file using /?f=FileTest.php , see $test_files array for
|
||||
// valid values
|
||||
|
||||
error_reporting(E_ALL | E_STRICT);
|
||||
error_reporting(E_ALL);
|
||||
define('HTMLPurifierTest', 1);
|
||||
|
||||
// wishlist: automated calling of this file from multiple PHP versions so we
|
||||
|
@@ -8,7 +8,7 @@ function tally_errors() {
|
||||
foreach ($queue->_expectation_queue as $e) {
|
||||
if (count($e) != 2) return; // fut-compat
|
||||
if (!isset($e[0])) return; // fut-compat
|
||||
$e[0]->_dumper = new SimpleDumper();
|
||||
$e[0]->_dumper = &new SimpleDumper();
|
||||
$this->fail('Error expectation not fulfilled: ' .
|
||||
$e[0]->testMessage(null));
|
||||
}
|
||||
|
Reference in New Issue
Block a user