1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-03 20:58:11 +02:00

[1.3.0] More control of URIs granted

# Invalid images are now removed, rather than replaced with a dud <img src="" alt="Invalid image" />. Previous behavior can be restored with new directive %Core.RemoveInvalidImg set to false.
! New directives %URI.DisableExternalResources and %URI.DisableResources
! New directive %Attr.DisableURI, which eliminates all hyperlinking
- Missing "Available since" documentation added

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@575 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2006-11-23 23:59:20 +00:00
parent 61b6ee7183
commit 49cb2a4a7c
10 changed files with 168 additions and 50 deletions

View File

@@ -24,7 +24,7 @@ HTMLPurifier_ConfigSchema::define(
'This directive has been available since 1.2.0.'
);
HTMLPurifier_ConfigSchema::Define(
HTMLPurifier_ConfigSchema::define(
'URI', 'DisableExternal', false, 'bool',
'Disables links to external websites. This is a highly effective '.
'anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no'.
@@ -34,6 +34,26 @@ HTMLPurifier_ConfigSchema::Define(
'This directive has been available since 1.2.0.'
);
HTMLPurifier_ConfigSchema::define(
'URI', 'DisableExternalResources', false, 'bool',
'Disables the embedding of external resources, preventing users from '.
'embedding things like images from other hosts. This prevents '.
'access tracking (good for email viewers), bandwidth leeching, '.
'cross-site request forging, goatse.cx posting, and '.
'other nasties, but also results in '.
'a loss of end-user functionality (they can\'t directly post a pic '.
'they posted from Flickr anymore). Use it if you don\'t have a '.
'robust user-content moderation team. This directive has been '.
'available since 1.3.0.'
);
HTMLPurifier_ConfigSchema::define(
'URI', 'DisableResources', false, 'bool',
'Disables embedding resources, essentially meaning no pictures. You can '.
'still link to them though. See %URI.DisableExternalResources for why '.
'this might be a good idea. This directive has been available since 1.3.0.'
);
/**
* Validates a URI as defined by RFC 3986.
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
@@ -43,15 +63,15 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
var $host;
var $PercentEncoder;
var $embeds;
var $embeds_resource;
/**
* @param $embeds Does the URI here result in an extra HTTP request?
* @param $embeds_resource_resource Does the URI here result in an extra HTTP request?
*/
function HTMLPurifier_AttrDef_URI($embeds = false) {
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
$this->host = new HTMLPurifier_AttrDef_Host();
$this->PercentEncoder = new HTMLPurifier_PercentEncoder();
$this->embeds = (bool) $embeds;
$this->embeds_resource = (bool) $embeds_resource;
}
function validate($uri, $config, &$context) {
@@ -105,18 +125,25 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
}
// the URI we're processing embeds a resource in the page, but the URI
// the URI we're processing embeds_resource a resource in the page, but the URI
// it references cannot be located
if ($this->embeds && !$scheme_obj->browsable) {
if ($this->embeds_resource && !$scheme_obj->browsable) {
return false;
}
if ($authority !== null) {
// remove URI if it's absolute and we disallow externals
// remove URI if it's absolute and we disabled externals or
// if it's absolute and embedded and we disabled external resources
unset($our_host);
if ($config->get('URI', 'DisableExternal')) {
if (
$config->get('URI', 'DisableExternal') ||
(
$config->get('URI', 'DisableExternalResources') &&
$this->embeds_resource
)
) {
$our_host = $config->get('URI', 'Host');
if ($our_host === null) return false;
}

View File

@@ -43,7 +43,8 @@ HTMLPurifier_ConfigSchema::define(
HTMLPurifier_ConfigSchema::define(
'HTML', 'Strict', false, 'bool',
'Determines whether or not to use Transitional (loose) or Strict rulesets.'
'Determines whether or not to use Transitional (loose) or Strict rulesets. '.
'This directive has been available since 1.3.0.'
);
HTMLPurifier_ConfigSchema::define(
@@ -53,14 +54,16 @@ HTMLPurifier_ConfigSchema::define(
'Example: by default value, <code>&lt;blockquote&gt;Foo&lt;/blockquote&gt;</code> '.
'would become <code>&lt;blockquote&gt;&lt;p&gt;Foo&lt;/p&gt;&lt;/blockquote&gt;</code>. The '.
'<code>&lt;p&gt;</code> tags can be replaced '.
'with whatever you desire, as long as it is a block level element.'
'with whatever you desire, as long as it is a block level element. '.
'This directive has been available since 1.3.0.'
);
HTMLPurifier_ConfigSchema::define(
'HTML', 'Parent', 'div', 'string',
'String name of element that HTML fragment passed to library will be '.
'inserted in. An interesting variation would be using span as the '.
'parent element, meaning that only inline tags would be allowed.'
'parent element, meaning that only inline tags would be allowed. '.
'This directive has been available since 1.3.0.'
);
HTMLPurifier_ConfigSchema::define(
@@ -72,7 +75,8 @@ HTMLPurifier_ConfigSchema::define(
'supported in the first place (like embed). If you change this, you '.
'probably also want to change %HTML.AllowedAttributes. '.
'<strong>Warning:</strong> If another directive conflicts with the '.
'elements here, <em>that</em> directive will win and override.'
'elements here, <em>that</em> directive will win and override. '.
'This directive has been available since 1.3.0.'
);
HTMLPurifier_ConfigSchema::define(
@@ -84,7 +88,14 @@ HTMLPurifier_ConfigSchema::define(
'elements here, <em>that</em> directive will win and override. For '.
'example, %HTML.EnableAttrID will take precedence over *.id in this '.
'directive. You must set that directive to true before you can use '.
'IDs at all.'
'IDs at all. This directive has been available since 1.3.0.'
);
HTMLPurifier_ConfigSchema::define(
'Attr', 'DisableURI', false, 'bool',
'Disables all URIs in all forms. Not sure why you\'d want to do that '.
'(after all, the Internet\'s founded on the notion of a hyperlink). '.
'This directive has been available since 1.3.0.'
);
/**
@@ -444,16 +455,18 @@ class HTMLPurifier_HTMLDefinition
$this->info['td']->attr['colspan'] =
$this->info['th']->attr['colspan'] = $e__NumberSpan;
$e_URI = new HTMLPurifier_AttrDef_URI();
$this->info['a']->attr['href'] =
$this->info['img']->attr['longdesc'] =
$this->info['del']->attr['cite'] =
$this->info['ins']->attr['cite'] =
$this->info['blockquote']->attr['cite'] =
$this->info['q']->attr['cite'] = $e_URI;
// URI that causes HTTP request
$this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true);
if (!$config->get('Attr', 'DisableURI')) {
$e_URI = new HTMLPurifier_AttrDef_URI();
$this->info['a']->attr['href'] =
$this->info['img']->attr['longdesc'] =
$this->info['del']->attr['cite'] =
$this->info['ins']->attr['cite'] =
$this->info['blockquote']->attr['cite'] =
$this->info['q']->attr['cite'] = $e_URI;
// URI that causes HTTP request
$this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true);
}
if (!$this->strict) {
$this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();

View File

@@ -5,6 +5,14 @@ require_once 'HTMLPurifier/HTMLDefinition.php';
require_once 'HTMLPurifier/Generator.php';
require_once 'HTMLPurifier/TagTransform.php';
HTMLPurifier_ConfigSchema::define(
'Core', 'RemoveInvalidImg', true, 'bool',
'This directive enables pre-emptive URI checking in <code>img</code> '.
'tags, as the attribute validation strategy is not authorized to '.
'remove elements from the document. This directive has been available '.
'since 1.3.0, revert to pre-1.3.0 behavior by setting to false.'
);
/**
* Removes all unrecognized tags from the list of tokens.
*
@@ -25,7 +33,23 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
if (!empty( $token->is_tag )) {
// DEFINITION CALL
if (isset($definition->info[$token->name])) {
// leave untouched
// leave untouched, except for a few special cases:
// hard-coded image special case, pre-emptively drop
// if not available. Probably not abstract-able
if ( $token->name == 'img' ) {
if (!isset($token->attr['src'])) continue;
if (!isset($definition->info['img']->attr['src'])) {
continue;
}
$token->attr['src'] =
$definition->
info['img']->
attr['src']->
validate($token->attr['src']);
if ($token->attr['src'] === false) continue;
}
} elseif (
isset($definition->info_tag_transform[$token->name])
) {