mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-03 20:58:11 +02:00
[1.3.0] More control of URIs granted
# Invalid images are now removed, rather than replaced with a dud <img src="" alt="Invalid image" />. Previous behavior can be restored with new directive %Core.RemoveInvalidImg set to false.
! New directives %URI.DisableExternalResources and %URI.DisableResources
! New directive %Attr.DisableURI, which eliminates all hyperlinking
- Missing "Available since" documentation added

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@575 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
@@ -24,7 +24,7 @@ HTMLPurifier_ConfigSchema::define(
|
||||
'This directive has been available since 1.2.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::Define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DisableExternal', false, 'bool',
|
||||
'Disables links to external websites. This is a highly effective '.
|
||||
'anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no'.
|
||||
@@ -34,6 +34,26 @@ HTMLPurifier_ConfigSchema::Define(
|
||||
'This directive has been available since 1.2.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DisableExternalResources', false, 'bool',
|
||||
'Disables the embedding of external resources, preventing users from '.
|
||||
'embedding things like images from other hosts. This prevents '.
|
||||
'access tracking (good for email viewers), bandwidth leeching, '.
|
||||
'cross-site request forging, goatse.cx posting, and '.
|
||||
'other nasties, but also results in '.
|
||||
'a loss of end-user functionality (they can\'t directly post a pic '.
|
||||
'they posted from Flickr anymore). Use it if you don\'t have a '.
|
||||
'robust user-content moderation team. This directive has been '.
|
||||
'available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DisableResources', false, 'bool',
|
||||
'Disables embedding resources, essentially meaning no pictures. You can '.
|
||||
'still link to them though. See %URI.DisableExternalResources for why '.
|
||||
'this might be a good idea. This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Validates a URI as defined by RFC 3986.
|
||||
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
|
||||
@@ -43,15 +63,15 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
|
||||
var $host;
|
||||
var $PercentEncoder;
|
||||
var $embeds;
|
||||
var $embeds_resource;
|
||||
|
||||
/**
|
||||
* @param $embeds Does the URI here result in an extra HTTP request?
|
||||
* @param $embeds_resource Does the URI here result in an extra HTTP request?
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_URI($embeds = false) {
|
||||
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
|
||||
$this->host = new HTMLPurifier_AttrDef_Host();
|
||||
$this->PercentEncoder = new HTMLPurifier_PercentEncoder();
|
||||
$this->embeds = (bool) $embeds;
|
||||
$this->embeds_resource = (bool) $embeds_resource;
|
||||
}
|
||||
|
||||
function validate($uri, $config, &$context) {
|
||||
@@ -105,18 +125,25 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
}
|
||||
|
||||
|
||||
// the URI we're processing embeds a resource in the page, but the URI
|
||||
// the URI we're processing embeds a resource in the page, but the URI
|
||||
// it references cannot be located
|
||||
if ($this->embeds && !$scheme_obj->browsable) {
|
||||
if ($this->embeds_resource && !$scheme_obj->browsable) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
if ($authority !== null) {
|
||||
|
||||
// remove URI if it's absolute and we disallow externals
|
||||
// remove URI if it's absolute and we disabled externals or
|
||||
// if it's absolute and embedded and we disabled external resources
|
||||
unset($our_host);
|
||||
if ($config->get('URI', 'DisableExternal')) {
|
||||
if (
|
||||
$config->get('URI', 'DisableExternal') ||
|
||||
(
|
||||
$config->get('URI', 'DisableExternalResources') &&
|
||||
$this->embeds_resource
|
||||
)
|
||||
) {
|
||||
$our_host = $config->get('URI', 'Host');
|
||||
if ($our_host === null) return false;
|
||||
}
|
||||
|
@@ -43,7 +43,8 @@ HTMLPurifier_ConfigSchema::define(
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'HTML', 'Strict', false, 'bool',
|
||||
'Determines whether or not to use Transitional (loose) or Strict rulesets.'
|
||||
'Determines whether or not to use Transitional (loose) or Strict rulesets. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
@@ -53,14 +54,16 @@ HTMLPurifier_ConfigSchema::define(
|
||||
'Example: by default value, <code><blockquote>Foo</blockquote></code> '.
|
||||
'would become <code><blockquote><p>Foo</p></blockquote></code>. The '.
|
||||
'<code><p></code> tags can be replaced '.
|
||||
'with whatever you desire, as long as it is a block level element.'
|
||||
'with whatever you desire, as long as it is a block level element. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'HTML', 'Parent', 'div', 'string',
|
||||
'String name of element that HTML fragment passed to library will be '.
|
||||
'inserted in. An interesting variation would be using span as the '.
|
||||
'parent element, meaning that only inline tags would be allowed.'
|
||||
'parent element, meaning that only inline tags would be allowed. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
@@ -72,7 +75,8 @@ HTMLPurifier_ConfigSchema::define(
|
||||
'supported in the first place (like embed). If you change this, you '.
|
||||
'probably also want to change %HTML.AllowedAttributes. '.
|
||||
'<strong>Warning:</strong> If another directive conflicts with the '.
|
||||
'elements here, <em>that</em> directive will win and override.'
|
||||
'elements here, <em>that</em> directive will win and override. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
@@ -84,7 +88,14 @@ HTMLPurifier_ConfigSchema::define(
|
||||
'elements here, <em>that</em> directive will win and override. For '.
|
||||
'example, %HTML.EnableAttrID will take precedence over *.id in this '.
|
||||
'directive. You must set that directive to true before you can use '.
|
||||
'IDs at all.'
|
||||
'IDs at all. This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'DisableURI', false, 'bool',
|
||||
'Disables all URIs in all forms. Not sure why you\'d want to do that '.
|
||||
'(after all, the Internet\'s founded on the notion of a hyperlink). '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
/**
|
||||
@@ -444,16 +455,18 @@ class HTMLPurifier_HTMLDefinition
|
||||
$this->info['td']->attr['colspan'] =
|
||||
$this->info['th']->attr['colspan'] = $e__NumberSpan;
|
||||
|
||||
$e_URI = new HTMLPurifier_AttrDef_URI();
|
||||
$this->info['a']->attr['href'] =
|
||||
$this->info['img']->attr['longdesc'] =
|
||||
$this->info['del']->attr['cite'] =
|
||||
$this->info['ins']->attr['cite'] =
|
||||
$this->info['blockquote']->attr['cite'] =
|
||||
$this->info['q']->attr['cite'] = $e_URI;
|
||||
|
||||
// URI that causes HTTP request
|
||||
$this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true);
|
||||
if (!$config->get('Attr', 'DisableURI')) {
|
||||
$e_URI = new HTMLPurifier_AttrDef_URI();
|
||||
$this->info['a']->attr['href'] =
|
||||
$this->info['img']->attr['longdesc'] =
|
||||
$this->info['del']->attr['cite'] =
|
||||
$this->info['ins']->attr['cite'] =
|
||||
$this->info['blockquote']->attr['cite'] =
|
||||
$this->info['q']->attr['cite'] = $e_URI;
|
||||
|
||||
// URI that causes HTTP request
|
||||
$this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true);
|
||||
}
|
||||
|
||||
if (!$this->strict) {
|
||||
$this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
|
||||
|
@@ -5,6 +5,14 @@ require_once 'HTMLPurifier/HTMLDefinition.php';
|
||||
require_once 'HTMLPurifier/Generator.php';
|
||||
require_once 'HTMLPurifier/TagTransform.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'RemoveInvalidImg', true, 'bool',
|
||||
'This directive enables pre-emptive URI checking in <code>img</code> '.
|
||||
'tags, as the attribute validation strategy is not authorized to '.
|
||||
'remove elements from the document. This directive has been available '.
|
||||
'since 1.3.0, revert to pre-1.3.0 behavior by setting to false.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Removes all unrecognized tags from the list of tokens.
|
||||
*
|
||||
@@ -25,7 +33,23 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
||||
if (!empty( $token->is_tag )) {
|
||||
// DEFINITION CALL
|
||||
if (isset($definition->info[$token->name])) {
|
||||
// leave untouched
|
||||
// leave untouched, except for a few special cases:
|
||||
|
||||
// hard-coded image special case, pre-emptively drop
|
||||
// if not available. Probably not abstract-able
|
||||
if ( $token->name == 'img' ) {
|
||||
if (!isset($token->attr['src'])) continue;
|
||||
if (!isset($definition->info['img']->attr['src'])) {
|
||||
continue;
|
||||
}
|
||||
$token->attr['src'] =
|
||||
$definition->
|
||||
info['img']->
|
||||
attr['src']->
|
||||
validate($token->attr['src']);
|
||||
if ($token->attr['src'] === false) continue;
|
||||
}
|
||||
|
||||
} elseif (
|
||||
isset($definition->info_tag_transform[$token->name])
|
||||
) {
|
||||
|
Reference in New Issue
Block a user