mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-07-30 19:00:10 +02:00
[2.1.0] Implement MakeAbsolute URI filter
- Move some directives with complex dependencies to URIDefinition - Fix a missing extends - Add hierarchical information to URI schemes - Fix bug in URIHarness. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1346 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
@@ -7,54 +7,59 @@ require_once 'HTMLPurifier/URISchemeRegistry.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI/Host.php';
|
||||
require_once 'HTMLPurifier/PercentEncoder.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DefaultScheme', 'http', 'string',
|
||||
'Defines through what scheme the output will be served, in order to '.
|
||||
'select the proper object validator when no scheme information is present.'
|
||||
);
|
||||
// special case filtering directives
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'Host', null, 'string/null',
|
||||
'Defines the domain name of the server, so we can determine whether or '.
|
||||
'an absolute URI is from your website or not. Not strictly necessary, '.
|
||||
'as users should be using relative URIs to reference resources on your '.
|
||||
'website. It will, however, let you use absolute URIs to link to '.
|
||||
'subdomains of the domain you post here: i.e. example.com will allow '.
|
||||
'sub.example.com. However, higher up domains will still be excluded: '.
|
||||
'if you set %URI.Host to sub.example.com, example.com will be blocked. '.
|
||||
'This directive has been available since 1.2.0.'
|
||||
);
|
||||
'URI', 'Munge', null, 'string/null', '
|
||||
<p>
|
||||
Munges all browsable (usually http, https and ftp)
|
||||
absolute URI\'s into another URI, usually a URI redirection service.
|
||||
This directive accepts a URI, formatted with a <code>%s</code> where
|
||||
the url-encoded original URI should be inserted (sample:
|
||||
<code>http://www.google.com/url?q=%s</code>).
|
||||
</p>
|
||||
<p>
|
||||
Uses for this directive:
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
Prevent PageRank leaks, while being fairly transparent
|
||||
to users (you may also want to add some client side JavaScript to
|
||||
override the text in the statusbar). <strong>Notice</strong>:
|
||||
Many security experts believe that this form of protection does not deter spam-bots.
|
||||
</li>
|
||||
<li>
|
||||
Redirect users to a splash page telling them they are leaving your
|
||||
website. While this is poor usability practice, it is often mandated
|
||||
in corporate environments.
|
||||
</li>
|
||||
</ul>
|
||||
<p>
|
||||
This directive has been available since 1.3.0.
|
||||
</p>
|
||||
');
|
||||
|
||||
// disabling directives
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DisableResources', false, 'bool',
|
||||
'Disables embedding resources, essentially meaning no pictures. You can '.
|
||||
'still link to them though. See %URI.DisableExternalResources for why '.
|
||||
'this might be a good idea. This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'Munge', null, 'string/null',
|
||||
'Munges all browsable (usually http, https and ftp) URI\'s into some URL '.
|
||||
'redirection service. Pass this directive a URI, with %s inserted where '.
|
||||
'the url-encoded original URI should be inserted (sample: '.
|
||||
'<code>http://www.google.com/url?q=%s</code>). '.
|
||||
'This prevents PageRank leaks, while being as transparent as possible '.
|
||||
'to users (you may also want to add some client side JavaScript to '.
|
||||
'override the text in the statusbar). Warning: many security experts '.
|
||||
'believe that this form of protection does not deter spam-bots. '.
|
||||
'You can also use this directive to redirect users to a splash page '.
|
||||
'telling them they are leaving your website. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'Disable', false, 'bool',
|
||||
'Disables all URIs in all forms. Not sure why you\'d want to do that '.
|
||||
'(after all, the Internet\'s founded on the notion of a hyperlink). '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
'URI', 'Disable', false, 'bool', '
|
||||
<p>
|
||||
Disables all URIs in all forms. Not sure why you\'d want to do that
|
||||
(after all, the Internet\'s founded on the notion of a hyperlink).
|
||||
This directive has been available since 1.3.0.
|
||||
</p>
|
||||
');
|
||||
HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DisableResources', false, 'bool', '
|
||||
<p>
|
||||
Disables embedding resources, essentially meaning no pictures. You can
|
||||
still link to them though. See %URI.DisableExternalResources for why
|
||||
this might be a good idea. This directive has been available since 1.3.0.
|
||||
</p>
|
||||
');
|
||||
|
||||
/**
|
||||
* Validates a URI as defined by RFC 3986.
|
||||
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
|
||||
@@ -118,7 +123,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
|
||||
// munge scheme off if necessary (this must be last)
|
||||
if (!is_null($uri->scheme) && is_null($uri->host)) {
|
||||
if ($config->get('URI', 'DefaultScheme') == $uri->scheme) {
|
||||
if ($uri_def->defaultScheme == $uri->scheme) {
|
||||
$uri->scheme = null;
|
||||
}
|
||||
}
|
||||
|
@@ -37,11 +37,12 @@ class HTMLPurifier_URI
|
||||
if (!$scheme_obj) return false; // invalid scheme, clean it out
|
||||
} else {
|
||||
// no scheme: retrieve the default one
|
||||
$scheme_obj = $registry->getScheme($config->get('URI', 'DefaultScheme'), $config, $context);
|
||||
$def = $config->getDefinition('URI');
|
||||
$scheme_obj = $registry->getScheme($def->defaultScheme, $config, $context);
|
||||
if (!$scheme_obj) {
|
||||
// something funky happened to the default scheme object
|
||||
trigger_error(
|
||||
'Default scheme object "' . $config->get('URI', 'DefaultScheme') . '" was not readable',
|
||||
'Default scheme object "' . $def->defaultScheme . '" was not readable',
|
||||
E_USER_WARNING
|
||||
);
|
||||
return false;
|
||||
@@ -107,5 +108,12 @@ class HTMLPurifier_URI
|
||||
return $result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a copy of the URI object
|
||||
*/
|
||||
function copy() {
|
||||
return unserialize(serialize($this));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@@ -2,10 +2,12 @@
|
||||
|
||||
require_once 'HTMLPurifier/Definition.php';
|
||||
require_once 'HTMLPurifier/URIFilter.php';
|
||||
require_once 'HTMLPurifier/URIParser.php';
|
||||
|
||||
require_once 'HTMLPurifier/URIFilter/DisableExternal.php';
|
||||
require_once 'HTMLPurifier/URIFilter/DisableExternalResources.php';
|
||||
require_once 'HTMLPurifier/URIFilter/HostBlacklist.php';
|
||||
require_once 'HTMLPurifier/URIFilter/MakeAbsolute.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DefinitionID', null, 'string/null', '
|
||||
@@ -25,6 +27,48 @@ HTMLPurifier_ConfigSchema::define(
|
||||
</p>
|
||||
');
|
||||
|
||||
// informative URI directives
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DefaultScheme', 'http', 'string', '
|
||||
<p>
|
||||
Defines through what scheme the output will be served, in order to
|
||||
select the proper object validator when no scheme information is present.
|
||||
</p>
|
||||
');
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'Host', null, 'string/null', '
|
||||
<p>
|
||||
Defines the domain name of the server, so we can determine whether or
|
||||
an absolute URI is from your website or not. Not strictly necessary,
|
||||
as users should be using relative URIs to reference resources on your
|
||||
website. It will, however, let you use absolute URIs to link to
|
||||
subdomains of the domain you post here: i.e. example.com will allow
|
||||
sub.example.com. However, higher up domains will still be excluded:
|
||||
if you set %URI.Host to sub.example.com, example.com will be blocked.
|
||||
<strong>Note:</strong> This directive overrides %URI.Base because
|
||||
a given page may be on a sub-domain, but you wish HTML Purifier to be
|
||||
more relaxed and allow some of the parent domains too.
|
||||
This directive has been available since 1.2.0.
|
||||
</p>
|
||||
');
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'Base', null, 'string/null', '
|
||||
<p>
|
||||
The base URI is the URI of the document this purified HTML will be
|
||||
inserted into. This information is important if HTML Purifier needs
|
||||
to calculate absolute URIs from relative URIs, such as when %URI.MakeAbsolute
|
||||
is on. You may use a non-absolute URI for this value, but behavior
|
||||
may vary (%URI.MakeAbsolute deals nicely with both absolute and
|
||||
relative paths, but forwards-compatibility is not guaranteed).
|
||||
<strong>Warning:</strong> If set, the scheme on this URI
|
||||
overrides the one specified by %URI.DefaultScheme. This directive has
|
||||
been available since 2.1.0.
|
||||
</p>
|
||||
');
|
||||
|
||||
class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
|
||||
{
|
||||
|
||||
@@ -32,10 +76,26 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
|
||||
var $filters = array();
|
||||
var $registeredFilters = array();
|
||||
|
||||
/**
|
||||
* HTMLPurifier_URI object of the base specified at %URI.Base
|
||||
*/
|
||||
var $base;
|
||||
|
||||
/**
|
||||
* String host to consider "home" base
|
||||
*/
|
||||
var $host;
|
||||
|
||||
/**
|
||||
* Name of default scheme based on %URI.DefaultScheme and %URI.Base
|
||||
*/
|
||||
var $defaultScheme;
|
||||
|
||||
function HTMLPurifier_URIDefinition() {
|
||||
$this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal());
|
||||
$this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources());
|
||||
$this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist());
|
||||
$this->registerFilter(new HTMLPurifier_URIFilter_MakeAbsolute());
|
||||
}
|
||||
|
||||
function registerFilter($filter) {
|
||||
@@ -43,6 +103,11 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
|
||||
}
|
||||
|
||||
function doSetup($config) {
|
||||
$this->setupFilters($config);
|
||||
$this->setupMemberVariables($config);
|
||||
}
|
||||
|
||||
function setupFilters($config) {
|
||||
foreach ($this->registeredFilters as $name => $filter) {
|
||||
$conf = $config->get('URI', $name);
|
||||
if ($conf !== false && $conf !== null) {
|
||||
@@ -53,6 +118,18 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
|
||||
unset($this->registeredFilters);
|
||||
}
|
||||
|
||||
function setupMemberVariables($config) {
|
||||
$this->host = $config->get('URI', 'Host');
|
||||
$base_uri = $config->get('URI', 'Base');
|
||||
if (!is_null($base_uri)) {
|
||||
$parser = new HTMLPurifier_URIParser();
|
||||
$this->base = $parser->parse($base_uri);
|
||||
$this->defaultScheme = $this->base->scheme;
|
||||
if (is_null($this->host)) $this->host = $this->base->host;
|
||||
}
|
||||
if (is_null($this->defaultScheme)) $this->defaultScheme = $config->get('URI', 'DefaultScheme');
|
||||
}
|
||||
|
||||
function filter(&$uri, $config, &$context) {
|
||||
foreach ($this->filters as $name => $x) {
|
||||
$result = $this->filters[$name]->filter($uri, $config, $context);
|
||||
|
@@ -10,7 +10,7 @@ HTMLPurifier_ConfigSchema::define(
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
class HTMLPurifier_URIFilter_HostBlacklist
|
||||
class HTMLPurifier_URIFilter_HostBlacklist extends HTMLPurifier_URIFilter
|
||||
{
|
||||
var $name = 'HostBlacklist';
|
||||
var $blacklist = array();
|
||||
|
115
library/HTMLPurifier/URIFilter/MakeAbsolute.php
Normal file
115
library/HTMLPurifier/URIFilter/MakeAbsolute.php
Normal file
@@ -0,0 +1,115 @@
|
||||
<?php
|
||||
|
||||
// does not support network paths
|
||||
|
||||
require_once 'HTMLPurifier/URIFilter.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'MakeAbsolute', false, 'bool', '
|
||||
<p>
|
||||
Converts all URIs into absolute forms. This is useful when the HTML
|
||||
being filtered assumes a specific base path, but will actually be
|
||||
viewed in a different context (and setting an alternate base URI is
|
||||
not possible). %URI.Base must be set for this directive to work.
|
||||
This directive has been available since 2.1.0.
|
||||
</p>
|
||||
');
|
||||
|
||||
class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
|
||||
{
|
||||
var $name = 'MakeAbsolute';
|
||||
var $base;
|
||||
var $basePathStack = array();
|
||||
function prepare($config) {
|
||||
$def = $config->getDefinition('URI');
|
||||
$this->base = $def->base;
|
||||
if (is_null($this->base)) {
|
||||
trigger_error('URI.MakeAbsolute is being ignored due to lack of value for URI.Base configuration', E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
$this->base->fragment = null; // fragment is invalid for base URI
|
||||
$stack = explode('/', $this->base->path);
|
||||
array_pop($stack); // discard last segment
|
||||
$stack = $this->_collapseStack($stack); // do pre-parsing
|
||||
$this->basePathStack = $stack;
|
||||
}
|
||||
function filter(&$uri, $config, &$context) {
|
||||
if (is_null($this->base)) return true; // abort early
|
||||
if (
|
||||
$uri->path === '' && is_null($uri->scheme) &&
|
||||
is_null($uri->host) && is_null($uri->query) && is_null($uri->fragment)
|
||||
) {
|
||||
// reference to current document
|
||||
$uri = $this->base->copy();
|
||||
return true;
|
||||
}
|
||||
if (!is_null($uri->scheme)) {
|
||||
// absolute URI already: don't change
|
||||
if (!is_null($uri->host)) return true;
|
||||
$scheme_obj = $uri->getSchemeObj($config, $context);
|
||||
if (!$scheme_obj->hierarchical) {
|
||||
// non-hierarchal URI with explicit scheme, don't change
|
||||
return true;
|
||||
}
|
||||
// special case: had a scheme but always is hierarchical and had no authority
|
||||
}
|
||||
if (!is_null($uri->host)) {
|
||||
// network path, don't bother
|
||||
return true;
|
||||
}
|
||||
if ($uri->path === '') {
|
||||
$uri->path = $this->base->path;
|
||||
}elseif ($uri->path[0] !== '/') {
|
||||
// relative path, needs more complicated processing
|
||||
$stack = explode('/', $uri->path);
|
||||
$new_stack = array_merge($this->basePathStack, $stack);
|
||||
$new_stack = $this->_collapseStack($new_stack);
|
||||
$uri->path = implode('/', $new_stack);
|
||||
}
|
||||
// re-combine
|
||||
$uri->scheme = $this->base->scheme;
|
||||
if (is_null($uri->userinfo)) $uri->userinfo = $this->base->userinfo;
|
||||
if (is_null($uri->host)) $uri->host = $this->base->host;
|
||||
if (is_null($uri->port)) $uri->port = $this->base->port;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve dots and double-dots in a path stack
|
||||
* @private
|
||||
*/
|
||||
function _collapseStack($stack) {
|
||||
$result = array();
|
||||
for ($i = 0; isset($stack[$i]); $i++) {
|
||||
$is_folder = false;
|
||||
// absorb an internally duplicated slash
|
||||
if ($stack[$i] == '' && $i && isset($stack[$i+1])) continue;
|
||||
if ($stack[$i] == '..') {
|
||||
if (!empty($result)) {
|
||||
$segment = array_pop($result);
|
||||
if ($segment === '' && empty($result)) {
|
||||
// error case: attempted to back out too far:
|
||||
// restore the leading slash
|
||||
$result[] = '';
|
||||
} elseif ($segment === '..') {
|
||||
$result[] = '..'; // cannot remove .. with ..
|
||||
}
|
||||
} else {
|
||||
// relative path, preserve the double-dots
|
||||
$result[] = '..';
|
||||
}
|
||||
$is_folder = true;
|
||||
continue;
|
||||
}
|
||||
if ($stack[$i] == '.') {
|
||||
// silently absorb
|
||||
$is_folder = true;
|
||||
continue;
|
||||
}
|
||||
$result[] = $stack[$i];
|
||||
}
|
||||
if ($is_folder) $result[] = '';
|
||||
return $result;
|
||||
}
|
||||
}
|
||||
|
@@ -19,6 +19,12 @@ class HTMLPurifier_URIScheme
|
||||
*/
|
||||
var $browsable = false;
|
||||
|
||||
/**
|
||||
* Whether or not the URI always uses <hier_part>, resolves edge cases
|
||||
* with making relative URIs absolute
|
||||
*/
|
||||
var $hierarchical = false;
|
||||
|
||||
/**
|
||||
* Validates the components of a URI
|
||||
* @note This implementation should be called by children if they define
|
||||
|
@@ -9,6 +9,7 @@ class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme {
|
||||
|
||||
var $default_port = 21;
|
||||
var $browsable = true; // usually
|
||||
var $hierarchical = true;
|
||||
|
||||
function validate(&$uri, $config, &$context) {
|
||||
parent::validate($uri, $config, $context);
|
||||
|
@@ -9,6 +9,7 @@ class HTMLPurifier_URIScheme_http extends HTMLPurifier_URIScheme {
|
||||
|
||||
var $default_port = 80;
|
||||
var $browsable = true;
|
||||
var $hierarchical = true;
|
||||
|
||||
function validate(&$uri, $config, &$context) {
|
||||
parent::validate($uri, $config, $context);
|
||||
|
Reference in New Issue
Block a user