mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-07-31 03:10:09 +02:00
[3.1.0] Revamp URI handling of percent encoding and validation.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1709 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
@@ -1,7 +1,12 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* HTML Purifier's internal representation of a URI
|
||||
* HTML Purifier's internal representation of a URI.
|
||||
* @note
|
||||
* Internal data-structures are completely escaped. If the data needs
|
||||
* to be used in a non-URI context (which is very unlikely), be sure
|
||||
* to decode it first. The URI may not necessarily be well-formed until
|
||||
* validate() is called.
|
||||
*/
|
||||
class HTMLPurifier_URI
|
||||
{
|
||||
@@ -49,13 +54,27 @@ class HTMLPurifier_URI
|
||||
}
|
||||
|
||||
/**
|
||||
* Generic validation method applicable for all schemes
|
||||
* Generic validation method applicable for all schemes. May modify
|
||||
* this URI in order to get it into a compliant form.
|
||||
* @param $config Instance of HTMLPurifier_Config
|
||||
* @param $context Instance of HTMLPurifier_Context
|
||||
* @return True if validation/filtering succeeds, false if failure
|
||||
*/
|
||||
public function validate($config, $context) {
|
||||
|
||||
// ABNF definitions from RFC 3986
|
||||
$chars_sub_delims = '!$&\'()*+,;=';
|
||||
$chars_gen_delims = ':/?#[]@';
|
||||
$chars_pchar = $chars_sub_delims . ':@';
|
||||
|
||||
// validate scheme (MUST BE FIRST!)
|
||||
if (!is_null($this->scheme) && is_null($this->host)) {
|
||||
$def = $config->getDefinition('URI');
|
||||
if ($def->defaultScheme === $this->scheme) {
|
||||
$this->scheme = null;
|
||||
}
|
||||
}
|
||||
|
||||
// validate host
|
||||
if (!is_null($this->host)) {
|
||||
$host_def = new HTMLPurifier_AttrDef_URI_Host();
|
||||
@@ -63,18 +82,51 @@ class HTMLPurifier_URI
|
||||
if ($this->host === false) $this->host = null;
|
||||
}
|
||||
|
||||
// validate username
|
||||
if (!is_null($this->userinfo)) {
|
||||
$encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
|
||||
$this->userinfo = $encoder->encode($this->userinfo);
|
||||
}
|
||||
|
||||
// validate port
|
||||
if (!is_null($this->port)) {
|
||||
if ($this->port < 1 || $this->port > 65535) $this->port = null;
|
||||
}
|
||||
|
||||
// query and fragment are quite simple in terms of definition:
|
||||
// *( pchar / "/" / "?" ), so define their validation routines
|
||||
// when we start fixing percent encoding
|
||||
|
||||
// path gets to be validated against a hodge-podge of rules depending
|
||||
// on the status of authority and scheme, but it's not that important,
|
||||
// esp. since it won't be applicable to everyone
|
||||
// validate path
|
||||
$path_parts = array();
|
||||
$segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
|
||||
if (!is_null($this->host)) {
|
||||
// path-abempty (hier and relative)
|
||||
$this->path = $segments_encoder->encode($this->path);
|
||||
} elseif ($this->path !== '' && $this->path[0] === '/') {
|
||||
// path-absolute (hier and relative)
|
||||
if (strlen($this->path) >= 2 && $this->path[1] === '/') {
|
||||
// This shouldn't ever happen!
|
||||
$this->path = '';
|
||||
} else {
|
||||
$this->path = $segments_encoder->encode($this->path);
|
||||
}
|
||||
} elseif (!is_null($this->scheme) && $this->path !== '') {
|
||||
// path-rootless (hier)
|
||||
// Short circuit evaluation means we don't need to check nz
|
||||
$this->path = $segments_encoder->encode($this->path);
|
||||
} elseif (is_null($this->scheme) && $this->path !== '') {
|
||||
// path-noscheme (relative)
|
||||
// (once again, not checking nz)
|
||||
$segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
|
||||
$c = strpos($this->path, '/');
|
||||
if ($c !== false) {
|
||||
$this->path =
|
||||
$segment_nc_encoder->encode(substr($this->path, 0, $c)) .
|
||||
$segments_encoder->encode(substr($this->path, $c));
|
||||
} else {
|
||||
$this->path = $segment_nc_encoder->encode($this->path);
|
||||
}
|
||||
} else {
|
||||
// path-empty (hier and relative)
|
||||
$this->path = ''; // just to be safe
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
|
Reference in New Issue
Block a user