1
0
mirror of https://github.com/guzzle/guzzle.git synced 2025-02-12 03:24:26 +01:00

Improving URL encoding of path and query.

This commit updates path and query string encoding to better handle
strings that are already percent-encoded. All of the characters that
RFC 3986 allows in paths and queries may now be present in a URL
without being encoded.
This commit is contained in:
Michael Dowling 2014-10-30 16:52:02 -07:00
parent 55d00ea7ae
commit 241611c4cd
4 changed files with 98 additions and 34 deletions

View File

@ -9,12 +9,14 @@ class Query extends Collection
const RFC3986 = 'RFC3986';
const RFC1738 = 'RFC1738';
/** @var bool URL encode fields and values */
private $encoding = self::RFC3986;
/** @var callable Encoding function */
private $encoding = [__CLASS__, 'rfc3986Encoding'];
/** @var callable */
private $aggregator;
private static $queryPattern = '/[^a-zA-Z0-9\-\._~!\$\'\(\)\*\+,;%:@\/\?]+|%(?![A-Fa-f0-9]{2})/';
/**
* Parse a query string into a Query object
*
@ -75,34 +77,21 @@ class Query extends Collection
$result = '';
$aggregator = $this->aggregator;
$encoder = $this->encoding;
foreach ($aggregator($this->data) as $key => $values) {
foreach ($values as $value) {
if ($result) {
$result .= '&';
}
if ($this->encoding == self::RFC1738) {
$result .= urlencode($key);
if ($value !== null) {
$result .= '=' . urlencode($value);
}
} elseif ($this->encoding == self::RFC3986) {
$result .= rawurlencode($key);
if ($value !== null) {
$result .= '=' . rawurlencode($value);
}
} else {
$result .= $key;
if ($value !== null) {
$result .= '=' . $value;
}
$result .= $encoder($key);
if ($value !== null) {
$result .= '=' . $encoder($value);
}
}
}
// Query strings allow for "/" characters.
// See: http://tools.ietf.org/html/rfc3986#section-3.4
return $this->encoding ? str_replace('%2F', '/', $result) : $result;
return $result;
}
/**
@ -131,10 +120,18 @@ class Query extends Collection
*/
public function setEncodingType($type)
{
    // Strict comparisons on purpose. A loosely-compared switch would let
    // true (and, before PHP 8, the integer 0) select RFC 3986, and would
    // let null, '', '0' or [] match false and silently disable encoding.
    if ($type === self::RFC3986) {
        $this->encoding = [__CLASS__, 'rfc3986Encoding'];
    } elseif ($type === self::RFC1738) {
        $this->encoding = [__CLASS__, 'rfc1738Encoding'];
    } elseif ($type === false) {
        // Encoding disabled: keys and values are emitted exactly as stored.
        $this->encoding = function ($v) { return $v; };
    } else {
        throw new \InvalidArgumentException('Invalid URL encoding type');
    }
}
@ -207,4 +204,26 @@ class Query extends Collection
return $result;
}
/**
 * Encodes a query key or value using rawurlencode() semantics.
 *
 * Only the portions of the string matched by self::$queryPattern are
 * rewritten; every other byte is copied through untouched.
 *
 * @param string $str Key or value to encode
 *
 * @return string
 */
private static function rfc3986Encoding($str)
{
    return preg_replace_callback(
        self::$queryPattern,
        [__CLASS__, 'rawurlencodeMatch'],
        $str
    );
}
/**
 * Encodes a query key or value using urlencode() semantics, so a space
 * that needs encoding is written as "+".
 *
 * Only the portions of the string matched by self::$queryPattern are
 * rewritten; every other byte is copied through untouched.
 *
 * @param string $str Key or value to encode
 *
 * @return string
 */
private static function rfc1738Encoding($str)
{
    return preg_replace_callback(
        self::$queryPattern,
        [__CLASS__, 'urlencodeMatch'],
        $str
    );
}
/**
 * preg_replace_callback() handler: percent-encodes the full match with
 * rawurlencode().
 *
 * @param array $match Match array; index 0 holds the matched text
 *
 * @return string
 */
private static function rawurlencodeMatch(array $match)
{
    $matchedText = $match[0];

    return rawurlencode($matchedText);
}
/**
 * preg_replace_callback() handler: encodes the full match with
 * urlencode().
 *
 * @param array $match Match array; index 0 holds the matched text
 *
 * @return string
 */
private static function urlencodeMatch(array $match)
{
    $matchedText = $match[0];

    return urlencode($matchedText);
}
}

View File

@ -1,6 +1,8 @@
<?php
namespace GuzzleHttp;
use GuzzleHttp\Ring\Core;
/**
* Parses and generates URLs based on URL parts
*/
@ -13,11 +15,12 @@ class Url
private $password;
private $path = '';
private $fragment;
private static $defaultPorts = ['http' => 80, 'https' => 443, 'ftp' => 21];
/** @var Query Query part of the URL */
private $query;
private static $defaultPorts = ['http' => 80, 'https' => 443, 'ftp' => 21];
/**
* Factory method to create a new URL from a URL string
*
@ -28,9 +31,9 @@ class Url
*/
public static function fromString($url)
{
static $defaults = array('scheme' => null, 'host' => null,
static $defaults = ['scheme' => null, 'host' => null,
'path' => null, 'port' => null, 'query' => null,
'user' => null, 'pass' => null, 'fragment' => null);
'user' => null, 'pass' => null, 'fragment' => null];
if (false === ($parts = parse_url($url))) {
throw new \InvalidArgumentException('Unable to parse malformed '
@ -141,11 +144,13 @@ class Url
$this->username = $username;
$this->password = $password;
$this->fragment = $fragment;
if (!$query) {
$this->query = new Query();
} else {
$this->setQuery($query);
}
$this->setPath($path);
}
@ -222,8 +227,9 @@ class Url
public function setScheme($scheme)
{
// Remove the default port if one is specified
if ($this->port && isset(self::$defaultPorts[$this->scheme]) &&
self::$defaultPorts[$this->scheme] == $this->port
if ($this->port
&& isset(self::$defaultPorts[$this->scheme])
&& self::$defaultPorts[$this->scheme] == $this->port
) {
$this->port = null;
}
@ -271,15 +277,15 @@ class Url
}
/**
* Set the path part of the URL
* Set the path part of the URL.
*
* The provided path is URL encoded as necessary.
*
* @param string $path Path string to set
*/
public function setPath($path)
{
static $search = [' ', '?'];
static $replace = ['%20', '%3F'];
$this->path = str_replace($search, $replace, $path);
$this->path = self::encodePath($path);
}
/**
@ -433,7 +439,7 @@ class Url
$this->query = new Query($query);
} else {
throw new \InvalidArgumentException('Query must be a Query, '
. 'array, or string. ' . gettype($query) . ' provided.');
. 'array, or string. Got ' . Core::describeType($query));
}
}
@ -551,4 +557,24 @@ class Url
return $result;
}
/**
 * Percent-encodes the path part of a URL while leaving existing
 * percent-escapes intact.
 *
 * Letters, digits and the characters -._~!$&'()*+,;=:@/ pass through
 * unchanged. Any other run of characters, and any "%" that is not
 * followed by two hexadecimal digits, is handed to encodeMatch().
 *
 * @param string $path Path to encode
 *
 * @return string
 */
public static function encodePath($path)
{
    static $unsafeRun = '/[^a-zA-Z0-9\-\._~!\$&\'\(\)\*\+,;=%:@\/]+|%(?![A-Fa-f0-9]{2})/';

    return preg_replace_callback(
        $unsafeRun,
        [__CLASS__, 'encodeMatch'],
        $path
    );
}
/**
 * preg_replace_callback() handler: percent-encodes the full match with
 * rawurlencode().
 *
 * @param array $match Match array; index 0 holds the matched text
 *
 * @return string
 */
private static function encodeMatch(array $match)
{
    $matchedText = $match[0];

    return rawurlencode($matchedText);
}
}

View File

@ -174,4 +174,17 @@ class QueryTest extends \PHPUnit_Framework_TestCase
$q = Query::fromString('foo=bar%2Fbaz&bam=boo boo!', false);
$this->assertEquals('foo=bar%2Fbaz&bam=boo boo!', (string) $q);
}
public function testQueryDoesNotDoubleEncodeValues()
{
    // The key already carries a percent-escape; serializing the query
    // must not turn "%20" into "%2520".
    $query = new Query();
    $query->set('foo%20baz', 'bar');

    $serialized = (string) $query;

    $this->assertEquals('foo%20baz=bar', $serialized);
}
public function testQueryIsNormalizedAndProperlyEncodedFromString()
{
    // "%2F" is expected to come back as "/", the raw space as "%20",
    // and "!" and "?" are expected to be left alone.
    $query = Query::fromString('foo=bar%2Fbaz&bam=boo boo!?');

    $serialized = (string) $query;

    $this->assertEquals('foo=bar/baz&bam=boo%20boo!?', $serialized);
}
}

View File

@ -330,4 +330,10 @@ class UrlTest extends \PHPUnit_Framework_TestCase
$url->addPath('?');
$this->assertEquals('http://foo.com/baz%20bar/%3F?a=b', (string) $url);
}
public function testCorrectlyEncodesPathWithoutDoubleEncoding()
{
    // The path mixes an existing "%20" escape with a raw space: the
    // escape must be kept and only the raw space encoded.
    $parsed = Url::fromString('http://foo.com/baz%20 bar:boo/baz!');

    $path = $parsed->getPath();

    $this->assertEquals('/baz%20%20bar:boo/baz!', $path);
}
}