mirror of
https://github.com/moodle/moodle.git
synced 2025-02-26 13:03:53 +01:00
Prior to this change, all the line endings in the imported HTMLPurifier library were using CRLF (\r\n, aka Windows style), but the HTMLPurifier source and the downloadable artefacts use LF (\n, aka Linux style) as line endings. This has been the case since 510d190382003985eafd6f4407190d43509016a5, when the commit "MDL-38672 import HTML Purifier 4.5.0" changed all line endings from LF to CRLF. There was no comment in the commit on why this change was made. As the original source uses LF, this commit partly reverts 510d190382003985eafd6f4407190d43509016a5 and goes back to LF as line endings. Signed-off-by: Daniel Ziegenberg <daniel@ziegenberg.at>
72 lines
2.2 KiB
PHP
72 lines
2.2 KiB
PHP
<?php
|
|
|
|
/**
 * Parses a URI into the components and fragment identifier as specified
 * by RFC 3986.
 */
class HTMLPurifier_URIParser
{

    /**
     * Instance of HTMLPurifier_PercentEncoder to do normalization with.
     */
    protected $percentEncoder;

    /**
     * Creates the percent encoder that parse() runs every URI through.
     */
    public function __construct()
    {
        $this->percentEncoder = new HTMLPurifier_PercentEncoder();
    }

    /**
     * Parses a URI.
     * @param $uri string URI to parse
     * @return HTMLPurifier_URI|bool Representation of the URI, or false if
     *         the URI could not be matched at all. The representation has
     *         not been validated yet and may not conform to RFC.
     */
    public function parse($uri)
    {
        $uri = $this->percentEncoder->normalize($uri);

        // Regexp is as per Appendix B.
        // Note that ["<>] are an addition to the RFC's recommended
        // characters, because they represent external delimiters.
        // The numbers below are the capture groups read further down.
        $r_URI = '!'.
            '(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme
            '(//([^/?#"<>]*))?'. // 4. Authority
            '([^?#"<>]*)'.       // 5. Path
            '(\?([^#"<>]*))?'.   // 7. Query
            '(#([^"<>]*))?'.     // 9. Fragment
            '!';

        $matches = array();
        $result = preg_match($r_URI, $uri, $matches);

        if (!$result) {
            return false; // *really* invalid URI
        }

        // Separate out parts. The odd-numbered wrapper groups include the
        // delimiter (":", "//", "?", "#"), so they are non-empty exactly
        // when the component is present, even if its value is "" or "0".
        $scheme     = !empty($matches[1]) ? $matches[2] : null;
        $authority  = !empty($matches[3]) ? $matches[4] : null;
        $path       = $matches[5]; // always present, can be empty
        $query      = !empty($matches[6]) ? $matches[7] : null;
        $fragment   = !empty($matches[8]) ? $matches[9] : null;

        // further parse authority
        if ($authority !== null) {
            $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
            $matches = array();
            preg_match($r_authority, $authority, $matches);
            $userinfo   = !empty($matches[1]) ? $matches[2] : null;
            // Group 3 carries no delimiter, so empty() must not be used
            // here: it would turn the legitimate host "0" into "".
            $host       = isset($matches[3]) ? $matches[3] : '';
            // Group 4 includes the leading ":"; an empty digit run casts to 0.
            $port       = !empty($matches[4]) ? (int) $matches[5] : null;
        } else {
            $port = $host = $userinfo = null;
        }

        return new HTMLPurifier_URI(
            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
    }

}
|
|
|
|
// vim: et sw=4 sts=4
|