Files
moodle/lib/htmlpurifier/HTMLPurifier/Injector/Linkify.php
Daniel Ziegenberg be7f6d4834 MDL-74823 lib: normalize line endings for HTMLPurifier
Prior to this change, all the line endings in the imported HTMLPurifier
library were using CRLF (\r\n aka Windows style), but the HTMLPurifier
source and also the downloadable artefacts use LF (\n aka Linux style)
as line endings. This has been the case since
510d190382 when with the commit
"MDL-38672 import HTML Purifier 4.5.0" all line endings were changed
from LF to CRLF. There was no comment in the commit on why this change
was done.

As the original source uses LF, this commit partly reverts
510d190382 and goes back to LF as line
endings.

Signed-off-by: Daniel Ziegenberg <daniel@ziegenberg.at>
2022-11-17 18:26:26 +01:00

68 lines
2.0 KiB
PHP

<?php
/**
* Injector that converts http, https and ftp text URLs to actual links.
*/
class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector
{
/**
* @type string
*/
public $name = 'Linkify';
/**
* @type array
*/
public $needed = array('a' => array('href'));
/**
* @param HTMLPurifier_Token $token
*/
public function handleText(&$token)
{
if (!$this->allowsElement('a')) {
return;
}
if (strpos($token->data, '://') === false) {
// our really quick heuristic failed, abort
// this may not work so well if we want to match things like
// "google.com", but then again, most people don't
return;
}
// there is/are URL(s). Let's split the string.
// We use this regex:
// https://gist.github.com/gruber/249502
// but with @cscott's backtracking fix and also
// the Unicode characters un-Unicodified.
$bits = preg_split(
'/\\b((?:[a-z][\\w\\-]+:(?:\\/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}\\/)(?:[^\\s()<>]|\\((?:[^\\s()<>]|(?:\\([^\\s()<>]+\\)))*\\))+(?:\\((?:[^\\s()<>]|(?:\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:\'".,<>?\x{00ab}\x{00bb}\x{201c}\x{201d}\x{2018}\x{2019}]))/iu',
$token->data, -1, PREG_SPLIT_DELIM_CAPTURE);
if ($bits === false) {
return;
}
$token = array();
// $i = index
// $c = count
// $l = is link
for ($i = 0, $c = count($bits), $l = false; $i < $c; $i++, $l = !$l) {
if (!$l) {
if ($bits[$i] === '') {
continue;
}
$token[] = new HTMLPurifier_Token_Text($bits[$i]);
} else {
$token[] = new HTMLPurifier_Token_Start('a', array('href' => $bits[$i]));
$token[] = new HTMLPurifier_Token_Text($bits[$i]);
$token[] = new HTMLPurifier_Token_End('a');
}
}
}
}
// vim: et sw=4 sts=4