1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-06 22:26:31 +02:00

Fix #76, linkify includes dots at end of URL.

Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
This commit is contained in:
Edward Z. Yang
2016-03-02 02:05:54 -08:00
parent aebe1c02a2
commit 92aabf2b23
3 changed files with 18 additions and 3 deletions

View File

@@ -31,9 +31,14 @@ class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector
return;
}
// there is/are URL(s). Let's split the string:
// Note: this regex is extremely permissive
$bits = preg_split('#((?:https?|ftp)://[^\s\'",<>()]+)#Su', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE);
// there is/are URL(s). Let's split the string.
// We use this regex:
// https://gist.github.com/gruber/249502
// but with @cscott's backtracking fix and also
// the Unicode characters un-Unicodified.
$bits = preg_split(
'/\\b((?:[a-z][\\w\\-]+:(?:\\/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}\\/)(?:[^\\s()<>]|\\((?:[^\\s()<>]|(?:\\([^\\s()<>]+\\)))*\\))+(?:\\((?:[^\\s()<>]|(?:\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:\'".,<>?\x{00ab}\x{00bb}\x{201c}\x{201d}\x{2018}\x{2019}]))/iu',
$token->data, -1, PREG_SPLIT_DELIM_CAPTURE);
$token = array();