diff --git a/NEWS b/NEWS index 1c5147a1..130b14e7 100644 --- a/NEWS +++ b/NEWS @@ -13,6 +13,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier # URI parsing algorithm was made more strict, so only prefixes which looks like schemes will actually be schemes. Thanks Michael Gusev for fixing. +- Made Linkify URL parser a bit less permissive, so that non-breaking + spaces and commas are not included as part of URL. Thanks nAS for fixing. 4.5.0, released 2013-02-17 # Fix bug where stacked attribute transforms clobber each other; diff --git a/library/HTMLPurifier/Injector/Linkify.php b/library/HTMLPurifier/Injector/Linkify.php index 296dac28..6c3a1e6a 100644 --- a/library/HTMLPurifier/Injector/Linkify.php +++ b/library/HTMLPurifier/Injector/Linkify.php @@ -21,7 +21,8 @@ class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector // there is/are URL(s). Let's split the string: // Note: this regex is extremely permissive - $bits = preg_split('#((?:https?|ftp)://[^\s\'"<>()]+)#S', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE); + $bits = preg_split('#((?:https?|ftp)://[^\s\'",<>()]+)#Su', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE); + $token = array();