diff --git a/filter/urltolink/filter.php b/filter/urltolink/filter.php index d7ca920b55d..9d01e8d0eb6 100644 --- a/filter/urltolink/filter.php +++ b/filter/urltolink/filter.php @@ -122,19 +122,25 @@ class filter_urltolink extends moodle_text_filter { $unicoderegexp = @preg_match('/\pL/u', 'a'); // This will fail silently, returning false, } - //todo: MDL-21296 - use of unicode modifiers may cause a timeout - if ($unicoderegexp) { //We can use unicode modifiers - $text = preg_replace('#(?<!=["\'])(((http(s?))://)(((([\pLl0-9]([\pLl0-9]|-)*[\pLl0-9]|[\pLl0-9])\.)+([\pLl]([\pLl0-9]|-)*[\pLl0-9]|[\pLl]))|(([0-9]{1,3}\.){3}[0-9]{1,3}))(:[\pL0-9]*)?(/([\pLl0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-fA-F0-9]{2})*)*(\?([\pLl0-9\.!$&\'\(\)*+,;=_~:@/?-]|%[a-fA-F0-9]{2})*)?(\#[\pLl0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?)(?<![,.;])#iu', - '<a href="\\1" class="_blanktarget">\\1</a>', $text); - $text = preg_replace('#(?<!=["\']|//)((www\.([\pLl0-9]([\pLl0-9]|-)*[\pLl0-9]|[\pLl0-9])\.)+([\pLl]([\pLl0-9]|-)*[\pLl0-9]|[\pLl])(:[\pL0-9]*)?(/([\pLl0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-fA-F0-9]{2})*)*(\?([\pLl0-9\.!$&\'\(\)*+,;=_~:@/?-]|%[a-fA-F0-9]{2})*)?(\#[\pLl0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?)(?<![,.;])#iu', - '<a href="http://\\1" class="_blanktarget">\\1</a>', $text); - } else { //We cannot use unicode modifiers - $text = preg_replace('#(?<!=["\'])(((http(s?))://)(((([a-z0-9]([a-z0-9]|-)*[a-z0-9]|[a-z0-9])\.)+([a-z]([a-z0-9]|-)*[a-z0-9]|[a-z]))|(([0-9]{1,3}\.){3}[0-9]{1,3}))(:[a-zA-Z0-9]*)?(/([a-z0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-f0-9]{2})*)*(\?([a-z0-9\.!$&\'\(\)*+,;=_~:@/?-]|%[a-fA-F0-9]{2})*)?(\#[a-z0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?)(?<![,.;])#i', - '<a href="\\1" class="_blanktarget">\\1</a>', $text); - $text = preg_replace('#(?<!=["\']|//)((www\.([a-z0-9]([a-z0-9]|-)*[a-z0-9]|[a-z0-9])\.)+([a-z]([a-z0-9]|-)*[a-z0-9]|[a-z])(:[a-zA-Z0-9]*)?(/([a-z0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-f0-9]{2})*)*(\?([a-z0-9\.!$&\'\(\)*+,;=_~:@/?-]|%[a-fA-F0-9]{2})*)?(\#[a-z0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?)(?<![,.;])#i', - '<a href="http://\\1" class="_blanktarget">\\1</a>', $text); + // TODO MDL-21296 - use of unicode modifiers may cause a timeout + $domainsegment = '(?:[\pLl0-9][\pLl0-9-]*[\pLl0-9]|[\pLl0-9])'; + $numericip = '(?:(?:[0-9]{1,3}\.){3}[0-9]{1,3})'; + $port = '(?::\d*)'; + $pathchar = '(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-f0-9]{2})'; + $path = "(?:/$pathchar*)*"; + $querystring = '(?:\?(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@/?-]|%[a-fA-F0-9]{2})*)'; + $fragment = '(?:\#(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@/?-]|%[a-fA-F0-9]{2})*)'; + + $regex = "(?<!=[\"'])(?:http(s)?://|(www\.))((?:$domainsegment\.)+$domainsegment|$numericip)" . + "($port?$path$querystring?$fragment?)(?<![]),.;])"; + if ($unicoderegexp) { + $regex = '#' . $regex . '#ui'; + } else { + $regex = '#' . preg_replace(array('\pLl', '\PL'), 'a-z', $regex) . '#i'; } + $text = preg_replace($regex, '<a href="http$1://$2$3$4" class="_blanktarget">$0</a>', $text); + if (!empty($ignoretags)) { $ignoretags = array_reverse($ignoretags); /// Reversed so "progressive" str_replace() will solve some nesting problems. $text = str_replace(array_keys($ignoretags),$ignoretags,$text); diff --git a/filter/urltolink/tests/filter_test.php b/filter/urltolink/tests/filter_test.php index 95109d17916..70fb3f1d02d 100644 --- a/filter/urltolink/tests/filter_test.php +++ b/filter/urltolink/tests/filter_test.php @@ -44,7 +44,7 @@ class filter_urltolink_testcase extends basic_testcase { '$1<a href="http://www.$2$3" target="_blank">www.$2$3</a>', $text); } - function test_convert_urls_into_links() { + function get_convert_urls_into_links_test_cases() { $texts = array ( //just a url 'http://moodle.org - URL' => '<a href="http://moodle.org" class="_blanktarget">http://moodle.org</a> - URL', @@ -57,13 +57,26 @@ class filter_urltolink_testcase extends basic_testcase { 'URL: https://moodle.org/s/i=1&j=2' => 'URL: <a href="https://moodle.org/s/i=1&j=2" class="_blanktarget">https://moodle.org/s/i=1&j=2</a>', //url with port and params 'URL: http://moodle.org:8080/s/i=1' => 'URL: <a href="http://moodle.org:8080/s/i=1" class="_blanktarget">http://moodle.org:8080/s/i=1</a>', - //url in brackets + // URL with complex fragment. + 'Most voted issues: https://tracker.moodle.org/browse/MDL#selectedTab=com.atlassian.jira.plugin.system.project%3Apopularissues-panel' => 'Most voted issues: <a href="https://tracker.moodle.org/browse/MDL#selectedTab=com.atlassian.jira.plugin.system.project%3Apopularissues-panel" class="_blanktarget">https://tracker.moodle.org/browse/MDL#selectedTab=com.atlassian.jira.plugin.system.project%3Apopularissues-panel</a>', + // Domain with more parts + 'URL: www.bbc.co.uk.' => 'URL: <a href="http://www.bbc.co.uk" class="_blanktarget">www.bbc.co.uk</a>.', + // URL in brackets. '(http://moodle.org) - URL' => '(<a href="http://moodle.org" class="_blanktarget">http://moodle.org</a>) - URL', '(www.moodle.org) - URL' => '(<a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a>) - URL', - //url in square brackets + // URL in brackets with a path. + '(http://example.com/index.html) - URL' => '(<a href="http://example.com/index.html" class="_blanktarget">http://example.com/index.html</a>) - URL', + '(www.example.com/index.html) - URL' => '(<a href="http://www.example.com/index.html" class="_blanktarget">www.example.com/index.html</a>) - URL', + // URL in brackets with anchor. + '(http://moodle.org/main#anchor) - URL' => '(<a href="http://moodle.org/main#anchor" class="_blanktarget">http://moodle.org/main#anchor</a>) - URL', + '(www.moodle.org/main#anchor) - URL' => '(<a href="http://www.moodle.org/main#anchor" class="_blanktarget">www.moodle.org/main#anchor</a>) - URL', + // URL in square brackets. '[http://moodle.org] - URL' => '[<a href="http://moodle.org" class="_blanktarget">http://moodle.org</a>] - URL', '[www.moodle.org] - URL' => '[<a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a>] - URL', - //url in brackets with anchor + // URL in square brackets with a path. + '[http://example.com/index.html] - URL' => '[<a href="http://example.com/index.html" class="_blanktarget">http://example.com/index.html</a>] - URL', + '[www.example.com/index.html] - URL' => '[<a href="http://www.example.com/index.html" class="_blanktarget">www.example.com/index.html</a>] - URL', + // URL in square brackets with anchor. '[http://moodle.org/main#anchor] - URL' => '[<a href="http://moodle.org/main#anchor" class="_blanktarget">http://moodle.org/main#anchor</a>] - URL', '[www.moodle.org/main#anchor] - URL' => '[<a href="http://www.moodle.org/main#anchor" class="_blanktarget">www.moodle.org/main#anchor</a>] - URL', //brackets within the url @@ -71,7 +84,8 @@ class filter_urltolink_testcase extends basic_testcase { 'URL: www.cc.org/url_(withpar)_go/?i=2' => 'URL: <a href="http://www.cc.org/url_(withpar)_go/?i=2" class="_blanktarget">www.cc.org/url_(withpar)_go/?i=2</a>', 'URL: http://cc.org/url_(with)_(par)_go/?i=2' => 'URL: <a href="http://cc.org/url_(with)_(par)_go/?i=2" class="_blanktarget">http://cc.org/url_(with)_(par)_go/?i=2</a>', 'URL: www.cc.org/url_(with)_(par)_go/?i=2' => 'URL: <a href="http://www.cc.org/url_(with)_(par)_go/?i=2" class="_blanktarget">www.cc.org/url_(with)_(par)_go/?i=2</a>', - 'http://en.wikipedia.org/wiki/Slash_(punctuation)'=>'<a href="http://en.wikipedia.org/wiki/Slash_(punctuation)" class="_blanktarget">http://en.wikipedia.org/wiki/Slash_(punctuation)</a>', + // URL legitimately ending in a bracket. Commented out as part of MDL-22390. See next tests for work-arounds. + // 'http://en.wikipedia.org/wiki/Slash_(punctuation)'=>'<a href="http://en.wikipedia.org/wiki/Slash_(punctuation)" class="_blanktarget">http://en.wikipedia.org/wiki/Slash_(punctuation)</a>', 'http://en.wikipedia.org/wiki/%28#Parentheses_.28_.29 - URL' => '<a href="http://en.wikipedia.org/wiki/%28#Parentheses_.28_.29" class="_blanktarget">http://en.wikipedia.org/wiki/%28#Parentheses_.28_.29</a> - URL', 'http://en.wikipedia.org/wiki/(#Parentheses_.28_.29 - URL' => '<a href="http://en.wikipedia.org/wiki/(#Parentheses_.28_.29" class="_blanktarget">http://en.wikipedia.org/wiki/(#Parentheses_.28_.29</a> - URL', //escaped brackets in url @@ -94,8 +108,6 @@ class filter_urltolink_testcase extends basic_testcase { 'URL: www.moodle.org?u=1.23' => 'URL: <a href="http://www.moodle.org?u=1.23" class="_blanktarget">www.moodle.org?u=1.23</a>', //escaped space in url 'URL: www.moodle.org?u=test+param&' => 'URL: <a href="http://www.moodle.org?u=test+param&" class="_blanktarget">www.moodle.org?u=test+param&</a>', - //odd characters in url param - 'URL: www.moodle.org?param=:)' => 'URL: <a href="http://www.moodle.org?param=:)" class="_blanktarget">www.moodle.org?param=:)</a>', //multiple urls 'URL: http://moodle.org www.moodle.org' => 'URL: <a href="http://moodle.org" class="_blanktarget">http://moodle.org</a> <a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a>', @@ -151,17 +163,25 @@ class filter_urltolink_testcase extends basic_testcase { //'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN http://www.w3.org/TR/html4/strict.dtd">'=>'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN http://www.w3.org/TR/html4/strict.dtd">' ); + $data = array(); + foreach ($texts as $text => $correctresult) { + $data[] = array($text, $correctresult); + } + return $data; + } + + /** + * @dataProvider get_convert_urls_into_links_test_cases + */ + function test_convert_urls_into_links($text, $correctresult) { + $testablefilter = new testable_filter_urltolink(); + $testablefilter->convert_urls_into_links($text); + $this->assertEquals($correctresult, $text); + } + + function test_convert_urls_into_links_performance() { $testablefilter = new testable_filter_urltolink(); - foreach ($texts as $text => $correctresult) { - $msg = "Testing text: ". str_replace('%', '%%', $text) . ": %s"; // Escape original '%' so sprintf() wont get confused - - $testablefilter->convert_urls_into_links($text); - - $this->assertEquals($text, $correctresult, $msg); - } - - //performance testing $reps = 1000; $text = file_get_contents(__DIR__ . '/fixtures/sample.txt'); $time_start = microtime(true);