diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php
index 3b4d186743e..e54a3344a74 100644
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php
@@ -97,7 +97,15 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
 
         // PHP 5.3 and later support this functionality natively
         if (function_exists('idn_to_ascii')) {
-            $string = idn_to_ascii($string);
+            // Prefer UTS #46 non-transitional processing (IDNA2008 compatible); the default
+            // variant handles deviation characters such as "ß" differently.
+            // The constants are not defined on intl builds linked against an old ICU, so fall
+            // back to the previous call there instead of failing on undefined constants.
+            if (defined('IDNA_NONTRANSITIONAL_TO_ASCII') && defined('INTL_IDNA_VARIANT_UTS46')) {
+                $string = idn_to_ascii($string, IDNA_NONTRANSITIONAL_TO_ASCII, INTL_IDNA_VARIANT_UTS46);
+            } else {
+                $string = idn_to_ascii($string);
+            }
 
         // If we have Net_IDNA2 support, we can support IRIs by
         // punycoding them. (This is the most portable thing to do,
diff --git a/lib/htmlpurifier/readme_moodle.txt b/lib/htmlpurifier/readme_moodle.txt
index 43c714b177a..7b85b2f358d 100644
--- a/lib/htmlpurifier/readme_moodle.txt
+++ b/lib/htmlpurifier/readme_moodle.txt
@@ -13,3 +13,8 @@ Description of HTML Purifier v4.9.3 library import into Moodle
     HTMLPurifier.path.php
 * add locallib.php with Moodle specific extensions to /lib/htmlpurifier/
 * add this readme_moodle.txt to /lib/htmlpurifier/
+
+Modifications:
+* MDL-60337 use correct IDN variant for converting domain names to ASCII
+  (falls back to the default variant when the UTS #46 constants are not defined).
+  Check status of https://github.com/ezyang/htmlpurifier/pull/148
diff --git a/lib/tests/htmlpurifier_test.php b/lib/tests/htmlpurifier_test.php
index f7d5679462e..e14b3191fdf 100644
--- a/lib/tests/htmlpurifier_test.php
+++ b/lib/tests/htmlpurifier_test.php
@@ -319,6 +319,37 @@ class core_htmlpurifier_testcase extends basic_testcase {
         $this->assertSame('<a>link</a>', purify_html($text));
     }
 
+    /**
+     * Test that non-ASCII (IDN) host names in links are converted to punycode.
+     */
+    public function test_idn() {
+
+        // Example of domain that gives the same result in IDNA2003 and IDNA2008.
+        $text = '<a href="http://правительство.рф">правительство.рф</a>';
+        $expected = '<a href="http://xn--80aealotwbjpid2k.xn--p1ai">правительство.рф</a>';
+        $this->assertSame($expected, purify_html($text));
+
+        // The deviation characters below only give these results with UTS #46 non-transitional
+        // processing, so skip them when the intl build does not provide the required constants.
+        if (!defined('IDNA_NONTRANSITIONAL_TO_ASCII') || !defined('INTL_IDNA_VARIANT_UTS46')) {
+            $this->markTestSkipped('UTS #46 IDN support is not available in this intl/ICU build.');
+        }
+
+        // Examples of deviations from http://www.unicode.org/reports/tr46/#Table_Deviation_Characters .
+        $text = '<a href="http://teßt.de">teßt.de</a>';
+        $expected = '<a href="http://xn--tet-6ka.de">teßt.de</a>';
+        $this->assertSame($expected, purify_html($text));
+
+        $text = '<a href="http://βόλος.com">http://βόλος.com</a>';
+        $expected = '<a href="http://xn--nxasmm1c.com">http://βόλος.com</a>';
+        $this->assertSame($expected, purify_html($text));
+
+        // The host name below contains an invisible U+200C (zero width non-joiner).
+        $text = '<a href="http://نامه‌ای.com">http://نامه‌ای.com</a>';
+        $expected = '<a href="http://xn--mgba3gch31f060k.com">http://نامه‌ای.com</a>';
+        $this->assertSame($expected, purify_html($text));
+    }
+
     /**
      * Tests media tags.
      *