From fc6d743e0a2f64526ca19bd49206be573f5f359b Mon Sep 17 00:00:00 2001 From: Koen Punt Date: Wed, 16 Apr 2014 11:09:59 +0200 Subject: [PATCH] Add support for unicode regular expressions --- AltoRouter.php | 4 ++-- AltoRouterTest.php | 54 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/AltoRouter.php b/AltoRouter.php index 840a694..c0e526c 100644 --- a/AltoRouter.php +++ b/AltoRouter.php @@ -170,7 +170,7 @@ class AltoRouter { if ($_route === '*') { $match = true; } elseif (isset($_route[0]) && $_route[0] === '@') { - $match = preg_match('`' . substr($_route, 1) . '`', $requestUrl, $params); + $match = preg_match('`' . substr($_route, 1) . '`u', $requestUrl, $params); } else { $route = null; $regex = false; @@ -249,6 +249,6 @@ class AltoRouter { } } - return "`^$route$`"; + return "`^$route$`u"; } } diff --git a/AltoRouterTest.php b/AltoRouterTest.php index 0474f8a..499280c 100644 --- a/AltoRouterTest.php +++ b/AltoRouterTest.php @@ -266,6 +266,36 @@ class AltoRouterTest extends PHPUnit_Framework_TestCase } + public function testMatchWithUnicodeRegex() + { + $pattern = '/(?<path>[^'; + // Arabic characters + $pattern .= '\x{0600}-\x{06FF}'; + $pattern .= '\x{FB50}-\x{FDFD}'; + $pattern .= '\x{FE70}-\x{FEFF}'; + $pattern .= '\x{0750}-\x{077F}'; + // Alphanumeric, /, _, - and space characters + $pattern .= 'a-zA-Z0-9\/_-\s'; + // 'ZERO WIDTH NON-JOINER' + $pattern .= '\x{200C}'; + $pattern .= ']+)'; + + $this->router->map('GET', '@' . 
$pattern, 'unicode_action', 'unicode_route'); + + $this->assertEquals(array( + 'target' => 'unicode_action', + 'name' => 'unicode_route', + 'params' => array( + 'path' => '大家好' + ) + ), $this->router->match('/大家好', 'GET')); + + $this->assertFalse($this->router->match('/﷽‎', 'GET')); + } + + /** + * @covers AltoRouter::addMatchTypes + */ public function testMatchWithCustomNamedRegex() { $this->router->addMatchTypes(array('cId' => '[a-zA-Z]{2}[0-9](?:_[0-9]++)?')); @@ -290,4 +320,28 @@ class AltoRouterTest extends PHPUnit_Framework_TestCase $this->assertFalse($this->router->match('/some-other-thing', 'GET')); } + + public function testMatchWithCustomNamedUnicodeRegex() + { + $pattern = '[^'; + // Arabic characters + $pattern .= '\x{0600}-\x{06FF}'; + $pattern .= '\x{FB50}-\x{FDFD}'; + $pattern .= '\x{FE70}-\x{FEFF}'; + $pattern .= '\x{0750}-\x{077F}'; + $pattern .= ']+'; + + $this->router->addMatchTypes(array('nonArabic' => $pattern)); + $this->router->map('GET', '/bar/[nonArabic:string]', 'non_arabic_action', 'non_arabic_route'); + + $this->assertEquals(array( + 'target' => 'non_arabic_action', + 'name' => 'non_arabic_route', + 'params' => array( + 'string' => 'some-path' + ) + ), $this->router->match('/bar/some-path', 'GET')); + + $this->assertFalse($this->router->match('/﷽‎', 'GET')); + } }