From f1abc38c1744c77fc3f26df5ce47d8d9455be764 Mon Sep 17 00:00:00 2001 From: "Daniel St. Jules" Date: Tue, 6 Aug 2013 22:39:47 -0400 Subject: [PATCH] Fix for collapseWhitespace() not being able to handle multibyte space characters --- README.md | 11 ++++++----- src/Stringy/StaticStringy.php | 7 ++++--- src/Stringy/Stringy.php | 13 +++++++++---- tests/Stringy/CommonTest.php | 2 ++ tests/Stringy/StaticStringyTest.php | 4 ++-- tests/Stringy/StringyTest.php | 4 ++-- 6 files changed, 25 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 51535dc..113ede3 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ of the former is the following: ```php use Stringy\Stringy as S; -echo S::create("Fòô Bàř", 'UTF-8')->collapseWhitespace()->swapCase(); // 'fÒÔ bÀŘ' +echo S::create('Fòô Bàř', 'UTF-8')->collapseWhitespace()->swapCase(); // 'fÒÔ bÀŘ' ``` `Stringy\Stringy` contains a __toString() method, which returns the current @@ -108,8 +108,8 @@ Using the static wrapper, an alternative is the following: ```php use Stringy\StaticStringy as S; -$string = S::collapseWhitespace("Fòô Bàř", 'UTF-8'); -echo S::swapCase($string, 'UTF-8'); // 'fÒÔ bÀŘ'' +$string = S::collapseWhitespace('Fòô Bàř', 'UTF-8'); +echo S::swapCase($string, 'UTF-8'); // 'fÒÔ bÀŘ' ``` ## Methods @@ -151,10 +151,11 @@ S::camelize('Camel-Case'); // 'camelCase' $stringy->collapseWhitespace() -S::collapseWhitespace(string $str) +S::collapseWhitespace(string $str [, string $encoding ]) Trims the string and replaces consecutive whitespace characters with a -single space. This includes tabs and newline characters. +single space. This includes tabs and newline characters, as well as +multibyte whitespace such as the thin space and ideographic space. ```php S::create(' Ο συγγραφέας ')->collapseWhitespace(); diff --git a/src/Stringy/StaticStringy.php b/src/Stringy/StaticStringy.php index f116a21..bb90c77 100644 --- a/src/Stringy/StaticStringy.php +++ b/src/Stringy/StaticStringy.php @@ -142,14 +142,15 @@ class StaticStringy /** * Trims the string and replaces consecutive whitespace characters with a - * single space. This includes tabs and newline characters. + * single space. This includes tabs and newline characters, as well as + * multibyte whitespace such as the thin space and ideographic space. * * @param string $str The string to cleanup whitespace * @return string The trimmed string with condensed whitespace */ - public static function collapseWhitespace($str) + public static function collapseWhitespace($str, $encoding = null) { - return $result = Stringy::create($str)->collapseWhitespace()->str; + return $result = Stringy::create($str, $encoding)->collapseWhitespace()->str; } /** diff --git a/src/Stringy/Stringy.php b/src/Stringy/Stringy.php index 7a0a75e..578c305 100644 --- a/src/Stringy/Stringy.php +++ b/src/Stringy/Stringy.php @@ -249,16 +249,21 @@ class Stringy /** * Trims the string and replaces consecutive whitespace characters with a - * single space. This includes tabs and newline characters. + * single space. This includes tabs and newline characters, as well as + * multibyte whitespace such as the thin space and ideographic space. * * @return Stringy Object with a trimmed $str and condensed whitespace */ public function collapseWhitespace() { - $stringy = self::create($this->str, $this->encoding); - $stringy->str = preg_replace('/\s+/u', ' ', $stringy->trim()); + $regexEncoding = mb_regex_encoding(); + mb_regex_encoding($this->encoding); - return $stringy; + $stringy = self::create($this->str, $this->encoding); + $stringy->str = mb_ereg_replace('[[:space:]]+', ' ', $stringy); + mb_regex_encoding($regexEncoding); + + return $stringy->trim(); } /** diff --git a/tests/Stringy/CommonTest.php b/tests/Stringy/CommonTest.php index f2a6a03..07afcbd 100644 --- a/tests/Stringy/CommonTest.php +++ b/tests/Stringy/CommonTest.php @@ -173,6 +173,8 @@ abstract class CommonTest extends PHPUnit_Framework_TestCase array('test string', 'test string'), array('Ο συγγραφέας', ' Ο συγγραφέας '), array('123', ' 123 '), + array('1 2 3', '  1  2  3  ', 'UTF-8'), // ideographic spaces + array('', '   ', 'UTF-8'), // thin space and space array('', ' '), array('', ''), ); diff --git a/tests/Stringy/StaticStringyTest.php b/tests/Stringy/StaticStringyTest.php index d2be4f2..a329497 100644 --- a/tests/Stringy/StaticStringyTest.php +++ b/tests/Stringy/StaticStringyTest.php @@ -111,9 +111,9 @@ class StaticStringyTestCase extends CommonTest /** * @dataProvider stringsForCollapseWhitespace */ - public function testCollapseWhitespace($expected, $str) + public function testCollapseWhitespace($expected, $str, $encoding = null) { - $result = S::collapseWhitespace($str); + $result = S::collapseWhitespace($str, $encoding); $this->assertInternalType('string', $result); $this->assertEquals($expected, $result); } diff --git a/tests/Stringy/StringyTest.php b/tests/Stringy/StringyTest.php index 45bfdc8..2df7b61 100644 --- a/tests/Stringy/StringyTest.php +++ b/tests/Stringy/StringyTest.php @@ -145,9 +145,9 @@ class StringyTestCase extends CommonTest /** * @dataProvider stringsForCollapseWhitespace */ - public function testCollapseWhitespace($expected, $str) + public function testCollapseWhitespace($expected, $str, $encoding = null) { - $stringy = S::create($str); + $stringy = S::create($str, $encoding); $result = $stringy->collapseWhitespace(); $this->assertInstanceOf('Stringy\Stringy', $result); $this->assertEquals($expected, $result);