diff --git a/README.md b/README.md index b77cc97..83bb6f9 100644 --- a/README.md +++ b/README.md @@ -916,14 +916,18 @@ s('i like to watch television')->titleize($ignore); // 'I Like to Watch Television' ``` -##### toAscii() +##### toAscii([, string $locale = 'en' [, bool $removeUnsupported = true ]]) Returns an ASCII version of the string. A set of non-ASCII characters are replaced with their closest ASCII counterparts, and the rest are removed -unless instructed otherwise. +unless instructed otherwise. The locale of the source string can be supplied +for locale-specific transliteration. The locale can be supplied in any of the +following formats: en, en_GB, or en-GB. ```php s('fòôbàř')->toAscii(); // 'foobar' +s('äöü')->toAscii(); // 'aou' +s('äöü')->toAscii('de'); // 'aeoeue' ``` ##### toBoolean() diff --git a/src/StaticStringy.php b/src/StaticStringy.php index f89b18d..4486665 100644 --- a/src/StaticStringy.php +++ b/src/StaticStringy.php @@ -72,7 +72,7 @@ use ReflectionMethod; * @method static string swapCase(string $str, string $encoding = null) * @method static string tidy(string $str, string $encoding = null) * @method static string titleize(string $str, string $encoding = null) - * @method static string toAscii(string $str, bool $removeUnsupported = true, string $encoding = null) + * @method static string toAscii(string $str, string $locale = 'en', bool $removeUnsupported = true, string $encoding = null) * @method static bool toBoolean(string $str, string $encoding = null) * @method static string toLowerCase(string $str, string $encoding = null) * @method static string toSpaces(string $str, int $tabLength = 4, string $encoding = null) diff --git a/src/Stringy.php b/src/Stringy.php index 0c982f0..0ade923 100644 --- a/src/Stringy.php +++ b/src/Stringy.php @@ -1424,16 +1424,22 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess /** * Returns an ASCII version of the string. A set of non-ASCII characters are * replaced with their closest ASCII counterparts, and the rest are removed - * unless instructed otherwise. + * unless instructed otherwise. The locale can be supplied in any of the + * following formats: en, en_GB, or en-GB. * + * @param string $locale Locale of the source string * @param bool $removeUnsupported Whether or not to remove the * unsupported characters * @return static Object whose $str contains only ASCII characters */ - public function toAscii($removeUnsupported = true) + public function toAscii($locale = 'en', $removeUnsupported = true) { $str = $this->str; + foreach ($this->localeSpecificCharsArray($locale) as $key => $value) { + $str = str_replace($value, $key, $str); + } + foreach ($this->charsArray() as $key => $value) { $str = str_replace($value, $key, $str); } @@ -1686,7 +1692,7 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess 'α', 'ά', 'ἀ', 'ἁ', 'ἂ', 'ἃ', 'ἄ', 'ἅ', 'ἆ', 'ἇ', 'ᾀ', 'ᾁ', 'ᾂ', 'ᾃ', 'ᾄ', 'ᾅ', 'ᾆ', 'ᾇ', 'ὰ', 'ά', 'ᾰ', 'ᾱ', 'ᾲ', 'ᾳ', 'ᾴ', 'ᾶ', 'ᾷ', 'а', 'أ', 'အ', - 'ာ', 'ါ', 'ǻ', 'ǎ', 'ª', 'ა', 'अ', 'ا', 'a'), + 'ာ', 'ါ', 'ǻ', 'ǎ', 'ª', 'ა', 'अ', 'ا', 'a', 'ä'), 'b' => array('б', 'β', 'Ъ', 'Ь', 'ب', 'ဗ', 'ბ', 'b'), 'c' => array('ç', 'ć', 'č', 'ĉ', 'ċ', 'c'), 'd' => array('ď', 'ð', 'đ', 'ƌ', 'ȡ', 'ɖ', 'ɗ', 'ᵭ', 'ᶁ', 'ᶑ', @@ -1715,7 +1721,8 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess 'o' => array('ó', 'ò', 'ỏ', 'õ', 'ọ', 'ô', 'ố', 'ồ', 'ổ', 'ỗ', 'ộ', 'ơ', 'ớ', 'ờ', 'ở', 'ỡ', 'ợ', 'ø', 'ō', 'ő', 'ŏ', 'ο', 'ὀ', 'ὁ', 'ὂ', 'ὃ', 'ὄ', 'ὅ', 'ὸ', 'ό', - 'о', 'و', 'θ', 'ို', 'ǒ', 'ǿ', 'º', 'ო', 'ओ', 'o'), + 'о', 'و', 'θ', 'ို', 'ǒ', 'ǿ', 'º', 'ო', 'ओ', 'o', + 'ö'), 'p' => array('п', 'π', 'ပ', 'პ', 'پ', 'p'), 'q' => array('ყ', 'q'), 'r' => array('ŕ', 'ř', 'ŗ', 'р', 'ρ', 'ر', 'რ', 'r'), @@ -1726,7 +1733,7 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess 'u' => array('ú', 'ù', 'ủ', 'ũ', 'ụ', 'ư', 'ứ', 'ừ', 'ử', 'ữ', 'ự', 'û', 'ū', 'ů', 'ű', 'ŭ', 'ų', 'µ', 'у', 'ဉ', 'ု', 'ူ', 'ǔ', 'ǖ', 'ǘ', 'ǚ', 'ǜ', 'უ', 'उ', 'u', - 'ў'), + 'ў', 'ü'), 'v' => array('в', 'ვ', 'ϐ', 'v'), 'w' => array('ŵ', 'ω', 'ώ', 'ဝ', 'ွ', 'w'), 'x' => array('χ', 'ξ', 'x'), @@ -1734,7 +1741,7 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess 'ϋ', 'ύ', 'ΰ', 'ي', 'ယ', 'y'), 'z' => array('ź', 'ž', 'ż', 'з', 'ζ', 'ز', 'ဇ', 'ზ', 'z'), 'aa' => array('ع', 'आ', 'آ'), - 'ae' => array('ä', 'æ', 'ǽ'), + 'ae' => array('æ', 'ǽ'), 'ai' => array('ऐ'), 'at' => array('@'), 'ch' => array('ч', 'ჩ', 'ჭ', 'چ'), @@ -1747,7 +1754,7 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess 'kh' => array('х', 'خ', 'ხ'), 'lj' => array('љ'), 'nj' => array('њ'), - 'oe' => array('ö', 'œ', 'ؤ'), + 'oe' => array('œ', 'ؤ'), 'oi' => array('ऑ'), 'oii' => array('ऒ'), 'ps' => array('ψ'), @@ -1757,7 +1764,6 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess 'sx' => array('ŝ'), 'th' => array('þ', 'ϑ', 'ث', 'ذ', 'ظ'), 'ts' => array('ц', 'ც', 'წ'), - 'ue' => array('ü'), 'uu' => array('ऊ'), 'ya' => array('я'), 'yu' => array('ю'), @@ -1767,7 +1773,7 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess 'Ặ', 'Â', 'Ấ', 'Ầ', 'Ẩ', 'Ẫ', 'Ậ', 'Å', 'Ā', 'Ą', 'Α', 'Ά', 'Ἀ', 'Ἁ', 'Ἂ', 'Ἃ', 'Ἄ', 'Ἅ', 'Ἆ', 'Ἇ', 'ᾈ', 'ᾉ', 'ᾊ', 'ᾋ', 'ᾌ', 'ᾍ', 'ᾎ', 'ᾏ', 'Ᾰ', 'Ᾱ', - 'Ὰ', 'Ά', 'ᾼ', 'А', 'Ǻ', 'Ǎ', 'A'), + 'Ὰ', 'Ά', 'ᾼ', 'А', 'Ǻ', 'Ǎ', 'A', 'Ä'), 'B' => array('Б', 'Β', 'ब', 'B'), 'C' => array('Ç','Ć', 'Č', 'Ĉ', 'Ċ', 'C'), 'D' => array('Ď', 'Ð', 'Đ', 'Ɖ', 'Ɗ', 'Ƌ', 'ᴅ', 'ᴆ', 'Д', 'Δ', @@ -1791,7 +1797,7 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess 'O' => array('Ó', 'Ò', 'Ỏ', 'Õ', 'Ọ', 'Ô', 'Ố', 'Ồ', 'Ổ', 'Ỗ', 'Ộ', 'Ơ', 'Ớ', 'Ờ', 'Ở', 'Ỡ', 'Ợ', 'Ø', 'Ō', 'Ő', 'Ŏ', 'Ο', 'Ό', 'Ὀ', 'Ὁ', 'Ὂ', 'Ὃ', 'Ὄ', 'Ὅ', 'Ὸ', - 'Ό', 'О', 'Θ', 'Ө', 'Ǒ', 'Ǿ', 'O'), + 'Ό', 'О', 'Θ', 'Ө', 'Ǒ', 'Ǿ', 'O', 'Ö'), 'P' => array('П', 'Π', 'P'), 'Q' => array('Q'), 'R' => array('Ř', 'Ŕ', 'Р', 'Ρ', 'Ŗ', 'R'), @@ -1799,14 +1805,14 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess 'T' => array('Ť', 'Ţ', 'Ŧ', 'Ț', 'Т', 'Τ', 'T'), 'U' => array('Ú', 'Ù', 'Ủ', 'Ũ', 'Ụ', 'Ư', 'Ứ', 'Ừ', 'Ử', 'Ữ', 'Ự', 'Û', 'Ū', 'Ů', 'Ű', 'Ŭ', 'Ų', 'У', 'Ǔ', 'Ǖ', - 'Ǘ', 'Ǚ', 'Ǜ', 'U', 'Ў'), + 'Ǘ', 'Ǚ', 'Ǜ', 'U', 'Ў', 'Ü'), 'V' => array('В', 'V'), 'W' => array('Ω', 'Ώ', 'Ŵ', 'W'), 'X' => array('Χ', 'Ξ', 'X'), 'Y' => array('Ý', 'Ỳ', 'Ỷ', 'Ỹ', 'Ỵ', 'Ÿ', 'Ῠ', 'Ῡ', 'Ὺ', 'Ύ', 'Ы', 'Й', 'Υ', 'Ϋ', 'Ŷ', 'Y'), 'Z' => array('Ź', 'Ž', 'Ż', 'З', 'Ζ', 'Z'), - 'AE' => array('Ä', 'Æ', 'Ǽ'), + 'AE' => array('Æ', 'Ǽ'), 'CH' => array('Ч'), 'DJ' => array('Ђ'), 'DZ' => array('Џ'), @@ -1817,14 +1823,13 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess 'KH' => array('Х'), 'LJ' => array('Љ'), 'NJ' => array('Њ'), - 'OE' => array('Ö', 'Œ'), + 'OE' => array('Œ'), 'PS' => array('Ψ'), 'SH' => array('Ш'), 'SHCH' => array('Щ'), 'SS' => array('ẞ'), 'TH' => array('Þ'), 'TS' => array('Ц'), - 'UE' => array('Ü'), 'YA' => array('Я'), 'YU' => array('Ю'), 'ZH' => array('Ж'), @@ -1837,6 +1842,42 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess ); } + /** + * Returns the locale-specific replacements for the toAscii() method. + * + * @param string $locale Locale of the source string + * @return array An array of replacements. + */ + protected function localeSpecificCharsArray($locale = 'en') + { + $split = preg_split('/[-_]/', $locale); + $locale = strtolower($split[0]); + + static $charsArray = array(); + if (isset($charsArray[$locale])) { + return $charsArray[$locale]; + } + + $localeSpecific = array( + 'de' => array( + 'ae' => array('ä'), + 'oe' => array('ö'), + 'ue' => array('ü'), + 'AE' => array('Ä'), + 'OE' => array('Ö'), + 'UE' => array('Ü') + ) + ); + + if (isset($localeSpecific[$locale])) { + $charsArray[$locale] = $localeSpecific[$locale]; + } else { + $charsArray[$locale] = array(); + } + + return $charsArray[$locale]; + } + /** * Adds the specified amount of left and right padding to the given string. * The default character used is a space. diff --git a/tests/StringyTest.php b/tests/StringyTest.php index 94b1398..f50ab30 100644 --- a/tests/StringyTest.php +++ b/tests/StringyTest.php @@ -653,10 +653,11 @@ class StringyTestCase extends PHPUnit_Framework_TestCase /** * @dataProvider toAsciiProvider() */ - public function testToAscii($expected, $str, $removeUnsupported = true) + public function testToAscii($expected, $str, $locale = 'en', + $removeUnsupported = true) { $stringy = S::create($str); - $result = $stringy->toAscii($removeUnsupported); + $result = $stringy->toAscii($locale, $removeUnsupported); $this->assertStringy($result); $this->assertEquals($expected, $result); $this->assertEquals($str, $stringy); @@ -681,7 +682,10 @@ class StringyTestCase extends PHPUnit_Framework_TestCase array(' ', ' '), // medium mathematical space (U+205F) array(' ', ' '), // ideographic space (U+3000) array('', '𐍉'), // some uncommon, unsupported character (U+10349) - array('𐍉', '𐍉', false), + array('𐍉', '𐍉', 'en', false), + array('aouAOU', 'äöüÄÖÜ'), + array('aeoeueAEOEUE', 'äöüÄÖÜ', 'de'), + array('aeoeueAEOEUE', 'äöüÄÖÜ', 'de_DE') ); }