diff --git a/README.md b/README.md index 00dfe6e..1072d4d 100644 --- a/README.md +++ b/README.md @@ -923,9 +923,10 @@ s('i like to watch television')->titleize($ignore); Returns an ASCII version of the string. A set of non-ASCII characters are replaced with their closest ASCII counterparts, and the rest are removed -unless instructed otherwise. The locale of the source string can be supplied -for locale-specific transliteration. The locale can be supplied in any of the -following formats: en, en_GB, or en-GB. +by default. The language or locale of the source string can be supplied +for language-specific transliteration in any of the following formats: +en, en_GB, or en-GB. For example, passing "de" results in "äöü" mapping +to "aeoeue" rather than "aou" as in other languages. ```php s('fòôbàř')->toAscii(); // 'foobar' diff --git a/src/StaticStringy.php b/src/StaticStringy.php index 648546b..11f9a06 100644 --- a/src/StaticStringy.php +++ b/src/StaticStringy.php @@ -75,7 +75,7 @@ use ReflectionMethod; * @method static string swapCase(string $str, string $encoding = null) * @method static string tidy(string $str, string $encoding = null) * @method static string titleize(string $str, string $encoding = null) - * @method static string toAscii(string $str, string $locale = 'en', bool $removeUnsupported = true, string $encoding = null) + * @method static string toAscii(string $str, string $language = 'en', bool $removeUnsupported = true, string $encoding = null) * @method static bool toBoolean(string $str, string $encoding = null) * @method static string toLowerCase(string $str, string $encoding = null) * @method static string toSpaces(string $str, int $tabLength = 4, string $encoding = null) diff --git a/src/Stringy.php b/src/Stringy.php index 7a303cd..c5e06fd 100644 --- a/src/Stringy.php +++ b/src/Stringy.php @@ -1425,20 +1425,23 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess /** * Returns an ASCII version of the string. 
A set of non-ASCII characters are * replaced with their closest ASCII counterparts, and the rest are removed - * unless instructed otherwise. The locale can be supplied in any of the - * following formats: en, en_GB, or en-GB. + * by default. The language or locale of the source string can be supplied + * for language-specific transliteration in any of the following formats: + * en, en_GB, or en-GB. For example, passing "de" results in "äöü" mapping + * to "aeoeue" rather than "aou" as in other languages. * - * @param string $locale Locale of the source string + * @param string $language Language of the source string * @param bool $removeUnsupported Whether or not to remove the * unsupported characters * @return static Object whose $str contains only ASCII characters */ - public function toAscii($locale = 'en', $removeUnsupported = true) + public function toAscii($language = 'en', $removeUnsupported = true) { $str = $this->str; - foreach ($this->localeSpecificCharsArray($locale) as $key => $value) { - $str = str_replace($value, $key, $str); + $langSpecific = $this->langSpecificCharsArray($language); + if (!empty($langSpecific)) { + $str = str_replace($langSpecific[0], $langSpecific[1], $str); } foreach ($this->charsArray() as $key => $value) { @@ -1814,26 +1817,26 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess 'Ы', 'Й', 'Υ', 'Ϋ', 'Ŷ', 'Y'], 'Z' => ['Ź', 'Ž', 'Ż', 'З', 'Ζ', 'Z'], 'AE' => ['Æ', 'Ǽ'], - 'CH' => ['Ч'], - 'DJ' => ['Ђ'], - 'DZ' => ['Џ'], - 'GX' => ['Ĝ'], - 'HX' => ['Ĥ'], - 'IJ' => ['IJ'], - 'JX' => ['Ĵ'], - 'KH' => ['Х'], - 'LJ' => ['Љ'], - 'NJ' => ['Њ'], - 'OE' => ['Œ'], - 'PS' => ['Ψ'], - 'SH' => ['Ш'], - 'SHCH' => ['Щ'], - 'SS' => ['ẞ'], - 'TH' => ['Þ'], - 'TS' => ['Ц'], - 'YA' => ['Я'], - 'YU' => ['Ю'], - 'ZH' => ['Ж'], + 'Ch' => ['Ч'], + 'Dj' => ['Ђ'], + 'Dz' => ['Џ'], + 'Gx' => ['Ĝ'], + 'Hx' => ['Ĥ'], + 'Ij' => ['IJ'], + 'Jx' => ['Ĵ'], + 'Kh' => ['Х'], + 'Lj' => ['Љ'], + 'Nj' => ['Њ'], + 'Oe' => ['Œ'], + 'Ps' => ['Ψ'], + 'Sh' 
=> ['Ш'], + 'Shch' => ['Щ'], + 'Ss' => ['ẞ'], + 'Th' => ['Þ'], + 'Ts' => ['Ц'], + 'Ya' => ['Я'], + 'Yu' => ['Ю'], + 'Zh' => ['Ж'], ' ' => ["\xC2\xA0", "\xE2\x80\x80", "\xE2\x80\x81", "\xE2\x80\x82", "\xE2\x80\x83", "\xE2\x80\x84", "\xE2\x80\x85", "\xE2\x80\x86", "\xE2\x80\x87", @@ -1844,39 +1847,37 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess } /** - * Returns the locale-specific replacements for the toAscii() method. + * Returns language-specific replacements for the toAscii() method. + * For example, German will map 'ä' to 'ae', while other languages + * will simply return 'a'. * - * @param string $locale Locale of the source string - * @return array An array of replacements. + * @param string $language Language of the source string + * @return array An array of replacements. */ - protected function localeSpecificCharsArray($locale = 'en') + protected static function langSpecificCharsArray($language = 'en') { - $split = preg_split('/[-_]/', $locale); - $locale = strtolower($split[0]); + $split = preg_split('/[-_]/', $language); + $language = strtolower($split[0]); static $charsArray = []; - if (isset($charsArray[$locale])) { - return $charsArray[$locale]; + if (isset($charsArray[$language])) { + return $charsArray[$language]; } - $localeSpecific = [ + $languageSpecific = [ 'de' => [ - 'ae' => ['ä'], - 'oe' => ['ö'], - 'ue' => ['ü'], - 'AE' => ['Ä'], - 'OE' => ['Ö'], - 'UE' => ['Ü'] + ['ä', 'ö', 'ü', 'Ä', 'Ö', 'Ü' ], + ['ae', 'oe', 'ue', 'AE', 'OE', 'UE'], ] ]; - if (isset($localeSpecific[$locale])) { - $charsArray[$locale] = $localeSpecific[$locale]; + if (isset($languageSpecific[$language])) { + $charsArray[$language] = $languageSpecific[$language]; } else { - $charsArray[$locale] = []; + $charsArray[$language] = []; } - return $charsArray[$locale]; + return $charsArray[$language]; } /** diff --git a/tests/StringyTest.php b/tests/StringyTest.php index 051afa8..570e6ab 100644 --- a/tests/StringyTest.php +++ b/tests/StringyTest.php @@ 
-653,11 +653,11 @@ class StringyTestCase extends PHPUnit_Framework_TestCase /** * @dataProvider toAsciiProvider() */ - public function testToAscii($expected, $str, $locale = 'en', + public function testToAscii($expected, $str, $language = 'en', $removeUnsupported = true) { $stringy = S::create($str); - $result = $stringy->toAscii($locale, $removeUnsupported); + $result = $stringy->toAscii($language, $removeUnsupported); $this->assertStringy($result); $this->assertEquals($expected, $result); $this->assertEquals($str, $stringy);