1
0
mirror of https://github.com/danielstjules/Stringy.git synced 2025-08-08 06:16:32 +02:00

Fix #116 with breaking change: add locale parameter to toAscii

This fixes a previous regression in the handling of some umlaut
characters, and introduces a breaking change by adding the $locale
parameter before $removeUnsupported.
This commit is contained in:
Daniel St. Jules
2017-03-02 23:58:05 -05:00
parent edbda419cb
commit b0976c72b7
4 changed files with 69 additions and 20 deletions

View File

@@ -916,14 +916,18 @@ s('i like to watch television')->titleize($ignore);
// 'I Like to Watch Television' // 'I Like to Watch Television'
``` ```
##### toAscii() ##### toAscii([, string $locale = 'en' [, bool $removeUnsupported = true ]])
Returns an ASCII version of the string. A set of non-ASCII characters are Returns an ASCII version of the string. A set of non-ASCII characters are
replaced with their closest ASCII counterparts, and the rest are removed replaced with their closest ASCII counterparts, and the rest are removed
unless instructed otherwise. unless instructed otherwise. The locale of the source string can be supplied
for locale-specific transliteration. The locale can be supplied in any of the
following formats: en, en_GB, or en-GB.
```php ```php
s('fòôbàř')->toAscii(); // 'foobar' s('fòôbàř')->toAscii(); // 'foobar'
s('äöü')->toAscii(); // 'aou'
s('äöü')->toAscii('de'); // 'aeoeue'
``` ```
##### toBoolean() ##### toBoolean()

View File

@@ -72,7 +72,7 @@ use ReflectionMethod;
* @method static string swapCase(string $str, string $encoding = null) * @method static string swapCase(string $str, string $encoding = null)
* @method static string tidy(string $str, string $encoding = null) * @method static string tidy(string $str, string $encoding = null)
* @method static string titleize(string $str, string $encoding = null) * @method static string titleize(string $str, string $encoding = null)
* @method static string toAscii(string $str, bool $removeUnsupported = true, string $encoding = null) * @method static string toAscii(string $str, string $locale = 'en', bool $removeUnsupported = true, string $encoding = null)
* @method static bool toBoolean(string $str, string $encoding = null) * @method static bool toBoolean(string $str, string $encoding = null)
* @method static string toLowerCase(string $str, string $encoding = null) * @method static string toLowerCase(string $str, string $encoding = null)
* @method static string toSpaces(string $str, int $tabLength = 4, string $encoding = null) * @method static string toSpaces(string $str, int $tabLength = 4, string $encoding = null)

View File

@@ -1424,16 +1424,22 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess
/** /**
* Returns an ASCII version of the string. A set of non-ASCII characters are * Returns an ASCII version of the string. A set of non-ASCII characters are
* replaced with their closest ASCII counterparts, and the rest are removed * replaced with their closest ASCII counterparts, and the rest are removed
* unless instructed otherwise. * unless instructed otherwise. The locale can be supplied in any of the
* following formats: en, en_GB, or en-GB.
* *
* @param string $locale Locale of the source string
* @param bool $removeUnsupported Whether or not to remove the * @param bool $removeUnsupported Whether or not to remove the
* unsupported characters * unsupported characters
* @return static Object whose $str contains only ASCII characters * @return static Object whose $str contains only ASCII characters
*/ */
public function toAscii($removeUnsupported = true) public function toAscii($locale = 'en', $removeUnsupported = true)
{ {
$str = $this->str; $str = $this->str;
foreach ($this->localeSpecificCharsArray($locale) as $key => $value) {
$str = str_replace($value, $key, $str);
}
foreach ($this->charsArray() as $key => $value) { foreach ($this->charsArray() as $key => $value) {
$str = str_replace($value, $key, $str); $str = str_replace($value, $key, $str);
} }
@@ -1686,7 +1692,7 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess
'α', 'ά', 'ἀ', 'ἁ', 'ἂ', 'ἃ', 'ἄ', 'ἅ', 'ἆ', 'ἇ', 'α', 'ά', 'ἀ', 'ἁ', 'ἂ', 'ἃ', 'ἄ', 'ἅ', 'ἆ', 'ἇ',
'ᾀ', 'ᾁ', 'ᾂ', 'ᾃ', 'ᾄ', 'ᾅ', 'ᾆ', 'ᾇ', 'ὰ', 'ά', 'ᾀ', 'ᾁ', 'ᾂ', 'ᾃ', 'ᾄ', 'ᾅ', 'ᾆ', 'ᾇ', 'ὰ', 'ά',
'ᾰ', 'ᾱ', 'ᾲ', 'ᾳ', 'ᾴ', 'ᾶ', 'ᾷ', 'а', 'أ', 'အ', 'ᾰ', 'ᾱ', 'ᾲ', 'ᾳ', 'ᾴ', 'ᾶ', 'ᾷ', 'а', 'أ', 'အ',
'ာ', 'ါ', 'ǻ', 'ǎ', 'ª', 'ა', 'अ', 'ا', ''), 'ာ', 'ါ', 'ǻ', 'ǎ', 'ª', 'ა', 'अ', 'ا', '', 'ä'),
'b' => array('б', 'β', 'Ъ', 'Ь', 'ب', 'ဗ', 'ბ', ''), 'b' => array('б', 'β', 'Ъ', 'Ь', 'ب', 'ဗ', 'ბ', ''),
'c' => array('ç', 'ć', 'č', 'ĉ', 'ċ', ''), 'c' => array('ç', 'ć', 'č', 'ĉ', 'ċ', ''),
'd' => array('ď', 'ð', 'đ', 'ƌ', 'ȡ', 'ɖ', 'ɗ', 'ᵭ', 'ᶁ', 'ᶑ', 'd' => array('ď', 'ð', 'đ', 'ƌ', 'ȡ', 'ɖ', 'ɗ', 'ᵭ', 'ᶁ', 'ᶑ',
@@ -1715,7 +1721,8 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess
'o' => array('ó', 'ò', 'ỏ', 'õ', 'ọ', 'ô', 'ố', 'ồ', 'ổ', 'ỗ', 'o' => array('ó', 'ò', 'ỏ', 'õ', 'ọ', 'ô', 'ố', 'ồ', 'ổ', 'ỗ',
'ộ', 'ơ', 'ớ', 'ờ', 'ở', 'ỡ', 'ợ', 'ø', 'ō', 'ő', 'ộ', 'ơ', 'ớ', 'ờ', 'ở', 'ỡ', 'ợ', 'ø', 'ō', 'ő',
'ŏ', 'ο', 'ὀ', 'ὁ', 'ὂ', 'ὃ', 'ὄ', 'ὅ', 'ὸ', 'ό', 'ŏ', 'ο', 'ὀ', 'ὁ', 'ὂ', 'ὃ', 'ὄ', 'ὅ', 'ὸ', 'ό',
'о', 'و', 'θ', 'ို', 'ǒ', 'ǿ', 'º', 'ო', 'ओ', ''), 'о', 'و', 'θ', 'ို', 'ǒ', 'ǿ', 'º', 'ო', 'ओ', '',
'ö'),
'p' => array('п', 'π', 'ပ', 'პ', 'پ', ''), 'p' => array('п', 'π', 'ပ', 'პ', 'پ', ''),
'q' => array('', ''), 'q' => array('', ''),
'r' => array('ŕ', 'ř', 'ŗ', 'р', 'ρ', 'ر', 'რ', ''), 'r' => array('ŕ', 'ř', 'ŗ', 'р', 'ρ', 'ر', 'რ', ''),
@@ -1726,7 +1733,7 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess
'u' => array('ú', 'ù', 'ủ', 'ũ', 'ụ', 'ư', 'ứ', 'ừ', 'ử', 'ữ', 'u' => array('ú', 'ù', 'ủ', 'ũ', 'ụ', 'ư', 'ứ', 'ừ', 'ử', 'ữ',
'ự', 'û', 'ū', 'ů', 'ű', 'ŭ', 'ų', 'µ', 'у', 'ဉ', 'ự', 'û', 'ū', 'ů', 'ű', 'ŭ', 'ų', 'µ', 'у', 'ဉ',
'ု', 'ူ', 'ǔ', 'ǖ', 'ǘ', 'ǚ', 'ǜ', 'უ', 'उ', '', 'ု', 'ူ', 'ǔ', 'ǖ', 'ǘ', 'ǚ', 'ǜ', 'უ', 'उ', '',
'ў'), 'ў', 'ü'),
'v' => array('в', 'ვ', 'ϐ', ''), 'v' => array('в', 'ვ', 'ϐ', ''),
'w' => array('ŵ', 'ω', 'ώ', '', 'ွ', ''), 'w' => array('ŵ', 'ω', 'ώ', '', 'ွ', ''),
'x' => array('χ', 'ξ', ''), 'x' => array('χ', 'ξ', ''),
@@ -1734,7 +1741,7 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess
'ϋ', 'ύ', 'ΰ', 'ي', 'ယ', ''), 'ϋ', 'ύ', 'ΰ', 'ي', 'ယ', ''),
'z' => array('ź', 'ž', 'ż', 'з', 'ζ', 'ز', 'ဇ', 'ზ', ''), 'z' => array('ź', 'ž', 'ż', 'з', 'ζ', 'ز', 'ဇ', 'ზ', ''),
'aa' => array('ع', 'आ', 'آ'), 'aa' => array('ع', 'आ', 'آ'),
'ae' => array('ä', 'æ', 'ǽ'), 'ae' => array('æ', 'ǽ'),
'ai' => array('ऐ'), 'ai' => array('ऐ'),
'at' => array('@'), 'at' => array('@'),
'ch' => array('ч', 'ჩ', 'ჭ', 'چ'), 'ch' => array('ч', 'ჩ', 'ჭ', 'چ'),
@@ -1747,7 +1754,7 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess
'kh' => array('х', 'خ', 'ხ'), 'kh' => array('х', 'خ', 'ხ'),
'lj' => array('љ'), 'lj' => array('љ'),
'nj' => array('њ'), 'nj' => array('њ'),
'oe' => array('ö', 'œ', 'ؤ'), 'oe' => array('œ', 'ؤ'),
'oi' => array('ऑ'), 'oi' => array('ऑ'),
'oii' => array('ऒ'), 'oii' => array('ऒ'),
'ps' => array('ψ'), 'ps' => array('ψ'),
@@ -1757,7 +1764,6 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess
'sx' => array('ŝ'), 'sx' => array('ŝ'),
'th' => array('þ', 'ϑ', 'ث', 'ذ', 'ظ'), 'th' => array('þ', 'ϑ', 'ث', 'ذ', 'ظ'),
'ts' => array('ц', 'ც', 'წ'), 'ts' => array('ц', 'ც', 'წ'),
'ue' => array('ü'),
'uu' => array('ऊ'), 'uu' => array('ऊ'),
'ya' => array('я'), 'ya' => array('я'),
'yu' => array('ю'), 'yu' => array('ю'),
@@ -1767,7 +1773,7 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess
'Ặ', 'Â', 'Ấ', 'Ầ', 'Ẩ', 'Ẫ', 'Ậ', 'Å', 'Ā', 'Ą', 'Ặ', 'Â', 'Ấ', 'Ầ', 'Ẩ', 'Ẫ', 'Ậ', 'Å', 'Ā', 'Ą',
'Α', 'Ά', 'Ἀ', 'Ἁ', 'Ἂ', 'Ἃ', 'Ἄ', 'Ἅ', 'Ἆ', 'Ἇ', 'Α', 'Ά', 'Ἀ', 'Ἁ', 'Ἂ', 'Ἃ', 'Ἄ', 'Ἅ', 'Ἆ', 'Ἇ',
'ᾈ', 'ᾉ', 'ᾊ', 'ᾋ', 'ᾌ', 'ᾍ', 'ᾎ', 'ᾏ', 'Ᾰ', 'Ᾱ', 'ᾈ', 'ᾉ', 'ᾊ', 'ᾋ', 'ᾌ', 'ᾍ', 'ᾎ', 'ᾏ', 'Ᾰ', 'Ᾱ',
'Ὰ', 'Ά', 'ᾼ', 'А', 'Ǻ', 'Ǎ', ''), 'Ὰ', 'Ά', 'ᾼ', 'А', 'Ǻ', 'Ǎ', '', 'Ä'),
'B' => array('Б', 'Β', 'ब', ''), 'B' => array('Б', 'Β', 'ब', ''),
'C' => array('Ç','Ć', 'Č', 'Ĉ', 'Ċ', ''), 'C' => array('Ç','Ć', 'Č', 'Ĉ', 'Ċ', ''),
'D' => array('Ď', 'Ð', 'Đ', 'Ɖ', 'Ɗ', 'Ƌ', 'ᴅ', 'ᴆ', 'Д', 'Δ', 'D' => array('Ď', 'Ð', 'Đ', 'Ɖ', 'Ɗ', 'Ƌ', 'ᴅ', 'ᴆ', 'Д', 'Δ',
@@ -1791,7 +1797,7 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess
'O' => array('Ó', 'Ò', 'Ỏ', 'Õ', 'Ọ', 'Ô', 'Ố', 'Ồ', 'Ổ', 'Ỗ', 'O' => array('Ó', 'Ò', 'Ỏ', 'Õ', 'Ọ', 'Ô', 'Ố', 'Ồ', 'Ổ', 'Ỗ',
'Ộ', 'Ơ', 'Ớ', 'Ờ', 'Ở', 'Ỡ', 'Ợ', 'Ø', 'Ō', 'Ő', 'Ộ', 'Ơ', 'Ớ', 'Ờ', 'Ở', 'Ỡ', 'Ợ', 'Ø', 'Ō', 'Ő',
'Ŏ', 'Ο', 'Ό', 'Ὀ', 'Ὁ', 'Ὂ', 'Ὃ', 'Ὄ', 'Ὅ', 'Ὸ', 'Ŏ', 'Ο', 'Ό', 'Ὀ', 'Ὁ', 'Ὂ', 'Ὃ', 'Ὄ', 'Ὅ', 'Ὸ',
'Ό', 'О', 'Θ', 'Ө', 'Ǒ', 'Ǿ', ''), 'Ό', 'О', 'Θ', 'Ө', 'Ǒ', 'Ǿ', '', 'Ö'),
'P' => array('П', 'Π', ''), 'P' => array('П', 'Π', ''),
'Q' => array(''), 'Q' => array(''),
'R' => array('Ř', 'Ŕ', 'Р', 'Ρ', 'Ŗ', ''), 'R' => array('Ř', 'Ŕ', 'Р', 'Ρ', 'Ŗ', ''),
@@ -1799,14 +1805,14 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess
'T' => array('Ť', 'Ţ', 'Ŧ', 'Ț', 'Т', 'Τ', ''), 'T' => array('Ť', 'Ţ', 'Ŧ', 'Ț', 'Т', 'Τ', ''),
'U' => array('Ú', 'Ù', 'Ủ', 'Ũ', 'Ụ', 'Ư', 'Ứ', 'Ừ', 'Ử', 'Ữ', 'U' => array('Ú', 'Ù', 'Ủ', 'Ũ', 'Ụ', 'Ư', 'Ứ', 'Ừ', 'Ử', 'Ữ',
'Ự', 'Û', 'Ū', 'Ů', 'Ű', 'Ŭ', 'Ų', 'У', 'Ǔ', 'Ǖ', 'Ự', 'Û', 'Ū', 'Ů', 'Ű', 'Ŭ', 'Ų', 'У', 'Ǔ', 'Ǖ',
'Ǘ', 'Ǚ', 'Ǜ', '', 'Ў'), 'Ǘ', 'Ǚ', 'Ǜ', '', 'Ў', 'Ü'),
'V' => array('В', ''), 'V' => array('В', ''),
'W' => array('Ω', 'Ώ', 'Ŵ', ''), 'W' => array('Ω', 'Ώ', 'Ŵ', ''),
'X' => array('Χ', 'Ξ', ''), 'X' => array('Χ', 'Ξ', ''),
'Y' => array('Ý', 'Ỳ', 'Ỷ', 'Ỹ', 'Ỵ', 'Ÿ', 'Ῠ', 'Ῡ', 'Ὺ', 'Ύ', 'Y' => array('Ý', 'Ỳ', 'Ỷ', 'Ỹ', 'Ỵ', 'Ÿ', 'Ῠ', 'Ῡ', 'Ὺ', 'Ύ',
'Ы', 'Й', 'Υ', 'Ϋ', 'Ŷ', ''), 'Ы', 'Й', 'Υ', 'Ϋ', 'Ŷ', ''),
'Z' => array('Ź', 'Ž', 'Ż', 'З', 'Ζ', ''), 'Z' => array('Ź', 'Ž', 'Ż', 'З', 'Ζ', ''),
'AE' => array('Ä', 'Æ', 'Ǽ'), 'AE' => array('Æ', 'Ǽ'),
'CH' => array('Ч'), 'CH' => array('Ч'),
'DJ' => array('Ђ'), 'DJ' => array('Ђ'),
'DZ' => array('Џ'), 'DZ' => array('Џ'),
@@ -1817,14 +1823,13 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess
'KH' => array('Х'), 'KH' => array('Х'),
'LJ' => array('Љ'), 'LJ' => array('Љ'),
'NJ' => array('Њ'), 'NJ' => array('Њ'),
'OE' => array('Ö', 'Œ'), 'OE' => array('Œ'),
'PS' => array('Ψ'), 'PS' => array('Ψ'),
'SH' => array('Ш'), 'SH' => array('Ш'),
'SHCH' => array('Щ'), 'SHCH' => array('Щ'),
'SS' => array('ẞ'), 'SS' => array('ẞ'),
'TH' => array('Þ'), 'TH' => array('Þ'),
'TS' => array('Ц'), 'TS' => array('Ц'),
'UE' => array('Ü'),
'YA' => array('Я'), 'YA' => array('Я'),
'YU' => array('Ю'), 'YU' => array('Ю'),
'ZH' => array('Ж'), 'ZH' => array('Ж'),
@@ -1837,6 +1842,42 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess
); );
} }
/**
 * Looks up the replacement table that toAscii() applies for a given locale.
 *
 * Only the part of the locale before the first '-' or '_' is used, and it
 * is lower-cased, so 'de', 'de_DE' and 'DE-at' all resolve to 'de'. Each
 * resolved table (including the empty one for locales without specific
 * rules) is kept in a function-static cache keyed by that language code.
 *
 * @param  string $locale Locale of the source string
 * @return array  Map of ASCII replacement => array of characters it
 *                replaces; empty when the locale has no specific rules.
 */
protected function localeSpecificCharsArray($locale = 'en')
{
    static $cache = array();

    // Reduce "xx", "xx_YY" or "xx-YY" to the lower-cased language part.
    $parts = preg_split('/[-_]/', $locale);
    $language = strtolower($parts[0]);

    if (!isset($cache[$language])) {
        $tables = array(
            'de' => array(
                'ae' => array('ä'),
                'oe' => array('ö'),
                'ue' => array('ü'),
                'AE' => array('Ä'),
                'OE' => array('Ö'),
                'UE' => array('Ü')
            )
        );

        // Locales without an entry get an empty table.
        $cache[$language] = isset($tables[$language])
            ? $tables[$language]
            : array();
    }

    return $cache[$language];
}
/** /**
* Adds the specified amount of left and right padding to the given string. * Adds the specified amount of left and right padding to the given string.
* The default character used is a space. * The default character used is a space.

View File

@@ -653,10 +653,11 @@ class StringyTestCase extends PHPUnit_Framework_TestCase
/** /**
* @dataProvider toAsciiProvider() * @dataProvider toAsciiProvider()
*/ */
public function testToAscii($expected, $str, $removeUnsupported = true) public function testToAscii($expected, $str, $locale = 'en',
$removeUnsupported = true)
{ {
$stringy = S::create($str); $stringy = S::create($str);
$result = $stringy->toAscii($removeUnsupported); $result = $stringy->toAscii($locale, $removeUnsupported);
$this->assertStringy($result); $this->assertStringy($result);
$this->assertEquals($expected, $result); $this->assertEquals($expected, $result);
$this->assertEquals($str, $stringy); $this->assertEquals($str, $stringy);
@@ -681,7 +682,10 @@ class StringyTestCase extends PHPUnit_Framework_TestCase
array(' ', ''), // medium mathematical space (U+205F) array(' ', ''), // medium mathematical space (U+205F)
array(' ', ' '), // ideographic space (U+3000) array(' ', ' '), // ideographic space (U+3000)
array('', '𐍉'), // some uncommon, unsupported character (U+10349) array('', '𐍉'), // some uncommon, unsupported character (U+10349)
array('𐍉', '𐍉', false), array('𐍉', '𐍉', 'en', false),
array('aouAOU', 'äöüÄÖÜ'),
array('aeoeueAEOEUE', 'äöüÄÖÜ', 'de'),
array('aeoeueAEOEUE', 'äöüÄÖÜ', 'de_DE')
); );
} }