1
0
mirror of https://github.com/danielstjules/Stringy.git synced 2025-08-12 08:14:06 +02:00

toAscii changes: rename locale to language, correct some transliterations

This commit is contained in:
Daniel St. Jules
2017-03-08 19:14:42 -05:00
parent bf2a302aa1
commit 72102ae911
4 changed files with 53 additions and 51 deletions

View File

@@ -923,9 +923,10 @@ s('i like to watch television')->titleize($ignore);
Returns an ASCII version of the string. A set of non-ASCII characters are Returns an ASCII version of the string. A set of non-ASCII characters are
replaced with their closest ASCII counterparts, and the rest are removed replaced with their closest ASCII counterparts, and the rest are removed
unless instructed otherwise. The locale of the source string can be supplied by default. The language or locale of the source string can be supplied
for locale-specific transliteration. The locale can be supplied in any of the for language-specific transliteration in any of the following formats:
following formats: en, en_GB, or en-GB. en, en_GB, or en-GB. For example, passing "de" results in "äöü" mapping
to "aeoeue" rather than "aou" as in other languages.
```php ```php
s('fòôbàř')->toAscii(); // 'foobar' s('fòôbàř')->toAscii(); // 'foobar'

View File

@@ -75,7 +75,7 @@ use ReflectionMethod;
* @method static string swapCase(string $str, string $encoding = null) * @method static string swapCase(string $str, string $encoding = null)
* @method static string tidy(string $str, string $encoding = null) * @method static string tidy(string $str, string $encoding = null)
* @method static string titleize(string $str, string $encoding = null) * @method static string titleize(string $str, string $encoding = null)
* @method static string toAscii(string $str, string $locale = 'en', bool $removeUnsupported = true, string $encoding = null) * @method static string toAscii(string $str, string $language = 'en', bool $removeUnsupported = true, string $encoding = null)
* @method static bool toBoolean(string $str, string $encoding = null) * @method static bool toBoolean(string $str, string $encoding = null)
* @method static string toLowerCase(string $str, string $encoding = null) * @method static string toLowerCase(string $str, string $encoding = null)
* @method static string toSpaces(string $str, int $tabLength = 4, string $encoding = null) * @method static string toSpaces(string $str, int $tabLength = 4, string $encoding = null)

View File

@@ -1425,20 +1425,23 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess
/** /**
* Returns an ASCII version of the string. A set of non-ASCII characters are * Returns an ASCII version of the string. A set of non-ASCII characters are
* replaced with their closest ASCII counterparts, and the rest are removed * replaced with their closest ASCII counterparts, and the rest are removed
* unless instructed otherwise. The locale can be supplied in any of the * by default. The language or locale of the source string can be supplied
* following formats: en, en_GB, or en-GB. * for language-specific transliteration in any of the following formats:
* en, en_GB, or en-GB. For example, passing "de" results in "äöü" mapping
* to "aeoeue" rather than "aou" as in other languages.
* *
* @param string $locale Locale of the source string * @param string $language Language of the source string
* @param bool $removeUnsupported Whether or not to remove the * @param bool $removeUnsupported Whether or not to remove the
* unsupported characters * unsupported characters
* @return static Object whose $str contains only ASCII characters * @return static Object whose $str contains only ASCII characters
*/ */
public function toAscii($locale = 'en', $removeUnsupported = true) public function toAscii($language = 'en', $removeUnsupported = true)
{ {
$str = $this->str; $str = $this->str;
foreach ($this->localeSpecificCharsArray($locale) as $key => $value) { $langSpecific = $this->langSpecificCharsArray($language);
$str = str_replace($value, $key, $str); if (!empty($langSpecific)) {
$str = str_replace($langSpecific[0], $langSpecific[1], $str);
} }
foreach ($this->charsArray() as $key => $value) { foreach ($this->charsArray() as $key => $value) {
@@ -1814,26 +1817,26 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess
'Ы', 'Й', 'Υ', 'Ϋ', 'Ŷ', 'Ｙ'], 'Ы', 'Й', 'Υ', 'Ϋ', 'Ŷ', 'Ｙ'],
'Z' => ['Ź', 'Ž', 'Ż', 'З', 'Ζ', 'Ｚ'], 'Z' => ['Ź', 'Ž', 'Ż', 'З', 'Ζ', 'Ｚ'],
'AE' => ['Æ', 'Ǽ'], 'AE' => ['Æ', 'Ǽ'],
'CH' => ['Ч'], 'Ch' => ['Ч'],
'DJ' => ['Ђ'], 'Dj' => ['Ђ'],
'DZ' => ['Џ'], 'Dz' => ['Џ'],
'GX' => ['Ĝ'], 'Gx' => ['Ĝ'],
'HX' => ['Ĥ'], 'Hx' => ['Ĥ'],
'IJ' => ['IJ'], 'Ij' => ['IJ'],
'JX' => ['Ĵ'], 'Jx' => ['Ĵ'],
'KH' => ['Х'], 'Kh' => ['Х'],
'LJ' => ['Љ'], 'Lj' => ['Љ'],
'NJ' => ['Њ'], 'Nj' => ['Њ'],
'OE' => ['Œ'], 'Oe' => ['Œ'],
'PS' => ['Ψ'], 'Ps' => ['Ψ'],
'SH' => ['Ш'], 'Sh' => ['Ш'],
'SHCH' => ['Щ'], 'Shch' => ['Щ'],
'SS' => ['ẞ'], 'Ss' => ['ẞ'],
'TH' => ['Þ'], 'Th' => ['Þ'],
'TS' => ['Ц'], 'Ts' => ['Ц'],
'YA' => ['Я'], 'Ya' => ['Я'],
'YU' => ['Ю'], 'Yu' => ['Ю'],
'ZH' => ['Ж'], 'Zh' => ['Ж'],
' ' => ["\xC2\xA0", "\xE2\x80\x80", "\xE2\x80\x81", ' ' => ["\xC2\xA0", "\xE2\x80\x80", "\xE2\x80\x81",
"\xE2\x80\x82", "\xE2\x80\x83", "\xE2\x80\x84", "\xE2\x80\x82", "\xE2\x80\x83", "\xE2\x80\x84",
"\xE2\x80\x85", "\xE2\x80\x86", "\xE2\x80\x87", "\xE2\x80\x85", "\xE2\x80\x86", "\xE2\x80\x87",
@@ -1844,39 +1847,37 @@ class Stringy implements Countable, IteratorAggregate, ArrayAccess
} }
/** /**
* Returns the locale-specific replacements for the toAscii() method. * Returns language-specific replacements for the toAscii() method.
* For example, German will map 'ä' to 'ae', while other languages
* will simply return 'a'.
* *
* @param string $locale Locale of the source string * @param string $language Language of the source string
* @return array An array of replacements. * @return array An array of replacements.
*/ */
protected function localeSpecificCharsArray($locale = 'en') protected static function langSpecificCharsArray($language = 'en')
{ {
$split = preg_split('/[-_]/', $locale); $split = preg_split('/[-_]/', $language);
$locale = strtolower($split[0]); $language = strtolower($split[0]);
static $charsArray = []; static $charsArray = [];
if (isset($charsArray[$locale])) { if (isset($charsArray[$language])) {
return $charsArray[$locale]; return $charsArray[$language];
} }
$localeSpecific = [ $languageSpecific = [
'de' => [ 'de' => [
'ae' => ['ä'], ['ä', 'ö', 'ü', 'Ä', 'Ö', 'Ü'],
'oe' => ['ö'], ['ae', 'oe', 'ue', 'AE', 'OE', 'UE'],
'ue' => ['ü'],
'AE' => ['Ä'],
'OE' => ['Ö'],
'UE' => ['Ü']
] ]
]; ];
if (isset($localeSpecific[$locale])) { if (isset($languageSpecific[$language])) {
$charsArray[$locale] = $localeSpecific[$locale]; $charsArray[$language] = $languageSpecific[$language];
} else { } else {
$charsArray[$locale] = []; $charsArray[$language] = [];
} }
return $charsArray[$locale]; return $charsArray[$language];
} }
/** /**

View File

@@ -653,11 +653,11 @@ class StringyTestCase extends PHPUnit_Framework_TestCase
/** /**
* @dataProvider toAsciiProvider() * @dataProvider toAsciiProvider()
*/ */
public function testToAscii($expected, $str, $locale = 'en', public function testToAscii($expected, $str, $language = 'en',
$removeUnsupported = true) $removeUnsupported = true)
{ {
$stringy = S::create($str); $stringy = S::create($str);
$result = $stringy->toAscii($locale, $removeUnsupported); $result = $stringy->toAscii($language, $removeUnsupported);
$this->assertStringy($result); $this->assertStringy($result);
$this->assertEquals($expected, $result); $this->assertEquals($expected, $result);
$this->assertEquals($str, $stringy); $this->assertEquals($str, $stringy);