mirror of
https://github.com/phpbb/phpbb.git
synced 2025-06-03 13:05:44 +02:00
Case folding! :D
git-svn-id: file:///svn/phpbb/trunk@6464 89ea8834-ac86-4346-8a33-228a782c2dd0
This commit is contained in:
parent
6972d28633
commit
722ab535a6
147
phpBB/develop/generate_utf_casefold.php
Normal file
147
phpBB/develop/generate_utf_casefold.php
Normal file
@ -0,0 +1,147 @@
|
||||
<?php
|
||||
/**
|
||||
*
|
||||
* @package phpBB3
|
||||
* @version $Id$
|
||||
* @copyright (c) 2005 phpBB Group
|
||||
* @license http://opensource.org/licenses/gpl-license.php GNU Public License
|
||||
*
|
||||
*/
|
||||
|
||||
if (php_sapi_name() !== 'cli')
{
	// The CLI-only guard below is commented out, so this check currently has no effect.
//	die("This program must be run from the command line.\n");
}

// Downloading and parsing may take a while; lift the execution time limit
set_time_limit(0);

define('IN_PHPBB', true);
$phpbb_root_path = '../';
$phpEx = substr(strrchr(__FILE__, '.'), 1);

echo "Checking for required files\n";
download('http://unicode.org/Public/UNIDATA/CaseFolding.txt');
echo "\n";

/**
* Load the CaseFolding table
*
* The table is read from the same develop/ location download() stores it in,
* so the script no longer depends on the current working directory.
*/
echo "Loading CaseFolding\n";
$casefolding_file = $phpbb_root_path . 'develop/CaseFolding.txt';
$unidata = file_get_contents($casefolding_file);

if ($unidata === false)
{
	die("Can't read $casefolding_file\n");
}
|
||||
|
||||
|
||||
/**
* Encode a single code point as a UTF-8 byte sequence
*
* The length of the sequence (1 to 4 bytes) is chosen purely from the
* magnitude of $cp; no range validation is performed.
*
* @param int $cp Code point to encode
* @return string UTF-8 encoded character
*/
function utf8_chr($cp)
{
	// Single byte
	if ($cp <= 0x7F)
	{
		return chr($cp);
	}

	// Continuation byte carrying the lowest six bits, shared by all multi-byte forms
	$last = chr(0x80 | ($cp & 0x3F));

	// Two bytes
	if ($cp <= 0x7FF)
	{
		return chr(0xC0 | ($cp >> 6)) . $last;
	}

	// Continuation byte carrying bits 6-11, shared by the three and four byte forms
	$middle = chr(0x80 | (($cp >> 6) & 0x3F));

	// Three bytes
	if ($cp <= 0xFFFF)
	{
		return chr(0xE0 | ($cp >> 12)) . $middle . $last;
	}

	// Four bytes
	return chr(0xF0 | ($cp >> 18)) . chr(0x80 | (($cp >> 12) & 0x3F)) . $middle . $last;
}
|
||||
|
||||
/**
* Parse the CaseFolding table and write one lookup file per status letter
*
* Every matched line has the form "<code>; <status>; <mapping>;" where status
* is one of C, F or S and mapping is one or more space separated hex code points.
*/
if (!preg_match_all('/^([0-9A-F]+); ([CFS]); ([0-9A-F]+(?: [0-9A-F]+)*);/im', $unidata, $array, PREG_SET_ORDER))
{
	// Nothing matched (or the regex failed): writing empty tables would be worse than stopping
	die("No case folding entries found in CaseFolding.txt\n");
}

$uniarray = array();

foreach ($array as $value)
{
	// Key: the source character; value: the concatenation of every mapped character
	$source = utf8_chr(hexdec((string) $value[1]));
	$folded = implode('', array_map('utf8_chr', array_map('hexdec', explode(' ', $value[3]))));

	$uniarray[$value[2]][$source] = $folded;
}

foreach ($uniarray as $idx => $contents)
{
	$filename = $phpbb_root_path . 'includes/utf/data/case_fold_' . $idx . '.' . $phpEx;

	echo "Writing to case_fold_$idx.$phpEx\n";

	if (!$fp = fopen($filename, 'wb'))
	{
		die("Can't open $filename for output... please check your permissions or something");
	}

	// Each data file simply returns its lookup array when included
	$written = fwrite($fp, '<?php return ' . my_var_export($contents) . ';');
	fclose($fp);

	if ($written === false)
	{
		die("Can't write to $filename\n");
	}
}
|
||||
|
||||
/**
* Return a parsable string representation of a variable
*
* Arrays are exported recursively in the compact form array(key=>value,...)
* and strings as single-quoted literals. Integers are returned as their
* decimal string; any other value is delegated to var_export() so that the
* result is always a string of valid PHP code.
*
* @param mixed $var Variable
* @return string PHP code representing the variable
*/
function my_var_export($var)
{
	if (is_array($var))
	{
		$lines = array();

		foreach ($var as $k => $v)
		{
			$lines[] = my_var_export($k) . '=>' . my_var_export($v);
		}

		return 'array(' . implode(',', $lines) . ')';
	}

	if (is_string($var))
	{
		// Backslash and single quote are the only characters that need escaping
		// inside a single-quoted literal; the backslash must be handled first
		return "'" . str_replace(array('\\', "'"), array('\\\\', "\\'"), $var) . "'";
	}

	if (is_int($var))
	{
		return (string) $var;
	}

	// Booleans, null and floats would otherwise be emitted as '' or '1'
	return var_export($var, true);
}
|
||||
|
||||
/**
* Download a file to the develop/ dir
*
* Nothing is done if a file with the same base name already exists in
* develop/. Progress is echoed while copying. If reading or writing fails
* part-way through, the incomplete file is removed so that a later run does
* not mistake it for a finished download, and the script is terminated.
*
* @param string $url URL of the file to download
* @return void
*/
function download($url)
{
	global $phpbb_root_path;

	$target = $phpbb_root_path . 'develop/' . basename($url);

	if (file_exists($target))
	{
		return;
	}

	echo 'Downloading from ', $url, ' ';

	if (!$fpr = fopen($url, 'rb'))
	{
		die("Can't download from $url\nPlease download it yourself and put it in the develop/ dir, kthxbai");
	}

	if (!$fpw = fopen($target, 'wb'))
	{
		fclose($fpr);
		die("Can't open develop/" . basename($url) . " for output... please check your permissions or something");
	}

	$i = 0;
	$chunk = 32768;
	$done = '';

	while (!feof($fpr))
	{
		$data = fread($fpr, $chunk);
		$written = ($data === false) ? false : fwrite($fpw, $data);

		// A failed read never reaches EOF, so bail out instead of looping forever
		if ($written === false || $written !== strlen($data))
		{
			fclose($fpr);
			fclose($fpw);
			unlink($target);
			die("\nDownload of $url failed\nPlease download it yourself and put it in the develop/ dir");
		}

		$i += $written;

		// Erase the previous progress text before printing the new one
		echo str_repeat("\x08", strlen($done));

		$done = ($i >> 10) . ' KiB';
		echo $done;
	}

	fclose($fpr);
	fclose($fpw);

	echo "\n";
}
|
||||
|
||||
?>
|
1
phpBB/includes/utf/data/case_fold_C.php
Normal file
1
phpBB/includes/utf/data/case_fold_C.php
Normal file
File diff suppressed because one or more lines are too long
1
phpBB/includes/utf/data/case_fold_F.php
Normal file
1
phpBB/includes/utf/data/case_fold_F.php
Normal file
@ -0,0 +1 @@
|
||||
<?php return array('ß'=>'ss','İ'=>'i̇','ʼn'=>'ʼn','ǰ'=>'ǰ','ΐ'=>'ΐ','ΰ'=>'ΰ','և'=>'եւ','ẖ'=>'ẖ','ẗ'=>'ẗ','ẘ'=>'ẘ','ẙ'=>'ẙ','ẚ'=>'aʾ','ὐ'=>'ὐ','ὒ'=>'ὒ','ὔ'=>'ὔ','ὖ'=>'ὖ','ᾀ'=>'ἀι','ᾁ'=>'ἁι','ᾂ'=>'ἂι','ᾃ'=>'ἃι','ᾄ'=>'ἄι','ᾅ'=>'ἅι','ᾆ'=>'ἆι','ᾇ'=>'ἇι','ᾈ'=>'ἀι','ᾉ'=>'ἁι','ᾊ'=>'ἂι','ᾋ'=>'ἃι','ᾌ'=>'ἄι','ᾍ'=>'ἅι','ᾎ'=>'ἆι','ᾏ'=>'ἇι','ᾐ'=>'ἠι','ᾑ'=>'ἡι','ᾒ'=>'ἢι','ᾓ'=>'ἣι','ᾔ'=>'ἤι','ᾕ'=>'ἥι','ᾖ'=>'ἦι','ᾗ'=>'ἧι','ᾘ'=>'ἠι','ᾙ'=>'ἡι','ᾚ'=>'ἢι','ᾛ'=>'ἣι','ᾜ'=>'ἤι','ᾝ'=>'ἥι','ᾞ'=>'ἦι','ᾟ'=>'ἧι','ᾠ'=>'ὠι','ᾡ'=>'ὡι','ᾢ'=>'ὢι','ᾣ'=>'ὣι','ᾤ'=>'ὤι','ᾥ'=>'ὥι','ᾦ'=>'ὦι','ᾧ'=>'ὧι','ᾨ'=>'ὠι','ᾩ'=>'ὡι','ᾪ'=>'ὢι','ᾫ'=>'ὣι','ᾬ'=>'ὤι','ᾭ'=>'ὥι','ᾮ'=>'ὦι','ᾯ'=>'ὧι','ᾲ'=>'ὰι','ᾳ'=>'αι','ᾴ'=>'άι','ᾶ'=>'ᾶ','ᾷ'=>'ᾶι','ᾼ'=>'αι','ῂ'=>'ὴι','ῃ'=>'ηι','ῄ'=>'ήι','ῆ'=>'ῆ','ῇ'=>'ῆι','ῌ'=>'ηι','ῒ'=>'ῒ','ΐ'=>'ΐ','ῖ'=>'ῖ','ῗ'=>'ῗ','ῢ'=>'ῢ','ΰ'=>'ΰ','ῤ'=>'ῤ','ῦ'=>'ῦ','ῧ'=>'ῧ','ῲ'=>'ὼι','ῳ'=>'ωι','ῴ'=>'ώι','ῶ'=>'ῶ','ῷ'=>'ῶι','ῼ'=>'ωι','ff'=>'ff','fi'=>'fi','fl'=>'fl','ffi'=>'ffi','ffl'=>'ffl','ſt'=>'st','st'=>'st','ﬓ'=>'մն','ﬔ'=>'մե','ﬕ'=>'մի','ﬖ'=>'վն','ﬗ'=>'մխ');
|
1
phpBB/includes/utf/data/case_fold_S.php
Normal file
1
phpBB/includes/utf/data/case_fold_S.php
Normal file
@ -0,0 +1 @@
|
||||
<?php return array('ᾈ'=>'ᾀ','ᾉ'=>'ᾁ','ᾊ'=>'ᾂ','ᾋ'=>'ᾃ','ᾌ'=>'ᾄ','ᾍ'=>'ᾅ','ᾎ'=>'ᾆ','ᾏ'=>'ᾇ','ᾘ'=>'ᾐ','ᾙ'=>'ᾑ','ᾚ'=>'ᾒ','ᾛ'=>'ᾓ','ᾜ'=>'ᾔ','ᾝ'=>'ᾕ','ᾞ'=>'ᾖ','ᾟ'=>'ᾗ','ᾨ'=>'ᾠ','ᾩ'=>'ᾡ','ᾪ'=>'ᾢ','ᾫ'=>'ᾣ','ᾬ'=>'ᾤ','ᾭ'=>'ᾥ','ᾮ'=>'ᾦ','ᾯ'=>'ᾧ','ᾼ'=>'ᾳ','ῌ'=>'ῃ','ῼ'=>'ῳ');
|
@ -930,4 +930,48 @@ function utf8_from_unicode($array)
|
||||
return $str;
|
||||
}
|
||||
|
||||
|
||||
/**
* Case fold a UTF-8 string
*
* The common (C) mappings are always applied first. They are followed by the
* full (F) mappings when $option is 'full', or by the simple (S) mappings for
* any other value. Each lookup table is included from includes/utf/data/ the
* first time it is needed and kept in a static array for later calls.
*
* @param string $text text to be case folded
* @param string $option determines how we will fold the cases
* @return string case folded text
*/
function utf8_case_fold($text, $option = 'full')
{
	static $uniarray = array();
	global $phpbb_root_path, $phpEx;

	// full folding uses the F table, every other option falls back to S
	$extra = ($option === 'full') ? 'F' : 'S';

	// common is always applied, and always before the option specific table
	foreach (array('C', $extra) as $table)
	{
		// only load a table if we need to
		if (!isset($uniarray[$table]))
		{
			$uniarray[$table] = include($phpbb_root_path . 'includes/utf/data/case_fold_' . $table . '.' . $phpEx);
		}

		$text = strtr($text, $uniarray[$table]);
	}

	return $text;
}
|
||||
|
||||
?>
|
Loading…
x
Reference in New Issue
Block a user