1
0
mirror of https://github.com/phpbb/phpbb.git synced 2025-06-03 13:05:44 +02:00

Case folding! :D

git-svn-id: file:///svn/phpbb/trunk@6464 89ea8834-ac86-4346-8a33-228a782c2dd0
This commit is contained in:
David M 2006-10-07 22:51:55 +00:00
parent 6972d28633
commit 722ab535a6
5 changed files with 194 additions and 0 deletions

View File

@ -0,0 +1,147 @@
<?php
/**
*
* @package phpBB3
* @version $Id$
* @copyright (c) 2005 phpBB Group
* @license http://opensource.org/licenses/gpl-license.php GNU Public License
*
*/
if (php_sapi_name() != 'cli')
{
// die("This program must be run from the command line.\n");
}
set_time_limit(0);
define('IN_PHPBB', true);
$phpbb_root_path = '../';
$phpEx = substr(strrchr(__FILE__, '.'), 1);
echo "Checking for required files\n";
download('http://unicode.org/Public/UNIDATA/CaseFolding.txt');
echo "\n";
/**
* Load the CaseFolding table
*/
echo "Loading CaseFolding\n";
$unidata = file_get_contents('CaseFolding.txt');
function utf8_chr($cp)
{
if ($cp > 0xFFFF)
{
return chr(0xF0 | ($cp >> 18)) . chr(0x80 | (($cp >> 12) & 0x3F)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
}
else if ($cp > 0x7FF)
{
return chr(0xE0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
}
else if ($cp > 0x7F)
{
return chr(0xC0 | ($cp >> 6)) . chr(0x80 | ($cp & 0x3F));
}
else
{
return chr($cp);
}
}
preg_match_all('/^([0-9A-F]+); ([CFS]); ([0-9A-F]+(?: [0-9A-F]+)*);/im', $unidata, $array, PREG_SET_ORDER);
$uniarray = array();
foreach ($array as $value)
{
$uniarray[$value[2]][utf8_chr(hexdec((string)$value[1]))] = implode(array_map('utf8_chr', array_map('hexdec', explode(' ', $value[3]))));
}
foreach ($uniarray as $idx => $contents)
{
echo "Writing to case_fold_$idx.$phpEx\n";
$fp = fopen($phpbb_root_path . 'includes/utf/data/case_fold_' . $idx . '.' . $phpEx, 'wb');
fwrite($fp, '<?php return ' . my_var_export($contents) . ';');
fclose($fp);
}
/**
* Return a parsable string representation of a variable
*
* This is function is limited to array/strings/integers
*
* @param mixed $var Variable
* @return string PHP code representing the variable
*/
function my_var_export($var)
{
if (is_array($var))
{
$lines = array();
foreach ($var as $k => $v)
{
$lines[] = my_var_export($k) . '=>' . my_var_export($v);
}
return 'array(' . implode(',', $lines) . ')';
}
elseif (is_string($var))
{
return "'" . str_replace(array('\\', "'"), array('\\\\', "\\'"), $var) . "'";
}
else
{
return $var;
}
}
/**
* Download a file to the develop/ dir
*
* @param string $url URL of the file to download
* @return void
*/
function download($url)
{
global $phpbb_root_path;
if (file_exists($phpbb_root_path . 'develop/' . basename($url)))
{
return;
}
echo 'Downloading from ', $url, ' ';
if (!$fpr = fopen($url, 'rb'))
{
die("Can't download from $url\nPlease download it yourself and put it in the develop/ dir, kthxbai");
}
if (!$fpw = fopen($phpbb_root_path . 'develop/' . basename($url), 'wb'))
{
die("Can't open develop/" . basename($url) . " for output... please check your permissions or something");
}
$i = 0;
$chunk = 32768;
$done = '';
while (!feof($fpr))
{
$i += fwrite($fpw, fread($fpr, $chunk));
echo str_repeat("\x08", strlen($done));
$done = ($i >> 10) . ' KiB';
echo $done;
}
fclose($fpr);
fclose($fpw);
echo "\n";
}
?>

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
<?php return array('ß'=>'ss','İ'=>'i̇','ʼn'=>'ʼn','ǰ'=>'ǰ','ΐ'=>'ΐ','ΰ'=>'ΰ','և'=>'եւ','ẖ'=>'ẖ','ẗ'=>'ẗ','ẘ'=>'ẘ','ẙ'=>'ẙ','ẚ'=>'aʾ','ὐ'=>'ὐ','ὒ'=>'ὒ','ὔ'=>'ὔ','ὖ'=>'ὖ','ᾀ'=>'ἀι','ᾁ'=>'ἁι','ᾂ'=>'ἂι','ᾃ'=>'ἃι','ᾄ'=>'ἄι','ᾅ'=>'ἅι','ᾆ'=>'ἆι','ᾇ'=>'ἇι','ᾈ'=>'ἀι','ᾉ'=>'ἁι','ᾊ'=>'ἂι','ᾋ'=>'ἃι','ᾌ'=>'ἄι','ᾍ'=>'ἅι','ᾎ'=>'ἆι','ᾏ'=>'ἇι','ᾐ'=>'ἠι','ᾑ'=>'ἡι','ᾒ'=>'ἢι','ᾓ'=>'ἣι','ᾔ'=>'ἤι','ᾕ'=>'ἥι','ᾖ'=>'ἦι','ᾗ'=>'ἧι','ᾘ'=>'ἠι','ᾙ'=>'ἡι','ᾚ'=>'ἢι','ᾛ'=>'ἣι','ᾜ'=>'ἤι','ᾝ'=>'ἥι','ᾞ'=>'ἦι','ᾟ'=>'ἧι','ᾠ'=>'ὠι','ᾡ'=>'ὡι','ᾢ'=>'ὢι','ᾣ'=>'ὣι','ᾤ'=>'ὤι','ᾥ'=>'ὥι','ᾦ'=>'ὦι','ᾧ'=>'ὧι','ᾨ'=>'ὠι','ᾩ'=>'ὡι','ᾪ'=>'ὢι','ᾫ'=>'ὣι','ᾬ'=>'ὤι','ᾭ'=>'ὥι','ᾮ'=>'ὦι','ᾯ'=>'ὧι','ᾲ'=>'ὰι','ᾳ'=>'αι','ᾴ'=>'άι','ᾶ'=>'ᾶ','ᾷ'=>'ᾶι','ᾼ'=>'αι','ῂ'=>'ὴι','ῃ'=>'ηι','ῄ'=>'ήι','ῆ'=>'ῆ','ῇ'=>'ῆι','ῌ'=>'ηι','ῒ'=>'ῒ','ΐ'=>'ΐ','ῖ'=>'ῖ','ῗ'=>'ῗ','ῢ'=>'ῢ','ΰ'=>'ΰ','ῤ'=>'ῤ','ῦ'=>'ῦ','ῧ'=>'ῧ','ῲ'=>'ὼι','ῳ'=>'ωι','ῴ'=>'ώι','ῶ'=>'ῶ','ῷ'=>'ῶι','ῼ'=>'ωι','ff'=>'ff','fi'=>'fi','fl'=>'fl','ffi'=>'ffi','ffl'=>'ffl','ſt'=>'st','st'=>'st','ﬓ'=>'մն','ﬔ'=>'մե','ﬕ'=>'մի','ﬖ'=>'վն','ﬗ'=>'մխ');

View File

@ -0,0 +1 @@
<?php return array('ᾈ'=>'ᾀ','ᾉ'=>'ᾁ','ᾊ'=>'ᾂ','ᾋ'=>'ᾃ','ᾌ'=>'ᾄ','ᾍ'=>'ᾅ','ᾎ'=>'ᾆ','ᾏ'=>'ᾇ','ᾘ'=>'ᾐ','ᾙ'=>'ᾑ','ᾚ'=>'ᾒ','ᾛ'=>'ᾓ','ᾜ'=>'ᾔ','ᾝ'=>'ᾕ','ᾞ'=>'ᾖ','ᾟ'=>'ᾗ','ᾨ'=>'ᾠ','ᾩ'=>'ᾡ','ᾪ'=>'ᾢ','ᾫ'=>'ᾣ','ᾬ'=>'ᾤ','ᾭ'=>'ᾥ','ᾮ'=>'ᾦ','ᾯ'=>'ᾧ','ᾼ'=>'ᾳ','ῌ'=>'ῃ','ῼ'=>'ῳ');

View File

@ -930,4 +930,48 @@ function utf8_from_unicode($array)
return $str;
}
/**
* Takes an array of ints representing the Unicode characters and returns
* a UTF-8 string.
*
* @param string $text text to be case folded
* @param string $option determines how we will fold the cases
* @return string case folded text
*/
function utf8_case_fold($text, $option = 'full')
{
static $uniarray = array();
global $phpbb_root_path, $phpEx;
// common is always set
if (!isset($uniarray['C']))
{
$uniarray['C'] = include($phpbb_root_path . 'includes/utf/data/case_fold_C.' . $phpEx);
}
// only set full if we need to
if ($option === 'full' && !isset($uniarray['F']))
{
$uniarray['F'] = include($phpbb_root_path . 'includes/utf/data/case_fold_F.' . $phpEx);
}
// only set simple if we need to
if ($option !== 'full' && !isset($uniarray['S']))
{
$uniarray['S'] = include($phpbb_root_path . 'includes/utf/data/case_fold_S.' . $phpEx);
}
$text = strtr($text, $uniarray['C']);
if ($option === 'full')
{
$text = strtr($text, $uniarray['F']);
}
else
{
$text = strtr($text, $uniarray['S']);
}
return $text;
}
?>