1
0
mirror of https://github.com/phpbb/phpbb.git synced 2025-08-01 14:30:32 +02:00

Added: finished UTF normalization conformance tests

Added: now generate_utf_files.php also generates the files needed by the search indexer


git-svn-id: file:///svn/phpbb/trunk@6162 89ea8834-ac86-4346-8a33-228a782c2dd0
This commit is contained in:
Ludovic Arnaud
2006-07-10 01:40:59 +00:00
parent 46af817cb0
commit 8c5b957228
2 changed files with 250 additions and 52 deletions

View File

@@ -72,6 +72,7 @@ require_once($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
$i = $n = 0;
$failed = FALSE;
$tested_chars = array();
$fp = fopen($phpbb_root_path . 'develop/NormalizationTest.txt', 'rb');
while (!feof($fp))
@@ -103,6 +104,16 @@ while (!feof($fp))
list($c1, $c2, $c3, $c4, $c5) = explode(';', $line);
if (!strpos($c1, ' '))
{
/**
* We are currently testing a single character, we add it to the list of
* characters we have processed so that we can exclude it when testing
* for invariants
*/
$tested_chars[$c1] = 1;
}
foreach ($test_suite as $form => $serie)
{
foreach ($serie as $expected => $tests)
@@ -119,17 +130,81 @@ while (!feof($fp))
$failed = TRUE;
$hex_result = utf_to_hexseq($utf_result);
echo "FAILED $expected == $form($test) ($hex_expected != $hex_result)\n";
echo "\nFAILED $expected == $form($test) ($hex_expected != $hex_result)";
}
}
}
if ($failed)
{
die("\nFailed at line $n\n");
die("\n\nFailed at line $n\n");
}
}
}
fclose($fp);
/**
* Test for invariants
*/
echo "\n\nTesting for invariants...\n\n";
$fp = fopen($phpbb_root_path . 'develop/UnicodeData.txt', 'rt');
$n = 0;
while (!feof($fp))
{
if (++$n % 100 == 0)
{
echo $n, ' ';
}
$line = fgets($fp, 1024);
if (!$pos = strpos($line, ';'))
{
continue;
}
$hex_tested = $hex_expected = substr($line, 0, $pos);
if (isset($tested_chars[$hex_tested]))
{
continue;
}
$utf_expected = hex_to_utf($hex_expected);
if ($utf_expected >= UTF8_SURROGATE_FIRST
&& $utf_expected <= UTF8_SURROGATE_LAST)
{
/**
* Surrogates are illegal on their own, we expect the normalizer
* to return a replacement char
*/
$utf_expected = UTF8_REPLACEMENT;
$hex_expected = utf_to_hexseq($utf_expected);
}
foreach (array('nfc', 'nfkc', 'nfd', 'nfkd') as $form)
{
$utf_result = utf_normalizer::$form($utf_expected);
$hex_result = utf_to_hexseq($utf_result);
// echo "$form($utf_expected) == $utf_result\n";
if (strcmp($utf_expected, $utf_result))
{
$failed = 1;
echo "\nFAILED $hex_expected == $form($hex_tested) ($hex_expected != $hex_result)";
}
}
if ($failed)
{
die("\n\nFailed at line $n\n");
}
}
fclose($fp);
die("\n\nALL TESTS PASSED SUCCESSFULLY\n");