Added entities_to_utf8() function to textlib, suitable to convert

any entity (numerical and html) to utf-8, providing a PHP4 and PHP5
way to do the work.
Also, added the code2utf8() function to convert Unicode code numbers
to UTF-8 chars.

Merged from MOODLE_18_STABLE
This commit is contained in:
stronk7 2007-06-13 19:43:56 +00:00
parent bf9ae76c36
commit 4e2cb0e3f3

View File

@ -299,6 +299,43 @@ class textlib {
return $encoded;
}
/**
* Converts all the numeric entities &#nnnn; or &#xnnn; to UTF-8
* Original from laurynas dot butkus at gmail at:
* http://php.net/manual/en/function.html-entity-decode.php#75153
* with some custom mods to provide more functionality
*
* @param string $str input string
* @param boolean $htmlent convert also html entities (defaults to true)
*
* NOTE: we could have used typo3 entities_to_utf8() here
* but the direct alternative used runs 400% quicker
* and uses 0.5Mb less memory, so, let's use it
* (tested against 10^6 conversions)
*/
function entities_to_utf8($str, $htmlent=true) {
    /// Decodes numeric entities (&#nnnn; and &#xhhhh;) and, when $htmlent is
    /// true, named HTML entities (&eacute;, &amp;, ...) into UTF-8.
    ///
    /// @param string  $str     input string
    /// @param boolean $htmlent also convert named html entities (defaults to true)
    /// @return string the input with the recognised entities replaced
    ///
    /// The whole string is decoded in ONE pass, so text produced by decoding
    /// an entity is never decoded again ("&#38;lt;" gives "&lt;", not "<").
    /// No code is evaluated: the old preg_replace() /e modifier is gone from
    /// PHP 7 and also parsed zero-padded numbers such as &#0065; as octal.

    static $trans_tbl; /// entity => UTF-8 char map, built on first use

    $str = (string)$str;

    if ($htmlent) {
        if (!isset($trans_tbl)) {
            /// Ask for the table directly in UTF-8 (needs PHP >= 5.3.4), so no
            /// utf8_encode() pass is required. ENT_COMPAT is passed explicitly
            /// so the quote handling does not depend on the PHP version default.
            $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8'));
        }
        $pattern = '~(&(?:#x[0-9a-f]+|#[0-9]+|[a-z][a-z0-9]*);)~i';
    } else {
        $pattern = '~(&#(?:x[0-9a-f]+|[0-9]+);)~i';
    }

    /// With PREG_SPLIT_DELIM_CAPTURE the result alternates between plain text
    /// (even indexes) and one captured entity candidate (odd indexes).
    $pieces = preg_split($pattern, $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($pieces === false) {
        /// PCRE failure: hand the text back untouched rather than losing it
        return $str;
    }

    $total = count($pieces);
    for ($i = 1; $i < $total; $i += 2) {
        $entity = $pieces[$i];
        if ($entity[1] === '#') {
            if ($entity[2] === 'x' || $entity[2] === 'X') {
                /// Hexadecimal form: strip the leading "&#x" and trailing ";"
                $code = hexdec(substr($entity, 3, -1));
            } else {
                /// Decimal form: strip "&#" and ";". The cast reads the digits
                /// as base 10 whatever the number of leading zeros
                $code = (int)substr($entity, 2, -1);
            }
            $pieces[$i] = textlib::code2utf8($code);
        } else if (isset($trans_tbl[$entity])) {
            $pieces[$i] = $trans_tbl[$entity];
        }
        /// Unknown named entities are left exactly as they were written
    }

    /// Return utf8-ised string
    return implode('', $pieces);
}
/**
* Converts all Unicode chars > 127 to numeric entities &#nnnn; or &#xnnn;.
*
@ -353,5 +390,28 @@ class textlib {
}
return $encodings;
}
/**
* Returns the utf8 string corresponding to the unicode value
* (from php.net, courtesy - romans@void.lv)
*
* @param int $num one unicode value
* @return string the UTF-8 char corresponding to the unicode value
*/
function code2utf8($num) {
    /// Encodes one Unicode code point as a UTF-8 byte sequence.
    ///
    /// @param int $num one unicode value
    /// @return string the UTF-8 char for $num, or '' when $num is not a
    ///                value that UTF-8 can represent

    /// Range check first, while $num may still be a float too large for an
    /// integer (hexdec() returns floats for huge inputs); only then cast.
    if ($num < 0 || $num > 0x10FFFF) {
        return '';
    }
    $num = (int)$num;

    /// U+D800..U+DFFF are UTF-16 surrogates and have no valid UTF-8 encoding
    if ($num >= 0xD800 && $num <= 0xDFFF) {
        return '';
    }

    /// 1 byte: 0xxxxxxx
    if ($num < 0x80) {
        return chr($num);
    }
    /// 2 bytes: 110xxxxx 10xxxxxx
    if ($num < 0x800) {
        return chr(0xC0 | ($num >> 6))
             . chr(0x80 | ($num & 0x3F));
    }
    /// 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
    if ($num < 0x10000) {
        return chr(0xE0 | ($num >> 12))
             . chr(0x80 | (($num >> 6) & 0x3F))
             . chr(0x80 | ($num & 0x3F));
    }
    /// 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(0xF0 | ($num >> 18))
         . chr(0x80 | (($num >> 12) & 0x3F))
         . chr(0x80 | (($num >> 6) & 0x3F))
         . chr(0x80 | ($num & 0x3F));
}
}
?>