mirror of
https://github.com/moodle/moodle.git
synced 2025-01-31 04:33:13 +01:00
Added entities_to_utf8() function to textlib, suitable to convert
any entity (numerical and html) to utf-8, providing a PHP4 and PHP5 way to do the work. Also, added the code2utf8() function to convert Unicode code numbers to UTF-8 chars. Merged from MOODLE_18_STABLE
This commit is contained in:
parent
bf9ae76c36
commit
4e2cb0e3f3
@ -299,6 +299,43 @@ class textlib {
|
||||
return $encoded;
|
||||
}
|
||||
|
||||
/**
 * Converts all the numeric entities &#nnnn; or &#xnnn; to UTF-8
 * Original from laurynas dot butkus at gmail at:
 * http://php.net/manual/en/function.html-entity-decode.php#75153
 * with some custom mods to provide more functionality
 *
 * Entities are decoded in three successive passes: hexadecimal numeric
 * entities, then decimal numeric entities, then (optionally) named HTML
 * entities. Every numeric code is encoded by textlib::code2utf8(), so a
 * code that function cannot encode is replaced by an empty string.
 *
 * Decimal entities are always read in base 10, so leading zeros are
 * harmless (&#039; is the apostrophe, &#0123; is '{').
 *
 * The named-entity table is requested explicitly as UTF-8 (third argument
 * of get_html_translation_table(), available from PHP 5.3.4) so the
 * result does not depend on the PHP version's default encoding.
 *
 * @param string $str input string
 * @param boolean $htmlent convert also html entities (defaults to true)
 * @return string the input string with its entities replaced by UTF-8 chars
 *
 * NOTE: we could have used typo3 entities_to_utf8() here
 * but the direct alternative used runs 400% quicker
 * and uses 0.5Mb less memory, so, let's use it
 * (tested against 10^6 conversions)
 */
function entities_to_utf8($str, $htmlent=true) {

    static $trans_tbl; /// Named entity => UTF-8 char table, built once per request

    $result = (string)$str;

/// Replace hexadecimal numeric entities. The matches are collected and then
/// swapped in with strtr(); this replaces the eval-based /e modifier, which
/// no longer exists in PHP 7+. An entity contains '&' and ';' only at its
/// ends, so matches never overlap and strtr() hits exactly the same spans.
    if (preg_match_all('~&#x([0-9a-f]+);~i', $result, $found)) {
        $replacements = array();
        foreach ($found[1] as $idx => $digits) {
            $replacements[$found[0][$idx]] = textlib::code2utf8(hexdec($digits));
        }
        $result = strtr($result, $replacements);
    }

/// Replace decimal numeric entities. "+ 0" converts the digits in base 10
/// (never octal) and turns oversized values into floats, which
/// code2utf8() answers with an empty string.
    if (preg_match_all('~&#([0-9]+);~', $result, $found)) {
        $replacements = array();
        foreach ($found[1] as $idx => $digits) {
            $replacements[$found[0][$idx]] = textlib::code2utf8($digits + 0);
        }
        $result = strtr($result, $replacements);
    }

/// Replace literal entities (if desired)
    if ($htmlent) {
    /// Generate/create $trans_tbl
        if (!isset($trans_tbl)) {
        /// The table maps char => entity and its chars are already UTF-8,
        /// so it only needs flipping (no utf8_encode(), which would
        /// double-encode them)
            $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8'));
        }
        $result = strtr($result, $trans_tbl);
    }

/// Return utf8-ised string
    return $result;
}
|
||||
|
||||
/**
|
||||
* Converts all Unicode chars > 127 to numeric entities &#nnnn; or &#xnnn;.
|
||||
*
|
||||
@ -353,5 +390,28 @@ class textlib {
|
||||
}
|
||||
return $encodings;
|
||||
}
|
||||
|
||||
/**
 * Returns the utf8 string corresponding to the unicode value
 * (from php.net, courtesy - romans@void.lv)
 *
 * Only Unicode scalar values can be encoded. Negative numbers, the
 * UTF-16 surrogate range (U+D800..U+DFFF) and anything above U+10FFFF
 * have no valid UTF-8 form and produce an empty string.
 *
 * @param int $num one unicode value
 * @return string the UTF-8 char corresponding to the unicode value,
 *                or an empty string if $num cannot be encoded
 */
function code2utf8($num) {

/// Range-check before casting, so that oversized floats (e.g. coming
/// from hexdec()) are rejected instead of being truncated
    if ($num < 0 || $num > 0x10FFFF) {
        return '';
    }
    $num = (int)$num;

/// Surrogates are reserved for UTF-16 and must never appear in UTF-8
    if ($num >= 0xD800 && $num <= 0xDFFF) {
        return '';
    }

/// 1 byte: 0xxxxxxx
    if ($num < 0x80) {
        return chr($num);
    }

/// 2 bytes: 110xxxxx 10xxxxxx
    if ($num < 0x800) {
        return chr(($num >> 6) + 192) . chr(($num & 63) + 128);
    }

/// 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
    if ($num < 0x10000) {
        return chr(($num >> 12) + 224) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
    }

/// 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(($num >> 18) + 240) . chr((($num >> 12) & 63) + 128) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
}
|
||||
}
|
||||
?>
|
||||
|
Loading…
x
Reference in New Issue
Block a user