Merge branch 'w33_MDL-29027_m22_textlib' of git://github.com/skodak/moodle

This commit is contained in:
Eloy Lafuente (stronk7) 2011-08-24 18:49:05 +02:00
commit 4f7c38660a
2 changed files with 515 additions and 187 deletions

View File

@ -1,5 +1,4 @@
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
@ -39,12 +38,246 @@ class textlib_test extends UnitTestCase {
public static $includecoverage = array('lib/textlib.class.php');
/**
 * Charset names are normalised to their canonical lowercase form.
 */
public function test_parse_charset() {
    $normalised = textlib::parse_charset('Cp1250');
    $this->assertIdentical($normalised, 'windows-1250');

    // does typo3 work? some encoding moodle does not use
    $viatypo3 = textlib::parse_charset('ms-ansi');
    $this->assertIdentical($viatypo3, 'windows-1252');
}
/**
 * Round-trips sample texts between utf-8 and several legacy encodings.
 */
public function test_convert() {
    // Czech sample and its single-byte representations.
    $czech = "Žluťoučký koníček";
    $latin2 = pack("H*", "ae6c75bb6f75e86bfd206b6f6eede8656b");
    $cp1250 = pack("H*", "8e6c759d6f75e86bfd206b6f6eede8656b");

    // Each row: input, source charset, target charset, expected output.
    $singlebyte = array(
        array($czech,  'utf-8',      'iso-8859-2', $latin2),
        array($latin2, 'iso-8859-2', 'utf-8',      $czech),
        array($czech,  'utf-8',      'win-1250',   $cp1250),
        array($cp1250, 'win-1250',   'utf-8',      $czech),
        array($cp1250, 'win-1250',   'iso-8859-2', $latin2),
        array($latin2, 'iso-8859-2', 'win-1250',   $cp1250),
        array($latin2, 'iso-8859-2', 'iso-8859-2', $latin2),
        array($cp1250, 'win-1250',   'cp1250',     $cp1250),
    );
    foreach ($singlebyte as $case) {
        list($input, $from, $to, $expected) = $case;
        $this->assertIdentical(textlib::convert($input, $from, $to), $expected);
    }

    // Each row: utf-8 text, legacy charset, hex dump of the text in that charset.
    $multibyte = array(
        array('言語設定', 'EUC-JP',      "b8c0b8ecc0dfc4ea"),
        array('言語設定', 'ISO-2022-JP', "1b24423840386c405f446a1b2842"),
        array('言語設定', 'SHIFT-JIS',   "8cbe8cea90dd92e8"),
        array('简体中文', 'GB2312',      "bcf2cce5d6d0cec4"),
        array('简体中文', 'GB18030',     "bcf2cce5d6d0cec4"),
    );
    foreach ($multibyte as $case) {
        list($unicode, $charset, $hex) = $case;
        $encoded = pack("H*", $hex);
        $this->assertIdentical(textlib::convert($unicode, 'utf-8', $charset), $encoded);
        $this->assertIdentical(textlib::convert($encoded, $charset, 'utf-8'), $unicode);
    }
}
/**
 * Substrings are cut on character boundaries in every tested encoding.
 */
public function test_substr() {
    $czech = "Žluťoučký koníček";

    // Each row: start, length, expected utf-8 slice.
    $slices = array(
        array(1, 3, 'luť'),
        array(0, 100, $czech),
        array(-3, 2, 'če'),
    );
    foreach ($slices as $slice) {
        list($start, $len, $expected) = $slice;
        $this->assertIdentical(textlib::substr($czech, $start, $len), $expected);
    }

    // The same slices taken from the single-byte encoded variants of the text.
    $singlebyte = array(
        'iso-8859-2' => pack("H*", "ae6c75bb6f75e86bfd206b6f6eede8656b"),
        'cp1250'     => pack("H*", "8e6c759d6f75e86bfd206b6f6eede8656b"),
    );
    foreach ($singlebyte as $charset => $bytes) {
        foreach ($slices as $slice) {
            list($start, $len, $expected) = $slice;
            $this->assertIdentical(textlib::substr($bytes, $start, $len, $charset), textlib::convert($expected, 'utf-8', $charset));
        }
    }

    // Each row: charset, hex of the whole text, hex of its second character.
    $multibyte = array(
        array('EUC-JP',      "b8c0b8ecc0dfc4ea",             "b8ec"),
        array('ISO-2022-JP', "1b24423840386c405f446a1b2842", "1b2442386c1b2842"),
        array('SHIFT-JIS',   "8cbe8cea90dd92e8",             "8cea"),
        array('GB2312',      "bcf2cce5d6d0cec4",             "cce5"),
        array('GB18030',     "bcf2cce5d6d0cec4",             "cce5"),
    );
    foreach ($multibyte as $case) {
        list($charset, $fullhex, $charhex) = $case;
        $whole = pack("H*", $fullhex);
        $second = pack("H*", $charhex);
        $this->assertIdentical(textlib::substr($whole, 1, 1, $charset), $second);
    }
}
/**
 * Lengths are reported in characters, not bytes, for every tested encoding.
 */
public function test_strlen() {
    $czech = "Žluťoučký koníček";
    $this->assertIdentical(textlib::strlen($czech), 17);

    // Each row: hex dump of the text, its charset, expected character count.
    $cases = array(
        array("ae6c75bb6f75e86bfd206b6f6eede8656b", 'iso-8859-2', 17),
        array("8e6c759d6f75e86bfd206b6f6eede8656b", 'cp1250', 17),
        array("b8ec", 'EUC-JP', 1),
        array("b8c0b8ecc0dfc4ea", 'EUC-JP', 4),
        array("1b2442386c1b2842", 'ISO-2022-JP', 1),
        array("1b24423840386c405f446a1b2842", 'ISO-2022-JP', 4),
        array("8cea", 'SHIFT-JIS', 1),
        array("8cbe8cea90dd92e8", 'SHIFT-JIS', 4),
        array("cce5", 'GB2312', 1),
        array("bcf2cce5d6d0cec4", 'GB2312', 4),
        array("cce5", 'GB18030', 1),
        array("bcf2cce5d6d0cec4", 'GB18030', 4),
    );
    foreach ($cases as $case) {
        list($hex, $charset, $chars) = $case;
        $bytes = pack("H*", $hex);
        $this->assertIdentical(textlib::strlen($bytes, $charset), $chars);
    }
}
/**
 * Lower-casing works for cased scripts and leaves caseless scripts untouched.
 */
public function test_strtolower() {
    $mixed = "Žluťoučký koníček";
    $lower = 'žluťoučký koníček';
    $this->assertIdentical(textlib::strtolower($mixed), $lower);

    // Single-byte encoded variants of the mixed-case text, keyed by charset.
    $singlebyte = array(
        'iso-8859-2' => "ae6c75bb6f75e86bfd206b6f6eede8656b",
        'cp1250'     => "8e6c759d6f75e86bfd206b6f6eede8656b",
    );
    foreach ($singlebyte as $charset => $hex) {
        $bytes = pack("H*", $hex);
        $this->assertIdentical(textlib::strtolower($bytes, $charset), textlib::convert($lower, 'utf-8', $charset));
    }

    // Caseless utf-8 text must come back unchanged.
    foreach (array('言語設定', '简体中文') as $caseless) {
        $this->assertIdentical(textlib::strtolower($caseless), $caseless);
    }

    // The same holds for caseless text in legacy multi-byte encodings.
    $multibyte = array(
        'ISO-2022-JP' => "1b24423840386c405f446a1b2842",
        'SHIFT-JIS'   => "8cbe8cea90dd92e8",
        'GB2312'      => "bcf2cce5d6d0cec4",
        'GB18030'     => "bcf2cce5d6d0cec4",
    );
    foreach ($multibyte as $charset => $hex) {
        $bytes = pack("H*", $hex);
        $this->assertIdentical(textlib::strtolower($bytes, $charset), $bytes);
    }
}
/**
 * Upper-casing works for cased scripts and leaves caseless scripts untouched.
 */
public function test_strtoupper() {
    $mixed = "Žluťoučký koníček";
    $upper = 'ŽLUŤOUČKÝ KONÍČEK';
    $this->assertIdentical(textlib::strtoupper($mixed), $upper);

    // Single-byte encoded variants of the mixed-case text, keyed by charset.
    $singlebyte = array(
        'iso-8859-2' => "ae6c75bb6f75e86bfd206b6f6eede8656b",
        'cp1250'     => "8e6c759d6f75e86bfd206b6f6eede8656b",
    );
    foreach ($singlebyte as $charset => $hex) {
        $bytes = pack("H*", $hex);
        $this->assertIdentical(textlib::strtoupper($bytes, $charset), textlib::convert($upper, 'utf-8', $charset));
    }

    // Caseless utf-8 text must come back unchanged.
    foreach (array('言語設定', '简体中文') as $caseless) {
        $this->assertIdentical(textlib::strtoupper($caseless), $caseless);
    }

    // The same holds for caseless text in legacy multi-byte encodings.
    $multibyte = array(
        'ISO-2022-JP' => "1b24423840386c405f446a1b2842",
        'SHIFT-JIS'   => "8cbe8cea90dd92e8",
        'GB2312'      => "bcf2cce5d6d0cec4",
        'GB18030'     => "bcf2cce5d6d0cec4",
    );
    foreach ($multibyte as $charset => $hex) {
        $bytes = pack("H*", $hex);
        $this->assertIdentical(textlib::strtoupper($bytes, $charset), $bytes);
    }
}
/**
 * The position of the first match is counted in characters, not bytes.
 */
public function test_strpos() {
    $haystack = "Žluťoučký koníček";
    $needle = 'koníč';
    $this->assertIdentical(textlib::strpos($haystack, $needle), 10);
}
/**
 * The position of the last match is counted in characters, not bytes.
 */
public function test_strrpos() {
    $haystack = "Žluťoučký koníček";
    $needle = 'o';
    $this->assertIdentical(textlib::strrpos($haystack, $needle), 11);
}
/**
 * Accented Czech letters are reduced to their plain ASCII base letters.
 */
public function test_specialtoascii() {
    $accented = "Žluťoučký koníček";
    $plain = 'Zlutoucky konicek';
    $this->assertIdentical(textlib::specialtoascii($accented), $plain);
}
/**
 * Non-ASCII header text is emitted as a base64 encoded-word.
 */
public function test_encode_mimeheader() {
    $subject = "Žluťoučký koníček";
    $encodedword = '=?utf-8?B?xb1sdcWlb3XEjWvDvSBrb27DrcSNZWs=?=';
    $this->assertIdentical(textlib::encode_mimeheader($subject), $encodedword);
}
/**
 * Hexadecimal and decimal numeric entities are both decoded to utf-8.
 */
public function test_entities_to_utf8() {
    $entities = "&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&#237;&#269;ek";
    $decoded = textlib::entities_to_utf8($entities);
    $this->assertIdentical($decoded, "Žluťoučký koníček");
}
/**
 * Non-ASCII characters become hexadecimal entities, or decimal ones on request.
 */
public function test_utf8_to_entities() {
    $text = "Žluťoučký koníček";

    $hexadecimal = textlib::utf8_to_entities($text);
    $this->assertIdentical($hexadecimal, "&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&#xed;&#x10d;ek");

    $decimal = textlib::utf8_to_entities($text, true);
    $this->assertIdentical($decimal, "&#381;lu&#357;ou&#269;k&#253; kon&#237;&#269;ek");
}
/**
 * Only the leading byte order mark is stripped; a trailing one is kept.
 */
public function test_trim_utf8_bom() {
    $bom = "\xef\xbb\xbf";
    $text = "Žluťoučký koníček";

    $input = $bom.$text.$bom;
    $expected = $text.$bom;
    $this->assertIdentical(textlib::trim_utf8_bom($input), $expected);
}
/**
 * The encoding list is an array with several entries, UTF-8 among them.
 */
public function test_get_encodings() {
    $list = textlib::get_encodings();

    $this->assertTrue(is_array($list));
    $this->assertTrue(count($list) > 1);
    $this->assertTrue(isset($list['UTF-8']));
}
/**
 * A single Unicode code point is turned into its utf-8 character.
 */
public function test_code2utf8() {
    $char = textlib::code2utf8(381);
    $this->assertIdentical($char, 'Ž');
}
/**
 * The first letter of every space-separated word is capitalised.
 */
public function test_strtotitle() {
    $lower = "žluťoučký koníček";
    $titled = textlib::strtotitle($lower);
    $this->assertIdentical($titled, "Žluťoučký Koníček");
}
public function test_asort() {
global $SESSION;
$SESSION->lang = 'en'; // make sure we test en language to get consistent results, hopefully all systems have this locale
$arr = array('b'=>'ab', 1=>'aa', 0=>'cc');
textlib_get_instance()->asort($arr);
textlib::asort($arr);
$this->assertIdentical(array_keys($arr), array(1, 'b', 0));
$this->assertIdentical(array_values($arr), array('aa', 'ab', 'cc'));
@ -55,10 +288,22 @@ class textlib_test extends UnitTestCase {
}
$arr = array('a'=>'áb', 'b'=>'ab', 1=>'aa', 0=>'cc');
textlib_get_instance()->asort($arr);
textlib::asort($arr);
$this->assertIdentical(array_keys($arr), array(1, 'b', 'a', 0), $error);
unset($SESSION->lang);
}
/**
 * The object returned by the deprecated helper still exposes the text API
 * through instance-style calls.
 */
public function test_deprecated_textlib_get_instance() {
    $legacy = textlib_get_instance();

    // Slicing and measuring.
    $this->assertIdentical($legacy->substr('abc', 1, 1), 'b');
    $this->assertIdentical($legacy->strlen('abc'), 3);

    // Case conversion.
    $this->assertIdentical($legacy->strtoupper('Abc'), 'ABC');
    $this->assertIdentical($legacy->strtolower('Abc'), 'abc');

    // Searching, including the "not found" result.
    $this->assertIdentical($legacy->strpos('abc', 'a'), 0);
    $this->assertIdentical($legacy->strpos('abc', 'd'), false);
    $this->assertIdentical($legacy->strrpos('abcabc', 'a'), 3);

    // Transliteration and title case.
    $this->assertIdentical($legacy->specialtoascii('ábc'), 'abc');
    $this->assertIdentical($legacy->strtotitle('abc ABC'), 'Abc Abc');
}
}

View File

@ -1,5 +1,4 @@
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
@ -25,72 +24,17 @@
defined('MOODLE_INTERNAL') || die();
/**
* As we implement the singleton pattern to use this class (only one instance
* is shared globally), we need this helper function
* Original singleton helper function, please use static methods instead,
* ex: textlib::convert()
*
* IMPORTANT Note: Typo3 libraries always expect lowercase charsets to use 100%
* its capabilities so, don't forget to make the conversion
* from every wrapper function!
*
* NOTE(review): this span shows both the removed singleton body (ending in
* "return $instance;") and its replacement ("return new textlib();") - confirm
* which side applies before editing.
*
* @return textlib singleton instance of textlib
* @deprecated use the static textlib methods directly, e.g. textlib::convert()
* @return textlib instance
*/
function textlib_get_instance() {
global $CFG;
static $instance = null;
if (!$instance) {
/// initialisation is delayed because we do not want this on each page ;-)
/// Required files
require_once($CFG->libdir.'/typo3/class.t3lib_cs.php');
require_once($CFG->libdir.'/typo3/class.t3lib_div.php');
/// If ICONV is available, lets Typo3 library use it for convert
if (extension_loaded('iconv')) {
$GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod'] = 'iconv';
/// Else if mbstring is available, lets Typo3 library use it
} else if (extension_loaded('mbstring')) {
$GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod'] = 'mbstring';
/// Else if recode is available, lets Typo3 library use it
} else if (extension_loaded('recode')) {
$GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod'] = 'recode';
} else {
/// No conversion extension is loaded: leave the method empty
$GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod'] = '';
}
/// If mbstring is available, lets Typo3 library use it for functions
if (extension_loaded('mbstring')) {
$GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] = 'mbstring';
} else {
$GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] = '';
}
/// Tell Typo3 we are curl enabled always (mandatory since 2.0)
$GLOBALS['TYPO3_CONF_VARS']['SYS']['curlUse'] = '1';
/// And this directory must exist to allow Typo to cache conversion
/// tables when using internal functions
make_upload_directory('temp/typo3temp/cs');
/// Make sure typo is using our dir permissions
$GLOBALS['TYPO3_CONF_VARS']['BE']['folderCreateMask'] = decoct($CFG->directorypermissions);
/// Default mask for Typo
$GLOBALS['TYPO3_CONF_VARS']['BE']['fileCreateMask'] = $CFG->directorypermissions;
/// These full path constants must be defined too, transforming backslashes
/// to forward slashes because Typo3 requires it.
define ('PATH_t3lib', str_replace('\\','/',$CFG->libdir.'/typo3/'));
define ('PATH_typo3', str_replace('\\','/',$CFG->libdir.'/typo3/'));
define ('PATH_site', str_replace('\\','/',$CFG->dataroot.'/temp/'));
/// 'WIN' when PHP_OS contains "win" but not "darwin", empty string otherwise
define ('TYPO3_OS', stristr(PHP_OS,'win')&&!stristr(PHP_OS,'darwin')?'WIN':'');
$instance = new textlib();
}
return $instance;
return new textlib();
}
/**
* This class is used to manipulate strings under Moodle 1.6 an later. As
* utf-8 text become mandatory a pool of safe functions under this encoding
@ -102,133 +46,262 @@ function textlib_get_instance() {
*
* Take a look to its own copyright and license details.
*
* @package moodlecore
* IMPORTANT Note: Typo3 libraries always expect lowercase charsets to use 100%
* its capabilities so, don't forget to make the conversion
* from every wrapper function!
*
* @package core
* @subpackage lib
* @copyright 1999 onwards Martin Dougiamas {@link http://moodle.com}
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
class textlib {
var $typo3cs;
/**
 * Lazily creates the shared Typo3 charset helper and returns it.
 *
 * The first call loads the Typo3 classes, fills in the TYPO3_CONF_VARS
 * settings and path constants they read, and creates the helper; every
 * later call returns that same object.
 *
 * @return t3lib_cs
 */
protected static function typo3() {
    global $CFG;
    static $typo3cs = null;

    if ($typo3cs !== null) {
        // Already initialised during this request.
        return $typo3cs;
    }

    // Required files
    require_once($CFG->libdir.'/typo3/class.t3lib_cs.php');
    require_once($CFG->libdir.'/typo3/class.t3lib_div.php');

    // do not use mbstring or recode because it may return invalid results in some corner cases
    $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod'] = 'iconv';
    $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] = 'iconv';

    // Tell Typo3 we are curl enabled always (mandatory since 2.0)
    $GLOBALS['TYPO3_CONF_VARS']['SYS']['curlUse'] = '1';

    // Typo3 caches its conversion tables here when using internal functions,
    // so the directory has to exist.
    make_upload_directory('temp/typo3temp/cs');

    // Make sure typo is using our dir permissions
    $GLOBALS['TYPO3_CONF_VARS']['BE']['folderCreateMask'] = decoct($CFG->directorypermissions);
    // Default mask for Typo
    $GLOBALS['TYPO3_CONF_VARS']['BE']['fileCreateMask'] = $CFG->directorypermissions;

    // Typo3 requires these full path constants with forward slashes only.
    $typo3dir = str_replace('\\', '/', $CFG->libdir.'/typo3/');
    $sitedir = str_replace('\\', '/', $CFG->dataroot.'/temp/');
    define('PATH_t3lib', $typo3dir);
    define('PATH_typo3', $typo3dir);
    define('PATH_site', $sitedir);

    // 'WIN' when PHP_OS contains "win" but not "darwin", empty string otherwise.
    $iswindows = stristr(PHP_OS, 'win') && !stristr(PHP_OS, 'darwin');
    define('TYPO3_OS', $iswindows ? 'WIN' : '');

    $typo3cs = new t3lib_cs();
    return $typo3cs;
}
/**
* Standard constructor of the class. All it does is to instantiate
* a new t3lib_cs object to have all their functions ready.
* Standardise charset name
*
* Instead of instantiating a lot of objects of this class every time
* some of their functions is going to be used, you can invoke the:
* textlib_get_instance() function, avoiding the creation of them
* (following the singleton pattern)
* Please note it does not mean the returned charset is actually supported.
*
* Common names are resolved by the shortcuts below without loading typo3;
* anything else is passed to the typo3 parse_charset() helper.
*
* @static
* @param string $charset raw charset name
* @return string normalised lowercase charset name
*/
function textlib() {
/// Instantiate a conversor object some of the methods in typo3
/// reference to $this and cannot be executed in a static context
$this->typo3cs = new t3lib_cs();
public static function parse_charset($charset) {
$charset = strtolower($charset);
// shortcuts so that we do not have to load typo3 on every page
if ($charset === 'utf8' or $charset === 'utf-8') {
return 'utf-8';
}
// cpNNNN, win-NNNN, windowsNNNN, ... with a 12xx code page number all map to windows-12xx
if (preg_match('/^(cp|win|windows)-?(12[0-9]{2})$/', $charset, $matches)) {
return 'windows-'.$matches[2];
}
// iso-8859-N names are already in their normalised form
if (preg_match('/^iso-8859-[0-9]+$/', $charset, $matches)) {
return $charset;
}
if ($charset === 'euc-jp') {
return 'euc-jp';
}
if ($charset === 'iso-2022-jp') {
return 'iso-2022-jp';
}
// both spellings are normalised to the underscore form
if ($charset === 'shift-jis' or $charset === 'shift_jis') {
return 'shift_jis';
}
if ($charset === 'gb2312') {
return 'gb2312';
}
if ($charset === 'gb18030') {
return 'gb18030';
}
// fallback to typo3
return self::typo3()->parse_charset($charset);
}
/**
* Converts the text between different encodings. It will use iconv, mbstring
* or internal (typo3) methods to try such conversion. Returns false if fails.
* Converts the text between different encodings. It uses iconv extension with //TRANSLIT parameter,
* falls back to typo3.
* Returns false if fails.
*
* @param string $text text to convert; it is cast to string and an empty string is returned unchanged
* @param string $fromCS source encoding
* @param string $toCS result encoding, utf-8 by default
* @return string|bool converted string or false on error
*/
function convert($text, $fromCS, $toCS='utf-8') {
/// Normalize charsets
$fromCS = $this->typo3cs->parse_charset($fromCS);
$toCS = $this->typo3cs->parse_charset($toCS);
/// Avoid some notices from Typo3 code
$oldlevel = error_reporting(E_PARSE);
/// Call Typo3 conv() function. It will do all the work
$result = $this->typo3cs->conv($text, $fromCS, $toCS);
/// Restore original debug level
error_reporting($oldlevel);
public static function convert($text, $fromCS, $toCS='utf-8') {
// Normalise both charset names first
$fromCS = self::parse_charset($fromCS);
$toCS = self::parse_charset($toCS);
$text = (string)$text; // we can work only with strings
if ($text === '') {
return '';
}
// First attempt: iconv with transliteration
$result = iconv($fromCS, $toCS.'//TRANSLIT', $text);
if ($result === false or $result === '') {
// note: iconv is prone to return empty string when invalid char encountered, or false if encoding unsupported
// Second attempt: typo3 conversion, with error reporting lowered around the call
$oldlevel = error_reporting(E_PARSE);
$result = self::typo3()->conv($text, $fromCS, $toCS);
error_reporting($oldlevel);
}
return $result;
}
/**
 * Multibyte safe substr() function, uses iconv for utf-8, falls back to typo3.
 *
 * @param string $text
 * @param int $start negative value means from end
 * @param int $len maximum number of characters to return, null means up to the end of the text
 * @param string $charset encoding of the text
 * @return string
 */
public static function substr($text, $start, $len=null, $charset='utf-8') {
    $charset = self::parse_charset($charset);

    if ($charset === 'utf-8') {
        if ($len === null) {
            // Before PHP 8 iconv_substr() casts a null length to 0 and returns
            // an empty string, so spell out "everything up to the end" explicitly.
            return iconv_substr($text, $start, iconv_strlen($text, $charset), $charset);
        }
        return iconv_substr($text, $start, $len, $charset);
    }

    // Avoid some notices from Typo3 code, then restore the original level.
    $oldlevel = error_reporting(E_PARSE);
    $result = self::typo3()->substr($charset, $text, $start, $len);
    error_reporting($oldlevel);

    return $result;
}
/**
* Multibyte safe strlen() function, uses mbstring if available.
* Multibyte safe strlen() function, uses iconv for utf-8, falls back to typo3.
*
* @param string $text
* @param string $charset encoding of the text, utf-8 by default
* @return int|bool number of characters; iconv_strlen() yields false on failure
*         (NOTE(review): the failure value of the typo3 path is not visible here - confirm)
*/
function strlen($text, $charset='utf-8') {
/// Normalize charset
$charset = $this->typo3cs->parse_charset($charset);
/// Avoid some notices from Typo3 code
public static function strlen($text, $charset='utf-8') {
$charset = self::parse_charset($charset);
// utf-8 is counted by iconv directly, without loading typo3
if ($charset === 'utf-8') {
return iconv_strlen($text, $charset);
}
$oldlevel = error_reporting(E_PARSE);
/// Call Typo3 strlen() function. It will do all the work
$result = $this->typo3cs->strlen($charset,$text);
/// Restore original debug level
$result = self::typo3()->strlen($charset, $text);
error_reporting($oldlevel);
return $result;
}
/**
* Multibyte safe strtolower() function, uses mbstring if available.
* Multibyte safe strtolower() function, uses mbstring, falls back to typo3.
*
* @param string $text
* @param string $charset encoding of the text (may not work for all encodings), utf-8 by default
* @return string lower case text
*/
function strtolower($text, $charset='utf-8') {
/// Normalize charset
$charset = $this->typo3cs->parse_charset($charset);
/// Avoid some notices from Typo3 code
public static function strtolower($text, $charset='utf-8') {
$charset = self::parse_charset($charset);
// mbstring is used only for utf-8 and only when the extension provides the function
if ($charset === 'utf-8' and function_exists('mb_strtolower')) {
return mb_strtolower($text, $charset);
}
$oldlevel = error_reporting(E_PARSE);
/// Call Typo3 conv_case() function. It will do all the work
$result = $this->typo3cs->conv_case($charset,$text,'toLower');
/// Restore original debug level
$result = self::typo3()->conv_case($charset, $text, 'toLower');
error_reporting($oldlevel);
return $result;
}
/**
* Multibyte safe strtoupper() function, uses mbstring if available.
* Multibyte safe strtoupper() function, uses mbstring, falls back to typo3.
*
* @param string $text
* @param string $charset encoding of the text (may not work for all encodings), utf-8 by default
* @return string upper case text
*/
function strtoupper($text, $charset='utf-8') {
/// Normalize charset
$charset = $this->typo3cs->parse_charset($charset);
/// Avoid some notices from Typo3 code
public static function strtoupper($text, $charset='utf-8') {
$charset = self::parse_charset($charset);
// mbstring is used only for utf-8 and only when the extension provides the function
if ($charset === 'utf-8' and function_exists('mb_strtoupper')) {
return mb_strtoupper($text, $charset);
}
$oldlevel = error_reporting(E_PARSE);
/// Call Typo3 conv_case() function. It will do all the work
$result = $this->typo3cs->conv_case($charset,$text,'toUpper');
/// Restore original debug level
$result = self::typo3()->conv_case($charset, $text, 'toUpper');
error_reporting($oldlevel);
return $result;
}
/**
* UTF-8 ONLY safe strpos() function, uses mbstring if available.
* UTF-8 ONLY safe strpos(), uses iconv.
*
* @param string $haystack utf-8 text to search in
* @param string $needle utf-8 text to look for
* @param int $offset character offset at which the search starts
* @return int|bool character position of the first occurrence, false if not found
*/
function strpos($haystack,$needle,$offset=0) {
/// Call Typo3 utf8_strpos() function. It will do all the work
return $this->typo3cs->utf8_strpos($haystack,$needle,$offset);
public static function strpos($haystack, $needle, $offset=0) {
return iconv_strpos($haystack, $needle, $offset, 'utf-8');
}
/**
* UTF-8 ONLY safe strrpos() function, uses mbstring if available.
* UTF-8 ONLY safe strrpos(), uses iconv.
*
* @param string $haystack utf-8 text to search in
* @param string $needle utf-8 text to look for
* @return int|bool character position of the last occurrence, false if not found
*/
function strrpos($haystack,$needle) {
/// Call Typo3 utf8_strrpos() function. It will do all the work
return $this->typo3cs->utf8_strrpos($haystack,$needle);
public static function strrpos($haystack, $needle) {
return iconv_strrpos($haystack, $needle, 'utf-8');
}
/**
* Try to convert upper unicode characters to plain ascii,
* the returned string may contain unconverted unicode characters.
* the returned string may contain unconverted unicode characters.
*
* The conversion itself is delegated to the typo3 specCharsToASCII() helper.
*
* @param string $text
* @param string $charset encoding of the text, utf-8 by default
* @return string
*/
function specialtoascii($text,$charset='utf-8') {
/// Normalize charset
$charset = $this->typo3cs->parse_charset($charset);
/// Avoid some notices from Typo3 code
public static function specialtoascii($text, $charset='utf-8') {
$charset = self::parse_charset($charset);
// Error reporting is lowered around the typo3 call and restored afterwards
$oldlevel = error_reporting(E_PARSE);
$result = $this->typo3cs->specCharsToASCII($charset,$text);
/// Restore original debug level
$result = self::typo3()->specCharsToASCII($charset, $text);
error_reporting($oldlevel);
return $result;
}
@ -237,30 +310,34 @@ class textlib {
* Generate a correct base64 encoded header to be used in MIME mail messages.
* This function seems to be 100% compliant with RFC1342. Credits go to:
* paravoid (http://www.php.net/manual/en/function.mb-encode-mimeheader.php#60283).
*
* @param string $text
* @param string $charset encoding of the text
* @return string
*/
function encode_mimeheader($text, $charset='utf-8') {
public static function encode_mimeheader($text, $charset='utf-8') {
if (empty($text)) {
return (string)$text;
}
/// Normalize charset
$charset = $this->typo3cs->parse_charset($charset);
/// If the text is pure ASCII, we don't need to encode it
if ($this->convert($text, $charset, 'ascii') == $text) {
// Normalize charset
$charset = self::parse_charset($charset);
// If the text is pure ASCII, we don't need to encode it
if (self::convert($text, $charset, 'ascii') == $text) {
return $text;
}
/// Although RFC says that line feed should be \r\n, it seems that
/// some mailers double convert \r, so we are going to use \n alone
// Although RFC says that line feed should be \r\n, it seems that
// some mailers double convert \r, so we are going to use \n alone
$linefeed="\n";
/// Define start and end of every chunk
// Define start and end of every chunk
$start = "=?$charset?B?";
$end = "?=";
/// Acumulate results
// Accumulate results
$encoded = '';
/// Max line length is 75 (including start and end)
// Max line length is 75 (including start and end)
$length = 75 - strlen($start) - strlen($end);
/// Multi-byte ratio
$multilength = $this->strlen($text, $charset);
/// Detect if strlen and friends supported
// Multi-byte ratio
$multilength = self::strlen($text, $charset);
// Detect if strlen and friends supported
if ($multilength === false) {
if ($charset == 'GB18030' or $charset == 'gb18030') {
while (strlen($text)) {
@ -287,30 +364,30 @@ class textlib {
}
}
$ratio = $multilength / strlen($text);
/// Base64 ratio
// Base64 ratio
$magic = $avglength = floor(3 * $length * $ratio / 4);
/// basic infinite loop protection
// basic infinite loop protection
$maxiterations = strlen($text)*2;
$iteration = 0;
/// Iterate over the string in magic chunks
// Iterate over the string in magic chunks
for ($i=0; $i <= $multilength; $i+=$magic) {
if ($iteration++ > $maxiterations) {
return false; // probably infinite loop
}
$magic = $avglength;
$offset = 0;
/// Ensure the chunk fits in length, reduding magic if necessary
// Ensure the chunk fits in length, reducing magic if necessary
do {
$magic -= $offset;
$chunk = $this->substr($text, $i, $magic, $charset);
$chunk = self::substr($text, $i, $magic, $charset);
$chunk = base64_encode($chunk);
$offset++;
} while (strlen($chunk) > $length);
/// This chunk doen't break any multi-byte char. Use it.
// This chunk doesn't break any multi-byte char. Use it.
if ($chunk)
$encoded .= ' '.$start.$chunk.$end.$linefeed;
}
/// Strip the first space and the last linefeed
// Strip the first space and the last linefeed
$encoded = substr($encoded, 1, -strlen($linefeed));
return $encoded;
@ -324,23 +401,23 @@ class textlib {
*
* @param string $str input string
* @param boolean $htmlent convert also html entities (defaults to true)
* @return string
*
* NOTE: we could have used typo3 entities_to_utf8() here
* but the direct alternative used runs 400% quicker
* and uses 0.5Mb less memory, so, let's use it
* (tested agains 10^6 conversions)
* (tested against 10^6 conversions)
*/
function entities_to_utf8($str, $htmlent=true) {
public static function entities_to_utf8($str, $htmlent=true) {
static $trans_tbl; // Going to use static transliteration table
static $trans_tbl; /// Going to use static translit table
/// Replace numeric entities
// Replace numeric entities
$result = preg_replace('~&#x([0-9a-f]+);~ei', 'textlib::code2utf8(hexdec("\\1"))', $str);
$result = preg_replace('~&#([0-9]+);~e', 'textlib::code2utf8(\\1)', $result);
/// Replace literal entities (if desired)
// Replace literal entities (if desired)
if ($htmlent) {
/// Generate/create $trans_tbl
// Generate/create $trans_tbl
if (!isset($trans_tbl)) {
$trans_tbl = array();
foreach (get_html_translation_table(HTML_ENTITIES) as $val=>$key) {
@ -349,37 +426,40 @@ class textlib {
}
$result = strtr($result, $trans_tbl);
}
/// Return utf8-ised string
// Return utf8-ised string
return $result;
}
/**
 * Converts all Unicode chars > 127 to numeric entities &#nnnn; or &#xnnn;.
 *
 * @param string $str input string
 * @param boolean $dec output decadic only number entities
 * @param boolean $nonnum remove all non-numeric entities
 * @return string converted string
 */
public static function utf8_to_entities($str, $dec=false, $nonnum=false) {
    // Avoid some notices from Typo3 code
    $oldlevel = error_reporting(E_PARSE);

    if ($nonnum) {
        // Decode existing entities first so that the result holds numeric entities only.
        $str = self::typo3()->entities_to_utf8($str, true);
    }
    $result = self::typo3()->utf8_to_entities($str);

    if ($dec) {
        // A callback is used instead of the /e modifier, which is deprecated
        // since PHP 5.5 and removed in PHP 7.0; with notices suppressed above
        // the old form would fail silently there.
        $result = preg_replace_callback('/&#x([0-9a-f]+);/i', array(__CLASS__, 'hex_entity_to_dec'), $result);
    }

    // Restore original debug level
    error_reporting($oldlevel);
    return $result;
}

/**
 * Callback for utf8_to_entities(): rewrites one hexadecimal entity as a decimal one.
 *
 * @param array $matches regex match, index 1 holds the hexadecimal digits
 * @return string decimal numeric entity, e.g. &#381;
 */
protected static function hex_entity_to_dec($matches) {
    return '&#'.hexdec($matches[1]).';';
}
/**
* Removes the BOM from unicode string - see http://unicode.org/faq/utf_bom.html
*
* @param string $str
* @return string
*/
function trim_utf8_bom($str) {
public static function trim_utf8_bom($str) {
$bom = "\xef\xbb\xbf";
if (strpos($str, $bom) === 0) {
return substr($str, strlen($bom));
@ -391,7 +471,7 @@ class textlib {
* Returns encoding options for select boxes, utf-8 and platform encoding first
* @return array encodings
*/
function get_encodings() {
public static function get_encodings() {
$encodings = array();
$encodings['UTF-8'] = 'UTF-8';
$winenc = strtoupper(get_string('localewincharset', 'langconfig'));
@ -401,7 +481,7 @@ class textlib {
$nixenc = strtoupper(get_string('oldcharset', 'langconfig'));
$encodings[$nixenc] = $nixenc;
foreach ($this->typo3cs->synonyms as $enc) {
foreach (self::typo3()->synonyms as $enc) {
$enc = strtoupper($enc);
$encodings[$enc] = $enc;
}
@ -415,7 +495,7 @@ class textlib {
* @param int $num one unicode value
* @return string the UTF-8 char corresponding to the unicode value
*/
function code2utf8($num) {
public static function code2utf8($num) {
if ($num < 128) {
return chr($num);
}
@ -434,32 +514,33 @@ class textlib {
/**
* Makes first letter of each word capital - words must be separated by spaces.
* Use with care, this function does not work properly in many locales!!!
*
* @param string $text
* @return string
*/
function strtotitle($text) {
public static function strtotitle($text) {
if (empty($text)) {
return $text;
}
if (function_exists('mb_convert_case')) {
return mb_convert_case($text, MB_CASE_TITLE,"UTF-8");
return mb_convert_case($text, MB_CASE_TITLE, 'UTF-8');
}
$text = $this->strtolower($text);
$text = self::strtolower($text);
$words = explode(' ', $text);
foreach ($words as $i=>$word) {
$length = $this->strlen($word);
$length = self::strlen($word);
if (!$length) {
continue;
} else if ($length == 1) {
$words[$i] = $this->strtoupper($word);
$words[$i] = self::strtoupper($word);
} else {
$letter = $this->substr($word, 0, 1);
$letter = $this->strtoupper($letter);
$rest = $this->substr($word, 1);
$letter = self::substr($word, 0, 1);
$letter = self::strtoupper($letter);
$rest = self::substr($word, 1);
$words[$i] = $letter.$rest;
}
}
@ -468,11 +549,13 @@ class textlib {
/**
* Locale aware sorting, the key associations are kept, values are sorted alphabetically.
*
* Note: this function is using current moodle locale.
*
* @param array $arr array to be sorted
* @param string $lang moodle language
* @return void, modifies parameter
*/
function asort(array &$arr) {
public static function asort(array &$arr) {
if (function_exists('collator_asort')) {
if ($coll = collator_create(get_string('locale', 'langconfig'))) {
collator_asort($coll, $arr);