diff --git a/lib/simpletest/testtextlib.php b/lib/simpletest/testtextlib.php index 19e0fc51510..e093db38054 100644 --- a/lib/simpletest/testtextlib.php +++ b/lib/simpletest/testtextlib.php @@ -1,5 +1,4 @@ assertIdentical(textlib::parse_charset('Cp1250'), 'windows-1250'); + // does typo3 work? some encoding moodle does not use + $this->assertIdentical(textlib::parse_charset('ms-ansi'), 'windows-1252'); + } + + public function test_convert() { + $utf8 = "Žluťoučký koníček"; + $iso2 = pack("H*", "ae6c75bb6f75e86bfd206b6f6eede8656b"); + $win = pack("H*", "8e6c759d6f75e86bfd206b6f6eede8656b"); + $this->assertIdentical(textlib::convert($utf8, 'utf-8', 'iso-8859-2'), $iso2); + $this->assertIdentical(textlib::convert($iso2, 'iso-8859-2', 'utf-8'), $utf8); + $this->assertIdentical(textlib::convert($utf8, 'utf-8', 'win-1250'), $win); + $this->assertIdentical(textlib::convert($win, 'win-1250', 'utf-8'), $utf8); + $this->assertIdentical(textlib::convert($win, 'win-1250', 'iso-8859-2'), $iso2); + $this->assertIdentical(textlib::convert($iso2, 'iso-8859-2', 'win-1250'), $win); + $this->assertIdentical(textlib::convert($iso2, 'iso-8859-2', 'iso-8859-2'), $iso2); + $this->assertIdentical(textlib::convert($win, 'win-1250', 'cp1250'), $win); + + + $utf8 = '言語設定'; + $str = pack("H*", "b8c0b8ecc0dfc4ea"); //EUC-JP + $this->assertIdentical(textlib::convert($utf8, 'utf-8', 'EUC-JP'), $str); + $this->assertIdentical(textlib::convert($str, 'EUC-JP', 'utf-8'), $utf8); + + $str = pack("H*", "1b24423840386c405f446a1b2842"); //ISO-2022-JP + $this->assertIdentical(textlib::convert($utf8, 'utf-8', 'ISO-2022-JP'), $str); + $this->assertIdentical(textlib::convert($str, 'ISO-2022-JP', 'utf-8'), $utf8); + + $str = pack("H*", "8cbe8cea90dd92e8"); //SHIFT-JIS + $this->assertIdentical(textlib::convert($utf8, 'utf-8', 'SHIFT-JIS'), $str); + $this->assertIdentical(textlib::convert($str, 'SHIFT-JIS', 'utf-8'), $utf8); + + $utf8 = '简体中文'; + $str = pack("H*", "bcf2cce5d6d0cec4"); //GB2312 + $this->assertIdentical(textlib::convert($utf8, 'utf-8', 'GB2312'), $str); + $this->assertIdentical(textlib::convert($str, 'GB2312', 'utf-8'), $utf8); + + $str = pack("H*", "bcf2cce5d6d0cec4"); //GB18030 + $this->assertIdentical(textlib::convert($utf8, 'utf-8', 'GB18030'), $str); + $this->assertIdentical(textlib::convert($str, 'GB18030', 'utf-8'), $utf8); + } + + public function test_substr() { + $str = "Žluťoučký koníček"; + $this->assertIdentical(textlib::substr($str, 1, 3), 'luť'); + $this->assertIdentical(textlib::substr($str, 0, 100), $str); + $this->assertIdentical(textlib::substr($str, -3, 2), 'če'); + + $iso2 = pack("H*", "ae6c75bb6f75e86bfd206b6f6eede8656b"); + $this->assertIdentical(textlib::substr($iso2, 1, 3, 'iso-8859-2'), textlib::convert('luť', 'utf-8', 'iso-8859-2')); + $this->assertIdentical(textlib::substr($iso2, 0, 100, 'iso-8859-2'), textlib::convert($str, 'utf-8', 'iso-8859-2')); + $this->assertIdentical(textlib::substr($iso2, -3, 2, 'iso-8859-2'), textlib::convert('če', 'utf-8', 'iso-8859-2')); + + $win = pack("H*", "8e6c759d6f75e86bfd206b6f6eede8656b"); + $this->assertIdentical(textlib::substr($win, 1, 3, 'cp1250'), textlib::convert('luť', 'utf-8', 'cp1250')); + $this->assertIdentical(textlib::substr($win, 0, 100, 'cp1250'), textlib::convert($str, 'utf-8', 'cp1250')); + $this->assertIdentical(textlib::substr($win, -3, 2, 'cp1250'), textlib::convert('če', 'utf-8', 'cp1250')); + + + $str = pack("H*", "b8c0b8ecc0dfc4ea"); //EUC-JP + $s = pack("H*", "b8ec"); //EUC-JP + $this->assertIdentical(textlib::substr($str, 1, 1, 'EUC-JP'), $s); + + $str = pack("H*", "1b24423840386c405f446a1b2842"); //ISO-2022-JP + $s = pack("H*", "1b2442386c1b2842"); //ISO-2022-JP + $this->assertIdentical(textlib::substr($str, 1, 1, 'ISO-2022-JP'), $s); + + $str = pack("H*", "8cbe8cea90dd92e8"); //SHIFT-JIS + $s = pack("H*", "8cea"); //SHIFT-JIS + $this->assertIdentical(textlib::substr($str, 1, 1, 'SHIFT-JIS'), $s); + + $str = pack("H*", "bcf2cce5d6d0cec4"); //GB2312 + $s = pack("H*", "cce5"); //GB2312 + $this->assertIdentical(textlib::substr($str, 1, 1, 'GB2312'), $s); + + $str = pack("H*", "bcf2cce5d6d0cec4"); //GB18030 + $s = pack("H*", "cce5"); //GB18030 + $this->assertIdentical(textlib::substr($str, 1, 1, 'GB18030'), $s); + } + + public function test_strlen() { + $str = "Žluťoučký koníček"; + $this->assertIdentical(textlib::strlen($str), 17); + + $iso2 = pack("H*", "ae6c75bb6f75e86bfd206b6f6eede8656b"); + $this->assertIdentical(textlib::strlen($iso2, 'iso-8859-2'), 17); + + $win = pack("H*", "8e6c759d6f75e86bfd206b6f6eede8656b"); + $this->assertIdentical(textlib::strlen($win, 'cp1250'), 17); + + + $str = pack("H*", "b8ec"); //EUC-JP + $this->assertIdentical(textlib::strlen($str, 'EUC-JP'), 1); + $str = pack("H*", "b8c0b8ecc0dfc4ea"); //EUC-JP + $this->assertIdentical(textlib::strlen($str, 'EUC-JP'), 4); + + $str = pack("H*", "1b2442386c1b2842"); //ISO-2022-JP + $this->assertIdentical(textlib::strlen($str, 'ISO-2022-JP'), 1); + $str = pack("H*", "1b24423840386c405f446a1b2842"); //ISO-2022-JP + $this->assertIdentical(textlib::strlen($str, 'ISO-2022-JP'), 4); + + $str = pack("H*", "8cea"); //SHIFT-JIS + $this->assertIdentical(textlib::strlen($str, 'SHIFT-JIS'), 1); + $str = pack("H*", "8cbe8cea90dd92e8"); //SHIFT-JIS + $this->assertIdentical(textlib::strlen($str, 'SHIFT-JIS'), 4); + + $str = pack("H*", "cce5"); //GB2312 + $this->assertIdentical(textlib::strlen($str, 'GB2312'), 1); + $str = pack("H*", "bcf2cce5d6d0cec4"); //GB2312 + $this->assertIdentical(textlib::strlen($str, 'GB2312'), 4); + + $str = pack("H*", "cce5"); //GB18030 + $this->assertIdentical(textlib::strlen($str, 'GB18030'), 1); + $str = pack("H*", "bcf2cce5d6d0cec4"); //GB18030 + $this->assertIdentical(textlib::strlen($str, 'GB18030'), 4); + } + + public function test_strtolower() { + $str = "Žluťoučký koníček"; + $low = 'žluťoučký koníček'; + $this->assertIdentical(textlib::strtolower($str), $low); + + $iso2 = pack("H*", "ae6c75bb6f75e86bfd206b6f6eede8656b"); + $this->assertIdentical(textlib::strtolower($iso2, 'iso-8859-2'), textlib::convert($low, 'utf-8', 'iso-8859-2')); + + $win = pack("H*", "8e6c759d6f75e86bfd206b6f6eede8656b"); + $this->assertIdentical(textlib::strtolower($win, 'cp1250'), textlib::convert($low, 'utf-8', 'cp1250')); + + + $str = '言語設定'; + $this->assertIdentical(textlib::strtolower($str), $str); + + $str = '简体中文'; + $this->assertIdentical(textlib::strtolower($str), $str); + + $str = pack("H*", "1b24423840386c405f446a1b2842"); //ISO-2022-JP + $this->assertIdentical(textlib::strtolower($str, 'ISO-2022-JP'), $str); + + $str = pack("H*", "8cbe8cea90dd92e8"); //SHIFT-JIS + $this->assertIdentical(textlib::strtolower($str, 'SHIFT-JIS'), $str); + + $str = pack("H*", "bcf2cce5d6d0cec4"); //GB2312 + $this->assertIdentical(textlib::strtolower($str, 'GB2312'), $str); + + $str = pack("H*", "bcf2cce5d6d0cec4"); //GB18030 + $this->assertIdentical(textlib::strtolower($str, 'GB18030'), $str); + } + + public function test_strtoupper() { + $str = "Žluťoučký koníček"; + $up = 'ŽLUŤOUČKÝ KONÍČEK'; + $this->assertIdentical(textlib::strtoupper($str), $up); + + $iso2 = pack("H*", "ae6c75bb6f75e86bfd206b6f6eede8656b"); + $this->assertIdentical(textlib::strtoupper($iso2, 'iso-8859-2'), textlib::convert($up, 'utf-8', 'iso-8859-2')); + + $win = pack("H*", "8e6c759d6f75e86bfd206b6f6eede8656b"); + $this->assertIdentical(textlib::strtoupper($win, 'cp1250'), textlib::convert($up, 'utf-8', 'cp1250')); + + + $str = '言語設定'; + $this->assertIdentical(textlib::strtoupper($str), $str); + + $str = '简体中文'; + $this->assertIdentical(textlib::strtoupper($str), $str); + + $str = pack("H*", "1b24423840386c405f446a1b2842"); //ISO-2022-JP + $this->assertIdentical(textlib::strtoupper($str, 'ISO-2022-JP'), $str); + + $str = pack("H*", "8cbe8cea90dd92e8"); //SHIFT-JIS + $this->assertIdentical(textlib::strtoupper($str, 'SHIFT-JIS'), $str); + + $str = pack("H*", "bcf2cce5d6d0cec4"); //GB2312 + $this->assertIdentical(textlib::strtoupper($str, 'GB2312'), $str); + + $str = pack("H*", "bcf2cce5d6d0cec4"); //GB18030 + $this->assertIdentical(textlib::strtoupper($str, 'GB18030'), $str); + } + + public function test_strpos() { + $str = "Žluťoučký koníček"; + $this->assertIdentical(textlib::strpos($str, 'koníč'), 10); + } + + public function test_strrpos() { + $str = "Žluťoučký koníček"; + $this->assertIdentical(textlib::strrpos($str, 'o'), 11); + } + + public function test_specialtoascii() { + $str = "Žluťoučký koníček"; + $this->assertIdentical(textlib::specialtoascii($str), 'Zlutoucky konicek'); + } + + public function test_encode_mimeheader() { + $str = "Žluťoučký koníček"; + $this->assertIdentical(textlib::encode_mimeheader($str), '=?utf-8?B?xb1sdcWlb3XEjWvDvSBrb27DrcSNZWs=?='); + } + + public function test_entities_to_utf8() { + $str = "Žluťoučký koníček"; + $this->assertIdentical(textlib::entities_to_utf8($str), "Žluťoučký koníček"); + } + + public function test_utf8_to_entities() { + $str = "Žluťoučký koníček"; + $this->assertIdentical(textlib::utf8_to_entities($str), "Žluťoučký koníček"); + $this->assertIdentical(textlib::utf8_to_entities($str, true), "Žluťoučký koníček"); + + } + + public function test_trim_utf8_bom() { + $bom = "\xef\xbb\xbf"; + $str = "Žluťoučký koníček"; + $this->assertIdentical(textlib::trim_utf8_bom($bom.$str.$bom), $str.$bom); + } + + public function test_get_encodings() { + $encodings = textlib::get_encodings(); + $this->assertTrue(is_array($encodings)); + $this->assertTrue(count($encodings) > 1); + $this->assertTrue(isset($encodings['UTF-8'])); + } + + public function test_code2utf8() { + $this->assertIdentical(textlib::code2utf8(381), 'Ž'); + } + + public function test_strtotitle() { + $str = "žluťoučký koníček"; + $this->assertIdentical(textlib::strtotitle($str), "Žluťoučký Koníček"); + } + public function test_asort() { global $SESSION; $SESSION->lang = 'en'; // make sure we test en language to get consistent results, hopefully all systems have this locale $arr = array('b'=>'ab', 1=>'aa', 0=>'cc'); - textlib_get_instance()->asort($arr); + textlib::asort($arr); $this->assertIdentical(array_keys($arr), array(1, 'b', 0)); $this->assertIdentical(array_values($arr), array('aa', 'ab', 'cc')); @@ -55,10 +288,22 @@ class textlib_test extends UnitTestCase { } $arr = array('a'=>'áb', 'b'=>'ab', 1=>'aa', 0=>'cc'); - textlib_get_instance()->asort($arr); + textlib::asort($arr); $this->assertIdentical(array_keys($arr), array(1, 'b', 'a', 0), $error); unset($SESSION->lang); } + public function test_deprecated_textlib_get_instance() { + $textlib = textlib_get_instance(); + $this->assertIdentical($textlib->substr('abc', 1, 1), 'b'); + $this->assertIdentical($textlib->strlen('abc'), 3); + $this->assertIdentical($textlib->strtoupper('Abc'), 'ABC'); + $this->assertIdentical($textlib->strtolower('Abc'), 'abc'); + $this->assertIdentical($textlib->strpos('abc', 'a'), 0); + $this->assertIdentical($textlib->strpos('abc', 'd'), false); + $this->assertIdentical($textlib->strrpos('abcabc', 'a'), 3); + $this->assertIdentical($textlib->specialtoascii('ábc'), 'abc'); + $this->assertIdentical($textlib->strtotitle('abc ABC'), 'Abc Abc'); + } } diff --git a/lib/textlib.class.php b/lib/textlib.class.php index 1adceea660b..4dff46b5038 100644 --- a/lib/textlib.class.php +++ b/lib/textlib.class.php @@ -1,5 +1,4 @@ libdir.'/typo3/class.t3lib_cs.php'); - require_once($CFG->libdir.'/typo3/class.t3lib_div.php'); - - /// If ICONV is available, lets Typo3 library use it for convert - if (extension_loaded('iconv')) { - $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod'] = 'iconv'; - /// Else if mbstring is available, lets Typo3 library use it - } else if (extension_loaded('mbstring')) { - $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod'] = 'mbstring'; - /// Else if recode is available, lets Typo3 library use it - } else if (extension_loaded('recode')) { - $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod'] = 'recode'; - } else { - $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod'] = ''; - } - - /// If mbstring is available, lets Typo3 library use it for functions - if (extension_loaded('mbstring')) { - $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] = 'mbstring'; - } else { - $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] = ''; - } - - /// Tell Typo3 we are curl enabled always (mandatory since 2.0) - $GLOBALS['TYPO3_CONF_VARS']['SYS']['curlUse'] = '1'; - - /// And this directory must exist to allow Typo to cache conversion - /// tables when using internal functions - make_upload_directory('temp/typo3temp/cs'); - - /// Make sure typo is using our dir permissions - $GLOBALS['TYPO3_CONF_VARS']['BE']['folderCreateMask'] = decoct($CFG->directorypermissions); - - /// Default mask for Typo - $GLOBALS['TYPO3_CONF_VARS']['BE']['fileCreateMask'] = $CFG->directorypermissions; - - /// This full path constants must be defined too, transforming backslashes - /// to forward slashed beacuse Typo3 requires it. - define ('PATH_t3lib', str_replace('\\','/',$CFG->libdir.'/typo3/')); - define ('PATH_typo3', str_replace('\\','/',$CFG->libdir.'/typo3/')); - define ('PATH_site', str_replace('\\','/',$CFG->dataroot.'/temp/')); - define ('TYPO3_OS', stristr(PHP_OS,'win')&&!stristr(PHP_OS,'darwin')?'WIN':''); - - $instance = new textlib(); - } - return $instance; + return new textlib(); } + /** * This class is used to manipulate strings under Moodle 1.6 an later. As * utf-8 text become mandatory a pool of safe functions under this encoding @@ -102,133 +46,262 @@ function textlib_get_instance() { * * Take a look to its own copyright and license details. * - * @package moodlecore + * IMPORTANT Note: Typo3 libraries always expect lowercase charsets to use 100% + * its capabilities so, don't forget to make the conversion + * from every wrapper function! + * + * @package core + * @subpackage lib * @copyright 1999 onwards Martin Dougiamas {@link http://moodle.com} * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later */ class textlib { - var $typo3cs; + /** + * Return t3lib helper class + * @return t3lib_cs + */ + protected static function typo3() { + static $typo3cs = null; + + if (isset($typo3cs)) { + return $typo3cs; + } + + global $CFG; + + // Required files + require_once($CFG->libdir.'/typo3/class.t3lib_cs.php'); + require_once($CFG->libdir.'/typo3/class.t3lib_div.php'); + + // do not use mbstring or recode because it may return invalid results in some corner cases + $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod'] = 'iconv'; + $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] = 'iconv'; + + // Tell Typo3 we are curl enabled always (mandatory since 2.0) + $GLOBALS['TYPO3_CONF_VARS']['SYS']['curlUse'] = '1'; + + // And this directory must exist to allow Typo to cache conversion + // tables when using internal functions + make_upload_directory('temp/typo3temp/cs'); + + // Make sure typo is using our dir permissions + $GLOBALS['TYPO3_CONF_VARS']['BE']['folderCreateMask'] = decoct($CFG->directorypermissions); + + // Default mask for Typo + $GLOBALS['TYPO3_CONF_VARS']['BE']['fileCreateMask'] = $CFG->directorypermissions; + + // This full path constants must be defined too, transforming backslashes + // to forward slashed because Typo3 requires it. + define ('PATH_t3lib', str_replace('\\','/',$CFG->libdir.'/typo3/')); + define ('PATH_typo3', str_replace('\\','/',$CFG->libdir.'/typo3/')); + define ('PATH_site', str_replace('\\','/',$CFG->dataroot.'/temp/')); + define ('TYPO3_OS', stristr(PHP_OS,'win')&&!stristr(PHP_OS,'darwin')?'WIN':''); + + $typo3cs = new t3lib_cs(); + + return $typo3cs; + } /** - * Standard constructor of the class. All it does is to instantiate - * a new t3lib_cs object to have all their functions ready. + * Standardise charset name * - * Instead of istantiating a lot of objects of this class everytime - * some of their functions is going to be used, you can invoke the: - * textlib_get_instance() function, avoiding the creation of them - * (following the singleton pattern) + * Please note it does not mean the returned charset is actually supported. + * + * @static + * @param string $charset raw charset name + * @return string normalised lowercase charset name */ - function textlib() { - /// Instantiate a conversor object some of the methods in typo3 - /// reference to $this and cannot be executed in a static context - $this->typo3cs = new t3lib_cs(); + public static function parse_charset($charset) { + $charset = strtolower($charset); + + // shortcuts so that we do not have to load typo3 on every page + + if ($charset === 'utf8' or $charset === 'utf-8') { + return 'utf-8'; + } + + if (preg_match('/^(cp|win|windows)-?(12[0-9]{2})$/', $charset, $matches)) { + return 'windows-'.$matches[2]; + } + + if (preg_match('/^iso-8859-[0-9]+$/', $charset, $matches)) { + return $charset; + } + + if ($charset === 'euc-jp') { + return 'euc-jp'; + } + if ($charset === 'iso-2022-jp') { + return 'iso-2022-jp'; + } + if ($charset === 'shift-jis' or $charset === 'shift_jis') { + return 'shift_jis'; + } + if ($charset === 'gb2312') { + return 'gb2312'; + } + if ($charset === 'gb18030') { + return 'gb18030'; + } + + // fallback to typo3 + return self::typo3()->parse_charset($charset); } /** - * Converts the text between different encodings. It will use iconv, mbstring - * or internal (typo3) methods to try such conversion. Returns false if fails. + * Converts the text between different encodings. It uses iconv extension with //TRANSLIT parameter, + * falls back to typo3. + * Returns false if fails. + * + * @param string $text + * @param string $fromCS source encoding + * @param string $toCS result encoding + * @return string|bool converted string or false on error */ - function convert($text, $fromCS, $toCS='utf-8') { - /// Normalize charsets - $fromCS = $this->typo3cs->parse_charset($fromCS); - $toCS = $this->typo3cs->parse_charset($toCS); - /// Avoid some notices from Typo3 code - $oldlevel = error_reporting(E_PARSE); - /// Call Typo3 conv() function. It will do all the work - $result = $this->typo3cs->conv($text, $fromCS, $toCS); - /// Restore original debug level - error_reporting($oldlevel); + public static function convert($text, $fromCS, $toCS='utf-8') { + $fromCS = self::parse_charset($fromCS); + $toCS = self::parse_charset($toCS); + + $text = (string)$text; // we can work only with strings + + if ($text === '') { + return ''; + } + + $result = iconv($fromCS, $toCS.'//TRANSLIT', $text); + + if ($result === false or $result === '') { + // note: iconv is prone to return empty string when invalid char encountered, or false if encoding unsupported + $oldlevel = error_reporting(E_PARSE); + $result = self::typo3()->conv($text, $fromCS, $toCS); + error_reporting($oldlevel); + } + return $result; } /** - * Multibyte safe substr() function, uses mbstring if available. + * Multibyte safe substr() function, uses iconv for utf-8, falls back to typo3. + * + * @param string $text + * @param int $start negative value means from end + * @param int $len + * @param string $charset encoding of the text + * @return string */ - function substr($text, $start, $len=null, $charset='utf-8') { - /// Normalize charset - $charset = $this->typo3cs->parse_charset($charset); - /// Avoid some notices from Typo3 code + public static function substr($text, $start, $len=null, $charset='utf-8') { + $charset = self::parse_charset($charset); + + if ($charset === 'utf-8') { + return iconv_substr($text, $start, $len, $charset); + } + $oldlevel = error_reporting(E_PARSE); - /// Call Typo3 substr() function. It will do all the work - $result = $this->typo3cs->substr($charset,$text,$start,$len); - /// Restore original debug level + $result = self::typo3()->substr($charset, $text, $start, $len); error_reporting($oldlevel); + return $result; } /** - * Multibyte safe strlen() function, uses mbstring if available. + * Multibyte safe strlen() function, uses iconv for utf-8, falls back to typo3. + * + * @param string $text + * @param string $charset encoding of the text + * @return int number of characters */ - function strlen($text, $charset='utf-8') { - /// Normalize charset - $charset = $this->typo3cs->parse_charset($charset); - /// Avoid some notices from Typo3 code + public static function strlen($text, $charset='utf-8') { + $charset = self::parse_charset($charset); + + if ($charset === 'utf-8') { + return iconv_strlen($text, $charset); + } + $oldlevel = error_reporting(E_PARSE); - /// Call Typo3 strlen() function. It will do all the work - $result = $this->typo3cs->strlen($charset,$text); - /// Restore original debug level + $result = self::typo3()->strlen($charset, $text); error_reporting($oldlevel); + return $result; } /** - * Multibyte safe strtolower() function, uses mbstring if available. + * Multibyte safe strtolower() function, uses mbstring, falls back to typo3. + * + * @param string $text + * @param string $charset encoding of the text (may not work for all encodings) + * @return string lower case text */ - function strtolower($text, $charset='utf-8') { - /// Normalize charset - $charset = $this->typo3cs->parse_charset($charset); - /// Avoid some notices from Typo3 code + public static function strtolower($text, $charset='utf-8') { + $charset = self::parse_charset($charset); + + if ($charset === 'utf-8' and function_exists('mb_strtolower')) { + return mb_strtolower($text, $charset); + } + $oldlevel = error_reporting(E_PARSE); - /// Call Typo3 conv_case() function. It will do all the work - $result = $this->typo3cs->conv_case($charset,$text,'toLower'); - /// Restore original debug level + $result = self::typo3()->conv_case($charset, $text, 'toLower'); error_reporting($oldlevel); + return $result; } /** - * Multibyte safe strtoupper() function, uses mbstring if available. + * Multibyte safe strtoupper() function, uses mbstring, falls back to typo3. + * + * @param string $text + * @param string $charset encoding of the text (may not work for all encodings) + * @return string upper case text */ - function strtoupper($text, $charset='utf-8') { - /// Normalize charset - $charset = $this->typo3cs->parse_charset($charset); - /// Avoid some notices from Typo3 code + public static function strtoupper($text, $charset='utf-8') { + $charset = self::parse_charset($charset); + + if ($charset === 'utf-8' and function_exists('mb_strtoupper')) { + return mb_strtoupper($text, $charset); + } + $oldlevel = error_reporting(E_PARSE); - /// Call Typo3 conv_case() function. It will do all the work - $result = $this->typo3cs->conv_case($charset,$text,'toUpper'); - /// Restore original debug level + $result = self::typo3()->conv_case($charset, $text, 'toUpper'); error_reporting($oldlevel); + return $result; } /** - * UTF-8 ONLY safe strpos() function, uses mbstring if available. + * UTF-8 ONLY safe strpos(), uses iconv.. + * + * @param string $haystack + * @param string $needle + * @param int $offset + * @return string */ - function strpos($haystack,$needle,$offset=0) { - /// Call Typo3 utf8_strpos() function. It will do all the work - return $this->typo3cs->utf8_strpos($haystack,$needle,$offset); + public static function strpos($haystack, $needle, $offset=0) { + return iconv_strpos($haystack, $needle, $offset, 'utf-8'); } /** - * UTF-8 ONLY safe strrpos() function, uses mbstring if available. + * UTF-8 ONLY safe strrpos(), uses iconv. + * + * @param string $haystack + * @param string $needle + * @return string */ - function strrpos($haystack,$needle) { - /// Call Typo3 utf8_strrpos() function. It will do all the work - return $this->typo3cs->utf8_strrpos($haystack,$needle); + public static function strrpos($haystack, $needle) { + return iconv_strrpos($haystack, $needle, 'utf-8'); } /** * Try to convert upper unicode characters to plain ascii, - * the returned string may cantain unconverted unicode characters. + * the returned string may contain unconverted unicode characters. + * + * @param string $text + * @param string $charset encoding of the text + * @return string */ - function specialtoascii($text,$charset='utf-8') { - /// Normalize charset - $charset = $this->typo3cs->parse_charset($charset); - /// Avoid some notices from Typo3 code + public static function specialtoascii($text, $charset='utf-8') { + $charset = self::parse_charset($charset); $oldlevel = error_reporting(E_PARSE); - $result = $this->typo3cs->specCharsToASCII($charset,$text); - /// Restore original debug level + $result = self::typo3()->specCharsToASCII($charset, $text); error_reporting($oldlevel); return $result; } @@ -237,30 +310,34 @@ class textlib { * Generate a correct base64 encoded header to be used in MIME mail messages. * This function seems to be 100% compliant with RFC1342. Credits go to: * paravoid (http://www.php.net/manual/en/function.mb-encode-mimeheader.php#60283). + * + * @param string $text + * @param string $charset encoding of the text + * @return string */ - function encode_mimeheader($text, $charset='utf-8') { + public static function encode_mimeheader($text, $charset='utf-8') { if (empty($text)) { return (string)$text; } - /// Normalize charset - $charset = $this->typo3cs->parse_charset($charset); - /// If the text is pure ASCII, we don't need to encode it - if ($this->convert($text, $charset, 'ascii') == $text) { + // Normalize charset + $charset = self::parse_charset($charset); + // If the text is pure ASCII, we don't need to encode it + if (self::convert($text, $charset, 'ascii') == $text) { return $text; } - /// Although RFC says that line feed should be \r\n, it seems that - /// some mailers double convert \r, so we are going to use \n alone + // Although RFC says that line feed should be \r\n, it seems that + // some mailers double convert \r, so we are going to use \n alone $linefeed="\n"; - /// Define start and end of every chunk + // Define start and end of every chunk $start = "=?$charset?B?"; $end = "?="; - /// Acumulate results + // Accumulate results $encoded = ''; - /// Max line length is 75 (including start and end) + // Max line length is 75 (including start and end) $length = 75 - strlen($start) - strlen($end); - /// Multi-byte ratio - $multilength = $this->strlen($text, $charset); - /// Detect if strlen and friends supported + // Multi-byte ratio + $multilength = self::strlen($text, $charset); + // Detect if strlen and friends supported if ($multilength === false) { if ($charset == 'GB18030' or $charset == 'gb18030') { while (strlen($text)) { @@ -287,30 +364,30 @@ class textlib { } } $ratio = $multilength / strlen($text); - /// Base64 ratio + // Base64 ratio $magic = $avglength = floor(3 * $length * $ratio / 4); - /// basic infinite loop protection + // basic infinite loop protection $maxiterations = strlen($text)*2; $iteration = 0; - /// Iterate over the string in magic chunks + // Iterate over the string in magic chunks for ($i=0; $i <= $multilength; $i+=$magic) { if ($iteration++ > $maxiterations) { return false; // probably infinite loop } $magic = $avglength; $offset = 0; - /// Ensure the chunk fits in length, reduding magic if necessary + // Ensure the chunk fits in length, reducing magic if necessary do { $magic -= $offset; - $chunk = $this->substr($text, $i, $magic, $charset); + $chunk = self::substr($text, $i, $magic, $charset); $chunk = base64_encode($chunk); $offset++; } while (strlen($chunk) > $length); - /// This chunk doen't break any multi-byte char. Use it. + // This chunk doesn't break any multi-byte char. Use it. if ($chunk) $encoded .= ' '.$start.$chunk.$end.$linefeed; } - /// Strip the first space and the last linefeed + // Strip the first space and the last linefeed $encoded = substr($encoded, 1, -strlen($linefeed)); return $encoded; @@ -324,23 +401,23 @@ class textlib { * * @param string $str input string * @param boolean $htmlent convert also html entities (defaults to true) + * @return string * * NOTE: we could have used typo3 entities_to_utf8() here * but the direct alternative used runs 400% quicker * and uses 0.5Mb less memory, so, let's use it - * (tested agains 10^6 conversions) + * (tested against 10^6 conversions) */ - function entities_to_utf8($str, $htmlent=true) { + public static function entities_to_utf8($str, $htmlent=true) { + static $trans_tbl; // Going to use static transliteration table - static $trans_tbl; /// Going to use static translit table - - /// Replace numeric entities + // Replace numeric entities $result = preg_replace('~&#x([0-9a-f]+);~ei', 'textlib::code2utf8(hexdec("\\1"))', $str); $result = preg_replace('~&#([0-9]+);~e', 'textlib::code2utf8(\\1)', $result); - /// Replace literal entities (if desired) + // Replace literal entities (if desired) if ($htmlent) { - /// Generate/create $trans_tbl + // Generate/create $trans_tbl if (!isset($trans_tbl)) { $trans_tbl = array(); foreach (get_html_translation_table(HTML_ENTITIES) as $val=>$key) { @@ -349,37 +426,40 @@ class textlib { } $result = strtr($result, $trans_tbl); } - /// Return utf8-ised string + // Return utf8-ised string return $result; } /** * Converts all Unicode chars > 127 to numeric entities &#nnnn; or &#xnnn;. * - * @param string input string - * @param boolean output decadic only number entities - * @param boolean remove all nonumeric entities - * @return string converted string + * @param string $str input string + * @param boolean $dec output decadic only number entities + * @param boolean $nonnum remove all non-numeric entities + * @return string converted string */ - function utf8_to_entities($str, $dec=false, $nonnum=false) { - /// Avoid some notices from Typo3 code + public static function utf8_to_entities($str, $dec=false, $nonnum=false) { + // Avoid some notices from Typo3 code $oldlevel = error_reporting(E_PARSE); if ($nonnum) { - $str = $this->typo3cs->entities_to_utf8($str, true); + $str = self::typo3()->entities_to_utf8($str, true); } - $result = $this->typo3cs->utf8_to_entities($str); + $result = self::typo3()->utf8_to_entities($str); if ($dec) { $result = preg_replace('/&#x([0-9a-f]+);/ie', "'&#'.hexdec('$1').';'", $result); } - /// Restore original debug level + // Restore original debug level error_reporting($oldlevel); return $result; } /** * Removes the BOM from unicode string - see http://unicode.org/faq/utf_bom.html + * + * @param string $str + * @return string */ - function trim_utf8_bom($str) { + public static function trim_utf8_bom($str) { $bom = "\xef\xbb\xbf"; if (strpos($str, $bom) === 0) { return substr($str, strlen($bom)); @@ -391,7 +471,7 @@ class textlib { * Returns encoding options for select boxes, utf-8 and platform encoding first * @return array encodings */ - function get_encodings() { + public static function get_encodings() { $encodings = array(); $encodings['UTF-8'] = 'UTF-8'; $winenc = strtoupper(get_string('localewincharset', 'langconfig')); @@ -401,7 +481,7 @@ class textlib { $nixenc = strtoupper(get_string('oldcharset', 'langconfig')); $encodings[$nixenc] = $nixenc; - foreach ($this->typo3cs->synonyms as $enc) { + foreach (self::typo3()->synonyms as $enc) { $enc = strtoupper($enc); $encodings[$enc] = $enc; } @@ -415,7 +495,7 @@ class textlib { * @param int $num one unicode value * @return string the UTF-8 char corresponding to the unicode value */ - function code2utf8($num) { + public static function code2utf8($num) { if ($num < 128) { return chr($num); } @@ -434,32 +514,33 @@ class textlib { /** * Makes first letter of each word capital - words must be separated by spaces. * Use with care, this function does not work properly in many locales!!! + * * @param string $text * @return string */ - function strtotitle($text) { + public static function strtotitle($text) { if (empty($text)) { return $text; } if (function_exists('mb_convert_case')) { - return mb_convert_case($text, MB_CASE_TITLE,"UTF-8"); + return mb_convert_case($text, MB_CASE_TITLE, 'UTF-8'); } - $text = $this->strtolower($text); + $text = self::strtolower($text); $words = explode(' ', $text); foreach ($words as $i=>$word) { - $length = $this->strlen($word); + $length = self::strlen($word); if (!$length) { continue; } else if ($length == 1) { - $words[$i] = $this->strtoupper($word); + $words[$i] = self::strtoupper($word); } else { - $letter = $this->substr($word, 0, 1); - $letter = $this->strtoupper($letter); - $rest = $this->substr($word, 1); + $letter = self::substr($word, 0, 1); + $letter = self::strtoupper($letter); + $rest = self::substr($word, 1); $words[$i] = $letter.$rest; } } @@ -468,11 +549,13 @@ class textlib { /** * Locale aware sorting, the key associations are kept, values are sorted alphabetically. + * + * Note: this function is using current moodle locale. + * * @param array $arr array to be sorted - * @param string $lang moodle language * @return void, modifies parameter */ - function asort(array &$arr) { + public static function asort(array &$arr) { if (function_exists('collator_asort')) { if ($coll = collator_create(get_string('locale', 'langconfig'))) { collator_asort($coll, $arr);