From 25af2aa6631ef4900fb9c021ee3b290f8af2fa19 Mon Sep 17 00:00:00 2001 From: Petr Skoda Date: Sat, 14 Jan 2012 20:55:02 +0100 Subject: [PATCH] MDL-31142 fix textlib::substr() performance, invalid length in iconv_substr; use mbstring by default --- lib/simpletest/testtextlib.php | 2 ++ lib/textlib.class.php | 55 +++++++++++++++++++++++++++------- 2 files changed, 46 insertions(+), 11 deletions(-) diff --git a/lib/simpletest/testtextlib.php b/lib/simpletest/testtextlib.php index 09f1458f8f8..2b86475b1d5 100644 --- a/lib/simpletest/testtextlib.php +++ b/lib/simpletest/testtextlib.php @@ -83,6 +83,8 @@ class textlib_test extends UnitTestCase { public function test_substr() { $str = "Žluťoučký koníček"; + $this->assertIdentical(textlib::substr($str, 0), $str); + $this->assertIdentical(textlib::substr($str, 1), 'luťoučký koníček'); $this->assertIdentical(textlib::substr($str, 1, 3), 'luť'); $this->assertIdentical(textlib::substr($str, 0, 100), $str); $this->assertIdentical(textlib::substr($str, -3, 2), 'če'); diff --git a/lib/textlib.class.php b/lib/textlib.class.php index 558be5c5282..97027389308 100644 --- a/lib/textlib.class.php +++ b/lib/textlib.class.php @@ -182,7 +182,7 @@ class textlib { } /** - * Multibyte safe substr() function, uses iconv for utf-8, falls back to typo3. + * Multibyte safe substr() function, uses mbstring or iconv for UTF-8, falls back to typo3. 
* * @param string $text * @param int $start negative value means from end @@ -194,18 +194,39 @@ class textlib { $charset = self::parse_charset($charset); if ($charset === 'utf-8') { - return iconv_substr($text, $start, $len, $charset); + if (function_exists('mb_substr')) { + // this is much faster than iconv - see MDL-31142 + if ($len === null) { + $oldcharset = mb_internal_encoding(); + mb_internal_encoding('UTF-8'); + $result = mb_substr($text, $start); + mb_internal_encoding($oldcharset); + return $result; + } else { + return mb_substr($text, $start, $len, 'UTF-8'); + } + + } else { + if ($len === null) { + $len = iconv_strlen($text, 'UTF-8'); + } + return iconv_substr($text, $start, $len, 'UTF-8'); + } } $oldlevel = error_reporting(E_PARSE); - $result = self::typo3()->substr($charset, $text, $start, $len); + if ($len === null) { + $result = self::typo3()->substr($charset, $text, $start); + } else { + $result = self::typo3()->substr($charset, $text, $start, $len); + } error_reporting($oldlevel); return $result; } /** - * Multibyte safe strlen() function, uses iconv for utf-8, falls back to typo3. + * Multibyte safe strlen() function, uses mbstring or iconv for UTF-8, falls back to typo3. 
* * @param string $text * @param string $charset encoding of the text @@ -215,7 +236,11 @@ class textlib { $charset = self::parse_charset($charset); if ($charset === 'utf-8') { - return iconv_strlen($text, $charset); + if (function_exists('mb_strlen')) { + return mb_strlen($text, 'UTF-8'); + } else { + return iconv_strlen($text, 'UTF-8'); + } } $oldlevel = error_reporting(E_PARSE); @@ -236,7 +261,7 @@ class textlib { $charset = self::parse_charset($charset); if ($charset === 'utf-8' and function_exists('mb_strtolower')) { - return mb_strtolower($text, $charset); + return mb_strtolower($text, 'UTF-8'); } $oldlevel = error_reporting(E_PARSE); @@ -257,7 +282,7 @@ class textlib { $charset = self::parse_charset($charset); if ($charset === 'utf-8' and function_exists('mb_strtoupper')) { - return mb_strtoupper($text, $charset); + return mb_strtoupper($text, 'UTF-8'); } $oldlevel = error_reporting(E_PARSE); @@ -268,7 +293,7 @@ class textlib { } /** - * UTF-8 ONLY safe strpos(), uses iconv.. + * UTF-8 ONLY safe strpos(), uses mbstring, falls back to iconv. * * @param string $haystack * @param string $needle @@ -276,18 +301,26 @@ class textlib { * @return string */ public static function strpos($haystack, $needle, $offset=0) { - return iconv_strpos($haystack, $needle, $offset, 'utf-8'); + if (function_exists('mb_strpos')) { + return mb_strpos($haystack, $needle, $offset, 'UTF-8'); + } else { + return iconv_strpos($haystack, $needle, $offset, 'UTF-8'); + } } /** - * UTF-8 ONLY safe strrpos(), uses iconv. + * UTF-8 ONLY safe strrpos(), uses mbstring, falls back to iconv. * * @param string $haystack * @param string $needle * @return string */ public static function strrpos($haystack, $needle) { - return iconv_strrpos($haystack, $needle, 'utf-8'); + if (function_exists('mb_strrpos')) { + return mb_strrpos($haystack, $needle, 0, 'UTF-8'); + } else { + return iconv_strrpos($haystack, $needle, 'UTF-8'); + } } /**