Merge branch 'MDL-53273-master' of git://github.com/merrill-oakland/moodle

This commit is contained in:
Andrew Nicols 2016-03-09 14:11:17 +08:00
commit 02726a29cc
4 changed files with 78 additions and 2 deletions

View File

@ -248,6 +248,30 @@ class core_text {
return $result;
}
/**
 * Cuts a string down to at most the given number of bytes without splitting a multi-byte character.
 * UTF-8 only!
 *
 * Many of the other charsets we test for (such as ISO-2022-JP and EUC-JP) are not supported
 * by typo3 and would give invalid results, which is why only UTF-8 is handled here.
 *
 * @param string $string String to truncate
 * @param int $bytes Maximum number of bytes allowed in the result
 * @return string Leading portion of $string limited by $bytes
 * @since Moodle 3.1
 */
public static function str_max_bytes($string, $bytes) {
    if (!function_exists('mb_strcut')) {
        // mb_strcut() is unavailable: fall back to typo3, lowering error reporting to E_PARSE
        // for the duration of the call and restoring the previous level afterwards.
        $previouslevel = error_reporting(E_PARSE);
        $truncated = self::typo3()->strtrunc('utf-8', $string, $bytes);
        error_reporting($previouslevel);

        return $truncated;
    }

    return mb_strcut($string, 0, $bytes, 'UTF-8');
}
/**
* Finds the last occurrence of a character in a string within another.
* UTF-8 ONLY safe mb_strrchr().
@ -707,4 +731,4 @@ class core_text {
}
return implode(' ', $words);
}
}
}

View File

@ -179,6 +179,56 @@ class core_text_testcase extends advanced_testcase {
$this->assertSame(4, core_text::strlen($str, 'GB18030'));
}
/**
 * Checks that core_text::str_max_bytes() truncates by bytes without cutting through a character.
 */
public function test_str_max_bytes() {
    // Every row below is: byte limit passed in, expected byte length, expected string.

    // Four 3-byte characters, i.e. a 12-byte string.
    $threebytechars = '言語設定';
    $this->assertEquals(12, strlen($threebytechars));

    // Lower the limit one byte at a time; the result should only ever shrink by whole characters.
    $steps = array(
        array(12, 12, '言語設定'),
        array(11, 9, '言語設'),
        array(10, 9, '言語設'),
        array(9, 9, '言語設'),
        array(8, 6, '言語'),
    );
    foreach ($steps as $step) {
        list($limit, $expectedbytes, $expected) = $step;
        $truncated = core_text::str_max_bytes($threebytechars, $limit);
        $this->assertEquals($expectedbytes, strlen($truncated));
        $this->assertSame($expected, $truncated);
    }

    // Now a mixed string: one single-byte character among the 3-byte ones, 13 bytes in total.
    $mixed = '言語設a定';
    $this->assertEquals(13, strlen($mixed));

    $steps = array(
        array(11, 10, '言語設a'),
        array(10, 10, '言語設a'),
        array(9, 9, '言語設'),
        array(8, 6, '言語'),
        // The 0 byte case must give back an empty string.
        array(0, 0, ''),
    );
    foreach ($steps as $step) {
        list($limit, $expectedbytes, $expected) = $step;
        $truncated = core_text::str_max_bytes($mixed, $limit);
        $this->assertEquals($expectedbytes, strlen($truncated));
        $this->assertSame($expected, $truncated);
    }
}
/**
* Tests the static strtolower method.
*/

View File

@ -70,6 +70,8 @@ information provided here is intended especially for developers.
is now a part of \antivirus_clamav\scanner class methods.
* \repository::antivir_scan_file() has been deprecated; \core\antivirus\manager::scan_file(), which
applies antivirus plugins, replaces its functionality.
* Added core_text::str_max_bytes() which safely truncates multi-byte strings to a
maximum number of bytes.
=== 3.0 ===

View File

@ -53,7 +53,7 @@ class document extends \core_search\document {
public static function format_string_for_engine($string) {
    // Limits $string to 32766 bytes before it is handed to the search engine and returns the result.
    // 2^15 default. We could convert this to a setting, as it is possible to
    // change the max in solr.
    //
    // The limit is expressed in bytes, but a plain substr() can cut a multi-byte UTF-8
    // character in half. core_text::str_max_bytes() applies the same byte limit in a
    // multi-byte safe manner, so it is the only truncation used here.
    return \core_text::str_max_bytes($string, 32766);
}
/**