mirror of
https://github.com/moodle/moodle.git
synced 2025-04-20 16:04:25 +02:00
MDL-70796 count_words: match the count from LibreOffice & MS Word
This commit is contained in:
parent
4e398ff3f7
commit
6c7cf1123e
@ -8378,14 +8378,14 @@ function count_words($string) {
|
||||
$string = strip_tags($string);
|
||||
// Decode HTML entities.
|
||||
$string = html_entity_decode($string);
|
||||
// Replace underscores (which are classed as word characters) with spaces.
|
||||
$string = preg_replace('/_/u', ' ', $string);
|
||||
// Remove any characters that shouldn't be treated as word boundaries.
|
||||
$string = preg_replace('/[\'"’-]/u', '', $string);
|
||||
// Remove dots and commas from within numbers only.
|
||||
$string = preg_replace('/([0-9])[.,]([0-9])/u', '$1$2', $string);
|
||||
|
||||
return count(preg_split('/\w\b/u', $string)) - 1;
|
||||
// Now, the word count is the number of blocks of characters separated
|
||||
// by any sort of space. That seems to be the definition used by all other systems.
|
||||
// To be precise about what is considered to separate words:
|
||||
// * Anything that Unicode considers a 'Separator'
|
||||
// * Anything that Unicode considers a 'Control character'
|
||||
// * An em- or en- dash.
|
||||
return count(preg_split('~[\p{Z}\p{Cc}—–]+~u', $string, -1, PREG_SPLIT_NO_EMPTY));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -3808,22 +3808,27 @@ class core_moodlelib_testcase extends advanced_testcase {
|
||||
* @return array of test cases.
|
||||
*/
|
||||
public function count_words_testcases(): array {
|
||||
// The counts here should match MS Word and Libre Office.
|
||||
return [
|
||||
[0, ''],
|
||||
[4, 'one two three four'],
|
||||
[3, "one two three'four"],
|
||||
[3, 'one+two three’four'],
|
||||
[2, 'one"two three-four'],
|
||||
[4, 'one@two three_four'],
|
||||
[4, 'one\two three/four'],
|
||||
[1, "a'b"],
|
||||
[1, '1+1=2'],
|
||||
[1, ' one-sided '],
|
||||
[2, 'one two'],
|
||||
[1, 'email@example.com'],
|
||||
[2, 'first\part second/part'],
|
||||
[4, '<p>one two<br></br>three four</p>'],
|
||||
[4, '<p>one two<br>three four</p>'],
|
||||
[4, '<p>one two<br />three four</p>'], // XHTML style.
|
||||
[4, ' one ... two three...four '],
|
||||
[4, 'one.2 3,four'],
|
||||
[3, ' one ... three '],
|
||||
[1, 'just...one'],
|
||||
[3, ' one & three '],
|
||||
[1, 'just&one'],
|
||||
[2, 'em—dash'],
|
||||
[2, 'en–dash'],
|
||||
[4, '1³ £2 €3.45 $6,789'],
|
||||
[4, 'one—two ブルース カンベッル'],
|
||||
[4, 'one…two ブルース … カンベッル'],
|
||||
[2, 'ブルース カンベッル'], // MS word counts this as 11, but we don't handle that yet.
|
||||
[4, '<p>one two</p><p>three four</p>'],
|
||||
[4, '<p>one two</p><p><br/></p><p>three four</p>'],
|
||||
[4, '<p>one</p><ul><li>two</li><li>three</li></ul><p>four.</p>'],
|
||||
@ -3832,7 +3837,12 @@ class core_moodlelib_testcase extends advanced_testcase {
|
||||
[1, '<p>em<strong>phas</strong>is.</p>'],
|
||||
[1, '<p>em<em>phas</em>is.</p>'],
|
||||
[2, "one\ntwo"],
|
||||
[2, "one\rtwo"],
|
||||
[2, "one\ttwo"],
|
||||
[2, "one\vtwo"],
|
||||
[2, "one\ftwo"],
|
||||
[1, "SO<sub>4</sub><sup>2-</sup>"],
|
||||
[6, '4+4=8 i.e. O(1) a,b,c,d I’m black&blue_really'],
|
||||
];
|
||||
}
|
||||
|
||||
|
@ -295,7 +295,7 @@ class qtype_essay_question_test extends advanced_testcase {
|
||||
public function test_get_validation_error(int $responserequired,
|
||||
int $minwordlimit, int $maxwordlimit, string $expected): void {
|
||||
$question = test_question_maker::make_an_essay_question();
|
||||
$response = ['answer' => 'In this essay, I will be testing a function called check_input_word_count().'];
|
||||
$response = ['answer' => 'One two three four five six seven eight nine ten eleven twelve thirteen fourteen.'];
|
||||
$question->responserequired = $responserequired;
|
||||
$question->minwordlimit = $minwordlimit;
|
||||
$question->maxwordlimit = $maxwordlimit;
|
||||
|
Loading…
x
Reference in New Issue
Block a user