mirror of
https://github.com/moodle/moodle.git
synced 2025-06-08 09:05:59 +02:00
MDL-46256 count_words: fix handling of paragraphs
This commit is contained in:
parent
82a050d67b
commit
6a62cbe599
@ -8352,10 +8352,27 @@ function moodle_setlocale($locale='') {
|
|||||||
* Words are defined as things between whitespace.
|
* Words are defined as things between whitespace.
|
||||||
*
|
*
|
||||||
* @category string
|
* @category string
|
||||||
* @param string $string The text to be searched for words.
|
* @param string $string The text to be searched for words. May be HTML.
|
||||||
* @return int The count of words in the specified string
|
* @return int The count of words in the specified string
|
||||||
*/
|
*/
|
||||||
function count_words($string) {
|
function count_words($string) {
|
||||||
|
// Before stripping tags, add a space after the close tag of anything that is not obviously inline.
|
||||||
|
// Also, br is a special case because it definitely delimits a word, but has no close tag.
|
||||||
|
$string = preg_replace('~
|
||||||
|
( # Capture the tag we match.
|
||||||
|
</ # Start of close tag.
|
||||||
|
(?! # Do not match any of these specific close tag names.
|
||||||
|
a> | b> | del> | em> | i> |
|
||||||
|
ins> | s> | small> |
|
||||||
|
strong> | sub> | sup> | u>
|
||||||
|
)
|
||||||
|
\w+ # But, apart from those exceptions, match any tag name.
|
||||||
|
> # End of close tag.
|
||||||
|
|
|
||||||
|
<br> | <br\s*/> # Special cases that are not close tags.
|
||||||
|
)
|
||||||
|
~x', '$1 ', $string); // Add a space after the close tag.
|
||||||
|
// Now remove HTML tags.
|
||||||
$string = strip_tags($string);
|
$string = strip_tags($string);
|
||||||
// Decode HTML entities.
|
// Decode HTML entities.
|
||||||
$string = html_entity_decode($string);
|
$string = html_entity_decode($string);
|
||||||
|
@ -3792,7 +3792,7 @@ class core_moodlelib_testcase extends advanced_testcase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test function count_words().
|
* Test function {@see count_words()}.
|
||||||
*
|
*
|
||||||
* @dataProvider count_words_testcases
|
* @dataProvider count_words_testcases
|
||||||
* @param int $expectedcount number of words in $string.
|
* @param int $expectedcount number of words in $string.
|
||||||
@ -3809,16 +3809,30 @@ class core_moodlelib_testcase extends advanced_testcase {
|
|||||||
*/
|
*/
|
||||||
public function count_words_testcases(): array {
|
public function count_words_testcases(): array {
|
||||||
return [
|
return [
|
||||||
|
[0, ''],
|
||||||
|
[4, 'one two three four'],
|
||||||
[3, "one two three'four"],
|
[3, "one two three'four"],
|
||||||
[3, 'one+two three’four'],
|
[3, 'one+two three’four'],
|
||||||
[2, 'one"two three-four'],
|
[2, 'one"two three-four'],
|
||||||
[4, 'one@two three_four'],
|
[4, 'one@two three_four'],
|
||||||
[4, 'one\two three/four'],
|
[4, 'one\two three/four'],
|
||||||
|
[4, '<p>one two<br></br>three four</p>'],
|
||||||
|
[4, '<p>one two<br>three four</p>'],
|
||||||
|
[4, '<p>one two<br />three four</p>'], // XHTML style.
|
||||||
[4, ' one ... two three...four '],
|
[4, ' one ... two three...four '],
|
||||||
[4, 'one.2 3,four'],
|
[4, 'one.2 3,four'],
|
||||||
[4, '1³ £2 €3.45 $6,789'],
|
[4, '1³ £2 €3.45 $6,789'],
|
||||||
[4, 'one—two ブルース カンベッル'],
|
[4, 'one—two ブルース カンベッル'],
|
||||||
[4, 'one…two ブルース … カンベッル'],
|
[4, 'one…two ブルース … カンベッル'],
|
||||||
|
[4, '<p>one two</p><p>three four</p>'],
|
||||||
|
[4, '<p>one two</p><p><br/></p><p>three four</p>'],
|
||||||
|
[4, '<p>one</p><ul><li>two</li><li>three</li></ul><p>four.</p>'],
|
||||||
|
[1, '<p>em<b>phas</b>is.</p>'],
|
||||||
|
[1, '<p>em<i>phas</i>is.</p>'],
|
||||||
|
[1, '<p>em<strong>phas</strong>is.</p>'],
|
||||||
|
[1, '<p>em<em>phas</em>is.</p>'],
|
||||||
|
[2, "one\ntwo"],
|
||||||
|
[1, "SO<sub>4</sub><sup>2-</sup>"],
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user