mirror of
https://github.com/moodle/moodle.git
synced 2025-06-03 14:46:35 +02:00
MDL-46256 count_words: fix handling of paragraphs
This commit is contained in:
parent
82a050d67b
commit
6a62cbe599
@ -8352,10 +8352,27 @@ function moodle_setlocale($locale='') {
|
||||
* Words are defined as things between whitespace.
|
||||
*
|
||||
* @category string
|
||||
* @param string $string The text to be searched for words.
|
||||
* @param string $string The text to be searched for words. May be HTML.
|
||||
* @return int The count of words in the specified string
|
||||
*/
|
||||
function count_words($string) {
|
||||
// Before stripping tags, add a space after the close tag of anything that is not obviously inline.
|
||||
// Also, br is a special case because it definitely delimits a word, but has no close tag.
|
||||
$string = preg_replace('~
|
||||
( # Capture the tag we match.
|
||||
</ # Start of close tag.
|
||||
(?! # Do not match any of these specific close tag names.
|
||||
a> | b> | del> | em> | i> |
|
||||
ins> | s> | small> |
|
||||
strong> | sub> | sup> | u>
|
||||
)
|
||||
\w+ # But, apart from those exceptions, match any tag name.
|
||||
> # End of close tag.
|
||||
|
|
||||
<br> | <br\s*/> # Special cases that are not close tags.
|
||||
)
|
||||
~x', '$1 ', $string); // Add a space after the close tag.
|
||||
// Now remove HTML tags.
|
||||
$string = strip_tags($string);
|
||||
// Decode HTML entities.
|
||||
$string = html_entity_decode($string);
|
||||
|
@ -3792,7 +3792,7 @@ class core_moodlelib_testcase extends advanced_testcase {
|
||||
}
|
||||
|
||||
/**
|
||||
* Test function count_words().
|
||||
* Test function {@see count_words()}.
|
||||
*
|
||||
* @dataProvider count_words_testcases
|
||||
* @param int $expectedcount number of words in $string.
|
||||
@ -3809,16 +3809,30 @@ class core_moodlelib_testcase extends advanced_testcase {
|
||||
*/
|
||||
public function count_words_testcases(): array {
|
||||
// Data provider for the count_words() test.
// Each row is [expected word count, input string]; the input may be plain text or HTML.
return [
|
||||
// Empty input contains no words.
[0, ''],
|
||||
// Plain text with assorted punctuation; the expected count shows whether each character splits a word.
[4, 'one two three four'],
|
||||
[3, "one two three'four"],
|
||||
[3, 'one+two three’four'],
|
||||
[2, 'one"two three-four'],
|
||||
[4, 'one@two three_four'],
|
||||
[4, 'one\two three/four'],
|
||||
// Line breaks written as <br>, with or without a closing slash, are expected to separate words.
[4, '<p>one two<br></br>three four</p>'],
|
||||
[4, '<p>one two<br>three four</p>'],
|
||||
[4, '<p>one two<br />three four</p>'], // XHTML style.
|
||||
// Extra whitespace, dots, digits, currency symbols and non-Latin text.
[4, ' one ... two three...four '],
|
||||
[4, 'one.2 3,four'],
|
||||
[4, '1³ £2 €3.45 $6,789'],
|
||||
[4, 'one—two ブルース カンベッル'],
|
||||
[4, 'one…two ブルース … カンベッル'],
|
||||
// Adjacent paragraphs and list items are expected to count as separate words even with no whitespace between the tags.
[4, '<p>one two</p><p>three four</p>'],
|
||||
[4, '<p>one two</p><p><br/></p><p>three four</p>'],
|
||||
[4, '<p>one</p><ul><li>two</li><li>three</li></ul><p>four.</p>'],
|
||||
// Inline formatting tags in the middle of a word are expected not to split it.
[1, '<p>em<b>phas</b>is.</p>'],
|
||||
[1, '<p>em<i>phas</i>is.</p>'],
|
||||
[1, '<p>em<strong>phas</strong>is.</p>'],
|
||||
[1, '<p>em<em>phas</em>is.</p>'],
|
||||
// A newline character separates words.
[2, "one\ntwo"],
|
||||
// Subscript and superscript markup stays part of the surrounding word.
[1, "SO<sub>4</sub><sup>2-</sup>"],
|
||||
];
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user