This commit is contained in:
Eloy Lafuente (stronk7) 2021-02-01 23:20:01 +01:00
commit 43b36f6d68
2 changed files with 87 additions and 33 deletions

View File

@ -8352,10 +8352,27 @@ function moodle_setlocale($locale='') {
* Words are defined as things between whitespace.
*
* @category string
* @param string $string The text to be searched for words.
* @param string $string The text to be searched for words. May be HTML.
* @return int The count of words in the specified string
*/
function count_words($string) {
// Before stripping tags, add a space after the close tag of anything that is not obviously inline.
// Also, br is a special case because it definitely delimits a word, but has no close tag.
$string = preg_replace('~
( # Capture the tag we match.
</ # Start of close tag.
(?! # Do not match any of these specific close tag names.
a> | b> | del> | em> | i> |
ins> | s> | small> |
strong> | sub> | sup> | u>
)
\w+ # But, apart from those exceptions, match any tag name.
> # End of close tag.
|
<br> | <br\s*/> # Special cases that are not close tags.
)
~x', '$1 ', $string); // Add a space after the close tag.
// Now remove HTML tags.
$string = strip_tags($string);
// Decode HTML entities.
$string = html_entity_decode($string);
@ -8375,11 +8392,12 @@ function count_words($string) {
* Letters are defined as chars not in tags and different from whitespace.
*
* @category string
* @param string $string The text to be searched for letters.
* @param string $string The text to be searched for letters. May be HTML.
* @return int The count of letters in the specified text.
*/
function count_letters($string) {
$string = strip_tags($string); // Tags are out now.
$string = html_entity_decode($string);
$string = preg_replace('/[[:space:]]*/', '', $string); // Whitespace are out now.
return core_text::strlen($string);

View File

@ -3792,39 +3792,75 @@ class core_moodlelib_testcase extends advanced_testcase {
}
/**
* Test function count_words().
* Test function {@see count_words()}.
*
* @dataProvider count_words_testcases
* @param int $expectedcount number of words in $string.
* @param string $string the test string to count the words of.
*/
public function test_count_words() {
$count = count_words("one two three'four");
$this->assertEquals(3, $count);
$count = count_words('one+two threefour');
$this->assertEquals(3, $count);
$count = count_words('one"two three-four');
$this->assertEquals(2, $count);
$count = count_words('one@two three_four');
$this->assertEquals(4, $count);
$count = count_words('one\two three/four');
$this->assertEquals(4, $count);
$count = count_words(' one ... two &nbsp; three...four ');
$this->assertEquals(4, $count);
$count = count_words('one.2 3,four');
$this->assertEquals(4, $count);
$count = count_words('1³ £2 €3.45 $6,789');
$this->assertEquals(4, $count);
$count = count_words('one—two ブルース カンベッル');
$this->assertEquals(4, $count);
$count = count_words('one…two ブルース … カンベッル');
$this->assertEquals(4, $count);
public function test_count_words(int $expectedcount, string $string): void {
    // Compute the count up front so a failure message shows the actual value
    // separately from the call that produced it.
    $actualcount = count_words($string);

    $this->assertEquals($expectedcount, $actualcount);
}
/**
* Data provider for {@see test_count_words}.
*
* @return array of test cases.
*/
public function count_words_testcases(): array {
    // Every dataset is [expected word count, input string].

    // Empty input and plain text with assorted separator characters.
    $plaintext = [
        [0, ''],
        [4, 'one two three four'],
        [3, "one two three'four"],
        [3, 'one+two threefour'],
        [2, 'one"two three-four'],
        [4, 'one@two three_four'],
        [4, 'one\two three/four'],
    ];

    // Line break tags in their different spellings.
    $linebreaks = [
        [4, '<p>one two<br></br>three four</p>'],
        [4, '<p>one two<br>three four</p>'],
        [4, '<p>one two<br />three four</p>'], // XHTML style.
    ];

    // Entities, punctuation, currency symbols and multi-byte text.
    $punctuation = [
        [4, ' one ... two &nbsp; three...four '],
        [4, 'one.2 3,four'],
        [4, '1³ £2 €3.45 $6,789'],
        [4, 'one—two ブルース カンベッル'],
        [4, 'one…two ブルース … カンベッル'],
    ];

    // Block-level tags with no whitespace between the words they enclose.
    $blocktags = [
        [4, '<p>one two</p><p>three four</p>'],
        [4, '<p>one two</p><p><br/></p><p>three four</p>'],
        [4, '<p>one</p><ul><li>two</li><li>three</li></ul><p>four.</p>'],
    ];

    // Inline formatting tags in the middle of a word: expected to count as one word.
    $inlinetags = [
        [1, '<p>em<b>phas</b>is.</p>'],
        [1, '<p>em<i>phas</i>is.</p>'],
        [1, '<p>em<strong>phas</strong>is.</p>'],
        [1, '<p>em<em>phas</em>is.</p>'],
    ];

    // A bare newline separator, and sub/superscript markup inside a single token.
    $others = [
        [2, "one\ntwo"],
        [1, "SO<sub>4</sub><sup>2-</sup>"],
    ];

    // All groups are plain lists, so array_merge() simply concatenates them in order.
    return array_merge($plaintext, $linebreaks, $punctuation, $blocktags, $inlinetags, $others);
}
/**
* Test function {@see count_letters()}.
*
* @dataProvider count_letters_testcases
* @param int $expectedcount number of characters in $string.
* @param string $string the test string to count the letters of.
*/
public function test_count_letters(int $expectedcount, string $string): void {
    // Compute the count up front so a failure message shows the actual value
    // separately from the call that produced it.
    $actualcount = count_letters($string);

    $this->assertEquals($expectedcount, $actualcount);
}
/**
* Data provider for {@see test_count_letters}.
*
* @return array of test cases.
*/
public function count_letters_testcases(): array {
    // Every dataset is [expected letter count, input string].
    $emptystring = [0, ''];
    $singlechar = [1, 'x'];
    // An HTML entity is expected to count as the single character it encodes.
    $htmlentity = [1, '&amp;'];
    // Surrounding tags are expected not to contribute to the count.
    $wrappedintags = [4, '<p>frog</p>'];

    return [$emptystring, $singlechar, $htmlentity, $wrappedintags];
}
/**
* Tests the getremoteaddr() function.
*/