Fix Plainify edge cases

This commit replaces the main part of `helpers.StripHTML` with Go's implementation in its html/template package.

It's a little slower, but correctness is more important:

```bash
BenchmarkStripHTMLOld-10    	  680316	      1764 ns/op	     728 B/op	       4 allocs/op
BenchmarkStripHTMLNew-10    	  384520	      3099 ns/op	    2089 B/op	      10 allocs/op
```

Fixes #9199
Fixes #9909
Closes #9410
This commit is contained in:
Bjørn Erik Pedersen
2022-05-25 10:56:14 +02:00
parent cd0112a05a
commit 3854a6fa6c
10 changed files with 103 additions and 85 deletions

View File

@@ -25,6 +25,7 @@ import (
"github.com/gohugoio/hugo/common/text"
"github.com/gohugoio/hugo/deps"
"github.com/gohugoio/hugo/helpers"
"github.com/gohugoio/hugo/tpl"
"github.com/spf13/cast"
)
@@ -52,7 +53,7 @@ func (ns *Namespace) CountRunes(s any) (int, error) {
}
counter := 0
-for _, r := range helpers.StripHTML(ss) {
+for _, r := range tpl.StripHTML(ss) {
if !helpers.IsWhitespace(r) {
counter++
}
@@ -83,11 +84,11 @@ func (ns *Namespace) CountWords(s any) (int, error) {
}
if !isCJKLanguage {
-return len(strings.Fields(helpers.StripHTML((ss)))), nil
+return len(strings.Fields(tpl.StripHTML(ss))), nil
}
counter := 0
-for _, word := range strings.Fields(helpers.StripHTML(ss)) {
+for _, word := range strings.Fields(tpl.StripHTML(ss)) {
runeCount := utf8.RuneCountInString(word)
if len(word) == runeCount {
counter++