mirror of
https://github.com/gohugoio/hugo.git
synced 2025-08-31 22:41:53 +02:00
Fix Plainify edge cases
This commit replaces the main part of `helpers.StripHTML` with Go's implementation in its html/template package.

It's a little slower, but correctness is more important:

```bash
BenchmarkStripHTMLOld-10    680316    1764 ns/op     728 B/op     4 allocs/op
BenchmarkStripHTMLNew-10    384520    3099 ns/op    2089 B/op    10 allocs/op
```

Fixes #9199
Fixes #9909
Closes #9410
This commit is contained in:
@@ -18,9 +18,14 @@ import (
|
||||
"io"
|
||||
"reflect"
|
||||
"regexp"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
bp "github.com/gohugoio/hugo/bufferpool"
|
||||
|
||||
"github.com/gohugoio/hugo/output"
|
||||
|
||||
htmltemplate "github.com/gohugoio/hugo/tpl/internal/go_templates/htmltemplate"
|
||||
texttemplate "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate"
|
||||
)
|
||||
|
||||
@@ -163,3 +168,44 @@ func GetHasLockFromContext(ctx context.Context) bool {
|
||||
func SetHasLockInContext(ctx context.Context, hasLock bool) context.Context {
|
||||
return context.WithValue(ctx, texttemplate.HasLockContextKey, hasLock)
|
||||
}
|
||||
|
||||
// hugoNewLinePlaceholder temporarily stands in for a line break while HTML
// tags are being stripped; StripHTML turns it back into "\n" afterwards.
const hugoNewLinePlaceholder = "___hugonl_"

var (
	// stripHTMLReplacerPre prepares markup for tag stripping: literal newlines
	// become spaces, while paragraph ends and line-break tags become
	// hugoNewLinePlaceholder so the break survives the stripping step.
	// All three common spellings of the line-break tag are covered
	// ("<br>", "<br/>" and "<br />").
	stripHTMLReplacerPre = strings.NewReplacer(
		"\n", " ",
		"</p>", hugoNewLinePlaceholder,
		"<br>", hugoNewLinePlaceholder,
		"<br/>", hugoNewLinePlaceholder,
		"<br />", hugoNewLinePlaceholder,
	)

	// whitespaceRe matches a run of one or more whitespace characters.
	whitespaceRe = regexp.MustCompile(`\s+`)
)
|
||||
|
||||
// StripHTML strips out all HTML tags in s.
|
||||
func StripHTML(s string) string {
|
||||
// Shortcut strings with no tags in them
|
||||
if !strings.ContainsAny(s, "<>") {
|
||||
return s
|
||||
}
|
||||
|
||||
pre := stripHTMLReplacerPre.Replace(s)
|
||||
preReplaced := pre != s
|
||||
|
||||
s = htmltemplate.StripTags(pre)
|
||||
|
||||
if preReplaced {
|
||||
s = strings.ReplaceAll(s, hugoNewLinePlaceholder, "\n")
|
||||
}
|
||||
|
||||
var wasSpace bool
|
||||
b := bp.GetBuffer()
|
||||
defer bp.PutBuffer(b)
|
||||
for _, r := range s {
|
||||
isSpace := unicode.IsSpace(r)
|
||||
if !(isSpace && wasSpace) {
|
||||
b.WriteRune(r)
|
||||
}
|
||||
wasSpace = isSpace
|
||||
}
|
||||
|
||||
if b.Len() > 0 {
|
||||
s = b.String()
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
Reference in New Issue
Block a user