mirror of
https://github.com/gohugoio/hugo.git
synced 2025-08-31 22:41:53 +02:00
Fix Plainify edge cases
This commit replaces the main part of `helpers.StripHTML` with Go's implementation in its html/template package.

It's a little slower, but correctness is more important:

```bash
BenchmarkStripHTMLOld-10    680316    1764 ns/op     728 B/op     4 allocs/op
BenchmarkStripHTMLNew-10    384520    3099 ns/op    2089 B/op    10 allocs/op
```

Fixes #9199
Fixes #9909
Closes #9410
This commit is contained in:
@@ -34,7 +34,6 @@ import (
|
||||
|
||||
"github.com/gohugoio/hugo/markup"
|
||||
|
||||
bp "github.com/gohugoio/hugo/bufferpool"
|
||||
"github.com/gohugoio/hugo/config"
|
||||
)
|
||||
|
||||
@@ -104,45 +103,6 @@ func NewContentSpec(cfg config.Provider, logger loggers.Logger, contentFs afero.
|
||||
return spec, nil
|
||||
}
|
||||
|
||||
// stripHTMLReplacer rewrites raw newlines to single spaces and turns the
// closing paragraph tag and the two supported <br> spellings into newlines.
// The pairs are listed one per line as (old, new).
var stripHTMLReplacer = strings.NewReplacer(
	"\n", " ",
	"</p>", "\n",
	"<br>", "\n",
	"<br />", "\n",
)
|
||||
|
||||
// StripHTML accepts a string, strips out all HTML tags and returns it.
|
||||
func StripHTML(s string) string {
|
||||
// Shortcut strings with no tags in them
|
||||
if !strings.ContainsAny(s, "<>") {
|
||||
return s
|
||||
}
|
||||
s = stripHTMLReplacer.Replace(s)
|
||||
|
||||
// Walk through the string removing all tags
|
||||
b := bp.GetBuffer()
|
||||
defer bp.PutBuffer(b)
|
||||
var inTag, isSpace, wasSpace bool
|
||||
for _, r := range s {
|
||||
if !inTag {
|
||||
isSpace = false
|
||||
}
|
||||
|
||||
switch {
|
||||
case r == '<':
|
||||
inTag = true
|
||||
case r == '>':
|
||||
inTag = false
|
||||
case unicode.IsSpace(r):
|
||||
isSpace = true
|
||||
fallthrough
|
||||
default:
|
||||
if !inTag && (!isSpace || (isSpace && !wasSpace)) {
|
||||
b.WriteRune(r)
|
||||
}
|
||||
}
|
||||
|
||||
wasSpace = isSpace
|
||||
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// stripEmptyNav strips out empty <nav> tags from content.
|
||||
func stripEmptyNav(in []byte) []byte {
|
||||
return bytes.Replace(in, []byte("<nav>\n</nav>\n\n"), []byte(``), -1)
|
||||
|
@@ -52,44 +52,6 @@ func TestTrimShortHTML(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestStripHTML(t *testing.T) {
|
||||
type test struct {
|
||||
input, expected string
|
||||
}
|
||||
data := []test{
|
||||
{"<h1>strip h1 tag <h1>", "strip h1 tag "},
|
||||
{"<p> strip p tag </p>", " strip p tag "},
|
||||
{"</br> strip br<br>", " strip br\n"},
|
||||
{"</br> strip br2<br />", " strip br2\n"},
|
||||
{"This <strong>is</strong> a\nnewline", "This is a newline"},
|
||||
{"No Tags", "No Tags"},
|
||||
{`<p>Summary Next Line.
|
||||
<figure >
|
||||
|
||||
<img src="/not/real" />
|
||||
|
||||
|
||||
</figure>
|
||||
.
|
||||
More text here.</p>
|
||||
|
||||
<p>Some more text</p>`, "Summary Next Line. . More text here.\nSome more text\n"},
|
||||
}
|
||||
for i, d := range data {
|
||||
output := StripHTML(d.input)
|
||||
if d.expected != output {
|
||||
t.Errorf("Test %d failed. Expected %q got %q", i, d.expected, output)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkStripHTML(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
StripHTML(tstHTMLContent)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStripEmptyNav(t *testing.T) {
|
||||
c := qt.New(t)
|
||||
cleaned := stripEmptyNav([]byte("do<nav>\n</nav>\n\nbedobedo"))
|
||||
|
Reference in New Issue
Block a user