Fix Plainify edge cases

This commit replaces the main part of `helpers.StripHTML` with Go's implementation in its html/template package.

It's a little slower, but correctness is more important:

```bash
BenchmarkStripHTMLOld-10    	  680316	      1764 ns/op	     728 B/op	       4 allocs/op
BenchmarkStripHTMLNew-10    	  384520	      3099 ns/op	    2089 B/op	      10 allocs/op
```

Fixes #9199
Fixes #9909
Closes #9410
This commit is contained in:
Bjørn Erik Pedersen
2022-05-25 10:56:14 +02:00
parent cd0112a05a
commit 3854a6fa6c
10 changed files with 103 additions and 85 deletions

View File

@@ -28,3 +28,44 @@ func TestExtractBaseof(t *testing.T) {
c.Assert(extractBaseOf("not baseof for you"), qt.Equals, "")
c.Assert(extractBaseOf("template: blog/baseof.html:23:11:"), qt.Equals, "blog/baseof.html")
}
// TestStripHTML feeds a table of HTML snippets through StripHTML and reports
// every case whose output differs from the expected text. Failures are
// reported with t.Errorf, so all cases run even after a mismatch.
func TestStripHTML(t *testing.T) {
	// Each entry pairs a raw input with the exact string StripHTML must return.
	cases := []struct {
		input    string
		expected string
	}{
		{"<h1>strip h1 tag <h1>", "strip h1 tag "},
		{"<p> strip p tag </p>", " strip p tag "},
		{"</br> strip br<br>", " strip br\n"},
		{"</br> strip br2<br />", " strip br2\n"},
		{"This <strong>is</strong> a\nnewline", "This is a newline"},
		{"No Tags", "No Tags"},
		{`<p>Summary Next Line.
<figure >
<img src="/not/real" />
</figure>
.
More text here.</p>
<p>Some more text</p>`, "Summary Next Line. . More text here.\nSome more text\n"},
		// Issue 9199
		{"<div data-action='click->my-controller#doThing'>qwe</div>", "qwe"},
		{"Hello, World!", "Hello, World!"},
		{"foo&amp;bar", "foo&amp;bar"},
		{`Hello <a href="www.example.com/">World</a>!`, "Hello World!"},
		{"Foo <textarea>Bar</textarea> Baz", "Foo Bar Baz"},
		{"Foo <!-- Bar --> Baz", "Foo Baz"},
	}

	for idx := range cases {
		tc := cases[idx]
		// The index in the message identifies the failing table entry.
		if got := StripHTML(tc.input); got != tc.expected {
			t.Errorf("Test %d failed. Expected %q got %q", idx, tc.expected, got)
		}
	}
}
// tstHTMLContent is a complete single-line HTML document: a head with one
// script tag, a nav list whose items carry hugo-nav attributes, an article
// containing a link, and a paragraph with a <br>.
// NOTE(review): presumably a shared test fixture; its consumers are not
// visible in this chunk.
const tstHTMLContent = `<!DOCTYPE html><html><head><script src="http://two/foobar.js"></script></head><body><nav><ul><li hugo-nav="section_0"></li><li hugo-nav="section_1"></li></ul></nav><article>content <a href="http://two/foobar">foobar</a>. Follow up</article><p>This is some text.<br>And some more.</p></body></html>`