publisher: Fix tag collector for nested table elements

Fixes #7318
This commit is contained in:
Bjørn Erik Pedersen
2020-05-25 21:05:59 +02:00
parent 915202494b
commit c950c86b4e
3 changed files with 27 additions and 1 deletions

View File

@@ -116,7 +116,13 @@ func (w *cssClassCollectorWriter) Write(p []byte) (n int, err error) {
w.buff.Reset()
if strings.HasPrefix(s, "</") {
continue
}
s, tagName := w.insertStandinHTMLElement(s)
el := parseHTMLElement(s)
el.Tag = tagName
w.collector.mu.Lock()
w.collector.elementSet[s] = true
@@ -132,6 +138,20 @@ func (w *cssClassCollectorWriter) Write(p []byte) (n int, err error) {
return
}
// insertStandinHTMLElement swaps the tag name of the opening tag in el for
// "div" and returns the rewritten tag together with the original tag name in
// lower case.
//
// The net/html parser does not handle single table elements as input, e.g.
// tbody. We only care about the element/class/ids, so just store away the
// original tag name and pretend it's a <div>.
//
// el is expected to start with '<' immediately followed by the tag name. The
// tag name ends at the first ASCII whitespace character (space, tab, newline,
// carriage return or form feed) or at the end of el. An empty el is returned
// unchanged with an empty tag name.
func (c *cssClassCollectorWriter) insertStandinHTMLElement(el string) (string, string) {
	if el == "" {
		// Nothing to rewrite; this also guards the el[1:] slice below.
		return "", ""
	}

	tag := el[1:]
	// Attributes may be separated from the tag name by any whitespace, not
	// just a space. Looking for a space only would run past a newline or tab
	// and cut the "tag" somewhere inside an attribute value.
	if i := strings.IndexAny(tag, " \t\n\r\f"); i != -1 {
		tag = tag[:i]
	}

	// The tag name sits right after the leading '<', so replace it by
	// position instead of searching for its text.
	newv := el[:1] + "div" + el[1+len(tag):]
	return newv, strings.ToLower(tag)
}
func (c *cssClassCollectorWriter) endCollecting(drop bool) {
c.isCollecting = false
c.inQuote = false