markup/goldmark: Sanitize TOC heading titles

Fixes #13401
This commit is contained in:
Joe Mooring
2025-08-10 15:12:31 -07:00
committed by Bjørn Erik Pedersen
parent f5245a7d5f
commit 5fdcc09062
4 changed files with 109 additions and 41 deletions

View File

@@ -15,7 +15,10 @@ package goldmark
import (
"bytes"
"regexp"
"strings"
"github.com/microcosm-cc/bluemonday"
strikethroughAst "github.com/yuin/goldmark/extension/ast"
emojiAst "github.com/yuin/goldmark-emoji/ast"
@@ -61,7 +64,7 @@ func (t *tocTransformer) Transform(n *ast.Document, reader text.Reader, pc parse
s := ast.WalkStatus(ast.WalkContinue)
if n.Kind() == ast.KindHeading {
if inHeading && !entering {
tocHeading.Title = headingText.String()
tocHeading.Title = sanitizeTOCHeadingTitle(headingText.String())
headingText.Reset()
toc.AddAt(tocHeading, row, level-1)
tocHeading = &tableofcontents.Heading{}
@@ -139,3 +142,40 @@ func (e *tocExtension) Extend(m goldmark.Markdown) {
// This must run after the ID generation (priority 100).
110)))
}
// tocSanitizerPolicy is the shared policy used to sanitize TOC heading
// titles. It is built once, when the package is initialized.
var tocSanitizerPolicy = newTOCSanitizerPolicy()

// newTOCSanitizerPolicy builds the bluemonday policy applied to TOC heading
// titles. The policy is an allowlist of inline HTML elements and attributes.
// Anchor elements are deliberately left off the list so that a heading title
// rendered in the TOC never contains a link of its own.
func newTOCSanitizerPolicy() *bluemonday.Policy {
	// Inline elements that may survive sanitization. "a" is intentionally
	// absent.
	inlineElements := []string{
		"abbr", "b", "bdi", "bdo", "br", "cite", "code", "data", "del", "dfn",
		"em", "i", "ins", "kbd", "mark", "q", "rp", "rt", "ruby", "s", "samp",
		"small", "span", "strong", "sub", "sup", "time", "u", "var", "wbr",
	}

	policy := bluemonday.NewPolicy()
	policy.AllowElements(inlineElements...)

	// bluemonday's predefined helper groups; see the bluemonday documentation
	// for exactly which elements and attributes each one permits.
	policy.AllowStandardAttributes()
	policy.AllowStyling()
	policy.AllowImages()

	// Element-specific attributes.
	policy.AllowAttrs("cite").OnElements("del", "ins", "q")
	policy.AllowAttrs("datetime").OnElements("del", "ins", "time")
	policy.AllowAttrs("value").OnElements("data")

	return policy
}
// whiteSpaceRe matches one or more consecutive whitespace characters.
var whiteSpaceRe = regexp.MustCompile(`\s+`)

// sanitizeTOCHeadingTitle returns s prepared for use as a TOC heading title.
//
// A string without any '<' character is returned unchanged. Otherwise the
// string is passed through tocSanitizerPolicy, surrounding whitespace is
// trimmed, and every remaining run of whitespace is replaced by a single
// space.
func sanitizeTOCHeadingTitle(s string) string {
	// No '<' means there is no markup to filter; skip the sanitizer.
	if !strings.Contains(s, "<") {
		return s
	}

	sanitized := tocSanitizerPolicy.Sanitize(s)

	// Normalize the whitespace that may be left behind once markup has been
	// stripped.
	trimmed := strings.TrimSpace(sanitized)
	return whiteSpaceRe.ReplaceAllString(trimmed, " ")
}

View File

@@ -90,6 +90,11 @@ title: p6 (strikethrough)
title: p7 (emoji)
---
## A :snake: emoji
-- content/p8.md --
---
title: p8 (link)
---
## A [link](https://example.org)
`
b := hugolib.Test(t, files)
@@ -111,36 +116,41 @@ title: p7 (emoji)
</nav>`)
// markdown
b.AssertFileContent("public/p2/index.html", `<nav id="TableOfContents">
<li><a href="#">Some <em>emphasized</em> text</a></li>
<li><a href="#">Some <code>inline</code> code</a></li>
<li><a href="#">Something to escape A &lt; B &amp;&amp; C &gt; B</a></li>
`)
b.AssertFileContent("public/p2/index.html",
`<li><a href="#">Some <em>emphasized</em> text</a></li>`,
`<li><a href="#">Some <code>inline</code> code</a></li>`,
`<li><a href="#">Something to escape A &lt; B &amp;&amp; C &gt; B</a></li>`,
)
// image
b.AssertFileContent("public/p3/index.html", `
<li><a href="#">An image <img src="a.jpg" alt="kitten"></a></li>
`)
b.AssertFileContent("public/p3/index.html",
`<li><a href="#">An image <img src="a.jpg" alt="kitten"></a></li>`,
)
// raw html
b.AssertFileContent("public/p4/index.html", `
<li><a href="#">Some <!-- raw HTML omitted -->raw<!-- raw HTML omitted --> HTML</a></li>
`)
b.AssertFileContent("public/p4/index.html",
`<li><a href="#">Some raw HTML</a></li>`,
)
// typographer
b.AssertFileContent("public/p5/index.html", `
<li><a href="#">Some &quot;typographer&quot; markup</a></li>
`)
b.AssertFileContent("public/p5/index.html",
`<li><a href="#">Some &quot;typographer&quot; markup</a></li>`,
)
// strikethrough
b.AssertFileContent("public/p6/index.html", `
<li><a href="#">Some ~~deleted~~ text</a></li>
`)
b.AssertFileContent("public/p6/index.html",
`<li><a href="#">Some ~~deleted~~ text</a></li>`,
)
// emoji
b.AssertFileContent("public/p7/index.html", `
<li><a href="#">A :snake: emoji</a></li>
`)
b.AssertFileContent("public/p7/index.html",
`<li><a href="#">A :snake: emoji</a></li>`,
)
// link
b.AssertFileContent("public/p8/index.html",
`<li><a href="#">A link</a></li>`,
)
}
func TestTableOfContentsAdvanced(t *testing.T) {
@@ -214,6 +224,11 @@ title: p6 (strikethrough)
title: p7 (emoji)
---
## A :snake: emoji
-- content/p8.md --
---
title: p8 (link)
---
## A [link](https://example.org)
`
b := hugolib.Test(t, files)
@@ -231,37 +246,41 @@ title: p7 (emoji)
</nav>`)
// markdown
b.AssertFileContent("public/p2/index.html", `<nav id="TableOfContents">
<li><a href="#some-emphasized-text">Some <em>emphasized</em> text</a></li>
<li><a href="#some-inline-code">Some <code>inline</code> code</a></li>
<li><a href="#something-to-escape-a--b--c--b">Something to escape A &lt; B &amp;&amp; C &gt; B</a></li>
`)
b.AssertFileContent("public/p2/index.html",
`<li><a href="#some-emphasized-text">Some <em>emphasized</em> text</a></li>`,
`<li><a href="#some-inline-code">Some <code>inline</code> code</a></li>`,
`<li><a href="#something-to-escape-a--b--c--b">Something to escape A &lt; B &amp;&amp; C &gt; B</a></li>`,
)
// image
b.AssertFileContent("public/p3/index.html", `
<li><a href="#an-image-kitten">An image <img src="a.jpg" alt="kitten" /></a></li>
`)
b.AssertFileContent("public/p3/index.html",
`<li><a href="#an-image-kitten">An image <img src="a.jpg" alt="kitten"/></a></li>`,
)
// raw html
b.AssertFileContent("public/p4/index.html", `
<li><a href="#some-raw-html">Some <span>raw</span> HTML</a></li>
`)
b.AssertFileContent("public/p4/index.html",
`<li><a href="#some-raw-html">Some <span>raw</span> HTML</a></li>`,
)
// typographer
b.AssertFileContent("public/p5/index.html", `
<li><a href="#some-typographer-markup">Some &ldquo;typographer&rdquo; markup</a></li>
`)
b.AssertFileContent("public/p5/index.html",
`<li><a href="#some-typographer-markup">Some &ldquo;typographer&rdquo; markup</a></li>`,
)
// strikethrough
b.AssertFileContent("public/p6/index.html", `
<li><a href="#some-deleted-text">Some <del>deleted</del> text</a></li>
`)
b.AssertFileContent("public/p6/index.html",
`<li><a href="#some-deleted-text">Some <del>deleted</del> text</a></li>`,
)
// emoji
b.AssertFileContent("public/p7/index.html",
`<li><a href="#a-snake-emoji">A &#x1f40d; emoji</a></li>`,
)
b.AssertFileContent("public/p7/index.html", `
<li><a href="#a-snake-emoji">A &#x1f40d; emoji</a></li>
`)
// link
b.AssertFileContent("public/p8/index.html",
`<li><a href="#a-link">A link</a></li>`,
)
}
func TestIssue13416(t *testing.T) {