From 157d3703c3ec7bd2b25270c55b0d6e6d203f76e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Sat, 15 Feb 2025 17:13:20 +0100 Subject: [PATCH] Add autoID for definition terms Fixes #13403 See #11566 Co-authored-by: Joe Mooring --- markup/goldmark/autoid.go | 42 +++++++-- markup/goldmark/autoid_test.go | 18 ++-- markup/goldmark/convert.go | 12 +-- markup/goldmark/goldmark_config/config.go | 39 ++++++-- .../extensions/attributes/attributes.go | 91 ++++++++++++++++--- .../attributes/attributes_integration_test.go | 74 +++++++++++++++ markup/goldmark/toc.go | 8 +- markup/markup_config/config.go | 8 ++ markup/tableofcontents/tableofcontents.go | 17 +++- 9 files changed, 262 insertions(+), 47 deletions(-) create mode 100644 markup/goldmark/internal/extensions/attributes/attributes_integration_test.go diff --git a/markup/goldmark/autoid.go b/markup/goldmark/autoid.go index e1fdfacb4..89259d33a 100644 --- a/markup/goldmark/autoid.go +++ b/markup/goldmark/autoid.go @@ -26,6 +26,7 @@ import ( "github.com/gohugoio/hugo/common/text" "github.com/yuin/goldmark/ast" + east "github.com/yuin/goldmark/extension/ast" "github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/util" @@ -43,11 +44,11 @@ func sanitizeAnchorName(b []byte, idType string) []byte { func sanitizeAnchorNameWithHook(b []byte, idType string, hook func(buf *bytes.Buffer)) []byte { buf := bp.GetBuffer() - if idType == goldmark_config.AutoHeadingIDTypeBlackfriday { + if idType == goldmark_config.AutoIDTypeBlackfriday { // TODO(bep) make it more efficient. buf.WriteString(blackfriday.SanitizedAnchorName(string(b))) } else { - asciiOnly := idType == goldmark_config.AutoHeadingIDTypeGitHubAscii + asciiOnly := idType == goldmark_config.AutoIDTypeGitHubAscii if asciiOnly { // Normalize it to preserve accents if possible. @@ -90,8 +91,9 @@ func isAlphaNumeric(r rune) bool { var _ parser.IDs = (*idFactory)(nil) type idFactory struct { - idType string - vals map[string]struct{} + idType string + vals map[string]struct{} + duplicates []string } func newIDFactory(idType string) *idFactory { @@ -101,11 +103,28 @@ func newIDFactory(idType string) *idFactory { } } +type stringValuesProvider interface { + StringValues() []string +} + +var _ stringValuesProvider = (*idFactory)(nil) + +func (ids *idFactory) StringValues() []string { + values := make([]string, 0, len(ids.vals)) + for k := range ids.vals { + values = append(values, k) + } + values = append(values, ids.duplicates...) + return values +} + func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte { return sanitizeAnchorNameWithHook(value, ids.idType, func(buf *bytes.Buffer) { if buf.Len() == 0 { if kind == ast.KindHeading { buf.WriteString("heading") + } else if kind == east.KindDefinitionTerm { + buf.WriteString("term") } else { buf.WriteString("id") } @@ -123,11 +142,18 @@ func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte { buf.Truncate(pos) } } - - ids.vals[buf.String()] = struct{}{} + ids.put(buf.String()) }) } -func (ids *idFactory) Put(value []byte) { - ids.vals[util.BytesToReadOnlyString(value)] = struct{}{} +func (ids *idFactory) put(s string) { + if _, found := ids.vals[s]; found { + ids.duplicates = append(ids.duplicates, s) + } else { + ids.vals[s] = struct{}{} + } +} + +func (ids *idFactory) Put(value []byte) { + ids.put(string(value)) } diff --git a/markup/goldmark/autoid_test.go b/markup/goldmark/autoid_test.go index 0bdb63c12..e0770d86c 100644 --- a/markup/goldmark/autoid_test.go +++ b/markup/goldmark/autoid_test.go @@ -78,9 +78,9 @@ tabspace expect := expectlines[i] c.Run(input, func(c *qt.C) { b := []byte(input) - got := string(sanitizeAnchorName(b, goldmark_config.AutoHeadingIDTypeGitHub)) + got := string(sanitizeAnchorName(b, goldmark_config.AutoIDTypeGitHub)) c.Assert(got, qt.Equals, expect) - c.Assert(sanitizeAnchorNameString(input, goldmark_config.AutoHeadingIDTypeGitHub), qt.Equals, expect) + c.Assert(sanitizeAnchorNameString(input, goldmark_config.AutoIDTypeGitHub), qt.Equals, expect) c.Assert(string(b), qt.Equals, input) }) } @@ -89,20 +89,20 @@ tabspace func TestSanitizeAnchorNameAsciiOnly(t *testing.T) { c := qt.New(t) - c.Assert(sanitizeAnchorNameString("god is神真美好 good", goldmark_config.AutoHeadingIDTypeGitHubAscii), qt.Equals, "god-is-good") - c.Assert(sanitizeAnchorNameString("Resumé", goldmark_config.AutoHeadingIDTypeGitHubAscii), qt.Equals, "resume") + c.Assert(sanitizeAnchorNameString("god is神真美好 good", goldmark_config.AutoIDTypeGitHubAscii), qt.Equals, "god-is-good") + c.Assert(sanitizeAnchorNameString("Resumé", goldmark_config.AutoIDTypeGitHubAscii), qt.Equals, "resume") } func TestSanitizeAnchorNameBlackfriday(t *testing.T) { c := qt.New(t) - c.Assert(sanitizeAnchorNameString("Let's try this, shall we?", goldmark_config.AutoHeadingIDTypeBlackfriday), qt.Equals, "let-s-try-this-shall-we") + c.Assert(sanitizeAnchorNameString("Let's try this, shall we?", goldmark_config.AutoIDTypeBlackfriday), qt.Equals, "let-s-try-this-shall-we") } func BenchmarkSanitizeAnchorName(b *testing.B) { input := []byte("God is good: 神真美好") b.ResetTimer() for i := 0; i < b.N; i++ { - result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeGitHub) + result := sanitizeAnchorName(input, goldmark_config.AutoIDTypeGitHub) if len(result) != 24 { b.Fatalf("got %d", len(result)) } @@ -113,7 +113,7 @@ func BenchmarkSanitizeAnchorNameAsciiOnly(b *testing.B) { input := []byte("God is good: 神真美好") b.ResetTimer() for i := 0; i < b.N; i++ { - result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeGitHubAscii) + result := sanitizeAnchorName(input, goldmark_config.AutoIDTypeGitHubAscii) if len(result) != 12 { b.Fatalf("got %d", len(result)) } @@ -124,7 +124,7 @@ func BenchmarkSanitizeAnchorNameBlackfriday(b *testing.B) { input := []byte("God is good: 神真美好") b.ResetTimer() for i := 0; i < b.N; i++ { - result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeBlackfriday) + result := sanitizeAnchorName(input, goldmark_config.AutoIDTypeBlackfriday) if len(result) != 24 { b.Fatalf("got %d", len(result)) } @@ -135,7 +135,7 @@ func BenchmarkSanitizeAnchorNameString(b *testing.B) { input := "God is good: 神真美好" b.ResetTimer() for i := 0; i < b.N; i++ { - result := sanitizeAnchorNameString(input, goldmark_config.AutoHeadingIDTypeGitHub) + result := sanitizeAnchorNameString(input, goldmark_config.AutoIDTypeGitHub) if len(result) != 24 { b.Fatalf("got %d", len(result)) } diff --git a/markup/goldmark/convert.go b/markup/goldmark/convert.go index 823a43c9d..ceacd150e 100644 --- a/markup/goldmark/convert.go +++ b/markup/goldmark/convert.go @@ -61,7 +61,7 @@ func (p provide) New(cfg converter.ProviderConfig) (converter.Provider, error) { cfg: cfg, md: md, sanitizeAnchorName: func(s string) string { - return sanitizeAnchorNameString(s, cfg.MarkupConfig().Goldmark.Parser.AutoHeadingIDType) + return sanitizeAnchorNameString(s, cfg.MarkupConfig().Goldmark.Parser.AutoIDType) }, }, nil }), nil @@ -188,16 +188,12 @@ func newMarkdown(pcfg converter.ProviderConfig) goldmark.Markdown { extensions = append(extensions, emoji.Emoji) } - if cfg.Parser.AutoHeadingID { - parserOptions = append(parserOptions, parser.WithAutoHeadingID()) - } - if cfg.Parser.Attribute.Title { parserOptions = append(parserOptions, parser.WithAttribute()) } - if cfg.Parser.Attribute.Block { - extensions = append(extensions, attributes.New()) + if cfg.Parser.Attribute.Block || cfg.Parser.AutoHeadingID || cfg.Parser.AutoDefinitionTermID { + extensions = append(extensions, attributes.New(cfg.Parser)) } md := goldmark.New( @@ -295,7 +291,7 @@ func (c *goldmarkConverter) Convert(ctx converter.RenderContext) (converter.Resu } func (c *goldmarkConverter) newParserContext(rctx converter.RenderContext) *parserContext { - ctx := parser.NewContext(parser.WithIDs(newIDFactory(c.cfg.MarkupConfig().Goldmark.Parser.AutoHeadingIDType))) + ctx := parser.NewContext(parser.WithIDs(newIDFactory(c.cfg.MarkupConfig().Goldmark.Parser.AutoIDType))) ctx.Set(tocEnableKey, rctx.RenderTOC) return &parserContext{ Context: ctx, diff --git a/markup/goldmark/goldmark_config/config.go b/markup/goldmark/goldmark_config/config.go index c6e0bcd3d..04eb371d9 100644 --- a/markup/goldmark/goldmark_config/config.go +++ b/markup/goldmark/goldmark_config/config.go @@ -15,9 +15,9 @@ package goldmark_config const ( - AutoHeadingIDTypeGitHub = "github" - AutoHeadingIDTypeGitHubAscii = "github-ascii" - AutoHeadingIDTypeBlackfriday = "blackfriday" + AutoIDTypeGitHub = "github" + AutoIDTypeGitHubAscii = "github-ascii" + AutoIDTypeBlackfriday = "blackfriday" ) // Default holds the default Goldmark configuration. @@ -79,7 +79,8 @@ var Default = Config{ }, Parser: Parser{ AutoHeadingID: true, - AutoHeadingIDType: AutoHeadingIDTypeGitHub, + AutoDefinitionTermID: false, + AutoIDType: AutoIDTypeGitHub, WrapStandAloneImageWithinParagraph: true, Attribute: ParserAttribute{ Title: true, @@ -97,6 +98,16 @@ type Config struct { RenderHooks RenderHooks } +func (c *Config) Init() error { + if err := c.Parser.Init(); err != nil { + return err + } + if c.Parser.AutoDefinitionTermID && !c.Extensions.DefinitionList { + c.Parser.AutoDefinitionTermID = false + } + return nil +} + // RenderHooks contains configuration for Goldmark render hooks. type RenderHooks struct { Image ImageRenderHook @@ -250,16 +261,30 @@ type Parser struct { // auto generated heading ids. AutoHeadingID bool - // The strategy to use when generating heading IDs. - // Available options are "github", "github-ascii". + // Enables auto definition term ids. + AutoDefinitionTermID bool + + // The strategy to use when generating IDs. + // Available options are "github", "github-ascii", and "blackfriday". // Default is "github", which will create GitHub-compatible anchor names. - AutoHeadingIDType string + AutoIDType string // Enables custom attributes. Attribute ParserAttribute // Whether to wrap stand-alone images within a paragraph or not. WrapStandAloneImageWithinParagraph bool + + // Renamed to AutoIDType in 0.144.0. + AutoHeadingIDType string `json:"-"` +} + +func (p *Parser) Init() error { + // Renamed from AutoHeadingIDType to AutoIDType in 0.144.0. + if p.AutoHeadingIDType != "" { + p.AutoIDType = p.AutoHeadingIDType + } + return nil } type ParserAttribute struct { diff --git a/markup/goldmark/internal/extensions/attributes/attributes.go b/markup/goldmark/internal/extensions/attributes/attributes.go index feb3d915b..526635f45 100644 --- a/markup/goldmark/internal/extensions/attributes/attributes.go +++ b/markup/goldmark/internal/extensions/attributes/attributes.go @@ -1,8 +1,11 @@ package attributes import ( + "github.com/gohugoio/hugo/markup/goldmark/goldmark_config" + "github.com/gohugoio/hugo/markup/goldmark/internal/render" "github.com/yuin/goldmark" "github.com/yuin/goldmark/ast" + east "github.com/yuin/goldmark/extension/ast" "github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/text" "github.com/yuin/goldmark/util" @@ -14,24 +17,29 @@ import ( var ( kindAttributesBlock = ast.NewNodeKind("AttributesBlock") + attrNameID = []byte("id") - defaultParser = new(attrParser) - defaultTransformer = new(transformer) - attributes goldmark.Extender = new(attrExtension) + defaultParser = new(attrParser) ) -func New() goldmark.Extender { - return attributes +func New(cfg goldmark_config.Parser) goldmark.Extender { + return &attrExtension{cfg: cfg} } -type attrExtension struct{} +type attrExtension struct { + cfg goldmark_config.Parser +} func (a *attrExtension) Extend(m goldmark.Markdown) { + if a.cfg.Attribute.Block { + m.Parser().AddOptions( + parser.WithBlockParsers( + util.Prioritized(defaultParser, 100)), + ) + } m.Parser().AddOptions( - parser.WithBlockParsers( - util.Prioritized(defaultParser, 100)), parser.WithASTTransformers( - util.Prioritized(defaultTransformer, 100), + util.Prioritized(&transformer{cfg: a.cfg}, 100), ), ) } @@ -92,18 +100,47 @@ func (a *attributesBlock) Kind() ast.NodeKind { return kindAttributesBlock } -type transformer struct{} +type transformer struct { + cfg goldmark_config.Parser +} + +func (a *transformer) isFragmentNode(n ast.Node) bool { + switch n.Kind() { + case east.KindDefinitionTerm, ast.KindHeading: + return true + default: + return false + } +} func (a *transformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) { - attributes := make([]ast.Node, 0, 500) + var attributes []ast.Node + if a.cfg.Attribute.Block { + attributes = make([]ast.Node, 0, 500) + } ast.Walk(node, func(node ast.Node, entering bool) (ast.WalkStatus, error) { - if entering && node.Kind() == kindAttributesBlock { + if !entering { + return ast.WalkContinue, nil + } + + if a.isFragmentNode(node) { + if id, found := node.Attribute(attrNameID); !found { + a.generateAutoID(node, reader, pc) + } else { + pc.IDs().Put(id.([]byte)) + } + } + + if a.cfg.Attribute.Block && node.Kind() == kindAttributesBlock { // Attributes for fenced code blocks are handled in their own extension, // but note that we currently only support code block attributes when // CodeFences=true. if node.PreviousSibling() != nil && node.PreviousSibling().Kind() != ast.KindFencedCodeBlock && !node.HasBlankPreviousLines() { attributes = append(attributes, node) return ast.WalkSkipChildren, nil + } else { + // remove attributes node + node.Parent().RemoveChild(node.Parent(), node) } } @@ -123,3 +160,33 @@ func (a *transformer) Transform(node *ast.Document, reader text.Reader, pc parse attr.Parent().RemoveChild(attr.Parent(), attr) } } + +func (a *transformer) generateAutoID(n ast.Node, reader text.Reader, pc parser.Context) { + var text []byte + switch n := n.(type) { + case *ast.Heading: + if a.cfg.AutoHeadingID { + text = textHeadingID(n, reader) + } + case *east.DefinitionTerm: + if a.cfg.AutoDefinitionTermID { + text = []byte(render.TextPlain(n, reader.Source())) + } + } + + if len(text) > 0 { + headingID := pc.IDs().Generate(text, n.Kind()) + n.SetAttribute(attrNameID, headingID) + } +} + +// Markdown settext headers can have multiple lines, use the last line for the ID. +func textHeadingID(node *ast.Heading, reader text.Reader) []byte { + var line []byte + lastIndex := node.Lines().Len() - 1 + if lastIndex > -1 { + lastLine := node.Lines().At(lastIndex) + line = lastLine.Value(reader.Source()) + } + return line +} diff --git a/markup/goldmark/internal/extensions/attributes/attributes_integration_test.go b/markup/goldmark/internal/extensions/attributes/attributes_integration_test.go new file mode 100644 index 000000000..fcce68ac2 --- /dev/null +++ b/markup/goldmark/internal/extensions/attributes/attributes_integration_test.go @@ -0,0 +1,74 @@ +package attributes_test + +import ( + "testing" + + "github.com/gohugoio/hugo/hugolib" +) + +func TestDescriptionListAutoID(t *testing.T) { + t.Parallel() + + files := ` +-- hugo.toml -- +[markup.goldmark.parser] +autoHeadingID = true +autoDefinitionTermID = true +autoIDType = 'github-ascii' +-- content/p1.md -- +--- +title: "Title" +--- + +## Title with id set {#title-with-id} + +## Title with id set duplicate {#title-with-id} + +## My Title + +Base Name +: Base name of the file. + +Base Name +: Duplicate term name. + +My Title +: Term with same name as title. + +Foo@Bar +: The foo bar. + +foo [something](/a/b/) bar +: A foo bar. + +良善天父 +: The good father. + +Ā ā Ă ă Ą ą Ć ć Ĉ ĉ Ċ ċ Č č Ď +: Testing accents. + +Mutiline set text header +Second line +--------------- + +-- layouts/_default/single.html -- +{{ .Content }}|Identifiers: {{ .Fragments.Identifiers }}| +` + + b := hugolib.Test(t, files) + + b.AssertFileContent("public/p1/index.html", + `
Base Name
`, + `
Base Name
`, + `
Foo@Bar
`, + `

My Title

`, + `
foo something bar
`, + `

Title with id set

`, + `

Title with id set duplicate

`, + `
My Title
`, + `
良善天父
`, + `
Ā ā Ă ă Ą ą Ć ć Ĉ ĉ Ċ ċ Č č Ď
`, + `

Mutiline set text header`, + "|Identifiers: [a-a-a-a-a-a-c-c-c-c-c-c-c-c-d base-name base-name-1 foo-something-bar foobar my-title my-title-1 second-line term title-with-id title-with-id]|", + ) +} diff --git a/markup/goldmark/toc.go b/markup/goldmark/toc.go index b0f7e703f..538f65df4 100644 --- a/markup/goldmark/toc.go +++ b/markup/goldmark/toc.go @@ -53,6 +53,10 @@ func (t *tocTransformer) Transform(n *ast.Document, reader text.Reader, pc parse headingText bytes.Buffer ) + if ids := pc.IDs().(stringValuesProvider).StringValues(); len(ids) > 0 { + toc.SetIdentifiers(ids) + } + ast.Walk(n, func(n ast.Node, entering bool) (ast.WalkStatus, error) { s := ast.WalkStatus(ast.WalkContinue) if n.Kind() == ast.KindHeading { @@ -131,5 +135,7 @@ func (e *tocExtension) Extend(m goldmark.Markdown) { r.AddOptions(e.options...) m.Parser().AddOptions(parser.WithASTTransformers(util.Prioritized(&tocTransformer{ r: r, - }, 10))) + }, + // This must run after the ID generation (priority 100). + 110))) } diff --git a/markup/markup_config/config.go b/markup/markup_config/config.go index 0350a78a2..e944caae6 100644 --- a/markup/markup_config/config.go +++ b/markup/markup_config/config.go @@ -41,6 +41,10 @@ type Config struct { AsciidocExt asciidocext_config.Config } +func (c *Config) Init() error { + return c.Goldmark.Init() +} + func Decode(cfg config.Provider) (conf Config, err error) { conf = Default @@ -57,6 +61,10 @@ func Decode(cfg config.Provider) (conf Config, err error) { return } + if err = conf.Init(); err != nil { + return + } + if err = highlight.ApplyLegacyConfig(cfg, &conf.Highlight); err != nil { return } diff --git a/markup/tableofcontents/tableofcontents.go b/markup/tableofcontents/tableofcontents.go index 560e421b7..741179d96 100644 --- a/markup/tableofcontents/tableofcontents.go +++ b/markup/tableofcontents/tableofcontents.go @@ -31,7 +31,8 @@ var Empty = &Fragments{ // Builder is used to build the ToC data structure. type Builder struct { - toc *Fragments + identifiersSet bool + toc *Fragments } // AddAt adds the heading to the ToC. @@ -42,6 +43,16 @@ func (b *Builder) AddAt(h *Heading, row, level int) { b.toc.addAt(h, row, level) } +// SetIdentifiers sets the identifiers in the ToC. +func (b *Builder) SetIdentifiers(ids []string) { + if b.toc == nil { + b.toc = &Fragments{} + } + b.identifiersSet = true + sort.Strings(ids) + b.toc.Identifiers = ids +} + // Build returns the ToC. func (b Builder) Build() *Fragments { if b.toc == nil { @@ -51,7 +62,9 @@ func (b Builder) Build() *Fragments { b.toc.walk(func(h *Heading) { if h.ID != "" { b.toc.HeadingsMap[h.ID] = h - b.toc.Identifiers = append(b.toc.Identifiers, h.ID) + if !b.identifiersSet { + b.toc.Identifiers = append(b.toc.Identifiers, h.ID) + } } }) sort.Strings(b.toc.Identifiers)