diff --git a/markup/goldmark/autoid.go b/markup/goldmark/autoid.go index e1fdfacb4..89259d33a 100644 --- a/markup/goldmark/autoid.go +++ b/markup/goldmark/autoid.go @@ -26,6 +26,7 @@ import ( "github.com/gohugoio/hugo/common/text" "github.com/yuin/goldmark/ast" + east "github.com/yuin/goldmark/extension/ast" "github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/util" @@ -43,11 +44,11 @@ func sanitizeAnchorName(b []byte, idType string) []byte { func sanitizeAnchorNameWithHook(b []byte, idType string, hook func(buf *bytes.Buffer)) []byte { buf := bp.GetBuffer() - if idType == goldmark_config.AutoHeadingIDTypeBlackfriday { + if idType == goldmark_config.AutoIDTypeBlackfriday { // TODO(bep) make it more efficient. buf.WriteString(blackfriday.SanitizedAnchorName(string(b))) } else { - asciiOnly := idType == goldmark_config.AutoHeadingIDTypeGitHubAscii + asciiOnly := idType == goldmark_config.AutoIDTypeGitHubAscii if asciiOnly { // Normalize it to preserve accents if possible. @@ -90,8 +91,9 @@ func isAlphaNumeric(r rune) bool { var _ parser.IDs = (*idFactory)(nil) type idFactory struct { - idType string - vals map[string]struct{} + idType string + vals map[string]struct{} + duplicates []string } func newIDFactory(idType string) *idFactory { @@ -101,11 +103,28 @@ func newIDFactory(idType string) *idFactory { } } +type stringValuesProvider interface { + StringValues() []string +} + +var _ stringValuesProvider = (*idFactory)(nil) + +func (ids *idFactory) StringValues() []string { + values := make([]string, 0, len(ids.vals)) + for k := range ids.vals { + values = append(values, k) + } + values = append(values, ids.duplicates...) + return values +} + func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte { return sanitizeAnchorNameWithHook(value, ids.idType, func(buf *bytes.Buffer) { if buf.Len() == 0 { if kind == ast.KindHeading { buf.WriteString("heading") + } else if kind == east.KindDefinitionTerm { + buf.WriteString("term") } else { buf.WriteString("id") } @@ -123,11 +142,18 @@ func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte { buf.Truncate(pos) } } - - ids.vals[buf.String()] = struct{}{} + ids.put(buf.String()) }) } -func (ids *idFactory) Put(value []byte) { - ids.vals[util.BytesToReadOnlyString(value)] = struct{}{} +func (ids *idFactory) put(s string) { + if _, found := ids.vals[s]; found { + ids.duplicates = append(ids.duplicates, s) + } else { + ids.vals[s] = struct{}{} + } +} + +func (ids *idFactory) Put(value []byte) { + ids.put(string(value)) } diff --git a/markup/goldmark/autoid_test.go b/markup/goldmark/autoid_test.go index 0bdb63c12..e0770d86c 100644 --- a/markup/goldmark/autoid_test.go +++ b/markup/goldmark/autoid_test.go @@ -78,9 +78,9 @@ tabspace expect := expectlines[i] c.Run(input, func(c *qt.C) { b := []byte(input) - got := string(sanitizeAnchorName(b, goldmark_config.AutoHeadingIDTypeGitHub)) + got := string(sanitizeAnchorName(b, goldmark_config.AutoIDTypeGitHub)) c.Assert(got, qt.Equals, expect) - c.Assert(sanitizeAnchorNameString(input, goldmark_config.AutoHeadingIDTypeGitHub), qt.Equals, expect) + c.Assert(sanitizeAnchorNameString(input, goldmark_config.AutoIDTypeGitHub), qt.Equals, expect) c.Assert(string(b), qt.Equals, input) }) } @@ -89,20 +89,20 @@ tabspace func TestSanitizeAnchorNameAsciiOnly(t *testing.T) { c := qt.New(t) - c.Assert(sanitizeAnchorNameString("god is神真美好 good", goldmark_config.AutoHeadingIDTypeGitHubAscii), qt.Equals, "god-is-good") - c.Assert(sanitizeAnchorNameString("Resumé", goldmark_config.AutoHeadingIDTypeGitHubAscii), qt.Equals, "resume") + c.Assert(sanitizeAnchorNameString("god is神真美好 good", goldmark_config.AutoIDTypeGitHubAscii), qt.Equals, "god-is-good") + c.Assert(sanitizeAnchorNameString("Resumé", goldmark_config.AutoIDTypeGitHubAscii), qt.Equals, "resume") } func TestSanitizeAnchorNameBlackfriday(t *testing.T) { c := qt.New(t) - c.Assert(sanitizeAnchorNameString("Let's try this, shall we?", goldmark_config.AutoHeadingIDTypeBlackfriday), qt.Equals, "let-s-try-this-shall-we") + c.Assert(sanitizeAnchorNameString("Let's try this, shall we?", goldmark_config.AutoIDTypeBlackfriday), qt.Equals, "let-s-try-this-shall-we") } func BenchmarkSanitizeAnchorName(b *testing.B) { input := []byte("God is good: 神真美好") b.ResetTimer() for i := 0; i < b.N; i++ { - result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeGitHub) + result := sanitizeAnchorName(input, goldmark_config.AutoIDTypeGitHub) if len(result) != 24 { b.Fatalf("got %d", len(result)) } @@ -113,7 +113,7 @@ func BenchmarkSanitizeAnchorNameAsciiOnly(b *testing.B) { input := []byte("God is good: 神真美好") b.ResetTimer() for i := 0; i < b.N; i++ { - result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeGitHubAscii) + result := sanitizeAnchorName(input, goldmark_config.AutoIDTypeGitHubAscii) if len(result) != 12 { b.Fatalf("got %d", len(result)) } @@ -124,7 +124,7 @@ func BenchmarkSanitizeAnchorNameBlackfriday(b *testing.B) { input := []byte("God is good: 神真美好") b.ResetTimer() for i := 0; i < b.N; i++ { - result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeBlackfriday) + result := sanitizeAnchorName(input, goldmark_config.AutoIDTypeBlackfriday) if len(result) != 24 { b.Fatalf("got %d", len(result)) } @@ -135,7 +135,7 @@ func BenchmarkSanitizeAnchorNameString(b *testing.B) { input := "God is good: 神真美好" b.ResetTimer() for i := 0; i < b.N; i++ { - result := sanitizeAnchorNameString(input, goldmark_config.AutoHeadingIDTypeGitHub) + result := sanitizeAnchorNameString(input, goldmark_config.AutoIDTypeGitHub) if len(result) != 24 { b.Fatalf("got %d", len(result)) } diff --git a/markup/goldmark/convert.go b/markup/goldmark/convert.go index 823a43c9d..ceacd150e 100644 --- a/markup/goldmark/convert.go +++ b/markup/goldmark/convert.go @@ -61,7 +61,7 @@ func (p provide) New(cfg converter.ProviderConfig) (converter.Provider, error) { cfg: cfg, md: md, sanitizeAnchorName: func(s string) string { - return sanitizeAnchorNameString(s, cfg.MarkupConfig().Goldmark.Parser.AutoHeadingIDType) + return sanitizeAnchorNameString(s, cfg.MarkupConfig().Goldmark.Parser.AutoIDType) }, }, nil }), nil @@ -188,16 +188,12 @@ func newMarkdown(pcfg converter.ProviderConfig) goldmark.Markdown { extensions = append(extensions, emoji.Emoji) } - if cfg.Parser.AutoHeadingID { - parserOptions = append(parserOptions, parser.WithAutoHeadingID()) - } - if cfg.Parser.Attribute.Title { parserOptions = append(parserOptions, parser.WithAttribute()) } - if cfg.Parser.Attribute.Block { - extensions = append(extensions, attributes.New()) + if cfg.Parser.Attribute.Block || cfg.Parser.AutoHeadingID || cfg.Parser.AutoDefinitionTermID { + extensions = append(extensions, attributes.New(cfg.Parser)) } md := goldmark.New( @@ -295,7 +291,7 @@ func (c *goldmarkConverter) Convert(ctx converter.RenderContext) (converter.Resu } func (c *goldmarkConverter) newParserContext(rctx converter.RenderContext) *parserContext { - ctx := parser.NewContext(parser.WithIDs(newIDFactory(c.cfg.MarkupConfig().Goldmark.Parser.AutoHeadingIDType))) + ctx := parser.NewContext(parser.WithIDs(newIDFactory(c.cfg.MarkupConfig().Goldmark.Parser.AutoIDType))) ctx.Set(tocEnableKey, rctx.RenderTOC) return &parserContext{ Context: ctx, diff --git a/markup/goldmark/goldmark_config/config.go b/markup/goldmark/goldmark_config/config.go index c6e0bcd3d..04eb371d9 100644 --- a/markup/goldmark/goldmark_config/config.go +++ b/markup/goldmark/goldmark_config/config.go @@ -15,9 +15,9 @@ package goldmark_config const ( - AutoHeadingIDTypeGitHub = "github" - AutoHeadingIDTypeGitHubAscii = "github-ascii" - AutoHeadingIDTypeBlackfriday = "blackfriday" + AutoIDTypeGitHub = "github" + AutoIDTypeGitHubAscii = "github-ascii" + AutoIDTypeBlackfriday = "blackfriday" ) // Default holds the default Goldmark configuration. @@ -79,7 +79,8 @@ var Default = Config{ }, Parser: Parser{ AutoHeadingID: true, - AutoHeadingIDType: AutoHeadingIDTypeGitHub, + AutoDefinitionTermID: false, + AutoIDType: AutoIDTypeGitHub, WrapStandAloneImageWithinParagraph: true, Attribute: ParserAttribute{ Title: true, @@ -97,6 +98,16 @@ type Config struct { RenderHooks RenderHooks } +func (c *Config) Init() error { + if err := c.Parser.Init(); err != nil { + return err + } + if c.Parser.AutoDefinitionTermID && !c.Extensions.DefinitionList { + c.Parser.AutoDefinitionTermID = false + } + return nil +} + // RenderHooks contains configuration for Goldmark render hooks. type RenderHooks struct { Image ImageRenderHook @@ -250,16 +261,30 @@ type Parser struct { // auto generated heading ids. AutoHeadingID bool - // The strategy to use when generating heading IDs. - // Available options are "github", "github-ascii". + // Enables auto definition term ids. + AutoDefinitionTermID bool + + // The strategy to use when generating IDs. + // Available options are "github", "github-ascii", and "blackfriday". // Default is "github", which will create GitHub-compatible anchor names. - AutoHeadingIDType string + AutoIDType string // Enables custom attributes. Attribute ParserAttribute // Whether to wrap stand-alone images within a paragraph or not. WrapStandAloneImageWithinParagraph bool + + // Renamed to AutoIDType in 0.144.0. + AutoHeadingIDType string `json:"-"` +} + +func (p *Parser) Init() error { + // Renamed from AutoHeadingIDType to AutoIDType in 0.144.0. + if p.AutoHeadingIDType != "" { + p.AutoIDType = p.AutoHeadingIDType + } + return nil } type ParserAttribute struct { diff --git a/markup/goldmark/internal/extensions/attributes/attributes.go b/markup/goldmark/internal/extensions/attributes/attributes.go index feb3d915b..526635f45 100644 --- a/markup/goldmark/internal/extensions/attributes/attributes.go +++ b/markup/goldmark/internal/extensions/attributes/attributes.go @@ -1,8 +1,11 @@ package attributes import ( + "github.com/gohugoio/hugo/markup/goldmark/goldmark_config" + "github.com/gohugoio/hugo/markup/goldmark/internal/render" "github.com/yuin/goldmark" "github.com/yuin/goldmark/ast" + east "github.com/yuin/goldmark/extension/ast" "github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/text" "github.com/yuin/goldmark/util" @@ -14,24 +17,29 @@ import ( var ( kindAttributesBlock = ast.NewNodeKind("AttributesBlock") + attrNameID = []byte("id") - defaultParser = new(attrParser) - defaultTransformer = new(transformer) - attributes goldmark.Extender = new(attrExtension) + defaultParser = new(attrParser) ) -func New() goldmark.Extender { - return attributes +func New(cfg goldmark_config.Parser) goldmark.Extender { + return &attrExtension{cfg: cfg} } -type attrExtension struct{} +type attrExtension struct { + cfg goldmark_config.Parser +} func (a *attrExtension) Extend(m goldmark.Markdown) { + if a.cfg.Attribute.Block { + m.Parser().AddOptions( + parser.WithBlockParsers( + util.Prioritized(defaultParser, 100)), + ) + } m.Parser().AddOptions( - parser.WithBlockParsers( - util.Prioritized(defaultParser, 100)), parser.WithASTTransformers( - util.Prioritized(defaultTransformer, 100), + util.Prioritized(&transformer{cfg: a.cfg}, 100), ), ) } @@ -92,18 +100,47 @@ func (a *attributesBlock) Kind() ast.NodeKind { return kindAttributesBlock } -type transformer struct{} +type transformer struct { + cfg goldmark_config.Parser +} + +func (a *transformer) isFragmentNode(n ast.Node) bool { + switch n.Kind() { + case east.KindDefinitionTerm, ast.KindHeading: + return true + default: + return false + } +} func (a *transformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) { - attributes := make([]ast.Node, 0, 500) + var attributes []ast.Node + if a.cfg.Attribute.Block { + attributes = make([]ast.Node, 0, 500) + } ast.Walk(node, func(node ast.Node, entering bool) (ast.WalkStatus, error) { - if entering && node.Kind() == kindAttributesBlock { + if !entering { + return ast.WalkContinue, nil + } + + if a.isFragmentNode(node) { + if id, found := node.Attribute(attrNameID); !found { + a.generateAutoID(node, reader, pc) + } else { + pc.IDs().Put(id.([]byte)) + } + } + + if a.cfg.Attribute.Block && node.Kind() == kindAttributesBlock { // Attributes for fenced code blocks are handled in their own extension, // but note that we currently only support code block attributes when // CodeFences=true. if node.PreviousSibling() != nil && node.PreviousSibling().Kind() != ast.KindFencedCodeBlock && !node.HasBlankPreviousLines() { attributes = append(attributes, node) return ast.WalkSkipChildren, nil + } else { + // remove attributes node + node.Parent().RemoveChild(node.Parent(), node) } } @@ -123,3 +160,33 @@ func (a *transformer) Transform(node *ast.Document, reader text.Reader, pc parse attr.Parent().RemoveChild(attr.Parent(), attr) } } + +func (a *transformer) generateAutoID(n ast.Node, reader text.Reader, pc parser.Context) { + var text []byte + switch n := n.(type) { + case *ast.Heading: + if a.cfg.AutoHeadingID { + text = textHeadingID(n, reader) + } + case *east.DefinitionTerm: + if a.cfg.AutoDefinitionTermID { + text = []byte(render.TextPlain(n, reader.Source())) + } + } + + if len(text) > 0 { + headingID := pc.IDs().Generate(text, n.Kind()) + n.SetAttribute(attrNameID, headingID) + } +} + +// Markdown settext headers can have multiple lines, use the last line for the ID. +func textHeadingID(node *ast.Heading, reader text.Reader) []byte { + var line []byte + lastIndex := node.Lines().Len() - 1 + if lastIndex > -1 { + lastLine := node.Lines().At(lastIndex) + line = lastLine.Value(reader.Source()) + } + return line +} diff --git a/markup/goldmark/internal/extensions/attributes/attributes_integration_test.go b/markup/goldmark/internal/extensions/attributes/attributes_integration_test.go new file mode 100644 index 000000000..fcce68ac2 --- /dev/null +++ b/markup/goldmark/internal/extensions/attributes/attributes_integration_test.go @@ -0,0 +1,74 @@ +package attributes_test + +import ( + "testing" + + "github.com/gohugoio/hugo/hugolib" +) + +func TestDescriptionListAutoID(t *testing.T) { + t.Parallel() + + files := ` +-- hugo.toml -- +[markup.goldmark.parser] +autoHeadingID = true +autoDefinitionTermID = true +autoIDType = 'github-ascii' +-- content/p1.md -- +--- +title: "Title" +--- + +## Title with id set {#title-with-id} + +## Title with id set duplicate {#title-with-id} + +## My Title + +Base Name +: Base name of the file. + +Base Name +: Duplicate term name. + +My Title +: Term with same name as title. + +Foo@Bar +: The foo bar. + +foo [something](/a/b/) bar +: A foo bar. + +良善天父 +: The good father. + +Ā ā Ă ă Ą ą Ć ć Ĉ ĉ Ċ ċ Č č Ď +: Testing accents. + +Mutiline set text header +Second line +--------------- + +-- layouts/_default/single.html -- +{{ .Content }}|Identifiers: {{ .Fragments.Identifiers }}| +` + + b := hugolib.Test(t, files) + + b.AssertFileContent("public/p1/index.html", + `