markup/goldmark: Make auto IDs GitHub compatible

You can turn off this behaviour:

```toml
[markup]
  [markup.goldmark]
    [markup.goldmark.parser]
      autoHeadingIDAsciiOnly = true
```

Note that `anchorize` now adapts its behaviour depending on the default Markdown handler.

Fixes #6616
2025-08-18 21:11:19 +02:00 · 2020-01-04 11:28:19 +01:00
parent ae816452b1
commit a82d2700fc
12 changed files with 421 additions and 35 deletions
--- a/helpers/content.go
+++ b/helpers/content.go
@@ -48,8 +48,9 @@ var (

 // ContentSpec provides functionality to render markdown content.
 type ContentSpec struct {
-	Converters       markup.ConverterProvider
-	MardownConverter converter.Converter // Markdown converter with no document context
+	Converters          markup.ConverterProvider
+	MardownConverter    converter.Converter // Markdown converter with no document context
+	anchorNameSanitizer converter.AnchorNameSanitizer

 	// SummaryLength is the length of the summary that Hugo extracts from a content.
 	summaryLength int
@@ -91,6 +92,17 @@ func NewContentSpec(cfg config.Provider, logger *loggers.Logger, contentFs afero
 		return nil, err
 	}
 	spec.MardownConverter = conv
+	if as, ok := conv.(converter.AnchorNameSanitizer); ok {
+		spec.anchorNameSanitizer = as
+	} else {
+		// Use Goldmark's sanitizer
+		p := converterProvider.Get("goldmark")
+		conv, err := p.New(converter.DocumentContext{})
+		if err != nil {
+			return nil, err
+		}
+		spec.anchorNameSanitizer = conv.(converter.AnchorNameSanitizer)
+	}

 	return spec, nil
 }
@@ -192,6 +204,10 @@ func (c *ContentSpec) RenderMarkdown(src []byte) ([]byte, error) {
 	return b.Bytes(), nil
 }

+func (c *ContentSpec) SanitizeAnchorName(s string) string {
+	return c.anchorNameSanitizer.SanitizeAnchorName(s)
+}
+
 func (c *ContentSpec) ResolveMarkup(in string) string {
 	in = strings.ToLower(in)
 	switch in {
--- a/helpers/path.go
+++ b/helpers/path.go
@@ -24,6 +24,8 @@ import (
 	"strings"
 	"unicode"

+	"github.com/gohugoio/hugo/common/text"
+
 	"github.com/gohugoio/hugo/config"

 	"github.com/gohugoio/hugo/hugofs"
@@ -31,9 +33,6 @@ import (
 	"github.com/gohugoio/hugo/common/hugio"
 	_errors "github.com/pkg/errors"
 	"github.com/spf13/afero"
-	"golang.org/x/text/runes"
-	"golang.org/x/text/transform"
-	"golang.org/x/text/unicode/norm"
 )

 var (
@@ -134,6 +133,10 @@ func ishex(c rune) bool {
 // are also removed.
 // Spaces will be replaced with a single hyphen, and sequential hyphens will be reduced to one.
 func (p *PathSpec) UnicodeSanitize(s string) string {
+	if p.RemovePathAccents {
+		s = text.RemoveAccentsString(s)
+	}
+
 	source := []rune(s)
 	target := make([]rune, 0, len(source))
 	var prependHyphen bool
@@ -154,17 +157,7 @@ func (p *PathSpec) UnicodeSanitize(s string) string {
 		}
 	}

-	var result string
-
-	if p.RemovePathAccents {
-		// remove accents - see https://blog.golang.org/normalization
-		t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
-		result, _, _ = transform.String(t, string(target))
-	} else {
-		result = string(target)
-	}
-
-	return result
+	return string(target)
 }

 // ReplaceExtension takes a path and an extension, strips the old extension