markup/goldmark: Make auto IDs GitHub compatible

You can turn off this behaviour:

```toml
[markup]
  [markup.goldmark]
    [markup.goldmark.parser]
      autoHeadingIDAsciiOnly = true
```

Note that `anchorize` now adapts its behaviour depending on the default Markdown handler.

Fixes #6616
This commit is contained in:
Bjørn Erik Pedersen
2020-01-04 11:28:19 +01:00
parent ae816452b1
commit a82d2700fc
12 changed files with 421 additions and 35 deletions

View File

@@ -48,8 +48,9 @@ var (
// ContentSpec provides functionality to render markdown content.
type ContentSpec struct {
Converters markup.ConverterProvider
MardownConverter converter.Converter // Markdown converter with no document context
Converters markup.ConverterProvider
MardownConverter converter.Converter // Markdown converter with no document context
anchorNameSanitizer converter.AnchorNameSanitizer
// SummaryLength is the length of the summary that Hugo extracts from a content.
summaryLength int
@@ -91,6 +92,17 @@ func NewContentSpec(cfg config.Provider, logger *loggers.Logger, contentFs afero
return nil, err
}
spec.MardownConverter = conv
if as, ok := conv.(converter.AnchorNameSanitizer); ok {
spec.anchorNameSanitizer = as
} else {
// Use Goldmark's sanitizer
p := converterProvider.Get("goldmark")
conv, err := p.New(converter.DocumentContext{})
if err != nil {
return nil, err
}
spec.anchorNameSanitizer = conv.(converter.AnchorNameSanitizer)
}
return spec, nil
}
@@ -192,6 +204,10 @@ func (c *ContentSpec) RenderMarkdown(src []byte) ([]byte, error) {
return b.Bytes(), nil
}
// SanitizeAnchorName returns s as sanitized by the anchor name sanitizer
// held by this ContentSpec. The work is delegated entirely to that
// sanitizer; no additional processing is applied here.
func (c *ContentSpec) SanitizeAnchorName(s string) string {
	sanitizer := c.anchorNameSanitizer
	return sanitizer.SanitizeAnchorName(s)
}
func (c *ContentSpec) ResolveMarkup(in string) string {
in = strings.ToLower(in)
switch in {

View File

@@ -24,6 +24,8 @@ import (
"strings"
"unicode"
"github.com/gohugoio/hugo/common/text"
"github.com/gohugoio/hugo/config"
"github.com/gohugoio/hugo/hugofs"
@@ -31,9 +33,6 @@ import (
"github.com/gohugoio/hugo/common/hugio"
_errors "github.com/pkg/errors"
"github.com/spf13/afero"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)
var (
@@ -134,6 +133,10 @@ func ishex(c rune) bool {
// are also removed.
// Spaces will be replaced with a single hyphen, and sequential hyphens will be reduced to one.
func (p *PathSpec) UnicodeSanitize(s string) string {
if p.RemovePathAccents {
s = text.RemoveAccentsString(s)
}
source := []rune(s)
target := make([]rune, 0, len(source))
var prependHyphen bool
@@ -154,17 +157,7 @@ func (p *PathSpec) UnicodeSanitize(s string) string {
}
}
var result string
if p.RemovePathAccents {
// remove accents - see https://blog.golang.org/normalization
t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
result, _, _ = transform.String(t, string(target))
} else {
result = string(target)
}
return result
return string(target)
}
// ReplaceExtension takes a path and an extension, strips the old extension