mirror of
https://github.com/gohugoio/hugo.git
synced 2025-08-17 21:01:26 +02:00
markup/goldmark: Make auto IDs GitHub compatible
You can turn off this behaviour:

```toml
[markup]
  [markup.goldmark]
    [markup.goldmark.parser]
      autoHeadingIDAsciiOnly = true
```

Note that the `anchorize` function now adapts its behaviour depending on the default Markdown handler.

Fixes #6616
This commit is contained in:
125
markup/goldmark/autoid.go
Normal file
125
markup/goldmark/autoid.go
Normal file
@@ -0,0 +1,125 @@
|
||||
// Copyright 2019 The Hugo Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package goldmark
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strconv"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/gohugoio/hugo/common/text"
|
||||
|
||||
"github.com/yuin/goldmark/ast"
|
||||
"github.com/yuin/goldmark/parser"
|
||||
"github.com/yuin/goldmark/util"
|
||||
|
||||
bp "github.com/gohugoio/hugo/bufferpool"
|
||||
)
|
||||
|
||||
func sanitizeAnchorNameString(s string, asciiOnly bool) string {
|
||||
return string(sanitizeAnchorName([]byte(s), asciiOnly))
|
||||
}
|
||||
|
||||
func sanitizeAnchorName(b []byte, asciiOnly bool) []byte {
|
||||
return sanitizeAnchorNameWithHook(b, asciiOnly, nil)
|
||||
}
|
||||
|
||||
func sanitizeAnchorNameWithHook(b []byte, asciiOnly bool, hook func(buf *bytes.Buffer)) []byte {
|
||||
buf := bp.GetBuffer()
|
||||
|
||||
if asciiOnly {
|
||||
// Normalize it to preserve accents if possible.
|
||||
b = text.RemoveAccents(b)
|
||||
}
|
||||
|
||||
for len(b) > 0 {
|
||||
r, size := utf8.DecodeRune(b)
|
||||
switch {
|
||||
case asciiOnly && size != 1:
|
||||
case isSpace(r):
|
||||
buf.WriteString("-")
|
||||
case r == '-' || isAlphaNumeric(r):
|
||||
buf.WriteRune(unicode.ToLower(r))
|
||||
default:
|
||||
}
|
||||
|
||||
b = b[size:]
|
||||
}
|
||||
|
||||
if hook != nil {
|
||||
hook(buf)
|
||||
}
|
||||
|
||||
result := make([]byte, buf.Len())
|
||||
copy(result, buf.Bytes())
|
||||
|
||||
bp.PutBuffer(buf)
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// isAlphaNumeric reports whether r is an underscore, a Unicode letter or a
// Unicode digit.
func isAlphaNumeric(r rune) bool {
	switch {
	case r == '_':
		return true
	case unicode.IsLetter(r):
		return true
	default:
		return unicode.IsDigit(r)
	}
}
|
||||
|
||||
// isSpace reports whether r is a plain space or a horizontal tab. No other
// whitespace character counts.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	}
	return false
}
|
||||
|
||||
// Compile-time check that *idFactory implements parser.IDs.
var _ parser.IDs = (*idFactory)(nil)
|
||||
|
||||
// idFactory generates element IDs and remembers which ones are taken so that
// no ID is handed out twice.
type idFactory struct {
	// asciiOnly is passed on to the anchor sanitizer when generating IDs.
	asciiOnly bool

	// vals is the set of IDs that are already in use.
	vals map[string]struct{}
}
|
||||
|
||||
func newIDFactory(asciiOnly bool) *idFactory {
|
||||
return &idFactory{
|
||||
vals: make(map[string]struct{}),
|
||||
asciiOnly: asciiOnly,
|
||||
}
|
||||
}
|
||||
|
||||
func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte {
|
||||
return sanitizeAnchorNameWithHook(value, ids.asciiOnly, func(buf *bytes.Buffer) {
|
||||
if buf.Len() == 0 {
|
||||
if kind == ast.KindHeading {
|
||||
buf.WriteString("heading")
|
||||
} else {
|
||||
buf.WriteString("id")
|
||||
}
|
||||
}
|
||||
|
||||
if _, found := ids.vals[util.BytesToReadOnlyString(buf.Bytes())]; found {
|
||||
// Append a hypen and a number, starting with 1.
|
||||
buf.WriteRune('-')
|
||||
pos := buf.Len()
|
||||
for i := 1; ; i++ {
|
||||
buf.WriteString(strconv.Itoa(i))
|
||||
if _, found := ids.vals[util.BytesToReadOnlyString(buf.Bytes())]; !found {
|
||||
break
|
||||
}
|
||||
buf.Truncate(pos)
|
||||
}
|
||||
}
|
||||
|
||||
ids.vals[buf.String()] = struct{}{}
|
||||
|
||||
})
|
||||
}
|
||||
|
||||
func (ids *idFactory) Put(value []byte) {
|
||||
ids.vals[util.BytesToReadOnlyString(value)] = struct{}{}
|
||||
}
|
121
markup/goldmark/autoid_test.go
Normal file
121
markup/goldmark/autoid_test.go
Normal file
@@ -0,0 +1,121 @@
|
||||
// Copyright 2019 The Hugo Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package goldmark
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
qt "github.com/frankban/quicktest"
|
||||
)
|
||||
|
||||
func TestSanitizeAnchorName(t *testing.T) {
|
||||
c := qt.New(t)
|
||||
|
||||
// Tests generated manually on github.com
|
||||
tests := `
|
||||
God is good: 神真美好
|
||||
Number 32
|
||||
Question?
|
||||
1+2=3
|
||||
Special !"#$%&(parens)=?´* chars
|
||||
Resumé
|
||||
One-Hyphen
|
||||
Multiple--Hyphens
|
||||
Trailing hyphen-
|
||||
Many spaces here
|
||||
Forward/slash
|
||||
Backward\slash
|
||||
Under_score
|
||||
`
|
||||
|
||||
expect := `
|
||||
god-is-good-神真美好
|
||||
number-32
|
||||
question
|
||||
123
|
||||
special-parens-chars
|
||||
resumé
|
||||
one-hyphen
|
||||
multiple--hyphens
|
||||
trailing-hyphen-
|
||||
many---spaces--here
|
||||
forwardslash
|
||||
backwardslash
|
||||
under_score
|
||||
`
|
||||
|
||||
tests, expect = strings.TrimSpace(tests), strings.TrimSpace(expect)
|
||||
|
||||
testlines, expectlines := strings.Split(tests, "\n"), strings.Split(expect, "\n")
|
||||
|
||||
if len(testlines) != len(expectlines) {
|
||||
panic("test setup failed")
|
||||
}
|
||||
|
||||
for i, input := range testlines {
|
||||
input := input
|
||||
expect := expectlines[i]
|
||||
c.Run(input, func(c *qt.C) {
|
||||
b := []byte(input)
|
||||
got := string(sanitizeAnchorName(b, false))
|
||||
c.Assert(got, qt.Equals, expect)
|
||||
c.Assert(sanitizeAnchorNameString(input, false), qt.Equals, expect)
|
||||
c.Assert(string(b), qt.Equals, input)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeAnchorNameAsciiOnly(t *testing.T) {
|
||||
c := qt.New(t)
|
||||
|
||||
c.Assert(sanitizeAnchorNameString("god is神真美好 good", true), qt.Equals, "god-is-good")
|
||||
c.Assert(sanitizeAnchorNameString("Resumé", true), qt.Equals, "resume")
|
||||
|
||||
}
|
||||
|
||||
func BenchmarkSanitizeAnchorName(b *testing.B) {
|
||||
input := []byte("God is good: 神真美好")
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
result := sanitizeAnchorName(input, false)
|
||||
if len(result) != 24 {
|
||||
b.Fatalf("got %d", len(result))
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkSanitizeAnchorNameAsciiOnly(b *testing.B) {
|
||||
input := []byte("God is good: 神真美好")
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
result := sanitizeAnchorName(input, true)
|
||||
if len(result) != 12 {
|
||||
b.Fatalf("got %d", len(result))
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkSanitizeAnchorNameString(b *testing.B) {
|
||||
input := "God is good: 神真美好"
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
result := sanitizeAnchorNameString(input, false)
|
||||
if len(result) != 24 {
|
||||
b.Fatalf("got %d", len(result))
|
||||
}
|
||||
}
|
||||
}
|
@@ -50,19 +50,33 @@ type provide struct {
|
||||
|
||||
func (p provide) New(cfg converter.ProviderConfig) (converter.Provider, error) {
|
||||
md := newMarkdown(cfg)
|
||||
|
||||
return converter.NewProvider("goldmark", func(ctx converter.DocumentContext) (converter.Converter, error) {
|
||||
return &goldmarkConverter{
|
||||
ctx: ctx,
|
||||
cfg: cfg,
|
||||
md: md,
|
||||
sanitizeAnchorName: func(s string) string {
|
||||
return sanitizeAnchorNameString(s, cfg.MarkupConfig.Goldmark.Parser.AutoHeadingIDAsciiOnly)
|
||||
},
|
||||
}, nil
|
||||
}), nil
|
||||
}
|
||||
|
||||
var (
|
||||
_ converter.AnchorNameSanitizer = (*goldmarkConverter)(nil)
|
||||
)
|
||||
|
||||
type goldmarkConverter struct {
|
||||
md goldmark.Markdown
|
||||
ctx converter.DocumentContext
|
||||
cfg converter.ProviderConfig
|
||||
|
||||
sanitizeAnchorName func(s string) string
|
||||
}
|
||||
|
||||
func (c *goldmarkConverter) SanitizeAnchorName(s string) string {
|
||||
return c.sanitizeAnchorName(s)
|
||||
}
|
||||
|
||||
func newMarkdown(pcfg converter.ProviderConfig) goldmark.Markdown {
|
||||
@@ -226,7 +240,7 @@ func (c *goldmarkConverter) Convert(ctx converter.RenderContext) (result convert
|
||||
|
||||
buf := &bufWriter{Buffer: &bytes.Buffer{}}
|
||||
result = buf
|
||||
pctx := newParserContext(ctx)
|
||||
pctx := c.newParserContext(ctx)
|
||||
reader := text.NewReader(ctx.Src)
|
||||
|
||||
doc := c.md.Parser().Parse(
|
||||
@@ -265,8 +279,8 @@ func (c *goldmarkConverter) Supports(feature identity.Identity) bool {
|
||||
return featureSet[feature.GetIdentity()]
|
||||
}
|
||||
|
||||
func newParserContext(rctx converter.RenderContext) *parserContext {
|
||||
ctx := parser.NewContext()
|
||||
func (c *goldmarkConverter) newParserContext(rctx converter.RenderContext) *parserContext {
|
||||
ctx := parser.NewContext(parser.WithIDs(newIDFactory(c.cfg.MarkupConfig.Goldmark.Parser.AutoHeadingIDAsciiOnly)))
|
||||
ctx.Set(tocEnableKey, rctx.RenderTOC)
|
||||
return &parserContext{
|
||||
Context: ctx,
|
||||
|
@@ -28,6 +28,23 @@ import (
|
||||
qt "github.com/frankban/quicktest"
|
||||
)
|
||||
|
||||
func convert(c *qt.C, mconf markup_config.Config, content string) converter.Result {
|
||||
|
||||
p, err := Provider.New(
|
||||
converter.ProviderConfig{
|
||||
MarkupConfig: mconf,
|
||||
Logger: loggers.NewErrorLogger(),
|
||||
},
|
||||
)
|
||||
c.Assert(err, qt.IsNil)
|
||||
conv, err := p.New(converter.DocumentContext{DocumentID: "thedoc"})
|
||||
c.Assert(err, qt.IsNil)
|
||||
b, err := conv.Convert(converter.RenderContext{RenderTOC: true, Src: []byte(content)})
|
||||
c.Assert(err, qt.IsNil)
|
||||
|
||||
return b
|
||||
}
|
||||
|
||||
func TestConvert(t *testing.T) {
|
||||
c := qt.New(t)
|
||||
|
||||
@@ -92,29 +109,23 @@ description
|
||||
: the description for the content.
|
||||
|
||||
|
||||
## 神真美好
|
||||
|
||||
## 神真美好
|
||||
|
||||
## 神真美好
|
||||
|
||||
[^1]: And that's the footnote.
|
||||
|
||||
`
|
||||
|
||||
// Code fences
|
||||
content = strings.Replace(content, "§§§", "```", -1)
|
||||
|
||||
mconf := markup_config.Default
|
||||
mconf.Highlight.NoClasses = false
|
||||
mconf.Goldmark.Renderer.Unsafe = true
|
||||
|
||||
p, err := Provider.New(
|
||||
converter.ProviderConfig{
|
||||
MarkupConfig: mconf,
|
||||
Logger: loggers.NewErrorLogger(),
|
||||
},
|
||||
)
|
||||
c.Assert(err, qt.IsNil)
|
||||
conv, err := p.New(converter.DocumentContext{DocumentID: "thedoc"})
|
||||
c.Assert(err, qt.IsNil)
|
||||
b, err := conv.Convert(converter.RenderContext{RenderTOC: true, Src: []byte(content)})
|
||||
c.Assert(err, qt.IsNil)
|
||||
|
||||
b := convert(c, mconf, content)
|
||||
got := string(b.Bytes())
|
||||
|
||||
// Links
|
||||
@@ -123,6 +134,9 @@ description
|
||||
// Header IDs
|
||||
c.Assert(got, qt.Contains, `<h2 id="custom">Custom ID</h2>`, qt.Commentf(got))
|
||||
c.Assert(got, qt.Contains, `<h2 id="auto-id">Auto ID</h2>`, qt.Commentf(got))
|
||||
c.Assert(got, qt.Contains, `<h2 id="神真美好">神真美好</h2>`, qt.Commentf(got))
|
||||
c.Assert(got, qt.Contains, `<h2 id="神真美好-1">神真美好</h2>`, qt.Commentf(got))
|
||||
c.Assert(got, qt.Contains, `<h2 id="神真美好-2">神真美好</h2>`, qt.Commentf(got))
|
||||
|
||||
// Code fences
|
||||
c.Assert(got, qt.Contains, "<div class=\"highlight\"><pre class=\"chroma\"><code class=\"language-bash\" data-lang=\"bash\">LINE1\n</code></pre></div>")
|
||||
@@ -148,6 +162,20 @@ description
|
||||
|
||||
}
|
||||
|
||||
func TestConvertAutoIDAsciiOnly(t *testing.T) {
|
||||
c := qt.New(t)
|
||||
|
||||
content := `
|
||||
## God is Good: 神真美好
|
||||
`
|
||||
mconf := markup_config.Default
|
||||
mconf.Goldmark.Parser.AutoHeadingIDAsciiOnly = true
|
||||
b := convert(c, mconf, content)
|
||||
got := string(b.Bytes())
|
||||
|
||||
c.Assert(got, qt.Contains, "<h2 id=\"god-is-good-\">")
|
||||
}
|
||||
|
||||
func TestCodeFence(t *testing.T) {
|
||||
c := qt.New(t)
|
||||
|
||||
|
@@ -69,6 +69,10 @@ type Parser struct {
|
||||
// auto generated heading ids.
|
||||
AutoHeadingID bool
|
||||
|
||||
// When AutoHeadingID is enabled this will generate IDs with Ascii
|
||||
// characters only.
|
||||
AutoHeadingIDAsciiOnly bool
|
||||
|
||||
// Enables custom attributes.
|
||||
Attribute bool
|
||||
}
|
||||
|
Reference in New Issue
Block a user