mirror of
https://github.com/gohugoio/hugo.git
synced 2025-08-16 20:53:59 +02:00
markup/goldmark: Make auto IDs GitHub compatible
You can turn off this behaviour:

```toml
[markup]
  [markup.goldmark]
    [markup.goldmark.parser]
      autoHeadingIDAsciiOnly = true
```

Note that `anchorize` now adapts its behaviour depending on the default Markdown handler.

Fixes #6616
This commit is contained in:
125
markup/goldmark/autoid.go
Normal file
125
markup/goldmark/autoid.go
Normal file
@@ -0,0 +1,125 @@
|
||||
// Copyright 2019 The Hugo Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package goldmark
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strconv"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/gohugoio/hugo/common/text"
|
||||
|
||||
"github.com/yuin/goldmark/ast"
|
||||
"github.com/yuin/goldmark/parser"
|
||||
"github.com/yuin/goldmark/util"
|
||||
|
||||
bp "github.com/gohugoio/hugo/bufferpool"
|
||||
)
|
||||
|
||||
func sanitizeAnchorNameString(s string, asciiOnly bool) string {
|
||||
return string(sanitizeAnchorName([]byte(s), asciiOnly))
|
||||
}
|
||||
|
||||
func sanitizeAnchorName(b []byte, asciiOnly bool) []byte {
|
||||
return sanitizeAnchorNameWithHook(b, asciiOnly, nil)
|
||||
}
|
||||
|
||||
func sanitizeAnchorNameWithHook(b []byte, asciiOnly bool, hook func(buf *bytes.Buffer)) []byte {
|
||||
buf := bp.GetBuffer()
|
||||
|
||||
if asciiOnly {
|
||||
// Normalize it to preserve accents if possible.
|
||||
b = text.RemoveAccents(b)
|
||||
}
|
||||
|
||||
for len(b) > 0 {
|
||||
r, size := utf8.DecodeRune(b)
|
||||
switch {
|
||||
case asciiOnly && size != 1:
|
||||
case isSpace(r):
|
||||
buf.WriteString("-")
|
||||
case r == '-' || isAlphaNumeric(r):
|
||||
buf.WriteRune(unicode.ToLower(r))
|
||||
default:
|
||||
}
|
||||
|
||||
b = b[size:]
|
||||
}
|
||||
|
||||
if hook != nil {
|
||||
hook(buf)
|
||||
}
|
||||
|
||||
result := make([]byte, buf.Len())
|
||||
copy(result, buf.Bytes())
|
||||
|
||||
bp.PutBuffer(buf)
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// isAlphaNumeric reports whether r is an underscore, a Unicode letter or a
// Unicode digit.
func isAlphaNumeric(r rune) bool {
	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
}
|
||||
|
||||
// isSpace reports whether r is a plain space or a horizontal tab. Other
// whitespace (newlines, non-breaking spaces, ...) is not treated as space.
func isSpace(r rune) bool {
	return r == ' ' || r == '\t'
}
|
||||
|
||||
// Compile-time check that *idFactory satisfies the parser.IDs interface.
var _ parser.IDs = (*idFactory)(nil)
|
||||
|
||||
// idFactory generates element IDs and remembers the ones already handed out
// so that later IDs can be made unique.
type idFactory struct {
	// asciiOnly is forwarded to the anchor sanitizer when generating IDs.
	asciiOnly bool
	// vals is the set of IDs that are already in use.
	vals map[string]struct{}
}
|
||||
|
||||
func newIDFactory(asciiOnly bool) *idFactory {
|
||||
return &idFactory{
|
||||
vals: make(map[string]struct{}),
|
||||
asciiOnly: asciiOnly,
|
||||
}
|
||||
}
|
||||
|
||||
func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte {
|
||||
return sanitizeAnchorNameWithHook(value, ids.asciiOnly, func(buf *bytes.Buffer) {
|
||||
if buf.Len() == 0 {
|
||||
if kind == ast.KindHeading {
|
||||
buf.WriteString("heading")
|
||||
} else {
|
||||
buf.WriteString("id")
|
||||
}
|
||||
}
|
||||
|
||||
if _, found := ids.vals[util.BytesToReadOnlyString(buf.Bytes())]; found {
|
||||
// Append a hypen and a number, starting with 1.
|
||||
buf.WriteRune('-')
|
||||
pos := buf.Len()
|
||||
for i := 1; ; i++ {
|
||||
buf.WriteString(strconv.Itoa(i))
|
||||
if _, found := ids.vals[util.BytesToReadOnlyString(buf.Bytes())]; !found {
|
||||
break
|
||||
}
|
||||
buf.Truncate(pos)
|
||||
}
|
||||
}
|
||||
|
||||
ids.vals[buf.String()] = struct{}{}
|
||||
|
||||
})
|
||||
}
|
||||
|
||||
func (ids *idFactory) Put(value []byte) {
|
||||
ids.vals[util.BytesToReadOnlyString(value)] = struct{}{}
|
||||
}
|
Reference in New Issue
Block a user