mirror of
https://github.com/gohugoio/hugo.git
synced 2025-08-15 20:44:01 +02:00
@@ -203,6 +203,7 @@ func removeTOMLIdentifier(datum []byte) []byte {

 // HandleYAMLMetaData unmarshals YAML-encoded datum and returns a Go interface
 // representing the encoded data structure.
+// TODO(bep) 2errors remove these handlers (and hopefully package)
 func HandleYAMLMetaData(datum []byte) (map[string]interface{}, error) {
 	m := map[string]interface{}{}
 	err := yaml.Unmarshal(datum, &m)
parser/metadecoders/decoder.go (new file, 95 lines)
@@ -0,0 +1,95 @@
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package metadecoders

import (
	"encoding/json"

	"github.com/BurntSushi/toml"
	"github.com/chaseadamsio/goorgeous"
	"github.com/gohugoio/hugo/parser/pageparser"
	"github.com/pkg/errors"
	yaml "gopkg.in/yaml.v1"
)

type Format string

const (
	// These are the supported metadata formats in Hugo. Most of these are also
	// supported as /data formats.
	ORG  Format = "org"
	JSON Format = "json"
	TOML Format = "toml"
	YAML Format = "yaml"
)

// FormatFromFrontMatterType will return empty if not supported.
func FormatFromFrontMatterType(typ pageparser.ItemType) Format {
	switch typ {
	case pageparser.TypeFrontMatterJSON:
		return JSON
	case pageparser.TypeFrontMatterORG:
		return ORG
	case pageparser.TypeFrontMatterTOML:
		return TOML
	case pageparser.TypeFrontMatterYAML:
		return YAML
	default:
		return ""
	}
}

// UnmarshalToMap will unmarshal data in format f into a new map. This is
// what's needed for Hugo's front matter decoding.
func UnmarshalToMap(data []byte, f Format) (map[string]interface{}, error) {
	m := make(map[string]interface{})

	if data == nil {
		return m, nil
	}

	var err error

	switch f {
	case ORG:
		m, err = goorgeous.OrgHeaders(data)
	case JSON:
		err = json.Unmarshal(data, &m)
	case TOML:
		_, err = toml.Decode(string(data), &m)
	case YAML:
		err = yaml.Unmarshal(data, &m)

		// To support boolean keys, the `yaml` package unmarshals maps to
		// map[interface{}]interface{}. Here we recurse through the result
		// and change all maps to map[string]interface{} like we would've
		// gotten from `json`.
		if err == nil {
			for k, v := range m {
				if vv, changed := stringifyMapKeys(v); changed {
					m[k] = vv
				}
			}
		}
	default:
		return nil, errors.Errorf("unmarshal of format %q is not supported", f)
	}

	if err != nil {
		return nil, errors.Wrapf(err, "unmarshal failed for format %q", f)
	}

	return m, nil

}
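For orientation, here is a minimal, hedged sketch of how the new UnmarshalToMap entry point above could be called. It is not part of the diff; the import path simply mirrors the file's location in the repository, and the front matter snippet and variable names are invented for illustration.

// Illustrative caller of metadecoders.UnmarshalToMap; the YAML content is made up.
package main

import (
	"fmt"
	"log"

	"github.com/gohugoio/hugo/parser/metadecoders"
)

func main() {
	frontMatter := []byte("title: \"My Post\"\ndraft: true\n")

	// Decode the front matter bytes into a generic map using the YAML format.
	m, err := metadecoders.UnmarshalToMap(frontMatter, metadecoders.YAML)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(m["title"], m["draft"]) // My Post true
}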
parser/metadecoders/json.go (new file, 31 lines)
@@ -0,0 +1,31 @@
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package metadecoders

import "encoding/json"

// HandleJSONData unmarshals JSON-encoded datum and returns a Go interface
// representing the encoded data structure.
func HandleJSONData(datum []byte) (interface{}, error) {
	if datum == nil {
		// Package json returns an error on nil input.
		// Return an empty map to be consistent with our other supported
		// formats.
		return make(map[string]interface{}), nil
	}

	var f interface{}
	err := json.Unmarshal(datum, &f)
	return f, err
}
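A small illustrative sketch, not part of the diff, of how HandleJSONData differs from UnmarshalToMap: it returns an interface{} holding whatever shape the JSON encodes, so a caller working with a JSON object typically type-asserts to a map. The sample JSON and names below are invented.

// Illustrative only; uses the HandleJSONData signature shown above.
package main

import (
	"fmt"
	"log"

	"github.com/gohugoio/hugo/parser/metadecoders"
)

func main() {
	v, err := metadecoders.HandleJSONData([]byte(`{"title": "My Post", "tags": ["a", "b"]}`))
	if err != nil {
		log.Fatal(err)
	}
	// JSON objects decode to map[string]interface{} when the target is interface{}.
	m := v.(map[string]interface{})
	fmt.Println(m["title"], m["tags"])
}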
parser/metadecoders/yaml.go (new file, 84 lines)
@@ -0,0 +1,84 @@
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// The metadecoders package contains functions to decode metadata (e.g. page front matter)
// from different formats: TOML, YAML, JSON.
package metadecoders

import (
	"fmt"

	"github.com/spf13/cast"
	yaml "gopkg.in/yaml.v1"
)

// HandleYAMLData unmarshals YAML-encoded datum and returns a Go interface
// representing the encoded data structure.
func HandleYAMLData(datum []byte) (interface{}, error) {
	var m interface{}
	err := yaml.Unmarshal(datum, &m)
	if err != nil {
		return nil, err
	}

	// To support boolean keys, the `yaml` package unmarshals maps to
	// map[interface{}]interface{}. Here we recurse through the result
	// and change all maps to map[string]interface{} like we would've
	// gotten from `json`.
	if mm, changed := stringifyMapKeys(m); changed {
		return mm, nil
	}

	return m, nil
}

// stringifyMapKeys recurses into in and changes all instances of
// map[interface{}]interface{} to map[string]interface{}. This is useful to
// work around the impedance mismatch between JSON and YAML unmarshaling that's
// described here: https://github.com/go-yaml/yaml/issues/139
//
// Inspired by https://github.com/stripe/stripe-mock, MIT licensed
func stringifyMapKeys(in interface{}) (interface{}, bool) {
	switch in := in.(type) {
	case []interface{}:
		for i, v := range in {
			if vv, replaced := stringifyMapKeys(v); replaced {
				in[i] = vv
			}
		}
	case map[interface{}]interface{}:
		res := make(map[string]interface{})
		var (
			ok  bool
			err error
		)
		for k, v := range in {
			var ks string

			if ks, ok = k.(string); !ok {
				ks, err = cast.ToStringE(k)
				if err != nil {
					ks = fmt.Sprintf("%v", k)
				}
			}
			if vv, replaced := stringifyMapKeys(v); replaced {
				res[ks] = vv
			} else {
				res[ks] = v
			}
		}
		return res, true
	}

	return nil, false
}
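A brief sketch, not part of the diff, of the behavior stringifyMapKeys works around: the gopkg.in/yaml package used here decodes mappings into map[interface{}]interface{} so that non-string keys (booleans, numbers) stay representable, and that is the shape HandleYAMLData then normalizes. The sample document and the printed type in the comment are illustrative.

// Illustrative only: shows the interface{}-keyed map shape produced by the yaml package.
package main

import (
	"fmt"
	"log"

	yaml "gopkg.in/yaml.v1"
)

func main() {
	var m interface{}
	// Boolean and integer keys are valid YAML, so keys cannot be assumed to be strings.
	if err := yaml.Unmarshal([]byte("true: yes\n1: one\n"), &m); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%T\n", m) // expected: map[interface {}]interface {}
}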
@@ -16,87 +16,95 @@ package pageparser
 import "fmt"

 type Item struct {
-	typ itemType
+	Typ ItemType
 	pos pos
 	Val []byte
 }

 type Items []Item

 func (i Item) ValStr() string {
 	return string(i.Val)
 }

 func (i Item) IsText() bool {
-	return i.typ == tText
+	return i.Typ == tText
 }

 func (i Item) IsShortcodeName() bool {
-	return i.typ == tScName
+	return i.Typ == tScName
 }

 func (i Item) IsLeftShortcodeDelim() bool {
-	return i.typ == tLeftDelimScWithMarkup || i.typ == tLeftDelimScNoMarkup
+	return i.Typ == tLeftDelimScWithMarkup || i.Typ == tLeftDelimScNoMarkup
 }

 func (i Item) IsRightShortcodeDelim() bool {
-	return i.typ == tRightDelimScWithMarkup || i.typ == tRightDelimScNoMarkup
+	return i.Typ == tRightDelimScWithMarkup || i.Typ == tRightDelimScNoMarkup
 }

 func (i Item) IsShortcodeClose() bool {
-	return i.typ == tScClose
+	return i.Typ == tScClose
 }

 func (i Item) IsShortcodeParam() bool {
-	return i.typ == tScParam
+	return i.Typ == tScParam
 }

 func (i Item) IsShortcodeParamVal() bool {
-	return i.typ == tScParamVal
+	return i.Typ == tScParamVal
 }

 func (i Item) IsShortcodeMarkupDelimiter() bool {
-	return i.typ == tLeftDelimScWithMarkup || i.typ == tRightDelimScWithMarkup
+	return i.Typ == tLeftDelimScWithMarkup || i.Typ == tRightDelimScWithMarkup
 }

 func (i Item) IsFrontMatter() bool {
 	return i.Typ >= TypeFrontMatterYAML && i.Typ <= TypeFrontMatterORG
 }

 func (i Item) IsDone() bool {
-	return i.typ == tError || i.typ == tEOF
+	return i.Typ == tError || i.Typ == tEOF
 }

 func (i Item) IsEOF() bool {
-	return i.typ == tEOF
+	return i.Typ == tEOF
 }

 func (i Item) IsError() bool {
-	return i.typ == tError
+	return i.Typ == tError
 }

 func (i Item) String() string {
 	switch {
-	case i.typ == tEOF:
+	case i.Typ == tEOF:
 		return "EOF"
-	case i.typ == tError:
+	case i.Typ == tError:
 		return string(i.Val)
-	case i.typ > tKeywordMarker:
+	case i.Typ > tKeywordMarker:
 		return fmt.Sprintf("<%s>", i.Val)
 	case len(i.Val) > 50:
-		return fmt.Sprintf("%v:%.20q...", i.typ, i.Val)
+		return fmt.Sprintf("%v:%.20q...", i.Typ, i.Val)
 	}
-	return fmt.Sprintf("%v:[%s]", i.typ, i.Val)
+	return fmt.Sprintf("%v:[%s]", i.Typ, i.Val)
 }

-type itemType int
+type ItemType int

 const (
-	tError itemType = iota
+	tError ItemType = iota
 	tEOF

 	// page items
-	tHTMLLead          // <
-	tSummaryDivider    // <!--more-->
-	tSummaryDividerOrg // # more
-	tFrontMatterYAML
-	tFrontMatterTOML
-	tFrontMatterJSON
-	tFrontMatterORG
+	TypeHTMLDocument       // document starting with < as first non-whitespace
+	TypeHTMLComment        // We ignore leading comments
+	TypeLeadSummaryDivider // <!--more-->
+	TypeSummaryDividerOrg  // # more
+	TypeFrontMatterYAML
+	TypeFrontMatterTOML
+	TypeFrontMatterJSON
+	TypeFrontMatterORG
+	TypeIgnore // // The BOM Unicode byte order marker and possibly others

 	// shortcode items
 	tLeftDelimScNoMarkup
@@ -33,8 +33,8 @@ const eof = -1
 type stateFunc func(*pageLexer) stateFunc

 type lexerShortcodeState struct {
-	currLeftDelimItem  itemType
-	currRightDelimItem itemType
+	currLeftDelimItem  ItemType
+	currRightDelimItem ItemType
 	currShortcodeName  string // is only set when a shortcode is in opened state
 	closingState       int    // > 0 = on its way to be closed
 	elementStepNum     int    // step number in element
@@ -50,14 +50,24 @@ type pageLexer struct {
 	pos     pos // input position
 	start   pos // item start position
 	width   pos // width of last element
 	lastPos pos // position of the last item returned by nextItem

 	contentSections int
+	// Set when we have parsed any summary divider
+	summaryDividerChecked bool

 	lexerShortcodeState

 	// items delivered to client
-	items []Item
+	items Items
 }

+// Implement the Result interface
+func (l *pageLexer) Iterator() *Iterator {
+	return l.newIterator()
+}
+
+func (l *pageLexer) Input() []byte {
+	return l.input
+
+}
+
 // note: the input position here is normally 0 (start), but
@@ -79,6 +89,10 @@ func newPageLexer(input []byte, inputPosition pos, stateStart stateFunc) *pageLe
 	return lexer
 }

+func (l *pageLexer) newIterator() *Iterator {
+	return &Iterator{l: l, lastPos: -1}
+}
+
 // main loop
 func (l *pageLexer) run() *pageLexer {
 	for l.state = l.stateStart; l.state != nil; {
@@ -89,6 +103,7 @@ func (l *pageLexer) run() *pageLexer {

 // Shortcode syntax
 var (
+	leftDelimSc           = []byte("{{")
 	leftDelimScNoMarkup   = []byte("{{<")
 	rightDelimScNoMarkup  = []byte(">}}")
 	leftDelimScWithMarkup = []byte("{{%")
@@ -99,11 +114,14 @@ var (

 // Page syntax
 var (
+	byteOrderMark     = '\ufeff'
 	summaryDivider    = []byte("<!--more-->")
 	summaryDividerOrg = []byte("# more")
 	delimTOML         = []byte("+++")
 	delimYAML         = []byte("---")
 	delimOrg          = []byte("#+")
+	htmlCOmmentStart  = []byte("<!--")
+	htmlCOmmentEnd    = []byte("-->")
 )

 func (l *pageLexer) next() rune {
@@ -131,13 +149,13 @@ func (l *pageLexer) backup() {
 }

 // sends an item back to the client.
-func (l *pageLexer) emit(t itemType) {
+func (l *pageLexer) emit(t ItemType) {
 	l.items = append(l.items, Item{t, l.start, l.input[l.start:l.pos]})
 	l.start = l.pos
 }

 // special case, do not send '\\' back to client
-func (l *pageLexer) ignoreEscapesAndEmit(t itemType) {
+func (l *pageLexer) ignoreEscapesAndEmit(t ItemType) {
 	val := bytes.Map(func(r rune) rune {
 		if r == '\\' {
 			return -1
@@ -160,25 +178,12 @@ func (l *pageLexer) ignore() {

 var lf = []byte("\n")

-// nice to have in error logs
-func (l *pageLexer) lineNum() int {
-	return bytes.Count(l.input[:l.lastPos], lf) + 1
-}
-
 // nil terminates the parser
 func (l *pageLexer) errorf(format string, args ...interface{}) stateFunc {
 	l.items = append(l.items, Item{tError, l.start, []byte(fmt.Sprintf(format, args...))})
 	return nil
 }

-// consumes and returns the next item
-func (l *pageLexer) nextItem() Item {
-	item := l.items[0]
-	l.items = l.items[1:]
-	l.lastPos = item.pos
-	return item
-}
-
 func (l *pageLexer) consumeCRLF() bool {
 	var consumed bool
 	for _, r := range crLf {
@@ -192,12 +197,28 @@ func (l *pageLexer) consumeCRLF() bool {
 }

 func lexMainSection(l *pageLexer) stateFunc {
+	// Fast forward as far as possible.
+	var l1, l2, l3 int
+	if !l.summaryDividerChecked {
+		// TODO(bep) 2errors make the summary divider per type
+		l1 = l.index(summaryDivider)
+		l2 = l.index(summaryDividerOrg)
+		if l1 == -1 && l2 == -1 {
+			l.summaryDividerChecked = true
+		}
+	}
+	l3 = l.index(leftDelimSc)
+	skip := minPositiveIndex(l1, l2, l3)
+	if skip > 0 {
+		l.pos += pos(skip)
+	}
+
 	for {
 		if l.isShortCodeStart() {
 			if l.pos > l.start {
 				l.emit(tText)
 			}
-			if bytes.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) {
+			if l.hasPrefix(leftDelimScWithMarkup) {
 				l.currLeftDelimItem = tLeftDelimScWithMarkup
 				l.currRightDelimItem = tRightDelimScWithMarkup
 			} else {
@@ -207,21 +228,21 @@ func lexMainSection(l *pageLexer) stateFunc {
 			return lexShortcodeLeftDelim
 		}

-		if l.contentSections <= 1 {
-			if bytes.HasPrefix(l.input[l.pos:], summaryDivider) {
+		if !l.summaryDividerChecked {
+			if l.hasPrefix(summaryDivider) {
 				if l.pos > l.start {
 					l.emit(tText)
 				}
-				l.contentSections++
+				l.summaryDividerChecked = true
 				l.pos += pos(len(summaryDivider))
-				l.emit(tSummaryDivider)
-			} else if bytes.HasPrefix(l.input[l.pos:], summaryDividerOrg) {
+				l.emit(TypeLeadSummaryDivider)
+			} else if l.hasPrefix(summaryDividerOrg) {
 				if l.pos > l.start {
 					l.emit(tText)
 				}
-				l.contentSections++
+				l.summaryDividerChecked = true
 				l.pos += pos(len(summaryDividerOrg))
-				l.emit(tSummaryDividerOrg)
+				l.emit(TypeSummaryDividerOrg)
 			}
 		}

@@ -237,7 +258,7 @@ func lexMainSection(l *pageLexer) stateFunc {
 }

 func (l *pageLexer) isShortCodeStart() bool {
-	return bytes.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) || bytes.HasPrefix(l.input[l.pos:], leftDelimScNoMarkup)
+	return l.hasPrefix(leftDelimScWithMarkup) || l.hasPrefix(leftDelimScNoMarkup)
 }

 func lexIntroSection(l *pageLexer) stateFunc {
@@ -250,28 +271,37 @@ LOOP:

 		switch {
 		case r == '+':
-			return l.lexFrontMatterSection(tFrontMatterTOML, r, "TOML", delimTOML)
+			return l.lexFrontMatterSection(TypeFrontMatterTOML, r, "TOML", delimTOML)
 		case r == '-':
-			return l.lexFrontMatterSection(tFrontMatterYAML, r, "YAML", delimYAML)
+			return l.lexFrontMatterSection(TypeFrontMatterYAML, r, "YAML", delimYAML)
 		case r == '{':
 			return lexFrontMatterJSON
 		case r == '#':
 			return lexFrontMatterOrgMode
+		case r == byteOrderMark:
+			l.emit(TypeIgnore)
 		case !isSpace(r) && !isEndOfLine(r):
+			// No front matter.
 			if r == '<' {
-				l.emit(tHTMLLead)
-				// Not need to look further. Hugo treats this as plain HTML,
-				// no front matter, no shortcodes, no nothing.
-				l.pos = pos(len(l.input))
-				l.emit(tText)
-				break LOOP
+				l.backup()
+				if l.hasPrefix(htmlCOmmentStart) {
+					right := l.index(htmlCOmmentEnd)
+					if right == -1 {
+						return l.errorf("starting HTML comment with no end")
+					}
+					l.pos += pos(right) + pos(len(htmlCOmmentEnd))
+					l.emit(TypeHTMLComment)
+				} else {
+					// Not need to look further. Hugo treats this as plain HTML,
+					// no front matter, no shortcodes, no nothing.
+					l.pos = pos(len(l.input))
+					l.emit(TypeHTMLDocument)
+				}
 			}
-			return l.errorf("failed to detect front matter type; got unknown identifier %q", r)
+			break LOOP
 		}
 	}

-	l.contentSections = 1
-
 	// Now move on to the shortcodes.
 	return lexMainSection
 }
@@ -324,7 +354,7 @@ func lexFrontMatterJSON(l *pageLexer) stateFunc {
 	}

 	l.consumeCRLF()
-	l.emit(tFrontMatterJSON)
+	l.emit(TypeFrontMatterJSON)

 	return lexMainSection
 }
@@ -338,7 +368,7 @@ func lexFrontMatterOrgMode(l *pageLexer) stateFunc {

 	l.backup()

-	if !bytes.HasPrefix(l.input[l.pos:], delimOrg) {
+	if !l.hasPrefix(delimOrg) {
 		// TODO(bep) consider error
 		return lexMainSection
 	}
@@ -351,7 +381,7 @@ LOOP:

 		switch {
 		case r == '\n':
-			if !bytes.HasPrefix(l.input[l.pos:], delimOrg) {
+			if !l.hasPrefix(delimOrg) {
 				break LOOP
 			}
 		case r == eof:
@@ -360,24 +390,25 @@ LOOP:
 		}
 	}

-	l.emit(tFrontMatterORG)
+	l.emit(TypeFrontMatterORG)

 	return lexMainSection

 }

 func (l *pageLexer) printCurrentInput() {
 	fmt.Printf("input[%d:]: %q", l.pos, string(l.input[l.pos:]))
 }

 // Handle YAML or TOML front matter.
-func (l *pageLexer) lexFrontMatterSection(tp itemType, delimr rune, name string, delim []byte) stateFunc {
+func (l *pageLexer) lexFrontMatterSection(tp ItemType, delimr rune, name string, delim []byte) stateFunc {

 	for i := 0; i < 2; i++ {
 		if r := l.next(); r != delimr {
 			return l.errorf("invalid %s delimiter", name)
 		}
 	}

 	if !l.consumeCRLF() {
 		return l.errorf("invalid %s delimiter", name)
 	}

 	// We don't care about the delimiters.
 	l.ignore()

@@ -387,7 +418,7 @@ func (l *pageLexer) lexFrontMatterSection(tp itemType, delimr rune, name string,
 			return l.errorf("EOF looking for end %s front matter delimiter", name)
 		}
 		if isEndOfLine(r) {
-			if bytes.HasPrefix(l.input[l.pos:], delim) {
+			if l.hasPrefix(delim) {
 				l.emit(tp)
 				l.pos += 3
 				l.consumeCRLF()
@@ -402,7 +433,7 @@ func (l *pageLexer) lexFrontMatterSection(tp itemType, delimr rune, name string,

 func lexShortcodeLeftDelim(l *pageLexer) stateFunc {
 	l.pos += pos(len(l.currentLeftShortcodeDelim()))
-	if bytes.HasPrefix(l.input[l.pos:], leftComment) {
+	if l.hasPrefix(leftComment) {
 		return lexShortcodeComment
 	}
 	l.emit(l.currentLeftShortcodeDelimItem())
@@ -412,7 +443,7 @@ func lexShortcodeLeftDelim(l *pageLexer) stateFunc {
 }

 func lexShortcodeComment(l *pageLexer) stateFunc {
-	posRightComment := bytes.Index(l.input[l.pos:], append(rightComment, l.currentRightShortcodeDelim()...))
+	posRightComment := l.index(append(rightComment, l.currentRightShortcodeDelim()...))
 	if posRightComment <= 1 {
 		return l.errorf("comment must be closed")
 	}
@@ -493,7 +524,7 @@ func lexShortcodeParam(l *pageLexer, escapedQuoteStart bool) stateFunc {

 }

-func lexShortcodeQuotedParamVal(l *pageLexer, escapedQuotedValuesAllowed bool, typ itemType) stateFunc {
+func lexShortcodeQuotedParamVal(l *pageLexer, escapedQuotedValuesAllowed bool, typ ItemType) stateFunc {
 	openQuoteFound := false
 	escapedInnerQuoteFound := false
 	escapedQuoteState := 0
@@ -592,7 +623,7 @@ Loop:
 }

 func lexEndOfShortcode(l *pageLexer) stateFunc {
-	if bytes.HasPrefix(l.input[l.pos:], l.currentRightShortcodeDelim()) {
+	if l.hasPrefix(l.currentRightShortcodeDelim()) {
 		return lexShortcodeRightDelim
 	}
 	switch r := l.next(); {
@@ -606,7 +637,7 @@ func lexEndOfShortcode(l *pageLexer) stateFunc {

 // scans the elements inside shortcode tags
 func lexInsideShortcode(l *pageLexer) stateFunc {
-	if bytes.HasPrefix(l.input[l.pos:], l.currentRightShortcodeDelim()) {
+	if l.hasPrefix(l.currentRightShortcodeDelim()) {
 		return lexShortcodeRightDelim
 	}
 	switch r := l.next(); {
@@ -643,11 +674,19 @@ func lexInsideShortcode(l *pageLexer) stateFunc {

 // state helpers

-func (l *pageLexer) currentLeftShortcodeDelimItem() itemType {
+func (l *pageLexer) index(sep []byte) int {
+	return bytes.Index(l.input[l.pos:], sep)
+}
+
+func (l *pageLexer) hasPrefix(prefix []byte) bool {
+	return bytes.HasPrefix(l.input[l.pos:], prefix)
+}
+
+func (l *pageLexer) currentLeftShortcodeDelimItem() ItemType {
 	return l.currLeftDelimItem
 }

-func (l *pageLexer) currentRightShortcodeDelimItem() itemType {
+func (l *pageLexer) currentRightShortcodeDelimItem() ItemType {
 	return l.currRightDelimItem
 }

@@ -668,6 +707,23 @@ func (l *pageLexer) currentRightShortcodeDelim() []byte {

 // helper functions

+// returns the min index > 0
+func minPositiveIndex(indices ...int) int {
+	min := -1
+
+	for _, j := range indices {
+		if j <= 0 {
+			continue
+		}
+		if min == -1 {
+			min = j
+		} else if j < min {
+			min = j
+		}
+	}
+	return min
+}
+
 func isSpace(r rune) bool {
 	return r == ' ' || r == '\t'
 }
parser/pageparser/pagelexer_test.go (new file, 29 lines)
@@ -0,0 +1,29 @@
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package pageparser

import (
	"testing"

	"github.com/stretchr/testify/require"
)

func TestMinPositiveIndex(t *testing.T) {
	assert := require.New(t)
	assert.Equal(1, minPositiveIndex(4, 1, 2, 3))
	assert.Equal(2, minPositiveIndex(4, 0, -2, 2, 5))
	assert.Equal(-1, minPositiveIndex())
	assert.Equal(-1, minPositiveIndex(-2, -3))

}
@@ -17,72 +17,90 @@
 // See slides here: http://cuddle.googlecode.com/hg/talk/lex.html
 package pageparser

-func Parse(input []byte) *Tokens {
-	return ParseFrom(input, 0)
-}
+import (
+	"bytes"
+	"io"
+	"io/ioutil"
+
+	"github.com/pkg/errors"
+)
+
+// Result holds the parse result.
+type Result interface {
+	// Iterator returns a new Iterator positioned at the beginning of the parse tree.
+	Iterator() *Iterator
+	// Input returns the input to Parse.
+	Input() []byte
+}

-func ParseFrom(input []byte, from int) *Tokens {
+var _ Result = (*pageLexer)(nil)
+
+// Parse parses the page in the given reader.
+func Parse(r io.Reader) (Result, error) {
+	b, err := ioutil.ReadAll(r)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to read page content")
+	}
+	lexer := newPageLexer(b, 0, lexIntroSection)
+	lexer.run()
+	return lexer, nil
+
+}
+
+func parseMainSection(input []byte, from int) Result {
 	lexer := newPageLexer(input, pos(from), lexMainSection) // TODO(bep) 2errors
 	lexer.run()
-	return &Tokens{lexer: lexer}
+	return lexer
 }

-type Tokens struct {
-	lexer     *pageLexer
-	token     [3]Item // 3-item look-ahead is what we currently need
-	peekCount int
+// An Iterator has methods to iterate a parsed page with support going back
+// if needed.
+type Iterator struct {
+	l       *pageLexer
+	lastPos pos // position of the last item returned by nextItem
 }

-func (t *Tokens) Next() Item {
-	if t.peekCount > 0 {
-		t.peekCount--
-	} else {
-		t.token[0] = t.lexer.nextItem()
+// consumes and returns the next item
+func (t *Iterator) Next() Item {
+	t.lastPos++
+	return t.current()
+}
+
+var errIndexOutOfBounds = Item{tError, 0, []byte("no more tokens")}
+
+func (t *Iterator) current() Item {
+	if t.lastPos >= pos(len(t.l.items)) {
+		return errIndexOutOfBounds
 	}
-	return t.token[t.peekCount]
+	return t.l.items[t.lastPos]
 }

 // backs up one token.
-func (t *Tokens) Backup() {
-	t.peekCount++
-}
-
-// backs up two tokens.
-func (t *Tokens) Backup2(t1 Item) {
-	t.token[1] = t1
-	t.peekCount = 2
-}
-
-// backs up three tokens.
-func (t *Tokens) Backup3(t2, t1 Item) {
-	t.token[1] = t1
-	t.token[2] = t2
-	t.peekCount = 3
+func (t *Iterator) Backup() {
+	if t.lastPos < 0 {
+		panic("need to go forward before going back")
+	}
+	t.lastPos--
 }

 // check for non-error and non-EOF types coming next
-func (t *Tokens) IsValueNext() bool {
+func (t *Iterator) IsValueNext() bool {
 	i := t.Peek()
-	return i.typ != tError && i.typ != tEOF
+	return i.Typ != tError && i.Typ != tEOF
 }

 // look at, but do not consume, the next item
 // repeated, sequential calls will return the same item
-func (t *Tokens) Peek() Item {
-	if t.peekCount > 0 {
-		return t.token[t.peekCount-1]
-	}
-	t.peekCount = 1
-	t.token[0] = t.lexer.nextItem()
-	return t.token[0]
+func (t *Iterator) Peek() Item {
+	return t.l.items[t.lastPos+1]
 }

 // Consume is a convenience method to consume the next n tokens,
 // but back off Errors and EOF.
-func (t *Tokens) Consume(cnt int) {
+func (t *Iterator) Consume(cnt int) {
 	for i := 0; i < cnt; i++ {
 		token := t.Next()
-		if token.typ == tError || token.typ == tEOF {
+		if token.Typ == tError || token.Typ == tEOF {
 			t.Backup()
 			break
 		}
@@ -90,6 +108,6 @@ func (t *Tokens) Consume(cnt int) {
 }

 // LineNumber returns the current line number. Used for logging.
-func (t *Tokens) LineNumber() int {
-	return t.lexer.lineNum()
+func (t *Iterator) LineNumber() int {
+	return bytes.Count(t.l.input[:t.current().pos], lf) + 1
 }
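To make the new Result/Iterator surface above concrete, a hedged usage sketch follows. It is not part of the diff and uses only calls that appear in it (Parse, Iterator, Next, IsFrontMatter, IsEOF, IsError, ValStr); the sample page content is invented.

// Illustrative consumer of the pageparser API introduced in this diff.
package main

import (
	"fmt"
	"log"
	"strings"

	"github.com/gohugoio/hugo/parser/pageparser"
)

func main() {
	page := "---\ntitle: \"Hello\"\n---\n\nSome text.\n"

	res, err := pageparser.Parse(strings.NewReader(page))
	if err != nil {
		log.Fatal(err)
	}

	it := res.Iterator()
	for {
		item := it.Next()
		// Stop on EOF or a lexing error.
		if item.IsEOF() || item.IsError() {
			break
		}
		if item.IsFrontMatter() {
			fmt.Printf("front matter: %s\n", item.ValStr())
		}
	}
}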
@@ -26,27 +26,26 @@ type lexerTest struct {
 	items []Item
 }

-func nti(tp itemType, val string) Item {
+func nti(tp ItemType, val string) Item {
 	return Item{tp, 0, []byte(val)}
 }

 var (
 	tstJSON                = `{ "a": { "b": "\"Hugo\"}" } }`
-	tstHTMLLead            = nti(tHTMLLead, " <")
-	tstFrontMatterTOML     = nti(tFrontMatterTOML, "foo = \"bar\"\n")
-	tstFrontMatterYAML     = nti(tFrontMatterYAML, "foo: \"bar\"\n")
-	tstFrontMatterYAMLCRLF = nti(tFrontMatterYAML, "foo: \"bar\"\r\n")
-	tstFrontMatterJSON     = nti(tFrontMatterJSON, tstJSON+"\r\n")
+	tstFrontMatterTOML     = nti(TypeFrontMatterTOML, "\nfoo = \"bar\"\n")
+	tstFrontMatterYAML     = nti(TypeFrontMatterYAML, "\nfoo: \"bar\"\n")
+	tstFrontMatterYAMLCRLF = nti(TypeFrontMatterYAML, "\r\nfoo: \"bar\"\r\n")
+	tstFrontMatterJSON     = nti(TypeFrontMatterJSON, tstJSON+"\r\n")
 	tstSomeText            = nti(tText, "\nSome text.\n")
-	tstSummaryDivider      = nti(tSummaryDivider, "<!--more-->")
-	tstSummaryDividerOrg   = nti(tSummaryDividerOrg, "# more")
+	tstSummaryDivider      = nti(TypeLeadSummaryDivider, "<!--more-->")
+	tstSummaryDividerOrg   = nti(TypeSummaryDividerOrg, "# more")

 	tstORG = `
 #+TITLE: T1
 #+AUTHOR: A1
 #+DESCRIPTION: D1
 `
-	tstFrontMatterORG = nti(tFrontMatterORG, tstORG)
+	tstFrontMatterORG = nti(TypeFrontMatterORG, tstORG)
 )

 var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$")
@@ -54,8 +53,15 @@ var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$")
 // TODO(bep) a way to toggle ORG mode vs the rest.
 var frontMatterTests = []lexerTest{
 	{"empty", "", []Item{tstEOF}},
-	{"HTML Document", ` <html> `, []Item{tstHTMLLead, nti(tText, "html> "), tstEOF}},
+	{"Byte order mark", "\ufeff\nSome text.\n", []Item{nti(TypeIgnore, "\ufeff"), tstSomeText, tstEOF}},
+	{"HTML Document", ` <html> `, []Item{nti(TypeHTMLDocument, " <html> "), tstEOF}},
+	{"HTML Document 2", `<html><h1>Hugo Rocks</h1></html>`, []Item{nti(TypeHTMLDocument, "<html><h1>Hugo Rocks</h1></html>"), tstEOF}},
 	{"No front matter", "\nSome text.\n", []Item{tstSomeText, tstEOF}},
 	{"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []Item{tstFrontMatterYAML, tstSomeText, tstEOF}},
+	{"YAML empty front matter", "---\n---\n\nSome text.\n", []Item{nti(TypeFrontMatterYAML, "\n"), tstSomeText, tstEOF}},
+
+	{"YAML commented out front matter", "<!--\n---\nfoo: \"bar\"\n---\n-->\nSome text.\n", []Item{nti(TypeHTMLComment, "<!--\n---\nfoo: \"bar\"\n---\n-->"), tstSomeText, tstEOF}},
+
 	// Note that we keep all bytes as they are, but we need to handle CRLF
 	{"YAML front matter CRLF", "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n", []Item{tstFrontMatterYAMLCRLF, tstSomeText, tstEOF}},
 	{"TOML front matter", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstEOF}},
@@ -80,11 +86,12 @@ func TestFrontMatter(t *testing.T) {
 func collect(input []byte, skipFrontMatter bool, stateStart stateFunc) (items []Item) {
 	l := newPageLexer(input, 0, stateStart)
 	l.run()
+	t := l.newIterator()

 	for {
-		item := l.nextItem()
+		item := t.Next()
 		items = append(items, item)
-		if item.typ == tEOF || item.typ == tError {
+		if item.Typ == tEOF || item.Typ == tError {
 			break
 		}
 	}
@@ -97,7 +104,7 @@ func equal(i1, i2 []Item) bool {
 		return false
 	}
 	for k := range i1 {
-		if i1[k].typ != i2[k].typ {
+		if i1[k].Typ != i2[k].Typ {
 			return false
 		}
 		if !reflect.DeepEqual(i1[k].Val, i2[k].Val) {