Resolve error handling/parser related TODOs

See #5324
This commit is contained in:
Bjørn Erik Pedersen
2018-10-23 14:37:09 +02:00
parent f669ef6bec
commit 6636cf1bea
14 changed files with 67 additions and 68 deletions

View File

@@ -103,10 +103,9 @@ const (
tEOF
// page items
TypeHTMLDocument // document starting with < as first non-whitespace
TypeHTMLStart // document starting with < as first non-whitespace
TypeHTMLComment // We ignore leading comments
TypeLeadSummaryDivider // <!--more-->
TypeSummaryDividerOrg // # more
TypeLeadSummaryDivider // <!--more-->, # more
TypeFrontMatterYAML
TypeFrontMatterTOML
TypeFrontMatterJSON

View File

@@ -48,6 +48,8 @@ type pageLexer struct {
start int // item start position
width int // width of last element
// The summary divider to look for.
summaryDivider []byte
// Set when we have parsed any summary divider
summaryDividerChecked bool
@@ -69,7 +71,6 @@ func (l *pageLexer) Input() []byte {
// note: the input position here is normally 0 (start), but
// can be set if position of first shortcode is known
// TODO(bep) 2errors byte
func newPageLexer(input []byte, inputPosition int, stateStart stateFunc) *pageLexer {
lexer := &pageLexer{
input: input,
@@ -117,7 +118,7 @@ var (
delimTOML = []byte("+++")
delimYAML = []byte("---")
delimOrg = []byte("#+")
htmlCOmmentStart = []byte("<!--")
htmlCommentStart = []byte("<!--")
htmlCOmmentEnd = []byte("-->")
)
@@ -195,17 +196,18 @@ func (l *pageLexer) consumeCRLF() bool {
func lexMainSection(l *pageLexer) stateFunc {
// Fast forward as far as possible.
var l1, l2, l3 int
if !l.summaryDividerChecked {
// TODO(bep) 2errors make the summary divider per type
l1 = l.index(summaryDivider)
l2 = l.index(summaryDividerOrg)
if l1 == -1 && l2 == -1 {
var l1, l2 int
if !l.summaryDividerChecked && l.summaryDivider != nil {
l1 = l.index(l.summaryDivider)
if l1 == -1 {
l.summaryDividerChecked = true
}
}
l3 = l.index(leftDelimSc)
skip := minPositiveIndex(l1, l2, l3)
l2 = l.index(leftDelimSc)
skip := minPositiveIndex(l1, l2)
if skip > 0 {
l.pos += skip
}
@@ -225,23 +227,14 @@ func lexMainSection(l *pageLexer) stateFunc {
return lexShortcodeLeftDelim
}
if !l.summaryDividerChecked {
if l.hasPrefix(summaryDivider) {
if !l.summaryDividerChecked && l.summaryDivider != nil {
if l.hasPrefix(l.summaryDivider) {
if l.pos > l.start {
l.emit(tText)
}
l.summaryDividerChecked = true
l.pos += len(summaryDivider)
//l.consumeCRLF()
l.pos += len(l.summaryDivider)
l.emit(TypeLeadSummaryDivider)
} else if l.hasPrefix(summaryDividerOrg) {
if l.pos > l.start {
l.emit(tText)
}
l.summaryDividerChecked = true
l.pos += len(summaryDividerOrg)
//l.consumeCRLF()
l.emit(TypeSummaryDividerOrg)
}
}
@@ -261,6 +254,8 @@ func (l *pageLexer) isShortCodeStart() bool {
}
func lexIntroSection(l *pageLexer) stateFunc {
l.summaryDivider = summaryDivider
LOOP:
for {
r := l.next()
@@ -283,7 +278,7 @@ LOOP:
// No front matter.
if r == '<' {
l.backup()
if l.hasPrefix(htmlCOmmentStart) {
if l.hasPrefix(htmlCommentStart) {
right := l.index(htmlCOmmentEnd)
if right == -1 {
return l.errorf("starting HTML comment with no end")
@@ -291,10 +286,14 @@ LOOP:
l.pos += right + len(htmlCOmmentEnd)
l.emit(TypeHTMLComment)
} else {
// Not need to look further. Hugo treats this as plain HTML,
// no front matter, no shortcodes, no nothing.
l.pos = len(l.input)
l.emit(TypeHTMLDocument)
if l.pos > l.start {
l.emit(tText)
}
l.next()
// This is the start of a plain HTML document with no
// front matter. It can still contain shortcodes, so we
// have to keep looking.
l.emit(TypeHTMLStart)
}
}
break LOOP
@@ -365,10 +364,11 @@ func lexFrontMatterOrgMode(l *pageLexer) stateFunc {
#+DESCRIPTION: Just another golang parser for org content!
*/
l.summaryDivider = summaryDividerOrg
l.backup()
if !l.hasPrefix(delimOrg) {
// TODO(bep) consider error
return lexMainSection
}

View File

@@ -48,7 +48,7 @@ func Parse(r io.Reader) (Result, error) {
}
func parseMainSection(input []byte, from int) Result {
lexer := newPageLexer(input, from, lexMainSection) // TODO(bep) 2errors
lexer := newPageLexer(input, from, lexMainSection)
lexer.run()
return lexer
}

View File

@@ -38,7 +38,7 @@ var (
tstFrontMatterJSON = nti(TypeFrontMatterJSON, tstJSON+"\r\n")
tstSomeText = nti(tText, "\nSome text.\n")
tstSummaryDivider = nti(TypeLeadSummaryDivider, "<!--more-->")
tstSummaryDividerOrg = nti(TypeSummaryDividerOrg, "# more")
tstHtmlStart = nti(TypeHTMLStart, "<")
tstORG = `
#+TITLE: T1
@@ -54,8 +54,8 @@ var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$")
var frontMatterTests = []lexerTest{
{"empty", "", []Item{tstEOF}},
{"Byte order mark", "\ufeff\nSome text.\n", []Item{nti(TypeIgnore, "\ufeff"), tstSomeText, tstEOF}},
{"HTML Document", ` <html> `, []Item{nti(TypeHTMLDocument, " <html> "), tstEOF}},
{"HTML Document 2", `<html><h1>Hugo Rocks</h1></html>`, []Item{nti(TypeHTMLDocument, "<html><h1>Hugo Rocks</h1></html>"), tstEOF}},
{"HTML Document", ` <html> `, []Item{nti(tText, " "), tstHtmlStart, nti(tText, "html> "), tstEOF}},
{"HTML Document with shortcode", `<html>{{< sc1 >}}</html>`, []Item{tstHtmlStart, nti(tText, "html>"), tstLeftNoMD, tstSC1, tstRightNoMD, nti(tText, "</html>"), tstEOF}},
{"No front matter", "\nSome text.\n", []Item{tstSomeText, tstEOF}},
{"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []Item{tstFrontMatterYAML, tstSomeText, tstEOF}},
{"YAML empty front matter", "---\n---\n\nSome text.\n", []Item{nti(TypeFrontMatterYAML, ""), tstSomeText, tstEOF}},
@@ -65,7 +65,7 @@ var frontMatterTests = []lexerTest{
{"TOML front matter", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstEOF}},
{"JSON front matter", tstJSON + "\r\n\nSome text.\n", []Item{tstFrontMatterJSON, tstSomeText, tstEOF}},
{"ORG front matter", tstORG + "\nSome text.\n", []Item{tstFrontMatterORG, tstSomeText, tstEOF}},
{"Summary divider ORG", tstORG + "\nSome text.\n# more\nSome text.\n", []Item{tstFrontMatterORG, tstSomeText, tstSummaryDividerOrg, tstSomeText, tstEOF}},
{"Summary divider ORG", tstORG + "\nSome text.\n# more\nSome text.\n", []Item{tstFrontMatterORG, tstSomeText, nti(TypeLeadSummaryDivider, "# more"), tstSomeText, tstEOF}},
{"Summary divider", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n<!--more-->\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstSummaryDivider, tstSomeText, tstEOF}},
}