Move the emoji parsing to pageparser

This avoids double parsing the page content when `enableEmoji=true`.

This commit also adds some general improvements to the parser, making it much faster overall:

```bash
benchmark                     old ns/op     new ns/op     delta
BenchmarkShortcodeLexer-4     90258         101730        +12.71%
BenchmarkParse-4              148940        15037         -89.90%

benchmark                     old allocs     new allocs     delta
BenchmarkShortcodeLexer-4     456            700            +53.51%
BenchmarkParse-4              28             33             +17.86%

benchmark                     old bytes     new bytes     delta
BenchmarkShortcodeLexer-4     69875         81014         +15.94%
BenchmarkParse-4              8128          8304          +2.17%
```

Running some site benchmarks with Emoji support turned on:

```bash
benchmark                                                                                     old ns/op     new ns/op     delta
BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4     924556797     818115620     -11.51%

benchmark                                                                                     old allocs     new allocs     delta
BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4     4112613        4133787        +0.51%

benchmark                                                                                     old bytes     new bytes     delta
BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4     426982864     424363832     -0.61%
```

Fixes #5534
2025-08-14 20:33:59 +02:00 · 2018-12-17 21:03:23 +01:00
parent a8853f1c5a
commit 9cd54cab20
13 changed files with 388 additions and 71 deletions
--- a/parser/pageparser/pageparser.go
+++ b/parser/pageparser/pageparser.go
@@ -27,7 +27,7 @@ import (

 // Result holds the parse result.
 type Result interface {
-	// Iterator returns a new Iterator positioned at the benning of the parse tree.
+	// Iterator returns a new Iterator positioned at the beginning of the parse tree.
 	Iterator() *Iterator
 	// Input returns the input to Parse.
 	Input() []byte
@@ -35,27 +35,21 @@ type Result interface {

 var _ Result = (*pageLexer)(nil)

-// Parse parses the page in the given reader.
-func Parse(r io.Reader) (Result, error) {
+// Parse parses the page in the given reader according to the given Config.
+func Parse(r io.Reader, cfg Config) (Result, error) {
 	b, err := ioutil.ReadAll(r)
 	if err != nil {
 		return nil, errors.Wrap(err, "failed to read page content")
 	}
-	return parseBytes(b)
+	return parseBytes(b, cfg)
 }

-func parseBytes(b []byte) (Result, error) {
-	lexer := newPageLexer(b, 0, lexIntroSection)
+func parseBytes(b []byte, cfg Config) (Result, error) {
+	lexer := newPageLexer(b, lexIntroSection, cfg)
 	lexer.run()
 	return lexer, nil
 }

-func parseMainSection(input []byte, from int) Result {
-	lexer := newPageLexer(input, from, lexMainSection)
-	lexer.run()
-	return lexer
-}
-
 // An Iterator has methods to iterate a parsed page with support going back
 // if needed.
 type Iterator struct {