Move the emoji parsing to pageparser

This avoids parsing the page content twice when `enableEmoji=true`.

This commit also makes some general improvements to the parser, making it much faster overall:

```bash
benchmark                     old ns/op     new ns/op     delta
BenchmarkShortcodeLexer-4     90258         101730        +12.71%
BenchmarkParse-4              148940        15037         -89.90%

benchmark                     old allocs     new allocs     delta
BenchmarkShortcodeLexer-4     456            700            +53.51%
BenchmarkParse-4              28             33             +17.86%

benchmark                     old bytes     new bytes     delta
BenchmarkShortcodeLexer-4     69875         81014         +15.94%
BenchmarkParse-4              8128          8304          +2.17%
```

Running some site benchmarks with Emoji support turned on:

```bash
benchmark                                                                                     old ns/op     new ns/op     delta
BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4     924556797     818115620     -11.51%

benchmark                                                                                     old allocs     new allocs     delta
BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4     4112613        4133787        +0.51%

benchmark                                                                                     old bytes     new bytes     delta
BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4     426982864     424363832     -0.61%
```

Fixes #5534
This commit is contained in:
Bjørn Erik Pedersen
2018-12-17 21:03:23 +01:00
parent a8853f1c5a
commit 9cd54cab20
13 changed files with 388 additions and 71 deletions

View File

@@ -27,7 +27,7 @@ import (
// Result holds the parse result.
type Result interface {
// Iterator returns a new Iterator positioned at the benning of the parse tree.
// Iterator returns a new Iterator positioned at the beginning of the parse tree.
Iterator() *Iterator
// Input returns the input to Parse.
Input() []byte
@@ -35,27 +35,21 @@ type Result interface {
var _ Result = (*pageLexer)(nil)
// Parse parses the page in the given reader.
func Parse(r io.Reader) (Result, error) {
// Parse parses the page in the given reader according to the given Config.
func Parse(r io.Reader, cfg Config) (Result, error) {
b, err := ioutil.ReadAll(r)
if err != nil {
return nil, errors.Wrap(err, "failed to read page content")
}
return parseBytes(b)
return parseBytes(b, cfg)
}
func parseBytes(b []byte) (Result, error) {
lexer := newPageLexer(b, 0, lexIntroSection)
func parseBytes(b []byte, cfg Config) (Result, error) {
lexer := newPageLexer(b, lexIntroSection, cfg)
lexer.run()
return lexer, nil
}
func parseMainSection(input []byte, from int) Result {
lexer := newPageLexer(input, from, lexMainSection)
lexer.run()
return lexer
}
// An Iterator has methods to iterate a parsed page with support going back
// if needed.
type Iterator struct {