Lazily calculate WordCount, ReadingTime and FuzzyWordCount

This avoids having to execute these expensive operations for sites not using these values.

This commit sums up a set of word-counting and auto-summary-related performance improvements.

The effect of these depends somewhat on which features your site uses, but a benchmark across 4 Hugo sites in the wild shows promise:

```
benchmark           old ns/op       new ns/op       delta
BenchmarkHugo-4     21293005843     20032857342     -5.92%

benchmark           old allocs     new allocs     delta
BenchmarkHugo-4     65290922       65186032       -0.16%

benchmark           old bytes      new bytes      delta
BenchmarkHugo-4     9771213416     9681866464     -0.91%
```

Closes #2378
This commit is contained in:
Bjørn Erik Pedersen
2016-08-17 13:41:48 +02:00
parent 4abaec5c04
commit dd45e6d7e5
7 changed files with 103 additions and 57 deletions

View File

@@ -107,9 +107,10 @@ type Source struct {
source.File
}
// PageMeta holds the word-count related values of a page together with its
// Weight.
// NOTE(review): this hunk is rendered without +/- markers; the exported
// WordCount, FuzzyWordCount and ReadingTime fields look like the removed side
// of the diff and the unexported fields below them like the added side —
// confirm against the raw patch.
type PageMeta struct {
WordCount int
FuzzyWordCount int
ReadingTime int
// wordCount, fuzzyWordCount and readingTime are assigned in analyzePage and
// read through the WordCount, FuzzyWordCount and ReadingTime methods.
wordCount int
fuzzyWordCount int
readingTime int
// pageMetaInit wraps the computation in analyzePage (pageMetaInit.Do).
pageMetaInit sync.Once
Weight int
}
@@ -485,28 +486,48 @@ func (p *Page) ReadFrom(buf io.Reader) (int64, error) {
return int64(len(p.rawContent)), nil
}
// WordCount returns p.wordCount, calling analyzePage first so the value is
// computed before it is read.
func (p *Page) WordCount() int {
p.analyzePage()
return p.wordCount
}
// ReadingTime returns p.readingTime, calling analyzePage first so the value is
// computed before it is read.
func (p *Page) ReadingTime() int {
p.analyzePage()
return p.readingTime
}
// FuzzyWordCount returns p.fuzzyWordCount, calling analyzePage first so the
// value is computed before it is read.
func (p *Page) FuzzyWordCount() int {
p.analyzePage()
return p.fuzzyWordCount
}
// analyzePage fills in wordCount, fuzzyWordCount and readingTime inside
// pageMetaInit.Do.
// NOTE(review): this hunk is rendered without +/- markers, so the statements
// assigning the exported p.WordCount, p.FuzzyWordCount and p.ReadingTime look
// like the removed side of the diff interleaved with the added lines — confirm
// against the raw patch before reading this as a single function body.
func (p *Page) analyzePage() {
if p.isCJKLanguage {
p.WordCount = 0
for _, word := range p.PlainWords() {
runeCount := utf8.RuneCountInString(word)
if len(word) == runeCount {
p.WordCount++
} else {
p.WordCount += runeCount
p.pageMetaInit.Do(func() {
if p.isCJKLanguage {
p.wordCount = 0
for _, word := range p.PlainWords() {
runeCount := utf8.RuneCountInString(word)
// Byte length equals rune count only when every rune is a single
// byte; such a word is counted once.
if len(word) == runeCount {
p.wordCount++
} else {
// Otherwise every rune of the word is counted individually.
p.wordCount += runeCount
}
}
} else {
p.wordCount = helpers.TotalWords(p.Plain())
}
} else {
p.WordCount = len(p.PlainWords())
}
p.FuzzyWordCount = (p.WordCount + 100) / 100 * 100
// TODO(bep) is set in a test. Fix that.
if p.fuzzyWordCount == 0 {
// Integer division makes this the next multiple of 100 above wordCount.
p.fuzzyWordCount = (p.wordCount + 100) / 100 * 100
}
if p.isCJKLanguage {
p.ReadingTime = (p.WordCount + 500) / 501
} else {
p.ReadingTime = (p.WordCount + 212) / 213
}
// Rounded-up integer division: by 501 for CJK pages, by 213 otherwise.
if p.isCJKLanguage {
p.readingTime = (p.wordCount + 500) / 501
} else {
p.readingTime = (p.wordCount + 212) / 213
}
})
}
func (p *Page) permalink() (*url.URL, error) {