Introduce a tree map for all content

This commit introduces a new data structure to store pages and their resources.

This data structure is backed by radix trees.

This simplifies tree operations, makes all pages a bundle, and paves the way for #6310.

It also solves a set of annoying issues (see list below).

Although performance was not the motivation behind this, this commit also makes Hugo in general a little bit faster and more memory efficient (see benchmarks), especially for partial rebuilds on content edits, but also when taxonomies are in use.

```
name                                   old time/op    new time/op    delta
SiteNew/Bundle_with_image/Edit-16        1.32ms ± 8%    1.00ms ± 9%  -24.42%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file/Edit-16    1.28ms ± 0%    0.94ms ± 0%  -26.26%  (p=0.029 n=4+4)
SiteNew/Tags_and_categories/Edit-16      33.9ms ± 2%    21.8ms ± 1%  -35.67%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs/Edit-16            40.6ms ± 1%    37.7ms ± 3%   -7.20%  (p=0.029 n=4+4)
SiteNew/Deep_content_tree/Edit-16        56.7ms ± 0%    51.7ms ± 1%   -8.82%  (p=0.029 n=4+4)
SiteNew/Many_HTML_templates/Edit-16      19.9ms ± 2%    18.3ms ± 3%   -7.64%  (p=0.029 n=4+4)
SiteNew/Page_collections/Edit-16         37.9ms ± 4%    34.0ms ± 2%  -10.28%  (p=0.029 n=4+4)
SiteNew/Bundle_with_image-16             10.7ms ± 0%    10.6ms ± 0%   -1.15%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file-16         10.8ms ± 0%    10.7ms ± 0%   -1.05%  (p=0.029 n=4+4)
SiteNew/Tags_and_categories-16           43.2ms ± 1%    39.6ms ± 1%   -8.35%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs-16                 47.6ms ± 1%    47.3ms ± 0%     ~     (p=0.057 n=4+4)
SiteNew/Deep_content_tree-16             73.0ms ± 1%    74.2ms ± 1%     ~     (p=0.114 n=4+4)
SiteNew/Many_HTML_templates-16           37.9ms ± 0%    38.1ms ± 1%     ~     (p=0.114 n=4+4)
SiteNew/Page_collections-16              53.6ms ± 1%    54.7ms ± 1%   +2.09%  (p=0.029 n=4+4)

name                                   old alloc/op   new alloc/op   delta
SiteNew/Bundle_with_image/Edit-16         486kB ± 0%     430kB ± 0%  -11.47%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file/Edit-16     265kB ± 0%     209kB ± 0%  -21.06%  (p=0.029 n=4+4)
SiteNew/Tags_and_categories/Edit-16      13.6MB ± 0%     8.8MB ± 0%  -34.93%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs/Edit-16            66.5MB ± 0%    63.9MB ± 0%   -3.95%  (p=0.029 n=4+4)
SiteNew/Deep_content_tree/Edit-16        28.8MB ± 0%    25.8MB ± 0%  -10.55%  (p=0.029 n=4+4)
SiteNew/Many_HTML_templates/Edit-16      6.16MB ± 0%    5.56MB ± 0%   -9.86%  (p=0.029 n=4+4)
SiteNew/Page_collections/Edit-16         16.9MB ± 0%    16.0MB ± 0%   -5.19%  (p=0.029 n=4+4)
SiteNew/Bundle_with_image-16             2.28MB ± 0%    2.29MB ± 0%   +0.35%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file-16         2.07MB ± 0%    2.07MB ± 0%     ~     (p=0.114 n=4+4)
SiteNew/Tags_and_categories-16           14.3MB ± 0%    13.2MB ± 0%   -7.30%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs-16                 69.1MB ± 0%    69.0MB ± 0%     ~     (p=0.343 n=4+4)
SiteNew/Deep_content_tree-16             31.3MB ± 0%    31.8MB ± 0%   +1.49%  (p=0.029 n=4+4)
SiteNew/Many_HTML_templates-16           10.8MB ± 0%    10.9MB ± 0%   +1.11%  (p=0.029 n=4+4)
SiteNew/Page_collections-16              21.4MB ± 0%    21.6MB ± 0%   +1.15%  (p=0.029 n=4+4)

name                                   old allocs/op  new allocs/op  delta
SiteNew/Bundle_with_image/Edit-16         4.74k ± 0%     3.86k ± 0%  -18.57%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file/Edit-16     4.73k ± 0%     3.85k ± 0%  -18.58%  (p=0.029 n=4+4)
SiteNew/Tags_and_categories/Edit-16        301k ± 0%      198k ± 0%  -34.14%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs/Edit-16              389k ± 0%      373k ± 0%   -4.07%  (p=0.029 n=4+4)
SiteNew/Deep_content_tree/Edit-16          338k ± 0%      262k ± 0%  -22.63%  (p=0.029 n=4+4)
SiteNew/Many_HTML_templates/Edit-16        102k ± 0%       88k ± 0%  -13.81%  (p=0.029 n=4+4)
SiteNew/Page_collections/Edit-16           176k ± 0%      152k ± 0%  -13.32%  (p=0.029 n=4+4)
SiteNew/Bundle_with_image-16              26.8k ± 0%     26.8k ± 0%   +0.05%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file-16          26.8k ± 0%     26.8k ± 0%   +0.05%  (p=0.029 n=4+4)
SiteNew/Tags_and_categories-16             273k ± 0%      245k ± 0%  -10.36%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs-16                   396k ± 0%      398k ± 0%   +0.39%  (p=0.029 n=4+4)
SiteNew/Deep_content_tree-16               317k ± 0%      325k ± 0%   +2.53%  (p=0.029 n=4+4)
SiteNew/Many_HTML_templates-16             146k ± 0%      147k ± 0%   +0.98%  (p=0.029 n=4+4)
SiteNew/Page_collections-16                210k ± 0%      215k ± 0%   +2.44%  (p=0.029 n=4+4)
```

Fixes #6312
Fixes #6087
Fixes #6738
Fixes #6412
Fixes #6743
Fixes #6875
Fixes #6034
Fixes #6902
Fixes #6173
Fixes #6590
This commit is contained in:
Bjørn Erik Pedersen
2019-09-10 11:26:34 +02:00
parent e5329f13c0
commit eada236f87
71 changed files with 4859 additions and 2531 deletions

View File

@@ -14,6 +14,7 @@
package hugolib
import (
"context"
"io"
"path/filepath"
"sort"
@@ -28,8 +29,8 @@ import (
"github.com/gohugoio/hugo/output"
"github.com/gohugoio/hugo/parser/metadecoders"
"github.com/gohugoio/hugo/common/para"
"github.com/gohugoio/hugo/hugofs"
"github.com/pkg/errors"
"github.com/gohugoio/hugo/source"
@@ -77,11 +78,16 @@ type HugoSites struct {
// As loaded from the /data dirs
data map[string]interface{}
content *pageMaps
// Keeps track of bundle directories and symlinks to enable partial rebuilding.
ContentChanges *contentChangeMap
init *hugoSitesInit
workers *para.Workers
numWorkers int
*fatalErrorHandler
*testCounters
}
@@ -175,7 +181,7 @@ func (h *HugoSites) gitInfoForPage(p page.Page) (*gitmap.GitInfo, error) {
// siteInfos returns a page.Sites slice with one entry per element of h.Sites,
// in the same order, each taken from that site's Info field.
//
// NOTE(review): this span reads like a diff hunk whose +/- markers were
// stripped; the two assignments below are presumably the removed (&site.Info)
// and the added (site.Info) form of the same line — confirm against the
// repository before relying on either.
func (h *HugoSites) siteInfos() page.Sites {
infos := make(page.Sites, len(h.Sites))
for i, site := range h.Sites {
infos[i] = &site.Info
infos[i] = site.Info
}
return infos
}
@@ -245,25 +251,22 @@ func (h *HugoSites) PrintProcessingStats(w io.Writer) {
// GetContentPage finds a Page with content given the absolute filename.
// Returns nil if none found.
//
// NOTE(review): this span reads like a diff hunk whose +/- markers were
// stripped. The loops over h.Sites / s.rawAllPages are presumably the removed
// implementation and the h.content.walkBundles callback the added one —
// confirm against the repository.
func (h *HugoSites) GetContentPage(filename string) page.Page {
for _, s := range h.Sites {
pos := s.rawAllPages.findPagePosByFilename(filename)
if pos == -1 {
continue
}
return s.rawAllPages[pos]
}
var p page.Page
// If not found already, this may be bundled in another content file.
dir := filepath.Dir(filename)
for _, s := range h.Sites {
pos := s.rawAllPages.findPagePosByFilnamePrefix(dir)
if pos == -1 {
continue
h.content.walkBundles(func(b *contentNode) bool {
// Nodes without a page or without file info cannot match a filename.
if b.p == nil || b.fi == nil {
return false
}
return s.rawAllPages[pos]
}
return nil
// Record the node's page when its file's Filename() equals the argument.
// Returning true here presumably tells walkBundles to stop — TODO confirm.
if b.fi.Meta().Filename() == filename {
p = b.p
return true
}
return false
})
return p
}
// NewHugoSites creates a new collection of sites given the input sites, building
@@ -282,11 +285,22 @@ func newHugoSites(cfg deps.DepsCfg, sites ...*Site) (*HugoSites, error) {
var contentChangeTracker *contentChangeMap
numWorkers := config.GetNumWorkerMultiplier()
if numWorkers > len(sites) {
numWorkers = len(sites)
}
var workers *para.Workers
if numWorkers > 1 {
workers = para.New(numWorkers)
}
h := &HugoSites{
running: cfg.Running,
multilingual: langConfig,
multihost: cfg.Cfg.GetBool("multihost"),
Sites: sites,
workers: workers,
numWorkers: numWorkers,
init: &hugoSitesInit{
data: lazy.New(),
layouts: lazy.New(),
@@ -400,13 +414,27 @@ func applyDeps(cfg deps.DepsCfg, sites ...*Site) error {
return err
}
d.Site = &s.Info
d.Site = s.Info
siteConfig, err := loadSiteConfig(s.language)
if err != nil {
return errors.Wrap(err, "load site config")
}
s.siteConfigConfig = siteConfig
pm := &pageMap{
contentMap: newContentMap(contentMapConfig{
lang: s.Lang(),
taxonomyConfig: s.siteCfg.taxonomiesConfig.Values(),
taxonomyDisabled: !s.isEnabled(page.KindTaxonomy),
taxonomyTermDisabled: !s.isEnabled(page.KindTaxonomyTerm),
pageDisabled: !s.isEnabled(page.KindPage),
}),
s: s,
}
s.PageCollections = newPageCollections(pm)
s.siteRefLinker, err = newSiteRefLinker(s.language, s)
return err
}
@@ -525,6 +553,26 @@ func (h *HugoSites) resetLogs() {
}
}
// withSite invokes fn once for every site in h.Sites.
//
// When h.workers is nil the calls are made one at a time in slice order and
// the first non-nil error is returned immediately, leaving the remaining
// sites untouched. Otherwise each call is handed to a runner obtained from
// h.workers.Start and the result of the runner's Wait is returned.
func (h *HugoSites) withSite(fn func(s *Site) error) error {
	if h.workers != nil {
		// The context returned by Start is not used here.
		runner, _ := h.workers.Start(context.Background())
		for i := range h.Sites {
			// Take a per-iteration copy so each closure sees its own site.
			site := h.Sites[i]
			runner.Run(func() error {
				return fn(site)
			})
		}
		return runner.Wait()
	}

	for _, site := range h.Sites {
		err := fn(site)
		if err != nil {
			return err
		}
	}
	return nil
}
func (h *HugoSites) createSitesFromConfig(cfg config.Provider) error {
oldLangs, _ := h.Cfg.Get("languagesSorted").(langs.Languages)
@@ -567,7 +615,7 @@ func (h *HugoSites) createSitesFromConfig(cfg config.Provider) error {
// toSiteInfos returns a []*SiteInfo with one entry per element of h.Sites,
// in the same order, each taken from that site's Info field.
//
// NOTE(review): this span reads like a diff hunk whose +/- markers were
// stripped; the two assignments below are presumably the removed (&s.Info)
// and the added (s.Info) form of the same line — confirm against the
// repository before relying on either.
func (h *HugoSites) toSiteInfos() []*SiteInfo {
infos := make([]*SiteInfo, len(h.Sites))
for i, s := range h.Sites {
infos[i] = &s.Info
infos[i] = s.Info
}
return infos
}
@@ -603,9 +651,6 @@ type BuildCfg struct {
// For regular builds, this will always return true.
// TODO(bep) rename/work this.
func (cfg *BuildCfg) shouldRender(p *pageState) bool {
if !p.render {
return false
}
if p.forceRender {
return true
}
@@ -652,9 +697,21 @@ func (h *HugoSites) renderCrossSitesArtifacts() error {
}
// removePageByFilename removes the content registered for filename.
//
// NOTE(review): this span reads like a diff hunk whose +/- markers were
// stripped. The loop calling s.removePageFilename is presumably the removed
// implementation and the h.content.withMaps call the added one — confirm
// against the repository.
func (h *HugoSites) removePageByFilename(filename string) {
for _, s := range h.Sites {
s.removePageFilename(filename)
}
// For each page map, delete the bundles selected by the matcher below.
h.content.withMaps(func(m *pageMap) error {
m.deleteBundleMatching(func(b *contentNode) bool {
// Only nodes that carry both a page and file info can match.
if b.p == nil {
return false
}
if b.fi == nil {
return false
}
return b.fi.Meta().Filename() == filename
})
// The callback itself never fails.
return nil
})
}
func (h *HugoSites) createPageCollections() error {
@@ -683,19 +740,13 @@ func (h *HugoSites) createPageCollections() error {
}
func (s *Site) preparePagesForRender(isRenderingSite bool, idx int) error {
for _, p := range s.workAllPages {
if err := p.initOutputFormat(isRenderingSite, idx); err != nil {
return err
var err error
s.pageMap.withEveryBundlePage(func(p *pageState) bool {
if err = p.initOutputFormat(isRenderingSite, idx); err != nil {
return true
}
}
for _, p := range s.headlessPages {
if err := p.initOutputFormat(isRenderingSite, idx); err != nil {
return err
}
}
return false
})
return nil
}
@@ -837,49 +888,60 @@ func (h *HugoSites) findPagesByKindIn(kind string, inPages page.Pages) page.Page
}
// resetPageState calls Reset on the cp of every page output that has one.
//
// NOTE(review): this span reads like a diff hunk whose +/- markers were
// stripped. The nested loops over h.Sites / s.rawAllPages are presumably the
// removed implementation and the h.content.walkBundles callback the added
// one — confirm against the repository.
func (h *HugoSites) resetPageState() {
for _, s := range h.Sites {
for _, p := range s.rawAllPages {
for _, po := range p.pageOutputs {
if po.cp == nil {
continue
}
po.cp.Reset()
}
h.content.walkBundles(func(n *contentNode) bool {
// Nodes without a page have nothing to reset.
if n.p == nil {
return false
}
}
p := n.p
for _, po := range p.pageOutputs {
// Outputs without a cp are skipped.
if po.cp == nil {
continue
}
po.cp.Reset()
}
// Always false: every node is visited (presumably false means
// "keep walking" — TODO confirm in walkBundles).
return false
})
}
// resetPageStateFromEvents resets page output content that depends on any of
// the identities in idset:
//   - a page output's cp is Reset when its dependencyTracker.Search finds one
//     of the ids;
//   - every non-nil cp of a page is Reset when one of the page's shortcodes
//     has an info implementing identity.Manager whose Search finds one of the
//     ids.
//
// NOTE(review): this span reads like a diff hunk whose +/- markers were
// stripped, which is why the OUTPUTS label and both inner loops appear twice.
// The loops over h.Sites / s.rawAllPages (with the PAGES label) are presumably
// the removed implementation and the h.content.walkBundles callback the added
// one — confirm against the repository.
func (h *HugoSites) resetPageStateFromEvents(idset identity.Identities) {
for _, s := range h.Sites {
PAGES:
for _, p := range s.rawAllPages {
OUTPUTS:
for _, po := range p.pageOutputs {
if po.cp == nil {
continue
}
for id := range idset {
if po.cp.dependencyTracker.Search(id) != nil {
po.cp.Reset()
continue OUTPUTS
}
}
h.content.walkBundles(func(n *contentNode) bool {
// Nodes without a page have nothing to reset.
if n.p == nil {
return false
}
p := n.p
OUTPUTS:
for _, po := range p.pageOutputs {
if po.cp == nil {
continue
}
for _, s := range p.shortcodeState.shortcodes {
for id := range idset {
if idm, ok := s.info.(identity.Manager); ok && idm.Search(id) != nil {
for _, po := range p.pageOutputs {
if po.cp != nil {
po.cp.Reset()
}
}
continue PAGES
}
for id := range idset {
if po.cp.dependencyTracker.Search(id) != nil {
po.cp.Reset()
continue OUTPUTS
}
}
}
}
// Pages without shortcode state have no shortcodes to inspect.
if p.shortcodeState == nil {
return false
}
for _, s := range p.shortcodeState.shortcodes {
for id := range idset {
if idm, ok := s.info.(identity.Manager); ok && idm.Search(id) != nil {
for _, po := range p.pageOutputs {
if po.cp != nil {
po.cp.Reset()
}
}
return false
}
}
}
return false
})
}
// Used in partial reloading to determine if the change is in a bundle.