Introduce a tree map for all content

This commit introduces a new data structure to store pages and their resources.

This data structure is backed by radix trees.

This simplifies tree operations, makes all pages a bundle, and paves the way for #6310.

It also solves a set of annoying issues (see list below).
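To illustrate the idea, here is a minimal sketch of a radix-tree-backed content map. It assumes the tree implementation is github.com/armon/go-radix (the LongestPrefix/WalkPrefix/Delete calls in the diff below match its API); the `contentNode` type and the keys are simplified stand-ins, not Hugo's actual types:

```go
package main

import (
	"fmt"

	radix "github.com/armon/go-radix"
)

// contentNode stands in for the node type stored in the tree; in Hugo it
// carries the page and its bundled resources.
type contentNode struct {
	title string
}

func main() {
	// Sections (and pages) are keyed by their content-root-relative path.
	sections := radix.New()
	sections.Insert("/blog", &contentNode{title: "blog section"})
	sections.Insert("/blog/2019", &contentNode{title: "2019"})
	sections.Insert("/docs", &contentNode{title: "docs section"})

	// Resolving a ref becomes a longest-prefix match instead of a scan
	// over all pages.
	if s, v, found := sections.LongestPrefix("/blog/2019/my-post"); found {
		fmt.Printf("owning section: %s (%s)\n", s, v.(*contentNode).title)
	}

	// A partial rebuild only needs to revisit the affected subtree.
	sections.WalkPrefix("/blog", func(key string, v interface{}) bool {
		fmt.Println("would re-assemble:", key)
		return false // false = keep walking
	})
}
```

With paths as keys, a ref lookup is a longest-prefix match rather than a walk over every page, and a content edit only needs to touch the prefix that changed.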

Not a motivation behind this, but this commit also makes Hugo in general a little bit faster and more memory efficient (see benchmarks), especially for partial rebuilds on content edits, but also when taxonomies are in use.

```
name                                   old time/op    new time/op    delta
SiteNew/Bundle_with_image/Edit-16        1.32ms ± 8%    1.00ms ± 9%  -24.42%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file/Edit-16    1.28ms ± 0%    0.94ms ± 0%  -26.26%  (p=0.029 n=4+4)
SiteNew/Tags_and_categories/Edit-16      33.9ms ± 2%    21.8ms ± 1%  -35.67%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs/Edit-16            40.6ms ± 1%    37.7ms ± 3%   -7.20%  (p=0.029 n=4+4)
SiteNew/Deep_content_tree/Edit-16        56.7ms ± 0%    51.7ms ± 1%   -8.82%  (p=0.029 n=4+4)
SiteNew/Many_HTML_templates/Edit-16      19.9ms ± 2%    18.3ms ± 3%   -7.64%  (p=0.029 n=4+4)
SiteNew/Page_collections/Edit-16         37.9ms ± 4%    34.0ms ± 2%  -10.28%  (p=0.029 n=4+4)
SiteNew/Bundle_with_image-16             10.7ms ± 0%    10.6ms ± 0%   -1.15%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file-16         10.8ms ± 0%    10.7ms ± 0%   -1.05%  (p=0.029 n=4+4)
SiteNew/Tags_and_categories-16           43.2ms ± 1%    39.6ms ± 1%   -8.35%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs-16                 47.6ms ± 1%    47.3ms ± 0%     ~     (p=0.057 n=4+4)
SiteNew/Deep_content_tree-16             73.0ms ± 1%    74.2ms ± 1%     ~     (p=0.114 n=4+4)
SiteNew/Many_HTML_templates-16           37.9ms ± 0%    38.1ms ± 1%     ~     (p=0.114 n=4+4)
SiteNew/Page_collections-16              53.6ms ± 1%    54.7ms ± 1%   +2.09%  (p=0.029 n=4+4)

name                                   old alloc/op   new alloc/op   delta
SiteNew/Bundle_with_image/Edit-16         486kB ± 0%     430kB ± 0%  -11.47%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file/Edit-16     265kB ± 0%     209kB ± 0%  -21.06%  (p=0.029 n=4+4)
SiteNew/Tags_and_categories/Edit-16      13.6MB ± 0%     8.8MB ± 0%  -34.93%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs/Edit-16            66.5MB ± 0%    63.9MB ± 0%   -3.95%  (p=0.029 n=4+4)
SiteNew/Deep_content_tree/Edit-16        28.8MB ± 0%    25.8MB ± 0%  -10.55%  (p=0.029 n=4+4)
SiteNew/Many_HTML_templates/Edit-16      6.16MB ± 0%    5.56MB ± 0%   -9.86%  (p=0.029 n=4+4)
SiteNew/Page_collections/Edit-16         16.9MB ± 0%    16.0MB ± 0%   -5.19%  (p=0.029 n=4+4)
SiteNew/Bundle_with_image-16             2.28MB ± 0%    2.29MB ± 0%   +0.35%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file-16         2.07MB ± 0%    2.07MB ± 0%     ~     (p=0.114 n=4+4)
SiteNew/Tags_and_categories-16           14.3MB ± 0%    13.2MB ± 0%   -7.30%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs-16                 69.1MB ± 0%    69.0MB ± 0%     ~     (p=0.343 n=4+4)
SiteNew/Deep_content_tree-16             31.3MB ± 0%    31.8MB ± 0%   +1.49%  (p=0.029 n=4+4)
SiteNew/Many_HTML_templates-16           10.8MB ± 0%    10.9MB ± 0%   +1.11%  (p=0.029 n=4+4)
SiteNew/Page_collections-16              21.4MB ± 0%    21.6MB ± 0%   +1.15%  (p=0.029 n=4+4)

name                                   old allocs/op  new allocs/op  delta
SiteNew/Bundle_with_image/Edit-16         4.74k ± 0%     3.86k ± 0%  -18.57%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file/Edit-16     4.73k ± 0%     3.85k ± 0%  -18.58%  (p=0.029 n=4+4)
SiteNew/Tags_and_categories/Edit-16        301k ± 0%      198k ± 0%  -34.14%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs/Edit-16              389k ± 0%      373k ± 0%   -4.07%  (p=0.029 n=4+4)
SiteNew/Deep_content_tree/Edit-16          338k ± 0%      262k ± 0%  -22.63%  (p=0.029 n=4+4)
SiteNew/Many_HTML_templates/Edit-16        102k ± 0%       88k ± 0%  -13.81%  (p=0.029 n=4+4)
SiteNew/Page_collections/Edit-16           176k ± 0%      152k ± 0%  -13.32%  (p=0.029 n=4+4)
SiteNew/Bundle_with_image-16              26.8k ± 0%     26.8k ± 0%   +0.05%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file-16          26.8k ± 0%     26.8k ± 0%   +0.05%  (p=0.029 n=4+4)
SiteNew/Tags_and_categories-16             273k ± 0%      245k ± 0%  -10.36%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs-16                   396k ± 0%      398k ± 0%   +0.39%  (p=0.029 n=4+4)
SiteNew/Deep_content_tree-16               317k ± 0%      325k ± 0%   +2.53%  (p=0.029 n=4+4)
SiteNew/Many_HTML_templates-16             146k ± 0%      147k ± 0%   +0.98%  (p=0.029 n=4+4)
SiteNew/Page_collections-16                210k ± 0%      215k ± 0%   +2.44%  (p=0.029 n=4+4)
```

Fixes #6312
Fixes #6087
Fixes #6738
Fixes #6412
Fixes #6743
Fixes #6875
Fixes #6034
Fixes #6902
Fixes #6173
Fixes #6590
Author: Bjørn Erik Pedersen
Date:   2019-09-10 11:26:34 +02:00
Parent: e5329f13c0
Commit: eada236f87

71 changed files with 4859 additions and 2531 deletions


@@ -17,43 +17,25 @@ import (
"fmt"
"path"
"path/filepath"
"sort"
"strings"
"sync"
"time"
"github.com/gohugoio/hugo/resources/resource"
"github.com/gohugoio/hugo/common/herrors"
"github.com/pkg/errors"
"github.com/gohugoio/hugo/helpers"
"github.com/gohugoio/hugo/cache"
"github.com/gohugoio/hugo/resources/page"
)
// Used in the page cache to mark more than one hit for a given key.
var ambiguityFlag = &pageState{}
// PageCollections contains the page collections for a site.
type PageCollections struct {
pagesMap *pagesMap
// Includes absolutely all pages (of all types), including drafts etc.
rawAllPages pageStatePages
// rawAllPages plus additional pages created during the build process.
workAllPages pageStatePages
// Includes headless bundles, i.e. bundles that produce no output for its content page.
headlessPages pageStatePages
pageMap *pageMap
// Lazy initialized page collections
pages *lazyPagesFactory
regularPages *lazyPagesFactory
allPages *lazyPagesFactory
allRegularPages *lazyPagesFactory
// The index for .Site.GetPage etc.
pageIndex *cache.Lazy
}
// Pages returns all pages.
@@ -78,25 +60,6 @@ func (c *PageCollections) AllRegularPages() page.Pages {
return c.allRegularPages.get()
}
// Get initializes the index if not already done so, then
// looks up the given page ref, returns nil if no value found.
func (c *PageCollections) getFromCache(ref string) (page.Page, error) {
v, found, err := c.pageIndex.Get(ref)
if err != nil {
return nil, err
}
if !found {
return nil, nil
}
p := v.(page.Page)
if p != ambiguityFlag {
return p, nil
}
return nil, fmt.Errorf("page reference %q is ambiguous", ref)
}
type lazyPagesFactory struct {
pages page.Pages
@@ -115,83 +78,19 @@ func newLazyPagesFactory(factory page.PagesFactory) *lazyPagesFactory {
return &lazyPagesFactory{factory: factory}
}
func newPageCollections() *PageCollections {
return newPageCollectionsFromPages(nil)
}
func newPageCollections(m *pageMap) *PageCollections {
if m == nil {
panic("must provide a pageMap")
}
func newPageCollectionsFromPages(pages pageStatePages) *PageCollections {
c := &PageCollections{rawAllPages: pages}
c := &PageCollections{pageMap: m}
c.pages = newLazyPagesFactory(func() page.Pages {
pages := make(page.Pages, len(c.workAllPages))
for i, p := range c.workAllPages {
pages[i] = p
}
return pages
return m.createListAllPages()
})
c.regularPages = newLazyPagesFactory(func() page.Pages {
return c.findPagesByKindInWorkPages(page.KindPage, c.workAllPages)
})
c.pageIndex = cache.NewLazy(func() (map[string]interface{}, error) {
index := make(map[string]interface{})
add := func(ref string, p page.Page) {
ref = strings.ToLower(ref)
existing := index[ref]
if existing == nil {
index[ref] = p
} else if existing != ambiguityFlag && existing != p {
index[ref] = ambiguityFlag
}
}
for _, pageCollection := range []pageStatePages{c.workAllPages, c.headlessPages} {
for _, p := range pageCollection {
if p.IsPage() {
sourceRefs := p.sourceRefs()
for _, ref := range sourceRefs {
add(ref, p)
}
sourceRef := sourceRefs[0]
// Ref/Relref supports this potentially ambiguous lookup.
add(p.File().LogicalName(), p)
translationBaseName := p.File().TranslationBaseName()
dir, _ := path.Split(sourceRef)
dir = strings.TrimSuffix(dir, "/")
if translationBaseName == "index" {
add(dir, p)
add(path.Base(dir), p)
} else {
add(translationBaseName, p)
}
// We need a way to get to the current language version.
pathWithNoExtensions := path.Join(dir, translationBaseName)
add(pathWithNoExtensions, p)
} else {
sourceRefs := p.sourceRefs()
for _, ref := range sourceRefs {
add(ref, p)
}
ref := p.SectionsPath()
// index the canonical, unambiguous virtual ref
// e.g. /section
// (this may already have been indexed above)
add("/"+ref, p)
}
}
}
return index, nil
return c.findPagesByKindIn(page.KindPage, c.pages.get())
})
return c
@@ -249,64 +148,157 @@ func (c *PageCollections) getPage(typ string, sections ...string) page.Page {
return p
}
// Case insensitive page lookup.
// getPageRef resolves a Page from ref/relRef, with a slightly more comprehensive
// search path than getPageNew.
func (c *PageCollections) getPageRef(context page.Page, ref string) (page.Page, error) {
n, err := c.getContentNode(context, true, ref)
if err != nil || n == nil || n.p == nil {
return nil, err
}
return n.p, nil
}
func (c *PageCollections) getPageNew(context page.Page, ref string) (page.Page, error) {
var anError error
n, err := c.getContentNode(context, false, ref)
if err != nil || n == nil || n.p == nil {
return nil, err
}
return n.p, nil
}
ref = strings.ToLower(ref)
func (c *PageCollections) getSectionOrPage(ref string) (*contentNode, string) {
var n *contentNode
// Absolute (content root relative) reference.
if strings.HasPrefix(ref, "/") {
p, err := c.getFromCache(ref)
if err == nil && p != nil {
return p, nil
}
if err != nil {
anError = err
}
s, v, found := c.pageMap.sections.LongestPrefix(ref)
} else if context != nil {
if found {
n = v.(*contentNode)
}
if found && s == ref {
// A section
return n, ""
}
m := c.pageMap
filename := strings.TrimPrefix(strings.TrimPrefix(ref, s), "/")
langSuffix := "." + m.s.Lang()
// Trim both extension and any language code.
name := helpers.PathNoExt(filename)
name = strings.TrimSuffix(name, langSuffix)
// These are reserved bundle names and will always be stored by their owning
// folder name.
name = strings.TrimSuffix(name, "/index")
name = strings.TrimSuffix(name, "/_index")
if !found {
return nil, name
}
// Check if it's a section with filename provided.
if !n.p.File().IsZero() && n.p.File().LogicalName() == filename {
return n, name
}
return m.getPage(s, name), name
}
func (c *PageCollections) getContentNode(context page.Page, isReflink bool, ref string) (*contentNode, error) {
defer herrors.Recover()
ref = filepath.ToSlash(strings.ToLower(strings.TrimSpace(ref)))
if ref == "" {
ref = "/"
}
inRef := ref
var doSimpleLookup bool
if isReflink || context == nil {
// For Ref/Reflink and .Site.GetPage do simple name lookups for the potentially ambiguous myarticle.md and /myarticle.md,
// but not when we get ./myarticle*, section/myarticle.
doSimpleLookup = ref[0] != '.' || ref[0] == '/' && strings.Count(ref, "/") == 1
}
if context != nil && !strings.HasPrefix(ref, "/") {
// Try the page-relative path.
var dir string
if !context.File().IsZero() {
dir = filepath.ToSlash(context.File().Dir())
var base string
if context.File().IsZero() {
base = context.SectionsPath()
} else {
dir = context.SectionsPath()
}
ppath := path.Join("/", strings.ToLower(dir), ref)
p, err := c.getFromCache(ppath)
if err == nil && p != nil {
return p, nil
}
if err != nil {
anError = err
base = filepath.ToSlash(filepath.Dir(context.File().FileInfo().Meta().Path()))
}
ref = path.Join("/", strings.ToLower(base), ref)
}
if !strings.HasPrefix(ref, "/") {
ref = "/" + ref
}
m := c.pageMap
// It's either a section, a page in a section or a taxonomy node.
// Start with the most likely:
n, name := c.getSectionOrPage(ref)
if n != nil {
return n, nil
}
if !strings.HasPrefix(inRef, "/") {
// Many people will have "post/foo.md" in their content files.
p, err := c.getFromCache("/" + ref)
if err == nil && p != nil {
return p, nil
if n, _ := c.getSectionOrPage("/" + inRef); n != nil {
return n, nil
}
}
// Check if it's a taxonomy node
s, v, found := m.taxonomies.LongestPrefix(ref)
if found {
if !m.onSameLevel(ref, s) {
return nil, nil
}
return v.(*contentNode), nil
}
getByName := func(s string) (*contentNode, error) {
n := m.pageReverseIndex.Get(s)
if n != nil {
if n == ambigousContentNode {
return nil, fmt.Errorf("page reference %q is ambiguous", ref)
}
return n, nil
}
return nil, nil
}
var module string
if context != nil && !context.File().IsZero() {
module = context.File().FileInfo().Meta().Module()
}
if module == "" && !c.pageMap.s.home.File().IsZero() {
module = c.pageMap.s.home.File().FileInfo().Meta().Module()
}
if module != "" {
n, err := getByName(module + ref)
if err != nil {
anError = err
return nil, err
}
if n != nil {
return n, nil
}
}
// Last try.
ref = strings.TrimPrefix(ref, "/")
p, err := c.getFromCache(ref)
if err != nil {
anError = err
if !doSimpleLookup {
return nil, nil
}
if p == nil && anError != nil {
return nil, wrapErr(errors.Wrap(anError, "failed to resolve ref"), context)
}
// Ref/relref supports this potentially ambiguous lookup.
return getByName(name)
return p, nil
}
func (*PageCollections) findPagesByKindIn(kind string, inPages page.Pages) page.Pages {
@@ -318,238 +310,3 @@ func (*PageCollections) findPagesByKindIn(kind string, inPages page.Pages) page.
}
return pages
}
func (c *PageCollections) findPagesByKind(kind string) page.Pages {
return c.findPagesByKindIn(kind, c.Pages())
}
func (c *PageCollections) findWorkPagesByKind(kind string) pageStatePages {
var pages pageStatePages
for _, p := range c.workAllPages {
if p.Kind() == kind {
pages = append(pages, p)
}
}
return pages
}
func (*PageCollections) findPagesByKindInWorkPages(kind string, inPages pageStatePages) page.Pages {
var pages page.Pages
for _, p := range inPages {
if p.Kind() == kind {
pages = append(pages, p)
}
}
return pages
}
func (c *PageCollections) addPage(page *pageState) {
c.rawAllPages = append(c.rawAllPages, page)
}
func (c *PageCollections) removePageFilename(filename string) {
if i := c.rawAllPages.findPagePosByFilename(filename); i >= 0 {
c.clearResourceCacheForPage(c.rawAllPages[i])
c.rawAllPages = append(c.rawAllPages[:i], c.rawAllPages[i+1:]...)
}
}
func (c *PageCollections) removePage(page *pageState) {
if i := c.rawAllPages.findPagePos(page); i >= 0 {
c.clearResourceCacheForPage(c.rawAllPages[i])
c.rawAllPages = append(c.rawAllPages[:i], c.rawAllPages[i+1:]...)
}
}
func (c *PageCollections) replacePage(page *pageState) {
// will find existing page that matches filepath and remove it
c.removePage(page)
c.addPage(page)
}
func (c *PageCollections) clearResourceCacheForPage(page *pageState) {
if len(page.resources) > 0 {
page.s.ResourceSpec.DeleteCacheByPrefix(page.targetPaths().SubResourceBaseTarget)
}
}
func (c *PageCollections) assemblePagesMap(s *Site) error {
c.pagesMap = newPagesMap(s)
rootSections := make(map[string]bool)
// Add all branch nodes first.
for _, p := range c.rawAllPages {
rootSections[p.Section()] = true
if p.IsPage() {
continue
}
c.pagesMap.addPage(p)
}
// Create missing home page and the first level sections if no
// _index provided.
s.home = c.pagesMap.getOrCreateHome()
for k := range rootSections {
c.pagesMap.createSectionIfNotExists(k)
}
// Attach the regular pages to their section.
for _, p := range c.rawAllPages {
if p.IsNode() {
continue
}
c.pagesMap.addPage(p)
}
return nil
}
func (c *PageCollections) createWorkAllPages() error {
c.workAllPages = make(pageStatePages, 0, len(c.rawAllPages))
c.headlessPages = make(pageStatePages, 0)
var (
homeDates *resource.Dates
sectionDates *resource.Dates
siteLastmod time.Time
siteLastDate time.Time
sectionsParamId = "mainSections"
sectionsParamIdLower = strings.ToLower(sectionsParamId)
)
mainSections, mainSectionsFound := c.pagesMap.s.Info.Params()[sectionsParamIdLower]
var (
bucketsToRemove []string
rootBuckets []*pagesMapBucket
walkErr error
)
c.pagesMap.r.Walk(func(s string, v interface{}) bool {
bucket := v.(*pagesMapBucket)
parentBucket := c.pagesMap.parentBucket(s)
if parentBucket != nil {
if !mainSectionsFound && strings.Count(s, "/") == 1 && bucket.owner.IsSection() {
// Root section
rootBuckets = append(rootBuckets, bucket)
}
}
if bucket.owner.IsHome() {
if resource.IsZeroDates(bucket.owner) {
// Calculate dates from the page tree.
homeDates = &bucket.owner.m.Dates
}
}
sectionDates = nil
if resource.IsZeroDates(bucket.owner) {
sectionDates = &bucket.owner.m.Dates
}
if parentBucket != nil {
bucket.parent = parentBucket
if bucket.owner.IsSection() {
parentBucket.bucketSections = append(parentBucket.bucketSections, bucket)
}
}
if bucket.isEmpty() {
if bucket.owner.IsSection() && bucket.owner.File().IsZero() {
// Check for any nested section.
var hasDescendant bool
c.pagesMap.r.WalkPrefix(s, func(ss string, v interface{}) bool {
if s != ss {
hasDescendant = true
return true
}
return false
})
if !hasDescendant {
// This is an auto-created section with, now, nothing in it.
bucketsToRemove = append(bucketsToRemove, s)
return false
}
}
}
if !bucket.disabled {
c.workAllPages = append(c.workAllPages, bucket.owner)
}
if !bucket.view {
for _, p := range bucket.headlessPages {
ps := p.(*pageState)
ps.parent = bucket.owner
c.headlessPages = append(c.headlessPages, ps)
}
for _, p := range bucket.pages {
ps := p.(*pageState)
ps.parent = bucket.owner
c.workAllPages = append(c.workAllPages, ps)
if homeDates != nil {
homeDates.UpdateDateAndLastmodIfAfter(ps)
}
if sectionDates != nil {
sectionDates.UpdateDateAndLastmodIfAfter(ps)
}
if p.Lastmod().After(siteLastmod) {
siteLastmod = p.Lastmod()
}
if p.Date().After(siteLastDate) {
siteLastDate = p.Date()
}
}
}
return false
})
if walkErr != nil {
return walkErr
}
c.pagesMap.s.lastmod = siteLastmod
if !mainSectionsFound {
// Calculate main section
var (
maxRootBucketWeight int
maxRootBucket *pagesMapBucket
)
for _, b := range rootBuckets {
weight := len(b.pages) + (len(b.bucketSections) * 5)
if weight >= maxRootBucketWeight {
maxRootBucket = b
maxRootBucketWeight = weight
}
}
if maxRootBucket != nil {
// Try to make this as backwards compatible as possible.
mainSections = []string{maxRootBucket.owner.Section()}
}
}
c.pagesMap.s.Info.Params()[sectionsParamId] = mainSections
c.pagesMap.s.Info.Params()[sectionsParamIdLower] = mainSections
for _, key := range bucketsToRemove {
c.pagesMap.r.Delete(key)
}
sort.Sort(c.workAllPages)
return nil
}