Add a consolidated file cache

This commit reworks how file caching is performed in Hugo. Now there is only one way, and it can be configured.

This is the default configuration:

```toml
[caches]
[caches.getjson]
dir = ":cacheDir"
maxAge = -1
[caches.getcsv]
dir = ":cacheDir"
maxAge = -1
[caches.images]
dir = ":resourceDir/_gen"
maxAge = -1
[caches.assets]
dir = ":resourceDir/_gen"
maxAge = -1
```

You can override any of these cache settings in your own `config.toml`.

The placeholders explained:

`:cacheDir`: This is the value of the `cacheDir` config option if set (can also be set via OS env variable `HUGO_CACHEDIR`). It will fall back to `/opt/build/cache/hugo_cache/` on Netlify, or a `hugo_cache` directory below the OS temp dir for the others.
`:resourceDir`: This is the value of the `resourceDir` config option.

`maxAge` is the time in seconds before a cache entry will be evicted, -1 means forever and 0 effectively turns that particular cache off.

This means that if you run your builds on Netlify, all caches configured with `:cacheDir` will be saved and restored on the next build. For other CI vendors, please read their documentation. For a CircleCI example, see 6c3960a8f4/.circleci/config.yml

Fixes #5404
This commit is contained in:
Bjørn Erik Pedersen
2018-11-08 10:24:13 +01:00
parent 7d78a2afd3
commit f7aeaa6129
26 changed files with 1192 additions and 543 deletions

442
cache/filecache/filecache.go vendored Normal file
View File

@@ -0,0 +1,442 @@
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package filecache
import (
"bytes"
"io"
"io/ioutil"
"path"
"path/filepath"
"strings"
"time"
"github.com/gohugoio/hugo/common/hugio"
"github.com/gohugoio/hugo/helpers"
"github.com/gohugoio/hugo/hugolib/paths"
"github.com/pkg/errors"
"github.com/BurntSushi/locker"
"github.com/bep/mapstructure"
"github.com/spf13/afero"
)
// Configuration key, directory placeholder and the names of the built-in caches.
const (
	// cachesConfigKey is the configuration section the cache settings are read from.
	cachesConfigKey = "caches"

	// resourcesGenDir is the default directory of the images and assets caches.
	resourcesGenDir = ":resourceDir/_gen"

	cacheKeyGetJSON = "getjson"
	cacheKeyGetCSV  = "getcsv"
	cacheKeyImages  = "images"
	cacheKeyAssets  = "assets"
)

type (
	// cachesConfig holds one cacheConfig per cache name.
	cachesConfig map[string]cacheConfig

	// cacheConfig is the configuration of a single file cache.
	cacheConfig struct {
		// Max age of cache entries in this cache. Any items older than this
		// will be removed and not returned from the cache.
		// -1 means forever, 0 means cache is disabled.
		MaxAge int

		// The directory where files are stored.
		Dir string
	}
)

var (
	// defaultCacheConfig never expires its entries and lives below :cacheDir.
	defaultCacheConfig = cacheConfig{Dir: ":cacheDir", MaxAge: -1}

	// defaultCacheConfigs lists every known cache with its default settings.
	defaultCacheConfigs = map[string]cacheConfig{
		cacheKeyGetJSON: defaultCacheConfig,
		cacheKeyGetCSV:  defaultCacheConfig,
		cacheKeyImages:  {Dir: resourcesGenDir, MaxAge: -1},
		cacheKeyAssets:  {Dir: resourcesGenDir, MaxAge: -1},
	}
)
// Cache caches a set of files in a directory. This is usually a directory on
// disk, but since this is backed by an Afero file system, it can be anything.
type Cache struct {
// Fs is the filesystem the cached files are read from and written to.
Fs afero.Fs
// Max age in seconds. 0 disables the cache, a negative value means that
// entries never expire.
maxAge int
// nlocker provides the named locks, keyed by cleaned item id, that the
// methods on Cache take before touching an entry.
nlocker *locker.Locker
}
// ItemInfo contains info about a cached file.
type ItemInfo struct {
// Name is the file's name relative to the cache's filesystem. The Cache
// methods in this file set it to the cleaned id they were called with.
Name string
}
// NewCache returns a file cache that stores its entries in fs.
// maxAge is given in seconds; see Cache for how the value is interpreted.
func NewCache(fs afero.Fs, maxAge int) *Cache {
	c := new(Cache)
	c.Fs = fs
	c.maxAge = maxAge
	c.nlocker = locker.NewLocker()
	return c
}
// lockedFile wraps a file together with the function that releases the lock
// guarding it.
type lockedFile struct {
	afero.File
	unlock func()
}

// Close closes the underlying file and then releases the lock, also when
// closing the file fails.
func (l *lockedFile) Close() error {
	release := l.unlock
	defer release()

	return l.File.Close()
}
// WriteCloser returns a writer for the cache entry with the given id.
// The named lock for id is taken here and held until the returned writer is
// closed, so it's important that it's closed when done. If the file cannot be
// opened, the lock is released before the error is returned.
func (c *Cache) WriteCloser(id string) (ItemInfo, io.WriteCloser, error) {
	id = cleanID(id)
	info := ItemInfo{Name: id}

	c.nlocker.Lock(id)
	release := func() { c.nlocker.Unlock(id) }

	f, err := helpers.OpenFileForWriting(c.Fs, id)
	if err != nil {
		release()
		return info, nil, err
	}

	return info, &lockedFile{File: f, unlock: release}, nil
}
// ReadOrCreate tries to lookup the file in cache.
// If found, it is passed to read and then closed.
// If not found a new file is created and passed to create, which should close
// it when done.
//
// The returned ItemInfo carries the cleaned id. The returned error is the
// error from read or create, or the error from opening the new file.
// The whole operation runs under the named lock for id.
func (c *Cache) ReadOrCreate(id string,
	read func(info ItemInfo, r io.Reader) error,
	create func(info ItemInfo, w io.WriteCloser) error) (info ItemInfo, err error) {
	id = cleanID(id)

	c.nlocker.Lock(id)
	defer c.nlocker.Unlock(id)

	info = ItemInfo{Name: id}

	if r := c.getOrRemove(id); r != nil {
		// Register the Close before invoking read, so the file is released
		// even if read panics.
		defer r.Close()
		return info, read(info, r)
	}

	f, err := helpers.OpenFileForWriting(c.Fs, id)
	if err != nil {
		return info, err
	}

	// create owns f from here on and is responsible for closing it.
	return info, create(info, f)
}
// GetOrCreate tries to get the file with the given id from cache. If not found or expired, create will
// be invoked and the result cached.
// This method is protected by a named lock using the given id as identifier.
//
// The caller must close the returned reader. When the cache is disabled
// (max age 0), the reader produced by create is handed back as is. Otherwise
// the created content is written to the cache, the reader from create is
// closed, and an in-memory copy of the content is returned. If create or the
// cache write fails, the returned reader is nil.
func (c *Cache) GetOrCreate(id string, create func() (io.ReadCloser, error)) (ItemInfo, io.ReadCloser, error) {
	id = cleanID(id)

	c.nlocker.Lock(id)
	defer c.nlocker.Unlock(id)

	info := ItemInfo{Name: id}

	if r := c.getOrRemove(id); r != nil {
		return info, r, nil
	}

	r, err := create()
	if err != nil {
		return info, nil, err
	}

	if c.maxAge == 0 {
		// No caching.
		return info, hugio.ToReadCloser(r), nil
	}

	// The source is fully consumed below and never handed to the caller,
	// so it must be closed here to avoid leaking it.
	defer r.Close()

	// Tee the content into a buffer while writing it to the cache, so the
	// caller can read it without going back to the filesystem.
	var buff bytes.Buffer
	if err := afero.WriteReader(c.Fs, id, io.TeeReader(r, &buff)); err != nil {
		return info, nil, err
	}

	return info, hugio.ToReadCloser(&buff), nil
}
// GetOrCreateBytes works like GetOrCreate, but both create and the result
// deal in byte slices instead of readers.
// A cached entry is read in full and returned. Otherwise create is invoked
// and, unless the cache is disabled (max age 0), its result is written to the
// cache before it is returned. The returned slice is nil if create or the
// cache write fails.
func (c *Cache) GetOrCreateBytes(id string, create func() ([]byte, error)) (ItemInfo, []byte, error) {
	id = cleanID(id)

	c.nlocker.Lock(id)
	defer c.nlocker.Unlock(id)

	info := ItemInfo{Name: id}

	if cached := c.getOrRemove(id); cached != nil {
		defer cached.Close()
		data, readErr := ioutil.ReadAll(cached)
		return info, data, readErr
	}

	data, err := create()
	switch {
	case err != nil:
		return info, nil, err
	case c.maxAge == 0:
		// No caching.
		return info, data, nil
	}

	if err = afero.WriteReader(c.Fs, id, bytes.NewReader(data)); err != nil {
		return info, nil, err
	}

	return info, data, nil
}
// GetBytes returns the content of the file with the given id from the cache.
// The returned slice is nil if there is no usable entry.
func (c *Cache) GetBytes(id string) (ItemInfo, []byte, error) {
	id = cleanID(id)

	c.nlocker.Lock(id)
	defer c.nlocker.Unlock(id)

	info := ItemInfo{Name: id}

	cached := c.getOrRemove(id)
	if cached == nil {
		return info, nil, nil
	}
	defer cached.Close()

	content, err := ioutil.ReadAll(cached)
	return info, content, err
}
// Get returns a reader for the file with the given id from the cache.
// The reader is nil if there is no usable entry; otherwise the caller must
// close it. The returned error is always nil.
func (c *Cache) Get(id string) (ItemInfo, io.ReadCloser, error) {
	id = cleanID(id)

	c.nlocker.Lock(id)
	defer c.nlocker.Unlock(id)

	return ItemInfo{Name: id}, c.getOrRemove(id), nil
}
// getOrRemove opens the file with the given id. It returns nil if the cache
// is disabled, or if the file cannot be stat'ed or opened. With a positive
// max age, a file whose modification time is older than that is removed and
// nil is returned.
// Callers in this file hold the named lock for id when calling this.
func (c *Cache) getOrRemove(id string) hugio.ReadSeekCloser {
	switch {
	case c.maxAge == 0:
		// No caching.
		return nil
	case c.maxAge > 0:
		fi, err := c.Fs.Stat(id)
		if err != nil {
			return nil
		}

		cutoff := time.Now().Add(-time.Duration(c.maxAge) * time.Second)
		if fi.ModTime().Before(cutoff) {
			// Expired; removal is best effort.
			c.Fs.Remove(id)
			return nil
		}
	}

	f, err := c.Fs.Open(id)
	if err != nil {
		return nil
	}

	return f
}
// getString returns the cached content for id as a string, or the empty
// string if there is no usable entry. Read errors are ignored.
// For testing.
func (c *Cache) getString(id string) string {
	id = cleanID(id)

	c.nlocker.Lock(id)
	defer c.nlocker.Unlock(id)

	cached := c.getOrRemove(id)
	if cached == nil {
		return ""
	}
	defer cached.Close()

	content, _ := ioutil.ReadAll(cached)
	return string(content)
}
// Caches is a named set of caches. NewCachesFromPaths fills it with one
// entry per configured cache, keyed by the cache's lower-case name.
type Caches map[string]*Cache
// Get returns the cache with the given name, or nil if there is none.
// The name is lower-cased before the lookup.
func (f Caches) Get(name string) *Cache {
	key := strings.ToLower(name)
	return f[key]
}
// GetJSONCache returns the file cache used by getJSON, nil if not set.
func (f Caches) GetJSONCache() *Cache {
	jsonCache := f[cacheKeyGetJSON]
	return jsonCache
}
// GetCSVCache returns the file cache used by getCSV, nil if not set.
func (f Caches) GetCSVCache() *Cache {
	csvCache := f[cacheKeyGetCSV]
	return csvCache
}
// ImageCache returns the file cache used for processed images, nil if not set.
func (f Caches) ImageCache() *Cache {
	imageCache := f[cacheKeyImages]
	return imageCache
}
// AssetsCache returns the file cache used for assets (processed resources,
// SCSS etc.), nil if not set.
func (f Caches) AssetsCache() *Cache {
	assetsCache := f[cacheKeyAssets]
	return assetsCache
}
// NewCachesFromPaths builds the set of file caches described by the
// configuration in p. Each cache gets its own directory, named after the
// cache, below its configured Dir on the source filesystem; the directory is
// created if needed.
func NewCachesFromPaths(p *paths.Paths) (Caches, error) {
	configs, err := decodeConfig(p)
	if err != nil {
		return nil, err
	}

	sourceFs := p.Fs.Source
	caches := make(Caches, len(configs))

	for name, cfg := range configs {
		dir := filepath.Join(cfg.Dir, name)
		if err := sourceFs.MkdirAll(dir, 0777); err != nil {
			return nil, err
		}

		// Root the cache in its own directory.
		caches[name] = NewCache(afero.NewBasePathFs(sourceFs, dir), cfg.MaxAge)
	}

	return caches, nil
}
// decodeConfig builds the effective cache configuration: the built-in
// defaults overlaid with the user's settings from the "caches" section of
// p.Cfg.
//
// Only the built-in cache names are accepted. A setting that the user leaves
// out keeps the default value of that particular cache. A leading placeholder
// in Dir (:cacheDir, :resourceDir) is resolved; without a placeholder, Dir
// must be absolute when the source filesystem is the OS filesystem. If the
// ignoreCache flag is set, every cache gets a MaxAge of 0, which disables it.
//
// On error the returned configuration is nil.
func decodeConfig(p *paths.Paths) (cachesConfig, error) {
	c := make(cachesConfig)
	valid := make(map[string]bool)
	// Add defaults
	for k, v := range defaultCacheConfigs {
		c[k] = v
		valid[k] = true
	}

	cfg := p.Cfg

	m := cfg.GetStringMap(cachesConfigKey)

	_, isOsFs := p.Fs.Source.(*afero.OsFs)

	for k, v := range m {
		name := strings.ToLower(k)
		if !valid[name] {
			return nil, errors.Errorf("%q is not a valid cache name", name)
		}

		// Start from this cache's own defaults, so that overriding only one
		// setting (e.g. maxAge for images) does not silently reset the other
		// to the generic default.
		cc := c[name]
		if err := mapstructure.WeakDecode(v, &cc); err != nil {
			return nil, err
		}

		if cc.Dir == "" {
			return nil, errors.New("must provide cache Dir")
		}

		c[name] = cc
	}

	// This is a very old flag in Hugo, but we need to respect it.
	disabled := cfg.GetBool("ignoreCache")

	for k, v := range c {
		v.Dir = filepath.Clean(v.Dir)
		dir := filepath.ToSlash(v.Dir)
		parts := strings.Split(dir, "/")
		first := parts[0]

		if strings.HasPrefix(first, ":") {
			resolved, err := resolveDirPlaceholder(p, first)
			if err != nil {
				return nil, err
			}
			resolved = filepath.ToSlash(resolved)

			v.Dir = filepath.FromSlash(path.Join((append([]string{resolved}, parts[1:]...))...))
		} else if isOsFs && !filepath.IsAbs(v.Dir) {
			// filepath.IsAbs, unlike path.IsAbs, also recognizes absolute
			// paths with a volume name (c:\cache) on Windows.
			return nil, errors.Errorf("%q must either start with a placeholder (e.g. :cacheDir, :resourceDir) or be absolute", v.Dir)
		}

		if disabled {
			v.MaxAge = 0
		}

		c[k] = v
	}

	return c, nil
}
// resolveDirPlaceholder maps a directory placeholder to a real directory:
// :resourceDir becomes the absolute resources dir of p, :cacheDir becomes
// whatever helpers.GetCacheDir returns for p. The match is case insensitive.
// Any other placeholder is an error.
func resolveDirPlaceholder(p *paths.Paths, placeholder string) (string, error) {
	key := strings.ToLower(placeholder)

	if key == ":resourcedir" {
		return p.AbsResourcesDir, nil
	}

	if key == ":cachedir" {
		return helpers.GetCacheDir(p.Fs.Source, p.Cfg)
	}

	return "", errors.Errorf("%q is not a valid placeholder (valid values are :cacheDir or :resourceDir)", placeholder)
}
// cleanID normalizes a cache item id with filepath.Clean, so that different
// spellings of the same path map to the same file and the same named lock.
func cleanID(name string) string {
	cleaned := filepath.Clean(name)
	return cleaned
}

306
cache/filecache/filecache_test.go vendored Normal file
View File

@@ -0,0 +1,306 @@
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package filecache
import (
"fmt"
"io"
"io/ioutil"
"path/filepath"
"regexp"
"runtime"
"strings"
"sync"
"testing"
"time"
"github.com/gohugoio/hugo/common/hugio"
"github.com/gohugoio/hugo/config"
"github.com/gohugoio/hugo/hugofs"
"github.com/gohugoio/hugo/hugolib/paths"
"github.com/spf13/afero"
"github.com/spf13/viper"
"github.com/stretchr/testify/require"
)
// TestFileCache exercises the cache API end to end on an in-memory
// filesystem, once with an explicit cacheDir and once with the fallback.
func TestFileCache(t *testing.T) {
	t.Parallel()
	assert := require.New(t)

	for _, cacheDir := range []string{"mycache", ""} {

		configStr := `
cacheDir = "CACHEDIR"
[caches]
[caches.getJSON]
maxAge = 111
dir = ":cacheDir/c"
`
		configStr = strings.Replace(configStr, "CACHEDIR", cacheDir, 1)

		cfg, err := config.FromConfigString(configStr, "toml")
		assert.NoError(err)

		fs := hugofs.NewMem(cfg)
		p, err := paths.New(fs, cfg)
		assert.NoError(err)

		caches, err := NewCachesFromPaths(p)
		assert.NoError(err)

		c := caches.Get("GetJSON")
		assert.NotNil(c)
		assert.Equal(111, c.maxAge)

		bfs, ok := c.Fs.(*afero.BasePathFs)
		assert.True(ok)
		filename, err := bfs.RealPath("key")
		assert.NoError(err)
		if cacheDir != "" {
			assert.Equal(filepath.FromSlash(cacheDir+"/c/getjson/key"), filename)
		} else {
			// Temp dir.
			assert.Regexp(regexp.MustCompile("hugo_cache.*key"), filename)
		}

		rf := func(s string) func() (io.ReadCloser, error) {
			return func() (io.ReadCloser, error) {
				return struct {
					io.ReadSeeker
					io.Closer
				}{
					strings.NewReader(s),
					ioutil.NopCloser(nil),
				}, nil
			}
		}

		bf := func() ([]byte, error) {
			return []byte("bcd"), nil
		}

		// The first iteration populates the cache, the second one must be
		// served from it with the same results.
		for i := 0; i < 2; i++ {
			info, r, err := c.GetOrCreate("a", rf("abc"))
			assert.NoError(err)
			assert.NotNil(r)
			assert.Equal("a", info.Name)
			b, err := ioutil.ReadAll(r)
			assert.NoError(err)
			r.Close()
			assert.Equal("abc", string(b))

			info, b, err = c.GetOrCreateBytes("b", bf)
			assert.NoError(err)
			// Check the bytes just returned, not the reader from the
			// previous call.
			assert.NotNil(b)
			assert.Equal("b", info.Name)
			assert.Equal("bcd", string(b))

			// "a" is already cached, so create must not be used.
			_, b, err = c.GetOrCreateBytes("a", bf)
			assert.NoError(err)
			assert.Equal("abc", string(b))

			_, r, err = c.GetOrCreate("a", rf("bcd"))
			assert.NoError(err)
			b, err = ioutil.ReadAll(r)
			assert.NoError(err)
			r.Close()
			assert.Equal("abc", string(b))
		}

		assert.NotNil(caches.Get("getJSON"))

		info, w, err := caches.ImageCache().WriteCloser("mykey")
		assert.NoError(err)
		assert.Equal("mykey", info.Name)
		_, err = io.WriteString(w, "Hugo is great!")
		assert.NoError(err)
		// Closing also releases the named lock taken by WriteCloser.
		assert.NoError(w.Close())
		assert.Equal("Hugo is great!", caches.ImageCache().getString("mykey"))

		info, r, err := caches.ImageCache().Get("mykey")
		assert.NoError(err)
		assert.NotNil(r)
		assert.Equal("mykey", info.Name)
		b, err := ioutil.ReadAll(r)
		assert.NoError(err)
		r.Close()
		assert.Equal("Hugo is great!", string(b))

		info, b, err = caches.ImageCache().GetBytes("mykey")
		assert.NoError(err)
		assert.Equal("mykey", info.Name)
		assert.Equal("Hugo is great!", string(b))

	}

}
// TestFileCacheConcurrent hammers one cache from many goroutines while its
// entries expire, checking that each goroutine always gets its own data back.
func TestFileCacheConcurrent(t *testing.T) {
	t.Parallel()

	assert := require.New(t)

	configStr := `
[caches]
[caches.getjson]
maxAge = 1
dir = "/cache/c"
`

	cfg, err := config.FromConfigString(configStr, "toml")
	assert.NoError(err)
	fs := hugofs.NewMem(cfg)
	p, err := paths.New(fs, cfg)
	assert.NoError(err)

	caches, err := NewCachesFromPaths(p)
	assert.NoError(err)

	const cacheName = "getjson"

	filenameData := func(i int) (string, string) {
		data := fmt.Sprintf("data: %d", i)
		filename := fmt.Sprintf("file%d", i)
		return filename, data
	}

	var wg sync.WaitGroup

	for i := 0; i < 50; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			// The require assertions call t.FailNow, which must only be
			// called from the goroutine running the test. Report failures
			// with t.Errorf and stop this worker instead.
			for j := 0; j < 20; j++ {
				c := caches.Get(cacheName)
				if c == nil {
					t.Errorf("cache %q not found", cacheName)
					return
				}
				filename, data := filenameData(i)
				_, r, err := c.GetOrCreate(filename, func() (io.ReadCloser, error) {
					return hugio.ToReadCloser(strings.NewReader(data)), nil
				})
				if err != nil {
					t.Errorf("GetOrCreate(%q): %s", filename, err)
					return
				}
				b, err := ioutil.ReadAll(r)
				r.Close()
				if err != nil {
					t.Errorf("reading %q: %s", filename, err)
					return
				}
				if got := string(b); got != data {
					t.Errorf("%q: got %q, expected %q", filename, got, data)
					return
				}
				// Trigger some expiration.
				time.Sleep(50 * time.Millisecond)
			}
		}(i)

	}
	wg.Wait()
}
// TestDecodeConfig checks that user settings override the defaults and that
// settings left out fall back to them.
func TestDecodeConfig(t *testing.T) {
	t.Parallel()

	assert := require.New(t)

	const tomlConfig = `
[caches]
[caches.getJSON]
maxAge = 1234
dir = "/path/to/c1"
[caches.getCSV]
maxAge = 3456
dir = "/path/to/c2"
[caches.images]
dir = "/path/to/c3"
`

	cfg, err := config.FromConfigString(tomlConfig, "toml")
	assert.NoError(err)

	p, err := paths.New(hugofs.NewMem(cfg), cfg)
	assert.NoError(err)

	decoded, err := decodeConfig(p)
	assert.NoError(err)

	assert.Equal(4, len(decoded))

	expectations := []struct {
		name   string
		maxAge int
		dir    string
	}{
		{"getcsv", 3456, "/path/to/c2"},
		// No maxAge given for images, so the default applies.
		{"images", -1, "/path/to/c3"},
	}

	for _, e := range expectations {
		got := decoded[e.name]
		assert.Equal(e.maxAge, got.MaxAge)
		assert.Equal(filepath.FromSlash(e.dir), got.Dir)
	}
}
// TestDecodeConfigIgnoreCache checks that the ignoreCache flag forces a
// MaxAge of 0 on every cache, whatever maxAge is configured.
func TestDecodeConfigIgnoreCache(t *testing.T) {
	t.Parallel()

	assert := require.New(t)

	const tomlConfig = `
ignoreCache = true
[caches]
[caches.getJSON]
maxAge = 1234
dir = "/path/to/c1"
[caches.getCSV]
maxAge = 3456
dir = "/path/to/c2"
[caches.images]
dir = "/path/to/c3"
`

	cfg, err := config.FromConfigString(tomlConfig, "toml")
	assert.NoError(err)

	p, err := paths.New(hugofs.NewMem(cfg), cfg)
	assert.NoError(err)

	decoded, err := decodeConfig(p)
	assert.NoError(err)

	assert.Equal(4, len(decoded))

	for name := range decoded {
		assert.Equal(0, decoded[name].MaxAge)
	}
}
// TestDecodeConfigDefault checks the defaults with no caches section set:
// all four caches exist and the :resourceDir placeholder of the images cache
// resolves to the configured resourceDir.
func TestDecodeConfigDefault(t *testing.T) {
	assert := require.New(t)

	resourceDir, cacheDir := "/cache/resources", "/cache/thecache"
	expectedImagesDir := "/cache/resources/_gen"
	if runtime.GOOS == "windows" {
		resourceDir, cacheDir = "c:\\cache\\resources", "c:\\cache\\thecache"
		expectedImagesDir = "c:\\cache\\resources\\_gen"
	}

	cfg := viper.New()
	cfg.Set("resourceDir", resourceDir)
	cfg.Set("cacheDir", cacheDir)

	p, err := paths.New(hugofs.NewMem(cfg), cfg)
	assert.NoError(err)

	decoded, err := decodeConfig(p)
	assert.NoError(err)

	assert.Equal(4, len(decoded))
	assert.Equal(expectedImagesDir, decoded[cacheKeyImages].Dir)
}