Add a HTTP cache for remote resources.

Fixes #12502
Closes #11891
This commit is contained in:
Bjørn Erik Pedersen
2024-05-17 17:06:47 +02:00
parent c71e24af51
commit 447108fed2
32 changed files with 1150 additions and 236 deletions

View File

@@ -36,6 +36,11 @@ func newResourceCache(rs *Spec, memCache *dynacache.Cache) *ResourceCache {
"/res1",
dynacache.OptionsPartition{ClearWhen: dynacache.ClearOnChange, Weight: 40},
),
CacheResourceRemote: dynacache.GetOrCreatePartition[string, resource.Resource](
memCache,
"/resr",
dynacache.OptionsPartition{ClearWhen: dynacache.ClearOnChange, Weight: 40},
),
cacheResources: dynacache.GetOrCreatePartition[string, resource.Resources](
memCache,
"/ress",
@@ -53,6 +58,7 @@ type ResourceCache struct {
sync.RWMutex
cacheResource *dynacache.Partition[string, resource.Resource]
CacheResourceRemote *dynacache.Partition[string, resource.Resource]
cacheResources *dynacache.Partition[string, resource.Resources]
cacheResourceTransformation *dynacache.Partition[string, *resourceAdapterInner]

View File

@@ -23,6 +23,9 @@ import (
"strings"
"time"
"github.com/bep/logg"
"github.com/gohugoio/httpcache"
hhttpcache "github.com/gohugoio/hugo/cache/httpcache"
"github.com/gohugoio/hugo/helpers"
"github.com/gohugoio/hugo/hugofs/glob"
"github.com/gohugoio/hugo/identity"
@@ -31,7 +34,9 @@ import (
"github.com/gohugoio/hugo/cache/dynacache"
"github.com/gohugoio/hugo/cache/filecache"
"github.com/gohugoio/hugo/common/hcontext"
"github.com/gohugoio/hugo/common/hugio"
"github.com/gohugoio/hugo/common/tasks"
"github.com/gohugoio/hugo/resources"
"github.com/gohugoio/hugo/resources/resource"
)
@@ -39,19 +44,76 @@ import (
// Client contains methods to create Resource objects.
// tasks to Resource objects.
type Client struct {
rs *resources.Spec
httpClient *http.Client
cacheGetResource *filecache.Cache
rs *resources.Spec
httpClient *http.Client
httpCacheConfig hhttpcache.ConfigCompiled
cacheGetResource *filecache.Cache
resourceIDDispatcher hcontext.ContextDispatcher[string]
// Set when watching.
remoteResourceChecker *tasks.RunEvery
remoteResourceLogger logg.LevelLogger
}
type contextKey string
// New creates a new Client with the given specification.
func New(rs *resources.Spec) *Client {
fileCache := rs.FileCaches.GetResourceCache()
resourceIDDispatcher := hcontext.NewContextDispatcher[string](contextKey("resourceID"))
httpCacheConfig := rs.Cfg.GetConfigSection("httpCacheCompiled").(hhttpcache.ConfigCompiled)
var remoteResourceChecker *tasks.RunEvery
if rs.Cfg.Watching() && !httpCacheConfig.IsPollingDisabled() {
remoteResourceChecker = &tasks.RunEvery{
HandleError: func(name string, err error) {
rs.Logger.Warnf("Failed to check remote resource: %s", err)
},
RunImmediately: false,
}
if err := remoteResourceChecker.Start(); err != nil {
panic(err)
}
rs.BuildClosers.Add(remoteResourceChecker)
}
httpTimeout := 2 * time.Minute // Need to cover retries.
if httpTimeout < (rs.Cfg.Timeout() + 30*time.Second) {
httpTimeout = rs.Cfg.Timeout() + 30*time.Second
}
return &Client{
rs: rs,
rs: rs,
httpCacheConfig: httpCacheConfig,
resourceIDDispatcher: resourceIDDispatcher,
remoteResourceChecker: remoteResourceChecker,
remoteResourceLogger: rs.Logger.InfoCommand("remote"),
httpClient: &http.Client{
Timeout: time.Minute,
Timeout: httpTimeout,
Transport: &httpcache.Transport{
Cache: fileCache.AsHTTPCache(),
CacheKey: func(req *http.Request) string {
return resourceIDDispatcher.Get(req.Context())
},
Around: func(req *http.Request, key string) func() {
return fileCache.NamedLock(key)
},
AlwaysUseCachedResponse: func(req *http.Request, key string) bool {
return !httpCacheConfig.For(req.URL.String())
},
ShouldCache: func(req *http.Request, resp *http.Response, key string) bool {
return shouldCache(resp.StatusCode)
},
MarkCachedResponses: true,
EnableETagPair: true,
Transport: &transport{
Cfg: rs.Cfg,
Logger: rs.Logger,
},
},
},
cacheGetResource: rs.FileCaches.GetResourceCache(),
cacheGetResource: fileCache,
}
}

View File

@@ -134,8 +134,7 @@ mediaTypes = ['text/plain']
// This is hard to get stable on GitHub Actions, it sometimes succeeds due to timing issues.
if err != nil {
b.AssertLogContains("Got Err")
b.AssertLogContains("Retry timeout")
b.AssertLogContains("ContentLength:0")
b.AssertLogContains("retry timeout")
}
})
}

View File

@@ -14,22 +14,27 @@
package create
import (
"bufio"
"bytes"
"context"
"fmt"
"io"
"math/rand"
"mime"
"net/http"
"net/http/httputil"
"net/url"
"path"
"strings"
"time"
gmaps "maps"
"github.com/gohugoio/httpcache"
"github.com/gohugoio/hugo/common/hugio"
"github.com/gohugoio/hugo/common/loggers"
"github.com/gohugoio/hugo/common/maps"
"github.com/gohugoio/hugo/common/tasks"
"github.com/gohugoio/hugo/common/types"
"github.com/gohugoio/hugo/config"
"github.com/gohugoio/hugo/identity"
"github.com/gohugoio/hugo/media"
"github.com/gohugoio/hugo/resources"
@@ -92,6 +97,60 @@ var temporaryHTTPStatusCodes = map[int]bool{
504: true,
}
func (c *Client) configurePollingIfEnabled(uri, optionsKey string, getRes func() (*http.Response, error)) {
if c.remoteResourceChecker == nil {
return
}
// Set up polling for changes to this resource.
pollingConfig := c.httpCacheConfig.PollConfigFor(uri)
if pollingConfig.IsZero() || pollingConfig.Config.Disable {
return
}
if c.remoteResourceChecker.Has(optionsKey) {
return
}
var lastChange time.Time
c.remoteResourceChecker.Add(optionsKey,
tasks.Func{
IntervalLow: pollingConfig.Config.Low,
IntervalHigh: pollingConfig.Config.High,
F: func(interval time.Duration) (time.Duration, error) {
start := time.Now()
defer func() {
duration := time.Since(start)
c.rs.Logger.Debugf("Polled remote resource for changes in %13s. Interval: %4s (low: %4s high: %4s) resource: %q ", duration, interval, pollingConfig.Config.Low, pollingConfig.Config.High, uri)
}()
// TODO(bep) figure out a ways to remove unused tasks.
res, err := getRes()
if err != nil {
return pollingConfig.Config.High, err
}
// The caching is delayed until the body is read.
io.Copy(io.Discard, res.Body)
res.Body.Close()
x1, x2 := res.Header.Get(httpcache.XETag1), res.Header.Get(httpcache.XETag2)
if x1 != x2 {
lastChange = time.Now()
c.remoteResourceLogger.Logf("detected change in remote resource %q", uri)
c.rs.Rebuilder.SignalRebuild(identity.StringIdentity(optionsKey))
}
if time.Since(lastChange) < 10*time.Second {
// The user is typing, check more often.
return 0, nil
}
// Increase the interval to avoid hammering the server.
interval += 1 * time.Second
return interval, nil
},
})
}
// FromRemote expects one or n-parts of a URL to a resource
// If you provide multiple parts they will be joined together to the final URL.
func (c *Client) FromRemote(uri string, optionsm map[string]any) (resource.Resource, error) {
@@ -101,168 +160,139 @@ func (c *Client) FromRemote(uri string, optionsm map[string]any) (resource.Resou
}
method := "GET"
if s, ok := maps.LookupEqualFold(optionsm, "method"); ok {
if s, _, ok := maps.LookupEqualFold(optionsm, "method"); ok {
method = strings.ToUpper(s.(string))
}
isHeadMethod := method == "HEAD"
resourceID := calculateResourceID(uri, optionsm)
optionsm = gmaps.Clone(optionsm)
userKey, optionsKey := remoteResourceKeys(uri, optionsm)
_, httpResponse, err := c.cacheGetResource.GetOrCreate(resourceID, func() (io.ReadCloser, error) {
// A common pattern is to use the key in the options map as
// a way to control cache eviction,
// so make sure we use any user provided kehy as the file cache key,
// but the auto generated and more stable key for everything else.
filecacheKey := userKey
return c.rs.ResourceCache.CacheResourceRemote.GetOrCreate(optionsKey, func(key string) (resource.Resource, error) {
options, err := decodeRemoteOptions(optionsm)
if err != nil {
return nil, fmt.Errorf("failed to decode options for resource %s: %w", uri, err)
}
if err := c.validateFromRemoteArgs(uri, options); err != nil {
return nil, err
}
var (
start time.Time
nextSleep = time.Duration((rand.Intn(1000) + 100)) * time.Millisecond
nextSleepLimit = time.Duration(5) * time.Second
)
getRes := func() (*http.Response, error) {
ctx := context.Background()
ctx = c.resourceIDDispatcher.Set(ctx, filecacheKey)
for {
b, retry, err := func() ([]byte, bool, error) {
req, err := options.NewRequest(uri)
if err != nil {
return nil, false, fmt.Errorf("failed to create request for resource %s: %w", uri, err)
}
res, err := c.httpClient.Do(req)
if err != nil {
return nil, false, err
}
defer res.Body.Close()
if res.StatusCode != http.StatusNotFound {
if res.StatusCode < 200 || res.StatusCode > 299 {
return nil, temporaryHTTPStatusCodes[res.StatusCode], toHTTPError(fmt.Errorf("failed to fetch remote resource: %s", http.StatusText(res.StatusCode)), res, !isHeadMethod)
}
}
b, err := httputil.DumpResponse(res, true)
if err != nil {
return nil, false, toHTTPError(err, res, !isHeadMethod)
}
return b, false, nil
}()
req, err := options.NewRequest(uri)
if err != nil {
if retry {
if start.IsZero() {
start = time.Now()
} else if d := time.Since(start) + nextSleep; d >= c.rs.Cfg.Timeout() {
c.rs.Logger.Errorf("Retry timeout (configured to %s) fetching remote resource.", c.rs.Cfg.Timeout())
return nil, err
}
time.Sleep(nextSleep)
if nextSleep < nextSleepLimit {
nextSleep *= 2
}
continue
}
return nil, err
return nil, fmt.Errorf("failed to create request for resource %s: %w", uri, err)
}
return hugio.ToReadCloser(bytes.NewReader(b)), nil
req = req.WithContext(ctx)
return c.httpClient.Do(req)
}
})
if err != nil {
return nil, err
}
defer httpResponse.Close()
res, err := http.ReadResponse(bufio.NewReader(httpResponse), nil)
if err != nil {
return nil, err
}
defer res.Body.Close()
if res.StatusCode == http.StatusNotFound {
// Not found. This matches how looksup for local resources work.
return nil, nil
}
var (
body []byte
mediaType media.Type
)
// A response to a HEAD method should not have a body. If it has one anyway, that body must be ignored.
// See https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/HEAD
if !isHeadMethod && res.Body != nil {
body, err = io.ReadAll(res.Body)
res, err := getRes()
if err != nil {
return nil, fmt.Errorf("failed to read remote resource %q: %w", uri, err)
return nil, err
}
}
defer res.Body.Close()
filename := path.Base(rURL.Path)
if _, params, _ := mime.ParseMediaType(res.Header.Get("Content-Disposition")); params != nil {
if _, ok := params["filename"]; ok {
filename = params["filename"]
c.configurePollingIfEnabled(uri, optionsKey, getRes)
if res.StatusCode == http.StatusNotFound {
// Not found. This matches how lookups for local resources work.
return nil, nil
}
}
contentType := res.Header.Get("Content-Type")
// For HEAD requests we have no body to work with, so we need to use the Content-Type header.
if isHeadMethod || c.rs.ExecHelper.Sec().HTTP.MediaTypes.Accept(contentType) {
var found bool
mediaType, found = c.rs.MediaTypes().GetByType(contentType)
if !found {
// A media type not configured in Hugo, just create one from the content type string.
mediaType, _ = media.FromString(contentType)
if res.StatusCode < 200 || res.StatusCode > 299 {
return nil, toHTTPError(fmt.Errorf("failed to fetch remote resource: %s", http.StatusText(res.StatusCode)), res, !isHeadMethod)
}
}
if mediaType.IsZero() {
var extensionHints []string
// mime.ExtensionsByType gives a long list of extensions for text/plain,
// just use ".txt".
if strings.HasPrefix(contentType, "text/plain") {
extensionHints = []string{".txt"}
} else {
exts, _ := mime.ExtensionsByType(contentType)
if exts != nil {
extensionHints = exts
var (
body []byte
mediaType media.Type
)
// A response to a HEAD method should not have a body. If it has one anyway, that body must be ignored.
// See https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/HEAD
if !isHeadMethod && res.Body != nil {
body, err = io.ReadAll(res.Body)
if err != nil {
return nil, fmt.Errorf("failed to read remote resource %q: %w", uri, err)
}
}
// Look for a file extension. If it's .txt, look for a more specific.
if extensionHints == nil || extensionHints[0] == ".txt" {
if ext := path.Ext(filename); ext != "" {
extensionHints = []string{ext}
filename := path.Base(rURL.Path)
if _, params, _ := mime.ParseMediaType(res.Header.Get("Content-Disposition")); params != nil {
if _, ok := params["filename"]; ok {
filename = params["filename"]
}
}
// Now resolve the media type primarily using the content.
mediaType = media.FromContent(c.rs.MediaTypes(), extensionHints, body)
contentType := res.Header.Get("Content-Type")
}
// For HEAD requests we have no body to work with, so we need to use the Content-Type header.
if isHeadMethod || c.rs.ExecHelper.Sec().HTTP.MediaTypes.Accept(contentType) {
var found bool
mediaType, found = c.rs.MediaTypes().GetByType(contentType)
if !found {
// A media type not configured in Hugo, just create one from the content type string.
mediaType, _ = media.FromString(contentType)
}
}
if mediaType.IsZero() {
return nil, fmt.Errorf("failed to resolve media type for remote resource %q", uri)
}
if mediaType.IsZero() {
resourceID = filename[:len(filename)-len(path.Ext(filename))] + "_" + resourceID + mediaType.FirstSuffix.FullSuffix
data := responseToData(res, false)
var extensionHints []string
return c.rs.NewResource(
resources.ResourceSourceDescriptor{
MediaType: mediaType,
Data: data,
GroupIdentity: identity.StringIdentity(resourceID),
LazyPublish: true,
OpenReadSeekCloser: func() (hugio.ReadSeekCloser, error) {
return hugio.NewReadSeekerNoOpCloser(bytes.NewReader(body)), nil
},
TargetPath: resourceID,
})
// mime.ExtensionsByType gives a long list of extensions for text/plain,
// just use ".txt".
if strings.HasPrefix(contentType, "text/plain") {
extensionHints = []string{".txt"}
} else {
exts, _ := mime.ExtensionsByType(contentType)
if exts != nil {
extensionHints = exts
}
}
// Look for a file extension. If it's .txt, look for a more specific.
if extensionHints == nil || extensionHints[0] == ".txt" {
if ext := path.Ext(filename); ext != "" {
extensionHints = []string{ext}
}
}
// Now resolve the media type primarily using the content.
mediaType = media.FromContent(c.rs.MediaTypes(), extensionHints, body)
}
if mediaType.IsZero() {
return nil, fmt.Errorf("failed to resolve media type for remote resource %q", uri)
}
userKey = filename[:len(filename)-len(path.Ext(filename))] + "_" + userKey + mediaType.FirstSuffix.FullSuffix
data := responseToData(res, false)
return c.rs.NewResource(
resources.ResourceSourceDescriptor{
MediaType: mediaType,
Data: data,
GroupIdentity: identity.StringIdentity(optionsKey),
LazyPublish: true,
OpenReadSeekCloser: func() (hugio.ReadSeekCloser, error) {
return hugio.NewReadSeekerNoOpCloser(bytes.NewReader(body)), nil
},
TargetPath: userKey,
})
})
}
func (c *Client) validateFromRemoteArgs(uri string, options fromRemoteOptions) error {
@@ -277,11 +307,17 @@ func (c *Client) validateFromRemoteArgs(uri string, options fromRemoteOptions) e
return nil
}
func calculateResourceID(uri string, optionsm map[string]any) string {
if key, found := maps.LookupEqualFold(optionsm, "key"); found {
return identity.HashString(key)
func remoteResourceKeys(uri string, optionsm map[string]any) (string, string) {
var userKey string
if key, k, found := maps.LookupEqualFold(optionsm, "key"); found {
userKey = identity.HashString(key)
delete(optionsm, k)
}
return identity.HashString(uri, optionsm)
optionsKey := identity.HashString(uri, optionsm)
if userKey == "" {
userKey = optionsKey
}
return userKey, optionsKey
}
func addDefaultHeaders(req *http.Request) {
@@ -350,3 +386,71 @@ func decodeRemoteOptions(optionsm map[string]any) (fromRemoteOptions, error) {
return options, nil
}
var _ http.RoundTripper = (*transport)(nil)
type transport struct {
Cfg config.AllProvider
Logger loggers.Logger
}
func (t *transport) RoundTrip(req *http.Request) (resp *http.Response, err error) {
defer func() {
if resp != nil && resp.StatusCode != http.StatusNotFound && resp.StatusCode != http.StatusNotModified {
t.Logger.Debugf("Fetched remote resource: %s", req.URL.String())
}
}()
var (
start time.Time
nextSleep = time.Duration((rand.Intn(1000) + 100)) * time.Millisecond
nextSleepLimit = time.Duration(5) * time.Second
retry bool
)
for {
resp, retry, err = func() (*http.Response, bool, error) {
resp2, err := http.DefaultTransport.RoundTrip(req)
if err != nil {
return resp2, false, err
}
if resp2.StatusCode != http.StatusNotFound && resp2.StatusCode != http.StatusNotModified {
if resp2.StatusCode < 200 || resp2.StatusCode > 299 {
return resp2, temporaryHTTPStatusCodes[resp2.StatusCode], nil
}
}
return resp2, false, nil
}()
if retry {
if start.IsZero() {
start = time.Now()
} else if d := time.Since(start) + nextSleep; d >= t.Cfg.Timeout() {
msg := "<nil>"
if resp != nil {
msg = resp.Status
}
err := toHTTPError(fmt.Errorf("retry timeout (configured to %s) fetching remote resource: %s", t.Cfg.Timeout(), msg), resp, req.Method != "HEAD")
return resp, err
}
time.Sleep(nextSleep)
if nextSleep < nextSleepLimit {
nextSleep *= 2
}
continue
}
return
}
}
// We need to send the redirect responses back to the HTTP client from RoundTrip,
// but we don't want to cache them.
func shouldCache(statusCode int) bool {
switch statusCode {
case http.StatusMovedPermanently, http.StatusFound, http.StatusSeeOther, http.StatusTemporaryRedirect, http.StatusPermanentRedirect:
return false
}
return true
}

View File

@@ -115,15 +115,21 @@ func TestOptionsNewRequest(t *testing.T) {
c.Assert(req.Header["User-Agent"], qt.DeepEquals, []string{"foo"})
}
func TestCalculateResourceID(t *testing.T) {
func TestRemoteResourceKeys(t *testing.T) {
t.Parallel()
c := qt.New(t)
c.Assert(calculateResourceID("foo", nil), qt.Equals, "5917621528921068675")
c.Assert(calculateResourceID("foo", map[string]any{"bar": "baz"}), qt.Equals, "7294498335241413323")
check := func(uri string, optionsm map[string]any, expect1, expect2 string) {
got1, got2 := remoteResourceKeys(uri, optionsm)
c.Assert(got1, qt.Equals, expect1)
c.Assert(got2, qt.Equals, expect2)
}
c.Assert(calculateResourceID("foo", map[string]any{"key": "1234", "bar": "baz"}), qt.Equals, "14904296279238663669")
c.Assert(calculateResourceID("asdf", map[string]any{"key": "1234", "bar": "asdf"}), qt.Equals, "14904296279238663669")
c.Assert(calculateResourceID("asdf", map[string]any{"key": "12345", "bar": "asdf"}), qt.Equals, "12191037851845371770")
check("foo", nil, "5917621528921068675", "5917621528921068675")
check("foo", map[string]any{"bar": "baz"}, "7294498335241413323", "7294498335241413323")
check("foo", map[string]any{"key": "1234", "bar": "baz"}, "14904296279238663669", "7294498335241413323")
check("foo", map[string]any{"key": "12345", "bar": "baz"}, "12191037851845371770", "7294498335241413323")
check("asdf", map[string]any{"key": "1234", "bar": "asdf"}, "14904296279238663669", "3787889110563790121")
check("asdf", map[string]any{"key": "12345", "bar": "asdf"}, "12191037851845371770", "3787889110563790121")
}

View File

@@ -29,6 +29,7 @@ import (
"github.com/gohugoio/hugo/common/hexec"
"github.com/gohugoio/hugo/common/loggers"
"github.com/gohugoio/hugo/common/paths"
"github.com/gohugoio/hugo/common/types"
"github.com/gohugoio/hugo/identity"
@@ -53,6 +54,8 @@ func NewSpec(
logger loggers.Logger,
errorHandler herrors.ErrorSender,
execHelper *hexec.Exec,
buildClosers types.CloseAdder,
rebuilder identity.SignalRebuilder,
) (*Spec, error) {
conf := s.Cfg.GetConfig().(*allconfig.Config)
imgConfig := conf.Imaging
@@ -87,10 +90,12 @@ func NewSpec(
}
rs := &Spec{
PathSpec: s,
Logger: logger,
ErrorSender: errorHandler,
imaging: imaging,
PathSpec: s,
Logger: logger,
ErrorSender: errorHandler,
BuildClosers: buildClosers,
Rebuilder: rebuilder,
imaging: imaging,
ImageCache: newImageCache(
fileCaches.ImageCache(),
memCache,
@@ -111,8 +116,10 @@ func NewSpec(
type Spec struct {
*helpers.PathSpec
Logger loggers.Logger
ErrorSender herrors.ErrorSender
Logger loggers.Logger
ErrorSender herrors.ErrorSender
BuildClosers types.CloseAdder
Rebuilder identity.SignalRebuilder
TextTemplates tpl.TemplateParseFinder