Skip to content

Commit 4e1ceff

Browse files
committed
lib/revproxy: refine the logic for cacheability
In addition to "immutable", treat an object as effectively cacheable if its freshness period (max-age) is so long that we never need to worry about updating it (because the backing store will be long gone). NPM in particular seems to set expirations to either 5m or 365d, on all the binary artifacts that one fetches from there.
1 parent b663731 commit 4e1ceff

File tree

1 file changed

+48
-28
lines changed

1 file changed

+48
-28
lines changed

lib/revproxy/revproxy.go

+48-28
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import (
3232
"time"
3333

3434
"github.com/creachadair/mds/cache"
35+
"github.com/creachadair/mds/mapset"
3536
"github.com/creachadair/scheddle"
3637
"github.com/creachadair/taskgroup"
3738
"github.com/tailscale/go-cache-plugin/lib/s3util"
@@ -234,7 +235,8 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
234235
if canCache {
235236
proxy.ModifyResponse = func(rsp *http.Response) error {
236237
maxAge, isVolatile := s.canMemoryCache(rsp)
237-
if !isVolatile && !s.canCacheResponse(rsp) {
238+
canCacheResponse := s.canCacheResponse(rsp)
239+
if !canCacheResponse && !isVolatile {
238240
// A response we cannot cache at all.
239241
setXCacheInfo(rsp.Header, "fetch, uncached", "")
240242
s.rspNotCached.Add(1)
@@ -249,7 +251,8 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
249251
Reader: io.TeeReader(rsp.Body, &buf),
250252
Closer: rsp.Body,
251253
}
252-
if isVolatile {
254+
if !canCacheResponse && isVolatile {
255+
// A volatile response we can cache temporarily.
253256
setXCacheInfo(rsp.Header, "fetch, cached, volatile", hash)
254257
updateCache = func() {
255258
body := buf.Bytes()
@@ -323,16 +326,44 @@ func hostMatchesTarget(host string, targets []string) bool {
323326

324327
// canCacheRequest reports whether r is a request whose response can be cached.
// A request qualifies only if its method is GET and its Cache-Control header
// does not include the "no-store" directive.
325328
func (s *Server) canCacheRequest(r *http.Request) bool {
326-
return r.Method == "GET" && !slices.Contains(splitCacheControl(r.Header), "no-store")
329+
return r.Method == "GET" && !parseCacheControl(r.Header.Get("Cache-Control")).Keys.Has("no-store")
327330
}
328331

329332
// canCacheResponse reports whether rsp is a response whose body can be cached.
// Only responses with status 200 OK are considered. With the parsed
// Cache-Control header, a response marked "no-store" is never cacheable and
// one marked "immutable" always is; any other response is cacheable only if
// it is marked "must-revalidate" and its max-age exceeds 60 days.
330333
func (s *Server) canCacheResponse(rsp *http.Response) bool {
331334
if rsp.StatusCode != http.StatusOK {
332335
return false
333336
}
334-
cc := splitCacheControl(rsp.Header)
335-
return !slices.Contains(cc, "no-store") && slices.Contains(cc, "immutable")
337+
cc := parseCacheControl(rsp.Header.Get("Cache-Control"))
338+
if cc.Keys.Has("no-store") {
339+
return false
340+
} else if cc.Keys.Has("immutable") {
341+
return true
342+
}
343+
344+
// We treat a response that is not immutable but requires validation as
345+
// cacheable if its max-age is so long it doesn't matter.
346+
const goodLongTime = 60 * 24 * time.Hour
347+
return cc.Keys.Has("must-revalidate") && cc.MaxAge > goodLongTime
348+
}
349+
350+
// cacheControl is the parsed form of a Cache-Control header value, as
// returned by parseCacheControl.
type cacheControl struct {
351+
// Keys is the set of directive names (the text before any "=") that
// appeared in the header.
Keys mapset.Set[string]
352+
// MaxAge is the value of the max-age directive as a duration. It is zero
// if the directive was absent or its argument was not a valid integer.
MaxAge time.Duration
353+
}
354+
355+
func parseCacheControl(s string) (out cacheControl) {
356+
for _, v := range strings.Split(s, ",") {
357+
key, val, ok := strings.Cut(strings.TrimSpace(v), "=")
358+
if ok && key == "max-age" {
359+
sec, err := strconv.Atoi(val)
360+
if err == nil {
361+
out.MaxAge = time.Duration(sec) * time.Second
362+
}
363+
}
364+
out.Keys.Add(key)
365+
}
366+
return
336367
}
337368

338369
// canMemoryCache reports whether r is a volatile response whose body can be
@@ -342,21 +373,19 @@ func (s *Server) canMemoryCache(rsp *http.Response) (time.Duration, bool) {
342373
if rsp.StatusCode != http.StatusOK {
343374
return 0, false
344375
}
345-
var maxAge time.Duration
346-
for _, v := range splitCacheControl(rsp.Header) {
347-
if v == "no-store" || v == "immutable" {
348-
return 0, false // don't cache immutable things in memory
349-
}
350-
sfx, ok := strings.CutPrefix(v, "max-age=")
351-
if !ok {
352-
continue
353-
}
354-
sec, err := strconv.Atoi(sfx)
355-
if err == nil {
356-
maxAge = time.Duration(min(sec, 3600)) * time.Second
357-
}
376+
cc := parseCacheControl(rsp.Header.Get("Cache-Control"))
377+
if cc.Keys.Has("no-store") || cc.Keys.Has("no-cache") {
378+
// While no-cache doesn't mean we can't cache it, it requires
379+
// re-validation before reusing the response, so treat that as if it were
380+
// no-store.
381+
return 0, false
358382
}
359-
return maxAge, maxAge > 0
383+
384+
// We'll cache things in memory if they aren't expected to last too long.
385+
if cc.MaxAge > 0 && cc.MaxAge < time.Hour {
386+
return cc.MaxAge, true
387+
}
388+
return 0, false
360389
}
361390

362391
// hashRequest generates the storage digest for the specified request URL.
@@ -375,12 +404,3 @@ func writeCachedResponse(w http.ResponseWriter, hdr http.Header, body []byte) {
375404
}
376405
w.Write(body)
377406
}
378-
379-
// splitCacheControl returns the tokens of the Cache-Control header from h.
// The header value is split on commas and each token is trimmed of
// surrounding whitespace; an absent header yields a single empty token.
380-
func splitCacheControl(h http.Header) []string {
381-
fs := strings.Split(h.Get("Cache-Control"), ",")
382-
for i, v := range fs {
383-
fs[i] = strings.TrimSpace(v)
384-
}
385-
return fs
386-
}

0 commit comments

Comments
 (0)