Skip to content

Commit 5a0f60d

Browse files
committed
feat: Add caching
- Fixed code lints - Refactored the code to use `httputil.ReverseProxy` with a director function - Add caching - Fixes #7 Signed-off-by: Luis Davim <[email protected]>
1 parent 7914025 commit 5a0f60d

11 files changed

+698
-311
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ aws-s3-reverse-proxy
22
aws-s3-reverse-proxy.tar
33
config
44
test.txt
5+
cache.d/

cache/cache.go

+271
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,271 @@
1+
package cache
2+
3+
// This file was initially taken from https://github.com/hauke96/tiny-http-proxy/blob/master/cache.go
4+
// some extra functionality has been added:
5+
// Allow invalidating cache items
6+
// expiring cached items based on a global TTL
7+
8+
import (
9+
"bufio"
10+
"bytes"
11+
"crypto/sha256"
12+
"encoding/hex"
13+
"fmt"
14+
"hash"
15+
"io"
16+
"io/fs"
17+
"os"
18+
"path/filepath"
19+
"sync"
20+
"time"
21+
22+
log "github.com/sirupsen/logrus"
23+
)
24+
25+
// Cache stores items both on disk (one file per item inside folder) and,
// for small items, in memory. Items are addressed by the hex-encoded SHA-256
// digest of the caller's key.
type Cache struct {
	// folder is the directory that holds one file per cached item; each file
	// is named after the item's hash.
	folder string
	// hash is a SHA-256 hasher instance created together with the cache.
	hash hash.Hash

	// mutex guards knownValues and busyValues.
	mutex *sync.Mutex
	// knownValues maps an item's hash to its content. A nil value means the
	// item is known but has to be read from its file.
	knownValues map[string][]byte
	// busyValues holds one lock per item that is currently being fetched, so
	// that other requests for the same item can wait for it.
	busyValues map[string]*sync.Mutex

	// maxSize is the largest item size, in MiB, that is kept in memory.
	maxSize int64
	// ttl is the age after which a cached file is removed.
	ttl time.Duration
}
34+
35+
// Options carries the settings CreateCache needs to build a Cache.
type Options struct {
	// Path is the directory in which cached files are stored.
	Path string
	// MaxSize is the largest item size, in MiB, that is also kept in memory.
	MaxSize int64
	// TTL is the age after which cached files are removed.
	TTL time.Duration
}
40+
41+
func CreateCache(opts Options) (*Cache, error) {
42+
fileInfos, err := os.ReadDir(opts.Path)
43+
if err != nil {
44+
log.Warnf("Cannot open cache folder '%s': %s", opts.Path, err)
45+
log.Infof("Create cache folder '%s'", opts.Path)
46+
if err := os.Mkdir(opts.Path, os.ModePerm); err != nil {
47+
return nil, err
48+
}
49+
}
50+
51+
values := make(map[string][]byte, 0)
52+
busy := make(map[string]*sync.Mutex, 0)
53+
54+
// Go through every file an save its name in the map. The content of the file
55+
// is loaded when needed. This makes sure that we don't have to read
56+
// the directory content each time the user wants data that's not yet loaded.
57+
for _, info := range fileInfos {
58+
if !info.IsDir() {
59+
values[info.Name()] = nil
60+
}
61+
}
62+
63+
hash := sha256.New()
64+
65+
mutex := &sync.Mutex{}
66+
67+
c := &Cache{
68+
folder: opts.Path,
69+
hash: hash,
70+
knownValues: values,
71+
busyValues: busy,
72+
mutex: mutex,
73+
maxSize: opts.MaxSize,
74+
ttl: opts.TTL,
75+
}
76+
77+
go func() {
78+
ticker := time.NewTicker(c.ttl)
79+
defer ticker.Stop()
80+
for range ticker.C {
81+
files, err := c.findFilesOlderThanTTL()
82+
if err != nil {
83+
continue
84+
}
85+
for _, file := range files {
86+
err := c.deleteFromHash(file.Name())
87+
if err != nil {
88+
continue
89+
}
90+
}
91+
}
92+
}()
93+
94+
return c, nil
95+
}
96+
97+
// Returns true if the resource is found, and false otherwise. If the
98+
// resource is busy, this method will hang until the resource is free. If
99+
// the resource is not found, a lock indicating that the resource is busy will
100+
// be returned. Once the resource Has been put into cache the busy lock *must*
101+
// be unlocked to allow others to access the newly cached resource
102+
func (c *Cache) Has(key string) (*sync.Mutex, bool) {
103+
hashValue := calcHash(key)
104+
105+
c.mutex.Lock()
106+
defer c.mutex.Unlock()
107+
108+
// If the resource is busy, wait for it to be free. This is the case if
109+
// the resource is currently being cached as a result of another request.
110+
// Also, release the lock on the cache to allow other readers while waiting
111+
if lock, busy := c.busyValues[hashValue]; busy {
112+
c.mutex.Unlock()
113+
lock.Lock()
114+
// just waiting in case lock was previously acquired
115+
lock.Unlock()
116+
c.mutex.Lock()
117+
}
118+
119+
// If a resource is in the shared cache, it can't be reserved. One can simply
120+
// access it directly from the cache
121+
if _, found := c.knownValues[hashValue]; found {
122+
return nil, true
123+
}
124+
125+
// The resource is not in the cache, mark the resource as busy until it has
126+
// been cached successfully. Unlocking lock is required!
127+
lock := new(sync.Mutex)
128+
lock.Lock()
129+
c.busyValues[hashValue] = lock
130+
return lock, false
131+
}
132+
133+
func (c *Cache) Get(key string) (*io.Reader, error) {
134+
var response io.Reader
135+
hashValue := calcHash(key)
136+
137+
// Try to get content. Error if not found.
138+
c.mutex.Lock()
139+
content, ok := c.knownValues[hashValue]
140+
c.mutex.Unlock()
141+
if !ok && len(content) > 0 {
142+
log.Debugf("Cache doesn't know key '%s'", hashValue)
143+
return nil, fmt.Errorf("key '%s' is not known to cache", hashValue)
144+
}
145+
146+
log.Debugf("Cache has key '%s'", hashValue)
147+
148+
// Key is known, but not loaded into RAM
149+
if content == nil {
150+
log.Debugf("Cache item '%s' known but is not stored in memory. Using file.", hashValue)
151+
152+
file, err := os.Open(filepath.Join(c.folder, hashValue))
153+
if err != nil {
154+
log.Errorf("Error reading cached file '%s': %s", hashValue, err)
155+
// forget the cached item
156+
_ = c.deleteFromHash(hashValue)
157+
return nil, err
158+
}
159+
160+
response = file
161+
162+
log.Debugf("Create reader from file %s", hashValue)
163+
} else { // Key is known and data is already loaded to RAM
164+
response = bytes.NewReader(content)
165+
log.Debugf("Create reader from %d byte large cache content", len(content))
166+
}
167+
168+
return &response, nil
169+
}
170+
171+
func (c *Cache) Delete(key string) error {
172+
return c.deleteFromHash(calcHash(key))
173+
}
174+
175+
func (c *Cache) deleteFromHash(hashValue string) error {
176+
c.mutex.Lock()
177+
defer c.mutex.Unlock()
178+
179+
// If the resource is busy, wait for it to be free. This is the case if
180+
// the resource is currently being cached as a result of another request.
181+
// Also, release the lock on the cache to allow other readers while waiting
182+
if lock, busy := c.busyValues[hashValue]; busy {
183+
c.mutex.Unlock()
184+
lock.Lock()
185+
// just waiting in case lock was previously acquired
186+
lock.Unlock()
187+
c.mutex.Lock()
188+
}
189+
190+
delete(c.busyValues, hashValue)
191+
delete(c.knownValues, hashValue)
192+
193+
return os.Remove(filepath.Join(c.folder, hashValue))
194+
}
195+
196+
func (c *Cache) findFilesOlderThanTTL() ([]fs.DirEntry, error) {
197+
var files []fs.DirEntry
198+
tmpfiles, err := os.ReadDir(c.folder)
199+
if err != nil {
200+
return files, err
201+
}
202+
203+
for _, file := range tmpfiles {
204+
if file.Type().IsRegular() {
205+
info, err := file.Info()
206+
if err != nil {
207+
return files, err
208+
}
209+
if time.Since(info.ModTime()) > c.ttl {
210+
files = append(files, file)
211+
}
212+
}
213+
}
214+
return files, err
215+
}
216+
217+
// release is an internal method which atomically caches an item and unmarks
218+
// the item as busy, if it was busy before. The busy lock *must* be unlocked
219+
// elsewhere!
220+
func (c *Cache) release(hashValue string, content []byte) {
221+
c.mutex.Lock()
222+
delete(c.busyValues, hashValue)
223+
c.knownValues[hashValue] = content
224+
c.mutex.Unlock()
225+
}
226+
227+
func (c *Cache) Put(key string, content *io.Reader, contentLength int64) error {
228+
hashValue := calcHash(key)
229+
230+
// Small enough to put it into the in-memory cache
231+
if contentLength <= c.maxSize*1024*1024 {
232+
buffer := &bytes.Buffer{}
233+
_, err := io.Copy(buffer, *content)
234+
if err != nil {
235+
return err
236+
}
237+
238+
defer c.release(hashValue, buffer.Bytes())
239+
log.Debugf("Added %s into in-memory cache", hashValue)
240+
241+
err = os.WriteFile(filepath.Join(c.folder, hashValue), buffer.Bytes(), 0644)
242+
if err != nil {
243+
return err
244+
}
245+
log.Debugf("Wrote content of entry %s into file", hashValue)
246+
} else { // Too large for in-memory cache, just write to file
247+
defer c.release(hashValue, nil)
248+
log.Debugf("Added nil-entry for %s into in-memory cache", hashValue)
249+
250+
file, err := os.Create(filepath.Join(c.folder, hashValue))
251+
if err != nil {
252+
return err
253+
}
254+
255+
writer := bufio.NewWriter(file)
256+
_, err = io.Copy(writer, *content)
257+
if err != nil {
258+
return err
259+
}
260+
log.Debugf("Wrote content of entry %s into file", hashValue)
261+
}
262+
263+
log.Debugf("Cache wrote content into '%s'", hashValue)
264+
265+
return nil
266+
}
267+
268+
// calcHash returns the lowercase hex encoding of the SHA-256 digest of data.
func calcHash(data string) string {
	hasher := sha256.New()
	// Writing to a hash never returns an error.
	hasher.Write([]byte(data))
	return hex.EncodeToString(hasher.Sum(nil))
}

go.mod

+27-6
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,32 @@
11
module github.com/Kriechi/aws-s3-reverse-proxy
22

3+
go 1.20
4+
35
require (
4-
github.com/aws/aws-sdk-go v1.38.25
5-
github.com/prometheus/client_golang v1.11.1
6-
github.com/sirupsen/logrus v1.8.1
7-
github.com/stretchr/testify v1.7.0
8-
gopkg.in/alecthomas/kingpin.v2 v2.2.6
6+
github.com/alecthomas/kingpin/v2 v2.3.2
7+
github.com/aws/aws-sdk-go v1.44.308
8+
github.com/prometheus/client_golang v1.16.0
9+
github.com/sirupsen/logrus v1.9.3
10+
github.com/stretchr/testify v1.8.4
11+
k8s.io/utils v0.0.0-20230726121419-3b25d923346b
912
)
1013

11-
go 1.16
14+
require (
15+
github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 // indirect
16+
github.com/beorn7/perks v1.0.1 // indirect
17+
github.com/cespare/xxhash/v2 v2.2.0 // indirect
18+
github.com/davecgh/go-spew v1.1.1 // indirect
19+
github.com/golang/protobuf v1.5.3 // indirect
20+
github.com/jmespath/go-jmespath v0.4.0 // indirect
21+
github.com/kr/text v0.2.0 // indirect
22+
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
23+
github.com/pmezard/go-difflib v1.0.0 // indirect
24+
github.com/prometheus/client_model v0.3.0 // indirect
25+
github.com/prometheus/common v0.42.0 // indirect
26+
github.com/prometheus/procfs v0.10.1 // indirect
27+
github.com/rogpeppe/go-internal v1.11.0 // indirect
28+
github.com/xhit/go-str2duration/v2 v2.1.0 // indirect
29+
golang.org/x/sys v0.10.0 // indirect
30+
google.golang.org/protobuf v1.30.0 // indirect
31+
gopkg.in/yaml.v3 v3.0.1 // indirect
32+
)

0 commit comments

Comments
 (0)