Skip to content

Commit d1a6b3b

Browse files
committed
feat: Add caching
- Fixed code lints - Refactored the code into packages - Add caching - Add a helm chart to deploy to k8s - Fixes #7 Signed-off-by: Luis Davim <[email protected]>
1 parent 7914025 commit d1a6b3b

23 files changed

+1280
-329
lines changed

.gitignore

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
aws-s3-reverse-proxy
1+
./aws-s3-reverse-proxy
22
aws-s3-reverse-proxy.tar
33
config
44
test.txt
5+
cache.d/

cache/cache.go

+271
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,271 @@
1+
package cache
2+
3+
// This file was initially taken from https://github.com/hauke96/tiny-http-proxy/blob/master/cache.go
4+
// some extra functionality and fixes have been added:
5+
// Allow invalidating cache items
6+
// Expiring cached items based on a global TTL
7+
8+
import (
9+
"bufio"
10+
"bytes"
11+
"crypto/sha256"
12+
"encoding/hex"
13+
"fmt"
14+
"hash"
15+
"io"
16+
"io/fs"
17+
"os"
18+
"path/filepath"
19+
"sync"
20+
"time"
21+
22+
log "github.com/sirupsen/logrus"
23+
)
24+
25+
// Cache stores entries as files inside a folder and additionally keeps the
// content of small entries in memory. Entries are addressed by the hex-encoded
// SHA-256 of their key.
type Cache struct {
	// folder is the directory that holds one file per cached entry.
	folder string
	// hash is set up by CreateCache; no method in this file reads it.
	hash hash.Hash

	// knownValues maps a hashed key to its in-memory content. A nil value
	// means the entry is known but only available from its file.
	knownValues map[string][]byte
	// busyValues holds, per hashed key, the lock of a fetch that is still
	// in progress.
	busyValues map[string]*sync.Mutex
	// mutex guards knownValues and busyValues.
	mutex *sync.Mutex

	// maxSize is the largest content length, in MiB, that is kept in memory.
	maxSize int64
	// ttl is the age after which cached files are removed.
	ttl time.Duration
}
34+
35+
// Options carries the settings CreateCache needs to build a Cache.
type Options struct {
	// Path is the directory in which cached entries are stored.
	Path string
	// MaxSize is the largest content length, in MiB, that is also kept in
	// memory; anything bigger is only written to a file.
	MaxSize int64
	// TTL is the age after which cached files are removed.
	TTL time.Duration
}
40+
41+
func CreateCache(opts Options) (*Cache, error) {
42+
fileInfos, err := os.ReadDir(opts.Path)
43+
if err != nil {
44+
log.Warnf("Cannot open cache folder '%s': %s", opts.Path, err)
45+
log.Infof("Create cache folder '%s'", opts.Path)
46+
if err := os.Mkdir(opts.Path, os.ModePerm); err != nil {
47+
return nil, err
48+
}
49+
}
50+
51+
values := make(map[string][]byte, 0)
52+
busy := make(map[string]*sync.Mutex, 0)
53+
54+
// Go through every file an save its name in the map. The content of the file
55+
// is loaded when needed. This makes sure that we don't have to read
56+
// the directory content each time the user wants data that's not yet loaded.
57+
for _, info := range fileInfos {
58+
if !info.IsDir() {
59+
values[info.Name()] = nil
60+
}
61+
}
62+
63+
hash := sha256.New()
64+
65+
mutex := &sync.Mutex{}
66+
67+
c := &Cache{
68+
folder: opts.Path,
69+
hash: hash,
70+
knownValues: values,
71+
busyValues: busy,
72+
mutex: mutex,
73+
maxSize: opts.MaxSize,
74+
ttl: opts.TTL,
75+
}
76+
77+
go func() {
78+
ticker := time.NewTicker(c.ttl)
79+
defer ticker.Stop()
80+
for range ticker.C {
81+
files, err := c.findFilesOlderThanTTL()
82+
if err != nil {
83+
continue
84+
}
85+
for _, file := range files {
86+
err := c.deleteFromHash(file.Name())
87+
if err != nil {
88+
continue
89+
}
90+
}
91+
}
92+
}()
93+
94+
return c, nil
95+
}
96+
97+
// Returns true if the resource is found, and false otherwise. If the
98+
// resource is busy, this method will hang until the resource is free. If
99+
// the resource is not found, a lock indicating that the resource is busy will
100+
// be returned. Once the resource Has been put into cache the busy lock *must*
101+
// be unlocked to allow others to access the newly cached resource
102+
func (c *Cache) Has(key string) (*sync.Mutex, bool) {
103+
hashValue := calcHash(key)
104+
105+
c.mutex.Lock()
106+
defer c.mutex.Unlock()
107+
108+
// If the resource is busy, wait for it to be free. This is the case if
109+
// the resource is currently being cached as a result of another request.
110+
// Also, release the lock on the cache to allow other readers while waiting
111+
if lock, busy := c.busyValues[hashValue]; busy {
112+
c.mutex.Unlock()
113+
lock.Lock()
114+
// just waiting in case lock was previously acquired
115+
lock.Unlock()
116+
c.mutex.Lock()
117+
}
118+
119+
// If a resource is in the shared cache, it can't be reserved. One can simply
120+
// access it directly from the cache
121+
if _, found := c.knownValues[hashValue]; found {
122+
return nil, true
123+
}
124+
125+
// The resource is not in the cache, mark the resource as busy until it has
126+
// been cached successfully. Unlocking lock is required!
127+
lock := new(sync.Mutex)
128+
lock.Lock()
129+
c.busyValues[hashValue] = lock
130+
return lock, false
131+
}
132+
133+
func (c *Cache) Get(key string) (*io.Reader, error) {
134+
var response io.Reader
135+
hashValue := calcHash(key)
136+
137+
// Try to get content. Error if not found.
138+
c.mutex.Lock()
139+
content, ok := c.knownValues[hashValue]
140+
c.mutex.Unlock()
141+
if !ok && len(content) > 0 {
142+
log.Debugf("Cache doesn't know key '%s'", hashValue)
143+
return nil, fmt.Errorf("key '%s' is not known to cache", hashValue)
144+
}
145+
146+
log.Debugf("Cache has key '%s'", hashValue)
147+
148+
// Key is known, but not loaded into RAM
149+
if content == nil {
150+
log.Debugf("Cache item '%s' known but is not stored in memory. Using file.", hashValue)
151+
152+
file, err := os.Open(filepath.Join(c.folder, hashValue))
153+
if err != nil {
154+
log.Errorf("Error reading cached file '%s': %s", hashValue, err)
155+
// forget the cached item
156+
_ = c.deleteFromHash(hashValue)
157+
return nil, err
158+
}
159+
160+
response = file
161+
162+
log.Debugf("Create reader from file %s", hashValue)
163+
} else { // Key is known and data is already loaded to RAM
164+
response = bytes.NewReader(content)
165+
log.Debugf("Create reader from %d byte large cache content", len(content))
166+
}
167+
168+
return &response, nil
169+
}
170+
171+
func (c *Cache) Delete(key string) error {
172+
return c.deleteFromHash(calcHash(key))
173+
}
174+
175+
func (c *Cache) deleteFromHash(hashValue string) error {
176+
c.mutex.Lock()
177+
defer c.mutex.Unlock()
178+
179+
// If the resource is busy, wait for it to be free. This is the case if
180+
// the resource is currently being cached as a result of another request.
181+
// Also, release the lock on the cache to allow other readers while waiting
182+
if lock, busy := c.busyValues[hashValue]; busy {
183+
c.mutex.Unlock()
184+
lock.Lock()
185+
// just waiting in case lock was previously acquired
186+
lock.Unlock()
187+
c.mutex.Lock()
188+
}
189+
190+
delete(c.busyValues, hashValue)
191+
delete(c.knownValues, hashValue)
192+
193+
return os.Remove(filepath.Join(c.folder, hashValue))
194+
}
195+
196+
func (c *Cache) findFilesOlderThanTTL() ([]fs.DirEntry, error) {
197+
var files []fs.DirEntry
198+
tmpfiles, err := os.ReadDir(c.folder)
199+
if err != nil {
200+
return files, err
201+
}
202+
203+
for _, file := range tmpfiles {
204+
if file.Type().IsRegular() {
205+
info, err := file.Info()
206+
if err != nil {
207+
return files, err
208+
}
209+
if time.Since(info.ModTime()) > c.ttl {
210+
files = append(files, file)
211+
}
212+
}
213+
}
214+
return files, err
215+
}
216+
217+
// release is an internal method which atomically caches an item and unmarks
218+
// the item as busy, if it was busy before. The busy lock *must* be unlocked
219+
// elsewhere!
220+
func (c *Cache) release(hashValue string, content []byte) {
221+
c.mutex.Lock()
222+
delete(c.busyValues, hashValue)
223+
c.knownValues[hashValue] = content
224+
c.mutex.Unlock()
225+
}
226+
227+
func (c *Cache) Put(key string, content *io.Reader, contentLength int64) error {
228+
hashValue := calcHash(key)
229+
230+
// Small enough to put it into the in-memory cache
231+
if contentLength <= c.maxSize*1024*1024 {
232+
buffer := &bytes.Buffer{}
233+
_, err := io.Copy(buffer, *content)
234+
if err != nil {
235+
return err
236+
}
237+
238+
defer c.release(hashValue, buffer.Bytes())
239+
log.Debugf("Added %s into in-memory cache", hashValue)
240+
241+
err = os.WriteFile(filepath.Join(c.folder, hashValue), buffer.Bytes(), 0644)
242+
if err != nil {
243+
return err
244+
}
245+
log.Debugf("Wrote content of entry %s into file", hashValue)
246+
} else { // Too large for in-memory cache, just write to file
247+
defer c.release(hashValue, nil)
248+
log.Debugf("Added nil-entry for %s into in-memory cache", hashValue)
249+
250+
file, err := os.Create(filepath.Join(c.folder, hashValue))
251+
if err != nil {
252+
return err
253+
}
254+
255+
writer := bufio.NewWriter(file)
256+
_, err = io.Copy(writer, *content)
257+
if err != nil {
258+
return err
259+
}
260+
log.Debugf("Wrote content of entry %s into file", hashValue)
261+
}
262+
263+
log.Debugf("Cache wrote content into '%s'", hashValue)
264+
265+
return nil
266+
}
267+
268+
// calcHash returns the lowercase hex encoding of the SHA-256 digest of data.
// The result is used as the map key and the file name of a cache entry.
func calcHash(data string) string {
	hasher := sha256.New()
	hasher.Write([]byte(data))
	return hex.EncodeToString(hasher.Sum(nil))
}
+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Patterns to ignore when building packages.
2+
# This supports shell glob matching, relative path matching, and
3+
# negation (prefixed with !). Only one pattern per line.
4+
.DS_Store
5+
# Common VCS dirs
6+
.git/
7+
.gitignore
8+
.bzr/
9+
.bzrignore
10+
.hg/
11+
.hgignore
12+
.svn/
13+
# Common backup files
14+
*.swp
15+
*.bak
16+
*.tmp
17+
*.orig
18+
*~
19+
# Various IDEs
20+
.project
21+
.idea/
22+
*.tmproj
23+
.vscode/
+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
apiVersion: v2
2+
name: aws-s3-reverse-proxy
3+
description: A Helm chart for Kubernetes
4+
5+
# A chart can be either an 'application' or a 'library' chart.
6+
#
7+
# Application charts are a collection of templates that can be packaged into versioned archives
8+
# to be deployed.
9+
#
10+
# Library charts provide useful utilities or functions for the chart developer. They're included as
11+
# a dependency of application charts to inject those utilities and functions into the rendering
12+
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
13+
type: application
14+
15+
# This is the chart version. This version number should be incremented each time you make changes
16+
# to the chart and its templates, including the app version.
17+
# Versions are expected to follow Semantic Versioning (https://semver.org/)
18+
version: 0.1.0
19+
20+
# This is the version number of the application being deployed. This version number should be
21+
# incremented each time you make changes to the application. Versions are not expected to
22+
# follow Semantic Versioning. They should reflect the version the application is using.
23+
# It is recommended to use it with quotes.
24+
appVersion: "1.1.0"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
1. Get the application URL by running these commands:
2+
{{- if .Values.ingress.enabled }}
3+
{{- range $host := .Values.ingress.hosts }}
4+
{{- range .paths }}
5+
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
6+
{{- end }}
7+
{{- end }}
8+
{{- else if contains "NodePort" .Values.service.type }}
9+
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "aws-s3-reverse-proxy.fullname" . }})
10+
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
11+
echo http://$NODE_IP:$NODE_PORT
12+
{{- else if contains "LoadBalancer" .Values.service.type }}
13+
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
14+
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "aws-s3-reverse-proxy.fullname" . }}'
15+
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "aws-s3-reverse-proxy.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
16+
echo http://$SERVICE_IP:{{ .Values.service.port }}
17+
{{- else if contains "ClusterIP" .Values.service.type }}
18+
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "aws-s3-reverse-proxy.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
19+
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
20+
echo "Visit http://127.0.0.1:8080 to use your application"
21+
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
22+
{{- end }}

0 commit comments

Comments
 (0)