Skip to content

Commit d6dd86a

Browse files
[8.17](backport #6623) [k8s] Fix logical race conditions in kubernetes_secrets provider (#6795)
* [k8s] Fix logical race conditions in kubernetes_secrets provider (#6623) * fix: refactor kubernetes_secrets provider to eliminate race conditions * fix: add changelog fragment and unit-tests for kubernetes_secrets provider * fix: replace RWMutex with Mutex * fix: rename newExpirationStore to newExpirationCache * fix: introduce kubernetes_secrets provider name as a const * fix: extend AddConditionally doc string to describe the case of condition is nil * fix: gosec lint (cherry picked from commit 6d4b91c) # Conflicts: # internal/pkg/composable/providers/kubernetessecrets/kubernetes_secrets.go * fix: resolve conflicts * fix: implicit memory aliasing in for loop * fix: make Run not to fail if getK8sClientFunc returns an err which is appropriate only for 8.17.x --------- Co-authored-by: Panos Koutsovasilis <[email protected]>
1 parent b64c3ee commit d6dd86a

File tree

6 files changed

+1480
-755
lines changed

6 files changed

+1480
-755
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Kind can be one of:
2+
# - breaking-change: a change to previously-documented behavior
3+
# - deprecation: functionality that is being removed in a later release
4+
# - bug-fix: fixes a problem in a previous version
5+
# - enhancement: extends functionality but does not break or fix existing behavior
6+
# - feature: new functionality
7+
# - known-issue: problems that we are aware of in a given version
8+
# - security: impacts on the security of a product or a user’s deployment.
9+
# - upgrade: important information for someone upgrading from a prior version
10+
# - other: does not fit into any of the other categories
11+
kind: bug-fix
12+
13+
# Change summary; a 80ish characters long description of the change.
14+
summary: Fix logical race conditions in kubernetes_secrets provider
15+
16+
# Long description; in case the summary is not enough to describe the change
17+
# this field accommodate a description without length limits.
18+
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
19+
#description:
20+
21+
# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
22+
component: elastic-agent
23+
24+
# PR URL; optional; the PR number that added the changeset.
25+
# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added.
26+
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
27+
# Please provide it if you are adding a fragment for a different PR.
28+
pr: https://github.com/elastic/elastic-agent/pull/6623
29+
30+
# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
31+
# If not present is automatically filled by the tooling with the issue linked to the PR number.
32+
issue: https://github.com/elastic/elastic-agent/issues/6340

internal/pkg/composable/providers/kubernetessecrets/config.go

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,23 @@ import (
1010
"github.com/elastic/elastic-agent-autodiscover/kubernetes"
1111
)
1212

13-
// Config for kubernetes provider
13+
// Config for kubernetes_secrets provider
1414
type Config struct {
1515
KubeConfig string `config:"kube_config"`
1616
KubeClientOptions kubernetes.KubeClientOptions `config:"kube_client_options"`
1717

18-
RefreshInterval time.Duration `config:"cache_refresh_interval"`
18+
RefreshInterval time.Duration `config:"cache_refresh_interval" validate:"positive,nonzero"`
1919
TTLDelete time.Duration `config:"cache_ttl"`
20-
RequestTimeout time.Duration `config:"cache_request_timeout"`
20+
RequestTimeout time.Duration `config:"cache_request_timeout" validate:"positive,nonzero"`
2121
DisableCache bool `config:"cache_disable"`
2222
}
2323

24-
func (c *Config) InitDefaults() {
25-
c.RefreshInterval = 60 * time.Second
26-
c.TTLDelete = 1 * time.Hour
27-
c.RequestTimeout = 5 * time.Second
28-
c.DisableCache = false
24+
// defaultConfig returns default configuration for kubernetes_secrets provider
25+
func defaultConfig() *Config {
26+
return &Config{
27+
RefreshInterval: 60 * time.Second,
28+
TTLDelete: 1 * time.Hour,
29+
RequestTimeout: 5 * time.Second,
30+
DisableCache: false,
31+
}
2932
}
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
2+
// or more contributor license agreements. Licensed under the Elastic License 2.0;
3+
// you may not use this file except in compliance with the Elastic License 2.0.
4+
5+
package kubernetessecrets
6+
7+
import (
8+
"sync"
9+
"time"
10+
)
11+
12+
// expirationCache is a store that expires items after time.Now - secret.lastAccess > ttl (if ttl > 0) at Get or List.
13+
// expirationCache works with *cacheEntry, a pointer struct that wraps secret, instead of secret directly because map
14+
// structure in standard go library never removes the buckets from memory even after removing all the elements from it.
15+
// However, since *cacheEntry is a pointer it can be garbage collected when no longer referenced by the GC, such as
16+
// when deleted from the map. More importantly working with a pointer makes the entry in the map bucket, that doesn't
17+
// get deallocated, to utilise only 8 bytes on a 64-bit system.
18+
type expirationCache struct {
19+
sync.Mutex
20+
// ttl is the time-to-live for items in the cache
21+
ttl time.Duration
22+
// items is the underlying cache store.
23+
items map[string]*cacheEntry
24+
}
25+
26+
type cacheEntry struct {
27+
s secret
28+
lastAccess time.Time
29+
}
30+
31+
// Get returns the secret associated with the given key from the store if it exists and is not expired. If updateAccess is true
32+
// and the secret exists, essentially the expiration check is skipped and the lastAccess timestamp is updated to time.Now().
33+
func (c *expirationCache) Get(key string, updateAccess bool) (secret, bool) {
34+
c.Lock()
35+
defer c.Unlock()
36+
37+
entry, exists := c.items[key]
38+
if !exists {
39+
return secret{}, false
40+
}
41+
if updateAccess {
42+
entry.lastAccess = time.Now()
43+
} else if c.isExpired(entry.lastAccess) {
44+
delete(c.items, key)
45+
return secret{}, false
46+
}
47+
48+
return entry.s, true
49+
}
50+
51+
// AddConditionally adds the given secret to the store if the given condition returns true. If there is no existing
52+
// secret, the condition will be called with an empty secret and false. If updateAccess is true and the secret already exists,
53+
// then the lastAccess timestamp is updated to time.Now() independently of the condition result.
54+
// Note: if the given condition is nil, then it is considered as a condition that always returns false.
55+
func (c *expirationCache) AddConditionally(key string, in secret, updateAccess bool, condition conditionFn) {
56+
c.Lock()
57+
defer c.Unlock()
58+
entry, exists := c.items[key]
59+
if !exists {
60+
if condition != nil && condition(secret{}, false) {
61+
c.items[key] = &cacheEntry{in, time.Now()}
62+
}
63+
return
64+
}
65+
66+
if condition != nil && condition(entry.s, true) {
67+
entry.s = in
68+
entry.lastAccess = time.Now()
69+
} else if updateAccess {
70+
entry.lastAccess = time.Now()
71+
}
72+
}
73+
74+
// isExpired returns true if the item has expired based on the ttl
75+
func (c *expirationCache) isExpired(lastAccess time.Time) bool {
76+
if c.ttl <= 0 {
77+
// no expiration
78+
return false
79+
}
80+
// we expire if the last access is older than the ttl
81+
return time.Since(lastAccess) > c.ttl
82+
}
83+
84+
// ListKeys returns a list of all the keys of the secrets in the store without checking for expiration
85+
func (c *expirationCache) ListKeys() []string {
86+
c.Lock()
87+
defer c.Unlock()
88+
89+
length := len(c.items)
90+
if length == 0 {
91+
return nil
92+
}
93+
list := make([]string, 0, length)
94+
for key := range c.items {
95+
list = append(list, key)
96+
}
97+
return list
98+
}
99+
100+
// List returns a list of all the secrets in the store that are not expired
101+
func (c *expirationCache) List() []secret {
102+
c.Lock()
103+
defer c.Unlock()
104+
105+
length := len(c.items)
106+
if length == 0 {
107+
return nil
108+
}
109+
list := make([]secret, 0, length)
110+
for _, entry := range c.items {
111+
if c.isExpired(entry.lastAccess) {
112+
continue
113+
}
114+
list = append(list, entry.s)
115+
}
116+
return list
117+
}
118+
119+
// newExpirationCache creates and returns an expirationCache
120+
func newExpirationCache(ttl time.Duration) *expirationCache {
121+
return &expirationCache{
122+
items: make(map[string]*cacheEntry),
123+
ttl: ttl,
124+
}
125+
}

0 commit comments

Comments
 (0)