
Commit 6759ab6

Add support for StatefulSets
Applies deislabs#43. With this change, Osiris can now manage both Deployments and StatefulSets. I introduced a new service annotation `osiris.deislabs.io/statefulset` to link a service with its StatefulSet; otherwise everything works the same as with Deployments. Internally, I avoided duplicating code by introducing a `kind` parameter to distinguish Deployments from StatefulSets.
1 parent: f79f0aa · commit: 6759ab6

File tree: 15 files changed (+529, -275 lines)

README.md

Lines changed: 10 additions & 9 deletions
```diff
@@ -29,11 +29,11 @@ Various types of Kubernetes resources can be Osiris-enabled using an annotation.
 Osiris-enabled pods are automatically instrumented with a __metrics-collecting
 proxy__ deployed as a sidecar container.
 
-Osiris-enabled deployments (if _already_ scaled to a configurable minimum number
-of replicas-- one by default) automatically have metrics from their pods
-continuously scraped and analyzed by the __zeroscaler__ component. When the
-aggregated metrics reveal that all of the deployment's pods are idling, the
-zeroscaler scales the deployment to zero replicas.
+Osiris-enabled deployments or statefulSets (if _already_ scaled to a configurable
+minimum number of replicas-- one by default) automatically have metrics from
+their pods continuously scraped and analyzed by the __zeroscaler__ component.
+When the aggregated metrics reveal that all of the deployment's pods are idling,
+the zeroscaler scales the deployment to zero replicas.
 
 Under normal circumstances, scaling a deployment to zero replicas poses a
 problem: any services that select pods from that deployment (and only that
@@ -175,14 +175,14 @@ spec:
 
 Most of Osiris configuration is done with Kubernetes annotations - as seen in the Usage section.
 
-#### Deployment Annotations
+#### Deployment & StatefulSet Annotations
 
-The following table lists the supported annotations for Kubernetes `Deployments` and their default values.
+The following table lists the supported annotations for Kubernetes `Deployments` and `StatefulSets`, and their default values.
 
 | Annotation | Description | Default |
 | ---------- | ----------- | ------- |
-| `osiris.dm.gg/enabled` | Enable the zeroscaler component to scrape and analyze metrics from the deployment's pods and scale the deployment to zero when idle. Allowed values: `y`, `yes`, `true`, `on`, `1`. | _no value_ (= disabled) |
-| `osiris.dm.gg/minReplicas` | The minimum number of replicas to set on the deployment when Osiris will scale up. If you set `2`, Osiris will scale the deployment from `0` to `2` replicas directly. Osiris won't collect metrics from deployments which have more than `minReplicas` replicas - to avoid useless collections of metrics. | `1` |
+| `osiris.dm.gg/enabled` | Enable the zeroscaler component to scrape and analyze metrics from the deployment's or statefulSet's pods and scale the deployment/statefulSet to zero when idle. Allowed values: `y`, `yes`, `true`, `on`, `1`. | _no value_ (= disabled) |
+| `osiris.dm.gg/minReplicas` | The minimum number of replicas to set on the deployment/statefulSet when Osiris will scale up. If you set `2`, Osiris will scale the deployment/statefulSet from `0` to `2` replicas directly. Osiris won't collect metrics from deployments/statefulSets which have more than `minReplicas` replicas - to avoid useless collections of metrics. | `1` |
 | `osiris.dm.gg/metricsCheckInterval` | The interval in which Osiris would repeatedly track the pod http request metrics. The value is the number of seconds of the interval. Note that this value override the global value defined by the `zeroscaler.metricsCheckInterval` Helm value. | _value of the `zeroscaler.metricsCheckInterval` Helm value_ |
 
 #### Pod Annotations
@@ -202,6 +202,7 @@ The following table lists the supported annotations for Kubernetes `Services` an
 | ---------- | ----------- | ------- |
 | `osiris.dm.gg/enabled` | Enable this service's endpoints to be managed by the Osiris endpoints controller. Allowed values: `y`, `yes`, `true`, `on`, `1`. | _no value_ (= disabled) |
 | `osiris.dm.gg/deployment` | Name of the deployment which is behind this service. This is _required_ to map the service with its deployment. | _no value_ |
+| `osiris.dm.gg/statefulset` | Name of the statefulSet which is behind this service. This is _required_ to map the service with its statefulSet. | _no value_ |
 | `osiris.dm.gg/loadBalancerHostname` | Map requests coming from a specific hostname to this service. Note that if you have multiple hostnames, you can set them with different annotations, using `osiris.dm.gg/loadBalancerHostname-1`, `osiris.dm.gg/loadBalancerHostname-2`, ... | _no value_ |
 | `osiris.dm.gg/ingressHostname` | Map requests coming from a specific hostname to this service. If you use an ingress in front of your service, this is required to create a link between the ingress and the service. Note that if you have multiple hostnames, you can set them with different annotations, using `osiris.dm.gg/ingressHostname-1`, `osiris.dm.gg/ingressHostname-2`, ... | _no value_ |
 | `osiris.dm.gg/ingressDefaultPort` | Custom service port when the request comes from an ingress. Default behaviour if there are more than 1 port on the service, is to look for a port named `http`, and fallback to the port `80`. Set this if you have multiple ports and using a non-standard port with a non-standard name. | _no value_ |
```
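To Osiris-enable a service backed by a StatefulSet, you set the `enabled` and `statefulset` annotations above on the Service. A minimal client-go sketch, assuming the `osiris.dm.gg` prefix documented in this README; the function name and all resource names are placeholders for illustration:

```go
package osirisexample

import (
	"context"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/kubernetes"
)

// EnableOsirisForStatefulSet patches an existing Service so that the Osiris
// endpoints controller manages it and maps it to the named StatefulSet.
func EnableOsirisForStatefulSet(
	ctx context.Context,
	client kubernetes.Interface,
	namespace, serviceName, statefulSetName string,
) error {
	// Merge the two annotations into the Service's metadata.
	patch := []byte(`{"metadata":{"annotations":{` +
		`"osiris.dm.gg/enabled":"true",` +
		`"osiris.dm.gg/statefulset":"` + statefulSetName + `"}}}`)
	_, err := client.CoreV1().Services(namespace).Patch(
		ctx,
		serviceName,
		types.StrategicMergePatchType,
		patch,
		metav1.PatchOptions{},
	)
	return err
}
```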

charts/osiris/templates/cluster-role.yaml

Lines changed: 1 addition & 0 deletions
```diff
@@ -32,6 +32,7 @@ rules:
   - apps
   resources:
   - deployments
+  - statefulsets
   verbs:
   - get
   - list
```

pkg/deployments/activator/activating.go

Lines changed: 56 additions & 5 deletions
```diff
@@ -15,24 +15,24 @@ import (
 func (a *activator) activateDeployment(
 	ctx context.Context,
 	app *app,
-) (*deploymentActivation, error) {
+) (*appActivation, error) {
 	deploymentsClient := a.kubeClient.AppsV1().Deployments(app.namespace)
 	deployment, err := deploymentsClient.Get(
 		ctx,
-		app.deploymentName,
+		app.name,
 		metav1.GetOptions{},
 	)
 	if err != nil {
 		return nil, err
 	}
-	da := &deploymentActivation{
+	da := &appActivation{
 		readyAppPodIPs: map[string]struct{}{},
 		successCh:      make(chan struct{}),
 		timeoutCh:      make(chan struct{}),
 	}
 	glog.Infof(
 		"Activating deployment %s in namespace %s",
-		app.deploymentName,
+		app.name,
 		app.namespace,
 	)
 	go da.watchForCompletion(
@@ -55,7 +55,58 @@ func (a *activator) activateDeployment(
 	patchesBytes, _ := json.Marshal(patches)
 	_, err = deploymentsClient.Patch(
 		ctx,
-		app.deploymentName,
+		app.name,
+		k8s_types.JSONPatchType,
+		patchesBytes,
+		metav1.PatchOptions{},
+	)
+	return da, err
+}
+
+func (a *activator) activateStatefulSet(
+	ctx context.Context,
+	app *app,
+) (*appActivation, error) {
+	statefulSetsClient := a.kubeClient.AppsV1().StatefulSets(app.namespace)
+	statefulSet, err := statefulSetsClient.Get(
+		ctx,
+		app.name,
+		metav1.GetOptions{},
+	)
+	if err != nil {
+		return nil, err
+	}
+	da := &appActivation{
+		readyAppPodIPs: map[string]struct{}{},
+		successCh:      make(chan struct{}),
+		timeoutCh:      make(chan struct{}),
+	}
+	glog.Infof(
+		"Activating statefulSet %s in namespace %s",
+		app.name,
+		app.namespace,
+	)
+	go da.watchForCompletion(
+		a.kubeClient,
+		app,
+		labels.Set(statefulSet.Spec.Selector.MatchLabels).AsSelector(),
+	)
+	if statefulSet.Spec.Replicas == nil || *statefulSet.Spec.Replicas > 0 {
+		// We don't need to do this, as it turns out! Scaling is either already
+		// in progress-- perhaps initiated by another process-- or may even be
+		// completed already. Just return da and allow the caller to move on to
+		// verifying / waiting for this activation to be complete.
+		return da, nil
+	}
+	patches := []kubernetes.PatchOperation{{
+		Op:    "replace",
+		Path:  "/spec/replicas",
+		Value: kubernetes.GetMinReplicas(statefulSet.Annotations, 1),
+	}}
+	patchesBytes, _ := json.Marshal(patches)
+	_, err = statefulSetsClient.Patch(
+		ctx,
+		app.name,
 		k8s_types.JSONPatchType,
 		patchesBytes,
 		metav1.PatchOptions{},
```
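Both activate helpers scale up by sending a JSON patch that replaces `/spec/replicas`. As a self-contained reference for what is actually marshaled and sent to the API server, here is a small sketch; `patchOperation` mirrors the assumed shape of `kubernetes.PatchOperation` from `pkg/kubernetes`, which this diff does not show:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// patchOperation mirrors the assumed shape of kubernetes.PatchOperation
// (one JSON-patch operation); the real type lives in pkg/kubernetes.
type patchOperation struct {
	Op    string      `json:"op"`
	Path  string      `json:"path"`
	Value interface{} `json:"value,omitempty"`
}

func main() {
	// The same patch activateDeployment/activateStatefulSet build, with a
	// minReplicas of 1 (the default passed to kubernetes.GetMinReplicas).
	patches := []patchOperation{{
		Op:    "replace",
		Path:  "/spec/replicas",
		Value: 1,
	}}
	b, _ := json.Marshal(patches)
	fmt.Println(string(b))
	// Output: [{"op":"replace","path":"/spec/replicas","value":1}]
}
```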

pkg/deployments/activator/activator.go

Lines changed: 14 additions & 14 deletions
```diff
@@ -21,16 +21,16 @@ type Activator interface {
 }
 
 type activator struct {
-	kubeClient                kubernetes.Interface
-	servicesInformer          cache.SharedIndexInformer
-	nodeInformer              cache.SharedIndexInformer
-	services                  map[string]*corev1.Service
-	nodeAddresses             map[string]struct{}
-	appsByHost                map[string]*app
-	indicesLock               sync.RWMutex
-	deploymentActivations     map[string]*deploymentActivation
-	deploymentActivationsLock sync.Mutex
-	srv                       *http.Server
+	kubeClient         kubernetes.Interface
+	servicesInformer   cache.SharedIndexInformer
+	nodeInformer       cache.SharedIndexInformer
+	services           map[string]*corev1.Service
+	nodeAddresses      map[string]struct{}
+	appsByHost         map[string]*app
+	indicesLock        sync.RWMutex
+	appActivations     map[string]*appActivation
+	appActivationsLock sync.Mutex
+	srv                *http.Server
 }
 
 func NewActivator(kubeClient kubernetes.Interface) Activator {
@@ -56,8 +56,8 @@ func NewActivator(kubeClient kubernetes.Interface) Activator {
 			Addr:    fmt.Sprintf(":%d", port),
 			Handler: mux,
 		},
-		appsByHost:            map[string]*app{},
-		deploymentActivations: map[string]*deploymentActivation{},
+		appsByHost:     map[string]*app{},
+		appActivations: map[string]*appActivation{},
 	}
 	a.servicesInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
 		AddFunc: a.syncService,
@@ -106,7 +106,7 @@ func (a *activator) syncService(obj interface{}) {
 	a.indicesLock.Lock()
 	defer a.indicesLock.Unlock()
 	svc := obj.(*corev1.Service)
-	svcKey := getKey(svc.Namespace, svc.Name)
+	svcKey := getKey(svc.Namespace, "Service", svc.Name)
 	if k8s.ResourceIsOsirisEnabled(svc.Annotations) {
 		a.services[svcKey] = svc
 	} else {
@@ -119,7 +119,7 @@ func (a *activator) syncDeletedService(obj interface{}) {
 	a.indicesLock.Lock()
 	defer a.indicesLock.Unlock()
 	svc := obj.(*corev1.Service)
-	svcKey := getKey(svc.Namespace, svc.Name)
+	svcKey := getKey(svc.Namespace, "Service", svc.Name)
 	delete(a.services, svcKey)
 	a.updateIndex()
 }
```
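Note that `getKey` now takes a kind segment (`"Service"` here), so resources of different kinds sharing a namespace and name map to distinct index keys. The implementation of `getKey` is not shown in this diff; a plausible sketch, for illustration only:

```go
// Hypothetical sketch of getKey after this change; the real function is
// defined elsewhere in this package. The kind segment prevents key
// collisions between, say, a Deployment and a StatefulSet named alike.
func getKey(namespace, kind, name string) string {
	return namespace + ":" + kind + ":" + name
}
```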

pkg/deployments/activator/app.go

Lines changed: 9 additions & 1 deletion
```diff
@@ -5,10 +5,18 @@ import (
 	"net/url"
 )
 
+type appKind string
+
+const (
+	appKindDeployment  appKind = "Deployment"
+	appKindStatefulSet appKind = "StatefulSet"
+)
+
 type app struct {
 	namespace           string
 	serviceName         string
-	deploymentName      string
+	name                string
+	kind                appKind
 	targetURL           *url.URL
 	proxyRequestHandler *httputil.ReverseProxy
 }
```
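With `kind` now stored on each `app`, request handling can dispatch to the matching activation helper. The actual dispatch site is not part of this diff; a hypothetical sketch of what it presumably looks like inside the activator package:

```go
// Hypothetical sketch only: the real call site is elsewhere in this
// package and is not shown in this commit. It assumes the app.kind field
// and the two activate helpers introduced by this change.
func (a *activator) activate(
	ctx context.Context,
	app *app,
) (*appActivation, error) {
	switch app.kind {
	case appKindStatefulSet:
		return a.activateStatefulSet(ctx, app)
	default:
		// appKindDeployment is the historical default.
		return a.activateDeployment(ctx, app)
	}
}
```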

pkg/deployments/activator/deployment_activation.go renamed to pkg/deployments/activator/app_activation.go

Lines changed: 29 additions & 28 deletions
```diff
@@ -15,34 +15,34 @@ import (
 	k8s "github.com/dailymotion/osiris/pkg/kubernetes"
 )
 
-type deploymentActivation struct {
+type appActivation struct {
 	readyAppPodIPs map[string]struct{}
 	endpoints      *corev1.Endpoints
 	lock           sync.Mutex
 	successCh      chan struct{}
 	timeoutCh      chan struct{}
 }
 
-func (d *deploymentActivation) watchForCompletion(
+func (a *appActivation) watchForCompletion(
 	kubeClient kubernetes.Interface,
 	app *app,
 	appPodSelector labels.Selector,
 ) {
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
-	// Watch the pods managed by this deployment
+	// Watch the pods managed by this deployment/statefulSet
 	podsInformer := k8s.PodsIndexInformer(
 		kubeClient,
 		app.namespace,
 		nil,
 		appPodSelector,
 	)
 	podsInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
-		AddFunc: d.syncPod,
+		AddFunc: a.syncPod,
 		UpdateFunc: func(_, newObj interface{}) {
-			d.syncPod(newObj)
+			a.syncPod(newObj)
 		},
-		DeleteFunc: d.syncPod,
+		DeleteFunc: a.syncPod,
 	})
 	// Watch the corresponding endpoints resource for this service
 	endpointsInformer := k8s.EndpointsIndexInformer(
@@ -55,9 +55,9 @@ func (d *deploymentActivation) watchForCompletion(
 		nil,
 	)
 	endpointsInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
-		AddFunc: d.syncEndpoints,
+		AddFunc: a.syncEndpoints,
 		UpdateFunc: func(_, newObj interface{}) {
-			d.syncEndpoints(newObj)
+			a.syncEndpoints(newObj)
 		},
 	})
 	go podsInformer.Run(ctx.Done())
@@ -66,23 +66,24 @@ func (d *deploymentActivation) watchForCompletion(
 	defer timer.Stop()
 	for {
 		select {
-		case <-d.successCh:
+		case <-a.successCh:
 			return
 		case <-timer.C:
 			glog.Errorf(
-				"Activation of deployment %s in namespace %s timed out",
-				app.deploymentName,
+				"Activation of %s %s in namespace %s timed out",
+				app.kind,
+				app.name,
 				app.namespace,
 			)
-			close(d.timeoutCh)
+			close(a.timeoutCh)
 			return
 		}
 	}
 }
 
-func (d *deploymentActivation) syncPod(obj interface{}) {
-	d.lock.Lock()
-	defer d.lock.Unlock()
+func (a *appActivation) syncPod(obj interface{}) {
+	a.lock.Lock()
+	defer a.lock.Unlock()
 	pod := obj.(*corev1.Pod)
 	var ready bool
 	for _, condition := range pod.Status.Conditions {
@@ -95,27 +96,27 @@ func (d *deploymentActivation) syncPod(obj interface{}) {
 	}
 	// Keep track of which pods are ready
 	if ready {
-		d.readyAppPodIPs[pod.Status.PodIP] = struct{}{}
+		a.readyAppPodIPs[pod.Status.PodIP] = struct{}{}
 	} else {
-		delete(d.readyAppPodIPs, pod.Status.PodIP)
+		delete(a.readyAppPodIPs, pod.Status.PodIP)
 	}
-	d.checkActivationComplete()
+	a.checkActivationComplete()
 }
 
-func (d *deploymentActivation) syncEndpoints(obj interface{}) {
-	d.lock.Lock()
-	defer d.lock.Unlock()
-	d.endpoints = obj.(*corev1.Endpoints)
-	d.checkActivationComplete()
+func (a *appActivation) syncEndpoints(obj interface{}) {
+	a.lock.Lock()
+	defer a.lock.Unlock()
+	a.endpoints = obj.(*corev1.Endpoints)
+	a.checkActivationComplete()
 }
 
-func (d *deploymentActivation) checkActivationComplete() {
-	if d.endpoints != nil {
-		for _, subset := range d.endpoints.Subsets {
+func (a *appActivation) checkActivationComplete() {
+	if a.endpoints != nil {
+		for _, subset := range a.endpoints.Subsets {
 			for _, address := range subset.Addresses {
-				if _, ok := d.readyAppPodIPs[address.IP]; ok {
+				if _, ok := a.readyAppPodIPs[address.IP]; ok {
 					glog.Infof("App pod with ip %s is in service", address.IP)
-					close(d.successCh)
+					close(a.successCh)
 					return
 				}
```
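`watchForCompletion` closes exactly one of the two channels: `successCh` once a ready pod's IP appears among the service's endpoint addresses, or `timeoutCh` if the timer fires first. A caller that triggered an activation would wait roughly like this (hypothetical sketch; the request-forwarding code is not part of this commit):

```go
// Hypothetical caller-side wait on an appActivation returned by
// activateDeployment or activateStatefulSet.
select {
case <-da.successCh:
	// A ready pod now backs the service; forward the buffered request.
case <-da.timeoutCh:
	// Activation timed out; fail the pending request.
}
```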
