Skip to content

Commit d5b9a47

Browse files
committed
wait for cache sync before ready
Signed-off-by: jukie <[email protected]>
1 parent 2ec695d commit d5b9a47

File tree

1 file changed

+27
-1
lines changed

1 file changed

+27
-1
lines changed

internal/provider/kubernetes/kubernetes.go

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package kubernetes
88
import (
99
"context"
1010
"fmt"
11+
"net/http"
1112
"time"
1213

1314
appsv1 "k8s.io/api/apps/v1"
@@ -65,6 +66,28 @@ var (
6566
webhookTLSPort = 9443
6667
)
6768

69+
// cacheReadyCheck returns a healthz.Checker that verifies the manager's cache has synced.
70+
// This ensures the control plane has populated its cache with all resources from the API server
71+
// before reporting ready. This prevents serving inconsistent xDS configuration to Envoy proxies
72+
// when running multiple control plane replicas during periods of resource churn.
73+
func cacheReadyCheck(mgr manager.Manager) healthz.Checker {
74+
return func(req *http.Request) error {
75+
// Use a short timeout to avoid blocking the health check indefinitely.
76+
// The readiness probe will retry periodically until the cache syncs.
77+
//
78+
// TODO: For v1.7.0 Make configurable via API and align with helm container readiness probe timeout.
79+
ctx, cancel := context.WithTimeout(req.Context(), 1*time.Second)
80+
defer cancel()
81+
82+
// WaitForCacheSync returns true if the cache has synced, false if the context is cancelled.
83+
if !mgr.GetCache().WaitForCacheSync(ctx) {
84+
return fmt.Errorf("cache not synced yet")
85+
}
86+
87+
return nil
88+
}
89+
}
90+
6891
// New creates a new Provider from the provided EnvoyGateway.
6992
func New(ctx context.Context, restCfg *rest.Config, svrCfg *ec.Server, resources *message.ProviderResources) (*Provider, error) {
7093
// TODO: Decide which mgr opts should be exposed through envoygateway.provider.kubernetes API.
@@ -199,7 +222,10 @@ func New(ctx context.Context, restCfg *rest.Config, svrCfg *ec.Server, resources
199222
}
200223

201224
// Add ready check health probes.
202-
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
225+
// Use a custom readiness check that waits for the cache to sync before reporting ready.
226+
// This ensures the control plane has a consistent view of all resources before serving
227+
// xDS configuration to proxies, preventing inconsistent state when multiple replicas exist.
228+
if err := mgr.AddReadyzCheck("cache-sync", cacheReadyCheck(mgr)); err != nil {
203229
return nil, fmt.Errorf("unable to set up ready check: %w", err)
204230
}
205231

0 commit comments

Comments
 (0)