@@ -8,6 +8,7 @@ package kubernetes
88import (
99 "context"
1010 "fmt"
11+ "net/http"
1112 "time"
1213
1314 appsv1 "k8s.io/api/apps/v1"
6566 webhookTLSPort = 9443
6667)
6768
69+ // cacheReadyCheck returns a healthz.Checker that verifies the manager's cache has synced.
70+ // This ensures the control plane has populated its cache with all resources from the API server
71+ // before reporting ready. This prevents serving inconsistent xDS configuration to Envoy proxies
72+ // when running multiple control plane replicas during periods of resource churn.
73+ func cacheReadyCheck (mgr manager.Manager ) healthz.Checker {
74+ return func (req * http.Request ) error {
75+ // Use a short timeout to avoid blocking the health check indefinitely.
76+ // The readiness probe will retry periodically until the cache syncs.
77+ //
78+ // TODO: For v1.7.0 Make configurable via API and align with helm container readiness probe timeout.
79+ ctx , cancel := context .WithTimeout (req .Context (), 1 * time .Second )
80+ defer cancel ()
81+
82+ // WaitForCacheSync returns true if the cache has synced, false if the context is cancelled.
83+ if ! mgr .GetCache ().WaitForCacheSync (ctx ) {
84+ return fmt .Errorf ("cache not synced yet" )
85+ }
86+
87+ return nil
88+ }
89+ }
90+
6891// New creates a new Provider from the provided EnvoyGateway.
6992func New (ctx context.Context , restCfg * rest.Config , svrCfg * ec.Server , resources * message.ProviderResources ) (* Provider , error ) {
7093 // TODO: Decide which mgr opts should be exposed through envoygateway.provider.kubernetes API.
@@ -199,7 +222,10 @@ func New(ctx context.Context, restCfg *rest.Config, svrCfg *ec.Server, resources
199222 }
200223
201224 // Add ready check health probes.
202- if err := mgr .AddReadyzCheck ("readyz" , healthz .Ping ); err != nil {
225+ // Use a custom readiness check that waits for the cache to sync before reporting ready.
226+ // This ensures the control plane has a consistent view of all resources before serving
227+ // xDS configuration to proxies, preventing inconsistent state when multiple replicas exist.
228+ if err := mgr .AddReadyzCheck ("cache-sync" , cacheReadyCheck (mgr )); err != nil {
203229 return nil , fmt .Errorf ("unable to set up ready check: %w" , err )
204230 }
205231
0 commit comments