Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 32 additions & 2 deletions internal/provider/kubernetes/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package kubernetes
import (
"context"
"fmt"
"net/http"
"time"

appsv1 "k8s.io/api/apps/v1"
Expand Down Expand Up @@ -65,6 +66,28 @@ var (
webhookTLSPort = 9443
)

// cacheReadyCheck builds a healthz.Checker that reports ready only once the
// manager's cache has finished syncing.
//
// The intent is that a control plane replica does not advertise readiness until
// its cache holds the resources from the API server, so that Envoy proxies are
// not served inconsistent xDS configuration when several replicas run during
// periods of resource churn.
func cacheReadyCheck(mgr manager.Manager) healthz.Checker {
	return func(req *http.Request) error {
		// Bound each probe so a slow sync cannot hang the health endpoint;
		// the readiness probe is retried periodically until the cache syncs.
		//
		// TODO: For v1.7.0 Make configurable via API and align with helm container readiness probe timeout.
		const syncWait = 1 * time.Second

		probeCtx, stop := context.WithTimeout(req.Context(), syncWait)
		defer stop()

		// WaitForCacheSync reports true once the cache has synced and false
		// when the context is cancelled first.
		if synced := mgr.GetCache().WaitForCacheSync(probeCtx); synced {
			return nil
		}

		return fmt.Errorf("cache not synced yet")
	}
}

// New creates a new Provider from the provided EnvoyGateway.
func New(ctx context.Context, restCfg *rest.Config, svrCfg *ec.Server, resources *message.ProviderResources) (*Provider, error) {
// TODO: Decide which mgr opts should be exposed through envoygateway.provider.kubernetes API.
Expand All @@ -73,8 +96,10 @@ func New(ctx context.Context, restCfg *rest.Config, svrCfg *ec.Server, resources
Scheme: envoygateway.GetScheme(),
Logger: svrCfg.Logger.Logger,
HealthProbeBindAddress: healthProbeBindAddress,
LeaderElection: false,
LeaderElectionID: "5b9825d2.gateway.envoyproxy.io",
LeaderElectionNamespace: svrCfg.ControllerNamespace,
Controller: config.Controller{NeedLeaderElection: ptr.To(false)},
}

log.SetLogger(mgrOpts.Logger)
Expand All @@ -84,6 +109,9 @@ func New(ctx context.Context, restCfg *rest.Config, svrCfg *ec.Server, resources

if !ptr.Deref(svrCfg.EnvoyGateway.Provider.Kubernetes.LeaderElection.Disable, false) {
mgrOpts.LeaderElection = true
mgrOpts.Controller.NeedLeaderElection = ptr.To(true)
mgrOpts.Controller.EnableWarmup = ptr.To(true)

if svrCfg.EnvoyGateway.Provider.Kubernetes.LeaderElection.LeaseDuration != nil {
ld, err := time.ParseDuration(string(*svrCfg.EnvoyGateway.Provider.Kubernetes.LeaderElection.LeaseDuration))
if err != nil {
Expand All @@ -107,7 +135,6 @@ func New(ctx context.Context, restCfg *rest.Config, svrCfg *ec.Server, resources
}
mgrOpts.RenewDeadline = ptr.To(rd)
}
mgrOpts.Controller = config.Controller{NeedLeaderElection: ptr.To(false)}
}

if svrCfg.EnvoyGateway.Provider.Kubernetes.CacheSyncPeriod != nil {
Expand Down Expand Up @@ -199,7 +226,10 @@ func New(ctx context.Context, restCfg *rest.Config, svrCfg *ec.Server, resources
}

// Add ready check health probes.
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
// Use a custom readiness check that waits for the cache to sync before reporting ready.
// This ensures the control plane has a consistent view of all resources before serving
// xDS configuration to proxies, preventing inconsistent state when multiple replicas exist.
if err := mgr.AddReadyzCheck("cache-sync", cacheReadyCheck(mgr)); err != nil {
return nil, fmt.Errorf("unable to set up ready check: %w", err)
}

Expand Down
1 change: 1 addition & 0 deletions release-notes/current.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ bug fixes: |
- Fixed Listener port limit typo 65353 -> 65535.
- Fixed issue where reloading invalid envoy gateway configuration.
- Fixed missing JWT provider configuration when JWT authentication is configured on multiple HTTP listeners sharing the same port.
- Fixed an issue in the controller configuration that led to inconsistent xDS state.

# Enhancements that improve performance.
performance improvements: |
Expand Down