Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 55 additions & 1 deletion azureappconfiguration/azureappconfiguration.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"strings"
"sync"
"sync/atomic"
"time"

"github.com/Azure/AppConfiguration-GoProvider/azureappconfiguration/internal/jsonc"
"github.com/Azure/AppConfiguration-GoProvider/azureappconfiguration/internal/refresh"
Expand Down Expand Up @@ -140,7 +141,7 @@ func Load(ctx context.Context, authentication AuthenticationOptions, options *Op
}
}

if err := azappcfg.load(ctx); err != nil {
if err := azappcfg.startupWithRetry(ctx, options.StartupOptions.Timeout, azappcfg.load); err != nil {
return nil, err
}
// Set the initial load finished flag
Expand Down Expand Up @@ -681,6 +682,59 @@ func (azappcfg *AzureAppConfiguration) executeFailoverPolicy(ctx context.Context
return fmt.Errorf("failed to get settings from all clients: %v", errors)
}

// startupWithRetry implements retry logic for startup loading with timeout and exponential backoff
func (azappcfg *AzureAppConfiguration) startupWithRetry(ctx context.Context, timeout time.Duration, operation func(context.Context) error) error {
// If no timeout is specified, use the default startup timeout
if timeout <= 0 {
timeout = defaultStartupTimeout
}

// Create a context with timeout for the entire startup process
startupCtx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()

attempt := 0
startTime := time.Now()

for {
attempt++

// Try to load with the current context
err := operation(startupCtx)
if err == nil {
return nil
}

// Check if the error is retriable
if !(isFailoverable(err) ||
strings.Contains(err.Error(), "no client is available") ||
strings.Contains(err.Error(), "failed to get settings from all clients")) {
return fmt.Errorf("load from Azure App Configuration failed with non-retriable error: %w", err)
}

// Calculate backoff duration
timeElapsed := time.Since(startTime)
backoffDuration := getFixedBackoffDuration(timeElapsed)
if backoffDuration == 0 {
backoffDuration = calculateBackoffDuration(attempt)
}
Comment on lines +716 to +720
Copy link

Copilot AI Sep 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic for choosing between fixed and calculated backoff duration is unclear. Consider adding a comment explaining when and why fixed backoff is preferred over exponential backoff.

Copilot uses AI. Check for mistakes.

// Check if we have enough time left to wait and retry
timeRemaining := timeout - timeElapsed
if timeRemaining <= backoffDuration {
return fmt.Errorf("load from Azure App Configuration failed after %d attempts within timeout %v: %w", attempt, timeout, err)
}

// Wait for the backoff duration before retrying
select {
case <-startupCtx.Done():
return fmt.Errorf("load from Azure App Configuration timed out: %w", startupCtx.Err())
case <-time.After(backoffDuration):
// Continue to next retry attempt
}
}
}

func (azappcfg *AzureAppConfiguration) trimPrefix(key string) string {
result := key
for _, prefix := range azappcfg.trimPrefixes {
Expand Down
23 changes: 19 additions & 4 deletions azureappconfiguration/client_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,17 +335,17 @@ func (client *configurationClientWrapper) updateBackoffStatus(success bool) {
client.backOffEndTime = time.Time{}
} else {
client.failedAttempts++
client.backOffEndTime = time.Now().Add(client.getBackoffDuration())
client.backOffEndTime = time.Now().Add(calculateBackoffDuration(client.failedAttempts))
}
}

func (client *configurationClientWrapper) getBackoffDuration() time.Duration {
if client.failedAttempts <= 1 {
func calculateBackoffDuration(failedAttempts int) time.Duration {
if failedAttempts <= 1 {
return minBackoffDuration
}

// Cap the exponent to prevent overflow
exponent := math.Min(float64(client.failedAttempts-1), float64(safeShiftLimit))
exponent := math.Min(float64(failedAttempts-1), float64(safeShiftLimit))
calculatedMilliseconds := float64(minBackoffDuration.Milliseconds()) * math.Pow(2, exponent)
if calculatedMilliseconds > float64(maxBackoffDuration.Milliseconds()) || calculatedMilliseconds <= 0 {
calculatedMilliseconds = float64(maxBackoffDuration.Milliseconds())
Expand All @@ -355,6 +355,21 @@ func (client *configurationClientWrapper) getBackoffDuration() time.Duration {
return jitter(calculatedDuration)
}

func getFixedBackoffDuration(timeElapsed time.Duration) time.Duration {
if timeElapsed < time.Second*100 {
return time.Second * 5
}
if timeElapsed < time.Second*200 {
return time.Second * 10
}

if timeElapsed < time.Second*600 {
return minBackoffDuration
}

return 0
}

func jitter(duration time.Duration) time.Duration {
// Calculate the amount of jitter to add to the duration
jitter := float64(duration) * jitterRatio
Expand Down
5 changes: 5 additions & 0 deletions azureappconfiguration/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,8 @@ const (
jitterRatio float64 = 0.25
safeShiftLimit int = 63
)

// Startup constants
const (
defaultStartupTimeout time.Duration = 100 * time.Second
)
213 changes: 210 additions & 3 deletions azureappconfiguration/failover_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"fmt"
"net"
"net/http"
"strings"
"testing"
"time"

Expand Down Expand Up @@ -664,17 +665,223 @@ func TestClientWrapper_GetBackoffDuration(t *testing.T) {

// First failure should return minimum backoff duration
client.failedAttempts = 1
duration := client.getBackoffDuration()
duration := calculateBackoffDuration(client.failedAttempts)
assert.True(t, duration >= minBackoffDuration)
assert.True(t, duration <= minBackoffDuration*2) // Account for jitter

// Multiple failures should increase duration exponentially
client.failedAttempts = 3
duration3 := client.getBackoffDuration()
duration3 := calculateBackoffDuration(client.failedAttempts)
assert.True(t, duration3 > duration)

// Very high failure count should be capped at max duration
client.failedAttempts = 100
durationMax := client.getBackoffDuration()
durationMax := calculateBackoffDuration(client.failedAttempts)
assert.True(t, durationMax <= maxBackoffDuration*2) // Account for jitter
}

// Test startupWithRetry with successful operation on first attempt
func TestStartupWithRetry_Success_FirstAttempt(t *testing.T) {
mockClientManager := new(mockClientManager)

azappcfg := &AzureAppConfiguration{
clientManager: mockClientManager,
keyValues: make(map[string]any),
featureFlags: make(map[string]any),
kvSelectors: []Selector{{KeyFilter: "*", LabelFilter: "\x00"}},
}

// Create a successful operation that simply returns nil
operation := func(ctx context.Context) error {
return nil // Success on first attempt
}

ctx := context.Background()
err := azappcfg.startupWithRetry(ctx, 10*time.Second, operation)

assert.NoError(t, err)
}

// Test startupWithRetry with retry after retriable error
func TestStartupWithRetry_Success_AfterRetriableError(t *testing.T) {
mockClientManager := new(mockClientManager)

azappcfg := &AzureAppConfiguration{
clientManager: mockClientManager,
keyValues: make(map[string]any),
featureFlags: make(map[string]any),
kvSelectors: []Selector{{KeyFilter: "*", LabelFilter: "\x00"}},
}

callCount := 0
retriableError := &azcore.ResponseError{StatusCode: http.StatusServiceUnavailable}

// Create an operation that fails first, then succeeds
operation := func(ctx context.Context) error {
callCount++
if callCount == 1 {
return retriableError // Fail on first attempt
}
return nil // Success on second attempt
}

ctx := context.Background()
err := azappcfg.startupWithRetry(ctx, 10*time.Second, operation)

assert.NoError(t, err)
assert.Equal(t, 2, callCount, "Operation should be called twice")
}

// Test startupWithRetry with non-retriable error
func TestStartupWithRetry_NonRetriableError(t *testing.T) {
mockClientManager := new(mockClientManager)

azappcfg := &AzureAppConfiguration{
clientManager: mockClientManager,
keyValues: make(map[string]any),
featureFlags: make(map[string]any),
kvSelectors: []Selector{{KeyFilter: "*", LabelFilter: "\x00"}},
}

callCount := 0
nonRetriableError := &azcore.ResponseError{StatusCode: http.StatusBadRequest}

// Create an operation that fails with non-retriable error
operation := func(ctx context.Context) error {
callCount++
return nonRetriableError // Non-retriable error
}

ctx := context.Background()
err := azappcfg.startupWithRetry(ctx, 10*time.Second, operation)

assert.Error(t, err)
assert.Contains(t, err.Error(), "load from Azure App Configuration failed with non-retriable error")
assert.Equal(t, 1, callCount, "Operation should be called only once for non-retriable error")
}

// Test startupWithRetry with timeout
func TestStartupWithRetry_Timeout(t *testing.T) {
mockClientManager := new(mockClientManager)

azappcfg := &AzureAppConfiguration{
clientManager: mockClientManager,
keyValues: make(map[string]any),
featureFlags: make(map[string]any),
kvSelectors: []Selector{{KeyFilter: "*", LabelFilter: "\x00"}},
}

callCount := 0
retriableError := &azcore.ResponseError{StatusCode: http.StatusServiceUnavailable}

// Create an operation that always fails with retriable error
operation := func(ctx context.Context) error {
callCount++
return retriableError // Always fail
}

ctx := context.Background()
// Use a very short timeout to trigger timeout quickly
err := azappcfg.startupWithRetry(ctx, 100*time.Millisecond, operation)

assert.Error(t, err)
assert.True(t,
err.Error() == "startup timeout reached after 100ms" ||
err.Error() == fmt.Sprintf("load from Azure App Configuration failed after %d attempts within timeout 100ms: %v", callCount, retriableError),
"Error should indicate timeout or max attempts reached: %v", err)
assert.True(t, callCount >= 1, "Operation should be called at least once")
}

// Test startupWithRetry with context cancellation during backoff
func TestStartupWithRetry_ContextCancelledDuringBackoff(t *testing.T) {
mockClientManager := new(mockClientManager)

azappcfg := &AzureAppConfiguration{
clientManager: mockClientManager,
keyValues: make(map[string]any),
featureFlags: make(map[string]any),
kvSelectors: []Selector{{KeyFilter: "*", LabelFilter: "\x00"}},
}

callCount := 0
retriableError := &azcore.ResponseError{StatusCode: http.StatusServiceUnavailable}

// Create an operation that fails with retriable error
operation := func(ctx context.Context) error {
callCount++
return retriableError
}

ctx, cancel := context.WithCancel(context.Background())

// Cancel the context after a short delay to simulate cancellation during backoff
go func() {
time.Sleep(50 * time.Millisecond)
cancel()
}()

err := azappcfg.startupWithRetry(ctx, 10*time.Second, operation)

assert.Error(t, err)
assert.Contains(t, err.Error(), "load from Azure App Configuration timed out: context canceled")
}

// Test startupWithRetry with default timeout when zero timeout provided
func TestStartupWithRetry_DefaultTimeout(t *testing.T) {
mockClientManager := new(mockClientManager)

azappcfg := &AzureAppConfiguration{
clientManager: mockClientManager,
keyValues: make(map[string]any),
featureFlags: make(map[string]any),
kvSelectors: []Selector{{KeyFilter: "*", LabelFilter: "\x00"}},
}

callCount := 0
// Create an operation that succeeds
operation := func(ctx context.Context) error {
callCount++
return nil
}

ctx := context.Background()
// Pass zero timeout to test default timeout usage
err := azappcfg.startupWithRetry(ctx, 0, operation)

assert.NoError(t, err)
assert.Equal(t, 1, callCount, "Operation should be called once")
}

// Test startupWithRetry with insufficient time remaining for retry
func TestStartupWithRetry_InsufficientTimeForRetry(t *testing.T) {
mockClientManager := new(mockClientManager)

azappcfg := &AzureAppConfiguration{
clientManager: mockClientManager,
keyValues: make(map[string]any),
featureFlags: make(map[string]any),
kvSelectors: []Selector{{KeyFilter: "*", LabelFilter: "\x00"}},
}

callCount := 0
retriableError := &azcore.ResponseError{StatusCode: http.StatusServiceUnavailable}

// Create an operation that always fails
operation := func(ctx context.Context) error {
callCount++
// Add some delay to consume time
time.Sleep(50 * time.Millisecond)
return retriableError
}

ctx := context.Background()
// Use a short timeout that will be consumed by the first failure and not allow retry
err := azappcfg.startupWithRetry(ctx, 80*time.Millisecond, operation)

assert.Error(t, err)
assert.True(t,
err.Error() == "startup timeout reached after 80ms" ||
strings.Contains(err.Error(), "load from Azure App Configuration failed after") && strings.Contains(err.Error(), "attempts within timeout"),
"Error should indicate timeout or insufficient time: %v", err)
assert.True(t, callCount >= 1, "Operation should be called at least once")
}
9 changes: 9 additions & 0 deletions azureappconfiguration/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ type Options struct {
// LoadBalancingEnabled specifies whether to enable load balancing across multiple replicas of the Azure App Configuration service.
// It defaults to false.
LoadBalancingEnabled bool

// StartupOptions is used when initially loading data into the configuration provider.
StartupOptions StartupOptions
}

// AuthenticationOptions contains parameters for authenticating with the Azure App Configuration service.
Expand Down Expand Up @@ -159,3 +162,9 @@ type ConstructionOptions struct {
// If not provided, the default separator "." will be used.
Separator string
}

// StartupOptions is used when initially loading data into the configuration provider.
type StartupOptions struct {
// Timeout specifies the amount of time allowed to load data from Azure App Configuration on startup.
Timeout time.Duration
}
Comment on lines +166 to +170
Copy link

Copilot AI Sep 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The StartupOptions struct needs an import statement for the time package to be valid.

Copilot uses AI. Check for mistakes.
Loading