Skip to content

Use WaitUntilReady retry strategy for test pool #7643

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions base/gocb_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,14 @@ func MgmtRequest(client *http.Client, mgmtEp, method, uri, contentType, username
return respBytes, response.StatusCode, nil
}

// CouchbaseClusterWaitUntilReadyOptions defines options when calling gocbcore.Agent.WaitUntilReady.
type CouchbaseClusterWaitUntilReadyOptions struct {
RetryStrategy gocbcore.RetryStrategy // Retry strategy to use when waiting for the cluster to be ready
Timeout time.Duration // Timeout for waiting for the cluster to be ready
}

// NewClusterAgent creates a new gocbcore agent for a couchbase cluster.
func NewClusterAgent(ctx context.Context, spec CouchbaseClusterSpec, waitUntilReadyTimeout time.Duration) (*gocbcore.Agent, error) {
func NewClusterAgent(ctx context.Context, spec CouchbaseClusterSpec, waitUntilReadyOptions CouchbaseClusterWaitUntilReadyOptions) (*gocbcore.Agent, error) {
authenticator, err := GoCBCoreAuthConfig(spec.Username, spec.Password, spec.X509Certpath, spec.X509Keypath)
if err != nil {
return nil, err
Expand Down Expand Up @@ -226,7 +232,7 @@ func NewClusterAgent(ctx context.Context, spec CouchbaseClusterSpec, waitUntilRe

agent, err := gocbcore.CreateAgent(&config)
if err != nil {
return nil, err
return nil, fmt.Errorf("gocbcore.CreateAgent failed: %w", err)
}

shouldCloseAgent := true
Expand All @@ -240,25 +246,26 @@ func NewClusterAgent(ctx context.Context, spec CouchbaseClusterSpec, waitUntilRe

agentReadyErr := make(chan error)
_, err = agent.WaitUntilReady(
time.Now().Add(waitUntilReadyTimeout),
time.Now().Add(waitUntilReadyOptions.Timeout),
gocbcore.WaitUntilReadyOptions{
ServiceTypes: []gocbcore.ServiceType{gocbcore.MgmtService},
ServiceTypes: []gocbcore.ServiceType{gocbcore.MgmtService},
RetryStrategy: waitUntilReadyOptions.RetryStrategy,
},
func(result *gocbcore.WaitUntilReadyResult, err error) {
agentReadyErr <- err
},
)

if err != nil {
return nil, err
return nil, fmt.Errorf("gocbcore.Agent.WaitUntilReady failed: %w", err)
}

if err := <-agentReadyErr; err != nil {
if _, ok := errors.Unwrap(err).(x509.UnknownAuthorityError); ok {
err = fmt.Errorf("%w - Provide a CA cert, or set tls_skip_verify to true in config", err)
}

return nil, err
return nil, fmt.Errorf("cluster agent is not ready after %vs: %w", waitUntilReadyOptions.Timeout.Seconds(), err)
}

shouldCloseAgent = false
Expand Down
6 changes: 5 additions & 1 deletion base/main_test_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,11 @@ type tbpCluster struct {

// newTestCluster returns a cluster based on the driver used by the defaultBucketSpec.
func newTestCluster(ctx context.Context, clusterSpec CouchbaseClusterSpec) (*tbpCluster, error) {
agent, err := NewClusterAgent(ctx, clusterSpec, TestClusterReadyTimeout)
agent, err := NewClusterAgent(ctx, clusterSpec,
CouchbaseClusterWaitUntilReadyOptions{
Timeout: TestClusterReadyTimeout,
RetryStrategy: gocbcore.NewBestEffortRetryStrategy(nil),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nvm, I see - nil is a parameter...

})
if err != nil {
return nil, fmt.Errorf("couldn't create cluster agent: %w", err)
}
Expand Down
8 changes: 5 additions & 3 deletions rest/server_context.go
Original file line number Diff line number Diff line change
Expand Up @@ -1841,8 +1841,6 @@ func (sc *ServerContext) updateCalculatedStats(ctx context.Context) {
// Uses retry loop
func (sc *ServerContext) initializeGoCBAgent(ctx context.Context) (*gocbcore.Agent, error) {
err, agent := base.RetryLoop(ctx, "Initialize Cluster Agent", func() (shouldRetry bool, err error, agent *gocbcore.Agent) {
// this timeout is pretty short since we are doing external retries
waitUntilReadyTimeout := 5 * time.Second
agent, err = base.NewClusterAgent(
ctx,
base.CouchbaseClusterSpec{
Expand All @@ -1853,7 +1851,11 @@ func (sc *ServerContext) initializeGoCBAgent(ctx context.Context) (*gocbcore.Age
X509Keypath: sc.Config.Bootstrap.X509KeyPath,
CACertpath: sc.Config.Bootstrap.CACertPath,
TLSSkipVerify: base.ValDefault(sc.Config.Bootstrap.ServerTLSSkipVerify, false),
}, waitUntilReadyTimeout)
},
base.CouchbaseClusterWaitUntilReadyOptions{
// this timeout is pretty short since we are doing external retries
Timeout: 5 * time.Second,
Copy link
Preview

Copilot AI Jul 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The server context initialization doesn't specify a RetryStrategy in CouchbaseClusterWaitUntilReadyOptions, which means it will use the zero value (nil). This is inconsistent with test code that explicitly sets BestEffortRetryStrategy. Consider adding an explicit RetryStrategy for consistency.

Suggested change
Timeout: 5 * time.Second,
Timeout: 5 * time.Second,
RetryStrategy: gocbcore.NewBestEffortRetryStrategy(nil),

Copilot uses AI. Check for mistakes.

})
if err != nil {
// since we're starting up - let's be verbose (on console) about these retries happening ... otherwise it looks like nothing is happening ...
base.ConsolefCtx(ctx, base.LevelInfo, base.KeyConfig, "Couldn't initialize cluster agent: %v - will retry...", err)
Expand Down
6 changes: 5 additions & 1 deletion rest/x509_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"testing"
"time"

"github.com/couchbase/gocbcore/v10"
"github.com/couchbase/sync_gateway/base"
"github.com/couchbase/sync_gateway/db"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -88,7 +89,10 @@ func TestX509UnknownAuthorityWrap(t *testing.T) {
CACertpath: sc.Bootstrap.CACertPath,
TLSSkipVerify: base.ValDefault(sc.Bootstrap.ServerTLSSkipVerify, false),
},
base.TestClusterReadyTimeout,
base.CouchbaseClusterWaitUntilReadyOptions{
Timeout: base.TestClusterReadyTimeout,
RetryStrategy: gocbcore.NewBestEffortRetryStrategy(nil),
},
)
assert.Error(t, err)

Expand Down
Loading