-
Notifications
You must be signed in to change notification settings - Fork 61
test: fix flaky test #453
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
test: fix flaky test #453
Conversation
0b1890e
to
e7c2e9c
Compare
e002994
to
76e4a2f
Compare
test_helpers/pool_helper.go
Outdated
// Wait for the role to be applied. | ||
for { | ||
time.Sleep(10 * time.Millisecond) | ||
|
||
var reason string | ||
|
||
data, err := conn.Do(tarantool.NewCallRequest("box.info")).Get() | ||
switch { | ||
case err != nil: | ||
reason = fmt.Sprintf("failed to get box.info: %s", err) | ||
case len(data) < 1: | ||
reason = "box.info is empty" | ||
default: | ||
status, statusFound := data[0].(map[interface{}]interface{})["status"] | ||
readonly, readonlyFound := data[0].(map[interface{}]interface{})["ro"] | ||
switch { | ||
case !statusFound: | ||
reason = "box.info.status is missing" | ||
case status != "running": | ||
reason = fmt.Sprintf("box.info.status='%s' (waiting for 'running')", status) | ||
case !readonlyFound: | ||
reason = "box.info.ro is missing" | ||
case readonly != isReplica: | ||
reason = fmt.Sprintf("box.info.ro='%v' (waiting for '%v')", readonly, isReplica) | ||
} | ||
} | ||
|
||
if len(reason) == 0 { | ||
break | ||
} | ||
|
||
select { | ||
case <-ctx.Done(): | ||
return fmt.Errorf("%w: failed to apply role, the last reason: %s", ctx.Err(), reason) | ||
default: | ||
continue | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a more readable version of this code, imo:
// Wait for the role to be applied. | |
for { | |
time.Sleep(10 * time.Millisecond) | |
var reason string | |
data, err := conn.Do(tarantool.NewCallRequest("box.info")).Get() | |
switch { | |
case err != nil: | |
reason = fmt.Sprintf("failed to get box.info: %s", err) | |
case len(data) < 1: | |
reason = "box.info is empty" | |
default: | |
status, statusFound := data[0].(map[interface{}]interface{})["status"] | |
readonly, readonlyFound := data[0].(map[interface{}]interface{})["ro"] | |
switch { | |
case !statusFound: | |
reason = "box.info.status is missing" | |
case status != "running": | |
reason = fmt.Sprintf("box.info.status='%s' (waiting for 'running')", status) | |
case !readonlyFound: | |
reason = "box.info.ro is missing" | |
case readonly != isReplica: | |
reason = fmt.Sprintf("box.info.ro='%v' (waiting for '%v')", readonly, isReplica) | |
} | |
} | |
if len(reason) == 0 { | |
break | |
} | |
select { | |
case <-ctx.Done(): | |
return fmt.Errorf("%w: failed to apply role, the last reason: %s", ctx.Err(), reason) | |
default: | |
continue | |
} | |
} | |
var reason string | |
// Wait for the role to be applied. | |
for { | |
select { | |
case <-time.After(10 * time.Millisecond): | |
case <-ctx.Done(): | |
return fmt.Errorf("%w: failed to apply role, the last reason: %s", ctx.Err(), reason) | |
} | |
data, err := conn.Do(tarantool.NewCallRequest("box.info")).Get() | |
switch { | |
case err != nil: | |
reason = fmt.Sprintf("failed to get box.info: %s", err) | |
continue | |
case len(data) < 1: | |
reason = "box.info is empty" | |
continue | |
} | |
status, statusFound := data[0].(map[interface{}]interface{})["status"] | |
readonly, readonlyFound := data[0].(map[interface{}]interface{})["ro"] | |
switch { | |
case !statusFound: | |
reason = "box.info.status is missing" | |
case status != "running": | |
reason = fmt.Sprintf("box.info.status='%s' (waiting for 'running')", status) | |
case !readonlyFound: | |
reason = "box.info.ro is missing" | |
case readonly != isReplica: | |
reason = fmt.Sprintf("box.info.ro='%v' (waiting for '%v')", readonly, isReplica) | |
default: | |
return nil | |
} | |
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It can be further optimised for readability with something like that:
// checkInfoStatus inspects the "status" field of a box.info response and
// explains why the instance is not considered running yet.
//
// The first element of data is type-asserted to map[interface{}]interface{}
// without a check, so the call panics when data is empty or its first element
// has another type; callers must validate the response before calling.
//
// It returns an empty string when the status is "running" and a
// human-readable reason otherwise.
func checkInfoStatus(data []interface{}) string {
	info := data[0].(map[interface{}]interface{})

	state, ok := info["status"]
	if !ok {
		return "box.info.status is missing"
	}
	if state == "running" {
		return ""
	}
	return fmt.Sprintf("box.info.status='%s' (waiting for 'running')", state)
}
// checkInfoRO inspects the "ro" field of a box.info response and explains why
// the instance has not switched to the requested role yet.
//
// isReplica is the expected value of box.info.ro: true for a read-only
// replica, false for a writable master.
//
// The first element of data is type-asserted to map[interface{}]interface{}
// without a check, so the call panics when data is empty or its first element
// has another type; callers must validate the response before calling.
//
// It returns an empty string when box.info.ro equals isReplica and a
// human-readable reason otherwise.
func checkInfoRO(data []interface{}, isReplica bool) string {
	readonly, readonlyFound := data[0].(map[interface{}]interface{})["ro"]
	switch {
	case !readonlyFound:
		return "box.info.ro is missing"
	case readonly != isReplica:
		return fmt.Sprintf("box.info.ro='%v' (waiting for '%v')", readonly, isReplica)
	default:
		// The function returns a string, so "no problem" is the empty string.
		return ""
	}
}
// SetInstanceRO connects to an instance through dialer, sets its read_only
// option to isReplica via box.cfg and waits until box.info confirms the change.
//
// After the box.cfg call succeeds, box.info is polled every 10 milliseconds
// until status is "running" and ro equals isReplica. The connection is closed
// before returning.
//
// It returns nil once the role is applied, the connect or box.cfg error if
// either fails, or an error wrapping ctx.Err() together with the last observed
// reason when ctx is done before the role is applied.
func SetInstanceRO(ctx context.Context, dialer tarantool.Dialer, connOpts tarantool.Opts,
	isReplica bool) error {
	conn, err := tarantool.Connect(ctx, dialer, connOpts)
	if err != nil {
		return err
	}
	defer conn.Close()

	req := tarantool.NewCallRequest("box.cfg").
		Args([]interface{}{map[string]bool{"read_only": isReplica}})
	if _, err := conn.Do(req).Get(); err != nil {
		return err
	}

	// reason keeps the outcome of the latest poll so that a timeout error can
	// say what the loop was still waiting for.
	var reason string

	// Wait for the role to be applied.
	for {
		select {
		case <-time.After(10 * time.Millisecond):
		case <-ctx.Done():
			return fmt.Errorf("%w: failed to apply role, the last reason: %s", ctx.Err(), reason)
		}

		data, err := conn.Do(tarantool.NewCallRequest("box.info")).Get()
		switch {
		case err != nil:
			reason = fmt.Sprintf("failed to get box.info: %s", err)
			continue
		case len(data) < 1:
			reason = "box.info is empty"
			continue
		}

		// Each check runs once per poll; its result is both the condition
		// and the message.
		if reason = checkInfoStatus(data); reason != "" {
			continue
		}
		if reason = checkInfoRO(data, isReplica); reason != "" {
			continue
		}
		return nil
	}
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Rearranged in a slightly different manner that seems more readable.
test_helpers/pool_helper.go
Outdated
errs := make([]error, len(dialers)) | ||
var wg sync.WaitGroup | ||
for i, dialer := range dialers { | ||
ctx, cancel := GetConnectContext() | ||
err := SetInstanceRO(ctx, dialer, connOpts, roles[i]) | ||
cancel() | ||
if err != nil { | ||
return err | ||
} | ||
wg.Add(1) | ||
// Pass loop variables to avoid its closure. | ||
go func(i int, dialer tarantool.Dialer) { | ||
defer wg.Done() | ||
errs[i] = SetInstanceRO(ctx, dialer, connOpts, roles[i]) | ||
}(i, dialer) | ||
} | ||
wg.Wait() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Usually it's better not to call wg.Add() inside a loop, to avoid possible race conditions when the goroutine that starts the workers and the one that waits for them are different.
There's no problem in this code, but the rule of thumb is to avoid that kind of problem without having to double-check: if we know the number of workers before running them, use wg.Add(count).
errs := make([]error, len(dialers))
var wg sync.WaitGroup
wg.Add(len(dialers))
for i, dialer := range dialers {
// Pass loop variables to avoid its closure.
go func(i int, dialer tarantool.Dialer) {
defer wg.Done()
errs[i] = SetInstanceRO(ctx, dialer, connOpts, roles[i])
}(i, dialer)
}
wg.Wait()
It's just nitpicking, but it can save some time in the future.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Theoretically, I understand these changes and believe they should work, but I can't guarantee it.
It's up to you to agree or disagree with style comments, since it's test code.
Helper method that performs initial assigning of master/replica roles and is widely used in ConnectionPool tests was adjusted to wait for the roles to be applied successfully. Prior to this patch it doesn't, so sometimes subsequent test code might work unexpectedly (the problem was caught with TestConnectionHandlerOpenUpdateClose) Closes #452
76e4a2f
to
537f38f
Compare
@@ -206,6 +208,45 @@ func SetInstanceRO(ctx context.Context, dialer tarantool.Dialer, connOpts tarant | |||
return err | |||
} | |||
|
|||
checkRole := func(conn *tarantool.Connection, isReplica bool) string { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The nested function doesn't use local variable capture, maybe it still makes sense to put this function in a separate unit so as not to mess up the code inside here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, technically this nested function is able to "live" outside, but it is tightly coupled semantically to the outer function and, as I see it, doesn't make sense outside of it as a standalone function. Please consider it just a way to rearrange code within SetInstanceRO to make it more readable.
go func(i int, dialer tarantool.Dialer) { | ||
defer wg.Done() | ||
errs[i] = SetInstanceRO(ctx, dialer, connOpts, roles[i]) | ||
}(i, dialer) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
go func(i int, dialer tarantool.Dialer) { | |
defer wg.Done() | |
errs[i] = SetInstanceRO(ctx, dialer, connOpts, roles[i]) | |
}(i, dialer) | |
go func() { | |
defer wg.Done() | |
errs[i] = SetInstanceRO(ctx, dialer, connOpts, roles[i]) | |
}() |
It's just cosmetic, so it's up to you.
Since Go 1.22 it's safe to capture variables when using closures with concurrency inside loop body.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I know, but in go.mod there is line
go 1.20
The helper method that performs the initial assignment of master/replica roles and is widely used in ConnectionPool tests was adjusted to wait for the roles to be applied successfully. Prior to this patch it did not, so subsequent test code could sometimes behave unexpectedly (the problem was caught with TestConnectionHandlerOpenUpdateClose).
Related issues:
Closes #452