Skip to content

Commit

Permalink
(feat) internal/civisibility: add Known Tests feature and refactor EFD logic V2 (#3140)
Browse files Browse the repository at this point in the history
  • Loading branch information
tonyredondo authored Feb 3, 2025
1 parent 1e52457 commit c9f90c7
Show file tree
Hide file tree
Showing 14 changed files with 408 additions and 280 deletions.
3 changes: 3 additions & 0 deletions internal/civisibility/constants/test_tags.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ const (
// This constant is used to tag test events that are part of a retry execution
TestIsRetry = "test.is_retry"

// TestRetryReason indicates the reason for retrying the test
TestRetryReason = "test.retry_reason"

// TestEarlyFlakeDetectionRetryAborted indicates a retry abort reason by the early flake detection feature
TestEarlyFlakeDetectionRetryAborted = "test.early_flake.abort_reason"

Expand Down
27 changes: 16 additions & 11 deletions internal/civisibility/integrations/civisibility_features.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ var (
// ciVisibilitySettings contains the CI Visibility settings for this session
ciVisibilitySettings net.SettingsResponseData

// ciVisibilityEarlyFlakyDetectionSettings contains the CI Visibility Early Flake Detection data for this session
ciVisibilityEarlyFlakyDetectionSettings net.EfdResponseData
// ciVisibilityKnownTests contains the CI Visibility Known Tests data for this session
ciVisibilityKnownTests net.KnownTestsResponseData

// ciVisibilityFlakyRetriesSettings contains the CI Visibility Flaky Retries settings for this session
ciVisibilityFlakyRetriesSettings FlakyRetriesSetting
Expand Down Expand Up @@ -121,15 +121,20 @@ func ensureAdditionalFeaturesInitialization(serviceName string) {
return
}

// if early flake detection is enabled then we run the early flake detection request
if ciVisibilitySettings.EarlyFlakeDetection.Enabled {
ciEfdData, err := ciVisibilityClient.GetEarlyFlakeDetectionData()
// if the known tests feature is enabled then we run the known tests request
if ciVisibilitySettings.KnownTestsEnabled {
ciEfdData, err := ciVisibilityClient.GetKnownTests()
if err != nil {
log.Error("civisibility: error getting CI visibility early flake detection data: %v", err)
log.Error("civisibility: error getting CI visibility known tests data: %v", err)
} else if ciEfdData != nil {
ciVisibilityEarlyFlakyDetectionSettings = *ciEfdData
log.Debug("civisibility: early flake detection data loaded.")
ciVisibilityKnownTests = *ciEfdData
log.Debug("civisibility: known tests data loaded.")
}
} else {
// "known_tests_enabled" parameter works as a kill-switch for EFD, so if "known_tests_enabled" is false it
// will disable EFD even if "early_flake_detection.enabled" is set to true (which should not happen normally,
// the backend should disable both of them in that case)
ciVisibilitySettings.EarlyFlakeDetection.Enabled = false
}

// if flaky test retries is enabled then let's load the flaky retries settings
Expand Down Expand Up @@ -172,11 +177,11 @@ func GetSettings() *net.SettingsResponseData {
return &ciVisibilitySettings
}

// GetEarlyFlakeDetectionSettings gets the early flake detection known tests data
func GetEarlyFlakeDetectionSettings() *net.EfdResponseData {
// GetKnownTests gets the known tests data
func GetKnownTests() *net.KnownTestsResponseData {
// call to ensure the additional features initialization is completed (service name can be empty here)
ensureAdditionalFeaturesInitialization("")
return &ciVisibilityEarlyFlakyDetectionSettings
return &ciVisibilityKnownTests
}

// GetFlakyRetriesSettings gets the flaky retries settings
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ type MockClient struct {
SendCoveragePayloadFunc func(ciTestCovPayload io.Reader) error
SendCoveragePayloadWithFormatFunc func(ciTestCovPayload io.Reader, format string) error
GetSettingsFunc func() (*net.SettingsResponseData, error)
GetEarlyFlakeDetectionDataFunc func() (*net.EfdResponseData, error)
GetKnownTestsFunc func() (*net.KnownTestsResponseData, error)
GetCommitsFunc func(localCommits []string) ([]string, error)
SendPackFilesFunc func(commitSha string, packFiles []string) (bytes int64, err error)
GetSkippableTestsFunc func() (correlationId string, skippables map[string]map[string][]net.SkippableResponseDataAttributes, err error)
Expand All @@ -91,8 +91,8 @@ func (m *MockClient) GetSettings() (*net.SettingsResponseData, error) {
return m.GetSettingsFunc()
}

func (m *MockClient) GetEarlyFlakeDetectionData() (*net.EfdResponseData, error) {
return m.GetEarlyFlakeDetectionDataFunc()
func (m *MockClient) GetKnownTests() (*net.KnownTestsResponseData, error) {
return m.GetKnownTestsFunc()
}

func (m *MockClient) GetCommits(localCommits []string) ([]string, error) {
Expand Down
201 changes: 111 additions & 90 deletions internal/civisibility/integrations/gotesting/instrumentation.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"fmt"
"reflect"
"runtime"
"slices"
"sync"
"sync/atomic"
"testing"
Expand All @@ -36,7 +35,9 @@ type (
panicData any // panic data recovered from an internal test execution when using an additional feature wrapper
panicStacktrace string // stacktrace from the panic recovered from an internal test
isARetry bool // flag to tag if a current test execution is a retry
isANewTest bool // flag to tag if a current test execution is part of a new test (EFD not known test)
isANewTest bool // flag to tag if a current test execution is part of a new test
isEFDExecution bool // flag to tag if a current test execution is part of an EFD execution
isATRExecution bool // flag to tag if a current test execution is part of an ATR execution
hasAdditionalFeatureWrapper bool // flag to check if the current execution is part of an additional feature wrapper
}

Expand Down Expand Up @@ -191,20 +192,29 @@ func applyFlakyTestRetriesAdditionalFeature(targetFunc func(*testing.T)) (func(*
initialRetryCount: flakyRetrySettings.RetryCount,
adjustRetryCount: nil, // No adjustRetryCount
shouldRetry: func(ptrToLocalT *testing.T, executionIndex int, remainingRetries int64) bool {
remainingTotalRetries := atomic.AddInt64(&flakyRetrySettings.RemainingTotalRetryCount, -1)
// Decide whether to retry
return ptrToLocalT.Failed() && remainingRetries >= 0 && remainingTotalRetries >= 0
return ptrToLocalT.Failed() && remainingRetries >= 0 && atomic.LoadInt64(&flakyRetrySettings.RemainingTotalRetryCount) >= 0
},
perExecution: func(ptrToLocalT *testing.T, executionIndex int, duration time.Duration) {
if executionIndex > 0 {
atomic.AddInt64(&flakyRetrySettings.RemainingTotalRetryCount, -1)
}
},
perExecution: nil, // No perExecution needed
onRetryEnd: func(t *testing.T, executionIndex int, lastPtrToLocalT *testing.T) {
// Update original `t` with results from last execution
tCommonPrivates := getTestPrivateFields(t)
if tCommonPrivates == nil {
panic("getting test private fields failed")
}
tCommonPrivates.SetFailed(lastPtrToLocalT.Failed())
tCommonPrivates.SetSkipped(lastPtrToLocalT.Skipped())

// Update parent status if failed
if lastPtrToLocalT.Failed() {
tParentCommonPrivates := getTestParentPrivateFields(t)
if tParentCommonPrivates == nil {
panic("getting test parent private fields failed")
}
tParentCommonPrivates.SetFailed(true)
}

Expand All @@ -218,14 +228,17 @@ func applyFlakyTestRetriesAdditionalFeature(targetFunc func(*testing.T)) (func(*
}

fmt.Printf(" [ %v after %v retries by Datadog's auto test retries ]\n", status, executionIndex)
}

// Check if total retry count was exceeded
if flakyRetrySettings.RemainingTotalRetryCount < 1 {
fmt.Println(" the maximum number of total retries was exceeded.")
// Check if total retry count was exceeded
if atomic.LoadInt64(&flakyRetrySettings.RemainingTotalRetryCount) < 1 {
fmt.Println(" the maximum number of total retries was exceeded.")
}
}
},
execMetaAdjust: nil, // No execMetaAdjust needed
execMetaAdjust: func(execMeta *testExecutionMetadata, executionIndex int) {
// Set the flag ATR execution to true
execMeta.isATRExecution = true
},
})
}, true
}
Expand All @@ -234,89 +247,82 @@ func applyFlakyTestRetriesAdditionalFeature(targetFunc func(*testing.T)) (func(*

// applyEarlyFlakeDetectionAdditionalFeature applies the early flake detection feature as a wrapper of a func(*testing.T)
func applyEarlyFlakeDetectionAdditionalFeature(testInfo *commonInfo, targetFunc func(*testing.T), settings *net.SettingsResponseData) (func(*testing.T), bool) {
earlyFlakeDetectionData := integrations.GetEarlyFlakeDetectionSettings()
if earlyFlakeDetectionData != nil &&
len(earlyFlakeDetectionData.Tests) > 0 {

// Define is a known test flag
isAKnownTest := false

// Check if the test is a known test or a new one
if knownSuites, ok := earlyFlakeDetectionData.Tests[testInfo.moduleName]; ok {
if knownTests, ok := knownSuites[testInfo.suiteName]; ok {
if slices.Contains(knownTests, testInfo.testName) {
isAKnownTest = true
}
}
}
isKnown, hasKnownData := isKnownTest(testInfo)
if !hasKnownData || isKnown {
return targetFunc, false
}

// If it's a new test, then we apply the EFD wrapper
if !isAKnownTest {
return func(t *testing.T) {
var testPassCount, testSkipCount, testFailCount int

runTestWithRetry(&runTestWithRetryOptions{
targetFunc: targetFunc,
t: t,
initialRetryCount: 0,
adjustRetryCount: func(duration time.Duration) int64 {
slowTestRetriesSettings := settings.EarlyFlakeDetection.SlowTestRetries
durationSecs := duration.Seconds()
if durationSecs < 5 {
return int64(slowTestRetriesSettings.FiveS)
} else if durationSecs < 10 {
return int64(slowTestRetriesSettings.TenS)
} else if durationSecs < 30 {
return int64(slowTestRetriesSettings.ThirtyS)
} else if duration.Minutes() < 5 {
return int64(slowTestRetriesSettings.FiveM)
}
return 0
},
shouldRetry: func(ptrToLocalT *testing.T, executionIndex int, remainingRetries int64) bool {
return remainingRetries >= 0
},
perExecution: func(ptrToLocalT *testing.T, executionIndex int, duration time.Duration) {
// Collect test results
if ptrToLocalT.Failed() {
testFailCount++
} else if ptrToLocalT.Skipped() {
testSkipCount++
} else {
testPassCount++
}
},
onRetryEnd: func(t *testing.T, executionIndex int, lastPtrToLocalT *testing.T) {
// Update test status based on collected counts
tCommonPrivates := getTestPrivateFields(t)
// If it's a new test, then we apply the EFD wrapper
return func(t *testing.T) {
var testPassCount, testSkipCount, testFailCount int

runTestWithRetry(&runTestWithRetryOptions{
targetFunc: targetFunc,
t: t,
initialRetryCount: 0,
adjustRetryCount: func(duration time.Duration) int64 {
slowTestRetriesSettings := settings.EarlyFlakeDetection.SlowTestRetries
durationSecs := duration.Seconds()
if durationSecs < 5 {
return int64(slowTestRetriesSettings.FiveS)
} else if durationSecs < 10 {
return int64(slowTestRetriesSettings.TenS)
} else if durationSecs < 30 {
return int64(slowTestRetriesSettings.ThirtyS)
} else if duration.Minutes() < 5 {
return int64(slowTestRetriesSettings.FiveM)
}
return 0
},
shouldRetry: func(ptrToLocalT *testing.T, executionIndex int, remainingRetries int64) bool {
return remainingRetries >= 0
},
perExecution: func(ptrToLocalT *testing.T, executionIndex int, duration time.Duration) {
// Collect test results
if ptrToLocalT.Failed() {
testFailCount++
} else if ptrToLocalT.Skipped() {
testSkipCount++
} else {
testPassCount++
}
},
onRetryEnd: func(t *testing.T, executionIndex int, lastPtrToLocalT *testing.T) {
// Update test status based on collected counts
tCommonPrivates := getTestPrivateFields(t)
if tCommonPrivates == nil {
panic("getting test private fields failed")
}
status := "passed"
if testPassCount == 0 {
if testSkipCount > 0 {
status = "skipped"
tCommonPrivates.SetSkipped(true)
}
if testFailCount > 0 {
status = "failed"
tCommonPrivates.SetFailed(true)
tParentCommonPrivates := getTestParentPrivateFields(t)
status := "passed"
if testPassCount == 0 {
if testSkipCount > 0 {
status = "skipped"
tCommonPrivates.SetSkipped(true)
}
if testFailCount > 0 {
status = "failed"
tCommonPrivates.SetFailed(true)
tParentCommonPrivates.SetFailed(true)
}
if tParentCommonPrivates == nil {
panic("getting test parent private fields failed")
}
tParentCommonPrivates.SetFailed(true)
}
}

// Print summary after retries
if executionIndex > 0 {
fmt.Printf(" [ %v after %v retries by Datadog's early flake detection ]\n", status, executionIndex)
}
},
execMetaAdjust: func(execMeta *testExecutionMetadata, executionIndex int) {
// Set the flag new test to true
execMeta.isANewTest = true
},
})
}, true
}
}
return targetFunc, false
// Print summary after retries
if executionIndex > 0 {
fmt.Printf(" [ %v after %v retries by Datadog's early flake detection ]\n", status, executionIndex)
}
},
execMetaAdjust: func(execMeta *testExecutionMetadata, executionIndex int) {
// Set the flag new test to true
execMeta.isANewTest = true
// Set the flag EFD execution to true
execMeta.isEFDExecution = true
},
})
}, true
}

// runTestWithRetry encapsulates the common retry logic for test functions.
Expand All @@ -336,7 +342,10 @@ func runTestWithRetry(options *runTestWithRetryOptions) {

for {
// Clear the matcher subnames map before each execution to avoid subname tests being called "parent/subname#NN" due to retries
getTestContextMatcherPrivateFields(options.t).ClearSubNames()
matcher := getTestContextMatcherPrivateFields(options.t)
if matcher != nil {
matcher.ClearSubNames()
}

// Increment execution index
executionIndex++
Expand All @@ -348,6 +357,12 @@ func runTestWithRetry(options *runTestWithRetryOptions) {
// Create a dummy parent so we can run the test using this local copy
// without affecting the test parent
localTPrivateFields := getTestPrivateFields(ptrToLocalT)
if localTPrivateFields == nil {
panic("getting test private fields failed")
}
if localTPrivateFields.parent == nil {
panic("parent of the test is nil")
}
*localTPrivateFields.parent = unsafe.Pointer(&testing.T{})

// Create an execution metadata instance
Expand All @@ -362,6 +377,12 @@ func runTestWithRetry(options *runTestWithRetryOptions) {
if originalExecMeta.isARetry {
execMeta.isARetry = true
}
if originalExecMeta.isEFDExecution {
execMeta.isEFDExecution = true
}
if originalExecMeta.isATRExecution {
execMeta.isATRExecution = true
}
}

// If we are in a retry execution, set the `isARetry` flag
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,12 @@ func instrumentTestingTFunc(f func(*testing.T)) func(*testing.T) {
if parentExecMeta.isARetry {
execMeta.isARetry = true
}
if parentExecMeta.isEFDExecution {
execMeta.isEFDExecution = true
}
if parentExecMeta.isATRExecution {
execMeta.isATRExecution = true
}
}
}

Expand All @@ -175,6 +181,15 @@ func instrumentTestingTFunc(f func(*testing.T)) func(*testing.T) {
if execMeta.isARetry {
// Set the retry tag
test.SetTag(constants.TestIsRetry, "true")

// If the execution is an EFD execution we tag the test event reason
if execMeta.isEFDExecution {
// Set the EFD as the retry reason
test.SetTag(constants.TestRetryReason, "efd")
} else if execMeta.isATRExecution {
// Set the ATR as the retry reason
test.SetTag(constants.TestRetryReason, "atr")
}
}

defer func() {
Expand Down
Loading

0 comments on commit c9f90c7

Please sign in to comment.