Skip to content

Commit 19b7b77

Browse files
committed
Fix upgrade test case for global sync mode
1 parent a03d901 commit 19b7b77

File tree

2 files changed

+54
-13
lines changed

2 files changed

+54
-13
lines changed

e2e/test_operator_upgrades/operator_upgrades_test.go

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,7 @@ var _ = Describe("Operator Upgrades", Label("e2e", "pr"), func() {
510510
)
511511

512512
DescribeTable(
513-
"one process is marked for removal",
513+
"one process is marked for removal and is stuck in removal",
514514
func(beforeVersion string, targetVersion string) {
515515
if fixtures.VersionsAreProtocolCompatible(beforeVersion, targetVersion) {
516516
Skip("this test only affects version incompatible upgrades")
@@ -561,6 +561,24 @@ var _ = Describe("Operator Upgrades", Label("e2e", "pr"), func() {
561561
}
562562

563563
if len(processGroup.ProcessGroupConditions) > 0 {
564+
// Ignore process groups that are stuck in terminating. If the global synchronization mode is active
565+
// this will be the case for all the transaction system process groups as one process group is
566+
// blocked from being removed.
567+
if processGroup.GetConditionTime(fdbv1beta2.ResourcesTerminating) != nil {
568+
log.Println(
569+
"processGroup",
570+
processGroup.ProcessGroupID,
571+
"will be ignored since the process group is in terminating",
572+
)
573+
continue
574+
}
575+
576+
log.Println(
577+
"processGroup",
578+
processGroup.ProcessGroupID,
579+
"processes conditions:",
580+
processGroup.ProcessGroupConditions,
581+
)
564582
processesToUpdate++
565583
}
566584
}
@@ -570,6 +588,17 @@ var _ = Describe("Operator Upgrades", Label("e2e", "pr"), func() {
570588
return processesToUpdate
571589
}).WithTimeout(30 * time.Minute).WithPolling(5 * time.Second).MustPassRepeatedly(5).Should(BeNumerically("==", 0))
572590

591+
// Remove the buggify option and make sure that the terminating processes are removed.
592+
fdbCluster.SetBuggifyBlockRemoval(nil)
593+
Eventually(func(g Gomega) {
594+
processGroups := fdbCluster.GetCluster().Status.ProcessGroups
595+
596+
for _, processGroup := range processGroups {
597+
g.Expect(processGroup.GetConditionTime(fdbv1beta2.ResourcesTerminating)).
598+
To(BeNil())
599+
}
600+
}).WithTimeout(5 * time.Minute).WithPolling(5 * time.Second).Should(Succeed())
601+
573602
// Make sure the cluster has no data loss.
574603
fdbCluster.EnsureTeamTrackersHaveMinReplicas()
575604
},

internal/coordination/coordination.go

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -408,7 +408,6 @@ func UpdateGlobalCoordinationState(
408408
}
409409

410410
processes := GetProcessesFromProcessMap(processGroup.ProcessGroupID, processesMap)
411-
412411
var excluded bool
413412
for _, process := range processes {
414413
excluded = excluded || process.Excluded
@@ -418,10 +417,14 @@ func UpdateGlobalCoordinationState(
418417
// exclusion timestamp set or because the processes are excluded.
419418
if !(processGroup.IsExcluded() || excluded) {
420419
if _, ok := pendingForExclusion[processGroup.ProcessGroupID]; !ok {
420+
logger.V(1).
421+
Info("Adding to pendingForExclusion", "processGroupID", processGroup.ProcessGroupID, "reason", "process group is marked for removal but not excluded")
421422
updatesPendingForExclusion[processGroup.ProcessGroupID] = fdbv1beta2.UpdateActionAdd
422423
}
423424

424425
if _, ok := pendingForInclusion[processGroup.ProcessGroupID]; !ok {
426+
logger.V(1).
427+
Info("Adding to pendingForInclusion", "processGroupID", processGroup.ProcessGroupID, "reason", "process group is marked for removal but not excluded")
425428
updatesPendingForInclusion[processGroup.ProcessGroupID] = fdbv1beta2.UpdateActionAdd
426429
}
427430
} else {
@@ -469,20 +472,29 @@ func UpdateGlobalCoordinationState(
469472
}
470473
}
471474

472-
// If the process group is marked for removal and the resources are stuck in terminating or the processes are not running, we should
473-
// remove them from the restart list, because there are no processes to restart.
474-
if processGroup.GetConditionTime(fdbv1beta2.ResourcesTerminating) != nil ||
475-
processGroup.GetConditionTime(fdbv1beta2.MissingProcesses) != nil {
476-
if _, ok := pendingForRestart[processGroup.ProcessGroupID]; ok {
475+
// If the process group is stuck in terminating, we can add it to the ready for inclusion list.
476+
if processGroup.GetConditionTime(fdbv1beta2.ResourcesTerminating) != nil {
477+
if _, ok := pendingForInclusion[processGroup.ProcessGroupID]; !ok {
477478
logger.V(1).
478-
Info("Removing from pendingForRestart", "processGroupID", processGroup.ProcessGroupID, "reason", "process group is marked for removal")
479-
updatesPendingForRestart[processGroup.ProcessGroupID] = fdbv1beta2.UpdateActionDelete
479+
Info("Adding to pendingForInclusion and readyForInclusion", "processGroupID", processGroup.ProcessGroupID, "reason", "process group is marked for removal and in terminating")
480+
updatesPendingForInclusion[processGroup.ProcessGroupID] = fdbv1beta2.UpdateActionAdd
481+
updatesReadyForInclusion[processGroup.ProcessGroupID] = fdbv1beta2.UpdateActionAdd
480482
}
481483

482-
if _, ok := readyForRestart[processGroup.ProcessGroupID]; ok {
483-
logger.V(1).
484-
Info("Removing from readyForRestart", "processGroupID", processGroup.ProcessGroupID, "reason", "process group is marked for removal")
485-
updatesReadyForRestart[processGroup.ProcessGroupID] = fdbv1beta2.UpdateActionDelete
484+
// If the process group is marked for removal, its resources are stuck in terminating and its processes are not running, we should
485+
// remove them from the restart list, because there are no processes to restart.
486+
if processGroup.GetConditionTime(fdbv1beta2.MissingProcesses) != nil {
487+
if _, ok := pendingForRestart[processGroup.ProcessGroupID]; ok {
488+
logger.V(1).
489+
Info("Removing from pendingForRestart", "processGroupID", processGroup.ProcessGroupID, "reason", "process group is marked for removal")
490+
updatesPendingForRestart[processGroup.ProcessGroupID] = fdbv1beta2.UpdateActionDelete
491+
}
492+
493+
if _, ok := readyForRestart[processGroup.ProcessGroupID]; ok {
494+
logger.V(1).
495+
Info("Removing from readyForRestart", "processGroupID", processGroup.ProcessGroupID, "reason", "process group is marked for removal")
496+
updatesReadyForRestart[processGroup.ProcessGroupID] = fdbv1beta2.UpdateActionDelete
497+
}
486498
}
487499
}
488500

0 commit comments

Comments
 (0)