Skip to content

Commit e64d9d8

Browse files
fix: [NPM] [Linux] race when deleting/readding NetPol (#2978)
fix: [NPM] [Linux] race when deleting/readding CIDR NetPol Signed-off-by: Hunter Gregory <[email protected]>
1 parent 96711bc commit e64d9d8

File tree

6 files changed

+92
-5
lines changed

6 files changed

+92
-5
lines changed

npm/pkg/dataplane/dataplane.go

+29-5
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,12 @@ import (
1818
const (
1919
reconcileDuration = time.Duration(5 * time.Minute)
2020

21-
contextBackground = "BACKGROUND"
22-
contextApplyDP = "APPLY-DP"
23-
contextAddNetPol = "ADD-NETPOL"
24-
contextAddNetPolBootup = "BOOTUP-ADD-NETPOL"
25-
contextDelNetPol = "DEL-NETPOL"
21+
contextBackground = "BACKGROUND"
22+
contextApplyDP = "APPLY-DP"
23+
contextAddNetPol = "ADD-NETPOL"
24+
contextAddNetPolBootup = "BOOTUP-ADD-NETPOL"
25+
contextAddNetPolCIDRPrecaution = "ADD-NETPOL-CIDR-PRECAUTION"
26+
contextDelNetPol = "DEL-NETPOL"
2627
)
2728

2829
var (
@@ -471,6 +472,29 @@ func (dp *DataPlane) addPolicies(netPols []*policies.NPMNetworkPolicy) error {
471472
}
472473
}
473474

475+
if !util.IsWindowsDP() {
476+
for _, netPol := range netPols {
477+
if !(netPol.HasCIDRRules() && dp.ipsetMgr.PreviousApplyFailed()) {
478+
continue
479+
}
480+
481+
if inBootupPhase {
482+
// this should never happen because bootup phase is for windows, but just in case, we don't want to applyDataplaneNow() or else there will be a deadlock on dp.applyInfo
483+
msg := fmt.Sprintf("[DataPlane] [%s] at risk of improperly applying a CIDR policy which is removed then readded", contextAddNetPolCIDRPrecaution)
484+
klog.Warning(msg)
485+
metrics.SendErrorLogAndMetric(util.DaemonDataplaneID, msg)
486+
break
487+
}
488+
489+
// prevent #2977
490+
if err := dp.applyDataPlaneNow(contextAddNetPolCIDRPrecaution); err != nil {
491+
return err // nolint:wrapcheck // unnecessary to wrap error since the provided context is included in the error
492+
}
493+
494+
break
495+
}
496+
}
497+
474498
// 1. Add IPSets and apply for each NetPol.
475499
// Apply IPSets after each NetworkPolicy unless ApplyInBackground=true and we're in the bootup phase (only happens for Windows currently)
476500
for _, netPol := range netPols {

npm/pkg/dataplane/dataplane_test.go

+27
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,33 @@ func TestRemovePolicy(t *testing.T) {
221221
require.NoError(t, err)
222222
}
223223

224+
func TestHandle2977(t *testing.T) {
225+
if util.IsWindowsDP() {
226+
return
227+
}
228+
229+
metrics.InitializeAll()
230+
231+
calls := append(getBootupTestCalls(), getAddPolicyTestCallsForDP(&testPolicyobj)...)
232+
calls = append(calls, policies.GetRemovePolicyTestCalls(&testPolicyobj)...)
233+
calls = append(calls, ipsets.GetApplyIPSetsFailureTestCalls()...)
234+
calls = append(calls, ipsets.GetApplyIPSetsTestCalls(nil, getAffectedIPSets(&testPolicyobj))...)
235+
calls = append(calls, getAddPolicyTestCallsForDP(&testPolicyobj)...)
236+
ioshim := common.NewMockIOShim(calls)
237+
defer ioshim.VerifyCalls(t, calls)
238+
dp, err := NewDataPlane("testnode", ioshim, dpCfg, nil)
239+
require.NoError(t, err)
240+
241+
err = dp.AddPolicy(&testPolicyobj)
242+
require.NoError(t, err)
243+
244+
err = dp.RemovePolicy(testPolicyobj.PolicyKey)
245+
require.Error(t, err)
246+
247+
err = dp.AddPolicy(&testPolicyobj)
248+
require.NoError(t, err)
249+
}
250+
224251
func TestUpdatePolicy(t *testing.T) {
225252
metrics.InitializeAll()
226253

npm/pkg/dataplane/ipsets/ipsetmanager.go

+7
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,13 @@ func NewIPSetManager(iMgrCfg *IPSetManagerCfg, ioShim *common.IOShim) *IPSetMana
7979
}
8080
}
8181

82+
// PreviousApplyFailed is only relevant for Linux right now since Windows doesn't track failures
83+
func (iMgr *IPSetManager) PreviousApplyFailed() bool {
84+
iMgr.Lock()
85+
defer iMgr.Unlock()
86+
return iMgr.consecutiveApplyFailures > 0
87+
}
88+
8289
/*
8390
Reconcile removes empty/unreferenced sets from the cache.
8491
For ApplyAllIPSets mode, those sets are added to the toDeleteCache.

npm/pkg/dataplane/ipsets/testutils_linux.go

+16
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@ var (
1111
Stdout: "success",
1212
ExitCode: 0,
1313
}
14+
15+
fakeRestoreFailureCommand = testutils.TestCmd{
16+
Cmd: ipsetRestoreStringSlice,
17+
Stdout: "failure",
18+
ExitCode: 1,
19+
}
1420
)
1521

1622
func GetApplyIPSetsTestCalls(toAddOrUpdateIPSets, toDeleteIPSets []*IPSetMetadata) []testutils.TestCmd {
@@ -20,6 +26,16 @@ func GetApplyIPSetsTestCalls(toAddOrUpdateIPSets, toDeleteIPSets []*IPSetMetadat
2026
return []testutils.TestCmd{fakeRestoreSuccessCommand}
2127
}
2228

29+
func GetApplyIPSetsFailureTestCalls() []testutils.TestCmd {
30+
return []testutils.TestCmd{
31+
fakeRestoreFailureCommand,
32+
fakeRestoreFailureCommand,
33+
fakeRestoreFailureCommand,
34+
fakeRestoreFailureCommand,
35+
fakeRestoreFailureCommand,
36+
}
37+
}
38+
2339
func GetResetTestCalls() []testutils.TestCmd {
2440
return []testutils.TestCmd{
2541
{Cmd: []string{"ipset", "list", "--name"}, PipedToCommand: true},

npm/pkg/dataplane/ipsets/testutils_windows.go

+4
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ func GetApplyIPSetsTestCalls(_, _ []*IPSetMetadata) []testutils.TestCmd {
2727
return []testutils.TestCmd{}
2828
}
2929

30+
func GetApplyIPSetsFailureTestCalls() []testutils.TestCmd {
31+
return []testutils.TestCmd{}
32+
}
33+
3034
func GetResetTestCalls() []testutils.TestCmd {
3135
return []testutils.TestCmd{}
3236
}

npm/pkg/dataplane/policies/policy.go

+9
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,15 @@ func NewNPMNetworkPolicy(netPolName, netPolNamespace string) *NPMNetworkPolicy {
4242
}
4343
}
4444

45+
func (netPol *NPMNetworkPolicy) HasCIDRRules() bool {
46+
for _, set := range netPol.RuleIPSets {
47+
if set.Metadata.Type == ipsets.CIDRBlocks {
48+
return true
49+
}
50+
}
51+
return false
52+
}
53+
4554
func (netPol *NPMNetworkPolicy) AllPodSelectorIPSets() []*ipsets.TranslatedIPSet {
4655
return append(netPol.PodSelectorIPSets, netPol.ChildPodSelectorIPSets...)
4756
}

0 commit comments

Comments
 (0)