Skip to content

Commit 654fad3

Browse files
[backport] fix: [Linux] [NPM] handle #2977 for netpols without cidr (#3006)
[backport] fix: [Linux] [NPM] handle #2977 for netpols without cidrs (#2990) * fix: [Linux] [NPM] handle #2977 for netpols without cidrs * fix: lock and no need to track policy key * style: remove dead code --------- Signed-off-by: Hunter Gregory <[email protected]>
1 parent 609e6ee commit 654fad3

File tree

3 files changed

+62
-32
lines changed

3 files changed

+62
-32
lines changed

Diff for: .gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
vendor/
2+
13
# Binaries
24
out/*
35
output/*

Diff for: npm/pkg/dataplane/dataplane.go

+60-25
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"errors"
55
"fmt"
66
"strings"
7+
"sync"
78
"time"
89

910
"github.com/Azure/azure-container-networking/common"
@@ -18,12 +19,12 @@ import (
1819
const (
1920
reconcileDuration = time.Duration(5 * time.Minute)
2021

21-
contextBackground = "BACKGROUND"
22-
contextApplyDP = "APPLY-DP"
23-
contextAddNetPol = "ADD-NETPOL"
24-
contextAddNetPolBootup = "BOOTUP-ADD-NETPOL"
25-
contextAddNetPolCIDRPrecaution = "ADD-NETPOL-CIDR-PRECAUTION"
26-
contextDelNetPol = "DEL-NETPOL"
22+
contextBackground = "BACKGROUND"
23+
contextApplyDP = "APPLY-DP"
24+
contextAddNetPol = "ADD-NETPOL"
25+
contextAddNetPolBootup = "BOOTUP-ADD-NETPOL"
26+
contextAddNetPolPrecaution = "ADD-NETPOL-PRECAUTION"
27+
contextDelNetPol = "DEL-NETPOL"
2728
)
2829

2930
var (
@@ -48,6 +49,11 @@ type Config struct {
4849
*policies.PolicyManagerCfg
4950
}
5051

52+
type removePolicyInfo struct {
53+
sync.Mutex
54+
previousRemovePolicyIPSetsFailed bool
55+
}
56+
5157
type DataPlane struct {
5258
*Config
5359
applyInBackground bool
@@ -64,7 +70,10 @@ type DataPlane struct {
6470
endpointQuery *endpointQuery
6571
applyInfo *applyInfo
6672
netPolQueue *netPolQueue
67-
stopChannel <-chan struct{}
73+
// removePolicyInfo tracks when a policy was removed yet had ApplyIPSet failures.
74+
// This field is only relevant for Linux.
75+
removePolicyInfo removePolicyInfo
76+
stopChannel <-chan struct{}
6877
}
6978

7079
func NewDataPlane(nodeName string, ioShim *common.IOShim, cfg *Config, stopChannel <-chan struct{}) (*DataPlane, error) {
@@ -335,6 +344,9 @@ func (dp *DataPlane) applyDataPlaneNow(context string) error {
335344
}
336345
klog.Infof("[DataPlane] [ApplyDataPlane] [%s] finished applying ipsets", context)
337346

347+
// see comment in RemovePolicy() for why this is here
348+
dp.setRemovePolicyFailure(false)
349+
338350
if dp.applyInBackground {
339351
dp.applyInfo.Lock()
340352
dp.applyInfo.numBatches = 0
@@ -472,26 +484,17 @@ func (dp *DataPlane) addPolicies(netPols []*policies.NPMNetworkPolicy) error {
472484
}
473485
}
474486

475-
if !util.IsWindowsDP() {
476-
for _, netPol := range netPols {
477-
if !(netPol.HasCIDRRules() && dp.ipsetMgr.PreviousApplyFailed()) {
478-
continue
479-
}
480-
481-
if inBootupPhase {
482-
// this should never happen because bootup phase is for windows, but just in case, we don't want to applyDataplaneNow() or else there will be a deadlock on dp.applyInfo
483-
msg := fmt.Sprintf("[DataPlane] [%s] at risk of improperly applying a CIDR policy which is removed then readded", contextAddNetPolCIDRPrecaution)
484-
klog.Warning(msg)
485-
metrics.SendErrorLogAndMetric(util.DaemonDataplaneID, msg)
486-
break
487-
}
488-
487+
if dp.hadRemovePolicyFailure() {
488+
if inBootupPhase {
489+
// this should never happen because bootup phase is for windows, but just in case, we don't want to applyDataplaneNow() or else there will be a deadlock on dp.applyInfo
490+
msg := fmt.Sprintf("[DataPlane] [%s] at risk of improperly applying a policy which is removed then readded", contextAddNetPolPrecaution)
491+
klog.Warning(msg)
492+
metrics.SendErrorLogAndMetric(util.DaemonDataplaneID, msg)
493+
} else {
489494
// prevent #2977
490-
if err := dp.applyDataPlaneNow(contextAddNetPolCIDRPrecaution); err != nil {
495+
if err := dp.applyDataPlaneNow(contextAddNetPolPrecaution); err != nil {
491496
return err // nolint:wrapcheck // unnecessary to wrap error since the provided context is included in the error
492497
}
493-
494-
break
495498
}
496499
}
497500

@@ -531,6 +534,9 @@ func (dp *DataPlane) addPolicies(netPols []*policies.NPMNetworkPolicy) error {
531534
}
532535
klog.Infof("[DataPlane] [%s] finished applying ipsets", contextAddNetPolBootup)
533536

537+
// see comment in RemovePolicy() for why this is here
538+
dp.setRemovePolicyFailure(false)
539+
534540
dp.applyInfo.numBatches = 0
535541
}
536542

@@ -627,7 +633,16 @@ func (dp *DataPlane) RemovePolicy(policyKey string) error {
627633
return err
628634
}
629635

630-
return dp.applyDataPlaneNow(contextApplyDP)
636+
if err := dp.applyDataPlaneNow(contextDelNetPol); err != nil {
637+
// Failed to apply IPSets while removing this policy.
638+
// Consider this removepolicy call a failure until apply IPSets is successful.
639+
// Related to #2977
640+
klog.Info("[DataPlane] remove policy has failed to apply ipsets. setting remove policy failure")
641+
dp.setRemovePolicyFailure(true)
642+
return err // nolint:wrapcheck // unnecessary to wrap error since the provided context is included in the error
643+
}
644+
645+
return nil
631646
}
632647

633648
// UpdatePolicy takes in updated policy object, calculates the delta and applies changes
@@ -749,3 +764,23 @@ func (dp *DataPlane) deleteIPSetsAndReferences(sets []*ipsets.TranslatedIPSet, n
749764
}
750765
return nil
751766
}
767+
768+
func (dp *DataPlane) setRemovePolicyFailure(failed bool) {
769+
if util.IsWindowsDP() {
770+
return
771+
}
772+
773+
dp.removePolicyInfo.Lock()
774+
defer dp.removePolicyInfo.Unlock()
775+
dp.removePolicyInfo.previousRemovePolicyIPSetsFailed = failed
776+
}
777+
778+
func (dp *DataPlane) hadRemovePolicyFailure() bool {
779+
if util.IsWindowsDP() {
780+
return false
781+
}
782+
783+
dp.removePolicyInfo.Lock()
784+
defer dp.removePolicyInfo.Unlock()
785+
return dp.removePolicyInfo.previousRemovePolicyIPSetsFailed
786+
}

Diff for: npm/pkg/dataplane/ipsets/ipsetmanager.go

-7
Original file line numberDiff line numberDiff line change
@@ -79,13 +79,6 @@ func NewIPSetManager(iMgrCfg *IPSetManagerCfg, ioShim *common.IOShim) *IPSetMana
7979
}
8080
}
8181

82-
// PreviousApplyFailed is only relevant for Linux right now since Windows doesn't track failures
83-
func (iMgr *IPSetManager) PreviousApplyFailed() bool {
84-
iMgr.Lock()
85-
defer iMgr.Unlock()
86-
return iMgr.consecutiveApplyFailures > 0
87-
}
88-
8982
/*
9083
Reconcile removes empty/unreferenced sets from the cache.
9184
For ApplyAllIPSets mode, those sets are added to the toDeleteCache.

0 commit comments

Comments
 (0)