4
4
"errors"
5
5
"fmt"
6
6
"strings"
7
+ "sync"
7
8
"time"
8
9
9
10
"github.com/Azure/azure-container-networking/common"
@@ -18,12 +19,12 @@ import (
18
19
const (
	// reconcileDuration is a fixed five-minute duration.
	// NOTE(review): presumably the period of a background reconcile loop — confirm at its use site.
	reconcileDuration = 5 * time.Minute

	// Context labels identify which code path triggered a dataplane operation.
	// They are passed to apply calls and embedded in log/error messages.
	contextBackground          = "BACKGROUND"
	contextApplyDP             = "APPLY-DP"
	contextAddNetPol           = "ADD-NETPOL"
	contextAddNetPolBootup     = "BOOTUP-ADD-NETPOL"
	contextAddNetPolPrecaution = "ADD-NETPOL-PRECAUTION"
	contextDelNetPol           = "DEL-NETPOL"
)
28
29
29
30
var (
@@ -48,6 +49,11 @@ type Config struct {
48
49
* policies.PolicyManagerCfg
49
50
}
50
51
52
// removePolicyInfo holds a mutex-guarded flag noting that a policy removal
// was followed by a failed IPSet apply.
type removePolicyInfo struct {
	// Mutex guards previousRemovePolicyIPSetsFailed; it is embedded so that
	// Lock/Unlock can be called directly on the struct.
	sync.Mutex

	// previousRemovePolicyIPSetsFailed is true while a remove-policy call's
	// IPSet apply has failed and no later apply has succeeded.
	previousRemovePolicyIPSetsFailed bool
}
56
+
51
57
type DataPlane struct {
52
58
* Config
53
59
applyInBackground bool
@@ -64,7 +70,10 @@ type DataPlane struct {
64
70
endpointQuery * endpointQuery
65
71
applyInfo * applyInfo
66
72
netPolQueue * netPolQueue
67
- stopChannel <- chan struct {}
73
+ // removePolicyInfo tracks when a policy was removed yet had ApplyIPSet failures.
74
+ // This field is only relevant for Linux.
75
+ removePolicyInfo removePolicyInfo
76
+ stopChannel <- chan struct {}
68
77
}
69
78
70
79
func NewDataPlane (nodeName string , ioShim * common.IOShim , cfg * Config , stopChannel <- chan struct {}) (* DataPlane , error ) {
@@ -335,6 +344,9 @@ func (dp *DataPlane) applyDataPlaneNow(context string) error {
335
344
}
336
345
klog .Infof ("[DataPlane] [ApplyDataPlane] [%s] finished applying ipsets" , context )
337
346
347
+ // see comment in RemovePolicy() for why this is here
348
+ dp .setRemovePolicyFailure (false )
349
+
338
350
if dp .applyInBackground {
339
351
dp .applyInfo .Lock ()
340
352
dp .applyInfo .numBatches = 0
@@ -472,26 +484,17 @@ func (dp *DataPlane) addPolicies(netPols []*policies.NPMNetworkPolicy) error {
472
484
}
473
485
}
474
486
475
- if ! util .IsWindowsDP () {
476
- for _ , netPol := range netPols {
477
- if ! (netPol .HasCIDRRules () && dp .ipsetMgr .PreviousApplyFailed ()) {
478
- continue
479
- }
480
-
481
- if inBootupPhase {
482
- // this should never happen because bootup phase is for windows, but just in case, we don't want to applyDataplaneNow() or else there will be a deadlock on dp.applyInfo
483
- msg := fmt .Sprintf ("[DataPlane] [%s] at risk of improperly applying a CIDR policy which is removed then readded" , contextAddNetPolCIDRPrecaution )
484
- klog .Warning (msg )
485
- metrics .SendErrorLogAndMetric (util .DaemonDataplaneID , msg )
486
- break
487
- }
488
-
487
+ if dp .hadRemovePolicyFailure () {
488
+ if inBootupPhase {
489
+ // this should never happen because bootup phase is for windows, but just in case, we don't want to applyDataplaneNow() or else there will be a deadlock on dp.applyInfo
490
+ msg := fmt .Sprintf ("[DataPlane] [%s] at risk of improperly applying a policy which is removed then readded" , contextAddNetPolPrecaution )
491
+ klog .Warning (msg )
492
+ metrics .SendErrorLogAndMetric (util .DaemonDataplaneID , msg )
493
+ } else {
489
494
// prevent #2977
490
- if err := dp .applyDataPlaneNow (contextAddNetPolCIDRPrecaution ); err != nil {
495
+ if err := dp .applyDataPlaneNow (contextAddNetPolPrecaution ); err != nil {
491
496
return err // nolint:wrapcheck // unnecessary to wrap error since the provided context is included in the error
492
497
}
493
-
494
- break
495
498
}
496
499
}
497
500
@@ -531,6 +534,9 @@ func (dp *DataPlane) addPolicies(netPols []*policies.NPMNetworkPolicy) error {
531
534
}
532
535
klog .Infof ("[DataPlane] [%s] finished applying ipsets" , contextAddNetPolBootup )
533
536
537
+ // see comment in RemovePolicy() for why this is here
538
+ dp .setRemovePolicyFailure (false )
539
+
534
540
dp .applyInfo .numBatches = 0
535
541
}
536
542
@@ -627,7 +633,16 @@ func (dp *DataPlane) RemovePolicy(policyKey string) error {
627
633
return err
628
634
}
629
635
630
- return dp .applyDataPlaneNow (contextApplyDP )
636
+ if err := dp .applyDataPlaneNow (contextDelNetPol ); err != nil {
637
+ // Failed to apply IPSets while removing this policy.
638
+ // Consider this removepolicy call a failure until apply IPSets is successful.
639
+ // Related to #2977
640
+ klog .Info ("[DataPlane] remove policy has failed to apply ipsets. setting remove policy failure" )
641
+ dp .setRemovePolicyFailure (true )
642
+ return err // nolint:wrapcheck // unnecessary to wrap error since the provided context is included in the error
643
+ }
644
+
645
+ return nil
631
646
}
632
647
633
648
// UpdatePolicy takes in updated policy object, calculates the delta and applies changes
@@ -749,3 +764,23 @@ func (dp *DataPlane) deleteIPSetsAndReferences(sets []*ipsets.TranslatedIPSet, n
749
764
}
750
765
return nil
751
766
}
767
+
768
+ func (dp * DataPlane ) setRemovePolicyFailure (failed bool ) {
769
+ if util .IsWindowsDP () {
770
+ return
771
+ }
772
+
773
+ dp .removePolicyInfo .Lock ()
774
+ defer dp .removePolicyInfo .Unlock ()
775
+ dp .removePolicyInfo .previousRemovePolicyIPSetsFailed = failed
776
+ }
777
+
778
+ func (dp * DataPlane ) hadRemovePolicyFailure () bool {
779
+ if util .IsWindowsDP () {
780
+ return false
781
+ }
782
+
783
+ dp .removePolicyInfo .Lock ()
784
+ defer dp .removePolicyInfo .Unlock ()
785
+ return dp .removePolicyInfo .previousRemovePolicyIPSetsFailed
786
+ }
0 commit comments