Skip to content

Commit b84dbfd

Browse files
[backport] [NPM] Backport NPM Changes for v1.5.41 (#3344)
* test: [NPM] fix windows unit test for policymanager (#3161) * test: fix windows unit test for policymanager Signed-off-by: Hunter Gregory <[email protected]> * fix(test): flip bool Signed-off-by: Hunter Gregory <[email protected]> --------- Signed-off-by: Hunter Gregory <[email protected]> * fix: [NPM] close telemetry handler before crashing (#3333) fix: close telemetry handler before crashing Signed-off-by: Hunter Gregory <[email protected]> * fix(log): time waiting for appinsights to close was unbounded (#3337) * fix: time waiting for appinsights to close was unbounded Signed-off-by: Hunter Gregory <[email protected]> * fix: close timer in case it hasn't fired yet Signed-off-by: Hunter Gregory <[email protected]> --------- Signed-off-by: Hunter Gregory <[email protected]> * resolved merge conflict as there isnt npm lite in v1.5 --------- Signed-off-by: Hunter Gregory <[email protected]> Co-authored-by: Hunter Gregory <[email protected]>
1 parent 269fc2d commit b84dbfd

File tree

5 files changed

+54
-8
lines changed

5 files changed

+54
-8
lines changed

aitelemetry/telemetrywrapper.go

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ const (
2727
azurePublicCloudStr = "AzurePublicCloud"
2828
hostNameKey = "hostname"
2929
defaultTimeout = 10
30+
maxCloseTimeoutInSeconds = 30
3031
defaultBatchIntervalInSecs = 15
3132
defaultBatchSizeInBytes = 32768
3233
defaultGetEnvRetryCount = 5
@@ -330,8 +331,35 @@ func (th *telemetryHandle) Close(timeout int) {
330331
timeout = defaultTimeout
331332
}
332333

334+
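// max wait is the larger of the timeout and maxCloseTimeoutInSeconds (NOTE(review): the check below raises the wait to maxCloseTimeoutInSeconds instead of capping it; confirm whether a minimum was intended)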
335+
maxWaitTimeInSeconds := timeout
336+
if maxWaitTimeInSeconds < maxCloseTimeoutInSeconds {
337+
maxWaitTimeInSeconds = maxCloseTimeoutInSeconds
338+
}
339+
333340
// wait for items to be sent otherwise timeout
334-
<-th.client.Channel().Close(time.Duration(timeout) * time.Second)
341+
// similar to the example in the appinsights-go repo: https://github.com/microsoft/ApplicationInsights-Go#shutdown
342+
timer := time.NewTimer(time.Duration(maxWaitTimeInSeconds) * time.Second)
343+
defer timer.Stop()
344+
select {
345+
case <-th.client.Channel().Close(time.Duration(timeout) * time.Second):
346+
// timeout specified for retries.
347+
348+
// If we got here, then all telemetry was submitted
349+
// successfully, and we can proceed to exiting.
350+
351+
case <-timer.C:
352+
// absolute timeout. This covers any
353+
// previous telemetry submission that may not have
354+
// completed before Close was called.
355+
356+
// There are a number of reasons we could have
357+
// reached here. We gave it a go, but telemetry
358+
// submission failed somewhere. Perhaps old events
359+
// were still retrying, or perhaps we're throttled.
360+
// Either way, we don't want to wait around for it
361+
// to complete, so let's just exit.
362+
}
335363

336364
// Remove diagnostic message listener
337365
if th.diagListener != nil {

npm/cmd/start.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,10 @@ func newStartNPMCmd() *cobra.Command {
5656
KubeConfigPath: viper.GetString(flagKubeConfigPath),
5757
}
5858

59-
return start(*config, flags)
59+
// start is blocking, unless there's an error
60+
err = start(*config, flags)
61+
metrics.Close()
62+
return err
6063
},
6164
}
6265

@@ -117,7 +120,10 @@ func start(config npmconfig.Config, flags npmconfig.Flags) error {
117120
klog.Infof("Resync period for NPM pod is set to %d.", int(resyncPeriod/time.Minute))
118121
factory := informers.NewSharedInformerFactory(clientset, resyncPeriod)
119122

120-
k8sServerVersion := k8sServerVersion(clientset)
123+
err = metrics.CreateTelemetryHandle(config.NPMVersion(), version, npm.GetAIMetadata())
124+
if err != nil {
125+
klog.Infof("CreateTelemetryHandle failed with error %v. AITelemetry is not initialized.", err)
126+
}
121127

122128
var dp dataplane.GenericDataplane
123129
stopChannel := wait.NeverStop
@@ -181,11 +187,9 @@ func start(config npmconfig.Config, flags npmconfig.Flags) error {
181187
}
182188
dp.RunPeriodicTasks()
183189
}
190+
191+
k8sServerVersion := k8sServerVersion(clientset)
184192
npMgr := npm.NewNetworkPolicyManager(config, factory, dp, exec.New(), version, k8sServerVersion)
185-
err = metrics.CreateTelemetryHandle(config.NPMVersion(), version, npm.GetAIMetadata())
186-
if err != nil {
187-
klog.Infof("CreateTelemetryHandle failed with error %v. AITelemetry is not initialized.", err)
188-
}
189193

190194
go restserver.NPMRestServerListenAndServe(config, npMgr)
191195

npm/metrics/ai-utils.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ import (
1111
"k8s.io/klog"
1212
)
1313

14+
const telemetryCloseWaitTimeSeconds = 10
15+
1416
var (
1517
th aitelemetry.TelemetryHandle
1618
npmVersion int
@@ -54,6 +56,15 @@ func CreateTelemetryHandle(npmVersionNum int, imageVersion, aiMetadata string) e
5456
return nil
5557
}
5658

59+
// Close cleans up the telemetry handle, which effectively waits for all telemetry data to be sent
60+
func Close() {
61+
if th == nil {
62+
return
63+
}
64+
65+
th.Close(telemetryCloseWaitTimeSeconds)
66+
}
67+
5768
// SendErrorLogAndMetric sends a metric through AI telemetry and sends a log to the Kusto Messages table
5869
func SendErrorLogAndMetric(operationID int, format string, args ...interface{}) {
5970
// Send error metrics

npm/pkg/dataplane/ipsets/ipsetmanager_linux.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,7 @@ func (iMgr *IPSetManager) applyIPSets() error {
422422
msg := fmt.Sprintf("exceeded max consecutive failures (%d) when applying ipsets. final error: %s", maxConsecutiveFailures, restoreError.Error())
423423
klog.Error(msg)
424424
metrics.SendErrorLogAndMetric(util.IpsmID, msg)
425+
metrics.Close()
425426
panic(msg)
426427
}
427428

npm/pkg/dataplane/policies/policymanager_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,9 @@ func TestBootup(t *testing.T) {
115115
metrics.IncNumACLRules()
116116

117117
require.NoError(t, pMgr.Bootup(epIDs))
118-
require.Equal(t, util.IptablesNft, util.Iptables)
118+
if !util.IsWindowsDP() {
119+
require.Equal(t, util.IptablesNft, util.Iptables)
120+
}
119121

120122
expectedNumACLs := 11
121123
if util.IsWindowsDP() {

0 commit comments

Comments
 (0)