Skip to content

Commit b22ac38

Browse files
behzad-mirvipul-21
andauthored
feat: Migration stateless cni (#2470)
* 🌈 feat: adding flags for stateless cni (#2103) feat: stateless cni * feat: create stateless cni binary for swift (#2275) * enabling CNS telemetry * CNI Telemetry enabled on CNS * Code changes for Statefull CNI Migration * Making changes to the CNI state migration code. * Make code changes for Stateless CNI migrations. * Make changes to statless CNI migration branch. * Stateless CNI migration code changes * resolving migration issue * remove cni changes * Applying changes to CNIReonciler * Addressing the comments. * Addressing the comments * addressing the latest comments * Addressing Evan's comments * Adding a MigrateSate() function to the cnireconciler --------- Co-authored-by: Vipul Singh <[email protected]>
1 parent e5b9e16 commit b22ac38

File tree

11 files changed

+204
-30
lines changed

11 files changed

+204
-30
lines changed

cni/linux.Dockerfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ RUN GOOS=$OS CGO_ENABLED=0 go build -a -o /go/bin/azure-vnet-telemetry -trimpath
1313
RUN GOOS=$OS CGO_ENABLED=0 go build -a -o /go/bin/azure-vnet-ipam -trimpath -ldflags "-X main.version="$VERSION"" -gcflags="-dwarflocationlists=true" cni/ipam/plugin/main.go
1414
RUN GOOS=$OS CGO_ENABLED=0 go build -a -o /go/bin/azurecni-stateless -trimpath -ldflags "-X main.version="$VERSION"" -gcflags="-dwarflocationlists=true" cni/network/stateless/main.go
1515

16+
FROM scratch as bins
17+
COPY --from=azure-vnet /go/bin/* /
18+
1619
FROM mcr.microsoft.com/cbl-mariner/base/core:2.0 AS compressor
1720
ARG OS
1821
WORKDIR /payload

cns/cnireconciler/podinfoprovider.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@ package cnireconciler
22

33
import (
44
"fmt"
5+
"net"
6+
"strings"
57

68
"github.com/Azure/azure-container-networking/cni/api"
79
"github.com/Azure/azure-container-networking/cni/client"
810
"github.com/Azure/azure-container-networking/cns"
11+
"github.com/Azure/azure-container-networking/cns/logger"
912
"github.com/Azure/azure-container-networking/cns/restserver"
1013
"github.com/Azure/azure-container-networking/store"
1114
"github.com/pkg/errors"
@@ -101,3 +104,57 @@ func endpointStateToPodInfoByIP(state map[string]*restserver.EndpointInfo) (map[
101104
}
102105
return podInfoByIP, nil
103106
}
107+
108+
// MigrateCNISate returns an endpoint state of CNS by reading the CNI state file
109+
func MigrateCNISate() (map[string]*restserver.EndpointInfo, error) {
110+
return migrateCNISate(exec.New())
111+
}
112+
113+
func migrateCNISate(exc exec.Interface) (map[string]*restserver.EndpointInfo, error) {
114+
cli := client.New(exc)
115+
state, err := cli.GetEndpointState()
116+
if err != nil {
117+
return nil, fmt.Errorf("failed to invoke CNI client.GetEndpointState(): %w", err)
118+
}
119+
endpointState := cniStateToCnsEndpointState(state)
120+
return endpointState, nil
121+
}
122+
123+
// cniStateToCnsEndpointState converts an AzureCNIState dumped from a CNI exec
124+
// into a EndpointInfo map, using the containerID as keys in the map.
125+
// The map then will be saved on CNS endpoint state
126+
func cniStateToCnsEndpointState(state *api.AzureCNIState) map[string]*restserver.EndpointInfo {
127+
logger.Printf("Generating CNS Endpoint State")
128+
endpointState := map[string]*restserver.EndpointInfo{}
129+
for epID, endpoint := range state.ContainerInterfaces {
130+
endpointInfo := &restserver.EndpointInfo{PodName: endpoint.PodName, PodNamespace: endpoint.PodNamespace, IfnameToIPMap: make(map[string]*restserver.IPInfo)}
131+
ipInfo := &restserver.IPInfo{}
132+
for _, epIP := range endpoint.IPAddresses {
133+
if epIP.IP.To4() == nil { // is an ipv6 address
134+
ipconfig := net.IPNet{IP: epIP.IP, Mask: epIP.Mask}
135+
ipInfo.IPv6 = append(ipInfo.IPv6, ipconfig)
136+
137+
} else {
138+
ipconfig := net.IPNet{IP: epIP.IP, Mask: epIP.Mask}
139+
ipInfo.IPv4 = append(ipInfo.IPv4, ipconfig)
140+
}
141+
}
142+
endpointID, Ifname := extractEndpointInfo(epID, endpoint.ContainerID)
143+
endpointInfo.IfnameToIPMap[Ifname] = ipInfo
144+
endpointState[endpointID] = endpointInfo
145+
logger.Printf("CNS endpoint state extracted from CNI: [%+v]", *endpointInfo)
146+
}
147+
return endpointState
148+
}
149+
150+
// extractEndpointInfo extract Interface Name and endpointID for each endpoint based the CNI state
151+
func extractEndpointInfo(epID, containerID string) (endpointID, interfaceName string) {
152+
ifName := restserver.InterfaceName
153+
if strings.Contains(epID, "-eth") {
154+
ifName = epID[len(epID)-4:]
155+
}
156+
if containerID == "" {
157+
return epID, ifName
158+
}
159+
return containerID, ifName
160+
}

cns/cnireconciler/podinfoprovider_test.go

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,39 @@ func TestNewCNIPodInfoProvider(t *testing.T) {
3131
{
3232
name: "good",
3333
exec: newCNIStateFakeExec(
34-
`{"ContainerInterfaces":{"3f813b02-eth0":{"PodName":"metrics-server-77c8679d7d-6ksdh","PodNamespace":"kube-system","PodEndpointID":"3f813b02-eth0","ContainerID":"3f813b029429b4e41a09ab33b6f6d365d2ed704017524c78d1d0dece33cdaf46","IPAddresses":[{"IP":"10.241.0.17","Mask":"//8AAA=="}]},"6e688597-eth0":{"PodName":"tunnelfront-5d96f9b987-65xbn","PodNamespace":"kube-system","PodEndpointID":"6e688597-eth0","ContainerID":"6e688597eafb97c83c84e402cc72b299bfb8aeb02021e4c99307a037352c0bed","IPAddresses":[{"IP":"10.241.0.13","Mask":"//8AAA=="}]}}}`,
34+
`{"ContainerInterfaces":{"3f813b02-eth0":{"PodName":"metrics-server-77c8679d7d-6ksdh","IfName":"eth0",
35+
"PodNamespace":"kube-system","PodEndpointID":"3f813b02-eth0",
36+
"ContainerID":"3f813b029429b4e41a09ab33b6f6d365d2ed704017524c78d1d0dece33cdaf46",
37+
"IPAddresses":[{"IP":"10.241.0.17","Mask":"//8AAA=="}]},
38+
"6e688597-eth0":{"PodName":"tunnelfront-5d96f9b987-65xbn","IfName":"eth0","PodNamespace":"kube-system",
39+
"PodEndpointID":"6e688597-eth0","ContainerID":"6e688597eafb97c83c84e402cc72b299bfb8aeb02021e4c99307a037352c0bed",
40+
"IPAddresses":[{"IP":"10.241.0.13","Mask":"//8AAA=="}]}}}`,
3541
),
3642
want: map[string]cns.PodInfo{
3743
"10.241.0.13": cns.NewPodInfo("6e688597eafb97c83c84e402cc72b299bfb8aeb02021e4c99307a037352c0bed", "6e688597-eth0", "tunnelfront-5d96f9b987-65xbn", "kube-system"),
3844
"10.241.0.17": cns.NewPodInfo("3f813b029429b4e41a09ab33b6f6d365d2ed704017524c78d1d0dece33cdaf46", "3f813b02-eth0", "metrics-server-77c8679d7d-6ksdh", "kube-system"),
3945
},
4046
wantErr: false,
4147
},
48+
{
49+
name: "dualstack",
50+
exec: newCNIStateFakeExec(
51+
`{"ContainerInterfaces":{"3f813b02-eth0":{"PodName":"metrics-server-77c8679d7d-6ksdh","IfName":"eth0",
52+
"PodNamespace":"kube-system","PodEndpointID":"3f813b02-eth0",
53+
"ContainerID":"3f813b029429b4e41a09ab33b6f6d365d2ed704017524c78d1d0dece33cdaf46",
54+
"IPAddresses":[{"IP":"10.241.0.17","Mask":"//8AAA=="},{"IP":"2001:0db8:abcd:0015::0","Mask":"//8AAA=="}]},
55+
"6e688597-eth0":{"PodName":"tunnelfront-5d96f9b987-65xbn","IfName":"eth0","PodNamespace":"kube-system",
56+
"PodEndpointID":"6e688597-eth0","ContainerID":"6e688597eafb97c83c84e402cc72b299bfb8aeb02021e4c99307a037352c0bed",
57+
"IPAddresses":[{"IP":"10.241.0.13","Mask":"//8AAA=="},{"IP":"2001:0db8:abcd:0014::0","Mask":"//8AAA=="}]}}}`,
58+
),
59+
want: map[string]cns.PodInfo{
60+
"2001:db8:abcd:15::": cns.NewPodInfo("3f813b029429b4e41a09ab33b6f6d365d2ed704017524c78d1d0dece33cdaf46", "3f813b02-eth0", "metrics-server-77c8679d7d-6ksdh", "kube-system"),
61+
"2001:db8:abcd:14::": cns.NewPodInfo("6e688597eafb97c83c84e402cc72b299bfb8aeb02021e4c99307a037352c0bed", "6e688597-eth0", "tunnelfront-5d96f9b987-65xbn", "kube-system"),
62+
"10.241.0.17": cns.NewPodInfo("3f813b029429b4e41a09ab33b6f6d365d2ed704017524c78d1d0dece33cdaf46", "3f813b02-eth0", "metrics-server-77c8679d7d-6ksdh", "kube-system"),
63+
"10.241.0.13": cns.NewPodInfo("6e688597eafb97c83c84e402cc72b299bfb8aeb02021e4c99307a037352c0bed", "6e688597-eth0", "tunnelfront-5d96f9b987-65xbn", "kube-system"),
64+
},
65+
wantErr: false,
66+
},
4267
{
4368
name: "empty CNI response",
4469
exec: newCNIStateFakeExec(

cns/configuration/configuration.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ type CNSConfig struct {
3535
EnableCNIConflistGeneration bool
3636
EnableIPAMv2 bool
3737
EnablePprof bool
38+
EnableStateMigration bool
3839
EnableSubnetScarcity bool
3940
EnableSwiftV2 bool
4041
InitializeFromCNI bool

cns/restserver/ipam.go

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@ var (
2929
ErrEndpointStateNotFound = errors.New("endpoint state could not be found in the statefile")
3030
)
3131

32+
const (
33+
ContainerIDLength = 8
34+
InterfaceName = "eth0"
35+
)
36+
3237
// requestIPConfigHandlerHelper validates the request, assign IPs and return the IPConfigs
3338
func (service *HTTPRestService) requestIPConfigHandlerHelper(ctx context.Context, ipconfigsRequest cns.IPConfigsRequest) (*cns.IPConfigsResponse, error) {
3439
// For SWIFT v2 scenario, the validator function will also modify the ipconfigsRequest.
@@ -1008,9 +1013,9 @@ func (service *HTTPRestService) EndpointHandlerAPI(w http.ResponseWriter, r *htt
10081013
// GetEndpointHandler handles the incoming GetEndpoint requests with http Get method
10091014
func (service *HTTPRestService) GetEndpointHandler(w http.ResponseWriter, r *http.Request) {
10101015
logger.Printf("[GetEndpointState] GetEndpoint for %s", r.URL.Path)
1011-
10121016
endpointID := strings.TrimPrefix(r.URL.Path, cns.EndpointPath)
10131017
endpointInfo, err := service.GetEndpointHelper(endpointID)
1018+
// Check if the request is valid
10141019
if err != nil {
10151020
response := GetEndpointResponse{
10161021
Response: Response{
@@ -1068,6 +1073,14 @@ func (service *HTTPRestService) GetEndpointHelper(endpointID string) (*EndpointI
10681073
logger.Warnf("[GetEndpointState] Found existing endpoint state for container %s", endpointID)
10691074
return endpointInfo, nil
10701075
}
1076+
// This part is a temprory fix if we have endpoint states belong to CNI version 1.4.X on Windows since the states don't have the containerID
1077+
// In case there was no endpoint founded with ContainerID as the key,
1078+
// then [First 8 character of containerid]-eth0 will be tried
1079+
legacyEndpointID := endpointID[:ContainerIDLength] + "-" + InterfaceName
1080+
if endpointInfo, ok := service.EndpointState[legacyEndpointID]; ok {
1081+
logger.Warnf("[GetEndpointState] Found existing endpoint state for container %s", legacyEndpointID)
1082+
return endpointInfo, nil
1083+
}
10711084
return nil, ErrEndpointStateNotFound
10721085
}
10731086

cns/service/main.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1230,6 +1230,13 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn
12301230
}
12311231
}
12321232

1233+
// perform state migration from CNI in case CNS is set to manage the endpoint state and has emty state
1234+
if cnsconfig.EnableStateMigration && !httpRestServiceImplementation.EndpointStateStore.Exists() {
1235+
if err = PopulateCNSEndpointState(httpRestServiceImplementation.EndpointStateStore); err != nil {
1236+
return errors.Wrap(err, "failed to create CNS EndpointState From CNI")
1237+
}
1238+
}
1239+
12331240
var podInfoByIPProvider cns.PodInfoByIPProvider
12341241
switch {
12351242
case cnsconfig.ManageEndpointState:
@@ -1516,3 +1523,17 @@ func createOrUpdateNodeInfoCRD(ctx context.Context, restConfig *rest.Config, nod
15161523

15171524
return nil
15181525
}
1526+
1527+
// PopulateCNSEndpointState initilizes CNS Endpoint State by Migrating the CNI state.
1528+
func PopulateCNSEndpointState(endpointStateStore store.KeyValueStore) error {
1529+
logger.Printf("State Migration is enabled")
1530+
endpointState, err := cnireconciler.MigrateCNISate()
1531+
if err != nil {
1532+
return errors.Wrap(err, "failed to create CNS Endpoint state from CNI")
1533+
}
1534+
err = endpointStateStore.Write(restserver.EndpointStoreKey, endpointState)
1535+
if err != nil {
1536+
return fmt.Errorf("failed to write endpoint state to store: %w", err)
1537+
}
1538+
return nil
1539+
}

network/endpoint.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ type EndpointInfo struct {
9292
NICType cns.NICType
9393
SkipDefaultRoutes bool
9494
HNSEndpointID string
95+
HostIfName string
9596
}
9697

9798
// RouteInfo contains information about an IP route.
@@ -267,6 +268,8 @@ func (ep *endpoint) getInfo() *EndpointInfo {
267268
PODName: ep.PODName,
268269
PODNameSpace: ep.PODNameSpace,
269270
NetworkContainerID: ep.NetworkContainerID,
271+
HNSEndpointID: ep.HnsId,
272+
HostIfName: ep.HostIfName,
270273
}
271274

272275
info.Routes = append(info.Routes, ep.Routes...)
@@ -350,3 +353,11 @@ func GetPodNameWithoutSuffix(podName string) string {
350353
logger.Info("Pod name after splitting based on", zap.Any("nameSplit", nameSplit))
351354
return strings.Join(nameSplit, "-")
352355
}
356+
357+
// IsEndpointStateInComplete returns true if both HNSEndpointID and HostVethName are missing.
358+
func (epInfo *EndpointInfo) IsEndpointStateIncomplete() bool {
359+
if epInfo.HNSEndpointID == "" && epInfo.IfName == "" {
360+
return true
361+
}
362+
return false
363+
}

network/endpoint_linux.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,3 +531,9 @@ func getDefaultGateway(routes []RouteInfo) net.IP {
531531

532532
return nil
533533
}
534+
535+
// GetEndpointInfoByIPImpl returns an endpointInfo that contains corresponding HostVethName.
536+
// TODO: It needs to be tested to see if HostVethName is required for SingleTenancy, WorkItem: 26606939
537+
func (epInfo *EndpointInfo) GetEndpointInfoByIPImpl(_ []net.IPNet, _ string) (*EndpointInfo, error) {
538+
return epInfo, nil
539+
}

network/endpoint_windows.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"github.com/Azure/azure-container-networking/platform"
1616
"github.com/Microsoft/hcsshim"
1717
"github.com/Microsoft/hcsshim/hcn"
18+
"github.com/pkg/errors"
1819
"go.uber.org/zap"
1920
)
2021

@@ -495,3 +496,28 @@ func (ep *endpoint) getInfoImpl(epInfo *EndpointInfo) {
495496
func (nm *networkManager) updateEndpointImpl(nw *network, existingEpInfo *EndpointInfo, targetEpInfo *EndpointInfo) (*endpoint, error) {
496497
return nil, nil
497498
}
499+
500+
// GetEndpointInfoByIPImpl returns an endpointInfo with the corrsponding HNS Endpoint ID that matches an specific IP Address.
501+
func (epInfo *EndpointInfo) GetEndpointInfoByIPImpl(ipAddresses []net.IPNet, networkID string) (*EndpointInfo, error) {
502+
// check if network exists, only create the network does not exist
503+
hnsResponse, err := Hnsv2.GetNetworkByName(networkID)
504+
if err != nil {
505+
return epInfo, errors.Wrapf(err, "HNS Network not found")
506+
}
507+
hcnEndpoints, err := Hnsv2.ListEndpointsOfNetwork(hnsResponse.Id)
508+
if err != nil {
509+
return epInfo, errors.Wrapf(err, "failed to fetch HNS endpoints for the given network")
510+
}
511+
for i := range hcnEndpoints {
512+
for _, ipConfiguration := range hcnEndpoints[i].IpConfigurations {
513+
for _, ipAddress := range ipAddresses {
514+
prefixLength, _ := ipAddress.Mask.Size()
515+
if ipConfiguration.IpAddress == ipAddress.IP.String() && ipConfiguration.PrefixLength == uint8(prefixLength) {
516+
epInfo.HNSEndpointID = hcnEndpoints[i].Id
517+
return epInfo, nil
518+
}
519+
}
520+
}
521+
}
522+
return epInfo, errors.Wrapf(err, "No HNSEndpointID matches the IPAddress: "+ipAddresses[0].IP.String())
523+
}

network/manager.go

Lines changed: 38 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,39 @@ func (nm *networkManager) UpdateEndpointState(ep *endpoint) error {
420420
return nil
421421
}
422422

423+
// GetEndpointState will make a call to CNS GetEndpointState API in the stateless CNI mode to fetch the endpointInfo
424+
// TODO unit tests need to be added, WorkItem: 26606939
425+
func (nm *networkManager) GetEndpointState(networkID, endpointID string) (*EndpointInfo, error) {
426+
endpointResponse, err := nm.CnsClient.GetEndpoint(context.TODO(), endpointID)
427+
if err != nil {
428+
return nil, errors.Wrapf(err, "Get endpoint API returend with error")
429+
}
430+
epInfo := &EndpointInfo{
431+
Id: endpointID,
432+
IfIndex: EndpointIfIndex, // Azure CNI supports only one interface
433+
IfName: endpointResponse.EndpointInfo.HostVethName,
434+
ContainerID: endpointID,
435+
PODName: endpointResponse.EndpointInfo.PodName,
436+
PODNameSpace: endpointResponse.EndpointInfo.PodNamespace,
437+
NetworkContainerID: endpointID,
438+
HNSEndpointID: endpointResponse.EndpointInfo.HnsEndpointID,
439+
}
440+
441+
for _, ip := range endpointResponse.EndpointInfo.IfnameToIPMap {
442+
epInfo.IPAddresses = ip.IPv4
443+
epInfo.IPAddresses = append(epInfo.IPAddresses, ip.IPv6...)
444+
445+
}
446+
if epInfo.IsEndpointStateIncomplete() {
447+
epInfo, err = epInfo.GetEndpointInfoByIPImpl(epInfo.IPAddresses, networkID)
448+
if err != nil {
449+
return nil, errors.Wrapf(err, "Get endpoint API returend with error")
450+
}
451+
}
452+
logger.Info("returning getEndpoint API with", zap.String("Endpoint Info: ", epInfo.PrettyString()), zap.String("HNISID : ", epInfo.HNSEndpointID))
453+
return epInfo, nil
454+
}
455+
423456
// DeleteEndpoint deletes an existing container endpoint.
424457
func (nm *networkManager) DeleteEndpoint(networkID, endpointID string, epInfo *EndpointInfo) error {
425458
nm.Lock()
@@ -475,45 +508,23 @@ func (nm *networkManager) DeleteEndpointState(networkID string, epInfo *Endpoint
475508
}
476509

477510
// GetEndpointInfo returns information about the given endpoint.
478-
func (nm *networkManager) GetEndpointInfo(networkId string, endpointId string) (*EndpointInfo, error) {
511+
func (nm *networkManager) GetEndpointInfo(networkID, endpointID string) (*EndpointInfo, error) {
479512
nm.Lock()
480513
defer nm.Unlock()
481514

482515
if nm.IsStatelessCNIMode() {
483516
logger.Info("calling cns getEndpoint API")
484-
endpointResponse, err := nm.CnsClient.GetEndpoint(context.TODO(), endpointId)
485-
if err != nil {
486-
return nil, errors.Wrapf(err, "Get endpoint API returend with error")
487-
}
488-
if endpointResponse.EndpointInfo.HnsEndpointID == "" && endpointResponse.EndpointInfo.HostVethName == "" {
489-
return nil, errors.New("Get endpoint API returend with empty HNSEndpointID and HostVethName")
490-
}
491-
epInfo := &EndpointInfo{
492-
Id: endpointId,
493-
IfIndex: EndpointIfIndex, // Azure CNI supports only one interface
494-
IfName: endpointResponse.EndpointInfo.HostVethName,
495-
ContainerID: endpointId,
496-
PODName: endpointResponse.EndpointInfo.PodName,
497-
PODNameSpace: endpointResponse.EndpointInfo.PodNamespace,
498-
NetworkContainerID: endpointId,
499-
HNSEndpointID: endpointResponse.EndpointInfo.HnsEndpointID,
500-
}
517+
epInfo, err := nm.GetEndpointState(networkID, endpointID)
501518

502-
for _, ip := range endpointResponse.EndpointInfo.IfnameToIPMap {
503-
epInfo.IPAddresses = ip.IPv4
504-
epInfo.IPAddresses = append(epInfo.IPAddresses, ip.IPv6...)
505-
506-
}
507-
logger.Info("returning getEndpoint API with", zap.String("Endpoint Info: ", epInfo.PrettyString()), zap.String("HNISID : ", epInfo.HNSEndpointID))
508-
return epInfo, nil
519+
return epInfo, err
509520
}
510521

511-
nw, err := nm.getNetwork(networkId)
522+
nw, err := nm.getNetwork(networkID)
512523
if err != nil {
513524
return nil, err
514525
}
515526

516-
ep, err := nw.getEndpoint(endpointId)
527+
ep, err := nw.getEndpoint(endpointID)
517528
if err != nil {
518529
return nil, err
519530
}

0 commit comments

Comments
 (0)