Skip to content

Commit

Permalink
azure: Check public zone for stray record set
Browse files Browse the repository at this point in the history
The azure destroyer lists all the record sets present
in the resource group and deletes them. But when
the resource group is destroyed before the cluster destroy
is called, stray record sets are left behind and never get cleaned
up. This frequently occurs in the azure test environment, where
the reaper deletes the resource group after some time but fails to
clean up these stray record sets.

Adding the fix on the off chance that this scenario also happens in
customer environments.
  • Loading branch information
rna-afk committed Jan 23, 2025
1 parent d2bb750 commit 3d549df
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 9 deletions.
1 change: 1 addition & 0 deletions pkg/asset/cluster/azure/azure.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ func Metadata(config *types.InstallConfig) *azure.Metadata {
Region: config.Platform.Azure.Region,
ResourceGroupName: config.Azure.ResourceGroupName,
BaseDomainResourceGroupName: config.Azure.BaseDomainResourceGroupName,
ZoneName: config.BaseDomain,
}
}

Expand Down
76 changes: 67 additions & 9 deletions pkg/destroy/azure/azure.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ type ClusterUninstaller struct {
ResourceGroupName string
BaseDomainResourceGroupName string
NetworkResourceGroupName string
ZoneName string
ClusterName string

Logger logrus.FieldLogger

Expand Down Expand Up @@ -134,6 +136,8 @@ func New(logger logrus.FieldLogger, metadata *types.ClusterMetadata) (providers.
Logger: logger,
BaseDomainResourceGroupName: metadata.Azure.BaseDomainResourceGroupName,
CloudName: cloudName,
ZoneName: metadata.Azure.ZoneName,
ClusterName: metadata.ClusterName,
}, nil
}

Expand Down Expand Up @@ -193,7 +197,7 @@ func (o *ClusterUninstaller) Run() (*types.ClusterQuota, error) {
if o.CloudName == azure.StackCloud {
err = deleteAzureStackPublicRecords(ctx, o)
} else {
err = deletePublicRecords(ctx, o.zonesClient, o.recordsClient, o.privateZonesClient, o.privateRecordSetsClient, o.Logger, o.ResourceGroupName)
err = deletePublicRecords(ctx, o)
}
if err != nil {
o.Logger.Debug(err)
Expand Down Expand Up @@ -419,17 +423,71 @@ func deleteAzureStackPublicRecords(ctx context.Context, o *ClusterUninstaller) e
return utilerrors.NewAggregate(errs)
}

func deletePublicRecords(ctx context.Context, dnsClient dns.ZonesClient, recordsClient dns.RecordSetsClient, privateDNSClient privatedns.PrivateZonesClient, privateRecordsClient privatedns.RecordSetsClient, logger logrus.FieldLogger, rgName string) error {
// deleteRecordsFromBaseDomain asks deleteRecordsets to remove the cluster's
// "api.<ClusterName>" and "*.apps.<ClusterName>" record sets.
//
// It is a no-op (returning nil) when the base domain resource group name, the
// zone name, or the cluster name is missing from the uninstaller. Otherwise the
// errors from both deletions are aggregated into the returned error. The work
// is bounded by a 10 minute timeout derived from ctx.
func deleteRecordsFromBaseDomain(ctx context.Context, o *ClusterUninstaller) error {
	// Without all three values the record sets cannot be addressed.
	if o.BaseDomainResourceGroupName == "" || o.ZoneName == "" || o.ClusterName == "" {
		o.Logger.Debugf("could not find values in the metadata to get record set")
		return nil
	}

	timeoutCtx, cancel := context.WithTimeout(ctx, 10*time.Minute)
	defer cancel()

	recordNames := []string{
		fmt.Sprintf("api.%s", o.ClusterName),
		fmt.Sprintf("*.apps.%s", o.ClusterName),
	}

	// Both records are always attempted; nil results are dropped by NewAggregate.
	results := make([]error, 0, len(recordNames))
	for _, recordName := range recordNames {
		results = append(results, deleteRecordsets(timeoutCtx, o, recordName))
	}
	return utilerrors.NewAggregate(results)
}

// deleteRecordsets deletes the CNAME record set named url from the zone
// o.ZoneName in the resource group o.BaseDomainResourceGroupName, but only when
// the record set's metadata carries the tag "kubernetes.io_cluster.<InfraID>"
// with the value "owned".
//
// A record set that no longer exists (HTTP 404 on lookup or on delete) is
// treated as already deleted and is not an error. Authentication errors are
// returned unwrapped so callers can still detect them with isAuthError. Any
// other failure is returned wrapped with the record name.
func deleteRecordsets(ctx context.Context, o *ClusterUninstaller, url string) error {
	logger := o.Logger.WithField("record", url)
	tag := fmt.Sprintf("kubernetes.io_cluster.%s", o.InfraID)

	result, err := o.recordsClient.Get(ctx, o.BaseDomainResourceGroupName, o.ZoneName, url, dns.CNAME)
	if err != nil {
		if result.IsHTTPStatus(http.StatusNotFound) {
			logger.Debug("already deleted")
			return nil
		}
		if isAuthError(err) {
			return err
		}
		// Stop here: after a failed lookup, result does not describe a record
		// set, so its metadata must not be inspected.
		return fmt.Errorf("failed to get record set %s from base domain dns zone: %w", url, err)
	}

	// Only remove record sets tagged as owned by this cluster. A missing tag, a
	// nil value, and a value other than "owned" are all handled the same way.
	if value, ok := result.Metadata[tag]; !ok || value == nil || *value != "owned" {
		logger.Debugf("metadata mismatch")
		return nil
	}

	deleteResult, err := o.recordsClient.Delete(ctx, o.BaseDomainResourceGroupName, o.ZoneName, url, dns.CNAME, "")
	if err != nil {
		if deleteResult.IsHTTPStatus(http.StatusNotFound) {
			logger.Debug("already deleted")
			return nil
		}
		if isAuthError(err) {
			return err
		}
		return fmt.Errorf("failed to delete record set %s from base domain dns zone: %w", url, err)
	}

	logger.Info("deleted")
	return nil
}

func deletePublicRecords(ctx context.Context, o *ClusterUninstaller) error {
ctx, cancel := context.WithTimeout(ctx, 10*time.Minute)
defer cancel()

// collect records from private zones in rgName
var errs []error

zonesPage, err := dnsClient.ListByResourceGroup(ctx, rgName, to.Int32Ptr(100))
zonesPage, err := o.zonesClient.ListByResourceGroup(ctx, o.ResourceGroupName, to.Int32Ptr(100))
if err != nil {
if zonesPage.Response().IsHTTPStatus(http.StatusNotFound) {
logger.Debug("already deleted")
o.Logger.Debug("already deleted")
err2 := deleteRecordsFromBaseDomain(ctx, o)
if err2 != nil {
o.Logger.Debugf("failed to delete record sets from the base domain: %w", err)
}
return utilerrors.NewAggregate(errs)
}
errs = append(errs, fmt.Errorf("failed to list dns zone: %w", err))
Expand All @@ -448,7 +506,7 @@ func deletePublicRecords(ctx context.Context, dnsClient dns.ZonesClient, records

for _, zone := range zonesPage.Values() {
if zone.ZoneType == dns.Private {
if err := deletePublicRecordsForZone(ctx, dnsClient, recordsClient, logger, rgName, to.String(zone.Name)); err != nil {
if err := deletePublicRecordsForZone(ctx, o.zonesClient, o.recordsClient, o.Logger, o.ResourceGroupName, to.String(zone.Name)); err != nil {
errs = append(errs, fmt.Errorf("failed to delete public records for %s: %w", to.String(zone.Name), err))
if isAuthError(err) {
return err
Expand All @@ -459,10 +517,10 @@ func deletePublicRecords(ctx context.Context, dnsClient dns.ZonesClient, records
}
}

privateZonesPage, err := privateDNSClient.ListByResourceGroup(ctx, rgName, to.Int32Ptr(100))
privateZonesPage, err := o.privateZonesClient.ListByResourceGroup(ctx, o.ResourceGroupName, to.Int32Ptr(100))
if err != nil {
if privateZonesPage.Response().IsHTTPStatus(http.StatusNotFound) {
logger.Debug("already deleted")
o.Logger.Debug("already deleted")
return utilerrors.NewAggregate(errs)
}
errs = append(errs, fmt.Errorf("failed to list private dns zone: %w", err))
Expand All @@ -479,7 +537,7 @@ func deletePublicRecords(ctx context.Context, dnsClient dns.ZonesClient, records
pageCount++

for _, zone := range privateZonesPage.Values() {
if err := deletePublicRecordsForPrivateZone(ctx, privateRecordsClient, dnsClient, recordsClient, logger, rgName, to.String(zone.Name)); err != nil {
if err := deletePublicRecordsForPrivateZone(ctx, o.privateRecordSetsClient, o.zonesClient, o.recordsClient, o.Logger, o.ResourceGroupName, to.String(zone.Name)); err != nil {
errs = append(errs, fmt.Errorf("failed to delete public records for %s: %w", to.String(zone.Name), err))
if isAuthError(err) {
return err
Expand All @@ -490,7 +548,7 @@ func deletePublicRecords(ctx context.Context, dnsClient dns.ZonesClient, records
}

if pageCount == 0 {
logger.Warn("no DNS records found: either they were already deleted or the service principal lacks permissions to list them")
o.Logger.Warn("no DNS records found: either they were already deleted or the service principal lacks permissions to list them")
}

return utilerrors.NewAggregate(errs)
Expand Down
1 change: 1 addition & 0 deletions pkg/infrastructure/azure/dns.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ func createDNSEntries(ctx context.Context, in clusterapi.InfraReadyInput, extLBF
for k, v := range in.InstallConfig.Config.Azure.UserTags {
azureTags[k] = ptr.To(v)
}
azureTags[fmt.Sprintf("kubernetes.io_cluster.%s", in.InfraID)] = ptr.To("owned")
azureCluster := &capz.AzureCluster{}
key := client.ObjectKey{
Name: in.InfraID,
Expand Down
1 change: 1 addition & 0 deletions pkg/types/azure/metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ type Metadata struct {
Region string `json:"region"`
ResourceGroupName string `json:"resourceGroupName"`
BaseDomainResourceGroupName string `json:"baseDomainResourceGroupName"`
ZoneName string `json:"zoneName"`
}

// Keys used to save Metadata information as tags.
Expand Down

0 comments on commit 3d549df

Please sign in to comment.