Skip to content

Commit

Permalink
azure: Check public zone for stray record set
Browse files Browse the repository at this point in the history
The azure destroyer checks for all the record sets present
in the resource group and deletes them. But in the case where
the resource group is destroyed before the destroy cluster
is called, there will be stray record sets that don't get cleaned
up. This frequently occurs in the azure test environment where
the reaper deletes the resource group after some time but fails to
clean up these stray record sets.

Adding the fix on the off chance this scenario happens in the
customer world.
  • Loading branch information
rna-afk committed Jan 14, 2025
1 parent d2bb750 commit 003e406
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 9 deletions.
2 changes: 2 additions & 0 deletions pkg/asset/cluster/azure/azure.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ func Metadata(config *types.InstallConfig) *azure.Metadata {
Region: config.Platform.Azure.Region,
ResourceGroupName: config.Azure.ResourceGroupName,
BaseDomainResourceGroupName: config.Azure.BaseDomainResourceGroupName,
ZoneName: config.BaseDomain,
APIURL: fmt.Sprintf("api.%s", config.ObjectMeta.Name),
}
}

Expand Down
45 changes: 36 additions & 9 deletions pkg/destroy/azure/azure.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ type ClusterUninstaller struct {
ResourceGroupName string
BaseDomainResourceGroupName string
NetworkResourceGroupName string
ZoneName string
APIURL string

Logger logrus.FieldLogger

Expand Down Expand Up @@ -134,6 +136,8 @@ func New(logger logrus.FieldLogger, metadata *types.ClusterMetadata) (providers.
Logger: logger,
BaseDomainResourceGroupName: metadata.Azure.BaseDomainResourceGroupName,
CloudName: cloudName,
ZoneName: metadata.Azure.ZoneName,
APIURL: metadata.Azure.APIURL,
}, nil
}

Expand Down Expand Up @@ -193,7 +197,7 @@ func (o *ClusterUninstaller) Run() (*types.ClusterQuota, error) {
if o.CloudName == azure.StackCloud {
err = deleteAzureStackPublicRecords(ctx, o)
} else {
err = deletePublicRecords(ctx, o.zonesClient, o.recordsClient, o.privateZonesClient, o.privateRecordSetsClient, o.Logger, o.ResourceGroupName)
err = deletePublicRecords(ctx, o)
}
if err != nil {
o.Logger.Debug(err)
Expand Down Expand Up @@ -419,17 +423,40 @@ func deleteAzureStackPublicRecords(ctx context.Context, o *ClusterUninstaller) e
return utilerrors.NewAggregate(errs)
}

func deletePublicRecords(ctx context.Context, dnsClient dns.ZonesClient, recordsClient dns.RecordSetsClient, privateDNSClient privatedns.PrivateZonesClient, privateRecordsClient privatedns.RecordSetsClient, logger logrus.FieldLogger, rgName string) error {
// deleteRecordsFromBaseDomain deletes the CNAME record set named o.APIURL from
// the public zone o.ZoneName in o.BaseDomainResourceGroupName. It is used to
// clean up a record set left behind in the base-domain zone when the cluster
// resource group no longer exists.
//
// It returns nil when the record set is deleted, is already gone (HTTP 404),
// or when the zone/record names are not set. An authentication error is
// returned unwrapped; any other failure is returned as an aggregate error.
func deleteRecordsFromBaseDomain(ctx context.Context, o *ClusterUninstaller) error {
	// NOTE(review): presumably metadata written before ZoneName/APIURL were
	// introduced decodes these as empty strings; issuing a delete with empty
	// names cannot target a real record set, so skip it. Confirm against callers.
	if o.ZoneName == "" || o.APIURL == "" {
		o.Logger.Debug("zone name or API record name not set, skipping base domain record cleanup")
		return nil
	}

	ctx, cancel := context.WithTimeout(ctx, 10*time.Minute)
	defer cancel()

	// Log through the uninstaller's logger rather than printing to stdout.
	o.Logger.Debugf("deleting record set %q from zone %q in resource group %q", o.APIURL, o.ZoneName, o.BaseDomainResourceGroupName)

	result, err := o.recordsClient.Delete(ctx, o.BaseDomainResourceGroupName, o.ZoneName, o.APIURL, dns.CNAME, "")
	if err == nil {
		return nil
	}

	// A missing record set (or zone) means there is nothing left to clean up.
	if result.IsHTTPStatus(http.StatusNotFound) {
		o.Logger.Debug("already deleted the public records from zone")
		return nil
	}

	// Surface authentication failures as-is so callers can recognise them.
	if isAuthError(err) {
		return err
	}

	return utilerrors.NewAggregate([]error{
		fmt.Errorf("failed to delete record set %s from base domain dns zone %s: %w", o.APIURL, o.ZoneName, err),
	})
}

func deletePublicRecords(ctx context.Context, o *ClusterUninstaller) error {
ctx, cancel := context.WithTimeout(ctx, 10*time.Minute)
defer cancel()

// collect records from private zones in rgName
var errs []error

zonesPage, err := dnsClient.ListByResourceGroup(ctx, rgName, to.Int32Ptr(100))
zonesPage, err := o.zonesClient.ListByResourceGroup(ctx, o.ResourceGroupName, to.Int32Ptr(100))
if err != nil {
if zonesPage.Response().IsHTTPStatus(http.StatusNotFound) {
logger.Debug("already deleted")
o.Logger.Debug("already deleted")
deleteRecordsFromBaseDomain(ctx, o)
return utilerrors.NewAggregate(errs)
}
errs = append(errs, fmt.Errorf("failed to list dns zone: %w", err))
Expand All @@ -448,7 +475,7 @@ func deletePublicRecords(ctx context.Context, dnsClient dns.ZonesClient, records

for _, zone := range zonesPage.Values() {
if zone.ZoneType == dns.Private {
if err := deletePublicRecordsForZone(ctx, dnsClient, recordsClient, logger, rgName, to.String(zone.Name)); err != nil {
if err := deletePublicRecordsForZone(ctx, o.zonesClient, o.recordsClient, o.Logger, o.ResourceGroupName, to.String(zone.Name)); err != nil {
errs = append(errs, fmt.Errorf("failed to delete public records for %s: %w", to.String(zone.Name), err))
if isAuthError(err) {
return err
Expand All @@ -459,10 +486,10 @@ func deletePublicRecords(ctx context.Context, dnsClient dns.ZonesClient, records
}
}

privateZonesPage, err := privateDNSClient.ListByResourceGroup(ctx, rgName, to.Int32Ptr(100))
privateZonesPage, err := o.privateZonesClient.ListByResourceGroup(ctx, o.ResourceGroupName, to.Int32Ptr(100))
if err != nil {
if privateZonesPage.Response().IsHTTPStatus(http.StatusNotFound) {
logger.Debug("already deleted")
o.Logger.Debug("already deleted")
return utilerrors.NewAggregate(errs)
}
errs = append(errs, fmt.Errorf("failed to list private dns zone: %w", err))
Expand All @@ -479,7 +506,7 @@ func deletePublicRecords(ctx context.Context, dnsClient dns.ZonesClient, records
pageCount++

for _, zone := range privateZonesPage.Values() {
if err := deletePublicRecordsForPrivateZone(ctx, privateRecordsClient, dnsClient, recordsClient, logger, rgName, to.String(zone.Name)); err != nil {
if err := deletePublicRecordsForPrivateZone(ctx, o.privateRecordSetsClient, o.zonesClient, o.recordsClient, o.Logger, o.ResourceGroupName, to.String(zone.Name)); err != nil {
errs = append(errs, fmt.Errorf("failed to delete public records for %s: %w", to.String(zone.Name), err))
if isAuthError(err) {
return err
Expand All @@ -490,7 +517,7 @@ func deletePublicRecords(ctx context.Context, dnsClient dns.ZonesClient, records
}

if pageCount == 0 {
logger.Warn("no DNS records found: either they were already deleted or the service principal lacks permissions to list them")
o.Logger.Warn("no DNS records found: either they were already deleted or the service principal lacks permissions to list them")
}

return utilerrors.NewAggregate(errs)
Expand Down
2 changes: 2 additions & 0 deletions pkg/types/azure/metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ type Metadata struct {
Region string `json:"region"`
ResourceGroupName string `json:"resourceGroupName"`
BaseDomainResourceGroupName string `json:"baseDomainResourceGroupName"`
ZoneName string `json:"zoneName"`
APIURL string `json:"apiURL"`
}

// Keys used to save Metadata information as tags.
Expand Down

0 comments on commit 003e406

Please sign in to comment.