Skip to content

Commit

Permalink
Merge pull request #833 from yastij/fail-deleted-machines
Browse files Browse the repository at this point in the history
read failureReason and failureMessage from the vsphereVM object to the vspheremachine
  • Loading branch information
k8s-ci-robot authored Mar 17, 2020
2 parents 39568c1 + 2bae906 commit 06eeb56
Show file tree
Hide file tree
Showing 7 changed files with 124 additions and 13 deletions.
31 changes: 31 additions & 0 deletions api/v1alpha3/vspherevm_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package v1alpha3
import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/cluster-api/errors"
)

const (
Expand Down Expand Up @@ -82,6 +83,36 @@ type VSphereVMStatus struct {
// network interfaces.
// +optional
Network []NetworkStatus `json:"network,omitempty"`

// FailureReason will be set in the event that there is a terminal problem
// reconciling the vspherevm and will contain a succinct value suitable
// for vm interpretation.
//
// This field should not be set for transitive errors that a controller
// faces that are expected to be fixed automatically over
// time (like service outages), but instead indicate that something is
// fundamentally wrong with the vm.
//
// Any transient errors that occur during the reconciliation of vspherevms
// can be added as events to the vspherevm object and/or logged in the
// controller's output.
// +optional
FailureReason *errors.MachineStatusError `json:"failureReason,omitempty"`

// FailureMessage will be set in the event that there is a terminal problem
// reconciling the vspherevm and will contain a more verbose string suitable
// for logging and human consumption.
//
// This field should not be set for transitive errors that a controller
// faces that are expected to be fixed automatically over
// time (like service outages), but instead indicate that something is
// fundamentally wrong with the vm.
//
// Any transient errors that occur during the reconciliation of vspherevms
// can be added as events to the vspherevm object and/or logged in the
// controller's output.
// +optional
FailureMessage *string `json:"failureMessage,omitempty"`
}

// +kubebuilder:object:root=true
Expand Down
10 changes: 10 additions & 0 deletions api/v1alpha3/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions config/crd/bases/infrastructure.cluster.x-k8s.io_vspherevms.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,28 @@ spec:
to determine the actual type of clone operation used to create this
VM.
type: string
failureMessage:
description: "FailureMessage will be set in the event that there is
a terminal problem reconciling the vspherevm and will contain a
more verbose string suitable for logging and human consumption.
\n This field should not be set for transitive errors that a controller
faces that are expected to be fixed automatically over time (like
service outages), but instead indicate that something is fundamentally
wrong with the vm. \n Any transient errors that occur during the
reconciliation of vspherevms can be added as events to the vspherevm
object and/or logged in the controller's output."
type: string
failureReason:
description: "FailureReason will be set in the event that there is
a terminal problem reconciling the vspherevm and will contain a
succinct value suitable for vm interpretation. \n This field should
not be set for transitive errors that a controller faces that are
expected to be fixed automatically over time (like service outages),
but instead indicate that something is fundamentally wrong with
the vm. \n Any transient errors that occur during the reconciliation
of vspherevms can be added as events to the vspherevm object and/or
logged in the controller's output."
type: string
network:
description: Network returns the network status for each of the machine's
configured network interfaces.
Expand Down
41 changes: 31 additions & 10 deletions controllers/vspheremachine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,21 +243,16 @@ func (r machineReconciler) reconcileDeleteVM(ctx *context.MachineContext) error

func (r machineReconciler) reconcileDeleteVMPre7(ctx *context.MachineContext) error {
// Get ready to find the associated VSphereVM resource.
vm := &infrav1.VSphereVM{}
vmKey := apitypes.NamespacedName{
Namespace: ctx.VSphereMachine.Namespace,
Name: ctx.Machine.Name,
}

vm, err := r.findVMPre7(ctx)
// Attempt to find the associated VSphereVM resource.
if err := ctx.Client.Get(ctx, vmKey, vm); err != nil {
// If an error occurs finding the VSphereVM resource other than
// IsNotFound, then return the error. Otherwise it means the VSphereVM
// is already deleted, and that's okay.
if !apierrors.IsNotFound(err) {
return errors.Wrapf(err, "failed to get VSphereVM %s", vmKey)
}
} else if vm.GetDeletionTimestamp().IsZero() {
if err != nil {
return err
}
if vm.GetDeletionTimestamp().IsZero() {
// If the VSphereVM was found and it's not already enqueued for
// deletion, go ahead and attempt to delete it.
if err := ctx.Client.Delete(ctx, vm); err != nil {
Expand All @@ -274,7 +269,33 @@ func (r machineReconciler) reconcileDeleteVMPre7(ctx *context.MachineContext) er
return nil
}

func (r machineReconciler) findVMPre7(ctx *context.MachineContext) (*infrav1.VSphereVM, error) {
// Get ready to find the associated VSphereVM resource.
vm := &infrav1.VSphereVM{}
vmKey := apitypes.NamespacedName{
Namespace: ctx.VSphereMachine.Namespace,
Name: ctx.Machine.Name,
}
// Attempt to find the associated VSphereVM resource.
if err := ctx.Client.Get(ctx, vmKey, vm); err != nil {
return nil, err
}
return vm, nil
}

func (r machineReconciler) reconcileNormal(ctx *context.MachineContext) (reconcile.Result, error) {
vsphereVM, err := r.findVMPre7(ctx)
if err != nil {
if !apierrors.IsNotFound(err) {
return reconcile.Result{}, err
}
}
if vsphereVM != nil {
// Reconcile VSphereMachine's failures
ctx.VSphereMachine.Status.FailureReason = vsphereVM.Status.FailureReason
ctx.VSphereMachine.Status.FailureMessage = vsphereVM.Status.FailureMessage
}

// If the VSphereMachine is in an error state, return early.
if ctx.VSphereMachine.Status.FailureReason != nil || ctx.VSphereMachine.Status.FailureMessage != nil {
ctx.Logger.Info("Error state detected, skipping reconciliation")
Expand Down
7 changes: 6 additions & 1 deletion controllers/vspherevm_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@ type vmReconciler struct {
}

// Reconcile ensures the back-end state reflects the Kubernetes resource state intent.
// nolint:gocognit
func (r vmReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, reterr error) {

// Get the VSphereVM resource for this request.
vsphereVM := &infrav1.VSphereVM{}
if err := r.Client.Get(r, req.NamespacedName, vsphereVM); err != nil {
Expand Down Expand Up @@ -262,6 +262,11 @@ func (r vmReconciler) reconcileDelete(ctx *context.VMContext) (reconcile.Result,
}

func (r vmReconciler) reconcileNormal(ctx *context.VMContext) (reconcile.Result, error) {

if ctx.VSphereVM.Status.FailureReason != nil || ctx.VSphereVM.Status.FailureMessage != nil {
r.Logger.Info("VM is failed, won't reconcile", "namespace", ctx.VSphereVM.Namespace, "name", ctx.VSphereVM.Name)
return reconcile.Result{}, nil
}
// If the VSphereVM doesn't have our finalizer, add it.
ctrlutil.AddFinalizer(ctx.VSphereVM, infrav1.VMFinalizer)

Expand Down
12 changes: 12 additions & 0 deletions pkg/services/govmomi/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,15 @@ func isNotFound(err error) bool {
return false
}
}

func wasNotFoundByBIOSUUID(err error) bool {
switch err.(type) {
case errNotFound, *errNotFound:
if err.(errNotFound).uuid != "" && !err.(errNotFound).instanceUUID {
return true
}
return false
default:
return false
}
}
14 changes: 12 additions & 2 deletions pkg/services/govmomi/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package govmomi

import (
"encoding/base64"
"fmt"

"github.com/pkg/errors"

Expand All @@ -27,6 +28,8 @@ import (
"github.com/vmware/govmomi/vim25/types"
corev1 "k8s.io/api/core/v1"
apitypes "k8s.io/apimachinery/pkg/types"
"k8s.io/utils/pointer"
capierrors "sigs.k8s.io/cluster-api/errors"

infrav1 "sigs.k8s.io/cluster-api-provider-vsphere/api/v1alpha3"
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/context"
Expand Down Expand Up @@ -69,8 +72,15 @@ func (vms *VMService) ReconcileVM(ctx *context.VMContext) (vm infrav1.VirtualMac
if !isNotFound(err) {
return vm, err
}
// If VM's MoRef could not be found then the VM does not exist,
// and the VM should be created.

// If the machine was not found by BIOS UUID it means that it got deleted from vcenter directly
if wasNotFoundByBIOSUUID(err) {
ctx.VSphereVM.Status.FailureReason = capierrors.MachineStatusErrorPtr(capierrors.UpdateMachineError)
ctx.VSphereVM.Status.FailureMessage = pointer.StringPtr(fmt.Sprintf("Unable to find VM by BIOS UUID %s. The vm was removed from infra", ctx.VSphereVM.Spec.BiosUUID))
return vm, err
}

// Otherwise, this is a new machine and the the VM should be created.

// Get the bootstrap data.
bootstrapData, err := vms.getBootstrapData(ctx)
Expand Down

0 comments on commit 06eeb56

Please sign in to comment.