From a6dabf87402fd68814568b20bf6b89b34eef6cc7 Mon Sep 17 00:00:00 2001 From: Levi Blackstone Date: Mon, 29 Jul 2019 12:51:25 -0600 Subject: [PATCH] Fix Deployment await logic for extensions/v1beta1 (#657) The Deployment await logic was not correctly checking rollout success for the `extensions/v1beta1` apiVersion, which was causing the await logic to erroneously pass for change triggered rollouts, even if the Deployment was invalid. Added additional logic to the readiness check to properly detect readiness for `extensions/v1beta1` Deployments. --- CHANGELOG.md | 3 +- pkg/await/apps_deployment.go | 88 ++++++++++++++++++++++++++++-------- pkg/await/core_pod.go | 2 +- 3 files changed, 73 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 40efa219b6..96f3e4782c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,7 +23,8 @@ (https://github.com/pulumi/pulumi-kubernetes/pull/637). - Use `opts` instead of `__opts__` and `resource_name` instead of `__name__` in Python SDK (https://github.com/pulumi/pulumi-kubernetes/pull/639). -- Properly detect failed Deployment on rollout. (https://github.com/pulumi/pulumi-kubernetes/pull/646). +- Properly detect failed Deployment on rollout. (https://github.com/pulumi/pulumi-kubernetes/pull/646 + and https://github.com/pulumi/pulumi-kubernetes/pull/657). - Use dry-run support if available when diffing the actual and desired state of a resource (https://github.com/pulumi/pulumi-kubernetes/pull/649) - Fix panic when `.metadata.label` is mistyped diff --git a/pkg/await/apps_deployment.go b/pkg/await/apps_deployment.go index 31bd320807..2690574e16 100644 --- a/pkg/await/apps_deployment.go +++ b/pkg/await/apps_deployment.go @@ -3,6 +3,7 @@ package await import ( "fmt" "reflect" + "strconv" "strings" "time" @@ -388,12 +389,35 @@ func (dia *deploymentInitAwaiter) processDeploymentEvent(event watch.Event) { dia.deployment = deployment + // extensions/v1beta1 does not include the "Progressing" status for rollouts. + // Note: We must use the input apiVersion rather than the Deployment watch Event we're processing here, because + // the Progressing status field will not be present if the Deployment was created with the `extensions/v1beta1` API, + // regardless of what the Event apiVersion says. + extensionsV1Beta1API := dia.config.createAwaitConfig.currentInputs.GetAPIVersion() == "extensions/v1beta1" + // Get current generation of the Deployment. dia.currentGeneration = deployment.GetAnnotations()[revision] if dia.currentGeneration == "" { // No current generation, Deployment controller has not yet created a ReplicaSet. Do // nothing. return + } else if extensionsV1Beta1API { + if currentGenerationInt, err := strconv.Atoi(dia.currentGeneration); err == nil { + if int64(currentGenerationInt) != dia.deployment.GetGeneration() { + // If the generation is set, make sure it matches the revision annotation, otherwise, ignore this + // event because the status we care about may not be set yet. + return + } + if rawObservedGeneration, ok := openapi.Pluck( + deployment.Object, "status", "observedGeneration"); ok { + observedGeneration, _ := rawObservedGeneration.(int64) + if int64(currentGenerationInt) != observedGeneration { + // If the generation is set, make sure it matches the .status.observedGeneration, otherwise, + // ignore this event because the status we care about may not be set yet. + return + } + } + } } // Check Deployments conditions to see whether new ReplicaSet is available. If it is, we are @@ -405,12 +429,6 @@ func (dia *deploymentInitAwaiter) processDeploymentEvent(event watch.Event) { return } - // extensions/v1beta1 does not include the "Progressing" status for rollouts. - // Note: We must use the input apiVersion rather than the Deployment watch Event we're processing here, because - // the Progressing status field will not be present if the Deployment was created with the `extensions/v1beta1` API, - // regardless of what the Event apiVersion says. - progressingStatusUnavailable := dia.config.createAwaitConfig.currentInputs.GetAPIVersion() == "extensions/v1beta1" - // Success occurs when the ReplicaSet of the `currentGeneration` is marked as available, and // when the deployment is available. for _, rawCondition := range conditions { @@ -419,7 +437,7 @@ func (dia *deploymentInitAwaiter) processDeploymentEvent(event watch.Event) { continue } - if progressingStatusUnavailable { + if extensionsV1Beta1API { // Since we can't tell for sure from this version of the API, mark as available. dia.replicaSetAvailable = true } else if condition["type"] == "Progressing" { @@ -528,29 +546,63 @@ func (dia *deploymentInitAwaiter) checkReplicaSetStatus() { var rawReadyReplicas interface{} var readyReplicas int64 var readyReplicasExists bool + var unavailableReplicas int64 + var expectedNumberOfUpdatedReplicas bool // extensions/v1beta1/ReplicaSet does not include the "readyReplicas" status for rollouts, // so use the Deployment "readyReplicas" status instead. // Note: We must use the input apiVersion rather than the Deployment watch Event we're processing here, because // the Progressing status field will not be present if the Deployment was created with the `extensions/v1beta1` API, // regardless of what the Event apiVersion says. - statusUnavailable := dia.config.createAwaitConfig.currentInputs.GetAPIVersion() == "extensions/v1beta1" - if statusUnavailable { + extensionsV1Beta1API := dia.config.createAwaitConfig.currentInputs.GetAPIVersion() == "extensions/v1beta1" + if extensionsV1Beta1API { rawReadyReplicas, readyReplicasExists = openapi.Pluck(dia.deployment.Object, "status", "readyReplicas") readyReplicas, _ = rawReadyReplicas.(int64) + + if rawUpdatedReplicas, ok := openapi.Pluck(dia.deployment.Object, "status", "updatedReplicas"); ok { + updatedReplicas, _ := rawUpdatedReplicas.(int64) + expectedNumberOfUpdatedReplicas = updatedReplicas == specReplicas + } + + // Check replicas status, which is present on all apiVersions of the Deployment resource. + // Note that this status field does not appear immediately on update, so it's not sufficient to + // determine readiness by itself. + rawReplicas, replicasExists := openapi.Pluck(dia.deployment.Object, "status", "replicas") + replicas, _ := rawReplicas.(int64) + tooManyReplicas := replicasExists && replicas > specReplicas + + // Check unavailableReplicas status, which is present on all apiVersions of the Deployment resource. + // Note that this status field does not appear immediately on update, so it's not sufficient to + // determine readiness by itself. + unavailableReplicasPresent := false + if rawUnavailableReplicas, ok := openapi.Pluck( + dia.deployment.Object, "status", "unavailableReplicas"); ok { + unavailableReplicas, _ = rawUnavailableReplicas.(int64) + + unavailableReplicasPresent = unavailableReplicas != 0 + } + + if dia.changeTriggeredRollout() { + dia.updatedReplicaSetReady = lastGeneration != dia.currentGeneration && updatedReplicaSetCreated && + readyReplicasExists && readyReplicas >= specReplicas && !unavailableReplicasPresent && !tooManyReplicas && + expectedNumberOfUpdatedReplicas + } else { + dia.updatedReplicaSetReady = updatedReplicaSetCreated && + readyReplicasExists && readyReplicas >= specReplicas && !tooManyReplicas + } } else { rawReadyReplicas, readyReplicasExists = openapi.Pluck(rs.Object, "status", "readyReplicas") readyReplicas, _ = rawReadyReplicas.(int64) - } - glog.V(3).Infof("ReplicaSet %q requests '%v' replicas, but has '%v' ready", - rs.GetName(), specReplicas, readyReplicas) + glog.V(3).Infof("ReplicaSet %q requests '%v' replicas, but has '%v' ready", + rs.GetName(), specReplicas, readyReplicas) - if dia.changeTriggeredRollout() { - dia.updatedReplicaSetReady = lastGeneration != dia.currentGeneration && updatedReplicaSetCreated && - readyReplicasExists && readyReplicas >= specReplicas - } else { - dia.updatedReplicaSetReady = updatedReplicaSetCreated && - readyReplicasExists && readyReplicas >= specReplicas + if dia.changeTriggeredRollout() { + dia.updatedReplicaSetReady = lastGeneration != dia.currentGeneration && updatedReplicaSetCreated && + readyReplicasExists && readyReplicas >= specReplicas + } else { + dia.updatedReplicaSetReady = updatedReplicaSetCreated && + readyReplicasExists && readyReplicas >= specReplicas + } } if !dia.updatedReplicaSetReady { diff --git a/pkg/await/core_pod.go b/pkg/await/core_pod.go index c70cae3393..5de51027e6 100644 --- a/pkg/await/core_pod.go +++ b/pkg/await/core_pod.go @@ -56,7 +56,7 @@ import ( // The subtlety of this that "Running" actually just means that the Pod has been bound to a node, // all containers have been created, and at least one is still "alive" -- a status is set once the // liveness and readiness probes (which usually simply ping some endpoint, and which are -// customizable by the user) return success. Along the say several things can go wrong (e.g., image +// customizable by the user) return success. Along the way several things can go wrong (e.g., image // pull error, container exits with code 1), but each of these things would prevent the probes from // reporting success, or they would be picked up by the Kubelet. //