diff --git a/CHANGELOG.md b/CHANGELOG.md index 40efa219b6..96f3e4782c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,7 +23,8 @@ (https://github.com/pulumi/pulumi-kubernetes/pull/637). - Use `opts` instead of `__opts__` and `resource_name` instead of `__name__` in Python SDK (https://github.com/pulumi/pulumi-kubernetes/pull/639). -- Properly detect failed Deployment on rollout. (https://github.com/pulumi/pulumi-kubernetes/pull/646). +- Properly detect failed Deployment on rollout. (https://github.com/pulumi/pulumi-kubernetes/pull/646 + and https://github.com/pulumi/pulumi-kubernetes/pull/657). - Use dry-run support if available when diffing the actual and desired state of a resource (https://github.com/pulumi/pulumi-kubernetes/pull/649) - Fix panic when `.metadata.label` is mistyped diff --git a/pkg/await/apps_deployment.go b/pkg/await/apps_deployment.go index 31bd320807..2690574e16 100644 --- a/pkg/await/apps_deployment.go +++ b/pkg/await/apps_deployment.go @@ -3,6 +3,7 @@ package await import ( "fmt" "reflect" + "strconv" "strings" "time" @@ -388,12 +389,35 @@ func (dia *deploymentInitAwaiter) processDeploymentEvent(event watch.Event) { dia.deployment = deployment + // extensions/v1beta1 does not include the "Progressing" status for rollouts. + // Note: We must use the input apiVersion rather than the Deployment watch Event we're processing here, because + // the Progressing status field will not be present if the Deployment was created with the `extensions/v1beta1` API, + // regardless of what the Event apiVersion says. + extensionsV1Beta1API := dia.config.createAwaitConfig.currentInputs.GetAPIVersion() == "extensions/v1beta1" + // Get current generation of the Deployment. dia.currentGeneration = deployment.GetAnnotations()[revision] if dia.currentGeneration == "" { // No current generation, Deployment controller has not yet created a ReplicaSet. Do // nothing. return + } else if extensionsV1Beta1API { + if currentGenerationInt, err := strconv.Atoi(dia.currentGeneration); err == nil { + if int64(currentGenerationInt) != dia.deployment.GetGeneration() { + // If the generation is set, make sure it matches the revision annotation, otherwise, ignore this + // event because the status we care about may not be set yet. + return + } + if rawObservedGeneration, ok := openapi.Pluck( + deployment.Object, "status", "observedGeneration"); ok { + observedGeneration, _ := rawObservedGeneration.(int64) + if int64(currentGenerationInt) != observedGeneration { + // If the generation is set, make sure it matches the .status.observedGeneration, otherwise, + // ignore this event because the status we care about may not be set yet. + return + } + } + } } // Check Deployments conditions to see whether new ReplicaSet is available. If it is, we are @@ -405,12 +429,6 @@ func (dia *deploymentInitAwaiter) processDeploymentEvent(event watch.Event) { return } - // extensions/v1beta1 does not include the "Progressing" status for rollouts. - // Note: We must use the input apiVersion rather than the Deployment watch Event we're processing here, because - // the Progressing status field will not be present if the Deployment was created with the `extensions/v1beta1` API, - // regardless of what the Event apiVersion says. - progressingStatusUnavailable := dia.config.createAwaitConfig.currentInputs.GetAPIVersion() == "extensions/v1beta1" - // Success occurs when the ReplicaSet of the `currentGeneration` is marked as available, and // when the deployment is available. for _, rawCondition := range conditions { @@ -419,7 +437,7 @@ func (dia *deploymentInitAwaiter) processDeploymentEvent(event watch.Event) { continue } - if progressingStatusUnavailable { + if extensionsV1Beta1API { // Since we can't tell for sure from this version of the API, mark as available. dia.replicaSetAvailable = true } else if condition["type"] == "Progressing" { @@ -528,29 +546,63 @@ func (dia *deploymentInitAwaiter) checkReplicaSetStatus() { var rawReadyReplicas interface{} var readyReplicas int64 var readyReplicasExists bool + var unavailableReplicas int64 + var expectedNumberOfUpdatedReplicas bool // extensions/v1beta1/ReplicaSet does not include the "readyReplicas" status for rollouts, // so use the Deployment "readyReplicas" status instead. // Note: We must use the input apiVersion rather than the Deployment watch Event we're processing here, because // the Progressing status field will not be present if the Deployment was created with the `extensions/v1beta1` API, // regardless of what the Event apiVersion says. - statusUnavailable := dia.config.createAwaitConfig.currentInputs.GetAPIVersion() == "extensions/v1beta1" - if statusUnavailable { + extensionsV1Beta1API := dia.config.createAwaitConfig.currentInputs.GetAPIVersion() == "extensions/v1beta1" + if extensionsV1Beta1API { rawReadyReplicas, readyReplicasExists = openapi.Pluck(dia.deployment.Object, "status", "readyReplicas") readyReplicas, _ = rawReadyReplicas.(int64) + + if rawUpdatedReplicas, ok := openapi.Pluck(dia.deployment.Object, "status", "updatedReplicas"); ok { + updatedReplicas, _ := rawUpdatedReplicas.(int64) + expectedNumberOfUpdatedReplicas = updatedReplicas == specReplicas + } + + // Check replicas status, which is present on all apiVersions of the Deployment resource. + // Note that this status field does not appear immediately on update, so it's not sufficient to + // determine readiness by itself. + rawReplicas, replicasExists := openapi.Pluck(dia.deployment.Object, "status", "replicas") + replicas, _ := rawReplicas.(int64) + tooManyReplicas := replicasExists && replicas > specReplicas + + // Check unavailableReplicas status, which is present on all apiVersions of the Deployment resource. + // Note that this status field does not appear immediately on update, so it's not sufficient to + // determine readiness by itself. + unavailableReplicasPresent := false + if rawUnavailableReplicas, ok := openapi.Pluck( + dia.deployment.Object, "status", "unavailableReplicas"); ok { + unavailableReplicas, _ = rawUnavailableReplicas.(int64) + + unavailableReplicasPresent = unavailableReplicas != 0 + } + + if dia.changeTriggeredRollout() { + dia.updatedReplicaSetReady = lastGeneration != dia.currentGeneration && updatedReplicaSetCreated && + readyReplicasExists && readyReplicas >= specReplicas && !unavailableReplicasPresent && !tooManyReplicas && + expectedNumberOfUpdatedReplicas + } else { + dia.updatedReplicaSetReady = updatedReplicaSetCreated && + readyReplicasExists && readyReplicas >= specReplicas && !tooManyReplicas + } } else { rawReadyReplicas, readyReplicasExists = openapi.Pluck(rs.Object, "status", "readyReplicas") readyReplicas, _ = rawReadyReplicas.(int64) - } - glog.V(3).Infof("ReplicaSet %q requests '%v' replicas, but has '%v' ready", - rs.GetName(), specReplicas, readyReplicas) + glog.V(3).Infof("ReplicaSet %q requests '%v' replicas, but has '%v' ready", + rs.GetName(), specReplicas, readyReplicas) - if dia.changeTriggeredRollout() { - dia.updatedReplicaSetReady = lastGeneration != dia.currentGeneration && updatedReplicaSetCreated && - readyReplicasExists && readyReplicas >= specReplicas - } else { - dia.updatedReplicaSetReady = updatedReplicaSetCreated && - readyReplicasExists && readyReplicas >= specReplicas + if dia.changeTriggeredRollout() { + dia.updatedReplicaSetReady = lastGeneration != dia.currentGeneration && updatedReplicaSetCreated && + readyReplicasExists && readyReplicas >= specReplicas + } else { + dia.updatedReplicaSetReady = updatedReplicaSetCreated && + readyReplicasExists && readyReplicas >= specReplicas + } } if !dia.updatedReplicaSetReady { diff --git a/pkg/await/core_pod.go b/pkg/await/core_pod.go index c70cae3393..5de51027e6 100644 --- a/pkg/await/core_pod.go +++ b/pkg/await/core_pod.go @@ -56,7 +56,7 @@ import ( // The subtlety of this that "Running" actually just means that the Pod has been bound to a node, // all containers have been created, and at least one is still "alive" -- a status is set once the // liveness and readiness probes (which usually simply ping some endpoint, and which are -// customizable by the user) return success. Along the say several things can go wrong (e.g., image +// customizable by the user) return success. Along the way several things can go wrong (e.g., image // pull error, container exits with code 1), but each of these things would prevent the probes from // reporting success, or they would be picked up by the Kubelet. //