Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion bundle/phases/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,6 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand
return
}

logDeployTelemetry(ctx, b)
bundle.ApplyContext(ctx, b, scripts.Execute(config.ScriptPostDeploy))
}

Expand Down
4 changes: 3 additions & 1 deletion bundle/phases/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ func getExecutionTimes(b *bundle.Bundle) []protos.IntMapEntry {
return executionTimes
}

func logDeployTelemetry(ctx context.Context, b *bundle.Bundle) {
// LogDeployTelemetry logs a telemetry event for a bundle deploy command.
func LogDeployTelemetry(ctx context.Context, b *bundle.Bundle, errMsg string) {
resourcesCount := int64(0)
_, err := dyn.MapByPattern(b.Config.Value(), dyn.NewPattern(dyn.Key("resources"), dyn.AnyKey(), dyn.AnyKey()), func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
resourcesCount++
Expand Down Expand Up @@ -149,6 +150,7 @@ func logDeployTelemetry(ctx context.Context, b *bundle.Bundle) {
BundleDeployEvent: &protos.BundleDeployEvent{
BundleUuid: bundleUuid,
DeploymentId: b.Metrics.DeploymentId.String(),
ErrorMessage: errMsg,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[IMPORTANT] ErrorMessage is sent verbatim from logdiag.GetFirstErrorSummary() / retErr.Error(). Many deploy errors interpolate local filesystem paths or user-controlled config values (e.g., from statemgmt/state_pull.go, config/mutator/translate_paths.go). This starts shipping raw PII/workspace details to telemetry with no sanitization or size bound.

Fix: emit a sanitized error code/category, or at least scrub paths and cap length (e.g., 500 chars).


ResourceCount: resourcesCount,
ResourceJobCount: int64(len(b.Config.Resources.Jobs)),
Expand Down
23 changes: 19 additions & 4 deletions cmd/bundle/utils/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ func ProcessBundle(cmd *cobra.Command, opts ProcessOptions) (*bundle.Bundle, err
return b, err
}

func ProcessBundleRet(cmd *cobra.Command, opts ProcessOptions) (*bundle.Bundle, *statemgmt.StateDesc, error) {
func ProcessBundleRet(cmd *cobra.Command, opts ProcessOptions) (b *bundle.Bundle, stateDesc *statemgmt.StateDesc, retErr error) {
var err error
ctx := cmd.Context()
if opts.SkipInitContext {
Expand All @@ -93,7 +93,24 @@ func ProcessBundleRet(cmd *cobra.Command, opts ProcessOptions) (*bundle.Bundle,
}

// Load bundle config and apply target
b := root.MustConfigureBundle(cmd)
b = root.MustConfigureBundle(cmd)

// Log deploy telemetry on all exit paths. This is a defer to ensure
// telemetry is logged even when the deploy command fails, for both
// diagnostic errors and regular Go errors.
if opts.Deploy {
defer func() {
if b == nil {
return
}
errMsg := logdiag.GetFirstErrorSummary(ctx)
if errMsg == "" && retErr != nil && !errors.Is(retErr, root.ErrAlreadyPrinted) {
errMsg = retErr.Error()
Comment on lines +103 to +108
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[SUGGESTION] No new unit tests for the defer + error capture logic. The core behavior change (telemetry always fires, error message captured from logdiag or retErr) should have test coverage. Consider testing:

  • Telemetry fires on deploy failure with error message
  • ErrAlreadyPrinted errors fall through to GetFirstErrorSummary
  • Successful deploy passes empty error message

}
phases.LogDeployTelemetry(ctx, b, errMsg)
}()
Comment on lines +98 to +111
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[IMPORTANT] This defer runs whenever b != nil, but cmdctx.SetConfigUsed() may not have been called yet (e.g., if configureBundle fails on auth/profile errors before reaching SetConfigUsed in cmd/root/bundle.go:187). When telemetry upload later calls cmdctx.ConfigUsed(ctx) in libs/telemetry/logger.go, it will panic.

Fix: guard with if !cmdctx.HasConfigUsed(ctx) { return } inside the defer, or move the defer setup to after SetConfigUsed() has succeeded.

Comment on lines +98 to +111
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[IMPORTANT] This defer will now log a full BundleDeployEvent on deploy failure. But cmd/root/root.go:182-189 still appends a legacy empty BundleDeployEvent on every nonzero bundle_deploy exit. Result: two deploy events per failed deploy, which will skew failure counts and error-rate dashboards.

Fix: remove the root-level failure fallback, or gate it on "no deploy event was already logged".

}

if logdiag.HasError(ctx) {
return b, nil, root.ErrAlreadyPrinted
}
Expand Down Expand Up @@ -147,8 +164,6 @@ func ProcessBundleRet(cmd *cobra.Command, opts ProcessOptions) (*bundle.Bundle,
}
}

var stateDesc *statemgmt.StateDesc

shouldReadState := opts.ReadState || opts.AlwaysPull || opts.InitIDs || opts.ErrorOnEmptyState || opts.PreDeployChecks || opts.Deploy || opts.ReadPlanPath != ""

if shouldReadState {
Expand Down
16 changes: 16 additions & 0 deletions libs/logdiag/logdiag.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ type LogDiagData struct {
// If Collect is true, diagnostics are appended to Collected. Use SetCollected() to set.
Collect bool
Collected []diag.Diagnostic

// Summary of the first error diagnostic logged, if any.
FirstErrorSummary string
}

// IsSetup returns whether InitContext() was already called.
Expand Down Expand Up @@ -117,6 +120,16 @@ func FlushCollected(ctx context.Context) diag.Diagnostics {
return result
}

// GetFirstErrorSummary reports the error summary recorded in the logdiag
// state attached to ctx. The result is the empty string when no error
// summary has been recorded.
func GetFirstErrorSummary(ctx context.Context) string {
	data := read(ctx)

	// Hold the lock only for the duration of the field read.
	data.mu.Lock()
	summary := data.FirstErrorSummary
	data.mu.Unlock()

	return summary
}

func LogDiag(ctx context.Context, d diag.Diagnostic) {
val := read(ctx)
val.mu.Lock()
Expand All @@ -125,6 +138,9 @@ func LogDiag(ctx context.Context, d diag.Diagnostic) {
switch d.Severity {
case diag.Error:
val.Errors += 1
if val.FirstErrorSummary == "" {
val.FirstErrorSummary = d.Summary
}
case diag.Warning:
val.Warnings += 1
case diag.Recommendation:
Expand Down
3 changes: 3 additions & 0 deletions libs/telemetry/protos/bundle_deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ type BundleDeployEvent struct {
// UUID associated with the deployment.
DeploymentId string `json:"deployment_id,omitempty"`

// Error message encountered during the bundle deploy command, if any.
ErrorMessage string `json:"error_message,omitempty"`

ResourceCount int64 `json:"resource_count"`
ResourceJobCount int64 `json:"resource_job_count"`
ResourcePipelineCount int64 `json:"resource_pipeline_count"`
Expand Down
Loading