Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions internal/worker/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package worker
import (
"context"
"errors"
"fmt"

"github.com/warpdotdev/oz-agent-worker/internal/metrics"
)
Expand Down Expand Up @@ -41,3 +42,17 @@ func taskFailureLabels(err error) (phase, reason string) {
}
return metrics.TaskFailurePhaseBackend, metrics.TaskFailureReasonUnknown
}

// userFacingTaskError returns a user-friendly error message for a task execution
// failure. Well-known infrastructure errors (context cancellation, deadline exceeded)
// are translated into clear, actionable messages instead of exposing raw Go error strings.
func userFacingTaskError(err error) string {
switch {
case errors.Is(err, context.Canceled):
return "The task was interrupted due to an infrastructure issue (context canceled). This is typically transient — please try again."
case errors.Is(err, context.DeadlineExceeded):
return "The task exceeded its maximum allowed execution time and was terminated. Consider breaking the task into smaller steps or increasing the timeout."
default:
return fmt.Sprintf("Failed to execute task: %v", err)
}
}
2 changes: 1 addition & 1 deletion internal/worker/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,7 @@ func (w *Worker) executeTask(ctx context.Context, span trace.Span, assignment *t
span.RecordError(err)
span.SetStatus(codes.Error, reason)
log.Errorf(ctx, "Task execution failed: taskID=%s, error=%v", taskID, err)
if statusErr := w.sendTaskFailed(taskID, fmt.Sprintf("Failed to execute task: %v", err)); statusErr != nil {
if statusErr := w.sendTaskFailed(taskID, userFacingTaskError(err)); statusErr != nil {
log.Errorf(ctx, "Failed to send task failed message: %v", statusErr)
}
return
Expand Down
95 changes: 95 additions & 0 deletions internal/worker/worker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,101 @@ func TestExecuteTaskReportsTaskFailedOnBackendError(t *testing.T) {
}
}

func TestExecuteTaskReportsUserFriendlyMessageOnContextCanceled(t *testing.T) {
w := &Worker{
ctx: context.Background(),
config: Config{},
sendChan: make(chan []byte, 1),
activeTasks: map[string]context.CancelFunc{"task-1": func() {}},
backend: &recordingBackend{err: context.Canceled},
}

w.executeTask(context.Background(), trace.SpanFromContext(context.Background()), &types.TaskAssignmentMessage{
TaskID: "task-1",
Task: &types.Task{ID: "task-1", Title: "test task"},
}, time.Now())

msg := readWebSocketMessage(t, w.sendChan)
if msg.Type != types.MessageTypeTaskFailed {
t.Fatalf("message type = %q, want %q", msg.Type, types.MessageTypeTaskFailed)
}

var failed types.TaskFailedMessage
if err := json.Unmarshal(msg.Data, &failed); err != nil {
t.Fatalf("failed to unmarshal task failed message: %v", err)
}
want := "The task was interrupted due to an infrastructure issue (context canceled). This is typically transient — please try again."
if failed.Message != want {
t.Errorf("message = %q, want %q", failed.Message, want)
}
}

func TestExecuteTaskReportsUserFriendlyMessageOnDeadlineExceeded(t *testing.T) {
w := &Worker{
ctx: context.Background(),
config: Config{},
sendChan: make(chan []byte, 1),
activeTasks: map[string]context.CancelFunc{"task-1": func() {}},
backend: &recordingBackend{err: context.DeadlineExceeded},
}

w.executeTask(context.Background(), trace.SpanFromContext(context.Background()), &types.TaskAssignmentMessage{
TaskID: "task-1",
Task: &types.Task{ID: "task-1", Title: "test task"},
}, time.Now())

msg := readWebSocketMessage(t, w.sendChan)
if msg.Type != types.MessageTypeTaskFailed {
t.Fatalf("message type = %q, want %q", msg.Type, types.MessageTypeTaskFailed)
}

var failed types.TaskFailedMessage
if err := json.Unmarshal(msg.Data, &failed); err != nil {
t.Fatalf("failed to unmarshal task failed message: %v", err)
}
want := "The task exceeded its maximum allowed execution time and was terminated. Consider breaking the task into smaller steps or increasing the timeout."
if failed.Message != want {
t.Errorf("message = %q, want %q", failed.Message, want)
}
}

func TestUserFacingTaskError(t *testing.T) {
tests := []struct {
name string
err error
want string
}{
{
name: "context canceled",
err: context.Canceled,
want: "The task was interrupted due to an infrastructure issue (context canceled). This is typically transient — please try again.",
},
{
name: "wrapped context canceled",
err: fmt.Errorf("exec failed: %w", context.Canceled),
want: "The task was interrupted due to an infrastructure issue (context canceled). This is typically transient — please try again.",
},
{
name: "deadline exceeded",
err: context.DeadlineExceeded,
want: "The task exceeded its maximum allowed execution time and was terminated. Consider breaking the task into smaller steps or increasing the timeout.",
},
{
name: "generic error",
err: errors.New("boom"),
want: "Failed to execute task: boom",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := userFacingTaskError(tt.err)
if got != tt.want {
t.Errorf("userFacingTaskError() = %q, want %q", got, tt.want)
}
})
}
}

func readWebSocketMessage(t *testing.T, messages <-chan []byte) types.WebSocketMessage {
t.Helper()

Expand Down
Loading