Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 55 additions & 1 deletion src/core/__tests__/batch.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
import { test } from 'vitest';
import assert from 'node:assert/strict';
import { validateAndNormalizeBatchSteps, type DaemonBatchStep } from '../batch.ts';
import {
runBatch,
validateAndNormalizeBatchSteps,
type BatchRequest,
type DaemonBatchStep,
} from '../batch.ts';
import type { DaemonResponse, ResponseLevel } from '../../kernel/contracts.ts';

test('validateAndNormalizeBatchSteps rejects unknown top-level step fields', () => {
assert.throws(
Expand Down Expand Up @@ -36,3 +42,51 @@ test('validateAndNormalizeBatchSteps validates runtime hints', () => {
/runtime is invalid/i,
);
});

/**
 * Builds a stub batch invoker for the Phase 4 elision tests.
 *
 * Every call appends the `meta.responseLevel` it was handed (or `undefined`
 * when the request carries no meta / no level) to `seen`, then resolves with a
 * successful response that echoes the invoked command name.
 */
function recordingInvoke(seen: (ResponseLevel | undefined)[]) {
  return async ({ meta, command }: BatchRequest): Promise<DaemonResponse> => {
    seen.push(meta?.responseLevel);
    return { ok: true, data: { command } };
  };
}

/**
 * Builds a minimal `batch` request whose steps are the given command names.
 *
 * `meta` is attached only when a (truthy) `responseLevel` is supplied, so the
 * no-level case produces a request with no `meta` key at all.
 */
function batchRequest(commands: string[], responseLevel?: ResponseLevel): BatchRequest {
  const batchSteps = commands.map((command) => ({ command }));
  const base: BatchRequest = {
    token: 't',
    command: 'batch',
    positionals: [],
    flags: { batchSteps },
  };
  return responseLevel ? { ...base, meta: { responseLevel } } : base;
}

// Three steps at `full`: the first two are invoked at `digest`, only the last
// one is invoked at the level the batch asked for.
test('batch elides intermediate steps to digest, final step keeps requested level (full)', async () => {
  const levels: (ResponseLevel | undefined)[] = [];
  const request = batchRequest(['snapshot', 'find', 'get'], 'full');
  const { ok } = await runBatch(request, 'session', recordingInvoke(levels));
  assert.equal(ok, true);
  assert.deepEqual(levels, ['digest', 'digest', 'full']);
});

// A batch already requested at `digest` has nothing to elide: every step,
// intermediate or final, is invoked at `digest`.
test('batch at digest keeps every step at digest', async () => {
  const levels: (ResponseLevel | undefined)[] = [];
  const request = batchRequest(['snapshot', 'find'], 'digest');
  await runBatch(request, 'session', recordingInvoke(levels));
  assert.deepEqual(levels, ['digest', 'digest']);
});

// With exactly one step, that step is the final one and keeps the requested level.
test('a single-step batch never elides (the only step is final)', async () => {
  const levels: (ResponseLevel | undefined)[] = [];
  const request = batchRequest(['snapshot'], 'full');
  await runBatch(request, 'session', recordingInvoke(levels));
  assert.deepEqual(levels, ['full']);
});

// No responseLevel on the batch: the request built by `batchRequest` has no
// `meta`, and every step must be invoked with that same absent level.
test('default batch (no responseLevel) passes meta through unchanged — byte-identical', async () => {
  const seen: (ResponseLevel | undefined)[] = [];
  await runBatch(batchRequest(['snapshot', 'find', 'get']), 'session', recordingInvoke(seen));
  assert.deepEqual(seen, [undefined, undefined, undefined]);
});

// An explicit `default` level must behave like an absent one: no step —
// intermediate or final — is forced down to `digest`.
test('explicit default responseLevel is passed to every step without elision', async () => {
  const seen: (ResponseLevel | undefined)[] = [];
  await runBatch(
    batchRequest(['snapshot', 'find', 'get'], 'default'),
    'session',
    recordingInvoke(seen),
  );
  assert.deepEqual(seen, ['default', 'default', 'default']);
});
40 changes: 37 additions & 3 deletions src/core/batch.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
import { type DaemonRequest, type DaemonResponse } from '../kernel/contracts.ts';
import {
type DaemonRequest,
type DaemonResponse,
type ResponseLevel,
isNonDefaultResponseLevel,
} from '../kernel/contracts.ts';
import { AppError, asAppError } from '../kernel/errors.ts';
import { isRecord } from '../utils/parsing.ts';
import {
Expand Down Expand Up @@ -90,7 +95,14 @@ export async function runBatch(
const startedAt = Date.now();
const partialResults: BatchStepResult[] = [];
for (const [index, step] of steps.entries()) {
const stepResponse = await runBatchStep(req, sessionName, step, invoke, index + 1);
const stepResponse = await runBatchStep(
req,
sessionName,
step,
invoke,
index + 1,
index === steps.length - 1,
);
if (!stepResponse.ok) {
return {
ok: false,
Expand Down Expand Up @@ -208,12 +220,34 @@ export function mergeParentFlags<TFlags extends Record<string, unknown>>(
return childFlags;
}

/**
 * Phase 4 (agent-cost) batch-step elision: picks the response level a single
 * batch step is invoked with.
 *
 * When the batch asked for a non-default level, every INTERMEDIATE step is
 * forced to `digest` so a multi-step run collapses tokens, and only the FINAL
 * step keeps the requested level. When no level (or `default`) was requested
 * the requested value is returned as-is, so the per-step meta stays
 * byte-identical to today (Maestro `.ad` recompare safe).
 *
 * @param requested - level requested for the whole batch, if any
 * @param isFinalStep - whether this step is the last one of the batch
 * @returns the level to invoke this step with
 */
function batchStepResponseLevel(
  requested: ResponseLevel | undefined,
  isFinalStep: boolean,
): ResponseLevel | undefined {
  const shouldElide = isNonDefaultResponseLevel(requested) && !isFinalStep;
  return shouldElide ? 'digest' : requested;
}

/**
 * Derives the `meta` handed to one batch step from the parent request's meta.
 *
 * The parent meta object is returned untouched (same reference, possibly
 * `undefined`) whenever the step's level equals the requested one; a shallow
 * copy with `responseLevel` overridden is produced only when the level changes.
 */
function batchStepMeta(meta: BatchRequest['meta'], isFinalStep: boolean): BatchRequest['meta'] {
  const parentLevel = meta?.responseLevel;
  const effectiveLevel = batchStepResponseLevel(parentLevel, isFinalStep);
  return effectiveLevel === parentLevel ? meta : { ...meta, responseLevel: effectiveLevel };
}

async function runBatchStep(
req: BatchRequest,
sessionName: string,
step: NormalizedBatchStep,
invoke: BatchInvoke,
stepNumber: number,
isFinalStep: boolean,
): Promise<
| { ok: true; step: number; result: BatchStepResult }
| {
Expand Down Expand Up @@ -241,7 +275,7 @@ async function runBatchStep(
positionals: step.positionals,
flags: stepFlags,
runtime: step.runtime === undefined ? req.runtime : step.runtime,
meta: req.meta,
meta: batchStepMeta(req.meta, isFinalStep),
});
const durationMs = Date.now() - stepStartedAt;
if (!response.ok) {
Expand Down
130 changes: 130 additions & 0 deletions src/daemon/__tests__/response-views.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import type { DaemonResponseData } from '../types.ts';

const snapshotView = RESPONSE_VIEWS.snapshot;
const screenshotView = RESPONSE_VIEWS.screenshot;
const findView = RESPONSE_VIEWS.find;
const getView = RESPONSE_VIEWS.get;

const SNAPSHOT_DATA: DaemonResponseData = {
nodes: [
Expand Down Expand Up @@ -103,3 +105,131 @@ test('screenshot digest tolerates a path-only result with no overlay refs', () =
const digest = screenshotView!({ path: '/tmp/s.png' }, 'digest');
expect(digest).toEqual({ path: '/tmp/s.png', overlayCount: 0, overlayRefs: [] });
});

// Test fixture: a verbose matched node as it appears on the `find`/`get` wire.
// The first group of fields (ref … hittable) are the semantic attributes the
// digest keeps; the second group is the geometry/index/process plumbing (the
// token sink) that the digest is expected to drop.
const MATCHED_NODE = {
ref: 'e7',
role: 'AXButton',
type: 'Button',
label: 'Sign in',
value: 'enabled',
identifier: 'login-button',
enabled: true,
selected: false,
focused: false,
hittable: true,
// Verbose framing the digest intentionally drops (compare COMPACT_NODE):
rect: { x: 10, y: 20, width: 100, height: 44 },
index: 7,
parentIndex: 3,
depth: 4,
pid: 1234,
bundleId: 'com.demo.app',
appName: 'Demo',
windowTitle: 'Demo',
surface: 'app',
visibleToUser: true,
};

// Test fixture: the expected digest form of MATCHED_NODE — the same `ref` and
// semantic attributes with identical values, and none of the
// geometry/index/process fields.
const COMPACT_NODE = {
ref: 'e7',
role: 'AXButton',
type: 'Button',
label: 'Sign in',
value: 'enabled',
identifier: 'login-button',
enabled: true,
selected: false,
focused: false,
hittable: true,
};

// Both commands must be registered, and must resolve to the very same function.
test('find and get views are registered (shared selector-read view)', () => {
  for (const view of [findView, getView]) {
    expect(typeof view).toBe('function');
  }
  expect(findView).toBe(getView);
});

// A text read already answers with `text`, so the digest must omit `node`
// entirely (the key is absent, not merely undefined).
test('find get-text digest keeps ref + text, drops the verbose node', () => {
  const textRead: DaemonResponseData = { ref: '@e7', text: 'Sign in', node: MATCHED_NODE };
  const digest = findView!(textRead, 'digest');
  expect(digest).toEqual({ ref: '@e7', text: 'Sign in' });
  const hasNode = 'node' in digest;
  expect(hasNode).toBe(false);
});

// Dropping the node on a text read must not take any sibling field with it.
test('a text read keeps every OTHER cheap field (e.g. warning) while dropping the node', () => {
  const warning = 'recovered from a blocking dialog';
  const textRead: DaemonResponseData = {
    ref: '@e7',
    text: 'Sign in',
    warning,
    node: MATCHED_NODE,
  };
  const digest = findView!(textRead, 'digest');
  expect(digest).toEqual({ ref: '@e7', text: 'Sign in', warning });
});

// An attrs read (no `text`) keeps the node but only its semantic attributes.
test('find get-attrs digest compacts the node to semantic attributes only', () => {
  const digest = findView!({ ref: '@e7', node: MATCHED_NODE }, 'digest');
  expect(digest).toEqual({ ref: '@e7', node: COMPACT_NODE });
  // The geometry/index/process plumbing (the token sink) is dropped from the node.
  const compacted = digest.node as Record<string, unknown>;
  for (const droppedField of ['rect', 'parentIndex']) {
    expect(droppedField in compacted).toBe(false);
  }
});

// Compacting the node on an attrs read must leave sibling fields untouched.
test('an attrs read compacts the node but keeps every other cheap field (e.g. warning)', () => {
  const warning = 'partial tree';
  const attrsRead: DaemonResponseData = { ref: 'e7', warning, node: MATCHED_NODE };
  const digest = getView!(attrsRead, 'digest');
  expect(digest).toEqual({ ref: 'e7', warning, node: COMPACT_NODE });
});

// REGRESSION: the view is registered for the whole `find` command, yet
// `find fill/focus/type` answer with the underlying INTERACTION response. That
// shape has no verbose snapshot node but does carry cheap, agent-critical
// signals such as `warning`/`message`. The conservative view must hand such a
// node-less shape back UNCHANGED — never allowlist-narrow it.
test('find fill/focus/type interaction responses pass through UNCHANGED (warning kept)', () => {
  const interaction: DaemonResponseData = {
    ref: 'e3',
    text: 'hello',
    message: 'Filled 5 chars',
    warning: 'Recovered from a blocking system dialog',
  };
  const result = findView!(interaction, 'digest');
  // Identity first (not narrowed at all), then structural equality.
  expect(result).toBe(interaction);
  expect(result).toEqual(interaction);
});

// exists / wait / click results carry no verbose node, so each must come back
// as the very same object it went in as.
test('find exists/wait/click digests pass through the cheap actionable signals', () => {
  const nodeLessResults: DaemonResponseData[] = [
    { found: true },
    { found: true, waitedMs: 320 },
    { ref: '@e7', locator: 'text', query: 'Sign in', x: 60, y: 42 },
  ];
  for (const result of nodeLessResults) {
    expect(findView!(result, 'digest')).toBe(result);
  }
});

// Same text-read rule through the `get` entry, keyed by `selector` instead of `ref`.
test('get text digest keeps selector + text and drops the node', () => {
  const selector = 'text=Sign in';
  const textRead: DaemonResponseData = { selector, text: 'Sign in', node: MATCHED_NODE };
  const digest = getView!(textRead, 'digest');
  expect(digest).toEqual({ selector, text: 'Sign in' });
});

// Same attrs-read rule through the `get` entry with a bare ref target.
test('get attrs digest compacts the node under a ref target', () => {
  const attrsRead: DaemonResponseData = { ref: 'e7', node: MATCHED_NODE };
  const digest = getView!(attrsRead, 'digest');
  expect(digest).toEqual({ ref: 'e7', node: COMPACT_NODE });
});

// Only `digest` transforms anything: at `default` and `full` both views must
// return the input object itself.
test('find/get default and full return today’s shape unchanged (same reference)', () => {
  const data: DaemonResponseData = { ref: '@e7', text: 'Sign in', node: MATCHED_NODE };
  for (const view of [findView, getView]) {
    for (const level of ['default', 'full'] as const) {
      expect(view!(data, level)).toBe(data);
    }
  }
});
58 changes: 58 additions & 0 deletions src/daemon/response-views.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,65 @@ function screenshotView(data: DaemonResponseData, level: ResponseLevel): DaemonR
};
}

// Allowlist of the semantic attributes of a single matched node that an agent
// reasons about; `compactSelectorNode` copies exactly these (plus `ref`) into
// the digest node. The verbose framing a digest drops — geometry (`rect`),
// tree indices (`index`/`parentIndex`/`depth`), and process/app plumbing
// (`pid`/`bundleId`/`appName`/`windowTitle`/`surface`/…) — is intentionally
// absent. Declared `as const` so each entry keeps its literal string type.
const SELECTOR_DIGEST_NODE_FIELDS = [
'role',
'type',
'subrole',
'label',
'value',
'identifier',
'enabled',
'selected',
'focused',
'hittable',
] as const;

/**
 * Reduces a matched snapshot node to its digest form.
 *
 * The result always starts with the node's `ref`, followed by each field of
 * `SELECTOR_DIGEST_NODE_FIELDS` (in that list's order) whose value on the node
 * is not `undefined`. Every other property of the node is left out.
 */
function compactSelectorNode(node: SnapshotNode): Record<string, unknown> {
  return SELECTOR_DIGEST_NODE_FIELDS.reduce<Record<string, unknown>>(
    (compact, field) => {
      const value = node[field];
      if (value !== undefined) compact[field] = value;
      return compact;
    },
    { ref: node.ref },
  );
}

/**
 * Token-cheap digest shared by the `find` and `get` commands. The ONLY token
 * sink in their results is the verbose matched `node`, which appears solely on a
 * selector READ (text / attrs). The view is deliberately CONSERVATIVE: it acts
 * only on a result that carries such a `node` and otherwise returns the data
 * UNCHANGED — so the cheap exists/wait/click results AND the mutating
 * `find fill` / `find focus` / `find type` interaction responses (which can
 * carry agent-critical signals like `warning` / `message`) are never silently
 * narrowed.
 *
 *  • a text read drops the redundant `node` — the `text` IS the answer;
 *  • an attrs read compacts the `node` to its semantic attributes only;
 *
 * In both cases every OTHER (cheap) field is preserved verbatim. `default` and
 * `full` return today's shape unchanged (nothing richer is computed yet).
 *
 * @param data - the command's response data
 * @param level - the requested response level; only `digest` transforms anything
 * @returns `data` itself (same reference) when nothing is narrowed, otherwise a
 *   shallow copy with `node` removed (text read) or compacted (attrs read)
 */
function selectorReadView(data: DaemonResponseData, level: ResponseLevel): DaemonResponseData {
  if (level !== 'digest') return data;
  const node = data.node;
  // Only a single plain node object counts as "a matched node". `typeof` also
  // reports 'object' for arrays, so they are excluded explicitly: an
  // array-valued `node` is not a matched node and must stay unchanged rather
  // than be dropped or collapsed into `{ ref: undefined }`.
  if (node === null || typeof node !== 'object' || Array.isArray(node)) return data;
  // A text read already carries the answer in `text`, so the node is redundant
  // framing — drop only the node and keep every other (cheap) field.
  if (typeof data.text === 'string') {
    const { node: _node, ...rest } = data;
    return rest;
  }
  // An attrs read: compact only the verbose node, keeping every other cheap field.
  return { ...data, node: compactSelectorNode(node as SnapshotNode) };
}

/**
 * Registry of response views keyed by command name. `find` and `get` share the
 * same `selectorReadView` function.
 *
 * NOTE(review): this is a plain object literal, so indexing it with an
 * inherited key (e.g. `constructor`, `toString`) yields a value from
 * `Object.prototype` rather than `undefined` — confirm callers only look up
 * validated command names.
 */
export const RESPONSE_VIEWS: Record<string, ResponseView> = {
snapshot: snapshotView,
screenshot: screenshotView,
find: selectorReadView,
get: selectorReadView,
};
Loading