Skip to content

Commit 6cd4a65

Browse files
committed
fix type, press_key
1 parent 8633f55 commit 6cd4a65

23 files changed

+1256
-2787
lines changed

browserbase/README.md

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -22,27 +22,3 @@ to provide browser automation tools.
2222
```bash
2323
node dist/index.js
2424
```
25-
26-
The server communicates over stdio according to the Model Context Protocol.
27-
28-
## Structure
29-
30-
* `src/`: TypeScript source code
31-
* `index.ts`: Main entry point, env checks, shutdown
32-
* `server.ts`: MCP Server setup and request routing
33-
* `sessionManager.ts`: Handles Browserbase session creation/management
34-
* `tools/`: Tool definitions and implementations
35-
* `resources/`: Resource (screenshot) handling
36-
* `types.ts`: Shared TypeScript types
37-
* `dist/`: Compiled JavaScript output
38-
* `tests/`: Placeholder for tests
39-
* `utils/`: Placeholder for utility scripts
40-
* `Dockerfile`: For building a Docker image
41-
* Configuration files (`.json`, `.ts`, `.mjs`, `.npmignore`)
42-
43-
## TODO
44-
45-
* Implement true `ref`-based interaction logic for click, type, drag, hover, select_option.
46-
* Implement element-specific screenshots using `ref`.
47-
* Add more standard Playwright MCP tools (tabs, navigation, etc.).
48-
* Add tests.

browserbase/package-lock.json

Lines changed: 27 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

browserbase/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,9 @@
2222
"dependencies": {
2323
"@browserbasehq/sdk": "^2.0.0",
2424
"@modelcontextprotocol/sdk": "^1.10.2",
25+
"playwright-core": "^1.45.3",
2526
"puppeteer-core": "^23.9.0",
26-
"playwright-core": "^1.45.3"
27+
"zod": "^3.24.3"
2728
},
2829
"devDependencies": {
2930
"shx": "^0.3.4",

browserbase/src/config.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/**
 * Shape of the settings this server needs in order to talk to Browserbase.
 *
 * Instances are produced by `resolveConfig`, which fills both fields from
 * environment variables.
 */
export interface Config {
  /** API key, read from the `BROWSERBASE_API_KEY` environment variable. */
  browserbaseApiKey: string;
  /** Project id, read from the `BROWSERBASE_PROJECT_ID` environment variable. */
  browserbaseProjectId: string;
  // Add other configuration options here later if needed
}
7+
8+
/**
 * Loads and validates the configuration (currently from environment variables).
 *
 * Reads `BROWSERBASE_API_KEY` and `BROWSERBASE_PROJECT_ID` from `process.env`.
 * Surrounding whitespace is stripped, and a value that is unset, empty, or
 * whitespace-only is treated as missing, so a stray space or newline in the
 * environment cannot yield an unusable credential.
 *
 * @returns The resolved configuration with both fields populated.
 * @throws Error if either environment variable is missing or blank.
 */
export function resolveConfig(): Config {
  const apiKey = process.env.BROWSERBASE_API_KEY?.trim();
  const projectId = process.env.BROWSERBASE_PROJECT_ID?.trim();

  if (!apiKey) {
    throw new Error("BROWSERBASE_API_KEY environment variable is required");
  }
  if (!projectId) {
    throw new Error("BROWSERBASE_PROJECT_ID environment variable is required");
  }

  return {
    browserbaseApiKey: apiKey,
    browserbaseProjectId: projectId,
  };
}

browserbase/src/context.ts

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
import type { Server } from "@modelcontextprotocol/sdk/server/index.js";
2+
import type { BrowserSession } from "./sessionManager.js";
3+
import {
4+
getSession,
5+
defaultSessionId,
6+
closeAllSessions,
7+
} from "./sessionManager.js";
8+
import type { Tool, ToolContext } from "./tools/tool.js";
9+
import type { Config } from "./config.js";
10+
import {
11+
Resource,
12+
CallToolResult,
13+
TextContent,
14+
ImageContent,
15+
ResourceListChangedNotificationSchema,
16+
} from "@modelcontextprotocol/sdk/types.js";
17+
import { z } from "zod";
18+
19+
/**
20+
* Manages the context for tool execution within a specific Browserbase session.
21+
*
22+
* Role Analogy:
23+
* This class holds session-specific state (like latest snapshots, resources)
24+
* and provides access to the active page/browser for the current session.
25+
* This is somewhat analogous to the role of the `Tab` class in the Playwright
26+
* MCP example, which encapsulates state for a single page.
27+
*
28+
* Differences from Playwright MCP Context Example:
29+
* - Browser Lifecycle: This Context does NOT manage the browser launch/
30+
* connection lifecycle; that is handled by `sessionManager` (sessionManager.ts) interacting
31+
* with the Browserbase API.
32+
* - Tab Management: This Context focuses on a single active session determined
33+
* by `currentSessionId`, unlike the Playwright example which explicitly
34+
* manages an array of `Tab` objects.
35+
* - Execution Model: This Context uses a `run`/`CallToolResult` pattern. Its `run`
36+
* method calls `tool.run`, which performs the action and returns the final
37+
* result structure. The Playwright example uses a `handle`/`ToolActionResult`
38+
* pattern where the Context interprets the result to perform actions.
39+
*/
40+
export class Context {
41+
private server: Server;
42+
private config: Config;
43+
public currentSessionId: string = defaultSessionId;
44+
private screenshots = new Map<string, string>();
45+
private latestSnapshots = new Map<string, any>();
46+
private screenshotResources = new Map<string, { format: string; bytes: string; uri: string }>();
47+
48+
constructor(server: Server, config: Config) {
49+
this.server = server;
50+
this.config = config;
51+
this.screenshotResources = new Map();
52+
}
53+
54+
// --- Snapshot State Handling ---
55+
56+
getLatestSnapshot(sessionId: string): any | undefined {
57+
return this.latestSnapshots.get(sessionId);
58+
}
59+
60+
setLatestSnapshot(sessionId: string, snapshot: any): void {
61+
this.latestSnapshots.set(sessionId, snapshot);
62+
}
63+
64+
clearLatestSnapshot(sessionId: string): void {
65+
this.latestSnapshots.delete(sessionId);
66+
}
67+
68+
// --- Resource Handling Methods ---
69+
70+
listResources(): Resource[] {
71+
const resources: Resource[] = [];
72+
for (const [name, data] of this.screenshotResources.entries()) {
73+
resources.push({
74+
uri: data.uri,
75+
mimeType: data.format,
76+
name: `Screenshot: ${name}`,
77+
});
78+
}
79+
return resources;
80+
}
81+
82+
readResource(uri: string): { uri: string; mimeType: string; blob: string } {
83+
const prefix = "mcp://screenshots/";
84+
if (uri.startsWith(prefix)) {
85+
const name = uri.split("/").pop() || "";
86+
const data = this.screenshotResources.get(name);
87+
if (data) {
88+
return {
89+
uri,
90+
mimeType: data.format,
91+
blob: data.bytes,
92+
};
93+
} else {
94+
throw new Error(`Screenshot resource not found: ${name}`);
95+
}
96+
} else {
97+
throw new Error(`Resource URI format not recognized: ${uri}`);
98+
}
99+
}
100+
101+
addScreenshot(name: string, format: "png" | "jpeg", bytes: string): void {
102+
const uri = `mcp://screenshots/${name}`;
103+
this.screenshotResources.set(name, { format, bytes, uri });
104+
this.server.notification({
105+
method: "resources/list_changed",
106+
params: {}
107+
});
108+
}
109+
110+
// --- Session and Tool Execution ---
111+
112+
public async getActivePage(): Promise<BrowserSession["page"] | null> {
113+
const session = await getSession(this.currentSessionId, this.config);
114+
if (!session || session.page.isClosed()) {
115+
return null;
116+
}
117+
return session.page;
118+
}
119+
120+
public async getActiveBrowser(): Promise<BrowserSession["browser"] | null> {
121+
const session = await getSession(this.currentSessionId, this.config);
122+
if (!session || !session.browser.isConnected()) {
123+
return null;
124+
}
125+
return session.browser;
126+
}
127+
128+
async run(tool: Tool<any>, args: any): Promise<CallToolResult> {
129+
let initialPage: BrowserSession["page"] | null = null;
130+
let initialBrowser: BrowserSession["browser"] | null = null;
131+
if (tool.schema.name !== "browserbase_create_session") {
132+
initialPage = await this.getActivePage();
133+
initialBrowser = await this.getActiveBrowser();
134+
if (!initialPage || !initialBrowser) {
135+
throw new Error(
136+
`Failed to get valid page/browser for session ${this.currentSessionId} required by tool ${tool.schema.name}`
137+
);
138+
}
139+
}
140+
const validatedArgs = args; // Simplified validation
141+
const toolContext: ToolContext = {
142+
page: initialPage!,
143+
browser: initialBrowser!,
144+
server: this.server,
145+
sessionId: this.currentSessionId,
146+
config: this.config,
147+
context: this,
148+
};
149+
let result: CallToolResult;
150+
try {
151+
const validatedArgs = tool.schema.inputSchema.parse(args);
152+
result = await tool.run(toolContext, validatedArgs);
153+
154+
// Append context info if successful and not snapshot itself
155+
if (!result.isError && tool.schema.name !== "browserbase_snapshot") {
156+
const currentPage = await this.getActivePage();
157+
let currentStateText = `\n\nCurrent Session: ${this.currentSessionId}`;
158+
if (currentPage && !currentPage.isClosed()) {
159+
try {
160+
currentStateText += `\nURL: ${currentPage.url()}\nTitle: ${await currentPage.title()}`;
161+
} catch (stateError) {
162+
currentStateText += `\nURL/Title: [Error: ${stateError}]`;
163+
}
164+
} else {
165+
currentStateText += `\nURL/Title: [Page unavailable]`;
166+
}
167+
let textContent = result.content?.find((c) => c.type === "text") as
168+
| TextContent
169+
| undefined;
170+
if (textContent) {
171+
textContent.text += currentStateText;
172+
} else {
173+
if (!result.content) result.content = [];
174+
result.content.push({ type: "text", text: currentStateText });
175+
}
176+
}
177+
return result;
178+
} catch (error) {
179+
if (error instanceof z.ZodError) {
180+
const errorMsg = error.issues.map((issue) => issue.message).join(", ");
181+
return {
182+
content: [{ type: "text", text: `Error: ${errorMsg}` }],
183+
isError: true,
184+
};
185+
}
186+
return {
187+
content: [{ type: "text", text: `Error: ${error instanceof Error ? error.message : String(error)}` }],
188+
isError: true,
189+
};
190+
}
191+
}
192+
193+
async close(): Promise<void> {
194+
const page = await this.getActivePage();
195+
if (page && !page.isClosed()) {
196+
try {
197+
await page.close();
198+
} catch (e) {
199+
console.error(`Error closing page: ${e}`);
200+
}
201+
} else {
202+
console.warn(
203+
`No active page found for session ${this.currentSessionId} to close.`
204+
);
205+
}
206+
await closeAllSessions();
207+
}
208+
}

0 commit comments

Comments
 (0)