Skip to content

Commit 620303b

Browse files
committed
layout, tools after session creation insufficient
1 parent 6cd4a65 commit 620303b

File tree

12 files changed

+1550
-660
lines changed

12 files changed

+1550
-660
lines changed

browserbase/src/config.ts

+294-19
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,302 @@
1-
// Define the structure for configuration
1+
/**
2+
* Copyright (c) Microsoft Corporation.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import fs from 'fs';
18+
import net from 'net';
19+
import os from 'os';
20+
import path from 'path';
21+
import { devices, type BrowserContextOptions, type LaunchOptions } from 'playwright';
22+
import { sanitizeForFilePath } from './tools/utils.js'; // Assuming this path is correct
23+
24+
// Define ToolCapability type
25+
/**
 * Name of a tool capability group.
 *
 * 'core' and 'vision' are the values spelled out here; the trailing
 * `| string` arm means any other string is accepted by the type as well.
 */
export type ToolCapability = 'core' | 'vision' | string; // Example capabilities
26+
27+
// Define simpler intermediate types
28+
/**
 * Playwright `LaunchOptions` widened with the extra launch fields that this
 * module reads and writes.
 */
export type LaunchOptionsWithExtras = LaunchOptions & {
  /** Set to `true` by the defaults, by the CLI-derived config and by every merge in this file. */
  assistantMode?: boolean;
  /** Port picked for Chromium launches when a config is built from CLI options. */
  webSocketPort?: number;
  /** Browser channel name (e.g. 'chrome', 'msedge'); cleared for non-Chromium browsers. */
  channel?: string;
};
33+
/**
 * Browser-related part of the configuration. Every field is optional so that
 * partial configs (defaults, config file, CLI) can be layered on each other.
 */
export type BrowserConfig = {
  /** Playwright browser engine to use. */
  browserName?: 'chromium' | 'firefox' | 'webkit';
  /** Directory used as the browser profile / user data directory. */
  userDataDir?: string;
  /** Launch options, including the extra fields declared in LaunchOptionsWithExtras. */
  launchOptions?: LaunchOptionsWithExtras;
  /** Playwright browser-context options (e.g. a device descriptor or viewport). */
  contextOptions?: BrowserContextOptions;
  /** Copied from the CLI `cdpEndpoint` option; presumably a CDP endpoint to attach to — confirm against the consumer. */
  cdpEndpoint?: string;
};
40+
41+
// Define the main Config interface using BrowserConfig
242
/**
 * Fully merged application configuration.
 *
 * All fields are optional so that partial configs can be merged; the resolved
 * object is produced by `resolveConfig`.
 */
export interface Config {
  /** Browserbase API key; optional for easier merging (filled from BROWSERBASE_API_KEY during resolution). */
  browserbaseApiKey?: string;
  /** Browserbase project id; optional for easier merging (filled from BROWSERBASE_PROJECT_ID during resolution). */
  browserbaseProjectId?: string;
  /** Browser engine, profile, launch and context settings. */
  browser?: BrowserConfig;
  /** Server settings taken from the CLI `port` / `host` options or the config file. */
  server?: {
    port?: number;
    host?: string;
  };
  /** Enabled tool capability names. */
  capabilities?: ToolCapability[];
  /** Vision flag taken from the CLI `vision` option or the config file. */
  vision?: boolean;
  /** Directory for generated output files; `outputFile` falls back to the OS temp dir when unset. */
  outputDir?: string;
  /** Tool-specific configurations, keyed by tool name. */
  tools?: {
    // Arbitrary tool-specific config is allowed, hence the loose value type.
    [toolName: string]: any;
    browser_take_screenshot?: {
      omitBase64?: boolean;
    };
  };
}
761

8-
// Function to load and validate configuration (currently from environment variables)
9-
export function resolveConfig(): Config {
10-
const apiKey = process.env.BROWSERBASE_API_KEY;
11-
const projectId = process.env.BROWSERBASE_PROJECT_ID;
62+
// Define Command Line Options Structure
63+
/**
 * Shape of the parsed command-line options accepted by `resolveConfig` and
 * `configFromCLIOptions`.
 */
export type CLIOptions = {
  /** Browser or channel name, e.g. 'chrome', 'msedge', 'chromium', 'firefox', 'webkit'. */
  browser?: string;
  /** Comma-separated list of capability names (renamed from 'caps'). */
  capabilities?: string;
  /** Passed through unchanged to `browser.cdpEndpoint`. */
  cdpEndpoint?: string;
  /** Passed through to the launch options' `executablePath`. */
  executablePath?: string;
  /** Passed through to the launch options' `headless`. */
  headless?: boolean;
  /** Key into Playwright's `devices` table used to derive context options. */
  device?: string;
  /** Explicit user data directory; when omitted one is created in the per-user cache directory. */
  userDataDir?: string;
  /** Passed through to `server.port`. */
  port?: number;
  /** Passed through to `server.host`. */
  host?: string;
  /** Vision flag. */
  vision?: boolean;
  /** Path to a JSON config file. */
  config?: string;
};
1276

13-
if (!apiKey) {
14-
throw new Error("BROWSERBASE_API_KEY environment variable is required");
15-
}
16-
if (!projectId) {
17-
throw new Error("BROWSERBASE_PROJECT_ID environment variable is required");
77+
// Default Configuration Values
78+
/**
 * Lowest-priority configuration layer; anything defined in the config file or
 * on the command line overrides these values during merging.
 */
const defaultConfig: Config = {
  browser: {
    browserName: 'chromium',
    userDataDir: os.tmpdir(),
    launchOptions: {
      channel: 'chrome',
      // Headless only on Linux when no DISPLAY is exported; headed everywhere else.
      headless: os.platform() === 'linux' && !process.env.DISPLAY,
      assistantMode: true,
    },
    contextOptions: {
      // `null` is the Playwright setting that disables the fixed default viewport.
      viewport: null,
    },
  },
};
92+
93+
// Resolve final configuration by merging defaults, file config, and CLI options
94+
/**
 * Build the final configuration.
 *
 * Layers, lowest to highest priority: built-in defaults, the JSON config file
 * named by `cliOptions.config`, then values derived from the CLI options.
 * Browserbase credentials come from the environment when set; otherwise any
 * value supplied by the config file is kept instead of being overwritten with
 * `undefined`.
 *
 * @param cliOptions Parsed command-line options.
 * @returns The merged configuration. Missing Browserbase credentials only
 *          produce a warning; they do not throw.
 * @throws If the config file exists but cannot be read or parsed.
 */
export async function resolveConfig(cliOptions: CLIOptions): Promise<Config> {
  // Kept sequential on purpose: a broken config file should fail before the
  // CLI layer creates a profile directory or probes for a free port.
  const fileConfig = await loadConfig(cliOptions.config);
  const cliConfig = await configFromCLIOptions(cliOptions);

  // Order: Defaults < File Config < CLI Overrides
  const mergedConfig = mergeConfig(defaultConfig, mergeConfig(fileConfig, cliConfig));

  // Env vars are read here (not at module load) so that dotenv, which may run
  // in index.ts, has had a chance to populate them. `||` is deliberate: an
  // empty env var is as good as unset and must not mask a file-provided value.
  mergedConfig.browserbaseApiKey = process.env.BROWSERBASE_API_KEY || mergedConfig.browserbaseApiKey;
  mergedConfig.browserbaseProjectId = process.env.BROWSERBASE_PROJECT_ID || mergedConfig.browserbaseProjectId;

  // Basic validation for Browserbase keys: warn, do not fail.
  if (!mergedConfig.browserbaseApiKey) {
    console.warn("Warning: BROWSERBASE_API_KEY environment variable not set.");
  }
  if (!mergedConfig.browserbaseProjectId) {
    console.warn("Warning: BROWSERBASE_PROJECT_ID environment variable not set.");
  }

  return mergedConfig;
}
116+
117+
// Create Config structure based on CLI options
118+
/**
 * Translate parsed command-line options into a partial configuration.
 *
 * Side effects: probes for a free TCP port when the browser is Chromium, and
 * creates a profile directory when `cliOptions.userDataDir` is not given.
 *
 * NOTE(review): when `cliOptions.browser` is missing or unrecognised this
 * still reports `browserName: 'chromium'` / `channel: 'chrome'`, which takes
 * precedence over a browser chosen in the config file — confirm that is intended.
 *
 * @param cliOptions Parsed command-line options.
 * @returns A config layer holding only what the CLI determines; options that
 *          were not passed stay `undefined` so lower-priority layers survive merging.
 */
export async function configFromCLIOptions(cliOptions: CLIOptions): Promise<Config> {
  let browserName: 'chromium' | 'firefox' | 'webkit' = 'chromium'; // Default
  let channel: string | undefined = 'chrome'; // Default channel for chromium

  switch (cliOptions.browser) {
    case 'chrome':
    case 'chrome-beta':
    case 'chrome-canary':
    case 'chrome-dev':
    case 'chromium':
    case 'msedge':
    case 'msedge-beta':
    case 'msedge-canary':
    case 'msedge-dev':
      browserName = 'chromium';
      channel = cliOptions.browser;
      break;
    case 'firefox':
      browserName = 'firefox';
      channel = undefined; // Firefox doesn't use channel
      break;
    case 'webkit':
      browserName = 'webkit';
      channel = undefined; // Webkit doesn't use channel
      break;
    // Keep defaults if the browser option is invalid or missing.
  }

  const launchOptions: LaunchOptionsWithExtras = {
    channel: browserName === 'chromium' ? channel : undefined,
    executablePath: cliOptions.executablePath,
    headless: cliOptions.headless,
    assistantMode: true,
  };

  // WebSocket port is only added for Chromium, as needed by assistantMode.
  if (browserName === 'chromium') {
    launchOptions.webSocketPort = await findFreePort();
  }

  // Resolve the device descriptor, telling the user when the name is unknown
  // instead of silently dropping it.
  let contextOptions: BrowserContextOptions | undefined;
  if (cliOptions.device) {
    contextOptions = devices[cliOptions.device];
    if (!contextOptions) {
      console.warn(`Warning: unknown device "${cliOptions.device}"; ignoring the device option.`);
    }
  }

  const userDataDir = cliOptions.userDataDir ?? await createUserDataDir({ browserName, channel });

  const browserConfig: BrowserConfig = {
    browserName,
    userDataDir,
    launchOptions,
    contextOptions,
    cdpEndpoint: cliOptions.cdpEndpoint,
  };

  // Trim every entry and drop empty ones so "core,,vision" or a trailing comma
  // cannot produce a '' capability.
  const capabilities = cliOptions.capabilities
    ?.split(',')
    .map((c: string) => c.trim() as ToolCapability)
    .filter((c) => c.length > 0);

  return {
    browser: browserConfig,
    server: {
      port: cliOptions.port,
      host: cliOptions.host,
    },
    capabilities,
    // Left undefined when the flag was not passed; coercing to `false` would
    // override a `vision: true` coming from the config file.
    vision: cliOptions.vision,
  };
}
186+
187+
// Utility function to find a free network port
188+
/**
 * Ask the OS for a currently unused TCP port.
 *
 * Binds a throwaway server to port 0, reads back the port the OS assigned,
 * closes the server and resolves with that port. The port is free at the time
 * of the check only; nothing reserves it afterwards.
 *
 * @returns The port number that was assigned.
 * @throws (rejects) If the server fails to listen or no port can be read back.
 */
async function findFreePort(): Promise<number> {
  return new Promise<number>((resolve, reject) => {
    const probe = net.createServer();
    probe.unref(); // Prevent the probe from keeping the Node.js process open
    probe.on('error', reject);
    probe.listen(0, () => {
      const address = probe.address();
      // A TCP listener reports an AddressInfo object; a string would be a
      // pipe/socket path and null means "not listening" — neither has a port.
      const port = address !== null && typeof address === 'object' ? address.port : undefined;
      probe.close(() => {
        if (port) {
          resolve(port);
        } else {
          reject(new Error('Unable to retrieve port from server address.'));
        }
      });
    });
  });
}
206+
207+
// Load configuration from a JSON file
208+
/**
 * Load a configuration layer from a JSON file.
 *
 * @param configFile Path to the JSON file, or `undefined` when none was given.
 * @returns The parsed config; `{}` when no path was given or the file does not
 *          exist (the latter also logs a warning).
 * @throws If the file cannot be read for any reason other than "not found",
 *         is not valid JSON, or does not contain a JSON object at the top level.
 */
async function loadConfig(configFile: string | undefined): Promise<Config> {
  if (!configFile) {
    return {}; // No file path provided
  }

  let parsed: unknown;
  try {
    const configContent = await fs.promises.readFile(configFile, 'utf8');
    parsed = JSON.parse(configContent);
  } catch (error: unknown) {
    // A missing file is tolerated; every other failure is reported.
    const isMissingFile =
      typeof error === 'object' &&
      error !== null &&
      (error as { code?: unknown }).code === 'ENOENT';
    if (isMissingFile) {
      console.warn(`Config file not found: ${configFile}. Using defaults and CLI options.`);
      return {};
    }
    throw new Error(`Failed to load or parse config file: ${configFile}, ${String(error)}`);
  }

  // `null`, arrays and scalars are valid JSON but not a config; reject them
  // here rather than letting the merge step crash on them later.
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new Error(`Failed to load or parse config file: ${configFile}, expected a JSON object at the top level`);
  }

  return parsed as Config;
}
19225

20-
// Load config from environment variables or defaults
21-
const config: Config = {
22-
browserbaseApiKey: apiKey,
23-
browserbaseProjectId: projectId,
24-
};
226+
// Create a user data directory for the browser session
227+
/**
 * Create (if needed) and return the browser profile directory for a
 * browser/channel pair.
 *
 * The directory lives under the per-user cache location of the current
 * platform, inside an `ms-playwright` folder, and is named
 * `mcp-<channel or browserName>-profile` after path sanitisation.
 *
 * @param options Browser name and optional channel; the channel wins when set.
 * @returns The path of the profile directory.
 * @throws On platforms other than linux, darwin and win32.
 */
async function createUserDataDir(options: { browserName: string, channel: string | undefined }): Promise<string> {
  let cacheDirectory: string;
  switch (process.platform) {
    case 'linux':
      cacheDirectory = process.env.XDG_CACHE_HOME || path.join(os.homedir(), '.cache');
      break;
    case 'darwin':
      cacheDirectory = path.join(os.homedir(), 'Library', 'Caches');
      break;
    case 'win32':
      cacheDirectory = process.env.LOCALAPPDATA || path.join(os.homedir(), 'AppData', 'Local');
      break;
    default:
      throw new Error('Unsupported platform: ' + process.platform);
  }

  const profileLabel = options.channel ?? options.browserName;
  const profileDir = path.join(
    cacheDirectory,
    'ms-playwright',
    sanitizeForFilePath(`mcp-${profileLabel}-profile`),
  );
  await fs.promises.mkdir(profileDir, { recursive: true });
  return profileDir;
}
243+
244+
// Create an output file path within the configured output directory
245+
/**
 * Build a path for an output file inside the configured output directory.
 *
 * The directory (`config.outputDir`, or the OS temp dir when unset) is created
 * if it does not exist. The file itself is not created.
 *
 * @param config Resolved configuration.
 * @param name Desired file name; it is sanitised before being joined to the directory.
 * @returns The output directory joined with the sanitised name.
 */
export async function outputFile(config: Config, name: string): Promise<string> {
  const targetDir = config.outputDir ?? os.tmpdir();
  await fs.promises.mkdir(targetDir, { recursive: true });
  return path.join(targetDir, sanitizeForFilePath(name));
}
251+
252+
// Helper function to merge config objects, excluding undefined values
253+
/**
 * Shallow-copy an object, leaving out every own enumerable property whose
 * value is exactly `undefined`. `null`, `0`, `''` and `false` are kept.
 *
 * @param obj Source object; `undefined` (or any falsy value) yields `{}`.
 * @returns A new object containing only the defined properties.
 */
function pickDefined<T extends object>(obj: T | undefined): Partial<T> {
  if (!obj) {
    return {};
  }
  const defined: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(obj)) {
    if (value !== undefined) {
      defined[key] = value;
    }
  }
  return defined as Partial<T>;
}
259+
260+
// Merge two configuration objects (overrides takes precedence)
261+
/**
 * Merge two configuration layers; defined values in `overrides` win.
 *
 * Top-level fields are merged shallowly, with `undefined` values ignored.
 * `browser`, `browser.launchOptions`, `browser.contextOptions`, `server` and
 * `tools` are merged one level deeper, so an override that sets only some of
 * their fields does not discard the rest of the base object.
 *
 * @param base Lower-priority layer.
 * @param overrides Higher-priority layer.
 * @returns A new config; neither input is mutated. `browser`, `server` and
 *          `tools` are always present (possibly empty) and
 *          `browser.launchOptions.assistantMode` is always `true`.
 */
function mergeConfig(base: Config, overrides: Config): Config {
  const launchOptions: LaunchOptionsWithExtras = {
    ...pickDefined(base.browser?.launchOptions),
    ...pickDefined(overrides.browser?.launchOptions),
    assistantMode: true, // Always ensure assistantMode is true
  };

  // A channel inherited from the base layer makes no sense once the override
  // switches to a non-Chromium browser.
  if (overrides.browser?.browserName && overrides.browser.browserName !== 'chromium') {
    delete launchOptions.channel;
  }

  const browser: BrowserConfig = {
    ...pickDefined(base.browser),
    ...pickDefined(overrides.browser),
    launchOptions,
    contextOptions: {
      ...pickDefined(base.browser?.contextOptions),
      ...pickDefined(overrides.browser?.contextOptions),
    },
  };

  // Merge field by field: an override `server` object whose port/host are
  // undefined must not wipe out the values provided by the base layer.
  const server: Config['server'] = {
    ...pickDefined(base.server),
    ...pickDefined(overrides.server),
  };

  // Per-tool entries are replaced wholesale by the override, not deep-merged.
  const tools = {
    ...pickDefined(base.tools),
    ...pickDefined(overrides.tools),
  };

  return {
    ...pickDefined(base),
    ...pickDefined(overrides),
    browser,
    server,
    tools,
  };
}

0 commit comments

Comments
 (0)