Skip to content

Commit 9c8093a

Browse files
committed
feat: add internal parser for Jupyter Notebook JSON files
1 parent e966e92 commit 9c8093a

File tree

2 files changed

+122
-18
lines changed

2 files changed

+122
-18
lines changed

files-to-prompt.test.ts

+25-1
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,7 @@ describe('files-to-prompt.ts', () => {
432432
expect(stdoutOutput).toContain(ipynbFileContents);
433433
});
434434

435-
test('should include .ipynb files verbatim when --nbconvert is set to invalid command', async () => {
435+
test('should include .ipynb files verbatim when --nbconvert is set to invalid command', async () => {
436436
const ipynbFilePath = path.join(testDir, 'notebook.ipynb');
437437
fs.writeFileSync(ipynbFilePath, ipynbFileContents);
438438

@@ -466,4 +466,28 @@ describe('files-to-prompt.ts', () => {
466466
expect(stdoutOutput).toContain('```python');
467467
expect(stdoutOutput).toContain('print(\'Hello, World!\')');
468468
});
469+
470+
// Exercises the built-in ('internal') notebook converter with AsciiDoc output.
// The target format is AsciiDoc (a markup language), not ASCII, so the title says so.
test('should convert .ipynb files to AsciiDoc when --nbconvert --format asciidoc is passed using internal converter', async () => {
  const ipynbFilePath = path.join(testDir, 'notebook.ipynb');
  fs.writeFileSync(ipynbFilePath, ipynbFileContents);

  const args = [testDir, '--nbconvert', 'internal', '--format', 'asciidoc'];
  await runScript(args);

  // No warnings or errors are expected on stderr
  expect(stderrOutput).toBeEmpty();
  // The file path header, the AsciiDoc input prompt and the cell source must all be present
  expect(stdoutOutput).toContain(ipynbFilePath);
  expect(stdoutOutput).toContain('+*In[1]:*+');
  expect(stdoutOutput).toContain('print(\'Hello, World!\')');
});
481+
482+
// Exercises the built-in ('internal') notebook converter with Markdown output.
test('should convert .ipynb files to Markdown when --nbconvert --format markdown is passed using internal converter', async () => {
  // Arrange: drop the sample notebook into the test directory
  const notebookPath = path.join(testDir, 'notebook.ipynb');
  fs.writeFileSync(notebookPath, ipynbFileContents);

  // Act: run the tool with the internal converter and Markdown as target format
  await runScript([testDir, '--nbconvert', 'internal', '--format', 'markdown']);

  // Assert: nothing on stderr; path header, fenced code block and cell source on stdout
  expect(stderrOutput).toBeEmpty();
  for (const expectedFragment of [notebookPath, '```python', 'print(\'Hello, World!\')']) {
    expect(stdoutOutput).toContain(expectedFragment);
  }
});
469493
});

files-to-prompt.ts

+97-17
Original file line numberDiff line numberDiff line change
@@ -164,18 +164,25 @@ export async function isBinaryFile(filePath: string, chunkSize: number = 8192):
164164
*/
165165
async function processFile(filePath: string, config: ProcessingConfig): Promise<void> {
166166
try {
167-
if (await isBinaryFile(filePath)) {
168-
error(`Warning: Skipping binary file ${filePath}`);
169-
} else {
170-
if (config.nbconvertName && filePath.endsWith('.ipynb')) {
171-
await convertIPythonNotebook(filePath, config);
167+
if (config.nbconvertName && filePath.endsWith('.ipynb')) {
168+
// Handle Jupyter Notebook files first
169+
if (config.nbconvertName === 'internal') {
170+
// internal conversion requested
171+
await convertIPythonNotebookInternal(filePath, config);
172172
} else {
173-
const fileContents = fs.readFileSync(filePath, 'utf8');
174-
output(filePath);
175-
output('---');
176-
output(fileContents);
177-
output('---');
173+
// external conversion requested
174+
await convertIPythonNotebookExternal(filePath, config);
178175
}
176+
} else if (await isBinaryFile(filePath)) {
177+
// Skip binary files
178+
error(`Warning: Skipping binary file ${filePath}`);
179+
} else {
180+
// Put everything else verbatim on the output stream
181+
const fileContents = fs.readFileSync(filePath, 'utf8');
182+
output(filePath);
183+
output('---');
184+
output(fileContents);
185+
output('---');
179186
}
180187
} catch (err) {
181188
// This should not happen unless e.g. files get deleted while this tool runs
@@ -186,7 +193,78 @@ async function processFile(filePath: string, config: ProcessingConfig): Promise<
186193
}
187194
}
188195

189-
async function convertIPythonNotebook(filePath: string, config: ProcessingConfig): Promise<void> {
196+
/**
 * Convert a Jupyter notebook with the built-in converter and write the result
 * to the output stream.
 *
 * The file is read as UTF-8 and parsed as JSON. When `config.convertFormat` is
 * 'asciidoc' the notebook is rendered with `convertToAsciidoc`; any other value
 * renders it with `convertToMarkdown`. The rendered text is emitted as the file
 * path, a '---' line, the content, and a closing '---' line.
 *
 * Failures (unreadable file, invalid JSON, conversion errors) are reported
 * through `error()` and are not rethrown.
 *
 * @param filePath - Path of the .ipynb file to convert.
 * @param config - Processing configuration; only `convertFormat` is read here.
 */
async function convertIPythonNotebookInternal(filePath: string, config: ProcessingConfig): Promise<void> {
  try {
    const rawNotebook = await fs.promises.readFile(filePath, 'utf8');
    const notebook = JSON.parse(rawNotebook);

    // Anything other than 'asciidoc' falls back to Markdown
    const render = config.convertFormat === 'asciidoc' ? convertToAsciidoc : convertToMarkdown;
    // Render fully before printing so a failed conversion emits no partial block
    const rendered = render(notebook);

    for (const chunk of [`${filePath}`, '---', rendered, '---']) {
      output(chunk);
    }
  } catch (err) {
    error(`Error converting .ipynb file ${filePath}: ${err}`);
  }
}
216+
217+
/**
 * Render a parsed Jupyter notebook as AsciiDoc text.
 *
 * - Code cells become an `+*In[n]:*+` prompt followed by a `[source, ipython3]`
 *   listing of the cell source. An unexecuted cell (no execution count) gets a
 *   blank prompt, `In[ ]`.
 * - Outputs that carry a `text/plain` representation are emitted as an
 *   `+*Out[n]:*+` literal block. `stream` outputs (e.g. from `print`) are
 *   emitted as a plain literal block. Other outputs (errors, images, ...) are
 *   skipped.
 * - Markdown cells are copied through unchanged.
 * - Cells of any other type are ignored.
 *
 * @param ipynbData - Result of `JSON.parse` on an .ipynb file. It comes from
 *   untrusted file content, so every field is checked before use.
 * @returns The AsciiDoc rendering; every emitted block ends with a blank line.
 * @throws TypeError if `ipynbData.cells` is not an array.
 */
function convertToAsciidoc(ipynbData: any): string {
  // nbformat stores multi-line text either as one string or as an array of line
  // strings; normalise both to a single string (anything else becomes '').
  const joinText = (value: unknown): string => {
    if (Array.isArray(value)) {
      return value.join('');
    }
    return typeof value === 'string' ? value : '';
  };

  const cells: unknown = ipynbData?.cells;
  if (!Array.isArray(cells)) {
    throw new TypeError('Notebook JSON has no "cells" array');
  }

  let asciidocContent = '';

  for (const cell of cells) {
    switch (cell?.cell_type) {
      case 'code': {
        // execution_count is null for cells that were never run
        const prompt = cell.execution_count ?? ' ';
        asciidocContent += `+*In[${prompt}]:*+\n[source, ipython3]\n----\n${joinText(cell.source)}\n----\n\n`;

        const cellOutputs = Array.isArray(cell.outputs) ? cell.outputs : [];
        for (const cellOutput of cellOutputs) {
          // Only execute_result/display_data outputs have a `data` bundle;
          // stream and error outputs do not, hence the optional chaining.
          const plainText = joinText(cellOutput?.data?.['text/plain']);
          if (plainText) {
            asciidocContent += `+*Out[${prompt}]:*+\n----\n${plainText}\n----\n\n`;
          } else if (cellOutput?.output_type === 'stream') {
            const streamText = joinText(cellOutput.text);
            if (streamText) {
              asciidocContent += `----\n${streamText}\n----\n\n`;
            }
          }
          // TODO: handle images
          // if (cellOutput?.data?.['image/png']) {
          //   asciidocContent += `+*Out[${prompt}]:*+\n[PNG Image]\n\n`;
          // }
        }
        break;
      }
      case 'markdown':
        asciidocContent += `${joinText(cell.source)}\n\n`;
        break;
    }
  }

  return asciidocContent;
}
242+
243+
/**
 * Render a parsed Jupyter notebook as Markdown text.
 *
 * - Code cells become a fenced code block. The fence language is taken from
 *   `metadata.language_info.name` when it is a plain identifier, otherwise
 *   'python' is used.
 * - Outputs that carry a `text/plain` representation, and `stream` outputs
 *   (e.g. from `print`), are emitted as an unlabelled fenced block. Other
 *   outputs (errors, images, ...) are skipped.
 * - Markdown cells are copied through unchanged.
 * - Cells of any other type are ignored.
 *
 * @param ipynbData - Result of `JSON.parse` on an .ipynb file. It comes from
 *   untrusted file content, so every field is checked before use.
 * @returns The Markdown rendering; every emitted block ends with a blank line.
 * @throws TypeError if `ipynbData.cells` is not an array.
 */
function convertToMarkdown(ipynbData: any): string {
  // nbformat stores multi-line text either as one string or as an array of line
  // strings; normalise both to a single string (anything else becomes '').
  const joinText = (value: unknown): string => {
    if (Array.isArray(value)) {
      return value.join('');
    }
    return typeof value === 'string' ? value : '';
  };

  const cells: unknown = ipynbData?.cells;
  if (!Array.isArray(cells)) {
    throw new TypeError('Notebook JSON has no "cells" array');
  }

  // Accept only simple language identifiers so odd metadata cannot break the fence line
  const declaredLanguage: unknown = ipynbData?.metadata?.language_info?.name;
  const fenceLanguage =
    typeof declaredLanguage === 'string' && /^[A-Za-z0-9_+#.-]+$/.test(declaredLanguage)
      ? declaredLanguage
      : 'python';

  let markdownContent = '';

  for (const cell of cells) {
    switch (cell?.cell_type) {
      case 'code': {
        markdownContent += `\`\`\`${fenceLanguage}\n${joinText(cell.source)}\n\`\`\`\n\n`;

        const cellOutputs = Array.isArray(cell.outputs) ? cell.outputs : [];
        for (const cellOutput of cellOutputs) {
          // Only execute_result/display_data outputs have a `data` bundle;
          // stream and error outputs do not, hence the optional chaining.
          let outputText = joinText(cellOutput?.data?.['text/plain']);
          if (!outputText && cellOutput?.output_type === 'stream') {
            outputText = joinText(cellOutput.text);
          }
          if (outputText) {
            markdownContent += `\`\`\`\n${outputText}\n\`\`\`\n\n`;
          }
          // TODO: handle images
          // if (cellOutput?.data?.['image/png']) {
          // }
        }
        break;
      }
      case 'markdown':
        markdownContent += `${joinText(cell.source)}\n\n`;
        break;
    }
  }

  return markdownContent;
}
266+
267+
async function convertIPythonNotebookExternal(filePath: string, config: ProcessingConfig): Promise<void> {
190268
const tempDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), 'files-to-prompt-'));
191269
const tempFilePath = path.join(tempDir, path.basename(filePath));
192270

@@ -473,14 +551,16 @@ export async function main( args: string[] ): Promise<void> {
473551
if (i + 1 < args.length) {
474552
config.nbconvertName = args[++i];
475553
} else {
476-
error('Error: --nbconvert option requires the filename or full path of the tool');
554+
error('Error: --nbconvert option requires the filename or full path of the tool or \'internal\'');
477555
return;
478556
}
479-
try {
480-
execSync(`${config.nbconvertName} --version`, { stdio: 'ignore' });
481-
} catch (err) {
482-
error(`Warning: ${config.nbconvertName} command not found`);
483-
config.nbconvertName = '';
557+
if (!(config.nbconvertName === 'internal')) {
558+
try {
559+
execSync(`${config.nbconvertName} --version`, { stdio: 'ignore' });
560+
} catch (err) {
561+
error(`Warning: ${config.nbconvertName} command not found`);
562+
config.nbconvertName = '';
563+
}
484564
}
485565
break;
486566
case '--format':

0 commit comments

Comments
 (0)