Skip to content

Commit 771d602

Browse files
committed
Feat: Adding file through Notion
Signed-off-by: Daishan Peng <[email protected]>
1 parent 50bdc85 commit 771d602

File tree

12 files changed

+613
-163
lines changed

12 files changed

+613
-163
lines changed

actions/knowledge/filehelper.ts

+18
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import fs from 'fs';
44
import path from 'path';
5+
import { FileDetail } from '@/actions/knowledge/util';
56

67
export async function getFileOrFolderSizeInKB(
78
filePath: string
@@ -31,3 +32,20 @@ export async function getFileOrFolderSizeInKB(
3132
/**
 * Resolves to the last segment of `filePath` (its base name).
 *
 * @param filePath - Path whose final component is wanted.
 * @returns The base name as computed by `path.basename`.
 */
export async function getBasename(filePath: string): Promise<string> {
  const baseName = path.basename(filePath);
  return baseName;
}
35+
36+
/**
 * Builds the `FileDetail` record for every path in `files`.
 *
 * Size lookups are independent of each other, so they are started together
 * and awaited with `Promise.all` instead of one after another.
 *
 * @param files - Paths of the files being imported.
 * @param type - Source the files come from; copied into every record.
 * @returns A map keyed by the original path, in the order the paths were
 *   given (a repeated path keeps its first position and last value).
 */
export async function importFiles(
  files: string[],
  type: 'local' | 'notion'
): Promise<Map<string, FileDetail>> {
  const entries = await Promise.all(
    files.map(
      async (file): Promise<[string, FileDetail]> => [
        file,
        {
          fileName: path.basename(file),
          size: await getFileOrFolderSizeInKB(file),
          type,
        },
      ]
    )
  );

  return new Map(entries);
}

actions/knowledge/knowledge.ts

+62-47
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import path from 'path';
44
import { exec } from 'child_process';
55
import { promisify } from 'util';
66
import { KNOWLEDGE_DIR } from '@/config/env';
7+
import { FileDetail } from '@/actions/knowledge/util';
8+
import { getFileOrFolderSizeInKB } from '@/actions/knowledge/filehelper';
79

810
// Promise-returning wrapper around child_process.exec, built with util.promisify.
const execPromise = promisify(exec);
911

@@ -39,97 +41,110 @@ export async function deleteDataset(datasetID: string): Promise<void> {
3941

4042
/**
 * Reports whether this would be the first ingestion for a script: its data
 * directory does not exist yet and at least one file has been supplied.
 *
 * @param scriptId - Identifier used as the directory name under `script_data`.
 * @param files - Files selected for the script, keyed by source path.
 * @returns `true` only when the data directory is missing and `files` is non-empty.
 */
export async function firstIngestion(
  scriptId: string,
  files: Map<string, FileDetail>
): Promise<boolean> {
  const dataDir = path.join(KNOWLEDGE_DIR(), 'script_data', scriptId, 'data');
  if (fs.existsSync(dataDir)) {
    return false;
  }
  return files.size > 0;
}
4749

48-
/**
 * Reports whether anything (file, directory or symlink, including a dangling
 * symlink) exists at `target`. Uses `lstat` so links are not followed.
 */
async function pathEntryExists(target: string): Promise<boolean> {
  try {
    await fs.promises.lstat(target);
    return true;
  } catch (error: unknown) {
    if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
      return false;
    }
    throw error;
  }
}

/**
 * Makes the script's data directory reflect `files`.
 *
 * Each file is stored at `<data>/<type>/<basename>`: `local` files are
 * copied, `notion` files are symlinked to their source path. Entries that
 * already exist are left untouched. Unless `updateOnly` is set, anything in
 * a type directory that is not expected for that same type is removed.
 *
 * @param files - Desired files, keyed by source path.
 * @param scriptId - Identifier used as the directory name under `script_data`.
 * @param updateOnly - When `true`, only add missing entries; never delete.
 * @throws Propagates any file-system error (copy, symlink, readdir, unlink).
 */
export async function ensureFiles(
  files: Map<string, FileDetail>,
  scriptId: string,
  updateOnly: boolean
): Promise<void> {
  const dir = path.join(KNOWLEDGE_DIR(), 'script_data', scriptId, 'data');

  // Base names expected inside each type directory, used for pruning below.
  const expectedByType = new Map<string, Set<string>>();

  for (const [source, detail] of Array.from(files.entries())) {
    const typeDir = path.join(dir, detail.type);
    // Recursive mkdir also creates `dir` itself and is a no-op when present.
    await fs.promises.mkdir(typeDir, { recursive: true });

    const baseName = path.basename(source);
    let expected = expectedByType.get(detail.type);
    if (!expected) {
      expected = new Set<string>();
      expectedByType.set(detail.type, expected);
    }
    expected.add(baseName);

    const target = path.join(typeDir, baseName);
    // lstat-based check: a dangling symlink counts as present, so we never
    // call symlink() on an occupied path (which would fail with EEXIST).
    if (await pathEntryExists(target)) {
      continue;
    }

    if (detail.type === 'local') {
      await fs.promises.copyFile(source, target);
    } else if (detail.type === 'notion') {
      await fs.promises.symlink(source, target);
    }
  }

  if (updateOnly) {
    return;
  }

  for (const type of ['local', 'notion']) {
    const typeDir = path.join(dir, type);
    if (!fs.existsSync(typeDir)) {
      continue;
    }
    // Only names expected for THIS type keep an entry alive; a same-named
    // file of the other type must not protect a stale one.
    const expected = expectedByType.get(type);
    const present = await fs.promises.readdir(typeDir);
    for (const fileName of present) {
      if (!expected || !expected.has(fileName)) {
        await fs.promises.unlink(path.join(typeDir, fileName));
      }
    }
  }
}
102100

103-
/**
 * Runs `ingest --prune --dataset <id> ./data` with the binary named by the
 * `KNOWLEDGE_BIN` environment variable, from the script's directory.
 *
 * Does nothing when the script has no `data` directory. The binary is
 * started with `execFile` and an argument array, so neither `id` nor the
 * binary path is interpreted by a shell (no injection, and paths containing
 * spaces work).
 *
 * @param id - Dataset / script identifier; also the directory name under `script_data`.
 * @param token - Value exported to the child as `GPTSCRIPT_GATEWAY_API_KEY`.
 * @throws Error when `KNOWLEDGE_BIN` is unset, or when the process fails.
 */
export async function runKnowledgeIngest(
  id: string,
  token: string
): Promise<void> {
  const datasetDir = path.join(KNOWLEDGE_DIR(), 'script_data', id);
  if (!fs.existsSync(path.join(datasetDir, 'data'))) {
    return;
  }

  const knowledgeBin = process.env.KNOWLEDGE_BIN;
  if (!knowledgeBin) {
    throw new Error('KNOWLEDGE_BIN is not set');
  }

  // Imported locally so this block does not depend on a change to the
  // file's top-level import list.
  const { execFile } = await import('child_process');
  const execFilePromise = promisify(execFile);

  await execFilePromise(
    knowledgeBin,
    ['ingest', '--prune', '--dataset', id, './data'],
    {
      cwd: datasetDir,
      env: { ...process.env, GPTSCRIPT_GATEWAY_API_KEY: token },
    }
  );
}
118118

119-
/**
 * Lists the files currently stored for a script.
 *
 * Looks in the `local` and `notion` sub-directories of the script's data
 * directory. An entry that is a symbolic link is keyed by the link's target
 * (as returned by `readlink`); any other entry is keyed by its stored path.
 * The size is always measured on the stored path.
 *
 * @param scriptId - Identifier used as the directory name under `script_data`.
 * @returns A map of file details; empty when the data directory is missing.
 */
export async function getFiles(
  scriptId: string
): Promise<Map<string, FileDetail>> {
  const collected = new Map<string, FileDetail>();
  const dataDir = path.join(KNOWLEDGE_DIR(), 'script_data', scriptId, 'data');
  if (!fs.existsSync(dataDir)) {
    return collected;
  }

  const kinds: FileDetail['type'][] = ['local', 'notion'];
  for (const kind of kinds) {
    const kindDir = path.join(dataDir, kind);
    if (fs.existsSync(kindDir)) {
      const names = await fs.promises.readdir(kindDir);
      for (const name of names) {
        const storedPath = path.join(kindDir, name);
        const isLink = fs.lstatSync(storedPath).isSymbolicLink();
        const key = isLink
          ? await fs.promises.readlink(storedPath)
          : storedPath;
        collected.set(key, {
          type: kind,
          fileName: name,
          size: await getFileOrFolderSizeInKB(storedPath),
        });
      }
    }
  }

  return collected;
}
127146

128147
/**
 * Reports whether the script's data directory exists on disk.
 *
 * @param scriptId - Identifier used as the directory name under `script_data`.
 * @returns `true` when `<KNOWLEDGE_DIR>/script_data/<scriptId>/data` exists.
 */
export async function datasetExists(scriptId: string): Promise<boolean> {
  const dataDir = path.join(KNOWLEDGE_DIR(), 'script_data', scriptId, 'data');
  const present = fs.existsSync(dataDir);
  return present;
}
132-
133-
// NOTE(review): every line of this definition carries a "-" diff marker, so it
// appears to be deleted by this commit — confirm before relying on it.
// Returns the KNOWLEDGE_BIN environment variable, falling back to 'knowledge'
// when that variable is unset or empty.
export async function getKnowledgeBinaryPath(): Promise<string> {
134-
return process.env.KNOWLEDGE_BIN || 'knowledge';
135-
}

actions/knowledge/notion.ts

+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
'use server';
2+
3+
import fs from 'fs';
4+
import path from 'path';
5+
import { WORKSPACE_DIR } from '@/config/env';
6+
import {
7+
GPTScript,
8+
PromptFrame,
9+
Run,
10+
RunEventType,
11+
} from '@gptscript-ai/gptscript';
12+
13+
/**
 * Reports whether the Notion integration's `metadata.json` exists under
 * `<WORKSPACE_DIR>/knowledge/integrations/notion`.
 *
 * @returns `true` when the metadata file is present.
 */
export async function isNotionConfigured() {
  const metadataPath = path.join(
    WORKSPACE_DIR(),
    'knowledge',
    'integrations',
    'notion',
    'metadata.json'
  );
  return fs.existsSync(metadataPath);
}
24+
25+
/**
 * Collects the paths of all files below `dir`, depth-first, in directory
 * listing order. Entries named `metadata.json` are skipped at every level.
 *
 * A single accumulator is shared by the whole walk, so results are not
 * re-copied at each directory level.
 *
 * @param dir - Directory to walk.
 * @returns Paths of the files found, each built by joining onto `dir`.
 * @throws Propagates errors from `readdirSync` / `statSync`.
 */
function readFilesRecursive(dir: string): string[] {
  const results: string[] = [];

  const walk = (current: string): void => {
    for (const entry of fs.readdirSync(current)) {
      if (entry === 'metadata.json') {
        continue;
      }
      const entryPath = path.join(current, entry);
      if (fs.statSync(entryPath).isDirectory()) {
        walk(entryPath);
      } else {
        results.push(entryPath);
      }
    }
  };

  walk(dir);
  return results;
}
45+
46+
/**
 * Lists the files found under the Notion integration directory together
 * with the page URL recorded for them in `metadata.json`.
 *
 * The metadata key for a file is the name of the directory that directly
 * contains it. Files whose key has no entry with a string `url` are skipped
 * rather than crashing the whole listing with a property access on
 * `undefined`.
 *
 * @returns A map keyed by file path with the page URL and the file's base name.
 * @throws Propagates errors from reading the directory tree or from reading
 *   and parsing `metadata.json`.
 */
export async function getNotionFiles(): Promise<
  Map<string, { url: string; fileName: string }>
> {
  const dir = path.join(WORKSPACE_DIR(), 'knowledge', 'integrations', 'notion');
  const filePaths = readFilesRecursive(dir);
  const rawMetadata = fs.readFileSync(path.join(dir, 'metadata.json'), 'utf8');

  // JSON.parse output is untrusted in shape: narrow it before indexing.
  const parsed: unknown = JSON.parse(rawMetadata);
  const pages: Record<string, unknown> =
    typeof parsed === 'object' && parsed !== null
      ? (parsed as Record<string, unknown>)
      : {};

  const result = new Map<string, { url: string; fileName: string }>();
  for (const filePath of filePaths) {
    const pageID = path.basename(path.dirname(filePath));
    const page = pages[pageID];
    const url =
      typeof page === 'object' && page !== null
        ? (page as { url?: unknown }).url
        : undefined;
    if (typeof url !== 'string') {
      // No usable metadata for this file's parent directory.
      continue;
    }
    result.set(filePath, {
      url,
      fileName: path.basename(filePath),
    });
  }

  return result;
}
64+
65+
/**
 * Runs the `knowledge-notion-integration` tool and waits for it to finish.
 *
 * When `authed` is `false`, the first prompt event raised by the run is
 * answered with an empty set of responses before waiting for completion
 * (presumably the authentication prompt — confirm against the tool).
 *
 * @param authed - Whether to skip waiting for and answering a prompt.
 */
export async function runNotionSync(authed: boolean): Promise<void> {
  const client = new GPTScript({
    DefaultModelProvider: 'github.com/gptscript-ai/gateway-provider',
  });
  const syncRun: Run = await client.run(
    'github.com/gptscript-ai/knowledge-notion-integration',
    { prompt: true }
  );

  if (!authed) {
    // Resolves with the id of the first prompt frame the run emits.
    const promptId = await new Promise<string>((resolve) => {
      syncRun.on(RunEventType.Prompt, (frame: PromptFrame) => {
        resolve(frame.id);
      });
    });
    await client.promptResponse({ id: promptId, responses: {} });
  }

  await syncRun.text();
}

actions/knowledge/util.ts

+6
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
/** Describes one knowledge file tracked for a script. */
export interface FileDetail {
  /** Base name of the file (no directory part). */
  fileName: string;
  /** Size of the file; callers in this change fill it from `getFileOrFolderSizeInKB`. */
  size: number;
  /** Where the file comes from. */
  type: 'local' | 'notion';
}
6+
17
/**
 * Returns the reference string of the knowledge gateway tool.
 *
 * NOTE(review): the `[email protected]` segment looks like an e-mail
 * obfuscation artifact rather than a real tool reference — confirm the
 * intended value against the repository.
 */
export function gatewayTool(): string {
  const toolRef = 'github.com/gptscript-ai/knowledge/[email protected]';
  return toolRef;
}

0 commit comments

Comments
 (0)