Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: Adding file through Notion and onedrive #401

Merged
merged 12 commits into from
Sep 13, 2024
27 changes: 25 additions & 2 deletions actions/knowledge/filehelper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import fs from 'fs';
import path from 'path';
import { WORKSPACE_DIR } from '@/config/env';
import { FileDetail } from '@/model/knowledge';

export async function getFileOrFolderSizeInKB(
filePath: string
Expand All @@ -28,6 +30,27 @@ export async function getFileOrFolderSizeInKB(
return 0;
}

export async function getBasename(filePath: string): Promise<string> {
return path.basename(filePath);
/**
 * Builds the file-detail map for a list of selected file paths.
 *
 * A path is classified as `notion` or `onedrive` only when it follows the
 * integration layout
 * `${WORKSPACE_DIR}/knowledge/integrations/${type}/${documentId}/${fileName}`.
 * Every other path is classified as `local`.
 *
 * @param files - Paths of the files to import.
 * @returns A map keyed by the given path, holding the file's base name, the
 *   size reported by `getFileOrFolderSizeInKB`, and the detected type.
 */
export async function importFiles(files: string[]) {
  const result: Map<string, FileDetail> = new Map();

  for (const file of files) {
    // Why dirname twice: integration files sit two levels below the
    // integration folder. dirname(file) is the per-document folder
    // (`.../${type}/${documentId}`), and its dirname is the integration
    // folder itself (`.../integrations/${type}`).
    const baseDir = path.dirname(path.dirname(file));
    const folderName = path.basename(baseDir);

    // Both conditions must hold: the folder is named after a known
    // integration AND it is the integration folder inside the workspace.
    // A folder that merely happens to be called "notion" or "onedrive"
    // somewhere else on disk is a local file.
    const isIntegrationFile =
      (folderName === 'notion' || folderName === 'onedrive') &&
      baseDir ===
        path.join(WORKSPACE_DIR(), 'knowledge', 'integrations', folderName);
    const type = isIntegrationFile ? folderName : 'local';

    result.set(file, {
      fileName: path.basename(file),
      size: await getFileOrFolderSizeInKB(file),
      type: type as FileDetail['type'],
    });
  }

  return result;
}
110 changes: 62 additions & 48 deletions actions/knowledge/knowledge.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import path from 'path';
import { exec } from 'child_process';
import { promisify } from 'util';
import { KNOWLEDGE_DIR } from '@/config/env';
import { getFileOrFolderSizeInKB } from '@/actions/knowledge/filehelper';
import { FileDetail } from '@/model/knowledge';

const execPromise = promisify(exec);

Expand Down Expand Up @@ -41,77 +43,74 @@ export async function deleteDataset(datasetID: string): Promise<void> {

export async function firstIngestion(
scriptId: string,
files: string[]
files: Map<string, FileDetail>
): Promise<boolean> {
const dir = path.join(KNOWLEDGE_DIR(), 'script_data', scriptId, 'data');
return !fs.existsSync(dir) && files.length > 0;
return !fs.existsSync(dir) && files.size > 0;
}

export async function ensureFilesIngested(
files: string[],
updateOnly: boolean,
export async function ensureFiles(
files: Map<string, FileDetail>,
scriptId: string,
token: string
): Promise<string> {
updateOnly: boolean
): Promise<void> {
const dir = path.join(KNOWLEDGE_DIR(), 'script_data', scriptId, 'data');
if (!fs.existsSync(dir) && files.length > 0) {
if (!fs.existsSync(dir) && files.size > 0) {
fs.mkdirSync(dir, { recursive: true });
} else if (!fs.existsSync(dir) && files.length === 0) {
// if there are no files in the directory and no dropped files, do nothing
return '';
}

for (const file of files) {
const filePath = path.join(dir, path.basename(file));
try {
if (!fs.existsSync(filePath)) {
await fs.promises.copyFile(file, filePath);
for (const [location, file] of Array.from(files.entries())) {
if (!fs.existsSync(path.join(dir, file.type))) {
fs.mkdirSync(path.join(dir, file.type), { recursive: true });
}
const filePath = path.join(dir, file.type, path.basename(location));
if (!fs.existsSync(filePath)) {
if (file.type === 'local') {
await fs.promises.copyFile(location, filePath);
} else if (file.type === 'notion' || file.type === 'onedrive') {
if (
fs.existsSync(filePath) &&
fs.lstatSync(filePath).isSymbolicLink()
) {
continue;
}
await fs.promises.symlink(location, filePath);
}
} catch (error) {
return `Error copying file ${file}: ${error}`;
}
}

if (!updateOnly) {
try {
const filesInDir = await fs.promises.readdir(dir);
for (const type of ['local', 'notion', 'onedrive']) {
if (!fs.existsSync(path.join(dir, type))) {
continue;
}
const filesInDir = await fs.promises.readdir(path.join(dir, type));
for (const fileName of filesInDir) {
const fullPath = path.join(dir, fileName);
const fileInDroppedFiles = files.find(
const fullPath = path.join(dir, type, fileName);
const fileInDroppedFiles = Array.from(files.keys()).find(
(file) => path.basename(file) === path.basename(fullPath)
);
if (!fileInDroppedFiles || !files || files.length === 0) {
if (!fileInDroppedFiles || !files || files.size === 0) {
await fs.promises.unlink(fullPath);
}
}
} catch (error) {
return `Error deleting files: ${error}`;
}
}

try {
await runKnowledgeIngest(
scriptId,
path.join(KNOWLEDGE_DIR(), 'script_data', scriptId),
token
);
} catch (error) {
console.error(error);
return `Error running knowledge ingestion: ${error}`;
}

return '';
return;
}

async function runKnowledgeIngest(
export async function runKnowledgeIngest(
id: string,
knowledgePath: string,
token: string
): Promise<void> {
if (!fs.existsSync(path.join(KNOWLEDGE_DIR(), 'script_data', id, 'data'))) {
return;
}
const { stdout, stderr } = await execPromise(
`${process.env.KNOWLEDGE_BIN} ingest --prune --dataset ${id} ./data`,
{
cwd: knowledgePath,
cwd: path.join(KNOWLEDGE_DIR(), 'script_data', id),
env: { ...process.env, GPTSCRIPT_GATEWAY_API_KEY: token },
}
);
Expand All @@ -120,20 +119,35 @@ async function runKnowledgeIngest(
return;
}

export async function getFiles(scriptId: string): Promise<string[]> {
export async function getFiles(
scriptId: string
): Promise<Map<string, FileDetail>> {
const result = new Map<string, FileDetail>();
const dir = path.join(KNOWLEDGE_DIR(), 'script_data', scriptId, 'data');
if (!fs.existsSync(dir)) {
return [];
return result;
}
const files = await fs.promises.readdir(dir);
return files.map((file) => path.join(dir, file));
for (const type of ['local', 'notion', 'onedrive']) {
if (!fs.existsSync(path.join(dir, type))) {
continue;
}
const files = await fs.promises.readdir(path.join(dir, type));
for (const file of files) {
let filePath = path.join(dir, type, file);
if (fs.lstatSync(filePath).isSymbolicLink()) {
filePath = await fs.promises.readlink(filePath);
}
result.set(filePath, {
type: type as any,
fileName: file,
size: await getFileOrFolderSizeInKB(path.join(dir, type, file)),
});
}
}
return result;
}

/**
 * Reports whether the `data` directory of a script's knowledge dataset exists.
 *
 * @param scriptId - Identifier of the script whose dataset is checked.
 * @returns `true` when `${KNOWLEDGE_DIR}/script_data/${scriptId}/data` exists.
 */
export async function datasetExists(scriptId: string): Promise<boolean> {
  const segments = [KNOWLEDGE_DIR(), 'script_data', scriptId, 'data'];
  const dataDir = path.join(...segments);
  return fs.existsSync(dataDir);
}

/**
 * Resolves the knowledge binary to invoke.
 *
 * @returns The value of the `KNOWLEDGE_BIN` environment variable, or the bare
 *   command name `knowledge` when the variable is unset or empty.
 */
export async function getKnowledgeBinaryPath(): Promise<string> {
  const configured = process.env.KNOWLEDGE_BIN;
  if (configured) {
    return configured;
  }
  return 'knowledge';
}
38 changes: 38 additions & 0 deletions actions/knowledge/notion.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
'use server';

import fs from 'fs';
import path from 'path';
import { WORKSPACE_DIR } from '@/config/env';
import { runSyncTool } from '@/actions/knowledge/tool';

/**
 * Reports whether the Notion integration has a `metadata.json` file in the
 * workspace (`${WORKSPACE_DIR}/knowledge/integrations/notion/metadata.json`).
 */
export async function isNotionConfigured() {
  const metadataFile = path.join(
    WORKSPACE_DIR(),
    'knowledge',
    'integrations',
    'notion',
    'metadata.json'
  );
  return fs.existsSync(metadataFile);
}

/**
 * Lists the Notion pages described by the integration's `metadata.json`.
 *
 * Errors from reading or parsing `metadata.json` propagate to the caller.
 *
 * @returns A map keyed by `<notion dir>/<pageID>/<filename>`; each value
 *   carries the page `url` from the metadata and the base name of that path.
 */
export async function getNotionFiles() {
  const notionDir = path.join(
    WORKSPACE_DIR(),
    'knowledge',
    'integrations',
    'notion'
  );
  const rawMetadata = fs.readFileSync(path.join(notionDir, 'metadata.json'));
  const pages = JSON.parse(rawMetadata.toString());

  const notionFiles = new Map<string, { url: string; fileName: string }>();
  for (const pageID in pages) {
    const page = pages[pageID];
    const location = path.join(notionDir, pageID, page.filename);
    notionFiles.set(location, {
      url: page.url,
      fileName: path.basename(location),
    });
  }

  return notionFiles;
}

/**
 * Runs the sync tool for the Notion integration.
 *
 * @param authed - Forwarded unchanged to `runSyncTool`.
 */
export async function runNotionSync(authed: boolean): Promise<void> {
  await runSyncTool(authed, 'notion');
}
77 changes: 77 additions & 0 deletions actions/knowledge/onedrive.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
'use server';
import fs from 'fs';
import path from 'path';
import { WORKSPACE_DIR } from '@/config/env';
import { runSyncTool } from '@/actions/knowledge/tool';

/**
 * Reports whether the OneDrive integration has a `metadata.json` file in the
 * workspace (`${WORKSPACE_DIR}/knowledge/integrations/onedrive/metadata.json`).
 */
export async function isOneDriveConfigured() {
  const metadataFile = path.join(
    WORKSPACE_DIR(),
    'knowledge',
    'integrations',
    'onedrive',
    'metadata.json'
  );
  return fs.existsSync(metadataFile);
}

/**
 * Lists the OneDrive documents described by the integration's `metadata.json`.
 *
 * Errors from reading or parsing `metadata.json` propagate to the caller.
 *
 * @returns A map keyed by `<onedrive dir>/<documentID>/<fileName>`; each value
 *   carries the `url`, `fileName` and `displayName` stored in the metadata.
 */
export async function getOneDriveFiles(): Promise<
  Map<string, { url: string; fileName: string; displayName: string }>
> {
  const oneDriveDir = path.join(
    WORKSPACE_DIR(),
    'knowledge',
    'integrations',
    'onedrive'
  );
  const rawMetadata = fs.readFileSync(path.join(oneDriveDir, 'metadata.json'));
  const documents = JSON.parse(rawMetadata.toString());

  const oneDriveFiles = new Map<
    string,
    { url: string; fileName: string; displayName: string }
  >();
  for (const documentID in documents) {
    const entry = documents[documentID];
    const location = path.join(oneDriveDir, documentID, entry.fileName);
    oneDriveFiles.set(location, {
      url: entry.url,
      fileName: entry.fileName,
      displayName: entry.displayName,
    });
  }
  return oneDriveFiles;
}

/**
 * Records a OneDrive shared link and runs the OneDrive sync tool.
 *
 * The link is added as a key (with the string value `'true'`) to
 * `externalLinks.json` in the OneDrive integration folder, after which
 * `runSyncTool(true, 'onedrive')` is awaited.
 *
 * @param link - The shared link to register.
 */
export async function syncSharedLink(link: string): Promise<void> {
  const dir = path.join(
    WORKSPACE_DIR(),
    'knowledge',
    'integrations',
    'onedrive'
  );
  // The integration folder may not exist yet when a link is added before any
  // sync has run; without it the write below fails with ENOENT.
  fs.mkdirSync(dir, { recursive: true });

  const externalLinkFile = path.join(dir, 'externalLinks.json');
  const externalLink: Record<string, string> = fs.existsSync(externalLinkFile)
    ? JSON.parse(fs.readFileSync(externalLinkFile).toString())
    : {};
  externalLink[link] = 'true';
  fs.writeFileSync(externalLinkFile, JSON.stringify(externalLink));

  await runSyncTool(true, 'onedrive');
}

/**
 * Removes `externalLinks.json` from the OneDrive integration folder.
 * The removal is forced, so a missing file is not an error.
 */
export async function clearOneDriveFiles(): Promise<void> {
  const externalLinkFile = path.join(
    WORKSPACE_DIR(),
    'knowledge',
    'integrations',
    'onedrive',
    'externalLinks.json'
  );
  const removalOptions = { recursive: true, force: true };
  fs.rmSync(externalLinkFile, removalOptions);
}

/**
 * Runs the sync tool for the OneDrive integration.
 *
 * @param authed - Forwarded unchanged to `runSyncTool`.
 */
export async function runOneDriveSync(authed: boolean): Promise<void> {
  await runSyncTool(authed, 'onedrive');
}
69 changes: 69 additions & 0 deletions actions/knowledge/tool.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
'use server';

import {
GPTScript,
PromptFrame,
Run,
RunEventType,
} from '@gptscript-ai/gptscript';
import path from 'path';
import { WORKSPACE_DIR } from '@/config/env';
import fs from 'fs';

/**
 * Runs the knowledge integration tool for Notion or OneDrive and waits for it
 * to finish.
 *
 * When `authed` is false, the function first waits for a prompt event from
 * the run and answers it with an empty set of responses before awaiting the
 * run's text output.
 *
 * @param authed - When false, the first prompt event is answered with no responses.
 * @param tool - Which integration tool to run.
 */
export async function runSyncTool(
  authed: boolean,
  tool: 'notion' | 'onedrive'
): Promise<void> {
  const client = new GPTScript({
    DefaultModelProvider: 'github.com/gptscript-ai/gateway-provider',
  });

  let toolUrl: string;
  switch (tool) {
    case 'notion':
      toolUrl = 'github.com/gptscript-ai/knowledge-notion-integration';
      break;
    case 'onedrive':
      toolUrl = 'github.com/gptscript-ai/knowledge-onedrive-integration';
      break;
    default:
      toolUrl = '';
  }

  const run: Run = await client.run(toolUrl, { prompt: true });

  if (!authed) {
    // Wait for the prompt event, then reply to it with an empty response set.
    const promptId = await new Promise<string>((resolve) => {
      run.on(RunEventType.Prompt, (frame: PromptFrame) => {
        resolve(frame.id);
      });
    });
    await client.promptResponse({ id: promptId, responses: {} });
  }

  await run.text();
}

/**
 * Marks the selected integration files for syncing and runs the sync tool.
 *
 * For every selected path of the form `<integration dir>/<documentID>/<file>`,
 * the matching entry in the integration's `metadata.json` gets `sync: true`.
 * The metadata file is then rewritten and `runSyncTool(true, type)` is awaited.
 * Selected paths outside the integration folder, and paths whose document ID
 * has no entry in the metadata, are ignored.
 *
 * todo: we can stop syncing once file is no longer used by any other script
 *
 * @param selectedFiles - Paths of the files chosen by the user.
 * @param type - Integration whose metadata is updated and whose tool is run.
 */
export async function syncFiles(
  selectedFiles: string[],
  type: 'notion' | 'onedrive'
): Promise<void> {
  const dir = path.join(WORKSPACE_DIR(), 'knowledge', 'integrations', type);
  const metadataFile = path.join(dir, 'metadata.json');
  const metadata = JSON.parse(fs.readFileSync(metadataFile).toString());

  for (const file of selectedFiles) {
    // The file's parent is the per-document folder; that folder's parent must
    // be the integration folder for the file to belong to this integration.
    const documentDir = path.dirname(file);
    if (path.dirname(documentDir) !== dir) {
      continue;
    }

    const detail = metadata[path.basename(documentDir)];
    // A stale selection may point at a document that is no longer listed in
    // the metadata; skip it instead of failing on a missing entry.
    if (detail === null || typeof detail !== 'object') {
      continue;
    }
    detail.sync = true;
  }

  fs.writeFileSync(metadataFile, JSON.stringify(metadata));
  await runSyncTool(true, type);
}
Loading
Loading