@@ -4,6 +4,8 @@ import path from 'path';
4
4
import { exec } from 'child_process' ;
5
5
import { promisify } from 'util' ;
6
6
import { KNOWLEDGE_DIR } from '@/config/env' ;
7
+ import { FileDetail } from '@/actions/knowledge/util' ;
8
+ import { getFileOrFolderSizeInKB } from '@/actions/knowledge/filehelper' ;
7
9
8
10
// Promise-returning wrapper around child_process.exec, so shell commands can be awaited.
const execPromise = promisify ( exec ) ;
9
11
@@ -39,97 +41,110 @@ export async function deleteDataset(datasetID: string): Promise<void> {
39
41
40
42
/**
 * Reports whether ingesting `files` would be the first ingestion for a script.
 *
 * That is the case when the script's data directory
 * (`KNOWLEDGE_DIR()/script_data/<scriptId>/data`) does not exist yet and at
 * least one file has been supplied.
 *
 * @param scriptId - Identifier used as the per-script directory name.
 * @param files - Files selected for ingestion; only the entry count is used.
 * @returns `true` when the data directory is missing and `files` is non-empty.
 */
export async function firstIngestion(
  scriptId: string,
  files: Map<string, FileDetail>
): Promise<boolean> {
  const dir = path.join(KNOWLEDGE_DIR(), 'script_data', scriptId, 'data');
  return !fs.existsSync(dir) && files.size > 0;
}
47
49
48
/**
 * Brings the script's on-disk data directory in line with `files`.
 *
 * Every entry is placed at `<data dir>/<type>/<basename of its key>`:
 * `local` entries are copied from the key path, `notion` entries become a
 * symlink pointing at the key path. Entries whose target already exists are
 * left untouched. Unless `updateOnly` is set, anything found in the `local`
 * and `notion` sub-directories that has no matching entry of the same type
 * in `files` is removed afterwards.
 *
 * @param files - Map from source path to its detail; `type` selects both the
 *   sub-directory and whether the source is copied or linked.
 * @param scriptId - Identifier used as the per-script directory name.
 * @param updateOnly - When `true`, only add missing files; never delete.
 * @throws Propagates any filesystem error raised while creating
 *   directories, copying, linking, listing or deleting.
 */
export async function ensureFiles(
  files: Map<string, FileDetail>,
  scriptId: string,
  updateOnly: boolean
): Promise<void> {
  const dir = path.join(KNOWLEDGE_DIR(), 'script_data', scriptId, 'data');
  if (!fs.existsSync(dir) && files.size > 0) {
    fs.mkdirSync(dir, { recursive: true });
  }

  for (const [source, detail] of Array.from(files.entries())) {
    const typeDir = path.join(dir, detail.type);
    // Recursive mkdir is a no-op when the directory is already there.
    await fs.promises.mkdir(typeDir, { recursive: true });

    const target = path.join(typeDir, path.basename(source));
    // lstat-based check: an existing symlink counts as present even when its
    // destination is gone, so we never try to re-create it and hit EEXIST.
    if (await entryExistsNoFollow(target)) {
      continue;
    }

    if (detail.type === 'local') {
      await fs.promises.copyFile(source, target);
    } else if (detail.type === 'notion') {
      await fs.promises.symlink(source, target);
    }
  }

  if (updateOnly) {
    return;
  }

  // Index the requested basenames per type once, so pruning is a set lookup
  // and a name requested for one type does not protect a stale file of
  // another type.
  const wanted = new Map<string, Set<string>>();
  for (const [source, detail] of Array.from(files.entries())) {
    let names = wanted.get(detail.type);
    if (names === undefined) {
      names = new Set<string>();
      wanted.set(detail.type, names);
    }
    names.add(path.basename(source));
  }

  for (const type of ['local', 'notion']) {
    const typeDir = path.join(dir, type);
    if (!fs.existsSync(typeDir)) {
      continue;
    }
    const keep = wanted.get(type);
    for (const fileName of await fs.promises.readdir(typeDir)) {
      if (keep === undefined || !keep.has(fileName)) {
        await fs.promises.unlink(path.join(typeDir, fileName));
      }
    }
  }
}

/**
 * Tells whether something exists at `target` without following symlinks.
 * Like `fs.existsSync`, any lstat failure is reported as "absent"; the
 * subsequent copy/link call will surface the underlying error if there is one.
 */
async function entryExistsNoFollow(target: string): Promise<boolean> {
  try {
    await fs.promises.lstat(target);
    return true;
  } catch {
    return false;
  }
}
102
100
103
/**
 * Runs the knowledge CLI's `ingest` command for one dataset.
 *
 * Does nothing when `KNOWLEDGE_DIR()/script_data/<id>/data` does not exist.
 * Otherwise executes `<bin> ingest --prune --dataset <id> ./data` with the
 * script directory as working directory and `token` exposed to the child
 * process as `GPTSCRIPT_GATEWAY_API_KEY`.
 *
 * @param id - Dataset / script identifier; also the per-script directory name.
 * @param token - Value passed to the child as `GPTSCRIPT_GATEWAY_API_KEY`.
 * @throws Rejects with the error produced by the promisified `exec` when the
 *   command cannot be run or fails.
 */
export async function runKnowledgeIngest(
  id: string,
  token: string
): Promise<void> {
  const scriptDir = path.join(KNOWLEDGE_DIR(), 'script_data', id);
  if (!fs.existsSync(path.join(scriptDir, 'data'))) {
    return;
  }

  // `||` on purpose: an unset OR empty KNOWLEDGE_BIN falls back to the
  // `knowledge` executable instead of running the literal string "undefined".
  const knowledgeBin = process.env.KNOWLEDGE_BIN || 'knowledge';

  // NOTE(review): `id` is interpolated into a shell command line unescaped.
  // Confirm callers only pass trusted identifiers, or switch to execFile.
  await execPromise(`${knowledgeBin} ingest --prune --dataset ${id} ./data`, {
    cwd: scriptDir,
    env: { ...process.env, GPTSCRIPT_GATEWAY_API_KEY: token },
  });
}
118
118
119
/**
 * Lists the files currently stored for a script.
 *
 * Scans the `local` and `notion` sub-directories of
 * `KNOWLEDGE_DIR()/script_data/<scriptId>/data`. Each entry is keyed by its
 * path inside the data directory, except symlinks, which are keyed by the
 * link's stored destination as returned by `readlink`. The size is always
 * requested for the path inside the data directory.
 *
 * @param scriptId - Identifier used as the per-script directory name.
 * @returns A map from path to file detail; empty when the data directory
 *   does not exist.
 * @throws Propagates filesystem errors from listing or inspecting entries.
 */
export async function getFiles(
  scriptId: string
): Promise<Map<string, FileDetail>> {
  const result = new Map<string, FileDetail>();
  const dir = path.join(KNOWLEDGE_DIR(), 'script_data', scriptId, 'data');
  if (!fs.existsSync(dir)) {
    return result;
  }

  // Typed list so `type` is already the literal union and needs no assertion.
  const types: Array<'local' | 'notion'> = ['local', 'notion'];
  for (const type of types) {
    const typeDir = path.join(dir, type);
    if (!fs.existsSync(typeDir)) {
      continue;
    }

    for (const fileName of await fs.promises.readdir(typeDir)) {
      const entryPath = path.join(typeDir, fileName);
      // lstat (not stat) so a symlink is seen as a link rather than its target.
      const stats = await fs.promises.lstat(entryPath);
      const key = stats.isSymbolicLink()
        ? await fs.promises.readlink(entryPath)
        : entryPath;

      result.set(key, {
        type,
        fileName,
        size: await getFileOrFolderSizeInKB(entryPath),
      });
    }
  }
  return result;
}
127
146
128
147
/**
 * Reports whether a data directory exists for the given script.
 *
 * @param scriptId - Identifier used as the per-script directory name.
 * @returns `true` when `KNOWLEDGE_DIR()/script_data/<scriptId>/data` exists.
 */
export async function datasetExists(scriptId: string): Promise<boolean> {
  const dir = path.join(KNOWLEDGE_DIR(), 'script_data', scriptId, 'data');
  return fs.existsSync(dir);
}
132
-
133
/**
 * Resolves the knowledge executable to invoke.
 *
 * @returns The value of the `KNOWLEDGE_BIN` environment variable, or
 *   `'knowledge'` when that variable is unset or empty.
 */
export async function getKnowledgeBinaryPath(): Promise<string> {
  // `||` on purpose: an empty string must fall back as well.
  return process.env.KNOWLEDGE_BIN || 'knowledge';
}
0 commit comments