@@ -3,18 +3,20 @@ import Extractor from "./extractor";
3
3
import {
4
4
IContentMetadata ,
5
5
IExtractor ,
6
- IExtractionPolicy ,
6
+ IExtractionGraph ,
7
7
IIndex ,
8
8
INamespace ,
9
9
ITask ,
10
- IAddExtractorPolicyResponse ,
10
+ IAddExtractorGraphResponse ,
11
11
IDocument ,
12
12
ISearchIndexResponse ,
13
13
IBaseContentMetadata ,
14
14
ISchema ,
15
15
IMtlsConfig ,
16
16
IContent ,
17
17
IExtractResponse ,
18
+ IExtractionPolicy ,
19
+ IExtractedMetadata ,
18
20
} from "./types" ;
19
21
import { v4 as uuidv4 } from "uuid" ;
20
22
import CryptoJS from "crypto-js" ;
@@ -25,18 +27,18 @@ class IndexifyClient {
25
27
public serviceUrl : string ;
26
28
private client : AxiosInstance ;
27
29
public namespace : string ;
28
- public extractionPolicies : IExtractionPolicy [ ] ;
30
+ public extractionGraphs : IExtractionGraph [ ] ;
29
31
30
32
constructor (
31
33
serviceUrl : string = DEFAULT_SERVICE_URL ,
32
34
namespace : string = "default" ,
33
35
// optional mtls config
34
- extractionPolicies : IExtractionPolicy [ ] ,
36
+ extractionGraphs : IExtractionGraph [ ] ,
35
37
httpsAgent ?: any
36
38
) {
37
39
this . serviceUrl = serviceUrl ;
38
40
this . namespace = namespace ;
39
- this . extractionPolicies = extractionPolicies ;
41
+ this . extractionGraphs = extractionGraphs ;
40
42
41
43
this . client = axios . create ( {
42
44
baseURL : `${ serviceUrl } /namespaces/${ namespace } ` ,
@@ -60,7 +62,7 @@ class IndexifyClient {
60
62
return new IndexifyClient (
61
63
serviceUrl ,
62
64
namespace ,
63
- response . data . namespace . extraction_policies . map (
65
+ response . data . namespace . extraction_graphs . map (
64
66
( item : {
65
67
id : string ;
66
68
name : string ;
@@ -79,7 +81,7 @@ class IndexifyClient {
79
81
content_source : item . content_source ,
80
82
} ;
81
83
}
82
- ) as IExtractionPolicy [ ] ,
84
+ ) as IExtractionGraph [ ] ,
83
85
IndexifyClient . getHttpsAgent ( { mtlsConfig } )
84
86
) ;
85
87
}
@@ -166,26 +168,26 @@ class IndexifyClient {
166
168
}
167
169
168
170
/**
 * Creates a namespace on the service and returns a client bound to it.
 *
 * The namespace is always created against `DEFAULT_SERVICE_URL`.
 *
 * @param name - Name of the namespace to create.
 * @param extractionGraphs - Extraction graphs to register with the namespace;
 *   an empty list is sent when omitted.
 * @param labels - Labels to attach to the namespace; an empty object is sent
 *   when omitted.
 * @param mtlsConfig - Optional mTLS configuration. It is used for the creation
 *   request and is also forwarded to the returned client so that follow-up
 *   requests use the same TLS setup.
 * @returns A client scoped to the newly created namespace.
 */
static async createNamespace({
  name,
  extractionGraphs,
  labels,
  mtlsConfig,
}: {
  name: string;
  extractionGraphs?: IExtractionGraph[];
  labels?: Record<string, string>;
  mtlsConfig?: IMtlsConfig;
}): Promise<IndexifyClient> {
  await axios.post(
    `${DEFAULT_SERVICE_URL}/namespaces`,
    {
      name,
      extraction_graphs: extractionGraphs ?? [],
      labels: labels ?? {},
    },
    { httpsAgent: IndexifyClient.getHttpsAgent({ mtlsConfig }) }
  );

  // Forward mtlsConfig: without it the returned client would talk to an
  // mTLS-protected service without the agent used for the request above.
  const client = await IndexifyClient.createClient({
    namespace: name,
    mtlsConfig,
  });
  return client;
}
191
193
@@ -215,36 +217,44 @@ class IndexifyClient {
215
217
return resp . data [ "results" ] ;
216
218
}
217
219
218
/**
 * Registers an extraction graph in this client's namespace.
 *
 * @param name - Name of the extraction graph.
 * @param extractionPolicies - A single policy or a list of policies; a single
 *   policy is wrapped in a one-element array before being sent.
 * @returns The response body of the `extraction_graphs` POST request.
 */
async createExtractionGraph(
  name: string,
  extractionPolicies: IExtractionPolicy | IExtractionPolicy[]
): Promise<IAddExtractorGraphResponse> {
  const payload = {
    name,
    extraction_policies: Array.isArray(extractionPolicies)
      ? extractionPolicies
      : [extractionPolicies],
  };

  const { data } = await this.client.post("extraction_graphs", payload);

  // Re-fetch the namespace so this.extractionGraphs reflects the new graph.
  await this.getExtractionGraphs();

  return data;
}
234
238
235
/**
 * Lists content in this namespace.
 *
 * @param parent_id - Optional value sent as the `parent_id` query parameter.
 * @param source - Optional value sent as the `source` query parameter.
 * @param labels_eq - Optional value sent as the `labels_eq` query parameter.
 * @returns The returned `content_list`, with each entry converted through
 *   `baseContentToContentMetadata`.
 */
async getExtractedContent({
  parent_id,
  source,
  labels_eq,
}: {
  parent_id?: string;
  source?: string;
  labels_eq?: string;
} = {}): Promise<IContentMetadata[]> {
  const query = { parent_id, labels_eq, source };
  const { data } = await this.client.get("content", { params: query });

  const contentList: IBaseContentMetadata[] = data.content_list;
  return contentList.map((entry) => this.baseContentToContentMetadata(entry));
}
246
255
247
256
async addDocuments (
257
+ extractionGraphNames : string | string [ ] ,
248
258
documents :
249
259
| IDocument
250
260
| string
@@ -285,14 +295,26 @@ class IndexifyClient {
285
295
) ;
286
296
}
287
297
288
- await this . client . post ( "add_texts" , { documents : newDocuments } ) ;
298
+ const extractionGraphNamesArray = Array . isArray ( extractionGraphNames )
299
+ ? extractionGraphNames
300
+ : [ extractionGraphNames ] ;
301
+
302
+ await this . client . post ( "add_texts" , {
303
+ documents : newDocuments ,
304
+ extraction_graph_names : extractionGraphNamesArray ,
305
+ } ) ;
289
306
}
290
307
291
308
/**
 * Fetches the metadata of a single piece of content.
 *
 * @param id - Identifier of the content.
 * @returns The `content_metadata` field of the response, converted through
 *   `baseContentToContentMetadata`.
 */
async getContentMetadata(id: string): Promise<IContentMetadata> {
  const { data } = await this.client.get(`content/${id}`);
  const rawMetadata = data.content_metadata;
  return this.baseContentToContentMetadata(rawMetadata);
}
295
312
313
/**
 * Fetches the metadata entries recorded for a piece of content.
 *
 * @param id - Identifier of the content.
 * @returns The `metadata` field of the response body, returned as-is.
 */
async getStructuredMetadata(id: string): Promise<IExtractedMetadata[]> {
  const { data } = await this.client.get(`content/${id}/metadata`);
  return data.metadata;
}
317
+
296
318
async getContentTree ( id : string ) : Promise < IContentMetadata [ ] > {
297
319
const resp = await this . client . get ( `content/${ id } /content-tree` ) ;
298
320
return resp . data . content_tree_metadata ;
@@ -307,9 +329,9 @@ class IndexifyClient {
307
329
}
308
330
}
309
331
310
/**
 * Lists tasks in this namespace.
 *
 * @param extraction_graph - Optional value sent as the `extraction_graph`
 *   query parameter.
 * @returns The `tasks` field of the response body.
 */
async getTasks(extraction_graph?: string): Promise<ITask[]> {
  const query = { extraction_graph };
  const { data } = await this.client.get("tasks", { params: query });
  return data.tasks;
}
@@ -319,41 +341,79 @@ class IndexifyClient {
319
341
return resp . data . schemas ;
320
342
}
321
343
322
/**
 * Uploads a file as multipart form data to the `upload_file` endpoint.
 *
 * When `window` is undefined the Node path is taken: `fileInput` must be a
 * string, which is passed to `fs.createReadStream`. Otherwise the browser
 * path is taken and `fileInput` must be a `Blob`.
 *
 * @param extractionGraphNames - One graph name or a list of names; sent
 *   comma-joined as the `extraction_graph_names` query parameter.
 * @param fileInput - File path string (Node) or `Blob` (browser).
 * @param labels - Key/value pairs appended to the form as extra fields.
 * @param id - Optional value sent as the `id` query parameter when truthy.
 * @returns The `content_id` field of the response body.
 * @throws Error "Expected string" on the Node path when `fileInput` is not a
 *   string, or "Expected blob" on the browser path when it is not a `Blob`.
 */
async uploadFile(
  extractionGraphNames: string | string[],
  fileInput: string | Blob,
  labels: Record<string, any> = {},
  id?: string
): Promise<string> {
  const graphNames = Array.isArray(extractionGraphNames)
    ? extractionGraphNames
    : [extractionGraphNames];

  // Build the query string; `id` is only included when it is truthy.
  const query: Record<string, string> = {
    extraction_graph_names: graphNames.join(","),
  };
  if (id) {
    query.id = id;
  }
  const params = new URLSearchParams(query);

  const runningInNode = typeof window === "undefined";

  if (runningInNode) {
    if (typeof fileInput !== "string") {
      throw Error("Expected string");
    }

    const fs = require("fs");
    // The "form-data" package supplies FormData and getHeaders() on this path.
    const FormData = require("form-data");

    const nodeForm = new FormData();
    nodeForm.append("file", fs.createReadStream(fileInput));
    for (const [key, value] of Object.entries(labels)) {
      nodeForm.append(key, value);
    }

    const { data } = await this.client.post("upload_file", nodeForm, {
      headers: {
        ...nodeForm.getHeaders(),
      },
      params,
    });
    return data.content_id;
  }

  // Browser path: only Blob inputs are accepted.
  if (!(fileInput instanceof Blob)) {
    throw Error("Expected blob");
  }

  const browserForm = new FormData();
  browserForm.append("file", fileInput);
  for (const [key, value] of Object.entries(labels)) {
    browserForm.append(key, value);
  }

  const { data } = await this.client.post("/upload_file", browserForm, {
    params,
  });
  return data.content_id;
}
351
411
352
/**
 * Fetches the namespace's extraction graphs and stores them on the client.
 *
 * @returns The `namespace.extraction_graphs` list from the response, or an
 *   empty array when that field (or `namespace`) is null or undefined. The
 *   same value is assigned to `this.extractionGraphs`.
 */
async getExtractionGraphs(): Promise<IExtractionGraph[]> {
  const { data } = await this.client.get("");
  const graphs = data.namespace?.extraction_graphs ?? [];
  this.extractionGraphs = graphs;
  return graphs;
}
358
418
359
419
async extract ( {
@@ -385,12 +445,17 @@ class IndexifyClient {
385
445
url : string ,
386
446
mime_type : string ,
387
447
labels : Record < string , string > ,
448
+ extractionGraphNames : string | string [ ] ,
388
449
id ?: string
389
450
) : Promise < AxiosResponse > {
451
+ const extractionGraphNamesArray = Array . isArray ( extractionGraphNames )
452
+ ? extractionGraphNames
453
+ : [ extractionGraphNames ] ;
390
454
const resp = await this . client . post ( "ingest_remote_file" , {
391
455
url,
392
456
mime_type,
393
457
labels,
458
+ extraction_graph_names : extractionGraphNamesArray ,
394
459
id,
395
460
} ) ;
396
461
return resp ;
0 commit comments