Skip to content
This repository was archived by the owner on Mar 28, 2025. It is now read-only.

Commit 0509b9c

Browse files
authored
Update client for extraction graphs update (#30)
* update functions for extraction graphs
* bring back getExtractedMetadata
* update getExtractedContent
* update method names + fix upload test
* update IBaseContentMetadata with extraction_graph_names
* update tests
* version bump
1 parent 9483430 commit 0509b9c

File tree

5 files changed: +271 -127 lines changed

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "getindexify",
3-
"version": "0.0.37",
3+
"version": "0.0.38",
44
"description": "This is the TypeScript client for interacting with the Indexify service.",
55
"main": "./dist/index.js",
66
"module": "./dist/index.mjs",

src/client.ts

Lines changed: 107 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,20 @@ import Extractor from "./extractor";
33
import {
44
IContentMetadata,
55
IExtractor,
6-
IExtractionPolicy,
6+
IExtractionGraph,
77
IIndex,
88
INamespace,
99
ITask,
10-
IAddExtractorPolicyResponse,
10+
IAddExtractorGraphResponse,
1111
IDocument,
1212
ISearchIndexResponse,
1313
IBaseContentMetadata,
1414
ISchema,
1515
IMtlsConfig,
1616
IContent,
1717
IExtractResponse,
18+
IExtractionPolicy,
19+
IExtractedMetadata,
1820
} from "./types";
1921
import { v4 as uuidv4 } from "uuid";
2022
import CryptoJS from "crypto-js";
@@ -25,18 +27,18 @@ class IndexifyClient {
2527
public serviceUrl: string;
2628
private client: AxiosInstance;
2729
public namespace: string;
28-
public extractionPolicies: IExtractionPolicy[];
30+
public extractionGraphs: IExtractionGraph[];
2931

3032
constructor(
3133
serviceUrl: string = DEFAULT_SERVICE_URL,
3234
namespace: string = "default",
3335
// optional mtls config
34-
extractionPolicies: IExtractionPolicy[],
36+
extractionGraphs: IExtractionGraph[],
3537
httpsAgent?: any
3638
) {
3739
this.serviceUrl = serviceUrl;
3840
this.namespace = namespace;
39-
this.extractionPolicies = extractionPolicies;
41+
this.extractionGraphs = extractionGraphs;
4042

4143
this.client = axios.create({
4244
baseURL: `${serviceUrl}/namespaces/${namespace}`,
@@ -60,7 +62,7 @@ class IndexifyClient {
6062
return new IndexifyClient(
6163
serviceUrl,
6264
namespace,
63-
response.data.namespace.extraction_policies.map(
65+
response.data.namespace.extraction_graphs.map(
6466
(item: {
6567
id: string;
6668
name: string;
@@ -79,7 +81,7 @@ class IndexifyClient {
7981
content_source: item.content_source,
8082
};
8183
}
82-
) as IExtractionPolicy[],
84+
) as IExtractionGraph[],
8385
IndexifyClient.getHttpsAgent({ mtlsConfig })
8486
);
8587
}
@@ -166,26 +168,26 @@ class IndexifyClient {
166168
}
167169

168170
static async createNamespace({
169-
namespace,
170-
extraction_policies,
171+
name,
172+
extractionGraphs,
171173
labels,
172174
mtlsConfig,
173175
}: {
174-
namespace: string;
175-
extraction_policies?: IExtractionPolicy[];
176+
name: string;
177+
extractionGraphs?: IExtractionGraph[];
176178
labels?: Record<string, string>;
177179
mtlsConfig?: IMtlsConfig;
178180
}) {
179181
await axios.post(
180182
`${DEFAULT_SERVICE_URL}/namespaces`,
181183
{
182-
name: namespace,
183-
extraction_policies: extraction_policies ?? [],
184+
name: name,
185+
extraction_graphs: extractionGraphs ?? [],
184186
labels: labels ?? {},
185187
},
186188
{ httpsAgent: IndexifyClient.getHttpsAgent({ mtlsConfig }) }
187189
);
188-
const client = await IndexifyClient.createClient({ namespace });
190+
const client = await IndexifyClient.createClient({ namespace: name });
189191
return client;
190192
}
191193

@@ -215,36 +217,44 @@ class IndexifyClient {
215217
return resp.data["results"];
216218
}
217219

218-
async addExtractionPolicy(
219-
extractionPolicy: IExtractionPolicy
220-
): Promise<IAddExtractorPolicyResponse> {
221-
const resp = await this.client.post("extraction_policies", {
222-
extractor: extractionPolicy.extractor,
223-
name: extractionPolicy.name,
224-
input_params: extractionPolicy.input_params,
225-
filters_eq: extractionPolicy.labels_eq,
226-
content_source: extractionPolicy.content_source ?? "ingestion",
220+
async createExtractionGraph(
221+
name: string,
222+
extractionPolicies: IExtractionPolicy | IExtractionPolicy[]
223+
): Promise<IAddExtractorGraphResponse> {
224+
const policiesArray = Array.isArray(extractionPolicies)
225+
? extractionPolicies
226+
: [extractionPolicies];
227+
228+
const resp = await this.client.post("extraction_graphs", {
229+
name,
230+
extraction_policies: policiesArray,
227231
});
228232

229233
// refresh this.extractionGraphs from the server
230-
await this.getExtractionPolicies();
234+
await this.getExtractionGraphs();
231235

232236
return resp.data;
233237
}
234238

235-
async getContent(
236-
parent_id?: string,
237-
labels_eq?: string
238-
): Promise<IContentMetadata[]> {
239+
async getExtractedContent({
240+
parent_id,
241+
source,
242+
labels_eq,
243+
}: {
244+
parent_id?: string;
245+
source?: string;
246+
labels_eq?: string;
247+
} = {}): Promise<IContentMetadata[]> {
239248
const resp = await this.client.get("content", {
240-
params: { parent_id, labels_eq },
249+
params: { parent_id, labels_eq, source },
241250
});
242251
return resp.data.content_list.map((content: IBaseContentMetadata) => {
243252
return this.baseContentToContentMetadata(content);
244253
});
245254
}
246255

247256
async addDocuments(
257+
extractionGraphNames: string | string[],
248258
documents:
249259
| IDocument
250260
| string
@@ -285,14 +295,26 @@ class IndexifyClient {
285295
);
286296
}
287297

288-
await this.client.post("add_texts", { documents: newDocuments });
298+
const extractionGraphNamesArray = Array.isArray(extractionGraphNames)
299+
? extractionGraphNames
300+
: [extractionGraphNames];
301+
302+
await this.client.post("add_texts", {
303+
documents: newDocuments,
304+
extraction_graph_names: extractionGraphNamesArray,
305+
});
289306
}
290307

291308
async getContentMetadata(id: string): Promise<IContentMetadata> {
292309
const resp = await this.client.get(`content/${id}`);
293310
return this.baseContentToContentMetadata(resp.data.content_metadata);
294311
}
295312

313+
async getStructuredMetadata(id: string): Promise<IExtractedMetadata[]> {
314+
const resp = await this.client.get(`content/${id}/metadata`);
315+
return resp.data.metadata;
316+
}
317+
296318
async getContentTree(id: string): Promise<IContentMetadata[]> {
297319
const resp = await this.client.get(`content/${id}/content-tree`);
298320
return resp.data.content_tree_metadata;
@@ -307,9 +329,9 @@ class IndexifyClient {
307329
}
308330
}
309331

310-
async getTasks(extraction_policy?: string): Promise<ITask[]> {
332+
async getTasks(extraction_graph?: string): Promise<ITask[]> {
311333
const resp = await this.client.get("tasks", {
312-
params: { extraction_policy },
334+
params: { extraction_graph },
313335
});
314336
return resp.data.tasks;
315337
}
@@ -319,41 +341,79 @@ class IndexifyClient {
319341
return resp.data.schemas;
320342
}
321343

322-
async uploadFile(fileInput: string | Blob): Promise<any> {
344+
async uploadFile(
345+
extractionGraphNames: string | string[],
346+
fileInput: string | Blob,
347+
labels: Record<string, any> = {},
348+
id?: string
349+
): Promise<string> {
323350
function isBlob(input: any): input is Blob {
324351
return input instanceof Blob;
325352
}
326353

354+
const extractionGraphNamesArray = Array.isArray(extractionGraphNames)
355+
? extractionGraphNames
356+
: [extractionGraphNames];
357+
358+
const params = new URLSearchParams({
359+
extraction_graph_names: extractionGraphNamesArray.join(","),
360+
...(id ? { id: id } : {}),
361+
});
362+
327363
if (typeof window === "undefined") {
328364
// node
329365
if (typeof fileInput !== "string") {
330366
throw Error("Expected string");
331367
}
332-
const FormData = require("form-data");
368+
333369
const fs = require("fs");
370+
371+
// Create form
372+
const FormData = require("form-data");
334373
const formData = new FormData();
335-
formData.append("file", fs.createReadStream(fileInput as string));
336-
await this.client.post("upload_file", formData, {
374+
formData.append("file", fs.createReadStream(fileInput as string)); //stream
375+
376+
// Append labels to the form data
377+
Object.keys(labels).forEach((key) => {
378+
formData.append(key, labels[key]);
379+
});
380+
381+
// Upload File
382+
const res = await this.client.post("upload_file", formData, {
337383
headers: {
338384
...formData.getHeaders(),
339385
},
386+
params,
340387
});
388+
return res.data.content_id
341389
} else {
342390
// browser
343391
if (!isBlob(fileInput)) {
344392
throw Error("Expected blob");
345393
}
394+
395+
// Create form
346396
const formData = new FormData();
347-
formData.append("file", fileInput);
348-
await this.client.post("/upload_file", formData);
397+
formData.append("file", fileInput); //blob
398+
399+
// Append labels to the form data
400+
Object.keys(labels).forEach((key) => {
401+
formData.append(key, labels[key]);
402+
});
403+
404+
// Upload File
405+
const res = await this.client.post("/upload_file", formData, {
406+
params
407+
});
408+
return res.data.content_id
349409
}
350410
}
351411

352-
async getExtractionPolicies(): Promise<IExtractionPolicy[]> {
412+
async getExtractionGraphs(): Promise<IExtractionGraph[]> {
353413
const resp = await this.client.get("");
354-
const policies = resp.data.namespace?.extraction_policies ?? [];
355-
this.extractionPolicies = policies;
356-
return policies;
414+
const extractionGraphs = resp.data.namespace?.extraction_graphs ?? [];
415+
this.extractionGraphs = extractionGraphs;
416+
return extractionGraphs;
357417
}
358418

359419
async extract({
@@ -385,12 +445,17 @@ class IndexifyClient {
385445
url: string,
386446
mime_type: string,
387447
labels: Record<string, string>,
448+
extractionGraphNames: string | string[],
388449
id?: string
389450
): Promise<AxiosResponse> {
451+
const extractionGraphNamesArray = Array.isArray(extractionGraphNames)
452+
? extractionGraphNames
453+
: [extractionGraphNames];
390454
const resp = await this.client.post("ingest_remote_file", {
391455
url,
392456
mime_type,
393457
labels,
458+
extraction_graph_names: extractionGraphNamesArray,
394459
id,
395460
});
396461
return resp;

src/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import {
88
IIndex,
99
IContentMetadata,
1010
IExtractedMetadata,
11+
IExtractionGraph,
1112
IExtractionPolicy,
1213
ISearchIndexResponse,
1314
ITask,
@@ -27,6 +28,7 @@ export {
2728
IIndex,
2829
IContentMetadata,
2930
IExtractedMetadata,
31+
IExtractionGraph,
3032
IExtractionPolicy,
3133
ISearchIndexResponse,
3234
ITask,

src/types.ts

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
export interface INamespace {
22
name: string;
3-
extraction_policies: IExtractionPolicy[];
3+
extraction_graphs: IExtractionGraph[];
44
}
55

66
export interface IEmbeddingSchema {
@@ -43,6 +43,7 @@ export interface IBaseContentMetadata {
4343
source: string;
4444
size: number;
4545
hash: string;
46+
extraction_graph_names: string[];
4647
}
4748

4849
export interface IContentMetadata extends IBaseContentMetadata {
@@ -52,10 +53,17 @@ export interface IContentMetadata extends IBaseContentMetadata {
5253
export interface IExtractedMetadata {
5354
id: string;
5455
content_id: string;
55-
metadata: object[];
56+
metadata: { [key: string]: any };
5657
extractor_name: string;
5758
}
5859

60+
export interface IExtractionGraph {
61+
id: string;
62+
name: string;
63+
namespace: string;
64+
extraction_policies: IExtractionPolicy[];
65+
}
66+
5967
export interface IExtractionPolicy {
6068
id?: string;
6169
extractor: string;
@@ -133,8 +141,8 @@ export interface ISearchIndexResponse {
133141
root_content_metadata?: IContentMetadata;
134142
}
135143

136-
export interface IAddExtractorPolicyResponse {
137-
index_names: string[];
144+
export interface IAddExtractorGraphResponse {
145+
indexes: string[];
138146
}
139147

140148
export interface IMtlsConfig {

0 commit comments

Comments
 (0)