Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 37 additions & 1 deletion packages/api/src/functions/documents-post.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,31 @@ export async function postDocuments(request: HttpRequest, context: InvocationCon

// Initialize embeddings model and vector database
const embeddings = new AzureOpenAIEmbeddings({ azureADTokenProvider });
await AzureCosmosDBNoSQLVectorStore.fromDocuments(documents, embeddings, { credentials });
const store = await AzureCosmosDBNoSQLVectorStore.fromDocuments([], embeddings, { credentials });

// Remove existing documents with the same filename to avoid duplicates
try {
await store.delete({
filter: `SELECT * FROM c WHERE c.metadata.source = "${filename.replaceAll('"', '\\"')}"`,
});
} catch (error: unknown) {
// If deletion fails (e.g., container doesn't exist yet), just log and continue
context.log(`Warning: Could not delete existing documents: ${(error as Error).message}`);
}

// Add the new documents
await store.addDocuments(documents);
} else {
// If no environment variables are set, it means we are running locally
context.log('No Azure OpenAI endpoint set, using Ollama models and local DB');
const embeddings = new OllamaEmbeddings({ model: ollamaEmbeddingsModel });
const folderExists = await checkFolderExists(faissStoreFolder);
if (folderExists) {
const store = await FaissStore.load(faissStoreFolder, embeddings);

// Remove existing documents with the same filename to avoid duplicates
await removeDuplicateDocuments(store, filename);

await store.addDocuments(documents);
await store.save(faissStoreFolder);
} else {
Expand Down Expand Up @@ -90,6 +107,25 @@ export async function postDocuments(request: HttpRequest, context: InvocationCon
}
}

/**
 * Removes every document from the FAISS store whose `metadata.source` equals
 * the given filename, so that adding the same file again does not leave
 * duplicate entries behind.
 *
 * @param store - FAISS vector store to prune in place.
 * @param filename - Source filename compared against each document's `metadata.source`.
 * @returns A promise that resolves once the matching documents (if any) have been deleted.
 */
async function removeDuplicateDocuments(store: FaissStore, filename: string): Promise<void> {
  const docstore = store.getDocstore();
  const mapping = store.getMapping();

  // Only the mapped document IDs matter here; the vector-index keys of the
  // mapping are not needed to decide what to delete.
  const idsToDelete: string[] = Object.values(mapping).filter((documentId) => {
    const document = docstore.search(documentId);
    return document?.metadata?.source === filename;
  });

  // Skip the delete call entirely when nothing matches.
  if (idsToDelete.length > 0) {
    await store.delete({ ids: idsToDelete });
  }
}

async function checkFolderExists(folderPath: string): Promise<boolean> {
try {
const stats = await fs.stat(folderPath);
Expand Down