zilliztech · RauchenwaldC · Sep 7, 2025
diff --git a/.env.example b/.env.example
@@ -20,6 +20,10 @@ EMBEDDING_MODEL=text-embedding-3-small
 # You can customize it according to the throughput of your embedding model. Generally, larger batch size means less indexing time.
 EMBEDDING_BATCH_SIZE=100
 
+# Maximum number of chunks to index before indexing stops (default: 450000).
+# Set a lower value to limit indexing for very large codebases. Values below 1000 are raised to 1000.
+# CHUNK_LIMIT=450000
+
 # =============================================================================
 # OpenAI Configuration
 # =============================================================================

diff --git a/packages/core/src/context.ts b/packages/core/src/context.ts
@@ -702,8 +702,9 @@ export class Context {
     ): Promise<{ processedFiles: number; totalChunks: number; status: 'completed' | 'limit_reached' }> {
         const isHybrid = this.getIsHybrid();
         const EMBEDDING_BATCH_SIZE = Math.max(1, parseInt(envManager.get('EMBEDDING_BATCH_SIZE') || '100', 10));
-        const CHUNK_LIMIT = 450000;
+        const CHUNK_LIMIT = Math.max(1000, parseInt(envManager.get('CHUNK_LIMIT') || '450000', 10));
         console.log(`[Context] 🔧 Using EMBEDDING_BATCH_SIZE: ${EMBEDDING_BATCH_SIZE}`);
+        console.log(`[Context] 🔧 Using CHUNK_LIMIT: ${CHUNK_LIMIT}`);
 
         let chunkBuffer: Array<{ chunk: CodeChunk; codebasePath: string }> = [];
         let processedFiles = 0;