
Commit 4b9afbb

retrieval : fix memory leak in retrieval query handling (ggml-org#8955)
* retrieval : reuse query_batch to reduce frequent memory allocation
* remove unused whitespace

1 parent 37501d9 commit 4b9afbb

File tree

1 file changed: +3 -1 lines

examples/retrieval/retrieval.cpp

Lines changed: 3 additions & 1 deletion
@@ -253,14 +253,15 @@ int main(int argc, char ** argv) {
         chunks[i].tokens.clear();
     }
 
+    struct llama_batch query_batch = llama_batch_init(n_batch, 0, 1);
+
     // start loop, receive query and return top k similar chunks based on cosine similarity
     std::string query;
     while (true) {
         printf("Enter query: ");
         std::getline(std::cin, query);
         std::vector<int32_t> query_tokens = llama_tokenize(ctx, query, true);
 
-        struct llama_batch query_batch = llama_batch_init(n_batch, 0, 1);
         batch_add_seq(query_batch, query_tokens, 0);
 
         std::vector<float> query_emb(n_embd, 0);
@@ -293,6 +294,7 @@ int main(int argc, char ** argv) {
     }
 
     // clean up
+    llama_batch_free(query_batch);
     llama_print_timings(ctx);
     llama_free(ctx);
     llama_free_model(model);
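The pattern the diff establishes is allocate once, reuse per query, free once. Below is a minimal sketch of that lifetime (an illustration, not the full example): llama_batch_init and llama_batch_free are the libllama calls the diff touches, while the tokenize/decode/ranking steps inside the loop are elided as comments.

    // allocate the query batch a single time, before the interactive loop
    struct llama_batch query_batch = llama_batch_init(n_batch, 0, 1);

    while (true) {
        // ... read a query, tokenize it, fill query_batch, decode,
        //     and rank chunks by cosine similarity ...
        // the same batch object is refilled on every iteration; the old code
        // called llama_batch_init() inside the loop and never freed the result
    }

    // clean up: a single llama_batch_free() pairs with the single init above
    llama_batch_free(query_batch);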
