Skip to content

Commit d2c30c6

Browse files
committed
context : pad the total context to 256
1 parent 5d884e6 commit d2c30c6

File tree

2 files changed: +5 −0 lines changed

include/llama.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,7 @@ extern "C" {
463463

464464
// NOTE: After creating a llama_context, it is recommended to query the actual values using these functions
465465
// In some cases the requested values via llama_context_params may differ from the actual values used by the context
466+
// ref: https://github.com/ggml-org/llama.cpp/pull/17046#discussion_r2503085732
466467
LLAMA_API uint32_t llama_n_ctx (const struct llama_context * ctx);
467468
LLAMA_API uint32_t llama_n_ctx_seq (const struct llama_context * ctx);
468469
LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx);

src/llama-context.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,14 @@ llama_context::llama_context(
112112
}
113113
}
114114

115+
// ref: https://github.com/ggml-org/llama.cpp/pull/17046#discussion_r2503085732
116+
cparams.n_ctx = GGML_PAD(cparams.n_ctx, 256);
117+
115118
if (cparams.kv_unified) {
116119
cparams.n_ctx_seq = cparams.n_ctx;
117120
} else {
118121
cparams.n_ctx_seq = cparams.n_ctx / cparams.n_seq_max;
122+
cparams.n_ctx_seq = GGML_PAD(cparams.n_ctx_seq, 256);
119123

120124
if (cparams.n_ctx_seq == 0) {
121125
throw std::runtime_error("n_ctx_seq == 0");

0 commit comments

Comments (0)