Skip to content

Commit d2c30c6

Browse files
committed
context : pad the total context to 256
1 parent 5d884e6 commit d2c30c6

File tree

2 files changed: +5 −0 lines changed

include/llama.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,7 @@ extern "C" {
463463

464464
// NOTE: After creating a llama_context, it is recommended to query the actual values using these functions
465465
// In some cases the requested values via llama_context_params may differ from the actual values used by the context
466+
// ref: https://github.com/ggml-org/llama.cpp/pull/17046#discussion_r2503085732
466467
LLAMA_API uint32_t llama_n_ctx (const struct llama_context * ctx);
467468
LLAMA_API uint32_t llama_n_ctx_seq (const struct llama_context * ctx);
468469
LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx);

src/llama-context.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,14 @@ llama_context::llama_context(
112112
}
113113
}
114114

115+
// ref: https://github.com/ggml-org/llama.cpp/pull/17046#discussion_r2503085732
116+
cparams.n_ctx = GGML_PAD(cparams.n_ctx, 256);
117+
115118
if (cparams.kv_unified) {
116119
cparams.n_ctx_seq = cparams.n_ctx;
117120
} else {
118121
cparams.n_ctx_seq = cparams.n_ctx / cparams.n_seq_max;
122+
cparams.n_ctx_seq = GGML_PAD(cparams.n_ctx_seq, 256);
119123

120124
if (cparams.n_ctx_seq == 0) {
121125
throw std::runtime_error("n_ctx_seq == 0");

0 commit comments

Comments (0)