Skip to content

Commit 83d33ac

Browse files
committed
Remove vocab_size from dynamic axes
Signed-off-by: quic-sanising <[email protected]>
1 parent ebfbaea commit 83d33ac

File tree

2 files changed

+1
-5
lines changed

2 files changed

+1
-5
lines changed

QEfficient/transformers/models/modeling_auto.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -255,7 +255,6 @@ def export(self, export_dir: Optional[str] = None) -> str:
255 255
fbs if self.continuous_batching else bs, self.model.config.vocab_size, dtype=torch.int32)
256 256
dynamic_axes["repetition_penalty_retain_state"] = {
257 257
0: "full_batch_size" if self.continuous_batching else "batch_size",
258 -
1: "vocab_size",
259 258
}
260 259
output_names.append("repetition_penalty_retain_state_RetainedState")
261 260

@@ -266,7 +265,6 @@ def export(self, export_dir: Optional[str] = None) -> str:
266 265
fbs if self.continuous_batching else bs, self.model.config.vocab_size, dtype=torch.int32)
267 266
dynamic_axes["presence_penalty_retain_state"] = {
268 267
0: "full_batch_size" if self.continuous_batching else "batch_size",
269 -
1: "vocab_size",
270 268
}
271 269
output_names.append("presence_penalty_retain_state_RetainedState")
272 270

@@ -374,7 +372,6 @@ def compile(
374 372
}
375 373
if self.include_sampler:
376 374
prefill_specialization.update({
377 -
"vocab_size": self.model.config.vocab_size,
378 375
"max_top_k_ids": constants.Constants.MAX_TOP_K_IDS,
379 376
})
380 377
prefill_specialization.update({"num_logits_to_keep": 1})
@@ -396,7 +393,6 @@ def compile(
396 393
}
397 394
if self.include_sampler:
398 395
decode_specialization.update({
399 -
"vocab_size": self.model.config.vocab_size,
400 396
"max_top_k_ids": constants.Constants.MAX_TOP_K_IDS,
401 397
})
402 398
if self.continuous_batching:

QEfficient/utils/constants.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ def get_models_dir():
44 44

45 45
QEFF_MODELS_DIR = get_models_dir()
46 46

47 -
ONNX_EXPORT_EXAMPLE_BATCH_SIZE = 1
47 +
ONNX_EXPORT_EXAMPLE_BATCH_SIZE = 2
48 48
ONNX_EXPORT_EXAMPLE_SEQ_LEN = 32
49 49
ONNX_EXPORT_EXAMPLE_FBS = 4
50 50
ONNX_EXPORT_EXAMPLE_NLK = 2 # Number of Logits to Keep

0 commit comments

Comments (0)