
Commit 54a9b6f

[Bug-fix:] QEFFAutoModelForImageTextToText class docstring fixing (#372)
This pull request addresses the issue with the code block in the class docstring of `QEFFAutoModelForImageTextToText`. Previously, the docstring was not displaying correctly on `gh-pages` due to an error in the Python code block.

---------

Signed-off-by: Abukhoyer Shaik <[email protected]>
1 parent bdcd7e5 commit 54a9b6f
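For context: reStructuredText requires a blank line between a `.. code-block:: python` directive and its indented body; without it, Sphinx typically reports an error for the directive and the snippet never renders, which is the likely cause of the `gh-pages` issue mentioned above. A minimal sketch of the corrected docstring shape; the summary line and the exact indentation are illustrative, only the added blank line is taken from the diff:

    class QEFFAutoModelForImageTextToText:
        """
        QEfficient wrapper for image-text-to-text models (illustrative summary line).

        .. code-block:: python

            # The blank line right above is the fix: without it, docutils treats the
            # indented lines as extra directive arguments and rendering fails.
            import requests
            from PIL import Image
            from transformers import AutoProcessor, TextStreamer
        """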

3 files changed (+14, -13 lines)

QEfficient/transformers/models/modeling_auto.py

+5 -4

@@ -1176,6 +1176,7 @@ class QEFFAutoModelForImageTextToText:
 :kv_offload (bool): Flag to toggle between single and dual QPC approaches. If set to False, the Single QPC approach will be used; otherwise, the dual QPC approach will be applied. Defaults to True.
 
 .. code-block:: python
+
 import requests
 from PIL import Image
 from transformers import AutoProcessor, TextStreamer

@@ -1189,8 +1190,8 @@ class QEFFAutoModelForImageTextToText:
 image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg"
 
 ## STEP - 1 Load the Processor and Model, and kv_offload=True/False for dual and single qpc
-processor = AutoProcessor.from_pretrained(model_name, token=token)
-model = QEFFAutoModelForImageTextToText.from_pretrained(model_name, token=token, attn_implementation="eager", kv_offload=False)
+processor = AutoProcessor.from_pretrained(model_name, token=HF_TOKEN)
+model = QEFFAutoModelForImageTextToText.from_pretrained(model_name, token=HF_TOKEN, attn_implementation="eager", kv_offload=False)
 
 ## STEP - 2 Export & Compile the Model
 model.compile(

@@ -1220,12 +1221,12 @@ class QEFFAutoModelForImageTextToText:
 return_tensors="pt",
 add_special_tokens=False,
 padding="max_length",
-max_length=prefill_seq_len,
+max_length=32,
 )
 
 ## STEP - 4 Run Inference on the compiled model
 streamer = TextStreamer(processor.tokenizer)
-model.generate(inputs=inputs, streamer=streamer, generation_len=generation_len)
+model.generate(inputs=inputs, streamer=streamer, generation_len=512)
 
 """
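Beyond the added blank line, the diff replaces names the example never defined (`token`, `prefill_seq_len`, `generation_len`) with an explicit `HF_TOKEN` variable and literal values (`32`, `512`), so the snippet can be copied as-is. A rough sketch of how the corrected step 1 now reads; the model id, the token value, and the top-level import path are assumptions, not taken from the diff:

    import requests  # used later in the docstring example to fetch image_url
    from PIL import Image
    from transformers import AutoProcessor, TextStreamer

    from QEfficient import QEFFAutoModelForImageTextToText  # assumed import path

    HF_TOKEN = ""  # assumed: a Hugging Face access token, needed only for gated models
    model_name = "llava-hf/llava-1.5-7b-hf"  # assumed model id, not taken from the diff

    ## STEP - 1 Load the Processor and Model, and kv_offload=True/False for dual and single qpc
    processor = AutoProcessor.from_pretrained(model_name, token=HF_TOKEN)
    model = QEFFAutoModelForImageTextToText.from_pretrained(
        model_name, token=HF_TOKEN, attn_implementation="eager", kv_offload=False
    )
    # STEP - 2 onwards (compile, preprocess inputs, generate) continues as in the docstring above.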

tests/transformers/spd/test_pld_inference.py

+3 -3

@@ -145,9 +145,9 @@ def get_padded_input_len(input_len: int, prefill_seq_len: int, ctx_len: int):
     """
     num_chunks = -(input_len // -prefill_seq_len)  # ceil divide without float
     input_len_padded = num_chunks * prefill_seq_len  # Convert input_len to a multiple of prefill_seq_len
-    assert (
-        input_len_padded <= ctx_len
-    ), "input_len rounded to nearest prefill_seq_len multiple should be less than ctx_len"
+    assert input_len_padded <= ctx_len, (
+        "input_len rounded to nearest prefill_seq_len multiple should be less than ctx_len"
+    )
     return input_len_padded
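For reference, the helper this hunk touches reads as follows after the change (the docstring body is not shown in the diff and is paraphrased here), together with a small usage example:

    def get_padded_input_len(input_len: int, prefill_seq_len: int, ctx_len: int):
        """Round input_len up to the next multiple of prefill_seq_len (paraphrased docstring)."""
        num_chunks = -(input_len // -prefill_seq_len)  # ceil divide without float
        input_len_padded = num_chunks * prefill_seq_len  # Convert input_len to a multiple of prefill_seq_len
        assert input_len_padded <= ctx_len, (
            "input_len rounded to nearest prefill_seq_len multiple should be less than ctx_len"
        )
        return input_len_padded

    # An 80-token prompt with a 32-token prefill chunk pads to 96, which must still fit in ctx_len:
    print(get_padded_input_len(80, 32, 128))  # 96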

tests/transformers/spd/test_spd_inference.py

+6 -6

@@ -75,9 +75,9 @@ def get_padded_input_len(input_len: int, prefill_seq_len: int, ctx_len: int):
     """
     num_chunks = -(input_len // -prefill_seq_len)  # ceil divide without float
     input_len_padded = num_chunks * prefill_seq_len  # Convert input_len to a multiple of prefill_seq_len
-    assert (
-        input_len_padded <= ctx_len
-    ), "input_len rounded to nearest prefill_seq_len multiple should be less than ctx_len"
+    assert input_len_padded <= ctx_len, (
+        "input_len rounded to nearest prefill_seq_len multiple should be less than ctx_len"
+    )
     return input_len_padded

@@ -320,9 +320,9 @@ def test_spec_decode_inference(
     for prompt, generation in zip(prompts, batch_decode):
         print(f"{prompt=} {generation=}")
     # validation check
-    assert mean_num_accepted_tokens == float(
-        num_speculative_tokens + 1
-    ), f"mean number of accepted tokens is {mean_num_accepted_tokens} but should be {num_speculative_tokens + 1}"
+    assert mean_num_accepted_tokens == float(num_speculative_tokens + 1), (
+        f"mean number of accepted tokens is {mean_num_accepted_tokens} but should be {num_speculative_tokens + 1}"
+    )
     del target_model_session
     del draft_model_session
     generated_ids = np.asarray(generated_ids[0]).flatten()
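The second hunk applies the same assert style to the validation check of the speculative-decoding test, which expects every draft token plus the one bonus token to be accepted on average. A toy illustration of that invariant; the per-iteration bookkeeping is invented here, only the assert comes from the test:

    num_speculative_tokens = 4
    accepted_per_iteration = [5, 5, 5]  # hypothetical counts: all 4 draft tokens + 1 bonus token accepted each step

    mean_num_accepted_tokens = sum(accepted_per_iteration) / len(accepted_per_iteration)
    assert mean_num_accepted_tokens == float(num_speculative_tokens + 1), (
        f"mean number of accepted tokens is {mean_num_accepted_tokens} but should be {num_speculative_tokens + 1}"
    )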
