Skip to content

Commit a6f25fd

Browse files
authored
Merge branch 'main' into image_text_support
2 parents 3a1e5b9 + 7bc9d1e commit a6f25fd

File tree

1 file changed

+23
-0
lines changed

1 file changed

+23
-0
lines changed

examples/basic_gguf_models.py

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
# -----------------------------------------------------------------------------
#
# Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
#
# -----------------------------------------------------------------------------

# Working example: loading a GGUF-quantized model and running it on the
# Qualcomm AI 100 accelerator via QEfficient.

from transformers import AutoTokenizer

from QEfficient import QEFFAutoModelForCausalLM as AutoModelForCausalLM

# Hugging Face Hub repo that hosts the GGUF checkpoint, and the specific
# GGUF file within that repo to load.
model_name = "MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF"
gguf_file = "Mistral-7B-Instruct-v0.3.fp16.gguf"
# NOTE: the original (non-GGUF) checkpoint, kept for reference:
# org_model_name = "mistralai/Mistral-7B-Instruct-v0.3"

# Load the tokenizer and the model directly from the GGUF file.
# `gguf_file` is forwarded to transformers' GGUF loading path.
tokenizer = AutoTokenizer.from_pretrained(model_name, gguf_file=gguf_file)
model = AutoModelForCausalLM.from_pretrained(model_name, gguf_file=gguf_file)

# Compile the model for the target device (returns the path to the generated
# QPC artifacts), then run a sample generation.
# NOTE(review): compile() presumably requires an AI 100 device/SDK to be
# present — confirm before running this example locally.
generated_qpc_path = model.compile(prefill_seq_len=32, ctx_len=128, num_cores=16, num_devices=1)
model.generate(prompts=["How are you?"], tokenizer=tokenizer)

0 commit comments

Comments
 (0)