Skip to content

Commit d62922c

Browse files
authored
[FIX] Fix load model bugs (lm-sys#259)
1 parent e2de15f commit d62922c

File tree

2 files changed

+4
-4
lines changed

2 files changed

+4
-4
lines changed

fastchat/serve/cli.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
from fastchat.serve.monkey_patch_non_inplace import replace_llama_attn_with_non_inplace_operations
1414

1515

16-
def load_model(model_name, device, num_gpus, load_8bit=False):
16+
def load_model(model_name, device, num_gpus, load_8bit=False, debug=False):
1717
if device == "cpu":
1818
kwargs = {}
1919
elif device == "cuda":
@@ -52,7 +52,7 @@ def load_model(model_name, device, num_gpus, load_8bit=False):
5252
if (device == "mps" or device == "cpu") and load_8bit:
5353
compress_module(model)
5454

55-
if args.debug:
55+
if debug:
5656
print(model)
5757

5858
return model, tokenizer
@@ -129,7 +129,7 @@ def main(args):
129129

130130
# Model
131131
model, tokenizer = load_model(args.model_name, args.device,
132-
args.num_gpus, args.load_8bit)
132+
args.num_gpus, args.load_8bit, args.debug)
133133

134134
# Chat
135135
conv = conv_templates[args.conv_template].copy()

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "fschat"
7-
version = "0.1.7"
7+
version = "0.1.8"
88
description = "An open platform for training, serving, and evaluating large language model based chatbots."
99
readme = "README.md"
1010
requires-python = ">=3.8"

0 commit comments

Comments
 (0)