[FIX] Fix load_model bug: pass `debug` as a parameter instead of reading `args.debug` from outside the function's scope (lm-sys#259)

merrymercy · web-flow · commit d62922cbd909 · 2023-04-06T17:07:20.000-07:00
diff --git a/fastchat/serve/cli.py b/fastchat/serve/cli.py
@@ -13,7 +13,7 @@
 from fastchat.serve.monkey_patch_non_inplace import replace_llama_attn_with_non_inplace_operations
 
 
-def load_model(model_name, device, num_gpus, load_8bit=False):
+def load_model(model_name, device, num_gpus, load_8bit=False, debug=False):
     if device == "cpu":
         kwargs = {}
     elif device == "cuda":
@@ -52,7 +52,7 @@ def load_model(model_name, device, num_gpus, load_8bit=False):
     if (device == "mps" or device == "cpu") and load_8bit:
         compress_module(model)
 
-    if args.debug:
+    if debug:
         print(model)
 
     return model, tokenizer
@@ -129,7 +129,7 @@ def main(args):
 
     # Model
     model, tokenizer = load_model(args.model_name, args.device,
-        args.num_gpus, args.load_8bit)
+        args.num_gpus, args.load_8bit, args.debug)
 
     # Chat
     conv = conv_templates[args.conv_template].copy()
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "fschat"
-version = "0.1.7"
+version = "0.1.8"
 description = "An open platform for training, serving, and evaluating large language model based chatbots."
 readme = "README.md"
 requires-python = ">=3.8"