Update web_demo.py

vansin · web-flow · commit e25ad2d0c956 · 2024-04-19T23:47:18.000+08:00
diff --git a/web_demo.py b/web_demo.py
@@ -1,3 +1,4 @@
+
 # isort: skip_file
 import copy
 import warnings
@@ -16,6 +17,10 @@
 logger = logging.get_logger(__name__)
 
 
+import argparse
+
+
+
 @dataclass
 class GenerationConfig:
     # this config is used for chat to provide more diversity
@@ -163,9 +168,13 @@ def on_btn_click():
 
 
 @st.cache_resource
-def load_model():
-    model = AutoModelForCausalLM.from_pretrained('meta-llama/Meta-Llama-3-8B-Instruct').cuda()
-    tokenizer = AutoTokenizer.from_pretrained('meta-llama/Meta-Llama-3-8B-Instruct', trust_remote_code=True)
+def load_model(arg1):
+    """Load the causal LM and tokenizer identified by ``arg1``; return both."""
+    # Weights are requested as torch.float16 and then moved to the GPU by .cuda().
+    model = AutoModelForCausalLM.from_pretrained(arg1, torch_dtype=torch.float16).cuda()
+    tokenizer = AutoTokenizer.from_pretrained(arg1, trust_remote_code=True)
+
+  
     return model, tokenizer
 
 
@@ -207,10 +216,10 @@ def combine_history(prompt):
     return total_prompt
 
 
-def main():
+def main(arg1):
     # torch.cuda.empty_cache()
     print('load model begin.')
-    model, tokenizer = load_model()
+    model, tokenizer = load_model(arg1)
     print('load model end.')
 
     st.title('Llama3-Instruct')
@@ -259,4 +268,7 @@ def main():
 
 
 if __name__ == '__main__':
-    main()
+    import sys
+    if len(sys.argv) < 2:  # the model name/path is a required first argument
+        raise ValueError('usage: web_demo.py <model_name_or_path>')
+    main(sys.argv[1])