 # Import CodeProject.AI SDK
 from codeproject_ai_sdk import RequestData, ModuleRunner, ModuleOptions, LogMethod, LogVerbosity, JSON

-from multimode_llm import MultiModeLLM, use_ONNX, use_MLX
+from multimode_llm import MultiModeLLM, accel_mode
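+# accel_mode replaces the separate use_ONNX / use_MLX flags: judging by the
+# checks below it is a single value that is 'ONNX', 'MLX', or None (CPU-only).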

 class MultiModeLLM_adapter(ModuleRunner):

     def initialise(self) -> None:

-        if use_ONNX:
+        if accel_mode == 'ONNX':
             (cuda_major, cuda_minor) = self.system_info.getCudaVersion
             if cuda_major and (cuda_major >= 12 or (cuda_major == 11 and cuda_minor == 8)):
                 self.inference_device  = "GPU"
-                self.inference_library = "CUDA"
+                self.inference_library = "ONNX/CUDA"
                 self.device            = "cuda"
                 self.model_repo        = "microsoft/Phi-3-vision-128k-instruct-onnx-cuda"
                 self.model_filename    = None # "Phi-3-vision-128k-instruct.gguf"
@@ -36,13 +36,15 @@ def initialise(self) -> None:
                 self.model_repo     = "microsoft/Phi-3-vision-128k-instruct-onnx-cpu"
                 self.model_filename = None # "Phi-3-vision-128k-instruct.gguf"
                 self.models_dir     = "cpu-int4-rtn-block-32-acc-level-4"
-        elif use_MLX:
+
+        elif accel_mode == 'MLX':   # macOS
             self.inference_device  = "GPU"
             self.inference_library = "MLX"
             self.device            = "mps"
             self.model_repo        = "microsoft/Phi-3.5-vision-instruct"
             self.model_filename    = None # "Phi-3.5-vision-instruct.gguf"
             self.models_dir        = "models"
+
         else:
             print("*** Multi-modal LLM using CPU only: This module requires > 16Gb RAM")
             # If only...
@@ -55,25 +57,32 @@ def initialise(self) -> None:
             self.model_repo     = "microsoft/Phi-3-vision-128k-instruct"
             self.model_filename = None # "Phi-3-vision-128k-instruct.gguf"
             self.models_dir     = "./models"
-
-        verbose = self.log_verbosity != LogVerbosity.Quiet
-        self.multimode_chat = MultiModeLLM(model_repo=self.model_repo,
-                                           filename=self.model_filename,
-                                           model_dir=os.path.join(ModuleOptions.module_path, self.models_dir),
-                                           device=self.device,
-                                           inference_library=self.inference_library,
-                                           verbose=verbose)
-
-        if self.multimode_chat.model_path:
-            self.log(LogMethod.Info | LogMethod.Server, {
-                "message": f"Using model from '{self.multimode_chat.model_path}'",
-                "loglevel": "information"
-            })
-        else:
+
+
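+        # A self-test on CPU alone is assumed to be too slow to be worthwhile,
+        # so report an error instead of loading the model.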
+        if self._performing_self_test and self.device == "cpu":
             self.log(LogMethod.Error | LogMethod.Server, {
-                "message": f"Unable to load Multi-mode model ",
+                "message": f"Unable to perform self-test without acceleration",
                 "loglevel": "error"
             })
+        else:
+            verbose = self.log_verbosity != LogVerbosity.Quiet
+            self.multimode_chat = MultiModeLLM(model_repo=self.model_repo,
+                                               filename=self.model_filename,
+                                               model_dir=os.path.join(ModuleOptions.module_path, self.models_dir),
+                                               device=self.device,
+                                               inference_library=self.inference_library,
+                                               verbose=verbose)
+
+            if self.multimode_chat.model_path:
+                self.log(LogMethod.Info | LogMethod.Server, {
+                    "message": f"Using model from '{self.multimode_chat.model_path}'",
+                    "loglevel": "information"
+                })
+            else:
+                self.log(LogMethod.Error | LogMethod.Server, {
+                    "message": f"Unable to load Multi-mode model",
+                    "loglevel": "error"
+                })

         self.reply_text = ""
         self.cancelled  = False
@@ -113,7 +122,7 @@ def long_process(self, data: RequestData) -> JSON:
         error = None

         try:
-            if use_ONNX:
+            if accel_mode == 'ONNX':
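+                # The ONNX path streams its output: do_chat() hands back a token
+                # generator and a tokenizer stream rather than the finished reply.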
                 (generator, tokenizer_stream) = self.multimode_chat.do_chat(user_prompt, image,
                                                                             system_prompt,
                                                                             max_tokens=max_tokens,
@@ -196,6 +205,9 @@ def cancel_command_task(self):

     def selftest(self) -> JSON:

+        if accel_mode is None:
+            return { "success": False, "message": "Not performing self-test on CPU due to time taken" }
+
         request_data = RequestData()
         request_data.queue   = self.queue_name
         request_data.command = "prompt"
@@ -213,7 +225,7 @@ def selftest(self) -> JSON:
         print(f"Info: Self-test for {self.module_id}. Success: {result['success']}")
         # print(f"Info: Self-test output for {self.module_id}: {result}")

-        return { "success": result['success'], "message": "MulitModal LLM test successful" }
+        return { "success": result['success'], "message": "MultiModal LLM test successful" }


 if __name__ == "__main__":
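
For reference, here is a minimal sketch of how multimode_llm.py might derive accel_mode, inferred from the three branches above ('ONNX', 'MLX', or None for CPU-only). The detection logic itself is not part of this diff, so the probes below are assumptions rather than the module's actual code:

    # multimode_llm.py (hypothetical sketch, not taken from this diff)
    import platform

    def _detect_accel_mode():
        # Assumption: Apple Silicon macOS implies the MLX path.
        if platform.system() == "Darwin" and platform.machine() == "arm64":
            return "MLX"
        # Assumption: the ONNX path applies whenever onnxruntime-genai imports.
        try:
            import onnxruntime_genai  # noqa: F401
            return "ONNX"
        except ImportError:
            return None   # no acceleration: plain CPU inference

    accel_mode = _detect_accel_mode()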