diff --git a/xinference/model/llm/llm_family_modelscope.json b/xinference/model/llm/llm_family_modelscope.json index 107fa3123e..f248ec4be7 100644 --- a/xinference/model/llm/llm_family_modelscope.json +++ b/xinference/model/llm/llm_family_modelscope.json @@ -571,6 +571,39 @@ "model_hub": "modelscope", "model_id": "AI-ModelScope/CodeLlama-34b-Instruct-hf", "model_revision": "v1.0.2" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": 7, + "quantizations": [ + "Q4_K_M" + ], + "model_hub": "modelscope", + "model_id": "Xorbits/CodeLlama-7B-Instruct-GGUF", + "model_file_name_template": "codellama-7b-instruct.{quantization}.gguf", + "model_revision": "v0.0.1" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": 13, + "quantizations": [ + "Q4_K_M" + ], + "model_hub": "modelscope", + "model_id": "Xorbits/CodeLlama-13B-Instruct-GGUF", + "model_file_name_template": "codellama-13b-instruct.{quantization}.gguf", + "model_revision": "v0.0.1" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": 34, + "quantizations": [ + "Q4_K_M" + ], + "model_hub": "modelscope", + "model_id": "Xorbits/CodeLlama-34B-Instruct-GGUF", + "model_file_name_template": "codellama-34b-instruct.{quantization}.gguf", + "model_revision": "v0.1.0" } ], "prompt_style": {