Skip to content

Commit

Permalink
BUG: Fix missing qwen 1.5 7b gguf (#1027)
Browse files Browse the repository at this point in the history
  • Loading branch information
codingl2k1 authored Feb 22, 2024
1 parent 9efc998 commit 79e2d06
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 12 deletions.
51 changes: 48 additions & 3 deletions xinference/model/llm/llm_family.json
Original file line number Diff line number Diff line change
Expand Up @@ -1514,10 +1514,33 @@
],
"model_id": "Qwen/Qwen1.5-72B-Chat-AWQ"
},
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "0_5",
+ "quantizations": [
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0"
+ ],
+ "model_id": "Qwen/Qwen1.5-0.5B-Chat-GGUF",
+ "model_file_name_template": "qwen1_5-0_5b-chat-{quantization}.gguf"
+ },
{
"model_format": "ggufv2",
"model_size_in_billions": "1_8",
"quantizations": [
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
"q8_0"
],
"model_id": "Qwen/Qwen1.5-1.8B-Chat-GGUF",
Expand All @@ -1527,6 +1550,13 @@
"model_format": "ggufv2",
"model_size_in_billions": 4,
"quantizations": [
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
"q8_0"
],
"model_id": "Qwen/Qwen1.5-4B-Chat-GGUF",
Expand All @@ -1536,7 +1566,14 @@
"model_format": "ggufv2",
"model_size_in_billions": 7,
"quantizations": [
- "q5_k_m"
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0"
],
"model_id": "Qwen/Qwen1.5-7B-Chat-GGUF",
"model_file_name_template": "qwen1_5-7b-chat-{quantization}.gguf"
Expand All @@ -1545,7 +1582,14 @@
"model_format": "ggufv2",
"model_size_in_billions": 14,
"quantizations": [
- "q5_k_m"
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0"
],
"model_id": "Qwen/Qwen1.5-14B-Chat-GGUF",
"model_file_name_template": "qwen1_5-14b-chat-{quantization}.gguf"
Expand All @@ -1554,7 +1598,8 @@
"model_format": "ggufv2",
"model_size_in_billions": 72,
"quantizations": [
- "q2_k"
+ "q2_k",
+ "q3_k_m"
],
"model_id": "Qwen/Qwen1.5-72B-Chat-GGUF",
"model_file_name_template": "qwen1_5-72b-chat-{quantization}.gguf"
Expand Down
54 changes: 45 additions & 9 deletions xinference/model/llm/llm_family_modelscope.json
Original file line number Diff line number Diff line change
Expand Up @@ -1821,61 +1821,97 @@
"model_format": "ggufv2",
"model_size_in_billions": "0_5",
"quantizations": [
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
"q8_0"
],
"model_id": "qwen/Qwen1.5-0.5B-Chat-GGUF",
"model_hub": "modelscope",
- "model_file_name_template": "qwen1.5-0.5b-chat-{quantization}.gguf"
+ "model_file_name_template": "qwen1_5-0_5b-chat-{quantization}.gguf"
},
{
"model_format": "ggufv2",
"model_size_in_billions": "1_8",
"quantizations": [
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
"q8_0"
],
"model_id": "qwen/Qwen1.5-1.8B-Chat-GGUF",
"model_hub": "modelscope",
- "model_file_name_template": "qwen1.5-1_8b-chat-{quantization}.gguf"
+ "model_file_name_template": "qwen1_5-1_8b-chat-{quantization}.gguf"
},
{
"model_format": "ggufv2",
"model_size_in_billions": 4,
"quantizations": [
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
"q8_0"
],
"model_id": "qwen/Qwen1.5-4B-Chat-GGUF",
"model_hub": "modelscope",
- "model_file_name_template": "qwen1.5-4b-chat-{quantization}.gguf"
+ "model_file_name_template": "qwen1_5-4b-chat-{quantization}.gguf"
},
{
"model_format": "ggufv2",
"model_size_in_billions": 7,
"quantizations": [
- "q5_k_m"
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0"
],
"model_id": "qwen/Qwen1.5-7B-Chat-GGUF",
"model_hub": "modelscope",
- "model_file_name_template": "qwen1.5-7b-chat-{quantization}.gguf"
+ "model_file_name_template": "qwen1_5-7b-chat-{quantization}.gguf"
},
{
"model_format": "ggufv2",
"model_size_in_billions": 14,
"quantizations": [
- "q5_k_m"
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0"
],
"model_id": "qwen/Qwen1.5-14B-Chat-GGUF",
"model_hub": "modelscope",
- "model_file_name_template": "qwen1.5-14b-chat-{quantization}.gguf"
+ "model_file_name_template": "qwen1_5-14b-chat-{quantization}.gguf"
},
{
"model_format": "ggufv2",
"model_size_in_billions": 72,
"quantizations": [
- "q2_k"
+ "q2_k",
+ "q3_k_m"
],
"model_id": "qwen/Qwen1.5-72B-Chat-GGUF",
"model_hub": "modelscope",
- "model_file_name_template": "qwen1.5-72b-chat-{quantization}.gguf"
+ "model_file_name_template": "qwen1_5-72b-chat-{quantization}.gguf"
}
],
"prompt_style": {
Expand Down

0 comments on commit 79e2d06

Please sign in to comment.