From d5109644b6d982d79d4fdc3a1ce52ec0dd786592 Mon Sep 17 00:00:00 2001
From: UranusSeven <109661872+UranusSeven@users.noreply.github.com>
Date: Tue, 11 Jul 2023 19:55:57 +0800
Subject: [PATCH] ENH: optimize error msg for foundation models (#153)

---
 README.md                | 19 +++++++++++--------
 xinference/core/model.py |  6 ++----
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index af5481f19f..825a4f3fd4 100644
--- a/README.md
+++ b/README.md
@@ -158,17 +158,20 @@ To view the builtin models, run the following command:
 $ xinference list --all
 ```
 
-| Name | Format | Size (in billions) | Quantization |
-| -------------------- | ------- | ------------------ |--------------------------------------------------------------------------------------------------------------------------------|
-| baichuan | ggmlv3 | [7] | ['q2_K', 'q3_K_L', 'q3_K_M', 'q3_K_S', 'q4_0', 'q4_1', 'q4_K_M', 'q4_K_S', 'q5_0', 'q5_1', 'q5_K_M', 'q5_K_S', 'q6_K', 'q8_0'] |
-| wizardlm-v1.0 | ggmlv3 | [7, 13, 33] | ['q2_K', 'q3_K_L', 'q3_K_M', 'q3_K_S', 'q4_0', 'q4_1', 'q4_K_M', 'q4_K_S', 'q5_0', 'q5_1', 'q5_K_M', 'q5_K_S', 'q6_K', 'q8_0'] |
-| vicuna-v1.3 | ggmlv3 | [7, 13] | ['q2_K', 'q3_K_L', 'q3_K_M', 'q3_K_S', 'q4_0', 'q4_1', 'q4_K_M', 'q4_K_S', 'q5_0', 'q5_1', 'q5_K_M', 'q5_K_S', 'q6_K', 'q8_0'] |
-| orca | ggmlv3 | [3, 7, 13] | ['q4_0', 'q4_1', 'q5_0', 'q5_1', 'q8_0'] |
-| chatglm | ggmlv3 | [6] | ['q4_0', 'q4_1', 'q5_0', 'q5_1', 'q8_0'] |
-| chatglm2 | ggmlv3 | [6] | ['q4_0', 'q4_1', 'q5_0', 'q5_1', 'q8_0'] |
+| Name                 | Type             | Language | Format | Size (in billions) | Quantization                           |
+| -------------------- |------------------|----------|--------|--------------------|----------------------------------------|
+| baichuan             | Foundation Model | en, zh   | ggmlv3 | 7                  | 'q2_K', 'q3_K_L', ... , 'q6_K', 'q8_0' |
+| chatglm              | SFT Model        | en, zh   | ggmlv3 | 6                  | 'q4_0', 'q4_1', 'q5_0', 'q5_1', 'q8_0' |
+| chatglm2             | SFT Model        | en, zh   | ggmlv3 | 6                  | 'q4_0', 'q4_1', 'q5_0', 'q5_1', 'q8_0' |
+| wizardlm-v1.0        | SFT Model        | en       | ggmlv3 | 7, 13, 33          | 'q2_K', 'q3_K_L', ... , 'q6_K', 'q8_0' |
+| vicuna-v1.3          | SFT Model        | en       | ggmlv3 | 7, 13              | 'q2_K', 'q3_K_L', ... , 'q6_K', 'q8_0' |
+| orca                 | SFT Model        | en       | ggmlv3 | 3, 7, 13           | 'q4_0', 'q4_1', 'q5_0', 'q5_1', 'q8_0' |
+
 **NOTE**:
 - Xinference will download models automatically for you, and by default the models will be saved under `${USER}/.xinference/cache`.
+- Foundation models only provide the `generate` interface.
+- SFT models provide both the `generate` and `chat` interfaces.
 
 ## Roadmap
 
 Xinference is currently under active development. Here's a roadmap outlining our planned
diff --git a/xinference/core/model.py b/xinference/core/model.py
index 3a43ea0339..aa067a970e 100644
--- a/xinference/core/model.py
+++ b/xinference/core/model.py
@@ -77,10 +77,8 @@ async def _wrap_generator(self, ret: Any):
         return ret
 
     async def generate(self, prompt: str, *args, **kwargs):
-        logger.warning("Generate, self address: %s", self.address)
-
         if not hasattr(self._model, "generate"):
-            raise AttributeError("generate")
+            raise AttributeError(f"Model {self._model.model_spec} is not for generate.")
 
         return self._wrap_generator(
             getattr(self._model, "generate")(prompt, *args, **kwargs)
@@ -88,7 +86,7 @@ async def generate(self, prompt: str, *args, **kwargs):
 
     async def chat(self, prompt: str, *args, **kwargs):
         if not hasattr(self._model, "chat"):
-            raise AttributeError("chat")
+            raise AttributeError(f"Model {self._model.model_spec} is not for chat.")
 
         return self._wrap_generator(
             getattr(self._model, "chat")(prompt, *args, **kwargs)