deepseek-r1

femto · Feb 17, 2025 · 7faa8a3 · 7faa8a3
1 parent dcd26ab
commit 7faa8a3
Show file tree

Hide file tree

Showing 13 changed files with 45 additions and 250 deletions.
diff --git a/config/config.yaml.example b/config/config.yaml.example
@@ -21,7 +21,7 @@ models:
     api_type: "openai"
     base_url: "${DEFAULT_BASE_URL}"
     api_key: "${DEFAULT_API_KEY}"
-    model: "deepseek-chat"
+    model: "${DEFAULT_MODEL}"
     temperature: 0
   "gpt-4-0125":
     api_type: "openai"
@@ -33,6 +33,12 @@ models:
     base_url: "${CLAUDE_BASE_URL}"
     api_key: "${CLAUDE_API_KEY}"
     temperature: 0.7
+  "deepseek-r1":
+   api_type: "azure_inference"  # or azure / ollama / groq etc.
+   api_key: "YOUR_KEY"
+   base_url: "YOUR_URL"
+   model: "DeepSeek-R1"
+   temperature: 0.1
 ell:
   store: 'logs'
   autocommit:true

diff --git a/examples/smart_minion/brain.py b/examples/smart_minion/brain.py
@@ -17,7 +17,8 @@
 
 async def smart_brain():
     # 使用从 minion/__init__.py 导入的 config 对象
-    model = "default"
+    model = "gpt-4o"
+    model = "deepseek-r1"
     #model = "llama3.2"
     llm_config = config.models.get(model)
 
@@ -30,8 +31,9 @@ async def smart_brain():
         llm=llm,
         #llms={"route": [ "llama3.2","llama3.1"]}
     )
-    # obs, score, *_ = await brain.step(query="what's the solution for game of 24 for 4 3 9 8")
-    # print(obs)
+    obs, score, *_ = await brain.step(query="what's the solution for game of 24 for 1 3 4 6", route="python")
+    print(obs)
+
     current_file_dir = os.path.dirname(os.path.abspath(__file__))
     cache_plan = os.path.join(current_file_dir, "aime", "plan_gpt4o.1.json")
     obs, score, *_ = await brain.step(

diff --git a/examples/smart_minion/code_contests/evalute_code_contests_hard.py b/examples/smart_minion/code_contests/evalute_code_contests_hard.py
@@ -242,9 +242,9 @@ async def solve_question(item):
     )
     return answer
 
-model = "gpt-4o"
+#model = "gpt-4o"
 #model = "claude"
-#model = "default"
+model = "default"
 
 llm = create_llm_provider(config.models.get(model))
 cost_manager = CostManager()

diff --git a/minion/actions/action_node.py b/minion/actions/action_node.py
@@ -6,7 +6,7 @@
 
 from minion.message_types import Message
 from minion.models.schemas import Answer
-from minion.providers import BaseLLM
+from minion.providers import BaseProvider
 from minion.utils.utils import extract_json
 
 
@@ -21,7 +21,7 @@ async def __call__(self, *args, **kwargs):
 
 class LLMActionNode(ActionNode):
     def __init__(self,
-                 llm: BaseLLM,
+                 llm: BaseProvider,
                  input_parser: Optional[callable] = None,
                  output_parser: Optional[callable] = None):
         self.llm = llm

diff --git a/minion/actions/lmp_action_node.py b/minion/actions/lmp_action_node.py
@@ -22,7 +22,7 @@
 class LmpActionNode(LLMActionNode):
     def __init__(self, llm, input_parser=None, output_parser=None):
         super().__init__(llm, input_parser, output_parser)
-        ell.init(**config.ell, default_client=self.llm.client_ell)
+        #ell.init(**config.ell, default_client=self.llm.client_ell)
 
     @ell.complex(model="gpt-4o-mini")
     def ell_call(self, ret):
@@ -84,11 +84,11 @@ async def execute(self, messages: Union[str, Message, List[Message]], response_f
                 api_params['response_format'] = { "type": "text" }
 
             if isinstance(messages, str):
-                messages = [user(messages)]
+                messages = [Message(role="user", content=messages)]
             elif isinstance(messages, Message):
                 messages = [messages]
 
-            messages.append(user(content=prompt))
+            messages.append(Message(role="user", content=prompt))
 
         response = await super().execute(messages, **api_params)
 

diff --git a/minion/main/worker.py b/minion/main/worker.py
@@ -465,7 +465,7 @@ async def execute_calculation(self):
                 logger.error(error)
                 continue  # try again?
             output, error = obs["output"], obs["error"]
-            self.answer = self.input.answer = output
+            self.answer = self.input.answer = output  # the answer is the output only, not the error
             # print("#####OUTPUT#####")
             # print(output)
             print(f"###solution###:{self.answer}")
@@ -611,7 +611,7 @@ async def invoke_minion(self, minion_name, worker_config=None):
             processed_answer = self.input.apply_post_processing(answer)
         else:
             processed_answer = answer
-
+        self.answer = processed_answer
         return worker, processed_answer
 
     async def choose_minion_and_run(self):

diff --git a/minion/providers/__init__.py b/minion/providers/__init__.py
@@ -1,4 +1,4 @@
-from minion.providers.base_llm import BaseLLM
+from minion.providers.base_provider import BaseProvider
 from minion.providers.llm_provider_registry import LLMRegistry, create_llm_provider
 
-__all__ = ["BaseLLM", "LLMRegistry", "create_llm_provider"]
+__all__ = ["BaseProvider", "LLMRegistry", "create_llm_provider"]
diff --git a/minion/providers/base_llm.py → minion/providers/base_provider.py b/minion/providers/base_llm.py → minion/providers/base_provider.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import AsyncIterator, List, Optional
+from typing import AsyncIterator, List, Optional, Any, Generator
 
 
 from minion.configs.config import LLMConfig, config
@@ -8,8 +8,10 @@
 from minion.providers.cost import CostManager
 
 
-class BaseLLM(ABC):
-    def __init__(self, config: LLMConfig):
+class BaseProvider(ABC):
+    """Base class for all LLM providers"""
+
+    def __init__(self, config: Any) -> None:
         self.config = config
         self.cost_manager = CostManager()
         self._setup_retry_config()
@@ -20,7 +22,7 @@ def __init__(self, config: LLMConfig):
 
     @abstractmethod
     def _setup(self) -> None:
-        """初始化具体的LLM客户端"""
+        """Setup the LLM provider with configuration"""
         pass
 
     def _setup_retry_config(self):
@@ -51,14 +53,12 @@ def _setup_retry_config(self):
 
     @abstractmethod
     async def generate(self, messages: List[Message], temperature: Optional[float] = None, **kwargs) -> str:
-        """生成回复"""
+        """Generate completion from messages"""
         pass
 
     @abstractmethod
-    async def generate_stream(
-        self, messages: List[Message], temperature: Optional[float] = None, **kwargs
-    ) -> AsyncIterator[str]:
-        """流式生成回复"""
+    async def generate_stream(self, messages: List[Message], temperature: Optional[float] = None, **kwargs) -> Generator[str, None, str]:
+        """Generate streaming completion from messages"""
         pass
 
     def get_cost(self) -> CostManager:

diff --git a/minion/providers/litellm_provider.py b/minion/providers/litellm_provider.py
@@ -3,7 +3,8 @@
 from typing import AsyncIterator, List, Optional
 
 from minion.message_types import ContentType, Message
-from minion.providers.base_llm import BaseLLM
+
+from minion.providers.base_provider import BaseProvider
 from minion.providers.llm_provider_registry import llm_registry
 
 with warnings.catch_warnings():
@@ -15,7 +16,7 @@
 
 
 @llm_registry.register("litellm")
-class LiteLLMProvider(BaseLLM):
+class LiteLLMProvider(BaseProvider):
     def _setup(self) -> None:
         # 设置API密钥
         os.environ["OPENAI_API_KEY"] = self.config.api_key

diff --git a/minion/providers/llm_provider_registry.py b/minion/providers/llm_provider_registry.py
@@ -2,7 +2,7 @@
 from typing import Type
 
 from minion.configs.config import LLMConfig
-from minion.providers.base_llm import BaseLLM
+from minion.providers.base_provider import BaseProvider
 
 
 class LLMRegistry:
@@ -21,7 +21,7 @@ def decorator(cls):
 
         return decorator
 
-    def get_provider(self, api_type: str) -> Type[BaseLLM]:
+    def get_provider(self, api_type: str) -> Type[BaseProvider]:
         if api_type not in self.providers:
             # 尝试动态导入
             try:
@@ -40,6 +40,6 @@ def get_provider(self, api_type: str) -> Type[BaseLLM]:
 llm_registry = LLMRegistry()
 
 
-def create_llm_provider(config: LLMConfig) -> BaseLLM:
+def create_llm_provider(config: LLMConfig) -> BaseProvider:
     provider_cls = llm_registry.get_provider(config.api_type)
     return provider_cls(config)
diff --git a/minion/providers/openai_provider.py b/minion/providers/openai_provider.py
@@ -6,13 +6,14 @@
 from minion.const import MINION_ROOT
 from minion.logs import log_llm_stream
 from minion.message_types import ImageContent, ImageUtils, Message, MessageContent
-from minion.providers.base_llm import BaseLLM
+from minion.providers.base_provider import BaseProvider
+
 from minion.providers.cost import CostManager
 from minion.providers.llm_provider_registry import llm_registry
 
 
 @llm_registry.register("openai")
-class OpenAIProvider(BaseLLM):
+class OpenAIProvider(BaseProvider):
     def _setup(self) -> None:
         import openai
         # 创建客户端配置
@@ -23,6 +24,7 @@ def _setup(self) -> None:
         self.client_ell = openai.OpenAI(**client_kwargs)
         self.client = openai.AsyncOpenAI(**client_kwargs)
 
+    # TODO: or should we call _convert_messages?
     def _prepare_messages(self, messages: List[Message] | Message | str) -> List[dict]:
         """准备发送给API的消息格式