diff --git a/config/config.yaml.example b/config/config.yaml.example
index 956c1dcb..87ce31dc 100644
--- a/config/config.yaml.example
+++ b/config/config.yaml.example
@@ -21,7 +21,7 @@ models:
     api_type: "openai"
     base_url: "${DEFAULT_BASE_URL}"
     api_key: "${DEFAULT_API_KEY}"
-    model: "deepseek-chat"
+    model: "${DEFAULT_MODEL}"
     temperature: 0
   "gpt-4-0125":
     api_type: "openai"
@@ -33,6 +33,12 @@ models:
     base_url: "${CLAUDE_BASE_URL}"
     api_key: "${CLAUDE_API_KEY}"
     temperature: 0.7
+  "deepseek-r1":
+    api_type: "azure_inference" # or azure / ollama / groq etc.
+    api_key: "YOUR_KEY"
+    base_url: "YOUR_URL"
+    model: "DeepSeek-R1"
+    temperature: 0.1
 ell:
   store: 'logs'
   autocommit:true
diff --git a/examples/smart_minion/brain.py b/examples/smart_minion/brain.py
index d530eab5..e76304ab 100644
--- a/examples/smart_minion/brain.py
+++ b/examples/smart_minion/brain.py
@@ -17,7 +17,8 @@ async def smart_brain():
     # 使用从 minion/__init__.py 导入的 config 对象
-    model = "default"
+    model = "gpt-4o"
+    model = "deepseek-r1"
     #model = "llama3.2"
     llm_config = config.models.get(model)
@@ -30,8 +31,9 @@ async def smart_brain():
         llm=llm,
         #llms={"route": [ "llama3.2","llama3.1"]}
     )
-    # obs, score, *_ = await brain.step(query="what's the solution for game of 24 for 4 3 9 8")
-    # print(obs)
+    obs, score, *_ = await brain.step(query="what's the solution for game of 24 for 1 3 4 6", route="python")
+    print(obs)
+
     current_file_dir = os.path.dirname(os.path.abspath(__file__))
     cache_plan = os.path.join(current_file_dir, "aime", "plan_gpt4o.1.json")
     obs, score, *_ = await brain.step(
diff --git a/examples/smart_minion/code_contests/evalute_code_contests_hard.py b/examples/smart_minion/code_contests/evalute_code_contests_hard.py
index 697c16cf..10c5dd6b 100644
--- a/examples/smart_minion/code_contests/evalute_code_contests_hard.py
+++ b/examples/smart_minion/code_contests/evalute_code_contests_hard.py
@@ -242,9 +242,9 @@ async def solve_question(item):
     )
     return answer
 
-model = "gpt-4o"
+#model = "gpt-4o"
 #model = "claude"
-#model = "default"
+model = "default"
 llm = create_llm_provider(config.models.get(model))
 
 cost_manager = CostManager()
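Note: a minimal sketch of how the new "deepseek-r1" entry is meant to be picked up by the examples above. The import paths here are assumptions; create_llm_provider(config.models.get(...)), Brain(llm=...) and brain.step(..., route="python") are taken from the diffs in this PR.

# Hypothetical wiring of the new config entry; not part of this diff.
from minion import config                         # assumed path for the config object mentioned in brain.py
from minion.providers import create_llm_provider

llm_config = config.models.get("deepseek-r1")     # entry added to config.yaml.example above
llm = create_llm_provider(llm_config)             # resolves the provider registered for api_type "azure_inference"
# brain = Brain(llm=llm)
# obs, score, *_ = await brain.step(query="what's the solution for game of 24 for 1 3 4 6", route="python")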
diff --git a/minion/actions/action_node.py b/minion/actions/action_node.py
index a1f685d8..3d0de981 100644
--- a/minion/actions/action_node.py
+++ b/minion/actions/action_node.py
@@ -6,7 +6,7 @@ from minion.message_types import Message
 from minion.models.schemas import Answer
-from minion.providers import BaseLLM
+from minion.providers import BaseProvider
 from minion.utils.utils import extract_json
 
@@ -21,7 +21,7 @@ async def __call__(self, *args, **kwargs):
 
 class LLMActionNode(ActionNode):
     def __init__(self,
-                 llm: BaseLLM,
+                 llm: BaseProvider,
                  input_parser: Optional[callable] = None,
                  output_parser: Optional[callable] = None):
         self.llm = llm
diff --git a/minion/actions/lmp_action_node.py b/minion/actions/lmp_action_node.py
index 7450d0ff..e91ba547 100644
--- a/minion/actions/lmp_action_node.py
+++ b/minion/actions/lmp_action_node.py
@@ -22,7 +22,7 @@ class LmpActionNode(LLMActionNode):
     def __init__(self, llm, input_parser=None, output_parser=None):
         super().__init__(llm, input_parser, output_parser)
-        ell.init(**config.ell, default_client=self.llm.client_ell)
+        #ell.init(**config.ell, default_client=self.llm.client_ell)
 
     @ell.complex(model="gpt-4o-mini")
     def ell_call(self, ret):
@@ -84,11 +84,11 @@ async def execute(self, messages: Union[str, Message, List[Message]], response_f
             api_params['response_format'] = { "type": "text" }
 
         if isinstance(messages, str):
-            messages = [user(messages)]
+            messages = [Message(role="user", content=messages)]
         elif isinstance(messages, Message):
             messages = [messages]
 
-        messages.append(user(content=prompt))
+        messages.append(Message(role="user", content=prompt))
 
         response = await super().execute(messages, **api_params)
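Note: the lmp_action_node.py hunks above drop ell's user() helper in favour of the project's own Message type. In isolation the change amounts to the following sketch (Message comes from minion.message_types, as in the neighbouring files; the prompt text is a placeholder):

# Before/after sketch of the message construction change; not part of this diff.
from minion.message_types import Message

# before: messages = [user("what's 2 + 2?")]                  # ell's user() helper
messages = [Message(role="user", content="what's 2 + 2?")]    # after: explicit Message object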
diff --git a/minion/main/worker.py b/minion/main/worker.py
index 87cb8e63..1a3f3ad6 100644
--- a/minion/main/worker.py
+++ b/minion/main/worker.py
@@ -465,7 +465,7 @@ async def execute_calculation(self):
                 logger.error(error)
                 continue # try again?
             output, error = obs["output"], obs["error"]
-            self.answer = self.input.answer = output
+            self.answer = self.input.answer = output #answer is only output
             # print("#####OUTPUT#####")
             # print(output)
             print(f"###solution###:{self.answer}")
@@ -611,7 +611,7 @@ async def invoke_minion(self, minion_name, worker_config=None):
             processed_answer = self.input.apply_post_processing(answer)
         else:
             processed_answer = answer
-
+        self.answer = processed_answer
         return worker, processed_answer
 
     async def choose_minion_and_run(self):
diff --git a/minion/providers/__init__.py b/minion/providers/__init__.py
index cf3908a0..d268ab12 100644
--- a/minion/providers/__init__.py
+++ b/minion/providers/__init__.py
@@ -1,4 +1,4 @@
-from minion.providers.base_llm import BaseLLM
+from minion.providers.base_provider import BaseProvider
 from minion.providers.llm_provider_registry import LLMRegistry, create_llm_provider
 
-__all__ = ["BaseLLM", "LLMRegistry", "create_llm_provider"]
+__all__ = ["BaseProvider", "LLMRegistry", "create_llm_provider"]
diff --git a/minion/providers/base_llm.py b/minion/providers/base_provider.py
similarity index 83%
rename from minion/providers/base_llm.py
rename to minion/providers/base_provider.py
index 08a81fda..2a86b845 100644
--- a/minion/providers/base_llm.py
+++ b/minion/providers/base_provider.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import AsyncIterator, List, Optional
+from typing import AsyncIterator, List, Optional, Any, Generator
 
 from minion.configs.config import LLMConfig, config
@@ -8,8 +8,10 @@
 from minion.providers.cost import CostManager
 
 
-class BaseLLM(ABC):
-    def __init__(self, config: LLMConfig):
+class BaseProvider(ABC):
+    """Base class for all LLM providers"""
+
+    def __init__(self, config: Any) -> None:
         self.config = config
         self.cost_manager = CostManager()
         self._setup_retry_config()
@@ -20,7 +22,7 @@ def __init__(self, config: LLMConfig):
 
     @abstractmethod
     def _setup(self) -> None:
-        """初始化具体的LLM客户端"""
+        """Setup the LLM provider with configuration"""
         pass
 
     def _setup_retry_config(self):
@@ -51,14 +53,12 @@ def _setup_retry_config(self):
 
     @abstractmethod
     async def generate(self, messages: List[Message], temperature: Optional[float] = None, **kwargs) -> str:
-        """生成回复"""
+        """Generate completion from messages"""
         pass
 
     @abstractmethod
-    async def generate_stream(
-        self, messages: List[Message], temperature: Optional[float] = None, **kwargs
-    ) -> AsyncIterator[str]:
-        """流式生成回复"""
+    async def generate_stream(self, messages: List[Message], temperature: Optional[float] = None, **kwargs) -> Generator[str, None, str]:
+        """Generate streaming completion from messages"""
         pass
 
     def get_cost(self) -> CostManager:
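Note: a hypothetical skeleton of what a concrete provider now implements against the renamed BaseProvider. The imports follow the new paths in this diff; the api_type string "my_provider" and the stub bodies are placeholders, not part of the PR.

# Hypothetical provider skeleton; not part of this diff.
from typing import List, Optional

from minion.message_types import Message
from minion.providers.base_provider import BaseProvider
from minion.providers.llm_provider_registry import llm_registry


@llm_registry.register("my_provider")  # hypothetical api_type, resolved later by create_llm_provider()
class MyProvider(BaseProvider):
    def _setup(self) -> None:
        # build the underlying SDK client from self.config (base_url, api_key, model, ...)
        self.client = None

    async def generate(self, messages: List[Message], temperature: Optional[float] = None, **kwargs) -> str:
        # single-shot completion: call the backend once and return the full text
        raise NotImplementedError

    async def generate_stream(self, messages: List[Message], temperature: Optional[float] = None, **kwargs):
        # streaming completion: yield text chunks as they arrive
        raise NotImplementedError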
diff --git a/minion/providers/litellm_provider.py b/minion/providers/litellm_provider.py
index 38e8758c..ebbc56f1 100644
--- a/minion/providers/litellm_provider.py
+++ b/minion/providers/litellm_provider.py
@@ -3,7 +3,8 @@
 from typing import AsyncIterator, List, Optional
 
 from minion.message_types import ContentType, Message
-from minion.providers.base_llm import BaseLLM
+
+from minion.providers.base_provider import BaseProvider
 from minion.providers.llm_provider_registry import llm_registry
 
 with warnings.catch_warnings():
@@ -15,7 +16,7 @@
 
 
 @llm_registry.register("litellm")
-class LiteLLMProvider(BaseLLM):
+class LiteLLMProvider(BaseProvider):
     def _setup(self) -> None:
         # 设置API密钥
         os.environ["OPENAI_API_KEY"] = self.config.api_key
diff --git a/minion/providers/llm_provider_registry.py b/minion/providers/llm_provider_registry.py
index 2ec54019..2ba39ea8 100644
--- a/minion/providers/llm_provider_registry.py
+++ b/minion/providers/llm_provider_registry.py
@@ -2,7 +2,7 @@
 from typing import Type
 
 from minion.configs.config import LLMConfig
-from minion.providers.base_llm import BaseLLM
+from minion.providers.base_provider import BaseProvider
 
 
 class LLMRegistry:
@@ -21,7 +21,7 @@ def decorator(cls):
 
         return decorator
 
-    def get_provider(self, api_type: str) -> Type[BaseLLM]:
+    def get_provider(self, api_type: str) -> Type[BaseProvider]:
         if api_type not in self.providers:
             # 尝试动态导入
             try:
@@ -40,6 +40,6 @@ def get_provider(self, api_type: str) -> Type[BaseLLM]:
 llm_registry = LLMRegistry()
 
 
-def create_llm_provider(config: LLMConfig) -> BaseLLM:
+def create_llm_provider(config: LLMConfig) -> BaseProvider:
     provider_cls = llm_registry.get_provider(config.api_type)
     return provider_cls(config)
diff --git a/minion/providers/openai_provider.py b/minion/providers/openai_provider.py
index 2bc6eac2..8d74abeb 100644
--- a/minion/providers/openai_provider.py
+++ b/minion/providers/openai_provider.py
@@ -6,13 +6,14 @@
 from minion.const import MINION_ROOT
 from minion.logs import log_llm_stream
 from minion.message_types import ImageContent, ImageUtils, Message, MessageContent
-from minion.providers.base_llm import BaseLLM
+from minion.providers.base_provider import BaseProvider
+
 from minion.providers.cost import CostManager
 from minion.providers.llm_provider_registry import llm_registry
 
 
 @llm_registry.register("openai")
-class OpenAIProvider(BaseLLM):
+class OpenAIProvider(BaseProvider):
     def _setup(self) -> None:
         import openai
         # 创建客户端配置
@@ -23,6 +24,7 @@ def _setup(self) -> None:
         self.client_ell = openai.OpenAI(**client_kwargs)
         self.client = openai.AsyncOpenAI(**client_kwargs)
 
+    #or should we call _convert_messages
     def _prepare_messages(self, messages: List[Message] | Message | str) -> List[dict]:
         """准备发送给API的消息格式
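Note: for reference, a sketch of what create_llm_provider() in llm_provider_registry.py above does internally. The config import path and the "gpt-4o" key are assumed from the example scripts; the registry calls are the ones shown in the diff.

# Sketch of provider resolution through the registry; not part of this diff.
from minion import config  # assumed import path
from minion.providers.llm_provider_registry import llm_registry

llm_config = config.models.get("gpt-4o")                        # example key, as used in brain.py
provider_cls = llm_registry.get_provider(llm_config.api_type)   # dynamic import on first use
llm = provider_cls(llm_config)                                  # same result as create_llm_provider(llm_config)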
diff --git a/minion/providers/provider.py b/minion/providers/provider.py
deleted file mode 100644
index e090af60..00000000
--- a/minion/providers/provider.py
+++ /dev/null
@@ -1,218 +0,0 @@
-import os
-import time
-import warnings
-from functools import partial
-
-from dotenv import load_dotenv
-
-with warnings.catch_warnings():
-    warnings.simplefilter("ignore")
-    import litellm
-
-from litellm import completion as litellm_completion
-from litellm import completion_cost as litellm_completion_cost
-from litellm.exceptions import (
-    APIConnectionError,
-    RateLimitError,
-    ServiceUnavailableError,
-)
-from tenacity import (
-    retry,
-    retry_if_exception_type,
-    stop_after_attempt,
-    wait_random_exponential,
-)
-
-os.environ["LITELLM_LOG"] = "DEBUG"
-
-__all__ = ["LLM"]
-
-message_separator = "\n\n----------\n\n"
-
-
-class LLM:
-    def __init__(
-        self,
-        model=None,
-        api_key=None,
-        base_url=None,
-        api_version=None,
-        num_retries=3,
-        retry_min_wait=1,
-        retry_max_wait=10,
-        llm_timeout=30,
-        llm_temperature=0.7,
-        llm_top_p=0.9,
-        custom_llm_provider=None,
-        max_input_tokens=4096,
-        max_output_tokens=2048,
-        cost=None,
-    ):
-        from agent_as_a_judge.llm.cost import Cost
-
-        self.cost = Cost()
-        self.model_name = model
-        self.api_key = api_key
-        self.base_url = base_url
-        self.api_version = api_version
-        self.max_input_tokens = max_input_tokens
-        self.max_output_tokens = max_output_tokens
-        self.llm_timeout = llm_timeout
-        self.llm_temperature = llm_temperature
-        self.llm_top_p = llm_top_p
-        self.num_retries = num_retries
-        self.retry_min_wait = retry_min_wait
-        self.retry_max_wait = retry_max_wait
-        self.custom_llm_provider = custom_llm_provider
-
-        self.model_info = None
-        try:
-            self.model_info = litellm.get_model_info(self.model_name)
-        except Exception:
-            print(f"Could not get model info for {self.model_name}")
-
-        if self.max_input_tokens is None and self.model_info:
-            self.max_input_tokens = self.model_info.get("max_input_tokens", 4096)
-        if self.max_output_tokens is None and self.model_info:
-            self.max_output_tokens = self.model_info.get("max_output_tokens", 1024)
-
-        self._initialize_completion_function()
-
-    def _initialize_completion_function(self):
-        completion_func = partial(
-            litellm_completion,
-            model=self.model_name,
-            api_key=self.api_key,
-            base_url=self.base_url,
-            api_version=self.api_version,
-            custom_llm_provider=self.custom_llm_provider,
-            max_tokens=self.max_output_tokens,
-            timeout=self.llm_timeout,
-            temperature=self.llm_temperature,
-            top_p=self.llm_top_p,
-        )
-
-        def attempt_on_error(retry_state):
-            print(f"Could not get model info for {self.model_name}")
-            return True
-
-        @retry(
-            reraise=True,
-            stop=stop_after_attempt(self.num_retries),
-            wait=wait_random_exponential(min=self.retry_min_wait, max=self.retry_max_wait),
-            retry=retry_if_exception_type((RateLimitError, APIConnectionError, ServiceUnavailableError)),
-            after=attempt_on_error,
-        )
-        def wrapper(*args, **kwargs):
-            resp = completion_func(*args, **kwargs)
-            message_back = resp["choices"][0]["message"]["content"]
-            # logger.debug(message_back)
-            return resp, message_back
-
-        self._completion = wrapper
-
-    @property
-    def completion(self):
-        return self._completion
-
-    def _llm_inference(self, messages: list) -> dict:
-        """Perform LLM inference using the provided messages."""
-        start_time = time.time()
-        response, cost, accumulated_cost = self.do_completion(messages=messages, temperature=0.0)
-        inference_time = time.time() - start_time
-
-        llm_response = response.choices[0].message["content"]
-        input_token, output_token = (
-            response.usage.prompt_tokens,
-            response.usage.completion_tokens,
-        )
-
-        return {
-            "llm_response": llm_response,
-            "input_tokens": input_token,
-            "output_tokens": output_token,
-            "cost": cost,
-            "accumulated_cost": accumulated_cost,
-            "inference_time": inference_time,
-        }
-
-    def do_completion(self, *args, **kwargs):
-        resp, msg = self._completion(*args, **kwargs)
-        cur_cost, accumulated_cost = self.post_completion(resp)
-        return resp, cur_cost, accumulated_cost
-
-    def post_completion(self, response: str):
-        try:
-            cur_cost = self.completion_cost(response)
-        except Exception:
-            cur_cost = 0
-
-        return cur_cost, self.cost.accumulated_cost  # , cost_msg
-
-    def get_token_count(self, messages):
-        return litellm.token_counter(model=self.model_name, messages=messages)
-
-    def is_local(self):
-        if self.base_url:
-            return any(substring in self.base_url for substring in ["localhost", "127.0.0.1", "0.0.0.0"])
-        if self.model_name and self.model_name.startswith("ollama"):
-            return True
-        return False
-
-    def completion_cost(self, response):
-        if not self.is_local():
-            try:
-                cost = litellm_completion_cost(completion_response=response)
-                if self.cost:
-                    self.cost.add_cost(cost)
-                return cost
-            except Exception:
-                print("Cost calculation not supported for this model.")
-        return 0.0
-
-    def __str__(self):
-        return f"LLM(model={self.model_name}, base_url={self.base_url})"
-
-    def __repr__(self):
-        return str(self)
-
-    def do_multimodal_completion(self, text, image_path):
-        messages = self.prepare_messages(text, image_path=image_path)
-        response, cur_cost, accumulated_cost = self.do_completion(messages=messages)
-        return response, cur_cost, accumulated_cost
-
-    @staticmethod
-    def encode_image(image_path):
-        import base64
-
-        with open(image_path, "rb") as image_file:
-            return base64.b64encode(image_file.read()).decode("utf-8")
-
-    def prepare_messages(self, text, image_path=None):
-        messages = [{"role": "user", "content": text}]
-        if image_path:
-            base64_image = self.encode_image(image_path)
-            messages[0]["content"] = [
-                {"type": "text", "text": text},
-                {
-                    "type": "image_url",
-                    "image_url": {"url": "data:image/jpeg;base64," + base64_image},
-                },
-            ]
-        return messages
-
-
-if __name__ == "__main__":
-    load_dotenv()
-
-    model_name = "gpt-4o-2024-08-06"
-    api_key = os.getenv("OPENAI_API_KEY")
-    base_url = "https://api.openai.com/v1"
-
-    llm_instance = LLM(model=model_name, api_key=api_key, base_url=base_url)
-
-    image_path = "/Users/zhugem/Desktop/DevAI/studio/workspace/sample/results/prediction_interactive.png"
-
-    for i in range(1):
-        multimodal_response = llm_instance.do_multimodal_completion("What’s in this image?", image_path)
-        print(multimodal_response)
diff --git a/requirements.txt b/requirements.txt
index a7da644e..589f3e56 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -41,4 +41,6 @@ graphviz
 astroid
 pysnooper
 #ldb dependencies
-#llmdebugger
\ No newline at end of file
+#llmdebugger
+azure-identity
+azure-ai-inference
\ No newline at end of file
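Note: the two new requirements back the "azure_inference" api_type added to config.yaml.example; azure-identity covers Entra ID credentials when a key is not used. The provider wrapper itself is not part of this diff; below is a rough sketch of the public azure-ai-inference call such a provider would wrap, with the endpoint, key and prompt taken from the placeholders used elsewhere in this PR.

# Rough sketch of an azure-ai-inference call; not part of this diff.
from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import UserMessage
from azure.core.credentials import AzureKeyCredential

client = ChatCompletionsClient(
    endpoint="YOUR_URL",                        # base_url placeholder from the deepseek-r1 config entry
    credential=AzureKeyCredential("YOUR_KEY"),  # api_key placeholder from the deepseek-r1 config entry
)
response = client.complete(
    model="DeepSeek-R1",
    messages=[UserMessage(content="what's the solution for game of 24 for 1 3 4 6")],
    temperature=0.1,
)
print(response.choices[0].message.content)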