diff --git a/config/config.yaml.example b/config/config.yaml.example
index 956c1dcb..87ce31dc 100644
--- a/config/config.yaml.example
+++ b/config/config.yaml.example
@@ -21,7 +21,7 @@ models:
     api_type: "openai"
     base_url: "${DEFAULT_BASE_URL}"
     api_key: "${DEFAULT_API_KEY}"
-    model: "deepseek-chat"
+    model: "${DEFAULT_MODEL}"
     temperature: 0
   "gpt-4-0125":
     api_type: "openai"
@@ -33,6 +33,12 @@ models:
     base_url: "${CLAUDE_BASE_URL}"
     api_key: "${CLAUDE_API_KEY}"
     temperature: 0.7
+  "deepseek-r1":
+    api_type: "azure_inference" # or azure / ollama / groq etc.
+    api_key: "YOUR_KEY"
+    base_url: "YOUR_URL"
+    model: "DeepSeek-R1"
+    temperature: 0.1
 ell:
   store: 'logs'
   autocommit:true
diff --git a/examples/smart_minion/brain.py b/examples/smart_minion/brain.py
index d530eab5..e76304ab 100644
--- a/examples/smart_minion/brain.py
+++ b/examples/smart_minion/brain.py
@@ -17,7 +17,8 @@ async def smart_brain():
     # 使用从 minion/__init__.py 导入的 config 对象
-    model = "default"
+    model = "gpt-4o"
+    model = "deepseek-r1"
     #model = "llama3.2"
     llm_config = config.models.get(model)
@@ -30,8 +31,9 @@ async def smart_brain():
         llm=llm,
         #llms={"route": [ "llama3.2","llama3.1"]}
     )
-    # obs, score, *_ = await brain.step(query="what's the solution for game of 24 for 4 3 9 8")
-    # print(obs)
+    obs, score, *_ = await brain.step(query="what's the solution for game of 24 for 1 3 4 6", route="python")
+    print(obs)
+
     current_file_dir = os.path.dirname(os.path.abspath(__file__))
     cache_plan = os.path.join(current_file_dir, "aime", "plan_gpt4o.1.json")
     obs, score, *_ = await brain.step(
diff --git a/examples/smart_minion/code_contests/evalute_code_contests_hard.py b/examples/smart_minion/code_contests/evalute_code_contests_hard.py
index 697c16cf..10c5dd6b 100644
--- a/examples/smart_minion/code_contests/evalute_code_contests_hard.py
+++ b/examples/smart_minion/code_contests/evalute_code_contests_hard.py
@@ -242,9 +242,9 @@ async def solve_question(item):
     )
     return answer
 
-model = "gpt-4o"
+#model = "gpt-4o"
 #model = "claude"
-#model = "default"
+model = "default"
 llm = create_llm_provider(config.models.get(model))
 
 cost_manager = CostManager()
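Note: a minimal sketch of how the new "deepseek-r1" entry is meant to be picked up by the examples above. The import paths here are assumptions; create_llm_provider(config.models.get(...)), Brain(llm=...) and brain.step(..., route="python") are taken from the diffs in this PR.

# Hypothetical wiring of the new config entry; not part of this diff.
from minion import config                         # assumed path for the config object mentioned in brain.py
from minion.providers import create_llm_provider

llm_config = config.models.get("deepseek-r1")     # entry added to config.yaml.example above
llm = create_llm_provider(llm_config)             # resolves the provider registered for api_type "azure_inference"
# brain = Brain(llm=llm)
# obs, score, *_ = await brain.step(query="what's the solution for game of 24 for 1 3 4 6", route="python")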
diff --git a/minion/actions/action_node.py b/minion/actions/action_node.py
index a1f685d8..3d0de981 100644
--- a/minion/actions/action_node.py
+++ b/minion/actions/action_node.py
@@ -6,7 +6,7 @@ from minion.message_types import Message
 from minion.models.schemas import Answer
-from minion.providers import BaseLLM
+from minion.providers import BaseProvider
 from minion.utils.utils import extract_json
 
@@ -21,7 +21,7 @@ async def __call__(self, *args, **kwargs):
 
 class LLMActionNode(ActionNode):
     def __init__(self,
-                 llm: BaseLLM,
+                 llm: BaseProvider,
                  input_parser: Optional[callable] = None,
                  output_parser: Optional[callable] = None):
         self.llm = llm
diff --git a/minion/actions/lmp_action_node.py b/minion/actions/lmp_action_node.py
index 7450d0ff..e91ba547 100644
--- a/minion/actions/lmp_action_node.py
+++ b/minion/actions/lmp_action_node.py
@@ -22,7 +22,7 @@ class LmpActionNode(LLMActionNode):
     def __init__(self, llm, input_parser=None, output_parser=None):
         super().__init__(llm, input_parser, output_parser)
-        ell.init(**config.ell, default_client=self.llm.client_ell)
+        #ell.init(**config.ell, default_client=self.llm.client_ell)
 
     @ell.complex(model="gpt-4o-mini")
     def ell_call(self, ret):
@@ -84,11 +84,11 @@ async def execute(self, messages: Union[str, Message, List[Message]], response_f
             api_params['response_format'] = { "type": "text" }
 
         if isinstance(messages, str):
-            messages = [user(messages)]
+            messages = [Message(role="user", content=messages)]
         elif isinstance(messages, Message):
             messages = [messages]
 
-        messages.append(user(content=prompt))
+        messages.append(Message(role="user", content=prompt))
 
         response = await super().execute(messages, **api_params)
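Note: the lmp_action_node.py hunks above drop ell's user() helper in favour of the project's own Message type. In isolation the change amounts to the following sketch (Message comes from minion.message_types, as in the neighbouring files; the prompt text is a placeholder):

# Before/after sketch of the message construction change; not part of this diff.
from minion.message_types import Message

# before: messages = [user("what's 2 + 2?")]                  # ell's user() helper
messages = [Message(role="user", content="what's 2 + 2?")]    # after: explicit Message object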
diff --git a/minion/main/worker.py b/minion/main/worker.py
index 87cb8e63..1a3f3ad6 100644
--- a/minion/main/worker.py
+++ b/minion/main/worker.py
@@ -465,7 +465,7 @@ async def execute_calculation(self):
                 logger.error(error)
                 continue # try again?
             output, error = obs["output"], obs["error"]
-            self.answer = self.input.answer = output
+            self.answer = self.input.answer = output #answer is only output
             # print("#####OUTPUT#####")
             # print(output)
             print(f"###solution###:{self.answer}")
@@ -611,7 +611,7 @@ async def invoke_minion(self, minion_name, worker_config=None):
             processed_answer = self.input.apply_post_processing(answer)
         else:
             processed_answer = answer
-
+        self.answer = processed_answer
         return worker, processed_answer
 
     async def choose_minion_and_run(self):
diff --git a/minion/providers/__init__.py b/minion/providers/__init__.py
index cf3908a0..d268ab12 100644
--- a/minion/providers/__init__.py
+++ b/minion/providers/__init__.py
@@ -1,4 +1,4 @@
-from minion.providers.base_llm import BaseLLM
+from minion.providers.base_provider import BaseProvider
 from minion.providers.llm_provider_registry import LLMRegistry, create_llm_provider
 
-__all__ = ["BaseLLM", "LLMRegistry", "create_llm_provider"]
+__all__ = ["BaseProvider", "LLMRegistry", "create_llm_provider"]
diff --git a/minion/providers/base_llm.py b/minion/providers/base_provider.py
similarity index 83%
rename from minion/providers/base_llm.py
rename to minion/providers/base_provider.py
index 08a81fda..2a86b845 100644
--- a/minion/providers/base_llm.py
+++ b/minion/providers/base_provider.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import AsyncIterator, List, Optional
+from typing import AsyncIterator, List, Optional, Any, Generator
 
 from minion.configs.config import LLMConfig, config
@@ -8,8 +8,10 @@
 from minion.providers.cost import CostManager
 
 
-class BaseLLM(ABC):
-    def __init__(self, config: LLMConfig):
+class BaseProvider(ABC):
+    """Base class for all LLM providers"""
+
+    def __init__(self, config: Any) -> None:
         self.config = config
         self.cost_manager = CostManager()
         self._setup_retry_config()
@@ -20,7 +22,7 @@ def __init__(self, config: LLMConfig):
 
     @abstractmethod
     def _setup(self) -> None:
-        """初始化具体的LLM客户端"""
+        """Setup the LLM provider with configuration"""
         pass
 
     def _setup_retry_config(self):
@@ -51,14 +53,12 @@ def _setup_retry_config(self):
 
     @abstractmethod
     async def generate(self, messages: List[Message], temperature: Optional[float] = None, **kwargs) -> str:
-        """生成回复"""
+        """Generate completion from messages"""
         pass
 
     @abstractmethod
-    async def generate_stream(
-        self, messages: List[Message], temperature: Optional[float] = None, **kwargs
-    ) -> AsyncIterator[str]:
-        """流式生成回复"""
+    async def generate_stream(self, messages: List[Message], temperature: Optional[float] = None, **kwargs) -> Generator[str, None, str]:
+        """Generate streaming completion from messages"""
         pass
 
     def get_cost(self) -> CostManager:
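Note: a hypothetical skeleton of what a concrete provider now implements against the renamed BaseProvider. The imports follow the new paths in this diff; the api_type string "my_provider" and the stub bodies are placeholders, not part of the PR.

# Hypothetical provider skeleton; not part of this diff.
from typing import List, Optional

from minion.message_types import Message
from minion.providers.base_provider import BaseProvider
from minion.providers.llm_provider_registry import llm_registry


@llm_registry.register("my_provider")  # hypothetical api_type, resolved later by create_llm_provider()
class MyProvider(BaseProvider):
    def _setup(self) -> None:
        # build the underlying SDK client from self.config (base_url, api_key, model, ...)
        self.client = None

    async def generate(self, messages: List[Message], temperature: Optional[float] = None, **kwargs) -> str:
        # single-shot completion: call the backend once and return the full text
        raise NotImplementedError

    async def generate_stream(self, messages: List[Message], temperature: Optional[float] = None, **kwargs):
        # streaming completion: yield text chunks as they arrive
        raise NotImplementedError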
diff --git a/minion/providers/litellm_provider.py b/minion/providers/litellm_provider.py
index 38e8758c..ebbc56f1 100644
--- a/minion/providers/litellm_provider.py
+++ b/minion/providers/litellm_provider.py
@@ -3,7 +3,8 @@
 from typing import AsyncIterator, List, Optional
 
 from minion.message_types import ContentType, Message
-from minion.providers.base_llm import BaseLLM
+
+from minion.providers.base_provider import BaseProvider
 from minion.providers.llm_provider_registry import llm_registry
 
 with warnings.catch_warnings():
@@ -15,7 +16,7 @@
 
 
 @llm_registry.register("litellm")
-class LiteLLMProvider(BaseLLM):
+class LiteLLMProvider(BaseProvider):
     def _setup(self) -> None:
         # 设置API密钥
         os.environ["OPENAI_API_KEY"] = self.config.api_key
diff --git a/minion/providers/llm_provider_registry.py b/minion/providers/llm_provider_registry.py
index 2ec54019..2ba39ea8 100644
--- a/minion/providers/llm_provider_registry.py
+++ b/minion/providers/llm_provider_registry.py
@@ -2,7 +2,7 @@
 from typing import Type
 
 from minion.configs.config import LLMConfig
-from minion.providers.base_llm import BaseLLM
+from minion.providers.base_provider import BaseProvider
 
 
 class LLMRegistry:
@@ -21,7 +21,7 @@ def decorator(cls):
 
         return decorator
 
-    def get_provider(self, api_type: str) -> Type[BaseLLM]:
+    def get_provider(self, api_type: str) -> Type[BaseProvider]:
         if api_type not in self.providers:
             # 尝试动态导入
             try:
@@ -40,6 +40,6 @@ def get_provider(self, api_type: str) -> Type[BaseLLM]:
 llm_registry = LLMRegistry()
 
 
-def create_llm_provider(config: LLMConfig) -> BaseLLM:
+def create_llm_provider(config: LLMConfig) -> BaseProvider:
     provider_cls = llm_registry.get_provider(config.api_type)
     return provider_cls(config)
diff --git a/minion/providers/openai_provider.py b/minion/providers/openai_provider.py
index 2bc6eac2..8d74abeb 100644
--- a/minion/providers/openai_provider.py
+++ b/minion/providers/openai_provider.py
@@ -6,13 +6,14 @@
 from minion.const import MINION_ROOT
 from minion.logs import log_llm_stream
 from minion.message_types import ImageContent, ImageUtils, Message, MessageContent
-from minion.providers.base_llm import BaseLLM
+from minion.providers.base_provider import BaseProvider
+
 from minion.providers.cost import CostManager
 from minion.providers.llm_provider_registry import llm_registry
 
 
 @llm_registry.register("openai")
-class OpenAIProvider(BaseLLM):
+class OpenAIProvider(BaseProvider):
     def _setup(self) -> None:
         import openai
         # 创建客户端配置
@@ -23,6 +24,7 @@ def _setup(self) -> None:
         self.client_ell = openai.OpenAI(**client_kwargs)
         self.client = openai.AsyncOpenAI(**client_kwargs)
 
+    #or should we call _convert_messages
     def _prepare_messages(self, messages: List[Message] | Message | str) -> List[dict]:
         """准备发送给API的消息格式
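Note: for reference, a sketch of what create_llm_provider() in llm_provider_registry.py above does internally. The config import path and the "gpt-4o" key are assumed from the example scripts; the registry calls are the ones shown in the diff.

# Sketch of provider resolution through the registry; not part of this diff.
from minion import config  # assumed import path
from minion.providers.llm_provider_registry import llm_registry

llm_config = config.models.get("gpt-4o")                        # example key, as used in brain.py
provider_cls = llm_registry.get_provider(llm_config.api_type)   # dynamic import on first use
llm = provider_cls(llm_config)                                  # same result as create_llm_provider(llm_config)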
diff --git a/minion/providers/provider.py b/minion/providers/provider.py
deleted file mode 100644
index e090af60..00000000
--- a/minion/providers/provider.py
+++ /dev/null
@@ -1,218 +0,0 @@
-import os
-import time
-import warnings
-from functools import partial
-
-from dotenv import load_dotenv
-
-with warnings.catch_warnings():
-    warnings.simplefilter("ignore")
-    import litellm
-
-from litellm import completion as litellm_completion
-from litellm import completion_cost as litellm_completion_cost
-from litellm.exceptions import (
-    APIConnectionError,
-    RateLimitError,
-    ServiceUnavailableError,
-)
-from tenacity import (
-    retry,
-    retry_if_exception_type,
-    stop_after_attempt,
-    wait_random_exponential,
-)
-
-os.environ["LITELLM_LOG"] = "DEBUG"
-
-__all__ = ["LLM"]
-
-message_separator = "\n\n----------\n\n"
-
-
-class LLM:
-    def __init__(
-        self,
-        model=None,
-        api_key=None,
-        base_url=None,
-        api_version=None,
-        num_retries=3,
-        retry_min_wait=1,
-        retry_max_wait=10,
-        llm_timeout=30,
-        llm_temperature=0.7,
-        llm_top_p=0.9,
-        custom_llm_provider=None,
-        max_input_tokens=4096,
-        max_output_tokens=2048,
-        cost=None,
-    ):
-        from agent_as_a_judge.llm.cost import Cost
-
-        self.cost = Cost()
-        self.model_name = model
-        self.api_key = api_key
-        self.base_url = base_url
-        self.api_version = api_version
-        self.max_input_tokens = max_input_tokens
-        self.max_output_tokens = max_output_tokens
-        self.llm_timeout = llm_timeout
-        self.llm_temperature = llm_temperature
-        self.llm_top_p = llm_top_p
-        self.num_retries = num_retries
-        self.retry_min_wait = retry_min_wait
-        self.retry_max_wait = retry_max_wait
-        self.custom_llm_provider = custom_llm_provider
-
-        self.model_info = None
-        try:
-            self.model_info = litellm.get_model_info(self.model_name)
-        except Exception:
-            print(f"Could not get model info for {self.model_name}")
-
-        if self.max_input_tokens is None and self.model_info:
-            self.max_input_tokens = self.model_info.get("max_input_tokens", 4096)
-        if self.max_output_tokens is None and self.model_info:
-            self.max_output_tokens = self.model_info.get("max_output_tokens", 1024)
-
-        self._initialize_completion_function()
-
-    def _initialize_completion_function(self):
-        completion_func = partial(
-            litellm_completion,
-            model=self.model_name,
-            api_key=self.api_key,
-            base_url=self.base_url,
-            api_version=self.api_version,
-            custom_llm_provider=self.custom_llm_provider,
-            max_tokens=self.max_output_tokens,
-            timeout=self.llm_timeout,
-            temperature=self.llm_temperature,
-            top_p=self.llm_top_p,
-        )
-
-        def attempt_on_error(retry_state):
-            print(f"Could not get model info for {self.model_name}")
-            return True
-
-        @retry(
-            reraise=True,
-            stop=stop_after_attempt(self.num_retries),
-            wait=wait_random_exponential(min=self.retry_min_wait, max=self.retry_max_wait),
-            retry=retry_if_exception_type((RateLimitError, APIConnectionError, ServiceUnavailableError)),
-            after=attempt_on_error,
-        )
-        def wrapper(*args, **kwargs):
-            resp = completion_func(*args, **kwargs)
-            message_back = resp["choices"][0]["message"]["content"]
-            # logger.debug(message_back)
-            return resp, message_back
-
-        self._completion = wrapper
-
-    @property
-    def completion(self):
-        return self._completion
-
-    def _llm_inference(self, messages: list) -> dict:
-        """Perform LLM inference using the provided messages."""
-        start_time = time.time()
-        response, cost, accumulated_cost = self.do_completion(messages=messages, temperature=0.0)
-        inference_time = time.time() - start_time
-
-        llm_response = response.choices[0].message["content"]
-        input_token, output_token = (
-            response.usage.prompt_tokens,
-            response.usage.completion_tokens,
-        )
-
-        return {
-            "llm_response": llm_response,
-            "input_tokens": input_token,
-            "output_tokens": output_token,
-            "cost": cost,
-            "accumulated_cost": accumulated_cost,
-            "inference_time": inference_time,
-        }
-
-    def do_completion(self, *args, **kwargs):
-        resp, msg = self._completion(*args, **kwargs)
-        cur_cost, accumulated_cost = self.post_completion(resp)
-        return resp, cur_cost, accumulated_cost
-
-    def post_completion(self, response: str):
-        try:
-            cur_cost = self.completion_cost(response)
-        except Exception:
-            cur_cost = 0
-
-        return cur_cost, self.cost.accumulated_cost  # , cost_msg
-
-    def get_token_count(self, messages):
-        return litellm.token_counter(model=self.model_name, messages=messages)
-
-    def is_local(self):
-        if self.base_url:
-            return any(substring in self.base_url for substring in ["localhost", "127.0.0.1", "0.0.0.0"])
-        if self.model_name and self.model_name.startswith("ollama"):
-            return True
-        return False
-
-    def completion_cost(self, response):
-        if not self.is_local():
-            try:
-                cost = litellm_completion_cost(completion_response=response)
-                if self.cost:
-                    self.cost.add_cost(cost)
-                return cost
-            except Exception:
-                print("Cost calculation not supported for this model.")
-        return 0.0
-
-    def __str__(self):
-        return f"LLM(model={self.model_name}, base_url={self.base_url})"
-
-    def __repr__(self):
-        return str(self)
-
-    def do_multimodal_completion(self, text, image_path):
-        messages = self.prepare_messages(text, image_path=image_path)
-        response, cur_cost, accumulated_cost = self.do_completion(messages=messages)
-        return response, cur_cost, accumulated_cost
-
-    @staticmethod
-    def encode_image(image_path):
-        import base64
-
-        with open(image_path, "rb") as image_file:
-            return base64.b64encode(image_file.read()).decode("utf-8")
-
-    def prepare_messages(self, text, image_path=None):
-        messages = [{"role": "user", "content": text}]
-        if image_path:
-            base64_image = self.encode_image(image_path)
-            messages[0]["content"] = [
-                {"type": "text", "text": text},
-                {
-                    "type": "image_url",
-                    "image_url": {"url": "data:image/jpeg;base64," + base64_image},
-                },
-            ]
-        return messages
-
-
-if __name__ == "__main__":
-    load_dotenv()
-
-    model_name = "gpt-4o-2024-08-06"
-    api_key = os.getenv("OPENAI_API_KEY")
-    base_url = "https://api.openai.com/v1"
-
-    llm_instance = LLM(model=model_name, api_key=api_key, base_url=base_url)
-
-    image_path = "/Users/zhugem/Desktop/DevAI/studio/workspace/sample/results/prediction_interactive.png"
-
-    for i in range(1):
-        multimodal_response = llm_instance.do_multimodal_completion("What’s in this image?", image_path)
-        print(multimodal_response)
diff --git a/requirements.txt b/requirements.txt
index a7da644e..589f3e56 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -41,4 +41,6 @@ graphviz
 astroid
 pysnooper
 #ldb dependencies
-#llmdebugger
\ No newline at end of file
+#llmdebugger
+azure-identity
+azure-ai-inference
\ No newline at end of file
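Note: the two new requirements back the "azure_inference" api_type added to config.yaml.example; azure-identity covers Entra ID credentials when a key is not used. The provider wrapper itself is not part of this diff; below is a rough sketch of the public azure-ai-inference call such a provider would wrap, with the endpoint, key and prompt taken from the placeholders used elsewhere in this PR.

# Rough sketch of an azure-ai-inference call; not part of this diff.
from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import UserMessage
from azure.core.credentials import AzureKeyCredential

client = ChatCompletionsClient(
    endpoint="YOUR_URL",                        # base_url placeholder from the deepseek-r1 config entry
    credential=AzureKeyCredential("YOUR_KEY"),  # api_key placeholder from the deepseek-r1 config entry
)
response = client.complete(
    model="DeepSeek-R1",
    messages=[UserMessage(content="what's the solution for game of 24 for 1 3 4 6")],
    temperature=0.1,
)
print(response.choices[0].message.content)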