Skip to content

Commit

Permalink
dcot
Browse files Browse the repository at this point in the history
  • Loading branch information
femto committed Oct 19, 2024
1 parent 8e62fcd commit 4d2d154
Show file tree
Hide file tree
Showing 7 changed files with 148 additions and 25 deletions.
30 changes: 19 additions & 11 deletions examples/smart_minion/brain.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import yaml

from metagpt.configs.models_config import ModelsConfig
from metagpt.llm import LLM
from metagpt.minion.brain import Brain
from metagpt.minion.rpyc_python_env import RpycPythonEnv
Expand All @@ -22,7 +23,14 @@ async def smart_brain():

# Load the .env file
load_dotenv()
llm = LLM()
gpt4o_llm_config = ModelsConfig.default().get("gpt-4o")
ModelsConfig.default().get("gpt-4o-mini")
ModelsConfig.default().get("deepseek-chat")

llm = LLM(llm_config=gpt4o_llm_config)
# llm = LLM(llm_config=gpt4o_mini_llm_config)
# llm = LLM(llm_config=deepseek_llm_config)

# Load the config file
current_dir = os.path.dirname(os.path.abspath(__file__))

Expand Down Expand Up @@ -90,22 +98,22 @@ async def smart_brain():
#
# cache_plan = os.path.join(current_file_dir, "aime", "plan_gpt4o.3.json")
# obs, score, *_ = await brain.step(
# query="Alice and Bob play the following game. A stack of $n$ tokens lies before them. The players take turns with Alice going first. On each turn, the player removes $1$ token or $4$ tokens from the stack. The player who removes the last token wins. Find the number of positive integers $n$ less than or equal to $2024$ such that there is a strategy that guarantees that Bob wins, regardless of Alice’s moves.",
# route="native",
# query="Every morning, Aya does a $9$ kilometer walk, and then finishes at the coffee shop. One day, she walks at $s$ kilometers per hour, and the walk takes $4$ hours, including $t$ minutes at the coffee shop. Another morning, she walks at $s+2$ kilometers per hour, and the walk takes $2$ hours and $24$ minutes, including $t$ minutes at the coffee shop. This morning, if she walks at $s+\frac12$ kilometers per hour, how many minutes will the walk take, including the $t$ minutes at the coffee shop?",
# route="dcot",
# dataset="aime 2024",
# cache_plan=cache_plan,
# check=False
# )
# print(obs)

# cache_plan = os.path.join(current_file_dir, "aime", "plan_gpt4o.7.json")
#
# obs, score, *_ = await brain.step(
# query="Find the largest possible real part of\[(75+117i)z+\frac{96+144i}{z}\]where $z$ is a complex number with $|z|=4$.",
# route="cot",
# dataset="aime 2024",
# cache_plan=cache_plan,
# )
# print(obs)
obs, score, *_ = await brain.step(
query="Find the largest possible real part of\[(75+117i)z+\frac{96+144i}{z}\]where $z$ is a complex number with $|z|=4$.",
route="dcot",
dataset="aime 2024",
check=False,
)
print(obs)

# obs, score, *_ = await brain.step(
# query="Real numbers $x$ and $y$ with $x,y>1$ satisfy $\log_x(y^x)=\log_y(x^{4y})=10.$ What is the value of $xy$?",
Expand Down
4 changes: 1 addition & 3 deletions examples/smart_minion/evalute_aime.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ async def main():
data = load_dataset("qq8933/AIME_1983_2024", split="train")

#
json_storer = JsonStatsStorer("logs/stats_output.json")
json_storer = JsonStatsStorer("logs/aime_stats_output.json")

# tracker = AsyncStatsTracker(stats_db_url)
# In your main function or wherever you set up your application
Expand All @@ -362,10 +362,8 @@ async def main():

correct, count, matched_ids, mismatched_ids = await evaluate_dataset(
data,
to_processed_id=None,
concurrency_count=1,
stats_storer=stats_storer,
start_id=None,
continue_process=True,
run_filename="run_aime.json",
)
Expand Down
2 changes: 1 addition & 1 deletion examples/smart_minion/gsm8k/evalute_gsm8k_re2.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ async def main():
# data = await load_data_sample(file_name, samples=1055)

correct, count, matched_ids, mismatched_ids = await evaluate_dataset(
data, run_filename="run_gsm8k_deepseek_re2.json", continue_process=True, concurrency_count=1
data, run_filename="run_gsm8k_deepseek_re2.json", continue_process=True, concurrency_count=70
)

print(f"Accuracy: {correct/count:.2%}")
Expand Down
3 changes: 2 additions & 1 deletion metagpt/minion/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ class Input(BaseModel):
score_func: Any = None

answer: str = "" # the extracted final answer
solution: str = ""
answer_code: str = "" # the extracted final answer
full_output: str = ""
raw_answer: str = "" # the complete answer with cot thought
feedback: str = "" # the feedback for improvement

Expand Down
4 changes: 2 additions & 2 deletions metagpt/minion/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def apply_attention_enhancement(self, perceptions, step):
for field in step["apply_to"]:
if field in perceptions:
for _ in range(step.get("repeat", 1)):
perceptions[field] = self.apply_re2(perceptions[field])
perceptions[field] = self.apply_re2(field, perceptions[field])
return perceptions

async def apply_semantic_refinement(self, perceptions, step):
Expand All @@ -55,7 +55,7 @@ async def apply_semantic_refinement(self, perceptions, step):
perceptions[field] = await self.apply_rephrase(perceptions[field])
return perceptions

def apply_re2(self, text):
def apply_re2(self, field, text):
if not text:
return text
return f"{text}\nRead the above again: {text}"
Expand Down
95 changes: 90 additions & 5 deletions metagpt/minion/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,12 +406,19 @@
"""
)
tmp = """
Solution:
{{input.solution}}
{% if input.full_output %}
Full Output:
{{ input.full_output }}
{% endif %}
{% if input.answer_code %}
Answer Code:
{{ input.answer_code }}
{% endif %}
Answer:
{{
input.answer}}
"""
{{input.answer}}
"""
CHECK_PROMPT = f"""Given the following problem details:
{ASK_PROMPT_JINJA}
Expand All @@ -431,6 +438,39 @@
"""

CHECK_PROMPT1 = f"""Given the following problem details:
{ASK_PROMPT_JINJA}
{tmp}
Given the complete solution process and final answer above, evaluate:
1. Process Validation:
- Are the thinking steps logical and complete?
- Are mathematical derivations correct?
- Are units handled properly?
- Is step counting accurate?
2. Answer Validation:
- Does the final answer follow from the steps?
- Is it numerically correct?
- Are units correct?
Your feedback should be structured as:
<root>
<process_check>
Evaluate the solution process
</process_check>
<answer_check>
Evaluate the final answer
</answer_check>
<correct>true/false</correct>
<score>score value</score>
</root>
"""

DOT_PROMPT = (
"""
# Diagram of Thought Iterative Reasoning Prompt
Expand Down Expand Up @@ -504,3 +544,48 @@
"""
+ ASK_PROMPT_JINJA
)


# Prompt for the "Dynamic Chain of Thought" (DCOT) route: the model reasons in
# <thinking>/<step>/<reflection> tags under a step budget with reward scores,
# and must emit its final result inside <answer> tags — which is what
# extract_answer() in worker.py parses back out of the completion.
# Technique source: https://x.com/_philschmid/status/1842846050320544016
DCOT_PROMPT = (
    """
You are an AI assistant designed to solve complex problems by dynamically reasoning through multiple perspectives, employing reflection, and adapting your approach as new information emerges. Your task is to solve the problem step by step, incorporating deep reasoning, critical reflection, and strategic adjustments throughout the process.
Thinking and Perspective Exploration:
Enclose all thoughts within <thinking> tags. Examine the problem from multiple angles, exploring alternative approaches and considering possible solutions or errors.
Be open to unconventional thinking, challenging assumptions, and exploring edge cases or rare conditions.
Step-by-Step Breakdown:
Use <step> tags to break down the solution into clear, logical steps. Start with a 50-step budget, requesting more if the problem demands additional complexity.
After each step, indicate the remaining budget with <count> tags and evaluate whether the approach is on track. Adjust if needed.
Reflection and Progress Evaluation:
After every 3 steps, perform a detailed self-reflection using <reflection> tags. Critically assess your progress, and consider potential biases, assumptions, and alternative viewpoints.
Assign a reward score between 0.0 and 1.0 after each reflection, using the following criteria:
0.8+: Continue the current approach.
0.5-0.7: Consider minor adjustments or refinements.
Below 0.5: Reevaluate the approach and consider backtracking or starting fresh with an alternate strategy.
Dynamic Reasoning Adjustments:
If a low reward score is assigned, justify backtracking or changing your approach within <thinking> tags. Be explicit about your reasoning and decision-making process.
If you are uncertain, simulate different potential paths and compare outcomes before choosing the optimal approach.
Mathematical and Formal Reasoning:
For mathematical problems, show all work in detail using LaTeX for formal notation. Provide detailed proofs or calculations to support your conclusions.
Multi-Solution Comparison:
Whenever feasible, explore multiple methods to reach the solution. Compare their effectiveness within <reflection> tags and assess their strengths and weaknesses.
Synthesizing the Final Answer:
Once all steps are complete and you've settled on the best approach, synthesize your final answer using <answer> tags. Provide a concise, well-reasoned summary of your solution, explaining why it is the most effective.
Final Reflection and Reward:
Conclude with a final reflection on the overall solution. Discuss the effectiveness of your approach, the challenges faced, and any learning opportunities encountered along the way.
Assign a final reward score (0.0 to 1.0) based on the overall quality of your solution.
Exploration of Broader Implications:
When applicable, consider the broader implications of your solution. What insights can be drawn from the process? Are there larger principles or concepts that apply?
By incorporating multi-step reasoning, critical reflection, and adaptive problem-solving, you will dynamically develop the best solution while learning from each phase of the process.
"""
    + ASK_PROMPT_JINJA
)
35 changes: 33 additions & 2 deletions metagpt/minion/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
ASK_PROMPT,
ASK_PROMPT_JINJA,
COT_PROBLEM_INSTRUCTION,
DCOT_PROMPT,
DOT_PROMPT,
IDENTIFY_PROMPT,
MATH_PLAN_PROMPT,
Expand Down Expand Up @@ -77,7 +78,16 @@ def extract_final_answer(text):
if match_tag:
return match_tag.group(1).strip()

return None
return text


def extract_answer(text):
    """Extract the content of an ``<answer>...</answer>`` tag from *text*.

    Returns the stripped inner content of the first tag pair found; if no
    tag is present, returns *text* unchanged so callers always get a usable
    answer string.
    """
    # Match the <answer> tag (the original comment wrongly said <final_answer>,
    # which is what extract_final_answer handles).
    match_tag = re.search(r"<answer>\s*(.*?)\s*</answer>", text, re.DOTALL)
    if match_tag:
        return match_tag.group(1).strip()

    return text


class MetaPlan(BaseModel):
Expand Down Expand Up @@ -287,6 +297,27 @@ async def execute(self):
return self.answer # maybe also adds score?


# https://x.com/_philschmid/status/1842846050320544016
@register_route_downstream
class DcotMinion(Minion):
    """Dynamic Chain of Thought Strategy"""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # The DCOT prompt carries its own reasoning scaffold, so no extra
        # per-input instruction is needed.
        self.input.instruction = ""

    async def execute(self):
        # Render the DCOT prompt against the current input, then query the LLM
        # for a raw (unstructured) completion.
        rendered_prompt = Template(DCOT_PROMPT).render(input=self.input)
        answer_node = ActionNode(key="answer", expected_type=str, instruction="", example="")
        answer_node = await answer_node.fill(context=rendered_prompt, llm=self.brain.llm, schema="raw")
        self.answer_node = answer_node
        # Keep the full completion, and pull the <answer>...</answer> section
        # out as the final answer.
        self.answer = self.input.answer = extract_answer(answer_node.content)

        self.raw_answer = self.input.raw_answer = answer_node.content
        return self.answer  # maybe also adds score?


@register_route_downstream
class MultiPlanMinion(Minion):
"This Strategy will first generate multiple plan, and then compare each plan, see which one is more promising to produce good result, first try most promising plan, then to less promising plan."
Expand Down Expand Up @@ -550,7 +581,7 @@ def extract_code(text):

# deepseek may still put ```python...``` in the returned json
code = extract_code(node.content)
self.answer_code = self.input.solution = code
self.answer_code = self.input.answer_code = code

self.input.run_id = self.input.run_id or uuid.uuid4()
result = self.brain.python_env.step(f"<id>{self.input.query_id}/{self.input.run_id}</id>{code}")
Expand Down

0 comments on commit 4d2d154

Please sign in to comment.