Skip to content

Commit

Permalink
dcot
Browse files Browse the repository at this point in the history
  • Loading branch information
femto committed Oct 19, 2024
1 parent 8e62fcd commit 4d2d154
Show file tree
Hide file tree
Showing 7 changed files with 148 additions and 25 deletions.
30 changes: 19 additions & 11 deletions examples/smart_minion/brain.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import yaml

from metagpt.configs.models_config import ModelsConfig
from metagpt.llm import LLM
from metagpt.minion.brain import Brain
from metagpt.minion.rpyc_python_env import RpycPythonEnv
Expand All @@ -22,7 +23,14 @@ async def smart_brain():

# Load the .env file
load_dotenv()
llm = LLM()
gpt4o_llm_config = ModelsConfig.default().get("gpt-4o")
ModelsConfig.default().get("gpt-4o-mini")
ModelsConfig.default().get("deepseek-chat")

llm = LLM(llm_config=gpt4o_llm_config)
# llm = LLM(llm_config=gpt4o_mini_llm_config)
# llm = LLM(llm_config=deepseek_llm_config)

# Load the config file
current_dir = os.path.dirname(os.path.abspath(__file__))

Expand Down Expand Up @@ -90,22 +98,22 @@ async def smart_brain():
#
# cache_plan = os.path.join(current_file_dir, "aime", "plan_gpt4o.3.json")
# obs, score, *_ = await brain.step(
# query="Alice and Bob play the following game. A stack of $n$ tokens lies before them. The players take turns with Alice going first. On each turn, the player removes $1$ token or $4$ tokens from the stack. The player who removes the last token wins. Find the number of positive integers $n$ less than or equal to $2024$ such that there is a strategy that guarantees that Bob wins, regardless of Alice’s moves.",
# route="native",
# query="Every morning, Aya does a $9$ kilometer walk, and then finishes at the coffee shop. One day, she walks at $s$ kilometers per hour, and the walk takes $4$ hours, including $t$ minutes at the coffee shop. Another morning, she walks at $s+2$ kilometers per hour, and the walk takes $2$ hours and $24$ minutes, including $t$ minutes at the coffee shop. This morning, if she walks at $s+\frac12$ kilometers per hour, how many minutes will the walk take, including the $t$ minutes at the coffee shop?",
# route="dcot",
# dataset="aime 2024",
# cache_plan=cache_plan,
# check=False
# )
# print(obs)

# cache_plan = os.path.join(current_file_dir, "aime", "plan_gpt4o.7.json")
#
# obs, score, *_ = await brain.step(
# query="Find the largest possible real part of\[(75+117i)z+\frac{96+144i}{z}\]where $z$ is a complex number with $|z|=4$.",
# route="cot",
# dataset="aime 2024",
# cache_plan=cache_plan,
# )
# print(obs)
obs, score, *_ = await brain.step(
query="Find the largest possible real part of\[(75+117i)z+\frac{96+144i}{z}\]where $z$ is a complex number with $|z|=4$.",
route="dcot",
dataset="aime 2024",
check=False,
)
print(obs)

# obs, score, *_ = await brain.step(
# query="Real numbers $x$ and $y$ with $x,y>1$ satisfy $\log_x(y^x)=\log_y(x^{4y})=10.$ What is the value of $xy$?",
Expand Down
4 changes: 1 addition & 3 deletions examples/smart_minion/evalute_aime.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ async def main():
data = load_dataset("qq8933/AIME_1983_2024", split="train")

#
json_storer = JsonStatsStorer("logs/stats_output.json")
json_storer = JsonStatsStorer("logs/aime_stats_output.json")

# tracker = AsyncStatsTracker(stats_db_url)
# In your main function or wherever you set up your application
Expand All @@ -362,10 +362,8 @@ async def main():

correct, count, matched_ids, mismatched_ids = await evaluate_dataset(
data,
to_processed_id=None,
concurrency_count=1,
stats_storer=stats_storer,
start_id=None,
continue_process=True,
run_filename="run_aime.json",
)
Expand Down
2 changes: 1 addition & 1 deletion examples/smart_minion/gsm8k/evalute_gsm8k_re2.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ async def main():
# data = await load_data_sample(file_name, samples=1055)

correct, count, matched_ids, mismatched_ids = await evaluate_dataset(
data, run_filename="run_gsm8k_deepseek_re2.json", continue_process=True, concurrency_count=1
data, run_filename="run_gsm8k_deepseek_re2.json", continue_process=True, concurrency_count=70
)

print(f"Accuracy: {correct/count:.2%}")
Expand Down
3 changes: 2 additions & 1 deletion metagpt/minion/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ class Input(BaseModel):
score_func: Any = None

answer: str = "" # the extracted final answer
solution: str = ""
answer_code: str = "" # the extracted final answer
full_output: str = ""
raw_answer: str = "" # the complete answer with cot thought
feedback: str = "" # the feedback for improvement

Expand Down
4 changes: 2 additions & 2 deletions metagpt/minion/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def apply_attention_enhancement(self, perceptions, step):
for field in step["apply_to"]:
if field in perceptions:
for _ in range(step.get("repeat", 1)):
perceptions[field] = self.apply_re2(perceptions[field])
perceptions[field] = self.apply_re2(field, perceptions[field])
return perceptions

async def apply_semantic_refinement(self, perceptions, step):
Expand All @@ -55,7 +55,7 @@ async def apply_semantic_refinement(self, perceptions, step):
perceptions[field] = await self.apply_rephrase(perceptions[field])
return perceptions

def apply_re2(self, text):
def apply_re2(self, field, text):
if not text:
return text
return f"{text}\nRead the above again: {text}"
Expand Down
95 changes: 90 additions & 5 deletions metagpt/minion/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,12 +406,19 @@
"""
)
tmp = """
Solution:
{{input.solution}}
{% if input.full_output %}
Full Output:
{{ input.full_output }}
{% endif %}
{% if input.answer_code %}
Answer Code:
{{ input.answer_code }}
{% endif %}
Answer:
{{
input.answer}}
"""
{{input.answer}}
"""
CHECK_PROMPT = f"""Given the following problem details:
{ASK_PROMPT_JINJA}
Expand All @@ -431,6 +438,39 @@
"""

CHECK_PROMPT1 = f"""Given the following problem details:
{ASK_PROMPT_JINJA}
{tmp}
Given the complete solution process and final answer above, evaluate:
1. Process Validation:
- Are the thinking steps logical and complete?
- Are mathematical derivations correct?
- Are units handled properly?
- Is step counting accurate?
2. Answer Validation:
- Does the final answer follow from the steps?
- Is it numerically correct?
- Are units correct?
Your feedback should be structured as:
<root>
<process_check>
Evaluate the solution process
</process_check>
<answer_check>
Evaluate the final answer
</answer_check>
<correct>true/false</correct>
<score>score value</score>
</root>
"""

DOT_PROMPT = (
"""
# Diagram of Thought Iterative Reasoning Prompt
Expand Down Expand Up @@ -504,3 +544,48 @@
"""
+ ASK_PROMPT_JINJA
)


# Prompt for the "Dynamic Chain of Thought" (DCOT) route: the model reasons in
# <thinking>/<step>/<reflection> tags under a step budget with reward scores,
# and must emit its final result inside <answer> tags — which is what
# extract_answer() in worker.py parses back out of the completion.
# Technique source: https://x.com/_philschmid/status/1842846050320544016
DCOT_PROMPT = (
    """
You are an AI assistant designed to solve complex problems by dynamically reasoning through multiple perspectives, employing reflection, and adapting your approach as new information emerges. Your task is to solve the problem step by step, incorporating deep reasoning, critical reflection, and strategic adjustments throughout the process.
Thinking and Perspective Exploration:
Enclose all thoughts within <thinking> tags. Examine the problem from multiple angles, exploring alternative approaches and considering possible solutions or errors.
Be open to unconventional thinking, challenging assumptions, and exploring edge cases or rare conditions.
Step-by-Step Breakdown:
Use <step> tags to break down the solution into clear, logical steps. Start with a 50-step budget, requesting more if the problem demands additional complexity.
After each step, indicate the remaining budget with <count> tags and evaluate whether the approach is on track. Adjust if needed.
Reflection and Progress Evaluation:
After every 3 steps, perform a detailed self-reflection using <reflection> tags. Critically assess your progress, and consider potential biases, assumptions, and alternative viewpoints.
Assign a reward score between 0.0 and 1.0 after each reflection, using the following criteria:
0.8+: Continue the current approach.
0.5-0.7: Consider minor adjustments or refinements.
Below 0.5: Reevaluate the approach and consider backtracking or starting fresh with an alternate strategy.
Dynamic Reasoning Adjustments:
If a low reward score is assigned, justify backtracking or changing your approach within <thinking> tags. Be explicit about your reasoning and decision-making process.
If you are uncertain, simulate different potential paths and compare outcomes before choosing the optimal approach.
Mathematical and Formal Reasoning:
For mathematical problems, show all work in detail using LaTeX for formal notation. Provide detailed proofs or calculations to support your conclusions.
Multi-Solution Comparison:
Whenever feasible, explore multiple methods to reach the solution. Compare their effectiveness within <reflection> tags and assess their strengths and weaknesses.
Synthesizing the Final Answer:
Once all steps are complete and you've settled on the best approach, synthesize your final answer using <answer> tags. Provide a concise, well-reasoned summary of your solution, explaining why it is the most effective.
Final Reflection and Reward:
Conclude with a final reflection on the overall solution. Discuss the effectiveness of your approach, the challenges faced, and any learning opportunities encountered along the way.
Assign a final reward score (0.0 to 1.0) based on the overall quality of your solution.
Exploration of Broader Implications:
When applicable, consider the broader implications of your solution. What insights can be drawn from the process? Are there larger principles or concepts that apply?
By incorporating multi-step reasoning, critical reflection, and adaptive problem-solving, you will dynamically develop the best solution while learning from each phase of the process.
"""
    + ASK_PROMPT_JINJA
)
35 changes: 33 additions & 2 deletions metagpt/minion/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
ASK_PROMPT,
ASK_PROMPT_JINJA,
COT_PROBLEM_INSTRUCTION,
DCOT_PROMPT,
DOT_PROMPT,
IDENTIFY_PROMPT,
MATH_PLAN_PROMPT,
Expand Down Expand Up @@ -77,7 +78,16 @@ def extract_final_answer(text):
if match_tag:
return match_tag.group(1).strip()

return None
return text


def extract_answer(text):
    """Extract the content of an ``<answer>...</answer>`` tag from *text*.

    Returns the stripped inner content of the first tag pair found; if no
    tag is present, returns *text* unchanged so callers always get a usable
    answer string.
    """
    # Match the <answer> tag (the original comment wrongly said <final_answer>,
    # which is what extract_final_answer handles).
    match_tag = re.search(r"<answer>\s*(.*?)\s*</answer>", text, re.DOTALL)
    if match_tag:
        return match_tag.group(1).strip()

    return text


class MetaPlan(BaseModel):
Expand Down Expand Up @@ -287,6 +297,27 @@ async def execute(self):
return self.answer # maybe also adds score?


# https://x.com/_philschmid/status/1842846050320544016
@register_route_downstream
class DcotMinion(Minion):
    """Dynamic Chain of Thought Strategy"""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # The DCOT prompt carries its own reasoning scaffold, so no extra
        # per-input instruction is needed.
        self.input.instruction = ""

    async def execute(self):
        # Render the DCOT prompt against the current input, then query the LLM
        # for a raw (unstructured) completion.
        rendered_prompt = Template(DCOT_PROMPT).render(input=self.input)
        answer_node = ActionNode(key="answer", expected_type=str, instruction="", example="")
        answer_node = await answer_node.fill(context=rendered_prompt, llm=self.brain.llm, schema="raw")
        self.answer_node = answer_node
        # Keep the full completion, and pull the <answer>...</answer> section
        # out as the final answer.
        self.answer = self.input.answer = extract_answer(answer_node.content)

        self.raw_answer = self.input.raw_answer = answer_node.content
        return self.answer  # maybe also adds score?


@register_route_downstream
class MultiPlanMinion(Minion):
"This Strategy will first generate multiple plan, and then compare each plan, see which one is more promising to produce good result, first try most promising plan, then to less promising plan."
Expand Down Expand Up @@ -550,7 +581,7 @@ def extract_code(text):

# deepseek may still put ```python...``` in the returned json
code = extract_code(node.content)
self.answer_code = self.input.solution = code
self.answer_code = self.input.answer_code = code

self.input.run_id = self.input.run_id or uuid.uuid4()
result = self.brain.python_env.step(f"<id>{self.input.query_id}/{self.input.run_id}</id>{code}")
Expand Down

0 comments on commit 4d2d154

Please sign in to comment.