diff --git a/examples/smart_minion/human_eval/evalute_human_eval_error.py b/examples/smart_minion/human_eval/evalute_human_eval_error.py
index aed50220..1d79113f 100644
--- a/examples/smart_minion/human_eval/evalute_human_eval_error.py
+++ b/examples/smart_minion/human_eval/evalute_human_eval_error.py
@@ -284,13 +284,13 @@ async def main():
             # 从原始数据集中获取对应的完整数据
             if idx < len(original_data):
                 mismatched_data.append(original_data[idx])
-    
+
     # 使用新的数据集运行评估
     correct, count, matched_ids, mismatched_ids = await evaluate_dataset(
         mismatched_data,
         run_filename=f"run_human_eval_ldb_{model}0.json",
         continue_process=True,
-        concurrency_count=1
+        concurrency_count=60
     )
 
     print(f"Accuracy: {correct/count:.2%}")
diff --git a/examples/smart_minion/human_eval/humaneval_public_test.jsonl b/examples/smart_minion/human_eval/humaneval_public_test.jsonl
index b5014bc4..ee717d7e 100755
--- a/examples/smart_minion/human_eval/humaneval_public_test.jsonl
+++ b/examples/smart_minion/human_eval/humaneval_public_test.jsonl
@@ -30,6 +30,7 @@
 {"problem_id": "HumanEval/29", "test": ["assert candidate([], 'a') == []", "assert candidate(['abc', 'bcd', 'cde', 'array'], 'a') == ['abc', 'array']"], "entry_point": "filter_by_prefix"}
 {"problem_id": "HumanEval/30", "test": ["assert candidate([-1, 2, -4, 5, 6]) == [2, 5, 6]", "assert candidate([5, 3, -5, 2, -3, 3, 9, 0, 123, 1, -10]) == [5, 3, 2, 3, 9, 123, 1]"], "entry_point": "get_positive"}
 {"problem_id": "HumanEval/31", "test": ["assert candidate(6) == False", "assert candidate(101) == True", "assert candidate(11) == True", "assert candidate(13441) == True", "assert candidate(61) == True", "assert candidate(4) == False", "assert candidate(1) == False"], "entry_point": "is_prime"}
+{"problem_id": "HumanEval/32", "test": ["assert round(find_zero([1, 2]), 2) == -0.5", "assert round(find_zero([-6, 11, -6, 1]), 2) == 1.0"], "entry_point": "find_zero"}
 {"problem_id": "HumanEval/33", "test": ["assert candidate([1, 2, 3]) == [1, 2, 3]", "assert candidate([5, 6, 3, 4, 8, 9, 2]) == [2, 6, 3, 4, 8, 9, 5"], "entry_point": "sort_third"}
 {"problem_id": "HumanEval/34", "test": ["assert candidate([5, 3, 5, 2, 3, 3, 9, 0, 123]) == [0, 2, 3, 5, 9, 123]"], "entry_point": "unique"}
 {"problem_id": "HumanEval/35", "test": ["assert candidate([1, 2, 3]) == 3", "assert candidate([5, 3, -5, 2, -3, 3, 9, 0, 123, 1, -10]) == 123"], "entry_point": "max_element"}
diff --git a/minion/main/improve_route.py b/minion/main/improve_route.py
index d1bf1928..89fa87cf 100644
--- a/minion/main/improve_route.py
+++ b/minion/main/improve_route.py
@@ -1,4 +1,5 @@
 from enum import Enum, auto
+from typing import Dict, Optional, Type
 
 class ImproveRoute(Enum):
     """改进路由的枚举类"""
@@ -8,8 +9,23 @@ class ImproveRoute(Enum):
 
     @classmethod
-    def get_route(cls, route_name: str) -> "ImproveRoute":
-        """根据字符串获取对应的改进路由"""
-        try:
-            return cls(route_name.lower()) #or use llm to recommendend improve route?
-        except ValueError:
-            return cls.FEEDBACK  # 默认返回 feedback 路由
\ No newline at end of file
+    def get_route(cls, route_name: str) -> Optional[Type]:
+        """Resolve a route name to its registered improver minion class.
+
+        Args:
+            route_name: Route name; matched case-insensitively.
+
+        Returns:
+            The improver class registered under that name in IMPROVER_MINIONS,
+            or the one registered for the default FEEDBACK route when the name
+            is unknown (None if FEEDBACK has no registration either).
+        """
+        # Imported here rather than at module top, presumably to avoid a
+        # circular import with minion.main.minion.
+        from minion.main.minion import IMPROVER_MINIONS
+
+        improver_cls = IMPROVER_MINIONS.get(route_name.lower())
+        if improver_cls is None:
+            # Fall back to the FEEDBACK improver class, not the enum member:
+            # the caller instantiates whatever is returned here.
+            improver_cls = IMPROVER_MINIONS.get(cls.FEEDBACK.value)
+        return improver_cls
\ No newline at end of file
diff --git a/minion/main/minion.py b/minion/main/minion.py
index 5bfc89ac..35a69da2 100644
--- a/minion/main/minion.py
+++ b/minion/main/minion.py
@@ -150,17 +150,17 @@ async def execute(self):
     async def improve(self):
         # 获取改进路由
         route_name = getattr(self.input, 'improve_route', 'feedback')
-        improve_route = ImproveRoute.get_route(route_name)
+        improver_cls = ImproveRoute.get_route(route_name)
 
-        # 获取对应的 improver class
-        improver_cls = IMPROVER_MINIONS.get(improve_route.value)
         if improver_cls:
             improver = improver_cls(
                 input=self.input,
                 brain=self.brain,
                 worker=self
             )
-            return await improver.execute()
+            self.answer = await improver.execute()
+            return self.answer
 
         # fallback
-        return await self.execute()
+        self.answer = await self.execute()
+        return self.answer
diff --git a/minion/main/prompt.py b/minion/main/prompt.py
index 66abbc34..04caeb06 100644
--- a/minion/main/prompt.py
+++ b/minion/main/prompt.py
@@ -405,6 +405,15 @@ please ensure you correctly indent the code, and don't use // as comment
 """
 )
 
+#try not to use sympy
+PYTHON_PROMPT = (
+    """
+    Write python code to solve the problem, also noted the python program must print out answer"""
+    + COMMON_ERROR
+    + """Please ensure all the variables are defined, don't use variables before defining them
+    please ensure you correctly indent the code, and don't use // as comment
+    """
+)
 tmp = """
 {% if input.full_output %}
 Full Output: