diff --git a/README.md b/README.md
index fcfb9282..8052e56a 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,32 @@ Minion is designed to execute any type of queries, offering a variety of feature
 
 Minion
 
+## Working Principle
+
+The following flowchart illustrates how Minion processes a query:
+
+```mermaid
+flowchart LR
+    A[Query] --> B[Solution]
+    B --> C{Check}
+    C -->|Not Satisfied| D[Improve]
+    D -->|Callback| B
+    C -->|Satisfied| E[Final Result]
+
+    style A fill:#f96,stroke:#333,stroke-width:2px
+    style B fill:#69f,stroke:#333,stroke-width:2px
+    style C fill:#f69,stroke:#333,stroke-width:2px
+    style D fill:#96f,stroke:#333,stroke-width:2px
+    style E fill:#6f9,stroke:#333,stroke-width:2px
+```
+
+The flowchart shows the complete process from query to final result:
+1. The user query is received (Query)
+2. The system generates a candidate solution (Solution)
+3. The solution is verified (Check)
+4. If the check is not satisfied, the solution is improved (Improve) and fed back to produce a new solution
+5. If the check is satisfied, the final result is returned (Final Result)
+
 ## Benchmarks
 
 Minion has achieved impressive results on various benchmarks:
diff --git a/examples/smart_minion/brain.py b/examples/smart_minion/brain.py
index 554a3268..04d0eeb6 100644
--- a/examples/smart_minion/brain.py
+++ b/examples/smart_minion/brain.py
@@ -28,7 +28,7 @@ async def smart_brain():
     brain = Brain(
         python_env=RpycPythonEnv(port=python_env_config.get("port", 3007)),
         llm=llm,
-        llms={"route": [ "llama3.2","llama3.1"]}
+        #llms={"route": [ "llama3.2","llama3.1"]}
     )
     # obs, score, *_ = await brain.step(query="what's the solution for game of 24 for 4 3 9 8")
     # print(obs)
diff --git a/examples/smart_minion/human_eval/evalute_human_eval.py b/examples/smart_minion/human_eval/evalute_human_eval.py
index 2ba2d998..fa703c60 100644
--- a/examples/smart_minion/human_eval/evalute_human_eval.py
+++ b/examples/smart_minion/human_eval/evalute_human_eval.py
@@ -234,16 +234,34 @@ def load_execution_config(file_path):
         ensemble_logic = json.load(file)
     return ensemble_logic
 
-async def solve_question(item, route=None):
-    # Implement your problem-solving logic here
-    # For example, this could be a math solver or text parser
+async def solve_question(item):
     brain = Brain(stats_storer=None, python_env=RpycPythonEnv(ports=3007), llm=llm)
-
     current_dir = os.path.dirname(os.path.abspath(__file__))
     ensemble_logic_path = os.path.join(current_dir, "human_eval_config.json")
-    obs, score, *_ = await brain.step(query=item["prompt"], execution_config=load_execution_config(ensemble_logic_path))
-    # print(obs)
-    return obs
+    # Load the public test cases
+    test_cases_path = os.path.join(current_dir, "humaneval_public_test.jsonl")
+    test_cases = load_jsonl(test_cases_path)
+    # Find the test case matching this task and attach it as metadata
+    metadata = {"test_cases": []}
+    for test_case in test_cases:
+        if test_case["problem_id"] == item["task_id"]:
+            metadata["test_cases"] = test_case.get("test", [])
+            break
+    answer, score, *_ = await brain.step(
+        query="""Please provide a complete function implementation including:
+- Full function definition
+- All necessary logic
+- Proper return statement
+- Handle all edge cases
+
+Here is the function to implement:
+""" + item["prompt"],
+        entry_point=item["entry_point"],
+        dataset="HumanEval",
+        execution_config=load_execution_config(ensemble_logic_path),
+        metadata=metadata
+    )
+    return answer
 
 #model = "gpt-4o-mini"
 model = "default"
@@ -255,11 +273,21 @@ async def main():
     current_dir = os.path.dirname(os.path.abspath(__file__))
     file_name = os.path.join(current_dir, "human_eval_test.jsonl")
     #file_name = os.path.join(current_dir, "humaneval_validate.jsonl")
+    #file_name = os.path.join(current_dir, "humaneval_one.jsonl")
     data = load_jsonl(file_name)
     # data = await load_data_sample(file_name, samples=1055)
 
+    # from datasets import load_dataset
+    #
+    # ds = load_dataset("openai/openai_humaneval")
+    #human_eval = ds["test"][38]
+    #human_eval = ds["test"][50]
+    # human_eval = ds["test"][32]
+    #
+    # data = [human_eval] #already a dict
+
     correct, count, matched_ids, mismatched_ids = await evaluate_dataset(
-        data, run_filename=f"run_humaneval_test_python_{model}.json", continue_process=True, concurrency_count=60
+        data, run_filename=f"run_humaneval_test_python_ldb_{model}1.json", continue_process=True, concurrency_count=60
     )
 
     print(f"Accuracy: {correct/count:.2%}")
diff --git a/minion/main/prompt.py b/minion/main/prompt.py
index 04caeb06..9912d755 100644
--- a/minion/main/prompt.py
+++ b/minion/main/prompt.py
@@ -409,7 +409,8 @@ PYTHON_PROMPT = (
     """
 Write python code to solve the problem, also noted the python program must print out answer"""
-    + COMMON_ERROR
+    #+ COMMON_ERROR
+    #+ COMMON_SYMPY_ERROR
     + """Please ensure all the variables are defined, don't use variables before defining them
 please ensure you correctly indent the code, and don't use // as comment
 """
diff --git a/minion/main/worker.py b/minion/main/worker.py
index 117132b6..1f376134 100644
--- a/minion/main/worker.py
+++ b/minion/main/worker.py
@@ -60,7 +60,7 @@
 )
 from minion.utils.answer_extraction import extract_final_answer, extract_longest_json_from_string, extract_python, \
     extract_answer
-
+# Worker --> Check --> Improve (worker again)
 
 class WorkerMinion(Minion):
     pass
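
The "Working Principle" section added to the README above describes a generate–check–improve loop. Below is a minimal, self-contained sketch of that control flow. The names it uses (`generate_solution`, `check_solution`, `improve_solution`, `Feedback`, `max_iterations`) are hypothetical placeholders introduced only for illustration and are not part of the actual Minion API; in the repository the loop is driven through calls such as `brain.step(query=...)`, as the examples in this patch show.

```python
# Sketch of the Query -> Solution -> Check -> Improve loop from the README
# flowchart. All helpers below are hypothetical stand-ins, not the Minion API.
from dataclasses import dataclass


@dataclass
class Feedback:
    satisfied: bool
    comment: str = ""


def generate_solution(query: str) -> str:
    # Placeholder for the real LLM-backed generation step (Solution).
    return f"draft answer for: {query!r}"


def check_solution(query: str, solution: str) -> Feedback:
    # Placeholder check (Check): accept any non-empty draft.
    return Feedback(satisfied=bool(solution), comment="looks complete")


def improve_solution(solution: str, feedback: Feedback) -> str:
    # Placeholder improvement (Improve): revise the draft using the checker's comment.
    return f"{solution} (revised after feedback: {feedback.comment})"


def solve(query: str, max_iterations: int = 3) -> str:
    solution = generate_solution(query)                  # Solution
    for _ in range(max_iterations):
        feedback = check_solution(query, solution)       # Check
        if feedback.satisfied:                           # Satisfied -> Final Result
            break
        solution = improve_solution(solution, feedback)  # Improve, then loop back
    return solution


if __name__ == "__main__":
    print(solve("what's the solution for game of 24 for 4 3 9 8"))
```

Bounding the loop with an iteration cap is the usual design choice for this kind of self-refinement cycle: it keeps an unsatisfiable check from cycling forever while still allowing several improve-and-recheck passes.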