readme
femto committed Jan 2, 2025
1 parent 673f5b7 commit 4fc1dcc
Showing 5 changed files with 66 additions and 11 deletions.
26 changes: 26 additions & 0 deletions README.md
@@ -11,6 +11,32 @@ Minion is designed to execute any type of queries, offering a variety of feature

<img src="assets/minion1.webp" alt="Minion" width="200" align="right">

## Working Principle

The following flowchart illustrates how Minion works:

```mermaid
flowchart LR
A[Query] --> B[Solution]
B --> C{Check}
C -->|Not Satisfied| D[Improve]
D -->|Callback| B
C -->|Satisfied| E[Final Result]
style A fill:#f96,stroke:#333,stroke-width:2px
style B fill:#69f,stroke:#333,stroke-width:2px
style C fill:#f69,stroke:#333,stroke-width:2px
style D fill:#96f,stroke:#333,stroke-width:2px
style E fill:#6f9,stroke:#333,stroke-width:2px
```

The flowchart shows the complete process from query to final result:
1. The system receives the user query (Query)
2. It generates a candidate solution (Solution)
3. The solution is verified (Check)
4. If the check is not satisfied, an improvement step (Improve) feeds back into generating a new solution
5. If the check is satisfied, the final result is returned (Final Result)
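
This generate-check-improve loop can be sketched in a few lines of Python. The sketch is illustrative only: `generate_solution`, `check_solution`, `improve_solution`, and `CheckResult` are hypothetical placeholders for Minion's internal workers, not part of the actual API.

```python
from dataclasses import dataclass

@dataclass
class CheckResult:
    satisfied: bool
    feedback: str = ""

# Hypothetical stand-ins for Minion's internal workers (not the real API):
def generate_solution(query: str) -> str:
    return f"draft answer for: {query}"

def check_solution(query: str, solution: str) -> CheckResult:
    # A real checker would run tests or ask an LLM to critique the solution.
    return CheckResult(satisfied="draft" not in solution, feedback="still a draft")

def improve_solution(query: str, solution: str, check: CheckResult) -> str:
    return solution.replace("draft", "improved")

def answer(query: str, max_rounds: int = 3) -> str:
    solution = generate_solution(query)          # Query -> Solution
    for _ in range(max_rounds):
        check = check_solution(query, solution)  # Check
        if check.satisfied:
            return solution                      # Final Result (satisfied)
        solution = improve_solution(query, solution, check)  # Improve -> new Solution
    return solution

print(answer("what's the solution for game of 24 for 4 3 9 8"))
```

In the repository, a query instead goes through `brain.step(query=...)` (as in the examples changed by this commit), which drives the same check/improve cycle internally.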

## Benchmarks

Minion has achieved impressive results on various benchmarks:
2 changes: 1 addition & 1 deletion examples/smart_minion/brain.py
@@ -28,7 +28,7 @@ async def smart_brain():
brain = Brain(
python_env=RpycPythonEnv(port=python_env_config.get("port", 3007)),
llm=llm,
llms={"route": [ "llama3.2","llama3.1"]}
#llms={"route": [ "llama3.2","llama3.1"]}
)
# obs, score, *_ = await brain.step(query="what's the solution for game of 24 for 4 3 9 8")
# print(obs)
44 changes: 36 additions & 8 deletions examples/smart_minion/human_eval/evalute_human_eval.py
@@ -234,16 +234,34 @@ def load_execution_config(file_path):
ensemble_logic = json.load(file)
return ensemble_logic

async def solve_question(item, route=None):
# Implement your problem-solving logic here
# For example, this could be a math solver or text parser
async def solve_question(item):
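# Create a fresh Brain per problem, backed by a remote Python env (RPyC, port 3007)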
brain = Brain(stats_storer=None, python_env=RpycPythonEnv(ports=3007), llm=llm)

current_dir = os.path.dirname(os.path.abspath(__file__))
ensemble_logic_path = os.path.join(current_dir, "human_eval_config.json")
obs, score, *_ = await brain.step(query=item["prompt"], execution_config=load_execution_config(ensemble_logic_path))
# print(obs)
return obs
# Load the test cases
test_cases_path = os.path.join(current_dir, "humaneval_public_test.jsonl")
test_cases = load_jsonl(test_cases_path)
# Find the test case matching this task
metadata = {"test_cases": []}
for test_case in test_cases:
if test_case["problem_id"] == item["task_id"]:
metadata["test_cases"] = test_case.get("test", [])
break
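# Solve the prompt; entry_point, dataset, and metadata (public test cases) supply HumanEval-specific context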
answer, score, *_ = await brain.step(
query="""Please provide a complete function implementation including:
- Full function definition
- All necessary logic
- Proper return statement
- Handle all edge cases
Here is the function to implement:
""" + item["prompt"],
entry_point=item["entry_point"],
dataset="HumanEval",
execution_config=load_execution_config(ensemble_logic_path),
metadata=metadata
)
return answer

#model = "gpt-4o-mini"
model = "default"
@@ -255,11 +273,21 @@ async def main():
current_dir = os.path.dirname(os.path.abspath(__file__))
file_name = os.path.join(current_dir, "human_eval_test.jsonl")
#file_name = os.path.join(current_dir, "humaneval_validate.jsonl")
#file_name = os.path.join(current_dir, "humaneval_one.jsonl")
data = load_jsonl(file_name)
# data = await load_data_sample(file_name, samples=1055)

# from datasets import load_dataset
#
# ds = load_dataset("openai/openai_humaneval")
#human_eval = ds["test"][38]
#human_eval = ds["test"][50]
# human_eval = ds["test"][32]
#
# data = [human_eval] #already a dict

correct, count, matched_ids, mismatched_ids = await evaluate_dataset(
data, run_filename=f"run_humaneval_test_python_{model}.json", continue_process=True, concurrency_count=60
data, run_filename=f"run_humaneval_test_python_ldb_{model}1.json", continue_process=True, concurrency_count=60
)

print(f"Accuracy: {correct/count:.2%}")
3 changes: 2 additions & 1 deletion minion/main/prompt.py
@@ -409,7 +409,8 @@
PYTHON_PROMPT = (
"""
Write python code to solve the problem; note that the program must print out the answer"""
+ COMMON_ERROR
#+ COMMON_ERROR
#+ COMMON_SYMPY_ERROR
+ """Please ensure all the variables are defined, don't use variables before defining them
please ensure you correctly indent the code, and don't use // as comment
"""
2 changes: 1 addition & 1 deletion minion/main/worker.py
@@ -60,7 +60,7 @@
)
from minion.utils.answer_extraction import extract_final_answer, extract_longest_json_from_string, extract_python, \
extract_answer

# Flow: -->Worker-->Check-->Improve (worker again)

class WorkerMinion(Minion):
pass
