diff --git a/examples/smart_minion/human_eval/evalute_human_eval_error.py b/examples/smart_minion/human_eval/evalute_human_eval_error.py index f8e0e33a..e1ea5d5d 100644 --- a/examples/smart_minion/human_eval/evalute_human_eval_error.py +++ b/examples/smart_minion/human_eval/evalute_human_eval_error.py @@ -304,7 +304,7 @@ async def main(): # 使用新的数据集运行评估 correct, count, matched_ids, mismatched_ids = await evaluate_dataset( mismatched_data, - run_filename=f"run_human_eval_test_{model}1.json", + run_filename=f"run_human_eval_test_{model}4.json", continue_process=True, concurrency_count=60 )