|
| 1 | +from opencompass.openicl.icl_prompt_template import PromptTemplate |
| 2 | +from opencompass.openicl.icl_retriever import FixKRetriever |
| 3 | +from opencompass.openicl.icl_inferencer import PPLInferencer |
| 4 | +from opencompass.openicl.icl_evaluator import AccEvaluator |
| 5 | +from opencompass.datasets import CEvalDataset |
| 6 | + |
# Subject registry for C-Eval.
# Maps each subject key to a three-item list:
#   [English display name, Chinese display name, category].
# Insertion order is significant: `ceval_all_sets` below is derived from it.
ceval_subject_mapping = {
    _key: [_english, _chinese, _category]
    for _key, _english, _chinese, _category in (
        ('computer_network', 'Computer Network', '计算机网络', 'STEM'),
        ('operating_system', 'Operating System', '操作系统', 'STEM'),
        ('computer_architecture', 'Computer Architecture', '计算机组成', 'STEM'),
        ('college_programming', 'College Programming', '大学编程', 'STEM'),
        ('college_physics', 'College Physics', '大学物理', 'STEM'),
        ('college_chemistry', 'College Chemistry', '大学化学', 'STEM'),
        ('advanced_mathematics', 'Advanced Mathematics', '高等数学', 'STEM'),
        ('probability_and_statistics', 'Probability and Statistics', '概率统计', 'STEM'),
        ('discrete_mathematics', 'Discrete Mathematics', '离散数学', 'STEM'),
        ('electrical_engineer', 'Electrical Engineer', '注册电气工程师', 'STEM'),
        ('metrology_engineer', 'Metrology Engineer', '注册计量师', 'STEM'),
        ('high_school_mathematics', 'High School Mathematics', '高中数学', 'STEM'),
        ('high_school_physics', 'High School Physics', '高中物理', 'STEM'),
        ('high_school_chemistry', 'High School Chemistry', '高中化学', 'STEM'),
        ('high_school_biology', 'High School Biology', '高中生物', 'STEM'),
        ('middle_school_mathematics', 'Middle School Mathematics', '初中数学', 'STEM'),
        ('middle_school_biology', 'Middle School Biology', '初中生物', 'STEM'),
        ('middle_school_physics', 'Middle School Physics', '初中物理', 'STEM'),
        ('middle_school_chemistry', 'Middle School Chemistry', '初中化学', 'STEM'),
        ('veterinary_medicine', 'Veterinary Medicine', '兽医学', 'STEM'),
        ('college_economics', 'College Economics', '大学经济学', 'Social Science'),
        ('business_administration', 'Business Administration', '工商管理', 'Social Science'),
        ('marxism', 'Marxism', '马克思主义基本原理', 'Social Science'),
        ('mao_zedong_thought', 'Mao Zedong Thought', '毛泽东思想和中国特色社会主义理论体系概论', 'Social Science'),
        ('education_science', 'Education Science', '教育学', 'Social Science'),
        ('teacher_qualification', 'Teacher Qualification', '教师资格', 'Social Science'),
        ('high_school_politics', 'High School Politics', '高中政治', 'Social Science'),
        ('high_school_geography', 'High School Geography', '高中地理', 'Social Science'),
        ('middle_school_politics', 'Middle School Politics', '初中政治', 'Social Science'),
        ('middle_school_geography', 'Middle School Geography', '初中地理', 'Social Science'),
        ('modern_chinese_history', 'Modern Chinese History', '近代史纲要', 'Humanities'),
        ('ideological_and_moral_cultivation', 'Ideological and Moral Cultivation', '思想道德修养与法律基础', 'Humanities'),
        ('logic', 'Logic', '逻辑学', 'Humanities'),
        ('law', 'Law', '法学', 'Humanities'),
        ('chinese_language_and_literature', 'Chinese Language and Literature', '中国语言文学', 'Humanities'),
        ('art_studies', 'Art Studies', '艺术学', 'Humanities'),
        ('professional_tour_guide', 'Professional Tour Guide', '导游资格', 'Humanities'),
        ('legal_professional', 'Legal Professional', '法律职业资格', 'Humanities'),
        ('high_school_chinese', 'High School Chinese', '高中语文', 'Humanities'),
        ('high_school_history', 'High School History', '高中历史', 'Humanities'),
        ('middle_school_history', 'Middle School History', '初中历史', 'Humanities'),
        ('civil_servant', 'Civil Servant', '公务员', 'Other'),
        ('sports_science', 'Sports Science', '体育学', 'Other'),
        ('plant_protection', 'Plant Protection', '植物保护', 'Other'),
        ('basic_medicine', 'Basic Medicine', '基础医学', 'Other'),
        ('clinical_medicine', 'Clinical Medicine', '临床医学', 'Other'),
        ('urban_and_rural_planner', 'Urban and Rural Planner', '注册城乡规划师', 'Other'),
        ('accountant', 'Accountant', '注册会计师', 'Other'),
        ('fire_engineer', 'Fire Engineer', '注册消防工程师', 'Other'),
        ('environmental_impact_assessment_engineer', 'Environmental Impact Assessment Engineer', '环境影响评价工程师', 'Other'),
        ('tax_accountant', 'Tax Accountant', '税务师', 'Other'),
        ('physician', 'Physician', '医师资格', 'Other'),
    )
}

# Every subject key, in the order declared above.
ceval_all_sets = list(ceval_subject_mapping)
| 62 | + |
# Build one dataset config per (split, subject) pair: every subject in
# `ceval_all_sets` is registered once for "val" and once for "test".
ceval_datasets = []
for _split in ["val", "test"]:
    for _name in ceval_all_sets:
        # Index 1 of the mapping entry is the subject's Chinese name.
        _ch_name = ceval_subject_mapping[_name][1]

        # Only the subject name is interpolated here; `{question}` and
        # `{A}`..`{D}` stay as literal placeholders in the plain string below.
        hint = f"以下是关于{_ch_name}的单项选择题,请直接给出正确答案的选项。"
        question_and_options = "{question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}"

        ceval_reader_cfg = {
            "input_columns": ["question", "A", "B", "C", "D"],
            "output_column": "answer",
            "train_split": "dev",
            "test_split": _split,
        }

        ceval_infer_cfg = {
            # One template per candidate answer letter, keyed by that letter.
            "ice_template": {
                "type": PromptTemplate,
                "template": {
                    _label: question_and_options + "\n答案: " + _label + "\n"
                    for _label in "ABCD"
                },
            },
            # Same per-letter layout, prefixed with the hint line.
            # NOTE(review): "</E>" is declared as the ice_token; presumably it
            # marks where in-context examples are spliced in — confirm against
            # PromptTemplate.
            "prompt_template": {
                "type": PromptTemplate,
                "template": {
                    _label: hint + "\n</E>" + question_and_options + "\n答案: " + _label
                    for _label in "ABCD"
                },
                "ice_token": "</E>",
            },
            # NOTE(review): presumably selects entries 0-4 of the train ("dev")
            # split as fixed few-shot examples — verify against FixKRetriever.
            "retriever": {"type": FixKRetriever, "fix_id_list": list(range(5))},
            "inferencer": {"type": PPLInferencer},
        }

        ceval_eval_cfg = {"evaluator": {"type": AccEvaluator}}

        # "val" entries get the plain "ceval-" prefix; any other split
        # (here only "test") gets "ceval-test-".
        _abbr_prefix = "ceval-" if _split == "val" else "ceval-test-"

        ceval_datasets.append({
            "type": CEvalDataset,
            "path": "./data/ceval_internal/formal_ceval",
            "name": _name,
            "abbr": _abbr_prefix + _name,
            "reader_cfg": ceval_reader_cfg,
            "infer_cfg": ceval_infer_cfg,
            "eval_cfg": ceval_eval_cfg,
        })
        # Drop the helper so the loop leaves behind exactly the names the
        # original layout did.
        del _abbr_prefix
0 commit comments