Skip to content

Commit

Permalink
fit newest opencompass
Browse files Browse the repository at this point in the history
  • Loading branch information
xingyuanbu committed May 29, 2024
1 parent 228aa0d commit 6976666
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 26 deletions.
19 changes: 7 additions & 12 deletions configs/eval_subjective_mtbench101.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
from .datasets.subjective.multiround.mtbench101_judge import subjective_datasets


from opencompass.models import HuggingFaceCausalLM, HuggingFace, HuggingFaceChatGLM3
from opencompass.models.openai_api import OpenAIAllesAPIN
# from opencompass.models.idealab_api import IdeaLabAllesAPIN
from opencompass.models import HuggingFaceCausalLM, HuggingFace, HuggingFaceChatGLM3, OpenAI
from opencompass.partitioners import NaivePartitioner, SizePartitioner
from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
from opencompass.partitioners.sub_size import SubjectiveSizePartitioner
Expand Down Expand Up @@ -74,7 +72,6 @@
partitioner=dict(type=SizePartitioner, max_task_size=10000),
runner=dict(
type=SlurmSequentialRunner,
# type=LocalRunner,
partition='llm_dev2',
quotatype='auto',
max_num_workers=32,
Expand All @@ -85,28 +82,26 @@
# -------------Evaluation Stage ----------------------------------------

## ------------- JudgeLLM Configuration
judge_model = dict(
judge_models = [dict(
abbr='GPT4-Turbo',
type=OpenAIAllesAPIN,
# type=IdeaLabAllesAPIN,
type=OpenAI,
path='',
key='', # The key will be obtained from $OPENAI_API_KEY, but you can write down your key here as well
url='https://api.openai.com/v1',
meta_template=api_meta_template,
query_per_second=16,
max_out_len=4096,
max_seq_len=4096,
batch_size=8,
temperature=0,
)
temperature=0.8,
)]

## ------------- Evaluation Configuration



eval = dict(
partitioner=dict(type=SubjectiveSizePartitioner, max_task_size=10000, mode='singlescore', models=models),
runner=dict(type=LocalRunner, max_num_workers=32, task=dict(type=SubjectiveEvalTask, judge_cfg=judge_model)),
partitioner=dict(type=SubjectiveSizePartitioner, max_task_size=100000, mode='singlescore', models=models, judge_models=judge_models),
runner=dict(type=LocalRunner, max_num_workers=32, task=dict(type=SubjectiveEvalTask)),
)

summarizer = dict(type=MTBench101Summarizer, judge_type='single')
Expand Down
8 changes: 0 additions & 8 deletions opencompass/datasets/subjective/mtbench101.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,22 +272,14 @@ def load(self, path: str, name: str):
# filename = osp.join(path, 'mtbench101.jsonl')
dataset = DatasetDict()
raw_data = []
print('load...........')


lines = open(filename, 'r', encoding='utf-8').readlines()
conversations = []
for line in lines:
line = json.loads(line)
conversations.append(line)


step = 0
print('=========dataset========================')

for dialogue in conversations:


multi_id = dialogue['id']
task = dialogue['task']
if task in skip_first_tasks:
Expand Down
8 changes: 3 additions & 5 deletions opencompass/summarizers/subjective/mtbench101.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,7 @@ def post_process_mtbench101(judgement: str):

else:
return None
print('=========judgement=========')
print(judgement)


return {'score': score,'judgement':judgement}


Expand Down Expand Up @@ -79,7 +77,7 @@ def get_final_results(judged_answers, references,output_dir,fout_flag,model):
fout = osp.join(
output_dir,
'task_score.csv')

columns = list(final_task_scores.keys())

print('================task_score=====================')
Expand Down Expand Up @@ -113,7 +111,7 @@ def __init__(self, config: ConfigDict, judge_type='single') -> None:
model_abbr_from_cfg(model) for model in self.eval_model_cfgs
]

self.judge_abbr = model_abbr_from_cfg(self.cfg['judge_model'])
self.judge_abbr = model_abbr_from_cfg(self.cfg['judge_models'][0])

self.judge_function =post_process_mtbench101

Expand Down
2 changes: 1 addition & 1 deletion readme_mtbench101.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
## 💥What's New

- **[2024.02.22]** Our paper is now accessible at https://arxiv.org/abs/2402.14762.
- **[2024.05.15]** MT-Bench-101 is accepted to the ACL 2024 main conference.
- **[2024.05.15]** MT-Bench-101 has been accepted to the ACL 2024 main conference.
- **[2024.05.28]** Code and dataset are now available.

## About MT-Bench-101
Expand Down

0 comments on commit 6976666

Please sign in to comment.