Skip to content

Commit

Permalink
Merge commit '07a6dacf33141fdd176c5870574cbba5b73c27e3' into mtbench101
Browse files Browse the repository at this point in the history
  • Loading branch information
xingyuanbu committed May 26, 2024
2 parents 3da589c + 07a6dac commit 880f00e
Show file tree
Hide file tree
Showing 1,217 changed files with 32,233 additions and 19,229 deletions.
2 changes: 1 addition & 1 deletion .github/scripts/oc_score_assert.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

output_path = 'regression_result_daily'

model_list = ['internlm-7b-hf', 'internlm-chat-7b-hf', 'chatglm3-6b-base-hf']
model_list = ['internlm2-7b-hf', 'internlm-chat-7b-hf', 'chatglm3-6b-base-hf']
dataset_list = [
'ARC-c', 'chid-dev', 'chid-test', 'openai_humaneval', 'openbookqa',
'openbookqa_fact'
Expand Down
30 changes: 19 additions & 11 deletions .github/scripts/oc_score_baseline.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
internlm-7b-hf:
ARC-c: 36.27
chid-dev: 81.68
chid-test: 83.67
openai_humaneval: 10.37
openbookqa: 44.4
openbookqa_fact: 73.2
ARC-c: 34.24
chid-dev: 79.70
chid-test: 81.12
openai_humaneval: 10.98
openbookqa: 47.20
openbookqa_fact: 74.00

internlm-chat-7b-hf:
ARC-c: 36.95
Expand All @@ -15,9 +15,17 @@ internlm-chat-7b-hf:
openbookqa_fact: 80.4

chatglm3-6b-base-hf:
ARC-c: 43.05
chid-dev: 80.2
chid-test: 80.77
ARC-c: 44.41
chid-dev: 78.22
chid-test: 78.57
openai_humaneval: 20.73
openbookqa: 79.8
openbookqa_fact: 92.2
openbookqa: 78.40
openbookqa_fact: 92.00

internlm2-7b-hf:
ARC-c: 34.92
chid-dev: 55.94
chid-test: 53.70
openai_humaneval: 44.51
openbookqa: 83.00
openbookqa_fact: 83.00
77 changes: 77 additions & 0 deletions .github/scripts/pr_oc_score_assert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import csv
import os

import pytest

# Directory that the result_scores fixture walks to find the summary CSV.
output_path = 'regression_result'
# CSV column (model name) whose scores the test looks up.
model = 'internlm2-chat-7b-hf'
# Value of the CSV's 'dataset' column whose score is compared to the baseline.
dataset = 'siqa'


@pytest.fixture()
def result_scores():
    """Provide the parsed scores of the result CSV under ``output_path``.

    Returns:
        The mapping built by ``read_csv_file`` for the CSV located by
        ``find_csv_files``, or ``None`` when no CSV file was found.
    """
    csv_path = find_csv_files(output_path)
    return None if csv_path is None else read_csv_file(csv_path)


@pytest.mark.usefixtures('result_scores')
class TestChatScore:
    """Test cases for chat model."""

    def test_model_dataset_score(self, result_scores):
        """Check the score of ``model`` on ``dataset`` against 79.53.

        Args:
            result_scores: Fixture value; a ``{model: {dataset: score}}``
                mapping, or ``None`` when no result CSV was found.
        """
        # Fail with a readable message instead of an AttributeError on None
        # when the run produced no CSV or the model column is missing.
        assert result_scores is not None, 'no result csv file found'
        model_scores = result_scores.get(model)
        assert model_scores is not None, 'no scores found for ' + model
        # A missing dataset yields None, which assert_score reports as a
        # missing value.
        assert_score(model_scores.get(dataset), 79.53)


def assert_score(score, baseline):
    """Assert that ``score`` lies strictly within 3% of ``baseline``.

    Args:
        score: Measured score. Usually the raw string read from the result
            CSV, but numeric values are accepted too. ``None`` or ``'-'``
            is treated as a missing result.
        baseline: Reference score the measurement is compared against.

    Raises:
        AssertionError: If the score is missing, or is not inside the open
            interval ``(baseline * 0.97, baseline * 1.03)``.
        ValueError: If ``score`` cannot be converted with ``float()``.
    """
    # Raise explicitly rather than ``assert False`` so the check still fires
    # when Python runs with assertions disabled (-O).
    if score is None or score == '-':
        raise AssertionError('value is none')

    lower = baseline * 0.97
    upper = baseline * 1.03
    bounds = str(lower) + ' and ' + str(upper)
    # str(score) keeps the messages working for numeric scores; plain
    # concatenation raised TypeError for anything that was not a string.
    if lower < float(score) < upper:
        print(str(score) + ' between ' + bounds)
    else:
        raise AssertionError(str(score) + ' not between ' + bounds)


def find_csv_files(directory):
    """Locate the single CSV result file underneath ``directory``.

    Args:
        directory: Root directory; it is walked recursively.

    Returns:
        The path of the only file whose name ends with ``.csv``, or ``None``
        when no such file exists.

    Raises:
        RuntimeError: If more than one CSV file is found.
    """
    csv_files = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(directory)
        for name in names
        if name.endswith('.csv')
    ]
    if len(csv_files) > 1:
        # Raising a bare string is itself a TypeError in Python 3 and hides
        # the message, so raise a real exception carrying the same text.
        raise RuntimeError(
            'have more than 1 result file, please check the result manually')
    if not csv_files:
        return None
    return csv_files[0]


def read_csv_file(file_path):
    """Load a result CSV into a ``{column: {dataset: value}}`` mapping.

    The ``version``, ``metric`` and ``mode`` columns are dropped. The
    ``dataset`` column supplies the inner keys, and every remaining column
    (looked up by model name in the test) becomes an outer key.

    Args:
        file_path: Path of a CSV file whose first row is the header.

    Returns:
        A dict mapping each remaining column name to a dict of
        ``{dataset name: raw cell string}``.
    """
    skipped = ('version', 'metric', 'mode', 'dataset')
    result = {}
    # newline='' lets the csv module do its own newline handling, which it
    # needs to read quoted fields containing line breaks correctly.
    with open(file_path, 'r', newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            # Named dataset_name so it does not shadow the module-level
            # ``dataset`` constant.
            dataset_name = row.get('dataset')
            for column, value in row.items():
                if column not in skipped:
                    result.setdefault(column, {})[dataset_name] = value
    return result
11 changes: 7 additions & 4 deletions .github/workflows/daily-run-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ env:
PIP_CACHE_PATH: /cpfs01/user/qa-llm-cicd/.cache/pip
USERSPACE_PREFIX: /cpfs01/user/qa-llm-cicd
HF_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
HF_DATASETS_OFFLINE: 1
TRANSFORMERS_OFFLINE: 1
HF_HUB_OFFLINE: 1

jobs:
daily_run_test:
Expand All @@ -35,22 +38,21 @@ jobs:
eval "$(conda shell.bash hook)"
conda activate ${{env.CONDA_ENV}}
pip install -e . --cache-dir ${{env.PIP_CACHE_PATH}}
pip install human_eval transformers==4.33.0 --cache-dir ${{env.PIP_CACHE_PATH}}
pip install human_eval transformers==4.33.0 protobuf --cache-dir ${{env.PIP_CACHE_PATH}}
conda info --envs
- name: Prepare - prepare data and hf model
run: |
cp -r ${{env.USERSPACE_PREFIX}}/data .
rm -rf ~/.cache/huggingface/hub -f && mkdir ~/.cache -p && mkdir ~/.cache/huggingface -p
ln -s ${{env.HF_CACHE_PATH}} ~/.cache/huggingface/hub
export HF_DATASETS_OFFLINE=1; export TRANSFORMERS_OFFLINE=1;
- name: Run test
run: |
eval "$(conda shell.bash hook)"
conda activate ${{env.CONDA_ENV}}
conda info --envs
rm -rf regression_result_daily
export from_tf=TRUE
python3 run.py --models hf_internlm_chat_7b hf_internlm_7b hf_chatglm3_6b_base hf_chatglm3_6b hf_qwen_7b_chat hf_qwen_7b --datasets FewCLUE_chid_ppl humaneval_gen ARC_c_ppl obqa_ppl --work-dir regression_result_daily
python3 run.py --models hf_internlm_chat_7b hf_internlm2_7b hf_chatglm3_6b_base hf_chatglm3_6b hf_qwen_7b_chat hf_qwen_7b --datasets FewCLUE_chid_ppl humaneval_gen ARC_c_ppl obqa_ppl --work-dir regression_result_daily
- name: Get result
run: |
eval "$(conda shell.bash hook)"
Expand All @@ -59,8 +61,9 @@ jobs:
- name: Remove Conda Env
if: always()
run: |
cp -r regression_result_daily/* /cpfs01/user/qa-llm-cicd/report
eval "$(conda shell.bash hook)"
conda env remove --name ${{env.CONDA_ENV}}
conda env remove -y --name ${{env.CONDA_ENV}}
conda info --envs
notify_to_feishu:
Expand Down
6 changes: 5 additions & 1 deletion .github/workflows/link-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,8 @@ jobs:
- name: linkchecker
run: |
pip install linkchecker
linkchecker https://opencompass.readthedocs.io/ --no-robots -t 30
# Backslash continuations keep this one command (a trailing '|' would start a
# pipeline instead); patterns are single-quoted so the shell passes the
# regexes, including '(en|zh_CN)' and the '\.' escapes, through untouched.
linkchecker https://opencompass.readthedocs.io/ --no-robots -t 30 --no-warnings \
  --ignore-url 'https://opencompass\.readthedocs\.io/.*/static/images/opencompass_logo\.svg' \
  --ignore-url 'https://opencompass\.readthedocs\.io/.*/_static/images/icon-menu-dots\.svg' \
  --ignore-url 'https://opencompass\.readthedocs\.io/policy' \
  --ignore-url 'https://opencompass\.readthedocs\.io/(en|zh_CN)/[0-9a-f]{40}/.*'
12 changes: 7 additions & 5 deletions .github/workflows/pr-run-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ env:
CONDA_ENV: opencompass_base
USERSPACE_PREFIX: /cpfs01/user/qa-llm-cicd
HF_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
HF_DATASETS_OFFLINE: 1
TRANSFORMERS_OFFLINE: 1
HF_HUB_OFFLINE: 1

jobs:
pr_run_test:
Expand All @@ -42,21 +45,20 @@ jobs:
cp -r ${{env.USERSPACE_PREFIX}}/data .
rm -rf ~/.cache/huggingface/hub -f && mkdir ~/.cache -p && mkdir ~/.cache/huggingface -p
ln -s ${{env.HF_CACHE_PATH}} ~/.cache/huggingface/hub
export HF_DATASETS_OFFLINE=1; export TRANSFORMERS_OFFLINE=1;
- name: Run test
run: |
eval "$(conda shell.bash hook)"
conda activate ${{env.CONDA_ENV}}
conda info --envs
rm -rf regression_result
python3 run.py --models hf_internlm_chat_7b --datasets siqa_gen --work-dir regression_result --debug
python3 run.py --models hf_internlm2_chat_7b --datasets siqa_gen --work-dir regression_result --debug
- name: Get result
run: |
score=$(sed -n '$p' regression_result/*/summary/*.csv | awk -F ',' '{print $NF}')
if (( ${score%.*} >= 70 && ${score%.*} <= 75 )); then
echo "score is $score between 70 and 75"
if (( ${score%.*} >= 79 && ${score%.*} <= 81 )); then
echo "score is $score between 79 and 81"
else
echo "score is $score not between 70 and 75"
echo "score is $score not between 79 and 81"
exit 1
fi
rm -rf regression_result
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/pr-stage-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ jobs:
run: python -m pip install -e .
- name: Prepare dataset
run: |
wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-core-20231110.zip
unzip OpenCompassData-core-20231110.zip
wget https://github.com/open-compass/opencompass/releases/download/0.2.2.rc1/OpenCompassData-core-20240207.zip
unzip OpenCompassData-core-20240207.zip
- name: Dry run test
run: |
python run.py --models hf_opt_125m --datasets siqa_gen winograd_ppl --dry-run
Expand Down Expand Up @@ -85,8 +85,8 @@ jobs:
run: python -m pip install -e .
- name: Prepare dataset
run: |
wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-core-20231110.zip
unzip OpenCompassData-core-20231110.zip
wget https://github.com/open-compass/opencompass/releases/download/0.2.2.rc1/OpenCompassData-core-20240207.zip
unzip OpenCompassData-core-20240207.zip
- name: Dry run test
run: |
python run.py --models hf_opt_125m --datasets siqa_gen winograd_ppl --dry-run
Expand Down Expand Up @@ -114,8 +114,8 @@ jobs:
run: pip install -e .
- name: Prepare dataset
run: |
Invoke-WebRequest -Uri https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-core-20231110.zip -OutFile OpenCompassData-core-20231110.zip
unzip OpenCompassData-core-20231110.zip
Invoke-WebRequest -Uri https://github.com/open-compass/opencompass/releases/download/0.2.2.rc1/OpenCompassData-core-20240207.zip -OutFile OpenCompassData-core-20240207.zip
unzip OpenCompassData-core-20240207.zip
- name: Dry run test
run: |
python run.py --models hf_opt_125m --datasets siqa_gen winograd_ppl --dry-run
12 changes: 12 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@

output_*/
outputs/
scripts/
icl_inference_output/
.vscode/
tmp/
Expand All @@ -10,8 +11,11 @@ configs/secrets.py
configs/datasets/log.json
configs/eval_debug*.py
configs/viz_*.py
configs/**/*_bkup.py
opencompass/**/*_bkup.py
data
work_dirs
outputs
models/*
configs/internal/
# Byte-compiled / optimized / DLL files
Expand Down Expand Up @@ -91,8 +95,12 @@ docs/zh_cn/_build/

# sft config ignore list
configs/sft_cfg/*B_*
configs/sft_cfg/1B/*
configs/sft_cfg/7B/*
configs/sft_cfg/20B/*
configs/sft_cfg/60B/*
configs/sft_cfg/100B/*

configs/cky/
# in case llama clone in the opencompass
llama/
Expand All @@ -117,6 +125,10 @@ turbomind/
*.txt
*.jpg
*.json
*.jsonl
*.csv
*.npy
*.c

# aliyun
core.*
2 changes: 1 addition & 1 deletion .owners.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@ assign:
assignees:
- Leymore
- bittersweet1999
- yingfhu
- liushz
- kennymckormick
- tonysy
8 changes: 3 additions & 5 deletions .pre-commit-config-zh-cn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ exclude: |
opencompass/datasets/medbench/|
opencompass/datasets/teval/|
opencompass/datasets/NPHardEval/|
opencompass/datasets/TheoremQA|
docs/zh_cn/advanced_guides/compassbench_intro.md
)
repos:
Expand Down Expand Up @@ -44,26 +45,23 @@ repos:
(?x)^(
dicts/|
projects/.*?/dicts/|
configs/
configs/.*?/.*\.txt
)
- id: check-yaml
- id: end-of-file-fixer
exclude: |
(?x)^(
dicts/|
projects/.*?/dicts/|
configs/
configs/.*?/.*\.txt
)
- id: requirements-txt-fixer
- id: double-quote-string-fixer
exclude: configs/
- id: check-merge-conflict
- id: fix-encoding-pragma
args: ["--remove"]
- id: mixed-line-ending
args: ["--fix=lf"]
- id: mixed-line-ending
args: ["--fix=lf"]
- repo: https://gitee.com/openmmlab/mirrors-mdformat
rev: 0.7.9
hooks:
Expand Down
8 changes: 3 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ exclude: |
opencompass/datasets/medbench/|
opencompass/datasets/teval/|
opencompass/datasets/NPHardEval/|
opencompass/datasets/TheoremQA|
docs/zh_cn/advanced_guides/compassbench_intro.md
)
repos:
Expand Down Expand Up @@ -44,26 +45,23 @@ repos:
(?x)^(
dicts/|
projects/.*?/dicts/|
configs/
configs/.*?/.*\.txt
)
- id: check-yaml
- id: end-of-file-fixer
exclude: |
(?x)^(
dicts/|
projects/.*?/dicts/|
configs/
configs/.*?/.*\.txt
)
- id: requirements-txt-fixer
- id: double-quote-string-fixer
exclude: configs/
- id: check-merge-conflict
- id: fix-encoding-pragma
args: ["--remove"]
- id: mixed-line-ending
args: ["--fix=lf"]
- id: mixed-line-ending
args: ["--fix=lf"]
- repo: https://github.com/executablebooks/mdformat
rev: 0.7.9
hooks:
Expand Down
Loading

0 comments on commit 880f00e

Please sign in to comment.