Commit fbffc31

add vl awq testcase and refactor pipeline testcase (InternLM#1630)

* update * update * update * add passkey in longtext testcase * update * update * update * update * update * update * update * update * update * update * fix evaluate * update * update * update * update * update

1 parent cd19422 · commit fbffc31

16 files changed: +1421 −485 lines

.github/scripts/eval_opencompass_config.py (+73 −6)

```diff
@@ -495,26 +495,40 @@
 tb_qwen_chat_7b = dict(type=TurboMindModel,
                        abbr='qwen-7b-chat-turbomind',
                        path='Qwen/Qwen-7B-Chat',
-                       engine_config=tb_engine_config_template_max_bs_16,
+                       engine_config=tb_engine_config_template_max_bs_128,
                        gen_config=qwen_gen_config_template,
                        max_out_len=MAX_NEW_TOKENS,
                        max_seq_len=MAX_SESSION_LEN,
-                       batch_size=16,
-                       concurrency=16,
+                       batch_size=128,
+                       concurrency=128,
                        meta_template=qwen_meta_template,
                        run_cfg=run_cfg_tp1_template,
                        end_str='<|im_end|>')
 
+tb_qwen_chat_7b_w4a16 = dict(
+    type=TurboMindModel,
+    abbr='qwen-7b-chat-4bits-turbomind',
+    path='Qwen/Qwen-7B-Chat-inner-4bits',
+    engine_config=tb_awq_engine_config_template_max_bs_128,
+    gen_config=qwen_gen_config_template,
+    max_out_len=MAX_NEW_TOKENS,
+    max_seq_len=MAX_SESSION_LEN,
+    batch_size=128,
+    concurrency=128,
+    meta_template=qwen_meta_template,
+    run_cfg=run_cfg_tp1_template,
+    end_str='<|im_end|>')
+
 # config for qwen-chat-7b pytorch
 pt_qwen_chat_7b = dict(type=LmdeployPytorchModel,
                        abbr='qwen-7b-chat-pytorch',
                        path='Qwen/Qwen-7B-Chat',
-                       engine_config=pt_engine_config_template_max_bs_16,
+                       engine_config=pt_engine_config_template_max_bs_64,
                        gen_config=qwen_gen_config_template,
                        max_out_len=MAX_NEW_TOKENS,
                        max_seq_len=MAX_SESSION_LEN,
-                       batch_size=16,
-                       concurrency=16,
+                       batch_size=64,
+                       concurrency=64,
                        meta_template=qwen_meta_template,
                        run_cfg=run_cfg_tp1_template,
                        end_str='<|im_end|>')
@@ -552,6 +566,21 @@
                          run_cfg=run_cfg_tp1_template,
                          end_str='[INST]')
 
+# config for llama2-chat-7b-w4a16 turbomind
+tb_llama2_chat_7b_wa416 = dict(
+    type=TurboMindModel,
+    abbr='llama-2-7b-chat-4bits-turbomind',
+    path='meta-llama/Llama-2-7b-chat-hf-inner-4bits',
+    engine_config=tb_awq_engine_config_template_max_bs_128,
+    gen_config=gen_config_template,
+    max_out_len=MAX_NEW_TOKENS,
+    max_seq_len=MAX_SESSION_LEN,
+    batch_size=128,
+    concurrency=128,
+    meta_template=llama2_meta_template,
+    run_cfg=run_cfg_tp1_template,
+    end_str='[INST]')
+
 # config for llama2-chat-7b pytorch
 pt_llama2_chat_7b = dict(type=LmdeployPytorchModel,
                          abbr='llama-2-7b-chat-pytorch',
@@ -854,3 +883,41 @@
                        meta_template=llama3_meta_template,
                        run_cfg=run_cfg_tp1_template,
                        end_str='[INST]')
+
+# config for code llama
+tb_codellama_7b_chat = dict(type=TurboMindModel,
+                            abbr='codellama-7b-chat-turbomind',
+                            path='codellama/CodeLlama-7b-Instruct-hf',
+                            engine_config=tb_engine_config_template_max_bs_128,
+                            gen_config=gen_config_template,
+                            max_out_len=MAX_NEW_TOKENS,
+                            max_seq_len=MAX_SESSION_LEN,
+                            batch_size=128,
+                            concurrency=128,
+                            run_cfg=dict(num_gpus=1, num_procs=1),
+                            end_str='</s>')
+
+tb_codellama_7b_chat_w4a16 = dict(
+    type=TurboMindModel,
+    abbr='codellama-7b-chat-4bits-turbomind',
+    path='codellama/CodeLlama-7b-Instruct-hf-inner-4bits',
+    engine_config=tb_awq_engine_config_template_max_bs_128,
+    gen_config=gen_config_template,
+    max_out_len=MAX_NEW_TOKENS,
+    max_seq_len=MAX_SESSION_LEN,
+    batch_size=128,
+    concurrency=128,
+    run_cfg=dict(num_gpus=1, num_procs=1),
+    end_str='</s>')
+
+pt_codellama_7b_chat = dict(type=LmdeployPytorchModel,
+                            abbr='codellama-7b-chat-pytorch',
+                            path='codellama/CodeLlama-7b-Instruct-hf',
+                            engine_config=pt_engine_config_template_max_bs_128,
+                            gen_config=gen_config_template,
+                            max_out_len=MAX_NEW_TOKENS,
+                            max_seq_len=MAX_SESSION_LEN,
+                            batch_size=128,
+                            concurrency=128,
+                            run_cfg=dict(num_gpus=1, num_procs=1),
+                            end_str='</s>')
```
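The new `*_w4a16` entries all reference `tb_awq_engine_config_template_max_bs_128`, whose definition sits outside this hunk. For orientation only, here is a minimal sketch of what such templates plausibly look like, assuming the plain-dict engine-config style used by the entries above; the field names follow lmdeploy's TurbomindEngineConfig, but the exact values are assumptions, not taken from this commit:

```python
# Sketch under stated assumptions: the real templates are defined earlier
# in eval_opencompass_config.py and are not shown in this diff.
MAX_SESSION_LEN = 2048  # stand-in; the config file defines its own constant

tb_engine_config_template_max_bs_128 = dict(session_len=MAX_SESSION_LEN,
                                            max_batch_size=128)

# AWQ variant: same shape, plus the flag that tells TurboMind the weights
# are 4-bit AWQ-quantized, matching the '-inner-4bits' model paths above.
tb_awq_engine_config_template_max_bs_128 = dict(session_len=MAX_SESSION_LEN,
                                                max_batch_size=128,
                                                model_format='awq')
```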

.github/scripts/set_benchmark_param.sh (+5)

```diff
@@ -33,3 +33,8 @@ if [[ $1 == *"internlm2-chat-20b"* ]] || [[ $1 == *"Qwen1.5-32B-Chat"* ]]
 then
     echo "TP_INFO=--tp 2" >> "$GITHUB_ENV"
 fi
+
+if [[ $1 == *"internlm2"* ]] || [[ $1 == *"Llama-3"* ]]
+then
+    echo "LONGTEXT_BENCHMARK=TRUE" >> "$GITHUB_ENV"
+fi
```
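The new block only exports a flag; the benchmark scripts that consume it are not part of this diff. A hypothetical sketch of the consuming side (the gating logic is an assumption; only the LONGTEXT_BENCHMARK name comes from this commit):

```python
import os

# Hypothetical consumer: run the long-context benchmark only for models
# whose names matched the internlm2/Llama-3 patterns above.
if os.environ.get('LONGTEXT_BENCHMARK') == 'TRUE':
    print('long-context benchmark enabled for this model')
```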

.github/workflows/benchmark.yml (+3 −3)

```diff
@@ -32,7 +32,7 @@ on:
       required: true
       description: 'Dependency packages, you can also set a specific version'
       type: string
-      default: 'packaging transformers_stream_generator transformers datasets matplotlib jmespath'
+      default: 'packaging transformers_stream_generator transformers==4.41.0 datasets matplotlib jmespath'
     default_tp:
       required: true
       description: 'Default tp value'
@@ -431,7 +431,7 @@
       - name: Start restful api turbomind - kvint4
         if: contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.kvint_quantization), 'kvint4')
         run: |
-          lmdeploy serve api_server $MODEL_PATH $MAX_ENTRY_COUNT $MODEL_FORMAT $TP_INFO --quant-policy 4 --log-level ${{inputs.log_level}} > turbomind_kvint4_run.log 2>&1 &
+          lmdeploy serve api_server $MODEL_PATH $MAX_ENTRY_COUNT $MAX_BATCH_SIZE $MODEL_FORMAT $TP_INFO --quant-policy 4 --log-level ${{inputs.log_level}} > turbomind_kvint4_run.log 2>&1 &
           echo "restful_pid=$!" >> "$GITHUB_ENV"
           sleep 180s
       - name: Run restful benchmark -kvint4
@@ -456,7 +456,7 @@
       - name: Start restful api turbomind - kvint8
         if: contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.kvint_quantization), 'kvint8')
         run: |
-          lmdeploy serve api_server $MODEL_PATH $MAX_ENTRY_COUNT $MODEL_FORMAT $TP_INFO --quant-policy 8 --log-level ${{inputs.log_level}} > turbomind_kvint8_run.log 2>&1 &
+          lmdeploy serve api_server $MODEL_PATH $MAX_ENTRY_COUNT $MAX_BATCH_SIZE $MODEL_FORMAT $TP_INFO --quant-policy 8 --log-level ${{inputs.log_level}} > turbomind_kvint8_run.log 2>&1 &
           echo "restful_pid=$!" >> "$GITHUB_ENV"
           sleep 180s
       - name: Run restful benchmark -kvint8
```

.github/workflows/daily_ete_test.yml (+10 −8)

```diff
@@ -32,7 +32,7 @@ on:
       required: true
       description: 'Dependency packages, you can also set a specific version'
       type: string
-      default: 'packaging transformers_stream_generator transformers datasets matplotlib openai attrdict timm modelscope jmespath'
+      default: 'packaging transformers_stream_generator transformers==4.41.0 datasets matplotlib openai attrdict timm modelscope jmespath'
     tools_regression:
       required: true
       description: 'Whether start a tool regression'
@@ -60,7 +60,7 @@ env:
 
 jobs:
   linux-build:
-    if: ${{github.event_name == 'schedule' || (!cancelled() && !inputs.offline_mode)}}
+    if: ${{!cancelled() && (github.event_name == 'schedule' || !inputs.offline_mode)}}
     strategy:
       matrix:
         pyver: [py38, py310]
@@ -96,7 +96,7 @@
 
   test_tools:
     needs: linux-build
-    if: ${{github.event_name == 'schedule' || (!cancelled() && inputs.tools_regression)}}
+    if: ${{!cancelled() && (github.event_name == 'schedule' || inputs.tools_regression)}}
     runs-on: [self-hosted, linux-a100]
     timeout-minutes: 300
     env:
@@ -137,7 +137,7 @@
           python3 -m pip install torch==2.2.1 torchvision==0.17.1 --index-url https://download.pytorch.org/whl/cu118
       - name: Install lmdeploy - dependency
         run: |
-          python3 -m pip install ${{inputs.dependency_pkgs || 'packaging transformers_stream_generator transformers datasets matplotlib openai attrdict timm modelscope jmespath'}}
+          python3 -m pip install ${{inputs.dependency_pkgs || 'packaging transformers_stream_generator transformers==4.41.0 datasets matplotlib openai attrdict timm modelscope jmespath'}}
           # manually install flash attn
           # the install packeage from. https://github.com/Dao-AILab/flash-attention/releases
           python3 -m pip install /root/packages/flash_attn-2.5.7+cu118torch2.2cxx11abiFALSE-cp38-cp38-linux_x86_64.whl
@@ -240,7 +240,9 @@
         continue-on-error: true
         if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'interface-pipeline'))
         run: |
-          pytest autotest/interface/pipeline -m 'not pr_test' --alluredir=allure-results
+          pytest autotest/interface/pipeline/test_pipeline_func.py -m 'not pr_test' -n 4 --alluredir=allure-results || true
+          pytest autotest/interface/pipeline/test_pipeline_turbomind_longtext_func.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
+          pytest autotest/interface/pipeline/test_pipeline_turbomind_longtext_func.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
       - name: Test lmdeploy - local testcase
         if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.model), 'local_case')
         run: |
@@ -264,7 +266,7 @@
 
 
   test_triton:
-    if: ${{github.event_name == 'schedule' || (!cancelled() && inputs.triton_regression)}}
+    if: ${{!cancelled() && (github.event_name == 'schedule' || inputs.triton_regression)}}
     runs-on: [self-hosted, linux-a100-2]
     needs: test_tools
     timeout-minutes: 30
@@ -420,7 +422,7 @@
 
 
   test_restful:
-    if: ${{github.event_name == 'schedule' || (!cancelled() && inputs.restful_regression)}}
+    if: ${{!cancelled() && (github.event_name == 'schedule' || inputs.restful_regression)}}
     runs-on: [self-hosted, linux-a100]
     needs: test_tools
     strategy:
@@ -459,7 +461,7 @@
           python3 -m pip install torch==2.2.1 torchvision==0.17.1 --index-url https://download.pytorch.org/whl/cu118
       - name: Install lmdeploy - dependency
         run: |
-          python3 -m pip install ${{inputs.dependency_pkgs || 'packaging transformers_stream_generator transformers datasets matplotlib openai attrdict timm modelscope jmespath'}}
+          python3 -m pip install ${{inputs.dependency_pkgs || 'packaging transformers_stream_generator transformers==4.41.0 datasets matplotlib openai attrdict timm modelscope jmespath'}}
           # manually install flash attn
           # the install packeage from. https://github.com/Dao-AILab/flash-attention/releases
           python3 -m pip install /root/packages/flash_attn-2.5.7+cu118torch2.2cxx11abiFALSE-cp38-cp38-linux_x86_64.whl
```
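All four `if:` rewrites in this file apply the same fix: `!cancelled()` is hoisted out of the OR so that a scheduled run also stops when the workflow is cancelled. A tiny self-check in Python, with plain booleans standing in for the GitHub Actions expressions:

```python
# Boolean stand-ins for the old and new `if:` expressions.
def old_cond(schedule: bool, cancelled: bool, flag: bool) -> bool:
    return schedule or (not cancelled and flag)

def new_cond(schedule: bool, cancelled: bool, flag: bool) -> bool:
    return not cancelled and (schedule or flag)

# The two diverge exactly when a scheduled run gets cancelled:
print(old_cond(schedule=True, cancelled=True, flag=False))  # True  -> job still ran
print(new_cond(schedule=True, cancelled=True, flag=False))  # False -> job is skipped
```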

.github/workflows/evaluate.yml (+62 −31)

```diff
@@ -5,9 +5,9 @@ on:
   inputs:
     repo_org:
       required: false
-      description: 'Tested repository organization name. Default is InternLM'
+      description: 'Tested repository organization name. Default is InternLM/lmdeploy'
       type: string
-      default: InternLM
+      default: 'InternLM/lmdeploy'
     repo_ref:
       required: false
       description: 'Set branch or tag or commit id. Default is "main"'
@@ -28,10 +28,52 @@
       description: 'CUDA_VISIBLE_DEVICES.'
       type: string
       default: '0,1,2,3,4,5,6,7'
+    dependency_pkgs:
+      required: true
+      description: 'Dependency packages, you can also set a specific version'
+      type: string
+      default: 'pynvml packaging protobuf transformers_stream_generator transformers==4.41.0'
 
 
 jobs:
+  linux-build:
+    if: ${{github.event_name == 'schedule' || (!cancelled() && !inputs.offline_mode)}}
+    strategy:
+      matrix:
+        pyver: [py38]
+    runs-on: ubuntu-latest
+    env:
+      PYTHON_VERSION: ${{ matrix.pyver }}
+      PLAT_NAME: manylinux2014_x86_64
+      DOCKER_TAG: cuda11.8
+      OUTPUT_FOLDER: cuda11.8_dist_${{ github.run_id }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+        with:
+          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
+          ref: ${{github.event.inputs.repo_ref || 'main'}}
+      - name: Build
+        run: |
+          echo ${PYTHON_VERSION}
+          echo ${PLAT_NAME}
+          echo ${DOCKER_TAG}
+          echo ${OUTPUT_FOLDER}
+          echo ${GITHUB_RUN_ID}
+          # remove -it
+          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
+          bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
+      - name: Upload Artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          if-no-files-found: error
+          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
+          retention-days: 1
+          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}
+
   evaluate:
+    needs: linux-build
     runs-on: [self-hosted, linux-a100]
     timeout-minutes: 4320 # 72hours
     container:
@@ -48,52 +90,41 @@
     steps:
       - name: Setup systems
         run: |
-          rm /etc/apt/sources.list.d/cuda*.list
-          apt-get update && apt-get install -y --no-install-recommends rapidjson-dev \
-              libgoogle-glog-dev
-          rm -rf /var/lib/apt/lists
           export TIME_STAMP="$(date +'%Y%m%d-%H%M%S')"
           echo "TIME_STAMP=$TIME_STAMP" >> $GITHUB_ENV
-      - name: Checkout repository
+      - name: Clone repository
         uses: actions/checkout@v3
         with:
-          repository: '${{ github.event.inputs.repo_org}}/lmdeploy'
-          ref: ${{github.event.inputs.repo_ref}}
+          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
+          ref: ${{github.event.inputs.repo_ref || 'main'}}
+      - name: Download Artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: my-artifact-${{ github.run_id }}-py38
       - name: Install pytorch
        run: |
          python3 -m pip cache dir
          python3 -m pip install torch==2.1.0 torchvision==0.16.0 --index-url https://download.pytorch.org/whl/cu118
-      - name: Build lmdeploy
+      - name: Install lmdeploy - dependency
        run: |
-          python3 -m pip install cmake
-          python3 -m pip install -r requirements/build.txt
-          mkdir build
-          cd build
-          cmake .. \
-              -DCMAKE_BUILD_TYPE=RelWithDebInfo \
-              -DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
-              -DCMAKE_INSTALL_PREFIX=./install \
-              -DBUILD_PY_FFI=ON \
-              -DBUILD_MULTI_GPU=ON \
-              -DCMAKE_CUDA_FLAGS="-lineinfo" \
-              -DUSE_NVTX=ON \
-              -DSM=80 \
-              -DCMAKE_CUDA_ARCHITECTURES=80 \
-              -DBUILD_TEST=OFF
-          make -j$(nproc) && make install
-      - name: Install lmdeploy from source
-        run: |
-          python3 -m pip install pynvml packaging protobuf transformers_stream_generator
+          python3 -m pip install ${{inputs.dependency_pkgs }}
           # manually install flash attn
+          # the install packeage from. https://github.com/Dao-AILab/flash-attention/releases
           python3 -m pip install /root/packages/flash_attn-2.3.6+cu118torch2.1cxx11abiFALSE-cp38-cp38-linux_x86_64.whl
-          python3 -m pip install -r requirements.txt
-          python3 -m pip install .
+      - name: Install lmdeploy
+        run: |
+          python3 -m pip install lmdeploy-*.whl
+          python3 -m pip install -r requirements/test.txt
       - name: Install opencompass
         run: |
           git clone --depth=1 https://github.com/open-compass/opencompass.git
           cd opencompass
           python3 -m pip install --user -e .
           echo "OPENCOMPASS_DIR=$(pwd)" >> $GITHUB_ENV
+      - name: Check env
+        run: |
+          python3 -m pip list
+          lmdeploy check_env
       - name: Setup paths for evaluation
         run: |
           ln -s /root/opencompass-data ./data
```

.github/workflows/pr_ete_test.yml (+1)

```diff
@@ -58,6 +58,7 @@ jobs:
           python3 -m pip install -r requirements/build.txt
           mkdir build
           cd build
+          cp -r /nvme/qa_test_models/offline_pkg/_deps .
           cmake .. \
               -DCMAKE_BUILD_TYPE=RelWithDebInfo \
               -DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
```

autotest/config.yaml (+8)

```diff
@@ -86,6 +86,14 @@ quatization_case_config:
     - internlm/internlm2-20b
     - Qwen/Qwen1.5-7B-Chat
     - meta-llama/Meta-Llama-3-8B-Instruct
+    - Qwen/Qwen-VL-Chat
+    - liuhaotian/llava-v1.5-7b
+    - liuhaotian/llava-v1.5-13b
+    - liuhaotian/llava-v1.6-vicuna-7b
+    - 01-ai/Yi-VL-6B
+    - deepseek-ai/deepseek-vl-1.3b-chat
+    - OpenGVLab/InternVL-Chat-V1-5
+    - internlm/internlm-xcomposer2-vl-7b
   kvint:
     - meta-llama/Llama-2-7b-chat-hf
     - meta-llama/Meta-Llama-3-8B-Instruct
```
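These VL models are now covered by the AWQ quantization cases, matching the commit title's "vl awq testcase". As a hedged illustration of what such a testcase exercises, a minimal smoke test using lmdeploy's documented VLM pipeline API; the '-inner-4bits' path mirrors the naming convention used elsewhere in this commit and is an assumption about where the quantized artifact lives:

```python
from lmdeploy import pipeline, TurbomindEngineConfig
from lmdeploy.vl import load_image

# Assumed local path for the AWQ-quantized VL model; model_format='awq'
# tells TurboMind to load 4-bit AWQ weights.
pipe = pipeline('Qwen/Qwen-VL-Chat-inner-4bits',
                backend_config=TurbomindEngineConfig(model_format='awq'))

image = load_image(
    'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg')
print(pipe(('describe this image', image)))
```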
