Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ save*
*.pid
*.ipynb*
.venv/
*.sh
*.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# RTN FP8 (e4m3) weight + activation quantization of IndustrialCoder-32B,
# evaluated on wikitext2 and exported in vLLM format.
base:
    seed: &seed 42
model:
    type: IndustrialCoder
    path: model/IndustrialCoder-32B
    tokenizer_mode: slow
    torch_dtype: auto
    # Reduce peak memory in catcher stage for large models.
    use_cpu_to_save_cuda_mem_for_catcher: False
eval:
    eval_pos: [fake_quant]
    name: wikitext2
    download: True
    seq_len: 2048
    bs: 1
    inference_per_block: False
quant:
    # RTN needs no calibration data, hence no calib section.
    method: RTN
    weight:
        quant_type: float-quant
        bit: e4m3
        symmetric: True
        granularity: per_channel
        use_qtorch: True
    act:
        quant_type: float-quant
        bit: e4m3
        symmetric: True
        granularity: per_token
        use_qtorch: True
save:
    save_vllm: True
    save_path: ./save_for_vllm/industrialcoder_rtn_fp8_wikitext/
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# AWQ W4A16 (per-group, group_size=128) quantization of IndustrialCoder-32B,
# calibrated on pileval, evaluated on wikitext2, exported in vLLM format.
base:
    seed: &seed 42
model:
    type: IndustrialCoder
    path: model/IndustrialCoder-32B
    tokenizer_mode: slow
    torch_dtype: auto
    # Reduce peak memory in catcher stage for large models.
    use_cpu_to_save_cuda_mem_for_catcher: False
calib:
    name: pileval
    download: True
    # path: calib data path
    n_samples: 128
    # bs = -1: batch all calibration samples together.
    bs: -1
    seq_len: 512
    preproc: txt_general_preproc
    seed: *seed
eval:
    eval_pos: [fake_quant]
    name: wikitext2
    download: True
    seq_len: 2048
    # Large-model eval settings: larger batch with per-block inference.
    bs: 20
    inference_per_block: True
quant:
    method: Awq
    weight:
        bit: 4
        symmetric: True
        granularity: per_group
        group_size: 128
        need_pack: True
    special:
        trans: True
        trans_version: v2
        weight_clip: True
    quant_out: True
save:
    save_vllm: True
    # NOTE(review): directory name contains "rtn" but the method is Awq —
    # confirm the intended output directory name.
    save_path: ./save_for_vllm/industrialcoder_rtn_int_awq_wikitext/
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# GPTQ W4A16 (per-group, group_size=128) quantization of IndustrialCoder-32B,
# calibrated and evaluated on wikitext2, exported in vLLM format.
base:
    seed: &seed 42
model:
    type: IndustrialCoder
    path: model/IndustrialCoder-32B
    tokenizer_mode: slow
    torch_dtype: auto
    # Reduce peak memory in catcher stage for large models.
    use_cpu_to_save_cuda_mem_for_catcher: False
calib:
    name: wikitext2
    download: True
    n_samples: 128
    # path: calib data path
    bs: 1
    seq_len: 2048
    preproc: wikitext2_gptq
    seed: *seed
eval:
    eval_pos: [fake_quant]
    name: wikitext2
    download: True
    seq_len: 2048
    # Large-model eval settings: larger batch with per-block inference.
    bs: 20
    inference_per_block: True
quant:
    method: GPTQ
    weight:
        bit: 4
        symmetric: True
        granularity: per_group
        group_size: 128
        need_pack: True
    special:
        actorder: True
        static_groups: True
        percdamp: 0.01
        blocksize: 128
        true_sequential: True
    quant_out: True
save:
    save_vllm: True
    # NOTE(review): directory name contains "rtn" but the method is GPTQ —
    # confirm the intended output directory name.
    save_path: ./save_for_vllm/industrialcoder_rtn_int_gptq_wikitext/
33 changes: 33 additions & 0 deletions configs/quantization/backend/vllm/fp8/thinking_model_fp8.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# RTN FP8 (e4m3) weight + activation quantization of IndustrialCoder-Thinking,
# evaluated on wikitext2 and exported in vLLM format.
base:
    seed: &seed 42
model:
    type: IndustrialCoder
    # NOTE(review): hard-coded user-specific absolute path — replace with a
    # repo-relative path (or placeholder) before merging.
    path: /mnt/lm_data_afs/wangzining/charles/industrial_thinking/IndustrialCoder-Thinking
    tokenizer_mode: slow
    torch_dtype: auto
    # Reduce peak memory in catcher stage for large models.
    use_cpu_to_save_cuda_mem_for_catcher: False
eval:
    eval_pos: [fake_quant]
    name: wikitext2
    download: True
    seq_len: 2048
    bs: 1
    inference_per_block: False
quant:
    # RTN needs no calibration data, hence no calib section.
    method: RTN
    weight:
        quant_type: float-quant
        bit: e4m3
        symmetric: True
        granularity: per_channel
        use_qtorch: True
    act:
        quant_type: float-quant
        bit: e4m3
        symmetric: True
        granularity: per_token
        use_qtorch: True
save:
    save_vllm: True
    save_path: ./save_for_vllm/thinking_rtn_fp8_wikitext/
41 changes: 41 additions & 0 deletions configs/quantization/backend/vllm/thinkingmodel/gptq_w4a16.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# GPTQ W4A16 (per-group, group_size=128) quantization of IndustrialCoder-Thinking,
# calibrated and evaluated on wikitext2, exported in vLLM format.
base:
    # NOTE(review): sibling configs use seed 42 — confirm 0 is intentional here.
    seed: &seed 0
model:
    type: IndustrialCoder
    # NOTE(review): hard-coded user-specific absolute path — replace with a
    # repo-relative path (or placeholder) before merging.
    path: /mnt/lm_data_afs/wangzining/charles/lab/llmc/thinkingmodel/IndustrialCoder-Thinking
    torch_dtype: auto
calib:
    name: wikitext2
    download: True
    n_samples: 128
    # path: calib data path
    bs: 1
    seq_len: 2048
    preproc: wikitext2_gptq
    seed: *seed
eval:
    eval_pos: [fake_quant]
    name: wikitext2
    download: True
    # path: eval data path
    bs: 1
    seq_len: 2048
    inference_per_block: False
quant:
    method: GPTQ
    weight:
        bit: 4
        symmetric: True
        granularity: per_group
        group_size: 128
        need_pack: True
    special:
        actorder: True
        static_groups: True
        percdamp: 0.01
        blocksize: 128
        true_sequential: True
    quant_out: True
save:
    save_vllm: True
    # NOTE(review): hard-coded user-specific absolute output path — confirm.
    save_path: /mnt/lm_data_afs/wangzining/charles/lab/llmc/save_for_vllm/thinking_gptq_w4/
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# AWQ W4A16 (per-group, group_size=128) quantization of IndustrialCoder-Thinking,
# calibrated and evaluated on wikitext2, exported in vLLM format.
base:
    seed: &seed 42
model:
    type: IndustrialCoder
    # NOTE(review): hard-coded user-specific absolute path — replace with a
    # repo-relative path (or placeholder) before merging.
    path: /mnt/lm_data_afs/wangzining/charles/industrial_thinking/IndustrialCoder-Thinking
    tokenizer_mode: slow
    torch_dtype: auto
    # Reduce peak memory in catcher stage for large models.
    use_cpu_to_save_cuda_mem_for_catcher: False
calib:
    name: wikitext2
    download: True
    # path: /mnt/lm_data_afs/wangzining/charles/datasets/wikitext2
    n_samples: 128
    bs: 1
    seq_len: 512
    preproc: txt_general_preproc
    seed: *seed
eval:
    eval_pos: [fake_quant]
    name: wikitext2
    download: True
    # path: /mnt/lm_data_afs/wangzining/charles/datasets/wikitext2
    seq_len: 2048
    bs: 1
    inference_per_block: False
quant:
    method: Awq
    weight:
        bit: 4
        symmetric: True
        granularity: per_group
        group_size: 128
        need_pack: True
    special:
        trans: True
        trans_version: v2
        weight_clip: True
    quant_out: True
save:
    save_vllm: True
    # NOTE(review): hard-coded user-specific absolute output path — confirm.
    save_path: /mnt/lm_data_afs/wangzining/charles/lab/llmc/save_for_vllm/thinking_awq_w4/
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# AWQ W4A16 (per-group, group_size=128) quantization template for
# IndustrialCoder-Thinking, exported in vLLM format. Placeholder paths below
# must be filled in by the user.
base:
    seed: &seed 42
model:
    type: IndustrialCoder
    # NOTE(review): hard-coded user-specific absolute path — replace with a
    # placeholder consistent with the other placeholder paths in this file.
    path: /mnt/lm_data_afs/wangzining/charles/industrial_thinking/IndustrialCoder-Thinking
    tokenizer_mode: slow
    torch_dtype: auto
calib:
    name: pileval
    download: False
    # TODO: with download: False this literal placeholder is used as-is —
    # replace with a real local dataset path before running.
    path: calib data path
    n_samples: 128
    # bs = -1: batch all calibration samples together.
    bs: -1
    seq_len: 512
    preproc: txt_general_preproc
    seed: *seed
eval:
    eval_pos: [fake_quant]
    name: wikitext2
    download: False
    # TODO: replace with a real local dataset path before running.
    path: eval data path
    seq_len: 2048
    # For 7B / 13B model eval, bs can be set to "1", and inference_per_block can be set to "False".
    # For 70B model eval, bs can be set to "20", and inference_per_block can be set to "True".
    bs: 1
    inference_per_block: False
quant:
    method: Awq
    weight:
        bit: 4
        symmetric: True
        granularity: per_group
        group_size: 128
        need_pack: True
    special:
        trans: True
        trans_version: v2
        weight_clip: True
    quant_out: True
save:
    save_vllm: True
    save_path: /path/to/save_for_vllm_awq_w4/
54 changes: 54 additions & 0 deletions configs/quantization/video_gen/wan2_2_t2v/awq_w_a.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# AWQ 4-bit (hif4) weight + activation quantization of Wan2.2 T2V (Diffusers),
# with video-generation calibration/eval, exported in lightx2v format.
base:
    seed: &seed 42
model:
    type: Wan2T2V
    # NOTE(review): hard-coded user-specific absolute path — replace with a
    # repo-relative path (or placeholder) before merging.
    path: /mnt/lm_data_afs/wangzining/charles/lab/llmc/model/Wan2.2-T2V-A14B-Diffusers
    torch_dtype: auto
calib:
    name: t2v
    download: False
    path: ./assets/wan_t2v/calib/
    sample_steps: 20
    bs: 1
    target_height: 480
    target_width: 832
    num_frames: 81
    guidance_scale: 5.0
    seed: *seed
eval:
    eval_pos: [transformed, fake_quant]
    type: video_gen
    name: t2v
    download: False
    path: ./assets/wan_t2v/calib/
    bs: 1
    target_height: 480
    target_width: 832
    num_frames: 81
    guidance_scale: 5.0
    output_video_path: ./output_videos_awq/
    inference_per_block: True
quant:
    video_gen:
        method: Awq
        weight:
            # quant_type: int-quant
            quant_type: hif4
            bit: 4
            symmetric: True
            granularity: per_channel
            # group_size = -1: no grouping; scales are per-channel.
            group_size: -1
        act:
            # quant_type: int-quant
            quant_type: hif4
            bit: 4
            symmetric: True
            granularity: per_token
        special:
            trans: True
            trans_version: v2
            weight_clip: True
            clip_sym: True
save:
    save_lightx2v: True
    save_path: ./save_for_lightx2v/wan2_2_t2v/awq_w_a/original/
8 changes: 4 additions & 4 deletions configs/quantization/video_gen/wan_i2v/awq_w_a.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ base:
seed: &seed 42
model:
type: WanI2V
path: /path/to/model
path: /mnt/lm_data_afs/wangzining/charles/lab/llmc/models/Wan2.2-T2V-A14B/
torch_dtype: auto
calib:
name: i2v
Expand Down Expand Up @@ -31,12 +31,12 @@ quant:
video_gen:
method: Awq
weight:
bit: 8
bit: 4
symmetric: True
granularity: per_channel
group_size: -1
act:
bit: 8
bit: 4
symmetric: True
granularity: per_token
special:
Expand All @@ -46,4 +46,4 @@ quant:
clip_sym: True
save:
save_lightx2v: True
save_path: /path/to/x2v/
save_path: ../lightx2v/wan_i2v_awq_w_a/x2v/
Loading