Commit a29573a
add quantized model save and load for SD & LCM (#2588)
* add quantized model save and load for SD & LCM
1 parent: 0491221

File tree: 10 files changed, +407 −373 lines

models_v2/pytorch/LCM/inference/cpu/README.md

+6
@@ -62,6 +62,12 @@ bash download_dataset.sh
 | **BATCH_SIZE** (optional) | `export BATCH_SIZE=<set a value for batch size, else it will run with default batch size>` |
 | **TORCH_INDUCTOR** (optional) | `export TORCH_INDUCTOR=< 0 or 1> (Compile model with PyTorch Inductor backend)` |
 
+* NOTE:
+For `compile-inductor` mode, please run calibration to generate the quantized model before running `INT8-BF16` or `INT8-FP32`:
+```
+bash do_calibration.sh
+```
+
 8. Run `run_model.sh`
 
 ## Output

models_v2/pytorch/LCM/inference/cpu/diffusers.patch

+23 −14

@@ -1,5 +1,5 @@
 diff --git a/src/diffusers/models/transformer_2d.py b/src/diffusers/models/transformer_2d.py
-index 24abf54d..3fa7df5f 100644
+index 24abf54d6..3fa7df5f3 100644
 --- a/src/diffusers/models/transformer_2d.py
 +++ b/src/diffusers/models/transformer_2d.py
 @@ -385,7 +385,7 @@ class Transformer2DModel(ModelMixin, ConfigMixin):
@@ -21,7 +21,7 @@ index 24abf54d..3fa7df5f 100644
              output = hidden_states + residual
          elif self.is_input_vectorized:
 diff --git a/src/diffusers/models/unet_2d_condition.py b/src/diffusers/models/unet_2d_condition.py
-index f248b243..27d4802d 100644
+index f248b243f..7c83d2cf5 100644
 --- a/src/diffusers/models/unet_2d_condition.py
 +++ b/src/diffusers/models/unet_2d_condition.py
 @@ -799,8 +799,8 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
@@ -34,8 +34,17 @@ index f248b243..27d4802d 100644
          attention_mask: Optional[torch.Tensor] = None,
          cross_attention_kwargs: Optional[Dict[str, Any]] = None,
          added_cond_kwargs: Optional[Dict[str, torch.Tensor]] = None,
+@@ -808,7 +808,7 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
+         mid_block_additional_residual: Optional[torch.Tensor] = None,
+         down_intrablock_additional_residuals: Optional[Tuple[torch.Tensor]] = None,
+         encoder_attention_mask: Optional[torch.Tensor] = None,
+-        return_dict: bool = True,
++        return_dict: bool = False,
+     ) -> Union[UNet2DConditionOutput, Tuple]:
+         r"""
+         The [`UNet2DConditionModel`] forward method.
 diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py
-index ff5eea2d..10ea4af1 100644
+index ff5eea2d5..8a9461c87 100644
 --- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py
 +++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py
 @@ -701,17 +701,33 @@ class LatentConsistencyModelPipeline(
@@ -58,16 +67,16 @@ index ff5eea2d..10ea4af1 100644
 +            model_pred = self.traced_unet(
 +                latents.to(memory_format=torch.channels_last).to(dtype=self.precision),
 +                t,
-+                encoder_hidden_states=prompt_embeds.to(dtype=self.precision),
-+                timestep_cond=w_embedding.to(dtype=self.precision)
-+            )['sample']
++                prompt_embeds.to(dtype=self.precision),
++                w_embedding.to(dtype=self.precision)
++            )[0]
 +        elif hasattr(self, 'precision'):
 +            model_pred = self.unet(
 +                latents.to(memory_format=torch.channels_last).to(dtype=self.precision),
 +                t,
-+                encoder_hidden_states=prompt_embeds.to(dtype=self.precision),
-+                timestep_cond=w_embedding.to(dtype=self.precision)
-+            )['sample']
++                prompt_embeds.to(dtype=self.precision),
++                w_embedding.to(dtype=self.precision)
++            )[0]
 +        else:
 +            model_pred = self.unet(
 +                latents,
@@ -91,7 +100,7 @@ index ff5eea2d..10ea4af1 100644
         if not output_type == "latent":
             image = self.vae.decode(denoised / self.vae.config.scaling_factor, return_dict=False)[0]
 diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
-index 9911cbe7..98c7f2ab 100644
+index 9911cbe75..a4e7101e3 100644
 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
 +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
 @@ -832,19 +832,33 @@ class StableDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lo
@@ -116,14 +125,14 @@ index 9911cbe7..98c7f2ab 100644
 +            noise_pred = self.traced_unet(
 +                latent_model_input.to(memory_format=torch.channels_last).to(dtype=self.precision),
 +                t,
-+                encoder_hidden_states=prompt_embeds.to(dtype=self.precision)
-+            )['sample']
++                prompt_embeds.to(dtype=self.precision)
++            )[0]
 +        elif hasattr(self, 'precision'):
 +            noise_pred = self.unet(
 +                latent_model_input.to(memory_format=torch.channels_last).to(dtype=self.precision),
 +                t,
-+                encoder_hidden_states=prompt_embeds.to(dtype=self.precision)
-+            )['sample']
++                prompt_embeds.to(dtype=self.precision)
++            )[0]
 +        else:
 +            noise_pred = self.unet(
 +                latent_model_input,
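The recurring change in both pipelines is the same: keyword arguments (`encoder_hidden_states=`, `timestep_cond=`) become positional, `['sample']` becomes `[0]`, and the UNet's `return_dict` now defaults to `False`. A traced or exported graph takes a fixed positional signature and returns a plain tuple rather than a `UNet2DConditionOutput`, so this keeps the eager `self.unet` path and the quantized `self.traced_unet` path call-compatible. A minimal sketch of the idea, using a toy module rather than the real diffusers UNet:

```python
import torch

# Toy stand-in for a diffusers-style model: dict-like output on request,
# plain tuple when return_dict=False (the default the patch installs).
class TinyUNet(torch.nn.Module):
    def forward(self, sample, timestep, encoder_hidden_states, return_dict=False):
        out = sample + encoder_hidden_states.mean() * timestep
        return {"sample": out} if return_dict else (out,)

# Wrapper exposing a fixed positional, tuple-returning surface for tracing.
class TraceableUNet(torch.nn.Module):
    def __init__(self, unet):
        super().__init__()
        self.unet = unet

    def forward(self, sample, timestep, encoder_hidden_states):
        return self.unet(sample, timestep, encoder_hidden_states, return_dict=False)

unet = TinyUNet()
args = (torch.randn(1, 4, 8, 8), torch.tensor(1.0), torch.randn(1, 77, 8))

traced = torch.jit.trace(TraceableUNet(unet), args)
# Both paths now agree on calling convention and on indexing the result:
eager_pred = unet(*args)[0]
traced_pred = traced(*args)[0]
assert torch.allclose(eager_pred, traced_pred)
```

Flipping `return_dict` to `False` inside the UNet itself is what lets the pipeline code index `[0]` uniformly, whether the prediction came from the eager module or from the loaded quantized graph.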
models_v2/pytorch/LCM/inference/cpu/do_calibration.sh (new file)

+56
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+MODEL_DIR=${MODEL_DIR-$PWD}
+
+if [ ! -e "${MODEL_DIR}/inference.py" ]; then
+  echo "Could not find inference.py. Please set the environment variable '\${MODEL_DIR}'"
+  echo "so that inference.py exists at \${MODEL_DIR}/inference.py."
+  exit 1
+fi
+
+if [ ! -d "${DATASET_DIR}" ]; then
+  echo "The DATASET_DIR \${DATASET_DIR} does not exist"
+  exit 1
+fi
+
+if [ -z "${OUTPUT_DIR}" ]; then
+  echo "The required environment variable OUTPUT_DIR has not been set"
+  exit 1
+fi
+
+INT8_MODEL=${INT8_MODEL:-"quantized_model.pt2"}
+
+mkdir -p ${OUTPUT_DIR}
+
+export DNNL_PRIMITIVE_CACHE_CAPACITY=1024
+export KMP_BLOCKTIME=200
+export KMP_AFFINITY=granularity=fine,compact,1,0
+
+export TORCHINDUCTOR_FREEZING=1
+export TORCHINDUCTOR_CPP_ENABLE_TILING_HEURISTIC=0
+export TORCHINDUCTOR_ENABLE_LINEAR_BINARY_FOLDING=1
+
+python -m torch.backends.xeon.run_cpu --disable-numactl \
+  --log_path ${OUTPUT_DIR} \
+  ${MODEL_DIR}/inference.py \
+  --model_name_or_path="SimianLuo/LCM_Dreamshaper_v7" \
+  --dataset_path=${DATASET_DIR} \
+  --quantized_model_path=${INT8_MODEL} \
+  --compile_inductor \
+  --precision=int8-bf16 \
+  --calibration
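The script writes the calibrated model to `quantized_model.pt2`, which later runs can load, so the expensive calibration pass happens once rather than on every inference. What `inference.py` does internally is not shown in this diff; the sketch below is a hedged guess at the PT2 export quantization flow such a script typically uses, based on the `.pt2` suffix and the `TORCHINDUCTOR_*` variables set above (`unet`, `example_args`, and `calib_inputs` are placeholders, not names from the repo):

```python
import torch
from torch.ao.quantization.quantize_pt2e import prepare_pt2e, convert_pt2e
from torch.ao.quantization.quantizer.x86_inductor_quantizer import (
    X86InductorQuantizer,
    get_default_x86_inductor_quantization_config,
)

def calibrate_and_save(unet, example_args, calib_inputs, path="quantized_model.pt2"):
    # Export the eager UNet to a graph, insert observers, run calibration
    # batches through it, then convert to a quantized graph and serialize.
    exported = torch.export.export(unet, example_args)
    quantizer = X86InductorQuantizer()
    quantizer.set_global(get_default_x86_inductor_quantization_config())
    prepared = prepare_pt2e(exported.module(), quantizer)
    for args in calib_inputs:  # observers record activation ranges here
        prepared(*args)
    quantized = convert_pt2e(prepared)
    torch.export.save(torch.export.export(quantized, example_args), path)

def load_quantized(path="quantized_model.pt2"):
    # Reload the saved graph and hand it to Inductor; with
    # TORCHINDUCTOR_FREEZING=1 the weights are folded at compile time.
    module = torch.export.load(path).module()
    return torch.compile(module)
```

The patched pipelines would then pick the result up through the attributes checked in diffusers.patch, e.g. `pipe.traced_unet = load_quantized()` and `pipe.precision = torch.bfloat16` (the attribute names come from the diff; the wiring itself is an assumption about inference.py).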
