Commit a29573a
add quantized model save and load for SD & LCM (#2588)
* add quantized model save and load for SD & LCM
1 parent: 0491221

File tree: 10 files changed, +407 −373 lines

models_v2/pytorch/LCM/inference/cpu/README.md

+6
@@ -62,6 +62,12 @@ bash download_dataset.sh
 | **BATCH_SIZE** (optional) | `export BATCH_SIZE=<set a value for batch size, else it will run with default batch size>` |
 | **TORCH_INDUCTOR** (optional) | `export TORCH_INDUCTOR=< 0 or 1> (Compile model with PyTorch Inductor backend)` |
 
+* NOTE:
+For `compile-inductor` mode, please run calibration to generate the quantized model before running `INT8-BF16` or `INT8-FP32`:
+```
+bash do_calibration.sh
+```
+
 8. Run `run_model.sh`
 
 ## Output

models_v2/pytorch/LCM/inference/cpu/diffusers.patch

+23 −14

@@ -1,5 +1,5 @@
 diff --git a/src/diffusers/models/transformer_2d.py b/src/diffusers/models/transformer_2d.py
-index 24abf54d..3fa7df5f 100644
+index 24abf54d6..3fa7df5f3 100644
 --- a/src/diffusers/models/transformer_2d.py
 +++ b/src/diffusers/models/transformer_2d.py
 @@ -385,7 +385,7 @@ class Transformer2DModel(ModelMixin, ConfigMixin):
@@ -21,7 +21,7 @@ index 24abf54d..3fa7df5f 100644
              output = hidden_states + residual
          elif self.is_input_vectorized:
 diff --git a/src/diffusers/models/unet_2d_condition.py b/src/diffusers/models/unet_2d_condition.py
-index f248b243..27d4802d 100644
+index f248b243f..7c83d2cf5 100644
 --- a/src/diffusers/models/unet_2d_condition.py
 +++ b/src/diffusers/models/unet_2d_condition.py
 @@ -799,8 +799,8 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
@@ -34,8 +34,17 @@ index f248b243..27d4802d 100644
          attention_mask: Optional[torch.Tensor] = None,
          cross_attention_kwargs: Optional[Dict[str, Any]] = None,
          added_cond_kwargs: Optional[Dict[str, torch.Tensor]] = None,
+@@ -808,7 +808,7 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
+         mid_block_additional_residual: Optional[torch.Tensor] = None,
+         down_intrablock_additional_residuals: Optional[Tuple[torch.Tensor]] = None,
+         encoder_attention_mask: Optional[torch.Tensor] = None,
+-        return_dict: bool = True,
++        return_dict: bool = False,
+     ) -> Union[UNet2DConditionOutput, Tuple]:
+         r"""
+         The [`UNet2DConditionModel`] forward method.
 diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py
-index ff5eea2d..10ea4af1 100644
+index ff5eea2d5..8a9461c87 100644
 --- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py
 +++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py
 @@ -701,17 +701,33 @@ class LatentConsistencyModelPipeline(
@@ -58,16 +67,16 @@ index ff5eea2d..10ea4af1 100644
 +            model_pred = self.traced_unet(
 +                latents.to(memory_format=torch.channels_last).to(dtype=self.precision),
 +                t,
-+                encoder_hidden_states=prompt_embeds.to(dtype=self.precision),
-+                timestep_cond=w_embedding.to(dtype=self.precision)
-+            )['sample']
++                prompt_embeds.to(dtype=self.precision),
++                w_embedding.to(dtype=self.precision)
++            )[0]
 +        elif hasattr(self, 'precision'):
 +            model_pred = self.unet(
 +                latents.to(memory_format=torch.channels_last).to(dtype=self.precision),
 +                t,
-+                encoder_hidden_states=prompt_embeds.to(dtype=self.precision),
-+                timestep_cond=w_embedding.to(dtype=self.precision)
-+            )['sample']
++                prompt_embeds.to(dtype=self.precision),
++                w_embedding.to(dtype=self.precision)
++            )[0]
 +        else:
 +            model_pred = self.unet(
 +                latents,
@@ -91,7 +100,7 @@ index ff5eea2d..10ea4af1 100644
         if not output_type == "latent":
             image = self.vae.decode(denoised / self.vae.config.scaling_factor, return_dict=False)[0]
 diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
-index 9911cbe7..98c7f2ab 100644
+index 9911cbe75..a4e7101e3 100644
 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
 +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
 @@ -832,19 +832,33 @@ class StableDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lo
@@ -116,14 +125,14 @@ index 9911cbe7..98c7f2ab 100644
 +            noise_pred = self.traced_unet(
 +                latent_model_input.to(memory_format=torch.channels_last).to(dtype=self.precision),
 +                t,
-+                encoder_hidden_states=prompt_embeds.to(dtype=self.precision)
-+            )['sample']
++                prompt_embeds.to(dtype=self.precision)
++            )[0]
 +        elif hasattr(self, 'precision'):
 +            noise_pred = self.unet(
 +                latent_model_input.to(memory_format=torch.channels_last).to(dtype=self.precision),
 +                t,
-+                encoder_hidden_states=prompt_embeds.to(dtype=self.precision)
-+            )['sample']
++                prompt_embeds.to(dtype=self.precision)
++            )[0]
 +        else:
 +            noise_pred = self.unet(
 +                latent_model_input,
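The recurring change in both pipelines is the same: keyword arguments (`encoder_hidden_states=`, `timestep_cond=`) become positional, `['sample']` becomes `[0]`, and the UNet's `return_dict` now defaults to `False`. A traced or exported graph takes a fixed positional signature and returns a plain tuple rather than a `UNet2DConditionOutput`, so this keeps the eager `self.unet` path and the quantized `self.traced_unet` path call-compatible. A minimal sketch of the idea, using a toy module rather than the real diffusers UNet:

```python
import torch

# Toy stand-in for a diffusers-style model: dict-like output on request,
# plain tuple when return_dict=False (the default the patch installs).
class TinyUNet(torch.nn.Module):
    def forward(self, sample, timestep, encoder_hidden_states, return_dict=False):
        out = sample + encoder_hidden_states.mean() * timestep
        return {"sample": out} if return_dict else (out,)

# Wrapper exposing a fixed positional, tuple-returning surface for tracing.
class TraceableUNet(torch.nn.Module):
    def __init__(self, unet):
        super().__init__()
        self.unet = unet

    def forward(self, sample, timestep, encoder_hidden_states):
        return self.unet(sample, timestep, encoder_hidden_states, return_dict=False)

unet = TinyUNet()
args = (torch.randn(1, 4, 8, 8), torch.tensor(1.0), torch.randn(1, 77, 8))

traced = torch.jit.trace(TraceableUNet(unet), args)
# Both paths now agree on calling convention and on indexing the result:
eager_pred = unet(*args)[0]
traced_pred = traced(*args)[0]
assert torch.allclose(eager_pred, traced_pred)
```

Flipping `return_dict` to `False` inside the UNet itself is what lets the pipeline code index `[0]` uniformly, whether the prediction came from the eager module or from the loaded quantized graph.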
models_v2/pytorch/LCM/inference/cpu/do_calibration.sh (new file)

+56
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+MODEL_DIR=${MODEL_DIR-$PWD}
+
+if [ ! -e "${MODEL_DIR}/inference.py" ]; then
+  echo "Could not find inference.py. Please set the environment variable '\${MODEL_DIR}'"
+  echo "so that inference.py exists at \${MODEL_DIR}/inference.py."
+  exit 1
+fi
+
+if [ ! -d "${DATASET_DIR}" ]; then
+  echo "The DATASET_DIR \${DATASET_DIR} does not exist"
+  exit 1
+fi
+
+if [ -z "${OUTPUT_DIR}" ]; then
+  echo "The required environment variable OUTPUT_DIR has not been set"
+  exit 1
+fi
+
+INT8_MODEL=${INT8_MODEL:-"quantized_model.pt2"}
+
+mkdir -p ${OUTPUT_DIR}
+
+export DNNL_PRIMITIVE_CACHE_CAPACITY=1024
+export KMP_BLOCKTIME=200
+export KMP_AFFINITY=granularity=fine,compact,1,0
+
+export TORCHINDUCTOR_FREEZING=1
+export TORCHINDUCTOR_CPP_ENABLE_TILING_HEURISTIC=0
+export TORCHINDUCTOR_ENABLE_LINEAR_BINARY_FOLDING=1
+
+python -m torch.backends.xeon.run_cpu --disable-numactl \
+  --log_path ${OUTPUT_DIR} \
+  ${MODEL_DIR}/inference.py \
+  --model_name_or_path="SimianLuo/LCM_Dreamshaper_v7" \
+  --dataset_path=${DATASET_DIR} \
+  --quantized_model_path=${INT8_MODEL} \
+  --compile_inductor \
+  --precision=int8-bf16 \
+  --calibration
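The script writes the calibrated model to `quantized_model.pt2`, which later runs can load, so the expensive calibration pass happens once rather than on every inference. What `inference.py` does internally is not shown in this diff; the sketch below is a hedged guess at the PT2 export quantization flow such a script typically uses, based on the `.pt2` suffix and the `TORCHINDUCTOR_*` variables set above (`unet`, `example_args`, and `calib_inputs` are placeholders, not names from the repo):

```python
import torch
from torch.ao.quantization.quantize_pt2e import prepare_pt2e, convert_pt2e
from torch.ao.quantization.quantizer.x86_inductor_quantizer import (
    X86InductorQuantizer,
    get_default_x86_inductor_quantization_config,
)

def calibrate_and_save(unet, example_args, calib_inputs, path="quantized_model.pt2"):
    # Export the eager UNet to a graph, insert observers, run calibration
    # batches through it, then convert to a quantized graph and serialize.
    exported = torch.export.export(unet, example_args)
    quantizer = X86InductorQuantizer()
    quantizer.set_global(get_default_x86_inductor_quantization_config())
    prepared = prepare_pt2e(exported.module(), quantizer)
    for args in calib_inputs:  # observers record activation ranges here
        prepared(*args)
    quantized = convert_pt2e(prepared)
    torch.export.save(torch.export.export(quantized, example_args), path)

def load_quantized(path="quantized_model.pt2"):
    # Reload the saved graph and hand it to Inductor; with
    # TORCHINDUCTOR_FREEZING=1 the weights are folded at compile time.
    module = torch.export.load(path).module()
    return torch.compile(module)
```

The patched pipelines would then pick the result up through the attributes checked in diffusers.patch, e.g. `pipe.traced_unet = load_quantized()` and `pipe.precision = torch.bfloat16` (the attribute names come from the diff; the wiring itself is an assumption about inference.py).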
