Skip to content

Commit

Permalink
Add diffuser tests for optimized sdxl flow on HPU (#1554)
Browse files Browse the repository at this point in the history
Co-authored-by: regisss <[email protected]>
  • Loading branch information
sushildubey171 and regisss authored Feb 7, 2025
1 parent 2cc56e6 commit 4abb0e6
Showing 1 changed file with 141 additions and 0 deletions.
141 changes: 141 additions & 0 deletions tests/test_diffusers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1294,6 +1294,147 @@ def test_stable_diffusion_xl_hpu_graphs(self):
self.assertEqual(len(images), 10)
self.assertEqual(images[-1].shape, (64, 64, 3))

def test_stable_diffusion_xl_num_images_per_prompt_optimized(self):
    """
    Check image counts and shapes returned by the optimized SDXL HPU pipeline.

    The pipeline is called with a single prompt and with a list of prompts,
    both with the default `num_images_per_prompt` and with an explicit value.
    Each call must return `num_prompts * num_images_per_prompt` images of
    shape (1024, 1024, 3).

    The `PATCH_SDPA` environment variable is set to "1" for the duration of the
    test and restored to its previous state afterwards, even if the test fails.
    """
    import habana_frameworks.torch.hpu as torch_hpu

    model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
    expected_shape = (1024, 1024, 3)

    scheduler = GaudiEulerDiscreteScheduler.from_pretrained(
        model_name_or_path, subfolder="scheduler", timestep_spacing="linspace"
    )
    scheduler.hpu_opt = True
    kwargs = {
        "scheduler": scheduler,
        "use_habana": True,
        "use_hpu_graphs": True,
        "gaudi_config": "Habana/stable-diffusion",
        "torch_dtype": torch.bfloat16,
    }

    # Remember any pre-existing value so the test leaves the environment
    # exactly as it found it instead of leaking into later tests.
    previous_patch_sdpa = os.environ.get("PATCH_SDPA")
    os.environ["PATCH_SDPA"] = "1"
    try:
        # Imported only after PATCH_SDPA is set, preserving the original ordering.
        # NOTE(review): presumably the module/pipeline reads the variable - confirm.
        from optimum.habana.diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_mlperf import (
            StableDiffusionXLPipeline_HPU,
        )

        sd_pipe = StableDiffusionXLPipeline_HPU.from_pretrained(
            model_name_or_path,
            **kwargs,
        )

        sd_pipe.unet.set_default_attn_processor(sd_pipe.unet)
        sd_pipe.to(torch.device("hpu"))
        sd_pipe.unet = torch_hpu.wrap_in_hpu_graph(sd_pipe.unet)
        sd_pipe.set_progress_bar_config(disable=None)

        prompt = "A painting of a squirrel eating a burger"

        # Test num_images_per_prompt=1 (default)
        images = sd_pipe(prompt, num_inference_steps=2, output_type="np").images

        self.assertEqual(len(images), 1)
        self.assertEqual(images[0].shape, expected_shape)

        # Test num_images_per_prompt=1 (default) for several prompts
        num_prompts = 3
        images = sd_pipe([prompt] * num_prompts, num_inference_steps=2, output_type="np").images

        self.assertEqual(len(images), num_prompts)
        self.assertEqual(images[-1].shape, expected_shape)

        # Test num_images_per_prompt for single prompt
        num_images_per_prompt = 2
        images = sd_pipe(
            prompt, num_inference_steps=2, output_type="np", num_images_per_prompt=num_images_per_prompt
        ).images

        self.assertEqual(len(images), num_images_per_prompt)
        self.assertEqual(images[-1].shape, expected_shape)

        # Test num_images_per_prompt for several prompts
        num_prompts = 2
        images = sd_pipe(
            [prompt] * num_prompts,
            num_inference_steps=2,
            output_type="np",
            num_images_per_prompt=num_images_per_prompt,
        ).images

        self.assertEqual(len(images), num_prompts * num_images_per_prompt)
        self.assertEqual(images[-1].shape, expected_shape)
    finally:
        # Runs on success and on failure, so a failing assertion cannot leave
        # PATCH_SDPA set for the rest of the test session.
        if previous_patch_sdpa is None:
            os.environ.pop("PATCH_SDPA", None)
        else:
            os.environ["PATCH_SDPA"] = previous_patch_sdpa

def test_stable_diffusion_xl_optimized_fp8(self):
    """
    Run the optimized SDXL HPU pipeline with an FP8 quantization config.

    The UNet is prepared for measurement or converted for quantization,
    depending on what the FP8 config loaded from `QUANT_CONFIG` requests. It is
    then wrapped in an HPU graph, and a single image of shape (1024, 1024, 3)
    is generated.

    The test temporarily sets the `PATCH_SDPA` and `QUANT_CONFIG` environment
    variables and changes the working directory to `examples/stable-diffusion`
    (the quantization config path is relative to it). All three are restored
    in a `finally` clause, so a failure cannot affect later tests.
    """
    import habana_frameworks.torch.hpu as torch_hpu

    model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
    # Relative path; it is resolved against `config_dir` after the chdir below.
    quant_config_path = "./quantization/stable-diffusion-xl/quantize_config.json"

    scheduler = GaudiEulerDiscreteScheduler.from_pretrained(
        model_name_or_path, subfolder="scheduler", timestep_spacing="linspace"
    )
    scheduler.hpu_opt = True
    kwargs = {
        "scheduler": scheduler,
        "use_habana": True,
        "use_hpu_graphs": True,
        "gaudi_config": "Habana/stable-diffusion",
        "torch_dtype": torch.bfloat16,
    }

    # Snapshot everything this test mutates at process level.
    env_overrides = {"PATCH_SDPA": "1", "QUANT_CONFIG": quant_config_path}
    previous_env = {name: os.environ.get(name) for name in env_overrides}
    original_dir = os.getcwd()

    os.environ.update(env_overrides)
    try:
        # Imported only after the environment is set, preserving the original ordering.
        # NOTE(review): presumably the module/pipeline reads these variables - confirm.
        from optimum.habana.diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_mlperf import (
            StableDiffusionXLPipeline_HPU,
        )

        sd_pipe = StableDiffusionXLPipeline_HPU.from_pretrained(
            model_name_or_path,
            **kwargs,
        )
        sd_pipe.unet.set_default_attn_processor(sd_pipe.unet)
        sd_pipe.to(torch.device("hpu"))

        # Resolve __file__ so the directory is correct even when the module was
        # loaded through a relative path.
        config_dir = Path(__file__).resolve().parent.parent / "examples" / "stable-diffusion"
        os.chdir(config_dir)

        import habana_frameworks.torch.core as htcore
        from neural_compressor.torch.quantization import FP8Config, convert, prepare

        htcore.hpu_set_env()

        config = FP8Config.from_json_file(quant_config_path)

        if config.measure:
            print("Running measurements")
            sd_pipe.unet = prepare(sd_pipe.unet, config)
        elif config.quantize:
            print("Running quantization")
            sd_pipe.unet = convert(sd_pipe.unet, config)
        htcore.hpu_initialize(sd_pipe.unet, mark_only_scales_as_const=True)

        sd_pipe.unet = torch_hpu.wrap_in_hpu_graph(sd_pipe.unet)
        sd_pipe.set_progress_bar_config(disable=None)

        prompt = "A painting of a squirrel eating a burger"

        # Test using quantization configuration
        images = sd_pipe(prompt, num_inference_steps=2, output_type="np").images

        self.assertEqual(len(images), 1)
        self.assertEqual(images[0].shape, (1024, 1024, 3))
    finally:
        # Restore the working directory first so later relative paths are sane,
        # then put both environment variables back to their previous state.
        os.chdir(original_dir)
        for name, previous_value in previous_env.items():
            if previous_value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = previous_value

@slow
def test_stable_diffusion_xl_generation_throughput(self):
prompts = [
Expand Down

0 comments on commit 4abb0e6

Please sign in to comment.