Skip to content

Commit 75af1bd

Browse files
committed
Support classifier-free guidance (CFG) with two conditionings (text and image) for InstructPix2Pix
1 parent 63a6df3 commit 75af1bd

File tree

1 file changed

+36
-5
lines changed

1 file changed

+36
-5
lines changed

stable-diffusion.cpp

Lines changed: 36 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -806,6 +806,11 @@ class StableDiffusionGGML {
806806
float skip_layer_start = 0.01,
807807
float skip_layer_end = 0.2,
808808
ggml_tensor* noise_mask = nullptr) {
809+
810+
// TODO (Pix2Pix): add a dedicated image-guidance parameter; for now img_cfg_scale reuses the distilled guidance value (`guidance`)
811+
812+
float img_cfg_scale = guidance;
813+
809814
LOG_DEBUG("Sample");
810815
struct ggml_init_params params;
811816
size_t data_size = ggml_row_size(init_latent->type, init_latent->ne[0]);
@@ -828,12 +833,15 @@ class StableDiffusionGGML {
828833
struct ggml_tensor* noised_input = ggml_dup_tensor(work_ctx, noise);
829834

830835
bool has_unconditioned = cfg_scale != 1.0 && uncond.c_crossattn != NULL;
836+
bool has_img_guidance = version == VERSION_INSTRUCT_PIX2PIX && cfg_scale != img_cfg_scale;
837+
has_unconditioned = has_unconditioned || has_img_guidance;
831838
bool has_skiplayer = slg_scale != 0.0 && skip_layers.size() > 0;
832839

833840
// denoise wrapper
834-
struct ggml_tensor* out_cond = ggml_dup_tensor(work_ctx, x);
835-
struct ggml_tensor* out_uncond = NULL;
836-
struct ggml_tensor* out_skip = NULL;
841+
struct ggml_tensor* out_cond = ggml_dup_tensor(work_ctx, x);
842+
struct ggml_tensor* out_uncond = NULL;
843+
struct ggml_tensor* out_skip = NULL;
844+
struct ggml_tensor* out_img_cond = NULL;
837845

838846
if (has_unconditioned) {
839847
out_uncond = ggml_dup_tensor(work_ctx, x);
@@ -846,6 +854,9 @@ class StableDiffusionGGML {
846854
LOG_WARN("SLG is incompatible with %s models", model_version_to_str[version]);
847855
}
848856
}
857+
if (has_img_guidance) {
858+
out_img_cond = ggml_dup_tensor(work_ctx, x);
859+
}
849860
struct ggml_tensor* denoised = ggml_dup_tensor(work_ctx, x);
850861

851862
auto denoise = [&](ggml_tensor* input, float sigma, int step) -> ggml_tensor* {
@@ -927,6 +938,22 @@ class StableDiffusionGGML {
927938
negative_data = (float*)out_uncond->data;
928939
}
929940

941+
float* img_cond_data = NULL;
942+
if (has_img_guidance) {
943+
diffusion_model->compute(n_threads,
944+
noised_input,
945+
timesteps,
946+
uncond.c_crossattn,
947+
cond.c_concat,
948+
uncond.c_vector,
949+
guidance_tensor,
950+
-1,
951+
controls,
952+
control_strength,
953+
&out_img_cond);
954+
img_cond_data = (float*)out_img_cond->data;
955+
}
956+
930957
int step_count = sigmas.size();
931958
bool is_skiplayer_step = has_skiplayer && step > (int)(skip_layer_start * step_count) && step < (int)(skip_layer_end * step_count);
932959
float* skip_layer_data = NULL;
@@ -961,7 +988,11 @@ class StableDiffusionGGML {
961988
int64_t i3 = i / out_cond->ne[0] * out_cond->ne[1] * out_cond->ne[2];
962989
float scale = min_cfg + (cfg_scale - min_cfg) * (i3 * 1.0f / ne3);
963990
} else {
964-
latent_result = negative_data[i] + cfg_scale * (positive_data[i] - negative_data[i]);
991+
if(has_img_guidance){
992+
latent_result = negative_data[i] + img_cfg_scale * (img_cond_data[i] - negative_data[i]) + cfg_scale * (positive_data[i] - img_cond_data[i]);
993+
} else {
994+
latent_result = negative_data[i] + cfg_scale * (positive_data[i] - negative_data[i]);
995+
}
965996
}
966997
}
967998
if (is_skiplayer_step) {
@@ -1362,7 +1393,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
13621393
sd_ctx->sd->diffusion_model->get_adm_in_channels());
13631394

13641395
SDCondition uncond;
1365-
if (cfg_scale != 1.0) {
1396+
if (cfg_scale != 1.0 || sd_ctx->sd->version == VERSION_INSTRUCT_PIX2PIX && cfg_scale!=guidance) {
13661397
bool force_zero_embeddings = false;
13671398
if (sd_version_is_sdxl(sd_ctx->sd->version) && negative_prompt.size() == 0) {
13681399
force_zero_embeddings = true;

0 commit comments

Comments
 (0)