@@ -49,8 +49,7 @@ const char* sampling_methods_str[] = {
     "iPNDM_v",
     "LCM",
     "DDIM \"trailing\"",
-    "TCD"
-};
+    "TCD"};
 
 /* ================================================== Helper Functions ================================================*/
 
@@ -683,7 +682,7 @@ class StableDiffusionGGML {
             float curr_multiplier = kv.second;
             lora_state_diff[lora_name] -= curr_multiplier;
         }
-
+
         size_t rm = lora_state_diff.size() - lora_state.size();
         if (rm != 0) {
             LOG_INFO("Attempting to apply %lu LoRAs (removing %lu applied LoRAs)", lora_state.size(), rm);
@@ -806,7 +805,6 @@ class StableDiffusionGGML {
                         float skip_layer_start = 0.01,
                         float skip_layer_end = 0.2,
                         ggml_tensor* noise_mask = nullptr) {
-
         // TODO (Pix2Pix): separate image guidance params (right now it's reusing distilled guidance)
 
         float img_cfg_scale = guidance;
@@ -834,7 +832,7 @@ class StableDiffusionGGML {
 
         bool has_unconditioned = cfg_scale != 1.0 && uncond.c_crossattn != NULL;
         bool has_img_guidance = version == VERSION_INSTRUCT_PIX2PIX && cfg_scale != img_cfg_scale;
-        has_unconditioned = has_unconditioned || has_img_guidance;
+        has_unconditioned = has_unconditioned || has_img_guidance;
         bool has_skiplayer = slg_scale != 0.0 && skip_layers.size() > 0;
 
         // denoise wrapper
@@ -988,7 +986,7 @@ class StableDiffusionGGML {
                             int64_t i3 = i / out_cond->ne[0] * out_cond->ne[1] * out_cond->ne[2];
                             float scale = min_cfg + (cfg_scale - min_cfg) * (i3 * 1.0f / ne3);
                         } else {
-                            if (has_img_guidance){
+                            if (has_img_guidance) {
                                 latent_result = negative_data[i] + img_cfg_scale * (img_cond_data[i] - negative_data[i]) + cfg_scale * (positive_data[i] - img_cond_data[i]);
                             } else {
                                 latent_result = negative_data[i] + cfg_scale * (positive_data[i] - negative_data[i]);
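Note on the `has_img_guidance` branch above: this is the InstructPix2Pix-style double guidance, where an image-guidance term is applied on top of the usual text CFG term. A standalone sketch of the per-element blend, using hypothetical buffer names for the three model outputs (unconditional, image-only, text+image), which correspond to `negative_data`, `img_cond_data` and `positive_data` in the hunk:

    #include <cstddef>

    // Sketch of the two-scale guidance blend (not the actual sampler code):
    // out[i] = uncond + img_cfg * (img_cond - uncond) + txt_cfg * (full_cond - img_cond)
    void blend_guidance(const float* uncond,     // no text, no image conditioning
                        const float* img_cond,   // image conditioning only
                        const float* full_cond,  // text + image conditioning
                        float* out,
                        std::size_t n,
                        float img_cfg_scale,
                        float txt_cfg_scale) {
        for (std::size_t i = 0; i < n; i++) {
            out[i] = uncond[i]
                   + img_cfg_scale * (img_cond[i] - uncond[i])
                   + txt_cfg_scale * (full_cond[i] - img_cond[i]);
        }
    }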
@@ -1393,7 +1391,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
                                                        sd_ctx->sd->diffusion_model->get_adm_in_channels());
 
     SDCondition uncond;
-    if (cfg_scale != 1.0 || sd_ctx->sd->version == VERSION_INSTRUCT_PIX2PIX && cfg_scale!= guidance) {
+    if (cfg_scale != 1.0 || sd_ctx->sd->version == VERSION_INSTRUCT_PIX2PIX && cfg_scale != guidance) {
         bool force_zero_embeddings = false;
         if (sd_version_is_sdxl(sd_ctx->sd->version) && negative_prompt.size() == 0) {
             force_zero_embeddings = true;
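Note on the condition touched above: `&&` binds tighter than `||` in C++, so the expression is parsed as the explicitly parenthesized form below, i.e. the unconditional embedding is also built for an InstructPix2Pix model whenever `cfg_scale` differs from `guidance`:

    // Equivalent, explicitly parenthesized form of the condition in the hunk above.
    if (cfg_scale != 1.0 ||
        (sd_ctx->sd->version == VERSION_INSTRUCT_PIX2PIX && cfg_scale != guidance)) {
        // build the unconditioned / image-guidance condition
    }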
@@ -1739,6 +1737,14 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
 
     sd_image_to_tensor(init_image.data, init_img);
 
+    ggml_tensor* init_latent = NULL;
+    if (!sd_ctx->sd->use_tiny_autoencoder) {
+        ggml_tensor* moments = sd_ctx->sd->encode_first_stage(work_ctx, init_img);
+        init_latent = sd_ctx->sd->get_first_stage_encoding(work_ctx, moments);
+    } else {
+        init_latent = sd_ctx->sd->encode_first_stage(work_ctx, init_img);
+    }
+
     ggml_tensor* masked_image;
 
     if (sd_version_is_inpaint(sd_ctx->sd->version)) {
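For context on the block added above: with the full VAE, `encode_first_stage` returns the distribution moments, which `get_first_stage_encoding` then turns into an actual latent sample, whereas with the tiny autoencoder (TAESD) the encoder output is used as the latent directly, so no second step is needed. A rough, self-contained sketch of the moments-to-latent step, assuming the usual mean/log-variance layout and the SD 1.x scale factor 0.18215 (both assumptions here, not taken from this diff):

    #include <cmath>
    #include <cstddef>
    #include <random>
    #include <vector>

    // Sketch: reparameterization step turning VAE moments into a latent sample.
    std::vector<float> sample_latent(const std::vector<float>& mean,
                                     const std::vector<float>& logvar,
                                     unsigned seed = 42) {
        std::mt19937 rng(seed);
        std::normal_distribution<float> gauss(0.0f, 1.0f);
        std::vector<float> latent(mean.size());
        for (std::size_t i = 0; i < mean.size(); i++) {
            float stddev = std::exp(0.5f * logvar[i]);                  // log-variance -> std deviation
            latent[i]    = (mean[i] + stddev * gauss(rng)) * 0.18215f;  // sample, then scale
        }
        return latent;
    }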
@@ -1786,12 +1792,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
         }
     } else if (sd_ctx->sd->version == VERSION_INSTRUCT_PIX2PIX) {
         // Not actually masked, we're just highjacking the masked_image variable since it will be used the same way
-        if (!sd_ctx->sd->use_tiny_autoencoder) {
-            ggml_tensor* moments = sd_ctx->sd->encode_first_stage(work_ctx, init_img);
-            masked_image = sd_ctx->sd->get_first_stage_encoding(work_ctx, moments);
-        } else {
-            masked_image = sd_ctx->sd->encode_first_stage(work_ctx, init_img);
-        }
+        masked_image = init_latent;
     } else {
         // LOG_WARN("Inpainting with a base model is not great");
         masked_image = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width / 8, height / 8, 1, 1);
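Design note on this hunk and the ones around it: since the init-image encode is now hoisted to right after `sd_image_to_tensor` (the added block in the earlier hunk), the InstructPix2Pix branch can simply reuse that latent as its image-conditioning input instead of running the first-stage encoder a second time; the now-redundant encode further down is removed in the next hunk.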
@@ -1805,14 +1806,6 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
         }
     }
 
-    ggml_tensor* init_latent = NULL;
-    if (!sd_ctx->sd->use_tiny_autoencoder) {
-        ggml_tensor* moments = sd_ctx->sd->encode_first_stage(work_ctx, init_img);
-        init_latent = sd_ctx->sd->get_first_stage_encoding(work_ctx, moments);
-    } else {
-        init_latent = sd_ctx->sd->encode_first_stage(work_ctx, init_img);
-    }
-
     print_ggml_tensor(init_latent, true);
     size_t t1 = ggml_time_ms();
     LOG_INFO("encode_first_stage completed, taking %.2fs", (t1 - t0) * 1.0f / 1000);