@@ -346,6 +346,38 @@ struct CompVisVDenoiser : public CompVisDenoiser {
346
346
}
347
347
};
348
348
349
+ struct CompVisEDMVDenoiser : public CompVisVDenoiser {
350
+ float sigmas[TIMESTEPS];
351
+ float min_sigma = 0.002 ;
352
+ float max_sigma = 120.0 ;
353
+ float sigma_data = 1 ;
354
+ std::shared_ptr<SigmaSchedule> schedule = std::make_shared<ExponentialSchedule>();
355
+
356
+ CompVisEDMVDenoiser (float min_sigma = 0.002 , float max_sigma = 120.0 , float sigma_data = 1 ) : min_sigma(min_sigma), max_sigma(max_sigma), sigma_data(sigma_data) {
357
+ }
358
+
359
+ float t_to_sigma (float t) {
360
+ return std::exp (t * 4 );
361
+ }
362
+
363
+ float sigma_to_t (float s) {
364
+ return 0.25 * std::log (s);
365
+ }
366
+
367
+ float sigma_min () {
368
+ return min_sigma;
369
+ }
370
+
371
+ float sigma_max () {
372
+ return max_sigma;
373
+ }
374
+
375
+ std::vector<float > get_sigmas (uint32_t n) {
376
+ auto bound_t_to_sigma = std::bind (&Denoiser::t_to_sigma, this , std::placeholders::_1);
377
+ return schedule->get_sigmas (n, sigma_min (), sigma_max (), bound_t_to_sigma);
378
+ }
379
+ };
380
+
349
381
float time_snr_shift (float alpha, float t) {
350
382
if (alpha == 1 .0f ) {
351
383
return t;
@@ -1019,7 +1051,7 @@ static void sample_k_diffusion(sample_method_t method,
1019
1051
// also needed to invert the behavior of CompVisDenoiser
1020
1052
// (k-diffusion's LMSDiscreteScheduler)
1021
1053
float beta_start = 0 .00085f ;
1022
- float beta_end = 0 .0120f ;
1054
+ float beta_end = 0 .0120f ;
1023
1055
std::vector<double > alphas_cumprod;
1024
1056
std::vector<double > compvis_sigmas;
1025
1057
@@ -1030,8 +1062,9 @@ static void sample_k_diffusion(sample_method_t method,
1030
1062
(i == 0 ? 1 .0f : alphas_cumprod[i - 1 ]) *
1031
1063
(1 .0f -
1032
1064
std::pow (sqrtf (beta_start) +
1033
- (sqrtf (beta_end) - sqrtf (beta_start)) *
1034
- ((float )i / (TIMESTEPS - 1 )), 2 ));
1065
+ (sqrtf (beta_end) - sqrtf (beta_start)) *
1066
+ ((float )i / (TIMESTEPS - 1 )),
1067
+ 2 ));
1035
1068
compvis_sigmas[i] =
1036
1069
std::sqrt ((1 - alphas_cumprod[i]) /
1037
1070
alphas_cumprod[i]);
@@ -1061,7 +1094,8 @@ static void sample_k_diffusion(sample_method_t method,
1061
1094
// - pred_prev_sample -> "x_t-1"
1062
1095
int timestep =
1063
1096
roundf (TIMESTEPS -
1064
- i * ((float )TIMESTEPS / steps)) - 1 ;
1097
+ i * ((float )TIMESTEPS / steps)) -
1098
+ 1 ;
1065
1099
// 1. get previous step value (=t-1)
1066
1100
int prev_timestep = timestep - TIMESTEPS / steps;
1067
1101
// The sigma here is chosen to cause the
@@ -1086,10 +1120,9 @@ static void sample_k_diffusion(sample_method_t method,
1086
1120
float * vec_x = (float *)x->data ;
1087
1121
for (int j = 0 ; j < ggml_nelements (x); j++) {
1088
1122
vec_x[j] *= std::sqrt (sigma * sigma + 1 ) /
1089
- sigma;
1123
+ sigma;
1090
1124
}
1091
- }
1092
- else {
1125
+ } else {
1093
1126
// For the subsequent steps after the first one,
1094
1127
// at this point x = latents or x = sample, and
1095
1128
// needs to be prescaled with x <- sample / c_in
@@ -1127,9 +1160,8 @@ static void sample_k_diffusion(sample_method_t method,
1127
1160
float alpha_prod_t = alphas_cumprod[timestep];
1128
1161
// Note final_alpha_cumprod = alphas_cumprod[0] due to
1129
1162
// trailing timestep spacing
1130
- float alpha_prod_t_prev = prev_timestep >= 0 ?
1131
- alphas_cumprod[prev_timestep] : alphas_cumprod[0 ];
1132
- float beta_prod_t = 1 - alpha_prod_t ;
1163
+ float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0 ];
1164
+ float beta_prod_t = 1 - alpha_prod_t ;
1133
1165
// 3. compute predicted original sample from predicted
1134
1166
// noise also called "predicted x_0" of formula (12)
1135
1167
// from https://arxiv.org/pdf/2010.02502.pdf
@@ -1145,7 +1177,7 @@ static void sample_k_diffusion(sample_method_t method,
1145
1177
vec_pred_original_sample[j] =
1146
1178
(vec_x[j] / std::sqrt (sigma * sigma + 1 ) -
1147
1179
std::sqrt (beta_prod_t ) *
1148
- vec_model_output[j]) *
1180
+ vec_model_output[j]) *
1149
1181
(1 / std::sqrt (alpha_prod_t ));
1150
1182
}
1151
1183
}
@@ -1159,8 +1191,8 @@ static void sample_k_diffusion(sample_method_t method,
1159
1191
// sigma_t = sqrt((1 - alpha_t-1)/(1 - alpha_t)) *
1160
1192
// sqrt(1 - alpha_t/alpha_t-1)
1161
1193
float beta_prod_t_prev = 1 - alpha_prod_t_prev;
1162
- float variance = (beta_prod_t_prev / beta_prod_t ) *
1163
- (1 - alpha_prod_t / alpha_prod_t_prev);
1194
+ float variance = (beta_prod_t_prev / beta_prod_t ) *
1195
+ (1 - alpha_prod_t / alpha_prod_t_prev);
1164
1196
float std_dev_t = eta * std::sqrt (variance);
1165
1197
// 6. compute "direction pointing to x_t" of formula
1166
1198
// (12) from https://arxiv.org/pdf/2010.02502.pdf
@@ -1179,8 +1211,8 @@ static void sample_k_diffusion(sample_method_t method,
1179
1211
std::pow (std_dev_t , 2 )) *
1180
1212
vec_model_output[j];
1181
1213
vec_x[j] = std::sqrt (alpha_prod_t_prev) *
1182
- vec_pred_original_sample[j] +
1183
- pred_sample_direction;
1214
+ vec_pred_original_sample[j] +
1215
+ pred_sample_direction;
1184
1216
}
1185
1217
}
1186
1218
if (eta > 0 ) {
@@ -1208,7 +1240,7 @@ static void sample_k_diffusion(sample_method_t method,
1208
1240
// by Semi-Linear Consistency Function with Trajectory
1209
1241
// Mapping", arXiv:2402.19159 [cs.CV]
1210
1242
float beta_start = 0 .00085f ;
1211
- float beta_end = 0 .0120f ;
1243
+ float beta_end = 0 .0120f ;
1212
1244
std::vector<double > alphas_cumprod;
1213
1245
std::vector<double > compvis_sigmas;
1214
1246
@@ -1219,8 +1251,9 @@ static void sample_k_diffusion(sample_method_t method,
1219
1251
(i == 0 ? 1 .0f : alphas_cumprod[i - 1 ]) *
1220
1252
(1 .0f -
1221
1253
std::pow (sqrtf (beta_start) +
1222
- (sqrtf (beta_end) - sqrtf (beta_start)) *
1223
- ((float )i / (TIMESTEPS - 1 )), 2 ));
1254
+ (sqrtf (beta_end) - sqrtf (beta_start)) *
1255
+ ((float )i / (TIMESTEPS - 1 )),
1256
+ 2 ));
1224
1257
compvis_sigmas[i] =
1225
1258
std::sqrt ((1 - alphas_cumprod[i]) /
1226
1259
alphas_cumprod[i]);
@@ -1235,13 +1268,10 @@ static void sample_k_diffusion(sample_method_t method,
1235
1268
for (int i = 0 ; i < steps; i++) {
1236
1269
// Analytic form for TCD timesteps
1237
1270
int timestep = TIMESTEPS - 1 -
1238
- (TIMESTEPS / original_steps) *
1239
- (int )floor (i * ((float )original_steps / steps));
1271
+ (TIMESTEPS / original_steps) *
1272
+ (int )floor (i * ((float )original_steps / steps));
1240
1273
// 1. get previous step value
1241
- int prev_timestep = i >= steps - 1 ? 0 :
1242
- TIMESTEPS - 1 - (TIMESTEPS / original_steps) *
1243
- (int )floor ((i + 1 ) *
1244
- ((float )original_steps / steps));
1274
+ int prev_timestep = i >= steps - 1 ? 0 : TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int )floor ((i + 1 ) * ((float )original_steps / steps));
1245
1275
// Here timestep_s is tau_n' in Algorithm 4. The _s
1246
1276
// notation appears to be that from C. Lu,
1247
1277
// "DPM-Solver: A Fast ODE Solver for Diffusion
@@ -1258,10 +1288,9 @@ static void sample_k_diffusion(sample_method_t method,
1258
1288
float * vec_x = (float *)x->data ;
1259
1289
for (int j = 0 ; j < ggml_nelements (x); j++) {
1260
1290
vec_x[j] *= std::sqrt (sigma * sigma + 1 ) /
1261
- sigma;
1291
+ sigma;
1262
1292
}
1263
- }
1264
- else {
1293
+ } else {
1265
1294
float * vec_x = (float *)x->data ;
1266
1295
for (int j = 0 ; j < ggml_nelements (x); j++) {
1267
1296
vec_x[j] *= std::sqrt (sigma * sigma + 1 );
@@ -1294,15 +1323,14 @@ static void sample_k_diffusion(sample_method_t method,
1294
1323
// DPM-Solver. In fact, we have alpha_{t_n} =
1295
1324
// \sqrt{\hat{alpha_n}}, [...]"
1296
1325
float alpha_prod_t = alphas_cumprod[timestep];
1297
- float beta_prod_t = 1 - alpha_prod_t ;
1326
+ float beta_prod_t = 1 - alpha_prod_t ;
1298
1327
// Note final_alpha_cumprod = alphas_cumprod[0] since
1299
1328
// TCD is always "trailing"
1300
- float alpha_prod_t_prev = prev_timestep >= 0 ?
1301
- alphas_cumprod[prev_timestep] : alphas_cumprod[0 ];
1329
+ float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0 ];
1302
1330
// The subscript _s are the only portion in this
1303
1331
// section (2) unique to TCD
1304
1332
float alpha_prod_s = alphas_cumprod[timestep_s];
1305
- float beta_prod_s = 1 - alpha_prod_s;
1333
+ float beta_prod_s = 1 - alpha_prod_s;
1306
1334
// 3. Compute the predicted noised sample x_s based on
1307
1335
// the model parameterization
1308
1336
//
@@ -1317,7 +1345,7 @@ static void sample_k_diffusion(sample_method_t method,
1317
1345
vec_pred_original_sample[j] =
1318
1346
(vec_x[j] / std::sqrt (sigma * sigma + 1 ) -
1319
1347
std::sqrt (beta_prod_t ) *
1320
- vec_model_output[j]) *
1348
+ vec_model_output[j]) *
1321
1349
(1 / std::sqrt (alpha_prod_t ));
1322
1350
}
1323
1351
}
@@ -1339,9 +1367,9 @@ static void sample_k_diffusion(sample_method_t method,
1339
1367
// pred_epsilon = model_output
1340
1368
vec_x[j] =
1341
1369
std::sqrt (alpha_prod_s) *
1342
- vec_pred_original_sample[j] +
1370
+ vec_pred_original_sample[j] +
1343
1371
std::sqrt (beta_prod_s) *
1344
- vec_model_output[j];
1372
+ vec_model_output[j];
1345
1373
}
1346
1374
}
1347
1375
// 4. Sample and inject noise z ~ N(0, I) for
@@ -1357,7 +1385,7 @@ static void sample_k_diffusion(sample_method_t method,
1357
1385
// In this case, x is still pred_noised_sample,
1358
1386
// continue in-place
1359
1387
ggml_tensor_set_f32_randn (noise, rng);
1360
- float * vec_x = (float *)x->data ;
1388
+ float * vec_x = (float *)x->data ;
1361
1389
float * vec_noise = (float *)noise->data ;
1362
1390
for (int j = 0 ; j < ggml_nelements (x); j++) {
1363
1391
// Corresponding to (35) in Zheng et
@@ -1366,10 +1394,10 @@ static void sample_k_diffusion(sample_method_t method,
1366
1394
vec_x[j] =
1367
1395
std::sqrt (alpha_prod_t_prev /
1368
1396
alpha_prod_s) *
1369
- vec_x[j] +
1397
+ vec_x[j] +
1370
1398
std::sqrt (1 - alpha_prod_t_prev /
1371
- alpha_prod_s) *
1372
- vec_noise[j];
1399
+ alpha_prod_s) *
1400
+ vec_noise[j];
1373
1401
}
1374
1402
}
1375
1403
}
0 commit comments