Merge pull request #495 from Project-MONAI/494-clipping-min-and-max-values-on-scheduler

ericspod · web-flow · commit 5672a9037d69 · 2024-06-21T10:43:02.000+01:00
Allowing sample clipping values besides clip_sample True/False
diff --git a/generative/networks/nets/patchgan_discriminator.py b/generative/networks/nets/patchgan_discriminator.py
@@ -87,7 +87,8 @@ def __init__(
             pool = None
         else:
             pool = get_pool_layer(
-                (pooling_method, {"kernel_size": kernel_size, "stride": 2, 'padding': self.padding}), spatial_dims=spatial_dims
+                (pooling_method, {"kernel_size": kernel_size, "stride": 2, "padding": self.padding}),
+                spatial_dims=spatial_dims,
             )
         self.num_channels = num_channels
         for i_ in range(self.num_d):
diff --git a/generative/networks/nets/vqvae.py b/generative/networks/nets/vqvae.py
@@ -17,7 +17,7 @@
 import torch.nn as nn
 from monai.networks.blocks import Convolution
 from monai.networks.layers import Act
-from monai.utils import ensure_tuple_rep
+from monai.utils.misc import ensure_tuple_rep
 
 from generative.networks.layers.vector_quantizer import EMAQuantizer, VectorQuantizer
 
diff --git a/generative/networks/schedulers/ddim.py b/generative/networks/schedulers/ddim.py
@@ -70,6 +70,8 @@ class DDIMScheduler(Scheduler):
             `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
             stable diffusion.
         prediction_type: member of DDPMPredictionType
+        clip_sample_min: if clip_sample is True, minimum value to clamp the prediction by.
+        clip_sample_max: if clip_sample is True, maximum value to clamp the prediction by.
         schedule_args: arguments to pass to the schedule function
 
     """
@@ -82,13 +84,18 @@ def __init__(
         set_alpha_to_one: bool = True,
         steps_offset: int = 0,
         prediction_type: str = DDIMPredictionType.EPSILON,
+        clip_sample_min: int = -1,
+        clip_sample_max: int = 1,
         **schedule_args,
     ) -> None:
         super().__init__(num_train_timesteps, schedule, **schedule_args)
 
         if prediction_type not in DDIMPredictionType.__members__.values():
             raise ValueError("Argument `prediction_type` must be a member of DDIMPredictionType")
 
+        if clip_sample_min >= clip_sample_max:
+            raise ValueError("clip_sample_min must be < clip_sample_max")
+
         self.prediction_type = prediction_type
 
         # At every step in ddim, we are looking into the previous alphas_cumprod
@@ -107,6 +114,7 @@ def __init__(
         self.timesteps = torch.from_numpy(np.arange(0, self.num_train_timesteps)[::-1].astype(np.int64))
 
         self.clip_sample = clip_sample
+        self.clip_sample_values = [clip_sample_min, clip_sample_max]
         self.steps_offset = steps_offset
 
         # default the number of inference timesteps to the number of train steps
@@ -203,7 +211,9 @@ def step(
 
         # 4. Clip "predicted x_0"
         if self.clip_sample:
-            pred_original_sample = torch.clamp(pred_original_sample, -1, 1)
+            pred_original_sample = torch.clamp(
+                pred_original_sample, self.clip_sample_values[0], self.clip_sample_values[1]
+            )
 
         # 5. compute variance: "sigma_t(η)" -> see formula (16)
         # σ_t = sqrt((1 − α_t−1)/(1 − α_t)) * sqrt(1 − α_t/α_t−1)
@@ -278,7 +288,9 @@ def reversed_step(
 
         # 4. Clip "predicted x_0"
         if self.clip_sample:
-            pred_original_sample = torch.clamp(pred_original_sample, -1, 1)
+            pred_original_sample = torch.clamp(
+                pred_original_sample, self.clip_sample_values[0], self.clip_sample_values[1]
+            )
 
         # 5. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
         pred_sample_direction = (1 - alpha_prod_t_next) ** (0.5) * pred_epsilon
diff --git a/generative/networks/schedulers/ddpm.py b/generative/networks/schedulers/ddpm.py
@@ -76,6 +76,8 @@ class DDPMScheduler(Scheduler):
         variance_type: member of DDPMVarianceType
         clip_sample: option to clip predicted sample between -1 and 1 for numerical stability.
         prediction_type: member of DDPMPredictionType
+        clip_sample_min: if clip_sample is True, minimum value to clamp the prediction by.
+        clip_sample_max: if clip_sample is True, maximum value to clamp the prediction by.
         schedule_args: arguments to pass to the schedule function
     """
 
@@ -86,6 +88,8 @@ def __init__(
         variance_type: str = DDPMVarianceType.FIXED_SMALL,
         clip_sample: bool = True,
         prediction_type: str = DDPMPredictionType.EPSILON,
+        clip_sample_min: int = -1,
+        clip_sample_max: int = 1,
         **schedule_args,
     ) -> None:
         super().__init__(num_train_timesteps, schedule, **schedule_args)
@@ -96,9 +100,13 @@ def __init__(
         if prediction_type not in DDPMPredictionType.__members__.values():
             raise ValueError("Argument `prediction_type` must be a member of `DDPMPredictionType`")
 
+        if clip_sample_min >= clip_sample_max:
+            raise ValueError("clip_sample_min must be < clip_sample_max")
+
         self.clip_sample = clip_sample
         self.variance_type = variance_type
         self.prediction_type = prediction_type
+        self.clip_sample_values = [clip_sample_min, clip_sample_max]
 
     def set_timesteps(self, num_inference_steps: int, device: str | torch.device | None = None) -> None:
         """
@@ -218,7 +226,9 @@ def step(
 
         # 3. Clip "predicted x_0"
         if self.clip_sample:
-            pred_original_sample = torch.clamp(pred_original_sample, -1, 1)
+            pred_original_sample = torch.clamp(
+                pred_original_sample, self.clip_sample_values[0], self.clip_sample_values[1]
+            )
 
         # 4. Compute coefficients for pred_original_sample x_0 and current sample x_t
         # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf