
Commit 38186ec

Align diffusion model with other inference networks and remove deprecation warnings (#489)
* Align dm implementation with other networks
* Remove deprecation warning for using subnet_kwargs
* Fix tests
* Remove redundant training arg in get_alpha_sigma and some redundant comments
* Fix configs creation - do not get base config due to fixed call of super().__init__()
* Remove redundant training arg from tests
* Fix dispatch tests for dms
* Improve docs and mark option for x prediction in literal
* Fix start/stop time
* minor cleanup of refactory

Co-authored-by: Valentin Pratz <[email protected]>
1 parent 361fa45 commit 38186ec

File tree: 13 files changed, +239 / -191 lines


bayesflow/experimental/diffusion_model/__init__.py

Lines changed: 3 additions & 3 deletions

@@ -1,7 +1,7 @@
 from .diffusion_model import DiffusionModel
-from .noise_schedule import NoiseSchedule
-from .cosine_noise_schedule import CosineNoiseSchedule
-from .edm_noise_schedule import EDMNoiseSchedule
+from bayesflow.experimental.diffusion_model.schedules import CosineNoiseSchedule
+from bayesflow.experimental.diffusion_model.schedules import EDMNoiseSchedule
+from bayesflow.experimental.diffusion_model.schedules import NoiseSchedule
 from .dispatch import find_noise_schedule

 from ...utils._docs import _add_imports_to_all
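
For orientation, a minimal usage sketch of the reorganized imports; it assumes only the re-exports shown in this `__init__.py` diff (the concrete schedule classes now live in the new `schedules/` subpackage, but the top-level names remain importable):

```python
# Minimal sketch based on the re-exports above; requires bayesflow to be installed.
from bayesflow.experimental.diffusion_model import (
    CosineNoiseSchedule,
    EDMNoiseSchedule,
    NoiseSchedule,
)

schedule = CosineNoiseSchedule()  # default arguments, per the constructors shown below
assert isinstance(schedule, NoiseSchedule)
assert isinstance(EDMNoiseSchedule(), NoiseSchedule)
```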

bayesflow/experimental/diffusion_model/diffusion_model.py

Lines changed: 139 additions & 76 deletions
Large diffs are not rendered by default.

bayesflow/experimental/diffusion_model/dispatch.py

Lines changed: 6 additions & 22 deletions

@@ -1,5 +1,6 @@
 from functools import singledispatch
-from .noise_schedule import NoiseSchedule
+
+from .schedules.noise_schedule import NoiseSchedule


 @singledispatch
@@ -16,34 +17,17 @@ def _(noise_schedule: NoiseSchedule):
 def _(name: str, *args, **kwargs):
     match name.lower():
         case "cosine":
-            from .cosine_noise_schedule import CosineNoiseSchedule
+            from .schedules import CosineNoiseSchedule

-            return CosineNoiseSchedule()
+            return CosineNoiseSchedule(*args, **kwargs)
         case "edm":
-            from .edm_noise_schedule import EDMNoiseSchedule
+            from .schedules import EDMNoiseSchedule

-            return EDMNoiseSchedule()
+            return EDMNoiseSchedule(*args, **kwargs)
         case other:
             raise ValueError(f"Unsupported noise schedule name: '{other}'.")


-@find_noise_schedule.register
-def _(config: dict, *args, **kwargs):
-    name = config.get("name", "").lower()
-    params = {k: v for k, v in config.items() if k != "name"}
-    match name:
-        case "cosine":
-            from .cosine_noise_schedule import CosineNoiseSchedule
-
-            return CosineNoiseSchedule(**params)
-        case "edm":
-            from .edm_noise_schedule import EDMNoiseSchedule
-
-            return EDMNoiseSchedule(**params)
-        case other:
-            raise ValueError(f"Unsupported noise schedule config: '{other}'.")
-
-
 @find_noise_schedule.register
 def _(cls: type, *args, **kwargs):
     if issubclass(cls, NoiseSchedule):
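
To make the behavioral change concrete, a hedged sketch of how the resolver might be called after this commit. Names follow the diff; the constructor arguments are illustrative, and the instance-dispatch behavior (returning the schedule unchanged) is assumed from a registration whose body is not shown here:

```python
# Hedged sketch of the singledispatch-based resolver above (arguments are illustrative).
from bayesflow.experimental.diffusion_model import EDMNoiseSchedule, find_noise_schedule

# By name: *args/**kwargs are now forwarded to the schedule constructor.
schedule = find_noise_schedule("edm", sigma_data=0.5)

# By instance: an existing NoiseSchedule is presumably returned as-is
# (registration `def _(noise_schedule: NoiseSchedule)` above; body not shown in this diff).
schedule = find_noise_schedule(EDMNoiseSchedule(sigma_data=0.5))

# Note: the dict-based registration (e.g. {"name": "cosine", ...}) was removed in this commit.
```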

bayesflow/experimental/diffusion_model/schedules/__init__.py (new file)

Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
+from .noise_schedule import NoiseSchedule
+from .cosine_noise_schedule import CosineNoiseSchedule
+from .edm_noise_schedule import EDMNoiseSchedule

bayesflow/experimental/diffusion_model/cosine_noise_schedule.py renamed to bayesflow/experimental/diffusion_model/schedules/cosine_noise_schedule.py

Lines changed: 18 additions & 7 deletions

@@ -1,5 +1,5 @@
 import math
-from typing import Union, Literal
+from typing import Literal

 from keras import ops

@@ -14,7 +14,14 @@
 class CosineNoiseSchedule(NoiseSchedule):
     """Cosine noise schedule for diffusion models. This schedule is based on the cosine schedule from [1].

-    [1] Diffusion Models Beat GANs on Image Synthesis: Dhariwal and Nichol (2022)
+    A cosine schedule is a popular technique for controlling how the variance (noise level) or
+    learning rate evolves during the training of diffusion models. It was proposed as an improvement
+    over the original linear beta schedule in [2].
+
+    [1] Dhariwal, P., & Nichol, A. (2021). Diffusion models beat gans on image synthesis.
+        Advances in Neural Information Processing Systems, 34, 8780-8794.
+    [2] Ho, J., Jain, A., & Abbeel, P. (2020). Denoising diffusion probabilistic models.
+        Advances in Neural Information Processing Systems, 33, 6840-6851.
     """

     def __init__(
@@ -51,12 +58,12 @@ def __init__(
     def _truncated_t(self, t: Tensor) -> Tensor:
         return self._t_min + (self._t_max - self._t_min) * t

-    def get_log_snr(self, t: Union[float, Tensor], training: bool) -> Tensor:
+    def get_log_snr(self, t: Tensor | float, training: bool) -> Tensor:
         """Get the log signal-to-noise ratio (lambda) for a given diffusion time."""
         t_trunc = self._truncated_t(t)
         return -2 * ops.log(ops.tan(math.pi * t_trunc * 0.5)) + 2 * self._shift

-    def get_t_from_log_snr(self, log_snr_t: Union[Tensor, float], training: bool) -> Tensor:
+    def get_t_from_log_snr(self, log_snr_t: Tensor | float, training: bool) -> Tensor:
         """Get the diffusion time (t) from the log signal-to-noise ratio (lambda)."""
         # SNR = -2 * log(tan(pi*t/2)) => t = 2/pi * arctan(exp(-snr/2))
         return 2 / math.pi * ops.arctan(ops.exp((2 * self._shift - log_snr_t) * 0.5))
@@ -76,9 +83,13 @@ def derivative_log_snr(self, log_snr_t: Tensor, training: bool) -> Tensor:
         return -factor * dsnr_dt

     def get_config(self):
-        return dict(
-            min_log_snr=self.log_snr_min, max_log_snr=self.log_snr_max, shift=self._shift, weighting=self._weighting
-        )
+        config = {
+            "min_log_snr": self.log_snr_min,
+            "max_log_snr": self.log_snr_max,
+            "shift": self._shift,
+            "weighting": self._weighting,
+        }
+        return config

     @classmethod
     def from_config(cls, config, custom_objects=None):
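
As a sanity check on the formulas above, a standalone sketch (pure `math`, no BayesFlow dependency) that mirrors the shifted-cosine log-SNR and its inverse from `get_log_snr` / `get_t_from_log_snr` and verifies they round-trip; the truncation of t to `[t_min, t_max]` is omitted here:

```python
import math

SHIFT = 0.0  # illustrative value of the schedule's shift parameter

def log_snr(t: float) -> float:
    # lambda(t) = -2 * log(tan(pi * t / 2)) + 2 * shift, as in get_log_snr above
    return -2.0 * math.log(math.tan(math.pi * t * 0.5)) + 2.0 * SHIFT

def t_from_log_snr(lam: float) -> float:
    # inverse: t = 2/pi * arctan(exp((2 * shift - lambda) / 2)), as in get_t_from_log_snr above
    return 2.0 / math.pi * math.atan(math.exp((2.0 * SHIFT - lam) * 0.5))

for t in (0.1, 0.5, 0.9):
    assert abs(t_from_log_snr(log_snr(t)) - t) < 1e-12
```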

bayesflow/experimental/diffusion_model/edm_noise_schedule.py renamed to bayesflow/experimental/diffusion_model/schedules/edm_noise_schedule.py

Lines changed: 12 additions & 11 deletions

@@ -1,5 +1,4 @@
 import math
-from typing import Union

 from keras import ops

@@ -15,7 +14,8 @@ class EDMNoiseSchedule(NoiseSchedule):
     """EDM noise schedule for diffusion models. This schedule is based on the EDM paper [1].
     This should be used with the F-prediction type in the diffusion model.

-    [1] Elucidating the Design Space of Diffusion-Based Generative Models: Karras et al. (2022)
+    [1] Karras, T., Aittala, M., Aila, T., & Laine, S. (2022). Elucidating the design space of diffusion-based
+        generative models. Advances in Neural Information Processing Systems, 35, 26565-26577.
     """

     def __init__(self, sigma_data: float = 1.0, sigma_min: float = 1e-4, sigma_max: float = 80.0):
@@ -26,7 +26,7 @@ def __init__(self, sigma_data: float = 1.0, sigma_min: float = 1e-4, sigma_max:
         ----------
         sigma_data : float, optional
             The standard deviation of the output distribution. Input of the network is scaled by this factor and
-            the weighting function is scaled by this factor as well.
+            the weighting function is scaled by this factor as well. Default is 1.0.
         sigma_min : float, optional
             The minimum noise level. Only relevant for sampling. Default is 1e-4.
         sigma_max : float, optional
@@ -50,21 +50,21 @@ def __init__(self, sigma_data: float = 1.0, sigma_min: float = 1e-4, sigma_max:
         self._log_snr_min_training = self.log_snr_min - 1  # one is never sampler during training
         self._log_snr_max_training = self.log_snr_max + 1  # 0 is almost surely never sampled during training

-    def get_log_snr(self, t: Union[float, Tensor], training: bool) -> Tensor:
+    def get_log_snr(self, t: float | Tensor, training: bool) -> Tensor:
         """Get the log signal-to-noise ratio (lambda) for a given diffusion time."""
         if training:
-            # SNR = -dist.icdf(t_trunc)  # negative seems to be wrong in the paper in the Kingma paper
+            # SNR = -dist.icdf(t_trunc)  # negative seems to be wrong in the Kingma paper
             loc = -2 * self.p_mean
             scale = 2 * self.p_std
             snr = loc + scale * ops.erfinv(2 * t - 1) * math.sqrt(2)
             snr = ops.clip(snr, x_min=self._log_snr_min_training, x_max=self._log_snr_max_training)
-        else:  # sampling
+        else:
             sigma_min_rho = self.sigma_min ** (1 / self.rho)
             sigma_max_rho = self.sigma_max ** (1 / self.rho)
             snr = -2 * self.rho * ops.log(sigma_max_rho + (1 - t) * (sigma_min_rho - sigma_max_rho))
         return snr

-    def get_t_from_log_snr(self, log_snr_t: Union[float, Tensor], training: bool) -> Tensor:
+    def get_t_from_log_snr(self, log_snr_t: float | Tensor, training: bool) -> Tensor:
         """Get the diffusion time (t) from the log signal-to-noise ratio (lambda)."""
         if training:
             # SNR = -dist.icdf(t_trunc) => t = dist.cdf(-snr)  # negative seems to be wrong in the Kingma paper
@@ -80,7 +80,7 @@ def get_t_from_log_snr(self, log_snr_t: float | Tensor, training: bool) ->
             t = 1 - ((ops.exp(-log_snr_t / (2 * self.rho)) - sigma_max_rho) / (sigma_min_rho - sigma_max_rho))
         return t

-    def derivative_log_snr(self, log_snr_t: Tensor, training: bool) -> Tensor:
+    def derivative_log_snr(self, log_snr_t: Tensor, training: bool = False) -> Tensor:
         """Compute d/dt log(1 + e^(-snr(t))), which is used for the reverse SDE."""
         if training:
             raise NotImplementedError("Derivative of log SNR is not implemented for training mode.")
@@ -101,11 +101,12 @@ def derivative_log_snr(self, log_snr_t: Tensor, training: bool) -> Tensor:

     def get_weights_for_snr(self, log_snr_t: Tensor) -> Tensor:
         """Get weights for the signal-to-noise ratio (snr) for a given log signal-to-noise ratio (lambda)."""
-        # for F-prediction: w = (ops.exp(-log_snr_t) + sigma_data^2) / (ops.exp(-log_snr_t)*sigma_data^2)
-        return ops.exp(-log_snr_t) / ops.square(self.sigma_data) + 1
+        # for F-loss: w = (ops.exp(-log_snr_t) + sigma_data^2) / (ops.exp(-log_snr_t)*sigma_data^2)
+        return 1 + ops.exp(-log_snr_t) / ops.square(self.sigma_data)

     def get_config(self):
-        return dict(sigma_data=self.sigma_data, sigma_min=self.sigma_min, sigma_max=self.sigma_max)
+        config = {"sigma_data": self.sigma_data, "sigma_min": self.sigma_min, "sigma_max": self.sigma_max}
+        return config

     @classmethod
     def from_config(cls, config, custom_objects=None):
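
Two of the formulas touched in this file can be checked in isolation: the training-time log-SNR draw (the `erfinv` expression above is the inverse CDF of a normal with `loc = -2 * p_mean`, `scale = 2 * p_std`) and the rewritten F-loss weight `w(lambda) = 1 + exp(-lambda) / sigma_data^2`. A standalone sketch with illustrative hyperparameters (the diff does not show the `p_mean` / `p_std` defaults), clipping omitted:

```python
import math
from statistics import NormalDist

P_MEAN, P_STD = -1.2, 1.2  # illustrative; actual defaults are not visible in this diff
SIGMA_DATA = 1.0

def training_log_snr(t: float) -> float:
    # Equivalent to loc + scale * erfinv(2 * t - 1) * sqrt(2) in get_log_snr above:
    # the inverse CDF of a normal with loc = -2 * p_mean and scale = 2 * p_std.
    return NormalDist(mu=-2.0 * P_MEAN, sigma=2.0 * P_STD).inv_cdf(t)

def f_loss_weight(log_snr_t: float) -> float:
    # Rewritten weighting: w = 1 + exp(-lambda) / sigma_data^2
    return 1.0 + math.exp(-log_snr_t) / SIGMA_DATA**2

lam = training_log_snr(0.5)           # the median draw is exactly -2 * p_mean
assert abs(lam - (-2.0 * P_MEAN)) < 1e-12
print(f_loss_weight(lam))             # ~1.09 for these illustrative values
```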

bayesflow/experimental/diffusion_model/noise_schedule.py renamed to bayesflow/experimental/diffusion_model/schedules/noise_schedule.py

Lines changed: 52 additions & 23 deletions

@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Union, Literal
+from typing import Literal

 from keras import ops

@@ -33,7 +33,7 @@ def __init__(
         weighting: Literal["sigmoid", "likelihood_weighting"] = None,
     ):
         """
-        Initialize the noise schedule.
+        Initialize the noise schedule with given variance and weighting strategy.

         Parameters
         ----------
@@ -54,21 +54,23 @@ def __init__(
         self._weighting = weighting

     @abstractmethod
-    def get_log_snr(self, t: Union[float, Tensor], training: bool) -> Tensor:
+    def get_log_snr(self, t: float | Tensor, training: bool) -> Tensor:
         """Get the log signal-to-noise ratio (lambda) for a given diffusion time."""
         pass

     @abstractmethod
-    def get_t_from_log_snr(self, log_snr_t: Union[float, Tensor], training: bool) -> Tensor:
+    def get_t_from_log_snr(self, log_snr_t: float | Tensor, training: bool) -> Tensor:
         """Get the diffusion time (t) from the log signal-to-noise ratio (lambda)."""
         pass

     @abstractmethod
-    def derivative_log_snr(self, log_snr_t: Union[float, Tensor], training: bool) -> Tensor:
+    def derivative_log_snr(self, log_snr_t: float | Tensor, training: bool) -> Tensor:
         r"""Compute \beta(t) = d/dt log(1 + e^(-snr(t))). This is usually used for the reverse SDE."""
         pass

-    def get_drift_diffusion(self, log_snr_t: Tensor, x: Tensor = None, training: bool = False) -> tuple[Tensor, Tensor]:
+    def get_drift_diffusion(
+        self, log_snr_t: Tensor, x: Tensor = None, training: bool = False
+    ) -> Tensor | tuple[Tensor, Tensor]:
         r"""Compute the drift and optionally the squared diffusion term for the reverse SDE.
         It can be derived from the derivative of the schedule:

@@ -97,10 +99,10 @@ def get_drift_diffusion(self, log_snr_t: Tensor, x: Tensor = None, training: boo
             raise ValueError(f"Unknown variance type: {self._variance_type}")
         return f, beta

-    def get_alpha_sigma(self, log_snr_t: Tensor, training: bool) -> tuple[Tensor, Tensor]:
+    def get_alpha_sigma(self, log_snr_t: Tensor) -> tuple[Tensor, Tensor]:
         """Get alpha and sigma for a given log signal-to-noise ratio (lambda).

-        Default is a variance preserving schedule::
+        Default is a variance preserving schedule:

            alpha(t) = sqrt(sigmoid(log_snr_t))
            sigma(t) = sqrt(sigmoid(-log_snr_t))
@@ -120,9 +122,32 @@ def get_alpha_sigma(self, log_snr_t: Tensor, training: bool) -> tuple[Tensor, Te
         return alpha_t, sigma_t

     def get_weights_for_snr(self, log_snr_t: Tensor) -> Tensor:
-        """Get weights for the signal-to-noise ratio (snr) for a given log signal-to-noise ratio (lambda).
-        Default weighting is None, which means only ones are returned.
-        Generally, weighting functions should be defined for a noise prediction loss.
+        """
+        Compute loss weights based on log signal-to-noise ratio (log-SNR).
+
+        This method returns a tensor of weights used for loss re-weighting in diffusion models,
+        depending on the selected strategy. If no weighting is specified, uniform weights (ones)
+        are returned.
+
+        Supported weighting strategies:
+        - "sigmoid": Based on Kingma et al. (2023), uses a sigmoid of shifted log-SNR.
+        - "likelihood_weighting": Based on Song et al. (2021), uses ratio of diffusion drift
+          to squared noise scale.
+
+        Parameters
+        ----------
+        log_snr_t : Tensor
+            A tensor containing the log signal-to-noise ratio values.
+
+        Returns
+        -------
+        Tensor
+            A tensor of weights corresponding to each log-SNR value.
+
+        Raises
+        ------
+        TypeError
+            If the weighting strategy specified in `self._weighting` is unknown.
         """
         if self._weighting is None:
             return ops.ones_like(log_snr_t)
@@ -131,33 +156,37 @@ def get_weights_for_snr(self, log_snr_t: Tensor) -> Tensor:
             return ops.sigmoid(-log_snr_t + 2)
         elif self._weighting == "likelihood_weighting":
             # likelihood weighting based on Song et al. (2021)
-            g_squared = self.get_drift_diffusion(log_snr_t=log_snr_t)
-            sigma_t = self.get_alpha_sigma(log_snr_t=log_snr_t, training=True)[1]
+            g_squared = self.get_drift_diffusion(log_snr_t)
+            _, sigma_t = self.get_alpha_sigma(log_snr_t)
             return g_squared / ops.square(sigma_t)
         else:
             raise TypeError(f"Unknown weighting type: {self._weighting}")

     def get_config(self):
-        return dict(name=self.name, variance_type=self._variance_type, weighting=self._weighting)
+        return {"name": self.name, "variance_type": self._variance_type, "weighting": self._weighting}

     @classmethod
     def from_config(cls, config, custom_objects=None):
         return cls(**deserialize(config, custom_objects=custom_objects))

     def validate(self):
         """Validate the noise schedule."""
+
         if self.log_snr_min >= self.log_snr_max:
             raise ValueError("min_log_snr must be less than max_log_snr.")
-        for training in [True, False]:
+
+        # Validate log SNR values and corresponding time mappings for both training and inference
+        for training in (True, False):
             if not ops.isfinite(self.get_log_snr(0.0, training=training)):
-                raise ValueError(f"log_snr(0) must be finite with training={training}.")
+                raise ValueError(f"log_snr(0.0) must be finite (training={training})")
             if not ops.isfinite(self.get_log_snr(1.0, training=training)):
-                raise ValueError(f"log_snr(1) must be finite with training={training}.")
+                raise ValueError(f"log_snr(1.0) must be finite (training={training})")
             if not ops.isfinite(self.get_t_from_log_snr(self.log_snr_max, training=training)):
-                raise ValueError(f"t(0) must be finite with training={training}.")
+                raise ValueError(f"t(log_snr_max) must be finite (training={training})")
             if not ops.isfinite(self.get_t_from_log_snr(self.log_snr_min, training=training)):
-                raise ValueError(f"t(1) must be finite with training={training}.")
-        if not ops.isfinite(self.derivative_log_snr(self.log_snr_max, training=False)):
-            raise ValueError("dt/t log_snr(0) must be finite.")
-        if not ops.isfinite(self.derivative_log_snr(self.log_snr_min, training=False)):
-            raise ValueError("dt/t log_snr(1) must be finite.")
+                raise ValueError(f"t(log_snr_min) must be finite (training={training})")
+
+        # Validate log SNR derivatives at the boundaries
+        for boundary, name in [(self.log_snr_max, "log_snr_max (t=0)"), (self.log_snr_min, "log_snr_min (t=1)")]:
+            if not ops.isfinite(self.derivative_log_snr(boundary, training=False)):
+                raise ValueError(f"derivative_log_snr at {name} must be finite.")

bayesflow/experimental/free_form_flow/free_form_flow.py

Lines changed: 0 additions & 9 deletions

@@ -1,8 +1,6 @@
 import keras
 from keras import ops

-import warnings
-
 from bayesflow.distributions import Distribution
 from bayesflow.types import Tensor
 from bayesflow.utils import (
@@ -86,13 +84,6 @@
         """
         super().__init__(base_distribution, **kwargs)

-        if encoder_subnet_kwargs or decoder_subnet_kwargs:
-            warnings.warn(
-                "Using `subnet_kwargs` is deprecated."
-                "Instead, instantiate the network yourself and pass the arguments directly.",
-                DeprecationWarning,
-            )
-
         encoder_subnet_kwargs = encoder_subnet_kwargs or {}
         decoder_subnet_kwargs = decoder_subnet_kwargs or {}

bayesflow/networks/consistency_models/consistency_model.py

Lines changed: 0 additions & 9 deletions

@@ -3,8 +3,6 @@

 import numpy as np

-import warnings
-
 from bayesflow.types import Tensor
 from bayesflow.utils import find_network, layer_kwargs, weighted_mean
 from bayesflow.utils.serialization import deserialize, serializable, serialize
@@ -76,13 +74,6 @@

         self.total_steps = float(total_steps)

-        if subnet_kwargs:
-            warnings.warn(
-                "Using `subnet_kwargs` is deprecated."
-                "Instead, instantiate the network yourself and pass the arguments directly.",
-                DeprecationWarning,
-            )
-
         subnet_kwargs = subnet_kwargs or {}
         if subnet == "mlp":
             subnet_kwargs = ConsistencyModel.MLP_DEFAULT_CONFIG | subnet_kwargs
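
Context for the warnings removed in this file and in free_form_flow.py above: passing `subnet_kwargs` (or `encoder_subnet_kwargs` / `decoder_subnet_kwargs`) is no longer flagged as deprecated. A hedged sketch of the two styles this enables; the import paths and all constructor arguments other than `subnet`, `subnet_kwargs`, and `total_steps` are illustrative assumptions and may differ from the actual API:

```python
# Hedged sketch only: "mlp", subnet_kwargs, and total_steps appear in the diff above;
# the MLP import and its arguments are illustrative assumptions.
from bayesflow.networks import MLP, ConsistencyModel

# Style 1: let the network build its subnet, forwarding keyword arguments
# (this no longer emits a DeprecationWarning after this commit).
cm = ConsistencyModel(subnet="mlp", subnet_kwargs={"dropout": 0.1}, total_steps=10_000)

# Style 2: instantiate the subnet yourself and pass it in directly.
cm = ConsistencyModel(subnet=MLP(widths=(256, 256)), total_steps=10_000)
```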
