-
-from typing import Tuple, Union
-
 import keras
 from keras.saving import (
     register_keras_serializable,
 )
-from scipy.integrate import solve_ivp
 
 from bayesflow.types import Tensor
-from bayesflow.utils import find_network, keras_kwargs
+from bayesflow.utils import find_network, jacobian_trace, keras_kwargs, optimal_transport
 
 from ..inference_network import InferenceNetwork
 
 
 @register_keras_serializable(package="bayesflow.networks")
 class FlowMatching(InferenceNetwork):
-    def __init__(self, network: str = "resnet", **kwargs):
-        super().__init__(**keras_kwargs(kwargs))
-        self.network = find_network(network, **kwargs)
+    def __init__(self, subnet: str = "resnet", base_distribution: str = "normal", **kwargs):
+        super().__init__(base_distribution=base_distribution, **keras_kwargs(kwargs))
+        self.subnet = find_network(subnet, **kwargs.get("subnet_kwargs", {}))
+
+        output_projector_kwargs = kwargs.get("output_projector_kwargs", {})
+        output_projector_kwargs.setdefault("bias_initializer", "zeros")
+        self.output_projector = keras.layers.Dense(None, **output_projector_kwargs)
+
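+    # the projector's width is only known once the data dimension is seen, so
+    # its units are set here and a dummy pass through call() creates the weights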
+    def build(self, xz_shape, conditions_shape=None):
+        super().build(xz_shape)
+
+        self.output_projector.units = xz_shape[-1]
+
+        xz = keras.ops.zeros(xz_shape)
+        if conditions_shape is None:
+            conditions = None
+        else:
+            conditions = keras.ops.zeros(conditions_shape)
+
+        self.call(xz, conditions=conditions, steps=1)
+
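+    # single entry point: the forward pass maps data to latents, the inverse
+    # pass maps latents back to data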
+    def call(
+        self,
+        xz: Tensor,
+        conditions: Tensor = None,
+        inverse: bool = False,
+        **kwargs,
+    ):
+        if inverse:
+            return self._inverse(xz, conditions=conditions, **kwargs)
+        return self._forward(xz, conditions=conditions, **kwargs)
+
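+    # accept scalar, per-batch, or already-expanded time and broadcast it to
+    # shape (batch_size, 1) for concatenation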
+    def velocity(self, x: Tensor, t: int | float | Tensor, conditions: Tensor = None) -> Tensor:
+        t = keras.ops.convert_to_tensor(t, dtype=x.dtype)
+        match keras.ops.ndim(t):
+            case 0:
+                t = keras.ops.full((keras.ops.shape(x)[0], 1), t, dtype=x.dtype)
+            case 1:
+                t = keras.ops.expand_dims(t, 1)
 
-    def velocity(self, x: Tensor, t: Tensor, conditions: any = None):
         if conditions is None:
-            xtc = keras.ops.concatenate([x, t], axis=1)
+            xtc = keras.ops.concatenate([x, t], axis=-1)
         else:
-            xtc = keras.ops.concatenate([x, t, conditions], axis=1)
+            xtc = keras.ops.concatenate([x, t, conditions], axis=-1)
 
-        return self.network(xtc)
+        return self.output_projector(self.subnet(xtc))
 
-    def _forward(self, x: Tensor, conditions: any = None, jacobian: bool = False, steps: int = 100, method: str = "RK45") -> Union[Tensor, Tuple[Tensor, Tensor]]:
-        def dfdt(t: float, x: Tensor):
-            t = keras.ops.full((keras.ops.shape(x)[0], 1), t)
-            return self.velocity(x, t, conditions)
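+    # fixed-step Euler integration replaces scipy's solve_ivp, keeping the
+    # ODE solve backend-native and differentiable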
+    def _forward(
+        self, x: Tensor, conditions: Tensor = None, density: bool = False, **kwargs
+    ) -> Tensor | tuple[Tensor, Tensor]:
+        steps = kwargs.get("steps", 100)
+        z = keras.ops.copy(x)
+        t = keras.ops.ones((keras.ops.shape(x)[0], 1), dtype=x.dtype)
+        dt = -1.0 / steps
 
-        return solve_ivp(dfdt, t_span=(1.0, 0.0), y0=x, method=method, vectorized=True)[1]
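+        # with density=True, also accumulate the Jacobian trace along the path
+        # (Hutchinson-style estimate; cf. the removed hutchinson_trace draft below)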
+        if density:
+            trace = keras.ops.zeros(keras.ops.shape(x)[0], dtype=x.dtype)
+
+            def f(arg):
+                return self.velocity(arg, t, conditions)
+
+            for _ in range(steps):
+                v, tr = jacobian_trace(f, z, kwargs.get("trace_samples", 100))
+                z += dt * v
+                trace += dt * tr
+                t += dt  # advance integration time alongside the state
+
+            log_prob = self.base_distribution.log_prob(z)
+
+            log_density = log_prob + trace
+
+            return z, log_density
+        else:
+            for _ in range(steps):
+                v = self.velocity(z, t, conditions)
+                z += dt * v
+                t += dt
 
-    def _inverse(self, z: Tensor, conditions: any = None, jacobian: bool = False, steps: int = 100, method: str = "RK45") -> Union[Tensor, Tuple[Tensor, Tensor]]:
-        def dfdt(t: float, x: Tensor):
-            t = keras.ops.full((keras.ops.shape(x)[0], 1), t)
-            return self.velocity(x, t, conditions)
+        return z
 
-        return solve_ivp(dfdt, t_span=(0.0, 1.0), y0=z, method=method, vectorized=True)[1]
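+    # mirror image of _forward: integrate latents to data with t from 0 to 1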
+    def _inverse(
+        self, z: Tensor, conditions: Tensor = None, density: bool = False, **kwargs
+    ) -> Tensor | tuple[Tensor, Tensor]:
+        steps = kwargs.get("steps", 100)
+        x = keras.ops.copy(z)
+        t = keras.ops.zeros((keras.ops.shape(x)[0], 1), dtype=x.dtype)
+        dt = 1.0 / steps
+
+        if density:
+            trace = keras.ops.zeros(keras.ops.shape(x)[0], dtype=x.dtype)
+
+            def f(arg):
+                return self.velocity(arg, t, conditions)
+
+            for _ in range(steps):
+                v, tr = jacobian_trace(f, x, kwargs.get("trace_samples", 100))
+                x += dt * v
+                trace += dt * tr
+                t += dt  # advance integration time alongside the state
+
+            log_prob = self.base_distribution.log_prob(z)
+
+            log_density = log_prob - trace
+
+            return x, log_density
+        else:
+            for _ in range(steps):
+                v = self.velocity(x, t, conditions)
+                x += dt * v
+                t += dt
+
+        return x
+
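+    # flow matching objective: regress the predicted velocity at a random time t
+    # onto the straight-line target velocity x1 - x0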
+    def compute_metrics(self, data: dict[str, Tensor], stage: str = "training") -> dict[str, Tensor]:
+        x1 = data["inference_variables"]
+        c = data.get("inference_conditions")
+
+        x0 = self.base_distribution.sample(keras.ops.shape(x1))
+
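+        # pair base draws with data via (mini-batch) optimal transport so the
+        # interpolation paths cross less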
+        x0, x1 = optimal_transport(x0, x1)
 
-    def compute_loss(self, x=None, **kwargs):
-        x0, x1, *conditions = x
         t = keras.random.uniform((keras.ops.shape(x0)[0], 1))
 
         x = t * x1 + (1 - t) * x0
-        xtc = keras.ops.concatenate([x, t, *conditions], axis=-1)
 
-        predicted_velocity = self.network(xtc)
+        predicted_velocity = self.velocity(x, t, c)
         target_velocity = x1 - x0
 
-        return keras.losses.mean_squared_error(predicted_velocity, target_velocity)
-
-
-# @register_keras_serializable(package="bayesflow.networks")
-# class FlowMatching(InferenceNetwork):
-#     def __init__(self, network: keras.Layer, **kwargs):
-#         super().__init__(**kwargs)
-#         self.network = network
-#
-#     @classmethod
-#     def new(cls, network: str = "resnet", base_distribution: str = "normal"):
-#         # TODO: we probably want to provide a factory method like this, since the other networks use it
-#         # for high-level input parameters
-#         # network = find_network(network)
-#         return cls(network, base_distribution=base_distribution)
-#
-#     @classmethod
-#     def from_config(cls, config: dict, custom_objects=None) -> "FlowMatching":
-#         # TODO: the base distribution must be savable and loadable
-#         # ideally we also don't want to have to manually deserialize it in every subclass of InferenceNetwork
-#         base_distribution = deserialize_keras_object(config.pop("base_distribution"))
-#         network = deserialize_keras_object(config.pop("network"))
-#         return cls(network, base_distribution=base_distribution, **config)
-#
-#     def get_config(self) -> dict:
-#         base_config = super().get_config()
-#         config = {"network": serialize_keras_object(self.network)}
-#         return base_config | config
-#
-#     def build(self, input_shape):
-#         self.network.build(input_shape)
-#
-#     def _forward(self, x: Tensor, conditions: any = None, jacobian: bool = False, steps: int = 100, method: str = "RK45") -> Union[Tensor, Tuple[Tensor, Tensor]]:
-#         # implement conditions = None and jacobian = False first
-#         # then work your way up
-#         raise NotImplementedError
-#
-#     def _inverse(self, z: Tensor, conditions: any = None, jacobian: bool = False, steps: int = 100, method: str = "RK45") -> Union[Tensor, Tuple[Tensor, Tensor]]:
-#         raise NotImplementedError
-#
-#     def compute_loss(self, x=None, **kwargs):
-#         # x should ideally contain both x0 and x1,
-#         # where the optimal transport matching already happened in the worker process
-#         # this is possible, but might not be super user-friendly. We will have to see.
-#         x0, x1, t = x
-#
-#         xt = t * x1 + (1 - t) * x0
-#
-#         # get velocity at xt
-#         v = ...
-#
-#         # target velocity:
-#         vstar = x1 - x0
-#
-#         # return mse between v and vstar
-#
-#
-# # TODO: see below for reference implementation
-#
-#
-# class FlowMatching(keras.Model):
-#     def __init__(self, network: keras.Layer, base_distribution):
-#         super().__init__()
-#         self.network = network
-#         self.base_distribution = find_distribution(base_distribution)
-#
-#     def call(self, inferred_variables, inference_conditions):
-#         return self.network(keras.ops.concatenate([inferred_variables, inference_conditions], axis=1))
-#
-#     def compute_loss(self, x=None, y=None, y_pred=None, **kwargs):
-#         return keras.losses.mean_squared_error(y, y_pred)
-#
-#     def velocity(self, x: Tensor, t: Tensor, c: Tensor = None):
-#         if c is None:
-#             xtc = keras.ops.concatenate([x, t], axis=1)
-#         else:
-#             xtc = keras.ops.concatenate([x, t, c], axis=1)
-#
-#         return self.network(xtc)
-#
-#     def forward(self, x, c=None, method="RK45") -> Tensor:
-#         def f(t, x):
-#             t = keras.ops.full((keras.ops.shape(x)[0], 1), t)
-#             return self.velocity(x, t, c)
-#
-#         bunch = solve_ivp(f, t_span=(1.0, 0.0), y0=x, method=method, vectorized=True)
-#
-#         return bunch[1]
-#
-#     def inverse(self, x, c=None, method="RK45") -> Tensor:
-#         def f(t, x):
-#             t = keras.ops.full((keras.ops.shape(x)[0], 1), t)
-#             return self.velocity(x, t, c)
-#
-#         bunch = solve_ivp(f, t_span=(0.0, 1.0), y0=x, method=method, vectorized=True)
-#
-#         return bunch[1]
-#
-#     def sample(self, batch_shape: Shape) -> Tensor:
-#         z = self.base_distribution.sample(batch_shape)
-#         return self.inverse(z)
-#
-#     def log_prob(self, x: Tensor, c: Tensor = None) -> Tensor:
-#         raise NotImplementedError(f"Keras does not yet support backend-agnostic Vector-Jacobian Products.")
-#
-#
-# def hutchinson_trace(f: callable, x: Tensor) -> (Tensor, Tensor):
-#     # TODO: test this for all 3 backends
-#     noise = keras.random.normal(keras.ops.shape(x))
-#
-#     match keras.backend.backend():
-#         case "jax":
-#             import jax
-#             fx, jvp = jax.jvp(f, (x,), (noise,))
-#         case "tensorflow":
-#             import tensorflow as tf
-#             with tf.GradientTape(persistent=True) as tape:
-#                 tape.watch(x)
-#                 fx = f(x)
-#             jvp = tape.gradient(fx, x, output_gradients=noise)
-#         case "torch":
-#             import torch
-#             fx, jvp = torch.autograd.functional.jvp(f, x, noise, create_graph=True)
-#         case other:
-#             raise NotImplementedError(f"Backend {other} is not supported for trace estimation.")
-#
-#     trace = keras.ops.sum(jvp * noise, axis=1)
-#
-#     return fx, trace
+        loss = keras.losses.mean_squared_error(predicted_velocity, target_velocity)
+
+        return {"loss": loss}
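
For orientation, a minimal sketch of how the reworked interface might be exercised end to end. The import path, shapes, and step counts below are illustrative assumptions, not part of this commit:

import keras
from bayesflow.networks import FlowMatching  # assumed import path

net = FlowMatching(subnet="resnet", base_distribution="normal")
net.build(xz_shape=(16, 2), conditions_shape=(16, 3))  # hypothetical toy shapes

conditions = keras.random.normal((16, 3))

# sampling: draw latents from the base distribution, then integrate t: 0 -> 1
z = net.base_distribution.sample((16, 2))
x = net(z, conditions=conditions, inverse=True, steps=100)

# density: integrate t: 1 -> 0 while accumulating the Jacobian trace
z, log_density = net(x, conditions=conditions, density=True, steps=100)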