Add activation function for quantile estimation

han-ol · han-ol · commit ce078559e665 · 2025-01-10T18:45:26.000+01:00
diff --git a/bayesflow/link_functions/__init__.py b/bayesflow/link_functions/__init__.py
@@ -0,0 +1 @@
+from .ordered_quantiles import OrderedQuantiles
diff --git a/bayesflow/link_functions/ordered_quantiles.py b/bayesflow/link_functions/ordered_quantiles.py
@@ -0,0 +1,91 @@
+import keras
+
+from bayesflow.utils import keras_kwargs
+
+from collections.abc import Sequence
+
+
+class OrderedQuantiles(keras.Layer):
+    """Link function that turns unconstrained inputs into ordered quantile estimates.
+
+    One entry along ``axis`` (the anchor) is passed through unchanged. All other entries are made
+    positive with a softplus and cumulatively subtracted from (entries below the anchor) or added to
+    (entries above the anchor) the anchor, so the output increases along ``axis``.
+
+    Parameters
+    ----------
+    quantile_levels : Sequence[float], optional
+        Quantile levels in the order they appear along ``axis``. The level closest to 0.5 is used
+        as anchor. If None, the middle index along ``axis`` is used instead.
+    axis : int, optional
+        Axis along which the output is ordered. If None, it is set to -2 in ``build`` for inputs
+        of rank 2 or 3.
+    **kwargs
+        Passed through ``keras_kwargs`` to the ``keras.Layer`` constructor.
+    """
+
+    def __init__(self, quantile_levels: Sequence[float] | None = None, axis: int | None = None, **kwargs):
+        super().__init__(**keras_kwargs(kwargs))
+        self.quantile_levels = quantile_levels
+        self.axis = axis
+
+    def build(self, input_shape):
+        """Resolve the ordered axis and the index groups below and above the anchor.
+
+        Raises
+        ------
+        AssertionError
+            If no ``axis`` was given and ``input_shape`` does not have rank 2 or 3.
+        """
+        super().build(input_shape)
+
+        # only infer the axis when the user did not pass one explicitly
+        if self.axis is None:
+            if not 1 < len(input_shape) <= 3:
+                raise AssertionError(
+                    "Cannot resolve which axis should be ordered automatically from input shape " + str(input_shape)
+                )
+            self.axis = -2
+
+        if self.quantile_levels is not None:
+            levels = [float(level) for level in self.quantile_levels]
+            num_quantile_levels = len(levels)
+            # choose quantile level closest to median as anchor (first one on ties); computed in
+            # plain Python so the index is an int that range() accepts on every backend
+            self.anchor_quantile_index = min(range(num_quantile_levels), key=lambda i: abs(levels[i] - 0.5))
+        else:
+            num_quantile_levels = input_shape[self.axis]
+            self.anchor_quantile_index = num_quantile_levels // 2
+
+        # NOTE: attribute name (including its spelling) is kept as is for compatibility
+        self.group_indeces = dict(
+            below=list(range(0, self.anchor_quantile_index)),
+            above=list(range(self.anchor_quantile_index + 1, num_quantile_levels)),
+        )
+
+    def call(self, inputs):
+        """Return ``inputs`` transformed to be increasing along ``self.axis``; the shape is unchanged."""
+        # Divide in anchor, below and above
+        below_inputs = keras.ops.take(inputs, self.group_indeces["below"], axis=self.axis)
+        anchor_input = keras.ops.take(inputs, self.anchor_quantile_index, axis=self.axis)
+        above_inputs = keras.ops.take(inputs, self.group_indeces["above"], axis=self.axis)
+
+        # taking a single index drops the axis; re-insert it with size 1 so the anchor broadcasts.
+        # Unlike a reshape to the static shape, this does not need every dimension to be known.
+        anchor_input = keras.ops.expand_dims(anchor_input, axis=self.axis)
+
+        # Apply softplus for positivity and cumulate to ensure ordered quantiles
+        below = keras.activations.softplus(below_inputs)
+        above = keras.activations.softplus(above_inputs)
+
+        # flip the cumulative sums below the anchor so the largest offset lands at index 0
+        below = anchor_input - keras.ops.flip(keras.ops.cumsum(below, axis=self.axis), self.axis)
+        above = anchor_input + keras.ops.cumsum(above, axis=self.axis)
+
+        # Concatenate along the ordered axis to restore the original layout
+        x = keras.ops.concatenate([below, anchor_input, above], self.axis)
+        return x
+
+    def compute_output_shape(self, input_shape):
+        """Return ``input_shape``: the output has the same shape as the input."""
+        return input_shape
diff --git a/bayesflow/networks/point_inference_network.py b/bayesflow/networks/point_inference_network.py
@@ -21,7 +21,7 @@ def __init__(
         scoring_rules: dict[str, ScoringRule],
         body_subnet: str | type = "mlp",  # naming: shared_subnet / body / subnet ?
         heads_subnet: dict[str, str | keras.Layer] = None,  # TODO: `type` instead of `keras.Layer` ? Too specific ?
-        activations: dict[str, keras.layers.Activation | Callable | str] = None,
+        activations: dict[str, keras.Layer | Callable | str] = None,
         **kwargs,
     ):
         super().__init__(
@@ -36,17 +36,17 @@ def __init__(
 
         self.body_subnet = find_network(body_subnet, **kwargs.get("body_subnet_kwargs", {}))
 
-        if heads_subnet:
+        if heads_subnet is not None:
             self.heads = {
                 key: [find_network(value, **kwargs.get("heads_subnet_kwargs", {}).get(key, {}))]
                 for key, value in heads_subnet.items()
             }
         else:
             self.heads = {key: [] for key in self.scoring_rules.keys()}
 
-        if activations:
+        if activations is not None:
             self.activations = {
-                key: (value if isinstance(value, keras.layers.Activation) else keras.layers.Activation(value))
+                key: (value if isinstance(value, keras.Layer) else keras.layers.Activation(value))
                 for key, value in activations.items()
             }  # make sure that each value is an Activation object
         else:
@@ -64,16 +64,16 @@ def __init__(
 
         assert set(self.scoring_rules.keys()) == set(self.heads.keys()) == set(self.activations.keys())
 
-    def build(self, xz_shape: Shape, conditions_shape: Shape = None) -> None:
+    def build(self, xz_shape: Shape, conditions_shape: Shape) -> None:
         # build the shared body network
         input_shape = conditions_shape
         self.body_subnet.build(input_shape)
         body_output_shape = self.body_subnet.compute_output_shape(input_shape)
 
         for key in self.heads.keys():
-            # head_output_shape (excluding batch_size) convention is (*prediction_shape, *parameter_block_shape)
-            prediction_shape = self.scoring_rules[key].prediction_shape
-            head_output_shape = prediction_shape + xz_shape[1:]
+            # head_output_shape (excluding batch_size) convention is (*target_shape, *parameter_block_shape)
+            target_shape = self.scoring_rules[key].target_shape
+            head_output_shape = target_shape + xz_shape[1:]
 
             # set correct head shape
             self.heads[key][-3].units = prod(head_output_shape)
@@ -91,13 +91,18 @@ def call(
         conditions: Tensor = None,
         training: bool = False,
         **kwargs,
-    ) -> Tensor | tuple[Tensor, Tensor]:
+    ) -> dict[str, Tensor]:
         # TODO: remove unnecessary similarity with InferenceNetwork
         return self._forward(xz, conditions=conditions, training=training, **kwargs)
 
     def _forward(
-        self, x: Tensor, conditions: Tensor = None, training: bool = False, **kwargs
-    ) -> Tensor | tuple[Tensor, Tensor]:
+        self,
+        x: Tensor,
+        conditions: Tensor = None,
+        training: bool = False,
+        **kwargs,
+        # TODO: propagate training flag
+    ) -> dict[str, Tensor]:
         body_output = self.body_subnet(conditions)
 
         output = dict()

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+from .ordered_quantiles import OrderedQuantiles`