From 39690b257a9fd7043f38e429758ac278f2548054 Mon Sep 17 00:00:00 2001
From: stefanradev93
Date: Fri, 17 May 2024 13:04:53 -0400
Subject: [PATCH] Add docs for coupling flow

---
 .../networks/coupling_flow/coupling_flow.py  | 105 +++++++++++++++++-
 1 file changed, 102 insertions(+), 3 deletions(-)

diff --git a/bayesflow/experimental/networks/coupling_flow/coupling_flow.py b/bayesflow/experimental/networks/coupling_flow/coupling_flow.py
index 2907d2e44..5957ebf0a 100644
--- a/bayesflow/experimental/networks/coupling_flow/coupling_flow.py
+++ b/bayesflow/experimental/networks/coupling_flow/coupling_flow.py
@@ -9,7 +9,28 @@
 
 class CouplingFlow(keras.Sequential):
-    """ Implements a coupling flow as a sequence of dual couplings with swap permutations """
+    """ Implements a coupling flow as a sequence of dual couplings with permutations and activation
+    normalization. Incorporates ideas from [1-5].
+
+    [1] Kingma, D. P., & Dhariwal, P. (2018).
+    Glow: Generative flow with invertible 1x1 convolutions.
+    Advances in Neural Information Processing Systems, 31.
+
+    [2] Durkan, C., Bekasov, A., Murray, I., & Papamakarios, G. (2019).
+    Neural spline flows. Advances in Neural Information Processing Systems, 32.
+
+    [3] Ardizzone, L., Kruse, J., Lüth, C., Bracher, N., Rother, C., & Köthe, U. (2020).
+    Conditional invertible neural networks for diverse image-to-image translation.
+    In DAGM German Conference on Pattern Recognition (pp. 373-387). Springer, Cham.
+
+    [4] Radev, S. T., Mertens, U. K., Voss, A., Ardizzone, L., & Köthe, U. (2020).
+    BayesFlow: Learning complex stochastic models with invertible neural networks.
+    IEEE Transactions on Neural Networks and Learning Systems.
+
+    [5] Alexanderson, S., & Henter, G. E. (2020).
+    Robust model training and generalisation with Studentising flows.
+    arXiv preprint arXiv:2006.06599.
+    """
     def __init__(self, couplings: Sequence[AllInOneCoupling], base_distribution: Distribution):
         super().__init__(couplings)
         self.base_distribution = base_distribution
@@ -18,7 +39,7 @@ def __init__(self, couplings: Sequence[AllInOneCoupling], base_distribution: Dis
     def all_in_one(
         cls,
         target_dim: int,
-        num_layers: int,
+        num_layers: int = 6,
         subnet_builder="default",
         transform="affine",
         permutation="fixed",
@@ -26,7 +47,85 @@
         base_distribution="normal",
         **kwargs
     ) -> "CouplingFlow":
-        """ Construct a uniform coupling flow, consisting of dual couplings with a single type of transform. """
+        """Construct a coupling flow, consisting of dual couplings with a single type of transform.
+
+        Parameters
+        ----------
+        target_dim : int
+            The dimensionality of the latent space, e.g., for estimating a model with 2 parameters, set
+            ``target_dim=2``.
+        num_layers : int, optional, default: 6
+            The number of dual coupling layers in the coupling flow. More layers will result in better
+            performance for some applications at the cost of increased training time.
+        subnet_builder : str or callable, optional, default: "default"
+            Determines the structure of the internal networks used to generate the internal parameters
+            for the coupling transforms. You can also pass a function that accepts a ``target_dim``
+            parameter and generates a custom architecture accordingly.
+
+            The default builder will suffice for most applications. You can control the settings of the
+            default networks by passing them as a dictionary via the ``subnet_settings`` optional keyword
+            argument. For instance, to increase the dropout rate, you can pass
+            ``subnet_settings=dict(dropout_rate=0.05)``.
+
+            See below for a full list of settings.
+        transform : str or callable, optional, default: "affine"
+            The type of coupling transform used. Custom transforms can be passed as callable objects
+            that implement a ``forward()`` and an ``inverse()`` method.
+
+            Note: The string options are ``["affine", "spline"]``, where "spline" will typically result
+            in better performance for low-dimensional problems at the cost of a ~1.5x increase in
+            training time.
+        permutation : str, optional, default: "fixed"
+            The type of permutation to apply between layers. Should be in ``["fixed", "learnable"]``.
+            Specifying a learnable permutation is advisable when you have many parameters and very few
+            coupling layers to ensure proper mixing between dimensions (i.e., representation of
+            correlations).
+        act_norm : bool, optional, default: True
+            A flag indicating whether to apply an invertible activation normalization layer prior to
+            each coupling transformation. Don't touch unless you know what you are doing.
+        base_distribution : str or callable, optional, default: "normal"
+            The latent space distribution into which your targets are transformed. Currently
+            implemented are:
+
+            - "normal" : The standard choice; don't touch unless you know what you are doing.
+
+            - "student" : Can help stabilize training by controlling the influence function of
+              potentially problematic inputs in the training data, as suggested by [5].
+
+            - "mixture" : Can help with learning multimodal distributions, especially when using
+              ``transform="affine"`` and you have some prior knowledge about the number of modes.
+
+            - callable : Any other custom distribution implemented appropriately.
+        **kwargs : dict, optional, default: {}
+            Optional keyword arguments that will be passed to the ``subnet_builder`` or to the
+            ``base_distribution``.
+
+            For the ``subnet_builder``, you can pass a ``subnet_settings`` dictionary which can modify
+            the following default settings:
+
+            ``default_settings=dict(
+                hidden_dim=512,
+                num_hidden=2,
+                activation="gelu",
+                residual=True,
+                spectral_norm=False,
+                dropout_rate=0.05,
+                zero_output_init=True
+            )``
+
+            For instance, to increase regularization for small data sets, you can pass:
+
+            ``subnet_settings=dict(dropout_rate=0.2)``
+
+            See the implementation of ``bayesflow.resnet.ConditionalResidualBlock`` for more details.
+
+            For the ``base_distribution``, you can provide a ``base_distribution_parameters``
+            dictionary which is specific to each type of base distribution.
+
+            #TODO
+
+        Returns
+        -------
+        flow : bayesflow.networks.CouplingFlow
+            The callable coupling flow, which can seamlessly interact with other Keras objects.
+        """
         base_distribution = find_distribution(base_distribution, shape=(target_dim,))
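
A minimal usage sketch of the factory documented above; the import path mirrors the file touched
by this diff, and only the ``all_in_one()`` signature and the keyword arguments described in the
docstring are taken as given (anything beyond that is an assumption about the experimental API):

    # Hypothetical usage; the import path follows this patch's file location.
    from bayesflow.experimental.networks.coupling_flow.coupling_flow import CouplingFlow

    # A 5-parameter estimation problem: spline couplings for a low-dimensional
    # target, a learnable permutation for better mixing across dimensions, and
    # stronger dropout for a small data set, as suggested in the docstring.
    flow = CouplingFlow.all_in_one(
        target_dim=5,
        num_layers=6,
        transform="spline",
        permutation="learnable",
        subnet_settings=dict(dropout_rate=0.2),
    )

    # Per __init__, the flow stores its latent distribution as an attribute and
    # otherwise behaves like a keras.Sequential model of coupling layers.
    print(flow.base_distribution)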