Commit

Merge branch 'dev' into forward-kwargs
vpratz committed Dec 20, 2024
2 parents 59df799 + 770244e commit a4b6558
Showing 193 changed files with 4,279 additions and 1,459 deletions.
@@ -1,12 +1,14 @@

# From https://github.com/eeholmes/readthedoc-test/blob/main/.github/workflows/docs_pages.yml
name: docs
name: multiversion-docs

# execute this workflow automatically when we push to master
on:
push:
branches:
- master
workflow_dispatch:
# execute this workflow automatically when we push to master or dev
# push:
# branches:
# - master
# - dev

jobs:

@@ -17,13 +19,15 @@ jobs:
- name: Checkout main
uses: actions/checkout@v3
with:
path: master
path: dev
fetch-depth: 0
fetch-tags: true

- name: Checkout gh-pages
- name: Checkout gh-pages-dev
uses: actions/checkout@v3
with:
path: gh-pages
ref: gh-pages
path: gh-pages-dev
ref: gh-pages-dev

- name: Set up Python
uses: actions/setup-python@v4
@@ -33,17 +37,21 @@ jobs:

- name: Install dependencies
run: |
cd ./master
cd ./dev
python -m pip install .[docs]
- name: Create local branches
run: |
cd ./dev
git branch master remotes/origin/master
- name: Make the Sphinx docs
run: |
cd ./master/docsrc
cd ./dev/docsrc
make clean
make github
- name: Commit changes to docs
run: |
cd ./gh-pages
cp -R ../master/docs/* ./
cd ./gh-pages-dev
cp -R ../dev/docs/* ./
git config --local user.email ""
git config --local user.name "github-actions"
git add -A
5 changes: 5 additions & 0 deletions .gitignore
@@ -5,6 +5,11 @@ __pycache__/
projects/
*/bayesflow.egg-info
docsrc/_build/
docsrc/_build_polyversion
docsrc/.bf_doc_gen_venv
docsrc/source/api
docsrc/source/_examples
docsrc/source/contributing.md
build
docs/

22 changes: 17 additions & 5 deletions CONTRIBUTING.md
@@ -100,7 +100,7 @@ Make sure to occasionally also run multi-backend tests for your OS using [tox](h
tox --parallel auto
```

See [tox.ini](tox.ini) for details on the environment configurations.
See `tox.ini` for details on the environment configurations.
Multi-OS tests will automatically be run once you create a pull request.

Note that to be backend-agnostic, your code must not:
@@ -137,12 +137,24 @@ z = keras.ops.convert_to_numpy(x)
### 4. Document your changes

The documentation uses [sphinx](https://www.sphinx-doc.org/) and relies on [numpy style docstrings](https://numpydoc.readthedocs.io/en/latest/format.html) in classes and functions.
The overall *structure* of the documentation is manually designed. This also applies to the API documentation. This has two implications for you:

1. If you add to existing submodules, the documentation will update automatically (given that you use proper numpy docstrings).
2. If you add a new submodule or subpackage, you need to add a file to `docsrc/source/api` and a reference to the new module to the appropriate section of `docsrc/source/api/bayesflow.rst`.
Run the following command to install all necessary packages for setting up documentation generation:

You can re-build the documentation with
```
pip install .[docs]
```

The overall *structure* of the documentation is manually designed, but the API documentation is auto-generated.

You can re-build the current documentation with

```bash
cd docsrc
make clean && make dev
# in case of issues, try `make clean-all`
```

We also provide a multi-version documentation. To generate it, run

```bash
cd docsrc
2 changes: 2 additions & 0 deletions bayesflow/__init__.py
@@ -7,9 +7,11 @@
distributions,
networks,
simulators,
workflows,
utils,
)

from .workflows import BasicWorkflow
from .approximators import ContinuousApproximator
from .adapters import Adapter
from .datasets import OfflineDataset, OnlineDataset, DiskDataset
9 changes: 9 additions & 0 deletions bayesflow/adapters/adapter.py
@@ -9,6 +9,7 @@

from .transforms import (
AsSet,
AsTimeSeries,
Broadcast,
Concatenate,
Constrain,
@@ -112,6 +113,14 @@ def as_set(self, keys: str | Sequence[str]):
self.transforms.append(transform)
return self

def as_time_series(self, keys: str | Sequence[str]):
if isinstance(keys, str):
keys = [keys]

transform = MapTransform({key: AsTimeSeries() for key in keys})
self.transforms.append(transform)
return self

def broadcast(
self, keys: str | Sequence[str], *, to: str, expand: str | int | tuple = "left", exclude: int | tuple = -1
):
1 change: 1 addition & 0 deletions bayesflow/adapters/transforms/__init__.py
@@ -1,4 +1,5 @@
from .as_set import AsSet
from .as_time_series import AsTimeSeries
from .broadcast import Broadcast
from .concatenate import Concatenate
from .constrain import Constrain
6 changes: 6 additions & 0 deletions bayesflow/adapters/transforms/as_set.py
@@ -11,6 +11,12 @@ class AsSet(ElementwiseTransform):
This is useful, for example, in a linear regression context where we can index
the observations in arbitrary order and always get the same regression line.
Currently, all this transform does is to ensure that the variable
arrays are at least 3D. The 2nd dimension is treated as the
set dimension and the 3rd dimension as the data dimension.
In the future, the transform will have more advanced behavior
to better ensure the correct treatment of sets.
Usage:
adapter = (
32 changes: 32 additions & 0 deletions bayesflow/adapters/transforms/as_time_series.py
@@ -0,0 +1,32 @@
import numpy as np

from .elementwise_transform import ElementwiseTransform


class AsTimeSeries(ElementwiseTransform):
"""
The `.as_time_series` transform can be used to indicate that
variables shall be treated as time series.
Currently, all this transformation does is to ensure that the variable
arrays are at least 3D. The 2nd dimension is treated as the
time series dimension and the 3rd dimension as the data dimension.
In the future, the transform will have more advanced behavior
to better ensure the correct treatment of time series data.
Usage:
adapter = (
bf.Adapter()
.as_time_series(["x", "y"])
)
"""

def forward(self, data: np.ndarray, **kwargs) -> np.ndarray:
return np.atleast_3d(data)

def inverse(self, data: np.ndarray, **kwargs) -> np.ndarray:
if data.shape[2] == 1:
return np.squeeze(data, axis=2)

return data
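The forward/inverse logic of `AsTimeSeries` shown above can be restated as a standalone NumPy sketch (plain functions rather than the class, for illustration only):

```python
import numpy as np


def as_time_series_forward(data: np.ndarray) -> np.ndarray:
    # Ensure at least 3D: (batch, time, data); a 2D array
    # gains a trailing data dimension of size 1.
    return np.atleast_3d(data)


def as_time_series_inverse(data: np.ndarray) -> np.ndarray:
    # Undo the expansion only when the data dimension is a singleton.
    if data.shape[2] == 1:
        return np.squeeze(data, axis=2)
    return data


x = np.zeros((4, 10))          # (batch, time)
y = as_time_series_forward(x)  # (4, 10, 1)
assert as_time_series_inverse(y).shape == (4, 10)
```

Note that the inverse is only applied to arrays that went through the forward pass, so indexing `data.shape[2]` is safe there.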
21 changes: 15 additions & 6 deletions bayesflow/adapters/transforms/concatenate.py
@@ -12,13 +12,22 @@

@serializable(package="bayesflow.adapters")
class Concatenate(Transform):
"""Concatenate multiple arrays into a new key.
Parameters:
keys:
into:
"""Concatenate multiple arrays into a new key. Used to specify how data variables should be treated by the network.
Parameters:
keys: A list of strings, where the strings are the names of data variables.
into: A string telling the network how to use the variables named in keys.
axis: Integer specifying along which axis to concatenate the keys. The last axis is used by default.
Example:
Suppose you have a simulator that generates variables "beta" and "sigma" from priors and then observation
variables "x" and "y". We can then use concatenate in the following way:
adapter = (
bf.Adapter()
.concatenate(["beta", "sigma"], into="inference_variables")
.concatenate(["x", "y"], into="summary_variables")
)
"""

def __init__(self, keys: Sequence[str], *, into: str, axis: int = -1):
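The core effect of `Concatenate` can be sketched with plain NumPy (a hypothetical standalone helper, not the actual class, which additionally supports an inverse and serialization):

```python
import numpy as np


def concatenate(data: dict, keys: list, into: str, axis: int = -1) -> dict:
    # Merge the named arrays into one new key along `axis`,
    # dropping the original keys.
    out = {k: v for k, v in data.items() if k not in keys}
    out[into] = np.concatenate([data[k] for k in keys], axis=axis)
    return out


data = {"beta": np.zeros((32, 2)), "sigma": np.ones((32, 1))}
result = concatenate(data, ["beta", "sigma"], into="inference_variables")
assert result["inference_variables"].shape == (32, 3)
```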
6 changes: 3 additions & 3 deletions bayesflow/adapters/transforms/constrain.py
@@ -32,7 +32,7 @@ class Constrain(ElementwiseTransform):
Examples:
Let sigma be the standard deviation of a normal distribution,
1) Let sigma be the standard deviation of a normal distribution,
then sigma should always be greater than zero.
Usage:
@@ -41,8 +41,8 @@ class Constrain(ElementwiseTransform):
.constrain("sigma", lower=0)
)
Suppose p is the parameter for a binomial distribution where p must be in [0,1]
then we would constrain the neural network to estimate p in the following way.
2) Suppose p is the parameter for a binomial distribution where p must be in
[0, 1], then we would constrain the neural network to estimate p in the following way.
Usage:
adapter = (
20 changes: 20 additions & 0 deletions bayesflow/adapters/transforms/drop.py
@@ -11,6 +11,26 @@

@serializable(package="bayesflow.adapters")
class Drop(Transform):
"""
Transform to drop variables from further calculation.
Parameters:
keys: list of strings, containing names of data variables that should be dropped
Example:
>>> import bayesflow as bf
>>> a = [1, 2, 3, 4]
>>> b = [[1, 2], [3, 4]]
>>> c = [[5, 6, 7, 8]]
>>> dat = dict(a=a, b=b, c=c)
>>> dat
{'a': [1, 2, 3, 4], 'b': [[1, 2], [3, 4]], 'c': [[5, 6, 7, 8]]}
>>> drop = bf.adapters.transforms.Drop(("b", "c"))
>>> drop.forward(dat)
{'a': [1, 2, 3, 4]}
"""

def __init__(self, keys: Sequence[str]):
self.keys = keys

2 changes: 2 additions & 0 deletions bayesflow/adapters/transforms/elementwise_transform.py
@@ -4,6 +4,8 @@

@serializable(package="bayesflow.adapters")
class ElementwiseTransform:
"""Base class from which other elementwise transforms are derived"""

def __call__(self, data: np.ndarray, inverse: bool = False, **kwargs) -> np.ndarray:
if inverse:
return self.inverse(data, **kwargs)
5 changes: 5 additions & 0 deletions bayesflow/adapters/transforms/filter_transform.py
@@ -19,6 +19,11 @@ def __call__(self, key: str, value: np.ndarray, inverse: bool) -> bool:

@serializable(package="bayesflow.adapters")
class FilterTransform(Transform):
"""
Implements a transform that applies a different transform to a subset of the data. Used by other transforms and
the base adapter class.
"""

def __init__(
self,
*,
5 changes: 3 additions & 2 deletions bayesflow/adapters/transforms/keep.py
@@ -25,8 +25,9 @@ class Keep(Transform):
adapter = (
bf.adapters.Adapter()
# only keep theta and x
.keep(("theta", "x"))
# drop data from unneeded priors alpha, and r
# only keep theta and x
.keep(("theta", "x"))
)
Example:
4 changes: 4 additions & 0 deletions bayesflow/adapters/transforms/one_hot.py
@@ -9,6 +9,10 @@

@serializable(package="bayesflow.adapters")
class OneHot(ElementwiseTransform):
"""
Changes data to be one-hot encoded.
"""

def __init__(self, num_classes: int):
super().__init__()
self.num_classes = num_classes
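One-hot encoding as described for `OneHot` can be sketched in NumPy (a hypothetical standalone helper; the real transform wraps this as an `ElementwiseTransform` with serialization support):

```python
import numpy as np


def one_hot(labels: np.ndarray, num_classes: int) -> np.ndarray:
    # Map integer class labels to one-hot vectors along a new last axis
    # by indexing rows of the identity matrix.
    return np.eye(num_classes)[labels]


labels = np.array([0, 2, 1])
encoded = one_hot(labels, num_classes=3)
# each row contains a single 1 at the position of its label
```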
18 changes: 18 additions & 0 deletions bayesflow/adapters/transforms/rename.py
@@ -7,6 +7,24 @@

@serializable(package="bayesflow.adapters")
class Rename(Transform):
"""
Transform to rename keys in the data dictionary. Useful to rename variables to match those required by the
approximator. This transform can only rename one variable at a time.
Parameters:
- from_key: str of variable name that should be renamed
- to_key: str representing new name
Example:
adapter = (
bf.adapters.Adapter()
# rename the variables to match the required approximator inputs
.rename("theta", "inference_variables")
.rename("x", "inference_conditions")
)
"""

def __init__(self, from_key: str, to_key: str):
super().__init__()
self.from_key = from_key
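The renaming step is a simple dictionary-key rewrite; a minimal sketch (plain Python, not the actual class):

```python
import numpy as np


def rename(data: dict, from_key: str, to_key: str) -> dict:
    # Return a copy with a single key renamed; other keys and
    # their insertion order are preserved.
    return {to_key if k == from_key else k: v for k, v in data.items()}


data = {"theta": np.zeros(3), "x": np.ones(3)}
renamed = rename(data, "theta", "inference_variables")
assert "inference_variables" in renamed and "theta" not in renamed
```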
15 changes: 15 additions & 0 deletions bayesflow/adapters/transforms/standardize.py
@@ -10,6 +10,21 @@

@serializable(package="bayesflow.adapters")
class Standardize(ElementwiseTransform):
"""
Transform that, when applied, standardizes data using typical z-score standardization, i.e., for some unstandardized
data x the standardized version z would be
z = (x - mean(x)) / std(x)
Parameters:
mean: Integer or float used to specify a known mean; estimated from data when not provided.
std: Integer or float used to specify a known standard deviation; estimated from data when not provided.
axis: Integer representing a specific axis along which standardization should take place. By default,
standardization happens individually for each dimension.
momentum: Float in (0, 1) specifying the momentum during training.
"""

def __init__(
self,
mean: int | float | np.ndarray = None,
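The z-score formula in the docstring can be sketched in NumPy (per-feature standardization along the batch axis, as an illustration; the actual transform additionally supports fixed `mean`/`std` and running `momentum` updates):

```python
import numpy as np


def standardize(x: np.ndarray, axis: int = 0) -> np.ndarray:
    # z = (x - mean(x)) / std(x), computed along `axis`
    mean = x.mean(axis=axis, keepdims=True)
    std = x.std(axis=axis, keepdims=True)
    return (x - mean) / std


x = np.array([[1.0, 10.0], [3.0, 30.0], [5.0, 50.0]])
z = standardize(x)
# each column now has zero mean and unit standard deviation
```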
4 changes: 4 additions & 0 deletions bayesflow/adapters/transforms/transform.py
@@ -4,6 +4,10 @@

@serializable(package="bayesflow.adapters")
class Transform:
"""
Base class from which other transforms are derived
"""

def __call__(self, data: dict[str, np.ndarray], *, inverse: bool = False, **kwargs) -> dict[str, np.ndarray]:
if inverse:
return self.inverse(data, **kwargs)