From 2ac6df7ca044d50bc0d2584adc37e6e0f2135988 Mon Sep 17 00:00:00 2001
From: Will Dumm <wrhdumm@gmail.com>
Date: Mon, 8 Apr 2024 11:46:37 -0700
Subject: [PATCH] update docs, format, and lint

---
 docs/quickstart.rst           |  4 ++--
 gctree/branching_processes.py | 18 ++++++++++--------
 gctree/cli.py                 |  4 ++--
 gctree/isotype.py             |  2 +-
 4 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/docs/quickstart.rst b/docs/quickstart.rst
index 5fe28b24..fdf1fcdc 100644
--- a/docs/quickstart.rst
+++ b/docs/quickstart.rst
@@ -106,7 +106,7 @@ This file may be manipulated using ``gctree infer``, instead of providing
 a dnapars ``outfile``.
 
 .. note::
-  Although described below, using mutability parsimony or isotype parsimony
+   Although described below, using context likelihood, mutability parsimony, or isotype parsimony
    as ranking criteria is experimental, and has not yet been shown in a careful
    validation to improve tree inference. Only the default branching process
    likelihood is recommended for tree ranking!
@@ -117,7 +117,7 @@ between trees. Providing arguments ``--isotype_mapfile`` and
 arguments ``--mutability`` and ``--substitution`` allows trees to be ranked
 according to a context-sensitive mutation model. By default, trees are ranked
 lexicographically, first maximizing likelihood, then minimizing isotype
-parsimony and mutabilities, if such information is provided.
+parsimony, and finally maximizing a context-based Poisson likelihood, if such information is provided.
 Ranking priorities can be adjusted using the argument ``--ranking_coeffs``.
 
 For example, to find the optimal tree
diff --git a/gctree/branching_processes.py b/gctree/branching_processes.py
index cb6fbca7..17b9d43e 100755
--- a/gctree/branching_processes.py
+++ b/gctree/branching_processes.py
@@ -411,7 +411,7 @@ def mle(self, **kwargs) -> Tuple[np.float64, np.float64]:
             (p, q) = \arg\max_{p,q\in [0,1]}\ell(p, q)
 
         Args:
-            kwargs: keyword arguments passed along to the log likelihood :meth:`CollapsedTree.ll`
+            kwargs: keyword arguments passed along to the branching process likelihood :meth:`CollapsedTree.ll`
 
         Returns:
             Tuple :math:`(p, q)` with estimated branching probability and estimated mutation probability
@@ -1052,7 +1052,7 @@ def ll(
             marginal: compute the marginal likelihood over trees, otherwise compute the joint likelihood of trees
 
         Returns:
-            Log likelihood :math:`\ell(p, q; T, A)` and its gradient :math:`\nabla\ell(p, q; T, A)`
+            Log branching process likelihood :math:`\ell(p, q; T, A)` and its gradient :math:`\nabla\ell(p, q; T, A)`
         """
         if self._cm_countlist is None:
             if self._forest is not None:
@@ -1124,7 +1124,7 @@ def mle(self, **kwargs) -> Tuple[np.float64, np.float64]:
             (p, q) = \arg\max_{p,q\in [0,1]}\ell(p, q)
 
         Args:
-            kwargs: keyword arguments passed along to the log likelihood :meth:`CollapsedForest.ll`
+            kwargs: keyword arguments passed along to the branching process likelihood :meth:`CollapsedForest.ll`
 
         Returns:
             Tuple :math:`(p, q)` with estimated branching probability and estimated mutation probability
@@ -1151,7 +1151,7 @@ def filter_trees(  # noqa: C901
 
         Trim the forest to minimize a linear
         combination of branching process likelihood, isotype parsimony score,
-        mutability parsimony score, and number of alleles, with coefficients
+        context/mutability-based Poisson likelihood, and number of alleles, with coefficients
         provided in the argument ``ranking_coeffs`, in that order.
 
         Args:
@@ -1169,7 +1169,7 @@ def filter_trees(  # noqa: C901
             ignore_isotype: Ignore isotype parsimony when ranking. By default, isotype information added with
                 :meth:``add_isotypes`` will be used to compute isotype parsimony, which is used in ranking.
             chain_split: The index at which non-adjacent sequences are concatenated, for calculating
-                mutability parsimony.
+                context-based Poisson likelihood.
             verbose: print information about trimming
             outbase: file name stem for a file with information for each tree in the DAG.
             summarize_forest: whether to write a summary of the forest to file `[outbase].forest_summary.log`
@@ -1182,7 +1182,8 @@ def filter_trees(  # noqa: C901
 
         Returns:
             The trimmed forest, containing all optimal trees according to the specified criteria, and a tuple
-            of data about the trees in that forest, with format (ll, isotype parsimony, mutability parsimony, alleles).
+            of data about the trees in that forest, with format (branching process likelihood, isotype parsimony,
+            context-based Poisson likelihood, alleles).
         """
         dag = self._forest
 
@@ -1681,7 +1682,7 @@ def _mle_helper(
     bounds = ((1e-6, 1 - 1e-6), (1e-6, 1 - 1e-6))
 
     def f(x):
-        """Negative log likelihood."""
+        """Negative log branching process likelihood."""
         return tuple(-y for y in ll(*x, **kwargs))
 
     grad_check = sco.check_grad(lambda x: f(x)[0], lambda x: f(x)[1], x_0)
@@ -1919,7 +1920,8 @@ def accum_func(cmsetlist: List[multiset.FrozenMultiset]):
 
 
 def _ll_genotype_dagfuncs(p: np.float64, q: np.float64) -> hdag.utils.HistoryDagFilter:
-    """Return functions for counting tree log likelihood on the history DAG.
+    """Return functions for counting tree log branching process likelihood on
+    the history DAG.
 
     For numerical consistency, we resort to the use of ``decimal.Decimal``.
     This is exactly for the purpose of solving the problem that float sum is
diff --git a/gctree/cli.py b/gctree/cli.py
index 9f80a91c..46f0ee72 100644
--- a/gctree/cli.py
+++ b/gctree/cli.py
@@ -537,7 +537,7 @@ def get_parser():
         help=(
             "when using concatenated heavy and light chains, this is the 0-based"
             " index at which the 2nd chain begins, needed for determining coding frame in both chains,"
-            " and also to correctly calculate mutability parsimony."
+            " and also to correctly calculate context-based Poisson likelihood."
         ),
     )
     parser_infer.add_argument(
@@ -632,7 +632,7 @@ def get_parser():
             "Coefficients are in order: isotype parsimony, mutation model parsimony, number of alleles. "
             "A coefficient of -1 will be applied to branching process likelihood. "
             "If not provided, trees will be ranked lexicographically by likelihood, "
-            "isotype parsimony, and mutability parsimony in that order."
+            "isotype parsimony, and context-based Poisson likelihood in that order."
         ),
     )
     parser_infer.add_argument(
diff --git a/gctree/isotype.py b/gctree/isotype.py
index 31055ac9..abe9ec39 100644
--- a/gctree/isotype.py
+++ b/gctree/isotype.py
@@ -46,7 +46,7 @@ def get_parser() -> argparse.ArgumentParser:
             "  nodes.\n\n"
             "This tool doesn’t make any judgements about which tree is best.\n"
             "Tree output order is the same as in gctree inference: ranking is\n"
-            "by log likelihood before isotype additions. A determination of\n"
+            "by branching process likelihood before isotype additions. A determination of\n"
             "which is the best tree is left to the user, based on likelihoods,\n"
             "isotype parsimony score, and changes in the number of nodes after\n"
             "isotype additions.\n"