Skip to content

Commit

Permalink
update docs, format, and lint
Browse files Browse the repository at this point in the history
  • Loading branch information
willdumm committed Apr 8, 2024
1 parent c2d3726 commit 2ac6df7
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 13 deletions.
4 changes: 2 additions & 2 deletions docs/quickstart.rst
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ This file may be manipulated using ``gctree infer``, instead of providing
a dnapars ``outfile``.

.. note::
Although described below, using mutability parsimony or isotype parsimony
Although described below, using context likelihood, mutability parsimony, or isotype parsimony
as ranking criteria is experimental, and has not yet been shown in a careful
validation to improve tree inference. Only the default branching process
likelihood is recommended for tree ranking!
Expand All @@ -117,7 +117,7 @@ between trees. Providing arguments ``--isotype_mapfile`` and
arguments ``--mutability`` and ``--substitution`` allows trees to be ranked
according to a context-sensitive mutation model. By default, trees are ranked
lexicographically, first maximizing likelihood, then minimizing isotype
parsimony and mutabilities, if such information is provided.
parsimony, and finally maximizing a context-based Poisson likelihood, if such information is provided.
Ranking priorities can be adjusted using the argument ``--ranking_coeffs``.

For example, to find the optimal tree
Expand Down
18 changes: 10 additions & 8 deletions gctree/branching_processes.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ def mle(self, **kwargs) -> Tuple[np.float64, np.float64]:
(p, q) = \arg\max_{p,q\in [0,1]}\ell(p, q)
Args:
kwargs: keyword arguments passed along to the log likelihood :meth:`CollapsedTree.ll`
kwargs: keyword arguments passed along to the branching process likelihood :meth:`CollapsedTree.ll`
Returns:
Tuple :math:`(p, q)` with estimated branching probability and estimated mutation probability
Expand Down Expand Up @@ -1052,7 +1052,7 @@ def ll(
marginal: compute the marginal likelihood over trees, otherwise compute the joint likelihood of trees
Returns:
Log likelihood :math:`\ell(p, q; T, A)` and its gradient :math:`\nabla\ell(p, q; T, A)`
Log branching process likelihood :math:`\ell(p, q; T, A)` and its gradient :math:`\nabla\ell(p, q; T, A)`
"""
if self._cm_countlist is None:
if self._forest is not None:
Expand Down Expand Up @@ -1124,7 +1124,7 @@ def mle(self, **kwargs) -> Tuple[np.float64, np.float64]:
(p, q) = \arg\max_{p,q\in [0,1]}\ell(p, q)
Args:
kwargs: keyword arguments passed along to the log likelihood :meth:`CollapsedForest.ll`
kwargs: keyword arguments passed along to the branching process likelihood :meth:`CollapsedForest.ll`
Returns:
Tuple :math:`(p, q)` with estimated branching probability and estimated mutation probability
Expand All @@ -1151,7 +1151,7 @@ def filter_trees( # noqa: C901
Trim the forest to minimize a linear
combination of branching process likelihood, isotype parsimony score,
mutability parsimony score, and number of alleles, with coefficients
context/mutability-based Poisson likelihood, and number of alleles, with coefficients
provided in the argument ``ranking_coeffs``, in that order.
Args:
Expand All @@ -1169,7 +1169,7 @@ def filter_trees( # noqa: C901
ignore_isotype: Ignore isotype parsimony when ranking. By default, isotype information added with
:meth:`add_isotypes` will be used to compute isotype parsimony, which is used in ranking.
chain_split: The index at which non-adjacent sequences are concatenated, for calculating
mutability parsimony.
context-based Poisson likelihood.
verbose: print information about trimming
outbase: file name stem for a file with information for each tree in the DAG.
summarize_forest: whether to write a summary of the forest to file `[outbase].forest_summary.log`
Expand All @@ -1182,7 +1182,8 @@ def filter_trees( # noqa: C901
Returns:
The trimmed forest, containing all optimal trees according to the specified criteria, and a tuple
of data about the trees in that forest, with format (ll, isotype parsimony, mutability parsimony, alleles).
of data about the trees in that forest, with format (branching process likelihood, isotype parsimony,
context-based Poisson likelihood, alleles).
"""
dag = self._forest

Expand Down Expand Up @@ -1681,7 +1682,7 @@ def _mle_helper(
bounds = ((1e-6, 1 - 1e-6), (1e-6, 1 - 1e-6))

def f(x):
"""Negative log likelihood."""
"""Negative log branching process likelihood."""
return tuple(-y for y in ll(*x, **kwargs))

grad_check = sco.check_grad(lambda x: f(x)[0], lambda x: f(x)[1], x_0)
Expand Down Expand Up @@ -1919,7 +1920,8 @@ def accum_func(cmsetlist: List[multiset.FrozenMultiset]):


def _ll_genotype_dagfuncs(p: np.float64, q: np.float64) -> hdag.utils.HistoryDagFilter:
"""Return functions for counting tree log likelihood on the history DAG.
"""Return functions for counting tree log branching process likelihood on
the history DAG.
For numerical consistency, we resort to the use of ``decimal.Decimal``.
This is exactly for the purpose of solving the problem that float sum is
Expand Down
4 changes: 2 additions & 2 deletions gctree/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ def get_parser():
help=(
"when using concatenated heavy and light chains, this is the 0-based"
" index at which the 2nd chain begins, needed for determining coding frame in both chains,"
" and also to correctly calculate mutability parsimony."
" and also to correctly calculate context-based Poisson likelihood."
),
)
parser_infer.add_argument(
Expand Down Expand Up @@ -632,7 +632,7 @@ def get_parser():
"Coefficients are in order: isotype parsimony, mutation model parsimony, number of alleles. "
"A coefficient of -1 will be applied to branching process likelihood. "
"If not provided, trees will be ranked lexicographically by likelihood, "
"isotype parsimony, and mutability parsimony in that order."
"isotype parsimony, and context-based Poisson likelihood in that order."
),
)
parser_infer.add_argument(
Expand Down
2 changes: 1 addition & 1 deletion gctree/isotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def get_parser() -> argparse.ArgumentParser:
" nodes.\n\n"
"This tool doesn’t make any judgements about which tree is best.\n"
"Tree output order is the same as in gctree inference: ranking is\n"
"by log likelihood before isotype additions. A determination of\n"
"by branching process likelihood before isotype additions. A determination of\n"
"which is the best tree is left to the user, based on likelihoods,\n"
"isotype parsimony score, and changes in the number of nodes after\n"
"isotype additions.\n"
Expand Down

0 comments on commit 2ac6df7

Please sign in to comment.