Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

merge updates from dev (take 2) #65

Merged
merged 5 commits into from
May 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci-dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ jobs:
ci:
uses: qiime2/distributions/.github/workflows/lib-ci-dev.yaml@dev
with:
distro: core
distro: amplicon
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# q2-diversity-lib

![](https://github.com/qiime2/q2-diversity-lib/workflows/ci/badge.svg)
![](https://github.com/qiime2/q2-diversity-lib/workflows/ci-dev/badge.svg)

This is a QIIME 2 plugin. For details on QIIME 2, see https://qiime2.org.
1 change: 1 addition & 0 deletions ci/recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ requirements:
- qiime2 {{ qiime2_epoch }}.*
- q2-types {{ qiime2_epoch }}.*
- scikit-bio {{ scikit_bio }}
- scikit-learn {{ scikit_learn }}
- scipy {{ scipy }}
- unifrac {{ unifrac }}
- unifrac-binaries {{ unifrac_binaries }}
Expand Down
10 changes: 8 additions & 2 deletions q2_diversity_lib/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def part_f(i, m):


@decorator
def _disallow_empty_tables(wrapped_function, *args, **kwargs):
def _validate_tables(wrapped_function, *args, **kwargs):
bound_arguments = signature(wrapped_function).bind(*args, **kwargs)
table = bound_arguments.arguments.get('table')
if table is None:
Expand All @@ -66,6 +66,12 @@ def _disallow_empty_tables(wrapped_function, *args, **kwargs):
if tab_obj.is_empty():
raise ValueError("The provided table is empty")

if np.isnan(tab_obj.matrix_data.data).sum() > 0:
raise ValueError("The provided table contains NaN")

if (tab_obj.matrix_data.data < 0).sum() > 0:
raise ValueError("The provided table contains negative values")

return wrapped_function(*args, **kwargs)


Expand Down Expand Up @@ -98,7 +104,7 @@ def _validate_requested_cpus(wrapped_function, *args, **kwargs):

cpus_requested = b_a_arguments[param_name]

if cpus_requested == 'auto':
if cpus_requested == 0:
# a request of 0 means "use every available cpu": mutate bound_arguments.arguments to the available # of cpus...
b_a_arguments[param_name] = cpus_available
# ...and update cpus requested to prevent TypeError
Expand Down
92 changes: 49 additions & 43 deletions q2_diversity_lib/alpha.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@
import pandas as pd
import skbio.diversity
import biom
import numpy as np

from q2_types.feature_table import BIOMV210Format
from q2_types.sample_data import AlphaDiversityFormat
from q2_types.tree import NewickFormat
from ._util import (_drop_undefined_samples, _partition,
_disallow_empty_tables,

from ._util import (_validate_tables,
_validate_requested_cpus,
_omp_cmd_wrapper)

Expand Down Expand Up @@ -44,7 +45,7 @@


# --------------------- Phylogenetic -----------------------------------------
@_disallow_empty_tables
@_validate_tables
@_validate_requested_cpus
def faith_pd(table: BIOMV210Format, phylogeny: NewickFormat,
threads: int = 1) -> AlphaDiversityFormat:
Expand All @@ -55,59 +56,64 @@ def faith_pd(table: BIOMV210Format, phylogeny: NewickFormat,


# --------------------- Non-Phylogenetic -------------------------------------
@_disallow_empty_tables
def _skbio_alpha_diversity_from_1d(v, metric):
    """Compute a single alpha diversity value from one vector of counts.

    ``v`` is reshaped into a one-row 2d structure, because that is what
    ``skbio.diversity.alpha_diversity`` expects, and is passed along with a
    throwaway id and with skbio's input validation turned off.

    Returns the first (and only) entry of the result skbio hands back.
    """
    # one row holding every element of v
    single_row = np.reshape(v, (1, len(v)))
    per_sample = skbio.diversity.alpha_diversity(
        metric=metric,
        counts=single_row,
        ids=['placeholder'],
        validate=False,
    )
    return per_sample.iloc[0]


@_validate_tables
def observed_features(table: biom.Table) -> pd.Series:
presence_absence_table = table.pa(inplace=False)
return pd.Series(presence_absence_table.sum('sample').astype(int),
index=table.ids(), name='observed_features')
results = []
for v in presence_absence_table.iter_data(dense=True):
results.append(_skbio_alpha_diversity_from_1d(v.astype(int),
'observed_otus'))
results = pd.Series(results, index=table.ids(), name='observed_features')
return results


@_disallow_empty_tables
@_validate_tables
def pielou_evenness(table: biom.Table,
drop_undefined_samples: bool = False) -> pd.Series:
if drop_undefined_samples:
table = _drop_undefined_samples(table, minimum_nonzero_elements=2)
def transform_(v, i, m):
if (v > 0).sum() < 2:
return np.zeros(len(v))
else:
return v

table = table.transform(transform_, inplace=False).remove_empty()

results = []
for partition in _partition(table):
counts = partition.matrix_data.T.toarray()
sample_ids = partition.ids(axis='sample')
results.append(skbio.diversity.alpha_diversity(metric='pielou_e',
counts=counts,
ids=sample_ids))
result = pd.concat(results)
result.name = 'pielou_evenness'
return result


@_disallow_empty_tables
for v in table.iter_data(dense=True):
results.append(_skbio_alpha_diversity_from_1d(v, 'pielou_e'))
results = pd.Series(results, index=table.ids(), name='pielou_evenness')
return results


@_validate_tables
def shannon_entropy(table: biom.Table,
drop_undefined_samples: bool = False) -> pd.Series:
if drop_undefined_samples:
table = _drop_undefined_samples(table, minimum_nonzero_elements=1)
table = table.remove_empty(inplace=False)

results = []
for partition in _partition(table):
counts = partition.matrix_data.T.toarray()
sample_ids = partition.ids(axis='sample')
results.append(skbio.diversity.alpha_diversity(metric='shannon',
counts=counts,
ids=sample_ids))
result = pd.concat(results)
result.name = 'shannon_entropy'
return result


@_disallow_empty_tables
for v in table.iter_data(dense=True):
results.append(_skbio_alpha_diversity_from_1d(v, 'shannon'))
results = pd.Series(results, index=table.ids(), name='shannon_entropy')
return results


@_validate_tables
def alpha_passthrough(table: biom.Table, metric: str) -> pd.Series:
results = []
for partition in _partition(table):
counts = partition.matrix_data.astype(int).T.toarray()
sample_ids = partition.ids(axis='sample')

results.append(skbio.diversity.alpha_diversity(metric=metric,
counts=counts,
ids=sample_ids))
result = pd.concat(results)
result.name = metric
return result

for v in table.iter_data(dense=True):
results.append(_skbio_alpha_diversity_from_1d(v.astype(int), metric))
results = pd.Series(results, index=table.ids(), name=metric)
return results
25 changes: 12 additions & 13 deletions q2_diversity_lib/beta.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
from q2_types.distance_matrix import LSMatFormat
from q2_types.feature_table import BIOMV210Format
from q2_types.tree import NewickFormat
from ._util import (_disallow_empty_tables,

from ._util import (_validate_tables,
_validate_requested_cpus,
_omp_cmd_wrapper)

Expand Down Expand Up @@ -51,7 +52,7 @@


# -------------------- Method Dispatch -----------------------
@_disallow_empty_tables
@_validate_tables
@_validate_requested_cpus
def beta_passthrough(table: biom.Table, metric: str, pseudocount: int = 1,
n_jobs: int = 1) -> skbio.DistanceMatrix:
Expand Down Expand Up @@ -82,11 +83,11 @@ def jensen_shannon(x, y, **kwds):
pass

return skbio.diversity.beta_diversity(
metric=metric, counts=counts, ids=sample_ids, validate=True,
metric=metric, counts=counts, ids=sample_ids, validate=False,
pairwise_func=sklearn.metrics.pairwise_distances, n_jobs=n_jobs)


@_disallow_empty_tables
@_validate_tables
@_validate_requested_cpus
def beta_phylogenetic_passthrough(table: BIOMV210Format,
phylogeny: NewickFormat,
Expand Down Expand Up @@ -134,9 +135,7 @@ def beta_phylogenetic_passthrough(table: BIOMV210Format,
return result


# Note, this method doesn't have a corresponding cli invocation, so we'll
# just rely on unifrac doing the right thing with `threads` here.
@_disallow_empty_tables
@_validate_tables
@_validate_requested_cpus
def beta_phylogenetic_meta_passthrough(tables: BIOMV210Format,
phylogenies: NewickFormat,
Expand Down Expand Up @@ -168,7 +167,7 @@ def beta_phylogenetic_meta_passthrough(tables: BIOMV210Format,


# --------------------Non-Phylogenetic-----------------------
@_disallow_empty_tables
@_validate_tables
@_validate_requested_cpus
def bray_curtis(table: biom.Table, n_jobs: int = 1) -> skbio.DistanceMatrix:
counts = table.matrix_data.toarray().T
Expand All @@ -177,13 +176,13 @@ def bray_curtis(table: biom.Table, n_jobs: int = 1) -> skbio.DistanceMatrix:
metric='braycurtis',
counts=counts,
ids=sample_ids,
validate=True,
validate=False,
pairwise_func=sklearn.metrics.pairwise_distances,
n_jobs=n_jobs
)


@_disallow_empty_tables
@_validate_tables
@_validate_requested_cpus
def jaccard(table: biom.Table, n_jobs: int = 1) -> skbio.DistanceMatrix:
counts = table.matrix_data.toarray().T
Expand All @@ -192,14 +191,14 @@ def jaccard(table: biom.Table, n_jobs: int = 1) -> skbio.DistanceMatrix:
metric='jaccard',
counts=counts,
ids=sample_ids,
validate=True,
validate=False,
pairwise_func=sklearn.metrics.pairwise_distances,
n_jobs=n_jobs
)


# ------------------------Phylogenetic-----------------------
@_disallow_empty_tables
@_validate_tables
@_validate_requested_cpus
def unweighted_unifrac(table: BIOMV210Format,
phylogeny: NewickFormat,
Expand All @@ -223,7 +222,7 @@ def unweighted_unifrac(table: BIOMV210Format,
return result


@_disallow_empty_tables
@_validate_tables
@_validate_requested_cpus
def weighted_unifrac(table: BIOMV210Format, phylogeny: NewickFormat,
threads: int = 1, bypass_tips: bool = False
Expand Down
22 changes: 10 additions & 12 deletions q2_diversity_lib/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# ----------------------------------------------------------------------------

from qiime2.plugin import (Plugin, Citations, Bool, Int, Range, Choices, Str,
Float, List)
Float, List, Threads)
from q2_types.feature_table import (FeatureTable, Frequency, RelativeFrequency,
PresenceAbsence)
from q2_types.tree import Phylogeny, Rooted
Expand Down Expand Up @@ -56,7 +56,7 @@
inputs={'table': FeatureTable[Frequency | RelativeFrequency
| PresenceAbsence],
'phylogeny': Phylogeny[Rooted]},
parameters={'threads': Int % Range(1, None) | Str % Choices(['auto'])},
parameters={'threads': Threads},
outputs=[('vector', SampleData[AlphaDiversity])],
input_descriptions={
'table': "The feature table containing the samples for which Faith's "
Expand Down Expand Up @@ -145,8 +145,8 @@
# TODO: Augment citations as needed
plugin.methods.register_function(
function=beta.bray_curtis,
inputs={'table': FeatureTable[Frequency]},
parameters={'n_jobs': Int % Range(1, None) | Str % Choices(['auto'])},
inputs={'table': FeatureTable[Frequency | RelativeFrequency]},
parameters={'n_jobs': Threads},
outputs=[('distance_matrix', DistanceMatrix)],
input_descriptions={
'table': "The feature table containing the samples for which "
Expand Down Expand Up @@ -174,7 +174,7 @@
function=beta.jaccard,
inputs={'table': FeatureTable[Frequency | RelativeFrequency
| PresenceAbsence]},
parameters={'n_jobs': Int % Range(1, None) | Str % Choices(['auto'])},
parameters={'n_jobs': Threads},
outputs=[('distance_matrix', DistanceMatrix)],
input_descriptions={
'table': "The feature table containing the samples for which "
Expand Down Expand Up @@ -202,7 +202,7 @@
inputs={'table': FeatureTable[Frequency | RelativeFrequency
| PresenceAbsence],
'phylogeny': Phylogeny[Rooted]},
parameters={'threads': Int % Range(1, None) | Str % Choices(['auto']),
parameters={'threads': Threads,
'bypass_tips': Bool},
outputs=[('distance_matrix', DistanceMatrix)],
input_descriptions={
Expand Down Expand Up @@ -246,7 +246,7 @@
function=beta.weighted_unifrac,
inputs={'table': FeatureTable[Frequency | RelativeFrequency],
'phylogeny': Phylogeny[Rooted]},
parameters={'threads': Int % Range(1, None) | Str % Choices(['auto']),
parameters={'threads': Threads,
'bypass_tips': Bool},
outputs=[('distance_matrix', DistanceMatrix)],
input_descriptions={
Expand Down Expand Up @@ -310,8 +310,7 @@
function=beta.beta_passthrough,
inputs={'table': FeatureTable[Frequency]},
parameters={'metric': Str % Choices(beta.METRICS['NONPHYLO']['UNIMPL']),
'pseudocount': Int % Range(1, None),
'n_jobs': Int % Range(1, None) | Str % Choices(['auto'])},
'pseudocount': Int % Range(1, None), 'n_jobs': Threads},
outputs=[('distance_matrix', DistanceMatrix)],
input_descriptions={
'table': 'The feature table containing the samples over which beta '
Expand Down Expand Up @@ -347,7 +346,7 @@
inputs={'table': FeatureTable[Frequency],
'phylogeny': Phylogeny[Rooted]},
parameters={'metric': Str % Choices(beta.METRICS['PHYLO']['UNIMPL']),
'threads': Int % Range(1, None) | Str % Choices(['auto']),
'threads': Threads,
'variance_adjusted': Bool,
'alpha': Float % Range(0, 1, inclusive_end=True),
'bypass_tips': Bool},
Expand Down Expand Up @@ -412,13 +411,12 @@
]
)


plugin.methods.register_function(
function=beta.beta_phylogenetic_meta_passthrough,
inputs={'tables': List[FeatureTable[Frequency]],
'phylogenies': List[Phylogeny[Rooted]]},
parameters={'metric': Str % Choices(beta.METRICS['PHYLO']['UNIMPL']),
'threads': Int % Range(1, None) | Str % Choices(['auto']),
'threads': Threads,
'variance_adjusted': Bool,
'alpha': Float % Range(0, 1, inclusive_end=True),
'bypass_tips': Bool,
Expand Down
Loading
Loading