Skip to content

Commit 9eeb6ff

Browse files
committed
added methods from skbio
1 parent 206827b commit 9eeb6ff

File tree

1 file changed

+110
-0
lines changed

1 file changed

+110
-0
lines changed

q2_diversity_lib/alpha.py

+110
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,13 @@
77
# ----------------------------------------------------------------------------
88

99
import pandas as pd
10+
import numpy as np
11+
1012
import skbio.diversity
13+
from skbio.diversity._util import _validate_counts_vector
14+
15+
from scipy.special import gammaln
16+
1117
import biom
1218
import numpy as np
1319

@@ -94,6 +100,14 @@ def transform_(v, i, m):
94100
results.append(_skbio_alpha_diversity_from_1d(v, 'pielou_e'))
95101
results = pd.Series(results, index=table.ids(), name='pielou_evenness')
96102
return results
103+
# legacy code
104+
for partition in _partition(table):
105+
counts = partition.matrix_data.T.toarray()
106+
sample_ids = partition.ids(axis='sample')
107+
results = [_p_evenness(c, sample_ids)for c in counts]
108+
result = pd.Series(results, index=sample_ids)
109+
result.name = 'pielou_evenness'
110+
return result
97111

98112

99113
@_validate_tables
@@ -107,6 +121,15 @@ def shannon_entropy(table: biom.Table,
107121
results.append(_skbio_alpha_diversity_from_1d(v, 'shannon'))
108122
results = pd.Series(results, index=table.ids(), name='shannon_entropy')
109123
return results
124+
#LEGACY
125+
for partition in _partition(table):
126+
counts = partition.matrix_data.T.toarray()
127+
sample_ids = partition.ids(axis='sample')
128+
# TODO replace with internal shannons method
129+
results = [_shannon(c, sample_ids)for c in counts]
130+
result = pd.Series(results, index=sample_ids)
131+
result.name = 'shannon_entropy'
132+
return result
110133

111134

112135
@_validate_tables
@@ -115,5 +138,92 @@ def alpha_passthrough(table: biom.Table, metric: str) -> pd.Series:
115138

116139
for v in table.iter_data(dense=True):
117140
results.append(_skbio_alpha_diversity_from_1d(v.astype(int), metric))
141+
# TODO write if statements for hard coding the following metrics:
142+
# berger-parker, brillouin, simpsons D, esty, goods, margalef's, mcIntosh,
143+
# strongs
118144
results = pd.Series(results, index=table.ids(), name=metric)
119145
return results
146+
147+
148+
# Methods copied from skbio (observed_otus calls replaced with sobs)
149+
def _berger_parker(counts):
    """Return the share of the total count held by the largest entry.

    ``counts`` is first passed through skbio's ``_validate_counts_vector``;
    the result is ``max(counts) / sum(counts)``.
    """
    validated = _validate_counts_vector(counts)
    largest = validated.max()
    total = validated.sum()
    return largest / total
152+
153+
154+
def _brillouin_d(counts):
    """Return the Brillouin-style index of the non-zero entries of ``counts``.

    Evaluates ``(gammaln(n + 1) - sum(gammaln(c_i + 1))) / n`` where the
    ``c_i`` are the non-zero entries of the validated vector and ``n`` is
    their sum.
    """
    validated = _validate_counts_vector(counts)
    # Zero entries are dropped before anything is summed.
    present = validated[validated != 0]
    total = present.sum()
    log_total_term = gammaln(total + 1)
    log_entry_terms = gammaln(present + 1).sum()
    return (log_total_term - log_entry_terms) / total
159+
160+
161+
def _simpsons_dominance(counts):
    """Return the sum of squared relative frequencies of ``counts``.

    Each entry of the validated vector is divided by the vector's total,
    squared, and the squares are summed.
    """
    validated = _validate_counts_vector(counts)
    proportions = validated / validated.sum()
    return np.square(proportions).sum()
165+
166+
167+
def _etsy_ci(counts):
    """Return a ``(lower, upper)`` interval centred on ``f1 / n``.

    ``f1`` and ``f2`` are the numbers of entries equal to 1 and 2 (from
    ``_singles`` and ``_doubles``) and ``n`` is the total count.  The
    half-width of the interval is ``z * sqrt(W)`` with
    ``W = (f1 * (n - f1) + 2 * n * f2) / n ** 3``.
    """
    validated = _validate_counts_vector(counts)

    n_singles = _singles(validated)
    n_doubles = _doubles(validated)
    total = validated.sum()
    # NOTE(review): presumably the two-sided 95% standard-normal quantile;
    # confirm against the skbio original.
    z_score = 1.959963985
    spread = (
        n_singles * (total - n_singles) + 2 * total * n_doubles
    ) / (total ** 3)

    centre = n_singles / total
    half_width = z_score * np.sqrt(spread)
    return centre - half_width, centre + half_width
177+
178+
179+
def _goods_coverage(counts):
    """Return ``1 - f1 / N`` for the validated ``counts`` vector.

    ``f1`` is the number of entries equal to 1 (via ``_singles``) and ``N``
    is the sum of all entries.
    """
    validated = _validate_counts_vector(counts)
    singleton_fraction = _singles(validated) / validated.sum()
    return 1 - singleton_fraction
184+
185+
186+
def _margalef(counts):
    """Return ``(S - 1) / ln(N)`` for the validated ``counts`` vector.

    ``S`` is the value reported by ``skbio.diversity.alpha.sobs`` and ``N``
    is the sum of all entries.
    """
    validated = _validate_counts_vector(counts)
    # sobs is used here in place of the observed_otus call of the skbio
    # original.
    richness = skbio.diversity.alpha.sobs(validated)
    log_total = np.log(validated.sum())
    return (richness - 1) / log_total
190+
191+
192+
def _mcintosh_d(counts):
    """Return ``(N - U) / (N - sqrt(N))`` for the validated ``counts``.

    ``U`` is the square root of the sum of squared entries and ``N`` is the
    sum of all entries.
    """
    validated = _validate_counts_vector(counts)
    root_sum_squares = np.sqrt(np.square(validated).sum())
    total = validated.sum()
    numerator = total - root_sum_squares
    denominator = total - np.sqrt(total)
    return numerator / denominator
197+
198+
199+
def _strong(counts):
    """Return the largest gap between cumulative share and ``rank / S``.

    The validated counts are sorted in descending order and cumulatively
    summed; each partial sum is divided by the total ``n``.  From the value
    at rank ``i`` (1-based) ``i / S`` is subtracted, where ``S`` comes from
    ``skbio.diversity.alpha.sobs``.  The maximum difference is returned.
    """
    validated = _validate_counts_vector(counts)
    total = validated.sum()
    # sobs is used here in place of the observed_otus call of the skbio
    # original.
    richness = skbio.diversity.alpha.sobs(validated)
    ranks = np.arange(1, len(validated) + 1)
    descending_cumsum = np.sort(validated)[::-1].cumsum()

    cumulative_share = descending_cumsum / total
    rank_share = ranks / richness
    return (cumulative_share - rank_share).max()
207+
208+
209+
def _singles(counts):
    """Return how many entries of the validated ``counts`` equal exactly 1."""
    validated = _validate_counts_vector(counts)
    is_single = validated == 1
    return is_single.sum()
212+
213+
214+
def _doubles(counts):
    """Return how many entries of the validated ``counts`` equal exactly 2."""
    validated = _validate_counts_vector(counts)
    is_double = validated == 2
    return is_double.sum()
217+
218+
219+
def _p_evenness(counts, sample_ids):
    """Return natural-log Shannon entropy divided by ``ln(S)``.

    ``S`` is the value reported by ``skbio.diversity.alpha.sobs`` for the
    validated ``counts``.  ``sample_ids`` is accepted but never read by this
    function.
    """
    validated = _validate_counts_vector(counts)
    entropy = _shannon(validated, base=np.e)
    richness = skbio.diversity.alpha.sobs(counts=validated)
    return entropy / np.log(richness)
223+
224+
225+
def _shannon(counts, base=2):
    """Return the Shannon entropy of ``counts`` in logarithm base ``base``.

    Relative frequencies are taken from the validated vector, zero
    frequencies are dropped, and ``-sum(p * ln(p))`` is divided by
    ``ln(base)`` to convert from natural log to the requested base.
    """
    validated = _validate_counts_vector(counts)
    proportions = validated / validated.sum()
    # Zero frequencies are excluded before the logarithm is taken.
    present = proportions[proportions != 0]
    entropy_nats = -(present * np.log(present)).sum()
    return entropy_nats / np.log(base)

0 commit comments

Comments
 (0)