7
7
# ----------------------------------------------------------------------------
8
8
9
9
import pandas as pd
10
+ import numpy as np
11
+
10
12
import skbio .diversity
13
+ from skbio .diversity ._util import _validate_counts_vector
14
+
15
+ from scipy .special import gammaln
16
+
11
17
import biom
12
18
import numpy as np
13
19
@@ -94,6 +100,14 @@ def transform_(v, i, m):
94
100
results .append (_skbio_alpha_diversity_from_1d (v , 'pielou_e' ))
95
101
results = pd .Series (results , index = table .ids (), name = 'pielou_evenness' )
96
102
return results
103
+ # legacy code
104
+ for partition in _partition (table ):
105
+ counts = partition .matrix_data .T .toarray ()
106
+ sample_ids = partition .ids (axis = 'sample' )
107
+ results = [_p_evenness (c , sample_ids )for c in counts ]
108
+ result = pd .Series (results , index = sample_ids )
109
+ result .name = 'pielou_evenness'
110
+ return result
97
111
98
112
99
113
@_validate_tables
@@ -107,6 +121,15 @@ def shannon_entropy(table: biom.Table,
107
121
results .append (_skbio_alpha_diversity_from_1d (v , 'shannon' ))
108
122
results = pd .Series (results , index = table .ids (), name = 'shannon_entropy' )
109
123
return results
124
+ #LEGACY
125
+ for partition in _partition (table ):
126
+ counts = partition .matrix_data .T .toarray ()
127
+ sample_ids = partition .ids (axis = 'sample' )
128
+ # TODO replace with internal shannons method
129
+ results = [_shannon (c , sample_ids )for c in counts ]
130
+ result = pd .Series (results , index = sample_ids )
131
+ result .name = 'shannon_entropy'
132
+ return result
110
133
111
134
112
135
@_validate_tables
@@ -115,5 +138,92 @@ def alpha_passthrough(table: biom.Table, metric: str) -> pd.Series:
115
138
116
139
for v in table .iter_data (dense = True ):
117
140
results .append (_skbio_alpha_diversity_from_1d (v .astype (int ), metric ))
141
+ # TODO write if statements for hard coding the following metrics:
142
+ # berger-parker, brillouin, simpson's D, esty, good's, margalef's, mcintosh,
143
+ # strongs
118
144
results = pd .Series (results , index = table .ids (), name = metric )
119
145
return results
146
+
147
+
148
+ # Methods copied and pasted from skbio
149
def _berger_parker(counts):
    """Return the Berger-Parker dominance of a counts vector.

    The value is the largest single count divided by the sum of all counts.

    Parameters
    ----------
    counts : array-like
        Per-feature counts for one sample. Passed through
        ``_validate_counts_vector`` first (presumably yields a 1-D NumPy
        array -- confirm against skbio).

    Returns
    -------
    The ratio ``max(counts) / sum(counts)``.
    """
    validated = _validate_counts_vector(counts)
    largest = validated.max()
    total = validated.sum()
    return largest / total
152
+
153
+
154
def _brillouin_d(counts):
    """Return the Brillouin index of a counts vector.

    Computed as ``(ln(N!) - sum(ln(n_i!))) / N`` over the nonzero counts
    ``n_i``, where ``N`` is their sum. Factorials are evaluated in log space
    through ``gammaln(x + 1)``.

    Parameters
    ----------
    counts : array-like
        Per-feature counts for one sample; validated by
        ``_validate_counts_vector`` before use.

    Returns
    -------
    The Brillouin index as computed above.
    """
    validated = _validate_counts_vector(counts)
    # Zero counts are dropped before any arithmetic.
    observed = validated[validated.nonzero()]
    total = observed.sum()
    log_total_factorial = gammaln(total + 1)
    log_count_factorials = gammaln(observed + 1).sum()
    return (log_total_factorial - log_count_factorials) / total
159
+
160
+
161
def _simpsons_dominance(counts):
    """Return Simpson's dominance: the sum of squared relative abundances.

    Parameters
    ----------
    counts : array-like
        Per-feature counts for one sample; validated by
        ``_validate_counts_vector`` before use.

    Returns
    -------
    ``sum(p_i * p_i)`` where ``p_i = counts_i / sum(counts)``.
    """
    validated = _validate_counts_vector(counts)
    proportions = validated / validated.sum()
    squared = proportions * proportions
    return squared.sum()
165
+
166
+
167
def _etsy_ci(counts):
    """Return a (lower, upper) confidence interval around ``f1 / n``.

    ``f1`` and ``f2`` are the numbers of features seen exactly once and
    exactly twice, and ``n`` is the total count. The interval is
    ``f1 / n -/+ z * sqrt(W)`` with
    ``W = (f1 * (n - f1) + 2 * n * f2) / n**3``.

    Parameters
    ----------
    counts : array-like
        Per-feature counts for one sample; validated by
        ``_validate_counts_vector`` before use.

    Returns
    -------
    tuple
        ``(lower_bound, upper_bound)``.
    """
    validated = _validate_counts_vector(counts)

    singletons = _singles(validated)
    doubletons = _doubles(validated)
    total = validated.sum()

    # Standard-normal quantile for a two-sided 95% interval.
    z = 1.959963985
    variance = (
        singletons * (total - singletons) + 2 * total * doubletons
    ) / (total ** 3)

    centre = singletons / total
    half_width = z * np.sqrt(variance)
    return centre - half_width, centre + half_width
177
+
178
+
179
def _goods_coverage(counts):
    """Return Good's coverage: one minus the singleton fraction.

    Parameters
    ----------
    counts : array-like
        Per-feature counts for one sample; validated by
        ``_validate_counts_vector`` before use.

    Returns
    -------
    ``1 - f1 / N`` where ``f1`` is the number of features with a count of
    exactly one and ``N`` is the sum of all counts.
    """
    validated = _validate_counts_vector(counts)
    singleton_fraction = _singles(validated) / validated.sum()
    return 1 - singleton_fraction
184
+
185
+
186
def _margalef(counts):
    """Return Margalef's richness index, ``(S - 1) / ln(N)``.

    ``S`` comes from ``skbio.diversity.alpha.sobs`` (this call replaced an
    earlier ``observed_otus`` call) and ``N`` is the sum of all counts.

    Parameters
    ----------
    counts : array-like
        Per-feature counts for one sample; validated by
        ``_validate_counts_vector`` before use.

    Returns
    -------
    The index value as computed above.
    """
    validated = _validate_counts_vector(counts)
    richness = skbio.diversity.alpha.sobs(validated)
    log_total = np.log(validated.sum())
    return (richness - 1) / log_total
190
+
191
+
192
def _mcintosh_d(counts):
    """Return McIntosh's dominance index, ``(N - U) / (N - sqrt(N))``.

    ``N`` is the sum of all counts and ``U`` is the square root of the sum
    of squared counts.

    Parameters
    ----------
    counts : array-like
        Per-feature counts for one sample; validated by
        ``_validate_counts_vector`` before use.

    Returns
    -------
    The index value as computed above.
    """
    validated = _validate_counts_vector(counts)
    total = validated.sum()
    sum_of_squares = (validated * validated).sum()
    euclidean_norm = np.sqrt(sum_of_squares)
    return (total - euclidean_norm) / (total - np.sqrt(total))
197
+
198
+
199
def _strong(counts):
    """Return Strong's dominance index.

    Counts are sorted in descending order and accumulated. For each
    1-based rank ``i`` the value ``cumsum_i / N - i / S`` is formed, and the
    maximum over all ranks is returned. ``N`` is the sum of all counts and
    ``S`` comes from ``skbio.diversity.alpha.sobs`` (this call replaced an
    earlier ``observed_otus`` call).

    Parameters
    ----------
    counts : array-like
        Per-feature counts for one sample; validated by
        ``_validate_counts_vector`` before use.

    Returns
    -------
    The maximum of the per-rank differences described above.
    """
    validated = _validate_counts_vector(counts)
    total = validated.sum()
    richness = skbio.diversity.alpha.sobs(validated)
    ranks = np.arange(1, len(validated) + 1)
    # Ascending sort reversed gives the descending order to accumulate.
    cumulative = np.sort(validated)[::-1].cumsum()
    differences = cumulative / total - (ranks / richness)
    return differences.max()
207
+
208
+
209
def _singles(counts):
    """Return how many entries of the counts vector are exactly 1.

    Parameters
    ----------
    counts : array-like
        Per-feature counts for one sample; validated by
        ``_validate_counts_vector`` before use.
    """
    validated = _validate_counts_vector(counts)
    is_singleton = validated == 1
    return is_singleton.sum()
212
+
213
+
214
def _doubles(counts):
    """Return how many entries of the counts vector are exactly 2.

    Parameters
    ----------
    counts : array-like
        Per-feature counts for one sample; validated by
        ``_validate_counts_vector`` before use.
    """
    validated = _validate_counts_vector(counts)
    is_doubleton = validated == 2
    return is_doubleton.sum()
217
+
218
+
219
def _p_evenness(counts, sample_ids=None):
    """Return Pielou's evenness, ``H / ln(S)``, for a counts vector.

    ``H`` is the Shannon entropy in natural-log units (``_shannon`` with
    ``base=np.e``) and ``S`` comes from ``skbio.diversity.alpha.sobs``.

    Parameters
    ----------
    counts : array-like
        Per-feature counts for one sample; validated by
        ``_validate_counts_vector`` before use.
    sample_ids : optional
        Unused. Kept, now with a default, so existing callers that pass it
        positionally keep working and new callers may omit it.

    Returns
    -------
    The evenness value as computed above.
    """
    validated = _validate_counts_vector(counts)
    entropy = _shannon(validated, base=np.e)
    # NOTE(review): if sobs returns 1 the denominator is log(1) == 0.
    max_entropy = np.log(skbio.diversity.alpha.sobs(counts=validated))
    return entropy / max_entropy
223
+
224
+
225
def _shannon(counts, base=2):
    """Return the Shannon entropy of a counts vector in the given log base.

    Counts are converted to relative frequencies and zero frequencies are
    dropped. The entropy ``-sum(p * ln(p))`` is then divided by ``ln(base)``
    to change the logarithm base.

    Parameters
    ----------
    counts : array-like
        Per-feature counts for one sample; validated by
        ``_validate_counts_vector`` before use.
    base : scalar, optional
        Logarithm base of the result. Defaults to 2.

    Returns
    -------
    The entropy value as computed above.
    """
    validated = _validate_counts_vector(counts)
    proportions = validated / validated.sum()
    present = proportions[proportions.nonzero()]
    entropy_nats = -(present * np.log(present)).sum()
    return entropy_nats / np.log(base)
0 commit comments