-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfinalProject_main.py
executable file
·320 lines (280 loc) · 15.4 KB
/
finalProject_main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Final Project for CSCI5352 with Prof. Dan Larremore
CU Boulder, F2020
@author: liamfriar
Generate synthetic data for microbial taxa correlation networks.
Agent-based model. Each agent is an individual microbe.
Each microbe belongs to a taxon.
Taxa are not meant to represent any real taxa.
Taxa have a characteristics that make them more or less likely to survive
given environmental conditions and the abundance of other taxa.
Taxa are initialized at random abundances, and allowed to die and reproduce
over iterative time steps in a stable environment, keeping the sum total
abundance across taxa constant. After some number of time steps, the environment
changes. Sampling some portion of the population, relative abundance tables are
built before and after the environmental change. Spearman's Rank Coefficient
Matrices are created to correlate the relative abundances of taxa across multiple
runs.
A separate script analyzes these synthetic data to try to predict the taxa
interaction rules from the pre- or post- environmental change data.
Taxon class was created for this script.
"""
import os
import numpy as np
#import sys
import random
#from scipy import stats
import pandas as pd
from Taxon_Class import Taxon
run_folder = "run1/"
os.mkdir(run_folder)
#os.chdir('/Users/liamfriar/anaconda3/envs/CSCI5352/files')
#Model parameters
n_samples = 100
#number of simulations per set of taxa (taxa_list)
max_abundance = 25000
#maximum total number of individuals across taxa
n_taxa = 100 #must be even for major_relationships assignment to work.
t_steps = 100 #does not include after environmental change if applicable
t_after_change = 0 #number of time steps after environmental change.
n_env_params = 3
n_maj_relationships = 2
sampling_efficiency = 1 #portion of individuals that will be counted
random_reciprocity = False #if true, relationship a->b and b<-a are independent of each other.
#if false, then reciprocity determines b<-a
reciprocity_list = [1, 0, -1] # {-1, 0, 1} multiply relationship b<-a = a<-b * reciprocity
weights_array = np.array( [ [0.75, 0, 0.25, 0], [0.5, 0, 0.5, 0], [0.25, 0, 0.75, 0] ] )
#weights determines relative importance of major_relationship, relationships, env_prefs, survival_constant (in that order)
#see method definition calc_p_survival in Class Taxon
folder_name_list = ["Rel_Weighted/", "Equal_Weighted/", "Env_Weighted/"]
#Create relationships_array
#row is the "receiving" taxon, col is the "giving" taxon.
#for example, if relationships_matrix[0,1] == 1 and relationships_matrix[1,0] = -1
#that means taxon 0 benefits from taxon 1, but taxon 1 is hindered by taxon 0
relationships_matrix = np.zeros([n_taxa, n_taxa])
for i in range(n_taxa):
relationships_matrix[i,i] = 0 #no relationship with self. (on diagonal)
for j in range(i):
if random_reciprocity:
relationships_matrix[i,j] = np.round( random.uniform(-1, 1), decimals=2)
relationships_matrix[j,i] = np.round( random.uniform(-1, 1), decimals=2)
else:
reciprocity = random.choice(reciprocity_list)
relationship_a = np.round( random.uniform(-1, 1), decimals=2)
relationship_b = relationship_a * reciprocity
if random.random() < 0.5:
relationships_matrix[i,j] = relationship_a
relationships_matrix[j,i] = relationship_b
else:
relationships_matrix[i,j] = relationship_b
relationships_matrix[j,i] = relationship_a
#make a number of lists moving left to right, in each list, swap partnerships until doesn't interfere with past partnerships.
#set major_relationship for each Taxon
#value is index of partner taxon
'''
major_rel_list = np.ones( [n_taxa, n_maj_relationships] ) * (-1)
potential_partners = np.asarray(range(n_taxa))
is_available = ( np.ones(n_taxa) * n_maj_relationships ).astype(int)
for tax in range(n_taxa):
if is_available[tax] != 0:
eligible_partners = (is_available != 0) * (potential_partners != tax)
major_partners = ( np.random.choice(potential_partners[ eligible_partners ], is_available[tax], replace=False) ).astype(int)
major_rel_list[tax][-is_available[tax]: ] = major_partners
is_available[tax] = 0
for partner in major_partners:
major_rel_list[partner][-is_available[partner]] = tax
is_available[partner] -= 1
'''
major_rel_list = np.ones( [n_taxa, n_maj_relationships] ) * (-1)
potential_partners = np.asarray(range(n_taxa))
for rel in range(n_maj_relationships):
is_available = np.ones(n_taxa).astype(int)
for tax in range(n_taxa):
if is_available[tax] != 0:
eligible_partners = (is_available != 0) * (potential_partners != tax)
partner = random.choice( potential_partners[ eligible_partners ] )
major_rel_list[tax][rel] = partner
is_available[tax] = 0
major_rel_list[partner][rel] = tax
is_available[partner] = 0
overlap = True
while overlap == True:
overlap = False
for tax in range(n_taxa):
if major_rel_list[tax][rel] in major_rel_list[tax][:rel]:
swap1a = tax
swap1b = major_rel_list[tax][rel]
swap2a = int(major_rel_list[random.choice(potential_partners)])
swap2b = int(major_rel_list[swap2a][rel])
major_rel_list[swap1a][rel] = swap2a
major_rel_list[swap1b][rel] = swap2b
major_rel_list[swap2a][rel] = swap1a
major_rel_list[swap2b][rel] = swap1b
overlap = True
break
#Initialize Taxon instances
taxa_list = [None] * n_taxa
for tax in range(n_taxa):
#all parameters are in range [-1,1] to keep probabilities normalized.
#See calc_p_survival method in Taxon class for use.
env_prefs_array = [0] * n_env_params
for param in range(n_env_params):
env_prefs_array[param] = np.round( random.uniform(-1, 1), decimals=2)
survival_constant = np.round( random.uniform(-1, 1), decimals=2)
#Create new instance of Taxon and set properties.
current_taxon = taxa_list[tax] = Taxon(tax)
current_taxon.set_env_prefs( env_prefs_array )
current_taxon.set_relationships( relationships_matrix[tax,], major_rel_list[tax] )
current_taxon.set_survival_constant( survival_constant )
#######
### Code below uses the same taxa_list to simulate many separate environments ###
########
#Relative abundances are tracked within Taxon instances (within taxa_list)
#and also within a curr_abundances array with indices corresponding to taxa_list indices
for w in range( weights_array.shape[0] ):
weights = weights_array[w]
folder_name = folder_name_list[w]
os.mkdir(run_folder + folder_name)
rel_abundance_over_samples = np.zeros((n_samples, n_taxa ))
#Will keep track of relative abundances per sample (each row is a sample)
env_params_over_samples = np.zeros((n_samples, n_env_params))
#keep track of environmental parameters (columns) over samples (rows)
for sample in range(n_samples):
print("sample=", sample)
#Initialize community by giving abundances to each Taxon. Need relative abundances to sum to 1.
random_abundances = np.zeros(n_taxa)
for tax in range(n_taxa):
random_abundances[tax] = random.random()
#Normalize
sum_random_abundances = sum(random_abundances)
#absolute abundances
initial_abundances = np.round( max_abundance * (random_abundances / sum_random_abundances), decimals=0 )
curr_abundances = np.zeros(n_taxa)
for tax in range(n_taxa):
curr_abundances[tax] = taxa_list[tax].abundance = int( np.round( initial_abundances[tax], decimals=0 ) )
#See the t loop below for use of curr_abundances.
#Initialize environment, as defined by some number of environmental parameters [-1,1]
env_params = np.zeros(n_env_params)
this_param = 0
for param in range(n_env_params):
this_param = np.round( random.uniform(-1, 1) , decimals=2 )
env_params[param] = this_param
env_params_over_samples[sample, param] = this_param
#Can go negative so that a Taxon with a negative env_prefs_array value
#can have a positive fitness affect.
#time steps loop.
#Taxa multiply back to max_abundance. p_survival is calculated for each taxon.
#individuals die with p = (1 - self.p_survival).
#Note that Taxon.abundance instance properties are not updated until end of loop.
id2add = None #initialize
sum_abundances = None #initialize
rel_abundances = np.zeros(n_taxa) #initialize
for t in range(t_steps):
#Growth! Double all populations. If total under max_abundance, randomly add
#individuals with equal probability across taxa
curr_abundances *= 2 #first defined in "intialize commmunity" section
sum_abundances = sum(curr_abundances)
if sum_abundances > max_abundance :
curr_abundances = np.round( (curr_abundances / sum_abundances), decimals=0 )
curr_abundances = curr_abundances.astype(int)
vacancies = int( max_abundance - sum(curr_abundances) )
#randomly add individuals to reach max_abundance
for vac in range(vacancies):
#randomly choose a Taxon from taxa_list to increment abundance by one
id2add = int( np.floor(n_taxa * random.random()) )
curr_abundances[ id2add ] += 1
#Calculate p_survival, kill individuals in each Taxon, update Taxon.abundance and curr_abundances
rel_abundances = np.round ( curr_abundances / max_abundance, decimals = 3 ) #want to refer to separate data
#rel_abundances will not update as each taxa loses abundance
for tax in range( n_taxa ) :
current_taxon = taxa_list[ tax ]
current_taxon.calc_p_survival( rel_abundances, env_params, weights )
#loop through individuals within each Taxon and kill them with (p = 1-self.p_survival)
for individual in range(curr_abundances[ tax ]):
if random.random() > current_taxon.p_survival:
curr_abundances[ tax ] -= 1
current_taxon.abundance = curr_abundances[ tax ]
#Change the environment
for param in range(n_env_params):
env_params[param] = np.round( random.uniform(-1, 1) , decimals=2 )
#same as t in range(t_steps) loop above, but environment has changed.
for t in range(t_after_change):
#Growth!
curr_abundances *= 2 #first defined in "intialize commmunity" section
sum_abundances = sum(curr_abundances)
if sum_abundances > max_abundance :
curr_abundances = np.round( (curr_abundances / sum_abundances), decimals=0 )
curr_abundances = curr_abundances.astype(int)
vacancies = int( max_abundance - sum(curr_abundances) )
#randomly add individuals to reach max_abundance
for vac in range(vacancies):
#randomly choose a Taxon from taxa_list to increment abundance by one
id2add = int( np.floor(n_taxa * random.random()) )
curr_abundances[ id2add ] += 1
#Calculate p_survival, kill individuals in each Taxon, update Taxon.abundance and curr_abundances
rel_abundances = 0 + curr_abundances #want to refer to separate data
#rel_abundances will not update as each taxa loses abundance
for tax in range( n_taxa ) :
current_taxon = taxa_list[ tax ]
current_taxon.calc_p_survival( rel_abundances, env_params, weights )
#loop through individuals within each Taxon and kill them with (p = 1-self.p_survival)
for individual in range(curr_abundances[ tax ]):
if random.random() > current_taxon.p_survival:
curr_abundances[ tax ] -= 1
current_taxon.abundance = curr_abundances[ tax ]
#sample
this_sample = np.zeros(n_taxa)
for tax in range(n_taxa) :
for individual in range(curr_abundances[ tax ]) :
if random.random() < sampling_efficiency :
this_sample[tax] += 1
this_sample = np.round( (this_sample/ sum( this_sample ) ) , decimals=3 )
rel_abundance_over_samples[sample, :] = this_sample
########################
#####End sample loop####
########################
#Save data that changes with each iteration of "w" loop that loops through weights.
#Save relative abundance tables (columns = taxa), rows = samples
df_rel_abundances = pd.DataFrame(rel_abundance_over_samples)
df_rel_abundances.to_csv( (run_folder + folder_name + 'Rel_Abundances.csv') , index=False)
#Save environmental parameters for each sample.
df_env = pd.DataFrame(env_params_over_samples)
df_env.to_csv( (run_folder + folder_name + 'Env_Params.csv') , index = False)
#save weights array
df_weights = pd.DataFrame(weights)
df_weights.to_csv( (run_folder + folder_name + 'Weights.csv') , index = False)
#Save data that did not change as weights varied.
#Save metadata
metadata = [ ["n_samples", "max_abundance", "n_taxa", "t_steps", "t_after_change", "n_env_params", "sampling_efficiency", "random_reciprocity"],
[n_samples, max_abundance, n_taxa, t_steps, t_after_change, n_env_params, sampling_efficiency, random_reciprocity] ]
df_meta = pd.DataFrame(metadata)
df_meta.to_csv( (run_folder + 'metadata.csv') ,index = False)
#Save relationships array
df_relationships = pd.DataFrame(relationships_matrix)
df_relationships.to_csv( (run_folder + 'Relationships_Array.csv'), index = False)
#Save major relationships and environmental preferences and survival constants
#env_prefs_list: columns are environmental variables, rows are taxa.
#major_rel_list: first value is index of partner taxon. Second value is relationship value.
#Second value recorded when Taxon instances initialized to make sure data are identical.
#I am reusing the variable name for this list to make sure it is what is actually being
#used by the instances of Taxon.
surv_c_list = np.zeros((n_taxa, 1))
env_prefs_list = np.zeros((n_taxa, n_env_params))
major_rel_list = np.zeros((n_taxa, n_maj_relationships))
for tax in range(n_taxa):
current_taxon = taxa_list[tax]
surv_c_list[tax] = current_taxon.survival_constant
for rel in range(n_maj_relationships):
major_rel_list[tax][rel] = current_taxon.major_relationships[rel]
for e in range(n_env_params):
env_prefs_list[tax][e] = current_taxon.env_prefs[e]
df_surv_c = pd.DataFrame(surv_c_list)
df_surv_c.to_csv((run_folder + 'Survival_C.csv'), index = False)
df_env_prefs = pd.DataFrame(env_prefs_list)
df_env_prefs.to_csv((run_folder + 'Env_Prefs.csv'), index = False)
df_maj_rel = pd.DataFrame(major_rel_list)
df_maj_rel.to_csv((run_folder + 'Major_Rels.csv'), index = False)
#Make weights loop through a list of lists.
#make sure everything resets at start of weights loop that needs to.