-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathsimpleExample.py
319 lines (194 loc) · 8.36 KB
/
simpleExample.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
#!/usr/bin/env python
# coding: utf-8
# *Note: This file is provided in two formats:
# Python (simpleExample.py) and a Jupyter
# notebook (simpleExample.ipynb). The
# Jupyter notebook opens in a web browser and
# includes plots in an interactive format. To
# open the .ipynb file, run:*
#
# jupyter notebook simpleExample.ipynb
#
# *To run the .py file in iPython at the command line, run:*
#
# ipython
# %run simpleExample.py
# plt.show()
# simpleExample.ipynb
# -------------------
#
# - Bryan Daniels
#
# - Last updated 2022/6/13
#
# - Uses a simple exponential decay example to demonstrate usage of SirIsaac
# In[1]:
import numpy as np
from matplotlib import pyplot as plt
from SirIsaac import fittingProblem
# Load example data
# -----------------
# In the example data file, we have five columns, each with 100 data points, listing:
#
# * Initial condition *x_init*
# * Input condition *tau*
# * Measurement time *t*
# * Measurement value *x*
# * Measurement uncertainty (standard deviation)
# In[2]:
data = np.loadtxt('simpleExample_data.csv',delimiter=',')
# We now put this in a format compatible with SirIsaac. First we make a list of input values (in this case initial conditions):
#
#
# In[3]:
indepParamsList = [ [ expt[0],expt[1] ] for expt in data ]
# In[4]:
indepParamsList[:3]
# Next, we have a corresponding list of data taken at each of those input values, in the format below. In this case, we only have one variable *x*. (Note: In general, multiple timepoints could be also be measured at each input value; in all of our examples, we measure all variables at a single timepoint per input value.)
# In[5]:
# [ {'var1': { time0: ( value, uncertainty ) },
# 'var2': { time0: ( value, uncertainty ) },
# ... },
# {'var1': { time1: ( value, uncertainty ) },
# 'var2': { time1: ( value, uncertainty ) },
# ... },
# ... ]
# In[6]:
sirIsaacData = []
for expt in data:
sirIsaacData.append( { 'x': { expt[2]: ( expt[3], expt[4] ) } } )
# In[7]:
sirIsaacData[:3]
# Finally, SirIsaac will need to know what to call the input and output values. In this case, the input corresponds to the initial value of *x*. The way to indicate this to SirIsaac is by using the name 'x_init', where 'x' is the name of the corresponding variable.
#
# Here we have two inputs and one output:
# In[8]:
outputNames = ['x']
indepParamNames = ['x_init','tau']
# Create SirIsaac FittingProblem
# ------------------------------
# We'll attempt to fit a model in the "continuous time sigmoidal network" (CTSN) class. To do this, we'll create an instance of a CTSNFittingProblem. Here we set up its arguments and create it:
# In[9]:
# complexityList lists which models in the model class may be tested.
# (Note that by default SirIsaac will still stop once 3 models have
# smaller estimated log-likelihood.)
complexityStepsize = 2 # increase complexity with steps of size 2
complexityMax = 25 # don't try models with complexity > 25
complexityList = range(0,complexityMax,complexityStepsize)
# ensGen controls the generation of the initial ensemble of
# parameter starting points.
# (We use a small number for totalSteps here so that the example
# runs faster; a more typical value for totalSteps is 1e3)
totalSteps = 20 #1e3
keepSteps = 10
seeds = (1,1) # use a fixed random seed
ensTemperature = 100.
ensGen = fittingProblem.EnsembleGenerator( totalSteps, keepSteps,
temperature=ensTemperature, seeds=seeds )
# Parameters that control when local fitting stops.
avegtol = 1e-2
maxiter = 100
# priorSigma controls the width of priors on all parameters
priorSigma = 3.
# If you have mpi4py installed, you can run on multiple processors
numprocs = 4 #10
# We'll only use a subset of our data to make the example run faster
N = 20
p = fittingProblem.CTSNFittingProblem( complexityList,
sirIsaacData[:N], indepParamsList=indepParamsList[:N],
outputNames=outputNames, indepParamNames=indepParamNames,
ensGen=ensGen, avegtol=avegtol, maxiter=maxiter,
priorSigma=priorSigma, numprocs=numprocs, verbose=True )
# Run parameter fitting
# ---------------------
# The bulk of computation time is used to fit the parameters of each model to the data. Uncomment the following lines to run the parameter fitting, which takes an hour or two using 4 processors. Or skip ahead to load a version that has already been fit.
# In[16]:
## Uncomment to run parameter fitting.
#p.fitAll()
#
#fittingProblem.save(p,'simpleExample_savedFittingProblem.pkl')
# In[10]:
# Load saved version of fittingProblem that has already been fit.
p = fittingProblem.load('simpleExample_savedFittingProblem.pkl')
# Analyze the selected model
# --------------------------
# Here we plot predicted timecourses from the selected model for the first 10 in-sample initial conditions, using plotBestModelResults:
# In[11]:
plt.figure(figsize=(20,2))
p.plotBestModelResults(plotInitialConditions=True,indices=range(10));
# And now for out-of-sample data:
# In[12]:
plt.figure(figsize=(20,2))
m = p.getBestModel()
m.plotResults(sirIsaacData[20:30],indepParamsList[20:30],
plotInitialConditions=True,plotFittingData=True);
# We can look at the selected model's parameters:
# In[13]:
m = p.getBestModel()
print(m.getParameters())
# The following will use SloppyCell to output a latex file with the ODEs describing the selected model:
# In[14]:
m = p.getBestModel()
fittingProblem.IO.eqns_TeX_file(m.net,filename='simpleExample_selectedModel.tex')
# More details
# ------------
# We can examine the dynamics of the hidden nodes as well using plotResults.
# In[15]:
plt.figure(figsize=(20,4))
m = p.getBestModel()
m.plotResults(p.fittingData[:10],
p.indepParamsList[:10],
plotInitialConditions=True,
plotHiddenNodes=True);
# We have access to raw trajectories using evaluateVec. Here we use this to plot a projection of trajectories in phase space for the first in-sample initial conditions:
# In[17]:
plt.figure(figsize=(4,4))
times = np.linspace(0,1,1000)
xdata = m.evaluateVec(times,'x',p.indepParamsList[0])
X2data = m.evaluateVec(times,'X_2',p.indepParamsList[0])
fittingProblem.Plotting.plot(xdata,X2data)
plt.xlabel('x')
plt.ylabel('X_2');
# We can also look at other models that SirIsaac fit in searching for the best one. In this case, 'Model 4' was selected because it has the largest estimated log-likelihood:
# In[18]:
for name in p.fittingModelNames:
if name in p.logLikelihoodDict.keys():
print('{}: #species = {}, #params = {}, L = {}'.format(name,
len(p.fittingModelDict[name].speciesNames),
p.numParametersDict[name],
p.logLikelihoodDict[name]))
print()
print('Selected model:',p.maxLogLikelihoodName())
# We can also look at predictions of a model with more parameters:
# In[19]:
plt.figure(figsize=(20,2))
m2 = p.fittingModelDict['Model 7']
m2.plotResults(sirIsaacData[:10],indepParamsList[:10],
plotInitialConditions=True,plotFittingData=True);
# Also potentially useful is the Hessian at the best-fit parameters:
# In[20]:
hess = p.HessianDict[p.maxLogLikelihoodName()]
u,singVals,vt = np.linalg.svd( hess )
np.sort(singVals)
# Other details about what happened during parameter fitting are stored within each fittingModel:
# In[21]:
m = p.getBestModel()
print("Acceptance ratio for initial parameter ensemble = {}".format(m.acceptanceRatio))
c = sum(np.array(m.currentResiduals(p.fittingData,p.indepParamsList,includePriors=False))**2)
print("Sum of squared residuals at best-fit (without priors) = {}".format(c))
print("Convergence flags for local fits: {}".format(m.convFlagList))
print("Number of cost evaluations for local fits: {}".format(m.numCostCallsList))
print("Number of gradient evaluations for local fits: {}".format(m.numGradCallsList))
# Finally, since in this case we know the function used to create the data, we can compare:
# In[22]:
plt.figure(figsize=(20,2))
indicesToPlot = range(10)
axArray = p.plotBestModelResults(plotInitialConditions=True,indices=indicesToPlot)
# compare to model that generated the data (see simpleExample_makeData.py)
f = lambda x0,tau,t: x0 * np.exp(-times/tau)
for i,indepParams in enumerate(np.array(indepParamsList)[indicesToPlot]):
times = np.linspace(0,1,100)
x0,tau = indepParams
fittingProblem.Plotting.sca(axArray[0][i])
fittingProblem.Plotting.plot(times,f(x0,tau,times),'k:')
# In[ ]: