|
5 | 5 |
|
6 | 6 | # The Scientific Python Stack
|
7 | 7 |
|
| 8 | +# <headingcell level=3> |
| 9 | + |
| 10 | +# Numpy |
| 11 | + |
8 | 12 | # <markdowncell>
|
9 | 13 |
|
10 | 14 | # The scientific Python stack consists of Numpy, Scipy, and Matplotlib. Numpy brings to Python what Matlab is well known for : strong array manipulation and matrix operations, elementary mathematical functions, random numbers, ...
|
|
31 | 35 |
|
32 | 36 | # <codecell>
|
33 | 37 |
|
34 |
| -print sin(0.5) |
| 38 | +# Another function for sin can be found in the Python math library |
| 39 | +import math |
| 40 | + |
| 41 | +print math.sin(0.5) |
35 | 42 | print np.sin(0.5)
|
36 | 43 |
|
37 | 44 | sin(0.5) == np.sin(0.5)
|
38 | 45 |
|
39 | 46 | # <codecell>
|
40 | 47 |
|
41 |
| -x = [] # x is an empty list ( not a numpy array here ) |
42 |
| -for i in range(10) : |
43 |
| - x.append(float(i) / 10) |
| 48 | +# BUT |
| 49 | +x = np.linspace(0, 2 * np.pi, 30) |
| 50 | +print np.sin(x), "\n" |
| 51 | +print sin(x) |
44 | 52 |
|
45 |
| -# Equivalent numpy array generation |
46 |
| -y = np.arange(0, 1, 0.1) |
| 53 | +# <markdowncell> |
47 | 54 |
|
48 |
| -# These should still be equal |
49 |
| -print sin(y), "\n" |
50 |
| -print np.sin(y) |
| 55 | +# Note how this function doesn't like to be passed lists or arrays. It's thrown us an error, and provided us with a **traceback** - a "chronological" trace of where the error occurred, and what caused it. This one's not particularly long, as the error is immediate ( and not in a subfunction ), but it does tell us which line it's on. It seems that `math.sin()` doesn't like to be passed anything but length-1 arrays - that is, single floats or ints. Not particularly useful when we're trying to be array-oriented, which is what numpy excels at. |
| 56 | +# |
| 57 | +# OK, back to numpy. |
51 | 58 |
|
52 | 59 | # <codecell>
|
53 | 60 |
|
54 | 61 | # Numpy arrays can be easily manipulated
|
55 |
| -blah = np.random.lognormal(0, 1, (5, 3)) |
| 62 | +blah = np.random.lognormal(0, 1, (4, 3)) |
56 | 63 |
|
57 | 64 | print np.dot(blah, blah.T), "\n"
|
58 | 65 | print blah.shape, "\n"
|
|
63 | 70 | # The numpy array is its own type, but it's still a container. It expands on the capabilities of the standard Python list.
|
64 | 71 | # Access is easier though : for multidimensional arrays, you just use a comma per dimension
|
65 | 72 | print type(blah[0, 0])
|
| 73 | +print blah[:, 2::2] |
66 | 74 |
|
67 | 75 | # <codecell>
|
68 | 76 |
|
69 |
| -# The : operator still works great. Standard arithmetic is ELEMENT-wise. Dimensions are broadcast. |
| 77 | +# The : operator is still used. Standard arithmetic is ELEMENT-wise. Dimensions are broadcast. |
70 | 78 | print blah[:, 1] * blah[:, 2] - 2.5
|
| 79 | +print blah.sum(), blah.mean(), blah.min(), blah.max(), blah.var(), blah.cumsum().std() |
| 80 | + |
| 81 | +# <codecell> |
| 82 | + |
| 83 | +# Quick note on interactive environments. If you're in IPython, Spyder, or similar, you can tab-complete : |
| 84 | +blah |
| 85 | + |
| 86 | +# <codecell> |
| 87 | + |
| 88 | +# We can manipulate the shapes of arrays |
| 89 | +print blah.shape |
| 90 | +print blah.ravel().shape # Unravel / flatten the array |
| 91 | +print blah.reshape(2, 6).shape |
| 92 | +print blah.reshape(2, 6).T |
| 93 | + |
| 94 | +# <markdowncell> |
| 95 | + |
| 96 | +# **Linear Algebra** |
| 97 | +# |
| 98 | +# Numpy has a module called `linalg`, which offers things like norms, decompositions, matrix powers, inner and outer products, trace, eigenvalues, etc... |
| 99 | + |
| 100 | +# <codecell> |
| 101 | + |
| 102 | +# Linear algebra |
| 103 | +A = np.random.normal(size=(3, 3)) |
| 104 | +print np.linalg.inv(A), "\n" |
| 105 | + |
| 106 | +# Modules can be imported under their own namespace for short |
| 107 | +import numpy.linalg as lin |
| 108 | +print lin.inv(A), "\n" |
| 109 | +print lin.eigvals(A), "\n" |
| 110 | + |
| 111 | +# If we want to do some real linear algebra, we can skip elementwise syntax by declaring an array as a numpy matrix : |
| 112 | +B = np.matrix(A) |
| 113 | +print B * B, "\n" # now MATRIX multiplication |
| 114 | +print A * A # still ELEMENTWISE multiplication |
| 115 | + |
| 116 | +# <headingcell level=3> |
| 117 | + |
| 118 | +# Matplotlib |
| 119 | + |
| 120 | +# <markdowncell> |
| 121 | + |
| 122 | +# Before we continue onto Scipy, let's look into some plotting. Matplotlib is aimed to feel familiar to Matlab users. |
| 123 | + |
| 124 | +# <codecell> |
| 125 | + |
| 126 | +import matplotlib.pyplot as plt |
| 127 | +figsize(12, 8) |
| 128 | + |
| 129 | +x = np.random.exponential(3, size=10000) |
| 130 | + |
| 131 | +plt.subplot(1, 2, 1) |
| 132 | +plt.plot(x[::100]) |
| 133 | +plt.subplot(1, 2, 2) |
| 134 | +plt.hist(x, 50) |
| 135 | +plt.show() |
| 136 | + |
| 137 | +# Note that, in an interactive setting, you will need to call plt.show(), which I've included above for completeness |
| 138 | +# In IPython using the --pylab inline argument, you don't need it; I'll drop it from here |
| 139 | + |
| 140 | +# <codecell> |
| 141 | + |
| 142 | +# Like Matlab, we can feed style arguments to the plot function |
| 143 | +x = np.linspace(0, 2 * np.pi, 100) |
| 144 | +y = np.sin(x) + np.random.normal(scale = 0.2, size = 100) |
| 145 | + |
| 146 | +# Calling plot() several times without creating a new figure or subplot puts them on the same figure |
| 147 | +plt.plot(x, y, "r.") |
| 148 | +plt.plot(x, np.sin(x), "k-") |
| 149 | + |
| 150 | +# <codecell> |
| 151 | + |
| 152 | +# 50x50x3 matrix on a scatter plot, third dimension plotted as size |
| 153 | +x = np.random.uniform(low = 0, high = 1, size = (50, 50, 3)) |
| 154 | + |
| 155 | +# Transparency ? Sure. |
| 156 | +plt.scatter(x[:, 0], x[:, 1], s=x[:, 2]*500, alpha = 0.3) |
| 157 | + |
| 158 | +# Set axis limits |
| 159 | +plt.xlim([0, 1]) |
| 160 | +plt.ylim([0, 1]) |
| 161 | + |
| 162 | +# <codecell> |
| 163 | + |
| 164 | +# Area plots ? |
| 165 | +x = np.linspace(0, 6 * np.pi, 100) |
| 166 | +y = np.exp(-0.2*x) * np.sin(x) |
| 167 | + |
| 168 | +plt.plot(x, y, "k", linewidth=4) |
| 169 | +plt.fill_between(x, y, y2=0, facecolor="red") |
71 | 170 |
|
72 | 171 | # <codecell>
|
73 | 172 |
|
| 173 | +# Error bars ? Let's say we got ten measurements of the above process, the decaying sine wave |
| 174 | +print y.shape |
| 175 | +measuredy = np.tile(y, (100, 1)) # tile y, 10x1 copies |
| 176 | +print measuredy.shape |
| 177 | + |
| 178 | +# Let's add some heteroscedastic noise to our observations |
| 179 | +measuredy = np.random.normal(loc=y, scale=np.abs(y+0.01)/2., size = measuredy.shape) |
| 180 | + |
| 181 | +# Let's assume we already know our error is Gaussian, for simplicity. Compute mean and std : |
| 182 | +estmean = measuredy.mean(axis=0) |
| 183 | +eststd = measuredy.std(axis=0) |
| 184 | + |
| 185 | +# Plot the estimated mean with one standard deviation error bars, and the real signal |
| 186 | +plt.plot(x, y, "r", linewidth=3) |
| 187 | +plt.errorbar(x, estmean, yerr = eststd) |
| 188 | + |
| 189 | +# <codecell> |
| 190 | + |
| 191 | +# Two dimensional, or image plots ? Perlin noise is fun ! |
| 192 | + |
| 193 | +# Initial 2D noisy signal |
| 194 | +X = np.random.normal(size=(256, 256)) |
| 195 | +plt.figure() |
| 196 | +plt.imshow(X, cmap="gray") |
| 197 | +plt.title("Original 2D Gaussian noise") |
| 198 | + |
| 199 | +# We'll use a 2D Gaussian for smoothing, with identity as covariance matrix |
| 200 | +# We'll grab a scipy function for it for now |
| 201 | +import scipy.ndimage as nd |
| 202 | +plt.figure() |
| 203 | +temp = np.zeros_like(X) |
| 204 | +temp[128, 128] = 1. |
| 205 | +plt.imshow(nd.filters.gaussian_filter(temp, 20, mode="wrap")) |
| 206 | +plt.title("Gaussian kernel") |
| 207 | + |
| 208 | +# Generate the Perlin noise |
| 209 | +perlin = zeros_like(X) |
| 210 | +for i in 2**np.arange(6) : |
| 211 | + perlin += nd.filters.gaussian_filter(X, i, mode="wrap") * i**2 |
| 212 | + |
| 213 | +# and plot it several ways |
| 214 | +plt.figure() |
| 215 | +plt.imshow(perlin) |
| 216 | +plt.title("Perlin Noise") |
| 217 | + |
| 218 | +plt.figure() |
| 219 | +plt.imshow(perlin, cmap="gray") |
| 220 | +plt.contour(perlin, linewidths=3) |
| 221 | +plt.title("Greyscale, with contours") |
| 222 | +#plt.xlim([0, 256]) |
| 223 | +#plt.ylim([0, 256]) |
| 224 | +#plt.axes().set_aspect('equal', 'datalim') |
| 225 | + |
| 226 | +# <codecell> |
| 227 | + |
| 228 | +# And, of course ( stolen from Jake VanderPlas ) |
| 229 | + |
| 230 | +def norm(x, x0, sigma): |
| 231 | + return np.exp(-0.5 * (x - x0) ** 2 / sigma ** 2) |
| 232 | + |
| 233 | +def sigmoid(x, x0, alpha): |
| 234 | + return 1. / (1. + np.exp(- (x - x0) / alpha)) |
| 235 | + |
| 236 | +# define the curves |
| 237 | +x = np.linspace(0, 1, 100) |
| 238 | +y1 = np.sqrt(norm(x, 0.7, 0.05)) + 0.2 * (1.5 - sigmoid(x, 0.8, 0.05)) |
| 239 | + |
| 240 | +y2 = 0.2 * norm(x, 0.5, 0.2) + np.sqrt(norm(x, 0.6, 0.05)) + 0.1 * (1 - sigmoid(x, 0.75, 0.05)) |
| 241 | + |
| 242 | +y3 = 0.05 + 1.4 * norm(x, 0.85, 0.08) |
| 243 | +y3[x > 0.85] = 0.05 + 1.4 * norm(x[x > 0.85], 0.85, 0.3) |
| 244 | + |
| 245 | + |
| 246 | +with plt.xkcd() : |
| 247 | + plt.plot(x, y1, c='gray') |
| 248 | + plt.plot(x, y2, c='blue') |
| 249 | + plt.plot(x, y3, c='red') |
| 250 | + |
| 251 | + plt.text(0.3, -0.1, "Yard") |
| 252 | + plt.text(0.5, -0.1, "Steps") |
| 253 | + plt.text(0.7, -0.1, "Door") |
| 254 | + plt.text(0.9, -0.1, "Inside") |
| 255 | + |
| 256 | + plt.text(0.05, 1.1, "fear that\nthere's\nsomething\nbehind me") |
| 257 | + plt.plot([0.15, 0.2], [1.0, 0.2], '-k', lw=0.5) |
| 258 | + |
| 259 | + plt.text(0.25, 0.8, "forward\nspeed") |
| 260 | + plt.plot([0.32, 0.35], [0.75, 0.35], '-k', lw=0.5) |
| 261 | + |
| 262 | + plt.text(0.9, 0.4, "embarrassment") |
| 263 | + plt.plot([1.0, 0.8], [0.55, 1.05], '-k', lw=0.5) |
| 264 | + |
| 265 | + plt.title("Walking back to my\nfront door at night:") |
| 266 | + |
| 267 | + plt.xlim([0, 1]) |
| 268 | + plt.ylim([0, 1.5]) |
| 269 | + |
| 270 | +# <headingcell level=3> |
| 271 | + |
| 272 | +# Scipy |
| 273 | + |
| 274 | +# <markdowncell> |
| 275 | + |
| 276 | +# Scipy does all sorts of awesome stuff : integration, interpolation, optimisation, FFTs, signal processing, more linear algebra, stats, image processing, ... |
| 277 | + |
| 278 | +# <codecell> |
| 279 | + |
| 280 | +import scipy as sp |
| 281 | +import scipy.spatial as spatial |
| 282 | +from scipy.integrate import odeint |
| 283 | +from scipy.optimize import minimize |
| 284 | +from scipy.signal import detrend |
| 285 | +import scipy.stats as st |
| 286 | + |
| 287 | +# Convex hulls |
| 288 | +ax = plt.axes() |
| 289 | +points = np.random.normal(0.5, 0.2, size=(20,2)) |
| 290 | +plt.scatter(points[:, 0], points[:, 1], s=200, c="r") |
| 291 | +hull = spatial.ConvexHull(points) |
| 292 | +for i in hull.simplices : |
| 293 | + plt.plot(points[i, 0], points[i, 1], "k", linewidth=4) |
| 294 | +spatial.voronoi_plot_2d(spatial.Voronoi(points), ax) |
| 295 | +plt.title("Convex Hull and Voronoi Tessellation") |
| 296 | + |
| 297 | + |
| 298 | +# Ordinary Differential Equations |
| 299 | +r0 = 16. |
| 300 | +gamma = .5 |
| 301 | +t = np.linspace(0, 8, 200) |
| 302 | +def SIR(y, t) : |
| 303 | + S = - r0 * gamma * y[0] * y[1] |
| 304 | + I = r0 * gamma * y[0] * y[1] - gamma * y[1] |
| 305 | + R = gamma * y[1] |
| 306 | + return [S, I, R] |
| 307 | +solution = odeint(SIR, [0.99, .01, 0.], t) |
| 308 | + |
| 309 | +plt.figure() |
| 310 | +plt.plot(t, solution, linewidth=3) |
| 311 | +plt.title("Measles, Single Epidemic") |
| 312 | +plt.xlabel("Time (weeks)") |
| 313 | +plt.ylabel("Proportion of Population") |
| 314 | +plt.legend(["Susceptible", "Infected", "Recovered"]) |
| 315 | + |
| 316 | + |
| 317 | +# Signal Processing - create trending signal, detrend, FFT |
| 318 | +x = np.linspace(0, 1, 300) |
| 319 | +trend = np.zeros_like(x) |
| 320 | +trend[100:200] = np.linspace(0, 10, 100) |
| 321 | +trend[200:] = np.linspace(10, -20, 100) |
| 322 | +y = np.random.normal(loc = 3*np.sin(2*np.pi*20*x)) + trend # Signal is a sine wave with noise and trend |
| 323 | +yt = detrend(y, bp = [100, 200]) # detrend, giving break points |
| 324 | +Yfft = sp.fftpack.rfft(yt) # Calculate FFT |
| 325 | +freqs = sp.fftpack.rfftfreq(len(yt), x[1] - x[0]) # Frequencies of the FFT |
| 326 | + |
| 327 | +plt.figure() |
| 328 | +plt.subplot(311) |
| 329 | +plt.title("Detrending") |
| 330 | +plt.plot(x, y, linewidth=2) |
| 331 | +plt.plot(x, trend, linewidth=4) |
| 332 | +plt.subplot(312) |
| 333 | +plt.plot(x, yt, linewidth=2) |
| 334 | +plt.subplot(313) |
| 335 | +plt.title("FFT") |
| 336 | +plt.plot(freqs, (np.abs(Yfft)**2), linewidth=2) |
| 337 | + |
| 338 | + |
| 339 | +# Distributions |
| 340 | +plt.figure() |
| 341 | +plt.subplot(131) |
| 342 | +plt.title("Normal PDF") |
| 343 | +plt.plot(np.arange(-5, 5, 0.1), st.norm.pdf(np.arange(-5, 5, 0.1), 0, 1), linewidth=2) |
| 344 | +plt.subplot(132) |
| 345 | +plt.title("Beta CDF") |
| 346 | +plt.plot(st.beta.cdf(np.linspace(0, 1, 100), 0.5, 0.5), linewidth=2) |
| 347 | +plt.subplot(133) |
| 348 | +plt.title("Erlang PDF") |
| 349 | +plt.plot(np.linspace(0, 10, 100), st.erlang.pdf(np.linspace(0, 10, 100), 2, loc=0), linewidth=2) |
| 350 | + |
| 351 | + |
| 352 | +# Statistical Tests |
| 353 | +a = np.random.normal(0, 1.5, size=1000) |
| 354 | +b = np.random.normal(.2, 1, size=1000) |
| 355 | +plt.figure() |
| 356 | +plt.hist(a, 30) |
| 357 | +plt.hist(b, 30) |
| 358 | +plt.title("p = " + str(st.ttest_ind(a, b)[1])) |
| 359 | + |
| 360 | +# <markdowncell> |
| 361 | + |
| 362 | +# There's a lot more you can do with `numpy`, `scipy`, and `matplotlib`. The documentation's pretty good, so look around for what you're after - it's probably already been implemented. |
74 | 363 |
|
0 commit comments