Skip to content

Commit c7b24c0

Browse files
committed
Added materials from 2017, with a few initial edits on interactive jobs.
1 parent 1c2a7b7 commit c7b24c0

File tree

10 files changed

+1935
-0
lines changed

10 files changed

+1935
-0
lines changed

Makefile

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
# Build the workshop notes (intro.html) and the slide deck
# (intro_slides.html) from the single Markdown source intro.md via pandoc.

# 'all' and 'clean' are not real files; mark them phony so a stray file
# with either name never masks the target.
.PHONY: all clean

all: intro.html intro_slides.html

# Standalone HTML notes.
intro.html: intro.md
	pandoc -s -o intro.html intro.md

# Slidy slide deck; --webtex renders math as images so no MathJax is needed.
intro_slides.html: intro.md
	pandoc -s --webtex -t slidy -o intro_slides.html intro.md

# Remove generated output.  Fix: the original removed only intro.html,
# leaving intro_slides.html stale; also use -f (plain files), not -rf.
clean:
	rm -f intro.html intro_slides.html

calc.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
# Benchmark the two dominant dense linear-algebra steps used in the
# workshop's example SLURM job: forming the crossproduct X'X of an
# n x n Gaussian matrix, then taking its Cholesky factorization.
# A timestamp is printed before, between, and after the two operations
# so elapsed times can be read off the job's output file.
import numpy as np
import time


def run(n=10000):
    """Run the benchmark for an n x n matrix and return the Cholesky factor.

    Parameters
    ----------
    n : int
        Matrix dimension (default 10000, matching the original script).

    Returns
    -------
    numpy.ndarray
        Lower-triangular factor U with U @ U.T equal to X.T @ X.
    """
    x = np.random.normal(0, 1, size=(n, n))
    print(time.time())
    # X'X is symmetric positive definite (almost surely), so the
    # Cholesky factorization below is well defined.
    x = x.T.dot(x)
    print(time.time())
    U = np.linalg.cholesky(x)
    print(time.time())
    return U


if __name__ == '__main__':
    run()

intro.html

Lines changed: 534 additions & 0 deletions
Large diffs are not rendered by default.

intro.md

Lines changed: 686 additions & 0 deletions
Large diffs are not rendered by default.

intro_slides.html

Lines changed: 596 additions & 0 deletions
Large diffs are not rendered by default.

job.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
#!/bin/bash
# Example SLURM batch script for the Savio cluster: runs the calc.py
# benchmark on the savio2 partition under the co_stat condo account,
# capturing stdout and stderr in calc.out.
#
# Job name:
#SBATCH --job-name=test
#
# Account:
#SBATCH --account=co_stat
#
# Partition:
#SBATCH --partition=savio2
#
# Wall clock limit (30 seconds here):
#SBATCH --time=00:00:30
#
## Command(s) to run:
module load python/3.2.3 numpy
python3 calc.py >& calc.out

parallel-multi.R

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
## Per-destination summaries of Bay Area flight delays, computed in
## parallel across multiple nodes with doMPI.  Run under mpirun/srun;
## one MPI process becomes the master and the rest become foreach workers.

library(doMPI)

cl <- startMPIcluster()  # by default will start one fewer slave
registerDoMPI(cl)
clusterSize(cl)  # just to check

dat <- read.csv('/global/scratch/paciorek/bayArea.csv', header = FALSE,
                stringsAsFactors = FALSE)
names(dat)[16:18] <- c('delay', 'origin', 'dest')
table(dat$dest)

destVals <- unique(dat$dest)

# restrict to only columns we need to reduce copying time
dat2 <- subset(dat, select = c('delay', 'origin', 'dest'))

# some overhead in copying 'dat2' to worker processes...
results <- foreach(destVal = destVals) %dopar% {
    sub <- subset(dat2, dest == destVal)
    summary(sub$delay)
}

results

closeCluster(cl)
mpi.quit()

parallel-one.R

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
## Per-destination summaries of Bay Area flight delays, computed in
## parallel on a single node with doParallel, using one worker per
## core that SLURM allocated to the job.

library(doParallel)

# SLURM_CPUS_ON_NODE is only set inside a SLURM job; Sys.getenv()
# returns "" otherwise, which as.numeric() turns into NA and
# registerDoParallel(NA) then errors.  Fall back to a single core so
# the script also runs interactively.
nCores <- as.numeric(Sys.getenv('SLURM_CPUS_ON_NODE'))
if (is.na(nCores)) nCores <- 1
registerDoParallel(nCores)

dat <- read.csv('/global/scratch/paciorek/bayArea.csv', header = FALSE,
                stringsAsFactors = FALSE)
names(dat)[16:18] <- c('delay', 'origin', 'dest')
table(dat$dest)

destVals <- unique(dat$dest)

# each destination's delays are summarized by a separate worker
results <- foreach(destVal = destVals) %dopar% {
    sub <- subset(dat, dest == destVal)
    summary(sub$delay)
}

results

parallel.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
# Parallelize per-destination regressions of departure delay on year
# across IPython engines (an ipcluster must already be running).
# NOTE(review): IPython.parallel was split out as the 'ipyparallel'
# package in IPython 4 -- confirm which import newer setups need.
from IPython.parallel import Client

client = Client()
client.ids  # engine ids -- just to confirm the cluster is up

direct = client[:]  # direct view over all engines
direct.block = True
direct.apply(lambda : "Hello, World")

balanced = client.load_balanced_view()
balanced.block = True

import pandas
dat = pandas.read_csv('bayArea.csv', header = None)
dat.columns = ('Year','Month','DayofMonth','DayOfWeek','DepTime','CRSDepTime','ArrTime','CRSArrTime','UniqueCarrier','FlightNum','TailNum','ActualElapsedTime','CRSElapsedTime','AirTime','ArrDelay','DepDelay','Origin','Dest','Distance','TaxiIn','TaxiOut','Cancelled','CancellationCode','Diverted','CarrierDelay','WeatherDelay','NASDelay','SecurityDelay','LateAircraftDelay')

# make statsmodels available on every engine
direct.execute('import statsmodels.api as sm')

dat2 = dat.loc[:, ('DepDelay','Year','Dest','Origin')]
dests = dat2.Dest.unique()

# ship the data and the destination list to all engines; f() looks
# these up as globals when it runs remotely
shared = dict(dat2 = dat2, dests = dests)
direct.push(shared)

def f(id):
    # regress departure delay on year for the id-th destination
    sub = dat2.loc[dat2.Dest == dests[id],:]
    sub = sm.add_constant(sub)
    model = sm.OLS(sub.DepDelay, sub.loc[:,('const','Year')])
    results = model.fit()
    return results.params

import time
time.time()
parallel_result = balanced.map(f, range(len(dests)))
#result = map(f, range(len(dests)))
time.time()

# some NaN values because all 'Year' values are the same for some destinations

parallel_result

savioOverview.jpeg

47.3 KB
Loading

0 commit comments

Comments
 (0)