Skip to content

Commit c7b24c0

Browse files
committed
Added materials from 2017, with a few initial edits on interactive jobs.
1 parent 1c2a7b7 commit c7b24c0

File tree

10 files changed

+1935
-0
lines changed

10 files changed

+1935
-0
lines changed

Makefile

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
# Build the workshop notes (intro.html) and the slide deck
# (intro_slides.html) from the single Markdown source intro.md via pandoc.

# 'all' and 'clean' are not real files; mark them phony so a stray file
# with either name never masks the target.
.PHONY: all clean

all: intro.html intro_slides.html

# Standalone HTML notes.
intro.html: intro.md
	pandoc -s -o intro.html intro.md

# Slidy slide deck; --webtex renders math as images so no MathJax is needed.
intro_slides.html: intro.md
	pandoc -s --webtex -t slidy -o intro_slides.html intro.md

# Remove generated output.  Fix: the original removed only intro.html,
# leaving intro_slides.html stale; also use -f (plain files), not -rf.
clean:
	rm -f intro.html intro_slides.html

calc.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
# Benchmark the two dominant dense linear-algebra steps used in the
# workshop's example SLURM job: forming the crossproduct X'X of an
# n x n Gaussian matrix, then taking its Cholesky factorization.
# A timestamp is printed before, between, and after the two operations
# so elapsed times can be read off the job's output file.
import numpy as np
import time


def run(n=10000):
    """Run the benchmark for an n x n matrix and return the Cholesky factor.

    Parameters
    ----------
    n : int
        Matrix dimension (default 10000, matching the original script).

    Returns
    -------
    numpy.ndarray
        Lower-triangular factor U with U @ U.T equal to X.T @ X.
    """
    x = np.random.normal(0, 1, size=(n, n))
    print(time.time())
    # X'X is symmetric positive definite (almost surely), so the
    # Cholesky factorization below is well defined.
    x = x.T.dot(x)
    print(time.time())
    U = np.linalg.cholesky(x)
    print(time.time())
    return U


if __name__ == '__main__':
    run()

intro.html

Lines changed: 534 additions & 0 deletions
Large diffs are not rendered by default.

intro.md

Lines changed: 686 additions & 0 deletions
Large diffs are not rendered by default.

intro_slides.html

Lines changed: 596 additions & 0 deletions
Large diffs are not rendered by default.

job.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
#!/bin/bash
# Example SLURM batch script for the Savio cluster: runs the calc.py
# benchmark on the savio2 partition under the co_stat condo account,
# capturing stdout and stderr in calc.out.
#
# Job name:
#SBATCH --job-name=test
#
# Account:
#SBATCH --account=co_stat
#
# Partition:
#SBATCH --partition=savio2
#
# Wall clock limit (30 seconds here):
#SBATCH --time=00:00:30
#
## Command(s) to run:
module load python/3.2.3 numpy
python3 calc.py >& calc.out

parallel-multi.R

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
## Per-destination summaries of Bay Area flight delays, computed in
## parallel across multiple nodes with doMPI.  Run under mpirun/srun;
## one MPI process becomes the master and the rest become foreach workers.

library(doMPI)

cl <- startMPIcluster()  # by default will start one fewer slave
registerDoMPI(cl)
clusterSize(cl)  # just to check

dat <- read.csv('/global/scratch/paciorek/bayArea.csv', header = FALSE,
                stringsAsFactors = FALSE)
names(dat)[16:18] <- c('delay', 'origin', 'dest')
table(dat$dest)

destVals <- unique(dat$dest)

# restrict to only columns we need to reduce copying time
dat2 <- subset(dat, select = c('delay', 'origin', 'dest'))

# some overhead in copying 'dat2' to worker processes...
results <- foreach(destVal = destVals) %dopar% {
    sub <- subset(dat2, dest == destVal)
    summary(sub$delay)
}

results

closeCluster(cl)
mpi.quit()

parallel-one.R

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
## Per-destination summaries of Bay Area flight delays, computed in
## parallel on a single node with doParallel, using one worker per
## core that SLURM allocated to the job.

library(doParallel)

# SLURM_CPUS_ON_NODE is only set inside a SLURM job; Sys.getenv()
# returns "" otherwise, which as.numeric() turns into NA and
# registerDoParallel(NA) then errors.  Fall back to a single core so
# the script also runs interactively.
nCores <- as.numeric(Sys.getenv('SLURM_CPUS_ON_NODE'))
if (is.na(nCores)) nCores <- 1
registerDoParallel(nCores)

dat <- read.csv('/global/scratch/paciorek/bayArea.csv', header = FALSE,
                stringsAsFactors = FALSE)
names(dat)[16:18] <- c('delay', 'origin', 'dest')
table(dat$dest)

destVals <- unique(dat$dest)

# each destination's delays are summarized by a separate worker
results <- foreach(destVal = destVals) %dopar% {
    sub <- subset(dat, dest == destVal)
    summary(sub$delay)
}

results

parallel.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
# Parallelize per-destination regressions of departure delay on year
# across IPython engines (an ipcluster must already be running).
# NOTE(review): IPython.parallel was split out as the 'ipyparallel'
# package in IPython 4 -- confirm which import newer setups need.
from IPython.parallel import Client

client = Client()
client.ids  # engine ids -- just to confirm the cluster is up

direct = client[:]  # direct view over all engines
direct.block = True
direct.apply(lambda : "Hello, World")

balanced = client.load_balanced_view()
balanced.block = True

import pandas
dat = pandas.read_csv('bayArea.csv', header = None)
dat.columns = ('Year','Month','DayofMonth','DayOfWeek','DepTime','CRSDepTime','ArrTime','CRSArrTime','UniqueCarrier','FlightNum','TailNum','ActualElapsedTime','CRSElapsedTime','AirTime','ArrDelay','DepDelay','Origin','Dest','Distance','TaxiIn','TaxiOut','Cancelled','CancellationCode','Diverted','CarrierDelay','WeatherDelay','NASDelay','SecurityDelay','LateAircraftDelay')

# make statsmodels available on every engine
direct.execute('import statsmodels.api as sm')

dat2 = dat.loc[:, ('DepDelay','Year','Dest','Origin')]
dests = dat2.Dest.unique()

# ship the data and the destination list to all engines; f() looks
# these up as globals when it runs remotely
shared = dict(dat2 = dat2, dests = dests)
direct.push(shared)

def f(id):
    # regress departure delay on year for the id-th destination
    sub = dat2.loc[dat2.Dest == dests[id],:]
    sub = sm.add_constant(sub)
    model = sm.OLS(sub.DepDelay, sub.loc[:,('const','Year')])
    results = model.fit()
    return results.params

import time
time.time()
parallel_result = balanced.map(f, range(len(dests)))
#result = map(f, range(len(dests)))
time.time()

# some NaN values because all 'Year' values are the same for some destinations

parallel_result

savioOverview.jpeg

47.3 KB
Loading

0 commit comments

Comments
 (0)