Skip to content
This repository has been archived by the owner on May 28, 2020. It is now read-only.

Commit

Permalink
New Test Run Driver API_ Not all the way there
Browse files Browse the repository at this point in the history
  • Loading branch information
OSUmageed committed Apr 29, 2019
1 parent dc33a56 commit fbba956
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 23 deletions.
38 changes: 34 additions & 4 deletions src/shellLaunch/LANL_kodiak/hAffinity.sh → src/shellLaunch/Slurm/hPerfTest.sh
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,5 +1,34 @@
#!/bin/bash

nxs=
tpbs=
gpuas=

# Parameter set for the screening ("affinity") run.
# Exports nxs (grid sizes), tpbs (threads-per-block values) and gpuas
# (the sequence 0, gStep, ..., gEnd). gStep and gEnd stay as plain shell
# globals, exactly as assigned here.
affinity()
{
    nxs="5e6 2e7 4e7 6e7"
    tpbs="64 128 256 512 768 1024"
    export nxs tpbs

    gStep=20
    gEnd=$(( 10 * gStep ))

    gpuas=$(seq 0 "$gStep" "$gEnd")
    export gpuas
}

# Parameter set for the full-experiment ("sweep") run.
# Exports nxs (ten space-separated grid sizes), tpbs (128..1024 in steps of
# 128) and gpuas (gStart..gEnd in steps of gStep). gStart, gStep and gEnd are
# left as plain shell globals.
sweep() {
    ni=""
    a=0
    for i in $(seq 10); do
        # Bash arithmetic is integer-only, so 1e7 and 1e6 are written out in
        # full; each size adds 1e7 plus a step that grows by 1e6 per iteration.
        a=$(( a + 10000000 + 1000000 * (i - 1) ))
        # Collect the sizes as a space-separated list (no leading space).
        ni="${ni:+$ni }$a"
        echo $a
    done
    # Export the accumulated list itself, not the literal name "ni".
    export nxs="$ni"
    export tpbs=$(seq 128 128 1024)
    gStart=100
    gStep=5
    gEnd=$(( 10 * gStep + gStart ))
    export gpuas=$(seq $gStart $gStep $gEnd)
}

## SLURM NOT GRID
RPATH=$(python3 -c "import os; print(os.path.realpath('$0'))")
THISPATH=$(dirname $RPATH)
Expand All @@ -11,15 +40,15 @@ LOGPATH="${SRCPATH}/rsltlog"
tfile="${LOGPATH}/otime.dat"
opath="${SRCPATH}/rslts"
nprocs=$(( $(nproc)/2 ))
npr=$(( $SLURM_NNODES*$nprocs ))
nper=${5-:$SLURM_NNODES}
npr=$(( $nper*$nprocs ))

gStep=20
gEnd=$(( 10*$gStep ))
bindir=${SRCPATH}/bin
testdir=${SRCPATH}/oneD/tests

nxs="5e6 2e7 4e7 6e7"
tpbs="64 128 256 512 768 1024"
affinity

mkdir -p $opath
mkdir -p $LOGPATH
Expand All @@ -32,6 +61,7 @@ fi
eq=$1
tf=$2
sc=$3
rnode=${4-:0}
hname=$(hostname)

confile="${testdir}/${eq}Test.json"
Expand All @@ -51,7 +81,7 @@ do
lx=$(( $nx/10000 + 1 ))
S0=$SECONDS

srun -N $SLURM_NNODES -n $npr $execfile $sc $confile $opath tpb $tpb gpuA $g nX $nx lx $lx tf $tf 2>&1 | tee -a $logf
srun -N $SLURM_NNODES -n $npr -r $rnode $execfile $sc $confile $opath tpb $tpb gpuA $g nX $nx lx $lx tf $tf 2>&1 | tee -a $logf

echo --------------------------- | tee -a $logf
echo -e "len|\t eq|\t sch|\t tpb|\t gpuA|\t nX" | tee -a $logf
Expand Down
4 changes: 0 additions & 4 deletions src/shellLaunch/LANL_kodiak/hSweep.sh → src/shellLaunch/Slurm/hSweep.sh
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,6 @@ npr=$(( $SLURM_NNODES*$nprocs ))
rm -f $tfile
rm -rf $opath

eqs=(euler heat)
tfs=(0.06 1.2)
nxStart=100000

for ix in $(seq 2)
do
eq=$eqs[$ix]
Expand Down
59 changes: 44 additions & 15 deletions src/shellLaunch/driver
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ import sys
import subprocess as sp
import shlex
import time
import multiprocessing as mp

helpstr = '''
USAGE: ./driver [RUNTYPE] <OPTIONS>
RUNTYPE: "affinity (screening run) or sweep (full experiment)
RUNTYPE: "affinity (screening run)
sweep (full experiment)
OPTIONS:
FORMAT: var value
Expand All @@ -20,9 +22,47 @@ OPTIONS:
'''

'''
RUNTYPE: Affinity or sweep
RUNSCRIPT: Realpath to shell runner
RUNDETAILS: t_Final for heat and euler by type
NPER: Number of nodes per experiment. If > 0, the value is passed on to the runner.
'''

rundict = {"affinity": {"heat": 2.0, "euler": 0.08},
"sweep": {"heat": 1.2, "euler": 0.06} }

# Nodes per experiment. A truthy value is appended to the runner command and
# is also used to space out the node offsets of parallel runs.
NPER = 2


def run_sweep(rs, rt, rd, e, s, r=0, nper=None):
    """Launch the shell runner for one (equation, scheme) combination.

    Args:
        rs: Path to the shell runner script.
        rt: Run type name; placed directly after the script in the command.
        rd: Mapping from equation name to its final time.
        e: Equation name; must be a key of ``rd``.
        s: Scheme identifier.
        r: Node offset forwarded to the runner.
        nper: Nodes per experiment. ``None`` (the default) falls back to the
            module-level ``NPER``; any falsy value leaves the argument out
            of the command.

    Returns:
        The runner's exit status, as returned by ``subprocess.call``.

    Raises:
        KeyError: if ``e`` is not a key of ``rd``.
    """
    if nper is None:
        nper = NPER
    # NOTE(review): the command is [script, run type, equation, t_final,
    # scheme, node offset(, nper)] -- confirm the runner script parses its
    # positional arguments in this order.
    runcmd = [rs, rt, e, rd[e], s, r]
    if nper:
        runcmd.append(nper)
    return sp.call([str(arg) for arg in runcmd])


def sweep_serial(rs, rd, rt, eq, sch):
    """Run every (equation, scheme) combination one after another.

    The positional order matches the call site in ``__main__``, which passes
    the run type second and the details mapping third; note that this is the
    reverse of the ``rt``/``rd`` naming used by ``run_sweep``.

    Args:
        rs: Path to the shell runner script.
        rd: Run type name.
        rt: Mapping from equation name to its final time.
        eq: Iterable of equation names.
        sch: Iterable of scheme identifiers.
    """
    tfirst = time.time()
    for e in eq:
        te = time.time()
        for s in sch:
            tbf = time.time()
            # Serial runs do not pass a nodes-per-experiment argument.
            run_sweep(rs, rd, rt, e, s, nper=False)
            print(rd, e, s, "A Completed in ", time.time() - tbf)
        print(rd, e, sch, " AA Completed in ", time.time() - te)
    print(rd, eq, sch, " AAA combos Completed in ", time.time() - tfirst)


def sweep_parallel(rs, rd, rt, eq, sch):
    """Run every (equation, scheme) combination concurrently.

    One worker process is started per combination, and combination ``k`` is
    given the node offset ``k * NPER``. The positional order matches the call
    site in ``__main__`` (run type second, details mapping third).

    Args:
        rs: Path to the shell runner script.
        rd: Run type name.
        rt: Mapping from equation name to its final time.
        eq: Iterable of equation names.
        sch: Iterable of scheme identifiers.

    Returns:
        List of runner exit statuses, in combination order (empty when there
        are no combinations).
    """
    combs = [(e, s) for e in eq for s in sch]
    if not combs:
        # mp.Pool(0) raises ValueError; nothing to run anyway.
        return []
    args = [(rs, rd, rt, e, s, k * NPER) for k, (e, s) in enumerate(combs)]
    # starmap unpacks each tuple into run_sweep's positional parameters.
    with mp.Pool(len(combs)) as pool:
        return pool.starmap(run_sweep, args)

def inputError(strer):
print("\n"+strer+"\n")
print(helpstr)
Expand All @@ -34,7 +74,7 @@ if __name__ == "__main__":

args=sys.argv
runtype=args[1]
runscript=op.join(op.join(tpath, "LANL_kodiak"), "h"+runtype.title()+".sh")
runscript=op.join(op.join(tpath, "Slurm"), "hPerfTest.sh")
rundetails=rundict[runtype]
eq=rundetails.keys()
sch=["S", "C"]
Expand All @@ -52,16 +92,5 @@ if __name__ == "__main__":
else:
inputError("a - " + a + ", is not a valid option")

tfirst=time.time()
for e in eq:
te=time.time()
for s in sch:
tbf=time.time()
runcmd=[runscript, e, rundetails[e], s]
runcmd=[str(r) for r in runcmd]
print(runcmd)
sp.call(runcmd)
taf=time.time()
print(runtype, e, s, " Completed in ", taf-tbf)
print(runtype, e, sch, " ALL Completed in ", taf-te)
print(runtype, eq, sch, " ALL combos Completed in ", taf-tfirst)
sweep_serial(runscript, runtype, rundetails, eq, sch)
sweep_parallel(runscript, runtype, rundetails, eq, sch)

0 comments on commit fbba956

Please sign in to comment.