diff --git a/src/shellLaunch/LANL_kodiak/hAffinity.sh b/src/shellLaunch/Slurm/hPerfTest.sh old mode 100755 new mode 100644 similarity index 68% rename from src/shellLaunch/LANL_kodiak/hAffinity.sh rename to src/shellLaunch/Slurm/hPerfTest.sh index a6e1736..b2713a2 --- a/src/shellLaunch/LANL_kodiak/hAffinity.sh +++ b/src/shellLaunch/Slurm/hPerfTest.sh @@ -1,5 +1,34 @@ #!/bin/bash +nxs= +tpbs= +gpuas= + +affinity() +{ + export nxs="5e6 2e7 4e7 6e7" + export tpbs="64 128 256 512 768 1024" + gStep=20 + gEnd=$(( 10*$gStep )) + export gpuas=$(seq 0 $gStep $gEnd) +} + +sweep() { + ni="" + a=0 + for i in $(seq 10); do + a=$(( $a+10000000 + 1000000*($i-1) )) + ni+=" $a" + echo $a + done + export nxs="$ni" + export tpbs=$(seq 128 128 1024) + gStart=100 + gStep=5 + gEnd=$(( 10*$gStep+$gStart )) + export gpuas=$(seq $gStart $gStep $gEnd) +} + ## SLURM NOT GRID RPATH=$(python3 -c "import os; print(os.path.realpath('$0'))") THISPATH=$(dirname $RPATH) @@ -11,15 +40,15 @@ LOGPATH="${SRCPATH}/rsltlog" tfile="${LOGPATH}/otime.dat" opath="${SRCPATH}/rslts" nprocs=$(( $(nproc)/2 )) -npr=$(( $SLURM_NNODES*$nprocs )) +nper=${5:-$SLURM_NNODES} # nodes per run; defaults to the whole allocation when $5 is unset or empty +npr=$(( $nper*$nprocs )) gStep=20 gEnd=$(( 10*$gStep )) bindir=${SRCPATH}/bin testdir=${SRCPATH}/oneD/tests -nxs="5e6 2e7 4e7 6e7" -tpbs="64 128 256 512 768 1024" +affinity mkdir -p $opath mkdir -p $LOGPATH @@ -32,6 +61,7 @@ fi eq=$1 tf=$2 sc=$3 +rnode=${4:-0} # relative node offset passed to srun -r; 0 when $4 is unset or empty hname=$(hostname) confile="${testdir}/${eq}Test.json" @@ -51,7 +81,7 @@ do lx=$(( $nx/10000 + 1 )) S0=$SECONDS - srun -N $SLURM_NNODES -n $npr $execfile $sc $confile $opath tpb $tpb gpuA $g nX $nx lx $lx tf $tf 2>&1 | tee -a $logf + srun -N $nper -n $npr -r $rnode $execfile $sc $confile $opath tpb $tpb gpuA $g nX $nx lx $lx tf $tf 2>&1 | tee -a $logf echo --------------------------- | tee -a $logf echo -e "len|\t eq|\t sch|\t tpb|\t gpuA|\t nX" | tee -a $logf diff --git a/src/shellLaunch/LANL_kodiak/hSweep.sh b/src/shellLaunch/Slurm/hSweep.sh old mode 100755 new mode 100644 similarity index 96%
rename from src/shellLaunch/LANL_kodiak/hSweep.sh rename to src/shellLaunch/Slurm/hSweep.sh index 4570506..6923eb9 --- a/src/shellLaunch/LANL_kodiak/hSweep.sh +++ b/src/shellLaunch/Slurm/hSweep.sh @@ -19,10 +19,6 @@ npr=$(( $SLURM_NNODES*$nprocs )) rm -f $tfile rm -rf $opath -eqs=(euler heat) -tfs=(0.06 1.2) -nxStart=100000 - for ix in $(seq 2) do eq=$eqs[$ix] diff --git a/src/shellLaunch/driver b/src/shellLaunch/driver index 3b2a80a..3517022 100755 --- a/src/shellLaunch/driver +++ b/src/shellLaunch/driver @@ -6,11 +6,13 @@ import sys import subprocess as sp import shlex import time +import multiprocessing as mp helpstr = ''' USAGE: ./driver [RUNTYPE] -RUNTYPE: "affinity (screening run) or sweep (full experiment) +RUNTYPE: affinity (screening run) + sweep (full experiment) OPTIONS: FORMAT: var value @@ -20,9 +22,47 @@ OPTIONS: ''' +''' + RUNTYPE: Affinity or sweep + RUNSCRIPT: Realpath to shell runner + RUNDETAILS: t_Final for heat and euler by type + NPER: Number of nodes per experiment.
If > 0, run in parallel +''' + rundict = {"affinity": {"heat": 2.0, "euler": 0.08}, "sweep": {"heat": 1.2, "euler": 0.06} } +NPER = 2 + +def run_sweep(rs, rt, rd, e, s, r=0, nper=None): + nper = NPER if nper is None else nper # explicit nper overrides the module default + runcmd=[rs, e, rd[e], s, r] + ([nper] if nper else []) # script argv: eq tf sc rnode [nper] + runcmd=[str(c) for c in runcmd] + sp.call(runcmd) + +def sweep_serial(rs, rt, rd, eq, sch): + # Serial mode: nper=0 below, so no node count is forwarded to the script + tfirst=time.time() + for e in eq: + te=time.time() + for s in sch: + tbf=time.time() + run_sweep(rs, rt, rd, e, s, nper=0) + taf=time.time() + print(rt, e, s, "A Completed in ", taf-tbf) + print(rt, e, sch, " AA Completed in ", taf-te) + print(rt, eq, sch, " AAA combos Completed in ", taf-tfirst) + +# Run every (equation, scheme) combination concurrently with multiprocessing. +# Combination k is given relative node offset k*NPER; NPER is nodes per combination. +def sweep_parallel(rs, rt, rd, eq, sch): + combs = [(e, s) for e in eq for s in sch] + ncomb = len(combs) + args = [(rs, rt, rd, combs[k][0], combs[k][1], k*NPER) for k in range(ncomb)] + + with mp.Pool(ncomb) as p: + p.starmap(run_sweep, args) + def inputError(strer): print("\n"+strer+"\n") print(helpstr) @@ -34,7 +74,7 @@ if __name__ == "__main__": args=sys.argv runtype=args[1] - runscript=op.join(op.join(tpath, "LANL_kodiak"), "h"+runtype.title()+".sh") + runscript=op.join(op.join(tpath, "Slurm"), "hPerfTest.sh") rundetails=rundict[runtype] eq=rundetails.keys() sch=["S", "C"] @@ -52,16 +92,5 @@ if __name__ == "__main__": else: inputError("a - " + a + ", is not a valid option") - tfirst=time.time() - for e in eq: - te=time.time() - for s in sch: - tbf=time.time() - runcmd=[runscript, e, rundetails[e], s] - runcmd=[str(r) for r in runcmd] - print(runcmd) - sp.call(runcmd) - taf=time.time() - print(runtype, e, s, " Completed in ", taf-tbf) - print(runtype, e, sch, " ALL Completed in ", taf-te) - print(runtype, eq, sch, " ALL combos Completed in ", taf-tfirst) + sweeper = sweep_parallel if NPER else sweep_serial + sweeper(runscript, runtype, rundetails, eq, sch)