Skip to content
This repository has been archived by the owner on May 28, 2020. It is now read-only.

Commit

Permalink
Fixed Bugs with slurm hSweep. Works and hSweep tests started. Updated…
Browse files Browse the repository at this point in the history
… gitignore for results and logs. Found a bug in cudaDeviceGetByPCIBusId (memory leak). Contacted NVIDIA. Applied a patch to gpuDetector that skips this call. Remove the patch once a corrected release is available.
  • Loading branch information
OSUmageed committed Apr 29, 2019
1 parent fbba956 commit 4bbebba
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 28 deletions.
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@ cubin/
/misc/
.vscode/
.idea/
*/*log/
temp*
*.pdf
src/*/trslt/*.json
src/*/rslts/*.json
src/trslt/
src/rslts/
src/*/tests/testResult/
src/*/utilities/js*
src/config.mk
Expand Down
Empty file modified src/shellLaunch/README.md
100755 → 100644
Empty file.
39 changes: 22 additions & 17 deletions src/shellLaunch/Slurm/hPerfTest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,18 @@ affinity()
sweep() {
ni=""
a=0
nb=$(printf "%.f" "1e6")
for i in $(seq 10); do
a=$(( $a+1e7 + 1e6*($i-1) ))
ni+=" "
a=$(( $nb*(9+$i)+$a ))
ni+="$a "
echo $a
done
export nxs=ni
export nxs=$ni
export tpbs=$(seq 128 128 1024)
gStart=100
gStep=5
gEnd=$(( 10*$gStep+$gStart ))
export gpuas=$(seq $gStart $gStep $gEnd)
gEnd=$(( 10*$gStep+$gStart ))
export gpuas=$(seq $gStart $gStep $gEnd)
}

## SLURM NOT GRID
Expand All @@ -40,15 +41,15 @@ LOGPATH="${SRCPATH}/rsltlog"
tfile="${LOGPATH}/otime.dat"
opath="${SRCPATH}/rslts"
nprocs=$(( $(nproc)/2 ))
nper=${5-:$SLURM_NNODES}
nper=${6-:$SLURM_NNODES}
npr=$(( $nper*$nprocs ))

gStep=20
gEnd=$(( 10*$gStep ))
bindir=${SRCPATH}/bin
testdir=${SRCPATH}/oneD/tests

affinity
$1

mkdir -p $opath
mkdir -p $LOGPATH
Expand All @@ -58,29 +59,33 @@ if [[ $# -lt 3 ]]; then
return 0
fi

eq=$1
tf=$2
sc=$3
rnode=${4-:0}
eq=$2
tf=$3
sc=$4
rnode=${5-:0}
hname=$(hostname)
hnm=${hname%%.*}

confile="${testdir}/${eq}Test.json"
execfile="${bindir}/${eq}"
logf="${LOGPATH}/${eq}_${sc}_AFF_${hname}.log"
logf="${LOGPATH}/${eq}_${sc}_AFF_${hnm}.log"
rm -f $logf
touch $logf

echo $tpbs $nxs $gpuas $logf

for tpb in $tpbs
do
for nxi in $nxs
for nx in $nxs
do
snx0=$SECONDS
printf -v nx "%.f" "$nxi"
for g in $(seq 0 $gStep $gEnd)
for g in $gpuas
do
echo -------- START ------------ | tee -a $logf
lx=$(( $nx/10000 + 1 ))
S0=$SECONDS


echo "srun -N $SLURM_NNODES -n $npr -r $rnode $execfile $sc $confile $opath tpb $tpb gpuA $g nX $nx lx $lx tf $tf"
srun -N $SLURM_NNODES -n $npr -r $rnode $execfile $sc $confile $opath tpb $tpb gpuA $g nX $nx lx $lx tf $tf 2>&1 | tee -a $logf

echo --------------------------- | tee -a $logf
Expand All @@ -92,6 +97,6 @@ do
snx1=$(( $SECONDS-$snx0 ))
echo All together $snx1 secs | tee -a $logf
snxout=$(( $snx1/60 ))
echo $eq "|" $sc "|" $tpb "|" $nxi :: $snxout >> $tfile
echo $eq "|" $sc "|" $tpb "|" $nx :: $snxout >> $tfile
done
done
9 changes: 5 additions & 4 deletions src/shellLaunch/driver
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,10 @@ def run_sweep(rs, rt, rd, e, s, r=0):
runcmd=[rs, rt, e, rundetails[e], s, r]
if NPER: runcmd.append(NPER)
runcmd=[str(r) for r in runcmd]
print(runcmd)
sp.call(runcmd)

def sweep_serial(rs, rd, rt, eq, sch):
def sweep_serial(rs, rt, rd, eq, sch):
NPER = False
tfirst=time.time()
for e in eq:
Expand All @@ -55,7 +56,7 @@ def sweep_serial(rs, rd, rt, eq, sch):

# Can run in parallel with multiprocess
# nper is nodes per eq, scheme combo
def sweep_parallel(rs, rd, rt, eq, sch):
def sweep_parallel(rs, rt, rd, eq, sch):
combs = [(e, s) for e in eq for s in sch]
ncomb = len(combs)
args = [(rs, rt, rd, combs[k][0], combs[k][1], k*NPER) for k in range(ncomb)]
Expand All @@ -73,7 +74,7 @@ if __name__ == "__main__":
print("MAKE SURE TO CLEAR THE PATHS IF THAT'S NECESSARY")

args=sys.argv
runtype=args[1]
runtype=args[1].lower()
runscript=op.join(op.join(tpath, "Slurm"), "hPerfTest.sh")
rundetails=rundict[runtype]
eq=rundetails.keys()
Expand All @@ -93,4 +94,4 @@ if __name__ == "__main__":
inputError("a - " + a + ", is not a valid option")

sweep_serial(runscript, runtype, rundetails, eq, sch)
sweep_parallel(runscript, runtype, rundetails, eq, sch)
# sweep_parallel(runscript, runtype, rundetails, eq, sch)
16 changes: 11 additions & 5 deletions src/utilities/gpuDetector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,17 @@

#include <cstring>
#include <iostream>
#include <sstream>
#include <string>

#include <cuda.h>
#include <cuda_runtime.h>
#include <mpi.h>
#include <unistd.h>

// There's a memory leak in cudaDeviceGetByPCIBusId, so avoid it and simply assign
// the device index until it's fixed. This makes most of the code here useless.
#define PCIBUS_BUG 1

int getHost(hvec &ids, hname *newHost)
{
char machineName[RLEN];
Expand Down Expand Up @@ -102,16 +105,19 @@ bool detector(const int ranko, const int sz, const int startpos)
int nset = 0;
int dev;
std::string pcistr;
std::stringstream bufs;
char bufs[20];

for (int i = startpos; i<machineSize; i++)
{
if ((nGo - nset) == 0) break;
if (i == machineRank)
{
bufs << std::hex << pcivec[3*nset] << ":" << pcivec[3*nset+1] << ":" << pcivec[3*nset+2];

cudaDeviceGetByPCIBusId(&dev, bufs.str().c_str());
#ifndef PCIBUS_BUG
sprintf(bufs, "%x:%x:%x", pcivec[3*nset],pcivec[3*nset+1],pcivec[3*nset+2]);
cudaDeviceGetByPCIBusId(&dev, bufs);
#else
dev=i-1;
#endif
cudaGetDeviceProperties(&props, dev);

if (!props.kernelExecTimeoutEnabled)
Expand Down

0 comments on commit 4bbebba

Please sign in to comment.