Skip to content

Commit ccde191

Browse files
committed
initial commit
1 parent a7ade47 commit ccde191

8 files changed

+207
-2
lines changed

README.md

+13-2
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,13 @@
1-
# hpc_utils
2-
Simple Scripts used to interact with the HPC
1+
# HPC Utils
2+
3+
Provides a number of convenience functions for working with the HPC.
4+
5+
### Setup
6+
7+
1. Copy the contents of `host/bin` to a user-executable location in your remote host's `$PATH`, typically `$HOME/bin`
8+
2. Create a Directory `${HOME}/Notebooks` to hold your Jupyter Notebooks
9+
3. Set up a Conda environment in your `$HOME` directory
10+
4. Setup Jupyter-Lab with single user config (good to also setup password authentication)
11+
5. Exchange SSH keys with client machine (laptop)
12+
6. Copy the contents of `local/bin` to a user-executable location in your local host's (Mac laptop) `$PATH`, typically `$HOME/bin`
13+

host/bin/interactive.sh

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#!/bin/bash
2+
3+
## The Queue Name / Type
4+
## The interactive Queue is the one we are interested in
5+
HAS_INTERACTIVE='interactive'
6+
7+
#######################################
# Look at the current batch jobs and print the job ID of the first one
# whose `bjobs` line mentions the queue name stored in HAS_INTERACTIVE.
# Globals:   HAS_INTERACTIVE (read) - substring searched for in each line
# Arguments: none
# Outputs:   the matching job ID on stdout; nothing when no line matches
# Returns:   0 normally; 1 when HAS_INTERACTIVE is empty or unset
#######################################
function get_jobid(){
  local listing line job_id

  # An empty pattern would match every line, including the column header.
  if [[ -z "${HAS_INTERACTIVE:-}" ]]; then
    echo "get_jobid: HAS_INTERACTIVE is not set" >&2
    return 1
  fi

  listing=$(bjobs -o "jobid queue")

  while IFS= read -r line; do
    if [[ "${line}" == *"${HAS_INTERACTIVE}"* ]]; then
      # The job ID is the first whitespace-delimited field of the line.
      read -r job_id _ <<< "${line}"
      printf '%s\n' "${job_id}"
      return 0
    fi
  done <<< "${listing}"
}
19+
20+
## Capture job IDs and queues in a variable
bjobs_output=$(bjobs -o "jobid queue")

## If we are already running an interactive job, attach to it.
## If we are not running an interactive job, start one.
if [[ "${bjobs_output}" == *"${HAS_INTERACTIVE}"* ]]; then
  jobid=$(get_jobid)
  echo "Interactive session ${jobid} is already running!!"
  battach "${jobid}"
else
  ## The resource string is quoted so the shell never treats the square
  ## brackets as a glob pattern.
  bsub -Is -q interactive -n 2 -W 4:00 -R "rusage[mem=16GB]" /bin/bash
fi

host/bin/interactive_gpu.sh

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#!/bin/bash
2+
#######################################
# Print the job ID of the first job whose `bjobs` line mentions the
# queue name stored in HAS_INTERACTIVE.
# Globals:   HAS_INTERACTIVE (read) - substring searched for in each line
# Arguments: none
# Outputs:   the matching job ID on stdout; nothing when no line matches
# Returns:   0 normally; 1 when HAS_INTERACTIVE is empty or unset
#######################################
function get_jobid(){
  local listing line job_id

  # An empty pattern would match every line, including the column header.
  if [[ -z "${HAS_INTERACTIVE:-}" ]]; then
    echo "get_jobid: HAS_INTERACTIVE is not set" >&2
    return 1
  fi

  listing=$(bjobs -o "jobid queue")

  while IFS= read -r line; do
    if [[ "${line}" == *"${HAS_INTERACTIVE}"* ]]; then
      # The job ID is the first whitespace-delimited field of the line.
      read -r job_id _ <<< "${line}"
      printf '%s\n' "${job_id}"
      return 0
    fi
  done <<< "${listing}"
}
12+
13+
14+
# Queue name to look for in the job listing; get_jobid reads it as well.
HAS_INTERACTIVE='gpu'

# Job IDs and queues of the current jobs.
bjobs_output=$(bjobs -o "jobid queue")

# Attach to a job that is already in the gpu queue, otherwise submit a new
# interactive one.
if [[ "${bjobs_output}" == *"${HAS_INTERACTIVE}"* ]]; then
  jobid=$(get_jobid)
  echo "GPU session ${jobid} is already running!!"
  battach "${jobid}"
else
  bsub -Is -q gpu -gpu "num=1:mode=exclusive_process:mps=yes:j_exclusive=yes:gvendor=nvidia" /bin/bash
fi

host/bin/launch_jupyter_lab.sh

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#!/bin/bash
2+
3+
#######################################
# Print the job ID of the first job whose `bjobs` line mentions the
# job name stored in HAS_JOB_NAME.
# Globals:   HAS_JOB_NAME (read) - substring searched for in each line
# Arguments: none
# Outputs:   the matching job ID on stdout; nothing when no line matches
# Returns:   0 normally; 1 when HAS_JOB_NAME is empty or unset
#######################################
function get_jobid(){
  local listing line job_id

  # An empty pattern would match every line, including the column header.
  if [[ -z "${HAS_JOB_NAME:-}" ]]; then
    echo "get_jobid: HAS_JOB_NAME is not set" >&2
    return 1
  fi

  listing=$(bjobs -o "jobid job_name")

  while IFS= read -r line; do
    if [[ "${line}" == *"${HAS_JOB_NAME}"* ]]; then
      # The job ID is the first whitespace-delimited field of the line.
      read -r job_id _ <<< "${line}"
      printf '%s\n' "${job_id}"
      return 0
    fi
  done <<< "${listing}"
}
13+
14+
# Job name to look for in the job listing; get_jobid reads it as well.
HAS_JOB_NAME='start_jupyter_lab.sh'

# Job IDs and job names of the current jobs.
bjobs_output=$(bjobs -o "jobid job_name")

case "${bjobs_output}" in
  *"${HAS_JOB_NAME}"*)
    # A job with that name is already listed: report its ID and submit nothing.
    jobid=$(get_jobid)
    echo "GPU session $jobid is already running!!"
    #battach $jobid
    ;;
  *)
    # Nothing listed under that name yet: submit start_jupyter_lab.sh to the gpu queue.
    bsub -Is -q gpu -W 6:00 -n 4 -R "rusage[mem=16384]" -gpu "num=1:mode=exclusive_process:mps=yes:j_exclusive=yes:gvendor=nvidia" start_jupyter_lab.sh
    ;;
esac
24+
25+
26+
27+
28+

host/bin/path_ls.sh

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
#!/bin/bash
2+
#######################################
# Print each entry of PATH on its own line.
# Globals:   PATH (read)
# Arguments: none
# Outputs:   one PATH entry per line on stdout
#######################################
path_ls() {
  # Parameter expansion keeps the value intact (no word splitting or
  # globbing) and does not depend on sed understanding "\n" in a replacement.
  printf '%s\n' "${PATH//:/$'\n'}"
}

path_ls

host/bin/start_jupyter_lab.sh

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#!/bin/bash
2+
# Directory that holds the Jupyter address file and log files.
VAR_DIR="${HOME}/var/jupyter"
mkdir -p -- "${VAR_DIR}"
# File in which "<ip>:<port>" of the started server is recorded.
JUPYTER_IP_FILE="${VAR_DIR}/jupyter_host_ip.txt"
5+
6+
#######################################
# Print the first port at or above 8001 that does not appear as a local
# port in the `netstat -tuln` listing.
# Globals:   none
# Arguments: none
# Outputs:   the chosen port number on stdout
# Returns:   0 when a port was found; 1 when every port up to 65535 is listed
#######################################
function start_on_port(){
  local used_ports candidate
  local -r first_port=8001 last_port=65535

  # Field 4 of each netstat line is "<address>:<port>". The port is whatever
  # follows the last colon (IPv6 addresses contain colons themselves); header
  # lines drop out because their last piece is not a number.
  used_ports=$(netstat -tuln \
    | awk '{ n = split($4, piece, ":"); if (piece[n] ~ /^[0-9]+$/) print piece[n] }' \
    | sort -n -u)

  for (( candidate = first_port; candidate <= last_port; candidate++ )); do
    # -x compares whole lines, so 8003 is not mistaken for 18003.
    if ! grep -Fxq -- "${candidate}" <<< "${used_ports}"; then
      printf '%s\n' "${candidate}"
      return 0
    fi
  done

  echo "start_on_port: no free port between ${first_port} and ${last_port}" >&2
  return 1
}
36+
37+
# Jupyter Lab is started from ${HOME}/Notebooks; stop if we cannot get there.
mkdir -p -- "${HOME}/Notebooks"
cd -- "${HOME}/Notebooks" || {
  echo "Cannot change directory to ${HOME}/Notebooks" >&2
  exit 1
}

# Delete any old jupyter ip file if it exists;
# this file is checked after startup to set up the ssh tunnel.
if [[ -f "${JUPYTER_IP_FILE}" ]]; then
  rm -- "${JUPYTER_IP_FILE}"
fi

# Make sure we are starting jupyter lab in the base conda environment.
active_env=$(conda info | grep -E "active environment" | cut -d: -f2 | tr -d '[:space:]')
if [[ "${active_env}" != "base" ]]; then
  # Fail with a non-zero status so callers can tell nothing was started.
  echo "You Must start in the base conda environment!" >&2
  exit 1
fi

# Address of the first interface line that mentions netmask 255.255.252.0.
# NOTE(review): the netmask is presumably specific to this cluster - confirm.
host_ip=$(ifconfig 2>/dev/null | awk '/inet/ && /255\.255\.252\.0/ { print $2; exit }')
if [[ -z "${host_ip}" ]]; then
  echo "Could not find a network interface with netmask 255.255.252.0" >&2
  exit 1
fi

host_port=$(start_on_port)
if [[ -z "${host_port}" ]]; then
  echo "Could not find a free port for jupyter-lab" >&2
  exit 1
fi

echo jupyter-lab --ip "${host_ip}" --port="${host_port}"
# Record the address so the client side can open its ssh tunnel to it.
echo "${host_ip}:${host_port}" > "${JUPYTER_IP_FILE}"
nohup jupyter-lab --no-browser --ip "${host_ip}" --port="${host_port}" \
  1> "${VAR_DIR}/jupyter-lab.log" 2> "${VAR_DIR}/jupyter-lab.err" &

host/bin/stop_jupyter_lab.sh

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/bin/bash
2+
#######################################
# Print the job ID of the first job whose `bjobs` line mentions the
# job name stored in HAS_JOB_NAME.
# Globals:   HAS_JOB_NAME (read) - substring searched for in each line
# Arguments: none
# Outputs:   the matching job ID on stdout; nothing when no line matches
# Returns:   0 normally; 1 when HAS_JOB_NAME is empty or unset
#######################################
function get_jobid(){
  local listing line job_id

  # An empty pattern would match every line, including the column header.
  if [[ -z "${HAS_JOB_NAME:-}" ]]; then
    echo "get_jobid: HAS_JOB_NAME is not set" >&2
    return 1
  fi

  listing=$(bjobs -o "jobid job_name")

  while IFS= read -r line; do
    if [[ "${line}" == *"${HAS_JOB_NAME}"* ]]; then
      # The job ID is the first whitespace-delimited field of the line.
      read -r job_id _ <<< "${line}"
      printf '%s\n' "${job_id}"
      return 0
    fi
  done <<< "${listing}"
}
12+
13+
14+
# Job IDs and job names of the current jobs.
bjobs_output=$(bjobs -o "jobid job_name")

# Job name to look for in the listing; get_jobid reads it as well.
HAS_JOB_NAME='start_jupyter_lab.sh'
if [[ "${bjobs_output}" == *"${HAS_JOB_NAME}"* ]]; then
  jobid=$(get_jobid)
  # get_jobid takes its own listing, which may no longer contain the job;
  # never call bkill without an explicit job ID.
  if [[ -n "${jobid}" ]]; then
    echo "Killing ${HAS_JOB_NAME} job ${jobid}"
    bkill "${jobid}"
  fi
fi

local/bin/launch_jupyter_remote.sh

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#!/bin/bash
2+
3+
# Requires that SSH keys have been exchanged for no password authentication
4+
remote_host_user="[email protected]"
5+
6+
# Login name: everything before the first "@" of remote_host_user (the whole
# value when it contains no "@"). Done with parameter expansion so the value
# is never word-split or globbed.
username="${remote_host_user%%@*}"
7+
8+
#######################################
# Print the first port, counting up from the given one, on which `lsof`
# reports no listening TCP socket.
# Arguments: $1 - port number to start searching from
# Outputs:   the chosen port number on stdout
# Returns:   0 when a port was found; 1 when the argument is not a port
#            number or every port up to 65535 has a listener
#######################################
function start_on_port(){
  local port=${1:-}
  local listeners

  if ! [[ "${port}" =~ ^[0-9]+$ ]]; then
    echo "start_on_port: expected a port number, got '${port}'" >&2
    return 1
  fi
  # Force base 10 so a value with leading zeros is not read as octal.
  port=$(( 10#${port} ))

  while (( port <= 65535 )); do
    # -t prints only process IDs, so empty output means nothing listens here.
    listeners=$(lsof -i :"${port}" -sTCP:LISTEN -t)
    if [[ -z "${listeners}" ]]; then
      printf '%s\n' "${port}"
      return 0
    fi
    port=$(( port + 1 ))
  done

  echo "start_on_port: no free port found up to 65535" >&2
  return 1
}
24+
25+
26+
# Run launch_jupyter_lab.sh on the remote host and carry on only if that
# command succeeded.
if ssh "${remote_host_user}" 'launch_jupyter_lab.sh'; then
  # Read the "<ip>:<port>" recorded by the host-side script. The single quotes
  # leave $HOME for the remote shell to expand, so the remote home directory
  # does not have to be /home/<user>.
  JUPYTER_IP_PORT=$(ssh "${remote_host_user}" 'cat "$HOME/var/jupyter/jupyter_host_ip.txt"')
  if [[ -z "${JUPYTER_IP_PORT}" ]]; then
    echo "Could not read the Jupyter address from ${remote_host_user}" >&2
    exit 1
  fi

  # First local port from 8088 upwards without a listener.
  start_port=$(start_on_port 8088)
  echo "Jupyter Lab is running at http://127.0.0.1:${start_port}"
  # Forward the local port to the remote Jupyter address; this ssh session
  # keeps the tunnel open until it is closed.
  ssh -L "${start_port}:${JUPYTER_IP_PORT}" "${remote_host_user}"
fi

0 commit comments

Comments
 (0)