-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsingleCuffdiff.sh
executable file
·133 lines (105 loc) · 5.12 KB
/
singleCuffdiff.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/bin/bash
#--------------------------------------------------------------------------------------------------------------
# singleCuffdiff - find significant changes in transcript expression, splicing, and promoter use
#
# - Run Cuffdiff by using the merged transcriptome assembly along with the BAM files from TopHat for each replicate
#
# - The default is to use EdgeR parameters (?)
#
# - Can send you emails when each job starts to execute and when each is finished so
# that you know when to submit the jobs for the next step
#
#--------------------------------------------------------------------------------------------------------------
#----SETTINGS---------------
# TAILOR_CONFIG names the configuration file that is sourced here; every
# CUFFDIFF_*, JOBS_*, SCHEDULER and e-mail variable used further down is
# expected to come from it. Abort early if it is not set, because nothing
# below can work without it.
: "${TAILOR_CONFIG:?TAILOR_CONFIG must be set to the path of the configuration file}"
source "${TAILOR_CONFIG}"
#---------------------------
#----COMMAND LINE ARGUMENTS-----------------------------------------------------------
# $1  file listing the replicates of experiment 1, one per line
# $2  name of experiment 1
# $3  file listing the replicates of experiment 2, one per line
# $4  name of experiment 2
if [[ ! -r "${1:-}" || ! -r "${3:-}" ]]; then
  {
    echo "ERROR: both replicate list files must exist and be readable"
    echo "Usage: ${0##*/} <group1ReplicateList> <group1Name> <group2ReplicateList> <group2Name>"
  } >&2
  exit 2
fi
# Quoting the redirection targets keeps paths containing spaces intact.
readarray -t experiment1Group < "$1" # read in all replicates, -t strips newlines
experiment1Name=$2
readarray -t experiment2Group < "$3" # read in all replicates, -t strips newlines
experiment2Name=$4
#-------------------------------------------------------------------------------------
#----JOB SUBMISSION PARAMETERS---------------------------------------------------------------------
# Number of processors requested for the job (also passed to cuffdiff via -p).
PROCESSORS="7"
# Memory request, PER PROCESSOR.
# Reference values: 2048=2G, 4096=4G, 8192=8G, 16384=16G, 32768=32G, 65536=64G
MEMORY=8036
# Run-time limit as HH:MM.
# Reference values: 72:00=3 days, 96:00=4 days, 192:00=8 days, 384:00=16 days, 768:00=32 days
DURATION=60:00
# Scheduler queue: short = max 4 hours; long = max 30 days
QUEUE=long
#--------------------------------------------------------------------------------------------------
#----PATHS-----------------------------------------------------------------------------------------
# Inputs and the output root are taken from the CUFFDIFF_* configuration values.
OUTPUT=$CUFFDIFF_OUTPUT
INPUT_GTF=$CUFFDIFF_INPUT_GTF
INPUT_ALIGNMENTS=$CUFFDIFF_INPUT_ALIGNMENTS
# Generated job scripts and scheduler output files live below the output root.
JOBS="${OUTPUT}/${JOBS_OUT_DIR}"
SCRIPTS="${OUTPUT}/${JOBS_SCRIPTS_DIR}"
#--------------------------------------------------------------------------------------------------
#----OUTPUT------------------
# Make sure the output root, the job-script directory and the job-output
# directory exist. `mkdir -p` creates missing parents and is a no-op for
# directories that are already there, so no separate existence test is
# needed. Stop on failure: the job script and the jobs log are written
# into these directories further down.
if ! mkdir -p -- "${OUTPUT}" "${SCRIPTS}" "${JOBS}"; then
  echo "ERROR: could not create output directories under '${OUTPUT}'" >&2
  exit 1
fi
#----------------------------
# Program the generated job script will run; its name is also used when
# naming the job script, the scheduler output files and the jobs log.
COMMAND="cuffdiff"
# Label for this comparison: the configured CUFFDIFF_FOLDCHANGE_OUTPUT value
# is passed through `eval echo`, so any variable references it contains are
# expanded at this point in the script.
FOLD_CHANGE="$(eval echo $CUFFDIFF_FOLDCHANGE_OUTPUT)"
#-------------------------------------------------------------------
# Create a list of all the BAM files for the 2 experiments
#-------------------------------------------------------------------
echo -e "\nGroup 1 size is ${#experiment1Group[@]}"
echo -e "\nGroup 1 is (${experiment1Group[*]})"
echo -e "\nGroup 2 size is ${#experiment2Group[@]}"
echo -e "\nGroup 2 is (${experiment2Group[*]})"

#######################################
# Build the comma-separated alignment file list for one experiment group.
# Globals:
#   INPUT_ALIGNMENTS        (read) directory holding the <replicate>_out dirs
#   CUFFDIFF_ALIGNMENT_FILE (read) alignment file name inside each of them
# Arguments:
#   $@ - replicate names, one per argument
# Outputs:
#   Writes the list to stdout, without a trailing newline. Each entry is
#   ${INPUT_ALIGNMENTS}/<replicate>_out/${CUFFDIFF_ALIGNMENT_FILE}.
#######################################
build_alignment_list() {
  local replicate list=""
  for replicate in "$@"; do
    # Trim surrounding whitespace (including a stray carriage return) so that
    # a padded line in the replicate list does not end up inside the path.
    replicate="${replicate#"${replicate%%[![:space:]]*}"}"
    replicate="${replicate%"${replicate##*[![:space:]]}"}"
    # A blank line in the replicate list is not a replicate; skipping it
    # avoids emitting a path of the form <dir>/_out/<file>.
    [[ -n "${replicate}" ]] || continue
    # Prefix a comma for every entry after the first.
    list+="${list:+,}${INPUT_ALIGNMENTS}/${replicate}_out/${CUFFDIFF_ALIGNMENT_FILE}"
  done
  printf '%s' "${list}"
}

experiment1Files=$(build_alignment_list "${experiment1Group[@]}")
experiment2Files=$(build_alignment_list "${experiment2Group[@]}")
#--------------------------------------------------------------------
# Assemble the cuffdiff command line and write it into a fresh job script.

# EXTRA_CUFFDIFF_PARAMETERS is passed through `eval echo`, so variable
# references inside the configured value are expanded here.
# NOTE(review): eval executes whatever the configuration contains; this is
# only acceptable as long as the configuration file is trusted.
EXTRA_PARAMETERS=$(eval echo "$EXTRA_CUFFDIFF_PARAMETERS")
echo -e "\n${EXTRA_PARAMETERS}"

COMMAND_LINE="${COMMAND} -p ${PROCESSORS} ${EXTRA_PARAMETERS} -o ${OUTPUT}/${FOLD_CHANGE} ${INPUT_GTF} $experiment1Files $experiment2Files"

# Create a uniquely named job script inside ${SCRIPTS}. Stop if that fails:
# without a script there is nothing to submit.
echo -e "\nmktemp -p ${SCRIPTS} ${COMMAND}.${FOLD_CHANGE}.XXXXXXXXXXX"
if ! tempScript=$(mktemp -p "${SCRIPTS}" "${COMMAND}.${FOLD_CHANGE}.XXXXXXXXXXX"); then
  echo "ERROR: could not create a job script in '${SCRIPTS}'" >&2
  exit 1
fi
echo -e "\n${tempScript}"

# Set the permissions once. The script stays readable and executable for
# everyone, but only its owner may modify it before the scheduler runs it.
chmod 755 "${tempScript}"

# Job script layout: load the environment modules, three newlines, then the
# cuffdiff command line.
printf 'source loadModules.sh\n\n\n%s\n' "${COMMAND_LINE}" > "${tempScript}"
# Build the scheduler command as an array so that every argument reaches the
# scheduler exactly as written: no word splitting, and no pathname expansion
# of bracketed resource strings such as span[hosts=1].
if [[ "${SCHEDULER}" == "sge" ]]; then
  # NOTE(review): DURATION and MEMORY are documented above as HH:MM and as a
  # plain per-processor number; confirm SGE reads h_rt/s_rt/vf in those units.
  SUBMIT_COMMAND=(qsub -q "${QUEUE}" -cwd -S /bin/bash -N "${FOLD_CHANGE}"
    -pe smp "${PROCESSORS}"
    -l "h_rt=${DURATION},s_rt=${DURATION},vf=${MEMORY}"
    -m eas)
  # Only pass -M when an address is configured; an empty value would make
  # qsub take the job script path as the mail address.
  if [[ -n "${USER_EMAIL}" ]]; then
    SUBMIT_COMMAND+=(-M "${USER_EMAIL}")
  fi
  SUBMIT_COMMAND+=("${tempScript}")
else
  # Any scheduler other than "sge" is submitted through bsub.
  # An earlier version of this command additionally passed
  # -R model==Intel_EM64T to request Intel processors only.
  # We may want to consider -R span[hosts=2] (MPI)
  SUBMIT_COMMAND=(bsub -q "${QUEUE}" -J "${FOLD_CHANGE}.${COMMAND}"
    -n "${PROCESSORS}"
    -R "span[hosts=1]" -R "rusage[mem=${MEMORY}]"
    -W "${DURATION}")
  # Only pass -u when an address is configured; an empty value would make
  # bsub take the following -B as the mail address.
  if [[ -n "${LSB_MAILTO}" ]]; then
    SUBMIT_COMMAND+=(-u "${LSB_MAILTO}")
  fi
  SUBMIT_COMMAND+=(-B
    -o "${JOBS}/${COMMAND}.${FOLD_CHANGE}.%J.out"
    -e "${JOBS}/${COMMAND}.${FOLD_CHANGE}.%J.error"
    bash "${tempScript}")
fi

# Append a record of this submission (timestamp, job script contents and the
# submit command) to the jobs log in the output directory.
jobsLog="${OUTPUT}/${COMMAND}.jobs.log"
{
  printf '\n# %s\n\n' "$(date)"
  printf '\n# Job Script\n\n'
  cat -- "${tempScript}"
  printf '\n# Job Submission\n%s\n\n' "${SUBMIT_COMMAND[*]}"
  printf '\n#-------------------------------------------------------------------------------------------------------\n'
} >> "${jobsLog}"

# Submit the job; the scheduler's own output goes straight to the terminal.
# A failed submission now ends the script with the scheduler's exit status
# instead of being hidden.
submitStatus=0
"${SUBMIT_COMMAND[@]}" || submitStatus=$?
if (( submitStatus != 0 )); then
  echo "ERROR: job submission failed with exit status ${submitStatus}" >&2
  exit "${submitStatus}"
fi

# The job script is deliberately left in place: the bsub command above runs
# it with `bash ${tempScript}`, so it must still exist when the job starts.
# rm ${tempScript}