9
9
# ## as well as sbatch -c. The demux thread count remains fixed at 1.
10
10
# ## Note -c set to 4 and thread counts set to 7 during testing.
11
11
# SBATCH -c 2
12
- # SBATCH --gres=node_jobs:2
12
+ # ## Commented out for now, but there is a possibility it will be needed
13
+ # ## in the future.
14
+ # ##SBATCH --gres=node_jobs:2
13
15
14
16
15
17
echo " ---------------"
@@ -53,8 +55,8 @@ export TMPDIR=${TMPDIR}
53
55
export TMPDIR=$( mktemp -d)
54
56
echo $TMPDIR
55
57
56
- mkdir -p ${WKDIR} NuQCJob /fastp_reports_dir/html
57
- mkdir -p ${WKDIR} NuQCJob /fastp_reports_dir/json
58
+ mkdir -p ${WKDIR} /fastp_reports_dir/html
59
+ mkdir -p ${WKDIR} /fastp_reports_dir/json
58
60
59
61
export ADAPTER_ONLY_OUTPUT=${OUTPUT} /only-adapter-filtered
60
62
mkdir -p ${ADAPTER_ONLY_OUTPUT}
@@ -74,9 +76,8 @@ function mux-runner () {
74
76
75
77
jobd=${TMPDIR}
76
78
id_map=${jobd} /id_map
77
- seqs_r1=${jobd} /seqs.r1.fastq.gz
78
- seqs_r2=${jobd} /seqs.r2.fastq
79
- r1_filt=${jobd} /seqs.r1.adapter-removed.fastq.gz
79
+ seqs_reads=${jobd} /seqs.interleaved.fastq
80
+ seq_reads_filter_alignment=${jobd} /seqs.interleaved.filter_alignment.fastq
80
81
81
82
for i in $( seq 1 ${n} )
82
83
do
@@ -86,14 +87,19 @@ function mux-runner () {
86
87
base=$( echo ${line} | cut -f 3 -d" " )
87
88
r1_name=$( basename ${r1} .fastq.gz)
88
89
r2_name=$( basename ${r2} .fastq.gz)
89
- r1_adapter_only =${ADAPTER_ONLY_OUTPUT} /${r1_name} .fastq.gz
90
+ r_adapter_only =${ADAPTER_ONLY_OUTPUT} /${r1_name} .interleave .fastq.gz
90
91
91
92
s_name=$( basename " ${r1} " | sed -r ' s/\.fastq\.gz//' )
92
93
html_name=$( echo " $s_name .html" )
93
94
json_name=$( echo " $s_name .json" )
94
95
95
96
echo -e " ${i} \t${r1_name} \t${r2_name} \t${base} " >> ${id_map}
96
97
98
+ # movi, in the current version, works on the interleaved version of the
99
+ # fwd/rev reads, so we are going to take advantage of fastp's default output
100
+ # to minimize steps. Additionally, movi expects the input to not be
101
+ # gzipped, so we are not going to compress seqs_reads
102
+
97
103
fastp \
98
104
-l 100 \
99
105
-i ${r1} \
@@ -102,47 +108,39 @@ function mux-runner () {
102
108
--adapter_fasta fastp_known_adapters_formatted.fna \
103
109
--html REMOVED/qp-knight-lab-processing/qp_klp/tests/data/output_dir/NuQCJob/fastp_reports_dir/html/${html_name} \
104
110
--json REMOVED/qp-knight-lab-processing/qp_klp/tests/data/output_dir/NuQCJob/fastp_reports_dir/json/${json_name} \
105
- --stdout | gzip > ${r1_filt }
111
+ --stdout | gzip > ${r_adapter_only }
106
112
107
113
# multiplex and write adapter filtered data all at once
108
- zcat ${r1_filt } | \
114
+ zcat ${r_adapter_only } | \
109
115
sed -r " 1~4s/^@(.*)/@${i}${delimiter} \1/" \
110
- >> ${seqs_r1}
111
- cat ${r1_filt} | \
112
- gzip -c > ${r1_adapter_only} &
113
- wait
114
-
115
- rm ${r1_filt} &
116
- wait
116
+ >> ${seqs_reads}
117
117
done
118
118
119
119
# minimap/samtools pair commands are now generated in NuQCJob._generate_mmi_filter_cmds()
120
- # and passed to this template. This method assumes ${jobd} is the correct location to
121
- # filter files, the initial file is "${jobd}/seqs.r1.fastq"), and the output name is
122
- # "${jobd}/seqs.r1.ALIGN.fastq".
123
- minimap2 -2 -ax sr -t 1 /databases/minimap2/db_1.mmi ${jobd} /seqs.r1.fastq -a | samtools fastq -@ 1 -f 12 -F 256 > ${jobd} /foo
120
+ # and passed to this template.
121
+ minimap2 -2 -ax sr -t 1 /databases/minimap2/db_1.mmi ${jobd} /seqs.interleaved.fastq -a | samtools fastq -@ 1 -f 12 -F 256 > ${jobd} /foo
124
122
minimap2 -2 -ax sr -t 1 /databases/minimap2/db_2.mmi ${jobd} /foo -a | samtools fastq -@ 1 -f 12 -F 256 > ${jobd} /bar
125
- mv ${jobd} /bar ${jobd} /seqs.r1.ALIGN .fastq
123
+ mv ${jobd} /bar ${jobd} /seqs.interleaved.filter_alignment .fastq
126
124
[ -e ${jobd} /foo ] && rm ${jobd} /foo
127
125
[ -e ${jobd} /bar ] && rm ${jobd} /bar
128
126
129
127
/home/user/user_dir/Movi/build/movi-default query \
130
128
--index /scratch/movi_hg38_chm13_hprc94 \
131
- --read <( zcat ${jobd} /seqs.r1.ALIGN.fastq.gz ) \
129
+ --read ${seq_reads_filter_alignment} \
132
130
--stdout | gzip > ${jobd} /seqs.movi.txt.gz
133
131
134
132
python /home/user/user_dir/human_host_filtration/scripts/qiita_filter_pmls.py <( zcat ${jobd} /seqs.movi.txt.gz) | \
135
- seqtk subseq ${jobd} /seqs.r1.ALIGN.fastq.gz - | gzip > ${jobd} /seqs.r1. final.fastq.gz
133
+ seqtk subseq ${seq_reads_filter_alignment} - > ${jobd} /seqs.final.fastq
136
134
137
- REMOVED/sequence_processing_pipeline/scripts/splitter ${jobd} /seqs.r1. final.fastq \
135
+ REMOVED/sequence_processing_pipeline/scripts/splitter ${jobd} /seqs.final.fastq \
138
136
${jobd} /reads.r1.fastq ${delimiter} ${r1_tag} &
139
- REMOVED/sequence_processing_pipeline/scripts/splitter ${jobd} /seqs.r1. final.fastq \
137
+ REMOVED/sequence_processing_pipeline/scripts/splitter ${jobd} /seqs.final.fastq \
140
138
${jobd} /reads.r2.fastq ${delimiter} ${r2_tag} &
141
139
wait
142
140
fastq_pair -t 50000000 ${jobd} /reads.r1.fastq ${jobd} /reads.r2.fastq
143
141
144
142
# keep seqs.movi.txt.gz and migrate it to the NuQCJob directory.
145
- mv ${jobd} /seqs.movi.txt.gz REMOVED/qp-knight-lab-processing/qp_klp/tests/data/output_dir/NuQCJob/seqs.movi.${SLURM_ARRAY_TASK_ID} .txt.gz
143
+ mv ${jobd} /seqs.movi.txt.gz REMOVED/qp-knight-lab-processing/qp_klp/tests/data/output_dir/NuQCJob/logs/ seqs.movi.${SLURM_ARRAY_TASK_ID} .txt.gz
146
144
}
147
145
export -f mux-runner
148
146
0 commit comments