Skip to content

Commit e27e21d

Browse files
Updates from production. tests updated.
1 parent 10ca7f5 commit e27e21d

9 files changed

+32
-26
lines changed

qp_klp/Step.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,7 @@ def _quality_control(self, config, input_file_path):
331331
length_limit=config['length_limit'],
332332
cores_per_task=config['cores_per_task'],
333333
movi_path=config['movi_executable_path'],
334+
gres_value=config['gres_value'],
334335
pmls_path=config['pmls_path'])
335336

336337
nuqc_job.run(callback=self.update_callback)

qp_klp/tests/data/configuration_profiles/iseq_metagenomic.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
"length_limit": 100,
6868
"cores_per_task": 4,
6969
"movi_executable_path": "/home/user/user_dir/Movi/build/movi-default",
70+
"gres_value": 4,
7071
"pmls_path": "/home/user/user_dir/human_host_filtration/scripts/qiita_filter_pmls.py"
7172
},
7273
"seqpro": {

qp_klp/tests/data/configuration_profiles/miseq_amplicon.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
"length_limit": 100,
3636
"cores_per_task": 2,
3737
"movi_executable_path": "/home/user/user_dir/Movi/build/movi-default",
38+
"gres_value": 4,
3839
"pmls_path": "/home/user/user_dir/human_host_filtration/scripts/qiita_filter_pmls.py"
3940
},
4041
"seqpro": {

qp_klp/tests/data/configuration_profiles/miseq_metagenomic.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
"length_limit": 100,
4747
"cores_per_task": 2,
4848
"movi_executable_path": "/home/user/user_dir/Movi/build/movi-default",
49+
"gres_value": 2,
4950
"pmls_path": "/home/user/user_dir/human_host_filtration/scripts/qiita_filter_pmls.py"
5051
},
5152
"seqpro": {

qp_klp/tests/data/configuration_profiles/miseq_metatranscriptomic.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
"length_limit": 100,
4747
"cores_per_task": 2,
4848
"movi_executable_path": "/home/user/user_dir/Movi/build/movi-default",
49+
"gres_value": 4,
4950
"pmls_path": "/home/user/user_dir/human_host_filtration/scripts/qiita_filter_pmls.py"
5051
},
5152
"seqpro": {

qp_klp/tests/data/configuration_profiles/novaseq_amplicon.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
"length_limit": 100,
3636
"cores_per_task": 4,
3737
"movi_executable_path": "/home/user/user_dir/Movi/build/movi-default",
38+
"gres_value": 4,
3839
"pmls_path": "/home/user/user_dir/human_host_filtration/scripts/qiita_filter_pmls.py"
3940
},
4041
"seqpro": {

qp_klp/tests/data/configuration_profiles/novaseq_metagenomic.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
"length_limit": 100,
4747
"cores_per_task": 4,
4848
"movi_executable_path": "/home/user/user_dir/Movi/build/movi-default",
49+
"gres_value": 4,
4950
"pmls_path": "/home/user/user_dir/human_host_filtration/scripts/qiita_filter_pmls.py"
5051
},
5152
"seqpro": {

qp_klp/tests/data/configuration_profiles/novaseq_metatranscriptomic.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
"length_limit": 100,
6868
"cores_per_task": 4,
6969
"movi_executable_path": "/home/user/user_dir/Movi/build/movi-default",
70+
"gres_value": 4,
7071
"pmls_path": "/home/user/user_dir/human_host_filtration/scripts/qiita_filter_pmls.py"
7172
},
7273
"seqpro": {

qp_klp/tests/data/process_all_fastq_files.sh

Lines changed: 24 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@
99
### as well as sbatch -c. demux threads remains fixed at 1.
1010
### Note -c set to 4 and thread counts set to 7 during testing.
1111
#SBATCH -c 2
12-
#SBATCH --gres=node_jobs:2
12+
### Commented out for now, but there is a possibility it will be needed
13+
### in the future.
14+
###SBATCH --gres=node_jobs:2
1315

1416

1517
echo "---------------"
@@ -53,8 +55,8 @@ export TMPDIR=${TMPDIR}
5355
export TMPDIR=$(mktemp -d)
5456
echo $TMPDIR
5557

56-
mkdir -p ${WKDIR}NuQCJob/fastp_reports_dir/html
57-
mkdir -p ${WKDIR}NuQCJob/fastp_reports_dir/json
58+
mkdir -p ${WKDIR}/fastp_reports_dir/html
59+
mkdir -p ${WKDIR}/fastp_reports_dir/json
5860

5961
export ADAPTER_ONLY_OUTPUT=${OUTPUT}/only-adapter-filtered
6062
mkdir -p ${ADAPTER_ONLY_OUTPUT}
@@ -74,9 +76,8 @@ function mux-runner () {
7476

7577
jobd=${TMPDIR}
7678
id_map=${jobd}/id_map
77-
seqs_r1=${jobd}/seqs.r1.fastq.gz
78-
seqs_r2=${jobd}/seqs.r2.fastq
79-
r1_filt=${jobd}/seqs.r1.adapter-removed.fastq.gz
79+
seqs_reads=${jobd}/seqs.interleaved.fastq
80+
seq_reads_filter_alignment=${jobd}/seqs.interleaved.filter_alignment.fastq
8081

8182
for i in $(seq 1 ${n})
8283
do
@@ -86,14 +87,19 @@ function mux-runner () {
8687
base=$(echo ${line} | cut -f 3 -d" ")
8788
r1_name=$(basename ${r1} .fastq.gz)
8889
r2_name=$(basename ${r2} .fastq.gz)
89-
r1_adapter_only=${ADAPTER_ONLY_OUTPUT}/${r1_name}.fastq.gz
90+
r_adapter_only=${ADAPTER_ONLY_OUTPUT}/${r1_name}.interleave.fastq.gz
9091

9192
s_name=$(basename "${r1}" | sed -r 's/\.fastq\.gz//')
9293
html_name=$(echo "$s_name.html")
9394
json_name=$(echo "$s_name.json")
9495

9596
echo -e "${i}\t${r1_name}\t${r2_name}\t${base}" >> ${id_map}
9697

98+
# movi, in the current version, works on the interleaved version of the
99+
# fwd/rev reads, so we are going to take advantage of fastp's default output
100+
# to minimize steps. Additionally, movi expects the input to not be
101+
# gz, so we are not going to compress seqs_reads
102+
97103
fastp \
98104
-l 100 \
99105
-i ${r1} \
@@ -102,47 +108,39 @@ function mux-runner () {
102108
--adapter_fasta fastp_known_adapters_formatted.fna \
103109
--html REMOVED/qp-knight-lab-processing/qp_klp/tests/data/output_dir/NuQCJob/fastp_reports_dir/html/${html_name} \
104110
--json REMOVED/qp-knight-lab-processing/qp_klp/tests/data/output_dir/NuQCJob/fastp_reports_dir/json/${json_name} \
105-
--stdout | gzip > ${r1_filt}
111+
--stdout | gzip > ${r_adapter_only}
106112

107113
# multiplex and write adapter filtered data all at once
108-
zcat ${r1_filt} | \
114+
zcat ${r_adapter_only} | \
109115
sed -r "1~4s/^@(.*)/@${i}${delimiter}\1/" \
110-
>> ${seqs_r1}
111-
cat ${r1_filt} | \
112-
gzip -c > ${r1_adapter_only} &
113-
wait
114-
115-
rm ${r1_filt} &
116-
wait
116+
>> ${seqs_reads}
117117
done
118118

119119
# minimap/samtools pair commands are now generated in NuQCJob._generate_mmi_filter_cmds()
120-
# and passed to this template. This method assumes ${jobd} is the correct location to
121-
# filter files, the initial file is "${jobd}/seqs.r1.fastq"), and the output name is
122-
# "${jobd}/seqs.r1.ALIGN.fastq".
123-
minimap2 -2 -ax sr -t 1 /databases/minimap2/db_1.mmi ${jobd}/seqs.r1.fastq -a | samtools fastq -@ 1 -f 12 -F 256 > ${jobd}/foo
120+
# and passed to this template.
121+
minimap2 -2 -ax sr -t 1 /databases/minimap2/db_1.mmi ${jobd}/seqs.interleaved.fastq -a | samtools fastq -@ 1 -f 12 -F 256 > ${jobd}/foo
124122
minimap2 -2 -ax sr -t 1 /databases/minimap2/db_2.mmi ${jobd}/foo -a | samtools fastq -@ 1 -f 12 -F 256 > ${jobd}/bar
125-
mv ${jobd}/bar ${jobd}/seqs.r1.ALIGN.fastq
123+
mv ${jobd}/bar ${jobd}/seqs.interleaved.filter_alignment.fastq
126124
[ -e ${jobd}/foo ] && rm ${jobd}/foo
127125
[ -e ${jobd}/bar ] && rm ${jobd}/bar
128126

129127
/home/user/user_dir/Movi/build/movi-default query \
130128
--index /scratch/movi_hg38_chm13_hprc94 \
131-
--read <(zcat ${jobd}/seqs.r1.ALIGN.fastq.gz) \
129+
--read ${seq_reads_filter_alignment} \
132130
--stdout | gzip > ${jobd}/seqs.movi.txt.gz
133131

134132
python /home/user/user_dir/human_host_filtration/scripts/qiita_filter_pmls.py <(zcat ${jobd}/seqs.movi.txt.gz) | \
135-
seqtk subseq ${jobd}/seqs.r1.ALIGN.fastq.gz - | gzip > ${jobd}/seqs.r1.final.fastq.gz
133+
seqtk subseq ${seq_reads_filter_alignment} - > ${jobd}/seqs.final.fastq
136134

137-
REMOVED/sequence_processing_pipeline/scripts/splitter ${jobd}/seqs.r1.final.fastq \
135+
REMOVED/sequence_processing_pipeline/scripts/splitter ${jobd}/seqs.final.fastq \
138136
${jobd}/reads.r1.fastq ${delimiter} ${r1_tag} &
139-
REMOVED/sequence_processing_pipeline/scripts/splitter ${jobd}/seqs.r1.final.fastq \
137+
REMOVED/sequence_processing_pipeline/scripts/splitter ${jobd}/seqs.final.fastq \
140138
${jobd}/reads.r2.fastq ${delimiter} ${r2_tag} &
141139
wait
142140
fastq_pair -t 50000000 ${jobd}/reads.r1.fastq ${jobd}/reads.r2.fastq
143141

144142
# keep seqs.movi.txt and migrate it to NuQCJob directory.
145-
mv ${jobd}/seqs.movi.txt.gz REMOVED/qp-knight-lab-processing/qp_klp/tests/data/output_dir/NuQCJob/seqs.movi.${SLURM_ARRAY_TASK_ID}.txt.gz
143+
mv ${jobd}/seqs.movi.txt.gz REMOVED/qp-knight-lab-processing/qp_klp/tests/data/output_dir/NuQCJob/logs/seqs.movi.${SLURM_ARRAY_TASK_ID}.txt.gz
146144
}
147145
export -f mux-runner
148146

0 commit comments

Comments
 (0)