-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSingularity.pipeline.script.txt
128 lines (83 loc) · 6.74 KB
/
Singularity.pipeline.script.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# ----------------------------------------------------------
# Merge raw reads
# ----------------------------------------------------------
# All fq.gz files were copied into the rawfastq folder, including the
# 3 negative controls: S1, S13 and SNTC.
#
# Decompress every per-sample archive straight into one combined FASTQ per
# read direction. gunzip -c writes each input to stdout in glob order, so the
# result is the same as concatenating the archives and unzipping the
# concatenation, without the intermediate rename of a mislabelled .fq file.
gunzip -c -- *_R1*.gz > all_r1.fq
gunzip -c -- *_R2*.gz > all_r2.fq

# Output folder for the QC step; -p keeps a re-run from failing when the
# folder already exists.
# NOTE(review): this is created relative to the current directory, while later
# commands refer to /srv/scratch/mrcbio/howard/EllisFok/Analysis_2023/hf_qc —
# confirm which directory each step is meant to be run from.
mkdir -p hf_qc
# -------------------------------------
# Generate Manifest
# -------------------------------------
# NOTE(review): the image is referenced both as /home/Image/mrcmicrobiome.sif
# and as /home/z3520673/Image/mrcmicrobiome.sif in this section — confirm that
# both paths resolve to the same image.
# NOTE(review): hf_d.manifest is written through an absolute path below but
# read through a relative path by the import command — presumably everything
# is run from .../Analysis_2023; confirm.

# Build the manifest (and QC inputs) from the merged reads.
singularity exec --cleanenv /home/Image/mrcmicrobiome.sif bash -c 'perl /home/Image/mima_prepare_manifest_and_qc.pl \
  /srv/scratch/mrcbio/howard/EllisFok/rawfastq/ \
  /srv/scratch/mrcbio/howard/EllisFok/Analysis_2023/hf_d.manifest \
  all_r1.fq all_r2.fq \
  /srv/scratch/mrcbio/howard/EllisFok/Analysis_2023/hf_qc \
  1 \
  /home/z3520673/Image/mrcmicrobiome.sif'

# Run fastp on the merged reads; filtered reads and the HTML report go to hf_qc.
singularity exec /home/Image/mrcmicrobiome.sif bash -c 'fastp \
  -i all_r1.fq -I all_r2.fq \
  -o /srv/scratch/mrcbio/howard/EllisFok/Analysis_2023/hf_qc/ALL_R1.fq \
  -O /srv/scratch/mrcbio/howard/EllisFok/Analysis_2023/hf_qc/ALL_R2.fq \
  -h /srv/scratch/mrcbio/howard/EllisFok/Analysis_2023/hf_qc/fastp.outreport.html'

# Import the manifest into a QIIME 2 artifact.
# The semantic type is double-quoted INSIDE the single-quoted bash -c string:
# nesting single quotes closes the outer string and leaves the [...] part
# unquoted, where the shell treats it as a glob pattern.
singularity exec /home/z3520673/Image/mrcmicrobiome.sif bash -c '. activate qiime2-2020.8 && \
qiime tools import \
--type "SampleData[PairedEndSequencesWithQuality]" \
--input-path hf_d.manifest \
--output-path demux.qza \
--input-format PairedEndFastqManifestPhred33'
# ----------------------------------------------------------
# Running dada2 within qiime2 to perform quality control
# ----------------------------------------------------------
# Denoise the paired-end reads in demux.qza. Produces the representative
# sequences, the feature table and the denoising statistics.
singularity exec /home/Image/mrcmicrobiome.sif bash -c '. activate qiime2-2020.8 && \
qiime dada2 denoise-paired \
--i-demultiplexed-seqs demux.qza \
--p-trunc-len-f 0 \
--p-trunc-len-r 0 \
--p-trim-left-f 4 \
--p-trim-left-r 4 \
--p-n-threads 1 \
--o-representative-sequences rep-seqs.qza \
--o-table table.qza \
--o-denoising-stats stats-dada2.qza'

# Export each dada2 artifact into a folder named after it.
singularity exec /home/Image/mrcmicrobiome.sif bash -c '. activate qiime2-2020.8 && \
qiime tools export \
--input-path stats-dada2.qza \
--output-path stats-dada2'
singularity exec /home/Image/mrcmicrobiome.sif bash -c '. activate qiime2-2020.8 && \
qiime tools export \
--input-path rep-seqs.qza \
--output-path rep-seqs'
singularity exec /home/Image/mrcmicrobiome.sif bash -c '. activate qiime2-2020.8 && \
qiime tools export \
--input-path table.qza \
--output-path table'

# Convert the exported BIOM feature table to tab-separated text.
singularity exec /home/Image/mrcmicrobiome.sif bash -c '. activate qiime2-2020.8 && \
biom convert -i table/feature-table.biom -o table/feature-table.txt --to-tsv'
# ----------------------------------------------------------
# Host contamination removal
# ----------------------------------------------------------
# Run the packaged decontamination script against the human genome index.
singularity exec /home/Image/mrcmicrobiome.sif bash -c '. activate qiime2-2020.8 && perl /home/Image/mima_decontaminate_host.pl rep-seqs.qza table.qza hf_host_decontamination /srv/scratch/mrcbio/humangenome/humangenome 1'

# Export the representative sequences as FASTA for alignment.
singularity exec /home/Image/mrcmicrobiome.sif bash -c '. activate qiime2-2020.8 && qiime tools export --input-path rep-seqs.qza --output-path hf_host_decontamination/rep-seqsold'

# Align the sequences to the host genome. Sequences that align are written to
# matched_dnaseq.fasta (--al), the rest to unaligned_bowtie2.fasta (--un).
singularity exec /home/Image/mrcmicrobiome.sif bash -c '. activate qiime2-2020.8 && bowtie2 -x /srv/scratch/mrcbio/humangenome/humangenome -U hf_host_decontamination/rep-seqsold/dna-sequences.fasta -f --sensitive-local -p 1 --un unaligned_bowtie2.fasta --al hf_host_decontamination/rep-seqsold/matched_dnaseq.fasta > hf_host_decontamination/rep-seqsold/align-hostgenome.sam.txt'

# Build the metadata table of host-matched feature ids: a header row followed
# by "<id><TAB>1" for every FASTA header in matched_dnaseq.fasta.
# Header and rows are written through one grouped redirect. The earlier form
# piped the temp-file writer into mv, so mv ran concurrently with the writer
# and could move (or fail to find) a half-written temp.out.
{
  printf 'feature-id\tSIDs\n'
  grep '^>' hf_host_decontamination/rep-seqsold/matched_dnaseq.fasta \
    | perl -ne 'chomp; s/^>//; print "$_\t1\n";'
} > hf_host_decontamination/metadata.table.txt

# Drop the host-matched features from the feature table. The column in the
# where-clause is spelled exactly as in the header written above ("SIDs").
singularity exec /home/Image/mrcmicrobiome.sif bash -c '. activate qiime2-2020.8 && qiime feature-table filter-features --i-table table.qza --m-metadata-file hf_host_decontamination/metadata.table.txt --p-where "SIDs==1" --p-exclude-ids --o-filtered-table hf_host_decontamination/table.qza'

# Drop the same features from the exported FASTA.
perl -e '
  # Feature ids flagged as host; the first line of the table is the header.
  open(my $flagged, "<", "hf_host_decontamination/metadata.table.txt")
    or die "cannot read hf_host_decontamination/metadata.table.txt: $!";
  <$flagged>;
  my %is_host;
  while (<$flagged>) {
    chomp;
    my @cols = split /\t/;
    $is_host{$cols[0]} = 1;
  }
  close($flagged);

  # Records are consumed as one id line followed by one sequence line.
  # NOTE(review): assumes the exported FASTA is not line-wrapped; confirm.
  open(my $fasta, "<", "hf_host_decontamination/rep-seqsold/dna-sequences.fasta")
    or die "cannot read hf_host_decontamination/rep-seqsold/dna-sequences.fasta: $!";
  while (my $id = <$fasta>) {
    my $seq = <$fasta>;
    chomp($id);
    $id =~ s/^>//;
    print ">$id\n$seq" unless exists $is_host{$id};
  }
  close($fasta);
' > hf_host_decontamination/filtered_dnaseqs.fasta

# Re-import the filtered FASTA as the new representative sequences.
# The semantic type is double-quoted inside the single-quoted bash -c string;
# nested single quotes would leave the [...] part unquoted for the shell.
singularity exec /home/Image/mrcmicrobiome.sif bash -c '. activate qiime2-2020.8 && qiime tools import --type "FeatureData[Sequence]" --input-path hf_host_decontamination/filtered_dnaseqs.fasta --output-path hf_host_decontamination/rep-seqs.qza'

# Export, convert and summarise the filtered feature table.
singularity exec /home/Image/mrcmicrobiome.sif bash -c '. activate qiime2-2020.8 && qiime tools export --input-path hf_host_decontamination/table.qza --output-path hf_host_decontamination/otable'
singularity exec /home/Image/mrcmicrobiome.sif bash -c '. activate qiime2-2020.8 && biom convert -i hf_host_decontamination/otable/feature-table.biom -o hf_host_decontamination/otable/feature.txt --to-tsv'
singularity exec /home/Image/mrcmicrobiome.sif bash -c '. activate qiime2-2020.8 && qiime feature-table summarize --i-table hf_host_decontamination/table.qza --o-visualization hf_host_decontamination/tablesummarize.qzv'
singularity exec /home/Image/mrcmicrobiome.sif bash -c '. activate qiime2-2020.8 && qiime tools export --input-path hf_host_decontamination/tablesummarize.qzv --output-path hf_host_decontamination/otablesummarize'
# ----------------------------------------------------------
# Taxonomy annotation and diversity analysis (cut-off at 3000 features; including all negative controls)
# NOTE(review): the heading says 3000 but the command below passes -d 2900 —
# presumably -d is that cut-off; confirm which value is intended.
# ----------------------------------------------------------
# Stage the decontaminated artifacts in the diversity output folder.
# -p keeps a re-run from failing when the folder already exists.
mkdir -p hf_diver
cp hf_host_decontamination/rep-seqs.qza hf_diver/
cp hf_host_decontamination/table.qza hf_diver/

# Run the diversity/statistics script with the sample metadata and the
# gg-v3v4 classifier, writing into hf_diver.
singularity exec /home/Image/mrcmicrobiome.sif bash -c '. activate qiime2-2020.8 && \
perl /home/applications/mima/mima_diversity-basic-statistic.pl \
-m metadata3.txt \
-c /data/bio/workshop/db/gg-v3v4-classifier.qza \
-n 1 \
-d 2900 \
-o hf_diver'

# Run the beta-diversity command file.
# NOTE(review): beta-diversity-commands.sh is looked up in the current
# directory — presumably produced by the step above; confirm its location.
singularity exec /home/Image/mrcmicrobiome.sif bash -c '. activate qiime2-2020.8 && sh beta-diversity-commands.sh '
# ----------------------------------------------------------
# Installation of the 16S copy number normalisation plugin into QIIME2
# ----------------------------------------------------------
# Plugin announcement and instructions:
# https://forum.qiime2.org/t/q2-gcn-norm-plugin-for-normalizing-sequences-by-16s-rrna-gene-copy-number/12715
# NOTE(review): these commands are not wrapped in "singularity exec" like the
# earlier steps — presumably run in a separate conda-based QIIME 2
# environment; confirm.

# Add the jiungwenchen channel
conda config --add channels jiungwenchen
# Update conda to the latest version
conda update -n base -c defaults conda
# Install the plugin from that channel
conda install -c jiungwenchen q2-gcn-norm

# Perform 16S rRNA copy number correction
qiime gcn-norm copy-num-normalize \
--i-table table.qza \
--i-taxonomy taxonomy.qza \
--o-gcn-norm-table table-normalized.qza
# Use table-normalized.qza for the part 2 analysis