-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdemo1.py
64 lines (53 loc) · 2.44 KB
/
demo1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python3
import os
import argparse
from pipeline import Pipeline
parser = argparse.ArgumentParser(description = "ngs demo pipeline")
parser.add_argument('-t', type = int, help = "test mode OR not,1 for true, 0 for false", default = 1, dest = 'test')
args = parser.parse_args()
os.system("mkdir -p demo/output")
demo_record = "demo/output/demo.csv"
# create pipeline
pipeline = Pipeline(demo_record, test = args.test)
fq1 = "demo/input/SRR1770413_1.head.fastq.gz"
fq2 = "demo/input/SRR1770413_2.head.fastq.gz"
ID = os.path.basename(fq1).split("_")[0]
bwa_mem_template = "bwa mem -t 2 -R '@RG\\tID:test\\tPL:illumina\\tSM:E.coli_K12' demo/genome/E.coli_K12_MG1655.fa {} {} | samtools view -Sb - > demo/output/{}.bwa_mem.bam"
bwa_mem_cmd = bwa_mem_template.format(fq1, fq2, ID)
pipeline.append(ID, "bwa_mem", bwa_mem_cmd, log = "demo/output/bwa_mem.log")
reorder_template = "samtools sort -@ 2 -m 1G -O bam \
-o demo/output/{}.reorder.bwa_mem.bam \
demo/output/{}.bwa_mem.bam"
reorder_cmd = reorder_template.format(ID, ID)
pipeline.append(ID, "reorder", reorder_cmd, log = "demo/output/reorder.log")
mark_dup_template = "java -Xmx4g -Djava.io.tmpdir=/tmp \
-jar demo/tools/picard-tools-1.119/MarkDuplicates.jar \
I=demo/output/{}.reorder.bwa_mem.bam \
O=demo/output/{}.nodup.reorder.bwa_mem.bam \
METRICS_FILE=demo/output/duplicate_report.txt \
CREATE_INDEX=true \
ASSUME_SORTED=true \
REMOVE_DUPLICATES=true \
VALIDATION_STRINGENCY=LENIENT"
mark_dup_cmd = mark_dup_template.format(ID, ID)
pipeline.append(ID, "mark_dup", mark_dup_cmd, log = "demo/output/mark_dup.log")
print_reads_template = "java -Xmx4g -Djava.io.tmpdir=/tmp \
-jar demo/tools/GATK/GenomeAnalysisTK-3.8-0.jar \
-T PrintReads \
-R demo/genome/E.coli_K12_MG1655.fa \
-I demo/output/{}.nodup.reorder.bwa_mem.bam \
-o demo/output/{}.recal.bam \
-l INFO \
--read_filter MappingQualityZero"
print_reads_cmd = print_reads_template.format(ID, ID, ID)
pipeline.append(ID, "print_reads", print_reads_cmd, log = "demo/output/print_reads.log")
gatk_vcf_template = "java -Xmx4g -Djava.io.tmpdir=/tmp \
-jar demo/tools/GATK/GenomeAnalysisTK-3.8-0.jar \
-T HaplotypeCaller \
-R demo/genome/E.coli_K12_MG1655.fa \
-nct 2 \
-I demo/output/{}.recal.bam \
-o demo/output/{}.gatk.vcf"
gatk_vcf_cmd = gatk_vcf_template.format(ID, ID)
pipeline.append(ID, "gatk_vcf", gatk_vcf_cmd, log = "demo/output/gatk_vcf.log")
pipeline.run_pipeline()