-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsbatch_command_withRG_align_fastq.py
executable file
·39 lines (32 loc) · 1.28 KB
/
sbatch_command_withRG_align_fastq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/usr/local/Anaconda/envs/py3.4.3/bin/python
"""
Creates and runs with sbatch
bwa-mem hg19 command to align fastq files
with proper RG info
"""
import argparse
import subprocess
def parse_read_group(header_text: str, sample: str) -> list:
    """Return the ``ID``, ``SM`` and ``LB`` read-group fields for *sample*.

    The first ``@RG`` line found in *header_text* is split on tabs and the
    value of its fifth column (index 4, counting the ``@RG`` tag itself as
    column 0) is reused:

    * ``ID`` is that whole value (the text between the first and second
      colon of the column);
    * ``LB`` is the third dot-separated piece of that value.

    NOTE(review): which tag lives in the fifth column depends on how the
    source BAMs were produced (the original comment says "NISC provided
    info") -- confirm before using BAMs from another source.

    Args:
        header_text: SAM header text, one record per line.
        sample: Sample name to place in the ``SM`` field.

    Returns:
        ``['ID:<value>', 'SM:<sample>', 'LB:<library>']``.

    Raises:
        ValueError: if there is no ``@RG`` line or it lacks the expected
            column / dot-separated layout.
    """
    rg_lines = [line for line in header_text.splitlines()
                if line.startswith('@RG')]
    if not rg_lines:
        raise ValueError('no @RG line found in the BAM header')

    # Work on a single newline-free line so that neither a trailing '\n' nor
    # a following @RG record can leak into the extracted values.
    columns = rg_lines[0].split('\t')
    if len(columns) < 5:
        raise ValueError(
            '@RG line has {0} column(s), expected at least 5: {1!r}'.format(
                len(columns), rg_lines[0]))

    tag_parts = columns[4].split(':')
    if len(tag_parts) < 2:
        raise ValueError(
            'fifth @RG column is not TAG:VALUE: {0!r}'.format(columns[4]))
    value = tag_parts[1]

    value_parts = value.split('.')
    if len(value_parts) < 3:
        raise ValueError(
            'cannot take the library from {0!r}: expected at least three '
            'dot-separated parts'.format(value))

    return ['ID:' + value, 'SM:' + sample, 'LB:' + value_parts[2]]


def build_sbatch_command(bamfile: str, header_text: str) -> str:
    """Build the shell command that submits the bwa-mem alignment job.

    The sample name is everything before the first ``.`` in *bamfile*; it is
    used for the ``SM`` field, for the two FASTQ names
    (``<sample>_1.fastq`` / ``<sample>_2.fastq``) and for the output name
    (``<sample>.bwa-mem.hg19.bam``).

    NOTE: because the split is on the first dot of the whole argument, a
    path such as ``./sample.bam`` yields an empty sample name; pass a bare
    file name.

    Args:
        bamfile: BAM file name given on the command line.
        header_text: SAM header text of that BAM file.

    Returns:
        The command line, still carrying shell escapes, to be run through a
        shell.

    Raises:
        ValueError: propagated from :func:`parse_read_group`.
    """
    sample = bamfile.split('.')[0]
    rg_fields = parse_read_group(header_text, sample)

    # The escaping is deliberate: after the shell processes the command,
    # run_bwa-mem_hg19.sh receives a single argument that still contains
    # literal double quotes and literal "\t" sequences.  Keep it
    # byte-identical -- the job script is not visible here and presumably
    # relies on exactly this form.
    read_group = '\\\\t'.join(['\\"\\@RG'] + rg_fields + ['PL:Illumina\\"'])

    # The double space after the script name is kept from the original
    # command; it is harmless to the shell.
    return ('sbatch --mem=50G --cpus-per-task=10 run_bwa-mem_hg19.sh  '
            '{0}_1.fastq {0}_2.fastq {1} {0}.bwa-mem.hg19.bam'
            .format(sample, read_group))


def main(argv=None) -> int:
    """Parse the command line, build the sbatch command, print and run it.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        The exit status of the ``sbatch`` invocation.

    Raises:
        subprocess.CalledProcessError: if ``samtools`` exits non-zero.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'file', help='Input bam file to generate read group information')
    args = parser.parse_args(argv)
    bamfile = args.file

    # Needs samtools (1.2) already loaded in the environment.  "-H" prints
    # only the header, and the argument list avoids passing the file name
    # through a shell.
    header_text = subprocess.check_output(
        ['samtools', 'view', '-H', bamfile]).decode('utf-8')

    try:
        run_bwa = build_sbatch_command(bamfile, header_text)
    except ValueError as err:
        parser.error('{0}: {1}'.format(bamfile, err))

    print(run_bwa)
    # shell=True is required here: the read-group argument relies on the
    # shell to process its backslash escapes.
    return subprocess.call(run_bwa, shell=True)


if __name__ == '__main__':
    # Propagate the sbatch exit status instead of always exiting 0.
    raise SystemExit(main())