-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmap_nuc_to_aa.py
executable file
·68 lines (50 loc) · 2.01 KB
/
map_nuc_to_aa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env python
import argparse
# Command-line interface: three required file paths plus an optional switch
# that selects middle-base mode (one character per residue instead of a codon).
parser = argparse.ArgumentParser(
    description='Maps the nucleotide sequence to aligned amino acid sequence',
)
for _flag, _help_text in (
    ('-n', 'Input single line Nucleotide FASTA file'),
    ('-a', 'Input single line Aligned amino acid FASTA file'),
    ('-o', 'Output single line Aligned nucleotide FASTA file'),
):
    # metavar='' keeps the usage line free of placeholder names.
    parser.add_argument(_flag, metavar='', required=True, help=_help_text)
parser.add_argument(
    '-mb',
    action='store_true',
    help='Use this flag if the nucleotide sequences are composed of only codon middle bases',
)
# Parsed at module level; the script body below reads args.n/.a/.o/.mb.
args = parser.parse_args()
def build_dict(fasta, skip):
    """Parse FASTA lines into a mapping of header -> list of sequence chunks.

    Args:
        fasta: List of lines of a FASTA file.  A line starting with '>' is
            a header; any other non-blank line is sequence data belonging
            to the most recent header.
        skip: Chunk width.  3 splits a sequence into codons, 1 into single
            characters.

    Returns:
        A dict mapping each header line (including the leading '>') to the
        list of ``skip``-wide pieces of its sequence.  The last piece is
        shorter when the sequence length is not a multiple of ``skip``.
        Sequence data that appears before any header is stored under the
        empty-string key.  A header with no sequence data gets no entry.
    """
    records = {}
    name = ""
    new_record = True  # True until the current header receives its first line
    for line in fasta:
        if line.startswith('>'):
            name = line
            new_record = True
            continue
        if not line.strip():
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # sequence and must not clobber the record collected so far.
            continue
        if new_record:
            # First data line of a record replaces any earlier record that
            # used the same header (last one wins).
            records[name] = [line]
            new_record = False
        else:
            # Wrapped FASTA: gather the remaining lines of this record.
            records[name].append(line)

    fasta_dict = {}
    for name, parts in records.items():
        # Join before chunking so pieces stay in frame across line breaks.
        sequence = "".join(parts)
        fasta_dict[name] = [sequence[i:i + skip]
                            for i in range(0, len(sequence), skip)]
    return fasta_dict
if __name__ == '__main__':
    # Read both inputs, dropping trailing whitespace/newlines from each line.
    with open(args.n, 'r') as fopen:
        nuc_fasta = [line.rstrip() for line in fopen]
    with open(args.a, 'r') as fopen:
        aa_align_fasta = [line.rstrip() for line in fopen]

    # In middle-base mode each residue corresponds to one nucleotide
    # character; otherwise to a three-character codon.  The gap written for
    # an alignment dash has the same width as one unit.
    if args.mb:
        unit_width, gap = 1, '-'
    else:
        unit_width, gap = 3, '---'

    nuc_dict = build_dict(nuc_fasta, unit_width)
    aa_align_dict = build_dict(aa_align_fasta, 1)

    # Output alternates header line, aligned nucleotide line.
    nuc_align_fasta = []
    for header, aa_columns in aa_align_dict.items():
        # Headers must match between the two files exactly; a header missing
        # from the nucleotide file raises KeyError here.
        nuc_units = nuc_dict[header]
        used = 0  # number of nucleotide units consumed so far
        aligned = []
        for residue in aa_columns:
            if residue == '-':
                aligned.append(gap)
            else:
                # Raises IndexError if the nucleotide sequence has fewer
                # units than the alignment has residues.
                aligned.append(nuc_units[used])
                used += 1
        # Units beyond the last aligned residue are not written.
        nuc_align_fasta.append(header)
        nuc_align_fasta.append("".join(aligned))

    # Opened only after all records were built, so a failure above leaves
    # any existing output file untouched; 'with' closes the handle even if
    # a write fails.
    with open(args.o, 'w') as fout:
        for out_line in nuc_align_fasta:
            fout.write('%s\n' % out_line)