-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathtrim_ends.py
executable file
·39 lines (34 loc) · 1.22 KB
/
trim_ends.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import argparse
def replace_str_index(text,index=0,replacement=''):
return '%s%s%s'%(text[:index],replacement,text[index+1:])
def find(s, ch):
return [i for i, ltr in enumerate(s) if ltr == ch]
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='')
parser.add_argument('unwrap_fasta', help='')
args = parser.parse_args()
# Grabs name of file.
fasta_name = str(args.unwrap_fasta) + "_swift.fasta"
new_fasta = open(fasta_name, "w")
# Grabs only the fasta we want (not the reference fasta),
# with the bases 201-29740 (per sarscov2_masterfile.txt).
line_num = 0
n_indexes = []
for line in open("repositioned_unwrap.fasta"):
# Finds correctly masked positions in reference
if(line_num == 1):
n_indexes = find(line, "n")
# Writes fasta header with real name
elif(line_num == 2):
fasta_header = ">" + args.unwrap_fasta + "\n"
new_fasta.write(fasta_header)
# Masks positions by ref masked
elif(line_num==3):
line_masked = line
for index in n_indexes:
if(line_masked[index]!="-"):
line_masked = replace_str_index(line_masked,index,"n")
line_trimmed = line_masked[201:29741]
final_line = line_trimmed.replace("-","")
new_fasta.write(final_line)
line_num = line_num + 1