-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathReadFASTAAlignment.py
27 lines (20 loc) · 1.21 KB
/
ReadFASTAAlignment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# INPUT: filename of text file of alignment in FASTA format
# OUTPUT: header: a list of header lines
# sequence: a list of sequence lines, with newline characters and spaces removed
# error: a list of error messages (currently never populated, always empty)
# AUTHOR:
# NOTE: the first version of this program does not account for the fact
# that some FASTA files have sequence lines spread out over multiple lines
import re # import regular expression tools
def ReadFASTAAlignment(filename):
    """Read a FASTA-format alignment file into header and sequence lists.

    Each line starting with '>' is collected as a header line; every other
    non-blank line is collected as a sequence line.  Sequence records that
    are wrapped over several physical lines are NOT merged: each physical
    line becomes its own entry in the sequence list.

    Args:
        filename: path of the text file containing the alignment.

    Returns:
        A tuple (header, sequence, error) where
        header   -- list of header lines (newline removed, '>' kept),
        sequence -- list of sequence lines with newlines and spaces removed,
        error    -- list of error messages; nothing is appended to it yet,
                    so it is always empty.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    header = []    # header lines, in file order
    sequence = []  # sequence lines, in file order
    error = []     # reserved for error messages; kept for the 3-tuple interface

    # The context manager closes the file even if reading fails part-way.
    with open(filename, 'r') as f:
        for line in f:
            line = line.replace("\n", "")  # remove newline character
            # Skip blank / whitespace-only lines; indexing line[0] on an
            # empty string used to raise IndexError here.
            if not line.strip():
                continue
            if line.startswith('>'):  # header lines must begin with >
                header.append(line)
            else:
                # Strip spaces so the result matches the documented output.
                sequence.append(line.replace(" ", ""))
    return header, sequence, error