-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFindBestAlignmentRow.py
41 lines (31 loc) · 1.76 KB
/
FindBestAlignmentRow.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# INPUT: sequence: a biological sequence
# MSAfilename: the filename of a multiple sequence alignment in FASTA format
# sequencetype: a string which can be "protein" or "RNA" or "DNA"
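# OUTPUT: sequencenumber: an integer, starting at 1, telling which sequence
# in the file was the best match
# header: the header of the best-matching sequence
# MSAsequence: the best-matching sequence from the alignment
# followed by the matches, score, a1, a2, p1 and p2 values that
# LocalAlignment returned for the best-matching sequence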
# NOTE: Some FASTA-formatted files have the sequence spread out over
# multiple lines and have spaces within the line. The code below
# accumulates lines until a line starting with >, which is where
# the next sequence starts.
# AUTHOR:
from AlignmentPrograms import LocalAlignment
def _iter_fasta_records(lines):
    """Yield (number, header, sequence) for each record in FASTA-formatted lines.

    number counts header lines starting at 1. header is the text after the
    leading '>' with surrounding whitespace removed. sequence is every line
    up to the next header joined together with all whitespace removed, so
    sequences spread over several lines or containing spaces come out as one
    unbroken string. Lines that appear before the first header are ignored.
    """
    number = 0
    header = None
    chunks = []
    for line in lines:
        if line.startswith(">"):
            if header is not None:
                yield number, header, "".join(chunks)
            number += 1
            header = line[1:].strip()
            chunks = []
        elif header is not None:
            # split() with no argument drops newlines and embedded spaces
            chunks.extend(line.split())
    # the last record has no following header line to trigger it
    if header is not None:
        yield number, header, "".join(chunks)


def FindBestAlignmentRow(sequence, MSAfilename, sequencetype):
    """Find the sequence in a FASTA alignment file that best matches sequence.

    Every non-empty sequence in MSAfilename is passed to
    LocalAlignment(sequence, candidate, sequencetype). The candidate with the
    most matches wins; ties are broken by the higher score, and remaining
    ties keep the earliest sequence in the file.

    Parameters:
        sequence: the query sequence, handed to LocalAlignment unchanged.
        MSAfilename: path of the FASTA-formatted alignment file to read.
        sequencetype: handed to LocalAlignment unchanged (the file header
            documents "protein", "RNA" or "DNA").

    Returns:
        A tuple (bestnumber, bestheader, bestsequence, bestmatches,
        bestscore, besta1, besta2, bestp1, bestp2). bestnumber is the
        1-based position of the winning sequence in the file, bestheader its
        header text without the leading '>', bestsequence its sequence with
        whitespace removed, and the remaining six values are exactly what
        LocalAlignment returned for it. If the file contains no non-empty
        sequence the result is (0, "", "", 0, 0, None, None, None, None).

    Raises:
        OSError: if MSAfilename cannot be opened.
    """
    bestnumber = 0  # 0 means "nothing aligned yet"; real numbers start at 1
    bestheader = ""
    bestsequence = ""
    bestmatches = 0
    bestscore = 0
    besta1 = besta2 = bestp1 = bestp2 = None

    with open(MSAfilename) as f:
        for number, header, aligned in _iter_fasta_records(f):
            if not aligned:
                continue  # header without sequence data: nothing to align
            matches, score, a1, a2, p1, p2 = LocalAlignment(
                sequence, aligned, sequencetype
            )
            # The first aligned record always becomes the current best so a
            # result is returned even when every alignment has 0 matches.
            if bestnumber == 0 or (matches, score) > (bestmatches, bestscore):
                bestnumber = number
                bestheader = header
                bestsequence = aligned
                bestmatches = matches
                bestscore = score
                besta1, besta2 = a1, a2
                bestp1, bestp2 = p1, p2

    return (
        bestnumber, bestheader, bestsequence, bestmatches, bestscore,
        besta1, besta2, bestp1, bestp2,
    )