-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvalidate.py
142 lines (134 loc) · 5 KB
/
validate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
"""This module contains capabilities for evaluating models from SPARC simulations. Pass the word 'rmsd' or the word 'tmscore' to calculate that quantity.
The -i parameter specifies either a file containing models or a directory in which all .pdb files will be evaluated.
The -n parameter specifies a native structure file.
The -d parameter specifies the path to the directive used by SPARC to generate the models. This parameter is necessary to examine segments of the native structure file; the files will be identified by the output names in the directive file."""
from main import *
from tmscore import *
rmsd_mode = "rmsd"
tmscore_mode = "tmscore"
sparc_mode = "sparc"
def sparc_validation(input, native, range, output=None, distributions=None):
backup_sec_structs = []
if output:
outfile = open(output, "w")
for i, path in enumerate([native, input]):
peptide = Polypeptide()
readrange = None
if i == 0: readrange = range
for modelno in peptide.iter_models(path, secondary_structure=True, range=readrange):
if i == 0:
print peptide.aminoacids, peptide.secondary_structures
modelno = "model_orig.pdb"
if len(peptide.secondary_structures):
backup_sec_structs = peptide.secondary_structures
else:
peptide.secondary_structures = backup_sec_structs
spscore = ""
for d in distributions:
subscore = d.score(peptide, peptide.aminoacids)
spscore += "{:.4f},".format(subscore)
spscore = spscore[:-1]
if output:
outfile.write("{};{}\n".format(modelno, spscore))
else:
print "{}\t{}".format(modelno, spscore)
if output:
outfile.close()
if __name__ == '__main__':
args = sys.argv[1:]
input = None
native = None
directive = None
output = None
mode = args[0]
i = 1
while i < len(args):
if args[i].lower() == "-n":
assert len(args) > i + 1, "Not enough arguments"
native = args[i + 1]
i += 2
elif args[i].lower() == "-i":
assert len(args) > i + 1, "Not enough arguments"
input = args[i + 1]
i += 2
elif args[i].lower() == "-d":
assert len(args) > i + 1, "Not enough arguments"
directive = args[i + 1]
i += 2
elif args[i].lower() == "-o":
assert len(args) > i + 1, "Not enough arguments"
output = args[i + 1]
i += 2
else:
assert False, "Unexpected command-line argument {}".format(args[i])
input = os.path.realpath(input)
native = os.path.realpath(native)
distributions = None
if mode == sparc_mode:
sparc_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "potential")
weights = {} #{ "consec": 4.0, "secondary": 4.0, "short_range": 1.0, "long_range": 4.0, "medium": 5.0 }
distributions = load_dists(sparc_dir, secondary=True, weights=weights)
if directive:
seq = None
runs = []
with open(directive, "r") as file:
lines = file.readlines()
seq = lines[0].strip()
del lines[0]
processing_runs = False
for line in lines:
if len(line.strip()) == 0:
if not processing_runs:
processing_runs = True
continue
if processing_runs:
if line[0] == "#": line = line[1:]
comps = line.strip().split(";")
runs.append([[y.strip() for y in x.split(",")] for x in comps])
completed_outputs = []
for run in runs:
modelname = None
if len(run[1]) > 1: # run[1] must be the input paths, and run[2] must be the output path name
modelname = run[2][0]
range = [[int(x) for x in run[0][0].split("-")][0], [int(x) for x in run[0][1].split("-")][1]]
else: # run[1] must be the output path name
modelname = run[1][0]
range = [int(x) for x in run[0][0].split("-")]
if output:
if os.path.exists(os.path.join(output, modelname[:-4] + ".txt")): continue
if modelname in completed_outputs: continue
if not os.path.exists(os.path.join(input, modelname)):
print modelname, "does not exist, skipping"
continue
if mode == rmsd_mode:
if output:
print modelname, "-->", modelname[:-4] + ".txt"
min_rmsd(os.path.join(input, modelname), native, range=range, output=os.path.join(output, modelname[:-4] + ".txt"))
else:
print modelname
min_rmsd(os.path.join(input, modelname), native, range=range)
elif mode == tmscore_mode:
print modelname
best_tm_score(os.path.join(input, modelname), native, range=range)
elif mode == sparc_mode:
if output:
print modelname, "-->", modelname[:-4] + ".txt"
sparc_validation(os.path.join(input, modelname), native, range, os.path.join(output, modelname[:-4] + ".txt"), distributions=distributions)
else:
print modelname
sparc_validation(os.path.join(input, modelname), native, range, distributions=distributions)
completed_outputs.append(modelname)
else:
if os.path.isdir(input):
for path in os.listdir(input):
if ".pdb" not in path or path[0] == "." or "native" in path: continue
print path
if mode == rmsd_mode:
min_rmsd(os.path.join(input, path), native)
elif mode == tmscore_mode:
best_tm_score(os.path.join(input, path), native)
else:
if mode == rmsd_mode:
min_rmsd(input, native)
elif mode == tmscore_mode:
best_tm_score(input, native)