Skip to content

Commit 89ae6bd

Browse files
committed
BSAS clustering improved. Tests added.
1 parent 9c6b857 commit 89ae6bd

16 files changed

+20560
-27
lines changed

bin/post/lgd_cluster_bsas.py

Lines changed: 67 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,30 +2,55 @@
22

33
"""Cluster LightDock final swarm results using BSAS algorithm"""
44

5-
import Bio.PDB
65
import sys
6+
import argparse
7+
import Bio.PDB
78
from lightdock.util.analysis import read_lightdock_output
9+
from lightdock.util.logger import LoggingManager
10+
from lightdock.constants import CLUSTER_REPRESENTATIVES_FILE
11+
12+
13+
log = LoggingManager.get_logger('lgd_cluster_bsas')
14+
15+
16+
def parse_command_line():
17+
"""Parses command line arguments"""
18+
parser = argparse.ArgumentParser(prog='lgd_cluster_bsas')
19+
20+
parser.add_argument("gso_output_file", help="LightDock output file", metavar="gso_output_file")
21+
22+
return parser.parse_args()
823

924

1025
def get_ca_atoms(ids_list):
26+
"""Get all Carbon-alpha atoms of the PDB files specified by the ids_list.
27+
28+
PDB files follow the format lightdock_ID.pdb where ID is in ids_list
29+
"""
1130
ca_atoms = {}
12-
pdb_parser = Bio.PDB.PDBParser(QUIET=True)
13-
for struct_id in ids_list:
14-
pdb_file = "lightdock_%d.pdb" % struct_id
15-
print "Reading CA from %s" % pdb_file
16-
structure = pdb_parser.get_structure(pdb_file, pdb_file)
17-
model = structure[0]
18-
for chain in model:
19-
for residue in chain:
20-
try:
21-
ca_atoms[struct_id].append(residue['CA'])
22-
except:
23-
ca_atoms[struct_id] = [residue['CA']]
31+
try:
32+
pdb_parser = Bio.PDB.PDBParser(QUIET=True)
33+
for struct_id in ids_list:
34+
pdb_file = "lightdock_%d.pdb" % struct_id
35+
log.info("Reading CA from %s" % pdb_file)
36+
structure = pdb_parser.get_structure(pdb_file, pdb_file)
37+
model = structure[0]
38+
for chain in model:
39+
for residue in chain:
40+
try:
41+
ca_atoms[struct_id].append(residue['CA'])
42+
except:
43+
ca_atoms[struct_id] = [residue['CA']]
44+
except IOError, e:
45+
log.error('Error found reading a structure: %s' % str(e))
46+
log.error('Did you generate the LightDock structures corresponding to this output file?')
47+
raise SystemExit()
48+
2449
return ca_atoms
2550

2651

2752
def clusterize(sorted_ids):
28-
N = len(sorted_ids)
53+
"""Clusters the structures identified by the IDS inside sorted_ids list"""
2954
super_imposer = Bio.PDB.Superimposer()
3055

3156
clusters_found = 0
@@ -35,41 +60,57 @@ def clusterize(sorted_ids):
3560
ca_atoms = get_ca_atoms(sorted_ids)
3661

3762
for j in sorted_ids[1:]:
38-
print "Glowworm %d with pdb lightdock_%d.pdb" % (j, j)
63+
log.info("Glowworm %d with pdb lightdock_%d.pdb" % (j, j))
3964
in_cluster = False
4065
for cluster_id in clusters.keys():
4166
# For each cluster representative
4267
representative_id = clusters[cluster_id][0]
4368
super_imposer.set_atoms(ca_atoms[representative_id], ca_atoms[j])
4469
rmsd = super_imposer.rms
45-
print 'RMSD between %d and %d is %5.3f' % (representative_id, j, rmsd)
70+
log.info('RMSD between %d and %d is %5.3f' % (representative_id, j, rmsd))
4671
if rmsd <= 4.0:
4772
clusters[cluster_id].append(j)
48-
print "Glowworm %d goes into cluster %d" % (j, cluster_id)
73+
log.info("Glowworm %d goes into cluster %d" % (j, cluster_id))
4974
in_cluster = True
5075
break
5176

5277
if not in_cluster:
5378
clusters_found += 1
5479
clusters[clusters_found] = [j]
55-
print "New cluster %d" % clusters_found
56-
print clusters
80+
log.info("New cluster %d" % clusters_found)
5781
return clusters
5882

5983

6084
def write_cluster_info(clusters, gso_data):
61-
with open('cluster.repr', 'w') as output:
85+
"""Writes the clustering result"""
86+
with open(CLUSTER_REPRESENTATIVES_FILE, 'w') as output:
6287
for id_cluster, ids in clusters.iteritems():
6388
output.write("%d:%d:%8.5f:%d:%s\n" % (id_cluster, len(ids), gso_data[ids[0]].scoring,
6489
ids[0], 'lightdock_%d.pdb' % ids[0]))
65-
90+
log.info("Cluster result written to %s file" % CLUSTER_REPRESENTATIVES_FILE)
91+
6692

6793
if __name__ == '__main__':
6894

69-
gso_data = read_lightdock_output(sys.argv[1])
70-
sorted_data = sorted(gso_data, key=lambda k: k.scoring, reverse=True)
95+
try:
96+
# Parse command line
97+
args = parse_command_line()
98+
99+
# Read LightDock output data
100+
gso_data = read_lightdock_output(args.gso_output_file)
101+
102+
# Sort the glowworms data by scoring
103+
sorted_data = sorted(gso_data, key=lambda k: k.scoring, reverse=True)
104+
105+
# Get the Glowworm ids sorted by their scoring
106+
sorted_ids = [g.id_glowworm for g in sorted_data]
107+
108+
# Calculate the different clusters
109+
clusters = clusterize(sorted_ids)
71110

72-
sorted_ids = [g.id_glowworm for g in sorted_data]
73-
clusters = clusterize(sorted_ids)
111+
# Write clustering information
112+
write_cluster_info(clusters, gso_data)
74113

75-
write_cluster_info(clusters, gso_data)
114+
except Exception, e:
115+
log.error('Clustering has failed. Please see error:')
116+
log.error(str(e))
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
0:1:15.57999:7:lightdock_7.pdb
2+
1:1: 9.27345:6:lightdock_6.pdb
3+
2:1: 8.16623:3:lightdock_3.pdb
4+
3:1: 8.03540:1:lightdock_1.pdb
5+
4:1: 5.85748:5:lightdock_5.pdb
6+
5:1: 2.35484:0:lightdock_0.pdb
7+
6:1:-2.19542:9:lightdock_9.pdb
8+
7:1:-10.36291:2:lightdock_2.pdb
9+
8:1:-23.42993:8:lightdock_8.pdb
10+
9:1:-33.94650:4:lightdock_4.pdb
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#Coordinates RecID LigID Luciferin Neighbor's number Vision Range Scoring
2+
(14.7506204, 20.4942702, 9.1081612, -0.3097001, 0.9272613, 0.0883699, -0.1909530) 0 0 5.00000000 0 0.200 0.00000000
3+
(10.9102570, 18.6188382, 12.7293257, 0.3381876, 0.7277442, 0.4423856, 0.4003905) 0 0 5.00000000 0 0.200 0.00000000
4+
(14.1718507, 13.3712219, 13.4655547, -0.4251338, 0.1366711, -0.0881444, 0.8904004) 0 0 5.00000000 0 0.200 0.00000000
5+
(12.3061363, 16.1167652, 19.7338787, 0.7615686, 0.3660267, -0.5340857, -0.0281107) 0 0 5.00000000 0 0.200 0.00000000
6+
( 4.1971806, 18.2428115, 5.4964526, -0.6319303, -0.0590299, 0.0760820, 0.7690196) 0 0 5.00000000 0 0.200 0.00000000
7+
( 7.7158419, 23.6550300, 12.1196869, 0.1060624, -0.1248064, 0.1610813, -0.9732558) 0 0 5.00000000 0 0.200 0.00000000
8+
( 9.7472898, 20.3147556, 3.1938222, -0.0750314, 0.7368451, -0.6619065, -0.1153658) 0 0 5.00000000 0 0.200 0.00000000
9+
(16.8561428, 14.0622243, 9.5679458, -0.0081179, -0.5965452, 0.0298970, 0.8019813) 0 0 5.00000000 0 0.200 0.00000000
10+
( 3.8286750, 12.3853831, 16.6062180, 0.5916744, 0.2222111, 0.5014851, 0.5908099) 0 0 5.00000000 0 0.200 0.00000000
11+
( 7.1131252, 26.2597576, 17.0843743, -0.3944380, -0.8431897, 0.3243713, 0.1680270) 0 0 5.00000000 0 0.200 0.00000000
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#Coordinates RecID LigID Luciferin Neighbor's number Vision Range Scoring
2+
(14.7506204, 20.4942702, 9.1081612, -0.3097001, 0.9272613, 0.0883699, -0.1909530) 0 0 3.54114126 0 4.200 2.35484429
3+
(10.9102570, 18.6188382, 12.7293257, 0.3381876, 0.7277442, 0.4423856, 0.4003905) 0 0 12.01044519 0 4.200 8.03539503
4+
(14.1718507, 13.3712219, 13.4655547, -0.4251338, 0.1366711, -0.0881444, 0.8904004) 0 0 -15.42014263 0 4.200 -10.36291104
5+
(12.3061363, 16.1167652, 19.7338787, 0.7615686, 0.3660267, -0.5340857, -0.0281107) 0 0 12.20551035 0 4.200 8.16622958
6+
( 4.1971806, 18.2428115, 5.4964526, -0.6319303, -0.0590299, 0.0760820, 0.7690196) 0 0 -50.58162040 0 4.200 -33.94649715
7+
( 7.7158419, 23.6550300, 12.1196869, 0.1060624, -0.1248064, 0.1610813, -0.9732558) 0 0 8.76332521 0 4.200 5.85747935
8+
( 9.7472898, 20.3147556, 3.1938222, -0.0750314, 0.7368451, -0.6619065, -0.1153658) 0 0 13.85630467 0 4.200 9.27345408
9+
(16.8561428, 14.0622243, 9.5679458, -0.0081179, -0.5965452, 0.0298970, 0.8019813) 0 0 23.25891247 0 4.200 15.57999251
10+
( 3.8286750, 12.3853831, 16.6062180, 0.5916744, 0.2222111, 0.5014851, 0.5908099) 0 0 -34.90214836 0 4.200 -23.42992610
11+
( 7.1131252, 26.2597576, 17.0843743, -0.3944380, -0.8431897, 0.3243713, 0.1680270) 0 0 -3.24298792 0 4.200 -2.19542222

0 commit comments

Comments
 (0)