2
2
3
3
"""Cluster LightDock final swarm results using BSAS algorithm"""
4
4
5
- import Bio .PDB
6
5
import sys
6
+ import argparse
7
+ import Bio .PDB
7
8
from lightdock .util .analysis import read_lightdock_output
9
+ from lightdock .util .logger import LoggingManager
10
+ from lightdock .constants import CLUSTER_REPRESENTATIVES_FILE
11
+
12
+
13
+ log = LoggingManager .get_logger ('lgd_cluster_bsas' )
14
+
15
+
16
+ def parse_command_line ():
17
+ """Parses command line arguments"""
18
+ parser = argparse .ArgumentParser (prog = 'lgd_cluster_bsas' )
19
+
20
+ parser .add_argument ("gso_output_file" , help = "LightDock output file" , metavar = "gso_output_file" )
21
+
22
+ return parser .parse_args ()
8
23
9
24
10
25
def get_ca_atoms (ids_list ):
26
+ """Get all Carbon-alpha atoms of the PDB files specified by the ids_list.
27
+
28
+ PDB files follow the format lightdock_ID.pdb where ID is in ids_list
29
+ """
11
30
ca_atoms = {}
12
- pdb_parser = Bio .PDB .PDBParser (QUIET = True )
13
- for struct_id in ids_list :
14
- pdb_file = "lightdock_%d.pdb" % struct_id
15
- print "Reading CA from %s" % pdb_file
16
- structure = pdb_parser .get_structure (pdb_file , pdb_file )
17
- model = structure [0 ]
18
- for chain in model :
19
- for residue in chain :
20
- try :
21
- ca_atoms [struct_id ].append (residue ['CA' ])
22
- except :
23
- ca_atoms [struct_id ] = [residue ['CA' ]]
31
+ try :
32
+ pdb_parser = Bio .PDB .PDBParser (QUIET = True )
33
+ for struct_id in ids_list :
34
+ pdb_file = "lightdock_%d.pdb" % struct_id
35
+ log .info ("Reading CA from %s" % pdb_file )
36
+ structure = pdb_parser .get_structure (pdb_file , pdb_file )
37
+ model = structure [0 ]
38
+ for chain in model :
39
+ for residue in chain :
40
+ try :
41
+ ca_atoms [struct_id ].append (residue ['CA' ])
42
+ except :
43
+ ca_atoms [struct_id ] = [residue ['CA' ]]
44
+ except IOError , e :
45
+ log .error ('Error found reading a structure: %s' % str (e ))
46
+ log .error ('Did you generate the LightDock structures corresponding to this output file?' )
47
+ raise SystemExit ()
48
+
24
49
return ca_atoms
25
50
26
51
27
52
def clusterize (sorted_ids ):
28
- N = len ( sorted_ids )
53
+ """Clusters the structures identified by the IDS inside sorted_ids list"""
29
54
super_imposer = Bio .PDB .Superimposer ()
30
55
31
56
clusters_found = 0
@@ -35,41 +60,57 @@ def clusterize(sorted_ids):
35
60
ca_atoms = get_ca_atoms (sorted_ids )
36
61
37
62
for j in sorted_ids [1 :]:
38
- print "Glowworm %d with pdb lightdock_%d.pdb" % (j , j )
63
+ log . info ( "Glowworm %d with pdb lightdock_%d.pdb" % (j , j ) )
39
64
in_cluster = False
40
65
for cluster_id in clusters .keys ():
41
66
# For each cluster representative
42
67
representative_id = clusters [cluster_id ][0 ]
43
68
super_imposer .set_atoms (ca_atoms [representative_id ], ca_atoms [j ])
44
69
rmsd = super_imposer .rms
45
- print 'RMSD between %d and %d is %5.3f' % (representative_id , j , rmsd )
70
+ log . info ( 'RMSD between %d and %d is %5.3f' % (representative_id , j , rmsd ) )
46
71
if rmsd <= 4.0 :
47
72
clusters [cluster_id ].append (j )
48
- print "Glowworm %d goes into cluster %d" % (j , cluster_id )
73
+ log . info ( "Glowworm %d goes into cluster %d" % (j , cluster_id ) )
49
74
in_cluster = True
50
75
break
51
76
52
77
if not in_cluster :
53
78
clusters_found += 1
54
79
clusters [clusters_found ] = [j ]
55
- print "New cluster %d" % clusters_found
56
- print clusters
80
+ log .info ("New cluster %d" % clusters_found )
57
81
return clusters
58
82
59
83
60
84
def write_cluster_info (clusters , gso_data ):
61
- with open ('cluster.repr' , 'w' ) as output :
85
+ """Writes the clustering result"""
86
+ with open (CLUSTER_REPRESENTATIVES_FILE , 'w' ) as output :
62
87
for id_cluster , ids in clusters .iteritems ():
63
88
output .write ("%d:%d:%8.5f:%d:%s\n " % (id_cluster , len (ids ), gso_data [ids [0 ]].scoring ,
64
89
ids [0 ], 'lightdock_%d.pdb' % ids [0 ]))
65
-
90
+ log .info ("Cluster result written to %s file" % CLUSTER_REPRESENTATIVES_FILE )
91
+
66
92
67
93
if __name__ == '__main__' :
68
94
69
- gso_data = read_lightdock_output (sys .argv [1 ])
70
- sorted_data = sorted (gso_data , key = lambda k : k .scoring , reverse = True )
95
+ try :
96
+ # Parse command line
97
+ args = parse_command_line ()
98
+
99
+ # Read LightDock output data
100
+ gso_data = read_lightdock_output (args .gso_output_file )
101
+
102
+ # Sort the glowworms data by scoring
103
+ sorted_data = sorted (gso_data , key = lambda k : k .scoring , reverse = True )
104
+
105
+ # Get the Glowworm ids sorted by their scoring
106
+ sorted_ids = [g .id_glowworm for g in sorted_data ]
107
+
108
+ # Calculate the different clusters
109
+ clusters = clusterize (sorted_ids )
71
110
72
- sorted_ids = [ g . id_glowworm for g in sorted_data ]
73
- clusters = clusterize ( sorted_ids )
111
+ # Write clustering information
112
+ write_cluster_info ( clusters , gso_data )
74
113
75
- write_cluster_info (clusters , gso_data )
114
+ except Exception , e :
115
+ log .error ('Clustering has failed. Please see error:' )
116
+ log .error (str (e ))
0 commit comments