22
33"""Cluster LightDock final swarm results using BSAS algorithm"""
44
5- import Bio .PDB
65import sys
6+ import argparse
7+ import Bio .PDB
78from lightdock .util .analysis import read_lightdock_output
9+ from lightdock .util .logger import LoggingManager
10+ from lightdock .constants import CLUSTER_REPRESENTATIVES_FILE
11+
12+
13+ log = LoggingManager .get_logger ('lgd_cluster_bsas' )
14+
15+
16+ def parse_command_line ():
17+ """Parses command line arguments"""
18+ parser = argparse .ArgumentParser (prog = 'lgd_cluster_bsas' )
19+
20+ parser .add_argument ("gso_output_file" , help = "LightDock output file" , metavar = "gso_output_file" )
21+
22+ return parser .parse_args ()
823
924
1025def get_ca_atoms (ids_list ):
26+ """Get all Carbon-alpha atoms of the PDB files specified by the ids_list.
27+
28+ PDB files follow the format lightdock_ID.pdb where ID is in ids_list
29+ """
1130 ca_atoms = {}
12- pdb_parser = Bio .PDB .PDBParser (QUIET = True )
13- for struct_id in ids_list :
14- pdb_file = "lightdock_%d.pdb" % struct_id
15- print "Reading CA from %s" % pdb_file
16- structure = pdb_parser .get_structure (pdb_file , pdb_file )
17- model = structure [0 ]
18- for chain in model :
19- for residue in chain :
20- try :
21- ca_atoms [struct_id ].append (residue ['CA' ])
22- except :
23- ca_atoms [struct_id ] = [residue ['CA' ]]
31+ try :
32+ pdb_parser = Bio .PDB .PDBParser (QUIET = True )
33+ for struct_id in ids_list :
34+ pdb_file = "lightdock_%d.pdb" % struct_id
35+ log .info ("Reading CA from %s" % pdb_file )
36+ structure = pdb_parser .get_structure (pdb_file , pdb_file )
37+ model = structure [0 ]
38+ for chain in model :
39+ for residue in chain :
40+ try :
41+ ca_atoms [struct_id ].append (residue ['CA' ])
42+ except :
43+ ca_atoms [struct_id ] = [residue ['CA' ]]
44+ except IOError , e :
45+ log .error ('Error found reading a structure: %s' % str (e ))
46+ log .error ('Did you generate the LightDock structures corresponding to this output file?' )
47+ raise SystemExit ()
48+
2449 return ca_atoms
2550
2651
2752def clusterize (sorted_ids ):
28- N = len ( sorted_ids )
53+ """Clusters the structures identified by the IDS inside sorted_ids list"""
2954 super_imposer = Bio .PDB .Superimposer ()
3055
3156 clusters_found = 0
@@ -35,41 +60,57 @@ def clusterize(sorted_ids):
3560 ca_atoms = get_ca_atoms (sorted_ids )
3661
3762 for j in sorted_ids [1 :]:
38- print "Glowworm %d with pdb lightdock_%d.pdb" % (j , j )
63+ log . info ( "Glowworm %d with pdb lightdock_%d.pdb" % (j , j ) )
3964 in_cluster = False
4065 for cluster_id in clusters .keys ():
4166 # For each cluster representative
4267 representative_id = clusters [cluster_id ][0 ]
4368 super_imposer .set_atoms (ca_atoms [representative_id ], ca_atoms [j ])
4469 rmsd = super_imposer .rms
45- print 'RMSD between %d and %d is %5.3f' % (representative_id , j , rmsd )
70+ log . info ( 'RMSD between %d and %d is %5.3f' % (representative_id , j , rmsd ) )
4671 if rmsd <= 4.0 :
4772 clusters [cluster_id ].append (j )
48- print "Glowworm %d goes into cluster %d" % (j , cluster_id )
73+ log . info ( "Glowworm %d goes into cluster %d" % (j , cluster_id ) )
4974 in_cluster = True
5075 break
5176
5277 if not in_cluster :
5378 clusters_found += 1
5479 clusters [clusters_found ] = [j ]
55- print "New cluster %d" % clusters_found
56- print clusters
80+ log .info ("New cluster %d" % clusters_found )
5781 return clusters
5882
5983
6084def write_cluster_info (clusters , gso_data ):
61- with open ('cluster.repr' , 'w' ) as output :
85+ """Writes the clustering result"""
86+ with open (CLUSTER_REPRESENTATIVES_FILE , 'w' ) as output :
6287 for id_cluster , ids in clusters .iteritems ():
6388 output .write ("%d:%d:%8.5f:%d:%s\n " % (id_cluster , len (ids ), gso_data [ids [0 ]].scoring ,
6489 ids [0 ], 'lightdock_%d.pdb' % ids [0 ]))
65-
90+ log .info ("Cluster result written to %s file" % CLUSTER_REPRESENTATIVES_FILE )
91+
6692
6793if __name__ == '__main__' :
6894
69- gso_data = read_lightdock_output (sys .argv [1 ])
70- sorted_data = sorted (gso_data , key = lambda k : k .scoring , reverse = True )
95+ try :
96+ # Parse command line
97+ args = parse_command_line ()
98+
99+ # Read LightDock output data
100+ gso_data = read_lightdock_output (args .gso_output_file )
101+
102+ # Sort the glowworms data by scoring
103+ sorted_data = sorted (gso_data , key = lambda k : k .scoring , reverse = True )
104+
105+ # Get the Glowworm ids sorted by their scoring
106+ sorted_ids = [g .id_glowworm for g in sorted_data ]
107+
108+ # Calculate the different clusters
109+ clusters = clusterize (sorted_ids )
71110
72- sorted_ids = [ g . id_glowworm for g in sorted_data ]
73- clusters = clusterize ( sorted_ids )
111+ # Write clustering information
112+ write_cluster_info ( clusters , gso_data )
74113
75- write_cluster_info (clusters , gso_data )
114+ except Exception , e :
115+ log .error ('Clustering has failed. Please see error:' )
116+ log .error (str (e ))
0 commit comments