8
8
import numpy as np
9
9
import scipy .cluster .hierarchy as hier
10
10
import Bio .PDB
11
- from lightdock .constants import CLUSTER_ANALYSIS_FILE , DEFAULT_CLUSTER_FOLDER , DEFAULT_RMSD_EXTENSION , \
11
+ from lightdock .constants import CLUSTER_ANALYSIS_FILE , DEFAULT_SWARM_FOLDER , DEFAULT_RMSD_EXTENSION , \
12
12
NUMPY_FILE_SAVE_EXTENSION , EVALUATION_FILE , SCORING_FILE , GSO_OUTPUT_FILE , LIGHTDOCK_PDB_FILE , \
13
13
CLUSTER_DEFAULT_NAME , CLUSTER_REPRESENTATIVES_FILE
14
14
from lightdock .util .logger import LoggingManager
15
- from lightdock .util .analysis import read_rmsd_and_contacts_data ,\
16
- read_lightdock_output
15
+ from lightdock .util .analysis import read_rmsd_and_contacts_data , read_lightdock_output
17
16
18
17
19
- log = LoggingManager .get_logger ('cluster_hierarchical' )
18
+ log = LoggingManager .get_logger ('lgd_cluster_hierarchical' )
19
+
20
20
21
21
POPULATION_THRESHOLD = 10
22
22
@@ -27,7 +27,7 @@ def parse_command_line():
27
27
"""
28
28
parser = argparse .ArgumentParser (prog = 'cluster_poses' )
29
29
30
- parser .add_argument ("cluster_id " , help = "cluster to consider" , type = int , metavar = "cluster_id " )
30
+ parser .add_argument ("swarm_id " , help = "swarm to consider for clustering " , type = int , metavar = "swarm_id " )
31
31
parser .add_argument ("steps" , help = "steps to consider" , type = int , metavar = "steps" )
32
32
parser .add_argument ("-f" , "--file_name" , help = "lightdock output file to consider" , dest = "result_file" )
33
33
parser .add_argument ("-p" , "--ponderated" , help = "Structures selection takes into account cluster population" ,
@@ -38,7 +38,7 @@ def parse_command_line():
38
38
return parser .parse_args ()
39
39
40
40
41
- def calculate_inter_rmsd (cluster_id ):
41
+ def calculate_inter_rmsd (swarm_id ):
42
42
N = len (solutions )
43
43
distances = np .zeros ((N , N ))
44
44
indexes = np .triu_indices (N )
@@ -48,7 +48,7 @@ def calculate_inter_rmsd(cluster_id):
48
48
ca_atoms = [[] for _ in xrange (N )]
49
49
for i in range (N ):
50
50
log .info ('Reading structure %d' % i )
51
- structure_file = os .path .join (DEFAULT_CLUSTER_FOLDER + str (cluster_id ),
51
+ structure_file = os .path .join (DEFAULT_SWARM_FOLDER + str (swarm_id ),
52
52
LIGHTDOCK_PDB_FILE % i )
53
53
structure = pdb_parser .get_structure ("reference" , structure_file )
54
54
model = structure [0 ]
@@ -61,7 +61,7 @@ def calculate_inter_rmsd(cluster_id):
61
61
super_imposer .set_atoms (ca_atoms [i ], ca_atoms [j ])
62
62
distances [i ][j ] = super_imposer .rms
63
63
distances [j ][i ] = distances [i ][j ]
64
- numpy_file_name = os .path .join (DEFAULT_CLUSTER_FOLDER + str (cluster_id ),
64
+ numpy_file_name = os .path .join (DEFAULT_SWARM_FOLDER + str (swarm_id ),
65
65
CLUSTER_DEFAULT_NAME + DEFAULT_RMSD_EXTENSION )
66
66
np .save (numpy_file_name , distances )
67
67
log .info ('Done.' )
@@ -83,17 +83,17 @@ def stats(data):
83
83
# Get contacts and rmsds
84
84
contacts , rmsds = read_rmsd_and_contacts_data (EVALUATION_FILE )
85
85
86
- cluster_id = args .cluster_id
87
- log .info ("cluster %d" % cluster_id )
86
+ swarm_id = args .swarm_id
87
+ log .info ("cluster %d" % swarm_id )
88
88
solutions = []
89
89
if args .result_file :
90
- result_file_name = os .path .join (DEFAULT_CLUSTER_FOLDER + str (cluster_id ), args .result_file )
90
+ result_file_name = os .path .join (DEFAULT_SWARM_FOLDER + str (swarm_id ), args .result_file )
91
91
else :
92
- result_file_name = os .path .join (DEFAULT_CLUSTER_FOLDER + str (cluster_id ), GSO_OUTPUT_FILE % args .steps )
93
- scoring_file_name = os .path .join (DEFAULT_CLUSTER_FOLDER + str (cluster_id ), SCORING_FILE )
92
+ result_file_name = os .path .join (DEFAULT_SWARM_FOLDER + str (swarm_id ), GSO_OUTPUT_FILE % args .steps )
93
+ scoring_file_name = os .path .join (DEFAULT_SWARM_FOLDER + str (swarm_id ), SCORING_FILE )
94
94
results = read_lightdock_output (result_file_name )
95
95
for result in results :
96
- result .id_cluster = cluster_id
96
+ result .id_cluster = swarm_id
97
97
try :
98
98
result .rmsd = rmsds [result .id_cluster ][result .id_glowworm ]
99
99
result .contacts = contacts [result .id_cluster ][result .id_glowworm ]
@@ -104,21 +104,21 @@ def stats(data):
104
104
105
105
if args .rmsd_file :
106
106
log .info ('Previous RMSD matrix found. Loading...' )
107
- rmsd_matrix_file = os .path .join (DEFAULT_CLUSTER_FOLDER + str (cluster_id ),
107
+ rmsd_matrix_file = os .path .join (DEFAULT_SWARM_FOLDER + str (swarm_id ),
108
108
CLUSTER_DEFAULT_NAME +
109
109
DEFAULT_RMSD_EXTENSION + NUMPY_FILE_SAVE_EXTENSION )
110
110
distances = np .load (rmsd_matrix_file )
111
111
else :
112
112
log .info ('Calculating RMSD distances...' )
113
- distances = calculate_inter_rmsd (cluster_id )
113
+ distances = calculate_inter_rmsd (swarm_id )
114
114
log .info ('Done.' )
115
115
116
116
# Calculate clusters
117
117
clusters = hier .fclusterdata (distances , distances .max (), criterion = 'maxclust' ,
118
118
metric = 'euclidean' , depth = 2 , method = 'complete' )
119
119
120
120
# Save data
121
- data_file_name = os .path .join (DEFAULT_CLUSTER_FOLDER + str (cluster_id ), CLUSTER_ANALYSIS_FILE )
121
+ data_file_name = os .path .join (DEFAULT_SWARM_FOLDER + str (swarm_id ), CLUSTER_ANALYSIS_FILE )
122
122
with open (data_file_name , 'w' ) as output :
123
123
output .write ("Clusters found: %d" % max (clusters ) + os .linesep )
124
124
output .write (os .linesep )
@@ -150,7 +150,7 @@ def stats(data):
150
150
output .write ("Clashes: %s%s" % (stats ([solution .contacts for solution in cluster_solutions ]), os .linesep ))
151
151
output .write (os .linesep )
152
152
153
- cluster_file_name = os .path .join (DEFAULT_CLUSTER_FOLDER + str (cluster_id ), CLUSTER_REPRESENTATIVES_FILE )
153
+ cluster_file_name = os .path .join (DEFAULT_SWARM_FOLDER + str (swarm_id ), CLUSTER_REPRESENTATIVES_FILE )
154
154
with open (cluster_file_name , 'w' ) as output :
155
155
solutions_clustered = {}
156
156
for id_solution , solution in enumerate (solutions ):
@@ -192,7 +192,7 @@ def stats(data):
192
192
solution .pdb_file ) + os .linesep )
193
193
194
194
195
- log .info ("Cluster: %d" % args .cluster_id )
195
+ log .info ("Cluster: %d" % args .swarm_id )
196
196
log .info ("Number of steps: %d" % args .steps )
197
197
log .info ("Done." )
198
198
0 commit comments