4
4
import scipy .spatial .distance as ssd
5
5
import pandas as pd
6
6
import matplotlib .pyplot as plt
7
+ import matplotlib as mpl
7
8
import sys
8
9
import os
9
10
import numpy as np
@@ -16,15 +17,25 @@ def clusterKmeans(ddGs, k):
16
17
centroid , label = sc .vq .kmeans2 (ddGs , k )
17
18
return label
18
19
19
def clusterHierarchical(M, k, return_z=None, return_order=None):
    """Cluster the rows of M with average-linkage hierarchical clustering.

    Parameters
    ----------
    M : pandas.DataFrame
        Observation matrix, one row per item. Its index is reused as the
        index of the returned label Series.
    k : int
        Maximum number of flat clusters to form (``fcluster`` is called
        with the ``'maxclust'`` criterion).
    return_z : bool, optional
        If true, also return the linkage matrix. ``None`` means False.
    return_order : bool, optional
        If true, also return the leaf order of the linkage tree as given
        by ``sch.leaves_list``. ``None`` means False.

    Returns
    -------
    labels : pandas.Series
        Flat cluster id of each row of M. Returned on its own when neither
        optional output is requested.
    (labels[, z][, order]) : tuple
        Returned when at least one optional output is requested; the
        optional items appear in the order ``z`` then ``order``.
    """
    z = sch.linkage(M, method='average', metric='euclidean')
    labels = pd.Series(sch.fcluster(z, k, 'maxclust'), index=M.index)

    # Plain call: hand back just the labels, as callers that pass no flags
    # expect a bare Series rather than a one-element container.
    if not (return_z or return_order):
        return labels

    results = [labels]
    if return_z:
        results.append(z)
    if return_order:
        results.append(sch.leaves_list(z))

    # A tuple keeps `labels, z = clusterHierarchical(..., return_z=True)`
    # working exactly as it did before `return_order` was introduced.
    return tuple(results)
28
39
29
40
def clusterHierarchicalCorr (M , k , return_z = None ):
30
41
if return_z is None :
@@ -155,11 +166,11 @@ def consensusCluster(D, method=None, subsample=None, n_samples=None, k=None,
155
166
I += n
156
167
M = M / I
157
168
158
- labels , z = clusterHierarchical (M , k , return_z = True )
169
+ labels , z , order = clusterHierarchical (M , k , return_z = True , return_order = True )
159
170
if plot :
160
171
sns .clustermap (M , yticklabels = False , xticklabels = False , square = True ,
161
172
row_linkage = z , col_linkage = z )
162
- return labels , M
173
+ return labels , M , order
163
174
164
175
def optimizeNumClusters (D , method = None , subsample = None , n_samples = None , ks = None ,
165
176
numCores = None ):
@@ -169,7 +180,7 @@ def optimizeNumClusters(D, method=None, subsample=None, n_samples=None, ks=None,
169
180
cdfs = {}
170
181
for k in ks :
171
182
print k
172
- labels , M = consensusCluster (D , method = method , subsample = subsample ,
183
+ labels , M , order = consensusCluster (D , method = method , subsample = subsample ,
173
184
n_samples = n_samples , k = k , numCores = numCores ,
174
185
plot = False )
175
186
x , cdf = getCDF (M )
@@ -192,21 +203,21 @@ def plotCDFs(cdfs):
192
203
colorVal = scalarMap .to_rgba (i )
193
204
plt .plot (x , cdfs .loc [:, col ], color = colorVal , label = col )
194
205
195
- values = np .arange (23 , 50 , 4 )
196
- cm = 'Spectral'
197
- cNorm = mpl .colors .Normalize (vmin = 0 , vmax = len (values )- 1 )
198
- scalarMap = mpl .cm .ScalarMappable (norm = cNorm , cmap = cm )
199
-
200
- for i , col in enumerate (values ):
201
- if i % 2 == 0 :
202
- colorVal = 'k'
203
- else :
204
- colorVal = '0.7'
205
- # colorVal = scalarMap.to_rgba(i)
206
- plt .plot (x , cdfs .loc [:, col ], color = colorVal , label = col )
206
+ ## values = np.arange(23, 50, 4)
207
+ # cm = 'Spectral'
208
+ # cNorm = mpl.colors.Normalize(vmin=0, vmax=len(values)-1)
209
+ # scalarMap = mpl.cm.ScalarMappable(norm=cNorm, cmap=cm)
210
+ #
211
+ # for i, col in enumerate(values):
212
+ # # if i%2==0:
213
+ # # colorVal = 'k'
214
+ # # else:
215
+ # # colorVal = '0.7'
216
+ # colorVal = scalarMap.to_rgba(i)
217
+ # plt.plot(x, cdfs.loc[:, col], color=colorVal, label=col)
207
218
208
219
plt .legend (loc = 'lower right' )
209
- plt .savefig (os .path .join (figDirectory , 'all_cdfs.subsampled_0.8.n_samples_500.pdf' ))
220
+ # plt.savefig(os.path.join(figDirectory, 'all_cdfs.subsampled_0.8.n_samples_500.pdf'))
210
221
211
222
def getDeltaK (cdfs ):
212
223
x = cdfs .index
0 commit comments