# visualization.py
# Forked from TencentAILabHealthcare/scPROTEIN.
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from scprotein import *
from operator import itemgetter
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn import metrics
from sklearn.metrics import silhouette_score,adjusted_rand_score,normalized_mutual_info_score
from sklearn.metrics.cluster import contingency_matrix
import warnings
# Suppress all warnings for the remainder of the script.
warnings.filterwarnings("ignore")
# Random seed used as `random_state` by the stochastic steps in this file
# (PCA and t-SNE in dimension_reduce).
seed = 1
def purity_score(y_true, y_pred):
    """Return the clustering purity of ``y_pred`` measured against ``y_true``.

    Purity is the fraction of samples that fall into the most frequent
    ground-truth class of their predicted cluster.

    Args:
        y_true: Ground-truth class label of every sample.
        y_pred: Predicted cluster label of every sample.

    Returns:
        The purity as a float (1.0 when every cluster holds one class only).
    """
    # Per scikit-learn's contingency_matrix: rows are true classes,
    # columns are predicted clusters.
    counts = contingency_matrix(y_true, y_pred)
    # Size of the dominant true class inside each predicted cluster.
    dominant_per_cluster = counts.max(axis=0)
    total_samples = counts.sum()
    return dominant_per_cluster.sum() / total_samples
def dimension_reduce(embedding):
    """Project an embedding to two dimensions with PCA followed by t-SNE.

    The embedding is first compressed with PCA to at most 50 components and
    the result is then mapped to 2-D with t-SNE. Both steps use the
    module-level ``seed`` as their ``random_state``.

    Args:
        embedding: 2-D array-like of samples by features.

    Returns:
        The 2-D t-SNE coordinates, one row per sample.
    """
    # PCA cannot extract more components than min(n_samples, n_features);
    # cap the usual 50 so that small embeddings do not raise ValueError.
    # For inputs with at least 50 samples and features this is still 50.
    n_components = min(50, *np.shape(embedding))
    X_trans_PCA = PCA(n_components=n_components, random_state=seed).fit_transform(embedding)
    X_trans = TSNE(n_components=2, random_state=seed).fit_transform(X_trans_PCA)
    return X_trans
# Load the ground-truth cell-type label of every cell.
Y_cell_type_label = load_cell_type_labels()
# Raw label string -> integer class id; target_names[i] is the display name
# used for class id i when plotting.
label_dict = {'sc_m0':0, 'sc_u':1}
target_names = ['Macrophage','Monocyte']
# Encode every label as its class id. A per-label lookup (rather than
# itemgetter(*labels), which returns a bare scalar for a single label) always
# yields a 1-D array; an unknown label still raises KeyError.
Y_label = np.array([label_dict[cell_type] for cell_type in Y_cell_type_label])
# Load the learned cell embedding from disk and report its shape.
X_fea = np.load('scPROTEIN_embedding.npy')
print(X_fea.shape)

# Cluster the embedding into as many groups as there are known cell types.
# random_state=seed fixes the K-means initialisation so the metrics printed
# below are reproducible from run to run, matching the seeded PCA / t-SNE.
k_means = KMeans(n_clusters=len(target_names), random_state=seed)
y_predict = k_means.fit_predict(X_fea)

# Clustering quality, each rounded to 3 decimals:
#   ARI - adjusted Rand index against the ground-truth labels
#   ASW - silhouette score of the predicted clusters in embedding space
#   NMI - normalized mutual information against the ground-truth labels
#   PS  - purity score against the ground-truth labels
df_result = pd.DataFrame({
    'ARI': [np.round(adjusted_rand_score(Y_label, y_predict), 3)],
    'ASW': [np.round(silhouette_score(X_fea, y_predict), 3)],
    'NMI': [np.round(normalized_mutual_info_score(Y_label, y_predict), 3)],
    'PS': [np.round(purity_score(Y_label, y_predict), 3)],
})
print(df_result)
# Reduce the embedding to 2-D coordinates for plotting.
X_trans_learned = dimension_reduce(X_fea)

# plot: one scatter series per cell type, coloured by ground-truth label
colors = [plt.cm.Set2(2), plt.cm.Set2(1)]
fig = plt.figure(figsize=(5,5))
for i, name in enumerate(target_names):
    # Boolean mask selecting the cells whose ground-truth class id is i.
    in_class = Y_label == i
    plt.scatter(
        X_trans_learned[in_class, 0],
        X_trans_learned[in_class, 1],
        s=10,
        color=colors[i],
        label=name,
    )
# The series are labelled above; draw the legend so the saved figure shows
# which colour corresponds to which cell type.
plt.legend()
plt.xlabel('TSNE 1')
plt.ylabel('TSNE 2')
# The t-SNE axes are drawn without tick marks.
plt.xticks([])
plt.yticks([])
plt.title('scPROTEIN')
plt.savefig('TSNE_result.jpg', bbox_inches='tight',dpi=300)