lda_svi_cupy.py
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 3 20:28:53 2018
@author: ryuhei
"""
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from chainer.backends import cuda
import datasets
np.random.seed(0)
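

# add_at scatters `value` into `a` at the positions given by `slices`,
# accumulating values at duplicated indices: np.add.at on CPU, CuPy's
# scatter_add on GPU (also exposed as cupyx.scatter_add in newer CuPy
# releases).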
def add_at(a, slices, value):
    xp = cuda.get_array_module(a)
    if xp is np:
        return np.add.at(a, slices, value)
    else:
        return xp.scatter_add(
            a, slices, value.astype(np.float32))


if __name__ == '__main__':
    dataset_location = r'E:\Dataset\bow'
    corpus = datasets.KosDataset(dataset_location)
    D = corpus.num_docs
    V = corpus.num_terms

    K = 20
    hp_alpha = 1 / K
    hp_eta = 0.1
    use_gpu = True
    num_epochs = 2000
    batch_size = 100
    max_iter_local = 200  # max iteration for local optimization
    thresh_local_convergence = 0.001  # convergence threshold for local optim

    # learning rate \rho is scheduled as \rho_t = (t + \tau)^{-\kappa}
    tau = 1.0
    kappa = 0.9
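
    # With kappa in (0.5, 1], the step sizes rho_t = (t + tau)^(-kappa)
    # satisfy the Robbins-Monro conditions (sum of rho_t diverges, sum of
    # rho_t^2 converges) that stochastic variational inference relies on
    # for convergence (Hoffman et al., 2013).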

    if not use_gpu:
        xp = np
        from scipy.special import loggamma

        def digamma(x):
            eps = x * 1e-3
            return (loggamma(x + eps) - loggamma(x - eps)) / (eps + eps)
    else:
        import cupy
        xp = cupy
        digamma = cupy.ElementwiseKernel(
            'T x', 'T y',
            '''
            T eps = x * 1e-3;
            y = (lgamma(x + eps) - lgamma(x - eps)) / (eps + eps);
            ''',
            'elementwise_digamma',
        )
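
    # Note: both branches above approximate the digamma function
    # psi(x) = d/dx lgamma(x) with a central finite difference,
    # psi(x) ~ (lgamma(x + eps) - lgamma(x - eps)) / (2 * eps), eps = 1e-3 * x.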

    docs = corpus.docs

    # Initialize lambda according to footnote 6
    p_lambda = np.random.exponential(D * 100 / float(K * V), (K, V)) + hp_eta
    p_lambda = xp.asarray(p_lambda, np.float32)

    # Step 3
    t = 0
    rho = 1
    ppl_history = []
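
    # The training loop below follows the stochastic variational inference
    # recipe for LDA: for each mini-batch, a local coordinate-ascent step
    # optimizes the per-document variational parameters (p_phi, p_gamma), and
    # a stochastic natural-gradient step updates the global topic parameters
    # p_lambda. The "Step" numbers in the comments presumably refer to the
    # SVI-for-LDA algorithm in Hoffman et al. (2013).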
    for epoch in range(1, num_epochs + 1):
        print('epoch', epoch)
        print('rho =', rho)
        ppls = []
        perm = np.random.permutation(D)
        num_batches = D // batch_size
        indexes = np.array_split(perm, num_batches)
        for batch in tqdm(indexes, total=num_batches):
            t += 1
            rho = (t + tau) ** -kappa  # learning rate

            # Step 5-9
            digamma_lambda = digamma(p_lambda)
            digamma_sum_lambda = digamma(p_lambda.sum(1))[:, None, None]

            B = len(batch)  # actual size of this mini-batch
            lengths = [len(docs[d][0]) for d in batch]
            max_length = max(lengths)
            words = np.zeros((B, max_length), np.int64)
            counts = np.zeros((B, max_length), np.float32)
            for i, d in enumerate(batch):
                words_d, counts_d = docs[d]
                length = len(words_d)
                words[i, :length] = words_d
                counts[i, :length] = counts_d
            xp_counts = xp.asarray(counts)
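
            # The variable-length documents are packed into dense
            # (B, max_length) arrays; unused slots keep word id 0 and count 0,
            # so they contribute nothing to the p_gamma and lambda_hat updates.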

            # Step 4
            p_gamma = xp.asarray(
                np.random.gamma(100, 0.01, (K, B)), np.float32)
            for ite in range(max_iter_local):
                p_gamma_prev = p_gamma
                digamma_gamma = digamma(p_gamma)
                digamma_sum_gamma = digamma(p_gamma.sum(0))
                e_log_theta = digamma_gamma - digamma_sum_gamma[None]
                e_log_beta = digamma_lambda[:, words] - digamma_sum_lambda
                exponent = e_log_theta[..., None] + e_log_beta
                p_phi = xp.exp(exponent)
                p_phi /= p_phi.sum(0, keepdims=True)
                p_gamma = hp_alpha + np.sum(p_phi * xp_counts[None], -1)
                mean_diff = xp.abs(p_gamma_prev - p_gamma).mean(0).max()
                if mean_diff < thresh_local_convergence:
                    break
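
            # The loop above iterates the closed-form mean-field updates
            #   p_phi[k, d, i] = exp(E[log theta_dk] + E[log beta_k,w]),
            #                    normalized over k
            #   p_gamma[k, d] = hp_alpha + sum_i counts[d, i] * p_phi[k, d, i]
            # until the mean absolute change in p_gamma (averaged over topics,
            # maximized over the mini-batch) drops below
            # thresh_local_convergence.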

            # Step 10
            lambda_hat = xp.zeros_like(p_lambda)
            add_at(lambda_hat, (Ellipsis, words), p_phi * xp_counts[None])
            lambda_hat *= D / batch_size
            lambda_hat += hp_eta

            # Step 11
            p_lambda = (1 - rho) * p_lambda + rho * lambda_hat
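
            # lambda_hat is the estimate of the topic-word parameters that the
            # full corpus would give if it consisted of D / batch_size copies
            # of this mini-batch (plus the prior hp_eta); blending it into
            # p_lambda with weight rho is the stochastic natural-gradient step.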

            # Rough evaluation
            e_beta = p_lambda / p_lambda.sum(1, keepdims=True)
            # ppl = np.average(-np.log(np.sum(p_phi * e_beta[:, words], 0)),
            #                  weights=counts)
            ppl = np.average(cuda.to_cpu(
                -xp.log(xp.sum(p_phi[:, -1] * e_beta[:, words[-1]], 0))),
                weights=counts[-1])
            ppls.append(ppl)
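
            # Note: this is only a rough monitoring value. It is the
            # count-weighted average of -log(sum_k p_phi[k] * e_beta[k, w])
            # over the words of the last document in the mini-batch under the
            # current variational estimates, not a held-out perplexity
            # (exp() of it would be a perplexity).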

        epoch_ppl = cuda.to_cpu(np.average(ppls))
        print('Perplexity:', epoch_ppl)
        ppl_history.append(epoch_ppl)

        plt.plot(ppl_history)
        plt.grid()
        plt.show()

        topics = p_lambda / p_lambda.sum(1, keepdims=True)
        topics = cuda.to_cpu(topics)
        word_ranks = [[corpus.id2word[w] for w in np.argsort(topic)[::-1]]
                      for topic in cuda.to_cpu(topics)]
        for k, word_ranks_k in enumerate(word_ranks):
            print('{:2d} {}'.format(k, word_ranks_k[:5]))