"""model.py

HM_Net: a language model built on a two-layer Hierarchical Multiscale LSTM
(HM-LSTM), with a gated output module combining both layers' hidden states.
Tutorial_Net: a plain two-layer LSTM language model used as a baseline.
"""
import torch
import torch.nn as nn
import torch.nn.functional as Func
from torch.autograd import Variable
from torch.nn import Module

from layers import HM_LSTM
from utils import masked_NLLLoss  # used by the commented-out masked-loss variant below


class HM_Net(Module):
    """HM-LSTM language model with a gated output module over both layers."""

    def __init__(self, a, size_list, dict_size, embed_size):
        # a: slope parameter forwarded to the HM_LSTM boundary detectors
        #    (slope annealing); size_list: hidden sizes of the two layers.
        super(HM_Net, self).__init__()
        self.dict_size = dict_size
        self.size_list = size_list
        self.drop = nn.Dropout(p=0.5)
        self.embed_in = nn.Embedding(dict_size, embed_size)
        self.HM_LSTM = HM_LSTM(a, embed_size, size_list)
        self.weight = nn.Linear(size_list[0] + size_list[1], 2)  # one output gate per layer
        self.embed_out1 = nn.Linear(size_list[0], dict_size)
        self.embed_out2 = nn.Linear(size_list[1], dict_size)
        self.relu = nn.ReLU()
        # self.logsoftmax = nn.LogSoftmax()
        # self.loss = masked_NLLLoss()
        self.loss = nn.CrossEntropyLoss()

    def forward(self, inputs, target, hidden):
        # inputs: batch_size * time_steps LongTensor of word indices
        # target: flattened batch_size*time_steps LongTensor of next-word indices
        emb = self.embed_in(Variable(inputs, volatile=not self.training))  # batch_size * time_steps * embed_size
        emb = self.drop(emb)
        h_1, h_2, z_1, z_2, hidden = self.HM_LSTM(emb, hidden)  # batch_size * time_steps * hidden_size

        h_1 = self.drop(h_1)  # batch_size * time_steps * hidden_size
        h_2 = self.drop(h_2)

        # Gated output module: one sigmoid gate per layer, computed from the
        # concatenation of both layers' hidden states.
        h = torch.cat((h_1, h_2), 2)
        g = Func.sigmoid(self.weight(h.view(h.size(0) * h.size(1), h.size(2))))
        g_1 = g[:, 0:1]  # batch_size*time_steps, 1
        g_2 = g[:, 1:2]

        h_e1 = g_1.expand(g_1.size(0), self.dict_size) * self.embed_out1(h_1.view(h_1.size(0) * h_1.size(1), h_1.size(2)))
        h_e2 = g_2.expand(g_2.size(0), self.dict_size) * self.embed_out2(h_2.view(h_2.size(0) * h_2.size(1), h_2.size(2)))
        h_e = self.relu(h_e1 + h_e2)  # batch_size*time_steps, dict_size

        # Masked-loss variant (utils.masked_NLLLoss), kept for reference:
        # mask = Variable(mask, requires_grad=False)
        # batch_loss = Variable(torch.zeros(batch_size).cuda())
        batch_loss = self.loss(h_e, Variable(target))
        return batch_loss, hidden, z_1, z_2

    def init_hidden(self, batch_size):
        # Zero initial states for both layers; note the (hidden_size, batch_size)
        # layout expected by HM_LSTM. z_t1/z_t2 are the binary boundary states.
        h_t1 = Variable(torch.zeros(self.size_list[0], batch_size).float().cuda(), requires_grad=False)
        c_t1 = Variable(torch.zeros(self.size_list[0], batch_size).float().cuda(), requires_grad=False)
        z_t1 = Variable(torch.zeros(1, batch_size).float().cuda(), requires_grad=False)
        h_t2 = Variable(torch.zeros(self.size_list[1], batch_size).float().cuda(), requires_grad=False)
        c_t2 = Variable(torch.zeros(self.size_list[1], batch_size).float().cuda(), requires_grad=False)
        z_t2 = Variable(torch.zeros(1, batch_size).float().cuda(), requires_grad=False)
        hidden = (h_t1, c_t1, z_t1, h_t2, c_t2, z_t2)
        return hidden
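
# ---------------------------------------------------------------------------
# Minimal usage sketch for HM_Net, commented out so the module stays
# import-safe. The slope a=1.0 and all sizes are placeholder assumptions,
# and a CUDA device is assumed because init_hidden allocates CUDA tensors:
#
#   size_list = [512, 512]   # per-layer hidden sizes (assumed)
#   net = HM_Net(1.0, size_list, dict_size=10000, embed_size=300).cuda()
#   hidden = net.init_hidden(batch_size=20)
#   inputs = torch.LongTensor(20, 35).random_(0, 10000).cuda()
#   target = torch.LongTensor(20 * 35).random_(0, 10000).cuda()
#   loss, hidden, z_1, z_2 = net(inputs, target, hidden)
#   loss.backward()          # z_1, z_2: per-step boundary indicators
# ---------------------------------------------------------------------------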


class Tutorial_Net(Module):
    """Baseline two-layer LSTM language model."""

    def __init__(self, hidden_size, dict_size, embed_size):
        super(Tutorial_Net, self).__init__()
        self.dict_size = dict_size
        self.hidden_size = hidden_size
        self.drop = nn.Dropout(p=0.5)
        self.embed_in = nn.Embedding(dict_size, embed_size)
        self.LSTM = nn.LSTM(embed_size, hidden_size, 2, batch_first=True, dropout=0.5)
        self.embed_out = nn.Linear(hidden_size, dict_size)
        self.loss = nn.CrossEntropyLoss()
        self.init_weight()

    def forward(self, inputs, target, hidden):
        emb = self.embed_in(Variable(inputs, volatile=not self.training))  # batch_size * time_steps * embed_size
        emb = self.drop(emb)
        h, hidden = self.LSTM(emb, hidden)
        h = self.drop(h)  # batch_size * time_steps * hidden_size
        h_e = self.embed_out(h.view(h.size(0) * h.size(1), h.size(2)))  # batch_size*time_steps, dict_size
        batch_loss = self.loss(h_e, Variable(target))
        return batch_loss, hidden

    def init_weight(self):
        initrange = 0.1
        self.embed_in.weight.data.uniform_(-initrange, initrange)
        self.embed_out.bias.data.fill_(0)
        self.embed_out.weight.data.uniform_(-initrange, initrange)

    def init_hidden(self, batch_size):
        # Use self.hidden_size (rather than a hardcoded 650) so the model
        # works for any hidden size; 2 is the number of LSTM layers.
        h = Variable(torch.zeros(2, batch_size, self.hidden_size).float().cuda())
        c = Variable(torch.zeros(2, batch_size, self.hidden_size).float().cuda())
        hidden = (h, c)
        return hidden
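

# ---------------------------------------------------------------------------
# Illustrative smoke test: a minimal sketch, not part of the training
# pipeline. It exercises Tutorial_Net only, since HM_Net additionally
# depends on layers.HM_LSTM. A CUDA device is assumed because init_hidden
# allocates CUDA tensors, and all sizes below are placeholder assumptions.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    dict_size, embed_size, batch_size, time_steps = 1000, 128, 4, 10
    net = Tutorial_Net(hidden_size=650, dict_size=dict_size, embed_size=embed_size).cuda()
    hidden = net.init_hidden(batch_size)
    # Random indices stand in for real data; in training, target would hold
    # the flattened next-word indices.
    inputs = torch.LongTensor(batch_size, time_steps).random_(0, dict_size).cuda()
    target = torch.LongTensor(batch_size * time_steps).random_(0, dict_size).cuda()
    loss, hidden = net(inputs, target, hidden)
    loss.backward()
    print("Tutorial_Net loss:", loss.data[0])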