-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcnn.py
108 lines (94 loc) · 4.54 KB
/
cnn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import torch
import torch.autograd as ag
import torch.nn as nn
import torch.optim as optim
import pdb
import torch.utils.data as data
import torch.nn.functional as F
import sklearn.metrics as metrics
import argparse
import math
# from dataset import Dataset, Temporal_Data
# from dataset import collate_fn, collate_fn_temporal_for_pcnn, collate_fn_temporal, collate_fn1
# import numpy as np
# from character_process import n_letters
# from utils import pad_sequence
# torch.cuda.manual_seed(1)
# torch.manual_seed(1)
class CNN(nn.Module):
    """Sentence-level CNN encoder for bag-based relation extraction.

    Each sentence (a ``seq_len x 3`` LongTensor of word id, pos1 id, pos2 id)
    is embedded as concatenated [word, pos1, pos2] vectors, convolved with a
    single wide 2-D convolution spanning the full embedding width, max-pooled
    over the sequence dimension, and squashed with tanh into a fixed-size
    feature vector of length ``out_c``.

    Expected keys in ``settings``: ``word_embed_size``, ``pos_embed_size``,
    ``out_c``, ``n_rel``, ``vocab_size``, ``pos_limit``, ``dropout_p``,
    ``word_embeds`` (pre-trained word vectors, ``vocab_size x word_embed_size``).
    """

    def __init__(self, settings):
        super(CNN, self).__init__()
        self.word_embed_size = settings['word_embed_size']
        self.pos_embed_size = settings['pos_embed_size']
        # one word embedding plus two entity-relative position embeddings
        self.input_size = self.word_embed_size + 2 * self.pos_embed_size
        self.out_c = settings['out_c']
        self.window = 3
        self.n_rel = settings['n_rel']
        self.vocab_size = settings['vocab_size']
        self.pos_limit = settings['pos_limit']
        # Kernel spans the whole embedding width; padding (window - 1, 0)
        # keeps every window position along the sequence (wide convolution).
        self.conv = nn.Conv2d(1, self.out_c, (self.window, self.input_size),
                              padding=(self.window - 1, 0), bias=False)
        self.feature_size = self.out_c
        # Bias is added after max-pooling rather than inside the conv,
        # hence a separate parameter instead of the conv's own bias.
        self.conv_bias = nn.Parameter(torch.zeros(1, self.out_c), requires_grad=True)
        # Relation embedding matrix and per-relation bias for scoring features.
        self.r_embed = nn.Parameter(torch.zeros(self.n_rel, self.feature_size), requires_grad=True)
        self.r_bias = nn.Parameter(torch.zeros(self.n_rel), requires_grad=True)
        self.tanh = nn.Tanh()
        self.dropout = nn.Dropout(settings['dropout_p'])
        self.pred_sm = nn.LogSoftmax(dim=-1)
        self.atten_sm = nn.Softmax(dim=-1)
        self.limit = 30
        self.w2v = nn.Embedding(self.vocab_size, self.word_embed_size)
        # relative positions lie in [-pos_limit, pos_limit] -> 2*pos_limit+1 buckets
        self.pos1_embed = nn.Embedding(self.pos_limit * 2 + 1, self.pos_embed_size)
        self.pos2_embed = nn.Embedding(self.pos_limit * 2 + 1, self.pos_embed_size)
        # load pre-trained word vectors; still fine-tuned during training
        self.w2v.weight = nn.Parameter(torch.FloatTensor(settings['word_embeds']), requires_grad=True)
        # Xavier-style uniform bounds: sqrt(6 / (fan_in + fan_out)).
        con = math.sqrt(6.0 / (self.out_c + self.n_rel))
        con1 = math.sqrt(6.0 / ((self.pos_embed_size + self.word_embed_size) * self.window))
        nn.init.uniform_(self.conv.weight, a=-con1, b=con1)
        nn.init.uniform_(self.conv_bias, a=-con1, b=con1)
        nn.init.uniform_(self.r_embed, a=-con, b=con)
        nn.init.uniform_(self.r_bias, a=-con, b=con)

    def forward(self, input):
        """Encode a batch of bags of sentences.

        :param input: list of dicts, each with keys ``'bag'`` (list of
            ``seq_len x 3`` LongTensors: word id, pos1 id, pos2 id per token)
            and ``'label'``.
        :return: list of tensors, one per bag, each ``n_sentences x feature_size``.
        """
        bags = [item['bag'] for item in input]
        labels = [item['label'] for item in input]
        return self._create_sentence_embedding(bags, labels)

    def _create_sentence_embedding(self, bags, labels):
        """Encode every sentence of every bag into a feature vector.

        :param bags: list of bags; each bag is a list of ``seq_len x 3`` LongTensors.
        :param labels: parallel list of bag labels. Currently unused here;
            kept in the signature for interface compatibility with callers.
        :return: list of ``n_sentences x feature_size`` tensors, one per bag.
        """
        batch_features = []
        for bag in bags:
            features = []
            for item in bag:
                # Column 0: word ids; columns 1-2: entity-relative positions.
                w2v = self.w2v(item[:, 0])
                pos1 = self.pos1_embed(item[:, 1])
                pos2 = self.pos2_embed(item[:, 2])
                # shape: 1 x 1 x seq_len x input_size (N x C x H x W for Conv2d)
                feature = torch.cat([w2v, pos1, pos2], dim=-1).unsqueeze(0).unsqueeze(0)
                feature = self._enc_each_iter(feature)
                # NOTE(review): dropout is applied unconditionally; could be
                # made switchable via config.
                features.append(self.dropout(feature))
            batch_features.append(torch.cat(features, dim=0))
        return batch_features

    def _enc_each_iter(self, feature):
        """Convolve, max-pool over time, add bias, and squash one sentence.

        :param feature: tensor of shape ``1 x 1 x seq_len x input_size``
        :return: tensor of shape ``1 x feature_size``
        """
        # conv output: 1 x out_c x (seq_len + window - 1) x 1 -> drop width dim
        feature = self.conv(feature).squeeze(-1)
        # max over all window positions, then add the post-pooling bias
        feature = (F.max_pool1d(feature, feature.size(-1)).squeeze(-1)
                   + self.conv_bias).reshape(1, self.feature_size)
        # tanh squashing (note: applied after the bias, unlike lin-16)
        feature = self.tanh(feature)
        return feature