# dataset.py -- forked from ikostrikov/TensorFlow-Pointer-Networks
from __future__ import absolute_import, division, print_function
import numpy as np
from scipy.spatial import ConvexHull
class DataGenerator(object):
    """Generate random batches for two toy problems: sorting and convex hull.

    Every returned sequence is a Python list with one NumPy array per
    sequence step; each array has ``batch_size`` rows.
    """

    def __init__(self):
        """Construct a DataGenerator."""
        pass

    def next_batch(self, batch_size, N, train_mode=True, convex_hull=False):
        """Return the next batch of the data.

        Args:
            batch_size: Number of independent samples in the batch.
            N: Number of input elements per sample.
            train_mode: If True, the decoder input holds the target-ordered
                elements; if False, it repeats the unordered input sequence.
            convex_hull: If True, generate convex hull samples (random points
                from [0, 1] x [0, 1]); otherwise generate sorting samples
                (random real numbers in [0, 1]).

        Returns:
            A tuple ``(reader_input_batch, decoder_input_batch,
            writer_outputs_batch)``:

            * reader_input_batch: ``N`` arrays of shape ``(batch_size, dim)``
              with the unordered input (``dim`` is 2 for the convex hull
              problem and 1 for sorting).
            * decoder_input_batch: ``N + 1`` arrays of shape
              ``(batch_size, dim)``; step 0 is left as zeros.
            * writer_outputs_batch: ``N + 1`` one-hot arrays of shape
              ``(batch_size, N + 1)``. Column ``j + 1`` points at input
              position ``j``; column 0 is the stop symbol.
        """
        if convex_hull:
            return self._convex_hull_batch(batch_size, N, train_mode)
        return self._sorting_batch(batch_size, N, train_mode)

    @staticmethod
    def _allocate(batch_size, N, dim):
        """Return zero-filled (reader, decoder, writer) step lists."""
        reader = [np.zeros([batch_size, dim]) for _ in range(N)]
        decoder = [np.zeros([batch_size, dim]) for _ in range(N + 1)]
        writer = [np.zeros([batch_size, N + 1]) for _ in range(N + 1)]
        return reader, decoder, writer

    def _convex_hull_batch(self, batch_size, N, train_mode):
        """Build a batch whose targets list the hull vertices of each sample."""
        reader, decoder, writer = self._allocate(batch_size, N, 2)

        for b in range(batch_size):
            sequence = np.random.rand(N, 2)
            leftmost_point = np.argmin(sequence[:, 0])
            v = ConvexHull(sequence).vertices
            # Rotate the vertex list so that it starts from the leftmost
            # point, which is always one of the hull vertices.
            v = np.roll(v, -list(v).index(leftmost_point))

            for i in range(N):
                reader[i][b] = sequence[i]
                if not train_mode:
                    # In inference we feed the unordered sequence again.
                    decoder[i + 1][b] = sequence[i]

            for i, vertex in enumerate(v):
                if train_mode:
                    decoder[i + 1][b] = sequence[vertex]
                writer[i][b, vertex + 1] = 1.0

            # Write the stop symbol for every step after the last hull
            # vertex, including the final step N. `range` is used instead of
            # `xrange`, which does not exist on Python 3.
            for i in range(len(v), N + 1):
                writer[i][b, 0] = 1.0

        return reader, decoder, writer

    def _sorting_batch(self, batch_size, N, train_mode):
        """Build a batch whose targets list the inputs in ascending order."""
        reader, decoder, writer = self._allocate(batch_size, N, 1)

        for b in range(batch_size):
            shuffle = np.random.permutation(N)
            sequence = np.sort(np.random.random(N))
            shuffled_sequence = sequence[shuffle]
            # Sorted sequence in training; the unordered one in inference.
            decoder_values = sequence if train_mode else shuffled_sequence

            for i in range(N):
                reader[i][b] = shuffled_sequence[i]
                decoder[i + 1][b] = decoder_values[i]
                # Input position i holds the element of rank shuffle[i], so
                # output step shuffle[i] must point at position i.
                writer[shuffle[i]][b, i + 1] = 1.0

            # Points to the stop symbol
            writer[N][b, 0] = 1.0

        return reader, decoder, writer
if __name__ == "__main__":
    # Demo: print one convex hull batch of a single 5-point sample, built in
    # inference mode (train_mode=False).
    generator = DataGenerator()
    batch = generator.next_batch(1, 5, train_mode=False, convex_hull=True)
    labels = ("Reader: ", "Decoder: ", "Writer: ")
    for label, arrays in zip(labels, batch):
        print(label, arrays)