-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdata.py
214 lines (179 loc) · 7.32 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
'''
Module contains all the data and image processing functions.
'''
import numpy as np
from PIL import Image
from skimage.color import rgb2lab, lab2rgb, rgb2gray, xyz2lab
from sklearn.neighbors import NearestNeighbors
# Number of the last image; the loops in this module iterate image
# numbers 1..END inclusive.
END = 8189
# Folder holding the input images (image_XXXXX.jpg) and their cropped
# copies (image_cropped_XXXXX.jpg).
IMAGE_FOLDER = "../jpg"
# Folder where generateImages() writes its result images.
OUTPUT_FOLDER = "../output"
def calculateRGBY(image):
    '''
    Computes the average (R, G, B, Y) of an image's non-background pixels.

    Pixels whose colour is exactly (0, 0, 254) are treated as background and
    ignored. Y is the yellow score computed from the averaged channels as
    R * G - B ** 2.

    image: object whose getdata() yields (R, G, B) pixel tuples,
        e.g. an RGB PIL.Image.

    Returns a length-4 numpy array (R, G, B, Y), or None if every pixel is
    the background colour (nothing is left to average).
    '''
    bgColour = (0, 0, 254)
    allPixels = np.array(image.getdata())
    # A pixel is foreground when ANY channel differs from the background.
    # np.logical_or only combines two operands (a third positional argument
    # is its `out` buffer), so the three-channel test uses np.any instead.
    isForeground = np.any(allPixels[:, :3] != bgColour, axis=1)
    pixels = allPixels[isForeground]
    if pixels.shape[0] == 0:
        return None
    rgbAvg = pixels.mean(axis=0)
    return np.append(rgbAvg, rgbAvg[0] * rgbAvg[1] - rgbAvg[2] ** 2)
def sortImagesByYellow():
    '''
    Ranks every segmented image by its average yellow score and caches the result.

    Reads ../segmim/segmim_XXXXX.jpg for XXXXX in 1..END, computes
    (R, G, B, Y) with calculateRGBY() and skips images for which it returns
    None (only background pixels). The rows (R, G, B, Y, index) are sorted by
    Y in decreasing order, saved to npy/rgbySortedByYellow.npy and returned
    as a numpy array.
    '''
    rows = []
    for i in range(1, END + 1):
        # The context manager releases the file handle as soon as the pixel
        # data has been read, instead of leaving it to garbage collection.
        with Image.open('../segmim/segmim_{:05d}.jpg'.format(i)) as im:
            rgby = calculateRGBY(im)
        if rgby is None:
            continue
        rows.append(np.append(rgby, i))
    # Column 3 is Y; the largest yellow score comes first.
    rows.sort(key=lambda row: row[3], reverse=True)
    rows = np.array(rows)
    np.save("npy/rgbySortedByYellow", rows)
    return rows
def randomCrop(image, dim=(384, 384)):
    '''
    Returns a new image cut out of `image` at a random position.

    image: PIL.Image (anything exposing .size and .crop()).
    dim: target (w, h) of the crop; defaults to (384, 384).

    Along an axis where the image is not larger than dim, the full extent of
    that axis is kept (no cropping on that axis).
    '''
    imgW, imgH = image.size
    cropW = dim[0]
    cropH = dim[1]

    # Horizontal placement: offsets 0..(imgW - cropW), or just 0 if too small.
    maxLeft = max(0, imgW - cropW)
    left = np.random.randint(0, maxLeft + 1)
    right = min(imgW, left + cropW)

    # Vertical placement, same rule.
    maxTop = max(0, imgH - cropH)
    top = np.random.randint(0, maxTop + 1)
    bottom = min(imgH, top + cropH)

    box = (left, top, right, bottom)
    return image.crop(box)
def randomCropAll():
    '''
    Writes a randomly cropped copy of every image in IMAGE_FOLDER.

    For each number XXXXX in 1..END, image_XXXXX.jpg is cropped with
    randomCrop() using its default size of 384x384 and saved as
    image_cropped_XXXXX.jpg in the same folder. Images smaller than the crop
    size along an axis keep their full extent on that axis, so the output is
    only 384x384 when the source is at least that large.
    '''
    for i in range(1, END + 1):
        source = '{}/image_{:05d}.jpg'.format(IMAGE_FOLDER, i)
        target = '{}/image_cropped_{:05d}.jpg'.format(IMAGE_FOLDER, i)
        # Crop and save while the source is open, then release its handle.
        with Image.open(source) as im:
            randomCrop(im).save(target)
def loadImageData(ids):
    '''
    Loads the cropped images for the given image IDs and splits them into
    LAB channels.

    ids: iterable of image numbers; image_cropped_XXXXX.jpg is read from
        IMAGE_FOLDER for each one.

    Returns 2 numpy float arrays X, Y, where n is the number of images loaded.
    X is an (n, d1, d2, 1) array containing the L values in the LAB space.
    Y is an (n, d1, d2, 2) array containing the A, B values in the LAB space.
    (d1, d2) are the array dimensions of the decoded image -- presumably
    (height, width); the two coincide for square crops.
    '''
    X = []
    Y = []
    for i in ids:
        path = '{}/image_cropped_{:05d}.jpg'.format(IMAGE_FOLDER, i)
        # Decode inside the context manager so the file is closed right away.
        with Image.open(path) as img:
            rgb = np.array(img)
        # Convert to LAB once and slice, rather than converting per channel.
        lab = rgb2lab(rgb)
        X.append(lab[:, :, :1])
        Y.append(lab[:, :, 1:])
    X = np.array(X, dtype=float)
    Y = np.array(Y, dtype=float)
    return X, Y
def getImageIds(fraction):
    '''
    Returns the image IDs to use for training and testing.

    The IDs are read from npy/rgbySortedByYellow.npy in stored order; that
    file is presumably already sorted by yellow when it was saved.

    fraction: number between 0 and 1 giving the share of images to take from
        the front of that order. For our project, we use 0.1 for easy,
        0.5 for medium and 1.0 for hard,
        e.g. easy => take the 10% most yellow images.
    '''
    table = np.load("npy/rgbySortedByYellow.npy")
    # Column 4 of each row holds the image index.
    orderedIds = table[:, 4].astype(int)
    keep = round(orderedIds.shape[0] * fraction)
    return orderedIds[:keep]
def generateImages(L, AB, ids):
    '''
    Generates and saves the images produced by the model by recombining the
    original input (L) with the predicted output (AB).

    L is an (n, width, height, 1) numpy array containing L values of an LAB image.
    AB is an (n, width, height, 2) numpy array containing A and B values of an LAB image.
    ids is a list containing n items for the image IDs to use when saving the images.

    For every ID two files are written to OUTPUT_FOLDER: the RGB image as
    test_result_XXXXX.jpg and its grayscale version as
    test_result_gray_XXXXX.jpg.
    '''
    labShape = L.shape[1:3] + (3,)
    for idx, imageId in enumerate(ids):
        # Reassemble the full LAB image: channel 0 is L, channels 1-2 are A, B.
        lab = np.zeros(labShape)
        lab[:, :, 0] = L[idx][:, :, 0]
        lab[:, :, 1:] = AB[idx]

        colourPath = "{}/test_result_{:05d}.jpg".format(OUTPUT_FOLDER, imageId)
        grayPath = "{}/test_result_gray_{:05d}.jpg".format(OUTPUT_FOLDER, imageId)

        # Conversion results are scaled by 255 and stored as 8-bit values.
        rgb = (lab2rgb(lab) * 255).astype(np.uint8)
        gray = (rgb2gray(rgb) * 255).astype(np.uint8)

        Image.fromarray(rgb).save(colourPath)
        Image.fromarray(gray).save(grayPath)
def quantize(AB, bins, k=5):
    '''
    Quantizes image AB values into a discrete probability distribution over
    the most similar colours from a specified colour palette (or "bins").

    AB is a (width, height, 2) numpy array containing the AB values of an image.
    bins is a (m, 2) numpy array containing m different AB values to quantize to.
    k is an int to set the number of nearest bins to quantize to. Default is 5.

    Returns a (width, height, m) float32 numpy array containing the per-pixel
    discrete probability distribution: the k nearest bins of a pixel share a
    total probability of 1, all other bins are 0.
    '''
    binCount = bins.shape[0]
    dim0, dim1 = AB.shape[:2]
    pixelCount = dim0 * dim1

    # Look up the k closest palette entries for every pixel at once.
    knn = NearestNeighbors(n_neighbors=k)
    knn.fit(bins)
    pixels = AB.reshape(pixelCount, 2)
    distances, nearest = knn.kneighbors(pixels)

    # Gaussian kernel on the distance gives the weights, which are then
    # normalized per pixel.
    sigma = 5.0
    weights = np.exp(-(distances ** 2) / (2 * sigma ** 2))
    weights = weights / weights.sum(axis=1, keepdims=True)

    # Scatter the k weights of each pixel into its row of the dense table.
    table = np.zeros((pixelCount, binCount))
    rowIdx = np.arange(pixelCount).reshape(-1, 1)
    table[rowIdx, nearest] = weights
    return table.reshape(dim0, dim1, binCount).astype(np.float32)
def batchQuantize(Y, bins, k=5):
    '''
    Convenience function that applies quantize() to several images at once.

    Y is a (n, width, height, 2) numpy array containing AB values of n images.
    bins - see quantize().
    k - see quantize(). Defaults to 5.

    Returns a (n, width, height, m) float32 numpy array where m is the number
    of colour bins.
    '''
    perImage = [quantize(ab, bins, k) for ab in Y]
    return np.array(perImage, dtype=np.float32)
def unquantize(prob, bins, T=0.38):
    '''
    Restores image AB values from a discrete probability distribution over
    the specified colour palette (or "bins"). Each pixel value is calculated
    as the expected value of its temperature-adjusted distribution.

    prob is a (width, height, m) numpy array containing the probability
        distribution of each colour value per pixel. Any number of leading
        axes is accepted; the bins must be on the last axis.
    bins is a (m, 2) numpy array containing the m AB values of the distribution.
    T is a parameter in the interval (0, 1] to adjust the distribution.
        Default is 0.38. T = 1 predicts the mean while T near 0 predicts the mode.

    Returns an array shaped like prob with the last axis replaced by the two
    AB values. A pixel whose probabilities are all zero yields NaN.
    '''
    # Bins with probability exactly 0 map to log(0) = -inf, which exp() turns
    # back into 0. That is intended, so numpy's divide-by-zero warning is
    # silenced here.
    with np.errstate(divide='ignore'):
        logits = np.log(prob) / T
    # Shift so the largest term per pixel is exp(0) = 1. This is the same
    # distribution after normalization, but it cannot underflow to all zeros
    # (and then 0/0) when the probabilities are small and T is low.
    logits = logits - np.max(logits, axis=-1, keepdims=True)
    adjusted = np.exp(logits)
    adjusted /= np.sum(adjusted, axis=-1, keepdims=True)
    return np.dot(adjusted, bins)
def batchUnquantize(Y, bins, T=0.38):
    '''
    Convenience function that applies unquantize() to several images at once.

    Y is a (n, width, height, m) numpy array containing the per-pixel
        probability distribution of n images, where m is the number of
        colour bins.
    bins - see unquantize().
    T - see unquantize(). Defaults to 0.38.

    Returns a (n, width, height, 2) numpy array of AB values for n images.
    '''
    perImage = [unquantize(prob, bins, T) for prob in Y]
    return np.array(perImage)