-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathdataprocessing.py
executable file
·108 lines (78 loc) · 2.37 KB
/
dataprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/env python
from PIL import Image
from os import listdir
from os.path import isfile, join
import numpy as np
import pickle
from time import time
import sys
import h5py
import random
from tqdm import tqdm
# Directory expected to contain the aligned face images (see README).
image_dir = '../img_align_celeba/'
try:
    # Keep regular files only; sub-directories are skipped.
    image_locs = [join(image_dir, f) for f in listdir(image_dir) if isfile(join(image_dir, f))]
except OSError:
    # Missing or unreadable directory: nothing further can run, so stop here
    # rather than failing later with a NameError on image_locs.
    print("expected aligned images directory, see README")
    sys.exit(1)
if not image_locs:
    # An empty directory would otherwise crash on image_locs[0] below.
    print("expected aligned images directory, see README")
    sys.exit(1)
print("first image at " + str(image_locs[0]))
total_imgs = len(image_locs)
print("found %i images in directory" %total_imgs)
def process_image(im):
    """Build a (low-resolution input, high-resolution target) pair from an image.

    The image is converted to RGB if necessary and shrunk by a factor of 1.3.
    The result is cropped by 3 rows at the top and bottom and 4 columns at the
    left and right to form the target.  The input is the target shrunk again
    by a factor of 4.

    ``Image.thumbnail`` resizes in place, so the image object passed in is
    modified unless the RGB conversion produced a new image first.

    Args:
        im: a PIL image.

    Returns:
        A tuple ``(input, target)`` of numpy arrays.
    """
    if im.mode != "RGB":
        im = im.convert("RGB")
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
    # under its current name.
    resample = Image.LANCZOS
    new_size = [int(i / 1.3) for i in im.size]
    im.thumbnail(new_size, resample)
    # Array axes are (rows, columns, channels).
    target = np.array(im)[3:-3, 4:-4, :]
    im = Image.fromarray(target)
    # Floor division keeps the requested size integral on Python 3.
    new_size = [i // 4 for i in im.size]
    im.thumbnail(new_size, resample)
    low_res = np.array(im)
    return low_res, target
def proc_loc(loc):
    """Open the image file at ``loc`` and return its ``(input, target)`` arrays.

    Any error raised while opening or processing the image propagates to the
    caller (including KeyboardInterrupt); this function never returns None.

    Args:
        loc: path of the image file to load.

    Returns:
        The ``(input, target)`` tuple produced by ``process_image``.
    """
    # The context manager closes the underlying file as soon as the pixel
    # data has been copied into numpy arrays; otherwise every processed
    # image would leave a file handle open.
    with Image.open(loc) as im:
        return process_image(im)
# Mode 'a' opens the file read/write when it exists and creates it otherwise.
# Unlike falling back to 'w' on any error, it never truncates an existing
# file that merely failed to open.
hf = h5py.File('faces.hdf5', 'a')

# Reuse the datasets when they are already present; otherwise create them
# resizable along the first axis, one image per gzip-compressed chunk.
# NOTE(review): no dtype is passed to create_dataset, so the h5py default
# applies — confirm that is the intended storage type for image data.
if "target" in hf:
    dset_t = hf["target"]
else:
    dset_t = hf.create_dataset("target", (1, 160, 128, 3),
                               maxshape=(1000000, 160, 128, 3),
                               chunks=(1, 160, 128, 3), compression="gzip")
if "input" in hf:
    dset_i = hf["input"]
else:
    dset_i = hf.create_dataset("input", (1, 40, 32, 3),
                               maxshape=(1000000, 40, 32, 3),
                               chunks=(1, 40, 32, 3), compression="gzip")
batch_size = 1024
# Only the first num_iter batches are processed; total_imgs / batch_size
# batches would be needed to cover every image found.
num_iter = 5
# Writing starts at row 0, so a re-run overwrites rows already stored in an
# existing file and resizes the datasets to the newly written length.
insert_point = 0
try:
    for i in tqdm(range(num_iter)):
        sys.stdout.flush()
        locs = image_locs[i * batch_size : (i + 1) * batch_size]
        if not locs:
            # Fewer images than num_iter * batch_size: nothing left to do.
            break
        X_in = []
        X_ta = []
        for pair in (proc_loc(loc) for loc in locs):
            # None marks an image that should be skipped.
            if pair is not None:
                low_res, target = pair
                X_in.append(low_res)
                X_ta.append(target)
        if not X_in:
            # Avoid a zero-length write when nothing in the batch was usable.
            continue
        X_in = np.array(X_in)
        X_ta = np.array(X_ta)
        end_point = insert_point + len(X_in)
        # Grow both datasets to hold the new rows, then append the batch.
        dset_i.resize((end_point, 40, 32, 3))
        dset_t.resize((end_point, 160, 128, 3))
        dset_i[insert_point:end_point] = X_in
        dset_t[insert_point:end_point] = X_ta
        insert_point = end_point
finally:
    # Close the file even if a batch fails, so written data is flushed.
    hf.close()