This repository was archived by the owner on Jan 1, 2021. It is now read-only.

Commit 6a7dfb8

solution for assignment 2 and code for assignment 3

1 parent cb78f84

File tree: 14 files changed, +365,132 -1 lines

.gitignore (+1, -1)

@@ -4,7 +4,7 @@
 *.SUNet
 *.pyc
 .env/*
-examples/data/*
+examples/data
 examples/graphs/*
 examples/checkpoints/*
 examples/visualization/*

assignments/02_style_transfer/load_vgg.py (+1)

@@ -6,6 +6,7 @@
 cs20.stanford.edu

 For more details, please read the assignment handout:
+https://docs.google.com/document/d/1FpueD-3mScnD0SJQDtwmOb1FrSwo1NGowkXzMwPoLH4/edit?usp=sharing

 """
 import numpy as np
assignments/02_style_transfer/load_vgg_sol.py (new file, +112 lines)

""" Load VGGNet weights needed for the implementation in TensorFlow
of the paper A Neural Algorithm of Artistic Style (Gatys et al., 2016)

Created by Chip Huyen ([email protected])
CS20: "TensorFlow for Deep Learning Research"
cs20.stanford.edu

For more details, please read the assignment handout:
https://docs.google.com/document/d/1FpueD-3mScnD0SJQDtwmOb1FrSwo1NGowkXzMwPoLH4/edit?usp=sharing
"""
import numpy as np
import scipy.io
import tensorflow as tf

import utils

# VGG-19 parameters file
VGG_DOWNLOAD_LINK = 'http://www.vlfeat.org/matconvnet/models/imagenet-vgg-verydeep-19.mat'
VGG_FILENAME = 'imagenet-vgg-verydeep-19.mat'
EXPECTED_BYTES = 534904783

class VGG(object):
    def __init__(self, input_img):
        utils.download(VGG_DOWNLOAD_LINK, VGG_FILENAME, EXPECTED_BYTES)
        self.vgg_layers = scipy.io.loadmat(VGG_FILENAME)['layers']
        self.input_img = input_img
        # mean pixel values (RGB) that VGG subtracted from its training images
        self.mean_pixels = np.array([123.68, 116.779, 103.939]).reshape((1, 1, 1, 3))

    def _weights(self, layer_idx, expected_layer_name):
        """ Return the weights and biases at layer_idx already trained by VGG """
        W = self.vgg_layers[0][layer_idx][0][0][2][0][0]
        b = self.vgg_layers[0][layer_idx][0][0][2][0][1]
        layer_name = self.vgg_layers[0][layer_idx][0][0][0][0]
        assert layer_name == expected_layer_name
        return W, b.reshape(b.size)
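
    # Note on the nested indexing in _weights() above: scipy.io.loadmat()
    # returns the MatConvNet model as nested MATLAB struct/cell arrays, so
    # self.vgg_layers[0][layer_idx][0][0] is one layer record; its field [0]
    # holds the layer name (checked by the assert) and field [2] holds the
    # [weights, biases] pair. b arrives as a 2-D array, hence the reshape.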

    def conv2d_relu(self, prev_layer, layer_idx, layer_name):
        """ Return the Conv2D layer with ReLU using the weights and
        biases from the VGG model at 'layer_idx'.
        Don't forget to apply ReLU to the output of the convolution.
        Inputs:
            prev_layer: the output tensor from the previous layer
            layer_idx: the index of the current layer in vgg_layers
            layer_name: the string name of the current layer.
                        It's used to specify variable_scope.

        Note that you first need to obtain W and b from the corresponding
        VGG layer using the function _weights() defined above.
        W and b returned from _weights() are numpy arrays, so you have
        to convert them to TF tensors. One way to do it is with tf.constant.

        Hint for choosing strides:
        for small images, you probably don't want to skip any pixels
        """
        ###############################
        ## TO DO
        with tf.variable_scope(layer_name) as scope:
            W, b = self._weights(layer_idx, layer_name)
            W = tf.constant(W, name='weights')
            b = tf.constant(b, name='bias')
            conv2d = tf.nn.conv2d(prev_layer,
                                  filter=W,
                                  strides=[1, 1, 1, 1],
                                  padding='SAME')
            out = tf.nn.relu(conv2d + b)
        ###############################
        setattr(self, layer_name, out)

    def avgpool(self, prev_layer, layer_name):
        """ Return the average pooling layer. The paper suggests that
        average pooling works better than max pooling.
        Input:
            prev_layer: the output tensor from the previous layer
            layer_name: the string you want to name the layer.
                        It's used to specify variable_scope.

        Hint for choosing strides and ksize: choose what you feel appropriate
        """
        ###############################
        ## TO DO
        with tf.variable_scope(layer_name):
            out = tf.nn.avg_pool(prev_layer,
                                 ksize=[1, 2, 2, 1],
                                 strides=[1, 2, 2, 1],
                                 padding='SAME')
        ###############################
        setattr(self, layer_name, out)

    def load(self):
        self.conv2d_relu(self.input_img, 0, 'conv1_1')
        self.conv2d_relu(self.conv1_1, 2, 'conv1_2')
        self.avgpool(self.conv1_2, 'avgpool1')
        self.conv2d_relu(self.avgpool1, 5, 'conv2_1')
        self.conv2d_relu(self.conv2_1, 7, 'conv2_2')
        self.avgpool(self.conv2_2, 'avgpool2')
        self.conv2d_relu(self.avgpool2, 10, 'conv3_1')
        self.conv2d_relu(self.conv3_1, 12, 'conv3_2')
        self.conv2d_relu(self.conv3_2, 14, 'conv3_3')
        self.conv2d_relu(self.conv3_3, 16, 'conv3_4')
        self.avgpool(self.conv3_4, 'avgpool3')
        self.conv2d_relu(self.avgpool3, 19, 'conv4_1')
        self.conv2d_relu(self.conv4_1, 21, 'conv4_2')
        self.conv2d_relu(self.conv4_2, 23, 'conv4_3')
        self.conv2d_relu(self.conv4_3, 25, 'conv4_4')
        self.avgpool(self.conv4_4, 'avgpool4')
        self.conv2d_relu(self.avgpool4, 28, 'conv5_1')
        self.conv2d_relu(self.conv5_1, 30, 'conv5_2')
        self.conv2d_relu(self.conv5_2, 32, 'conv5_3')
        self.conv2d_relu(self.conv5_3, 34, 'conv5_4')
        self.avgpool(self.conv5_4, 'avgpool5')
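
A minimal usage sketch for this class, assuming TensorFlow 1.x and an image variable shaped [1, height, width, 3] (the shape here is illustrative; it matches how the style-transfer script below wires it up):

    import tensorflow as tf
    import load_vgg_sol

    # a trainable image variable the network will be evaluated on
    input_img = tf.get_variable('in_img', shape=[1, 250, 333, 3],
                                dtype=tf.float32,
                                initializer=tf.zeros_initializer())
    vgg = load_vgg_sol.VGG(input_img)
    vgg.load()
    # after load(), each layer's output is an attribute, e.g. vgg.conv4_2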
assignments/02_style_transfer/style_transfer_sol.py (new file, +244 lines)

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import time

import numpy as np
import tensorflow as tf

import load_vgg_sol
import utils

def setup():
    utils.safe_mkdir('checkpoints')
    utils.safe_mkdir('checkpoints/style_transfer')  # saver.save() below needs this directory to exist
    utils.safe_mkdir('outputs')

class StyleTransfer(object):
    def __init__(self, content_img, style_img, img_width, img_height):
        '''
        img_width and img_height are the dimensions we expect of the generated image.
        We will resize the input content image and input style image to match these dimensions.
        Feel free to alter any hyperparameter here and see how it affects your training.
        '''
        self.img_width = img_width
        self.img_height = img_height
        self.content_img = utils.get_resized_image(content_img, img_width, img_height)
        self.style_img = utils.get_resized_image(style_img, img_width, img_height)
        self.initial_img = utils.generate_noise_image(self.content_img, img_width, img_height)

        ###############################
        ## TO DO
        ## create global step (gstep) and hyperparameters for the model
        self.content_layer = 'conv4_2'
        self.style_layers = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
        self.content_w = 0.01
        self.style_w = 1
        self.style_layer_w = [0.5, 1.0, 1.5, 3.0, 4.0]
        self.gstep = tf.Variable(0, dtype=tf.int32,
                                 trainable=False, name='global_step')
        self.lr = 2.0
        ###############################
    def create_input(self):
        '''
        We will use one input_img as a placeholder for the content image,
        style image, and generated image, because:
            1. they have the same dimensions
            2. we have to extract the same set of features from them
        We use a variable instead of a placeholder because we are, at the
        same time, training the generated image to get the desired result.

        Note: image height corresponds to the number of rows, not columns.
        '''
        with tf.variable_scope('input') as scope:
            self.input_img = tf.get_variable('in_img',
                                             shape=([1, self.img_height, self.img_width, 3]),
                                             dtype=tf.float32,
                                             initializer=tf.zeros_initializer())

    def load_vgg(self):
        '''
        Load the saved model parameters of VGG-19, using the input_img
        as the input to compute the output at each layer of vgg.

        During training, VGG-19 mean-centered all images and found the mean pixels
        to be [123.68, 116.779, 103.939] along RGB dimensions. We have to subtract
        this mean from our images.
        '''
        self.vgg = load_vgg_sol.VGG(self.input_img)
        self.vgg.load()
        self.content_img -= self.vgg.mean_pixels
        self.style_img -= self.vgg.mean_pixels

    def _content_loss(self, P, F):
        ''' Calculate the loss between the feature representation of the
        content image and the generated image.

        Inputs:
            P: content representation of the content image
            F: content representation of the generated image
        Read the assignment handout for more details.

        Note: Don't use the coefficient 0.5 as defined in the paper.
        Use the coefficient defined in the assignment handout.
        '''
        ###############################
        ## TO DO
        self.content_loss = tf.reduce_sum((F - P) ** 2) / (4.0 * P.size)
        ###############################

    def _gram_matrix(self, F, N, M):
        """ Create and return the gram matrix for tensor F
        Hint: you'll first have to reshape F
        """
        ###############################
        ## TO DO
        F = tf.reshape(F, (M, N))
        return tf.matmul(tf.transpose(F), F)
        ###############################
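
    # Shape note for _gram_matrix(): F is assumed to arrive with shape
    # [1, H, W, N] (one image, N filters). Reshaping to (M, N) with
    # M = H * W stacks the spatial positions as rows, so matmul(F^T, F)
    # produces the N x N Gram matrix G_ij = sum_k F_ki F_kj of Gatys et al.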
    def _single_style_loss(self, a, g):
        """ Calculate the style loss at a certain layer
        Inputs:
            a: the feature representation of the style image at that layer
            g: the feature representation of the generated image at that layer
        Output:
            the style loss at that layer (which is E_l in the paper)

        Hint: 1. you'll have to use the function _gram_matrix()
              2. we'll use the same coefficient for style loss as in the paper
              3. a and g are feature representations, not gram matrices
        """
        ###############################
        ## TO DO
        N = a.shape[3]  # number of filters
        M = a.shape[1] * a.shape[2]  # height times width of the feature map
        A = self._gram_matrix(a, N, M)
        G = self._gram_matrix(g, N, M)
        return tf.reduce_sum((G - A) ** 2 / ((2 * N * M) ** 2))
        ###############################

    def _style_loss(self, A):
        """ Calculate the total style loss as a weighted sum
        of style losses at all style layers
        Hint: you'll have to use _single_style_loss()
        """
        n_layers = len(A)
        E = [self._single_style_loss(A[i], getattr(self.vgg, self.style_layers[i])) for i in range(n_layers)]

        ###############################
        ## TO DO
        self.style_loss = sum([self.style_layer_w[i] * E[i] for i in range(n_layers)])
        ###############################

    def losses(self):
        with tf.variable_scope('losses') as scope:
            with tf.Session() as sess:
                # assign content image to the input variable
                sess.run(self.input_img.assign(self.content_img))
                gen_img_content = getattr(self.vgg, self.content_layer)
                content_img_content = sess.run(gen_img_content)
            self._content_loss(content_img_content, gen_img_content)

            with tf.Session() as sess:
                sess.run(self.input_img.assign(self.style_img))
                style_layers = sess.run([getattr(self.vgg, layer) for layer in self.style_layers])
            self._style_loss(style_layers)

            ##########################################
            ## TO DO: create total loss.
            ## Hint: don't forget the weights for the content loss and style loss
            self.total_loss = self.content_w * self.content_loss + self.style_w * self.style_loss
            ##########################################
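
    # Note on losses(): the two short-lived sessions above evaluate the network
    # once with the content image and once with the style image, so the targets
    # P (content) and A (style) enter the losses as fixed numpy constants;
    # only the generated image held in input_img remains trainable.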
    def optimize(self):
        ###############################
        ## TO DO: create optimizer
        self.opt = tf.train.AdamOptimizer(self.lr).minimize(self.total_loss,
                                                            global_step=self.gstep)
        ###############################

    def create_summary(self):
        ###############################
        ## TO DO: create summaries for all the losses
        ## Hint: don't forget to merge them
        with tf.name_scope('summaries'):
            tf.summary.scalar('content_loss', self.content_loss)
            tf.summary.scalar('style_loss', self.style_loss)
            tf.summary.scalar('total_loss', self.total_loss)
            self.summary_op = tf.summary.merge_all()
        ###############################

    def build(self):
        self.create_input()
        self.load_vgg()
        self.losses()
        self.optimize()
        self.create_summary()

    def train(self, n_iters):
        skip_step = 1
        with tf.Session() as sess:
            ###############################
            ## TO DO:
            ## 1. initialize your variables
            ## 2. create writer to write your graph
            sess.run(tf.global_variables_initializer())
            writer = tf.summary.FileWriter('graphs/style_transfer', sess.graph)
            ###############################
            sess.run(self.input_img.assign(self.initial_img))

            ###############################
            ## TO DO:
            ## 1. create a saver object
            ## 2. check if a checkpoint exists, restore the variables
            saver = tf.train.Saver()
            ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/style_transfer/checkpoint'))
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            ##############################

            initial_step = self.gstep.eval()

            start_time = time.time()
            for index in range(initial_step, n_iters):
                if index >= 5 and index < 20:
                    skip_step = 10
                elif index >= 20:
                    skip_step = 20

                sess.run(self.opt)
                if (index + 1) % skip_step == 0:
                    ###############################
                    ## TO DO: obtain generated image, loss, and summary
                    gen_image, total_loss, summary = sess.run([self.input_img,
                                                               self.total_loss,
                                                               self.summary_op])
                    ###############################

                    # add back the mean pixels we subtracted before
                    gen_image = gen_image + self.vgg.mean_pixels
                    writer.add_summary(summary, global_step=index)
                    print('Step {}\n   Sum: {:5.1f}'.format(index + 1, np.sum(gen_image)))
                    print('   Loss: {:5.1f}'.format(total_loss))
                    print('   Took: {} seconds'.format(time.time() - start_time))
                    start_time = time.time()

                    filename = 'outputs/%d.png' % (index)
                    utils.save_image(filename, gen_image)

                    if (index + 1) % 20 == 0:
                        ###############################
                        ## TO DO: save the variables into a checkpoint
                        saver.save(sess, 'checkpoints/style_transfer/style_transfer', index)
                        ###############################

if __name__ == '__main__':
    setup()
    machine = StyleTransfer('content/deadpool.jpg', 'styles/guernica.jpg', 333, 250)
    machine.build()
    machine.train(300)
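
For reference, the objective these losses implement (a reconstruction from the code above, in the notation of Gatys et al.; s denotes the number of entries in P, N_l the number of filters and M_l the feature-map height times width at layer l):

    L_content = (1 / (4 s)) * sum_ij (F_ij - P_ij)^2
    E_l       = (1 / (4 N_l^2 M_l^2)) * sum_ij (G^l_ij - A^l_ij)^2
    L_style   = sum_l w_l * E_l
    L_total   = content_w * L_content + style_w * L_style,  with content_w = 0.01 and style_w = 1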
