#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2018/6/14 11:40
# @Author : zzy824
# @File : demoLV1.py
""" an XOR neural network in TensorFlow """
import tensorflow as tf
import numpy as np

""" structure of the neural network:
    2-dimensional input
    2 hidden nodes
    1 output node
"""

# define the parameters
D_input = 2
D_hidden = 2
D_label = 1
lr = 0.0001


""" forward pass """
# a placeholder is built from [dtype, matrix shape (None lets the batch size vary), name]
x = tf.placeholder(tf.float32, [None, D_input], name="x")
t = tf.placeholder(tf.float32, [None, D_label], name="t")

# initialize the hidden-layer weights W
W_h1 = tf.Variable(tf.truncated_normal([D_input, D_hidden], stddev=0.1), name="W_h")
# initialize the hidden-layer bias b
b_h1 = tf.Variable(tf.constant(0.1, shape=[D_hidden]), name="b_h")
# calculate Wx + b
pre_act_h1 = tf.matmul(x, W_h1) + b_h1
# apply the activation: a(Wx + b)
act_h1 = tf.nn.relu(pre_act_h1, name='act_h')
# initialize the output layer
W_o = tf.Variable(tf.truncated_normal([D_hidden, D_label], stddev=0.1), name="W_o")
b_o = tf.Variable(tf.constant(0.1, shape=[D_label]), name="b_o")
pre_act_o = tf.matmul(act_h1, W_o) + b_o
y = tf.nn.relu(pre_act_o, name="act_y")
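# in one line, the forward map above is: y = relu(relu(x @ W_h1 + b_h1) @ W_o + b_o)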


""" backward pass """
# define the loss function: mean squared error
loss = tf.reduce_mean((y - t)**2)
# optimizer: Adam (an adaptive variant of gradient descent); arg: learning rate
train_step = tf.train.AdamOptimizer(lr).minimize(loss)

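# a plain, non-adaptive alternative, if textbook gradient descent is preferred:
# train_step = tf.train.GradientDescentOptimizer(lr).minimize(loss)
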
""" prepare data for training """
X = [[0, 0], [0, 1], [1, 0], [1, 1]]
Y = [[0], [1], [1], [0]]
# TensorFlow needs the training data as np.array; the dtype should match
# the float32 placeholders, so use .astype('float32')
X = np.array(X).astype('float32')
Y = np.array(Y).astype('float32')

""" load the neural network """
# unlike tf.Session(), tf.InteractiveSession() installs itself as the default
# session, so tensor.eval(), op.run(), etc. work without an explicit session
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# train the network
""" GD mode: feed the whole dataset to the network and update the weights with
    the mean gradient. Kept commented out below. """
# T = 30000
# for i in range(T):
#     sess.run(train_step, feed_dict={x: X, t: Y})
""" SGD mode: feed only one sample at a time. The advantage is that it escapes
    saddle points more easily; the disadvantage is an unsteady update direction.
    Kept commented out below. """
# T = 30000
# for i in range(T):
#     for j in range(X.shape[0]):  # X.shape[0] is the number of samples
#         sess.run(train_step, feed_dict={x: [X[j]], t: [Y[j]]})
""" mini-batch GD: each step computes the gradient averaged over one batch of
    data; conventionally, a batch size under 10 is called mini-batch GD.
    Kept commented out below. """
# T = 30000
# b_size = 2  # batch size
# for i in range(T):
#     b_idx = 0  # batch counter
#     while b_idx < X.shape[0]:
#         sess.run(train_step, feed_dict={x: X[b_idx: b_idx + b_size],
#                                         t: Y[b_idx: b_idx + b_size]})
#         b_idx += b_size  # advance the batch index

""" shuffle mode: a trick that randomizes the order of the data to improve training """
def shufflelists(lists):
    """ shuffle several lists with one shared permutation

    :param lists: e.g. [[[0, 0], [0, 1], [1, 0], [1, 1]], [[0], [1], [1], [0]]]
    :return: the shuffled lists, with corresponding rows kept aligned
    """
    ri = np.random.permutation(len(lists[1]))
    out = []
    for l in lists:
        out.append(l[ri])
    return out
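
# usage sketch: both arrays are permuted by the same random index, so each input
# row stays paired with its label, e.g. X2, Y2 = shufflelists([X, Y])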
T = 30000
b_size = 2
for i in range(T):
    b_idx = 0
    X, Y = shufflelists([X, Y])
    while b_idx < X.shape[0]:
        sess.run(train_step, feed_dict={x: X[b_idx: b_idx + b_size],
                                        t: Y[b_idx: b_idx + b_size]})
        b_idx += b_size
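
# optionally, check the final training loss (MSE over the whole dataset)
print(sess.run(loss, feed_dict={x: X, t: Y}))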
# check the predictions
print(sess.run(y, feed_dict={x: X}))

# check the hidden-layer activations
print(sess.run(act_h1, feed_dict={x: X}))

# any tensor's value can be examined with sess.run(...)
print(sess.run([W_h1, W_o]))