Recognizing the MNIST Dataset with TensorFlow
2018-02-08
拓季
I have only been working with TensorFlow for a short while and still feel I know too little about it, which is why I have long been reluctant to write notes on how to use it. A good way to learn code is to implement the same task in several different ways, which also makes it easier to compare the strengths and weaknesses of each approach directly. This post collects code for recognizing the MNIST handwritten digits with several different types of neural networks built in TensorFlow, so the construction of the different networks can be compared side by side. I have added plenty of comments to make the code easier to follow, and I am posting it here mainly for my own reference; the copyright of the code belongs to the respective authors.
For image data, a CNN is the first thing that comes to mind. The first listing builds a CNN from scratch:
# this cell's code is adapted from Udacity
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("./mnist", one_hot=True, reshape=False)
# parameters
learning_rate = 0.00001
epochs = 10
batch_size = 128
# number of samples used when calculating validation and test accuracy
test_valid_size = 256
# network parameters
n_classes = 10
dropout = 0.75
# weights and biases
# the shape of the filter weight is (height, width, input_depth, output_depth)
weights = {
'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
'wd1': tf.Variable(tf.random_normal([7*7*64, 1024])),
'out': tf.Variable(tf.random_normal([1024, n_classes]))}
# the shape of the filter bias is (output_depth,)
biases = {
'bc1': tf.Variable(tf.random_normal([32])),
'bc2': tf.Variable(tf.random_normal([64])),
'bd1': tf.Variable(tf.random_normal([1024])),
'out': tf.Variable(tf.random_normal([n_classes]))}
# strides are specified per dimension: (batch_size, input_height, input_width, depth)
# the stride for batch and input_channels (the first and fourth elements
# of the strides array) is generally set to 1, so that the model uses
# every example and every input channel
# it's better practice to remove the batches or channels you want to skip
# from the data set than to use a stride to skip them
#
# tf.nn.conv2d requires the input to be 4D: (batch_size, height, width, depth)
def conv2d(x, W, b, strides=1):
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    # tf.nn.bias_add adds the 1-D bias along the last dimension of x
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

def maxpool2d(x, k=2):
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')
def conv_net(x, weights, biases, dropout):
    # layer 1 - 28*28*1 to 14*14*32
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    conv1 = maxpool2d(conv1, k=2)
    # layer 2 - 14*14*32 to 7*7*64
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    conv2 = maxpool2d(conv2, k=2)
    # fully connected layer - 7*7*64 to 1024
    # tensor.get_shape().as_list() returns the shape of the tensor as a list
    fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    fc1 = tf.nn.dropout(fc1, dropout)
    # output layer
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    return out
# graph input placeholders
x = tf.placeholder(tf.float32, [None, 28, 28, 1])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32)
# model
logits = conv_net(x, weights, biases, keep_prob)
# define loss and optimizer
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\
    .minimize(cost)
# accuracy
correct_pred = tf.equal(tf.argmax(logits, axis=1), tf.argmax(y, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# initializing the variables
init = tf.global_variables_initializer()
# launch the graph
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(epochs):
        for batch in range(mnist.train.num_examples // batch_size):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            sess.run(optimizer, feed_dict={
                x: batch_x,
                y: batch_y,
                keep_prob: dropout})
            # calculate batch loss and accuracy
            loss = sess.run(cost, feed_dict={
                x: batch_x,
                y: batch_y,
                keep_prob: 1.})
            valid_acc = sess.run(accuracy, feed_dict={
                x: mnist.validation.images[:test_valid_size],
                y: mnist.validation.labels[:test_valid_size],
                keep_prob: 1.})
            print('Epoch {:>2}, Batch {:>3} - loss: {:>10.4f} Validation Accuracy: {:.6f}'.format(
                epoch + 1,
                batch + 1,
                loss,
                valid_acc))
    test_acc = sess.run(accuracy, feed_dict={
        x: mnist.test.images[:test_valid_size],
        y: mnist.test.labels[:test_valid_size],
        keep_prob: 1.})
    print('Testing Accuracy: {}'.format(test_acc))
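The 7*7*64 input size of 'wd1' follows directly from the layer shapes: 'SAME' padding with stride 1 keeps each convolution at 28x28, and each k=2 max pool halves the spatial size, so the feature map shrinks 28 -> 14 -> 7 while the depth grows to 64. Below is a minimal standalone sketch to confirm the shape arithmetic; the _demo names are hypothetical and not part of the listing above.
import tensorflow as tf

x_demo = tf.placeholder(tf.float32, [None, 28, 28, 1])
w_demo = tf.Variable(tf.random_normal([5, 5, 1, 32]))
# 'SAME' padding with stride 1 preserves the 28x28 spatial size
conv_demo = tf.nn.conv2d(x_demo, w_demo, strides=[1, 1, 1, 1], padding='SAME')
# a 2x2 max pool with stride 2 halves it to 14x14
pool_demo = tf.nn.max_pool(conv_demo, ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1], padding='SAME')
print(conv_demo.get_shape().as_list())  # [None, 28, 28, 32]
print(pool_demo.get_shape().as_list())  # [None, 14, 14, 32]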
TensorFlow also provides the more convenient tf.layers API; the code below uses it to build a CNN with the same architecture:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("./mnist", one_hot=True, reshape=False)
# parameters
learning_rate = 0.001
epochs = 10
batch_size = 128
# number of samples used when calculating validation and test accuracy
test_valid_size = 256
# network parameters
n_classes = 10
# note: tf.layers.dropout takes the DROP rate (the fraction of units to drop),
# not a keep probability as tf.nn.dropout does
dropout = tf.placeholder(tf.float32)
# Input and target placeholders
inputs_ = tf.placeholder(tf.float32, (None, 28, 28, 1))
targets_ = tf.placeholder(tf.float32)
# build the conv2d graph with tf.layers.conv2d and tf.layers.max_pooling2d
# layer 1 - 28*28*1 to 14*14*32
conv1 = tf.layers.conv2d(inputs_, 32, (5, 5), padding='same', activation=tf.nn.relu)
maxpool1 = tf.layers.max_pooling2d(conv1, (2, 2), (2, 2))
# layer 2 - 14*14*32 to 7*7*64
conv2 = tf.layers.conv2d(maxpool1, 64, (5, 5), padding='same', activation=tf.nn.relu)
maxpool2 = tf.layers.max_pooling2d(conv2, (2, 2), (2, 2))
# Fully connected layer
flattened = tf.reshape(maxpool2, [-1, 7*7*64])
fc1 = tf.layers.dense(flattened, units=1024, activation=tf.nn.relu)
# training=True applies dropout; feeding rate=0. at evaluation makes it a no-op
fc1 = tf.layers.dropout(fc1, rate=dropout, training=True)
# output logits
logits = tf.layers.dense(fc1, units=n_classes)
# define loss and optimizer
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=targets_))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\
    .minimize(cost)
# accuracy
correct_pred = tf.equal(tf.argmax(logits, axis=1), tf.argmax(targets_, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# initializing the variables
init = tf.global_variables_initializer()
# launch the graph
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(epochs):
        for batch in range(mnist.train.num_examples // batch_size):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            sess.run(optimizer, feed_dict={
                inputs_: batch_x,
                targets_: batch_y,
                dropout: 0.25})  # drop 25% of the units, i.e. keep_prob = 0.75
            # calculate batch loss and accuracy
            loss = sess.run(cost, feed_dict={
                inputs_: batch_x,
                targets_: batch_y,
                dropout: 0.})
            valid_acc = sess.run(accuracy, feed_dict={
                inputs_: mnist.validation.images[:test_valid_size],
                targets_: mnist.validation.labels[:test_valid_size],
                dropout: 0.})
            print('Epoch {:>2}, Batch {:>3} - loss: {:>10.4f} Validation Accuracy: {:.6f}'.format(
                epoch + 1,
                batch + 1,
                loss,
                valid_acc))
    test_acc = sess.run(accuracy, feed_dict={
        inputs_: mnist.test.images[:test_valid_size],
        targets_: mnist.test.labels[:test_valid_size],
        dropout: 0.})
    print('Testing Accuracy: {}'.format(test_acc))
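Note the dropout fix applied above: unlike tf.nn.dropout in the first listing, which takes a keep probability, tf.layers.dropout takes the fraction of units to drop and only applies dropout when training=True (the original version fed keep_prob-style values into rate, which would have zeroed every unit at evaluation). A quick standalone check of both behaviours, as a sketch with hypothetical names, assuming TensorFlow 1.x as in the listings:
import tensorflow as tf

h_demo = tf.ones([1, 4])
# training=True: roughly `rate` of the units are zeroed, the rest scaled by 1/(1-rate)
train_out = tf.layers.dropout(h_demo, rate=0.5, training=True)
# training=False: dropout is a no-op regardless of rate
eval_out = tf.layers.dropout(h_demo, rate=0.5, training=False)

with tf.Session() as sess:
    print(sess.run(train_out))  # e.g. [[2. 0. 2. 0.]]
    print(sess.run(eval_out))   # [[1. 1. 1. 1.]]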
For comparison, here is the code for a standard multilayer neural network built with TensorFlow for the same recognition task; its final accuracy is 82%. If you can read these two pieces of code without difficulty, you can consider yourself clear on basic TensorFlow usage. The copyright of the code again belongs to Udacity.
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
mnist = input_data.read_data_sets(".", one_hot=True, reshape=False)
# Parameters
learning_rate = 0.001
training_epochs = 20
batch_size = 128
display_step = 1
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
n_hidden_layer = 256 # number of units in the hidden layer
# Store layers weight & bias
weights = {
'hidden_layer': tf.Variable(tf.random_normal([n_input, n_hidden_layer])),
'out': tf.Variable(tf.random_normal([n_hidden_layer, n_classes]))
}
biases = {
'hidden_layer': tf.Variable(tf.random_normal([n_hidden_layer])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
# tf Graph input
x = tf.placeholder("float", [None, 28, 28, 1])
y = tf.placeholder("float", [None, n_classes])
x_flat = tf.reshape(x, [-1, n_input])
# Hidden layer with RELU activation
layer_1 = tf.add(tf.matmul(x_flat, weights['hidden_layer']), biases['hidden_layer'])
layer_1 = tf.nn.relu(layer_1)
# Output layer with linear activation
logits = tf.matmul(layer_1, weights['out']) + biases['out']
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    # Training cycle
    for epoch in range(training_epochs):
        total_batch = int(mnist.train.num_examples/batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        # Display logs per epoch step
        if epoch % display_step == 0:
            c = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(c))
    print("Optimization Finished!")
    # Test model
    correct_prediction = tf.equal(tf.argmax(logits, axis=1), tf.argmax(y, axis=1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    # Decrease test_size if you don't have enough memory
    test_size = 256
    print("Accuracy:", accuracy.eval({x: mnist.test.images[:test_size], y: mnist.test.labels[:test_size]}))
Besides CNNs, an RNN can also be used to recognize the MNIST dataset; the corresponding code is as follows:
# TensorFlow for RNN
# this cell's code is adapted from
# https://jasdeep06.github.io/posts/Understanding-LSTM-in-Tensorflow-MNIST/
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("./mnist", one_hot=True)
# define constant
# each 28 x 28 input image is unrolled into 28 time steps
time_steps = 28
# hidden LSTM units
num_units = 128
# each input is a row of 28 pixels
input_size = 28
# learning rate for Adam
learning_rate = 0.001
# there are 10 classes in the labels
n_classes = 10
# batch_size
batch_size = 128
# weights and biases for output layer
out_weights = tf.Variable(tf.random_normal([num_units, n_classes]))
out_bias = tf.Variable(tf.random_normal([n_classes]))
# defining inputs and labels placeholders
x = tf.placeholder(tf.float32, [None, time_steps, input_size])
y = tf.placeholder(tf.float32, [None, n_classes])
# processing the input tensor from [batch_size, time_steps, n_input] to
# a 'time_steps' length list of [batch_size, n_input] tensors
inputs = tf.unstack(x, time_steps, 1)
# defining the network
lstm_layer = tf.contrib.rnn.BasicLSTMCell(num_units, forget_bias=1)
outputs, _ = tf.contrib.rnn.static_rnn(lstm_layer, inputs, dtype=tf.float32)
# converting the last output of dimension [batch_size, num_units] to
# [batch_size, n_classes] with a matrix multiplication
prediction = tf.matmul(outputs[-1], out_weights) + out_bias
# defining loss and optimization
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
# model evaluation
correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# train the model
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    iter = 1
    while iter < 800:
        batch_x, batch_y = mnist.train.next_batch(batch_size=batch_size)
        batch_x = batch_x.reshape((batch_size, time_steps, input_size))
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        if iter % 10 == 0:
            acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
            losses = sess.run(loss, feed_dict={x: batch_x, y: batch_y})
            print("For iter ", iter)
            print("Accuracy ", acc)
            print("Loss ", losses)
            print("___________________")
        iter += 1
    test_data = mnist.test.images[:128].reshape((-1, time_steps, input_size))
    test_label = mnist.test.labels[:128]
    print("Test Accuracy ", sess.run(accuracy, feed_dict={x: test_data, y: test_label}))
With the parameter settings in the code, the final accuracy surprisingly reaches 96%. It really does seem there is nothing an RNN cannot do.