
Recognizing the MNIST Dataset with TensorFlow

2018-02-08  拓季

I have only been working with TensorFlow for a short while and still feel I know too little, which is why I had been reluctant to write notes on its applications. A good way to learn code is to implement the same task in several different ways, which also makes it easier to compare the strengths and weaknesses of each approach. This post collects code that builds several different types of neural networks with TensorFlow to recognize the MNIST handwritten digits, making it convenient to compare how the different networks are constructed. I have added extensive comments for readability and keep the code here for my own reference; copyright belongs to the respective authors.

For image data, a CNN is the first thing that comes to mind, so here is an implementation that builds the CNN from scratch:

# this cell's code is adapted from Udacity
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("./mnist", one_hot=True, reshape=False)

# parameters
learning_rate = 0.00001
epochs = 10
batch_size = 128

# number of samples to calculate validation and accuracy
test_valid_size = 256

# network parameters
n_classes = 10
dropout = 0.75

# weights and biases
# the shape of the filter weight is (height, width, input_depth, output_depth)
weights = {
    'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
    'wd1': tf.Variable(tf.random_normal([7*7*64, 1024])),
    'out': tf.Variable(tf.random_normal([1024, n_classes]))}

# the shape of the filter bias is (output_depth,)
biases = {
    'bc1': tf.Variable(tf.random_normal([32])),
    'bc2': tf.Variable(tf.random_normal([64])),
    'bd1': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([n_classes]))}

# stride for each dimension (batch_size, input_height, input_width, depth)
# always set the stride for the batch and input_channels dimensions,
# i.e. the first and fourth elements of the strides array, to 1
# This ensures that the model uses all batches and input channels
# It's good practice to remove the batches or channels you want to skip
# from the data set rather than use a stride to skip them
#
# tf.nn.conv2d requires the input be 4D (batch_size, height, width, depth)
def conv2d(x, W, b, strides=1):
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    # tf.nn.bias_add broadcasts the 1-D bias across the last dimension of x
    return tf.nn.relu(x)


def maxpool2d(x, k=2):
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')


def conv_net(x, weights, biases, dropout):
    # layer 1 - 28*28*1 to 14*14*32
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    conv1 = maxpool2d(conv1, k=2)

    # layer 2 - 14*14*32 to 7*7*64
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    conv2 = maxpool2d(conv2, k=2)

    # fully connected layer - 7*7*64 to 1024
    # tensor.get_shape().as_list() will return the shape of the tensor as a list
    fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    fc1 = tf.nn.dropout(fc1, dropout)

    # output layer
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    return out


# session
x = tf.placeholder(tf.float32, [None, 28, 28, 1])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32)

# model
logits = conv_net(x, weights, biases, keep_prob)

# define loss and optimizer
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\
    .minimize(cost)

# accuracy
correct_pred = tf.equal(tf.argmax(logits, axis=1), tf.argmax(y, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# initializing the variables
init = tf.global_variables_initializer()

# launch the graph
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(epochs):
        for batch in range(mnist.train.num_examples//batch_size):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            sess.run(optimizer, feed_dict={
                x: batch_x,
                y: batch_y,
                keep_prob: dropout
            })

            # calculate batch loss and accuracy
            loss = sess.run(cost, feed_dict={
                x: batch_x,
                y: batch_y,
                keep_prob: 1.})
            valid_acc = sess.run(accuracy, feed_dict={
                x: mnist.validation.images[:test_valid_size],
                y: mnist.validation.labels[:test_valid_size],
                keep_prob: 1.})

            print('Epoch {:>2}, Batch {:>3} - loss: {:>10.4f} Validation Accuracy: {:.6f}'.format(
                  epoch + 1,
                  batch + 1,
                  loss,
                  valid_acc))

        test_acc = sess.run(accuracy, feed_dict={
            x: mnist.test.images[:test_valid_size],
            y: mnist.test.labels[:test_valid_size],
            keep_prob: 1.})
        print('Testing Accuracy: {}'.format(test_acc))
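
The layer-size comments above (28*28*1 to 14*14*32 to 7*7*64) follow from the SAME-padding rule: the output height and width equal the input size divided by the stride, rounded up. A quick sketch of that arithmetic (my own illustration, not part of the Udacity code):

import math

def same_pad_out(size, stride):
    # with padding='SAME', output size = ceil(input size / stride)
    return math.ceil(size / stride)

h = same_pad_out(28, 2)   # after the first 2x2 max pool: 14
h = same_pad_out(h, 2)    # after the second 2x2 max pool: 7
print(h, h * h * 64)      # 7 3136, hence the 7*7*64 input to the fc layer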

TensorFlow also provides the more convenient tf.layers API. Building the same CNN architecture with it looks like this:

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("./mnist", one_hot=True, reshape=False)

# parameters
learning_rate = 0.001
epochs = 10
batch_size = 128

# number of samples to calculate validation and accuracy
test_valid_size = 256

# network parameters
n_classes = 10
# note: tf.layers.dropout takes the probability of *dropping* a unit,
# not a keep probability as tf.nn.dropout does
dropout = tf.placeholder(tf.float32)

# Input and target placeholders
inputs_ = tf.placeholder(tf.float32, (None, 28, 28, 1))
targets_ = tf.placeholder(tf.float32)

# build the conv2d graph with tf.layers.conv2d and tf.layers.max_pooling2d
# layer 1 - 28*28*1 to 14*14*32
conv1 = tf.layers.conv2d(inputs_, 32, (5, 5), padding='same', activation=tf.nn.relu)
maxpool1 = tf.layers.max_pooling2d(conv1, (2, 2), (2, 2))

# layer 2 - 14*14*32 to 7*7*64
conv2 = tf.layers.conv2d(maxpool1, 64, (5, 5), padding='same', activation=tf.nn.relu)
maxpool2 = tf.layers.max_pooling2d(conv2, (2, 2), (2, 2))

# Fully connected layer
flattened = tf.reshape(maxpool2, [-1, 7*7*64])
fc1 = tf.layers.dense(flattened, units=1024, activation=tf.nn.relu)
# training=True is required, otherwise tf.layers.dropout is a no-op
fc1 = tf.layers.dropout(fc1, rate=dropout, training=True)

# output logits
logits = tf.layers.dense(fc1, units=n_classes)

# define loss and optimizer
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=targets_))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\
    .minimize(cost)

# accuracy
correct_pred = tf.equal(tf.argmax(logits, axis=1), tf.argmax(targets_, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# initializing the variables
init = tf.global_variables_initializer()

# launch the graph
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(epochs):
        for batch in range(mnist.train.num_examples//batch_size):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            sess.run(optimizer, feed_dict={
                inputs_: batch_x,
                targets_: batch_y,
                dropout: 0.25})  # drop 25% of units, i.e. a keep_prob of 0.75

            # calculate batch loss and accuracy
            loss = sess.run(cost, feed_dict={
                inputs_: batch_x,
                targets_: batch_y,
                dropout: 0.})  # disable dropout for evaluation
            valid_acc = sess.run(accuracy, feed_dict={
                inputs_: mnist.validation.images[:test_valid_size],
                targets_: mnist.validation.labels[:test_valid_size],
                dropout: 0.})

            print('Epoch {:>2}, Batch {:>3} - loss: {:>10.4f} Validation Accuracy: {:.6f}'.format(
                  epoch + 1,
                  batch + 1,
                  loss,
                  valid_acc))

        test_acc = sess.run(accuracy, feed_dict={
            inputs_: mnist.test.images[:test_valid_size],
            targets_: mnist.test.labels[:test_valid_size],
            dropout: 0.})
        print('Testing Accuracy: {}'.format(test_acc))
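
One caveat worth calling out (my own note, not from the original code): tf.layers.dropout takes the probability of dropping a unit, the complement of the keep_prob used by tf.nn.dropout in the first listing, and it only applies dropout when training=True. That is why the code above passes that flag and feeds a rate of 0.25 during training and 0 during evaluation. A small sanity check of the correspondence:

import tensorflow as tf

x = tf.ones([10000])
# keep_prob=0.75 with tf.nn.dropout should keep about as many units
# as rate=0.25 with tf.layers.dropout (and training=True)
kept_nn = tf.count_nonzero(tf.nn.dropout(x, keep_prob=0.75))
kept_layers = tf.count_nonzero(tf.layers.dropout(x, rate=0.25, training=True))

with tf.Session() as sess:
    print(sess.run([kept_nn, kept_layers]))  # both roughly 7500 of 10000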

For comparison, here is the code for a standard multi-layer neural network built with TensorFlow to perform the same recognition task. This network reaches a final accuracy of 82%. If you can read these two code listings without difficulty, you have a clear grasp of basic TensorFlow usage. The code copyright again belongs to Udacity.

from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf

mnist = input_data.read_data_sets(".", one_hot=True, reshape=False)

# Parameters
learning_rate = 0.001
training_epochs = 20
batch_size = 128 
display_step = 1

n_input = 784  # MNIST data input (img shape: 28*28)
n_classes = 10  # MNIST total classes (0-9 digits)

n_hidden_layer = 256  # number of features in the hidden layer

# Store layers weight & bias
weights = {
    'hidden_layer': tf.Variable(tf.random_normal([n_input, n_hidden_layer])),
    'out': tf.Variable(tf.random_normal([n_hidden_layer, n_classes]))
}
biases = {
    'hidden_layer': tf.Variable(tf.random_normal([n_hidden_layer])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# tf Graph input
x = tf.placeholder("float", [None, 28, 28, 1])
y = tf.placeholder("float", [None, n_classes])

x_flat = tf.reshape(x, [-1, n_input])

# Hidden layer with RELU activation
layer_1 = tf.add(tf.matmul(x_flat, weights['hidden_layer']), biases['hidden_layer'])
layer_1 = tf.nn.relu(layer_1)
# Output layer with linear activation
logits = tf.matmul(layer_1, weights['out']) + biases['out']

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

# Initializing the variables
init = tf.global_variables_initializer()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    # Training cycle
    for epoch in range(training_epochs):
        total_batch = int(mnist.train.num_examples/batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        # Display logs per epoch step
        if epoch % display_step == 0:
            c = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
            print("Epoch:", '%04d' % (epoch+1), "cost=", \
                "{:.9f}".format(c))
    print("Optimization Finished!")

    # Test model
    correct_prediction = tf.equal(tf.argmax(logits, axis=1), tf.argmax(y, axis=1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    # Decrease test_size if you don't have enough memory
    test_size = 256
    print("Accuracy:", accuracy.eval({x: mnist.test.images[:test_size], y: mnist.test.labels[:test_size]}))

Besides CNNs, an RNN can also recognize the MNIST dataset. The corresponding code is as follows:

# TensorFlow for RNN
# this cell's code is adapted from
# https://jasdeep06.github.io/posts/Understanding-LSTM-in-Tensorflow-MNIST/

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("./mnist", one_hot=True)

# define constant
# each 28 x 28 image is unrolled into 28 time steps
time_steps = 28
# hidden LSTM units
num_units = 128
# each input is a row of 28 pixels
input_size = 28
# learning rate for Adam
learning_rate = 0.001
# there are 10 classes in the labels
n_classes = 10
# batch_size
batch_size = 128

# weights and biases for output layer
out_weights = tf.Variable(tf.random_normal([num_units, n_classes]))
out_bias = tf.Variable(tf.random_normal([n_classes]))

# defining inputs and labels placeholders
x = tf.placeholder(tf.float32, [None, time_steps, input_size])
y = tf.placeholder(tf.float32, [None, n_classes])

# processing the input tensor from [batch_size, time_steps, n_input] to 
# a 'time_steps' length list of [batch_size, n_input] tensors
inputs = tf.unstack(x, time_steps, 1)

# defining the network
lstm_layer = tf.contrib.rnn.BasicLSTMCell(num_units, forget_bias=1)
outputs, _ = tf.contrib.rnn.static_rnn(lstm_layer, inputs, dtype=tf.float32)

# converting last output of dimension [batch_size, num_units] to 
# [batch_size, n_classes] with matrix multiplication
prediction = tf.matmul(outputs[-1], out_weights) + out_bias

# defining loss and optimization
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

# model evaluation
correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# train the model
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    step = 1
    while step < 800:
        batch_x, batch_y = mnist.train.next_batch(batch_size=batch_size)
        batch_x = batch_x.reshape((batch_size, time_steps, input_size))
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})

        if step % 10 == 0:
            # evaluate loss and accuracy on the current training batch
            acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
            losses = sess.run(loss, feed_dict={x: batch_x, y: batch_y})
            print("For step ", step)
            print("Accuracy ", acc)
            print("Loss ", losses)
            print("___________________")
        step += 1
        
    # evaluate on test data once, after training completes
    test_data = mnist.test.images[:128].reshape((-1, time_steps, input_size))
    test_label = mnist.test.labels[:128]
    print("Test Accuracy ", sess.run(accuracy, feed_dict={x: test_data, y: test_label}))

With the parameter settings in the code, the final accuracy surprisingly reaches 96%. RNNs really do seem capable of anything.

Further reading

  1. Understanding LSTM in Tensorflow (MNIST dataset), https://jasdeep06.github.io/posts/Understanding-LSTM-in-Tensorflow-MNIST/