TensorFlow in Action | Handwritten Digit Recognition with a Multi-Layer LSTM
2018-07-08
【Downloading the MNIST Dataset】
```python
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
from tensorflow.examples.tutorials.mnist import input_data

# Let the GPU allocate memory on demand instead of grabbing it all up front
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)

# First load the data and take a look at its shape
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
print(mnist.train.images.shape)
```
Once the download finishes, you will see a new MNIST_data directory in the project folder; it contains the MNIST dataset.
![](https://img.haomeiwen.com/i2145769/7690d86b1cdb0ece.png)
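If you want to confirm what read_data_sets() actually returned, the mnist object exposes the three standard splits directly. A quick sanity check (the split sizes in the comments are the TF 1.x defaults):

```python
# Inspect the splits produced by read_data_sets (one_hot=True)
print(mnist.train.images.shape)       # (55000, 784): flattened 28x28 images
print(mnist.train.labels.shape)       # (55000, 10): one-hot labels
print(mnist.validation.images.shape)  # (5000, 784)
print(mnist.test.images.shape)        # (10000, 784)
```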
【Configuring the Data】
```python
# settings
lr = 1e-3
# We want different batch sizes for training and testing, so the batch size
# is a placeholder. Note that its dtype must be tf.int32.
batch_size = tf.placeholder(tf.int32, [])
# Each image is 28x28, and we feed one image row per time step, so the
# input feature at every step is 28-dimensional (28 pixels)
input_size = 28
# The sequence length is 28: each prediction consumes 28 rows
timestep_size = 28
# Number of units in each hidden LSTM layer
hidden_size = 256
# Number of stacked LSTM layers
layer_num = 2
# Number of output classes (it would be 1 for a regression task)
class_num = 10

_X = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, class_num])
keep_prob = tf.placeholder(tf.float32)
```
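To make the "one row per time step" idea concrete before building the network, here is a tiny numpy-only sketch; fake_image is a made-up stand-in for a flattened MNIST image, not real data:

```python
import numpy as np

# View one flattened 784-pixel image as 28 time steps of 28 features each
fake_image = np.arange(784, dtype=np.float32)        # stand-in, not real MNIST
sequence = fake_image.reshape(timestep_size, input_size)
print(sequence.shape)  # (28, 28): [time step, feature]
print(sequence[0])     # time step 0 = pixel row 0 of the image
```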
【Defining the Multi-Layer LSTM Network】
```python
def LSTM_NET():
    # Reshape each 784-pixel vector back into a 28 x 28 image
    X = tf.reshape(_X, [-1, 28, 28])

    def lstm_cell(hidden_size, keep_prob):
        cell = rnn.LSTMCell(hidden_size, reuse=tf.get_variable_scope().reuse)
        return rnn.DropoutWrapper(cell, output_keep_prob=keep_prob)

    # Stack the layers into a multi-layer LSTM with MultiRNNCell
    mlstm_cell = rnn.MultiRNNCell(
        [lstm_cell(hidden_size, keep_prob) for _ in range(layer_num)],
        state_is_tuple=True)
    # Initialize the state with zeros
    init_state = mlstm_cell.zero_state(batch_size, dtype=tf.float32)

    # Method 1: let dynamic_rnn() run the network over all time steps
    ##############################
    outputs, state = tf.nn.dynamic_rnn(
        mlstm_cell, inputs=X, initial_state=init_state, time_major=False)
    h_state = outputs[:, -1, :]
    ##############################

    # Method 2: unroll the computation over time steps by hand
    # outputs = list()
    # state = init_state
    # with tf.variable_scope('RNN'):
    #     for timestep in range(timestep_size):
    #         if timestep > 0:
    #             tf.get_variable_scope().reuse_variables()
    #         (cell_output, state) = mlstm_cell(X[:, timestep, :], state)
    #         outputs.append(cell_output)
    # h_state = outputs[-1]
    return h_state

net_out = LSTM_NET()
```
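A quick shape check can catch wiring mistakes here: dynamic_rnn returns outputs of shape [batch_size, timestep_size, hidden_size], so slicing off the last time step should leave [batch_size, hidden_size]:

```python
# The batch dimension stays unknown (?) until something is fed at run time
print(net_out.get_shape())  # expected: (?, 256)
```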
【Training】
```python
# The LSTM emits a [hidden_size] tensor per example; to classify, we attach
# a softmax layer. First define its weight matrix and bias. Placeholders
# won't work here, because these parameters must be trainable:
# out_W = tf.placeholder(tf.float32, [hidden_size, class_num], name='out_Weights')
# out_bias = tf.placeholder(tf.float32, [class_num], name='out_bias')
W = tf.Variable(tf.truncated_normal([hidden_size, class_num], name='out_Weights'))
bias = tf.Variable(tf.constant(0.1, shape=[class_num]), dtype=tf.float32)
y_pre = tf.nn.softmax(tf.matmul(net_out, W) + bias)

# Loss and evaluation
cross_entropy = -tf.reduce_mean(y * tf.log(y_pre))
train_op = tf.train.AdamOptimizer(lr).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
```
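One caveat worth flagging: applying softmax and then tf.log by hand can yield NaN once y_pre underflows to zero. A common alternative (a sketch, not part of the original code; logits is a name introduced here) keeps the raw logits and lets TensorFlow fuse softmax and cross-entropy into one numerically stable op. Its scale differs from the hand-rolled loss by a factor of class_num, since it sums over classes before averaging over the batch:

```python
# Numerically stabler variant (sketch): fused softmax + cross-entropy
logits = tf.matmul(net_out, W) + bias
stable_loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))
# train_op could minimize stable_loss instead of cross_entropy
```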
```python
sess.run(tf.global_variables_initializer())
for i in range(2000):
    _batch_size = 128
    batch = mnist.train.next_batch(_batch_size)
    if (i + 1) % 200 == 0:
        train_accuracy = sess.run(accuracy, feed_dict={
            _X: batch[0], y: batch[1], keep_prob: 1.0, batch_size: _batch_size})
        # mnist.train.epochs_completed tracks how many full epochs have passed
        print("Iter%d, step %d, training accuracy %g" % (
            mnist.train.epochs_completed, (i + 1), train_accuracy))
    sess.run(train_op, feed_dict={
        _X: batch[0], y: batch[1], keep_prob: 0.5, batch_size: _batch_size})

# Accuracy on the test set
print("test accuracy %g" % sess.run(accuracy, feed_dict={
    _X: mnist.test.images, y: mnist.test.labels,
    keep_prob: 1.0, batch_size: mnist.test.images.shape[0]}))
```
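Feeding all 10,000 test images at once works, but the LSTM then unrolls over the whole set in one shot and can exhaust a small GPU. Evaluating in chunks is a safer pattern; a sketch (the chunk size of 500 is an arbitrary choice that divides 10,000 evenly, so a plain mean of the chunk accuracies is exact):

```python
# Evaluate the test set in fixed-size chunks to bound memory use
chunk = 500  # arbitrary; 10000 % 500 == 0, so averaging is exact
accs = []
for start in range(0, mnist.test.images.shape[0], chunk):
    accs.append(sess.run(accuracy, feed_dict={
        _X: mnist.test.images[start:start + chunk],
        y: mnist.test.labels[start:start + chunk],
        keep_prob: 1.0, batch_size: chunk}))
print("test accuracy (chunked) %g" % np.mean(accs))
```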
【Training Results】
![](https://img.haomeiwen.com/i2145769/f5f8d2f2c1ba4209.png)
【Thanks】
Thanks to everyone who read this far. If you have questions, leave a comment below or send them to my inbox; let's learn from each other and share what we know. And since you're passing by, how about a like 😀