(三) tensorflow--alexnet训练mnist

2019-03-24 本文已影响0人计算机视觉__掉队选手

AlexNet是2012年提出的一个非常出色的神经网络，是2012年ImageNet图像识别大赛的冠军。该模型一共有8层，前五层为卷积层，用于提取图像特征，后三层为全连接层，用于对图像进行分类。

image

该模型的主要特点如下：
1.采用了ReLU激活函数，可以减轻梯度消失的风险；
2.池化层用于特征降维；
3.局部响应归一化处理（LRN）。
下面还是在MNIST数据集上，使用简化版的AlexNet网络（3个卷积层加3个全连接层）进行训练，得到分类的结果。

导入数据

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST dataset from the "MNIST_data" directory; labels are
# requested in one-hot form so they match the shape of the `Y` placeholder.
mnist = input_data.read_data_sets(
    "MNIST_data",
    one_hot=True,
)

初始化参数

# Step size handed to the Adam optimizer.
learning_rate = 0.001
# Backward-compatible alias: other parts of the script still refer to the
# original, misspelled name.
leaning_rate = learning_rate
# Number of training iterations (one mini-batch each).
num_steps = 5000
# Number of samples per mini-batch.
batch_size = 128
# Report loss/accuracy every `display_step` iterations.
display_step = 10
# Length of one flattened input image (28 * 28 pixels).
num_input = 784
# Number of target classes (digits 0-9).
num_classes = 10
# Intended dropout keep probability for training.
dropout = 0.75

定义输入输出

# Flattened input images: one row of `num_input` values per sample.
X = tf.placeholder(dtype=tf.float32, shape=[None, num_input])
# One-hot labels: one row of `num_classes` entries per sample.
Y = tf.placeholder(dtype=tf.float32, shape=[None, num_classes])
# Dropout keep probability, supplied through `feed_dict` on every run.
keep_prob = tf.placeholder(dtype=tf.float32)

定义卷积、池化函数

def conv2d(x, W, b, strides=1):
    """Apply a 2-D convolution, add the bias and pass the result through ReLU.

    Args:
        x: Input tensor handed to `tf.nn.conv2d`.
        W: Convolution kernel.
        b: Bias added to the convolution output.
        strides: Stride used for both spatial dimensions.

    Returns:
        The ReLU-activated feature map ('SAME' padding is used).
    """
    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, W, strides=stride_spec, padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(convolved, b))
def maxpool2d(x, k=2):
    """Max-pool `x` with a k x k window and a stride of k ('SAME' padding).

    Args:
        x: Input tensor handed to `tf.nn.max_pool`.
        k: Side length of the pooling window, also used as the stride.

    Returns:
        The pooled tensor.
    """
    # Window and stride are identical, so the pooling regions do not overlap.
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

定义网络架构

def alex_net(x, weights, bias, dropout):
    """Build a reduced AlexNet-style classifier for 28x28 single-channel images.

    The network is three conv + max-pool stages followed by two hidden
    fully connected layers (each followed by dropout) and a linear output
    layer.

    Args:
        x: Batch of flattened images; reshaped to [-1, 28, 28, 1].
        weights: Dict of kernel/weight variables with keys
            'wc1', 'wc2', 'wc3', 'wd1', 'wd2', 'out'.
        bias: Dict of bias variables with keys
            'bc1', 'bc2', 'bc3', 'bd1', 'bd2', 'out'.
        dropout: Keep probability applied to the two hidden dense layers
            (pass 1.0 to disable dropout, e.g. for evaluation).

    Returns:
        The unnormalised class scores (logits), one row per sample.
    """
    # Restore the image layout: [batch, height, width, channels].
    x = tf.reshape(x, [-1, 28, 28, 1])

    # Three conv + pool stages. With 'SAME' padding and k=2 pooling the
    # spatial size shrinks 28 -> 14 -> 7 -> 4.
    conv1 = conv2d(x, weights['wc1'], bias['bc1'])
    conv1 = maxpool2d(conv1, k=2)
    conv2 = conv2d(conv1, weights['wc2'], bias['bc2'])
    conv2 = maxpool2d(conv2, k=2)
    conv3 = conv2d(conv2, weights['wc3'], bias['bc3'])
    conv3 = maxpool2d(conv3, k=2)

    # Flatten to whatever input width the first dense weight matrix expects.
    fc1 = tf.reshape(conv3, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.nn.relu(tf.matmul(fc1, weights['wd1']) + bias['bd1'], name='fc1')
    fc1 = tf.nn.dropout(fc1, keep_prob=dropout)

    fc2 = tf.nn.relu(tf.matmul(fc1, weights['wd2']) + bias['bd2'], name='fc2')
    fc2 = tf.nn.dropout(fc2, keep_prob=dropout)

    # The bias must be added to the matmul result, not to the weight matrix
    # before multiplying (broadcasting would silently accept either form).
    out = tf.matmul(fc2, weights['out']) + bias['out']
    return out
# Trainable kernels / weight matrices, initialised from a standard normal.
# Conv kernels are [height, width, in_channels, out_channels]; dense
# matrices are [inputs, outputs]. Creation order matches the listing order.
weights = {
    key: tf.Variable(tf.random_normal(shape))
    for key, shape in (
        ('wc1', [3, 3, 1, 64]),
        ('wc2', [3, 3, 64, 128]),
        ('wc3', [3, 3, 128, 256]),
        # 4*4*256: flattened size of the third conv stage's output.
        ('wd1', [4*4*256, 1024]),
        ('wd2', [1024, 1024]),
        ('out', [1024, num_classes]),
    )
}
# Trainable bias vectors, one per layer, initialised from a standard normal.
# Each length equals the output width of the corresponding layer.
bias = {
    key: tf.Variable(tf.random_normal([width]))
    for key, width in (
        ('bc1', 64),
        ('bc2', 128),
        ('bc3', 256),
        ('bd1', 1024),
        ('bd2', 1024),
        ('out', num_classes),
    )
}

定义损失函数和梯度下降方式

# Build the model. The `keep_prob` placeholder is passed as the network's
# `dropout` argument, so its value is chosen per `sess.run` call.
logits = alex_net(X, weights, bias, keep_prob)
# Class probabilities, used for evaluation; the loss below takes raw logits.
prediction = tf.nn.softmax(logits)

# Mean softmax cross-entropy between the logits and the one-hot labels.
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=leaning_rate)
train_op = optimizer.minimize(loss_op)

模型评价

# A sample counts as correct when the most probable predicted class equals
# the class marked in the one-hot label. `prediction` is expected to hold the
# softmax of the network's logits and must be defined before this point.
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
# Cast the booleans to 0.0 / 1.0 so that their mean is the accuracy.
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

训练模型

init = tf.global_variables_initializer()

# Start training
with tf.Session() as sess:

    # Run the initializer
    sess.run(init)

    for step in range(1, num_steps + 1):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Run optimization op (backprop). The keep probability comes from the
        # `dropout` hyperparameter rather than a separate hard-coded value.
        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y, keep_prob: dropout})
        if step % display_step == 0:
            # Calculate batch loss and accuracy with dropout disabled
            # (keep_prob = 1.0) so the reported numbers are deterministic.
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x,
                                                                 Y: batch_y,
                                                                 keep_prob: 1.0})
            print("Step {}, Minibatch Loss= {:.4f}, Training Accuracy= {:.3f}".format(
                step, loss, acc))

    print("Optimization Finished!")

    # Calculate accuracy for 256 MNIST test images
    print("Testing Accuracy:",
          sess.run(accuracy, feed_dict={X: mnist.test.images[:256],
                                        Y: mnist.test.labels[:256],
                                        keep_prob: 1.0}))