Convolutional Neural Networks: LeNet5, AlexNet, VGGNet, GoogLeNet, ResNet

2020-01-06  李涛AT北京

Convolutional Neural Networks

1. Preliminaries

1.1 Why normalization is needed in neural networks
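Inputs whose features sit on very different scales make the loss surface ill-conditioned, so gradient descent converges slowly. A minimal NumPy sketch of per-feature standardization (zero mean, unit variance):

import numpy as np

# two features with wildly different scales
x = np.array([[1.0, 200.0],
              [2.0, 400.0],
              [3.0, 600.0]])
# standardize each feature column
x_std = (x - x.mean(axis=0)) / x.std(axis=0)
print(x_std.mean(axis=0), x_std.std(axis=0))  # ~[0, 0] and [1, 1]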

1.2 What is LRN
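LRN (Local Response Normalization), popularized by AlexNet, divides each activation by a function of the squared activations of its neighbors along the channel axis: b_i = a_i / (k + alpha * sum_j a_j^2)^beta. A minimal sketch using TensorFlow's built-in op with AlexNet-style hyperparameters (k=2, n=5, alpha=1e-4, beta=0.75):

import tensorflow as tf

x = tf.random.normal([1, 8, 8, 16])  # [batch, h, w, channels]
# depth_radius=2 means each activation is normalized over 5 neighboring channels
y = tf.nn.local_response_normalization(x, depth_radius=2, bias=2,
                                       alpha=1e-4, beta=0.75)
print(y.shape)  # (1, 8, 8, 16), same shape as the input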

1.3 BN (Batch Normalization)
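Batch Normalization standardizes each channel over the mini-batch and then rescales with the learned parameters gamma and beta; the training flag switches between batch statistics and the moving averages used at inference time. A minimal Keras sketch:

import tensorflow as tf

bn = tf.keras.layers.BatchNormalization()
x = tf.random.normal([32, 28, 28, 64])
y_train = bn(x, training=True)   # normalize with batch mean/variance
y_infer = bn(x, training=False)  # normalize with the moving averages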

1.4 What 1×1 convolutions are for
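A 1×1 convolution acts as a per-pixel fully connected layer across channels: it can compress or expand the channel dimension (and add a nonlinearity) without touching height or width, which is how bottleneck and Inception designs keep compute cheap. A minimal sketch:

import tensorflow as tf

x = tf.random.normal([1, 28, 28, 256])
# 1x1 conv: mixes channels at each spatial position, 256 -> 64
conv1x1 = tf.keras.layers.Conv2D(filters=64, kernel_size=1, activation='relu')
print(conv1x1(x).shape)  # (1, 28, 28, 64)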

2. Principles of the classic convolutional networks

2.1 LeNet5

2.2 AlexNet

Detailed AlexNet architecture

2.3 VGG

2.4 GoogLeNet
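GoogLeNet's building block is the Inception module: four parallel branches (a 1×1 conv; a 1×1 then 3×3 conv; a 1×1 then 5×5 conv; a 3×3 max-pool then 1×1 conv) concatenated along the channel axis, with the 1×1 convolutions keeping channel counts cheap. Since GoogLeNet is not implemented in the code below, here is a minimal sketch of one module in the Keras functional API, using the filter counts of GoogLeNet's first Inception block (3a):

import tensorflow as tf
from tensorflow.keras import layers

def inception_block(x, c1, c2, c3, c4):
    # branch 1: 1x1 conv
    b1 = layers.Conv2D(c1, 1, activation='relu')(x)
    # branch 2: 1x1 conv to reduce channels, then 3x3 conv
    b2 = layers.Conv2D(c2[0], 1, activation='relu')(x)
    b2 = layers.Conv2D(c2[1], 3, padding='same', activation='relu')(b2)
    # branch 3: 1x1 conv to reduce channels, then 5x5 conv
    b3 = layers.Conv2D(c3[0], 1, activation='relu')(x)
    b3 = layers.Conv2D(c3[1], 5, padding='same', activation='relu')(b3)
    # branch 4: 3x3 max pooling, then 1x1 conv
    b4 = layers.MaxPool2D(3, strides=1, padding='same')(x)
    b4 = layers.Conv2D(c4, 1, activation='relu')(b4)
    # concatenate the four branches along the channel axis
    return layers.Concatenate()([b1, b2, b3, b4])

inputs = tf.keras.Input(shape=(28, 28, 192))
out = inception_block(inputs, 64, (96, 128), (16, 32), 32)
print(out.shape)  # (None, 28, 28, 256): 64 + 128 + 32 + 32 channels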

2.5 ResNet

2.5.1 Background

2.5.3 Network architecture

Advantages

TensorFlow 2 code implementations

There are plenty of implementations of these networks online; pick one that matches your framework and coding habits.

# TensorFlow 1.x implementation of LeNet-5

import tensorflow as tf

def inference(inputs):
    # input shape: [batch, height, width, 1], e.g. 32x32 grayscale images
    with tf.variable_scope('conv1'):
        # C1: six 5x5 kernels -> [batch, 28, 28, 6]
        weights = tf.Variable(tf.truncated_normal([5, 5, 1, 6], stddev=0.1))
        biases = tf.Variable(tf.zeros([6]))
        conv1 = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding='VALID')
        conv1 = tf.nn.relu(tf.nn.bias_add(conv1, biases))
        # S2: 2x2 max pooling -> [batch, 14, 14, 6]
        maxpool2 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
    with tf.variable_scope('conv3'):
        # C3: sixteen 5x5 kernels -> [batch, 10, 10, 16]
        weights = tf.Variable(tf.truncated_normal([5, 5, 6, 16], stddev=0.1))
        biases = tf.Variable(tf.zeros([16]))
        conv3 = tf.nn.conv2d(maxpool2, weights, strides=[1, 1, 1, 1], padding='VALID')
        conv3 = tf.nn.relu(tf.nn.bias_add(conv3, biases))
        # S4: 2x2 max pooling -> [batch, 5, 5, 16]
        maxpool4 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
    with tf.variable_scope('conv5'):
        # C5: 120 5x5 kernels -> [batch, 1, 1, 120]
        weights = tf.Variable(tf.truncated_normal([5, 5, 16, 120], stddev=0.1))
        biases = tf.Variable(tf.zeros([120]))
        conv5 = tf.nn.conv2d(maxpool4, weights, strides=[1, 1, 1, 1], padding='VALID')
        conv5 = tf.nn.relu(tf.nn.bias_add(conv5, biases))
    with tf.variable_scope('fc6'):
        # F6: fully connected, 120 -> 84
        flat = tf.reshape(conv5, [-1, 120])
        weights = tf.Variable(tf.truncated_normal([120, 84], stddev=0.1))
        biases = tf.Variable(tf.zeros([84]))
        fc6 = tf.nn.relu(tf.matmul(flat, weights) + biases)
    with tf.variable_scope('fc7'):
        # output layer: 84 -> 10 classes, softmax probabilities
        weights = tf.Variable(tf.truncated_normal([84, 10], stddev=0.1))
        biases = tf.Variable(tf.zeros([10]))
        fc7 = tf.nn.softmax(tf.matmul(fc6, weights) + biases)
    return fc7
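A minimal sketch of running the graph above (TF1-style; assumes 32x32 grayscale inputs so that conv5 yields a 1x1x120 volume):

import numpy as np

inputs = tf.placeholder(tf.float32, [None, 32, 32, 1])
probs = inference(inputs)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(probs, feed_dict={inputs: np.zeros((1, 32, 32, 1))})
    print(out.shape)  # (1, 10)
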
"""
tf2 搭建ResNet AlexNet 实战
"""

import tensorflow as tf
import numpy as np

# Set the GPU memory policy to "allocate memory only as needed".
for gpu in tf.config.experimental.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

# 1. Load the data
'''The network's input is 224x224 (AlexNet was designed for the much larger
ImageNet dataset), so we define a loader class that reads Fashion-MNIST and
resizes each sampled batch to 224x224.'''

class Data_load():
    def __init__(self):
        fashion_mnist = tf.keras.datasets.fashion_mnist
        (self.train_images, self.train_labels), (self.test_images, self.test_labels)\
            = fashion_mnist.load_data()
        # expand dims to the [n, h, w, c] layout
        self.train_images = np.expand_dims(self.train_images.astype(np.float32) / 255.0, axis=-1)
        self.test_images = np.expand_dims(self.test_images.astype(np.float32)/255.0,axis=-1)
        # labels as int32
        self.train_labels = self.train_labels.astype(np.int32)
        self.test_labels = self.test_labels.astype(np.int32)
        # number of training and test examples
        self.num_train, self.num_test = self.train_images.shape[0], self.test_images.shape[0]
    def get_train_batch(self,batch_size):
        # sample batch_size random indices
        index = np.random.randint(0, np.shape(self.train_images)[0], batch_size)
        # resize
        resized_images = tf.image.resize_with_pad(self.train_images[index], 224, 224 )
        return resized_images.numpy(), self.train_labels[index]
    def get_test_batch(self,batch_size):
        index = np.random.randint(0, np.shape(self.test_images)[0], batch_size)
        # resize
        resized_images = tf.image.resize_with_pad(self.test_images[index], 224, 224 )
        return resized_images.numpy(), self.test_labels[index]

# 2. Define the model
def MyAlexNet():
    net = tf.keras.Sequential()
    # conv1: 96 11x11 kernels, stride 4
    net.add(tf.keras.layers.Conv2D(filters=96, kernel_size=11, strides=4, padding='same', activation='relu'))
    net.add(tf.keras.layers.MaxPool2D(pool_size=3, strides=2))
    net.add(tf.keras.layers.Conv2D(filters=256, kernel_size=5, padding='same', activation='relu'))
    net.add(tf.keras.layers.MaxPool2D(pool_size=3, strides=2))
    net.add(tf.keras.layers.Conv2D(filters=384, kernel_size=3, padding='same', activation='relu'))
    net.add(tf.keras.layers.Conv2D(filters=384, kernel_size=3, padding='same', activation='relu'))
    net.add(tf.keras.layers.Conv2D(filters=256, kernel_size=3, padding='same', activation='relu'))
    net.add(tf.keras.layers.MaxPool2D(pool_size=3, strides=2))
    net.add(tf.keras.layers.Flatten())
    net.add(tf.keras.layers.Dense(512, activation='relu'))  # shrunk to lighten training (full AlexNet uses 4096)
    net.add(tf.keras.layers.Dropout(0.5))
    net.add(tf.keras.layers.Dense(256, activation='relu'))  # shrunk to lighten training (full AlexNet uses 4096)
    net.add(tf.keras.layers.Dropout(0.5))
    # softmax (not sigmoid) output over the 10 Fashion-MNIST classes
    net.add(tf.keras.layers.Dense(10, activation='softmax'))

    return net


def train(num_epochs, net):
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.0, nesterov=False)
    net.compile(optimizer=optimizer,
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])

    num_iter = data_load.num_train // batch_size
    for e in range(num_epochs):
        for n in range(num_iter):
            # train on one resized batch, validating on a random test batch
            x_batch, y_batch = data_load.get_train_batch(batch_size)
            test_x_batch, test_y_batch = data_load.get_test_batch(batch_size)
            net.fit(x_batch, y_batch, validation_data=(test_x_batch, test_y_batch))


if __name__ == '__main__':
    # load the data
    batch_size = 64
    data_load = Data_load()
    x_train_batch, y_train_batch = data_load.get_train_batch(batch_size)
    print("x_batch shape:", x_train_batch.shape, "y_batch shape:", y_train_batch.shape)

    # build the network and print each layer's output shape
    net = MyAlexNet()
    X = tf.random.uniform((1, 224, 224, 1))
    for layer in net.layers:
        X = layer(X)
        print(layer.name, 'output shape\t', X.shape)

    # train
    num_epochs = 1
    train(num_epochs, net)

"""
tf2   VGG13 实战
"""

import tensorflow as tf
from tensorflow.keras import layers, optimizers, datasets, Sequential
import os

os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
tf.random.set_seed(1)

# convolutional layers
conv_layers = [ # 5 units of conv + max pooling
    # unit 1
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),

    # unit 2
    layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),

    # unit 3
    layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),

    # unit 4
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),

    # unit 5
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same')
]

# fully connected (classifier) layers
fc_layers = [
    layers.Dense(256, activation=tf.nn.relu),
    layers.Dense(128, activation=tf.nn.relu),
    layers.Dense(100, activation=None),
]

def preprocess(x, y):
    # scale pixel values to [0, 1]
    x = tf.cast(x, dtype=tf.float32) / 255.
    y = tf.cast(y, dtype=tf.int32)
    return x,y


def main():

    # [b, 32, 32, 3] => [b, 1, 1, 512]
    conv_net = Sequential(conv_layers)
    fc_net = Sequential(fc_layers)

    conv_net.build(input_shape=[None, 32, 32, 3])
    fc_net.build(input_shape=[None, 512])
    optimizer = optimizers.Adam(learning_rate=1e-4)

    # [1, 2] + [3, 4] => [1, 2, 3, 4]
    variables = conv_net.trainable_variables + fc_net.trainable_variables

    for epoch in range(2):

        for step, (x,y) in enumerate(train_db):

            with tf.GradientTape() as tape:
                
                out = conv_net(x)   # [b, 32, 32, 3] => [b, 1, 1, 512]
                out = tf.reshape(out, [-1, 512])  # flatten, => [b, 512]
                logits = fc_net(out)  # [b, 512] => [b, 100]
                y_onehot = tf.one_hot(y, depth=100)   # [b] => [b, 100]
                # compute loss
                loss = tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True)
                loss = tf.reduce_mean(loss)

            grads = tape.gradient(loss, variables)
            optimizer.apply_gradients(zip(grads, variables))

            if step % 100 == 0:
                print(epoch, step, 'loss:', float(loss))



        total_num = 0
        total_correct = 0
        for x,y in test_db:

            out = conv_net(x)
            out = tf.reshape(out, [-1, 512])
            logits = fc_net(out)
            prob = tf.nn.softmax(logits, axis=1)
            pred = tf.argmax(prob, axis=1)
            pred = tf.cast(pred, dtype=tf.int32)

            correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
            correct = tf.reduce_sum(correct)

            total_num += x.shape[0]
            total_correct += int(correct)
 
        acc = total_correct / total_num
        print(epoch, 'acc:', acc)



if __name__ == '__main__':
    
    (x,y), (x_test, y_test) = datasets.cifar100.load_data()
    y = tf.squeeze(y, axis=1)
    y_test = tf.squeeze(y_test, axis=1)
    print(x.shape, y.shape, x_test.shape, y_test.shape)


    train_db = tf.data.Dataset.from_tensor_slices((x,y))
    train_db = train_db.shuffle(1000).map(preprocess).batch(128)

    test_db = tf.data.Dataset.from_tensor_slices((x_test,y_test))
    test_db = test_db.map(preprocess).batch(64)

    sample = next(iter(train_db))
    print('sample: \n', sample[0].shape, sample[1].shape,
          tf.reduce_min(sample[0]), tf.reduce_max(sample[0]))


    main()


'''
TF2: building ResNet (a ResNet-18 variant)
'''

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Sequential



class BasicBlock(layers.Layer):

    def __init__(self, filter_num, stride=1):
        super(BasicBlock, self).__init__()

        self.conv1 = layers.Conv2D(filter_num, (3, 3), strides=stride, padding='same')
        self.bn1 = layers.BatchNormalization()
        self.relu = layers.Activation('relu')

        self.conv2 = layers.Conv2D(filter_num, (3, 3), strides=1, padding='same')
        self.bn2 = layers.BatchNormalization()

        # when stride != 1 the main path shrinks spatially, so downsample the
        # shortcut with a 1x1 conv to keep the two branches addable
        if stride != 1:
            self.downsample = Sequential()
            self.downsample.add(layers.Conv2D(filter_num, (1, 1), strides=stride))
        else:
            self.downsample = lambda x: x



    def call(self, inputs, training=None):

        # [b, h, w, c]
        out = self.conv1(inputs)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        identity = self.downsample(inputs)

        output = layers.add([out, identity])
        output = tf.nn.relu(output)

        return output

# The ResNet model; subclassing either keras.Model or keras.layers.Layer works.
class ResNet(keras.Model):
    # layer_dims, e.g. [2, 2, 2, 2]: four residual stages, each with 2 BasicBlocks
    # num_classes: size of the final fully connected output, i.e. the number of classes
    def __init__(self, layer_dims, num_classes=100): # [2, 2, 2, 2]
        super(ResNet, self).__init__()
        
        # stem (preprocessing) layers; the design is flexible, e.g. a MaxPool2D can
        # be added here. Sequential stacks layers linearly from input to output;
        # layers can also be appended with .add().
        self.stem = Sequential([layers.Conv2D(64, (3, 3), strides=(1, 1), padding='valid'),
                                layers.BatchNormalization(),
                                layers.Activation('relu'),
                                layers.MaxPool2D(pool_size=(2, 2), strides=(1, 1), padding='same')
                                ])
        # build the 4 residual stages
        self.layer1 = self.build_resblock(64,  layer_dims[0])
        self.layer2 = self.build_resblock(128, layer_dims[1], stride=2)
        self.layer3 = self.build_resblock(256, layer_dims[2], stride=2)
        self.layer4 = self.build_resblock(512, layer_dims[3], stride=2)

        # global average pooling: far fewer parameters than a big FC layer
        self.gap = layers.GlobalAveragePooling2D()
        self.fc = layers.Dense(num_classes)

    def call(self, inputs, training=None):
        # forward pass
        x = self.stem(inputs)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.gap(x) # [b, c]
        x = self.fc(x) # [b, 100]

        return x

    def build_resblock(self, filter_num, blocks, stride=1):

        res_blocks = Sequential()
        res_blocks.add(BasicBlock(filter_num, stride))

        for _ in range(1, blocks):
            res_blocks.add(BasicBlock(filter_num, stride=1))

        return res_blocks


def resnet18():
    return ResNet([2, 2, 2, 2])
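
A minimal usage sketch (assuming 32x32 RGB inputs, e.g. CIFAR images as in the VGG example above):

if __name__ == '__main__':
    model = resnet18()
    x = tf.random.normal([4, 32, 32, 3])
    print(model(x).shape)  # (4, 100)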
