基于PaddlePaddle实现GoogLeNet在Cifar10上的训练
GoogLeNet的最早版本,出现在2014年的《Going deeper with convolutions》。之所以名为“GoogLeNet”而非“GoogleNet”,文章说是为了向早期的LeNet致敬。
一般来说,提升网络性能最直接的办法就是增加网络深度和宽度,但是简单地增加网络深度和宽度,会导致网络参数个数的爆炸式增加;网络参数太多又会导致过拟合和模型难以训练。解决网络参数爆炸式增加的方式是将全连接甚至一般的卷积都转化为稀疏连接。
大量的文献表明可以将稀疏矩阵聚类为较为密集的子矩阵来提高计算性能,Inception 结构就是该种思路的具体实现。与AlexNet(8层,6000万个参数)相比,使用了Inception 结构的GoogLeNet的层数达到22层,却只有600万个参数,训练速度更快,精度更高!
Inception结构如下图所示:

Inception结构代码实现:
import paddle
import paddle.nn.functional as F # 组网相关的函数,如conv2d, relu...
import numpy as np
from paddle.nn.layer.common import Dropout
from paddle.vision.transforms import Compose, Resize, Transpose, Normalize, ToTensor
from paddle.vision.datasets import Cifar10
from paddle.nn import Sequential, Conv2D, ReLU, MaxPool2D, Linear, Dropout, Flatten, BatchNorm2D
# Inception module: four parallel branches concatenated along the channel axis.
class Inception(paddle.nn.Layer):
    """Inception block with four parallel branches.

    Args:
        in_channels: Number of channels of the input feature map.
        c1: Output channels of branch 1 (a single 1x1 convolution).
        c2: Indexable pair for branch 2; ``c2[0]`` is the width of the 1x1
            reduction and ``c2[1]`` the width of the following 3x3 convolution.
        c3: Indexable pair for branch 3; ``c3[0]`` is the width of the 1x1
            reduction and ``c3[1]`` the width of the following 5x5 convolution.
        c4: Output channels of the 1x1 convolution that follows the 3x3
            max-pool in branch 4.

    The output has ``c1 + c2[1] + c3[1] + c4`` channels. Every branch uses
    padding chosen for its kernel size (1 for 3x3, 2 for 5x5) and the pool
    uses stride 1, so the branches can be concatenated channel-wise.
    """

    def __init__(self, in_channels, c1, c2, c3, c4):
        super().__init__()

        # Branch 1: 1x1 convolution.
        self.route1x1_1 = Conv2D(in_channels, c1, kernel_size=1)

        # Branch 2: 1x1 reduction followed by a 3x3 convolution.
        self.route1x1_2 = Conv2D(in_channels, c2[0], kernel_size=1)
        self.route3x3_2 = Conv2D(c2[0], c2[1], kernel_size=3, padding=1)

        # Branch 3: 1x1 reduction followed by a 5x5 convolution.
        self.route1x1_3 = Conv2D(in_channels, c3[0], kernel_size=1)
        self.route5x5_3 = Conv2D(c3[0], c3[1], kernel_size=5, padding=2)

        # Branch 4: 3x3 max-pool (stride 1) followed by a 1x1 convolution.
        self.route3x3_4 = MaxPool2D(kernel_size=3, stride=1, padding=1)
        self.route1x1_4 = Conv2D(in_channels, c4, kernel_size=1)

    def forward(self, x):
        """Run all four branches on ``x`` and concatenate them on axis 1."""
        branch1 = F.relu(self.route1x1_1(x))

        reduced_3x3 = F.relu(self.route1x1_2(x))
        branch2 = F.relu(self.route3x3_2(reduced_3x3))

        reduced_5x5 = F.relu(self.route1x1_3(x))
        branch3 = F.relu(self.route5x5_3(reduced_5x5))

        pooled = self.route3x3_4(x)
        branch4 = F.relu(self.route1x1_4(pooled))

        # axis=1 is the channel dimension of the feature maps.
        return paddle.concat([branch1, branch2, branch3, branch4], axis=1)
GoogLeNet结构及范例代码如下所示:
def ConvBNRelu(in_channels, out_channels, kernel, stride=1, padding=0):
    """Build a Conv2D -> BatchNorm2D -> ReLU stack.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by the convolution (and normalized
            by the batch-norm layer).
        kernel: Convolution kernel size.
        stride: Convolution stride (default 1).
        padding: Convolution padding (default 0).

    Returns:
        A ``Sequential`` containing the three layers in order.
    """
    conv = Conv2D(
        in_channels,
        out_channels,
        kernel_size=kernel,
        stride=stride,
        padding=padding,
    )
    norm = BatchNorm2D(out_channels, epsilon=1e-3)
    return Sequential(conv, norm, ReLU())
class GoogLeNet(paddle.nn.Layer):
    """GoogLeNet-style classifier built from ``ConvBNRelu`` stems and
    ``Inception`` blocks.

    Args:
        in_channel: Number of channels of the input image.
        num_classes: Number of output classes (width of the final Linear).

    Layout:
        b1: 7x7 stride-2 conv stem + 3x3 stride-2 max-pool.
        b2: 1x1 conv, 3x3 conv, 3x3 stride-2 max-pool.
        b3: two Inception blocks + max-pool.
        b4: five Inception blocks + max-pool.
        b5: two Inception blocks + 2x2 average pool.
        flatten + b6: flatten and a Linear(1024, num_classes) head.

    The head is ``Linear(1024, ...)``, so the pooled feature map must flatten
    to exactly 1024 values, i.e. be 1x1 spatially. That is the case for the
    96x96 input used elsewhere in this file.
    """

    def __init__(self, in_channel, num_classes):
        super(GoogLeNet, self).__init__()
        self.b1 = Sequential(
            ConvBNRelu(in_channel, out_channels=64, kernel=7, stride=2, padding=3),
            MaxPool2D(3, 2))
        self.b2 = Sequential(
            ConvBNRelu(64, 64, kernel=1),
            ConvBNRelu(64, 192, kernel=3, padding=1),
            MaxPool2D(3, 2))
        self.b3 = Sequential(
            Inception(192, 64, (96, 128), (16, 32), 32),
            Inception(256, 128, (128, 192), (32, 96), 64),
            MaxPool2D(3, 2))
        self.b4 = Sequential(
            Inception(480, 192, (96, 208), (16, 48), 64),
            Inception(512, 160, (112, 224), (24, 64), 64),
            Inception(512, 128, (128, 256), (24, 64), 64),
            Inception(512, 112, (144, 288), (32, 64), 64),
            Inception(528, 256, (160, 320), (32, 128), 128),
            MaxPool2D(3, 2))
        self.b5 = Sequential(
            Inception(832, 256, (160, 320), (32, 128), 128),
            # NOTE(review): 182 looks like a typo for 192 (the 3x3-reduce width
            # of inception 5b in the GoogLeNet paper). It is kept unchanged here
            # because altering it changes parameter shapes and would invalidate
            # previously saved weights -- confirm before fixing.
            Inception(832, 384, (182, 384), (48, 128), 128),
            # Referenced through paddle.nn because AvgPool2D is not among the
            # names imported from paddle.nn at the top of the file; the bare
            # name raised NameError.
            paddle.nn.AvgPool2D(2))
        self.flatten = paddle.nn.Flatten()
        # 1024 = 384 + 384 + 128 + 128, the channel count of the last Inception.
        self.b6 = Linear(1024, num_classes)

    def forward(self, x):
        """Apply blocks b1..b5, flatten, and return the class logits."""
        x = self.b1(x)
        x = self.b2(x)
        x = self.b3(x)
        x = self.b4(x)
        x = self.b5(x)
        x = self.flatten(x)
        x = self.b6(x)
        return x
from paddle.static import InputSpec

# Instantiate the network for 3-channel images and 10 classes.
google_net = GoogLeNet(in_channel=3, num_classes=10)

# Input / label specifications handed to the high-level Model API.
# The batch dimension is left as None; images are 3 x 96 x 96 float32.
# NOTE: `input` shadows the Python builtin of the same name; the name is kept
# so that any other code referring to it keeps working.
input = InputSpec([None, 3, 96, 96], 'float32', 'image')
label = InputSpec([None, 1], 'int64', 'label')

# Wrap the network exactly once, with the specs attached. An earlier
# spec-less paddle.Model(google_net) was created and immediately overwritten,
# so it has been dropped.
model = paddle.Model(google_net, input, label)

# Print the per-layer output shapes and parameter counts.
model.summary()
Layer (type) Input Shape Output Shape Param #
================================================
Conv2D-1 [[1, 3, 96, 96]] [1, 64, 48, 48] 9,472
BatchNorm2D-1 [[1, 64, 48, 48]] [1, 64, 48, 48] 256
ReLU-1 [[1, 64, 48, 48]] [1, 64, 48, 48] 0
MaxPool2D-1 [[1, 64, 48, 48]] [1, 64, 23, 23] 0
Conv2D-2 [[1, 64, 23, 23]] [1, 64, 23, 23] 4,160
BatchNorm2D-2 [[1, 64, 23, 23]] [1, 64, 23, 23] 256
ReLU-2 [[1, 64, 23, 23]] [1, 64, 23, 23] 0
Conv2D-3 [[1, 64, 23, 23]] [1, 192, 23, 23] 110,784
BatchNorm2D-3 [[1, 192, 23, 23]] [1, 192, 23, 23] 768
ReLU-3 [[1, 192, 23, 23]] [1, 192, 23, 23] 0
MaxPool2D-2 [[1, 192, 23, 23]] [1, 192, 11, 11] 0
Conv2D-4 [[1, 192, 11, 11]] [1, 64, 11, 11] 12,352
Conv2D-5 [[1, 192, 11, 11]] [1, 96, 11, 11] 18,528
Conv2D-6 [[1, 96, 11, 11]] [1, 128, 11, 11] 110,720
Conv2D-7 [[1, 192, 11, 11]] [1, 16, 11, 11] 3,088
Conv2D-8 [[1, 16, 11, 11]] [1, 32, 11, 11] 12,832
MaxPool2D-3 [[1, 192, 11, 11]] [1, 192, 11, 11] 0
Conv2D-9 [[1, 192, 11, 11]] [1, 32, 11, 11] 6,176
Inception-1 [[1, 192, 11, 11]] [1, 256, 11, 11] 0
Conv2D-10 [[1, 256, 11, 11]] [1, 128, 11, 11] 32,896
Conv2D-11 [[1, 256, 11, 11]] [1, 128, 11, 11] 32,896
Conv2D-12 [[1, 128, 11, 11]] [1, 192, 11, 11] 221,376
Conv2D-13 [[1, 256, 11, 11]] [1, 32, 11, 11] 8,224
Conv2D-14 [[1, 32, 11, 11]] [1, 96, 11, 11] 76,896
MaxPool2D-4 [[1, 256, 11, 11]] [1, 256, 11, 11] 0
Conv2D-15 [[1, 256, 11, 11]] [1, 64, 11, 11] 16,448
Inception-2 [[1, 256, 11, 11]] [1, 480, 11, 11] 0
MaxPool2D-5 [[1, 480, 11, 11]] [1, 480, 5, 5] 0
Conv2D-16 [[1, 480, 5, 5]] [1, 192, 5, 5] 92,352
Conv2D-17 [[1, 480, 5, 5]] [1, 96, 5, 5] 46,176
Conv2D-18 [[1, 96, 5, 5]] [1, 208, 5, 5] 179,920
Conv2D-19 [[1, 480, 5, 5]] [1, 16, 5, 5] 7,696
Conv2D-20 [[1, 16, 5, 5]] [1, 48, 5, 5] 19,248
MaxPool2D-6 [[1, 480, 5, 5]] [1, 480, 5, 5] 0
Conv2D-21 [[1, 480, 5, 5]] [1, 64, 5, 5] 30,784
Inception-3 [[1, 480, 5, 5]] [1, 512, 5, 5] 0
Conv2D-22 [[1, 512, 5, 5]] [1, 160, 5, 5] 82,080
Conv2D-23 [[1, 512, 5, 5]] [1, 112, 5, 5] 57,456
Conv2D-24 [[1, 112, 5, 5]] [1, 224, 5, 5] 226,016
Conv2D-25 [[1, 512, 5, 5]] [1, 24, 5, 5] 12,312
Conv2D-26 [[1, 24, 5, 5]] [1, 64, 5, 5] 38,464
MaxPool2D-7 [[1, 512, 5, 5]] [1, 512, 5, 5] 0
Conv2D-27 [[1, 512, 5, 5]] [1, 64, 5, 5] 32,832
Inception-4 [[1, 512, 5, 5]] [1, 512, 5, 5] 0
Conv2D-28 [[1, 512, 5, 5]] [1, 128, 5, 5] 65,664
Conv2D-29 [[1, 512, 5, 5]] [1, 128, 5, 5] 65,664
Conv2D-30 [[1, 128, 5, 5]] [1, 256, 5, 5] 295,168
Conv2D-31 [[1, 512, 5, 5]] [1, 24, 5, 5] 12,312
Conv2D-32 [[1, 24, 5, 5]] [1, 64, 5, 5] 38,464
MaxPool2D-8 [[1, 512, 5, 5]] [1, 512, 5, 5] 0
Conv2D-33 [[1, 512, 5, 5]] [1, 64, 5, 5] 32,832
Inception-5 [[1, 512, 5, 5]] [1, 512, 5, 5] 0
Conv2D-34 [[1, 512, 5, 5]] [1, 112, 5, 5] 57,456
Conv2D-35 [[1, 512, 5, 5]] [1, 144, 5, 5] 73,872
Conv2D-36 [[1, 144, 5, 5]] [1, 288, 5, 5] 373,536
Conv2D-37 [[1, 512, 5, 5]] [1, 32, 5, 5] 16,416
Conv2D-38 [[1, 32, 5, 5]] [1, 64, 5, 5] 51,264
MaxPool2D-9 [[1, 512, 5, 5]] [1, 512, 5, 5] 0
Conv2D-39 [[1, 512, 5, 5]] [1, 64, 5, 5] 32,832
Inception-6 [[1, 512, 5, 5]] [1, 528, 5, 5] 0
Conv2D-40 [[1, 528, 5, 5]] [1, 256, 5, 5] 135,424
Conv2D-41 [[1, 528, 5, 5]] [1, 160, 5, 5] 84,640
Conv2D-42 [[1, 160, 5, 5]] [1, 320, 5, 5] 461,120
Conv2D-43 [[1, 528, 5, 5]] [1, 32, 5, 5] 16,928
Conv2D-44 [[1, 32, 5, 5]] [1, 128, 5, 5] 102,528
MaxPool2D-10 [[1, 528, 5, 5]] [1, 528, 5, 5] 0
Conv2D-45 [[1, 528, 5, 5]] [1, 128, 5, 5] 67,712
Inception-7 [[1, 528, 5, 5]] [1, 832, 5, 5] 0
MaxPool2D-11 [[1, 832, 5, 5]] [1, 832, 2, 2] 0
Conv2D-46 [[1, 832, 2, 2]] [1, 256, 2, 2] 213,248
Conv2D-47 [[1, 832, 2, 2]] [1, 160, 2, 2] 133,280
Conv2D-48 [[1, 160, 2, 2]] [1, 320, 2, 2] 461,120
Conv2D-49 [[1, 832, 2, 2]] [1, 32, 2, 2] 26,656
Conv2D-50 [[1, 32, 2, 2]] [1, 128, 2, 2] 102,528
MaxPool2D-12 [[1, 832, 2, 2]] [1, 832, 2, 2] 0
Conv2D-51 [[1, 832, 2, 2]] [1, 128, 2, 2] 106,624
Inception-8 [[1, 832, 2, 2]] [1, 832, 2, 2] 0
Conv2D-52 [[1, 832, 2, 2]] [1, 384, 2, 2] 319,872
Conv2D-53 [[1, 832, 2, 2]] [1, 182, 2, 2] 151,606
Conv2D-54 [[1, 182, 2, 2]] [1, 384, 2, 2] 629,376
Conv2D-55 [[1, 832, 2, 2]] [1, 48, 2, 2] 39,984
Conv2D-56 [[1, 48, 2, 2]] [1, 128, 2, 2] 153,728
MaxPool2D-13 [[1, 832, 2, 2]] [1, 832, 2, 2] 0
Conv2D-57 [[1, 832, 2, 2]] [1, 128, 2, 2] 106,624
Inception-9 [[1, 832, 2, 2]] [1, 1024, 2, 2] 0
AvgPool2D-1 [[1, 1024, 2, 2]] [1, 1024, 1, 1] 0
Flatten-1 [[1, 1024, 1, 1]] [1, 1024] 0
Linear-1 [[1, 1024]] [1, 10] 10,250
=================================================
Total params: 5,942,192
Trainable params: 5,940,912
Non-trainable params: 1,280
=================================================
Input size (MB): 0.11
Forward/backward pass size (MB): 10.98
Params size (MB): 22.67
Estimated Total Size (MB): 33.75
训练代码如下所示:
# Preprocessing steps, applied to every image in this order:
#   Compose:   chains the individual transforms given as a list
#   Resize:    rescales the image (here to size 96, matching the 96x96 InputSpec)
#   Normalize: standardizes the pixel values per channel
#   Transpose: reorders the axes, e.g. HWC (image) -> CHW (network)
#   ToTensor:  converts a PIL.Image or numpy.ndarray into a paddle.Tensor
# CIFAR-10 mean and standard deviation were computed by hand:
# mean = [125.31, 122.95, 113.86], std = [62.99, 62.08, 66.7]
# link: https://www.jianshu.com/p/a3f3ffc3cac1
CIFAR10_MEAN = [125.31, 122.95, 113.86]
CIFAR10_STD = [62.99, 62.08, 66.7]

# NOTE(review): the array is already CHW after Transpose; ToTensor is presumably
# given data_format='HWC' so that it does not transpose a second time -- confirm
# against the paddle.vision.transforms.ToTensor documentation.
t = Compose([
    Resize(size=96),
    Normalize(mean=CIFAR10_MEAN, std=CIFAR10_STD, data_format='HWC'),
    Transpose(order=(2, 0, 1)),
    ToTensor(data_format='HWC'),
])

# The same transform is used for the training and the test split.
train_dataset = Cifar10(mode='train', transform=t, backend='cv2')
test_dataset = Cifar10(mode='test', transform=t, backend='cv2')

BATCH_SIZE = 256
train_loader = paddle.io.DataLoader(
    train_dataset, shuffle=True, batch_size=BATCH_SIZE)
test_loader = paddle.io.DataLoader(test_dataset, batch_size=BATCH_SIZE)

# Prepare the model for training: optimizer, loss function and accuracy metric.
learning_rate = 0.001
loss_fn = paddle.nn.CrossEntropyLoss()
opt = paddle.optimizer.Adam(
    learning_rate=learning_rate, parameters=model.parameters())
model.prepare(optimizer=opt, loss=loss_fn, metrics=paddle.metric.Accuracy())

# Train for 20 epochs, evaluating on the test loader every 5 epochs, then run
# one final evaluation on the test loader.
model.fit(
    train_loader,
    test_loader,
    batch_size=BATCH_SIZE,
    epochs=20,
    eval_freq=5,
    verbose=1,
)
model.evaluate(test_loader, verbose=1)
训练结果:经过20轮训练,测试数据集上的精度在80%左右(训练集精度约为96%,存在一定的过拟合)。
Epoch 20/20
step 196/196 [==============================] - loss: 0.0456 - acc: 0.9586 - 276ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 40/40 [==============================] - loss: 0.8746 - acc: 0.8007 - 181ms/step