Implementing AlexNet with TensorFlow
2019-04-15
csuhan
For more background on AlexNet, see https://www.jianshu.com/p/bf923c9917d8
Paper: "ImageNet Classification with Deep Convolutional Neural Networks" (Krizhevsky, Sutskever, and Hinton, NIPS 2012)
The implementation below is adapted from https://blog.csdn.net/accepthjp/article/details/69999309
1. AlexNet
AlexNet is a landmark network: it demonstrated the remarkable capability of deep learning for image classification, and it introduced several techniques that were new at the time, including the ReLU activation function, Dropout, and multi-GPU parallel training. Its structure is shown in the figure below.
(Figure: the AlexNet architecture)
Layer-by-layer description (a quick shape check follows the list):
- Input: a 224*224*3 image (in practice a 227*227*3 input is used)
- conv1: convolution, 96 kernels of size 11*11 (two groups of 48 kernels), stride 4
- lrn1: Local Response Normalization (LRN)
- maxpool1: max pooling, 3*3 window, stride 2
- conv2: convolution, 256 kernels of size 5*5 (two groups of 128 kernels), stride 1
- lrn2: Local Response Normalization (LRN)
- maxpool2: max pooling, 3*3 window, stride 2
- conv3: convolution, 384 kernels of size 3*3 (no grouping: this layer connects across both streams), stride 1
- conv4: convolution, 384 kernels of size 3*3 (two groups of 192 kernels), stride 1
- conv5: convolution, 256 kernels of size 3*3 (two groups of 128 kernels), stride 1
- maxpool5: max pooling, 3*3 window, stride 2
- fc6: fully connected, input length 256*6*6, output length 4096
- relu
- dropout
- fc7: fully connected, input length 4096, output length 4096
- relu
- dropout
- fc8: fully connected, input length 4096, output length 1000
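As a quick sanity check on these dimensions, here is a minimal sketch (plain Python, using the VALID-padding formula out = (in - k) // s + 1) that traces the spatial size from the 227*227 input down to the 6*6 maps feeding fc6:

# spatial output size of a VALID-padded convolution/pooling
def out_size(in_size, k, s):
    return (in_size - k) // s + 1

size = 227
size = out_size(size, 11, 4)  # conv1 (VALID)  -> 55
size = out_size(size, 3, 2)   # maxpool1       -> 27
# conv2 uses SAME padding with stride 1, so the size stays 27; likewise conv3-conv5
size = out_size(size, 3, 2)   # maxpool2       -> 13
size = out_size(size, 3, 2)   # maxpool5       -> 6
print(size * size * 256)      # 9216 = 6*6*256 inputs to fc6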
2. TensorFlow implementation
First, define the layer helpers:
# max pooling
def maxpool(x, KHeight, KWidth, strideX, strideY, name, padding='SAME'):
    return tf.nn.max_pool(x, ksize=[1, KHeight, KWidth, 1],
                          strides=[1, strideY, strideX, 1], padding=padding, name=name)

# dropout
def dropout(x, keep_prob, name=None):
    return tf.nn.dropout(x, keep_prob=keep_prob, name=name)

# local response normalization
def LRN(x, R, alpha, beta, name=None, bias=1.0):
    return tf.nn.local_response_normalization(x, depth_radius=R, alpha=alpha,
                                              beta=beta, name=name, bias=bias)

# fully connected layer
def fc(x, inputD, outputD, useRelu, name):
    with tf.variable_scope(name) as scope:
        w = tf.get_variable("w", shape=[inputD, outputD], dtype=tf.float32)
        b = tf.get_variable("b", shape=[outputD], dtype=tf.float32)
        out = tf.nn.bias_add(tf.matmul(x, w), b)
        if useRelu:
            return tf.nn.relu(out)
        else:
            return out

# convolution layer, supporting the two-group convolution used by the original AlexNet
def conv2d(x, KHeight, KWidth, strideX, strideY, featureNum, name, padding='SAME', groups=1):
    channel = int(x.get_shape()[-1])
    conv = lambda a, b: tf.nn.conv2d(a, b, strides=[1, strideY, strideX, 1], padding=padding)
    with tf.variable_scope(name) as scope:
        # integer division: each group sees channel // groups input channels
        w = tf.get_variable("w", shape=[KHeight, KWidth, channel // groups, featureNum])
        b = tf.get_variable("b", shape=[featureNum])
        # split the input along channels and the kernels along output maps,
        # convolve each pair, then concatenate the group outputs
        xNew = tf.split(value=x, num_or_size_splits=groups, axis=3)
        wNew = tf.split(value=w, num_or_size_splits=groups, axis=3)
        featureMap = [conv(t1, t2) for t1, t2 in zip(xNew, wNew)]
        mergeFeatureMap = tf.concat(axis=3, values=featureMap)
        out = tf.nn.bias_add(mergeFeatureMap, b)
        # the reshape is a no-op that restores static shape info lost by split/concat
        return tf.nn.relu(tf.reshape(out, mergeFeatureMap.get_shape().as_list()), name=scope.name)
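To convince yourself that the grouped convolution is wired correctly, you can run it on a dummy tensor and inspect the output shape. This is a throwaway check with a hypothetical scope name, not part of the final script:

tmp = tf.placeholder(tf.float32, [1, 27, 27, 96])
check = conv2d(tmp, 5, 5, 1, 1, 256, 'conv2_check', groups=2)
print(check.shape)  # (1, 27, 27, 256): two 48-channel halves, each mapped to 128 maps, concatenated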
Next, define the network structure and the routine that loads the pretrained weights:
class alexNet():
    def __init__(self, x, keepPro, classNum, skip, model_path="bvlc_alexnet.npy"):
        self.X = x
        self.KEEPPRO = keepPro
        self.CLASSNUM = classNum
        self.SKIP = skip
        self.MODELPATH = model_path
        self.buildCNN()
    def buildCNN(self):
        conv1 = conv2d(self.X, 11, 11, 4, 4, 96, 'conv1', padding='VALID')
        lrn1 = LRN(conv1, 2, 2e-05, 0.75, "norm1")
        pool1 = maxpool(lrn1, 3, 3, 2, 2, 'pool1', padding='VALID')
        conv2 = conv2d(pool1, 5, 5, 1, 1, 256, 'conv2', groups=2)
        lrn2 = LRN(conv2, 2, 2e-05, 0.75, "norm2")
        pool2 = maxpool(lrn2, 3, 3, 2, 2, 'pool2', padding='VALID')
        conv3 = conv2d(pool2, 3, 3, 1, 1, 384, 'conv3')
        conv4 = conv2d(conv3, 3, 3, 1, 1, 384, 'conv4', groups=2)
        conv5 = conv2d(conv4, 3, 3, 1, 1, 256, 'conv5', groups=2)
        pool5 = maxpool(conv5, 3, 3, 2, 2, 'pool5', padding='VALID')
        pool5_flat = tf.reshape(pool5, shape=[-1, 6*6*256])
        fc1 = fc(pool5_flat, 6*6*256, 4096, True, 'fc6')
        drop1 = dropout(fc1, self.KEEPPRO)
        fc2 = fc(drop1, 4096, 4096, True, 'fc7')
        drop2 = dropout(fc2, self.KEEPPRO)
        # fc8 outputs raw logits: no ReLU here, since softmax follows
        self.fc3 = fc(drop2, 4096, self.CLASSNUM, False, 'fc8')
    def loadModel(self, sess):
        """Load the pretrained Caffe weights from the .npy file."""
        # allow_pickle is required for object arrays on NumPy >= 1.16.3
        wDict = np.load(self.MODELPATH, encoding="bytes", allow_pickle=True).item()
        # iterate over the layers stored in the model
        for name in wDict:
            if name not in self.SKIP:
                with tf.variable_scope(name, reuse=True):
                    for p in wDict[name]:
                        if len(p.shape) == 1:
                            # 1-D array: bias
                            sess.run(tf.get_variable('b', trainable=False).assign(p))
                        else:
                            # 2-D/4-D array: weights
                            sess.run(tf.get_variable('w', trainable=False).assign(p))
A note on the model file: it defaults to bvlc_alexnet.npy and is expected to sit in the same directory as the script. If its internal structure is unclear, you can print the file's contents to see how the parameters are organized.
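For example, a minimal inspection sketch (run from the same directory; allow_pickle is needed on newer NumPy):

# print each layer name and the shapes of its parameter arrays
wDict = np.load("bvlc_alexnet.npy", encoding="bytes", allow_pickle=True).item()
for name, params in wDict.items():
    print(name, [p.shape for p in params])
# expected layout: {layer name: [weights, biases]}, e.g. conv1 -> [(11, 11, 3, 96), (96,)]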
Next, instantiate the model and start a Session:
keeppro = 1  # dropout keep probability (1 = keep everything, i.e. inference mode)
classNum = 1000  # number of classes
skip = []  # layers whose weights are not loaded (useful for later fine-tuning)
x = tf.placeholder(dtype=tf.float32, shape=[1, 227, 227, 3])
model = alexNet(x, keeppro, classNum, skip)
score = model.fc3  # raw logits from fc8
softmax = tf.nn.softmax(score)
sess = tf.InteractiveSession()
model.loadModel(sess)
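As an aside, the skip argument is what enables fine-tuning: layers named in it keep their randomly initialized variables instead of receiving the pretrained weights. A minimal sketch, hypothetical and run in a fresh graph, assuming a 10-class target task:

# reinitialize fc8 for a new 10-class task; load pretrained weights for everything else
x_ft = tf.placeholder(tf.float32, [None, 227, 227, 3])
model_ft = alexNet(x_ft, 0.5, 10, skip=['fc8'])
sess_ft = tf.InteractiveSession()
sess_ft.run(tf.global_variables_initializer())  # random init for all variables, incl. the new fc8
model_ft.loadModel(sess_ft)  # overwrite every layer except fc8 with the pretrained weights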
Then read an image and test the network:
im = np.asarray(Image.open("dog.jpg").resize([227, 227]))
im_true = im  # keep an unbatched copy for display
im = np.expand_dims(im, axis=0)  # add a batch dimension: (227, 227, 3) -> (1, 227, 227, 3)
maxclass = sess.run(softmax, feed_dict={x: im})  # softmax probabilities, shape (1, 1000)
classidx = np.argmax(maxclass)
plt.imshow(im_true)
print("the final class is: {}".format(caffe_classes.class_names[classidx]))
Here caffe_classes is a module listing the names of the 1000 ImageNet classes.
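Instead of printing only the argmax, it is often more informative to look at the top-5 predictions; a small follow-on sketch using the variables above:

# print the five most probable classes with their softmax scores
probs = maxclass[0]
for i in probs.argsort()[-5:][::-1]:
    print("{}: {:.4f}".format(caffe_classes.class_names[i], probs[i]))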
The test result: Pomeranian.
(Figure: the test image dog.jpg)
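One caveat: the BVLC Caffe weights were trained on BGR images with the per-channel ImageNet mean subtracted, whereas PIL loads RGB. The example above feeds the raw RGB array and still gets a sensible prediction, but a faithful reproduction of the Caffe pipeline would preprocess along these lines (a sketch; the mean values are the standard Caffe ones):

# convert RGB -> BGR and subtract the Caffe ImageNet channel means
im_bgr = im[:, :, :, ::-1].astype(np.float32)
im_bgr -= np.array([104., 117., 124.], dtype=np.float32)  # B, G, R means
probs = sess.run(softmax, feed_dict={x: im_bgr})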
3. Code
The complete code:
import tensorflow as tf
import numpy as np
from PIL import Image
import caffe_classes
import matplotlib.pyplot as plt
def maxpool(x, KHeight, KWidth, strideX, strideY, name, padding='SAME'):
    return tf.nn.max_pool(x, ksize=[1, KHeight, KWidth, 1],
                          strides=[1, strideY, strideX, 1], padding=padding, name=name)

def dropout(x, keep_prob, name=None):
    return tf.nn.dropout(x, keep_prob=keep_prob, name=name)

def LRN(x, R, alpha, beta, name=None, bias=1.0):
    return tf.nn.local_response_normalization(x, depth_radius=R, alpha=alpha,
                                              beta=beta, name=name, bias=bias)

def fc(x, inputD, outputD, useRelu, name):
    with tf.variable_scope(name) as scope:
        w = tf.get_variable("w", shape=[inputD, outputD], dtype=tf.float32)
        b = tf.get_variable("b", shape=[outputD], dtype=tf.float32)
        out = tf.nn.bias_add(tf.matmul(x, w), b)
        if useRelu:
            return tf.nn.relu(out)
        else:
            return out

def conv2d(x, KHeight, KWidth, strideX, strideY, featureNum, name, padding='SAME', groups=1):
    channel = int(x.get_shape()[-1])
    conv = lambda a, b: tf.nn.conv2d(a, b, strides=[1, strideY, strideX, 1], padding=padding)
    with tf.variable_scope(name) as scope:
        w = tf.get_variable("w", shape=[KHeight, KWidth, channel // groups, featureNum])
        b = tf.get_variable("b", shape=[featureNum])
        xNew = tf.split(value=x, num_or_size_splits=groups, axis=3)
        wNew = tf.split(value=w, num_or_size_splits=groups, axis=3)
        featureMap = [conv(t1, t2) for t1, t2 in zip(xNew, wNew)]
        mergeFeatureMap = tf.concat(axis=3, values=featureMap)
        out = tf.nn.bias_add(mergeFeatureMap, b)
        return tf.nn.relu(tf.reshape(out, mergeFeatureMap.get_shape().as_list()), name=scope.name)

class alexNet():
    def __init__(self, x, keepPro, classNum, skip, model_path="bvlc_alexnet.npy"):
        self.X = x
        self.KEEPPRO = keepPro
        self.CLASSNUM = classNum
        self.SKIP = skip
        self.MODELPATH = model_path
        self.buildCNN()
    def buildCNN(self):
        conv1 = conv2d(self.X, 11, 11, 4, 4, 96, 'conv1', padding='VALID')
        lrn1 = LRN(conv1, 2, 2e-05, 0.75, "norm1")
        pool1 = maxpool(lrn1, 3, 3, 2, 2, 'pool1', padding='VALID')
        conv2 = conv2d(pool1, 5, 5, 1, 1, 256, 'conv2', groups=2)
        lrn2 = LRN(conv2, 2, 2e-05, 0.75, "norm2")
        pool2 = maxpool(lrn2, 3, 3, 2, 2, 'pool2', padding='VALID')
        conv3 = conv2d(pool2, 3, 3, 1, 1, 384, 'conv3')
        conv4 = conv2d(conv3, 3, 3, 1, 1, 384, 'conv4', groups=2)
        conv5 = conv2d(conv4, 3, 3, 1, 1, 256, 'conv5', groups=2)
        pool5 = maxpool(conv5, 3, 3, 2, 2, 'pool5', padding='VALID')
        pool5_flat = tf.reshape(pool5, shape=[-1, 6*6*256])
        fc1 = fc(pool5_flat, 6*6*256, 4096, True, 'fc6')
        drop1 = dropout(fc1, self.KEEPPRO)
        fc2 = fc(drop1, 4096, 4096, True, 'fc7')
        drop2 = dropout(fc2, self.KEEPPRO)
        self.fc3 = fc(drop2, 4096, self.CLASSNUM, False, 'fc8')  # logits, no ReLU
    def loadModel(self, sess):
        """load model"""
        wDict = np.load(self.MODELPATH, encoding="bytes", allow_pickle=True).item()
        # for layers in model
        for name in wDict:
            if name not in self.SKIP:
                with tf.variable_scope(name, reuse=True):
                    for p in wDict[name]:
                        if len(p.shape) == 1:
                            # bias
                            sess.run(tf.get_variable('b', trainable=False).assign(p))
                        else:
                            # weights
                            sess.run(tf.get_variable('w', trainable=False).assign(p))

keeppro = 1
classNum = 1000
skip = []
x = tf.placeholder(dtype=tf.float32, shape=[1, 227, 227, 3])
model = alexNet(x, keeppro, classNum, skip)
score = model.fc3
softmax = tf.nn.softmax(score)
sess = tf.InteractiveSession()
model.loadModel(sess)
im = np.asarray(Image.open("dog.jpg").resize([227, 227]))
im_true = im
im = np.expand_dims(im, axis=0)
maxclass = sess.run(softmax, feed_dict={x: im})
classidx = np.argmax(maxclass)
plt.imshow(im_true)
print("the final class is: {}".format(caffe_classes.class_names[classidx]))