DSOD Source Code

2018-11-09  yanghedada

I came across a paper on a detector that needs no pre-training: it can be trained jointly from scratch and run detection directly.
Code:

Caffe code on GitHub
TensorFlow code on GitHub

Caffe network diagram: click here

I downloaded the TensorFlow version.

Paper overview:
The paper does object detection in the SSD style, and its architecture comparison is framed against SSD: the "Plain Connection" structure is essentially SSD, while the "Dense Connection" structure is DSOD.
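To make the contrast concrete, here is a minimal sketch of the two wiring schemes for producing the next prediction scale. This is my own illustration rather than code from either repo; the function and argument names (plain_connection, dense_connection, prev_feat, channels) are made up, and it uses raw tf.layers instead of TensorLayer:

import tensorflow as tf

def plain_connection(prev_feat, channels):
    # SSD-style: the next scale is produced entirely by newly learned convs
    x = tf.layers.conv2d(prev_feat, channels // 2, 1, padding='same',
                         activation=tf.nn.relu)
    return tf.layers.conv2d(x, channels, 3, strides=2, padding='same',
                            activation=tf.nn.relu)

def dense_connection(prev_feat, channels):
    # DSOD-style: half the channels are learned with a strided conv; the other
    # half are the previous scale max-pooled and projected; then concatenate
    learned = tf.layers.conv2d(prev_feat, channels // 2, 3, strides=2,
                               padding='same', activation=tf.nn.relu)
    pooled = tf.layers.max_pooling2d(prev_feat, 2, 2, padding='same')
    pooled = tf.layers.conv2d(pooled, channels // 2, 1, padding='same',
                              activation=tf.nn.relu)
    return tf.concat([learned, pooled], axis=-1)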

That concludes the model overview.
After downloading the TensorFlow version of the code, I found that it does not quite match the paper.

# Note: this assumes TensorLayer 1.x. The dense-block helpers (denseblock,
# denseblockpl, denseblockfin), generate_all_default_boxs, and the globals
# conv_bn_decay, default_box_size and classes_size are defined elsewhere
# in the repo.
import tensorflow as tf
import tensorlayer as tl
from tensorlayer.layers import (InputLayer, Conv2d, BatchNormLayer,
                                MaxPool2d, ConcatLayer)

def inference(inputs, is_train, reuse):
    W_init = tf.contrib.layers.xavier_initializer()
    with tf.variable_scope("model", reuse=reuse):
        tl.layers.set_name_reuse(reuse)
        net = InputLayer(inputs, name='input')
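        # Stem: three 3x3 convolutions followed by a 2x2 max-pool
        # (DSOD uses this stack in place of a single large 7x7 conv)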
        net = Conv2d(net, 64, (3, 3), (2, 2), padding='SAME',
                     W_init=W_init, name='stem1')
        net = BatchNormLayer(net, is_train=is_train, decay=conv_bn_decay, act=tf.nn.relu, name='stem1_bn')
        net = Conv2d(net, 64, (3, 3), (1, 1), padding='SAME',
                     W_init=W_init, name='stem2')
        net = BatchNormLayer(net, is_train=is_train, decay=conv_bn_decay, act=tf.nn.relu, name='stem2_bn')
        net = Conv2d(net, 128, (3, 3), (1, 1), padding='SAME',
                     W_init=W_init, name='stem3')
        net = BatchNormLayer(net, is_train=is_train, decay=conv_bn_decay, act=tf.nn.relu, name='stem3_bn')
        net = MaxPool2d(net, filter_size=(2, 2), strides=(2, 2), name='stem3_pool')
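        # Backbone: stacked dense blocks (growth rate 48) with
        # BN-ReLU + 1x1 conv transition layers between them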
        net = denseblock(net, blocknum=6, step=48, firstchannel=192, is_train=is_train, name='denseblock0', reuse=reuse)
        net = BatchNormLayer(net, is_train=is_train, decay=conv_bn_decay, act=tf.nn.relu, name='denseblock0_bn')
        net = Conv2d(net, 416, (1, 1), (1, 1), padding='SAME',
                     W_init=W_init, name='denseblock0_cnn')
        net = MaxPool2d(net, filter_size=(2, 2), strides=(2, 2), name='denseblock0_pool')
        net = denseblock(net, blocknum=8, step=48, firstchannel=192, is_train=is_train, name='denseblock1', reuse=reuse)
        net = BatchNormLayer(net, is_train=is_train, decay=conv_bn_decay, act=tf.nn.relu, name='denseblock1_bn')
        net = Conv2d(net, 800, (1, 1), (1, 1), padding='SAME',
                     W_init=W_init, name='denseblock1_cnn')
        
        # First prediction feature map; no features from other scales are merged in
        netfirst = BatchNormLayer(net, is_train=is_train, decay=conv_bn_decay, act=tf.nn.relu, name='feature_first_bn')
        net = MaxPool2d(net, filter_size=(2, 2), strides=(2, 2), name='denseblock2_pool1')
        net = denseblock(net, blocknum=8, step=48, firstchannel=192, is_train=is_train, name='denseblock2', reuse=reuse)
        net = BatchNormLayer(net, is_train=is_train, decay=conv_bn_decay, act=tf.nn.relu, name='denseblock2_bn')
        net = Conv2d(net, 1184, (1, 1), (1, 1), padding='SAME',
                     W_init=W_init, name='denseblock2_cnn')
        net = denseblock(net, blocknum=8, step=48, firstchannel=192, is_train=is_train, name='denseblock3', reuse=reuse)
        net = BatchNormLayer(net, is_train=is_train, decay=conv_bn_decay, act=tf.nn.relu, name='denseblock3_bn')
        net = Conv2d(net, 256, (1, 1), (1, 1), padding='SAME',
                     W_init=W_init, name='denseblock2_cnna')
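        # Down-sample the first prediction feature map, project it to 256
        # channels, and concatenate it with the backbone output below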
        netpl = MaxPool2d(netfirst, filter_size=(2, 2), strides=(2, 2), name='First_pool')
        netpl = BatchNormLayer(netpl, is_train=is_train, decay=conv_bn_decay, act=tf.nn.relu, name='First_bn')
        netpl = Conv2d(netpl, 256, (1, 1), (1, 1), padding='SAME',
                       W_init=W_init, name='denseblock2_cnnb')

        net = ConcatLayer([net, netpl], -1, "Second_Cat")
        
        # Second feature fusion
        netsecond = BatchNormLayer(net, is_train=is_train, decay=conv_bn_decay, act=tf.nn.relu, name='feature_second_bn')
        net = denseblockpl(net, step=256, firstchannel=256, is_train=is_train, name='denseplz1', reuse=reuse)
        # Third feature fusion
        netthird = BatchNormLayer(net, is_train=is_train, decay=conv_bn_decay, act=tf.nn.relu,
                                   name='feature_third_bn')
        net = denseblockpl(net, step=128, firstchannel=128, is_train=is_train, name='denseplz2', reuse=reuse)
        # Fourth feature fusion
        netfourth = BatchNormLayer(net, is_train=is_train, decay=conv_bn_decay, act=tf.nn.relu,
                                   name='feature_fourth_bn')
        net = denseblockpl(net, step=128, firstchannel=128, is_train=is_train, name='denseplz3', reuse=reuse)
        # Fifth feature fusion
        netfifth = BatchNormLayer(net, is_train=is_train, decay=conv_bn_decay, act=tf.nn.relu,
                                   name='feature_fifth_bn')
        net = denseblockfin(net, step=128, firstchannel=128, is_train=is_train, name='denseplz4', reuse=reuse)
        # Sixth feature fusion
        netsixth = BatchNormLayer(net, is_train=is_train, decay=conv_bn_decay, act=tf.nn.relu,
                                   name='feature_sixth_bn')
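        # Prediction heads: one 3x3 conv per scale, each emitting
        # default_box_size[i] * (classes_size + 4) output channels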
        outfirst = Conv2d(netfirst, default_box_size[0] * (classes_size + 4), (3, 3), (1, 1), padding='SAME',
                          W_init=W_init, name='firstout')
        outsecond = Conv2d(netsecond, default_box_size[1] * (classes_size + 4), (3, 3), (1, 1), padding='SAME',
                           W_init=W_init, name='secondout')
        outthird = Conv2d(netthird, default_box_size[2] * (classes_size + 4), (3, 3), (1, 1), padding='SAME',
                          W_init=W_init, name='thirdout')
        outfourth = Conv2d(netfourth, default_box_size[3] * (classes_size + 4), (3, 3), (1, 1), padding='SAME',
                           W_init=W_init, name='fourthout')
        outfifth = Conv2d(netfifth, default_box_size[4] * (classes_size + 4), (3, 3), (1, 1), padding='SAME',
                          W_init=W_init, name='fifthout')
        outsixth = Conv2d(netsixth, default_box_size[5] * (classes_size + 4), (3, 3), (1, 1), padding='SAME',
                          W_init=W_init, name='sixthout')
        features1 = outfirst.outputs
        features2 = outsecond.outputs
        features3 = outthird.outputs
        features4 = outfourth.outputs
        features5 = outfifth.outputs
        features6 = outsixth.outputs
        feature_maps = [features1, features2, features3, features4, features5, features6]
        global feature_maps_shape
        feature_maps_shape = [m.get_shape().as_list() for m in feature_maps]
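        # Flatten each scale to [batch, boxes_at_scale, classes_size + 4],
        # then concatenate across all six scales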
        tmp_all_feature = []
        for i, fmap in enumerate(feature_maps):
            width = feature_maps_shape[i][1]
            height = feature_maps_shape[i][2]
            tmp_all_feature.append(
                tf.reshape(fmap, [-1, (width * height * default_box_size[i]), (classes_size + 4)]))
        tmp_all_feature = tf.concat(tmp_all_feature, axis=1)
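        # Split the concatenated predictions into class logits and box offsets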
        feature_class = tmp_all_feature[:, :, :classes_size]
        feature_location = tmp_all_feature[:, :, classes_size:]
        print('##   feature_class shape : ' + str(feature_class.get_shape().as_list()))
        print('##   feature_location shape : ' + str(feature_location.get_shape().as_list()))
        # Generate all default boxes
        global all_default_boxs
        all_default_boxs = generate_all_default_boxs()
        # print(all_default_boxs)
        global all_default_boxs_len
        all_default_boxs_len = len(all_default_boxs)
        print('##   all default boxs : ' + str(all_default_boxs_len))
    return feature_class, feature_location, all_default_boxs, all_default_boxs_len
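For orientation, here is a minimal sketch of how this function might be called. This is hypothetical usage, not taken from the repo, and the 300x300 input size is an assumption based on the usual SSD setup:

# Hypothetical usage; assumes the repo's globals are already defined
inputs = tf.placeholder(tf.float32, [None, 300, 300, 3], name='images')  # assumed input size
feature_class, feature_location, boxes, n_boxes = inference(inputs, is_train=True, reuse=False)
# feature_class : [batch, total_boxes, classes_size] raw class scores
# feature_location : [batch, total_boxes, 4] box regression offsets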

This is the main structure of the network. Note that netsecond through netsixth do not seem to fuse in the six-scale outputs from the dense prediction structure mentioned above; the scales are only combined in the plain, SSD-style way.

Still, the code is fairly simple, which makes it easy to modify for experiments, though I have not debugged it myself.

I hope to get a chance to study it in more depth...

Reference:
DSOD: Learning Deeply Supervised Object Detectors from Scratch
