MobileFaceNets: Face Verification

2018-11-28  菜鸟瞎编

MobileFaceNets: Efficient CNNs for Accurate Real-Time Face Verification on Mobile Devices https://arxiv.org/ftp/arxiv/papers/1804/1804.07573.pdf

MobileNetV2: Inverted Residuals and Linear Bottlenecks https://arxiv.org/pdf/1801.04381.pdf
Chinese translation: http://noahsnail.com/2018/06/06/2018-06-06-MobileNetV2-%20Inverted%20Residuals%20and%20Linear%20Bottlenecks%E8%AE%BA%E6%96%87%E7%BF%BB%E8%AF%91%E2%80%94%E2%80%94%E4%B8%AD%E6%96%87%E7%89%88/

Key points of the paper (the analysis is aimed at the face verification problem):

1. Center pixels of the final feature map carry higher feature importance than edge pixels. The global average pooling layer at the end of a typical network weights every pixel of the feature map equally, which hurts accuracy; replacing it with a global depthwise (separable) convolution improves results.
2. A stride-2 convolution in the first layer works worse than a stride-1 convolution (my experience: a stride-1 convolution followed by a stride-2 MaxPool can beat a stride-2 convolution, at a small cost in speed; see the sketch after this list).
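A minimal sketch of the alternative stem from point 2, assuming the same slim-style TF 1.x setup as the implementation below (the layer names are hypothetical):

x = slim.conv2d(x, 64, (3, 3), stride=1, scope='Conv1')  # stride-1 conv keeps full resolution
x = slim.max_pool2d(x, (2, 2), stride=2, padding='SAME', scope='MaxPool1')  # downsample by 2 with max pooling instead

Point 1 corresponds to the depthwise2 layer in the code below: a depthwise convolution whose kernel spans the whole 7x7 feature map (GDConv), so each channel gets a learned per-position weighting instead of the uniform weighting of global average pooling.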
My implementation (note: BATCH_NORM_MOMENTUM and BATCH_NORM_EPSILON were not defined in the original post; the values below are assumptions):

import tensorflow as tf
import tensorflow.contrib.slim as slim

BATCH_NORM_MOMENTUM = 0.995  # assumed value, not given in the original post
BATCH_NORM_EPSILON = 1e-5    # assumed value, not given in the original post

@tf.contrib.framework.add_arg_scope
def depthwise_conv(
        x, kernel=3, stride=1, padding='SAME',
        activation_fn=None, normalizer_fn=None,
        weights_initializer=tf.contrib.layers.xavier_initializer(),
        data_format='NHWC', scope='depthwise_conv'):

    with tf.variable_scope(scope):
        assert data_format == 'NHWC'
        in_channels = x.shape[3].value
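        # one kernel x kernel filter per input channel (channel multiplier 1),
        # so the output keeps in_channels channels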
        W = tf.get_variable(
            'depthwise_weights',
            [kernel, kernel, in_channels, 1], dtype=tf.float32,
            initializer=weights_initializer
        )
        x = tf.nn.depthwise_conv2d(x, W, [1, stride, stride, 1], padding, data_format='NHWC')
        x = normalizer_fn(x) if normalizer_fn is not None else x  # batch normalization
        x = activation_fn(x) if activation_fn is not None else x  # nonlinearity
        return x

def bottleneck(x, expansion, channel, n=1, stride=1, scope="bottleneck"):
    """MobileNetV2-style inverted residual block, repeated n times.

    The first unit applies `stride` and changes the channel count; the
    remaining n - 1 units use stride 1 with a residual connection.
    """
    in_channels = x.shape[3].value
    with tf.variable_scope(scope):
        # first unit: may downsample and change width, so no residual
        with tf.variable_scope('unit_%d' % 0):
            x = slim.conv2d(x, in_channels*expansion, (1, 1), stride=1, scope='conv1x1_before', activation_fn=tf.nn.relu6)
            x = depthwise_conv(x, kernel=3, stride=stride, scope='depthwise', activation_fn=tf.nn.relu6)
            # linear bottleneck: no activation after the projection layer
            x = slim.conv2d(x, channel, (1, 1), stride=1, scope='conv1x1_after', activation_fn=None)

        # remaining units: stride 1 and matching channel counts, so a residual
        # connection applies; the expansion is now based on `channel`, the
        # input width of these units (not the block's original input width)
        for i in range(1, n):
            with tf.variable_scope('unit_%d' % i):
                shortcut = x
                x = slim.conv2d(x, channel*expansion, (1, 1), stride=1, scope='conv1x1_before', activation_fn=tf.nn.relu6)
                x = depthwise_conv(x, kernel=3, stride=1, scope='depthwise', activation_fn=tf.nn.relu6)
                x = slim.conv2d(x, channel, (1, 1), stride=1, scope='conv1x1_after', activation_fn=None)
                x = x + shortcut

    return x
          
def mobilefacenet(
        images, landmark_target=None, training=True, loss_weights=None,
        depth_multiplier='2.0'):
    """
    An implementation of MobileFaceNets:
    https://arxiv.org/ftp/arxiv/papers/1804/1804.07573.pdf

    landmark_target, loss_weights and depth_multiplier are accepted but
    unused in this snippet.
    """

    def batch_norm(x):
        x = tf.layers.batch_normalization(
            x, axis=3, center=True, scale=True,
            training=training,
            momentum=BATCH_NORM_MOMENTUM,
            epsilon=BATCH_NORM_EPSILON,
            fused=True, name='batch_norm'
        )
        return x

    with tf.name_scope('standardize_input'):
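        # map images from [0, 1] to [-1, 1]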
        x = (2.0 * images) - 1.0

    with tf.variable_scope('MobileFaceNet'):
        params = {
            'padding': 'SAME', 'activation_fn': tf.nn.relu,
            'normalizer_fn': batch_norm, 'data_format': 'NHWC',
            'weights_initializer': tf.contrib.layers.xavier_initializer()
        }
        with slim.arg_scope([slim.conv2d, depthwise_conv], **params):

            x = slim.conv2d(x, 64, (3, 3), stride=2, scope='Conv1')
            print(x.get_shape())  # (384, 56, 56, 64)

            x = depthwise_conv(x, kernel=3, stride=1, activation_fn=None, scope='depthwise1')
            print(x.get_shape())  # (384, 56, 56, 64)

            x = bottleneck(x, 2, 64, 5, 2, scope="bottleneck1")
            print(x.get_shape())  # (384, 28, 28, 64)
            x = bottleneck(x, 4, 128, 1, 2, scope="bottleneck2")
            print(x.get_shape())  # (384, 14, 14, 128)
            x = bottleneck(x, 2, 128, 6, 1, scope="bottleneck3")
            print(x.get_shape())  # (384, 14, 14, 128)
            x = bottleneck(x, 4, 128, 1, 2, scope="bottleneck4")
            print(x.get_shape())  # (384, 7, 7, 128)
            x = bottleneck(x, 2, 128, 2, 1, scope="bottleneck5")
            print(x.get_shape())  # (384, 7, 7, 128)

            x = slim.conv2d(x, 512, (1, 1), stride=1, scope='conv1x1_1')
            print(x.get_shape())  # (384, 7, 7, 512)
            # GDConv: the kernel covers the whole 7x7 map, giving a learned
            # per-position weighting instead of uniform global average pooling
            # (the original kernel=3, stride=7 only saw the top-left 3x3 patch)
            x = depthwise_conv(x, kernel=7, stride=1, padding="VALID", activation_fn=None, scope='depthwise2')
            print(x.get_shape())  # (384, 1, 1, 512)
            # linear output layer (no activation), as in the paper's final 1x1 conv
            x = slim.conv2d(x, 254, (1, 1), stride=1, activation_fn=None, scope='conv1x1_2')
            print(x.get_shape())  # (384, 1, 1, 254)

    # the spatial dimensions are already 1x1 after the global depthwise
    # convolution, so this only collapses them; it is not a real global
    # average pooling over a spatial map
    landmark_pred = tf.reduce_mean(x, axis=[1, 2])
    print(landmark_pred.get_shape())  # (384, 254)

    # tf.squeeze without an axis would also drop a batch dimension of size 1,
    # so just attach the output name instead
    landmark_pred = tf.identity(landmark_pred, name="landmark_pred")
    return landmark_pred
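
A quick usage sketch (hypothetical; the network expects 112x112 RGB crops scaled to [0, 1], which the stride-2 stem reduces to the 56x56 shown in the shape printouts):

images = tf.placeholder(tf.float32, [None, 112, 112, 3])
landmark_pred = mobilefacenet(images, training=False)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    preds = sess.run(landmark_pred, feed_dict={images: my_batch})  # my_batch: your own [0, 1] image batch (hypothetical)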