tensorflow object detection m

2017-10-01 本文已影响0人永远学习中

tensorflow 的 object detection model API 的配置底层是 protobuf，所以先看 proto 文件。
本博文实现的是r-fcn模型。
r-fcn模型基于faster r-cnn改进得到。
参照object_detection/samples/configs中的rfcn_resnet101_pets.config。
网络的配置常见形式为：

model {
(... 模型的配置)
}

train_config : {
(... 训练的配置，较复杂...)
}

train_input_reader: {
(... 训练输入数据的配置..)
}

eval_config: {
(... 测试的配置，较简单...)
}

eval_input_reader: {
(... 测试输入数据的配置...)
}

配置的源头是：
pipeline.proto文件

syntax = "proto2";

package object_detection.protos;

import "object_detection/protos/eval.proto";
import "object_detection/protos/input_reader.proto";
import "object_detection/protos/model.proto";
import "object_detection/protos/train.proto";
// Top-level pipeline configuration that ties together the model, training,
// evaluation and input-reader settings.
// Note that train_input_reader and eval_input_reader share the same
// InputReader type; the details of each sub-message are found in the
// corresponding imported proto file.
message TrainEvalPipelineConfig {
  // Detection model configuration (the `model { ... }` section).
  optional DetectionModel model = 1;
  // Training configuration (the `train_config { ... }` section).
  optional TrainConfig train_config = 2;
  // Input data configuration used for training.
  optional InputReader train_input_reader = 3;
  // Evaluation configuration (the `eval_config { ... }` section).
  optional EvalConfig eval_config = 4;
  // Input data configuration used for evaluation.
  optional InputReader eval_input_reader = 5;
}

首先看model.proto

package object_detection.protos;

import "object_detection/protos/faster_rcnn.proto";
import "object_detection/protos/ssd.proto";
// Selects which detection architecture to build: exactly one of Faster R-CNN
// or SSD. This walkthrough uses the faster_rcnn branch; see faster_rcnn.proto.
message DetectionModel {
  // At most one member of this oneof can be set at a time.
  oneof model {
    // Faster R-CNN configuration.
    FasterRcnn faster_rcnn = 1;
    // SSD configuration.
    Ssd ssd = 2;
  }
}

再看faster_rcnn.proto

syntax = "proto2";

package object_detection.protos;
//可以看出FasterRcnn中有的几个核心模块
import "object_detection/protos/anchor_generator.proto";
import "object_detection/protos/box_predictor.proto";
import "object_detection/protos/hyperparams.proto";
import "object_detection/protos/image_resizer.proto";
import "object_detection/protos/losses.proto";
import "object_detection/protos/post_processing.proto";

// Configuration for Faster R-CNN models.
// For details see meta_architectures/faster_rcnn_meta_arch.py and
// models/model_builder.py.
//
// Naming conventions:
// Faster R-CNN models have two stages, so fields are prefixed with
// `first_stage_` and `second_stage_` to indicate which stage they configure.
message FasterRcnn {
  // Whether to use only the first stage.
  optional bool first_stage_only = 1 [default = false];

  // NOTE(review): field number 2 is not used in this message — presumably
  // intentional upstream; do not reuse it without confirming.

  // Number of classes to predict.
  optional int32 num_classes = 3;

  // Image resizer used to preprocess the input image.
  optional ImageResizer image_resizer = 4;

  // Feature extractor configuration.
  optional FasterRcnnFeatureExtractor feature_extractor = 5;

  // Anchor generator configuration for the RPN (first stage).
  optional AnchorGenerator first_stage_anchor_generator = 6;

  // Atrous rate for the first stage (per the field name). The original note
  // relates it to the `first_stage_features_to_crop` tensor used for box
  // prediction — TODO confirm the exact semantics against upstream.
  optional int32 first_stage_atrous_rate = 7 [default = 1];

  // Hyperparameters for the RPN box predictor.
  optional Hyperparams first_stage_box_predictor_conv_hyperparams = 8;

  // Kernel size to use for the convolution op just prior to RPN box
  // predictions.
  optional int32 first_stage_box_predictor_kernel_size = 9 [default = 3];

  // Output depth of the convolution layer feeding the RPN predictions.
  optional int32 first_stage_box_predictor_depth = 10 [default = 512];

  // Minibatch size used by the first stage when computing its losses.
  // NOTE(review): the original note is vague about the unit — confirm
  // against upstream.
  optional int32 first_stage_minibatch_size = 11 [default = 256];

  // Fraction of positive examples per image for the RPN.
  optional float first_stage_positive_balance_fraction = 12 [default = 0.5];

  // Score threshold for first-stage non-max suppression.
  optional float first_stage_nms_score_threshold = 13 [default = 0.0];

  // IoU (overlap) threshold for first-stage non-max suppression.
  optional float first_stage_nms_iou_threshold = 14 [default = 0.7];

  // Maximum number of RPN proposals kept after NMS and passed on to the
  // second stage.
  optional int32 first_stage_max_proposals = 15 [default = 300];

  // Weight of the first-stage localization loss.
  optional float first_stage_localization_loss_weight = 16 [default = 1.0];

  // Weight of the first-stage objectness loss.
  optional float first_stage_objectness_loss_weight = 17 [default = 1.0];

  // Parameters for cropping the feature map.
  // NOTE(review): the original note says the three crop/pool parameters below
  // should be ignored for R-FCN models — confirm the exact condition upstream.

  // Output size of the crop taken for ROI pooling.
  optional int32 initial_crop_size = 18;

  // Kernel size of the max pool applied to the cropped feature map.
  optional int32 maxpool_kernel_size = 19;

  // Stride of the max pool applied to the cropped feature map.
  optional int32 maxpool_stride = 20;

  // Second stage (recognition) parameters.

  // Box predictor configuration for the second stage. Per the original note,
  // the predictor type decides which model is built: an rfcn_box_predictor
  // yields an R-FCN model, anything else a Faster R-CNN model.
  optional BoxPredictor second_stage_box_predictor = 21;

  // Batch size for the second stage. Per the original note this is ignored
  // when a `hard_example_miner` is configured.
  optional int32 second_stage_batch_size = 22 [default = 64];

  // Fraction of positive examples per image in the second stage.
  optional float second_stage_balance_fraction = 23 [default = 0.25];

  // Post-processing applied to the second-stage box predictions.
  // Note: the `score_converter` provided to the FasterRCNNMetaArch constructor
  // is taken from this `second_stage_post_processing` proto.
  optional PostProcessing second_stage_post_processing = 24;

  // Weight of the second-stage localization loss.
  optional float second_stage_localization_loss_weight = 25 [default = 1.0];

  // Weight of the second-stage classification loss.
  optional float second_stage_classification_loss_weight = 26 [default = 1.0];

  // If set, hard example mining is performed.
  // HardExampleMiner is not declared in this file; presumably it comes from
  // one of the imported protos.
  optional HardExampleMiner hard_example_miner = 27;
}
// Configuration of the feature extractor used by Faster R-CNN.
message FasterRcnnFeatureExtractor {
  // Which feature extractor to use, e.g. 'faster_rcnn_resnet101'. The
  // available values can be looked up in models/model_builder.py.
  optional string type = 1;

  // Output stride of extracted RPN feature map.
  optional int32 first_stage_features_stride = 2 [default = 16];
}

faster_rcnn.proto中导入了多个文件，接下来依次讲述。
object_detection/protos/anchor_generator.proto

import "object_detection/protos/grid_anchor_generator.proto";
import "object_detection/protos/ssd_anchor_generator.proto";

// Selects the anchor generator: exactly one of the two variants below.
// This walkthrough uses GridAnchorGenerator.
message AnchorGenerator {
  // At most one member of this oneof can be set at a time.
  oneof anchor_generator_oneof {
    // Grid anchor generator configuration.
    GridAnchorGenerator grid_anchor_generator = 1;
    // SSD anchor generator configuration.
    SsdAnchorGenerator ssd_anchor_generator = 2;
  }
}

继续看object_detection/protos/grid_anchor_generator.proto

package object_detection.protos;

// Configuration for GridAnchorGenerator. How these values are used can be
// found in anchor_generators/grid_anchor_generator.py.
message GridAnchorGenerator {
  // Anchor height.
  optional int32 height = 1 [default = 256];

  // Anchor width.
  optional int32 width = 2 [default = 256];

  // Anchor stride along the height dimension.
  optional int32 height_stride = 3 [default = 16];

  // Anchor stride along the width dimension.
  optional int32 width_stride = 4 [default = 16];

  // Anchor height offset in pixels.
  optional int32 height_offset = 5 [default = 0];

  // Anchor width offset in pixels.
  optional int32 width_offset = 6 [default = 0];

  // At any given location, len(scales) * len(aspect_ratios) anchors are
  // generated with all possible combinations of scales and aspect ratios.

  // List of scales for the anchors.
  repeated float scales = 7;

  // List of aspect ratios for the anchors.
  repeated float aspect_ratios = 8;
}

box预测的box_predictor.proto，

只看和r-fcn有关的。
import "object_detection/protos/hyperparams.proto";

// Configuration of the R-FCN box predictor.
message RfcnBoxPredictor {
  // Hyperparameters for the convolutions used in the box predictor.
  optional Hyperparams conv_hyperparams = 1;

  // Number of spatial bins along the height used for the R-FCN crop.
  optional int32 num_spatial_bins_height = 2 [default = 3];

  // Number of spatial bins along the width used for the R-FCN crop.
  optional int32 num_spatial_bins_width = 3 [default = 3];

  // Target depth to reduce the input image features to.
  optional int32 depth = 4 [default = 1024];

  // Size of the encoding for the boxes.
  optional int32 box_code_size = 5 [default = 4];

  // Height the R-FCN crop is resized to.
  optional int32 crop_height = 6 [default = 12];

  // Width the R-FCN crop is resized to.
  optional int32 crop_width = 7 [default = 12];
}

接下来是object_detection/protos/hyperparams.proto

syntax = "proto2";
package object_detection.protos;
// Layer hyperparameters: operation type, regularizer, initializer,
// activation function and batch norm settings.
message Hyperparams {
  // Kinds of operation the hyperparameters can apply to.
  enum Op {
    // Convolution, Separable Convolution, Convolution transpose.
    CONV = 1;
    // Fully connected
    FC = 2;
  }

  // Available activation functions.
  enum Activation {
    // Use None (no activation)
    NONE = 0;
    // Use tf.nn.relu
    RELU = 1;
    // Use tf.nn.relu6
    RELU_6 = 2;
  }

  // Operation type; convolution unless specified otherwise.
  optional Op op = 1 [default = CONV];

  // Regularizer applied to the weights.
  optional Regularizer regularizer = 2;

  // How the weights are initialized.
  optional Initializer initializer = 3;

  // Which activation function to use.
  optional Activation activation = 4 [default = RELU];

  // Batch norm hyperparameters. If unset, batch norm is not used.
  optional BatchNorm batch_norm = 5;
}

// Selects one regularization method: either L1 or L2.
message Regularizer {
  // At most one member of this oneof can be set at a time.
  oneof regularizer_oneof {
    // L1 regularizer configuration.
    L1Regularizer l1_regularizer = 1;
    // L2 regularizer configuration.
    L2Regularizer l2_regularizer = 2;
  }
}

// Configuration proto for L1 Regularizer.
// See https://www.tensorflow.org/api_docs/python/tf/contrib/layers/l1_regularizer
message L1Regularizer {
  // Regularization weight; 1.0 when not set.
  optional float weight = 1 [default = 1.0];
}

// Configuration proto for L2 Regularizer.
// See https://www.tensorflow.org/api_docs/python/tf/contrib/layers/l2_regularizer
message L2Regularizer {
  // Regularization weight; 1.0 when not set.
  optional float weight = 1 [default = 1.0];
}

// Selects one weight initialization method.
message Initializer {
  // At most one member of this oneof can be set at a time.
  oneof initializer_oneof {
    // Truncated normal initializer configuration.
    TruncatedNormalInitializer truncated_normal_initializer = 1;
    // Variance scaling initializer configuration.
    VarianceScalingInitializer variance_scaling_initializer = 2;
  }
}

// Configuration proto for truncated normal initializer. See
// https://www.tensorflow.org/api_docs/python/tf/truncated_normal_initializer
message TruncatedNormalInitializer {
  // Mean of the distribution; 0.0 when not set.
  optional float mean = 1 [default = 0.0];
  // Standard deviation of the distribution; 1.0 when not set.
  optional float stddev = 2 [default = 1.0];
}

// Configuration proto for variance scaling initializer. See
// https://www.tensorflow.org/api_docs/python/tf/contrib/layers/
// variance_scaling_initializer
message VarianceScalingInitializer {
  // Scaling modes; see the linked initializer documentation for their
  // meaning.
  enum Mode {
    FAN_IN = 0;
    FAN_OUT = 1;
    FAN_AVG = 2;
  }

  // Scaling factor; 2.0 when not set.
  optional float factor = 1 [default = 2.0];
  // Presumably selects a uniform rather than a normal distribution — confirm
  // against the linked initializer documentation.
  optional bool uniform = 2 [default = false];
  // Scaling mode; FAN_IN when not set.
  optional Mode mode = 3 [default = FAN_IN];
}

// Batch norm parameters. For the meaning of each value see
// https://www.tensorflow.org/api_docs/python/tf/contrib/layers/batch_norm
message BatchNorm {
  // NOTE(review): decay, center, scale and epsilon presumably map to the
  // same-named batch_norm arguments — confirm in the hyperparams builder.
  optional float decay = 1 [default = 0.999];
  optional bool center = 2 [default = true];
  optional bool scale = 3 [default = false];
  optional float epsilon = 4 [default = 0.001];
  // Whether to train the batch norm variables. If this is set to false during
  // training, the current value of the batch_norm variables are used for
  // forward pass but they are never updated.
  optional bool train = 5 [default = true];
}

图像尺寸调整（resize）
见object_detection/protos/image_resizer.proto

syntax = "proto2";
package object_detection.protos;

// Selects which kind of image resizing to perform.
message ImageResizer {
  // At most one member of this oneof can be set at a time.
  oneof image_resizer_oneof {
    // Resize while keeping the aspect ratio.
    KeepAspectRatioResizer keep_aspect_ratio_resizer = 1;
    // Resize to a fixed shape.
    FixedShapeResizer fixed_shape_resizer = 2;
  }
}

// Configuration for an image resizer that keeps the image aspect ratio
// unchanged.
message KeepAspectRatioResizer {
  // Desired size of the smaller image dimension in pixels.
  optional int32 min_dimension = 1 [default = 600];

  // Desired size of the larger image dimension in pixels.
  optional int32 max_dimension = 2 [default = 1024];
}


// Configuration proto for image resizer that resizes to a fixed shape.
message FixedShapeResizer {
  // Desired height of image in pixels; 300 when not set.
  optional int32 height = 1 [default = 300];

  // Desired width of image in pixels; 300 when not set.
  optional int32 width = 2 [default = 300];
}

后处理：object_detection/protos/post_processing.proto

syntax = "proto2";
package object_detection.protos;
// Configuration of non-max suppression (NMS) for a batch of detections.
message BatchNonMaxSuppression {
  // Minimum score a box must have to be kept.
  optional float score_threshold = 1 [default = 0.0];

  // IoU threshold used when merging (suppressing) overlapping boxes.
  optional float iou_threshold = 2 [default = 0.6];

  // Maximum number of detections returned per class.
  optional int32 max_detections_per_class = 3 [default = 100];

  // Maximum number of detections returned in total.
  // NOTE(review): field number 4 is not used in this message — presumably
  // intentional upstream; do not reuse it without confirming.
  optional int32 max_total_detections = 5 [default = 100];
}

// Post-processing configuration.
message PostProcessing {
  // Enum to specify how to convert the detection scores.
  enum ScoreConverter {
    // Input scores equals output scores.
    IDENTITY = 0;
    // Applies a sigmoid on input scores.
    SIGMOID = 1;
    // Applies a softmax on input scores
    SOFTMAX = 2;
  }

  // Non-max suppression parameters.
  optional BatchNonMaxSuppression batch_non_max_suppression = 1;

  // Score converter to use.
  optional ScoreConverter score_converter = 2 [default = IDENTITY];
}

faster r-cnn中的具体细节暂不叙述，接下来看TrainConfig。该文件位于object_detection/protos/train.proto

syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/optimizer.proto";
import "object_detection/protos/preprocessor.proto";
// Configuration for DetectionModel training jobs (train.py).
message TrainConfig {
  // Input batch size.
  optional uint32 batch_size = 1 [default = 32];

  // Data augmentation options.
  repeated PreprocessingStep data_augmentation_options = 2;

  // Whether to synchronize replicas during training.
  optional bool sync_replicas = 3 [default = false];

  // How often checkpoints are kept, in hours (per the field name).
  optional uint32 keep_checkpoint_every_n_hours = 4 [default = 1000];

  // Optimizer used for training.
  optional Optimizer optimizer = 5;

  // If greater than 0, clips gradients by this value.
  optional float gradient_clipping_by_norm = 6 [default = 0.0];

  // Checkpoint to restore parameters from.
  optional string fine_tune_checkpoint = 7 [default = ""];

  // Whether the checkpoint comes from a detection model rather than a
  // classification model.
  // NOTE(review): the original note says the number of classes must match
  // when restoring from a detection checkpoint — verify against upstream.
  optional bool from_detection_checkpoint = 8 [default = false];

  // Total number of training steps. If 0, training runs indefinitely.
  optional uint32 num_steps = 9 [default = 0];

  // Number of training steps between replica startup.
  // Per the original note this should be 0 when sync_replicas is true.
  // NOTE(review): a step count typed as float looks odd, but the type is part
  // of the wire contract and must not be changed here.
  optional float startup_delay_steps = 10 [default = 15];

  // If greater than 0, multiplies the gradient of bias variables by this
  // amount.
  optional float bias_grad_multiplier = 11 [default = 0];

  // Variables that should not be updated during training.
  repeated string freeze_variables = 12;

  // Number of replicas to aggregate before making parameter updates.
  optional int32 replicas_to_aggregate = 13 [default = 1];

  // Maximum number of elements to store within a queue.
  optional int32 batch_queue_capacity = 14 [default = 600];

  // Number of threads to use for batching.
  optional int32 num_batch_queue_threads = 15 [default = 8];

  // Maximum capacity of the queue used to prefetch assembled batches.
  optional int32 prefetch_queue_capacity = 16 [default = 10];
}

再之后就是InputReader，来自文件object_detection/protos/input_reader.proto

syntax = "proto2";

package object_detection.protos;
// Defines how the input data for Object Detection is produced.
// Input readers are expected to produce a dict of tensors with the following
// entries:
//
// 'image': an [image_height, image_width, channels] image tensor that detection
//    will be run on.
// 'groundtruth_classes': a [num_boxes] int32 tensor storing the class
//    labels of detected boxes in the image.
// 'groundtruth_boxes': a [num_boxes, 4] float tensor storing the coordinates of
//    detected boxes in the image.
// 'groundtruth_instance_masks': (Optional), a [num_boxes, image_height,
//    image_width] float tensor storing binary mask of the objects in boxes.
message InputReader {
  // Path to the StringIntLabelMap, i.e. the mapping from class name to id.
  optional string label_map_path = 1 [default = ""];

  // Whether the data is shuffled.
  optional bool shuffle = 2 [default = true];

  // Maximum number of records kept in the queue.
  optional uint32 queue_capacity = 3 [default = 2000];

  // Minimum number of records kept in the read queue; a larger value gives
  // better shuffling.
  optional uint32 min_after_dequeue = 4 [default = 1000];

  // Number of epochs to read from the data source (per the field name).
  // If 0, reading continues indefinitely.
  optional uint32 num_epochs = 5 [default = 0];

  // Number of data readers to create.
  optional uint32 num_readers = 6 [default = 8];

  // Whether to load instance mask information.
  optional bool load_instance_masks = 7 [default = false];

  // Kind of input reader the data comes from; at most one can be set.
  oneof input_reader {
    TFRecordInputReader tf_record_input_reader = 8;
    ExternalInputReader external_input_reader = 9;
  }
}

// An input reader that reads TF Example protos from local TFRecord files.
// Holds the location of the input data.
message TFRecordInputReader {
  // Path to TFRecordFile.
  optional string input_path = 1 [default = ""];
}

// An externally defined input reader. Users may define an extension to this
// proto to interface their own input readers.
message ExternalInputReader {
  // Field numbers 1 through 999 are set aside for extensions; the message
  // declares no fields of its own.
  extensions 1 to 999;
}

测试的配置，EvalConfig，来自文件object_detection/protos/eval.proto

syntax = "proto2";
package object_detection.protos;
// Configuration for model evaluation jobs (eval.py).
message EvalConfig {
  // Number of visualization images to generate.
  optional uint32 num_visualizations = 1 [default = 10];

  // Number of examples (images) to evaluate.
  optional uint32 num_examples = 2 [default = 5000];

  // How often to run evaluation, in seconds (per the field name).
  optional uint32 eval_interval_secs = 3 [default = 300];

  // Maximum number of times evaluation is run. If 0, it runs indefinitely.
  optional uint32 max_evals = 4 [default = 0];

  // Whether to save the TensorFlow graph used for evaluation to disk.
  optional bool save_graph = 5 [default = false];

  // Where to save visualization results. If empty, they are not saved.
  optional string visualization_export_dir = 6 [default = ""];

  // BNS name of the TensorFlow master.
  optional string eval_master = 7 [default = ""];

  // Metrics used for evaluation. Pascal VOC detection metrics are currently
  // supported.
  optional string metrics_set = 8 [default = "pascal_voc_metrics"];

  // Path for exporting detections as COCO-compatible JSON.
  // The default is written with double quotes to match every other string
  // default in these protos; the value is still the empty string.
  optional string export_path = 9 [default = ""];

  // Option to not read groundtruth labels and only export detections to
  // COCO-compatible JSON file.
  optional bool ignore_groundtruth = 10 [default = false];

  // Use exponential moving averages of variables for evaluation.
  // TODO: When this is false make sure the model is constructed
  // without moving averages in restore_fn.
  optional bool use_moving_averages = 11 [default = false];

  // Whether to evaluate instance masks.
  optional bool eval_instance_masks = 12 [default = false];
}

tensorflow objection detection m

猜你喜欢

热点阅读