tensorflow object detection model
2017-10-01 本文已影响0人
永远学习中
tensorflow 的 object detection model API的底层是protobuf,所以先看proto文件。
本博文实现的是r-fcn模型。
r-fcn模型基于faster r-cnn改进得到。
参照object_detection/samples/configs中的rfcn_resnet101_pets.config。
网络的配置常见形式为:
model {
(... 模型的配置)
}
train_config : {
(... 训练的配置,较复杂...)
}
train_input_reader: {
(... 训练输入数据的配置..)
}
eval_config: {
(... 测试的配置,较简单...)
}
eval_input_reader: {
(... 测试输入数据的配置...)
}
配置的源头是:
pipeline.proto文件
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/eval.proto";
import "object_detection/protos/input_reader.proto";
import "object_detection/protos/model.proto";
import "object_detection/protos/train.proto";
// Top-level pipeline configuration tying together the model, training and
// evaluation settings. Note that `train_input_reader` and `eval_input_reader`
// share the same InputReader type; the other field types are defined in the
// imported proto files.
message TrainEvalPipelineConfig {
  // Detection model configuration.
  optional DetectionModel model = 1;

  // Training configuration.
  optional TrainConfig train_config = 2;

  // Input data configuration used for training.
  optional InputReader train_input_reader = 3;

  // Evaluation configuration.
  optional EvalConfig eval_config = 4;

  // Input data configuration used for evaluation.
  optional InputReader eval_input_reader = 5;
}
首先看model.proto
package object_detection.protos;
import "object_detection/protos/faster_rcnn.proto";
import "object_detection/protos/ssd.proto";
// Selects the detection architecture: either Faster R-CNN or SSD. This
// walkthrough uses FasterRcnn (see faster_rcnn.proto).
message DetectionModel {
  // Exactly one architecture may be set.
  oneof model {
    FasterRcnn faster_rcnn = 1;
    Ssd ssd = 2;
  }
}
再看faster_rcnn.proto
syntax = "proto2";
package object_detection.protos;
//可以看出FasterRcnn中有的几个核心模块
import "object_detection/protos/anchor_generator.proto";
import "object_detection/protos/box_predictor.proto";
import "object_detection/protos/hyperparams.proto";
import "object_detection/protos/image_resizer.proto";
import "object_detection/protos/losses.proto";
import "object_detection/protos/post_processing.proto";
// Configuration for Faster R-CNN models.
// For details see meta_architectures/faster_rcnn_meta_arch.py and
// models/model_builder.py.
//
// Naming conventions:
// Faster R-CNN models have two stages, so fields are prefixed with
// `first_stage_` and `second_stage_` to indicate which stage they configure.
message FasterRcnn {
  // Whether to build only the first stage.
  optional bool first_stage_only = 1 [default = false];

  // Number of classes to predict.
  optional int32 num_classes = 3;

  // Image resizer used to preprocess the input image.
  optional ImageResizer image_resizer = 4;

  // Feature extractor configuration.
  optional FasterRcnnFeatureExtractor feature_extractor = 5;

  // Anchor generator configuration for the first stage (RPN).
  optional AnchorGenerator first_stage_anchor_generator = 6;

  // Atrous rate (per the field name) associated with the
  // `first_stage_features_to_crop` tensor used to obtain box predictions.
  // NOTE(review): the original note here was terse; confirm the exact
  // semantics against the upstream proto.
  optional int32 first_stage_atrous_rate = 7 [default = 1];

  // Hyperparameters for the convolution used for RPN box prediction.
  optional Hyperparams first_stage_box_predictor_conv_hyperparams = 8;

  // Kernel size of the convolution op used for RPN box prediction.
  optional int32 first_stage_box_predictor_kernel_size = 9 [default = 3];

  // Output depth of the convolution layer used for RPN prediction.
  optional int32 first_stage_box_predictor_depth = 10 [default = 512];

  // First-stage minibatch size used when computing object localization.
  // NOTE(review): presumably the number of anchors sampled per image; confirm.
  optional int32 first_stage_minibatch_size = 11 [default = 256];

  // Approximate fraction of positive examples per image for the RPN.
  optional float first_stage_positive_balance_fraction = 12 [default = 0.5];

  // Score threshold for first-stage non-max suppression (NMS).
  optional float first_stage_nms_score_threshold = 13 [default = 0.0];

  // IoU (overlap) threshold for first-stage NMS.
  optional float first_stage_nms_iou_threshold = 14 [default = 0.7];

  // Number of RPN proposals kept after NMS and passed to the second stage.
  optional int32 first_stage_max_proposals = 15 [default = 300];

  // Weight of the first-stage localization loss.
  optional float first_stage_localization_loss_weight = 16 [default = 1.0];

  // Weight of the first-stage objectness loss.
  optional float first_stage_objectness_loss_weight = 17 [default = 1.0];

  // Parameters for cropping on the feature map. According to the original
  // note, the crop parameters that follow should be ignored for R-FCN models
  // (TODO confirm).
  //
  // Output size of the crop taken for ROI pooling.
  optional int32 initial_crop_size = 18;

  // Kernel size of the max-pool op applied to the cropped feature map.
  optional int32 maxpool_kernel_size = 19;

  // Stride of the max-pool op applied to the cropped feature map.
  optional int32 maxpool_stride = 20;

  // Second-stage parameters.
  //
  // Box predictor configuration. If the predictor type is
  // `rfcn_box_predictor`, an R-FCN model is constructed; otherwise a
  // Faster R-CNN model is constructed.
  optional BoxPredictor second_stage_box_predictor = 21;

  // Second-stage batch size. May be ignored when a `hard_example_miner` is
  // defined.
  optional int32 second_stage_batch_size = 22 [default = 64];

  // Fraction of positive examples per image.
  optional float second_stage_balance_fraction = 23 [default = 0.25];

  // Post-processing applied to the second-stage box predictions.
  // Note: the `score_converter` provided to the FasterRCNNMetaArch constructor
  // is taken from this `second_stage_post_processing` proto.
  optional PostProcessing second_stage_post_processing = 24;

  // Weight of the second-stage localization loss.
  optional float second_stage_localization_loss_weight = 25 [default = 1.0];

  // Weight of the second-stage classification loss.
  optional float second_stage_classification_loss_weight = 26 [default = 1.0];

  // Hard example miner. If set (non-default), hard example mining is
  // performed.
  optional HardExampleMiner hard_example_miner = 27;
}
// Feature extractor configuration used by FasterRcnn.
message FasterRcnnFeatureExtractor {
  // Which model to use, e.g. 'faster_rcnn_resnet101'. The available values
  // can be looked up in models/model_builder.py.
  optional string type = 1;

  // Output stride of extracted RPN feature map.
  optional int32 first_stage_features_stride = 2 [default = 16];
}
faster_rcnn.proto中导入了多个文件,接下来依次讲述。
object_detection/protos/anchor_generator.proto
import "object_detection/protos/grid_anchor_generator.proto";
import "object_detection/protos/ssd_anchor_generator.proto";
// Selects one of two anchor generators. This walkthrough uses
// GridAnchorGenerator.
message AnchorGenerator {
  // Exactly one anchor generator may be set.
  oneof anchor_generator_oneof {
    GridAnchorGenerator grid_anchor_generator = 1;
    SsdAnchorGenerator ssd_anchor_generator = 2;
  }
}
继续看object_detection/protos/grid_anchor_generator.proto
package object_detection.protos;
// Configuration for GridAnchorGenerator. How these values are used can be
// found in anchor_generators/grid_anchor_generator.py.
message GridAnchorGenerator {
  // Anchor height.
  optional int32 height = 1 [default = 256];

  // Anchor width.
  optional int32 width = 2 [default = 256];

  // Anchor stride along the height dimension.
  optional int32 height_stride = 3 [default = 16];

  // Anchor stride along the width dimension.
  optional int32 width_stride = 4 [default = 16];

  // Anchor height offset in pixels.
  optional int32 height_offset = 5 [default = 0];

  // Anchor width offset in pixels.
  optional int32 width_offset = 6 [default = 0];

  // At any given location, len(scales) * len(aspect_ratios) anchors are
  // generated with all possible combinations of scales and aspect ratios.

  // List of scales for the anchors.
  repeated float scales = 7;

  // List of aspect ratios for the anchors.
  repeated float aspect_ratios = 8;
}
box预测的box_predictor.proto,
只看和r-fcn有关的。
import "object_detection/protos/hyperparams.proto";
// Box predictor configuration for R-FCN.
message RfcnBoxPredictor {
  // Hyperparameters for the convolutions used in the box predictor.
  optional Hyperparams conv_hyperparams = 1;

  // Number of spatial bins used for the R-FCN crop, along the height and the
  // width respectively.
  optional int32 num_spatial_bins_height = 2 [default = 3];
  optional int32 num_spatial_bins_width = 3 [default = 3];

  // Target depth to reduce the input image features to.
  optional int32 depth = 4 [default = 1024];

  // Size of the encoding for the boxes.
  optional int32 box_code_size = 5 [default = 4];

  // Size (height and width) to which the R-FCN crops are resized.
  optional int32 crop_height = 6 [default = 12];
  optional int32 crop_width = 7 [default = 12];
}
接下来是object_detection/protos/hyperparams.proto
syntax = "proto2";
package object_detection.protos;
// Layer hyperparameters: operation type, weight regularizer, weight
// initializer, activation function and optional batch norm settings.
message Hyperparams {
  // Operation type these hyperparameters apply to.
  enum Op {
    // Convolution, Separable Convolution, Convolution transpose.
    CONV = 1;

    // Fully connected
    FC = 2;
  }
  optional Op op = 1 [default = CONV];

  // Regularizer applied to the weights.
  optional Regularizer regularizer = 2;

  // How the weights are initialized.
  optional Initializer initializer = 3;

  // Which activation function to use.
  enum Activation {
    // Use None (no activation)
    NONE = 0;

    // Use tf.nn.relu
    RELU = 1;

    // Use tf.nn.relu6
    RELU_6 = 2;
  }
  optional Activation activation = 4 [default = RELU];

  // Batch norm hyperparameters. If unset, batch norm is not used.
  optional BatchNorm batch_norm = 5;
}
// Selects one regularization method (L1 or L2).
message Regularizer {
  // Exactly one regularizer may be set.
  oneof regularizer_oneof {
    L1Regularizer l1_regularizer = 1;
    L2Regularizer l2_regularizer = 2;
  }
}
// Configuration proto for L1 Regularizer.
// See https://www.tensorflow.org/api_docs/python/tf/contrib/layers/l1_regularizer
message L1Regularizer {
  // Regularization weight.
  optional float weight = 1 [default = 1.0];
}
// Configuration proto for L2 Regularizer.
// See https://www.tensorflow.org/api_docs/python/tf/contrib/layers/l2_regularizer
message L2Regularizer {
  // Regularization weight.
  optional float weight = 1 [default = 1.0];
}
// Selects one weight initialization method.
message Initializer {
  // Exactly one initializer may be set.
  oneof initializer_oneof {
    TruncatedNormalInitializer truncated_normal_initializer = 1;
    VarianceScalingInitializer variance_scaling_initializer = 2;
  }
}
// Configuration proto for truncated normal initializer. See
// https://www.tensorflow.org/api_docs/python/tf/truncated_normal_initializer
message TruncatedNormalInitializer {
  // Mean of the distribution.
  optional float mean = 1 [default = 0.0];

  // Standard deviation of the distribution.
  optional float stddev = 2 [default = 1.0];
}
// Configuration proto for variance scaling initializer. See
// https://www.tensorflow.org/api_docs/python/tf/contrib/layers/
// variance_scaling_initializer
message VarianceScalingInitializer {
  // Scaling factor; see the linked API documentation for its meaning.
  optional float factor = 1 [default = 2.0];

  // Uniform flag; see the linked API documentation for its meaning.
  optional bool uniform = 2 [default = false];

  // Fan mode options.
  enum Mode {
    FAN_IN = 0;
    FAN_OUT = 1;
    FAN_AVG = 2;
  }
  optional Mode mode = 3 [default = FAN_IN];
}
// Batch norm configuration. For the meaning of each parameter see
// https://www.tensorflow.org/api_docs/python/tf/contrib/layers/batch_norm
message BatchNorm {
  optional float decay = 1 [default = 0.999];
  optional bool center = 2 [default = true];
  optional bool scale = 3 [default = false];
  optional float epsilon = 4 [default = 0.001];

  // Whether to train the batch norm variables. If this is set to false during
  // training, the current value of the batch_norm variables are used for
  // forward pass but they are never updated.
  optional bool train = 5 [default = true];
}
图像归一化
见object_detection/protos/image_resizer.proto
syntax = "proto2";
package object_detection.protos;
// Selects which kind of image resizing to apply.
message ImageResizer {
  // Exactly one resizer may be set.
  oneof image_resizer_oneof {
    KeepAspectRatioResizer keep_aspect_ratio_resizer = 1;
    FixedShapeResizer fixed_shape_resizer = 2;
  }
}
// Image resizer that keeps the image aspect ratio unchanged.
message KeepAspectRatioResizer {
  // Desired size of the smaller image dimension in pixels.
  optional int32 min_dimension = 1 [default = 600];

  // Desired size of the larger image dimension in pixels.
  optional int32 max_dimension = 2 [default = 1024];
}
// Configuration proto for image resizer that resizes to a fixed shape.
message FixedShapeResizer {
  // Desired height of image in pixels.
  optional int32 height = 1 [default = 300];

  // Desired width of image in pixels.
  optional int32 width = 2 [default = 300];
}
后处理:object_detection/protos/post_processing.proto
syntax = "proto2";
package object_detection.protos;
.
// Configuration for non-max suppression (NMS) applied to a batch of
// detections.
message BatchNonMaxSuppression {
  // Minimum score threshold for foreground boxes.
  optional float score_threshold = 1 [default = 0.0];

  // IoU threshold used when merging (suppressing) overlapping boxes.
  optional float iou_threshold = 2 [default = 0.6];

  // Maximum number of detections returned per class.
  optional int32 max_detections_per_class = 3 [default = 100];

  // Maximum number of detections returned in total.
  optional int32 max_total_detections = 5 [default = 100];
}
// Post-processing configuration.
message PostProcessing {
  // Non-max suppression parameters.
  optional BatchNonMaxSuppression batch_non_max_suppression = 1;

  // Enum to specify how to convert the detection scores.
  enum ScoreConverter {
    // Input scores equals output scores.
    IDENTITY = 0;

    // Applies a sigmoid on input scores.
    SIGMOID = 1;

    // Applies a softmax on input scores
    SOFTMAX = 2;
  }

  // Score converter to use.
  optional ScoreConverter score_converter = 2 [default = IDENTITY];
}
faster r-cnn中的具体细节暂不叙述,接下来看TrainConfig。该文件位于object_detection/protos/train.proto
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/optimizer.proto";
import "object_detection/protos/preprocessor.proto";
// Message for configuring DetectionModel training jobs (train.py).
message TrainConfig {
  // Input batch size.
  optional uint32 batch_size = 1 [default = 32];

  // Data augmentation options.
  repeated PreprocessingStep data_augmentation_options = 2;

  // Whether to synchronize replicas during training.
  optional bool sync_replicas = 3 [default = false];

  // How frequently checkpoints are kept (in hours, per the field name).
  optional uint32 keep_checkpoint_every_n_hours = 4 [default = 1000];

  // Optimizer used for training.
  optional Optimizer optimizer = 5;

  // If greater than 0, clips gradients by this value.
  optional float gradient_clipping_by_norm = 6 [default = 0.0];

  // Checkpoint to restore parameters from.
  optional string fine_tune_checkpoint = 7 [default = ""];

  // Set when restoring from a detection model checkpoint, in which case the
  // number of classes must match. If not set, the checkpoint is expected to
  // come from a classification model.
  optional bool from_detection_checkpoint = 8 [default = false];

  // Number of steps to train the detection model for. If 0, training runs
  // indefinitely.
  optional uint32 num_steps = 9 [default = 0];

  // Number of training steps between replica startup.
  // Should be 0 if sync_replicas is true.
  optional float startup_delay_steps = 10 [default = 15];

  // If greater than 0, multiplies the gradient of bias variables by this
  // amount.
  optional float bias_grad_multiplier = 11 [default = 0];

  // Variables that should not be updated during training.
  repeated string freeze_variables = 12;

  // Number of replicas to aggregate before making parameter updates.
  optional int32 replicas_to_aggregate = 13 [default = 1];

  // Maximum number of elements to store within a queue.
  optional int32 batch_queue_capacity = 14 [default = 600];

  // Number of threads to use for batching.
  optional int32 num_batch_queue_threads = 15 [default = 8];

  // Maximum capacity of the queue used to prefetch assembled batches.
  optional int32 prefetch_queue_capacity = 16 [default = 10];
}
再之后就是InputReader,来自文件object_detection/protos/input_reader.proto
syntax = "proto2";
package object_detection.protos;
// Defines how input data for Object Detection is produced.
// Input readers are expected to produce a dict of tensors with the following
// fields:
//
// 'image': an [image_height, image_width, channels] image tensor that detection
// will be run on.
// 'groundtruth_classes': object classes; a [num_boxes] int32 tensor storing
// the class labels of detected boxes in the image.
// 'groundtruth_boxes': object boxes; a [num_boxes, 4] float tensor storing
// the coordinates of detected boxes in the image.
// 'groundtruth_instance_masks': masks (Optional), a [num_boxes, image_height,
// image_width] float tensor storing binary mask of the objects in boxes.
message InputReader {
  // Path to the StringIntLabelMap, i.e. the mapping from names to ids.
  optional string label_map_path = 1 [default = ""];

  // Whether to shuffle the data.
  optional bool shuffle = 2 [default = true];

  // Maximum number of records to keep in the queue.
  optional uint32 queue_capacity = 3 [default = 2000];

  // Minimum number of records to keep in the reader queue. A larger value
  // gives better shuffling.
  optional uint32 min_after_dequeue = 4 [default = 1000];

  // Number of passes over the input data (per the field name). If 0, reading
  // continues indefinitely.
  optional uint32 num_epochs = 5 [default = 0];

  // Number of data readers to create.
  optional uint32 num_readers = 6 [default = 8];

  // Whether to load mask information.
  optional bool load_instance_masks = 7 [default = false];

  // Type of input source to read from.
  oneof input_reader {
    TFRecordInputReader tf_record_input_reader = 8;
    ExternalInputReader external_input_reader = 9;
  }
}
// An input reader that reads TF Example protos from local TFRecord files.
// Specifies where the input is stored.
message TFRecordInputReader {
  // Path to TFRecordFile.
  optional string input_path = 1 [default = ""];
}
// An externally defined input reader. Users may define an extension to this
// proto to interface their own input readers.
message ExternalInputReader {
  // Field numbers available for user-defined extensions.
  extensions 1 to 999;
}
测试的配置,EvalConfig,来自文件object_detection/protos/eval.proto
syntax = "proto2";
package object_detection.protos;
// Message for configuring model evaluation jobs (eval.py).
message EvalConfig {
  // Number of visualization images to generate.
  optional uint32 num_visualizations = 1 [default = 10];

  // Number of examples (images) to evaluate on.
  optional uint32 num_examples = 2 [default = 5000];

  // How often to run evaluation.
  optional uint32 eval_interval_secs = 3 [default = 300];

  // Maximum number of evaluation runs. If 0, evaluation runs forever.
  optional uint32 max_evals = 4 [default = 0];

  // Whether to save the TensorFlow graph used for evaluation to disk.
  optional bool save_graph = 5 [default = false];

  // Directory in which to save visualization results. If empty, they are not
  // saved.
  optional string visualization_export_dir = 6 [default = ""];

  // BNS name of the TensorFlow master.
  optional string eval_master = 7 [default = ""];

  // Metrics used for evaluation. Pascal VOC detection metrics are currently
  // supported.
  optional string metrics_set = 8 [default = "pascal_voc_metrics"];

  // Path for exporting detections in COCO-compatible JSON format.
  optional string export_path = 9 [default = ''];

  // Option to not read groundtruth labels and only export detections to
  // COCO-compatible JSON file.
  optional bool ignore_groundtruth = 10 [default = false];

  // Use exponential moving averages of variables for evaluation.
  // TODO: When this is false make sure the model is constructed
  // without moving averages in restore_fn.
  optional bool use_moving_averages = 11 [default = false];

  // Whether to evaluate instance masks.
  optional bool eval_instance_masks = 12 [default = false];
}