基于tensorflow的实时物体识别

2017-09-03 本文已影响1512人斯坦因和他的狗

google开源了基于深度学习的物体识别模型和python API。

模型 Tensorflow detection model zoo ：不同的模型在效率与准确性上有区别，训练数据集是微软的COCO
python api： Tensorflow Object Detection API
Tensorflow Object Detection API 效果图片
google的api是用于图片物体识别的，但是只需要做三项修改就可以完成实时物体检测。更详细请参考 Dat Tran的文章

API结构微调；
多线程，读取视频流；
多进程，加载物体识别模型；

API结构微调

import os
import cv2
import numpy as np
import multiprocessing
from multiprocessing import Queue, Pool

# tensorflow api 接口相关函数
import tensorflow as tf
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# Path to the frozen inference graph of the detection model
PATH_TO_CKPT = '../object_detection/ssd_mobilenet_v1_coco_11_06_2017/frozen_inference_graph.pb'

# Path to the label map, used to display the class name of each detected object
PATH_TO_LABELS = '../object_detection/data/mscoco_label_map.pbtxt'
NUM_CLASSES = 90  # maximum number of classes
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)  # load the label map
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True)
# Index consumed by the visualization helper when drawing labels
category_index = label_map_util.create_category_index(categories)

# Object-detection network: run a forward pass and annotate the frame
def detect_objects(image_np, sess, detection_graph):
    """Run the detection graph on one image and draw the results on it.

    Args:
        image_np: Image array; a leading batch axis is added before it is
            fed to the graph (the model expects shape [1, None, None, 3]).
        sess: Session used to execute the graph.
        detection_graph: Graph providing the 'image_tensor:0' input and the
            detection output tensors.

    Returns:
        ``image_np``, after the visualization helper has been called on it
        (presumably annotated in place -- confirm against vis_util).
    """
    # Add the batch dimension the model expects.
    batched_image = np.expand_dims(image_np, axis=0)
    input_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

    # Output tensors, in order: bounding boxes of the detected objects,
    # confidence score per box, class id per box, number of detections.
    output_names = (
        'detection_boxes:0',
        'detection_scores:0',
        'detection_classes:0',
        'num_detections:0',
    )
    fetches = [detection_graph.get_tensor_by_name(name)
               for name in output_names]

    # Actual detection (forward pass).
    boxes, scores, classes, num_detections = sess.run(
        fetches, feed_dict={input_tensor: batched_image})

    # Draw boxes, class labels and scores on the image.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=3)
    return image_np

多线程，读取视频流

import cv2
from threading import Thread

# Threaded reader for efficient video capture
class WebcamVideoStream:
    """Read frames from a video source on a background thread.

    The most recently grabbed frame is kept in ``self.frame`` so that
    ``read()`` returns immediately instead of blocking on the capture device.
    """

    def __init__(self, src, width, height):
        """Open the capture device and grab the first frame.

        Args:
            src: Source passed to ``cv2.VideoCapture``.
            width: Requested frame width.
            height: Requested frame height.
        """
        # initialize the video camera stream and read the first frame
        # from the stream
        self.stream = cv2.VideoCapture(src)
        self.stream.set(cv2.CAP_PROP_FRAME_WIDTH, width)
        self.stream.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
        (self.grabbed, self.frame) = self.stream.read()

        # flag used to tell the reader thread to stop
        self.stopped = False

    def start(self):
        """Start the background reader thread and return ``self``."""
        reader = Thread(target=self.update, args=())
        # Daemon thread: a crash in the main thread must not leave the
        # interpreter hanging on a reader that was never told to stop.
        reader.daemon = True
        reader.start()
        return self

    def update(self):
        """Keep grabbing frames until ``stop()`` is called, then release."""
        try:
            while not self.stopped:
                # read the next frame from the stream
                (self.grabbed, self.frame) = self.stream.read()
        finally:
            # Release the capture device from the thread that owns the read
            # loop, so it is never released while a read is in progress.
            self.stream.release()

    def read(self):
        """Return the frame most recently read."""
        return self.frame

    def stop(self):
        """Signal the reader thread to stop."""
        self.stopped = True

# Usage: build the stream, start its reader thread, then fetch the latest frame
video_capture = WebcamVideoStream(src=video_source, width=width, height=height)
video_capture = video_capture.start()  # start() returns the stream itself
frame = video_capture.read()

多进程，加载物体识别模型

配置参数

 class configs(object):
     """Runtime settings for the capture loop and the worker processes."""

     def __init__(self):
         # Multiprocessing: number of workers and the length of the
         # input/output queues.
         self.num_workers, self.queue_size = 2, 5
         # 0 means: read the video stream from the camera.
         self.video_source = 0
         # Frame width and height.
         self.width, self.height = 720, 490
 args = configs()

定义用于多进程执行的函数worker，每个进程执行worker函数时，都会加载一次模型

def worker(input_q, output_q):
    """Load the detection model, then process frames from a queue forever.

    Each process running this function loads its own copy of the model from
    PATH_TO_CKPT.

    Args:
        input_q: Queue from which frames to process are taken.
        output_q: Queue onto which annotated frames are put.
    """
    detection_graph = tf.Graph()
    with detection_graph.as_default():  # load the model
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
        sess = tf.Session(graph=detection_graph)

    # The loop never exits normally, so a close placed after it would be
    # unreachable; try/finally closes the session when the loop is left via
    # an exception. NOTE: a hard kill of the process bypasses this cleanup.
    try:
        while True:
            frame = input_q.get()  # take a frame from the input queue
            # detect_objects returns the image with detected objects marked
            output_q.put(detect_objects(frame, sess, detection_graph))
    finally:
        sess.close()

多进程 Queue 文档 (Exchanging objects between processes)

import multiprocessing
input_q = Queue(maxsize=args.queue_size)  # multiprocess input queue
output_q = Queue(maxsize=args.queue_size)  # multiprocess output queue
# Each pool process runs worker(input_q, output_q) as its initializer,
# which loads the model and then serves frames from the queues.
pool = Pool(args.num_workers, worker, (input_q, output_q))

video_capture = None
try:
    video_capture = WebcamVideoStream(src=args.video_source,
                                      width=args.width,
                                      height=args.height).start()

    while True:
        frame = video_capture.read()  # latest frame from the reader thread
        input_q.put(frame)  # hand the frame to the worker processes
        frame = output_q.get()  # fetch a frame with detections drawn on it

        cv2.imshow('Video', frame)  # display the annotated frame
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always clean up, including on Ctrl-C or an error inside the loop;
    # otherwise worker processes and the reader thread keep running.
    pool.terminate()  # stop the worker processes
    if video_capture is not None:
        video_capture.stop()  # stop the video stream
    cv2.destroyAllWindows()  # close the OpenCV windows

简单测试

基于tensorflow的实时物体识别

API结构微调

多线程，读取视频流

多进程，加载物体识别模型

猜你喜欢

热点阅读