TensorRT Python验证代码---分割类

2023-08-21  本文已影响0人  教训小磊
#encoding=gbk
import tensorrt as trt
import numpy as np
import os
import cv2
import pycuda.driver as cuda
import pycuda.autoinit


class HostDeviceMem(object):
    """Pair a host-side buffer with its device-side counterpart.

    Attributes are stored exactly as given:
      host   -- the host memory object
      device -- the device memory object
    """

    def __init__(self, host_mem, device_mem):
        self.host, self.device = host_mem, device_mem

    def __str__(self):
        # Two labelled sections, each followed by the str() of the buffer.
        return "Host:\n{!s}\nDevice:\n{!s}".format(self.host, self.device)

    def __repr__(self):
        # The repr is intentionally the same text as the str form.
        return self.__str__()


class TrtModel:
    """Run inference with a serialized TensorRT engine through PyCUDA.

    On construction the engine file is deserialized, one page-locked host
    buffer and one device buffer are allocated per engine binding (each sized
    for ``max_batch_size`` samples), and an execution context is created.
    Calling the instance uploads an input array, executes the engine
    asynchronously on a private CUDA stream, and downloads every output.

    NOTE(review): every binding's buffer is allocated with ``dtype``; a
    binding whose engine dtype differs from it would be misinterpreted --
    confirm against the engine in use.
    """

    def __init__(self, engine_path, max_batch_size=1, dtype=np.float32):
        """Load the engine at ``engine_path`` and allocate its I/O buffers.

        Args:
            engine_path: Path to the serialized TensorRT engine file.
            max_batch_size: Number of samples the buffers are sized for;
                must be at least 1.
            dtype: NumPy dtype used for all host buffers and for casting
                the input in ``__call__``.

        Raises:
            ValueError: If ``max_batch_size`` is smaller than 1.
            RuntimeError: If the engine cannot be deserialized.
        """
        if max_batch_size < 1:
            raise ValueError(
                "max_batch_size must be >= 1, got {}".format(max_batch_size)
            )

        self.engine_path = engine_path
        self.dtype = dtype
        self.logger = trt.Logger(trt.Logger.WARNING)
        self.runtime = trt.Runtime(self.logger)
        self.engine = self.load_engine(self.runtime, self.engine_path)
        self.max_batch_size = max_batch_size
        self.inputs, self.outputs, self.bindings, self.stream = self.allocate_buffers()
        self.context = self.engine.create_execution_context()

    @staticmethod
    def load_engine(trt_runtime, engine_path):
        """Deserialize and return the CUDA engine stored at ``engine_path``.

        Args:
            trt_runtime: The ``trt.Runtime`` used for deserialization.
            engine_path: Path to the serialized engine file.

        Raises:
            RuntimeError: If deserialization yields no engine.
        """
        # Register the TensorRT plugin library before deserializing.
        trt.init_libnvinfer_plugins(None, "")
        with open(engine_path, 'rb') as f:
            engine_data = f.read()
        engine = trt_runtime.deserialize_cuda_engine(engine_data)
        if engine is None:
            # Fail here rather than with an opaque error when the engine is
            # first iterated in allocate_buffers().
            raise RuntimeError(
                "failed to deserialize TensorRT engine: {}".format(engine_path)
            )
        return engine

    def allocate_buffers(self):
        """Allocate host/device buffers for every binding of the engine.

        Returns:
            A tuple ``(inputs, outputs, bindings, stream)``: two lists of
            ``HostDeviceMem`` (input and output bindings respectively), the
            list of device addresses in engine binding order, and a new
            CUDA stream.
        """
        inputs = []
        outputs = []
        bindings = []
        stream = cuda.Stream()

        for binding in self.engine:
            # Element count of one binding times the number of samples.
            size = trt.volume(self.engine.get_binding_shape(binding)) * self.max_batch_size
            host_mem = cuda.pagelocked_empty(size, self.dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)

            # The bindings list holds raw device addresses, in engine order.
            bindings.append(int(device_mem))

            if self.engine.binding_is_input(binding):
                inputs.append(HostDeviceMem(host_mem, device_mem))
            else:
                outputs.append(HostDeviceMem(host_mem, device_mem))

        return inputs, outputs, bindings, stream

    def __call__(self, x: np.ndarray, batch_size=2):
        """Run the engine on ``x`` and return one array per output binding.

        Only the first input binding is filled from ``x``; any further input
        bindings are uploaded with whatever their host buffers contain.

        Args:
            x: Input data holding exactly ``batch_size`` samples; it is cast
                to ``self.dtype`` and flattened.
            batch_size: Number of samples in ``x``. Must lie between 1 and
                ``max_batch_size``. The default of 2 exceeds the default
                ``max_batch_size`` of 1, so pass it explicitly in that case.

        Returns:
            A list with one array of shape ``(batch_size, -1)`` per output
            binding. The arrays are views into host buffers that are reused
            by the next call; copy them if they must outlive it.

        Raises:
            ValueError: If ``batch_size`` is out of range or ``x`` does not
                contain exactly ``batch_size`` samples.
        """
        # Executing more samples than the buffers were sized for would make
        # the engine read and write past the device allocations.
        if not 1 <= batch_size <= self.max_batch_size:
            raise ValueError(
                "batch_size must be between 1 and max_batch_size ({}), got {}".format(
                    self.max_batch_size, batch_size
                )
            )

        x = x.astype(self.dtype)
        flat = x.ravel()

        in_host = self.inputs[0].host
        expected = (in_host.size // self.max_batch_size) * batch_size
        if flat.size != expected:
            raise ValueError(
                "input has {} elements, expected {} for batch_size={}".format(
                    flat.size, expected, batch_size
                )
            )
        # Fill only the part of the buffer that belongs to this batch.
        np.copyto(in_host[:expected], flat)

        for inp in self.inputs:
            cuda.memcpy_htod_async(inp.device, inp.host, self.stream)

        self.context.execute_async(batch_size=batch_size, bindings=self.bindings, stream_handle=self.stream.handle)
        for out in self.outputs:
            cuda.memcpy_dtoh_async(out.host, out.device, self.stream)

        # Wait for the uploads, the execution and the downloads to finish.
        self.stream.synchronize()

        results = []
        for out in self.outputs:
            # Keep only the samples produced by this call; the remainder of
            # the buffer (when batch_size < max_batch_size) is stale.
            valid = (out.host.size // self.max_batch_size) * batch_size
            results.append(out.host[:valid].reshape(batch_size, -1))
        return results


if __name__ == "__main__":
    # Demo: run a segmentation engine on one image and save the
    # colour-coded class mask.

    # Path configuration
    trt_engine_path = r'./trt/best-fp16.engine'
    pic_path = r'./trt/4.jpg'

    # The working directory also receives the output mask.
    os.makedirs('./trt/', exist_ok=True)

    # Palette: random colours for all 256 indices, then fixed colours for
    # indices 0-8.
    palette = np.random.randint(0, 256, (256, 3), dtype=np.uint8)
    palette[0] = [255, 255, 255]
    palette[1] = [0, 255, 0]
    palette[2] = [0, 0, 255]
    palette[3] = [255, 0, 0]
    palette[4] = [255, 255, 0]
    palette[5] = [255, 0, 255]
    palette[6] = [171, 130, 255]
    palette[7] = [155, 211, 255]
    palette[8] = [0, 255, 255]

    # Per-channel mean and standard deviation, applied after the channel
    # flip below.
    mean = (120, 114, 104)
    std = (70, 69, 73)

    # Input image preprocessing
    img = cv2.imread(pic_path)
    if img is None:
        # cv2.imread signals an unreadable or missing file by returning None.
        raise FileNotFoundError("cannot read image: {}".format(pic_path))
    img = img[:, :, ::-1]  # reverse the channel order
    img = np.array(img).astype(np.float32)  # the input must be np.float32
    img -= mean  # subtract the mean
    img /= std  # divide by the standard deviation
    # HWC -> CHW, then add a leading batch axis.
    img = np.array([np.transpose(img, (2, 0, 1))])

    # Model inference
    model = TrtModel(trt_engine_path)
    result = model(img, 1)

    # Save the mask.
    # NOTE(review): assumes the first output is a single 512x512 map of
    # class indices -- confirm against the engine.
    img_out = np.reshape(result[0][0], (512, 512))
    img_out = img_out.astype('uint8')
    pred = palette[img_out]

    cv2.imwrite('./trt/mask4_16.jpg', pred)

    # To overlay the mask on the input, keep the array returned by
    # cv2.imread and blend it with `pred` using cv2.addWeighted (weights
    # 0.6 / 0.4), then write the result to './trt/out.jpg'.

上一篇 下一篇

猜你喜欢

热点阅读