TensorRT Python Validation Code --- Segmentation
2023-08-21  教训小磊
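The script below deserializes a TensorRT engine with the Python runtime API, feeds a preprocessed image through PyCUDA-managed page-locked and device buffers, and colorizes the predicted class-index mask with a palette before writing it to disk. The engine file ./trt/best-fp16.engine is assumed to already exist on disk; the 512x512 output size, the palette colors, and the normalization constants are specific to the author's segmentation model and should be adapted to your own.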
#encoding=gbk
import tensorrt as trt
import numpy as np
import os
import cv2
import pycuda.driver as cuda
import pycuda.autoinit
class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()

class TrtModel:
    def __init__(self, engine_path, max_batch_size=1, dtype=np.float32):
        self.engine_path = engine_path
        self.dtype = dtype
        self.logger = trt.Logger(trt.Logger.WARNING)
        self.runtime = trt.Runtime(self.logger)
        self.engine = self.load_engine(self.runtime, self.engine_path)
        self.max_batch_size = max_batch_size
        self.inputs, self.outputs, self.bindings, self.stream = self.allocate_buffers()
        self.context = self.engine.create_execution_context()

    @staticmethod
    def load_engine(trt_runtime, engine_path):
        # Register TensorRT plugins before deserializing, in case the engine depends on any
        trt.init_libnvinfer_plugins(None, "")
        with open(engine_path, 'rb') as f:
            engine_data = f.read()
        engine = trt_runtime.deserialize_cuda_engine(engine_data)
        return engine
    def allocate_buffers(self):
        # Allocate one page-locked host buffer and one device buffer per engine binding
        inputs = []
        outputs = []
        bindings = []
        stream = cuda.Stream()
        for binding in self.engine:
            size = trt.volume(self.engine.get_binding_shape(binding)) * self.max_batch_size
            host_mem = cuda.pagelocked_empty(size, self.dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            bindings.append(int(device_mem))
            if self.engine.binding_is_input(binding):
                inputs.append(HostDeviceMem(host_mem, device_mem))
            else:
                outputs.append(HostDeviceMem(host_mem, device_mem))
        return inputs, outputs, bindings, stream
    def __call__(self, x: np.ndarray, batch_size=2):
        x = x.astype(self.dtype)
        np.copyto(self.inputs[0].host, x.ravel())
        # Host -> device copies, async inference, device -> host copies, then sync the stream
        for inp in self.inputs:
            cuda.memcpy_htod_async(inp.device, inp.host, self.stream)
        # execute_async is the implicit-batch API; explicit-batch engines use execute_async_v2 instead
        self.context.execute_async(batch_size=batch_size, bindings=self.bindings, stream_handle=self.stream.handle)
        for out in self.outputs:
            cuda.memcpy_dtoh_async(out.host, out.device, self.stream)
        self.stream.synchronize()
        return [out.host.reshape(batch_size, -1) for out in self.outputs]

if __name__ == "__main__":
    # Path configuration
    trt_engine_path = r'./trt/best-fp16.engine'
    pic_path = r'./trt/4.jpg'
    if not os.path.exists('./trt/'):
        os.makedirs('./trt/')

    # Palette configuration: one BGR color per class index
    palette = np.random.randint(0, 256, (256, 3), dtype=np.uint8)
    palette[0] = [255, 255, 255]
    palette[1] = [0, 255, 0]
    palette[2] = [0, 0, 255]
    palette[3] = [255, 0, 0]
    palette[4] = [255, 255, 0]
    palette[5] = [255, 0, 255]
    palette[6] = [171, 130, 255]
    palette[7] = [155, 211, 255]
    palette[8] = [0, 255, 255]

    # Normalization mean and std (must match the values used during training)
    mean = (120, 114, 104)
    std = (70, 69, 73)

    # Preprocess the input image
    img = cv2.imread(pic_path)
    imgbak = img.copy()
    img = img[:, :, ::-1]  # BGR -> RGB
    img = np.array(img).astype(np.float32)  # note: the input dtype must be np.float32
    img -= mean  # subtract the mean
    img /= std   # divide by the std
    img = np.array([np.transpose(img, (2, 0, 1))])  # HWC -> NCHW with a batch dimension

    # Run inference
    model = TrtModel(trt_engine_path)
    result = model(img, 1)

    # Colorize the predicted class-index mask and save it
    img_out = np.reshape(result[0][0], (512, 512))
    img_out = img_out.astype('uint8')
    pred = palette[img_out]
    cv2.imwrite('./trt/mask4_16.jpg', pred)
    # imgadd = cv2.addWeighted(imgbak, 0.6, pred, 0.4, 0)  # optional: overlay the mask on the original image
    # cv2.imwrite('./trt/out.jpg', imgadd)
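
For reference, the engine file loaded above has to be built ahead of time from the trained segmentation model. Below is a minimal sketch of producing such an FP16 engine from an ONNX export with the TensorRT 8.x Python API; the ONNX path and the helper name build_fp16_engine are illustrative assumptions, and the same result can usually be obtained with the trtexec command-line tool (--onnx, --fp16, --saveEngine). Note that engines parsed from ONNX are explicit-batch, so with such an engine the inference call in TrtModel would use execute_async_v2(bindings=..., stream_handle=...) instead of execute_async.

# build_engine.py -- illustrative sketch only; assumes an ONNX export at ./trt/best.onnx
import tensorrt as trt

def build_fp16_engine(onnx_path, engine_path):
    logger = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(logger)
    # The ONNX parser requires an explicit-batch network definition
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    parser = trt.OnnxParser(network, logger)
    with open(onnx_path, 'rb') as f:
        if not parser.parse(f.read()):
            for i in range(parser.num_errors):
                print(parser.get_error(i))
            raise RuntimeError('failed to parse ' + onnx_path)
    config = builder.create_builder_config()
    config.set_flag(trt.BuilderFlag.FP16)  # build a half-precision engine
    serialized = builder.build_serialized_network(network, config)
    with open(engine_path, 'wb') as f:
        f.write(serialized)

if __name__ == '__main__':
    build_fp16_engine('./trt/best.onnx', './trt/best-fp16.engine')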