This post is a summary of what I did after studying an expert's write-up and applying it to my own task. I made a few code adjustments along the way and am recording them here; many thanks to the original author for providing such a good reference.
A trained YOLOv8 model is normally saved in .pt format, for example best.pt. I wanted to convert that model into a deployable format that does not so obviously call into yolo, and found that it can be exported to ONNX.
1. What is the ONNX format
In short, ONNX (Open Neural Network Exchange) is an open interchange format for trained models: an exported .onnx file can be loaded with ONNX Runtime, and newer versions of OpenCV can also load it through the dnn module.
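For example, if you only have OpenCV available, a recent build of its dnn module can read the exported model directly. This is just a minimal sketch; the file names are placeholders, and the raw output still needs the YOLOv8 post-processing described later in this post:

import cv2

net = cv2.dnn.readNetFromONNX("best.onnx")  # placeholder path to the exported model
blob = cv2.dnn.blobFromImage(cv2.imread("test.jpg"), scalefactor=1 / 255.0,
                             size=(640, 640), swapRB=True)  # BGR -> RGB, resize, scale to [0, 1]
net.setInput(blob)
outputs = net.forward()
print(outputs.shape)  # raw YOLOv8 head output, still needs decoding and NMS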
2. Loading the ONNX model with Python
(1) Convert best.pt to best.onnx
from ultralytics import YOLO

# Load the trained YOLOv8 model
model = YOLO('E:/skin_yolo/runs/detect/spot_detection60/weights/best.pt')

# Export to ONNX format
# model.export(format='onnx')
model.export(format='onnx', imgsz=640)  # my input images are a fixed 640*640, so the size is hard-coded
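The exported best.onnx is written next to best.pt. As a quick sanity check (assuming the onnx package is installed; the path below is mine), you can verify that the exported file is a valid ONNX graph:

import onnx

onnx_model = onnx.load("E:/skin_yolo/runs/detect/spot_detection60/weights/best.onnx")
onnx.checker.check_model(onnx_model)  # raises an exception if the graph is malformed
print(onnx_model.graph.input[0].name, onnx_model.graph.output[0].name)  # input/output tensor names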
(2) Load the model in Python and run object detection
First install the onnxruntime, numpy and cv2 libraries (cv2 comes from the opencv-python package). If you want to run inference on a GPU, install onnxruntime-gpu instead.
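A small sketch for checking which execution providers are actually available and for requesting the GPU explicitly with a CPU fallback; the model path is mine, adjust as needed:

import onnxruntime

print(onnxruntime.get_available_providers())
# e.g. ['CUDAExecutionProvider', 'CPUExecutionProvider'] when onnxruntime-gpu and CUDA are set up

session = onnxruntime.InferenceSession(
    "E:/skin_yolo/runs/detect/spot_detection60/weights/best.onnx",
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],  # GPU first, CPU as fallback
)
print(session.get_providers())  # the providers the session actually ended up using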
test_detector.py
# Detect targets on skin images with a YOLO model
# 2025-01-06
import cv2

# functions from the helper files
from targetDetect import TargetDetection
from forDraw import draw_detections


# YOLOv8 ONNX model inference
class YOLOV8NDetector:
    def __init__(self, model_path):
        super(YOLOV8NDetector, self).__init__()
        self.model_path = model_path
        self.detector = TargetDetection(self.model_path, conf_thres=0.5, iou_thres=0.3)

    def detect_image(self, input_image, output_image):
        cv_img = cv2.imread(input_image)
        boxes, scores, class_ids = self.detector.detect_objects(cv_img)
        cv_img = draw_detections(cv_img, boxes, scores, class_ids)
        cv2.namedWindow("output", cv2.WINDOW_NORMAL)
        cv2.imwrite(output_image, cv_img)
        cv2.imshow('output', cv_img)
        cv2.waitKey(0)

    def detect_video(self, input_video, output_video):
        cap = cv2.VideoCapture(input_video)
        fps = int(cap.get(5))  # 5 == cv2.CAP_PROP_FPS
        videoWriter = None
        while True:
            _, cv_img = cap.read()
            if cv_img is None:
                break
            boxes, scores, class_ids = self.detector.detect_objects(cv_img)
            cv_img = draw_detections(cv_img, boxes, scores, class_ids)
            # Initialize the video writer with the output path and parameters on the first frame
            if videoWriter is None:
                fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
                # once assigned here it is no longer None, so this branch is skipped on later frames
                videoWriter = cv2.VideoWriter(output_video, fourcc, fps, (cv_img.shape[1], cv_img.shape[0]))
            videoWriter.write(cv_img)
            cv2.imshow("aod", cv_img)
            cv2.waitKey(5)
            # check whether the window was closed (user clicked the X) and exit if so
            if cv2.getWindowProperty("aod", cv2.WND_PROP_AUTOSIZE) < 1:
                break
        cap.release()
        if videoWriter is not None:  # guard against videos that produced no frames
            videoWriter.release()
        cv2.destroyAllWindows()


if __name__ == '__main__':
    modelpath = "E:/skin_yolo/runs/detect/spot_detection60/weights/best.onnx"  # model path
    det = YOLOV8NDetector(modelpath)
    # call this to detect a single image
    input_image = "E:/Skin_Color/skin_pic/test/12/test.jpg"
    output_image = 'E:/Skin_Color/skin_pic/test/12/test_out.jpg'
    det.detect_image(input_image, output_image)
    # call this to detect a video
    # input_video = r"E:\yolodataset\video\A13.mp4"
    # output_video = "../testdata/fortest.mp4"
    # det.detect_video(input_video, output_video)
As you can see, the code above depends on two helper files: targetDetect.py and forDraw.py.
targetDetect.py implements the detection and post-processing, and forDraw.py implements the helpers that draw boxes around the detected targets.
targetDetect.py
import time
import cv2
import numpy as np
import onnxruntime

# functions from the helper file: nms for a single class, multiclass_nms for multiple classes
from forDraw import xywh2xyxy, draw_detections, nms


class TargetDetection:
    def __init__(self, path, conf_thres=0.7, iou_thres=0.5):
        self.conf_threshold = conf_thres
        self.iou_threshold = iou_thres
        # Initialize model
        self.initialize_model(path)

    def __call__(self, image):
        return self.detect_objects(image)

    def initialize_model(self, path):
        self.session = onnxruntime.InferenceSession(path, providers=onnxruntime.get_available_providers())
        # Get model info
        self.get_input_details()
        self.get_output_details()

    def detect_objects(self, image):
        input_tensor = self.prepare_input(image)
        # Perform inference on the image
        outputs = self.inference(input_tensor)
        self.boxes, self.scores, self.class_ids = self.process_output(outputs)
        return self.boxes, self.scores, self.class_ids

    def prepare_input(self, image):
        self.img_height, self.img_width = image.shape[:2]
        input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Resize input image
        input_img = cv2.resize(input_img, (self.input_width, self.input_height))
        # Scale input pixel values to 0 to 1
        input_img = input_img / 255.0
        input_img = input_img.transpose(2, 0, 1)
        input_tensor = input_img[np.newaxis, :, :, :].astype(np.float32)
        return input_tensor

    def inference(self, input_tensor):
        start = time.perf_counter()
        outputs = self.session.run(self.output_names, {self.input_names[0]: input_tensor})
        # print(f"Inference time: {(time.perf_counter() - start)*1000:.2f} ms")
        return outputs

    def process_output(self, output):
        predictions = np.squeeze(output[0]).T
        # Filter out object confidence scores below threshold
        scores = np.max(predictions[:, 4:], axis=1)
        predictions = predictions[scores > self.conf_threshold, :]
        scores = scores[scores > self.conf_threshold]
        if len(scores) == 0:
            return [], [], []
        # Get the class with the highest confidence
        class_ids = np.argmax(predictions[:, 4:], axis=1)
        # Get bounding boxes for each object
        boxes = self.extract_boxes(predictions)
        # Apply non-maxima suppression to suppress weak, overlapping bounding boxes
        indices = nms(boxes, scores, self.iou_threshold)  # my model has only one class
        # indices = multiclass_nms(boxes, scores, class_ids, self.iou_threshold)  # multi-class version
        return boxes[indices], scores[indices], class_ids[indices]

    def extract_boxes(self, predictions):
        # Extract boxes from predictions
        boxes = predictions[:, :4]
        # Scale boxes to original image dimensions
        boxes = self.rescale_boxes(boxes)
        # Convert boxes to xyxy format
        boxes = xywh2xyxy(boxes)
        return boxes

    def rescale_boxes(self, boxes):
        # Rescale boxes to original image dimensions
        input_shape = np.array([self.input_width, self.input_height, self.input_width, self.input_height])
        boxes = np.divide(boxes, input_shape, dtype=np.float32)
        boxes *= np.array([self.img_width, self.img_height, self.img_width, self.img_height])
        return boxes

    def draw_detections(self, image, draw_scores=True, mask_alpha=0.4):
        return draw_detections(image, self.boxes, self.scores, self.class_ids, mask_alpha)

    def get_input_details(self):
        model_inputs = self.session.get_inputs()
        self.input_names = [model_inputs[i].name for i in range(len(model_inputs))]
        self.input_shape = model_inputs[0].shape
        self.input_height = self.input_shape[2]
        self.input_width = self.input_shape[3]
        print(self.input_width, self.input_height)

    def get_output_details(self):
        model_outputs = self.session.get_outputs()
        self.output_names = [model_outputs[i].name for i in range(len(model_outputs))]
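A note on process_output: for a YOLOv8 detection model exported at 640x640, the raw output is expected to have shape (1, 4 + num_classes, 8400), i.e. (1, 5, 8400) for my single-class model, so np.squeeze(output[0]).T produces 8400 candidate rows of [cx, cy, w, h, class scores...]. Here is a minimal sketch with a dummy tensor (the shape is an assumption based on that layout, not read from the actual model):

import numpy as np

dummy_output = np.random.rand(1, 5, 8400).astype(np.float32)  # (batch, 4 box coords + 1 class score, candidates)

predictions = np.squeeze(dummy_output).T        # -> (8400, 5): one row per candidate box
scores = np.max(predictions[:, 4:], axis=1)     # best class score for each candidate
keep = scores > 0.5                             # same idea as conf_threshold in process_output
print(predictions.shape, int(keep.sum()))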
forDraw.py
import numpy as np
import cv2

class_names = ['spot']  # my class labels

# Create a list of colors for each class where each color is a tuple of class number integer values
rng = np.random.default_rng(1)  # 1 here because my data has only one class
colors = rng.uniform(0, 255, size=(len(class_names), 1))  # 1 here because my data has only one class


def nms(boxes, scores, iou_threshold):
    # Sort the detections by score from high to low and keep the sorted indices
    sorted_indices = np.argsort(scores)[::-1]  # [::-1] reverses the order
    keep_boxes = []
    while sorted_indices.size > 0:
        # Keep the box with the highest score
        box_id = sorted_indices[0]
        keep_boxes.append(box_id)
        # Compute the IoU of the highest-scoring box against the remaining boxes
        ious = compute_iou(boxes[box_id, :], boxes[sorted_indices[1:], :])
        # Keep only the boxes whose IoU is below the threshold, filtering out overlapping boxes
        keep_indices = np.where(ious < iou_threshold)[0]
        # Note: keep_indices is relative to sorted_indices[1:],
        # so shift it by +1 to map back to the original sorted_indices
        sorted_indices = sorted_indices[keep_indices + 1]
    return keep_boxes


def multiclass_nms(boxes, scores, class_ids, iou_threshold):
    # Get all unique class ids
    unique_class_ids = np.unique(class_ids)
    keep_boxes = []  # indices of the boxes that survive NMS
    for class_id in unique_class_ids:
        # Indices of the boxes belonging to the current class
        class_indices = np.where(class_ids == class_id)[0]  # np.where returns a tuple
        # Boxes and scores of the current class
        class_boxes = boxes[class_indices, :]
        class_scores = scores[class_indices]
        # Run NMS and get the kept indices
        class_keep_boxes = nms(class_boxes, class_scores, iou_threshold)
        # Map the kept indices back to the original indices
        keep_boxes.extend(class_indices[class_keep_boxes])
    return keep_boxes


def compute_iou(box, boxes):
    # Intersection coordinates: (xmin, ymin) is the top-left corner, (xmax, ymax) the bottom-right corner
    xmin = np.maximum(box[0], boxes[:, 0])
    ymin = np.maximum(box[1], boxes[:, 1])
    xmax = np.minimum(box[2], boxes[:, 2])
    ymax = np.minimum(box[3], boxes[:, 3])
    # Intersection area; if the boxes do not overlap the width/height goes negative, np.maximum keeps it non-negative
    intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)
    # Area of each box
    box_area = (box[2] - box[0]) * (box[3] - box[1])
    boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    # Union area
    union_area = box_area + boxes_area - intersection_area
    # IoU = intersection area / union area
    iou = intersection_area / union_area
    return iou


def xywh2xyxy(x):
    # Convert bounding boxes from (x_center, y_center, w, h) to (x1, y1, x2, y2)
    y = np.copy(x)
    # Top-left corner x1, y1
    y[..., 0] = x[..., 0] - x[..., 2] / 2
    y[..., 1] = x[..., 1] - x[..., 3] / 2
    # Bottom-right corner x2, y2
    y[..., 2] = x[..., 0] + x[..., 2] / 2
    y[..., 3] = x[..., 1] + x[..., 3] / 2
    return y


def draw_detections(image, boxes, scores, class_ids, mask_alpha=0.3):
    # Draw the detected targets
    det_img = image.copy()
    img_height, img_width = image.shape[:2]
    font_size = min([img_height, img_width]) * 0.0006
    text_thickness = int(min([img_height, img_width]) * 0.001)
    det_img = draw_masks(det_img, boxes, class_ids, mask_alpha)
    # Draw bounding boxes and labels of detections
    for class_id, box, score in zip(class_ids, boxes, scores):
        color = colors[class_id]
        draw_box(det_img, box, color)
        label = class_names[class_id]
        caption = f'{label} {int(score * 100)}%'
        draw_text(det_img, caption, box, color, font_size, text_thickness)
    return det_img


def draw_box(image: np.ndarray, box: np.ndarray, color: tuple[int, int, int] = (0, 0, 255),
             thickness: int = 2) -> np.ndarray:
    x1, y1, x2, y2 = box.astype(int)
    return cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)


def draw_text(image: np.ndarray, text: str, box: np.ndarray, color: tuple[int, int, int] = (0, 0, 255),
              font_size: float = 0.001, text_thickness: int = 2) -> np.ndarray:
    # Draw the caption
    x1, y1, x2, y2 = box.astype(int)
    (tw, th), _ = cv2.getTextSize(text=text, fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                  fontScale=font_size, thickness=text_thickness)
    th = int(th * 1.2)  # pad the text height a bit
    cv2.rectangle(image, (x1, y1), (x1 + tw, y1 - th), color, -1)  # caption background box
    return cv2.putText(image, text, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, font_size, (255, 255, 255), text_thickness,
                       cv2.LINE_AA)


def draw_masks(image: np.ndarray, boxes: np.ndarray, classes: np.ndarray, mask_alpha: float = 0.3) -> np.ndarray:
    mask_img = image.copy()
    # Fill the detected boxes
    for box, class_id in zip(boxes, classes):
        color = colors[class_id]
        x1, y1, x2, y2 = box.astype(int)
        cv2.rectangle(mask_img, (x1, y1), (x2, y2), color, -1)
    # return cv2.addWeighted(mask_img, mask_alpha, image, 1 - mask_alpha, 0)  # return semi-transparent boxes
    return image  # return fully transparent boxes (no mask)
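To see the NMS helpers in isolation, here is a tiny usage example with hand-made boxes in (x1, y1, x2, y2) format (the numbers are made up purely for illustration):

import numpy as np
from forDraw import compute_iou, nms

boxes = np.array([[10, 10, 50, 50],
                  [12, 12, 52, 52],       # heavily overlaps the first box
                  [100, 100, 140, 140]],  # far away from the others
                 dtype=np.float32)
scores = np.array([0.9, 0.8, 0.7])

print(compute_iou(boxes[0], boxes[1:]))       # roughly [0.82, 0.0]
print(nms(boxes, scores, iou_threshold=0.3))  # keeps indices [0, 2], drops the duplicate box 1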
Take a look at the processing results.
(3) Some reflections
Understanding the model pipeline
Once a YOLO model has been exported, loading and post-processing it still requires a thorough understanding of how the model works, starting with its inputs and outputs.
A nice website for inspecting a model's topology online: https://netron.app/
The topology graph is enormous; as a beginner, for now I only focused on the input and output nodes.
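If you prefer to check the inputs and outputs programmatically instead of (or in addition to) Netron, ONNX Runtime can report them directly; the example names in the comments (images / output0) are what an Ultralytics export typically produces, but treat them as an illustration rather than a guarantee:

import onnxruntime

session = onnxruntime.InferenceSession("E:/skin_yolo/runs/detect/spot_detection60/weights/best.onnx")
for inp in session.get_inputs():
    print("input :", inp.name, inp.shape, inp.type)   # e.g. images [1, 3, 640, 640] tensor(float)
for out in session.get_outputs():
    print("output:", out.name, out.shape, out.type)   # e.g. output0 [1, 5, 8400] tensor(float)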
Preprocessing and the intermediate steps both matter
1) Preprocessing can be a major stumbling block (see the sketch after this list);
2) read the image and convert its color space from BGR to RGB, because the ONNX model expects RGB input;
3) resize the image: I trained at 640, so the image must be resized to the input size the model expects;
4) normalize the pixel values to the [0, 1] range;
5) reorder the image channels, typically from HWC (Height, Width, Channel) to CHW (Channel, Height, Width), and add a batch dimension to get NCHW, where N is the batch size, usually 1.
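These steps are exactly what prepare_input in targetDetect.py does; here is a minimal standalone version of the same preprocessing (the image path is a placeholder):

import cv2
import numpy as np

def preprocess(image_bgr, input_width=640, input_height=640):
    img = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)    # BGR -> RGB
    img = cv2.resize(img, (input_width, input_height))  # resize to the model input size
    img = img.astype(np.float32) / 255.0                # normalize to [0, 1]
    img = img.transpose(2, 0, 1)                        # HWC -> CHW
    return img[np.newaxis, ...]                         # add the batch dimension -> NCHW, N = 1

tensor = preprocess(cv2.imread("test.jpg"))
print(tensor.shape, tensor.dtype)                       # (1, 3, 640, 640) float32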
Finally, special thanks to the original author's reference: https://blog.csdn.net/MariLN/article/details/144330414