YOLOv8 自定义数据集构建实战:从视频到模型训练的5步完整流程

📅 2026/7/5 13:15:37
YOLOv8 自定义数据集构建实战:从视频到模型训练的5步完整流程
# YOLOv8 自定义数据集构建实战:从视频到模型训练的5步完整流程

在零售货架商品检测项目中,我们经常面临标注数据不足的困境。传统的数据采集方式成本高昂且效率低下,而视频素材作为丰富的图像来源却常被忽视。本文将带您完成从视频抽帧到YOLOv8模型训练的全流程,手把手教您构建专属的商品检测系统。

## 1. 视频素材处理与帧提取

零售场景下的监控视频通常包含大量重复帧和无效画面。我们需要智能抽取关键帧,既保证数据多样性,又避免冗余。以下是使用OpenCV进行自适应抽帧的Python实现:

```python
import cv2
import os
from pathlib import Path

def adaptive_video_split(video_path, output_dir, min_interval=30, change_threshold=0.25):
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    prev_frame = None
    frame_count = 0
    saved_count = 0

    # 创建按视频文件名分类的输出目录
    video_name = Path(video_path).stem
    class_dir = os.path.join(output_dir, video_name)
    os.makedirs(class_dir, exist_ok=True)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # 每隔min_interval帧强制保存一帧
        if frame_count % min_interval == 0:
            save_frame(frame, class_dir, frame_count)
            saved_count += 1
            prev_frame = frame
        else:
            # 计算帧间差异
            if prev_frame is not None:
                diff = cv2.absdiff(frame, prev_frame)
                non_zero = np.count_nonzero(diff) / diff.size
                if non_zero > change_threshold:
                    save_frame(frame, class_dir, frame_count)
                    saved_count += 1
                    prev_frame = frame
        frame_count += 1

    cap.release()
    return saved_count

def save_frame(frame, output_dir, frame_id):
    filename = f"frame_{frame_id:06d}.jpg"
    cv2.imwrite(os.path.join(output_dir, filename), frame)
```

关键参数说明:

- `min_interval`:最小抽帧间隔(帧数)
- `change_threshold`:画面变化阈值(0-1之间)

> 提示:对于超市货架视频,建议设置 `change_threshold=0.3`,可有效过滤顾客走动带来的微小变化。

## 2. 高效标注工具链配置

Roboflow平台提供了从标注到格式转换的一站式解决方案。以下是本地化标注工作流的优化方案:

### 2.1 标注工具选型对比

| 工具 | 标注效率(图/小时) | 支持格式 | 协作功能 | 适合场景 |
| --- | --- | --- | --- | --- |
| LabelImg | 30-50 | VOC/YOLO | 无 | 个人小项目 |
| CVAT | 50-80 | COCO/VOC/YOLO | 多人协作 | 中型团队 |
| Roboflow | 70-100 | 全格式 | 云协作 | 企业级项目 |

### 2.2 自动化标注辅助

使用预训练模型进行半自动标注,可提升3倍效率:

```bash
# 使用YOLOv8预训练模型辅助标注
yolo predict model=yolov8n.pt source="frames/*.jpg" save_txt=True save_conf=True
```

生成的预测结果可直接导入LabelImg进行修正,大幅减少人工标注时间。

## 3. 数据集格式转换与增强

YOLOv8要求特定的数据集结构,我们需要将原始标注转换为标准格式:

```
dataset/
├── images/
│   ├── train/
│   ├── val/
│   └── test/
└── labels/
    ├── train/
    ├── val/
    └── test/
```

### 3.1 格式转换脚本

```python
import yaml
from sklearn.model_selection import train_test_split

def prepare_yolo_dataset(annotations_dir, images_dir, output_dir, test_size=0.2):
    # 创建目录结构
    for split in ["train", "val", "test"]:
        os.makedirs(os.path.join(output_dir, "images", split), exist_ok=True)
        os.makedirs(os.path.join(output_dir, "labels", split), exist_ok=True)

    # 获取所有样本
    samples = [f.split(".")[0] for f in os.listdir(annotations_dir) if f.endswith(".txt")]

    # 划分训练集、验证集、测试集
    train_val, test = train_test_split(samples, test_size=test_size, random_state=42)
    train, val = train_test_split(train_val, test_size=0.25, random_state=42)  # 0.6:0.2:0.2

    # 复制文件和标签
    for split, names in [("train", train), ("val", val), ("test", test)]:
        for name in names:
            # 复制图像
            src_img = os.path.join(images_dir, f"{name}.jpg")
            dst_img = os.path.join(output_dir, "images", split, f"{name}.jpg")
            shutil.copy(src_img, dst_img)

            # 复制标签
            src_label = os.path.join(annotations_dir, f"{name}.txt")
            dst_label = os.path.join(output_dir, "labels", split, f"{name}.txt")
            shutil.copy(src_label, dst_label)

    # 生成data.yaml
    classes = get_classes(annotations_dir)  # 从标签提取类别
    data = {
        "path": os.path.abspath(output_dir),
        "train": "images/train",
        "val": "images/val",
        "test": "images/test",
        "names": {i: cls for i, cls in enumerate(classes)}
    }

    with open(os.path.join(output_dir, "data.yaml"), "w") as f:
        yaml.dump(data, f)
```

### 3.2 数据增强策略

在 `data.yaml` 中添加增强配置:

```yaml
# 高级数据增强配置
augment:
  hsv_h: 0.015  # 色调增强
  hsv_s: 0.7  # 饱和度增强
  hsv_v: 0.4  # 明度增强
  degrees: 10.0  # 旋转角度
  translate: 0.1  # 平移比例
  scale: 0.5  # 缩放比例
  shear: 0.0  # 剪切角度
  perspective: 0.0001  # 透视变换
  flipud: 0.0  # 上下翻转概率
  fliplr: 0.5  # 左右翻转概率
  mosaic: 1.0  # mosaic增强概率
  mixup: 0.1  # mixup增强概率
```

## 4. YOLOv8模型训练与调优

### 4.1 基础训练配置

创建自定义模型配置文件 `retail_yolov8.yaml`:

```yaml
# 参数来自YOLOv8n.yaml,调整anchor boxes适应商品检测
nc: 10  # 商品类别数
scales: [0.33, 0.25, 0.12]  # 模型缩放系数

# 调整anchors适应方形商品
anchors:
  - [4,5, 8,10, 13,16]  # P3/8
  - [23,29, 43,55, 73,105]  # P4/16
  - [146,217, 231,300, 335,433]  # P5/32

# 骨干网络配置
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]]  # 9

# 检测头配置
head:
  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
  - [-1, 3, C2f, [512]]  # 12
  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
  - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]]  # cat head P4
  - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]]  # cat head P5
  - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)
  - [[15, 18, 21], 1, Detect, [nc]]  # Detect(P3, P4, P5)
```

### 4.2 启动训练任务

```python
from ultralytics import YOLO

# 加载预训练模型
model = YOLO("yolov8n.pt")

# 训练参数配置
results = model.train(
    data="dataset/data.yaml",
    cfg="retail_yolov8.yaml",
    epochs=300,
    batch=16,
    imgsz=640,
    device=0,  # 使用GPU 0
    workers=8,
    optimizer="AdamW",
    lr0=0.001,
    lrf=0.01,
    warmup_epochs=3,
    weight_decay=0.0005,
    box=7.5,  # box loss增益
    cls=0.5,  # cls loss增益
    dfl=1.5,  # dfl loss增益
    fl_gamma=1.5,  # focal loss gamma
    label_smoothing=0.1,
    patience=50,
    save_period=10,
    val=True,
    amp=True  # 自动混合精度
)
```

### 4.3 高级训练技巧

学习率调度策略优化:

```python
# 自定义学习率调度器
def custom_lr_scheduler(optimizer, epoch, lr0):
    if epoch < 10:
        lr = lr0 * (epoch / 10) ** 2  # 热身阶段
    elif epoch < 200:
        lr = lr0
    else:
        lr = lr0 * 0.1 ** ((epoch - 200) // 50)

    for param_group in optimizer.param_groups:
        param_group["lr"] = lr
    return lr
```

模型EMA(指数移动平均)配置:

```yaml
# 在训练参数中添加
ema:
  enabled: True
  decay: 0.9999
  updates: 0
```

## 5. 模型验证与部署

### 5.1 性能评估指标

```python
from ultralytics.yolo.utils.metrics import ConfusionMatrix

# 加载最佳模型
model = YOLO("runs/detect/train/weights/best.pt")

# 在测试集上评估
metrics = model.val(
    data="dataset/data.yaml",
    split="test",
    conf=0.25,  # 置信度阈值
    iou=0.6,  # IoU阈值
    device=0
)

# 生成混淆矩阵
confusion_matrix = ConfusionMatrix(nc=model.model.nc)
confusion_matrix.process_batch(predictions, targets)
confusion_matrix.plot(save_dir="results/")
```

### 5.2 模型导出与优化

```python
# 导出为ONNX格式
model.export(format="onnx", imgsz=640, opset=12, simplify=True, dynamic=False, batch=1)

# 使用TensorRT优化
trt_model = YOLO("yolov8n.onnx").export(format="engine", device=0, workspace=4)
```

### 5.3 部署示例代码

```python
import cv2
from ultralytics import YOLO

class RetailDetector:
    def __init__(self, model_path):
        self.model = YOLO(model_path)
        self.class_names = self.model.names

    def process_frame(self, frame):
        results = self.model(frame, stream=True)

        for result in results:
            boxes = result.boxes.xyxy.cpu().numpy()
            confs = result.boxes.conf.cpu().numpy()
            cls_ids = result.boxes.cls.cpu().numpy().astype(int)

            for box, conf, cls_id in zip(boxes, confs, cls_ids):
                x1, y1, x2, y2 = map(int, box)
                label = f"{self.class_names[cls_id]}: {conf:.2f}"

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        return frame

# 使用示例
detector = RetailDetector("best.engine")
cap = cv2.VideoCapture("retail_store.mp4")

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    processed_frame = detector.process_frame(frame)
    cv2.imshow("Retail Detection", processed_frame)

    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()
```