1. 环境准备
代码下载:
https://github.com/ultralytics/ultralytics.git
切换到 8.3.0 及以上版本的分支(YOLO11 仅在这些版本中支持)。下载后在 ultralytics 根目录下安装 yolo 依赖环境:
pip install -e . -i https://pypi.tuna.tsinghua.edu.cn/simple
2. 数据集准备
标注工具:LabelImg
若数据已经是标注好的,只需要将 xml 转换成对应的 txt 文件即可
假设标注好的数据如下:
train_img 存放图片,
train_xml 存放标注后的 xml 文件
yolo 训练需要将 xml 转换成对应的 txt,转换脚本如下(保存为 xml_2_yolo.py):
import os
import xml.etree.ElementTree as ET
from pathlib import Path
import argparse
import cv2
import logging


def parse_args(argv=None):
    """Parse the command-line options of the VOC XML -> YOLO TXT converter.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``, which is the original behavior.

    Returns:
        argparse.Namespace with ``xml_dir``, ``img_dir``, ``output_dir``,
        ``classes`` and ``img_ext``.
    """
    parser = argparse.ArgumentParser(description='Convert VOC XML to YOLO TXT')
    parser.add_argument('--xml-dir', type=str, required=True,
                        help='Directory containing XML annotations')
    parser.add_argument('--img-dir', type=str, required=True,
                        help='Directory containing corresponding images')
    parser.add_argument('--output-dir', type=str, required=True,
                        help='Output directory for TXT files')
    parser.add_argument('--classes', type=str, default='classes.txt',
                        help='Path to class list file (will auto-create if not exists)')
    parser.add_argument('--img-ext', type=str, default='jpg',
                        help='Image file extension (jpg, png, etc)')
    return parser.parse_args(argv)


def get_image_size(img_path):
    """Return ``(width, height)`` in pixels of the image at *img_path*.

    Raises:
        ValueError: if OpenCV cannot read the file (``cv2.imread`` returned
            ``None``), instead of failing later with an AttributeError.
    """
    img = cv2.imread(img_path)
    if img is None:
        raise ValueError(f"Cannot read image: {img_path}")
    return img.shape[1], img.shape[0]


def convert_box(size, box):
    """Convert a VOC pixel box into a normalized YOLO box.

    Args:
        size: ``(image_width, image_height)`` in pixels.
        box: ``(xmin, ymin, xmax, ymax)`` in pixels.

    Returns:
        ``(x_center, y_center, width, height)``, each divided by the image
        width (x values) or image height (y values).
    """
    dw = 1. / size[0]
    dh = 1. / size[1]
    x_center = (box[0] + box[2]) / 2.0
    y_center = (box[1] + box[3]) / 2.0
    box_w = box[2] - box[0]
    box_h = box[3] - box[1]
    return (x_center * dw, y_center * dh, box_w * dw, box_h * dh)


def _load_classes(classes_path):
    """Read the class list file into a ``{class_name: line_index}`` dict."""
    class_dict = {}
    with open(classes_path, 'r', encoding='utf-8') as f:
        for idx, line in enumerate(f):
            class_name = line.strip()
            # Blank lines keep their index slot but are not registered as a class.
            if class_name:
                class_dict[class_name] = idx
    return class_dict


def _read_image_size(root, img_path):
    """Return ``(width, height)``, preferring the XML ``<size>`` element.

    Falls back to reading the image file when ``<size>`` is missing or holds
    a non-positive value, which would otherwise cause a division by zero.
    """
    width = height = 0
    size = root.find('size')
    if size is not None:
        width = int(size.find('width').text)
        height = int(size.find('height').text)
    if width <= 0 or height <= 0:
        width, height = get_image_size(img_path)
    if width <= 0 or height <= 0:
        raise ValueError("Cannot determine image size")
    return width, height


def _convert_annotation(xml_file, img_path, class_dict, classes_fixed):
    """Convert one VOC XML file into a list of YOLO label lines.

    Args:
        xml_file: Path of the XML annotation.
        img_path: Path of the matching image, used only as a size fallback.
        class_dict: ``{class_name: class_id}``; new names are appended to it
            in place when *classes_fixed* is false.
        classes_fixed: True when the class list came from an existing file,
            in which case an unseen class name is an error.

    Raises:
        ValueError: on an unknown class (with a fixed class list) or when the
            image size cannot be determined.
    """
    root = ET.parse(xml_file).getroot()
    width, height = _read_image_size(root, img_path)

    txt_lines = []
    for obj in root.iter('object'):
        cls_name = obj.find('name').text.strip()
        if cls_name not in class_dict:
            if classes_fixed:
                raise ValueError(f"Unknown class {cls_name}")
            class_dict[cls_name] = len(class_dict)

        bndbox = obj.find('bndbox')
        # Clamp the box to the image so normalized values stay within [0, 1].
        xmin = max(0, float(bndbox.find('xmin').text))
        ymin = max(0, float(bndbox.find('ymin').text))
        xmax = min(float(bndbox.find('xmax').text), width)
        ymax = min(float(bndbox.find('ymax').text), height)

        # A box that is empty or inverted after clamping would produce a zero
        # or negative width/height in the label file; skip it instead.
        if xmax <= xmin or ymax <= ymin:
            logging.warning(
                f"Skipping degenerate box in {xml_file}: "
                f"({xmin}, {ymin}, {xmax}, {ymax}) for image {width}x{height}")
            continue

        yolo_box = convert_box((width, height), (xmin, ymin, xmax, ymax))
        txt_lines.append(f"{class_dict[cls_name]} {' '.join(map(str, yolo_box))}")
    return txt_lines


def main(argv=None):
    """Convert every ``*.xml`` in ``--xml-dir`` into a YOLO ``.txt`` label file.

    Progress and per-file errors are logged to ``xml2yolo.log``. A file that
    fails to convert is counted as an error and skipped; the run continues.
    When the class list file does not exist, class ids are assigned in order
    of first appearance and the list is written out at the end.

    Args:
        argv: Optional argument list forwarded to :func:`parse_args`;
            ``None`` uses the process command line.
    """
    args = parse_args(argv)

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename='xml2yolo.log')

    Path(args.output_dir).mkdir(parents=True, exist_ok=True)

    # Decide once whether the class list is fixed (file exists) or discovered.
    classes_fixed = os.path.exists(args.classes)
    if classes_fixed:
        class_dict = _load_classes(args.classes)
        logging.info(f"Loaded {len(class_dict)} classes from {args.classes}")
    else:
        class_dict = {}
        logging.warning(f"Class list not found, will create from XML files")

    # Accept both "jpg" and ".jpg" for --img-ext.
    img_ext = args.img_ext.lstrip('.')

    processed = 0
    errors = 0
    # Sorted so that auto-assigned class ids do not depend on filesystem order.
    for xml_file in sorted(Path(args.xml_dir).glob('*.xml')):
        try:
            img_path = os.path.join(args.img_dir, f"{xml_file.stem}.{img_ext}")
            txt_lines = _convert_annotation(xml_file, img_path, class_dict, classes_fixed)

            txt_path = os.path.join(args.output_dir, f"{xml_file.stem}.txt")
            with open(txt_path, 'w', encoding='utf-8') as f:
                f.write('\n'.join(txt_lines))

            processed += 1
            if processed % 100 == 0:
                logging.info(f"Processed {processed} files...")
        except Exception as e:
            # Top-level boundary: record the failure and move on to the next file.
            errors += 1
            logging.error(f"Error processing {xml_file}: {str(e)}")
            continue

    if not classes_fixed:
        with open(args.classes, 'w', encoding='utf-8') as f:
            for cls_name, _ in sorted(class_dict.items(), key=lambda item: item[1]):
                f.write(f"{cls_name}\n")
        logging.info(f"Created class list with {len(class_dict)} classes at {args.classes}")

    logging.info(f"Conversion completed. Success: {processed}, Errors: {errors}")


if __name__ == '__main__':
    main()
执行:
python xml_2_yolo.py --xml-dir ./train_xml/ --img-dir=./train_img/ --output-dir=./labels
执行后在当前文件夹 labels 目录下生成对应的 txt 标签,将 txt copy 到 train_img 文件夹下
3. 模型训练
新建train.py,内容如下:
import warnings

# Suppress all Python warnings before the training library is imported.
warnings.filterwarnings('ignore')

from ultralytics import YOLO

if __name__ == '__main__':
    # Build the network from the model YAML, then load the pretrained weights.
    detector = YOLO('ultralytics/cfg/models/11/yolo11n.yaml')
    detector.load('yolo11n.pt')

    # Training options, collected in one place and passed as keyword arguments.
    train_options = {
        'data': 'ultralytics/cfg/datasets/elevator.yaml',
        'cache': False,
        'imgsz': 640,
        'epochs': 200,
        'batch': 16,
        'close_mosaic': 10,
        'device': '0',
        'optimizer': 'SGD',  # train with SGD
        'project': 'runs/train-elevator',
        'name': 'exp',
    }
    detector.train(**train_options)
其中:ultralytics/cfg/models/11/yolo11n.yaml 为模型结构配置文件(加载时实际读取 yolo11.yaml,并按文件名中的 n 选择对应的 scale),需要修改其中的类别数:
nc: 4 (修改为实际训练的类别数)
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLO11 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 4 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolo11n.yaml' will call yolo11.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.50, 0.25, 1024] # summary: 319 layers, 2624080 parameters, 2624064 gradients, 6.6 GFLOPs
  s: [0.50, 0.50, 1024] # summary: 319 layers, 9458752 parameters, 9458736 gradients, 21.7 GFLOPs
  m: [0.50, 1.00, 512] # summary: 409 layers, 20114688 parameters, 20114672 gradients, 68.5 GFLOPs
  l: [1.00, 1.00, 512] # summary: 631 layers, 25372160 parameters, 25372144 gradients, 87.6 GFLOPs
  x: [1.00, 1.50, 512] # summary: 631 layers, 56966176 parameters, 56966160 gradients, 196.0 GFLOPs

# YOLO11n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 2, C3k2, [256, False, 0.25]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 2, C3k2, [512, False, 0.25]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 2, C3k2, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 2, C3k2, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 2, C2PSA, [1024]] # 10

# YOLO11n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 2, C3k2, [512, False]] # 13

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 2, C3k2, [256, False]] # 16 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 13], 1, Concat, [1]] # cat head P4
  - [-1, 2, C3k2, [512, False]] # 19 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 2, C3k2, [1024, True]] # 22 (P5/32-large)

  - [[16, 19, 22], 1, Detect, [nc]] # Detect(P3, P4, P5)
如上,准备工作已OK
4. 训练
python train.py
5. 训练结果查看
6. 测试
yolo detect predict model=./runs/train-elevator/exp6/weights/best.pt source=../datasets/elevator/val/people\(2231\).jpg
7. 导出onnx模型
模型导出使用专门针对 RKNN 优化的 ultralytics_yolo11 工程。该工程在不影响输出结果、不需要重新训练模型的前提下,做了以下改动:
- 修改输出结构, 移除后处理结构(后处理结果对于量化不友好);
- dfl 结构在 NPU 处理上性能不佳,移至模型外部的后处理阶段,此操作大部分情况下可提升推理性能;
- 模型输出分支新增置信度的总和,用于后处理阶段加速阈值筛选。
git clone https://github.com/airockchip/ultralytics_yolo11.git
修改 ./ultralytics/cfg/default.yaml中model文件路径,默认为yolo11n.pt,路径修改为之前训练的模型路径
这里修改为: model: /home/mahxn0/workspace/yolo11/runs/train-elevator/exp6/weights/best.pt
export PYTHONPATH=./
python ./ultralytics/engine/exporter.py
导出结果如下:
Results saved to /home/mahxn0/workspace/yolo11/runs/train-elevator/exp6/weights
Predict: yolo predict task=detect model=/home/mahxn0/workspace/yolo11/runs/train-elevator/exp6/weights/best.onnx imgsz=640
Validate: yolo val task=detect model=/home/mahxn0/workspace/yolo11/runs/train-elevator/exp6/weights/best.onnx imgsz=640 data=ultralytics/cfg/datasets/elevator.yaml
Visualize: https://netron.app
(yolov11) mahxn0@toy:~/workspace/yolo11_rknn$ export PYTHONPATH=./
(yolov11) mahxn0@toy:~/workspace/yolo11_rknn$ python ./ultralytics/engine/exporter.py
8. ONNX转RKNN
使用 https://gitee.com/LubanCat/lubancat_ai_manual_code/tree/master (需要装好rknn_toolkit2的环境)
python convert.py ../model/elevator.onnx rk3568 i8 ./elevator_3568.rknn
9. 交叉编译
本地linux配好交叉编译环境,在yolo11/cpp目录下交叉编译执行:
bash build-linux.sh -t rk3568
10. 板端测试
./yolo11_image_demo model/elevator.rknn ../../../../../datasets/elevator/GasTank297.jpg
至此,基于RK3568/3588的电梯电动车/煤气罐/跌倒检测模型移植完成,下篇将接入rtsp视频流实时测试