cv数据转换

记录一下将 VOC 格式的 XML 标注转换为 YOLO 数据格式，以及按比例分割数据集的过程。

输入数据

<?xml version="1.0" ?><annotation verified="yes">
	<folder>cam00</folder>
	<filename>000000.jpg</filename>
	<path>/home/zhaohuaqing/Documents/4_2/images/000000.jpg</path>
	<source>
		<database>Unknown</database>
	</source>
	<size>
		<width>1280</width>
		<height>720</height>
		<depth>3</depth>
	</size>
	<segmented>0</segmented>
	<object>
		<name>red</name>
		<pose>Unspecified</pose>
		<truncated>0</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>190</xmin>
			<ymin>126</ymin>
			<xmax>217</xmax>
			<ymax>183</ymax>
		</bndbox>
	</object>
	<object>
		<name>red</name>
		<pose>Unspecified</pose>
		<truncated>0</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>241</xmin>
			<ymin>120</ymin>
			<xmax>265</xmax>
			<ymax>172</ymax>
		</bndbox>
	</object>
	<object>
		<name>red</name>
		<pose>Unspecified</pose>
		<truncated>0</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>894</xmin>
			<ymin>146</ymin>
			<xmax>922</xmax>
			<ymax>202</ymax>
		</bndbox>
	</object>
	<object>
		<name>red</name>
		<pose>Unspecified</pose>
		<truncated>0</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>956</xmin>
			<ymin>145</ymin>
			<xmax>983</xmax>
			<ymax>200</ymax>
		</bndbox>
	</object>
</annotation>

处理代码

import os
import xml.etree.ElementTree as ET

# Convert every Pascal-VOC style XML annotation in `input_folder` into a
# YOLO-format label file (one `<class> <x_center> <y_center> <w> <h>` line per
# object, all coordinates normalised by the image size) in `output_folder`.

# Input folder (XML annotations) and output folder (YOLO txt labels)
input_folder = 'Annotations'
output_folder = '处理完后的labels'

# Class name -> YOLO class index
name_map = {'red': 0, 'yellow': 1, 'green': 2, 'off': 3}

# Create the output folder up front: open(..., 'w') does not create missing
# directories and would otherwise fail with FileNotFoundError on a fresh run.
os.makedirs(output_folder, exist_ok=True)

# Walk over every XML file in the input folder
for file_name in os.listdir(input_folder):
    if not file_name.endswith('.xml'):
        continue

    file_path = os.path.join(input_folder, file_name)

    # Parse the XML annotation
    tree = ET.parse(file_path)
    root = tree.getroot()

    # Image size, used to normalise the box coordinates
    width = int(root.find('size/width').text)
    height = int(root.find('size/height').text)

    # One YOLO line per kept object; joined once at the end
    yolo_lines = []

    for obj in root.findall('object'):
        # Objects whose class name is not in the mapping are skipped
        class_num = name_map.get(obj.find('name').text)
        if class_num is None:
            continue

        # float() accepts both "190" and "190.0"; for integer coordinates the
        # resulting ratios are identical to integer parsing.
        xmin = float(obj.find('bndbox/xmin').text)
        ymin = float(obj.find('bndbox/ymin').text)
        xmax = float(obj.find('bndbox/xmax').text)
        ymax = float(obj.find('bndbox/ymax').text)

        # Corner box -> normalised centre / size, rounded to 6 decimals
        x_center = round((xmin + xmax) / (2 * width), 6)
        y_center = round((ymin + ymax) / (2 * height), 6)
        width_ratio = round((xmax - xmin) / width, 6)
        height_ratio = round((ymax - ymin) / height, 6)

        yolo_lines.append(
            f'{class_num} {x_center} {y_center} {width_ratio} {height_ratio}\n'
        )

    yolo_txt = ''.join(yolo_lines)

    # Write the label file next to its siblings, named after the XML file.
    # A file is written even when no object was kept (empty label file).
    output_file_path = os.path.join(output_folder, os.path.splitext(file_name)[0] + '.txt')
    with open(output_file_path, 'w', encoding='utf-8') as f:
        f.write(yolo_txt)

从而得到相应的处理后的结果（每行依次为：类别编号、x_center、y_center、宽度比例、高度比例，坐标均已按图像宽高归一化）：

0 0.158984 0.214583 0.021094 0.079167
0 0.197656 0.202778 0.01875 0.072222
0 0.709375 0.241667 0.021875 0.077778
0 0.757422 0.239583 0.021094 0.076389

按照8：1：1分割成相应的train、test、val数据集后，根据已划分好的标签文件夹（下面以 labels/test 为例，train、val 同理修改路径）把对应图片复制到目标文件夹的代码

import os
import shutil

# For every label file in `source_txt_folder`, copy the image with the same
# base name from `source_image_folder` into `target_folder`.

# Label folder, image pool, and destination folder
source_txt_folder = 'labels/test'
source_image_folder = 'JPEGImages'
target_folder = 'imgs/test'

# Make sure the destination folder is there before anything is copied
if not os.path.exists(target_folder):
    os.makedirs(target_folder)

# Collect the names of all .txt label files
txt_files = [f for f in os.listdir(source_txt_folder) if f.endswith('.txt')]

for txt_file in txt_files:
    # Strip the 4-character '.txt' suffix to get the shared base name
    txt_name = txt_file[:-4]

    # Images are assumed to be .jpg; adjust the extension if yours differ
    image_file = txt_name + '.jpg'
    image_path = os.path.join(source_image_folder, image_file)

    # Report and skip labels that have no matching image
    if not os.path.exists(image_path):
        print(f"图片文件 '{image_file}' 未找到，跳过")
        continue

    # Copy the image under the same file name into the destination folder
    shutil.copy2(image_path, os.path.join(target_folder, image_file))

对应的文件结构如下所示

8fcad2185a4ca5da51f66ec454fe9a2