将JSON格式的标注数据转换为YOLO格式的txt标注文件

YOLO需要的数据格式

YOLO需要的数据格式为：类别id、中心点x坐标、中心点y坐标、w、h，其中后四个值都是相对于图片宽高归一化后的结果（取值在0到1之间）。每张图片需要对应一个同名的txt文件，文件中每一行描述一个目标框，
如下所示：
图片名.txt

4 0.389688 0.298564 0.779375 0.597128

现有数据格式

现有的标注只有下面这种结构的JSON文件(数据来源：https://aistudio.baidu.com/datasetdetail/183255/0)。其结构与COCO标注一致：bbox的四个值依次为左上角x、左上角y、宽、高（单位为像素），例如第一个标注的 area 7008 = 146 × 48：

{
    "images":[
        {
            "file_name":"00000.jpg",
            "height":1184,
            "width":1600,
            "id":0  
        },
        {
            "file_name":"00001.jpg",
            "height":1184,
            "width":1600,
            "id":1  
        }
    ],
    "type":"instances",
    "annotations":[
        {
            "area": 7008,
            "iscrowd": 0,
            "image_id": 0,
            "bbox": [
                1143,
                593,
                146,
                48
            ],
            "category_id": 2,
            "id": 1,
            "ignore": 0,
            "segmentation": []
        },
        {
            "area": 83190,
            "iscrowd": 0,
            "image_id": 1,
            "bbox": [
                1195,
                944,
                354,
                235
            ],
            "category_id": 6,
            "id": 2,
            "ignore": 0,
            "segmentation": []
        }
    ],
    "categories":[
        {
            "supercategory": "none",
            "id": 1,
            "name": "Crack"
        },
        {
            "supercategory": "none",
            "id": 2,
            "name": "Manhole"
        }
    ]
}

对应的操作代码为：

import os
import json

# Directory that receives one YOLO label file per image.
folder_path = 'txt_id5'
# Annotation file to read.
JSON_PATH = 'train.json'
# Only annotations with this category_id are exported.
CATEGORY_ID = 5
# Class label written at the start of every output line.
# NOTE(review): the original script wrote the literal '6' while filtering
# category_id 5; that value is kept here unchanged. Confirm the intended
# class mapping before training.
YOLO_CLASS_LABEL = '6'


def coco_bbox_to_yolo(bbox, img_width, img_height):
    """Convert a [x_min, y_min, width, height] pixel box to YOLO format.

    Args:
        bbox: Sequence of four numbers: top-left x, top-left y, box width,
            box height, all in pixels.
        img_width: Image width in pixels.
        img_height: Image height in pixels.

    Returns:
        Tuple (x_center, y_center, width, height), each divided by the
        corresponding image dimension.

    Raises:
        ValueError: If an image dimension is not positive.
    """
    if img_width <= 0 or img_height <= 0:
        raise ValueError(
            'image size must be positive, got %sx%s' % (img_width, img_height)
        )
    x_min, y_min, box_w, box_h = bbox
    # The centre is the top-left corner shifted by half the box size.
    x_center = (x_min + box_w / 2) / img_width
    y_center = (y_min + box_h / 2) / img_height
    return x_center, y_center, box_w / img_width, box_h / img_height


def build_label_lines(data, category_id, class_label):
    """Collect the YOLO label lines of one category, grouped by image.

    Args:
        data: Parsed annotation JSON with 'images' and 'annotations' lists.
        category_id: Only annotations with this category_id are used.
        class_label: Text written as the first field of every line.

    Returns:
        Dict mapping image file name to the list of label lines for that
        image, in annotation order.
    """
    # Index images once instead of scanning the image list per annotation.
    images_by_id = {image['id']: image for image in data['images']}
    lines_by_file = {}
    for annotation in data['annotations']:
        if annotation['category_id'] != category_id:
            continue
        image = images_by_id.get(annotation['image_id'])
        if image is None:
            # No matching image entry: nothing can be normalised, skip it.
            continue
        x, y, w, h = coco_bbox_to_yolo(
            annotation['bbox'], image['width'], image['height']
        )
        line = f'{class_label} {x:.6f} {y:.6f} {w:.6f} {h:.6f}'
        lines_by_file.setdefault(image['file_name'], []).append(line)
    return lines_by_file


def main():
    """Read the annotation JSON and write one label file per image."""
    with open(JSON_PATH, 'r', encoding='utf-8') as f:
        data = json.load(f)

    os.makedirs(folder_path, exist_ok=True)

    label_lines = build_label_lines(data, CATEGORY_ID, YOLO_CLASS_LABEL)
    for file_name, lines in label_lines.items():
        stem = os.path.splitext(file_name)[0]
        file_path = os.path.join(folder_path, stem + '.txt')
        # All boxes of an image are written together, so an image with
        # several boxes keeps every one of them.
        with open(file_path, 'w') as f:
            f.write('\n'.join(lines) + '\n')


if __name__ == '__main__':
    main()

然后每张图片的txt文件将会在对应文件夹中保存，如下图所示:

接下来，还需要把每个类别的txt文件所对应的图片，从存放全部图片的文件夹中复制到自定义的目标文件夹中，相应的操作代码为：

import os
import shutil
import json

# Folder that holds all source images.
source_folder = 'images'

# Folder that receives the images of the selected category.
target_folder = 'category_id8'

# Annotation file to read.
JSON_PATH = 'train.json'
# Images with at least one annotation of this category are copied.
CATEGORY_ID = 8


def image_files_for_category(data, category_id):
    """List the image files that have an annotation of one category.

    Args:
        data: Parsed annotation JSON with 'images' and 'annotations' lists.
        category_id: Category to select.

    Returns:
        List of image file names, each listed once, in the order their
        first matching annotation appears.
    """
    # Index images once instead of scanning the image list per annotation.
    names_by_id = {image['id']: image['file_name'] for image in data['images']}
    seen = set()
    file_names = []
    for annotation in data['annotations']:
        if annotation['category_id'] != category_id:
            continue
        file_name = names_by_id.get(annotation['image_id'])
        # Skip annotations with no image entry, and images already listed
        # (an image with several matching boxes is copied only once).
        if file_name is None or file_name in seen:
            continue
        seen.add(file_name)
        file_names.append(file_name)
    return file_names


def main():
    """Copy every image of the selected category into the target folder."""
    with open(JSON_PATH, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # shutil.copy2 does not create missing directories.
    os.makedirs(target_folder, exist_ok=True)

    for file_name in image_files_for_category(data, CATEGORY_ID):
        # The recorded file name is used as-is, so the extension is kept
        # whatever it is.
        source_image_path = os.path.join(source_folder, file_name)
        target_image_path = os.path.join(target_folder, file_name)
        shutil.copy2(source_image_path, target_image_path)


if __name__ == '__main__':
    main()