labelme生成的标注数据转换成yolov5格式

本文中的代码旨在一键生成yolov5数据集的格式

使用labelme标注的json数据会生成在标注时图像文件所在的路径下，数据形式大概是这样的：

json文件和图像数据同名。

而yolov5实际训练时使用的数据格式是这样的：

网上大部分代码都是将yolov5标注格式的txt生成在根目录下，这样在生成txt文件后还需要手动整理成yolov5可训练的文件形式，下面的代码旨在减少人工处理的时间，一键生成可直接训练的文件形式。

# -*- coding: utf-8 -*-
"""
Time:     2021.10.26
Author:   Athrunsunny
Version:  V 0.1
File:     toyolo.py
Describe: Functions in this file is change the dataset format to yolov5
"""import os
import numpy as np
import json
from glob import glob
import cv2
import shutil
import yaml
from sklearn.model_selection import train_test_split
from tqdm import tqdmROOT_DIR = os.getcwd()def change_image_format(label_path=ROOT_DIR, suffix='.jpg'):"""统一当前文件夹下所有图像的格式，如'.jpg':param suffix: 图像文件后缀:param label_path:当前文件路径:return:"""externs = ['png', 'jpg', 'JPEG', 'BMP', 'bmp']files = list()for extern in externs:files.extend(glob(label_path + "\\*." + extern))for file in files:name = ''.join(file.split('.')[:-1])file_suffix = file.split('.')[-1]if file_suffix != suffix.split('.')[-1]:new_name = name + suffiximage = cv2.imread(file)cv2.imwrite(new_name, image)os.remove(file)def get_all_class(file_list, label_path=ROOT_DIR):"""从json文件中获取当前数据的所有类别:param file_list:当前路径下的所有文件名:param label_path:当前文件路径:return:"""classes = list()for filename in tqdm(file_list):json_path = os.path.join(label_path, filename + '.json')json_file = json.load(open(json_path, "r", encoding="utf-8"))for item in json_file["shapes"]:label_class = item['label']if label_class not in classes:classes.append(label_class)print('read file done')return classesdef split_dataset(label_path, test_size=0.3, isUseTest=False, useNumpyShuffle=False):"""将文件分为训练集，测试集和验证集:param useNumpyShuffle: 使用numpy方法分割数据集:param test_size: 分割测试集或验证集的比例:param isUseTest: 是否使用测试集，默认为False:param label_path:当前文件路径:return:"""files = glob(label_path + "\\*.json")files = [i.replace("\\", "/").split("/")[-1].split(".json")[0] for i in files]if useNumpyShuffle:file_length = len(files)index = np.arange(file_length)np.random.seed(32)np.random.shuffle(index)test_files = Noneif isUseTest:trainval_files, test_files = np.array(files)[index[:int(file_length * (1 - test_size))]], np.array(files)[index[int(file_length * (1 - test_size)):]]else:trainval_files = filestrain_files, val_files = np.array(trainval_files)[index[:int(len(trainval_files) * (1 - test_size))]], \np.array(trainval_files)[index[int(len(trainval_files) * (1 - test_size)):]]else:test_files = Noneif isUseTest:trainval_files, test_files = train_test_split(files, test_size=test_size, random_state=55)else:trainval_files = filestrain_files, val_files = train_test_split(trainval_files, test_size=test_size, random_state=55)return train_files, val_files, test_files, filesdef create_save_file(label_path=ROOT_DIR):"""按照训练时的图像和标注路径创建文件夹:param label_path:当前文件路径:return:"""# 生成训练集train_image = os.path.join(label_path, 'train', 'images')if not os.path.exists(train_image):os.makedirs(train_image)train_label = os.path.join(label_path, 'train', 'labels')if not os.path.exists(train_label):os.makedirs(train_label)# 生成验证集val_image = os.path.join(label_path, 'valid', 'images')if not os.path.exists(val_image):os.makedirs(val_image)val_label = os.path.join(label_path, 'valid', 'labels')if not os.path.exists(val_label):os.makedirs(val_label)# 生成测试集test_image = os.path.join(label_path, 'test', 'images')if not os.path.exists(test_image):os.makedirs(test_image)test_label = os.path.join(label_path, 'test', 'labels')if not os.path.exists(test_label):os.makedirs(test_label)return train_image, train_label, val_image, val_label, test_image, test_labeldef convert(size, box):dw = 1. / (size[0])dh = 1. / (size[1])x = (box[0] + box[1]) / 2.0 - 1y = (box[2] + box[3]) / 2.0 - 1w = box[1] - box[0]h = box[3] - box[2]x = x * dww = w * dwy = y * dhh = h * dhreturn x, y, w, hdef push_into_file(file, images, labels, label_path=ROOT_DIR, suffix='.jpg'):"""最终生成在当前文件夹下的所有文件按image和label分别存在到训练集/验证集/测试集路径的文件夹下:param file: 文件名列表:param images: 存放images的路径:param labels: 存放labels的路径:param label_path: 当前文件路径:param suffix: 图像文件后缀:return:"""for filename in file:image_file = os.path.join(label_path, filename + suffix)label_file = os.path.join(label_path, filename + '.txt')if not os.path.exists(os.path.join(images, filename + suffix)):try:shutil.move(image_file, images)except OSError:passif not os.path.exists(os.path.join(labels, filename + suffix)):try:shutil.move(label_file, labels)except OSError:passdef json2txt(classes, txt_Name='allfiles', label_path=ROOT_DIR, suffix='.jpg'):"""将json文件转化为txt文件，并将json文件存放到指定文件夹:param classes: 类别名:param txt_Name:txt文件，用来存放所有文件的路径:param label_path:当前文件路径:param suffix:图像文件后缀:return:"""store_json = os.path.join(label_path, 'json')if not os.path.exists(store_json):os.makedirs(store_json)_, _, _, files = split_dataset(label_path)if not os.path.exists(os.path.join(label_path, 'tmp')):os.makedirs(os.path.join(label_path, 'tmp'))list_file = open('tmp/%s.txt' % txt_Name, 'w')for json_file_ in tqdm(files):json_filename = os.path.join(label_path, json_file_ + ".json")imagePath = os.path.join(label_path, json_file_ + suffix)list_file.write('%s\n' % imagePath)out_file = open('%s/%s.txt' % (label_path, json_file_), 'w')json_file = json.load(open(json_filename, "r", encoding="utf-8"))if os.path.exists(imagePath):height, width, channels = cv2.imread(imagePath).shapefor multi in json_file["shapes"]:if len(multi["points"][0]) == 0:out_file.write('')continuepoints = np.array(multi["points"])xmin = min(points[:, 0]) if min(points[:, 0]) > 0 else 0xmax = max(points[:, 0]) if max(points[:, 0]) > 0 else 0ymin = min(points[:, 1]) if min(points[:, 1]) > 0 else 0ymax = max(points[:, 1]) if max(points[:, 1]) > 0 else 0label = multi["label"]if xmax <= xmin:passelif ymax <= ymin:passelse:cls_id = classes.index(label)b = (float(xmin), float(xmax), float(ymin), float(ymax))bb = convert((width, height), b)out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')# print(json_filename, xmin, ymin, xmax, ymax, cls_id)if not os.path.exists(os.path.join(store_json, json_file_ + '.json')):try:shutil.move(json_filename, store_json)except OSError:passdef create_yaml(classes, label_path, isUseTest=False):nc = len(classes)if not isUseTest:desired_caps = {'path': label_path,'train': 'train/images','val': 'valid/images','nc': nc,'names': classes}else:desired_caps = {'path': label_path,'train': 'train/images','val': 'valid/images','test': 'test/images','nc': nc,'names': classes}yamlpath = os.path.join(label_path, "data" + ".yaml")# 写入到yaml文件with open(yamlpath, "w+", encoding="utf-8") as f:for key, val in desired_caps.items():yaml.dump({key: val}, f, default_flow_style=False)# 首先确保当前文件夹下的所有图片统一后缀，如.jpg，如果为其他后缀，将suffix改为对应的后缀，如.png
def ChangeToYolo5(label_path=ROOT_DIR, suffix='.jpg', test_size=0.1, isUseTest=False):"""生成最终标准格式的文件:param test_size: 分割测试集或验证集的比例:param label_path:当前文件路径:param suffix: 文件后缀名:param isUseTest: 是否使用测试集:return:"""change_image_format(label_path)train_files, val_files, test_file, files = split_dataset(label_path, test_size=test_size, isUseTest=isUseTest)classes = get_all_class(files)json2txt(classes)create_yaml(classes, label_path, isUseTest=isUseTest)train_image, train_label, val_image, val_label, test_image, test_label = create_save_file(label_path)push_into_file(train_files, train_image, train_label, suffix=suffix)push_into_file(val_files, val_image, val_label, suffix=suffix)if test_file is not None:push_into_file(test_file, test_image, test_label, suffix=suffix)print('create dataset done')if __name__ == "__main__":ChangeToYolo5()

在保存图像的目录下，创建toyolo.py文件，将以上代码拷贝粘贴。

运行时先确保相应的库已经安装，运行后生成的文件目录如下：

生成的data.yaml可以直接复制到\yolov5\data目录下，tmp目录主要是处理的图像名，

json主要是原始标注生成的json

labelme生成的标注数据转换成yolov5格式相关推荐

python批量实现labelImg标注的 xml格式数据转换成 txt格式保存
labelImg标注的 xml格式数据如下: 单个xml文件数据打开如下: python实现labelImg标注的 xml格式数据转换成 txt格式数据的代码xml2txt.py如下: # -*- c ...
TensorFlow学习笔记之 bmp格式、txt格式数据转换成tfrecord 格式
目录一.前言二.bmp 格式数据转换成 tfrecord 格式的代码三.txt 格式数据转换成 tfrecord 格式的代码一.前言之前我们讲过了关于 tfrecord 格式的相关内容,在这 ...
windows系统下如何把excel数据转换成markdown格式的表格
如图,假设我有一个excel表格,想把里面的数据转换成markdown格式的表格: 只需选中excel表格里这些数据, 打开typero这个软件: 按Ctrl V,excel表格的数据就自动被转换成了 ...
把php数据转成json格式转换,php将从数据库中获得的数据转换成json格式并输出的方法...
php将从数据库中获得的数据转换成json格式并输出的方法如下所示: header('content-type:application/json;charset=utf8'); $results = ...
利用ffmpeg把一帧原始视频数据转换成jpg格式的图片
利用ffmpeg对一帧原始的视频数据转换成jpg格式的图片,保存到本地,用于Android显示 #include <jni.h> #include <stdio.h> #inc ...
重庆三调工作中将mdb数据转换成vct格式的分析
重庆三调工作中将mdb数据转换成vctg格式的分析重庆的三调工作已经步入数据汇集的进程中了,最终要提交的成果数据格式为vct格式,这个格式是一个明文书写格式,即该格式不会将数据封装到各种软件数据格式 ...
json格式的数据转换成数组格式。
1.这个方法的作用就是将json格式的数据转换成数组格式. 2.,假设有Person这个类,有json类型数据str=str = [{"name":"张三",& ...
将Excel中的数据转换成JSON格式
将Excel中的数据转换成JSON格式第一步:下载jxl.jar包 <dependency> <groupId>net.sourceforge.jexcelapi</g ...
【工具】Excel 表格数据转换成Json格式的实用工具 excel2json
介绍 excel2json工具是用C# 语言开发的能够把 Excel 表转换成 json 的工具主要功能: 支持读取 Excel 97-2003的 .xls格式和2007的 .xlsx格式: 支持多 ...

labelme生成的标注数据转换成yolov5格式

labelme生成的标注数据转换成yolov5格式相关推荐

最新文章

热门文章