FairMOT训练kitti tracking数据集的汽车类（参考FairVehicle）

工作情况：
kitti数据集的标签（转换成FairMOT类似的gt）

以下是处理图片和gt文本的程序操作，仅做个人记录！！！

1. 修改原先 kitti tracking 数据集的标签：把用文本表示的 type 字段改成用数字表示，同时将分隔符由空格改成逗号。

效果图

kitti tracking 标签含义：
The label files contain the following information, which can be read and
written using the matlab tools (readLabels.m) provided within this devkit.
All values (numerical or strings) are separated via spaces, each row
corresponds to one object. The 17 columns represent:

#Values    Name      Description
----------------------------------------------------------------------------
   1    frame        Frame within the sequence where the object appears
   1    track id     Unique tracking id of this object within this sequence
   1    type         Describes the type of object: 'Car', 'Van', 'Truck',
                     'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram',
                     'Misc' or 'DontCare'
   1    truncated    Integer (0,1,2) indicating the level of truncation.
                     Note that this is in contrast to the object detection
                     benchmark where truncation is a float in [0,1].
   1    occluded     Integer (0,1,2,3) indicating occlusion state:
                     0 = fully visible, 1 = partly occluded
                     2 = largely occluded, 3 = unknown
   1    alpha        Observation angle of object, ranging [-pi..pi]
   4    bbox         2D bounding box of object in the image (0-based index):
                     contains left, top, right, bottom pixel coordinates
   3    dimensions   3D object dimensions: height, width, length (in meters)
   3    location     3D object location x,y,z in camera coordinates (in meters)
   1    rotation_y   Rotation ry around Y-axis in camera coordinates [-pi..pi]
   1    score        Only for results: Float, indicating confidence in
                     detection, needed for p/r curves, higher is better.

附转换代码：

import os
import numpy as np
import pandas as pd
import os.path as osp

# Numeric code written in place of each KITTI object-type name.
KITTI_TYPE_IDS = {
    'Person': '1',
    'Pedestrian': '2',
    'Car': '3',
    'Person_sitting': '4',
    'Cyclist': '5',
    'Van': '6',
    'Truck': '7',
    'Tram': '8',
    'Misc': '9',
    'DontCare': '10',
}

src_data = '/media/ckq/data/kitti/MOT/images/train'


def replace(file, old_content, new_content):
    """Replace every occurrence of ``old_content`` in ``file`` with ``new_content``.

    The file is read completely, substituted in memory and written back.
    """
    content = read_file(file)
    content = content.replace(old_content, new_content)
    rewrite_file(file, content)


def read_file(file):
    """Return the whole content of the UTF-8 text file ``file``."""
    # The ``with`` statement closes the file; no explicit close() is needed.
    with open(file, encoding='UTF-8') as f:
        return f.read()


def rewrite_file(file, data):
    """Overwrite the UTF-8 text file ``file`` with the string ``data``."""
    with open(file, 'w', encoding='UTF-8') as f:
        f.write(data)


def convert_label_line(line):
    """Convert one KITTI tracking label row to the comma-separated numeric form.

    Fields are split on whitespace (commas are accepted too, so rows that were
    already converted pass through unchanged), the third field (object type)
    is mapped through ``KITTI_TYPE_IDS`` and the fields are joined with commas.
    """
    fields = line.replace(',', ' ').split()
    if len(fields) > 2:
        # Whole-field lookup: a chain of substring replacements turned
        # 'Person_sitting' into '1_sitting' because 'Person' was replaced first.
        fields[2] = KITTI_TYPE_IDS.get(fields[2], fields[2])
    return ','.join(fields)


def convert_label_text(text):
    """Convert every row of a label file's text; keeps a trailing newline."""
    converted = '\n'.join(convert_label_line(line) for line in text.splitlines())
    if text.endswith('\n'):
        converted += '\n'
    return converted


def main():
    """Rewrite ``gt/gt.txt`` of every sequence directory below ``src_data``."""
    for seq in os.listdir(src_data):
        path = osp.join(src_data, seq, 'gt/gt.txt')
        # One read and one write per file instead of one per replaced word.
        rewrite_file(path, convert_label_text(read_file(path)))


if __name__ == '__main__':
    main()

2. 之后开始给每个数据集的每张图片生成标签。首先说明一下，我的需求是只标注 Car。kitti 数据集标签生成脚本 gen_lables_kitti_car.py：

附代码（对照 FairMOT 和 FairVehicle 的生成标签代码编写）：

import os.path as osp
import os
import re
import shutil
import numpy as np


def mkdirs(d):
    """Create directory ``d`` (including parents) unless it already exists."""
    if not osp.isdir(d):
        os.makedirs(d)


data_root = '/media/ckq/data/kitti/'
seq_root = data_root + 'MOT/images/train'
label_root = data_root + 'MOT/labels_with_ids/train'

# Numeric class codes used in the converted gt.txt files.
cls_map = {
    'Person': 1,
    'Pedestrian': 2,
    'Car': 3,
    'Person_sitting': 4,
    'Cyclist': 5,
    'Van': 6,
    'Truck': 7,
    'Tram': 8,
    'Misc': 9,
    'DontCare': 10,
}

# Only rows whose class code is in this set are exported (Car only).
keep_types = {cls_map['Car']}

seqs = ['0000', '0001', '0002', '0003',
        '0004', '0005', '0006', '0007',
        '0008', '0009', '0010', '0011',
        '0012', '0014', '0015', '0018',
        '0019', '0020']


def read_seq_size(seq_info_path):
    """Return ``(imWidth, imHeight)`` read from a ``seqinfo.ini`` file.

    Raises:
        ValueError: if either key is missing from the file.
    """
    with open(seq_info_path) as seq_info_h:
        seq_info = seq_info_h.read()
    size = []
    for key in ('imWidth', 'imHeight'):
        found = re.search(r'^\s*%s\s*=\s*(\d+)' % key, seq_info, re.MULTILINE)
        if found is None:
            raise ValueError('%s not found in %s' % (key, seq_info_path))
        size.append(int(found.group(1)))
    return size[0], size[1]


def convert_sequence(gt, seq_width, seq_height, wanted_types, tid_offset=0):
    """Turn the ground-truth rows of one sequence into label lines.

    Args:
        gt: 2-D float array, one row per object. Column 0 is the frame,
            column 1 the track id, column 2 the numeric class code and
            columns 6..9 the box as left, top, right, bottom in pixels.
        seq_width: image width used to normalise x and the box width.
        seq_height: image height used to normalise y and the box height.
        wanted_types: collection of class codes to keep.
        tid_offset: number of track ids already handed out by earlier
            sequences; new ids start at ``tid_offset + 1``.

    Returns:
        ``(frame_labels, n_tracks)``. ``frame_labels`` maps a frame number to
        its list of ``'0 <id> <cx> <cy> <w> <h>\\n'`` lines with the geometry
        normalised to 0..1, and ``n_tracks`` is the number of distinct kept
        tracks in this sequence.
    """
    frame_labels = {}
    id_map = {}  # track id inside this sequence -> dataset-wide id
    if gt.size == 0:
        return frame_labels, 0

    # Track id is the primary sort key and frame the secondary one, so new
    # ids are handed out in ascending order of the original track id.
    order = np.lexsort(gt.T[:2, :])
    for row in gt[order]:
        if int(row[2]) not in wanted_types:
            continue
        fid = int(row[0])
        tid = int(row[1])
        # A per-sequence map keeps equal track ids of different sequences
        # apart; comparing against the previously seen id could merge them.
        if tid not in id_map:
            id_map[tid] = tid_offset + len(id_map) + 1
        left, top, right, bottom = (float(v) for v in row[6:10])
        w = right - left
        h = bottom - top
        # Exact box centre; rounding left/top first shifts it by up to 0.5 px.
        x = (left + right) / 2
        y = (top + bottom) / 2
        # The leading 0 is the class index: a single class is tracked.
        label_str = '0 {:d} {:.6f} {:.6f} {:.6f} {:.6f}\n'.format(
            id_map[tid],
            x / seq_width,   # center_x
            y / seq_height,  # center_y
            w / seq_width,   # bbox_w
            h / seq_height)  # bbox_h
        frame_labels.setdefault(fid, []).append(label_str)
    return frame_labels, len(id_map)


def main():
    """Generate one label file per frame for every sequence in ``seqs``."""
    # Always start from an empty label directory so stale files never mix in.
    if not os.path.isdir(label_root):
        mkdirs(label_root)
    else:
        shutil.rmtree(label_root)
        os.makedirs(label_root)
    print("Dir %s made" % label_root)
    print(seqs)

    total_track_id_num = 0
    for seq in seqs:  # every sequence has its own gt.txt
        print("Process %s, " % seq, end='')
        seq_width, seq_height = read_seq_size(osp.join(seq_root, seq, 'seqinfo.ini'))

        gt_txt = osp.join(seq_root, seq, 'gt', 'gt.txt')
        # ndmin=2 keeps a single-row file two-dimensional.
        gt = np.loadtxt(gt_txt, dtype=np.float64, delimiter=',', ndmin=2)

        frame_labels, n_tracks = convert_sequence(
            gt, seq_width, seq_height, keep_types, total_track_id_num)
        # Count only exported tracks, not every id present in gt.txt.
        print("%d track ids in seq %s" % (n_tracks, seq))
        total_track_id_num += n_tracks

        seq_label_root = osp.join(label_root, seq, 'img1')
        mkdirs(seq_label_root)
        for fid, lines in frame_labels.items():
            label_f_path = osp.join(seq_label_root, '{:06d}.txt'.format(fid))
            with open(label_f_path, 'w') as f:
                f.writelines(lines)

    print("Total %d track ids in this dataset" % total_track_id_num)
    print('Done')


if __name__ == '__main__':
    main()

# KITTI label generation, gen_lables_kitti_car.py (Car, Van and Truck variant):
import os.path as osp
import os
import re
import shutil
import numpy as np


def mkdirs(d):
    """Create directory ``d`` (including parents) unless it already exists."""
    if not osp.isdir(d):
        os.makedirs(d)


data_root = '/media/ckq/data/kitti/'
seq_root = data_root + 'MOT/images/train'
label_root = data_root + 'MOT/labels_with_ids/train'

# Numeric class codes used in the converted gt.txt files.
cls_map = {
    'Person': 1,
    'Pedestrian': 2,
    'Car': 3,
    'Person_sitting': 4,
    'Cyclist': 5,
    'Van': 6,
    'Truck': 7,
    'Tram': 8,
    'Misc': 9,
    'DontCare': 10,
}

# Rows of any of these classes are exported (Car, Van and Truck).
keep_types = {cls_map['Car'], cls_map['Van'], cls_map['Truck']}

seqs = ['0000', '0001', '0002', '0003',
        '0004', '0005', '0006', '0007',
        '0008', '0009', '0010', '0011',
        '0012', '0014', '0015', '0018',
        '0019', '0020']


def read_seq_size(seq_info_path):
    """Return ``(imWidth, imHeight)`` read from a ``seqinfo.ini`` file.

    Raises:
        ValueError: if either key is missing from the file.
    """
    with open(seq_info_path) as seq_info_h:
        seq_info = seq_info_h.read()
    size = []
    for key in ('imWidth', 'imHeight'):
        found = re.search(r'^\s*%s\s*=\s*(\d+)' % key, seq_info, re.MULTILINE)
        if found is None:
            raise ValueError('%s not found in %s' % (key, seq_info_path))
        size.append(int(found.group(1)))
    return size[0], size[1]


def convert_sequence(gt, seq_width, seq_height, wanted_types, tid_offset=0):
    """Turn the ground-truth rows of one sequence into label lines.

    Args:
        gt: 2-D float array, one row per object. Column 0 is the frame,
            column 1 the track id, column 2 the numeric class code and
            columns 6..9 the box as left, top, right, bottom in pixels.
        seq_width: image width used to normalise x and the box width.
        seq_height: image height used to normalise y and the box height.
        wanted_types: collection of class codes to keep.
        tid_offset: number of track ids already handed out by earlier
            sequences; new ids start at ``tid_offset + 1``.

    Returns:
        ``(frame_labels, n_tracks)``. ``frame_labels`` maps a frame number to
        its list of ``'0 <id> <cx> <cy> <w> <h>\\n'`` lines with the geometry
        normalised to 0..1, and ``n_tracks`` is the number of distinct kept
        tracks in this sequence.
    """
    frame_labels = {}
    id_map = {}  # track id inside this sequence -> dataset-wide id
    if gt.size == 0:
        return frame_labels, 0

    # Track id is the primary sort key and frame the secondary one, so new
    # ids are handed out in ascending order of the original track id.
    order = np.lexsort(gt.T[:2, :])
    for row in gt[order]:
        # A membership test replaces the chained "== 3 or == 6 or == 7" flag.
        if int(row[2]) not in wanted_types:
            continue
        fid = int(row[0])
        tid = int(row[1])
        # A per-sequence map keeps equal track ids of different sequences
        # apart; comparing against the previously seen id could merge them.
        if tid not in id_map:
            id_map[tid] = tid_offset + len(id_map) + 1
        left, top, right, bottom = (float(v) for v in row[6:10])
        w = right - left
        h = bottom - top
        # Exact box centre; rounding left/top first shifts it by up to 0.5 px.
        x = (left + right) / 2
        y = (top + bottom) / 2
        # The leading 0 is the class index: all kept types share one class.
        label_str = '0 {:d} {:.6f} {:.6f} {:.6f} {:.6f}\n'.format(
            id_map[tid],
            x / seq_width,   # center_x
            y / seq_height,  # center_y
            w / seq_width,   # bbox_w
            h / seq_height)  # bbox_h
        frame_labels.setdefault(fid, []).append(label_str)
    return frame_labels, len(id_map)


def main():
    """Generate one label file per frame for every sequence in ``seqs``."""
    # Always start from an empty label directory so stale files never mix in.
    if not os.path.isdir(label_root):
        mkdirs(label_root)
    else:
        shutil.rmtree(label_root)
        os.makedirs(label_root)
    print("Dir %s made" % label_root)
    print(seqs)

    total_track_id_num = 0
    for seq in seqs:  # every sequence has its own gt.txt
        print("Process %s, " % seq, end='')
        seq_width, seq_height = read_seq_size(osp.join(seq_root, seq, 'seqinfo.ini'))

        gt_txt = osp.join(seq_root, seq, 'gt', 'gt.txt')
        # ndmin=2 keeps a single-row file two-dimensional.
        gt = np.loadtxt(gt_txt, dtype=np.float64, delimiter=',', ndmin=2)

        frame_labels, n_tracks = convert_sequence(
            gt, seq_width, seq_height, keep_types, total_track_id_num)
        # Count only exported tracks, not every id present in gt.txt.
        print("%d track ids in seq %s" % (n_tracks, seq))
        total_track_id_num += n_tracks

        seq_label_root = osp.join(label_root, seq, 'img1')
        mkdirs(seq_label_root)
        for fid, lines in frame_labels.items():
            label_f_path = osp.join(seq_label_root, '{:06d}.txt'.format(fid))
            with open(label_f_path, 'w') as f:
                f.writelines(lines)

    print("Total %d track ids in this dataset" % total_track_id_num)
    print('Done')


if __name__ == '__main__':
    main()

效果图：

其实处理好的时候很奇怪就拿kitti的0000数据来说
在该数据集下其实有154张图片，尽管kitti提供了该数据集所有的标签（每一帧都有标签），但是实际上它所提供的标签有些不是我需要的汽车标签，故导致0到108的标签没有（gt.txt）
备注：有几个数据集可能某几帧根本没有标签，在gt.txt标签文件里直接跳过！！

标注完的效果图：
验证自己处理的标签是否标注正确：

参考链接：整一个生成标签过程https://blog.csdn.net/sinat_33486980/article/details/105684839

只有car:

最后用了这个转换代码生成识别车的种类多一点！！！
含有：Car Van Truck

附代码：（图片的路径下我只放一个数据集用来测试自己标注是否正确）

# -*- coding:utf-8 -*-
# Display the annotations of the tracking training set: every labelled frame
# is shown with its boxes, box centres and track ids drawn on top.
import os
import os.path as osp

root_path = "/home/ckq/Desktop/MOT"
img_dir = "images/train"
label_dir = "labels_with_ids/train"


def parse_label_line(line, img_w, img_h):
    """Convert one normalised label line back to pixel coordinates.

    Args:
        line: ``'<class> <id> <cx> <cy> <w> <h>'`` with the last four values
            normalised by the image width / height.
        img_w: image width in pixels.
        img_h: image height in pixels.

    Returns:
        ``(class_num, obj_id, (x, y), (left, top, right, bottom))`` where
        every coordinate is an ``int`` pixel value.
    """
    line_list = line.strip().split()
    class_num = int(line_list[0])  # class index
    obj_id = int(line_list[1])     # track id
    x, y, w, h = line_list[2:]     # centre and size, normalised
    x = int(float(x) * img_w)
    y = int(float(y) * img_h)
    w = int(float(w) * img_w)
    h = int(float(h) * img_h)
    left = int(x - w / 2)
    top = int(y - h / 2)
    return class_num, obj_id, (x, y), (left, top, left + w, top + h)


def show_labels():
    """Show every labelled frame of every sequence below ``root_path``."""
    # Imported here so the pure helper above stays importable without OpenCV.
    import cv2

    seq_names = sorted(os.listdir(root_path + "/" + img_dir))  # 0000 0001 ...
    for seq_name in seq_names:
        print(seq_name)
        label_path = osp.join(root_path, label_dir, seq_name, 'img1')
        print(label_path)
        label_names = sorted(os.listdir(label_path))
        print(label_names)

        for label_name in label_names:
            frame_name, ext = osp.splitext(label_name)
            if ext != '.txt':
                continue
            # ``with`` closes the label file; it used to stay open.
            with open(osp.join(label_path, label_name), "r") as label_f:
                lines = label_f.readlines()
            print(lines)

            # Image that carries the same frame number as the label file.
            img_file = root_path + "/" + img_dir + "/" + seq_name + "/img/" + frame_name + ".png"
            img_data = cv2.imread(img_file)
            if img_data is None:
                # imread returns None instead of raising on a missing file.
                print("Cannot read image %s, skipped" % img_file)
                continue
            H, W, C = img_data.shape

            for line in lines:
                if not line.strip():
                    continue
                class_num, obj_ID, center, box = parse_label_line(line, W, H)
                left, top, right, bottom = box
                cv2.circle(img_data, center, 1, (0, 0, 255))
                cv2.rectangle(img_data, (left, top), (right, bottom), (0, 255, 0), 2)
                cv2.putText(img_data, str(obj_ID), (left, top),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255), 1)

            resized_img = cv2.resize(img_data, (800, 416))
            cv2.imshow("label", resized_img)
            cv2.waitKey(100)

    cv2.destroyAllWindows()


if __name__ == '__main__':
    show_labels()

3.接下来生成训练文件：

效果图：

附代码：

import os
import os.path as osp

image_flder = "/home/ckq/Desktop/MOT/images/train"
train_list_path = "/home/ckq/Desktop/MOT/kitt_car.train"


def list_image_paths(image_folder):
    """Return the path of every image below ``image_folder``.

    Each sub-directory of ``image_folder`` is one sequence whose images live
    in its ``img`` folder. Sequences and images are both visited in sorted
    order so the result does not depend on the file system's listing order.
    Entries without an ``img`` directory are skipped.
    """
    paths = []
    for seq_name in sorted(os.listdir(image_folder)):
        image_path = osp.join(image_folder, seq_name, 'img')
        if not osp.isdir(image_path):
            continue
        for image_name in sorted(os.listdir(image_path)):
            paths.append(image_path + '/' + image_name)
    return paths


def write_train_list(image_folder, list_path):
    """Write one image path per line to ``list_path``; return the line count."""
    paths = list_image_paths(image_folder)
    # ``with`` closes the file even if a write fails.
    with open(list_path, "w") as train_f:
        train_f.writelines(path + "\n" for path in paths)
    return len(paths)


if __name__ == '__main__':
    print(write_train_list(image_flder, train_list_path))

这里附加一个 png格式的图片转换成jpg格式图片：

效果图：

附代码：

import os
import sys
import os.path as osp
import shutil


def mkdirs(d):
    """Create directory ``d`` (including parents) unless it already exists."""
    if not osp.isdir(d):
        os.makedirs(d)


# input_folder = "/home/ckq/Desktop/MOT/images/train/0000/img"  # single-sequence test input
# output_folder = "/home/ckq/Desktop/MOT/images/train/0000/jpg"  # single-sequence test output
src_folder = "/media/ckq/data/kitti/MOT/images/train"  # source root holding the .png images
dist_folder = "/media/ckq/data/kitti/MOT_new/images/train"  # output root for the .jpg images


def list_png_stems(folder):
    """Return the sorted names, without extension, of the ``.png`` files in ``folder``.

    Only files directly inside ``folder`` are listed, because the converter
    builds its paths relative to that folder. ``splitext`` keeps names that
    contain extra dots intact.
    """
    return sorted(
        osp.splitext(name)[0]
        for name in os.listdir(folder)
        if name.endswith('.png') and osp.isfile(osp.join(folder, name))
    )


def convert_folder(input_folder, output_folder):
    """Save every PNG of ``input_folder`` as a JPEG in ``output_folder``.

    Returns the list of converted file stems.
    """
    # Imported here so the path helpers stay importable without Pillow.
    from PIL import Image

    stems = list_png_stems(input_folder)
    print(stems)
    for stem in stems:
        old_path = input_folder + "/" + stem + '.png'
        new_path = output_folder + "/" + stem + '.jpg'
        with Image.open(old_path) as img:
            # JPEG has no alpha channel or palette; convert to RGB first.
            img.convert('RGB').save(new_path)
    return stems


def main():
    """Convert the ``img1`` folder of every sequence below ``src_folder``."""
    src_folder_names = os.listdir(src_folder)
    print(src_folder_names)
    for src_folder_name in src_folder_names:
        input_folder = osp.join(src_folder, src_folder_name, 'img1')
        output_folder = osp.join(dist_folder, src_folder_name, 'img1')
        # Recreate the output folder so results of an earlier run are dropped.
        if not os.path.isdir(output_folder):
            mkdirs(output_folder)
        else:
            shutil.rmtree(output_folder)
            os.makedirs(output_folder)
        print(output_folder)
        convert_folder(input_folder, output_folder)


if __name__ == '__main__':
    main()

参考：转图片格式

结果发现我的第一帧是从000000开始，而代码是从000001开始的，
则图片重新命名

代码：

import re
import sys
import os
import os.path as osp

# Whole file name must be "<anything>.<jpg|jpeg|JPG>"; group 1 is the extension.
IMAGE_NAME_RE = re.compile(r".+\.(jpg|jpeg|JPG)")


def renameall(path, prefix='img', start=1):
    """Rename the JPEG files in ``path`` to ``<prefix><6-digit number>.<ext>``.

    Files are processed in sorted name order so the numbering follows the
    original frame order, and files that are not JPEGs are left untouched.
    The working directory of the process is not changed.

    Args:
        path: directory whose images are renamed.
        prefix: text placed before the number.
        start: number given to the first image.

    Raises:
        FileExistsError: if a new name is already taken by another file;
            ``os.rename`` could otherwise overwrite that file silently.
    """
    fileList = sorted(os.listdir(path))
    print("修改前：" + str(fileList))

    num = start
    for fileName in fileList:
        found = IMAGE_NAME_RE.fullmatch(fileName)
        if found is None:
            continue  # not an image; skipping avoids an IndexError
        print('num：', num, 'filename:', fileName)
        new_name = prefix + str(num).zfill(6) + '.' + found.group(1)
        if new_name != fileName:
            target = osp.join(path, new_name)
            if osp.exists(target):
                raise FileExistsError(target)
            os.rename(osp.join(path, fileName), target)
        num = num + 1
        print("---------------------------------------------------")

    print("修改后：" + str(os.listdir(path)))


# path = '/home/ckq/Desktop/MOT/images/train/0000/img1'  # single-sequence test
src_path = '/media/ckq/data/kitti/MOT/images/train'


def main():
    """Rename the images in the ``img1`` folder of every sequence."""
    imgs_name = sorted(os.listdir(src_path))
    print(imgs_name)
    for img_name in imgs_name:
        img_path = osp.join(src_path, img_name, 'img1')
        print(img_path)
        renameall(img_path)


if __name__ == '__main__':
    main()