将INRIA数据集改为PascalVOC格式

INRIA数据集链接：https://pan.baidu.com/s/1Z2TVvGuuvor7juqj3uPG-g
提取码：07ed

我制作的PascalVOC格式的INRIA数据集链接：https://pan.baidu.com/s/1licLJkXICcwSdAwMb8B0OA
提取码：3rh2

将INRIA数据集改为PascalVOC格式
- PascalVOC格式介绍
- INRIA数据集介绍
- 分别创建对应文件夹
- JPEGImages
  - 将原图像处理为jpg格式保存在JPEGImages文件夹中
- Annotations
  - 将图片信息提取并保存到txt文件中
  - 将txt文本中的信息保存为xml文件
- ImageSets
  - trainval.txt
  - train.txt
  - val.txt

PascalVOC格式介绍

PascalVOC格式数据集由三个部分构成，JPEGImages，Annotations，ImageSets。
JPEGImages：存放的是训练与测试的所有图片。
Annotations：里面存放的是每张图片打完标签所对应的XML文件
ImageSets：里面有个Main文件夹，其中包含存储着图片名称的txt文件，分为训练与测试。

INRIA数据集介绍

此处参考行人检测数据集汇总

该数据库是目前使用最多的静态行人检测数据库，提供原始图片及相应的标注文件。训练集有正样本614张（包含2416个行人），负样本1218张；测试集有正样本288张（包含1126个行人），负样本453张。图片中人体大部分为站立姿势且高度大于100个像素，部分标注可能不正确。图片主要来源于GRAZ-01、个人照片及Google，因此图片的清晰度较高。在XP操作系统下部分训练或者测试图片无法看清楚，但可用OpenCV正常读取和显示。

分别创建对应文件夹

JPEGImages

将原图像处理为jpg格式保存在JPEGImages文件夹中

(因为第一次制作数据集，为了省事，我直接把INRIA数据集中的70X134H96和96X160H96中的图片全部放到JPEGImages文件夹中了)

import os
from PIL import Image

image_path = "JPEGImages/"   # change to the directory that holds the images

# Convert every non-JPEG image found directly inside image_path into an RGB
# .jpg stored next to it, then delete the source file when it was a PNG.
for file_name in os.listdir(image_path):
    src = os.path.join(image_path, file_name)
    # splitext copes with extensions of any length; slicing off three
    # characters only worked for three-letter extensions.
    stem, ext = os.path.splitext(src)
    ext = ext.lower()
    if not os.path.isfile(src) or ext in (".jpg", ".jpeg"):
        # Skip sub-directories, and leave existing JPEGs untouched:
        # re-saving them would only re-compress (degrade) the image.
        continue
    dst = stem + ".jpg"
    with Image.open(src) as im:
        # PNG may carry an alpha channel (RGBA); JPEG stores three channels.
        im.convert("RGB").save(dst)
    if ext == ".png":
        os.remove(src)   # the PNG is no longer needed once the JPEG exists

Annotations

这个地方有点问题，我直接对所有图片进行了批量处理，导致没有提取图片的特征，如果要进行特征提取，就需要一张张处理图片

（此处参考博客：Python 提取图像信息保存为TXT、xml格式）

将图片信息提取并保存到txt文件中

import os
import cv2# 图像处理类  内置各种函数
class image_processing():
    """Batch helpers for the images stored in ``self.img_path``.

    ``rename`` renumbers the jpg files; ``get_image_information`` writes one
    text record per image (file name plus values derived from its shape).
    """

    def __init__(self):
        self.img_path = "./JPEGImages/"   # change to the directory holding the jpg images
        self.annotations_txt_path = "./"   # change to the directory for the txt file
        self.annotations_xml_path = "./Annotations/"   # change to the directory for the xml files
        if not os.path.exists(self.annotations_xml_path):
            os.makedirs(self.annotations_xml_path)

    def rename(self):
        """Rename every ``.jpg`` in ``self.img_path`` to ``00NNNN.jpg``.

        Numbering starts at 4500 and follows the order returned by
        ``os.listdir``. Files with other extensions are left alone.
        """
        start = 4500
        imagelist = os.listdir(self.img_path)
        total_num = len(imagelist)  # number of directory entries, jpg or not
        folder = os.path.abspath(self.img_path)
        i = start
        for item in imagelist:
            if item.endswith('.jpg'):
                src = os.path.join(folder, item)
                dst = os.path.join(folder, '00' + format(str(i), '0>4s') + '.jpg')
                os.rename(src, dst)
                print('converting %s to %s ...' % (src, dst))
                i = i + 1
        # Report how many files were renamed, not the last index reached.
        print('total %d to rename & converted %d jpgs' % (total_num, i - start))

    @staticmethod
    def _format_record(image_name, image_shape):
        """Return the txt line for one image.

        The line is ``<name> 3 5 5 <shape[0]-5> <shape[1]-5>`` followed by a
        newline, every field separated by a single space.

        NOTE(review): cv2 shapes are (rows, cols, ...), so this emits
        height-5 before width-5 -- confirm that is the intended column order.
        The meaning of the constant ``3`` is not visible here.
        """
        fields = [
            image_name,
            '3',
            '5',
            '5',
            str(image_shape[0] - 5),
            str(image_shape[1] - 5),
        ]
        return ' '.join(fields) + '\n'

    def get_image_information(self):
        """Write one record per readable image to ``sex_image_txt.txt``.

        The file is created inside ``self.annotations_txt_path``. Entries
        that OpenCV cannot decode are reported and skipped.
        """
        image_list = os.listdir(self.img_path)
        print(len(image_list))
        txt_file = os.path.join(self.annotations_txt_path, 'sex_image_txt.txt')
        with open(txt_file, "w") as file_txt:
            for image_name in image_list:
                img = cv2.imread(os.path.join(self.img_path, image_name))
                if img is None:
                    # imread signals failure with None instead of raising.
                    print('skipping unreadable file %s' % image_name)
                    continue
                image_shape = img.shape
                print(image_shape)
                file_txt.write(self._format_record(image_name, image_shape))


if __name__ == '__main__':
    newname = image_processing()
    # Optional first step: uncomment to renumber the jpg files.
    # newname.rename()
    newname.get_image_information()

将txt文本中的信息保存为xml文件

from xml.dom.minidom import Document
import os
ann_path = "./sex_image_txt.txt"   # change to the txt file path
img_path = "./JPEGImages/"   # change to the jpg image directory
xml_path = "./Annotations/"   # change to the xml output directory
database_name = "INRIAPerson Database"
# object class labels
label_list = ["person"]


def _add_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` under *parent* and return the new element."""
    node = doc.createElement(tag)
    parent.appendChild(node)
    node.appendChild(doc.createTextNode(str(text)))
    return node


def writeXml(imgname, imgpath, w, h, label_list, wxml, database_name):
    """Write an annotation XML file for one image.

    Args:
        imgname: image file name stored in ``<filename>``.
        imgpath: image path stored in ``<path>``.
        w: image width in pixels.
        h: image height in pixels.
        label_list: class labels; only the first entry is used.
        wxml: destination path of the XML file.
        database_name: text stored in both ``<folder>`` and ``<database>``.

    A single ``<object>`` is written whose box is the image inset by five
    pixels on every side: (5, 5) to (w - 5, h - 5).
    """
    doc = Document()
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    _add_text_element(doc, annotation, 'folder', database_name)
    _add_text_element(doc, annotation, 'filename', imgname)
    _add_text_element(doc, annotation, 'path', imgpath)

    source = doc.createElement('source')
    annotation.appendChild(source)
    _add_text_element(doc, source, 'database', database_name)

    size = doc.createElement('size')
    annotation.appendChild(size)
    _add_text_element(doc, size, 'width', w)
    _add_text_element(doc, size, 'height', h)
    _add_text_element(doc, size, 'depth', "3")

    _add_text_element(doc, annotation, 'segmented', "0")

    # Named obj_node so the builtin ``object`` is not shadowed.
    obj_node = doc.createElement('object')
    annotation.appendChild(obj_node)
    _add_text_element(doc, obj_node, 'name', label_list[0])
    _add_text_element(doc, obj_node, 'pose', "0")
    _add_text_element(doc, obj_node, 'truncated', "0")
    _add_text_element(doc, obj_node, 'difficult', "0")

    bndbox = doc.createElement('bndbox')
    obj_node.appendChild(bndbox)
    _add_text_element(doc, bndbox, 'xmin', 5)
    _add_text_element(doc, bndbox, 'ymin', 5)
    _add_text_element(doc, bndbox, 'xmax', w - 5)
    _add_text_element(doc, bndbox, 'ymax', h - 5)

    # toprettyxml returns bytes when an encoding is given, hence "wb".
    with open(wxml, "wb") as f:
        f.write(doc.toprettyxml(indent='\t', encoding='utf-8'))


def main():
    """Create one XML annotation per image listed in ``ann_path``."""
    # Imported here so writeXml stays importable without Pillow installed.
    from PIL import Image

    if not os.path.exists(xml_path):
        os.makedirs(xml_path)

    with open(ann_path, 'r') as f:
        txt_list = f.readlines()

    for line in txt_list:
        line = line.strip()
        if not line:
            continue  # tolerate blank lines in the txt file
        img_name = line.split(' ')[0]
        fileimgpath = os.path.join(img_path, img_name)
        with Image.open(fileimgpath) as im:
            width, height = im.size
        # splitext keeps file names that contain extra dots intact.
        savename = os.path.join(xml_path, os.path.splitext(img_name)[0] + '.xml')
        writeXml(img_name, fileimgpath, int(width), int(height), label_list, savename, database_name)


if __name__ == '__main__':
    main()

ImageSets

这个文件夹里可以添加其他文件，但只需要Main文件夹就足够运行
Main文件夹下要有三个txt文件，分别是train.txt，val.txt，trainval.txt
train.txt - 训练集图片文件名
val.txt - 测试集图片文件名
trainval.txt - 完整数据集图片文件名

此处我把数据集划分的比例为训练集：测试集=2：1

trainval.txt

import os

image_path = "JPEGImages/"   # image directory
filename = "trainval.txt"
image_list = os.listdir(image_path)

# Write the name (without extension) of every file in image_path, one per line.
with open(filename, "w") as f:
    for im in image_list:
        # splitext handles extensions of any length. No trailing space is
        # written, so the ID stays clean for loaders that only strip the
        # newline.
        f.write(os.path.splitext(im)[0] + "\n")

train.txt

import os

image_path = "JPEGImages/"   # image directory
filename = "train.txt"
image_list = os.listdir(image_path)

# Keep two out of every three files (positions 0 and 1 of each group of
# three) as the training set.
# NOTE: the selection depends on the order returned by os.listdir(); the
# directory must be unchanged between generating the different split files.
with open(filename, "w") as f:
    for index, im in enumerate(image_list):
        if index % 3 == 2:
            continue  # every third file is left out of the training set
        # splitext handles extensions of any length. No trailing space is
        # written, so the ID stays clean for loaders that only strip the
        # newline.
        f.write(os.path.splitext(im)[0] + "\n")

val.txt

import os

image_path = "JPEGImages/"   # image directory
filename = "val.txt"
image_list = os.listdir(image_path)

# Keep every third file (position 2 of each group of three) as the held-out set.
# NOTE: the selection depends on the order returned by os.listdir(); the
# directory must be unchanged between generating the different split files.
with open(filename, "w") as f:
    for index, im in enumerate(image_list):
        if index % 3 != 2:
            continue
        # splitext handles extensions of any length. No trailing space is
        # written, so the ID stays clean for loaders that only strip the
        # newline.
        f.write(os.path.splitext(im)[0] + "\n")