







import os
import shutil
import cv2


# Names of the categories whose samples should be collected.
catogory = ['ship']

# A sample is selected once this many matching instances have been counted.
# NOTE(review): the original check was ``n > 2`` (i.e. three or more) while
# its comment said "two or more"; the coded behaviour is kept here - confirm
# which one is intended.
MIN_INSTANCES = 3


def custombasename(fullname):
    """Return the file name of ``fullname`` without directory and extension."""
    return os.path.basename(os.path.splitext(fullname)[0])


def GetFileFromThisRootDir(dir, ext=None):  # ``dir`` kept for compatibility
    """Recursively list the files below ``dir``.

    :param dir: root directory to walk
    :param ext: optional container of extensions (without the leading dot);
        when given, only files whose extension satisfies ``extension in ext``
        are returned (for a plain string this is a substring test)
    :return: list of file paths
    """
    allfiles = []
    for root, _dirs, files in os.walk(dir):
        for filename in files:
            filepath = os.path.join(root, filename)
            extension = os.path.splitext(filepath)[1][1:]
            if ext is None or extension in ext:
                allfiles.append(filepath)
    return allfiles


def _has_enough_instances(labelpath, categories, minimum):
    """Tell whether a label file holds at least ``minimum`` wanted objects.

    The first two lines of the file are skipped unconditionally (the script
    treats them as header lines).  The category name is read from the ninth
    space-separated field of every remaining line.
    """
    hits = 0
    # ``with`` guarantees the handle is released for every label file.
    with open(labelpath, 'r') as f:
        for i, line in enumerate(f):
            if i < 2:
                continue
            fields = line.strip().split(' ')
            if len(fields) < 9:
                # Blank or malformed line: no category field to inspect.
                continue
            if fields[8] in categories:
                hits += 1
                if hits >= minimum:
                    return True
    return False


def main():
    """Copy every sample that contains enough ``catogory`` instances.

    Images are re-encoded with OpenCV so the suffix can change from ``.png``
    to ``.tif``; label files are copied unchanged.
    """
    root1 = 'H:/DOTA_biqi/Org_data/DOTA/train'
    pic_path = os.path.join(root1, 'images')      # source images
    label_path = os.path.join(root1, 'labelTxt')  # source DOTA labels
    out_pic = 'C:/Users/wytwh/Desktop/ship/train/images'
    out_label = 'C:/Users/wytwh/Desktop/ship/train/labelTxt'

    # Writing into a missing directory would fail, so create them up front.
    os.makedirs(out_pic, exist_ok=True)
    os.makedirs(out_label, exist_ok=True)

    for labelpath in GetFileFromThisRootDir(label_path):
        if not _has_enough_instances(labelpath, catogory, MIN_INSTANCES):
            continue
        name = custombasename(labelpath)
        oldimgpath = os.path.join(pic_path, name + '.png')
        img = cv2.imread(oldimgpath)
        if img is None:
            # Image missing or unreadable: skip instead of crashing in imwrite.
            print('skipping %s: cannot read image %s' % (labelpath, oldimgpath))
            continue
        # Re-encode (rather than copy) so that the file suffix can change.
        cv2.imwrite(os.path.join(out_pic, name + '.tif'), img)
        shutil.copyfile(labelpath, os.path.join(out_label, name + '.txt'))


if __name__ == '__main__':
    main()





import os
import codecs
import numpy as np
import math
from dota_utils import GetFileFromThisRootDir
import cv2
import shapely.geometry as shgeo
import dota_utils as util
import copy


def choose_best_pointorder_fit_another(poly1, poly2):
    """Return the rotation of ``poly1`` that best matches ``poly2``.

    :param poly1: flat sequence ``[x1, y1, x2, y2, x3, y3, x4, y4]``
    :param poly2: flat sequence of the same layout used as the reference
    :return: numpy array holding the cyclic rotation of ``poly1`` whose
        summed squared coordinate distance to ``poly2`` is smallest
    """
    x1, y1, x2, y2, x3, y3, x4, y4 = (poly1[i] for i in range(8))
    combinate = [
        np.array([x1, y1, x2, y2, x3, y3, x4, y4]),
        np.array([x2, y2, x3, y3, x4, y4, x1, y1]),
        np.array([x3, y3, x4, y4, x1, y1, x2, y2]),
        np.array([x4, y4, x1, y1, x2, y2, x3, y3]),
    ]
    dst_coordinate = np.array(poly2)
    distances = np.array(
        [np.sum((coord - dst_coordinate) ** 2) for coord in combinate]
    )
    order = distances.argsort()
    return combinate[order[0]]


def cal_line_length(point1, point2):
    """Return the Euclidean distance between two ``(x, y)`` points."""
    return math.sqrt(
        math.pow(point1[0] - point2[0], 2) + math.pow(point1[1] - point2[1], 2)
    )


class splitbase:
    """Cut images and their DOTA labels into overlapping square patches."""

    def __init__(self,
                 basepath,
                 outpath,
                 code='utf-8',
                 gap=100,
                 subsize=1024,
                 thresh=0.7,
                 choosebestpoint=True):
        """
        :param basepath: base path of the data; must contain the
            sub-directories 'images' and 'labelTxt'
        :param outpath: output base path; 'images' and 'labelTxt' are created
            below it
        :param code: encoding used when writing the label txt files
        :param gap: overlap between two neighbouring patches (keeps objects
            from being cut at every border)
        :param subsize: edge length of a patch
        :param thresh: an object that is cut by the patch border is kept only
            if more than this fraction of its area lies inside the patch
        :param choosebestpoint: reorder the points of a cut object so that
            they line up with the original point order
        :raises ValueError: if ``subsize`` is not positive or ``gap`` is not
            smaller than ``subsize``
        """
        if subsize <= 0:
            raise ValueError('subsize must be positive, got %r' % (subsize,))
        if gap >= subsize:
            # The stride would be <= 0 and the tiling loop would never end.
            raise ValueError(
                'gap (%r) must be smaller than subsize (%r)' % (gap, subsize)
            )
        self.basepath = basepath
        self.outpath = outpath
        self.code = code
        self.gap = gap
        self.subsize = subsize
        self.slide = self.subsize - self.gap
        self.thresh = thresh
        self.imagepath = os.path.join(self.basepath, 'images')
        self.labelpath = os.path.join(self.basepath, 'labelTxt')
        self.outimagepath = os.path.join(self.outpath, 'images')
        self.outlabelpath = os.path.join(self.outpath, 'labelTxt')
        self.choosebestpoint = choosebestpoint
        os.makedirs(self.outimagepath, exist_ok=True)
        os.makedirs(self.outlabelpath, exist_ok=True)

    def polyorig2sub(self, left, up, poly):
        """Translate a flat ``[x, y, x, y, ...]`` polygon into patch coordinates.

        Each coordinate is shifted by the patch origin ``(left, up)`` and
        truncated with ``int``.  Returns a float numpy array of ``len(poly)``.
        """
        polyInsub = np.zeros(len(poly))
        for k in range(0, len(poly) - 1, 2):
            polyInsub[k] = int(poly[k] - left)
            polyInsub[k + 1] = int(poly[k + 1] - up)
        return polyInsub

    def calchalf_iou(self, poly1, poly2):
        """Return ``(intersection, intersection_area / poly1_area)``.

        This is not the usual IoU: the denominator is the area of ``poly1``
        only.
        """
        inter_poly = poly1.intersection(poly2)
        half_iou = inter_poly.area / poly1.area
        return inter_poly, half_iou

    def saveimagepatches(self, img, subimgname, left, up, ext):
        """Write the ``subsize`` x ``subsize`` window at ``(left, up)`` to disk.

        The file is stored as ``<outimagepath>/<subimgname><ext>``.
        """
        subimg = copy.deepcopy(
            img[up: (up + self.subsize), left: (left + self.subsize)]
        )
        outfile = os.path.join(self.outimagepath, subimgname + ext)
        cv2.imwrite(outfile, subimg)

    def GetPoly4FromPoly5(self, poly):
        """Reduce a 5-point polygon to 4 points.

        The shortest edge is collapsed: its two end points are replaced by
        their midpoint.  ``poly`` is a flat list of 10 coordinates; a flat
        list of 8 coordinates is returned.
        """
        points = [(poly[2 * i], poly[2 * i + 1]) for i in range(5)]
        # Edge i joins point i and point (i + 1) % 5.
        edge_lengths = [
            cal_line_length(points[i], points[(i + 1) % 5]) for i in range(5)
        ]
        pos = np.array(edge_lengths).argsort()[0]
        outpoly = []
        for idx in range(5):
            if idx == pos:
                follower = points[(idx + 1) % 5]
                outpoly.append((points[idx][0] + follower[0]) / 2)
                outpoly.append((points[idx][1] + follower[1]) / 2)
            elif idx == (pos + 1) % 5:
                # Second end point of the collapsed edge: already merged.
                continue
            else:
                outpoly.extend(points[idx])
        return outpoly

    def _cut_object_to_patch(self, inter_poly, obj, left, up):
        """Return the patch-local polygon of a partially visible object.

        ``inter_poly`` is the intersection of the object with the patch.
        Returns ``None`` when the intersection cannot be expressed with four
        points (fewer than 4 or more than 5 vertices, or not a single
        polygon).
        """
        if inter_poly.geom_type != 'Polygon':
            # Only a single polygon has an exterior ring to read below.
            return None
        inter_poly = shgeo.polygon.orient(inter_poly, sign=1)
        out_poly = list(inter_poly.exterior.coords)[0:-1]
        if len(out_poly) < 4 or len(out_poly) > 5:
            # Cut instances with more than 5 points are not handled currently.
            return None
        out_poly2 = []
        for x, y in out_poly:
            out_poly2.append(x)
            out_poly2.append(y)
        if len(out_poly) == 5:
            out_poly2 = self.GetPoly4FromPoly5(out_poly2)
        if self.choosebestpoint:
            out_poly2 = choose_best_pointorder_fit_another(out_poly2, obj['poly'])
        polyInsub = self.polyorig2sub(left, up, out_poly2)
        # Keep every coordinate inside [1, subsize].
        return np.clip(polyInsub, 1, self.subsize)

    def savepatches(self, resizeimg, objects, subimgname, left, up, right, down, ext):
        """Write the label file and the image of one patch.

        Objects completely inside the window ``(left, up, right, down)`` are
        written unchanged (apart from the coordinate shift).  Objects cut by
        the window are written only when more than ``thresh`` of their area
        is inside it; smaller remainders are dropped.
        """
        outfile = os.path.join(self.outlabelpath, subimgname + '.txt')
        imgpoly = shgeo.Polygon(
            [(left, up), (right, up), (right, down), (left, down)]
        )
        with codecs.open(outfile, 'w', self.code) as f_out:
            for obj in objects:
                corners = obj['poly']
                gtpoly = shgeo.Polygon([(corners[0], corners[1]),
                                        (corners[2], corners[3]),
                                        (corners[4], corners[5]),
                                        (corners[6], corners[7])])
                if gtpoly.area <= 0:
                    continue
                inter_poly, half_iou = self.calchalf_iou(gtpoly, imgpoly)
                if half_iou == 1:
                    polyInsub = self.polyorig2sub(left, up, corners)
                elif half_iou > 0:
                    if half_iou <= self.thresh:
                        # Too little of the object is left in this patch.
                        continue
                    polyInsub = self._cut_object_to_patch(inter_poly, obj, left, up)
                    if polyInsub is None:
                        continue
                else:
                    continue
                outline = ' '.join(map(str, polyInsub))
                outline = outline + ' ' + obj['name'] + ' ' + str(obj['difficult'])
                f_out.write(outline + '\n')
        self.saveimagepatches(resizeimg, subimgname, left, up, ext)

    def _patch_starts(self, length):
        """Yield the start offsets of the patches along one image axis.

        Offsets advance by ``self.slide``; the last patch is moved back so
        that it ends at the image border (or starts at 0 when the image is
        smaller than a patch).
        """
        start = 0
        while start + self.subsize < length:
            yield start
            start += self.slide
        if length > 0:
            yield max(length - self.subsize, 0)

    def SplitSingle(self, imgpath, rate):
        """Split a single image and its ground truth into patches.

        :param imgpath: path of the image; the label is looked up as
            ``<labelpath>/<image name>.txt`` and the patches keep the image's
            file extension
        :param rate: resize scale applied to image and labels before cutting
        :return: None; nothing is written when the image cannot be read
        """
        img = cv2.imread(imgpath)
        name = util.custombasename(imgpath)       # image name without suffix
        extent = os.path.splitext(imgpath)[-1]    # image suffix
        if img is None:
            return
        fullname = os.path.join(self.labelpath, name + '.txt')
        objects = util.parse_dota_poly2(fullname)
        for obj in objects:
            obj['poly'] = [rate * value for value in obj['poly']]

        if rate != 1:
            resizeimg = cv2.resize(img, None, fx=rate, fy=rate,
                                   interpolation=cv2.INTER_CUBIC)
        else:
            resizeimg = img
        outbasename = name + '__' + str(rate) + '__'
        height, width = np.shape(resizeimg)[0], np.shape(resizeimg)[1]

        for left in self._patch_starts(width):
            for up in self._patch_starts(height):
                # Window used for the label test, limited to the image size.
                right = min(left + self.subsize, width - 1)
                down = min(up + self.subsize, height - 1)
                subimgname = outbasename + str(left) + '___' + str(up)
                self.savepatches(resizeimg, objects, subimgname,
                                 left, up, right, down, extent)

    def splitdata(self, rate):
        """
        Split every image found below ``self.imagepath``.

        :param rate: resize rate before cut
        """
        imagelists = GetFileFromThisRootDir(self.imagepath)
        for imgpath in imagelists:
            print('正在处理 %s' % imgpath)
            self.SplitSingle(imgpath, rate)


if __name__ == '__main__':
    # example usage of ImgSplit
    split = splitbase(r'/home/yantianwang/lala/ship/train',
                      r'/home/yantianwang/lala/ship/train/examplesplit')
    split.splitdata(1)



import os
import shutil
import xml.dom.minidom


def custombasename(fullname):
    """Return the file name of ``fullname`` without directory and extension."""
    return os.path.basename(os.path.splitext(fullname)[0])


def GetFileFromThisRootDir(dir, ext=None):  # ``dir`` kept for compatibility
    """Recursively list the files below ``dir``.

    :param dir: root directory to walk
    :param ext: optional container of extensions (without the leading dot);
        when given, only files whose extension satisfies ``extension in ext``
        are returned (for a plain string this is a substring test)
    :return: list of file paths
    """
    allfiles = []
    for root, _dirs, files in os.walk(dir):
        for filename in files:
            filepath = os.path.join(root, filename)
            extension = os.path.splitext(filepath)[1][1:]
            if ext is None or extension in ext:
                allfiles.append(filepath)
    return allfiles


def _is_blank_label(path, label_ext):
    """Tell whether the label file at ``path`` describes no object.

    An ``'xml'`` label is blank when it has no ``object`` element; any other
    label is blank when the file is empty.
    """
    if label_ext == 'xml':
        annotation = xml.dom.minidom.parse(path).documentElement
        return len(annotation.getElementsByTagName('object')) == 0
    # Checking the size avoids opening (and possibly leaking) the file.
    return os.path.getsize(path) == 0


def cleandata(path, img_path, blank_label_path, blank_img_path, ext, label_ext):
    """Move a sample without any labelled object out of the dataset.

    :param path: path of the label file to inspect
    :param img_path: directory holding the sample images
    :param blank_label_path: destination for blank label files
    :param blank_img_path: destination for the images of blank labels
    :param ext: image suffix including the dot, e.g. ``'.tif'``
    :param label_ext: ``'xml'`` for XML annotations; anything else is treated
        as a plain text label
    """
    name = custombasename(path)
    if _is_blank_label(path, label_ext):
        image_path = os.path.join(img_path, name + ext)
        shutil.move(image_path, blank_img_path)   # image first, then its label
        shutil.move(path, blank_label_path)
    print('正在处理 %s' % path)


if __name__ == '__main__':
    root = '/home/yantianwang/lala/ship/train/examplesplit'
    img_path = os.path.join(root, 'images')      # images after splitting
    label_path = os.path.join(root, 'labelTxt')  # labels after splitting
    ext = '.tif'                                 # image suffix
    label_ext = 'txt'
    # Destinations for blank samples and their labels.
    blank_img_path = os.path.join(root, 'blank_images')
    blank_label_path = os.path.join(root, 'blank_labelTxt')
    os.makedirs(blank_img_path, exist_ok=True)
    os.makedirs(blank_label_path, exist_ok=True)

    label_list = GetFileFromThisRootDir(label_path)
    for path in label_list:
        cleandata(path, img_path, blank_label_path, blank_img_path, ext, label_ext)








