# DOTA 数据集转 VOC 格式
# 1. 将 DOTA 数据集的 txt 标签转换为 VOC 可用的 xml 文件(只需修改原 DOTA txt 标签的路径和用于保存 xml 的文件夹路径即可)
import os
from xml.dom.minidom import Document
from xml.dom.minidom import parse
import xml.dom.minidom
import numpy as np
import csv
import cv2
import string
def WriterXMLFiles(filename, path, box_list, label_list, w, h, d):
# dict_box[filename]=json_dict[filename]
doc = xml.dom.minidom.Document()
root = doc.createElement('annotation')
doc.appendChild(root)
foldername = doc.createElement("folder")
foldername.appendChild(doc.createTextNode("JPEGImages"))
root.appendChild(foldername)
nodeFilename = doc.createElement('filename')
nodeFilename.appendChild(doc.createTextNode(filename))
root.appendChild(nodeFilename)
pathname = doc.createElement("path")
pathname.appendChild(doc.createTextNode("xxxx"))
root.appendChild(pathname)
sourcename=doc.createElement("source")
databasename = doc.createElement("database")
databasename.appendChild(doc.createTextNode("Unknown"))
sourcename.appendChild(databasename)
annotationname = doc.createElement("annotation")
annotationname.appendChild(doc.createTextNode("xxx"))
sourcename.appendChild(annotationname)
imagename = doc.createElement("image")
imagename.appendChild(doc.createTextNode("xxx"))
sourcename.appendChild(imagename)
flickridname = doc.createElement("flickrid")
flickridname.appendChild(doc.createTextNode("0"))
sourcename.appendChild(flickridname)
root.appendChild(sourcename)
nodesize = doc.createElement('size')
nodewidth = doc.createElement('width')
nodewidth.appendChild(doc.createTextNode(str(w)))
nodesize.appendChild(nodewidth)
nodeheight = doc.createElement('height')
nodeheight.appendChild(doc.createTextNode(str(h)))
nodesize.appendChild(nodeheight)
nodedepth = doc.createElement('depth')
nodedepth.appendChild(doc.createTextNode(str(d)))
nodesize.appendChild(nodedepth)
root.appendChild(nodesize)
segname = doc.createElement("segmented")
segname.appendChild(doc.createTextNode("0"))
root.appendChild(segname)
for (box, label) in zip(box_list, label_list):
nodeobject = doc.createElement('object')
nodename = doc.createElement('name')
nodename.appendChild(doc.createTextNode(str(label)))
nodeobject.appendChild(nodename)
nodebndbox = doc.createElement('bndbox')
nodex1 = doc.createElement('x1')
nodex1.appendChild(doc.createTextNode(str(box[0])))
nodebndbox.appendChild(nodex1)
nodey1 = doc.createElement('y1')
nodey1.appendChild(doc.createTextNode(str(box[1])))
nodebndbox.appendChild(nodey1)
nodex2 = doc.createElement('x2')
nodex2.appendChild(doc.createTextNode(str(box[2])))
nodebndbox.appendChild(nodex2)
nodey2 = doc.createElement('y2')
nodey2.appendChild(doc.createTextNode(str(box[3])))
nodebndbox.appendChild(nodey2)
nodex3 = doc.createElement('x3')
nodex3.appendChild(doc.createTextNode(str(box[4])))
nodebndbox.appendChild(nodex3)
nodey3 = doc.createElement('y3')
nodey3.appendChild(doc.createTextNode(str(box[5])))
nodebndbox.appendChild(nodey3)
nodex4 = doc.createElement('x4')
nodex4.appendChild(doc.createTextNode(str(box[6])))
nodebndbox.appendChild(nodex4)
nodey4 = doc.createElement('y4')
nodey4.appendChild(doc.createTextNode(str(box[7])))
nodebndbox.appendChild(nodey4)
# ang = doc.createElement('angle')
# ang.appendChild(doc.createTextNode(str(angle)))
# nodebndbox.appendChild(ang)
nodeobject.appendChild(nodebndbox)
root.appendChild(nodeobject)
fp = open(path + filename, 'w')
doc.writexml(fp, indent='\n')
fp.close()
def load_annoataion(p):
    """Load polygon annotations from a DOTA-style label text file.

    The first two lines of the file are skipped (presumably the
    ``imagesource`` / ``gsd`` header lines of DOTA labels -- confirm for
    your data). Every remaining non-blank line must hold at least nine
    whitespace-separated fields: eight corner coordinates
    ``x1 y1 x2 y2 x3 y3 x4 y4`` followed by the class label. Any further
    fields are ignored.

    Args:
        p: Path of the label file.

    Returns:
        A ``(boxes, labels)`` tuple. ``boxes`` is an ``int32`` array of
        shape ``(N, 8)`` (coordinates are parsed as floats, then cast to
        integers); ``labels`` is a string array of length ``N``. Both are
        empty when the file does not exist or holds no annotations.

    Raises:
        ValueError: If a non-blank line has fewer than nine fields or a
            coordinate is not a number.
    """
    text_polys = []
    text_tags = []
    if os.path.exists(p):
        with open(p, 'r') as f:
            for line in f.readlines()[2:]:
                # split() without arguments drops the trailing newline and
                # tolerates repeated spaces or tabs between fields.
                fields = line.split()
                if not fields:
                    continue
                if len(fields) < 9:
                    raise ValueError(
                        'expected 8 coordinates and a label, got %r' % line)
                text_polys.append([float(value) for value in fields[:8]])
                text_tags.append(fields[8])
    # Always return two arrays so callers can unpack the result; the
    # reshape keeps the (N, 8) layout even when there are no annotations.
    # The builtin str replaces np.str, which was removed in NumPy 1.24.
    boxes = np.array(text_polys, dtype=np.int32).reshape(-1, 8)
    labels = np.array(text_tags, dtype=str)
    return boxes, labels
if __name__ == "__main__":
    # Convert every DOTA label file in txt_path into an XML annotation in
    # xml_path, reading the matching .png image to obtain its size.
    txt_path = './txts-hbb/'
    xml_path = './Annotations-hbb/'
    # NOTE(review): 'JEPGImages' looks like a misspelling of 'JPEGImages';
    # left unchanged because it must match the folder name on disk.
    img_path = './JEPGImages/'
    print(os.path.exists(txt_path))
    # Create the output folder up front so writing the first XML cannot
    # fail just because the directory is missing.
    os.makedirs(xml_path, exist_ok=True)
    # Only label files are converted; other directory entries are ignored.
    txts = [name for name in os.listdir(txt_path) if name.endswith('.txt')]
    for count, t in enumerate(txts):
        print(count, t)
        # splitext swaps only the extension, whereas str.replace would
        # rewrite every '.txt' occurring inside the name.
        stem = os.path.splitext(t)[0]
        xml_name = stem + '.xml'
        img_name = stem + '.png'
        print(img_name)
        img = cv2.imread(os.path.join(img_path, img_name))
        if img is None:
            # cv2.imread returns None instead of raising when the image
            # is missing or unreadable; skip the label with a warning.
            print('skip %s: cannot read image %s'
                  % (t, os.path.join(img_path, img_name)))
            continue
        boxes, labels = load_annoataion(os.path.join(txt_path, t))
        h, w, d = img.shape
        WriterXMLFiles(xml_name, xml_path, boxes, labels, w, h, d)
        if count % 1000 == 0:
            print(count)
# 2. 在 VOC 的 Main 文件夹中新建索引文件(只需将路径改为原 txt 标签所在的文件夹)
import os
# Append the base name (file name without extension) of every entry in the
# label folder to ./train.txt, one name per line.
txt_dir = './txts-hbb'
# Sorted so the index order is reproducible; os.listdir order is arbitrary.
names = sorted(os.listdir(txt_dir))
# Append mode adds to the end of an existing index and creates the file when
# it is missing; one handle is used instead of reopening and re-reading the
# whole file for every entry.
with open('./train.txt', 'a') as f:
    for entry in names:
        shotname, ext = os.path.splitext(entry)
        f.write(shotname + '\n')