#!/usr/bin/python
# -*- coding: UTF-8 -*-
# File name: txt_to_xml.py
"""Convert PASCAL-style ``.txt`` annotation files into VOC-style XML files.

Every file in the ``Annotations`` directory whose name contains ``.txt`` is
scanned for an image-size line, an object-count line and bounding-box lines.
The result is written into the same directory as ``00<N>.xml``, where ``<N>``
is the first number found in the text file's name (leading zeros dropped).
"""
from xml.dom.minidom import Document
import os
import re

ANNOTATION_DIR = "Annotations"

_NUMBER_RE = re.compile(r'\d+')
# Captures the object number so that "object 1" is not confused with
# "object 10", "object 11", ... (a plain substring test would match them all).
# NOTE(review): some PASCAL/INRIA annotation files spell this marker
# "Bounding box" (lowercase b) -- confirm against the actual input data.
_BOX_RE = re.compile(r'Bounding Box for object (\d+)')


def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to *parent* and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def _append_object(doc, annotation, coordinate):
    """Append one ``<object>`` entry built from a bounding-box line's numbers.

    *coordinate* holds every number found on the bounding-box line.  The
    first one is the object's own sequence number, so the box corners are
    taken from positions 1 to 4.
    """
    obj = doc.createElement('object')
    annotation.appendChild(obj)
    _append_text_element(doc, obj, 'name', 'person')
    _append_text_element(doc, obj, 'pose', 'Unspecified')
    _append_text_element(doc, obj, 'truncated', '0')
    _append_text_element(doc, obj, 'difficult', '0')
    # NOTE(review): PASCAL VOC tooling commonly spells this tag "bndbox";
    # kept as-is because consumers of these files may rely on it -- confirm.
    box = doc.createElement('bndBox')
    obj.appendChild(box)
    _append_text_element(doc, box, 'xmin', coordinate[1])
    _append_text_element(doc, box, 'ymin', coordinate[2])
    _append_text_element(doc, box, 'xmax', coordinate[3])
    _append_text_element(doc, box, 'ymax', coordinate[4])


def _build_document(lines, image_number):
    """Build the XML document for one annotation file.

    *lines* are the text lines of the ``.txt`` file and *image_number* is the
    number (as a string) used to form the ``<filename>`` entry.
    """
    doc = Document()
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)
    _append_text_element(doc, annotation, 'folder', 'VOC2007')
    _append_text_element(doc, annotation, 'filename',
                         "00" + image_number + ".jpg")
    source = doc.createElement('source')
    annotation.appendChild(source)
    _append_text_element(doc, source, 'database', 'PASperson Database')
    _append_text_element(doc, source, 'annotation', 'PASperson')

    # Header scan: stops at the first line containing "Objects".  The count
    # starts at zero for every file, so a file without such a line yields no
    # objects instead of reusing a previous file's count.
    object_count = 0
    for line in lines:
        if "size" in line:
            sizes = _NUMBER_RE.findall(line)
            size = doc.createElement('size')
            annotation.appendChild(size)
            _append_text_element(doc, size, 'width', sizes[0])
            _append_text_element(doc, size, 'height', sizes[1])
            _append_text_element(doc, size, 'depth', sizes[2])
            _append_text_element(doc, annotation, 'segmented', '0')
        if 'Objects' in line:
            counts = _NUMBER_RE.findall(line)
            if counts:
                object_count = int(counts[0])
            break

    # Single pass over the file: group bounding-box lines by object number.
    boxes = {}
    for line in lines:
        match = _BOX_RE.search(line)
        if match:
            numbers = _NUMBER_RE.findall(line)
            boxes.setdefault(int(match.group(1)), []).append(numbers)

    # Emit objects in numeric order, limited to the declared object count.
    for index in range(1, object_count + 1):
        for coordinate in boxes.get(index, ()):
            _append_object(doc, annotation, coordinate)
    return doc


def main(annotation_dir=ANNOTATION_DIR):
    """Convert every ``.txt`` annotation in *annotation_dir* to XML.

    Output files are written into *annotation_dir* itself.  Files whose name
    contains no number are reported and skipped, because the output name is
    derived from that number.
    """
    for old_name in os.listdir(annotation_dir):
        if ".txt" not in old_name:
            continue
        print(old_name)
        file_index = _NUMBER_RE.findall(old_name)
        print(file_index)
        if not file_index:
            print('skipping (no number in file name):' + old_name)
            continue
        image_number = str(int(file_index[0]))
        print(image_number)

        with open(os.path.join(annotation_dir, old_name), "r") as src:
            print('processing:' + old_name)
            lines = src.readlines()

        doc = _build_document(lines, image_number)

        new_name = "00" + image_number + ".xml"
        with open(os.path.join(annotation_dir, new_name), 'w') as dst:
            dst.write(doc.toprettyxml(indent=""))
        print(str(file_index) + " compelete")
    print('process compelete')


if __name__ == "__main__":
    main()