Python 处理 Pascal VOC 数据:读取 XML 标注文件
2017-04-15 18:17
543 查看
Pascal VOC 数据集的标注(annotation)以 XML 文件保存。下面的脚本读取每个 XML 文件中的目标名称(name)和边界框(bndbox)坐标,并据此从对应图像中裁剪出目标区域。
from __future__ import division
import os
from PIL import Image
import xml.dom.minidom
import numpy as np
# Input and output locations; adjust them to where the VOC2012 data lives.
ImgPath = 'C:/Users/liesmars/Desktop/VOC2012/JPEGImages/'
AnnoPath = 'C:/Users/liesmars/Desktop/VOC2012/Annotations/'
ProcessedPath = 'C:/Users/liesmars/Desktop/CropedVOC/'

# Scale factors applied to (xmin, ymin, xmax, ymax) to derive crop variants
# from one annotated box; the row of ones keeps the annotated box itself.
CROP_SCALES = (
    (0.8, 0.8, 1.2, 1.2),
    (0.9, 0.9, 1.1, 1.1),
    (1, 1, 1, 1),
    (0.8, 0.8, 1, 1),
    (1, 1, 1.2, 1.2),
    (0.8, 1, 1, 1.2),
    (1, 0.8, 1.2, 1),
)


def read_bndbox(box):
    """Return ``(xmin, ymin, xmax, ymax)`` of a ``<bndbox>`` DOM element.

    Values are converted through ``float`` first so that coordinates
    written with a decimal point are accepted; they are truncated to int.

    Raises:
        IndexError: if one of the four coordinate tags is missing or empty.
        ValueError: if a coordinate is not numeric.
    """
    coords = []
    for tag in ('xmin', 'ymin', 'xmax', 'ymax'):
        text = box.getElementsByTagName(tag)[0].childNodes[0].data
        coords.append(int(float(text)))
    return tuple(coords)


def augmented_crop_boxes(x1, y1, x2, y2, width, height):
    """Return the crop boxes derived from one annotated bounding box.

    Nine candidates are generated: the seven scaled variants listed in
    ``CROP_SCALES`` plus the box translated by +1/6 and by -1/6 of its own
    width and height.  Each candidate is clamped to the image rectangle
    ``(0, 0, width, height)`` and rounded to whole pixels; candidates that
    have no area left after clamping are dropped.

    Args:
        x1, y1, x2, y2: corners of the annotated box, in pixels.
        width, height: size of the image the box belongs to.

    Returns:
        A list of ``(left, top, right, bottom)`` tuples of ints.
    """
    base = np.array([x1, y1, x2, y2], dtype=float)
    # The translation is added directly.  Writing it as a ratio of the
    # corner coordinates divides by zero whenever the box touches the left
    # or top edge of the image (x1 == 0 or y1 == 0).
    step = np.array([x2 - x1, y2 - y1, x2 - x1, y2 - y1]) / 6.0
    candidates = list(base * np.array(CROP_SCALES))
    candidates.append(base + step)
    candidates.append(base - step)

    boxes = []
    for left, top, right, bottom in candidates:
        clamped = (
            int(round(float(max(0, left)))),
            int(round(float(max(0, top)))),
            int(round(float(min(right, width)))),
            int(round(float(min(bottom, height)))),
        )
        # An empty crop cannot be saved as an image, so skip it.
        if clamped[2] > clamped[0] and clamped[3] > clamped[1]:
            boxes.append(clamped)
    return boxes


def main():
    """Crop every annotated object of every image in ``ImgPath``.

    For each image the matching XML file in ``AnnoPath`` is parsed, and for
    each bounding box found under an ``<object>`` element the variants from
    ``augmented_crop_boxes`` are cut out and written to
    ``ProcessedPath/<object name>/<image stem>_<counter>.jpg``.  The counter
    restarts at 1 for every image.
    """
    if not os.path.exists(ProcessedPath):
        os.makedirs(ProcessedPath)

    for image in os.listdir(ImgPath):
        print('a new image: ' + image)
        image_pre = os.path.splitext(image)[0]
        imgfile = ImgPath + image
        xmlfile = AnnoPath + image_pre + '.xml'
        if not os.path.isfile(xmlfile):
            # Stray files in the image folder must not abort the whole run.
            print('no annotation file, skipped: ' + xmlfile)
            continue

        annotation = xml.dom.minidom.parse(xmlfile).documentElement
        img = None  # opened on first use, then shared by all boxes
        i = 1
        for objects in annotation.getElementsByTagName('object'):
            objectname = objects.getElementsByTagName('name')[0].childNodes[0].data
            print(objectname)
            # Crops are grouped in one sub-directory per object name.
            savepath = os.path.join(ProcessedPath, objectname)

            # getElementsByTagName searches all descendants, so boxes of
            # elements nested inside this <object> are processed as well.
            for box in objects.getElementsByTagName('bndbox'):
                # Deliberate best-effort boundary: a broken box or image is
                # reported and skipped so the remaining data still gets
                # processed.
                try:
                    x1, y1, x2, y2 = read_bndbox(box)
                    if img is None:
                        img = Image.open(imgfile)
                    width, height = img.size
                    if not os.path.isdir(savepath):
                        os.makedirs(savepath)
                    for cropbox in augmented_crop_boxes(x1, y1, x2, y2, width, height):
                        cropedimg = img.crop(cropbox)
                        cropedimg.save(os.path.join(
                            savepath, image_pre + '_' + str(i) + '.jpg'))
                        i += 1
                except Exception as e:
                    print(e)


if __name__ == '__main__':
    main()
from __future__ import division
import os
from PIL import Image
import xml.dom.minidom
import numpy as np
# Input and output locations; adjust them to where the VOC2012 data lives.
ImgPath = 'C:/Users/liesmars/Desktop/VOC2012/JPEGImages/'
AnnoPath = 'C:/Users/liesmars/Desktop/VOC2012/Annotations/'
ProcessedPath = 'C:/Users/liesmars/Desktop/CropedVOC/'

# Scale factors applied to (xmin, ymin, xmax, ymax) to derive crop variants
# from one annotated box; the row of ones keeps the annotated box itself.
CROP_SCALES = (
    (0.8, 0.8, 1.2, 1.2),
    (0.9, 0.9, 1.1, 1.1),
    (1, 1, 1, 1),
    (0.8, 0.8, 1, 1),
    (1, 1, 1.2, 1.2),
    (0.8, 1, 1, 1.2),
    (1, 0.8, 1.2, 1),
)


def read_bndbox(box):
    """Return ``(xmin, ymin, xmax, ymax)`` of a ``<bndbox>`` DOM element.

    Values are converted through ``float`` first so that coordinates
    written with a decimal point are accepted; they are truncated to int.

    Raises:
        IndexError: if one of the four coordinate tags is missing or empty.
        ValueError: if a coordinate is not numeric.
    """
    coords = []
    for tag in ('xmin', 'ymin', 'xmax', 'ymax'):
        text = box.getElementsByTagName(tag)[0].childNodes[0].data
        coords.append(int(float(text)))
    return tuple(coords)


def augmented_crop_boxes(x1, y1, x2, y2, width, height):
    """Return the crop boxes derived from one annotated bounding box.

    Nine candidates are generated: the seven scaled variants listed in
    ``CROP_SCALES`` plus the box translated by +1/6 and by -1/6 of its own
    width and height.  Each candidate is clamped to the image rectangle
    ``(0, 0, width, height)`` and rounded to whole pixels; candidates that
    have no area left after clamping are dropped.

    Args:
        x1, y1, x2, y2: corners of the annotated box, in pixels.
        width, height: size of the image the box belongs to.

    Returns:
        A list of ``(left, top, right, bottom)`` tuples of ints.
    """
    base = np.array([x1, y1, x2, y2], dtype=float)
    # The translation is added directly.  Writing it as a ratio of the
    # corner coordinates divides by zero whenever the box touches the left
    # or top edge of the image (x1 == 0 or y1 == 0).
    step = np.array([x2 - x1, y2 - y1, x2 - x1, y2 - y1]) / 6.0
    candidates = list(base * np.array(CROP_SCALES))
    candidates.append(base + step)
    candidates.append(base - step)

    boxes = []
    for left, top, right, bottom in candidates:
        clamped = (
            int(round(float(max(0, left)))),
            int(round(float(max(0, top)))),
            int(round(float(min(right, width)))),
            int(round(float(min(bottom, height)))),
        )
        # An empty crop cannot be saved as an image, so skip it.
        if clamped[2] > clamped[0] and clamped[3] > clamped[1]:
            boxes.append(clamped)
    return boxes


def main():
    """Crop every annotated object of every image in ``ImgPath``.

    For each image the matching XML file in ``AnnoPath`` is parsed, and for
    each bounding box found under an ``<object>`` element the variants from
    ``augmented_crop_boxes`` are cut out and written to
    ``ProcessedPath/<object name>/<image stem>_<counter>.jpg``.  The counter
    restarts at 1 for every image.
    """
    if not os.path.exists(ProcessedPath):
        os.makedirs(ProcessedPath)

    for image in os.listdir(ImgPath):
        print('a new image: ' + image)
        image_pre = os.path.splitext(image)[0]
        imgfile = ImgPath + image
        xmlfile = AnnoPath + image_pre + '.xml'
        if not os.path.isfile(xmlfile):
            # Stray files in the image folder must not abort the whole run.
            print('no annotation file, skipped: ' + xmlfile)
            continue

        annotation = xml.dom.minidom.parse(xmlfile).documentElement
        img = None  # opened on first use, then shared by all boxes
        i = 1
        for objects in annotation.getElementsByTagName('object'):
            objectname = objects.getElementsByTagName('name')[0].childNodes[0].data
            print(objectname)
            # Crops are grouped in one sub-directory per object name.
            savepath = os.path.join(ProcessedPath, objectname)

            # getElementsByTagName searches all descendants, so boxes of
            # elements nested inside this <object> are processed as well.
            for box in objects.getElementsByTagName('bndbox'):
                # Deliberate best-effort boundary: a broken box or image is
                # reported and skipped so the remaining data still gets
                # processed.
                try:
                    x1, y1, x2, y2 = read_bndbox(box)
                    if img is None:
                        img = Image.open(imgfile)
                    width, height = img.size
                    if not os.path.isdir(savepath):
                        os.makedirs(savepath)
                    for cropbox in augmented_crop_boxes(x1, y1, x2, y2, width, height):
                        cropedimg = img.crop(cropbox)
                        cropedimg.save(os.path.join(
                            savepath, image_pre + '_' + str(i) + '.jpg'))
                        i += 1
                except Exception as e:
                    print(e)


if __name__ == '__main__':
    main()
相关文章推荐
- 【python图像处理】txt文件数据的读取与写入
- 用python读取oracle的clob类型数据的处理
- Python读取和处理文件后缀为".sqlite"的数据文件
- 数据处理_文件读取_PythonPickle模块(笔记)
- mac环境下,Python读取excel数据,并对行列做输出处理
- ean13码的生成,python读取csv中数据并处理返回并写入到另一个csv文件中
- 深度学习python之制作VOC数据集中的xml文件(Annotations文件夹内)
- C# Python 读取txt中的文件,将16进制的数据按照小端方式排列,处理为有符号的数字,并写入text文件;控制台和界面两种方式
- Python读取和处理文件后缀为.sqlite的数据文件(实例讲解)
- python项目:获取微信好友信息(二)csv数据读取与处理
- 使用Python与openpyxl进行表格处理(一)——读取数据
- 详解Excel互操作中处理数据的几个方法(上:读取数据)
- 读取Xml文件进行绑定数据
- .net c/s 从xml文件读取数据参数,间隔2秒循环访问某个页面
- python处理重定位数据
- php从数据库中读取数据生成xml文件的方法
- python读取文件数据绘直角坐标图
- 如何处理ODBC中EXCEL驱动读取EXCEL文件中字段长度大于255字符时出现的"数据截断"问题.
- 将xml文件的数据载入到DataSet中,在读取出来
- 如何处理Oledb中EXCEL驱动读取EXCEL文件中字段长度大于255字符时出现的"数据截断"问题.