您的位置:首页 > 编程语言 > Python开发

MTCNN中用celebA样本生成landmark训练样本python代码解读

2017-08-28 17:36 483 查看
最近跑通了MTCNN的训练代码,对其中生成positive,negative,part样本gen_48net_data2.py代码进行修改。

项目地址:https://github.com/dlunion/mtcnn

对应代码地址:https://github.com/dlunion/mtcnn/blob/master/train/gen_48net_data2.py

修改后实现:用celebA样本生成MTCNN的landmark训练样本。

标注文件格式如下:

000001.jpg 138 70 343 330 165 184 244 176 196 249 194 271 266 260
000002.jpg 105 107 295 362 140 204 220 204 168 254 146 289 226 289
000003.jpg 205 50 267 173 244 104 264 105 263 121 235 134 251 140
000004.jpg 505 172 1069 932 796 539 984 539 930 687 762 756 915 756
000005.jpg 241 98 365 239 273 169 328 161 298 172 283 208 323 207
000006.jpg 149 95 291 270 202 158 259 165 234 196 197 228 243 233
000007.jpg 71 93 256 335 129 198 204 190 163 242 140 277 202 271


第1列表示: 图片名;

第2-5列表示:标注的人脸bbox位置, x1,y1,x2,y2;

第6-15列表示:标注的landmark位置,  ptx0,pty0,ptx1,pty1,ptx2,pty2,ptx3,pty3,ptx4,pty4;

主要思路:在bbox基础上,随机扰动生成很多bbox,当且仅当五个landmark都在随机扰动生成的crop区域中时,认为是符合要求的landmark训练样本。   代码如下:

import sys
sys.path.append('D:\\Anaconda2\\libs') #导入系统路径,以便cv2模块的导入

import numpy as np
import cv2
import os
import numpy.random as npr
from utils import IoU

stdsize = 48
# 标注txt文件路径,celebA原图路径
anno_file = "E:/face_alignment/data/CelebA/Anno/celebA_bbox_landmark.txt"
im_dir = "E:/face_alignment/data/CelebA/Img/img_celeba.7z/img_celeba/"
# landmark样本的保存路径
pos_save_dir = str(stdsize) + "/landmark"
save_dir = "./" + str(stdsize)

# 创建文件夹函数
def mkr(dr):
if not os.path.exists(dr):
os.mkdir(dr)

mkr(save_dir)
mkr(pos_save_dir)

f1 = open(os.path.join(save_dir, 'landmark_' + str(stdsize) + '.txt'), 'w')
with open(anno_file, 'r') as f:
annotations = f.readlines()
num = len(annotations)
print "%d pics in total" % num
p_idx = 0 # positive
d_idx = 0 # dont care
idx = 0
box_idx = 0

for annotation in annotations:
# strip():去除annotations开头、结尾处空白符,split(' ')按空格进行切片
annotation = annotation.strip().split(' ')
im_path = annotation[0] # 图片名
bbox = map(float, annotation[1:-10]) # bbox 坐标
pts = map(float, annotation[-10:]) # landmark 坐标
boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4) # 将bbox转化为矩阵,并将列resize为4
im_path = im_dir + im_path # 图片全路径
img = cv2.imread(im_path) # 读取图片
idx += 1
if idx % 100 == 0:
print idx, "images done"

height, width, channel = img.shape

backupPts = pts[:]
for box in boxes:
# box (x_left, y_top, x_right, y_bottom)
x1, y1, x2, y2 = box
w = x2 - x1 + 1
h = y2 - y1 + 1

# ignore small faces
# in case the ground truth boxes of small faces are not accurate
if max(w, h) < 12 or x1 < 0 or y1 < 0:
continue

# generate landmark examples and part faces
# 对bbox进行随机scale,offset,得到新的crop区域,即对样本进行扰动,做样本增强
for i in range(10):
pts = backupPts[:]
size = npr.randint(int(min(w, h) * 0.9), np.ceil(1.1 * max(w, h)))
# scale做(0.9~1.1)之间扰动,offser做(-0.1~0.1)之间扰动
# delta here is the offset of box center
delta_x = npr.randint(-w * 0.1, w * 0.1)
delta_y = npr.randint(-h * 0.1, h * 0.1)

nx1 = max(x1 + w / 2 + delta_x - size / 2, 0)
ny1 = max(y1 + h / 2 + delta_y - size / 2, 0)
nx2 = nx1 + size
ny2 = ny1 + size

if nx2 > width or ny2 > height:
continue
crop_box = np.array([nx1, ny1, nx2, ny2])

offset_x1 = (x1 - nx1) / float(size)
offset_y1 = (y1 - ny1) / float(size)
offset_x2 = (x2 - nx2) / float(size)
offset_y2 = (y2 - ny2) / float(size)

# 当且仅当五个landmark都在随机扰动生成的crop区域中时,才保持使用
if pts[0] < nx1 or pts[0] > nx2:
continue
if pts[2] < nx1 or pts[2] > nx2:
continue
if pts[4] < nx1 or pts[4] > nx2:
continue
if pts[6] < nx1 or pts[6] > nx2:
continue
if pts[8] < nx1 or pts[8] > nx2:
continue

if pts[1] < ny1 or pts[1] > ny2:
continue
if pts[3] < ny1 or pts[3] > ny2:
continue
if pts[5] < ny1 or pts[5] > ny2:
continue
if pts[7] < ny1 or pts[7] > ny2:
continue
if pts[9] < ny1 or pts[9] > ny2:
continue

ptss = pts[:]
# 将landmark转化为相对于左上角的归一化值
for k in range(len(ptss) / 2):
pts[k] = (ptss[k*2] - nx1) / float(size);
pts[k+5] = (ptss[k*2+1] - ny1) / float(size);

# 从原图中crop图片区域,并resize成stdsize大小
cropped_im = img[int(ny1) : int(ny2), int(nx1) : int(nx2), :]
resized_im = cv2.resize(cropped_im, (stdsize, stdsize), interpolation=cv2.INTER_LINEAR)

box_ = box.reshape(1, -1)

# 将图片名,bbox偏移量写入txt文本中
save_file = os.path.join(pos_save_dir, "%s.jpg"%p_idx)
f1.write(str(stdsize)+"/landmark/%s.jpg"%p_idx + ' -1 -1 -1 -1 -1')

# 将landmark坐标写入txt文件中
for k in range(len(pts)):
f1.write(" %f" % pts[k])
f1.write("\n")
cv2.imwrite(save_file, resized_im) # 保存图片
p_idx += 1

box_idx += 1
print "%s images done, pos: %s part: %s "%(idx, p_idx, d_idx)

f1.close()



得到的效果图如下:



内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: