python批量预处理图片(CascadeCNN训练前期预处理)
2016-07-05 17:42
218 查看
1、create_negative.py
2、create_positive.py
3、shuffle_write_negatives.py
4、shuffle_write_positives.py
5、write_train_val.py
# Purpose: cut square patches of several sizes out of every negative
# (face-free) image and save them into numbered output folders.
import numpy as np  # kept from the original script; not referenced below
import cv2          # OpenCV: image reading / writing
import os

# Folder holding the original negative images.
data_base_dir = "/home/xiao/code/CNN_face_detection/face_pictures/image-noface"
start_neg_dir = 1
end_neg_dir = 50
# Number of source images assigned to each output folder.
images_per_dir = 300

# Names of all .jpg files found in data_base_dir.
file_list = [name for name in os.listdir(data_base_dir) if name.endswith(".jpg")]
number_of_pictures = len(file_list)
print("number_of_pictures: " + str(number_of_pictures))

# ============== create directories ==================================
# Common prefix of the output folders: negative_01, negative_02, ...
directory = '/home/xiao/code/CNN_face_detection/face_pictures/negatives/negative_'
# Create exactly the folders the loop below writes to. The previous
# range(1, 50) stopped at 49, so the patches for folder 50 were never saved.
for cur_file in range(start_neg_dir, end_neg_dir + 1):
    path = directory + str(cur_file).zfill(2)  # zero-padded to two digits
    if not os.path.exists(path):
        os.makedirs(path)

# ============== create negatives =====================================
for current_neg_dir in range(start_neg_dir, end_neg_dir + 1):
    save_image_number = 0  # running patch counter inside this folder
    save_dir_neg = directory + str(current_neg_dir).zfill(2)

    # Folder k receives source images [(k-1)*300, k*300).
    first_image = (current_neg_dir - 1) * images_per_dir
    if first_image >= number_of_pictures:
        break  # no source images left for this or any later folder
    # Clamp the upper bound so file_list is never indexed out of range.
    last_image = min(first_image + images_per_dir, number_of_pictures)

    for current_image in range(first_image, last_image):
        if current_image % 10 == 0:  # progress message every 10 images
            print("Processing image number " + str(current_image))

        read_img_name = data_base_dir + '/' + file_list[current_image].strip()
        img = cv2.imread(read_img_name)
        if img is None:
            # cv2.imread returns None instead of raising when it cannot
            # decode the file.
            print("Skipping unreadable image " + read_img_name)
            continue

        height, width = img.shape[:2]
        # Start with half of the shorter side. Integer division keeps
        # crop_size usable as a slice bound (floats are rejected by NumPy).
        crop_size = min(height, width) // 2

        while crop_size >= 12:
            # Slide the window over the image with a stride of 100 pixels.
            for start_height in range(0, height, 100):
                for start_width in range(0, width, 100):
                    if (start_width + crop_size) > width:
                        break
                    # NOTE(review): only the width is bounds-checked, so
                    # patches that start near the bottom edge can be shorter
                    # than crop_size. Behavior kept as in the original.
                    cropped_img = img[start_height:start_height + crop_size,
                                      start_width:start_width + crop_size]
                    file_name = (save_dir_neg + "/neg"
                                 + str(current_neg_dir).zfill(2) + "_"
                                 + str(save_image_number).zfill(6) + ".jpg")
                    cv2.imwrite(file_name, cropped_img)
                    save_image_number += 1
            crop_size //= 2  # next pass uses patches half as large
2、create_positive.py
# Purpose: crop the annotated rectangle out of every positive image and
# save the crops into the positives folder.
import numpy as np  # kept from the original script; not referenced below
import cv2          # OpenCV: image reading / writing
import os

# Folder that the image names in the annotation file are relative to.
data_base_dir = "/home/xiao/code/CNN_face_detection/face_pictures"
# Folder receiving the cropped images.
save_dir = "/home/xiao/code/CNN_face_detection/face_pictures/positives"
# Text file with one "image_name x y w h" record per line.
read_file_name_rect = "/home/xiao/code/CNN_face_detection/face_pictures/pos.txt"

# =========== read rect file ===============
with open(read_file_name_rect, "r") as ins:
    array_rect = ins.readlines()
array_rect = array_rect[1:]  # the first line is skipped, as in the original
number_of_lines = len(array_rect)
print("number_of_lines: " + str(number_of_lines))

# cv2.imwrite does not create missing folders, so make sure it exists.
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# =========== Start processing ===============
save_file_number = 0  # number of crops written so far
for current_rect, record in enumerate(array_rect):
    if current_rect % 10 == 0:  # progress message every 10 records
        print("Processing rect number " + str(current_rect))

    current_info = record.split()
    if len(current_info) < 5:
        # Blank or truncated line: there is nothing to crop. The previous
        # "is None" test on the name could never trigger, and such a line
        # crashed with IndexError instead.
        continue

    current_image_name = current_info[0]
    # (x, y) is the top-left corner, clamped to the image; w / h are the
    # width and height of the rectangle.
    x = max(0, int(current_info[1]))
    y = max(0, int(current_info[2]))
    w = int(current_info[3])
    h = int(current_info[4])

    read_img_name = data_base_dir + '/' + current_image_name
    if not os.path.exists(read_img_name):
        continue

    img = cv2.imread(read_img_name)
    if img is None:
        # The file exists but could not be decoded as an image.
        continue

    cropped_img = img[y:y + h, x:x + w]
    if cropped_img.size == 0:
        # Rectangle has no area or lies outside the image; cv2.imwrite
        # raises on an empty array.
        continue

    # Output name: pos_ followed by a six-digit, zero-padded counter.
    file_name = save_dir + "/pos_" + str(save_file_number).zfill(6) + ".jpg"
    save_file_number += 1
    cv2.imwrite(file_name, cropped_img)
3、shuffle_write_negatives.py
# Purpose: list the negative sample images of the selected folders, append
# the label 0 to every path, shuffle the list and write it to a text file.
import os
import random

trainingNet = 48  # which network the list is prepared for: 12, 24 or 48

# Folder containing the negative_XX sub-folders.
data_base_dir = "/home/xiao/code/CNN_face_detection/face_pictures/negatives"

# trainingNet -> (first folder number, last folder number, output list file)
NEGATIVE_SETS = {
    12: (1, 6, '/home/xiao/code/CNN_face_detection/face_pictures/data/all_negatives.txt'),
    24: (4, 9, '/home/xiao/code/CNN_face_detection/face_pictures/data/all_negatives_24c.txt'),
    48: (7, 13, '/home/xiao/code/CNN_face_detection/face_pictures/data/all_negatives_48c.txt'),
}


def select_negative_set(net_size):
    """Return (first_dir, last_dir, list_file) for the given network size.

    Raises ValueError for a size that is not 12, 24 or 48; the original
    if/elif chain left the variables undefined and failed later with a
    NameError.
    """
    if net_size not in NEGATIVE_SETS:
        raise ValueError("unsupported trainingNet: " + repr(net_size))
    return NEGATIVE_SETS[net_size]


def collect_negative_entries(base_dir, first_dir, last_dir):
    """Return "path 0" entries for every .jpg in negative_<first..last>.

    Folder numbers are zero-padded to two digits and last_dir is inclusive.
    """
    entries = []
    for current_neg_dir in range(first_dir, last_dir + 1):
        current_dir = base_dir + '/negative_' + str(current_neg_dir).zfill(2)
        for name in os.listdir(current_dir):
            if name.endswith(".jpg"):
                # image path + space + label (0 = negative sample)
                entries.append(current_dir + '/' + name + ' ' + str(0))
    return entries


def main():
    """Build the shuffled negative list and write it, one entry per line."""
    start_neg_dir, end_neg_dir, write_file_name = select_negative_set(trainingNet)
    file_list = collect_negative_entries(data_base_dir, start_neg_dir, end_neg_dir)
    random.shuffle(file_list)
    print(len(file_list))
    # Open the output only after the scan succeeded, and let "with" close
    # it even if a write fails.
    with open(write_file_name, "w") as write_file:
        write_file.writelines(entry + '\n' for entry in file_list)


if __name__ == "__main__":
    main()
4、shuffle_write_positives.py
# Purpose: list the positive sample images, append the label 1 to every
# path, shuffle the list and write it to a text file.
import os
import random

# Folder containing the positive sample images.
data_base_dir = "/home/xiao/code/CNN_face_detection/face_pictures/positives"
# Text file receiving one "path label" entry per line.
write_file_name = '/home/xiao/code/CNN_face_detection/face_pictures/data/all_positives.txt'


def collect_positive_entries(base_dir):
    """Return a "path 1" entry for every .jpg file directly in base_dir."""
    # image path + space + label (1 = positive sample)
    return [base_dir + '/' + name + ' ' + str(1)
            for name in os.listdir(base_dir)
            if name.endswith(".jpg")]


def main():
    """Build the shuffled positive list and write it, one entry per line."""
    file_list = collect_positive_entries(data_base_dir)
    random.shuffle(file_list)
    # The output used to be opened (and truncated) before the folder was
    # read; open it only once the entries exist and let "with" close it.
    with open(write_file_name, "w") as write_file:
        write_file.writelines(entry + '\n' for entry in file_list)


if __name__ == "__main__":
    main()
5、write_train_val.py
# Purpose: split the positive and negative sample lists into a validation
# set (the first 500 entries of each list) and a training set (the rest),
# copy the images into the val / train folders and write one
# "file_name label" line per image into the matching text file.
import os
import cv2     # kept from the original script; not referenced below
import shutil  # high-level file copy helpers
import random

trainingNet = 48  # which network the data is prepared for: 12, 24 or 48

# Number of entries taken from each list for the validation set.
VAL_PER_CLASS = 500

# The positive list is shared by all three networks.
pos_file_name = '/home/xiao/code/CNN_face_detection/face_pictures/data/all_positives.txt'

# trainingNet -> negative list, image folders and output list files
DATASETS = {
    12: {
        'neg_list': '/home/xiao/code/CNN_face_detection/face_pictures/data/all_negatives.txt',
        'train_dir': '/home/xiao/code/CNN_face_detection/face_pictures/train_12c',
        'val_dir': '/home/xiao/code/CNN_face_detection/face_pictures/val',
        'train_list': '/home/xiao/code/CNN_face_detection/face_pictures/data/train_12c.txt',
        'val_list': '/home/xiao/code/CNN_face_detection/face_pictures/data/val.txt',
    },
    24: {
        'neg_list': '/home/xiao/code/CNN_face_detection/face_pictures/data/all_negatives_24c.txt',
        'train_dir': '/home/xiao/code/CNN_face_detection/face_pictures/train_val/train_24c',
        'val_dir': '/home/xiao/code/CNN_face_detection/face_pictures/train_val/val_24c',
        'train_list': '/home/xiao/code/CNN_face_detection/face_pictures/data/train_24c.txt',
        'val_list': '/home/xiao/code/CNN_face_detection/face_pictures/data/val_24c.txt',
    },
    48: {
        'neg_list': '/home/xiao/code/CNN_face_detection/face_pictures/data/all_negatives_48c.txt',
        'train_dir': '/home/xiao/code/CNN_face_detection/face_pictures/train_val/train_48c',
        'val_dir': '/home/xiao/code/CNN_face_detection/face_pictures/train_val/val_48c',
        'train_list': '/home/xiao/code/CNN_face_detection/face_pictures/data/train_48c.txt',
        'val_list': '/home/xiao/code/CNN_face_detection/face_pictures/data/val_48c.txt',
    },
}


def parse_list_entry(line):
    """Split a "path label" list line into (source_path, file_name, label).

    The label is whatever follows the last space. file_name is the last
    path component, i.e. the name the image has after it is copied flat
    into the target folder. This replaces the fixed [48:-3] / [0:-3]
    slices, which left part of the folder path in the listed name and
    required a one-character label plus a trailing newline.
    """
    source, label = line.strip().rsplit(' ', 1)
    return source, os.path.basename(source), label


def read_entries(list_file_name):
    """Return the non-blank lines of a sample list file."""
    with open(list_file_name, "r") as ins:
        return [line for line in ins if line.strip()]


if trainingNet not in DATASETS:
    raise ValueError("unsupported trainingNet: " + repr(trainingNet))
settings = DATASETS[trainingNet]
neg_file_name = settings['neg_list']
train_file_name = settings['train_dir']  # folder receiving training images
val_file_name = settings['val_dir']      # folder receiving validation images
write_train_name = settings['train_list']
write_val_name = settings['val_list']

pos = read_entries(pos_file_name)
neg = read_entries(neg_file_name)
number_of_pos = len(pos)
number_of_neg = len(neg)
if number_of_pos < VAL_PER_CLASS or number_of_neg < VAL_PER_CLASS:
    raise ValueError("need at least " + str(VAL_PER_CLASS)
                     + " positive and negative samples, got "
                     + str(number_of_pos) + " and " + str(number_of_neg))

# shutil.copy treats a missing destination folder as a file name and would
# overwrite that single file again and again, so create the folders first.
for target_dir in (train_file_name, val_file_name):
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

# Validation data: the first 500 positives plus the first 500 negatives.
val = pos[:VAL_PER_CLASS] + neg[:VAL_PER_CLASS]
random.shuffle(val)
with open(write_val_name, "w") as write_val:
    for entry in val:
        source, image_file_name, label = parse_list_entry(entry)
        shutil.copy(source, val_file_name)
        write_val.write(image_file_name + ' ' + label + '\n')

# Training data: everything that was not used for validation.
train = pos[VAL_PER_CLASS:] + neg[VAL_PER_CLASS:]
random.shuffle(train)
with open(write_train_name, "w") as write_train:
    for current_image, entry in enumerate(train):
        if current_image % 1000 == 0:  # progress message every 1000 images
            print('Processing training data : ' + str(current_image))
        source, image_file_name, label = parse_list_entry(entry)
        shutil.copy2(source, train_file_name)
        write_train.write(image_file_name + ' ' + label + '\n')
相关文章推荐
- Python动态类型的学习---引用的理解
- Python3写爬虫(四)多线程实现数据爬取
- 垃圾邮件过滤器 python简单实现
- 下载并遍历 names.txt 文件,输出长度最长的回文人名。
- install and upgrade scrapy
- Scrapy的架构介绍
- Centos6 编译安装Python
- 使用Python生成Excel格式的图片
- 让Python文件也可以当bat文件运行
- [Python]推算数独
- Python中zip()函数用法举例
- Python中map()函数浅析
- Python将excel导入到mysql中
- Python在CAM软件Genesis2000中的应用
- 使用Shiboken为C++和Qt库创建Python绑定
- FREEBASIC 编译可被python调用的dll函数示例
- Python 七步捉虫法