OpenCV3 Python语言实现 笔记5
2017-07-12 20:40
811 查看
目标检测 目标识别
HOG:特征描述符 按八个方向计算颜色梯度 一个单元16*16像素 一个块2*2单元 按块构成特征向量
图像金字塔:使用任意尺度缩小图像大小 使用高斯模糊平滑图像 图像比最小尺寸还大则重复操作
滑动窗口:解决定位问题 使用图像金字塔在不同尺度下重复扫描
非最大抑制:解决区域重叠问题
1.建立图像金字塔,采用滑动窗口搜索图像
2.收集所有超出阈值的含目标窗口,并得到最高响应的窗口W
3.消除所有与W有明显重叠的窗口
4.移动到下一个有最高响应的窗口,在当前尺度下重复上述过程
5.移动图像到金字塔的下一个尺度,重复上述过程
SVM:分类 确定窗口的置信度评分
BOW(Bag of Words):词袋技术
1.取一个样本数据集
2.对数据集中的每幅图像提取描述符
3.将每个描述符都添加到BOW训练器中
4.将描述符聚类到k簇中
滑动窗口 图像金字塔
|- car_sliding_windows.py
|- car_detector
|- __init__.py
|- detector.py
|- non_maximum.py
|- pyramid.py
|- sliding_window.py
car_sliding_windows.py
import cv2
import numpy as np
from car_detector.detector import car_detector, bow_features
from car_detector.pyramid import pyramid
from car_detector.non_maximum import non_max_suppression_fast as nms
from car_detector.sliding_window import sliding_window
import urllib
def in_range(number, test, thresh=0.2):
    """Return True when ``number`` lies strictly within ``thresh`` of ``test``.

    The comparison is strict: a distance exactly equal to ``thresh``
    yields False.
    """
    return abs(number - test) < thresh
# Driver script: train the detector, scan the test image with a sliding
# window over an image pyramid, then draw the boxes kept by NMS.
test_image = "../images/cars.jpg"
img_path = "../images/test.jpg"
# NOTE(review): Python 2 urllib API; with a local source path this presumably
# just copies the file to img_path -- confirm before porting to Python 3.
urllib.urlretrieve(test_image, img_path)

svm, extractor = car_detector()
detect = cv2.xfeatures2d.SIFT_create()

# Window width/height in pixels; used for scanning and for the boxes.
w, h = 100, 40
img = cv2.imread(img_path)

rectangles = []
counter = 1
scaleFactor = 1.25
scale = 1
font = cv2.FONT_HERSHEY_PLAIN

for resized in pyramid(img, scaleFactor):
    # Ratio that maps coordinates of this pyramid level back to the
    # original image.
    scale = float(img.shape[1]) / float(resized.shape[1])
    for (x, y, roi) in sliding_window(resized, 20, (w, h)):
        # Windows cut short at the image border are not classified.
        if roi.shape[1] != w or roi.shape[0] != h:
            continue
        try:
            bf = bow_features(roi, extractor, detect)
            # A window may yield no BOW descriptor (presumably None when no
            # keypoints are found); there is nothing to classify then.
            if bf is not None:
                _, result = svm.predict(bf)
                # NOTE(review): STAT_MODEL_UPDATE_MODEL looks like a training
                # flag; kept because the original passes it -- confirm it is
                # a no-op next to STAT_MODEL_RAW_OUTPUT.
                a, res = svm.predict(
                    bf,
                    flags=cv2.ml.STAT_MODEL_RAW_OUTPUT | cv2.ml.STAT_MODEL_UPDATE_MODEL)
                print("Class: %d, Score: %f, a: %s" % (result[0][0], res[0][0], res))
                score = res[0][0]
                # Keep windows labelled 1 (the label given to positive
                # samples during training) whose raw score is below -1.0.
                if result[0][0] == 1 and score < -1.0:
                    rx, ry = int(x * scale), int(y * scale)
                    rx2, ry2 = int((x + w) * scale), int((y + h) * scale)
                    rectangles.append([rx, ry, rx2, ry2, abs(score)])
        except Exception as err:
            # Best effort per window, but no longer silent, and Ctrl-C /
            # SystemExit are not swallowed any more.
            print("window (%d, %d) skipped: %s" % (x, y, err))
        counter += 1

windows = np.array(rectangles)
boxes = nms(windows, 0.25)

for (x, y, x2, y2, score) in boxes:
    print("%s %s %s %s %s" % (x, y, x2, y2, score))
    cv2.rectangle(img, (int(x), int(y)), (int(x2), int(y2)), (0, 255, 0), 1)
    cv2.putText(img, "%f" % score, (int(x), int(y)), font, 1, (0, 255, 0))

cv2.imshow("img", img)
cv2.waitKey(0)
Python解释器会将带有 yield 的函数视为一个generator(生成器):调用它看起来像普通函数调用,但不会执行任何函数代码,直到对其调用next()(在 for 循环中会自动调用next())才开始执行;每执行到一个yield语句就会暂停并返回一个迭代值,下次执行时从yield的下一条语句继续执行。
通过指定的因子来调整图像大小 建立图像金字塔
pyramid.py
import cv2
def resize(img, scaleFactor):
    """Return ``img`` shrunk by ``scaleFactor`` using INTER_AREA interpolation.

    Args:
        img: image array; ``shape[0]`` and ``shape[1]`` are read as the two
            spatial dimensions.
        scaleFactor: divisor applied to both dimensions.

    Returns:
        The result of ``cv2.resize`` at the reduced size.
    """
    # 1.0 forces true division: with an integer factor, Python 2's
    # ``1 / scaleFactor`` is 0 and the target size collapses to (0, 0).
    ratio = 1.0 / scaleFactor
    # The size tuple is passed as (shape[1], shape[0]).
    new_size = (int(img.shape[1] * ratio), int(img.shape[0] * ratio))
    return cv2.resize(img, new_size, interpolation=cv2.INTER_AREA)
def pyramid(image, scale=1.5, minSize=(200, 80)):
    """Yield ``image`` and then successively smaller copies of it.

    The input is always yielded first, unchanged. Each following level is
    the previous one passed through ``resize(image, scale)``; iteration
    stops as soon as a level is narrower than ``minSize[0]`` or shorter
    than ``minSize[1]``.

    Args:
        image: image array to start from.
        scale: shrink factor between consecutive levels; must be > 1.
        minSize: ``(min_width, min_height)`` below which no level is yielded.

    Raises:
        ValueError: if ``scale`` is not greater than 1. Such a factor never
            shrinks the image, so the loop would never terminate.
    """
    if scale <= 1:
        raise ValueError("scale must be greater than 1, got %r" % (scale,))
    yield image
    while True:
        image = resize(image, scale)
        too_short = image.shape[0] < minSize[1]
        too_narrow = image.shape[1] < minSize[0]
        if too_short or too_narrow:
            break
        yield image
滑动窗口
sliding_window.py
def sliding_window(image, step, window_size):
    """Yield ``(x, y, window)`` for every grid position over ``image``.

    Positions advance by ``step`` along both axes, row by row. Windows near
    the right/bottom border are plain slices and can therefore be smaller
    than ``window_size``; callers that need full windows must check the
    shape themselves.

    Args:
        image: array indexed as ``image[row, column]``.
        step: stride in pixels between consecutive windows.
        window_size: ``(width, height)`` of the window.
    """
    win_w, win_h = window_size[0], window_size[1]
    # ``range`` instead of ``xrange`` so this also runs on Python 3.
    for y in range(0, image.shape[0], step):      # rows
        for x in range(0, image.shape[1], step):  # columns
            yield (x, y, image[y:y + win_h, x:x + win_w])
non_maximum.py
import numpy as np
def non_max_suppression_fast(boxes, overlapThresh):
    """Greedy non-maximum suppression over scored bounding boxes.

    Args:
        boxes: array-like of rows ``[x1, y1, x2, y2, score]``.
        overlapThresh: a remaining box is dropped when its intersection with
            the currently picked box covers more than this fraction of the
            remaining box's own area.

    Returns:
        ``[]`` when ``boxes`` is empty; otherwise the kept rows as an integer
        array, highest score first. The cast applies to every column, so the
        score column is truncated to an integer as well.
    """
    # Accept plain lists as well as arrays.
    boxes = np.asarray(boxes)

    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # integer boxes (signed or unsigned) are converted to floats because the
    # overlap ratio below relies on true division
    if boxes.dtype.kind in ("i", "u"):
        boxes = boxes.astype("float")

    # indexes of the boxes that survive
    pick = []

    # grab the coordinates and scores of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    scores = boxes[:, 4]

    # compute the area of each box and sort the indexes by score;
    # argsort is ascending, so the best box is always the last index
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(scores)

    # keep looping while some indexes still remain in the index list
    while len(idxs) > 0:
        # take the highest-scoring remaining box and keep it
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        rest = idxs[:last]

        # intersection rectangle between the picked box and every other
        # remaining box: larger of the two starts, smaller of the two ends
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        # width and height of the intersection; negative values mean the
        # boxes do not overlap and are clamped to 0
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # fraction of each remaining box covered by the picked box
        overlap = (w * h) / area[rest]

        # drop the picked box and every box that overlaps it too much, then
        # repeat with what is left
        suppressed = np.where(overlap > overlapThresh)[0]
        idxs = np.delete(idxs, np.concatenate(([last], suppressed)))

    # return only the bounding boxes that were picked, as integers
    return boxes[pick].astype("int")
detector.py
import cv2
import numpy as np
# Directory that holds the training images.
datapath = "CarData/TrainImages"
# Number of images read per class (positive and negative).
SAMPLES = 129


def path(cls, i):
    """Return the path of training image number ``i + 1`` of class ``cls``.

    Args:
        cls: file-name prefix of the class, e.g. ``"pos-"`` or ``"neg-"``.
        i: 0-based sample index; file names are numbered from 1.
    """
    return "%s/%s%d.pgm" % (datapath, cls, i + 1)
def get_flann_matcher():
    """Create the FLANN-based matcher used by the BOW descriptor extractor."""
    # NOTE(review): algorithm=1 is presumably FLANN's KD-tree index
    # (FLANN_INDEX_KDTREE in the OpenCV tutorials) -- confirm.
    index_params = dict(algorithm=1, trees=5)
    search_params = {}
    return cv2.FlannBasedMatcher(index_params, search_params)
def get_bow_extractor(extract, match):
    """Wrap a descriptor extractor and a matcher in a BOW image descriptor extractor.

    Args:
        extract: descriptor extractor passed as first constructor argument.
        match: descriptor matcher passed as second constructor argument.
    """
    return cv2.BOWImgDescriptorExtractor(extract, match)
def get_extract_detect():
    """Return two independent SIFT instances as a 2-tuple.

    Both elements are created the same way, so either can serve as the
    keypoint detector or as the descriptor extractor.
    """
    make_sift = cv2.xfeatures2d.SIFT_create
    return make_sift(), make_sift()
def extract_sift(fn, extractor, detector):
    """Read image file ``fn`` and return the descriptors of its keypoints.

    Args:
        fn: path of the image; it is read with imread flag 0.
        extractor: object whose ``compute(image, keypoints)`` returns a pair;
            the second element (the descriptors) is returned.
        detector: object whose ``detect(image)`` returns the keypoints.

    Raises:
        IOError: if the image cannot be read.
    """
    im = cv2.imread(fn, 0)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the file name rather than somewhere downstream.
    if im is None:
        raise IOError("could not read image: %s" % fn)
    keypoints = detector.detect(im)
    return extractor.compute(im, keypoints)[1]
def bow_features(img, extractor_bow, detector):
    """Return the BOW descriptor of an already loaded image.

    Args:
        img: image array.
        extractor_bow: BOW extractor; its ``compute(img, keypoints)`` result
            is returned unchanged.
        detector: object whose ``detect(img)`` supplies the keypoints.
    """
    keypoints = detector.detect(img)
    return extractor_bow.compute(img, keypoints)
def car_detector():
    """Train the BOW + SVM car classifier from the images under ``datapath``.

    Steps: build a 12-word visual vocabulary from the SIFT descriptors of
    ``SAMPLES`` positive and ``SAMPLES`` negative images, compute the BOW
    descriptor of every image, then train an RBF C-SVC on them
    (label 1 = positive, -1 = negative).

    Returns:
        ``(svm, extract_bow)``: the trained SVM and the BOW descriptor
        extractor carrying the learned vocabulary.
    """
    pos, neg = "pos-", "neg-"
    extract, detect = get_extract_detect()
    matcher = get_flann_matcher()

    print("building BOWKMeansTrainer...")
    bow_kmeans_trainer = cv2.BOWKMeansTrainer(12)
    extract_bow = get_bow_extractor(extract, matcher)

    print("adding features to trainer")
    for i in range(SAMPLES):
        print(i)
        for prefix in (pos, neg):
            bow_kmeans_trainer.add(extract_sift(path(prefix, i), extract, detect))

    vocabulary = bow_kmeans_trainer.cluster()
    extract_bow.setVocabulary(vocabulary)

    traindata, trainlabels = [], []
    print("adding to train data")
    for i in range(SAMPLES):
        print(i)
        for prefix, label in ((pos, 1), (neg, -1)):
            fn = path(prefix, i)
            features = bow_features(cv2.imread(fn, 0), extract_bow, detect)
            # Skip images that produce no descriptor, so ``extend`` does not
            # fail and data/labels stay aligned.
            if features is None:
                print("no BOW descriptor for %s, sample skipped" % fn)
                continue
            traindata.extend(features)
            trainlabels.append(label)

    svm = cv2.ml.SVM_create()
    svm.setType(cv2.ml.SVM_C_SVC)
    svm.setGamma(1)
    svm.setC(35)
    svm.setKernel(cv2.ml.SVM_RBF)
    svm.train(np.array(traindata), cv2.ml.ROW_SAMPLE, np.array(trainlabels))
    return svm, extract_bow
HOG:特征描述符 按八个方向计算颜色梯度 一个单元16*16像素 一个块2*2单元 按块构成特征向量
图像金字塔:使用任意尺度缩小图像大小 使用高斯模糊平滑图像 图像比最小尺寸还大则重复操作
滑动窗口:解决定位问题 使用图像金字塔在不同尺度下重复扫描
非最大抑制:解决区域重叠问题
1.建立图像金字塔,采用滑动窗口搜索图像
2.收集所有超出阈值的含目标窗口,并得到最高响应的窗口W
3.消除所有与W有明显重叠的窗口
4.移动到下一个有最高响应的窗口,在当前尺度下重复上述过程
5.移动图像到金字塔的下一个尺度,重复上述过程
SVM:分类 确定窗口的置信度评分
BOW(Bag of Words):词袋技术
1.取一个样本数据集
2.对数据集中的每幅图像提取描述符
3.将每个描述符都添加到BOW训练器中
4.将描述符聚类到k簇中
# BOW + SVM demo: build a visual vocabulary from SIFT descriptors, train an
# SVM on BOW descriptors, then classify two test images and show the result.
import cv2
import numpy as np
from os.path import join

# NOTE(review): the trailing slash plus the "/" in path() produces a double
# slash in the generated file names.
datapath = "CarData/TrainImages/"


def path(cls, i):
    """Return the path of training image number ``i + 1`` of class ``cls``."""
    return "%s/%s%d.pgm" % (datapath, cls, i + 1)


pos, neg = "pos-", "neg-"

detect = cv2.xfeatures2d.SIFT_create()
extract = cv2.xfeatures2d.SIFT_create()  # SIFT feature extraction

flann_params = dict(algorithm=1, trees=5)
matcher = cv2.FlannBasedMatcher(flann_params, {})  # FLANN feature matching

bow_kmeans_trainer = cv2.BOWKMeansTrainer(40)  # BOW trainer with 40 clusters
extract_bow = cv2.BOWImgDescriptorExtractor(extract, matcher)  # BOW extractor


def extract_sift(fn):
    """Read ``fn`` with imread flag 0 and return its SIFT descriptors."""
    im = cv2.imread(fn, 0)  # step 1: take a sample from the data set
    return extract.compute(im, detect.detect(im))[1]  # step 2: extract descriptors


# Step 3: add the descriptors of 8 positive and 8 negative samples to the
# BOW trainer.
for i in range(8):
    bow_kmeans_trainer.add(extract_sift(path(pos, i)))
    bow_kmeans_trainer.add(extract_sift(path(neg, i)))

# Step 4: cluster the descriptors into k visual words.
vocabulary = bow_kmeans_trainer.cluster()
extract_bow.setVocabulary(vocabulary)


def bow_features(fn):
    """Read ``fn`` with imread flag 0 and return its BOW descriptor."""
    im = cv2.imread(fn, 0)
    return extract_bow.compute(im, detect.detect(im))


# Training data and labels: BOW descriptors of 20 samples per class.
traindata, trainlabels = [], []
for i in range(20):
    traindata.extend(bow_features(path(pos, i)))
    trainlabels.append(1)
    traindata.extend(bow_features(path(neg, i)))
    trainlabels.append(-1)

svm = cv2.ml.SVM_create()  # create the SVM instance
svm.train(np.array(traindata), cv2.ml.ROW_SAMPLE, np.array(trainlabels))  # train it


def predict(fn):
    """Classify image file ``fn``, print the predicted label, return the raw result."""
    f = bow_features(fn)  # BOW descriptor of the test image
    p = svm.predict(f)
    print("%s \t %s" % (fn, p[1][0][0]))
    return p


car, notcar = "../images/car.jpg", "../images/bb.jpg"
car_img = cv2.imread(car)
notcar_img = cv2.imread(notcar)
car_predict = predict(car)
not_car_predict = predict(notcar)

font = cv2.FONT_HERSHEY_SIMPLEX

if car_predict[1][0][0] == 1.0:
    cv2.putText(car_img, 'Car Detected', (10, 30), font, 1, (0, 255, 0), 2, cv2.LINE_AA)

if not_car_predict[1][0][0] == -1.0:
    cv2.putText(notcar_img, 'Car Not Detected', (10, 30), font, 1, (0, 0, 255), 2, cv2.LINE_AA)

cv2.imshow('BOW + SVM Success', car_img)
cv2.imshow('BOW + SVM Failure', notcar_img)
cv2.waitKey(0)
cv2.destroyAllWindows()
滑动窗口 图像金字塔
|- car_sliding_windows.py
|- car_detector
|- __init__.py
|- detector.py
|- non_maximum.py
|- pyramid.py
|- sliding_window.py
car_sliding_windows.py
import cv2
import numpy as np
from car_detector.detector import car_detector, bow_features
from car_detector.pyramid import pyramid
from car_detector.non_maximum import non_max_suppression_fast as nms
from car_detector.sliding_window import sliding_window
import urllib
def in_range(number, test, thresh=0.2):
    """Return True when ``number`` lies strictly within ``thresh`` of ``test``.

    The comparison is strict: a distance exactly equal to ``thresh``
    yields False.
    """
    return abs(number - test) < thresh
# Driver script: train the detector, scan the test image with a sliding
# window over an image pyramid, then draw the boxes kept by NMS.
test_image = "../images/cars.jpg"
img_path = "../images/test.jpg"
# NOTE(review): Python 2 urllib API; with a local source path this presumably
# just copies the file to img_path -- confirm before porting to Python 3.
urllib.urlretrieve(test_image, img_path)

svm, extractor = car_detector()
detect = cv2.xfeatures2d.SIFT_create()

# Window width/height in pixels; used for scanning and for the boxes.
w, h = 100, 40
img = cv2.imread(img_path)

rectangles = []
counter = 1
scaleFactor = 1.25
scale = 1
font = cv2.FONT_HERSHEY_PLAIN

for resized in pyramid(img, scaleFactor):
    # Ratio that maps coordinates of this pyramid level back to the
    # original image.
    scale = float(img.shape[1]) / float(resized.shape[1])
    for (x, y, roi) in sliding_window(resized, 20, (w, h)):
        # Windows cut short at the image border are not classified.
        if roi.shape[1] != w or roi.shape[0] != h:
            continue
        try:
            bf = bow_features(roi, extractor, detect)
            # A window may yield no BOW descriptor (presumably None when no
            # keypoints are found); there is nothing to classify then.
            if bf is not None:
                _, result = svm.predict(bf)
                # NOTE(review): STAT_MODEL_UPDATE_MODEL looks like a training
                # flag; kept because the original passes it -- confirm it is
                # a no-op next to STAT_MODEL_RAW_OUTPUT.
                a, res = svm.predict(
                    bf,
                    flags=cv2.ml.STAT_MODEL_RAW_OUTPUT | cv2.ml.STAT_MODEL_UPDATE_MODEL)
                print("Class: %d, Score: %f, a: %s" % (result[0][0], res[0][0], res))
                score = res[0][0]
                # Keep windows labelled 1 (the label given to positive
                # samples during training) whose raw score is below -1.0.
                if result[0][0] == 1 and score < -1.0:
                    rx, ry = int(x * scale), int(y * scale)
                    rx2, ry2 = int((x + w) * scale), int((y + h) * scale)
                    rectangles.append([rx, ry, rx2, ry2, abs(score)])
        except Exception as err:
            # Best effort per window, but no longer silent, and Ctrl-C /
            # SystemExit are not swallowed any more.
            print("window (%d, %d) skipped: %s" % (x, y, err))
        counter += 1

windows = np.array(rectangles)
boxes = nms(windows, 0.25)

for (x, y, x2, y2, score) in boxes:
    print("%s %s %s %s %s" % (x, y, x2, y2, score))
    cv2.rectangle(img, (int(x), int(y)), (int(x2), int(y2)), (0, 255, 0), 1)
    cv2.putText(img, "%f" % score, (int(x), int(y)), font, 1, (0, 255, 0))

cv2.imshow("img", img)
cv2.waitKey(0)
Python解释器会将带有 yield 的函数视为一个generator(生成器):调用它看起来像普通函数调用,但不会执行任何函数代码,直到对其调用next()(在 for 循环中会自动调用next())才开始执行;每执行到一个yield语句就会暂停并返回一个迭代值,下次执行时从yield的下一条语句继续执行。
通过指定的因子来调整图像大小 建立图像金字塔
pyramid.py
import cv2
def resize(img, scaleFactor):
    """Return ``img`` shrunk by ``scaleFactor`` using INTER_AREA interpolation.

    Args:
        img: image array; ``shape[0]`` and ``shape[1]`` are read as the two
            spatial dimensions.
        scaleFactor: divisor applied to both dimensions.

    Returns:
        The result of ``cv2.resize`` at the reduced size.
    """
    # 1.0 forces true division: with an integer factor, Python 2's
    # ``1 / scaleFactor`` is 0 and the target size collapses to (0, 0).
    ratio = 1.0 / scaleFactor
    # The size tuple is passed as (shape[1], shape[0]).
    new_size = (int(img.shape[1] * ratio), int(img.shape[0] * ratio))
    return cv2.resize(img, new_size, interpolation=cv2.INTER_AREA)
def pyramid(image, scale=1.5, minSize=(200, 80)):
    """Yield ``image`` and then successively smaller copies of it.

    The input is always yielded first, unchanged. Each following level is
    the previous one passed through ``resize(image, scale)``; iteration
    stops as soon as a level is narrower than ``minSize[0]`` or shorter
    than ``minSize[1]``.

    Args:
        image: image array to start from.
        scale: shrink factor between consecutive levels; must be > 1.
        minSize: ``(min_width, min_height)`` below which no level is yielded.

    Raises:
        ValueError: if ``scale`` is not greater than 1. Such a factor never
            shrinks the image, so the loop would never terminate.
    """
    if scale <= 1:
        raise ValueError("scale must be greater than 1, got %r" % (scale,))
    yield image
    while True:
        image = resize(image, scale)
        too_short = image.shape[0] < minSize[1]
        too_narrow = image.shape[1] < minSize[0]
        if too_short or too_narrow:
            break
        yield image
滑动窗口
sliding_window.py
def sliding_window(image, step, window_size):
    """Yield ``(x, y, window)`` for every grid position over ``image``.

    Positions advance by ``step`` along both axes, row by row. Windows near
    the right/bottom border are plain slices and can therefore be smaller
    than ``window_size``; callers that need full windows must check the
    shape themselves.

    Args:
        image: array indexed as ``image[row, column]``.
        step: stride in pixels between consecutive windows.
        window_size: ``(width, height)`` of the window.
    """
    win_w, win_h = window_size[0], window_size[1]
    # ``range`` instead of ``xrange`` so this also runs on Python 3.
    for y in range(0, image.shape[0], step):      # rows
        for x in range(0, image.shape[1], step):  # columns
            yield (x, y, image[y:y + win_h, x:x + win_w])
non_maximum.py
import numpy as np
def non_max_suppression_fast(boxes, overlapThresh):
    """Greedy non-maximum suppression over scored bounding boxes.

    Args:
        boxes: array-like of rows ``[x1, y1, x2, y2, score]``.
        overlapThresh: a remaining box is dropped when its intersection with
            the currently picked box covers more than this fraction of the
            remaining box's own area.

    Returns:
        ``[]`` when ``boxes`` is empty; otherwise the kept rows as an integer
        array, highest score first. The cast applies to every column, so the
        score column is truncated to an integer as well.
    """
    # Accept plain lists as well as arrays.
    boxes = np.asarray(boxes)

    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # integer boxes (signed or unsigned) are converted to floats because the
    # overlap ratio below relies on true division
    if boxes.dtype.kind in ("i", "u"):
        boxes = boxes.astype("float")

    # indexes of the boxes that survive
    pick = []

    # grab the coordinates and scores of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    scores = boxes[:, 4]

    # compute the area of each box and sort the indexes by score;
    # argsort is ascending, so the best box is always the last index
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(scores)

    # keep looping while some indexes still remain in the index list
    while len(idxs) > 0:
        # take the highest-scoring remaining box and keep it
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        rest = idxs[:last]

        # intersection rectangle between the picked box and every other
        # remaining box: larger of the two starts, smaller of the two ends
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        # width and height of the intersection; negative values mean the
        # boxes do not overlap and are clamped to 0
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # fraction of each remaining box covered by the picked box
        overlap = (w * h) / area[rest]

        # drop the picked box and every box that overlaps it too much, then
        # repeat with what is left
        suppressed = np.where(overlap > overlapThresh)[0]
        idxs = np.delete(idxs, np.concatenate(([last], suppressed)))

    # return only the bounding boxes that were picked, as integers
    return boxes[pick].astype("int")
detector.py
import cv2
import numpy as np
# Directory that holds the training images.
datapath = "CarData/TrainImages"
# Number of images read per class (positive and negative).
SAMPLES = 129


def path(cls, i):
    """Return the path of training image number ``i + 1`` of class ``cls``.

    Args:
        cls: file-name prefix of the class, e.g. ``"pos-"`` or ``"neg-"``.
        i: 0-based sample index; file names are numbered from 1.
    """
    return "%s/%s%d.pgm" % (datapath, cls, i + 1)
def get_flann_matcher():
    """Create the FLANN-based matcher used by the BOW descriptor extractor."""
    # NOTE(review): algorithm=1 is presumably FLANN's KD-tree index
    # (FLANN_INDEX_KDTREE in the OpenCV tutorials) -- confirm.
    index_params = dict(algorithm=1, trees=5)
    search_params = {}
    return cv2.FlannBasedMatcher(index_params, search_params)
def get_bow_extractor(extract, match):
    """Wrap a descriptor extractor and a matcher in a BOW image descriptor extractor.

    Args:
        extract: descriptor extractor passed as first constructor argument.
        match: descriptor matcher passed as second constructor argument.
    """
    return cv2.BOWImgDescriptorExtractor(extract, match)
def get_extract_detect():
    """Return two independent SIFT instances as a 2-tuple.

    Both elements are created the same way, so either can serve as the
    keypoint detector or as the descriptor extractor.
    """
    make_sift = cv2.xfeatures2d.SIFT_create
    return make_sift(), make_sift()
def extract_sift(fn, extractor, detector):
    """Read image file ``fn`` and return the descriptors of its keypoints.

    Args:
        fn: path of the image; it is read with imread flag 0.
        extractor: object whose ``compute(image, keypoints)`` returns a pair;
            the second element (the descriptors) is returned.
        detector: object whose ``detect(image)`` returns the keypoints.

    Raises:
        IOError: if the image cannot be read.
    """
    im = cv2.imread(fn, 0)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the file name rather than somewhere downstream.
    if im is None:
        raise IOError("could not read image: %s" % fn)
    keypoints = detector.detect(im)
    return extractor.compute(im, keypoints)[1]
def bow_features(img, extractor_bow, detector):
    """Return the BOW descriptor of an already loaded image.

    Args:
        img: image array.
        extractor_bow: BOW extractor; its ``compute(img, keypoints)`` result
            is returned unchanged.
        detector: object whose ``detect(img)`` supplies the keypoints.
    """
    keypoints = detector.detect(img)
    return extractor_bow.compute(img, keypoints)
def car_detector():
    """Train the BOW + SVM car classifier from the images under ``datapath``.

    Steps: build a 12-word visual vocabulary from the SIFT descriptors of
    ``SAMPLES`` positive and ``SAMPLES`` negative images, compute the BOW
    descriptor of every image, then train an RBF C-SVC on them
    (label 1 = positive, -1 = negative).

    Returns:
        ``(svm, extract_bow)``: the trained SVM and the BOW descriptor
        extractor carrying the learned vocabulary.
    """
    pos, neg = "pos-", "neg-"
    extract, detect = get_extract_detect()
    matcher = get_flann_matcher()

    print("building BOWKMeansTrainer...")
    bow_kmeans_trainer = cv2.BOWKMeansTrainer(12)
    extract_bow = get_bow_extractor(extract, matcher)

    print("adding features to trainer")
    for i in range(SAMPLES):
        print(i)
        for prefix in (pos, neg):
            bow_kmeans_trainer.add(extract_sift(path(prefix, i), extract, detect))

    vocabulary = bow_kmeans_trainer.cluster()
    extract_bow.setVocabulary(vocabulary)

    traindata, trainlabels = [], []
    print("adding to train data")
    for i in range(SAMPLES):
        print(i)
        for prefix, label in ((pos, 1), (neg, -1)):
            fn = path(prefix, i)
            features = bow_features(cv2.imread(fn, 0), extract_bow, detect)
            # Skip images that produce no descriptor, so ``extend`` does not
            # fail and data/labels stay aligned.
            if features is None:
                print("no BOW descriptor for %s, sample skipped" % fn)
                continue
            traindata.extend(features)
            trainlabels.append(label)

    svm = cv2.ml.SVM_create()
    svm.setType(cv2.ml.SVM_C_SVC)
    svm.setGamma(1)
    svm.setC(35)
    svm.setKernel(cv2.ml.SVM_RBF)
    svm.train(np.array(traindata), cv2.ml.ROW_SAMPLE, np.array(trainlabels))
    return svm, extract_bow
相关文章推荐
- OpenCV3 Python语言实现 笔记4
- OpenCV3 Python语言实现 笔记1
- OpenCV3 Python语言实现 笔记6
- 《Opencv 3 计算机视觉 python语言实现》· 第二遍 —— 读后笔记
- 《OpenCV 3计算机视觉:Python语言实现》学习笔记——目标跟踪中基本运动检测的思考
- OpenCV3计算机视觉Python语言实现人脸识别笔记
- OpenCV3 Python语言实现 笔记3
- Python语言opencv使用笔记(十一)(详解hough变换检测直线与圆)
- 关于Python+Opencv实现人脸检测的实验笔记(调用图片文件篇)
- Python OpenCV学习笔记之:图像直方图反向投影(backprojection)原理简单实现
- Python语言opencv笔记(四)(图像的阈值处理)
- OpenCV3 椒盐噪声python语言实现
- Python语言opencv使用笔记(十)(图像频域滤波与傅里叶变换)
- Python语言opencv使用笔记(八)(图像金字塔)
- OpenCV 3计算机视觉 Python语言实现(第2版)(含示例代码)
- Python语言opencv使用笔记(六)(图像的形态学转换)
- opencv-python 学习笔记2:实现目光跟随(又叫人脸跟随)
- OpenCV3计算机视觉Python语言实现(三):使用OpenCV3处理图像
- OpenCV 3计算机视觉:Python语言实现(原书第2版) pdf+项目源代码
- Python语言opencv使用笔记(九)(图像直方图)