opencv7-ml之KNN
2015-09-27 18:08
337 查看
准备知识
在文件“opencv\sources\modules\ml\src\precomp.hpp”中
有cvPrepareTrainData的函数原型。
从函数原型的参数可以看出主要为:
const char* /*funcname*/: 函数的名称(参数名被注释掉,实现中并未使用)
const CvMat* train_data, int tflag,: 训练集、训练集样本的布局
const CvMat* responses, int response_type,:训练集标签、对应数据类型
const CvMat* var_idx,: 用了哪些特征
const CvMat* sample_idx,: 用了哪些样本
bool always_copy_data,: 是否复制数据集
const float*** out_train_samples,: 输出处理过的训练集
int* _sample_count, : 样本的总数
int* _var_count,: 特征的总数
int* _var_all,
CvMat** out_responses,: 输出训练集标签
CvMat** out_response_map,
CvMat** out_var_idx, : 输出用了哪些特征
CvMat** out_sample_idx=0 : 输出用了哪些样本(默认值为0,表示不输出该信息)
在文件“opencv\sources\modules\ml\src\inner_functions.cpp”中有cvPrepareTrainData的函数实现:
首先在文件“opencv\sources\modules\ml\include\opencv2\ml\ml.hpp”中有:
该文件中的实现与上面的类定义相对应:为了兼容opencv1.0版本,前半部分使用CvMat的函数都是1.0的形式;后半部分使用Mat的函数则是opencv2.0的形式。
下面是文件“\opencv\sources\modules\ml\src\knearest.cpp”中实现部分:
例子:
上面例子的结果:
对应的使用了opencv2.0的形式写了与上面类似的代码:
生成的结果为:
上面的源码部分还未完全注释,待后续接着注释。
2015年09月27日,第0次修改!
在文件“opencv\sources\modules\ml\src\precomp.hpp”中
有cvPrepareTrainData的函数原型。
// Prototype of cvPrepareTrainData (declared in precomp.hpp).
// Validates the training set and produces the preprocessed sample pointers,
// responses and index arrays. Returns non-zero on success, 0 on failure.
int cvPrepareTrainData( const char* /*funcname*/,   // unnamed, not used by the implementation
    const CvMat* train_data, int tflag,              // training matrix and its sample layout flag
    const CvMat* responses, int response_type,       // optional responses and their kind (e.g. CV_VAR_NUMERICAL)
    const CvMat* var_idx,                            // optional selection of variables (features)
    const CvMat* sample_idx,                         // optional selection of samples
    bool always_copy_data,                           // forwarded to cvGetTrainSamples
    const float*** out_train_samples,                // [out] required: array of sample pointers
    int* _sample_count,                              // [out] optional: number of samples returned
    int* _var_count,                                 // [out] optional: number of variables returned
    int* _var_all,                                   // [out] optional: variable count reported by cvCheckTrainData
    CvMat** out_responses,                           // [out] required when responses != 0
    CvMat** out_response_map,                        // [out] optional: passed to cvPreprocessCategoricalResponses
    CvMat** out_var_idx,                             // [out] optional: preprocessed var_idx
    CvMat** out_sample_idx=0 )                       // [out] optional: preprocessed sample_idx (not returned by default)
从函数原型的参数可以看出主要为:
const char* /*funcname*/: 函数的名称(参数名被注释掉,实现中并未使用)
const CvMat* train_data, int tflag,: 训练集、训练集样本的布局
const CvMat* responses, int response_type,:训练集标签、对应数据类型
const CvMat* var_idx,: 用了哪些特征
const CvMat* sample_idx,: 用了哪些样本
bool always_copy_data,: 是否复制数据集
const float*** out_train_samples,: 输出处理过的训练集
int* _sample_count, : 样本的总数
int* _var_count,: 特征的总数
int* _var_all,
CvMat** out_responses,: 输出训练集标签
CvMat** out_response_map,
CvMat** out_var_idx, : 输出用了哪些特征
CvMat** out_sample_idx=0 : 输出用了哪些样本(默认值为0,表示不输出该信息)
在文件“opencv\sources\modules\ml\src\inner_functions.cpp”中有cvPrepareTrainData的函数实现:
int cvPrepareTrainData( const char* /*funcname*/, const CvMat* train_data, int tflag, const CvMat* responses, int response_type, const CvMat* var_idx, const CvMat* sample_idx, bool always_copy_data, const float*** out_train_samples, int* _sample_count, int* _var_count, int* _var_all, CvMat** out_responses, CvMat** out_response_map, CvMat** out_var_idx, CvMat** out_sample_idx ) { int ok = 0;//用于标记该函数是否成功执行 CvMat* _var_idx = 0;//默认使用所有的特征 CvMat* _sample_idx = 0;//默认使用所有的样本 CvMat* _responses = 0; int sample_all = 0, sample_count = 0, var_all = 0, var_count = 0; CV_FUNCNAME( "cvPrepareTrainData" ); // step 0. clear all the output pointers to ensure we do not try // to call free() with uninitialized pointers //第0步,先释放所有输出的指针以确保不会有未初始化的指针。 if( out_responses ) *out_responses = 0; if( out_response_map ) *out_response_map = 0; if( out_var_idx ) *out_var_idx = 0; if( out_sample_idx ) *out_sample_idx = 0; if( out_train_samples ) *out_train_samples = 0; if( _sample_count ) *_sample_count = 0; if( _var_count ) *_var_count = 0; if( _var_all ) *_var_all = 0; //重置完成 __BEGIN__; if( !out_train_samples ) CV_ERROR( CV_StsBadArg, "output pointer to train samples is NULL" ); CV_CALL( cvCheckTrainData( train_data, tflag, 0, &var_all, &sample_all )); if( sample_idx ) CV_CALL( _sample_idx = cvPreprocessIndexArray( sample_idx, sample_all )); if( var_idx ) CV_CALL( _var_idx = cvPreprocessIndexArray( var_idx, var_all )); if( responses ) { if( !out_responses ) CV_ERROR( CV_StsNullPtr, "output response pointer is NULL" ); if( response_type == CV_VAR_NUMERICAL ) { CV_CALL( _responses = cvPreprocessOrderedResponses( responses, _sample_idx, sample_all )); } else { CV_CALL( _responses = cvPreprocessCategoricalResponses( responses, _sample_idx, sample_all, out_response_map, 0 )); } } CV_CALL( *out_train_samples = cvGetTrainSamples( train_data, tflag, _var_idx, _sample_idx, &var_count, &sample_count, always_copy_data )); ok = 1; __END__; //如果上面的操作都结束了,那么检测对应的输出需要的指针,是否已经初始化,然后接着对各自的输出指针指向的对象进行置0初始化 
if( ok ) { if( out_responses ) *out_responses = _responses, _responses = 0; if( out_var_idx ) *out_var_idx = _var_idx, _var_idx = 0; if( out_sample_idx ) *out_sample_idx = _sample_idx, _sample_idx = 0; if( _sample_count ) *_sample_count = sample_count; if( _var_count ) *_var_count = var_count; if( _var_all ) *_var_all = var_all; } else { if( out_response_map ) cvReleaseMat( out_response_map ); cvFree( out_train_samples ); } if( _responses != responses ) cvReleaseMat( &_responses ); cvReleaseMat( &_var_idx ); cvReleaseMat( &_sample_idx ); return ok;//返回该函数是否成功执行 }
首先在文件“opencv\sources\modules\ml\include\opencv2\ml\ml.hpp”中有:
#define CV_TYPE_NAME_ML_KNN         "opencv-ml-knn"

// One node of a singly linked list of sample batches.
struct CvVectors
{
    int type;               // element type tag (CvKNearest::train stores CV_32F)
    int dims, count;        // count = number of vectors referenced by this node
    CvVectors* next;        // next batch, or 0 at the end of the list
    union
    {
        uchar** ptr;
        float** fl;
        double** db;
    } data;                 // array of per-vector pointers
};

/*********K-Nearest Neighbour Classifier **********/
class CV_EXPORTS_W CvKNearest : public CvStatModel
{
public:

    CV_WRAP CvKNearest();       // creates an empty model
    virtual ~CvKNearest();      // releases all stored samples via clear()

    // Constructors that immediately train the model; one takes CvMat, the other cv::Mat.
    //   trainData    training samples
    //   responses    target value of every training sample
    //   sampleIdx    optional selection of training samples; 0 / empty means all
    //   isRegression true = regression, false = classification
    //   max_k        upper bound for k in later find_nearest calls
    CvKNearest( const CvMat* trainData, const CvMat* responses,
                const CvMat* sampleIdx=0, bool isRegression=false, int max_k=32 );

    CV_WRAP CvKNearest( const cv::Mat& trainData, const cv::Mat& responses,
               const cv::Mat& sampleIdx=cv::Mat(), bool isRegression=false, int max_k=32 );

    // Trains the model. The cv::Mat overload forwards to the CvMat overload.
    //   updateBase   false: discard the current model and train from scratch;
    //                true: append the new samples to the existing model
    //                (the new data must have the same dimensionality).
    virtual bool train( const CvMat* trainData, const CvMat* responses,
                        const CvMat* sampleIdx=0, bool is_regression=false,
                        int maxK=32, bool updateBase=false );

    CV_WRAP virtual bool train( const cv::Mat& trainData, const cv::Mat& responses,
                       const cv::Mat& sampleIdx=cv::Mat(), bool isRegression=false,
                       int maxK=32, bool updateBase=false );

    // Finds the k nearest neighbours of every input row and predicts its
    // response. Three overloads: CvMat, cv::Mat pointers, cv::Mat references.
    virtual float find_nearest( const CvMat* samples, int k, CV_OUT CvMat* results=0,
        const float** neighbors=0, CV_OUT CvMat* neighborResponses=0, CV_OUT CvMat* dist=0 ) const;

    virtual float find_nearest( const cv::Mat& samples, int k, cv::Mat* results=0,
                                const float** neighbors=0, cv::Mat* neighborResponses=0,
                                cv::Mat* dist=0 ) const;

    CV_WRAP virtual float find_nearest( const cv::Mat& samples, int k, CV_OUT cv::Mat& results,
                                        CV_OUT cv::Mat& neighborResponses, CV_OUT cv::Mat& dists) const;

    // Brute-force neighbour search for input rows [start, end).
    virtual void find_neighbors_direct( const CvMat* _samples, int k, int start, int end,
        float* neighbor_responses, const float** neighbors, float* dist ) const;

    virtual void clear();           // frees all stored samples and resets the counters

    int get_max_k() const;          // upper bound on k given at training time
    int get_var_count() const;      // dimensionality of the samples (number of variables)
    int get_sample_count() const;   // total number of stored training samples
    bool is_regression() const;     // true = regression, false = classification

    // Turns the neighbour buffers of rows [start, end) into predictions and
    // copies them to the optional output matrices.
    virtual float write_results( int k, int k1, int start, int end,
        const float* neighbor_responses, const float* dist, CvMat* _results,
        CvMat* _neighbor_responses, CvMat* _dist, Cv32suf* sort_buf ) const;

protected:

    int max_k, var_count;   // upper bound on k; sample dimensionality
    int total;              // total number of stored training samples
    bool regression;        // true when the model is used for regression
    CvVectors* samples;     // linked list of training sample batches
};

typedef CvKNearest KNearest;
该文件中的实现与上面的类定义相对应:为了兼容opencv1.0版本,前半部分使用CvMat的函数都是1.0的形式;后半部分使用Mat的函数则是opencv2.0的形式。
下面是文件“\opencv\sources\modules\ml\src\knearest.cpp”中实现部分:
#include "precomp.hpp"

/********K-Nearest Neighbors Classifier ***********/

// ---------------------------------------------------------------------------
// 1. OpenCV 1.x-style interface (CvMat)
// ---------------------------------------------------------------------------

// k Nearest Neighbors

// Creates an empty model.
CvKNearest::CvKNearest()
{
    samples = 0;    // must be null before clear() walks the list
    clear();
}


// Releases every stored sample batch.
CvKNearest::~CvKNearest()
{
    clear();
}


// Creates a model and trains it right away.
//   _train_data  training samples (passed to train(), which uses CV_ROW_SAMPLE layout)
//   _responses   target value of every training sample
CvKNearest::CvKNearest( const CvMat* _train_data, const CvMat* _responses,
                        const CvMat* _sample_idx, bool _is_regression, int _max_k )
{
    samples = 0;
    train( _train_data, _responses, _sample_idx, _is_regression, _max_k, false );
}


// Frees the linked list of sample batches and resets the model counters.
void CvKNearest::clear()
{
    while( samples )
    {
        CvVectors* next_samples = samples->next;
        cvFree( &samples->data.fl );
        cvFree( &samples );
        samples = next_samples;
    }
    var_count = 0;
    total = 0;
    max_k = 0;
}


// Returns the upper bound on k set at training time.
int CvKNearest::get_max_k() const { return max_k; }

// Returns the dimensionality of the samples.
int CvKNearest::get_var_count() const { return var_count; }

// Returns true when the model performs regression, false for classification.
bool CvKNearest::is_regression() const { return regression; }

// Returns the total number of stored training samples.
int CvKNearest::get_sample_count() const { return total; }


// Trains (or extends) the model.
//
// The samples are preprocessed by cvPrepareTrainData and stored as a new
// node at the head of the `samples` list; the responses are copied into the
// memory that directly follows the node header. No search structure is built.
//
//   _update_base  false: clear the model first and take _is_regression / _max_k;
//                 true:  append to the current model (dimensionality must match).
//
// Returns true on success.
bool CvKNearest::train( const CvMat* _train_data, const CvMat* _responses,
                        const CvMat* _sample_idx, bool _is_regression,
                        int _max_k, bool _update_base )
{
    bool ok = false;
    CvMat* responses = 0;

    CV_FUNCNAME( "CvKNearest::train" );

    __BEGIN__;

    CvVectors* _samples = 0;
    float** _data = 0;
    int _count = 0, _dims = 0, _dims_all = 0, _rsize = 0;

    if( !_update_base )
        clear();

    // Prepare training data and related parameters.
    // Treat categorical responses as ordered - to prevent class label compression and
    // to enable entering new classes in the updates
    CV_CALL( cvPrepareTrainData( "CvKNearest::train", _train_data, CV_ROW_SAMPLE,
        _responses, CV_VAR_ORDERED, 0, _sample_idx, true, (const float***)&_data,
        &_count, &_dims, &_dims_all, &responses, 0, 0 ));

    if( !responses )
        CV_ERROR( CV_StsNoMem, "Could not allocate memory for responses" );

    if( _update_base && _dims != var_count )
        CV_ERROR( CV_StsBadArg, "The newly added data have different dimensionality" );

    if( !_update_base )
    {
        if( _max_k < 1 )
            CV_ERROR( CV_StsOutOfRange, "max_k must be a positive number" );

        regression = _is_regression;
        var_count = _dims;
        max_k = _max_k;
    }

    _rsize = _count*sizeof(float);

    // Allocate the list node together with room for the responses, which are
    // stored immediately after the CvVectors header (read back as
    // ((float*)(s + 1))[j] in find_neighbors_direct). Without this allocation
    // _samples would still be null when it is dereferenced below.
    CV_CALL( _samples = (CvVectors*)cvAlloc( sizeof(*_samples) + _rsize ));
    _samples->next = samples;
    _samples->type = CV_32F;
    _samples->data.fl = _data;
    _samples->count = _count;
    total += _count;

    samples = _samples;
    memcpy( _samples + 1, responses->data.fl, _rsize );

    ok = true;

    __END__;

    // Release the preprocessed responses unless they alias the caller's matrix.
    if( responses && responses->data.ptr != _responses->data.ptr )
        cvReleaseMat(&responses);

    return ok;
}


// Brute-force search of the k nearest stored samples for input rows
// [start, end) of _samples.
//
// For input row i (relative to start) the function maintains, sorted by
// increasing squared Euclidean distance:
//   dist[i*k .. i*k+k-1]                 the distances,
//   neighbor_responses[i*k .. ]          the matching responses,
//   neighbors[(start+i)*k .. ]           pointers to the neighbour vectors (optional).
void CvKNearest::find_neighbors_direct( const CvMat* _samples, int k, int start, int end,
                    float* neighbor_responses, const float** neighbors, float* dist ) const
{
    // k1 = number of neighbours collected so far (capped at k);
    // k2 = number of entries that may need shifting on insertion.
    int i, j, count = end - start, k1 = 0, k2 = 0, d = var_count;
    CvVectors* s = samples;

    for( ; s != 0; s = s->next )
    {
        int n = s->count;
        for( j = 0; j < n; j++ )
        {
            for( i = 0; i < count; i++ )
            {
                double sum = 0;
                Cv32suf si;
                const float* v = s->data.fl[j];
                const float* u = (float*)(_samples->data.ptr + _samples->step*(start + i));
                Cv32suf* dd = (Cv32suf*)(dist + i*k);
                float* nr;
                const float** nn;
                int t, ii, ii1;

                // Squared distance, unrolled four components at a time.
                for( t = 0; t <= d - 4; t += 4 )
                {
                    double t0 = u[t] - v[t], t1 = u[t+1] - v[t+1];
                    double t2 = u[t+2] - v[t+2], t3 = u[t+3] - v[t+3];
                    sum += t0*t0 + t1*t1 + t2*t2 + t3*t3;
                }

                for( ; t < d; t++ )
                {
                    double t0 = u[t] - v[t];
                    sum += t0*t0;
                }

                // Distances are compared through their integer bit patterns
                // (the values are sums of squares, hence non-negative).
                si.f = (float)sum;
                for( ii = k1-1; ii >= 0; ii-- )
                    if( si.i > dd[ii].i )
                        break;
                if( ii >= k-1 )
                    continue;   // not closer than any of the k kept neighbours

                nr = neighbor_responses + i*k;
                nn = neighbors ? neighbors + (start + i)*k : 0;

                // Shift the farther entries one slot down and insert at ii+1.
                for( ii1 = k2 - 1; ii1 > ii; ii1-- )
                {
                    dd[ii1+1].i = dd[ii1].i;
                    nr[ii1+1] = nr[ii1];
                    if( nn ) nn[ii1+1] = nn[ii1];
                }
                dd[ii+1].i = si.i;
                nr[ii+1] = ((float*)(s + 1))[j];    // response stored right after the node
                if( nn )
                    nn[ii+1] = v;
            }
            k1 = MIN( k1+1, k );
            k2 = MIN( k1, k-1 );
        }
    }
}


// Converts the neighbour buffers of input rows [start, end) into predictions.
//
// Regression: the prediction is the mean of the k1 neighbour responses.
// Classification: the responses are sorted (by bit pattern, using sort_buf)
// and the value with the longest run wins; on a tie the first such run in
// sorted order is kept.
//
// The prediction is written to _results (if given); neighbour responses and
// distances are copied to _neighbor_responses / _dist (if given), padded with
// zeros from column k1 to k. Returns the prediction of global row 0, or 0.f
// when row 0 is not part of [start, end).
float CvKNearest::write_results( int k, int k1, int start, int end,
    const float* neighbor_responses, const float* dist,
    CvMat* _results, CvMat* _neighbor_responses,
    CvMat* _dist, Cv32suf* sort_buf ) const
{
    float result = 0.f;
    int i, j, j1, count = end - start;
    double inv_scale = 1./k1;
    // Element stride of _results: step-based for non-continuous matrices, else 1.
    int rstep = _results && !CV_IS_MAT_CONT(_results->type) ? _results->step/sizeof(result) : 1;

    for( i = 0; i < count; i++ )
    {
        const Cv32suf* nr = (const Cv32suf*)(neighbor_responses + i*k);
        float* dst;
        float r;
        if( _results || start+i == 0 )
        {
            if( regression )
            {
                double s = 0;
                for( j = 0; j < k1; j++ )
                    s += nr[j].f;
                r = (float)(s*inv_scale);
            }
            else
            {
                int prev_start = 0, best_count = 0, cur_count;
                Cv32suf best_val;

                for( j = 0; j < k1; j++ )
                    sort_buf[j].i = nr[j].i;

                // Bubble sort with early exit; k1 is small.
                for( j = k1-1; j > 0; j-- )
                {
                    bool swap_fl = false;
                    for( j1 = 0; j1 < j; j1++ )
                        if( sort_buf[j1].i > sort_buf[j1+1].i )
                        {
                            int t;
                            CV_SWAP( sort_buf[j1].i, sort_buf[j1+1].i, t );
                            swap_fl = true;
                        }
                    if( !swap_fl )
                        break;
                }

                // Scan the runs of equal values and keep the longest one.
                best_val.i = 0;
                for( j = 1; j <= k1; j++ )
                    if( j == k1 || sort_buf[j].i != sort_buf[j-1].i )
                    {
                        cur_count = j - prev_start;
                        if( best_count < cur_count )
                        {
                            best_count = cur_count;
                            best_val.i = sort_buf[j-1].i;
                        }
                        prev_start = j;
                    }
                r = best_val.f;
            }

            if( start+i == 0 )
                result = r;

            if( _results )
                _results->data.fl[(start + i)*rstep] = r;
        }

        if( _neighbor_responses )
        {
            dst = (float*)(_neighbor_responses->data.ptr +
                (start + i)*_neighbor_responses->step);
            for( j = 0; j < k1; j++ )
                dst[j] = nr[j].f;
            for( ; j < k; j++ )
                dst[j] = 0.f;
        }

        if( _dist )
        {
            dst = (float*)(_dist->data.ptr + (start + i)*_dist->step);
            for( j = 0; j < k1; j++ )
                dst[j] = dist[j + i*k];
            for( ; j < k; j++ )
                dst[j] = 0.f;
        }
    }

    return result;
}


// Loop body for cv::parallel_for_: processes the input rows of one range,
// one row at a time, using a buffer local to each invocation.
struct P1 : cv::ParallelLoopBody {
  P1(const CvKNearest* _pointer, int _buf_sz, int _k, const CvMat* __samples, const float** __neighbors,
     int _k1, CvMat* __results, CvMat* __neighbor_responses, CvMat* __dist, float* _result)
  {
    pointer = _pointer;
    k = _k;
    _samples = __samples;
    _neighbors = __neighbors;
    k1 = _k1;
    _results = __results;
    _neighbor_responses = __neighbor_responses;
    _dist = __dist;
    result = _result;
    buf_sz = _buf_sz;
  }

  const CvKNearest* pointer;
  int k;
  const CvMat* _samples;
  const float** _neighbors;
  int k1;
  CvMat* _results;
  CvMat* _neighbor_responses;
  CvMat* _dist;
  float* result;
  int buf_sz;

  void operator()( const cv::Range& range ) const
  {
    cv::AutoBuffer<float> buf(buf_sz);
    for(int i = range.start; i < range.end; i += 1 )
    {
        // Buffer layout: [k responses][k distances][sort scratch].
        float* neighbor_responses = &buf[0];
        float* dist = neighbor_responses + 1*k;
        Cv32suf* sort_buf = (Cv32suf*)(dist + 1*k);

        pointer->find_neighbors_direct( _samples, k, i, i + 1,
                    neighbor_responses, _neighbors, dist );

        float r = pointer->write_results( k, k1, i, i + 1, neighbor_responses, dist,
                                 _results, _neighbor_responses, _dist, sort_buf );

        // Only the prediction for the first input row is returned to the caller.
        if( i == 0 )
            *result = r;
    }
  }

};


// For every row of _samples finds the k nearest stored samples
// (1 <= k <= get_max_k()) and predicts the response: the mean of the
// neighbour responses for regression, a majority vote for classification.
//
// Optional outputs:
//   _results             1-d vector with one prediction per input row
//   _neighbors           k * _samples->rows pointers to the neighbour vectors
//   _neighbor_responses  <rows> x <k> float matrix of neighbour responses
//   _dist                <rows> x <k> float matrix of squared distances
// Neighbours are ordered by increasing distance to the input row.
//
// Returns the prediction for the first input row; raises an OpenCV error if
// the model is untrained or an argument has the wrong type or size.
float CvKNearest::find_nearest( const CvMat* _samples, int k, CvMat* _results,
    const float** _neighbors, CvMat* _neighbor_responses, CvMat* _dist ) const
{
    float result = 0.f;
    const int max_blk_count = 128, max_buf_sz = 1 << 12;

    if( !samples )
        CV_Error( CV_StsError, "The search tree must be constructed first using train method" );

    if( !CV_IS_MAT(_samples) ||
        CV_MAT_TYPE(_samples->type) != CV_32FC1 ||
        _samples->cols != var_count )
        CV_Error( CV_StsBadArg, "Input samples must be floating-point matrix (<num_samples>x<var_count>)" );

    if( _results && (!CV_IS_MAT(_results) ||
        (_results->cols != 1 && _results->rows != 1) ||
        _results->cols + _results->rows - 1 != _samples->rows) )
        CV_Error( CV_StsBadArg,
        "The results must be 1d vector containing as much elements as the number of samples" );

    if( _results && CV_MAT_TYPE(_results->type) != CV_32FC1 &&
        (CV_MAT_TYPE(_results->type) != CV_32SC1 || regression))
        CV_Error( CV_StsUnsupportedFormat,
        "The results must be floating-point or integer (in case of classification) vector" );

    if( k < 1 || k > max_k )
        CV_Error( CV_StsOutOfRange, "k must be within 1..max_k range" );

    if( _neighbor_responses )
    {
        if( !CV_IS_MAT(_neighbor_responses) || CV_MAT_TYPE(_neighbor_responses->type) != CV_32FC1 ||
            _neighbor_responses->rows != _samples->rows || _neighbor_responses->cols != k )
            CV_Error( CV_StsBadArg,
            "The neighbor responses (if present) must be floating-point matrix of <num_samples> x <k> size" );
    }

    if( _dist )
    {
        if( !CV_IS_MAT(_dist) || CV_MAT_TYPE(_dist->type) != CV_32FC1 ||
            _dist->rows != _samples->rows || _dist->cols != k )
            CV_Error( CV_StsBadArg,
            "The distances from the neighbors (if present) must be floating-point matrix of <num_samples> x <k> size" );
    }

    // Size of the scratch buffer each parallel task allocates.
    int count = _samples->rows;
    int count_scale = k*2;
    int blk_count0 = MIN( count, max_blk_count );
    int buf_sz = MIN( blk_count0 * count_scale, max_buf_sz );
    blk_count0 = MAX( buf_sz/count_scale, 1 );
    blk_count0 += blk_count0 % 2;
    blk_count0 = MIN( blk_count0, count );
    buf_sz = blk_count0 * count_scale + k;

    // Never vote over more neighbours than there are stored samples.
    int k1 = get_sample_count();
    k1 = MIN( k1, k );

    cv::parallel_for_(cv::Range(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1,
                                             _results, _neighbor_responses, _dist, &result)
    );

    return result;
}


/////////////////////////////////////////////////////
// ---------------------------------------------------------------------------
// 2. OpenCV 2.x-style interface (cv::Mat); every function forwards to the
//    CvMat implementation above.
// ---------------------------------------------------------------------------

using namespace cv;

// Creates a model and trains it right away (cv::Mat version).
CvKNearest::CvKNearest( const Mat& _train_data, const Mat& _responses,
                       const Mat& _sample_idx, bool _is_regression, int _max_k )
{
    samples = 0;
    train(_train_data, _responses, _sample_idx, _is_regression, _max_k, false );
}


// Wraps the cv::Mat arguments in CvMat headers and calls the CvMat train().
// An empty _sample_idx is passed on as a null pointer.
bool CvKNearest::train( const Mat& _train_data, const Mat& _responses,
                        const Mat& _sample_idx, bool _is_regression,
                        int _max_k, bool _update_base )
{
    CvMat tdata = _train_data, responses = _responses, sidx = _sample_idx;

    return train(&tdata, &responses, sidx.data.ptr ? &sidx : 0, _is_regression, _max_k, _update_base );
}


// cv::Mat-pointer version of find_nearest. Each non-null output matrix is
// (re)allocated as CV_32F when its current type or size does not fit, then
// wrapped in a CvMat header and passed to the CvMat implementation.
float CvKNearest::find_nearest( const Mat& _samples, int k, Mat* _results,
                                const float** _neighbors, Mat* _neighbor_responses,
                                Mat* _dist ) const
{
    CvMat s = _samples, results, *presults = 0, nresponses, *pnresponses = 0, dist, *pdist = 0;

    if( _results )
    {
        // NOTE(review): the CvMat overload accepts CV_32S results only when
        // !regression, while this check keeps CV_32S only when regression
        // is set - confirm which is intended.
        if(!(_results->data && (_results->type() == CV_32F ||
            (_results->type() == CV_32S && regression)) &&
             (_results->cols == 1 || _results->rows == 1) &&
             _results->cols + _results->rows - 1 == _samples.rows) )
            _results->create(_samples.rows, 1, CV_32F);
        presults = &(results = *_results);
    }

    if( _neighbor_responses )
    {
        if(!(_neighbor_responses->data && _neighbor_responses->type() == CV_32F &&
             _neighbor_responses->cols == k && _neighbor_responses->rows == _samples.rows) )
            _neighbor_responses->create(_samples.rows, k, CV_32F);
        pnresponses = &(nresponses = *_neighbor_responses);
    }

    if( _dist )
    {
        if(!(_dist->data && _dist->type() == CV_32F &&
             _dist->cols == k && _dist->rows == _samples.rows) )
            _dist->create(_samples.rows, k, CV_32F);
        pdist = &(dist = *_dist);
    }

    return find_nearest(&s, k, presults, _neighbors, pnresponses, pdist );
}


// cv::Mat-reference version of find_nearest; forwards to the pointer version
// without requesting the neighbour pointers.
float CvKNearest::find_nearest( const cv::Mat& _samples, int k, CV_OUT cv::Mat& results,
                                CV_OUT cv::Mat& neighborResponses, CV_OUT cv::Mat& dists) const
{
    return find_nearest(_samples, k, &results, 0, &neighborResponses, &dists);
}

/* End of file */
例子:
#include "ml.h"
#include "highgui.h"

// Demo of CvKNearest with the C API: trains a two-class classifier on 100
// random 2-d points, classifies every pixel of a 500x500 canvas and shows
// the resulting decision map together with the training points.
int main( int argc, char** argv )
{
    const int K = 10;
    int i, j, k, accuracy;
    float response;
    int train_sample_count = 100;
    CvRNG rng_state = cvRNG(-1);

    // Training samples (one per row) and their class labels.
    CvMat* trainData = cvCreateMat( train_sample_count, 2, CV_32FC1 );
    CvMat* trainClasses = cvCreateMat( train_sample_count, 1, CV_32FC1 );

    // Canvas on which the result is drawn.
    IplImage* img = cvCreateImage( cvSize( 500, 500 ), 8, 3 );

    // Single query point; `sample` is a header over `_sample`.
    float _sample[2];
    CvMat sample = cvMat( 1, 2, CV_32FC1, _sample );
    cvZero( img );

    CvMat trainData1, trainData2, trainClasses1, trainClasses2;

    // form the training samples
    // First half of the rows: normal distribution, mean (200,200), sigma (50,50).
    cvGetRows( trainData, &trainData1, 0, train_sample_count/2 );
    cvRandArr( &rng_state, &trainData1, CV_RAND_NORMAL, cvScalar(200,200), cvScalar(50,50) );

    // Second half of the rows: normal distribution, mean (300,300), sigma (50,50).
    cvGetRows( trainData, &trainData2, train_sample_count/2, train_sample_count );
    cvRandArr( &rng_state, &trainData2, CV_RAND_NORMAL, cvScalar(300,300), cvScalar(50,50) );

    // The first half belongs to class 1 ...
    cvGetRows( trainClasses, &trainClasses1, 0, train_sample_count/2 );
    cvSet( &trainClasses1, cvScalar(1) );

    // ... and the second half to class 2.
    cvGetRows( trainClasses, &trainClasses2, train_sample_count/2, train_sample_count );
    cvSet( &trainClasses2, cvScalar(2) );

    // learn classifier
    CvKNearest knn( trainData, trainClasses, 0, false, K );

    // Receives the responses of the K nearest neighbours of one query point.
    CvMat* nearests = cvCreateMat( 1, K, CV_32FC1);

    // Classify every pixel of the canvas.
    for( i = 0; i < img->height; i++ )
    {
        for( j = 0; j < img->width; j++ )
        {
            sample.data.fl[0] = (float)j;   // x
            sample.data.fl[1] = (float)i;   // y

            // estimate the response and get the neighbors' labels
            response = knn.find_nearest(&sample,K,0,0,nearests,0);

            // compute the number of neighbors representing the majority
            for( k = 0, accuracy = 0; k < K; k++ )
            {
                if( nearests->data.fl[k] == response)
                    accuracy++;
            }

            // highlight the pixel depending on the accuracy (or confidence):
            // one colour per class, with a second shade where at most 5 of
            // the K neighbours agree with the prediction.
            cvSet2D( img, i, j, response == 1 ?
                (accuracy > 5 ? CV_RGB(180,0,0) : CV_RGB(180,120,0)) :
                (accuracy > 5 ? CV_RGB(0,180,0) : CV_RGB(120,120,0)) );
        }
    }

    // display the original training samples
    for( i = 0; i < train_sample_count/2; i++ )
    {
        CvPoint pt;
        pt.x = cvRound(trainData1.data.fl[i*2]);
        pt.y = cvRound(trainData1.data.fl[i*2+1]);
        cvCircle( img, pt, 2, CV_RGB(255,0,0), CV_FILLED );
        pt.x = cvRound(trainData2.data.fl[i*2]);
        pt.y = cvRound(trainData2.data.fl[i*2+1]);
        cvCircle( img, pt, 2, CV_RGB(0,255,0), CV_FILLED );
    }

    cvNamedWindow( "classifier result", 1 );
    cvShowImage( "classifier result", img );
    cvWaitKey(0);

    // Release everything that was allocated above.
    cvDestroyWindow( "classifier result" );
    cvReleaseMat( &nearests );
    cvReleaseMat( &trainClasses );
    cvReleaseMat( &trainData );
    cvReleaseImage( &img );
    return 0;
}
上面例子的结果:
对应的使用了opencv2.0的形式写了与上面类似的代码:
#include "opencv2/ml/ml.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <iostream>
using namespace std;
using namespace cv;

// Demo of KNearest with the C++ API: trains a two-class classifier on 100
// random 2-d points, classifies every pixel of a 500x500 canvas and shows
// the resulting decision map together with the training points.
int main(int argc, char** argv)
{
    const int K = 10;
    const int train_sample_count = 100;
    RNG rng;

    // Training samples (one per row, columns = x, y) and their class labels.
    Mat trainData = Mat::zeros(train_sample_count, 2, CV_32FC1);
    Mat trainClasses = Mat::zeros(train_sample_count, 1, CV_32FC1);

    // Canvas on which the result is drawn.
    Mat Image(Size2i(500, 500), CV_8UC3);
    Image.setTo(0);

    Mat sample(1, 2, CV_32FC1);

    // The two halves are views into trainData, so filling them fills trainData.
    Mat trainData1 = trainData(Range(0, train_sample_count / 2), Range::all());
    Mat trainData2 = trainData(Range(train_sample_count / 2, train_sample_count), Range::all());
    rng.fill(trainData1, RNG::UNIFORM, Scalar(50, 50), Scalar(200, 200));
    rng.fill(trainData2, RNG::UNIFORM, Scalar(200, 200), Scalar(300, 300));

    // First half is class 1, second half is class 2.
    Mat trainClasses1 = trainClasses(Range(0, train_sample_count / 2), Range::all());
    Mat trainClasses2 = trainClasses(Range(train_sample_count / 2, train_sample_count), Range::all());
    trainClasses1.setTo(1);
    trainClasses2.setTo(2);

    KNearest Knn(trainData, trainClasses, Mat(), false, K);

    // Receives the responses of the K nearest neighbours of one query point.
    Mat nearests(1, K, CV_32FC1);

    for (int i = 0; i < Image.rows; ++i)
    {
        for (int j = 0; j < Image.cols; ++j)
        {
            // Pixel (row i, column j) is the point (x = j, y = i); the
            // training samples are stored, and later drawn, as (x, y).
            sample.at<float>(0, 0) = static_cast<float>(j);
            sample.at<float>(0, 1) = static_cast<float>(i);

            // Request the neighbour responses as well; without the output
            // matrix `nearests` would never be filled.
            const float response = Knn.find_nearest(sample, K, 0, 0, &nearests, 0);

            // Count how many of the K neighbours agree with the prediction.
            int accuracy = 0;
            for (int k = 0; k < K; k++)
            {
                if (nearests.at<float>(0, k) == response)
                    accuracy++;
            }

            // One colour per class, with a second shade where at most 5 of
            // the K neighbours agree with the prediction.
            Vec3b color;
            if (response == 1)
                color = (accuracy > 5) ? Vec3b(180, 0, 0) : Vec3b(180, 120, 0);
            else
                color = (accuracy > 5) ? Vec3b(0, 180, 0) : Vec3b(120, 120, 0);
            Image.at<Vec3b>(i, j) = color;
        }
    }

    // Draw the original training samples on top of the decision map.
    for (int i = 0; i < train_sample_count / 2; i++)
    {
        Point pt;
        pt.x = static_cast<int>(trainData1.at<float>(i, 0));
        pt.y = static_cast<int>(trainData1.at<float>(i, 1));
        circle(Image, pt, 1, Scalar(0, 0, 255), -1, 8);
        pt.x = static_cast<int>(trainData2.at<float>(i, 0));
        pt.y = static_cast<int>(trainData2.at<float>(i, 1));
        circle(Image, pt, 1, Scalar(255, 0, 0), -1, 8);
    }

    namedWindow("result");
    imshow("result", Image);
    waitKey(0);
    cin.get();
    return 0;
}
生成的结果为:
上面的源码部分还未完全注释,待后续接着注释。
2015年09月27日,第0次修改!
相关文章推荐
- opencv7-ml之KNN
- Linux Apache服务搭建学习
- Shell编程基础
- Linux 基础入门 第二周9.21~9.27
- 在阿里云的CentOS 6.5 上面安装 timidity++ 和 ffmpeg(含libmp3lame) 实现命令行将midi转换为mp3
- windows安装phpstudy(nginx+php) 出现的问题解决
- top命令显示按内存或CPU排序
- 网站智能客服--图灵机器人接入方式
- 20135316王剑桥 linux第二周课实验笔记
- solr学习之四--------Field、CopyField、DynamicField
- Eclipse 配置tomcat8
- Linux第二次学习笔记
- OpenSSL与PKI
- getopt() 与 getopt_long() 详解
- java.lang.NoSuchMethodException: org.apache.catalina.deploy.WebXml addFilter
- linux的简介 linux与windows服务器系统的区别
- hadoop-0.20.2安装配置
- system play - qemu with a lot of system imgs (maybe I'll develop one by myself)
- OpenvSwitch完全使用手册
- linux 命令