namespace cmt

class CMT
CMT() : str_detector("FAST"), str_descriptor("BRISK") {} //默认的特征检测和描述子

 void initialize(const Mat im_gray, const Rect rect); //rect是初始目标框
void processFrame(const Mat im_gray);

Fusion fusion;//跟踪点融合:融合跟踪和匹配的点 将两种点都放在一起,并且不重复
Matcher matcher; //DescriptorMatcher 用的knnMatch匹配
Tracker tracker; //光溜匹配跟踪 上一帧的特征点使用光流法跟踪得到这一帧的特征点的位置T
Consensus consensus;//包括scale和rotation angel的求取和对目标中心的投票

string str_detector;
string str_descriptor;

vector<Point2f> points_active; //public for visualization purposes
RotatedRect bb_rot;

Ptr<FeatureDetector> detector;
Ptr<DescriptorExtractor> descriptor;

Size2f size_initial;

vector<int> classes_active; //forgroud points -in target boundingbox's

float theta;

Mat im_prev;

} /* namespace CMT */



void CMT::initialize(const Mat im_gray, const Rect rect)

//Remember initial size
size_initial = rect.size();

//Remember initial image
im_prev = im_gray;

//Compute center of rect
Point2f center = Point2f(rect.x + rect.width/2.0, rect.y + rect.height/2.0);

//Initialize rotated bounding box
bb_rot = RotatedRect(center, size_initial, 0.0);

//Initialize detector and descriptor

//FeatureDetector is OpenCV's feature detector,including "FAST","ORB","SIFT"
detector = FeatureDetector::create(str_detector);//FAST
descriptor = DescriptorExtractor::create(str_descriptor);//BRISK

//Get initial keypoints in whole image and compute their descriptors
vector<KeyPoint> keypoints;
detector->detect(im_gray, keypoints);

//Divide keypoints into foreground and background keypoints according to selection
//in target bounding box is foreground
vector<KeyPoint> keypoints_fg;
vector<KeyPoint> keypoints_bg;

for (size_t i = 0; i < keypoints.size(); i++)
KeyPoint k = keypoints[i];
Point2f pt = k.pt;

if (pt.x > rect.x && pt.y > rect.y && pt.x < rect.br().x && pt.y < rect.br().y)



//Create foreground classes
vector<int> classes_fg;
for (size_t i = 0; i < keypoints_fg.size(); i++)

//Compute foreground/background features
Mat descs_fg;
Mat descs_bg;
descriptor->compute(im_gray, keypoints_fg, descs_fg);
descriptor->compute(im_gray, keypoints_bg, descs_bg);

//Only now is the right time to convert keypoints to points, as compute() might remove some keypoints
vector<Point2f> points_fg;
vector<Point2f> points_bg;

for (size_t i = 0; i < keypoints_fg.size(); i++)

for (size_t i = 0; i < keypoints_bg.size(); i++)

//Create normalized points: distance to center
vector<Point2f> points_normalized;
for (size_t i = 0; i < points_fg.size(); i++)
points_normalized.push_back(points_fg[i] - center);

//Initialize matcher
matcher.initialize(points_normalized, descs_fg, classes_fg, descs_bg, center);

//Initialize consensus get Xi,Xj distance and angle

//Create initial set of active keypoints
for (size_t i = 0; i < keypoints_fg.size(); i++)
classes_active = classes_fg;



/**
 * Processes one new frame and updates the tracking result.
 *
 * Steps: track the active points from the previous frame, detect and match
 * keypoints globally, fuse both sets, estimate scale and rotation, find the
 * consensus center and inliers, match locally around that estimate, and fuse
 * the local matches with the inliers into the new active set. bb_rot is then
 * rebuilt from the center, scale and rotation.
 *
 * Requires a prior call to initialize() (uses detector, descriptor, im_prev).
 *
 * @param im_gray current frame (expected to be grayscale, per its name)
 *
 * NOTE(review): CMT is declared inside namespace cmt; this definition must be
 * placed in that namespace (or a using-directive must be in effect) — confirm.
 */
void CMT::processFrame(Mat im_gray) {

    //Track keypoints
    vector<Point2f> points_tracked;
    vector<unsigned char> status;
    tracker.track(im_prev, im_gray, points_active, points_tracked, status);

    //cout << points_tracked.size() << " tracked points.";

    //Keep only the classes of successfully tracked points.
    //The index is also bounded by status.size() so a short status vector cannot be overrun.
    vector<int> classes_tracked;
    for (size_t i = 0; i < classes_active.size() && i < status.size(); i++)
    {
        if (status[i])
        {
            classes_tracked.push_back(classes_active[i]);
        }
    }

    //Detect keypoints, compute descriptors
    vector<KeyPoint> keypoints;
    detector->detect(im_gray, keypoints);
    Mat descriptors;
    descriptor->compute(im_gray, keypoints, descriptors);

    //Match keypoints globally
    vector<Point2f> points_matched_global;
    vector<int> classes_matched_global;
    matcher.matchGlobal(keypoints, descriptors, points_matched_global, classes_matched_global);

    //Fuse tracked and globally matched points
    vector<Point2f> points_fused;
    vector<int> classes_fused;
    fusion.preferFirst(points_tracked, classes_tracked, points_matched_global, classes_matched_global,
            points_fused, classes_fused);

    //Estimate scale and rotation from the fused points.
    //Start from the identity transform so the values are defined even if the estimator leaves them untouched.
    float scale = 1.0f;
    float rotation = 0.0f;
    consensus.estimateScaleRotation(points_fused, classes_fused, scale, rotation);

    //    FILE_LOG(logDEBUG) << "scale " << scale << ", " << "rotation " << rotation;

    //Find inliers and the center of their votes
    Point2f center;
    vector<Point2f> points_inlier;
    vector<int> classes_inlier;
    consensus.findConsensus(points_fused, classes_fused, scale, rotation,
            center, points_inlier, classes_inlier);

    //Match keypoints locally
    vector<Point2f> points_matched_local;
    vector<int> classes_matched_local;
    matcher.matchLocal(keypoints, descriptors, center, scale, rotation, points_matched_local, classes_matched_local);

    //Clear active points
    points_active.clear();
    classes_active.clear();

    //Fuse locally matched points and inliers
    fusion.preferFirst(points_matched_local, classes_matched_local, points_inlier, classes_inlier, points_active, classes_active);

    //rotation is in radians; RotatedRect takes its angle in degrees
    bb_rot = RotatedRect(center,  size_initial * scale, rotation/CV_PI * 180);

    //Remember current image
    im_prev = im_gray;
}



作者在论文中这样说: The main idea behind CMT is to break down the object of interest into tiny parts, known as keypoints. In each frame, we try to again find the keypoints that were already there in the initial selection of the object of interest. We do this by employing
two different kinds of methods. First, we track keypoints from the previous frame to the current frame by estimating what is known as its
optic flow. Second, we match keypoints globally by comparing their
descriptors. As both of these methods are error-prone, we employ a novel way of looking for consensus within the found keypoints by letting each keypoint vote for the object center, as shown in the following image:


