您的位置:首页 > 运维架构 > Linux

linux下c/c++实例之七递归扫描目录下的文件

2015-11-09 10:48 591 查看


一、简介

      Linux下递归扫描该目录下所有的文件,完成更为详细的文件操作需求。其他库中比如Qt、Boost库中已有接口函数。


二、详解

1、递归扫描文件的代码

(1)scanfile.cpp:

#include <iostream>
#include <string>
#include <vector>
#include <sys/stat.h>
#include <regex.h>
#include <libgen.h>
#include <dirent.h>
#include <assert.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
using namespace std;
vector<string> v_file;
int regex_match(const char *buffer, const char *pattern)
{
int ret = 0;
char errbuf[1024] = {0};
regex_t reg;
regmatch_t pm[1] = {0};
ret = regcomp(®, pattern, REG_EXTENDED | REG_ICASE);
if (ret != 0) {
regerror(ret, ®, errbuf, sizeof(errbuf));
fprintf(stderr, "%s:regcom(%s)\n", errbuf, pattern);
return -1;
}
if (regexec(®, buffer, 1, pm, 0) == 0) {
regfree(®);
return 0;                         //匹配成功
}
else {
regfree(®);
return -1;
}
}
int scan_dirpath(char *path, char *pattern)    //递归扫描该目录下所有的文件和目录
{
char file_path[512] = {0};
char file[512] = {0};
DIR *dir = NULL;
struct dirent *ptr = NULL;
struct stat buf;
int i, j;
/****************浏览目录***************************/
if ((dir = opendir(path)) == NULL) {
perror("opendir failed!");
return -1;
}
while((ptr = readdir(dir)) != NULL) {
if (ptr->d_name[0] != '.') {//除去根文件目录
strcpy(file_path, path);
if (path[strlen(path) - 1] != '/')  strcat(file_path, "/");
strcat(file_path, ptr->d_name);          //构建完整的文件名
assert(stat(file_path, &buf) != -1);
if(S_ISREG(buf.st_mode)) {        //判断的是文件
for(i = 0; i < strlen(file_path); i++) {
if(file_path[i] == '/') {
memset(file, 0, strlen(file));
j = 0;
continue;
}
file[j++] = file_path[i];
}
if (regex_match(file, pattern) == 0) {  //正则匹配成功
v_file.push_back(file_path);
}
}
else if(S_ISDIR(buf.st_mode)) {   //判断的是目录
scan_dirpath(file_path, pattern);
}
}
}
return 0;
}

int main()
{
char path[512] = "/tmp/other";
char pattern[32] = ".*.cpp";
scan_dirpath(path, pattern);
for (int i = 0; i < v_file.size(); i++) {
cout<<v_file[i]<<endl;
}
return 0;
}


(2)编译运行

g++ -o scanfile scanfile.cpp
./scanfile



2、非递归扫描目录文件的C++模板

(1)scanfile.cpp:
#include <iostream>
#include <string>
#include <vector>
#include <regex.h>
#include <assert.h>
#include <sys/stat.h>
#include <dirent.h>
#include <algorithm>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
using namespace std;
struct scan_info    //扫描文件信息
{
string file_dir;
string file_name;
int create_time;
};
class compare_name    //lhs > rhs,get file in ascending order.
{
public:
/*Sort by file creation time and file_name in descending order, so get file in back will be in ascending order.*/
bool operator()(const scan_info& lhs, const scan_info& rhs) {
if (lhs.file_name > rhs.file_name)  return true;
//else if (lhs.create_time == rhs.create_time && lhs.file_name > rhs.file_name)  return true;
else  return false;
}
};
template <typename compare = compare_name>
class scan_file
{
public:
// Scan file in single-directory mode.
scan_file(const string& file_dir, const string& pattern, int file_count = 1024);
// Scan file in multi-directory mode.
scan_file(const vector<string>& dir_vector, const string& pattern, int file_count = 1024);
// Scan file in dir/sub-dirs mode.
scan_file(const string& dir, const vector<string>& sub_dirs, const string& pattern, int file_count = 1024);
virtual ~scan_file();
public:
// Get a file in given directories. Upon file found, return true, otherwise return false.
// In single-directory mode, return file name, otherwise return full name.
bool get_file(string& file_name);
// Get all files in given directories.
// In single-directory mode, return file name, otherwise return full name.
void get_files(vector<string>& files);
private:
vector<string> dir_vector;
regex_t reg;
vector<scan_info> file_vector;
};
template<typename compare> scan_file<compare>::scan_file(const string& file_dir, const string& pattern, int file_count)
: dir_vector(1, file_dir)
{
assert(regcomp(®, pattern.c_str(), REG_NOSUB | REG_EXTENDED) == 0);
file_vector.reserve(file_count);
}
template<typename compare> scan_file<compare>::scan_file(const vector<string>& dir_vector_, const string& pattern, int file_count)
: dir_vector(dir_vector_)
{
// 以功能更加强大的扩展正则表达式的方式进行匹配,不用存储匹配后的结果
assert(regcomp(®, pattern.c_str(), REG_NOSUB | REG_EXTENDED) == 0);
file_vector.reserve(file_count);
}
template<typename compare> scan_file<compare>::scan_file(const string& dir, const vector<string>& sub_dirs, const string& pattern, int file_count)
{
vector<string>::const_iterator iter;
for (iter = sub_dirs.begin(); iter != sub_dirs.end(); ++iter) {
dir_vector.push_back(dir + '/' + *iter);
}
assert(regcomp(®, pattern.c_str(), REG_NOSUB | REG_EXTENDED) == 0);
file_vector.reserve(file_count);
}
template<typename compare> scan_file<compare>::~scan_file()
{
regfree(®);
}
template<typename compare> bool scan_file<compare>::get_file(string& file_name)
{
/**先扫描目录,将所有的文件都写入到vector中**/
/**如果找到文件,每次从vector中读取一个文件**/
/**不能递归扫描,多文件时会返回全路径**/
DIR* dirp;
dirent ent;
dirent* result;
struct stat stat_buf;
string full_name = "";
scan_info file_info;
file_name = "";
while (file_vector.size() > 0) {
vector<scan_info>::iterator iter = file_vector.begin();
if (access((iter->file_dir + '/' + iter->file_name).c_str(), F_OK) == -1) {
std::pop_heap(file_vector.begin(), file_vector.end(), compare());
file_vector.pop_back();
continue;
}
if (dir_vector.size() == 1)  file_name = iter->file_name;
else  file_name = iter->file_dir + '/' + iter->file_name;
std::pop_heap(file_vector.begin(), file_vector.end(), compare());
file_vector.pop_back();
return true;
}
vector<string>::const_iterator dir_iter;
for (dir_iter = dir_vector.begin(); dir_iter != dir_vector.end(); ++dir_iter) {
assert((dirp = opendir(dir_iter->c_str())) != NULL);
while (readdir_r(dirp, &ent, &result) == 0 && result != 0) {
if (strcmp(ent.d_name, ".") == 0 || strcmp(ent.d_name, "..") == 0)  continue;
if (regexec(®, ent.d_name, (size_t)0, 0, 0) != 0)  continue;
full_name = *dir_iter + '/' + ent.d_name;
assert(::lstat(full_name.c_str(), &stat_buf) >= 0);
if (S_ISDIR(stat_buf.st_mode) == 0) {
file_info.file_dir = *dir_iter;
file_info.file_name = ent.d_name;
file_info.create_time = stat_buf.st_mtime;
file_vector.push_back(file_info);
}
}
closedir(dirp);
}
/**也可以采用文件加载完毕后更改名字
*err_msg << "mv " << m_real_file << " " << m_real_file << ".bak";
*system(err_msg.str().c_str());
**/
if (dir_vector.size() > 0)  dir_vector.clear();
if (file_vector.size() > 0) {
//make_heap以迭代器[start,end] 区间内的元素生成一个堆. 默认使用元素类型 的 < 操作符 进行判断堆的类型, 因此生成的是大顶堆. 这里是小顶堆
std::make_heap(file_vector.begin(), file_vector.end(), compare());
while (file_vector.size() > 0) {
vector<scan_info>::iterator iter = file_vector.begin();
if (access((iter->file_dir + '/' + iter->file_name).c_str(), F_OK) == -1) { //文件不存在
//pop_heap() 并不是真的把最大(最小)的元素从堆中弹出来. 而是重新排序堆. 它把首元素和末元素交换,然后将[first,last-1]的数据再做成一个堆。
std::pop_heap(file_vector.begin(), file_vector.end(), compare());
file_vector.pop_back();
continue;
}
if (dir_vector.size() == 1)  file_name = iter->file_name;
else  file_name = iter->file_dir + '/' + iter->file_name;
std::pop_heap(file_vector.begin(), file_vector.end(), compare());
file_vector.pop_back();
return true;
}

return false;
}
else {
return false;
}
}
template<typename compare> void scan_file<compare>::get_files(vector<string>& files)
{
/**只扫描该目录下的文件,不扫描文件夹**/
/**若想递归扫描,可将每次扫描到的文件push_back进vector**/
DIR* dirp;
dirent ent;
dirent* result;
struct stat stat_buf;
string full_name = "";
files.resize(0);
vector<string>::const_iterator dir_iter;
for (dir_iter = dir_vector.begin(); dir_iter != dir_vector.end(); ++dir_iter) {
assert((dirp = opendir(dir_iter->c_str())) != NULL);
while (readdir_r(dirp, &ent, &result) == 0 && result != 0) {
if (strcmp(ent.d_name, ".") == 0 || strcmp(ent.d_name, "..") == 0)  continue;
full_name = *dir_iter + '/' + ent.d_name;
if (regexec(®, ent.d_name, (size_t)0, 0, 0) != 0)  continue;
assert(::lstat(full_name.c_str(), &stat_buf) >= 0);
if (S_ISDIR(stat_buf.st_mode) == 0) {        //不是文件夹
if (regexec(®, ent.d_name, (size_t)0, 0, 0) == 0) {
files.push_back(ent.d_name);
}
}
}
closedir(dirp);
}
}
int main()
{
string path = "/tmp/other";
string pattern = ".*.cpp";
scan_file<> *tmp = new scan_file<>(path, pattern, 1);
/**********方式一:单个文件获取************/
string file = "";
while (tmp->get_file(file) == true) {
cout<<file<<endl;
}
delete tmp;
/**********方式二:vecotor获取************/
cout<<"------------------------------------"<<endl;
tmp = new scan_file<>(path, pattern, 1);
vector<string> files;
tmp->get_files(files);
vector<string>::iterator it;
for ( it = files.begin(); it < files.end(); it++ ) {
cout<<*it<<endl;
}
delete tmp;
return 0;
}
(2)编译运行:
g++ -o scanfile scanfile.cpp
./scanfile





三、总结

(1)递归扫描路径暂只能使用绝对路径,相对路径需要自己转换。

(2)若有建议,请留言,在此先感谢!
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  c linux