您的位置:首页 > 编程语言 > C语言/C++

基于KMP算法的路径下文本查询程序的C++实现（2.0版）

2017-09-18 15:45 507 查看
本程序根据《C++ Primer》中的 TextQuery 示例改编而来。

#undef UNICODE
#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <map>
#include <math.h>
#include <memory>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include <windows.h>
using namespace std;
// Global list of file paths. fileList() appends every file it finds to this
// vector, and do_it() assigns fileList()'s result back into it before iterating.
std::vector<std::string> folder_files;
// Forward declaration; the definition follows main(). Lists the files found under folder_path.
std::vector<std::string> fileList(const std::string& folder_path);    //file_query function

// Holds the lines of one text file plus an index from each whitespace-separated
// word to the set of lines on which that word occurs.
class TextQuery{
public:
    // Zero-based index of a line inside lines_of_text.
    using line_no = std::vector<std::string>::size_type;

    // Loads the stream: first stores its lines, then builds the word index.
    void read_file(std::ifstream &is){
        store_file(is);
        build_map();
    }

    // Looks up a query string and returns the associated line numbers.
    std::set<line_no> run_query(const std::string&) const;

    // Returns the stored text of the requested line.
    std::string text_line(line_no) const;

private:
    // Reads the stream line by line into lines_of_text.
    void store_file(std::ifstream&);

    // Populates word_map from lines_of_text.
    void build_map();

    // File contents, one element per line.
    std::vector<std::string> lines_of_text;

    // Maps each word to the line numbers on which it appears.
    std::map<std::string, std::set<line_no>> word_map;
};

// Builds the KMP failure table for the first `len` characters of `str`.
//
// After the call, next[q] holds the index of the last character of the longest
// proper prefix of str[0..q] that is also a suffix of str[0..q], or -1 when no
// such prefix exists (so next[0] is always -1).
//
// str  : pattern characters; at least `len` of them must be readable.
// next : output array with room for `len` ints.
// len  : number of pattern characters. When it is zero or negative, or when
//        either pointer is null, nothing is written.
void cal_next(const char *str, int *next, int len)
{
    // An empty pattern has no table entries; writing next[0] would be out of bounds.
    if (str == nullptr || next == nullptr || len <= 0)
    {
        return;
    }

    next[0] = -1;
    int k = -1;  // index of the last character of the currently matched prefix
    for (int q = 1; q < len; ++q)
    {
        // On a mismatch fall back to the next shorter border; next[k] < k, so this terminates.
        while (k > -1 && str[k + 1] != str[q])
        {
            k = next[k];
        }
        // The border can be extended by one character.
        if (str[k + 1] == str[q])
        {
            ++k;
        }
        next[q] = k;
    }
}

// Searches for the first occurrence of a pattern inside a text using the
// Knuth-Morris-Pratt algorithm.
//
// str / slen : text to search and its length in characters.
// ptr / plen : pattern and its length in characters.
//
// Returns the zero-based index in `str` at which the first match starts, or -1
// when the pattern does not occur. An empty pattern matches at index 0. Null
// pointers and negative lengths yield -1.
int KMP(const char *str, int slen, const char *ptr, int plen)
{
    if (str == nullptr || ptr == nullptr || slen < 0 || plen < 0)
    {
        return -1;
    }
    // Without this guard an empty pattern would make cal_next write past a
    // zero-length table and the loop below would report a bogus position.
    if (plen == 0)
    {
        return 0;
    }
    if (plen > slen)
    {
        return -1;
    }

    // The vector owns the failure table, so it is released on every return path.
    std::vector<int> next(static_cast<std::vector<int>::size_type>(plen));
    cal_next(ptr, next.data(), plen);

    int k = -1;  // index of the last pattern character matched so far
    for (int i = 0; i < slen; ++i)
    {
        // Mismatch after a partial match: fall back through the failure table.
        while (k > -1 && ptr[k + 1] != str[i])
        {
            k = next[k];
        }
        if (ptr[k + 1] == str[i])
        {
            ++k;
        }
        // The whole pattern is matched; the match ends at i.
        if (k == plen - 1)
        {
            return i - plen + 1;
        }
    }
    return -1;
}

// Appends every remaining line of the stream to lines_of_text, in reading order.
void TextQuery::store_file(std::ifstream &is){
    for (std::string current; std::getline(is, current); ){
        lines_of_text.push_back(current);
    }
}

// NOTE(review): nothing in the visible code reads or writes this global - confirm it is still needed.
int position;
//create a map ,the keys are words of every line ,the value is line number
void TextQuery::build_map(){
for (line_no line_num = 0; line_num != lines_of_text.size(); line_num++){
istringstream line(lines_of_text[line_num]);
string word;
while (line >> word){
word_map[word].insert(line_num);                    //vector<string>::size_type unsupport (+-*/) operation
}
}
}

set<TextQuery::line_no> TextQuery::run_query(const string &query_word) const{
map< string, set<line_no>>::const_iterator loc = word_map.begin();
for (; loc != word_map.end(); ++loc){
if (-1 != KMP(loc->first.c_str(), loc->first.length(), query_word.c_str(), query_word.length())){
return loc->second;
}
}
return set<line_no>();  //返回一个空集合?
}

// Returns a copy of the stored line with the given zero-based number.
// Throws std::out_of_range when the number is not below the count of stored lines.
std::string TextQuery::text_line(line_no line) const{
    if (line >= lines_of_text.size()){
        throw std::out_of_range("line number out of range");
    }
    return lines_of_text[line];
}

// Rebinds the stream `in` to the file named `file` and returns the same stream,
// so the caller can test the result directly for success.
std::ifstream& open_file(std::ifstream &in, const std::string &file){
    // Drop whatever file the stream was attached to before.
    in.close();
    // Reset the state flags afterwards so that an earlier failure (including one
    // raised by close() itself) does not leak into the new open attempt.
    in.clear();
    in.open(file);
    return in;
}

// Picks the singular or plural form of a word for a given count.
//
// cnt   : number of occurrences being reported.
// word  : singular form.
// words : suffix appended to `word` to form the plural (for example "s").
//
// Returns `word` when cnt is exactly 1, otherwise `word` followed by `words`
// (so a count of 0 also gets the plural form).
//
// The count is a std::size_t; rsize_t belongs to the optional C11 Annex K
// extension and is not declared by many standard libraries.
std::string make_plural(std::size_t cnt, const std::string& word, const std::string& words){
    return (cnt == 1) ? word : word + words;
}

// Writes a query report to std::cout: a summary line with the number of matching
// lines, followed by one entry per match showing the one-based line number and
// the text of that line as stored in `file`.
void print_results(const std::set<TextQuery::line_no>& locs, const std::string& sought, const TextQuery &file){
    const std::set<TextQuery::line_no>::size_type hit_count = locs.size();
    std::cout << sought << " occurs " << hit_count << " " << make_plural(hit_count, "time", "s") << std::endl;

    for (const TextQuery::line_no line : locs){
        // Line numbers are stored zero-based and shown one-based.
        std::cout << "\t(line" << (line + 1) << ")" << file.text_line(line) << std::endl;
    }
}

// Runs one query against one file: opens file_path, loads it into a TextQuery,
// searches for `s` and prints the report.
//
// Returns EXIT_FAILURE (after writing a message to std::cerr) when the file
// cannot be opened, and 0 otherwise. Nothing is printed when std::cin is in a
// failed state or when `s` is exactly "q".
int text_Query(const std::string& file_path, const std::string& s){
    std::ifstream infile;
    open_file(infile, file_path);
    if (infile.fail()){
        std::cerr << "No input file!" << std::endl;
        return EXIT_FAILURE;
    }

    TextQuery tq;
    tq.read_file(infile);

    const bool skip_search = !std::cin || s == "q";
    if (skip_search){
        return 0;
    }

    const std::set<TextQuery::line_no> hits = tq.run_query(s);
    print_results(hits, s, tq);
    return 0;
}

// Interactive driver. Repeatedly reads a folder path and a query string from
// std::cin and runs the query against every file found under that folder,
// until input ends or the user confirms quitting.
//
// Entering "q" as the folder path asks for confirmation; an answer starting
// with 'y' ends the loop, any other answer lets "q" be used as a folder path.
void do_it(){
    std::cout << "please input the folder path which you want query,then click enter(the separator must be '\\')" << std::endl;
    std::string folderpath;
    while (std::getline(std::cin, folderpath)){                                 //input a folder_path

        if (folderpath == "q"){                                                 //quit the program
            std::cout << "y/n?" << std::endl;
            // Read the whole answer line: taking a single character would leave
            // the rest of the line (at least the newline) in the input, where the
            // next getline would pick it up as the query string.
            std::string answer;
            if (!std::getline(std::cin, answer)) return;
            if (!answer.empty() && answer[0] == 'y') return;
        }

        // fileList() appends to the global folder_files, so results of an earlier
        // folder must be dropped first or they would be searched again.
        folder_files.clear();
        folder_files = fileList(folderpath);

        std::string s;
        std::cout << "please input a string which you want to query(click enter as the end):" << std::endl;
        // No query could be read (end of input): there is nothing to search for.
        if (!std::getline(std::cin, s)) return;

        for (const std::string &path : folder_files){
            std::cout << path << ":" << std::endl;
            text_Query(path, s);
            std::cout << std::endl;
        }
    }
}

// Manual check for fileList(): reads one folder path from std::cin and runs the
// listing on it. The returned list is discarded.
void test(){
    std::string requested_folder;
    std::getline(std::cin, requested_folder);
    (void)fileList(requested_folder);
}
// Program entry point: runs the interactive search loop and exits with status 0.
int main(){
    // Call test() here instead to exercise the folder listing on its own.
    do_it();
    return 0;
}

//obtain the all files' path under the folder_path
std::vector<std::string> fileList(const std::string& folder_path)
{
WIN32_FIND_DATA FindData;
HANDLE hError;

int file_count(0);
std::string file_path(folder_path); //路径名
std::string full_file_path; //全路径名

file_path.append("/*.*");
hError = FindFirstFile(file_path.c_str(), &FindData);
if (hError == INVALID_HANDLE_VALUE) {
std::cout << "failed to search files." << std::endl;
return std::vector<std::string>();
}
while (FindNextFile(hError, &FindData))
{
//过虑".", "..", "-q"
if (0 == strcmp(FindData.cFileName, ".") ||
0 == strcmp(FindData.cFileName, "..") ||
0 == strcmp(FindData.cFileName, "-q"))
{
continue;
}

//完整路径
full_file_path.append(folder_path);
full_file_path.append("/");
full_file_path.append(FindData.cFileName);
++file_count;

if (FindData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY){
fileList(full_file_path);
}
else{
folder_files.push_back(full_file_path);
}
full_file_path.clear(); //清空目录
}
return folder_files;
}


喜欢的顶一下,就是对我的支持
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  kmp c++