您的位置:首页 > 其它

算法导论—AC自动机

2016-05-03 10:53 218 查看
华电北风吹

日期:2016-05-03

AC自动机是一种比较高效的多模式匹配算法。类似于KMP在单个模式串上建立状态转移,AC自动机在由所有模式串构成的trie树上建立状态转移,使得只需把文本串遍历一遍,就可以找到其中出现的所有模式串。

AC自动机一般有以下三步:首先,对所有的模式串建立trie树。然后,为trie树的每个节点建立失配指针,使它指向该节点所表示字符串的最长真后缀在trie树中对应的节点(该后缀同时也是某个模式串的前缀),从而得到AC自动机。最后一步,让文本串在AC自动机上进行匹配。

题目链接:

http://hihocoder.com/problemset/problem/1036?sid=786758

参考代码:

#include <iostream>
#include <queue>
#include <string>
#include <string.h>
using namespace std;

#define size 26

// One state of the trie / Aho-Corasick automaton over the lowercase alphabet.
struct node
{
    node *fail;          // failure link: the state to fall back to on a mismatch (set by GetFail)
    node *next[size];    // trie children, one slot per letter ('a' is slot 0); NULL when absent
    int count;           // non-zero when a pattern ends at this state; Insert increments it

    // Creates a childless state with no failure link.
    node() : fail(NULL), count(0)
    {
        // Assign every slot explicitly instead of memset(next, NULL, ...):
        // NULL is a null-pointer constant, not an int fill byte, and an
        // all-zero byte pattern is not guaranteed to be a null pointer.
        for (int slot = 0; slot < size; ++slot)
            next[slot] = NULL;
    }
};

void Insert(node *root, string str)
{
node *p = root;
int i = 0, index;
for (int i = 0; i < str.length(); i++)
{
index = str[i] - 'a';
if (p->next[index] == NULL)
p->next[index] = new node();
p = p->next[index];
}
p->count++;
}
void GetFail(node *root)
{
int i;
root->fail = NULL;
queue<node*> q;
q.push(root);
while (q.empty() == false)
{
node *temp = q.front();
q.pop();
node *p = NULL;
for (i = 0; i<size; i++)
{
if (temp->next[i] != NULL)
{
if (temp == root)
temp->next[i]->fail = root;
else
{
p = temp->fail;
while (p != NULL)
{
if (p->next[i] != NULL)
{
temp->next[i]->fail = p->next[i];
break;
}
p = p->fail;
}
if (p == NULL)
temp->next[i]->fail = root;
}
q.push(temp->next[i]);
}
}
}
}
bool Query(node *root, string str)
{
int cnt = 0, index;
node *p = root;
for (int i = 0; i < str.length(); i++)
{
index = str[i] - 'a';
while (p->next[index] == NULL && p != root)
p = p->fail;
p = p->next[index];
p = (p == NULL) ? root : p;
node *temp = p;
while (temp != root)
{
if (temp->count != 0)
return true;
else
temp = temp->fail;
}
}
return false;
}
// Reads a pattern count n, then n patterns, then one text from standard
// input, and prints "YES" if the text contains any pattern, "NO" otherwise.
int main()
{
    node *root = new node();

    // n starts at 0 so that a failed read means "no patterns" rather than
    // leaving it uninitialised.
    int n = 0;
    cin >> n;

    string keyword;
    // `n-- > 0` keeps a negative count from looping, and the stream check
    // stops at truncated input instead of re-inserting the previous keyword.
    while (n-- > 0 && cin >> keyword)
        Insert(root, keyword);

    GetFail(root);

    string str;
    cin >> str;
    if (Query(root, str))
        cout << "YES" << endl;
    else
        cout << "NO" << endl;

    // The trie is not freed explicitly; the program ends right here.
    return 0;
}


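上面的代码在逻辑上是正确的,但是如果直接拿去提交会超时:Query 每读入一个字符,都要沿着失配指针一路检查到根节点。把代码第84行(即 Query 函数中的 while (temp != root))修改为 if 以后就可以通过了,但是修改以后的代码在逻辑上是错误的——它只检查当前节点本身,会漏掉那些作为当前已匹配部分的后缀出现的模式串。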

下面给一个AC自动机功能扩展代码:

#include <iostream>
#include <queue>
#include <vector>
#include <string>
#include <string.h>
#include <fstream>
using namespace std;

#define size 26

// One state of the trie / Aho-Corasick automaton over the lowercase alphabet.
struct ACNode
{
    ACNode *fail;          // failure link: the state to fall back to on a mismatch (set by GetFail)
    ACNode *next[size];    // trie children, one slot per letter ('a' is slot 0); NULL when absent
    int count;             // how many inserted patterns end here; a value above 1 means the pattern was inserted more than once
    int patternNo;         // index passed to Insert for the pattern ending here (-1 if none); Query uses it to look the pattern text up

    // Creates a childless state with no failure link and no pattern attached.
    ACNode() : fail(NULL), count(0), patternNo(-1)
    {
        // Assign every slot explicitly instead of memset(next, NULL, ...):
        // NULL is a null-pointer constant, not an int fill byte, and an
        // all-zero byte pattern is not guaranteed to be a null pointer.
        for (int slot = 0; slot < size; ++slot)
            next[slot] = NULL;
    }
};
void Insert(ACNode *root, string str, int patterNo)
{
ACNode *p = root;
int i = 0, index;
for (int i = 0; i < str.length(); i++)
{
index = str[i] - 'a';
if (p->next[index] == NULL)
p->next[index] = new ACNode();
p = p->next[index];
}
p->count++;
p->patternNo = patterNo;
}
// Computes the failure link of every node in the trie rooted at `root`.
//
// Nodes are handled in breadth-first order. The root's link is NULL. Every
// other node, reached from `parent` by letter `letter`, links to the child
// for that letter of the first node on parent's failure chain that has one,
// or to the root when the chain ends without such a node.
void GetFail(ACNode *root)
{
    root->fail = NULL;

    queue<ACNode*> pending;
    for (pending.push(root); !pending.empty(); pending.pop())
    {
        // The parent stays at the front while its children are appended at
        // the back; the loop's pop() removes it afterwards.
        ACNode *parent = pending.front();

        for (int letter = 0; letter < size; ++letter)
        {
            ACNode *child = parent->next[letter];
            if (child == NULL)
                continue;

            // The root is the answer unless the chain offers something
            // better. For children of the root the chain is empty, because
            // root->fail is NULL.
            ACNode *target = root;
            for (ACNode *anc = parent->fail; anc != NULL; anc = anc->fail)
            {
                if (anc->next[letter] != NULL)
                {
                    target = anc->next[letter];
                    break;
                }
            }

            child->fail = target;
            pending.push(child);
        }
    }
}
int Query(ACNode *root, string str, vector<string> &keySet)
{
int cnt = 0, index;
ACNode *p = root;
for (int i = 0; i < str.length();i++)
{
index = str[i] - 'a';
while (p->next[index] == NULL && p != root)
p = p->fail;
p = p->next[index];
p = (p == NULL) ? root : p;
ACNode *temp = p;
while (temp != root)
{
if (temp->count>0)
{
int patternNo = temp->patternNo;
int patternLength = keySet[patternNo].length();
cout << i - patternLength + 1 << "  " << keySet[patternNo] << endl;
cnt += temp->count;
}
temp = temp->fail;
}
}
return cnt;
}
// Reads a pattern count n, then n patterns, then one text, and prints every
// occurrence of a pattern in the text. Input comes from "input.txt" in the
// working directory when that file can be opened, otherwise from standard
// input.
int main()
{
    // A plain relative name works on every platform; the previous
    // ".\\input.txt" is only a path on Windows.
    ifstream in("input.txt");

    // Redirect cin only when the file really opened, and keep the old
    // buffer so it can be restored before `in` is destroyed.
    streambuf *originalBuf = NULL;
    if (in.is_open())
        originalBuf = cin.rdbuf(in.rdbuf());

    ACNode *root = new ACNode();

    // n starts at 0 so that a failed read means "no patterns" rather than
    // leaving it uninitialised.
    int n = 0;
    cin >> n;

    string keyword;
    vector<string> keySet;
    for (int i = 0; i < n && cin >> keyword; i++)
    {
        keySet.push_back(keyword);
        Insert(root, keyword, i);
    }
    GetFail(root);

    string str;
    cin >> str;
    Query(root, str, keySet);

    // cin outlives `in`; do not leave it pointing at a destroyed buffer.
    if (originalBuf != NULL)
        cin.rdbuf(originalBuf);
    return 0;
}


参考博客:

http://www.cppblog.com/mythit/archive/2009/04/21/80633.html

http://www.cnblogs.com/xudong-bupt/p/3433506.html
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: