您的位置:首页 > 产品设计 > UI/UE

POJ 2778 DNA Sequence(AC自动机+矩阵加速)

2013-06-27 17:59 253 查看
DNA Sequence

Time Limit: 1000MS    Memory Limit: 65536K
Total Submissions: 9899    Accepted: 3717
Description

It's well known that a DNA sequence is a sequence that only contains A, C, T and G, and it's very useful to analyze a segment of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC then it may mean that the animal may have a genetic disease. Until now scientists have found several such segments; the problem is how many kinds of DNA sequences of a species don't contain those segments.

Suppose that DNA sequences of a species is a sequence that consist of A, C, T and G,and the length of sequences is a given integer n.
Input

First line contains two integer m (0 <= m <= 10), n (1 <= n <=2000000000). Here, m is the number of genetic disease segment, and n is the length of sequences.

Next m lines each line contain a DNA genetic disease segment, and length of these segments is not larger than 10.
Output

An integer, the number of DNA sequences, mod 100000.
Sample Input

4 3
AT
AC
AG
AA

Sample Output

36

Source

POJ Monthly--2006.03.26,dodo

AC自动机。

要求长度为n,不包含病毒串的个数。

首先利用AC自动机实现状态的转移。

AC自动机其实就和状态机类似的,可以产生L个状态。
然后根据状态间能不能转移,构造一个矩阵。

最后矩阵快速幂求解

//============================================================================
// Name        : HDU.cpp
// Author      :
// Version     :
// Copyright   : Your copyright notice
// Description : Hello World in C++, Ansi-style
//============================================================================

#include <iostream>
#include <stdio.h>
#include <string.h>
#include <algorithm>
#include <queue>
using namespace std;
// Square matrix of unsigned 64-bit integers backed by fixed 40x40 storage.
// Only the leading n x n corner is used by the arithmetic below. All
// operations are plain unsigned long long arithmetic, so every result is
// implicitly reduced modulo 2^64.
struct Matrix
{
    unsigned long long mat[40][40];
    int n;

    // Empty 0x0 matrix. The whole backing array is zeroed so that copying or
    // reading a default-constructed matrix never touches indeterminate values.
    Matrix() : mat(), n(0) {}

    // Zero matrix of dimension _n. _n is expected to lie in [0, 40]; it is not
    // checked here. The full backing array is zeroed (not only the n x n
    // corner) so that implicit copies never read uninitialised cells.
    Matrix(int _n) : mat(), n(_n) {}

    // Matrix product (*this) * b over the leading n x n corner, where n is the
    // dimension of the left operand. Entries wrap modulo 2^64.
    Matrix operator *(const Matrix &b)const
    {
        Matrix ret = Matrix(n);
        // i-k-j order walks both ret and b row-wise; a zero left factor
        // contributes nothing, so that whole inner pass can be skipped.
        for(int i=0;i<n;i++)
            for(int k=0;k<n;k++)
            {
                const unsigned long long lhs = mat[i][k];
                if(lhs == 0)
                    continue;
                for(int j=0;j<n;j++)
                    ret.mat[i][j] += lhs*b.mat[k][j];
            }
        return ret;
    }
};
// Raises a to the n-th power by repeated squaring.
// The arithmetic is plain unsigned long long, so the result wraps modulo 2^64.
// An exponent of 0 yields 1.
unsigned long long pow_m(unsigned long long a,int n)
{
    unsigned long long result = 1;
    // base holds a^(2^k) while the k-th bit of the exponent is examined.
    for (unsigned long long base = a; n; n >>= 1, base *= base)
    {
        if (n & 1)
            result *= base;
    }
    return result;
}
// Raises the square matrix a to the n-th power by repeated squaring.
// Starts from the identity matrix of dimension a.n, so n == 0 returns the
// identity. Entry arithmetic is whatever Matrix::operator* provides.
Matrix pow_M(Matrix a,int n)
{
    Matrix result(a.n);
    for (int d = 0; d < a.n; ++d)
        result.mat[d][d] = 1;

    // base holds a^(2^k) while the k-th bit of the exponent is examined.
    for (Matrix base = a; n; n >>= 1)
    {
        if (n & 1)
            result = result * base;
        base = base * base;
    }
    return result;
}
// Aho-Corasick automaton over the 26 lowercase letters, stored in fixed
// arrays of 40 nodes. No capacity check is performed when nodes are created,
// and insert() indexes by (character - 'a') without validating the character.
struct Trie
{
    int next[40][26];   // next[s][c]: child / goto target of state s on letter c
    int fail[40];       // failure link of each state
    bool end[40];       // true if the state is marked as ending a word
    int root;           // index of the root state
    int L;              // number of states allocated so far

    // Allocates a fresh state with no children and returns its index.
    int newnode()
    {
        const int id = L++;
        for (int c = 0; c < 26; ++c)
            next[id][c] = -1;
        end[id] = false;
        return id;
    }

    // Resets the automaton to a single root state.
    void init()
    {
        L = 0;
        root = newnode();
    }

    // Adds the NUL-terminated word buf to the trie and marks its last state.
    void insert(char buf[])
    {
        int cur = root;
        for (const char *p = buf; *p != '\0'; ++p)
        {
            int &child = next[cur][*p - 'a'];
            if (child == -1)
                child = newnode();
            cur = child;
        }
        end[cur] = true;
    }

    // Computes failure links breadth-first and fills every missing transition
    // so that next[s][c] is defined for all states and letters. A state whose
    // failure target is marked also becomes marked.
    void build()
    {
        std::queue<int> pending;
        fail[root] = root;
        for (int c = 0; c < 26; ++c)
        {
            int &child = next[root][c];
            if (child == -1)
            {
                // Missing edge at the root loops back to the root.
                child = root;
                continue;
            }
            fail[child] = root;
            pending.push(child);
        }

        while (!pending.empty())
        {
            const int cur = pending.front();
            pending.pop();

            const int fallback = fail[cur];
            if (end[fallback])
                end[cur] = true;

            for (int c = 0; c < 26; ++c)
            {
                const int viaFail = next[fallback][c];
                int &child = next[cur][c];
                if (child == -1)
                {
                    // Borrow the transition of the failure state.
                    child = viaFail;
                }
                else
                {
                    fail[child] = viaFail;
                    pending.push(child);
                }
            }
        }
    }

    // Builds an (L+1) x (L+1) matrix: entry [i][j] for i, j < L counts the
    // letters that move state i to an unmarked state j; the extra column L is
    // set to 1 in every row (including row L).
    Matrix getMatrix()
    {
        const int dim = L + 1;
        Matrix trans = Matrix(dim);
        for (int from = 0; from < L; ++from)
        {
            for (int c = 0; c < 26; ++c)
            {
                const int to = next[from][c];
                if (!end[to])
                    ++trans.mat[from][to];
            }
        }
        for (int row = 0; row < dim; ++row)
            trans.mat[row][L] = 1;
        return trans;
    }

    // Prints every state's index, failure link, mark flag and transition row.
    void debug()
    {
        for (int s = 0; s < L; ++s)
        {
            printf("id = %3d,fail = %3d,end = %3d,chi = [",s,fail[s],end[s]);
            for (int c = 0; c < 26; ++c)
                printf("%2d",next[s][c]);
            printf("]\n");
        }
    }
};
// Scratch buffer for one input word. Sized together with the "%15s" field
// width used in main() so that scanf can never write past the end.
char buf[16];
// The automaton is kept at namespace scope and re-initialised per test case.
Trie ac;

// Reads test cases until input is exhausted. Each case gives a word count and
// a length bound, followed by that many words. For each case it prints
//   (number of strings over 'a'..'z' with length 1..bound)
// - (number of such strings that never enter a marked automaton state),
// with all arithmetic wrapping modulo 2^64.
//
// NOTE(review): the article surrounding this code describes POJ 2778
// (alphabet A/C/G/T, answer mod 100000, "m n" input order), but this program
// works on lowercase letters and modulo 2^64 -- it appears to be a solution to
// a different problem. Confirm before relying on it for POJ 2778.
int main()
{
    // Uncomment to redirect I/O to files when debugging locally:
    //    freopen("in.txt","r",stdin);
    //    freopen("out.txt","w",stdout);
    int wordCount, maxLen;
    while (scanf("%d%d", &wordCount, &maxLen) == 2)
    {
        ac.init();
        for (int i = 0; i < wordCount; i++)
        {
            // Width-limited read: an over-long token is split instead of
            // overflowing buf. Stop on truncated input rather than inserting
            // stale buffer contents.
            if (scanf("%15s", buf) != 1)
                return 0;
            ac.insert(buf);
        }
        ac.build();

        // Row 0 of the powered augmented matrix sums, over lengths 0..maxLen,
        // the strings that avoid every marked state; subtracting 1 removes
        // the empty string.
        Matrix a = ac.getMatrix();
        a = pow_M(a, maxLen);
        unsigned long long res = 0;
        for (int i = 0; i < a.n; i++)
            res += a.mat[0][i];
        res--;

        /*
         * f[n] = 1 + 26^1 + 26^2 + ... + 26^n
         * f[n] = 26 * f[n-1] + 1
         * {f[n] 1} = {f[n-1] 1} * [26 0; 1 1]
         * The total number of non-empty strings is f[L] - 1.
         * The original author notes that L < 2^31 here and that the matrix
         * must not be raised to the power L+1, otherwise the run times out
         * (presumably because L+1 can overflow int). The exponent is therefore
         * L, and f[L] is assembled from two entries of the result.
         */
        a = Matrix(2);
        a.mat[0][0] = 26;
        a.mat[1][0] = a.mat[1][1] = 1;
        a = pow_M(a, maxLen);
        // a.mat[1][0] = 26^0 + ... + 26^(L-1), a.mat[0][0] = 26^L.
        unsigned long long ans = a.mat[1][0] + a.mat[0][0];
        ans--;
        ans -= res;
        cout << ans << endl;
    }
    return 0;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: