您的位置：首页 > 编程语言 > C语言/C++

【POJ 2778】DNA Sequence 中文题意&题解&代码（C++）

2016-03-04 19:15 375 查看

DNA Sequence

Time Limit: 1000MS Memory Limit: 65536K

Description

It’s well known that a DNA sequence is a sequence that contains only A, C, T and G, and it’s very useful to analyze a segment of a DNA sequence. For example, if an animal’s DNA sequence contains the segment ATC then it may mean that the animal may have a genetic disease. Until now scientists have found several such segments; the problem is how many kinds of DNA sequences of a species don’t contain those segments.

Suppose that DNA sequences of a species is a sequence that consist of A, C, T and G，and the length of sequences is a given integer n.

Input

The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.

Next m lines each line contain a DNA genetic disease segment, and length of these segments is not larger than 10.

Output

An integer, the number of DNA sequences, mod 100000.

Sample Input

4 3

AT

AC

AG

AA

Sample Output

36

中文题意：

给出字符串长度n(<=2000000000)，给出不可以包含的序列，最多10个，每个长度最大是10。问长度为n的合法序列有多少个？序列中只可能包含ACTG四个字符。

题解：

做过ac自动机+dp的题后不难发现这是一道经典dp题，然而看到他的数据范围时，就会发现普通的dp是无法解决问题的，而那夸张的数据范围让我们能想到的只有O(log n)的算法才能过，二分貌似没什么办法，那再结合dp的转移方程，我们发现每一个dp[i][j] 的 j 都只能由dp[i-1][x]转移到，其中所有x为可以连接到编号为j节点的j的父亲，而j的父亲在一开始自动机trie图构造好之后就已经是确定的，也就是说假如dp[i][j] 由dp[i-1][x] 更新而来，那么dp[i+1][j] 就一定由dp[i][x] 转移而来，这样的话就可以用矩阵快速幂来优化。

网上有人说递归快速幂会超时，然而博主亲身测试递归的快速幂并不会超时，然而注意不要一边加一边mod，否则会超时！！！

代码：

#include<iostream>
#include<algorithm>
#include<stdio.h>
#include<string.h>
#include<queue>
// Modulus applied to every count; the task asks for the answer mod 100000.
#define mmod (100000)
using namespace std;

// Work queue for the breadth-first pass in getfail().
queue<int> q;

// tot: index of the most recently created trie node (node 0 is the root).
int tot;
// tr[v][ch]: node reached from node v on letter index ch (0..3).
int tr[101][4];
// flag[v]: non-zero when node v must be avoided (set in add(), propagated in getfail()).
int flag[405];
// fail[v]: failure link of node v.
int fail[405];
// m: number of forbidden segments, n: required sequence length (both read in main()).
int m, n;

// c: scratch matrix written by jzc() before the result is copied back.
long long c[105][105];
// unit, tmp: the two matrices main() fills with the transition counts and ksm() multiplies.
long long unit[105][105];
long long tmp[105][105];
// f[0]: per-node counts after the first character (only row 0 is used by main()).
long long f[2][105];

// Input buffer for one forbidden segment.
char s[12];
// Maps a nucleotide letter to its column in the trie child table:
// 'A' -> 0, 'T' -> 1, 'C' -> 2, 'G' -> 3.
// Any other character yields -1. The previous version fell off the end of
// the function for such input, which is undefined behaviour for a function
// returning int. Callers must not use -1 as an array index.
int getc(char x)
{
    switch (x)
    {
    case 'A': return 0;
    case 'T': return 1;
    case 'C': return 2;
    case 'G': return 3;
    default:  return -1;
    }
}
// Resets trie node x to an empty state: failure link pointing at node 0,
// avoid-flag cleared, and all four child slots cleared.
void init(int x)
{
    fail[x] = 0;
    flag[x] = 0;
    int *slots = tr[x];
    for (int ch = 0; ch < 4; ++ch)
        slots[ch] = 0;
}
// Inserts the NUL-terminated segment held in the global buffer s into the
// trie, creating (and resetting) nodes on demand, then flags the node
// reached by the last character as one that must be avoided.
void add()
{
    int node = 0;                       // start at the root
    for (const char *p = s; *p != '\0'; ++p)
    {
        const int col = getc(*p);
        if (tr[node][col] == 0)
        {
            // no edge yet: allocate the next node index and clear it
            tr[node][col] = ++tot;
            init(tot);
        }
        node = tr[node][col];
    }
    flag[node] = 1;
}
// In-place matrix product: a = (a * b) mod mmod, restricted to the leading
// (tot+1) x (tot+1) sub-matrix. The product is staged in the global scratch
// matrix c and copied back afterwards, so a and b may be the same array
// (they must not be c itself).
void jzc(long long a[105][105],long long b[105][105])
{
    const int size = tot + 1;
    for (int row = 0; row < size; ++row)
        for (int col = 0; col < size; ++col)
        {
            long long acc = 0;
            for (int mid = 0; mid < size; ++mid)
                acc += a[row][mid] * b[mid][col];
            // reduce once per cell instead of once per term
            c[row][col] = acc % mmod;
        }
    for (int row = 0; row < size; ++row)
        for (int col = 0; col < size; ++col)
            a[row][col] = c[row][col];
}
// Breadth-first pass over the trie that
//  - sets fail[] for every node,
//  - adds the failure target's flag onto each node's flag, and
//  - fills every missing child slot with the failure target's transition,
//    so that afterwards tr[v][ch] is defined for every node and letter.
inline void getfail()
{
    // Children of the root keep fail == 0; just enqueue them.
    for (int ch = 0; ch < 4; ++ch)
    {
        const int child = tr[0][ch];
        if (child != 0) q.push(child);
    }
    while (!q.empty())
    {
        const int cur = q.front();
        q.pop();
        for (int ch = 0; ch < 4; ++ch)
        {
            const int fallback = tr[fail[cur]][ch];
            const int child = tr[cur][ch];
            if (child == 0)
            {
                // missing edge: borrow the transition of the failure node
                tr[cur][ch] = fallback;
                continue;
            }
            fail[child] = fallback;
            flag[child] += flag[fallback];
            q.push(child);
        }
    }
}
void ksm(int x)
{
//  while(x)
//  {
//      if (x & 1) jzc(tmp,unit);
//      jzc(unit,unit);
//      x>>=1;
//  }
//上面是快速幂的位运算写法，有兴趣自己研究。。。
if (x<=1) return ;
ksm(x/2);
jzc(tmp,tmp);
if (x%2==1)
jzc(tmp,unit);
}
// Reads m and n followed by m forbidden segments, builds the automaton, and
// prints the number of length-n strings over {A, C, T, G} that contain none
// of the segments, modulo mmod.
int main()
{
    if (scanf("%d%d",&m,&n)!=2) return 0;
    for (int i=1;i<=m;i++)
    {
        // s holds 12 bytes: cap the read at 11 characters plus the terminator
        if (scanf("%11s",s)!=1) break;
        add();
    }
    getfail();

    // Transition matrix: entry [i][j] counts the letters that lead from node
    // i to node j, ignoring every move that starts at or enters a flagged
    // node. tmp and unit start out identical.
    for (int i=0;i<=tot;i++)
        for (int k=0;k<4;k++)
            if (flag[tr[i][k]]==0&&flag[i]==0)
            {
                tmp[i][tr[i][k]]++;
                tmp[i][tr[i][k]]%=mmod;
                unit[i][tr[i][k]]=tmp[i][tr[i][k]];
            }

    // State vector after the first character. The article notes that this
    // explicit vector is optional.
    for (int i=0;i<4;i++)
        if (flag[tr[0][i]]==0) f[0][tr[0][i]]=(f[0][tr[0][i]]+1)%mmod;

    int ans=0;
    if (n<=1)
    {
        // Sum the state vector over every node 0..tot. The previous loop ran
        // i<tot and added f[0][tot] each time, which gave 0 for m=0, n=1
        // where the answer is 4.
        for (int i=0;i<=tot;i++)
            ans+=f[0][i];
        ans%=mmod;
    }
    else
    {
        // tmp becomes the transition matrix raised to the power n-1
        ksm(n-1);
        // row vector f[0] times tmp; row 0 of unit is reused as the output
        for (int i=0;i<=tot;i++)
        {
            unit[0][i]=0;
            for (int k=0;k<=tot;k++)
                unit[0][i]+=f[0][k]*tmp[k][i];
            unit[0][i]%=mmod;
        }
        for (int i=0;i<=tot;i++)
            ans+=unit[0][i];
        ans%=mmod;
    }
    printf("%d\n",ans);
    return 0;
}

不加初始化数组的做法，自己想想为什么可以不初始化：

#include<iostream>
#include<algorithm>
#include<stdio.h>
#include<string.h>
#include<queue>
// Modulus applied to every count; the task asks for the answer mod 100000.
#define mmod (100000)
using namespace std;

// Work queue for the breadth-first pass in getfail().
queue<int> q;

// tot: index of the most recently created trie node (node 0 is the root).
int tot;
// tr[v][ch]: node reached from node v on letter index ch (0..3).
int tr[101][4];
// flag[v]: non-zero when node v must be avoided (set in add(), propagated in getfail()).
int flag[405];
// fail[v]: failure link of node v.
int fail[405];
// m: number of forbidden segments, n: required sequence length (both read in main()).
int m, n;

// c: scratch matrix written by mul() before the result is copied back.
long long c[105][105];
// unit, tmp: the two matrices main() fills with the transition counts and ksm() multiplies.
long long unit[105][105];
long long tmp[105][105];

// Input buffer for one forbidden segment.
char s[12];
// Maps a nucleotide letter to its column in the trie child table:
// 'A' -> 0, 'T' -> 1, 'C' -> 2, 'G' -> 3.
// Any other character yields -1. The previous version fell off the end of
// the function for such input, which is undefined behaviour for a function
// returning int. Callers must not use -1 as an array index.
int getc(char x)
{
    if (x=='A') return 0;
    if (x=='T') return 1;
    if (x=='C') return 2;
    if (x=='G') return 3;
    return -1;
}
// Inserts the NUL-terminated segment held in the global buffer s into the
// trie, allocating node indices on demand, then flags the node reached by
// the last character as one that must be avoided.
void add()
{
    int cur = 0;                        // start at the root
    for (int pos = 0; s[pos] != '\0'; ++pos)
    {
        int &slot = tr[cur][getc(s[pos])];
        if (slot == 0)
            slot = ++tot;               // no edge yet: take the next node index
        cur = slot;
    }
    flag[cur] = 1;
}
void mul(long long a[105][105], long long b[105][105])
{
for (int i = 0; i <= tot; i++)
for (int j = 0; j <= tot; j++)
{
c[i][j] = 0;
for (int k = 0; k <= tot; k++)
c[i][j] += a[i][k] * b[k][j];
c[i][j] %= 100000;
}
for(int i = 0;i <= tot; i++)
for(int j = 0; j <= tot; j++)
a[i][j] = c[i][j];
}
// Breadth-first pass over the trie that
//  - sets fail[] for every node,
//  - adds the failure target's flag onto each node's flag, and
//  - fills every missing child slot with the failure target's transition,
//    so that afterwards tr[v][ch] is defined for every node and letter.
inline void getfail()
{
    // Children of the root keep fail == 0; just enqueue them.
    for (int letter = 0; letter < 4; ++letter)
        if (tr[0][letter] != 0)
            q.push(tr[0][letter]);

    for (; !q.empty(); q.pop())
    {
        const int node = q.front();
        const int back = fail[node];
        for (int letter = 0; letter < 4; ++letter)
        {
            const int via_fail = tr[back][letter];
            int &next = tr[node][letter];
            if (next != 0)
            {
                fail[next] = via_fail;
                flag[next] += flag[via_fail];
                q.push(next);
            }
            else
            {
                // missing edge: borrow the transition of the failure node
                next = via_fail;
            }
        }
    }
}
// Fast exponentiation on the global matrices, least-significant bit first:
// for each bit of x, tmp is multiplied by the current unit when the bit is
// set, and unit is then squared. Nothing happens when x <= 0.
void ksm(int x)
{
    for (int e = x; e > 0; e >>= 1)
    {
        if (e % 2 != 0)
            mul(tmp, unit);
        mul(unit, unit);
    }
}
// Reads m and n followed by m forbidden segments, builds the automaton, and
// prints the number of length-n strings over {A, C, T, G} that contain none
// of the segments, modulo mmod. This variant needs no separate start vector:
// tmp already holds one copy of the transition matrix before ksm(n-1)
// multiplies further powers onto it, and row 0 of the result is summed.
int main()
{
    if (scanf("%d%d",&m,&n)!=2) return 0;
    for (int i=1;i<=m;i++)
    {
        // s holds 12 bytes: cap the read at 11 characters plus the terminator
        // (the unbounded "%s" could overrun the buffer on an over-long token)
        if (scanf("%11s",s)!=1) break;
        add();
    }
    getfail();

    // Transition matrix: entry [i][j] counts the letters that lead from node
    // i to node j, ignoring every move that starts at or enters a flagged
    // node. tmp and unit start out identical.
    for (int i=0;i<=tot;i++)
        for (int k=0;k<4;k++)
            if (flag[tr[i][k]]==0&&flag[i]==0)
            {
                tmp[i][tr[i][k]]=(tmp[i][tr[i][k]]+1)%mmod;
                unit[i][tr[i][k]]=tmp[i][tr[i][k]];
            }
    int  ans=0;
    ksm(n-1);
    // Row 0 corresponds to starting at the root; add up every end node.
    for (int i=0;i<=tot;i++)
        ans=ans+tmp[0][i];
    ans=ans%mmod;
    printf("%d\n",ans);
    return 0;
}

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航

【POJ 2778】DNA Sequence 中文题意&题解&代码（C++）

DNA Sequence

【POJ 2778】DNA Sequence 中文题意&题解&代码（C++）