您的位置:首页 > 其它

hdu 4622 求区间不同子串数 后缀数组|后缀自动机|字符串hash

2016-09-01 10:59 483 查看
题意:

给你一个长度不超过2000、只由小写字母组成的字符串,然后有q个询问(q不超过10000)。每个询问给出一个区间[l,r],问该区间内有多少个不同的子串。

后缀数组

分析:

这题的做法和论文那题的思想是一样的,就是对于[l,r]区间里的后缀,当一个串i加进去对答案的贡献必须减掉它和之前所有串重复的部分。也就是和它lcp最大的那个,所以答案就是:

所有的串-重复的串。(当然这里的串都是[l,r]区间里的)

我们用一个pos数组记录[l,r]区间内出现的后缀,初值为-1。pos[i]表示排名第i的后缀是谁,与sa的意义相同,但这里我们只要起点在l到r之间的后缀,所以其他位置都保持为-1。然后按名次从1到n扫描下来,如果pos[i] == -1,表示该名次的后缀不在[l,r]区间中,不做处理;否则就用类似论文题的方法进行处理。但这里要注意一个问题:对于加进来的一个在[l,r]内的后缀i,我们能获得的新的不同前缀(即要统计的子串)个数为r-sa[i]+1-d,其中d并不是上面的height[i],因为height[i]的长度有可能已经超过r-sa[i]+1(这是后缀i在区间内能提供的最长长度)。所以d应该是对于i之前所有加进来的后缀j,取max( min(lcp(j,i),min(r-j+1,r-i+1)) )。当然我们不能每次都枚举j,只要在扫描过程中不断更新这个d就可以了。

以上分析copy自:http://blog.csdn.net/no__stop/article/details/9669325

而这题的关键也就是怎么去维护i串和之前加进来的串j之间最大的lcp(i,j)

看到网上有好几种方法:

1.http://www.cnblogs.com/Lyush/archive/2013/08/02/3233573.html

2.http://blog.csdn.net/u012936765/article/details/44162749

3.http://blog.csdn.net/bossup/article/details/40191095 这种方法我还没看明白QAQ

#include<cstdio>
#include<cstring>
#include<algorithm>
#include<iostream>
using namespace std;
// Ascending loop over the half-open range [s, t).
#define rep(i,s,t) for(int i=(s);i<(t);i++)
// Descending loop over the closed range [s, t], starting at t.
#define per(i,t,s) for(int i=(t);i>=(s);i--)

const int INF = 1e9 + 9;
// Capacity of every per-position array below (2000 characters plus slack).
const int N = 2000 + 9;

/******************** Prefix-doubling suffix array template ********************/

// Work arrays shared by build_sa() and getHeight(). Each one is indexed by a
// text position or by a rank, so each needs N entries.
int sa[N];      // sa[r]: start position of the suffix with rank r
int t1[N];      // rank buffer used by build_sa (swapped with t2 every round)
int t2[N];      // second-key order buffer used by build_sa
int c[N];       // counting-sort buckets
int rk[N];      // rk[p]: rank of the suffix starting at position p
int height[N];  // height[r]: value computed by getHeight() for rank r
void build_sa (int s[], int n, int m) {
int i, k, p, *x = t1, *y = t2;
for (i = 0; i < m; i++) c[i] = 0;
for (i = 0; i < n; i++) c[x[i] = s[i]]++;
for (i = 1; i < m; i++) c[i] += c[i - 1];
for (i = n - 1; i >= 0; i--) sa[--c[x[i]]] = i;
for (k = 1; k <= n; k <<= 1) {
p = 0;
for (i = n - k; i < n; i++) y[p++] = i;
for (i = 0; i < n; i++) if (sa[i] >= k) y[p++] = sa[i] - k;

for (i = 0; i < m; i++) c[i] = 0;
for (i = 0; i < n; i++) c[x[y[i]]]++;
for (i = 1; i < m; i++) c[i] += c[i - 1];
for (i = n - 1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];
swap (x, y);
p = 1;
x[sa[0]] = 0;
for (i = 1; i < n; i++)
x[sa[i]] = y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + k] == y[sa[i] + k] ? p - 1 : p ++;
if (p >= n) break;
m = p;
}
}
void getHeight (int s[], int n) {
int i, j, k = 0;
for (i = 0; i <= n; i++) rk[sa[i]] = i;
for (i = 0; i < n; i++) {
if (k) k--;
j = sa[rk[i] - 1];
while (s[i + k] == s[j + k]) k++;
height[rk[i]] = k;
}
}
/********************************************************************************/

int d
[20];
void init (int n, int A[]) {
//int n=A.size();
for (int i = 1; i <= n; i++) d[i][0] = A[i];
for (int j = 1; (1 << j) <= n + 1; j++)
for (int i = 0; i + (1 << j) - 1 <= n; i++)
d[i][j] = min (d[i][j - 1], d[i + (1 << (j - 1) )][j - 1]);
}
int query (int L, int R) {
int k = 0;
while ( (1 << (k + 1) ) <= R - L + 1) k++;
return min (d[L][k], d[R - (1 << k) + 1][k]);
}

int s
, pos
;
char str
;
int main() {
//freopen ("f.txt", "r", stdin);
int T, Q;
scanf ("%d", &T);
while (T--) {
scanf ("%s", str);
int n = strlen (str);
rep (i, 0, n) s[i] = str[i];
s
= 0;
build_sa (s, n + 1, 128);
getHeight (s, n);
init (n, height);
//for(int i=1;i<=n;i++)printf("%d ",sa[i]);printf("\n");
//for(int i=1;i<=n;i++)printf("%d ",height[i]);printf("\n");
scanf ("%d", &Q);
while (Q--) {
int l, r;
scanf ("%d%d", &l, &r);
int ans = (r - l + 1) * (r - l + 2) / 2;
for (int i = 0; i <= n; i++) pos[i] = -1;
for (int i = l; i <= r; i++) pos[rk[i - 1]] = i - 1;
int last = -1, lcp = 0;
for (int i = 1; i <= n; i++) {
if (pos[i] != -1) {
if (last != -1) {
int t = query (rk[last] + 1, rk[pos[i]]);
ans -= min (r - pos[i], min (r - last, t) );
if (r - last >= r - pos[i] && t >= r - pos[i]);
else last = pos[i];
/*
               lcp = min (lcp, t);
 lcp = max (lcp, min (t, r - last) );
*/ans -= min (lcp, r - pos[i]);
} else last = pos[i];
}
}
printf ("%d\n", ans);
}
}
return 0;
}
/*

Sample Input
2
bbaba
5
3 4
2 2
2 5
2 4
1 4
baaba
5
3 3
3 4
1 4
3 5
5 5
Sample Output
3
1
7
5
8
1
3
8
5
1
Hint
I won't do anything against hash because I am nice.Of course this problem has a solution that don't rely on hash.
*/


后缀自动机还不理解,留坑待补~~

字符串Hash也可以解决这道题:

分析:

做法是把每个字符串映射成一个整数,然后递推求出不同的整数个数,判断不同的整数使用Hash,如果用map会超时。

#include<cstdio>
#include<cstring>
#include<algorithm>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;

// Number of buckets in the chained hash table.
const int HASH = 10007;
// Capacity of the per-position arrays and of the hash table's node pool.
const int N = 2016;
/*********************** Hash table template **************************/
/*
 * Chained hash map from a 64-bit key to the id it was last inserted with.
 * Nodes come from fixed arrays of N entries; there is no bounds check, so at
 * most N distinct keys may be inserted between two init() calls.
 */
struct HASHMAP {
    int head[HASH];                // head[b]: first node of bucket b, or -1
    int nex[N];                    // nex[i]: next node in the same bucket, or -1
    int cnt;                       // number of nodes in use
    unsigned long long state[N];   // state[i]: key stored in node i
    int f[N];                      // f[i]: id most recently stored for that key

    // Empties the table by resetting every bucket head and the node counter.
    void init() {
        cnt = 0;
        memset (head, -1, sizeof (head) );
    }

    /*
     * Records id for key val.
     * Returns the id previously stored for val, or 0 if val was not present,
     * so callers should use non-zero ids to tell the two cases apart.
     */
    int insert (unsigned long long val, int id) {
        int h = val % HASH;
        for (int i = head[h]; ~i; i = nex[i])
            if (val == state[i]) {
                int tmp = f[i];
                f[i] = id;
                return tmp;
            }
        // New key: take the next free node and push it on the bucket's chain.
        f[cnt] = id;
        state[cnt] = val;
        nex[cnt] = head[h];
        head[h] = cnt++;
        return 0;
    }
} Hash;
/***************************************************************/
const int XX = 123;
unsigned long long xp
, H
;
char str
;
int ans

;
int main() {
//freopen ("f.txt", "r", stdin);
xp[0] = 1;
for (int i = 1; i < N; i++) xp[i] = xp[i - 1] * XX;
int T, Q;
scanf ("%d", &T);
while (T--) {
scanf ("%s", str);
int n = strlen (str);
H[0] = 0;
for (int i = 1; i <= n; i++)
H[i] = H[i - 1] * XX + str[i - 1];
memset (ans, 0, sizeof (ans) );
for (int L = 1; L <= n; L++) {
Hash.init();
for (int i = 1; i + L - 1 <= n; i++) {
int pos = Hash.insert (H[i + L - 1] - H[i - 1] * xp[L], i);
ans[i][i + L - 1]++;
if (pos) ans[pos][i + L - 1]--;
}
}
for (int i = n; i >= 1; i--)
for (int j = 1; j <= n; j++)
ans[i][j] += ans[i + 1][j] + ans[i][j - 1] - ans[i + 1][j - 1];
scanf ("%d", &Q);
while (Q--) {
int l, r;
scanf ("%d%d", &l, &r);
printf ("%d\n", ans[l][r]);
}
}
return 0;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐