后缀自动机相关

Posted by WHZ0325 on 2023-07-16, Viewed times

P3804 后缀自动机

基数排序得到 endpos 大小。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#include <cstdio>
#include <cstring>
#include <algorithm>
#define N 1000005
typedef long long int ll;
char s[N]; int len[N << 1], fa[N << 1], ch[N << 1][26], tot, lst;
inline void extend(char c) {
int p = lst, np = ++tot; len[np] = len[p] + 1; lst = np;
while(~p && !ch[p][c]) ch[p][c] = np, p = fa[p];
if(~p) {
int q = ch[p][c];
if(len[p] + 1 == len[q]) fa[np] = q;
else {
int nq = ++tot; memcpy(ch[nq], ch[q], sizeof(ch[q]));
len[nq] = len[p] + 1; fa[nq] = fa[q]; fa[q] = fa[np] = nq;
while(~p && ch[p][c] == q) ch[p][c] = nq, p = fa[p];
}
}
}
int cx[N << 1], c[N], res[N << 1];
int main() {
scanf("%s", s); int n = strlen(s); fa[0] = -1;
for(int i = 0; i < n; ++i) { extend(s[i] - 'a'); ++cx[lst]; }
for(int i = 0; i <= tot; ++i) ++c[len[i]];
for(int i = 1; i <= n; ++i) c[i] += c[i - 1];
for(int i = 0; i <= tot; ++i) res[--c[len[i]]] = i;
ll ans = 0;
for(int i = tot; i >= 1; --i) {
cx[fa[res[i]]] += cx[res[i]];
if(cx[res[i]] > 1) {
ans = std::max(ans, (ll)cx[res[i]] * len[res[i]]);
}
}
printf("%lld\n", ans);
return 0;
}

DISUBSTR

本质不同子串数,len[i] - len[fa[i]] 求和即可。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#include <cstdio>
#include <cstring>
#include <map>
#define N 1005
char s[N];
int fa[N << 1], len[N << 1], tot, lst; std::map<char, int> ch[N << 1];
inline void extend(char c) {
int p = lst, np = ++tot; len[np] = len[p] + 1; lst = np;
while(~p && ch[p].find(c) == ch[p].end()) ch[p][c] = np, p = fa[p];
if(~p) {
int q = ch[p][c];
if(len[p] + 1 == len[q]) fa[np] = q;
else {
int nq = ++tot; ch[nq] = ch[q];
len[nq] = len[p] + 1; fa[nq] = fa[q]; fa[q] = fa[np] = nq;
while(~p && ch[p][c] == q) ch[p][c] = nq, p = fa[p];
}
}
}
int main() {
int t; scanf("%d", &t);
while(t--) {
scanf("%s", s); int n = strlen(s);
tot = lst = 0; fa[0] = -1; len[0] = 0;
for(int i = 0; i < n; ++i) extend(s[i]);
int ans = 0;
for(int i = 1; i <= tot; ++i) {
ans += len[i] - len[fa[i]];
}
printf("%d\n", ans);
for(int i = 0; i <= tot; ++i) {
fa[i] = 0; len[i] = 0; ch[i].clear();
}
}
return 0;
}

LCS

两个字符串的最长公共子串,对一个串建后缀自动机,另一个串在上面跑,有边就走,同时累加匹配的长度,没有边就沿 link 往上跳,这时匹配的长度更新为 res = min(len[cur], res),由于匹配串每个字符只会沿边走一次将匹配长度加一,沿 link 上跳一次至少将匹配长度减一,和双指针差不多的证法得到时间复杂度是线性的。

注意跳到 -1 时要将匹配长度手动置 0,且将指针指向后缀自动机的根节点处。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#include <cstdio>
#include <cstring>
#include <algorithm>
#define N 250005
int fa[N << 1], ch[N << 1][26], len[N << 1], tot, lst;
inline void extend(char c) {
int p = lst, np = ++tot; len[np] = len[p] + 1; lst = np;
while(~p && !ch[p][c]) ch[p][c] = np, p = fa[p];
if(~p) {
int q = ch[p][c];
if(len[p] + 1 == len[q]) fa[np] = q;
else {
int nq = ++tot; memcpy(ch[nq], ch[q], sizeof(ch[q]));
len[nq] = len[p] + 1; fa[nq] = fa[q]; fa[q] = fa[np] = nq;
while(~p && ch[p][c] == q) ch[p][c] = nq, p = fa[p];
}
}
}
char s[N];
int main() {
scanf("%s", s); int n = strlen(s); fa[0] = -1;
for(int i = 0; i < n; ++i) extend(s[i] - 'a');
scanf("%s", s); n = strlen(s);
int ans = 0, res = 0, cur = 0;
for(int i = 0; i < n; ++i) {
while(~cur && !ch[cur][s[i] - 'a']) cur = fa[cur];
if(~cur) {
res = std::min(len[cur], res);
++res; cur = ch[cur][s[i] - 'a'];
ans = std::max(ans, res);
}
else res = cur = 0;
}
printf("%d\n", ans);
return 0;
}

LCS2

多个字符串的最长公共子串,先对第一个字符串建后缀自动机,再将其它串放在上面跑。对于后缀自动机上的每一个节点,记录每次经过时匹配的长度的最大值,对各个字符串的最大值取最小值即得公共部分,结果取所有节点的最大值。对于一个节点,其后缀链接树上子树中所有节点的匹配它自然也能够匹配(它是它们的后缀),所以需要一次基数排序来将子树的结果累计进来。需要注意的是,累计的过程中应避免超过它的 len 值导致出错,即 res[fa[i]] = std::max(std::min(len[fa[i]], res[fa[i]]), res[i])

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#include <cstdio>
#include <cstring>
#include <algorithm>
#define N 100005
int fa[N << 1], ch[N << 1][26], len[N << 1], tot, lst;
inline void extend(char c) {
int p = lst, np = ++tot; len[np] = len[p] + 1; lst = np;
while(~p && !ch[p][c]) ch[p][c] = np, p = fa[p];
if(~p) {
int q = ch[p][c];
if(len[p] + 1 == len[q]) fa[np] = q;
else {
int nq = ++tot; memcpy(ch[nq], ch[q], sizeof(ch[q]));
len[nq] = len[p] + 1; fa[nq] = fa[q]; fa[q] = fa[np] = nq;
while(~p && ch[p][c] == q) ch[p][c] = nq, p = fa[p];
}
}
}
char s[N]; int ans[N << 1], res[N << 1], c[N], order[N << 1];
int main() {
scanf("%s", s); int ln = strlen(s); fa[0] = -1;
for(int i = 0; i < ln; ++i) extend(s[i] - 'a');
for(int i = 0; i <= tot; ++i) ++c[len[i]];
for(int i = 1; i <= ln; ++i) c[i] += c[i - 1];
for(int i = 0; i <= tot; ++i) order[--c[len[i]]] = i;
for(int i = 0; i <= tot; ++i) ans[i] = -1;
while(scanf("%s", s) == 1) {
int n = strlen(s); int cur = 0, p = 0;
for(int i = 0; i <= tot; ++i) res[i] = 0;
for(int i = 0; i < n; ++i) {
while(~p && !ch[p][s[i] - 'a']) {
p = fa[p];
if(~p) {
cur = std::min(cur, len[p]);
res[p] = std::max(res[p], cur);
}
}
if(~p) {
p = ch[p][s[i] - 'a'];
res[p] = std::max(res[p], ++cur);
}
else cur = p = 0;
}
for(int i = tot; i >= 1; --i) res[fa[i]] = std::max(std::min(len[fa[i]], res[fa[i]]), res[i]);
for(int i = 1; i <= tot; ++i) ans[i] = (~ans[i]) ? std::min(ans[i], res[i]) : res[i];
}
int out = 0;
for(int i = 1; i <= tot; ++i) out = std::max(out, ans[i]);
printf("%d\n", out);
return 0;
}

NSUBSTR

求一个字符串长度分别为 $1\dots n$ 的各个子串的最大出现次数。注意到较短长度子串最大出现次数一定不会小于较长长度子串最大出现次数,因为较长子串的一部分就是一个较短子串。还是用基数排序的方法算出每个节点的 endpos 大小统计即可,不用写线段树,时间复杂度是线性的。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#include <cstdio>
#include <cstring>
#include <algorithm>
#define N 250005
int fa[N << 1], ch[N << 1][26], len[N << 1], tot, lst;
inline void extend(char c) {
int p = lst, np = ++tot; len[np] = len[p] + 1; lst = np;
while(~p && !ch[p][c]) ch[p][c] = np, p = fa[p];
if(~p) {
int q = ch[p][c];
if(len[p] + 1 == len[q]) fa[np] = q;
else {
int nq = ++tot; memcpy(ch[nq], ch[q], sizeof(ch[q]));
len[nq] = len[p] + 1; fa[nq] = fa[q]; fa[q] = fa[np] = nq;
while(~p && ch[p][c] == q) ch[p][c] = nq, p = fa[p];
}
}
}
char s[N]; int ans[N], cx[N << 1], c[N], order[N << 1];
int main() {
scanf("%s", s); int n = strlen(s); fa[0] = -1;
for(int i = 0; i < n; ++i) { extend(s[i] - 'a'); ++cx[lst]; }
for(int i = 0; i <= tot; ++i) ++c[len[i]];
for(int i = 1; i <= n; ++i) c[i] += c[i - 1];
for(int i = 0; i <= tot; ++i) order[--c[len[i]]] = i;
for(int i = tot; i >= 1; --i) cx[fa[order[i]]] += cx[order[i]];
for(int i = 0; i <= tot; ++i) ans[len[i]] = std::max(ans[len[i]], cx[i]);
for(int i = n - 1; i >= 0; --i) ans[i] = std::max(ans[i], ans[i + 1]);
for(int i = 1; i <= n; ++i) printf("%d\n", ans[i]);
return 0;
}