|
@@ -1,57 +1,63 @@
|
|
|
-// Suffix array construction in O(L log^2 L) time. Routine for
|
|
|
-// computing the length of the longest common prefix of any two
|
|
|
-// suffixes in O(log L) time.
|
|
|
-// INPUT: string s
|
|
|
-// OUTPUT: array suffix[] such that suffix[i] = index (from 0 to L-1)
|
|
|
-// of substring s[i...L-1] in the list of sorted suffixes.
|
|
|
-// That is, if we take the inverse of the permutation suffix[],
|
|
|
-// we get the actual suffix array.
|
|
|
+// Routine for computing the length of the longest common prefix of any two
|
|
|
+// suffixes
|
|
|
+const int maxn = 1000;
|
|
|
struct SuffixArray {
|
|
|
- const int L;
|
|
|
- string s;
|
|
|
- VVI P;
|
|
|
- vector<pair<PII,int> > M;
|
|
|
- SuffixArray(const string &s) : L(s.length()), s(s), P(1, VI(L, 0)), M(L) {
|
|
|
- for(int i = 0; i < L; i++)
|
|
|
- P[0][i] = int(s[i]);
|
|
|
- for(int skip = 1, level = 1; skip < L; skip *= 2, level++) {
|
|
|
- P.push_back(VI(L, 0));
|
|
|
- for(int i = 0; i < L; i++)
|
|
|
- M[i] = make_pair(make_pair(P[level-1][i], i + skip < L ? P[level-1][i + skip] : -1000), i);
|
|
|
- sort(M.begin(), M.end());
|
|
|
- for(int i = 0; i < L; i++)
|
|
|
- P[level][M[i].second] = (i > 0 && M[i].first == M[i-1].first) ? P[level][M[i-1].second] : i;
|
|
|
+  char s[maxn]; // original string; the last character must be 0, and 0 must not appear anywhere before it.
|
|
|
+ int sa[maxn]; //sa[i] = the index of the i-th smallest suffix.
|
|
|
+  int rrank[maxn]; // rrank[i] = rank of the suffix starting at index i (inverse of sa). sa[0] must be n-1, i.e. rrank[n-1] == 0.
|
|
|
+  int height[maxn]; // height[i] = length of the longest common prefix of the suffixes starting at sa[i-1] and sa[i]
|
|
|
+ int t[maxn], t2[maxn], c[maxn]; // aux
|
|
|
+  int n; // the number of characters in s.
|
|
|
+ void clear() {
|
|
|
+ n = 0;
|
|
|
+ memset(sa, 0, sizeof(sa));
|
|
|
+ memset(t, 0 , sizeof(t));
|
|
|
+ memset(t2, 0 , sizeof(t2));
|
|
|
+ }
|
|
|
+ // m = max(char) + 1;
|
|
|
+  // Build the suffix array of string s; every character must be between 0 and m-1.
|
|
|
+  // Initialize s and n before calling this function.
|
|
|
+ void build_sa(int m) {
|
|
|
+ int i, *x = t, *y = t2;
|
|
|
+ for(i = 0; i < m; i++) c[i] = 0;
|
|
|
+ for(i = 0; i < n; i++) c[x[i] = s[i]]++;
|
|
|
+ for(i = 1; i < m; i++) c[i] += c[i-1];
|
|
|
+ for(i = n-1; i >= 0; i--) sa[--c[x[i]]] = i;
|
|
|
+ for(int k = 1; k <= n; k <<= 1) {
|
|
|
+ int p = 0;
|
|
|
+ for(i = n-k; i < n; i++) y[p++] = i;
|
|
|
+ for(i = 0; i < n; i++) if(sa[i] >= k) y[p++] = sa[i]-k;
|
|
|
+ for(i = 0; i < m; i++) c[i] = 0;
|
|
|
+ for(i = 0; i < n; i++) c[x[y[i]]]++;
|
|
|
+ for(i = 0; i < m; i++) c[i] += c[i-1];
|
|
|
+ for(i = n-1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];
|
|
|
+ swap(x, y);
|
|
|
+ p = 1; x[sa[0]] = 0;
|
|
|
+ for(i = 1; i < n; i++)
|
|
|
+ x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k] ? p-1 : p++;
|
|
|
+ if(p >= n) break;
|
|
|
+ m = p;
|
|
|
}
|
|
|
- }
|
|
|
- VI GetSuffixArray() { return P.back(); }
|
|
|
- // returns the length of the longest common prefix of s[i...L-1] and s[j...L-1]
|
|
|
- int LongestCommonPrefix(int i, int j) {
|
|
|
- int len = 0;
|
|
|
- if(i == j) return L - i;
|
|
|
- for(int k = P.size() - 1; k >= 0 && i < L && j < L; k--) {
|
|
|
- if (P[k][i] == P[k][j]) {
|
|
|
- i += 1 << k;
|
|
|
- j += 1 << k;
|
|
|
- len += 1 << k;
|
|
|
- }
|
|
|
- }
|
|
|
- return len;
|
|
|
- }
|
|
|
+ }
|
|
|
+ void build_height() {
|
|
|
+ int i, k = 0;
|
|
|
+ for(i = 0; i < n; i++) rrank[sa[i]] = i;
|
|
|
+ for(i = 0; i < n; i++) {
|
|
|
+ if(k) k--;
|
|
|
+ int j = sa[rrank[i]-1];
|
|
|
+ while(s[i+k] == s[j+k]) k++;
|
|
|
+ height[rrank[i]] = k;
|
|
|
+ }
|
|
|
+ }
|
|
|
+  // The LCP of any two suffixes with ranks i < j = RMQ (range minimum) over height[i+1], ..., height[j]
|
|
|
};
|
|
|
-int test() {
|
|
|
- // bobocel is the 0'th suffix
|
|
|
- // obocel is the 5'th suffix
|
|
|
- // bocel is the 1'st suffix
|
|
|
- // ocel is the 6'th suffix
|
|
|
- // cel is the 2'nd suffix
|
|
|
- // el is the 3'rd suffix
|
|
|
- // l is the 4'th suffix
|
|
|
- SuffixArray suffix("bobocel");
|
|
|
- VI v = suffix.GetSuffixArray();
|
|
|
- // Expected output: 0 5 1 6 2 3 4
|
|
|
- // 2
|
|
|
- for(int i = 0; i < v.size(); i++)
|
|
|
- cout << v[i] << " ";
|
|
|
- cout << endl;
|
|
|
- cout << suffix.LongestCommonPrefix(0, 2) << endl;
|
|
|
+void test() {
|
|
|
+ SuffixArray SA;
|
|
|
+ strcpy(SA.s, "1122330");
|
|
|
+ SA.clear();
|
|
|
+ SA.n = strlen(SA.s);
|
|
|
+ SA.build_sa(100);
|
|
|
+ SA.build_height();
|
|
|
+ for (int i = 0; i < SA.n; i++)
|
|
|
+ cout << i << " " << SA.s[i] << " " << SA.sa[i] << " " << SA.rrank[i] << endl;
|
|
|
}
|