SuffixArray.cc 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. // Suffix array construction in O(L log^2 L) time. Routine for
  2. // computing the length of the longest common prefix of any two
  3. // suffixes in O(log L) time.
  4. //
  5. // INPUT: string s
  6. //
  7. // OUTPUT: array suffix[] such that suffix[i] = index (from 0 to L-1)
  8. // of substring s[i...L-1] in the list of sorted suffixes.
  9. // That is, if we take the inverse of the permutation suffix[],
  10. // we get the actual suffix array.
  11. #include <vector>
  12. #include <iostream>
  13. #include <string>
  14. using namespace std;
  15. struct SuffixArray {
  16. const int L;
  17. string s;
  18. vector<vector<int> > P;
  19. vector<pair<pair<int,int>,int> > M;
  20. SuffixArray(const string &s) : L(s.length()), s(s), P(1, vector<int>(L, 0)), M(L) {
  21. for (int i = 0; i < L; i++) P[0][i] = int(s[i]);
  22. for (int skip = 1, level = 1; skip < L; skip *= 2, level++) {
  23. P.push_back(vector<int>(L, 0));
  24. for (int i = 0; i < L; i++)
  25. M[i] = make_pair(make_pair(P[level-1][i], i + skip < L ? P[level-1][i + skip] : -1000), i);
  26. sort(M.begin(), M.end());
  27. for (int i = 0; i < L; i++)
  28. P[level][M[i].second] = (i > 0 && M[i].first == M[i-1].first) ? P[level][M[i-1].second] : i;
  29. }
  30. }
  31. vector<int> GetSuffixArray() { return P.back(); }
  32. // returns the length of the longest common prefix of s[i...L-1] and s[j...L-1]
  33. int LongestCommonPrefix(int i, int j) {
  34. int len = 0;
  35. if (i == j) return L - i;
  36. for (int k = P.size() - 1; k >= 0 && i < L && j < L; k--) {
  37. if (P[k][i] == P[k][j]) {
  38. i += 1 << k;
  39. j += 1 << k;
  40. len += 1 << k;
  41. }
  42. }
  43. return len;
  44. }
  45. };
  46. int main() {
  47. // bobocel is the 0'th suffix
  48. // obocel is the 5'th suffix
  49. // bocel is the 1'st suffix
  50. // ocel is the 6'th suffix
  51. // cel is the 2'nd suffix
  52. // el is the 3'rd suffix
  53. // l is the 4'th suffix
  54. SuffixArray suffix("bobocel");
  55. vector<int> v = suffix.GetSuffixArray();
  56. // Expected output: 0 5 1 6 2 3 4
  57. // 2
  58. for (int i = 0; i < v.size(); i++) cout << v[i] << " ";
  59. cout << endl;
  60. cout << suffix.LongestCommonPrefix(0, 2) << endl;
  61. }