Competitive Programming: SPOJ – Distinct Substrings

 

Given a string, we need to find the total number of its distinct substrings.

Input

T- number of test cases. T<=20;
Each test case consists of one string, whose length is <= 1000

Output

For each test case output one number saying the number of distinct substrings.

Example

Sample Input:
2
CCCCC
ABABA

Sample Output:
5
9

Explanation for the testcase with string ABABA:
len=1 : A,B
len=2 : AB,BA
len=3 : ABA,BAB
len=4 : ABAB,BABA
len=5 : ABABA
Thus, total number of distinct substrings is 9.

Link to problem

Solution below . . .

Solution

Suffix array and longest common prefix (LCP) array. The link has a detailed description of the data structures and how to use them to solve the distinct substrings problem (see Problem 4).

The link notes that the problem can also be solved by building a suffix trie and counting the nodes. Building a suffix trie is O(n^2). For this problem, we have strings of up to 1,000 characters. Using 1 million operations per second as a ballpark limit, we’re not going to be able to build the suffix trie in the given time limit of 0.5 seconds.

import java.util.Arrays;
import java.util.Scanner;

class Suffix implements Comparable<Suffix> {
  int index;
  int rank[];

  Suffix() {
    rank = new int[2];
  }

  @Override
  public int compareTo(Suffix o) {
    return (rank[0] == o.rank[0]) ? rank[1] - o.rank[1]
        : rank[0] - o.rank[0];
  }
}


class Main {

  static int[] buildSuffixArray(String s, int n) {
    Suffix suffixes[] = new Suffix[n];

    for (int i = 0; i < n; i++) {
      suffixes[i] = new Suffix();
      suffixes[i].index = i;
      suffixes[i].rank[0] = s.charAt(i);
      suffixes[i].rank[1] = ((i + 1) < n) ? (s.charAt(i + 1)) : -1;
    }

    Arrays.sort(suffixes);

    int ind[] = new int[n];

    for (int k = 4; k < 2 * n; k = k * 2) {
      int rank = 0;
      int prevRank = suffixes[0].rank[0];

      suffixes[0].rank[0] = rank;
      ind[suffixes[0].index] = 0;

      for (int i = 1; i < n; i++) {
        if (suffixes[i].rank[0] == prevRank
            && suffixes[i].rank[1] == suffixes[i - 1].rank[1]) {
          prevRank = suffixes[i].rank[0];
          suffixes[i].rank[0] = rank;
        }

        else {
          prevRank = suffixes[i].rank[0];
          suffixes[i].rank[0] = ++rank;
        }
        ind[suffixes[i].index] = i;
      }

      for (int i = 0; i < n; i++) {
        int nextindex = suffixes[i].index + k / 2;
        suffixes[i].rank[1] =
            (nextindex < n) ? suffixes[ind[nextindex]].rank[0] : -1;
      }

      Arrays.sort(suffixes);
    }

    int suffixArr[] = new int[n];
    for (int i = 0; i < n; i++)
      suffixArr[i] = suffixes[i].index;

    return suffixArr;
  }

  static int[] buildLCPArray(String s, int[] suffixArr) {
    int n = suffixArr.length;

    int lcp[] = new int[n];
    int invSuff[] = new int[n];

    for (int i = 0; i < n; i++)
      invSuff[suffixArr[i]] = i;

    int k = 0;

    for (int i = 0; i < n; i++) {
      if (invSuff[i] == n - 1) {
        k = 0;
        continue;
      }

      int j = suffixArr[invSuff[i] + 1];

      while (i + k < n && j + k < n && s.charAt(i + k) == s.charAt(j + k))
        k++;

      lcp[invSuff[i]] = k;

      if (k > 0)
        k--;
    }

    return lcp;
  }

  static int countDistinctSubstrings(String s) {
    int n = s.length();

    int suffixArr[] = buildSuffixArray(s, n);
    int lcp[] = buildLCPArray(s, suffixArr);

    int result = n - suffixArr[0];

    for (int i = 1; i < lcp.length; i++) {
      result += (n - suffixArr[i]) - lcp[i - 1];
    }

    return result;
  }

  public static void main(String[] args) {

    Scanner sc = new Scanner(System.in);

    int T = sc.nextInt();

    while (T-- > 0) {
      String s = sc.next();

      System.out.println(countDistinctSubstrings(s));
    }
    sc.close();
  }

}

Leave a Reply

Your email address will not be published. Required fields are marked *