Given a string s
, your task is to count the number of distinct substrings of s
. A substring is a contiguous sequence of characters within the string. The empty substring is not counted.
s
consisting of lowercase English letters.s
.Constraints:
1 <= s.length <= 10^5
s
contains only lowercase English letters.
The most direct way to solve this problem is to generate all possible substrings of s
, store them in a set (to ensure uniqueness), and finally return the size of the set. However, this brute-force approach is inefficient for large strings because the number of substrings grows quadratically with the length of the string, and storing all substrings uses a lot of memory.
To optimize, we need a way to efficiently count unique substrings without explicitly storing all of them. This leads us to consider advanced data structures like Suffix Tries, Suffix Trees, or Suffix Automata. These data structures allow us to represent all substrings compactly and count them efficiently.
Suffix Automaton is particularly suitable because it can be built in linear time and space, and it allows us to count the number of distinct substrings in O(n)
time, where n
is the length of the string.
We will use a Suffix Automaton to solve the problem efficiently.
This approach is optimal for large strings and is the standard method for problems involving counting unique substrings.
Let's consider the string s = "ababa"
.
The automaton avoids redundant storage and counts all unique substrings in a single pass.
O(n^2)
to generate all substrings, plus O(n^2)
for inserting them into a set.O(n^2)
for storing all substrings.O(n)
, since building the automaton and counting substrings are both linear in the length of s
.O(n)
, as the number of states and transitions is linear in n
.The Suffix Automaton is highly efficient for this problem, making it suitable for large input sizes.
class SuffixAutomaton:
def __init__(self):
self.states = [{'len': 0, 'link': -1, 'next': {}}]
self.last = 0
def extend(self, c):
p = self.last
curr = len(self.states)
self.states.append({'len': self.states[p]['len'] + 1, 'link': 0, 'next': {}})
while p != -1 and c not in self.states[p]['next']:
self.states[p]['next'][c] = curr
p = self.states[p]['link']
if p == -1:
self.states[curr]['link'] = 0
else:
q = self.states[p]['next'][c]
if self.states[p]['len'] + 1 == self.states[q]['len']:
self.states[curr]['link'] = q
else:
clone = len(self.states)
self.states.append({
'len': self.states[p]['len'] + 1,
'link': self.states[q]['link'],
'next': self.states[q]['next'].copy()
})
while p != -1 and self.states[p]['next'][c] == q:
self.states[p]['next'][c] = clone
p = self.states[p]['link']
self.states[q]['link'] = clone
self.states[curr]['link'] = clone
self.last = curr
def count_distinct_substrings(self):
res = 0
for state in self.states[1:]:
res += state['len'] - self.states[state['link']]['len']
return res
class Solution:
def countDistinctSubstrings(self, s: str) -> int:
sa = SuffixAutomaton()
for c in s:
sa.extend(c)
return sa.count_distinct_substrings()
#include <bits/stdc++.h>
using namespace std;
struct State {
int len, link;
unordered_map<char, int> next;
};
class SuffixAutomaton {
public:
vector<State> st;
int last;
SuffixAutomaton() {
st.push_back({0, -1, {}});
last = 0;
}
void extend(char c) {
int cur = st.size();
st.push_back({st[last].len + 1, 0, {}});
int p = last;
while (p != -1 && !st[p].next.count(c)) {
st[p].next[c] = cur;
p = st[p].link;
}
if (p == -1) {
st[cur].link = 0;
} else {
int q = st[p].next[c];
if (st[p].len + 1 == st[q].len) {
st[cur].link = q;
} else {
int clone = st.size();
st.push_back({st[p].len + 1, st[q].link, st[q].next});
while (p != -1 && st[p].next[c] == q) {
st[p].next[c] = clone;
p = st[p].link;
}
st[q].link = st[cur].link = clone;
}
}
last = cur;
}
long long count_distinct_substrings() {
long long res = 0;
for (int i = 1; i < st.size(); ++i) {
res += st[i].len - st[st[i].link].len;
}
return res;
}
};
class Solution {
public:
int countDistinctSubstrings(string s) {
SuffixAutomaton sa;
for (char c : s) sa.extend(c);
return sa.count_distinct_substrings();
}
};
import java.util.*;
class State {
int len, link;
Map<Character, Integer> next = new HashMap<>();
State(int len, int link) {
this.len = len;
this.link = link;
}
}
class SuffixAutomaton {
ArrayList<State> st = new ArrayList<>();
int last;
SuffixAutomaton() {
st.add(new State(0, -1));
last = 0;
}
void extend(char c) {
int cur = st.size();
st.add(new State(st.get(last).len + 1, 0));
int p = last;
while (p != -1 && !st.get(p).next.containsKey(c)) {
st.get(p).next.put(c, cur);
p = st.get(p).link;
}
if (p == -1) {
st.get(cur).link = 0;
} else {
int q = st.get(p).next.get(c);
if (st.get(p).len + 1 == st.get(q).len) {
st.get(cur).link = q;
} else {
int clone = st.size();
State cloneState = new State(st.get(p).len + 1, st.get(q).link);
cloneState.next.putAll(st.get(q).next);
st.add(cloneState);
while (p != -1 && st.get(p).next.get(c) == q) {
st.get(p).next.put(c, clone);
p = st.get(p).link;
}
st.get(q).link = st.get(cur).link = clone;
}
}
last = cur;
}
int countDistinctSubstrings() {
int res = 0;
for (int i = 1; i < st.size(); ++i) {
res += st.get(i).len - st.get(st.get(i).link).len;
}
return res;
}
}
class Solution {
public int countDistinctSubstrings(String s) {
SuffixAutomaton sa = new SuffixAutomaton();
for (char c : s.toCharArray()) sa.extend(c);
return sa.countDistinctSubstrings();
}
}
class SuffixAutomaton {
constructor() {
this.states = [{len: 0, link: -1, next: {}}];
this.last = 0;
}
extend(c) {
let p = this.last;
let curr = this.states.length;
this.states.push({len: this.states[p].len + 1, link: 0, next: {}});
while (p !== -1 && !(c in this.states[p].next)) {
this.states[p].next[c] = curr;
p = this.states[p].link;
}
if (p === -1) {
this.states[curr].link = 0;
} else {
let q = this.states[p].next[c];
if (this.states[p].len + 1 === this.states[q].len) {
this.states[curr].link = q;
} else {
let clone = this.states.length;
this.states.push({
len: this.states[p].len + 1,
link: this.states[q].link,
next: {...this.states[q].next}
});
while (p !== -1 && this.states[p].next[c] === q) {
this.states[p].next[c] = clone;
p = this.states[p].link;
}
this.states[q].link = this.states[curr].link = clone;
}
}
this.last = curr;
}
countDistinctSubstrings() {
let res = 0;
for (let i = 1; i < this.states.length; ++i) {
res += this.states[i].len - this.states[this.states[i].link].len;
}
return res;
}
}
var countDistinctSubstrings = function(s) {
let sa = new SuffixAutomaton();
for (let c of s) sa.extend(c);
return sa.countDistinctSubstrings();
};
To count the number of distinct substrings in a string efficiently, we use a Suffix Automaton. This data structure allows us to represent all unique substrings compactly and count them in linear time. By tracking the length differences between states and their suffix links, we can sum the number of new substrings each state introduces. This approach is both elegant and efficient, making it suitable for large strings and a great example of how advanced data structures can optimize seemingly brute-force problems.