class Solution:
def longestDupSubstring(self, S: str) -> str:
def search(L):
MOD = 2**63 - 1
base = 256
n = len(S)
curr = 0
for i in range(L):
curr = (curr * base + ord(S[i])) % MOD
seen = {curr}
baseL = pow(base, L, MOD)
for i in range(1, n - L + 1):
curr = (curr * base - ord(S[i - 1]) * baseL + ord(S[i + L - 1])) % MOD
if curr in seen:
return i
seen.add(curr)
return -1
left, right = 1, len(S)
res = ""
while left <= right:
mid = (left + right) // 2
idx = search(mid)
if idx != -1:
res = S[idx:idx + mid]
left = mid + 1
else:
right = mid - 1
return res
class Solution {
public:
string longestDupSubstring(string S) {
int n = S.size();
int left = 1, right = n, start = -1;
string res = "";
auto search = [&](int L) {
long long MOD = (1LL << 63) - 1;
long long base = 256, curr = 0;
unordered_set<long long> seen;
for (int i = 0; i < L; ++i)
curr = (curr * base + S[i]) % MOD;
seen.insert(curr);
long long baseL = 1;
for (int i = 0; i < L; ++i) baseL = (baseL * base) % MOD;
for (int i = 1; i <= n - L; ++i) {
curr = (curr * base - S[i - 1] * baseL % MOD + MOD) % MOD;
curr = (curr + S[i + L - 1]) % MOD;
if (seen.count(curr)) return i;
seen.insert(curr);
}
return -1;
};
while (left <= right) {
int mid = (left + right) / 2;
int idx = search(mid);
if (idx != -1) {
res = S.substr(idx, mid);
left = mid + 1;
} else {
right = mid - 1;
}
}
return res;
}
};
class Solution {
public String longestDupSubstring(String S) {
int n = S.length();
int left = 1, right = n;
String res = "";
while (left <= right) {
int mid = (left + right) / 2;
int idx = search(S, mid);
if (idx != -1) {
res = S.substring(idx, idx + mid);
left = mid + 1;
} else {
right = mid - 1;
}
}
return res;
}
private int search(String S, int L) {
long MOD = (1L << 63) - 1;
long base = 256, curr = 0;
int n = S.length();
for (int i = 0; i < L; ++i)
curr = (curr * base + S.charAt(i)) % MOD;
Set<Long> seen = new HashSet<>();
seen.add(curr);
long baseL = 1;
for (int i = 0; i < L; ++i) baseL = (baseL * base) % MOD;
for (int i = 1; i <= n - L; ++i) {
curr = (curr * base - S.charAt(i - 1) * baseL % MOD + MOD) % MOD;
curr = (curr + S.charAt(i + L - 1)) % MOD;
if (seen.contains(curr)) return i;
seen.add(curr);
}
return -1;
}
}
var longestDupSubstring = function(S) {
const n = S.length;
let left = 1, right = n, res = '';
const search = (L) => {
const MOD = 2 ** 63 - 1;
const base = 256;
let curr = 0;
for (let i = 0; i < L; ++i)
curr = (curr * base + S.charCodeAt(i)) % MOD;
const seen = new Set([curr]);
let baseL = 1;
for (let i = 0; i < L; ++i) baseL = (baseL * base) % MOD;
for (let i = 1; i <= n - L; ++i) {
curr = (curr * base - S.charCodeAt(i - 1) * baseL % MOD + MOD) % MOD;
curr = (curr + S.charCodeAt(i + L - 1)) % MOD;
if (seen.has(curr)) return i;
seen.add(curr);
}
return -1;
};
while (left <= right) {
const mid = Math.floor((left + right) / 2);
const idx = search(mid);
if (idx !== -1) {
res = S.substring(idx, idx + mid);
left = mid + 1;
} else {
right = mid - 1;
}
}
return res;
};
Given a string S
, find the longest substring that occurs at least twice in S
. The substrings may overlap. If there is no such substring, return an empty string. The input string consists of lowercase English letters.
S
(string)1 <= S.length <= 3 * 10^4
The problem asks for the longest substring that appears more than once in a given string. The naive approach is to check all possible substrings and see if any repeats, but this quickly becomes infeasible as the string grows longer, due to the huge number of substrings.
To optimize, we need a way to efficiently check for duplicate substrings of a given length. This naturally leads to using hashing (like Rabin-Karp) for substring comparison, combined with binary search to find the maximum length efficiently.
The key insight is: if we can efficiently check whether a duplicate substring of length L
exists, we can use binary search to find the largest such L
.
L
, we check if any substring of that length appears more than once.L
, we use a rolling hash to compute hash values for all substrings of length L
.This approach is both efficient and scalable, handling large input sizes within reasonable time.
Let's consider S = "banana"
.
O(N^3)
(comparing all pairs of substrings)O(1)
(no extra data structures)O(N \log N)
for binary search times O(N)
for hashing each length, so O(N \log N)
overall.O(N)
for storing hashes.This makes the optimized approach practical for large strings.
To find the longest duplicate substring, we combine binary search (to guess the length) with efficient substring comparison using rolling hashes (Rabin-Karp). This avoids the inefficiency of brute-force substring comparison and leverages hash sets for fast duplicate detection. The result is an elegant and scalable solution that efficiently finds the answer even for large inputs.