from collections import Counter
class Solution:
def findSubstring(self, s: str, words: list[str]) -> list[int]:
if not s or not words or not words[0]:
return []
word_len = len(words[0])
num_words = len(words)
substring_len = word_len * num_words
word_count = Counter(words)
res = []
for i in range(word_len):
left = i
right = i
curr_count = Counter()
while right + word_len <= len(s):
word = s[right:right+word_len]
right += word_len
if word in word_count:
curr_count[word] += 1
while curr_count[word] > word_count[word]:
curr_count[s[left:left+word_len]] -= 1
left += word_len
if right - left == substring_len:
res.append(left)
else:
curr_count.clear()
left = right
return res
#include <vector>
#include <string>
#include <unordered_map>
using namespace std;
class Solution {
public:
vector<int> findSubstring(string s, vector<string>& words) {
vector<int> res;
if (words.empty() || s.empty()) return res;
int wordLen = words[0].size();
int numWords = words.size();
int totalLen = wordLen * numWords;
unordered_map<string, int> wordCount;
for (const string& w : words) wordCount[w]++;
for (int i = 0; i < wordLen; ++i) {
int left = i, right = i;
unordered_map<string, int> currCount;
while (right + wordLen <= s.size()) {
string word = s.substr(right, wordLen);
right += wordLen;
if (wordCount.count(word)) {
currCount[word]++;
while (currCount[word] > wordCount[word]) {
currCount[s.substr(left, wordLen)]--;
left += wordLen;
}
if (right - left == totalLen) {
res.push_back(left);
}
} else {
currCount.clear();
left = right;
}
}
}
return res;
}
};
import java.util.*;
class Solution {
public List<Integer> findSubstring(String s, String[] words) {
List<Integer> res = new ArrayList<>();
if (s == null || words == null || words.length == 0) return res;
int wordLen = words[0].length();
int numWords = words.length;
int totalLen = wordLen * numWords;
Map<String, Integer> wordCount = new HashMap<>();
for (String w : words) wordCount.put(w, wordCount.getOrDefault(w, 0) + 1);
for (int i = 0; i < wordLen; i++) {
int left = i, right = i;
Map<String, Integer> currCount = new HashMap<>();
while (right + wordLen <= s.length()) {
String word = s.substring(right, right + wordLen);
right += wordLen;
if (wordCount.containsKey(word)) {
currCount.put(word, currCount.getOrDefault(word, 0) + 1);
while (currCount.get(word) > wordCount.get(word)) {
String leftWord = s.substring(left, left + wordLen);
currCount.put(leftWord, currCount.get(leftWord) - 1);
left += wordLen;
}
if (right - left == totalLen) {
res.add(left);
}
} else {
currCount.clear();
left = right;
}
}
}
return res;
}
}
var findSubstring = function(s, words) {
if (!s || !words.length || !words[0].length) return [];
const wordLen = words[0].length;
const numWords = words.length;
const totalLen = wordLen * numWords;
const wordCount = {};
for (const w of words) wordCount[w] = (wordCount[w] || 0) + 1;
const res = [];
for (let i = 0; i < wordLen; i++) {
let left = i, right = i;
let currCount = {};
while (right + wordLen <= s.length) {
const word = s.substring(right, right + wordLen);
right += wordLen;
if (wordCount[word]) {
currCount[word] = (currCount[word] || 0) + 1;
while (currCount[word] > wordCount[word]) {
const leftWord = s.substring(left, left + wordLen);
currCount[leftWord]--;
left += wordLen;
}
if (right - left === totalLen) {
res.push(left);
}
} else {
currCount = {};
left = right;
}
}
}
return res;
};
Given a string s
and an array of strings words
of equal length, find all starting indices of substring(s) in s
that is a concatenation of each word in words
exactly once and without any intervening characters.
words
must be used exactly once in the substring.s
.words
.
For example, given s = "barfoothefoobarman"
and words = ["foo","bar"]
, the answer is [0,9]
.
The brute-force approach would be to check every possible substring of s
of the total length of all words concatenated together, and see if it contains all the words from words
exactly once. However, this would be inefficient for large strings or many words.
We want to avoid redundant work. Since all words are of the same length, we can "slide" a window of size word_length * number_of_words
across s
, and check if the current window contains all words with the correct counts. To do this efficiently, we can use a hash map (dictionary) to count occurrences of each word.
The insight is to step through s
in increments of word length, and use a sliding window with two pointers to keep track of words seen so far. If a word occurs more than expected, we can move the left pointer forward to discard words until the counts are valid again.
word_len
), the total number of words (num_words
), and the total length to check (substring_len = word_len * num_words
).
word_count
) to store how many times each word appears in words
.
i
from 0
to word_len - 1
, do the following:
left
and right
, both starting at i
.curr_count
) to count words in the current window.right + word_len
is within bounds of s
:
s[right:right+word_len]
.word_count
:
curr_count
.word_count
, move left
forward by word_len
at a time, decrementing the count of the word at left
, until counts are valid.substring_len
, record left
as a valid starting index.word_count
:
curr_count
and move left
to right
.The use of hash maps allows for quick lookups and count comparisons, making the solution efficient. The sliding window ensures we only check each possible substring once, and the offset loop ensures we catch all possible alignments of words.
Let's use s = "barfoothefoobarman"
and words = ["foo","bar"]
.
The algorithm uses the sliding window and hash maps to efficiently check each possibility without recomputing counts from scratch.
s
, check if the next substring_len
characters form a valid concatenation. For each substring, split into words and check each in words
. This is O((n - substring_len + 1) * num_words * word_len), which can be very slow for large inputs.
word_len
), and for each, we move the window in steps of word_len
.s
is processed at most twice (once when entering the window, once when exiting), so the time complexity is O(n * word_len), where n is the length of s
.The optimized approach is much more efficient and suitable for large inputs.
The problem asks us to find all starting indices of substrings in s
that are concatenations of all words in words
exactly once and without overlap. The elegant solution uses a sliding window and hash maps to efficiently count and compare word occurrences as we step through s
in increments of word length. This avoids redundant computation and allows us to solve the problem in linear time relative to the size of s
. The key insight is leveraging the fixed word length and efficiently managing counts with hash maps.