The Design Search Autocomplete System problem asks you to implement an autocomplete system similar to those found in search engines. You are given a list of sentences
and their corresponding times
(how often each sentence has been typed). The system receives a stream of characters (one at a time) via the input(c)
method:
'#'
), the system returns the top 3 historical sentences that have the current prefix, ranked by frequency and lexicographical order.'#'
is input, it means the current input stream forms a new sentence. The system should record this sentence (or increase its frequency if it already exists), and reset the input buffer.Constraints:
When approaching this problem, the first idea might be to check every sentence for every input character, which is inefficient. Instead, we want a way to efficiently find all sentences that start with a given prefix and quickly update frequencies as new sentences are added.
A Trie (prefix tree) is a natural fit for this problem. It allows us to store all sentences in a way that makes searching by prefix fast. Each node in the Trie can keep track of sentences that pass through it, enabling quick retrieval of autocomplete suggestions. We'll also need a way to track and update the frequency of each sentence, which can be done with a hash map.
The main challenge is efficiently maintaining and retrieving the top 3 sentences for any prefix, especially as sentences are added or their frequencies are updated.
Here's how we can design the system step by step:
sentence -> frequency
to track how many times each sentence has been entered.'#'
), insert it into the Trie and update the frequency map.This approach ensures that prefix searches and updates are efficient, and the system can handle dynamic additions of new sentences.
Suppose we initialize the system with:
sentences = ["i love you", "island", "ironman", "i love leetcode"]
times = [5, 3, 2, 2]
Let's walk through the input sequence: 'i' -> ' '
'i'
, the prefix is "i"
. The system finds all sentences starting with "i": "i love you" (5), "island" (3), "ironman" (2), "i love leetcode" (2). The top 3 are returned: ["i love you", "island", "i love leetcode"] (by frequency and lex order).' '
, the prefix is "i "
. Now, only "i love you" (5) and "i love leetcode" (2) start with "i ". The top 2 are returned: ["i love you", "i love leetcode"].'a'
, the prefix is "i a"
, which matches no sentences, so an empty list is returned.'#'
is input, the system adds "i a" as a new sentence with frequency 1 and resets the prefix.Brute-force approach:
O(N * L)
per input, where N
is the number of sentences and L
is the average sentence length.O(K \log K)
where K
is the number of matches.O(P)
, where P
is the prefix length.O(1)
if cached, or O(K \log K)
for sorting.O(T)
, where T
is the total number of characters in all sentences, plus space for the frequency map.The Trie approach is much more efficient, especially as the number of sentences grows.
The Design Search Autocomplete System problem is a classic example of efficiently searching and updating data based on prefixes. By using a Trie to store sentences and a hash map to track frequencies, we can provide fast autocomplete suggestions and dynamically update the system as new sentences are added. The key insight is leveraging the Trie structure for prefix searching and caching the top results for performance.
from collections import defaultdict, deque
class TrieNode:
def __init__(self):
self.children = {}
self.sentences = set()
class AutocompleteSystem:
def __init__(self, sentences, times):
self.root = TrieNode()
self.freq = defaultdict(int)
self.curr_input = ""
self.curr_node = self.root
for s, t in zip(sentences, times):
self.freq[s] += t
self._insert(s)
def _insert(self, sentence):
node = self.root
for c in sentence:
if c not in node.children:
node.children[c] = TrieNode()
node = node.children[c]
node.sentences.add(sentence)
def input(self, c):
results = []
if c == "#":
self.freq[self.curr_input] += 1
self._insert(self.curr_input)
self.curr_input = ""
self.curr_node = self.root
return []
self.curr_input += c
if self.curr_node and c in self.curr_node.children:
self.curr_node = self.curr_node.children[c]
candidates = list(self.curr_node.sentences)
candidates.sort(key=lambda x: (-self.freq[x], x))
results = candidates[:3]
else:
self.curr_node = None
return results
#include <vector>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <algorithm>
using namespace std;
struct TrieNode {
unordered_map<char, TrieNode*> children;
unordered_set<string> sentences;
TrieNode() {}
};
class AutocompleteSystem {
TrieNode* root;
unordered_map<string, int> freq;
string curr_input;
TrieNode* curr_node;
public:
AutocompleteSystem(vector<string>& sentences, vector<int>& times) {
root = new TrieNode();
curr_node = root;
for (int i = 0; i < sentences.size(); ++i) {
freq[sentences[i]] += times[i];
insert(sentences[i]);
}
}
void insert(const string& s) {
TrieNode* node = root;
for (char c : s) {
if (!node->children.count(c))
node->children[c] = new TrieNode();
node = node->children[c];
node->sentences.insert(s);
}
}
vector<string> input(char c) {
if (c == '#') {
freq[curr_input]++;
insert(curr_input);
curr_input = "";
curr_node = root;
return {};
}
curr_input += c;
if (curr_node && curr_node->children.count(c)) {
curr_node = curr_node->children[c];
vector<string> candidates(curr_node->sentences.begin(), curr_node->sentences.end());
sort(candidates.begin(), candidates.end(), [this](const string& a, const string& b) {
if (freq[a] == freq[b]) return a < b;
return freq[a] > freq[b];
});
if (candidates.size() > 3) candidates.resize(3);
return candidates;
} else {
curr_node = nullptr;
return {};
}
}
};
import java.util.*;
class TrieNode {
Map<Character, TrieNode> children = new HashMap<>();
Set<String> sentences = new HashSet<>();
}
class AutocompleteSystem {
TrieNode root;
Map<String, Integer> freq;
StringBuilder currInput;
TrieNode currNode;
public AutocompleteSystem(String[] sentences, int[] times) {
root = new TrieNode();
freq = new HashMap<>();
currInput = new StringBuilder();
currNode = root;
for (int i = 0; i < sentences.length; i++) {
freq.put(sentences[i], freq.getOrDefault(sentences[i], 0) + times[i]);
insert(sentences[i]);
}
}
private void insert(String s) {
TrieNode node = root;
for (char c : s.toCharArray()) {
node.children.putIfAbsent(c, new TrieNode());
node = node.children.get(c);
node.sentences.add(s);
}
}
public List<String> input(char c) {
if (c == '#') {
String sentence = currInput.toString();
freq.put(sentence, freq.getOrDefault(sentence, 0) + 1);
insert(sentence);
currInput = new StringBuilder();
currNode = root;
return new ArrayList<>();
}
currInput.append(c);
if (currNode != null && currNode.children.containsKey(c)) {
currNode = currNode.children.get(c);
List<String> candidates = new ArrayList<>(currNode.sentences);
Collections.sort(candidates, (a, b) -> {
int cmp = freq.get(b) - freq.get(a);
return cmp == 0 ? a.compareTo(b) : cmp;
});
return candidates.subList(0, Math.min(3, candidates.size()));
} else {
currNode = null;
return new ArrayList<>();
}
}
}
class TrieNode {
constructor() {
this.children = {};
this.sentences = new Set();
}
}
class AutocompleteSystem {
constructor(sentences, times) {
this.root = new TrieNode();
this.freq = new Map();
this.currInput = '';
this.currNode = this.root;
for (let i = 0; i < sentences.length; ++i) {
this.freq.set(sentences[i], (this.freq.get(sentences[i]) || 0) + times[i]);
this._insert(sentences[i]);
}
}
_insert(sentence) {
let node = this.root;
for (let c of sentence) {
if (!node.children[c]) node.children[c] = new TrieNode();
node = node.children[c];
node.sentences.add(sentence);
}
}
input(c) {
if (c === '#') {
this.freq.set(this.currInput, (this.freq.get(this.currInput) || 0) + 1);
this._insert(this.currInput);
this.currInput = '';
this.currNode = this.root;
return [];
}
this.currInput += c;
if (this.currNode && this.currNode.children[c]) {
this.currNode = this.currNode.children[c];
let candidates = Array.from(this.currNode.sentences);
candidates.sort((a, b) => {
if (this.freq.get(a) !== this.freq.get(b)) return this.freq.get(b) - this.freq.get(a);
return a.localeCompare(b);
});
return candidates.slice(0, 3);
} else {
this.currNode = null;
return [];
}
}
}