In the Accounts Merge problem, you are given a list of accounts, where each account is represented as a list of strings. The first element of each list is a person's name
, and the rest are that person's email addresses
. Two accounts belong to the same person if there is at least one common email address between them. Your task is to merge such accounts and return the merged account lists, where each merged account has the person's name followed by all unique email addresses sorted lexicographically.
The output should be a list of merged accounts, each as a list with the name and sorted unique emails.
At first glance, this problem seems like it could be solved by simply comparing every account with every other account for shared emails and merging them. However, this brute-force approach would be too slow, especially for large datasets, because it would involve a lot of redundant comparisons.
The key insight is to recognize that this is a connected components problem in disguise. Each email can be thought of as a node, and two emails are connected if they appear in the same account. All emails that are connected (directly or indirectly) belong to the same person and thus the same merged account.
To efficiently group emails belonging to the same person, we can use the Union-Find (Disjoint Set Union, DSU) data structure. This will allow us to quickly group emails and later collect all emails belonging to the same group.
We solve the problem using the Union-Find (DSU) data structure to group emails. Here’s a step-by-step breakdown:
parent
map to keep track of the root email for each email (for Union-Find).email_to_name
map to associate each email with a user’s name.groups
map.We use hash maps for quick lookups and the Union-Find structure for efficient grouping.
Input:
[ ["John", "johnsmith@mail.com", "john00@mail.com"], ["John", "johnnybravo@mail.com"], ["John", "johnsmith@mail.com", "john_newyork@mail.com"], ["Mary", "mary@mail.com"] ]
Step-by-step:
Result:
The merged accounts are returned with emails sorted for each user.
Brute-force approach:
Compare every account with every other account for shared emails, resulting in O(N^2 * K) time, where N is the number of accounts and K is the average number of emails per account. This is inefficient for large datasets.
Optimized Union-Find approach:
The Accounts Merge problem is elegantly solved by modeling it as a connected components problem using the Union-Find data structure. By grouping emails that belong together and mapping them back to user names, we efficiently merge accounts with overlapping emails. This approach is both fast and memory-efficient, showcasing the power of classic data structures for real-world data grouping problems.
class Solution:
def accountsMerge(self, accounts):
parent = {}
email_to_name = {}
def find(x):
parent.setdefault(x, x)
if parent[x] != x:
parent[x] = find(parent[x])
return parent[x]
def union(x, y):
parent[find(x)] = find(y)
for account in accounts:
name = account[0]
for email in account[1:]:
email_to_name[email] = name
union(account[1], email)
groups = {}
for email in email_to_name:
root = find(email)
groups.setdefault(root, set()).add(email)
result = []
for root, emails in groups.items():
result.append([email_to_name[root]] + sorted(emails))
return result
class Solution {
public:
vector<vector<string>> accountsMerge(vector<vector<string>>& accounts) {
unordered_map<string, string> parent;
unordered_map<string, string> emailToName;
function<string(string)> find = [&](string x) {
if (parent.count(x) == 0) parent[x] = x;
if (parent[x] != x) parent[x] = find(parent[x]);
return parent[x];
};
auto unite = [&](string x, string y) {
parent[find(x)] = find(y);
};
for (auto& acc : accounts) {
string name = acc[0];
for (int i = 1; i < acc.size(); ++i) {
emailToName[acc[i]] = name;
unite(acc[1], acc[i]);
}
}
unordered_map<string, set<string>> groups;
for (auto& p : emailToName) {
string root = find(p.first);
groups[root].insert(p.first);
}
vector<vector<string>> res;
for (auto& g : groups) {
vector<string> merged = {emailToName[g.first]};
merged.insert(merged.end(), g.second.begin(), g.second.end());
res.push_back(merged);
}
return res;
}
};
class Solution {
public List<List<String>> accountsMerge(List<List<String>> accounts) {
Map<String, String> parent = new HashMap<>();
Map<String, String> emailToName = new HashMap<>();
// Find with path compression
Function<String, String> find = new Function<String, String>() {
public String apply(String x) {
parent.putIfAbsent(x, x);
if (!parent.get(x).equals(x)) {
parent.put(x, this.apply(parent.get(x)));
}
return parent.get(x);
}
};
// Union
BiConsumer<String, String> union = (x, y) -> {
parent.put(find.apply(x), find.apply(y));
};
for (List<String> acc : accounts) {
String name = acc.get(0);
for (int i = 1; i < acc.size(); ++i) {
emailToName.put(acc.get(i), name);
union.accept(acc.get(1), acc.get(i));
}
}
Map<String, Set<String>> groups = new HashMap<>();
for (String email : emailToName.keySet()) {
String root = find.apply(email);
groups.computeIfAbsent(root, k -> new TreeSet<>()).add(email);
}
List<List<String>> res = new ArrayList<>();
for (String root : groups.keySet()) {
List<String> merged = new ArrayList<>();
merged.add(emailToName.get(root));
merged.addAll(groups.get(root));
res.add(merged);
}
return res;
}
}
var accountsMerge = function(accounts) {
let parent = {};
let emailToName = {};
function find(x) {
if (!(x in parent)) parent[x] = x;
if (parent[x] !== x) parent[x] = find(parent[x]);
return parent[x];
}
function union(x, y) {
parent[find(x)] = find(y);
}
for (let account of accounts) {
let name = account[0];
for (let i = 1; i < account.length; ++i) {
emailToName[account[i]] = name;
union(account[1], account[i]);
}
}
let groups = {};
for (let email in emailToName) {
let root = find(email);
if (!(root in groups)) groups[root] = new Set();
groups[root].add(email);
}
let result = [];
for (let root in groups) {
let merged = [emailToName[root], ...Array.from(groups[root]).sort()];
result.push(merged);
}
return result;
};