Want Help Cracking FAANG?

(Then click this)

×
Back to Question Bank

943. Find the Shortest Superstring - Leetcode Solution

Code Implementation

from typing import List

class Solution:
    def shortestSuperstring(self, words: List[str]) -> str:
        n = len(words)
        overlaps = [[0]*n for _ in range(n)]
        # Precompute overlaps
        for i, x in enumerate(words):
            for j, y in enumerate(words):
                if i != j:
                    for k in range(min(len(x), len(y)), 0, -1):
                        if x[-k:] == y[:k]:
                            overlaps[i][j] = k
                            break

        dp = [[0]*n for _ in range(1<<n)]
        parent = [[-1]*n for _ in range(1<<n)]

        for mask in range(1, 1<<n):
            for j in range(n):
                if not (mask & (1<<j)):
                    continue
                prev_mask = mask ^ (1<<j)
                if prev_mask == 0:
                    continue
                for i in range(n):
                    if not (prev_mask & (1<<i)):
                        continue
                    val = dp[prev_mask][i] + overlaps[i][j]
                    if val > dp[mask][j]:
                        dp[mask][j] = val
                        parent[mask][j] = i

        # Find best ending word
        max_len = -1
        last = -1
        mask = (1<<n) - 1
        for i in range(n):
            if dp[mask][i] > max_len:
                max_len = dp[mask][i]
                last = i

        # Reconstruct path
        path = []
        cur = last
        cur_mask = mask
        while cur != -1:
            path.append(cur)
            temp = parent[cur_mask][cur]
            cur_mask ^= (1<<cur)
            cur = temp
        path = path[::-1]

        # Add words to result
        seen = [False]*n
        res = words[path[0]]
        seen[path[0]] = True
        for k in range(1, len(path)):
            i, j = path[k-1], path[k]
            res += words[j][overlaps[i][j]:]
            seen[j] = True

        # Add any words not in path (for completeness)
        for i in range(n):
            if not seen[i]:
                res += words[i]
        return res
      
#include <vector>
#include <string>
#include <algorithm>

using namespace std;

class Solution {
public:
    string shortestSuperstring(vector<string>& words) {
        int n = words.size();
        vector<vector<int>> overlaps(n, vector<int>(n, 0));
        // Precompute overlaps
        for (int i = 0; i < n; ++i) {
            for (int j = 0; j < n; ++j) {
                if (i == j) continue;
                for (int k = min(words[i].size(), words[j].size()); k > 0; --k) {
                    if (words[i].substr(words[i].size() - k) == words[j].substr(0, k)) {
                        overlaps[i][j] = k;
                        break;
                    }
                }
            }
        }
        int N = 1 << n;
        vector<vector<int>> dp(N, vector<int>(n, 0));
        vector<vector<int>> parent(N, vector<int>(n, -1));
        for (int mask = 1; mask < N; ++mask) {
            for (int j = 0; j < n; ++j) {
                if (!((mask >> j) & 1)) continue;
                int prev_mask = mask ^ (1 << j);
                if (prev_mask == 0) continue;
                for (int i = 0; i < n; ++i) {
                    if (!((prev_mask >> i) & 1)) continue;
                    int val = dp[prev_mask][i] + overlaps[i][j];
                    if (val > dp[mask][j]) {
                        dp[mask][j] = val;
                        parent[mask][j] = i;
                    }
                }
            }
        }
        // Find last index
        int max_len = -1, last = -1, mask = N - 1;
        for (int i = 0; i < n; ++i) {
            if (dp[mask][i] > max_len) {
                max_len = dp[mask][i];
                last = i;
            }
        }
        // Reconstruct path
        vector<int> path;
        int cur = last, cur_mask = mask;
        while (cur != -1) {
            path.push_back(cur);
            int temp = parent[cur_mask][cur];
            cur_mask ^= (1 << cur);
            cur = temp;
        }
        reverse(path.begin(), path.end());
        vector<bool> seen(n, false);
        string res = words[path[0]];
        seen[path[0]] = true;
        for (int k = 1; k < path.size(); ++k) {
            int i = path[k-1], j = path[k];
            res += words[j].substr(overlaps[i][j]);
            seen[j] = true;
        }
        for (int i = 0; i < n; ++i) {
            if (!seen[i]) res += words[i];
        }
        return res;
    }
};
      
import java.util.*;

class Solution {
    public String shortestSuperstring(String[] words) {
        int n = words.length;
        int[][] overlaps = new int[n][n];
        // Precompute overlaps
        for (int i = 0; i < n; ++i) {
            for (int j = 0; j < n; ++j) {
                if (i == j) continue;
                for (int k = Math.min(words[i].length(), words[j].length()); k > 0; --k) {
                    if (words[i].substring(words[i].length() - k).equals(words[j].substring(0, k))) {
                        overlaps[i][j] = k;
                        break;
                    }
                }
            }
        }
        int N = 1 << n;
        int[][] dp = new int[N][n];
        int[][] parent = new int[N][n];
        for (int[] row : parent)
            Arrays.fill(row, -1);
        for (int mask = 1; mask < N; ++mask) {
            for (int j = 0; j < n; ++j) {
                if (((mask >> j) & 1) == 0) continue;
                int prev_mask = mask ^ (1 << j);
                if (prev_mask == 0) continue;
                for (int i = 0; i < n; ++i) {
                    if (((prev_mask >> i) & 1) == 0) continue;
                    int val = dp[prev_mask][i] + overlaps[i][j];
                    if (val > dp[mask][j]) {
                        dp[mask][j] = val;
                        parent[mask][j] = i;
                    }
                }
            }
        }
        // Find last index
        int max_len = -1, last = -1, mask = N - 1;
        for (int i = 0; i < n; ++i) {
            if (dp[mask][i] > max_len) {
                max_len = dp[mask][i];
                last = i;
            }
        }
        // Reconstruct path
        List<Integer> path = new ArrayList<>();
        int cur = last, cur_mask = mask;
        while (cur != -1) {
            path.add(cur);
            int temp = parent[cur_mask][cur];
            cur_mask ^= (1 << cur);
            cur = temp;
        }
        Collections.reverse(path);
        boolean[] seen = new boolean[n];
        StringBuilder res = new StringBuilder(words[path.get(0)]);
        seen[path.get(0)] = true;
        for (int k = 1; k < path.size(); ++k) {
            int i = path.get(k-1), j = path.get(k);
            res.append(words[j].substring(overlaps[i][j]));
            seen[j] = true;
        }
        for (int i = 0; i < n; ++i) {
            if (!seen[i]) res.append(words[i]);
        }
        return res.toString();
    }
}
      
/**
 * @param {string[]} words
 * @return {string}
 */
var shortestSuperstring = function(words) {
    const n = words.length;
    const overlaps = Array.from({length: n}, () => Array(n).fill(0));
    // Precompute overlaps
    for (let i = 0; i < n; ++i) {
        for (let j = 0; j < n; ++j) {
            if (i === j) continue;
            for (let k = Math.min(words[i].length, words[j].length); k > 0; --k) {
                if (words[i].slice(-k) === words[j].slice(0, k)) {
                    overlaps[i][j] = k;
                    break;
                }
            }
        }
    }
    const N = 1 << n;
    const dp = Array.from({length: N}, () => Array(n).fill(0));
    const parent = Array.from({length: N}, () => Array(n).fill(-1));
    for (let mask = 1; mask < N; ++mask) {
        for (let j = 0; j < n; ++j) {
            if (!((mask >> j) & 1)) continue;
            const prev_mask = mask ^ (1 << j);
            if (prev_mask === 0) continue;
            for (let i = 0; i < n; ++i) {
                if (!((prev_mask >> i) & 1)) continue;
                const val = dp[prev_mask][i] + overlaps[i][j];
                if (val > dp[mask][j]) {
                    dp[mask][j] = val;
                    parent[mask][j] = i;
                }
            }
        }
    }
    // Find last index
    let max_len = -1, last = -1, mask = N - 1;
    for (let i = 0; i < n; ++i) {
        if (dp[mask][i] > max_len) {
            max_len = dp[mask][i];
            last = i;
        }
    }
    // Reconstruct path
    const path = [];
    let cur = last, cur_mask = mask;
    while (cur !== -1) {
        path.push(cur);
        const temp = parent[cur_mask][cur];
        cur_mask ^= (1 << cur);
        cur = temp;
    }
    path.reverse();
    const seen = Array(n).fill(false);
    let res = words[path[0]];
    seen[path[0]] = true;
    for (let k = 1; k < path.length; ++k) {
        let i = path[k-1], j = path[k];
        res += words[j].slice(overlaps[i][j]);
        seen[j] = true;
    }
    for (let i = 0; i < n; ++i) {
        if (!seen[i]) res += words[i];
    }
    return res;
};
      

Problem Description

Given an array words containing n strings, find the shortest string that contains each string in words as a substring. The order of appearance of the words can be any, but each word must appear at least once as a substring. You must not reuse any word more than once, and there is guaranteed to be at least one valid solution. If multiple shortest superstrings exist, you can return any of them.

  • Input: An array words of distinct non-empty strings.
  • Output: The shortest possible superstring containing all the words as substrings.
  • Constraints: Each word must be used exactly once and not reused.

Thought Process

The naive way to solve this problem is to try all possible orders (permutations) of the words, concatenate them in each order while maximizing the overlap between consecutive words, and keep track of the shortest result. However, with n words, there are n! permutations, which quickly becomes infeasible as n grows.

To optimize, we notice that the problem is similar to the "Traveling Salesman Problem" (TSP), where each word is a "city", and the "cost" of traveling from word A to word B is the number of extra characters needed to append B after A (i.e., how much B doesn't overlap with the end of A). Our goal is to find a path through all cities (words) minimizing the total cost (i.e., the total length of the resulting superstring).

This analogy suggests we can use dynamic programming with bitmasking to efficiently compute the optimal order and overlaps, thus reducing the exponential brute-force search to something tractable for reasonable n.

Solution Approach

  • 1. Precompute Overlaps: For every pair of words (i, j), calculate the maximum length of overlap where the suffix of words[i] matches the prefix of words[j]. This allows us to quickly know how many characters we can "save" by putting j after i.
  • 2. Dynamic Programming with Bitmasking:
    • Let dp[mask][i] be the maximum total overlap we can get by visiting the set of words in mask and ending at word i.
    • For each possible subset mask of words and for each possible ending word i in that subset, update dp[mask][i] by considering all possible previous words j in mask and using the precomputed overlaps.
    • Also keep a parent array to reconstruct the path (i.e., the order of words) that gives the best overlap.
  • 3. Reconstruct the Path:
    • After filling the DP table, find the ending word that gives the best (maximum) total overlap for the full set of words.
    • Backtrack using the parent array to recover the actual order of words.
  • 4. Build the Superstring:
    • Start with the first word in the path, and for each subsequent word, append only the non-overlapping part (using the precomputed overlaps).
    • If any word was not included in the path (edge case), append it at the end.

This approach ensures that each word is used exactly once and that the resulting string is as short as possible given the overlaps.

Example Walkthrough

Suppose words = ["alex","loves","leetcode"].

  1. Precompute Overlaps:
    • overlap("alex", "loves") = 1 (since "x" and "l" do not overlap, but "alex" and "loves" share "l" at position 0).
    • overlap("loves", "leetcode") = 2 (since "lo" and "le" overlap at "l").
    • overlap("alex", "leetcode") = 0 (no overlap).
    • ...and so on for all pairs.
  2. Dynamic Programming:
    • Compute dp[mask][i] for all mask and i.
    • For example, dp[011][1] means the best overlap ending at "loves" using "alex" and "loves".
  3. Reconstruct Path:
    • After filling the table, suppose the best path is "alex" → "loves" → "leetcode".
  4. Build Superstring:
    • Start with "alex", append "loves" minus the overlapping "l", get "alexloves".
    • Append "leetcode" minus the overlapping "le", get "alexlovesleetcode".

The shortest superstring is "alexlovesleetcode".

Time and Space Complexity

  • Brute-Force Approach:
    • There are n! permutations, and for each, we must check overlaps and build the string. This is factorial time complexity: O(n! * k^2) where k is the average word length.
  • Optimized DP Approach:
    • There are 2^n possible subsets, and for each subset, we consider n possible ending words, and for each, up to n possible previous words: O(n^2 * 2^n) time.
    • Space complexity is also O(n * 2^n) due to the DP and parent tables.
  • Why: The bitmask DP reduces the exponential search space to something manageable for small to moderate n (typically up to 12-16).

Summary

The shortest superstring problem is a classic example of optimizing overlap between strings, closely related to the Traveling Salesman Problem. By precomputing overlaps and using dynamic programming with bitmasking, we can efficiently find a minimal-length string that contains all input words as substrings, using each word exactly once. The approach is both elegant and practical for moderate input sizes, and highlights the power of DP and combinatorial optimization.