The "Find Median from Data Stream" problem asks you to design a data structure that supports two main operations:
addNum(num)
: Add an integer num
from the data stream to the data structure.findMedian()
: Return the median of all elements so far.addNum
and findMedian
.
At first glance, you might think to store all numbers in a list, sort it every time, and pick the median. However, sorting after each insertion is inefficient, especially with many numbers.
Instead, we need a data structure that allows us to efficiently:
The most efficient and common way to solve this problem is by using two heaps:
low
): Keeps track of the smaller half of the numbers. The largest number in this half is at the root.
high
): Keeps track of the larger half of the numbers. The smallest number in this half is at the root.
low
(the largest of the smaller half). If it's smaller, add it to low
. Otherwise, add it to high
.
Let's say we receive the following numbers in order: [6, 10, 2, 6, 5, 0]
Step-by-step:
low = [6]
, high = []
low = [6]
, high = [10]
low = [6, 2]
, high = [10]
low = [6, 2]
, high = [6, 10]
low = [6, 2, 5]
, high = [6, 10]
low = [5, 2, 0]
, high = [6, 6, 10]
(after rebalancing) Brute-force approach:
The "Find Median from Data Stream" problem is efficiently solved with a two-heap approach, using a max heap for the lower half and a min heap for the upper half of the data. This allows for fast insertion and median retrieval, balancing the heaps as needed to ensure the median is always accessible in O(1) time. The elegance of this solution lies in leveraging heap properties to maintain order and quick access to the median, making it ideal for streaming scenarios.
import heapq
class MedianFinder:
def __init__(self):
self.low = [] # Max heap (invert values for min-heap implementation)
self.high = [] # Min heap
def addNum(self, num: int) -> None:
heapq.heappush(self.low, -num)
# Balance: move the largest from low to high
heapq.heappush(self.high, -heapq.heappop(self.low))
# Maintain size property
if len(self.high) > len(self.low):
heapq.heappush(self.low, -heapq.heappop(self.high))
def findMedian(self) -> float:
if len(self.low) > len(self.high):
return -self.low[0]
else:
return (-self.low[0] + self.high[0]) / 2
#include <queue>
using namespace std;
class MedianFinder {
priority_queue<int> low; // Max heap
priority_queue<int, vector<int>, greater<int>> high; // Min heap
public:
MedianFinder() {}
void addNum(int num) {
low.push(num);
high.push(low.top());
low.pop();
if (high.size() > low.size()) {
low.push(high.top());
high.pop();
}
}
double findMedian() {
if (low.size() > high.size())
return low.top();
else
return (low.top() + high.top()) / 2.0;
}
};
import java.util.*;
class MedianFinder {
private PriorityQueue<Integer> low;
private PriorityQueue<Integer> high;
public MedianFinder() {
low = new PriorityQueue<>(Collections.reverseOrder());
high = new PriorityQueue<>();
}
public void addNum(int num) {
low.offer(num);
high.offer(low.poll());
if (high.size() > low.size()) {
low.offer(high.poll());
}
}
public double findMedian() {
if (low.size() > high.size()) {
return low.peek();
} else {
return (low.peek() + high.peek()) / 2.0;
}
}
}
// Helper: MinHeap and MaxHeap classes
class Heap {
constructor(comparator) {
this.data = [];
this.comparator = comparator;
}
size() { return this.data.length; }
peek() { return this.data[0]; }
push(val) {
this.data.push(val);
this._heapifyUp();
}
pop() {
if (this.size() === 1) return this.data.pop();
const top = this.data[0];
this.data[0] = this.data.pop();
this._heapifyDown();
return top;
}
_heapifyUp() {
let idx = this.size() - 1;
while (idx > 0) {
let parent = Math.floor((idx - 1) / 2);
if (this.comparator(this.data[idx], this.data[parent])) {
[this.data[idx], this.data[parent]] = [this.data[parent], this.data[idx]];
idx = parent;
} else break;
}
}
_heapifyDown() {
let idx = 0;
let len = this.size();
while (true) {
let left = 2 * idx + 1;
let right = 2 * idx + 2;
let swap = idx;
if (left < len && this.comparator(this.data[left], this.data[swap])) swap = left;
if (right < len && this.comparator(this.data[right], this.data[swap])) swap = right;
if (swap === idx) break;
[this.data[idx], this.data[swap]] = [this.data[swap], this.data[idx]];
idx = swap;
}
}
}
// MaxHeap: comparator (a, b) => a > b
// MinHeap: comparator (a, b) => a < b
class MedianFinder {
constructor() {
this.low = new Heap((a, b) => a > b); // MaxHeap
this.high = new Heap((a, b) => a < b); // MinHeap
}
addNum(num) {
this.low.push(num);
this.high.push(this.low.pop());
if (this.high.size() > this.low.size()) {
this.low.push(this.high.pop());
}
}
findMedian() {
if (this.low.size() > this.high.size()) {
return this.low.peek();
} else {
return (this.low.peek() + this.high.peek()) / 2;
}
}
}