Hash Tables: Hash Functions, Sets & Maps

// Hash Tables - Hash Functions, Sets & Maps - Greg Hogg DSA Course Materials Lecture 4

#include <iostream>
#include <unordered_set>
#include <unordered_map>
#include <string>

int main() {
    // HashSets

    // Initialize a HashSet
    std::unordered_set<int> s;
    std::cout << "Initial HashSet: "; // Output: []
    for (int x : s) std::cout << x << " ";
    std::cout << std::endl;

    // Add item into Set - O(1)
    s.insert(1);
    s.insert(2);
    s.insert(3);
    std::cout << "HashSet after adding elements: ";
    for (int x : s) std::cout << x << " ";
    std::cout << std::endl;

    // Lookup if item in set - O(1)
    if (s.find(1) == s.end()) {
        std::cout << "1 is not in the set" << std::endl;
    }

    // Remove item from set - O(1)
    s.erase(3);
    std::cout << "HashSet after removing 3: ";
    for (int x : s) std::cout << x << " ";
    std::cout << std::endl;

    // Set construction from a string
    std::string string = "aaaaaaabbbbbbbbbbbccccccccceeeeeeeee";
    std::unordered_set<char> sett;
    for (char c : string) {
        sett.insert(c);
    }
    std::cout << "Set from string: ";
    for (char c : sett) std::cout << c << " ";
    std::cout << std::endl;

    // Loop over items in set - O(n)
    std::cout << "Elements in HashSet:" << std::endl;
    for (int x : s) {
        std::cout << x << std::endl;
    }

    // HashMaps - Dictionaries

    // Initialize a HashMap
    std::unordered_map<std::string, int> d;
    d["greg"] = 1;
    d["steve"] = 2;
    d["rob"] = 3;
    std::cout << "Initial HashMap: ";
    for (const auto& entry : d) {
        std::cout << "{" << entry.first << ": " << entry.second << "} ";
    }
    std::cout << std::endl;

    // Add key:value in dictionary: O(1)
    d["arsh"] = 4;
    std::cout << "HashMap after adding 'arsh': ";
    for (const auto& entry : d) {
        std::cout << "{" << entry.first << ": " << entry.second << "} ";
    }
    std::cout << std::endl;

    // Check for presence of key in dictionary: O(1)
    if (d.find("greg") != d.end()) {
        std::cout << "HashMap contains key 'greg'" << std::endl;
    }

    // Check the value corresponding to a key in the dictionary: O(1)
    std::cout << "Value for key 'greg': " << d["greg"] << std::endl;

    // Loop over the key:value pairs of the dictionary: O(n)
    std::cout << "Key:Value pairs in HashMap:" << std::endl;
    for (const auto& entry : d) {
        std::cout << "Key " << entry.first << ": Value " << entry.second << std::endl;
    }

    // Counter in C++
    std::unordered_map<char, int> counter;
    for (char c : string) {
        counter[c]++;
    }
    std::cout << "Character counts: ";
    for (const auto& entry : counter) {
        std::cout << "{" << entry.first << ": " << entry.second << "} ";
    }
    std::cout << std::endl;

    return 0;
}
// Hash Tables - Hash Functions, Sets & Maps - Greg Hogg DSA Course Materials Lecture 4

import java.util.HashSet;
import java.util.Set;
import java.util.HashMap;
import java.util.Map;
import java.util.Collections;
import java.util.List;
import java.util.ArrayList;

public class Main {
    public static void main(String[] args) {
        // HashSets

        // Initialize a HashSet
        Set<Integer> s = new HashSet<>();
        System.out.println("Initial HashSet: " + s); // Output: []

        // Add item into Set - O(1)
        s.add(1);
        s.add(2);
        s.add(3);
        System.out.println("HashSet after adding elements: " + s); // Output: [1, 2, 3]

        // Lookup if item in set - O(1)
        if (!s.contains(1)) {
            System.out.println("1 is not in the set");
        }

        // Remove item from set - O(1)
        s.remove(3);
        System.out.println("HashSet after removing 3: " + s); // Output: [1, 2]

        // Set construction from a string
        String string = "aaaaaaabbbbbbbbbbbccccccccceeeeeeeee";
        Set<Character> sett = new HashSet<>();
        for (char c : string.toCharArray()) {
            sett.add(c);
        }
        System.out.println("Set from string: " + sett); // Output: [a, b, c, e]

        // Loop over items in set - O(n)
        System.out.println("Elements in HashSet:");
        for (int x : s) {
            System.out.println(x);
        }

        // HashMaps - Dictionaries

        // Initialize a HashMap
        Map<String, Integer> d = new HashMap<>();
        d.put("greg", 1);
        d.put("steve", 2);
        d.put("rob", 3);
        System.out.println("Initial HashMap: " + d); // Output: {greg=1, steve=2, rob=3}

        // Add key:value in dictionary: O(1)
        d.put("arsh", 4);
        System.out.println("HashMap after adding 'arsh': " + d); // Output: {greg=1, steve=2, rob=3, arsh=4}

        // Check for presence of key in dictionary: O(1)
        if (d.containsKey("greg")) {
            System.out.println("HashMap contains key 'greg'"); // Output: true
        }

        // Check the value corresponding to a key in the dictionary: O(1)
        System.out.println("Value for key 'greg': " + d.get("greg")); // Output: 1

        // Loop over the key:value pairs of the dictionary: O(n)
        System.out.println("Key:Value pairs in HashMap:");
        for (Map.Entry<String, Integer> entry : d.entrySet()) {
            System.out.println("Key " + entry.getKey() + ": Value " + entry.getValue());
        }

        // Counter in Java
        Map<Character, Integer> counter = new HashMap<>();
        for (char c : string.toCharArray()) {
            counter.put(c, counter.getOrDefault(c, 0) + 1);
        }
        System.out.println("Character counts: " + counter); // Output: {a=7, b=11, c=9, e=9}
    }
}

Summary

A hash table is a data structure that maps keys to values using a hash function. This function converts the key into an index in an underlying array, enabling average-case O(1) time for insertion, deletion, and lookup.

How Hash Functions Work

A hash function generates a numeric value (hash code) from a key. This code is then converted to an array index using modulo operation. Collisions—when two keys map to the same index—are resolved through techniques like chaining (linked lists at each bucket) or open addressing (probing for next available slot).

HashSet and HashMap

A HashSet stores unique elements and supports fast membership testing. A HashMap associates each key with a value. They both use the same underlying hashing mechanism but serve different purposes: sets manage existence, while maps manage associations.

Operations and Complexity

  • Insert: O(1) average
  • Delete: O(1) average
  • Search: O(1) average

Common Applications

Hash-based structures are perfect for frequency counting, caching, indexing, and solving many interview problems like finding duplicates, two-sum, and anagrams.

Conclusion

Hash tables are among the most powerful tools in programming due to their constant-time performance in common operations. A solid understanding of how they work internally gives you a significant edge in algorithm design.


Understanding Hash Tables, Hash Sets, and Hash Maps

What Is a Hash Table?

A hash table is a foundational data structure in computer science used to store data for fast retrieval. It resembles an array with fixed-size buckets, but its behavior is driven by a process called hashing. In hashing, a hash function takes an input (often a key or value) and produces an integer index that determines where the data should be stored in the hash table. This index is typically computed using a modulo operation to ensure it falls within the bounds of the available buckets.

Hash Functions and Collisions

A hash function transforms a given key—such as a string or number—into a bucket index. However, due to the limited number of buckets and the vast possible input space, multiple inputs may produce the same index. This situation is known as a collision. To manage collisions, hash tables employ different strategies.

Collision Resolution Techniques

The two primary methods for handling collisions in hash tables are separate chaining and linear probing.

Separate chaining uses a linked list at each bucket index. When multiple values hash to the same index, they are stored as nodes in a list. This structure allows each bucket to accommodate multiple elements, and lookup operations traverse the list at the given index to find the desired element.

Linear probing, on the other hand, stores elements directly in the hash table array and uses adjacent slots to resolve collisions. If the initial bucket is occupied, the algorithm checks the next available position until an empty slot is found. Deletion in this method must be handled carefully to preserve lookup correctness, typically by marking deleted positions specially.

Hash Sets: Unique Data Storage

A hash set is a data structure that stores a collection of unique items. It is implemented using a hash table, where each element is stored based on its hashed index. The primary operations supported by a hash set are lookup, insertion, and deletion.

On average, these operations run in O(1) time, assuming a well-distributed hash function. In the worst case, particularly with poor hash functions or many collisions, performance may degrade to O(n). Still, hash sets are highly efficient for checking membership, adding new items, and removing existing ones, which is why they are frequently used in practice.

Hash Maps: Key-Value Pair Storage

A hash map, also known as a dictionary in Python, extends the concept of a hash set by storing key-value pairs. The key is hashed to determine where to store the value. Keys must be hashable, while values can be of any data type.

Hash maps support operations like checking if a key exists, retrieving the value for a key, adding a new key-value pair, or removing an existing one. Just like hash sets, these operations have an average time complexity of O(1), and worst-case complexity of O(n) if many items hash to the same bucket.

Hashability and Immutability

For a value to be used as a key in a hash table, it must be hashable. This means it must be immutable, so its hash value stays consistent over time. In Python, examples of hashable types include strings, integers, and tuples of immutable elements. Examples of non-hashable types include lists and dictionaries, since their contents can change and therefore alter their hash.

Immutability is crucial for maintaining consistency within the hash table. If keys could change after being added to the table, they could hash to different locations, breaking the integrity of the data structure.

Python’s Built-in Hash Structures

Python provides built-in support for hash-based structures with the set and dict types. A set is a hash set that stores unique values, while a dict is a hash map for key-value pairs. Both are optimized for fast lookup, insertion, and deletion, and are widely used for their performance and simplicity.

Specialized Hash Structures in Python

Beyond standard sets and dictionaries, Python offers enhanced hash-based data structures through the collections module. The defaultdict type allows dictionaries to return a default value for missing keys, preventing errors and enabling more compact code. Another powerful structure is the Counter, which automatically counts the frequency of items in a given input, such as characters in a string.

These structures are built on top of the same hashing principles but add specialized behaviors that make them especially useful for tasks like frequency analysis and aggregating grouped data.

Performance and Use Cases

The primary reason hash tables, hash sets, and hash maps are used extensively is their performance. On average, their core operations run in constant time, even for large datasets. This makes them ideal for problems involving quick membership checks, frequency counting, associative mapping, and fast retrieval based on a key.

In algorithm design and technical interviews, hash-based structures frequently arise in problems that require speed and efficiency. Their use can dramatically reduce the time complexity of a problem, especially when compared to linear search through lists or arrays.

Conclusion

Understanding hash tables and their implementations through hash sets and hash maps is essential for effective programming in Python and other languages. Whether using Python’s built-in set and dict types or leveraging specialized tools like defaultdict and Counter, these structures provide reliable, performant solutions to a wide range of data problems.

Get Personalized Lessons at AlgoMap Bootcamp 💡