Hash Tables

🗂️ What is a Hash Table?

A Hash Table (or Hash Map) stores key-value pairs and uses a hash function to compute an index for fast access. Think of it like a dictionary where you can instantly find any word!


# Python dictionary is a hash table
student_grades = {}

# Insert (O(1) average)
student_grades["Alice"] = 95
student_grades["Bob"] = 87
student_grades["Charlie"] = 92

# Access (O(1) average)
print(student_grades["Alice"])  # 95

# Check existence
if "Bob" in student_grades:
    print(f"Bob's grade: {student_grades['Bob']}")

# Delete
del student_grades["Charlie"]
print(student_grades)  # {'Alice': 95, 'Bob': 87}

O(1)

Average Access

Key-Value

Pairs

Hash Function

Core

Lesson 1: Simple Hash Table

Create a basic hash table from scratch:

Basic Hash Table


class HashTable:
    def __init__(self, size=10):
        self.size = size
        self.table = [[] for _ in range(size)]  # List of lists for chaining
    
    def _hash(self, key):
        """Simple hash function"""
        return hash(key) % self.size
    
    def put(self, key, value):
        """Insert key-value pair"""
        index = self._hash(key)
        bucket = self.table[index]
        
        # Check if key already exists
        for i, (k, v) in enumerate(bucket):
            if k == key:
                bucket[i] = (key, value)  # Update existing
                return
        
        # Add new key-value pair
        bucket.append((key, value))
    
    def get(self, key):
        """Get value by key"""
        index = self._hash(key)
        bucket = self.table[index]
        
        for k, v in bucket:
            if k == key:
                return v
        
        raise KeyError(f"Key '{key}' not found")
    
    def delete(self, key):
        """Remove key-value pair"""
        index = self._hash(key)
        bucket = self.table[index]
        
        for i, (k, v) in enumerate(bucket):
            if k == key:
                del bucket[i]
                return
        
        raise KeyError(f"Key '{key}' not found")
    
    def display(self):
        """Show hash table contents"""
        for i, bucket in enumerate(self.table):
            if bucket:
                print(f"Bucket {i}: {bucket}")

# Example usage
ht = HashTable(5)

# Insert data
ht.put("name", "Alice")
ht.put("age", 25)
ht.put("city", "New York")

# Access data
print(ht.get("name"))  # Alice
print(ht.get("age"))   # 25

# Display structure
ht.display()

# Update value
ht.put("age", 26)
print(ht.get("age"))   # 26

Lesson 2: Hash Functions

Understanding different hash functions:

Hash Functions


def simple_hash(key, table_size):
    """Simple hash: sum of ASCII values"""
    return sum(ord(c) for c in str(key)) % table_size

def djb2_hash(key, table_size):
    """DJB2 hash algorithm - better distribution"""
    hash_value = 5381
    for char in str(key):
        hash_value = ((hash_value << 5) + hash_value) + ord(char)
    return hash_value % table_size

def polynomial_hash(key, table_size):
    """Polynomial rolling hash"""
    hash_value = 0
    p = 31  # Prime number
    for char in str(key):
        hash_value = (hash_value * p + ord(char)) % table_size
    return hash_value

# Test different hash functions
keys = ["apple", "banana", "cherry", "date", "elderberry"]
table_size = 7

print("Hash Function Comparison:")
print("Key\t\tSimple\tDJB2\tPolynomial")
print("-" * 40)

for key in keys:
    simple = simple_hash(key, table_size)
    djb2 = djb2_hash(key, table_size)
    poly = polynomial_hash(key, table_size)
    print(f"{key}\t\t{simple}\t{djb2}\t{poly}")

# Check for collisions
def check_collisions(hash_func, keys, table_size):
    """Count collisions for a hash function"""
    indices = [hash_func(key, table_size) for key in keys]
    unique_indices = len(set(indices))
    collisions = len(keys) - unique_indices
    return collisions

print(f"\nCollisions (out of {len(keys)} keys):")
print(f"Simple hash: {check_collisions(simple_hash, keys, table_size)}")
print(f"DJB2 hash: {check_collisions(djb2_hash, keys, table_size)}")
print(f"Polynomial hash: {check_collisions(polynomial_hash, keys, table_size)}")

Lesson 3: Handling Collisions

Two main approaches to handle hash collisions:

🔗

Chaining

Store multiple items in same bucket using linked lists

Simple Dynamic

🔍

Open Addressing

Find another empty slot when collision occurs

Memory efficient Cache friendly

Open Addressing Hash Table


class OpenAddressingHashTable:
    def __init__(self, size=10):
        self.size = size
        self.keys = [None] * size
        self.values = [None] * size
        self.deleted = [False] * size  # Track deleted slots
    
    def _hash(self, key):
        return hash(key) % self.size
    
    def _probe(self, key):
        """Linear probing to find slot"""
        index = self._hash(key)
        
        while self.keys[index] is not None:
            if self.keys[index] == key and not self.deleted[index]:
                return index  # Found existing key
            index = (index + 1) % self.size  # Linear probing
        
        return index  # Found empty slot
    
    def put(self, key, value):
        """Insert key-value pair"""
        index = self._probe(key)
        self.keys[index] = key
        self.values[index] = value
        self.deleted[index] = False
    
    def get(self, key):
        """Get value by key"""
        index = self._hash(key)
        
        while self.keys[index] is not None:
            if self.keys[index] == key and not self.deleted[index]:
                return self.values[index]
            index = (index + 1) % self.size
        
        raise KeyError(f"Key '{key}' not found")
    
    def delete(self, key):
        """Mark key as deleted"""
        index = self._hash(key)
        
        while self.keys[index] is not None:
            if self.keys[index] == key and not self.deleted[index]:
                self.deleted[index] = True
                return
            index = (index + 1) % self.size
        
        raise KeyError(f"Key '{key}' not found")
    
    def display(self):
        """Show hash table state"""
        for i in range(self.size):
            status = "DELETED" if self.deleted[i] else "ACTIVE"
            if self.keys[i] is not None:
                print(f"Index {i}: {self.keys[i]} -> {self.values[i]} ({status})")
            else:
                print(f"Index {i}: EMPTY")

# Example usage
oht = OpenAddressingHashTable(7)

# Insert data
oht.put("apple", 5)
oht.put("banana", 7)
oht.put("cherry", 3)

oht.display()
print()

# This might cause collision and probing
oht.put("grape", 9)  
oht.display()

Lesson 4: Real-World Applications

Databases

Database indexing
Primary key lookup
Query optimization

Caching

Web page caching
Memory caches
CDN systems

Programming

Symbol tables
Variable storage
Function lookup

Security

Password storage
Digital signatures
Checksums

Practice Project: Word Counter

Count word frequency using a hash table:

Word Counter


class WordCounter:
    def __init__(self):
        self.word_count = {}
    
    def add_text(self, text):
        """Add text and count words"""
        # Clean and split text
        words = text.lower().replace(',', '').replace('.', '').split()
        
        for word in words:
            if word in self.word_count:
                self.word_count[word] += 1
            else:
                self.word_count[word] = 1
    
    def get_count(self, word):
        """Get count for specific word"""
        return self.word_count.get(word.lower(), 0)
    
    def most_common(self, n=5):
        """Get n most common words"""
        # Sort by count (descending)
        sorted_words = sorted(self.word_count.items(), 
                            key=lambda x: x[1], reverse=True)
        return sorted_words[:n]
    
    def total_words(self):
        """Get total word count"""
        return sum(self.word_count.values())
    
    def unique_words(self):
        """Get number of unique words"""
        return len(self.word_count)
    
    def display_stats(self):
        """Show word statistics"""
        print(f"Total words: {self.total_words()}")
        print(f"Unique words: {self.unique_words()}")
        print(f"\nTop 5 most common words:")
        for word, count in self.most_common():
            print(f"  '{word}': {count} times")

# Example usage
counter = WordCounter()

# Add some text
text1 = "The quick brown fox jumps over the lazy dog"
text2 = "The dog was lazy but the fox was quick"
text3 = "Quick brown animals are amazing"

counter.add_text(text1)
counter.add_text(text2)
counter.add_text(text3)

# Show statistics
counter.display_stats()

# Check specific words
print(f"\n'the' appears {counter.get_count('the')} times")
print(f"'quick' appears {counter.get_count('quick')} times")

# Advanced: Find words that appear only once
rare_words = [word for word, count in counter.word_count.items() if count == 1]
print(f"\nWords appearing only once: {rare_words}")