Hash Tables

Master fast key-value storage with O(1) operations

šŸ—‚ļø What is a Hash Table?

A Hash Table (or Hash Map) stores key-value pairs and uses a hash function to compute an index for fast access. Think of it like a dictionary where you can instantly find any word!


# Python dictionary is a hash table
student_grades = {}

# Insert (O(1) average)
student_grades["Alice"] = 95
student_grades["Bob"] = 87
student_grades["Charlie"] = 92

# Access (O(1) average)
print(student_grades["Alice"])  # 95

# Check existence
if "Bob" in student_grades:
    print(f"Bob's grade: {student_grades['Bob']}")

# Delete
del student_grades["Charlie"]
print(student_grades)  # {'Alice': 95, 'Bob': 87}
                                    
O(1)
Average Access
Key-Value
Pairs
Hash Function
Core

Lesson 1: Simple Hash Table

Create a basic hash table from scratch:

Basic Hash Table

class HashTable:
    def __init__(self, size=10):
        self.size = size
        self.table = [[] for _ in range(size)]  # List of lists for chaining
    
    def _hash(self, key):
        """Simple hash function"""
        return hash(key) % self.size
    
    def put(self, key, value):
        """Insert key-value pair"""
        index = self._hash(key)
        bucket = self.table[index]
        
        # Check if key already exists
        for i, (k, v) in enumerate(bucket):
            if k == key:
                bucket[i] = (key, value)  # Update existing
                return
        
        # Add new key-value pair
        bucket.append((key, value))
    
    def get(self, key):
        """Get value by key"""
        index = self._hash(key)
        bucket = self.table[index]
        
        for k, v in bucket:
            if k == key:
                return v
        
        raise KeyError(f"Key '{key}' not found")
    
    def delete(self, key):
        """Remove key-value pair"""
        index = self._hash(key)
        bucket = self.table[index]
        
        for i, (k, v) in enumerate(bucket):
            if k == key:
                del bucket[i]
                return
        
        raise KeyError(f"Key '{key}' not found")
    
    def display(self):
        """Show hash table contents"""
        for i, bucket in enumerate(self.table):
            if bucket:
                print(f"Bucket {i}: {bucket}")

# Example usage
ht = HashTable(5)

# Insert data
ht.put("name", "Alice")
ht.put("age", 25)
ht.put("city", "New York")

# Access data
print(ht.get("name"))  # Alice
print(ht.get("age"))   # 25

# Display structure
ht.display()

# Update value
ht.put("age", 26)
print(ht.get("age"))   # 26

Lesson 2: Hash Functions

Understanding different hash functions:

Hash Functions

def simple_hash(key, table_size):
    """Simple hash: sum of ASCII values"""
    return sum(ord(c) for c in str(key)) % table_size

def djb2_hash(key, table_size):
    """DJB2 hash algorithm - better distribution"""
    hash_value = 5381
    for char in str(key):
        hash_value = ((hash_value << 5) + hash_value) + ord(char)
    return hash_value % table_size

def polynomial_hash(key, table_size):
    """Polynomial rolling hash"""
    hash_value = 0
    p = 31  # Prime number
    for char in str(key):
        hash_value = (hash_value * p + ord(char)) % table_size
    return hash_value

# Test different hash functions
keys = ["apple", "banana", "cherry", "date", "elderberry"]
table_size = 7

print("Hash Function Comparison:")
print("Key\t\tSimple\tDJB2\tPolynomial")
print("-" * 40)

for key in keys:
    simple = simple_hash(key, table_size)
    djb2 = djb2_hash(key, table_size)
    poly = polynomial_hash(key, table_size)
    print(f"{key}\t\t{simple}\t{djb2}\t{poly}")

# Check for collisions
def check_collisions(hash_func, keys, table_size):
    """Count collisions for a hash function"""
    indices = [hash_func(key, table_size) for key in keys]
    unique_indices = len(set(indices))
    collisions = len(keys) - unique_indices
    return collisions

print(f"\nCollisions (out of {len(keys)} keys):")
print(f"Simple hash: {check_collisions(simple_hash, keys, table_size)}")
print(f"DJB2 hash: {check_collisions(djb2_hash, keys, table_size)}")
print(f"Polynomial hash: {check_collisions(polynomial_hash, keys, table_size)}")

Lesson 3: Handling Collisions

Two main approaches to handle hash collisions:

šŸ”—

Chaining

Store multiple items in same bucket using linked lists

Simple Dynamic
šŸ”

Open Addressing

Find another empty slot when collision occurs

Memory efficient Cache friendly
Open Addressing Hash Table

class OpenAddressingHashTable:
    def __init__(self, size=10):
        self.size = size
        self.keys = [None] * size
        self.values = [None] * size
        self.deleted = [False] * size  # Track deleted slots
    
    def _hash(self, key):
        return hash(key) % self.size
    
    def _probe(self, key):
        """Linear probing to find slot"""
        index = self._hash(key)
        
        while self.keys[index] is not None:
            if self.keys[index] == key and not self.deleted[index]:
                return index  # Found existing key
            index = (index + 1) % self.size  # Linear probing
        
        return index  # Found empty slot
    
    def put(self, key, value):
        """Insert key-value pair"""
        index = self._probe(key)
        self.keys[index] = key
        self.values[index] = value
        self.deleted[index] = False
    
    def get(self, key):
        """Get value by key"""
        index = self._hash(key)
        
        while self.keys[index] is not None:
            if self.keys[index] == key and not self.deleted[index]:
                return self.values[index]
            index = (index + 1) % self.size
        
        raise KeyError(f"Key '{key}' not found")
    
    def delete(self, key):
        """Mark key as deleted"""
        index = self._hash(key)
        
        while self.keys[index] is not None:
            if self.keys[index] == key and not self.deleted[index]:
                self.deleted[index] = True
                return
            index = (index + 1) % self.size
        
        raise KeyError(f"Key '{key}' not found")
    
    def display(self):
        """Show hash table state"""
        for i in range(self.size):
            status = "DELETED" if self.deleted[i] else "ACTIVE"
            if self.keys[i] is not None:
                print(f"Index {i}: {self.keys[i]} -> {self.values[i]} ({status})")
            else:
                print(f"Index {i}: EMPTY")

# Example usage
oht = OpenAddressingHashTable(7)

# Insert data
oht.put("apple", 5)
oht.put("banana", 7)
oht.put("cherry", 3)

oht.display()
print()

# This might cause collision and probing
oht.put("grape", 9)  
oht.display()

Lesson 4: Real-World Applications

Databases

  • Database indexing
  • Primary key lookup
  • Query optimization

Caching

  • Web page caching
  • Memory caches
  • CDN systems

Programming

  • Symbol tables
  • Variable storage
  • Function lookup

Security

  • Password storage
  • Digital signatures
  • Checksums

Practice Project: Word Counter

Count word frequency using a hash table:

Word Counter

class WordCounter:
    def __init__(self):
        self.word_count = {}
    
    def add_text(self, text):
        """Add text and count words"""
        # Clean and split text
        words = text.lower().replace(',', '').replace('.', '').split()
        
        for word in words:
            if word in self.word_count:
                self.word_count[word] += 1
            else:
                self.word_count[word] = 1
    
    def get_count(self, word):
        """Get count for specific word"""
        return self.word_count.get(word.lower(), 0)
    
    def most_common(self, n=5):
        """Get n most common words"""
        # Sort by count (descending)
        sorted_words = sorted(self.word_count.items(), 
                            key=lambda x: x[1], reverse=True)
        return sorted_words[:n]
    
    def total_words(self):
        """Get total word count"""
        return sum(self.word_count.values())
    
    def unique_words(self):
        """Get number of unique words"""
        return len(self.word_count)
    
    def display_stats(self):
        """Show word statistics"""
        print(f"Total words: {self.total_words()}")
        print(f"Unique words: {self.unique_words()}")
        print(f"\nTop 5 most common words:")
        for word, count in self.most_common():
            print(f"  '{word}': {count} times")

# Example usage
counter = WordCounter()

# Add some text
text1 = "The quick brown fox jumps over the lazy dog"
text2 = "The dog was lazy but the fox was quick"
text3 = "Quick brown animals are amazing"

counter.add_text(text1)
counter.add_text(text2)
counter.add_text(text3)

# Show statistics
counter.display_stats()

# Check specific words
print(f"\n'the' appears {counter.get_count('the')} times")
print(f"'quick' appears {counter.get_count('quick')} times")

# Advanced: Find words that appear only once
rare_words = [word for word, count in counter.word_count.items() if count == 1]
print(f"\nWords appearing only once: {rare_words}")

🧠 Test Your Knowledge

What is the average time complexity for hash table operations?

What happens when two keys hash to the same index?