Counting Sort Algorithm
Master the linear-time sorting algorithm for integers with limited range
🔢 What is Counting Sort?
Counting Sort is like counting votes in an election! Instead of comparing elements, it counts how many times each value appears, then uses these counts to place elements in their correct positions. It's incredibly fast for the right type of data!
# Counting Sort in action!
def counting_sort(arr):
if not arr:
return arr
# Find the range of input
max_val = max(arr)
min_val = min(arr)
range_val = max_val - min_val + 1
# Create count array
count = [0] * range_val
# Count occurrences
for num in arr:
count[num - min_val] += 1
# Build sorted array
result = []
for i in range(range_val):
result.extend([i + min_val] * count[i])
return result
# Example
numbers = [4, 2, 2, 8, 3, 3, 1]
print("Sorted:", counting_sort(numbers))
Counting Sort Characteristics
Linear Time
O(n+k) time complexity - faster than comparison sorts
Stable Sorting
Maintains relative order of equal elements
Integer-Based
Works with integers in a known range
Non-Comparison
Doesn't compare elements directly
Lesson 1: Understanding the Counting Process
Counting Sort works by counting the frequency of each element and using these counts to determine positions:
Counting Sort Steps: [4, 2, 2, 8, 3, 3, 1]
def counting_sort_detailed(arr):
"""Counting Sort with detailed steps"""
print(f"Input array: {arr}")
if not arr:
return arr
# Step 1: Find the range of input
max_val = max(arr)
min_val = min(arr)
range_val = max_val - min_val + 1
print(f"Range: {min_val} to {max_val} (size: {range_val})")
# Step 2: Create and initialize count array
count = [0] * range_val
print(f"Initial count array: {count}")
# Step 3: Count occurrences of each element
print("\nCounting occurrences:")
for num in arr:
count[num - min_val] += 1
print(f" Found {num}, count[{num - min_val}] = {count[num - min_val]}")
print(f"Final count array: {count}")
# Step 4: Build the sorted array
print("\nBuilding sorted array:")
result = []
for i in range(range_val):
value = i + min_val
frequency = count[i]
if frequency > 0:
print(f" Adding {frequency} copies of {value}")
result.extend([value] * frequency)
print(f"Sorted array: {result}")
return result
# Example usage
numbers = [4, 2, 2, 8, 3, 3, 1]
print("=== Counting Sort Step by Step ===")
sorted_numbers = counting_sort_detailed(numbers)
Lesson 2: Basic Implementation
Let's implement Counting Sort step by step:
def counting_sort(arr):
"""
Simple Counting Sort implementation
Time Complexity: O(n + k) where k is the range of input
Space Complexity: O(k)
"""
if not arr:
return arr
# Find the range of the input
max_val = max(arr)
min_val = min(arr)
range_val = max_val - min_val + 1
# Create count array to store count of each element
count = [0] * range_val
# Store count of each element
for num in arr:
count[num - min_val] += 1
# Build the sorted array
result = []
for i in range(range_val):
# Add each element count[i] times
result.extend([i + min_val] * count[i])
return result
# Test the function
test_array = [4, 2, 2, 8, 3, 3, 1]
print("Original array:", test_array)
sorted_array = counting_sort(test_array)
print("Sorted array:", sorted_array)
Lesson 3: Stable Counting Sort
A stable version that preserves the relative order of equal elements:
def stable_counting_sort(arr):
"""
Stable Counting Sort implementation
Preserves the relative order of equal elements
"""
if not arr:
return arr
# Find the range of the input
max_val = max(arr)
min_val = min(arr)
range_val = max_val - min_val + 1
# Create count array
count = [0] * range_val
# Store count of each element
for num in arr:
count[num - min_val] += 1
# Transform count array to store actual positions
# count[i] now contains the number of elements <= i
for i in range(1, range_val):
count[i] += count[i - 1]
# Build the result array
result = [0] * len(arr)
# Process elements from right to left to maintain stability
for i in range(len(arr) - 1, -1, -1):
element = arr[i]
position = count[element - min_val] - 1
result[position] = element
count[element - min_val] -= 1
return result
def counting_sort_inplace(arr):
"""
In-place version of counting sort (modifies original array)
"""
if not arr:
return arr
max_val = max(arr)
min_val = min(arr)
range_val = max_val - min_val + 1
# Create count array
count = [0] * range_val
# Count occurrences
for num in arr:
count[num - min_val] += 1
# Reconstruct the array
index = 0
for i in range(range_val):
while count[i] > 0:
arr[index] = i + min_val
index += 1
count[i] -= 1
return arr
# Test both implementations
test_array1 = [4, 2, 2, 8, 3, 3, 1]
test_array2 = test_array1.copy()
print("Original array:", test_array1)
print("Stable counting sort:", stable_counting_sort(test_array1))
print("In-place counting sort:", counting_sort_inplace(test_array2))
Lesson 4: Counting Sort Variations
Different variations of Counting Sort for specific use cases:
🔹Counting Sort for Characters
def counting_sort_chars(text):
"""Counting sort for characters (case-sensitive)"""
if not text:
return text
# ASCII range for printable characters (32-126)
count = [0] * 95 # 95 printable ASCII characters
# Count occurrences
for char in text:
count[ord(char) - 32] += 1
# Build sorted string
result = []
for i in range(95):
if count[i] > 0:
result.extend([chr(i + 32)] * count[i])
return ''.join(result)
# Example
text = "hello world!"
print(f"Original: '{text}'")
print(f"Sorted: '{counting_sort_chars(text)}'")
🔹Counting Sort for Objects
class Student:
def __init__(self, name, grade):
self.name = name
self.grade = grade # Grade from 0-100
def __str__(self):
return f"{self.name}: {self.grade}"
def __repr__(self):
return self.__str__()
def counting_sort_students(students):
"""Sort students by grade using counting sort"""
if not students:
return students
# Grades range from 0 to 100
count = [[] for _ in range(101)]
# Group students by grade
for student in students:
count[student.grade].append(student)
# Build sorted list
result = []
for grade_list in count:
result.extend(grade_list)
return result
# Example
students = [
Student("Alice", 85),
Student("Bob", 92),
Student("Charlie", 78),
Student("Diana", 92), # Same grade as Bob
Student("Eve", 85) # Same grade as Alice
]
print("Original students:")
for student in students:
print(f" {student}")
sorted_students = counting_sort_students(students)
print("\nSorted by grade:")
for student in sorted_students:
print(f" {student}")
🔹Counting Sort with Negative Numbers
def counting_sort_with_negatives(arr):
"""Counting sort that handles negative numbers"""
if not arr:
return arr
max_val = max(arr)
min_val = min(arr)
range_val = max_val - min_val + 1
# Create count array
count = [0] * range_val
# Count occurrences (shift by min_val to handle negatives)
for num in arr:
count[num - min_val] += 1
# Build sorted array
result = []
for i in range(range_val):
if count[i] > 0:
result.extend([i + min_val] * count[i])
return result
# Example with negative numbers
numbers = [-5, -1, 0, 3, -2, 1, -1, 4]
print(f"Original: {numbers}")
print(f"Sorted: {counting_sort_with_negatives(numbers)}")
🔹Counting Sort for Frequency Analysis
def frequency_analysis(arr):
"""Analyze frequency of elements using counting sort approach"""
if not arr:
return {}
max_val = max(arr)
min_val = min(arr)
range_val = max_val - min_val + 1
# Count frequencies
count = [0] * range_val
for num in arr:
count[num - min_val] += 1
# Create frequency dictionary
frequency = {}
for i in range(range_val):
if count[i] > 0:
frequency[i + min_val] = count[i]
return frequency
def most_frequent_elements(arr, k=3):
"""Find k most frequent elements"""
freq = frequency_analysis(arr)
# Sort by frequency (descending)
sorted_freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)
return sorted_freq[:k]
# Example
data = [1, 3, 2, 3, 4, 1, 3, 2, 5, 3, 1]
print(f"Data: {data}")
print(f"Frequencies: {frequency_analysis(data)}")
print(f"Top 3 most frequent: {most_frequent_elements(data, 3)}")
Lesson 5: When to Use Counting Sort
Understanding the trade-offs and optimal use cases:
import time
import random
def analyze_counting_sort_efficiency():
"""Analyze when counting sort is efficient vs inefficient"""
def measure_time(sort_func, arr):
start = time.time()
sort_func(arr.copy())
return (time.time() - start) * 1000 # Convert to milliseconds
# Comparison sorting algorithm for reference
def quick_sort(arr):
if len(arr) <= 1:
return arr
pivot = arr[len(arr) // 2]
left = [x for x in arr if x < pivot]
middle = [x for x in arr if x == pivot]
right = [x for x in arr if x > pivot]
return quick_sort(left) + middle + quick_sort(right)
print("=== Counting Sort Efficiency Analysis ===")
print(f"{'Scenario':25} | {'Array Size':>10} | {'Range':>8} | {'Counting':>10} | {'Quick':>10} | {'Winner':>10}")
print("-" * 85)
scenarios = [
("Small range, large n", 10000, 100),
("Medium range, large n", 10000, 1000),
("Large range, large n", 10000, 10000),
("Huge range, large n", 10000, 100000),
("Small range, small n", 100, 10),
("Ages (0-120)", 1000, 121),
("Grades (0-100)", 1000, 101),
("ASCII chars (32-126)", 1000, 95)
]
for scenario_name, n, k in scenarios:
# Generate test data
if "ASCII" in scenario_name:
# Generate random ASCII characters
test_data = [random.randint(32, 126) for _ in range(n)]
else:
test_data = [random.randint(0, k-1) for _ in range(n)]
# Measure counting sort
counting_time = measure_time(counting_sort, test_data)
# Measure quick sort
quick_time = measure_time(quick_sort, test_data)
# Determine winner
winner = "Counting" if counting_time < quick_time else "Quick"
print(f"{scenario_name:25} | {n:10d} | {k:8d} | {counting_time:8.2f}ms | {quick_time:8.2f}ms | {winner:>10}")
def memory_usage_analysis():
"""Analyze memory usage of counting sort"""
import sys
print("\n=== Memory Usage Analysis ===")
print(f"{'Range (k)':>10} | {'Count Array':>12} | {'Input Size (n)':>15} | {'Efficiency':>12}")
print("-" * 60)
ranges = [10, 100, 1000, 10000, 100000, 1000000]
input_size = 1000
for k in ranges:
# Memory for count array (assuming 4 bytes per integer)
count_memory = k * 4 # bytes
input_memory = input_size * 4 # bytes
efficiency = "Good" if count_memory <= input_memory else "Poor"
if count_memory > input_memory * 10:
efficiency = "Very Poor"
print(f"{k:10d} | {count_memory:10d}B | {input_memory:13d}B | {efficiency:>12}")
def practical_use_cases():
"""Demonstrate practical use cases for counting sort"""
print("\n=== Practical Use Cases ===")
# Use Case 1: Sorting exam grades
print("\n1. Sorting Exam Grades (0-100):")
grades = [85, 92, 78, 95, 88, 76, 92, 85, 90, 82]
print(f" Original grades: {grades}")
sorted_grades = counting_sort(grades)
print(f" Sorted grades: {sorted_grades}")
# Use Case 2: Age distribution
print("\n2. Age Distribution (0-120):")
ages = [25, 30, 22, 35, 28, 45, 30, 25, 40, 32]
print(f" Original ages: {ages}")
sorted_ages = counting_sort(ages)
print(f" Sorted ages: {sorted_ages}")
# Use Case 3: Character frequency in text
print("\n3. Character Sorting:")
text = "hello world"
chars = list(text)
print(f" Original: '{text}'")
sorted_chars = counting_sort_chars(text)
print(f" Sorted: '{sorted_chars}'")
# Use Case 4: Small integer arrays
print("\n4. Small Integer Arrays (1-10):")
small_ints = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3]
print(f" Original: {small_ints}")
sorted_ints = counting_sort(small_ints)
print(f" Sorted: {sorted_ints}")
def when_not_to_use_counting_sort():
"""Examples of when counting sort is not suitable"""
print("\n=== When NOT to Use Counting Sort ===")
# Case 1: Large range
print("\n1. Large Range Problem:")
large_range = [1, 1000000, 2, 999999]
print(f" Array: {large_range}")
print(f" Range: {max(large_range) - min(large_range) + 1}")
print(" ❌ Would need 1,000,000 count array elements for 4 numbers!")
# Case 2: Floating point numbers
print("\n2. Floating Point Numbers:")
floats = [3.14, 2.71, 1.41, 3.14159]
print(f" Array: {floats}")
print(" ❌ Counting sort only works with integers!")
# Case 3: Strings
print("\n3. String Sorting:")
strings = ["apple", "banana", "cherry", "date"]
print(f" Array: {strings}")
print(" ❌ Counting sort doesn't work with variable-length strings!")
# Case 4: Sparse data
print("\n4. Sparse Data:")
sparse = [1, 1000, 2000, 3000]
print(f" Array: {sparse}")
print(f" Range: {max(sparse) - min(sparse) + 1}")
print(" ❌ Wastes memory on unused count array positions!")
# Run all analyses
analyze_counting_sort_efficiency()
memory_usage_analysis()
practical_use_cases()
when_not_to_use_counting_sort()
Practical Applications
Perfect Use Cases
- Sorting grades (0-100)
- Age distribution (0-120)
- Character frequency analysis
- Small integer ranges
Real-World Examples
- Student grade processing
- Survey response analysis
- Histogram generation
- Bucket sorting preprocessing
Advantages
- Linear time complexity O(n+k)
- Stable sorting algorithm
- No element comparisons needed
- Simple to implement
Limitations
- Only works with integers
- Requires known range
- Memory usage proportional to range
- Inefficient for large ranges
Practice Project: Grade Management System
Let's create a system that manages student grades using Counting Sort:
import random
from collections import defaultdict
class Student:
def __init__(self, student_id, name, grade):
self.student_id = student_id
self.name = name
self.grade = grade # Grade from 0-100
def get_letter_grade(self):
"""Convert numeric grade to letter grade"""
if self.grade >= 90:
return 'A'
elif self.grade >= 80:
return 'B'
elif self.grade >= 70:
return 'C'
elif self.grade >= 60:
return 'D'
else:
return 'F'
def __str__(self):
return f"ID: {self.student_id:03d} | {self.name:15} | {self.grade:3d} ({self.get_letter_grade()})"
def __repr__(self):
return self.__str__()
class GradeManager:
def __init__(self):
self.students = []
def add_student(self, student):
"""Add a student to the system"""
self.students.append(student)
def counting_sort_by_grade(self, students=None, ascending=True):
"""Sort students by grade using counting sort"""
if students is None:
students = self.students.copy()
if not students:
return students
# Grades range from 0 to 100
count = [[] for _ in range(101)]
# Group students by grade
for student in students:
count[student.grade].append(student)
# Build sorted list
result = []
grade_range = range(101) if ascending else range(100, -1, -1)
for grade in grade_range:
result.extend(count[grade])
return result
def get_grade_distribution(self):
"""Get distribution of grades"""
if not self.students:
return {}
# Count grades using counting sort approach
count = [0] * 101
for student in self.students:
count[student.grade] += 1
# Create distribution dictionary
distribution = {}
for grade in range(101):
if count[grade] > 0:
distribution[grade] = count[grade]
return distribution
def get_letter_grade_distribution(self):
"""Get distribution by letter grades"""
letter_counts = defaultdict(int)
for student in self.students:
letter_grade = student.get_letter_grade()
letter_counts[letter_grade] += 1
return dict(letter_counts)
def get_statistics(self):
"""Calculate grade statistics"""
if not self.students:
return {}
grades = [student.grade for student in self.students]
# Use counting sort to get sorted grades
sorted_grades = self.counting_sort_grades_only(grades)
n = len(sorted_grades)
stats = {
'count': n,
'min': sorted_grades[0],
'max': sorted_grades[-1],
'mean': sum(sorted_grades) / n,
'median': sorted_grades[n // 2] if n % 2 == 1 else (sorted_grades[n // 2 - 1] + sorted_grades[n // 2]) / 2
}
return stats
def counting_sort_grades_only(self, grades):
"""Sort just the grade values using counting sort"""
if not grades:
return grades
# Count occurrences
count = [0] * 101
for grade in grades:
count[grade] += 1
# Build sorted array
result = []
for grade in range(101):
result.extend([grade] * count[grade])
return result
def find_students_by_grade_range(self, min_grade, max_grade):
"""Find students within a grade range"""
# Sort students by grade first
sorted_students = self.counting_sort_by_grade()
# Find students in range
result = []
for student in sorted_students:
if min_grade <= student.grade <= max_grade:
result.append(student)
elif student.grade > max_grade:
break # Since sorted, no more matches
return result
def get_top_performers(self, n=5):
"""Get top n performing students"""
sorted_students = self.counting_sort_by_grade(ascending=False)
return sorted_students[:n]
def get_students_needing_help(self, threshold=60):
"""Get students below threshold grade"""
return self.find_students_by_grade_range(0, threshold - 1)
def display_students(self, students=None, title="Students"):
"""Display students in a formatted table"""
if students is None:
students = self.students
print(f"\n📊 {title}")
print("=" * 60)
print(f"{'ID':>3} | {'Name':15} | {'Grade':>5} | {'Letter':>6}")
print("-" * 60)
for student in students:
print(f"{student.student_id:3d} | {student.name:15} | {student.grade:5d} | {student.get_letter_grade():>6}")
print(f"\nTotal students: {len(students)}")
def display_grade_histogram(self):
"""Display a simple histogram of grades"""
distribution = self.get_grade_distribution()
print(f"\n📈 Grade Distribution Histogram")
print("=" * 50)
# Group by 10-point ranges for better visualization
ranges = {}
for grade, count in distribution.items():
range_key = (grade // 10) * 10
ranges[range_key] = ranges.get(range_key, 0) + count
for range_start in sorted(ranges.keys()):
range_end = min(range_start + 9, 100)
count = ranges[range_start]
bar = "█" * count
print(f"{range_start:2d}-{range_end:2d}: {bar} ({count})")
def create_sample_students():
"""Create sample student data"""
names = [
"Alice Johnson", "Bob Smith", "Charlie Brown", "Diana Prince",
"Eve Wilson", "Frank Miller", "Grace Lee", "Henry Davis",
"Ivy Chen", "Jack Taylor", "Kate Anderson", "Liam Garcia",
"Mia Rodriguez", "Noah Martinez", "Olivia Thompson", "Paul White",
"Quinn Harris", "Ruby Clark", "Sam Lewis", "Tina Walker"
]
students = []
for i, name in enumerate(names, 1):
# Generate realistic grade distribution
# 70% of students get 70-100, 20% get 50-69, 10% get 0-49
rand = random.random()
if rand < 0.7:
grade = random.randint(70, 100)
elif rand < 0.9:
grade = random.randint(50, 69)
else:
grade = random.randint(0, 49)
students.append(Student(i, name, grade))
return students
def main():
# Create grade manager
manager = GradeManager()
# Add sample students
sample_students = create_sample_students()
for student in sample_students:
manager.add_student(student)
# Display original list
manager.display_students(title="Original Student List")
# Sort by grade (ascending)
print("\n" + "="*60)
print("SORTING BY GRADE (ASCENDING)")
print("="*60)
grade_sorted_asc = manager.counting_sort_by_grade(ascending=True)
manager.display_students(grade_sorted_asc, "Students Sorted by Grade (Low to High)")
# Sort by grade (descending)
print("\n" + "="*60)
print("SORTING BY GRADE (DESCENDING)")
print("="*60)
grade_sorted_desc = manager.counting_sort_by_grade(ascending=False)
manager.display_students(grade_sorted_desc, "Students Sorted by Grade (High to Low)")
# Display statistics
print("\n" + "="*60)
print("GRADE STATISTICS")
print("="*60)
stats = manager.get_statistics()
print(f"Total Students: {stats['count']}")
print(f"Lowest Grade: {stats['min']}")
print(f"Highest Grade: {stats['max']}")
print(f"Average Grade: {stats['mean']:.1f}")
print(f"Median Grade: {stats['median']:.1f}")
# Letter grade distribution
print("\n📋 Letter Grade Distribution:")
letter_dist = manager.get_letter_grade_distribution()
for letter in ['A', 'B', 'C', 'D', 'F']:
count = letter_dist.get(letter, 0)
percentage = (count / len(manager.students)) * 100
print(f" {letter}: {count:2d} students ({percentage:4.1f}%)")
# Display histogram
manager.display_grade_histogram()
# Top performers
print("\n" + "="*60)
print("TOP PERFORMERS")
print("="*60)
top_students = manager.get_top_performers(5)
manager.display_students(top_students, "Top 5 Students")
# Students needing help
print("\n" + "="*60)
print("STUDENTS NEEDING HELP")
print("="*60)
struggling_students = manager.get_students_needing_help(70)
if struggling_students:
manager.display_students(struggling_students, "Students Below 70%")
else:
print("🎉 All students are performing well!")
# Grade range analysis
print("\n" + "="*60)
print("GRADE RANGE ANALYSIS")
print("="*60)
ranges = [
(90, 100, "Excellent (A)"),
(80, 89, "Good (B)"),
(70, 79, "Satisfactory (C)"),
(60, 69, "Needs Improvement (D)"),
(0, 59, "Failing (F)")
]
for min_grade, max_grade, description in ranges:
students_in_range = manager.find_students_by_grade_range(min_grade, max_grade)
count = len(students_in_range)
percentage = (count / len(manager.students)) * 100
print(f"{description:20}: {count:2d} students ({percentage:4.1f}%)")
# Run the grade management system
main()