Quick Sort Algorithm
Master the divide-and-conquer sorting algorithm that's lightning fast
⚡ What is Quick Sort?
Quick Sort is like organizing a messy room by picking a reference point and putting everything smaller on one side and everything larger on the other side, then repeating this process. It's one of the fastest sorting algorithms in practice!
# Quick Sort in action!
def quick_sort(arr):
if len(arr) <= 1:
return arr
pivot = arr[len(arr) // 2]
left = [x for x in arr if x < pivot]
middle = [x for x in arr if x == pivot]
right = [x for x in arr if x > pivot]
return quick_sort(left) + middle + quick_sort(right)
# Example
numbers = [64, 34, 25, 12, 22, 11, 90]
print("Sorted:", quick_sort(numbers))
Quick Sort Characteristics
Divide & Conquer
Breaks problem into smaller subproblems
In-Place Sorting
Can be implemented with O(log n) space
Cache Efficient
Good locality of reference
Not Stable
May change relative order of equal elements
Lesson 1: Understanding the Partitioning Process
Quick Sort works by selecting a 'pivot' element and partitioning the array around it:
Quick Sort Steps: [3, 6, 8, 10, 1, 2, 1]
def quick_sort_detailed(arr, depth=0):
"""Quick Sort with detailed steps"""
indent = " " * depth
print(f"{indent}Sorting: {arr}")
if len(arr) <= 1:
print(f"{indent}Base case reached: {arr}")
return arr
# Choose pivot (middle element)
pivot = arr[len(arr) // 2]
print(f"{indent}Chosen pivot: {pivot}")
# Partition the array
left = [x for x in arr if x < pivot]
middle = [x for x in arr if x == pivot]
right = [x for x in arr if x > pivot]
print(f"{indent}Left (< {pivot}): {left}")
print(f"{indent}Middle (= {pivot}): {middle}")
print(f"{indent}Right (> {pivot}): {right}")
# Recursively sort left and right parts
print(f"{indent}Recursively sorting left part...")
sorted_left = quick_sort_detailed(left, depth + 1)
print(f"{indent}Recursively sorting right part...")
sorted_right = quick_sort_detailed(right, depth + 1)
# Combine results
result = sorted_left + middle + sorted_right
print(f"{indent}Combined result: {result}")
return result
# Example usage
numbers = [3, 6, 8, 10, 1, 2, 1]
print("=== Quick Sort Step by Step ===")
sorted_numbers = quick_sort_detailed(numbers)
Lesson 2: Basic Implementation
Let's implement Quick Sort in different ways:
def quick_sort_simple(arr):
"""
Simple Quick Sort implementation (creates new arrays)
Time Complexity: O(n log n) average, O(n²) worst case
Space Complexity: O(n) due to new array creation
"""
# Base case: arrays with 0 or 1 element are already sorted
if len(arr) <= 1:
return arr
# Choose pivot (middle element)
pivot = arr[len(arr) // 2]
# Partition the array into three parts
left = [x for x in arr if x < pivot] # Elements less than pivot
middle = [x for x in arr if x == pivot] # Elements equal to pivot
right = [x for x in arr if x > pivot] # Elements greater than pivot
# Recursively sort left and right parts, then combine
return quick_sort_simple(left) + middle + quick_sort_simple(right)
# Test the function
test_array = [64, 34, 25, 12, 22, 11, 90]
print("Original array:", test_array)
sorted_array = quick_sort_simple(test_array)
print("Sorted array:", sorted_array)
Lesson 3: In-Place Quick Sort
The more efficient in-place version that modifies the original array:
def quick_sort_inplace(arr, low=0, high=None):
"""
In-place Quick Sort implementation
Time Complexity: O(n log n) average, O(n²) worst case
Space Complexity: O(log n) due to recursion stack
"""
if high is None:
high = len(arr) - 1
if low < high:
# Partition the array and get the pivot index
pivot_index = partition(arr, low, high)
# Recursively sort elements before and after partition
quick_sort_inplace(arr, low, pivot_index - 1)
quick_sort_inplace(arr, pivot_index + 1, high)
return arr
def partition(arr, low, high):
"""
Lomuto partition scheme
Places pivot at its correct position and returns its index
"""
# Choose the rightmost element as pivot
pivot = arr[high]
# Index of smaller element (indicates right position of pivot)
i = low - 1
for j in range(low, high):
# If current element is smaller than or equal to pivot
if arr[j] <= pivot:
i += 1 # Increment index of smaller element
arr[i], arr[j] = arr[j], arr[i] # Swap elements
# Place pivot at correct position
arr[i + 1], arr[high] = arr[high], arr[i + 1]
return i + 1
# Alternative partition implementation (Hoare partition)
def hoare_partition(arr, low, high):
"""
Hoare partition scheme (more efficient)
"""
pivot = arr[low] # Choose first element as pivot
i = low - 1
j = high + 1
while True:
# Find element on left that should be on right
i += 1
while arr[i] < pivot:
i += 1
# Find element on right that should be on left
j -= 1
while arr[j] > pivot:
j -= 1
# If elements crossed, partitioning is done
if i >= j:
return j
# Swap elements
arr[i], arr[j] = arr[j], arr[i]
def quick_sort_hoare(arr, low=0, high=None):
"""Quick Sort using Hoare partition"""
if high is None:
high = len(arr) - 1
if low < high:
pivot_index = hoare_partition(arr, low, high)
quick_sort_hoare(arr, low, pivot_index)
quick_sort_hoare(arr, pivot_index + 1, high)
return arr
# Test both implementations
test_array1 = [64, 34, 25, 12, 22, 11, 90]
test_array2 = test_array1.copy()
print("Original array:", test_array1)
print("Lomuto partition result:", quick_sort_inplace(test_array1))
print("Hoare partition result:", quick_sort_hoare(test_array2))
Lesson 4: Quick Sort Optimizations
Several techniques to make Quick Sort even faster:
🔹Median-of-Three Pivot Selection
def median_of_three(arr, low, high):
"""Choose median of first, middle, and last elements as pivot"""
mid = (low + high) // 2
# Sort the three elements
if arr[mid] < arr[low]:
arr[low], arr[mid] = arr[mid], arr[low]
if arr[high] < arr[low]:
arr[low], arr[high] = arr[high], arr[low]
if arr[high] < arr[mid]:
arr[mid], arr[high] = arr[high], arr[mid]
# Place median at the end
arr[mid], arr[high] = arr[high], arr[mid]
return arr[high]
🔹Hybrid Quick Sort (with Insertion Sort for small arrays)
def insertion_sort_range(arr, low, high):
"""Insertion sort for a range of array"""
for i in range(low + 1, high + 1):
key = arr[i]
j = i - 1
while j >= low and arr[j] > key:
arr[j + 1] = arr[j]
j -= 1
arr[j + 1] = key
def hybrid_quick_sort(arr, low=0, high=None, threshold=10):
"""
Hybrid Quick Sort that switches to Insertion Sort for small subarrays
"""
if high is None:
high = len(arr) - 1
if low < high:
# Use insertion sort for small subarrays
if high - low + 1 <= threshold:
insertion_sort_range(arr, low, high)
else:
# Use median-of-three for pivot selection
median_of_three(arr, low, high)
pivot_index = partition(arr, low, high)
hybrid_quick_sort(arr, low, pivot_index - 1, threshold)
hybrid_quick_sort(arr, pivot_index + 1, high, threshold)
return arr
🔹Three-Way Partitioning (Dutch National Flag)
def three_way_partition(arr, low, high):
"""
Three-way partitioning for arrays with many duplicate elements
Returns (lt, gt) where:
- Elements < pivot are in arr[low:lt]
- Elements = pivot are in arr[lt:gt+1]
- Elements > pivot are in arr[gt+1:high+1]
"""
pivot = arr[low]
lt = low # arr[low:lt] < pivot
i = low # arr[lt:i] = pivot
gt = high # arr[gt+1:high+1] > pivot
while i <= gt:
if arr[i] < pivot:
arr[lt], arr[i] = arr[i], arr[lt]
lt += 1
i += 1
elif arr[i] > pivot:
arr[i], arr[gt] = arr[gt], arr[i]
gt -= 1
# Don't increment i here as we need to check the swapped element
else:
i += 1
return lt, gt
def three_way_quick_sort(arr, low=0, high=None):
"""Quick Sort with three-way partitioning"""
if high is None:
high = len(arr) - 1
if low < high:
lt, gt = three_way_partition(arr, low, high)
three_way_quick_sort(arr, low, lt - 1)
three_way_quick_sort(arr, gt + 1, high)
return arr
🔹Iterative Quick Sort (avoids recursion)
def iterative_quick_sort(arr):
"""
Iterative implementation of Quick Sort using a stack
Avoids recursion overhead and stack overflow for large arrays
"""
if len(arr) <= 1:
return arr
# Create a stack to store start and end indices
stack = [(0, len(arr) - 1)]
while stack:
low, high = stack.pop()
if low < high:
# Partition the array
pivot_index = partition(arr, low, high)
# Push left and right subarrays to stack
stack.append((low, pivot_index - 1))
stack.append((pivot_index + 1, high))
return arr
Lesson 5: Performance Analysis
Understanding Quick Sort's performance characteristics:
import time
import random
import sys
# Increase recursion limit for large arrays
sys.setrecursionlimit(10000)
def benchmark_quick_sort():
"""Compare different Quick Sort implementations"""
def create_test_data(size, data_type):
"""Create different types of test data"""
if data_type == "random":
return [random.randint(1, 1000) for _ in range(size)]
elif data_type == "sorted":
return list(range(size))
elif data_type == "reverse":
return list(range(size, 0, -1))
elif data_type == "duplicates":
return [random.randint(1, 10) for _ in range(size)]
elif data_type == "nearly_sorted":
arr = list(range(size))
# Swap 5% of elements
for _ in range(size // 20):
i, j = random.randint(0, size-1), random.randint(0, size-1)
arr[i], arr[j] = arr[j], arr[i]
return arr
algorithms = {
"Simple Quick Sort": quick_sort_simple,
"In-place Quick Sort": lambda arr: quick_sort_inplace(arr.copy()),
"Hybrid Quick Sort": lambda arr: hybrid_quick_sort(arr.copy()),
"Three-way Quick Sort": lambda arr: three_way_quick_sort(arr.copy()),
"Iterative Quick Sort": lambda arr: iterative_quick_sort(arr.copy())
}
test_cases = ["random", "sorted", "reverse", "duplicates", "nearly_sorted"]
sizes = [100, 1000, 5000]
for size in sizes:
print(f"\n{'='*60}")
print(f"Array Size: {size}")
print(f"{'='*60}")
for case in test_cases:
print(f"\n{case.replace('_', ' ').title()} Data:")
print("-" * 40)
test_data = create_test_data(size, case)
for name, algorithm in algorithms.items():
try:
start_time = time.time()
algorithm(test_data.copy())
end_time = time.time()
print(f"{name:20}: {(end_time - start_time)*1000:6.2f} ms")
except RecursionError:
print(f"{name:20}: STACK OVERFLOW")
except Exception as e:
print(f"{name:20}: ERROR - {str(e)}")
def analyze_pivot_strategies():
"""Compare different pivot selection strategies"""
def count_comparisons_partition(arr, low, high, pivot_strategy="last"):
"""Count comparisons in partition with different pivot strategies"""
comparisons = 0
if pivot_strategy == "first":
arr[low], arr[high] = arr[high], arr[low]
elif pivot_strategy == "middle":
mid = (low + high) // 2
arr[mid], arr[high] = arr[high], arr[mid]
elif pivot_strategy == "median_of_three":
median_of_three(arr, low, high)
# "last" is default - no change needed
pivot = arr[high]
i = low - 1
for j in range(low, high):
comparisons += 1
if arr[j] <= pivot:
i += 1
arr[i], arr[j] = arr[j], arr[i]
arr[i + 1], arr[high] = arr[high], arr[i + 1]
return i + 1, comparisons
# Test different pivot strategies
test_arrays = {
"Random": [random.randint(1, 100) for _ in range(20)],
"Sorted": list(range(20)),
"Reverse": list(range(20, 0, -1))
}
strategies = ["first", "last", "middle", "median_of_three"]
print("\n=== Pivot Strategy Comparison ===")
for array_type, arr in test_arrays.items():
print(f"\n{array_type} Array: {arr[:10]}...")
for strategy in strategies:
test_arr = arr.copy()
_, comparisons = count_comparisons_partition(test_arr, 0, len(test_arr)-1, strategy)
print(f" {strategy:15}: {comparisons:2d} comparisons")
# Run benchmarks
benchmark_quick_sort()
analyze_pivot_strategies()
Practical Applications
Best Use Cases
- Large datasets (n > 1000)
- General-purpose sorting
- When average performance matters
- Memory-constrained environments
Real-World Examples
- Database query optimization
- Operating system scheduling
- Graphics rendering pipelines
- Search engine indexing
Advantages
- Fast average performance
- In-place sorting possible
- Cache-efficient
- Widely used and optimized
Disadvantages
- O(n²) worst case
- Not stable
- Poor performance on sorted data
- Recursive (stack overflow risk)
Practice Project: File System Organizer
Let's create a program that organizes files using Quick Sort:
import os
from datetime import datetime
import random
class FileInfo:
def __init__(self, name, size, extension, date_modified):
self.name = name
self.size = size # in bytes
self.extension = extension.lower()
self.date_modified = date_modified
def __str__(self):
size_str = self.format_size(self.size)
date_str = self.date_modified.strftime("%Y-%m-%d %H:%M")
return f"{self.name:20} | {size_str:>8} | {self.extension:>5} | {date_str}"
def format_size(self, size_bytes):
"""Convert bytes to human readable format"""
if size_bytes == 0:
return "0 B"
size_names = ["B", "KB", "MB", "GB"]
i = 0
while size_bytes >= 1024 and i < len(size_names) - 1:
size_bytes /= 1024.0
i += 1
return f"{size_bytes:.1f} {size_names[i]}"
def __repr__(self):
return self.__str__()
class FileSystemOrganizer:
def __init__(self):
self.files = []
def add_file(self, file_info):
"""Add a file to the organizer"""
self.files.append(file_info)
def quick_sort_by_name(self, files=None, low=0, high=None):
"""Sort files by name using Quick Sort"""
if files is None:
files = self.files
if high is None:
high = len(files) - 1
if low < high:
pivot_index = self._partition_by_name(files, low, high)
self.quick_sort_by_name(files, low, pivot_index - 1)
self.quick_sort_by_name(files, pivot_index + 1, high)
return files
def _partition_by_name(self, files, low, high):
"""Partition files by name"""
pivot = files[high].name.lower()
i = low - 1
for j in range(low, high):
if files[j].name.lower() <= pivot:
i += 1
files[i], files[j] = files[j], files[i]
files[i + 1], files[high] = files[high], files[i + 1]
return i + 1
def quick_sort_by_size(self, files=None, low=0, high=None, ascending=True):
"""Sort files by size using Quick Sort"""
if files is None:
files = self.files
if high is None:
high = len(files) - 1
if low < high:
pivot_index = self._partition_by_size(files, low, high, ascending)
self.quick_sort_by_size(files, low, pivot_index - 1, ascending)
self.quick_sort_by_size(files, pivot_index + 1, high, ascending)
return files
def _partition_by_size(self, files, low, high, ascending):
"""Partition files by size"""
pivot = files[high].size
i = low - 1
for j in range(low, high):
condition = files[j].size <= pivot if ascending else files[j].size >= pivot
if condition:
i += 1
files[i], files[j] = files[j], files[i]
files[i + 1], files[high] = files[high], files[i + 1]
return i + 1
def quick_sort_by_extension(self, files=None):
"""Sort files by extension using three-way partitioning"""
if files is None:
files = self.files.copy()
return self._three_way_quick_sort_ext(files, 0, len(files) - 1)
def _three_way_quick_sort_ext(self, files, low, high):
"""Three-way Quick Sort for extensions (handles duplicates well)"""
if low < high:
lt, gt = self._three_way_partition_ext(files, low, high)
self._three_way_quick_sort_ext(files, low, lt - 1)
self._three_way_quick_sort_ext(files, gt + 1, high)
return files
def _three_way_partition_ext(self, files, low, high):
"""Three-way partition by extension"""
pivot = files[low].extension
lt = low
i = low
gt = high
while i <= gt:
if files[i].extension < pivot:
files[lt], files[i] = files[i], files[lt]
lt += 1
i += 1
elif files[i].extension > pivot:
files[i], files[gt] = files[gt], files[i]
gt -= 1
else:
i += 1
return lt, gt
def group_by_extension(self):
"""Group files by extension"""
sorted_files = self.quick_sort_by_extension()
groups = {}
for file in sorted_files:
if file.extension not in groups:
groups[file.extension] = []
groups[file.extension].append(file)
return groups
def display_files(self, files=None, title="Files"):
"""Display files in a formatted table"""
if files is None:
files = self.files
print(f"\n📁 {title}")
print("=" * 70)
print(f"{'Name':20} | {'Size':>8} | {'Ext':>5} | {'Modified'}")
print("-" * 70)
for file in files:
print(file)
print(f"\nTotal files: {len(files)}")
def create_sample_files():
"""Create sample file data for demonstration"""
file_names = [
"document.pdf", "image.jpg", "script.py", "data.csv", "photo.png",
"report.docx", "music.mp3", "video.mp4", "archive.zip", "config.json",
"style.css", "index.html", "app.js", "database.db", "backup.tar.gz",
"presentation.pptx", "spreadsheet.xlsx", "readme.txt", "logo.svg", "font.ttf"
]
files = []
base_date = datetime(2024, 1, 1)
for name in file_names:
# Random file size between 1KB and 10MB
size = random.randint(1024, 10 * 1024 * 1024)
# Extract extension
extension = name.split('.')[-1] if '.' in name else ''
# Random modification date
days_offset = random.randint(0, 365)
date_modified = datetime(2024, 1, 1 + days_offset % 28,
random.randint(0, 23), random.randint(0, 59))
files.append(FileInfo(name, size, extension, date_modified))
return files
def main():
# Create file system organizer
organizer = FileSystemOrganizer()
# Add sample files
sample_files = create_sample_files()
for file in sample_files:
organizer.add_file(file)
# Display original files
organizer.display_files(title="Original File List")
# Sort by name
print("\n" + "="*70)
print("SORTING BY NAME")
print("="*70)
name_sorted = organizer.quick_sort_by_name(organizer.files.copy())
organizer.display_files(name_sorted, "Files Sorted by Name")
# Sort by size (largest first)
print("\n" + "="*70)
print("SORTING BY SIZE")
print("="*70)
size_sorted = organizer.quick_sort_by_size(organizer.files.copy(), ascending=False)
organizer.display_files(size_sorted, "Files Sorted by Size (Largest First)")
# Group by extension
print("\n" + "="*70)
print("GROUPING BY EXTENSION")
print("="*70)
groups = organizer.group_by_extension()
for ext, files in groups.items():
ext_display = ext.upper() if ext else "NO EXTENSION"
organizer.display_files(files, f"{ext_display} Files")
# Statistics
print("\n" + "="*70)
print("FILE STATISTICS")
print("="*70)
total_size = sum(file.size for file in organizer.files)
avg_size = total_size / len(organizer.files)
print(f"Total files: {len(organizer.files)}")
print(f"Total size: {FileInfo('', 0, '', datetime.now()).format_size(total_size)}")
print(f"Average size: {FileInfo('', 0, '', datetime.now()).format_size(avg_size)}")
print(f"File types: {len(groups)}")
# Show largest and smallest files
size_sorted = organizer.quick_sort_by_size(organizer.files.copy(), ascending=False)
print(f"\nLargest file: {size_sorted[0].name} ({size_sorted[0].format_size(size_sorted[0].size)})")
print(f"Smallest file: {size_sorted[-1].name} ({size_sorted[-1].format_size(size_sorted[-1].size)})")
# Run the file system organizer
main()