Rust Benchmarks

⚡ What are Benchmarks?

Benchmarks measure code performance by timing execution and providing statistical analysis. They help identify bottlenecks, compare algorithms, and ensure optimizations actually improve performance in Rust applications.


use criterion::{black_box, criterion_group, criterion_main, Criterion};

/// Registers a single Criterion benchmark, labelled "fib 20", that times
/// `fibonacci` for the input 20.
///
/// The input goes through `black_box` so the compiler cannot treat it as a
/// compile-time constant.
fn fibonacci_benchmark(c: &mut Criterion) {
    c.bench_function("fib 20", |bencher| {
        bencher.iter(|| fibonacci(black_box(20)));
    });
}

Benchmark Tools

📊

Criterion.rs

Statistical benchmarking library

# Criterion is declared as a dev-dependency (used for benches/tests, not the main build)
[dev-dependencies]
criterion = "0.5"

🔬

Built-in Bencher

Nightly-only benchmarking with the unstable built-in `test` crate (requires `#![feature(test)]`)

/// Times `add` with the built-in (nightly-only) bench harness.
///
/// The arguments are passed through `std::hint::black_box` so the optimizer
/// cannot constant-fold `add(2, 3)` and leave an empty loop to be measured.
#[bench]
fn bench_add(b: &mut Bencher) {
    b.iter(|| add(std::hint::black_box(2), std::hint::black_box(3)));
}

⏱️

Instant Timing

Manual timing measurements

// Manual wall-clock timing (needs `use std::time::Instant;` in scope).
// Capture a timestamp immediately before the work under test.
let start = Instant::now();
expensive_function();
// `elapsed()` yields the time passed since `start` was captured.
let duration = start.elapsed();

📈

Profiling Tools

Advanced performance analysis

# Install the flamegraph cargo subcommand, then run it on the current project
cargo install flamegraph
cargo flamegraph

🔹 Setting up Criterion

Add Criterion to your project for statistical benchmarking:

🔸 Cargo.toml

[dev-dependencies]
# The "html_reports" feature turns on Criterion's HTML report output
criterion = { version = "0.5", features = ["html_reports"] }

# One [[bench]] entry per benchmark target
[[bench]]
# Corresponds to the file benches/my_benchmark.rs
name = "my_benchmark"
# Disable the default bench harness so the `main` generated by criterion_main! is used
harness = false

🔸 benches/my_benchmark.rs

use criterion::{black_box, criterion_group, criterion_main, Criterion};

/// Naive doubly-recursive Fibonacci, used as the slow subject of the benchmark.
///
/// Follows the convention `fibonacci(0) == fibonacci(1) == 1`; every later
/// value is the sum of the two preceding ones.
fn fibonacci(n: u64) -> u64 {
    // Both base cases yield 1.
    if n < 2 {
        return 1;
    }
    fibonacci(n - 1) + fibonacci(n - 2)
}

/// Iterative Fibonacci with the same convention as the recursive version
/// (`fibonacci_iterative(0) == fibonacci_iterative(1) == 1`).
///
/// Carries the pair `(previous, current)` forward `n` times and returns the
/// final `current` value.
fn fibonacci_iterative(n: u64) -> u64 {
    let (_, current) = (0..n).fold((0u64, 1u64), |(previous, current), _| {
        (current, previous + current)
    });
    current
}

/// Registers two benchmarks that compute the same value, one with the
/// recursive `fibonacci` and one with `fibonacci_iterative`, so their timings
/// can be compared side by side.
fn criterion_benchmark(c: &mut Criterion) {
    // Shared input; routed through `black_box` at each call site so it is not
    // treated as a compile-time constant.
    const INPUT: u64 = 20;

    c.bench_function("fib recursive 20", |bencher| {
        bencher.iter(|| fibonacci(black_box(INPUT)))
    });

    c.bench_function("fib iterative 20", |bencher| {
        bencher.iter(|| fibonacci_iterative(black_box(INPUT)))
    });
}

// Collect the benchmark functions into a group named `benches`, then generate
// the `main` function that runs that group (hence `harness = false` in Cargo.toml).
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);

🔹 Running Benchmarks

Execute and analyze benchmark results:

# Run all benchmarks
cargo bench

# Run only the benchmarks whose ID matches a filter
# (the filter is matched against IDs such as "fib recursive 20")
cargo bench --bench my_benchmark -- "fib recursive"

# Generate HTML reports: no extra flag is needed. With the "html_reports"
# feature enabled, each run writes target/criterion/report/index.html
cargo bench --bench my_benchmark

# Save baseline for comparison
cargo bench -- --save-baseline my_baseline

# Compare with baseline
cargo bench -- --baseline my_baseline

Sample Output:

fib recursive 20 time: [26.029 µs 26.251 µs 26.505 µs]

fib iterative 20 time: [21.018 ns 21.098 ns 21.195 ns]

Performance difference: the iterative version is roughly 1,244x faster (26.251 µs vs. 21.098 ns)

🔹 Advanced Benchmarking

Compare multiple implementations and configurations:

use criterion::{BenchmarkId, Criterion, Throughput};

fn bench_sorting_algorithms(c: &mut Criterion) {
    let mut group = c.benchmark_group("sorting");
    
    for size in [100, 1000, 10000].iter() {
        let mut data: Vec<i32> = (0..*size).collect();
        data.reverse(); // Worst case for some algorithms
        
        group.throughput(Throughput::Elements(*size as u64));
        
        group.bench_with_input(
            BenchmarkId::new("bubble_sort", size),
            size,
            |b, &size| {
                b.iter_with_setup(
                    || data.clone(),
                    |mut data| bubble_sort(&mut data)
                )
            },
        );
        
        group.bench_with_input(
            BenchmarkId::new("quick_sort", size),
            size,
            |b, &size| {
                b.iter_with_setup(
                    || data.clone(),
                    |mut data| data.sort_unstable()
                )
            },
        );
    }
    
    group.finish();
}

🔹 Memory Benchmarking

Measure memory usage and allocations:

use std::alloc::{GlobalAlloc, Layout, System};
use std::sync::atomic::{AtomicUsize, Ordering};

/// Allocator that delegates every request to [`System`] while keeping a
/// running count of the bytes currently handed out.
struct CountingAllocator;

/// Bytes currently allocated through [`CountingAllocator`]: incremented on
/// each successful allocation and decremented on each deallocation.
static ALLOCATED: AtomicUsize = AtomicUsize::new(0);

// SAFETY: all allocation work is forwarded unchanged to `System`; this wrapper
// only updates an atomic counter around those calls.
unsafe impl GlobalAlloc for CountingAllocator {
    /// Allocates via `System` and records `layout.size()` when that succeeds.
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        // SAFETY: the caller upholds the `GlobalAlloc::alloc` contract for `layout`.
        let block = unsafe { System.alloc(layout) };
        if block.is_null() {
            // Failed allocations are not counted.
            return block;
        }
        ALLOCATED.fetch_add(layout.size(), Ordering::SeqCst);
        block
    }

    /// Releases the block via `System` and subtracts its size from the counter.
    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        // SAFETY: the caller guarantees `ptr` was allocated by this allocator
        // with this same `layout`.
        unsafe { System.dealloc(ptr, layout) };
        ALLOCATED.fetch_sub(layout.size(), Ordering::SeqCst);
    }
}

/// Installs the counting allocator as the allocator for the whole program.
#[global_allocator]
static GLOBAL: CountingAllocator = CountingAllocator;

/// Reports how many bytes building a 1000-element `Vec<i32>` allocates, then
/// benchmarks that construction.
///
/// The allocation probe runs once, outside the timed closure. Printing inside
/// `b.iter` would put stdout I/O into every measured iteration and emit one
/// line per iteration.
fn memory_benchmark(c: &mut Criterion) {
    // One-off probe: the change in the live-byte counter across the construction.
    let before = ALLOCATED.load(Ordering::SeqCst);
    let probe: Vec<i32> = (0..1000).collect();
    let after = ALLOCATED.load(Ordering::SeqCst);
    // `saturating_sub` avoids an underflow if the counter shrank in between
    // (for example, because another thread freed memory).
    println!("Allocated: {} bytes", after.saturating_sub(before));
    drop(black_box(probe));

    c.bench_function("vec_creation", |b| {
        b.iter(|| {
            let vec: Vec<i32> = (0..1000).collect();
            // Keep the vector observable so its construction is not optimized away.
            black_box(vec)
        })
    });
}

🔹 Micro-benchmarking Tips

Best practices for accurate benchmarks:

Do:

Use black_box() to prevent optimization
Warm up code before measuring
Test with realistic data sizes
Run multiple iterations
Compare against baselines

Don't:

Benchmark debug builds
Include setup time in measurements
Ignore statistical significance
Benchmark trivial operations
Forget about cache effects

fn proper_benchmark(c: &mut Criterion) {
    let data = generate_test_data(10000);
    
    c.bench_function("process_data", |b| {
        b.iter_with_setup(
            || data.clone(),  // Setup not measured
            |data| {
                // Only this part is measured
                black_box(process_data(black_box(data)))
            }
        )
    });
}

🔹 Profiling with Flamegraph

Visualize performance bottlenecks:

# Install the flamegraph cargo subcommand
cargo install flamegraph

# Generate a flamegraph for the binary target `my_binary`
cargo flamegraph --bin my_binary

# Profile a specific benchmark target instead of a binary
cargo flamegraph --bench my_benchmark

# Profile with a custom sampling frequency (--freq); run via sudo here,
# presumably because the underlying profiler needs elevated privileges
sudo cargo flamegraph --freq 997 --bin my_binary

Flamegraph benefits:

Visual representation of CPU usage
Identify hot code paths
See function call hierarchy
Interactive SVG output