Complete SciPy Tutorial
Master all SciPy essentials with short, practical examples!
🔬 Welcome to SciPy!
SciPy is your scientific computing toolkit! It solves complex math and science problems with simple code.
from scipy import stats
import numpy as np
# Five sample values for the quick-start demo.
data = [85, 90, 78, 92, 88]

# Compute both statistics first, then report them.
intro_average = np.mean(data)
intro_ttest = stats.ttest_1samp(data, 80)  # H0: the population mean is 80

print("Average:", intro_average)
print("T-test p-value:", intro_ttest.pvalue)
Scientific
Computing
15+
Modules
Powerful
Algorithms
Getting Started
Install and Import
# Install: pip install scipy
import scipy
from scipy import stats, optimize, integrate
import numpy as np
# Report which SciPy release is installed.
installed_scipy_version = scipy.__version__
print("SciPy version:", installed_scipy_version)
Statistics (scipy.stats)
Basic Statistical Tests
# Two small samples to compare.
group1 = [85, 90, 78, 92, 88]
group2 = [82, 87, 91, 79, 85]

# Independent two-sample t-test: do the two group means differ?
two_sample_result = stats.ttest_ind(group1, group2)
t_stat = two_sample_result.statistic
p_value = two_sample_result.pvalue
print(f"T-test p-value: {p_value:.3f}")

# One-sample t-test: does the mean of group1 differ from 80?
one_sample_result = stats.ttest_1samp(group1, 80)
t_stat = one_sample_result.statistic
p_value = one_sample_result.pvalue
print(f"One-sample t-test p-value: {p_value:.3f}")
Probability Distributions
# Frozen normal distribution with location 100 and scale 15.
mean, std = 100, 15
normal_dist = stats.norm(loc=mean, scale=std)

# Evaluate the CDF and the inverse CDF (percent-point function) up front.
prob_below_85 = normal_dist.cdf(85)
percentile_95 = normal_dist.ppf(0.95)
print("Probability x < 85:", prob_below_85)
print("95th percentile:", percentile_95)

# Draw five random variates from the same distribution.
samples = normal_dist.rvs(size=5)
print("Random samples:", samples)
Correlation and Regression
# Paired observations; y is exactly twice x.
x = [1, 2, 3, 4, 5]
y = [2, 4, 6, 8, 10]

# Pearson correlation coefficient and its p-value.
correlation, p_value = stats.pearsonr(x, y)
print(f"Correlation: {correlation:.3f}")

# Ordinary least-squares line through the same points; read the fields
# off the result object by name instead of unpacking positionally.
line_fit = stats.linregress(x, y)
slope = line_fit.slope
intercept = line_fit.intercept
r_value = line_fit.rvalue
p_value = line_fit.pvalue
std_err = line_fit.stderr
print(f"Slope: {slope:.2f}, R²: {r_value**2:.3f}")
Optimization (scipy.optimize)
Finding Minimum
# Simple function minimization
def quadratic(x):
    """Return x**2 + 4*x + 3, a parabola whose minimum value -1 is at x = -2."""
    return x**2 + 4*x + 3


# No bracket or bounds are supplied, so minimize_scalar searches unconstrained.
result = optimize.minimize_scalar(quadratic)
print(f"Minimum at x = {result.x:.2f}")
print(f"Minimum value = {result.fun:.2f}")
# Multi-dimensional optimization
def rosenbrock(x):
    """Return the 2-D Rosenbrock function at ``x = (x[0], x[1])``.

    The function is zero at its global minimum (1, 1).
    """
    return 100*(x[1] - x[0]**2)**2 + (1 - x[0])**2


# Start the search from the origin; result.x holds the located minimiser.
result = optimize.minimize(rosenbrock, [0, 0])
print(f"Minimum at: {result.x}")
Curve Fitting
# Fit exponential decay
def exponential(x, a, b, c):
    """Exponential decay model: a * exp(-b * x) + c.

    ``a`` is the amplitude, ``b`` the decay rate and ``c`` the offset the
    curve approaches as x grows.
    """
    return a * np.exp(-b * x) + c


# Sample data
x_data = np.array([0, 1, 2, 3, 4])
y_data = np.array([2.1, 1.3, 0.9, 0.7, 0.6])

# Fit the curve: params holds the best-fit (a, b, c), covariance their
# estimated covariance matrix.
params, covariance = optimize.curve_fit(exponential, x_data, y_data)
print(f"Fitted parameters: a={params[0]:.2f}, b={params[1]:.2f}, c={params[2]:.2f}")
Root Finding
# Find roots of equation
def equation(x):
    """Return x**3 - 2*x - 5; its single real root is near x = 2.09."""
    return x**3 - 2*x - 5


# Find root near x=2 (fsolve returns an array, so take its first element)
root = optimize.fsolve(equation, 2)[0]
print(f"Root: x = {root:.4f}")
print(f"Verification: f({root:.4f}) = {equation(root):.2e}")

# Bracket method: equation(2) < 0 < equation(3), so [2, 3] encloses the root.
root_bracket = optimize.brentq(equation, 2, 3)
print(f"Root (bracket method): {root_bracket:.4f}")
Integration (scipy.integrate)
Numerical Integration
# Simple integration
def f(x):
    """Integrand x**2."""
    return x**2


# Integrate x² from 0 to 2; quad returns the value and an error estimate.
result, error = integrate.quad(f, 0, 2)
print(f"Integral result: {result:.3f}")
print(f"Analytical result: {8/3:.3f}")
# Integration with infinite limits
def gaussian(x):
    """Unnormalised Gaussian exp(-x**2); its integral over the real line is sqrt(pi)."""
    return np.exp(-x**2)


# quad accepts +/- np.inf as integration limits.
result_inf, error_inf = integrate.quad(gaussian, -np.inf, np.inf)
print(f"Gaussian integral: {result_inf:.3f}")
Solving ODEs
# Solve differential equation dy/dt = -2y
def dydt(t, y):
    """Right-hand side of dy/dt = -2*y.

    ``t`` is unused but is part of the ``fun(t, y)`` signature solve_ivp calls.
    """
    return -2 * y


# Initial condition: y(0) = 1
t_span = (0, 2)
y0 = [1]
# 11 samples give a step of 0.2, so t = 1.0 lies on the output grid.
# (With 10 samples the spacing is 2/9 and index 5 is t = 10/9, not t = 1.)
t_eval = np.linspace(0, 2, 11)
sol = integrate.solve_ivp(dydt, t_span, y0, t_eval=t_eval)

# Look the sample up by its time value instead of a hard-coded position.
idx_t1 = int(np.argmin(np.abs(sol.t - 1.0)))
print("Solution at t=1:", sol.y[0][idx_t1])
print("Analytical at t=1:", np.exp(-2))
Linear Algebra (scipy.linalg)
Solving Linear Systems
from scipy import linalg
# Coefficient matrix and right-hand side of the linear system A x = b.
A = np.array([
    [3, 2, 1],
    [1, 4, 2],
    [2, 1, 3],
])
b = np.array([10, 12, 10])

# Solve for x, then multiply back to confirm that A x reproduces b.
x = linalg.solve(A, b)
print("Solution:", x)
print("Verification:", np.matmul(A, x)) # Should equal b
Matrix Decompositions
# Eigenvalues and eigenvectors
matrix = np.array([[4, 2], [1, 3]])
eigenvals, eigenvecs = linalg.eig(matrix)
print("Eigenvalues:", eigenvals)

# SVD decomposition: matrix = U @ diag(s) @ Vt
U, s, Vt = linalg.svd(matrix)
print("Singular values:", s)

# Matrix properties
print("Determinant:", linalg.det(matrix))
# scipy.linalg does not provide cond(); the condition number comes from
# numpy.linalg (2-norm by default, i.e. largest / smallest singular value).
cond_number = np.linalg.cond(matrix)
print("Condition number:", cond_number)
Signal Processing (scipy.signal)
Basic Signal Operations
from scipy import signal
# Build a 5 Hz sine on 100 samples spanning [0, 1] and add Gaussian noise to it.
t = np.linspace(0, 1, 100)
clean_signal = np.sin(2 * np.pi * 5 * t) # 5 Hz sine wave
additive_noise = 0.3 * np.random.randn(100)
noisy_signal = clean_signal + additive_noise

# 4th-order Butterworth low-pass with normalised cutoff 0.2, applied with
# filtfilt (forward and backward passes).
b, a = signal.butter(N=4, Wn=0.2) # 4th order, cutoff at 0.2
filtered = signal.filtfilt(b, a, noisy_signal)

# Compare the spread of the signal before and after filtering.
print("Original signal std:", np.std(noisy_signal))
print("Filtered signal std:", np.std(filtered))
Peak Detection and Cross-Correlation
# Locate the local maxima of the clean signal that reach a height of at least 0.5.
peak_search = signal.find_peaks(clean_signal, height=0.5)
peaks, _ = peak_search
print(f"Found {len(peaks)} peaks")

# Slide a 20-sample sine template over the signal; 'valid' mode keeps only
# the offsets where the template overlaps the signal completely.
template = np.sin(2 * np.pi * 5 * t[:20])
correlation = signal.correlate(clean_signal, template, mode='valid')
max_corr = correlation.argmax()
print(f"Best match at position: {max_corr}")
Interpolation (scipy.interpolate)
1D Interpolation
from scipy import interpolate
# Known samples of y = x² on an integer grid.
x = np.array([0, 1, 2, 3, 4])
y = np.array([0, 1, 4, 9, 16]) # y = x²

# Build one interpolant per method in a single pass.
f_linear, f_cubic = (
    interpolate.interp1d(x, y, kind=method) for method in ('linear', 'cubic')
)

# Evaluate both interpolants half-way between grid points.
x_new = np.array([0.5, 1.5, 2.5])
linear_values = f_linear(x_new)
cubic_values = f_cubic(x_new)
print("Linear interpolation:", linear_values)
print("Cubic interpolation:", cubic_values)
Spline Interpolation
# B-spline representation of the (x, y) samples; s=0 disables smoothing so
# the spline passes through every data point.
tck = interpolate.splrep(x, y, s=0) # s=0 for exact interpolation

# Evaluate the spline itself (derivative order 0) at the new abscissae.
y_spline = interpolate.splev(x_new, tck, der=0)
print("Spline interpolation:", y_spline)
# 2D interpolation
x_2d = np.array([0, 1, 2])
y_2d = np.array([0, 1, 2])
z_2d = np.array([[0, 1, 4], [1, 2, 5], [4, 5, 8]])
# interpolate.interp2d was deprecated and later removed from SciPy;
# RegularGridInterpolator is the replacement for data on a rectilinear grid.
# interp2d indexed z as z[y_index, x_index], whereas RegularGridInterpolator
# expects values[x_index, y_index], hence the transpose.
f_2d = interpolate.RegularGridInterpolator((x_2d, y_2d), z_2d.T, method='linear')
# Query points are passed as rows of (x, y) coordinates.
print("2D interpolation at (0.5, 0.5):", f_2d([[0.5, 0.5]]))
Sparse Matrices (scipy.sparse)
Creating Sparse Matrices
from scipy import sparse
# Create sparse matrix from (value, (row, column)) triplets
row = np.array([0, 0, 1, 2, 2, 2])
col = np.array([0, 2, 1, 0, 1, 2])
data = np.array([1, 2, 3, 4, 5, 6])
sparse_matrix = sparse.csr_matrix((data, (row, col)), shape=(3, 3))
print("Sparse matrix:")
dense_matrix = sparse_matrix.toarray()
print(dense_matrix)

# Memory footprint. A CSR matrix stores three arrays: the non-zero values
# (data), their column indices (indices) and the row pointers (indptr).
# Counting only .data understates the real cost, so all three are summed.
# For a matrix this small and this full, CSR is not smaller than dense;
# the savings appear only when most entries are zero.
sparse_nbytes = (
    sparse_matrix.data.nbytes
    + sparse_matrix.indices.nbytes
    + sparse_matrix.indptr.nbytes
)
print(f"Sparse size: {sparse_nbytes} bytes")
print(f"Dense size: {dense_matrix.nbytes} bytes")
🎯 Real-World Examples
A/B Testing
# Website conversion rates
control_group = [0.12, 0.15, 0.11, 0.14, 0.13] # 5 days
test_group = [0.18, 0.19, 0.17, 0.20, 0.18] # 5 days

# Statistical test: independent two-sample t-test on the daily rates
t_stat, p_value = stats.ttest_ind(control_group, test_group)
print(f"Control average: {np.mean(control_group):.1%}")
print(f"Test average: {np.mean(test_group):.1%}")
print(f"P-value: {p_value:.4f}")

# Report the relative lift only when the difference is significant at 5%.
if p_value < 0.05:
    improvement = (np.mean(test_group) - np.mean(control_group)) / np.mean(control_group)
    print(f"✅ Significant improvement: {improvement:.1%}")
Quality Control
# Manufacturing measurements
measurements = np.array([99.8, 100.2, 99.9, 100.1, 99.7, 100.3, 99.6])
target = 100.0

# Check if process is on target (one-sample t-test against the target value)
t_stat, p_value = stats.ttest_1samp(measurements, target)
print(f"Average: {measurements.mean():.2f}")
print(f"Target: {target}")
print(f"P-value: {p_value:.4f}")

# Control limits (3-sigma)
mean_val = measurements.mean()
# ndarray.std() defaults to ddof=0 (population formula). These seven values
# are a sample, so use the sample standard deviation (ddof=1), the same
# estimator the t-test above is based on.
std_val = measurements.std(ddof=1)
ucl = mean_val + 3 * std_val
lcl = mean_val - 3 * std_val
print(f"Control limits: [{lcl:.2f}, {ucl:.2f}]")
Data Fitting
# Fit growth model to data
def growth_model(t, a, b, c):
    """Logistic growth curve a / (1 + b * exp(-c * t)).

    ``a`` is the value the curve saturates at, ``b`` sets the starting value
    (the curve equals a / (1 + b) at t = 0) and ``c`` is the growth rate.
    """
    return a / (1 + b * np.exp(-c * t))


# Sample growth data
time = np.array([0, 1, 2, 3, 4, 5])
population = np.array([10, 15, 25, 40, 60, 75])

# Fit the model. Without p0, curve_fit starts every parameter at 1, which is
# far from data on this scale. Derive a rough starting point from the data
# instead: a plateau somewhat above the largest observation, and b chosen so
# that the model reproduces the first observation at t = 0.
plateau_guess = 1.5 * population.max()
initial_guess = [plateau_guess, plateau_guess / population[0] - 1, 1.0]
params, _ = optimize.curve_fit(growth_model, time, population, p0=initial_guess)
print(f"Growth parameters: a={params[0]:.1f}, b={params[1]:.2f}, c={params[2]:.2f}")

# Predict future values
future_time = 6
prediction = growth_model(future_time, *params)
print(f"Predicted population at t={future_time}: {prediction:.1f}")
Best Practices
📊 Choose Right Module
Use scipy.stats for statistical tests and distributions, and scipy.optimize for minimization, curve fitting, and root finding
🎯 Understand Your Problem
Know what type of analysis you need
🔍 Check Results
Always verify answers make sense
📚 Read Documentation
SciPy has excellent examples and explanations