AUC-ROC Curve
Understand and visualize binary classification performance
📊 What is AUC-ROC?
The ROC (Receiver Operating Characteristic) curve plots the true positive rate against the false positive rate at every classification threshold, showing how well a binary classifier separates the two classes. AUC (Area Under the Curve) summarizes the whole curve in a single number between 0 and 1 - higher is better!
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
# Train a simple model.
# NOTE: X (feature matrix) and y (binary labels) must already be defined
# before this snippet runs, e.g. via sklearn.datasets.make_classification.
# random_state is fixed so the split, and therefore the reported AUC, is
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
model = LogisticRegression()
model.fit(X_train, y_train)

# Get prediction probabilities; column 1 is the positive class
y_proba = model.predict_proba(X_test)[:, 1]

# Calculate ROC curve (FPR/TPR arrays plus the thresholds that produced them)
fpr, tpr, thresholds = roc_curve(y_test, y_proba)
roc_auc = auc(fpr, tpr)
print(f"AUC Score: {roc_auc:.3f}")
ROC Curve Components
True Positive Rate
Sensitivity: TP / (TP + FN)
# Fraction of the actual positives (TP + FN) that the model caught
actual_positives = true_positives + false_negatives
tpr = true_positives / actual_positives
print(f"TPR: {tpr:.3f}")
False Positive Rate
1 - Specificity: FP / (FP + TN)
# Fraction of the actual negatives (FP + TN) that were wrongly called positive
actual_negatives = false_positives + true_negatives
fpr = false_positives / actual_negatives
print(f"FPR: {fpr:.3f}")
Thresholds
Decision boundaries for classification
# Different cutoff points: a sample is labelled positive (1) when its
# predicted probability is at or above the threshold.
for threshold in [0.3, 0.5, 0.7]:
    predictions = (y_proba >= threshold).astype(int)
    # Report the effect of each cutoff; without this the loop would only
    # leave behind the predictions for the last threshold.
    print(f"Threshold {threshold}: {predictions.sum()} predicted positive")
Area Under Curve
Single metric summarizing performance
from sklearn.metrics import roc_auc_score
# Score the held-out labels against the positive-class probabilities.
# y_proba is predicted from X_test, so the labels must be y_test (the same split).
auc_score = roc_auc_score(y_test, y_proba)
print(f"AUC: {auc_score:.3f}")
🔹 Basic ROC Curve
Create and plot your first ROC curve
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
# Build a synthetic two-class dataset (1000 samples, 4 features)
X, y = make_classification(
    n_samples=1000,
    n_features=4,
    n_classes=2,
    random_state=42,
)

# Hold out 30% of the samples for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit the classifier on the training portion
model = LogisticRegression()
model.fit(X_train, y_train)

# ROC needs scores rather than hard labels: take the positive-class column
y_proba = model.predict_proba(X_test)[:, 1]

# FPR/TPR arrays from roc_curve, then the area under that curve
fpr, tpr, thresholds = roc_curve(y_test, y_proba)
roc_auc = auc(fpr, tpr)

# Draw the curve against the chance diagonal
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, color='blue', lw=2, label=f'ROC Curve (AUC = {roc_auc:.3f})')
ax.plot([0, 1], [0, 1], color='red', lw=2, linestyle='--', label='Random Guess')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve')
ax.legend(loc="lower right")
ax.grid(True)
plt.show()

print(f"AUC Score: {roc_auc:.3f}")
🔹 Understanding AUC Scores
What different AUC values mean for your model
🎯 AUC Score Interpretation:
- AUC = 1.0: Perfect classifier
- AUC = 0.9-1.0: Excellent performance
- AUC = 0.8-0.9: Good performance
- AUC = 0.7-0.8: Fair performance
- AUC = 0.6-0.7: Poor performance
- AUC = 0.5-0.6: Very poor performance
- AUC = 0.5: Random guessing
- AUC < 0.5: Worse than random (flip predictions!)
from sklearn.metrics import roc_auc_score
# One-call AUC: no need to build the ROC curve first
auc_score = roc_auc_score(y_true=y_test, y_score=y_proba)
def interpret_auc(score):
    """Translate an AUC score into a short qualitative label.

    Args:
        score: AUC value, normally between 0.0 and 1.0.

    Returns:
        The label of the highest band whose lower bound ``score`` reaches,
        or "Worse than random! 🤔" when ``score`` is below 0.5 (this is also
        the result for NaN, which fails every comparison).
    """
    # (lower bound, label) pairs ordered from best band to worst, so the
    # first bound the score reaches decides the label.
    bands = (
        (0.9, "Excellent! 🎉"),
        (0.8, "Good 👍"),
        (0.7, "Fair 👌"),
        (0.6, "Poor 😐"),
        (0.5, "Very Poor 😞"),
    )
    for lower_bound, label in bands:
        if score >= lower_bound:
            return label
    return "Worse than random! 🤔"
from sklearn.metrics import accuracy_score

# Hard-label accuracy (a single decision threshold), for comparison with AUC
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Report the AUC and its interpretation next to the accuracy
print(f"AUC Score: {auc_score:.3f}")
print(f"Performance: {interpret_auc(auc_score)}")
print(f"Accuracy: {accuracy:.3f}")
print("Note: AUC considers all thresholds, accuracy uses just one!")
🔹 Comparing Multiple Models
Use ROC curves to compare different algorithms
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
# Train multiple models.
# random_state is fixed on the randomized estimators (the forest, and SVC's
# internal probability calibration) so the curves and AUC values are
# reproducible between runs.
models = {
    'Logistic Regression': LogisticRegression(),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'SVM': SVC(probability=True, random_state=42),  # Enable probability for SVM
    'Naive Bayes': GaussianNB(),
}

plt.figure(figsize=(10, 8))

# Plot ROC curve for each model
for name, model in models.items():
    # Train model
    model.fit(X_train, y_train)

    # Get one continuous score per sample: the positive-class probability
    # when the estimator offers it, otherwise its decision-function value.
    if hasattr(model, "predict_proba"):
        y_proba = model.predict_proba(X_test)[:, 1]
    else:
        y_proba = model.decision_function(X_test)

    # Calculate ROC
    fpr, tpr, _ = roc_curve(y_test, y_proba)
    roc_auc = auc(fpr, tpr)

    # Plot
    plt.plot(fpr, tpr, lw=2, label=f'{name} (AUC = {roc_auc:.3f})')

# NOTE: after the loop, model / y_proba / fpr / tpr / roc_auc hold the values
# for the last entry in `models`.

# Add random line
plt.plot([0, 1], [0, 1], 'k--', lw=2, label='Random Guess')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves - Model Comparison')
plt.legend(loc="lower right")
plt.grid(True)
plt.show()
🔹 ROC Curve with Cross Validation
Get more reliable ROC curves using cross validation
from sklearn.model_selection import StratifiedKFold
import numpy as np
# Set up cross validation
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Store results
tprs = []  # per-fold TPR values, resampled onto mean_fpr
aucs = []  # per-fold AUC
# Shared FPR grid: each fold's curve is interpolated onto it so the folds
# can be averaged point by point.
mean_fpr = np.linspace(0, 1, 100)

plt.figure(figsize=(10, 8))

# Perform cross validation
for i, (train_idx, test_idx) in enumerate(cv.split(X, y)):
    # Split data
    X_train_cv, X_test_cv = X[train_idx], X[test_idx]
    y_train_cv, y_test_cv = y[train_idx], y[test_idx]

    # Train a fresh model for this fold
    model = LogisticRegression()
    model.fit(X_train_cv, y_train_cv)

    # Get probabilities and ROC
    y_proba = model.predict_proba(X_test_cv)[:, 1]
    fpr, tpr, _ = roc_curve(y_test_cv, y_proba)

    # Interpolate onto the shared grid and store;
    # the curve is pinned to start at TPR = 0
    interp_tpr = np.interp(mean_fpr, fpr, tpr)
    interp_tpr[0] = 0.0
    tprs.append(interp_tpr)

    # Calculate AUC from this fold's own (un-interpolated) curve
    roc_auc = auc(fpr, tpr)
    aucs.append(roc_auc)

    # Plot individual fold
    plt.plot(fpr, tpr, lw=1, alpha=0.3, label=f'ROC fold {i+1} (AUC = {roc_auc:.2f})')

# Plot mean ROC curve; the averaged curve is pinned to end at TPR = 1
mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)
std_auc = np.std(aucs)
plt.plot(mean_fpr, mean_tpr, color='blue', lw=2,
         label=f'Mean ROC (AUC = {mean_auc:.2f} ± {std_auc:.2f})')

# Add a ±1 standard deviation band around the mean curve, clipped to [0, 1]
std_tpr = np.std(tprs, axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
plt.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=0.2)

plt.plot([0, 1], [0, 1], 'k--', lw=2, label='Random Guess')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves with Cross Validation')
plt.legend(loc="lower right")
plt.grid(True)
plt.show()

print(f"Cross-validated AUC: {mean_auc:.3f} (±{std_auc:.3f})")
🔹 When to Use AUC-ROC
Understanding the limitations and best use cases
✅ Use AUC-ROC when:
- Binary classification problems
- Balanced datasets or when both classes matter equally
- Ranking/probability is important
- Comparing models across different thresholds
❌ Consider alternatives when:
- Highly imbalanced data - Use Precision-Recall curve instead
- Multi-class problems - Use macro/micro averaging or other metrics
- Cost-sensitive problems - When FP and FN have different costs, choose the threshold with a cost-based metric rather than relying on AUC alone
# Example: Comparing AUC-ROC vs Precision-Recall for imbalanced data
from sklearn.datasets import make_classification
from sklearn.metrics import (
    average_precision_score,
    precision_recall_curve,
    roc_auc_score,  # imported here so this example does not rely on an earlier snippet
)

# Create imbalanced dataset (about 5% positive class)
X_imb, y_imb = make_classification(
    n_samples=1000,
    n_classes=2,
    weights=[0.95, 0.05],
    flip_y=0.01,
    random_state=42,
)

# Stratify the split so the rare positive class keeps the same share in
# both the training and the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X_imb, y_imb, test_size=0.3, random_state=42, stratify=y_imb
)

# Train model
model = LogisticRegression()
model.fit(X_train, y_train)
y_proba = model.predict_proba(X_test)[:, 1]

# Calculate both metrics
roc_auc = roc_auc_score(y_test, y_proba)
pr_auc = average_precision_score(y_test, y_proba)

# Count positives with the array's own sum so no separate numpy import is needed
print(f"Dataset imbalance: {int((y_test == 1).sum())} positive out of {len(y_test)} total")
print(f"ROC-AUC: {roc_auc:.3f}")
print(f"PR-AUC: {pr_auc:.3f}")
print("\nFor imbalanced data, PR-AUC is often more informative!")