AUC-ROC Curve

Understand and visualize binary classification performance

📊 What is AUC-ROC?

The ROC (Receiver Operating Characteristic) curve plots the true positive rate against the false positive rate of a binary classifier as the decision threshold varies. AUC (Area Under the Curve) condenses that curve into a single number that measures overall performance across all thresholds - higher is better!


from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt

# Train a simple model
# NOTE: X (feature matrix) and y (binary labels) are assumed to be defined already.
# The split is seeded so the reported AUC is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
model = LogisticRegression()
model.fit(X_train, y_train)

# Get prediction probabilities (column 1 holds the positive-class probability)
y_proba = model.predict_proba(X_test)[:, 1]

# Calculate ROC curve: one (FPR, TPR) point per candidate threshold
fpr, tpr, thresholds = roc_curve(y_test, y_proba)
# Area under the (FPR, TPR) curve
roc_auc = auc(fpr, tpr)

print(f"AUC Score: {roc_auc:.3f}")
                                    
AUC Range: 0-1
Random Guess: 0.5
Perfect Score: 1.0

ROC Curve Components

True Positive Rate

Sensitivity: TP / (TP + FN)

# Share of the actual positives (TP + FN) that were caught
actual_positives = true_positives + false_negatives
tpr = true_positives / actual_positives
print(f"TPR: {tpr:.3f}")

False Positive Rate

1 - Specificity: FP / (FP + TN)

# Share of the actual negatives (FP + TN) that were wrongly called positive
actual_negatives = false_positives + true_negatives
fpr = false_positives / actual_negatives
print(f"FPR: {fpr:.3f}")
🎯

Thresholds

Decision boundaries for classification

# Different cutoff points: a score at or above the threshold is labelled positive (1)
for threshold in [0.3, 0.5, 0.7]:
    predictions = (y_proba >= threshold).astype(int)
    # Show the effect of the cutoff so each iteration's result is actually used;
    # raising the threshold can only keep or reduce the number of positives.
    print(f"Threshold {threshold}: {predictions.sum()} predicted positive")
📐

Area Under Curve

Single metric summarizing performance

from sklearn.metrics import roc_auc_score
# Score the held-out labels against the probabilities predicted for the same samples
auc_score = roc_auc_score(y_test, y_proba)
print(f"AUC: {auc_score:.3f}")

🔹 Basic ROC Curve

Create and plot your first ROC curve

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

# Create sample binary classification data (1000 samples, 4 features, fixed seed)
X, y = make_classification(n_samples=1000, n_features=4, n_classes=2, random_state=42)

# Split the data: 30% is held out for evaluation, seeded for a reproducible split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train model
model = LogisticRegression()
model.fit(X_train, y_train)

# Get prediction probabilities (important: use probabilities, not predictions!)
# roc_curve sweeps thresholds over these scores; hard 0/1 predictions would
# leave only a single non-trivial point on the curve.
y_proba = model.predict_proba(X_test)[:, 1]  # Probability of positive class

# Calculate ROC curve: fpr/tpr hold one point per threshold in `thresholds`
fpr, tpr, thresholds = roc_curve(y_test, y_proba)
# Area under the (fpr, tpr) curve
roc_auc = auc(fpr, tpr)

# Plot ROC curve
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='blue', lw=2, label=f'ROC Curve (AUC = {roc_auc:.3f})')
# Diagonal from (0, 0) to (1, 1): the reference line labelled "Random Guess"
plt.plot([0, 1], [0, 1], color='red', lw=2, linestyle='--', label='Random Guess')
plt.xlim([0.0, 1.0])
# Upper limit of 1.05 leaves a little headroom above TPR = 1.0
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend(loc="lower right")
plt.grid(True)
plt.show()

print(f"AUC Score: {roc_auc:.3f}")

🔹 Understanding AUC Scores

What different AUC values mean for your model

🎯 AUC Score Interpretation:

  • AUC = 1.0: Perfect classifier
  • AUC = 0.9-1.0: Excellent performance
  • AUC = 0.8-0.9: Good performance
  • AUC = 0.7-0.8: Fair performance
  • AUC = 0.6-0.7: Poor performance
  • AUC = 0.5-0.6: Very poor performance (barely better than random)
  • AUC = 0.5: Random guessing
  • AUC < 0.5: Worse than random (flip predictions!)
from sklearn.metrics import roc_auc_score

# Simple way to get AUC score: roc_auc_score takes the true labels and the
# predicted probabilities directly, with no separate roc_curve/auc step here
auc_score = roc_auc_score(y_test, y_proba)

def interpret_auc(score):
    """Map an AUC score to a short qualitative label.

    Bands are checked from best to worst and each lower bound is inclusive.
    Any score that reaches none of the bounds (below 0.5, or not comparable,
    such as NaN) gets the worse-than-random label.
    """
    bands = (
        (0.9, "Excellent! 🎉"),
        (0.8, "Good 👍"),
        (0.7, "Fair 👌"),
        (0.6, "Poor 😐"),
        (0.5, "Very Poor 😞"),
    )
    for lower_bound, label in bands:
        if score >= lower_bound:
            return label
    return "Worse than random! 🤔"

from sklearn.metrics import accuracy_score

# Report the AUC together with its qualitative label
performance = interpret_auc(auc_score)
print(f"AUC Score: {auc_score:.3f}")
print(f"Performance: {performance}")

# Accuracy is computed from hard class predictions, for contrast with AUC
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.3f}")
print("Note: AUC considers all thresholds, accuracy uses just one!")

🔹 Comparing Multiple Models

Use ROC curves to compare different algorithms

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB

# Train multiple models
# The estimators that use randomness are seeded so the compared AUCs are
# reproducible from run to run.
models = {
    'Logistic Regression': LogisticRegression(),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    # probability=True enables predict_proba for SVM
    'SVM': SVC(probability=True, random_state=42),
    'Naive Bayes': GaussianNB()
}

plt.figure(figsize=(10, 8))

# Plot ROC curve for each model on the same axes
for name, model in models.items():
    # Train model
    model.fit(X_train, y_train)

    # Get a continuous score per test sample: the positive-class probability
    # when available, otherwise the raw decision_function output
    if hasattr(model, "predict_proba"):
        y_proba = model.predict_proba(X_test)[:, 1]
    else:
        y_proba = model.decision_function(X_test)

    # Calculate ROC (thresholds are not needed here)
    fpr, tpr, _ = roc_curve(y_test, y_proba)
    roc_auc = auc(fpr, tpr)

    # Plot, with the model's AUC in the legend entry
    plt.plot(fpr, tpr, lw=2, label=f'{name} (AUC = {roc_auc:.3f})')

# Add random line: the diagonal from (0, 0) to (1, 1)
plt.plot([0, 1], [0, 1], 'k--', lw=2, label='Random Guess')

plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves - Model Comparison')
plt.legend(loc="lower right")
plt.grid(True)
plt.show()

🔹 ROC Curve with Cross Validation

Get more reliable ROC curves using cross validation

from sklearn.model_selection import StratifiedKFold
import numpy as np

# Set up cross validation: 5 stratified, shuffled folds with a fixed seed
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Store results
tprs = []  # each fold's TPR interpolated onto mean_fpr
aucs = []  # each fold's AUC
# Common grid of 100 FPR values in [0, 1]; every fold's curve is resampled
# onto it so the curves can be averaged point by point
mean_fpr = np.linspace(0, 1, 100)

plt.figure(figsize=(10, 8))

# Perform cross validation
for i, (train_idx, test_idx) in enumerate(cv.split(X, y)):
    # Split data
    X_train_cv, X_test_cv = X[train_idx], X[test_idx]
    y_train_cv, y_test_cv = y[train_idx], y[test_idx]
    
    # Train a fresh model on this fold's training part
    model = LogisticRegression()
    model.fit(X_train_cv, y_train_cv)
    
    # Get probabilities and ROC for this fold's held-out part
    y_proba = model.predict_proba(X_test_cv)[:, 1]
    fpr, tpr, _ = roc_curve(y_test_cv, y_proba)
    
    # Interpolate onto the common FPR grid and store
    interp_tpr = np.interp(mean_fpr, fpr, tpr)
    interp_tpr[0] = 0.0  # pin the curve's start to (0, 0)
    tprs.append(interp_tpr)
    
    # Calculate AUC from the fold's own (un-interpolated) curve
    roc_auc = auc(fpr, tpr)
    aucs.append(roc_auc)
    
    # Plot individual fold
    plt.plot(fpr, tpr, lw=1, alpha=0.3, label=f'ROC fold {i+1} (AUC = {roc_auc:.2f})')

# Plot mean ROC curve
mean_tpr = np.mean(tprs, axis=0)  # pointwise mean across folds
mean_tpr[-1] = 1.0  # pin the curve's end to (1, 1)
mean_auc = auc(mean_fpr, mean_tpr)  # area under the averaged curve
std_auc = np.std(aucs)  # spread of the per-fold AUCs

plt.plot(mean_fpr, mean_tpr, color='blue', lw=2,
         label=f'Mean ROC (AUC = {mean_auc:.2f} ± {std_auc:.2f})')

# Shade a band of +/- 1 standard deviation of the fold TPRs around the mean
# curve, clipped to [0, 1] (a variability band, not a formal confidence interval)
std_tpr = np.std(tprs, axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
plt.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=0.2)

plt.plot([0, 1], [0, 1], 'k--', lw=2, label='Random Guess')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves with Cross Validation')
plt.legend(loc="lower right")
plt.grid(True)
plt.show()

print(f"Cross-validated AUC: {mean_auc:.3f} (±{std_auc:.3f})")

🔹 When to Use AUC-ROC

Understanding the limitations and best use cases

✅ Use AUC-ROC when:

  • Binary classification problems
  • Balanced datasets or when both classes matter equally
  • Ranking/probability is important
  • Comparing models across different thresholds

❌ Consider alternatives when:

  • Highly imbalanced data - Use Precision-Recall curve instead
  • Multi-class problems - ROC is defined for two classes; use one-vs-rest or one-vs-one averaging (macro/micro) or other metrics
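  • Cost-sensitive problems - When false positives and false negatives carry different costs, choose the threshold from those costs rather than relying on AUC alone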
# Example: Comparing AUC-ROC vs Precision-Recall for imbalanced data
from sklearn.datasets import make_classification
from sklearn.metrics import precision_recall_curve, average_precision_score

# Create imbalanced dataset (about 5% positive class; flip_y adds a little label noise)
X_imb, y_imb = make_classification(n_samples=1000, n_classes=2, weights=[0.95, 0.05],
                                   flip_y=0.01, random_state=42)

# Stratify the split so the rare positive class keeps the same share in train
# and test; a purely random split can leave the test set with very few positives.
# NOTE: this rebinds X_train / X_test / y_train / y_test to the imbalanced data.
X_train, X_test, y_train, y_test = train_test_split(
    X_imb, y_imb, test_size=0.3, stratify=y_imb, random_state=42
)

# Train model
model = LogisticRegression()
model.fit(X_train, y_train)
y_proba = model.predict_proba(X_test)[:, 1]

# Calculate both metrics from the same labels and scores
roc_auc = roc_auc_score(y_test, y_proba)
# average_precision_score summarizes the precision-recall curve in one number
pr_auc = average_precision_score(y_test, y_proba)

print(f"Dataset imbalance: {np.sum(y_test == 1)} positive out of {len(y_test)} total")
print(f"ROC-AUC: {roc_auc:.3f}")
print(f"PR-AUC: {pr_auc:.3f}")
print("\nFor imbalanced data, PR-AUC is often more informative!")

🧠 Test Your Knowledge

What does an AUC score of 0.5 indicate?

What does TPR stand for?

When is AUC-ROC less suitable?