Confusion Matrix

Understand your model's performance with detailed prediction analysis

📊 What is a Confusion Matrix?

A confusion matrix is a table that shows how well your classification model performs. It compares actual vs predicted labels, helping you see exactly where your model makes mistakes.


from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris

# Fetch the iris measurements (X) and their species labels (y)
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 30% of the samples for evaluation; the fixed seed makes the
# split repeatable between runs
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.3
)

# Fit a logistic-regression classifier and predict the held-out samples
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Compare the held-out labels with the predictions
# (actual labels are passed first, predictions second)
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", cm, sep="\n")

True Positives

False Positives

Detailed Analysis

Key Concepts

✅

True Positives

Correctly predicted positive cases

Correct predictions · Good performance

❌

False Positives

Negative cases incorrectly predicted as positive

Type I error · False alarms

❌

False Negatives

Positive cases incorrectly predicted as negative

Type II error · Missed cases

✅

True Negatives

Correctly predicted negative cases

Correct rejections · Good specificity

🔹 Basic Confusion Matrix

Creating and interpreting a simple confusion matrix

from sklearn.metrics import confusion_matrix
import numpy as np

# Toy binary problem: eight samples with known and predicted labels
y_true = [0, 1, 0, 1, 0, 1, 1, 0]  # Ground truth
y_pred = [0, 1, 0, 0, 0, 1, 1, 1]  # Model output

# Build the 2x2 matrix and show it, followed by a blank line
cm = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:", cm, "", sep="\n")

# Unpack the four cells row by row: first row is actual class 0,
# second row is actual class 1
(tn, fp), (fn, tp) = cm
print(f"True Negatives (TN): {tn}")
print(f"False Positives (FP): {fp}")
print(f"False Negatives (FN): {fn}")
print(f"True Positives (TP): {tp}")

# Derive the headline metrics from the four cells
accuracy = (tp + tn) / cm.sum()   # share of all samples predicted correctly
precision = tp / (tp + fp)        # share of predicted positives that are real
recall = tp / (tp + fn)           # share of real positives that were found

print(f"\nAccuracy: {accuracy:.3f}")
print(f"Precision: {precision:.3f}")
print(f"Recall: {recall:.3f}")

🔹 Multi-class Confusion Matrix

Handling multiple classes

from sklearn.metrics import confusion_matrix, classification_report
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# The iris dataset has three species, so the matrix will be 3x3
iris = load_iris()
X = iris.data
y = iris.target

# Reserve 30% of the samples for testing (seeded for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.3
)

# Train a decision tree and predict the held-out samples
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# One row per actual class, one column per predicted class
cm = confusion_matrix(y_test, y_pred)
print("3-Class Confusion Matrix:", cm, "", sep="\n")

# Show which species each numeric class index stands for
class_names = iris.target_names
print("Class mapping:")
for i, name in enumerate(class_names):
    print(f"Class {i}: {name}")

# Per-class precision / recall / F1 summary, labelled with species names
print("\nClassification Report:")
report = classification_report(y_test, y_pred, target_names=class_names)
print(report)

🔹 Visualizing Confusion Matrix

Making confusion matrices easier to read

from sklearn.metrics import confusion_matrix
import numpy as np

def print_confusion_matrix(y_true, y_pred, class_names=None):
    """Print the confusion matrix as an aligned, labelled text table.

    Each row is an actual class and each column a predicted class. Column
    widths are derived from the longest class name and the largest count,
    so the header lines up with the data rows and long names do not run
    into their neighbours.

    Args:
        y_true: Actual labels.
        y_pred: Predicted labels, in the same order as ``y_true``.
        class_names: Optional display names, one per class, in the same
            order as the rows of the matrix. Defaults to
            ``"Class 0"``, ``"Class 1"``, ...

    Returns:
        None. The table is written to standard output.
    """
    cm = confusion_matrix(y_true, y_pred)

    if class_names is None:
        class_names = [f"Class {i}" for i in range(len(cm))]
    # Normalise to plain strings so width calculations work for any label type
    labels = [str(name) for name in class_names]

    corner = "Actual ↓"
    longest_label = max((len(label) for label in labels), default=0)
    # The row-label column must fit both the corner caption and every class
    # name; otherwise the header and the data rows drift out of alignment.
    label_width = max(len(corner), longest_label)
    # Keep a minimum width of 10, but always leave a gap of at least two
    # spaces before the widest name or count.
    cell_width = max(10, longest_label + 2, len(str(cm.max())) + 2)

    # Print header
    print("Confusion Matrix:")
    print("Predicted ->")
    header_cells = "".join(f"{label:>{cell_width}}" for label in labels)
    print(f"{corner:<{label_width}}{header_cells}")

    # Print one row per actual class: its name followed by the counts
    for label, row in zip(labels, cm):
        row_cells = "".join(f"{value:>{cell_width}}" for value in row)
        print(f"{label:>{label_width}}{row_cells}")

# Demo: three animal classes, with a few samples misclassified
class_names = ["Cat", "Dog", "Bird"]
y_true = [0, 1, 2, 0, 1, 2, 1, 0]
y_pred = [0, 1, 1, 0, 2, 2, 1, 1]

print_confusion_matrix(y_true, y_pred, class_names=class_names)

🔹 Metrics from Confusion Matrix

Calculate important metrics

from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

def calculate_metrics(y_true, y_pred):
    """Print summary metrics derived from the confusion matrix.

    For a 2x2 (binary) matrix this prints accuracy, precision, recall,
    specificity and F1, each computed directly from the four cells. For any
    other shape it prints accuracy plus support-weighted precision, recall
    and F1.

    Args:
        y_true: Actual labels.
        y_pred: Predicted labels, in the same order as ``y_true``.

    Returns:
        None. The metrics are written to standard output.
    """
    cm = confusion_matrix(y_true, y_pred)

    # For binary classification
    if cm.shape == (2, 2):
        tn, fp, fn, tp = cm.ravel()

        accuracy = (tp + tn) / (tp + tn + fp + fn)
        # Each ratio falls back to 0 when its denominator is empty
        # (e.g. no positive predictions at all).
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

        print(f"Accuracy: {accuracy:.3f}")
        print(f"Precision: {precision:.3f}")
        print(f"Recall (Sensitivity): {recall:.3f}")
        print(f"Specificity: {specificity:.3f}")
        print(f"F1-Score: {f1:.3f}")

    else:
        # For multi-class: average per-class scores weighted by class support.
        # zero_division=0 mirrors the binary branch, where an undefined ratio
        # is reported as 0 rather than triggering a warning.
        precision, recall, f1, _ = precision_recall_fscore_support(
            y_true, y_pred, average='weighted', zero_division=0
        )
        # Correct predictions sit on the diagonal. The array's own methods are
        # used so this function does not depend on a module-level `np` import.
        accuracy = cm.trace() / cm.sum()

        print(f"Accuracy: {accuracy:.3f}")
        print(f"Weighted Precision: {precision:.3f}")
        print(f"Weighted Recall: {recall:.3f}")
        print(f"Weighted F1-Score: {f1:.3f}")

# Demo: binary labels, so the per-cell (2x2) metrics are printed
y_true = [0, 1, 0, 1, 0, 1, 1, 0]  # Ground truth
y_pred = [0, 1, 0, 0, 0, 1, 1, 1]  # Model output
calculate_metrics(y_true=y_true, y_pred=y_pred)

🔹 Common Mistakes Analysis

Understanding what your model gets wrong

from sklearn.metrics import confusion_matrix
import numpy as np

def analyze_mistakes(y_true, y_pred, class_names=None):
    """Print the most frequent misclassifications, most common first.

    Every off-diagonal cell of the confusion matrix with a non-zero count is
    one kind of mistake ("actual X predicted as Y"). At most five are printed.
    If there are none, a short message says so instead of leaving the list
    empty.

    Args:
        y_true: Actual labels.
        y_pred: Predicted labels, in the same order as ``y_true``.
        class_names: Optional display names, one per class, in the same
            order as the rows of the matrix. Defaults to
            ``"Class 0"``, ``"Class 1"``, ...

    Returns:
        None. The report is written to standard output.
    """
    cm = confusion_matrix(y_true, y_pred)

    if class_names is None:
        class_names = [f"Class {i}" for i in range(len(cm))]

    print("Most Common Mistakes:")
    print("-" * 40)

    # Collect every non-empty off-diagonal cell as (count, actual, predicted)
    mistakes = [
        (count, class_names[i], class_names[j])
        for i, row in enumerate(cm)
        for j, count in enumerate(row)
        if i != j and count > 0
    ]

    if not mistakes:
        print("No misclassifications found.")
        return

    # Sort by count only. Comparing whole tuples would fall back to comparing
    # class names on ties, which fails for names that cannot be ordered.
    # The sort is stable, so ties keep their matrix (row, column) order.
    mistakes.sort(key=lambda mistake: mistake[0], reverse=True)

    for count, actual, predicted in mistakes[:5]:  # Top 5 mistakes
        print(f"{count} times: {actual} predicted as {predicted}")

# Example with iris dataset
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

# Load the iris samples and their species labels
iris = load_iris()
X = iris.data
y = iris.target

# Keep 30% of the data aside for testing (seeded for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.3
)

# Train a decision tree and predict the held-out samples
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Report which species get confused with which
analyze_mistakes(y_test, y_pred, class_names=iris.target_names)