Python Model Evaluation Script 🐍

Below is the Python script used to train and evaluate the logistic regression model on the loan dataset. It computes metrics and generates the confusion matrix PNG.


# Import libraries
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from imblearn.over_sampling import SMOTE
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load dataset (assume 'loan-train.csv' in folder) and drop the Loan_ID
# identifier column in the same step
df = pd.read_csv('loan-train.csv').drop(columns='Loan_ID')

# Handle missing values: numeric columns are filled with their median,
# object (string) columns with their most frequent value
numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = df.select_dtypes(include='object').columns

imputer_num = SimpleImputer(strategy='median')
imputer_cat = SimpleImputer(strategy='most_frequent')
for cols, imputer in ((numeric_cols, imputer_num), (categorical_cols, imputer_cat)):
    df[cols] = imputer.fit_transform(df[cols])

# Forward-fill as a fallback for any column the two imputers did not cover
df = df.ffill()

# Verify no NaNs; as a last resort discard the rows that still have gaps
if df.isnull().values.any():
    print("Warning: NaNs still present. Dropping rows.")
    df = df.dropna()

# Encode categorical: one LabelEncoder instance is re-fitted per column,
# features first and the target last
le = LabelEncoder()
feature_cols = [name for name in categorical_cols if name != 'Loan_Status']
for name in feature_cols:
    df[name] = le.fit_transform(df[name])
# LabelEncoder numbers classes in sorted order, hence N=0, Y=1
df['Loan_Status'] = le.fit_transform(df['Loan_Status'])

# Features and target
X = df.drop('Loan_Status', axis=1)
y = df['Loan_Status']

# Split data BEFORE oversampling so the held-out test set contains only real,
# untouched rows. stratify=y keeps the original class ratio in both partitions,
# which matters because the classes are imbalanced (hence SMOTE below).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Oversample with SMOTE on the training partition only. Resampling the full
# dataset before splitting leaks information: synthetic minority rows are
# interpolated from neighbours that can end up in the test set, and the test
# set itself would contain synthetic rows, inflating the reported metrics.
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_train, y_train)

# Train on the balanced data; X_test / y_test keep the real class distribution
X_train, y_train = X_res, y_res

# Model: build the logistic regression classifier and fit it on the
# training partition in one expression (fit() returns the estimator)
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Predictions on the held-out partition
y_pred = model.predict(X_test)

# Evaluation: compute the headline metrics first, then report each one
# as a percentage with two decimals
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
for metric_name, metric_value in (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
):
    print(f"{metric_name}: {metric_value * 100:.2f}%")

# Per-class breakdown
print("\nClassification Report:")
report = classification_report(y_test, y_pred)
print(report)

# Confusion Matrix: rows are the actual classes, columns the predicted ones
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, cmap='coolwarm', fmt='d')
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')

# savefig does not create missing directories, so make sure 'images/' exists
# first; otherwise a fresh checkout fails with FileNotFoundError here.
output_path = Path('images') / 'confusion_matrix.png'
output_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(output_path)
plt.close()

print("Model evaluation completed. Visualization saved as PNG.")
            
Download Python Script