# -*- coding: utf-8 -*-
"""
# Assignment 4
Perform k-means clustering for customer segmentation using the given features. Use both Euclidean distance and Manhattan distance for this problem. Also, plot the resulting clusters in 2D and 3D.

Dataset: Cust_Segmentation.csv
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances
from sklearn.preprocessing import StandardScaler

# Load the customer records
df = pd.read_csv('Cust_Segmentation.csv')

# Numerical columns used as clustering inputs
feature_columns = [
    'Age', 'Edu', 'Years Employed', 'Income',
    'Card Debt', 'Other Debt', 'DebtIncomeRatio',
]
features = df[feature_columns]

# Standardize every column (zero mean, unit variance) so that no single
# feature dominates the distance computation because of its scale
scaler = StandardScaler().fit(features)
X = scaler.transform(features)

# K-Means (Euclidean Distance)
# scikit-learn's KMeans minimises the within-cluster squared Euclidean distance.
# n_init is pinned explicitly: its default changed from 10 to 'auto' in
# scikit-learn 1.4 (with a FutureWarning in 1.2-1.3), so leaving it unset makes
# the labels depend on the installed version. 10 matches the Manhattan run.
# NOTE(review): this run uses 7 clusters while the Manhattan run uses 4 --
# confirm whether both runs are meant to share the same number of clusters.
kmeans_euclidean = KMeans(n_clusters=7, random_state=42, n_init=10)
labels_euclidean = kmeans_euclidean.fit_predict(X)

# K-Medians (Manhattan Distance)
#
# scikit-learn's KMeans always minimises squared Euclidean distance. Fitting it
# on a precomputed L1 distance matrix does NOT cluster by Manhattan distance:
# each row of the n x n matrix is simply treated as an n-dimensional feature
# vector. The L1 counterpart of k-means is k-medians: points are assigned to
# the nearest centre under the Manhattan metric, and each centre is moved to
# the per-coordinate median of its members (the point minimising summed L1
# distance).


def _kmedians_manhattan(data, n_clusters, n_init=10, max_iter=300, random_state=42):
    """Cluster the rows of *data* with k-medians under the Manhattan metric.

    Args:
        data: 2-D array-like of shape (n_samples, n_features).
        n_clusters: Number of clusters to form.
        n_init: Number of random restarts; the run with the lowest total
            L1 distance to the assigned centres is returned.
        max_iter: Maximum number of assign/update rounds per restart.
        random_state: Seed for the random choice of initial centres.

    Returns:
        A ``(labels, centers)`` tuple: ``labels`` holds the cluster index of
        every row, ``centers`` has shape (n_clusters, n_features).

    Raises:
        ValueError: If *n_clusters* is not between 1 and the number of rows.
    """
    data = np.asarray(data, dtype=float)
    n_samples = data.shape[0]
    if not 1 <= n_clusters <= n_samples:
        raise ValueError(
            f"n_clusters must be between 1 and {n_samples}, got {n_clusters}"
        )

    rng = np.random.default_rng(random_state)
    best_cost = np.inf
    best_labels = None
    best_centers = None

    for _restart in range(n_init):
        # Start from distinct data points; fancy indexing returns a copy, so
        # updating the centres never touches the data itself.
        centers = data[rng.choice(n_samples, size=n_clusters, replace=False)]

        for _round in range(max_iter):
            nearest = pairwise_distances(data, centers, metric='manhattan').argmin(axis=1)
            updated = centers.copy()
            for cluster in range(n_clusters):
                members = data[nearest == cluster]
                # An empty cluster keeps its previous centre.
                if len(members):
                    updated[cluster] = np.median(members, axis=0)
            if np.array_equal(updated, centers):
                break
            centers = updated

        # Score the restart with its final centres.
        distances = pairwise_distances(data, centers, metric='manhattan')
        cost = distances.min(axis=1).sum()
        if cost < best_cost:
            best_cost = cost
            best_labels = distances.argmin(axis=1)
            best_centers = centers

    return best_labels, best_centers


labels_manhattan, centers_manhattan = _kmedians_manhattan(
    X, n_clusters=4, n_init=10, random_state=42
)

# Attach both cluster assignments to the original dataset as new columns
for column_name, cluster_labels in (
    ('Cluster_Euclidean', labels_euclidean),
    ('Cluster_Manhattan', labels_manhattan),
):
    df[column_name] = cluster_labels

# 2D scatter of the Euclidean clusters: Age (column 0) against Income (column 3)
import matplotlib.pyplot as plt

fig_2d, ax_2d = plt.subplots(figsize=(8, 6))
# Colour every point by its Euclidean K-Means cluster label
ax_2d.scatter(X[:, 0], X[:, 3], c=labels_euclidean, cmap='rainbow')
ax_2d.set_title('K-Means Clustering (Euclidean Distance) -2D')
ax_2d.set_xlabel('Age(standardized)')
ax_2d.set_ylabel('income(standardized)')
plt.show()

# 3D Plot (Manhattan Distance)
# The plotted columns are looked up by name so the data and the axis labels
# cannot drift apart: positional indices 0, 1, 2 are Age, Edu and
# Years Employed, which is not what the axis labels (Age, Income, Card Debt) say.
axes_3d = ['Age', 'Income', 'Card Debt']
x_col, y_col, z_col = (features.columns.get_loc(name) for name in axes_3d)

fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')
# X keeps the column order of `features`, so the looked-up positions apply to it
ax.scatter(X[:, x_col], X[:, y_col], X[:, z_col], c=labels_manhattan, cmap='rainbow')
ax.set_title('K-Means Clustering (Manhattan Distance)-3D')
ax.set_xlabel(axes_3d[0])
ax.set_ylabel(axes_3d[1])
ax.set_zlabel(axes_3d[2])
plt.show()