# -*- coding: utf-8 -*-
"""
# Assignment 2
Salary Prediction (write Python code): Perform linear regression with one feature using gradient descent (without using library functions) to predict the salary of an employee based on the feature Years Experience. Use hyper-parameter tuning for the best result. Plot the hypothesis function and the data points after each epoch. Evaluate the accuracy of the model by computing relevant performance metrics, including the R² value. (Dataset: salary_data.csv)
- Load the dataset and Display the dataset contents
- Display top 5 rows
- Display basic dataset information
- Compute summary statistics of the dataset

Then Execute the following queries
1. View dataset structure and sample records
2. Compute summary statistics
3. Select employees having experience more than 2.5 years
4. Select employees whose salary is greater than 100000
5. Find the minimum, maximum and average salary values
6. Find the average salary, then display the employees whose salary is above the average
7. Find correlation between experience and salary
8. Visualize the relationship between Years Experience and Salary with a scatter plot (Experience vs Salary)
9. Train linear regression model
10. Predict salary for specific experience
11. Plot regression line over data points
12. Evaluate model performance (R², MAE, MSE)
13. Predict salaries for 0-10 years of experience
"""

# 1. View dataset structure and sample records & 2. Compute summary statistics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

# Load the dataset; the relative path is resolved against the current
# working directory.
df = pd.read_csv('salary_data.csv')

# Full dataset contents.
print(df)

# head() and describe() only *return* new objects. Outside an interactive
# notebook cell the result must be printed explicitly, otherwise it is
# silently discarded and nothing is displayed.
print(df.head())

# info() writes its report straight to stdout, so no print() is needed.
df.info()

print(df.describe())

# 3. Employees with more than 2.5 years of experience
experience_mask = df['YearsExperience'] > 2.5
print(df[experience_mask])

# 4. Employees whose salary exceeds 100000
high_salary_mask = df['Salary'] > 100000
print(df[high_salary_mask])

# 5. Minimum, maximum and average of the Salary column
salary_column = df['Salary']
min_salary = salary_column.min()
max_salary = salary_column.max()
avg_salary = salary_column.mean()

# One formatted line per statistic, in min / max / average order.
for label, amount in (
    ("Minimum", min_salary),
    ("Maximum", max_salary),
    ("Average", avg_salary),
):
    print(f"{label} Salary: ${amount:0.2f}")

# 6. Employees whose salary is strictly above the average computed above
above_average_mask = salary_column > avg_salary
print(df[above_average_mask])

# 7. Correlation between years of experience and salary
#    (Series.corr is called with its default method)
experience_series = df['YearsExperience']
salary_series = df['Salary']
correlation = experience_series.corr(salary_series)
print(correlation)

# 8. Scatter plot of Years Experience against Salary
scatter_fig, scatter_ax = plt.subplots(figsize=(10, 6))
scatter_ax.scatter(df['YearsExperience'], df['Salary'], color='green')
scatter_ax.set_title('Years Experience vs. Salary')
scatter_ax.set_xlabel('Years Experience')
scatter_ax.set_ylabel('Salary')
scatter_ax.grid()
plt.show()

#  9. Train Linear Regression Model
# Double brackets keep both the feature and the target as single-column
# DataFrames (2-D), which determines the shape of the fitted attributes below.
x = df[['YearsExperience']]
y = df[['Salary']]

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(x_train, y_train)

# Because the target is 2-D, intercept_ has shape (1,) and coef_ has shape
# (1, 1). Index down to the scalars so the intercept and slope are printed as
# plain numbers rather than as array representations.
print("Intercept: ", model.intercept_[0])
print("Coefficient:", model.coef_[0][0])

# 10. Predict salary for specific experience
years = 3

# The model was fitted on a DataFrame, so the query is wrapped in a frame that
# carries the same column name. A bare nested list makes scikit-learn warn
# that the input has no valid feature names.
experience_query = pd.DataFrame({'YearsExperience': [years]})
predicted_salary = model.predict(experience_query)

# The prediction is indexed as [0][0] because the model was trained on a 2-D
# target. The format spec is ':.2f' (not ': .2f'): the space flag would insert
# a blank before positive numbers and print "$ 1234.00".
print(f"Predicted Salary for {years} years of experience: ${predicted_salary[0][0]:.2f}")

# 11. Regression line drawn over the observed data points
experience_values = df['YearsExperience']
fitted_salaries = model.predict(df[['YearsExperience']])

fit_fig, fit_ax = plt.subplots()
fit_ax.scatter(experience_values, df['Salary'], color='green')  # observations
fit_ax.plot(experience_values, fitted_salaries, color='red')    # model output
fit_ax.set_title("Regression Line: YearsExperience vs. Salary")
fit_ax.set_xlabel("YearsExperience")
fit_ax.set_ylabel("Salary")
fit_ax.grid()
plt.show()

# 12. Evaluate model performance (R², MAE, MSE) on the test split
y_pred = model.predict(x_test)

# Each metric takes (true values, predictions) in that order.
r2, mae, mse = (
    metric(y_test, y_pred)
    for metric in (r2_score, mean_absolute_error, mean_squared_error)
)

for metric_label, metric_value in (
    ("R-squared (R²)", r2),
    ("Mean Absolute Error (MAE)", mae),
    ("Mean Squared Error (MSE)", mse),
):
    print(f"{metric_label}: {metric_value:.2f}")

# 13. Predict salaries for 0-10 years of experience
# Column vector of the integers 0..10, one row per query.
experience_range = np.arange(0, 11).reshape(-1, 1)

# Give the inputs the column name the model was trained with; passing the bare
# ndarray triggers scikit-learn's "X does not have valid feature names" warning.
experience_frame = pd.DataFrame(experience_range, columns=['YearsExperience'])
predicted_salaries_range = model.predict(experience_frame)

# A dedicated loop variable is used so the module-level `years` (set for the
# single-value prediction) is not overwritten by this loop.
for exp_years, salary in zip(experience_range.flatten(), predicted_salaries_range.flatten()):
    print(f"Predicted Salary for {exp_years} years of experience: ${salary:.2f}")