# -*- coding: utf-8 -*-
"""
# Assignment 3
Perform a non-linear regression to predict China's GDP from 1960 to 2014 from given features. Evaluate the quality of the model by computing relevant performance metrics, including the R² value. Generate and display a plot that compares the actual values to the predicted values (Actual vs Predicted) for both tasks.

Dataset: china_gdp.csv
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
from scipy.special import expit
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# --- 1. Load Data ---
# The CSV path is relative, so it is resolved against the current working
# directory.
df = pd.read_csv("china_gdp.csv")

# --- 2. Inspect Data ---
# Outside a notebook the value of a bare expression is discarded, so the
# preview and summary statistics must be printed explicitly.
print(df.head())
df.info()  # info() writes its report to stdout itself
print(df.describe())

# --- 3. Prepare Data ---
# Predictor and response columns, kept unmodified for metrics and plotting.
X_data, y_data = df["Year"], df["Value"]

# Normalise both axes before fitting: shift the years so the earliest one
# becomes 0, and divide the values by their maximum. The scale factor is kept
# so predictions can be converted back to the original units.
y_scale_factor = y_data.max()
X_norm = X_data.sub(X_data.min())
y_norm = y_data.div(y_scale_factor)

# --- 4. Define Non-linear Model (Sigmoid/Logistic Function) ---
def sigmoid(x, Beta_1, Beta_2, Beta_3):
    """Evaluate a three-parameter logistic (sigmoid) curve.

    Computes ``Beta_1 / (1 + exp(-Beta_2 * (x - Beta_3)))``.

    Args:
        x: Scalar or array-like of input values.
        Beta_1: Scale of the curve (its upper asymptote when ``Beta_2 > 0``).
        Beta_2: Steepness of the transition.
        Beta_3: Midpoint; at ``x == Beta_3`` the result is ``Beta_1 / 2``.

    Returns:
        The curve value(s) at ``x``.
    """
    # expit(z) is 1 / (1 + exp(-z)) evaluated in a numerically stable way.
    # The naive np.exp form overflows (RuntimeWarning) for large negative z,
    # which happens while the optimiser explores extreme parameter values.
    return Beta_1 * expit(Beta_2 * (x - Beta_3))

# --- 5. Fit the Model ---
# Starting point for the optimiser: a ceiling above the largest normalised
# value, a modest growth rate, and the middle sample as the curve midpoint.
L_guess = y_norm.max() * 1.5
k_guess = 0.1
# Use positional indexing: plain [] on a Series looks up by index *label*,
# which only coincides with position when the index is the default 0..n-1.
x0_guess = X_norm.iloc[len(X_norm) // 2]

p0 = [L_guess, k_guess, x0_guess]
print(f"\nInitial Guesses (p0): [L_guess={p0[0]:.2f}, k_guess={p0[1]:.2f}, x0_guess={p0[2]:.2f}] (y scaled by {y_scale_factor})")

# Least-squares fit of the sigmoid to the normalised data; pcov is the
# estimated covariance of the fitted parameters.
popt, pcov = curve_fit(sigmoid, X_norm, y_norm, p0=p0, maxfev=10000)
Beta_1_opt, Beta_2_opt, Beta_3_opt = popt
print(f"\nOptimal Parameters (Beta_1, Beta_2, Beta_3): {popt}")

# --- 6. Predict ---
# Evaluate the fitted curve on the normalised years, then undo the y scaling
# so predictions are in the same units as the original values.
y_pred_norm = sigmoid(X_norm, *popt)
y_pred = y_pred_norm * y_scale_factor

# --- 7. Evaluate Model ---
# Error metrics are computed in the original (unscaled) units against the
# same data the model was fitted on.
mse = mean_squared_error(y_data, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_data, y_pred)
r2 = r2_score(y_data, y_pred)

# Report every metric, one per line.
for metric_line in (
    f"R-squared (R²): {r2:.6f}",
    f"Mean Absolute Error (MAE): {mae:,.2f}",
    f"Mean Squared Error (MSE): {mse:,.2f}",
    f"Root Mean Squared Error (RMSE): {rmse:,.2f}",
):
    print(metric_line)

# --- 8. Generate Plot (Actual vs. Predicted) ---
# Draw the observed points and the fitted curve on a single set of axes.
fig, ax = plt.subplots()
ax.plot(X_data, y_data, 'ro', label='Actual GDP', markersize=5)
ax.plot(X_data, y_pred, 'b-', label='Predicted GDP (Sigmoid Fit)', linewidth=2)
ax.set_title('China GDP: Actual vs. Predicted (Non-linear Regression)')
ax.set_xlabel('Year')
ax.set_ylabel('GDP (Value)')
ax.legend()
ax.grid()
plt.show()