# -*- coding: utf-8 -*-
"""
# Assignment 1
Consider the student data set (student.csv) and write a Python program to perform the following queries:
"""

# 1. Read student dataset and display the dataset contents
import pandas as pd
import matplotlib.pyplot as plt
# Load the attendance sheet; every query below works off this DataFrame.
df = pd.read_csv('student9.csv')
print(df)

# 2. Students whose average attendance is at least 80%
high_attendance = df.loc[df['Average Attendance'].ge(80)]
print(high_attendance[['Student Name', 'Average Attendance']])

# 3. Students whose average attendance is under 60% (not allowed in the semester exam)
low_attendance = df.loc[df['Average Attendance'].lt(60)]
print(low_attendance[['Student Name', 'Average Attendance']])

# 4. Students with attendance below 20% in at least one subject
# Per-subject percentage columns are picked out by the 'Attendance %' substring in their header.
attendance_cols = [name for name in df.columns if 'Attendance %' in name]
low_subject_attendance = df.loc[(df[attendance_cols] < 20).any(axis=1)]
print(low_subject_attendance[['Student Name'] + attendance_cols])

# 5. Students with at least 80% attendance in every subject
all_high_attendance = df.loc[(df[attendance_cols] >= 80).all(axis=1)]
print(all_high_attendance[['Student Name', 'Average Attendance']])

# 6. Students with 100% attendance in every subject (perfect record)
perfect_attendance = df.loc[(df[attendance_cols] == 100).all(axis=1)]
if not perfect_attendance.empty:
    print(perfect_attendance[['Student Name', 'Average Attendance']])
else:
    print("No student has a perfect attendance record.")

# 7. Top 5 students by average attendance
ranked_by_average = df.sort_values(by='Average Attendance', ascending=False)
top_5_students = ranked_by_average.head(5)
print(top_5_students[['Student Name', 'Average Attendance']])

# 8. Subject-wise attendance report: mean of each per-subject percentage column
subject_attendance = (
    df[attendance_cols]
    .mean()
    .rename_axis('Subject')
    .reset_index(name='Average Attendance')
)
print(subject_attendance)

# 9. Total number of classes attended by each student across all subjects
# Class-count columns are picked out by the 'Present in' substring in their header.
present_cols = [name for name in df.columns if 'Present in' in name]
df['Total Classes Attended'] = df[present_cols].sum(axis='columns')
print(df[['Student Name', 'Total Classes Attended']])

# 10. Group students into different categories according to average attendance
def categorize_attendance(attendance):
    """Return the category label for an average-attendance value.

    Bands: 85 and above is 'Excellent', 70 up to (but excluding) 85 is
    'Good', 50 up to (but excluding) 70 is 'Poor', and everything else is
    'Defaulter'.
    """
    # Bands are tried from the highest floor down, so a lower bound alone
    # is enough to identify each one.
    for floor, label in ((85, 'Excellent'), (70, 'Good'), (50, 'Poor')):
        if attendance >= floor:
            return label
    # Reached for values under 50 and for anything that fails every
    # comparison (e.g. NaN).
    return 'Defaulter'

# 10. Label every student with an attendance category and count each category
df['Attendance Category'] = df['Average Attendance'].apply(categorize_attendance)
category_counts = df['Attendance Category'].value_counts()
print(category_counts)

# 11. List the defaulter students (average attendance < 50%)
defaulter_students = df.loc[df['Attendance Category'].eq('Defaulter')]
print(defaulter_students[['Student Name', 'Average Attendance']])

# 12. Report for the Head of the Department: per subject, how many students are below 40%
hod_report = (
    (df[attendance_cols] < 40)
    .sum()
    .rename_axis('Subject')
    .reset_index(name='Number of Students Below 40%')
)
print(hod_report)

# 13. Student(s) fully absent in one particular subject (here: Design Thinking)
absent_design_thinking = df.loc[df['Attendance % in Design Thinking'].eq(0)]
if not absent_design_thinking.empty:
    print(absent_design_thinking[['Student Name']])
else:
    print("No students have 0% attendance in Design Thinking.")

# 14. Student(s) with 0% attendance in all subjects
zero_attendance_all = df.loc[(df[attendance_cols] == 0).all(axis=1)]
if not zero_attendance_all.empty:
    print(zero_attendance_all[['Student Name']])
else:
    print("No student has 0% attendance in all subjects.")

# 15. Summary table: mean attendance per subject (same table as the subject-wise report in query 8)
print(subject_attendance)

# 16. Bar chart of the top 10 students by average attendance
ranked_for_bar_chart = df.sort_values(by='Average Attendance', ascending=False)
top_10_students = ranked_for_bar_chart.head(10)
plt.figure(figsize=(12, 6))
plt.bar(
    x=top_10_students['Student Name'],
    height=top_10_students['Average Attendance'],
)
plt.title('Top 10 Students by Average Attendance')
plt.xlabel('Student Name')
plt.ylabel('Average Attendance (%)')
# Slant the tick labels and anchor them on the right.
plt.xticks(rotation=45, ha='right')
plt.tight_layout()

# 17. Pie chart of the attendance category distribution
plt.figure(figsize=(8, 8))
plt.pie(
    category_counts,
    labels=category_counts.index,
    autopct='%1.1f%%',
    startangle=140,
)
plt.title('Attendance Category Distribution')
# Equal axis scaling so the pie is drawn as a circle.
plt.axis('equal')

# 18. Line chart of subject-wise attendance for one specific student
# The chart is drawn for the student in the row labelled 0; assign a different
# name here to plot someone else.
student_name_for_line_chart = df.loc[0, 'Student Name']
name_matches = df[df['Student Name'] == student_name_for_line_chart]
if name_matches.empty:
    # Happens when the chosen name is missing (NaN) or not in the dataset.
    raise ValueError(
        f"No student named {student_name_for_line_chart!r} found in the dataset."
    )
# Names are not guaranteed to be unique: keep only the first match so the
# transposed frame below has exactly one column to label.
student_data = name_matches.head(1)
subject_attendances = student_data[attendance_cols].T
subject_attendances.columns = ['Attendance']
# Shorten the axis labels from 'Attendance % in <subject>' to '<subject>'.
subject_attendances.index = [col.replace('Attendance % in ', '') for col in subject_attendances.index]
plt.figure(figsize=(12, 6))
plt.plot(subject_attendances.index, subject_attendances['Attendance'], marker='o')
plt.xlabel('Subject')
plt.ylabel('Attendance (%)')
plt.title(f'Subject-wise Attendance for {student_name_for_line_chart}')
plt.xticks(rotation=45, ha='right')
plt.grid(True)
plt.tight_layout()

# 19. Histogram of average attendance for the whole class
plt.figure(figsize=(10, 6))
plt.hist(df['Average Attendance'], bins=10, edgecolor='black')
plt.xlabel('Average Attendance (%)')
plt.ylabel('Number of Students')
plt.title('Distribution of Average Attendance')
plt.tight_layout()

# 20. Horizontal bar chart of the 10 students with the lowest average attendance
lowest_10_students = df.sort_values(by='Average Attendance').head(10)
plt.figure(figsize=(12, 6))
plt.barh(lowest_10_students['Student Name'], lowest_10_students['Average Attendance'])
plt.xlabel('Average Attendance (%)')
plt.ylabel('Student Name')
plt.title('Top 10 Students with Lowest Attendance')
plt.tight_layout()

# Display every figure created by this script. Without this call the figures
# are built but never shown when the file is run as a plain Python script.
plt.show()