-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
136 lines (109 loc) · 4.93 KB
/
Copy pathutils.py
File metadata and controls
136 lines (109 loc) · 4.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import streamlit as st
import plotly.graph_objects as go
import io
import pandas as pd
class Utils:
    """Streamlit helper methods for exporting, formatting and summarising data.

    The methods keep no instance state; the class only groups related helpers
    used by the attrition dashboard.
    """

    # Columns that validate_data() requires in an uploaded dataset.
    REQUIRED_COLUMNS = (
        'Age', 'Attrition', 'Department', 'DistanceFromHome',
        'Education', 'Gender', 'JobRole', 'JobSatisfaction',
        'MonthlyIncome', 'OverTime', 'YearsAtCompany',
    )

    # Format templates understood by format_number(), keyed by format_type.
    _NUMBER_FORMATS = {
        "percentage": "{:.1f}%",
        "currency": "${:,.0f}",
        "standard": "{:,.0f}",
    }

    def export_chart(self, fig, filename):
        """Render a download button that serves ``fig`` as an HTML file.

        Args:
            fig: Figure object exposing ``to_html(include_plotlyjs=...)``.
            filename: File name offered to the browser for the download.

        Any exception is caught and reported through ``st.error``. The
        success message is emitted as soon as the button has been rendered,
        not after the user actually downloads the file.
        """
        try:
            # Plotly's JS is referenced from the CDN to keep the file small.
            html_string = fig.to_html(include_plotlyjs='cdn')
            st.download_button(
                label="Download Chart",
                data=html_string,
                file_name=filename,
                mime="text/html",
            )
            st.success(f"Chart exported as {filename}")
        except Exception as e:  # UI boundary: report instead of crashing
            st.error(f"Error exporting chart: {str(e)}")

    def export_data(self, df, filename):
        """Render a download button that serves ``df`` as a CSV file.

        Args:
            df: DataFrame to serialise (the index is not written).
            filename: File name offered to the browser for the download.

        Any exception is caught and reported through ``st.error``. The
        success message is emitted as soon as the button has been rendered,
        not after the user actually downloads the file.
        """
        try:
            csv = df.to_csv(index=False)
            st.download_button(
                label="Download Data",
                data=csv,
                file_name=filename,
                mime="text/csv",
            )
            st.success(f"Data exported as {filename}")
        except Exception as e:  # UI boundary: report instead of crashing
            st.error(f"Error exporting data: {str(e)}")

    def format_number(self, number, format_type="standard"):
        """Format a number for display.

        Args:
            number: Numeric value to format.
            format_type: ``"percentage"`` (one decimal plus ``%``),
                ``"currency"`` (``$`` with thousands separators, no
                decimals) or ``"standard"`` (thousands separators, no
                decimals). Any other value falls back to ``str(number)``.

        Returns:
            The formatted string.
        """
        template = self._NUMBER_FORMATS.get(format_type)
        if template is None:
            return str(number)
        return template.format(number)

    def create_summary_card(self, title, value, delta=None, delta_color=None):
        """Display a metric card via ``st.metric``.

        Args:
            title: Label of the metric.
            value: Value shown on the card.
            delta: Optional change indicator; omitted when ``None``.
            delta_color: Optional colour mode for the delta. Only forwarded
                when ``delta`` is also given.
        """
        metric_kwargs = {"label": title, "value": value}
        if delta is not None:
            metric_kwargs["delta"] = delta
            if delta_color is not None:
                metric_kwargs["delta_color"] = delta_color
        st.metric(**metric_kwargs)

    def validate_data(self, df):
        """Validate the uploaded dataset.

        Checks that every column in ``REQUIRED_COLUMNS`` is present and that
        the ``Attrition`` column holds only ``'Yes'`` / ``'No'`` (missing
        values count as invalid).

        Returns:
            A ``(is_valid, message)`` tuple.
        """
        missing_columns = [
            col for col in self.REQUIRED_COLUMNS if col not in df.columns
        ]
        if missing_columns:
            return False, f"Missing required columns: {', '.join(missing_columns)}"

        if not df['Attrition'].isin(['Yes', 'No']).all():
            return False, "Attrition column must contain only 'Yes' and 'No' values"

        return True, "Data validation successful"

    def get_data_info(self, df):
        """Get basic information about the dataset.

        Returns:
            A dict with the keys ``rows``, ``columns``, ``missing_values``
            (total count of null cells), ``numeric_columns`` (int64/float64
            columns) and ``categorical_columns`` (object columns).
        """
        return {
            'rows': len(df),
            'columns': len(df.columns),
            # Cast so callers receive a plain int rather than a NumPy scalar.
            'missing_values': int(df.isnull().sum().sum()),
            'numeric_columns': len(
                df.select_dtypes(include=['int64', 'float64']).columns
            ),
            'categorical_columns': len(
                df.select_dtypes(include=['object']).columns
            ),
        }

    @staticmethod
    def _highest_attrition_group(left, keys):
        """Return ``(group, rate_percent)`` for the group with the top rate.

        Args:
            left: Boolean Series, True where the employee left.
            keys: Series of group labels, positionally aligned with ``left``.

        Returns ``("N/A", 0.0)`` when no group can be formed (for example
        when every label is missing).
        """
        # Group positionally so unusual indexes cannot cause misalignment.
        rates = left.groupby(keys.to_numpy()).mean() * 100
        if rates.empty:
            return "N/A", 0.0
        return rates.idxmax(), rates.max()

    def create_insights_text(self, df):
        """Generate markdown insights text from the attrition data.

        Reads the ``Attrition``, ``Department``, ``JobRole``,
        ``MonthlyIncome`` and ``YearsAtCompany`` columns.

        Returns:
            A markdown string. For an empty DataFrame a short notice is
            returned instead of raising; when nobody left, the income and
            tenure averages are shown as ``N/A`` rather than ``nan``.
        """
        total_employees = len(df)
        if total_employees == 0:
            # Avoids the division by zero / empty idxmax of the naive path.
            return "\n## Key Insights\nNo employee records available to analyze.\n"

        left = df['Attrition'] == 'Yes'
        leavers = df[left]
        attrition_count = len(leavers)
        attrition_rate = (attrition_count / total_employees) * 100

        highest_dept, highest_dept_rate = self._highest_attrition_group(
            left, df['Department']
        )
        highest_role, highest_role_rate = self._highest_attrition_group(
            left, df['JobRole']
        )

        # Means are NaN when nobody left (or values are missing).
        avg_income = leavers['MonthlyIncome'].mean()
        avg_tenure = leavers['YearsAtCompany'].mean()
        income_text = f"${avg_income:,.0f}" if pd.notna(avg_income) else "N/A"
        tenure_text = f"{avg_tenure:.1f} years" if pd.notna(avg_tenure) else "N/A"

        lines = [
            "",
            "## Key Insights",
            f"📊 **Overall Attrition Rate**: {attrition_rate:.1f}% ({attrition_count} out of {total_employees} employees)",
            f"🏢 **Department Analysis**: {highest_dept} has the highest attrition rate at {highest_dept_rate:.1f}%",
            f"👔 **Job Role Analysis**: {highest_role} role shows the highest attrition rate at {highest_role_rate:.1f}%",
            f"💰 **Income Impact**: Employees who left had an average monthly income of {income_text}",
            f"⏰ **Tenure Pattern**: Average tenure of employees who left is {tenure_text}",
            "",
        ]
        return "\n".join(lines)