-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathquery_school_metrics.py
More file actions
131 lines (108 loc) · 5.27 KB
/
query_school_metrics.py
File metadata and controls
131 lines (108 loc) · 5.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import json
import pandas as pd
import os
# Define the path to the school data file
DATA_DIR = "data"
SCHOOL_DATA_FILE = os.path.join(DATA_DIR, "school_data_final.json")
# Define the metrics to display in the table
# These should align with RANKING_METRICS_KEYS from js/data.js and other relevant fields
METRIC_COLUMNS = [
'name',
'rank', # The calculated rank from the application
'Median_VCE_Score',
'Pct_Scores_40_Plus',
'Completion_Rate',
'Tertiary_Application_Rate',
# You can add other fields from your JSON like 'type', 'suburb_name', 'level', 'score_2023'
# 'type',
# 'suburb_name',
]
def display_school_metrics():
"""
Loads school data from school_data_final.json, filters out primary schools,
displays key metrics for remaining schools sorted by VCE rank,
and provides statistics on VCE metric availability.
"""
if not os.path.exists(SCHOOL_DATA_FILE):
print(f"Error: School data file not found at {SCHOOL_DATA_FILE}")
return
try:
with open(SCHOOL_DATA_FILE, 'r', encoding='utf-8') as f:
data = json.load(f)
except json.JSONDecodeError as e:
print(f"Error decoding JSON from {SCHOOL_DATA_FILE}: {e}")
return
except Exception as e:
print(f"An unexpected error occurred loading {SCHOOL_DATA_FILE}: {e}")
return
all_schools = data.get('schools', [])
if not all_schools:
print("No schools found in the data file.")
return
# Filter out primary schools
schools_to_display = [s for s in all_schools if s.get('level') != 'Primary']
if not schools_to_display:
print("No non-primary schools found to display.")
return
print(f"Found {len(schools_to_display)} non-primary schools. Preparing metrics table...\n")
# Prepare data for pandas DataFrame
table_data = []
for school in schools_to_display:
row = {}
for col in METRIC_COLUMNS:
row[col] = school.get(col, None) # Use .get() for safety, defaulting to None
table_data.append(row)
if not table_data:
print("No data to display in table.")
return
df = pd.DataFrame(table_data)
# Ensure all desired columns are present in the DataFrame, even if all values are None
# And reorder them as defined in METRIC_COLUMNS
existing_cols = [col for col in METRIC_COLUMNS if col in df.columns]
missing_cols = [col for col in METRIC_COLUMNS if col not in df.columns]
for col in missing_cols:
df[col] = None # Add missing columns filled with None
df = df[METRIC_COLUMNS] # Ensure correct order
# Convert 'rank' to numeric for sorting, coercing errors to NaT (which will be sorted last)
df['rank'] = pd.to_numeric(df['rank'], errors='coerce')
# Sort by VCE rank (ascending, N/A ranks at the bottom)
df = df.sort_values(by='rank', ascending=True, na_position='last')
# Display the DataFrame
# Adjust pandas display options for better console readability
pd.set_option('display.max_rows', None) # Show all rows
pd.set_option('display.max_columns', None) # Show all columns
pd.set_option('display.width', 1000) # Adjust console width
pd.set_option('display.colheader_justify', 'left')
pd.set_option('display.precision', 2) # For floating point numbers
print(df.to_string(index=False))
# Calculate and display statistics for VCE metric availability
print("\n--- VCE Metric Availability Statistics (for non-primary schools) ---")
vce_metric_keys = ['Median_VCE_Score', 'Pct_Scores_40_Plus', 'Completion_Rate', 'Tertiary_Application_Rate']
metric_availability_counts = {1: 0, 2: 0, 3: 0, 4: 0, 0: 0} # Include 0 for completeness
for school in schools_to_display:
num_available_metrics = 0
for key in vce_metric_keys:
value = school.get(key)
# Check if value is not None and not an empty string (though metrics are likely numeric/None)
if value is not None and str(value).strip() != "":
num_available_metrics += 1
if num_available_metrics in metric_availability_counts:
metric_availability_counts[num_available_metrics] += 1
elif num_available_metrics > 0 : # Should not happen if dict is 0-4
metric_availability_counts[num_available_metrics] = 1
print(f"Schools with all 4 VCE metrics: {metric_availability_counts.get(4, 0)}")
print(f"Schools with 3 VCE metrics: {metric_availability_counts.get(3, 0)}")
print(f"Schools with 2 VCE metrics: {metric_availability_counts.get(2, 0)}")
print(f"Schools with 1 VCE metric: {metric_availability_counts.get(1, 0)}")
print(f"Schools with 0 VCE metrics: {metric_availability_counts.get(0, 0)}")
total_processed_for_stats = sum(metric_availability_counts.values())
print(f"Total non-primary schools processed for stats: {total_processed_for_stats}")
# Optionally, you can save this table to a CSV file
# output_csv_file = "school_metrics_summary.csv"
# try:
# df.to_csv(output_csv_file, index=False, encoding='utf-8')
# print(f"\nTable also saved to {output_csv_file}")
# except Exception as e:
# print(f"\nError saving table to CSV: {e}")
if __name__ == "__main__":
display_school_metrics()