-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcount_database_entries.py
More file actions
94 lines (72 loc) · 3.18 KB
/
count_database_entries.py
File metadata and controls
94 lines (72 loc) · 3.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import json
import os
import re
from collections import Counter
def count_schools():
    """Count the schools listed in ``data/school_data_final.json``.

    The path is relative to the current working directory. The file is
    parsed as JSON and the entries under its top-level ``"schools"`` key are
    counted; a missing key is treated as an empty list. The total and a
    breakdown by each entry's ``"type"`` value are printed, with entries
    lacking a ``"type"`` key tallied as ``'Unknown'``.

    Returns:
        The number of school entries, or 0 if the file could not be read or
        processed (the error is printed instead of being raised).
    """
    school_data_file = os.path.join('data', 'school_data_final.json')

    try:
        with open(school_data_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        schools = data.get('schools', [])
        school_count = len(schools)
        print(f"\nNumber of schools in database: {school_count}")

        # Counter preserves first-seen order, so types are printed in the
        # order in which they first appear in the file.
        school_types = Counter(
            school.get('type', 'Unknown') for school in schools
        )

        print("\nSchools by type:")
        for school_type, count in school_types.items():
            print(f" {school_type}: {count}")

        return school_count
    except Exception as e:
        # Deliberately broad: this is a best-effort statistics report, so any
        # failure (missing file, invalid JSON, unexpected data shape) is
        # reported and counted as zero rather than aborting the whole run.
        print(f"Error counting schools: {e}")
        return 0
def count_suburbs():
    """Count the suburb entries of ``victorianSuburbsDB`` in ``js/geocode-db.js``.

    The path is relative to the current working directory. The JavaScript
    source is not parsed; instead the text between the object's opening
    ``const victorianSuburbsDB = {`` line and the closing ``};`` that
    precedes the ``// Function to get coordinates`` comment is scanned with
    regular expressions. The total number of entries and a breakdown by the
    first digit of each entry's postcode are printed.

    Returns:
        The number of suburb entries found, or 0 if the file could not be
        read or the object's start/end markers are not present (the problem
        is printed instead of being raised).
    """
    geocode_db_file = os.path.join('js', 'geocode-db.js')
    start_marker = "const victorianSuburbsDB = {"
    end_marker = "};\n\n// Function to get coordinates"

    try:
        with open(geocode_db_file, 'r', encoding='utf-8') as f:
            content = f.read()

        # Check the raw find() result: once len(start_marker) has been added
        # the -1 "not found" sentinel can no longer be recognised.
        marker_idx = content.find(start_marker)
        start_idx = marker_idx + len(start_marker)
        # Only an end marker located after the start marker closes the object.
        end_idx = content.find(end_marker, start_idx) if marker_idx != -1 else -1

        if marker_idx == -1 or end_idx == -1:
            print("Could not find victorianSuburbsDB in geocode-db.js")
            return 0

        db_content = content[start_idx:end_idx].strip()

        # An entry is recognised by its quoted key followed by "{ lat:".
        suburb_count = len(re.findall(r"'[^']+': \{ lat:", db_content))
        print(f"\nNumber of suburbs in database: {suburb_count}")

        # Tally postcodes by leading digit. The whole block is scanned (not
        # one match per line) so the breakdown stays consistent with the
        # entry count above even if several entries share a line.
        postcode_regions = Counter(
            postcode[0]
            for postcode in re.findall(r"postcode: '(\d{4})'", db_content)
        )

        print("\nSuburbs by postcode region:")
        for prefix, count in postcode_regions.items():
            # Prefix "3" is labelled Victoria; other digits get a generic label.
            region_name = "Victoria" if prefix == "3" else f"Region {prefix}"
            print(f" {region_name}: {count}")

        return suburb_count
    except Exception as e:
        # Deliberately broad: this is a best-effort statistics report, so any
        # failure (e.g. missing or unreadable file) is reported and counted as
        # zero rather than aborting the whole run.
        print(f"Error counting suburbs: {e}")
        return 0
def main():
    """Print the Schoolify database statistics report.

    Runs the school count followed by the suburb count (each prints its own
    detailed section), then prints a summary with both totals and the names
    of the files that were examined.
    """
    print("=== Schoolify Database Statistics ===")

    # Order matters for the report layout: schools first, then suburbs.
    school_count = count_schools()
    suburb_count = count_suburbs()

    summary_lines = (
        "\n=== Summary ===",
        f"Total schools: {school_count}",
        f"Total suburbs: {suburb_count}",
        "\nThis count was performed by examining:",
        "- data/school_data_final.json for schools",
        "- js/geocode-db.js for suburbs",
    )
    for summary_line in summary_lines:
        print(summary_line)


# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()