-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprecompute_depth.py
More file actions
109 lines (87 loc) · 3.56 KB
/
precompute_depth.py
File metadata and controls
109 lines (87 loc) · 3.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/env python3
"""
Pre-compute depth analysis data and save as JSON
This runs once and caches the results
"""
import json
import sys
import os
def find_deepest_path(item):
    """Build the full hierarchy path for a data-element record.

    The path always starts with the element's primary designation and its
    steward organization, then appends every classification element name,
    every data-element-concept name, and every property-concept name found
    on the record.

    Args:
        item: dict parsed from SearchExport.json (one data element).

    Returns:
        (depth, path): depth is len(path); path is a list of
        {'type': ..., 'name': ...} dicts ordered Element, Steward,
        Classification*, Concept*, Property*.
    """
    path = []
    # BUG FIX: the original used item.get('designations', [{}])[0], which
    # raises IndexError when 'designations' exists but is an empty list —
    # .get's default only applies when the key is missing entirely.
    designations = item.get('designations') or [{}]
    designation = designations[0].get('designation', 'Unnamed Element')
    path.append({'type': 'Element', 'name': designation})
    steward = item.get('stewardOrg', {}).get('name', 'Unknown Steward')
    path.append({'type': 'Steward', 'name': steward})
    # Classification names are nested two levels deep: each classification
    # entry carries an 'elements' list; only named elements count.
    if item.get('classification'):
        for cls in item['classification']:
            if cls.get('elements'):
                for elem in cls['elements']:
                    if elem.get('name'):
                        path.append({'type': 'Classification', 'name': elem['name']})
    if item.get('dataElementConcept') and item['dataElementConcept'].get('concepts'):
        for concept in item['dataElementConcept']['concepts']:
            path.append({'type': 'Concept', 'name': concept.get('name', 'Unknown')})
    if item.get('property') and item['property'].get('concepts'):
        for prop in item['property']['concepts']:
            path.append({'type': 'Property', 'name': prop.get('name', 'Unknown')})
    return len(path), path
def main():
    """Compute depth statistics for SearchExport.json and cache them.

    Reads SearchExport.json from the current directory, computes the
    hierarchy depth of every item via find_deepest_path, and writes a
    summary (totals, max/avg depth, depth histogram chart data, and the
    50 deepest paths) to depth_analysis_data.json.

    Raises:
        FileNotFoundError: if SearchExport.json is not present.
        json.JSONDecodeError: if the input file is not valid JSON.
    """
    print("Loading SearchExport.json...")
    with open('SearchExport.json', 'r', encoding='utf-8') as f:
        data = json.load(f)
    print(f"Analyzing {len(data)} items...")
    max_depth = 0
    deepest_paths = []      # all path records tied at max_depth
    depth_distribution = {} # depth -> item count
    all_paths = []
    for i, item in enumerate(data):
        depth, path = find_deepest_path(item)
        # BUG FIX: guard against 'designations' being an empty list —
        # .get's default does not cover a present-but-empty value, so the
        # original [0] index could raise IndexError.
        designations = item.get('designations') or [{}]
        designation = designations[0].get('designation', 'Unnamed Element')
        path_data = {
            'index': i,
            'depth': depth,
            'designation': designation,
            'path': path
        }
        all_paths.append(path_data)
        if depth > max_depth:
            max_depth = depth
            deepest_paths = [path_data]
        elif depth == max_depth:
            deepest_paths.append(path_data)
        depth_distribution[depth] = depth_distribution.get(depth, 0) + 1
        if (i + 1) % 5000 == 0:
            print(f"  Processed {i + 1}/{len(data)} items...")
    # Sort all paths by depth descending, take top 50
    all_paths.sort(key=lambda x: x['depth'], reverse=True)
    top_paths = all_paths[:50]
    # Average depth from the histogram (avoids re-walking all_paths)
    total_depth = sum(d * count for d, count in depth_distribution.items())
    total_items = len(data)
    avg_depth = total_depth / total_items if total_items > 0 else 0
    # Chart data: one label/value pair per observed depth, ascending
    depths = sorted(depth_distribution.keys())
    chart_labels = [f'Depth {d}' for d in depths]
    chart_data = [depth_distribution[d] for d in depths]
    result = {
        'total_items': total_items,
        'max_depth': max_depth,
        'avg_depth': round(avg_depth, 2),
        'deepest_count': len(deepest_paths),
        'depth_distribution': depth_distribution,
        'chart_labels': chart_labels,
        'chart_data': chart_data,
        'deepest_paths': top_paths
    }
    # Cache results so downstream consumers don't recompute
    output_file = 'depth_analysis_data.json'
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=2, ensure_ascii=False)
    print(f"\nAnalysis complete!")
    print(f"  Total items: {total_items:,}")
    print(f"  Max depth: {max_depth}")
    print(f"  Avg depth: {avg_depth:.2f}")
    print(f"  Items at max depth: {len(deepest_paths)}")
    print(f"\nSaved to: {output_file}")
if __name__ == '__main__':
    main()