-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalyze_data_types.py
More file actions
127 lines (104 loc) · 5 KB
/
analyze_data_types.py
File metadata and controls
127 lines (104 loc) · 5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env python3
"""
Analyze data types in the my_points table to see if they need to be separated
"""
import asyncio
import asyncpg
import json
from collections import Counter
def _count_values(rows, column):
    """Return a Counter of the non-NULL values found in *column* of *rows*."""
    return Counter(row[column] for row in rows if row[column] is not None)


def _print_counts(pairs, label=""):
    """Print one " - [label]value: count" line per (value, count) pair."""
    for value, count in pairs:
        print(f" - {label}{value}: {count} مورد")


def _slugify(value):
    """Turn any value into a lowercase, space-free identifier fragment."""
    # str() first: the column may hold numbers rather than text, and calling
    # .lower() directly on a non-string raises AttributeError.
    return str(value).lower().replace(' ', '_')


def _beacon_table_name(value):
    """Build the suggested table name for one bcnshp value."""
    if not value:
        return "beacon_unknown"
    try:
        return f"beacon_type_{int(value)}"
    except (TypeError, ValueError, OverflowError):
        # Non-numeric value (e.g. a code list such as "1,3"): fall back to a
        # sanitized string instead of aborting the whole report.
        return f"beacon_type_{_slugify(value)}"


def _category_table_name(value):
    """Build the suggested table name for one catspm value."""
    if not value:
        return "category_unknown"
    return f"category_{_slugify(value)}"


async def analyze_data_types(
    *,
    host="localhost",
    port=5434,
    user="postgres",
    password="postgres",
    database="gis_db",
) -> None:
    """Print a report on the value distribution of the my_points table.

    Connects to PostgreSQL through ``asyncpg.connect``, fetches every row of
    ``my_points`` ordered by ``gid`` and prints (in Persian):

    * the total row count,
    * value frequencies for ``bcnshp``, ``catspm``, ``objnam`` and ``txtdsc``
      (NULL values are ignored),
    * up to three sample records per distinct ``bcnshp`` value,
    * whether the data can be split by ``bcnshp`` and/or ``catspm``, together
      with suggested table names for such a split.

    Args:
        host: Database server host.
        port: Database server port.
        user: Database user name.
        password: Database password.
        database: Database name.

    All arguments are keyword-only and default to the values that were
    previously hard-coded, so calling the function with no arguments behaves
    as before.

    The connection is always closed, even if the query or the reporting
    fails. Errors raised while connecting or querying are not caught here.
    """
    conn = await asyncpg.connect(
        host=host,
        port=port,
        user=user,
        password=password,
        database=database,
    )
    try:
        # Get all data from my_points table
        rows = await conn.fetch("""
            SELECT gid, bcnshp, catspm, colour, colpat, objnam, nobjnm,
                   txtdsc, ntxtds, inform, ninfom, status, condtn
            FROM my_points
            ORDER BY gid
        """)

        print("🔍 تحلیل انواع دادهها در جدول my_points")
        print("=" * 60)
        print(f"تعداد کل رکوردها: {len(rows)}")
        print()

        # Beacon shape distribution: the heading is printed even when empty.
        bcnshp_counter = _count_values(rows, 'bcnshp')
        print("📊 انواع فانوس دریایی (bcnshp):")
        _print_counts(bcnshp_counter.most_common(), label="نوع ")
        print()

        # The remaining columns are reported only when they hold any value.
        catspm_counter = _count_values(rows, 'catspm')
        if catspm_counter:
            print("📊 دستهبندی (catspm):")
            _print_counts(catspm_counter.most_common())
            print()

        objnam_counter = _count_values(rows, 'objnam')
        if objnam_counter:
            print("📊 نام اشیاء (objnam):")
            _print_counts(objnam_counter.most_common())
            print()

        txtdsc_counter = _count_values(rows, 'txtdsc')
        if txtdsc_counter:
            print("📊 توضیحات متنی (txtdsc):")
            _print_counts(txtdsc_counter.most_common())
            print()

        # Collect the first three rows of every bcnshp value in a single pass
        # instead of rescanning all rows once per distinct value.
        samples = {}
        for row in rows:
            shape = row['bcnshp']
            if shape is None:
                continue
            bucket = samples.setdefault(shape, [])
            if len(bucket) < 3:
                bucket.append(row)

        print("🔍 نمونه رکوردها برای هر نوع:")
        for bcnshp_type in sorted(bcnshp_counter):
            print(f"\n--- نوع فانوس {bcnshp_type} ---")
            for row in samples.get(bcnshp_type, []):
                print(f" GID {row['gid']}:")
                print(f" - bcnshp: {row['bcnshp']}")
                print(f" - catspm: {row['catspm']}")
                print(f" - objnam: {row['objnam']}")
                print(f" - txtdsc: {row['txtdsc']}")
                print(f" - status: {row['status']}")
                print()

        # Check if data should be separated
        splittable_by_shape = len(bcnshp_counter) > 1
        splittable_by_category = len(catspm_counter) > 1

        print("🤔 آیا دادهها باید تقسیم شوند؟")
        print("=" * 40)
        if splittable_by_shape:
            print("✅ بله! دادهها بر اساس bcnshp (نوع فانوس) قابل تقسیم هستند:")
            _print_counts(bcnshp_counter.items(), label="نوع ")
        else:
            print("❌ خیر، همه دادهها از یک نوع هستند")
        if splittable_by_category:
            print("✅ دادهها بر اساس catspm (دستهبندی) نیز قابل تقسیم هستند:")
            _print_counts(catspm_counter.items())

        # Suggest separation strategy
        print("\n💡 پیشنهاد تقسیمبندی:")
        print("=" * 30)
        if splittable_by_shape:
            print("1. تقسیم بر اساس bcnshp (نوع فانوس):")
            for value, count in bcnshp_counter.items():
                table_name = _beacon_table_name(value)
                print(f" - جدول: {table_name} ({count} مورد)")
        if splittable_by_category:
            print("2. تقسیم بر اساس catspm (دستهبندی):")
            for value, count in catspm_counter.items():
                table_name = _category_table_name(value)
                print(f" - جدول: {table_name} ({count} مورد)")
    finally:
        await conn.close()
def _main():
    """Run the my_points analysis to completion via asyncio.run."""
    asyncio.run(analyze_data_types())


# Only start the analysis when the file is executed as a script.
if __name__ == "__main__":
    _main()