-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsmart.py
More file actions
91 lines (71 loc) · 3.53 KB
/
smart.py
File metadata and controls
91 lines (71 loc) · 3.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os

import numpy as np
import pandas as pd
# --- Simulation configuration ------------------------------------------------
# Seed NumPy's global RNG so repeated runs generate the same dataset.
np.random.seed(42)

# Hourly timestamps covering 60 days (24 * 60 = 1,440 readings per meter).
# The lowercase "h" alias is used because the uppercase "H" alias is
# deprecated in recent pandas releases; both describe an hourly frequency.
date_range = pd.date_range(start="2024-01-01", periods=24 * 60, freq="h")

# Candidate meter locations as (province, city, latitude, longitude).
locations = [
    ("Gauteng", "Johannesburg", -26.2041, 28.0473),
    ("Gauteng", "Pretoria", -25.7479, 28.2293),
    ("Western Cape", "Cape Town", -33.9249, 18.4241),
    ("KwaZulu-Natal", "Durban", -29.8587, 31.0218),
    ("Eastern Cape", "Port Elizabeth", -33.9608, 25.6022),
    ("Mpumalanga", "Nelspruit", -25.4745, 30.9703),
    ("Free State", "Bloemfontein", -29.0852, 26.1596),
    ("Limpopo", "Polokwane", -23.9000, 29.4500),
    ("North West", "Rustenburg", -25.6676, 27.2421),
    ("Northern Cape", "Kimberley", -28.7282, 24.7491),
]

# Customer categories a meter can be assigned to.
customer_types = ["Residential", "Commercial", "Industrial"]

# Upper bound on the number of smart meters to simulate.
# NOTE(review): the generation loop in this file stops at 50,000 rows; at
# 1,440 readings per meter that cap is reached during meter 35, so most of
# these meter IDs never appear in the output. Confirm whether that is intended.
num_meters = 1000

# Accumulates one list per generated reading.
data = []
# Generate hourly readings meter by meter until the row cap is reached.
# NOTE(review): with the 1,440-entry date_range defined above and a 50,000-row
# cap, the cap is hit partway through meter 35, so later meter IDs never
# produce any rows. Confirm whether that is the intended dataset shape.
row_cap = 50_000

# Hours treated as peak demand (morning 06:00-08:59, evening 17:00-20:59).
peak_hours = {6, 7, 8, 17, 18, 19, 20}

# Base consumption range per customer type; any type not listed here
# (i.e. "Industrial") falls back to industrial_bounds.
consumption_bounds = {
    "Residential": (2, 10),
    "Commercial": (10, 50),
}
industrial_bounds = (50, 200)

for meter_id in range(1, num_meters + 1):
    # Each meter is pinned to one random location and one customer type.
    location_index = np.random.randint(0, len(locations))
    province, city, lat, lon = locations[location_index]
    customer_type = np.random.choice(customer_types)

    low_kwh, high_kwh = consumption_bounds.get(customer_type, industrial_bounds)
    has_solar = customer_type != "Industrial"

    for timestamp in date_range:
        # Stop adding readings once the dataset holds row_cap rows.
        if len(data) >= row_cap:
            break

        # Base draw for this hour, then scaled up during peak hours.
        consumption_kwh = np.random.uniform(low_kwh, high_kwh)
        if timestamp.hour in peak_hours:
            consumption_kwh *= np.random.uniform(1.2, 1.5)

        # Load shedding: 20% chance per reading, cutting consumption to
        # 30-70% of its value.
        load_shedding = np.random.choice([0, 1], p=[0.8, 0.2])
        if load_shedding:
            consumption_kwh *= np.random.uniform(0.3, 0.7)

        # Solar output is drawn only for non-industrial customers, up to
        # half of the (already adjusted) consumption.
        if has_solar:
            solar_kwh = np.random.uniform(0, consumption_kwh * 0.5)
        else:
            solar_kwh = 0

        # Grid stability metrics, drawn independently for every reading.
        voltage = np.random.uniform(215, 245)
        frequency = np.random.uniform(49.3, 50.7)
        power_factor = np.random.uniform(0.85, 1.0)

        # Fraud flag: 5% chance, drawn independently for each reading
        # (not once per meter).
        fraud = np.random.choice([0, 1], p=[0.95, 0.05])

        data.append([
            meter_id, timestamp, province, city, lat, lon, customer_type,
            round(consumption_kwh, 2), round(solar_kwh, 2), round(voltage, 1),
            round(frequency, 2), round(power_factor, 2), fraud, load_shedding,
        ])

    # Once the cap is reached, do not start another meter.
    if len(data) >= row_cap:
        break
# Assemble the generated readings into a DataFrame; the column order matches
# the order in which each reading's fields were appended to `data`.
# NOTE(review): voltage values in this file are generated in the 215-245
# range, which looks like volts rather than kilovolts. The "Voltage (kV)"
# header is kept unchanged so existing consumers of the CSV do not break;
# confirm the intended unit.
columns = [
    "Meter ID", "Timestamp", "Province", "City", "Latitude", "Longitude",
    "Customer Type", "Energy Consumption (kWh)", "Solar Generation (kWh)",
    "Voltage (kV)", "Frequency (Hz)", "Power Factor", "Fraud", "Load Shedding",
]
df_smart_grid = pd.DataFrame(data, columns=columns)

# Save the dataset to CSV. The output directory is created first because
# DataFrame.to_csv does not create missing parent directories.
output_path = "data/smart_meter_grid_south_africa.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df_smart_grid.to_csv(output_path, index=False)

# Report the actual row count and the path that was really written, so the
# message cannot drift from what the script did.
print(
    f"✅ Smart Meter Grid Data ({len(df_smart_grid):,} rows) "
    f"Generated and Saved as '{output_path}'"
)
print(df_smart_grid.head())  # Display first few rows