Skip to content

Commit 9476db5

Browse files
authored
Support for drawing availability profiles from historical data (#1372)
1 parent 3db0cf5 commit 9476db5

1 file changed

Lines changed: 82 additions & 5 deletions

File tree

data_toolkit/project/availability/outages/create_availability_iteration_input_csvs.py

Lines changed: 82 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Copyright 2016-2025 Blue Marble Analytics LLC.
2+
# Copyright 2026 Sylvan Energy Analytics LLC.
23
#
34
# Licensed under the Apache License, Version 2.0 (the "License");
45
# you may not use this file except in compliance with the License.
@@ -74,14 +75,24 @@ def parse_arguments(args):
7475

7576
parser.add_argument("-db", "--database", default="../../io.db")
7677
parser.add_argument(
77-
"-csv",
78-
"--input_csv",
78+
"-o_csv",
79+
"--outage_params_input_csv",
7980
default=None,
8081
help="""Path to the unit availability params CSV file to load into the
8182
raw_data_unit_availability_params table in the database. If not
8283
specified, data will be assumed to have been already loaded into the
8384
database.""",
8485
)
86+
parser.add_argument(
87+
"-hist_csv",
88+
"--historical_availability_csv",
89+
default=None,
90+
help="""Path to the historical availability data CSV file to load for the
91+
historical_year outage model. Expected columns: year, month, day_of_month,
92+
hour_of_day, unit, derate. Each unit can have multiple years of hourly
93+
derate data. If not specified, model will not be able to use historical_year
94+
outage_model.""",
95+
)
8596
parser.add_argument("-stage", "--stage_id", default=1, help="Defaults to 1.")
8697
parser.add_argument("-n_iter", "--n_iterations")
8798
parser.add_argument(
@@ -188,6 +199,7 @@ def get_weighted_availability_adjustment(
188199
project_iteration_seed,
189200
max_integer_for_unit_outage_seeding,
190201
hyb_stor_seed_unit_increment,
202+
historical_data=None,
191203
):
192204
project_outage_adjustment = []
193205
project_hyb_stor_outage_adjustment = []
@@ -227,6 +239,8 @@ def get_weighted_availability_adjustment(
227239
mttr=unit_mttr,
228240
n_units=n_units,
229241
unit_seed=unit_seed,
242+
historical_data=historical_data,
243+
unit=unit,
230244
)
231245

232246
# Get the project outage
@@ -246,6 +260,8 @@ def get_weighted_availability_adjustment(
246260
if unit_seed is None
247261
else unit_seed + hyb_stor_seed_unit_increment
248262
),
263+
historical_data=historical_data,
264+
unit=unit,
249265
)
250266
project_hyb_stor_outage_adjustment.append(
251267
unit_outage_adjustment * unit_weight
@@ -277,8 +293,10 @@ def simulate_project_availability(
277293
study_year,
278294
filepath,
279295
print_ones,
296+
historical_data=None,
280297
):
281298

299+
# print(f"Simulating project {project}, iteration {iteration_n}")
282300
stage_tmp_dict = get_temporal_structure(study_year)
283301

284302
# No stage simulation at this point; assume single stage
@@ -291,6 +309,7 @@ def simulate_project_availability(
291309
project_iteration_seed=project_iteration_seed,
292310
max_integer_for_unit_outage_seeding=max_integer_for_unit_outage_seeding,
293311
hyb_stor_seed_unit_increment=hyb_stor_seed_unit_increment,
312+
historical_data=historical_data,
294313
)
295314

296315
export_df = pd.DataFrame(
@@ -344,16 +363,23 @@ def simulate_unit_outages(
344363
unit_seed,
345364
dt=1,
346365
starting_outage_states=None,
366+
historical_data=None,
367+
unit=None,
347368
):
348369
"""
349-
outage_model: ["Derate", "MC_independent", "MC_sequential"]
370+
outage_model: ["Derate", "MC_independent", "MC_sequential", "historical_year"]
350371
FOR: numpy array with the length of the simulation window and the FOR as
351372
value; note this can vary by timepoint
352373
N_units: integer, number of units modeled
353374
starting_outage_states: array with the starting outage state (1/0) for
354375
each of the N units
355376
dt: outage timestep length
377+
historical_data: dict with unit names as keys and DataFrames containing
378+
historical availability data with columns: year, month, day_of_month,
379+
hour_of_day, unit, derate (for historical_year model)
380+
unit: unit name (for historical_year model)
356381
"""
382+
# print(f"Simulating unit outages for {unit}... Outage model: {outage_model}")
357383
# Seed the simulation if requested
358384
if unit_seed is not None:
359385
np.random.seed(unit_seed)
@@ -411,6 +437,42 @@ def simulate_unit_outages(
411437
availability[t, :] = avail_tmp
412438
avail_last = avail_tmp
413439

440+
elif outage_model == "historical_year":
441+
# Sample a random year from historical data for this unit
442+
if historical_data is None or unit not in historical_data:
443+
raise ValueError(
444+
f"Historical data not provided for unit {unit}. "
445+
"Please provide historical_availability_csv."
446+
)
447+
448+
unit_hist_data = historical_data[unit]
449+
450+
# Get list of unique years in the historical data
451+
available_years = unit_hist_data["year"].unique()
452+
453+
if len(available_years) == 0:
454+
raise ValueError(f"No historical years found for unit {unit}.")
455+
456+
# Randomly select a year
457+
selected_year = np.random.choice(available_years)
458+
459+
# Get the derate values for the selected year
460+
# Data is already sorted by year, month, day_of_month, hour_of_day
461+
year_data = unit_hist_data[unit_hist_data["year"] == selected_year]
462+
derate_values = year_data["value"].values
463+
464+
# Check if we have the right number of hours
465+
if len(derate_values) != len(for_array):
466+
raise ValueError(
467+
f"Historical data for unit {unit}, year {selected_year} "
468+
f"has {len(derate_values)} hours, but expected {len(for_array)}."
469+
)
470+
471+
# Create availability array - replicate the same derate pattern for each
472+
# of the n_units (this represents multiple identical units with the same
473+
# historical outage pattern)
474+
availability = np.outer(derate_values, np.ones(n_units))
475+
414476
else:
415477
availability = np.ones([len(for_array), n_units])
416478

@@ -478,13 +540,28 @@ def main(args=None):
478540
conn = connect_to_database(parsed_args.database)
479541

480542
# ### Load data from CSV
481-
if parsed_args.input_csv is not None:
543+
if parsed_args.outage_params_input_csv is not None:
482544
read_and_import_csv(
483545
conn=conn,
484-
f_path=parsed_args.input_csv,
546+
f_path=parsed_args.outage_params_input_csv,
485547
table="raw_data_unit_availability_params",
486548
)
487549

550+
# Load historical availability data if provided
551+
# Not loaded into the database for now
552+
historical_data = None
553+
if parsed_args.historical_availability_csv is not None:
554+
# Read the CSV file directly into a DataFrame
555+
hist_df = pd.read_csv(parsed_args.historical_availability_csv)
556+
557+
# Expected columns: year, month, day_of_month, hour_of_day, unit, derate
558+
# Create an hour index for ordering (1-8760 or 1-8784 for leap years)
559+
# Sort by temporal columns to ensure proper ordering
560+
hist_df = hist_df.sort_values(["year", "month", "day_of_month", "hour_of_day"])
561+
562+
# Group by unit for unit-level access
563+
historical_data = {unit: group for unit, group in hist_df.groupby("unit")}
564+
488565
# Make out directory if it doesn't exist
489566
if not os.path.exists(parsed_args.output_directory):
490567
os.makedirs(parsed_args.output_directory)

0 commit comments

Comments
 (0)