11# Copyright 2016-2025 Blue Marble Analytics LLC.
2+ # Copyright 2026 Sylvan Energy Analytics LLC.
23#
34# Licensed under the Apache License, Version 2.0 (the "License");
45# you may not use this file except in compliance with the License.
@@ -74,14 +75,24 @@ def parse_arguments(args):
7475
7576 parser .add_argument ("-db" , "--database" , default = "../../io.db" )
7677 parser .add_argument (
77- "-csv " ,
78- "--input_csv " ,
78+ "-o_csv " ,
79+ "--outage_params_input_csv " ,
7980 default = None ,
8081 help = """Path to the unit availability params CSV file to load into the
8182 raw_data_unit_availability_params table in the database. If not
8283 specified, data will be assumed to have been already loaded into the
8384 database.""" ,
8485 )
86+ parser .add_argument (
87+ "-hist_csv" ,
88+ "--historical_availability_csv" ,
89+ default = None ,
90+ help = """Path to the historical availability data CSV file to load for the
91+ historical_year outage model. Expected columns: year, month, day_of_month,
92+ hour_of_day, unit, derate. Each unit can have multiple years of hourly
93+ derate data. If not specified, model will not be able to use historical_year
94+ outage_model.""" ,
95+ )
8596 parser .add_argument ("-stage" , "--stage_id" , default = 1 , help = "Defaults to 1." )
8697 parser .add_argument ("-n_iter" , "--n_iterations" )
8798 parser .add_argument (
@@ -188,6 +199,7 @@ def get_weighted_availability_adjustment(
188199 project_iteration_seed ,
189200 max_integer_for_unit_outage_seeding ,
190201 hyb_stor_seed_unit_increment ,
202+ historical_data = None ,
191203):
192204 project_outage_adjustment = []
193205 project_hyb_stor_outage_adjustment = []
@@ -227,6 +239,8 @@ def get_weighted_availability_adjustment(
227239 mttr = unit_mttr ,
228240 n_units = n_units ,
229241 unit_seed = unit_seed ,
242+ historical_data = historical_data ,
243+ unit = unit ,
230244 )
231245
232246 # Get the project outage
@@ -246,6 +260,8 @@ def get_weighted_availability_adjustment(
246260 if unit_seed is None
247261 else unit_seed + hyb_stor_seed_unit_increment
248262 ),
263+ historical_data = historical_data ,
264+ unit = unit ,
249265 )
250266 project_hyb_stor_outage_adjustment .append (
251267 unit_outage_adjustment * unit_weight
@@ -277,8 +293,10 @@ def simulate_project_availability(
277293 study_year ,
278294 filepath ,
279295 print_ones ,
296+ historical_data = None ,
280297):
281298
299+ # print(f"Simulating project {project}, iteration {iteration_n}")
282300 stage_tmp_dict = get_temporal_structure (study_year )
283301
284302 # No stage simulation at this point; assume single stage
@@ -291,6 +309,7 @@ def simulate_project_availability(
291309 project_iteration_seed = project_iteration_seed ,
292310 max_integer_for_unit_outage_seeding = max_integer_for_unit_outage_seeding ,
293311 hyb_stor_seed_unit_increment = hyb_stor_seed_unit_increment ,
312+ historical_data = historical_data ,
294313 )
295314
296315 export_df = pd .DataFrame (
@@ -344,16 +363,23 @@ def simulate_unit_outages(
344363 unit_seed ,
345364 dt = 1 ,
346365 starting_outage_states = None ,
366+ historical_data = None ,
367+ unit = None ,
347368):
348369 """
349- outage_model: ["Derate", "MC_independent", "MC_sequential"]
370+ outage_model: ["Derate", "MC_independent", "MC_sequential", "historical_year" ]
350371 FOR: numpy array with the length of the simulation window and the FOR as
351372 value; note this can vary by timepoint
352373 N_units: integer, number of units modeled
353374 starting_outage_states: array with the starting outage state (1/0) for
354375 each of the N units
355376 dt: outage timestep length
377+ historical_data: dict with unit names as keys and DataFrames containing
378+ historical availability data with columns: year, month, day_of_month,
379+ hour_of_day, unit, derate (for historical_year model)
380+ unit: unit name (for historical_year model)
356381 """
382+ # print(f"Simulating unit outages for {unit}... Outage model: {outage_model}")
357383 # Seed the simulation if requested
358384 if unit_seed is not None :
359385 np .random .seed (unit_seed )
@@ -411,6 +437,42 @@ def simulate_unit_outages(
411437 availability [t , :] = avail_tmp
412438 avail_last = avail_tmp
413439
440+ elif outage_model == "historical_year" :
441+ # Sample a random year from historical data for this unit
442+ if historical_data is None or unit not in historical_data :
443+ raise ValueError (
444+ f"Historical data not provided for unit { unit } . "
445+ "Please provide historical_availability_csv."
446+ )
447+
448+ unit_hist_data = historical_data [unit ]
449+
450+ # Get list of unique years in the historical data
451+ available_years = unit_hist_data ["year" ].unique ()
452+
453+ if len (available_years ) == 0 :
454+ raise ValueError (f"No historical years found for unit { unit } ." )
455+
456+ # Randomly select a year
457+ selected_year = np .random .choice (available_years )
458+
459+ # Get the derate values for the selected year
460+ # Data is already sorted by year, month, day_of_month, hour_of_day
461+ year_data = unit_hist_data [unit_hist_data ["year" ] == selected_year ]
462+ derate_values = year_data ["value" ].values
463+
464+ # Check if we have the right number of hours
465+ if len (derate_values ) != len (for_array ):
466+ raise ValueError (
467+ f"Historical data for unit { unit } , year { selected_year } "
468+ f"has { len (derate_values )} hours, but expected { len (for_array )} ."
469+ )
470+
471+ # Create availability array - replicate the same derate pattern for each
472+ # of the n_units (this represents multiple identical units with the same
473+ # historical outage pattern)
474+ availability = np .outer (derate_values , np .ones (n_units ))
475+
414476 else :
415477 availability = np .ones ([len (for_array ), n_units ])
416478
@@ -478,13 +540,28 @@ def main(args=None):
478540 conn = connect_to_database (parsed_args .database )
479541
480542 # ### Load data from CSV
481- if parsed_args .input_csv is not None :
543+ if parsed_args .outage_params_input_csv is not None :
482544 read_and_import_csv (
483545 conn = conn ,
484- f_path = parsed_args .input_csv ,
546+ f_path = parsed_args .outage_params_input_csv ,
485547 table = "raw_data_unit_availability_params" ,
486548 )
487549
550+ # Load historical availability data if provided
551+ # Not loaded into the database for now
552+ historical_data = None
553+ if parsed_args .historical_availability_csv is not None :
554+ # Read the CSV file directly into a DataFrame
555+ hist_df = pd .read_csv (parsed_args .historical_availability_csv )
556+
557+ # Expected columns: year, month, day_of_month, hour_of_day, unit, derate
558+ # Create an hour index for ordering (1-8760 or 1-8784 for leap years)
559+ # Sort by temporal columns to ensure proper ordering
560+ hist_df = hist_df .sort_values (["year" , "month" , "day_of_month" , "hour_of_day" ])
561+
562+ # Group by unit for unit-level access
563+ historical_data = {unit : group for unit , group in hist_df .groupby ("unit" )}
564+
488565 # Make out directory if it doesn't exist
489566 if not os .path .exists (parsed_args .output_directory ):
490567 os .makedirs (parsed_args .output_directory )
0 commit comments