Skip to content

Commit fa43505

Browse files
authored
Improve efficiency of Data Toolkit availability profile creation (#1373)
1 parent 9476db5 commit fa43505

1 file changed

Lines changed: 74 additions & 61 deletions

File tree

data_toolkit/project/availability/outages/create_availability_iteration_input_csvs.py

Lines changed: 74 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -481,38 +481,64 @@ def simulate_unit_outages(
481481
return outage_adjustment
482482

483483

484-
def simulate_project_availability_pool(pool_datum):
484+
def simulate_all_project_iterations(pool_datum):
485485
"""
486-
Helper function to easily pass to pool.map if solving subproblems in
487-
parallel
486+
Helper function to simulate all iterations for a single project.
487+
This allows parallelization by project rather than by project-iteration.
488488
"""
489489
[
490-
project_df,
490+
conn_string,
491491
project,
492-
iteration_n,
492+
n_iterations,
493493
user_provided_seeding,
494-
project_iteration_seed,
494+
starting_project_iteration_seed,
495495
max_integer_for_unit_outage_seeding,
496496
hyb_stor_seed_unit_increment,
497497
stage_id,
498498
study_year,
499499
filepath,
500500
print_ones,
501+
historical_data,
501502
] = pool_datum
502503

503-
simulate_project_availability(
504-
project_df=project_df,
505-
project=project,
506-
iteration_n=iteration_n,
507-
user_provided_seeding=user_provided_seeding,
508-
project_iteration_seed=project_iteration_seed,
509-
max_integer_for_unit_outage_seeding=max_integer_for_unit_outage_seeding,
510-
hyb_stor_seed_unit_increment=hyb_stor_seed_unit_increment,
511-
stage_id=stage_id,
512-
study_year=study_year,
513-
filepath=filepath,
514-
print_ones=print_ones,
515-
)
504+
# Reconnect to database in this process
505+
conn = connect_to_database(conn_string)
506+
507+
# Loop through all iterations for this project
508+
project_iteration_seed = starting_project_iteration_seed
509+
for iteration_n in range(1, n_iterations + 1):
510+
project_df = pd.read_sql(
511+
f"""
512+
SELECT * FROM raw_data_unit_availability_params
513+
WHERE project = '{project}'
514+
;""",
515+
conn,
516+
)
517+
518+
simulate_project_availability(
519+
project_df=project_df,
520+
project=project,
521+
iteration_n=iteration_n,
522+
user_provided_seeding=user_provided_seeding,
523+
project_iteration_seed=(
524+
project_iteration_seed if user_provided_seeding else None
525+
),
526+
max_integer_for_unit_outage_seeding=(
527+
max_integer_for_unit_outage_seeding if user_provided_seeding else None
528+
),
529+
hyb_stor_seed_unit_increment=(
530+
hyb_stor_seed_unit_increment if user_provided_seeding else None
531+
),
532+
stage_id=stage_id,
533+
study_year=study_year,
534+
filepath=filepath,
535+
print_ones=print_ones,
536+
historical_data=historical_data,
537+
)
538+
539+
project_iteration_seed += 1
540+
541+
conn.close()
516542

517543

518544
def sort_csv_file(filepath, columns_to_sort_by, ascending):
@@ -575,7 +601,12 @@ def main(args=None):
575601
pool_data = []
576602
project_iteration_seed = int(parsed_args.starting_project_iteration_seed)
577603
hyb_stor_seed_unit_increment = int(parsed_args.hybrid_storage_seed_increment)
578-
for project in projects:
604+
n_iterations = int(parsed_args.n_iterations)
605+
606+
# Calculate total iterations per project for seed incrementing
607+
iterations_per_project = n_iterations
608+
609+
for project_idx, project in enumerate(projects):
579610
# Write header if we are overwriting the file or it doesn't exist
580611
overwrite = parsed_args.overwrite
581612
header = [
@@ -597,54 +628,36 @@ def main(args=None):
597628
csvwriter = csv.writer(f)
598629
csvwriter.writerow(header)
599630

600-
# Create iteration seeds
601-
for iteration_n in range(1, int(parsed_args.n_iterations) + 1):
602-
project_df = pd.read_sql(
603-
f"""
604-
SELECT * FROM raw_data_unit_availability_params
605-
WHERE project = '{project}'
606-
;""",
607-
conn,
608-
)
609-
610-
# Pass user provided seed values if user_provide_seeding
611-
# requested; otherwise, pass None
612-
pool_data.append(
613-
[
614-
project_df,
615-
project,
616-
iteration_n,
617-
parsed_args.user_provided_seeding,
618-
(
619-
project_iteration_seed
620-
if parsed_args.user_provided_seeding
621-
else None
622-
),
623-
(
624-
parsed_args.max_integer_for_unit_outage_seeding
625-
if parsed_args.user_provided_seeding
626-
else None
627-
),
628-
(
629-
hyb_stor_seed_unit_increment
630-
if parsed_args.user_provided_seeding
631-
else None
632-
),
633-
parsed_args.stage_id,
634-
int(parsed_args.study_year),
635-
filepath,
636-
parsed_args.print_ones,
637-
]
638-
)
631+
# Calculate starting seed for this project
632+
starting_seed_for_project = (
633+
int(parsed_args.starting_project_iteration_seed)
634+
+ project_idx * iterations_per_project
635+
)
639636

640-
project_iteration_seed += 1
637+
# Create pool data entry for this project (all iterations)
638+
pool_data.append(
639+
[
640+
parsed_args.database,
641+
project,
642+
n_iterations,
643+
parsed_args.user_provided_seeding,
644+
starting_seed_for_project,
645+
parsed_args.max_integer_for_unit_outage_seeding,
646+
hyb_stor_seed_unit_increment,
647+
parsed_args.stage_id,
648+
int(parsed_args.study_year),
649+
filepath,
650+
parsed_args.print_ones,
651+
historical_data,
652+
]
653+
)
641654

642655
pool_data = tuple(pool_data)
643656

644657
# Pool must use spawn to work properly on Linux
645658
pool = get_context("spawn").Pool(int(parsed_args.n_parallel_projects))
646659

647-
pool.map(simulate_project_availability_pool, pool_data)
660+
pool.map(simulate_all_project_iterations, pool_data)
648661
pool.close()
649662

650663
# Sort the resulting CSV file if requested

0 commit comments

Comments
 (0)