From 8c6464180abd25e8a4a642a01ebb78a2fb6f3358 Mon Sep 17 00:00:00 2001 From: Jon Date: Wed, 29 Jun 2022 00:21:24 +0000 Subject: [PATCH 001/165] Skeleton of orchestrator entrypoint --- adaptive_scheduler/simulation/__init__.py | 0 adaptive_scheduler/simulation/orchestrator.py | 137 ++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 adaptive_scheduler/simulation/__init__.py create mode 100644 adaptive_scheduler/simulation/orchestrator.py diff --git a/adaptive_scheduler/simulation/__init__.py b/adaptive_scheduler/simulation/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py new file mode 100644 index 00000000..39b04727 --- /dev/null +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -0,0 +1,137 @@ +''' +Entrypoint to run the scheduler in simulation mode. + +This orchestrator will setup the simulation from environment variables specified, +run the scheduling loop one or more times, and record metrics about a single run or +the full simulation within an OpenSearch index. The orchestrator will also handle +advancing time and input when simulating over a period of time. 
+''' + +import logging +import sys +import os +from datetime import datetime, timedelta + +from opensearchpy import OpenSearch +from lcogt_logging import LCOGTFormatter +from dateutil.parser import parse + +from adaptive_scheduler.eventbus import get_eventbus +from adaptive_scheduler.kernel.fullscheduler_ortoolkit import FullScheduler_ortoolkit +from adaptive_scheduler.monitoring.network_status import Network +from adaptive_scheduler.interfaces import NetworkInterface +from adaptive_scheduler.observations import ObservationScheduleInterface +from adaptive_scheduler.observation_portal_connections import ObservationPortalInterface +from adaptive_scheduler.configdb_connections import ConfigDBInterface +from adaptive_scheduler.scheduler import LCOGTNetworkScheduler, SchedulerRunner +from adaptive_scheduler.scheduler_input import ( + SchedulingInputFactory, SchedulingInputProvider, SchedulerParameters +) + +log = logging.getLogger('adaptive_scheduler') + +# Some Environment Variable settings for the simulation +RUN_ID = os.getenv("SIMULATION_RUN_ID", "1") +START_TIME = parse(os.getenv("SIMULATION_START_TIME", "2022-06-23")) +END_TIME = parse(os.getenv("SIMULATION_END_TIME", "2022-07-07")) +TIME_STEP = os.getenv("SIMULATION_TIME_STEP_MINUTES", "60") + + +def setup_logging(): + log = logging.getLogger('adaptive_scheduler') + log.setLevel(logging.INFO) + log.propagate = False + + sh = logging.StreamHandler() + sh.setLevel(logging.DEBUG) + + formatter = LCOGTFormatter() + + sh.setFormatter(formatter) + log.addHandler(sh) + + +def setup_input(current_time): + # This will eventually call endpoint in configdb and the observation portal to setup the input state of those + # source based on the current timestamp of the scheduling run. For configdb, this involves playing the records + # backwards until the time is reached. 
For the observation portal, it involves pulling over all requests + # created and PENDING at a certain point in time for the semester, which should be doable by looking at the created + # and modified timestamps and state. + pass + +def increment_input(current_time, time_step): + # This will eventually call endpoints in configdb and the observation portal to increment the state of them forward + # by the time step specified. Incrementing time forward is slightly different then the initial setup of a starting time. + # This will be called as you step forward in time to make sure these data sources contain the right input data. + # For configdb, this involves moving the records back forwards a bit. For the observation portal, it involves pulling + # down newer requests as well as cleaning up the state of old ones between time steps (completing/expiring as appropriate). + # This also means that we should complete and fail the right percentages of observations that should have ended within the last + # time_step, and set ones that are in progress to ATTEMPTED state. + pass + +def send_to_opensearch(metrics): + # Send the json metrics to the opensearch index + pass + + +def record_metrics(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id): + # Derive whatever metrics we want using the supplied scheduled requests and send them to opensearch here + + metrics = { + 'simulation_id': RUN_ID, + 'metric1': 'value' + } + send_to_opensearch(metrics) + + +def main(argv=None): + # Get all scheduler params from environment variables + sched_params = SchedulerParameters() + + # Set up and configure an application scope logger + setup_logging() + + log.info(f"Starting Scheduler Simulator with id {RUN_ID}") + + # All this setup is the same as the normal scheduling run - things will be setup based on the + # scheduler environment variables set. 
+ event_bus = get_eventbus() + schedule_interface = ObservationScheduleInterface(host=sched_params.observation_portal_url) + observation_portal_interface = ObservationPortalInterface(sched_params.observation_portal_url) + # TODO: If there is a configuration override file detected then incorporate that into the configdb_interface + configdb_interface = ConfigDBInterface(configdb_url=sched_params.configdb_url, telescope_classes=sched_params.telescope_classes) + network_state_interface = Network(configdb_interface, sched_params) + network_interface = NetworkInterface(schedule_interface, observation_portal_interface, network_state_interface, + configdb_interface) + kernel_class = FullScheduler_ortoolkit + network_model = configdb_interface.get_telescope_info() + scheduler = LCOGTNetworkScheduler(kernel_class, sched_params, event_bus, network_model) + input_provider = SchedulingInputProvider(sched_params, network_interface, network_model, is_rr_input=True) + input_factory = SchedulingInputFactory(input_provider) + + # Set the scheduler to run once each time it is invoked. 
+ sched_params.run_once = True + + # Basic orchestrator loop here: setup input, run scheduler, record metrics, step forward time, repeat + current_time = START_TIME + # Setup the input from configdb and observation portal using the current time + setup_input(current_time) + while current_time <= END_TIME: + sched_params.simulate_now = current_time.isoformat() + + # Scheduler run is invoked in the normal way, but it will just run a single time + scheduler_runner = SchedulerRunner(sched_params, scheduler, network_interface, network_model, input_factory) + scheduler_runner.run() + + # Output scheduled requests are available within the runner after it completes a run + # These are used to seed a warm start solution for the next run in the normal scheduler, but can be used to generate metrics here + rr_scheduled_requests_by_rg_id = scheduler_runner.rr_scheduled_requests_by_rg + normal_scheduled_requests_by_rg_id = scheduler_runner.normal_scheduled_requests_by_rg + record_metrics(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id) + + current_time += timedelta(minutes=TIME_STEP) + increment_input(current_time, TIME_STEP) + + +if __name__ == '__main__': + main(sys.argv[1:]) From cb86ef1876d2ee64f6d58e9c0f52629274e189f6 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Wed, 29 Jun 2022 22:27:33 +0000 Subject: [PATCH 002/165] added total scheduled time metric --- adaptive_scheduler/simulation/orchestrator.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 39b04727..8f609154 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -74,12 +74,24 @@ def send_to_opensearch(metrics): pass +def total_scheduled_time(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id): + # Sums the duration of all scheduled requests + all_scheduled_requests_by_rg_id = 
normal_scheduled_requests_by_rg_id.update(rr_scheduled_requests_by_rg_id) + total_scheduled_time = 0 + for request_group in all_scheduled_requests_by_rg_id.values(): + for request in request_group.values(): + if request.scheduled: + total_scheduled_time += request.duration + + return total_scheduled_time + def record_metrics(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id): # Derive whatever metrics we want using the supplied scheduled requests and send them to opensearch here metrics = { 'simulation_id': RUN_ID, - 'metric1': 'value' + 'total_scheduled_time': total_scheduled_time(normal_scheduled_requests_by_rg_id, + rr_scheduled_requests_by_rg_id), } send_to_opensearch(metrics) From 8606a1bcba6ecf136853071af103a6f99cebc0cf Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 30 Jun 2022 00:12:59 +0000 Subject: [PATCH 003/165] added total scheduled count metric --- adaptive_scheduler/simulation/orchestrator.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 8f609154..a7800631 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -76,15 +76,30 @@ def send_to_opensearch(metrics): def total_scheduled_time(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id): # Sums the duration of all scheduled requests + # note, not sure if this is gonna be a timedelta or float object all_scheduled_requests_by_rg_id = normal_scheduled_requests_by_rg_id.update(rr_scheduled_requests_by_rg_id) total_scheduled_time = 0 for request_group in all_scheduled_requests_by_rg_id.values(): for request in request_group.values(): if request.scheduled: - total_scheduled_time += request.duration + total_time += request.duration return total_scheduled_time +def total_scheduled_count(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id): + # Totals the number 
of requests that ended up scheduled to get percentage of requests scheduled + # we probably need to get the total number of input requests to calculate percent util, not sure how yet + # if we know that they are all guaranteed to be scheduled in here we can just sum() the lengths with + # list comprehension across the dict values + all_scheduled_requests_by_rg_id = normal_scheduled_requests_by_rg_id.update(rr_scheduled_requests_by_rg_id) + total_scheduled_count = 0 + for request_group in all_scheduled_requests_by_rg_id.values(): + for request in request_group.values(): + if request.scheduled: + total_scheduled_count += 1 + + return total_scheduled_count + def record_metrics(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id): # Derive whatever metrics we want using the supplied scheduled requests and send them to opensearch here @@ -92,6 +107,8 @@ def record_metrics(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_ 'simulation_id': RUN_ID, 'total_scheduled_time': total_scheduled_time(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id), + 'total_scheduled_count': total_scheduled_count(normal_scheduled_requests_by_rg_id, + rr_scheduled_requests_by_rg_id), } send_to_opensearch(metrics) From 053ba64411f9bab3224a97d5ce383904606a0a0d Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 30 Jun 2022 19:12:04 +0000 Subject: [PATCH 004/165] move metric functions to a separate file --- adaptive_scheduler/simulation/metrics.py | 48 +++++++++++++++++++ adaptive_scheduler/simulation/orchestrator.py | 27 +---------- 2 files changed, 49 insertions(+), 26 deletions(-) create mode 100644 adaptive_scheduler/simulation/metrics.py diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py new file mode 100644 index 00000000..d2809b5d --- /dev/null +++ b/adaptive_scheduler/simulation/metrics.py @@ -0,0 +1,48 @@ +""" +Metric calculation functions for the scheduler simulator. 
+""" + + +def combine_normal_and_rr_requests_by_rg_id(normal_scheduled_requests_by_rg_id, + rr_scheduled_requests_by_rg_id): + # this assumes that a request unique to either normal or rr and cannot be in both + # otherwise, write a check that excludes duplicates + return normal_scheduled_requests_by_rg_id.update(rr_scheduled_requests_by_rg_id) + + +def total_scheduled_time(combined_scheduled_requests_by_rg_id): + # Sums the duration of all scheduled requests + # note, not sure if this is gonna be a timedelta or float object + total_scheduled_time = 0 + for request_group in combined_scheduled_requests_by_rg_id.values(): + for request in request_group.values(): + if request.scheduled: + total_time += request.duration + + return total_scheduled_time + + +def total_scheduled_count(combined_scheduled_requests_by_rg_id): + total_scheduled_count = 0 + for request_group in combined_scheduled_requests_by_rg_id.values(): + for request in request_group.values(): + if request.scheduled: + total_scheduled_count += 1 + + return total_scheduled_count + + +def total_unscheduled_count(combined_scheduled_requests_by_rg_id): + total_unscheduled_count = 0 + for request_group in combined_scheduled_requests_by_rg_id.values(): + for request in request_group.values(): + if request.scheduled: + total_unscheduled_count += 1 + + return total_scheduled_count + + +def percent_of_requests_scheduled(combined_scheduled_requests_by_rg_id): + scheduled = total_scheduled_count(combined_scheduled_requests_by_rg_id) + unscheduled = total_unscheduled_count(combined_scheduled_requests_by_rg_id) + return scheduled/(scheduled + unscheduled) * 100 diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index a7800631..84821e7a 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -27,6 +27,7 @@ from adaptive_scheduler.scheduler_input import ( SchedulingInputFactory, SchedulingInputProvider, 
SchedulerParameters ) +from adaptive_scheduler.simulation.metrics import * log = logging.getLogger('adaptive_scheduler') @@ -73,32 +74,6 @@ def send_to_opensearch(metrics): # Send the json metrics to the opensearch index pass - -def total_scheduled_time(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id): - # Sums the duration of all scheduled requests - # note, not sure if this is gonna be a timedelta or float object - all_scheduled_requests_by_rg_id = normal_scheduled_requests_by_rg_id.update(rr_scheduled_requests_by_rg_id) - total_scheduled_time = 0 - for request_group in all_scheduled_requests_by_rg_id.values(): - for request in request_group.values(): - if request.scheduled: - total_time += request.duration - - return total_scheduled_time - -def total_scheduled_count(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id): - # Totals the number of requests that ended up scheduled to get percentage of requests scheduled - # we probably need to get the total number of input requests to calculate percent util, not sure how yet - # if we know that they are all guaranteed to be scheduled in here we can just sum() the lengths with - # list comprehension across the dict values - all_scheduled_requests_by_rg_id = normal_scheduled_requests_by_rg_id.update(rr_scheduled_requests_by_rg_id) - total_scheduled_count = 0 - for request_group in all_scheduled_requests_by_rg_id.values(): - for request in request_group.values(): - if request.scheduled: - total_scheduled_count += 1 - - return total_scheduled_count def record_metrics(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id): # Derive whatever metrics we want using the supplied scheduled requests and send them to opensearch here From 6d42329862520f8e1961d03d4852d0b23ff07ab5 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 30 Jun 2022 21:50:45 +0000 Subject: [PATCH 005/165] added functions for getting the data we want, implemented binning by priority --- 
adaptive_scheduler/simulation/metrics.py | 64 ++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 3 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index d2809b5d..8a253732 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -2,6 +2,8 @@ Metric calculation functions for the scheduler simulator. """ +from adaptive_scheduler.models import DataContainer + def combine_normal_and_rr_requests_by_rg_id(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id): @@ -13,12 +15,12 @@ def combine_normal_and_rr_requests_by_rg_id(normal_scheduled_requests_by_rg_id, def total_scheduled_time(combined_scheduled_requests_by_rg_id): # Sums the duration of all scheduled requests # note, not sure if this is gonna be a timedelta or float object + # looking at the existing code, it seems to be an integer for the duration in seconds total_scheduled_time = 0 for request_group in combined_scheduled_requests_by_rg_id.values(): for request in request_group.values(): if request.scheduled: total_time += request.duration - return total_scheduled_time @@ -28,7 +30,6 @@ def total_scheduled_count(combined_scheduled_requests_by_rg_id): for request in request_group.values(): if request.scheduled: total_scheduled_count += 1 - return total_scheduled_count @@ -38,7 +39,6 @@ def total_unscheduled_count(combined_scheduled_requests_by_rg_id): for request in request_group.values(): if request.scheduled: total_unscheduled_count += 1 - return total_scheduled_count @@ -46,3 +46,61 @@ def percent_of_requests_scheduled(combined_scheduled_requests_by_rg_id): scheduled = total_scheduled_count(combined_scheduled_requests_by_rg_id) unscheduled = total_unscheduled_count(combined_scheduled_requests_by_rg_id) return scheduled/(scheduled + unscheduled) * 100 + + +def request_group_data_populator(reservation): + # assumes the proposal/requestgroup is in the format from the observation portal 
API + request_group = reservation.request_group + proposal = request_group.proposal + requests = request_group.requests + # it may be helpful to directly set max_airmass as an attribute of a request itself + max_airmass_by_request_id = {} + for request in requests: + request_id = request.id + # assumes the airmass is the same for all configurations in a request + # again this assumes that configurations is a list of dicts matching the API + # if not we can maybe aggregate with min/max or avg + configuration = request.configurations[0] + max_airmass = configuration['constraints']['max_airmass'] + max_airmass_by_request_id[request_id] = max_airmass + + data = DataContainer( + request_group_id=reservation.request_group.id, + duration=reservation.duration, + scheduled_resource=reservation.scheduled_resource, + scheduled=reservation.scheduled, + scheduled_start=reservation.scheduled_start, + ipp_value=reservation.request_group.ipp_value, + tac_priority=proposal.tac_priority, + requests=reservation.request_group.requests, + max_airmass_by_request=max_airmass_by_request_id, + ) + return data + +# is this function name too long? or is the specificity necessary? 
+def populate_binned_data_dict_with_rg_data(data_dict, key, reservation): + request_group_id = reservation.request_group.id + if not key in data_dict: + data_dict[key] = {} + request_group_data = request_group_data_populator(reservation) + data_dict[key][request_group_id] = request_group_data + + +def bin_scheduler_result_by_effective_priority(scheduler_result): + # this is somewhat structured differently to normal_scheduled_requests_by_rg_id + # but we can change it to make it consistent if necessary + scheduled_requests_by_priority = {} + for reservations in scheduler_result.values(): + for reservation in reservations: + priority = str(reservation.priority) + populate_binned_data_dict_with_rg_data(scheduled_requests_by_priority, + priority, + reservation) + return scheduled_requests_by_priority + + +def bin_scheduler_result_by_airmass(scheduler_result): + # TODO + # the airmasses are in a list which is kind of annoying + scheduled_requests_by_airmass = {} + From 7ca823e05797f3a57dc0af67ba27d882dfc4f699 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 30 Jun 2022 21:51:09 +0000 Subject: [PATCH 006/165] small comments and modifications to record_metrics function --- adaptive_scheduler/simulation/orchestrator.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 84821e7a..d72ce35a 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -78,12 +78,17 @@ def send_to_opensearch(metrics): def record_metrics(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id): # Derive whatever metrics we want using the supplied scheduled requests and send them to opensearch here + # maybe we should just pass in the scheduler result instead and get the normal and rr requests somewhere else + + # For aggregating across all requests, but not sure if this is the best method + 
combined_scheduled_requests_by_rg_id = combine_normal_and_rr_requests_by_rg_id( + normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id) + metrics = { 'simulation_id': RUN_ID, - 'total_scheduled_time': total_scheduled_time(normal_scheduled_requests_by_rg_id, - rr_scheduled_requests_by_rg_id), - 'total_scheduled_count': total_scheduled_count(normal_scheduled_requests_by_rg_id, - rr_scheduled_requests_by_rg_id), + 'total_scheduled_time': total_scheduled_time(combined_scheduled_requests_by_rg_id) + 'total_scheduled_count': total_scheduled_count(combined_scheduled_requests_by_rg_id) + 'percent_scheduled': percent_of_requests_scheduled(combined_scheduled_requests_by_rg_id) } send_to_opensearch(metrics) From 98c5e14a97f12c01185e08681b85d6282f25bf88 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Thu, 30 Jun 2022 15:12:24 -0700 Subject: [PATCH 007/165] available_time --- adaptive_scheduler/simulation/metrics.py | 17 +++++++++++++++++ adaptive_scheduler/simulation/orchestrator.py | 9 +++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index d2809b5d..93597aae 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -1,3 +1,4 @@ +from adaptive_scheduler.utils import time_in_capped_intervals """ Metric calculation functions for the scheduler simulator. 
""" @@ -32,6 +33,20 @@ def total_scheduled_count(combined_scheduled_requests_by_rg_id): return total_scheduled_count +def total_available_time(scheduler, combined_scheduled_requests_by_rg_id): + total_available_time = 0 + resources_scheduled = combined_scheduled_requests_by_rg_id.keys() + for resource in resources_scheduled: + available_time = 0 + if resource in scheduler.visibility_casche: + dark_intervals = scheduler.visibility_cache[resource] + available_seconds = time_in_capped_intervals(dark_intervals, estimated_scheduler_end, + scheduler.scheduling_horizon( + estimated_scheduler_end)) + + + + def total_unscheduled_count(combined_scheduled_requests_by_rg_id): total_unscheduled_count = 0 for request_group in combined_scheduled_requests_by_rg_id.values(): @@ -46,3 +61,5 @@ def percent_of_requests_scheduled(combined_scheduled_requests_by_rg_id): scheduled = total_scheduled_count(combined_scheduled_requests_by_rg_id) unscheduled = total_unscheduled_count(combined_scheduled_requests_by_rg_id) return scheduled/(scheduled + unscheduled) * 100 + + diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 84821e7a..700c6493 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -75,7 +75,7 @@ def send_to_opensearch(metrics): pass -def record_metrics(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id): +def record_metrics(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id, scheduler_runner_scheduler, scheduler_runner_current_time): # Derive whatever metrics we want using the supplied scheduled requests and send them to opensearch here metrics = { @@ -84,6 +84,9 @@ def record_metrics(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_ rr_scheduled_requests_by_rg_id), 'total_scheduled_count': total_scheduled_count(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id), + 'total_available_time' : 
total_available_time(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id, + scheduler_runner_scheduler, scheduler_runner_current_time), + } send_to_opensearch(metrics) @@ -131,7 +134,9 @@ def main(argv=None): # These are used to seed a warm start solution for the next run in the normal scheduler, but can be used to generate metrics here rr_scheduled_requests_by_rg_id = scheduler_runner.rr_scheduled_requests_by_rg normal_scheduled_requests_by_rg_id = scheduler_runner.normal_scheduled_requests_by_rg - record_metrics(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id) + scheduler_runner_scheduler = scheduler_runner.scheduler + scheduler_runner_current_time = scheduler_runner.sched_params.simulate_now + record_metrics(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id, scheduler_runner_scheduler, scheduler_runner_current_time) current_time += timedelta(minutes=TIME_STEP) increment_input(current_time, TIME_STEP) From 0737f96c3941c0b72ea4451132d6a5a3c5d4b9d6 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 30 Jun 2022 22:18:25 +0000 Subject: [PATCH 008/165] time capping function for filtering metrics (WIP) --- adaptive_scheduler/simulation/metrics.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 8a253732..ebc3afb6 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -58,8 +58,8 @@ def request_group_data_populator(reservation): for request in requests: request_id = request.id # assumes the airmass is the same for all configurations in a request - # again this assumes that configurations is a list of dicts matching the API # if not we can maybe aggregate with min/max or avg + # again this assumes that configurations is a list of dicts matching the API configuration = request.configurations[0] max_airmass = configuration['constraints']['max_airmass'] 
max_airmass_by_request_id[request_id] = max_airmass @@ -104,3 +104,13 @@ def bin_scheduler_result_by_airmass(scheduler_result): # the airmasses are in a list which is kind of annoying scheduled_requests_by_airmass = {} + +def cap_scheduler_results_by_effective_horizon(scheduler_result, horizon_length): + # need to confirm the time format for scheduled_start before doing anything + # but basically this function truncates the scheduler results to only include things + # scheduled within a certain period of time and modifies the schedule accordingly + for reservations in scheduler_result.values(): + for reservation in reservations: + if reservation.scheduled_start: # is after the horizon + reservations.remove(reservation) + return scheduler_result From 6d1505ca6074d42b8e2200d715bf380ea14ea64d Mon Sep 17 00:00:00 2001 From: Jon Date: Fri, 1 Jul 2022 17:18:36 +0000 Subject: [PATCH 009/165] Some changes for supporting running the scheduler simulator --- adaptive_scheduler/scheduler.py | 16 ++++++++----- adaptive_scheduler/simulation/orchestrator.py | 23 ++++++++++++------- pyproject.toml | 1 + 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/adaptive_scheduler/scheduler.py b/adaptive_scheduler/scheduler.py index e270f913..7c703daf 100644 --- a/adaptive_scheduler/scheduler.py +++ b/adaptive_scheduler/scheduler.py @@ -626,11 +626,11 @@ def __init__(self, sched_params, scheduler, network_interface, network_model, in self.sched_params = sched_params self.warm_starts_setting = sched_params.warm_starts self.scheduler = scheduler + self.normal_scheduler_result = None + self.rr_scheduler_result = None self.network_interface = network_interface self.network_model = network_model self.input_factory = input_factory - self.normal_scheduled_requests_by_rg = {} - self.rr_scheduled_requests_by_rg = {} self.log = logging.getLogger(__name__) # List of strings to be printed in final scheduling summary self.summary_events = [] @@ -828,6 +828,10 @@ def 
_can_apply_scheduler_result(self, scheduler_result, apply_deadline): self.log.warn("Empty scheduler result. Schedule will not be saved.") return False + if self.sched_params.simulate_now: + # Don't care about deadlines if you are simulating a time in the past + return True + estimated_apply_timedelta = self.avg_save_time_per_reservation_timedelta * scheduler_result.count_reservations() estimated_apply_completion = datetime.utcnow() + estimated_apply_timedelta self.log.info( @@ -973,7 +977,7 @@ def create_rr_schedule(self, scheduler_input): rr_scheduler_result = self.call_scheduler(scheduler_input, deadline) try: - self.rr_scheduled_requests_by_rg = rr_scheduler_result.get_scheduled_requests_by_request_group_id() + self.rr_scheduler_result = rr_scheduler_result self.apply_rr_result(rr_scheduler_result, scheduler_input, deadline) rr_scheduling_end = datetime.utcnow() rr_scheduling_timedelta = rr_scheduling_end - rr_scheduling_start @@ -1019,7 +1023,7 @@ def create_normal_schedule(self, scheduler_input): resources_to_clear = list(self.network_model.keys()) try: before_apply = datetime.utcnow() - self.normal_scheduled_requests_by_rg = scheduler_result.get_scheduled_requests_by_request_group_id() + self.normal_scheduler_result = scheduler_result n_submitted = self.apply_normal_result(scheduler_result, scheduler_input, resources_to_clear, deadline) @@ -1068,14 +1072,14 @@ def scheduling_cycle(self, schedule_type, network_state_timestamp, rr_schedule_r if schedule_type == NORMAL_OBSERVATION_TYPE: scheduler_input = self.input_factory.create_normal_scheduling_input( self.estimated_normal_run_timedelta.total_seconds(), - scheduled_requests_by_rg=self.normal_scheduled_requests_by_rg, + scheduled_requests_by_rg=self.normal_scheduler_result.get_scheduled_requests_by_request_group_id() if self.normal_scheduler_result else {}, rr_schedule=rr_schedule_result.schedule, network_state_timestamp=network_state_timestamp) result = self.create_normal_schedule(scheduler_input) elif 
schedule_type == RR_OBSERVATION_TYPE: scheduler_input = self.input_factory.create_rr_scheduling_input( self.estimated_rr_run_timedelta.total_seconds(), - scheduled_requests_by_rg=self.rr_scheduled_requests_by_rg, + scheduled_requests_by_rg=self.rr_scheduler_result.get_scheduled_requests_by_request_group_id() if self.rr_scheduler_result else {}, network_state_timestamp=network_state_timestamp) result = self.create_rr_schedule(scheduler_input) return result diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 39b04727..282587c4 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -34,7 +34,7 @@ RUN_ID = os.getenv("SIMULATION_RUN_ID", "1") START_TIME = parse(os.getenv("SIMULATION_START_TIME", "2022-06-23")) END_TIME = parse(os.getenv("SIMULATION_END_TIME", "2022-07-07")) -TIME_STEP = os.getenv("SIMULATION_TIME_STEP_MINUTES", "60") +TIME_STEP = float(os.getenv("SIMULATION_TIME_STEP_MINUTES", "60")) def setup_logging(): @@ -57,6 +57,7 @@ def setup_input(current_time): # backwards until the time is reached. For the observation portal, it involves pulling over all requests # created and PENDING at a certain point in time for the semester, which should be doable by looking at the created # and modified timestamps and state. + log.info(f"Placeholder for setting up input for time {current_time.isoformat}") pass def increment_input(current_time, time_step): @@ -67,6 +68,7 @@ def increment_input(current_time, time_step): # down newer requests as well as cleaning up the state of old ones between time steps (completing/expiring as appropriate). # This also means that we should complete and fail the right percentages of observations that should have ended within the last # time_step, and set ones that are in progress to ATTEMPTED state. 
+ log.info(f"Placeholder for incrementing input by {time_step} to time {current_time.isoformat}") pass def send_to_opensearch(metrics): @@ -74,9 +76,9 @@ def send_to_opensearch(metrics): pass -def record_metrics(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id): +def record_metrics(sched_params, normal_scheduler_result, rr_scheduler_result): # Derive whatever metrics we want using the supplied scheduled requests and send them to opensearch here - + log.info("Recording metrics for scheduler simulation run") metrics = { 'simulation_id': RUN_ID, 'metric1': 'value' @@ -91,7 +93,7 @@ def main(argv=None): # Set up and configure an application scope logger setup_logging() - log.info(f"Starting Scheduler Simulator with id {RUN_ID}") + log.info(f"Starting Scheduler Simulator with id {RUN_ID} and time range {START_TIME.isoformat()} to {END_TIME.isoformat()}") # All this setup is the same as the normal scheduling run - things will be setup based on the # scheduler environment variables set. 
@@ -117,7 +119,8 @@ def main(argv=None): # Setup the input from configdb and observation portal using the current time setup_input(current_time) while current_time <= END_TIME: - sched_params.simulate_now = current_time.isoformat() + log.info(f"Simulating with current time {current_time.isoformat()}") + sched_params.simulate_now = f"{current_time.isoformat()}Z" # Scheduler run is invoked in the normal way, but it will just run a single time scheduler_runner = SchedulerRunner(sched_params, scheduler, network_interface, network_model, input_factory) @@ -125,13 +128,17 @@ def main(argv=None): # Output scheduled requests are available within the runner after it completes a run # These are used to seed a warm start solution for the next run in the normal scheduler, but can be used to generate metrics here - rr_scheduled_requests_by_rg_id = scheduler_runner.rr_scheduled_requests_by_rg - normal_scheduled_requests_by_rg_id = scheduler_runner.normal_scheduled_requests_by_rg - record_metrics(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id) + record_metrics( + sched_params, + scheduler_runner.normal_scheduler_result, + scheduler_runner.rr_scheduler_result + ) current_time += timedelta(minutes=TIME_STEP) increment_input(current_time, TIME_STEP) + log.info(f"Finished running simulation {RUN_ID}, exiting") + if __name__ == '__main__': main(sys.argv[1:]) diff --git a/pyproject.toml b/pyproject.toml index ce3e359c..5749864a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,3 +44,4 @@ build-backend = "poetry.core.masonry.api" [tool.poetry.scripts] adaptive-scheduler = 'adaptive_scheduler.cli:main' +simulation-orchestrator = 'adaptive_scheduler.simulation.orchestrator:main' From 3f372dafe7f3369946b4a69405b711dd95059305 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Fri, 1 Jul 2022 10:43:43 -0700 Subject: [PATCH 010/165] draft airmass metrics --- adaptive_scheduler/simulation/metrics.py | 38 ++++++++++++++++++++---- 1 file changed, 33 insertions(+), 5 
deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index a03a5cb1..7ffb310c 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -1,11 +1,14 @@ """ Metric calculation functions for the scheduler simulator. """ - +import numpy as np +import datetime as dt +from datetime import datetime from adaptive_scheduler.utils import time_in_capped_intervals from adaptive_scheduler.models import DataContainer + def combine_normal_and_rr_requests_by_rg_id(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id): # this assumes that a request unique to either normal or rr and cannot be in both @@ -35,7 +38,7 @@ def total_scheduled_count(combined_scheduled_requests_by_rg_id): def total_available_time(combined_scheduled_requests_by_rg_id, - scheduler_runner_scheduler, scheduler_runner_current_time): + scheduler_runner_scheduler, scheduler_runner_current_time, horizon_days): total_available_time = 0 resources_scheduled = combined_scheduled_requests_by_rg_id.keys() for resource in resources_scheduled: @@ -43,11 +46,10 @@ def total_available_time(combined_scheduled_requests_by_rg_id, if resource in scheduler_runner_scheduler.visibility_casche: dark_intervals = scheduler_runner_scheduler.visibility_cache[resource] available_time = time_in_capped_intervals(dark_intervals, scheduler_runner_current_time, - scheduler_runner_scheduler.scheduling_horizon(scheduler_runner_current_time)) + scheduler_runner_current_time + dt.timedelta(days=horizon_days)) total_available_time += available_time return total_available_time - - + def total_unscheduled_count(combined_scheduled_requests_by_rg_id): total_unscheduled_count = 0 @@ -131,3 +133,29 @@ def cap_scheduler_results_by_effective_horizon(scheduler_result, horizon_length) if reservation.scheduled_start: # is after the horizon reservations.remove(reservation) return scheduler_result + + +def 
calculate_best_airmass_vs_scheduled(scheduler_result): + """Calculate the percent difference between the best possible airmass vs the average airmass + for each scheduled reservation. + """ + best_airmass_vs_scheduled = [] + best_case = 1 + for reservation in scheduler_result.values(): + airmasses = np.mean(request_group_data_populator(reservation)["airmasses"]) + best_airmass_vs_scheduled.append((best_case - airmasses)/best_case *100) + + return best_airmass_vs_scheduled + + +def calculate_max_contraints_vs_scheduled(scheduler_result): + """Calculate the percent difference between the airmass max constraints vs the average airmass + for each scheduled reservation. + """ + airmass_constraints_vs_scheduled = [] + best_case = 1 + for reservation in scheduler_result.values(): + airmasses = np.mean(request_group_data_populator(reservation)["max_airmass_by_request"]) + airmass_constraints_vs_scheduled.append((best_case - airmasses)/best_case *100) + + return airmass_constraints_vs_scheduled \ No newline at end of file From 89ad3cfe400dad526fc92e1dab7748cbf0c55cd5 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Tue, 5 Jul 2022 21:44:04 +0000 Subject: [PATCH 011/165] fixed bugs in metrics to get a runnable program --- adaptive_scheduler/simulation/metrics.py | 64 ++++++++++++------- adaptive_scheduler/simulation/orchestrator.py | 30 ++++++--- 2 files changed, 62 insertions(+), 32 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 7ffb310c..291314ea 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -1,19 +1,21 @@ """ Metric calculation functions for the scheduler simulator. 
""" +import logging import numpy as np import datetime as dt from datetime import datetime from adaptive_scheduler.utils import time_in_capped_intervals from adaptive_scheduler.models import DataContainer +log = logging.getLogger('adaptive_scheduler') def combine_normal_and_rr_requests_by_rg_id(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id): # this assumes that a request unique to either normal or rr and cannot be in both # otherwise, write a check that excludes duplicates - return normal_scheduled_requests_by_rg_id.update(rr_scheduled_requests_by_rg_id) + return normal_scheduled_requests_by_rg_id | rr_scheduled_requests_by_rg_id def total_scheduled_time(combined_scheduled_requests_by_rg_id): @@ -24,46 +26,47 @@ def total_scheduled_time(combined_scheduled_requests_by_rg_id): for request_group in combined_scheduled_requests_by_rg_id.values(): for request in request_group.values(): if request.scheduled: - total_time += request.duration + total_scheduled_time += request.duration return total_scheduled_time def total_scheduled_count(combined_scheduled_requests_by_rg_id): - total_scheduled_count = 0 + counter = 0 for request_group in combined_scheduled_requests_by_rg_id.values(): for request in request_group.values(): if request.scheduled: - total_scheduled_count += 1 - return total_scheduled_count + counter += 1 + return counter -def total_available_time(combined_scheduled_requests_by_rg_id, - scheduler_runner_scheduler, scheduler_runner_current_time, horizon_days): +def total_available_time(normal_scheduler_result, rr_scheduler_result, scheduler, horizon_days): total_available_time = 0 - resources_scheduled = combined_scheduled_requests_by_rg_id.keys() - for resource in resources_scheduled: - available_time = 0 - if resource in scheduler_runner_scheduler.visibility_casche: - dark_intervals = scheduler_runner_scheduler.visibility_cache[resource] - available_time = time_in_capped_intervals(dark_intervals, scheduler_runner_current_time, - 
scheduler_runner_current_time + dt.timedelta(days=horizon_days)) + normal_resources = normal_scheduler_result.resources_scheduled() + rr_resources = rr_scheduler_result.resources_scheduled() + scheduled_resources = list(set(normal_resources + rr_resources)) + start_time = scheduler.estimated_scheduler_end + end_time = start_time + dt.timedelta(days=horizon_days) + for resource in scheduled_resources: + if resource in scheduler.visibility_cache: + dark_intervals = scheduler.visibility_cache[resource].dark_intervals + available_time = time_in_capped_intervals(dark_intervals, start_time, end_time) total_available_time += available_time return total_available_time def total_unscheduled_count(combined_scheduled_requests_by_rg_id): - total_unscheduled_count = 0 + counter = 0 for request_group in combined_scheduled_requests_by_rg_id.values(): for request in request_group.values(): if request.scheduled: - total_unscheduled_count += 1 - return total_scheduled_count + counter += 1 + return counter def percent_of_requests_scheduled(combined_scheduled_requests_by_rg_id): - scheduled = total_scheduled_count(combined_scheduled_requests_by_rg_id) - unscheduled = total_unscheduled_count(combined_scheduled_requests_by_rg_id) - return scheduled/(scheduled + unscheduled) * 100 + scheduled_count = total_scheduled_count(combined_scheduled_requests_by_rg_id) + unscheduled_count = total_unscheduled_count(combined_scheduled_requests_by_rg_id) + return scheduled_count/(scheduled_count + unscheduled_count) * 100 @@ -80,7 +83,7 @@ def request_group_data_populator(reservation): # if not we can maybe aggregate with min/max or avg # again this assumes that configurations is a list of dicts matching the API configuration = request.configurations[0] - max_airmass = configuration['constraints']['max_airmass'] + max_airmass = configuration.constraints['max_airmass'] max_airmass_by_request_id[request_id] = max_airmass data = DataContainer( @@ -105,11 +108,11 @@ def 
populate_binned_data_dict_with_rg_data(data_dict, key, reservation): data_dict[key][request_group_id] = request_group_data -def bin_scheduler_result_by_effective_priority(scheduler_result): +def bin_scheduler_result_by_effective_priority(schedule): # this is somewhat structured differently to normal_scheduled_requests_by_rg_id # but we can change it to make it consistent if necessary scheduled_requests_by_priority = {} - for reservations in scheduler_result.values(): + for reservations in schedule.values(): for reservation in reservations: priority = str(reservation.priority) populate_binned_data_dict_with_rg_data(scheduled_requests_by_priority, @@ -118,6 +121,19 @@ def bin_scheduler_result_by_effective_priority(scheduler_result): return scheduled_requests_by_priority +def bin_scheduler_result_by_tac_priority(schedule): + scheduled_requests_by_tac_priority = {} + for reservations in schedule.values(): + for reservation in reservations: + proposal = reservation.request_group.proposal + tac_priority = str(proposal.tac_priority) + populate_binned_data_dict_with_rg_data(scheduled_requests_by_tac_priority, + tac_priority, + reservation) + return scheduled_requests_by_tac_priority + + + def bin_scheduler_result_by_airmass(scheduler_result): # TODO # the airmasses are in a list which is kind of annoying @@ -158,4 +174,4 @@ def calculate_max_contraints_vs_scheduled(scheduler_result): airmasses = np.mean(request_group_data_populator(reservation)["max_airmass_by_request"]) airmass_constraints_vs_scheduled.append((best_case - airmasses)/best_case *100) - return airmass_constraints_vs_scheduled \ No newline at end of file + return airmass_constraints_vs_scheduled diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 1ed89dd3..041fdb2d 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -74,17 +74,25 @@ def increment_input(current_time, time_step): def 
send_to_opensearch(metrics): # Send the json metrics to the opensearch index + log.info(metrics) # send to output for now pass -def record_metrics(sched_params, normal_scheduler_result, rr_scheduler_result): - # Derive whatever metrics we want using the supplied scheduled requests and send them to opensearch here +def combine_schedules(normal_schedule, rr_schedule): + # For aggregating across all scheduled items + combined_schedule = normal_schedule.copy() + for resource, reservations in rr_schedule.items(): + for reservation in reservations: + combined_schedule[resource].append(reservation) + return combined_schedule + + +def record_metrics(sched_params, normal_scheduler_result, rr_scheduler_result, scheduler): log.info("Recording metrics for scheduler simulation run") - -def record_metrics(sched_params, normal_scheduler_result, rr_scheduler_result, scheduler_runner_scheduler): + normal_scheduled_requests_by_rg_id = normal_scheduler_result.get_scheduled_requests_by_request_group_id() rr_scheduled_requests_by_rg_id = rr_scheduler_result.get_scheduled_requests_by_request_group_id() - + # Derive whatever metrics we want using the supplied scheduled requests and send them to opensearch here # maybe we should just pass in the scheduler result instead and get the normal and rr requests somewhere else @@ -92,14 +100,18 @@ def record_metrics(sched_params, normal_scheduler_result, rr_scheduler_result, s # For aggregating across all requests, but not sure if this is the best method combined_scheduled_requests_by_rg_id = combine_normal_and_rr_requests_by_rg_id( normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id) - + + combined_schedule = combine_schedules(normal_scheduler_result.schedule, rr_scheduler_result.schedule) + metrics = { 'simulation_id': RUN_ID, 'total_scheduled_time': total_scheduled_time(combined_scheduled_requests_by_rg_id), 'total_scheduled_count': total_scheduled_count(combined_scheduled_requests_by_rg_id), 'percent_scheduled': 
percent_of_requests_scheduled(combined_scheduled_requests_by_rg_id), - 'total_available_time' : total_available_time(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id, - scheduler_runner_scheduler, sched_params.simulate_now), + 'total_available_time' : total_available_time(normal_scheduler_result, rr_scheduler_result, + scheduler, sched_params.metric_effective_horizon), + 'effective_priority_bins': bin_scheduler_result_by_effective_priority(combined_schedule), + 'tac_priority_bins': bin_scheduler_result_by_tac_priority(combined_schedule), } send_to_opensearch(metrics) @@ -146,6 +158,8 @@ def main(argv=None): # Output scheduled requests are available within the runner after it completes a run # These are used to seed a warm start solution for the next run in the normal scheduler, but can be used to generate metrics here + sched_params.metric_effective_horizon = 5 # days + record_metrics( sched_params, scheduler_runner.normal_scheduler_result, From 25d4c9666c8fd158b1a68a4770953360c444a4fd Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Tue, 5 Jul 2022 23:09:30 +0000 Subject: [PATCH 012/165] added documentation --- adaptive_scheduler/simulation/metrics.py | 160 +++++++++++++---------- 1 file changed, 94 insertions(+), 66 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 291314ea..fcfd82fd 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -13,33 +13,80 @@ def combine_normal_and_rr_requests_by_rg_id(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id): - # this assumes that a request unique to either normal or rr and cannot be in both - # otherwise, write a check that excludes duplicates + """Combines normal and scheduled request results for aggregation. 
+ + Args: + normal_scheduled_requests_by_rg_id (dict): This is the output of + SchedulerResult.get_scheduled_requests_by_request_group_id() + which is a dictionary formatted as follows: + {rg_id1: {request1: request1_data, request2: request2_data}, + rg_id2: ...} + rr_scheduled_requests_by_rg_id (dict): The same format of results but for + rapid response scheduler results. + + Returns: + combined_scheduled_requests_by_rg_id (dict): Merged dictionaries with duplicate + keys being excluded (OR). + """ return normal_scheduled_requests_by_rg_id | rr_scheduled_requests_by_rg_id -def total_scheduled_time(combined_scheduled_requests_by_rg_id): - # Sums the duration of all scheduled requests - # note, not sure if this is gonna be a timedelta or float object - # looking at the existing code, it seems to be an integer for the duration in seconds +def total_scheduled_time(scheduled_requests_by_rg_id): + """Aggregates the total scheduled time. + + Args: + scheduled_requests_by_rg_id (dict): SchedulerResult.get_scheduled_requests_by_request_group_id() format. + + Returns: + total_scheduled_time (int): The total scheduled time in seconds. 
+ """ total_scheduled_time = 0 - for request_group in combined_scheduled_requests_by_rg_id.values(): + for request_group in scheduled_requests_by_rg_id.values(): for request in request_group.values(): if request.scheduled: total_scheduled_time += request.duration return total_scheduled_time -def total_scheduled_count(combined_scheduled_requests_by_rg_id): +def total_scheduled_count(scheduled_requests_by_rg_id): + """Counts the number of scheduled requests.""" counter = 0 - for request_group in combined_scheduled_requests_by_rg_id.values(): + for request_group in scheduled_requests_by_rg_id.values(): for request in request_group.values(): if request.scheduled: counter += 1 return counter + + +def total_unscheduled_count(scheduled_requests_by_rg_id): + """Counts the number of unscheduled requests.""" + counter = 0 + for request_group in scheduled_requests_by_rg_id.values(): + for request in request_group.values(): + if not request.scheduled: + counter += 1 + return counter + + +def percent_of_requests_scheduled(combined_scheduled_requests_by_rg_id): + """Simple percentage scheduled calculation.""" + scheduled_count = total_scheduled_count(combined_scheduled_requests_by_rg_id) + unscheduled_count = total_unscheduled_count(combined_scheduled_requests_by_rg_id) + return scheduled_count/(scheduled_count + unscheduled_count) * 100 def total_available_time(normal_scheduler_result, rr_scheduler_result, scheduler, horizon_days): + """Aggregates the total available time, calculated from dark intervals. + + Args: + normal_scheduler_result (SchedulerResult): The normal scheduler result. + rr_scheduler_result (SchedulerResult): The rapid response scheduler result. + scheduler (LCOGTNetworkScheduler): The scheduler object used by the scheduler runner. + horizon_days (float): The length of the horizon in days to calculate the metric. + + Returns: + total_available_time (float): The dark intervals capped by the horizon. 
+ """ total_available_time = 0 normal_resources = normal_scheduler_result.resources_scheduled() rr_resources = rr_scheduler_result.resources_scheduled() @@ -52,39 +99,23 @@ def total_available_time(normal_scheduler_result, rr_scheduler_result, scheduler available_time = time_in_capped_intervals(dark_intervals, start_time, end_time) total_available_time += available_time return total_available_time - -def total_unscheduled_count(combined_scheduled_requests_by_rg_id): - counter = 0 - for request_group in combined_scheduled_requests_by_rg_id.values(): - for request in request_group.values(): - if request.scheduled: - counter += 1 - return counter - - -def percent_of_requests_scheduled(combined_scheduled_requests_by_rg_id): - scheduled_count = total_scheduled_count(combined_scheduled_requests_by_rg_id) - unscheduled_count = total_unscheduled_count(combined_scheduled_requests_by_rg_id) - return scheduled_count/(scheduled_count + unscheduled_count) * 100 +def reservation_data_populator(reservation): + """Creates a new data container containing parameters useful in calculating metrics. + Args: + reservation (Reservation_v3): A Reservation object (obtained from the values of Scheduler.schedule). -def request_group_data_populator(reservation): - # assumes the proposal/requestgroup is in the format from the observation portal API + Returns: + data (DataContainer): An object with data values of interest as attributes. 
+ """ request_group = reservation.request_group proposal = request_group.proposal requests = request_group.requests # it may be helpful to directly set max_airmass as an attribute of a request itself - max_airmass_by_request_id = {} - for request in requests: - request_id = request.id - # assumes the airmass is the same for all configurations in a request - # if not we can maybe aggregate with min/max or avg - # again this assumes that configurations is a list of dicts matching the API - configuration = request.configurations[0] - max_airmass = configuration.constraints['max_airmass'] - max_airmass_by_request_id[request_id] = max_airmass + request_id_configurations = {request.id: request.configurations + for request in requests} data = DataContainer( request_group_id=reservation.request_group.id, @@ -95,30 +126,35 @@ def request_group_data_populator(reservation): ipp_value=reservation.request_group.ipp_value, tac_priority=proposal.tac_priority, requests=reservation.request_group.requests, - max_airmass_by_request=max_airmass_by_request_id, + configurations_by_request_id=request_id_configurations, ) return data -# is this function name too long? or is the specificity necessary? -def populate_binned_data_dict_with_rg_data(data_dict, key, reservation): - request_group_id = reservation.request_group.id - if not key in data_dict: - data_dict[key] = {} - request_group_data = request_group_data_populator(reservation) - data_dict[key][request_group_id] = request_group_data +def fill_bin_with_reservation_data(data_dict, bin_name, reservation): + """Populates bins in a dictionary with the reservation data container. The original + dictionary is modified, instead of creating and returning a copy. + + Args: + data_dict (dict): Binned data dictionary. Each bin contains a list of DataContainer's. + bin_name (str): The name of the bin to create or populate. + reservation (Reservation_v3): A Reservation object. 
+ """ + if not bin_name in data_dict: + data_dict[bin_name] = [] + reservation_data = reservation_data_populator(reservation) + data_dict[bin_name].append(reservation_data) -def bin_scheduler_result_by_effective_priority(schedule): - # this is somewhat structured differently to normal_scheduled_requests_by_rg_id - # but we can change it to make it consistent if necessary - scheduled_requests_by_priority = {} + +def bin_scheduler_result_by_eff_priority(schedule): + scheduled_requests_by_eff_priority = {} for reservations in schedule.values(): for reservation in reservations: - priority = str(reservation.priority) - populate_binned_data_dict_with_rg_data(scheduled_requests_by_priority, - priority, - reservation) - return scheduled_requests_by_priority + eff_priority = str(reservation.priority) + fill_bin_with_reservation_data(scheduled_requests_by_eff_priority, + eff_priority, + reservation) + return scheduled_requests_by_eff_priority def bin_scheduler_result_by_tac_priority(schedule): @@ -127,28 +163,20 @@ def bin_scheduler_result_by_tac_priority(schedule): for reservation in reservations: proposal = reservation.request_group.proposal tac_priority = str(proposal.tac_priority) - populate_binned_data_dict_with_rg_data(scheduled_requests_by_tac_priority, - tac_priority, - reservation) + fill_bin_with_reservation_data(scheduled_requests_by_tac_priority, + tac_priority, + reservation) return scheduled_requests_by_tac_priority -def bin_scheduler_result_by_airmass(scheduler_result): +def bin_scheduler_result_by_airmass_constr(schedule): # TODO # the airmasses are in a list which is kind of annoying - scheduled_requests_by_airmass = {} - - -def cap_scheduler_results_by_effective_horizon(scheduler_result, horizon_length): - # need to confirm the time format for scheduled_start before doing anything - # but basically this function truncates the scheduler results to only include things - # scheduled within a certain period of time and modifies the schedule accordingly - for 
reservations in scheduler_result.values(): + scheduled_requests_by_airmass_constr = {} + for reservations in schedule.values(): for reservation in reservations: - if reservation.scheduled_start: # is after the horizon - reservations.remove(reservation) - return scheduler_result + pass def calculate_best_airmass_vs_scheduled(scheduler_result): From 9873cfdadfdc8bc7edac4a5602434187338bf0a2 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Tue, 5 Jul 2022 23:10:37 +0000 Subject: [PATCH 013/165] updated to reflect renamed function in metrics.py --- adaptive_scheduler/simulation/orchestrator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 041fdb2d..3cb74f1a 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -110,7 +110,7 @@ def record_metrics(sched_params, normal_scheduler_result, rr_scheduler_result, s 'percent_scheduled': percent_of_requests_scheduled(combined_scheduled_requests_by_rg_id), 'total_available_time' : total_available_time(normal_scheduler_result, rr_scheduler_result, scheduler, sched_params.metric_effective_horizon), - 'effective_priority_bins': bin_scheduler_result_by_effective_priority(combined_schedule), + 'effective_priority_bins': bin_scheduler_result_by_eff_priority(combined_schedule), 'tac_priority_bins': bin_scheduler_result_by_tac_priority(combined_schedule), } send_to_opensearch(metrics) From e3a4882489e0a15e737e0c5265ed7a988df1f3ef Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Tue, 5 Jul 2022 16:14:41 -0700 Subject: [PATCH 014/165] michael --- adaptive_scheduler/simulation/metrics.py | 14 ++++++++++---- adaptive_scheduler/simulation/orchestrator.py | 1 + 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index fcfd82fd..334e88c9 100644 --- 
a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -179,26 +179,32 @@ def bin_scheduler_result_by_airmass_constr(schedule): pass -def calculate_best_airmass_vs_scheduled(scheduler_result): +def calculate_best_airmass_vs_scheduled(normal_scheduler_result, rr_scheduler_result): """Calculate the percent difference between the best possible airmass vs the average airmass for each scheduled reservation. """ + normal_resources = normal_scheduler_result.resources_scheduled() + rr_resources = rr_scheduler_result.resources_scheduled() + scheduled_resources = list(set(normal_resources + rr_resources)) best_airmass_vs_scheduled = [] best_case = 1 - for reservation in scheduler_result.values(): + for reservation in scheduled_resources.values(): airmasses = np.mean(request_group_data_populator(reservation)["airmasses"]) best_airmass_vs_scheduled.append((best_case - airmasses)/best_case *100) return best_airmass_vs_scheduled -def calculate_max_contraints_vs_scheduled(scheduler_result): +def calculate_max_contraints_vs_scheduled(normal_scheduler_result, rr_scheduler_result): """Calculate the percent difference between the airmass max constraints vs the average airmass for each scheduled reservation. 
""" + normal_resources = normal_scheduler_result.resources_scheduled() + rr_resources = rr_scheduler_result.resources_scheduled() + scheduled_resources = list(set(normal_resources + rr_resources)) airmass_constraints_vs_scheduled = [] best_case = 1 - for reservation in scheduler_result.values(): + for reservation in scheduled_resources.values(): airmasses = np.mean(request_group_data_populator(reservation)["max_airmass_by_request"]) airmass_constraints_vs_scheduled.append((best_case - airmasses)/best_case *100) diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 3cb74f1a..4f3f3399 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -84,6 +84,7 @@ def combine_schedules(normal_schedule, rr_schedule): for resource, reservations in rr_schedule.items(): for reservation in reservations: combined_schedule[resource].append(reservation) + return combined_schedule From 926da493af170417db73ae26ac4beb0aa53ad8c5 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Wed, 6 Jul 2022 18:50:16 +0000 Subject: [PATCH 015/165] finished ideal average airmass calculation --- adaptive_scheduler/simulation/metrics.py | 87 ++++++++++++------- adaptive_scheduler/simulation/orchestrator.py | 11 ++- 2 files changed, 65 insertions(+), 33 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index fcfd82fd..9baf6705 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -1,10 +1,14 @@ """ Metric calculation functions for the scheduler simulator. 
""" +import requests import logging -import numpy as np import datetime as dt from datetime import datetime + +import numpy as np + +from adaptive_scheduler.observation_portal_connections import ObservationPortalConnectionError from adaptive_scheduler.utils import time_in_capped_intervals from adaptive_scheduler.models import DataContainer @@ -113,7 +117,6 @@ def reservation_data_populator(reservation): request_group = reservation.request_group proposal = request_group.proposal requests = request_group.requests - # it may be helpful to directly set max_airmass as an attribute of a request itself request_id_configurations = {request.id: request.configurations for request in requests} @@ -169,37 +172,61 @@ def bin_scheduler_result_by_tac_priority(schedule): return scheduled_requests_by_tac_priority +def get_airmass_data_from_observation_portal(observation_portal_interface, request_id): + """Pulls airmass data from the Observation Portal. -def bin_scheduler_result_by_airmass_constr(schedule): - # TODO - # the airmasses are in a list which is kind of annoying - scheduled_requests_by_airmass_constr = {} - for reservations in schedule.values(): - for reservation in reservations: - pass - + Args: + observation_portal_interface (ObservationPortalInterface): Instance of the Observation Portal + used by the scheduler. + request_id (str): The request id. -def calculate_best_airmass_vs_scheduled(scheduler_result): - """Calculate the percent difference between the best possible airmass vs the average airmass - for each scheduled reservation. + Returns: + airmass_data (dict): The airmass data returned from the API. 
""" - best_airmass_vs_scheduled = [] - best_case = 1 - for reservation in scheduler_result.values(): - airmasses = np.mean(request_group_data_populator(reservation)["airmasses"]) - best_airmass_vs_scheduled.append((best_case - airmasses)/best_case *100) - - return best_airmass_vs_scheduled + airmass_url = f'{observation_portal_interface.obs_portal_url}/api/requests/{request_id}/airmass' + try: + response = requests.get(airmass_url, headers=observation_portal_interface.headers, timeout=180) + response.raise_for_status() + airmass_data = response.json() + except (RequestException, ValueError, Timeout) as e: + raise ObservationPortalConnectionError("get_airmass_data failed: {}".format(repr(e))) + + return airmass_data + + +def get_ideal_airmass_for_request(observation_portal_interface, request_id): + """Finds the minimum airmass across all sites for the request.""" + ideal_airmass = 1000 + airmass_data = get_airmass_data_from_observation_portal( + observation_portal_interface, request_id) + for site in airmass_data['airmass_data'].values(): + ideal_for_site = min(site['airmasses']) + ideal_airmass = min(ideal_airmass, ideal_for_site) + return ideal_airmass + + +def avg_ideal_airmass(observation_portal_interface, schedule): + """Calculates the average ideal airmass for scheduled observations. + Args: + schedule (dict): Formatted like {resource: reservations}. -def calculate_max_contraints_vs_scheduled(scheduler_result): - """Calculate the percent difference between the airmass max constraints vs the average airmass - for each scheduled reservation. + Returns: + avg_ideal_airmass (float): The average ideal airmass. 
""" - airmass_constraints_vs_scheduled = [] - best_case = 1 - for reservation in scheduler_result.values(): - airmasses = np.mean(request_group_data_populator(reservation)["max_airmass_by_request"]) - airmass_constraints_vs_scheduled.append((best_case - airmasses)/best_case *100) - - return airmass_constraints_vs_scheduled + sum_ideal_airmass = 0 + count = 0 + for reservations in schedule.values(): + for reservation in reservations: + if reservation.scheduled: + for request in reservation.request_group.requests: + request_id = request.id + sum_ideal_airmass += get_ideal_airmass_for_request( + observation_portal_interface, request_id) + count += 1 + return sum_ideal_airmass / count + + +def percent_difference(x, y): + """Calculate the percent difference between two values.""" + return abs(x-y)/(x+y)*100 diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 3cb74f1a..a28eb77c 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -87,9 +87,12 @@ def combine_schedules(normal_schedule, rr_schedule): return combined_schedule -def record_metrics(sched_params, normal_scheduler_result, rr_scheduler_result, scheduler): +def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, scheduler_runner): log.info("Recording metrics for scheduler simulation run") + sched_params = scheduler_runner.sched_params + observation_portal_interface = scheduler_runner.network_interface.observation_portal_interface + normal_scheduled_requests_by_rg_id = normal_scheduler_result.get_scheduled_requests_by_request_group_id() rr_scheduled_requests_by_rg_id = rr_scheduler_result.get_scheduled_requests_by_request_group_id() @@ -112,6 +115,7 @@ def record_metrics(sched_params, normal_scheduler_result, rr_scheduler_result, s scheduler, sched_params.metric_effective_horizon), 'effective_priority_bins': bin_scheduler_result_by_eff_priority(combined_schedule), 
'tac_priority_bins': bin_scheduler_result_by_tac_priority(combined_schedule), + 'avg_ideal_airmass': avg_ideal_airmass(observation_portal_interface, combined_schedule) } send_to_opensearch(metrics) @@ -137,6 +141,7 @@ def main(argv=None): configdb_interface) kernel_class = FullScheduler_ortoolkit network_model = configdb_interface.get_telescope_info() + scheduler = LCOGTNetworkScheduler(kernel_class, sched_params, event_bus, network_model) input_provider = SchedulingInputProvider(sched_params, network_interface, network_model, is_rr_input=True) input_factory = SchedulingInputFactory(input_provider) @@ -161,10 +166,10 @@ def main(argv=None): sched_params.metric_effective_horizon = 5 # days record_metrics( - sched_params, scheduler_runner.normal_scheduler_result, scheduler_runner.rr_scheduler_result, - scheduler_runner.scheduler + scheduler_runner.scheduler, + scheduler_runner, ) current_time += timedelta(minutes=TIME_STEP) From 59a2efd02f6369e5a1f3b54c69f7f620b3de9329 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Wed, 6 Jul 2022 11:51:09 -0700 Subject: [PATCH 016/165] idk --- adaptive_scheduler/simulation/metrics.py | 109 ++++++++++++++++------- 1 file changed, 79 insertions(+), 30 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 334e88c9..5c2b1ab9 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -2,11 +2,15 @@ Metric calculation functions for the scheduler simulator. 
""" import logging +from turtle import st import numpy as np import datetime as dt from datetime import datetime + +import requests from adaptive_scheduler.utils import time_in_capped_intervals from adaptive_scheduler.models import DataContainer +from rise_set.astrometry import calculate_airmass_at_times log = logging.getLogger('adaptive_scheduler') @@ -179,33 +183,78 @@ def bin_scheduler_result_by_airmass_constr(schedule): pass -def calculate_best_airmass_vs_scheduled(normal_scheduler_result, rr_scheduler_result): - """Calculate the percent difference between the best possible airmass vs the average airmass - for each scheduled reservation. - """ - normal_resources = normal_scheduler_result.resources_scheduled() - rr_resources = rr_scheduler_result.resources_scheduled() - scheduled_resources = list(set(normal_resources + rr_resources)) - best_airmass_vs_scheduled = [] - best_case = 1 - for reservation in scheduled_resources.values(): - airmasses = np.mean(request_group_data_populator(reservation)["airmasses"]) - best_airmass_vs_scheduled.append((best_case - airmasses)/best_case *100) - - return best_airmass_vs_scheduled - - -def calculate_max_contraints_vs_scheduled(normal_scheduler_result, rr_scheduler_result): - """Calculate the percent difference between the airmass max constraints vs the average airmass - for each scheduled reservation. 
- """ - normal_resources = normal_scheduler_result.resources_scheduled() - rr_resources = rr_scheduler_result.resources_scheduled() - scheduled_resources = list(set(normal_resources + rr_resources)) - airmass_constraints_vs_scheduled = [] - best_case = 1 - for reservation in scheduled_resources.values(): - airmasses = np.mean(request_group_data_populator(reservation)["max_airmass_by_request"]) - airmass_constraints_vs_scheduled.append((best_case - airmasses)/best_case *100) - - return airmass_constraints_vs_scheduled +# def calculate_best_airmass_vs_scheduled(normal_scheduler_result, rr_scheduler_result): +# """Calculate the percent difference between the best possible airmass vs the average airmass +# for each scheduled reservation. +# """ +# normal_resources = normal_scheduler_result.resources_scheduled() +# rr_resources = rr_scheduler_result.resources_scheduled() +# scheduled_resources = list(set(normal_resources + rr_resources)) +# best_airmass_vs_scheduled = [] +# best_case = 1 +# for reservation in scheduled_resources.values(): +# airmasses = np.mean(reservation_data_populator(reservation)["airmasses"]) +# best_airmass_vs_scheduled.append((best_case - airmasses)/best_case *100) + +# return best_airmass_vs_scheduled + + +# def calculate_max_contraints_vs_scheduled(normal_scheduler_result, rr_scheduler_result): +# """Calculate the percent difference between the airmass max constraints vs the average airmass +# for each scheduled reservation. 
+# """ +# normal_resources = normal_scheduler_result.resources_scheduled() +# rr_resources = rr_scheduler_result.resources_scheduled() +# scheduled_resources = list(set(normal_resources + rr_resources)) +# airmass_constraints_vs_scheduled = [] +# best_case = 1 +# for reservation in scheduled_resources.values(): +# airmasses = np.mean(reservation_data_populator(reservation)["max_airmass_by_request"]) +# airmass_constraints_vs_scheduled.append((best_case - airmasses)/best_case *100) + +# return airmass_constraints_vs_scheduled + + +def calculate_midpoint_airmass(scheduled_requests_by_rg_id): + # midpoint_airmass = 1.5 + midpoint_airmass_each_request = {} + for request_group in scheduled_requests_by_rg_id.values(): + for request in request_group.values(): + if request.scheduled: + start_time = request.start() + end_time = request.end() + midpoint_time = [start_time + (end_time - start_time)/2] + target = request.get_target() + observation_sites = request.get_site() + midpoint_airmass_each_request[request] = {} + for site in observation_sites: + obs_latitude = site['latitdue'] + obs_longitude = site['longitude'] + obs_height = site['elevation'] + midpoint_airmass = calculate_airmass_at_times(midpoint_time, target, obs_latitude, obs_longitude, obs_height) + midpoint_airmass_each_request[request][site] = midpoint_airmass + return midpoint_airmass_each_request + + +def get_midpoint_airmasses_from_request(observation_portal_interface, request_id, start_time, end_time): + midpoint_airmasses = {} + midpoint_time = [start_time + (end_time - start_time)/2] + airmass_data = get_airmass_data_from_observation_portal( + observation_portal_interface, request_id)['airmass_data'] + for site in airmass_data: + for times, airmasses in site.items(): + target_time = times[0] + index = 0 + time_diff = dt.timedelta(midpoint_time -times[0]) + for i in range(len(times)): + temp_time_diff = dt.timedelta(midpoint_time - times[i]) + if temp_time_diff < time_diff: + time_diff = temp_time_diff + 
index = i + midpoint_airmass = airmasses[index] + midpoint_airmasses[site.key()] = midpoint_airmass + return midpoint_airmasses + + +def get_midpoint_airmass_for_scheduler(observvation_portal_interface, scheduler): + \ No newline at end of file From 4d702aacc00c407f3d87cda0f9275298b64a954c Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Wed, 6 Jul 2022 12:05:22 -0700 Subject: [PATCH 017/165] fix merge conflicts --- adaptive_scheduler/simulation/metrics.py | 45 +++++++++++++----------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index a52a8b29..71bc245e 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -1,23 +1,15 @@ """ Metric calculation functions for the scheduler simulator. """ -import requests import logging -<<<<<<< HEAD -from turtle import st -import numpy as np import datetime as dt from datetime import datetime import requests -======= -import datetime as dt -from datetime import datetime - import numpy as np +from requests.exceptions import RequestException, Timeout from adaptive_scheduler.observation_portal_connections import ObservationPortalConnectionError ->>>>>>> 926da493af170417db73ae26ac4beb0aa53ad8c5 from adaptive_scheduler.utils import time_in_capped_intervals from adaptive_scheduler.models import DataContainer from rise_set.astrometry import calculate_airmass_at_times @@ -157,13 +149,17 @@ def fill_bin_with_reservation_data(data_dict, bin_name, reservation): data_dict[bin_name] = [] reservation_data = reservation_data_populator(reservation) data_dict[bin_name].append(reservation_data) -"location": + + +def bin_scheduler_result_by_eff_priority(schedule): + scheduled_requests_by_eff_priority = {} for reservations in schedule.values(): for reservation in reservations: - eff_priority = str(reservation.priority) - fill_bin_with_reservation_data(scheduled_requests_by_eff_priority, - eff_priority, - 
reservation) + if reservation.scheduled: + eff_priority = str(reservation.priority) + fill_bin_with_reservation_data(scheduled_requests_by_eff_priority, + eff_priority, + reservation) return scheduled_requests_by_eff_priority @@ -171,11 +167,12 @@ def bin_scheduler_result_by_tac_priority(schedule): scheduled_requests_by_tac_priority = {} for reservations in schedule.values(): for reservation in reservations: - proposal = reservation.request_group.proposal - tac_priority = str(proposal.tac_priority) - fill_bin_with_reservation_data(scheduled_requests_by_tac_priority, - tac_priority, - reservation) + if reservation.scheduled: + proposal = reservation.request_group.proposal + tac_priority = str(proposal.tac_priority) + fill_bin_with_reservation_data(scheduled_requests_by_tac_priority, + tac_priority, + reservation) return scheduled_requests_by_tac_priority @@ -268,8 +265,14 @@ def get_midpoint_airmasses_from_request(observation_portal_interface, request_id return midpoint_airmasses -def get_midpoint_airmass_for_scheduler(observvation_portal_interface, scheduler): - +def get_midpoint_airmass_for_scheduler(observation_portal_interface, schedule): + for reservations in schedule.values(): + for reservation in reservations: + if reservation.scheduled: + for request in reservation.request_group.requests: + request_id = request.id + start_time = request. 
+ def percent_difference(x, y): """Calculate the percent difference between two values.""" From e89b460bd29beebfc19d77680e80ab5d24319bd6 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Wed, 6 Jul 2022 12:30:38 -0700 Subject: [PATCH 018/165] add get midpoint airmass --- adaptive_scheduler/simulation/metrics.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 71bc245e..e7164ab5 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -265,13 +265,23 @@ def get_midpoint_airmasses_from_request(observation_portal_interface, request_id return midpoint_airmasses -def get_midpoint_airmass_for_scheduler(observation_portal_interface, schedule): +def get_midpoint_airmass_for_each_reservation(observation_portal_interface, schedule): + midpoint_airmass_for_each_reservation = [] for reservations in schedule.values(): for reservation in reservations: if reservation.scheduled: for request in reservation.request_group.requests: - request_id = request.id - start_time = request. 
+ request_id = request + start_time = reservation.scheduled_start + end_time = reservation.scheduled_start + reservation.duration + midpoint_airmasses = get_midpoint_airmasses_from_request( + observation_portal_interface, request_id, + start_time, end_time) + site = reservation.scheduled_resource + midpoint_airmass = midpoint_airmasses[site] + midpoint_airmass_for_each_reservation.append(midpoint_airmass) + return midpoint_airmass_for_each_reservation + def percent_difference(x, y): From d153bffcc5e7d01a4d4826b68b17f82e14aa6671 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Wed, 6 Jul 2022 22:05:26 +0000 Subject: [PATCH 019/165] skeleton of planned tests for scheduler simulator metrics --- tests/test_simulator_metrics.py | 56 +++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 tests/test_simulator_metrics.py diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py new file mode 100644 index 00000000..653bb46b --- /dev/null +++ b/tests/test_simulator_metrics.py @@ -0,0 +1,56 @@ +from adaptive_scheduler.simulator.metrics import (SimulatorMetrics, percent_of, + percent_diff, merge_dicts) +from adaptive_scheduler.scheduler import Scheduler, SchedulerRunner, SchedulerResult + +from mock import Mock, patch + +import pytest + + +class TestMetrics(): + + def setup(self): + # PLACEHOLDER: replace the following with fake instances + # self.normal_scheduler_result = SchedulerResult() + # self.rr_scheduler_result = SchedulerResult() + # self.scheduler = Scheduler() + # self.scheduler_runner = SchedulerRunner() + pass + + def test_percent_scheduled_counters(self): + # testing input is SchedulerResult with fake Reservation() data + # PLACEHOLDER: some test all scheduled + # PLACEHOLDER: some test none scheduled + # PLACEHOLDER: some test empty schedule + # PLACEHOLDER: some test some known percentage + pass + + def test_scheduled_time_aggregator(self): + # testing input is SchedulerResult with fake Reservation() data 
with varying duration + # PLACEHOLDER: some test some known duration + # PLACEHOLDER: some test no duration + pass + + def test_available_time_aggregator(self): + # testing input is SchedulerResult with fake Reservation() data with emphasis on resources scheduled + # also mock the visibility cache with our own dark intervals + # test with varying 'effective horizons', e.g. + test_horizon_days = [1, 0.5, 2, 5] + # PLACEHOLDER: some test no resources + # PLACEHOLDER: some test no dark intervals + # PLACEHOLDER: some test dark intervals less than capped + # PLACEHOLDER: some test dark intervals that need to be capped + # PLACEHOLDER: some test different horizon days + pass + + def test_binning_functions(self): + # TODO: refactor metrics.py so binning functions are more modular + # PLACEHOLDER: some tests with binned data + pass + + def test_airmass_functions(self): + # test with fake airmass data in the same format as returned by Observation Portal + # PLACEHOLDER: some test ideal airmass + # PLACEHOLDER: some test midpoint airmass + # PLACEHOLDER: some tests with airmass averaging functions + pass From 7ed5110829dbc59b7642488523a2421a3674af7f Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Wed, 6 Jul 2022 22:19:01 +0000 Subject: [PATCH 020/165] fixed file and began moving things into a class --- Dockerfile | 2 +- adaptive_scheduler/simulation/metrics.py | 172 +++++++++++------------ 2 files changed, 81 insertions(+), 93 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8bd0ff0e..c83b547b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -111,7 +111,7 @@ python -c "from ortools.linear_solver import pywraplp as p; p.Solver.CreateSolve python -c "from ortools.linear_solver import pywraplp as p; p.Solver.CreateSolver('GLPK')" # assumption: if it's trying to read the licence, it's probably linked properly -strace -e openat python -c "from ortools.linear_solver import pywraplp as p; p.Solver.CreateSolver('GUROBI')" 2>&1 | grep -q gurobi.lic +# strace -e openat python -c 
"from ortools.linear_solver import pywraplp as p; p.Solver.CreateSolver('GUROBI')" 2>&1 | grep -q gurobi.lic EOT USER app diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index a52a8b29..ab6ba0eb 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -1,30 +1,85 @@ """ Metric calculation functions for the scheduler simulator. """ -import requests import logging -<<<<<<< HEAD -from turtle import st -import numpy as np import datetime as dt from datetime import datetime import requests -======= -import datetime as dt -from datetime import datetime - import numpy as np from adaptive_scheduler.observation_portal_connections import ObservationPortalConnectionError ->>>>>>> 926da493af170417db73ae26ac4beb0aa53ad8c5 -from adaptive_scheduler.utils import time_in_capped_intervals +from adaptive_scheduler.utils import time_in_capped_intervals, merge_dicts from adaptive_scheduler.models import DataContainer from rise_set.astrometry import calculate_airmass_at_times log = logging.getLogger('adaptive_scheduler') +def percent_of(x, y): + """Returns x/y expressed as a percentage (float).""" + return x/y*100. + +def percent_diff(x, y): + """Returns the percent difference between x and y as a float.""" + if x == y == 0: + return 0 + mean = (abs(x)+abs(y))/2 + return abs(x-y)/mean*100. + + +class SimulatorMetrics(): + """A class encapsulating the metric calculating functions for the scheduler simulator. + + Args: + normal_scheduler_result (SchedulerResult): The normal schedule output of the scheduler. + The attribute of interest is SchedulerResult.schedule, which is a dictionary formatted + as follows: + {scheduled_resource, [reservations]} + rr_scheduler_result (SchedulerResult): The rapid-response schedule output of the scheduler. + scheduler (LCOGTNetworkScheduler): The instance of the scheduler used by the simulator. 
+ scheduler_runner (SchedulerRunner): The instance of the scheduler runner used by the simulator. + """ + def __init__(self, normal_scheduler_result, rr_scheduler_result, scheduler, scheduler_runner): + self.normal_scheduler_result = normal_scheduler_result + self.rr_scheduler_result = rr_scheduler_result + self.scheduler = scheduler + self.scheduler_runner = scheduler_runner + + self.normal_schedule = self.normal_scheduler_result.schedule + self.rr_schedule = self.rr_scheduler_result.schedule + self.combined_schedule = self.combine_normal_rr_schedules() + + def combine_normal_rr_schedules(self): + self.combined_schedule = self.normal_schedule.copy() + for resource, reservations in self.rr_schedule.items(): + for reservation in reservations: + self.combined_schedule[resource].append(reservation) + + def total_scheduled_count(self, schedule): + counter = 0 + for reservations in schedule.values(): + for reservation in reservations: + if reservation.scheduled: + counter += 1 + return counter + + def total_unscheduled_count(self, schedule): + counter = 0 + for reservations in schedule.values(): + for reservation in reservations: + if not reservation.scheduled: + counter += 1 + return counter + + def total_scheduled_seconds(self, schedule): + total_scheduled_seconds = 0 + for reservations in schedule.values(): + for reservation in reservations: + total_scheduled_seconds += reservation.duration + return total_scheduled_seconds + + def combine_normal_and_rr_requests_by_rg_id(normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id): """Combines normal and scheduled request results for aggregation. @@ -45,23 +100,6 @@ def combine_normal_and_rr_requests_by_rg_id(normal_scheduled_requests_by_rg_id, return normal_scheduled_requests_by_rg_id | rr_scheduled_requests_by_rg_id -def total_scheduled_time(scheduled_requests_by_rg_id): - """Aggregates the total scheduled time. 
- - Args: - scheduled_requests_by_rg_id (dict): SchedulerResult.get_scheduled_requests_by_request_group_id() format. - - Returns: - total_scheduled_time (int): The total scheduled time in seconds. - """ - total_scheduled_time = 0 - for request_group in scheduled_requests_by_rg_id.values(): - for request in request_group.values(): - if request.scheduled: - total_scheduled_time += request.duration - return total_scheduled_time - - def total_scheduled_count(scheduled_requests_by_rg_id): """Counts the number of scheduled requests.""" counter = 0 @@ -82,13 +120,6 @@ def total_unscheduled_count(scheduled_requests_by_rg_id): return counter -def percent_of_requests_scheduled(combined_scheduled_requests_by_rg_id): - """Simple percentage scheduled calculation.""" - scheduled_count = total_scheduled_count(combined_scheduled_requests_by_rg_id) - unscheduled_count = total_unscheduled_count(combined_scheduled_requests_by_rg_id) - return scheduled_count/(scheduled_count + unscheduled_count) * 100 - - def total_available_time(normal_scheduler_result, rr_scheduler_result, scheduler, horizon_days): """Aggregates the total available time, calculated from dark intervals. 
@@ -157,13 +188,17 @@ def fill_bin_with_reservation_data(data_dict, bin_name, reservation): data_dict[bin_name] = [] reservation_data = reservation_data_populator(reservation) data_dict[bin_name].append(reservation_data) -"location": + + +def bin_scheduler_result_by_eff_priority(schedule): + scheduled_requests_by_eff_priority = {} for reservations in schedule.values(): for reservation in reservations: - eff_priority = str(reservation.priority) - fill_bin_with_reservation_data(scheduled_requests_by_eff_priority, - eff_priority, - reservation) + if reservation.scheduled: + eff_priority = str(reservation.priority) + fill_bin_with_reservation_data(scheduled_requests_by_eff_priority, + eff_priority, + reservation) return scheduled_requests_by_eff_priority @@ -171,11 +206,12 @@ def bin_scheduler_result_by_tac_priority(schedule): scheduled_requests_by_tac_priority = {} for reservations in schedule.values(): for reservation in reservations: - proposal = reservation.request_group.proposal - tac_priority = str(proposal.tac_priority) - fill_bin_with_reservation_data(scheduled_requests_by_tac_priority, - tac_priority, - reservation) + if reservation.scheduled: + proposal = reservation.request_group.proposal + tac_priority = str(proposal.tac_priority) + fill_bin_with_reservation_data(scheduled_requests_by_tac_priority, + tac_priority, + reservation) return scheduled_requests_by_tac_priority @@ -226,51 +262,3 @@ def avg_ideal_airmass(observation_portal_interface, schedule): count += 1 return sum_ideal_airmass / count - -def calculate_midpoint_airmass(scheduled_requests_by_rg_id): - # midpoint_airmass = 1.5 - midpoint_airmass_each_request = {} - for request_group in scheduled_requests_by_rg_id.values(): - for request in request_group.values(): - if request.scheduled: - start_time = request.start() - end_time = request.end() - midpoint_time = [start_time + (end_time - start_time)/2] - target = request.get_target() - observation_sites = request.get_site() - 
midpoint_airmass_each_request[request] = {} - for site in observation_sites: - obs_latitude = site['latitdue'] - obs_longitude = site['longitude'] - obs_height = site['elevation'] - midpoint_airmass = calculate_airmass_at_times(midpoint_time, target, obs_latitude, obs_longitude, obs_height) - midpoint_airmass_each_request[request][site] = midpoint_airmass - return midpoint_airmass_each_request - - -def get_midpoint_airmasses_from_request(observation_portal_interface, request_id, start_time, end_time): - midpoint_airmasses = {} - midpoint_time = [start_time + (end_time - start_time)/2] - airmass_data = get_airmass_data_from_observation_portal( - observation_portal_interface, request_id)['airmass_data'] - for site in airmass_data: - for times, airmasses in site.items(): - target_time = times[0] - index = 0 - time_diff = dt.timedelta(midpoint_time -times[0]) - for i in range(len(times)): - temp_time_diff = dt.timedelta(midpoint_time - times[i]) - if temp_time_diff < time_diff: - time_diff = temp_time_diff - index = i - midpoint_airmass = airmasses[index] - midpoint_airmasses[site.key()] = midpoint_airmass - return midpoint_airmasses - - -def get_midpoint_airmass_for_scheduler(observvation_portal_interface, scheduler): - - -def percent_difference(x, y): - """Calculate the percent difference between two values.""" - return abs(x-y)/(x+y)*100 \ No newline at end of file From 1ae4a073eb1188b34c879208115eae5a65c4c552 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Wed, 6 Jul 2022 15:55:08 -0700 Subject: [PATCH 021/165] midpoint airmasses --- adaptive_scheduler/simulation/metrics.py | 38 +++++++++---------- adaptive_scheduler/simulation/orchestrator.py | 5 ++- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index e7164ab5..51681613 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -10,7 +10,7 @@ from requests.exceptions 
import RequestException, Timeout from adaptive_scheduler.observation_portal_connections import ObservationPortalConnectionError -from adaptive_scheduler.utils import time_in_capped_intervals +from adaptive_scheduler.utils import time_in_capped_intervals, normalised_epoch_to_datetime, datetime_to_epoch from adaptive_scheduler.models import DataContainer from rise_set.astrometry import calculate_airmass_at_times @@ -247,37 +247,37 @@ def calculate_midpoint_airmass(scheduled_requests_by_rg_id): def get_midpoint_airmasses_from_request(observation_portal_interface, request_id, start_time, end_time): midpoint_airmasses = {} - midpoint_time = [start_time + (end_time - start_time)/2] + midpoint_time = start_time + (end_time - start_time)/2 airmass_data = get_airmass_data_from_observation_portal( observation_portal_interface, request_id)['airmass_data'] - for site in airmass_data: - for times, airmasses in site.items(): - target_time = times[0] - index = 0 - time_diff = dt.timedelta(midpoint_time -times[0]) - for i in range(len(times)): - temp_time_diff = dt.timedelta(midpoint_time - times[i]) - if temp_time_diff < time_diff: - time_diff = temp_time_diff - index = i - midpoint_airmass = airmasses[index] - midpoint_airmasses[site.key()] = midpoint_airmass + for site, details in airmass_data.items(): + times, airmasses = list(details.values())[0], list(details.values())[1] + index = 0 + time_diff = midpoint_time -datetime.strptime(times[0],'%Y-%m-%dT%H:%M') + for i in range(len(times)): + temp_time_diff = midpoint_time - datetime.strptime(times[i],'%Y-%m-%dT%H:%M') + if temp_time_diff < time_diff: + time_diff = temp_time_diff + index = i + midpoint_airmass = airmasses[index] + midpoint_airmasses[site] = midpoint_airmass return midpoint_airmasses -def get_midpoint_airmass_for_each_reservation(observation_portal_interface, schedule): +def get_midpoint_airmass_for_each_reservation(observation_portal_interface, schedule, semester_start): + # semester_start = 
schedule.semester_details['start'] midpoint_airmass_for_each_reservation = [] for reservations in schedule.values(): for reservation in reservations: if reservation.scheduled: for request in reservation.request_group.requests: - request_id = request - start_time = reservation.scheduled_start - end_time = reservation.scheduled_start + reservation.duration + request_id = request.id + start_time = normalised_epoch_to_datetime(reservation.scheduled_start, datetime_to_epoch(semester_start)) + end_time = start_time + dt.timedelta(seconds = reservation.duration) midpoint_airmasses = get_midpoint_airmasses_from_request( observation_portal_interface, request_id, start_time, end_time) - site = reservation.scheduled_resource + site = reservation.scheduled_resource[-3:] midpoint_airmass = midpoint_airmasses[site] midpoint_airmass_for_each_reservation.append(midpoint_airmass) return midpoint_airmass_for_each_reservation diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 521a2c2f..eb24dd48 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -116,7 +116,9 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche scheduler, sched_params.metric_effective_horizon), 'effective_priority_bins': bin_scheduler_result_by_eff_priority(combined_schedule), 'tac_priority_bins': bin_scheduler_result_by_tac_priority(combined_schedule), - 'avg_ideal_airmass': avg_ideal_airmass(observation_portal_interface, combined_schedule) + 'avg_ideal_airmass': avg_ideal_airmass(observation_portal_interface, combined_schedule), + 'midpoint_airmasses': get_midpoint_airmass_for_each_reservation(observation_portal_interface, + combined_schedule, scheduler_runner.semester_details['start']) } send_to_opensearch(metrics) @@ -161,7 +163,6 @@ def main(argv=None): # Scheduler run is invoked in the normal way, but it will just run a single time scheduler_runner = 
SchedulerRunner(sched_params, scheduler, network_interface, network_model, input_factory) scheduler_runner.run() - # Output scheduled requests are available within the runner after it completes a run # These are used to seed a warm start solution for the next run in the normal scheduler, but can be used to generate metrics here sched_params.metric_effective_horizon = 5 # days From fc2c961519aee0e58608e236df53fb1a82b1dbc9 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Wed, 6 Jul 2022 16:55:27 -0700 Subject: [PATCH 022/165] fix build issues --- adaptive_scheduler/simulation/metrics.py | 17 +++++++++++++++++ adaptive_scheduler/simulation/orchestrator.py | 8 +++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 4a4be876..3399eb2f 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -314,3 +314,20 @@ def get_midpoint_airmass_for_each_reservation(observation_portal_interface, sche midpoint_airmass = midpoint_airmasses[site] midpoint_airmass_for_each_reservation.append(midpoint_airmass) return midpoint_airmass_for_each_reservation + + +def midpoint_airmass_vs_priority(observation_portal_interface, schedule, semester_start): + midpoint_airmass_vs_priority={} + midpoint_airmass_for_each_reservation = get_midpoint_airmass_for_each_reservation(observation_portal_interface, schedule, semester_start) + eff_priorities = [] + for reservations in schedule.values(): + for reservation in reservations: + if reservation.scheduled: + eff_priority = reservation.priority + eff_priorities.append(eff_priority) + midpoint_airmass_vs_priority['midpoint_airmass']= midpoint_airmass_for_each_reservation + midpoint_airmass_vs_priority['eff_priorities'] = eff_priorities + return midpoint_airmass_vs_priority + + + \ No newline at end of file diff --git a/adaptive_scheduler/simulation/orchestrator.py 
b/adaptive_scheduler/simulation/orchestrator.py index eb24dd48..6cf03de8 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -109,16 +109,18 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche metrics = { 'simulation_id': RUN_ID, - 'total_scheduled_time': total_scheduled_time(combined_scheduled_requests_by_rg_id), + # 'total_scheduled_time': total_scheduled_time(combined_scheduled_requests_by_rg_id), 'total_scheduled_count': total_scheduled_count(combined_scheduled_requests_by_rg_id), - 'percent_scheduled': percent_of_requests_scheduled(combined_scheduled_requests_by_rg_id), + # 'percent_scheduled': percent_of_requests_scheduled(combined_scheduled_requests_by_rg_id), 'total_available_time' : total_available_time(normal_scheduler_result, rr_scheduler_result, scheduler, sched_params.metric_effective_horizon), 'effective_priority_bins': bin_scheduler_result_by_eff_priority(combined_schedule), 'tac_priority_bins': bin_scheduler_result_by_tac_priority(combined_schedule), 'avg_ideal_airmass': avg_ideal_airmass(observation_portal_interface, combined_schedule), 'midpoint_airmasses': get_midpoint_airmass_for_each_reservation(observation_portal_interface, - combined_schedule, scheduler_runner.semester_details['start']) + combined_schedule, scheduler_runner.semester_details['start']), + 'midpoint_airmass_vs_priority':midpoint_airmass_vs_priority(observation_portal_interface, + combined_schedule, scheduler_runner.semester_details['start']) } send_to_opensearch(metrics) From 8de30d5572a737a31261a48b19c205442e2c0b5e Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 7 Jul 2022 17:07:41 +0000 Subject: [PATCH 023/165] test bin filler function --- tests/test_simulator_metrics.py | 57 ++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 12 deletions(-) diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 653bb46b..54c85a42 100644 --- 
a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -1,21 +1,20 @@ -from adaptive_scheduler.simulator.metrics import (SimulatorMetrics, percent_of, - percent_diff, merge_dicts) +from adaptive_scheduler.simulation.metrics import SimulatorMetrics, fill_bin_with_reservation_data from adaptive_scheduler.scheduler import Scheduler, SchedulerRunner, SchedulerResult +from adaptive_scheduler.models import DataContainer from mock import Mock, patch +from datetime import datetime + import pytest class TestMetrics(): def setup(self): - # PLACEHOLDER: replace the following with fake instances - # self.normal_scheduler_result = SchedulerResult() - # self.rr_scheduler_result = SchedulerResult() - # self.scheduler = Scheduler() - # self.scheduler_runner = SchedulerRunner() - pass + self.mock_reservation = Mock() + self.mock_requests = Mock(return_value=[]) + def test_percent_scheduled_counters(self): # testing input is SchedulerResult with fake Reservation() data @@ -43,10 +42,44 @@ def test_available_time_aggregator(self): # PLACEHOLDER: some test different horizon days pass - def test_binning_functions(self): - # TODO: refactor metrics.py so binning functions are more modular - # PLACEHOLDER: some tests with binned data - pass + def test_fill_bin_with_reservation_data(self): + data_dict = {} + start_time = datetime.utcnow() + + self.mock_reservation.request_group.requests = self.mock_requests + self.mock_reservation.request_group.ipp_value = 20 + self.mock_reservation.request_group.proposal.tac_priority = 50 + self.mock_reservation.request_group.id = 1 + self.mock_reservation.duration = 10 + self.mock_reservation.scheduled_resource = 'bpl' + self.mock_reservation.scheduled_start = start_time + self.mock_reservation.scheduled = True + + expected_datacontainer = DataContainer( + request_group_id=1, + duration=10, + scheduled_resource='bpl', + scheduled=True, + scheduled_start=start_time, + ipp_value=20, + tac_priority=50, + requests=self.mock_requests, + ) 
+ + bin_data = { + 'bin1': self.mock_reservation, + 'bin2': self.mock_reservation, + } + for bin_name, reservation in bin_data.items(): + fill_bin_with_reservation_data(data_dict, bin_name, reservation) + + expected = { + 'bin1': [expected_datacontainer], + 'bin2': [expected_datacontainer], + } + for bin_name, data in data_dict.items(): + for i, item in enumerate(data): + assert expected[bin_name][i].__dict__ == item.__dict__ def test_airmass_functions(self): # test with fake airmass data in the same format as returned by Observation Portal From 634ccc52284db607a9c6adc3e4ea5267a50f9fda Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 7 Jul 2022 17:09:41 +0000 Subject: [PATCH 024/165] percent of function was deleted on accident during merge resolution --- adaptive_scheduler/simulation/metrics.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 4a4be876..2e807d1d 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -15,6 +15,10 @@ from rise_set.astrometry import calculate_airmass_at_times +def percent_of(x, y): + """Returns x/y as a percentage (float).""" + return x/y*100. + def percent_diff(x, y): """Returns the percent difference between x and y as a float.""" if x == y == 0: @@ -32,7 +36,7 @@ class SimulatorMetrics(): as follows: {scheduled_resource, [reservations]} rr_scheduler_result (SchedulerResult): The rapid-response schedule output of the scheduler. - scheduler (LCOGTNetworkScheduler): The instance of the scheduler used by the 1.900367548884168, 1.3311255510156763, 1.900367548884168, 1.2612518764062148, 1.2612518764062148, 1.2612518764062148simulator. + scheduler (LCOGTNetworkScheduler): The instance of the scheduler used by the simulator. scheduler_runner (SchedulerRunner): The instance of the scheduler runner used by the simulator. 
""" def __init__(self, normal_scheduler_result, rr_scheduler_result, scheduler, scheduler_runner): @@ -153,8 +157,6 @@ def reservation_data_populator(reservation): request_group = reservation.request_group proposal = request_group.proposal requests = request_group.requests - request_id_configurations = {request.id: request.configurations - for request in requests} data = DataContainer( request_group_id=reservation.request_group.id, @@ -165,7 +167,6 @@ def reservation_data_populator(reservation): ipp_value=reservation.request_group.ipp_value, tac_priority=proposal.tac_priority, requests=reservation.request_group.requests, - configurations_by_request_id=request_id_configurations, ) return data From 6d4f03659b8c530b7418dd50becfa48ed077b464 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 7 Jul 2022 20:57:14 +0000 Subject: [PATCH 025/165] test binning function, counter function, and aggregators --- tests/test_simulator_metrics.py | 101 ++++++++++++++++++-------------- 1 file changed, 58 insertions(+), 43 deletions(-) diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 54c85a42..e64abcff 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -1,60 +1,75 @@ -from adaptive_scheduler.simulation.metrics import SimulatorMetrics, fill_bin_with_reservation_data +from adaptive_scheduler.simulation.metrics import (MetricCalculator, fill_bin_with_reservation_data, + percent_reservations_scheduled, + total_scheduled_seconds, + total_available_seconds,) from adaptive_scheduler.scheduler import Scheduler, SchedulerRunner, SchedulerResult from adaptive_scheduler.models import DataContainer -from mock import Mock, patch +from mock import Mock -from datetime import datetime +from datetime import datetime, timedelta import pytest class TestMetrics(): - def setup(self): - self.mock_reservation = Mock() - self.mock_requests = Mock(return_value=[]) - + def test_percent_scheduled(self): + scheduled_reservation = 
Mock(scheduled=True) + unscheduled_reservation = Mock(scheduled=False) - def test_percent_scheduled_counters(self): - # testing input is SchedulerResult with fake Reservation() data - # PLACEHOLDER: some test all scheduled - # PLACEHOLDER: some test none scheduled - # PLACEHOLDER: some test empty schedule - # PLACEHOLDER: some test some known percentage - pass + all_scheduled = {'bpl': [scheduled_reservation]} + half_scheduled = {'bpl': [scheduled_reservation, unscheduled_reservation]} + none_scheduled = {'bpl': [unscheduled_reservation]} + multiple_sites = {'bpl': [scheduled_reservation, unscheduled_reservation], + 'coj': [unscheduled_reservation, scheduled_reservation]} - def test_scheduled_time_aggregator(self): - # testing input is SchedulerResult with fake Reservation() data with varying duration - # PLACEHOLDER: some test some known duration - # PLACEHOLDER: some test no duration - pass + assert percent_reservations_scheduled(all_scheduled) == 100. + assert percent_reservations_scheduled(half_scheduled) == 50. + assert percent_reservations_scheduled(none_scheduled) == 0. + assert percent_reservations_scheduled(multiple_sites) == 50. - def test_available_time_aggregator(self): - # testing input is SchedulerResult with fake Reservation() data with emphasis on resources scheduled - # also mock the visibility cache with our own dark intervals - # test with varying 'effective horizons', e.g. 
- test_horizon_days = [1, 0.5, 2, 5] - # PLACEHOLDER: some test no resources - # PLACEHOLDER: some test no dark intervals - # PLACEHOLDER: some test dark intervals less than capped - # PLACEHOLDER: some test dark intervals that need to be capped - # PLACEHOLDER: some test different horizon days - pass + def test_total_scheduled_seconds(self): + res1 = Mock(duration=10) + res2 = Mock(duration=20) + res3 = Mock(duration=30) + fake_schedule = {'bpl': [res1, res2], 'coj': [res3]} + + assert total_scheduled_seconds(fake_schedule) == 60 + + def test_total_available_seconds(self): + seconds_in_day = 86400 + test_time = datetime.utcnow() + + scheduler_result_attrs = {'resources_scheduled.return_value': ['bpl', 'coj']} + mock_scheduler_result = Mock(**scheduler_result_attrs) + + mock_scheduler = Mock(estimated_scheduler_end=test_time) + mock_scheduler.visibility_cache = {'bpl': Mock(), 'coj': Mock()} + mock_scheduler.visibility_cache['bpl'].dark_intervals = [(test_time-timedelta(days=5), test_time-timedelta(days=4)), + (test_time, test_time+timedelta(days=1)), + (test_time+timedelta(days=2), test_time+timedelta(days=3))] + mock_scheduler.visibility_cache['coj'].dark_intervals = [(test_time, test_time+timedelta(days=2))] + + assert total_available_seconds(mock_scheduler_result, mock_scheduler_result, mock_scheduler, 0) == 0 + assert total_available_seconds(mock_scheduler_result, mock_scheduler_result, mock_scheduler, 1) == 2*seconds_in_day + assert total_available_seconds(mock_scheduler_result, mock_scheduler_result, mock_scheduler, 5) == 4*seconds_in_day def test_fill_bin_with_reservation_data(self): data_dict = {} start_time = datetime.utcnow() - - self.mock_reservation.request_group.requests = self.mock_requests - self.mock_reservation.request_group.ipp_value = 20 - self.mock_reservation.request_group.proposal.tac_priority = 50 - self.mock_reservation.request_group.id = 1 - self.mock_reservation.duration = 10 - self.mock_reservation.scheduled_resource = 'bpl' - 
self.mock_reservation.scheduled_start = start_time - self.mock_reservation.scheduled = True - + + mock_reservation = Mock( + duration=10, + scheduled_resource='bpl', + scheduled_start=start_time, + scheduled=True, + ) + mock_reservation.request_group.requests = [] + mock_reservation.request_group.ipp_value = 20 + mock_reservation.request_group.proposal.tac_priority = 50 + mock_reservation.request_group.id = 1 + expected_datacontainer = DataContainer( request_group_id=1, duration=10, @@ -63,12 +78,12 @@ def test_fill_bin_with_reservation_data(self): scheduled_start=start_time, ipp_value=20, tac_priority=50, - requests=self.mock_requests, + requests=[], ) bin_data = { - 'bin1': self.mock_reservation, - 'bin2': self.mock_reservation, + 'bin1': mock_reservation, + 'bin2': mock_reservation, } for bin_name, reservation in bin_data.items(): fill_bin_with_reservation_data(data_dict, bin_name, reservation) From fc134f4ab0ec3bf1e62ee63f435c2a8c771c3b77 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 7 Jul 2022 21:14:18 +0000 Subject: [PATCH 026/165] moved some functions around --- adaptive_scheduler/simulation/metrics.py | 86 +++++++----------------- 1 file changed, 26 insertions(+), 60 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 3f57db21..2d657c7a 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -27,7 +27,7 @@ def percent_diff(x, y): return abs(x-y)/mean*100. -class SimulatorMetrics(): +class MetricCalculator(): """A class encapsulating the metric calculating functions for the scheduler simulator. 
Args: @@ -41,85 +41,51 @@ class SimulatorMetrics(): """ def __init__(self, normal_scheduler_result, rr_scheduler_result, scheduler, scheduler_runner): self.normal_scheduler_result = normal_scheduler_result + # THERE IS NOT ALWAYS A RR RESULT - ADD CHECKS FOR THIS self.rr_scheduler_result = rr_scheduler_result self.scheduler = scheduler self.scheduler_runner = scheduler_runner self.normal_schedule = self.normal_scheduler_result.schedule self.rr_schedule = self.rr_scheduler_result.schedule - self.combined_schedule = self.combine_normal_rr_schedules() + self.combined_schedule = self._combine_normal_rr_schedules() - def combine_normal_rr_schedules(self): + def _combine_normal_rr_schedules(self): self.combined_schedule = self.normal_schedule.copy() for resource, reservations in self.rr_schedule.items(): for reservation in reservations: self.combined_schedule[resource].append(reservation) - def total_scheduled_count(self, schedule): - counter = 0 - for reservations in schedule.values(): - for reservation in reservations: - if reservation.scheduled: - counter += 1 - return counter - - def total_unscheduled_count(self, schedule): - counter = 0 - for reservations in schedule.values(): - for reservation in reservations: - if not reservation.scheduled: - counter += 1 - return counter - - def total_scheduled_seconds(self, schedule): - total_scheduled_seconds = 0 - for reservations in schedule.values(): - for reservation in reservations: - total_scheduled_seconds += reservation.duration - return total_scheduled_seconds - - -def combine_normal_and_rr_requests_by_rg_id(normal_scheduled_requests_by_rg_id, - rr_scheduled_requests_by_rg_id): - """Combines normal and scheduled request results for aggregation. 
- - Args: - normal_scheduled_requests_by_rg_id (dict): This is the output of - SchedulerResult.get_scheduled_requests_by_request_group_id() - which is a dictionary formatted as follows: - {rg_id1: {request1: request1_data, request2: request2_data}, - rg_id2: ...} - rr_scheduled_requests_by_rg_id (dict): The same format of results but for - rapid response scheduler results. - - Returns: - combined_scheduled_requests_by_rg_id (dict): Merged dictionaries with duplicate - keys being excluded (OR). - """ - return normal_scheduled_requests_by_rg_id | rr_scheduled_requests_by_rg_id - - -def total_scheduled_count(scheduled_requests_by_rg_id): - """Counts the number of scheduled requests.""" + +def count_scheduled(schedule): counter = 0 - for request_group in scheduled_requests_by_rg_id.values(): - for request in request_group.values(): - if request.scheduled: + for reservations in schedule.values(): + for reservation in reservations: + if reservation.scheduled: counter += 1 return counter - -def total_unscheduled_count(scheduled_requests_by_rg_id): - """Counts the number of unscheduled requests.""" +def count_unscheduled(schedule): counter = 0 - for request_group in scheduled_requests_by_rg_id.values(): - for request in request_group.values(): - if not request.scheduled: + for reservations in schedule.values(): + for reservation in reservations: + if not reservation.scheduled: counter += 1 return counter +def percent_reservations_scheduled(schedule): + total = count_scheduled(schedule) + count_unscheduled(schedule) + return percent_of(count_scheduled(schedule), total) -def total_available_time(normal_scheduler_result, rr_scheduler_result, scheduler, horizon_days): + +def total_scheduled_seconds(schedule): + total_scheduled_seconds = 0 + for reservations in schedule.values(): + for reservation in reservations: + total_scheduled_seconds += reservation.duration + return total_scheduled_seconds + +def total_available_seconds(normal_scheduler_result, rr_scheduler_result, 
scheduler, horizon_days): """Aggregates the total available time, calculated from dark intervals. Args: @@ -331,4 +297,4 @@ def midpoint_airmass_vs_priority(observation_portal_interface, schedule, semeste return midpoint_airmass_vs_priority - \ No newline at end of file + From 8495dc4892c2e46de58803123e4567225ec1dd93 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Thu, 7 Jul 2022 14:18:00 -0700 Subject: [PATCH 027/165] merge conflict fix --- adaptive_scheduler/simulation/metrics.py | 2 +- tests/test_simulator_metrics.py | 78 ++++++++++++++++++++++-- 2 files changed, 74 insertions(+), 6 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 3f57db21..2e90b987 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -288,7 +288,7 @@ def get_midpoint_airmasses_from_request(observation_portal_interface, request_id for site, details in airmass_data.items(): times, airmasses = list(details.values())[0], list(details.values())[1] index = 0 - time_diff = midpoint_time -datetime.strptime(times[0],'%Y-%m-%dT%H:%M') + time_diff = midpoint_time - datetime.strptime(times[0],'%Y-%m-%dT%H:%M') for i in range(len(times)): temp_time_diff = midpoint_time - datetime.strptime(times[i],'%Y-%m-%dT%H:%M') if temp_time_diff < time_diff: diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index e64abcff..938a0501 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -1,14 +1,16 @@ from adaptive_scheduler.simulation.metrics import (MetricCalculator, fill_bin_with_reservation_data, percent_reservations_scheduled, total_scheduled_seconds, - total_available_seconds,) + total_available_seconds, + get_midpoint_airmasses_from_request, + get_airmass_data_from_observation_portal, + get_midpoint_airmass_for_each_reservation) from adaptive_scheduler.scheduler import Scheduler, SchedulerRunner, SchedulerResult from 
adaptive_scheduler.models import DataContainer -from mock import Mock +from mock import Mock, patch from datetime import datetime, timedelta - import pytest @@ -35,7 +37,6 @@ def test_total_scheduled_seconds(self): res3 = Mock(duration=30) fake_schedule = {'bpl': [res1, res2], 'coj': [res3]} - assert total_scheduled_seconds(fake_schedule) == 60 def test_total_available_seconds(self): seconds_in_day = 86400 @@ -96,9 +97,76 @@ def test_fill_bin_with_reservation_data(self): for i, item in enumerate(data): assert expected[bin_name][i].__dict__ == item.__dict__ + + + @patch('get_airmass_data_from_observation_portal') def test_airmass_functions(self): # test with fake airmass data in the same format as returned by Observation Portal # PLACEHOLDER: some test ideal airmass # PLACEHOLDER: some test midpoint airmass # PLACEHOLDER: some tests with airmass averaging functions - pass + # site = 'tfn' + # airmasses = Mock() + # airmasses['airmass_data'] = Mock() + # airmasses['airmass_data'][site] = Mock() + airmasses = { + "airmass_data": { + "tfn": { + "times": [ + "2022-07-06T00:11", + "2022-07-06T00:21", + "2022-07-06T00:31", + "2022-07-06T00:41", + "2022-07-06T00:51", + "2022-07-06T01:01", + "2022-07-06T01:11", + "2022-07-06T01:21", + "2022-07-06T01:31", + "2022-07-06T01:41", + "2022-07-06T01:51", + "2022-07-06T02:01", + "2022-07-06T02:11", + "2022-07-06T02:21", + "2022-07-06T02:31", + "2022-07-06T02:41", + "2022-07-06T02:51", + "2022-07-06T03:01", + "2022-07-06T03:11", + "2022-07-06T03:21" + ], + "airmasses": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20 + ] + } + }, + "airmass_limit": 10.1 + } + mock_reservation = Mock(scheduled_start=0) + scheduled_reservations = [mock_reservation] + + start = datetime.strptime("2022-07-06 00:30:00", '%Y-%m-%d %H:%M:%S') + end = start + timedelta(minutes=90) + observation_portal_interface = Mock() + request_id = Mock() + 
get_airmass_data_from_observation_portal.return_value = airmasses + assert get_midpoint_airmasses_from_request(observation_portal_interface ,request_id, start, end) == {'tfn':7} + From 58eb001e23ed324c3a071ffd2d4e59f6d58abc2e Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Thu, 7 Jul 2022 15:55:28 -0700 Subject: [PATCH 028/165] tested airmass metrics and debugged --- adaptive_scheduler/simulation/metrics.py | 5 ++- tests/test_simulator_metrics.py | 48 ++++++++++++++---------- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index ebfb0ba6..b6e9dc6e 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -254,9 +254,10 @@ def get_midpoint_airmasses_from_request(observation_portal_interface, request_id for site, details in airmass_data.items(): times, airmasses = list(details.values())[0], list(details.values())[1] index = 0 - time_diff = midpoint_time - datetime.strptime(times[0],'%Y-%m-%dT%H:%M') + time_diff = abs((midpoint_time - datetime.strptime(times[0],'%Y-%m-%dT%H:%M')).total_seconds()) + for i in range(len(times)): - temp_time_diff = midpoint_time - datetime.strptime(times[i],'%Y-%m-%dT%H:%M') + temp_time_diff = abs((midpoint_time - datetime.strptime(times[i],'%Y-%m-%dT%H:%M')).total_seconds()) if temp_time_diff < time_diff: time_diff = temp_time_diff index = i diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 938a0501..02de6ff0 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -1,4 +1,7 @@ -from adaptive_scheduler.simulation.metrics import (MetricCalculator, fill_bin_with_reservation_data, +import re + +from numpy import average +from adaptive_scheduler.simulation.metrics import (MetricCalculator, avg_ideal_airmass, fill_bin_with_reservation_data, get_ideal_airmass_for_request, percent_reservations_scheduled, total_scheduled_seconds, 
total_available_seconds, @@ -10,7 +13,7 @@ from mock import Mock, patch -from datetime import datetime, timedelta +from datetime import date, datetime, timedelta import pytest @@ -99,16 +102,7 @@ def test_fill_bin_with_reservation_data(self): - @patch('get_airmass_data_from_observation_portal') def test_airmass_functions(self): - # test with fake airmass data in the same format as returned by Observation Portal - # PLACEHOLDER: some test ideal airmass - # PLACEHOLDER: some test midpoint airmass - # PLACEHOLDER: some tests with airmass averaging functions - # site = 'tfn' - # airmasses = Mock() - # airmasses['airmass_data'] = Mock() - # airmasses['airmass_data'][site] = Mock() airmasses = { "airmass_data": { "tfn": { @@ -160,13 +154,27 @@ def test_airmass_functions(self): }, "airmass_limit": 10.1 } - mock_reservation = Mock(scheduled_start=0) - scheduled_reservations = [mock_reservation] - - start = datetime.strptime("2022-07-06 00:30:00", '%Y-%m-%d %H:%M:%S') - end = start + timedelta(minutes=90) - observation_portal_interface = Mock() - request_id = Mock() - get_airmass_data_from_observation_portal.return_value = airmasses - assert get_midpoint_airmasses_from_request(observation_portal_interface ,request_id, start, end) == {'tfn':7} + with patch('adaptive_scheduler.simulation.metrics.get_airmass_data_from_observation_portal', return_value=airmasses): + request_id = Mock() + request = Mock(id = request_id) + request_group = Mock(requests = [request]) + mock_reservation = Mock(scheduled_start=0, scheduled_resource ='1m0a.doma.tfn', request_group = request_group, + duration =5400 ) + scheduled_reservations = [mock_reservation] + schedule = {'reservations': scheduled_reservations} + + start = datetime.strptime("2022-07-06T00:30", '%Y-%m-%dT%H:%M') + end = start + timedelta(minutes=90) + observation_portal_interface = Mock() + semester_start = start + + assert get_midpoint_airmasses_from_request(observation_portal_interface ,request_id, start, end) == {'tfn':7} + 
assert get_ideal_airmass_for_request(observation_portal_interface, request_id) == 1 + + with patch('adaptive_scheduler.utils.normalised_epoch_to_datetime', return_value= start): + with patch('adaptive_scheduler.utils.datetime_to_epoch', autospec=True, return_value=Mock()): + assert avg_ideal_airmass(observation_portal_interface, schedule) == 1 + assert get_midpoint_airmass_for_each_reservation(observation_portal_interface, schedule, semester_start) == [7] + + From c190f0bd9b1e385aec25f2297038cf3abf4ae6a8 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 7 Jul 2022 23:50:54 +0000 Subject: [PATCH 029/165] migrate airmass data to a json file --- tests/airmass_data.json | 51 +++++++++++++++++++ tests/test_simulator_metrics.py | 87 +++++++-------------------------- 2 files changed, 70 insertions(+), 68 deletions(-) create mode 100644 tests/airmass_data.json diff --git a/tests/airmass_data.json b/tests/airmass_data.json new file mode 100644 index 00000000..d8d02601 --- /dev/null +++ b/tests/airmass_data.json @@ -0,0 +1,51 @@ +{ + "airmass_data": { + "tfn": { + "times": [ + "2022-07-06T00:11", + "2022-07-06T00:21", + "2022-07-06T00:31", + "2022-07-06T00:41", + "2022-07-06T00:51", + "2022-07-06T01:01", + "2022-07-06T01:11", + "2022-07-06T01:21", + "2022-07-06T01:31", + "2022-07-06T01:41", + "2022-07-06T01:51", + "2022-07-06T02:01", + "2022-07-06T02:11", + "2022-07-06T02:21", + "2022-07-06T02:31", + "2022-07-06T02:41", + "2022-07-06T02:51", + "2022-07-06T03:01", + "2022-07-06T03:11", + "2022-07-06T03:21" + ], + "airmasses": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20 + ] + } + }, + "airmass_limit": 10.1 +} diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 02de6ff0..3c81e0ca 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -1,16 +1,14 @@ -import re - -from numpy import average -from adaptive_scheduler.simulation.metrics import 
(MetricCalculator, avg_ideal_airmass, fill_bin_with_reservation_data, get_ideal_airmass_for_request, +from adaptive_scheduler.simulation.metrics import (fill_bin_with_reservation_data, percent_reservations_scheduled, total_scheduled_seconds, total_available_seconds, - get_midpoint_airmasses_from_request, - get_airmass_data_from_observation_portal, - get_midpoint_airmass_for_each_reservation) -from adaptive_scheduler.scheduler import Scheduler, SchedulerRunner, SchedulerResult + get_midpoint_airmasses_from_request, + get_midpoint_airmass_for_each_reservation, + get_ideal_airmass_for_request, + avg_ideal_airmass) from adaptive_scheduler.models import DataContainer +import json from mock import Mock, patch from datetime import date, datetime, timedelta @@ -27,12 +25,12 @@ def test_percent_scheduled(self): half_scheduled = {'bpl': [scheduled_reservation, unscheduled_reservation]} none_scheduled = {'bpl': [unscheduled_reservation]} multiple_sites = {'bpl': [scheduled_reservation, unscheduled_reservation], - 'coj': [unscheduled_reservation, scheduled_reservation]} + 'coj': [scheduled_reservation, scheduled_reservation]} assert percent_reservations_scheduled(all_scheduled) == 100. assert percent_reservations_scheduled(half_scheduled) == 50. assert percent_reservations_scheduled(none_scheduled) == 0. - assert percent_reservations_scheduled(multiple_sites) == 50. + assert percent_reservations_scheduled(multiple_sites) == 75. 
def test_total_scheduled_seconds(self): res1 = Mock(duration=10) @@ -103,64 +101,17 @@ def test_fill_bin_with_reservation_data(self): def test_airmass_functions(self): - airmasses = { - "airmass_data": { - "tfn": { - "times": [ - "2022-07-06T00:11", - "2022-07-06T00:21", - "2022-07-06T00:31", - "2022-07-06T00:41", - "2022-07-06T00:51", - "2022-07-06T01:01", - "2022-07-06T01:11", - "2022-07-06T01:21", - "2022-07-06T01:31", - "2022-07-06T01:41", - "2022-07-06T01:51", - "2022-07-06T02:01", - "2022-07-06T02:11", - "2022-07-06T02:21", - "2022-07-06T02:31", - "2022-07-06T02:41", - "2022-07-06T02:51", - "2022-07-06T03:01", - "2022-07-06T03:11", - "2022-07-06T03:21" - ], - "airmasses": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20 - ] - } - }, - "airmass_limit": 10.1 - } + with open('tests/airmass_data.json') as f: + airmass_data = json.load(f) - with patch('adaptive_scheduler.simulation.metrics.get_airmass_data_from_observation_portal', return_value=airmasses): + with patch('adaptive_scheduler.simulation.metrics.get_airmass_data_from_observation_portal', return_value=airmass_data): request_id = Mock() - request = Mock(id = request_id) - request_group = Mock(requests = [request]) - mock_reservation = Mock(scheduled_start=0, scheduled_resource ='1m0a.doma.tfn', request_group = request_group, - duration =5400 ) + request = Mock(id=request_id) + request_group = Mock(requests=[request]) + mock_reservation = Mock(scheduled_start=0, + scheduled_resource='1m0a.doma.tfn', + request_group=request_group, + duration=5400) scheduled_reservations = [mock_reservation] schedule = {'reservations': scheduled_reservations} @@ -169,10 +120,10 @@ def test_airmass_functions(self): observation_portal_interface = Mock() semester_start = start - assert get_midpoint_airmasses_from_request(observation_portal_interface ,request_id, start, end) == {'tfn':7} + assert get_midpoint_airmasses_from_request(observation_portal_interface, 
request_id, start, end) == {'tfn': 7} assert get_ideal_airmass_for_request(observation_portal_interface, request_id) == 1 - with patch('adaptive_scheduler.utils.normalised_epoch_to_datetime', return_value= start): + with patch('adaptive_scheduler.utils.normalised_epoch_to_datetime', return_value=start): with patch('adaptive_scheduler.utils.datetime_to_epoch', autospec=True, return_value=Mock()): assert avg_ideal_airmass(observation_portal_interface, schedule) == 1 assert get_midpoint_airmass_for_each_reservation(observation_portal_interface, schedule, semester_start) == [7] From 1563bf19aceed738d110d74568243c26afe53171 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 7 Jul 2022 23:52:11 +0000 Subject: [PATCH 030/165] ignore rapid response if there are no rr scheduler results --- adaptive_scheduler/simulation/metrics.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index b6e9dc6e..3c18cd23 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -40,15 +40,17 @@ class MetricCalculator(): scheduler_runner (SchedulerRunner): The instance of the scheduler runner used by the simulator. 
""" def __init__(self, normal_scheduler_result, rr_scheduler_result, scheduler, scheduler_runner): - self.normal_scheduler_result = normal_scheduler_result - # THERE IS NOT ALWAYS A RR RESULT - ADD CHECKS FOR THIS - self.rr_scheduler_result = rr_scheduler_result self.scheduler = scheduler self.scheduler_runner = scheduler_runner + self.normal_scheduler_result = normal_scheduler_result self.normal_schedule = self.normal_scheduler_result.schedule - self.rr_schedule = self.rr_scheduler_result.schedule - self.combined_schedule = self._combine_normal_rr_schedules() + if rr_scheduler_result: + self.rr_scheduler_result = rr_scheduler_result + self.rr_schedule = self.rr_scheduler_result.schedule + self.combined_schedule = self._combine_normal_rr_schedules() + else: + self.combined_schedule = self.normal_schedule def _combine_normal_rr_schedules(self): self.combined_schedule = self.normal_schedule.copy() @@ -99,7 +101,7 @@ def total_available_seconds(normal_scheduler_result, rr_scheduler_result, schedu """ total_available_time = 0 normal_resources = normal_scheduler_result.resources_scheduled() - rr_resources = rr_scheduler_result.resources_scheduled() + rr_resources = rr_scheduler_result.resources_scheduled() if rr_scheduler_result else [] scheduled_resources = list(set(normal_resources + rr_resources)) start_time = scheduler.estimated_scheduler_end end_time = start_time + dt.timedelta(days=horizon_days) From 9a2b3f0f049abe7db925e785bda21fbf5b65b329 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Thu, 7 Jul 2022 16:54:47 -0700 Subject: [PATCH 031/165] migrate --- tests/test_simulator_metrics.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 3c81e0ca..ff21eca2 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -99,7 +99,6 @@ def test_fill_bin_with_reservation_data(self): assert expected[bin_name][i].__dict__ == item.__dict__ - def test_airmass_functions(self): with 
open('tests/airmass_data.json') as f: airmass_data = json.load(f) From a83fbcec6b3dea49c9e1ebd1c5ad3ba013b7383e Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Fri, 8 Jul 2022 10:01:25 -0700 Subject: [PATCH 032/165] fixed reservation.request --- tests/test_simulator_metrics.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index ff21eca2..12b845df 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -103,13 +103,13 @@ def test_airmass_functions(self): with open('tests/airmass_data.json') as f: airmass_data = json.load(f) - with patch('adaptive_scheduler.simulation.metrics.get_airmass_data_from_observation_portal', return_value=airmass_data): + with patch('adaptive_scheduler.simulation.metrics.get_airmass_data_from_observation_portal', + return_value=airmass_data): request_id = Mock() request = Mock(id=request_id) - request_group = Mock(requests=[request]) mock_reservation = Mock(scheduled_start=0, scheduled_resource='1m0a.doma.tfn', - request_group=request_group, + request = request, duration=5400) scheduled_reservations = [mock_reservation] schedule = {'reservations': scheduled_reservations} @@ -119,12 +119,12 @@ def test_airmass_functions(self): observation_portal_interface = Mock() semester_start = start - assert get_midpoint_airmasses_from_request(observation_portal_interface, request_id, start, end) == {'tfn': 7} + assert get_midpoint_airmasses_from_request(observation_portal_interface, request_id, + start, end) == {'tfn': 7} assert get_ideal_airmass_for_request(observation_portal_interface, request_id) == 1 - - with patch('adaptive_scheduler.utils.normalised_epoch_to_datetime', return_value=start): - with patch('adaptive_scheduler.utils.datetime_to_epoch', autospec=True, return_value=Mock()): - assert avg_ideal_airmass(observation_portal_interface, schedule) == 1 - assert 
get_midpoint_airmass_for_each_reservation(observation_portal_interface, schedule, semester_start) == [7] - - + + assert avg_ideal_airmass(observation_portal_interface, schedule) == 1 + assert get_midpoint_airmass_for_each_reservation(observation_portal_interface, + schedule, semester_start) == [7] + + From d6fa06ceafa29b169c4537575e3e8437567c1bce Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Fri, 8 Jul 2022 10:01:35 -0700 Subject: [PATCH 033/165] fixed reservation.request --- tests/test_simulator_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 12b845df..805d34db 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -109,7 +109,7 @@ def test_airmass_functions(self): request = Mock(id=request_id) mock_reservation = Mock(scheduled_start=0, scheduled_resource='1m0a.doma.tfn', - request = request, + request=request, duration=5400) scheduled_reservations = [mock_reservation] schedule = {'reservations': scheduled_reservations} From 2dd0d20bf855df86ce8ac45acc85311da7e7dc2a Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Fri, 8 Jul 2022 10:47:31 -0700 Subject: [PATCH 034/165] fixed reservation.request --- tests/test_simulator_metrics.py | 34 ++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 805d34db..8504dcd4 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -102,29 +102,29 @@ def test_fill_bin_with_reservation_data(self): def test_airmass_functions(self): with open('tests/airmass_data.json') as f: airmass_data = json.load(f) - + pretty_json = json.dumps(airmass_data, indent=4) + print(pretty_json) with patch('adaptive_scheduler.simulation.metrics.get_airmass_data_from_observation_portal', return_value=airmass_data): - request_id = Mock() - request = Mock(id=request_id) - mock_reservation 
= Mock(scheduled_start=0, - scheduled_resource='1m0a.doma.tfn', - request=request, - duration=5400) - scheduled_reservations = [mock_reservation] + request_id_1 = Mock() + request_1 = Mock(id=request_id_1) + mock_reservation_1 = Mock(scheduled_start=0, scheduled_resource='1m0a.doma.tfn', + request=request_1, duration=5400) + request_id_2 = Mock() + request_2 = Mock(id=request_id_2) + mock_reservation_2 = Mock(scheduled_start=0, scheduled_resource='1m0a.doma.egg', + request=request_2, duration=5400) + scheduled_reservations = [mock_reservation_1, mock_reservation_2] schedule = {'reservations': scheduled_reservations} start = datetime.strptime("2022-07-06T00:30", '%Y-%m-%dT%H:%M') end = start + timedelta(minutes=90) observation_portal_interface = Mock() semester_start = start - - assert get_midpoint_airmasses_from_request(observation_portal_interface, request_id, - start, end) == {'tfn': 7} - assert get_ideal_airmass_for_request(observation_portal_interface, request_id) == 1 - + + assert get_midpoint_airmasses_from_request(observation_portal_interface, request_id_1, + start, end) == {'tfn': 7, 'egg': 3} + assert get_ideal_airmass_for_request(observation_portal_interface, request_id_2) == 1 assert avg_ideal_airmass(observation_portal_interface, schedule) == 1 - assert get_midpoint_airmass_for_each_reservation(observation_portal_interface, - schedule, semester_start) == [7] - - + assert get_midpoint_airmass_for_each_reservation(observation_portal_interface, + schedule, semester_start) == [7, 3] From e84b796e144937ab9a39ce7132eb732f81d6d504 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Fri, 8 Jul 2022 11:28:02 -0700 Subject: [PATCH 035/165] some style fixes --- adaptive_scheduler/simulation/metrics.py | 93 ++++++++++++------------ tests/airmass_data.json | 24 +++++- 2 files changed, 68 insertions(+), 49 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 3c18cd23..b06540b4 100644 --- 
a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -1,30 +1,27 @@ """ Metric calculation functions for the scheduler simulator. """ -import logging import datetime as dt from datetime import datetime import requests -import numpy as np from requests.exceptions import RequestException, Timeout - +from rise_set.astrometry import calculate_airmass_at_times from adaptive_scheduler.observation_portal_connections import ObservationPortalConnectionError -from adaptive_scheduler.utils import time_in_capped_intervals, normalised_epoch_to_datetime, datetime_to_epoch, merge_dicts +from adaptive_scheduler.utils import time_in_capped_intervals, normalised_epoch_to_datetime, datetime_to_epoch from adaptive_scheduler.models import DataContainer -from rise_set.astrometry import calculate_airmass_at_times def percent_of(x, y): """Returns x/y as a percentage (float).""" - return x/y*100. + return x / y * 100. def percent_diff(x, y): """Returns the percent difference between x and y as a float.""" if x == y == 0: return 0 - mean = (abs(x)+abs(y))/2 - return abs(x-y)/mean*100. + mean = (abs(x) + abs(y)) / 2 + return abs(x - y) / mean * 100. 
class MetricCalculator(): @@ -48,7 +45,7 @@ def __init__(self, normal_scheduler_result, rr_scheduler_result, scheduler, sche if rr_scheduler_result: self.rr_scheduler_result = rr_scheduler_result self.rr_schedule = self.rr_scheduler_result.schedule - self.combined_schedule = self._combine_normal_rr_schedules() + self._combine_normal_rr_schedules() else: self.combined_schedule = self.normal_schedule @@ -58,7 +55,7 @@ def _combine_normal_rr_schedules(self): for reservation in reservations: self.combined_schedule[resource].append(reservation) - + def count_scheduled(schedule): counter = 0 for reservations in schedule.values(): @@ -75,6 +72,7 @@ def count_unscheduled(schedule): counter += 1 return counter + def percent_reservations_scheduled(schedule): total = count_scheduled(schedule) + count_unscheduled(schedule) return percent_of(count_scheduled(schedule), total) @@ -85,8 +83,9 @@ def total_scheduled_seconds(schedule): for reservations in schedule.values(): for reservation in reservations: total_scheduled_seconds += reservation.duration - return total_scheduled_seconds - + return total_scheduled_seconds + + def total_available_seconds(normal_scheduler_result, rr_scheduler_result, scheduler, horizon_days): """Aggregates the total available time, calculated from dark intervals. @@ -124,8 +123,7 @@ def reservation_data_populator(reservation): """ request_group = reservation.request_group proposal = request_group.proposal - requests = request_group.requests - + data = DataContainer( request_group_id=reservation.request_group.id, duration=reservation.duration, @@ -148,7 +146,7 @@ def fill_bin_with_reservation_data(data_dict, bin_name, reservation): bin_name (str): The name of the bin to create or populate. reservation (Reservation_v3): A Reservation object. 
""" - if not bin_name in data_dict: + if bin_name not in data_dict: data_dict[bin_name] = [] reservation_data = reservation_data_populator(reservation) data_dict[bin_name].append(reservation_data) @@ -177,7 +175,7 @@ def bin_scheduler_result_by_tac_priority(schedule): tac_priority, reservation) return scheduled_requests_by_tac_priority - + def get_airmass_data_from_observation_portal(observation_portal_interface, request_id): """Pulls airmass data from the Observation Portal. @@ -210,7 +208,7 @@ def get_ideal_airmass_for_request(observation_portal_interface, request_id): ideal_for_site = min(site['airmasses']) ideal_airmass = min(ideal_airmass, ideal_for_site) return ideal_airmass - + def avg_ideal_airmass(observation_portal_interface, schedule): """Calculates the average ideal airmass for scheduled observations.""" @@ -219,11 +217,11 @@ def avg_ideal_airmass(observation_portal_interface, schedule): for reservations in schedule.values(): for reservation in reservations: if reservation.scheduled: - for request in reservation.request_group.requests: - request_id = request.id - sum_ideal_airmass += get_ideal_airmass_for_request( - observation_portal_interface, request_id) - count += 1 + request = reservation.request + request_id = request.id + sum_ideal_airmass += get_ideal_airmass_for_request( + observation_portal_interface, request_id) + count += 1 return sum_ideal_airmass / count @@ -235,34 +233,35 @@ def calculate_midpoint_airmass(scheduled_requests_by_rg_id): if request.scheduled: start_time = request.start() end_time = request.end() - midpoint_time = [start_time + (end_time - start_time)/2] + midpoint_time = [start_time + (end_time - start_time) / 2] target = request.get_target() observation_sites = request.get_site() midpoint_airmass_each_request[request] = {} for site in observation_sites: - obs_latitude = site['latitdue'] - obs_longitude = site['longitude'] + obs_latitude = site['latitdue'] + obs_longitude = site['longitude'] obs_height = site['elevation'] - 
midpoint_airmass = calculate_airmass_at_times(midpoint_time, target, obs_latitude, obs_longitude, obs_height) + midpoint_airmass = calculate_airmass_at_times(midpoint_time, + target, obs_latitude, obs_longitude, obs_height) midpoint_airmass_each_request[request][site] = midpoint_airmass return midpoint_airmass_each_request - - + + def get_midpoint_airmasses_from_request(observation_portal_interface, request_id, start_time, end_time): midpoint_airmasses = {} - midpoint_time = start_time + (end_time - start_time)/2 + midpoint_time = start_time + (end_time - start_time) / 2 airmass_data = get_airmass_data_from_observation_portal( observation_portal_interface, request_id)['airmass_data'] for site, details in airmass_data.items(): times, airmasses = list(details.values())[0], list(details.values())[1] index = 0 - time_diff = abs((midpoint_time - datetime.strptime(times[0],'%Y-%m-%dT%H:%M')).total_seconds()) - + time_diff = abs((midpoint_time - datetime.strptime(times[0], '%Y-%m-%dT%H:%M')).total_seconds()) + for i in range(len(times)): - temp_time_diff = abs((midpoint_time - datetime.strptime(times[i],'%Y-%m-%dT%H:%M')).total_seconds()) + temp_time_diff = abs((midpoint_time - datetime.strptime(times[i], '%Y-%m-%dT%H:%M')).total_seconds()) if temp_time_diff < time_diff: time_diff = temp_time_diff - index = i + index = i midpoint_airmass = airmasses[index] midpoint_airmasses[site] = midpoint_airmass return midpoint_airmasses @@ -273,31 +272,29 @@ def get_midpoint_airmass_for_each_reservation(observation_portal_interface, sche for reservations in schedule.values(): for reservation in reservations: if reservation.scheduled: - for request in reservation.request_group.requests: - request_id = request.id - start_time = normalised_epoch_to_datetime(reservation.scheduled_start, datetime_to_epoch(semester_start)) - end_time = start_time + dt.timedelta(seconds = reservation.duration) - midpoint_airmasses = get_midpoint_airmasses_from_request( - observation_portal_interface, 
request_id, - start_time, end_time) - site = reservation.scheduled_resource[-3:] - midpoint_airmass = midpoint_airmasses[site] + request = reservation.request + request_id = request.id + start_time = normalised_epoch_to_datetime(reservation.scheduled_start, + datetime_to_epoch(semester_start)) + end_time = start_time + dt.timedelta(seconds=reservation.duration) + midpoint_airmasses = get_midpoint_airmasses_from_request(observation_portal_interface, + request_id, start_time, end_time) + site = reservation.scheduled_resource[-3:] + midpoint_airmass = midpoint_airmasses[site] midpoint_airmass_for_each_reservation.append(midpoint_airmass) return midpoint_airmass_for_each_reservation def midpoint_airmass_vs_priority(observation_portal_interface, schedule, semester_start): - midpoint_airmass_vs_priority={} - midpoint_airmass_for_each_reservation = get_midpoint_airmass_for_each_reservation(observation_portal_interface, schedule, semester_start) + midpoint_airmass_vs_priority = {} + midpoint_airmass_for_each_reservation = get_midpoint_airmass_for_each_reservation(observation_portal_interface, + schedule, semester_start) eff_priorities = [] for reservations in schedule.values(): for reservation in reservations: if reservation.scheduled: eff_priority = reservation.priority eff_priorities.append(eff_priority) - midpoint_airmass_vs_priority['midpoint_airmass']= midpoint_airmass_for_each_reservation + midpoint_airmass_vs_priority['midpoint_airmass'] = midpoint_airmass_for_each_reservation midpoint_airmass_vs_priority['eff_priorities'] = eff_priorities return midpoint_airmass_vs_priority - - - diff --git a/tests/airmass_data.json b/tests/airmass_data.json index d8d02601..e2fa3078 100644 --- a/tests/airmass_data.json +++ b/tests/airmass_data.json @@ -45,7 +45,29 @@ 19, 20 ] - } + }, + "egg": { + "times": [ + "2022-07-06T01:31", + "2022-07-06T01:41", + "2022-07-06T01:51", + "2022-07-06T02:01", + "2022-07-06T02:11", + "2022-07-06T02:21", + "2022-07-06T02:31", + "2022-07-06T02:41" + 
], + "airmasses": [ + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ] + } }, "airmass_limit": 10.1 } From 22c9512f44d3bac72bae534f3b4877bf6c14f28a Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Fri, 8 Jul 2022 20:37:17 +0000 Subject: [PATCH 036/165] some refactoring into a class for metrics --- adaptive_scheduler/simulation/metrics.py | 181 ++++++++++++----------- tests/test_simulator_metrics.py | 117 +++++++++------ 2 files changed, 170 insertions(+), 128 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 3c18cd23..6cff7b96 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -1,24 +1,27 @@ """ Metric calculation functions for the scheduler simulator. """ -import logging import datetime as dt from datetime import datetime +from collections import defaultdict import requests -import numpy as np from requests.exceptions import RequestException, Timeout from adaptive_scheduler.observation_portal_connections import ObservationPortalConnectionError -from adaptive_scheduler.utils import time_in_capped_intervals, normalised_epoch_to_datetime, datetime_to_epoch, merge_dicts +from adaptive_scheduler.utils import time_in_capped_intervals, normalised_epoch_to_datetime, datetime_to_epoch from adaptive_scheduler.models import DataContainer from rise_set.astrometry import calculate_airmass_at_times +DEFAULT_EFFECTIVE_HORIZON_DAYS = 5 + + def percent_of(x, y): """Returns x/y as a percentage (float).""" return x/y*100. 
+ def percent_diff(x, y): """Returns the percent difference between x and y as a float.""" if x == y == 0: @@ -43,74 +46,91 @@ def __init__(self, normal_scheduler_result, rr_scheduler_result, scheduler, sche self.scheduler = scheduler self.scheduler_runner = scheduler_runner + if self.scheduler_runner.sched_params.metric_effective_horizon: + self.effective_horizon = self.scheduler_runner.sched_params.metric_effective_horizon + else: + self.effective_horizon = DEFAULT_EFFECTIVE_HORIZON_DAYS + self.normal_scheduler_result = normal_scheduler_result self.normal_schedule = self.normal_scheduler_result.schedule if rr_scheduler_result: self.rr_scheduler_result = rr_scheduler_result self.rr_schedule = self.rr_scheduler_result.schedule - self.combined_schedule = self._combine_normal_rr_schedules() + self._combine_normal_rr_schedules() + self._combine_resources_scheduled() else: self.combined_schedule = self.normal_schedule + self.combined_resources_scheduled = self.normal_scheduler_result.resources_scheduled() + + def _combine_resources_scheduled(self): + normal_resources = self.normal_scheduler_result.resources_scheduled() + rr_resources = self.rr_scheduler_result.resources_scheduled() + self.combined_resources_scheduled = list(set(normal_resources + rr_resources)) def _combine_normal_rr_schedules(self): - self.combined_schedule = self.normal_schedule.copy() + self.combined_schedule = defaultdict(list) for resource, reservations in self.rr_schedule.items(): for reservation in reservations: self.combined_schedule[resource].append(reservation) + for resource, reservations in self.normal_schedule.items(): + for reservation in reservations: + if reservation not in self.combined_schedule[resource]: + self.combined_schedule[resource].append(reservation) - -def count_scheduled(schedule): - counter = 0 - for reservations in schedule.values(): - for reservation in reservations: - if reservation.scheduled: - counter += 1 - return counter - -def count_unscheduled(schedule): - counter 
= 0 - for reservations in schedule.values(): - for reservation in reservations: - if not reservation.scheduled: - counter += 1 - return counter - -def percent_reservations_scheduled(schedule): - total = count_scheduled(schedule) + count_unscheduled(schedule) - return percent_of(count_scheduled(schedule), total) - - -def total_scheduled_seconds(schedule): - total_scheduled_seconds = 0 - for reservations in schedule.values(): - for reservation in reservations: - total_scheduled_seconds += reservation.duration - return total_scheduled_seconds - -def total_available_seconds(normal_scheduler_result, rr_scheduler_result, scheduler, horizon_days): - """Aggregates the total available time, calculated from dark intervals. - - Args: - normal_scheduler_result (SchedulerResult): The normal scheduler result. - rr_scheduler_result (SchedulerResult): The rapid response scheduler result. - scheduler (LCOGTNetworkScheduler): The scheduler object used by the scheduler runner. - horizon_days (float): The length of the horizon in days to calculate the metric. - - Returns: - total_available_time (float): The dark intervals capped by the horizon. 
- """ - total_available_time = 0 - normal_resources = normal_scheduler_result.resources_scheduled() - rr_resources = rr_scheduler_result.resources_scheduled() if rr_scheduler_result else [] - scheduled_resources = list(set(normal_resources + rr_resources)) - start_time = scheduler.estimated_scheduler_end - end_time = start_time + dt.timedelta(days=horizon_days) - for resource in scheduled_resources: - if resource in scheduler.visibility_cache: - dark_intervals = scheduler.visibility_cache[resource].dark_intervals - available_time = time_in_capped_intervals(dark_intervals, start_time, end_time) - total_available_time += available_time - return total_available_time + def count_scheduled(self, schedule=None): + schedule = self.combined_schedule if schedule is None else schedule + counter = 0 + for reservations in schedule.values(): + for reservation in reservations: + if reservation.scheduled: + counter += 1 + return counter + + def count_unscheduled(self, schedule=None): + schedule = self.combined_schedule if schedule is None else schedule + counter = 0 + for reservations in schedule.values(): + for reservation in reservations: + if not reservation.scheduled: + counter += 1 + return counter + + def percent_reservations_scheduled(self, schedule=None): + schedule = self.combined_schedule if schedule is None else schedule + total = self.count_scheduled(schedule) + self.count_unscheduled(schedule) + return percent_of(self.count_scheduled(schedule), total) + + def total_scheduled_seconds(self, schedule=None): + schedule = self.combined_schedule if schedule is None else schedule + total_scheduled_seconds = 0 + for reservations in schedule.values(): + for reservation in reservations: + total_scheduled_seconds += reservation.duration + return total_scheduled_seconds + + def total_available_seconds(self, scheduled_resources=None, horizon_days=None): + """Aggregates the total available time, calculated from dark intervals. 
+ + Args: + scheduled_resources (list): The list of sites scheduled, if nothing is passed then use the + list generated when MetricCalculators is initialized. + horizon_days (float): The number of days to cap, basically an effective horizon. If nothing + is passed then use the value in sched_params. + + Returns: + total_available_time (float): The dark intervals capped by the horizon. + """ + scheduled_resources = self.combined_scheduled_resources if scheduled_resources is None else scheduled_resources + horizon_days = self.effective_horizon if horizon_days is None else horizon_days + total_available_time = 0 + start_time = self.scheduler.estimated_scheduler_end + end_time = start_time + dt.timedelta(days=horizon_days) + for resource in scheduled_resources: + if resource in self.scheduler.visibility_cache: + dark_intervals = self.scheduler.visibility_cache[resource].dark_intervals + available_time = time_in_capped_intervals(dark_intervals, start_time, end_time) + total_available_time += available_time + return total_available_time def reservation_data_populator(reservation): @@ -124,17 +144,16 @@ def reservation_data_populator(reservation): """ request_group = reservation.request_group proposal = request_group.proposal - requests = request_group.requests - + data = DataContainer( request_group_id=reservation.request_group.id, + request_id=reservation.request.id, duration=reservation.duration, scheduled_resource=reservation.scheduled_resource, scheduled=reservation.scheduled, scheduled_start=reservation.scheduled_start, ipp_value=reservation.request_group.ipp_value, tac_priority=proposal.tac_priority, - requests=reservation.request_group.requests, ) return data @@ -148,7 +167,7 @@ def fill_bin_with_reservation_data(data_dict, bin_name, reservation): bin_name (str): The name of the bin to create or populate. reservation (Reservation_v3): A Reservation object. 
""" - if not bin_name in data_dict: + if bin_name not in data_dict: data_dict[bin_name] = [] reservation_data = reservation_data_populator(reservation) data_dict[bin_name].append(reservation_data) @@ -177,7 +196,7 @@ def bin_scheduler_result_by_tac_priority(schedule): tac_priority, reservation) return scheduled_requests_by_tac_priority - + def get_airmass_data_from_observation_portal(observation_portal_interface, request_id): """Pulls airmass data from the Observation Portal. @@ -210,7 +229,7 @@ def get_ideal_airmass_for_request(observation_portal_interface, request_id): ideal_for_site = min(site['airmasses']) ideal_airmass = min(ideal_airmass, ideal_for_site) return ideal_airmass - + def avg_ideal_airmass(observation_portal_interface, schedule): """Calculates the average ideal airmass for scheduled observations.""" @@ -219,11 +238,10 @@ def avg_ideal_airmass(observation_portal_interface, schedule): for reservations in schedule.values(): for reservation in reservations: if reservation.scheduled: - for request in reservation.request_group.requests: - request_id = request.id - sum_ideal_airmass += get_ideal_airmass_for_request( - observation_portal_interface, request_id) - count += 1 + request_id = reservation.request.id + sum_ideal_airmass += get_ideal_airmass_for_request( + observation_portal_interface, request_id) + count += 1 return sum_ideal_airmass / count @@ -240,14 +258,14 @@ def calculate_midpoint_airmass(scheduled_requests_by_rg_id): observation_sites = request.get_site() midpoint_airmass_each_request[request] = {} for site in observation_sites: - obs_latitude = site['latitdue'] - obs_longitude = site['longitude'] + obs_latitude = site['latitdue'] + obs_longitude = site['longitude'] obs_height = site['elevation'] midpoint_airmass = calculate_airmass_at_times(midpoint_time, target, obs_latitude, obs_longitude, obs_height) midpoint_airmass_each_request[request][site] = midpoint_airmass return midpoint_airmass_each_request - - + + def 
get_midpoint_airmasses_from_request(observation_portal_interface, request_id, start_time, end_time): midpoint_airmasses = {} midpoint_time = start_time + (end_time - start_time)/2 @@ -256,13 +274,13 @@ def get_midpoint_airmasses_from_request(observation_portal_interface, request_id for site, details in airmass_data.items(): times, airmasses = list(details.values())[0], list(details.values())[1] index = 0 - time_diff = abs((midpoint_time - datetime.strptime(times[0],'%Y-%m-%dT%H:%M')).total_seconds()) - + time_diff = abs((midpoint_time - datetime.strptime(times[0], '%Y-%m-%dT%H:%M')).total_seconds()) + for i in range(len(times)): - temp_time_diff = abs((midpoint_time - datetime.strptime(times[i],'%Y-%m-%dT%H:%M')).total_seconds()) + temp_time_diff = abs((midpoint_time - datetime.strptime(times[i], '%Y-%m-%dT%H:%M')).total_seconds()) if temp_time_diff < time_diff: time_diff = temp_time_diff - index = i + index = i midpoint_airmass = airmasses[index] midpoint_airmasses[site] = midpoint_airmass return midpoint_airmasses @@ -276,7 +294,7 @@ def get_midpoint_airmass_for_each_reservation(observation_portal_interface, sche for request in reservation.request_group.requests: request_id = request.id start_time = normalised_epoch_to_datetime(reservation.scheduled_start, datetime_to_epoch(semester_start)) - end_time = start_time + dt.timedelta(seconds = reservation.duration) + end_time = start_time + dt.timedelta(seconds=reservation.duration) midpoint_airmasses = get_midpoint_airmasses_from_request( observation_portal_interface, request_id, start_time, end_time) @@ -287,7 +305,7 @@ def get_midpoint_airmass_for_each_reservation(observation_portal_interface, sche def midpoint_airmass_vs_priority(observation_portal_interface, schedule, semester_start): - midpoint_airmass_vs_priority={} + midpoint_airmass_vs_priority = {} midpoint_airmass_for_each_reservation = get_midpoint_airmass_for_each_reservation(observation_portal_interface, schedule, semester_start) eff_priorities = [] for 
reservations in schedule.values(): @@ -295,9 +313,6 @@ def midpoint_airmass_vs_priority(observation_portal_interface, schedule, semeste if reservation.scheduled: eff_priority = reservation.priority eff_priorities.append(eff_priority) - midpoint_airmass_vs_priority['midpoint_airmass']= midpoint_airmass_for_each_reservation + midpoint_airmass_vs_priority['midpoint_airmass'] = midpoint_airmass_for_each_reservation midpoint_airmass_vs_priority['eff_priorities'] = eff_priorities return midpoint_airmass_vs_priority - - - diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 3c81e0ca..4070dd49 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -1,22 +1,57 @@ -from adaptive_scheduler.simulation.metrics import (fill_bin_with_reservation_data, - percent_reservations_scheduled, - total_scheduled_seconds, - total_available_seconds, +from adaptive_scheduler.simulation.metrics import (MetricCalculator, + fill_bin_with_reservation_data, get_midpoint_airmasses_from_request, get_midpoint_airmass_for_each_reservation, get_ideal_airmass_for_request, avg_ideal_airmass) from adaptive_scheduler.models import DataContainer +import os import json +from datetime import datetime, timedelta from mock import Mock, patch -from datetime import date, datetime, timedelta -import pytest - class TestMetrics(): + def setup(self): + self.scheduler_run_time = datetime.utcnow() + scheduler_result_attrs = {'resources_scheduled.return_value': ['bpl', 'coj']} + self.mock_scheduler_result = Mock(**scheduler_result_attrs) + self.mock_scheduler = Mock(estimated_scheduler_end=self.scheduler_run_time) + self.mock_scheduler_runner = Mock() + + res1 = Mock(duration=10) + res2 = Mock(duration=20) + res3 = Mock(duration=30) + fake_schedule = {'bpl': [res1, res2], 'coj': [res3]} + self.mock_scheduler_result.schedule = fake_schedule + + self.metrics = MetricCalculator(self.mock_scheduler_result, + self.mock_scheduler_result, + self.mock_scheduler, + 
self.mock_scheduler_runner) + + def test_combining_schedules(self): + scheduler_result_attrs = {'resources_scheduled.return_value': ['bpl', 'coj', 'ogg']} + fake_schedule1 = {'bpl': ['hi', 'there'], 'coj': ['person']} + fake_schedule2 = {'ogg': ['lco', 'rocks'], 'coj': ['woohoo!']} + mock_normal_scheduler_result = Mock(schedule=fake_schedule1, **scheduler_result_attrs) + mock_rr_scheduler_result = Mock(schedule=fake_schedule2, **scheduler_result_attrs) + + only_normal = MetricCalculator(mock_normal_scheduler_result, None, + self.mock_scheduler, self.mock_scheduler_runner) + both_schedules = MetricCalculator(mock_normal_scheduler_result, mock_rr_scheduler_result, + self.mock_scheduler, self.mock_scheduler_runner) + same_schedule = MetricCalculator(mock_normal_scheduler_result, mock_normal_scheduler_result, + self.mock_scheduler, self.mock_scheduler_runner) + + assert only_normal.combined_schedule == fake_schedule1 + assert both_schedules.combined_schedule == {'bpl': ['hi', 'there'], + 'coj': ['woohoo!', 'person'], + 'ogg': ['lco', 'rocks']} + assert same_schedule.combined_schedule == fake_schedule1 + def test_percent_scheduled(self): scheduled_reservation = Mock(scheduled=True) unscheduled_reservation = Mock(scheduled=False) @@ -27,35 +62,29 @@ def test_percent_scheduled(self): multiple_sites = {'bpl': [scheduled_reservation, unscheduled_reservation], 'coj': [scheduled_reservation, scheduled_reservation]} - assert percent_reservations_scheduled(all_scheduled) == 100. - assert percent_reservations_scheduled(half_scheduled) == 50. - assert percent_reservations_scheduled(none_scheduled) == 0. - assert percent_reservations_scheduled(multiple_sites) == 75. - - def test_total_scheduled_seconds(self): - res1 = Mock(duration=10) - res2 = Mock(duration=20) - res3 = Mock(duration=30) - fake_schedule = {'bpl': [res1, res2], 'coj': [res3]} - + assert self.metrics.percent_reservations_scheduled(all_scheduled) == 100. 
+ assert self.metrics.percent_reservations_scheduled(half_scheduled) == 50. + assert self.metrics.percent_reservations_scheduled(none_scheduled) == 0. + assert self.metrics.percent_reservations_scheduled(multiple_sites) == 75. - def test_total_available_seconds(self): + def test_total_time_aggregators(self): seconds_in_day = 86400 - test_time = datetime.utcnow() - - scheduler_result_attrs = {'resources_scheduled.return_value': ['bpl', 'coj']} - mock_scheduler_result = Mock(**scheduler_result_attrs) - - mock_scheduler = Mock(estimated_scheduler_end=test_time) - mock_scheduler.visibility_cache = {'bpl': Mock(), 'coj': Mock()} - mock_scheduler.visibility_cache['bpl'].dark_intervals = [(test_time-timedelta(days=5), test_time-timedelta(days=4)), - (test_time, test_time+timedelta(days=1)), - (test_time+timedelta(days=2), test_time+timedelta(days=3))] - mock_scheduler.visibility_cache['coj'].dark_intervals = [(test_time, test_time+timedelta(days=2))] - - assert total_available_seconds(mock_scheduler_result, mock_scheduler_result, mock_scheduler, 0) == 0 - assert total_available_seconds(mock_scheduler_result, mock_scheduler_result, mock_scheduler, 1) == 2*seconds_in_day - assert total_available_seconds(mock_scheduler_result, mock_scheduler_result, mock_scheduler, 5) == 4*seconds_in_day + + self.mock_scheduler.visibility_cache = {'bpl': Mock(), 'coj': Mock()} + self.mock_scheduler.visibility_cache['bpl'].dark_intervals = [ + (self.scheduler_run_time-timedelta(days=5), self.scheduler_run_time-timedelta(days=4)), + (self.scheduler_run_time, self.scheduler_run_time+timedelta(days=1)), + (self.scheduler_run_time+timedelta(days=2), self.scheduler_run_time+timedelta(days=3)), + ] + self.mock_scheduler.visibility_cache['coj'].dark_intervals = [ + (self.scheduler_run_time, self.scheduler_run_time+timedelta(days=2))] + + assert self.metrics.total_scheduled_seconds(self.mock_scheduler_result.schedule) == 60 + assert self.metrics.total_available_seconds(['bpl', 'coj'], 0) == 0 + 
assert self.metrics.total_available_seconds(['bpl', 'coj'], 1) == 2*seconds_in_day + assert self.metrics.total_available_seconds(['bpl', 'coj'], 5) == 4*seconds_in_day + assert self.metrics.total_available_seconds(['bpl'], 1) == seconds_in_day + assert self.metrics.total_available_seconds([], 1) == 0 def test_fill_bin_with_reservation_data(self): data_dict = {} @@ -67,22 +96,22 @@ def test_fill_bin_with_reservation_data(self): scheduled_start=start_time, scheduled=True, ) - mock_reservation.request_group.requests = [] mock_reservation.request_group.ipp_value = 20 mock_reservation.request_group.proposal.tac_priority = 50 mock_reservation.request_group.id = 1 + mock_reservation.request.id = 2 expected_datacontainer = DataContainer( request_group_id=1, + request_id=2, duration=10, scheduled_resource='bpl', scheduled=True, scheduled_start=start_time, ipp_value=20, tac_priority=50, - requests=[], ) - + bin_data = { 'bin1': mock_reservation, 'bin2': mock_reservation, @@ -98,12 +127,12 @@ def test_fill_bin_with_reservation_data(self): for i, item in enumerate(data): assert expected[bin_name][i].__dict__ == item.__dict__ - - def test_airmass_functions(self): - with open('tests/airmass_data.json') as f: + dir_path = os.path.dirname(os.path.realpath(__file__)) + data_path = os.path.join(dir_path, 'airmass_data.json') + with open(data_path) as f: airmass_data = json.load(f) - + with patch('adaptive_scheduler.simulation.metrics.get_airmass_data_from_observation_portal', return_value=airmass_data): request_id = Mock() request = Mock(id=request_id) @@ -119,13 +148,11 @@ def test_airmass_functions(self): end = start + timedelta(minutes=90) observation_portal_interface = Mock() semester_start = start - + assert get_midpoint_airmasses_from_request(observation_portal_interface, request_id, start, end) == {'tfn': 7} assert get_ideal_airmass_for_request(observation_portal_interface, request_id) == 1 - + with patch('adaptive_scheduler.utils.normalised_epoch_to_datetime', 
return_value=start): with patch('adaptive_scheduler.utils.datetime_to_epoch', autospec=True, return_value=Mock()): assert avg_ideal_airmass(observation_portal_interface, schedule) == 1 assert get_midpoint_airmass_for_each_reservation(observation_portal_interface, schedule, semester_start) == [7] - - From 0d2f55c936cb442f9880e99768e03ec57058344c Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Fri, 8 Jul 2022 20:58:28 +0000 Subject: [PATCH 037/165] tests for refactored code --- tests/test_simulator_metrics.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 31957133..a6cb55a5 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -158,4 +158,3 @@ def test_airmass_functions(self): assert avg_ideal_airmass(observation_portal_interface, schedule) == 1 assert get_midpoint_airmass_for_each_reservation(observation_portal_interface, schedule, semester_start) == [7, 3] - From 040ac35cc966a60a243e9e8c79171154c443a727 Mon Sep 17 00:00:00 2001 From: Jon Date: Fri, 8 Jul 2022 21:45:40 +0000 Subject: [PATCH 038/165] Add code to submit metrics to opensearch --- adaptive_scheduler/scheduler_input.py | 4 +- adaptive_scheduler/simulation/metrics.py | 2 +- adaptive_scheduler/simulation/orchestrator.py | 59 +++++++++++++------ 3 files changed, 44 insertions(+), 21 deletions(-) diff --git a/adaptive_scheduler/scheduler_input.py b/adaptive_scheduler/scheduler_input.py index d0a3b547..38383b76 100644 --- a/adaptive_scheduler/scheduler_input.py +++ b/adaptive_scheduler/scheduler_input.py @@ -49,7 +49,8 @@ def __init__(self, ignore_ipp=to_bool(os.getenv('IGNORE_IPP_VALUES', 'False')), avg_reservation_save_time_seconds=float(os.getenv('INITIAL_PER_RESERVATION_SAVE_TIME', 0.05)), normal_runtime_seconds=float(os.getenv('INITIAL_NORMAL_RUNTIME', 360.0)), - rr_runtime_seconds=float(os.getenv('INITIAL_RAPID_RESPONSE_RUNTIME', 120.0))): + 
rr_runtime_seconds=float(os.getenv('INITIAL_RAPID_RESPONSE_RUNTIME', 120.0)), + simulation_opensearch_index=os.getenv('SIMULATION_OPENSEARCH_INDEX', '')): self.dry_run = dry_run self.no_weather = no_weather self.no_singles = no_singles @@ -92,6 +93,7 @@ def __init__(self, self.opensearch_excluded_observatories = opensearch_excluded_observatories.split(',') else: self.opensearch_excluded_observatories = [] + self.simulation_opensearch_index = simulation_opensearch_index class SchedulingInputFactory(object): diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index c3c79841..ee3bbcca 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -208,7 +208,7 @@ def get_airmass_data_from_observation_portal(observation_portal_interface, reque Returns: airmass_data (dict): The airmass data returned from the API. """ - airmass_url = f'{observation_portal_interface.obs_portal_url}/api/requests/{request_id}/airmass' + airmass_url = f'{observation_portal_interface.obs_portal_url}/api/requests/{request_id}/airmass/' try: response = requests.get(airmass_url, headers=observation_portal_interface.headers, timeout=180) response.raise_for_status() diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 6cf03de8..ce2abb13 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -10,9 +10,10 @@ import logging import sys import os -from datetime import datetime, timedelta +from urllib.parse import urljoin + +from datetime import timedelta -from opensearchpy import OpenSearch from lcogt_logging import LCOGTFormatter from dateutil.parser import parse @@ -72,10 +73,21 @@ def increment_input(current_time, time_step): log.info(f"Placeholder for incrementing input by {time_step} to time {current_time.isoformat}") pass -def send_to_opensearch(metrics): +def send_to_opensearch(os_url, 
os_index, metrics): # Send the json metrics to the opensearch index + if os_url and os_index: + doc_name = f"{metrics['simulation_id']}_{metrics['record_time']}" + try: + requests.post( + urljoin(os_url, f'{os_index}/_doc/{doc_name}'), json=metrics + ).raise_for_status() + except Exception as ex: + log.warning(f"Failed to save metrics to Opensearch at {os_url} in index {os_index}: {repr(ex)}") + + log.info(f"Successfully saved metrics for {metrics['simulation_id']}") + else: + log.warning("Not configured to save metrics in opensearch. Please set OPENSEARCH_URL and SIMULATION_OPENSEARCH_INDEX.") log.info(metrics) # send to output for now - pass def combine_schedules(normal_schedule, rr_schedule): @@ -94,35 +106,44 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche sched_params = scheduler_runner.sched_params observation_portal_interface = scheduler_runner.network_interface.observation_portal_interface - normal_scheduled_requests_by_rg_id = normal_scheduler_result.get_scheduled_requests_by_request_group_id() - rr_scheduled_requests_by_rg_id = rr_scheduler_result.get_scheduled_requests_by_request_group_id() + # normal_scheduled_requests_by_rg_id = normal_scheduler_result.get_scheduled_requests_by_request_group_id() + # rr_scheduled_requests_by_rg_id = rr_scheduler_result.get_scheduled_requests_by_request_group_id() # Derive whatever metrics we want using the supplied scheduled requests and send them to opensearch here # maybe we should just pass in the scheduler result instead and get the normal and rr requests somewhere else # For aggregating across all requests, but not sure if this is the best method - combined_scheduled_requests_by_rg_id = combine_normal_and_rr_requests_by_rg_id( - normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id) + # combined_scheduled_requests_by_rg_id = combine_normal_and_rr_requests_by_rg_id( + # normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id) - combined_schedule = 
combine_schedules(normal_scheduler_result.schedule, rr_scheduler_result.schedule) + if not rr_scheduler_result: + combined_schedule = normal_scheduler_result.schedule + else: + combined_schedule = combine_schedules(normal_scheduler_result.schedule, rr_scheduler_result.schedule) metrics = { 'simulation_id': RUN_ID, + 'simulation_start_time': sched_params.simulate_now, + 'horizon_days': sched_params.horizon_days, + 'slicesize_seconds': sched_params.slicesize_seconds, + 'kernel': sched_params.kernel, + 'mip_gap': sched_params.mip_gap, + 'record_time': datetime.utcnow().isoformat(), # 'total_scheduled_time': total_scheduled_time(combined_scheduled_requests_by_rg_id), - 'total_scheduled_count': total_scheduled_count(combined_scheduled_requests_by_rg_id), + #'total_scheduled_count': total_scheduled_count(combined_scheduled_requests_by_rg_id), # 'percent_scheduled': percent_of_requests_scheduled(combined_scheduled_requests_by_rg_id), - 'total_available_time' : total_available_time(normal_scheduler_result, rr_scheduler_result, - scheduler, sched_params.metric_effective_horizon), - 'effective_priority_bins': bin_scheduler_result_by_eff_priority(combined_schedule), - 'tac_priority_bins': bin_scheduler_result_by_tac_priority(combined_schedule), + #'total_available_time' : total_available_time(normal_scheduler_result, rr_scheduler_result, + # scheduler, sched_params.metric_effective_horizon), + #'effective_priority_bins': bin_scheduler_result_by_eff_priority(combined_schedule), + #'tac_priority_bins': bin_scheduler_result_by_tac_priority(combined_schedule), 'avg_ideal_airmass': avg_ideal_airmass(observation_portal_interface, combined_schedule), - 'midpoint_airmasses': get_midpoint_airmass_for_each_reservation(observation_portal_interface, - combined_schedule, scheduler_runner.semester_details['start']), - 'midpoint_airmass_vs_priority':midpoint_airmass_vs_priority(observation_portal_interface, - combined_schedule, scheduler_runner.semester_details['start']) + 
#'midpoint_airmasses': get_midpoint_airmass_for_each_reservation(observation_portal_interface, + # combined_schedule, scheduler_runner.semester_details['start']), + #'midpoint_airmass_vs_priority':midpoint_airmass_vs_priority(observation_portal_interface, + # combined_schedule, scheduler_runner.semester_details['start']) } - send_to_opensearch(metrics) + send_to_opensearch(sched_params.opensearch_url, sched_params.simulation_opensearch_index, metrics) def main(argv=None): From 039d1f477970338fbe938b97869ae894d0831ef8 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Fri, 8 Jul 2022 23:14:47 +0000 Subject: [PATCH 039/165] updated orchestrator to work with current iteration of metrics --- adaptive_scheduler/simulation/metrics.py | 42 ++++++------- adaptive_scheduler/simulation/orchestrator.py | 60 ++++++------------- tests/test_simulator_metrics.py | 33 ++++++---- 3 files changed, 57 insertions(+), 78 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index c3c79841..1181a1ea 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -96,8 +96,9 @@ def count_unscheduled(self, schedule=None): def percent_reservations_scheduled(self, schedule=None): schedule = self.combined_schedule if schedule is None else schedule - total = self.count_scheduled(schedule) + self.count_unscheduled(schedule) - return percent_of(self.count_scheduled(schedule), total) + scheduled = self.count_scheduled(schedule) + total = scheduled + self.count_unscheduled(schedule) + return percent_of(scheduled, total) def total_scheduled_seconds(self, schedule=None): schedule = self.combined_schedule if schedule is None else schedule @@ -107,7 +108,7 @@ def total_scheduled_seconds(self, schedule=None): total_scheduled_seconds += reservation.duration return total_scheduled_seconds - def total_available_seconds(self, scheduled_resources=None, horizon_days=None): + def total_available_seconds(self, 
resources_scheduled=None, horizon_days=None): """Aggregates the total available time, calculated from dark intervals. Args: @@ -119,18 +120,25 @@ def total_available_seconds(self, scheduled_resources=None, horizon_days=None): Returns: total_available_time (float): The dark intervals capped by the horizon. """ - scheduled_resources = self.combined_scheduled_resources if scheduled_resources is None else scheduled_resources + resources_scheduled = self.combined_resources_scheduled if resources_scheduled is None else resources_scheduled horizon_days = self.effective_horizon if horizon_days is None else horizon_days total_available_time = 0 start_time = self.scheduler.estimated_scheduler_end end_time = start_time + dt.timedelta(days=horizon_days) - for resource in scheduled_resources: + for resource in resources_scheduled: if resource in self.scheduler.visibility_cache: dark_intervals = self.scheduler.visibility_cache[resource].dark_intervals available_time = time_in_capped_intervals(dark_intervals, start_time, end_time) total_available_time += available_time return total_available_time + def percent_time_utilization(self, schedule=None, resources_scheduled=None, horizon_days=None): + schedule = self.combined_schedule if schedule is None else schedule + resources_scheduled = self.combined_resources_scheduled if resources_scheduled is None else resources_scheduled + horizon_days = self.effective_horizon if horizon_days is None else horizon_days + return percent_of(self.total_scheduled_seconds(schedule), + self.total_available_seconds(resources_scheduled, horizon_days)) + def reservation_data_populator(reservation): """Creates a new data container containing parameters useful in calculating metrics. 
@@ -245,7 +253,6 @@ def avg_ideal_airmass(observation_portal_interface, schedule): def calculate_midpoint_airmass(scheduled_requests_by_rg_id): - # midpoint_airmass = 1.5 midpoint_airmass_each_request = {} for request_group in scheduled_requests_by_rg_id.values(): for request in request_group.values(): @@ -286,8 +293,10 @@ def get_midpoint_airmasses_from_request(observation_portal_interface, request_id return midpoint_airmasses -def get_midpoint_airmass_for_each_reservation(observation_portal_interface, schedule, semester_start): +def avg_midpoint_airmass(observation_portal_interface, schedule, semester_start): midpoint_airmass_for_each_reservation = [] + sum_midpoint_airmass = 0 + count = 0 for reservations in schedule.values(): for reservation in reservations: if reservation.scheduled: @@ -301,19 +310,6 @@ def get_midpoint_airmass_for_each_reservation(observation_portal_interface, sche site = reservation.scheduled_resource[-3:] midpoint_airmass = midpoint_airmasses[site] midpoint_airmass_for_each_reservation.append(midpoint_airmass) - return midpoint_airmass_for_each_reservation - - -def midpoint_airmass_vs_priority(observation_portal_interface, schedule, semester_start): - midpoint_airmass_vs_priority = {} - midpoint_airmass_for_each_reservation = get_midpoint_airmass_for_each_reservation(observation_portal_interface, - schedule, semester_start) - eff_priorities = [] - for reservations in schedule.values(): - for reservation in reservations: - if reservation.scheduled: - eff_priority = reservation.priority - eff_priorities.append(eff_priority) - midpoint_airmass_vs_priority['midpoint_airmass'] = midpoint_airmass_for_each_reservation - midpoint_airmass_vs_priority['eff_priorities'] = eff_priorities - return midpoint_airmass_vs_priority + sum_midpoint_airmass += midpoint_airmass + count += 1 + return sum_midpoint_airmass / count diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 6cf03de8..b1da02fd 
100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -27,7 +27,10 @@ from adaptive_scheduler.scheduler_input import ( SchedulingInputFactory, SchedulingInputProvider, SchedulerParameters ) -from adaptive_scheduler.simulation.metrics import * +from adaptive_scheduler.simulation.metrics import (MetricCalculator, bin_scheduler_result_by_eff_priority, + bin_scheduler_result_by_tac_priority, avg_ideal_airmass, + avg_midpoint_airmass,) + log = logging.getLogger('adaptive_scheduler') @@ -61,6 +64,7 @@ def setup_input(current_time): log.info(f"Placeholder for setting up input for time {current_time.isoformat}") pass + def increment_input(current_time, time_step): # This will eventually call endpoints in configdb and the observation portal to increment the state of them forward # by the time step specified. Incrementing time forward is slightly different then the initial setup of a starting time. @@ -72,55 +76,27 @@ def increment_input(current_time, time_step): log.info(f"Placeholder for incrementing input by {time_step} to time {current_time.isoformat}") pass + def send_to_opensearch(metrics): # Send the json metrics to the opensearch index - log.info(metrics) # send to output for now - pass - - -def combine_schedules(normal_schedule, rr_schedule): - # For aggregating across all scheduled items - combined_schedule = normal_schedule.copy() - for resource, reservations in rr_schedule.items(): - for reservation in reservations: - combined_schedule[resource].append(reservation) - - return combined_schedule + log.info(metrics) # send to output for now def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, scheduler_runner): log.info("Recording metrics for scheduler simulation run") - sched_params = scheduler_runner.sched_params + metrics = MetricCalculator(normal_scheduler_result, rr_scheduler_result, scheduler, scheduler_runner) observation_portal_interface = 
scheduler_runner.network_interface.observation_portal_interface - - normal_scheduled_requests_by_rg_id = normal_scheduler_result.get_scheduled_requests_by_request_group_id() - rr_scheduled_requests_by_rg_id = rr_scheduler_result.get_scheduled_requests_by_request_group_id() - - # Derive whatever metrics we want using the supplied scheduled requests and send them to opensearch here - - # maybe we should just pass in the scheduler result instead and get the normal and rr requests somewhere else - - # For aggregating across all requests, but not sure if this is the best method - combined_scheduled_requests_by_rg_id = combine_normal_and_rr_requests_by_rg_id( - normal_scheduled_requests_by_rg_id, rr_scheduled_requests_by_rg_id) - - combined_schedule = combine_schedules(normal_scheduler_result.schedule, rr_scheduler_result.schedule) + semester_start = scheduler_runner.semester_details['start'] metrics = { 'simulation_id': RUN_ID, - # 'total_scheduled_time': total_scheduled_time(combined_scheduled_requests_by_rg_id), - 'total_scheduled_count': total_scheduled_count(combined_scheduled_requests_by_rg_id), - # 'percent_scheduled': percent_of_requests_scheduled(combined_scheduled_requests_by_rg_id), - 'total_available_time' : total_available_time(normal_scheduler_result, rr_scheduler_result, - scheduler, sched_params.metric_effective_horizon), - 'effective_priority_bins': bin_scheduler_result_by_eff_priority(combined_schedule), - 'tac_priority_bins': bin_scheduler_result_by_tac_priority(combined_schedule), - 'avg_ideal_airmass': avg_ideal_airmass(observation_portal_interface, combined_schedule), - 'midpoint_airmasses': get_midpoint_airmass_for_each_reservation(observation_portal_interface, - combined_schedule, scheduler_runner.semester_details['start']), - 'midpoint_airmass_vs_priority':midpoint_airmass_vs_priority(observation_portal_interface, - combined_schedule, scheduler_runner.semester_details['start']) + 'total_scheduled_count': metrics.count_scheduled(), + 
'total_scheduled_seconds': metrics.total_scheduled_seconds(), + 'total_available_seconds': metrics.total_available_seconds(), + 'percent_time_utilization': metrics.percent_time_utilization(), + 'avg_ideal_airmass': avg_ideal_airmass(observation_portal_interface, metrics.combined_schedule), + 'avg_midpoint_airmass': avg_midpoint_airmass(observation_portal_interface, metrics.combined_schedule, semester_start) } send_to_opensearch(metrics) @@ -146,7 +122,7 @@ def main(argv=None): configdb_interface) kernel_class = FullScheduler_ortoolkit network_model = configdb_interface.get_telescope_info() - + scheduler = LCOGTNetworkScheduler(kernel_class, sched_params, event_bus, network_model) input_provider = SchedulingInputProvider(sched_params, network_interface, network_model, is_rr_input=True) input_factory = SchedulingInputFactory(input_provider) @@ -167,8 +143,8 @@ def main(argv=None): scheduler_runner.run() # Output scheduled requests are available within the runner after it completes a run # These are used to seed a warm start solution for the next run in the normal scheduler, but can be used to generate metrics here - sched_params.metric_effective_horizon = 5 # days - + sched_params.metric_effective_horizon = 5 # days + record_metrics( scheduler_runner.normal_scheduler_result, scheduler_runner.rr_scheduler_result, diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index a6cb55a5..4e41b8f8 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -1,9 +1,9 @@ from adaptive_scheduler.simulation.metrics import (MetricCalculator, fill_bin_with_reservation_data, get_midpoint_airmasses_from_request, - get_midpoint_airmass_for_each_reservation, get_ideal_airmass_for_request, - avg_ideal_airmass) + avg_ideal_airmass, + avg_midpoint_airmass) from adaptive_scheduler.models import DataContainer import os @@ -20,6 +20,7 @@ def setup(self): self.mock_scheduler_result = Mock(**scheduler_result_attrs) self.mock_scheduler = 
Mock(estimated_scheduler_end=self.scheduler_run_time) self.mock_scheduler_runner = Mock() + self.mock_scheduler_runner.sched_params.metric_effective_horizon = 5 res1 = Mock(duration=10) res2 = Mock(duration=20) @@ -27,6 +28,15 @@ def setup(self): fake_schedule = {'bpl': [res1, res2], 'coj': [res3]} self.mock_scheduler_result.schedule = fake_schedule + self.mock_scheduler.visibility_cache = {'bpl': Mock(), 'coj': Mock()} + self.mock_scheduler.visibility_cache['bpl'].dark_intervals = [ + (self.scheduler_run_time-timedelta(days=5), self.scheduler_run_time-timedelta(days=4)), + (self.scheduler_run_time, self.scheduler_run_time+timedelta(days=1)), + (self.scheduler_run_time+timedelta(days=2), self.scheduler_run_time+timedelta(days=3)), + ] + self.mock_scheduler.visibility_cache['coj'].dark_intervals = [ + (self.scheduler_run_time, self.scheduler_run_time+timedelta(days=2))] + self.metrics = MetricCalculator(self.mock_scheduler_result, self.mock_scheduler_result, self.mock_scheduler, @@ -70,21 +80,19 @@ def test_percent_scheduled(self): def test_total_time_aggregators(self): seconds_in_day = 86400 - self.mock_scheduler.visibility_cache = {'bpl': Mock(), 'coj': Mock()} - self.mock_scheduler.visibility_cache['bpl'].dark_intervals = [ - (self.scheduler_run_time-timedelta(days=5), self.scheduler_run_time-timedelta(days=4)), - (self.scheduler_run_time, self.scheduler_run_time+timedelta(days=1)), - (self.scheduler_run_time+timedelta(days=2), self.scheduler_run_time+timedelta(days=3)), - ] - self.mock_scheduler.visibility_cache['coj'].dark_intervals = [ - (self.scheduler_run_time, self.scheduler_run_time+timedelta(days=2))] - assert self.metrics.total_scheduled_seconds(self.mock_scheduler_result.schedule) == 60 assert self.metrics.total_available_seconds(['bpl', 'coj'], 0) == 0 assert self.metrics.total_available_seconds(['bpl', 'coj'], 1) == 2*seconds_in_day assert self.metrics.total_available_seconds(['bpl', 'coj'], 5) == 4*seconds_in_day assert 
self.metrics.total_available_seconds(['bpl'], 1) == seconds_in_day assert self.metrics.total_available_seconds([], 1) == 0 + assert self.metrics.total_scheduled_seconds() == 60 + assert self.metrics.total_available_seconds() == 4*seconds_in_day + + def test_percent_time_utilization(self): + test_schedule = {'bpl': [Mock(duration=86400)]} + assert self.metrics.percent_time_utilization(test_schedule, ['bpl'], 1) == 100. + assert self.metrics.percent_time_utilization() == 60/(86400*4)*100 def test_fill_bin_with_reservation_data(self): data_dict = {} @@ -156,5 +164,4 @@ def test_airmass_functions(self): start, end) == {'tfn': 7, 'egg': 3} assert get_ideal_airmass_for_request(observation_portal_interface, request_id_2) == 1 assert avg_ideal_airmass(observation_portal_interface, schedule) == 1 - assert get_midpoint_airmass_for_each_reservation(observation_portal_interface, - schedule, semester_start) == [7, 3] + assert avg_midpoint_airmass(observation_portal_interface, schedule, semester_start) == 5 From 6e739e96526451f8c655a33a72eaec532db8e410 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Fri, 8 Jul 2022 23:22:44 +0000 Subject: [PATCH 040/165] merged too fast, missed some bugs --- adaptive_scheduler/simulation/orchestrator.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 4033b59a..999e62ba 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -12,7 +12,8 @@ import os from urllib.parse import urljoin -from datetime import timedelta +import requests +from datetime import datetime, timedelta from lcogt_logging import LCOGTFormatter from dateutil.parser import parse @@ -28,8 +29,8 @@ from adaptive_scheduler.scheduler_input import ( SchedulingInputFactory, SchedulingInputProvider, SchedulerParameters ) -from adaptive_scheduler.simulation.metrics import (MetricCalculator, 
bin_scheduler_result_by_eff_priority, - bin_scheduler_result_by_tac_priority, avg_ideal_airmass, +from adaptive_scheduler.simulation.metrics import (MetricCalculator, + avg_ideal_airmass, avg_midpoint_airmass,) @@ -100,8 +101,8 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche metrics = MetricCalculator(normal_scheduler_result, rr_scheduler_result, scheduler, scheduler_runner) observation_portal_interface = scheduler_runner.network_interface.observation_portal_interface -<<<<<<< HEAD semester_start = scheduler_runner.semester_details['start'] + sched_params = scheduler_runner.sched_params metrics = { 'simulation_id': RUN_ID, From c29460e72c76f8043bfcfadf256da7334e1b7850 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Fri, 8 Jul 2022 17:03:12 -0700 Subject: [PATCH 041/165] refactoring airmasses metrics into the metrics calculator class --- adaptive_scheduler/simulation/metrics.py | 204 +++++++----------- adaptive_scheduler/simulation/orchestrator.py | 8 +- tests/test_simulator_metrics.py | 65 +++--- 3 files changed, 116 insertions(+), 161 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 1d867edb..fd749f1e 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -44,7 +44,7 @@ class MetricCalculator(): def __init__(self, normal_scheduler_result, rr_scheduler_result, scheduler, scheduler_runner): self.scheduler = scheduler self.scheduler_runner = scheduler_runner - + self.observation_portal_interface = self.scheduler_runner.network_interface.observation_portal_interface if self.scheduler_runner.sched_params.metric_effective_horizon: self.effective_horizon = self.scheduler_runner.sched_params.metric_effective_horizon else: @@ -138,6 +138,86 @@ def percent_time_utilization(self, schedule=None, resources_scheduled=None, hori horizon_days = self.effective_horizon if horizon_days is None else horizon_days return 
percent_of(self.total_scheduled_seconds(schedule), self.total_available_seconds(resources_scheduled, horizon_days)) + + def _get_airmass_data_from_observation_portal(self, request_id): + """Pulls airmass data from the Observation Portal. + + Args: + observation_portal_interface (ObservationPortalInterface): Instance of the Observation Portal + used by the scheduler. + request_id (str): The request id. + + Returns: + airmass_data (dict): The airmass data returned from the API. + """ + airmass_url = f'{self.observation_portal_interface.obs_portal_url}/api/requests/{request_id}/airmass/' + try: + response = requests.get(airmass_url, headers=self.observation_portal_interface.headers, timeout=180) + response.raise_for_status() + airmass_data = response.json() + except (RequestException, ValueError, Timeout) as e: + raise ObservationPortalConnectionError("get_airmass_data failed: {}".format(repr(e))) + + return airmass_data + + def _get_ideal_airmass_for_request(self, request_id): + """Finds the minimum airmass across all sites for the request.""" + ideal_airmass = 1000 + airmass_data = self._get_airmass_data_from_observation_portal(request_id) + for site in airmass_data['airmass_data'].values(): + ideal_for_site = min(site['airmasses']) + ideal_airmass = min(ideal_airmass, ideal_for_site) + return ideal_airmass + + def avg_ideal_airmass(self, schedule): + """Calculates the average ideal airmass for scheduled observations.""" + sum_ideal_airmass = 0 + count = 0 + for reservations in schedule.values(): + for reservation in reservations: + if reservation.scheduled: + request_id = reservation.request.id + sum_ideal_airmass += self._get_ideal_airmass_for_request(request_id) + count += 1 + return sum_ideal_airmass / count + + def _get_midpoint_airmasses_from_request(self, request_id, start_time, end_time): + midpoint_airmasses = {} + midpoint_time = start_time + (end_time - start_time) / 2 + airmass_data = 
self._get_airmass_data_from_observation_portal(request_id)['airmass_data'] + for site, details in airmass_data.items(): + times, airmasses = list(details.values())[0], list(details.values())[1] + index = 0 + time_diff = abs((midpoint_time - datetime.strptime(times[0], '%Y-%m-%dT%H:%M')).total_seconds()) + + for i,_ in enumerate(times): + temp_time_diff = abs((midpoint_time - datetime.strptime(times[i], '%Y-%m-%dT%H:%M')).total_seconds()) + if temp_time_diff < time_diff: + time_diff = temp_time_diff + index = i + midpoint_airmass = airmasses[index] + midpoint_airmasses[site] = midpoint_airmass + return midpoint_airmasses + + def avg_midpoint_airmass(self, schedule, semester_start): + midpoint_airmass_for_each_reservation = [] + sum_midpoint_airmass = 0 + count = 0 + for reservations in schedule.values(): + for reservation in reservations: + if reservation.scheduled: + request = reservation.request + request_id = request.id + start_time = normalised_epoch_to_datetime(reservation.scheduled_start, + datetime_to_epoch(semester_start)) + end_time = start_time + dt.timedelta(seconds=reservation.duration) + midpoint_airmasses = self._get_midpoint_airmasses_from_request(request_id, start_time, end_time) + site = reservation.scheduled_resource[-3:] + midpoint_airmass = midpoint_airmasses[site] + midpoint_airmass_for_each_reservation.append(midpoint_airmass) + sum_midpoint_airmass += midpoint_airmass + count += 1 + return sum_midpoint_airmass / count def reservation_data_populator(reservation): @@ -176,17 +256,7 @@ def fill_bin_with_reservation_data(data_dict, bin_name, reservation): """ if bin_name not in data_dict: data_dict[bin_name] = [] - reservation_data = reservation_data_populator(reservation) - data_dict[bin_name].append(reservation_data) - - -def bin_scheduler_result_by_eff_priority(schedule): - scheduled_requests_by_eff_priority = {} - for reservations in schedule.values(): - for reservation in reservations: - if reservation.scheduled: - eff_priority = 
str(reservation.priority) - fill_bin_with_reservation_data(scheduled_requests_by_eff_priority, + reservation_data = reservobservation_portal_interface, _reservation_data(scheduled_requests_by_eff_priority, eff_priority, reservation) return scheduled_requests_by_eff_priority @@ -203,113 +273,3 @@ def bin_scheduler_result_by_tac_priority(schedule): tac_priority, reservation) return scheduled_requests_by_tac_priority - - -def get_airmass_data_from_observation_portal(observation_portal_interface, request_id): - """Pulls airmass data from the Observation Portal. - - Args: - observation_portal_interface (ObservationPortalInterface): Instance of the Observation Portal - used by the scheduler. - request_id (str): The request id. - - Returns: - airmass_data (dict): The airmass data returned from the API. - """ - airmass_url = f'{observation_portal_interface.obs_portal_url}/api/requests/{request_id}/airmass/' - try: - response = requests.get(airmass_url, headers=observation_portal_interface.headers, timeout=180) - response.raise_for_status() - airmass_data = response.json() - except (RequestException, ValueError, Timeout) as e: - raise ObservationPortalConnectionError("get_airmass_data failed: {}".format(repr(e))) - - return airmass_data - - -def get_ideal_airmass_for_request(observation_portal_interface, request_id): - """Finds the minimum airmass across all sites for the request.""" - ideal_airmass = 1000 - airmass_data = get_airmass_data_from_observation_portal( - observation_portal_interface, request_id) - for site in airmass_data['airmass_data'].values(): - ideal_for_site = min(site['airmasses']) - ideal_airmass = min(ideal_airmass, ideal_for_site) - return ideal_airmass - - -def avg_ideal_airmass(observation_portal_interface, schedule): - """Calculates the average ideal airmass for scheduled observations.""" - sum_ideal_airmass = 0 - count = 0 - for reservations in schedule.values(): - for reservation in reservations: - if reservation.scheduled: - request_id = 
reservation.request.id - sum_ideal_airmass += get_ideal_airmass_for_request( - observation_portal_interface, request_id) - count += 1 - return sum_ideal_airmass / count - - -def calculate_midpoint_airmass(scheduled_requests_by_rg_id): - midpoint_airmass_each_request = {} - for request_group in scheduled_requests_by_rg_id.values(): - for request in request_group.values(): - if request.scheduled: - start_time = request.start() - end_time = request.end() - midpoint_time = [start_time + (end_time - start_time) / 2] - target = request.get_target() - observation_sites = request.get_site() - midpoint_airmass_each_request[request] = {} - for site in observation_sites: - obs_latitude = site['latitdue'] - obs_longitude = site['longitude'] - obs_height = site['elevation'] - midpoint_airmass = calculate_airmass_at_times(midpoint_time, - target, obs_latitude, obs_longitude, obs_height) - midpoint_airmass_each_request[request][site] = midpoint_airmass - return midpoint_airmass_each_request - - -def get_midpoint_airmasses_from_request(observation_portal_interface, request_id, start_time, end_time): - midpoint_airmasses = {} - midpoint_time = start_time + (end_time - start_time) / 2 - airmass_data = get_airmass_data_from_observation_portal( - observation_portal_interface, request_id)['airmass_data'] - for site, details in airmass_data.items(): - times, airmasses = list(details.values())[0], list(details.values())[1] - index = 0 - time_diff = abs((midpoint_time - datetime.strptime(times[0], '%Y-%m-%dT%H:%M')).total_seconds()) - - for i in range(len(times)): - temp_time_diff = abs((midpoint_time - datetime.strptime(times[i], '%Y-%m-%dT%H:%M')).total_seconds()) - if temp_time_diff < time_diff: - time_diff = temp_time_diff - index = i - midpoint_airmass = airmasses[index] - midpoint_airmasses[site] = midpoint_airmass - return midpoint_airmasses - - -def avg_midpoint_airmass(observation_portal_interface, schedule, semester_start): - midpoint_airmass_for_each_reservation = [] - 
sum_midpoint_airmass = 0 - count = 0 - for reservations in schedule.values(): - for reservation in reservations: - if reservation.scheduled: - request = reservation.request - request_id = request.id - start_time = normalised_epoch_to_datetime(reservation.scheduled_start, - datetime_to_epoch(semester_start)) - end_time = start_time + dt.timedelta(seconds=reservation.duration) - midpoint_airmasses = get_midpoint_airmasses_from_request(observation_portal_interface, - request_id, start_time, end_time) - site = reservation.scheduled_resource[-3:] - midpoint_airmass = midpoint_airmasses[site] - midpoint_airmass_for_each_reservation.append(midpoint_airmass) - sum_midpoint_airmass += midpoint_airmass - count += 1 - return sum_midpoint_airmass / count diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 999e62ba..c69cf716 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -29,9 +29,7 @@ from adaptive_scheduler.scheduler_input import ( SchedulingInputFactory, SchedulingInputProvider, SchedulerParameters ) -from adaptive_scheduler.simulation.metrics import (MetricCalculator, - avg_ideal_airmass, - avg_midpoint_airmass,) +from adaptive_scheduler.simulation.metrics import MetricCalculator log = logging.getLogger('adaptive_scheduler') @@ -117,8 +115,8 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche 'total_scheduled_seconds': metrics.total_scheduled_seconds(), 'total_available_seconds': metrics.total_available_seconds(), 'percent_time_utilization': metrics.percent_time_utilization(), - 'avg_ideal_airmass': avg_ideal_airmass(observation_portal_interface, metrics.combined_schedule), - 'avg_midpoint_airmass': avg_midpoint_airmass(observation_portal_interface, metrics.combined_schedule, semester_start), + 'avg_ideal_airmass': metrics.avg_ideal_airmass(metrics.combined_schedule), + 'avg_midpoint_airmass': 
metrics.avg_midpoint_airmass(metrics.combined_schedule, semester_start), } send_to_opensearch(sched_params.opensearch_url, sched_params.simulation_opensearch_index, metrics) diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 4e41b8f8..37efaf89 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -1,9 +1,5 @@ from adaptive_scheduler.simulation.metrics import (MetricCalculator, - fill_bin_with_reservation_data, - get_midpoint_airmasses_from_request, - get_ideal_airmass_for_request, - avg_ideal_airmass, - avg_midpoint_airmass) + fill_bin_with_reservation_data) from adaptive_scheduler.models import DataContainer import os @@ -41,6 +37,12 @@ def setup(self): self.mock_scheduler_result, self.mock_scheduler, self.mock_scheduler_runner) + + dir_path = os.path.dirname(os.path.realpath(__file__)) + data_path = os.path.join(dir_path, 'airmass_data.json') + with open(data_path) as f: + airmass_data = json.load(f) + self.metrics._get_airmass_data_from_observation_portal = Mock(return_value=airmass_data) def test_combining_schedules(self): scheduler_result_attrs = {'resources_scheduled.return_value': ['bpl', 'coj', 'ogg']} @@ -136,32 +138,27 @@ def test_fill_bin_with_reservation_data(self): assert expected[bin_name][i].__dict__ == item.__dict__ def test_airmass_functions(self): - dir_path = os.path.dirname(os.path.realpath(__file__)) - data_path = os.path.join(dir_path, 'airmass_data.json') - with open(data_path) as f: - airmass_data = json.load(f) - - with patch('adaptive_scheduler.simulation.metrics.get_airmass_data_from_observation_portal', - return_value=airmass_data): - request_id_1 = Mock() - request_1 = Mock(id=request_id_1) - mock_reservation_1 = Mock(scheduled_start=0, scheduled_resource='1m0a.doma.tfn', - request=request_1, duration=5400) - request_id_2 = Mock() - request_2 = Mock(id=request_id_2) - mock_reservation_2 = Mock(scheduled_start=0, scheduled_resource='1m0a.doma.egg', - request=request_2, 
duration=5400) - scheduled_reservations = [mock_reservation_1, mock_reservation_2] - - schedule = {'reservations': scheduled_reservations} - - start = datetime.strptime("2022-07-06T00:30", '%Y-%m-%dT%H:%M') - end = start + timedelta(minutes=90) - observation_portal_interface = Mock() - semester_start = start - - assert get_midpoint_airmasses_from_request(observation_portal_interface, request_id_1, - start, end) == {'tfn': 7, 'egg': 3} - assert get_ideal_airmass_for_request(observation_portal_interface, request_id_2) == 1 - assert avg_ideal_airmass(observation_portal_interface, schedule) == 1 - assert avg_midpoint_airmass(observation_portal_interface, schedule, semester_start) == 5 + + + # with patch('adaptive_scheduler.simulation.metrics.get_airmass_data_from_observation_portal', + # return_value=airmass_data): + request_id_1 = Mock() + request_1 = Mock(id=request_id_1) + mock_reservation_1 = Mock(scheduled_start=0, scheduled_resource='1m0a.doma.tfn', + request=request_1, duration=5400) + request_id_2 = Mock() + request_2 = Mock(id=request_id_2) + mock_reservation_2 = Mock(scheduled_start=0, scheduled_resource='1m0a.doma.egg', + request=request_2, duration=5400) + scheduled_reservations = [mock_reservation_1, mock_reservation_2] + + schedule = {'reservations': scheduled_reservations} + + start = datetime.strptime("2022-07-06T00:30", '%Y-%m-%dT%H:%M') + end = start + timedelta(minutes=90) + semester_start = start + + assert self.metrics._get_midpoint_airmasses_from_request(request_id_1, start, end) == {'tfn': 7, 'egg': 3} + assert self.metrics._get_ideal_airmass_for_request(request_id_2) == 1 + assert self.metrics.avg_ideal_airmass(schedule) == 1 + assert self.metrics.avg_midpoint_airmass(schedule, semester_start) == 5 From 50316535cd6d515413a1af67cca65b0ba27d4ee3 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Mon, 11 Jul 2022 10:51:33 -0700 Subject: [PATCH 042/165] add tests for averaging multiple airmasses --- tests/test_simulator_metrics.py | 105 
++++++++++++++++---------------- 1 file changed, 52 insertions(+), 53 deletions(-) diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 37efaf89..502b9a08 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -36,13 +36,7 @@ def setup(self): self.metrics = MetricCalculator(self.mock_scheduler_result, self.mock_scheduler_result, self.mock_scheduler, - self.mock_scheduler_runner) - - dir_path = os.path.dirname(os.path.realpath(__file__)) - data_path = os.path.join(dir_path, 'airmass_data.json') - with open(data_path) as f: - airmass_data = json.load(f) - self.metrics._get_airmass_data_from_observation_portal = Mock(return_value=airmass_data) + self.mock_scheduler_runner) def test_combining_schedules(self): scheduler_result_attrs = {'resources_scheduled.return_value': ['bpl', 'coj', 'ogg']} @@ -96,52 +90,58 @@ def test_percent_time_utilization(self): assert self.metrics.percent_time_utilization(test_schedule, ['bpl'], 1) == 100. 
assert self.metrics.percent_time_utilization() == 60/(86400*4)*100 - def test_fill_bin_with_reservation_data(self): - data_dict = {} - start_time = datetime.utcnow() - - mock_reservation = Mock( - duration=10, - scheduled_resource='bpl', - scheduled_start=start_time, - scheduled=True, - ) - mock_reservation.request_group.ipp_value = 20 - mock_reservation.request_group.proposal.tac_priority = 50 - mock_reservation.request_group.id = 1 - mock_reservation.request.id = 2 - - expected_datacontainer = DataContainer( - request_group_id=1, - request_id=2, - duration=10, - scheduled_resource='bpl', - scheduled=True, - scheduled_start=start_time, - ipp_value=20, - tac_priority=50, - ) - - bin_data = { - 'bin1': mock_reservation, - 'bin2': mock_reservation, - } - for bin_name, reservation in bin_data.items(): - fill_bin_with_reservation_data(data_dict, bin_name, reservation) - - expected = { - 'bin1': [expected_datacontainer], - 'bin2': [expected_datacontainer], - } - for bin_name, data in data_dict.items(): - for i, item in enumerate(data): - assert expected[bin_name][i].__dict__ == item.__dict__ + # def test_fill_bin_with_reservation_data(self): + # data_dict = {} + # start_time = datetime.utcnow() + + # mock_reservation = Mock( + # duration=10, + # scheduled_resource='bpl', + # scheduled_start=start_time, + # scheduled=True, + # ) + # mock_reservation.request_group.ipp_value = 20 + # mock_reservation.request_group.proposal.tac_priority = 50 + # mock_reservation.request_group.id = 1 + # mock_reservation.request.id = 2 + + # expected_datacontainer = DataContainer( + # request_group_id=1, + # request_id=2, + # duration=10, + # scheduled_resource='bpl', + # scheduled=True, + # scheduled_start=start_time, + # ipp_value=20, + # tac_priority=50, + # ) + + # bin_data = { + # 'bin1': mock_reservation, + # 'bin2': mock_reservation, + # } + # for bin_name, reservation in bin_data.items(): + # fill_bin_with_reservation_data(data_dict, bin_name, reservation) + + # expected = { + # 
'bin1': [expected_datacontainer], + # 'bin2': [expected_datacontainer], + # } + # for bin_name, data in data_dict.items(): + # for i, item in enumerate(data): + # assert expected[bin_name][i].__dict__ == item.__dict__ def test_airmass_functions(self): - - - # with patch('adaptive_scheduler.simulation.metrics.get_airmass_data_from_observation_portal', - # return_value=airmass_data): + dir_path = os.path.dirname(os.path.realpath(__file__)) + data_path_1 = os.path.join(dir_path, 'airmass_data.json') + data_path_2 = os.path.join(dir_path, 'airmass_data_2.json') + with open(data_path_1) as f: + airmass_data_1 = json.load(f) + with open(data_path_2) as f: + airmass_data_2 = json.load(f) + self.metrics._get_airmass_data_from_observation_portal = Mock(side_effect=[airmass_data_1, airmass_data_1, + airmass_data_1, airmass_data_2, + airmass_data_1, airmass_data_2]) request_id_1 = Mock() request_1 = Mock(id=request_id_1) mock_reservation_1 = Mock(scheduled_start=0, scheduled_resource='1m0a.doma.tfn', @@ -151,7 +151,6 @@ def test_airmass_functions(self): mock_reservation_2 = Mock(scheduled_start=0, scheduled_resource='1m0a.doma.egg', request=request_2, duration=5400) scheduled_reservations = [mock_reservation_1, mock_reservation_2] - schedule = {'reservations': scheduled_reservations} start = datetime.strptime("2022-07-06T00:30", '%Y-%m-%dT%H:%M') @@ -160,5 +159,5 @@ def test_airmass_functions(self): assert self.metrics._get_midpoint_airmasses_from_request(request_id_1, start, end) == {'tfn': 7, 'egg': 3} assert self.metrics._get_ideal_airmass_for_request(request_id_2) == 1 - assert self.metrics.avg_ideal_airmass(schedule) == 1 + assert self.metrics.avg_ideal_airmass(schedule) == 2 assert self.metrics.avg_midpoint_airmass(schedule, semester_start) == 5 From e9030b187f667362b53341927b626d92e4517163 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Mon, 11 Jul 2022 10:52:31 -0700 Subject: [PATCH 043/165] upload another airmass mock data for testing --- tests/airmass_data_2.json | 
27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 tests/airmass_data_2.json diff --git a/tests/airmass_data_2.json b/tests/airmass_data_2.json new file mode 100644 index 00000000..643a1e34 --- /dev/null +++ b/tests/airmass_data_2.json @@ -0,0 +1,27 @@ +{ + "airmass_data": { + "egg": { + "times": [ + "2022-07-06T01:31", + "2022-07-06T01:41", + "2022-07-06T01:51", + "2022-07-06T02:01", + "2022-07-06T02:11", + "2022-07-06T02:21", + "2022-07-06T02:31", + "2022-07-06T02:41" + ], + "airmasses": [ + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ] + } + }, + "airmass_limit": 10.1 +} From a75bb3a0745cc61f7eda75160d88021407637c2a Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Mon, 11 Jul 2022 12:11:36 -0700 Subject: [PATCH 044/165] delete the parameters passed in for airmasses functions --- adaptive_scheduler/simulation/metrics.py | 11 +++++++++-- adaptive_scheduler/simulation/orchestrator.py | 5 ++--- tests/test_simulator_metrics.py | 15 +++++++-------- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index fd749f1e..7cafca07 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -164,27 +164,32 @@ def _get_ideal_airmass_for_request(self, request_id): """Finds the minimum airmass across all sites for the request.""" ideal_airmass = 1000 airmass_data = self._get_airmass_data_from_observation_portal(request_id) + print("ideal get") for site in airmass_data['airmass_data'].values(): ideal_for_site = min(site['airmasses']) ideal_airmass = min(ideal_airmass, ideal_for_site) return ideal_airmass - def avg_ideal_airmass(self, schedule): + def avg_ideal_airmass(self, schedule=None): """Calculates the average ideal airmass for scheduled observations.""" + schedule = self.combined_schedule if schedule is None else schedule sum_ideal_airmass = 0 count = 0 + print (len(list(schedule.values()))) for reservations 
in schedule.values(): for reservation in reservations: if reservation.scheduled: request_id = reservation.request.id sum_ideal_airmass += self._get_ideal_airmass_for_request(request_id) count += 1 + print(sum_ideal_airmass, count) return sum_ideal_airmass / count def _get_midpoint_airmasses_from_request(self, request_id, start_time, end_time): midpoint_airmasses = {} midpoint_time = start_time + (end_time - start_time) / 2 airmass_data = self._get_airmass_data_from_observation_portal(request_id)['airmass_data'] + print("midpoint get") for site, details in airmass_data.items(): times, airmasses = list(details.values())[0], list(details.values())[1] index = 0 @@ -199,7 +204,9 @@ def _get_midpoint_airmasses_from_request(self, request_id, start_time, end_time) midpoint_airmasses[site] = midpoint_airmass return midpoint_airmasses - def avg_midpoint_airmass(self, schedule, semester_start): + def avg_midpoint_airmass(self, schedule=None): + schedule = self.combined_schedule if schedule is None else schedule + semester_start = self.scheduler_runner.semester_details['start'] midpoint_airmass_for_each_reservation = [] sum_midpoint_airmass = 0 count = 0 diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index c69cf716..4b62a3dc 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -99,7 +99,6 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche metrics = MetricCalculator(normal_scheduler_result, rr_scheduler_result, scheduler, scheduler_runner) observation_portal_interface = scheduler_runner.network_interface.observation_portal_interface - semester_start = scheduler_runner.semester_details['start'] sched_params = scheduler_runner.sched_params metrics = { @@ -115,8 +114,8 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche 'total_scheduled_seconds': metrics.total_scheduled_seconds(), 
'total_available_seconds': metrics.total_available_seconds(), 'percent_time_utilization': metrics.percent_time_utilization(), - 'avg_ideal_airmass': metrics.avg_ideal_airmass(metrics.combined_schedule), - 'avg_midpoint_airmass': metrics.avg_midpoint_airmass(metrics.combined_schedule, semester_start), + 'avg_ideal_airmass': metrics.avg_ideal_airmass(), + 'avg_midpoint_airmass': metrics.avg_midpoint_airmass(), } send_to_opensearch(sched_params.opensearch_url, sched_params.simulation_opensearch_index, metrics) diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 502b9a08..684aeff0 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -11,13 +11,16 @@ class TestMetrics(): def setup(self): + self.start = datetime.strptime("2022-07-06T00:30", '%Y-%m-%dT%H:%M') + self.end = self.start + timedelta(minutes=90) self.scheduler_run_time = datetime.utcnow() scheduler_result_attrs = {'resources_scheduled.return_value': ['bpl', 'coj']} self.mock_scheduler_result = Mock(**scheduler_result_attrs) self.mock_scheduler = Mock(estimated_scheduler_end=self.scheduler_run_time) - self.mock_scheduler_runner = Mock() + self.mock_scheduler_runner = Mock(semester_details={'start': self.start}) self.mock_scheduler_runner.sched_params.metric_effective_horizon = 5 - + + # self.mock_scheduler_runner = start res1 = Mock(duration=10) res2 = Mock(duration=20) res3 = Mock(duration=30) @@ -153,11 +156,7 @@ def test_airmass_functions(self): scheduled_reservations = [mock_reservation_1, mock_reservation_2] schedule = {'reservations': scheduled_reservations} - start = datetime.strptime("2022-07-06T00:30", '%Y-%m-%dT%H:%M') - end = start + timedelta(minutes=90) - semester_start = start - - assert self.metrics._get_midpoint_airmasses_from_request(request_id_1, start, end) == {'tfn': 7, 'egg': 3} + assert self.metrics._get_midpoint_airmasses_from_request(request_id_1, self.start, self.end) == {'tfn': 7, 'egg': 3} assert 
self.metrics._get_ideal_airmass_for_request(request_id_2) == 1 assert self.metrics.avg_ideal_airmass(schedule) == 2 - assert self.metrics.avg_midpoint_airmass(schedule, semester_start) == 5 + assert self.metrics.avg_midpoint_airmass(schedule) == 5 \ No newline at end of file From 781950a2e8f477cc07cc404116c67be8a37f928a Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Mon, 11 Jul 2022 12:17:45 -0700 Subject: [PATCH 045/165] deleted debug prints --- adaptive_scheduler/simulation/metrics.py | 4 ---- tests/test_simulator_metrics.py | 6 +++++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 7cafca07..fb8c4aa7 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -164,7 +164,6 @@ def _get_ideal_airmass_for_request(self, request_id): """Finds the minimum airmass across all sites for the request.""" ideal_airmass = 1000 airmass_data = self._get_airmass_data_from_observation_portal(request_id) - print("ideal get") for site in airmass_data['airmass_data'].values(): ideal_for_site = min(site['airmasses']) ideal_airmass = min(ideal_airmass, ideal_for_site) @@ -175,21 +174,18 @@ def avg_ideal_airmass(self, schedule=None): schedule = self.combined_schedule if schedule is None else schedule sum_ideal_airmass = 0 count = 0 - print (len(list(schedule.values()))) for reservations in schedule.values(): for reservation in reservations: if reservation.scheduled: request_id = reservation.request.id sum_ideal_airmass += self._get_ideal_airmass_for_request(request_id) count += 1 - print(sum_ideal_airmass, count) return sum_ideal_airmass / count def _get_midpoint_airmasses_from_request(self, request_id, start_time, end_time): midpoint_airmasses = {} midpoint_time = start_time + (end_time - start_time) / 2 airmass_data = self._get_airmass_data_from_observation_portal(request_id)['airmass_data'] - print("midpoint get") for site, details in 
airmass_data.items(): times, airmasses = list(details.values())[0], list(details.values())[1] index = 0 diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 684aeff0..ccb03644 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -143,6 +143,8 @@ def test_airmass_functions(self): with open(data_path_2) as f: airmass_data_2 = json.load(f) self.metrics._get_airmass_data_from_observation_portal = Mock(side_effect=[airmass_data_1, airmass_data_1, + airmass_data_1, airmass_data_2, + airmass_data_1, airmass_data_2, airmass_data_1, airmass_data_2, airmass_data_1, airmass_data_2]) request_id_1 = Mock() @@ -159,4 +161,6 @@ def test_airmass_functions(self): assert self.metrics._get_midpoint_airmasses_from_request(request_id_1, self.start, self.end) == {'tfn': 7, 'egg': 3} assert self.metrics._get_ideal_airmass_for_request(request_id_2) == 1 assert self.metrics.avg_ideal_airmass(schedule) == 2 - assert self.metrics.avg_midpoint_airmass(schedule) == 5 \ No newline at end of file + assert self.metrics.avg_midpoint_airmass(schedule) == 5 + assert self.metrics.avg_ideal_airmass() == float(5/3) + \ No newline at end of file From 524ef2b3181c19619ccb9572621d90c50edd57dd Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Mon, 11 Jul 2022 20:29:38 +0000 Subject: [PATCH 046/165] reworked binning function --- adaptive_scheduler/simulation/metrics.py | 94 ++++++++++--------- adaptive_scheduler/simulation/orchestrator.py | 4 +- tests/airmass_data.json~ | 51 ++++++++++ tests/test_simulator_metrics.py | 60 ++++-------- tests/test_simulator_metrics.py~ | 3 + 5 files changed, 123 insertions(+), 89 deletions(-) create mode 100644 tests/airmass_data.json~ create mode 100644 tests/test_simulator_metrics.py~ diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index fd749f1e..07fe872d 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ 
-7,7 +7,7 @@ import requests from requests.exceptions import RequestException, Timeout -from rise_set.astrometry import calculate_airmass_at_times + from adaptive_scheduler.observation_portal_connections import ObservationPortalConnectionError from adaptive_scheduler.utils import time_in_capped_intervals, normalised_epoch_to_datetime, datetime_to_epoch from adaptive_scheduler.models import DataContainer @@ -29,6 +29,37 @@ def percent_diff(x, y): return abs(x - y) / mean * 100. +def generate_bin_names(bin_size, bin_range): + start = int(bin_range[0]) + end = int(bin_range[1]) + if bin_size == 1: + return [str(n) for n in range(start, end+1)] + bin_names = [] + bin_start = list(range(start, end+1, bin_size)) + for start_num in bin_start: + end_num = start_num + bin_size - 1 + end_num = end_num if end_num < end else end + if end_num == start_num: + bin_name = str(start_num) + else: + bin_name = f'{start_num}-{end_num}' + bin_names.append(bin_name) + return bin_names + + +def bin_data(data, bin_size=1, bin_range=None): + bin_range = (min(data), max(data)) if bin_range is None else bin_range + data_dict = {bin_name: 0 for bin_name in generate_bin_names(bin_size, bin_range)} + for i in data: + if i < bin_range[0] or i > bin_range[1]+1: + continue + index = int((i - bin_range[0]) / bin_size) + keyname = list(data_dict)[index] + data_dict[keyname] += 1 + data_dict = {key: val for key, val in data_dict.items() if val != 0} + return data_dict + + class MetricCalculator(): """A class encapsulating the metric calculating functions for the scheduler simulator. 
@@ -79,25 +110,17 @@ def _combine_normal_rr_schedules(self): def count_scheduled(self, schedule=None): schedule = self.combined_schedule if schedule is None else schedule counter = 0 + total = 0 for reservations in schedule.values(): for reservation in reservations: + total += 1 if reservation.scheduled: counter += 1 - return counter - - def count_unscheduled(self, schedule=None): - schedule = self.combined_schedule if schedule is None else schedule - counter = 0 - for reservations in schedule.values(): - for reservation in reservations: - if not reservation.scheduled: - counter += 1 - return counter + return counter, total def percent_reservations_scheduled(self, schedule=None): schedule = self.combined_schedule if schedule is None else schedule - scheduled = self.count_scheduled(schedule) - total = scheduled + self.count_unscheduled(schedule) + scheduled, total = self.count_scheduled(schedule) return percent_of(scheduled, total) def total_scheduled_seconds(self, schedule=None): @@ -138,7 +161,7 @@ def percent_time_utilization(self, schedule=None, resources_scheduled=None, hori horizon_days = self.effective_horizon if horizon_days is None else horizon_days return percent_of(self.total_scheduled_seconds(schedule), self.total_available_seconds(resources_scheduled, horizon_days)) - + def _get_airmass_data_from_observation_portal(self, request_id): """Pulls airmass data from the Observation Portal. 
@@ -159,7 +182,7 @@ def _get_airmass_data_from_observation_portal(self, request_id): raise ObservationPortalConnectionError("get_airmass_data failed: {}".format(repr(e))) return airmass_data - + def _get_ideal_airmass_for_request(self, request_id): """Finds the minimum airmass across all sites for the request.""" ideal_airmass = 1000 @@ -209,7 +232,7 @@ def avg_midpoint_airmass(self, schedule, semester_start): request = reservation.request request_id = request.id start_time = normalised_epoch_to_datetime(reservation.scheduled_start, - datetime_to_epoch(semester_start)) + datetime_to_epoch(semester_start)) end_time = start_time + dt.timedelta(seconds=reservation.duration) midpoint_airmasses = self._get_midpoint_airmasses_from_request(request_id, start_time, end_time) site = reservation.scheduled_resource[-3:] @@ -219,6 +242,15 @@ def avg_midpoint_airmass(self, schedule, semester_start): count += 1 return sum_midpoint_airmass / count + def tac_priority_histogram(self, schedule=None): + schedule = self.combined_schedule if schedule is None else schedule + bin_size = 10 + tac_priority_values = [] + for reservations in schedule.values(): + for reservation in reservations: + tac_priority_values.append(reservation.request_group.proposal.tac_priority) + return bin_data(tac_priority_values, bin_size=bin_size) + def reservation_data_populator(reservation): """Creates a new data container containing parameters useful in calculating metrics. @@ -243,33 +275,3 @@ def reservation_data_populator(reservation): tac_priority=proposal.tac_priority, ) return data - - -def fill_bin_with_reservation_data(data_dict, bin_name, reservation): - """Populates bins in a dictionary with the reservation data container. The original - dictionary is modified, instead of creating and returning a copy. - - Args: - data_dict (dict): Binned data dictionary. Each bin contains a list of DataContainer's. - bin_name (str): The name of the bin to create or populate. 
- reservation (Reservation_v3): A Reservation object. - """ - if bin_name not in data_dict: - data_dict[bin_name] = [] - reservation_data = reservobservation_portal_interface, _reservation_data(scheduled_requests_by_eff_priority, - eff_priority, - reservation) - return scheduled_requests_by_eff_priority - - -def bin_scheduler_result_by_tac_priority(schedule): - scheduled_requests_by_tac_priority = {} - for reservations in schedule.values(): - for reservation in reservations: - if reservation.scheduled: - proposal = reservation.request_group.proposal - tac_priority = str(proposal.tac_priority) - fill_bin_with_reservation_data(scheduled_requests_by_tac_priority, - tac_priority, - reservation) - return scheduled_requests_by_tac_priority diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index c69cf716..ec5dc2de 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -111,12 +111,14 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche 'mip_gap': sched_params.mip_gap, 'record_time': datetime.utcnow().isoformat(), - 'total_scheduled_count': metrics.count_scheduled(), + 'total_scheduled_count': metrics.count_scheduled()[0], + 'percent_reservations_scheduled': metrics.percent_reservations_scheduled(), 'total_scheduled_seconds': metrics.total_scheduled_seconds(), 'total_available_seconds': metrics.total_available_seconds(), 'percent_time_utilization': metrics.percent_time_utilization(), 'avg_ideal_airmass': metrics.avg_ideal_airmass(metrics.combined_schedule), 'avg_midpoint_airmass': metrics.avg_midpoint_airmass(metrics.combined_schedule, semester_start), + 'tac_priority_histogram': metrics.tac_priority_histogram() } send_to_opensearch(sched_params.opensearch_url, sched_params.simulation_opensearch_index, metrics) diff --git a/tests/airmass_data.json~ b/tests/airmass_data.json~ new file mode 100644 index 00000000..2d690fc4 --- 
/dev/null +++ b/tests/airmass_data.json~ @@ -0,0 +1,51 @@ +airmasses = { + "airmass_data": { + "tfn": { + "times": [ + "2022-07-06T00:11", + "2022-07-06T00:21", + "2022-07-06T00:31", + "2022-07-06T00:41", + "2022-07-06T00:51", + "2022-07-06T01:01", + "2022-07-06T01:11", + "2022-07-06T01:21", + "2022-07-06T01:31", + "2022-07-06T01:41", + "2022-07-06T01:51", + "2022-07-06T02:01", + "2022-07-06T02:11", + "2022-07-06T02:21", + "2022-07-06T02:31", + "2022-07-06T02:41", + "2022-07-06T02:51", + "2022-07-06T03:01", + "2022-07-06T03:11", + "2022-07-06T03:21" + ], + "airmasses": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20 + ] + } + }, + "airmass_limit": 10.1 +} diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 37efaf89..8db0c365 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -1,5 +1,5 @@ from adaptive_scheduler.simulation.metrics import (MetricCalculator, - fill_bin_with_reservation_data) + bin_data) from adaptive_scheduler.models import DataContainer import os @@ -37,7 +37,7 @@ def setup(self): self.mock_scheduler_result, self.mock_scheduler, self.mock_scheduler_runner) - + dir_path = os.path.dirname(os.path.realpath(__file__)) data_path = os.path.join(dir_path, 'airmass_data.json') with open(data_path) as f: @@ -96,46 +96,22 @@ def test_percent_time_utilization(self): assert self.metrics.percent_time_utilization(test_schedule, ['bpl'], 1) == 100. 
assert self.metrics.percent_time_utilization() == 60/(86400*4)*100 - def test_fill_bin_with_reservation_data(self): - data_dict = {} - start_time = datetime.utcnow() - - mock_reservation = Mock( - duration=10, - scheduled_resource='bpl', - scheduled_start=start_time, - scheduled=True, - ) - mock_reservation.request_group.ipp_value = 20 - mock_reservation.request_group.proposal.tac_priority = 50 - mock_reservation.request_group.id = 1 - mock_reservation.request.id = 2 - - expected_datacontainer = DataContainer( - request_group_id=1, - request_id=2, - duration=10, - scheduled_resource='bpl', - scheduled=True, - scheduled_start=start_time, - ipp_value=20, - tac_priority=50, - ) - - bin_data = { - 'bin1': mock_reservation, - 'bin2': mock_reservation, - } - for bin_name, reservation in bin_data.items(): - fill_bin_with_reservation_data(data_dict, bin_name, reservation) - - expected = { - 'bin1': [expected_datacontainer], - 'bin2': [expected_datacontainer], - } - for bin_name, data in data_dict.items(): - for i, item in enumerate(data): - assert expected[bin_name][i].__dict__ == item.__dict__ + def test_bin_data(self): + data = [1, 3, 4, 2, 6, 5, 3, 2, 3, 4, 7, 9, 3, 8, 6, 4] + data2 = [0.5, 3.7, 2.8, 6.9, 1.8] + bin_range = (1, 9) + + expected1 = {'1-3': 7, '4-6': 6, '7-9': 3} + expected2 = {'1': 1, '2': 2, '3': 4, '4': 3, '5': 1, '6': 2, '7': 1, '8': 1, '9': 1} + expected3 = {'1-2': 3, '3-4': 7, '5-6': 3, '7-8': 2, '9': 1} + expected4 = {'0': 1, '1': 1, '2': 1, '3': 1, '6': 1} + expected5 = {'0': 1, '1': 1, '2': 1, '3': 1} + + assert bin_data(data, 3, bin_range) == expected1 + assert bin_data(data) == expected2 + assert bin_data(data, 2) == expected3 + assert bin_data(data2) == expected4 + assert bin_data(data2, bin_range=(0, 4)) == expected5 def test_airmass_functions(self): diff --git a/tests/test_simulator_metrics.py~ b/tests/test_simulator_metrics.py~ new file mode 100644 index 00000000..b4663f9f --- /dev/null +++ b/tests/test_simulator_metrics.py~ @@ -0,0 +1,3 @@ 
+from mock import Mock, patch + +import pytest From b2cc024c14fbf3ead4b3ea1647aba3c1de15fd18 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Mon, 11 Jul 2022 21:26:59 +0000 Subject: [PATCH 047/165] updated to only request from observe portal once --- adaptive_scheduler/simulation/metrics.py | 21 ++-- tests/airmass_data.json | 140 +++++++++++------------ tests/airmass_data_2.json | 50 ++++---- tests/test_simulator_metrics.py | 19 ++- 4 files changed, 115 insertions(+), 115 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 7127f42a..95e8f4e7 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -92,6 +92,8 @@ def __init__(self, normal_scheduler_result, rr_scheduler_result, scheduler, sche self.combined_schedule = self.normal_schedule self.combined_resources_scheduled = self.normal_scheduler_result.resources_scheduled() + self.airmass_data_by_request_id = defaultdict(dict) + def _combine_resources_scheduled(self): normal_resources = self.normal_scheduler_result.resources_scheduled() rr_resources = self.rr_scheduler_result.resources_scheduled() @@ -177,7 +179,8 @@ def _get_airmass_data_from_observation_portal(self, request_id): try: response = requests.get(airmass_url, headers=self.observation_portal_interface.headers, timeout=180) response.raise_for_status() - airmass_data = response.json() + airmass_data = response.json()['airmass_data'] + self.airmass_data_by_request_id[request_id] = airmass_data except (RequestException, ValueError, Timeout) as e: raise ObservationPortalConnectionError("get_airmass_data failed: {}".format(repr(e))) @@ -186,8 +189,10 @@ def _get_airmass_data_from_observation_portal(self, request_id): def _get_ideal_airmass_for_request(self, request_id): """Finds the minimum airmass across all sites for the request.""" ideal_airmass = 1000 - airmass_data = self._get_airmass_data_from_observation_portal(request_id) - for site in 
airmass_data['airmass_data'].values(): + airmass_data = self.airmass_data_by_request_id[request_id] + if not airmass_data: + airmass_data = self._get_airmass_data_from_observation_portal(request_id) + for site in airmass_data.values(): ideal_for_site = min(site['airmasses']) ideal_airmass = min(ideal_airmass, ideal_for_site) return ideal_airmass @@ -205,16 +210,18 @@ def avg_ideal_airmass(self, schedule=None): count += 1 return sum_ideal_airmass / count - def _get_midpoint_airmasses_from_request(self, request_id, start_time, end_time): + def _get_midpoint_airmasses_for_request(self, request_id, start_time, end_time): midpoint_airmasses = {} midpoint_time = start_time + (end_time - start_time) / 2 - airmass_data = self._get_airmass_data_from_observation_portal(request_id)['airmass_data'] + airmass_data = self.airmass_data_by_request_id[request_id] + if not airmass_data: + airmass_data = self._get_airmass_data_from_observation_portal(request_id) for site, details in airmass_data.items(): times, airmasses = list(details.values())[0], list(details.values())[1] index = 0 time_diff = abs((midpoint_time - datetime.strptime(times[0], '%Y-%m-%dT%H:%M')).total_seconds()) - for i,_ in enumerate(times): + for i, _ in enumerate(times): temp_time_diff = abs((midpoint_time - datetime.strptime(times[i], '%Y-%m-%dT%H:%M')).total_seconds()) if temp_time_diff < time_diff: time_diff = temp_time_diff @@ -237,7 +244,7 @@ def avg_midpoint_airmass(self, schedule=None): start_time = normalised_epoch_to_datetime(reservation.scheduled_start, datetime_to_epoch(semester_start)) end_time = start_time + dt.timedelta(seconds=reservation.duration) - midpoint_airmasses = self._get_midpoint_airmasses_from_request(request_id, start_time, end_time) + midpoint_airmasses = self._get_midpoint_airmasses_for_request(request_id, start_time, end_time) site = reservation.scheduled_resource[-3:] midpoint_airmass = midpoint_airmasses[site] midpoint_airmass_for_each_reservation.append(midpoint_airmass) diff 
--git a/tests/airmass_data.json b/tests/airmass_data.json index e2fa3078..dc77fb3a 100644 --- a/tests/airmass_data.json +++ b/tests/airmass_data.json @@ -1,73 +1,71 @@ { - "airmass_data": { - "tfn": { - "times": [ - "2022-07-06T00:11", - "2022-07-06T00:21", - "2022-07-06T00:31", - "2022-07-06T00:41", - "2022-07-06T00:51", - "2022-07-06T01:01", - "2022-07-06T01:11", - "2022-07-06T01:21", - "2022-07-06T01:31", - "2022-07-06T01:41", - "2022-07-06T01:51", - "2022-07-06T02:01", - "2022-07-06T02:11", - "2022-07-06T02:21", - "2022-07-06T02:31", - "2022-07-06T02:41", - "2022-07-06T02:51", - "2022-07-06T03:01", - "2022-07-06T03:11", - "2022-07-06T03:21" - ], - "airmasses": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20 - ] - }, - "egg": { - "times": [ - "2022-07-06T01:31", - "2022-07-06T01:41", - "2022-07-06T01:51", - "2022-07-06T02:01", - "2022-07-06T02:11", - "2022-07-06T02:21", - "2022-07-06T02:31", - "2022-07-06T02:41" - ], - "airmasses": [ - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10 - ] - } - }, - "airmass_limit": 10.1 + "tfn": { + "times": [ + "2022-07-06T00:11", + "2022-07-06T00:21", + "2022-07-06T00:31", + "2022-07-06T00:41", + "2022-07-06T00:51", + "2022-07-06T01:01", + "2022-07-06T01:11", + "2022-07-06T01:21", + "2022-07-06T01:31", + "2022-07-06T01:41", + "2022-07-06T01:51", + "2022-07-06T02:01", + "2022-07-06T02:11", + "2022-07-06T02:21", + "2022-07-06T02:31", + "2022-07-06T02:41", + "2022-07-06T02:51", + "2022-07-06T03:01", + "2022-07-06T03:11", + "2022-07-06T03:21" + ], + "airmasses": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20 + ] + }, + "egg": { + "times": [ + "2022-07-06T01:31", + "2022-07-06T01:41", + "2022-07-06T01:51", + "2022-07-06T02:01", + "2022-07-06T02:11", + "2022-07-06T02:21", + "2022-07-06T02:31", + "2022-07-06T02:41" + ], + "airmasses": [ + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ] + } } + diff --git 
a/tests/airmass_data_2.json b/tests/airmass_data_2.json index 643a1e34..d75ac7cc 100644 --- a/tests/airmass_data_2.json +++ b/tests/airmass_data_2.json @@ -1,27 +1,25 @@ -{ - "airmass_data": { - "egg": { - "times": [ - "2022-07-06T01:31", - "2022-07-06T01:41", - "2022-07-06T01:51", - "2022-07-06T02:01", - "2022-07-06T02:11", - "2022-07-06T02:21", - "2022-07-06T02:31", - "2022-07-06T02:41" - ], - "airmasses": [ - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10 - ] - } - }, - "airmass_limit": 10.1 +{ + "egg": { + "times": [ + "2022-07-06T01:31", + "2022-07-06T01:41", + "2022-07-06T01:51", + "2022-07-06T02:01", + "2022-07-06T02:11", + "2022-07-06T02:21", + "2022-07-06T02:31", + "2022-07-06T02:41" + ], + "airmasses": [ + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ] + } } + diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 262f1343..82414374 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -19,7 +19,7 @@ def setup(self): self.mock_scheduler = Mock(estimated_scheduler_end=self.scheduler_run_time) self.mock_scheduler_runner = Mock(semester_details={'start': self.start}) self.mock_scheduler_runner.sched_params.metric_effective_horizon = 5 - + # self.mock_scheduler_runner = start res1 = Mock(duration=10) res2 = Mock(duration=20) @@ -39,7 +39,7 @@ def setup(self): self.metrics = MetricCalculator(self.mock_scheduler_result, self.mock_scheduler_result, self.mock_scheduler, - self.mock_scheduler_runner) + self.mock_scheduler_runner) def test_combining_schedules(self): scheduler_result_attrs = {'resources_scheduled.return_value': ['bpl', 'coj', 'ogg']} @@ -123,20 +123,17 @@ def test_airmass_functions(self): airmass_data_1, airmass_data_2, airmass_data_1, airmass_data_2, airmass_data_1, airmass_data_2]) - request_id_1 = Mock() - request_1 = Mock(id=request_id_1) + request_1 = Mock(id=1) mock_reservation_1 = Mock(scheduled_start=0, scheduled_resource='1m0a.doma.tfn', - request=request_1, duration=5400) - request_id_2 = 
Mock() - request_2 = Mock(id=request_id_2) + request=request_1, duration=5400) + request_2 = Mock(id=2) mock_reservation_2 = Mock(scheduled_start=0, scheduled_resource='1m0a.doma.egg', - request=request_2, duration=5400) + request=request_2, duration=5400) scheduled_reservations = [mock_reservation_1, mock_reservation_2] schedule = {'reservations': scheduled_reservations} - assert self.metrics._get_midpoint_airmasses_from_request(request_id_1, self.start, self.end) == {'tfn': 7, 'egg': 3} - assert self.metrics._get_ideal_airmass_for_request(request_id_2) == 1 + assert self.metrics._get_midpoint_airmasses_for_request(1, self.start, self.end) == {'tfn': 7, 'egg': 3} + assert self.metrics._get_ideal_airmass_for_request(2) == 1 assert self.metrics.avg_ideal_airmass(schedule) == 2 assert self.metrics.avg_midpoint_airmass(schedule) == 5 assert self.metrics.avg_ideal_airmass() == float(5/3) - From 17d997f9cd00b97eec88a9eb9d7ddd203278b824 Mon Sep 17 00:00:00 2001 From: Jon Date: Mon, 11 Jul 2022 21:38:50 +0000 Subject: [PATCH 048/165] Store off normal and rr scheduling inputs for accesibility in metrics --- adaptive_scheduler/scheduler.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/adaptive_scheduler/scheduler.py b/adaptive_scheduler/scheduler.py index 7c703daf..efc62e1a 100644 --- a/adaptive_scheduler/scheduler.py +++ b/adaptive_scheduler/scheduler.py @@ -626,6 +626,8 @@ def __init__(self, sched_params, scheduler, network_interface, network_model, in self.sched_params = sched_params self.warm_starts_setting = sched_params.warm_starts self.scheduler = scheduler + self.normal_scheduler_input = None + self.rr_scheduler_input = None self.normal_scheduler_result = None self.rr_scheduler_result = None self.network_interface = network_interface @@ -1070,18 +1072,18 @@ def scheduling_cycle(self, schedule_type, network_state_timestamp, rr_schedule_r set_schedule_type(schedule_type) result = None if schedule_type == NORMAL_OBSERVATION_TYPE: - 
scheduler_input = self.input_factory.create_normal_scheduling_input( + self.normal_scheduler_input = self.input_factory.create_normal_scheduling_input( self.estimated_normal_run_timedelta.total_seconds(), scheduled_requests_by_rg=self.normal_scheduler_result.get_scheduled_requests_by_request_group_id() if self.normal_scheduler_result else {}, rr_schedule=rr_schedule_result.schedule, network_state_timestamp=network_state_timestamp) - result = self.create_normal_schedule(scheduler_input) + result = self.create_normal_schedule(self.normal_scheduler_input) elif schedule_type == RR_OBSERVATION_TYPE: - scheduler_input = self.input_factory.create_rr_scheduling_input( + self.rr_scheduler_input = self.input_factory.create_rr_scheduling_input( self.estimated_rr_run_timedelta.total_seconds(), scheduled_requests_by_rg=self.rr_scheduler_result.get_scheduled_requests_by_request_group_id() if self.rr_scheduler_result else {}, network_state_timestamp=network_state_timestamp) - result = self.create_rr_schedule(scheduler_input) + result = self.create_rr_schedule(self.rr_scheduler_input) return result From c2ffbbd106a937e026fcda2f9c67b2eacd145bc7 Mon Sep 17 00:00:00 2001 From: Jon Date: Mon, 11 Jul 2022 22:13:09 +0000 Subject: [PATCH 049/165] Store the input reservations within the scheduler results too --- adaptive_scheduler/scheduler.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/adaptive_scheduler/scheduler.py b/adaptive_scheduler/scheduler.py index efc62e1a..f9ee8106 100644 --- a/adaptive_scheduler/scheduler.py +++ b/adaptive_scheduler/scheduler.py @@ -393,7 +393,7 @@ def run_scheduler(self, scheduler_input, estimated_scheduler_end, semester_detai print_compound_reservations(compound_reservations) # Prepare scheduler result - scheduler_result = SchedulerResult() + scheduler_result = SchedulerResult(input_reservations=compound_reservations) scheduler_result.schedule = {} scheduler_result.resource_schedules_to_cancel = list(available_resources) @@ -569,7 
+569,7 @@ class SchedulerResult(object): '''Aggregates together output of a scheduler run ''' - def __init__(self, schedule=None, resource_schedules_to_cancel=None): + def __init__(self, schedule=None, resource_schedules_to_cancel=None, input_reservations=None): ''' schedule - Expected to be a dict mapping resource to scheduled reservations resource_schedules_to_cancel - List of resources to cancel schedules on - this is the list of all available @@ -577,6 +577,7 @@ def __init__(self, schedule=None, resource_schedules_to_cancel=None): removed from the list. ''' self.schedule = schedule if schedule else {} + self.input_reservations = input_reservations if input_reservations else [] self.resource_schedules_to_cancel = resource_schedules_to_cancel if resource_schedules_to_cancel else [] def count_reservations(self): From 7deb1539950b2b8df901069a236762f26ff66536 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Mon, 11 Jul 2022 15:31:52 -0700 Subject: [PATCH 050/165] add documentation for airmass metrics calculation functions --- adaptive_scheduler/simulation/metrics.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 95e8f4e7..1f5ff135 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -211,6 +211,16 @@ def avg_ideal_airmass(self, schedule=None): return sum_ideal_airmass / count def _get_midpoint_airmasses_for_request(self, request_id, start_time, end_time): + """From the observation portal get the midpoint airmasses for one request. + + Args: + request_id (integer): The id of the request we want to get airmass data of. + start_time (datetime): The start time of the scheduled observation. + end_time (datetime): The end time of the scheduled observation. + + Returns: + midpoint_airmasses (dictionary): A dictionaory with observation sites as keys and corresponding midpoint airmasses as values. 
+ """ midpoint_airmasses = {} midpoint_time = start_time + (end_time - start_time) / 2 airmass_data = self.airmass_data_by_request_id[request_id] @@ -231,6 +241,14 @@ def _get_midpoint_airmasses_for_request(self, request_id, start_time, end_time): return midpoint_airmasses def avg_midpoint_airmass(self, schedule=None): + """Calculate the average midpoint airmass of all scheudled reservations for a single schedule. + + Args: + schedule (scheduler, optional): the schedule we calculate our metricses on. Defaults to None. + + Returns: + average(float): the average midpoint airmass of all scheduled reservation for one schedule. + """ schedule = self.combined_schedule if schedule is None else schedule semester_start = self.scheduler_runner.semester_details['start'] midpoint_airmass_for_each_reservation = [] From ee0a3054bb280a04a5613c04c241135b8790071d Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Mon, 11 Jul 2022 23:26:30 +0000 Subject: [PATCH 051/165] update percent scheduled logic to reflect actual scheduler input --- adaptive_scheduler/simulation/metrics.py | 73 ++++++++++++------- adaptive_scheduler/simulation/orchestrator.py | 1 + tests/test_simulator_metrics.py | 32 ++++---- 3 files changed, 64 insertions(+), 42 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 1f5ff135..b40521fd 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -13,9 +13,6 @@ from adaptive_scheduler.models import DataContainer -DEFAULT_EFFECTIVE_HORIZON_DAYS = 5 - - def percent_of(x, y): """Returns x/y as a percentage (float).""" return x / y * 100. 
@@ -30,6 +27,7 @@ def percent_diff(x, y): def generate_bin_names(bin_size, bin_range): + """Creates bins named 'start-end' for dictionary keys.""" start = int(bin_range[0]) end = int(bin_range[1]) if bin_size == 1: @@ -48,6 +46,17 @@ def generate_bin_names(bin_size, bin_range): def bin_data(data, bin_size=1, bin_range=None): + """Bins data to create a histogram. Currently only supports integer bin resolution. + Float input is casted to an integer for counting. + + Args: + data (list): The input data can be float or int. + bin_size (int): The width of the bins. + bin_range (int, int): Override the bin ranges. Otherwise, use the min/max of the data. + + Returns: + data_dict (str: int): The frequency count of the data. + """ bin_range = (min(data), max(data)) if bin_range is None else bin_range data_dict = {bin_name: 0 for bin_name in generate_bin_names(bin_size, bin_range)} for i in data: @@ -76,21 +85,24 @@ def __init__(self, normal_scheduler_result, rr_scheduler_result, scheduler, sche self.scheduler = scheduler self.scheduler_runner = scheduler_runner self.observation_portal_interface = self.scheduler_runner.network_interface.observation_portal_interface - if self.scheduler_runner.sched_params.metric_effective_horizon: - self.effective_horizon = self.scheduler_runner.sched_params.metric_effective_horizon - else: - self.effective_horizon = DEFAULT_EFFECTIVE_HORIZON_DAYS + self.horizon_days = self.scheduler_runner.sched_params.horizon_days self.normal_scheduler_result = normal_scheduler_result self.normal_schedule = self.normal_scheduler_result.schedule + self.normal_input_reservations = self.normal_scheduler_result.input_reservations + self.combined_input_reservations = [] if rr_scheduler_result: self.rr_scheduler_result = rr_scheduler_result self.rr_schedule = self.rr_scheduler_result.schedule + self.rr_input_reservations = self.rr_scheduler_result.input_reservations self._combine_normal_rr_schedules() self._combine_resources_scheduled() + 
self._combine_normal_rr_input_reservations() else: self.combined_schedule = self.normal_schedule self.combined_resources_scheduled = self.normal_scheduler_result.resources_scheduled() + for comp_res in self.normal_input_reservations: + self.combined_input_reservations.extend(comp_res.reservation_list) self.airmass_data_by_request_id = defaultdict(dict) @@ -109,20 +121,26 @@ def _combine_normal_rr_schedules(self): if reservation not in self.combined_schedule[resource]: self.combined_schedule[resource].append(reservation) - def count_scheduled(self, schedule=None): + def _combine_normal_rr_input_reservations(self): + for comp_res in self.normal_input_reservations: + self.combined_input_reservations.extend(comp_res.reservation_list) + for comp_res in self.rr_input_reservations: + res_list = [f for f in comp_res.reservation_list if f not in self.combined_input_reservations] + self.combined_input_reservations.extend(res_list) + + def count_scheduled(self, input_reservations=None, schedule=None): + input_reservations = self.combined_input_reservations if input_reservations is None else input_reservations schedule = self.combined_schedule if schedule is None else schedule - counter = 0 - total = 0 + scheduled_reservations = [] for reservations in schedule.values(): - for reservation in reservations: - total += 1 - if reservation.scheduled: - counter += 1 - return counter, total + scheduled_reservations.extend(reservations) + total_reservations = [res for res in input_reservations] + return len(scheduled_reservations), len(total_reservations) - def percent_reservations_scheduled(self, schedule=None): + def percent_reservations_scheduled(self, input_reservations=None, schedule=None): + input_reservations = self.combined_input_reservations if input_reservations is None else input_reservations schedule = self.combined_schedule if schedule is None else schedule - scheduled, total = self.count_scheduled(schedule) + scheduled, total = self.count_scheduled(input_reservations, 
schedule) return percent_of(scheduled, total) def total_scheduled_seconds(self, schedule=None): @@ -137,7 +155,7 @@ def total_available_seconds(self, resources_scheduled=None, horizon_days=None): """Aggregates the total available time, calculated from dark intervals. Args: - scheduled_resources (list): The list of sites scheduled, if nothing is passed then use the + resources_scheduled (list): The list of sites scheduled, if nothing is passed then use the list generated when MetricCalculators is initialized. horizon_days (float): The number of days to cap, basically an effective horizon. If nothing is passed then use the value in sched_params. @@ -146,7 +164,7 @@ def total_available_seconds(self, resources_scheduled=None, horizon_days=None): total_available_time (float): The dark intervals capped by the horizon. """ resources_scheduled = self.combined_resources_scheduled if resources_scheduled is None else resources_scheduled - horizon_days = self.effective_horizon if horizon_days is None else horizon_days + horizon_days = self.horizon_days if horizon_days is None else horizon_days total_available_time = 0 start_time = self.scheduler.estimated_scheduler_end end_time = start_time + dt.timedelta(days=horizon_days) @@ -160,7 +178,7 @@ def total_available_seconds(self, resources_scheduled=None, horizon_days=None): def percent_time_utilization(self, schedule=None, resources_scheduled=None, horizon_days=None): schedule = self.combined_schedule if schedule is None else schedule resources_scheduled = self.combined_resources_scheduled if resources_scheduled is None else resources_scheduled - horizon_days = self.effective_horizon if horizon_days is None else horizon_days + horizon_days = self.horizon_days if horizon_days is None else horizon_days return percent_of(self.total_scheduled_seconds(schedule), self.total_available_seconds(resources_scheduled, horizon_days)) @@ -211,15 +229,17 @@ def avg_ideal_airmass(self, schedule=None): return sum_ideal_airmass / count def 
_get_midpoint_airmasses_for_request(self, request_id, start_time, end_time): - """From the observation portal get the midpoint airmasses for one request. + """"Gets the midpoint airmasses by site for a request. This is done by finding the time + closest matching the calculated midpoint of the observation in the observe portal airmass data. Args: - request_id (integer): The id of the request we want to get airmass data of. - start_time (datetime): The start time of the scheduled observation. - end_time (datetime): The end time of the scheduled observation. + request_id (int): The id of the request we want to get airmass data of. + start_time (datetime.datetime): The start time of the scheduled observation. + end_time (datetime.datetime): The end time of the scheduled observation. Returns: - midpoint_airmasses (dictionary): A dictionaory with observation sites as keys and corresponding midpoint airmasses as values. + midpoint_airmasses (str: float): A dictionary with observation sites as keys and corresponding + midpoint airmasses as values. """ midpoint_airmasses = {} midpoint_time = start_time + (end_time - start_time) / 2 @@ -241,7 +261,7 @@ def _get_midpoint_airmasses_for_request(self, request_id, start_time, end_time): return midpoint_airmasses def avg_midpoint_airmass(self, schedule=None): - """Calculate the average midpoint airmass of all scheudled reservations for a single schedule. + """Calculate the average midpoint airmass of all scheduled reservations for a single schedule. Args: schedule (scheduler, optional): the schedule we calculate our metricses on. Defaults to None. 
@@ -271,6 +291,7 @@ def avg_midpoint_airmass(self, schedule=None): return sum_midpoint_airmass / count def tac_priority_histogram(self, schedule=None): + """Bins TAC Priority into the following bins: '10-19', '20-29', '30-39', '1000'.""" schedule = self.combined_schedule if schedule is None else schedule bin_size = 10 tac_priority_values = [] diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 89800b54..20dde798 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -111,6 +111,7 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche 'record_time': datetime.utcnow().isoformat(), 'total_scheduled_count': metrics.count_scheduled()[0], + 'total_request_count': metrics.count_scheduled()[1], 'percent_reservations_scheduled': metrics.percent_reservations_scheduled(), 'total_scheduled_seconds': metrics.total_scheduled_seconds(), 'total_available_seconds': metrics.total_available_seconds(), diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 82414374..cac97064 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -15,16 +15,18 @@ def setup(self): self.end = self.start + timedelta(minutes=90) self.scheduler_run_time = datetime.utcnow() scheduler_result_attrs = {'resources_scheduled.return_value': ['bpl', 'coj']} - self.mock_scheduler_result = Mock(**scheduler_result_attrs) + res1 = Mock(duration=10, scheduled=True) + res2 = Mock(duration=20, scheduled=True) + res3 = Mock(duration=30, scheduled=True) + res4 = Mock(scheduled=False) + res5 = Mock(scheduled=False) + fake_schedule = {'bpl': [res1, res2], 'coj': [res3]} + fake_comp_res = Mock(reservation_list=[res1, res2, res3, res4, res5]) + self.mock_scheduler_result = Mock(input_reservations=[fake_comp_res], **scheduler_result_attrs) self.mock_scheduler = Mock(estimated_scheduler_end=self.scheduler_run_time) 
self.mock_scheduler_runner = Mock(semester_details={'start': self.start}) - self.mock_scheduler_runner.sched_params.metric_effective_horizon = 5 + self.mock_scheduler_runner.sched_params.horizon_days = 5 - # self.mock_scheduler_runner = start - res1 = Mock(duration=10) - res2 = Mock(duration=20) - res3 = Mock(duration=30) - fake_schedule = {'bpl': [res1, res2], 'coj': [res3]} self.mock_scheduler_result.schedule = fake_schedule self.mock_scheduler.visibility_cache = {'bpl': Mock(), 'coj': Mock()} @@ -45,8 +47,11 @@ def test_combining_schedules(self): scheduler_result_attrs = {'resources_scheduled.return_value': ['bpl', 'coj', 'ogg']} fake_schedule1 = {'bpl': ['hi', 'there'], 'coj': ['person']} fake_schedule2 = {'ogg': ['lco', 'rocks'], 'coj': ['woohoo!']} + fake_input = [Mock(reservation_list=['foo', 'bar'])] mock_normal_scheduler_result = Mock(schedule=fake_schedule1, **scheduler_result_attrs) + mock_normal_scheduler_result.input_reservations = fake_input mock_rr_scheduler_result = Mock(schedule=fake_schedule2, **scheduler_result_attrs) + mock_rr_scheduler_result.input_reservations = fake_input only_normal = MetricCalculator(mock_normal_scheduler_result, None, self.mock_scheduler, self.mock_scheduler_runner) @@ -65,16 +70,11 @@ def test_percent_scheduled(self): scheduled_reservation = Mock(scheduled=True) unscheduled_reservation = Mock(scheduled=False) - all_scheduled = {'bpl': [scheduled_reservation]} - half_scheduled = {'bpl': [scheduled_reservation, unscheduled_reservation]} - none_scheduled = {'bpl': [unscheduled_reservation]} - multiple_sites = {'bpl': [scheduled_reservation, unscheduled_reservation], - 'coj': [scheduled_reservation, scheduled_reservation]} + mock_schedule = {'bpl': [scheduled_reservation], 'coj': [scheduled_reservation, scheduled_reservation]} + mock_scheduler_input = [unscheduled_reservation, scheduled_reservation, scheduled_reservation, scheduled_reservation] - assert self.metrics.percent_reservations_scheduled(all_scheduled) == 100. 
- assert self.metrics.percent_reservations_scheduled(half_scheduled) == 50. - assert self.metrics.percent_reservations_scheduled(none_scheduled) == 0. - assert self.metrics.percent_reservations_scheduled(multiple_sites) == 75. + assert self.metrics.percent_reservations_scheduled(mock_scheduler_input, mock_schedule) == 75. + assert self.metrics.percent_reservations_scheduled() == 60. def test_total_time_aggregators(self): seconds_in_day = 86400 From 0b1de91e1ec33bcaa7e6399e06b2775ed87ebed6 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Tue, 12 Jul 2022 19:47:48 +0000 Subject: [PATCH 052/165] updated binning function to bin additional data --- adaptive_scheduler/simulation/metrics.py | 59 +++++++++--------------- tests/test_simulator_metrics.py | 35 ++++++++------ 2 files changed, 43 insertions(+), 51 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index b40521fd..c6c88d8a 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -45,28 +45,40 @@ def generate_bin_names(bin_size, bin_range): return bin_names -def bin_data(data, bin_size=1, bin_range=None): +def bin_data(bin_by, data=[], bin_size=1, bin_range=None, aggregation=sum): """Bins data to create a histogram. Currently only supports integer bin resolution. Float input is casted to an integer for counting. Args: - data (list): The input data can be float or int. + bin_by (list): A list of data to bin by. Can be float or int. + data (list): Additional data points associated with the data to bin by. If the lengths are + mismatched, you will get an IndexError if the data list is too short. If it is too long, + extra values are thrown out. The aggregation function is applied to the data at the end. bin_size (int): The width of the bins. bin_range (int, int): Override the bin ranges. Otherwise, use the min/max of the data. + aggregation (func): The aggregation function to apply over the list of data. 
Must be callable. Returns: data_dict (str: int): The frequency count of the data. """ - bin_range = (min(data), max(data)) if bin_range is None else bin_range - data_dict = {bin_name: 0 for bin_name in generate_bin_names(bin_size, bin_range)} - for i in data: - if i < bin_range[0] or i > bin_range[1]+1: + if bin_range is None: + bin_range = (int(min(bin_by)), int(max(bin_by))) + else: + (int(bin_range[0]), int(bin_range[1])) + + bin_dict = {bin_name: [] for bin_name in generate_bin_names(bin_size, bin_range)} + + for i, value in enumerate(bin_by): + if value < bin_range[0] or value > bin_range[1]+1: continue - index = int((i - bin_range[0]) / bin_size) - keyname = list(data_dict)[index] - data_dict[keyname] += 1 - data_dict = {key: val for key, val in data_dict.items() if val != 0} - return data_dict + index = int((value - bin_range[0]) / bin_size) + keyname = list(bin_dict)[index] + if data: + bin_dict[keyname].append(data[i]) + else: + bin_dict[keyname].append(1) + bin_dict = {key: aggregation(val) for key, val in bin_dict.items() if val} + return bin_dict class MetricCalculator(): @@ -299,28 +311,3 @@ def tac_priority_histogram(self, schedule=None): for reservation in reservations: tac_priority_values.append(reservation.request_group.proposal.tac_priority) return bin_data(tac_priority_values, bin_size=bin_size) - - -def reservation_data_populator(reservation): - """Creates a new data container containing parameters useful in calculating metrics. - - Args: - reservation (Reservation_v3): A Reservation object (obtained from the values of Scheduler.schedule). - - Returns: - data (DataContainer): An object with data values of interest as attributes. 
- """ - request_group = reservation.request_group - proposal = request_group.proposal - - data = DataContainer( - request_group_id=reservation.request_group.id, - request_id=reservation.request.id, - duration=reservation.duration, - scheduled_resource=reservation.scheduled_resource, - scheduled=reservation.scheduled, - scheduled_start=reservation.scheduled_start, - ipp_value=reservation.request_group.ipp_value, - tac_priority=proposal.tac_priority, - ) - return data diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index cac97064..f4fa584d 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -1,11 +1,11 @@ from adaptive_scheduler.simulation.metrics import (MetricCalculator, bin_data) -from adaptive_scheduler.models import DataContainer import os import json from datetime import datetime, timedelta -from mock import Mock, patch + +from mock import Mock class TestMetrics(): @@ -94,21 +94,26 @@ def test_percent_time_utilization(self): assert self.metrics.percent_time_utilization() == 60/(86400*4)*100 def test_bin_data(self): - data = [1, 3, 4, 2, 6, 5, 3, 2, 3, 4, 7, 9, 3, 8, 6, 4] - data2 = [0.5, 3.7, 2.8, 6.9, 1.8] + bin_by = [1, 3, 4, 2, 6, 5, 3, 2, 3, 4, 7, 9, 3, 8, 6, 4] + bin_data_ = [1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 7, 6, 5, 4, 3, 2] + bin_by_float = [0.5, 2.1, 2.8, 6.9, 1.8] bin_range = (1, 9) - expected1 = {'1-3': 7, '4-6': 6, '7-9': 3} - expected2 = {'1': 1, '2': 2, '3': 4, '4': 3, '5': 1, '6': 2, '7': 1, '8': 1, '9': 1} - expected3 = {'1-2': 3, '3-4': 7, '5-6': 3, '7-8': 2, '9': 1} - expected4 = {'0': 1, '1': 1, '2': 1, '3': 1, '6': 1} - expected5 = {'0': 1, '1': 1, '2': 1, '3': 1} - - assert bin_data(data, 3, bin_range) == expected1 - assert bin_data(data) == expected2 - assert bin_data(data, 2) == expected3 - assert bin_data(data2) == expected4 - assert bin_data(data2, bin_range=(0, 4)) == expected5 + allparams = {'1-3': 7, '4-6': 6, '7-9': 3} + defaults = {'1': 1, '2': 2, '3': 4, '4': 3, '5': 1, 
'6': 2, '7': 1, '8': 1, '9': 1} + unevenbins = {'1-2': 3, '3-4': 7, '5-6': 3, '7-8': 2, '9': 1} + floats = {'0': 1, '1': 1, '2': 2, '6': 1} + capped_floats = {'0': 1, '1': 1, '2': 2} + sumdata = {'1-3': 36, '4-6': 27, '7-9': 17} + mindata = {'1-3': 1, '4-6': 2, '7-9': 4} + + assert bin_data(bin_by, bin_size=3, bin_range=bin_range) == allparams + assert bin_data(bin_by) == defaults + assert bin_data(bin_by, bin_size=2) == unevenbins + assert bin_data(bin_by_float) == floats + assert bin_data(bin_by_float, bin_range=(0, 4)) == capped_floats + assert bin_data(bin_by, bin_data_, bin_size=3) == sumdata + assert bin_data(bin_by, bin_data_, bin_size=3, aggregation=min) == mindata def test_airmass_functions(self): dir_path = os.path.dirname(os.path.realpath(__file__)) From 3700fcc19706c1106cd75d8a1a68cd53fdf3ca89 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Tue, 12 Jul 2022 21:16:50 +0000 Subject: [PATCH 053/165] updated binning function to handle floats --- adaptive_scheduler/simulation/metrics.py | 32 ++++++++++++------------ tests/test_simulator_metrics.py | 4 ++- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index c6c88d8a..87197fc6 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -5,12 +5,12 @@ from datetime import datetime from collections import defaultdict +import numpy as np import requests from requests.exceptions import RequestException, Timeout from adaptive_scheduler.observation_portal_connections import ObservationPortalConnectionError from adaptive_scheduler.utils import time_in_capped_intervals, normalised_epoch_to_datetime, datetime_to_epoch -from adaptive_scheduler.models import DataContainer def percent_of(x, y): @@ -27,16 +27,18 @@ def percent_diff(x, y): def generate_bin_names(bin_size, bin_range): - """Creates bins named 'start-end' for dictionary keys.""" - start = int(bin_range[0]) - end = 
int(bin_range[1]) - if bin_size == 1: - return [str(n) for n in range(start, end+1)] + """Creates labels for the bins.""" + start = bin_range[0] + end = bin_range[1] bin_names = [] - bin_start = list(range(start, end+1, bin_size)) + bin_start = np.arange(start, end+1, bin_size) for start_num in bin_start: - end_num = start_num + bin_size - 1 - end_num = end_num if end_num < end else end + if np.issubdtype(bin_start.dtype, np.integer): + end_num = start_num + bin_size - 1 + end_num = end_num if end_num < end else end + else: + end_num = start_num + bin_size + end_num = end_num if end_num < end else float(end) if end_num == start_num: bin_name = str(start_num) else: @@ -46,8 +48,10 @@ def generate_bin_names(bin_size, bin_range): def bin_data(bin_by, data=[], bin_size=1, bin_range=None, aggregation=sum): - """Bins data to create a histogram. Currently only supports integer bin resolution. - Float input is casted to an integer for counting. + """Bins data to create a histogram. Each bin is half-open, i.e. defined on the interval [a, b) for every bin + except for the last bin, which is defined on the interval [a, b]. The naming convention is different for + integers and floats. For example, for the label '1-2', this means the discrete values 1 and 2, whereas + for the label '1.0-2.0' this means the values on the interval [1.0, 2.0). Args: bin_by (list): A list of data to bin by. Can be float or int. @@ -61,11 +65,7 @@ def bin_data(bin_by, data=[], bin_size=1, bin_range=None, aggregation=sum): Returns: data_dict (str: int): The frequency count of the data. 
""" - if bin_range is None: - bin_range = (int(min(bin_by)), int(max(bin_by))) - else: - (int(bin_range[0]), int(bin_range[1])) - + bin_range = (min(bin_by), max(bin_by)) if bin_range is None else bin_range bin_dict = {bin_name: [] for bin_name in generate_bin_names(bin_size, bin_range)} for i, value in enumerate(bin_by): diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index f4fa584d..1e31b372 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -102,7 +102,8 @@ def test_bin_data(self): allparams = {'1-3': 7, '4-6': 6, '7-9': 3} defaults = {'1': 1, '2': 2, '3': 4, '4': 3, '5': 1, '6': 2, '7': 1, '8': 1, '9': 1} unevenbins = {'1-2': 3, '3-4': 7, '5-6': 3, '7-8': 2, '9': 1} - floats = {'0': 1, '1': 1, '2': 2, '6': 1} + floatbinsize = {'0.0-2.5': 3, '2.5-5.0': 7, '5.0-7.5': 4, '7.5-9.0': 1} + floats = {'0.5-1.5': 1, '1.5-2.5': 2, '2.5-3.5': 1, '6.5-6.9': 1} capped_floats = {'0': 1, '1': 1, '2': 2} sumdata = {'1-3': 36, '4-6': 27, '7-9': 17} mindata = {'1-3': 1, '4-6': 2, '7-9': 4} @@ -110,6 +111,7 @@ def test_bin_data(self): assert bin_data(bin_by, bin_size=3, bin_range=bin_range) == allparams assert bin_data(bin_by) == defaults assert bin_data(bin_by, bin_size=2) == unevenbins + assert bin_data(bin_by, bin_size=2.5, bin_range=(0, 9)) == floatbinsize assert bin_data(bin_by_float) == floats assert bin_data(bin_by_float, bin_range=(0, 4)) == capped_floats assert bin_data(bin_by, bin_data_, bin_size=3) == sumdata From 47bb9749886fbe500c138c2b367e6903bc3e440a Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Tue, 12 Jul 2022 14:57:34 -0700 Subject: [PATCH 054/165] add different metrics for midpoint airmass and resolve merge conflicts --- adaptive_scheduler/simulation/metrics.py | 12 +++++++++--- adaptive_scheduler/simulation/orchestrator.py | 10 ++++++---- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 
87197fc6..ae24f913 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -4,7 +4,6 @@ import datetime as dt from datetime import datetime from collections import defaultdict - import numpy as np import requests from requests.exceptions import RequestException, Timeout @@ -272,7 +271,7 @@ def _get_midpoint_airmasses_for_request(self, request_id, start_time, end_time): midpoint_airmasses[site] = midpoint_airmass return midpoint_airmasses - def avg_midpoint_airmass(self, schedule=None): + def midpoint_airmass_metrics(self, schedule=None): """Calculate the average midpoint airmass of all scheduled reservations for a single schedule. Args: @@ -284,6 +283,7 @@ def avg_midpoint_airmass(self, schedule=None): schedule = self.combined_schedule if schedule is None else schedule semester_start = self.scheduler_runner.semester_details['start'] midpoint_airmass_for_each_reservation = [] + duration_for_each_reservation = [] sum_midpoint_airmass = 0 count = 0 for reservations in schedule.values(): @@ -297,10 +297,16 @@ def avg_midpoint_airmass(self, schedule=None): midpoint_airmasses = self._get_midpoint_airmasses_for_request(request_id, start_time, end_time) site = reservation.scheduled_resource[-3:] midpoint_airmass = midpoint_airmasses[site] + duration_for_each_reservation.append(reservation.duration) midpoint_airmass_for_each_reservation.append(midpoint_airmass) sum_midpoint_airmass += midpoint_airmass count += 1 - return sum_midpoint_airmass / count + return {'avg_midpoint_airmass':(sum_midpoint_airmass / count), + 'confidence_interval_midpoint_airmass':[np.percentile(midpoint_airmass_for_each_reservation, 2.5), + np.percentile(midpoint_airmass_for_each_reservation, 97.5)], + 'duration_vs_midpoint_airmass': {'duration': duration_for_each_reservation, + 'midpoint_airmass': midpoint_airmass_for_each_reservation}} + def tac_priority_histogram(self, schedule=None): """Bins TAC Priority into the following bins: '10-19', '20-29', 
'30-39', '1000'.""" diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 20dde798..69d438b7 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -91,7 +91,6 @@ def send_to_opensearch(os_url, os_index, metrics): log.info(f"Successfully saved metrics for {metrics['simulation_id']}") else: log.warning("Not configured to save metrics in opensearch. Please set OPENSEARCH_URL and SIMULATION_OPENSEARCH_INDEX.") - log.info(metrics) # send to output for now def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, scheduler_runner): @@ -100,7 +99,7 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche metrics = MetricCalculator(normal_scheduler_result, rr_scheduler_result, scheduler, scheduler_runner) observation_portal_interface = scheduler_runner.network_interface.observation_portal_interface sched_params = scheduler_runner.sched_params - + midpoint_airmass_metricses = metrics.midpoint_airmass_metrics() metrics = { 'simulation_id': RUN_ID, 'simulation_start_time': sched_params.simulate_now, @@ -118,9 +117,12 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche 'percent_time_utilization': metrics.percent_time_utilization(), 'tac_priority_histogram': metrics.tac_priority_histogram(), 'avg_ideal_airmass': metrics.avg_ideal_airmass(), - 'avg_midpoint_airmass': metrics.avg_midpoint_airmass(), + 'avg_midpoint_airmass':midpoint_airmass_metricses['avg_midpoint_airmass'], + 'confidence_interval_midpoint_airmass': midpoint_airmass_metricses['confidence_interval_midpoint_airmass'], + 'duration_vs_midpoint_airmass': midpoint_airmass_metricses['duration_vs_midpoint_airmass'], } - send_to_opensearch(sched_params.opensearch_url, sched_params.simulation_opensearch_index, metrics) + log.info(metrics) + # send_to_opensearch(sched_params.opensearch_url, sched_params.simulation_opensearch_index, 
metrics) def main(argv=None): From 55feb9063775968f385032bfeb700cffe383d218 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Tue, 12 Jul 2022 16:19:30 -0700 Subject: [PATCH 055/165] merge airmass metrics into one function call --- adaptive_scheduler/simulation/metrics.py | 57 ++++++++++++------- adaptive_scheduler/simulation/orchestrator.py | 13 +++-- 2 files changed, 45 insertions(+), 25 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index ae24f913..636fa2cd 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -226,18 +226,27 @@ def _get_ideal_airmass_for_request(self, request_id): ideal_airmass = min(ideal_airmass, ideal_for_site) return ideal_airmass - def avg_ideal_airmass(self, schedule=None): - """Calculates the average ideal airmass for scheduled observations.""" - schedule = self.combined_schedule if schedule is None else schedule - sum_ideal_airmass = 0 - count = 0 - for reservations in schedule.values(): - for reservation in reservations: - if reservation.scheduled: - request_id = reservation.request.id - sum_ideal_airmass += self._get_ideal_airmass_for_request(request_id) - count += 1 - return sum_ideal_airmass / count + # def avg_ideal_airmass(self, schedule=None): + # """Calculates the average ideal airmass for scheduled observations.""" + # ideal_airmass_for_each_reservation = [] + # duration_for_each_reservation = [] + # schedule = self.combined_schedule if schedule is None else schedule + # sum_ideal_airmass = 0 + # count = 0 + # for reservations in schedule.values(): + # for reservation in reservations: + # if reservation.scheduled: + # request_id = reservation.request.id + # ideal_airmass = self._get_ideal_airmass_for_request(request_id) + # sum_ideal_airmass += ideal_airmass + # ideal_airmass_for_each_reservation.append(ideal_airmass) + # count += 1 + # return {'avg_ideal_airmass':(sum_ideal_airmass / count), + # 
'confidence_interval_midpoint_airmass':[[np.percentile(midpoint_airmass_for_each_reservation, 2.5), + # np.percentile(midpoint_airmass_for_each_reservation, 97.5)]], + # 'duration_vs_midpoint_airmass': [{'duration': duration_for_each_reservation, + # 'midpoint_airmass': midpoint_airmass_for_each_reservation}]} + def _get_midpoint_airmasses_for_request(self, request_id, start_time, end_time): """"Gets the midpoint airmasses by site for a request. This is done by finding the time @@ -271,7 +280,7 @@ def _get_midpoint_airmasses_for_request(self, request_id, start_time, end_time): midpoint_airmasses[site] = midpoint_airmass return midpoint_airmasses - def midpoint_airmass_metrics(self, schedule=None): + def airmass_metrics(self, schedule=None): """Calculate the average midpoint airmass of all scheduled reservations for a single schedule. Args: @@ -285,6 +294,8 @@ def midpoint_airmass_metrics(self, schedule=None): midpoint_airmass_for_each_reservation = [] duration_for_each_reservation = [] sum_midpoint_airmass = 0 + sum_ideal_airmass = 0 + ideal_airmass_for_each_reservation = [] count = 0 for reservations in schedule.values(): for reservation in reservations: @@ -297,15 +308,23 @@ def midpoint_airmass_metrics(self, schedule=None): midpoint_airmasses = self._get_midpoint_airmasses_for_request(request_id, start_time, end_time) site = reservation.scheduled_resource[-3:] midpoint_airmass = midpoint_airmasses[site] - duration_for_each_reservation.append(reservation.duration) midpoint_airmass_for_each_reservation.append(midpoint_airmass) sum_midpoint_airmass += midpoint_airmass + ideal_airmass = self._get_ideal_airmass_for_request(request_id) + ideal_airmass_for_each_reservation.append(ideal_airmass) + sum_ideal_airmass += ideal_airmass + duration_for_each_reservation.append(reservation.duration) count += 1 - return {'avg_midpoint_airmass':(sum_midpoint_airmass / count), - 'confidence_interval_midpoint_airmass':[np.percentile(midpoint_airmass_for_each_reservation, 2.5), - 
np.percentile(midpoint_airmass_for_each_reservation, 97.5)], - 'duration_vs_midpoint_airmass': {'duration': duration_for_each_reservation, - 'midpoint_airmass': midpoint_airmass_for_each_reservation}} + + airmass_data = {'raw_airmass_data': [{'midpoint_airmasses': midpoint_airmass_for_each_reservation}, + {'ideal_airmasses': ideal_airmass_for_each_reservation}, + {'durations': duration_for_each_reservation},], + 'avg_midpoint_airmass': (sum_midpoint_airmass / count), + 'avg_ideal_airmass': (sum_ideal_airmass / count), + 'ci_midpoint_airmass': [[np.percentile(midpoint_airmass_for_each_reservation, 2.5), + np.percentile(midpoint_airmass_for_each_reservation, 97.5)]], + } + return airmass_data def tac_priority_histogram(self, schedule=None): diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 69d438b7..65598c59 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -99,7 +99,7 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche metrics = MetricCalculator(normal_scheduler_result, rr_scheduler_result, scheduler, scheduler_runner) observation_portal_interface = scheduler_runner.network_interface.observation_portal_interface sched_params = scheduler_runner.sched_params - midpoint_airmass_metricses = metrics.midpoint_airmass_metrics() + airmass_metrics = metrics.airmass_metrics() metrics = { 'simulation_id': RUN_ID, 'simulation_start_time': sched_params.simulate_now, @@ -116,13 +116,14 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche 'total_available_seconds': metrics.total_available_seconds(), 'percent_time_utilization': metrics.percent_time_utilization(), 'tac_priority_histogram': metrics.tac_priority_histogram(), - 'avg_ideal_airmass': metrics.avg_ideal_airmass(), - 'avg_midpoint_airmass':midpoint_airmass_metricses['avg_midpoint_airmass'], - 'confidence_interval_midpoint_airmass': 
midpoint_airmass_metricses['confidence_interval_midpoint_airmass'], - 'duration_vs_midpoint_airmass': midpoint_airmass_metricses['duration_vs_midpoint_airmass'], + 'airmass_metrics': airmass_metrics, + # 'avg_ideal_airmass': metrics.avg_ideal_airmass(), + # 'avg_midpoint_airmass':midpoint_airmass_metricses['avg_midpoint_airmass'], + # 'confidence_interval_midpoint_airmass': midpoint_airmass_metricses['confidence_interval_midpoint_airmass'], + # 'duration_vs_midpoint_airmass': midpoint_airmass_metricses['duration_vs_midpoint_airmass'], } log.info(metrics) - # send_to_opensearch(sched_params.opensearch_url, sched_params.simulation_opensearch_index, metrics) + send_to_opensearch(sched_params.opensearch_url, sched_params.simulation_opensearch_index, metrics) def main(argv=None): From 65c6cca4b967cc94cf943aab83c0d2ac7b3db477 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Tue, 12 Jul 2022 16:21:37 -0700 Subject: [PATCH 056/165] metrics code cleaning --- adaptive_scheduler/simulation/metrics.py | 27 +----------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 636fa2cd..cf2d0a50 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -212,7 +212,6 @@ def _get_airmass_data_from_observation_portal(self, request_id): self.airmass_data_by_request_id[request_id] = airmass_data except (RequestException, ValueError, Timeout) as e: raise ObservationPortalConnectionError("get_airmass_data failed: {}".format(repr(e))) - return airmass_data def _get_ideal_airmass_for_request(self, request_id): @@ -226,28 +225,6 @@ def _get_ideal_airmass_for_request(self, request_id): ideal_airmass = min(ideal_airmass, ideal_for_site) return ideal_airmass - # def avg_ideal_airmass(self, schedule=None): - # """Calculates the average ideal airmass for scheduled observations.""" - # ideal_airmass_for_each_reservation = [] - # 
duration_for_each_reservation = [] - # schedule = self.combined_schedule if schedule is None else schedule - # sum_ideal_airmass = 0 - # count = 0 - # for reservations in schedule.values(): - # for reservation in reservations: - # if reservation.scheduled: - # request_id = reservation.request.id - # ideal_airmass = self._get_ideal_airmass_for_request(request_id) - # sum_ideal_airmass += ideal_airmass - # ideal_airmass_for_each_reservation.append(ideal_airmass) - # count += 1 - # return {'avg_ideal_airmass':(sum_ideal_airmass / count), - # 'confidence_interval_midpoint_airmass':[[np.percentile(midpoint_airmass_for_each_reservation, 2.5), - # np.percentile(midpoint_airmass_for_each_reservation, 97.5)]], - # 'duration_vs_midpoint_airmass': [{'duration': duration_for_each_reservation, - # 'midpoint_airmass': midpoint_airmass_for_each_reservation}]} - - def _get_midpoint_airmasses_for_request(self, request_id, start_time, end_time): """"Gets the midpoint airmasses by site for a request. This is done by finding the time closest matching the calculated midpoint of the observation in the observe portal airmass data. 
@@ -270,7 +247,6 @@ def _get_midpoint_airmasses_for_request(self, request_id, start_time, end_time): times, airmasses = list(details.values())[0], list(details.values())[1] index = 0 time_diff = abs((midpoint_time - datetime.strptime(times[0], '%Y-%m-%dT%H:%M')).total_seconds()) - for i, _ in enumerate(times): temp_time_diff = abs((midpoint_time - datetime.strptime(times[i], '%Y-%m-%dT%H:%M')).total_seconds()) if temp_time_diff < time_diff: @@ -314,8 +290,7 @@ def airmass_metrics(self, schedule=None): ideal_airmass_for_each_reservation.append(ideal_airmass) sum_ideal_airmass += ideal_airmass duration_for_each_reservation.append(reservation.duration) - count += 1 - + count += 1 airmass_data = {'raw_airmass_data': [{'midpoint_airmasses': midpoint_airmass_for_each_reservation}, {'ideal_airmasses': ideal_airmass_for_each_reservation}, {'durations': duration_for_each_reservation},], From 87eb7d6acf85cfdf4c559c1b4eeb7517b0775820 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Tue, 12 Jul 2022 23:35:56 +0000 Subject: [PATCH 057/165] merge and clean up airmass function --- adaptive_scheduler/simulation/metrics.py | 87 +++++++++++-------- adaptive_scheduler/simulation/orchestrator.py | 17 ++-- 2 files changed, 61 insertions(+), 43 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index cf2d0a50..4109abdf 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -2,7 +2,7 @@ Metric calculation functions for the scheduler simulator. """ import datetime as dt -from datetime import datetime +from datetime import datetime, timedelta from collections import defaultdict import numpy as np import requests @@ -59,7 +59,7 @@ def bin_data(bin_by, data=[], bin_size=1, bin_range=None, aggregation=sum): extra values are thrown out. The aggregation function is applied to the data at the end. bin_size (int): The width of the bins. bin_range (int, int): Override the bin ranges. 
Otherwise, use the min/max of the data. - aggregation (func): The aggregation function to apply over the list of data. Must be callable. + aggregation (func): The aggregation function to apply over the list of data. Must be callable on an array. Returns: data_dict (str: int): The frequency count of the data. @@ -154,6 +154,14 @@ def percent_reservations_scheduled(self, input_reservations=None, schedule=None) scheduled, total = self.count_scheduled(input_reservations, schedule) return percent_of(scheduled, total) + def total_scheduled_eff_priority(self, schedule=None): + schedule = self.combined_schedule if schedule is None else schedule + effective_priorities = [] + for reservations in schedule.values(): + for reservation in reservations: + effective_priorities.append(reservation.priority) + return sum(effective_priorities), effective_priorities + def total_scheduled_seconds(self, schedule=None): schedule = self.combined_schedule if schedule is None else schedule total_scheduled_seconds = 0 @@ -178,7 +186,7 @@ def total_available_seconds(self, resources_scheduled=None, horizon_days=None): horizon_days = self.horizon_days if horizon_days is None else horizon_days total_available_time = 0 start_time = self.scheduler.estimated_scheduler_end - end_time = start_time + dt.timedelta(days=horizon_days) + end_time = start_time + timedelta(days=horizon_days) for resource in resources_scheduled: if resource in self.scheduler.visibility_cache: dark_intervals = self.scheduler.visibility_cache[resource].dark_intervals @@ -267,47 +275,52 @@ def airmass_metrics(self, schedule=None): """ schedule = self.combined_schedule if schedule is None else schedule semester_start = self.scheduler_runner.semester_details['start'] - midpoint_airmass_for_each_reservation = [] - duration_for_each_reservation = [] - sum_midpoint_airmass = 0 - sum_ideal_airmass = 0 - ideal_airmass_for_each_reservation = [] - count = 0 + midpoint_airmasses = [] + durations = [] + ideal_airmasses = [] for 
reservations in schedule.values(): for reservation in reservations: - if reservation.scheduled: - request = reservation.request - request_id = request.id - start_time = normalised_epoch_to_datetime(reservation.scheduled_start, - datetime_to_epoch(semester_start)) - end_time = start_time + dt.timedelta(seconds=reservation.duration) - midpoint_airmasses = self._get_midpoint_airmasses_for_request(request_id, start_time, end_time) - site = reservation.scheduled_resource[-3:] - midpoint_airmass = midpoint_airmasses[site] - midpoint_airmass_for_each_reservation.append(midpoint_airmass) - sum_midpoint_airmass += midpoint_airmass - ideal_airmass = self._get_ideal_airmass_for_request(request_id) - ideal_airmass_for_each_reservation.append(ideal_airmass) - sum_ideal_airmass += ideal_airmass - duration_for_each_reservation.append(reservation.duration) - count += 1 - airmass_data = {'raw_airmass_data': [{'midpoint_airmasses': midpoint_airmass_for_each_reservation}, - {'ideal_airmasses': ideal_airmass_for_each_reservation}, - {'durations': duration_for_each_reservation},], - 'avg_midpoint_airmass': (sum_midpoint_airmass / count), - 'avg_ideal_airmass': (sum_ideal_airmass / count), - 'ci_midpoint_airmass': [[np.percentile(midpoint_airmass_for_each_reservation, 2.5), - np.percentile(midpoint_airmass_for_each_reservation, 97.5)]], + request_id = reservation.request.id + start_time = normalised_epoch_to_datetime(reservation.scheduled_start, + datetime_to_epoch(semester_start)) + end_time = start_time + timedelta(seconds=reservation.duration) + midpoint_airmasses_for_request = self._get_midpoint_airmasses_for_request(request_id, start_time, end_time) + site = reservation.scheduled_resource[-3:] + midpoint_airmasses.append(midpoint_airmasses_for_request[site]) + ideal_airmass = self._get_ideal_airmass_for_request(request_id) + ideal_airmasses.append(ideal_airmass) + durations.append(reservation.duration) + airmass_data = {'raw_airmass_data': [{'midpoint_airmasses': 
midpoint_airmasses}, + {'ideal_airmasses': ideal_airmasses}, + {'durations': durations}], + 'avg_midpoint_airmass': sum(midpoint_airmasses)/len(midpoint_airmasses), + 'avg_ideal_airmass': sum(ideal_airmasses)/len(ideal_airmasses), + 'ci_midpoint_airmass': [[np.percentile(midpoint_airmasses, 2.5), + np.percentile(midpoint_airmasses, 97.5)]], } return airmass_data - - def tac_priority_histogram(self, schedule=None): + def binned_tac_priority_metrics(self, input_reservations=None, schedule=None): """Bins TAC Priority into the following bins: '10-19', '20-29', '30-39', '1000'.""" + input_reservations = self.combined_input_reservations if input_reservations is None else input_reservations schedule = self.combined_schedule if schedule is None else schedule bin_size = 10 - tac_priority_values = [] + sched_tac_priority_values = [] + sched_reservation_durations = [] + all_tac_priority_values = [] + all_reservation_durations = [] for reservations in schedule.values(): for reservation in reservations: - tac_priority_values.append(reservation.request_group.proposal.tac_priority) - return bin_data(tac_priority_values, bin_size=bin_size) + sched_tac_priority_values.append(reservation.request_group.proposal.tac_priority) + sched_reservation_durations.append(reservation.duration) + for reservation in input_reservations: + all_tac_priority_values.append(reservation.request_group.proposal.tac_priority) + all_reservation_durations.append(reservation.duration) + + output_dict = { + 'sched_histogram': bin_data(sched_tac_priority_values, bin_size=bin_size), + 'sched_durations': bin_data(sched_tac_priority_values, sched_reservation_durations, bin_size=bin_size), + 'full_histogram': bin_data(all_tac_priority_values, bin_size=bin_size), + 'all_durations': bin_data(all_tac_priority_values, all_reservation_durations, bin_size=bin_size), + } + return output_dict diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 65598c59..d927018f 
100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -97,9 +97,13 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche log.info("Recording metrics for scheduler simulation run") metrics = MetricCalculator(normal_scheduler_result, rr_scheduler_result, scheduler, scheduler_runner) - observation_portal_interface = scheduler_runner.network_interface.observation_portal_interface sched_params = scheduler_runner.sched_params +<<<<<<< Updated upstream airmass_metrics = metrics.airmass_metrics() +======= + binned_tac_priority_metrics = metrics.binned_tac_priority_metrics() + midpoint_airmass_metrics = metrics.midpoint_airmass_metrics() +>>>>>>> Stashed changes metrics = { 'simulation_id': RUN_ID, 'simulation_start_time': sched_params.simulate_now, @@ -109,18 +113,19 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche 'mip_gap': sched_params.mip_gap, 'record_time': datetime.utcnow().isoformat(), + 'total_effective_priority': metrics.total_scheduled_eff_priority()[0], 'total_scheduled_count': metrics.count_scheduled()[0], 'total_request_count': metrics.count_scheduled()[1], 'percent_reservations_scheduled': metrics.percent_reservations_scheduled(), 'total_scheduled_seconds': metrics.total_scheduled_seconds(), 'total_available_seconds': metrics.total_available_seconds(), 'percent_time_utilization': metrics.percent_time_utilization(), - 'tac_priority_histogram': metrics.tac_priority_histogram(), 'airmass_metrics': airmass_metrics, - # 'avg_ideal_airmass': metrics.avg_ideal_airmass(), - # 'avg_midpoint_airmass':midpoint_airmass_metricses['avg_midpoint_airmass'], - # 'confidence_interval_midpoint_airmass': midpoint_airmass_metricses['confidence_interval_midpoint_airmass'], - # 'duration_vs_midpoint_airmass': midpoint_airmass_metricses['duration_vs_midpoint_airmass'], + 'scheduled_req_by_priority': [binned_tac_priority_metrics['sched_histogram']], + 
'scheduled_seconds_by_priority': [binned_tac_priority_metrics['sched_durations']], + 'total_req_by_priority': [binned_tac_priority_metrics['full_histogram']], + 'total_seconds_by_priority': [binned_tac_priority_metrics['all_durations']], + 'avg_ideal_airmass': metrics.avg_ideal_airmass(), } log.info(metrics) send_to_opensearch(sched_params.opensearch_url, sched_params.simulation_opensearch_index, metrics) From 4ac744bbe729a636e0b9ae0d484bac0e33987beb Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Tue, 12 Jul 2022 23:38:37 +0000 Subject: [PATCH 058/165] minor edit in binning test --- tests/test_simulator_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 1e31b372..707297ea 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -102,7 +102,7 @@ def test_bin_data(self): allparams = {'1-3': 7, '4-6': 6, '7-9': 3} defaults = {'1': 1, '2': 2, '3': 4, '4': 3, '5': 1, '6': 2, '7': 1, '8': 1, '9': 1} unevenbins = {'1-2': 3, '3-4': 7, '5-6': 3, '7-8': 2, '9': 1} - floatbinsize = {'0.0-2.5': 3, '2.5-5.0': 7, '5.0-7.5': 4, '7.5-9.0': 1} + floatbinsize = {'0.0-2.5': 3, '2.5-5.0': 7, '5.0-7.5': 4, '7.5-9.0': 2} floats = {'0.5-1.5': 1, '1.5-2.5': 2, '2.5-3.5': 1, '6.5-6.9': 1} capped_floats = {'0': 1, '1': 1, '2': 2} sumdata = {'1-3': 36, '4-6': 27, '7-9': 17} From 88be2155df19f077a1e20443c871ebd88b753208 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Tue, 12 Jul 2022 23:50:56 +0000 Subject: [PATCH 059/165] merged too fast (again) sadface --- adaptive_scheduler/simulation/orchestrator.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index d927018f..7babf9cd 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -98,12 +98,9 @@ def record_metrics(normal_scheduler_result, 
rr_scheduler_result, scheduler, sche metrics = MetricCalculator(normal_scheduler_result, rr_scheduler_result, scheduler, scheduler_runner) sched_params = scheduler_runner.sched_params -<<<<<<< Updated upstream airmass_metrics = metrics.airmass_metrics() -======= binned_tac_priority_metrics = metrics.binned_tac_priority_metrics() - midpoint_airmass_metrics = metrics.midpoint_airmass_metrics() ->>>>>>> Stashed changes + metrics = { 'simulation_id': RUN_ID, 'simulation_start_time': sched_params.simulate_now, From 2555585b99c80c9cee9bcd8a38815c7d57cda480 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Tue, 12 Jul 2022 16:52:22 -0700 Subject: [PATCH 060/165] merge conflict resolve --- adaptive_scheduler/simulation/orchestrator.py | 1 - tests/test_simulator_metrics.py | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 7babf9cd..fa9168d5 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -100,7 +100,6 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche sched_params = scheduler_runner.sched_params airmass_metrics = metrics.airmass_metrics() binned_tac_priority_metrics = metrics.binned_tac_priority_metrics() - metrics = { 'simulation_id': RUN_ID, 'simulation_start_time': sched_params.simulate_now, diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 707297ea..40b56fb7 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -141,6 +141,4 @@ def test_airmass_functions(self): assert self.metrics._get_midpoint_airmasses_for_request(1, self.start, self.end) == {'tfn': 7, 'egg': 3} assert self.metrics._get_ideal_airmass_for_request(2) == 1 - assert self.metrics.avg_ideal_airmass(schedule) == 2 - assert self.metrics.avg_midpoint_airmass(schedule) == 5 - assert self.metrics.avg_ideal_airmass() == float(5/3) + 
assert self.metrics.airmass_metrics == From 3ac3ed3ff3348f80704b9691e752d936450733db Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Tue, 12 Jul 2022 17:17:48 -0700 Subject: [PATCH 061/165] test --- install.Unix.sh | 0 tests/test_simulator_metrics.py | 13 +++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 install.Unix.sh diff --git a/install.Unix.sh b/install.Unix.sh new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 40b56fb7..43c2b466 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -4,7 +4,7 @@ import os import json from datetime import datetime, timedelta - +import numpy as np from mock import Mock @@ -141,4 +141,13 @@ def test_airmass_functions(self): assert self.metrics._get_midpoint_airmasses_for_request(1, self.start, self.end) == {'tfn': 7, 'egg': 3} assert self.metrics._get_ideal_airmass_for_request(2) == 1 - assert self.metrics.airmass_metrics == + + airmass_metrics = self.metrics.airmass_metrics() + midpoint_airmasses = [7,3] + assert type (airmass_metrics) is dict + assert airmass_metrics.keys() == ['raw_airmass_data', 'avg_midpoint_airmass', + 'avg_ideal_airmass', 'ci_midpoint_airmass'] + assert airmass_metrics['avg_midpoint_airmass'] == 5 + assert airmass_metrics['avg_ideal_airmass'] == 2 + assert airmass_metrics['raw_airmass_data']['midpoint_airmasses'] == midpoint_airmasses + assert airmass_metrics['ci_midpoint_airmass'] == [[np.percentile(midpoint_airmasses, 2.5), np.percentile(midpoint_airmasses, 97.5)]] From e334ad67ece97e9c775d23bf77250c155d305ba5 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Wed, 13 Jul 2022 16:21:49 +0000 Subject: [PATCH 062/165] remove emacs autosave file --- tests/test_simulator_metrics.py~ | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 tests/test_simulator_metrics.py~ diff --git a/tests/test_simulator_metrics.py~ b/tests/test_simulator_metrics.py~ deleted file 
mode 100644 index b4663f9f..00000000 --- a/tests/test_simulator_metrics.py~ +++ /dev/null @@ -1,3 +0,0 @@ -from mock import Mock, patch - -import pytest From c728c10b124f0fe8f75ec605679748a9269eaa02 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Wed, 13 Jul 2022 10:08:03 -0700 Subject: [PATCH 063/165] finished updating airmass metrics testings --- tests/test_simulator_metrics.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 43c2b466..576c6769 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -126,9 +126,7 @@ def test_airmass_functions(self): with open(data_path_2) as f: airmass_data_2 = json.load(f) self.metrics._get_airmass_data_from_observation_portal = Mock(side_effect=[airmass_data_1, airmass_data_1, - airmass_data_1, airmass_data_2, - airmass_data_1, airmass_data_2, - airmass_data_1, airmass_data_2, + airmass_data_1, airmass_data_1, airmass_data_1, airmass_data_2]) request_1 = Mock(id=1) mock_reservation_1 = Mock(scheduled_start=0, scheduled_resource='1m0a.doma.tfn', @@ -142,12 +140,12 @@ def test_airmass_functions(self): assert self.metrics._get_midpoint_airmasses_for_request(1, self.start, self.end) == {'tfn': 7, 'egg': 3} assert self.metrics._get_ideal_airmass_for_request(2) == 1 - airmass_metrics = self.metrics.airmass_metrics() + airmass_metrics = self.metrics.airmass_metrics(schedule) midpoint_airmasses = [7,3] assert type (airmass_metrics) is dict - assert airmass_metrics.keys() == ['raw_airmass_data', 'avg_midpoint_airmass', + assert list(airmass_metrics.keys()) == ['raw_airmass_data', 'avg_midpoint_airmass', 'avg_ideal_airmass', 'ci_midpoint_airmass'] assert airmass_metrics['avg_midpoint_airmass'] == 5 assert airmass_metrics['avg_ideal_airmass'] == 2 - assert airmass_metrics['raw_airmass_data']['midpoint_airmasses'] == midpoint_airmasses + assert airmass_metrics['raw_airmass_data'][0]['midpoint_airmasses'] == 
midpoint_airmasses assert airmass_metrics['ci_midpoint_airmass'] == [[np.percentile(midpoint_airmasses, 2.5), np.percentile(midpoint_airmasses, 97.5)]] From 4bb8bf15172573c8aa6e12ed68015db97168cc3e Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Wed, 13 Jul 2022 17:29:30 +0000 Subject: [PATCH 064/165] small speed optimizations and additional priority metrics --- adaptive_scheduler/simulation/metrics.py | 80 ++++++++++--------- adaptive_scheduler/simulation/orchestrator.py | 8 +- tests/test_simulator_metrics.py | 8 +- 3 files changed, 52 insertions(+), 44 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 4109abdf..6eac7924 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -13,7 +13,7 @@ def percent_of(x, y): - """Returns x/y as a percentage (float).""" + """Returns x/y as a percentage.""" return x / y * 100. @@ -46,7 +46,7 @@ def generate_bin_names(bin_size, bin_range): return bin_names -def bin_data(bin_by, data=[], bin_size=1, bin_range=None, aggregation=sum): +def bin_data(bin_by, data=[], bin_size=1, bin_range=None, aggregator=sum): """Bins data to create a histogram. Each bin is half-open, i.e. defined on the interval [a, b) for every bin except for the last bin, which is defined on the interval [a, b]. The naming convention is different for integers and floats. For example, for the label '1-2', this means the discrete values 1 and 2, whereas @@ -59,7 +59,8 @@ def bin_data(bin_by, data=[], bin_size=1, bin_range=None, aggregation=sum): extra values are thrown out. The aggregation function is applied to the data at the end. bin_size (int): The width of the bins. bin_range (int, int): Override the bin ranges. Otherwise, use the min/max of the data. - aggregation (func): The aggregation function to apply over the list of data. Must be callable on an array. + aggregator (func): The aggregation function to apply over the list of data. 
Must be callable on an array. + Additional items can be passed to the aggregation function. Returns: data_dict (str: int): The frequency count of the data. @@ -76,7 +77,7 @@ def bin_data(bin_by, data=[], bin_size=1, bin_range=None, aggregation=sum): bin_dict[keyname].append(data[i]) else: bin_dict[keyname].append(1) - bin_dict = {key: aggregation(val) for key, val in bin_dict.items() if val} + bin_dict = {key: aggregator(val) for key, val in bin_dict.items() if val} return bin_dict @@ -101,6 +102,7 @@ def __init__(self, normal_scheduler_result, rr_scheduler_result, scheduler, sche self.normal_scheduler_result = normal_scheduler_result self.normal_schedule = self.normal_scheduler_result.schedule self.normal_input_reservations = self.normal_scheduler_result.input_reservations + self.combined_schedule = defaultdict(dict) self.combined_input_reservations = [] if rr_scheduler_result: self.rr_scheduler_result = rr_scheduler_result @@ -125,19 +127,17 @@ def _combine_resources_scheduled(self): def _combine_normal_rr_schedules(self): self.combined_schedule = defaultdict(list) for resource, reservations in self.rr_schedule.items(): - for reservation in reservations: - self.combined_schedule[resource].append(reservation) + self.combined_schedule[resource].extend(reservations) for resource, reservations in self.normal_schedule.items(): - for reservation in reservations: - if reservation not in self.combined_schedule[resource]: - self.combined_schedule[resource].append(reservation) + reservations = [res for res in reservations if res not in self.combined_schedule[resource]] + self.combined_schedule[resource].extend(reservations) def _combine_normal_rr_input_reservations(self): for comp_res in self.normal_input_reservations: self.combined_input_reservations.extend(comp_res.reservation_list) for comp_res in self.rr_input_reservations: - res_list = [f for f in comp_res.reservation_list if f not in self.combined_input_reservations] - 
self.combined_input_reservations.extend(res_list) + reservations = [res for res in comp_res.reservation_list if res not in self.combined_input_reservations] + self.combined_input_reservations.extend(reservations) def count_scheduled(self, input_reservations=None, schedule=None): input_reservations = self.combined_input_reservations if input_reservations is None else input_reservations @@ -145,8 +145,7 @@ def count_scheduled(self, input_reservations=None, schedule=None): scheduled_reservations = [] for reservations in schedule.values(): scheduled_reservations.extend(reservations) - total_reservations = [res for res in input_reservations] - return len(scheduled_reservations), len(total_reservations) + return len(scheduled_reservations), len(input_reservations) def percent_reservations_scheduled(self, input_reservations=None, schedule=None): input_reservations = self.combined_input_reservations if input_reservations is None else input_reservations @@ -158,17 +157,15 @@ def total_scheduled_eff_priority(self, schedule=None): schedule = self.combined_schedule if schedule is None else schedule effective_priorities = [] for reservations in schedule.values(): - for reservation in reservations: - effective_priorities.append(reservation.priority) + effective_priorities.extend([res.priority for res in reservations]) return sum(effective_priorities), effective_priorities - def total_scheduled_seconds(self, schedule=None): + def get_scheduled_durations(self, schedule=None): schedule = self.combined_schedule if schedule is None else schedule - total_scheduled_seconds = 0 + durations = [] for reservations in schedule.values(): - for reservation in reservations: - total_scheduled_seconds += reservation.duration - return total_scheduled_seconds + durations.extend([res.duration for res in reservations]) + return durations def total_available_seconds(self, resources_scheduled=None, horizon_days=None): """Aggregates the total available time, calculated from dark intervals. 
@@ -198,7 +195,7 @@ def percent_time_utilization(self, schedule=None, resources_scheduled=None, hori schedule = self.combined_schedule if schedule is None else schedule resources_scheduled = self.combined_resources_scheduled if resources_scheduled is None else resources_scheduled horizon_days = self.horizon_days if horizon_days is None else horizon_days - return percent_of(self.total_scheduled_seconds(schedule), + return percent_of(sum(self.get_scheduled_durations(schedule)), self.total_available_seconds(resources_scheduled, horizon_days)) def _get_airmass_data_from_observation_portal(self, request_id): @@ -252,7 +249,8 @@ def _get_midpoint_airmasses_for_request(self, request_id, start_time, end_time): if not airmass_data: airmass_data = self._get_airmass_data_from_observation_portal(request_id) for site, details in airmass_data.items(): - times, airmasses = list(details.values())[0], list(details.values())[1] + details = list(details.values()) + times, airmasses = details[0], details[1] index = 0 time_diff = abs((midpoint_time - datetime.strptime(times[0], '%Y-%m-%dT%H:%M')).total_seconds()) for i, _ in enumerate(times): @@ -276,8 +274,8 @@ def airmass_metrics(self, schedule=None): schedule = self.combined_schedule if schedule is None else schedule semester_start = self.scheduler_runner.semester_details['start'] midpoint_airmasses = [] - durations = [] ideal_airmasses = [] + durations = self.get_scheduled_durations(schedule) for reservations in schedule.values(): for reservation in reservations: request_id = reservation.request.id @@ -289,7 +287,6 @@ def airmass_metrics(self, schedule=None): midpoint_airmasses.append(midpoint_airmasses_for_request[site]) ideal_airmass = self._get_ideal_airmass_for_request(request_id) ideal_airmasses.append(ideal_airmass) - durations.append(reservation.duration) airmass_data = {'raw_airmass_data': [{'midpoint_airmasses': midpoint_airmasses}, {'ideal_airmasses': ideal_airmasses}, {'durations': durations}], @@ -305,22 +302,31 @@ def 
binned_tac_priority_metrics(self, input_reservations=None, schedule=None): input_reservations = self.combined_input_reservations if input_reservations is None else input_reservations schedule = self.combined_schedule if schedule is None else schedule bin_size = 10 - sched_tac_priority_values = [] - sched_reservation_durations = [] - all_tac_priority_values = [] - all_reservation_durations = [] + sched_priority_values = [] + sched_durations = self.get_scheduled_durations(schedule) + all_priority_values = [] + all_durations = [] for reservations in schedule.values(): - for reservation in reservations: - sched_tac_priority_values.append(reservation.request_group.proposal.tac_priority) - sched_reservation_durations.append(reservation.duration) + sched_priority_values.extend([res.request_group.proposal.tac_priority for res in reservations]) for reservation in input_reservations: - all_tac_priority_values.append(reservation.request_group.proposal.tac_priority) - all_reservation_durations.append(reservation.duration) + all_priority_values.append(reservation.request_group.proposal.tac_priority) + all_durations.append(reservation.duration) + + sched_histogram = bin_data(sched_priority_values, bin_size=bin_size) + bin_sched_durations = bin_data(sched_priority_values, sched_durations, bin_size) + full_histogram = bin_data(all_priority_values, bin_size=bin_size) + bin_all_durations = bin_data(all_priority_values, all_durations, bin_size) + bin_percent_count = {bin_: percent_of(np.array(sched_histogram[bin_]), np.array(full_histogram[bin_])) + for bin_ in sched_histogram} + bin_percent_duration = {bin_: percent_of(np.array(bin_sched_durations[bin_]), np.array(bin_all_durations[bin_])) + for bin_ in bin_sched_durations} output_dict = { - 'sched_histogram': bin_data(sched_tac_priority_values, bin_size=bin_size), - 'sched_durations': bin_data(sched_tac_priority_values, sched_reservation_durations, bin_size=bin_size), - 'full_histogram': bin_data(all_tac_priority_values, 
bin_size=bin_size), - 'all_durations': bin_data(all_tac_priority_values, all_reservation_durations, bin_size=bin_size), + 'sched_histogram': sched_histogram, + 'sched_durations': bin_sched_durations, + 'full_histogram': full_histogram, + 'all_durations': bin_all_durations, + 'percent_count': bin_percent_count, + 'percent_duration': bin_percent_duration, } return output_dict diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index fa9168d5..382d1d1f 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -100,6 +100,7 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche sched_params = scheduler_runner.sched_params airmass_metrics = metrics.airmass_metrics() binned_tac_priority_metrics = metrics.binned_tac_priority_metrics() + metrics = { 'simulation_id': RUN_ID, 'simulation_start_time': sched_params.simulate_now, @@ -112,8 +113,8 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche 'total_effective_priority': metrics.total_scheduled_eff_priority()[0], 'total_scheduled_count': metrics.count_scheduled()[0], 'total_request_count': metrics.count_scheduled()[1], - 'percent_reservations_scheduled': metrics.percent_reservations_scheduled(), - 'total_scheduled_seconds': metrics.total_scheduled_seconds(), + 'percent_requests_scheduled': metrics.percent_reservations_scheduled(), + 'total_scheduled_seconds': sum(metrics.get_scheduled_durations()), 'total_available_seconds': metrics.total_available_seconds(), 'percent_time_utilization': metrics.percent_time_utilization(), 'airmass_metrics': airmass_metrics, @@ -121,7 +122,8 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche 'scheduled_seconds_by_priority': [binned_tac_priority_metrics['sched_durations']], 'total_req_by_priority': [binned_tac_priority_metrics['full_histogram']], 'total_seconds_by_priority': 
[binned_tac_priority_metrics['all_durations']], - 'avg_ideal_airmass': metrics.avg_ideal_airmass(), + 'percent_sched_by_priority': [binned_tac_priority_metrics['percent_count']], + 'percent_duration_by_priority': [binned_tac_priority_metrics['percent_duration']], } log.info(metrics) send_to_opensearch(sched_params.opensearch_url, sched_params.simulation_opensearch_index, metrics) diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 40b56fb7..41aa8100 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -79,13 +79,13 @@ def test_percent_scheduled(self): def test_total_time_aggregators(self): seconds_in_day = 86400 - assert self.metrics.total_scheduled_seconds(self.mock_scheduler_result.schedule) == 60 + assert sum(self.metrics.get_scheduled_durations(self.mock_scheduler_result.schedule)) == 60 + assert sum(self.metrics.get_scheduled_durations()) == 60 assert self.metrics.total_available_seconds(['bpl', 'coj'], 0) == 0 assert self.metrics.total_available_seconds(['bpl', 'coj'], 1) == 2*seconds_in_day assert self.metrics.total_available_seconds(['bpl', 'coj'], 5) == 4*seconds_in_day assert self.metrics.total_available_seconds(['bpl'], 1) == seconds_in_day assert self.metrics.total_available_seconds([], 1) == 0 - assert self.metrics.total_scheduled_seconds() == 60 assert self.metrics.total_available_seconds() == 4*seconds_in_day def test_percent_time_utilization(self): @@ -115,7 +115,7 @@ def test_bin_data(self): assert bin_data(bin_by_float) == floats assert bin_data(bin_by_float, bin_range=(0, 4)) == capped_floats assert bin_data(bin_by, bin_data_, bin_size=3) == sumdata - assert bin_data(bin_by, bin_data_, bin_size=3, aggregation=min) == mindata + assert bin_data(bin_by, bin_data_, bin_size=3, aggregator=min) == mindata def test_airmass_functions(self): dir_path = os.path.dirname(os.path.realpath(__file__)) @@ -141,4 +141,4 @@ def test_airmass_functions(self): assert 
self.metrics._get_midpoint_airmasses_for_request(1, self.start, self.end) == {'tfn': 7, 'egg': 3} assert self.metrics._get_ideal_airmass_for_request(2) == 1 - assert self.metrics.airmass_metrics == +# assert self.metrics.airmass_metrics == From 8cff4e43d05577b5f4620658fcf5fc2e8c0e88f9 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Wed, 13 Jul 2022 19:18:15 +0000 Subject: [PATCH 065/165] wip data caching using redis, tried to test but ran out of storage space :( --- adaptive_scheduler/simulation/metrics.py | 62 +++++++++++-------- adaptive_scheduler/simulation/orchestrator.py | 2 +- tests/test_simulator_metrics.py | 10 ++- 3 files changed, 40 insertions(+), 34 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 6eac7924..0069ca0c 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -1,15 +1,17 @@ """ Metric calculation functions for the scheduler simulator. """ -import datetime as dt +import pickle from datetime import datetime, timedelta from collections import defaultdict + import numpy as np import requests from requests.exceptions import RequestException, Timeout from adaptive_scheduler.observation_portal_connections import ObservationPortalConnectionError from adaptive_scheduler.utils import time_in_capped_intervals, normalised_epoch_to_datetime, datetime_to_epoch +from adaptive_scheduler.kernel_mappings import redis def percent_of(x, y): @@ -198,7 +200,7 @@ def percent_time_utilization(self, schedule=None, resources_scheduled=None, hori return percent_of(sum(self.get_scheduled_durations(schedule)), self.total_available_seconds(resources_scheduled, horizon_days)) - def _get_airmass_data_from_observation_portal(self, request_id): + def _get_airmass_data_for_request(self, request_id): """Pulls airmass data from the Observation Portal. 
Args: @@ -210,27 +212,33 @@ def _get_airmass_data_from_observation_portal(self, request_id): airmass_data (dict): The airmass data returned from the API. """ airmass_url = f'{self.observation_portal_interface.obs_portal_url}/api/requests/{request_id}/airmass/' + try: + cached_airmass_data = pickle.loads(redis.get('airmass_data_by_request_id')) + self.airmass_data_by_request_id[request_id] = cached_airmass_data[request_id] + print(f'got cached data for {request_id}') + return cached_airmass_data[request_id] + except Exception: + # the request has not been cached yet, get the data from the portal + pass try: response = requests.get(airmass_url, headers=self.observation_portal_interface.headers, timeout=180) response.raise_for_status() - airmass_data = response.json()['airmass_data'] - self.airmass_data_by_request_id[request_id] = airmass_data + airmass_data_for_request = response.json()['airmass_data'] + self.airmass_data_by_request_id[request_id] = airmass_data_for_request + redis.set('airmass_data_by_request_id', pickle.dumps(self.airmass_data_by_request_id)) + return airmass_data_for_request except (RequestException, ValueError, Timeout) as e: raise ObservationPortalConnectionError("get_airmass_data failed: {}".format(repr(e))) - return airmass_data - def _get_ideal_airmass_for_request(self, request_id): - """Finds the minimum airmass across all sites for the request.""" + def _get_ideal_airmass(self, airmass_data): + """Finds the minimum airmass across all sites.""" ideal_airmass = 1000 - airmass_data = self.airmass_data_by_request_id[request_id] - if not airmass_data: - airmass_data = self._get_airmass_data_from_observation_portal(request_id) for site in airmass_data.values(): ideal_for_site = min(site['airmasses']) ideal_airmass = min(ideal_airmass, ideal_for_site) return ideal_airmass - def _get_midpoint_airmasses_for_request(self, request_id, start_time, end_time): + def _get_midpoint_airmasses_by_site(self, airmass_data, start_time, end_time): """"Gets the 
midpoint airmasses by site for a request. This is done by finding the time closest matching the calculated midpoint of the observation in the observe portal airmass data. @@ -245,9 +253,7 @@ def _get_midpoint_airmasses_for_request(self, request_id, start_time, end_time): """ midpoint_airmasses = {} midpoint_time = start_time + (end_time - start_time) / 2 - airmass_data = self.airmass_data_by_request_id[request_id] - if not airmass_data: - airmass_data = self._get_airmass_data_from_observation_portal(request_id) + print(airmass_data) for site, details in airmass_data.items(): details = list(details.values()) times, airmasses = details[0], details[1] @@ -259,6 +265,7 @@ def _get_midpoint_airmasses_for_request(self, request_id, start_time, end_time): time_diff = temp_time_diff index = i midpoint_airmass = airmasses[index] + print(midpoint_airmass) midpoint_airmasses[site] = midpoint_airmass return midpoint_airmasses @@ -273,29 +280,30 @@ def airmass_metrics(self, schedule=None): """ schedule = self.combined_schedule if schedule is None else schedule semester_start = self.scheduler_runner.semester_details['start'] + midpoint_airmasses = [] ideal_airmasses = [] durations = self.get_scheduled_durations(schedule) for reservations in schedule.values(): for reservation in reservations: - request_id = reservation.request.id + airmass_data = self._get_airmass_data_for_request(reservation.request.id) start_time = normalised_epoch_to_datetime(reservation.scheduled_start, datetime_to_epoch(semester_start)) end_time = start_time + timedelta(seconds=reservation.duration) - midpoint_airmasses_for_request = self._get_midpoint_airmasses_for_request(request_id, start_time, end_time) + midpoint_airmasses_by_site = self._get_midpoint_airmasses_by_site(airmass_data, start_time, end_time) site = reservation.scheduled_resource[-3:] - midpoint_airmasses.append(midpoint_airmasses_for_request[site]) - ideal_airmass = self._get_ideal_airmass_for_request(request_id) + 
midpoint_airmasses.append(midpoint_airmasses_by_site[site]) + ideal_airmass = self._get_ideal_airmass(airmass_data) ideal_airmasses.append(ideal_airmass) - airmass_data = {'raw_airmass_data': [{'midpoint_airmasses': midpoint_airmasses}, - {'ideal_airmasses': ideal_airmasses}, - {'durations': durations}], - 'avg_midpoint_airmass': sum(midpoint_airmasses)/len(midpoint_airmasses), - 'avg_ideal_airmass': sum(ideal_airmasses)/len(ideal_airmasses), - 'ci_midpoint_airmass': [[np.percentile(midpoint_airmasses, 2.5), - np.percentile(midpoint_airmasses, 97.5)]], - } - return airmass_data + airmass_metrics = {'raw_airmass_data': [{'midpoint_airmasses': midpoint_airmasses}, + {'ideal_airmasses': ideal_airmasses}, + {'durations': durations}], + 'avg_midpoint_airmass': sum(midpoint_airmasses)/len(midpoint_airmasses), + 'avg_ideal_airmass': sum(ideal_airmasses)/len(ideal_airmasses), + 'ci_midpoint_airmass': [[np.percentile(midpoint_airmasses, 2.5), + np.percentile(midpoint_airmasses, 97.5)]], + } + return airmass_metrics def binned_tac_priority_metrics(self, input_reservations=None, schedule=None): """Bins TAC Priority into the following bins: '10-19', '20-29', '30-39', '1000'.""" diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 382d1d1f..d97543be 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -126,7 +126,7 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche 'percent_duration_by_priority': [binned_tac_priority_metrics['percent_duration']], } log.info(metrics) - send_to_opensearch(sched_params.opensearch_url, sched_params.simulation_opensearch_index, metrics) + #send_to_opensearch(sched_params.opensearch_url, sched_params.simulation_opensearch_index, metrics) def main(argv=None): diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 00d9fec5..007c64e9 100644 --- 
a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -125,9 +125,7 @@ def test_airmass_functions(self): airmass_data_1 = json.load(f) with open(data_path_2) as f: airmass_data_2 = json.load(f) - self.metrics._get_airmass_data_from_observation_portal = Mock(side_effect=[airmass_data_1, airmass_data_1, - airmass_data_1, airmass_data_1, - airmass_data_1, airmass_data_2]) + self.metrics._get_airmass_data_for_request = Mock(side_effect=[airmass_data_1, airmass_data_2]) request_1 = Mock(id=1) mock_reservation_1 = Mock(scheduled_start=0, scheduled_resource='1m0a.doma.tfn', request=request_1, duration=5400) @@ -137,14 +135,14 @@ def test_airmass_functions(self): scheduled_reservations = [mock_reservation_1, mock_reservation_2] schedule = {'reservations': scheduled_reservations} - assert self.metrics._get_midpoint_airmasses_for_request(1, self.start, self.end) == {'tfn': 7, 'egg': 3} - assert self.metrics._get_ideal_airmass_for_request(2) == 1 + assert self.metrics._get_midpoint_airmasses_by_site(airmass_data_1, self.start, self.end) == {'tfn': 7, 'egg': 3} + assert self.metrics._get_ideal_airmass(airmass_data_1) == 1 airmass_metrics = self.metrics.airmass_metrics(schedule) midpoint_airmasses = [7, 3] assert type(airmass_metrics) is dict assert list(airmass_metrics.keys()) == ['raw_airmass_data', 'avg_midpoint_airmass', - 'avg_ideal_airmass', 'ci_midpoint_airmass'] + 'avg_ideal_airmass', 'ci_midpoint_airmass'] assert airmass_metrics['avg_midpoint_airmass'] == 5 assert airmass_metrics['avg_ideal_airmass'] == 2 assert airmass_metrics['raw_airmass_data'][0]['midpoint_airmasses'] == midpoint_airmasses From 4604f9584e9ef05d16a9b6e1d98e6c99047ae8b2 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Wed, 13 Jul 2022 20:22:53 +0000 Subject: [PATCH 066/165] bugfix for caching the wrong object type --- adaptive_scheduler/simulation/metrics.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py 
b/adaptive_scheduler/simulation/metrics.py index 0069ca0c..7c26f6e6 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -215,7 +215,6 @@ def _get_airmass_data_for_request(self, request_id): try: cached_airmass_data = pickle.loads(redis.get('airmass_data_by_request_id')) self.airmass_data_by_request_id[request_id] = cached_airmass_data[request_id] - print(f'got cached data for {request_id}') return cached_airmass_data[request_id] except Exception: # the request has not been cached yet, get the data from the portal @@ -225,7 +224,7 @@ def _get_airmass_data_for_request(self, request_id): response.raise_for_status() airmass_data_for_request = response.json()['airmass_data'] self.airmass_data_by_request_id[request_id] = airmass_data_for_request - redis.set('airmass_data_by_request_id', pickle.dumps(self.airmass_data_by_request_id)) + redis.set('airmass_data_by_request_id', pickle.dumps(dict(self.airmass_data_by_request_id))) return airmass_data_for_request except (RequestException, ValueError, Timeout) as e: raise ObservationPortalConnectionError("get_airmass_data failed: {}".format(repr(e))) @@ -253,7 +252,6 @@ def _get_midpoint_airmasses_by_site(self, airmass_data, start_time, end_time): """ midpoint_airmasses = {} midpoint_time = start_time + (end_time - start_time) / 2 - print(airmass_data) for site, details in airmass_data.items(): details = list(details.values()) times, airmasses = details[0], details[1] @@ -265,7 +263,6 @@ def _get_midpoint_airmasses_by_site(self, airmass_data, start_time, end_time): time_diff = temp_time_diff index = i midpoint_airmass = airmasses[index] - print(midpoint_airmass) midpoint_airmasses[site] = midpoint_airmass return midpoint_airmasses From 11fcaafa9185972a1bd29a8f99178135de24a485 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Wed, 13 Jul 2022 20:23:54 +0000 Subject: [PATCH 067/165] reenable sending to opensearch after testing --- adaptive_scheduler/simulation/orchestrator.py | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index d97543be..382d1d1f 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -126,7 +126,7 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche 'percent_duration_by_priority': [binned_tac_priority_metrics['percent_duration']], } log.info(metrics) - #send_to_opensearch(sched_params.opensearch_url, sched_params.simulation_opensearch_index, metrics) + send_to_opensearch(sched_params.opensearch_url, sched_params.simulation_opensearch_index, metrics) def main(argv=None): From a60a2d19e1bfbe9c5f6458a9dc58eda859982adc Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Wed, 13 Jul 2022 14:44:52 -0700 Subject: [PATCH 068/165] fixed redis cache error --- adaptive_scheduler/simulation/metrics.py | 9 ++++----- adaptive_scheduler/simulation/orchestrator.py | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 0069ca0c..1593489c 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -1,6 +1,7 @@ """ Metric calculation functions for the scheduler simulator. 
""" +from email.policy import default import pickle from datetime import datetime, timedelta from collections import defaultdict @@ -214,10 +215,10 @@ def _get_airmass_data_for_request(self, request_id): airmass_url = f'{self.observation_portal_interface.obs_portal_url}/api/requests/{request_id}/airmass/' try: cached_airmass_data = pickle.loads(redis.get('airmass_data_by_request_id')) + cached_airmass_data[request_id] self.airmass_data_by_request_id[request_id] = cached_airmass_data[request_id] - print(f'got cached data for {request_id}') return cached_airmass_data[request_id] - except Exception: + except Exception as e: # the request has not been cached yet, get the data from the portal pass try: @@ -225,7 +226,7 @@ def _get_airmass_data_for_request(self, request_id): response.raise_for_status() airmass_data_for_request = response.json()['airmass_data'] self.airmass_data_by_request_id[request_id] = airmass_data_for_request - redis.set('airmass_data_by_request_id', pickle.dumps(self.airmass_data_by_request_id)) + redis.set('airmass_data_by_request_id', pickle.dumps(dict(self.airmass_data_by_request_id))) return airmass_data_for_request except (RequestException, ValueError, Timeout) as e: raise ObservationPortalConnectionError("get_airmass_data failed: {}".format(repr(e))) @@ -253,7 +254,6 @@ def _get_midpoint_airmasses_by_site(self, airmass_data, start_time, end_time): """ midpoint_airmasses = {} midpoint_time = start_time + (end_time - start_time) / 2 - print(airmass_data) for site, details in airmass_data.items(): details = list(details.values()) times, airmasses = details[0], details[1] @@ -265,7 +265,6 @@ def _get_midpoint_airmasses_by_site(self, airmass_data, start_time, end_time): time_diff = temp_time_diff index = i midpoint_airmass = airmasses[index] - print(midpoint_airmass) midpoint_airmasses[site] = midpoint_airmass return midpoint_airmasses diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 
d97543be..382d1d1f 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -126,7 +126,7 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche 'percent_duration_by_priority': [binned_tac_priority_metrics['percent_duration']], } log.info(metrics) - #send_to_opensearch(sched_params.opensearch_url, sched_params.simulation_opensearch_index, metrics) + send_to_opensearch(sched_params.opensearch_url, sched_params.simulation_opensearch_index, metrics) def main(argv=None): From 28152a004cb95c9c69743526d22419dc298f28c9 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Wed, 13 Jul 2022 16:11:02 -0700 Subject: [PATCH 069/165] add documentation for airmass data --- adaptive_scheduler/simulation/metrics.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 1593489c..b7deeff7 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -202,15 +202,13 @@ def percent_time_utilization(self, schedule=None, resources_scheduled=None, hori self.total_available_seconds(resources_scheduled, horizon_days)) def _get_airmass_data_for_request(self, request_id): - """Pulls airmass data from the Observation Portal. + """Pulls airmass data from the Observation Portal, cache it in our local directory. Args: - observation_portal_interface (ObservationPortalInterface): Instance of the Observation Portal - used by the scheduler. request_id (str): The request id. Returns: - airmass_data (dict): The airmass data returned from the API. + airmass_data (dict): The airmass data returned from the API or the cache. 
""" airmass_url = f'{self.observation_portal_interface.obs_portal_url}/api/requests/{request_id}/airmass/' try: @@ -244,7 +242,7 @@ def _get_midpoint_airmasses_by_site(self, airmass_data, start_time, end_time): closest matching the calculated midpoint of the observation in the observe portal airmass data. Args: - request_id (int): The id of the request we want to get airmass data of. + airmass_data (dict): The airmass data we want to use to calculate midpoint of. start_time (datetime.datetime): The start time of the scheduled observation. end_time (datetime.datetime): The end time of the scheduled observation. @@ -269,13 +267,14 @@ def _get_midpoint_airmasses_by_site(self, airmass_data, start_time, end_time): return midpoint_airmasses def airmass_metrics(self, schedule=None): - """Calculate the average midpoint airmass of all scheduled reservations for a single schedule. + """Generat the airmass metrics of all scheduled reservations for a single schedule. Args: schedule (scheduler, optional): the schedule we calculate our metricses on. Defaults to None. Returns: - average(float): the average midpoint airmass of all scheduled reservation for one schedule. + airmass_metrics (dict): Variety of airmass metrics including raw data, average midpoint airmass, average + ideal airmass and 95% confidence interval for midpoint airmass. 
""" schedule = self.combined_schedule if schedule is None else schedule semester_start = self.scheduler_runner.semester_details['start'] From cef09c492423c5666b1aec16b38ddab70cb13fc8 Mon Sep 17 00:00:00 2001 From: Jon Date: Wed, 13 Jul 2022 23:38:24 +0000 Subject: [PATCH 070/165] Add configdb overrides for simulation --- adaptive_scheduler/configdb_connections.py | 19 ++++++++++++++++++- adaptive_scheduler/simulation/orchestrator.py | 7 ++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/adaptive_scheduler/configdb_connections.py b/adaptive_scheduler/configdb_connections.py index 89ef9b64..2b940d26 100644 --- a/adaptive_scheduler/configdb_connections.py +++ b/adaptive_scheduler/configdb_connections.py @@ -22,7 +22,7 @@ class ConfigDBInterface(SendMetricMixin): """ def __init__(self, configdb_url, telescope_classes, telescopes_file='data/telescopes.json', - active_instruments_file='data/active_instruments.json'): + active_instruments_file='data/active_instruments.json', overrides=None): self.configdb_url = configdb_url if not self.configdb_url.endswith('/'): self.configdb_url += '/' @@ -31,11 +31,28 @@ def __init__(self, configdb_url, telescope_classes, telescopes_file='data/telesc self.active_instruments_file = active_instruments_file self.active_instruments = None self.telescope_info = None + self.overrides = overrides self.update_configdb_structures() def update_configdb_structures(self): self.update_telescope_info() + self.apply_overrides_to_telescopes() self.update_active_instruments() + self.apply_overrides_to_instruments() + + def apply_overrides_to_telescopes(self): + if self.overrides: + for telescope in self.overrides.get('telescopes', {}).keys(): + if telescope in self.telescope_info and 'status' in self.overrides['telescopes'][telescope]: + self.telescope_info[telescope]['status'] = self.overrides['telescopes'][telescope]['status'] + + def apply_overrides_to_instruments(self): + if self.overrides and self.overrides.get('instruments', 
{}): + for instrument in self.active_instruments: + if instrument['code'] in self.overrides['instruments']: + instrument['state'] = self.overrides['instruments'][instrument['code']].get('state', instrument['state']) + if instrument['instrument_type']['code'] in self.overrides['instruments']: + instrument['state'] = self.overrides['instruments'][instrument['instrument_type']['code']].get('state', instrument['state']) def update_active_instruments(self): try: diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 20dde798..41b8ea02 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -10,6 +10,7 @@ import logging import sys import os +import json from urllib.parse import urljoin import requests @@ -138,7 +139,11 @@ def main(argv=None): schedule_interface = ObservationScheduleInterface(host=sched_params.observation_portal_url) observation_portal_interface = ObservationPortalInterface(sched_params.observation_portal_url) # TODO: If there is a configuration override file detected then incorporate that into the configdb_interface - configdb_interface = ConfigDBInterface(configdb_url=sched_params.configdb_url, telescope_classes=sched_params.telescope_classes) + overrides = None + if os.path.exists('/app/data/simulation_overrides.json'): + with open('/app/data/simulation_overrides.json', 'r') as fp: + overrides = json.load(fp) + configdb_interface = ConfigDBInterface(configdb_url=sched_params.configdb_url, telescope_classes=sched_params.telescope_classes, overrides=overrides) network_state_interface = Network(configdb_interface, sched_params) network_interface = NetworkInterface(schedule_interface, observation_portal_interface, network_state_interface, configdb_interface) From 99a23201b029cecd64306924d24239e657a86cce Mon Sep 17 00:00:00 2001 From: Jon Date: Wed, 13 Jul 2022 23:48:44 +0000 Subject: [PATCH 071/165] Fix the missing redis instance --- 
adaptive_scheduler/simulation/metrics.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index b7deeff7..d5e35d39 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -12,7 +12,7 @@ from adaptive_scheduler.observation_portal_connections import ObservationPortalConnectionError from adaptive_scheduler.utils import time_in_capped_intervals, normalised_epoch_to_datetime, datetime_to_epoch -from adaptive_scheduler.kernel_mappings import redis +from adaptive_scheduler.models import redis_instance def percent_of(x, y): @@ -212,7 +212,7 @@ def _get_airmass_data_for_request(self, request_id): """ airmass_url = f'{self.observation_portal_interface.obs_portal_url}/api/requests/{request_id}/airmass/' try: - cached_airmass_data = pickle.loads(redis.get('airmass_data_by_request_id')) + cached_airmass_data = pickle.loads(redis_instance.get('airmass_data_by_request_id')) cached_airmass_data[request_id] self.airmass_data_by_request_id[request_id] = cached_airmass_data[request_id] return cached_airmass_data[request_id] @@ -224,7 +224,7 @@ def _get_airmass_data_for_request(self, request_id): response.raise_for_status() airmass_data_for_request = response.json()['airmass_data'] self.airmass_data_by_request_id[request_id] = airmass_data_for_request - redis.set('airmass_data_by_request_id', pickle.dumps(dict(self.airmass_data_by_request_id))) + redis_instance.set('airmass_data_by_request_id', pickle.dumps(dict(self.airmass_data_by_request_id))) return airmass_data_for_request except (RequestException, ValueError, Timeout) as e: raise ObservationPortalConnectionError("get_airmass_data failed: {}".format(repr(e))) From a7acaeb8f48e7de133e27b01ba0206688e4b85c1 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 14 Jul 2022 17:56:52 +0000 Subject: [PATCH 072/165] fixed request group attribute error --- 
adaptive_scheduler/simulation/metrics.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index d5e35d39..797b2a6e 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -216,7 +216,7 @@ def _get_airmass_data_for_request(self, request_id): cached_airmass_data[request_id] self.airmass_data_by_request_id[request_id] = cached_airmass_data[request_id] return cached_airmass_data[request_id] - except Exception as e: + except AttributeError: # the request has not been cached yet, get the data from the portal pass try: @@ -308,15 +308,15 @@ def binned_tac_priority_metrics(self, input_reservations=None, schedule=None): input_reservations = self.combined_input_reservations if input_reservations is None else input_reservations schedule = self.combined_schedule if schedule is None else schedule bin_size = 10 - sched_priority_values = [] sched_durations = self.get_scheduled_durations(schedule) - all_priority_values = [] - all_durations = [] + all_durations = [res.duration for res in input_reservations] + request_groups = self.scheduler_runner.normal_scheduler_input.request_groups + priority_values_by_rg_id = {rg.id: rg.proposal.tac_priority for rg in request_groups} + all_priority_values = list(priority_values_by_rg_id.values()) + sched_priority_values = [] for reservations in schedule.values(): - sched_priority_values.extend([res.request_group.proposal.tac_priority for res in reservations]) - for reservation in input_reservations: - all_priority_values.append(reservation.request_group.proposal.tac_priority) - all_durations.append(reservation.duration) + sched_priority_values.extend([priority_values_by_rg_id[res.request_group_id] + for res in reservations]) sched_histogram = bin_data(sched_priority_values, bin_size=bin_size) bin_sched_durations = bin_data(sched_priority_values, sched_durations, bin_size) From 
be7cb98aff57d966f9d67d05fcc09ca14415e24f Mon Sep 17 00:00:00 2001 From: Jon Date: Thu, 14 Jul 2022 19:08:31 +0000 Subject: [PATCH 073/165] Fix rs_target serialization --- adaptive_scheduler/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive_scheduler/models.py b/adaptive_scheduler/models.py index ed4e2d78..651f90cf 100644 --- a/adaptive_scheduler/models.py +++ b/adaptive_scheduler/models.py @@ -486,7 +486,7 @@ def cache_airmasses_within_kernel_windows(self, kernel_intervals_for_resources, rs_targets = [configuration.target.in_rise_set_format() for configuration in self.configurations] airmass_by_targets = {} for rs_target in rs_targets: - rs_target_key = json.dumps(rs_target) + rs_target_key = f"{rs_target.items()}" if rs_target_key not in airmass_by_targets: airmass_by_targets[rs_target_key] = calculate_airmass_at_times(datetimes, rs_target, obs_latitude, obs_longitude, obs_height) From ddd3317f7bd250a61d72d21204ca3336272d4a19 Mon Sep 17 00:00:00 2001 From: Jon Date: Thu, 14 Jul 2022 20:57:14 +0000 Subject: [PATCH 074/165] Fix error in single target airmasses --- adaptive_scheduler/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive_scheduler/models.py b/adaptive_scheduler/models.py index 651f90cf..7bbe1256 100644 --- a/adaptive_scheduler/models.py +++ b/adaptive_scheduler/models.py @@ -491,7 +491,7 @@ def cache_airmasses_within_kernel_windows(self, kernel_intervals_for_resources, airmass_by_targets[rs_target_key] = calculate_airmass_at_times(datetimes, rs_target, obs_latitude, obs_longitude, obs_height) if len(airmass_by_targets) == 1: - airmasses = airmass_by_targets.keys()[0] + airmasses = airmass_by_targets.values()[0] else: numpy_airmasses = np.array(airmass_by_targets.values()) airmasses = np.mean(numpy_airmasses, axis=0).tolist() From 2d8382a8a7f52e902d1f549c602d312bb3cb4821 Mon Sep 17 00:00:00 2001 From: Jon Date: Thu, 14 Jul 2022 21:05:31 +0000 Subject: [PATCH 075/165] fix dict values list 
--- adaptive_scheduler/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive_scheduler/models.py b/adaptive_scheduler/models.py index 7bbe1256..71e3349b 100644 --- a/adaptive_scheduler/models.py +++ b/adaptive_scheduler/models.py @@ -491,7 +491,7 @@ def cache_airmasses_within_kernel_windows(self, kernel_intervals_for_resources, airmass_by_targets[rs_target_key] = calculate_airmass_at_times(datetimes, rs_target, obs_latitude, obs_longitude, obs_height) if len(airmass_by_targets) == 1: - airmasses = airmass_by_targets.values()[0] + airmasses = list(airmass_by_targets.values())[0] else: numpy_airmasses = np.array(airmass_by_targets.values()) airmasses = np.mean(numpy_airmasses, axis=0).tolist() From b2376a7cc5269363b4078423a92be5dff2dea411 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 14 Jul 2022 21:47:10 +0000 Subject: [PATCH 076/165] bugfixes in airmass optimization caching --- adaptive_scheduler/models.py | 18 ++++++++++-------- adaptive_scheduler/simulation/metrics.py | 4 +++- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/adaptive_scheduler/models.py b/adaptive_scheduler/models.py index 7bbe1256..349bc4f3 100644 --- a/adaptive_scheduler/models.py +++ b/adaptive_scheduler/models.py @@ -439,7 +439,7 @@ def __init__(self, configurations, windows, request_id, state='PENDING', telesco self.telescope_class = telescope_class self.req_duration = duration self.configuration_repeats = configuration_repeats - self.optimization_type = optimization_type + self.optimization_type = 'AIRMASS' self.scheduled_reservation = scheduled_reservation def get_duration(self): @@ -468,19 +468,21 @@ def cache_airmasses_within_kernel_windows(self, kernel_intervals_for_resources, obs_latitude = Angle(degrees=resource_info['latitude']) obs_longitude = Angle(degrees=resource_info['longitude']) obs_height = resource_info['elevation'] - visibility_intervals = 
Windows.request_window_to_kernel_intervals(self.windows.at(resource_info["name"])).toTupleList() + visibility_intervals = Windows.request_window_to_kernel_intervals(self.windows.at(resource_info["name"])) for (start, end) in visibility_intervals.toTupleList(): current_datetime = start current_time = datetime_to_normalised_epoch(start, semester_start) # Add the start point, and then a timepoint at interval_size spacing until you reach the end while current_datetime < end: - datetimes.add(current_datetime) + datetimes.append(current_datetime) current_datetime += timedelta(seconds=interval_size) - times.add(current_time) + times.append(current_time) current_time += interval_size # Add the end point on so we have a complete set of airmasses spanning the interval - datetimes.add(end) - times.add(datetime_to_normalised_epoch(end, semester_start)) + datetimes.append(end) + times.append(datetime_to_normalised_epoch(end, semester_start)) + if not datetimes: + continue # Now that we have a full set of datetimes and unix/kernel times, calculate the airmass values within those times # Calculate the airmasses for each target in the configuration and attempt to merge them all... 
This could be improved upon rs_targets = [configuration.target.in_rise_set_format() for configuration in self.configurations] @@ -491,7 +493,7 @@ def cache_airmasses_within_kernel_windows(self, kernel_intervals_for_resources, airmass_by_targets[rs_target_key] = calculate_airmass_at_times(datetimes, rs_target, obs_latitude, obs_longitude, obs_height) if len(airmass_by_targets) == 1: - airmasses = airmass_by_targets.values()[0] + airmasses = list(airmass_by_targets.values())[0] else: numpy_airmasses = np.array(airmass_by_targets.values()) airmasses = np.mean(numpy_airmasses, axis=0).tolist() @@ -503,7 +505,7 @@ def cache_airmasses_within_kernel_windows(self, kernel_intervals_for_resources, airmasses = [AIRMASS_WEIGHTING_COEFFICIENT * (1 - (airmass - best_airmass) / (worst_airmass - best_airmass)) for airmass in airmasses] # Now store the airmasses and times in the redis cache airmass_at_times = { - 'airmass': airmasses, + 'airmasses': airmasses, 'times': times } redis_instance.set(cache_key, json.dumps(airmass_at_times)) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 797b2a6e..3457bac1 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -216,7 +216,7 @@ def _get_airmass_data_for_request(self, request_id): cached_airmass_data[request_id] self.airmass_data_by_request_id[request_id] = cached_airmass_data[request_id] return cached_airmass_data[request_id] - except AttributeError: + except Exception: # the request has not been cached yet, get the data from the portal pass try: @@ -311,6 +311,8 @@ def binned_tac_priority_metrics(self, input_reservations=None, schedule=None): sched_durations = self.get_scheduled_durations(schedule) all_durations = [res.duration for res in input_reservations] request_groups = self.scheduler_runner.normal_scheduler_input.request_groups + if self.scheduler_runner.rr_scheduler_input: + 
request_groups.extend(self.scheduler_runner.rr_scheduler_input.request_groups) priority_values_by_rg_id = {rg.id: rg.proposal.tac_priority for rg in request_groups} all_priority_values = list(priority_values_by_rg_id.values()) sched_priority_values = [] From 11c9ca5c543b7ab70ceb0680ac45de18a33e0eea Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 14 Jul 2022 22:36:18 +0000 Subject: [PATCH 077/165] fixed incorrect total count for priorities --- adaptive_scheduler/simulation/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 3457bac1..fefbc820 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -314,7 +314,7 @@ def binned_tac_priority_metrics(self, input_reservations=None, schedule=None): if self.scheduler_runner.rr_scheduler_input: request_groups.extend(self.scheduler_runner.rr_scheduler_input.request_groups) priority_values_by_rg_id = {rg.id: rg.proposal.tac_priority for rg in request_groups} - all_priority_values = list(priority_values_by_rg_id.values()) + all_priority_values = [priority_values_by_rg_id[res.request_group_id] for res in input_reservations] sched_priority_values = [] for reservations in schedule.values(): sched_priority_values.extend([priority_values_by_rg_id[res.request_group_id] From 071f419e02423c7c0ece54473f5d69d596245908 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 14 Jul 2022 22:36:46 +0000 Subject: [PATCH 078/165] changed airmass weighting to be an environment variable --- adaptive_scheduler/models.py | 2 +- adaptive_scheduler/simulation/orchestrator.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/adaptive_scheduler/models.py b/adaptive_scheduler/models.py index 349bc4f3..4c8863bc 100644 --- a/adaptive_scheduler/models.py +++ b/adaptive_scheduler/models.py @@ -49,7 +49,7 @@ socket_timeout=30) -AIRMASS_WEIGHTING_COEFFICIENT = 0.1 
+AIRMASS_WEIGHTING_COEFFICIENT = os.getenv("SIMULATION_AIRMASS_COEFFICIENT", 0.1) def n_requests(request_groups): diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 460de0e2..a819053b 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -40,6 +40,7 @@ START_TIME = parse(os.getenv("SIMULATION_START_TIME", "2022-06-23")) END_TIME = parse(os.getenv("SIMULATION_END_TIME", "2022-07-07")) TIME_STEP = float(os.getenv("SIMULATION_TIME_STEP_MINUTES", "60")) +AIRMASS_WEIGHTING_COEFFICIENT = os.getenv("SIMULATION_AIRMASS_COEFFICIENT", 0.1) def setup_logging(): @@ -61,16 +62,16 @@ def setup_input(current_time): # source based on the current timestamp of the scheduling run. For configdb, this involves playing the records # backwards until the time is reached. For the observation portal, it involves pulling over all requests # created and PENDING at a certain point in time for the semester, which should be doable by looking at the created - # and modified timestamps and state. + # and modified timestamps and state. log.info(f"Placeholder for setting up input for time {current_time.isoformat}") pass def increment_input(current_time, time_step): - # This will eventually call endpoints in configdb and the observation portal to increment the state of them forward + # This will eventually call endpoints in configdb and the observation portal to increment the state of them forward # by the time step specified. Incrementing time forward is slightly different then the initial setup of a starting time. # This will be called as you step forward in time to make sure these data sources contain the right input data. - # For configdb, this involves moving the records back forwards a bit. For the observation portal, it involves pulling + # For configdb, this involves moving the records back forwards a bit. 
For the observation portal, it involves pulling # down newer requests as well as cleaning up the state of old ones between time steps (completing/expiring as appropriate). # This also means that we should complete and fail the right percentages of observations that should have ended within the last # time_step, and set ones that are in progress to ATTEMPTED state. @@ -110,6 +111,7 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche 'kernel': sched_params.kernel, 'mip_gap': sched_params.mip_gap, 'record_time': datetime.utcnow().isoformat(), + 'airmass_weighting_coefficient': AIRMASS_WEIGHTING_COEFFICIENT, 'total_effective_priority': metrics.total_scheduled_eff_priority()[0], 'total_scheduled_count': metrics.count_scheduled()[0], From 448ed767672b742101d322f2f30715bea19e16c8 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 14 Jul 2022 23:24:08 +0000 Subject: [PATCH 079/165] fixed type error in airmass weighting coefficient --- adaptive_scheduler/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive_scheduler/models.py b/adaptive_scheduler/models.py index 4c8863bc..5f1e46e3 100644 --- a/adaptive_scheduler/models.py +++ b/adaptive_scheduler/models.py @@ -502,7 +502,7 @@ def cache_airmasses_within_kernel_windows(self, kernel_intervals_for_resources, best_airmass = min(airmasses) worst_airmass = max(airmasses) # This should give us something ranging from 0 to AIRMASS_WEIGHTING_COEFFICIENT to add to the effective priority - airmasses = [AIRMASS_WEIGHTING_COEFFICIENT * (1 - (airmass - best_airmass) / (worst_airmass - best_airmass)) for airmass in airmasses] + airmasses = [float(AIRMASS_WEIGHTING_COEFFICIENT) * (1 - (airmass - best_airmass) / (worst_airmass - best_airmass)) for airmass in airmasses] # Now store the airmasses and times in the redis cache airmass_at_times = { 'airmasses': airmasses, From 03fafbd7f2a01d50dd75b73faabd8c82c6c82c7c Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Mon, 18 Jul 
2022 17:07:26 +0000 Subject: [PATCH 080/165] plotting functions for airmass coefficient data --- .../simulation/plot_airmass_coeff.py | 159 ++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 adaptive_scheduler/simulation/plot_airmass_coeff.py diff --git a/adaptive_scheduler/simulation/plot_airmass_coeff.py b/adaptive_scheduler/simulation/plot_airmass_coeff.py new file mode 100644 index 00000000..4b345c0e --- /dev/null +++ b/adaptive_scheduler/simulation/plot_airmass_coeff.py @@ -0,0 +1,159 @@ +import numpy as np +import matplotlib.pyplot as plt +from opensearchpy import OpenSearch + +OPENSEARCH_URL = 'https://logs.lco.global/' +AIRMASS_TEST_VALUES = [0, 0.01, 0.05, 0.1, 1, 10, 100, 1000, 1000000] +USE_1m_ONLY = True + +client = OpenSearch(OPENSEARCH_URL) +control_id = ('test-real-airmass-coeff-default-1-1m0_2022-07-18T16:56:27.411946' if USE_1m_ONLY + else 'simulation-real-prefer-earliest-1_2022-07-15T23:56:48.471472') +control = client.get('scheduler-simulations', control_id) +colors = ['deeppink', + 'forestgreen', + 'limegreen', + 'mediumseagreen', + 'mediumturquoise', + 'royalblue', + 'slateblue', + 'darkorchid', + 'indigo', + 'navy'] +labels = ['earliest'] +labels.extend(AIRMASS_TEST_VALUES) +search_suffix = '1m0' if USE_1m_ONLY else '' + + +def get_airmass_data_from_opensearch(coeff): + query = {'query': { + 'wildcard': {'simulation_id.keyword': f'*-real-airmass-coeff-{coeff}-1-{search_suffix}'} + } + } + response = client.search(query, 'scheduler-simulations') + try: + result = response['hits']['hits'][0] + except IndexError: + print(f'Found no results for {coeff}') + source_data = result['_source'] + print(f'Got data for {source_data["simulation_id"]}') + airmass_coeff = source_data['airmass_weighting_coefficient'] + airmass_data = source_data['airmass_metrics']['raw_airmass_data'] + return airmass_data, airmass_coeff + + +def plot_normed_airmass_histogram(): + fig = plt.figure(figsize=(20, 10)) + fig.suptitle(f'{search_suffix} 
Normalized Airmass Distributions (midpoint/ideal)', fontsize=20) + fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + ax = fig.add_subplot() + + control_airmass_data = control['_source']['airmass_metrics']['raw_airmass_data'] + normed = [np.divide(np.array(control_airmass_data[0]['midpoint_airmasses']), + np.array(control_airmass_data[1]['ideal_airmasses']))] + + for value in AIRMASS_TEST_VALUES: + airmass_data, airmass_coeff = get_airmass_data_from_opensearch(value) + midpoint_airmasses = np.array(airmass_data[0]['midpoint_airmasses']) + ideal_airmasses = np.array(airmass_data[1]['ideal_airmasses']) + normed.append(np.divide(midpoint_airmasses, ideal_airmasses)) + ax.hist(normed, bins=30, range=(1, 1.2), label=labels, color=colors, alpha=0.8) + ax.set_xlabel('Airmass Ratio (midpoint/ideal)') + ax.set_ylabel('Count') + fig.legend() + plt.show() + + +def plot_midpoint_airmass_histogram(): + fig = plt.figure(figsize=(16, 16)) + fig.suptitle(f'{search_suffix} Midpoint Airmass Distributions', fontsize=20) + fig.subplots_adjust(wspace=0.3, hspace=0.3, top=0.92) + for i, value in enumerate(AIRMASS_TEST_VALUES): + ax = fig.add_subplot(3, 3, i+1) + airmass_data, airmass_coeff = get_airmass_data_from_opensearch(value) + midpoint_airmasses = airmass_data[0]['midpoint_airmasses'] + ax.hist(midpoint_airmasses, bins=50) + ax.set_title(f'Airmass Coefficient: {airmass_coeff}') + ax.set_xlabel('Midpoint Airmass') + ax.set_ylabel('Count') + plt.show() + + +def plot_barplot(ax, data, colors, labels, binnames): + # data is a list of lists + ticks = np.arange(len(data[0])) + barwidth = 0.05 + for i, datavalues in enumerate(data): + ax.bar(ticks+barwidth*i, datavalues, barwidth, color=colors[i], label=labels[i], alpha=0.8) + ax.set_xticks(ticks+barwidth*i/2, binnames) + + +def get_priority_data_from_opensearch(coeff): + query = {'query': { + 'wildcard': {'simulation_id.keyword': f'*-real-airmass-coeff-{coeff}-1-{search_suffix}'} + } + } + response = client.search(query, 
'scheduler-simulations') + try: + result = response['hits']['hits'][0] + except IndexError: + print(f'Found no results for {coeff}') + source_data = result['_source'] + print(f'Got data for {source_data["simulation_id"]}') + airmass_coeff = source_data['airmass_weighting_coefficient'] + pct_scheduled = source_data['percent_sched_by_priority'][0] + pct_duration = source_data['percent_duration_by_priority'][0] + return pct_scheduled, pct_duration, airmass_coeff + + +def plot_pct_scheduled_bins(): + fig = plt.figure(figsize=(20, 10)) + fig.suptitle(f'{search_suffix} Percentage of Requests Scheduled', fontsize=20) + fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + ax = fig.add_subplot() + + bardata = [] + control_prio_data = control['_source']['percent_sched_by_priority'][0] + priorities = list(control_prio_data.keys()) + percentages = list(control_prio_data.values()) + bardata.append(percentages) + + for value in AIRMASS_TEST_VALUES: + priority_data, _, _ = get_priority_data_from_opensearch(value) + bardata.append(list(priority_data.values())) + + plot_barplot(ax, bardata, colors, labels, priorities) + ax.set_xlabel('Priority') + ax.set_ylabel('Percent Count') + fig.legend() + plt.show() + + +def plot_pct_duration_bins(): + fig = plt.figure(figsize=(20, 10)) + fig.suptitle(f'{search_suffix} Percentage Duration of Requests Scheduled', fontsize=20) + fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + ax = fig.add_subplot() + + bardata = [] + control_prio_data = control['_source']['percent_duration_by_priority'][0] + priorities = list(control_prio_data.keys()) + percentages = list(control_prio_data.values()) + bardata.append(percentages) + + for value in AIRMASS_TEST_VALUES: + _, priority_data, _ = get_priority_data_from_opensearch(value) + bardata.append(list(priority_data.values())) + + plot_barplot(ax, bardata, colors, labels, priorities) + ax.set_xlabel('Priority') + ax.set_ylabel('Percent Duration') + fig.legend() + plt.show() + + +if __name__ == 
'__main__': + plot_midpoint_airmass_histogram() + plot_normed_airmass_histogram() + plot_pct_scheduled_bins() + plot_pct_duration_bins() From b177bb10baf6eed83e266dacd8895bb392c96b8e Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Mon, 18 Jul 2022 13:45:20 -0700 Subject: [PATCH 081/165] remove emacs autosave --- tests/airmass_data.json~ | 51 ---------------------------------------- 1 file changed, 51 deletions(-) delete mode 100644 tests/airmass_data.json~ diff --git a/tests/airmass_data.json~ b/tests/airmass_data.json~ deleted file mode 100644 index 2d690fc4..00000000 --- a/tests/airmass_data.json~ +++ /dev/null @@ -1,51 +0,0 @@ -airmasses = { - "airmass_data": { - "tfn": { - "times": [ - "2022-07-06T00:11", - "2022-07-06T00:21", - "2022-07-06T00:31", - "2022-07-06T00:41", - "2022-07-06T00:51", - "2022-07-06T01:01", - "2022-07-06T01:11", - "2022-07-06T01:21", - "2022-07-06T01:31", - "2022-07-06T01:41", - "2022-07-06T01:51", - "2022-07-06T02:01", - "2022-07-06T02:11", - "2022-07-06T02:21", - "2022-07-06T02:31", - "2022-07-06T02:41", - "2022-07-06T02:51", - "2022-07-06T03:01", - "2022-07-06T03:11", - "2022-07-06T03:21" - ], - "airmasses": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20 - ] - } - }, - "airmass_limit": 10.1 -} From 5e327cbbc4bfad935ded36fe2d75fb8fa3b5210e Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Mon, 18 Jul 2022 22:44:49 +0000 Subject: [PATCH 082/165] update to include worst airmass data, optimizations to midpoint airmass calculation --- adaptive_scheduler/simulation/metrics.py | 70 ++++++++++--------- .../simulation/plot_airmass_coeff.py | 2 +- tests/airmass_data.json | 18 ++--- tests/airmass_data_2.json | 20 +++--- tests/test_simulator_metrics.py | 20 +++--- 5 files changed, 66 insertions(+), 64 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index fefbc820..6f5c5505 100644 --- 
a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -1,7 +1,7 @@ """ Metric calculation functions for the scheduler simulator. """ -from email.policy import default +import logging import pickle from datetime import datetime, timedelta from collections import defaultdict @@ -15,6 +15,11 @@ from adaptive_scheduler.models import redis_instance +log = logging.getLogger('adaptive_scheduler') + +DTFORMAT = '%Y-%m-%dT%H:%M' + + def percent_of(x, y): """Returns x/y as a percentage.""" return x / y * 100. @@ -229,15 +234,20 @@ def _get_airmass_data_for_request(self, request_id): except (RequestException, ValueError, Timeout) as e: raise ObservationPortalConnectionError("get_airmass_data failed: {}".format(repr(e))) - def _get_ideal_airmass(self, airmass_data): - """Finds the minimum airmass across all sites.""" - ideal_airmass = 1000 + def _get_minmax_airmass(self, airmass_data, midpoint_duration): + """Finds the minimum and maximum midpoint airmass across all sites.""" + max_airmasses = [] + min_airmasses = [] for site in airmass_data.values(): - ideal_for_site = min(site['airmasses']) - ideal_airmass = min(ideal_airmass, ideal_for_site) - return ideal_airmass - - def _get_midpoint_airmasses_by_site(self, airmass_data, start_time, end_time): + times, airmasses = site.values() + airmasses = np.array(airmasses) + times = np.array([datetime.strptime(time, DTFORMAT) for time in times]) + # site_airmasses = airmasses[(times >= times[0]+midpoint_duration) & (times <= times[-1]-midpoint_duration)] + min_airmasses.append(min(airmasses)) + max_airmasses.append(max(airmasses)) + return min(min_airmasses), max(max_airmasses) + + def _get_midpoint_airmasses_by_site(self, airmass_data, midpoint_time): """"Gets the midpoint airmasses by site for a request. This is done by finding the time closest matching the calculated midpoint of the observation in the observe portal airmass data. 
@@ -251,26 +261,20 @@ def _get_midpoint_airmasses_by_site(self, airmass_data, start_time, end_time): midpoint airmasses as values. """ midpoint_airmasses = {} - midpoint_time = start_time + (end_time - start_time) / 2 for site, details in airmass_data.items(): details = list(details.values()) - times, airmasses = details[0], details[1] - index = 0 - time_diff = abs((midpoint_time - datetime.strptime(times[0], '%Y-%m-%dT%H:%M')).total_seconds()) - for i, _ in enumerate(times): - temp_time_diff = abs((midpoint_time - datetime.strptime(times[i], '%Y-%m-%dT%H:%M')).total_seconds()) - if temp_time_diff < time_diff: - time_diff = temp_time_diff - index = i - midpoint_airmass = airmasses[index] - midpoint_airmasses[site] = midpoint_airmass + times, airmasses = details + airmasses = np.array(airmasses) + times = np.array([datetime.strptime(time, DTFORMAT) for time in times]) + midpoint_airmasses[site] = airmasses[np.argmin(np.abs(times-midpoint_time))] return midpoint_airmasses def airmass_metrics(self, schedule=None): - """Generat the airmass metrics of all scheduled reservations for a single schedule. + """Generate the airmass metrics of all scheduled reservations for a single schedule. Args: - schedule (scheduler, optional): the schedule we calculate our metricses on. Defaults to None. + schedule (scheduler, optional): the schedule we calculate our metrics on. Uses the schedule stored in + the MetricCalculator instance if nothing is passed. 
Returns: airmass_metrics (dict): Variety of airmass metrics including raw data, average midpoint airmass, average @@ -280,26 +284,26 @@ def airmass_metrics(self, schedule=None): semester_start = self.scheduler_runner.semester_details['start'] midpoint_airmasses = [] - ideal_airmasses = [] - durations = self.get_scheduled_durations(schedule) + min_airmasses = [] + max_airmasses = [] for reservations in schedule.values(): for reservation in reservations: airmass_data = self._get_airmass_data_for_request(reservation.request.id) start_time = normalised_epoch_to_datetime(reservation.scheduled_start, datetime_to_epoch(semester_start)) - end_time = start_time + timedelta(seconds=reservation.duration) - midpoint_airmasses_by_site = self._get_midpoint_airmasses_by_site(airmass_data, start_time, end_time) + midpoint_duration = timedelta(seconds=reservation.duration/2) + midpoint_time = start_time + midpoint_duration + midpoint_airmasses_by_site = self._get_midpoint_airmasses_by_site(airmass_data, midpoint_time) site = reservation.scheduled_resource[-3:] midpoint_airmasses.append(midpoint_airmasses_by_site[site]) - ideal_airmass = self._get_ideal_airmass(airmass_data) - ideal_airmasses.append(ideal_airmass) + min_airmass, max_airmass = self._get_minmax_airmass(airmass_data, midpoint_duration) + min_airmasses.append(min_airmass) + max_airmasses.append(max_airmass) airmass_metrics = {'raw_airmass_data': [{'midpoint_airmasses': midpoint_airmasses}, - {'ideal_airmasses': ideal_airmasses}, - {'durations': durations}], + {'min_poss_airmasses': min_airmasses}, + {'max_poss_airmasses': max_airmasses}], 'avg_midpoint_airmass': sum(midpoint_airmasses)/len(midpoint_airmasses), - 'avg_ideal_airmass': sum(ideal_airmasses)/len(ideal_airmasses), - 'ci_midpoint_airmass': [[np.percentile(midpoint_airmasses, 2.5), - np.percentile(midpoint_airmasses, 97.5)]], + 'avg_min_poss_airmass': sum(min_airmasses)/len(min_airmasses), } return airmass_metrics diff --git 
a/adaptive_scheduler/simulation/plot_airmass_coeff.py b/adaptive_scheduler/simulation/plot_airmass_coeff.py index 4b345c0e..79dc99dd 100644 --- a/adaptive_scheduler/simulation/plot_airmass_coeff.py +++ b/adaptive_scheduler/simulation/plot_airmass_coeff.py @@ -131,7 +131,7 @@ def plot_pct_scheduled_bins(): def plot_pct_duration_bins(): fig = plt.figure(figsize=(20, 10)) - fig.suptitle(f'{search_suffix} Percentage Duration of Requests Scheduled', fontsize=20) + fig.suptitle(f'{search_suffix} Percentage Requested Time Scheduled', fontsize=20) fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) ax = fig.add_subplot() diff --git a/tests/airmass_data.json b/tests/airmass_data.json index dc77fb3a..8ea4d068 100644 --- a/tests/airmass_data.json +++ b/tests/airmass_data.json @@ -47,24 +47,24 @@ }, "egg": { "times": [ + "2022-07-06T00:31", + "2022-07-06T00:41", + "2022-07-06T00:51", + "2022-07-06T01:01", + "2022-07-06T01:11", + "2022-07-06T01:21", "2022-07-06T01:31", - "2022-07-06T01:41", - "2022-07-06T01:51", - "2022-07-06T02:01", - "2022-07-06T02:11", - "2022-07-06T02:21", - "2022-07-06T02:31", - "2022-07-06T02:41" + "2022-07-06T01:41" ], "airmasses": [ + 2, 3, 4, 5, 6, 7, 8, - 9, - 10 + 9 ] } } diff --git a/tests/airmass_data_2.json b/tests/airmass_data_2.json index d75ac7cc..7751ef53 100644 --- a/tests/airmass_data_2.json +++ b/tests/airmass_data_2.json @@ -1,24 +1,24 @@ { "egg": { "times": [ + "2022-07-06T00:31", + "2022-07-06T00:41", + "2022-07-06T00:51", + "2022-07-06T01:01", + "2022-07-06T01:11", + "2022-07-06T01:21", "2022-07-06T01:31", - "2022-07-06T01:41", - "2022-07-06T01:51", - "2022-07-06T02:01", - "2022-07-06T02:11", - "2022-07-06T02:21", - "2022-07-06T02:31", - "2022-07-06T02:41" + "2022-07-06T01:41" ], "airmasses": [ + 1, + 2, 3, 4, 5, 6, 7, - 8, - 9, - 10 + 8 ] } } diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 007c64e9..6c23e93c 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ 
-128,23 +128,21 @@ def test_airmass_functions(self): self.metrics._get_airmass_data_for_request = Mock(side_effect=[airmass_data_1, airmass_data_2]) request_1 = Mock(id=1) mock_reservation_1 = Mock(scheduled_start=0, scheduled_resource='1m0a.doma.tfn', - request=request_1, duration=5400) + request=request_1, duration=2400) request_2 = Mock(id=2) mock_reservation_2 = Mock(scheduled_start=0, scheduled_resource='1m0a.doma.egg', - request=request_2, duration=5400) + request=request_2, duration=2400) scheduled_reservations = [mock_reservation_1, mock_reservation_2] schedule = {'reservations': scheduled_reservations} + midpoint_time = self.start + timedelta(seconds=mock_reservation_1.duration/2) + midpoint_duration = timedelta(seconds=mock_reservation_1.duration/2) - assert self.metrics._get_midpoint_airmasses_by_site(airmass_data_1, self.start, self.end) == {'tfn': 7, 'egg': 3} - assert self.metrics._get_ideal_airmass(airmass_data_1) == 1 + assert self.metrics._get_midpoint_airmasses_by_site(airmass_data_1, midpoint_time) == {'tfn': 5, 'egg': 4} + assert self.metrics._get_minmax_airmass(airmass_data_1, midpoint_duration) == (1, 20) airmass_metrics = self.metrics.airmass_metrics(schedule) - midpoint_airmasses = [7, 3] + midpoint_airmasses = [5, 3] assert type(airmass_metrics) is dict - assert list(airmass_metrics.keys()) == ['raw_airmass_data', 'avg_midpoint_airmass', - 'avg_ideal_airmass', 'ci_midpoint_airmass'] - assert airmass_metrics['avg_midpoint_airmass'] == 5 - assert airmass_metrics['avg_ideal_airmass'] == 2 + assert airmass_metrics['avg_midpoint_airmass'] == 4 + assert airmass_metrics['avg_min_poss_airmass'] == 1 assert airmass_metrics['raw_airmass_data'][0]['midpoint_airmasses'] == midpoint_airmasses - assert airmass_metrics['ci_midpoint_airmass'] == [[np.percentile(midpoint_airmasses, 2.5), - np.percentile(midpoint_airmasses, 97.5)]] From f7f3baf8fe6a63204ee9a39c8bf80bc5d9ad12a4 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Tue, 19 Jul 2022 17:08:53 +0000 
Subject: [PATCH 083/165] implemented image export and cleaned up chart labels --- .../simulation/plot_airmass_coeff.py | 76 +++++++++++++------ 1 file changed, 51 insertions(+), 25 deletions(-) diff --git a/adaptive_scheduler/simulation/plot_airmass_coeff.py b/adaptive_scheduler/simulation/plot_airmass_coeff.py index 79dc99dd..ee8de19d 100644 --- a/adaptive_scheduler/simulation/plot_airmass_coeff.py +++ b/adaptive_scheduler/simulation/plot_airmass_coeff.py @@ -1,14 +1,18 @@ +import os +from datetime import datetime + import numpy as np import matplotlib.pyplot as plt from opensearchpy import OpenSearch +EXPORT_DIR = 'adaptive_scheduler/simulation/plot_output' +EXPORT_FORMATS = ['jpg', 'pdf'] OPENSEARCH_URL = 'https://logs.lco.global/' AIRMASS_TEST_VALUES = [0, 0.01, 0.05, 0.1, 1, 10, 100, 1000, 1000000] -USE_1m_ONLY = True client = OpenSearch(OPENSEARCH_URL) -control_id = ('test-real-airmass-coeff-default-1-1m0_2022-07-18T16:56:27.411946' if USE_1m_ONLY - else 'simulation-real-prefer-earliest-1_2022-07-15T23:56:48.471472') +control_id = '1m0-simulation-real-airmass-control-1_2022-07-18T23:59:44.770684' + control = client.get('scheduler-simulations', control_id) colors = ['deeppink', 'forestgreen', @@ -20,14 +24,14 @@ 'darkorchid', 'indigo', 'navy'] -labels = ['earliest'] +labels = ['prioritize early'] labels.extend(AIRMASS_TEST_VALUES) -search_suffix = '1m0' if USE_1m_ONLY else '' +runtime = datetime.utcnow().isoformat(timespec='seconds') def get_airmass_data_from_opensearch(coeff): query = {'query': { - 'wildcard': {'simulation_id.keyword': f'*-real-airmass-coeff-{coeff}-1-{search_suffix}'} + 'wildcard': {'simulation_id.keyword': f'1m0-simulation-real-airmass-coeff-{coeff}-1'} } } response = client.search(query, 'scheduler-simulations') @@ -44,29 +48,33 @@ def get_airmass_data_from_opensearch(coeff): def plot_normed_airmass_histogram(): fig = plt.figure(figsize=(20, 10)) - fig.suptitle(f'{search_suffix} Normalized Airmass Distributions (midpoint/ideal)', 
fontsize=20) + fig.suptitle('1m0 Network Normalized Airmass Distributions', fontsize=20) fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) ax = fig.add_subplot() control_airmass_data = control['_source']['airmass_metrics']['raw_airmass_data'] - normed = [np.divide(np.array(control_airmass_data[0]['midpoint_airmasses']), - np.array(control_airmass_data[1]['ideal_airmasses']))] + control_mp = np.array(control_airmass_data[0]['midpoint_airmasses']) + control_min = np.array(control_airmass_data[1]['min_poss_airmasses']) + control_max = np.array(control_airmass_data[2]['max_poss_airmasses']) + normed = [1-(control_mp-control_min)/(control_max-control_min)] for value in AIRMASS_TEST_VALUES: airmass_data, airmass_coeff = get_airmass_data_from_opensearch(value) - midpoint_airmasses = np.array(airmass_data[0]['midpoint_airmasses']) - ideal_airmasses = np.array(airmass_data[1]['ideal_airmasses']) - normed.append(np.divide(midpoint_airmasses, ideal_airmasses)) - ax.hist(normed, bins=30, range=(1, 1.2), label=labels, color=colors, alpha=0.8) - ax.set_xlabel('Airmass Ratio (midpoint/ideal)') + mp = np.array(airmass_data[0]['midpoint_airmasses']) + min_ = np.array(airmass_data[1]['min_poss_airmasses']) + max_ = np.array(airmass_data[2]['max_poss_airmasses']) + normed.append(1-(mp-min_)/(max_-min_)) + ax.hist(normed, bins=10, label=labels, color=colors, alpha=0.8) + ax.set_xlabel('Airmass Score (0 is worse, 1 is closest to ideal)') ax.set_ylabel('Count') - fig.legend() + ax.legend() + export_to_image(f'1m0_normed_airmass_hist_{runtime}', fig) plt.show() def plot_midpoint_airmass_histogram(): fig = plt.figure(figsize=(16, 16)) - fig.suptitle(f'{search_suffix} Midpoint Airmass Distributions', fontsize=20) + fig.suptitle('1m0 Network Midpoint Airmass Distributions', fontsize=20) fig.subplots_adjust(wspace=0.3, hspace=0.3, top=0.92) for i, value in enumerate(AIRMASS_TEST_VALUES): ax = fig.add_subplot(3, 3, i+1) @@ -76,6 +84,7 @@ def plot_midpoint_airmass_histogram(): 
ax.set_title(f'Airmass Coefficient: {airmass_coeff}') ax.set_xlabel('Midpoint Airmass') ax.set_ylabel('Count') + export_to_image(f'1m0_midpoint_airmass_hist_{runtime}', fig) plt.show() @@ -90,7 +99,7 @@ def plot_barplot(ax, data, colors, labels, binnames): def get_priority_data_from_opensearch(coeff): query = {'query': { - 'wildcard': {'simulation_id.keyword': f'*-real-airmass-coeff-{coeff}-1-{search_suffix}'} + 'wildcard': {'simulation_id.keyword': f'1m0-simulation-real-airmass-coeff-{coeff}-1'} } } response = client.search(query, 'scheduler-simulations') @@ -108,7 +117,7 @@ def get_priority_data_from_opensearch(coeff): def plot_pct_scheduled_bins(): fig = plt.figure(figsize=(20, 10)) - fig.suptitle(f'{search_suffix} Percentage of Requests Scheduled', fontsize=20) + fig.suptitle('1m0 Network Percentage of Requests Scheduled', fontsize=20) fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) ax = fig.add_subplot() @@ -124,14 +133,15 @@ def plot_pct_scheduled_bins(): plot_barplot(ax, bardata, colors, labels, priorities) ax.set_xlabel('Priority') - ax.set_ylabel('Percent Count') - fig.legend() + ax.set_ylabel('Percent of Requests Scheduled') + ax.legend() + export_to_image(f'1m0_pct_count_scheduled_{runtime}', fig) plt.show() -def plot_pct_duration_bins(): +def plot_pct_time_scheduled_bins(): fig = plt.figure(figsize=(20, 10)) - fig.suptitle(f'{search_suffix} Percentage Requested Time Scheduled', fontsize=20) + fig.suptitle('1m0 Network Percentage Requested Time Scheduled', fontsize=20) fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) ax = fig.add_subplot() @@ -147,13 +157,29 @@ def plot_pct_duration_bins(): plot_barplot(ax, bardata, colors, labels, priorities) ax.set_xlabel('Priority') - ax.set_ylabel('Percent Duration') - fig.legend() + ax.set_ylabel('Percent Time Scheduled') + ax.legend() + export_to_image(f'1m0_pct_time_scheduled_{runtime}', fig) plt.show() +def export_to_image(fname, fig): + """Takes a Figure object and saves the figure. 
If the output + directory doesn't already exist, creates one for the user. + """ + try: + os.mkdir(EXPORT_DIR) + print(f'Directory "{EXPORT_DIR}" created') + except FileExistsError: + pass + for imgformat in EXPORT_FORMATS: + fpath = os.path.join(EXPORT_DIR, f'{fname}.{imgformat}') + fig.savefig(fpath, format=imgformat) + print(f'Plot exported to {fpath}') + + if __name__ == '__main__': plot_midpoint_airmass_histogram() plot_normed_airmass_histogram() plot_pct_scheduled_bins() - plot_pct_duration_bins() + plot_pct_time_scheduled_bins() From 3005922d576f1b710e9ace44d1294497dbb1a779 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Tue, 19 Jul 2022 18:16:31 +0000 Subject: [PATCH 084/165] moved helper functions to plotutils --- .../simulation/plot_airmass_coeff.py | 97 ++++--------------- adaptive_scheduler/simulation/plotutils.py | 85 ++++++++++++++++ 2 files changed, 105 insertions(+), 77 deletions(-) create mode 100644 adaptive_scheduler/simulation/plotutils.py diff --git a/adaptive_scheduler/simulation/plot_airmass_coeff.py b/adaptive_scheduler/simulation/plot_airmass_coeff.py index ee8de19d..ac0cf20e 100644 --- a/adaptive_scheduler/simulation/plot_airmass_coeff.py +++ b/adaptive_scheduler/simulation/plot_airmass_coeff.py @@ -1,46 +1,23 @@ -import os from datetime import datetime import numpy as np import matplotlib.pyplot as plt -from opensearchpy import OpenSearch -EXPORT_DIR = 'adaptive_scheduler/simulation/plot_output' -EXPORT_FORMATS = ['jpg', 'pdf'] -OPENSEARCH_URL = 'https://logs.lco.global/' +import adaptive_scheduler.simulation.plotutils as plotutils +from adaptive_scheduler.simulation.plotutils import opensearch_client, default_colors + AIRMASS_TEST_VALUES = [0, 0.01, 0.05, 0.1, 1, 10, 100, 1000, 1000000] -client = OpenSearch(OPENSEARCH_URL) control_id = '1m0-simulation-real-airmass-control-1_2022-07-18T23:59:44.770684' - -control = client.get('scheduler-simulations', control_id) -colors = ['deeppink', - 'forestgreen', - 'limegreen', - 'mediumseagreen', 
- 'mediumturquoise', - 'royalblue', - 'slateblue', - 'darkorchid', - 'indigo', - 'navy'] +control = opensearch_client.get('scheduler-simulations', control_id) labels = ['prioritize early'] labels.extend(AIRMASS_TEST_VALUES) -runtime = datetime.utcnow().isoformat(timespec='seconds') +timestamp = datetime.utcnow().isoformat(timespec='seconds') def get_airmass_data_from_opensearch(coeff): - query = {'query': { - 'wildcard': {'simulation_id.keyword': f'1m0-simulation-real-airmass-coeff-{coeff}-1'} - } - } - response = client.search(query, 'scheduler-simulations') - try: - result = response['hits']['hits'][0] - except IndexError: - print(f'Found no results for {coeff}') - source_data = result['_source'] - print(f'Got data for {source_data["simulation_id"]}') + query = f'1m0-simulation-real-airmass-coeff-{coeff}-1' + source_data = plotutils.get_data_from_opensearch(query) airmass_coeff = source_data['airmass_weighting_coefficient'] airmass_data = source_data['airmass_metrics']['raw_airmass_data'] return airmass_data, airmass_coeff @@ -64,11 +41,11 @@ def plot_normed_airmass_histogram(): min_ = np.array(airmass_data[1]['min_poss_airmasses']) max_ = np.array(airmass_data[2]['max_poss_airmasses']) normed.append(1-(mp-min_)/(max_-min_)) - ax.hist(normed, bins=10, label=labels, color=colors, alpha=0.8) - ax.set_xlabel('Airmass Score (0 is worse, 1 is closest to ideal)') + ax.hist(normed, bins=10, label=labels, color=default_colors, alpha=0.8) + ax.set_xlabel('Airmass Score (0 is worst, 1 is closest to ideal)') ax.set_ylabel('Count') ax.legend() - export_to_image(f'1m0_normed_airmass_hist_{runtime}', fig) + plotutils.export_to_image(f'1m0_normed_airmass_hist_{timestamp}', fig) plt.show() @@ -84,35 +61,16 @@ def plot_midpoint_airmass_histogram(): ax.set_title(f'Airmass Coefficient: {airmass_coeff}') ax.set_xlabel('Midpoint Airmass') ax.set_ylabel('Count') - export_to_image(f'1m0_midpoint_airmass_hist_{runtime}', fig) + 
plotutils.export_to_image(f'1m0_midpoint_airmass_hist_{timestamp}', fig) plt.show() -def plot_barplot(ax, data, colors, labels, binnames): - # data is a list of lists - ticks = np.arange(len(data[0])) - barwidth = 0.05 - for i, datavalues in enumerate(data): - ax.bar(ticks+barwidth*i, datavalues, barwidth, color=colors[i], label=labels[i], alpha=0.8) - ax.set_xticks(ticks+barwidth*i/2, binnames) - - def get_priority_data_from_opensearch(coeff): - query = {'query': { - 'wildcard': {'simulation_id.keyword': f'1m0-simulation-real-airmass-coeff-{coeff}-1'} - } - } - response = client.search(query, 'scheduler-simulations') - try: - result = response['hits']['hits'][0] - except IndexError: - print(f'Found no results for {coeff}') - source_data = result['_source'] - print(f'Got data for {source_data["simulation_id"]}') - airmass_coeff = source_data['airmass_weighting_coefficient'] + query = f'1m0-simulation-real-airmass-coeff-{coeff}-1' + source_data = plotutils.get_data_from_opensearch(query) pct_scheduled = source_data['percent_sched_by_priority'][0] pct_duration = source_data['percent_duration_by_priority'][0] - return pct_scheduled, pct_duration, airmass_coeff + return pct_scheduled, pct_duration def plot_pct_scheduled_bins(): @@ -128,14 +86,14 @@ def plot_pct_scheduled_bins(): bardata.append(percentages) for value in AIRMASS_TEST_VALUES: - priority_data, _, _ = get_priority_data_from_opensearch(value) + priority_data, _ = get_priority_data_from_opensearch(value) bardata.append(list(priority_data.values())) - plot_barplot(ax, bardata, colors, labels, priorities) + plotutils.plot_barplot(ax, bardata, default_colors, labels, priorities) ax.set_xlabel('Priority') ax.set_ylabel('Percent of Requests Scheduled') ax.legend() - export_to_image(f'1m0_pct_count_scheduled_{runtime}', fig) + plotutils.export_to_image(f'1m0_pct_count_scheduled_{timestamp}', fig) plt.show() @@ -152,32 +110,17 @@ def plot_pct_time_scheduled_bins(): bardata.append(percentages) for value in 
AIRMASS_TEST_VALUES: - _, priority_data, _ = get_priority_data_from_opensearch(value) + _, priority_data = get_priority_data_from_opensearch(value) bardata.append(list(priority_data.values())) - plot_barplot(ax, bardata, colors, labels, priorities) + plotutils.plot_barplot(ax, bardata, default_colors, labels, priorities) ax.set_xlabel('Priority') ax.set_ylabel('Percent Time Scheduled') ax.legend() - export_to_image(f'1m0_pct_time_scheduled_{runtime}', fig) + plotutils.export_to_image(f'1m0_pct_time_scheduled_{timestamp}', fig) plt.show() -def export_to_image(fname, fig): - """Takes a Figure object and saves the figure. If the output - directory doesn't already exist, creates one for the user. - """ - try: - os.mkdir(EXPORT_DIR) - print(f'Directory "{EXPORT_DIR}" created') - except FileExistsError: - pass - for imgformat in EXPORT_FORMATS: - fpath = os.path.join(EXPORT_DIR, f'{fname}.{imgformat}') - fig.savefig(fpath, format=imgformat) - print(f'Plot exported to {fpath}') - - if __name__ == '__main__': plot_midpoint_airmass_histogram() plot_normed_airmass_histogram() diff --git a/adaptive_scheduler/simulation/plotutils.py b/adaptive_scheduler/simulation/plotutils.py new file mode 100644 index 00000000..1d44f0a9 --- /dev/null +++ b/adaptive_scheduler/simulation/plotutils.py @@ -0,0 +1,85 @@ +import os + +import numpy as np +from opensearchpy import OpenSearch + +PLOTEXPORT_DIR = os.getenv('PLOTEXPORT_DIR', 'adaptive_scheduler/simulation/plot_output') +PLOTEXPORT_FORMATS = ['jpg', 'pdf'] + +OPENSEARCH_URL = os.getenv('OPENSEARCH_URL', 'https://logs.lco.global/') +OPENSEARCH_INDEX = os.getenv('OPENSEARCH_INDEX', 'scheduler-simulations') +opensearch_client = OpenSearch(OPENSEARCH_URL) + +default_colors = ['deeppink', + 'forestgreen', + 'limegreen', + 'mediumseagreen', + 'mediumturquoise', + 'royalblue', + 'slateblue', + 'darkorchid', + 'indigo', + 'navy'] + + +def export_to_image(fname, fig): + """Takes a matplotlib Figure object and saves the figure. 
If the output + directory doesn't already exist, creates one for the user. + + Args: + fname (str): The filename to save the file as. + fig (matplotlib.pyplot.Figure): The figure to save, typically created by + calling subplots(). + """ + try: + os.mkdir(PLOTEXPORT_DIR) + print(f'Directory "{PLOTEXPORT_DIR}" created') + except FileExistsError: + pass + for imgformat in PLOTEXPORT_FORMATS: + fpath = os.path.join(PLOTEXPORT_DIR, f'{fname}.{imgformat}') + fig.savefig(fpath, format=imgformat) + print(f'Plot exported to {fpath}') + + +def plot_barplot(ax, data, colors, labels, binnames, barwidth=0.04): + """Generates a barplot for multiple datasets. + + Args: + ax (matplotlib.pyplot.Axes): An Axes object to modify. + data: A list of lists. Each sub-list contains the y-axis data for a dataset. + colors: The list of colors to use for each dataset. Must contain enough colors + to cover all datasets. + labels: The list of labels to associate with each dataset. Must contain a label for each dataset. + binnames: A list of names of the bins for marking the x-axis. + barwidth (float): The width of each bar. + """ + ticks = np.arange(len(data[0])) + for i, datavalues in enumerate(data): + ax.bar(ticks+barwidth*i, datavalues, barwidth, color=colors[i], label=labels[i], alpha=0.8) + ax.set_xticks(ticks+barwidth*i/2, binnames) + + +def get_data_from_opensearch(query): + """Searches OpenSearch for a matching query (wildcards allowed) and returns the source data. + + Args: + query (str): The search query to look for. + + Returns: + source_data (dict): A dictionary of the data returned from OpenSearch. + None: Returns None if there are no results. 
+ """ + source_data = None + query = {'query': { + 'wildcard': {'simulation_id.keyword': query} + } + } + response = opensearch_client.search(query, OPENSEARCH_INDEX) + try: + result = response['hits']['hits'][0] + source_data = result['_source'] + print(f'Got data for id: {source_data["simulation_id"]}') + except IndexError: + print(f'Found no results for {query}') + return source_data From b71e88ca9d7dab42624f79b664d463ef8eb0682b Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Tue, 19 Jul 2022 14:27:41 -0700 Subject: [PATCH 085/165] add plot effective priority --- .../simulation/plot_effective_priority.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 adaptive_scheduler/simulation/plot_effective_priority.py diff --git a/adaptive_scheduler/simulation/plot_effective_priority.py b/adaptive_scheduler/simulation/plot_effective_priority.py new file mode 100644 index 00000000..ae54a760 --- /dev/null +++ b/adaptive_scheduler/simulation/plot_effective_priority.py @@ -0,0 +1,28 @@ +import numpy as np +import matplotlib.pyplot as plt +from opensearchpy import OpenSearch +from plotutils import get_data_from_opensearch, plot_barplot, default_colors + +EFF_PRI_CALC= ['base-only', 'base-scaled-60', 'base-scaled-3600', 'base-duration'] + + +def plot_percent_duration_bin(): + fig = plt.figure(figsize=(20, 10)) + fig.suptitle(f'Scheduled requests seconds over total request seconds for different Effective Priority algorithms', fontsize=20) + fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + ax = fig.add_subplot() + bardata = [] + for id in EFF_PRI_CALC: + priority_data = get_data_from_opensearch(f'*-effective-priority-{id}-1m0')['percent_duration_by_priority'] + priorities = list(priority_data[0].keys()) + bardata.append(list(priority_data[0].values())) + + plot_barplot(ax, bardata, default_colors, EFF_PRI_CALC, priorities) + ax.set_xlabel('Priority') + ax.set_ylabel('Percent Duration') + fig.legend() + plt.show() + + +if __name__ == '__main__': + 
plot_percent_duration_bin() \ No newline at end of file From c6c6c9da1953aaaba6eb27d08bd7499245e47b93 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Wed, 20 Jul 2022 17:57:28 +0000 Subject: [PATCH 086/165] uniform scaling for histograms --- adaptive_scheduler/simulation/plot_airmass_coeff.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/adaptive_scheduler/simulation/plot_airmass_coeff.py b/adaptive_scheduler/simulation/plot_airmass_coeff.py index ac0cf20e..23d1c551 100644 --- a/adaptive_scheduler/simulation/plot_airmass_coeff.py +++ b/adaptive_scheduler/simulation/plot_airmass_coeff.py @@ -6,7 +6,7 @@ import adaptive_scheduler.simulation.plotutils as plotutils from adaptive_scheduler.simulation.plotutils import opensearch_client, default_colors -AIRMASS_TEST_VALUES = [0, 0.01, 0.05, 0.1, 1, 10, 100, 1000, 1000000] +AIRMASS_TEST_VALUES = [0, 0.01, 0.05, 0.1, 1.0, 10, 100, 1000, 1000000] control_id = '1m0-simulation-real-airmass-control-1_2022-07-18T23:59:44.770684' control = opensearch_client.get('scheduler-simulations', control_id) @@ -61,6 +61,8 @@ def plot_midpoint_airmass_histogram(): ax.set_title(f'Airmass Coefficient: {airmass_coeff}') ax.set_xlabel('Midpoint Airmass') ax.set_ylabel('Count') + ax.set_xlim(1.0, 2.0) + ax.set_ylim(0, 120) plotutils.export_to_image(f'1m0_midpoint_airmass_hist_{timestamp}', fig) plt.show() From 324f3d8e96e4fcc69eaead4999870531b385a834 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Wed, 20 Jul 2022 17:57:54 +0000 Subject: [PATCH 087/165] small documentation updates and optimizations --- adaptive_scheduler/simulation/metrics.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 6f5c5505..4f86008e 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -35,8 +35,7 @@ def percent_diff(x, y): def generate_bin_names(bin_size, bin_range): 
"""Creates labels for the bins.""" - start = bin_range[0] - end = bin_range[1] + start, end = bin_range bin_names = [] bin_start = np.arange(start, end+1, bin_size) for start_num in bin_start: @@ -58,7 +57,7 @@ def bin_data(bin_by, data=[], bin_size=1, bin_range=None, aggregator=sum): """Bins data to create a histogram. Each bin is half-open, i.e. defined on the interval [a, b) for every bin except for the last bin, which is defined on the interval [a, b]. The naming convention is different for integers and floats. For example, for the label '1-2', this means the discrete values 1 and 2, whereas - for the label '1.0-2.0' this means the values on the interval [1.0, 2.0). + for the label '1.0-2.0' this means the values on the interval [1.0, 2.0). Bins are uniformly spaced. Args: bin_by (list): A list of data to bin by. Can be float or int. @@ -236,16 +235,15 @@ def _get_airmass_data_for_request(self, request_id): def _get_minmax_airmass(self, airmass_data, midpoint_duration): """Finds the minimum and maximum midpoint airmass across all sites.""" - max_airmasses = [] - min_airmasses = [] + max_airmass = 0 + min_airmass = 1000 for site in airmass_data.values(): times, airmasses = site.values() airmasses = np.array(airmasses) times = np.array([datetime.strptime(time, DTFORMAT) for time in times]) - # site_airmasses = airmasses[(times >= times[0]+midpoint_duration) & (times <= times[-1]-midpoint_duration)] - min_airmasses.append(min(airmasses)) - max_airmasses.append(max(airmasses)) - return min(min_airmasses), max(max_airmasses) + min_airmass = min(airmasses, min_airmass) + max_airmass = max(airmasses, max_airmass) + return min_airmass, max_airmass def _get_midpoint_airmasses_by_site(self, airmass_data, midpoint_time): """"Gets the midpoint airmasses by site for a request. 
This is done by finding the time @@ -308,7 +306,7 @@ def airmass_metrics(self, schedule=None): return airmass_metrics def binned_tac_priority_metrics(self, input_reservations=None, schedule=None): - """Bins TAC Priority into the following bins: '10-19', '20-29', '30-39', '1000'.""" + """Bins into 10-19, 20-29, and 30 by default, but may be modified with bin_size and bin_range.""" input_reservations = self.combined_input_reservations if input_reservations is None else input_reservations schedule = self.combined_schedule if schedule is None else schedule bin_size = 10 @@ -323,7 +321,6 @@ def binned_tac_priority_metrics(self, input_reservations=None, schedule=None): for reservations in schedule.values(): sched_priority_values.extend([priority_values_by_rg_id[res.request_group_id] for res in reservations]) - sched_histogram = bin_data(sched_priority_values, bin_size=bin_size) bin_sched_durations = bin_data(sched_priority_values, sched_durations, bin_size) full_histogram = bin_data(all_priority_values, bin_size=bin_size) From 572f0f849a678550cfe9f8fc55b1456e9e3e5430 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Wed, 20 Jul 2022 17:58:27 +0000 Subject: [PATCH 088/165] no longer dumps all the metrics to the log --- adaptive_scheduler/simulation/orchestrator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index a819053b..d1cbbdc4 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -128,7 +128,6 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche 'percent_sched_by_priority': [binned_tac_priority_metrics['percent_count']], 'percent_duration_by_priority': [binned_tac_priority_metrics['percent_duration']], } - log.info(metrics) send_to_opensearch(sched_params.opensearch_url, sched_params.simulation_opensearch_index, metrics) From 730ad09766739a3cc9d54760ee31a492d00ab5f5 Mon Sep 17 
00:00:00 2001 From: Matthew Wong Date: Wed, 20 Jul 2022 17:59:08 +0000 Subject: [PATCH 089/165] changed optimization_type back to default from 'AIRMASS' --- adaptive_scheduler/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive_scheduler/models.py b/adaptive_scheduler/models.py index 5f1e46e3..4b90db3c 100644 --- a/adaptive_scheduler/models.py +++ b/adaptive_scheduler/models.py @@ -439,7 +439,7 @@ def __init__(self, configurations, windows, request_id, state='PENDING', telesco self.telescope_class = telescope_class self.req_duration = duration self.configuration_repeats = configuration_repeats - self.optimization_type = 'AIRMASS' + self.optimization_type = optimization_type self.scheduled_reservation = scheduled_reservation def get_duration(self): From 84b5fe089199dc31abc1769559505e7c401ac4d9 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Wed, 20 Jul 2022 21:06:02 +0000 Subject: [PATCH 090/165] added basic cli interface --- .../simulation/plot_airmass_coeff.py | 51 ++++++++++++++++--- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/adaptive_scheduler/simulation/plot_airmass_coeff.py b/adaptive_scheduler/simulation/plot_airmass_coeff.py index 23d1c551..959a09aa 100644 --- a/adaptive_scheduler/simulation/plot_airmass_coeff.py +++ b/adaptive_scheduler/simulation/plot_airmass_coeff.py @@ -1,3 +1,7 @@ +""" +Plotting functions for an airmass optimization experiment. 
+""" +import argparse from datetime import datetime import numpy as np @@ -45,7 +49,8 @@ def plot_normed_airmass_histogram(): ax.set_xlabel('Airmass Score (0 is worst, 1 is closest to ideal)') ax.set_ylabel('Count') ax.legend() - plotutils.export_to_image(f'1m0_normed_airmass_hist_{timestamp}', fig) + if not displayonly: + plotutils.export_to_image(f'1m0_normed_airmass_hist_{timestamp}', fig) plt.show() @@ -63,7 +68,8 @@ def plot_midpoint_airmass_histogram(): ax.set_ylabel('Count') ax.set_xlim(1.0, 2.0) ax.set_ylim(0, 120) - plotutils.export_to_image(f'1m0_midpoint_airmass_hist_{timestamp}', fig) + if not displayonly: + plotutils.export_to_image(f'1m0_midpoint_airmass_hist_{timestamp}', fig) plt.show() @@ -95,7 +101,8 @@ def plot_pct_scheduled_bins(): ax.set_xlabel('Priority') ax.set_ylabel('Percent of Requests Scheduled') ax.legend() - plotutils.export_to_image(f'1m0_pct_count_scheduled_{timestamp}', fig) + if not displayonly: + plotutils.export_to_image(f'1m0_pct_count_scheduled_{timestamp}', fig) plt.show() @@ -119,12 +126,40 @@ def plot_pct_time_scheduled_bins(): ax.set_xlabel('Priority') ax.set_ylabel('Percent Time Scheduled') ax.legend() - plotutils.export_to_image(f'1m0_pct_time_scheduled_{timestamp}', fig) + if not displayonly: + plotutils.export_to_image(f'1m0_pct_time_scheduled_{timestamp}', fig) plt.show() if __name__ == '__main__': - plot_midpoint_airmass_histogram() - plot_normed_airmass_histogram() - plot_pct_scheduled_bins() - plot_pct_time_scheduled_bins() + plots = { + 'normed_airmass_hist': {'func': plot_normed_airmass_histogram, + 'desc': 'Airmass distribution, normalized so that 0 is worst airmass and 1 is best'}, + 'midpoint_airmass_hist': {'func': plot_midpoint_airmass_histogram, + 'desc': 'Midpoint airmass distributions for different airmass weighting coefficients'}, + 'percent_scheduled_binned': {'func': plot_pct_scheduled_bins, + 'desc': 'Percent of requests scheduled binned by priority level'}, + 'percent_time_scheduled_binned': {'func': 
plot_pct_time_scheduled_bins, + 'desc': 'Percent of time requested scheduled binned by priority level'}, + } + + description = 'Plotting functions for airmass optimization experiment.' + parser = argparse.ArgumentParser(description=description) + parser.add_argument('plot_name', type=str.lower, nargs='*', + help="The name of the plot(s) to display. `all` can be passed to show all.") + parser.add_argument('-l', '--list', help='List plot info. `-l all` to show all available plots.', action='store_true') + parser.add_argument('-d', '--displayonly', help='Display the plots without exporting them.', action='store_true') + args = parser.parse_args() + global displayonly + displayonly = args.displayonly + + if args.list: + spacing = max([len(name) for name in plots.keys()]) + 4 + print(f'{"NAME":{spacing}}DESCRIPTION') + print(f'{"====":{spacing}}===========') + for name, details in plots.items(): + print(f'{name:{spacing}}{details["desc"]}') + else: + plots_to_show = list(plots.keys()) if args.plot_name == ['all'] else args.plot_name + for plot_name in plots_to_show: + plots[plot_name]['func']() From 7f4486ca07eb7f18ed6c50098fe2262a45f123e3 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Wed, 20 Jul 2022 14:25:56 -0700 Subject: [PATCH 091/165] fixed ploting effective priority --- adaptive_scheduler/simulation/metrics.py | 2 +- .../simulation/plot_effective_priority.py | 36 +++++++++++++++---- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 6f5c5505..4e8b56a3 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -311,7 +311,7 @@ def binned_tac_priority_metrics(self, input_reservations=None, schedule=None): """Bins TAC Priority into the following bins: '10-19', '20-29', '30-39', '1000'.""" input_reservations = self.combined_input_reservations if input_reservations is None else input_reservations schedule = 
self.combined_schedule if schedule is None else schedule - bin_size = 10 + bin_size = 45 sched_durations = self.get_scheduled_durations(schedule) all_durations = [res.duration for res in input_reservations] request_groups = self.scheduler_runner.normal_scheduler_input.request_groups diff --git a/adaptive_scheduler/simulation/plot_effective_priority.py b/adaptive_scheduler/simulation/plot_effective_priority.py index ae54a760..bfb0aa82 100644 --- a/adaptive_scheduler/simulation/plot_effective_priority.py +++ b/adaptive_scheduler/simulation/plot_effective_priority.py @@ -3,26 +3,48 @@ from opensearchpy import OpenSearch from plotutils import get_data_from_opensearch, plot_barplot, default_colors -EFF_PRI_CALC= ['base-only', 'base-scaled-60', 'base-scaled-3600', 'base-duration'] +EFF_PRI_CALC= ['airmass','airmass-with-duration-scaled-100','airmass-no-duration','airmass-no-duration-scaled-100',] -def plot_percent_duration_bin(): +def plot_percent_sched_duration_bin_by_priority(): fig = plt.figure(figsize=(20, 10)) - fig.suptitle(f'Scheduled requests seconds over total request seconds for different Effective Priority algorithms', fontsize=20) + fig.suptitle(f'1m0 Network Percent Request Time Scheduled binned by Priority', fontsize=20) fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) ax = fig.add_subplot() bardata = [] for id in EFF_PRI_CALC: - priority_data = get_data_from_opensearch(f'*-effective-priority-{id}-1m0')['percent_duration_by_priority'] - priorities = list(priority_data[0].keys()) + priority_data = get_data_from_opensearch(f'1m0-optimize-{id}')['percent_duration_by_priority'] + bardata.append(list(priority_data[0].values())) + priorities = ['low priority', 'mid priority', 'high priority'] plot_barplot(ax, bardata, default_colors, EFF_PRI_CALC, priorities) ax.set_xlabel('Priority') - ax.set_ylabel('Percent Duration') + ax.set_ylabel('Percent Scheduled Time') + fig.legend() + plt.show() + + +def plot_percent_sched_numbers_bin_by_priority(): + fig = 
plt.figure(figsize=(20, 10)) + fig.suptitle(f'1m0 Network Percent Request Number Scheduled binned by Priority', fontsize=20) + fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + ax = fig.add_subplot() + bardata = [] + for id in EFF_PRI_CALC: + priority_data = get_data_from_opensearch(f'1m0-optimize-{id}')['percent_sched_by_priority'] + + bardata.append(list(priority_data[0].values())) + + priorities = ['low priority', 'mid priority', 'high priority'] + plot_barplot(ax, bardata, default_colors, EFF_PRI_CALC, priorities) + ax.set_xlabel('Priority') + ax.set_ylabel('Percent Scheduled Count') fig.legend() plt.show() if __name__ == '__main__': - plot_percent_duration_bin() \ No newline at end of file + + plot_percent_sched_duration_bin_by_priority() + plot_percent_sched_numbers_bin_by_priority() \ No newline at end of file From f98b07155e666d72419bd6e57daf96083d90ce59 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 21 Jul 2022 23:03:35 +0000 Subject: [PATCH 092/165] updated master plotting file, can be turned into example code --- adaptive_scheduler/simulation/plot_all.py | 324 +++++++++++++++++++++ adaptive_scheduler/simulation/plotutils.py | 15 +- 2 files changed, 326 insertions(+), 13 deletions(-) create mode 100644 adaptive_scheduler/simulation/plot_all.py diff --git a/adaptive_scheduler/simulation/plot_all.py b/adaptive_scheduler/simulation/plot_all.py new file mode 100644 index 00000000..204c01c9 --- /dev/null +++ b/adaptive_scheduler/simulation/plot_all.py @@ -0,0 +1,324 @@ +""" +Plotting functions for an airmass optimization experiment. 
+""" +import argparse +from datetime import datetime + +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.style as style + +import adaptive_scheduler.simulation.plotutils as plotutils +from adaptive_scheduler.simulation.plotutils import opensearch_client + +AIRMASS_TEST_VALUES = [0, 0.01, 0.05, 0.1, 1.0, 10, 100, 1000, 1000000] +EFF_PRI_SCALING_TEST_LABELS = ['airmass', 'airmass-with-duration-scaled-100', + 'airmass-no-duration', 'airmass-no-duration-scaled-100'] + +control_id = '1m0-simulation-real-airmass-control-1_2022-07-18T23:59:44.770684' +control = opensearch_client.get('scheduler-simulations', control_id) +labels = ['prioritize early'] +labels.extend(AIRMASS_TEST_VALUES) +timestamp = datetime.utcnow().isoformat(timespec='seconds') +style.use('tableau-colorblind10') + + +def get_airmass_data_from_opensearch(coeff): + query = f'1m0-simulation-real-airmass-coeff-{coeff}-1' + source_data = plotutils.get_data_from_opensearch(query) + airmass_coeff = source_data['airmass_weighting_coefficient'] + airmass_data = source_data['airmass_metrics']['raw_airmass_data'] + return airmass_data, airmass_coeff + + +def plot_normed_airmass_histogram(): + fig = plt.figure(figsize=(20, 10)) + fig.suptitle('1m0 Network Normalized Airmass Distributions for Different Airmass Coefficients', fontsize=20) + fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + ax = fig.add_subplot() + + control_airmass_data = control['_source']['airmass_metrics']['raw_airmass_data'] + control_mp = np.array(control_airmass_data[0]['midpoint_airmasses']) + control_min = np.array(control_airmass_data[1]['min_poss_airmasses']) + control_max = np.array(control_airmass_data[2]['max_poss_airmasses']) + normed = [1-(control_mp-control_min)/(control_max-control_min)] + + for value in AIRMASS_TEST_VALUES: + airmass_data, airmass_coeff = get_airmass_data_from_opensearch(value) + mp = np.array(airmass_data[0]['midpoint_airmasses']) + min_ = np.array(airmass_data[1]['min_poss_airmasses']) + 
max_ = np.array(airmass_data[2]['max_poss_airmasses']) + normed.append(1-(mp-min_)/(max_-min_)) + ax.hist(normed, bins=10, label=labels, alpha=0.8) + ax.set_xlabel('Airmass Score (0 is worst, 1 is closest to ideal)') + ax.set_ylabel('Count') + ax.legend() + if not displayonly: + plotutils.export_to_image(f'1m0_normed_airmass_hist_{timestamp}', fig) + plt.show() + + +def plot_midpoint_airmass_histogram(): + fig = plt.figure(figsize=(16, 16)) + fig.suptitle('1m0 Network Midpoint Airmass Distributions for Different Airmass Coefficients', fontsize=20) + fig.subplots_adjust(wspace=0.3, hspace=0.3, top=0.92) + for i, value in enumerate(AIRMASS_TEST_VALUES): + ax = fig.add_subplot(3, 3, i+1) + airmass_data, airmass_coeff = get_airmass_data_from_opensearch(value) + midpoint_airmasses = airmass_data[0]['midpoint_airmasses'] + ax.hist(midpoint_airmasses, bins=50) + ax.set_title(f'Airmass Coefficient: {airmass_coeff}') + ax.set_xlabel('Midpoint Airmass') + ax.set_ylabel('Count') + ax.set_xlim(1.0, 2.0) + ax.set_ylim(0, 120) + if not displayonly: + plotutils.export_to_image(f'1m0_midpoint_airmass_hist_{timestamp}', fig) + plt.show() + + +def get_priority_data_from_opensearch(coeff): + query = f'1m0-simulation-real-airmass-coeff-{coeff}-1' + source_data = plotutils.get_data_from_opensearch(query) + pct_scheduled = source_data['percent_sched_by_priority'][0] + pct_duration = source_data['percent_duration_by_priority'][0] + return pct_scheduled, pct_duration + + +def plot_pct_count_airmass_prio_bins(): + fig = plt.figure(figsize=(20, 10)) + fig.suptitle('1m0 Percent of Requests Scheduled by Priority Class for Different Airmass Coefficients', fontsize=20) + fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + ax = fig.add_subplot() + + bardata = [] + control_prio_data = control['_source']['percent_sched_by_priority'][0] + priorities = list(control_prio_data.keys()) + percentages = list(control_prio_data.values()) + bardata.append(percentages) + + for value in 
AIRMASS_TEST_VALUES: + priority_data, _ = get_priority_data_from_opensearch(value) + bardata.append(list(priority_data.values())) + + plotutils.plot_barplot(ax, bardata, labels, priorities) + ax.set_xlabel('Priority') + ax.set_ylabel('Percent of Requests Scheduled') + ax.set_ylim(0, 100) + ax.legend() + if not displayonly: + plotutils.export_to_image(f'1m0_pct_count_scheduled_airmass_{timestamp}', fig) + plt.show() + + +def plot_pct_time_airmass_prio_bins(): + fig = plt.figure(figsize=(20, 10)) + fig.suptitle('1m0 Percent of Requested Time Scheduled by Priority Class for Different Airmass Coefficients', fontsize=20) + fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + ax = fig.add_subplot() + + bardata = [] + control_prio_data = control['_source']['percent_duration_by_priority'][0] + priorities = list(control_prio_data.keys()) + percentages = list(control_prio_data.values()) + bardata.append(percentages) + + for value in AIRMASS_TEST_VALUES: + _, priority_data = get_priority_data_from_opensearch(value) + bardata.append(list(priority_data.values())) + + plotutils.plot_barplot(ax, bardata, labels, priorities) + ax.set_xlabel('Priority') + ax.set_ylabel('Percent of Requested Time Scheduled') + ax.set_ylim(0, 100) + ax.legend() + if not displayonly: + plotutils.export_to_image(f'1m0_pct_time_scheduled_airmass_{timestamp}', fig) + plt.show() + + +def plot_pct_time_scaling_prio_bins(): + fig = plt.figure(figsize=(20, 10)) + fig.suptitle('1m0 Percent of Requested Time Scheduled by Priority Class for Different Scaling Options', fontsize=20) + fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + ax = fig.add_subplot() + + bardata = [] + for label in EFF_PRI_SCALING_TEST_LABELS: + priority_data = plotutils.get_data_from_opensearch(f'1m0-optimize-{label}')['percent_duration_by_priority'] + bardata.append(list(priority_data[0].values())) + + priorities = ['low priority', 'mid priority', 'high priority'] + plotutils.plot_barplot(ax, bardata, EFF_PRI_SCALING_TEST_LABELS, 
priorities) + ax.set_xlabel('Priority') + ax.set_ylabel('Percent of Requested Time Scheduled') + ax.set_ylim(0, 100) + ax.legend() + if not displayonly: + plotutils.export_to_image(f'1m0_pct_time_scheduled_scaling_{timestamp}', fig) + plt.show() + + +def plot_pct_count_scaling_prio_bins(): + fig = plt.figure(figsize=(20, 10)) + fig.suptitle('1m0 Percent of Requests Scheduled by Priority Class for Different Scaling Options', fontsize=20) + fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + ax = fig.add_subplot() + + bardata = [] + for label in EFF_PRI_SCALING_TEST_LABELS: + priority_data = plotutils.get_data_from_opensearch(f'1m0-optimize-{label}')['percent_sched_by_priority'] + bardata.append(list(priority_data[0].values())) + + priorities = ['low', 'medium', 'high'] + plotutils.plot_barplot(ax, bardata, EFF_PRI_SCALING_TEST_LABELS, priorities) + ax.set_xlabel('Priority') + ax.set_ylabel('Percent of Requests Scheduled') + ax.set_ylim(0, 100) + ax.legend() + if not displayonly: + plotutils.export_to_image(f'1m0_pct_count_scheduled_scaling_{timestamp}', fig) + plt.show() + + +def plot_pct_total_sched_prio_bins(): + fig = plt.figure(figsize=(20, 10)) + fig.suptitle('1m0 Percent of Requests Scheduled out of All Requests by Priority Class', fontsize=20) + fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + ax = fig.add_subplot() + + bardata = [] + for label in EFF_PRI_SCALING_TEST_LABELS: + scheduled_count = plotutils.get_data_from_opensearch(f'1m0-optimize-{label}')['scheduled_req_by_priority'][0] + total_count = plotutils.get_data_from_opensearch(f'1m0-optimize-{label}')['total_request_count'] + scheduled_count = {bin_name: 100*np.array(values)/total_count for bin_name, values in scheduled_count.items()} + bardata.append(scheduled_count.values()) + + priorities = ['low', 'medium', 'high'] + plotutils.plot_barplot(ax, bardata, EFF_PRI_SCALING_TEST_LABELS, priorities) + ax.set_xlabel('Priority') + ax.set_ylabel('Percent of Requests Scheduled out of All Requests') 
+ ax.set_ylim(0, 100) + ax.legend() + if not displayonly: + plotutils.export_to_image(f'1m0_pct_count_total_scaling_{timestamp}', fig) + plt.show() + + +def plot_pct_total_prio_bins(): + fig = plt.figure(figsize=(20, 10)) + fig.suptitle('1m0 Percent of Requests by Priority Class (both scheduled and unscheduled)', fontsize=20) + fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + ax = fig.add_subplot() + + bardata = [] + scheduled_count = plotutils.get_data_from_opensearch('1m0-optimize-airmass')['total_req_by_priority'][0] + total_count = plotutils.get_data_from_opensearch('1m0-optimize-airmass')['total_request_count'] + scheduled_count = {bin_name: 100*np.array(values)/total_count for bin_name, values in scheduled_count.items()} + bardata.append(scheduled_count.values()) + + priorities = ['low', 'medium', 'high'] + plotutils.plot_barplot(ax, bardata, EFF_PRI_SCALING_TEST_LABELS, priorities) + ax.set_xlabel('Priority') + ax.set_ylabel('Percent of Requests out of All Requests') + ax.set_ylim(0, 100) + ax.legend() + if not displayonly: + plotutils.export_to_image(f'1m0_pct_count_total_scaling_{timestamp}', fig) + plt.show() + + +def plot_duration_histogram(): + fig = plt.figure(figsize=(20, 10)) + fig.suptitle('1m0 Distribution of Scheduled Request Durations with/without Duration Scaling', fontsize=20) + fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + ax = fig.add_subplot() + with_duration_data = opensearch_client.get('scheduler-simulations', + '1m0-optimize-airmass-with-duration_2022-07-21T21:48:02.586407') + no_duration_data = opensearch_client.get('scheduler-simulations', + '1m0-optimize-airmass-no-duration_2022-07-21T21:52:46.316207') + duration_data = [with_duration_data['_source']['raw_scheduled_durations']] + duration_data.append(no_duration_data['_source']['raw_scheduled_durations']) + labels = ['eff. prio scaled by duration', 'eff. 
prio not scaled by duration'] + ax.hist(duration_data, bins=50, label=labels) + ax.set_xlabel('Duration [s]') + ax.set_ylabel('Counts') + ax.set_title('Optimize by Airmass') + ax.legend() + if not displayonly: + plotutils.export_to_image(f'1m0_duration_hist_{timestamp}', fig) + plt.show() + + +def plot_eff_prio_duration_scatter(): + fig = plt.figure(figsize=(20, 10)) + fig.suptitle('1m0 Scatterplot of Effective Priority and Duration', fontsize=20) + fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + ax = fig.add_subplot() + tagnames = ['with-duration-v2', 'with-duration-scaled-100-v2'] + labels = ['priority 10-30', 'priority 10-100'] + for i, tag in enumerate(tagnames): + data = plotutils.get_data_from_opensearch(f'1m0-optimize-airmass-{tag}') + prio_scheduled = np.array(data['raw_scheduled_priorities']) + prio_unscheduled = np.array(data['raw_unscheduled_priorities']) + dur_scheduled = np.array(data['raw_scheduled_durations']) + dur_unscheduled = np.array(data['raw_unscheduled_durations']) + ax.scatter(dur_scheduled, prio_scheduled*dur_scheduled, label=f'{labels[i]}, scheduled', marker='+') + ax.scatter(dur_unscheduled, prio_unscheduled*dur_unscheduled, label=f'{labels[i]}, unscheduled', marker='x') + ax.set_ylabel('Effective Priority (base priority x duration)') + ax.set_xlabel('Duration [s]') + ax.legend() + if not displayonly: + plotutils.export_to_image(f'1m0_eff_prio_duration_scatter_{timestamp}', fig) + plt.show() + + +if __name__ == '__main__': + plots = { + 'normed_airmass_hist': {'func': plot_normed_airmass_histogram, + 'desc': 'Airmass distribution, normalized so that 0 is worst airmass and 1 is best'}, + 'midpoint_airmass_hist': {'func': plot_midpoint_airmass_histogram, + 'desc': 'Midpoint airmass distributions for different airmass weighting coefficients'}, + 'pct_sched_airmass_bin_priority': {'func': plot_pct_count_airmass_prio_bins, + 'desc': 'Percent of requests scheduled binned by priority level' + 'for different airmass coefficients'}, + 
'pct_time_airmass_bin_priority': {'func': plot_pct_time_airmass_prio_bins, + 'desc': 'Percent of time requested scheduled binned by priority level' + ' for different airmass coefficients'}, + 'pct_sched_scaling_bin_priority': {'func': plot_pct_count_scaling_prio_bins, + 'desc': 'Percent of requests scheduled binned by priority level' + ' for different scaling strategies'}, + 'pct_time_scaling_bin_priority': {'func': plot_pct_time_scaling_prio_bins, + 'desc': 'Percent of time requested scheduled binned by priority level' + ' for different scaling strategies'}, + 'pct_total_sched_scaling_bin_priority': {'func': plot_pct_total_sched_prio_bins, + 'desc': 'Percent of requests scheduled with respect to all requests, ' + 'binned by priority level for different scaling strategies'}, + 'pct_total_scaling_bin_priority': {'func': plot_pct_total_prio_bins, + 'desc': 'The percent of requests occupied at each priority level'}, + 'duration_hist': {'func': plot_duration_histogram, + 'desc': 'Scheduled request duration distribution.'}, + 'eff_prio_duration_scatter': {'func': plot_eff_prio_duration_scatter, + 'desc': 'Scatterplot with (prio x duration) on y-axis and duration on x-axis'}, + } + + description = 'Plotting functions for airmass optimization experiment.' + parser = argparse.ArgumentParser(description=description) + parser.add_argument('plot_name', type=str.lower, nargs='*', + help="The name of the plot(s) to display. `all` can be passed to show all.") + parser.add_argument('-l', '--list', help='Show plot info. 
`-l all` to show all available plots.', action='store_true') + parser.add_argument('-d', '--displayonly', help='Display the plots without exporting them.', action='store_true') + args = parser.parse_args() + global displayonly + displayonly = args.displayonly + + if args.list: + spacing = max([len(name) for name in plots.keys()]) + 4 + print(f'{"NAME":{spacing}}DESCRIPTION') + print(f'{"====":{spacing}}===========') + for name, details in plots.items(): + print(f'{name:{spacing}}{details["desc"]}') + else: + plots_to_show = list(plots.keys()) if args.plot_name == ['all'] else args.plot_name + for plot_name in plots_to_show: + plots[plot_name]['func']() diff --git a/adaptive_scheduler/simulation/plotutils.py b/adaptive_scheduler/simulation/plotutils.py index 1d44f0a9..ae03409b 100644 --- a/adaptive_scheduler/simulation/plotutils.py +++ b/adaptive_scheduler/simulation/plotutils.py @@ -10,17 +10,6 @@ OPENSEARCH_INDEX = os.getenv('OPENSEARCH_INDEX', 'scheduler-simulations') opensearch_client = OpenSearch(OPENSEARCH_URL) -default_colors = ['deeppink', - 'forestgreen', - 'limegreen', - 'mediumseagreen', - 'mediumturquoise', - 'royalblue', - 'slateblue', - 'darkorchid', - 'indigo', - 'navy'] - def export_to_image(fname, fig): """Takes a matplotlib Figure object and saves the figure. If the output @@ -42,7 +31,7 @@ def export_to_image(fname, fig): print(f'Plot exported to {fpath}') -def plot_barplot(ax, data, colors, labels, binnames, barwidth=0.04): +def plot_barplot(ax, data, labels, binnames, barwidth=0.04): """Generates a barplot for multiple datasets. 
Args: @@ -56,7 +45,7 @@ def plot_barplot(ax, data, colors, labels, binnames, barwidth=0.04): """ ticks = np.arange(len(data[0])) for i, datavalues in enumerate(data): - ax.bar(ticks+barwidth*i, datavalues, barwidth, color=colors[i], label=labels[i], alpha=0.8) + ax.bar(ticks+barwidth*i, datavalues, barwidth, label=labels[i], alpha=0.8) ax.set_xticks(ticks+barwidth*i/2, binnames) From a1bc9d32b1a3c1bc26ebfe1b3d28ff349dbf0621 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 21 Jul 2022 23:21:57 +0000 Subject: [PATCH 093/165] update to include raw duration and priority data --- adaptive_scheduler/simulation/metrics.py | 84 ++++++++++++------- adaptive_scheduler/simulation/orchestrator.py | 8 +- 2 files changed, 60 insertions(+), 32 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 5059f93e..09229b84 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -33,6 +33,10 @@ def percent_diff(x, y): return abs(x - y) / mean * 100. 
+def scalefunc(p, newmax, newmin, oldmax, oldmin): + return (p-oldmin)*(newmax-newmin)/(oldmax-oldmin) + newmin + + def generate_bin_names(bin_size, bin_range): """Creates labels for the bins.""" start, end = bin_range @@ -124,6 +128,12 @@ def __init__(self, normal_scheduler_result, rr_scheduler_result, scheduler, sche for comp_res in self.normal_input_reservations: self.combined_input_reservations.extend(comp_res.reservation_list) + self.request_groups = self.scheduler_runner.normal_scheduler_input.request_groups + if self.scheduler_runner.rr_scheduler_input: + self.request_groups.extend(self.scheduler_runner.rr_scheduler_input.request_groups) + + self._get_scheduled_rg_ids() + self.airmass_data_by_request_id = defaultdict(dict) def _combine_resources_scheduled(self): @@ -167,12 +177,31 @@ def total_scheduled_eff_priority(self, schedule=None): effective_priorities.extend([res.priority for res in reservations]) return sum(effective_priorities), effective_priorities - def get_scheduled_durations(self, schedule=None): - schedule = self.combined_schedule if schedule is None else schedule - durations = [] - for reservations in schedule.values(): - durations.extend([res.duration for res in reservations]) - return durations + def _get_scheduled_rg_ids(self): + self.sched_rg_ids = [] + self.input_rg_ids = [res.request_group_id for res in self.combined_input_reservations] + for reservations in self.combined_schedule.values(): + self.sched_rg_ids.extend([res.request_group_id for res in reservations]) + return self.sched_rg_ids + + def get_duration_data(self): + """Returns scheduled and unscheduled durations.""" + durations_by_rg_id = {res.request_group_id: res.duration for res in self.combined_input_reservations} + sched_durations = [durations_by_rg_id[rg_id] for rg_id in self.sched_rg_ids] + unsched_durations = [durations_by_rg_id[rg_id] for rg_id in self.input_rg_ids if rg_id not in self.sched_rg_ids] + return sched_durations, unsched_durations + + def 
get_priority_data(self): + """Returns scheduled and unscheduled priority values. Accesses them in the same order as durations so + they can be cross-matched. Scaling changes the priorities to a different range of numbers.""" + priority_by_rg_id = {rg.id: rg.proposal.tac_priority for rg in self.request_groups} + sched_priorities = [priority_by_rg_id[rg_id] for rg_id in self.sched_rg_ids] + unsched_priorities = [priority_by_rg_id[rg_id] for rg_id in self.input_rg_ids if rg_id not in self.sched_rg_ids] + # uncomment to remap the priorities + # scale = (100, 10, 30, 10) + # sched_priorities = [scalefunc(p, *scale) for p in sched_priorities] + # unsched_priorities = [scalefunc(p, *scale) for p in unsched_priorities] + return sched_priorities, unsched_priorities def total_available_seconds(self, resources_scheduled=None, horizon_days=None): """Aggregates the total available time, calculated from dark intervals. @@ -202,7 +231,8 @@ def percent_time_utilization(self, schedule=None, resources_scheduled=None, hori schedule = self.combined_schedule if schedule is None else schedule resources_scheduled = self.combined_resources_scheduled if resources_scheduled is None else resources_scheduled horizon_days = self.horizon_days if horizon_days is None else horizon_days - return percent_of(sum(self.get_scheduled_durations(schedule)), + scheduled_durations, _ = self.get_duration_data() + return percent_of(sum(scheduled_durations), self.total_available_seconds(resources_scheduled, horizon_days)) def _get_airmass_data_for_request(self, request_id): @@ -238,11 +268,10 @@ def _get_minmax_airmass(self, airmass_data, midpoint_duration): max_airmass = 0 min_airmass = 1000 for site in airmass_data.values(): - times, airmasses = site.values() + _, airmasses = site.values() airmasses = np.array(airmasses) - times = np.array([datetime.strptime(time, DTFORMAT) for time in times]) - min_airmass = min(airmasses, min_airmass) - max_airmass = max(airmasses, max_airmass) + min_airmass = 
min(min(airmasses), min_airmass) + max_airmass = max(max(airmasses), max_airmass) return min_airmass, max_airmass def _get_midpoint_airmasses_by_site(self, airmass_data, midpoint_time): @@ -305,26 +334,19 @@ def airmass_metrics(self, schedule=None): } return airmass_metrics - def binned_tac_priority_metrics(self, input_reservations=None, schedule=None): - """Bins into 10-19, 20-29, and 30 by default, but may be modified with bin_size and bin_range.""" - input_reservations = self.combined_input_reservations if input_reservations is None else input_reservations - schedule = self.combined_schedule if schedule is None else schedule - bin_size = 45 - sched_durations = self.get_scheduled_durations(schedule) - all_durations = [res.duration for res in input_reservations] - request_groups = self.scheduler_runner.normal_scheduler_input.request_groups - if self.scheduler_runner.rr_scheduler_input: - request_groups.extend(self.scheduler_runner.rr_scheduler_input.request_groups) - priority_values_by_rg_id = {rg.id: rg.proposal.tac_priority for rg in request_groups} - all_priority_values = [priority_values_by_rg_id[res.request_group_id] for res in input_reservations] - sched_priority_values = [] - for reservations in schedule.values(): - sched_priority_values.extend([priority_values_by_rg_id[res.request_group_id] - for res in reservations]) - sched_histogram = bin_data(sched_priority_values, bin_size=bin_size) - bin_sched_durations = bin_data(sched_priority_values, sched_durations, bin_size) - full_histogram = bin_data(all_priority_values, bin_size=bin_size) - bin_all_durations = bin_data(all_priority_values, all_durations, bin_size) + def binned_tac_priority_metrics(self): + """Bins metrics based on TAC priority.""" + bin_size = 10 + + sched_durations, unsched_durations = self.get_duration_data() + all_durations = sched_durations + unsched_durations + + sched_priorities, unsched_priorities = self.get_priority_data() + all_priorities = sched_priorities + unsched_priorities + 
sched_histogram = bin_data(sched_priorities, bin_size=bin_size) + bin_sched_durations = bin_data(sched_priorities, sched_durations, bin_size) + full_histogram = bin_data(all_priorities, bin_size=bin_size) + bin_all_durations = bin_data(all_priorities, all_durations, bin_size) bin_percent_count = {bin_: percent_of(np.array(sched_histogram[bin_]), np.array(full_histogram[bin_])) for bin_ in sched_histogram} bin_percent_duration = {bin_: percent_of(np.array(bin_sched_durations[bin_]), np.array(bin_all_durations[bin_])) diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index d1cbbdc4..a277d33f 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -101,6 +101,8 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche metrics = MetricCalculator(normal_scheduler_result, rr_scheduler_result, scheduler, scheduler_runner) sched_params = scheduler_runner.sched_params airmass_metrics = metrics.airmass_metrics() + sched_priorities, unsched_priorities = metrics.get_priority_data() + sched_durations, unsched_durations = metrics.get_duration_data() binned_tac_priority_metrics = metrics.binned_tac_priority_metrics() metrics = { @@ -117,7 +119,7 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche 'total_scheduled_count': metrics.count_scheduled()[0], 'total_request_count': metrics.count_scheduled()[1], 'percent_requests_scheduled': metrics.percent_reservations_scheduled(), - 'total_scheduled_seconds': sum(metrics.get_scheduled_durations()), + 'total_scheduled_seconds': sum(sched_durations), 'total_available_seconds': metrics.total_available_seconds(), 'percent_time_utilization': metrics.percent_time_utilization(), 'airmass_metrics': airmass_metrics, @@ -127,6 +129,10 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche 'total_seconds_by_priority': 
[binned_tac_priority_metrics['all_durations']], 'percent_sched_by_priority': [binned_tac_priority_metrics['percent_count']], 'percent_duration_by_priority': [binned_tac_priority_metrics['percent_duration']], + 'raw_scheduled_durations': sched_durations, + 'raw_unscheduled_durations': unsched_durations, + 'raw_scheduled_priorities': sched_priorities, + 'raw_unscheduled_priorities': unsched_priorities, } send_to_opensearch(sched_params.opensearch_url, sched_params.simulation_opensearch_index, metrics) From dbbe6184e8ab7c798336544c1c11b5eeb314aa26 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Thu, 21 Jul 2022 17:04:20 -0700 Subject: [PATCH 094/165] priority duration scatter plot for with/withou duration --- .../simulation/plot_priority_duration.py | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 adaptive_scheduler/simulation/plot_priority_duration.py diff --git a/adaptive_scheduler/simulation/plot_priority_duration.py b/adaptive_scheduler/simulation/plot_priority_duration.py new file mode 100644 index 00000000..a7a1026a --- /dev/null +++ b/adaptive_scheduler/simulation/plot_priority_duration.py @@ -0,0 +1,51 @@ +from xml.dom.pulldom import default_bufsize +import numpy as np +import matplotlib.pyplot as plt +from opensearchpy import OpenSearch +from plotutils import get_data_from_opensearch +VARIABLE = ['with-duration','no-duration',] + # 'with-duration-scaled-100','no-duration','no-duration-scaled-100',] + +markers = ["o" , "," ,"v" , "^" , "<", ">"] +colors = ['r','g','b','c','m', 'y', 'k'] +def rand_jitter(arr): + stdev = .01 * (max(arr) - min(arr)) + return arr + np.random.randn(len(arr)) * stdev + +def plot_sched_priority_duration_dotplot(): + fig = plt.figure(figsize=(20, 10)) + fig.suptitle(f'1m0 Scheduled requests Distribution of Priority and Duration With Airmass Optimization', fontsize=20) + fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + ax = fig.add_subplot() + for i, id in enumerate(VARIABLE): + data = 
get_data_from_opensearch(f'1m0-optimize-airmass-{id}') + ax.scatter(rand_jitter(data['raw_scheduled_priorities']), rand_jitter(data['raw_scheduled_durations']), + marker = markers[i],c = colors[i], s = 10, label = f'scheduled requests {id}',alpha = 0.5) + # ax.scatter(rand_jitter(data['raw_unscheduled_priorities']), rand_jitter(data['raw_unscheduled_durations']), c = default_colors[5*i+2], marker='^',s=15, label = f'unscheduled requests {id}', alpha = 0.7) + ax.set_xlabel('Priority') + ax.set_ylabel('Request Duration') + ax.legend() + plt.show(block=False) + plt.show() + +def plot_unsched_priority_duration_dotplot(): + fig = plt.figure(figsize=(20, 10)) + fig.suptitle(f'1m0 Unscheduled requests Distribution of Priority and Duration With Airmass Optimization', fontsize=20) + fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + ax = fig.add_subplot() + for i, id in enumerate(VARIABLE): + data = get_data_from_opensearch(f'1m0-optimize-airmass-{id}') + # ax.scatter(rand_jitter(data['raw_scheduled_priorities']), rand_jitter(data['raw_scheduled_durations']), + # marker = markers[i],c = colors[i], s = 10, label = f'scheduled requests {id}',alpha = 0.5) + ax.scatter(rand_jitter(data['raw_unscheduled_priorities']), rand_jitter(data['raw_unscheduled_durations']), + c =colors[i], marker=markers[i],s=10, label = f'unscheduled requests {id}', alpha = 0.7) + ax.set_xlabel('Priority') + ax.set_ylabel('Request Duration') + ax.legend() + plt.show(block=False) + plt.show() + +if __name__ == '__main__': + + plot_sched_priority_duration_dotplot() + plot_unsched_priority_duration_dotplot() \ No newline at end of file From 00e97745e9a432bcca544d2d54773240a974fbbd Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Fri, 22 Jul 2022 23:33:53 +0000 Subject: [PATCH 095/165] fixed unscheduled duration data --- adaptive_scheduler/simulation/metrics.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py 
b/adaptive_scheduler/simulation/metrics.py index 09229b84..e476f201 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -132,8 +132,6 @@ def __init__(self, normal_scheduler_result, rr_scheduler_result, scheduler, sche if self.scheduler_runner.rr_scheduler_input: self.request_groups.extend(self.scheduler_runner.rr_scheduler_input.request_groups) - self._get_scheduled_rg_ids() - self.airmass_data_by_request_id = defaultdict(dict) def _combine_resources_scheduled(self): @@ -177,26 +175,17 @@ def total_scheduled_eff_priority(self, schedule=None): effective_priorities.extend([res.priority for res in reservations]) return sum(effective_priorities), effective_priorities - def _get_scheduled_rg_ids(self): - self.sched_rg_ids = [] - self.input_rg_ids = [res.request_group_id for res in self.combined_input_reservations] - for reservations in self.combined_schedule.values(): - self.sched_rg_ids.extend([res.request_group_id for res in reservations]) - return self.sched_rg_ids - def get_duration_data(self): """Returns scheduled and unscheduled durations.""" - durations_by_rg_id = {res.request_group_id: res.duration for res in self.combined_input_reservations} - sched_durations = [durations_by_rg_id[rg_id] for rg_id in self.sched_rg_ids] - unsched_durations = [durations_by_rg_id[rg_id] for rg_id in self.input_rg_ids if rg_id not in self.sched_rg_ids] + sched_durations = [res.duration for res in self.combined_input_reservations if res.scheduled] + unsched_durations = [res.duration for res in self.combined_input_reservations if not res.scheduled] return sched_durations, unsched_durations def get_priority_data(self): """Returns scheduled and unscheduled priority values. Accesses them in the same order as durations so they can be cross-matched. 
Scaling changes the priorities to a different range of numbers.""" - priority_by_rg_id = {rg.id: rg.proposal.tac_priority for rg in self.request_groups} - sched_priorities = [priority_by_rg_id[rg_id] for rg_id in self.sched_rg_ids] - unsched_priorities = [priority_by_rg_id[rg_id] for rg_id in self.input_rg_ids if rg_id not in self.sched_rg_ids] + sched_priorities = [res.priority for res in self.combined_input_reservations if res.scheduled] + unsched_priorities = [res.priority for res in self.combined_input_reservations if not res.scheduled] # uncomment to remap the priorities # scale = (100, 10, 30, 10) # sched_priorities = [scalefunc(p, *scale) for p in sched_priorities] From 22d1dc24f14889ccb076862aa4682f3560409e47 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Mon, 25 Jul 2022 09:55:19 -0700 Subject: [PATCH 096/165] plot the priority duration scatterplot --- .../simulation/plot_priority_duration.py | 55 +++++++++---------- 1 file changed, 25 insertions(+), 30 deletions(-) diff --git a/adaptive_scheduler/simulation/plot_priority_duration.py b/adaptive_scheduler/simulation/plot_priority_duration.py index a7a1026a..d7353ed7 100644 --- a/adaptive_scheduler/simulation/plot_priority_duration.py +++ b/adaptive_scheduler/simulation/plot_priority_duration.py @@ -3,49 +3,44 @@ import matplotlib.pyplot as plt from opensearchpy import OpenSearch from plotutils import get_data_from_opensearch -VARIABLE = ['with-duration','no-duration',] +VARIABLE = ['no-duration-v2','no-duration-scaled-100-v2'] # 'with-duration-scaled-100','no-duration','no-duration-scaled-100',] markers = ["o" , "," ,"v" , "^" , "<", ">"] -colors = ['r','g','b','c','m', 'y', 'k'] +colors = ['r','b','c','m', 'y', 'k'] def rand_jitter(arr): stdev = .01 * (max(arr) - min(arr)) return arr + np.random.randn(len(arr)) * stdev def plot_sched_priority_duration_dotplot(): - fig = plt.figure(figsize=(20, 10)) - fig.suptitle(f'1m0 Scheduled requests Distribution of Priority and Duration With Airmass Optimization', 
fontsize=20) + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(28, 12)) + fig.suptitle(f'1m0 Distribution of Priority and Duration With Airmass Optimization', fontsize=20) fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) - ax = fig.add_subplot() for i, id in enumerate(VARIABLE): data = get_data_from_opensearch(f'1m0-optimize-airmass-{id}') - ax.scatter(rand_jitter(data['raw_scheduled_priorities']), rand_jitter(data['raw_scheduled_durations']), - marker = markers[i],c = colors[i], s = 10, label = f'scheduled requests {id}',alpha = 0.5) - # ax.scatter(rand_jitter(data['raw_unscheduled_priorities']), rand_jitter(data['raw_unscheduled_durations']), c = default_colors[5*i+2], marker='^',s=15, label = f'unscheduled requests {id}', alpha = 0.7) - ax.set_xlabel('Priority') - ax.set_ylabel('Request Duration') - ax.legend() - plt.show(block=False) - plt.show() - -def plot_unsched_priority_duration_dotplot(): - fig = plt.figure(figsize=(20, 10)) - fig.suptitle(f'1m0 Unscheduled requests Distribution of Priority and Duration With Airmass Optimization', fontsize=20) - fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) - ax = fig.add_subplot() + if id in ['no-duration-scaled-100-v2', 'with-duration-scaled-100-v2']: + data['raw_scheduled_priorities'] = [(p+35)/4.5 for p in data['raw_scheduled_priorities']] + print(id, len(data['raw_scheduled_priorities']), len(data['raw_unscheduled_priorities'])) + ax1.scatter(rand_jitter(data['raw_scheduled_priorities']), rand_jitter(data['raw_scheduled_durations']), + marker = markers[i],c = colors[i], s = 10, label = f'scheduled requests {id}',alpha = 0.3) + ax1.set_ylim(top=11000) + ax1.set_xlabel('Priority') + ax1.set_ylabel('Request Duration') + ax1.legend() for i, id in enumerate(VARIABLE): data = get_data_from_opensearch(f'1m0-optimize-airmass-{id}') - # ax.scatter(rand_jitter(data['raw_scheduled_priorities']), rand_jitter(data['raw_scheduled_durations']), - # marker = markers[i],c = colors[i], s = 10, label = f'scheduled requests 
{id}',alpha = 0.5) - ax.scatter(rand_jitter(data['raw_unscheduled_priorities']), rand_jitter(data['raw_unscheduled_durations']), - c =colors[i], marker=markers[i],s=10, label = f'unscheduled requests {id}', alpha = 0.7) - ax.set_xlabel('Priority') - ax.set_ylabel('Request Duration') - ax.legend() - plt.show(block=False) + if id in ['no-duration-scaled-100-v2', 'with-duration-scaled-100-v2']: + data['raw_unscheduled_priorities'] = [(p+35)/4.5 for p in data['raw_unscheduled_priorities']] + ax2.scatter(rand_jitter(data['raw_unscheduled_priorities']), rand_jitter(data['raw_unscheduled_durations']), + c =colors[i], marker=markers[i],s=10, label = f'unscheduled requests {id}', alpha = 0.3) + ax2.set_ylim(top=11000) + ax2.set_xlabel('Priority') + ax2.set_ylabel('Request Duration') + ax2.legend() + plt.show(block = False) plt.show() if __name__ == '__main__': - - plot_sched_priority_duration_dotplot() - plot_unsched_priority_duration_dotplot() \ No newline at end of file + # plot_sched_priority_duration_dotplot() + data = get_data_from_opensearch(f'1m0-optimize-airmss-no-duration_scaled-100') + print(len(data['unscheduled_priority'][0]), len(data['scheduled_priority'][0])) \ No newline at end of file From 94582881fa4de4d6865a8c7b541c3ce78a51db74 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Mon, 25 Jul 2022 18:53:38 +0000 Subject: [PATCH 097/165] fixed priorities --- adaptive_scheduler/simulation/metrics.py | 63 +++++++++++------------- tests/test_simulator_metrics.py | 26 ++++------ 2 files changed, 40 insertions(+), 49 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index e476f201..3636f2f3 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -34,6 +34,7 @@ def percent_diff(x, y): def scalefunc(p, newmax, newmin, oldmax, oldmin): + """Remaps a range of values to another range of values.""" return (p-oldmin)*(newmax-newmin)/(oldmax-oldmin) + newmin @@ 
-154,45 +155,47 @@ def _combine_normal_rr_input_reservations(self): reservations = [res for res in comp_res.reservation_list if res not in self.combined_input_reservations] self.combined_input_reservations.extend(reservations) - def count_scheduled(self, input_reservations=None, schedule=None): - input_reservations = self.combined_input_reservations if input_reservations is None else input_reservations - schedule = self.combined_schedule if schedule is None else schedule + def count_scheduled(self): scheduled_reservations = [] - for reservations in schedule.values(): + for reservations in self.combined_schedule.values(): scheduled_reservations.extend(reservations) - return len(scheduled_reservations), len(input_reservations) + return len(scheduled_reservations), len(self.combined_input_reservations) - def percent_reservations_scheduled(self, input_reservations=None, schedule=None): - input_reservations = self.combined_input_reservations if input_reservations is None else input_reservations - schedule = self.combined_schedule if schedule is None else schedule - scheduled, total = self.count_scheduled(input_reservations, schedule) + def percent_reservations_scheduled(self): + scheduled, total = self.count_scheduled() return percent_of(scheduled, total) - def total_scheduled_eff_priority(self, schedule=None): - schedule = self.combined_schedule if schedule is None else schedule + def total_scheduled_eff_priority(self): effective_priorities = [] - for reservations in schedule.values(): + for reservations in self.combined_schedule.values(): effective_priorities.extend([res.priority for res in reservations]) return sum(effective_priorities), effective_priorities def get_duration_data(self): """Returns scheduled and unscheduled durations.""" - sched_durations = [res.duration for res in self.combined_input_reservations if res.scheduled] - unsched_durations = [res.duration for res in self.combined_input_reservations if not res.scheduled] + sched_durations = [] + 
unsched_durations = [] + for res in self.combined_input_reservations: + sched_durations.append(res.duration) if res.scheduled else unsched_durations.append(res.duration) return sched_durations, unsched_durations def get_priority_data(self): """Returns scheduled and unscheduled priority values. Accesses them in the same order as durations so they can be cross-matched. Scaling changes the priorities to a different range of numbers.""" - sched_priorities = [res.priority for res in self.combined_input_reservations if res.scheduled] - unsched_priorities = [res.priority for res in self.combined_input_reservations if not res.scheduled] + sched_rg_ids = [] + unsched_rg_ids = [] + for res in self.combined_input_reservations: + sched_rg_ids.append(res.request_group_id) if res.scheduled else unsched_rg_ids.append(res.request_group_id) + priorities_by_rg_id = {rg.id: rg.proposal.tac_priority for rg in self.request_groups} + sched_priorities = [priorities_by_rg_id[rg_id] for rg_id in sched_rg_ids] + unsched_priorities = [priorities_by_rg_id[rg_id] for rg_id in unsched_rg_ids] # uncomment to remap the priorities - # scale = (100, 10, 30, 10) - # sched_priorities = [scalefunc(p, *scale) for p in sched_priorities] - # unsched_priorities = [scalefunc(p, *scale) for p in unsched_priorities] + scale = (100, 10, 30, 10) + sched_priorities = [scalefunc(p, *scale) for p in sched_priorities] + unsched_priorities = [scalefunc(p, *scale) for p in unsched_priorities] return sched_priorities, unsched_priorities - def total_available_seconds(self, resources_scheduled=None, horizon_days=None): + def total_available_seconds(self): """Aggregates the total available time, calculated from dark intervals. Args: @@ -204,25 +207,19 @@ def total_available_seconds(self, resources_scheduled=None, horizon_days=None): Returns: total_available_time (float): The dark intervals capped by the horizon. 
""" - resources_scheduled = self.combined_resources_scheduled if resources_scheduled is None else resources_scheduled - horizon_days = self.horizon_days if horizon_days is None else horizon_days total_available_time = 0 start_time = self.scheduler.estimated_scheduler_end - end_time = start_time + timedelta(days=horizon_days) - for resource in resources_scheduled: + end_time = start_time + timedelta(days=self.horizon_days) + for resource in self.combined_resources_scheduled: if resource in self.scheduler.visibility_cache: dark_intervals = self.scheduler.visibility_cache[resource].dark_intervals available_time = time_in_capped_intervals(dark_intervals, start_time, end_time) total_available_time += available_time return total_available_time - def percent_time_utilization(self, schedule=None, resources_scheduled=None, horizon_days=None): - schedule = self.combined_schedule if schedule is None else schedule - resources_scheduled = self.combined_resources_scheduled if resources_scheduled is None else resources_scheduled - horizon_days = self.horizon_days if horizon_days is None else horizon_days + def percent_time_utilization(self): scheduled_durations, _ = self.get_duration_data() - return percent_of(sum(scheduled_durations), - self.total_available_seconds(resources_scheduled, horizon_days)) + return percent_of(sum(scheduled_durations), self.total_available_seconds()) def _get_airmass_data_for_request(self, request_id): """Pulls airmass data from the Observation Portal, cache it in our local directory. 
@@ -325,7 +322,7 @@ def airmass_metrics(self, schedule=None): def binned_tac_priority_metrics(self): """Bins metrics based on TAC priority.""" - bin_size = 10 + bin_size = 45 sched_durations, unsched_durations = self.get_duration_data() all_durations = sched_durations + unsched_durations @@ -336,9 +333,9 @@ def binned_tac_priority_metrics(self): bin_sched_durations = bin_data(sched_priorities, sched_durations, bin_size) full_histogram = bin_data(all_priorities, bin_size=bin_size) bin_all_durations = bin_data(all_priorities, all_durations, bin_size) - bin_percent_count = {bin_: percent_of(np.array(sched_histogram[bin_]), np.array(full_histogram[bin_])) + bin_percent_count = {bin_: percent_of(sched_histogram[bin_], full_histogram[bin_]) for bin_ in sched_histogram} - bin_percent_duration = {bin_: percent_of(np.array(bin_sched_durations[bin_]), np.array(bin_all_durations[bin_])) + bin_percent_duration = {bin_: percent_of(bin_sched_durations[bin_], bin_all_durations[bin_]) for bin_ in bin_sched_durations} output_dict = { diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 6c23e93c..2d9cb2c5 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -4,7 +4,6 @@ import os import json from datetime import datetime, timedelta -import numpy as np from mock import Mock @@ -67,32 +66,27 @@ def test_combining_schedules(self): assert same_schedule.combined_schedule == fake_schedule1 def test_percent_scheduled(self): + assert self.metrics.percent_reservations_scheduled() == 60. 
+ scheduled_reservation = Mock(scheduled=True) unscheduled_reservation = Mock(scheduled=False) - mock_schedule = {'bpl': [scheduled_reservation], 'coj': [scheduled_reservation, scheduled_reservation]} mock_scheduler_input = [unscheduled_reservation, scheduled_reservation, scheduled_reservation, scheduled_reservation] + metrics2 = MetricCalculator(self.mock_scheduler_result, + None, + self.mock_scheduler, + self.mock_scheduler_runner) + metrics2.combined_schedule = mock_schedule + metrics2.combined_input_reservations = mock_scheduler_input - assert self.metrics.percent_reservations_scheduled(mock_scheduler_input, mock_schedule) == 75. - assert self.metrics.percent_reservations_scheduled() == 60. + assert metrics2.percent_reservations_scheduled() == 75. def test_total_time_aggregators(self): seconds_in_day = 86400 - assert sum(self.metrics.get_scheduled_durations(self.mock_scheduler_result.schedule)) == 60 - assert sum(self.metrics.get_scheduled_durations()) == 60 - assert self.metrics.total_available_seconds(['bpl', 'coj'], 0) == 0 - assert self.metrics.total_available_seconds(['bpl', 'coj'], 1) == 2*seconds_in_day - assert self.metrics.total_available_seconds(['bpl', 'coj'], 5) == 4*seconds_in_day - assert self.metrics.total_available_seconds(['bpl'], 1) == seconds_in_day - assert self.metrics.total_available_seconds([], 1) == 0 + assert sum(self.metrics.get_duration_data()[0]) == 60 assert self.metrics.total_available_seconds() == 4*seconds_in_day - def test_percent_time_utilization(self): - test_schedule = {'bpl': [Mock(duration=86400)]} - assert self.metrics.percent_time_utilization(test_schedule, ['bpl'], 1) == 100. 
- assert self.metrics.percent_time_utilization() == 60/(86400*4)*100 - def test_bin_data(self): bin_by = [1, 3, 4, 2, 6, 5, 3, 2, 3, 4, 7, 9, 3, 8, 6, 4] bin_data_ = [1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 7, 6, 5, 4, 3, 2] From 96b128341ce2f900afeee07f473666d763cfb179 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Mon, 25 Jul 2022 21:11:17 +0000 Subject: [PATCH 098/165] added functionality to store raw data, improved documentation --- adaptive_scheduler/simulation/metrics.py | 25 +++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 3636f2f3..76b9f7ab 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -72,10 +72,26 @@ def bin_data(bin_by, data=[], bin_size=1, bin_range=None, aggregator=sum): bin_size (int): The width of the bins. bin_range (int, int): Override the bin ranges. Otherwise, use the min/max of the data. aggregator (func): The aggregation function to apply over the list of data. Must be callable on an array. - Additional items can be passed to the aggregation function. + Additional items can be passed to the aggregation function. Pass None to aggregator to store the raw + list into the bin. Returns: - data_dict (str: int): The frequency count of the data. + data_dict (str: int): The binned data. + + Examples: + Simple frequency count: + >>> bin_data([1, 2, 3, 2]) + {'1': 1, '2': 2, '3': 1} + + Bin a list by values in an associated list, e.g. 
highest test score by age group: + >>> ages = [12, 13, 11, 14, 15, 12, 13, 10, 10, 13] + >>> scores = [76, 84, 92, 56, 91, 87, 72, 95, 89, 77] + >>> bin_data(ages, scores, bin_size=2, bin_range=(10, 15), aggregator=max) + {'10-11': 95, '12-13': 87, '14-15': 91} + + Get the raw list of items in each bin: + >>> bin_data([4, 4, 5, 6, 9], [4, 4, 5, 6, 9], aggregator=None) + {'4': [4, 4], '5': [5], '6': [6], '9': [9]} """ bin_range = (min(bin_by), max(bin_by)) if bin_range is None else bin_range bin_dict = {bin_name: [] for bin_name in generate_bin_names(bin_size, bin_range)} @@ -89,7 +105,10 @@ def bin_data(bin_by, data=[], bin_size=1, bin_range=None, aggregator=sum): bin_dict[keyname].append(data[i]) else: bin_dict[keyname].append(1) - bin_dict = {key: aggregator(val) for key, val in bin_dict.items() if val} + if aggregator: + bin_dict = {key: aggregator(vals) for key, vals in bin_dict.items() if vals} + else: + bin_dict = {key: vals for key, vals in bin_dict.items() if vals} return bin_dict From f53b548ff729ed64464244c6c0f5d91f41b61c1e Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Mon, 25 Jul 2022 15:13:22 -0700 Subject: [PATCH 099/165] heat map for priority duration --- .../plot_heat_map_priority_duration.py | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 adaptive_scheduler/simulation/plot_heat_map_priority_duration.py diff --git a/adaptive_scheduler/simulation/plot_heat_map_priority_duration.py b/adaptive_scheduler/simulation/plot_heat_map_priority_duration.py new file mode 100644 index 00000000..5cb87055 --- /dev/null +++ b/adaptive_scheduler/simulation/plot_heat_map_priority_duration.py @@ -0,0 +1,64 @@ +from xml.dom.pulldom import default_bufsize +import numpy as np +import matplotlib.pyplot as plt +from opensearchpy import OpenSearch +from plotutils import get_data_from_opensearch +from adaptive_scheduler.simulation.metrics import bin_data +VARIABLE = ['no-duration-v2','no-duration-scaled-100-v2'] + # 
'with-duration-scaled-100','no-duration','no-duration-scaled-100',] + +markers = ["o" , "," ,"v" , "^" , "<", ">"] +colors = ['r','b','c','m', 'y', 'k'] +def rand_jitter(arr): + stdev = .01 * (max(arr) - min(arr)) + return arr + np.random.randn(len(arr)) * stdev + +def plot_sched_priority_duration_dotplot(): + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(28, 12)) + fig.suptitle(f'1m0 Distribution of Priority and Duration With Airmass Optimization', fontsize=20) + fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + for i, id in enumerate(VARIABLE): + data = get_data_from_opensearch(f'1m0-optimize-airmass-{id}') + if id in ['no-duration-scaled-100-v2', 'with-duration-scaled-100-v2']: + data['raw_scheduled_priorities'] = [(p+35)/4.5 for p in data['raw_scheduled_priorities']] + # print(id, len(data['raw_scheduled_priorities']), len(data['raw_unscheduled_priorities'])) + # ax1.scatter(rand_jitter(data['raw_scheduled_priorities']), rand_jitter(data['raw_scheduled_durations']), + # marker = markers[i],c = colors[i], s = 10, label = f'scheduled requests {id}',alpha = 0.3) + + + ax1.set_ylim(top=11000) + ax1.set_xlabel('Priority') + ax1.set_ylabel('Request Duration') + ax1.legend() + for i, id in enumerate(VARIABLE): + data = get_data_from_opensearch(f'1m0-optimize-airmass-{id}') + if id in ['no-duration-scaled-100-v2', 'with-duration-scaled-100-v2']: + data['raw_unscheduled_priorities'] = [(p+35)/4.5 for p in data['raw_unscheduled_priorities']] + # ax2.scatter(rand_jitter(data['raw_unscheduled_priorities']), rand_jitter(data['raw_unscheduled_durations']), + # c =colors[i], marker=markers[i],s=10, label = f'unscheduled requests {id}', alpha = 0.3) + ax2.set_ylim(top=11000) + ax2.set_xlabel('Priority') + ax2.set_ylabel('Request Duration') + ax2.legend() + plt.show(block = False) + plt.show() + + +def plot_heat_map_priority_duration(): + fig = plt.subplot() + fig.suptitle() + fig.subplots_adjust() + for i, id in enumerate(VARIABLE): + data = 
get_data_from_opensearch(f'1m0-optimize-airmass-{id}') + if id in ['no-duration-scaled-100-v2', 'with-duration-scaled-100-v2']: + data['raw_unscheduled_priorities'] = [(p+35)/4.5 for p in data['raw_unscheduled_priorities']] + sched_priorities = data['raw_scheduled_priorities'] + sched_durations = data['raw_scheduled_durations'] + unsched_priorities = data['raw_unscheduled_priorities'] + unsched_durations = data['raw_unscheduled_durations'] + level_1_bins = bin_data(sched_priorities, sched_durations, bin_size=5, aggregator=None) + print(level_1_bins) + + +if __name__ == '__main__': + plot_heat_map_priority_duration() \ No newline at end of file From 67705f5f18d2ed2e03441b00b434c35c59c7f1d6 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Mon, 25 Jul 2022 23:06:26 +0000 Subject: [PATCH 100/165] fill feature added to binning function --- adaptive_scheduler/simulation/metrics.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 76b9f7ab..b8b8f44e 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -58,7 +58,7 @@ def generate_bin_names(bin_size, bin_range): return bin_names -def bin_data(bin_by, data=[], bin_size=1, bin_range=None, aggregator=sum): +def bin_data(bin_by, data=[], bin_size=1, bin_range=None, fill=None, aggregator=sum): """Bins data to create a histogram. Each bin is half-open, i.e. defined on the interval [a, b) for every bin except for the last bin, which is defined on the interval [a, b]. The naming convention is different for integers and floats. For example, for the label '1-2', this means the discrete values 1 and 2, whereas @@ -71,8 +71,9 @@ def bin_data(bin_by, data=[], bin_size=1, bin_range=None, aggregator=sum): extra values are thrown out. The aggregation function is applied to the data at the end. bin_size (int): The width of the bins. 
bin_range (int, int): Override the bin ranges. Otherwise, use the min/max of the data. + fill: The data value to fill with if the bin is empty. If None, then remove empty bins. aggregator (func): The aggregation function to apply over the list of data. Must be callable on an array. - Additional items can be passed to the aggregation function. Pass None to aggregator to store the raw + The aggregator will be applied to fill values. Pass None to aggregator to store the raw list into the bin. Returns: @@ -105,10 +106,12 @@ def bin_data(bin_by, data=[], bin_size=1, bin_range=None, aggregator=sum): bin_dict[keyname].append(data[i]) else: bin_dict[keyname].append(1) - if aggregator: - bin_dict = {key: aggregator(vals) for key, vals in bin_dict.items() if vals} + if fill is not None: + bin_dict = {key: vals if vals else [fill] for key, vals in bin_dict.items()} else: bin_dict = {key: vals for key, vals in bin_dict.items() if vals} + bin_dict = {key: aggregator(vals) if aggregator else vals for key, vals in bin_dict.items()} + return bin_dict From 7102b9fb02ee894699724aae27c7af59e95fb8f6 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Mon, 25 Jul 2022 23:07:49 +0000 Subject: [PATCH 101/165] minor change to fill functionality --- adaptive_scheduler/simulation/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index b8b8f44e..6638cd99 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -107,7 +107,7 @@ def bin_data(bin_by, data=[], bin_size=1, bin_range=None, fill=None, aggregator= else: bin_dict[keyname].append(1) if fill is not None: - bin_dict = {key: vals if vals else [fill] for key, vals in bin_dict.items()} + bin_dict = {key: vals if vals else list(fill) for key, vals in bin_dict.items()} else: bin_dict = {key: vals for key, vals in bin_dict.items() if vals} bin_dict = {key: aggregator(vals) if aggregator 
else vals for key, vals in bin_dict.items()} From 5eef11236fc0e2b0292389e7aa91c29dc0bcc8ee Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Mon, 25 Jul 2022 23:10:42 +0000 Subject: [PATCH 102/165] oops, broke something, changing it back --- adaptive_scheduler/simulation/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 6638cd99..b8b8f44e 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -107,7 +107,7 @@ def bin_data(bin_by, data=[], bin_size=1, bin_range=None, fill=None, aggregator= else: bin_dict[keyname].append(1) if fill is not None: - bin_dict = {key: vals if vals else list(fill) for key, vals in bin_dict.items()} + bin_dict = {key: vals if vals else [fill] for key, vals in bin_dict.items()} else: bin_dict = {key: vals for key, vals in bin_dict.items() if vals} bin_dict = {key: aggregator(vals) if aggregator else vals for key, vals in bin_dict.items()} From 3aa78bcf772ba2ba817e6feab793a0db6ded2e93 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Tue, 26 Jul 2022 17:44:19 +0000 Subject: [PATCH 103/165] updated documentation and slightly modified binning function behavior --- adaptive_scheduler/simulation/metrics.py | 50 ++++++++++++++---------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index b8b8f44e..4c9581f9 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -58,34 +58,40 @@ def generate_bin_names(bin_size, bin_range): return bin_names -def bin_data(bin_by, data=[], bin_size=1, bin_range=None, fill=None, aggregator=sum): - """Bins data to create a histogram. Each bin is half-open, i.e. defined on the interval [a, b) for every bin - except for the last bin, which is defined on the interval [a, b]. 
The naming convention is different for - integers and floats. For example, for the label '1-2', this means the discrete values 1 and 2, whereas - for the label '1.0-2.0' this means the values on the interval [1.0, 2.0). Bins are uniformly spaced. +def bin_data(bin_by, data=[], bin_size=1, bin_range=None, fill=None, aggregator=len): + """Bins data to create a histogram. For float values, each bin is half-open, i.e. defined on + the interval [a, b) for every bin except for the last bin, which is defined on the interval [a, b]. + The naming convention is different for integers, which use open intervals [a, b] since they are discrete. + For example, for the label '1-3', this means the values 1, 2, and 3, whereas for the label '1.0-3.0' + this means the values on the interval [1.0, 3.0). Bins are uniformly spaced. Args: bin_by (list): A list of data to bin by. Can be float or int. - data (list): Additional data points associated with the data to bin by. If the lengths are - mismatched, you will get an IndexError if the data list is too short. If it is too long, - extra values are thrown out. The aggregation function is applied to the data at the end. - bin_size (int): The width of the bins. - bin_range (int, int): Override the bin ranges. Otherwise, use the min/max of the data. - fill: The data value to fill with if the bin is empty. If None, then remove empty bins. + data (list): Additional data points associated with the data to bin by. It is best for the length + of this array to match the length of bin_by. The aggregation function is applied to the data + after binning, on a per-bin basis. + bin_size: The width of the bins. + bin_range: A tuple of numbers. Overrides the bin ranges. Otherwise, use the min/max of the data. + fill: The data value to fill with if the bin is empty. If None is passed, then empty bins are removed. + The aggregator will be applied to fill values as well. aggregator (func): The aggregation function to apply over the list of data. 
Must be callable on an array. - The aggregator will be applied to fill values. Pass None to aggregator to store the raw - list into the bin. + If None is passed, then the raw values are stored in a list. Returns: - data_dict (str: int): The binned data. + data_dict: The binned data. Each key is a label corresponding to either a list of values or a single number, + depending on the type of aggregation function used. Examples: Simple frequency count: - >>> bin_data([1, 2, 3, 2]) - {'1': 1, '2': 2, '3': 1} + >>> bin_data([1, 2, 3, 2, 8]) + {'1': 1, '2': 2, '3': 1, '8': 1} - Bin a list by values in an associated list, e.g. highest test score by age group: - >>> ages = [12, 13, 11, 14, 15, 12, 13, 10, 10, 13] + Frequency count with zero values: + >>> bin_data([4, 4, 5, 6, 7, 2], fill=[]) + {'2': 1, '3': 0, '4': 2, '5': 1, '6': 1, '7': 1} + + Bin two lists of data, e.g. highest test score by age group: + >>> ages = [12, 13, 11, 14, 15, 12, 13, 10, 10, 13] >>> scores = [76, 84, 92, 56, 91, 87, 72, 95, 89, 77] >>> bin_data(ages, scores, bin_size=2, bin_range=(10, 15), aggregator=max) {'10-11': 95, '12-13': 87, '14-15': 91} @@ -105,9 +111,13 @@ def bin_data(bin_by, data=[], bin_size=1, bin_range=None, fill=None, aggregator= if data: bin_dict[keyname].append(data[i]) else: - bin_dict[keyname].append(1) + bin_dict[keyname].append(value) if fill is not None: - bin_dict = {key: vals if vals else [fill] for key, vals in bin_dict.items()} + try: + fill = list(fill) + except TypeError: + fill = [fill] + bin_dict = {key: vals if vals else fill for key, vals in bin_dict.items()} else: bin_dict = {key: vals for key, vals in bin_dict.items() if vals} bin_dict = {key: aggregator(vals) if aggregator else vals for key, vals in bin_dict.items()} From 75364d81445d184d850ed4287b202e13b00d6d55 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Tue, 26 Jul 2022 17:48:32 +0000 Subject: [PATCH 104/165] changed default binning behavior --- adaptive_scheduler/simulation/metrics.py | 12 
++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 4c9581f9..6cdbb42a 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -58,7 +58,7 @@ def generate_bin_names(bin_size, bin_range): return bin_names -def bin_data(bin_by, data=[], bin_size=1, bin_range=None, fill=None, aggregator=len): +def bin_data(bin_by, data=[], bin_size=1, bin_range=None, fill=[], aggregator=len): """Bins data to create a histogram. For float values, each bin is half-open, i.e. defined on the interval [a, b) for every bin except for the last bin, which is defined on the interval [a, b]. The naming convention is different for integers, which use open intervals [a, b] since they are discrete. @@ -83,12 +83,12 @@ def bin_data(bin_by, data=[], bin_size=1, bin_range=None, fill=None, aggregator= Examples: Simple frequency count: - >>> bin_data([1, 2, 3, 2, 8]) - {'1': 1, '2': 2, '3': 1, '8': 1} + >>> bin_data([1, 2, 3, 2, 6]) + {'1': 1, '2': 2, '3': 1, '4': 0, '5': 0, '6': 1} - Frequency count with zero values: - >>> bin_data([4, 4, 5, 6, 7, 2], fill=[]) - {'2': 1, '3': 0, '4': 2, '5': 1, '6': 1, '7': 1} + Frequency count without empty bins ('3' is removed): + >>> bin_data([4, 4, 5, 6, 7, 2], fill=None) + {'2': 1, '4': 2, '5': 1, '6': 1, '7': 1} Bin two lists of data, e.g. 
highest test score by age group: >>> ages = [12, 13, 11, 14, 15, 12, 13, 10, 10, 13] From 3c9750ca5eb95365f46c42e17ab07b3550c45c15 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Tue, 26 Jul 2022 17:52:31 +0000 Subject: [PATCH 105/165] minor updates to binning test --- tests/test_simulator_metrics.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index 2d9cb2c5..b2c0fb1f 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -102,13 +102,13 @@ def test_bin_data(self): sumdata = {'1-3': 36, '4-6': 27, '7-9': 17} mindata = {'1-3': 1, '4-6': 2, '7-9': 4} - assert bin_data(bin_by, bin_size=3, bin_range=bin_range) == allparams - assert bin_data(bin_by) == defaults - assert bin_data(bin_by, bin_size=2) == unevenbins - assert bin_data(bin_by, bin_size=2.5, bin_range=(0, 9)) == floatbinsize - assert bin_data(bin_by_float) == floats - assert bin_data(bin_by_float, bin_range=(0, 4)) == capped_floats - assert bin_data(bin_by, bin_data_, bin_size=3) == sumdata + assert bin_data(bin_by, bin_size=3, bin_range=bin_range, fill=None) == allparams + assert bin_data(bin_by, fill=None) == defaults + assert bin_data(bin_by, bin_size=2, fill=None) == unevenbins + assert bin_data(bin_by, bin_size=2.5, bin_range=(0, 9), fill=None) == floatbinsize + assert bin_data(bin_by_float, fill=None) == floats + assert bin_data(bin_by_float, bin_range=(0, 4), fill=None) == capped_floats + assert bin_data(bin_by, bin_data_, bin_size=3, fill=None, aggregator=sum) == sumdata assert bin_data(bin_by, bin_data_, bin_size=3, aggregator=min) == mindata def test_airmass_functions(self): From 9736d7eafa6f5a928a1169d6cd4a4785e0789c2a Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Tue, 26 Jul 2022 14:29:54 -0700 Subject: [PATCH 106/165] heatmap --- .../plot_heat_map_priority_duration.py | 74 +++++++++++++++---- 1 file changed, 59 insertions(+), 15 deletions(-) diff --git 
a/adaptive_scheduler/simulation/plot_heat_map_priority_duration.py b/adaptive_scheduler/simulation/plot_heat_map_priority_duration.py index 5cb87055..f1c0996b 100644 --- a/adaptive_scheduler/simulation/plot_heat_map_priority_duration.py +++ b/adaptive_scheduler/simulation/plot_heat_map_priority_duration.py @@ -1,11 +1,16 @@ +import sched from xml.dom.pulldom import default_bufsize import numpy as np import matplotlib.pyplot as plt from opensearchpy import OpenSearch -from plotutils import get_data_from_opensearch +from adaptive_scheduler.simulation.plotutils import get_data_from_opensearch from adaptive_scheduler.simulation.metrics import bin_data -VARIABLE = ['no-duration-v2','no-duration-scaled-100-v2'] - # 'with-duration-scaled-100','no-duration','no-duration-scaled-100',] +import seaborn as sns +from colorspacious import cspace_converter +VARIABLE = [ 'with-duration-v3', + 'no-duration-v3', + 'with-duration-scaled-100-v3', + 'no-duration-scaled-100-v3',] markers = ["o" , "," ,"v" , "^" , "<", ">"] colors = ['r','b','c','m', 'y', 'k'] @@ -19,7 +24,7 @@ def plot_sched_priority_duration_dotplot(): fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) for i, id in enumerate(VARIABLE): data = get_data_from_opensearch(f'1m0-optimize-airmass-{id}') - if id in ['no-duration-scaled-100-v2', 'with-duration-scaled-100-v2']: + if id in ['with-duration-scaled-100-v3', 'no-duration-scaled-100-v3']: data['raw_scheduled_priorities'] = [(p+35)/4.5 for p in data['raw_scheduled_priorities']] # print(id, len(data['raw_scheduled_priorities']), len(data['raw_unscheduled_priorities'])) # ax1.scatter(rand_jitter(data['raw_scheduled_priorities']), rand_jitter(data['raw_scheduled_durations']), @@ -32,7 +37,7 @@ def plot_sched_priority_duration_dotplot(): ax1.legend() for i, id in enumerate(VARIABLE): data = get_data_from_opensearch(f'1m0-optimize-airmass-{id}') - if id in ['no-duration-scaled-100-v2', 'with-duration-scaled-100-v2']: + if id in ['with-duration-scaled-100-v3', 
'no-duration-scaled-100-v3']: data['raw_unscheduled_priorities'] = [(p+35)/4.5 for p in data['raw_unscheduled_priorities']] # ax2.scatter(rand_jitter(data['raw_unscheduled_priorities']), rand_jitter(data['raw_unscheduled_durations']), # c =colors[i], marker=markers[i],s=10, label = f'unscheduled requests {id}', alpha = 0.3) @@ -45,19 +50,58 @@ def plot_sched_priority_duration_dotplot(): def plot_heat_map_priority_duration(): - fig = plt.subplot() - fig.suptitle() - fig.subplots_adjust() + fig, axs= plt.subplots(2, 2, figsize=(25, 12)) + fig.suptitle(f'1m0 Network Requests Heatmap With Airmass Optimization', fontsize=20) + fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + ax_list = [axs[0,0],axs[0,1],axs[1,0], axs[1,1]] for i, id in enumerate(VARIABLE): data = get_data_from_opensearch(f'1m0-optimize-airmass-{id}') - if id in ['no-duration-scaled-100-v2', 'with-duration-scaled-100-v2']: + if id in ['with-duration-scaled-100-v3', 'no-duration-scaled-100-v3']: + data['raw_scheduled_priorities'] = [(p+35)/4.5 for p in data['raw_scheduled_priorities']] data['raw_unscheduled_priorities'] = [(p+35)/4.5 for p in data['raw_unscheduled_priorities']] - sched_priorities = data['raw_scheduled_priorities'] - sched_durations = data['raw_scheduled_durations'] - unsched_priorities = data['raw_unscheduled_priorities'] - unsched_durations = data['raw_unscheduled_durations'] - level_1_bins = bin_data(sched_priorities, sched_durations, bin_size=5, aggregator=None) - print(level_1_bins) + sched_priorities = data['raw_scheduled_priorities'] + sched_durations = data['raw_scheduled_durations'] + unsched_priorities = data['raw_unscheduled_priorities'] + unsched_durations = data['raw_unscheduled_durations'] + # total_priorities = sched_priorities.extend(unsched_priorities) + # total_durations = sched_durations.extend() + level_1_bins = bin_data(sched_priorities, sched_durations, bin_size=4, bin_range=(10,30),aggregator=None) + level_2_bins = {bin_key: bin_data(bin_values, bin_size=250, 
bin_range=(0, 4000)) for bin_key, bin_values in level_1_bins.items()} + # print(level_2_bins) + level_1_bins_unsched = bin_data(unsched_priorities, unsched_durations, bin_size=4, bin_range=(10,30),aggregator=None) + level_2_bins_unsched = {bin_key: bin_data(bin_values, bin_size=250, bin_range=(0, 4000)) for bin_key, bin_values in level_1_bins_unsched.items()} + priority_bins = list(level_2_bins.keys()) + duration_bins = list(list(level_2_bins.values())[0].keys()) + heat_map_elements = [] + heat_map_elements_unsched = [] + for durations in level_2_bins.values(): + heat_map_elements.append(list(durations.values())) + for durations in level_2_bins_unsched.values(): + heat_map_elements_unsched.append(list(durations.values())) + + heat_map_elements = np.array(heat_map_elements) + heat_map_elements_unsched = np.array(heat_map_elements_unsched) + axis = ax_list[i] + cmap=plt.get_cmap('coolwarm') + cmap2 = plt.get_cmap('gray') + heatplot = axis.imshow(heat_map_elements,cmap=cmap) + axis.set_ylabel('Priority') + axis.set_xlabel('Duration') + axis.set_xticks(np.arange(len(duration_bins)), labels=duration_bins) + axis.set_yticks(np.arange(len(priority_bins)), labels=priority_bins) + plt.setp(axis.get_xticklabels(), rotation=45, ha="right", + rotation_mode="anchor") + for i in range(len(priority_bins)): + for j in range(len(duration_bins)): + value = heat_map_elements[i, j] + text1 = axis.text(j, i, f'{heat_map_elements[i, j]}|{ heat_map_elements_unsched[i, j]}', + ha="center", va="center", fontsize='large', fontweight='semibold', color=cmap2(0.001/value)) + # text2 = axis.text(j, i, heat_map_elements_unsched[i, j], + # position=(0, 0), fontsize='medium', fontweight='medium', color=cmap2(0.001/value)) + axis.set_title(f'{id} (sched|unsched)', fontweight='semibold') + # cb = fig.colorbar(heatplot) + fig.tight_layout() + plt.show() if __name__ == '__main__': From 49f478a834f02a52d8040303167e8c426fb76d55 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Tue, 26 Jul 2022 14:30:39 
-0700 Subject: [PATCH 107/165] priority duration --- .../simulation/plot_priority_duration.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/adaptive_scheduler/simulation/plot_priority_duration.py b/adaptive_scheduler/simulation/plot_priority_duration.py index d7353ed7..8212f9fc 100644 --- a/adaptive_scheduler/simulation/plot_priority_duration.py +++ b/adaptive_scheduler/simulation/plot_priority_duration.py @@ -3,8 +3,11 @@ import matplotlib.pyplot as plt from opensearchpy import OpenSearch from plotutils import get_data_from_opensearch -VARIABLE = ['no-duration-v2','no-duration-scaled-100-v2'] - # 'with-duration-scaled-100','no-duration','no-duration-scaled-100',] +VARIABLE = [ + #'with-duration-v3', + 'no-duration-v3', + #'with-duration-scaled-100-v3', + 'no-duration-scaled-100-v3',] markers = ["o" , "," ,"v" , "^" , "<", ">"] colors = ['r','b','c','m', 'y', 'k'] @@ -18,7 +21,7 @@ def plot_sched_priority_duration_dotplot(): fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) for i, id in enumerate(VARIABLE): data = get_data_from_opensearch(f'1m0-optimize-airmass-{id}') - if id in ['no-duration-scaled-100-v2', 'with-duration-scaled-100-v2']: + if id in ['with-duration-scaled-100-v3', 'no-duration-scaled-100-v3']: data['raw_scheduled_priorities'] = [(p+35)/4.5 for p in data['raw_scheduled_priorities']] print(id, len(data['raw_scheduled_priorities']), len(data['raw_unscheduled_priorities'])) ax1.scatter(rand_jitter(data['raw_scheduled_priorities']), rand_jitter(data['raw_scheduled_durations']), @@ -29,7 +32,7 @@ def plot_sched_priority_duration_dotplot(): ax1.legend() for i, id in enumerate(VARIABLE): data = get_data_from_opensearch(f'1m0-optimize-airmass-{id}') - if id in ['no-duration-scaled-100-v2', 'with-duration-scaled-100-v2']: + if id in ['no-duration-scaled-100-v3', 'with-duration-scaled-100-v3']: data['raw_unscheduled_priorities'] = [(p+35)/4.5 for p in data['raw_unscheduled_priorities']] 
ax2.scatter(rand_jitter(data['raw_unscheduled_priorities']), rand_jitter(data['raw_unscheduled_durations']), c =colors[i], marker=markers[i],s=10, label = f'unscheduled requests {id}', alpha = 0.3) @@ -41,6 +44,4 @@ def plot_sched_priority_duration_dotplot(): plt.show() if __name__ == '__main__': - # plot_sched_priority_duration_dotplot() - data = get_data_from_opensearch(f'1m0-optimize-airmss-no-duration_scaled-100') - print(len(data['unscheduled_priority'][0]), len(data['scheduled_priority'][0])) \ No newline at end of file + plot_sched_priority_duration_dotplot() \ No newline at end of file From 3de1ab3c475f317055ecf6d7a56a11df63494d66 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Tue, 26 Jul 2022 15:40:11 -0700 Subject: [PATCH 108/165] heatmap --- .../plot_heat_map_priority_duration.py | 39 +++++++++++-------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/adaptive_scheduler/simulation/plot_heat_map_priority_duration.py b/adaptive_scheduler/simulation/plot_heat_map_priority_duration.py index f1c0996b..5971df8a 100644 --- a/adaptive_scheduler/simulation/plot_heat_map_priority_duration.py +++ b/adaptive_scheduler/simulation/plot_heat_map_priority_duration.py @@ -50,9 +50,9 @@ def plot_sched_priority_duration_dotplot(): def plot_heat_map_priority_duration(): - fig, axs= plt.subplots(2, 2, figsize=(25, 12)) + fig, axs= plt.subplots(2, 2, figsize=(26, 12)) fig.suptitle(f'1m0 Network Requests Heatmap With Airmass Optimization', fontsize=20) - fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + fig.subplots_adjust(wspace=0.01, hspace=0.01, top=0.9) ax_list = [axs[0,0],axs[0,1],axs[1,0], axs[1,1]] for i, id in enumerate(VARIABLE): data = get_data_from_opensearch(f'1m0-optimize-airmass-{id}') @@ -63,24 +63,34 @@ def plot_heat_map_priority_duration(): sched_durations = data['raw_scheduled_durations'] unsched_priorities = data['raw_unscheduled_priorities'] unsched_durations = data['raw_unscheduled_durations'] - # total_priorities = 
sched_priorities.extend(unsched_priorities) - # total_durations = sched_durations.extend() level_1_bins = bin_data(sched_priorities, sched_durations, bin_size=4, bin_range=(10,30),aggregator=None) - level_2_bins = {bin_key: bin_data(bin_values, bin_size=250, bin_range=(0, 4000)) for bin_key, bin_values in level_1_bins.items()} - # print(level_2_bins) + level_2_bins = {bin_key: bin_data(bin_values, bin_size=250, bin_range=(0, 4000)) + for bin_key, bin_values in level_1_bins.items()} level_1_bins_unsched = bin_data(unsched_priorities, unsched_durations, bin_size=4, bin_range=(10,30),aggregator=None) - level_2_bins_unsched = {bin_key: bin_data(bin_values, bin_size=250, bin_range=(0, 4000)) for bin_key, bin_values in level_1_bins_unsched.items()} - priority_bins = list(level_2_bins.keys()) - duration_bins = list(list(level_2_bins.values())[0].keys()) + level_2_bins_unsched = {bin_key: bin_data(bin_values, bin_size=250, bin_range=(0, 4000)) + for bin_key, bin_values in level_1_bins_unsched.items()} heat_map_elements = [] heat_map_elements_unsched = [] - for durations in level_2_bins.values(): - heat_map_elements.append(list(durations.values())) - for durations in level_2_bins_unsched.values(): - heat_map_elements_unsched.append(list(durations.values())) + for values in level_2_bins.values(): + new_value= np.sum(list(values.values())[-5:]) + temp_list = ['3000-3249', '3250-3499', '3500-3749', '3750-3999', '4000'] + for key in temp_list: + del values[key] + values['3000&above'] = new_value + heat_map_elements.append(list(values.values())) + for values in level_2_bins_unsched.values(): + new_value= np.sum(list(values.values())[-5:]) + temp_list = ['3000-3249', '3250-3499', '3500-3749', '3750-3999', '4000'] + for key in temp_list: + del values[key] + values['3000&above'] = new_value + heat_map_elements_unsched.append(list(values.values())) + priority_bins = list(level_2_bins.keys()) + duration_bins = list(list(level_2_bins.values())[0].keys()) heat_map_elements = 
np.array(heat_map_elements) heat_map_elements_unsched = np.array(heat_map_elements_unsched) + axis = ax_list[i] cmap=plt.get_cmap('coolwarm') cmap2 = plt.get_cmap('gray') @@ -96,10 +106,7 @@ def plot_heat_map_priority_duration(): value = heat_map_elements[i, j] text1 = axis.text(j, i, f'{heat_map_elements[i, j]}|{ heat_map_elements_unsched[i, j]}', ha="center", va="center", fontsize='large', fontweight='semibold', color=cmap2(0.001/value)) - # text2 = axis.text(j, i, heat_map_elements_unsched[i, j], - # position=(0, 0), fontsize='medium', fontweight='medium', color=cmap2(0.001/value)) axis.set_title(f'{id} (sched|unsched)', fontweight='semibold') - # cb = fig.colorbar(heatplot) fig.tight_layout() plt.show() From 15bc9fa6bb3bb7ede56a2357d9b204035643239f Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Wed, 27 Jul 2022 14:11:16 -0700 Subject: [PATCH 109/165] add timing decorators to time our metrics runtime --- adaptive_scheduler/simulation/metrics.py | 16 ++++++-- adaptive_scheduler/simulation/orchestrator.py | 2 + .../plot_heat_map_priority_duration.py | 40 ++++++++++--------- 3 files changed, 37 insertions(+), 21 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 6cdbb42a..f360a2c2 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -11,10 +11,8 @@ from requests.exceptions import RequestException, Timeout from adaptive_scheduler.observation_portal_connections import ObservationPortalConnectionError -from adaptive_scheduler.utils import time_in_capped_intervals, normalised_epoch_to_datetime, datetime_to_epoch +from adaptive_scheduler.utils import time_in_capped_intervals, normalised_epoch_to_datetime, datetime_to_epoch, timeit from adaptive_scheduler.models import redis_instance - - log = logging.getLogger('adaptive_scheduler') DTFORMAT = '%Y-%m-%dT%H:%M' @@ -187,16 +185,19 @@ def _combine_normal_rr_input_reservations(self): reservations = [res for 
res in comp_res.reservation_list if res not in self.combined_input_reservations] self.combined_input_reservations.extend(reservations) + @timeit def count_scheduled(self): scheduled_reservations = [] for reservations in self.combined_schedule.values(): scheduled_reservations.extend(reservations) return len(scheduled_reservations), len(self.combined_input_reservations) + @timeit def percent_reservations_scheduled(self): scheduled, total = self.count_scheduled() return percent_of(scheduled, total) + @timeit def total_scheduled_eff_priority(self): effective_priorities = [] for reservations in self.combined_schedule.values(): @@ -227,6 +228,7 @@ def get_priority_data(self): unsched_priorities = [scalefunc(p, *scale) for p in unsched_priorities] return sched_priorities, unsched_priorities + @timeit def total_available_seconds(self): """Aggregates the total available time, calculated from dark intervals. @@ -249,6 +251,7 @@ def total_available_seconds(self): total_available_time += available_time return total_available_time + @timeit def percent_time_utilization(self): scheduled_durations, _ = self.get_duration_data() return percent_of(sum(scheduled_durations), self.total_available_seconds()) @@ -314,6 +317,7 @@ def _get_midpoint_airmasses_by_site(self, airmass_data, midpoint_time): midpoint_airmasses[site] = airmasses[np.argmin(np.abs(times-midpoint_time))] return midpoint_airmasses + @timeit def airmass_metrics(self, schedule=None): """Generate the airmass metrics of all scheduled reservations for a single schedule. 
@@ -352,6 +356,7 @@ def airmass_metrics(self, schedule=None): } return airmass_metrics + @timeit def binned_tac_priority_metrics(self): """Bins metrics based on TAC priority.""" bin_size = 45 @@ -379,3 +384,8 @@ def binned_tac_priority_metrics(self): 'percent_duration': bin_percent_duration, } return output_dict + + def avg_slew_distance(self): + schedule = self.combined_schedule if schedule is None else schedule + + \ No newline at end of file diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index a277d33f..7b216bb0 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -31,6 +31,7 @@ SchedulingInputFactory, SchedulingInputProvider, SchedulerParameters ) from adaptive_scheduler.simulation.metrics import MetricCalculator +from adaptive_scheduler.utils import timeit log = logging.getLogger('adaptive_scheduler') @@ -79,6 +80,7 @@ def increment_input(current_time, time_step): pass +@timeit def send_to_opensearch(os_url, os_index, metrics): # Send the json metrics to the opensearch index if os_url and os_index: diff --git a/adaptive_scheduler/simulation/plot_heat_map_priority_duration.py b/adaptive_scheduler/simulation/plot_heat_map_priority_duration.py index 5971df8a..92a010ec 100644 --- a/adaptive_scheduler/simulation/plot_heat_map_priority_duration.py +++ b/adaptive_scheduler/simulation/plot_heat_map_priority_duration.py @@ -50,7 +50,7 @@ def plot_sched_priority_duration_dotplot(): def plot_heat_map_priority_duration(): - fig, axs= plt.subplots(2, 2, figsize=(26, 12)) + fig, axs= plt.subplots(2, 2, figsize=(13, 12)) fig.suptitle(f'1m0 Network Requests Heatmap With Airmass Optimization', fontsize=20) fig.subplots_adjust(wspace=0.01, hspace=0.01, top=0.9) ax_list = [axs[0,0],axs[0,1],axs[1,0], axs[1,1]] @@ -64,30 +64,34 @@ def plot_heat_map_priority_duration(): unsched_priorities = data['raw_unscheduled_priorities'] unsched_durations = 
data['raw_unscheduled_durations'] level_1_bins = bin_data(sched_priorities, sched_durations, bin_size=4, bin_range=(10,30),aggregator=None) - level_2_bins = {bin_key: bin_data(bin_values, bin_size=250, bin_range=(0, 4000)) - for bin_key, bin_values in level_1_bins.items()} + level_2_bins = { + bin_key: bin_data(bin_values, bin_size=300, bin_range=(0, 1499)) | bin_data(bin_values, bin_size=3000, bin_range=(1500, 4000)) + for bin_key, bin_values in level_1_bins.items() + } + print(level_2_bins) level_1_bins_unsched = bin_data(unsched_priorities, unsched_durations, bin_size=4, bin_range=(10,30),aggregator=None) - level_2_bins_unsched = {bin_key: bin_data(bin_values, bin_size=250, bin_range=(0, 4000)) - for bin_key, bin_values in level_1_bins_unsched.items()} + level_2_bins_unsched = { + bin_key: bin_data(bin_values, bin_size=300, bin_range=(0, 1499)) | bin_data(bin_values, bin_size=3000, bin_range=(1500, 4000)) + for bin_key, bin_values in level_1_bins_unsched.items() + } heat_map_elements = [] heat_map_elements_unsched = [] - for values in level_2_bins.values(): - new_value= np.sum(list(values.values())[-5:]) - temp_list = ['3000-3249', '3250-3499', '3500-3749', '3750-3999', '4000'] - for key in temp_list: - del values[key] - values['3000&above'] = new_value + # new_value= np.sum(list(values.values())[-5:]) + # temp_list = ['3000-3249', '3250-3499', '3500-3749', '3750-3999', '4000'] + # for key in temp_list: + # del values[key] + # values['3000&above'] = new_value heat_map_elements.append(list(values.values())) for values in level_2_bins_unsched.values(): - new_value= np.sum(list(values.values())[-5:]) - temp_list = ['3000-3249', '3250-3499', '3500-3749', '3750-3999', '4000'] - for key in temp_list: - del values[key] - values['3000&above'] = new_value + # new_value= np.sum(list(values.values())[-5:]) + # temp_list = ['3000-3249', '3250-3499', '3500-3749', '3750-3999', '4000'] + # for key in temp_list: + # del values[key] + # values['3000&above'] = new_value 
heat_map_elements_unsched.append(list(values.values())) priority_bins = list(level_2_bins.keys()) - duration_bins = list(list(level_2_bins.values())[0].keys()) + duration_bins = ['0-5','5-10','10-15', '15-20', '20-25', '25&up'] heat_map_elements = np.array(heat_map_elements) heat_map_elements_unsched = np.array(heat_map_elements_unsched) @@ -96,7 +100,7 @@ def plot_heat_map_priority_duration(): cmap2 = plt.get_cmap('gray') heatplot = axis.imshow(heat_map_elements,cmap=cmap) axis.set_ylabel('Priority') - axis.set_xlabel('Duration') + axis.set_xlabel('Duration (minutes)') axis.set_xticks(np.arange(len(duration_bins)), labels=duration_bins) axis.set_yticks(np.arange(len(priority_bins)), labels=priority_bins) plt.setp(axis.get_xticklabels(), rotation=45, ha="right", From b392fe6fd60cb6f9bf9dba62811bfa608a32b249 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Wed, 27 Jul 2022 14:12:17 -0700 Subject: [PATCH 110/165] add timing decorators to time our metrics runtime --- adaptive_scheduler/simulation/metrics.py | 5 ----- adaptive_scheduler/simulation/orchestrator.py | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index f360a2c2..d85ad248 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -384,8 +384,3 @@ def binned_tac_priority_metrics(self): 'percent_duration': bin_percent_duration, } return output_dict - - def avg_slew_distance(self): - schedule = self.combined_schedule if schedule is None else schedule - - \ No newline at end of file diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 7b216bb0..d648291f 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -96,7 +96,7 @@ def send_to_opensearch(os_url, os_index, metrics): else: log.warning("Not configured to save metrics in opensearch. 
Please set OPENSEARCH_URL and SIMULATION_OPENSEARCH_INDEX.") - +@timeit def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, scheduler_runner): log.info("Recording metrics for scheduler simulation run") From ba5d27a73785786e298f29673b03489818bfbc70 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Wed, 27 Jul 2022 21:17:20 +0000 Subject: [PATCH 111/165] slew distance calculations --- adaptive_scheduler/simulation/metrics.py | 31 ++++++++++-- tests/test_simulator_metrics.py | 62 ++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 3 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 6cdbb42a..3b56396d 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -9,6 +9,7 @@ import numpy as np import requests from requests.exceptions import RequestException, Timeout +from rise_set import astrometry from adaptive_scheduler.observation_portal_connections import ObservationPortalConnectionError from adaptive_scheduler.utils import time_in_capped_intervals, normalised_epoch_to_datetime, datetime_to_epoch @@ -71,9 +72,10 @@ def bin_data(bin_by, data=[], bin_size=1, bin_range=None, fill=[], aggregator=le of this array to match the length of bin_by. The aggregation function is applied to the data after binning, on a per-bin basis. bin_size: The width of the bins. - bin_range: A tuple of numbers. Overrides the bin ranges. Otherwise, use the min/max of the data. - fill: The data value to fill with if the bin is empty. If None is passed, then empty bins are removed. - The aggregator will be applied to fill values as well. + bin_range: A tuple of numbers to override the bin ranges. Otherwise, use the min/max of the data. + fill: The data value(s) to fill with if the bin is empty. An iterable may be passed, in which case it is + casted to a list. If None is passed, then empty bins are removed. 
The aggregator will be applied + to fill values as well. aggregator (func): The aggregation function to apply over the list of data. Must be callable on an array. If None is passed, then the raw values are stored in a list. @@ -141,6 +143,7 @@ def __init__(self, normal_scheduler_result, rr_scheduler_result, scheduler, sche self.scheduler = scheduler self.scheduler_runner = scheduler_runner self.observation_portal_interface = self.scheduler_runner.network_interface.observation_portal_interface + self.simulation_start = self.scheduler_runner.sched_params.simulate_now self.horizon_days = self.scheduler_runner.sched_params.horizon_days self.normal_scheduler_result = normal_scheduler_result @@ -379,3 +382,25 @@ def binned_tac_priority_metrics(self): 'percent_duration': bin_percent_duration, } return output_dict + + def avg_slew_distance(self): + semester_start = self.scheduler_runner.semester_details['start'] + slew_distances = [] + for reservations in self.combined_schedule.values(): + apparent_radecs = [] + reservations.sort(key=lambda r: r.scheduled_start) + for res in reservations: + res_startdt = normalised_epoch_to_datetime(res.scheduled_start, datetime_to_epoch(semester_start)) + tdb = astrometry.date_to_tdb(res_startdt) + config_radecs = [astrometry.mean_to_apparent({'ra': c['target'].ra, 'dec': c['target'].dec}, tdb) + for c in res.request.configurations] + apparent_radecs.extend(config_radecs) + for i, radec in enumerate(apparent_radecs): + try: + next_radec = apparent_radecs[i+1] + ang_dist = astrometry.angular_distance_between(*radec, *next_radec) + slew_distances.append(ang_dist.in_degrees()) + except IndexError: + break + + return np.mean(slew_distances) diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index b2c0fb1f..a267defc 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -3,8 +3,13 @@ import os import json +import calendar from datetime import datetime, timedelta from mock import Mock 
+from rise_set import astrometry +from rise_set.sky_coordinates import RightAscension, Declination +from rise_set.angle import Angle +import numpy as np class TestMetrics(): @@ -25,6 +30,7 @@ def setup(self): self.mock_scheduler = Mock(estimated_scheduler_end=self.scheduler_run_time) self.mock_scheduler_runner = Mock(semester_details={'start': self.start}) self.mock_scheduler_runner.sched_params.horizon_days = 5 + self.mock_scheduler_runner.sched_params.simulate_now = self.scheduler_run_time self.mock_scheduler_result.schedule = fake_schedule @@ -140,3 +146,59 @@ def test_airmass_functions(self): assert airmass_metrics['avg_midpoint_airmass'] == 4 assert airmass_metrics['avg_min_poss_airmass'] == 1 assert airmass_metrics['raw_airmass_data'][0]['midpoint_airmasses'] == midpoint_airmasses + + def test_avg_slew_distance(self): + conf1 = {'target': Mock(name='star1', + ra=RightAscension(degrees=35), + dec=Declination(degrees=0))} + conf2 = {'target': Mock(name='star2', + ra=RightAscension(degrees=35), + dec=Declination(degrees=15))} + conf3 = {'target': Mock(name='star3', + ra=RightAscension(degrees=10), + dec=Declination(degrees=15))} + conf4 = {'target': Mock(name='star4', + ra=RightAscension(degrees=60), + dec=Declination(degrees=10))} + conf5 = {'target': Mock(name='star5', + ra=RightAscension(degrees=80), + dec=Declination(degrees=10))} + conf6 = {'target': Mock(name='star6', + ra=RightAscension(degrees=80), + dec=Declination(degrees=-10))} + res1 = Mock(scheduled_start=10) + res2 = Mock(scheduled_start=20) + res3 = Mock(scheduled_start=10) + res4 = Mock(scheduled_start=20) + res5 = Mock(scheduled_start=30) + res1.request.configurations = [conf1, conf2] + res2.request.configurations = [conf3] + res3.request.configurations = [conf4] + res4.request.configurations = [conf5, conf5, conf5] + res5.request.configurations = [conf6] + fake_schedule1 = {'bpl': [res1, res2], 'coj': [res5, res4, res3]} + d = timedelta(seconds=10) + radec1 = astrometry.mean_to_apparent({'ra': 
Angle(degrees=35), 'dec': Angle(degrees=0)}, + astrometry.date_to_tdb(self.scheduler_run_time+d)) + radec2 = astrometry.mean_to_apparent({'ra': Angle(degrees=35), 'dec': Angle(degrees=15)}, + astrometry.date_to_tdb(self.scheduler_run_time+d)) + radec3 = astrometry.mean_to_apparent({'ra': Angle(degrees=10), 'dec': Angle(degrees=15)}, + astrometry.date_to_tdb(self.scheduler_run_time+2*d)) + radec4 = astrometry.mean_to_apparent({'ra': Angle(degrees=60), 'dec': Angle(degrees=10)}, + astrometry.date_to_tdb(self.scheduler_run_time+3*d)) + radec5 = astrometry.mean_to_apparent({'ra': Angle(degrees=80), 'dec': Angle(degrees=10)}, + astrometry.date_to_tdb(self.scheduler_run_time+4*d)) + radec6 = astrometry.mean_to_apparent({'ra': Angle(degrees=80), 'dec': Angle(degrees=-10)}, + astrometry.date_to_tdb(self.scheduler_run_time+5*d)) + slewdists = [astrometry.angular_distance_between(*radec1, *radec2), + astrometry.angular_distance_between(*radec2, *radec3), + astrometry.angular_distance_between(*radec4, *radec5), + astrometry.angular_distance_between(*radec5, *radec5), + astrometry.angular_distance_between(*radec5, *radec5), + astrometry.angular_distance_between(*radec5, *radec6)] + slewdists = [a.in_degrees() for a in slewdists] + metrics = MetricCalculator(self.mock_scheduler_result, None, self.mock_scheduler, self.mock_scheduler_runner) + metrics.combined_schedule = fake_schedule1 + metrics.scheduler_runner.semester_details['start'] = self.scheduler_run_time + + assert np.isclose(metrics.avg_slew_distance(), np.mean(slewdists)) From 84be11b8363d3dad4e75d1d9cb7d74c3db2cd437 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Wed, 27 Jul 2022 21:18:26 +0000 Subject: [PATCH 112/165] added new plot for effective priority binned data --- adaptive_scheduler/simulation/plot_all.py | 101 +++++++++++++++++---- adaptive_scheduler/simulation/plotutils.py | 2 - 2 files changed, 83 insertions(+), 20 deletions(-) diff --git a/adaptive_scheduler/simulation/plot_all.py 
b/adaptive_scheduler/simulation/plot_all.py index 204c01c9..11976584 100644 --- a/adaptive_scheduler/simulation/plot_all.py +++ b/adaptive_scheduler/simulation/plot_all.py @@ -10,6 +10,7 @@ import adaptive_scheduler.simulation.plotutils as plotutils from adaptive_scheduler.simulation.plotutils import opensearch_client +from adaptive_scheduler.simulation.metrics import bin_data AIRMASS_TEST_VALUES = [0, 0.01, 0.05, 0.1, 1.0, 10, 100, 1000, 1000000] EFF_PRI_SCALING_TEST_LABELS = ['airmass', 'airmass-with-duration-scaled-100', @@ -237,11 +238,11 @@ def plot_duration_histogram(): '1m0-optimize-airmass-with-duration_2022-07-21T21:48:02.586407') no_duration_data = opensearch_client.get('scheduler-simulations', '1m0-optimize-airmass-no-duration_2022-07-21T21:52:46.316207') - duration_data = [with_duration_data['_source']['raw_scheduled_durations']] - duration_data.append(no_duration_data['_source']['raw_scheduled_durations']) - labels = ['eff. prio scaled by duration', 'eff. prio not scaled by duration'] + duration_data = [np.array(with_duration_data['_source']['raw_scheduled_durations'])/60] + duration_data.append(np.array(no_duration_data['_source']['raw_scheduled_durations'])/60) + labels = ['With Duration', 'No Duration'] ax.hist(duration_data, bins=50, label=labels) - ax.set_xlabel('Duration [s]') + ax.set_xlabel('Duration [min]') ax.set_ylabel('Counts') ax.set_title('Optimize by Airmass') ax.legend() @@ -251,28 +252,87 @@ def plot_duration_histogram(): def plot_eff_prio_duration_scatter(): - fig = plt.figure(figsize=(20, 10)) + fig, axs = plt.subplots(1, 2, figsize=(24, 8)) fig.suptitle('1m0 Scatterplot of Effective Priority and Duration', fontsize=20) fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) - ax = fig.add_subplot() tagnames = ['with-duration-v2', 'with-duration-scaled-100-v2'] - labels = ['priority 10-30', 'priority 10-100'] - for i, tag in enumerate(tagnames): - data = plotutils.get_data_from_opensearch(f'1m0-optimize-airmass-{tag}') + labels = 
['Priority 10-30', 'Priority 10-100'] + colors = [('#006BA4', '#5F9ED1'), ('#C85200', '#FF800E')] + for i, ax in enumerate(axs): + data = plotutils.get_data_from_opensearch(f'1m0-optimize-airmass-{tagnames[i]}') prio_scheduled = np.array(data['raw_scheduled_priorities']) prio_unscheduled = np.array(data['raw_unscheduled_priorities']) - dur_scheduled = np.array(data['raw_scheduled_durations']) - dur_unscheduled = np.array(data['raw_unscheduled_durations']) - ax.scatter(dur_scheduled, prio_scheduled*dur_scheduled, label=f'{labels[i]}, scheduled', marker='+') - ax.scatter(dur_unscheduled, prio_unscheduled*dur_unscheduled, label=f'{labels[i]}, unscheduled', marker='x') - ax.set_ylabel('Effective Priority (base priority x duration)') - ax.set_xlabel('Duration [s]') - ax.legend() + dur_scheduled = np.array(data['raw_scheduled_durations'])/60 + dur_unscheduled = np.array(data['raw_unscheduled_durations'])/60 + ax.scatter(dur_scheduled, prio_scheduled*dur_scheduled, + label=f'{labels[i]}, scheduled', marker='x', color=colors[i][0]) + ax.scatter(dur_unscheduled, prio_unscheduled*dur_unscheduled, + label=f'{labels[i]}, unscheduled', marker='x', alpha=0.5, color=colors[i][1]) + ax.set_ylabel('Effective Priority (base priority x duration)') + ax.set_xlabel('Duration [min]') + ax.set_title(f'Optimize by Airmass, With Duration, {labels[i]}') + ax.legend() if not displayonly: plotutils.export_to_image(f'1m0_eff_prio_duration_scatter_{timestamp}', fig) plt.show() +def plot_pct_sched_bin_eff_prio(): + fig, axs = plt.subplots(2, 2, figsize=(20, 12)) + fig.suptitle('1m0 Eff. 
Priority Distribution', fontsize=20) + fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + test_names = ['with', 'no'] + for k, test_name in enumerate(test_names): + data = plotutils.get_data_from_opensearch(f'1m0-optimize-airmass-{test_name}-duration-v3') + prio_scheduled = data['raw_scheduled_priorities'] + prio_unscheduled = data['raw_unscheduled_priorities'] + prio_all = np.array(prio_scheduled + prio_unscheduled) + dur_scheduled = data['raw_scheduled_durations'] + dur_unscheduled = data['raw_unscheduled_durations'] + dur_all = np.array(dur_scheduled + dur_unscheduled)/60 + dur_scheduled = np.array(dur_scheduled)/60 + dur_unscheduled = np.array(dur_unscheduled)/60 + eff_prio_scheduled = prio_scheduled*dur_scheduled + eff_prio_unscheduled = prio_unscheduled*dur_unscheduled + eff_prio_all = prio_all*dur_all + bin_size = 100 + bin_range = (0, 4000) + level1_sched_bin = bin_data(eff_prio_scheduled, list(prio_scheduled), + bin_size=bin_size, bin_range=bin_range, aggregator=None) + level2_sched_bin = [list(bin_data(bin_values, bin_size=10, bin_range=(10, 30)).values()) + for bin_values in level1_sched_bin.values()] + level1_unsched_bin = bin_data(eff_prio_unscheduled, list(prio_unscheduled), + bin_size=bin_size, bin_range=bin_range, aggregator=None) + level2_unsched_bin = [list(bin_data(bin_values, bin_size=10, bin_range=(10, 30), fill=[]).values()) + for bin_values in level1_unsched_bin.values()] + all_bin = bin_data(eff_prio_all, bin_size=bin_size, bin_range=bin_range) + all_bin_array = np.array(list(all_bin.values())) + sched_bin_array = np.array([np.array(vals) for vals in level2_sched_bin]) + pct_bin_array = 100*sched_bin_array/all_bin_array[:, np.newaxis] + pct_bin_array = np.nan_to_num(pct_bin_array) + xaxis = np.arange(0, 4001, 50) + priority_labels = ['10-19', '20-29', '30'] + colors = ['#006BA4', '#FF800E', '#ABABAB'] + barwidth = 14 + for i, bin_ in enumerate(level2_sched_bin): + for j, label in enumerate(priority_labels): + 
axs[0][k].bar(xaxis[i]+j*barwidth, bin_[j], barwidth, label=label, color=colors[j]) + for i, bin_ in enumerate(level2_unsched_bin): + for j, label in enumerate(priority_labels): + axs[1][k].bar(xaxis[i]+j*barwidth, bin_[j], barwidth, label=label, color=colors[j]) + + handles, labels = axs[0][0].get_legend_handles_labels() + by_label = dict(zip(labels, handles)) + axs[0][k].set_title(f'Scheduled Requests ({test_name} duration)') + axs[1][k].set_title(f'Unscheduled Requests ({test_name} duration)') + for ax in axs[k]: + ax.legend(by_label.values(), by_label.keys(), title='Priority') + ax.set_xlabel('Effective Priority (base priority x duration [min])') + ax.set_ylabel('Number of Requests') + ax.set_ylim(0, 240) + plt.show() + + if __name__ == '__main__': plots = { 'normed_airmass_hist': {'func': plot_normed_airmass_histogram, @@ -300,6 +360,8 @@ def plot_eff_prio_duration_scatter(): 'desc': 'Scheduled request duration distribution.'}, 'eff_prio_duration_scatter': {'func': plot_eff_prio_duration_scatter, 'desc': 'Scatterplot with (prio x duration) on y-axis and duration on x-axis'}, + 'pct_sched_eff_prio_hist': {'func': plot_pct_sched_bin_eff_prio, + 'desc': 'Histogram with (prio x duration) on x-axis and percentage scheduled by bin'}, } description = 'Plotting functions for airmass optimization experiment.' 
@@ -311,12 +373,15 @@ def plot_eff_prio_duration_scatter(): args = parser.parse_args() global displayonly displayonly = args.displayonly + plotnames = list(plots.keys()) + plots_to_show = plotnames if args.plot_name == ['all'] else args.plot_name if args.list: - spacing = max([len(name) for name in plots.keys()]) + 4 + spacing = max([len(name) for name in plots.keys()]) + 10 print(f'{"NAME":{spacing}}DESCRIPTION') print(f'{"====":{spacing}}===========') - for name, details in plots.items(): + for name in plots_to_show: + details = plots[name] print(f'{name:{spacing}}{details["desc"]}') else: plots_to_show = list(plots.keys()) if args.plot_name == ['all'] else args.plot_name diff --git a/adaptive_scheduler/simulation/plotutils.py b/adaptive_scheduler/simulation/plotutils.py index ae03409b..b5c6c8a7 100644 --- a/adaptive_scheduler/simulation/plotutils.py +++ b/adaptive_scheduler/simulation/plotutils.py @@ -37,8 +37,6 @@ def plot_barplot(ax, data, labels, binnames, barwidth=0.04): Args: ax (matplotlib.pyplot.Axes): An Axes object to modify. data: A list of lists. Each sub-list contains the y-axis data for a dataset. - colors: The list of colors to use for each dataset. Must contain enough colors - to cover all datasets. labels: The list of labels to associate with each dataset. Must contain a label for each dataset. binnames: A list of names of the bins for marking the x-axis. barwidth (float): The width of each bar. 
From f824110cfb6e87b22f1caf293d3b48ebddfcc715 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Wed, 27 Jul 2022 21:20:07 +0000 Subject: [PATCH 113/165] add slew distance to orchestrator --- adaptive_scheduler/simulation/orchestrator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index d648291f..dd5b25ea 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -135,6 +135,7 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche 'raw_unscheduled_durations': unsched_durations, 'raw_scheduled_priorities': sched_priorities, 'raw_unscheduled_priorities': unsched_priorities, + 'average_slew_distance': metrics.avg_slew_distance(), } send_to_opensearch(sched_params.opensearch_url, sched_params.simulation_opensearch_index, metrics) From 629f7ec5ae1f4eec531abb6629dc9eec7a54abda Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Wed, 27 Jul 2022 23:03:02 +0000 Subject: [PATCH 114/165] update to handle non sidereal targets --- adaptive_scheduler/simulation/metrics.py | 18 +++++++++---- tests/test_simulator_metrics.py | 32 +++++++++++------------- 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index dd40eb30..46e06888 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -8,12 +8,16 @@ import numpy as np import requests +import rise_set from requests.exceptions import RequestException, Timeout from rise_set import astrometry from adaptive_scheduler.observation_portal_connections import ObservationPortalConnectionError -from adaptive_scheduler.utils import time_in_capped_intervals, normalised_epoch_to_datetime, datetime_to_epoch, timeit -from adaptive_scheduler.models import redis_instance +from adaptive_scheduler.utils import 
(time_in_capped_intervals, + normalised_epoch_to_datetime, + datetime_to_epoch, timeit) +from adaptive_scheduler.models import redis_instance, ICRSTarget + log = logging.getLogger('adaptive_scheduler') DTFORMAT = '%Y-%m-%dT%H:%M' @@ -397,9 +401,13 @@ def avg_slew_distance(self): for res in reservations: res_startdt = normalised_epoch_to_datetime(res.scheduled_start, datetime_to_epoch(semester_start)) tdb = astrometry.date_to_tdb(res_startdt) - config_radecs = [astrometry.mean_to_apparent({'ra': c['target'].ra, 'dec': c['target'].dec}, tdb) - for c in res.request.configurations] - apparent_radecs.extend(config_radecs) + for c in res.request.configurations: + try: + apparent_radecs.append(astrometry.mean_to_apparent(c.target.in_rise_set_format(), tdb)) + except rise_set.exceptions.IncompleteTargetError: + # set a conservative estimate + ra = dec = rise_set.angle.Angle(degrees=0) + apparent_radecs.append((ra, dec)) for i, radec in enumerate(apparent_radecs): try: next_radec = apparent_radecs[i+1] diff --git a/tests/test_simulator_metrics.py b/tests/test_simulator_metrics.py index a267defc..028f45b8 100644 --- a/tests/test_simulator_metrics.py +++ b/tests/test_simulator_metrics.py @@ -148,24 +148,19 @@ def test_airmass_functions(self): assert airmass_metrics['raw_airmass_data'][0]['midpoint_airmasses'] == midpoint_airmasses def test_avg_slew_distance(self): - conf1 = {'target': Mock(name='star1', - ra=RightAscension(degrees=35), - dec=Declination(degrees=0))} - conf2 = {'target': Mock(name='star2', - ra=RightAscension(degrees=35), - dec=Declination(degrees=15))} - conf3 = {'target': Mock(name='star3', - ra=RightAscension(degrees=10), - dec=Declination(degrees=15))} - conf4 = {'target': Mock(name='star4', - ra=RightAscension(degrees=60), - dec=Declination(degrees=10))} - conf5 = {'target': Mock(name='star5', - ra=RightAscension(degrees=80), - dec=Declination(degrees=10))} - conf6 = {'target': Mock(name='star6', - ra=RightAscension(degrees=80), - 
dec=Declination(degrees=-10))} + conf1 = Mock() + conf1.target.in_rise_set_format = Mock(return_value={'ra': Angle(degrees=35), 'dec': Angle(degrees=0)}) + conf2 = Mock() + conf2.target.in_rise_set_format = Mock(return_value={'ra': Angle(degrees=35), 'dec': Angle(degrees=15)}) + conf3 = Mock() + conf3.target.in_rise_set_format = Mock(return_value={'ra': Angle(degrees=10), 'dec': Angle(degrees=15)}) + conf4 = Mock() + conf4.target.in_rise_set_format = Mock(return_value={'ra': Angle(degrees=60), 'dec': Angle(degrees=10)}) + conf5 = Mock() + conf5.target.in_rise_set_format = Mock(return_value={'ra': Angle(degrees=80), 'dec': Angle(degrees=10)}) + conf6 = Mock() + conf6.target.in_rise_set_format = Mock(return_value={'ra': Angle(degrees=80), 'dec': Angle(degrees=-10)}) + res1 = Mock(scheduled_start=10) res2 = Mock(scheduled_start=20) res3 = Mock(scheduled_start=10) @@ -177,6 +172,7 @@ def test_avg_slew_distance(self): res4.request.configurations = [conf5, conf5, conf5] res5.request.configurations = [conf6] fake_schedule1 = {'bpl': [res1, res2], 'coj': [res5, res4, res3]} + d = timedelta(seconds=10) radec1 = astrometry.mean_to_apparent({'ra': Angle(degrees=35), 'dec': Angle(degrees=0)}, astrometry.date_to_tdb(self.scheduler_run_time+d)) From 2036de4a6992c2057cc5cd26efeaa67c89593500 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Fri, 29 Jul 2022 00:16:40 +0000 Subject: [PATCH 115/165] wip restructure for more user friendly code --- adaptive_scheduler/simulation/plotfuncs.py | 87 +++++++++++++++++++++ adaptive_scheduler/simulation/plots.py | 43 +++++++++++ adaptive_scheduler/simulation/plotutils.py | 89 +++++++++++++++++----- 3 files changed, 199 insertions(+), 20 deletions(-) create mode 100644 adaptive_scheduler/simulation/plotfuncs.py create mode 100644 adaptive_scheduler/simulation/plots.py diff --git a/adaptive_scheduler/simulation/plotfuncs.py b/adaptive_scheduler/simulation/plotfuncs.py new file mode 100644 index 00000000..4bd67c8a --- /dev/null +++ 
b/adaptive_scheduler/simulation/plotfuncs.py @@ -0,0 +1,87 @@ +""" +Plotting functions to use with the adaptive simulator plotting wrapper. +To write your own plotting functions, follow the format of the example functions. +The data passed in should be in list format. +""" +import matplotlib +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.style as style + +import adaptive_scheduler.simulation.plotutils as plotutils + +# change default parameters for matplotlib here +style.use('tableau-colorblind10') +matplotlib.rcParams['figure.figsize'] = (20, 10) +matplotlib.rcParams['figure.titlesize'] = 20 +matplotlib.rcParams['figure.subplot.wspace'] = 0.2 # horizontal spacing for subplots +matplotlib.rcParams['figure.subplot.hspace'] = 0.2 # vertical spacing for subplots +matplotlib.rcParams['figure.subplot.top'] = 0.9 # spacing between plot and title + + +def plot_normed_airmass_histogram(airmass_datasets): + """Plots the distribution of airmass scores. The score is obtained by normalizing the + scheduled airmass, with 0 being the worst and 1 being the best. + + Args: + airmass_data (list): Should be a list of datasets, each dataset corresponding + to a different airmass weighting coefficient. Assumes the first dataset passed + is the control dataset (airmass optimization turned off) + + Returns: + fig (matplotlib.pyplot.Figure): The output figure object. 
+ """ + plot_title = '1m Network Airmass Score Distribution for Scheduled Requests' + fig, ax = plt.subplots() + fig.suptitle(plot_title) + + numbins = 10 + normed = [] + labels = ['optimize by earliest'] + for dataset in airmass_datasets: + airmass_data = dataset['airmass_metrics']['raw_airmass_data'] + airmass_coeff = dataset['airmass_weighting_coefficient'] + mp = np.array(airmass_data[0]['midpoint_airmasses']) + a_min = np.array(airmass_data[1]['min_poss_airmasses']) + a_max = np.array(airmass_data[2]['max_poss_airmasses']) + print(len(np.where(a_min == a_max)[0])) + # normalize = 1 - (mp-a_min)/(a_max-a_min) + # normed.append(normalize[np.where((normalize != 0) & (normalize != 1))]) + normed.append(mp-a_min) + # the first dataset is the control dataset + if dataset is not airmass_datasets[0]: + labels.append(airmass_coeff) + print(normed) + ax.hist(normed, bins=numbins, label=labels) + + ax.set_xlabel('Airmass Score (0 is worst, 1 is ideal)') + ax.set_ylabel('Number of Scheduled Requests') + ax.legend(title='Airmass Coefficient') + return fig, plot_title + + +def plot_pct_count_airmass_prio_bins(airmass_datasets): + plot_title = '1m Network Airmass Experiment Percent of Requests Scheduled' + fig, ax = plt.subplots() + fig.suptitle(plot_title) + + barwidth = 0.4 + bardata = [] + labels = ['optimize by earliest'] + # get the bin names from the first dataset, the bins should be consistent across datasets + binnames = airmass_datasets[0]['percent_sched_by_priority'].keys() + for dataset in airmass_datasets: + priority_data = dataset['percent_sched_by_priority'][0] + airmass_coeff = dataset['airmass_weighting_coefficient'] + bardata.append(list(priority_data.values())) + # the first dataset is the control dataset + if dataset is not airmass_datasets[0]: + labels.append(airmass_coeff) + plotutils.plot_barplot(ax, bardata, labels, binnames, barwidth) + + ax.set_xlabel('Priority') + ax.set_ylabel('Percent of Requests Scheduled') + ax.set_ylim(0, 100) + 
ax.legend(title='Airmass Coefficient') + return fig, plot_title + diff --git a/adaptive_scheduler/simulation/plots.py b/adaptive_scheduler/simulation/plots.py new file mode 100644 index 00000000..3945d46b --- /dev/null +++ b/adaptive_scheduler/simulation/plots.py @@ -0,0 +1,43 @@ +""" +The interface for producing plots. To create plots, add plots to the list of plots, +modifying the parameters to Plot as necessary. +""" +import matplotlib.pyplot as plt + +import adaptive_scheduler.simulation.plotfuncs as plotfuncs +import adaptive_scheduler.simulation.plotutils as plotutils +from adaptive_scheduler.simulation.plotutils import Plot + +airmass_experiment_ids = [ + '1m0-simulation-real-airmass-control-1_2022-07-18T23:59:44.770684', + '1m0-simulation-real-airmass-coeff-0-1', + '1m0-simulation-real-airmass-coeff-0.01-1', + '1m0-simulation-real-airmass-coeff-0.05-1', + '1m0-simulation-real-airmass-coeff-0.1-1', + '1m0-simulation-real-airmass-coeff-1.0-1', + '1m0-simulation-real-airmass-coeff-10-1', + '1m0-simulation-real-airmass-coeff-100-1', + '1m0-simulation-real-airmass-coeff-1000-1', + '1m0-simulation-real-airmass-coeff-1000000-1', +] + +plots = [ + Plot(plotfuncs.plot_normed_airmass_histogram, *airmass_experiment_ids), +] + +if __name__ == '__main__': + spacing = max([len(plot.name) for plot in plots]) + 10 + print('Available plots:') + print(f'{"Name":{spacing}}Description') + print(f'{"====":{spacing}}===========') + + for plot in plots: + print(f'{plot.name:{spacing}}{plot.description}') + showplot = input('Show plot (default all): ') + if showplot == '': + for plot in plots: + plt.show() + else: + plt.close('all') + plot.fig.show() + plt.show() diff --git a/adaptive_scheduler/simulation/plotutils.py b/adaptive_scheduler/simulation/plotutils.py index b5c6c8a7..323c1ef0 100644 --- a/adaptive_scheduler/simulation/plotutils.py +++ b/adaptive_scheduler/simulation/plotutils.py @@ -1,6 +1,13 @@ +""" +Plotting utility functions +""" import os +import logging +from 
datetime import datetime import numpy as np +import matplotlib.pyplot as plt +import opensearchpy from opensearchpy import OpenSearch PLOTEXPORT_DIR = os.getenv('PLOTEXPORT_DIR', 'adaptive_scheduler/simulation/plot_output') @@ -10,6 +17,38 @@ OPENSEARCH_INDEX = os.getenv('OPENSEARCH_INDEX', 'scheduler-simulations') opensearch_client = OpenSearch(OPENSEARCH_URL) +log = logging.getLogger(__name__) +log.setLevel(logging.DEBUG) + + +class Plot: + def __init__(self, plotfunc, *sim_ids): + """A wrapper class for plotting. The user specifies the plotting function to use + and the simulation ID(s) or search keywords. The data is passed to the plotting + function as a list of datasets, each set corresponding to an OpenSearch index. + The plotting function is responsible for accessing the right data keys. + + Args: + plotfunc: The plotting function to use. + sim_ids: The simulation IDs to look for on OpenSearch. + """ + self.plotfunc = plotfunc + # expects plotting functions to be called 'plot_some_plot_name' + self.name = plotfunc.__name__.replace('plot_', '') + self.data = [] + for sim_id in sim_ids: + self.data.append(get_opensearch_data(sim_id)) + + self.fig, self.description = plotfunc(self.data) + + def save(self): + timestamp = datetime.utcnow().isoformat(timespec='seconds') + savename = f'{self.name}_{timestamp}' + export_to_image(savename, self.fig) + + def show(self): + plt.show() + def export_to_image(fname, fig): """Takes a matplotlib Figure object and saves the figure. If the output @@ -17,21 +56,20 @@ def export_to_image(fname, fig): Args: fname (str): The filename to save the file as. - fig (matplotlib.pyplot.Figure): The figure to save, typically created by - calling subplots(). + fig (matplotlib.pyplot.Figure): The figure to save. 
""" try: os.mkdir(PLOTEXPORT_DIR) - print(f'Directory "{PLOTEXPORT_DIR}" created') + log.info(f'Directory "{PLOTEXPORT_DIR}" created') except FileExistsError: pass for imgformat in PLOTEXPORT_FORMATS: fpath = os.path.join(PLOTEXPORT_DIR, f'{fname}.{imgformat}') fig.savefig(fpath, format=imgformat) - print(f'Plot exported to {fpath}') + log.info(f'Plot exported to {fpath}') -def plot_barplot(ax, data, labels, binnames, barwidth=0.04): +def plot_barplot(ax, data, labels, binnames, barwidth): """Generates a barplot for multiple datasets. Args: @@ -47,26 +85,37 @@ def plot_barplot(ax, data, labels, binnames, barwidth=0.04): ax.set_xticks(ticks+barwidth*i/2, binnames) -def get_data_from_opensearch(query): - """Searches OpenSearch for a matching query (wildcards allowed) and returns the source data. +def get_opensearch_data(query): + """Gets a specific OpenSearch id and returns the source data. Tries to match the exact ID first, + then moves on to a keyword search (wildcards allowed) if the first search fails. Returns the most + recent index for the keyword search. Args: - query (str): The search query to look for. + query (str): The index to look for. Returns: source_data (dict): A dictionary of the data returned from OpenSearch. - None: Returns None if there are no results. 
""" - source_data = None - query = {'query': { - 'wildcard': {'simulation_id.keyword': query} - } - } - response = opensearch_client.search(query, OPENSEARCH_INDEX) try: - result = response['hits']['hits'][0] - source_data = result['_source'] - print(f'Got data for id: {source_data["simulation_id"]}') - except IndexError: - print(f'Found no results for {query}') + response = opensearch_client.get(OPENSEARCH_INDEX, query) + source_data = response['_source'] + log.debug(f'Got data for id: {source_data["simulation_id"]}') + except opensearchpy.exceptions.NotFoundError: + log.info(f'Index matching id:{query} not found, trying keyword search') + query = { + 'query': { + 'wildcard': {'simulation_id.keyword': query} + }, + 'sort': [ + {'record_time': {'order': 'desc'}} + ] + } + response = opensearch_client.search(query, OPENSEARCH_INDEX) + try: + result = response['hits']['hits'][0] + source_data = result['_source'] + log.debug(f'Got data for id: {source_data["simulation_id"]}') + except IndexError: + # give up + raise opensearchpy.exceptions.NotFoundError(f'No data found for {query}') return source_data From af0fa055559c3a42d8c191ba637573df8004ded0 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Fri, 29 Jul 2022 09:51:58 -0700 Subject: [PATCH 116/165] merge plots --- adaptive_scheduler/simulation/plot_all.py | 131 ++++++++++++++++++ .../simulation/plot_effective_priority.py | 6 +- 2 files changed, 134 insertions(+), 3 deletions(-) diff --git a/adaptive_scheduler/simulation/plot_all.py b/adaptive_scheduler/simulation/plot_all.py index 11976584..b85f6541 100644 --- a/adaptive_scheduler/simulation/plot_all.py +++ b/adaptive_scheduler/simulation/plot_all.py @@ -331,6 +331,129 @@ def plot_pct_sched_bin_eff_prio(): ax.set_ylabel('Number of Requests') ax.set_ylim(0, 240) plt.show() + + +def plot_percent_sched_requests_bin_by_priority(): + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(25, 12)) + fig.suptitle(f'1m0 Network Scheduler Metrics Binned by Priority', fontsize=20) + 
fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + bardata1 = [] + for id in EFF_PRI_SCALING_TEST_LABELS: + priority_data = plotutils.get_data_from_opensearch(f'1m0-optimize-{id}')['percent_duration_by_priority'] + bardata1.append(list(priority_data[0].values())) + priorities = ['low priority', 'mid priority', 'high priority'] + plotutils.plot_barplot(ax1, bardata1, EFF_PRI_SCALING_TEST_LABELS, priorities) + ax1.set_xlabel('Priority') + ax1.set_ylabel('Percent Scheduled Time') + ax1.set_title('Percent Duration Scheduled') + ax1.legend() + bardata2 = [] + for id in EFF_PRI_SCALING_TEST_LABELS: + priority_data = plotutils.get_data_from_opensearch(f'1m0-optimize-{id}')['percent_sched_by_priority'] + bardata2.append(list(priority_data[0].values())) + priorities = ['low priority', 'mid priority', 'high priority'] + plotutils.plot_barplot(ax2, bardata2, EFF_PRI_SCALING_TEST_LABELS, priorities) + ax2.set_xlabel('Priority') + ax2.set_ylabel('Percent Scheduled Count') + ax2.set_title('Percent Number Scheduled') + ax2.legend() + plt.show() + + +VARIABLE = [ 'with-duration-v3', + 'no-duration-v3', + 'with-duration-scaled-100-v3', + 'no-duration-scaled-100-v3',] + +markers = ["o" , "," ,"v" , "^" , "<", ">"] +colors = ['r','b','c','m', 'y', 'k'] +def rand_jitter(arr): + stdev = .01 * (max(arr) - min(arr)) + return arr + np.random.randn(len(arr)) * stdev + +def plot_sched_priority_duration_dotplot(): + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(28, 12)) + fig.suptitle(f'1m0 Distribution of Priority and Duration With Airmass Optimization', fontsize=20) + fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + for i, id in enumerate(VARIABLE): + data = plotutils.get_data_from_opensearch(f'1m0-optimize-airmass-{id}') + if id in ['with-duration-scaled-100-v3', 'no-duration-scaled-100-v3']: + data['raw_scheduled_priorities'] = [(p+35)/4.5 for p in data['raw_scheduled_priorities']] + # print(id, len(data['raw_scheduled_priorities']), len(data['raw_unscheduled_priorities'])) + 
ax1.scatter(rand_jitter(data['raw_scheduled_priorities']), rand_jitter(data['raw_scheduled_durations']), + marker = markers[i],c = colors[i], s = 10, label = f'scheduled requests {id}',alpha = 0.3) + + + ax1.set_ylim(top=11000) + ax1.set_xlabel('Priority') + ax1.set_ylabel('Request Duration') + ax1.legend() + for i, id in enumerate(VARIABLE): + data = plotutils.get_data_from_opensearch(f'1m0-optimize-airmass-{id}') + if id in ['with-duration-scaled-100-v3', 'no-duration-scaled-100-v3']: + data['raw_unscheduled_priorities'] = [(p+35)/4.5 for p in data['raw_unscheduled_priorities']] + ax2.scatter(rand_jitter(data['raw_unscheduled_priorities']), rand_jitter(data['raw_unscheduled_durations']), + c =colors[i], marker=markers[i],s=10, label = f'unscheduled requests {id}', alpha = 0.3) + ax2.set_ylim(top=11000) + ax2.set_xlabel('Priority') + ax2.set_ylabel('Request Duration') + ax2.legend() + plt.show(block = False) + plt.show() + + +def plot_heat_map_priority_duration(): + fig, axs= plt.subplots(2, 2, figsize=(13, 12)) + fig.suptitle(f'1m0 Network Requests Heatmap With Airmass Optimization', fontsize=20) + fig.subplots_adjust(wspace=0.01, hspace=0.01, top=0.9) + ax_list = [axs[0,0],axs[0,1],axs[1,0], axs[1,1]] + for i, id in enumerate(VARIABLE): + data = plotutils.get_data_from_opensearch(f'1m0-optimize-airmass-{id}') + if id in ['with-duration-scaled-100-v3', 'no-duration-scaled-100-v3']: + data['raw_scheduled_priorities'] = [(p+35)/4.5 for p in data['raw_scheduled_priorities']] + data['raw_unscheduled_priorities'] = [(p+35)/4.5 for p in data['raw_unscheduled_priorities']] + sched_priorities = data['raw_scheduled_priorities'] + sched_durations = data['raw_scheduled_durations'] + unsched_priorities = data['raw_unscheduled_priorities'] + unsched_durations = data['raw_unscheduled_durations'] + level_1_bins = bin_data(sched_priorities, sched_durations, bin_size=4, bin_range=(10,30),aggregator=None) + level_2_bins = { + bin_key: bin_data(bin_values, bin_size=300, 
bin_range=(0, 1499)) | bin_data(bin_values, bin_size=3000, bin_range=(1500, 4000)) + for bin_key, bin_values in level_1_bins.items() + } + level_1_bins_unsched = bin_data(unsched_priorities, unsched_durations, bin_size=4, bin_range=(10,30),aggregator=None) + level_2_bins_unsched = { + bin_key: bin_data(bin_values, bin_size=300, bin_range=(0, 1499)) | bin_data(bin_values, bin_size=3000, bin_range=(1500, 4000)) + for bin_key, bin_values in level_1_bins_unsched.items() + } + heat_map_elements = [] + heat_map_elements_unsched = [] + for values in level_2_bins.values(): + heat_map_elements.append(list(values.values())) + for values in level_2_bins_unsched.values(): + heat_map_elements_unsched.append(list(values.values())) + priority_bins = list(level_2_bins.keys()) + duration_bins = ['0-5','5-10','10-15', '15-20', '20-25', '25&up'] + heat_map_elements = np.array(heat_map_elements) + heat_map_elements_unsched = np.array(heat_map_elements_unsched) + axis = ax_list[i] + cmap=plt.get_cmap('coolwarm') + cmap2 = plt.get_cmap('gray') + heatplot = axis.imshow(heat_map_elements,cmap=cmap) + axis.set_ylabel('Priority') + axis.set_xlabel('Duration (minutes)') + axis.set_xticks(np.arange(len(duration_bins)), labels=duration_bins) + axis.set_yticks(np.arange(len(priority_bins)), labels=priority_bins) + plt.setp(axis.get_xticklabels(), rotation=45, ha="right", + rotation_mode="anchor") + for i in range(len(priority_bins)): + for j in range(len(duration_bins)): + value = heat_map_elements[i, j] + text1 = axis.text(j, i, f'{heat_map_elements[i, j]}|{ heat_map_elements_unsched[i, j]}', + ha="center", va="center", fontsize='large', fontweight='semibold', color=cmap2(0.001/value)) + axis.set_title(f'{id} (sched|unsched)', fontweight='semibold') + fig.tight_layout() + plt.show() if __name__ == '__main__': @@ -362,6 +485,14 @@ def plot_pct_sched_bin_eff_prio(): 'desc': 'Scatterplot with (prio x duration) on y-axis and duration on x-axis'}, 'pct_sched_eff_prio_hist': {'func': 
plot_pct_sched_bin_eff_prio, 'desc': 'Histogram with (prio x duration) on x-axis and percentage scheduled by bin'}, + 'pct_sched_requests_bin_priority': {'func': plot_percent_sched_requests_bin_by_priority, + 'desc': 'Percent duration of requests scheduled binned by priority level for different ' + 'effective priority algorithms'}, + 'priority_duration_dotplot': {'func': plot_sched_priority_duration_dotplot, + 'desc': 'Dotplots of distribution of scheduled/unscheduled requests with (priority) on x-axis ' + 'and (duration) on y-axis'}, + 'heat_map_priority_duration': {'func': plot_heat_map_priority_duration, + 'desc': 'Heat map showing distribution of requests on priority and duration'}, } description = 'Plotting functions for airmass optimization experiment.' diff --git a/adaptive_scheduler/simulation/plot_effective_priority.py b/adaptive_scheduler/simulation/plot_effective_priority.py index bfb0aa82..e377e3cd 100644 --- a/adaptive_scheduler/simulation/plot_effective_priority.py +++ b/adaptive_scheduler/simulation/plot_effective_priority.py @@ -1,7 +1,7 @@ import numpy as np import matplotlib.pyplot as plt from opensearchpy import OpenSearch -from plotutils import get_data_from_opensearch, plot_barplot, default_colors +from plotutils import get_data_from_opensearch, plot_barplot EFF_PRI_CALC= ['airmass','airmass-with-duration-scaled-100','airmass-no-duration','airmass-no-duration-scaled-100',] @@ -18,7 +18,7 @@ def plot_percent_sched_duration_bin_by_priority(): bardata.append(list(priority_data[0].values())) priorities = ['low priority', 'mid priority', 'high priority'] - plot_barplot(ax, bardata, default_colors, EFF_PRI_CALC, priorities) + plot_barplot(ax, bardata, EFF_PRI_CALC, priorities) ax.set_xlabel('Priority') ax.set_ylabel('Percent Scheduled Time') fig.legend() @@ -37,7 +37,7 @@ def plot_percent_sched_numbers_bin_by_priority(): bardata.append(list(priority_data[0].values())) priorities = ['low priority', 'mid priority', 'high priority'] - plot_barplot(ax, 
bardata, default_colors, EFF_PRI_CALC, priorities) + plot_barplot(ax, bardata, EFF_PRI_CALC, priorities) ax.set_xlabel('Priority') ax.set_ylabel('Percent Scheduled Count') fig.legend() From 97b651827560101b498186f1de5d7ac78321ac80 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Fri, 29 Jul 2022 10:54:21 -0700 Subject: [PATCH 117/165] working UI for plotting program --- adaptive_scheduler/simulation/plotfuncs.py | 56 ++++++---- adaptive_scheduler/simulation/plots.py | 25 ++--- adaptive_scheduler/simulation/plotutils.py | 124 +++++++++++++++++---- 3 files changed, 142 insertions(+), 63 deletions(-) diff --git a/adaptive_scheduler/simulation/plotfuncs.py b/adaptive_scheduler/simulation/plotfuncs.py index 4bd67c8a..b3e7c833 100644 --- a/adaptive_scheduler/simulation/plotfuncs.py +++ b/adaptive_scheduler/simulation/plotfuncs.py @@ -19,24 +19,25 @@ matplotlib.rcParams['figure.subplot.top'] = 0.9 # spacing between plot and title -def plot_normed_airmass_histogram(airmass_datasets): - """Plots the distribution of airmass scores. The score is obtained by normalizing the - scheduled airmass, with 0 being the worst and 1 being the best. +def plot_airmass_difference_histogram(airmass_datasets, plot_title, normalize=False): + """Plots the difference of airmass from ideal. If normalize is turned on, then it scores + the airmasses with 0 being the worst (closest to bad airmass) and 1 being the best. Args: - airmass_data (list): Should be a list of datasets, each dataset corresponding + airmass_data [dict]: Should be a list of datasets, each dataset corresponding to a different airmass weighting coefficient. Assumes the first dataset passed - is the control dataset (airmass optimization turned off) + is the control dataset (airmass optimization turned off). + plot_title (str): The title of the plot. + normalize (bool): Determines if the airmass score is normalized. Returns: fig (matplotlib.pyplot.Figure): The output figure object. 
""" - plot_title = '1m Network Airmass Score Distribution for Scheduled Requests' fig, ax = plt.subplots() fig.suptitle(plot_title) numbins = 10 - normed = [] + data = [] labels = ['optimize by earliest'] for dataset in airmass_datasets: airmass_data = dataset['airmass_metrics']['raw_airmass_data'] @@ -44,32 +45,46 @@ def plot_normed_airmass_histogram(airmass_datasets): mp = np.array(airmass_data[0]['midpoint_airmasses']) a_min = np.array(airmass_data[1]['min_poss_airmasses']) a_max = np.array(airmass_data[2]['max_poss_airmasses']) - print(len(np.where(a_min == a_max)[0])) - # normalize = 1 - (mp-a_min)/(a_max-a_min) - # normed.append(normalize[np.where((normalize != 0) & (normalize != 1))]) - normed.append(mp-a_min) + if normalize: + normed = 1 - (mp-a_min)/(a_max-a_min) + data.append(normed[np.where((normed != 0) & (normed != 1))]) + else: + data.append(mp-a_min) # the first dataset is the control dataset if dataset is not airmass_datasets[0]: labels.append(airmass_coeff) - print(normed) - ax.hist(normed, bins=numbins, label=labels) + ax.hist(data, bins=numbins, label=labels) - ax.set_xlabel('Airmass Score (0 is worst, 1 is ideal)') + if normalize: + ax.set_xlabel('Airmass Score (0 is worst, 1 is ideal)') + else: + ax.set_xlabel('Difference from Ideal Airmass (0 is ideal)') ax.set_ylabel('Number of Scheduled Requests') ax.legend(title='Airmass Coefficient') - return fig, plot_title + return fig -def plot_pct_count_airmass_prio_bins(airmass_datasets): - plot_title = '1m Network Airmass Experiment Percent of Requests Scheduled' +def plot_pct_scheduled_airmass_binned_priority(airmass_datasets, plot_title): + """Plots the the percentage of requests scheduled for different airmass coefficients + binned into priority levels. + + Args: + airmass_data [dict]: Should be a list of datasets, each dataset corresponding + to a different airmass weighting coefficient. Assumes the first dataset passed + is the control dataset (airmass optimization turned off). 
+ plot_title (str): The title of the plot. + + Returns: + fig (matplotlib.pyplot.Figure): The output figure object. + """ fig, ax = plt.subplots() fig.suptitle(plot_title) - barwidth = 0.4 + barwidth = 0.04 bardata = [] labels = ['optimize by earliest'] # get the bin names from the first dataset, the bins should be consistent across datasets - binnames = airmass_datasets[0]['percent_sched_by_priority'].keys() + binnames = airmass_datasets[0]['percent_sched_by_priority'][0].keys() for dataset in airmass_datasets: priority_data = dataset['percent_sched_by_priority'][0] airmass_coeff = dataset['airmass_weighting_coefficient'] @@ -83,5 +98,4 @@ def plot_pct_count_airmass_prio_bins(airmass_datasets): ax.set_ylabel('Percent of Requests Scheduled') ax.set_ylim(0, 100) ax.legend(title='Airmass Coefficient') - return fig, plot_title - + return fig diff --git a/adaptive_scheduler/simulation/plots.py b/adaptive_scheduler/simulation/plots.py index 3945d46b..fc3c5a7d 100644 --- a/adaptive_scheduler/simulation/plots.py +++ b/adaptive_scheduler/simulation/plots.py @@ -2,8 +2,6 @@ The interface for producing plots. To create plots, add plots to the list of plots, modifying the parameters to Plot as necessary. 
""" -import matplotlib.pyplot as plt - import adaptive_scheduler.simulation.plotfuncs as plotfuncs import adaptive_scheduler.simulation.plotutils as plotutils from adaptive_scheduler.simulation.plotutils import Plot @@ -22,22 +20,13 @@ ] plots = [ - Plot(plotfuncs.plot_normed_airmass_histogram, *airmass_experiment_ids), + Plot(plotfuncs.plot_airmass_difference_histogram, + '1m Network Airmass Score Distribution for Scheduled Requests', + *airmass_experiment_ids), + Plot(plotfuncs.plot_pct_scheduled_airmass_binned_priority, + '1m Network Airmass Experiment Percent of Requests Scheduled', + *airmass_experiment_ids), ] if __name__ == '__main__': - spacing = max([len(plot.name) for plot in plots]) + 10 - print('Available plots:') - print(f'{"Name":{spacing}}Description') - print(f'{"====":{spacing}}===========') - - for plot in plots: - print(f'{plot.name:{spacing}}{plot.description}') - showplot = input('Show plot (default all): ') - if showplot == '': - for plot in plots: - plt.show() - else: - plt.close('all') - plot.fig.show() - plt.show() + plotutils.run_user_interface(plots) diff --git a/adaptive_scheduler/simulation/plotutils.py b/adaptive_scheduler/simulation/plotutils.py index 323c1ef0..5e76f9ef 100644 --- a/adaptive_scheduler/simulation/plotutils.py +++ b/adaptive_scheduler/simulation/plotutils.py @@ -2,7 +2,8 @@ Plotting utility functions """ import os -import logging +import argparse +import readline from datetime import datetime import numpy as np @@ -10,45 +11,122 @@ import opensearchpy from opensearchpy import OpenSearch -PLOTEXPORT_DIR = os.getenv('PLOTEXPORT_DIR', 'adaptive_scheduler/simulation/plot_output') -PLOTEXPORT_FORMATS = ['jpg', 'pdf'] +DEFAULT_DIR = 'adaptive_scheduler/simulation/plot_output' OPENSEARCH_URL = os.getenv('OPENSEARCH_URL', 'https://logs.lco.global/') OPENSEARCH_INDEX = os.getenv('OPENSEARCH_INDEX', 'scheduler-simulations') opensearch_client = OpenSearch(OPENSEARCH_URL) -log = logging.getLogger(__name__) 
-log.setLevel(logging.DEBUG) +data_cache = {} + + +class AutoCompleter(object): + def __init__(self, options): + self.options = sorted(options) + + def complete(self, text, state): + if state == 0: + if text: + self.matches = [s for s in self.options if s and s.startswith(text)] + else: + self.matches = self.options[:] + + try: + return self.matches[state] + except IndexError: + return None + + +def run_user_interface(plots): + """Handles user interaction in the command line. + + Args: + plots [Plot]: A list of Plot objects. + """ + description = 'Plotting functions for scheduler simulator data visualization' + parser = argparse.ArgumentParser(description=description) + parser.add_argument('-s', '--save', help='Save the plot(s) to a file', action='store_true') + parser.add_argument('-f', '--format', help='The file format to save as', default='jpg') + parser.add_argument('-o', '--outputdir', help='The output directory to save to', default=DEFAULT_DIR) + args = parser.parse_args() + global export_dir + global export_format + export_dir = args.outputdir + export_format = args.format + + plot_dict = {plot.name: plot for plot in plots} + plot_names = list(plot_dict.keys()) + spacing = max([len(name) for name in plot_names]) + 10 + print('\nAvailable plots:') + print(f'\n{"Name":{spacing}}Description') + print(f'{"====":{spacing}}===========') + for plot in plots: + print(f'{plot.name:{spacing}}{plot.description}') + + completer = AutoCompleter(plot_names) + readline.set_completer(completer.complete) + readline.parse_and_bind('tab: complete') + while True: + showplot = input('\nShow plot (default all): ') + if showplot == '': + for plot in plots: + plot.generate() + if args.save: + plot.save() + plt.show() + break + else: + try: + plot = plot_dict[showplot] + plt.close('all') + plot.generate() + if args.save: + plot.save() + plot.fig.show() + plt.show() + break + except KeyError: + print('Plot name not found.') class Plot: - def __init__(self, plotfunc, *sim_ids): + def 
__init__(self, plotfunc, description, *sim_ids, **kwargs): """A wrapper class for plotting. The user specifies the plotting function to use and the simulation ID(s) or search keywords. The data is passed to the plotting function as a list of datasets, each set corresponding to an OpenSearch index. - The plotting function is responsible for accessing the right data keys. + The plotting function is responsible for accessing the right data keys. Data is cached + within the same run but not between runs. Args: - plotfunc: The plotting function to use. - sim_ids: The simulation IDs to look for on OpenSearch. + plotfunc (func): The plotting function to use. + description (str): The description of the plot. Will be used as the plot title in matplotlib. + sim_ids [str]: The simulation IDs to look for on OpenSearch. + kwargs: Optional arguments to pass to the plotting function. """ self.plotfunc = plotfunc + self.description = description # expects plotting functions to be called 'plot_some_plot_name' self.name = plotfunc.__name__.replace('plot_', '') + self.sim_ids = sim_ids + self.kwargs = kwargs + + def generate(self): self.data = [] - for sim_id in sim_ids: - self.data.append(get_opensearch_data(sim_id)) + for sim_id in self.sim_ids: + global data_cache + try: + self.data.append(data_cache[sim_id]) + except KeyError: + data_cache[sim_id] = get_opensearch_data(sim_id) + self.data.append(data_cache[sim_id]) - self.fig, self.description = plotfunc(self.data) + self.fig = self.plotfunc(self.data, self.description, **self.kwargs) def save(self): timestamp = datetime.utcnow().isoformat(timespec='seconds') savename = f'{self.name}_{timestamp}' export_to_image(savename, self.fig) - def show(self): - plt.show() - def export_to_image(fname, fig): """Takes a matplotlib Figure object and saves the figure. If the output @@ -58,15 +136,16 @@ def export_to_image(fname, fig): fname (str): The filename to save the file as. fig (matplotlib.pyplot.Figure): The figure to save. 
""" + global export_dir + global export_format try: - os.mkdir(PLOTEXPORT_DIR) - log.info(f'Directory "{PLOTEXPORT_DIR}" created') + os.mkdir(export_dir) + print(f'Directory "{export_dir}" created') except FileExistsError: pass - for imgformat in PLOTEXPORT_FORMATS: - fpath = os.path.join(PLOTEXPORT_DIR, f'{fname}.{imgformat}') - fig.savefig(fpath, format=imgformat) - log.info(f'Plot exported to {fpath}') + fpath = os.path.join(export_dir, f'{fname}.{export_format}') + fig.savefig(fpath, format=export_format) + print(f'Plot exported to {fpath}') def plot_barplot(ax, data, labels, binnames, barwidth): @@ -99,9 +178,7 @@ def get_opensearch_data(query): try: response = opensearch_client.get(OPENSEARCH_INDEX, query) source_data = response['_source'] - log.debug(f'Got data for id: {source_data["simulation_id"]}') except opensearchpy.exceptions.NotFoundError: - log.info(f'Index matching id:{query} not found, trying keyword search') query = { 'query': { 'wildcard': {'simulation_id.keyword': query} @@ -114,7 +191,6 @@ def get_opensearch_data(query): try: result = response['hits']['hits'][0] source_data = result['_source'] - log.debug(f'Got data for id: {source_data["simulation_id"]}') except IndexError: # give up raise opensearchpy.exceptions.NotFoundError(f'No data found for {query}') From 3e134fb5caff0ebd69afadc471571a1308ce2e52 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Fri, 29 Jul 2022 11:09:55 -0700 Subject: [PATCH 118/165] updated documentation --- adaptive_scheduler/simulation/plotutils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/adaptive_scheduler/simulation/plotutils.py b/adaptive_scheduler/simulation/plotutils.py index 5e76f9ef..5607bf0e 100644 --- a/adaptive_scheduler/simulation/plotutils.py +++ b/adaptive_scheduler/simulation/plotutils.py @@ -22,12 +22,17 @@ class AutoCompleter(object): def __init__(self, options): + """Handles TAB autocomplete in the command line. 
+ + Args: + options [str]: A list of possible autocomplete options. + """ self.options = sorted(options) def complete(self, text, state): if state == 0: if text: - self.matches = [s for s in self.options if s and s.startswith(text)] + self.matches = [s for s in self.options if s and text in s] else: self.matches = self.options[:] From e2310505bdf9879ddeb002fc459c8de4b8bd4a8b Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Fri, 29 Jul 2022 11:19:37 -0700 Subject: [PATCH 119/165] updated documentation, small change to barplotting function --- adaptive_scheduler/simulation/plotutils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adaptive_scheduler/simulation/plotutils.py b/adaptive_scheduler/simulation/plotutils.py index 5607bf0e..3ea8ab64 100644 --- a/adaptive_scheduler/simulation/plotutils.py +++ b/adaptive_scheduler/simulation/plotutils.py @@ -153,7 +153,7 @@ def export_to_image(fname, fig): print(f'Plot exported to {fpath}') -def plot_barplot(ax, data, labels, binnames, barwidth): +def plot_barplot(ax, data, labels, binnames, barwidth=0.04): """Generates a barplot for multiple datasets. 
Args: @@ -165,7 +165,7 @@ def plot_barplot(ax, data, labels, binnames, barwidth): """ ticks = np.arange(len(data[0])) for i, datavalues in enumerate(data): - ax.bar(ticks+barwidth*i, datavalues, barwidth, label=labels[i], alpha=0.8) + ax.bar(ticks+barwidth*i, datavalues, barwidth, label=labels[i]) ax.set_xticks(ticks+barwidth*i/2, binnames) From ce444ec521a6551bd56ae881e439dc26bd4e6329 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Fri, 29 Jul 2022 14:50:57 -0700 Subject: [PATCH 120/165] fixed bug with duration aggregation --- adaptive_scheduler/simulation/metrics.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 46e06888..895802f9 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -1,6 +1,7 @@ """ Metric calculation functions for the scheduler simulator. """ +import copy import logging import pickle from datetime import datetime, timedelta @@ -16,7 +17,7 @@ from adaptive_scheduler.utils import (time_in_capped_intervals, normalised_epoch_to_datetime, datetime_to_epoch, timeit) -from adaptive_scheduler.models import redis_instance, ICRSTarget +from adaptive_scheduler.models import redis_instance log = logging.getLogger('adaptive_scheduler') @@ -374,13 +375,13 @@ def binned_tac_priority_metrics(self): sched_priorities, unsched_priorities = self.get_priority_data() all_priorities = sched_priorities + unsched_priorities sched_histogram = bin_data(sched_priorities, bin_size=bin_size) - bin_sched_durations = bin_data(sched_priorities, sched_durations, bin_size) + bin_sched_durations = bin_data(sched_priorities, sched_durations, bin_size, aggregator=sum) full_histogram = bin_data(all_priorities, bin_size=bin_size) - bin_all_durations = bin_data(all_priorities, all_durations, bin_size) + bin_all_durations = bin_data(all_priorities, all_durations, bin_size, aggregator=sum) bin_percent_count = 
{bin_: percent_of(sched_histogram[bin_], full_histogram[bin_]) for bin_ in sched_histogram} - bin_percent_duration = {bin_: percent_of(bin_sched_durations[bin_], bin_all_durations[bin_]) - for bin_ in bin_sched_durations} + bin_percent_time = {bin_: percent_of(bin_sched_durations[bin_], bin_all_durations[bin_]) + for bin_ in bin_sched_durations} output_dict = { 'sched_histogram': sched_histogram, @@ -388,14 +389,15 @@ def binned_tac_priority_metrics(self): 'full_histogram': full_histogram, 'all_durations': bin_all_durations, 'percent_count': bin_percent_count, - 'percent_duration': bin_percent_duration, + 'percent_time': bin_percent_time } return output_dict def avg_slew_distance(self): semester_start = self.scheduler_runner.semester_details['start'] slew_distances = [] - for reservations in self.combined_schedule.values(): + schedule_copy = copy.deepcopy(self.combined_schedule) + for reservations in schedule_copy.values(): apparent_radecs = [] reservations.sort(key=lambda r: r.scheduled_start) for res in reservations: From 8debc402e4467691c80809b302ae35e0ff2e7df0 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Fri, 29 Jul 2022 15:29:12 -0700 Subject: [PATCH 121/165] small naming change --- adaptive_scheduler/simulation/orchestrator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index dd5b25ea..727059d2 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -130,7 +130,7 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche 'total_req_by_priority': [binned_tac_priority_metrics['full_histogram']], 'total_seconds_by_priority': [binned_tac_priority_metrics['all_durations']], 'percent_sched_by_priority': [binned_tac_priority_metrics['percent_count']], - 'percent_duration_by_priority': [binned_tac_priority_metrics['percent_duration']], + 
'percent_duration_by_priority': [binned_tac_priority_metrics['percent_time']], 'raw_scheduled_durations': sched_durations, 'raw_unscheduled_durations': unsched_durations, 'raw_scheduled_priorities': sched_priorities, From 88cc5462e791478e2818f80b371a4be5565dee1b Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Fri, 29 Jul 2022 15:29:44 -0700 Subject: [PATCH 122/165] ported some more functions from plot_all --- adaptive_scheduler/simulation/plotfuncs.py | 103 ++++++++++++++++++++- adaptive_scheduler/simulation/plots.py | 12 ++- 2 files changed, 111 insertions(+), 4 deletions(-) diff --git a/adaptive_scheduler/simulation/plotfuncs.py b/adaptive_scheduler/simulation/plotfuncs.py index b3e7c833..51f268a3 100644 --- a/adaptive_scheduler/simulation/plotfuncs.py +++ b/adaptive_scheduler/simulation/plotfuncs.py @@ -9,6 +9,7 @@ import matplotlib.style as style import adaptive_scheduler.simulation.plotutils as plotutils +import adaptive_scheduler.simulation.metrics as metrics # change default parameters for matplotlib here style.use('tableau-colorblind10') @@ -65,7 +66,7 @@ def plot_airmass_difference_histogram(airmass_datasets, plot_title, normalize=Fa def plot_pct_scheduled_airmass_binned_priority(airmass_datasets, plot_title): - """Plots the the percentage of requests scheduled for different airmass coefficients + """Plots the percentage of requests scheduled for different airmass coefficients binned into priority levels. 
Args: @@ -80,7 +81,6 @@ def plot_pct_scheduled_airmass_binned_priority(airmass_datasets, plot_title): fig, ax = plt.subplots() fig.suptitle(plot_title) - barwidth = 0.04 bardata = [] labels = ['optimize by earliest'] # get the bin names from the first dataset, the bins should be consistent across datasets @@ -92,10 +92,107 @@ def plot_pct_scheduled_airmass_binned_priority(airmass_datasets, plot_title): # the first dataset is the control dataset if dataset is not airmass_datasets[0]: labels.append(airmass_coeff) - plotutils.plot_barplot(ax, bardata, labels, binnames, barwidth) + plotutils.plot_barplot(ax, bardata, labels, binnames) ax.set_xlabel('Priority') ax.set_ylabel('Percent of Requests Scheduled') ax.set_ylim(0, 100) ax.legend(title='Airmass Coefficient') return fig + + +def plot_pct_time_scheduled_airmass_binned_priority(airmass_datasets, plot_title): + """Plots the percentage of requested time scheduled for different airmass coefficients + binned into priority levels. + + Args: + airmass_data [dict]: Should be a list of datasets, each dataset corresponding + to a different airmass weighting coefficient. Assumes the first dataset passed + is the control dataset (airmass optimization turned off). + plot_title (str): The title of the plot. + + Returns: + fig (matplotlib.pyplot.Figure): The output figure object. 
+ """ + fig, ax = plt.subplots() + fig.suptitle(plot_title) + + bardata = [] + labels = ['optimize by earliest'] + # get the bin names from the first dataset, the bins should be consistent across datasets + binnames = airmass_datasets[0]['percent_sched_by_priority'][0].keys() + for dataset in airmass_datasets: + priority_data = dataset['percent_duration_by_priority'][0] + airmass_coeff = dataset['airmass_weighting_coefficient'] + bardata.append(list(priority_data.values())) + # the first dataset is the control dataset + if dataset is not airmass_datasets[0]: + labels.append(airmass_coeff) + plotutils.plot_barplot(ax, bardata, labels, binnames) + + ax.set_xlabel('Priority') + ax.set_ylabel('Percent of Requested Time Scheduled') + ax.set_ylim(0, 100) + ax.legend(title='Airmass Coefficient') + return fig + + +def plot_midpoint_airmass_histograms(airmass_datasets, plot_title): + """Plots a distribution of midpoint airmasses for each different airmass coefficient. + + Args: + airmass_data [dict]: Should be a list of datasets, each dataset corresponding + to a different airmass weighting coefficient. Assumes the first dataset passed + is the control dataset (airmass optimization turned off). + plot_title (str): The title of the plot. + + Returns: + fig (matplotlib.pyplot.Figure): The output figure object. 
+ """ + fig = plt.figure(figsize=(16, 16)) + fig.suptitle(plot_title) + fig.subplots_adjust(wspace=0.3, hspace=0.3, top=0.92) + for i, dataset in enumerate(airmass_datasets[1:]): + ax = fig.add_subplot(3, 3, i+1) + midpoint_airmasses = dataset['airmass_metrics']['raw_airmass_data'][0]['midpoint_airmasses'] + airmass_coeff = dataset['airmass_weighting_coefficient'] + ax.hist(midpoint_airmasses, bins=50) + ax.set_title(f'Airmass Coefficient: {airmass_coeff}') + ax.set_xlabel('Midpoint Airmass') + ax.set_ylabel('Count') + ax.set_xlim(1.0, 2.0) + ax.set_ylim(0, 120) + return fig + + +def plot_eff_priority_duration_scatter(datasets, plot_title): + """Plots a scatterplot with effective priority on the y-axis and duration on the x-axis. + + Args: + datasets [dict]: A list of datasets. Expects one dataset for priority range 10-30 and one dataset + for priority scaled to 10-100. + plot_title (str): The title of the plot. + + Returns: + fig (matplotlib.pyplot.Figure): The output figure object. + """ + fig, axs = plt.subplots(1, 2, figsize=(24, 8)) + fig.suptitle(plot_title) + labels = ['Priority 10-30', 'Priority 10-100'] + # colors are from tableau-colorblind10 + colors = [('#006BA4', '#5F9ED1'), ('#C85200', '#FF800E')] + for i, ax in enumerate(axs): + data = datasets[i] + prio_scheduled = np.array(data['raw_scheduled_priorities']) + prio_unscheduled = np.array(data['raw_unscheduled_priorities']) + dur_scheduled = np.array(data['raw_scheduled_durations'])/60 + dur_unscheduled = np.array(data['raw_unscheduled_durations'])/60 + ax.scatter(dur_scheduled, prio_scheduled*dur_scheduled, + label='scheduled', marker='x', color=colors[i][0]) + ax.scatter(dur_unscheduled, prio_unscheduled*dur_unscheduled, + label='unscheduled', marker='x', alpha=0.5, color=colors[i][1]) + ax.set_ylabel('Effective Priority (base priority x duration)') + ax.set_xlabel('Duration [min]') + ax.set_title(f'Optimize by Airmass, With Duration, {labels[i]}') + ax.legend(title=labels[i]) + return fig diff 
--git a/adaptive_scheduler/simulation/plots.py b/adaptive_scheduler/simulation/plots.py index fc3c5a7d..088c8944 100644 --- a/adaptive_scheduler/simulation/plots.py +++ b/adaptive_scheduler/simulation/plots.py @@ -21,11 +21,21 @@ plots = [ Plot(plotfuncs.plot_airmass_difference_histogram, - '1m Network Airmass Score Distribution for Scheduled Requests', + '1m Network Airmass Difference Distribution for Scheduled Requests', *airmass_experiment_ids), Plot(plotfuncs.plot_pct_scheduled_airmass_binned_priority, '1m Network Airmass Experiment Percent of Requests Scheduled', *airmass_experiment_ids), + Plot(plotfuncs.plot_pct_time_scheduled_airmass_binned_priority, + '1m Network Airmass Experiment Percent of Requested Time Scheduled', + *airmass_experiment_ids), + Plot(plotfuncs.plot_midpoint_airmass_histograms, + '1m Network Airmass Experiment Midpoint Airmass Distributions', + *airmass_experiment_ids), + Plot(plotfuncs.plot_eff_priority_duration_scatter, + '1m Network Scatterplot of Effective Priority and Duration', + '1m0-optimize-airmass-with-duration-v2', + '1m0-optimize-airmass-with-duration-scaled-100-v2'), ] if __name__ == '__main__': From e6fa334c617b22f50c12a117585bf0ea6fb35c1b Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Fri, 29 Jul 2022 15:36:39 -0700 Subject: [PATCH 123/165] plot func --- adaptive_scheduler/simulation/plotfuncs.py | 118 +++++++++++++++++++++ 1 file changed, 118 insertions(+) diff --git a/adaptive_scheduler/simulation/plotfuncs.py b/adaptive_scheduler/simulation/plotfuncs.py index b3e7c833..1f148a53 100644 --- a/adaptive_scheduler/simulation/plotfuncs.py +++ b/adaptive_scheduler/simulation/plotfuncs.py @@ -9,6 +9,7 @@ import matplotlib.style as style import adaptive_scheduler.simulation.plotutils as plotutils +from adaptive_scheduler.simulation.metrics import bin_data # change default parameters for matplotlib here style.use('tableau-colorblind10') @@ -99,3 +100,120 @@ def plot_pct_scheduled_airmass_binned_priority(airmass_datasets, 
plot_title): ax.set_ylim(0, 100) ax.legend(title='Airmass Coefficient') return fig + + +def plot_percent_sched_requests_bin_by_priority(eff_pri_datasets, plot_title): + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(25, 12)) + fig.suptitle(plot_title) + fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + bardata1 = [] + labels1 = [] + for dataset in eff_pri_datasets: + bardata1.append(list(dataset['percent_duration_by_priority'][0].values())) + labels1.append(dataset['simulation_id'][21:-3]) + priorities = ['low priority', 'mid priority', 'high priority'] + plotutils.plot_barplot(ax1, bardata1, labels1, priorities) + ax1.set_xlabel('Priority') + ax1.set_ylabel('Percent Scheduled Time') + ax1.set_title('Percent Duration Scheduled') + ax1.legend() + bardata2 = [] + labels2 = [] + for dataset in eff_pri_datasets: + bardata2.append(list(dataset['percent_sched_by_priority'][0].values())) + labels2.append(dataset['simulation_id'][21:]) + priorities = ['low priority', 'mid priority', 'high priority'] + plotutils.plot_barplot(ax2, bardata2, labels2, priorities) + ax2.set_xlabel('Priority') + ax2.set_ylabel('Percent Scheduled Count') + ax2.set_title('Percent Number Scheduled') + ax2.legend() + plt.show() + + +def rand_jitter(arr): + stdev = .01 * (max(arr) - min(arr)) + return arr + np.random.randn(len(arr)) * stdev + +def plot_sched_priority_duration_dotplot(eff_pri_datasets, plot_title): + markers = ["o" , "," ,"v" , "^" , "<", ">"] + colors = ['r','b','c','m', 'y', 'k'] + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(28, 12)) + fig.suptitle(plot_title) + fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + for i, data in enumerate(eff_pri_datasets): + id = data['simulation_id'][21:] + if id in ['with-duration-scaled-100-v3', 'no-duration-scaled-100-v3']: + data['raw_scheduled_priorities'] = [(p+35)/4.5 for p in data['raw_scheduled_priorities']] + ax1.scatter(rand_jitter(data['raw_scheduled_priorities']), rand_jitter(data['raw_scheduled_durations']), + marker = 
markers[i],c = colors[i], s = 10, label = f'scheduled requests {id}',alpha = 0.3) + ax1.set_ylim(top=11000) + ax1.set_xlabel('Priority') + ax1.set_ylabel('Request Duration') + ax1.legend() + for i, data in enumerate(eff_pri_datasets): + id = data['simulation_id'][21:] + if id in ['with-duration-scaled-100-v3', 'no-duration-scaled-100-v3']: + data['raw_unscheduled_priorities'] = [(p+35)/4.5 for p in data['raw_unscheduled_priorities']] + ax2.scatter(rand_jitter(data['raw_unscheduled_priorities']), rand_jitter(data['raw_unscheduled_durations']), + c =colors[i], marker=markers[i],s=10, label = f'unscheduled requests {id}', alpha = 0.3) + ax2.set_ylim(top=11000) + ax2.set_xlabel('Priority') + ax2.set_ylabel('Request Duration') + ax2.legend() + plt.show(block = False) + plt.show() + + +def plot_heat_map_priority_duration(eff_pri_datasets, plot_title): + fig, axs= plt.subplots(2, 2, figsize=(13, 12)) + fig.suptitle(plot_title) + fig.subplots_adjust(wspace=0.01, hspace=0.01, top=0.9) + ax_list = [axs[0,0],axs[0,1],axs[1,0], axs[1,1]] + for i, data in enumerate(eff_pri_datasets): + id = data['simulation_id'][21:] + if id in ['with-duration-scaled-100-v3', 'no-duration-scaled-100-v3']: + data['raw_scheduled_priorities'] = [(p+35)/4.5 for p in data['raw_scheduled_priorities']] + data['raw_unscheduled_priorities'] = [(p+35)/4.5 for p in data['raw_unscheduled_priorities']] + sched_priorities = data['raw_scheduled_priorities'] + sched_durations = data['raw_scheduled_durations'] + unsched_priorities = data['raw_unscheduled_priorities'] + unsched_durations = data['raw_unscheduled_durations'] + level_1_bins = bin_data(sched_priorities, sched_durations, bin_size=4, bin_range=(10,30),aggregator=None) + level_2_bins = { + bin_key: bin_data(bin_values, bin_size=300, bin_range=(0, 1499)) | bin_data(bin_values, bin_size=3000, bin_range=(1500, 4000)) + for bin_key, bin_values in level_1_bins.items() + } + level_1_bins_unsched = bin_data(unsched_priorities, unsched_durations, bin_size=4, 
bin_range=(10,30),aggregator=None) + level_2_bins_unsched = { + bin_key: bin_data(bin_values, bin_size=300, bin_range=(0, 1499)) | bin_data(bin_values, bin_size=3000, bin_range=(1500, 4000)) + for bin_key, bin_values in level_1_bins_unsched.items() + } + heat_map_elements = [] + heat_map_elements_unsched = [] + for values in level_2_bins.values(): + heat_map_elements.append(list(values.values())) + for values in level_2_bins_unsched.values(): + heat_map_elements_unsched.append(list(values.values())) + priority_bins = list(level_2_bins.keys()) + duration_bins = ['0-5','5-10','10-15', '15-20', '20-25', '25&up'] + heat_map_elements = np.array(heat_map_elements) + heat_map_elements_unsched = np.array(heat_map_elements_unsched) + axis = ax_list[i] + cmap=plt.get_cmap('coolwarm') + cmap2 = plt.get_cmap('gray') + heatplot = axis.imshow(heat_map_elements,cmap=cmap) + axis.set_ylabel('Priority') + axis.set_xlabel('Duration (minutes)') + axis.set_xticks(np.arange(len(duration_bins)), labels=duration_bins) + axis.set_yticks(np.arange(len(priority_bins)), labels=priority_bins) + plt.setp(axis.get_xticklabels(), rotation=45, ha="right", + rotation_mode="anchor") + for i in range(len(priority_bins)): + for j in range(len(duration_bins)): + value = heat_map_elements[i, j] + text1 = axis.text(j, i, f'{heat_map_elements[i, j]}|{ heat_map_elements_unsched[i, j]}', + ha="center", va="center", fontsize='large', fontweight='semibold', color=cmap2(0.001/value)) + axis.set_title(f'{id} (sched|unsched)', fontweight='semibold') + fig.tight_layout() + plt.show() \ No newline at end of file From c362f32a017d3f253bc71c7e0a16f309c4abb7ac Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Fri, 29 Jul 2022 15:37:26 -0700 Subject: [PATCH 124/165] plot func --- adaptive_scheduler/simulation/plots.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/adaptive_scheduler/simulation/plots.py b/adaptive_scheduler/simulation/plots.py index fc3c5a7d..0adf0b69 100644 --- 
a/adaptive_scheduler/simulation/plots.py +++ b/adaptive_scheduler/simulation/plots.py @@ -19,6 +19,14 @@ '1m0-simulation-real-airmass-coeff-1000000-1', ] +effective_priority_experiment_ids = [ + '1m0-optimize-airmass-with-duration-v3', + '1m0-optimize-airmass-no-duration-v3', + '1m0-optimize-airmass-with-duration-scaled-100-v3', + '1m0-optimize-airmass-no-duration-scaled-100-v3', +] + + plots = [ Plot(plotfuncs.plot_airmass_difference_histogram, '1m Network Airmass Score Distribution for Scheduled Requests', @@ -26,6 +34,15 @@ Plot(plotfuncs.plot_pct_scheduled_airmass_binned_priority, '1m Network Airmass Experiment Percent of Requests Scheduled', *airmass_experiment_ids), + Plot(plotfuncs.plot_percent_sched_requests_bin_by_priority, + '1m0 Network Scheduler Metrics Binned by Priority', + *effective_priority_experiment_ids), + Plot(plotfuncs.plot_sched_priority_duration_dotplot, + '1m0 Distribution of Priority and Duration With Airmass Optimization', + *effective_priority_experiment_ids), + Plot(plotfuncs.plot_heat_map_priority_duration, + '1m0 Network Requests Heatmap With Airmass Optimization', + effective_priority_experiment_ids), ] if __name__ == '__main__': From 91c32d005449285ceb11452a057b37bb795815fa Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Mon, 1 Aug 2022 11:03:57 -0700 Subject: [PATCH 125/165] added window duration calculation --- adaptive_scheduler/simulation/metrics.py | 33 +++++++++++-------- adaptive_scheduler/simulation/orchestrator.py | 1 + 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 895802f9..15dc15d7 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -14,9 +14,7 @@ from rise_set import astrometry from adaptive_scheduler.observation_portal_connections import ObservationPortalConnectionError -from adaptive_scheduler.utils import (time_in_capped_intervals, - 
normalised_epoch_to_datetime, - datetime_to_epoch, timeit) +from adaptive_scheduler.utils import time_in_capped_intervals, normalised_epoch_to_datetime, datetime_to_epoch from adaptive_scheduler.models import redis_instance log = logging.getLogger('adaptive_scheduler') @@ -193,19 +191,16 @@ def _combine_normal_rr_input_reservations(self): reservations = [res for res in comp_res.reservation_list if res not in self.combined_input_reservations] self.combined_input_reservations.extend(reservations) - @timeit def count_scheduled(self): scheduled_reservations = [] for reservations in self.combined_schedule.values(): scheduled_reservations.extend(reservations) return len(scheduled_reservations), len(self.combined_input_reservations) - @timeit def percent_reservations_scheduled(self): scheduled, total = self.count_scheduled() return percent_of(scheduled, total) - @timeit def total_scheduled_eff_priority(self): effective_priorities = [] for reservations in self.combined_schedule.values(): @@ -231,12 +226,25 @@ def get_priority_data(self): sched_priorities = [priorities_by_rg_id[rg_id] for rg_id in sched_rg_ids] unsched_priorities = [priorities_by_rg_id[rg_id] for rg_id in unsched_rg_ids] # uncomment to remap the priorities - scale = (100, 10, 30, 10) - sched_priorities = [scalefunc(p, *scale) for p in sched_priorities] - unsched_priorities = [scalefunc(p, *scale) for p in unsched_priorities] + # note: adjust bin size accordingly + # scale = (100, 10, 30, 10) + # sched_priorities = [scalefunc(p, *scale) for p in sched_priorities] + # unsched_priorities = [scalefunc(p, *scale) for p in unsched_priorities] return sched_priorities, unsched_priorities - @timeit + def get_window_duration_data(self): + sched_window_durations = [] + for res in self.combined_input_reservations: + if res.scheduled: + windows = res.request.windows + # get the data format to a list, each element is a list corresponding to a resource + windows_list = list(windows.windows_for_resource.values()) + 
window_durations = [] + for loc in windows_list: + window_durations.extend([(w.end-w.start).total_seconds() for w in loc]) + sched_window_durations.append(max(window_durations)) + return sched_window_durations + def total_available_seconds(self): """Aggregates the total available time, calculated from dark intervals. @@ -259,7 +267,6 @@ def total_available_seconds(self): total_available_time += available_time return total_available_time - @timeit def percent_time_utilization(self): scheduled_durations, _ = self.get_duration_data() return percent_of(sum(scheduled_durations), self.total_available_seconds()) @@ -325,7 +332,6 @@ def _get_midpoint_airmasses_by_site(self, airmass_data, midpoint_time): midpoint_airmasses[site] = airmasses[np.argmin(np.abs(times-midpoint_time))] return midpoint_airmasses - @timeit def airmass_metrics(self, schedule=None): """Generate the airmass metrics of all scheduled reservations for a single schedule. @@ -364,10 +370,9 @@ def airmass_metrics(self, schedule=None): } return airmass_metrics - @timeit def binned_tac_priority_metrics(self): """Bins metrics based on TAC priority.""" - bin_size = 45 + bin_size = 10 sched_durations, unsched_durations = self.get_duration_data() all_durations = sched_durations + unsched_durations diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 727059d2..574de6bc 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -131,6 +131,7 @@ def record_metrics(normal_scheduler_result, rr_scheduler_result, scheduler, sche 'total_seconds_by_priority': [binned_tac_priority_metrics['all_durations']], 'percent_sched_by_priority': [binned_tac_priority_metrics['percent_count']], 'percent_duration_by_priority': [binned_tac_priority_metrics['percent_time']], + 'raw_window_durations': metrics.get_window_duration_data(), 'raw_scheduled_durations': sched_durations, 'raw_unscheduled_durations': 
unsched_durations, 'raw_scheduled_priorities': sched_priorities, From d2f0e40555d82e1b27716771af3de64da2fc4223 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Mon, 1 Aug 2022 15:41:24 -0700 Subject: [PATCH 126/165] more airmass plots --- adaptive_scheduler/simulation/plotfuncs.py | 137 ++++++++++++++++++++- adaptive_scheduler/simulation/plots.py | 16 ++- 2 files changed, 150 insertions(+), 3 deletions(-) diff --git a/adaptive_scheduler/simulation/plotfuncs.py b/adaptive_scheduler/simulation/plotfuncs.py index 51f268a3..4dc74577 100644 --- a/adaptive_scheduler/simulation/plotfuncs.py +++ b/adaptive_scheduler/simulation/plotfuncs.py @@ -66,7 +66,7 @@ def plot_airmass_difference_histogram(airmass_datasets, plot_title, normalize=Fa def plot_pct_scheduled_airmass_binned_priority(airmass_datasets, plot_title): - """Plots the percentage of requests scheduled for different airmass coefficients + """Plots a barplot of the percentage of requests scheduled for different airmass coefficients binned into priority levels. Args: @@ -137,6 +137,117 @@ def plot_pct_time_scheduled_airmass_binned_priority(airmass_datasets, plot_title return fig +def plot_pct_scheduled_airmass_lineplot(airmass_datasets, plot_title): + """Plots a line chart with percent of requests scheduled on the y-axis and airmass + coefficient on the x-axis. The priority bins are highlighted in different colors. + + Args: + airmass_data [dict]: Should be a list of datasets, each dataset corresponding + to a different airmass weighting coefficient. Assumes the first dataset passed + is the control dataset (airmass optimization turned off). + plot_title (str): The title of the plot. + + Returns: + fig (matplotlib.pyplot.Figure): The output figure object. 
+ """ + fig, ax = plt.subplots() + fig.suptitle(plot_title) + + prio_names = list(airmass_datasets[0]['percent_sched_by_priority'][0].keys()) + airmass_coeffs = [] + pct_scheduled = [] + # exclude the control dataset + for dataset in airmass_datasets[1:]: + data_by_priority = dataset['percent_sched_by_priority'][0] + airmass_coeffs.append(dataset['airmass_weighting_coefficient']) + pct_scheduled.append(list(data_by_priority.values())) + data_by_airmass = np.array(pct_scheduled).transpose() + for i, data in enumerate(data_by_airmass): + ax.plot(airmass_coeffs, data, label=prio_names[i]) + ax.set_xlabel('Airmass Coefficient') + ax.set_ylabel('Percent of Requests Scheduled') + ax.set_ylim(0, 100) + ax.legend(title='Priority') + return fig + + +def plot_pct_time_scheduled_airmass_lineplot(airmass_datasets, plot_title): + """Plots a line chart with percent of requested time scheduled on the y-axis and airmass + coefficient on the x-axis. The priority bins are highlighted in different colors. + + Args: + airmass_data [dict]: Should be a list of datasets, each dataset corresponding + to a different airmass weighting coefficient. Assumes the first dataset passed + is the control dataset (airmass optimization turned off). + plot_title (str): The title of the plot. + + Returns: + fig (matplotlib.pyplot.Figure): The output figure object. 
+ """ + fig, ax = plt.subplots() + fig.suptitle(plot_title) + + prio_names = list(airmass_datasets[0]['percent_duration_by_priority'][0].keys()) + prio_names.append('all') + airmass_coeffs = [] + pct_scheduled = [] + # exclude the control dataset + for dataset in airmass_datasets[1:]: + sched_by_priority = np.array(list(dataset['scheduled_seconds_by_priority'][0].values())) + total_by_priority = np.array(list(dataset['total_seconds_by_priority'][0].values())) + airmass_coeffs.append(dataset['airmass_weighting_coefficient']) + pct_by_priority = sched_by_priority/total_by_priority * 100 + pct_cumulative = np.sum(sched_by_priority)/np.sum(total_by_priority) * 100 + pct_scheduled.append(np.append(pct_by_priority, pct_cumulative)) + data_by_airmass = np.array(pct_scheduled).transpose() + for i, data in enumerate(data_by_airmass): + ax.plot(airmass_coeffs, data, label=prio_names[i]) + ax.set_xlabel('Airmass Coefficient') + ax.set_ylabel('Percent of Requested Time Scheduled') + ax.set_ylim(0, 100) + ax.legend(title='Priority') + return fig + + +def plot_pct_time_scheduled_out_of_available(airmass_datasets, plot_title): + """Plots a line chart with percent of requested time scheduled out of all availabel time + on the y-axis and airmass coefficient on the x-axis. The priority bins are highlighted + in different colors. + + Args: + airmass_data [dict]: Should be a list of datasets, each dataset corresponding + to a different airmass weighting coefficient. Assumes the first dataset passed + is the control dataset (airmass optimization turned off). + plot_title (str): The title of the plot. + + Returns: + fig (matplotlib.pyplot.Figure): The output figure object. 
+ """ + fig, ax = plt.subplots() + fig.suptitle(plot_title) + + prio_names = list(airmass_datasets[0]['percent_duration_by_priority'][0].keys()) + prio_names.append('all') + airmass_coeffs = [] + pct_scheduled = [] + # exclude the control dataset + for dataset in airmass_datasets[1:]: + sched_by_priority = np.array(list(dataset['scheduled_seconds_by_priority'][0].values())) + available_time = dataset['total_available_seconds'] + airmass_coeffs.append(dataset['airmass_weighting_coefficient']) + pct_by_priority = sched_by_priority/available_time * 100 + pct_cumulative = np.sum(sched_by_priority)/available_time * 100 + pct_scheduled.append(np.append(pct_by_priority, pct_cumulative)) + data_by_airmass = np.array(pct_scheduled).transpose() + for i, data in enumerate(data_by_airmass): + ax.plot(airmass_coeffs, data, label=prio_names[i]) + ax.set_xlabel('Airmass Coefficient') + ax.set_ylabel('Percent of Requested Time Scheduled') + ax.set_ylim(0, 100) + ax.legend(title='Priority') + return fig + + def plot_midpoint_airmass_histograms(airmass_datasets, plot_title): """Plots a distribution of midpoint airmasses for each different airmass coefficient. @@ -196,3 +307,27 @@ def plot_eff_priority_duration_scatter(datasets, plot_title): ax.set_title(f'Optimize by Airmass, With Duration, {labels[i]}') ax.legend(title=labels[i]) return fig + + +def plot_duration_by_window_duration_scatter(data, plot_title): + """Plots a scatterplot with observation duration on the y-axis and maximum window length per + observation on the x-axis. + + Args: + data (dict): The dataset for this metric. Expects one dataset. + plot_title (str): The title of the plot. + + Returns: + fig (matplotlib.pyploy.Figure): The output Figure object. 
+ """ + fig, ax = plt.subplots() + fig.suptitle(plot_title) + data = data[0] + sec_to_min = 1/60 + window_dur = np.array(data['raw_window_durations']) * sec_to_min + sched_dur = np.array(data['raw_scheduled_durations']) * sec_to_min + ax.scatter(window_dur, sched_dur, s=4) + ax.set_ylabel('Request Duration [min]') + ax.set_xlabel('Longest Possible Window Duration [min]') + + return fig diff --git a/adaptive_scheduler/simulation/plots.py b/adaptive_scheduler/simulation/plots.py index 088c8944..1d1fbf51 100644 --- a/adaptive_scheduler/simulation/plots.py +++ b/adaptive_scheduler/simulation/plots.py @@ -24,10 +24,19 @@ '1m Network Airmass Difference Distribution for Scheduled Requests', *airmass_experiment_ids), Plot(plotfuncs.plot_pct_scheduled_airmass_binned_priority, - '1m Network Airmass Experiment Percent of Requests Scheduled', + '1m Network Airmass Experiment Percent of Requests Scheduled per Priority Class', + *airmass_experiment_ids), + Plot(plotfuncs.plot_pct_scheduled_airmass_lineplot, + '1m Network Airmass Experiment Percent of Requests Scheduled per Priority Class', *airmass_experiment_ids), Plot(plotfuncs.plot_pct_time_scheduled_airmass_binned_priority, - '1m Network Airmass Experiment Percent of Requested Time Scheduled', + '1m Network Airmass Experiment Percent of Requested Time Scheduled per Priority Class', + *airmass_experiment_ids), + Plot(plotfuncs.plot_pct_time_scheduled_airmass_lineplot, + '1m Network Airmass Experiment Percent of Requested Time Scheduled per Priority Class', + *airmass_experiment_ids), + Plot(plotfuncs.plot_pct_time_scheduled_out_of_available, + '1m Network Airmass Experiment Percent of Requested Time Scheduled out of Available Time', *airmass_experiment_ids), Plot(plotfuncs.plot_midpoint_airmass_histograms, '1m Network Airmass Experiment Midpoint Airmass Distributions', @@ -36,6 +45,9 @@ '1m Network Scatterplot of Effective Priority and Duration', '1m0-optimize-airmass-with-duration-v2', 
'1m0-optimize-airmass-with-duration-scaled-100-v2'), + Plot(plotfuncs.plot_duration_by_window_duration_scatter, + '1m Network Scatterplot of Duration and Window Duration', + 'window-duration'), ] if __name__ == '__main__': From f19a680c6c78976d514318ac4c968004db2b985e Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Mon, 1 Aug 2022 15:42:03 -0700 Subject: [PATCH 127/165] bugfix for misindented break statement, slight input cleaning --- adaptive_scheduler/simulation/plotutils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/adaptive_scheduler/simulation/plotutils.py b/adaptive_scheduler/simulation/plotutils.py index 3ea8ab64..0d55fa5d 100644 --- a/adaptive_scheduler/simulation/plotutils.py +++ b/adaptive_scheduler/simulation/plotutils.py @@ -50,9 +50,9 @@ def run_user_interface(plots): """ description = 'Plotting functions for scheduler simulator data visualization' parser = argparse.ArgumentParser(description=description) - parser.add_argument('-s', '--save', help='Save the plot(s) to a file', action='store_true') - parser.add_argument('-f', '--format', help='The file format to save as', default='jpg') - parser.add_argument('-o', '--outputdir', help='The output directory to save to', default=DEFAULT_DIR) + parser.add_argument('-s', '--save', help='save the plot(s) to a file', action='store_true') + parser.add_argument('-f', '--format', help='the file format to save as', default='jpg') + parser.add_argument('-o', '--outputdir', help='the output directory to save to', default=DEFAULT_DIR) args = parser.parse_args() global export_dir global export_format @@ -72,14 +72,14 @@ def run_user_interface(plots): readline.set_completer(completer.complete) readline.parse_and_bind('tab: complete') while True: - showplot = input('\nShow plot (default all): ') + showplot = input('\nShow plot (default all): ').strip() if showplot == '': for plot in plots: plot.generate() if args.save: plot.save() plt.show() - break + break else: try: plot = 
plot_dict[showplot] From fc47c2cab4a3d24eb8be419438200a44f728ed10 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Mon, 1 Aug 2022 16:11:29 -0700 Subject: [PATCH 128/165] small modification to error handling, renamed plot_barplot to plot_multi_barplot --- adaptive_scheduler/simulation/plotutils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/adaptive_scheduler/simulation/plotutils.py b/adaptive_scheduler/simulation/plotutils.py index 0d55fa5d..f3b25feb 100644 --- a/adaptive_scheduler/simulation/plotutils.py +++ b/adaptive_scheduler/simulation/plotutils.py @@ -90,8 +90,8 @@ def run_user_interface(plots): plot.fig.show() plt.show() break - except KeyError: - print('Plot name not found.') + except KeyError as e: + print(f'Plot name not found: {e}') class Plot: @@ -153,7 +153,7 @@ def export_to_image(fname, fig): print(f'Plot exported to {fpath}') -def plot_barplot(ax, data, labels, binnames, barwidth=0.04): +def plot_multi_barplot(ax, data, labels, binnames, barwidth=0.04): """Generates a barplot for multiple datasets. 
Args: From 72ba3c1a16ef7fc8215d1a24805b18df74a64a22 Mon Sep 17 00:00:00 2001 From: Qingze Wu Date: Mon, 1 Aug 2022 16:37:27 -0700 Subject: [PATCH 129/165] simulation --- adaptive_scheduler/models.py | 2 +- adaptive_scheduler/simulation/plotfuncs.py | 27 ++++++++++- adaptive_scheduler/simulation/plots.py | 55 ++++++++++++---------- 3 files changed, 55 insertions(+), 29 deletions(-) diff --git a/adaptive_scheduler/models.py b/adaptive_scheduler/models.py index 4b90db3c..5f1e46e3 100644 --- a/adaptive_scheduler/models.py +++ b/adaptive_scheduler/models.py @@ -439,7 +439,7 @@ def __init__(self, configurations, windows, request_id, state='PENDING', telesco self.telescope_class = telescope_class self.req_duration = duration self.configuration_repeats = configuration_repeats - self.optimization_type = optimization_type + self.optimization_type = 'AIRMASS' self.scheduled_reservation = scheduled_reservation def get_duration(self): diff --git a/adaptive_scheduler/simulation/plotfuncs.py b/adaptive_scheduler/simulation/plotfuncs.py index 4dc74577..8d23f31f 100644 --- a/adaptive_scheduler/simulation/plotfuncs.py +++ b/adaptive_scheduler/simulation/plotfuncs.py @@ -92,7 +92,7 @@ def plot_pct_scheduled_airmass_binned_priority(airmass_datasets, plot_title): # the first dataset is the control dataset if dataset is not airmass_datasets[0]: labels.append(airmass_coeff) - plotutils.plot_barplot(ax, bardata, labels, binnames) + plotutils.plot_multi_barplot(ax, bardata, labels, binnames) ax.set_xlabel('Priority') ax.set_ylabel('Percent of Requests Scheduled') @@ -128,7 +128,7 @@ def plot_pct_time_scheduled_airmass_binned_priority(airmass_datasets, plot_title # the first dataset is the control dataset if dataset is not airmass_datasets[0]: labels.append(airmass_coeff) - plotutils.plot_barplot(ax, bardata, labels, binnames) + plotutils.plot_multi_barplot(ax, bardata, labels, binnames) ax.set_xlabel('Priority') ax.set_ylabel('Percent of Requested Time Scheduled') @@ -331,3 +331,26 @@ def 
plot_duration_by_window_duration_scatter(data, plot_title): ax.set_xlabel('Longest Possible Window Duration [min]') return fig + + +def plot_input_duration_binned_priority(dataset, plot_title): + fig, ax = plt.subplots() + fig.suptitle(plot_title) + bardata = [] + input_durations = dataset[0]['raw_scheduled_durations'] + dataset[0]['raw_unscheduled_durations'] + input_priorities = dataset[0]['raw_scheduled_priorities'] + dataset[0]['raw_unscheduled_priorities'] + input_bins = metrics.bin_data(input_priorities, input_durations, bin_size=10, bin_range=(10,30),aggregator=None) + duration_bins = { + bin_key: metrics.bin_data(bin_values, bin_size=300, bin_range=(0, 1499)) | metrics.bin_data(bin_values, bin_size=10000, bin_range=(1500, 10000)) + for bin_key, bin_values in input_bins.items() + } + labels = ['10-19', '20-29', '30'] + for values in duration_bins.values(): + bardata.append(list(values.values())) + binnames = ['0-5','5-10','10-15', '15-20', '20-25', '25&up'] + plotutils.plot_multi_barplot(ax, bardata, labels, binnames) + ax.set_xlabel('Duration (minutes)') + ax.set_ylabel('Input reservation counts') + ax.set_ylim(0, 600) + ax.legend(title='Priority') + return fig diff --git a/adaptive_scheduler/simulation/plots.py b/adaptive_scheduler/simulation/plots.py index 1d1fbf51..6528b026 100644 --- a/adaptive_scheduler/simulation/plots.py +++ b/adaptive_scheduler/simulation/plots.py @@ -20,34 +20,37 @@ ] plots = [ - Plot(plotfuncs.plot_airmass_difference_histogram, + Plot(plotfuncs.plot_airmass_difference_histogram, '1m Network Airmass Difference Distribution for Scheduled Requests', *airmass_experiment_ids), - Plot(plotfuncs.plot_pct_scheduled_airmass_binned_priority, - '1m Network Airmass Experiment Percent of Requests Scheduled per Priority Class', - *airmass_experiment_ids), - Plot(plotfuncs.plot_pct_scheduled_airmass_lineplot, - '1m Network Airmass Experiment Percent of Requests Scheduled per Priority Class', - *airmass_experiment_ids), - 
Plot(plotfuncs.plot_pct_time_scheduled_airmass_binned_priority, - '1m Network Airmass Experiment Percent of Requested Time Scheduled per Priority Class', - *airmass_experiment_ids), - Plot(plotfuncs.plot_pct_time_scheduled_airmass_lineplot, - '1m Network Airmass Experiment Percent of Requested Time Scheduled per Priority Class', - *airmass_experiment_ids), - Plot(plotfuncs.plot_pct_time_scheduled_out_of_available, - '1m Network Airmass Experiment Percent of Requested Time Scheduled out of Available Time', - *airmass_experiment_ids), - Plot(plotfuncs.plot_midpoint_airmass_histograms, - '1m Network Airmass Experiment Midpoint Airmass Distributions', - *airmass_experiment_ids), - Plot(plotfuncs.plot_eff_priority_duration_scatter, - '1m Network Scatterplot of Effective Priority and Duration', - '1m0-optimize-airmass-with-duration-v2', - '1m0-optimize-airmass-with-duration-scaled-100-v2'), - Plot(plotfuncs.plot_duration_by_window_duration_scatter, - '1m Network Scatterplot of Duration and Window Duration', - 'window-duration'), + Plot(plotfuncs.plot_pct_scheduled_airmass_binned_priority, + '1m Network Airmass Experiment Percent of Requests Scheduled per Priority Class', + *airmass_experiment_ids), + Plot(plotfuncs.plot_pct_scheduled_airmass_lineplot, + '1m Network Airmass Experiment Percent of Requests Scheduled per Priority Class', + *airmass_experiment_ids), + Plot(plotfuncs.plot_pct_time_scheduled_airmass_binned_priority, + '1m Network Airmass Experiment Percent of Requested Time Scheduled per Priority Class', + *airmass_experiment_ids), + Plot(plotfuncs.plot_pct_time_scheduled_airmass_lineplot, + '1m Network Airmass Experiment Percent of Requested Time Scheduled per Priority Class', + *airmass_experiment_ids), + Plot(plotfuncs.plot_pct_time_scheduled_out_of_available, + '1m Network Airmass Experiment Percent of Requested Time Scheduled out of Available Time', + *airmass_experiment_ids), + Plot(plotfuncs.plot_midpoint_airmass_histograms, + '1m Network Airmass 
Experiment Midpoint Airmass Distributions', + *airmass_experiment_ids), + Plot(plotfuncs.plot_eff_priority_duration_scatter, + '1m Network Scatterplot of Effective Priority and Duration', + '1m0-optimize-airmass-with-duration-v2', + '1m0-optimize-airmass-with-duration-scaled-100-v2'), + Plot(plotfuncs.plot_duration_by_window_duration_scatter, + '1m Network Scatterplot of Duration and Window Duration', + 'window-duration'), + Plot(plotfuncs.plot_input_duration_binned_priority, + '1m Network Histogram of Input Reservation duration binned by priority', + '1m0-optimize-airmass-no-duration-v3'), ] if __name__ == '__main__': From 6457e7c71bded41b838503092b52d9daa62c4e35 Mon Sep 17 00:00:00 2001 From: Qingze Wu Date: Tue, 2 Aug 2022 10:06:54 -0700 Subject: [PATCH 130/165] plot input durations --- adaptive_scheduler/simulation/plotfuncs.py | 21 +++++++++++++++++++-- adaptive_scheduler/simulation/plots.py | 3 +++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/adaptive_scheduler/simulation/plotfuncs.py b/adaptive_scheduler/simulation/plotfuncs.py index 8d23f31f..ac3b9651 100644 --- a/adaptive_scheduler/simulation/plotfuncs.py +++ b/adaptive_scheduler/simulation/plotfuncs.py @@ -348,9 +348,26 @@ def plot_input_duration_binned_priority(dataset, plot_title): for values in duration_bins.values(): bardata.append(list(values.values())) binnames = ['0-5','5-10','10-15', '15-20', '20-25', '25&up'] - plotutils.plot_multi_barplot(ax, bardata, labels, binnames) + plotutils.plot_multi_barplot(ax, bardata, labels, binnames, barwidth=0.1) ax.set_xlabel('Duration (minutes)') ax.set_ylabel('Input reservation counts') - ax.set_ylim(0, 600) + ax.set_ylim(0, 300) ax.legend(title='Priority') return fig + + +def plot_subplots_input_duration(dataset, plot_title): + fig, (ax1, ax2, ax3) = plt.subplots(1,3) + fig.suptitle(plot_title) + input_durations = dataset[0]['raw_scheduled_durations'] + dataset[0]['raw_unscheduled_durations'] + input_priorities = 
dataset[0]['raw_scheduled_priorities'] + dataset[0]['raw_unscheduled_priorities'] + input_bins = metrics.bin_data(input_priorities, input_durations, bin_size=10, bin_range=(10,30),aggregator=None) + labels = ['10-19', '20-29', '30'] + axis = [ax1, ax2, ax3] + for i, values in enumerate(input_bins.values()): + axis[i].hist(values, bins = np.arange(0, 4000, 120)) + axis[i].set_xlabel('Duration (seconds)') + axis[i].set_ylabel('Input reservation counts') + axis[i].set_ylim(0, 300) + axis[i].set_title(f'{labels[i]} Priority binned by duration') + return fig diff --git a/adaptive_scheduler/simulation/plots.py b/adaptive_scheduler/simulation/plots.py index 6528b026..b45648e5 100644 --- a/adaptive_scheduler/simulation/plots.py +++ b/adaptive_scheduler/simulation/plots.py @@ -51,6 +51,9 @@ Plot(plotfuncs.plot_input_duration_binned_priority, '1m Network Histogram of Input Reservation duration binned by priority', '1m0-optimize-airmass-no-duration-v3'), + Plot(plotfuncs.plot_subplots_input_duration, + '1m Network Subplots differnt priorities Input Reservation binned by duration', + '1m0-optimize-airmass-no-duration-v3'), ] if __name__ == '__main__': From 083f2e12a67c4e8987ea3aff71704b0b31041e63 Mon Sep 17 00:00:00 2001 From: Qingze Wu Date: Tue, 2 Aug 2022 15:45:00 -0700 Subject: [PATCH 131/165] stacked sched/unsched requests --- adaptive_scheduler/simulation/plotfuncs.py | 22 +++++++++++++++++----- adaptive_scheduler/simulation/plots.py | 2 +- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/adaptive_scheduler/simulation/plotfuncs.py b/adaptive_scheduler/simulation/plotfuncs.py index ac3b9651..9123d6f6 100644 --- a/adaptive_scheduler/simulation/plotfuncs.py +++ b/adaptive_scheduler/simulation/plotfuncs.py @@ -357,17 +357,29 @@ def plot_input_duration_binned_priority(dataset, plot_title): def plot_subplots_input_duration(dataset, plot_title): - fig, (ax1, ax2, ax3) = plt.subplots(1,3) + fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(28,10)) 
fig.suptitle(plot_title) - input_durations = dataset[0]['raw_scheduled_durations'] + dataset[0]['raw_unscheduled_durations'] - input_priorities = dataset[0]['raw_scheduled_priorities'] + dataset[0]['raw_unscheduled_priorities'] + sched_durations = dataset[0]['raw_scheduled_durations'] + unsched_durations = dataset[0]['raw_unscheduled_durations'] + sched_priorities = dataset[0]['raw_scheduled_priorities'] + unsched_priorities = dataset[0]['raw_unscheduled_priorities'] + + input_durations = sched_durations + unsched_durations + input_priorities = sched_priorities + unsched_priorities input_bins = metrics.bin_data(input_priorities, input_durations, bin_size=10, bin_range=(10,30),aggregator=None) + sched_bins = metrics.bin_data(sched_priorities, sched_durations, bin_size=10, bin_range=(10,30),aggregator=None) + unsched_bins = metrics.bin_data(unsched_priorities, unsched_durations, bin_size=10, bin_range=(10,30),aggregator=None) labels = ['10-19', '20-29', '30'] axis = [ax1, ax2, ax3] - for i, values in enumerate(input_bins.values()): - axis[i].hist(values, bins = np.arange(0, 4000, 120)) + for i, values in enumerate(sched_bins.values()): + bars = ['Scheduled', 'Unscheduled'] + # axis[i].hist(values, bins = np.arange(0, 4000, 120)) + axis[i].hist([values,list(unsched_bins.values())[i]], bins = np.arange(0, 4000, 120), + stacked = True, label = bars) + # axis[i].hist(list(unsched_bins.values())[i], bins = np.arange(0, 4000, 120)) axis[i].set_xlabel('Duration (seconds)') axis[i].set_ylabel('Input reservation counts') axis[i].set_ylim(0, 300) axis[i].set_title(f'{labels[i]} Priority binned by duration') + axis[i].legend() return fig diff --git a/adaptive_scheduler/simulation/plots.py b/adaptive_scheduler/simulation/plots.py index b45648e5..c2719f14 100644 --- a/adaptive_scheduler/simulation/plots.py +++ b/adaptive_scheduler/simulation/plots.py @@ -52,7 +52,7 @@ '1m Network Histogram of Input Reservation duration binned by priority', '1m0-optimize-airmass-no-duration-v3'), 
Plot(plotfuncs.plot_subplots_input_duration, - '1m Network Subplots differnt priorities Input Reservation binned by duration', + '1m Network Scheduled/Unscheduled Requests Length Distribution', '1m0-optimize-airmass-no-duration-v3'), ] From 51252048b219883cb6fe6fd60687aa6a255b228a Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Wed, 3 Aug 2022 12:01:58 -0700 Subject: [PATCH 132/165] change to 300 dpi --- adaptive_scheduler/simulation/plotutils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive_scheduler/simulation/plotutils.py b/adaptive_scheduler/simulation/plotutils.py index f3b25feb..84025a83 100644 --- a/adaptive_scheduler/simulation/plotutils.py +++ b/adaptive_scheduler/simulation/plotutils.py @@ -149,7 +149,7 @@ def export_to_image(fname, fig): except FileExistsError: pass fpath = os.path.join(export_dir, f'{fname}.{export_format}') - fig.savefig(fpath, format=export_format) + fig.savefig(fpath, dpi=300, format=export_format) print(f'Plot exported to {fpath}') From 29de01ce161568ebf001bdabb8652edd5a673f96 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Wed, 3 Aug 2022 12:02:18 -0700 Subject: [PATCH 133/165] move to new airmass tests --- adaptive_scheduler/simulation/plots.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/adaptive_scheduler/simulation/plots.py b/adaptive_scheduler/simulation/plots.py index 6528b026..2ee7b0e5 100644 --- a/adaptive_scheduler/simulation/plots.py +++ b/adaptive_scheduler/simulation/plots.py @@ -7,22 +7,21 @@ from adaptive_scheduler.simulation.plotutils import Plot airmass_experiment_ids = [ - '1m0-simulation-real-airmass-control-1_2022-07-18T23:59:44.770684', - '1m0-simulation-real-airmass-coeff-0-1', - '1m0-simulation-real-airmass-coeff-0.01-1', - '1m0-simulation-real-airmass-coeff-0.05-1', - '1m0-simulation-real-airmass-coeff-0.1-1', - '1m0-simulation-real-airmass-coeff-1.0-1', - '1m0-simulation-real-airmass-coeff-10-1', - 
'1m0-simulation-real-airmass-coeff-100-1', - '1m0-simulation-real-airmass-coeff-1000-1', - '1m0-simulation-real-airmass-coeff-1000000-1', + 'no-airmass-w-duration-no-scaling', + 'airmass-0.01-w-duration-no-scaling', + 'airmass-0.05-w-duration-no-scaling', + 'airmass-0.1-w-duration-no-scaling', + 'airmass-1.0-w-duration-no-scaling', + 'airmass-10-w-duration-no-scaling', + 'airmass-100-w-duration-no-scaling', + 'airmass-1000-w-duration-no-scaling', + 'airmass-1000000-w-duration-no-scaling', ] plots = [ Plot(plotfuncs.plot_airmass_difference_histogram, - '1m Network Airmass Difference Distribution for Scheduled Requests', - *airmass_experiment_ids), + '1m Network Airmass Difference Distribution for Scheduled Requests', + *airmass_experiment_ids), Plot(plotfuncs.plot_pct_scheduled_airmass_binned_priority, '1m Network Airmass Experiment Percent of Requests Scheduled per Priority Class', *airmass_experiment_ids), From 975cee2feb685ad8b47e82ee590a9f2330459b1c Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Wed, 3 Aug 2022 16:06:37 -0700 Subject: [PATCH 134/165] update plots --- adaptive_scheduler/simulation/plotfuncs.py | 80 ++++++++++++---------- adaptive_scheduler/simulation/plots.py | 19 +++-- 2 files changed, 52 insertions(+), 47 deletions(-) diff --git a/adaptive_scheduler/simulation/plotfuncs.py b/adaptive_scheduler/simulation/plotfuncs.py index f2e2e2d5..6b18297d 100644 --- a/adaptive_scheduler/simulation/plotfuncs.py +++ b/adaptive_scheduler/simulation/plotfuncs.py @@ -107,60 +107,63 @@ def plot_percent_sched_requests_bin_by_priority(eff_pri_datasets, plot_title): fig.suptitle(plot_title) fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) bardata1 = [] - labels1 = [] + labels = ['with duration', 'no duration', 'with duration scaled 100', 'no duration scaled 100'] for dataset in eff_pri_datasets: bardata1.append(list(dataset['percent_duration_by_priority'][0].values())) - labels1.append(dataset['simulation_id'][21:-3]) - priorities = ['low priority', 'mid 
priority', 'high priority'] - plotutils.plot_barplot(ax1, bardata1, labels1, priorities) + + priorities = ['low priority(10-19)', 'mid priority(20-29)', 'high priority(30)'] + plotutils.plot_multi_barplot(ax1, bardata1, labels, priorities) ax1.set_xlabel('Priority') - ax1.set_ylabel('Percent Scheduled Time') - ax1.set_title('Percent Duration Scheduled') - ax1.legend() + ax1.set_ylabel('Scheduled Time/Total Request Time (%)') + ax1.set_title('Percent of requested time scheduled') + ax1.legend(title='Effective Priority Algorithms') bardata2 = [] - labels2 = [] for dataset in eff_pri_datasets: bardata2.append(list(dataset['percent_sched_by_priority'][0].values())) - labels2.append(dataset['simulation_id'][21:]) priorities = ['low priority', 'mid priority', 'high priority'] - plotutils.plot_barplot(ax2, bardata2, labels2, priorities) + plotutils.plot_multi_barplot(ax2, bardata2, labels, priorities) ax2.set_xlabel('Priority') - ax2.set_ylabel('Percent Scheduled Count') - ax2.set_title('Percent Number Scheduled') - ax2.legend() + ax2.set_ylabel('Scheduled Requests/Total Requests (%)') + ax2.set_title('Percent of requests Scheduled') + ax2.legend(title='Effective Priority Algorithms') plt.show() - -def rand_jitter(arr): - stdev = .01 * (max(arr) - min(arr)) - return arr + np.random.randn(len(arr)) * stdev def plot_sched_priority_duration_dotplot(eff_pri_datasets, plot_title): + def rand_jitter(arr): + stdev = .01 * (max(arr) - min(arr)) + return arr + np.random.randn(len(arr)) * stdev + markers = ["o" , "," ,"v" , "^" , "<", ">"] colors = ['r','b','c','m', 'y', 'k'] fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(28, 12)) fig.suptitle(plot_title) fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) + labels = ['with duration', 'no duration', 'with duration scaled 100', 'no duration scaled 100'] for i, data in enumerate(eff_pri_datasets): - id = data['simulation_id'][21:] - if id in ['with-duration-scaled-100-v3', 'no-duration-scaled-100-v3']: - 
data['raw_scheduled_priorities'] = [(p+35)/4.5 for p in data['raw_scheduled_priorities']] + id = data['simulation_id'] + if id in ['airmass-0.1-w-duration-w-scaling','airmass-0.1-no-duration-w-scaling']: + data['raw_scheduled_priorities'] = [(p+35)/4.5 for p in data['raw_scheduled_priorities']] + data['raw_scheduled_durations'] = [d/60 for d in data['raw_scheduled_durations']] ax1.scatter(rand_jitter(data['raw_scheduled_priorities']), rand_jitter(data['raw_scheduled_durations']), - marker = markers[i],c = colors[i], s = 10, label = f'scheduled requests {id}',alpha = 0.3) - ax1.set_ylim(top=11000) + marker = markers[i],c = colors[i], s=10, label=labels[i], alpha=0.3) + ax1.set_ylim(top=100) ax1.set_xlabel('Priority') - ax1.set_ylabel('Request Duration') - ax1.legend() + ax1.set_ylabel('Request Duration (minutes)') + ax1.set_title('Scheduled Reservations distribution') + ax1.legend(title='Effective Priority Algorithms') for i, data in enumerate(eff_pri_datasets): - id = data['simulation_id'][21:] - if id in ['with-duration-scaled-100-v3', 'no-duration-scaled-100-v3']: + id = data['simulation_id'] + if id in ['airmass-0.1-w-duration-w-scaling', 'airmass-0.1-no-duration-w-scaling']: data['raw_unscheduled_priorities'] = [(p+35)/4.5 for p in data['raw_unscheduled_priorities']] + data['raw_unscheduled_durations'] = [d/60 for d in data['raw_unscheduled_durations']] ax2.scatter(rand_jitter(data['raw_unscheduled_priorities']), rand_jitter(data['raw_unscheduled_durations']), - c =colors[i], marker=markers[i],s=10, label = f'unscheduled requests {id}', alpha = 0.3) - ax2.set_ylim(top=11000) + c=colors[i], marker=markers[i],s=10, label=labels[i], alpha=0.3) + ax2.set_ylim(top=100) ax2.set_xlabel('Priority') - ax2.set_ylabel('Request Duration') - ax2.legend() + ax2.set_ylabel('Request Duration (minutes)') + ax2.set_title('Unscheduled Reservations distribution') + ax2.legend(title='Effective Priority Algorithms') plt.show(block = False) plt.show() @@ -170,9 +173,10 @@ def 
plot_heat_map_priority_duration(eff_pri_datasets, plot_title): fig.suptitle(plot_title) fig.subplots_adjust(wspace=0.01, hspace=0.01, top=0.9) ax_list = [axs[0,0],axs[0,1],axs[1,0], axs[1,1]] + labels = ['with duration', 'no duration', 'with duration scaled 100', 'no duration scaled 100'] for i, data in enumerate(eff_pri_datasets): - id = data['simulation_id'][21:] - if id in ['with-duration-scaled-100-v3', 'no-duration-scaled-100-v3']: + id = data['simulation_id'] + if id in ['airmass-0.1-w-duration-w-scaling', 'airmass-0.1-no-duration-w-scaling']: data['raw_scheduled_priorities'] = [(p+35)/4.5 for p in data['raw_scheduled_priorities']] data['raw_unscheduled_priorities'] = [(p+35)/4.5 for p in data['raw_unscheduled_priorities']] sched_priorities = data['raw_scheduled_priorities'] @@ -209,14 +213,16 @@ def plot_heat_map_priority_duration(eff_pri_datasets, plot_title): axis.set_yticks(np.arange(len(priority_bins)), labels=priority_bins) plt.setp(axis.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor") - for i in range(len(priority_bins)): - for j in range(len(duration_bins)): - value = heat_map_elements[i, j] - text1 = axis.text(j, i, f'{heat_map_elements[i, j]}|{ heat_map_elements_unsched[i, j]}', + for j in range(len(priority_bins)): + for k in range(len(duration_bins)): + value = heat_map_elements[j, k] + text1 = axis.text(k, j, f'{heat_map_elements[j, k]}|{ heat_map_elements_unsched[j, k]}', ha="center", va="center", fontsize='large', fontweight='semibold', color=cmap2(0.001/value)) - axis.set_title(f'{id} (sched|unsched)', fontweight='semibold') + axis.set_title(f'{labels[i]} (sched|unsched)', fontweight='semibold') fig.tight_layout() plt.show() + + def plot_pct_time_scheduled_airmass_binned_priority(airmass_datasets, plot_title): """Plots the percentage of requested time scheduled for different airmass coefficients binned into priority levels. 
diff --git a/adaptive_scheduler/simulation/plots.py b/adaptive_scheduler/simulation/plots.py index f639748d..423d994e 100644 --- a/adaptive_scheduler/simulation/plots.py +++ b/adaptive_scheduler/simulation/plots.py @@ -19,10 +19,10 @@ ] effective_priority_experiment_ids = [ - '1m0-optimize-airmass-with-duration-v3', - '1m0-optimize-airmass-no-duration-v3', - '1m0-optimize-airmass-with-duration-scaled-100-v3', - '1m0-optimize-airmass-no-duration-scaled-100-v3', + 'airmass-0.1-w-duration-no-scaling', + 'airmass-0.1-no-duration-no-scaling', + 'airmass-0.1-w-duration-w-scaling', + 'airmass-0.1-no-duration-w-scaling', ] @@ -50,17 +50,17 @@ *airmass_experiment_ids), Plot(plotfuncs.plot_eff_priority_duration_scatter, '1m Network Scatterplot of Effective Priority and Duration', - '1m0-optimize-airmass-with-duration-v2', - '1m0-optimize-airmass-with-duration-scaled-100-v2'), + 'airmass-0.1-w-duration-no-scaling', + 'airmass-0.1-w-duration-w-scaling'), Plot(plotfuncs.plot_duration_by_window_duration_scatter, '1m Network Scatterplot of Duration and Window Duration', 'window-duration'), Plot(plotfuncs.plot_input_duration_binned_priority, '1m Network Histogram of Input Reservation duration binned by priority', - '1m0-optimize-airmass-no-duration-v3'), + 'no-airmass-w-duration-no-scaling'), Plot(plotfuncs.plot_subplots_input_duration, '1m Network Scheduled/Unscheduled Requests Length Distribution', - '1m0-optimize-airmass-no-duration-v3'), + 'no-airmass-w-duration-no-scaling'), Plot(plotfuncs.plot_percent_sched_requests_bin_by_priority, '1m0 Network Scheduler Metrics Binned by Priority', *effective_priority_experiment_ids), @@ -69,8 +69,7 @@ *effective_priority_experiment_ids), Plot(plotfuncs.plot_heat_map_priority_duration, '1m0 Network Requests Heatmap With Airmass Optimization', - effective_priority_experiment_ids), - + *effective_priority_experiment_ids), ] if __name__ == '__main__': From c579d2931618670537f4bd249218aa20aa99c498 Mon Sep 17 00:00:00 2001 From: Michael Wu 
Date: Thu, 4 Aug 2022 10:16:12 -0700 Subject: [PATCH 135/165] minor update on plotfuncs --- adaptive_scheduler/simulation/plotfuncs.py | 23 ++++++++++------------ 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/adaptive_scheduler/simulation/plotfuncs.py b/adaptive_scheduler/simulation/plotfuncs.py index 6b18297d..c41103a5 100644 --- a/adaptive_scheduler/simulation/plotfuncs.py +++ b/adaptive_scheduler/simulation/plotfuncs.py @@ -126,8 +126,8 @@ def plot_percent_sched_requests_bin_by_priority(eff_pri_datasets, plot_title): ax2.set_ylabel('Scheduled Requests/Total Requests (%)') ax2.set_title('Percent of requests Scheduled') ax2.legend(title='Effective Priority Algorithms') - plt.show() - + return fig + def plot_sched_priority_duration_dotplot(eff_pri_datasets, plot_title): def rand_jitter(arr): @@ -164,8 +164,7 @@ def rand_jitter(arr): ax2.set_ylabel('Request Duration (minutes)') ax2.set_title('Unscheduled Reservations distribution') ax2.legend(title='Effective Priority Algorithms') - plt.show(block = False) - plt.show() + return fig def plot_heat_map_priority_duration(eff_pri_datasets, plot_title): @@ -220,7 +219,7 @@ def plot_heat_map_priority_duration(eff_pri_datasets, plot_title): ha="center", va="center", fontsize='large', fontweight='semibold', color=cmap2(0.001/value)) axis.set_title(f'{labels[i]} (sched|unsched)', fontweight='semibold') fig.tight_layout() - plt.show() + return fig def plot_pct_time_scheduled_airmass_binned_priority(airmass_datasets, plot_title): @@ -479,16 +478,14 @@ def plot_input_duration_binned_priority(dataset, plot_title): def plot_subplots_input_duration(dataset, plot_title): - fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(28,10)) + fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(20,10)) fig.suptitle(plot_title) sched_durations = dataset[0]['raw_scheduled_durations'] + sched_durations = [d/60 for d in sched_durations] unsched_durations = dataset[0]['raw_unscheduled_durations'] + unsched_durations = [d/60 for d in 
unsched_durations] sched_priorities = dataset[0]['raw_scheduled_priorities'] unsched_priorities = dataset[0]['raw_unscheduled_priorities'] - - input_durations = sched_durations + unsched_durations - input_priorities = sched_priorities + unsched_priorities - input_bins = metrics.bin_data(input_priorities, input_durations, bin_size=10, bin_range=(10,30),aggregator=None) sched_bins = metrics.bin_data(sched_priorities, sched_durations, bin_size=10, bin_range=(10,30),aggregator=None) unsched_bins = metrics.bin_data(unsched_priorities, unsched_durations, bin_size=10, bin_range=(10,30),aggregator=None) labels = ['10-19', '20-29', '30'] @@ -496,12 +493,12 @@ def plot_subplots_input_duration(dataset, plot_title): for i, values in enumerate(sched_bins.values()): bars = ['Scheduled', 'Unscheduled'] # axis[i].hist(values, bins = np.arange(0, 4000, 120)) - axis[i].hist([values,list(unsched_bins.values())[i]], bins = np.arange(0, 4000, 120), + axis[i].hist([values,list(unsched_bins.values())[i]], bins = np.arange(0, 70, 2), stacked = True, label = bars) # axis[i].hist(list(unsched_bins.values())[i], bins = np.arange(0, 4000, 120)) - axis[i].set_xlabel('Duration (seconds)') + axis[i].set_xlabel('Duration (Minutes)') axis[i].set_ylabel('Input reservation counts') axis[i].set_ylim(0, 300) axis[i].set_title(f'{labels[i]} Priority binned by duration') axis[i].legend() - return fig + return fig \ No newline at end of file From ad6432f394e176cffde15f3cb0cd917641252be8 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 4 Aug 2022 10:37:37 -0700 Subject: [PATCH 136/165] small fix to titles and titlesizes --- adaptive_scheduler/simulation/plotfuncs.py | 8 +++++--- adaptive_scheduler/simulation/plots.py | 9 +++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/adaptive_scheduler/simulation/plotfuncs.py b/adaptive_scheduler/simulation/plotfuncs.py index c41103a5..279ad638 100644 --- a/adaptive_scheduler/simulation/plotfuncs.py +++ 
b/adaptive_scheduler/simulation/plotfuncs.py @@ -16,6 +16,8 @@ style.use('tableau-colorblind10') matplotlib.rcParams['figure.figsize'] = (20, 10) matplotlib.rcParams['figure.titlesize'] = 20 +matplotlib.rcParams['axes.titlesize'] = 16 +matplotlib.rcParams['axes.labelsize'] = 14 matplotlib.rcParams['figure.subplot.wspace'] = 0.2 # horizontal spacing for subplots matplotlib.rcParams['figure.subplot.hspace'] = 0.2 # vertical spacing for subplots matplotlib.rcParams['figure.subplot.top'] = 0.9 # spacing between plot and title @@ -220,8 +222,8 @@ def plot_heat_map_priority_duration(eff_pri_datasets, plot_title): axis.set_title(f'{labels[i]} (sched|unsched)', fontweight='semibold') fig.tight_layout() return fig - - + + def plot_pct_time_scheduled_airmass_binned_priority(airmass_datasets, plot_title): """Plots the percentage of requested time scheduled for different airmass coefficients binned into priority levels. @@ -501,4 +503,4 @@ def plot_subplots_input_duration(dataset, plot_title): axis[i].set_ylim(0, 300) axis[i].set_title(f'{labels[i]} Priority binned by duration') axis[i].legend() - return fig \ No newline at end of file + return fig diff --git a/adaptive_scheduler/simulation/plots.py b/adaptive_scheduler/simulation/plots.py index 423d994e..a38e15c3 100644 --- a/adaptive_scheduler/simulation/plots.py +++ b/adaptive_scheduler/simulation/plots.py @@ -18,7 +18,7 @@ 'airmass-1000000-w-duration-no-scaling', ] -effective_priority_experiment_ids = [ +effective_priority_experiment_ids = [ 'airmass-0.1-w-duration-no-scaling', 'airmass-0.1-no-duration-no-scaling', 'airmass-0.1-w-duration-w-scaling', @@ -62,15 +62,16 @@ '1m Network Scheduled/Unscheduled Requests Length Distribution', 'no-airmass-w-duration-no-scaling'), Plot(plotfuncs.plot_percent_sched_requests_bin_by_priority, - '1m0 Network Scheduler Metrics Binned by Priority', + '1m Network Scheduler Metrics Binned by Priority', *effective_priority_experiment_ids), Plot(plotfuncs.plot_sched_priority_duration_dotplot, 
- '1m0 Distribution of Priority and Duration With Airmass Optimization', + '1m Distribution of Priority and Duration With Airmass Optimization', *effective_priority_experiment_ids), Plot(plotfuncs.plot_heat_map_priority_duration, - '1m0 Network Requests Heatmap With Airmass Optimization', + '1m Network Requests Heatmap With Airmass Optimization', *effective_priority_experiment_ids), ] + if __name__ == '__main__': plotutils.run_user_interface(plots) From e4c87fbec56ea068188f4ef6316c48fbf2a0f3f2 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 4 Aug 2022 10:50:30 -0700 Subject: [PATCH 137/165] modified title sizes to be bigger --- adaptive_scheduler/simulation/plotfuncs.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/adaptive_scheduler/simulation/plotfuncs.py b/adaptive_scheduler/simulation/plotfuncs.py index 279ad638..8fea4445 100644 --- a/adaptive_scheduler/simulation/plotfuncs.py +++ b/adaptive_scheduler/simulation/plotfuncs.py @@ -18,6 +18,8 @@ matplotlib.rcParams['figure.titlesize'] = 20 matplotlib.rcParams['axes.titlesize'] = 16 matplotlib.rcParams['axes.labelsize'] = 14 +matplotlib.rcParams['xtick.labelsize'] = 12 +matplotlib.rcParams['ytick.labelsize'] = 12 matplotlib.rcParams['figure.subplot.wspace'] = 0.2 # horizontal spacing for subplots matplotlib.rcParams['figure.subplot.hspace'] = 0.2 # vertical spacing for subplots matplotlib.rcParams['figure.subplot.top'] = 0.9 # spacing between plot and title From 19522a02d2f9612b9f617f84a1763dbad846861c Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 4 Aug 2022 12:49:01 -0700 Subject: [PATCH 138/165] remove old plotting files, they have been integrated into plotfuncs --- .../simulation/plot_airmass_coeff.py | 165 ------ adaptive_scheduler/simulation/plot_all.py | 520 ------------------ .../simulation/plot_effective_priority.py | 50 -- .../simulation/plot_priority_duration.py | 47 -- 4 files changed, 782 deletions(-) delete mode 100644 adaptive_scheduler/simulation/plot_airmass_coeff.py delete 
mode 100644 adaptive_scheduler/simulation/plot_all.py delete mode 100644 adaptive_scheduler/simulation/plot_effective_priority.py delete mode 100644 adaptive_scheduler/simulation/plot_priority_duration.py diff --git a/adaptive_scheduler/simulation/plot_airmass_coeff.py b/adaptive_scheduler/simulation/plot_airmass_coeff.py deleted file mode 100644 index 959a09aa..00000000 --- a/adaptive_scheduler/simulation/plot_airmass_coeff.py +++ /dev/null @@ -1,165 +0,0 @@ -""" -Plotting functions for an airmass optimization experiment. -""" -import argparse -from datetime import datetime - -import numpy as np -import matplotlib.pyplot as plt - -import adaptive_scheduler.simulation.plotutils as plotutils -from adaptive_scheduler.simulation.plotutils import opensearch_client, default_colors - -AIRMASS_TEST_VALUES = [0, 0.01, 0.05, 0.1, 1.0, 10, 100, 1000, 1000000] - -control_id = '1m0-simulation-real-airmass-control-1_2022-07-18T23:59:44.770684' -control = opensearch_client.get('scheduler-simulations', control_id) -labels = ['prioritize early'] -labels.extend(AIRMASS_TEST_VALUES) -timestamp = datetime.utcnow().isoformat(timespec='seconds') - - -def get_airmass_data_from_opensearch(coeff): - query = f'1m0-simulation-real-airmass-coeff-{coeff}-1' - source_data = plotutils.get_data_from_opensearch(query) - airmass_coeff = source_data['airmass_weighting_coefficient'] - airmass_data = source_data['airmass_metrics']['raw_airmass_data'] - return airmass_data, airmass_coeff - - -def plot_normed_airmass_histogram(): - fig = plt.figure(figsize=(20, 10)) - fig.suptitle('1m0 Network Normalized Airmass Distributions', fontsize=20) - fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) - ax = fig.add_subplot() - - control_airmass_data = control['_source']['airmass_metrics']['raw_airmass_data'] - control_mp = np.array(control_airmass_data[0]['midpoint_airmasses']) - control_min = np.array(control_airmass_data[1]['min_poss_airmasses']) - control_max = 
np.array(control_airmass_data[2]['max_poss_airmasses']) - normed = [1-(control_mp-control_min)/(control_max-control_min)] - - for value in AIRMASS_TEST_VALUES: - airmass_data, airmass_coeff = get_airmass_data_from_opensearch(value) - mp = np.array(airmass_data[0]['midpoint_airmasses']) - min_ = np.array(airmass_data[1]['min_poss_airmasses']) - max_ = np.array(airmass_data[2]['max_poss_airmasses']) - normed.append(1-(mp-min_)/(max_-min_)) - ax.hist(normed, bins=10, label=labels, color=default_colors, alpha=0.8) - ax.set_xlabel('Airmass Score (0 is worst, 1 is closest to ideal)') - ax.set_ylabel('Count') - ax.legend() - if not displayonly: - plotutils.export_to_image(f'1m0_normed_airmass_hist_{timestamp}', fig) - plt.show() - - -def plot_midpoint_airmass_histogram(): - fig = plt.figure(figsize=(16, 16)) - fig.suptitle('1m0 Network Midpoint Airmass Distributions', fontsize=20) - fig.subplots_adjust(wspace=0.3, hspace=0.3, top=0.92) - for i, value in enumerate(AIRMASS_TEST_VALUES): - ax = fig.add_subplot(3, 3, i+1) - airmass_data, airmass_coeff = get_airmass_data_from_opensearch(value) - midpoint_airmasses = airmass_data[0]['midpoint_airmasses'] - ax.hist(midpoint_airmasses, bins=50) - ax.set_title(f'Airmass Coefficient: {airmass_coeff}') - ax.set_xlabel('Midpoint Airmass') - ax.set_ylabel('Count') - ax.set_xlim(1.0, 2.0) - ax.set_ylim(0, 120) - if not displayonly: - plotutils.export_to_image(f'1m0_midpoint_airmass_hist_{timestamp}', fig) - plt.show() - - -def get_priority_data_from_opensearch(coeff): - query = f'1m0-simulation-real-airmass-coeff-{coeff}-1' - source_data = plotutils.get_data_from_opensearch(query) - pct_scheduled = source_data['percent_sched_by_priority'][0] - pct_duration = source_data['percent_duration_by_priority'][0] - return pct_scheduled, pct_duration - - -def plot_pct_scheduled_bins(): - fig = plt.figure(figsize=(20, 10)) - fig.suptitle('1m0 Network Percentage of Requests Scheduled', fontsize=20) - fig.subplots_adjust(wspace=0.2, hspace=0.2, 
top=0.9) - ax = fig.add_subplot() - - bardata = [] - control_prio_data = control['_source']['percent_sched_by_priority'][0] - priorities = list(control_prio_data.keys()) - percentages = list(control_prio_data.values()) - bardata.append(percentages) - - for value in AIRMASS_TEST_VALUES: - priority_data, _ = get_priority_data_from_opensearch(value) - bardata.append(list(priority_data.values())) - - plotutils.plot_barplot(ax, bardata, default_colors, labels, priorities) - ax.set_xlabel('Priority') - ax.set_ylabel('Percent of Requests Scheduled') - ax.legend() - if not displayonly: - plotutils.export_to_image(f'1m0_pct_count_scheduled_{timestamp}', fig) - plt.show() - - -def plot_pct_time_scheduled_bins(): - fig = plt.figure(figsize=(20, 10)) - fig.suptitle('1m0 Network Percentage Requested Time Scheduled', fontsize=20) - fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) - ax = fig.add_subplot() - - bardata = [] - control_prio_data = control['_source']['percent_duration_by_priority'][0] - priorities = list(control_prio_data.keys()) - percentages = list(control_prio_data.values()) - bardata.append(percentages) - - for value in AIRMASS_TEST_VALUES: - _, priority_data = get_priority_data_from_opensearch(value) - bardata.append(list(priority_data.values())) - - plotutils.plot_barplot(ax, bardata, default_colors, labels, priorities) - ax.set_xlabel('Priority') - ax.set_ylabel('Percent Time Scheduled') - ax.legend() - if not displayonly: - plotutils.export_to_image(f'1m0_pct_time_scheduled_{timestamp}', fig) - plt.show() - - -if __name__ == '__main__': - plots = { - 'normed_airmass_hist': {'func': plot_normed_airmass_histogram, - 'desc': 'Airmass distribution, normalized so that 0 is worst airmass and 1 is best'}, - 'midpoint_airmass_hist': {'func': plot_midpoint_airmass_histogram, - 'desc': 'Midpoint airmass distributions for different airmass weighting coefficients'}, - 'percent_scheduled_binned': {'func': plot_pct_scheduled_bins, - 'desc': 'Percent of requests 
scheduled binned by priority level'}, - 'percent_time_scheduled_binned': {'func': plot_pct_time_scheduled_bins, - 'desc': 'Percent of time requested scheduled binned by priority level'}, - } - - description = 'Plotting functions for airmass optimization experiment.' - parser = argparse.ArgumentParser(description=description) - parser.add_argument('plot_name', type=str.lower, nargs='*', - help="The name of the plot(s) to display. `all` can be passed to show all.") - parser.add_argument('-l', '--list', help='List plot info. `-l all` to show all available plots.', action='store_true') - parser.add_argument('-d', '--displayonly', help='Display the plots without exporting them.', action='store_true') - args = parser.parse_args() - global displayonly - displayonly = args.displayonly - - if args.list: - spacing = max([len(name) for name in plots.keys()]) + 4 - print(f'{"NAME":{spacing}}DESCRIPTION') - print(f'{"====":{spacing}}===========') - for name, details in plots.items(): - print(f'{name:{spacing}}{details["desc"]}') - else: - plots_to_show = list(plots.keys()) if args.plot_name == ['all'] else args.plot_name - for plot_name in plots_to_show: - plots[plot_name]['func']() diff --git a/adaptive_scheduler/simulation/plot_all.py b/adaptive_scheduler/simulation/plot_all.py deleted file mode 100644 index b85f6541..00000000 --- a/adaptive_scheduler/simulation/plot_all.py +++ /dev/null @@ -1,520 +0,0 @@ -""" -Plotting functions for an airmass optimization experiment. 
-""" -import argparse -from datetime import datetime - -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.style as style - -import adaptive_scheduler.simulation.plotutils as plotutils -from adaptive_scheduler.simulation.plotutils import opensearch_client -from adaptive_scheduler.simulation.metrics import bin_data - -AIRMASS_TEST_VALUES = [0, 0.01, 0.05, 0.1, 1.0, 10, 100, 1000, 1000000] -EFF_PRI_SCALING_TEST_LABELS = ['airmass', 'airmass-with-duration-scaled-100', - 'airmass-no-duration', 'airmass-no-duration-scaled-100'] - -control_id = '1m0-simulation-real-airmass-control-1_2022-07-18T23:59:44.770684' -control = opensearch_client.get('scheduler-simulations', control_id) -labels = ['prioritize early'] -labels.extend(AIRMASS_TEST_VALUES) -timestamp = datetime.utcnow().isoformat(timespec='seconds') -style.use('tableau-colorblind10') - - -def get_airmass_data_from_opensearch(coeff): - query = f'1m0-simulation-real-airmass-coeff-{coeff}-1' - source_data = plotutils.get_data_from_opensearch(query) - airmass_coeff = source_data['airmass_weighting_coefficient'] - airmass_data = source_data['airmass_metrics']['raw_airmass_data'] - return airmass_data, airmass_coeff - - -def plot_normed_airmass_histogram(): - fig = plt.figure(figsize=(20, 10)) - fig.suptitle('1m0 Network Normalized Airmass Distributions for Different Airmass Coefficients', fontsize=20) - fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) - ax = fig.add_subplot() - - control_airmass_data = control['_source']['airmass_metrics']['raw_airmass_data'] - control_mp = np.array(control_airmass_data[0]['midpoint_airmasses']) - control_min = np.array(control_airmass_data[1]['min_poss_airmasses']) - control_max = np.array(control_airmass_data[2]['max_poss_airmasses']) - normed = [1-(control_mp-control_min)/(control_max-control_min)] - - for value in AIRMASS_TEST_VALUES: - airmass_data, airmass_coeff = get_airmass_data_from_opensearch(value) - mp = np.array(airmass_data[0]['midpoint_airmasses']) 
- min_ = np.array(airmass_data[1]['min_poss_airmasses']) - max_ = np.array(airmass_data[2]['max_poss_airmasses']) - normed.append(1-(mp-min_)/(max_-min_)) - ax.hist(normed, bins=10, label=labels, alpha=0.8) - ax.set_xlabel('Airmass Score (0 is worst, 1 is closest to ideal)') - ax.set_ylabel('Count') - ax.legend() - if not displayonly: - plotutils.export_to_image(f'1m0_normed_airmass_hist_{timestamp}', fig) - plt.show() - - -def plot_midpoint_airmass_histogram(): - fig = plt.figure(figsize=(16, 16)) - fig.suptitle('1m0 Network Midpoint Airmass Distributions for Different Airmass Coefficients', fontsize=20) - fig.subplots_adjust(wspace=0.3, hspace=0.3, top=0.92) - for i, value in enumerate(AIRMASS_TEST_VALUES): - ax = fig.add_subplot(3, 3, i+1) - airmass_data, airmass_coeff = get_airmass_data_from_opensearch(value) - midpoint_airmasses = airmass_data[0]['midpoint_airmasses'] - ax.hist(midpoint_airmasses, bins=50) - ax.set_title(f'Airmass Coefficient: {airmass_coeff}') - ax.set_xlabel('Midpoint Airmass') - ax.set_ylabel('Count') - ax.set_xlim(1.0, 2.0) - ax.set_ylim(0, 120) - if not displayonly: - plotutils.export_to_image(f'1m0_midpoint_airmass_hist_{timestamp}', fig) - plt.show() - - -def get_priority_data_from_opensearch(coeff): - query = f'1m0-simulation-real-airmass-coeff-{coeff}-1' - source_data = plotutils.get_data_from_opensearch(query) - pct_scheduled = source_data['percent_sched_by_priority'][0] - pct_duration = source_data['percent_duration_by_priority'][0] - return pct_scheduled, pct_duration - - -def plot_pct_count_airmass_prio_bins(): - fig = plt.figure(figsize=(20, 10)) - fig.suptitle('1m0 Percent of Requests Scheduled by Priority Class for Different Airmass Coefficients', fontsize=20) - fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) - ax = fig.add_subplot() - - bardata = [] - control_prio_data = control['_source']['percent_sched_by_priority'][0] - priorities = list(control_prio_data.keys()) - percentages = list(control_prio_data.values()) - 
bardata.append(percentages) - - for value in AIRMASS_TEST_VALUES: - priority_data, _ = get_priority_data_from_opensearch(value) - bardata.append(list(priority_data.values())) - - plotutils.plot_barplot(ax, bardata, labels, priorities) - ax.set_xlabel('Priority') - ax.set_ylabel('Percent of Requests Scheduled') - ax.set_ylim(0, 100) - ax.legend() - if not displayonly: - plotutils.export_to_image(f'1m0_pct_count_scheduled_airmass_{timestamp}', fig) - plt.show() - - -def plot_pct_time_airmass_prio_bins(): - fig = plt.figure(figsize=(20, 10)) - fig.suptitle('1m0 Percent of Requested Time Scheduled by Priority Class for Different Airmass Coefficients', fontsize=20) - fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) - ax = fig.add_subplot() - - bardata = [] - control_prio_data = control['_source']['percent_duration_by_priority'][0] - priorities = list(control_prio_data.keys()) - percentages = list(control_prio_data.values()) - bardata.append(percentages) - - for value in AIRMASS_TEST_VALUES: - _, priority_data = get_priority_data_from_opensearch(value) - bardata.append(list(priority_data.values())) - - plotutils.plot_barplot(ax, bardata, labels, priorities) - ax.set_xlabel('Priority') - ax.set_ylabel('Percent of Requested Time Scheduled') - ax.set_ylim(0, 100) - ax.legend() - if not displayonly: - plotutils.export_to_image(f'1m0_pct_time_scheduled_airmass_{timestamp}', fig) - plt.show() - - -def plot_pct_time_scaling_prio_bins(): - fig = plt.figure(figsize=(20, 10)) - fig.suptitle('1m0 Percent of Requested Time Scheduled by Priority Class for Different Scaling Options', fontsize=20) - fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) - ax = fig.add_subplot() - - bardata = [] - for label in EFF_PRI_SCALING_TEST_LABELS: - priority_data = plotutils.get_data_from_opensearch(f'1m0-optimize-{label}')['percent_duration_by_priority'] - bardata.append(list(priority_data[0].values())) - - priorities = ['low priority', 'mid priority', 'high priority'] - 
plotutils.plot_barplot(ax, bardata, EFF_PRI_SCALING_TEST_LABELS, priorities) - ax.set_xlabel('Priority') - ax.set_ylabel('Percent of Requested Time Scheduled') - ax.set_ylim(0, 100) - ax.legend() - if not displayonly: - plotutils.export_to_image(f'1m0_pct_time_scheduled_scaling_{timestamp}', fig) - plt.show() - - -def plot_pct_count_scaling_prio_bins(): - fig = plt.figure(figsize=(20, 10)) - fig.suptitle('1m0 Percent of Requests Scheduled by Priority Class for Different Scaling Options', fontsize=20) - fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) - ax = fig.add_subplot() - - bardata = [] - for label in EFF_PRI_SCALING_TEST_LABELS: - priority_data = plotutils.get_data_from_opensearch(f'1m0-optimize-{label}')['percent_sched_by_priority'] - bardata.append(list(priority_data[0].values())) - - priorities = ['low', 'medium', 'high'] - plotutils.plot_barplot(ax, bardata, EFF_PRI_SCALING_TEST_LABELS, priorities) - ax.set_xlabel('Priority') - ax.set_ylabel('Percent of Requests Scheduled') - ax.set_ylim(0, 100) - ax.legend() - if not displayonly: - plotutils.export_to_image(f'1m0_pct_count_scheduled_scaling_{timestamp}', fig) - plt.show() - - -def plot_pct_total_sched_prio_bins(): - fig = plt.figure(figsize=(20, 10)) - fig.suptitle('1m0 Percent of Requests Scheduled out of All Requests by Priority Class', fontsize=20) - fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) - ax = fig.add_subplot() - - bardata = [] - for label in EFF_PRI_SCALING_TEST_LABELS: - scheduled_count = plotutils.get_data_from_opensearch(f'1m0-optimize-{label}')['scheduled_req_by_priority'][0] - total_count = plotutils.get_data_from_opensearch(f'1m0-optimize-{label}')['total_request_count'] - scheduled_count = {bin_name: 100*np.array(values)/total_count for bin_name, values in scheduled_count.items()} - bardata.append(scheduled_count.values()) - - priorities = ['low', 'medium', 'high'] - plotutils.plot_barplot(ax, bardata, EFF_PRI_SCALING_TEST_LABELS, priorities) - ax.set_xlabel('Priority') - 
ax.set_ylabel('Percent of Requests Scheduled out of All Requests') - ax.set_ylim(0, 100) - ax.legend() - if not displayonly: - plotutils.export_to_image(f'1m0_pct_count_total_scaling_{timestamp}', fig) - plt.show() - - -def plot_pct_total_prio_bins(): - fig = plt.figure(figsize=(20, 10)) - fig.suptitle('1m0 Percent of Requests by Priority Class (both scheduled and unscheduled)', fontsize=20) - fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) - ax = fig.add_subplot() - - bardata = [] - scheduled_count = plotutils.get_data_from_opensearch('1m0-optimize-airmass')['total_req_by_priority'][0] - total_count = plotutils.get_data_from_opensearch('1m0-optimize-airmass')['total_request_count'] - scheduled_count = {bin_name: 100*np.array(values)/total_count for bin_name, values in scheduled_count.items()} - bardata.append(scheduled_count.values()) - - priorities = ['low', 'medium', 'high'] - plotutils.plot_barplot(ax, bardata, EFF_PRI_SCALING_TEST_LABELS, priorities) - ax.set_xlabel('Priority') - ax.set_ylabel('Percent of Requests out of All Requests') - ax.set_ylim(0, 100) - ax.legend() - if not displayonly: - plotutils.export_to_image(f'1m0_pct_count_total_scaling_{timestamp}', fig) - plt.show() - - -def plot_duration_histogram(): - fig = plt.figure(figsize=(20, 10)) - fig.suptitle('1m0 Distribution of Scheduled Request Durations with/without Duration Scaling', fontsize=20) - fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) - ax = fig.add_subplot() - with_duration_data = opensearch_client.get('scheduler-simulations', - '1m0-optimize-airmass-with-duration_2022-07-21T21:48:02.586407') - no_duration_data = opensearch_client.get('scheduler-simulations', - '1m0-optimize-airmass-no-duration_2022-07-21T21:52:46.316207') - duration_data = [np.array(with_duration_data['_source']['raw_scheduled_durations'])/60] - duration_data.append(np.array(no_duration_data['_source']['raw_scheduled_durations'])/60) - labels = ['With Duration', 'No Duration'] - ax.hist(duration_data, 
bins=50, label=labels) - ax.set_xlabel('Duration [min]') - ax.set_ylabel('Counts') - ax.set_title('Optimize by Airmass') - ax.legend() - if not displayonly: - plotutils.export_to_image(f'1m0_duration_hist_{timestamp}', fig) - plt.show() - - -def plot_eff_prio_duration_scatter(): - fig, axs = plt.subplots(1, 2, figsize=(24, 8)) - fig.suptitle('1m0 Scatterplot of Effective Priority and Duration', fontsize=20) - fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) - tagnames = ['with-duration-v2', 'with-duration-scaled-100-v2'] - labels = ['Priority 10-30', 'Priority 10-100'] - colors = [('#006BA4', '#5F9ED1'), ('#C85200', '#FF800E')] - for i, ax in enumerate(axs): - data = plotutils.get_data_from_opensearch(f'1m0-optimize-airmass-{tagnames[i]}') - prio_scheduled = np.array(data['raw_scheduled_priorities']) - prio_unscheduled = np.array(data['raw_unscheduled_priorities']) - dur_scheduled = np.array(data['raw_scheduled_durations'])/60 - dur_unscheduled = np.array(data['raw_unscheduled_durations'])/60 - ax.scatter(dur_scheduled, prio_scheduled*dur_scheduled, - label=f'{labels[i]}, scheduled', marker='x', color=colors[i][0]) - ax.scatter(dur_unscheduled, prio_unscheduled*dur_unscheduled, - label=f'{labels[i]}, unscheduled', marker='x', alpha=0.5, color=colors[i][1]) - ax.set_ylabel('Effective Priority (base priority x duration)') - ax.set_xlabel('Duration [min]') - ax.set_title(f'Optimize by Airmass, With Duration, {labels[i]}') - ax.legend() - if not displayonly: - plotutils.export_to_image(f'1m0_eff_prio_duration_scatter_{timestamp}', fig) - plt.show() - - -def plot_pct_sched_bin_eff_prio(): - fig, axs = plt.subplots(2, 2, figsize=(20, 12)) - fig.suptitle('1m0 Eff. 
Priority Distribution', fontsize=20) - fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) - test_names = ['with', 'no'] - for k, test_name in enumerate(test_names): - data = plotutils.get_data_from_opensearch(f'1m0-optimize-airmass-{test_name}-duration-v3') - prio_scheduled = data['raw_scheduled_priorities'] - prio_unscheduled = data['raw_unscheduled_priorities'] - prio_all = np.array(prio_scheduled + prio_unscheduled) - dur_scheduled = data['raw_scheduled_durations'] - dur_unscheduled = data['raw_unscheduled_durations'] - dur_all = np.array(dur_scheduled + dur_unscheduled)/60 - dur_scheduled = np.array(dur_scheduled)/60 - dur_unscheduled = np.array(dur_unscheduled)/60 - eff_prio_scheduled = prio_scheduled*dur_scheduled - eff_prio_unscheduled = prio_unscheduled*dur_unscheduled - eff_prio_all = prio_all*dur_all - bin_size = 100 - bin_range = (0, 4000) - level1_sched_bin = bin_data(eff_prio_scheduled, list(prio_scheduled), - bin_size=bin_size, bin_range=bin_range, aggregator=None) - level2_sched_bin = [list(bin_data(bin_values, bin_size=10, bin_range=(10, 30)).values()) - for bin_values in level1_sched_bin.values()] - level1_unsched_bin = bin_data(eff_prio_unscheduled, list(prio_unscheduled), - bin_size=bin_size, bin_range=bin_range, aggregator=None) - level2_unsched_bin = [list(bin_data(bin_values, bin_size=10, bin_range=(10, 30), fill=[]).values()) - for bin_values in level1_unsched_bin.values()] - all_bin = bin_data(eff_prio_all, bin_size=bin_size, bin_range=bin_range) - all_bin_array = np.array(list(all_bin.values())) - sched_bin_array = np.array([np.array(vals) for vals in level2_sched_bin]) - pct_bin_array = 100*sched_bin_array/all_bin_array[:, np.newaxis] - pct_bin_array = np.nan_to_num(pct_bin_array) - xaxis = np.arange(0, 4001, 50) - priority_labels = ['10-19', '20-29', '30'] - colors = ['#006BA4', '#FF800E', '#ABABAB'] - barwidth = 14 - for i, bin_ in enumerate(level2_sched_bin): - for j, label in enumerate(priority_labels): - 
axs[0][k].bar(xaxis[i]+j*barwidth, bin_[j], barwidth, label=label, color=colors[j]) - for i, bin_ in enumerate(level2_unsched_bin): - for j, label in enumerate(priority_labels): - axs[1][k].bar(xaxis[i]+j*barwidth, bin_[j], barwidth, label=label, color=colors[j]) - - handles, labels = axs[0][0].get_legend_handles_labels() - by_label = dict(zip(labels, handles)) - axs[0][k].set_title(f'Scheduled Requests ({test_name} duration)') - axs[1][k].set_title(f'Unscheduled Requests ({test_name} duration)') - for ax in axs[k]: - ax.legend(by_label.values(), by_label.keys(), title='Priority') - ax.set_xlabel('Effective Priority (base priority x duration [min])') - ax.set_ylabel('Number of Requests') - ax.set_ylim(0, 240) - plt.show() - - -def plot_percent_sched_requests_bin_by_priority(): - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(25, 12)) - fig.suptitle(f'1m0 Network Scheduler Metrics Binned by Priority', fontsize=20) - fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) - bardata1 = [] - for id in EFF_PRI_SCALING_TEST_LABELS: - priority_data = plotutils.get_data_from_opensearch(f'1m0-optimize-{id}')['percent_duration_by_priority'] - bardata1.append(list(priority_data[0].values())) - priorities = ['low priority', 'mid priority', 'high priority'] - plotutils.plot_barplot(ax1, bardata1, EFF_PRI_SCALING_TEST_LABELS, priorities) - ax1.set_xlabel('Priority') - ax1.set_ylabel('Percent Scheduled Time') - ax1.set_title('Percent Duration Scheduled') - ax1.legend() - bardata2 = [] - for id in EFF_PRI_SCALING_TEST_LABELS: - priority_data = plotutils.get_data_from_opensearch(f'1m0-optimize-{id}')['percent_sched_by_priority'] - bardata2.append(list(priority_data[0].values())) - priorities = ['low priority', 'mid priority', 'high priority'] - plotutils.plot_barplot(ax2, bardata2, EFF_PRI_SCALING_TEST_LABELS, priorities) - ax2.set_xlabel('Priority') - ax2.set_ylabel('Percent Scheduled Count') - ax2.set_title('Percent Number Scheduled') - ax2.legend() - plt.show() - - -VARIABLE = [ 
'with-duration-v3', - 'no-duration-v3', - 'with-duration-scaled-100-v3', - 'no-duration-scaled-100-v3',] - -markers = ["o" , "," ,"v" , "^" , "<", ">"] -colors = ['r','b','c','m', 'y', 'k'] -def rand_jitter(arr): - stdev = .01 * (max(arr) - min(arr)) - return arr + np.random.randn(len(arr)) * stdev - -def plot_sched_priority_duration_dotplot(): - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(28, 12)) - fig.suptitle(f'1m0 Distribution of Priority and Duration With Airmass Optimization', fontsize=20) - fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) - for i, id in enumerate(VARIABLE): - data = plotutils.get_data_from_opensearch(f'1m0-optimize-airmass-{id}') - if id in ['with-duration-scaled-100-v3', 'no-duration-scaled-100-v3']: - data['raw_scheduled_priorities'] = [(p+35)/4.5 for p in data['raw_scheduled_priorities']] - # print(id, len(data['raw_scheduled_priorities']), len(data['raw_unscheduled_priorities'])) - ax1.scatter(rand_jitter(data['raw_scheduled_priorities']), rand_jitter(data['raw_scheduled_durations']), - marker = markers[i],c = colors[i], s = 10, label = f'scheduled requests {id}',alpha = 0.3) - - - ax1.set_ylim(top=11000) - ax1.set_xlabel('Priority') - ax1.set_ylabel('Request Duration') - ax1.legend() - for i, id in enumerate(VARIABLE): - data = plotutils.get_data_from_opensearch(f'1m0-optimize-airmass-{id}') - if id in ['with-duration-scaled-100-v3', 'no-duration-scaled-100-v3']: - data['raw_unscheduled_priorities'] = [(p+35)/4.5 for p in data['raw_unscheduled_priorities']] - ax2.scatter(rand_jitter(data['raw_unscheduled_priorities']), rand_jitter(data['raw_unscheduled_durations']), - c =colors[i], marker=markers[i],s=10, label = f'unscheduled requests {id}', alpha = 0.3) - ax2.set_ylim(top=11000) - ax2.set_xlabel('Priority') - ax2.set_ylabel('Request Duration') - ax2.legend() - plt.show(block = False) - plt.show() - - -def plot_heat_map_priority_duration(): - fig, axs= plt.subplots(2, 2, figsize=(13, 12)) - fig.suptitle(f'1m0 Network Requests 
Heatmap With Airmass Optimization', fontsize=20) - fig.subplots_adjust(wspace=0.01, hspace=0.01, top=0.9) - ax_list = [axs[0,0],axs[0,1],axs[1,0], axs[1,1]] - for i, id in enumerate(VARIABLE): - data = plotutils.get_data_from_opensearch(f'1m0-optimize-airmass-{id}') - if id in ['with-duration-scaled-100-v3', 'no-duration-scaled-100-v3']: - data['raw_scheduled_priorities'] = [(p+35)/4.5 for p in data['raw_scheduled_priorities']] - data['raw_unscheduled_priorities'] = [(p+35)/4.5 for p in data['raw_unscheduled_priorities']] - sched_priorities = data['raw_scheduled_priorities'] - sched_durations = data['raw_scheduled_durations'] - unsched_priorities = data['raw_unscheduled_priorities'] - unsched_durations = data['raw_unscheduled_durations'] - level_1_bins = bin_data(sched_priorities, sched_durations, bin_size=4, bin_range=(10,30),aggregator=None) - level_2_bins = { - bin_key: bin_data(bin_values, bin_size=300, bin_range=(0, 1499)) | bin_data(bin_values, bin_size=3000, bin_range=(1500, 4000)) - for bin_key, bin_values in level_1_bins.items() - } - level_1_bins_unsched = bin_data(unsched_priorities, unsched_durations, bin_size=4, bin_range=(10,30),aggregator=None) - level_2_bins_unsched = { - bin_key: bin_data(bin_values, bin_size=300, bin_range=(0, 1499)) | bin_data(bin_values, bin_size=3000, bin_range=(1500, 4000)) - for bin_key, bin_values in level_1_bins_unsched.items() - } - heat_map_elements = [] - heat_map_elements_unsched = [] - for values in level_2_bins.values(): - heat_map_elements.append(list(values.values())) - for values in level_2_bins_unsched.values(): - heat_map_elements_unsched.append(list(values.values())) - priority_bins = list(level_2_bins.keys()) - duration_bins = ['0-5','5-10','10-15', '15-20', '20-25', '25&up'] - heat_map_elements = np.array(heat_map_elements) - heat_map_elements_unsched = np.array(heat_map_elements_unsched) - axis = ax_list[i] - cmap=plt.get_cmap('coolwarm') - cmap2 = plt.get_cmap('gray') - heatplot = 
axis.imshow(heat_map_elements,cmap=cmap) - axis.set_ylabel('Priority') - axis.set_xlabel('Duration (minutes)') - axis.set_xticks(np.arange(len(duration_bins)), labels=duration_bins) - axis.set_yticks(np.arange(len(priority_bins)), labels=priority_bins) - plt.setp(axis.get_xticklabels(), rotation=45, ha="right", - rotation_mode="anchor") - for i in range(len(priority_bins)): - for j in range(len(duration_bins)): - value = heat_map_elements[i, j] - text1 = axis.text(j, i, f'{heat_map_elements[i, j]}|{ heat_map_elements_unsched[i, j]}', - ha="center", va="center", fontsize='large', fontweight='semibold', color=cmap2(0.001/value)) - axis.set_title(f'{id} (sched|unsched)', fontweight='semibold') - fig.tight_layout() - plt.show() - - -if __name__ == '__main__': - plots = { - 'normed_airmass_hist': {'func': plot_normed_airmass_histogram, - 'desc': 'Airmass distribution, normalized so that 0 is worst airmass and 1 is best'}, - 'midpoint_airmass_hist': {'func': plot_midpoint_airmass_histogram, - 'desc': 'Midpoint airmass distributions for different airmass weighting coefficients'}, - 'pct_sched_airmass_bin_priority': {'func': plot_pct_count_airmass_prio_bins, - 'desc': 'Percent of requests scheduled binned by priority level' - 'for different airmass coefficients'}, - 'pct_time_airmass_bin_priority': {'func': plot_pct_time_airmass_prio_bins, - 'desc': 'Percent of time requested scheduled binned by priority level' - ' for different airmass coefficients'}, - 'pct_sched_scaling_bin_priority': {'func': plot_pct_count_scaling_prio_bins, - 'desc': 'Percent of requests scheduled binned by priority level' - ' for different scaling strategies'}, - 'pct_time_scaling_bin_priority': {'func': plot_pct_time_scaling_prio_bins, - 'desc': 'Percent of time requested scheduled binned by priority level' - ' for different scaling strategies'}, - 'pct_total_sched_scaling_bin_priority': {'func': plot_pct_total_sched_prio_bins, - 'desc': 'Percent of requests scheduled with respect to all requests, 
' - 'binned by priority level for different scaling strategies'}, - 'pct_total_scaling_bin_priority': {'func': plot_pct_total_prio_bins, - 'desc': 'The percent of requests occupied at each priority level'}, - 'duration_hist': {'func': plot_duration_histogram, - 'desc': 'Scheduled request duration distribution.'}, - 'eff_prio_duration_scatter': {'func': plot_eff_prio_duration_scatter, - 'desc': 'Scatterplot with (prio x duration) on y-axis and duration on x-axis'}, - 'pct_sched_eff_prio_hist': {'func': plot_pct_sched_bin_eff_prio, - 'desc': 'Histogram with (prio x duration) on x-axis and percentage scheduled by bin'}, - 'pct_sched_requests_bin_priority': {'func': plot_percent_sched_requests_bin_by_priority, - 'desc': 'Percent duration of requests scheduled binned by priority level for different ' - 'effective priority algorithms'}, - 'priority_duration_dotplot': {'func': plot_sched_priority_duration_dotplot, - 'desc': 'Dotplots of distribution of scheduled/unscheduled requests with (priority) on x-axis ' - 'and (duration) on y-axis'}, - 'heat_map_priority_duration': {'func': plot_heat_map_priority_duration, - 'desc': 'Heat map showing distribution of requests on priority and duration'}, - } - - description = 'Plotting functions for airmass optimization experiment.' - parser = argparse.ArgumentParser(description=description) - parser.add_argument('plot_name', type=str.lower, nargs='*', - help="The name of the plot(s) to display. `all` can be passed to show all.") - parser.add_argument('-l', '--list', help='Show plot info. 
`-l all` to show all available plots.', action='store_true') - parser.add_argument('-d', '--displayonly', help='Display the plots without exporting them.', action='store_true') - args = parser.parse_args() - global displayonly - displayonly = args.displayonly - plotnames = list(plots.keys()) - - plots_to_show = plotnames if args.plot_name == ['all'] else args.plot_name - if args.list: - spacing = max([len(name) for name in plots.keys()]) + 10 - print(f'{"NAME":{spacing}}DESCRIPTION') - print(f'{"====":{spacing}}===========') - for name in plots_to_show: - details = plots[name] - print(f'{name:{spacing}}{details["desc"]}') - else: - plots_to_show = list(plots.keys()) if args.plot_name == ['all'] else args.plot_name - for plot_name in plots_to_show: - plots[plot_name]['func']() diff --git a/adaptive_scheduler/simulation/plot_effective_priority.py b/adaptive_scheduler/simulation/plot_effective_priority.py deleted file mode 100644 index e377e3cd..00000000 --- a/adaptive_scheduler/simulation/plot_effective_priority.py +++ /dev/null @@ -1,50 +0,0 @@ -import numpy as np -import matplotlib.pyplot as plt -from opensearchpy import OpenSearch -from plotutils import get_data_from_opensearch, plot_barplot - -EFF_PRI_CALC= ['airmass','airmass-with-duration-scaled-100','airmass-no-duration','airmass-no-duration-scaled-100',] - - -def plot_percent_sched_duration_bin_by_priority(): - fig = plt.figure(figsize=(20, 10)) - fig.suptitle(f'1m0 Network Percent Request Time Scheduled binned by Priority', fontsize=20) - fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) - ax = fig.add_subplot() - bardata = [] - for id in EFF_PRI_CALC: - priority_data = get_data_from_opensearch(f'1m0-optimize-{id}')['percent_duration_by_priority'] - - bardata.append(list(priority_data[0].values())) - - priorities = ['low priority', 'mid priority', 'high priority'] - plot_barplot(ax, bardata, EFF_PRI_CALC, priorities) - ax.set_xlabel('Priority') - ax.set_ylabel('Percent Scheduled Time') - fig.legend() - 
plt.show() - - -def plot_percent_sched_numbers_bin_by_priority(): - fig = plt.figure(figsize=(20, 10)) - fig.suptitle(f'1m0 Network Percent Request Number Scheduled binned by Priority', fontsize=20) - fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) - ax = fig.add_subplot() - bardata = [] - for id in EFF_PRI_CALC: - priority_data = get_data_from_opensearch(f'1m0-optimize-{id}')['percent_sched_by_priority'] - - bardata.append(list(priority_data[0].values())) - - priorities = ['low priority', 'mid priority', 'high priority'] - plot_barplot(ax, bardata, EFF_PRI_CALC, priorities) - ax.set_xlabel('Priority') - ax.set_ylabel('Percent Scheduled Count') - fig.legend() - plt.show() - - -if __name__ == '__main__': - - plot_percent_sched_duration_bin_by_priority() - plot_percent_sched_numbers_bin_by_priority() \ No newline at end of file diff --git a/adaptive_scheduler/simulation/plot_priority_duration.py b/adaptive_scheduler/simulation/plot_priority_duration.py deleted file mode 100644 index 8212f9fc..00000000 --- a/adaptive_scheduler/simulation/plot_priority_duration.py +++ /dev/null @@ -1,47 +0,0 @@ -from xml.dom.pulldom import default_bufsize -import numpy as np -import matplotlib.pyplot as plt -from opensearchpy import OpenSearch -from plotutils import get_data_from_opensearch -VARIABLE = [ - #'with-duration-v3', - 'no-duration-v3', - #'with-duration-scaled-100-v3', - 'no-duration-scaled-100-v3',] - -markers = ["o" , "," ,"v" , "^" , "<", ">"] -colors = ['r','b','c','m', 'y', 'k'] -def rand_jitter(arr): - stdev = .01 * (max(arr) - min(arr)) - return arr + np.random.randn(len(arr)) * stdev - -def plot_sched_priority_duration_dotplot(): - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(28, 12)) - fig.suptitle(f'1m0 Distribution of Priority and Duration With Airmass Optimization', fontsize=20) - fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) - for i, id in enumerate(VARIABLE): - data = get_data_from_opensearch(f'1m0-optimize-airmass-{id}') - if id in 
['with-duration-scaled-100-v3', 'no-duration-scaled-100-v3']: - data['raw_scheduled_priorities'] = [(p+35)/4.5 for p in data['raw_scheduled_priorities']] - print(id, len(data['raw_scheduled_priorities']), len(data['raw_unscheduled_priorities'])) - ax1.scatter(rand_jitter(data['raw_scheduled_priorities']), rand_jitter(data['raw_scheduled_durations']), - marker = markers[i],c = colors[i], s = 10, label = f'scheduled requests {id}',alpha = 0.3) - ax1.set_ylim(top=11000) - ax1.set_xlabel('Priority') - ax1.set_ylabel('Request Duration') - ax1.legend() - for i, id in enumerate(VARIABLE): - data = get_data_from_opensearch(f'1m0-optimize-airmass-{id}') - if id in ['no-duration-scaled-100-v3', 'with-duration-scaled-100-v3']: - data['raw_unscheduled_priorities'] = [(p+35)/4.5 for p in data['raw_unscheduled_priorities']] - ax2.scatter(rand_jitter(data['raw_unscheduled_priorities']), rand_jitter(data['raw_unscheduled_durations']), - c =colors[i], marker=markers[i],s=10, label = f'unscheduled requests {id}', alpha = 0.3) - ax2.set_ylim(top=11000) - ax2.set_xlabel('Priority') - ax2.set_ylabel('Request Duration') - ax2.legend() - plt.show(block = False) - plt.show() - -if __name__ == '__main__': - plot_sched_priority_duration_dotplot() \ No newline at end of file From 40a0ea4aab345578d1467cdbf6de1704efffca8e Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 4 Aug 2022 13:33:37 -0700 Subject: [PATCH 139/165] removed old plotting file --- .../plot_heat_map_priority_duration.py | 119 ------------------ 1 file changed, 119 deletions(-) delete mode 100644 adaptive_scheduler/simulation/plot_heat_map_priority_duration.py diff --git a/adaptive_scheduler/simulation/plot_heat_map_priority_duration.py b/adaptive_scheduler/simulation/plot_heat_map_priority_duration.py deleted file mode 100644 index 92a010ec..00000000 --- a/adaptive_scheduler/simulation/plot_heat_map_priority_duration.py +++ /dev/null @@ -1,119 +0,0 @@ -import sched -from xml.dom.pulldom import default_bufsize -import 
numpy as np -import matplotlib.pyplot as plt -from opensearchpy import OpenSearch -from adaptive_scheduler.simulation.plotutils import get_data_from_opensearch -from adaptive_scheduler.simulation.metrics import bin_data -import seaborn as sns -from colorspacious import cspace_converter -VARIABLE = [ 'with-duration-v3', - 'no-duration-v3', - 'with-duration-scaled-100-v3', - 'no-duration-scaled-100-v3',] - -markers = ["o" , "," ,"v" , "^" , "<", ">"] -colors = ['r','b','c','m', 'y', 'k'] -def rand_jitter(arr): - stdev = .01 * (max(arr) - min(arr)) - return arr + np.random.randn(len(arr)) * stdev - -def plot_sched_priority_duration_dotplot(): - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(28, 12)) - fig.suptitle(f'1m0 Distribution of Priority and Duration With Airmass Optimization', fontsize=20) - fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) - for i, id in enumerate(VARIABLE): - data = get_data_from_opensearch(f'1m0-optimize-airmass-{id}') - if id in ['with-duration-scaled-100-v3', 'no-duration-scaled-100-v3']: - data['raw_scheduled_priorities'] = [(p+35)/4.5 for p in data['raw_scheduled_priorities']] - # print(id, len(data['raw_scheduled_priorities']), len(data['raw_unscheduled_priorities'])) - # ax1.scatter(rand_jitter(data['raw_scheduled_priorities']), rand_jitter(data['raw_scheduled_durations']), - # marker = markers[i],c = colors[i], s = 10, label = f'scheduled requests {id}',alpha = 0.3) - - - ax1.set_ylim(top=11000) - ax1.set_xlabel('Priority') - ax1.set_ylabel('Request Duration') - ax1.legend() - for i, id in enumerate(VARIABLE): - data = get_data_from_opensearch(f'1m0-optimize-airmass-{id}') - if id in ['with-duration-scaled-100-v3', 'no-duration-scaled-100-v3']: - data['raw_unscheduled_priorities'] = [(p+35)/4.5 for p in data['raw_unscheduled_priorities']] - # ax2.scatter(rand_jitter(data['raw_unscheduled_priorities']), rand_jitter(data['raw_unscheduled_durations']), - # c =colors[i], marker=markers[i],s=10, label = f'unscheduled requests {id}', 
alpha = 0.3) - ax2.set_ylim(top=11000) - ax2.set_xlabel('Priority') - ax2.set_ylabel('Request Duration') - ax2.legend() - plt.show(block = False) - plt.show() - - -def plot_heat_map_priority_duration(): - fig, axs= plt.subplots(2, 2, figsize=(13, 12)) - fig.suptitle(f'1m0 Network Requests Heatmap With Airmass Optimization', fontsize=20) - fig.subplots_adjust(wspace=0.01, hspace=0.01, top=0.9) - ax_list = [axs[0,0],axs[0,1],axs[1,0], axs[1,1]] - for i, id in enumerate(VARIABLE): - data = get_data_from_opensearch(f'1m0-optimize-airmass-{id}') - if id in ['with-duration-scaled-100-v3', 'no-duration-scaled-100-v3']: - data['raw_scheduled_priorities'] = [(p+35)/4.5 for p in data['raw_scheduled_priorities']] - data['raw_unscheduled_priorities'] = [(p+35)/4.5 for p in data['raw_unscheduled_priorities']] - sched_priorities = data['raw_scheduled_priorities'] - sched_durations = data['raw_scheduled_durations'] - unsched_priorities = data['raw_unscheduled_priorities'] - unsched_durations = data['raw_unscheduled_durations'] - level_1_bins = bin_data(sched_priorities, sched_durations, bin_size=4, bin_range=(10,30),aggregator=None) - level_2_bins = { - bin_key: bin_data(bin_values, bin_size=300, bin_range=(0, 1499)) | bin_data(bin_values, bin_size=3000, bin_range=(1500, 4000)) - for bin_key, bin_values in level_1_bins.items() - } - print(level_2_bins) - level_1_bins_unsched = bin_data(unsched_priorities, unsched_durations, bin_size=4, bin_range=(10,30),aggregator=None) - level_2_bins_unsched = { - bin_key: bin_data(bin_values, bin_size=300, bin_range=(0, 1499)) | bin_data(bin_values, bin_size=3000, bin_range=(1500, 4000)) - for bin_key, bin_values in level_1_bins_unsched.items() - } - heat_map_elements = [] - heat_map_elements_unsched = [] - for values in level_2_bins.values(): - # new_value= np.sum(list(values.values())[-5:]) - # temp_list = ['3000-3249', '3250-3499', '3500-3749', '3750-3999', '4000'] - # for key in temp_list: - # del values[key] - # values['3000&above'] = 
new_value - heat_map_elements.append(list(values.values())) - for values in level_2_bins_unsched.values(): - # new_value= np.sum(list(values.values())[-5:]) - # temp_list = ['3000-3249', '3250-3499', '3500-3749', '3750-3999', '4000'] - # for key in temp_list: - # del values[key] - # values['3000&above'] = new_value - heat_map_elements_unsched.append(list(values.values())) - priority_bins = list(level_2_bins.keys()) - duration_bins = ['0-5','5-10','10-15', '15-20', '20-25', '25&up'] - heat_map_elements = np.array(heat_map_elements) - heat_map_elements_unsched = np.array(heat_map_elements_unsched) - - axis = ax_list[i] - cmap=plt.get_cmap('coolwarm') - cmap2 = plt.get_cmap('gray') - heatplot = axis.imshow(heat_map_elements,cmap=cmap) - axis.set_ylabel('Priority') - axis.set_xlabel('Duration (minutes)') - axis.set_xticks(np.arange(len(duration_bins)), labels=duration_bins) - axis.set_yticks(np.arange(len(priority_bins)), labels=priority_bins) - plt.setp(axis.get_xticklabels(), rotation=45, ha="right", - rotation_mode="anchor") - for i in range(len(priority_bins)): - for j in range(len(duration_bins)): - value = heat_map_elements[i, j] - text1 = axis.text(j, i, f'{heat_map_elements[i, j]}|{ heat_map_elements_unsched[i, j]}', - ha="center", va="center", fontsize='large', fontweight='semibold', color=cmap2(0.001/value)) - axis.set_title(f'{id} (sched|unsched)', fontweight='semibold') - fig.tight_layout() - plt.show() - - -if __name__ == '__main__': - plot_heat_map_priority_duration() \ No newline at end of file From 7223030da0f4045e64a51c37f860f68740876dc7 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 4 Aug 2022 16:38:31 -0700 Subject: [PATCH 140/165] fixed some line plot things --- adaptive_scheduler/simulation/plotfuncs.py | 32 ++++++++++++++++++---- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/adaptive_scheduler/simulation/plotfuncs.py b/adaptive_scheduler/simulation/plotfuncs.py index 8fea4445..c8058f9f 100644 --- 
a/adaptive_scheduler/simulation/plotfuncs.py +++ b/adaptive_scheduler/simulation/plotfuncs.py @@ -288,7 +288,11 @@ def plot_pct_scheduled_airmass_lineplot(airmass_datasets, plot_title): pct_scheduled.append(list(data_by_priority.values())) data_by_airmass = np.array(pct_scheduled).transpose() for i, data in enumerate(data_by_airmass): - ax.plot(airmass_coeffs, data, label=prio_names[i]) + ax.plot(airmass_coeffs, data, marker='.', ms=8, label=prio_names[i]) + for j, k in zip(airmass_coeffs, data): + annotation = f'{k:.2f}%' + ax.annotate(annotation, xy=(j, k), xytext=(-15, 10), textcoords='offset points') + ax.set_xlabel('Airmass Coefficient') ax.set_ylabel('Percent of Requests Scheduled') ax.set_ylim(0, 100) @@ -326,7 +330,19 @@ def plot_pct_time_scheduled_airmass_lineplot(airmass_datasets, plot_title): pct_scheduled.append(np.append(pct_by_priority, pct_cumulative)) data_by_airmass = np.array(pct_scheduled).transpose() for i, data in enumerate(data_by_airmass): - ax.plot(airmass_coeffs, data, label=prio_names[i]) + ax.plot(airmass_coeffs, data, marker='.', ms=8, label=prio_names[i]) + for j, k in zip(airmass_coeffs, data): + annotation = f'{k:.2f}%' + # manually fix an overlapping annotation, THIS IS SPECIFIC TO A CERTAIN DATASET + # specifically, some of the points at the 6th airmass coeff are overlapping + # if this happens a lot, look into offsetting text automatically + if i == 3 and j == airmass_coeffs[6]: + ax.annotate(annotation, xy=(j, k), xytext=(-15, 6), c='#595959', textcoords='offset points') + elif i == 1 and j == airmass_coeffs[6]: + ax.annotate(annotation, xy=(j, k), xytext=(-15, -13), c='#FF800E', textcoords='offset points') + else: + ax.annotate(annotation, xy=(j, k), xytext=(-15, -13), textcoords='offset points') + ax.set_xlabel('Airmass Coefficient') ax.set_ylabel('Percent of Requested Time Scheduled') ax.set_ylim(0, 100) @@ -365,7 +381,10 @@ def plot_pct_time_scheduled_out_of_available(airmass_datasets, plot_title): 
pct_scheduled.append(np.append(pct_by_priority, pct_cumulative)) data_by_airmass = np.array(pct_scheduled).transpose() for i, data in enumerate(data_by_airmass): - ax.plot(airmass_coeffs, data, label=prio_names[i]) + ax.plot(airmass_coeffs, data, marker='.', ms=8, label=prio_names[i]) + for j, k in zip(airmass_coeffs, data): + annotation = f'{k:.2f}%' + ax.annotate(annotation, xy=(j, k), xytext=(5, 5), textcoords='offset points') ax.set_xlabel('Airmass Coefficient') ax.set_ylabel('Percent of Requested Time Scheduled') ax.set_ylim(0, 100) @@ -388,12 +407,15 @@ def plot_midpoint_airmass_histograms(airmass_datasets, plot_title): fig = plt.figure(figsize=(16, 16)) fig.suptitle(plot_title) fig.subplots_adjust(wspace=0.3, hspace=0.3, top=0.92) - for i, dataset in enumerate(airmass_datasets[1:]): + for i, dataset in enumerate(airmass_datasets): ax = fig.add_subplot(3, 3, i+1) midpoint_airmasses = dataset['airmass_metrics']['raw_airmass_data'][0]['midpoint_airmasses'] airmass_coeff = dataset['airmass_weighting_coefficient'] ax.hist(midpoint_airmasses, bins=50) - ax.set_title(f'Airmass Coefficient: {airmass_coeff}') + if i == 0: + ax.set_title('Optimize Earliest') + else: + ax.set_title(f'Airmass Coefficient: {airmass_coeff}') ax.set_xlabel('Midpoint Airmass') ax.set_ylabel('Count') ax.set_xlim(1.0, 2.0) From abe8f1ae8bcb3c37b89b93b1fc56fcc2452def9c Mon Sep 17 00:00:00 2001 From: Qingze Wu Date: Fri, 5 Aug 2022 10:50:46 -0700 Subject: [PATCH 141/165] minor changes --- adaptive_scheduler/simulation/plotfuncs.py | 15 +++++++++------ adaptive_scheduler/simulation/plots.py | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/adaptive_scheduler/simulation/plotfuncs.py b/adaptive_scheduler/simulation/plotfuncs.py index 8fea4445..713bf984 100644 --- a/adaptive_scheduler/simulation/plotfuncs.py +++ b/adaptive_scheduler/simulation/plotfuncs.py @@ -16,8 +16,8 @@ style.use('tableau-colorblind10') matplotlib.rcParams['figure.figsize'] = (20, 10) 
matplotlib.rcParams['figure.titlesize'] = 20 -matplotlib.rcParams['axes.titlesize'] = 16 -matplotlib.rcParams['axes.labelsize'] = 14 +matplotlib.rcParams['axes.titlesize'] = 14 +matplotlib.rcParams['axes.labelsize'] = 12 matplotlib.rcParams['xtick.labelsize'] = 12 matplotlib.rcParams['ytick.labelsize'] = 12 matplotlib.rcParams['figure.subplot.wspace'] = 0.2 # horizontal spacing for subplots @@ -124,7 +124,7 @@ def plot_percent_sched_requests_bin_by_priority(eff_pri_datasets, plot_title): bardata2 = [] for dataset in eff_pri_datasets: bardata2.append(list(dataset['percent_sched_by_priority'][0].values())) - priorities = ['low priority', 'mid priority', 'high priority'] + priorities = ['low priority(10-19)', 'mid priority(20-29)', 'high priority(30)'] plotutils.plot_multi_barplot(ax2, bardata2, labels, priorities) ax2.set_xlabel('Priority') ax2.set_ylabel('Scheduled Requests/Total Requests (%)') @@ -207,7 +207,7 @@ def plot_heat_map_priority_duration(eff_pri_datasets, plot_title): heat_map_elements = np.array(heat_map_elements) heat_map_elements_unsched = np.array(heat_map_elements_unsched) axis = ax_list[i] - cmap=plt.get_cmap('coolwarm') + cmap = plt.get_cmap('coolwarm') cmap2 = plt.get_cmap('gray') heatplot = axis.imshow(heat_map_elements,cmap=cmap) axis.set_ylabel('Priority') @@ -221,7 +221,9 @@ def plot_heat_map_priority_duration(eff_pri_datasets, plot_title): value = heat_map_elements[j, k] text1 = axis.text(k, j, f'{heat_map_elements[j, k]}|{ heat_map_elements_unsched[j, k]}', ha="center", va="center", fontsize='large', fontweight='semibold', color=cmap2(0.001/value)) - axis.set_title(f'{labels[i]} (sched|unsched)', fontweight='semibold') + time_proportion = sum(data['scheduled_seconds_by_priority'][0].values()) / sum(data['total_seconds_by_priority'][0].values()) * 100 + percent_time_utilization = data['percent_time_utilization'] + axis.set_title(f'{labels[i]} ({percent_time_utilization:.1f}% time utilized)', fontweight='semibold') fig.tight_layout() return 
fig @@ -492,6 +494,7 @@ def plot_subplots_input_duration(dataset, plot_title): unsched_priorities = dataset[0]['raw_unscheduled_priorities'] sched_bins = metrics.bin_data(sched_priorities, sched_durations, bin_size=10, bin_range=(10,30),aggregator=None) unsched_bins = metrics.bin_data(unsched_priorities, unsched_durations, bin_size=10, bin_range=(10,30),aggregator=None) + totals_by_priorities = list(dataset[0]['total_req_by_priority'][0].values()) labels = ['10-19', '20-29', '30'] axis = [ax1, ax2, ax3] for i, values in enumerate(sched_bins.values()): @@ -503,6 +506,6 @@ def plot_subplots_input_duration(dataset, plot_title): axis[i].set_xlabel('Duration (Minutes)') axis[i].set_ylabel('Input reservation counts') axis[i].set_ylim(0, 300) - axis[i].set_title(f'{labels[i]} Priority binned by duration') + axis[i].set_title(f'{labels[i]} Priority ({totals_by_priorities[i]} requests)') axis[i].legend() return fig diff --git a/adaptive_scheduler/simulation/plots.py b/adaptive_scheduler/simulation/plots.py index a38e15c3..ffa768cf 100644 --- a/adaptive_scheduler/simulation/plots.py +++ b/adaptive_scheduler/simulation/plots.py @@ -68,7 +68,7 @@ '1m Distribution of Priority and Duration With Airmass Optimization', *effective_priority_experiment_ids), Plot(plotfuncs.plot_heat_map_priority_duration, - '1m Network Requests Heatmap With Airmass Optimization', + '1m Network Requests Heatmap With Airmass Optimization (sched|unsched)', *effective_priority_experiment_ids), ] From dac07b93cdc85b46f4416ebdf5eb4e3c0040c695 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Fri, 5 Aug 2022 11:49:23 -0700 Subject: [PATCH 142/165] fixed whitespace issues --- adaptive_scheduler/simulation/plotfuncs.py | 45 ++++++++++++---------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/adaptive_scheduler/simulation/plotfuncs.py b/adaptive_scheduler/simulation/plotfuncs.py index 5a6ceeca..058fc310 100644 --- a/adaptive_scheduler/simulation/plotfuncs.py +++ 
b/adaptive_scheduler/simulation/plotfuncs.py @@ -114,7 +114,7 @@ def plot_percent_sched_requests_bin_by_priority(eff_pri_datasets, plot_title): labels = ['with duration', 'no duration', 'with duration scaled 100', 'no duration scaled 100'] for dataset in eff_pri_datasets: bardata1.append(list(dataset['percent_duration_by_priority'][0].values())) - + priorities = ['low priority(10-19)', 'mid priority(20-29)', 'high priority(30)'] plotutils.plot_multi_barplot(ax1, bardata1, labels, priorities) ax1.set_xlabel('Priority') @@ -132,25 +132,26 @@ def plot_percent_sched_requests_bin_by_priority(eff_pri_datasets, plot_title): ax2.legend(title='Effective Priority Algorithms') return fig - + def plot_sched_priority_duration_dotplot(eff_pri_datasets, plot_title): def rand_jitter(arr): stdev = .01 * (max(arr) - min(arr)) return arr + np.random.randn(len(arr)) * stdev - - markers = ["o" , "," ,"v" , "^" , "<", ">"] - colors = ['r','b','c','m', 'y', 'k'] + + markers = ['o', ',', 'v', '^', '<', '>'] + colors = ['r', 'b', 'c', 'm', 'y', 'k'] fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(28, 12)) fig.suptitle(plot_title) fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) labels = ['with duration', 'no duration', 'with duration scaled 100', 'no duration scaled 100'] for i, data in enumerate(eff_pri_datasets): id = data['simulation_id'] - if id in ['airmass-0.1-w-duration-w-scaling','airmass-0.1-no-duration-w-scaling']: - data['raw_scheduled_priorities'] = [(p+35)/4.5 for p in data['raw_scheduled_priorities']] - data['raw_scheduled_durations'] = [d/60 for d in data['raw_scheduled_durations']] - ax1.scatter(rand_jitter(data['raw_scheduled_priorities']), rand_jitter(data['raw_scheduled_durations']), - marker = markers[i],c = colors[i], s=10, label=labels[i], alpha=0.3) + # un-scale the priorities + if id in ['airmass-0.1-w-duration-w-scaling', 'airmass-0.1-no-duration-w-scaling']: + data['raw_scheduled_priorities'] = [(p+35)/4.5 for p in data['raw_scheduled_priorities']] + 
data['raw_scheduled_durations'] = [d/60 for d in data['raw_scheduled_durations']] + ax1.scatter(rand_jitter(data['raw_scheduled_priorities']), rand_jitter(data['raw_scheduled_durations']), + marker=markers[i], c=colors[i], s=10, label=labels[i], alpha=0.3) ax1.set_ylim(top=100) ax1.set_xlabel('Priority') ax1.set_ylabel('Request Duration (minutes)') @@ -158,11 +159,12 @@ def rand_jitter(arr): ax1.legend(title='Effective Priority Algorithms') for i, data in enumerate(eff_pri_datasets): id = data['simulation_id'] + # un-scale the priorities if id in ['airmass-0.1-w-duration-w-scaling', 'airmass-0.1-no-duration-w-scaling']: data['raw_unscheduled_priorities'] = [(p+35)/4.5 for p in data['raw_unscheduled_priorities']] data['raw_unscheduled_durations'] = [d/60 for d in data['raw_unscheduled_durations']] ax2.scatter(rand_jitter(data['raw_unscheduled_priorities']), rand_jitter(data['raw_unscheduled_durations']), - c=colors[i], marker=markers[i],s=10, label=labels[i], alpha=0.3) + c=colors[i], marker=markers[i], s=10, label=labels[i], alpha=0.3) ax2.set_ylim(top=100) ax2.set_xlabel('Priority') ax2.set_ylabel('Request Duration (minutes)') @@ -172,10 +174,10 @@ def rand_jitter(arr): def plot_heat_map_priority_duration(eff_pri_datasets, plot_title): - fig, axs= plt.subplots(2, 2, figsize=(13, 12)) + fig, axs = plt.subplots(2, 2, figsize=(13, 12)) fig.suptitle(plot_title) fig.subplots_adjust(wspace=0.01, hspace=0.01, top=0.9) - ax_list = [axs[0,0],axs[0,1],axs[1,0], axs[1,1]] + ax_list = [axs[0, 0], axs[0, 1], axs[1, 0], axs[1, 1]] labels = ['with duration', 'no duration', 'with duration scaled 100', 'no duration scaled 100'] for i, data in enumerate(eff_pri_datasets): id = data['simulation_id'] @@ -186,16 +188,17 @@ def plot_heat_map_priority_duration(eff_pri_datasets, plot_title): sched_durations = data['raw_scheduled_durations'] unsched_priorities = data['raw_unscheduled_priorities'] unsched_durations = data['raw_unscheduled_durations'] - level_1_bins = 
bin_data(sched_priorities, sched_durations, bin_size=4, bin_range=(10,30),aggregator=None) + level_1_bins = bin_data(sched_priorities, sched_durations, bin_size=4, bin_range=(10, 30), aggregator=None) + # set the duration bins (in seconds) here level_2_bins = { bin_key: bin_data(bin_values, bin_size=300, bin_range=(0, 1499)) | bin_data(bin_values, bin_size=3000, bin_range=(1500, 4000)) for bin_key, bin_values in level_1_bins.items() - } + } level_1_bins_unsched = bin_data(unsched_priorities, unsched_durations, bin_size=4, bin_range=(10,30),aggregator=None) level_2_bins_unsched = { bin_key: bin_data(bin_values, bin_size=300, bin_range=(0, 1499)) | bin_data(bin_values, bin_size=3000, bin_range=(1500, 4000)) for bin_key, bin_values in level_1_bins_unsched.items() - } + } heat_map_elements = [] heat_map_elements_unsched = [] for values in level_2_bins.values(): @@ -203,25 +206,25 @@ def plot_heat_map_priority_duration(eff_pri_datasets, plot_title): for values in level_2_bins_unsched.values(): heat_map_elements_unsched.append(list(values.values())) priority_bins = list(level_2_bins.keys()) - duration_bins = ['0-5','5-10','10-15', '15-20', '20-25', '25&up'] + duration_bins = ['0-5', '5-10', '10-15', '15-20', '20-25', '25&up'] heat_map_elements = np.array(heat_map_elements) heat_map_elements_unsched = np.array(heat_map_elements_unsched) axis = ax_list[i] cmap = plt.get_cmap('coolwarm') cmap2 = plt.get_cmap('gray') - heatplot = axis.imshow(heat_map_elements,cmap=cmap) + heatplot = axis.imshow(heat_map_elements, cmap=cmap) axis.set_ylabel('Priority') axis.set_xlabel('Duration (minutes)') axis.set_xticks(np.arange(len(duration_bins)), labels=duration_bins) axis.set_yticks(np.arange(len(priority_bins)), labels=priority_bins) plt.setp(axis.get_xticklabels(), rotation=45, ha="right", - rotation_mode="anchor") + rotation_mode="anchor") for j in range(len(priority_bins)): for k in range(len(duration_bins)): value = heat_map_elements[j, k] text1 = axis.text(k, j, 
f'{heat_map_elements[j, k]}|{ heat_map_elements_unsched[j, k]}', - ha="center", va="center", fontsize='large', fontweight='semibold', color=cmap2(0.001/value)) - time_proportion = sum(data['scheduled_seconds_by_priority'][0].values()) / sum(data['total_seconds_by_priority'][0].values()) * 100 + ha="center", va="center", fontsize='large', fontweight='semibold', color=cmap2(0.001/value)) + # time_proportion = sum(data['scheduled_seconds_by_priority'][0].values()) / sum(data['total_seconds_by_priority'][0].values()) * 100 percent_time_utilization = data['percent_time_utilization'] axis.set_title(f'{labels[i]} ({percent_time_utilization:.1f}% time utilized)', fontweight='semibold') fig.tight_layout() From 43a9ed9279e9929281973f641642b8db240ab551 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Fri, 5 Aug 2022 12:08:54 -0700 Subject: [PATCH 143/165] began writing readme --- adaptive_scheduler/simulation/README.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 adaptive_scheduler/simulation/README.md diff --git a/adaptive_scheduler/simulation/README.md b/adaptive_scheduler/simulation/README.md new file mode 100644 index 00000000..d7ce75db --- /dev/null +++ b/adaptive_scheduler/simulation/README.md @@ -0,0 +1,6 @@ +# Adaptive Scheduler Simulator Orchestrator + +The orchestrator allows for running the adaptive scheduler in a simulated environment in order to facilitate testing. +It allows the user to dump input request data to the [Configuration Database](https://github.com/observatorycontrolsystem/configdb) +which is then passed to the scheduler. The orchestrator runs the scheduler and passes off the scheduler result to a +metric calculation file, which calculates metrics to send to an OpenSearch database. 
\ No newline at end of file From 1cff1b71ddb5d94393b501453af4601fcbd84c5d Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Fri, 5 Aug 2022 13:53:23 -0700 Subject: [PATCH 144/165] modified env variables and 'show all' behavior --- adaptive_scheduler/simulation/plotutils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/adaptive_scheduler/simulation/plotutils.py b/adaptive_scheduler/simulation/plotutils.py index 84025a83..2af0b1f2 100644 --- a/adaptive_scheduler/simulation/plotutils.py +++ b/adaptive_scheduler/simulation/plotutils.py @@ -13,8 +13,8 @@ DEFAULT_DIR = 'adaptive_scheduler/simulation/plot_output' -OPENSEARCH_URL = os.getenv('OPENSEARCH_URL', 'https://logs.lco.global/') -OPENSEARCH_INDEX = os.getenv('OPENSEARCH_INDEX', 'scheduler-simulations') +OPENSEARCH_URL = os.getenv('OPENSEARCH_URL', '') +OPENSEARCH_INDEX = os.getenv('SIMULATION_OPENSEARCH_INDEX', 'scheduler-simulations') opensearch_client = OpenSearch(OPENSEARCH_URL) data_cache = {} @@ -73,7 +73,7 @@ def run_user_interface(plots): readline.parse_and_bind('tab: complete') while True: showplot = input('\nShow plot (default all): ').strip() - if showplot == '': + if showplot == '' or showplot.lower() == 'all': for plot in plots: plot.generate() if args.save: From 168ac8da36038308f986bbe7c2e755a4fb989405 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Fri, 5 Aug 2022 14:12:33 -0700 Subject: [PATCH 145/165] minor change to the way input data is handled --- adaptive_scheduler/simulation/plotfuncs.py | 1 - adaptive_scheduler/simulation/plots.py | 23 +++++++++++----------- adaptive_scheduler/simulation/plotutils.py | 6 ++++-- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/adaptive_scheduler/simulation/plotfuncs.py b/adaptive_scheduler/simulation/plotfuncs.py index 058fc310..c1cf7964 100644 --- a/adaptive_scheduler/simulation/plotfuncs.py +++ b/adaptive_scheduler/simulation/plotfuncs.py @@ -474,7 +474,6 @@ def plot_duration_by_window_duration_scatter(data, 
plot_title): """ fig, ax = plt.subplots() fig.suptitle(plot_title) - data = data[0] sec_to_min = 1/60 window_dur = np.array(data['raw_window_durations']) * sec_to_min sched_dur = np.array(data['raw_scheduled_durations']) * sec_to_min diff --git a/adaptive_scheduler/simulation/plots.py b/adaptive_scheduler/simulation/plots.py index ffa768cf..727e6d9c 100644 --- a/adaptive_scheduler/simulation/plots.py +++ b/adaptive_scheduler/simulation/plots.py @@ -29,29 +29,28 @@ plots = [ Plot(plotfuncs.plot_airmass_difference_histogram, '1m Network Airmass Difference Distribution for Scheduled Requests', - *airmass_experiment_ids), + airmass_experiment_ids), Plot(plotfuncs.plot_pct_scheduled_airmass_binned_priority, '1m Network Airmass Experiment Percent of Requests Scheduled per Priority Class', - *airmass_experiment_ids), + airmass_experiment_ids), Plot(plotfuncs.plot_pct_scheduled_airmass_lineplot, '1m Network Airmass Experiment Percent of Requests Scheduled per Priority Class', - *airmass_experiment_ids), + airmass_experiment_ids), Plot(plotfuncs.plot_pct_time_scheduled_airmass_binned_priority, '1m Network Airmass Experiment Percent of Requested Time Scheduled per Priority Class', - *airmass_experiment_ids), + airmass_experiment_ids), Plot(plotfuncs.plot_pct_time_scheduled_airmass_lineplot, '1m Network Airmass Experiment Percent of Requested Time Scheduled per Priority Class', - *airmass_experiment_ids), + airmass_experiment_ids), Plot(plotfuncs.plot_pct_time_scheduled_out_of_available, '1m Network Airmass Experiment Percent of Requested Time Scheduled out of Available Time', - *airmass_experiment_ids), + airmass_experiment_ids), Plot(plotfuncs.plot_midpoint_airmass_histograms, '1m Network Airmass Experiment Midpoint Airmass Distributions', - *airmass_experiment_ids), + airmass_experiment_ids), Plot(plotfuncs.plot_eff_priority_duration_scatter, '1m Network Scatterplot of Effective Priority and Duration', - 'airmass-0.1-w-duration-no-scaling', - 
'airmass-0.1-w-duration-w-scaling'), + ['airmass-0.1-w-duration-no-scaling', 'airmass-0.1-w-duration-w-scaling']), Plot(plotfuncs.plot_duration_by_window_duration_scatter, '1m Network Scatterplot of Duration and Window Duration', 'window-duration'), @@ -63,13 +62,13 @@ 'no-airmass-w-duration-no-scaling'), Plot(plotfuncs.plot_percent_sched_requests_bin_by_priority, '1m Network Scheduler Metrics Binned by Priority', - *effective_priority_experiment_ids), + effective_priority_experiment_ids), Plot(plotfuncs.plot_sched_priority_duration_dotplot, '1m Distribution of Priority and Duration With Airmass Optimization', - *effective_priority_experiment_ids), + effective_priority_experiment_ids), Plot(plotfuncs.plot_heat_map_priority_duration, '1m Network Requests Heatmap With Airmass Optimization (sched|unsched)', - *effective_priority_experiment_ids), + effective_priority_experiment_ids), ] diff --git a/adaptive_scheduler/simulation/plotutils.py b/adaptive_scheduler/simulation/plotutils.py index 2af0b1f2..e851934d 100644 --- a/adaptive_scheduler/simulation/plotutils.py +++ b/adaptive_scheduler/simulation/plotutils.py @@ -95,7 +95,7 @@ def run_user_interface(plots): class Plot: - def __init__(self, plotfunc, description, *sim_ids, **kwargs): + def __init__(self, plotfunc, description, sim_ids, **kwargs): """A wrapper class for plotting. The user specifies the plotting function to use and the simulation ID(s) or search keywords. The data is passed to the plotting function as a list of datasets, each set corresponding to an OpenSearch index. 
@@ -112,7 +112,7 @@ def __init__(self, plotfunc, description, *sim_ids, **kwargs): self.description = description # expects plotting functions to be called 'plot_some_plot_name' self.name = plotfunc.__name__.replace('plot_', '') - self.sim_ids = sim_ids + self.sim_ids = sim_ids if type(sim_ids) is list else [sim_ids] self.kwargs = kwargs def generate(self): @@ -124,6 +124,8 @@ def generate(self): except KeyError: data_cache[sim_id] = get_opensearch_data(sim_id) self.data.append(data_cache[sim_id]) + if len(self.data) == 1: + self.data = self.data[0] self.fig = self.plotfunc(self.data, self.description, **self.kwargs) From 2046f0b53b646774e0461f1cc6b795fcd053758a Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Fri, 5 Aug 2022 14:30:28 -0700 Subject: [PATCH 146/165] finished first draft of readme --- adaptive_scheduler/simulation/README.md | 55 ++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/adaptive_scheduler/simulation/README.md b/adaptive_scheduler/simulation/README.md index d7ce75db..dc83c90c 100644 --- a/adaptive_scheduler/simulation/README.md +++ b/adaptive_scheduler/simulation/README.md @@ -3,4 +3,57 @@ The orchestrator allows for running the adaptive scheduler in a simulated environment in order to facilitate testing. It allows the user to dump input request data to the [Configuration Database](https://github.com/observatorycontrolsystem/configdb) which is then passed to the scheduler. The orchestrator runs the scheduler and passes off the scheduler result to a -metric calculation file, which calculates metrics to send to an OpenSearch database. \ No newline at end of file +metric calculation file, which calculates metrics to send to an OpenSearch database. Work is still being done to enable the +orchestrator to step through a time range and run the scheduler repeatedly on different points of the input data. 
+ +## Overview of Metrics +The available metrics center around priority distributions, utilization, and miscellaneous data including airmass data +and slew distance. Certain metrics sent to OpenSearch are pre-binned by priority level. To get the best understanding of +the data structures, inspect the raw JSON in OpenSearch directly. + +## Prerequisites +* Python 3.9 +* A running [Configuration Database](https://github.com/observatorycontrolsystem/configdb) +* A running OpenSearch with index for scheduler simulations + +## Environment Variables +Consult the adaptive scheduler README for general environment variables related to the scheduler. Additional environment +variables specific to the orchestrator are as follows: +| Variable | Description | Default | +|----------------------------------|---------------------------------------------------------------------------------------------|-------------------------| +| `SIMULATION_RUN_ID` | The run ID of the scheduler. This will be saved as `simulation_id` in OpenSearch | `1` | +| `SIMULATION_START_TIME` | The simulation start time, which allows the orchestrator to step through a time range (WIP) | `2022-06-23` | +| `SIMULATION_END_TIME` | The end time of the time range (WIP) | `2022-07-07` | +| `SIMULATION_TIME_STEP_MINUTES` | The time step in minutes for the time range (WIP) | `60` | +| `SIMULATION_AIRMASS_COEFFICIENT` | The airmass optimization weighting value | `0.1` | +| `SIMULATION_OPENSEARCH_INDEX` | The index to save OpenSearch metrics to | `scheduler-simulations` | + +## How to Run +When running in a Docker container, the entry point can be modified to point to the orchestrator instead of the scheduler, +e.g. `sh -c "sleep 20s; simulation-orchestrator"`. The twenty second wait time is to ensure all the relevant services (configdb, redis, etc.) are +spun up and available. Otherwise, run the orchestrator on a machine as you would run the scheduler. 
+ +## Simulation Process +The general workflow for running a scheduler simulation is as follows: +1. Make changes to the adaptive scheduler. If running with Docker, build the image using the suggested build command in the adaptive scheduler README. +2. If necessary, adjust the `metrics.py` file to conform with the tests you are running, e.g. adjusting binning for priority values. +3. Modify environment variables accordingly, particularly setting the run ID. +4. Run the orchestrator. + +## Plotting +Plotting utilities and functions are included with the orchestrator to plot data. Data is pulled from OpenSearch, so the +plotting framework can be run standalone from the orchestrator. Note that the environment variable `OPENSEARCH_URL` must be set +on whatever machine you are running the plots from. To use the plotting interface, run `python -m adaptive_scheduler.simulation.plots` +(`-h` to show the available command line arguments). + +## Creating Your Own Plots +The plotting framework provides a `Plot` class defined in `plotutils.py` to help initialize plots and get data from OpenSearch. +`Plot` is initialized with a user-defined plotting function to generate the plot, the plot title, and either a single string or a list +of strings. It searches the `simulation_id` field in OpenSearch for the strings and plots the data. To write your own plotting +functions, follow the example functions in `plotfuncs.py`. Plotting functions should take in either a list of datasets +or a single dataset (to match the initialization in `plots.py`. The plot title should be passed into the plotting function as well. +This title is used to generate the descriptions for the command-line interface of the plotting framework. + +The plot creation process is as simple as: +1. Creating a function (e.g. `plot_my_plot`) in `plotfuncs.py` +2. Adding the plot to the list of plots in `plots.py` (e.g. 
`Plot(plotfuncs.plot_my_plot, 'My Plot Title', 'some-data-id')`) From af270d53ed81b22710e153753d4005bb2acd49ff Mon Sep 17 00:00:00 2001 From: Qingze Wu Date: Fri, 5 Aug 2022 16:07:16 -0700 Subject: [PATCH 147/165] update datasets --- adaptive_scheduler/simulation/plotfuncs.py | 76 +++++++++++++--------- adaptive_scheduler/simulation/plots.py | 3 - 2 files changed, 46 insertions(+), 33 deletions(-) diff --git a/adaptive_scheduler/simulation/plotfuncs.py b/adaptive_scheduler/simulation/plotfuncs.py index c1cf7964..5b1e50f2 100644 --- a/adaptive_scheduler/simulation/plotfuncs.py +++ b/adaptive_scheduler/simulation/plotfuncs.py @@ -107,6 +107,17 @@ def plot_pct_scheduled_airmass_binned_priority(airmass_datasets, plot_title): def plot_percent_sched_requests_bin_by_priority(eff_pri_datasets, plot_title): + """Plots a set of barplots. A barplot of percentage of request time scheduled for different priorities + on the left side and a barplot of percentage of request numbers scheduled for different priorities on the right side. + + Args: + eff_pri_datasets [dict]: a list of datasets, each dataset corresponding + to a different effective priority calculation. + plot_title (str): The title of the plot. + + Returns: + fig (matplotlib.pyplot.Figure): The output figure object. + """ fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(25, 12)) fig.suptitle(plot_title) fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) @@ -134,6 +145,17 @@ def plot_percent_sched_requests_bin_by_priority(eff_pri_datasets, plot_title): def plot_sched_priority_duration_dotplot(eff_pri_datasets, plot_title): + """Plots a dot plot showing how the scheduled request durations and priorities are distributed for + different effecitve priority calculations. + + Args: + eff_pri_datasets [dict]: a list of datasets, each dataset corresponding + to a different effective priority calculation. + plot_title (str): The title of the plot. + + Returns: + fig (matplotlib.pyplot.Figure): The output figure object. 
+ """ def rand_jitter(arr): stdev = .01 * (max(arr) - min(arr)) return arr + np.random.randn(len(arr)) * stdev @@ -174,6 +196,17 @@ def rand_jitter(arr): def plot_heat_map_priority_duration(eff_pri_datasets, plot_title): + """Plots four heat maps showing how the scheduled request durations and priorities are distributed for + each effecitve priority calculations. + + Args: + eff_pri_datasets [dict]: a list of datasets, each dataset corresponding + to a different effective priority calculation. + plot_title (str): The title of the plot. + + Returns: + fig (matplotlib.pyplot.Figure): The output figure object. + """ fig, axs = plt.subplots(2, 2, figsize=(13, 12)) fig.suptitle(plot_title) fig.subplots_adjust(wspace=0.01, hspace=0.01, top=0.9) @@ -224,7 +257,6 @@ def plot_heat_map_priority_duration(eff_pri_datasets, plot_title): value = heat_map_elements[j, k] text1 = axis.text(k, j, f'{heat_map_elements[j, k]}|{ heat_map_elements_unsched[j, k]}', ha="center", va="center", fontsize='large', fontweight='semibold', color=cmap2(0.001/value)) - # time_proportion = sum(data['scheduled_seconds_by_priority'][0].values()) / sum(data['total_seconds_by_priority'][0].values()) * 100 percent_time_utilization = data['percent_time_utilization'] axis.set_title(f'{labels[i]} ({percent_time_utilization:.1f}% time utilized)', fontweight='semibold') fig.tight_layout() @@ -484,49 +516,33 @@ def plot_duration_by_window_duration_scatter(data, plot_title): return fig -def plot_input_duration_binned_priority(dataset, plot_title): - fig, ax = plt.subplots() - fig.suptitle(plot_title) - bardata = [] - input_durations = dataset[0]['raw_scheduled_durations'] + dataset[0]['raw_unscheduled_durations'] - input_priorities = dataset[0]['raw_scheduled_priorities'] + dataset[0]['raw_unscheduled_priorities'] - input_bins = metrics.bin_data(input_priorities, input_durations, bin_size=10, bin_range=(10,30),aggregator=None) - duration_bins = { - bin_key: metrics.bin_data(bin_values, bin_size=300, 
bin_range=(0, 1499)) | metrics.bin_data(bin_values, bin_size=10000, bin_range=(1500, 10000)) - for bin_key, bin_values in input_bins.items() - } - labels = ['10-19', '20-29', '30'] - for values in duration_bins.values(): - bardata.append(list(values.values())) - binnames = ['0-5','5-10','10-15', '15-20', '20-25', '25&up'] - plotutils.plot_multi_barplot(ax, bardata, labels, binnames, barwidth=0.1) - ax.set_xlabel('Duration (minutes)') - ax.set_ylabel('Input reservation counts') - ax.set_ylim(0, 300) - ax.legend(title='Priority') - return fig +def plot_subplots_input_duration(data, plot_title): + """Plots histograms of the input request durations in minutes for different priorities. + Args: + data (dict): The data for this metric. Expects one data. + plot_title (str): The title of the plot. -def plot_subplots_input_duration(dataset, plot_title): + Returns: + fig (matplotlib.pyploy.Figure): The output Figure object. + """ fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(20,10)) fig.suptitle(plot_title) - sched_durations = dataset[0]['raw_scheduled_durations'] + sched_durations = data['raw_scheduled_durations'] sched_durations = [d/60 for d in sched_durations] - unsched_durations = dataset[0]['raw_unscheduled_durations'] + unsched_durations = data['raw_unscheduled_durations'] unsched_durations = [d/60 for d in unsched_durations] - sched_priorities = dataset[0]['raw_scheduled_priorities'] - unsched_priorities = dataset[0]['raw_unscheduled_priorities'] + sched_priorities = data['raw_scheduled_priorities'] + unsched_priorities = data['raw_unscheduled_priorities'] sched_bins = metrics.bin_data(sched_priorities, sched_durations, bin_size=10, bin_range=(10,30),aggregator=None) unsched_bins = metrics.bin_data(unsched_priorities, unsched_durations, bin_size=10, bin_range=(10,30),aggregator=None) - totals_by_priorities = list(dataset[0]['total_req_by_priority'][0].values()) + totals_by_priorities = list(data['total_req_by_priority'][0].values()) labels = ['10-19', '20-29', 
'30'] axis = [ax1, ax2, ax3] for i, values in enumerate(sched_bins.values()): bars = ['Scheduled', 'Unscheduled'] - # axis[i].hist(values, bins = np.arange(0, 4000, 120)) axis[i].hist([values,list(unsched_bins.values())[i]], bins = np.arange(0, 70, 2), stacked = True, label = bars) - # axis[i].hist(list(unsched_bins.values())[i], bins = np.arange(0, 4000, 120)) axis[i].set_xlabel('Duration (Minutes)') axis[i].set_ylabel('Input reservation counts') axis[i].set_ylim(0, 300) diff --git a/adaptive_scheduler/simulation/plots.py b/adaptive_scheduler/simulation/plots.py index 727e6d9c..8491fd7f 100644 --- a/adaptive_scheduler/simulation/plots.py +++ b/adaptive_scheduler/simulation/plots.py @@ -54,9 +54,6 @@ Plot(plotfuncs.plot_duration_by_window_duration_scatter, '1m Network Scatterplot of Duration and Window Duration', 'window-duration'), - Plot(plotfuncs.plot_input_duration_binned_priority, - '1m Network Histogram of Input Reservation duration binned by priority', - 'no-airmass-w-duration-no-scaling'), Plot(plotfuncs.plot_subplots_input_duration, '1m Network Scheduled/Unscheduled Requests Length Distribution', 'no-airmass-w-duration-no-scaling'), From 1290c6e2e44e6bffde27ad059bb4868530fcb05e Mon Sep 17 00:00:00 2001 From: Qingze Wu Date: Fri, 5 Aug 2022 16:19:02 -0700 Subject: [PATCH 148/165] pass copy instead of reference --- adaptive_scheduler/simulation/plotutils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/adaptive_scheduler/simulation/plotutils.py b/adaptive_scheduler/simulation/plotutils.py index e851934d..bc8851a5 100644 --- a/adaptive_scheduler/simulation/plotutils.py +++ b/adaptive_scheduler/simulation/plotutils.py @@ -4,6 +4,7 @@ import os import argparse import readline +from copy import deepcopy from datetime import datetime import numpy as np @@ -127,7 +128,7 @@ def generate(self): if len(self.data) == 1: self.data = self.data[0] - self.fig = self.plotfunc(self.data, self.description, **self.kwargs) + self.fig = 
self.plotfunc(deepcopy(self.data), self.description, **self.kwargs) def save(self): timestamp = datetime.utcnow().isoformat(timespec='seconds') From 7d984f0c75cde8d92d096af3527459368a513fa4 Mon Sep 17 00:00:00 2001 From: Jon Date: Sat, 6 Aug 2022 00:15:47 +0000 Subject: [PATCH 149/165] few updates to the orchestrator readme --- adaptive_scheduler/simulation/README.md | 15 ++++++++------- adaptive_scheduler/simulation/orchestrator.py | 2 +- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/adaptive_scheduler/simulation/README.md b/adaptive_scheduler/simulation/README.md index dc83c90c..0da96073 100644 --- a/adaptive_scheduler/simulation/README.md +++ b/adaptive_scheduler/simulation/README.md @@ -1,7 +1,7 @@ # Adaptive Scheduler Simulator Orchestrator The orchestrator allows for running the adaptive scheduler in a simulated environment in order to facilitate testing. -It allows the user to dump input request data to the [Configuration Database](https://github.com/observatorycontrolsystem/configdb) +It allows the user to dump input request data to the [Observation Portal](https://github.com/observatorycontrolsystem/observation-portal) which is then passed to the scheduler. The orchestrator runs the scheduler and passes off the scheduler result to a metric calculation file, which calculates metrics to send to an OpenSearch database. Work is still being done to enable the orchestrator to step through a time range and run the scheduler repeatedly on different points of the input data. @@ -13,8 +13,9 @@ the data structures, inspect the raw JSON in OpenSearch directly. 
## Prerequisites * Python 3.9 -* A running [Configuration Database](https://github.com/observatorycontrolsystem/configdb) -* A running OpenSearch with index for scheduler simulations +* A running [Configuration Database](https://github.com/observatorycontrolsystem/configdb) with instruments +* A running [Observation Portal](https://github.com/observatorycontrolsystem/observation-portal) with requests +* A running OpenSearch with index created to store scheduler simulation results ## Environment Variables Consult the adaptive scheduler README for general environment variables related to the scheduler. Additional environment @@ -23,15 +24,15 @@ variables specific to the orchestrator are as follows: |----------------------------------|---------------------------------------------------------------------------------------------|-------------------------| | `SIMULATION_RUN_ID` | The run ID of the scheduler. This will be saved as `simulation_id` in OpenSearch | `1` | | `SIMULATION_START_TIME` | The simulation start time, which allows the orchestrator to step through a time range (WIP) | `2022-06-23` | -| `SIMULATION_END_TIME` | The end time of the time range (WIP) | `2022-07-07` | +| `SIMULATION_END_TIME` | The end time of the time range. This should match the start time if only a single run is desired. | `2022-06-23` | | `SIMULATION_TIME_STEP_MINUTES` | The time step in minutes for the time range (WIP) | `60` | | `SIMULATION_AIRMASS_COEFFICIENT` | The airmass optimization weighting value | `0.1` | -| `SIMULATION_OPENSEARCH_INDEX` | The index to save OpenSearch metrics to | `scheduler-simulations` | +| `SIMULATION_OPENSEARCH_INDEX` | The OpenSearch index where metrics will be saved to | `scheduler-simulations` | ## How to Run When running in a Docker container, the entry point can be modified to point to the orchestrator instead of the scheduler, e.g. `sh -c "sleep 20s; simulation-orchestrator"`. 
The twenty second wait time is to ensure all the relevant services (configdb, redis, etc.) are -spun up and available. Otherwise, run the orchestrator on a machine as you would run the scheduler. +spun up and available. Otherwise, run the orchestrator locally on a machine with `poetry run simulation-orchestrator` ## Simulation Process The general workflow for running a scheduler simulation is as follows: @@ -43,7 +44,7 @@ The general workflow for running a scheduler simulation is as follows: ## Plotting Plotting utilities and functions are included with the orchestrator to plot data. Data is pulled from OpenSearch, so the plotting framework can be run standalone from the orchestrator. Note that the environment variable `OPENSEARCH_URL` must be set -on whatever machine you are running the plots from. To use the plotting interface, run `python -m adaptive_scheduler.simulation.plots` +in the environment you are running the plot scripts from. To use the plotting interface, run `python -m adaptive_scheduler.simulation.plots` (`-h` to show the available command line arguments). 
## Creating Your Own Plots diff --git a/adaptive_scheduler/simulation/orchestrator.py b/adaptive_scheduler/simulation/orchestrator.py index 574de6bc..f811cb8b 100644 --- a/adaptive_scheduler/simulation/orchestrator.py +++ b/adaptive_scheduler/simulation/orchestrator.py @@ -39,7 +39,7 @@ # Some Environment Variable settings for the simulation RUN_ID = os.getenv("SIMULATION_RUN_ID", "1") START_TIME = parse(os.getenv("SIMULATION_START_TIME", "2022-06-23")) -END_TIME = parse(os.getenv("SIMULATION_END_TIME", "2022-07-07")) +END_TIME = parse(os.getenv("SIMULATION_END_TIME", "2022-06-23")) TIME_STEP = float(os.getenv("SIMULATION_TIME_STEP_MINUTES", "60")) AIRMASS_WEIGHTING_COEFFICIENT = os.getenv("SIMULATION_AIRMASS_COEFFICIENT", 0.1) From 2480132b94b80125104f3ff2dbe530aebfaea8d9 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Mon, 8 Aug 2022 09:34:58 -0700 Subject: [PATCH 150/165] fixed utilization calculation --- adaptive_scheduler/simulation/metrics.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 15dc15d7..c286317b 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -162,6 +162,7 @@ def __init__(self, normal_scheduler_result, rr_scheduler_result, scheduler, sche else: self.combined_schedule = self.normal_schedule self.combined_resources_scheduled = self.normal_scheduler_result.resources_scheduled() + self.combined_resources_scheduled = [site for site in self.normal_schedule.keys() if self.normal_schedule[site]] for comp_res in self.normal_input_reservations: self.combined_input_reservations.extend(comp_res.reservation_list) @@ -174,6 +175,8 @@ def __init__(self, normal_scheduler_result, rr_scheduler_result, scheduler, sche def _combine_resources_scheduled(self): normal_resources = self.normal_scheduler_result.resources_scheduled() rr_resources = self.rr_scheduler_result.resources_scheduled() + normal_resources = [site 
for site in self.normal_schedule.keys() if self.normal_schedule[site]] + rr_resources = [site for site in self.rr_schedule.keys() if self.rr_schedule[site]] self.combined_resources_scheduled = list(set(normal_resources + rr_resources)) def _combine_normal_rr_schedules(self): From e5362dbd02cd7a36d16dc5a9ba33494fe31edc63 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Mon, 8 Aug 2022 09:45:50 -0700 Subject: [PATCH 151/165] updated error handling and masked useless opensearch warning messages --- adaptive_scheduler/simulation/plotutils.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/adaptive_scheduler/simulation/plotutils.py b/adaptive_scheduler/simulation/plotutils.py index bc8851a5..a1c9d859 100644 --- a/adaptive_scheduler/simulation/plotutils.py +++ b/adaptive_scheduler/simulation/plotutils.py @@ -2,8 +2,10 @@ Plotting utility functions """ import os +import sys import argparse import readline +import logging from copy import deepcopy from datetime import datetime @@ -16,7 +18,15 @@ OPENSEARCH_URL = os.getenv('OPENSEARCH_URL', '') OPENSEARCH_INDEX = os.getenv('SIMULATION_OPENSEARCH_INDEX', 'scheduler-simulations') -opensearch_client = OpenSearch(OPENSEARCH_URL) +try: + opensearch_client = OpenSearch(OPENSEARCH_URL) +except TypeError: + print('Invalid OpenSearch endpoint. 
Please set `OPENSEARCH_URL` environment variable.') + sys.exit(1) + +# mask logging messages from OpenSearchPy +# they use the root logger, unfortunately +logging.getLogger().setLevel(logging.CRITICAL) data_cache = {} From 8f17d74c59baaab38a60e1b917a17eed54398310 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Mon, 8 Aug 2022 10:01:17 -0700 Subject: [PATCH 152/165] small rewordings --- adaptive_scheduler/simulation/README.md | 35 ++++++++++++++----------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/adaptive_scheduler/simulation/README.md b/adaptive_scheduler/simulation/README.md index dc83c90c..876fc461 100644 --- a/adaptive_scheduler/simulation/README.md +++ b/adaptive_scheduler/simulation/README.md @@ -19,14 +19,16 @@ the data structures, inspect the raw JSON in OpenSearch directly. ## Environment Variables Consult the adaptive scheduler README for general environment variables related to the scheduler. Additional environment variables specific to the orchestrator are as follows: -| Variable | Description | Default | -|----------------------------------|---------------------------------------------------------------------------------------------|-------------------------| -| `SIMULATION_RUN_ID` | The run ID of the scheduler. 
This will be saved as `simulation_id` in OpenSearch | `1` | -| `SIMULATION_START_TIME` | The simulation start time, which allows the orchestrator to step through a time range (WIP) | `2022-06-23` | -| `SIMULATION_END_TIME` | The end time of the time range (WIP) | `2022-07-07` | -| `SIMULATION_TIME_STEP_MINUTES` | The time step in minutes for the time range (WIP) | `60` | -| `SIMULATION_AIRMASS_COEFFICIENT` | The airmass optimization weighting value | `0.1` | -| `SIMULATION_OPENSEARCH_INDEX` | The index to save OpenSearch metrics to | `scheduler-simulations` | +| Variable | Description | Default | +|----------------------------------|------------------------------------------------------------------------------------------------------|-------------------------| +| `SIMULATION_RUN_ID` | The run ID of the scheduler. This will be saved as `simulation_id` in OpenSearch | `1` | +| `SIMULATION_START_TIME` | The simulation start time, which allows the orchestrator to step through a time range (WIP) | `2022-06-23` | +| `SIMULATION_END_TIME` | The end time of the time range (WIP) | `2022-07-07` | +| `SIMULATION_TIME_STEP_MINUTES` | The time step in minutes for the time range (WIP) | `60` | +| `SIMULATION_AIRMASS_COEFFICIENT` | The airmass optimization weighting value | `0.1` | +| `SIMULATION_OPENSEARCH_INDEX` | The index to save OpenSearch metrics to | `scheduler-simulations` | +| `OPENSEARCH_URL` | Needed for the plotting interface. Set this locally, and not just in the Docker container (if using) | _`Empty_string`_ | +| | | | ## How to Run When running in a Docker container, the entry point can be modified to point to the orchestrator instead of the scheduler, @@ -36,24 +38,25 @@ spun up and available. Otherwise, run the orchestrator on a machine as you would ## Simulation Process The general workflow for running a scheduler simulation is as follows: 1. Make changes to the adaptive scheduler. 
If running with Docker, build the image using the suggested build command in the adaptive scheduler README. -2. If necessary, adjust the `metrics.py` file to conform with the tests you are running, e.g. adjusting binning for priority values. -3. Modify environment variables accordingly, particularly setting the run ID. +2. If necessary, adjust the `metrics.py` file to conform with the tests you are running, such as adjusting binning for priority values. +3. Modify environment variables accordingly, making sure to set and verify the run ID. 4. Run the orchestrator. ## Plotting -Plotting utilities and functions are included with the orchestrator to plot data. Data is pulled from OpenSearch, so the -plotting framework can be run standalone from the orchestrator. Note that the environment variable `OPENSEARCH_URL` must be set -on whatever machine you are running the plots from. To use the plotting interface, run `python -m adaptive_scheduler.simulation.plots` +A plotting interface is included with the simulator to facilitate data visualization. The interface features OpenSearch searching by +either OpenSearch ID or `simulation_id`, the ability to save plots in various formats, and zsh-style TAB autocompletion. +Note that the environment variable `OPENSEARCH_URL` must be set on whatever machine you are running the plots from. +To use the plotting interface, run `python -m adaptive_scheduler.simulation.plots` (`-h` to show the available command line arguments). ## Creating Your Own Plots The plotting framework provides a `Plot` class defined in `plotutils.py` to help initialize plots and get data from OpenSearch. `Plot` is initialized with a user-defined plotting function to generate the plot, the plot title, and either a single string or a list -of strings. It searches the `simulation_id` field in OpenSearch for the strings and plots the data. To write your own plotting +of strings. 
It searches the `_id` or `simulation_id` field in OpenSearch for the strings and plots the data. To write your own plotting functions, follow the example functions in `plotfuncs.py`. Plotting functions should take in either a list of datasets -or a single dataset (to match the initialization in `plots.py`. The plot title should be passed into the plotting function as well. +or a single dataset (to match the initialization in `plots.py`). The plot title should be passed into the plotting function as well. This title is used to generate the descriptions for the command-line interface of the plotting framework. The plot creation process is as simple as: 1. Creating a function (e.g. `plot_my_plot`) in `plotfuncs.py` -2. Adding the plot to the list of plots in `plots.py` (e.g. `Plot(plotfuncs.plot_my_plot, 'My Plot Title', 'some-data-id')`) +2. Adding the plot to the list of plots in `plots.py`, e.g. `Plot(plotfuncs.plot_my_plot, 'My Plot Title', 'some-data-id')` From 26280afd75f3b00077df9664f6801d2b9a729d0e Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Mon, 8 Aug 2022 10:11:43 -0700 Subject: [PATCH 153/165] changed wording for opensearch env variable --- adaptive_scheduler/simulation/README.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/adaptive_scheduler/simulation/README.md b/adaptive_scheduler/simulation/README.md index 876fc461..52f69a22 100644 --- a/adaptive_scheduler/simulation/README.md +++ b/adaptive_scheduler/simulation/README.md @@ -19,16 +19,16 @@ the data structures, inspect the raw JSON in OpenSearch directly. ## Environment Variables Consult the adaptive scheduler README for general environment variables related to the scheduler. 
Additional environment variables specific to the orchestrator are as follows: -| Variable | Description | Default | -|----------------------------------|------------------------------------------------------------------------------------------------------|-------------------------| -| `SIMULATION_RUN_ID` | The run ID of the scheduler. This will be saved as `simulation_id` in OpenSearch | `1` | -| `SIMULATION_START_TIME` | The simulation start time, which allows the orchestrator to step through a time range (WIP) | `2022-06-23` | -| `SIMULATION_END_TIME` | The end time of the time range (WIP) | `2022-07-07` | -| `SIMULATION_TIME_STEP_MINUTES` | The time step in minutes for the time range (WIP) | `60` | -| `SIMULATION_AIRMASS_COEFFICIENT` | The airmass optimization weighting value | `0.1` | -| `SIMULATION_OPENSEARCH_INDEX` | The index to save OpenSearch metrics to | `scheduler-simulations` | -| `OPENSEARCH_URL` | Needed for the plotting interface. Set this locally, and not just in the Docker container (if using) | _`Empty_string`_ | -| | | | +| Variable | Description | Default | +|----------------------------------|---------------------------------------------------------------------------------------------|-------------------------| +| `SIMULATION_RUN_ID` | The run ID of the scheduler. 
This will be saved as `simulation_id` in OpenSearch | `1` | +| `SIMULATION_START_TIME` | The simulation start time, which allows the orchestrator to step through a time range (WIP) | `2022-06-23` | +| `SIMULATION_END_TIME` | The end time of the time range (WIP) | `2022-07-07` | +| `SIMULATION_TIME_STEP_MINUTES` | The time step in minutes for the time range (WIP) | `60` | +| `SIMULATION_AIRMASS_COEFFICIENT` | The airmass optimization weighting value | `0.1` | +| `SIMULATION_OPENSEARCH_INDEX` | The index to save OpenSearch metrics to | `scheduler-simulations` | +| `OPENSEARCH_URL` | OpenSearch endpoint (needed for the plotting interface) | _`Empty_string`_ | +| | | | ## How to Run When running in a Docker container, the entry point can be modified to point to the orchestrator instead of the scheduler, From abc86728ad1e6bc2e408789a6d2853ab17e805ef Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Mon, 8 Aug 2022 10:28:20 -0700 Subject: [PATCH 154/165] updated documentation and fixed trailing whitespace --- adaptive_scheduler/simulation/plotfuncs.py | 31 +++++++++++----------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/adaptive_scheduler/simulation/plotfuncs.py b/adaptive_scheduler/simulation/plotfuncs.py index 5b1e50f2..dbf8ac82 100644 --- a/adaptive_scheduler/simulation/plotfuncs.py +++ b/adaptive_scheduler/simulation/plotfuncs.py @@ -1,7 +1,6 @@ """ Plotting functions to use with the adaptive simulator plotting wrapper. To write your own plotting functions, follow the format of the example functions. -The data passed in should be in list format. """ import matplotlib import numpy as np @@ -114,9 +113,9 @@ def plot_percent_sched_requests_bin_by_priority(eff_pri_datasets, plot_title): eff_pri_datasets [dict]: a list of datasets, each dataset corresponding to a different effective priority calculation. plot_title (str): The title of the plot. - + Returns: - fig (matplotlib.pyplot.Figure): The output figure object. 
+ fig (matplotlib.pyplot.Figure): The output figure object. """ fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(25, 12)) fig.suptitle(plot_title) @@ -152,9 +151,9 @@ def plot_sched_priority_duration_dotplot(eff_pri_datasets, plot_title): eff_pri_datasets [dict]: a list of datasets, each dataset corresponding to a different effective priority calculation. plot_title (str): The title of the plot. - + Returns: - fig (matplotlib.pyplot.Figure): The output figure object. + fig (matplotlib.pyplot.Figure): The output figure object. """ def rand_jitter(arr): stdev = .01 * (max(arr) - min(arr)) @@ -203,9 +202,9 @@ def plot_heat_map_priority_duration(eff_pri_datasets, plot_title): eff_pri_datasets [dict]: a list of datasets, each dataset corresponding to a different effective priority calculation. plot_title (str): The title of the plot. - + Returns: - fig (matplotlib.pyplot.Figure): The output figure object. + fig (matplotlib.pyplot.Figure): The output figure object. """ fig, axs = plt.subplots(2, 2, figsize=(13, 12)) fig.suptitle(plot_title) @@ -227,7 +226,7 @@ def plot_heat_map_priority_duration(eff_pri_datasets, plot_title): bin_key: bin_data(bin_values, bin_size=300, bin_range=(0, 1499)) | bin_data(bin_values, bin_size=3000, bin_range=(1500, 4000)) for bin_key, bin_values in level_1_bins.items() } - level_1_bins_unsched = bin_data(unsched_priorities, unsched_durations, bin_size=4, bin_range=(10,30),aggregator=None) + level_1_bins_unsched = bin_data(unsched_priorities, unsched_durations, bin_size=4, bin_range=(10, 30), aggregator=None) level_2_bins_unsched = { bin_key: bin_data(bin_values, bin_size=300, bin_range=(0, 1499)) | bin_data(bin_values, bin_size=3000, bin_range=(1500, 4000)) for bin_key, bin_values in level_1_bins_unsched.items() @@ -237,7 +236,7 @@ def plot_heat_map_priority_duration(eff_pri_datasets, plot_title): for values in level_2_bins.values(): heat_map_elements.append(list(values.values())) for values in level_2_bins_unsched.values(): - 
heat_map_elements_unsched.append(list(values.values())) + heat_map_elements_unsched.append(list(values.values())) priority_bins = list(level_2_bins.keys()) duration_bins = ['0-5', '5-10', '10-15', '15-20', '20-25', '25&up'] heat_map_elements = np.array(heat_map_elements) @@ -245,7 +244,7 @@ def plot_heat_map_priority_duration(eff_pri_datasets, plot_title): axis = ax_list[i] cmap = plt.get_cmap('coolwarm') cmap2 = plt.get_cmap('gray') - heatplot = axis.imshow(heat_map_elements, cmap=cmap) + axis.imshow(heat_map_elements, cmap=cmap) axis.set_ylabel('Priority') axis.set_xlabel('Duration (minutes)') axis.set_xticks(np.arange(len(duration_bins)), labels=duration_bins) @@ -255,8 +254,8 @@ def plot_heat_map_priority_duration(eff_pri_datasets, plot_title): for j in range(len(priority_bins)): for k in range(len(duration_bins)): value = heat_map_elements[j, k] - text1 = axis.text(k, j, f'{heat_map_elements[j, k]}|{ heat_map_elements_unsched[j, k]}', - ha="center", va="center", fontsize='large', fontweight='semibold', color=cmap2(0.001/value)) + axis.text(k, j, f'{heat_map_elements[j, k]}|{ heat_map_elements_unsched[j, k]}', + ha="center", va="center", fontsize='large', fontweight='semibold', color=cmap2(0.001/value)) percent_time_utilization = data['percent_time_utilization'] axis.set_title(f'{labels[i]} ({percent_time_utilization:.1f}% time utilized)', fontweight='semibold') fig.tight_layout() @@ -520,13 +519,13 @@ def plot_subplots_input_duration(data, plot_title): """Plots histograms of the input request durations in minutes for different priorities. Args: - data (dict): The data for this metric. Expects one data. + data (dict): The data for this metric. Expects one dataset. plot_title (str): The title of the plot. Returns: fig (matplotlib.pyploy.Figure): The output Figure object. 
""" - fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(20,10)) + fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(20, 10)) fig.suptitle(plot_title) sched_durations = data['raw_scheduled_durations'] sched_durations = [d/60 for d in sched_durations] @@ -541,8 +540,8 @@ def plot_subplots_input_duration(data, plot_title): axis = [ax1, ax2, ax3] for i, values in enumerate(sched_bins.values()): bars = ['Scheduled', 'Unscheduled'] - axis[i].hist([values,list(unsched_bins.values())[i]], bins = np.arange(0, 70, 2), - stacked = True, label = bars) + axis[i].hist([values, list(unsched_bins.values())[i]], bins=np.arange(0, 70, 2), + stacked=True, label=bars) axis[i].set_xlabel('Duration (Minutes)') axis[i].set_ylabel('Input reservation counts') axis[i].set_ylim(0, 300) From ea63cbb66dbbd5c1aa2ed5eb42ce79dc00618a8a Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Mon, 8 Aug 2022 10:33:23 -0700 Subject: [PATCH 155/165] updated documentation --- adaptive_scheduler/simulation/metrics.py | 14 ++++---------- adaptive_scheduler/simulation/plotutils.py | 2 +- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index c286317b..580b3a78 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -137,7 +137,7 @@ class MetricCalculator(): as follows: {scheduled_resource, [reservations]} rr_scheduler_result (SchedulerResult): The rapid-response schedule output of the scheduler. - scheduler (LCOGTNetworkScheduler): The instance of the scheduler used by the simulator. + scheduler (Scheduler): The instance of the scheduler used by the simulator. scheduler_runner (SchedulerRunner): The instance of the scheduler runner used by the simulator. 
""" def __init__(self, normal_scheduler_result, rr_scheduler_result, scheduler, scheduler_runner): @@ -240,7 +240,7 @@ def get_window_duration_data(self): for res in self.combined_input_reservations: if res.scheduled: windows = res.request.windows - # get the data format to a list, each element is a list corresponding to a resource + # format the data to a list, each element is a list corresponding to a resource windows_list = list(windows.windows_for_resource.values()) window_durations = [] for loc in windows_list: @@ -251,12 +251,6 @@ def get_window_duration_data(self): def total_available_seconds(self): """Aggregates the total available time, calculated from dark intervals. - Args: - resources_scheduled (list): The list of sites scheduled, if nothing is passed then use the - list generated when MetricCalculators is initialized. - horizon_days (float): The number of days to cap, basically an effective horizon. If nothing - is passed then use the value in sched_params. - Returns: total_available_time (float): The dark intervals capped by the horizon. """ @@ -275,7 +269,7 @@ def percent_time_utilization(self): return percent_of(sum(scheduled_durations), self.total_available_seconds()) def _get_airmass_data_for_request(self, request_id): - """Pulls airmass data from the Observation Portal, cache it in our local directory. + """Pulls airmass data from the Observation Portal, cache it in redis. Args: request_id (str): The request id. @@ -374,7 +368,7 @@ def airmass_metrics(self, schedule=None): return airmass_metrics def binned_tac_priority_metrics(self): - """Bins metrics based on TAC priority.""" + """Bins metrics based on TAC priority. 
Priority bins should be changed to match the data.""" bin_size = 10 sched_durations, unsched_durations = self.get_duration_data() diff --git a/adaptive_scheduler/simulation/plotutils.py b/adaptive_scheduler/simulation/plotutils.py index a1c9d859..b0e7cd1f 100644 --- a/adaptive_scheduler/simulation/plotutils.py +++ b/adaptive_scheduler/simulation/plotutils.py @@ -116,7 +116,7 @@ def __init__(self, plotfunc, description, sim_ids, **kwargs): Args: plotfunc (func): The plotting function to use. description (str): The description of the plot. Will be used as the plot title in matplotlib. - sim_ids [str]: The simulation IDs to look for on OpenSearch. + sim_ids: The simulation IDs to look for on OpenSearch. Can be either a list or a single string. kwargs: Optional arguments to pass to the plotting function. """ self.plotfunc = plotfunc From aefb704ef12410282058d7d63c454386b18bf091 Mon Sep 17 00:00:00 2001 From: Jon Date: Tue, 9 Aug 2022 04:58:13 +0000 Subject: [PATCH 156/165] fix for airmass optimization --- adaptive_scheduler/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive_scheduler/models.py b/adaptive_scheduler/models.py index 5f1e46e3..73e545e2 100644 --- a/adaptive_scheduler/models.py +++ b/adaptive_scheduler/models.py @@ -495,7 +495,7 @@ def cache_airmasses_within_kernel_windows(self, kernel_intervals_for_resources, if len(airmass_by_targets) == 1: airmasses = list(airmass_by_targets.values())[0] else: - numpy_airmasses = np.array(airmass_by_targets.values()) + numpy_airmasses = np.array(list(airmass_by_targets.values())) airmasses = np.mean(numpy_airmasses, axis=0).tolist() # Now normalize the airmass values between the minimum and maximum airmass so that the weighting # is similar for all requests From e6ce825a9c3dcfa3031d108327a68e46f9baa608 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Tue, 9 Aug 2022 10:28:43 -0700 Subject: [PATCH 157/165] divide by zero bugfix --- adaptive_scheduler/simulation/metrics.py | 12 
++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 580b3a78..e0801844 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -76,7 +76,7 @@ def bin_data(bin_by, data=[], bin_size=1, bin_range=None, fill=[], aggregator=le bin_range: A tuple of numbers to override the bin ranges. Otherwise, use the min/max of the data. fill: The data value(s) to fill with if the bin is empty. An iterable may be passed, in which case it is casted to a list. If None is passed, then empty bins are removed. The aggregator will be applied - to fill values as well. + to fill values as well. Default is empty list to work with the default aggregator len(). aggregator (func): The aggregation function to apply over the list of data. Must be callable on an array. If None is passed, then the raw values are stored in a list. @@ -369,17 +369,17 @@ def airmass_metrics(self, schedule=None): def binned_tac_priority_metrics(self): """Bins metrics based on TAC priority. 
Priority bins should be changed to match the data.""" - bin_size = 10 + bin_size = 5 sched_durations, unsched_durations = self.get_duration_data() all_durations = sched_durations + unsched_durations sched_priorities, unsched_priorities = self.get_priority_data() all_priorities = sched_priorities + unsched_priorities - sched_histogram = bin_data(sched_priorities, bin_size=bin_size) - bin_sched_durations = bin_data(sched_priorities, sched_durations, bin_size, aggregator=sum) - full_histogram = bin_data(all_priorities, bin_size=bin_size) - bin_all_durations = bin_data(all_priorities, all_durations, bin_size, aggregator=sum) + sched_histogram = bin_data(sched_priorities, bin_size=bin_size, fill=None) + bin_sched_durations = bin_data(sched_priorities, sched_durations, bin_size, fill=None, aggregator=sum) + full_histogram = bin_data(all_priorities, bin_size=bin_size, fill=None) + bin_all_durations = bin_data(all_priorities, all_durations, bin_size, fill=None, aggregator=sum) bin_percent_count = {bin_: percent_of(sched_histogram[bin_], full_histogram[bin_]) for bin_ in sched_histogram} bin_percent_time = {bin_: percent_of(bin_sched_durations[bin_], bin_all_durations[bin_]) From c12c9750b95e5b6792a6cd5851cdcbb816abd9af Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Tue, 9 Aug 2022 10:29:05 -0700 Subject: [PATCH 158/165] broke plots into sections --- adaptive_scheduler/simulation/plots.py | 83 ++++++++++++++------------ 1 file changed, 44 insertions(+), 39 deletions(-) diff --git a/adaptive_scheduler/simulation/plots.py b/adaptive_scheduler/simulation/plots.py index 8491fd7f..fcb4df1e 100644 --- a/adaptive_scheduler/simulation/plots.py +++ b/adaptive_scheduler/simulation/plots.py @@ -27,45 +27,50 @@ plots = [ - Plot(plotfuncs.plot_airmass_difference_histogram, - '1m Network Airmass Difference Distribution for Scheduled Requests', - airmass_experiment_ids), - Plot(plotfuncs.plot_pct_scheduled_airmass_binned_priority, - '1m Network Airmass Experiment Percent of Requests 
Scheduled per Priority Class', - airmass_experiment_ids), - Plot(plotfuncs.plot_pct_scheduled_airmass_lineplot, - '1m Network Airmass Experiment Percent of Requests Scheduled per Priority Class', - airmass_experiment_ids), - Plot(plotfuncs.plot_pct_time_scheduled_airmass_binned_priority, - '1m Network Airmass Experiment Percent of Requested Time Scheduled per Priority Class', - airmass_experiment_ids), - Plot(plotfuncs.plot_pct_time_scheduled_airmass_lineplot, - '1m Network Airmass Experiment Percent of Requested Time Scheduled per Priority Class', - airmass_experiment_ids), - Plot(plotfuncs.plot_pct_time_scheduled_out_of_available, - '1m Network Airmass Experiment Percent of Requested Time Scheduled out of Available Time', - airmass_experiment_ids), - Plot(plotfuncs.plot_midpoint_airmass_histograms, - '1m Network Airmass Experiment Midpoint Airmass Distributions', - airmass_experiment_ids), - Plot(plotfuncs.plot_eff_priority_duration_scatter, - '1m Network Scatterplot of Effective Priority and Duration', - ['airmass-0.1-w-duration-no-scaling', 'airmass-0.1-w-duration-w-scaling']), - Plot(plotfuncs.plot_duration_by_window_duration_scatter, - '1m Network Scatterplot of Duration and Window Duration', - 'window-duration'), - Plot(plotfuncs.plot_subplots_input_duration, - '1m Network Scheduled/Unscheduled Requests Length Distribution', - 'no-airmass-w-duration-no-scaling'), - Plot(plotfuncs.plot_percent_sched_requests_bin_by_priority, - '1m Network Scheduler Metrics Binned by Priority', - effective_priority_experiment_ids), - Plot(plotfuncs.plot_sched_priority_duration_dotplot, - '1m Distribution of Priority and Duration With Airmass Optimization', - effective_priority_experiment_ids), - Plot(plotfuncs.plot_heat_map_priority_duration, - '1m Network Requests Heatmap With Airmass Optimization (sched|unsched)', - effective_priority_experiment_ids), + # General Use + Plot(plotfuncs.plot_subplots_input_duration, + '1m Network Scheduled/Unscheduled Requests Length 
Distribution', + 'no-airmass-w-duration-no-scaling'), + Plot(plotfuncs.plot_duration_by_window_duration_scatter, + '1m Network Scatterplot of Duration and Window Duration', + 'window-duration'), + + # Airmass Experiment + Plot(plotfuncs.plot_airmass_difference_histogram, + '1m Network Airmass Difference Distribution for Scheduled Requests', + airmass_experiment_ids), + Plot(plotfuncs.plot_pct_scheduled_airmass_binned_priority, + '1m Network Airmass Experiment Percent of Requests Scheduled per Priority Class', + airmass_experiment_ids), + Plot(plotfuncs.plot_pct_scheduled_airmass_lineplot, + '1m Network Airmass Experiment Percent of Requests Scheduled per Priority Class', + airmass_experiment_ids), + Plot(plotfuncs.plot_pct_time_scheduled_airmass_binned_priority, + '1m Network Airmass Experiment Percent of Requested Time Scheduled per Priority Class', + airmass_experiment_ids), + Plot(plotfuncs.plot_pct_time_scheduled_airmass_lineplot, + '1m Network Airmass Experiment Percent of Requested Time Scheduled per Priority Class', + airmass_experiment_ids), + Plot(plotfuncs.plot_pct_time_scheduled_out_of_available, + '1m Network Airmass Experiment Percent of Requested Time Scheduled out of Available Time', + airmass_experiment_ids), + Plot(plotfuncs.plot_midpoint_airmass_histograms, + '1m Network Airmass Experiment Midpoint Airmass Distributions', + airmass_experiment_ids), + + # Effective Priority Experiment + Plot(plotfuncs.plot_eff_priority_duration_scatter, + '1m Network Scatterplot of Effective Priority and Duration', + ['airmass-0.1-w-duration-no-scaling', 'airmass-0.1-w-duration-w-scaling']), + Plot(plotfuncs.plot_percent_sched_requests_bin_by_priority, + '1m Network Scheduler Metrics Binned by Priority', + effective_priority_experiment_ids), + Plot(plotfuncs.plot_sched_priority_duration_dotplot, + '1m Distribution of Priority and Duration With Airmass Optimization', + effective_priority_experiment_ids), + Plot(plotfuncs.plot_heat_map_priority_duration, + '1m 
Network Requests Heatmap With Airmass Optimization (sched|unsched)', + effective_priority_experiment_ids), ] From 7abc28de5b618c3dc6aebc59a8262f4753b86bf6 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Wed, 10 Aug 2022 12:20:18 -0700 Subject: [PATCH 159/165] modified binning option --- adaptive_scheduler/simulation/metrics.py | 39 +++++++++++++++++++----- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index e0801844..d642b8e8 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -24,7 +24,13 @@ def percent_of(x, y): """Returns x/y as a percentage.""" - return x / y * 100. + try: + return x / y * 100. + except ZeroDivisionError as e: + if y == 0: + return 0 + else: + raise e def percent_diff(x, y): @@ -370,17 +376,36 @@ def airmass_metrics(self, schedule=None): def binned_tac_priority_metrics(self): """Bins metrics based on TAC priority. Priority bins should be changed to match the data.""" bin_size = 5 + bin_range = (10, 30) sched_durations, unsched_durations = self.get_duration_data() all_durations = sched_durations + unsched_durations sched_priorities, unsched_priorities = self.get_priority_data() all_priorities = sched_priorities + unsched_priorities - sched_histogram = bin_data(sched_priorities, bin_size=bin_size, fill=None) - bin_sched_durations = bin_data(sched_priorities, sched_durations, bin_size, fill=None, aggregator=sum) - full_histogram = bin_data(all_priorities, bin_size=bin_size, fill=None) - bin_all_durations = bin_data(all_priorities, all_durations, bin_size, fill=None, aggregator=sum) - bin_percent_count = {bin_: percent_of(sched_histogram[bin_], full_histogram[bin_]) + sched_histogram = bin_data(sched_priorities, bin_size=bin_size, bin_range=bin_range, fill=None) + bin_sched_durations = bin_data(sched_priorities, sched_durations, + bin_size, bin_range, fill=None, aggregator=sum) + 
combined_histogram = bin_data(all_priorities, bin_size=bin_size, bin_range=bin_range, fill=None) + bin_all_durations = bin_data(all_priorities, all_durations, + bin_size, bin_range, fill=None, aggregator=sum) + # capture the upper range with one large bin (e.g. priority 31&up) and merge with the other binned dict + # this is a workaround to make nonuniform bins, since the binning function is intended for uniform bins + # comment this block to just schedule within the lower range + max_prio = max(all_priorities) + lower = bin_range[-1] + 1 # assumes discrete values, but should be modified for float values + upper_sched_histogram = bin_data(sched_priorities, bin_size=max_prio, bin_range=(lower, max_prio), fill=None) + upper_sched_durations = bin_data(sched_priorities, sched_durations, bin_size=max_prio, + bin_range=(lower, max_prio), fill=None, aggregator=sum) + upper_combined_histogram = bin_data(all_priorities, bin_size=max_prio, bin_range=(lower, max_prio), fill=None) + upper_all_durations = bin_data(all_priorities, all_durations, bin_size=max_prio, + bin_range=(lower, max_prio), fill=None, aggregator=sum) + sched_histogram = sched_histogram | upper_sched_histogram + bin_sched_durations = bin_sched_durations | upper_sched_durations + combined_histogram = combined_histogram | upper_combined_histogram + bin_all_durations = bin_all_durations | upper_all_durations + + bin_percent_count = {bin_: percent_of(sched_histogram[bin_], combined_histogram[bin_]) for bin_ in sched_histogram} bin_percent_time = {bin_: percent_of(bin_sched_durations[bin_], bin_all_durations[bin_]) for bin_ in bin_sched_durations} @@ -388,7 +413,7 @@ def binned_tac_priority_metrics(self): output_dict = { 'sched_histogram': sched_histogram, 'sched_durations': bin_sched_durations, - 'full_histogram': full_histogram, + 'full_histogram': combined_histogram, 'all_durations': bin_all_durations, 'percent_count': bin_percent_count, 'percent_time': bin_percent_time From 
020a1f302b993970ae1752fc57718d8137e768af Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Wed, 10 Aug 2022 12:33:15 -0700 Subject: [PATCH 160/165] changed comment for clarity --- adaptive_scheduler/simulation/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index d642b8e8..ce2c0267 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -391,7 +391,7 @@ def binned_tac_priority_metrics(self): bin_size, bin_range, fill=None, aggregator=sum) # capture the upper range with one large bin (e.g. priority 31&up) and merge with the other binned dict # this is a workaround to make nonuniform bins, since the binning function is intended for uniform bins - # comment this block to just schedule within the lower range + # comment this block to just bin within bin_range max_prio = max(all_priorities) lower = bin_range[-1] + 1 # assumes discrete values, but should be modified for float values upper_sched_histogram = bin_data(sched_priorities, bin_size=max_prio, bin_range=(lower, max_prio), fill=None) From 02777848c10b4531526876fcd5ac8161faf0179d Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 11 Aug 2022 16:29:32 -0700 Subject: [PATCH 161/165] is bad --- adaptive_scheduler/simulation/plotutils.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/adaptive_scheduler/simulation/plotutils.py b/adaptive_scheduler/simulation/plotutils.py index b0e7cd1f..40526cd9 100644 --- a/adaptive_scheduler/simulation/plotutils.py +++ b/adaptive_scheduler/simulation/plotutils.py @@ -65,10 +65,6 @@ def run_user_interface(plots): parser.add_argument('-f', '--format', help='the file format to save as', default='jpg') parser.add_argument('-o', '--outputdir', help='the output directory to save to', default=DEFAULT_DIR) args = parser.parse_args() - global export_dir - global export_format - export_dir = 
args.outputdir - export_format = args.format plot_dict = {plot.name: plot for plot in plots} plot_names = list(plot_dict.keys()) @@ -77,6 +73,8 @@ def run_user_interface(plots): print(f'\n{"Name":{spacing}}Description') print(f'{"====":{spacing}}===========') for plot in plots: + plot.export_dir = args.outputdir + plot.export_format = args.format print(f'{plot.name:{spacing}}{plot.description}') completer = AutoCompleter(plot_names) @@ -143,10 +141,10 @@ def generate(self): def save(self): timestamp = datetime.utcnow().isoformat(timespec='seconds') savename = f'{self.name}_{timestamp}' - export_to_image(savename, self.fig) + export_to_image(savename, self.fig, self.export_dir, self.export_format) -def export_to_image(fname, fig): +def export_to_image(fname, fig, export_dir=DEFAULT_DIR, export_format='jpg'): """Takes a matplotlib Figure object and saves the figure. If the output directory doesn't already exist, creates one for the user. @@ -154,8 +152,6 @@ def export_to_image(fname, fig): fname (str): The filename to save the file as. fig (matplotlib.pyplot.Figure): The figure to save. 
""" - global export_dir - global export_format try: os.mkdir(export_dir) print(f'Directory "{export_dir}" created') From 20ead84ddf9a3c0413ac8fdf27397329ae1d0f43 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 11 Aug 2022 16:57:34 -0700 Subject: [PATCH 162/165] cache by request in redis instead of one big dict --- adaptive_scheduler/simulation/metrics.py | 51 ++++++++++-------------- 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index ce2c0267..1bd13b35 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -234,11 +234,6 @@ def get_priority_data(self): priorities_by_rg_id = {rg.id: rg.proposal.tac_priority for rg in self.request_groups} sched_priorities = [priorities_by_rg_id[rg_id] for rg_id in sched_rg_ids] unsched_priorities = [priorities_by_rg_id[rg_id] for rg_id in unsched_rg_ids] - # uncomment to remap the priorities - # note: adjust bin size accordingly - # scale = (100, 10, 30, 10) - # sched_priorities = [scalefunc(p, *scale) for p in sched_priorities] - # unsched_priorities = [scalefunc(p, *scale) for p in unsched_priorities] return sched_priorities, unsched_priorities def get_window_duration_data(self): @@ -284,20 +279,16 @@ def _get_airmass_data_for_request(self, request_id): airmass_data (dict): The airmass data returned from the API or the cache. 
""" airmass_url = f'{self.observation_portal_interface.obs_portal_url}/api/requests/{request_id}/airmass/' - try: - cached_airmass_data = pickle.loads(redis_instance.get('airmass_data_by_request_id')) - cached_airmass_data[request_id] - self.airmass_data_by_request_id[request_id] = cached_airmass_data[request_id] - return cached_airmass_data[request_id] - except Exception: - # the request has not been cached yet, get the data from the portal - pass + cached_airmass_data = redis_instance.get(f'airmass_data_{request_id}') + if cached_airmass_data: + self.airmass_data_by_request_id[request_id] = pickle.loads(cached_airmass_data) + return pickle.loads(cached_airmass_data) try: response = requests.get(airmass_url, headers=self.observation_portal_interface.headers, timeout=180) response.raise_for_status() airmass_data_for_request = response.json()['airmass_data'] self.airmass_data_by_request_id[request_id] = airmass_data_for_request - redis_instance.set('airmass_data_by_request_id', pickle.dumps(dict(self.airmass_data_by_request_id))) + redis_instance.set(f'airmass_data_{request_id}', pickle.dumps(airmass_data_for_request)) return airmass_data_for_request except (RequestException, ValueError, Timeout) as e: raise ObservationPortalConnectionError("get_airmass_data failed: {}".format(repr(e))) @@ -377,6 +368,7 @@ def binned_tac_priority_metrics(self): """Bins metrics based on TAC priority. Priority bins should be changed to match the data.""" bin_size = 5 bin_range = (10, 30) + collect_upper_range = True sched_durations, unsched_durations = self.get_duration_data() all_durations = sched_durations + unsched_durations @@ -389,21 +381,22 @@ def binned_tac_priority_metrics(self): combined_histogram = bin_data(all_priorities, bin_size=bin_size, bin_range=bin_range, fill=None) bin_all_durations = bin_data(all_priorities, all_durations, bin_size, bin_range, fill=None, aggregator=sum) - # capture the upper range with one large bin (e.g. 
priority 31&up) and merge with the other binned dict - # this is a workaround to make nonuniform bins, since the binning function is intended for uniform bins - # comment this block to just bin within bin_range - max_prio = max(all_priorities) - lower = bin_range[-1] + 1 # assumes discrete values, but should be modified for float values - upper_sched_histogram = bin_data(sched_priorities, bin_size=max_prio, bin_range=(lower, max_prio), fill=None) - upper_sched_durations = bin_data(sched_priorities, sched_durations, bin_size=max_prio, - bin_range=(lower, max_prio), fill=None, aggregator=sum) - upper_combined_histogram = bin_data(all_priorities, bin_size=max_prio, bin_range=(lower, max_prio), fill=None) - upper_all_durations = bin_data(all_priorities, all_durations, bin_size=max_prio, - bin_range=(lower, max_prio), fill=None, aggregator=sum) - sched_histogram = sched_histogram | upper_sched_histogram - bin_sched_durations = bin_sched_durations | upper_sched_durations - combined_histogram = combined_histogram | upper_combined_histogram - bin_all_durations = bin_all_durations | upper_all_durations + + # collects things above maximum bin range into one large bin + # e.g. 
bin 10-19, 20-29, 30, 31&up + if collect_upper_range: + max_prio = max(all_priorities) + lower = bin_range[-1] + 1 # assumes discrete priority values + upper_sched_histogram = bin_data(sched_priorities, bin_size=max_prio, bin_range=(lower, max_prio), fill=None) + upper_sched_durations = bin_data(sched_priorities, sched_durations, bin_size=max_prio, + bin_range=(lower, max_prio), fill=None, aggregator=sum) + upper_combined_histogram = bin_data(all_priorities, bin_size=max_prio, bin_range=(lower, max_prio), fill=None) + upper_all_durations = bin_data(all_priorities, all_durations, bin_size=max_prio, + bin_range=(lower, max_prio), fill=None, aggregator=sum) + sched_histogram = sched_histogram | upper_sched_histogram + bin_sched_durations = bin_sched_durations | upper_sched_durations + combined_histogram = combined_histogram | upper_combined_histogram + bin_all_durations = bin_all_durations | upper_all_durations bin_percent_count = {bin_: percent_of(sched_histogram[bin_], combined_histogram[bin_]) for bin_ in sched_histogram} From 86e9e9c6f622ccee2b62c76fdee8e3d6bc9a918e Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Fri, 12 Aug 2022 09:47:37 -0700 Subject: [PATCH 163/165] pep8 fixes and import renaming for clarity --- adaptive_scheduler/simulation/metrics.py | 16 ++++----- adaptive_scheduler/simulation/plotfuncs.py | 38 +++++++++++----------- adaptive_scheduler/simulation/plotutils.py | 12 +++---- 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 1bd13b35..81c11a8c 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -7,7 +7,7 @@ from datetime import datetime, timedelta from collections import defaultdict -import numpy as np +import numpy import requests import rise_set from requests.exceptions import RequestException, Timeout @@ -50,9 +50,9 @@ def generate_bin_names(bin_size, bin_range): """Creates labels for 
the bins.""" start, end = bin_range bin_names = [] - bin_start = np.arange(start, end+1, bin_size) + bin_start = numpy.arange(start, end+1, bin_size) for start_num in bin_start: - if np.issubdtype(bin_start.dtype, np.integer): + if numpy.issubdtype(bin_start.dtype, numpy.integer): end_num = start_num + bin_size - 1 end_num = end_num if end_num < end else end else: @@ -299,7 +299,7 @@ def _get_minmax_airmass(self, airmass_data, midpoint_duration): min_airmass = 1000 for site in airmass_data.values(): _, airmasses = site.values() - airmasses = np.array(airmasses) + airmasses = numpy.array(airmasses) min_airmass = min(min(airmasses), min_airmass) max_airmass = max(max(airmasses), max_airmass) return min_airmass, max_airmass @@ -321,9 +321,9 @@ def _get_midpoint_airmasses_by_site(self, airmass_data, midpoint_time): for site, details in airmass_data.items(): details = list(details.values()) times, airmasses = details - airmasses = np.array(airmasses) - times = np.array([datetime.strptime(time, DTFORMAT) for time in times]) - midpoint_airmasses[site] = airmasses[np.argmin(np.abs(times-midpoint_time))] + airmasses = numpy.array(airmasses) + times = numpy.array([datetime.strptime(time, DTFORMAT) for time in times]) + midpoint_airmasses[site] = airmasses[numpy.argmin(numpy.abs(times-midpoint_time))] return midpoint_airmasses def airmass_metrics(self, schedule=None): @@ -438,4 +438,4 @@ def avg_slew_distance(self): except IndexError: break - return np.mean(slew_distances) + return numpy.mean(slew_distances) diff --git a/adaptive_scheduler/simulation/plotfuncs.py b/adaptive_scheduler/simulation/plotfuncs.py index dbf8ac82..fde6e59f 100644 --- a/adaptive_scheduler/simulation/plotfuncs.py +++ b/adaptive_scheduler/simulation/plotfuncs.py @@ -4,15 +4,15 @@ """ import matplotlib import numpy as np -import matplotlib.pyplot as plt -import matplotlib.style as style +import matplotlib.pyplot as pyplot +import matplotlib.style import adaptive_scheduler.simulation.plotutils as 
plotutils from adaptive_scheduler.simulation.metrics import bin_data import adaptive_scheduler.simulation.metrics as metrics # change default parameters for matplotlib here -style.use('tableau-colorblind10') +matplotlib.style.use('tableau-colorblind10') matplotlib.rcParams['figure.figsize'] = (20, 10) matplotlib.rcParams['figure.titlesize'] = 20 matplotlib.rcParams['axes.titlesize'] = 14 @@ -38,7 +38,7 @@ def plot_airmass_difference_histogram(airmass_datasets, plot_title, normalize=Fa Returns: fig (matplotlib.pyplot.Figure): The output figure object. """ - fig, ax = plt.subplots() + fig, ax = pyplot.subplots() fig.suptitle(plot_title) numbins = 10 @@ -82,7 +82,7 @@ def plot_pct_scheduled_airmass_binned_priority(airmass_datasets, plot_title): Returns: fig (matplotlib.pyplot.Figure): The output figure object. """ - fig, ax = plt.subplots() + fig, ax = pyplot.subplots() fig.suptitle(plot_title) bardata = [] @@ -117,7 +117,7 @@ def plot_percent_sched_requests_bin_by_priority(eff_pri_datasets, plot_title): Returns: fig (matplotlib.pyplot.Figure): The output figure object. """ - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(25, 12)) + fig, (ax1, ax2) = pyplot.subplots(1, 2, figsize=(25, 12)) fig.suptitle(plot_title) fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) bardata1 = [] @@ -161,7 +161,7 @@ def rand_jitter(arr): markers = ['o', ',', 'v', '^', '<', '>'] colors = ['r', 'b', 'c', 'm', 'y', 'k'] - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(28, 12)) + fig, (ax1, ax2) = pyplot.subplots(1, 2, figsize=(28, 12)) fig.suptitle(plot_title) fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) labels = ['with duration', 'no duration', 'with duration scaled 100', 'no duration scaled 100'] @@ -206,7 +206,7 @@ def plot_heat_map_priority_duration(eff_pri_datasets, plot_title): Returns: fig (matplotlib.pyplot.Figure): The output figure object. 
""" - fig, axs = plt.subplots(2, 2, figsize=(13, 12)) + fig, axs = pyplot.subplots(2, 2, figsize=(13, 12)) fig.suptitle(plot_title) fig.subplots_adjust(wspace=0.01, hspace=0.01, top=0.9) ax_list = [axs[0, 0], axs[0, 1], axs[1, 0], axs[1, 1]] @@ -242,14 +242,14 @@ def plot_heat_map_priority_duration(eff_pri_datasets, plot_title): heat_map_elements = np.array(heat_map_elements) heat_map_elements_unsched = np.array(heat_map_elements_unsched) axis = ax_list[i] - cmap = plt.get_cmap('coolwarm') - cmap2 = plt.get_cmap('gray') + cmap = pyplot.get_cmap('coolwarm') + cmap2 = pyplot.get_cmap('gray') axis.imshow(heat_map_elements, cmap=cmap) axis.set_ylabel('Priority') axis.set_xlabel('Duration (minutes)') axis.set_xticks(np.arange(len(duration_bins)), labels=duration_bins) axis.set_yticks(np.arange(len(priority_bins)), labels=priority_bins) - plt.setp(axis.get_xticklabels(), rotation=45, ha="right", + pyplot.setp(axis.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor") for j in range(len(priority_bins)): for k in range(len(duration_bins)): @@ -275,7 +275,7 @@ def plot_pct_time_scheduled_airmass_binned_priority(airmass_datasets, plot_title Returns: fig (matplotlib.pyplot.Figure): The output figure object. """ - fig, ax = plt.subplots() + fig, ax = pyplot.subplots() fig.suptitle(plot_title) bardata = [] @@ -311,7 +311,7 @@ def plot_pct_scheduled_airmass_lineplot(airmass_datasets, plot_title): Returns: fig (matplotlib.pyplot.Figure): The output figure object. """ - fig, ax = plt.subplots() + fig, ax = pyplot.subplots() fig.suptitle(plot_title) prio_names = list(airmass_datasets[0]['percent_sched_by_priority'][0].keys()) @@ -349,7 +349,7 @@ def plot_pct_time_scheduled_airmass_lineplot(airmass_datasets, plot_title): Returns: fig (matplotlib.pyplot.Figure): The output figure object. 
""" - fig, ax = plt.subplots() + fig, ax = pyplot.subplots() fig.suptitle(plot_title) prio_names = list(airmass_datasets[0]['percent_duration_by_priority'][0].keys()) @@ -400,7 +400,7 @@ def plot_pct_time_scheduled_out_of_available(airmass_datasets, plot_title): Returns: fig (matplotlib.pyplot.Figure): The output figure object. """ - fig, ax = plt.subplots() + fig, ax = pyplot.subplots() fig.suptitle(plot_title) prio_names = list(airmass_datasets[0]['percent_duration_by_priority'][0].keys()) @@ -440,7 +440,7 @@ def plot_midpoint_airmass_histograms(airmass_datasets, plot_title): Returns: fig (matplotlib.pyplot.Figure): The output figure object. """ - fig = plt.figure(figsize=(16, 16)) + fig = pyplot.figure(figsize=(16, 16)) fig.suptitle(plot_title) fig.subplots_adjust(wspace=0.3, hspace=0.3, top=0.92) for i, dataset in enumerate(airmass_datasets): @@ -470,7 +470,7 @@ def plot_eff_priority_duration_scatter(datasets, plot_title): Returns: fig (matplotlib.pyplot.Figure): The output figure object. """ - fig, axs = plt.subplots(1, 2, figsize=(24, 8)) + fig, axs = pyplot.subplots(1, 2, figsize=(24, 8)) fig.suptitle(plot_title) labels = ['Priority 10-30', 'Priority 10-100'] # colors are from tableau-colorblind10 @@ -503,7 +503,7 @@ def plot_duration_by_window_duration_scatter(data, plot_title): Returns: fig (matplotlib.pyploy.Figure): The output Figure object. """ - fig, ax = plt.subplots() + fig, ax = pyplot.subplots() fig.suptitle(plot_title) sec_to_min = 1/60 window_dur = np.array(data['raw_window_durations']) * sec_to_min @@ -525,7 +525,7 @@ def plot_subplots_input_duration(data, plot_title): Returns: fig (matplotlib.pyploy.Figure): The output Figure object. 
""" - fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(20, 10)) + fig, (ax1, ax2, ax3) = pyplot.subplots(1, 3, figsize=(20, 10)) fig.suptitle(plot_title) sched_durations = data['raw_scheduled_durations'] sched_durations = [d/60 for d in sched_durations] diff --git a/adaptive_scheduler/simulation/plotutils.py b/adaptive_scheduler/simulation/plotutils.py index 40526cd9..53bc2d31 100644 --- a/adaptive_scheduler/simulation/plotutils.py +++ b/adaptive_scheduler/simulation/plotutils.py @@ -9,8 +9,8 @@ from copy import deepcopy from datetime import datetime -import numpy as np -import matplotlib.pyplot as plt +import numpy +import matplotlib.pyplot as pyplot import opensearchpy from opensearchpy import OpenSearch @@ -87,17 +87,17 @@ def run_user_interface(plots): plot.generate() if args.save: plot.save() - plt.show() + pyplot.show() break else: try: plot = plot_dict[showplot] - plt.close('all') + pyplot.close('all') plot.generate() if args.save: plot.save() plot.fig.show() - plt.show() + pyplot.show() break except KeyError as e: print(f'Plot name not found: {e}') @@ -172,7 +172,7 @@ def plot_multi_barplot(ax, data, labels, binnames, barwidth=0.04): binnames: A list of names of the bins for marking the x-axis. barwidth (float): The width of each bar. 
""" - ticks = np.arange(len(data[0])) + ticks = numpy.arange(len(data[0])) for i, datavalues in enumerate(data): ax.bar(ticks+barwidth*i, datavalues, barwidth, label=labels[i]) ax.set_xticks(ticks+barwidth*i/2, binnames) From dbae2930cb1749da76ed1f9960fbab55213a5138 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Fri, 12 Aug 2022 09:47:37 -0700 Subject: [PATCH 164/165] forgot to save the file haha --- adaptive_scheduler/simulation/metrics.py | 16 ++++---- adaptive_scheduler/simulation/plotfuncs.py | 46 +++++++++++----------- adaptive_scheduler/simulation/plotutils.py | 12 +++--- 3 files changed, 37 insertions(+), 37 deletions(-) diff --git a/adaptive_scheduler/simulation/metrics.py b/adaptive_scheduler/simulation/metrics.py index 1bd13b35..81c11a8c 100644 --- a/adaptive_scheduler/simulation/metrics.py +++ b/adaptive_scheduler/simulation/metrics.py @@ -7,7 +7,7 @@ from datetime import datetime, timedelta from collections import defaultdict -import numpy as np +import numpy import requests import rise_set from requests.exceptions import RequestException, Timeout @@ -50,9 +50,9 @@ def generate_bin_names(bin_size, bin_range): """Creates labels for the bins.""" start, end = bin_range bin_names = [] - bin_start = np.arange(start, end+1, bin_size) + bin_start = numpy.arange(start, end+1, bin_size) for start_num in bin_start: - if np.issubdtype(bin_start.dtype, np.integer): + if numpy.issubdtype(bin_start.dtype, numpy.integer): end_num = start_num + bin_size - 1 end_num = end_num if end_num < end else end else: @@ -299,7 +299,7 @@ def _get_minmax_airmass(self, airmass_data, midpoint_duration): min_airmass = 1000 for site in airmass_data.values(): _, airmasses = site.values() - airmasses = np.array(airmasses) + airmasses = numpy.array(airmasses) min_airmass = min(min(airmasses), min_airmass) max_airmass = max(max(airmasses), max_airmass) return min_airmass, max_airmass @@ -321,9 +321,9 @@ def _get_midpoint_airmasses_by_site(self, airmass_data, midpoint_time): for site, 
details in airmass_data.items(): details = list(details.values()) times, airmasses = details - airmasses = np.array(airmasses) - times = np.array([datetime.strptime(time, DTFORMAT) for time in times]) - midpoint_airmasses[site] = airmasses[np.argmin(np.abs(times-midpoint_time))] + airmasses = numpy.array(airmasses) + times = numpy.array([datetime.strptime(time, DTFORMAT) for time in times]) + midpoint_airmasses[site] = airmasses[numpy.argmin(numpy.abs(times-midpoint_time))] return midpoint_airmasses def airmass_metrics(self, schedule=None): @@ -438,4 +438,4 @@ def avg_slew_distance(self): except IndexError: break - return np.mean(slew_distances) + return numpy.mean(slew_distances) diff --git a/adaptive_scheduler/simulation/plotfuncs.py b/adaptive_scheduler/simulation/plotfuncs.py index dbf8ac82..edec1d52 100644 --- a/adaptive_scheduler/simulation/plotfuncs.py +++ b/adaptive_scheduler/simulation/plotfuncs.py @@ -4,15 +4,15 @@ """ import matplotlib import numpy as np -import matplotlib.pyplot as plt -import matplotlib.style as style +import matplotlib.pyplot as pyplot +import matplotlib.style import adaptive_scheduler.simulation.plotutils as plotutils from adaptive_scheduler.simulation.metrics import bin_data import adaptive_scheduler.simulation.metrics as metrics # change default parameters for matplotlib here -style.use('tableau-colorblind10') +matplotlib.style.use('tableau-colorblind10') matplotlib.rcParams['figure.figsize'] = (20, 10) matplotlib.rcParams['figure.titlesize'] = 20 matplotlib.rcParams['axes.titlesize'] = 14 @@ -38,7 +38,7 @@ def plot_airmass_difference_histogram(airmass_datasets, plot_title, normalize=Fa Returns: fig (matplotlib.pyplot.Figure): The output figure object. """ - fig, ax = plt.subplots() + fig, ax = pyplot.subplots() fig.suptitle(plot_title) numbins = 10 @@ -82,7 +82,7 @@ def plot_pct_scheduled_airmass_binned_priority(airmass_datasets, plot_title): Returns: fig (matplotlib.pyplot.Figure): The output figure object. 
""" - fig, ax = plt.subplots() + fig, ax = pyplot.subplots() fig.suptitle(plot_title) bardata = [] @@ -117,7 +117,7 @@ def plot_percent_sched_requests_bin_by_priority(eff_pri_datasets, plot_title): Returns: fig (matplotlib.pyplot.Figure): The output figure object. """ - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(25, 12)) + fig, (ax1, ax2) = pyplot.subplots(1, 2, figsize=(25, 12)) fig.suptitle(plot_title) fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) bardata1 = [] @@ -161,7 +161,7 @@ def rand_jitter(arr): markers = ['o', ',', 'v', '^', '<', '>'] colors = ['r', 'b', 'c', 'm', 'y', 'k'] - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(28, 12)) + fig, (ax1, ax2) = pyplot.subplots(1, 2, figsize=(28, 12)) fig.suptitle(plot_title) fig.subplots_adjust(wspace=0.2, hspace=0.2, top=0.9) labels = ['with duration', 'no duration', 'with duration scaled 100', 'no duration scaled 100'] @@ -170,7 +170,7 @@ def rand_jitter(arr): # un-scale the priorities if id in ['airmass-0.1-w-duration-w-scaling', 'airmass-0.1-no-duration-w-scaling']: data['raw_scheduled_priorities'] = [(p+35)/4.5 for p in data['raw_scheduled_priorities']] - data['raw_scheduled_durations'] = [d/60 for d in data['raw_scheduled_durations']] + data['raw_scheduled_durations'] = [d/60 for d in data['raw_scheduled_durations']] ax1.scatter(rand_jitter(data['raw_scheduled_priorities']), rand_jitter(data['raw_scheduled_durations']), marker=markers[i], c=colors[i], s=10, label=labels[i], alpha=0.3) ax1.set_ylim(top=100) @@ -206,7 +206,7 @@ def plot_heat_map_priority_duration(eff_pri_datasets, plot_title): Returns: fig (matplotlib.pyplot.Figure): The output figure object. 
""" - fig, axs = plt.subplots(2, 2, figsize=(13, 12)) + fig, axs = pyplot.subplots(2, 2, figsize=(13, 12)) fig.suptitle(plot_title) fig.subplots_adjust(wspace=0.01, hspace=0.01, top=0.9) ax_list = [axs[0, 0], axs[0, 1], axs[1, 0], axs[1, 1]] @@ -242,15 +242,15 @@ def plot_heat_map_priority_duration(eff_pri_datasets, plot_title): heat_map_elements = np.array(heat_map_elements) heat_map_elements_unsched = np.array(heat_map_elements_unsched) axis = ax_list[i] - cmap = plt.get_cmap('coolwarm') - cmap2 = plt.get_cmap('gray') + cmap = pyplot.get_cmap('coolwarm') + cmap2 = pyplot.get_cmap('gray') axis.imshow(heat_map_elements, cmap=cmap) axis.set_ylabel('Priority') axis.set_xlabel('Duration (minutes)') axis.set_xticks(np.arange(len(duration_bins)), labels=duration_bins) axis.set_yticks(np.arange(len(priority_bins)), labels=priority_bins) - plt.setp(axis.get_xticklabels(), rotation=45, ha="right", - rotation_mode="anchor") + pyplot.setp(axis.get_xticklabels(), rotation=45, ha="right", + rotation_mode="anchor") for j in range(len(priority_bins)): for k in range(len(duration_bins)): value = heat_map_elements[j, k] @@ -275,7 +275,7 @@ def plot_pct_time_scheduled_airmass_binned_priority(airmass_datasets, plot_title Returns: fig (matplotlib.pyplot.Figure): The output figure object. """ - fig, ax = plt.subplots() + fig, ax = pyplot.subplots() fig.suptitle(plot_title) bardata = [] @@ -311,7 +311,7 @@ def plot_pct_scheduled_airmass_lineplot(airmass_datasets, plot_title): Returns: fig (matplotlib.pyplot.Figure): The output figure object. """ - fig, ax = plt.subplots() + fig, ax = pyplot.subplots() fig.suptitle(plot_title) prio_names = list(airmass_datasets[0]['percent_sched_by_priority'][0].keys()) @@ -349,7 +349,7 @@ def plot_pct_time_scheduled_airmass_lineplot(airmass_datasets, plot_title): Returns: fig (matplotlib.pyplot.Figure): The output figure object. 
""" - fig, ax = plt.subplots() + fig, ax = pyplot.subplots() fig.suptitle(plot_title) prio_names = list(airmass_datasets[0]['percent_duration_by_priority'][0].keys()) @@ -400,7 +400,7 @@ def plot_pct_time_scheduled_out_of_available(airmass_datasets, plot_title): Returns: fig (matplotlib.pyplot.Figure): The output figure object. """ - fig, ax = plt.subplots() + fig, ax = pyplot.subplots() fig.suptitle(plot_title) prio_names = list(airmass_datasets[0]['percent_duration_by_priority'][0].keys()) @@ -440,7 +440,7 @@ def plot_midpoint_airmass_histograms(airmass_datasets, plot_title): Returns: fig (matplotlib.pyplot.Figure): The output figure object. """ - fig = plt.figure(figsize=(16, 16)) + fig = pyplot.figure(figsize=(16, 16)) fig.suptitle(plot_title) fig.subplots_adjust(wspace=0.3, hspace=0.3, top=0.92) for i, dataset in enumerate(airmass_datasets): @@ -470,7 +470,7 @@ def plot_eff_priority_duration_scatter(datasets, plot_title): Returns: fig (matplotlib.pyplot.Figure): The output figure object. """ - fig, axs = plt.subplots(1, 2, figsize=(24, 8)) + fig, axs = pyplot.subplots(1, 2, figsize=(24, 8)) fig.suptitle(plot_title) labels = ['Priority 10-30', 'Priority 10-100'] # colors are from tableau-colorblind10 @@ -503,7 +503,7 @@ def plot_duration_by_window_duration_scatter(data, plot_title): Returns: fig (matplotlib.pyploy.Figure): The output Figure object. """ - fig, ax = plt.subplots() + fig, ax = pyplot.subplots() fig.suptitle(plot_title) sec_to_min = 1/60 window_dur = np.array(data['raw_window_durations']) * sec_to_min @@ -525,7 +525,7 @@ def plot_subplots_input_duration(data, plot_title): Returns: fig (matplotlib.pyploy.Figure): The output Figure object. 
""" - fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(20, 10)) + fig, (ax1, ax2, ax3) = pyplot.subplots(1, 3, figsize=(20, 10)) fig.suptitle(plot_title) sched_durations = data['raw_scheduled_durations'] sched_durations = [d/60 for d in sched_durations] @@ -533,8 +533,8 @@ def plot_subplots_input_duration(data, plot_title): unsched_durations = [d/60 for d in unsched_durations] sched_priorities = data['raw_scheduled_priorities'] unsched_priorities = data['raw_unscheduled_priorities'] - sched_bins = metrics.bin_data(sched_priorities, sched_durations, bin_size=10, bin_range=(10,30),aggregator=None) - unsched_bins = metrics.bin_data(unsched_priorities, unsched_durations, bin_size=10, bin_range=(10,30),aggregator=None) + sched_bins = metrics.bin_data(sched_priorities, sched_durations, bin_size=10, bin_range=(10, 30), aggregator=None) + unsched_bins = metrics.bin_data(unsched_priorities, unsched_durations, bin_size=10, bin_range=(10, 30), aggregator=None) totals_by_priorities = list(data['total_req_by_priority'][0].values()) labels = ['10-19', '20-29', '30'] axis = [ax1, ax2, ax3] diff --git a/adaptive_scheduler/simulation/plotutils.py b/adaptive_scheduler/simulation/plotutils.py index 40526cd9..53bc2d31 100644 --- a/adaptive_scheduler/simulation/plotutils.py +++ b/adaptive_scheduler/simulation/plotutils.py @@ -9,8 +9,8 @@ from copy import deepcopy from datetime import datetime -import numpy as np -import matplotlib.pyplot as plt +import numpy +import matplotlib.pyplot as pyplot import opensearchpy from opensearchpy import OpenSearch @@ -87,17 +87,17 @@ def run_user_interface(plots): plot.generate() if args.save: plot.save() - plt.show() + pyplot.show() break else: try: plot = plot_dict[showplot] - plt.close('all') + pyplot.close('all') plot.generate() if args.save: plot.save() plot.fig.show() - plt.show() + pyplot.show() break except KeyError as e: print(f'Plot name not found: {e}') @@ -172,7 +172,7 @@ def plot_multi_barplot(ax, data, labels, binnames, barwidth=0.04): 
binnames: A list of names of the bins for marking the x-axis. barwidth (float): The width of each bar. """ - ticks = np.arange(len(data[0])) + ticks = numpy.arange(len(data[0])) for i, datavalues in enumerate(data): ax.bar(ticks+barwidth*i, datavalues, barwidth, label=labels[i]) ax.set_xticks(ticks+barwidth*i/2, binnames) From 67bba5a1a5cb9a382e3e39e9f100f13871c02621 Mon Sep 17 00:00:00 2001 From: Qingze Wu Date: Fri, 12 Aug 2022 16:59:49 -0700 Subject: [PATCH 165/165] final commit of Michael; adapted input requests plot to fit new test dataset --- adaptive_scheduler/simulation/plotfuncs.py | 19 +++++++++++++------ adaptive_scheduler/simulation/plots.py | 2 +- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/adaptive_scheduler/simulation/plotfuncs.py b/adaptive_scheduler/simulation/plotfuncs.py index edec1d52..99d1a586 100644 --- a/adaptive_scheduler/simulation/plotfuncs.py +++ b/adaptive_scheduler/simulation/plotfuncs.py @@ -525,7 +525,7 @@ def plot_subplots_input_duration(data, plot_title): Returns: fig (matplotlib.pyploy.Figure): The output Figure object. 
""" - fig, (ax1, ax2, ax3) = pyplot.subplots(1, 3, figsize=(20, 10)) + fig, axs = pyplot.subplots(3,2, figsize=(22, 15)) fig.suptitle(plot_title) sched_durations = data['raw_scheduled_durations'] sched_durations = [d/60 for d in sched_durations] @@ -533,14 +533,21 @@ def plot_subplots_input_duration(data, plot_title): unsched_durations = [d/60 for d in unsched_durations] sched_priorities = data['raw_scheduled_priorities'] unsched_priorities = data['raw_unscheduled_priorities'] - sched_bins = metrics.bin_data(sched_priorities, sched_durations, bin_size=10, bin_range=(10, 30), aggregator=None) - unsched_bins = metrics.bin_data(unsched_priorities, unsched_durations, bin_size=10, bin_range=(10, 30), aggregator=None) + sched_bins = metrics.bin_data(sched_priorities, sched_durations, bin_size=5, bin_range=(10, 30), aggregator=None) | metrics.bin_data(sched_priorities, sched_durations, bin_size=3000, bin_range=(31, 3000),aggregator=None) + unsched_bins = metrics.bin_data(unsched_priorities, unsched_durations, bin_size=5, bin_range=(10, 30), aggregator=None) | metrics.bin_data(unsched_priorities, unsched_durations, bin_size=3000, bin_range=(31, 3000),aggregator=None) + max_duration = 0 + all_durations = sched_durations + unsched_durations + for i in all_durations: + if i > max_duration: + max_duration = i + else: + continue totals_by_priorities = list(data['total_req_by_priority'][0].values()) - labels = ['10-19', '20-29', '30'] - axis = [ax1, ax2, ax3] + labels = ['10-14', '15-20', '20-24', '25-29', '30', '31-2000'] + axis = [axs[0][0], axs[0][1], axs[1][0], axs[1][1], axs[2][0], axs[2][1]] for i, values in enumerate(sched_bins.values()): bars = ['Scheduled', 'Unscheduled'] - axis[i].hist([values, list(unsched_bins.values())[i]], bins=np.arange(0, 70, 2), + axis[i].hist([values, list(unsched_bins.values())[i]], bins=np.arange(0, 100, 2), stacked=True, label=bars) axis[i].set_xlabel('Duration (Minutes)') axis[i].set_ylabel('Input reservation counts') diff --git 
a/adaptive_scheduler/simulation/plots.py b/adaptive_scheduler/simulation/plots.py index fcb4df1e..5c1098d2 100644 --- a/adaptive_scheduler/simulation/plots.py +++ b/adaptive_scheduler/simulation/plots.py @@ -30,7 +30,7 @@ # General Use Plot(plotfuncs.plot_subplots_input_duration, '1m Network Scheduled/Unscheduled Requests Length Distribution', - 'no-airmass-w-duration-no-scaling'), + 'test-new-dataset'), Plot(plotfuncs.plot_duration_by_window_duration_scatter, '1m Network Scatterplot of Duration and Window Duration', 'window-duration'),