From 9810cff69390e287e6e4b15a2871528e89a90b77 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Thu, 5 Feb 2026 14:35:32 +0000 Subject: [PATCH 01/55] Move lcoa changes to latest copy from master --- src/tlo/methods/healthsystem.py | 73 ++++++++++++++++++++-- tests/test_healthsystem.py | 105 ++++++++++++++++++++++++++++++++ 2 files changed, 174 insertions(+), 4 deletions(-) diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 12a36ca8ba..0eec9016ae 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -255,12 +255,19 @@ class HealthSystem(Module): "Year in which the assumption for `equip_availability` changes (The change happens on 1st January of that " "year.)", ), + # Service Availability "Service_Availability": Parameter( Types.LIST, "List of services to be available. NB. This parameter is over-ridden if an argument is provided" " to the module initialiser.", ), + "year_service_availability_switch": Parameter(Types.INT, "Year in which service availability changes."), + "service_availability_postSwitch": Parameter( + Types.LIST, + "List of services to be available after the switch in `year_service_availability_switch`.", + ), + "policy_name": Parameter(Types.STRING, "Name of priority policy adopted"), "year_mode_switch": Parameter(Types.INT, "Year in which mode switch is enforced"), "scale_to_effective_capabilities": Parameter( @@ -897,6 +904,12 @@ def initialise_simulation(self, sim): Date(self.parameters["year_use_funded_or_actual_staffing_switch"], 1, 1), ) + # Schedule service availability switch + sim.schedule_event( + HealthSystemChangeParameters(self,parameters_to_change=["service_availability"]), + Date(self.parameters["year_service_availability_switch"], 1, 1), + ) + # Schedule a one-off rescaling of _daily_capabilities broken down by officer type and level. # This occurs on 1st January of the year specified in the parameters. 
sim.schedule_event( @@ -1250,17 +1263,28 @@ def format_clinic_capabilities(self) -> pd.DataFrame: return capabilities_ex + def _compute_factors_for_effective_capabilities(self): + """Compute factor to rescale capabilities to capture effective capability. + Computation of these factors is split from the actual rescaling to facilitate + capturing them even when running the model in mode 1.""" + self._rescaling_factors = defaultdict(dict) + for clinic, clinic_cl in self._daily_capabilities.items(): + for facID_and_officer in clinic_cl.keys(): + self._rescaling_factors[clinic][facID_and_officer] = self._summary_counter.frac_time_used_by_facID_and_officer( + facID_and_officer=facID_and_officer, clinic=clinic + ) + self._summary_counter._rescaling_factors = self._rescaling_factors + def _rescale_capabilities_to_capture_effective_capability(self): # Notice that capabilities will only be expanded through this process # (i.e. won't reduce available capabilities if these were under-used in the last year). # Note: Currently relying on module variable rather than parameter for # scale_to_effective_capabilities, in order to facilitate testing. However # this may eventually come into conflict with the Switcher functions. + self._compute_factors_for_effective_capabilities() for clinic, clinic_cl in self._daily_capabilities.items(): for facID_and_officer in clinic_cl.keys(): - rescaling_factor = self._summary_counter.frac_time_used_by_facID_and_officer( - facID_and_officer=facID_and_officer, clinic=clinic - ) + rescaling_factor = self._rescaling_factors[clinic][facID_and_officer] if rescaling_factor > 1 and rescaling_factor != float("inf"): self._daily_capabilities[clinic][facID_and_officer] *= rescaling_factor @@ -1564,6 +1588,13 @@ def _add_hsi_event_queue_item_to_hsi_event_queue( # Create HSIEventQueue Item, including a counter for the number of HSI_Events, to assist with sorting in the # queue (NB. the sorting is done ascending and by the order of the items in the tuple). 
+ + # First check that the service the HSI needs is available. If not, don't add to queue. + # Don't increment the counter; log and return. + if not self.is_treatment_id_allowed(hsi_event.TREATMENT_ID, self.service_availability): + self.call_and_record_never_ran_hsi_event(hsi_event=hsi_event, priority=priority, clinic=clinic) + return + self.hsi_event_queue_counter += 1 if self.randomise_queue: @@ -2088,6 +2119,12 @@ def on_end_of_month(self) -> None: def on_end_of_year(self) -> None: """Write to log the current states of the summary counters and reset them.""" + + # If we are at the end of the year preceding the service availability switch, + # compute rescaling factors. + if (self.sim.date.year == self.parameters['year_service_availability_switch'] - 1): + self._compute_factors_for_effective_capabilities() + # If we are at the end of the year preceeding the mode switch, and if wanted # to rescale capabilities to capture effective availability as was recorded, on # average, in the past year, do so here. 
@@ -2174,6 +2211,12 @@ def run_individual_level_events_in_mode_1( if event.expected_time_requests: ok_to_run = self.check_if_all_required_officers_have_nonzero_capabilities( event.expected_time_requests, clinic=clinic) + + # Check here that the treatment id is allowed at this point as service availability might have changed + # since the event was scheduled + if not self.is_treatment_id_allowed(event.TREATMENT_ID, self.service_availability): + ok_to_run = False + if ok_to_run: # Compute the bed days that are allocated to this HSI and provide this information to the HSI if sum(event.BEDDAYS_FOOTPRINT.values()): @@ -2439,6 +2482,11 @@ def process_events_mode_2(self, hold_over: List[HSIEventQueueItem]) -> None: event_clinic = next_event_tuple.clinic_eligibility capabilities_still_available = set_capabilities_still_available[event_clinic] + # Check here that the treatment id is allowed as service availability might have changed + # since the event was scheduled + if not self.module.is_treatment_id_allowed(event.TREATMENT_ID, self.module.service_availability): + self.module.call_and_record_never_ran_hsi_event(hsi_event=event, priority=next_event_tuple.priority) + if self.sim.date > next_event_tuple.tclose: # The event has expired (after tclose) having never been run. 
Call the 'never_ran' function self.module.call_and_record_never_ran_hsi_event(hsi_event=event, priority=next_event_tuple.priority) @@ -2759,6 +2807,7 @@ def _reset_internal_stores(self) -> None: self._never_ran_appts = defaultdict(int) # As above, but for `HSI_Event`s that have never ran self._never_ran_appts_by_level = {_level: defaultdict(int) for _level in ("0", "1a", "1b", "2", "3", "4")} + self._rescaling_factors = defaultdict(dict) self._frac_time_used_overall = defaultdict(list) # Running record of the usage of the healthcare system self._sum_of_daily_frac_time_used_by_facID_and_officer = defaultdict(Counter) @@ -2852,6 +2901,7 @@ def write_to_log_and_reset_counters(self): "average_Frac_Time_Used_Overall": { clinic: np.mean(values) for clinic, values in self._frac_time_used_overall.items() }, + "rescaling_factor_for_clinics": self._rescaling_factors, # <-- leaving space here for additional summary measures that may be needed in the future. }, ) @@ -2914,7 +2964,7 @@ def __init__(self, module: HealthSystem, parameters_to_change: List): super().__init__(module) assert isinstance(module, HealthSystem) - self.supported_parameters = ["cons_availability", "equip_availability", "use_funded_or_actual_staffing"] + self.supported_parameters = ["cons_availability", "equip_availability", "use_funded_or_actual_staffing", "service_availability"] if not all(param in self.supported_parameters for param in parameters_to_change): raise ValueError( f"parameters_to_change can only contain the following values: {self.supported_parameters}. 
" @@ -2935,6 +2985,21 @@ def apply(self, population): if "use_funded_or_actual_staffing" in self.parameters_to_change: self.module.use_funded_or_actual_staffing = p["use_funded_or_actual_staffing_postSwitch"] + if "service_availability" in self.parameters_to_change: + self.module.service_availability = p["service_availability_postSwitch"] + ## As part of the switching, clear the queue of any events currently scheduled + ## that might require one of the omitted services when they actually run. + retained_events = [] + while len(self.module.HSI_EVENT_QUEUE) > 0: + next_event_tuple = hp.heappop(self.module.HSI_EVENT_QUEUE) + if self.module.is_treatment_id_allowed(next_event_tuple.hsi_event.TREATMENT_ID, self.module.service_availability): + retained_events.append(next_event_tuple) + else: + self.module.schedule_to_call_never_ran_on_date(hsi_event=next_event_tuple.hsi_event, tdate=next_event_tuple.topen) + + self.module.HSI_EVENT_QUEUE = retained_events + hp.heapify(self.module.HSI_EVENT_QUEUE) + class DynamicRescalingHRCapabilities(RegularEvent, PopulationScopeEventMixin): """This event exists to scale the daily capabilities assumed at fixed time intervals""" diff --git a/tests/test_healthsystem.py b/tests/test_healthsystem.py index c0599c4ffc..eb81881d80 100644 --- a/tests/test_healthsystem.py +++ b/tests/test_healthsystem.py @@ -3025,3 +3025,108 @@ def schedule_hsi_events(ngenericclinic, nclinic1, sim): clinic1_capabilities_before * 2, clinic1_capabilities_after, ), "Expected Clinic1 capabilities to be rescaled by factor of 2" + + + +def test_service_availability_switch(tmpdir, seed): + """Test that the service availability is updated in the year specified. + Simultaneously check that the switch triggers related behaviors: + 1) compute and write to logs rescaling factors + 2) clear hsi event queue of any events scheduled to run after the switch + that need one of the unavailable services. 
+ """ + + class DummyModuleGenericClinic(Module): + METADATA = {Metadata.DISEASE_MODULE, Metadata.USES_HEALTHSYSTEM} + + def read_parameters(self, data_folder): + pass + + def initialise_population(self, population): + pass + + def initialise_simulation(self, sim): + pass + + # Create a dummy HSI event class + class DummyHSIEvent(HSI_Event, IndividualScopeEventMixin): + def __init__(self, module, person_id, appt_type, level, treatment_id): + super().__init__(module, person_id=person_id) + self.TREATMENT_ID = treatment_id + self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({appt_type: 1}) + self.ACCEPTED_FACILITY_LEVEL = level + + def apply(self, person_id, squeeze_factor): + self.this_hsi_event_ran = True + + log_config = { + "filename": "log", + "directory": tmpdir, + "custom_levels": {"tlo.methods.healthsystem": logging.DEBUG}, + } + start_date = Date(2010, 1, 1) + + sim = Simulation(start_date=start_date, seed=0, log_config=log_config, resourcefilepath=resourcefilepath) + + sim.register( + demography.Demography(), + healthsystem.HealthSystem( + capabilities_coefficient=1.0, + mode_appt_constraints=1, + ignore_priority=False, + randomise_queue=True, + policy_name="", + use_funded_or_actual_staffing="funded_plus", + ), + DummyModuleGenericClinic(), + ) + + hs_params = sim.modules["HealthSystem"].parameters + hs_params["Service_Availability"] = ["ThisEventShouldRun", "ThisEventShouldNotRunPostSwitch"] + year_service_availability_switch = 2011 + hs_params["year_service_availability_switch"] = year_service_availability_switch + hs_params["service_availability_postSwitch"] = ["ThisEventShouldRun"] + + sim.make_initial_population(n=popsize) + ## Schedule 10 events that should run; 10 events that have a treatment id that is not available + ## after service availability switch. 
+ nevents_with_available_ids = 10 + nevents_with_withdrawn_ids = 10 + for i in range(0, nevents_with_available_ids): + hsi = DummyHSIEvent( + module=sim.modules["DummyModuleGenericClinic"], + person_id=i, + appt_type="ConWithDCSA", + level="0", + treatment_id="ThisEventShouldRun", + ) + sim.modules["HealthSystem"].schedule_hsi_event( + hsi, topen=sim.date, tclose=sim.date + pd.DateOffset(days=1), priority=1 + ) + + for i in range(nevents_with_available_ids, nevents_with_available_ids + nevents_with_withdrawn_ids): + hsi = DummyHSIEvent( + module=sim.modules["DummyModuleGenericClinic"], + person_id=i, + appt_type="ConWithDCSA", + level="0", + treatment_id="ThisEventShouldNotRunPostSwitch", + ) + ## These events open after service availability switch + topen = pd.Timestamp(year_service_availability_switch, 1, 1) + sim.modules["HealthSystem"].schedule_hsi_event( + hsi, topen=topen, tclose=topen + pd.DateOffset(days=1), priority=1 + ) + + sim.simulate(end_date=end_date) + output = parse_log_file(sim.log_filepath, level=logging.DEBUG) + hsi_events = output["tlo.methods.healthsystem"]["HSI_Event"] + ## Expect 10 rows in hsi_events['HSI_Event'] with did_run True and TREATMENT_ID ThisEventShouldRun + nevents_ran = hsi_events.groupby("TREATMENT_ID")["did_run"].value_counts() + assert nevents_ran.loc[("ThisEventShouldRun", True)] == nevents_with_available_ids + ## Expect 10 rows in hsi_events['Never_ran_HSI_Event'] with TREATMENT_ID ThisEventShouldNotRunPostSwitch + never_ran_events = output["tlo.methods.healthsystem"]["Never_ran_HSI_Event"] + nevents_did_not_run = never_ran_events[never_ran_events["TREATMENT_ID"] == "ThisEventShouldNotRunPostSwitch"].shape[ + 0 + ] + assert nevents_did_not_run == nevents_with_withdrawn_ids From d057065d3f3d6dcffe35e867618822385e01ecec Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Thu, 5 Feb 2026 14:59:01 +0000 Subject: [PATCH 02/55] Scenario files from old branch --- .../scenario_effect_of_treatment_ids.py | 98 +++++++++++++++++++ 
.../scenarios_definitions.py | 45 +++++++++ 2 files changed, 143 insertions(+) create mode 100644 src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py create mode 100644 src/scripts/lcoa_inputs_from_tlo_analyses/scenarios_definitions.py diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py new file mode 100644 index 0000000000..66abc56a7c --- /dev/null +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py @@ -0,0 +1,98 @@ +""" +This files runs the full model under a set of scenario in which only a single TREATMENT_ID is included. + +To check scenarios are generated correctly: +``` +tlo scenario-run --draw-only src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py +``` + +Run on the batch system using: + +``` +tlo batch-submit src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py +``` + +or locally using: +``` +tlo scenario-run src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py +``` + +""" +from pathlib import Path +from typing import Dict, List + +from scripts.lcoa_inputs_from_tlo_analyses.scenarios_definitions import ( + ScenarioDefinitions, +) + +from tlo import Date, logging +from tlo.analysis.utils import ( + get_filtered_treatment_ids, + mix_scenarios, +) +from tlo.methods.fullmodel import fullmodel +from tlo.scenario import BaseScenario + + +class EffectOfEachTreatment(BaseScenario): + def __init__(self): + super().__init__() + self.seed = 0 + self.start_date = Date(2010, 1, 1) + self.end_date = Date(2040, 1, 1) + self.pop_size = 100 + self._scenarios = self._get_scenarios() + self.number_of_draws = len(self._scenarios) + self.runs_per_draw = 5 + + def log_configuration(self): + return { + 'filename': 'effect_of_each_treatment_id', + 'directory': Path('./outputs'), + 'custom_levels': { + '*': logging.WARNING, + 
'tlo.methods.demography': logging.INFO, + 'tlo.methods.demography.detail': logging.WARNING, + 'tlo.methods.healthburden': logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + } + } + + def modules(self): + return ( + fullmodel(resourcefilepath=self.resources) + + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + ) + + def draw_parameters(self, draw_number, rng): + scenario_definitions = ScenarioDefinitions() + return mix_scenarios( + scenario_definitions.baseline(), + { + 'HealthSystem': { + 'service_availability_postSwitch': list(self._scenarios.values())[draw_number], + }, + } + ) + + def _get_scenarios(self) -> Dict[str, List[str]]: + """Return the Dict with values for the parameter `Service_Availability` keyed by a name for the scenario. + The sequences of scenarios systematically omits all but one TREATMENT_ID that is defined in the model.""" + + # Generate list of TREATMENT_IDs and filter to the resolution needed + treatments = get_filtered_treatment_ids(depth=None) + # Return 'Service_Availability' values, with scenarios for nothing, and ones for which all but one + # treatment is omitted + service_availability = dict({"Nothing": []}) + # For each treatment group, create scenarios keeping only one treatment from that group + service_availability.update( + {f"Only {treatment}": [treatment] for treatment in treatments} + ) + + return service_availability + + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/scenarios_definitions.py b/src/scripts/lcoa_inputs_from_tlo_analyses/scenarios_definitions.py new file mode 100644 index 0000000000..29003f8960 --- /dev/null +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/scenarios_definitions.py @@ -0,0 +1,45 @@ +"""The file contains all the definitions of scenarios for the TLO-LCOA project.""" +from typing import Dict + +from tlo.analysis.utils import 
get_parameters_for_status_quo, mix_scenarios + + +class ScenarioDefinitions: + + @property + def YEAR_OF_SERVICE_AVAILABILITY_SWITCH(self) -> int: + return 2026 + + + def baseline(self) -> Dict: + """Return the Dict with values for the parameter changes that define the baseline scenario. """ + return mix_scenarios( + get_parameters_for_status_quo(), # <-- Parameters that have been the calibration targets + + { + "HealthSystem": { + "cons_availability": 'default', + 'year_cons_availability_switch': self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, + 'cons_availability_postSwitch': 'all', + + "mode_appt_constraints": 1, + "year_service_availability_switch": self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, + + # allow historical HRH scaling to occur 2018-2024 + # 'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, + 'yearly_HR_scaling_mode': 'historical_scaling', + }, + + "ImprovedHealthSystemAndCareSeekingScenarioSwitcher": { + 'max_healthsystem_function': [False, True], # <-- switch from False to True mid-way + 'year_of_switch': self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, + }, + + "ImprovedHealthSystemAndCareSeekingScenarioSwitcher": { + 'max_healthcare_seeking': [False, True], # <-- switch from False to True mid-way + 'year_of_switch': self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, + } + + + }, + ) From b53448e4dc623cd451a58180bc19843078676018 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Thu, 5 Feb 2026 15:06:39 +0000 Subject: [PATCH 03/55] Resource file --- .../ResourceFile_HealthSystem_parameters.csv | 33 +++++++++++++++++-- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv b/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv index 172381eac6..44a0f60bc3 100644 --- a/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv +++ b/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv @@ -1,3 +1,30 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:b96e35b518fb33fe0c89b9e47d6b623ec9eb30d454b404554312787a5937404e -size 967 +parameter_name,value +policy_name,Naive +year_mode_switch,2100 +scale_to_effective_capabilities,FALSE +Service_Availability,"[""*""]" +year_service_availability_switch,2100 +service_availability_postSwitch,"[""*""]" +use_funded_or_actual_staffing,funded_plus +mode_appt_constraints,1 +mode_appt_constraints_postSwitch,1 +data_source_for_cons_availability_estimates,original +cons_availability,default +beds_availability,default +equip_availability,default +equip_availability_postSwitch,default +year_equip_availability_switch,2100 +tclose_overwrite,0 +tclose_days_offset_overwrite,7 +HR_scaling_by_level_and_officer_type_mode,default +year_HR_scaling_by_level_and_officer_type,2100 +HR_scaling_by_district_mode,default +year_HR_scaling_by_district,2100 +yearly_HR_scaling_mode,no_scaling +year_cons_availability_switch,2100 +cons_availability_postSwitch,default +use_funded_or_actual_staffing_postSwitch,funded_plus +year_use_funded_or_actual_staffing_switch,2100 +cons_override_treatment_ids,[] +cons_override_treatment_ids_prob_avail,1.0 +clinic_configuration_name,Default From e5e22989422920466687589993a759c59f7df266 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Thu, 5 Feb 2026 15:19:55 +0000 Subject: [PATCH 04/55] Resource file --- .../ResourceFile_HealthSystem_parameters.csv | 33 ++----------------- 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv b/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv index 44a0f60bc3..0e629536e4 100644 --- a/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv +++ b/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv @@ -1,30 +1,3 @@ -parameter_name,value -policy_name,Naive -year_mode_switch,2100 -scale_to_effective_capabilities,FALSE -Service_Availability,"[""*""]" -year_service_availability_switch,2100 
-service_availability_postSwitch,"[""*""]" -use_funded_or_actual_staffing,funded_plus -mode_appt_constraints,1 -mode_appt_constraints_postSwitch,1 -data_source_for_cons_availability_estimates,original -cons_availability,default -beds_availability,default -equip_availability,default -equip_availability_postSwitch,default -year_equip_availability_switch,2100 -tclose_overwrite,0 -tclose_days_offset_overwrite,7 -HR_scaling_by_level_and_officer_type_mode,default -year_HR_scaling_by_level_and_officer_type,2100 -HR_scaling_by_district_mode,default -year_HR_scaling_by_district,2100 -yearly_HR_scaling_mode,no_scaling -year_cons_availability_switch,2100 -cons_availability_postSwitch,default -use_funded_or_actual_staffing_postSwitch,funded_plus -year_use_funded_or_actual_staffing_switch,2100 -cons_override_treatment_ids,[] -cons_override_treatment_ids_prob_avail,1.0 -clinic_configuration_name,Default +version https://git-lfs.github.com/spec/v1 +oid sha256:eaa265d5d9a2b7037609e35d4984387cd14f3675113b1ce46dc3ede47b9e5203 +size 1047 From 5ff3732f9617be1fdaefb1fd7bad70af28c18d8b Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Mon, 9 Feb 2026 14:05:48 +0000 Subject: [PATCH 05/55] Single scenario file and small edits to healthsystem --- .../scenario_effect_of_treatment_ids.py | 45 +++++++++++++++++-- src/tlo/methods/healthsystem.py | 4 +- tests/test_healthsystem.py | 8 ++-- 3 files changed, 48 insertions(+), 9 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py index 66abc56a7c..91b9af2a2f 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py @@ -1,5 +1,7 @@ """ -This files runs the full model under a set of scenario in which only a single TREATMENT_ID is included. 
+This file contains all the definitions of scenarios for the TLO-LCOA project. + +It runs the full model under a set of scenario in which only a single TREATMENT_ID is included. To check scenarios are generated correctly: ``` @@ -18,6 +20,7 @@ ``` """ + from pathlib import Path from typing import Dict, List @@ -29,17 +32,53 @@ from tlo.analysis.utils import ( get_filtered_treatment_ids, mix_scenarios, + get_parameters_for_status_quo ) from tlo.methods.fullmodel import fullmodel from tlo.scenario import BaseScenario +class ScenarioDefinitions: + + @property + def YEAR_OF_SERVICE_AVAILABILITY_SWITCH(self) -> int: + return 2026 + + def baseline(self) -> Dict: + """Return the Dict with values for the parameter changes that define the baseline scenario. """ + return mix_scenarios( + get_parameters_for_status_quo(), # <-- Parameters that have been the calibration targets + + { + "HealthSystem": { + "cons_availability": 'default', + 'year_cons_availability_switch': self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, + 'cons_availability_postSwitch': 'all', + + "mode_appt_constraints": 1, + "year_service_availability_switch": self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, + + # allow historical HRH scaling to occur 2018-2024 + # 'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, + 'yearly_HR_scaling_mode': 'historical_scaling', + }, + + "ImprovedHealthSystemAndCareSeekingScenarioSwitcher": { + 'max_healthsystem_function': [False, True], # <-- switch from False to True mid-way + 'max_healthcare_seeking': [False, True], # <-- switch from False to True mid-way + 'year_of_switch': self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, + } + + + }, + ) + class EffectOfEachTreatment(BaseScenario): def __init__(self): super().__init__() self.seed = 0 self.start_date = Date(2010, 1, 1) - self.end_date = Date(2040, 1, 1) + self.end_date = Date(2041, 1, 1) self.pop_size = 100 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) @@ -60,7 +99,7 
@@ def log_configuration(self): def modules(self): return ( - fullmodel(resourcefilepath=self.resources) + fullmodel() + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] ) diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 0eec9016ae..88a9529aab 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -1592,8 +1592,8 @@ def _add_hsi_event_queue_item_to_hsi_event_queue( # First check that the service the HSI needs is available. If not, don't add to queue. # Don't increment the counter; log and return. if not self.is_treatment_id_allowed(hsi_event.TREATMENT_ID, self.service_availability): - self.call_and_record_never_ran_hsi_event(hsi_event=hsi_event, priority=priority, clinic=clinic) - return + self.schedule_to_call_never_ran_on_date(hsi_event=hsi_event, tdate=topen) + self.hsi_event_queue_counter += 1 diff --git a/tests/test_healthsystem.py b/tests/test_healthsystem.py index eb81881d80..c959162f0f 100644 --- a/tests/test_healthsystem.py +++ b/tests/test_healthsystem.py @@ -3090,8 +3090,8 @@ def apply(self, person_id, squeeze_factor): sim.make_initial_population(n=popsize) ## Schedule 10 events that should run; 10 events that have a treatment id that is not available ## after service availability switch. 
- nevents_with_available_ids = 10 - nevents_with_withdrawn_ids = 10 + nevents_with_available_ids = 60 + nevents_with_withdrawn_ids = 40 for i in range(0, nevents_with_available_ids): hsi = DummyHSIEvent( module=sim.modules["DummyModuleGenericClinic"], @@ -3121,10 +3121,10 @@ def apply(self, person_id, squeeze_factor): sim.simulate(end_date=end_date) output = parse_log_file(sim.log_filepath, level=logging.DEBUG) hsi_events = output["tlo.methods.healthsystem"]["HSI_Event"] - ## Expect 10 rows in hsi_events['HSI_Event'] with did_run True and TREATMENT_ID ThisEventShouldRun + ## Expect nevents_with_available_ids rows in hsi_events['HSI_Event'] with did_run True and TREATMENT_ID ThisEventShouldRun nevents_ran = hsi_events.groupby("TREATMENT_ID")["did_run"].value_counts() assert nevents_ran.loc[("ThisEventShouldRun", True)] == nevents_with_available_ids - ## Expect 10 rows in hsi_events['Never_ran_HSI_Event'] with TREATMENT_ID ThisEventShouldNotRunPostSwitch + ## Expect nevents_with_withdrawn_ids rows in hsi_events['Never_ran_HSI_Event'] with TREATMENT_ID ThisEventShouldNotRunPostSwitch never_ran_events = output["tlo.methods.healthsystem"]["Never_ran_HSI_Event"] nevents_did_not_run = never_ran_events[never_ran_events["TREATMENT_ID"] == "ThisEventShouldNotRunPostSwitch"].shape[ 0 From 42c67aa0d9619614fd7eed2131e45dace99f80b4 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Mon, 9 Feb 2026 14:25:14 +0000 Subject: [PATCH 06/55] Additional imports --- .../scenario_effect_of_treatment_ids.py | 8 +--- .../scenarios_definitions.py | 45 ------------------- 2 files changed, 2 insertions(+), 51 deletions(-) delete mode 100644 src/scripts/lcoa_inputs_from_tlo_analyses/scenarios_definitions.py diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py index 91b9af2a2f..6653189cdb 100644 --- 
a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py @@ -23,11 +23,6 @@ from pathlib import Path from typing import Dict, List - -from scripts.lcoa_inputs_from_tlo_analyses.scenarios_definitions import ( - ScenarioDefinitions, -) - from tlo import Date, logging from tlo.analysis.utils import ( get_filtered_treatment_ids, @@ -35,6 +30,7 @@ get_parameters_for_status_quo ) from tlo.methods.fullmodel import fullmodel +from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher from tlo.scenario import BaseScenario class ScenarioDefinitions: @@ -100,7 +96,7 @@ def log_configuration(self): def modules(self): return ( fullmodel() - + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher()] ) def draw_parameters(self, draw_number, rng): diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/scenarios_definitions.py b/src/scripts/lcoa_inputs_from_tlo_analyses/scenarios_definitions.py deleted file mode 100644 index 29003f8960..0000000000 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/scenarios_definitions.py +++ /dev/null @@ -1,45 +0,0 @@ -"""The file contains all the definitions of scenarios for the TLO-LCOA project.""" -from typing import Dict - -from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios - - -class ScenarioDefinitions: - - @property - def YEAR_OF_SERVICE_AVAILABILITY_SWITCH(self) -> int: - return 2026 - - - def baseline(self) -> Dict: - """Return the Dict with values for the parameter changes that define the baseline scenario. 
""" - return mix_scenarios( - get_parameters_for_status_quo(), # <-- Parameters that have been the calibration targets - - { - "HealthSystem": { - "cons_availability": 'default', - 'year_cons_availability_switch': self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, - 'cons_availability_postSwitch': 'all', - - "mode_appt_constraints": 1, - "year_service_availability_switch": self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, - - # allow historical HRH scaling to occur 2018-2024 - # 'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, - 'yearly_HR_scaling_mode': 'historical_scaling', - }, - - "ImprovedHealthSystemAndCareSeekingScenarioSwitcher": { - 'max_healthsystem_function': [False, True], # <-- switch from False to True mid-way - 'year_of_switch': self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, - }, - - "ImprovedHealthSystemAndCareSeekingScenarioSwitcher": { - 'max_healthcare_seeking': [False, True], # <-- switch from False to True mid-way - 'year_of_switch': self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, - } - - - }, - ) From 730542c174b0b2cb15e6e13055473762bde95abb Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Tue, 10 Feb 2026 16:05:43 +0000 Subject: [PATCH 07/55] Prep for submission --- .../scenario_effect_of_treatment_ids.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py index 6653189cdb..4599207416 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py @@ -75,7 +75,7 @@ def __init__(self): self.seed = 0 self.start_date = Date(2010, 1, 1) self.end_date = Date(2041, 1, 1) - self.pop_size = 100 + self.pop_size = 250_000 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) self.runs_per_draw = 5 @@ -120,9 +120,10 @@ def 
_get_scenarios(self) -> Dict[str, List[str]]: # treatment is omitted service_availability = dict({"Nothing": []}) # For each treatment group, create scenarios keeping only one treatment from that group - service_availability.update( - {f"Only {treatment}": [treatment] for treatment in treatments} - ) + # Commenting to allow draw 0 to be run and suspended. + #service_availability.update( + # {f"Only {treatment}": [treatment] for treatment in treatments} + #) return service_availability From 10b70a3eff51b120e22ae044e9aa8bb1ecb6c91c Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Tue, 10 Feb 2026 22:45:06 +0000 Subject: [PATCH 08/55] Modify scenario setup --- .../scenario_effect_of_treatment_ids.py | 85 ++++++++----------- 1 file changed, 37 insertions(+), 48 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py index 4599207416..34f8979110 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py @@ -24,47 +24,37 @@ from pathlib import Path from typing import Dict, List from tlo import Date, logging -from tlo.analysis.utils import ( - get_filtered_treatment_ids, - mix_scenarios, - get_parameters_for_status_quo -) +from tlo.analysis.utils import get_filtered_treatment_ids, mix_scenarios, get_parameters_for_status_quo from tlo.methods.fullmodel import fullmodel from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher from tlo.scenario import BaseScenario -class ScenarioDefinitions: +class ScenarioDefinitions: @property def YEAR_OF_SERVICE_AVAILABILITY_SWITCH(self) -> int: return 2026 def baseline(self) -> Dict: - """Return the Dict with values for the parameter changes that define the baseline scenario. 
""" + """Return the Dict with values for the parameter changes that define the baseline scenario.""" return mix_scenarios( get_parameters_for_status_quo(), # <-- Parameters that have been the calibration targets - { "HealthSystem": { - "cons_availability": 'default', - 'year_cons_availability_switch': self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, - 'cons_availability_postSwitch': 'all', - + "cons_availability": "default", + "year_cons_availability_switch": self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, + "cons_availability_postSwitch": "all", "mode_appt_constraints": 1, "year_service_availability_switch": self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, - # allow historical HRH scaling to occur 2018-2024 # 'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, - 'yearly_HR_scaling_mode': 'historical_scaling', + "yearly_HR_scaling_mode": "historical_scaling", }, - "ImprovedHealthSystemAndCareSeekingScenarioSwitcher": { - 'max_healthsystem_function': [False, True], # <-- switch from False to True mid-way - 'max_healthcare_seeking': [False, True], # <-- switch from False to True mid-way - 'year_of_switch': self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, - } - - + "max_healthsystem_function": [False, True], # <-- switch from False to True mid-way + "max_healthcare_seeking": [False, True], # <-- switch from False to True mid-way + "year_of_switch": self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, + }, }, ) @@ -82,35 +72,25 @@ def __init__(self): def log_configuration(self): return { - 'filename': 'effect_of_each_treatment_id', - 'directory': Path('./outputs'), - 'custom_levels': { - '*': logging.WARNING, - 'tlo.methods.demography': logging.INFO, - 'tlo.methods.demography.detail': logging.WARNING, - 'tlo.methods.healthburden': logging.INFO, - 'tlo.methods.healthsystem.summary': logging.INFO, - } + "filename": "effect_of_each_treatment_id", + "directory": Path("./outputs"), + "custom_levels": { + "*": logging.WARNING, + "tlo.methods.demography": logging.INFO, + 
"tlo.methods.demography.detail": logging.WARNING, + "tlo.methods.healthburden": logging.INFO, + "tlo.methods.healthsystem.summary": logging.INFO, + }, } def modules(self): - return ( - fullmodel() - + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher()] - ) + return fullmodel() + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher()] def draw_parameters(self, draw_number, rng): - scenario_definitions = ScenarioDefinitions() - return mix_scenarios( - scenario_definitions.baseline(), - { - 'HealthSystem': { - 'service_availability_postSwitch': list(self._scenarios.values())[draw_number], - }, - } - ) + if draw_number < len(self._scenarios): + return list(self._scenarios.values())[draw_number] - def _get_scenarios(self) -> Dict[str, List[str]]: + def _get_scenarios(self) -> Dict[str, Dict]: """Return the Dict with values for the parameter `Service_Availability` keyed by a name for the scenario. The sequences of scenarios systematically omits all but one TREATMENT_ID that is defined in the model.""" @@ -121,14 +101,23 @@ def _get_scenarios(self) -> Dict[str, List[str]]: service_availability = dict({"Nothing": []}) # For each treatment group, create scenarios keeping only one treatment from that group # Commenting to allow draw 0 to be run and suspended. 
- #service_availability.update( + # service_availability.update( # {f"Only {treatment}": [treatment] for treatment in treatments} - #) + # ) + + scenario_definitions = ScenarioDefinitions() + + scenarios = { + key: mix_scenarios( + scenario_definitions.baseline(), {"HealthSystem": {"service_availability_postSwitch": value}} + ) + for key, value in service_availability.items() + } - return service_availability + return scenarios -if __name__ == '__main__': +if __name__ == "__main__": from tlo.cli import scenario_run scenario_run([__file__]) From 4474b1a9553c359b0c012540bb2988790d805ade Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Mon, 16 Feb 2026 12:35:57 +0000 Subject: [PATCH 09/55] Uncomment actual scenarios for azure run --- .../scenario_effect_of_treatment_ids.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py index 34f8979110..399832a3b8 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py @@ -101,9 +101,9 @@ def _get_scenarios(self) -> Dict[str, Dict]: service_availability = dict({"Nothing": []}) # For each treatment group, create scenarios keeping only one treatment from that group # Commenting to allow draw 0 to be run and suspended. 
- # service_availability.update( - # {f"Only {treatment}": [treatment] for treatment in treatments} - # ) + service_availability.update( + {f"Only {treatment}": [treatment] for treatment in treatments} + ) scenario_definitions = ScenarioDefinitions() From aa092a462ea2106ba7038d8d2d25ef3151426576 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Mon, 16 Feb 2026 15:23:49 +0000 Subject: [PATCH 10/55] Initial figures --- .../analysis_effect_of_treatment_ids.py | 433 ++++++++++++++++++ 1 file changed, 433 insertions(+) create mode 100644 src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py new file mode 100644 index 0000000000..d323d49a60 --- /dev/null +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -0,0 +1,433 @@ +"""Produce plots to show the impact each set of treatments.""" + +import argparse +import glob +import os +import zipfile +from pathlib import Path +from typing import Tuple + +import numpy as np +import pandas as pd +from matplotlib import pyplot as plt + +from scripts.calibration_analyses.analysis_scripts import plot_legends +from scripts.lcoa_inputs_from_tlo_analyses.scenario_effect_of_treatment_ids import ( + EffectOfEachTreatment, +) +from tlo import Date +from tlo.analysis.utils import ( + CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP, + extract_results, + get_coarse_appt_type, + get_color_cause_of_death_or_daly_label, + get_color_coarse_appt, + get_color_short_treatment_id, + make_age_grp_lookup, + make_age_grp_types, + order_of_cause_of_death_or_daly_label, + order_of_coarse_appt, + squarify_neat, + summarize, + to_age_group, +) + + +def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None): + """Produce standard set of plots describing the effect of each TREATMENT_ID. 
+ - We estimate the epidemiological impact as the EXTRA deaths that would occur if that treatment did not occur. + - We estimate the draw on healthcare system resources as the FEWER appointments when that treatment does not occur. + """ + + TARGET_PERIOD = (Date(2010, 1, 1), Date(2025, 12, 31)) + + # Definitions of general helper functions + make_graph_file_name = lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 + + _, age_grp_lookup = make_age_grp_lookup() + + def target_period() -> str: + """Returns the target period as a string of the form YYYY-YYYY""" + return "-".join(str(t.year) for t in TARGET_PERIOD) + + def get_parameter_names_from_scenario_file() -> Tuple[str]: + """Get the tuple of names of the scenarios from `Scenario` class used to create the results.""" + e = EffectOfEachTreatment() + return tuple(e._scenarios.keys()) + + def format_scenario_name(_sn: str) -> str: + """Return a reformatted scenario name ready for plotting. + - Remove prefix of "No " + - Remove suffix of "*" + """ + + if _sn == "Nothing": + return "Nothing" + # In the scenario called "Nothing", all interventions are off. (So, the difference relative to "Nothing" + # reflects the effects of all the interventions.) 
+ + else: + return _sn.lstrip("Only ") + + def set_param_names_as_column_index_level_0(_df): + """Set the columns index (level 0) as the param_names.""" + ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] + assert len(names_of_cols_level0) == len(_df.columns.levels[0]) + + reformatted_names = map(format_scenario_name, names_of_cols_level0) + _df.columns = _df.columns.set_levels(reformatted_names, level=0) + return _df + + def find_difference_extra_relative_to_comparison( + _ser: pd.Series, comparison: str, scaled: bool = False, drop_comparison: bool = True + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1). Drop the comparison entries. The comparison is made: DIFF(X) = X - COMPARISON.""" + return ( + _ser.unstack() + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=0) + .drop(index=([comparison] if drop_comparison else [])) + .stack() + ) + + def find_mean_difference_in_appts_relative_to_comparison( + _df: pd.DataFrame, comparison: str, drop_comparison: bool = True + ): + """Find the mean difference in the number of appointments between each draw and the comparison draw (within each + run). 
We are looking for the number FEWER appointments that occur when treatment does not happen, so we flip the + sign (as `find_extra_difference_relative_to_comparison` gives the number extra relative the comparison).""" + return -summarize( + pd.concat( + { + _idx: find_difference_extra_relative_to_comparison( + row, comparison=comparison, drop_comparison=drop_comparison + ) + for _idx, row in _df.iterrows() + }, + axis=1, + ).T, + only_mean=True, + ) + + def find_mean_difference_extra_relative_to_comparison_dataframe( + _df: pd.DataFrame, + comparison: str, + drop_comparison: bool = True, + ): + """Same as `find_difference_extra_relative_to_comparison` but for pd.DataFrame, which is the same as + `find_mean_difference_in_appts_relative_to_comparison`. + """ + # todo factorize these three functions more -- it's the same operation for a pd.Series or a pd.DataFrame + return summarize( + pd.concat( + { + _idx: find_difference_extra_relative_to_comparison( + row, comparison=comparison, drop_comparison=drop_comparison + ) + for _idx, row in _df.iterrows() + }, + axis=1, + ).T, + only_mean=True, + ) + + # %% Define parameter names + param_names = get_parameter_names_from_scenario_file() + + # %% Quantify the health gains associated with all interventions combined. 
+ + def get_num_deaths_by_cause_label(_df): + """Return total number of Deaths by label (total by age-group within the TARGET_PERIOD)""" + return _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)].groupby(_df["label"]).size() + + def get_num_dalys_by_cause_label(_df): + """Return total number of DALYS (Stacked) by label (total by age-group within the TARGET_PERIOD)""" + return ( + _df.loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] + .drop(columns=["date", "sex", "age_range", "year"]) + .sum() + ) + + num_deaths_by_cause_label = summarize( + extract_results( + results_folder, + module="tlo.methods.demography", + key="death", + custom_generate_series=get_num_deaths_by_cause_label, + do_scaling=True, + ).pipe(set_param_names_as_column_index_level_0)[["Nothing"]] + ) + + num_dalys_by_cause_label = summarize( + extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_stacked_by_age_and_time", + custom_generate_series=get_num_dalys_by_cause_label, + do_scaling=True, + ).pipe(set_param_names_as_column_index_level_0)[["Nothing"]] + ) + + # Plots..... 
+ def do_bar_plot_with_ci(_df, _ax): + """Make a vertical bar plot for each Cause-of-Death Label for the _df onto axis _ax""" + _df_sorted = _df.reindex(index=CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.keys(), fill_value=0.0).sort_index( + axis=0, key=order_of_cause_of_death_or_daly_label + ) # include all labels and sort + + for i, cause_label in enumerate(_df_sorted.index): + # plot bar for one cause + color = get_color_cause_of_death_or_daly_label(cause_label) + one_cause = _df_sorted.loc[cause_label] + + mean_deaths = one_cause.loc[(slice(None), "mean")] + lower_bar = mean_deaths["Nothing"] + full_height_of_bar = mean_deaths["Nothing"] + upper_bar = full_height_of_bar - lower_bar + lower_bar_yerr = np.array( + [ + one_cause.loc[("Nothing", "mean")] - one_cause.loc[("Nothing", "lower")], + one_cause.loc[("Nothing", "upper")] - one_cause.loc[("Nothing", "mean")], + ] + ).reshape(2, 1) + full_height_bar_yerr = np.array( + [ + one_cause.loc[("Nothing", "mean")] - one_cause.loc[("Nothing", "lower")], + one_cause.loc[("Nothing", "upper")] - one_cause.loc[("Nothing", "mean")], + ] + ).reshape(2, 1) + + (lb,) = ax.bar(i, lower_bar, yerr=lower_bar_yerr, bottom=0, label="No TREATMENT_IDs", color=color) + (ub,) = _ax.bar( + i, + upper_bar, + yerr=full_height_bar_yerr, + bottom=lower_bar, + label="No TREATMENT_IDs", + color=color, + alpha=0.5, + ) + _ax.set_xticks(range(len(_df_sorted.index))) + _ax.set_xticklabels(_df_sorted.index, rotation=90) + _ax.legend([lb, ub], ["All Services Available", "No Services Available"], loc="upper right") + + fig, ax = plt.subplots() + name_of_plot = f"Deaths With No Services, {target_period()}" + do_bar_plot_with_ci(num_deaths_by_cause_label / 1e3, ax) + ax.set_title(name_of_plot) + ax.set_xlabel("Cause of Death") + ax.set_ylabel("Number of Deaths (/1000)") + ax.set_ylim(0, 500) + ax.grid(axis="y") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + fig.tight_layout() + 
fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) + plt.close(fig) + + fig, ax = plt.subplots() + name_of_plot = f"DALYS With No Services, {target_period()}" + do_bar_plot_with_ci(num_dalys_by_cause_label / 1e6, ax) + ax.set_title(name_of_plot) + ax.set_xlabel("Cause of Disability/Death") + ax.set_ylabel("Number of DALYS (/millions)") + ax.set_ylim(0, 30) + ax.set_yticks(np.arange(0, 35, 5)) + ax.grid(axis="y") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) + plt.close(fig) + + # %% Quantify the health gains associated with each TREATMENT_ID (short) individually (i.e., the + # difference in deaths and DALYS between each scenario and the 'Nothing' scenario.) + + def get_num_deaths_by_age_group(_df): + """Return total number of deaths (total by age-group within the TARGET_PERIOD)""" + return ( + _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)] + .groupby(_df["age"].map(age_grp_lookup).astype(make_age_grp_types())) + .size() + ) + + def do_barh_plot_with_ci(_df, _ax): + """Make a horizontal bar plot for each TREATMENT_ID for the _df onto axis _ax""" + errors = pd.concat([_df["mean"] - _df["lower"], _df["upper"] - _df["mean"]], axis=1).T.to_numpy() + _df.plot.barh( + ax=_ax, y="mean", xerr=errors, legend=False, color=[get_color_short_treatment_id(_id) for _id in _df.index] + ) + + def do_label_barh_plot(_df, _ax): + """Add text annotation from values in _df onto _ax""" + y_cords = {ylabel.get_text(): ytick for ytick, ylabel in zip(_ax.get_yticks(), _ax.get_yticklabels())} + + pos_on_rhs = _ax.get_xticks()[-1] + + for label, row in _df.iterrows(): + if row["mean"] > 0: + annotation = f"{round(row['mean'], 1)} ({round(row['lower'])}-{round(row['upper'])}) %" + _ax.annotate( + annotation, + xy=(pos_on_rhs, y_cords.get(label)), + xycoords="data", + horizontalalignment="left", + verticalalignment="center", + size=7, + ) + + 
num_deaths = ( + extract_results( + results_folder, + module="tlo.methods.demography", + key="death", + custom_generate_series=get_num_deaths_by_cause_label, + do_scaling=True, + ) + .pipe(set_param_names_as_column_index_level_0) + .sum() + ) # (Summing across age-groups) + + num_dalys = ( + extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_stacked_by_age_and_time", + custom_generate_series=get_num_dalys_by_cause_label, + do_scaling=True, + ) + .pipe(set_param_names_as_column_index_level_0) + .sum() + ) # (Summing across causes) + + # PLOTS FOR EACH TREATMENT_ID (Short) + + + + # %% Quantify the health associated with each TREATMENT_ID (short) SPLIT BY AGE and WEALTH + + # -- DEATHS + def get_total_num_death_by_agegrp_and_label(_df): + """Return the total number of deaths in the TARGET_PERIOD by age-group and cause label.""" + _df_limited_to_dates = _df.loc[_df["date"].between(*TARGET_PERIOD)] + age_group = to_age_group(_df_limited_to_dates["age"]) + return _df_limited_to_dates.groupby([age_group, "label"])["person_id"].size() + + total_num_death_by_agegrp_and_label = extract_results( + results_folder, + module="tlo.methods.demography", + key="death", + custom_generate_series=get_total_num_death_by_agegrp_and_label, + do_scaling=True, + ).pipe(set_param_names_as_column_index_level_0) + + # -- DALYS + def get_total_num_dalys_by_agegrp_and_label(_df): + """Return the total number of DALYS in the TARGET_PERIOD by age-group and cause label.""" + return ( + _df.loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] + .assign(age_group=_df["age_range"]) + .drop(columns=["date", "year", "sex", "age_range"]) + .melt(id_vars=["age_group"], var_name="label", value_name="dalys") + .groupby(by=["age_group", "label"])["dalys"] + .sum() + ) + + total_num_dalys_by_agegrp_and_label = extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_stacked_by_age_and_time", # <-- for stacking by age and time + 
custom_generate_series=get_total_num_dalys_by_agegrp_and_label, + do_scaling=True, + ).pipe(set_param_names_as_column_index_level_0) + + # %% Quantify the healthcare system resources used with each TREATMENT_ID (short) (The difference in the number of + # appointments between each scenario and the 'Nothing' scenario.) + + # 1) Examine the HSI that are occurring by TREATMENT_ID + + def get_counts_of_hsi_by_short_treatment_id(_df): + """Get the counts of the short TREATMENT_IDs occurring (up to first underscore)""" + _counts_by_treatment_id = ( + _df.loc[pd.to_datetime(_df["date"]).between(*TARGET_PERIOD), "TREATMENT_ID"] + .apply(pd.Series) + .sum() + .astype(int) + ) + _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split("_")[0] + "*") + return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + + counts_of_hsi_by_short_treatment_id = ( + extract_results( + results_folder, + module="tlo.methods.healthsystem.summary", + key="HSI_Event", + custom_generate_series=get_counts_of_hsi_by_short_treatment_id, + do_scaling=True, + ) + .pipe(set_param_names_as_column_index_level_0) + .fillna(0.0) + .sort_index() + ) + + mean_num_hsi_by_short_treatment_id = summarize(counts_of_hsi_by_short_treatment_id, only_mean=True) + + for scenario_name, _counts in mean_num_hsi_by_short_treatment_id.T.iterrows(): + _counts_non_zero = _counts[_counts > 0] + + if len(_counts_non_zero): + fig, ax = plt.subplots() + name_of_plot = f"HSI Events Occurring, {scenario_name}, {target_period()}" + squarify_neat( + sizes=_counts_non_zero.values, + label=_counts_non_zero.index, + colormap=get_color_short_treatment_id, + alpha=1, + pad=True, + ax=ax, + text_kwargs={"color": "black", "size": 8}, + ) + ax.set_axis_off() + ax.set_title(name_of_plot, {"size": 12, "color": "black"}) + fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) + plt.close(fig) + + # 2) Examine the Difference in the number/type of appointments occurring + + def get_counts_of_appts(_df): 
+ """Get the counts of appointments of each type being used.""" + return ( + _df.loc[pd.to_datetime(_df["date"]).between(*TARGET_PERIOD), "Number_By_Appt_Type_Code"] + .apply(pd.Series) + .sum() + .astype(int) + ) + + counts_of_appts = ( + extract_results( + results_folder, + module="tlo.methods.healthsystem.summary", + key="HSI_Event", + custom_generate_series=get_counts_of_appts, + do_scaling=True, + ) + .pipe(set_param_names_as_column_index_level_0) + .fillna(0.0) + .sort_index() + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("results_folder", type=Path) + args = parser.parse_args() + + apply(results_folder=args.results_folder, output_folder=args.results_folder, resourcefilepath=Path("./resources")) + + # Plot the legends + plot_legends.apply(results_folder=None, output_folder=args.results_folder, resourcefilepath=Path("./resources")) + + with zipfile.ZipFile(args.results_folder / f"images_{args.results_folder.parts[-1]}.zip", mode="w") as archive: + for filename in sorted(glob.glob(str(args.results_folder / "*.png"))): + archive.write(filename, os.path.basename(filename)) From 6ba00465f6516c121822bead035bfe540f0f6b3e Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Tue, 17 Feb 2026 11:14:10 +0000 Subject: [PATCH 11/55] Minor edits --- .../analysis_effect_of_treatment_ids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index d323d49a60..876b3ca3ea 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -39,7 +39,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No - We estimate the draw on healthcare system resources as the FEWER appointments when that treatment does not occur. 
""" - TARGET_PERIOD = (Date(2010, 1, 1), Date(2025, 12, 31)) + TARGET_PERIOD = (Date(2010, 1, 1), Date(2015, 12, 31)) # Definitions of general helper functions make_graph_file_name = lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 From f8e060fdcb8e94b3d4b2f2b0fa6392c2c6191936 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Mon, 23 Feb 2026 14:47:02 +0000 Subject: [PATCH 12/55] Series empty;debugging --- .../analysis_effect_of_treatment_ids.py | 164 +++++++++++++----- 1 file changed, 118 insertions(+), 46 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index 876b3ca3ea..230883c768 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -9,6 +9,8 @@ import numpy as np import pandas as pd +from matplotlib.lines import Line2D +from matplotlib.patches import Patch from matplotlib import pyplot as plt from scripts.calibration_analyses.analysis_scripts import plot_legends @@ -39,7 +41,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No - We estimate the draw on healthcare system resources as the FEWER appointments when that treatment does not occur. 
""" - TARGET_PERIOD = (Date(2010, 1, 1), Date(2015, 12, 31)) + TARGET_PERIOD = (Date(2026, 1, 1), Date(2041, 1, 1)) # Definitions of general helper functions make_graph_file_name = lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 @@ -50,6 +52,21 @@ def target_period() -> str: """Returns the target period as a string of the form YYYY-YYYY""" return "-".join(str(t.year) for t in TARGET_PERIOD) + def get_periods_within_target_period(period_length_years: int) -> list[tuple[str, tuple[int, int]]]: + """Return chunks within TARGET_PERIOD as [(label, (start_year, end_year)), ...].""" + if period_length_years <= 0: + raise ValueError("period_length_years must be a positive integer.") + start_year, end_year = TARGET_PERIOD[0].year, TARGET_PERIOD[1].year + periods = [] + for chunk_start in range(start_year, end_year + 1, period_length_years): + chunk_end = min(chunk_start + period_length_years - 1, end_year) + periods.append((f"{chunk_start}-{chunk_end}", (chunk_start, chunk_end))) + return periods + + period_length_years_for_bar_plots = 5 + periods_for_bar_plots = get_periods_within_target_period(period_length_years=period_length_years_for_bar_plots) + period_labels_for_bar_plots = [label for label, _ in periods_for_bar_plots] + def get_parameter_names_from_scenario_file() -> Tuple[str]: """Get the tuple of names of the scenarios from `Scenario` class used to create the results.""" e = EffectOfEachTreatment() @@ -57,10 +74,10 @@ def get_parameter_names_from_scenario_file() -> Tuple[str]: def format_scenario_name(_sn: str) -> str: """Return a reformatted scenario name ready for plotting. + - Remove prefix of "No " - Remove suffix of "*" """ - if _sn == "Nothing": return "Nothing" # In the scenario called "Nothing", all interventions are off. (So, the difference relative to "Nothing" @@ -137,6 +154,7 @@ def find_mean_difference_extra_relative_to_comparison_dataframe( # %% Quantify the health gains associated with all interventions combined. 
+ def get_num_deaths_by_cause_label(_df): """Return total number of Deaths by label (total by age-group within the TARGET_PERIOD)""" return _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)].groupby(_df["label"]).size() @@ -149,14 +167,68 @@ def get_num_dalys_by_cause_label(_df): .sum() ) + def make_get_num_deaths_by_cause_label_and_period(period_length_years: int): + """Create helper to summarize deaths by cause label and period chunk + overall period.""" + + periods = get_periods_within_target_period(period_length_years=period_length_years) + period_lookup = { + year: period_label + for period_label, (start_year, end_year) in periods + for year in range(start_year, end_year + 1) + } + + def _get_num_deaths_by_cause_label_and_period(_df): + _df_in_target = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)].copy() + _df_in_target["year"] = pd.to_datetime(_df_in_target["date"]).dt.year + _df_in_target["period"] = _df_in_target["year"].map(period_lookup) + + chunked = _df_in_target.groupby(["label", "period"]).size() + overall = _df_in_target.groupby("label").size() + overall.index = pd.MultiIndex.from_arrays( + [overall.index, np.repeat(target_period(), len(overall.index))], names=["label", "period"] + ) + return pd.concat([chunked, overall]).sort_index() + + return _get_num_deaths_by_cause_label_and_period + + def make_get_num_dalys_by_cause_label_and_period(period_length_years: int): + """Create helper to summarize DALYS by cause label and period chunk + overall period.""" + periods = get_periods_within_target_period(period_length_years=period_length_years) + period_lookup = { + year: period_label + for period_label, (period_start, period_end) in periods + for year in range(period_start, period_end + 1) + } + + def _get_num_dalys_by_cause_label_and_period(_df): + start_year, end_year = TARGET_PERIOD[0].year, TARGET_PERIOD[1].year + _df_in_target = _df.loc[_df.year.between(start_year, end_year)].copy() + _df_in_target["period"] = 
_df_in_target["year"].map(period_lookup) + + melted = ( + _df_in_target.drop(columns=["date", "sex", "age_range"]) + .melt(id_vars=["year", "period"], var_name="label", value_name="dalys") + ) + chunked = melted.groupby(["label", "period"])["dalys"].sum() + overall = melted.groupby("label")["dalys"].sum() + overall.index = pd.MultiIndex.from_arrays( + [overall.index, np.repeat(target_period(), len(overall.index))], names=["label", "period"] + ) + return pd.concat([chunked, overall]).sort_index() + + return _get_num_dalys_by_cause_label_and_period + + num_deaths_by_cause_label = summarize( extract_results( results_folder, module="tlo.methods.demography", key="death", - custom_generate_series=get_num_deaths_by_cause_label, + custom_generate_series=make_get_num_deaths_by_cause_label_and_period( + period_length_years=period_length_years_for_bar_plots + ), do_scaling=True, - ).pipe(set_param_names_as_column_index_level_0)[["Nothing"]] + ).pipe(set_param_names_as_column_index_level_0)[["Nothing", "Contraception_Routine"]] ) num_dalys_by_cause_label = summarize( @@ -164,53 +236,52 @@ def get_num_dalys_by_cause_label(_df): results_folder, module="tlo.methods.healthburden", key="dalys_stacked_by_age_and_time", - custom_generate_series=get_num_dalys_by_cause_label, + custom_generate_series=make_get_num_dalys_by_cause_label_and_period( + period_length_years=period_length_years_for_bar_plots + ), do_scaling=True, - ).pipe(set_param_names_as_column_index_level_0)[["Nothing"]] + ).pipe(set_param_names_as_column_index_level_0)[["Nothing", "Contraception_Routine"]] ) # Plots..... 
def do_bar_plot_with_ci(_df, _ax): - """Make a vertical bar plot for each Cause-of-Death Label for the _df onto axis _ax""" - _df_sorted = _df.reindex(index=CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.keys(), fill_value=0.0).sort_index( - axis=0, key=order_of_cause_of_death_or_daly_label - ) # include all labels and sort + """Make vertical bars by cause, decomposed into period chunks, with overall-period CI.""" + _df_nothing = _df["Contraception_Routine"] + _df_nothing = _df_nothing.reindex( + pd.MultiIndex.from_product( + [CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.keys(), period_labels_for_bar_plots + [target_period()]], + names=["label", "period"], + ), + fill_value=0.0, + ) + _df_nothing = _df_nothing.sort_index(axis=0, level=0, key=order_of_cause_of_death_or_daly_label) + + cause_labels = list(_df_nothing.index.get_level_values("label").unique()) - for i, cause_label in enumerate(_df_sorted.index): - # plot bar for one cause + for i, cause_label in enumerate(cause_labels): color = get_color_cause_of_death_or_daly_label(cause_label) - one_cause = _df_sorted.loc[cause_label] - - mean_deaths = one_cause.loc[(slice(None), "mean")] - lower_bar = mean_deaths["Nothing"] - full_height_of_bar = mean_deaths["Nothing"] - upper_bar = full_height_of_bar - lower_bar - lower_bar_yerr = np.array( - [ - one_cause.loc[("Nothing", "mean")] - one_cause.loc[("Nothing", "lower")], - one_cause.loc[("Nothing", "upper")] - one_cause.loc[("Nothing", "mean")], - ] - ).reshape(2, 1) - full_height_bar_yerr = np.array( - [ - one_cause.loc[("Nothing", "mean")] - one_cause.loc[("Nothing", "lower")], - one_cause.loc[("Nothing", "upper")] - one_cause.loc[("Nothing", "mean")], - ] - ).reshape(2, 1) - - (lb,) = ax.bar(i, lower_bar, yerr=lower_bar_yerr, bottom=0, label="No TREATMENT_IDs", color=color) - (ub,) = _ax.bar( - i, - upper_bar, - yerr=full_height_bar_yerr, - bottom=lower_bar, - label="No TREATMENT_IDs", - color=color, - alpha=0.5, - ) - _ax.set_xticks(range(len(_df_sorted.index))) - 
_ax.set_xticklabels(_df_sorted.index, rotation=90) - _ax.legend([lb, ub], ["All Services Available", "No Services Available"], loc="upper right") + one_cause = _df_nothing.xs(cause_label, level="label") + + bottom = 0.0 + for j, period_label in enumerate(period_labels_for_bar_plots): + chunk_height = one_cause.loc[period_label, "mean"] if period_label in one_cause.index else 0.0 + _ax.bar(i, chunk_height, bottom=bottom, color=color, alpha=0.9 if j % 2 == 0 else 0.35) + bottom += chunk_height + + mean_value = one_cause.loc[target_period(), "mean"] + lower_value = one_cause.loc[target_period(), "lower"] + upper_value = one_cause.loc[target_period(), "upper"] + overall_yerr = np.array([[mean_value - lower_value], [upper_value - mean_value]]) + _ax.errorbar(i, mean_value, yerr=overall_yerr, fmt="none", ecolor="black", capsize=2, linewidth=1.2) + + _ax.set_xticks(range(len(cause_labels))) + _ax.set_xticklabels(cause_labels, rotation=90) + chunk_legend_handles = [ + Patch(facecolor="grey", alpha=0.9 if i % 2 == 0 else 0.35, label=period_label) + for i, period_label in enumerate(period_labels_for_bar_plots) + ] + ci_legend_handle = Line2D([0], [0], color="black", marker="|", markersize=8, linewidth=1.2, label="95% CI") + _ax.legend(handles=chunk_legend_handles + [ci_legend_handle], loc="upper right") fig, ax = plt.subplots() name_of_plot = f"Deaths With No Services, {target_period()}" @@ -421,12 +492,13 @@ def get_counts_of_appts(_df): if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("results_folder", type=Path) + parser.add_argument("outputs_folder", type=Path) args = parser.parse_args() - apply(results_folder=args.results_folder, output_folder=args.results_folder, resourcefilepath=Path("./resources")) + apply(results_folder=args.results_folder, output_folder=args.outputs_folder, resourcefilepath=Path("./resources")) # Plot the legends - plot_legends.apply(results_folder=None, output_folder=args.results_folder, 
resourcefilepath=Path("./resources")) + plot_legends.apply(results_folder=None, output_folder=args.outputs_folder, resourcefilepath=Path("./resources")) with zipfile.ZipFile(args.results_folder / f"images_{args.results_folder.parts[-1]}.zip", mode="w") as archive: for filename in sorted(glob.glob(str(args.results_folder / "*.png"))): From 373081a03fb42fde397a554059e64a0869346419 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Mon, 23 Feb 2026 15:24:58 +0000 Subject: [PATCH 13/55] Figures for ContraceptionRoutine only; deaths empty; debugging --- .../analysis_effect_of_treatment_ids.py | 689 +++++++++--------- .../scenario_effect_of_treatment_ids.py | 3 +- 2 files changed, 360 insertions(+), 332 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index 230883c768..e1a093ad75 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -9,9 +9,9 @@ import numpy as np import pandas as pd +from matplotlib import pyplot as plt from matplotlib.lines import Line2D from matplotlib.patches import Patch -from matplotlib import pyplot as plt from scripts.calibration_analyses.analysis_scripts import plot_legends from scripts.lcoa_inputs_from_tlo_analyses.scenario_effect_of_treatment_ids import ( @@ -21,214 +21,358 @@ from tlo.analysis.utils import ( CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP, extract_results, - get_coarse_appt_type, get_color_cause_of_death_or_daly_label, - get_color_coarse_appt, get_color_short_treatment_id, make_age_grp_lookup, make_age_grp_types, order_of_cause_of_death_or_daly_label, - order_of_coarse_appt, squarify_neat, summarize, to_age_group, ) +TARGET_PERIOD = (Date(2010, 1, 1), Date(2025, 12, 31)) +PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS = 5 + + +def extract_deaths_total(df: pd.DataFrame) -> pd.Series: + 
return pd.Series({"Total": len(df)}) + +def target_period(target_period_tuple: tuple[Date, Date] = TARGET_PERIOD) -> str: + """Returns the target period as a string of the form YYYY-YYYY.""" + return "-".join(str(t.year) for t in target_period_tuple) + + +def get_periods_within_target_period( + period_length_years: int, + target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, +) -> list[tuple[str, tuple[int, int]]]: + """Return chunks within target period as [(label, (start_year, end_year)), ...].""" + if period_length_years <= 0: + raise ValueError("period_length_years must be a positive integer.") + start_year, end_year = target_period_tuple[0].year, target_period_tuple[1].year + periods = [] + for chunk_start in range(start_year, end_year + 1, period_length_years): + chunk_end = min(chunk_start + period_length_years - 1, end_year) + periods.append((f"{chunk_start}-{chunk_end}", (chunk_start, chunk_end))) + return periods + + +def get_parameter_names_from_scenario_file() -> Tuple[str]: + """Get tuple of scenario names from Scenario class used to create results.""" + e = EffectOfEachTreatment() + return tuple(e._scenarios.keys()) + + +def format_scenario_name(_sn: str) -> str: + """Return reformatted scenario name ready for plotting.""" + if _sn == "Nothing": + return "Nothing" + return _sn.lstrip("Only ") + + +def set_param_names_as_column_index_level_0(_df: pd.DataFrame, param_names: tuple[str, ...]) -> pd.DataFrame: + """Set columns index level 0 as scenario param names.""" + ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] + assert len(names_of_cols_level0) == len(_df.columns.levels[0]) + + reformatted_names = map(format_scenario_name, names_of_cols_level0) + _df.columns = _df.columns.set_levels(reformatted_names, level=0) + return _df + + +def find_difference_extra_relative_to_comparison( + _ser: pd.Series, + comparison: str, + scaled: 
bool = False, + drop_comparison: bool = True, +): + """Find run-wise differences relative to comparison in a series with multi-index.""" + return ( + _ser.unstack() + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=0) + .drop(index=([comparison] if drop_comparison else [])) + .stack() + ) -def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None): - """Produce standard set of plots describing the effect of each TREATMENT_ID. - - We estimate the epidemiological impact as the EXTRA deaths that would occur if that treatment did not occur. - - We estimate the draw on healthcare system resources as the FEWER appointments when that treatment does not occur. - """ - TARGET_PERIOD = (Date(2026, 1, 1), Date(2041, 1, 1)) +def find_mean_difference_in_appts_relative_to_comparison( + _df: pd.DataFrame, + comparison: str, + drop_comparison: bool = True, +): + """Find mean fewer appointments when treatment does not happen relative to comparison.""" + return -summarize( + pd.concat( + { + _idx: find_difference_extra_relative_to_comparison( + row, comparison=comparison, drop_comparison=drop_comparison + ) + for _idx, row in _df.iterrows() + }, + axis=1, + ).T, + only_mean=True, + ) - # Definitions of general helper functions - make_graph_file_name = lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 - _, age_grp_lookup = make_age_grp_lookup() +def find_mean_difference_extra_relative_to_comparison_dataframe( + _df: pd.DataFrame, + comparison: str, + drop_comparison: bool = True, +): + """Same as find_difference_extra_relative_to_comparison but for dataframe.""" + return summarize( + pd.concat( + { + _idx: find_difference_extra_relative_to_comparison( + row, comparison=comparison, drop_comparison=drop_comparison + ) + for _idx, row in _df.iterrows() + }, + axis=1, + ).T, + only_mean=True, + ) - def target_period() -> str: - """Returns the target period as a string of the form YYYY-YYYY""" - return 
"-".join(str(t.year) for t in TARGET_PERIOD) - - def get_periods_within_target_period(period_length_years: int) -> list[tuple[str, tuple[int, int]]]: - """Return chunks within TARGET_PERIOD as [(label, (start_year, end_year)), ...].""" - if period_length_years <= 0: - raise ValueError("period_length_years must be a positive integer.") - start_year, end_year = TARGET_PERIOD[0].year, TARGET_PERIOD[1].year - periods = [] - for chunk_start in range(start_year, end_year + 1, period_length_years): - chunk_end = min(chunk_start + period_length_years - 1, end_year) - periods.append((f"{chunk_start}-{chunk_end}", (chunk_start, chunk_end))) - return periods - - period_length_years_for_bar_plots = 5 - periods_for_bar_plots = get_periods_within_target_period(period_length_years=period_length_years_for_bar_plots) - period_labels_for_bar_plots = [label for label, _ in periods_for_bar_plots] - - def get_parameter_names_from_scenario_file() -> Tuple[str]: - """Get the tuple of names of the scenarios from `Scenario` class used to create the results.""" - e = EffectOfEachTreatment() - return tuple(e._scenarios.keys()) - - def format_scenario_name(_sn: str) -> str: - """Return a reformatted scenario name ready for plotting. - - - Remove prefix of "No " - - Remove suffix of "*" - """ - if _sn == "Nothing": - return "Nothing" - # In the scenario called "Nothing", all interventions are off. (So, the difference relative to "Nothing" - # reflects the effects of all the interventions.) 
- - else: - return _sn.lstrip("Only ") - - def set_param_names_as_column_index_level_0(_df): - """Set the columns index (level 0) as the param_names.""" - ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} - names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] - assert len(names_of_cols_level0) == len(_df.columns.levels[0]) - - reformatted_names = map(format_scenario_name, names_of_cols_level0) - _df.columns = _df.columns.set_levels(reformatted_names, level=0) - return _df - - def find_difference_extra_relative_to_comparison( - _ser: pd.Series, comparison: str, scaled: bool = False, drop_comparison: bool = True - ): - """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) - within the runs (level 1). Drop the comparison entries. The comparison is made: DIFF(X) = X - COMPARISON.""" - return ( - _ser.unstack() - .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=0) - .drop(index=([comparison] if drop_comparison else [])) - .stack() - ) - def find_mean_difference_in_appts_relative_to_comparison( - _df: pd.DataFrame, comparison: str, drop_comparison: bool = True - ): - """Find the mean difference in the number of appointments between each draw and the comparison draw (within each - run). 
We are looking for the number FEWER appointments that occur when treatment does not happen, so we flip the - sign (as `find_extra_difference_relative_to_comparison` gives the number extra relative the comparison).""" - return -summarize( - pd.concat( - { - _idx: find_difference_extra_relative_to_comparison( - row, comparison=comparison, drop_comparison=drop_comparison - ) - for _idx, row in _df.iterrows() - }, - axis=1, - ).T, - only_mean=True, - ) +def get_num_deaths_by_cause_label(_df: pd.DataFrame, target_period_tuple: tuple[Date, Date] = TARGET_PERIOD) -> pd.Series: + """Return total deaths by label within target period.""" + return _df.loc[pd.to_datetime(_df.date).between(*target_period_tuple)].groupby(_df["label"]).size() - def find_mean_difference_extra_relative_to_comparison_dataframe( - _df: pd.DataFrame, - comparison: str, - drop_comparison: bool = True, - ): - """Same as `find_difference_extra_relative_to_comparison` but for pd.DataFrame, which is the same as - `find_mean_difference_in_appts_relative_to_comparison`. - """ - # todo factorize these three functions more -- it's the same operation for a pd.Series or a pd.DataFrame - return summarize( - pd.concat( - { - _idx: find_difference_extra_relative_to_comparison( - row, comparison=comparison, drop_comparison=drop_comparison - ) - for _idx, row in _df.iterrows() - }, - axis=1, - ).T, - only_mean=True, - ) - # %% Define parameter names - param_names = get_parameter_names_from_scenario_file() +def get_num_dalys_by_cause_label(_df: pd.DataFrame, target_period_tuple: tuple[Date, Date] = TARGET_PERIOD) -> pd.Series: + """Return total DALYS by label within target period.""" + return ( + _df.loc[_df.year.between(*[i.year for i in target_period_tuple])] + .drop(columns=["date", "sex", "age_range", "year"]) + .sum() + ) + - # %% Quantify the health gains associated with all interventions combined. 
+def make_get_num_deaths_by_cause_label_and_period( + period_length_years: int, + target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, +): + """Create helper that summarizes deaths by cause and period chunks + overall.""" + periods = get_periods_within_target_period( + period_length_years=period_length_years, + target_period_tuple=target_period_tuple, + ) + period_lookup = { + year: period_label + for period_label, (start_year, end_year) in periods + for year in range(start_year, end_year + 1) + } + target_period_label = target_period(target_period_tuple) + + def _get_num_deaths_by_cause_label_and_period(_df: pd.DataFrame) -> pd.Series: + _df_in_target = _df.loc[pd.to_datetime(_df.date).between(*target_period_tuple)].copy() + _df_in_target["year"] = pd.to_datetime(_df_in_target["date"]).dt.year + _df_in_target["period"] = _df_in_target["year"].map(period_lookup) + + chunked = _df_in_target.groupby(["label", "period"]).size() + overall = _df_in_target.groupby("label").size() + overall.index = pd.MultiIndex.from_arrays( + [overall.index, np.repeat(target_period_label, len(overall.index))], names=["label", "period"] + ) + return pd.concat([chunked, overall]).sort_index() + return _get_num_deaths_by_cause_label_and_period - def get_num_deaths_by_cause_label(_df): - """Return total number of Deaths by label (total by age-group within the TARGET_PERIOD)""" - return _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)].groupby(_df["label"]).size() - def get_num_dalys_by_cause_label(_df): - """Return total number of DALYS (Stacked) by label (total by age-group within the TARGET_PERIOD)""" - return ( - _df.loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] - .drop(columns=["date", "sex", "age_range", "year"]) - .sum() +def make_get_num_dalys_by_cause_label_and_period( + period_length_years: int, + target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, +): + """Create helper that summarizes DALYS by cause and period chunks + overall.""" + periods = 
get_periods_within_target_period( + period_length_years=period_length_years, + target_period_tuple=target_period_tuple, + ) + period_lookup = { + year: period_label + for period_label, (period_start, period_end) in periods + for year in range(period_start, period_end + 1) + } + start_year, end_year = target_period_tuple[0].year, target_period_tuple[1].year + target_period_label = target_period(target_period_tuple) + + def _get_num_dalys_by_cause_label_and_period(_df: pd.DataFrame) -> pd.Series: + _df_in_target = _df.loc[_df.year.between(start_year, end_year)].copy() + _df_in_target["period"] = _df_in_target["year"].map(period_lookup) + + melted = ( + _df_in_target.drop(columns=["date", "sex", "age_range"]) + .melt(id_vars=["year", "period"], var_name="label", value_name="dalys") + ) + chunked = melted.groupby(["label", "period"])["dalys"].sum() + overall = melted.groupby("label")["dalys"].sum() + overall.index = pd.MultiIndex.from_arrays( + [overall.index, np.repeat(target_period_label, len(overall.index))], names=["label", "period"] ) + return pd.concat([chunked, overall]).sort_index() + + return _get_num_dalys_by_cause_label_and_period + + +def do_bar_plot_with_ci( + _df: pd.DataFrame, + _ax, + period_labels_for_bar_plots: list[str], + target_period_label: str, +): + """Make vertical bars by cause, decomposed into period chunks, with overall-period CI.""" + _df_nothing = _df["Contraception_Routine"] + _df_nothing = _df_nothing.reindex( + pd.MultiIndex.from_product( + [CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.keys(), period_labels_for_bar_plots + [target_period_label]], + names=["label", "period"], + ), + fill_value=0.0, + ) + _df_nothing = _df_nothing.sort_index(axis=0, level=0, key=order_of_cause_of_death_or_daly_label) + + cause_labels = list(_df_nothing.index.get_level_values("label").unique()) + + for i, cause_label in enumerate(cause_labels): + color = get_color_cause_of_death_or_daly_label(cause_label) + one_cause = _df_nothing.xs(cause_label, 
level="label") + + bottom = 0.0 + for j, period_label in enumerate(period_labels_for_bar_plots): + chunk_height = one_cause.loc[period_label, "mean"] if period_label in one_cause.index else 0.0 + _ax.bar(i, chunk_height, bottom=bottom, color=color, alpha=0.9 if j % 2 == 0 else 0.35) + bottom += chunk_height + + mean_value = one_cause.loc[target_period_label, "mean"] + lower_value = one_cause.loc[target_period_label, "lower"] + upper_value = one_cause.loc[target_period_label, "upper"] + overall_yerr = np.array([[mean_value - lower_value], [upper_value - mean_value]]) + _ax.errorbar(i, mean_value, yerr=overall_yerr, fmt="none", ecolor="black", capsize=2, linewidth=1.2) + + _ax.set_xticks(range(len(cause_labels))) + _ax.set_xticklabels(cause_labels, rotation=90) + chunk_legend_handles = [ + Patch(facecolor="grey", alpha=0.9 if i % 2 == 0 else 0.35, label=period_label) + for i, period_label in enumerate(period_labels_for_bar_plots) + ] + ci_legend_handle = Line2D([0], [0], color="black", marker="|", markersize=8, linewidth=1.2, label="95% CI") + _ax.legend(handles=chunk_legend_handles + [ci_legend_handle], loc="upper right") + + +def get_num_deaths_by_age_group( + _df: pd.DataFrame, + age_grp_lookup: dict, + target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, +): + """Return total deaths by age-group in target period.""" + return ( + _df.loc[pd.to_datetime(_df.date).between(*target_period_tuple)] + .groupby(_df["age"].map(age_grp_lookup).astype(make_age_grp_types())) + .size() + ) - def make_get_num_deaths_by_cause_label_and_period(period_length_years: int): - """Create helper to summarize deaths by cause label and period chunk + overall period.""" - - periods = get_periods_within_target_period(period_length_years=period_length_years) - period_lookup = { - year: period_label - for period_label, (start_year, end_year) in periods - for year in range(start_year, end_year + 1) - } - - def _get_num_deaths_by_cause_label_and_period(_df): - _df_in_target = 
_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)].copy() - _df_in_target["year"] = pd.to_datetime(_df_in_target["date"]).dt.year - _df_in_target["period"] = _df_in_target["year"].map(period_lookup) - - chunked = _df_in_target.groupby(["label", "period"]).size() - overall = _df_in_target.groupby("label").size() - overall.index = pd.MultiIndex.from_arrays( - [overall.index, np.repeat(target_period(), len(overall.index))], names=["label", "period"] - ) - return pd.concat([chunked, overall]).sort_index() - - return _get_num_deaths_by_cause_label_and_period - - def make_get_num_dalys_by_cause_label_and_period(period_length_years: int): - """Create helper to summarize DALYS by cause label and period chunk + overall period.""" - periods = get_periods_within_target_period(period_length_years=period_length_years) - period_lookup = { - year: period_label - for period_label, (period_start, period_end) in periods - for year in range(period_start, period_end + 1) - } - - def _get_num_dalys_by_cause_label_and_period(_df): - start_year, end_year = TARGET_PERIOD[0].year, TARGET_PERIOD[1].year - _df_in_target = _df.loc[_df.year.between(start_year, end_year)].copy() - _df_in_target["period"] = _df_in_target["year"].map(period_lookup) - - melted = ( - _df_in_target.drop(columns=["date", "sex", "age_range"]) - .melt(id_vars=["year", "period"], var_name="label", value_name="dalys") - ) - chunked = melted.groupby(["label", "period"])["dalys"].sum() - overall = melted.groupby("label")["dalys"].sum() - overall.index = pd.MultiIndex.from_arrays( - [overall.index, np.repeat(target_period(), len(overall.index))], names=["label", "period"] + +def do_barh_plot_with_ci(_df: pd.DataFrame, _ax): + """Make horizontal bar plot for each treatment id.""" + errors = pd.concat([_df["mean"] - _df["lower"], _df["upper"] - _df["mean"]], axis=1).T.to_numpy() + _df.plot.barh(ax=_ax, y="mean", xerr=errors, legend=False, color=[get_color_short_treatment_id(_id) for _id in _df.index]) + + +def 
do_label_barh_plot(_df: pd.DataFrame, _ax): + """Add text annotation from values in dataframe onto axis.""" + y_cords = {ylabel.get_text(): ytick for ytick, ylabel in zip(_ax.get_yticks(), _ax.get_yticklabels())} + pos_on_rhs = _ax.get_xticks()[-1] + + for label, row in _df.iterrows(): + if row["mean"] > 0: + annotation = f"{round(row['mean'], 1)} ({round(row['lower'])}-{round(row['upper'])}) %" + _ax.annotate( + annotation, + xy=(pos_on_rhs, y_cords.get(label)), + xycoords="data", + horizontalalignment="left", + verticalalignment="center", + size=7, ) - return pd.concat([chunked, overall]).sort_index() - return _get_num_dalys_by_cause_label_and_period + +def get_total_num_death_by_agegrp_and_label( + _df: pd.DataFrame, + target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, +) -> pd.Series: + """Return deaths in target period by age-group and cause label.""" + _df_limited_to_dates = _df.loc[_df["date"].between(*target_period_tuple)] + age_group = to_age_group(_df_limited_to_dates["age"]) + return _df_limited_to_dates.groupby([age_group, "label"])["person_id"].size() + + +def get_total_num_dalys_by_agegrp_and_label( + _df: pd.DataFrame, + target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, +) -> pd.Series: + """Return DALYS in target period by age-group and cause label.""" + return ( + _df.loc[_df.year.between(*[i.year for i in target_period_tuple])] + .assign(age_group=_df["age_range"]) + .drop(columns=["date", "year", "sex", "age_range"]) + .melt(id_vars=["age_group"], var_name="label", value_name="dalys") + .groupby(by=["age_group", "label"])["dalys"] + .sum() + ) +def get_counts_of_hsi_by_short_treatment_id( + _df: pd.DataFrame, + target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, +) -> pd.Series: + """Get counts of short treatment ids occurring in target period.""" + _counts_by_treatment_id = ( + _df.loc[pd.to_datetime(_df["date"]).between(*target_period_tuple), "TREATMENT_ID"] + .apply(pd.Series) + .sum() + .astype(int) + ) + _short_treatment_id = 
_counts_by_treatment_id.index.map(lambda x: x.split("_")[0] + "*") + return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + + +def get_counts_of_appts(_df: pd.DataFrame, target_period_tuple: tuple[Date, Date] = TARGET_PERIOD) -> pd.Series: + """Get counts of appointments of each type being used in target period.""" + return ( + _df.loc[pd.to_datetime(_df["date"]).between(*target_period_tuple), "Number_By_Appt_Type_Code"] + .apply(pd.Series) + .sum() + .astype(int) + ) + + +def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None): + """Produce standard plots describing effect of each TREATMENT_ID.""" + make_graph_file_name = lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 + + _, age_grp_lookup = make_age_grp_lookup() + period_labels_for_bar_plots = [ + label + for label, _ in get_periods_within_target_period( + period_length_years=PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS, + target_period_tuple=TARGET_PERIOD, + ) + ] + target_period_label = target_period(TARGET_PERIOD) + + param_names = get_parameter_names_from_scenario_file() + num_deaths_by_cause_label = summarize( extract_results( results_folder, module="tlo.methods.demography", key="death", - custom_generate_series=make_get_num_deaths_by_cause_label_and_period( - period_length_years=period_length_years_for_bar_plots - ), + custom_generate_series=extract_deaths_total, do_scaling=True, - ).pipe(set_param_names_as_column_index_level_0)[["Nothing", "Contraception_Routine"]] + ).pipe(set_param_names_as_column_index_level_0, param_names=param_names)[["Contraception_Routine"]] ) num_dalys_by_cause_label = summarize( @@ -237,55 +381,16 @@ def _get_num_dalys_by_cause_label_and_period(_df): module="tlo.methods.healthburden", key="dalys_stacked_by_age_and_time", custom_generate_series=make_get_num_dalys_by_cause_label_and_period( - period_length_years=period_length_years_for_bar_plots + period_length_years=PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS, + 
target_period_tuple=TARGET_PERIOD, ), do_scaling=True, - ).pipe(set_param_names_as_column_index_level_0)[["Nothing", "Contraception_Routine"]] + ).pipe(set_param_names_as_column_index_level_0, param_names=param_names)[["Contraception_Routine"]] ) - # Plots..... - def do_bar_plot_with_ci(_df, _ax): - """Make vertical bars by cause, decomposed into period chunks, with overall-period CI.""" - _df_nothing = _df["Contraception_Routine"] - _df_nothing = _df_nothing.reindex( - pd.MultiIndex.from_product( - [CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.keys(), period_labels_for_bar_plots + [target_period()]], - names=["label", "period"], - ), - fill_value=0.0, - ) - _df_nothing = _df_nothing.sort_index(axis=0, level=0, key=order_of_cause_of_death_or_daly_label) - - cause_labels = list(_df_nothing.index.get_level_values("label").unique()) - - for i, cause_label in enumerate(cause_labels): - color = get_color_cause_of_death_or_daly_label(cause_label) - one_cause = _df_nothing.xs(cause_label, level="label") - - bottom = 0.0 - for j, period_label in enumerate(period_labels_for_bar_plots): - chunk_height = one_cause.loc[period_label, "mean"] if period_label in one_cause.index else 0.0 - _ax.bar(i, chunk_height, bottom=bottom, color=color, alpha=0.9 if j % 2 == 0 else 0.35) - bottom += chunk_height - - mean_value = one_cause.loc[target_period(), "mean"] - lower_value = one_cause.loc[target_period(), "lower"] - upper_value = one_cause.loc[target_period(), "upper"] - overall_yerr = np.array([[mean_value - lower_value], [upper_value - mean_value]]) - _ax.errorbar(i, mean_value, yerr=overall_yerr, fmt="none", ecolor="black", capsize=2, linewidth=1.2) - - _ax.set_xticks(range(len(cause_labels))) - _ax.set_xticklabels(cause_labels, rotation=90) - chunk_legend_handles = [ - Patch(facecolor="grey", alpha=0.9 if i % 2 == 0 else 0.35, label=period_label) - for i, period_label in enumerate(period_labels_for_bar_plots) - ] - ci_legend_handle = Line2D([0], [0], color="black", marker="|", 
markersize=8, linewidth=1.2, label="95% CI") - _ax.legend(handles=chunk_legend_handles + [ci_legend_handle], loc="upper right") - fig, ax = plt.subplots() - name_of_plot = f"Deaths With No Services, {target_period()}" - do_bar_plot_with_ci(num_deaths_by_cause_label / 1e3, ax) + name_of_plot = f"Deaths With No Services, {target_period_label}" + do_bar_plot_with_ci(num_deaths_by_cause_label / 1e3, ax, period_labels_for_bar_plots, target_period_label) ax.set_title(name_of_plot) ax.set_xlabel("Cause of Death") ax.set_ylabel("Number of Deaths (/1000)") @@ -298,8 +403,8 @@ def do_bar_plot_with_ci(_df, _ax): plt.close(fig) fig, ax = plt.subplots() - name_of_plot = f"DALYS With No Services, {target_period()}" - do_bar_plot_with_ci(num_dalys_by_cause_label / 1e6, ax) + name_of_plot = f"DALYS With No Services, {target_period_label}" + do_bar_plot_with_ci(num_dalys_by_cause_label / 1e6, ax, period_labels_for_bar_plots, target_period_label) ax.set_title(name_of_plot) ax.set_xlabel("Cause of Disability/Death") ax.set_ylabel("Number of DALYS (/millions)") @@ -312,132 +417,55 @@ def do_bar_plot_with_ci(_df, _ax): fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) plt.close(fig) - # %% Quantify the health gains associated with each TREATMENT_ID (short) individually (i.e., the - # difference in deaths and DALYS between each scenario and the 'Nothing' scenario.) 
- - def get_num_deaths_by_age_group(_df): - """Return total number of deaths (total by age-group within the TARGET_PERIOD)""" - return ( - _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)] - .groupby(_df["age"].map(age_grp_lookup).astype(make_age_grp_types())) - .size() - ) - - def do_barh_plot_with_ci(_df, _ax): - """Make a horizontal bar plot for each TREATMENT_ID for the _df onto axis _ax""" - errors = pd.concat([_df["mean"] - _df["lower"], _df["upper"] - _df["mean"]], axis=1).T.to_numpy() - _df.plot.barh( - ax=_ax, y="mean", xerr=errors, legend=False, color=[get_color_short_treatment_id(_id) for _id in _df.index] - ) - - def do_label_barh_plot(_df, _ax): - """Add text annotation from values in _df onto _ax""" - y_cords = {ylabel.get_text(): ytick for ytick, ylabel in zip(_ax.get_yticks(), _ax.get_yticklabels())} - - pos_on_rhs = _ax.get_xticks()[-1] - - for label, row in _df.iterrows(): - if row["mean"] > 0: - annotation = f"{round(row['mean'], 1)} ({round(row['lower'])}-{round(row['upper'])}) %" - _ax.annotate( - annotation, - xy=(pos_on_rhs, y_cords.get(label)), - xycoords="data", - horizontalalignment="left", - verticalalignment="center", - size=7, - ) - num_deaths = ( extract_results( results_folder, module="tlo.methods.demography", key="death", - custom_generate_series=get_num_deaths_by_cause_label, + custom_generate_series=lambda _df: get_num_deaths_by_cause_label(_df, TARGET_PERIOD), do_scaling=True, ) - .pipe(set_param_names_as_column_index_level_0) + .pipe(set_param_names_as_column_index_level_0, param_names=param_names) .sum() - ) # (Summing across age-groups) + ) num_dalys = ( extract_results( results_folder, module="tlo.methods.healthburden", key="dalys_stacked_by_age_and_time", - custom_generate_series=get_num_dalys_by_cause_label, + custom_generate_series=lambda _df: get_num_dalys_by_cause_label(_df, TARGET_PERIOD), do_scaling=True, ) - .pipe(set_param_names_as_column_index_level_0) + .pipe(set_param_names_as_column_index_level_0, 
param_names=param_names) .sum() - ) # (Summing across causes) - - # PLOTS FOR EACH TREATMENT_ID (Short) - - - - # %% Quantify the health associated with each TREATMENT_ID (short) SPLIT BY AGE and WEALTH - - # -- DEATHS - def get_total_num_death_by_agegrp_and_label(_df): - """Return the total number of deaths in the TARGET_PERIOD by age-group and cause label.""" - _df_limited_to_dates = _df.loc[_df["date"].between(*TARGET_PERIOD)] - age_group = to_age_group(_df_limited_to_dates["age"]) - return _df_limited_to_dates.groupby([age_group, "label"])["person_id"].size() + ) total_num_death_by_agegrp_and_label = extract_results( results_folder, module="tlo.methods.demography", key="death", - custom_generate_series=get_total_num_death_by_agegrp_and_label, + custom_generate_series=lambda _df: get_total_num_death_by_agegrp_and_label(_df, TARGET_PERIOD), do_scaling=True, - ).pipe(set_param_names_as_column_index_level_0) - - # -- DALYS - def get_total_num_dalys_by_agegrp_and_label(_df): - """Return the total number of DALYS in the TARGET_PERIOD by age-group and cause label.""" - return ( - _df.loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] - .assign(age_group=_df["age_range"]) - .drop(columns=["date", "year", "sex", "age_range"]) - .melt(id_vars=["age_group"], var_name="label", value_name="dalys") - .groupby(by=["age_group", "label"])["dalys"] - .sum() - ) + ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) total_num_dalys_by_agegrp_and_label = extract_results( results_folder, module="tlo.methods.healthburden", - key="dalys_stacked_by_age_and_time", # <-- for stacking by age and time - custom_generate_series=get_total_num_dalys_by_agegrp_and_label, + key="dalys_stacked_by_age_and_time", + custom_generate_series=lambda _df: get_total_num_dalys_by_agegrp_and_label(_df, TARGET_PERIOD), do_scaling=True, - ).pipe(set_param_names_as_column_index_level_0) - - # %% Quantify the healthcare system resources used with each TREATMENT_ID (short) (The 
difference in the number of - # appointments between each scenario and the 'Nothing' scenario.) - - # 1) Examine the HSI that are occurring by TREATMENT_ID - - def get_counts_of_hsi_by_short_treatment_id(_df): - """Get the counts of the short TREATMENT_IDs occurring (up to first underscore)""" - _counts_by_treatment_id = ( - _df.loc[pd.to_datetime(_df["date"]).between(*TARGET_PERIOD), "TREATMENT_ID"] - .apply(pd.Series) - .sum() - .astype(int) - ) - _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split("_")[0] + "*") - return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) counts_of_hsi_by_short_treatment_id = ( extract_results( results_folder, module="tlo.methods.healthsystem.summary", key="HSI_Event", - custom_generate_series=get_counts_of_hsi_by_short_treatment_id, + custom_generate_series=lambda _df: get_counts_of_hsi_by_short_treatment_id(_df, TARGET_PERIOD), do_scaling=True, ) - .pipe(set_param_names_as_column_index_level_0) + .pipe(set_param_names_as_column_index_level_0, param_names=param_names) .fillna(0.0) .sort_index() ) @@ -449,7 +477,7 @@ def get_counts_of_hsi_by_short_treatment_id(_df): if len(_counts_non_zero): fig, ax = plt.subplots() - name_of_plot = f"HSI Events Occurring, {scenario_name}, {target_period()}" + name_of_plot = f"HSI Events Occurring, {scenario_name}, {target_period_label}" squarify_neat( sizes=_counts_non_zero.values, label=_counts_non_zero.index, @@ -464,42 +492,41 @@ def get_counts_of_hsi_by_short_treatment_id(_df): fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) plt.close(fig) - # 2) Examine the Difference in the number/type of appointments occurring - - def get_counts_of_appts(_df): - """Get the counts of appointments of each type being used.""" - return ( - _df.loc[pd.to_datetime(_df["date"]).between(*TARGET_PERIOD), "Number_By_Appt_Type_Code"] - .apply(pd.Series) - .sum() - .astype(int) - ) - 
counts_of_appts = ( extract_results( results_folder, module="tlo.methods.healthsystem.summary", key="HSI_Event", - custom_generate_series=get_counts_of_appts, + custom_generate_series=lambda _df: get_counts_of_appts(_df, TARGET_PERIOD), do_scaling=True, ) - .pipe(set_param_names_as_column_index_level_0) + .pipe(set_param_names_as_column_index_level_0, param_names=param_names) .fillna(0.0) .sort_index() ) + return { + "num_deaths": num_deaths, + "num_dalys": num_dalys, + "total_num_death_by_agegrp_and_label": total_num_death_by_agegrp_and_label, + "total_num_dalys_by_agegrp_and_label": total_num_dalys_by_agegrp_and_label, + "counts_of_hsi_by_short_treatment_id": counts_of_hsi_by_short_treatment_id, + "counts_of_appts": counts_of_appts, + "age_grp_lookup": age_grp_lookup, + } + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("results_folder", type=Path) - parser.add_argument("outputs_folder", type=Path) + parser.add_argument("output_folder", type=Path, nargs="?", default=None) args = parser.parse_args() - apply(results_folder=args.results_folder, output_folder=args.outputs_folder, resourcefilepath=Path("./resources")) + out = args.output_folder if args.output_folder is not None else args.results_folder + apply(results_folder=args.results_folder, output_folder=out, resourcefilepath=Path("./resources")) - # Plot the legends - plot_legends.apply(results_folder=None, output_folder=args.outputs_folder, resourcefilepath=Path("./resources")) + plot_legends.apply(results_folder=None, output_folder=out, resourcefilepath=Path("./resources")) - with zipfile.ZipFile(args.results_folder / f"images_{args.results_folder.parts[-1]}.zip", mode="w") as archive: - for filename in sorted(glob.glob(str(args.results_folder / "*.png"))): + with zipfile.ZipFile(out / f"images_{out.parts[-1]}.zip", mode="w") as archive: + for filename in sorted(glob.glob(str(out / "*.png"))): archive.write(filename, os.path.basename(filename)) diff --git 
a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py index 399832a3b8..52cf002f83 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py @@ -104,7 +104,8 @@ def _get_scenarios(self) -> Dict[str, Dict]: service_availability.update( {f"Only {treatment}": [treatment] for treatment in treatments} ) - + ## Temporary: for testing, only include one scenario with a single treatment ID available + service_availability = dict({"Only Contraception_Routine": ["Contraception_Routine"]}) scenario_definitions = ScenarioDefinitions() scenarios = { From 3dec55bd747faf47f4073aa185b281a701605387 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Tue, 24 Feb 2026 13:20:01 +0000 Subject: [PATCH 14/55] Edits to read partially completed jobs --- src/tlo/analysis/utils.py | 79 ++++++++++++++++++++++++++++----------- 1 file changed, 57 insertions(+), 22 deletions(-) diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 6b4d2cbf9b..e890a12d2d 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -203,7 +203,7 @@ def get_scenario_outputs(scenario_filename: str, outputs_dir: Path) -> list: return folders -def get_scenario_info(scenario_output_dir: Path) -> dict: +def get_scenario_info(scenario_output_dir: Path, autodiscover: bool = False) -> dict: """Utility function to get the the number draws and the number of runs in a batch set. 
TODO: read the JSON file to get further information @@ -211,6 +211,22 @@ def get_scenario_info(scenario_output_dir: Path) -> dict: info = dict() draw_folders = [f for f in os.scandir(scenario_output_dir) if f.is_dir()] + if autodiscover: + draw_ids = sorted(int(f.name) for f in draw_folders) + runs_by_draw = { + draw: sorted( + int(f.name) + for f in os.scandir(scenario_output_dir / str(draw)) + if f.is_dir() + ) + for draw in draw_ids + } + info['draws'] = draw_ids + info['runs_by_draw'] = runs_by_draw + info['number_of_draws'] = len(draw_ids) + info['runs_per_draw'] = len(runs_by_draw[draw_ids[0]]) if draw_ids else 0 + return info + info['number_of_draws'] = len(draw_folders) run_folders = [f for f in os.scandir(draw_folders[0]) if f.is_dir()] @@ -295,6 +311,8 @@ def extract_results(results_folder: Path, index: str = None, custom_generate_series=None, do_scaling: bool = False, + draw_runs: Optional[List[Tuple[int, int]]] = None, + autodiscover: bool = False, ) -> pd.DataFrame: """Utility function to unpack results. @@ -308,6 +326,9 @@ def extract_results(results_folder: Path, Optionally, with `do_scaling=True`, each element is multiplied by the scaling_factor recorded in the simulation. + If `draw_runs` is provided, only these draw/run pairs are extracted (in the order supplied). If `draw_runs` is + not provided and `autodiscover=True`, available draw/run folders are auto-discovered and extracted. + Note that if runs in the batch have failed (such that logs have not been generated), these are dropped silently. 
""" @@ -335,30 +356,44 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series: else: return custom_generate_series(dataframe) - # get number of draws and numbers of runs - info = get_scenario_info(results_folder) + if draw_runs is not None: + selected_draw_runs = draw_runs + elif autodiscover: + info = get_scenario_info(results_folder, autodiscover=True) + selected_draw_runs = [ + (draw, run) + for draw in info['draws'] + for run in info['runs_by_draw'][draw] + ] + else: + # Legacy default behaviour: infer ranges from scenario info. + info = get_scenario_info(results_folder) + selected_draw_runs = [ + (draw, run) + for draw in range(info['number_of_draws']) + for run in range(info['runs_per_draw']) + ] # Collect results from each draw/run res = dict() - for draw in range(info['number_of_draws']): - for run in range(info['runs_per_draw']): - draw_run = (draw, run) + for draw, run in selected_draw_runs: + draw_run = (draw, run) + try: + df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] + output_from_eval: pd.Series = generate_series(df) - try: - df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] - output_from_eval: pd.Series = generate_series(df) - assert isinstance(output_from_eval, pd.Series), ( - 'Custom command does not generate a pd.Series' - ) - if do_scaling: - res[draw_run] = output_from_eval * get_multiplier(draw, run) - else: - res[draw_run] = output_from_eval + assert isinstance(output_from_eval, pd.Series), ( + 'Custom command does not generate a pd.Series' + ) + if do_scaling: + res[draw_run] = output_from_eval * get_multiplier(draw, run) + else: + res[draw_run] = output_from_eval - except KeyError: - # Some logs could not be found - probably because this run failed. - res[draw_run] = None + except KeyError: + # Some logs could not be found - probably because this run failed. 
+ res[draw_run] = None # Use pd.concat to compile results (skips dict items where the values is None) _concat = pd.concat(res, axis=1) @@ -386,7 +421,7 @@ def check_info_value_changes(df): prev_info = row["Info"] return problems - + def remove_events_for_individual_after_death(df): rows_to_drop = [] @@ -430,8 +465,8 @@ def reconstruct_individual_histories(df): if len(problems)>0: print("Values didn't change but were still detected") print(problems) - - + + return df_final From 592fc9e3649f0455de6989135126f361c8a2cbfd Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Tue, 24 Feb 2026 13:49:24 +0000 Subject: [PATCH 15/55] Analysis figures; WIP --- .../analysis_effect_of_treatment_ids.py | 100 ++++++++++-------- .../scenario_effect_of_treatment_ids.py | 2 - 2 files changed, 57 insertions(+), 45 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index e1a093ad75..52f4615603 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -31,8 +31,9 @@ to_age_group, ) -TARGET_PERIOD = (Date(2010, 1, 1), Date(2025, 12, 31)) +TARGET_PERIOD = (Date(2026, 1, 1), Date(2041, 1, 1)) PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS = 5 +results_folder = Path("outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-16T154500Z") def extract_deaths_total(df: pd.DataFrame) -> pd.Series: @@ -219,12 +220,13 @@ def _get_num_dalys_by_cause_label_and_period(_df: pd.DataFrame) -> pd.Series: def do_bar_plot_with_ci( _df: pd.DataFrame, + _param, _ax, period_labels_for_bar_plots: list[str], target_period_label: str, ): """Make vertical bars by cause, decomposed into period chunks, with overall-period CI.""" - _df_nothing = _df["Contraception_Routine"] + _df_nothing = _df[_param] _df_nothing = _df_nothing.reindex( pd.MultiIndex.from_product( 
[CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.keys(), period_labels_for_bar_plots + [target_period_label]], @@ -329,14 +331,16 @@ def get_counts_of_hsi_by_short_treatment_id( target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, ) -> pd.Series: """Get counts of short treatment ids occurring in target period.""" + mask = pd.to_datetime(_df["date"]).between(*target_period_tuple) _counts_by_treatment_id = ( - _df.loc[pd.to_datetime(_df["date"]).between(*target_period_tuple), "TREATMENT_ID"] + _df.loc[mask, "TREATMENT_ID"] .apply(pd.Series) .sum() .astype(int) ) - _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split("_")[0] + "*") - return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + ##_short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split("_")[0] + "*") + ##return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + return _counts_by_treatment_id def get_counts_of_appts(_df: pd.DataFrame, target_period_tuple: tuple[Date, Date] = TARGET_PERIOD) -> pd.Series: @@ -371,8 +375,9 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No module="tlo.methods.demography", key="death", custom_generate_series=extract_deaths_total, - do_scaling=True, - ).pipe(set_param_names_as_column_index_level_0, param_names=param_names)[["Contraception_Routine"]] + do_scaling=False, + autodiscover=True, + ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) ) num_dalys_by_cause_label = summarize( @@ -384,38 +389,42 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No period_length_years=PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS, target_period_tuple=TARGET_PERIOD, ), - do_scaling=True, - ).pipe(set_param_names_as_column_index_level_0, param_names=param_names)[["Contraception_Routine"]] + do_scaling=False, + autodiscover=True, + ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) ) - fig, ax = plt.subplots() - name_of_plot = f"Deaths With 
No Services, {target_period_label}" - do_bar_plot_with_ci(num_deaths_by_cause_label / 1e3, ax, period_labels_for_bar_plots, target_period_label) - ax.set_title(name_of_plot) - ax.set_xlabel("Cause of Death") - ax.set_ylabel("Number of Deaths (/1000)") - ax.set_ylim(0, 500) - ax.grid(axis="y") - ax.spines["top"].set_visible(False) - ax.spines["right"].set_visible(False) - fig.tight_layout() - fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) - plt.close(fig) - - fig, ax = plt.subplots() - name_of_plot = f"DALYS With No Services, {target_period_label}" - do_bar_plot_with_ci(num_dalys_by_cause_label / 1e6, ax, period_labels_for_bar_plots, target_period_label) - ax.set_title(name_of_plot) - ax.set_xlabel("Cause of Disability/Death") - ax.set_ylabel("Number of DALYS (/millions)") - ax.set_ylim(0, 30) - ax.set_yticks(np.arange(0, 35, 5)) - ax.grid(axis="y") - ax.spines["top"].set_visible(False) - ax.spines["right"].set_visible(False) - fig.tight_layout() - fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) - plt.close(fig) + for param in param_names: + param_formatted = format_scenario_name(param) + + fig, ax = plt.subplots() + name_of_plot = f"Deaths With {param_formatted}, {target_period_label}" + do_bar_plot_with_ci(num_deaths_by_cause_label / 1e3, param_formatted, ax, period_labels_for_bar_plots, target_period_label) + ax.set_title(name_of_plot) + ax.set_xlabel("Cause of Death") + ax.set_ylabel("Number of Deaths (/1000)") + ax.set_ylim(0, 500) + ax.grid(axis="y") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) + plt.close(fig) + + fig, ax = plt.subplots() + name_of_plot = f"DALYS With No Services, {target_period_label}" + do_bar_plot_with_ci(num_dalys_by_cause_label / 1e6, param_formatted, ax, period_labels_for_bar_plots, target_period_label) + ax.set_title(name_of_plot) + ax.set_xlabel("Cause of Disability/Death") + 
ax.set_ylabel("Number of DALYS (/millions)") + ax.set_ylim(0, 30) + ax.set_yticks(np.arange(0, 35, 5)) + ax.grid(axis="y") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) + plt.close(fig) num_deaths = ( extract_results( @@ -423,7 +432,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No module="tlo.methods.demography", key="death", custom_generate_series=lambda _df: get_num_deaths_by_cause_label(_df, TARGET_PERIOD), - do_scaling=True, + do_scaling=False, + autodiscover=True, ) .pipe(set_param_names_as_column_index_level_0, param_names=param_names) .sum() @@ -435,7 +445,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No module="tlo.methods.healthburden", key="dalys_stacked_by_age_and_time", custom_generate_series=lambda _df: get_num_dalys_by_cause_label(_df, TARGET_PERIOD), - do_scaling=True, + do_scaling=False, + autodiscover=True, ) .pipe(set_param_names_as_column_index_level_0, param_names=param_names) .sum() @@ -446,7 +457,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No module="tlo.methods.demography", key="death", custom_generate_series=lambda _df: get_total_num_death_by_agegrp_and_label(_df, TARGET_PERIOD), - do_scaling=True, + do_scaling=False, + autodiscover=True, ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) total_num_dalys_by_agegrp_and_label = extract_results( @@ -454,7 +466,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No module="tlo.methods.healthburden", key="dalys_stacked_by_age_and_time", custom_generate_series=lambda _df: get_total_num_dalys_by_agegrp_and_label(_df, TARGET_PERIOD), - do_scaling=True, + do_scaling=False, + autodiscover=True, ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) counts_of_hsi_by_short_treatment_id = ( @@ -463,7 
+476,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No module="tlo.methods.healthsystem.summary", key="HSI_Event", custom_generate_series=lambda _df: get_counts_of_hsi_by_short_treatment_id(_df, TARGET_PERIOD), - do_scaling=True, + do_scaling=False, + autodiscover=True, ) .pipe(set_param_names_as_column_index_level_0, param_names=param_names) .fillna(0.0) @@ -498,7 +512,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No module="tlo.methods.healthsystem.summary", key="HSI_Event", custom_generate_series=lambda _df: get_counts_of_appts(_df, TARGET_PERIOD), - do_scaling=True, + do_scaling=False, ) .pipe(set_param_names_as_column_index_level_0, param_names=param_names) .fillna(0.0) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py index 52cf002f83..0a2ce0a67e 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py @@ -104,8 +104,6 @@ def _get_scenarios(self) -> Dict[str, Dict]: service_availability.update( {f"Only {treatment}": [treatment] for treatment in treatments} ) - ## Temporary: for testing, only include one scenario with a single treatment ID available - service_availability = dict({"Only Contraception_Routine": ["Contraception_Routine"]}) scenario_definitions = ScenarioDefinitions() scenarios = { From ab639c5c216bc38d77cf4e9cf69240dc1bb62ecd Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Wed, 4 Mar 2026 15:54:47 +0000 Subject: [PATCH 16/55] Plot population growth --- .../analysis_effect_of_treatment_ids.py | 154 +++++++++++++++++- 1 file changed, 146 insertions(+), 8 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py 
index 52f4615603..e15e1f114a 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -3,6 +3,8 @@ import argparse import glob import os +import textwrap +import warnings import zipfile from pathlib import Path from typing import Tuple @@ -29,11 +31,24 @@ squarify_neat, summarize, to_age_group, + compute_summary_statistics, ) TARGET_PERIOD = (Date(2026, 1, 1), Date(2041, 1, 1)) PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS = 5 results_folder = Path("outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-16T154500Z") +## retrieved from the suspended run in outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z +SCALING_FACTOR = 58.158436 + + +def get_total_population_by_year(_df): + years_needed = [i.year for i in TARGET_PERIOD] + _df['year'] = pd.to_datetime(_df['date']).dt.year + + # Filter for relevant years and return the total population as a Series + return \ + _df.loc[_df['year'].between(min(years_needed), max(years_needed)), ['year', 'total']].set_index('year')[ + 'total'] def extract_deaths_total(df: pd.DataFrame) -> pd.Series: @@ -226,6 +241,15 @@ def do_bar_plot_with_ci( target_period_label: str, ): """Make vertical bars by cause, decomposed into period chunks, with overall-period CI.""" + available_params = ( + _df.columns.get_level_values(0) + if isinstance(_df.columns, pd.MultiIndex) + else _df.columns + ) + if _param not in available_params: + warnings.warn(f"Parameter '{_param}' not found in dataframe columns. 
Skipping plot.", stacklevel=2) + return + _df_nothing = _df[_param] _df_nothing = _df_nothing.reindex( pd.MultiIndex.from_product( @@ -264,6 +288,73 @@ def do_bar_plot_with_ci( _ax.legend(handles=chunk_legend_handles + [ci_legend_handle], loc="upper right") +def plot_multiindex_dot_with_interval( + _df: pd.DataFrame, + year: int, + _ax, + central_measure: str = "mean", + value_col: str = "population", + sort: bool = True, + x_label_rotation: int = 90, + x_tick_fontsize: int = 8, + label_wrap_width: int = 18, + max_xticks: int = 30, +): + """Plot central-value dots and lower/upper intervals by category for one year.""" + if not isinstance(_df.index, pd.MultiIndex) or _df.index.nlevels < 3: + raise ValueError("_df index must be a MultiIndex with at least 3 levels: category, stat, year.") + if value_col not in _df.columns: + raise ValueError(f"Column '{value_col}' not found in dataframe.") + + year_level_values = _df.index.get_level_values(2) + available_years = pd.Index(year_level_values.unique()).sort_values() + if year not in available_years: + raise ValueError(f"Year '{year}' not found in index level 2. Available years: {available_years.tolist()}") + + stat_level_values = _df.index.get_level_values(1) + required_stats = {central_measure, "lower", "upper"} + missing_stats = required_stats.difference(set(stat_level_values)) + if missing_stats: + raise ValueError( + f"Missing required stat(s) in index level 1: {sorted(missing_stats)}. 
" + f"Available stats: {sorted(set(stat_level_values))}" + ) + + _plot = _df.xs(year, level=2)[value_col].unstack(level=1) + _plot = _plot.loc[:, [central_measure, "lower", "upper"]] + _plot = _plot.dropna(subset=[central_measure, "lower", "upper"]) + if _plot.empty: + raise ValueError(f"No plottable rows remain for year '{year}' after selecting required stats.") + + if sort: + _plot = _plot.sort_values(by=central_measure, ascending=True) + + x = np.arange(len(_plot.index)) + _ax.vlines(x, _plot["lower"], _plot["upper"], color="black", linewidth=1.2) + _ax.scatter(x, _plot[central_measure], color="black", s=20, zorder=3) + + # Improve readability when category labels are long and/or numerous. + _ax.figure.set_size_inches(max(12, min(0.25 * len(_plot.index), 36)), 7) + wrapped_labels = [textwrap.fill(str(label), width=label_wrap_width) for label in _plot.index] + if max_xticks is not None and len(x) > max_xticks: + step = int(np.ceil(len(x) / max_xticks)) + shown_positions = x[::step] + shown_labels = [wrapped_labels[i] for i in shown_positions] + _ax.set_xticks(shown_positions) + _ax.set_xticklabels(shown_labels, rotation=x_label_rotation, ha="right", fontsize=x_tick_fontsize) + else: + _ax.set_xticks(x) + _ax.set_xticklabels(wrapped_labels, rotation=x_label_rotation, ha="right", fontsize=x_tick_fontsize) + _ax.set_xlabel(_df.index.names[0] if _df.index.names[0] is not None else "category") + _ax.set_ylabel(value_col) + _ax.set_title(f"{value_col}: {central_measure} with lower/upper ({year})") + _ax.grid(axis="y") + _ax.spines["top"].set_visible(False) + _ax.spines["right"].set_visible(False) + + return _ax + + def get_num_deaths_by_age_group( _df: pd.DataFrame, age_grp_lookup: dict, @@ -369,13 +460,52 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No param_names = get_parameter_names_from_scenario_file() + + # Get total population by year + total_population_by_year = extract_results( + results_folder, + 
module='tlo.methods.demography', + key='population', + custom_generate_series=get_total_population_by_year, + do_scaling=True, + scaling_factor=SCALING_FACTOR, + autodiscover=True + ) + total_population_by_year = compute_summary_statistics(total_population_by_year, central_measure = 'median') + total_population_by_year = set_param_names_as_column_index_level_0(total_population_by_year, param_names=param_names) + total_population_by_year = (total_population_by_year + .stack(level=["draw", "stat"]) # move draw & stat into index + .reset_index() # turn all index levels into columns + .rename(columns={0: "population"}) # name the value column + ).set_index(["draw", "stat",'year']) + + total_population_by_year = total_population_by_year.rename( + index={"central": "median"}, + level="stat" + ) + + for year in [2026, 2031, 2036, 2040]: + fig, ax = plt.subplots() + name_of_plot = f"Population size in {year}" + plot_multiindex_dot_with_interval(total_population_by_year / 1e6, year, ax, 'median') + ax.set_title(name_of_plot) + ax.set_xlabel("Treatment included") + ax.set_ylabel("Population size (millions)") + ax.grid(axis="y") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) + plt.close(fig) + num_deaths_by_cause_label = summarize( extract_results( results_folder, module="tlo.methods.demography", key="death", custom_generate_series=extract_deaths_total, - do_scaling=False, + do_scaling=True, + scaling_factor=SCALING_FACTOR, autodiscover=True, ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) ) @@ -389,7 +519,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No period_length_years=PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS, target_period_tuple=TARGET_PERIOD, ), - do_scaling=False, + do_scaling=True, + scaling_factor=SCALING_FACTOR, autodiscover=True, ).pipe(set_param_names_as_column_index_level_0, 
param_names=param_names) ) @@ -432,7 +563,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No module="tlo.methods.demography", key="death", custom_generate_series=lambda _df: get_num_deaths_by_cause_label(_df, TARGET_PERIOD), - do_scaling=False, + do_scaling=True, + scaling_factor=SCALING_FACTOR, autodiscover=True, ) .pipe(set_param_names_as_column_index_level_0, param_names=param_names) @@ -445,7 +577,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No module="tlo.methods.healthburden", key="dalys_stacked_by_age_and_time", custom_generate_series=lambda _df: get_num_dalys_by_cause_label(_df, TARGET_PERIOD), - do_scaling=False, + do_scaling=True, + scaling_factor=SCALING_FACTOR, autodiscover=True, ) .pipe(set_param_names_as_column_index_level_0, param_names=param_names) @@ -457,7 +590,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No module="tlo.methods.demography", key="death", custom_generate_series=lambda _df: get_total_num_death_by_agegrp_and_label(_df, TARGET_PERIOD), - do_scaling=False, + do_scaling=True, + scaling_factor=SCALING_FACTOR, autodiscover=True, ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) @@ -466,7 +600,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No module="tlo.methods.healthburden", key="dalys_stacked_by_age_and_time", custom_generate_series=lambda _df: get_total_num_dalys_by_agegrp_and_label(_df, TARGET_PERIOD), - do_scaling=False, + do_scaling=True, + scaling_factor=SCALING_FACTOR, autodiscover=True, ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) @@ -476,7 +611,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No module="tlo.methods.healthsystem.summary", key="HSI_Event", custom_generate_series=lambda _df: get_counts_of_hsi_by_short_treatment_id(_df, TARGET_PERIOD), - do_scaling=False, + do_scaling=True, + 
scaling_factor=SCALING_FACTOR, autodiscover=True, ) .pipe(set_param_names_as_column_index_level_0, param_names=param_names) @@ -512,13 +648,15 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No module="tlo.methods.healthsystem.summary", key="HSI_Event", custom_generate_series=lambda _df: get_counts_of_appts(_df, TARGET_PERIOD), - do_scaling=False, + do_scaling=True, + scaling_factor=SCALING_FACTOR, ) .pipe(set_param_names_as_column_index_level_0, param_names=param_names) .fillna(0.0) .sort_index() ) + return { "num_deaths": num_deaths, "num_dalys": num_dalys, From 3118beba6bf875936df683be4ca18ef4336f51d1 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Mon, 9 Mar 2026 12:14:51 +0000 Subject: [PATCH 17/55] Working through figs --- .../analysis_effect_of_treatment_ids.py | 475 ++---------------- .../fig_utils.py | 157 ++++++ .../results_processing_utils.py | 263 ++++++++++ 3 files changed, 475 insertions(+), 420 deletions(-) create mode 100644 src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py create mode 100644 src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index e15e1f114a..d37b9ffebc 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -3,447 +3,64 @@ import argparse import glob import os -import textwrap -import warnings import zipfile from pathlib import Path -from typing import Tuple import numpy as np -import pandas as pd from matplotlib import pyplot as plt -from matplotlib.lines import Line2D -from matplotlib.patches import Patch - +import pandas as pd +from tlo import Date from scripts.calibration_analyses.analysis_scripts import plot_legends -from 
scripts.lcoa_inputs_from_tlo_analyses.scenario_effect_of_treatment_ids import ( - EffectOfEachTreatment, +from scripts.lcoa_inputs_from_tlo_analyses.fig_utils import ( + do_bar_plot_with_ci, + plot_multiindex_dot_with_interval, +) +from scripts.lcoa_inputs_from_tlo_analyses.results_processing_utils import ( + extract_deaths_total, + format_scenario_name, + get_counts_of_appts, + get_counts_of_hsi_by_short_treatment_id, + get_num_dalys_by_cause_label, + get_num_deaths_by_cause_label, + get_parameter_names_from_scenario_file, + get_periods_within_target_period, + get_total_num_dalys_by_agegrp_and_label, + get_total_num_death_by_agegrp_and_label, + get_total_population_by_year, + make_get_num_dalys_by_cause_label_and_period, + set_param_names_as_column_index_level_0, + target_period, +) +from scripts.costing.cost_estimation import ( + apply_discounting_to_cost_data, + do_line_plot_of_cost, + do_stacked_bar_plot_of_cost_by_category, + estimate_input_cost_of_scenarios, + estimate_projected_health_spending, + extract_roi_at_specific_implementation_costs, + generate_multiple_scenarios_roi_plot, + load_unit_cost_assumptions, + summarize_cost_data, + tabulate_roi_estimates, ) -from tlo import Date from tlo.analysis.utils import ( - CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP, + compute_summary_statistics, extract_results, - get_color_cause_of_death_or_daly_label, get_color_short_treatment_id, make_age_grp_lookup, - make_age_grp_types, - order_of_cause_of_death_or_daly_label, squarify_neat, summarize, - to_age_group, - compute_summary_statistics, + unflatten_flattened_multi_index_in_logging, ) TARGET_PERIOD = (Date(2026, 1, 1), Date(2041, 1, 1)) PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS = 5 results_folder = Path("outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-16T154500Z") -## retrieved from the suspended run in outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z +# SCALING_FACTOR retrieved from the suspended run in +# 
outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z SCALING_FACTOR = 58.158436 -def get_total_population_by_year(_df): - years_needed = [i.year for i in TARGET_PERIOD] - _df['year'] = pd.to_datetime(_df['date']).dt.year - - # Filter for relevant years and return the total population as a Series - return \ - _df.loc[_df['year'].between(min(years_needed), max(years_needed)), ['year', 'total']].set_index('year')[ - 'total'] - - -def extract_deaths_total(df: pd.DataFrame) -> pd.Series: - return pd.Series({"Total": len(df)}) - -def target_period(target_period_tuple: tuple[Date, Date] = TARGET_PERIOD) -> str: - """Returns the target period as a string of the form YYYY-YYYY.""" - return "-".join(str(t.year) for t in target_period_tuple) - - -def get_periods_within_target_period( - period_length_years: int, - target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, -) -> list[tuple[str, tuple[int, int]]]: - """Return chunks within target period as [(label, (start_year, end_year)), ...].""" - if period_length_years <= 0: - raise ValueError("period_length_years must be a positive integer.") - start_year, end_year = target_period_tuple[0].year, target_period_tuple[1].year - periods = [] - for chunk_start in range(start_year, end_year + 1, period_length_years): - chunk_end = min(chunk_start + period_length_years - 1, end_year) - periods.append((f"{chunk_start}-{chunk_end}", (chunk_start, chunk_end))) - return periods - - -def get_parameter_names_from_scenario_file() -> Tuple[str]: - """Get tuple of scenario names from Scenario class used to create results.""" - e = EffectOfEachTreatment() - return tuple(e._scenarios.keys()) - - -def format_scenario_name(_sn: str) -> str: - """Return reformatted scenario name ready for plotting.""" - if _sn == "Nothing": - return "Nothing" - return _sn.lstrip("Only ") - - -def set_param_names_as_column_index_level_0(_df: pd.DataFrame, param_names: tuple[str, ...]) -> pd.DataFrame: - """Set columns index level 0 as 
scenario param names.""" - ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} - names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] - assert len(names_of_cols_level0) == len(_df.columns.levels[0]) - - reformatted_names = map(format_scenario_name, names_of_cols_level0) - _df.columns = _df.columns.set_levels(reformatted_names, level=0) - return _df - - -def find_difference_extra_relative_to_comparison( - _ser: pd.Series, - comparison: str, - scaled: bool = False, - drop_comparison: bool = True, -): - """Find run-wise differences relative to comparison in a series with multi-index.""" - return ( - _ser.unstack() - .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=0) - .drop(index=([comparison] if drop_comparison else [])) - .stack() - ) - - -def find_mean_difference_in_appts_relative_to_comparison( - _df: pd.DataFrame, - comparison: str, - drop_comparison: bool = True, -): - """Find mean fewer appointments when treatment does not happen relative to comparison.""" - return -summarize( - pd.concat( - { - _idx: find_difference_extra_relative_to_comparison( - row, comparison=comparison, drop_comparison=drop_comparison - ) - for _idx, row in _df.iterrows() - }, - axis=1, - ).T, - only_mean=True, - ) - - -def find_mean_difference_extra_relative_to_comparison_dataframe( - _df: pd.DataFrame, - comparison: str, - drop_comparison: bool = True, -): - """Same as find_difference_extra_relative_to_comparison but for dataframe.""" - return summarize( - pd.concat( - { - _idx: find_difference_extra_relative_to_comparison( - row, comparison=comparison, drop_comparison=drop_comparison - ) - for _idx, row in _df.iterrows() - }, - axis=1, - ).T, - only_mean=True, - ) - - -def get_num_deaths_by_cause_label(_df: pd.DataFrame, target_period_tuple: tuple[Date, Date] = TARGET_PERIOD) -> pd.Series: - """Return total deaths by label within target period.""" - return 
_df.loc[pd.to_datetime(_df.date).between(*target_period_tuple)].groupby(_df["label"]).size() - - -def get_num_dalys_by_cause_label(_df: pd.DataFrame, target_period_tuple: tuple[Date, Date] = TARGET_PERIOD) -> pd.Series: - """Return total DALYS by label within target period.""" - return ( - _df.loc[_df.year.between(*[i.year for i in target_period_tuple])] - .drop(columns=["date", "sex", "age_range", "year"]) - .sum() - ) - - -def make_get_num_deaths_by_cause_label_and_period( - period_length_years: int, - target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, -): - """Create helper that summarizes deaths by cause and period chunks + overall.""" - periods = get_periods_within_target_period( - period_length_years=period_length_years, - target_period_tuple=target_period_tuple, - ) - period_lookup = { - year: period_label - for period_label, (start_year, end_year) in periods - for year in range(start_year, end_year + 1) - } - target_period_label = target_period(target_period_tuple) - - def _get_num_deaths_by_cause_label_and_period(_df: pd.DataFrame) -> pd.Series: - _df_in_target = _df.loc[pd.to_datetime(_df.date).between(*target_period_tuple)].copy() - _df_in_target["year"] = pd.to_datetime(_df_in_target["date"]).dt.year - _df_in_target["period"] = _df_in_target["year"].map(period_lookup) - - chunked = _df_in_target.groupby(["label", "period"]).size() - overall = _df_in_target.groupby("label").size() - overall.index = pd.MultiIndex.from_arrays( - [overall.index, np.repeat(target_period_label, len(overall.index))], names=["label", "period"] - ) - return pd.concat([chunked, overall]).sort_index() - - return _get_num_deaths_by_cause_label_and_period - - -def make_get_num_dalys_by_cause_label_and_period( - period_length_years: int, - target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, -): - """Create helper that summarizes DALYS by cause and period chunks + overall.""" - periods = get_periods_within_target_period( - period_length_years=period_length_years, - 
target_period_tuple=target_period_tuple, - ) - period_lookup = { - year: period_label - for period_label, (period_start, period_end) in periods - for year in range(period_start, period_end + 1) - } - start_year, end_year = target_period_tuple[0].year, target_period_tuple[1].year - target_period_label = target_period(target_period_tuple) - - def _get_num_dalys_by_cause_label_and_period(_df: pd.DataFrame) -> pd.Series: - _df_in_target = _df.loc[_df.year.between(start_year, end_year)].copy() - _df_in_target["period"] = _df_in_target["year"].map(period_lookup) - - melted = ( - _df_in_target.drop(columns=["date", "sex", "age_range"]) - .melt(id_vars=["year", "period"], var_name="label", value_name="dalys") - ) - chunked = melted.groupby(["label", "period"])["dalys"].sum() - overall = melted.groupby("label")["dalys"].sum() - overall.index = pd.MultiIndex.from_arrays( - [overall.index, np.repeat(target_period_label, len(overall.index))], names=["label", "period"] - ) - return pd.concat([chunked, overall]).sort_index() - - return _get_num_dalys_by_cause_label_and_period - - -def do_bar_plot_with_ci( - _df: pd.DataFrame, - _param, - _ax, - period_labels_for_bar_plots: list[str], - target_period_label: str, -): - """Make vertical bars by cause, decomposed into period chunks, with overall-period CI.""" - available_params = ( - _df.columns.get_level_values(0) - if isinstance(_df.columns, pd.MultiIndex) - else _df.columns - ) - if _param not in available_params: - warnings.warn(f"Parameter '{_param}' not found in dataframe columns. 
Skipping plot.", stacklevel=2) - return - - _df_nothing = _df[_param] - _df_nothing = _df_nothing.reindex( - pd.MultiIndex.from_product( - [CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.keys(), period_labels_for_bar_plots + [target_period_label]], - names=["label", "period"], - ), - fill_value=0.0, - ) - _df_nothing = _df_nothing.sort_index(axis=0, level=0, key=order_of_cause_of_death_or_daly_label) - - cause_labels = list(_df_nothing.index.get_level_values("label").unique()) - - for i, cause_label in enumerate(cause_labels): - color = get_color_cause_of_death_or_daly_label(cause_label) - one_cause = _df_nothing.xs(cause_label, level="label") - - bottom = 0.0 - for j, period_label in enumerate(period_labels_for_bar_plots): - chunk_height = one_cause.loc[period_label, "mean"] if period_label in one_cause.index else 0.0 - _ax.bar(i, chunk_height, bottom=bottom, color=color, alpha=0.9 if j % 2 == 0 else 0.35) - bottom += chunk_height - - mean_value = one_cause.loc[target_period_label, "mean"] - lower_value = one_cause.loc[target_period_label, "lower"] - upper_value = one_cause.loc[target_period_label, "upper"] - overall_yerr = np.array([[mean_value - lower_value], [upper_value - mean_value]]) - _ax.errorbar(i, mean_value, yerr=overall_yerr, fmt="none", ecolor="black", capsize=2, linewidth=1.2) - - _ax.set_xticks(range(len(cause_labels))) - _ax.set_xticklabels(cause_labels, rotation=90) - chunk_legend_handles = [ - Patch(facecolor="grey", alpha=0.9 if i % 2 == 0 else 0.35, label=period_label) - for i, period_label in enumerate(period_labels_for_bar_plots) - ] - ci_legend_handle = Line2D([0], [0], color="black", marker="|", markersize=8, linewidth=1.2, label="95% CI") - _ax.legend(handles=chunk_legend_handles + [ci_legend_handle], loc="upper right") - - -def plot_multiindex_dot_with_interval( - _df: pd.DataFrame, - year: int, - _ax, - central_measure: str = "mean", - value_col: str = "population", - sort: bool = True, - x_label_rotation: int = 90, - x_tick_fontsize: int = 
8, - label_wrap_width: int = 18, - max_xticks: int = 30, -): - """Plot central-value dots and lower/upper intervals by category for one year.""" - if not isinstance(_df.index, pd.MultiIndex) or _df.index.nlevels < 3: - raise ValueError("_df index must be a MultiIndex with at least 3 levels: category, stat, year.") - if value_col not in _df.columns: - raise ValueError(f"Column '{value_col}' not found in dataframe.") - - year_level_values = _df.index.get_level_values(2) - available_years = pd.Index(year_level_values.unique()).sort_values() - if year not in available_years: - raise ValueError(f"Year '{year}' not found in index level 2. Available years: {available_years.tolist()}") - - stat_level_values = _df.index.get_level_values(1) - required_stats = {central_measure, "lower", "upper"} - missing_stats = required_stats.difference(set(stat_level_values)) - if missing_stats: - raise ValueError( - f"Missing required stat(s) in index level 1: {sorted(missing_stats)}. " - f"Available stats: {sorted(set(stat_level_values))}" - ) - - _plot = _df.xs(year, level=2)[value_col].unstack(level=1) - _plot = _plot.loc[:, [central_measure, "lower", "upper"]] - _plot = _plot.dropna(subset=[central_measure, "lower", "upper"]) - if _plot.empty: - raise ValueError(f"No plottable rows remain for year '{year}' after selecting required stats.") - - if sort: - _plot = _plot.sort_values(by=central_measure, ascending=True) - - x = np.arange(len(_plot.index)) - _ax.vlines(x, _plot["lower"], _plot["upper"], color="black", linewidth=1.2) - _ax.scatter(x, _plot[central_measure], color="black", s=20, zorder=3) - - # Improve readability when category labels are long and/or numerous. 
- _ax.figure.set_size_inches(max(12, min(0.25 * len(_plot.index), 36)), 7) - wrapped_labels = [textwrap.fill(str(label), width=label_wrap_width) for label in _plot.index] - if max_xticks is not None and len(x) > max_xticks: - step = int(np.ceil(len(x) / max_xticks)) - shown_positions = x[::step] - shown_labels = [wrapped_labels[i] for i in shown_positions] - _ax.set_xticks(shown_positions) - _ax.set_xticklabels(shown_labels, rotation=x_label_rotation, ha="right", fontsize=x_tick_fontsize) - else: - _ax.set_xticks(x) - _ax.set_xticklabels(wrapped_labels, rotation=x_label_rotation, ha="right", fontsize=x_tick_fontsize) - _ax.set_xlabel(_df.index.names[0] if _df.index.names[0] is not None else "category") - _ax.set_ylabel(value_col) - _ax.set_title(f"{value_col}: {central_measure} with lower/upper ({year})") - _ax.grid(axis="y") - _ax.spines["top"].set_visible(False) - _ax.spines["right"].set_visible(False) - - return _ax - - -def get_num_deaths_by_age_group( - _df: pd.DataFrame, - age_grp_lookup: dict, - target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, -): - """Return total deaths by age-group in target period.""" - return ( - _df.loc[pd.to_datetime(_df.date).between(*target_period_tuple)] - .groupby(_df["age"].map(age_grp_lookup).astype(make_age_grp_types())) - .size() - ) - - -def do_barh_plot_with_ci(_df: pd.DataFrame, _ax): - """Make horizontal bar plot for each treatment id.""" - errors = pd.concat([_df["mean"] - _df["lower"], _df["upper"] - _df["mean"]], axis=1).T.to_numpy() - _df.plot.barh(ax=_ax, y="mean", xerr=errors, legend=False, color=[get_color_short_treatment_id(_id) for _id in _df.index]) - - -def do_label_barh_plot(_df: pd.DataFrame, _ax): - """Add text annotation from values in dataframe onto axis.""" - y_cords = {ylabel.get_text(): ytick for ytick, ylabel in zip(_ax.get_yticks(), _ax.get_yticklabels())} - pos_on_rhs = _ax.get_xticks()[-1] - - for label, row in _df.iterrows(): - if row["mean"] > 0: - annotation = f"{round(row['mean'], 1)} 
({round(row['lower'])}-{round(row['upper'])}) %" - _ax.annotate( - annotation, - xy=(pos_on_rhs, y_cords.get(label)), - xycoords="data", - horizontalalignment="left", - verticalalignment="center", - size=7, - ) - - -def get_total_num_death_by_agegrp_and_label( - _df: pd.DataFrame, - target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, -) -> pd.Series: - """Return deaths in target period by age-group and cause label.""" - _df_limited_to_dates = _df.loc[_df["date"].between(*target_period_tuple)] - age_group = to_age_group(_df_limited_to_dates["age"]) - return _df_limited_to_dates.groupby([age_group, "label"])["person_id"].size() - - -def get_total_num_dalys_by_agegrp_and_label( - _df: pd.DataFrame, - target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, -) -> pd.Series: - """Return DALYS in target period by age-group and cause label.""" - return ( - _df.loc[_df.year.between(*[i.year for i in target_period_tuple])] - .assign(age_group=_df["age_range"]) - .drop(columns=["date", "year", "sex", "age_range"]) - .melt(id_vars=["age_group"], var_name="label", value_name="dalys") - .groupby(by=["age_group", "label"])["dalys"] - .sum() - ) - - -def get_counts_of_hsi_by_short_treatment_id( - _df: pd.DataFrame, - target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, -) -> pd.Series: - """Get counts of short treatment ids occurring in target period.""" - mask = pd.to_datetime(_df["date"]).between(*target_period_tuple) - _counts_by_treatment_id = ( - _df.loc[mask, "TREATMENT_ID"] - .apply(pd.Series) - .sum() - .astype(int) - ) - ##_short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split("_")[0] + "*") - ##return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() - return _counts_by_treatment_id - - -def get_counts_of_appts(_df: pd.DataFrame, target_period_tuple: tuple[Date, Date] = TARGET_PERIOD) -> pd.Series: - """Get counts of appointments of each type being used in target period.""" - return ( - 
_df.loc[pd.to_datetime(_df["date"]).between(*target_period_tuple), "Number_By_Appt_Type_Code"] - .apply(pd.Series) - .sum() - .astype(int) - ) - - def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None): """Produce standard plots describing effect of each TREATMENT_ID.""" make_graph_file_name = lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 @@ -460,6 +77,24 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No param_names = get_parameter_names_from_scenario_file() + # Costs calculation + alternative_discount_rates = [ + {"discount_rate_cost": 0.03, "discount_rate_health": 0, "discounting_scenario": 'WHO-CHOICE (0.03,0)'}, + {"discount_rate_cost": 0.03, "discount_rate_health": 0.03, "discounting_scenario": 'MAIN (0.03,0.03)'} + ] + + for rates in alternative_discount_rates: + discount_rate_cost = rates["discount_rate_cost"] + discount_rate_health = rates["discount_rate_health"] + input_costs = estimate_input_cost_of_scenarios( + results_folder, + resourcefilepath, + cost_only_used_staff=True, + _discount_rate=discount_rate_cost, + _metric="median",) + + + # Get total population by year total_population_by_year = extract_results( diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py new file mode 100644 index 0000000000..fadb5fbc0a --- /dev/null +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py @@ -0,0 +1,157 @@ +"""Plotting utilities for treatment-id analysis scripts.""" + +import textwrap +import warnings + +import numpy as np +import pandas as pd +from matplotlib.lines import Line2D +from matplotlib.patches import Patch + +from tlo.analysis.utils import ( + CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP, + get_color_cause_of_death_or_daly_label, + get_color_short_treatment_id, + order_of_cause_of_death_or_daly_label, +) + + +def do_bar_plot_with_ci( + _df: pd.DataFrame, + _param, + _ax, + 
period_labels_for_bar_plots: list[str], + target_period_label: str, +): + """Make vertical bars by cause, decomposed into period chunks, with overall-period CI.""" + available_params = _df.columns.get_level_values(0) if isinstance(_df.columns, pd.MultiIndex) else _df.columns + if _param not in available_params: + warnings.warn(f"Parameter '{_param}' not found in dataframe columns. Skipping plot.", stacklevel=2) + return + + _df_nothing = _df[_param] + _df_nothing = _df_nothing.reindex( + pd.MultiIndex.from_product( + [CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.keys(), period_labels_for_bar_plots + [target_period_label]], + names=["label", "period"], + ), + fill_value=0.0, + ) + _df_nothing = _df_nothing.sort_index(axis=0, level=0, key=order_of_cause_of_death_or_daly_label) + + cause_labels = list(_df_nothing.index.get_level_values("label").unique()) + + for i, cause_label in enumerate(cause_labels): + color = get_color_cause_of_death_or_daly_label(cause_label) + one_cause = _df_nothing.xs(cause_label, level="label") + + bottom = 0.0 + for j, period_label in enumerate(period_labels_for_bar_plots): + chunk_height = one_cause.loc[period_label, "mean"] if period_label in one_cause.index else 0.0 + _ax.bar(i, chunk_height, bottom=bottom, color=color, alpha=0.9 if j % 2 == 0 else 0.35) + bottom += chunk_height + + mean_value = one_cause.loc[target_period_label, "mean"] + lower_value = one_cause.loc[target_period_label, "lower"] + upper_value = one_cause.loc[target_period_label, "upper"] + overall_yerr = np.array([[mean_value - lower_value], [upper_value - mean_value]]) + _ax.errorbar(i, mean_value, yerr=overall_yerr, fmt="none", ecolor="black", capsize=2, linewidth=1.2) + + _ax.set_xticks(range(len(cause_labels))) + _ax.set_xticklabels(cause_labels, rotation=90) + chunk_legend_handles = [ + Patch(facecolor="grey", alpha=0.9 if i % 2 == 0 else 0.35, label=period_label) + for i, period_label in enumerate(period_labels_for_bar_plots) + ] + ci_legend_handle = Line2D([0], [0], 
color="black", marker="|", markersize=8, linewidth=1.2, label="95% CI") + _ax.legend(handles=chunk_legend_handles + [ci_legend_handle], loc="upper right") + + +def plot_multiindex_dot_with_interval( + _df: pd.DataFrame, + year: int, + _ax, + central_measure: str = "mean", + value_col: str = "population", + sort: bool = True, + x_label_rotation: int = 90, + x_tick_fontsize: int = 8, + label_wrap_width: int = 18, + max_xticks: int = 30, +): + """Plot central-value dots and lower/upper intervals by category for one year.""" + if not isinstance(_df.index, pd.MultiIndex) or _df.index.nlevels < 3: + raise ValueError("_df index must be a MultiIndex with at least 3 levels: category, stat, year.") + if value_col not in _df.columns: + raise ValueError(f"Column '{value_col}' not found in dataframe.") + + year_level_values = _df.index.get_level_values(2) + available_years = pd.Index(year_level_values.unique()).sort_values() + if year not in available_years: + raise ValueError(f"Year '{year}' not found in index level 2. Available years: {available_years.tolist()}") + + stat_level_values = _df.index.get_level_values(1) + required_stats = {central_measure, "lower", "upper"} + missing_stats = required_stats.difference(set(stat_level_values)) + if missing_stats: + raise ValueError( + f"Missing required stat(s) in index level 1: {sorted(missing_stats)}. 
" + f"Available stats: {sorted(set(stat_level_values))}" + ) + + _plot = _df.xs(year, level=2)[value_col].unstack(level=1) + _plot = _plot.loc[:, [central_measure, "lower", "upper"]] + _plot = _plot.dropna(subset=[central_measure, "lower", "upper"]) + if _plot.empty: + raise ValueError(f"No plottable rows remain for year '{year}' after selecting required stats.") + + if sort: + _plot = _plot.sort_values(by=central_measure, ascending=True) + + x = np.arange(len(_plot.index)) + _ax.vlines(x, _plot["lower"], _plot["upper"], color="black", linewidth=1.2) + _ax.scatter(x, _plot[central_measure], color="black", s=20, zorder=3) + + _ax.figure.set_size_inches(max(12, min(0.25 * len(_plot.index), 36)), 7) + wrapped_labels = [textwrap.fill(str(label), width=label_wrap_width) for label in _plot.index] + if max_xticks is not None and len(x) > max_xticks: + step = int(np.ceil(len(x) / max_xticks)) + shown_positions = x[::step] + shown_labels = [wrapped_labels[i] for i in shown_positions] + _ax.set_xticks(shown_positions) + _ax.set_xticklabels(shown_labels, rotation=x_label_rotation, ha="right", fontsize=x_tick_fontsize) + else: + _ax.set_xticks(x) + _ax.set_xticklabels(wrapped_labels, rotation=x_label_rotation, ha="right", fontsize=x_tick_fontsize) + _ax.set_xlabel(_df.index.names[0] if _df.index.names[0] is not None else "category") + _ax.set_ylabel(value_col) + _ax.set_title(f"{value_col}: {central_measure} with lower/upper ({year})") + _ax.grid(axis="y") + _ax.spines["top"].set_visible(False) + _ax.spines["right"].set_visible(False) + + return _ax + + +def do_barh_plot_with_ci(_df: pd.DataFrame, _ax): + """Make horizontal bar plot for each treatment id.""" + errors = pd.concat([_df["mean"] - _df["lower"], _df["upper"] - _df["mean"]], axis=1).T.to_numpy() + _df.plot.barh(ax=_ax, y="mean", xerr=errors, legend=False, color=[get_color_short_treatment_id(_id) for _id in _df.index]) + + +def do_label_barh_plot(_df: pd.DataFrame, _ax): + """Add text annotation from values in 
dataframe onto axis.""" + y_cords = {ylabel.get_text(): ytick for ytick, ylabel in zip(_ax.get_yticks(), _ax.get_yticklabels())} + pos_on_rhs = _ax.get_xticks()[-1] + + for label, row in _df.iterrows(): + if row["mean"] > 0: + annotation = f"{round(row['mean'], 1)} ({round(row['lower'])}-{round(row['upper'])}) %" + _ax.annotate( + annotation, + xy=(pos_on_rhs, y_cords.get(label)), + xycoords="data", + horizontalalignment="left", + verticalalignment="center", + size=7, + ) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py b/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py new file mode 100644 index 0000000000..afcb96f5fe --- /dev/null +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py @@ -0,0 +1,263 @@ +"""Utilities for extracting and processing results for treatment-id analyses.""" + +from typing import Tuple + +import numpy as np +import pandas as pd + +from scripts.lcoa_inputs_from_tlo_analyses.scenario_effect_of_treatment_ids import ( + EffectOfEachTreatment, +) +from tlo import Date +from tlo.analysis.utils import make_age_grp_types, summarize, to_age_group + + +TARGET_PERIOD = (Date(2026, 1, 1), Date(2041, 1, 1)) + +def get_total_population_by_year(_df): + years_needed = [i.year for i in TARGET_PERIOD] + _df["year"] = pd.to_datetime(_df["date"]).dt.year + return _df.loc[_df["year"].between(min(years_needed), max(years_needed)), ["year", "total"]].set_index("year")[ + "total" + ] + + +def extract_deaths_total(df: pd.DataFrame) -> pd.Series: + return pd.Series({"Total": len(df)}) + + +def target_period(target_period_tuple: tuple[Date, Date] = TARGET_PERIOD) -> str: + """Returns the target period as a string of the form YYYY-YYYY.""" + return "-".join(str(t.year) for t in target_period_tuple) + + +def get_periods_within_target_period( + period_length_years: int, + target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, +) -> list[tuple[str, tuple[int, int]]]: + """Return chunks within 
target period as [(label, (start_year, end_year)), ...].""" + if period_length_years <= 0: + raise ValueError("period_length_years must be a positive integer.") + start_year, end_year = target_period_tuple[0].year, target_period_tuple[1].year + periods = [] + for chunk_start in range(start_year, end_year + 1, period_length_years): + chunk_end = min(chunk_start + period_length_years - 1, end_year) + periods.append((f"{chunk_start}-{chunk_end}", (chunk_start, chunk_end))) + return periods + + +def get_parameter_names_from_scenario_file() -> Tuple[str]: + """Get tuple of scenario names from Scenario class used to create results.""" + e = EffectOfEachTreatment() + return tuple(e._scenarios.keys()) + + +def format_scenario_name(_sn: str) -> str: + """Return reformatted scenario name ready for plotting.""" + if _sn == "Nothing": + return "Nothing" + return _sn.lstrip("Only ") + + +def set_param_names_as_column_index_level_0(_df: pd.DataFrame, param_names: tuple[str, ...]) -> pd.DataFrame: + """Set columns index level 0 as scenario param names.""" + ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] + assert len(names_of_cols_level0) == len(_df.columns.levels[0]) + + reformatted_names = map(format_scenario_name, names_of_cols_level0) + _df.columns = _df.columns.set_levels(reformatted_names, level=0) + return _df + + +def find_difference_extra_relative_to_comparison( + _ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, +): + """Find run-wise differences relative to comparison in a series with multi-index.""" + return ( + _ser.unstack() + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=0) + .drop(index=([comparison] if drop_comparison else [])) + .stack() + ) + + +def find_mean_difference_in_appts_relative_to_comparison( + _df: pd.DataFrame, + comparison: str, + drop_comparison: 
bool = True, +): + """Find mean fewer appointments when treatment does not happen relative to comparison.""" + return -summarize( + pd.concat( + { + _idx: find_difference_extra_relative_to_comparison( + row, comparison=comparison, drop_comparison=drop_comparison + ) + for _idx, row in _df.iterrows() + }, + axis=1, + ).T, + only_mean=True, + ) + + +def find_mean_difference_extra_relative_to_comparison_dataframe( + _df: pd.DataFrame, + comparison: str, + drop_comparison: bool = True, +): + """Same as find_difference_extra_relative_to_comparison but for dataframe.""" + return summarize( + pd.concat( + { + _idx: find_difference_extra_relative_to_comparison( + row, comparison=comparison, drop_comparison=drop_comparison + ) + for _idx, row in _df.iterrows() + }, + axis=1, + ).T, + only_mean=True, + ) + + +def get_num_deaths_by_cause_label(_df: pd.DataFrame, target_period_tuple: tuple[Date, Date] = TARGET_PERIOD) -> pd.Series: + """Return total deaths by label within target period.""" + return _df.loc[pd.to_datetime(_df.date).between(*target_period_tuple)].groupby(_df["label"]).size() + + +def get_num_dalys_by_cause_label(_df: pd.DataFrame, target_period_tuple: tuple[Date, Date] = TARGET_PERIOD) -> pd.Series: + """Return total DALYS by label within target period.""" + return ( + _df.loc[_df.year.between(*[i.year for i in target_period_tuple])] + .drop(columns=["date", "sex", "age_range", "year"]) + .sum() + ) + + +def make_get_num_deaths_by_cause_label_and_period( + period_length_years: int, + target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, +): + """Create helper that summarizes deaths by cause and period chunks + overall.""" + periods = get_periods_within_target_period( + period_length_years=period_length_years, + target_period_tuple=target_period_tuple, + ) + period_lookup = { + year: period_label + for period_label, (start_year, end_year) in periods + for year in range(start_year, end_year + 1) + } + target_period_label = target_period(target_period_tuple) + + 
def _get_num_deaths_by_cause_label_and_period(_df: pd.DataFrame) -> pd.Series: + _df_in_target = _df.loc[pd.to_datetime(_df.date).between(*target_period_tuple)].copy() + _df_in_target["year"] = pd.to_datetime(_df_in_target["date"]).dt.year + _df_in_target["period"] = _df_in_target["year"].map(period_lookup) + + chunked = _df_in_target.groupby(["label", "period"]).size() + overall = _df_in_target.groupby("label").size() + overall.index = pd.MultiIndex.from_arrays( + [overall.index, np.repeat(target_period_label, len(overall.index))], names=["label", "period"] + ) + return pd.concat([chunked, overall]).sort_index() + + return _get_num_deaths_by_cause_label_and_period + + +def make_get_num_dalys_by_cause_label_and_period( + period_length_years: int, + target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, +): + """Create helper that summarizes DALYS by cause and period chunks + overall.""" + periods = get_periods_within_target_period( + period_length_years=period_length_years, + target_period_tuple=target_period_tuple, + ) + period_lookup = { + year: period_label + for period_label, (period_start, period_end) in periods + for year in range(period_start, period_end + 1) + } + start_year, end_year = target_period_tuple[0].year, target_period_tuple[1].year + target_period_label = target_period(target_period_tuple) + + def _get_num_dalys_by_cause_label_and_period(_df: pd.DataFrame) -> pd.Series: + _df_in_target = _df.loc[_df.year.between(start_year, end_year)].copy() + _df_in_target["period"] = _df_in_target["year"].map(period_lookup) + + melted = ( + _df_in_target.drop(columns=["date", "sex", "age_range"]) + .melt(id_vars=["year", "period"], var_name="label", value_name="dalys") + ) + chunked = melted.groupby(["label", "period"])["dalys"].sum() + overall = melted.groupby("label")["dalys"].sum() + overall.index = pd.MultiIndex.from_arrays( + [overall.index, np.repeat(target_period_label, len(overall.index))], names=["label", "period"] + ) + return pd.concat([chunked, 
overall]).sort_index() + + return _get_num_dalys_by_cause_label_and_period + + +def get_num_deaths_by_age_group( + _df: pd.DataFrame, + age_grp_lookup: dict, + target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, +): + """Return total deaths by age-group in target period.""" + return ( + _df.loc[pd.to_datetime(_df.date).between(*target_period_tuple)] + .groupby(_df["age"].map(age_grp_lookup).astype(make_age_grp_types())) + .size() + ) + + +def get_total_num_death_by_agegrp_and_label( + _df: pd.DataFrame, + target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, +) -> pd.Series: + """Return deaths in target period by age-group and cause label.""" + _df_limited_to_dates = _df.loc[_df["date"].between(*target_period_tuple)] + age_group = to_age_group(_df_limited_to_dates["age"]) + return _df_limited_to_dates.groupby([age_group, "label"])["person_id"].size() + + +def get_total_num_dalys_by_agegrp_and_label( + _df: pd.DataFrame, + target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, +) -> pd.Series: + """Return DALYS in target period by age-group and cause label.""" + return ( + _df.loc[_df.year.between(*[i.year for i in target_period_tuple])] + .assign(age_group=_df["age_range"]) + .drop(columns=["date", "year", "sex", "age_range"]) + .melt(id_vars=["age_group"], var_name="label", value_name="dalys") + .groupby(by=["age_group", "label"])["dalys"] + .sum() + ) + + +def get_counts_of_hsi_by_short_treatment_id( + _df: pd.DataFrame, + target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, +) -> pd.Series: + """Get counts of short treatment ids occurring in target period.""" + mask = pd.to_datetime(_df["date"]).between(*target_period_tuple) + _counts_by_treatment_id = _df.loc[mask, "TREATMENT_ID"].apply(pd.Series).sum().astype(int) + return _counts_by_treatment_id + + +def get_counts_of_appts(_df: pd.DataFrame, target_period_tuple: tuple[Date, Date] = TARGET_PERIOD) -> pd.Series: + """Get counts of appointments of each type being used in target period.""" + return ( + 
_df.loc[pd.to_datetime(_df["date"]).between(*target_period_tuple), "Number_By_Appt_Type_Code"] + .apply(pd.Series) + .sum() + .astype(int) + ) From 46fbbc3c977c578a1d1bdabc0291307e5579fcce Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Mon, 9 Mar 2026 12:15:34 +0000 Subject: [PATCH 18/55] Add multiplier as an argument to extract_results --- src/tlo/analysis/utils.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index e890a12d2d..74fc0673e4 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -311,6 +311,7 @@ def extract_results(results_folder: Path, index: str = None, custom_generate_series=None, do_scaling: bool = False, + scaling_factor = None, draw_runs: Optional[List[Tuple[int, int]]] = None, autodiscover: bool = False, ) -> pd.DataFrame: @@ -335,9 +336,13 @@ def extract_results(results_folder: Path, def get_multiplier(_draw, _run): """Helper function to get the multiplier from the simulation. 
Note that if the scaling factor cannot be found a `KeyError` is thrown.""" - return load_pickled_dataframes( - results_folder, _draw, _run, 'tlo.methods.population' - )['tlo.methods.population']['scaling_factor']['scaling_factor'].values[0] + if scaling_factor is not None: + return scaling_factor + else: + return ( + load_pickled_dataframes(results_folder, _draw, _run, 'tlo.methods.population') + ['tlo.methods.population']['scaling_factor']['scaling_factor'].values[0] + ) if custom_generate_series is None: # If there is no `custom_generate_series` provided, it implies that function required selects the specified From 90713c7f269f8d2ef8cba9ceaa7f0398c5aa2a06 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Mon, 9 Mar 2026 13:27:38 +0000 Subject: [PATCH 19/55] Use new interface for cost estimation --- src/scripts/costing/cost_estimation.py | 277 +++++++++++++----- .../analysis_effect_of_treatment_ids.py | 22 +- src/tlo/analysis/utils.py | 75 ++--- 3 files changed, 247 insertions(+), 127 deletions(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 02d0971955..594c2d1d01 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -156,7 +156,8 @@ def get_discount_factor(year): # Compute the cumulative discount factor as the product of (1 + discount_rate) for all previous years discount_factor = 1 for y in range(_initial_year + 1, - year + 1): # only starting from initial year + 1 as the discount factor for initial year should be 1 + year + 1): # only starting from initial year + 1 as the discount factor for initial year + # should be 1 discount_factor *= (1 + _discount_rate.get(y, 0)) # Default to 0 if year not in dictionary return discount_factor else: @@ -282,6 +283,7 @@ def clean_equipment_name(name: str, equipment_drop_list = None) -> str: def estimate_input_cost_of_scenarios(results_folder: Path, resourcefilepath: Path, + suspended_results_folder: Path = None, _draws: 
Optional[list[int]] = None, _runs: Optional[list[int]] = None, summarize: bool = False, @@ -298,6 +300,10 @@ def estimate_input_cost_of_scenarios(results_folder: Path, Path to the directory containing simulation output files. resourcefilepath : Path, optional Path to the resource files + suspended_results_folder: Path, optional + Path to the directory containing suspended simulation output files (using the suspend and resume functionality), + This is used to extract the scaling_factor to scale result to actual population size. If None, then the + 'scaling_factor' is obtained from the results_folder. _draws : list, optional Specific draws to include in the cost estimation. Defaults to all available draws. _runs : list, optional @@ -316,8 +322,10 @@ def estimate_input_cost_of_scenarios(results_folder: Path, Returns: ------- pd.DataFrame - A dataframe containing discounted costs disaggregated by category, sub-category, category-specific subgroup, year, draw, and run. - Note that if a discount rate is used, the dataframe will provide cost as the NPV during the first year of the dataframe + A dataframe containing discounted costs disaggregated by category, sub-category, category-specific subgroup, + year, draw, and run. 
+ Note that if a discount rate is used, the dataframe will provide cost as the NPV during the first year of the + dataframe """ # Useful common functions @@ -343,6 +351,8 @@ def melt_model_output_draws_and_runs(_df, id_vars): _draws = range(0, info['number_of_draws']) if _runs is None: _runs = range(0, info['runs_per_draw']) + if suspended_results_folder is None: + suspended_results_folder = results_folder # Load cost input files # ------------------------ @@ -354,7 +364,8 @@ def melt_model_output_draws_and_runs(_df, id_vars): facility_id_levels_dict = dict(zip(mfl['Facility_ID'], mfl['Facility_Level'])) fac_levels = set(mfl.Facility_Level) - # If variable discount rate is provided, use the average across the relevant years for the purpose of annuitization of HR and equipment costs + # If variable discount rate is provided, use the average across the relevant years for the purpose of annuitization + # of HR and equipment costs def calculate_annuitization_rate(_discount_rate, _years): if isinstance(_discount_rate, (int, float)): # Single discount rate, return as is @@ -477,25 +488,68 @@ def merge_cost_and_model_data(cost_df, model_df, varnames): return merged_df # Get available staff count for each year and draw - def get_staff_count_by_facid_and_officer_type(_df: pd.Series) -> pd.Series: - """Summarise the parsed logged-key results for one draw (as dataframe) into a pd.Series.""" - _df = _df.set_axis(_df['date'].dt.year).drop(columns=['date']) - _df.index.name = 'year' + def get_staff_count_by_facid_and_officer_type(_df: pd.DataFrame) -> pd.Series: + """ + Convert logged staff dictionary output into tidy format, + summing staff counts across all clinic columns. 
+ + Returns pd.Series indexed by: + (year, FacilityID, Officer) + """ - def change_to_standard_flattened_index_format(col): - parts = col.split("_", 3) # Split by "_" only up to 3 parts - if len(parts) > 2: - return parts[0] + "=" + parts[1] + "|" + parts[2] + "=" + parts[ - 3] # Rejoin with "I" at the second occurrence - return col # If there's no second underscore, return the string as it is + df = _df.copy() + df["year"] = df["date"].dt.year + df = df.drop(columns=["date"]) - _df.columns = [change_to_standard_flattened_index_format(col) for col in _df.columns] + clinic_cols = df.columns.difference(["year"]) - return unflatten_flattened_multi_index_in_logging(_df).stack(level=[0, 1]) # expanded flattened axis + long_frames = [] + + for clinic in clinic_cols: + expanded = df[[clinic, "year"]].copy() + expanded = expanded[expanded[clinic].notna()] + + expanded_dict = expanded[clinic].apply(pd.Series) + expanded_dict["year"] = expanded["year"].values + + long_frames.append(expanded_dict) + + # Combine all clinics + combined = pd.concat(long_frames, ignore_index=True) + + # Melt to long format + long_df = ( + combined + .melt(id_vars=["year"], + var_name="facility_officer", + value_name="count") + .dropna(subset=["count"]) + ) + + # Split FacilityID and Officer + parts = long_df["facility_officer"].str.split("_Officer_", expand=True) + + long_df["FacilityID"] = ( + parts[0] + .str.replace("FacilityID_", "", regex=False) + .astype(int) + ) + long_df["Officer"] = parts[1] + + # SUM ACROSS CLINICS HERE + result = ( + long_df + .groupby(["year", "FacilityID", "Officer"])["count"] + .sum() + .sort_index() + ) + + return result # Staff count by Facility ID available_staff_count_by_facid_and_officertype = extract_results( Path(results_folder), + suspended_results_folder=suspended_results_folder, module='tlo.methods.healthsystem.summary', key='number_of_hcw_staff', custom_generate_series=get_staff_count_by_facid_and_officer_type, @@ -519,22 +573,71 @@ def 
change_to_standard_flattened_index_format(col): 'Facility_Level'].astype(str) # make sure facility level is stored as string available_staff_count_by_level_and_officer_type = available_staff_count_by_level_and_officer_type.drop( available_staff_count_by_level_and_officer_type[available_staff_count_by_level_and_officer_type[ - 'Facility_Level'] == '5'].index) # drop headquarters because we're only concerned with staff engaged in service delivery + 'Facility_Level'] == '5'].index) # drop headquarters + # because we're only concerned with staff engaged in service delivery available_staff_count_by_level_and_officer_type.rename(columns={'value': 'staff_count'}, inplace=True) # Get list of cadres which were utilised in each run to get the count of staff used in the simulation - # Note that we still cost the full staff count for any cadre-Facility_Level combination that was ever used in a run, and - # not the amount of time which was used - def get_capacity_used_by_officer_type_and_facility_level(_df: pd.Series) -> pd.Series: - """Summarise the parsed logged-key results for one draw (as dataframe) into a pd.Series.""" - _df = _df.set_axis(_df['date'].dt.year).drop(columns=['date']) - _df.index.name = 'year' - return unflatten_flattened_multi_index_in_logging(_df).stack(level=[0, 1]) # expanded flattened axis + # Note that we still cost the full staff count for any cadre-Facility_Level combination that was ever used in a run, + # and not the amount of time which was used + def get_capacity_used_by_officer_type_and_facility_level( + _df: pd.DataFrame + ) -> pd.Series: + """ + Parse logging output and return a Series indexed by: + (year, OfficerType, FacilityLevel) + + Collapses (sums) across clinics. + Uses facility_id_levels_dict to map FacilityID → FacilityLevel. + """ + + # ---- 1. Set year index ---- + _df = _df.set_axis(_df["date"].dt.year).drop(columns=["date"]) + _df.index.name = "year" + + # ---- 2. 
Unflatten logging columns ---- + _df = unflatten_flattened_multi_index_in_logging(_df) + + # Expect columns like: + # ('Clinic', 'facID_and_officer') + + col_df = _df.columns.to_frame(index=False) + + # ---- 3. Extract OfficerType ---- + col_df["OfficerType"] = ( + col_df["facID_and_officer"] + .str.split("_Officer_") + .str[-1] + ) + + # ---- 4. Extract FacilityID ---- + col_df["FacilityID"] = ( + col_df["facID_and_officer"] + .str.split("_Officer_") + .str[0] + .str.replace("FacilityID_", "", regex=False) + .astype(int) + ) + + # ---- 5. Map to FacilityLevel ---- + col_df["FacilityLevel"] = col_df["FacilityID"].map(facility_id_levels_dict) + + # ---- 6. Rebuild MultiIndex (drop clinic level) ---- + _df.columns = pd.MultiIndex.from_frame( + col_df[["OfficerType", "FacilityLevel"]] + ) + + # ---- 7. Collapse across clinics ---- + _df = _df.groupby(level=["OfficerType", "FacilityLevel"], axis=1).sum() + + # ---- 8. Return stacked format ---- + return _df.stack(["OfficerType", "FacilityLevel"]) annual_capacity_used_by_cadre_and_level = extract_results( Path(results_folder), + suspended_results_folder=suspended_results_folder, module='tlo.methods.healthsystem.summary', - key='Capacity_By_OfficerType_And_FacilityLevel', + key='Capacity_By_FacID_and_Officer', custom_generate_series=get_capacity_used_by_officer_type_and_facility_level, do_scaling=False, ) @@ -552,7 +655,10 @@ def get_capacity_used_by_officer_type_and_facility_level(_df: pd.Series) -> pd.S average_capacity_used_by_cadre_and_level[average_capacity_used_by_cadre_and_level['capacity_used'] != 0][ ['OfficerType', 'FacilityLevel', 'draw', 'run']] print( - f"Out of {average_capacity_used_by_cadre_and_level.groupby(['OfficerType', 'FacilityLevel']).size().count()} cadre and level combinations available, {list_of_cadre_and_level_combinations_used.groupby(['OfficerType', 'FacilityLevel']).size().count()} are used across the simulations") + f"Out of {average_capacity_used_by_cadre_and_level.groupby(['OfficerType', 
'FacilityLevel']).size().count()} " + f"cadre and level combinations available, " + f"{list_of_cadre_and_level_combinations_used.groupby(['OfficerType', 'FacilityLevel']).size().count()} " + f"are used across the simulations") list_of_cadre_and_level_combinations_used = list_of_cadre_and_level_combinations_used.rename( columns={'FacilityLevel': 'Facility_Level'}) @@ -564,11 +670,13 @@ def get_capacity_used_by_officer_type_and_facility_level(_df: pd.Series) -> pd.S if (cost_only_used_staff): print( - "The input for 'cost_only_used_staff' implies that only cadre-level combinations which have been used in the run are costed") + "The input for 'cost_only_used_staff' implies that only cadre-level combinations which have been used in " + "the run are costed") staff_size_chosen_for_costing = used_staff_count_by_level_and_officer_type else: print( - "The input for 'cost_only_used_staff' implies that all staff are costed regardless of the cadre-level combinations which have been used in the run are costed") + "The input for 'cost_only_used_staff' implies that all staff are costed regardless of the cadre-level " + "combinations which have been used in the run are costed") staff_size_chosen_for_costing = available_staff_count_by_level_and_officer_type # Calculate various components of HR cost @@ -607,7 +715,8 @@ def calculate_npv_past_training_expenses_by_row(row, r=_discount_rate): if partial_year > 0: npv += annual_cost * partial_year * (1 + r) ** (1 + r) - # Add recruitment cost assuming this happens during the partial year or the year after graduation if partial year == 0 + # Add recruitment cost assuming this happens during the partial year or the year after graduation if + # partial year == 0 npv += row['recruitment_cost_per_person_recruited_usd'] * (1 + r) return npv @@ -619,34 +728,33 @@ def calculate_npv_past_training_expenses_by_row(row, r=_discount_rate): npv_values.append(npv) preservice_training_cost['npv_of_training_and_recruitment_cost'] = npv_values - 
preservice_training_cost['npv_of_training_and_recruitment_cost_per_recruit'] = preservice_training_cost[ - 'npv_of_training_and_recruitment_cost'] * \ - (1 / (preservice_training_cost[ - 'absorption_rate_of_students_into_public_workforce'] + - preservice_training_cost[ - 'proportion_of_workforce_recruited_from_abroad'])) * \ - (1 / preservice_training_cost[ - 'graduation_rate']) * (1 / - preservice_training_cost[ - 'licensure_exam_passing_rate']) - if _discount_rate == 0: # if the discount rate is 0, then the pre-service + recruitment cost simply needs to be divided by the number of years in tenure + preservice_training_cost['npv_of_training_and_recruitment_cost_per_recruit'] \ + = (preservice_training_cost['npv_of_training_and_recruitment_cost'] * + (1 / (preservice_training_cost['absorption_rate_of_students_into_public_workforce'] + + preservice_training_cost['proportion_of_workforce_recruited_from_abroad'])) * + (1 / preservice_training_cost['graduation_rate']) * + (1 /preservice_training_cost['licensure_exam_passing_rate'])) + if _discount_rate == 0: # if the discount rate is 0, then the pre-service + recruitment cost simply + # needs to be divided by the number of years in tenure preservice_training_cost['annuitisation_rate'] = preservice_training_cost[ 'average_length_of_tenure_in_the_public_sector'] else: preservice_training_cost['annuitisation_rate'] = 1 + (1 - (1 + annuitization_rate) ** ( -preservice_training_cost[ 'average_length_of_tenure_in_the_public_sector'] + 1)) / annuitization_rate - preservice_training_cost['annuitised_training_and_recruitment_cost_per_recruit'] = preservice_training_cost[ - 'npv_of_training_and_recruitment_cost_per_recruit'] / \ - preservice_training_cost[ - 'annuitisation_rate'] - - # Cost per student trained * 1/Rate of absorption from the local and foreign graduates * 1/Graduation rate * attrition rate - # the inverse of attrition rate is the average expected tenure; and the preservice training cost needs to be divided by the 
average tenure + preservice_training_cost['annuitised_training_and_recruitment_cost_per_recruit'] = \ + (preservice_training_cost['npv_of_training_and_recruitment_cost_per_recruit'] / + preservice_training_cost['annuitisation_rate']) + + # Cost per student trained * 1/Rate of absorption from the local and foreign graduates + # * 1/Graduation rate * attrition rate + # the inverse of attrition rate is the average expected tenure; and the preservice training cost needs to + # be divided by the average tenure preservice_training_cost['cost'] = preservice_training_cost[ 'annuitised_training_and_recruitment_cost_per_recruit'] * \ preservice_training_cost['staff_count'] * preservice_training_cost[ - 'annual_attrition_rate'] # not multiplied with attrition rate again because this is already factored into 'Annual_cost_per_staff_recruited' + 'annual_attrition_rate'] # not multiplied with attrition rate again + # because this is already factored into 'Annual_cost_per_staff_recruited' preservice_training_cost = preservice_training_cost[ ['draw', 'run', 'year', 'OfficerType', 'Facility_Level', 'cost']] @@ -680,7 +788,8 @@ def label_rows_of_cost_dataframe(_df, label_var, label): # Initialize HR with the salary data if (cost_only_used_staff): human_resource_costs = retain_relevant_column_subset( - label_rows_of_cost_dataframe(salary_for_staff, 'cost_subcategory', 'salary_for_cadres_used'), 'OfficerType') + label_rows_of_cost_dataframe(salary_for_staff, 'cost_subcategory', 'salary_for_cadres_used'), + 'OfficerType') # Concatenate additional cost categories additional_costs = [ (preservice_training_cost, 'preservice_training_and_recruitment_cost_for_attrited_workers'), @@ -689,7 +798,8 @@ def label_rows_of_cost_dataframe(_df, label_var, label): ] else: human_resource_costs = retain_relevant_column_subset( - label_rows_of_cost_dataframe(salary_for_staff, 'cost_subcategory', 'salary_for_all_staff'), 'OfficerType') + label_rows_of_cost_dataframe(salary_for_staff, 'cost_subcategory', 
'salary_for_all_staff'), + 'OfficerType') # Concatenate additional cost categories additional_costs = [ (preservice_training_cost, 'preservice_training_and_recruitment_cost_for_attrited_workers'), @@ -741,6 +851,7 @@ def get_counts_of_items_requested(_df): cons_req = extract_results( results_folder, + suspended_results_folder=suspended_results_folder, module='tlo.methods.healthsystem.summary', key='Consumables', custom_generate_series=get_counts_of_items_requested, @@ -762,7 +873,8 @@ def get_counts_of_items_requested(_df): # 2.1 Cost of consumables dispensed # --------------------------------------------------------------------------------------------------------------- # Multiply number of items needed by cost of consumable - # consumables_dispensed.columns = consumables_dispensed.columns.get_level_values(0).str() + "_" + consumables_dispensed.columns.get_level_values(1) # Flatten multi-level columns for pandas merge + # consumables_dispensed.columns = consumables_dispensed.columns.get_level_values(0).str() + "_" + + # consumables_dispensed.columns.get_level_values(1) # Flatten multi-level columns for pandas merge unit_costs['consumables'].columns = pd.MultiIndex.from_arrays( [unit_costs['consumables'].columns, [''] * len(unit_costs['consumables'].columns)]) cost_of_consumables_dispensed = consumables_dispensed.merge(unit_costs['consumables'], on=idx['Item_Code'], @@ -796,8 +908,20 @@ def get_counts_of_items_requested(_df): left_on='Item_Code', right_on='item_code', validate='m:1', how='left') + # Identify rows where excess_stock_proportion_of_dispensed is NaN + missing_excess_stock = ( + cost_of_excess_consumables_stocked + ['excess_stock_proportion_of_dispensed'] + .isna() + ) + + # Fill missing values with the average inflow-to-outflow ratio minus 1 + fill_value = average_inflow_to_outflow_ratio_ratio - 1 + cost_of_excess_consumables_stocked.loc[ - cost_of_excess_consumables_stocked.excess_stock_proportion_of_dispensed.isna(), 
'excess_stock_proportion_of_dispensed'] = average_inflow_to_outflow_ratio_ratio - 1 # TODO disaggregate the average by program + missing_excess_stock, + 'excess_stock_proportion_of_dispensed' + ] = fill_value # TODO: disaggregate the average by program cost_of_excess_consumables_stocked[quantity_columns] = cost_of_excess_consumables_stocked[ quantity_columns].multiply(cost_of_excess_consumables_stocked[idx[price_column]], axis=0) cost_of_excess_consumables_stocked[quantity_columns] = cost_of_excess_consumables_stocked[ @@ -815,7 +939,8 @@ def get_counts_of_items_requested(_df): def melt_and_label_consumables_cost(_df, label): multi_index = pd.MultiIndex.from_tuples(_df.columns) _df.columns = multi_index - # Select 'Item_Code', 'year', and all columns where both levels of the MultiIndex are numeric (these are the (draw,run) columns with cost values) + # Select 'Item_Code', 'year', and all columns where both levels of the MultiIndex are numeric + # (these are the (draw,run) columns with cost values) selected_columns = [col for col in _df.columns if (col[0] in ['Item_Code', 'year']) or (isinstance(col[0], int) and isinstance(col[1], int))] _df = _df[selected_columns] # Subset the dataframe with the selected columns @@ -827,13 +952,14 @@ def melt_and_label_consumables_cost(_df, label): melted_df['consumable'] = melted_df['Item_Code'].map(consumables_dict) melted_df['cost_subcategory'] = label melted_df[ - 'Facility_Level'] = 'all' # TODO this is temporary until 'tlo.methods.healthsystem.summary' only logs consumable at the aggregate level + 'Facility_Level'] = 'all' + # TODO this is temporary until 'tlo.methods.healthsystem.summary' only logs consumable at the aggregate level melted_df = melted_df.rename(columns={'value': 'cost'}) return melted_df def disaggregate_separately_managed_medical_supplies_from_consumable_costs(_df, _consumables_dict, - # This is a dictionary mapping codes to names + # This is a dictionary mapping codes to names 
list_of_unique_medical_products): reversed_consumables_dict = {value: key for key, value in _consumables_dict.items()} # reverse dictionary to map names to codes @@ -849,24 +975,26 @@ def disaggregate_separately_managed_medical_supplies_from_consumable_costs(_df, columns='item_code') separately_managed_medical_supplies = [127, 141, 161] # Oxygen, Blood, IRS - cost_of_consumables_dispensed, cost_of_separately_managed_medical_supplies_dispensed = disaggregate_separately_managed_medical_supplies_from_consumable_costs( + cost_of_consumables_dispensed, cost_of_separately_managed_medical_supplies_dispensed = ( + disaggregate_separately_managed_medical_supplies_from_consumable_costs( _df=retain_relevant_column_subset( melt_and_label_consumables_cost(cost_of_consumables_dispensed, 'cost_of_consumables_dispensed'), 'consumable'), _consumables_dict=consumables_dict, - list_of_unique_medical_products=separately_managed_medical_supplies) - cost_of_excess_consumables_stocked, cost_of_separately_managed_medical_supplies_excess_stock = disaggregate_separately_managed_medical_supplies_from_consumable_costs( + list_of_unique_medical_products=separately_managed_medical_supplies)) + cost_of_excess_consumables_stocked, cost_of_separately_managed_medical_supplies_excess_stock = ( + disaggregate_separately_managed_medical_supplies_from_consumable_costs( _df=retain_relevant_column_subset( melt_and_label_consumables_cost(cost_of_excess_consumables_stocked, 'cost_of_excess_consumables_stocked'), 'consumable'), _consumables_dict=consumables_dict, - list_of_unique_medical_products=separately_managed_medical_supplies) + list_of_unique_medical_products=separately_managed_medical_supplies)) consumable_costs = pd.concat([cost_of_consumables_dispensed, cost_of_excess_consumables_stocked]) # 2.4 Supply chain costs # --------------------------------------------------------------------------------------------------------------- - # Assume that the cost of procurement, warehousing and distribution is 
a fixed proportion of consumable purchase costs + # Assume that the cost of procurement,warehousing and distribution is a fixed proportion of consumable purchase # The fixed proportion is based on Resource Mapping Expenditure data from 2018 resource_mapping_data = unit_costs['actual_expenditure_data'] # Make sure values are numeric @@ -922,7 +1050,8 @@ def disaggregate_separately_managed_medical_supplies_from_consumable_costs(_df, # -------------------------------------------- print("Now estimating Medical equipment costs...") - # Total cost of equipment required as per SEL (HSSP-III) only at facility IDs where it has been used in the simulation + # Total cost of equipment required as per SEL (HSSP-III) only at facility IDs where it has been used in the + # simulation # Get list of equipment used in the simulation by district and level def get_equipment_used_by_district_and_facility(_df: pd.Series) -> pd.Series: """Summarise the parsed logged-key results for one draw (as dataframe) into a pd.Series.""" @@ -987,7 +1116,8 @@ def get_equipment_used_by_district_and_facility(_df: pd.Series) -> pd.Series: on=['District', 'Facility_Level'], how='left') equipment_df.loc[equipment_df.Facility_Count.isna(), 'Facility_Count'] = 0 - # Because levels 1b and 2 are collapsed together, we assume that the same equipment is used by level 1b as that recorded for level 2 + # Because levels 1b and 2 are collapsed together, we assume that the same equipment is used by level 1b as + # that recorded for level 2 def update_itemuse_for_level1b_using_level2_data(_df): # Create a list of District and Item_code combinations for which use == True list_of_equipment_used_at_level2 = \ @@ -1043,7 +1173,8 @@ def update_itemuse_for_level1b_using_level2_data(_df): # Assume that the annual costs are constant each year of the simulation equipment_costs = pd.concat([equipment_costs.assign(year=year) for year in years]) - # TODO If the logger is updated to include year, we may wish to calculate equipment 
costs by year - currently we assume the same annuitised equipment cost each year + # TODO If the logger is updated to include year, we may wish to calculate equipment costs by year + # (currently we assume the same annuitised equipment cost each year) equipment_costs = equipment_costs.reset_index(drop=True) equipment_costs = equipment_costs.rename(columns={'Equipment_tlo': 'Equipment'}) equipment_costs = prepare_cost_dataframe(equipment_costs, _category_specific_group='Equipment', @@ -1137,8 +1268,8 @@ def update_itemuse_for_level1b_using_level2_data(_df): # Define a function to summarize cost data from -# Note that the dataframe needs to have draw as index and run as columns. if the dataframe is long with draw and run as index, then -# first unstack the dataframe and subsequently apply the summarize function +# Note that the dataframe needs to have draw as index and run as columns. if the dataframe is long with draw and run as +# index, then first unstack the dataframe and subsequently apply the summarize function def summarize_cost_data(_df, _metric: Literal['mean', 'median'] = 'mean') -> pd.DataFrame: """ @@ -1194,8 +1325,8 @@ def estimate_projected_health_spending(resourcefilepath: Path, """ Estimate total projected health spending for a simulation period. - Combines health spending per capita projections (Dieleman et al, 2019) with simulated population estimates to calculate - total health expenditure, optionally applying a discount rate and summarizing across runs. + Combines health spending per capita projections (Dieleman et al, 2019) with simulated population estimates to + calculate total health expenditure, optionally applying a discount rate and summarizing across runs. 
Parameters: ---------- @@ -1424,7 +1555,8 @@ def do_stacked_bar_plot_of_cost_by_category(_df: pd.DataFrame, if (_disaggregate_by_subgroup is True): for name, df in dfs.items(): dfs[name] = df.copy() # Choose the dataframe to modify - # If sub-groups are more than 10 in number, then disaggregate the top 10 and group the rest into an 'other' category + # If sub-groups are more than 10 in number, then disaggregate the top 10 and group the rest into an + # 'other' category if (len(dfs[name]['cost_subgroup'].unique()) > 10): # Calculate total cost per subgroup subgroup_totals = dfs[name].groupby('cost_subgroup')['cost'].sum() @@ -1870,7 +2002,8 @@ def wrap_text(text, width=15): if (len(_df['cost_subgroup'].unique()) > 10): # Step 2: Group all other consumables into "Other" other_cost = _df.iloc[10:]["cost"].sum() - top_10 = pd.concat([top_10, pd.DataFrame([{"cost_subgroup": "Other", "cost": other_cost}])], ignore_index=True) + top_10 = pd.concat([top_10, pd.DataFrame([{"cost_subgroup": "Other", "cost": other_cost}])], + ignore_index=True) # Prepare data for the treemap total_cost = top_10["cost"].sum() @@ -2015,7 +2148,8 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: # Initialize an empty DataFrame to store values for each 'run' all_run_values = pd.DataFrame() - # Create an array of implementation costs ranging from 0 to the max value of max ability to pay for the current draw + # Create an array of implementation costs ranging from 0 to the max value of max ability to pay for the current + # draw implementation_costs = np.linspace(0, max_ability_to_pay_for_implementation.loc[draw_index].max(), 50) # Add fixed values for ROI ratio calculation additional_costs = np.array([1_000_000_000, 3_000_000_000]) @@ -2121,7 +2255,8 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: # Replace specific x-ticks with % of health spending values if _projected_health_spending: xtick_labels[ - 1] = 
f'{xticks[1]:,.0f}\n({xticks[1] / (_projected_health_spending / 1e6) :.2%} of \n projected total \n health spend)' + 1] = (f'{xticks[1]:,.0f}\n({xticks[1] / (_projected_health_spending / 1e6) :.2%} of \n projected total ' + f'\n health spend)') for i, tick in enumerate(xticks): if (i != 0) & (i != 1): # Replace for 4000 xtick_labels[i] = f'{tick:,.0f}\n({tick / (_projected_health_spending / 1e6) :.2%})' diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index d37b9ffebc..68164df31f 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -4,11 +4,11 @@ import glob import os import zipfile +import pickle from pathlib import Path import numpy as np from matplotlib import pyplot as plt -import pandas as pd from tlo import Date from scripts.calibration_analyses.analysis_scripts import plot_legends from scripts.lcoa_inputs_from_tlo_analyses.fig_utils import ( @@ -55,6 +55,7 @@ TARGET_PERIOD = (Date(2026, 1, 1), Date(2041, 1, 1)) PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS = 5 +suspended_folder = Path("outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z") results_folder = Path("outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-16T154500Z") # SCALING_FACTOR retrieved from the suspended run in # outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z @@ -89,6 +90,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No input_costs = estimate_input_cost_of_scenarios( results_folder, resourcefilepath, + suspended_results_folder=suspended_folder, cost_only_used_staff=True, _discount_rate=discount_rate_cost, _metric="median",) @@ -103,7 +105,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No key='population', 
custom_generate_series=get_total_population_by_year, do_scaling=True, - scaling_factor=SCALING_FACTOR, + suspended_results_folder=suspended_folder, autodiscover=True ) total_population_by_year = compute_summary_statistics(total_population_by_year, central_measure = 'median') @@ -140,7 +142,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No key="death", custom_generate_series=extract_deaths_total, do_scaling=True, - scaling_factor=SCALING_FACTOR, + suspended_results_folder=suspended_folder, autodiscover=True, ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) ) @@ -155,7 +157,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No target_period_tuple=TARGET_PERIOD, ), do_scaling=True, - scaling_factor=SCALING_FACTOR, + suspended_results_folder=suspended_folder, autodiscover=True, ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) ) @@ -199,7 +201,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No key="death", custom_generate_series=lambda _df: get_num_deaths_by_cause_label(_df, TARGET_PERIOD), do_scaling=True, - scaling_factor=SCALING_FACTOR, + suspended_results_folder=suspended_folder, autodiscover=True, ) .pipe(set_param_names_as_column_index_level_0, param_names=param_names) @@ -213,7 +215,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No key="dalys_stacked_by_age_and_time", custom_generate_series=lambda _df: get_num_dalys_by_cause_label(_df, TARGET_PERIOD), do_scaling=True, - scaling_factor=SCALING_FACTOR, + suspended_results_folder=suspended_folder, autodiscover=True, ) .pipe(set_param_names_as_column_index_level_0, param_names=param_names) @@ -226,7 +228,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No key="death", custom_generate_series=lambda _df: get_total_num_death_by_agegrp_and_label(_df, TARGET_PERIOD), do_scaling=True, - 
scaling_factor=SCALING_FACTOR, + suspended_results_folder=suspended_folder, autodiscover=True, ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) @@ -236,7 +238,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No key="dalys_stacked_by_age_and_time", custom_generate_series=lambda _df: get_total_num_dalys_by_agegrp_and_label(_df, TARGET_PERIOD), do_scaling=True, - scaling_factor=SCALING_FACTOR, + suspended_results_folder=suspended_folder, autodiscover=True, ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) @@ -247,7 +249,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No key="HSI_Event", custom_generate_series=lambda _df: get_counts_of_hsi_by_short_treatment_id(_df, TARGET_PERIOD), do_scaling=True, - scaling_factor=SCALING_FACTOR, + suspended_results_folder=suspended_folder, autodiscover=True, ) .pipe(set_param_names_as_column_index_level_0, param_names=param_names) @@ -284,7 +286,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No key="HSI_Event", custom_generate_series=lambda _df: get_counts_of_appts(_df, TARGET_PERIOD), do_scaling=True, - scaling_factor=SCALING_FACTOR, + suspended_results_folder=suspended_folder, ) .pipe(set_param_names_as_column_index_level_0, param_names=param_names) .fillna(0.0) diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 74fc0673e4..ace7e46a5e 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -311,9 +311,7 @@ def extract_results(results_folder: Path, index: str = None, custom_generate_series=None, do_scaling: bool = False, - scaling_factor = None, - draw_runs: Optional[List[Tuple[int, int]]] = None, - autodiscover: bool = False, + suspended_results_folder: Path = None, ) -> pd.DataFrame: """Utility function to unpack results. @@ -326,23 +324,19 @@ def extract_results(results_folder: Path, `custom_generate_series`. 
Optionally, with `do_scaling=True`, each element is multiplied by the scaling_factor recorded in the simulation. - - If `draw_runs` is provided, only these draw/run pairs are extracted (in the order supplied). If `draw_runs` is - not provided and `autodiscover=True`, available draw/run folders are auto-discovered and extracted. + If the suspend-and-resume functionality is used, scaling factor may be avaialble in the folder where the log + of the suspended run are stored. Note that if runs in the batch have failed (such that logs have not been generated), these are dropped silently. """ - def get_multiplier(_draw, _run): + + def get_multiplier(results_folder, _draw, _run): """Helper function to get the multiplier from the simulation. Note that if the scaling factor cannot be found a `KeyError` is thrown.""" - if scaling_factor is not None: - return scaling_factor - else: - return ( - load_pickled_dataframes(results_folder, _draw, _run, 'tlo.methods.population') - ['tlo.methods.population']['scaling_factor']['scaling_factor'].values[0] - ) + return load_pickled_dataframes( + results_folder, _draw, _run, 'tlo.methods.demography' + )['tlo.methods.demography']['scaling_factor']['scaling_factor'].values[0] if custom_generate_series is None: # If there is no `custom_generate_series` provided, it implies that function required selects the specified @@ -361,44 +355,33 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series: else: return custom_generate_series(dataframe) - if draw_runs is not None: - selected_draw_runs = draw_runs - elif autodiscover: - info = get_scenario_info(results_folder, autodiscover=True) - selected_draw_runs = [ - (draw, run) - for draw in info['draws'] - for run in info['runs_by_draw'][draw] - ] - else: - # Legacy default behaviour: infer ranges from scenario info. 
- info = get_scenario_info(results_folder) - selected_draw_runs = [ - (draw, run) - for draw in range(info['number_of_draws']) - for run in range(info['runs_per_draw']) - ] + # get number of draws and numbers of runs + info = get_scenario_info(results_folder) # Collect results from each draw/run res = dict() + for draw in range(info['number_of_draws']): + for run in range(info['runs_per_draw']): - for draw, run in selected_draw_runs: - draw_run = (draw, run) - try: - df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] - output_from_eval: pd.Series = generate_series(df) + draw_run = (draw, run) - assert isinstance(output_from_eval, pd.Series), ( - 'Custom command does not generate a pd.Series' - ) - if do_scaling: - res[draw_run] = output_from_eval * get_multiplier(draw, run) - else: - res[draw_run] = output_from_eval + try: + df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] + output_from_eval: pd.Series = generate_series(df) + assert isinstance(output_from_eval, pd.Series), ( + 'Custom command does not generate a pd.Series' + ) + if do_scaling: + if suspended_results_folder is not None: + res[draw_run] = output_from_eval * get_multiplier(suspended_results_folder, 0, 0) + else: + res[draw_run] = output_from_eval * get_multiplier(results_folder, draw, run) + else: + res[draw_run] = output_from_eval - except KeyError: - # Some logs could not be found - probably because this run failed. - res[draw_run] = None + except KeyError: + # Some logs could not be found - probably because this run failed. 
+ res[draw_run] = None # Use pd.concat to compile results (skips dict items where the values is None) _concat = pd.concat(res, axis=1) From e396e82f46cffe85e7e9b2de2cf1c37c14d18fbb Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Mon, 9 Mar 2026 15:24:47 +0000 Subject: [PATCH 20/55] reinstate autodiscover; working analysis script --- .../analysis_effect_of_treatment_ids.py | 187 +++++------------- src/tlo/analysis/utils.py | 59 +++--- 2 files changed, 87 insertions(+), 159 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index 68164df31f..8337703588 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -6,18 +6,18 @@ import zipfile import pickle from pathlib import Path - +import pandas as pd import numpy as np from matplotlib import pyplot as plt from tlo import Date -from scripts.calibration_analyses.analysis_scripts import plot_legends +from tlo.util import create_age_range_lookup + from scripts.lcoa_inputs_from_tlo_analyses.fig_utils import ( do_bar_plot_with_ci, plot_multiindex_dot_with_interval, ) from scripts.lcoa_inputs_from_tlo_analyses.results_processing_utils import ( extract_deaths_total, - format_scenario_name, get_counts_of_appts, get_counts_of_hsi_by_short_treatment_id, get_num_dalys_by_cause_label, @@ -30,6 +30,7 @@ make_get_num_dalys_by_cause_label_and_period, set_param_names_as_column_index_level_0, target_period, + find_difference_extra_relative_to_comparison, ) from scripts.costing.cost_estimation import ( apply_discounting_to_cost_data, @@ -50,55 +51,38 @@ make_age_grp_lookup, squarify_neat, summarize, - unflatten_flattened_multi_index_in_logging, ) -TARGET_PERIOD = (Date(2026, 1, 1), Date(2041, 1, 1)) +TARGET_PERIOD = (Date(2026, 1, 1), Date(2027, 1, 1)) 
PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS = 5 suspended_folder = Path("outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z") results_folder = Path("outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-16T154500Z") # SCALING_FACTOR retrieved from the suspended run in # outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z -SCALING_FACTOR = 58.158436 +# SCALING_FACTOR = 58.158436 def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None): """Produce standard plots describing effect of each TREATMENT_ID.""" - make_graph_file_name = lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 - _, age_grp_lookup = make_age_grp_lookup() - period_labels_for_bar_plots = [ - label - for label, _ in get_periods_within_target_period( - period_length_years=PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS, - target_period_tuple=TARGET_PERIOD, - ) - ] - target_period_label = target_period(TARGET_PERIOD) param_names = get_parameter_names_from_scenario_file() # Costs calculation - alternative_discount_rates = [ - {"discount_rate_cost": 0.03, "discount_rate_health": 0, "discounting_scenario": 'WHO-CHOICE (0.03,0)'}, - {"discount_rate_cost": 0.03, "discount_rate_health": 0.03, "discounting_scenario": 'MAIN (0.03,0.03)'} - ] - - for rates in alternative_discount_rates: - discount_rate_cost = rates["discount_rate_cost"] - discount_rate_health = rates["discount_rate_health"] - input_costs = estimate_input_cost_of_scenarios( - results_folder, - resourcefilepath, - suspended_results_folder=suspended_folder, - cost_only_used_staff=True, - _discount_rate=discount_rate_cost, - _metric="median",) - - - + print("Calculating costs...") + discount_rate_cost = 0.03 + input_costs = estimate_input_cost_of_scenarios( + results_folder, + resourcefilepath, + suspended_results_folder=suspended_folder, + _draws=[0], + _runs=[0], + cost_only_used_staff=True, + _discount_rate=discount_rate_cost, + _metric="median",) # 
Get total population by year + print("Extracting population data...") total_population_by_year = extract_results( results_folder, module='tlo.methods.demography', @@ -121,79 +105,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No level="stat" ) - for year in [2026, 2031, 2036, 2040]: - fig, ax = plt.subplots() - name_of_plot = f"Population size in {year}" - plot_multiindex_dot_with_interval(total_population_by_year / 1e6, year, ax, 'median') - ax.set_title(name_of_plot) - ax.set_xlabel("Treatment included") - ax.set_ylabel("Population size (millions)") - ax.grid(axis="y") - ax.spines["top"].set_visible(False) - ax.spines["right"].set_visible(False) - fig.tight_layout() - fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) - plt.close(fig) - - num_deaths_by_cause_label = summarize( - extract_results( - results_folder, - module="tlo.methods.demography", - key="death", - custom_generate_series=extract_deaths_total, - do_scaling=True, - suspended_results_folder=suspended_folder, - autodiscover=True, - ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) - ) - - num_dalys_by_cause_label = summarize( - extract_results( - results_folder, - module="tlo.methods.healthburden", - key="dalys_stacked_by_age_and_time", - custom_generate_series=make_get_num_dalys_by_cause_label_and_period( - period_length_years=PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS, - target_period_tuple=TARGET_PERIOD, - ), - do_scaling=True, - suspended_results_folder=suspended_folder, - autodiscover=True, - ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) - ) - - for param in param_names: - param_formatted = format_scenario_name(param) - - fig, ax = plt.subplots() - name_of_plot = f"Deaths With {param_formatted}, {target_period_label}" - do_bar_plot_with_ci(num_deaths_by_cause_label / 1e3, param_formatted, ax, period_labels_for_bar_plots, target_period_label) - ax.set_title(name_of_plot) - ax.set_xlabel("Cause of Death") - 
ax.set_ylabel("Number of Deaths (/1000)") - ax.set_ylim(0, 500) - ax.grid(axis="y") - ax.spines["top"].set_visible(False) - ax.spines["right"].set_visible(False) - fig.tight_layout() - fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) - plt.close(fig) - - fig, ax = plt.subplots() - name_of_plot = f"DALYS With No Services, {target_period_label}" - do_bar_plot_with_ci(num_dalys_by_cause_label / 1e6, param_formatted, ax, period_labels_for_bar_plots, target_period_label) - ax.set_title(name_of_plot) - ax.set_xlabel("Cause of Disability/Death") - ax.set_ylabel("Number of DALYS (/millions)") - ax.set_ylim(0, 30) - ax.set_yticks(np.arange(0, 35, 5)) - ax.grid(axis="y") - ax.spines["top"].set_visible(False) - ax.spines["right"].set_visible(False) - fig.tight_layout() - fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) - plt.close(fig) - + print("Extracting total deaths and DALYs by label...") num_deaths = ( extract_results( results_folder, @@ -207,6 +119,16 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No .pipe(set_param_names_as_column_index_level_0, param_names=param_names) .sum() ) + num_deaths_averted = summarize( + pd.DataFrame( + find_difference_extra_relative_to_comparison(num_deaths, comparison='Nothing')).T, + "median" + ).iloc[0].unstack().sort_values(by='mean', ascending=True) + + pc_deaths_averted = 100.0 * summarize( + pd.DataFrame( + find_difference_extra_relative_to_comparison(num_deaths, comparison='Nothing', scaled=True)).T + ).iloc[0].unstack().sort_values(by='mean', ascending=True) num_dalys = ( extract_results( @@ -222,6 +144,16 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No .sum() ) + num_dalys_averted = summarize( + pd.DataFrame( + find_difference_extra_relative_to_comparison(num_dalys, comparison='Nothing')).T + ).iloc[0].unstack().sort_values(by='mean', ascending=True) + + pc_dalys_averted = 100.0 * summarize( + pd.DataFrame( + 
find_difference_extra_relative_to_comparison(num_dalys, comparison='Nothing', scaled=True)).T + ).iloc[0].unstack().sort_values(by='mean', ascending=True) + total_num_death_by_agegrp_and_label = extract_results( results_folder, module="tlo.methods.demography", @@ -257,28 +189,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No .sort_index() ) - mean_num_hsi_by_short_treatment_id = summarize(counts_of_hsi_by_short_treatment_id, only_mean=True) - - for scenario_name, _counts in mean_num_hsi_by_short_treatment_id.T.iterrows(): - _counts_non_zero = _counts[_counts > 0] - - if len(_counts_non_zero): - fig, ax = plt.subplots() - name_of_plot = f"HSI Events Occurring, {scenario_name}, {target_period_label}" - squarify_neat( - sizes=_counts_non_zero.values, - label=_counts_non_zero.index, - colormap=get_color_short_treatment_id, - alpha=1, - pad=True, - ax=ax, - text_kwargs={"color": "black", "size": 8}, - ) - ax.set_axis_off() - ax.set_title(name_of_plot, {"size": 12, "color": "black"}) - fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) - plt.close(fig) - + print("Extracting counts of appointments data...") counts_of_appts = ( extract_results( results_folder, @@ -293,15 +204,19 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No .sort_index() ) - return { + "total_population_by_year": total_population_by_year, "num_deaths": num_deaths, + "deaths_averted": num_deaths_averted, + "pc_deaths_averted": pc_deaths_averted, "num_dalys": num_dalys, + "dalys_averted": num_dalys_averted, + "pc_dalys_averted": pc_dalys_averted, + "input_costs": input_costs, "total_num_death_by_agegrp_and_label": total_num_death_by_agegrp_and_label, "total_num_dalys_by_agegrp_and_label": total_num_dalys_by_agegrp_and_label, "counts_of_hsi_by_short_treatment_id": counts_of_hsi_by_short_treatment_id, "counts_of_appts": counts_of_appts, - "age_grp_lookup": age_grp_lookup, } @@ -312,10 +227,8 @@ def apply(results_folder: 
Path, output_folder: Path, resourcefilepath: Path = No args = parser.parse_args() out = args.output_folder if args.output_folder is not None else args.results_folder - apply(results_folder=args.results_folder, output_folder=out, resourcefilepath=Path("./resources")) - - plot_legends.apply(results_folder=None, output_folder=out, resourcefilepath=Path("./resources")) + results = apply(results_folder=args.results_folder, output_folder=out, resourcefilepath=Path("./resources")) + with open(args.output_folder / 'results.pkl', 'wb') as f: + pickle.dump(results, f) - with zipfile.ZipFile(out / f"images_{out.parts[-1]}.zip", mode="w") as archive: - for filename in sorted(glob.glob(str(out / "*.png"))): - archive.write(filename, os.path.basename(filename)) + print("Analysis complete! Results saved to results.pkl") diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index ace7e46a5e..49bf511cec 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -312,6 +312,8 @@ def extract_results(results_folder: Path, custom_generate_series=None, do_scaling: bool = False, suspended_results_folder: Path = None, + draw_runs: Optional[List[Tuple[int, int]]] = None, + autodiscover: bool = False, ) -> pd.DataFrame: """Utility function to unpack results. @@ -355,33 +357,46 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series: else: return custom_generate_series(dataframe) - # get number of draws and numbers of runs - info = get_scenario_info(results_folder) + if draw_runs is not None: + selected_draw_runs = draw_runs + elif autodiscover: + # get number of draws and numbers of runs + info = get_scenario_info(results_folder, autodiscover) + selected_draw_runs = [ + (draw, run) + for draw in info['draws'] + for run in info['runs_by_draw'][draw] + ] + else: + # Legacy default behaviour: infer ranges from scenario info. 
+ info = get_scenario_info(results_folder) + selected_draw_runs = [ + (draw, run) + for draw in range(info['number_of_draws']) + for run in range(info['runs_per_draw']) + ] # Collect results from each draw/run res = dict() - for draw in range(info['number_of_draws']): - for run in range(info['runs_per_draw']): - - draw_run = (draw, run) - - try: - df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] - output_from_eval: pd.Series = generate_series(df) - assert isinstance(output_from_eval, pd.Series), ( - 'Custom command does not generate a pd.Series' - ) - if do_scaling: - if suspended_results_folder is not None: - res[draw_run] = output_from_eval * get_multiplier(suspended_results_folder, 0, 0) - else: - res[draw_run] = output_from_eval * get_multiplier(results_folder, draw, run) + for draw, run in selected_draw_runs: + draw_run = (draw, run) + try: + df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] + output_from_eval: pd.Series = generate_series(df) + assert isinstance(output_from_eval, pd.Series), ( + 'Custom command does not generate a pd.Series' + ) + if do_scaling: + if suspended_results_folder is not None: + res[draw_run] = output_from_eval * get_multiplier(suspended_results_folder, 0, 0) else: - res[draw_run] = output_from_eval + res[draw_run] = output_from_eval * get_multiplier(results_folder, draw, run) + else: + res[draw_run] = output_from_eval - except KeyError: - # Some logs could not be found - probably because this run failed. - res[draw_run] = None + except KeyError: + # Some logs could not be found - probably because this run failed. 
+ res[draw_run] = None # Use pd.concat to compile results (skips dict items where the values is None) _concat = pd.concat(res, axis=1) From 63bf7e38734ff20dde03c3c42434a338b521fb72 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Tue, 10 Mar 2026 06:59:30 +0000 Subject: [PATCH 21/55] WIP; compute ICERs --- .../analysis_effect_of_treatment_ids.py | 36 ++++++++++++------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index 8337703588..7351751ea2 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -17,7 +17,6 @@ plot_multiindex_dot_with_interval, ) from scripts.lcoa_inputs_from_tlo_analyses.results_processing_utils import ( - extract_deaths_total, get_counts_of_appts, get_counts_of_hsi_by_short_treatment_id, get_num_dalys_by_cause_label, @@ -53,7 +52,7 @@ summarize, ) -TARGET_PERIOD = (Date(2026, 1, 1), Date(2027, 1, 1)) +TARGET_PERIOD = (Date(2026, 1, 1), Date(2041, 1, 1)) PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS = 5 suspended_folder = Path("outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z") results_folder = Path("outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-16T154500Z") @@ -115,21 +114,22 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No do_scaling=True, suspended_results_folder=suspended_folder, autodiscover=True, - ) - .pipe(set_param_names_as_column_index_level_0, param_names=param_names) - .sum() + ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) ) + num_deaths_averted = summarize( pd.DataFrame( - find_difference_extra_relative_to_comparison(num_deaths, comparison='Nothing')).T, - "median" + find_difference_extra_relative_to_comparison(num_deaths.sum(), 
comparison='Nothing')).T ).iloc[0].unstack().sort_values(by='mean', ascending=True) + pc_deaths_averted = 100.0 * summarize( pd.DataFrame( - find_difference_extra_relative_to_comparison(num_deaths, comparison='Nothing', scaled=True)).T + find_difference_extra_relative_to_comparison(num_deaths.sum(), comparison='Nothing', scaled=True)).T ).iloc[0].unstack().sort_values(by='mean', ascending=True) + num_deaths = summarize(num_deaths) + num_dalys = ( extract_results( results_folder, @@ -141,19 +141,20 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No autodiscover=True, ) .pipe(set_param_names_as_column_index_level_0, param_names=param_names) - .sum() ) num_dalys_averted = summarize( pd.DataFrame( - find_difference_extra_relative_to_comparison(num_dalys, comparison='Nothing')).T + find_difference_extra_relative_to_comparison(num_dalys.sum(), comparison='Nothing')).T ).iloc[0].unstack().sort_values(by='mean', ascending=True) pc_dalys_averted = 100.0 * summarize( pd.DataFrame( - find_difference_extra_relative_to_comparison(num_dalys, comparison='Nothing', scaled=True)).T + find_difference_extra_relative_to_comparison(num_dalys.sum(), comparison='Nothing', scaled=True)).T ).iloc[0].unstack().sort_values(by='mean', ascending=True) + num_dalys = summarize(num_dalys) + total_num_death_by_agegrp_and_label = extract_results( results_folder, module="tlo.methods.demography", @@ -204,6 +205,17 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No .sort_index() ) + # Computing ICERs + print("Computing ICERs...") + total_input_cost = input_costs.groupby(['draw', 'run'])['cost'].sum() + incremental_scenario_cost = (pd.DataFrame( + find_difference_relative_to_comparison( + total_input_cost, + comparison='Nothing',) + ).T.iloc[0].unstack()).T + + incremental_scenario_cost_summarized = summarize_cost_data(incremental_scenario_cost, _metric='median') + return { "total_population_by_year": total_population_by_year, 
"num_deaths": num_deaths, @@ -228,7 +240,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No out = args.output_folder if args.output_folder is not None else args.results_folder results = apply(results_folder=args.results_folder, output_folder=out, resourcefilepath=Path("./resources")) - with open(args.output_folder / 'results.pkl', 'wb') as f: + with open(args.output_folder / 'fullresults.pkl', 'wb') as f: pickle.dump(results, f) print("Analysis complete! Results saved to results.pkl") From 65f63c6676c33c62b6a4ff1dd1471fa6604c95fc Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Wed, 11 Mar 2026 15:35:43 +0000 Subject: [PATCH 22/55] Additional figures for sanity check --- .../analysis_effect_of_treatment_ids.py | 79 ++++++- .../fig_utils.py | 203 ++++++++++++++++++ .../figures_effect_of_treatment_ids.py | 158 ++++++++++++++ .../results_processing_utils.py | 50 +++++ 4 files changed, 479 insertions(+), 11 deletions(-) create mode 100644 src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index 7351751ea2..2c0be8753d 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -27,10 +27,14 @@ get_total_num_death_by_agegrp_and_label, get_total_population_by_year, make_get_num_dalys_by_cause_label_and_period, + make_get_num_deaths_by_cause_label_and_period, + make_get_counts_of_appts_by_period, set_param_names_as_column_index_level_0, target_period, find_difference_extra_relative_to_comparison, + find_difference_relative_to_comparison ) + from scripts.costing.cost_estimation import ( apply_discounting_to_cost_data, do_line_plot_of_cost, @@ -65,17 +69,41 @@ def apply(results_folder: Path, output_folder: Path, 
resourcefilepath: Path = No """Produce standard plots describing effect of each TREATMENT_ID.""" _, age_grp_lookup = make_age_grp_lookup() + + param_names = get_parameter_names_from_scenario_file() + get_num_deaths_by_cause_label_and_period = make_get_num_deaths_by_cause_label_and_period( + PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS, + TARGET_PERIOD, + ) + get_num_dalys_by_cause_label_and_period = make_get_num_dalys_by_cause_label_and_period( + PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS, + TARGET_PERIOD, + ) + # Get yearly number of appointments; + get_num_appts_by_period = make_get_counts_of_appts_by_period( + period_length_years=1, + target_period_tuple=TARGET_PERIOD, + ) # Costs calculation print("Calculating costs...") + # For now, choose specific draws + # draw_number:Treament ID + # 0 : Nothing + # 10: BreastCancer_Investigation_* + # 15: CardioMetabolicDisorders_Prevention_WeightLoss_* + # 27: Contraception_Routine_* + draws_to_run = [0, 10, 15, 27, 31, 39, 65] + selected_draws = [9, 14, 26, 30, 38, 64] + discount_rate_cost = 0.03 input_costs = estimate_input_cost_of_scenarios( results_folder, resourcefilepath, suspended_results_folder=suspended_folder, - _draws=[0], - _runs=[0], + _draws=draws_to_run, + _runs=[0, 1, 2, 3, 4], cost_only_used_staff=True, _discount_rate=discount_rate_cost, _metric="median",) @@ -110,7 +138,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No results_folder, module="tlo.methods.demography", key="death", - custom_generate_series=lambda _df: get_num_deaths_by_cause_label(_df, TARGET_PERIOD), + custom_generate_series=get_num_deaths_by_cause_label_and_period, do_scaling=True, suspended_results_folder=suspended_folder, autodiscover=True, @@ -135,18 +163,17 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No results_folder, module="tlo.methods.healthburden", key="dalys_stacked_by_age_and_time", - custom_generate_series=lambda _df: get_num_dalys_by_cause_label(_df, TARGET_PERIOD), + 
custom_generate_series=get_num_dalys_by_cause_label_and_period, do_scaling=True, suspended_results_folder=suspended_folder, autodiscover=True, - ) - .pipe(set_param_names_as_column_index_level_0, param_names=param_names) + ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) ) - num_dalys_averted = summarize( + num_dalys_averted = ( pd.DataFrame( - find_difference_extra_relative_to_comparison(num_dalys.sum(), comparison='Nothing')).T - ).iloc[0].unstack().sort_values(by='mean', ascending=True) + find_difference_extra_relative_to_comparison(num_dalys.sum(), comparison='Nothing') + ).T.iloc[0].unstack(level='run')) pc_dalys_averted = 100.0 * summarize( pd.DataFrame( @@ -154,6 +181,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No ).iloc[0].unstack().sort_values(by='mean', ascending=True) num_dalys = summarize(num_dalys) + num_dalys_averted_summarized = summarize_cost_data(-1.0 * num_dalys_averted, _metric='median') total_num_death_by_agegrp_and_label = extract_results( results_folder, @@ -204,6 +232,22 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No .fillna(0.0) .sort_index() ) + counts_of_appts = compute_summary_statistics(counts_of_appts, 'median') + + counts_of_appts_by_period = ( + extract_results( + results_folder, + module="tlo.methods.healthsystem.summary", + key="HSI_Event", + custom_generate_series=lambda _df: get_num_appts_by_period(_df), + do_scaling=True, + suspended_results_folder=suspended_folder, + ) + .pipe(set_param_names_as_column_index_level_0, param_names=param_names) + .fillna(0.0) + .sort_index() + ) + counts_of_appts_by_period = compute_summary_statistics(counts_of_appts_by_period, 'median') # Computing ICERs print("Computing ICERs...") @@ -211,10 +255,20 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No incremental_scenario_cost = (pd.DataFrame( find_difference_relative_to_comparison( total_input_cost, - 
comparison='Nothing',) + comparison=0,) ).T.iloc[0].unstack()).T incremental_scenario_cost_summarized = summarize_cost_data(incremental_scenario_cost, _metric='median') + icers_summarized = (incremental_scenario_cost_summarized.values / + num_dalys_averted_summarized.iloc[selected_draws].values) + + icers_summarized = ( + pd.DataFrame( + icers_summarized, + index=num_dalys_averted_summarized.index[selected_draws], + columns=num_dalys_averted_summarized.columns + ) + ) return { "total_population_by_year": total_population_by_year, @@ -225,10 +279,13 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No "dalys_averted": num_dalys_averted, "pc_dalys_averted": pc_dalys_averted, "input_costs": input_costs, + "incremental_scenario_cost_summarized": incremental_scenario_cost_summarized, + "icers_summarized": icers_summarized, "total_num_death_by_agegrp_and_label": total_num_death_by_agegrp_and_label, "total_num_dalys_by_agegrp_and_label": total_num_dalys_by_agegrp_and_label, "counts_of_hsi_by_short_treatment_id": counts_of_hsi_by_short_treatment_id, "counts_of_appts": counts_of_appts, + "counts_of_appts_by_period": counts_of_appts_by_period, } @@ -243,4 +300,4 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No with open(args.output_folder / 'fullresults.pkl', 'wb') as f: pickle.dump(results, f) - print("Analysis complete! Results saved to results.pkl") + print("Analysis complete! 
Results saved to fullresults.pkl") diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py index fadb5fbc0a..980f095e74 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py @@ -5,6 +5,7 @@ import numpy as np import pandas as pd +from matplotlib import pyplot as plt from matplotlib.lines import Line2D from matplotlib.patches import Patch @@ -16,6 +17,25 @@ ) +APPOINTMENT_TYPE_PALETTE = list(plt.get_cmap("tab20").colors) + list(plt.get_cmap("Set2").colors) +APPOINTMENT_TYPE_FIXED_COLORS = {"AccidentsandEmerg": "black"} + + +def get_color_by_appointment_type(appointment_types) -> dict: + """Return a deterministic color map for appointment types.""" + non_fixed_appointment_types = sorted( + appt for appt in appointment_types if appt not in APPOINTMENT_TYPE_FIXED_COLORS + ) + color_by_appointment_type = { + appt: APPOINTMENT_TYPE_PALETTE[i % len(APPOINTMENT_TYPE_PALETTE)] + for i, appt in enumerate(non_fixed_appointment_types) + } + color_by_appointment_type.update( + {appt: color for appt, color in APPOINTMENT_TYPE_FIXED_COLORS.items() if appt in appointment_types} + ) + return color_by_appointment_type + + def do_bar_plot_with_ci( _df: pd.DataFrame, _param, @@ -155,3 +175,186 @@ def do_label_barh_plot(_df: pd.DataFrame, _ax): verticalalignment="center", size=7, ) + + +def plot_appointment_counts_heatmap(_df: pd.DataFrame, plot_stat: str = "central", cmap: str = "viridis", colorbar_label: str = "central value"): + """Plot a heatmap of central values for each draw across the dataframe index.""" + if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels != 2: + raise ValueError("_df columns must be a 2-level MultiIndex with levels for draw and stat.") + + stat_level_name = "stat" if "stat" in _df.columns.names else _df.columns.names[1] + stat_level_values = _df.columns.get_level_values(stat_level_name) + if "central" 
not in stat_level_values: + raise ValueError("The column MultiIndex does not contain a 'central' entry in the stat level.") + + _plot = _df.xs(plot_stat, axis=1, level=stat_level_name) + if _plot.empty: + raise ValueError("No plottable data remain after selecting the 'central' columns.") + + _plot = _plot.sort_index(axis=1) + + fig_width = max(12, min(0.3 * len(_plot.columns), 36)) + fig_height = max(6, min(0.35 * len(_plot.index), 18)) + fig, ax = plt.subplots(figsize=(fig_width, fig_height)) + + image = ax.imshow(_plot.to_numpy(), aspect="auto", cmap=cmap) + + ax.set_xticks(np.arange(len(_plot.columns))) + ax.set_xticklabels(_plot.columns, rotation=90, ha="right", fontsize=8) + ax.set_yticks(np.arange(len(_plot.index))) + ax.set_yticklabels([str(label) for label in _plot.index], fontsize=8) + ax.set_xlabel(_df.columns.names[0] if _df.columns.names[0] is not None else "draw") + ax.set_ylabel(_df.index.name if _df.index.name is not None else "category") + + colorbar = fig.colorbar(image, ax=ax) + colorbar.set_label(colorbar_label) + + fig.tight_layout() + return fig, ax + + +def plot_appointment_counts_stacked_bar(_df: pd.DataFrame, plot_stat: str = "central"): + """Plot horizontal stacked bars of appointment counts by draw for a selected summary statistic.""" + if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels != 2: + raise ValueError("_df columns must be a 2-level MultiIndex with levels for draw and stat.") + + stat_level_name = "stat" if "stat" in _df.columns.names else _df.columns.names[1] + stat_level_values = _df.columns.get_level_values(stat_level_name) + if plot_stat not in stat_level_values: + raise ValueError(f"The column MultiIndex does not contain '{plot_stat}' in the stat level.") + + _plot = _df.xs(plot_stat, axis=1, level=stat_level_name).T + if _plot.empty: + raise ValueError(f"No plottable data remain after selecting the '{plot_stat}' columns.") + + if _plot.isna().any().any(): + warnings.warn( + f"Missing values detected after 
selecting '{plot_stat}'. Bars will omit missing segments.", + stacklevel=2, + ) + + totals = _plot.sum(axis=1, skipna=True) + _plot = _plot.loc[totals.sort_values(ascending=False).index] + if not (_plot.gt(0).any(axis=1)).any(): + raise ValueError(f"No positive values remain after selecting the '{plot_stat}' columns.") + + fig_width = max(12, min(0.22 * len(_plot.columns) + 12, 30)) + fig_height = max(6, min(0.35 * len(_plot.index), 24)) + fig, ax = plt.subplots(figsize=(fig_width, fig_height)) + + color_by_appointment_type = get_color_by_appointment_type(_plot.columns) + left = np.zeros(len(_plot.index), dtype=float) + y = np.arange(len(_plot.index)) + + for idx, appointment_type in enumerate(_plot.columns): + values = _plot[appointment_type] + mask = values.gt(0) & values.notna() + if not mask.any(): + continue + ax.barh( + y[mask.to_numpy()], + values.loc[mask].to_numpy(), + left=left[mask.to_numpy()], + color=color_by_appointment_type[appointment_type], + label=str(appointment_type), + ) + left[mask.to_numpy()] += values.loc[mask].to_numpy() + + ax.set_yticks(y) + ax.set_yticklabels([str(label) for label in _plot.index], fontsize=12) + #ax.set_xlabel("count") + #ax.set_ylabel(_df.columns.names[0] if _df.columns.names[0] is not None else "draw") + ax.invert_yaxis() + ax.legend( + title="Appointment type", + loc="lower right", + fontsize=19, + title_fontsize=20, + frameon=True, + ) + + fig.tight_layout() + return fig, ax + + +def plot_appointment_counts_by_period_for_draw( + _df: pd.DataFrame, + draw: str, + period_labels: list[str], +): + """Plot central values with lower/upper intervals across period chunks for one draw.""" + if not isinstance(_df.index, pd.MultiIndex) or _df.index.nlevels != 2: + raise ValueError("_df index must be a 2-level MultiIndex with levels for appt_type and period.") + if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels != 2: + raise ValueError("_df columns must be a 2-level MultiIndex with levels for draw and stat.") 
+ if draw not in _df.columns.get_level_values(0): + available_draws = sorted(set(_df.columns.get_level_values(0))) + raise ValueError(f"Draw '{draw}' not found. Available draws: {available_draws}") + + _plot = _df[draw].reindex( + pd.MultiIndex.from_product( + [ + _df.index.get_level_values("appt_type").unique(), + period_labels, + ], + names=["appt_type", "period"], + ), + fill_value=0.0, + ) + _plot = _plot.loc[:, ["lower", "central", "upper"]] + _plot = _plot.unstack("period") + + central = _plot["central"] + lower = _plot["lower"] + upper = _plot["upper"] + non_zero_mask = central.gt(0).any(axis=1) + central = central.loc[non_zero_mask, period_labels] + lower = lower.loc[non_zero_mask, period_labels] + upper = upper.loc[non_zero_mask, period_labels] + + if central.empty: + raise ValueError(f"No non-zero appointment types remain for draw '{draw}'.") + + color_by_appointment_type = get_color_by_appointment_type(central.index) + x = np.arange(len(period_labels)) + fig_width = max(10, min(1.2 * len(period_labels) + 4, 20)) + fig_height = max(6, min(0.28 * len(central.index) + 6, 18)) + fig, ax = plt.subplots(figsize=(fig_width, fig_height)) + + for appointment_type in central.index: + central_values = central.loc[appointment_type].to_numpy() + lower_values = lower.loc[appointment_type].to_numpy() + upper_values = upper.loc[appointment_type].to_numpy() + yerr = np.vstack([central_values - lower_values, upper_values - central_values]) + ax.errorbar( + x, + central_values, + yerr=yerr, + fmt="o", + color=color_by_appointment_type[appointment_type], + ecolor=color_by_appointment_type[appointment_type], + elinewidth=1.2, + capsize=2, + markersize=4, + label=str(appointment_type), + ) + + ax.set_xticks(x) + ax.set_xticklabels(period_labels, rotation=45, ha="right") + ax.set_xlabel("period") + ax.set_ylabel("appointment count") + ax.set_title(f"Appointment counts by period: {draw}") + ax.grid(axis="y") + ax.spines["top"].set_visible(False) + 
ax.spines["right"].set_visible(False) + ax.legend( + title="Appointment type", + loc="center left", + bbox_to_anchor=(1.02, 0.5), + fontsize=8, + title_fontsize=9, + frameon=True, + ) + + fig.tight_layout() + return fig, ax diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py new file mode 100644 index 0000000000..057f06ad82 --- /dev/null +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py @@ -0,0 +1,158 @@ +import argparse +import glob +import os +import zipfile +from pathlib import Path +import pickle +import numpy as np +import matplotlib.pyplot as plt +from scripts.calibration_analyses.analysis_scripts import plot_legends +from scripts.lcoa_inputs_from_tlo_analyses.results_processing_utils import ( + extract_deaths_total, + format_scenario_name, + get_counts_of_appts, + get_counts_of_hsi_by_short_treatment_id, + get_num_dalys_by_cause_label, + get_num_deaths_by_cause_label, + get_parameter_names_from_scenario_file, + get_periods_within_target_period, + get_total_num_dalys_by_agegrp_and_label, + get_total_num_death_by_agegrp_and_label, + get_total_population_by_year, + make_get_num_dalys_by_cause_label_and_period, + set_param_names_as_column_index_level_0, + target_period, +) +from scripts.lcoa_inputs_from_tlo_analyses.fig_utils import ( + do_bar_plot_with_ci, + plot_multiindex_dot_with_interval, + plot_appointment_counts_by_period_for_draw, + plot_appointment_counts_heatmap, +) +from tlo.analysis.utils import ( + compute_summary_statistics, +) +from tlo import Date + +TARGET_PERIOD = (Date(2026, 1, 1), Date(2041, 1, 1)) +PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS = 5 + +def apply(results_file: Path, output_folder: Path, resourcefilepath: Path = None): + """Produce standard plots describing effect of each TREATMENT_ID.""" + + param_names = get_parameter_names_from_scenario_file() + make_graph_file_name = lambda stub: 
output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 + period_labels_for_bar_plots = [ + label + for label, _ in get_periods_within_target_period( + period_length_years=PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS, + target_period_tuple=TARGET_PERIOD, + ) + ] + appointment_period_labels = [ + label + for label, _ in get_periods_within_target_period( + period_length_years=1, + target_period_tuple=TARGET_PERIOD, + ) + ] + + target_period_label = target_period(TARGET_PERIOD) + + with open(results_file, "rb") as f: + results = pickle.load(f) + + # Plot number of appointments for each draw + counts_of_appts = results['counts_of_appts'] + fig, ax = plot_appointment_counts_heatmap(counts_of_appts) + fig.savefig(make_graph_file_name("appointment_counts_heatmap")) + plt.close(fig) + + counts_of_appts_by_period = results["counts_of_appts_by_period"] + for param in param_names: + draw = format_scenario_name(param) + name_of_plot = f"Yearly appointment counts for {draw}" + fig, ax = plot_appointment_counts_by_period_for_draw( + counts_of_appts_by_period, + draw=draw, + period_labels=appointment_period_labels, + ) + ax.set_title(name_of_plot) + fig.savefig(make_graph_file_name(name_of_plot)) + plt.close(fig) + + # Plot population growth + total_population_by_year = results['total_population_by_year'] + for year in [2026, 2031, 2036, 2040]: + fig, ax = plt.subplots() + name_of_plot = f"Population size in {year}" + plot_multiindex_dot_with_interval(total_population_by_year / 1e6, year, ax, 'median') + ax.set_title(name_of_plot) + ax.set_xlabel("Treatment included") + ax.set_ylabel("Population size (millions)") + ax.grid(axis="y") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) + plt.close(fig) + + # Plot number of deaths and DALYS by cause for each parameter, with confidence intervals, for the target period + num_deaths_by_cause_label = results['num_deaths'] + 
deaths_averted = results['deaths_averted'] + pc_deaths_averted = results['pc_deaths_averted'] + + num_dalys_by_cause_label = results['num_dalys'] + dalys_averted = results['dalys_averted'] + pc_dalys_averted = results['pc_dalys_averted'] + + for param in param_names: + param_formatted = format_scenario_name(param) + print(f"Plotting for {param_formatted}...") + fig, ax = plt.subplots() + name_of_plot = f"Deaths With {param_formatted}, {target_period_label}" + do_bar_plot_with_ci(num_deaths_by_cause_label / 1e3, param_formatted, ax, period_labels_for_bar_plots, target_period_label) + legend = ax.get_legend() + if legend is not None: + legend.remove() + ax.set_title(name_of_plot) + ax.set_xlabel("Cause of Death") + ax.set_ylabel("Number of Deaths (/1000)") + #ax.set_ylim(0, 500) + ax.grid(axis="y") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) + plt.close(fig) + + fig, ax = plt.subplots() + name_of_plot = f"DALYS With {param_formatted}, {target_period_label}" + do_bar_plot_with_ci(num_dalys_by_cause_label / 1e6, param_formatted, ax, period_labels_for_bar_plots, target_period_label) + legend = ax.get_legend() + if legend is not None: + legend.remove() + ax.set_title(name_of_plot) + ax.set_xlabel("Cause of Disability/Death") + ax.set_ylabel("Number of DALYS (/millions)") + #ax.set_ylim(0, 30) + ##ax.set_yticks(np.arange(0, 35, 5)) + ax.grid(axis="y") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) + plt.close(fig) + + # Plot cost of each scenario, with confidence intervals, for the target period + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("results_file", type=Path) + parser.add_argument("output_folder", type=Path, nargs="?", default=None) + args = parser.parse_args() + + 
apply(results_file=args.results_file, output_folder=args.output_folder, resourcefilepath=Path("./resources")) + + plot_legends.apply(results_folder=None, output_folder=args.output_folder, resourcefilepath=Path("./resources")) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py b/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py index afcb96f5fe..c3e46fa402 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py @@ -14,6 +14,20 @@ TARGET_PERIOD = (Date(2026, 1, 1), Date(2041, 1, 1)) +def find_difference_relative_to_comparison(_ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1), relative to where draw = `comparison`. + The comparison is `X - COMPARISON`.""" + return _ser \ + .unstack(level=0) \ + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ + .drop(columns=([comparison] if drop_comparison else [])) \ + .stack() + def get_total_population_by_year(_df): years_needed = [i.year for i in TARGET_PERIOD] _df["year"] = pd.to_datetime(_df["date"]).dt.year @@ -82,6 +96,7 @@ def find_difference_extra_relative_to_comparison( .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=0) .drop(index=([comparison] if drop_comparison else [])) .stack() + ) @@ -261,3 +276,38 @@ def get_counts_of_appts(_df: pd.DataFrame, target_period_tuple: tuple[Date, Date .sum() .astype(int) ) + + +def make_get_counts_of_appts_by_period( + period_length_years: int, + target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, +): + """Create helper that summarizes appointment counts by period chunks + overall.""" + periods = get_periods_within_target_period( + period_length_years=period_length_years, + 
target_period_tuple=target_period_tuple, + ) + period_lookup = { + year: period_label + for period_label, (start_year, end_year) in periods + for year in range(start_year, end_year + 1) + } + target_period_label = target_period(target_period_tuple) + + def _get_counts_of_appts_by_period(_df: pd.DataFrame) -> pd.Series: + _df_in_target = _df.loc[pd.to_datetime(_df["date"]).between(*target_period_tuple)].copy() + _df_in_target["year"] = pd.to_datetime(_df_in_target["date"]).dt.year + _df_in_target["period"] = _df_in_target["year"].map(period_lookup) + + appts = _df_in_target["Number_By_Appt_Type_Code"].apply(pd.Series) + chunked = appts.groupby(_df_in_target["period"]).sum().T.stack() + chunked.index = chunked.index.set_names(["appt_type", "period"]) + + overall = appts.sum() + overall.index = pd.MultiIndex.from_arrays( + [overall.index, np.repeat(target_period_label, len(overall.index))], + names=["appt_type", "period"], + ) + return pd.concat([chunked, overall]).astype(int).sort_index() + + return _get_counts_of_appts_by_period From 7d984bda643504f47b3cd75c639d6c6e87c01c71 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Sun, 15 Mar 2026 08:00:58 +0000 Subject: [PATCH 23/55] More figs --- .../analysis_effect_of_treatment_ids.py | 36 +++++ .../fig_utils.py | 143 ++++++++++++++++++ .../figures_effect_of_treatment_ids.py | 16 ++ .../results_processing_utils.py | 35 +++++ 4 files changed, 230 insertions(+) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index 2c0be8753d..2c62c1962c 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -29,6 +29,7 @@ make_get_num_dalys_by_cause_label_and_period, make_get_num_deaths_by_cause_label_and_period, make_get_counts_of_appts_by_period, + make_get_counts_of_hsis_by_period, 
set_param_names_as_column_index_level_0, target_period, find_difference_extra_relative_to_comparison, @@ -63,6 +64,12 @@ # SCALING_FACTOR retrieved from the suspended run in # outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z # SCALING_FACTOR = 58.158436 +EXCLUDED_HSIs = [ + "FirstAttendance_Emergency", + "FirstAttendance_NonEmergency", + "FirstAttendance_SpuriousEmergencyCare", +] + def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None): @@ -85,6 +92,10 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No period_length_years=1, target_period_tuple=TARGET_PERIOD, ) + get_num_hsi_by_period = make_get_counts_of_hsis_by_period( + period_length_years=1, + target_period_tuple=TARGET_PERIOD, + ) # Costs calculation print("Calculating costs...") @@ -218,6 +229,31 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No .sort_index() ) + counts_of_hsi_by_short_treatment_id = ( + compute_summary_statistics(counts_of_hsi_by_short_treatment_id, 'median') + ) + counts_of_hsi_by_short_treatment_id = ( + counts_of_hsi_by_short_treatment_id.drop(columns=EXCLUDED_HSIs, errors='ignore') + ) + + counts_of_hsi_by_period = ( + extract_results( + results_folder, + module="tlo.methods.healthsystem.summary", + key="HSI_Event", + custom_generate_series=lambda _df: get_num_hsi_by_period(_df, TARGET_PERIOD), + do_scaling=True, + suspended_results_folder=suspended_folder, + autodiscover=True, + ) + .pipe(set_param_names_as_column_index_level_0, param_names=param_names) + .fillna(0.0) + .sort_index() + ) + counts_of_hsi_by_period = ( + compute_summary_statistics(counts_of_hsi_by_period, 'median') + ) + print("Extracting counts of appointments data...") counts_of_appts = ( extract_results( diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py index 980f095e74..cc1a507e2b 100644 --- 
a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py @@ -36,6 +36,13 @@ def get_color_by_appointment_type(appointment_types) -> dict: return color_by_appointment_type +def _get_color_for_treatment_id_prefix(treatment_id: str) -> str: + """Return color for a treatment id based on its first underscore-delimited token.""" + prefix = str(treatment_id).split("_")[0] + color = get_color_short_treatment_id(prefix) + return "grey" if pd.isna(color) else color + + def do_bar_plot_with_ci( _df: pd.DataFrame, _param, @@ -277,6 +284,59 @@ def plot_appointment_counts_stacked_bar(_df: pd.DataFrame, plot_stat: str = "cen return fig, ax +def plot_hsi_counts_stacked_bar(_df: pd.DataFrame, plot_stat: str = "central"): + """Plot horizontal stacked bars of HSI counts by draw for a selected summary statistic.""" + if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels != 2: + raise ValueError("_df columns must be a 2-level MultiIndex with levels for draw and stat.") + + stat_level_name = "stat" if "stat" in _df.columns.names else _df.columns.names[1] + stat_level_values = _df.columns.get_level_values(stat_level_name) + if plot_stat not in stat_level_values: + raise ValueError(f"The column MultiIndex does not contain '{plot_stat}' in the stat level.") + + _plot = _df.xs(plot_stat, axis=1, level=stat_level_name).T + if _plot.empty: + raise ValueError(f"No plottable data remain after selecting the '{plot_stat}' columns.") + + if _plot.isna().any().any(): + warnings.warn( + f"Missing values detected after selecting '{plot_stat}'. 
Bars will omit missing segments.", + stacklevel=2, + ) + + totals = _plot.sum(axis=1, skipna=True) + _plot = _plot.loc[totals.sort_values(ascending=False).index] + if not (_plot.gt(0).any(axis=1)).any(): + raise ValueError(f"No positive values remain after selecting the '{plot_stat}' columns.") + + fig_width = max(12, min(0.22 * len(_plot.columns) + 12, 30)) + fig_height = max(6, min(0.35 * len(_plot.index), 24)) + fig, ax = plt.subplots(figsize=(fig_width, fig_height)) + + left = np.zeros(len(_plot.index), dtype=float) + y = np.arange(len(_plot.index)) + + for treatment_id in _plot.columns: + values = _plot[treatment_id] + mask = values.gt(0) & values.notna() + if not mask.any(): + continue + ax.barh( + y[mask.to_numpy()], + values.loc[mask].to_numpy(), + left=left[mask.to_numpy()], + color=_get_color_for_treatment_id_prefix(treatment_id), + label=str(treatment_id), + ) + left[mask.to_numpy()] += values.loc[mask].to_numpy() + + ax.set_yticks(y) + ax.set_yticklabels([str(label) for label in _plot.index], fontsize=12) + ax.invert_yaxis() + fig.tight_layout() + return fig, ax + + def plot_appointment_counts_by_period_for_draw( _df: pd.DataFrame, draw: str, @@ -358,3 +418,86 @@ def plot_appointment_counts_by_period_for_draw( fig.tight_layout() return fig, ax + + +def plot_hsi_counts_by_period_for_draw( + _df: pd.DataFrame, + draw: str, + period_labels: list[str], +): + """Plot central values with lower/upper intervals across period chunks for one draw.""" + if not isinstance(_df.index, pd.MultiIndex) or _df.index.nlevels != 2: + raise ValueError("_df index must be a 2-level MultiIndex with levels for short_treatment_id and period.") + if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels != 2: + raise ValueError("_df columns must be a 2-level MultiIndex with levels for draw and stat.") + if draw not in _df.columns.get_level_values(0): + available_draws = sorted(set(_df.columns.get_level_values(0))) + raise ValueError(f"Draw '{draw}' not found. 
Available draws: {available_draws}") + + _plot = _df[draw].reindex( + pd.MultiIndex.from_product( + [ + _df.index.get_level_values(0).unique(), + period_labels, + ], + names=["short_treatment_id", "period"], + ), + fill_value=0.0, + ) + _plot = _plot.loc[:, ["lower", "central", "upper"]] + _plot = _plot.unstack("period") + + central = _plot["central"] + lower = _plot["lower"] + upper = _plot["upper"] + non_zero_mask = central.gt(0).any(axis=1) + central = central.loc[non_zero_mask, period_labels] + lower = lower.loc[non_zero_mask, period_labels] + upper = upper.loc[non_zero_mask, period_labels] + + if central.empty: + raise ValueError(f"No non-zero treatment ids remain for draw '{draw}'.") + + x = np.arange(len(period_labels)) + fig_width = max(10, min(1.2 * len(period_labels) + 4, 20)) + fig_height = max(6, min(0.28 * len(central.index) + 6, 18)) + fig, ax = plt.subplots(figsize=(fig_width, fig_height)) + + for treatment_id in central.index: + central_values = central.loc[treatment_id].to_numpy() + lower_values = lower.loc[treatment_id].to_numpy() + upper_values = upper.loc[treatment_id].to_numpy() + yerr = np.vstack([central_values - lower_values, upper_values - central_values]) + color = _get_color_for_treatment_id_prefix(treatment_id) + ax.errorbar( + x, + central_values, + yerr=yerr, + fmt="o", + color=color, + ecolor=color, + elinewidth=1.2, + capsize=2, + markersize=4, + label=str(treatment_id), + ) + + ax.set_xticks(x) + ax.set_xticklabels(period_labels, rotation=45, ha="right") + ax.set_xlabel("period") + ax.set_ylabel("HSI count") + ax.set_title(f"HSI counts by period: {draw}") + ax.grid(axis="y") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + ax.legend( + title="Treatment ID", + loc="center left", + bbox_to_anchor=(1.02, 0.5), + fontsize=8, + title_fontsize=9, + frameon=True, + ) + + fig.tight_layout() + return fig, ax diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py 
b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py index 057f06ad82..795931de89 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py @@ -28,6 +28,7 @@ plot_multiindex_dot_with_interval, plot_appointment_counts_by_period_for_draw, plot_appointment_counts_heatmap, + plot_hsi_counts_stacked_bar ) from tlo.analysis.utils import ( compute_summary_statistics, @@ -81,6 +82,21 @@ def apply(results_file: Path, output_folder: Path, resourcefilepath: Path = None fig.savefig(make_graph_file_name(name_of_plot)) plt.close(fig) + + # Plot number of HSIs for each draw + counts_of_hsi_by_period = results["counts_of_hsi_by_period"] + for param in param_names: + draw = format_scenario_name(param) + name_of_plot = f"Yearly HSI counts for {draw}" + fig, ax = plot_hsi_counts_by_period_for_draw( + counts_of_hsi_by_period, + draw=draw, + period_labels=appointment_period_labels, + ) + ax.set_title(name_of_plot) + fig.savefig(make_graph_file_name(name_of_plot)) + plt.close(fig) + # Plot population growth total_population_by_year = results['total_population_by_year'] for year in [2026, 2031, 2036, 2040]: diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py b/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py index c3e46fa402..8762e2992f 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py @@ -311,3 +311,38 @@ def _get_counts_of_appts_by_period(_df: pd.DataFrame) -> pd.Series: return pd.concat([chunked, overall]).astype(int).sort_index() return _get_counts_of_appts_by_period + + +def make_get_counts_of_hsis_by_period( + period_length_years: int, + target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, +): + """Create helper that summarizes appointment counts by period chunks + overall.""" + 
periods = get_periods_within_target_period( + period_length_years=period_length_years, + target_period_tuple=target_period_tuple, + ) + period_lookup = { + year: period_label + for period_label, (start_year, end_year) in periods + for year in range(start_year, end_year + 1) + } + target_period_label = target_period(target_period_tuple) + + def _get_counts_of_hsis_by_period(_df: pd.DataFrame) -> pd.Series: + _df_in_target = _df.loc[pd.to_datetime(_df["date"]).between(*target_period_tuple)].copy() + _df_in_target["year"] = pd.to_datetime(_df_in_target["date"]).dt.year + _df_in_target["period"] = _df_in_target["year"].map(period_lookup) + + hsis = _df_in_target["TREATMENT_ID"].apply(pd.Series) + chunked = hsis.groupby(_df_in_target["period"]).sum().T.stack() + chunked.index = chunked.index.set_names(["appt_type", "period"]) + + overall = hsis.sum() + overall.index = pd.MultiIndex.from_arrays( + [overall.index, np.repeat(target_period_label, len(overall.index))], + names=["appt_type", "period"], + ) + return pd.concat([chunked, overall]).astype(int).sort_index() + + return _get_counts_of_hsis_by_period From 5bf0196243f7d05575ac81008b713fb76f21201b Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Mon, 16 Mar 2026 12:51:59 +0000 Subject: [PATCH 24/55] Additional figures --- .../analysis_effect_of_treatment_ids.py | 6 +- .../fig_utils.py | 85 +++++++++++++++++++ .../figures_effect_of_treatment_ids.py | 11 +++ 3 files changed, 97 insertions(+), 5 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index 2c62c1962c..3e39b4afe0 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -70,14 +70,10 @@ "FirstAttendance_SpuriousEmergencyCare", ] - - def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path 
= None): """Produce standard plots describing effect of each TREATMENT_ID.""" _, age_grp_lookup = make_age_grp_lookup() - - param_names = get_parameter_names_from_scenario_file() get_num_deaths_by_cause_label_and_period = make_get_num_deaths_by_cause_label_and_period( PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS, @@ -241,7 +237,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No results_folder, module="tlo.methods.healthsystem.summary", key="HSI_Event", - custom_generate_series=lambda _df: get_num_hsi_by_period(_df, TARGET_PERIOD), + custom_generate_series=lambda _df: get_num_hsi_by_period(_df), do_scaling=True, suspended_results_folder=suspended_folder, autodiscover=True, diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py index cc1a507e2b..b76c69240d 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py @@ -13,7 +13,9 @@ CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP, get_color_cause_of_death_or_daly_label, get_color_short_treatment_id, + make_calendar_period_type, order_of_cause_of_death_or_daly_label, + order_of_short_treatment_ids, ) @@ -43,6 +45,89 @@ def _get_color_for_treatment_id_prefix(treatment_id: str) -> str: return "grey" if pd.isna(color) else color +def _get_ordered_short_treatment_ids(treatment_ids: pd.Index) -> list[str]: + """Return treatment ids with recognized short ids first in standard order.""" + treatment_ids = pd.Index(treatment_ids).unique() + recognized = [treatment_id for treatment_id in treatment_ids if not pd.isna(get_color_short_treatment_id(treatment_id))] + unrecognized = sorted(str(treatment_id) for treatment_id in treatment_ids if pd.isna(get_color_short_treatment_id(treatment_id))) + recognized = sorted(recognized, key=order_of_short_treatment_ids) + return recognized + unrecognized + + +def plot_deaths_by_period_for_cause( + _df: pd.DataFrame, + cause_label: str, + 
plot_stat: str = "central", +): + """Plot deaths over time for a single cause, with one line per short treatment id.""" + if not isinstance(_df.index, pd.MultiIndex) or _df.index.nlevels != 2: + raise ValueError("_df index must be a 2-level MultiIndex with levels for label and period.") + if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels != 2: + raise ValueError("_df columns must be a 2-level MultiIndex with levels for treatment id and stat.") + + label_level_name = "label" if "label" in _df.index.names else _df.index.names[0] + period_level_name = "period" if "period" in _df.index.names else _df.index.names[1] + stat_level_name = "stat" if "stat" in _df.columns.names else _df.columns.names[1] + + available_causes = pd.Index(_df.index.get_level_values(label_level_name).unique()) + if cause_label not in available_causes: + raise ValueError(f"Cause label '{cause_label}' not found. Available causes: {available_causes.tolist()}") + + available_stats = pd.Index(_df.columns.get_level_values(stat_level_name).unique()) + if plot_stat not in available_stats: + raise ValueError(f"Statistic '{plot_stat}' not found. 
Available stats: {available_stats.tolist()}") + + _plot = _df.xs(cause_label, level=label_level_name) + _plot = _plot.xs(plot_stat, axis=1, level=stat_level_name) + if _plot.empty: + raise ValueError(f"No plottable data remain for cause '{cause_label}' using stat '{plot_stat}'.") + + _plot.index.name = period_level_name + try: + ordered_periods = pd.Index(_plot.index).astype(make_calendar_period_type()) + _plot = _plot.reindex(ordered_periods.sort_values().astype(str)) + except (TypeError, ValueError): + _plot = _plot.loc[pd.Index(_plot.index).drop_duplicates()] + + ordered_treatment_ids = _get_ordered_short_treatment_ids(_plot.columns) + _plot = _plot.loc[:, ordered_treatment_ids] + + fig_width = max(10, min(1.4 * len(_plot.index) + 4, 18)) + fig, ax = plt.subplots(figsize=(fig_width, 6)) + x = np.arange(len(_plot.index)) + + for treatment_id in _plot.columns: + ax.plot( + x, + _plot[treatment_id].to_numpy(), + marker="o", + linewidth=1.8, + markersize=4, + color=_get_color_for_treatment_id_prefix(treatment_id), + label=str(treatment_id), + ) + + ax.set_xticks(x) + ax.set_xticklabels([str(period) for period in _plot.index], rotation=45, ha="right") + ax.set_xlabel("Period") + ax.set_ylabel("Number of deaths") + ax.set_title(str(cause_label)) + ax.grid(axis="y") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + ax.legend( + title="Treatment ID", + loc="center left", + bbox_to_anchor=(1.02, 0.5), + fontsize=8, + title_fontsize=9, + frameon=True, + ) + + fig.tight_layout() + return fig, ax + + def do_bar_plot_with_ci( _df: pd.DataFrame, _param, diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py index 795931de89..18f60aad70 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py @@ -28,6 +28,8 @@ 
plot_multiindex_dot_with_interval, plot_appointment_counts_by_period_for_draw, plot_appointment_counts_heatmap, + plot_deaths_by_period_for_cause, + plot_hsi_counts_by_period_for_draw, plot_hsi_counts_stacked_bar ) from tlo.analysis.utils import ( @@ -160,6 +162,15 @@ def apply(results_file: Path, output_folder: Path, resourcefilepath: Path = None fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) plt.close(fig) + cause_labels = num_deaths_by_cause_label.index.get_level_values("label").unique() + for cause_label in cause_labels: + fig, ax = plot_deaths_by_period_for_cause(num_deaths_by_cause_label / 1e3, cause_label=cause_label) + name_of_plot = f"Deaths Over Time for {cause_label}" + ax.set_title(name_of_plot) + ax.set_ylabel("Number of deaths (/1000)") + fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) + plt.close(fig) + # Plot cost of each scenario, with confidence intervals, for the target period From d68491a94379b38c03e3e91023e8f762c9df6ee2 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Thu, 19 Mar 2026 14:32:01 +0000 Subject: [PATCH 25/55] service availability switch params --- resources/healthsystem/ResourceFile_HealthSystem_parameters.csv | 2 ++ 1 file changed, 2 insertions(+) diff --git a/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv b/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv index c6bd6414e7..44a0f60bc3 100644 --- a/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv +++ b/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv @@ -3,6 +3,8 @@ policy_name,Naive year_mode_switch,2100 scale_to_effective_capabilities,FALSE Service_Availability,"[""*""]" +year_service_availability_switch,2100 +service_availability_postSwitch,"[""*""]" use_funded_or_actual_staffing,funded_plus mode_appt_constraints,1 mode_appt_constraints_postSwitch,1 From b84f06b82cc7a48557e8c33190068631866bc087 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Fri, 20 Mar 2026 07:38:12 
+0000 Subject: [PATCH 26/55] Debugging with a single service --- .../analysis_effect_of_treatment_ids.py | 231 +++++--------- .../fig_utils.py | 302 +++++++----------- .../figures_effect_of_treatment_ids.py | 135 ++++---- .../results_processing_utils.py | 7 +- .../scenario_effect_of_treatment_ids.py | 10 +- src/tlo/methods/healthsystem.py | 4 + 6 files changed, 270 insertions(+), 419 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index 3e39b4afe0..4b4218f163 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -1,6 +1,7 @@ """Produce plots to show the impact each set of treatments.""" import argparse +from datetime import date import glob import os import zipfile @@ -56,9 +57,11 @@ squarify_neat, summarize, ) +# python src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z figs/ --target-start=2010-01-01 --target-end=2025-12-31 +# python src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-16T154500Z figs/ --target-start=2026-01-01 --target-end=2041-01-01 TARGET_PERIOD = (Date(2026, 1, 1), Date(2041, 1, 1)) -PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS = 5 +PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS = 1 suspended_folder = Path("outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z") results_folder = Path("outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-16T154500Z") # SCALING_FACTOR retrieved from the suspended run in @@ -70,74 +73,51 @@ "FirstAttendance_SpuriousEmergencyCare", ] -def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None): + +def parse_iso_date(value: 
str) -> Date: + parsed = date.fromisoformat(value) + return Date(parsed.year, parsed.month, parsed.day) + + +def apply( + results_folder: Path, + output_folder: Path, + resourcefilepath: Path = None, + target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, +): """Produce standard plots describing effect of each TREATMENT_ID.""" _, age_grp_lookup = make_age_grp_lookup() param_names = get_parameter_names_from_scenario_file() get_num_deaths_by_cause_label_and_period = make_get_num_deaths_by_cause_label_and_period( PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS, - TARGET_PERIOD, + target_period_tuple, ) get_num_dalys_by_cause_label_and_period = make_get_num_dalys_by_cause_label_and_period( PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS, - TARGET_PERIOD, - ) - # Get yearly number of appointments; - get_num_appts_by_period = make_get_counts_of_appts_by_period( - period_length_years=1, - target_period_tuple=TARGET_PERIOD, + target_period_tuple, ) get_num_hsi_by_period = make_get_counts_of_hsis_by_period( - period_length_years=1, - target_period_tuple=TARGET_PERIOD, + PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS, + target_period_tuple=target_period_tuple, ) - - # Costs calculation - print("Calculating costs...") - # For now, choose specific draws - # draw_number:Treament ID - # 0 : Nothing - # 10: BreastCancer_Investigation_* - # 15: CardioMetabolicDisorders_Prevention_WeightLoss_* - # 27: Contraception_Routine_* - draws_to_run = [0, 10, 15, 27, 31, 39, 65] - selected_draws = [9, 14, 26, 30, 38, 64] - - discount_rate_cost = 0.03 - input_costs = estimate_input_cost_of_scenarios( - results_folder, - resourcefilepath, - suspended_results_folder=suspended_folder, - _draws=draws_to_run, - _runs=[0, 1, 2, 3, 4], - cost_only_used_staff=True, - _discount_rate=discount_rate_cost, - _metric="median",) - + results = {} # Get total population by year print("Extracting population data...") - total_population_by_year = extract_results( - results_folder, - module='tlo.methods.demography', - key='population', - 
custom_generate_series=get_total_population_by_year, - do_scaling=True, - suspended_results_folder=suspended_folder, - autodiscover=True + total_population_by_year = ( + extract_results( + results_folder, + module='tlo.methods.demography', + key='population', + custom_generate_series=lambda _df: get_total_population_by_year(_df, target_period_tuple), + do_scaling=True, + suspended_results_folder=suspended_folder, + autodiscover=True + ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) ) - total_population_by_year = compute_summary_statistics(total_population_by_year, central_measure = 'median') - total_population_by_year = set_param_names_as_column_index_level_0(total_population_by_year, param_names=param_names) - total_population_by_year = (total_population_by_year - .stack(level=["draw", "stat"]) # move draw & stat into index - .reset_index() # turn all index levels into columns - .rename(columns={0: "population"}) # name the value column - ).set_index(["draw", "stat",'year']) - total_population_by_year = total_population_by_year.rename( - index={"central": "median"}, - level="stat" - ) + total_population_by_year = compute_summary_statistics(total_population_by_year, central_measure='median') + results['total_population_by_year'] = total_population_by_year print("Extracting total deaths and DALYs by label...") num_deaths = ( @@ -155,15 +135,16 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No num_deaths_averted = summarize( pd.DataFrame( find_difference_extra_relative_to_comparison(num_deaths.sum(), comparison='Nothing')).T - ).iloc[0].unstack().sort_values(by='mean', ascending=True) + ).iloc[0].unstack() pc_deaths_averted = 100.0 * summarize( pd.DataFrame( find_difference_extra_relative_to_comparison(num_deaths.sum(), comparison='Nothing', scaled=True)).T - ).iloc[0].unstack().sort_values(by='mean', ascending=True) + ).iloc[0].unstack() - num_deaths = summarize(num_deaths) + num_deaths = 
compute_summary_statistics(num_deaths, central_measure='median') + results['num_deaths'] = num_deaths num_dalys = ( extract_results( @@ -185,37 +166,16 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No pc_dalys_averted = 100.0 * summarize( pd.DataFrame( find_difference_extra_relative_to_comparison(num_dalys.sum(), comparison='Nothing', scaled=True)).T - ).iloc[0].unstack().sort_values(by='mean', ascending=True) - - num_dalys = summarize(num_dalys) - num_dalys_averted_summarized = summarize_cost_data(-1.0 * num_dalys_averted, _metric='median') + ).iloc[0].unstack() - total_num_death_by_agegrp_and_label = extract_results( - results_folder, - module="tlo.methods.demography", - key="death", - custom_generate_series=lambda _df: get_total_num_death_by_agegrp_and_label(_df, TARGET_PERIOD), - do_scaling=True, - suspended_results_folder=suspended_folder, - autodiscover=True, - ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) - - total_num_dalys_by_agegrp_and_label = extract_results( - results_folder, - module="tlo.methods.healthburden", - key="dalys_stacked_by_age_and_time", - custom_generate_series=lambda _df: get_total_num_dalys_by_agegrp_and_label(_df, TARGET_PERIOD), - do_scaling=True, - suspended_results_folder=suspended_folder, - autodiscover=True, - ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) + num_dalys = compute_summary_statistics(num_dalys, central_measure='median') counts_of_hsi_by_short_treatment_id = ( extract_results( results_folder, module="tlo.methods.healthsystem.summary", key="HSI_Event", - custom_generate_series=lambda _df: get_counts_of_hsi_by_short_treatment_id(_df, TARGET_PERIOD), + custom_generate_series=lambda _df: get_counts_of_hsi_by_short_treatment_id(_df, target_period_tuple), do_scaling=True, suspended_results_folder=suspended_folder, autodiscover=True, @@ -223,14 +183,13 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No 
.pipe(set_param_names_as_column_index_level_0, param_names=param_names) .fillna(0.0) .sort_index() - ) + ).drop(EXCLUDED_HSIs, errors='ignore') counts_of_hsi_by_short_treatment_id = ( compute_summary_statistics(counts_of_hsi_by_short_treatment_id, 'median') ) - counts_of_hsi_by_short_treatment_id = ( - counts_of_hsi_by_short_treatment_id.drop(columns=EXCLUDED_HSIs, errors='ignore') - ) + + results['counts_of_hsi_by_short_treatment_id'] = counts_of_hsi_by_short_treatment_id counts_of_hsi_by_period = ( extract_results( @@ -245,91 +204,49 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No .pipe(set_param_names_as_column_index_level_0, param_names=param_names) .fillna(0.0) .sort_index() - ) + ).drop(EXCLUDED_HSIs, level=0, errors='ignore') + counts_of_hsi_by_period = ( compute_summary_statistics(counts_of_hsi_by_period, 'median') ) + results['counts_of_hsi_by_period'] = counts_of_hsi_by_period - print("Extracting counts of appointments data...") - counts_of_appts = ( - extract_results( - results_folder, - module="tlo.methods.healthsystem.summary", - key="HSI_Event", - custom_generate_series=lambda _df: get_counts_of_appts(_df, TARGET_PERIOD), - do_scaling=True, - suspended_results_folder=suspended_folder, - ) - .pipe(set_param_names_as_column_index_level_0, param_names=param_names) - .fillna(0.0) - .sort_index() - ) - counts_of_appts = compute_summary_statistics(counts_of_appts, 'median') - - counts_of_appts_by_period = ( - extract_results( - results_folder, - module="tlo.methods.healthsystem.summary", - key="HSI_Event", - custom_generate_series=lambda _df: get_num_appts_by_period(_df), - do_scaling=True, - suspended_results_folder=suspended_folder, - ) - .pipe(set_param_names_as_column_index_level_0, param_names=param_names) - .fillna(0.0) - .sort_index() - ) - counts_of_appts_by_period = compute_summary_statistics(counts_of_appts_by_period, 'median') - - # Computing ICERs - print("Computing ICERs...") - total_input_cost = 
input_costs.groupby(['draw', 'run'])['cost'].sum() - incremental_scenario_cost = (pd.DataFrame( - find_difference_relative_to_comparison( - total_input_cost, - comparison=0,) - ).T.iloc[0].unstack()).T - - incremental_scenario_cost_summarized = summarize_cost_data(incremental_scenario_cost, _metric='median') - icers_summarized = (incremental_scenario_cost_summarized.values / - num_dalys_averted_summarized.iloc[selected_draws].values) - - icers_summarized = ( - pd.DataFrame( - icers_summarized, - index=num_dalys_averted_summarized.index[selected_draws], - columns=num_dalys_averted_summarized.columns - ) - ) - - return { - "total_population_by_year": total_population_by_year, - "num_deaths": num_deaths, - "deaths_averted": num_deaths_averted, - "pc_deaths_averted": pc_deaths_averted, - "num_dalys": num_dalys, - "dalys_averted": num_dalys_averted, - "pc_dalys_averted": pc_dalys_averted, - "input_costs": input_costs, - "incremental_scenario_cost_summarized": incremental_scenario_cost_summarized, - "icers_summarized": icers_summarized, - "total_num_death_by_agegrp_and_label": total_num_death_by_agegrp_and_label, - "total_num_dalys_by_agegrp_and_label": total_num_dalys_by_agegrp_and_label, - "counts_of_hsi_by_short_treatment_id": counts_of_hsi_by_short_treatment_id, - "counts_of_appts": counts_of_appts, - "counts_of_appts_by_period": counts_of_appts_by_period, - } + return results if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("results_folder", type=Path) parser.add_argument("output_folder", type=Path, nargs="?", default=None) + parser.add_argument("--target-start", type=str, default=None) + parser.add_argument("--target-end", type=str, default=None) args = parser.parse_args() + if (args.target_start is None) != (args.target_end is None): + parser.error("Provide both --target-start and --target-end, or neither.") + + if args.target_start is None: + target_period_tuple = TARGET_PERIOD + else: + target_period_tuple = ( + 
parse_iso_date(args.target_start), + parse_iso_date(args.target_end), + ) + if not target_period_tuple[0] < target_period_tuple[1]: + parser.error("--target-start must be earlier than --target-end.") + out = args.output_folder if args.output_folder is not None else args.results_folder - results = apply(results_folder=args.results_folder, output_folder=out, resourcefilepath=Path("./resources")) - with open(args.output_folder / 'fullresults.pkl', 'wb') as f: + results = apply( + results_folder=args.results_folder, + output_folder=out, + resourcefilepath=Path("./resources"), + target_period_tuple=target_period_tuple, + ) + outfile = ( + f"{target_period_tuple[1].year:04d}-{target_period_tuple[1].month:02d}-{target_period_tuple[1].day:02d}" + "_fullresults.pkl" + ) + with open(out / outfile, 'wb') as f: pickle.dump(results, f) - print("Analysis complete! Results saved to fullresults.pkl") + print(f"Analysis complete! Results saved to {out / outfile}") diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py index b76c69240d..218e3f8efb 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py @@ -38,11 +38,11 @@ def get_color_by_appointment_type(appointment_types) -> dict: return color_by_appointment_type -def _get_color_for_treatment_id_prefix(treatment_id: str) -> str: - """Return color for a treatment id based on its first underscore-delimited token.""" - prefix = str(treatment_id).split("_")[0] - color = get_color_short_treatment_id(prefix) - return "grey" if pd.isna(color) else color +def _get_short_treatment_id_and_color(treatment_id: str) -> tuple[str, str]: + """Return short treatment id prefix and plotting color for a treatment id.""" + short_treatment_id = str(treatment_id).split("_")[0] + color = get_color_short_treatment_id(short_treatment_id) + return short_treatment_id, ("grey" if pd.isna(color) else color) def 
_get_ordered_short_treatment_ids(treatment_ids: pd.Index) -> list[str]: @@ -54,6 +54,29 @@ def _get_ordered_short_treatment_ids(treatment_ids: pd.Index) -> list[str]: return recognized + unrecognized +def _parse_period_label(period_label: str) -> tuple[int, int]: + """Parse a period label of the form YYYY-YYYY into start/end years.""" + start_year_text, end_year_text = str(period_label).split("-", maxsplit=1) + return int(start_year_text), int(end_year_text) + + +def _get_sorted_period_labels_and_display_labels(period_labels: list[str]) -> tuple[list[str], list[str]]: + """Return chronological labels plus display labels, falling back to input order if parsing fails.""" + try: + parsed_periods = [(label, _parse_period_label(label)) for label in period_labels] + except (TypeError, ValueError): + return period_labels, period_labels + + ordered_period_labels = [ + label for label, _ in sorted(parsed_periods, key=lambda item: (item[1][0], item[1][1])) + ] + display_labels = [ + str(start_year) if start_year == end_year else label + for label, (start_year, end_year) in sorted(parsed_periods, key=lambda item: (item[1][0], item[1][1])) + ] + return ordered_period_labels, display_labels + + def plot_deaths_by_period_for_cause( _df: pd.DataFrame, cause_label: str, @@ -97,13 +120,14 @@ def plot_deaths_by_period_for_cause( x = np.arange(len(_plot.index)) for treatment_id in _plot.columns: + _, color = _get_short_treatment_id_and_color(treatment_id) ax.plot( x, _plot[treatment_id].to_numpy(), marker="o", linewidth=1.8, markersize=4, - color=_get_color_for_treatment_id_prefix(treatment_id), + color=color, label=str(treatment_id), ) @@ -268,107 +292,6 @@ def do_label_barh_plot(_df: pd.DataFrame, _ax): size=7, ) - -def plot_appointment_counts_heatmap(_df: pd.DataFrame, plot_stat: str = "central", cmap: str = "viridis", colorbar_label: str = "central value"): - """Plot a heatmap of central values for each draw across the dataframe index.""" - if not isinstance(_df.columns, 
pd.MultiIndex) or _df.columns.nlevels != 2: - raise ValueError("_df columns must be a 2-level MultiIndex with levels for draw and stat.") - - stat_level_name = "stat" if "stat" in _df.columns.names else _df.columns.names[1] - stat_level_values = _df.columns.get_level_values(stat_level_name) - if "central" not in stat_level_values: - raise ValueError("The column MultiIndex does not contain a 'central' entry in the stat level.") - - _plot = _df.xs(plot_stat, axis=1, level=stat_level_name) - if _plot.empty: - raise ValueError("No plottable data remain after selecting the 'central' columns.") - - _plot = _plot.sort_index(axis=1) - - fig_width = max(12, min(0.3 * len(_plot.columns), 36)) - fig_height = max(6, min(0.35 * len(_plot.index), 18)) - fig, ax = plt.subplots(figsize=(fig_width, fig_height)) - - image = ax.imshow(_plot.to_numpy(), aspect="auto", cmap=cmap) - - ax.set_xticks(np.arange(len(_plot.columns))) - ax.set_xticklabels(_plot.columns, rotation=90, ha="right", fontsize=8) - ax.set_yticks(np.arange(len(_plot.index))) - ax.set_yticklabels([str(label) for label in _plot.index], fontsize=8) - ax.set_xlabel(_df.columns.names[0] if _df.columns.names[0] is not None else "draw") - ax.set_ylabel(_df.index.name if _df.index.name is not None else "category") - - colorbar = fig.colorbar(image, ax=ax) - colorbar.set_label(colorbar_label) - - fig.tight_layout() - return fig, ax - - -def plot_appointment_counts_stacked_bar(_df: pd.DataFrame, plot_stat: str = "central"): - """Plot horizontal stacked bars of appointment counts by draw for a selected summary statistic.""" - if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels != 2: - raise ValueError("_df columns must be a 2-level MultiIndex with levels for draw and stat.") - - stat_level_name = "stat" if "stat" in _df.columns.names else _df.columns.names[1] - stat_level_values = _df.columns.get_level_values(stat_level_name) - if plot_stat not in stat_level_values: - raise ValueError(f"The column MultiIndex 
does not contain '{plot_stat}' in the stat level.") - - _plot = _df.xs(plot_stat, axis=1, level=stat_level_name).T - if _plot.empty: - raise ValueError(f"No plottable data remain after selecting the '{plot_stat}' columns.") - - if _plot.isna().any().any(): - warnings.warn( - f"Missing values detected after selecting '{plot_stat}'. Bars will omit missing segments.", - stacklevel=2, - ) - - totals = _plot.sum(axis=1, skipna=True) - _plot = _plot.loc[totals.sort_values(ascending=False).index] - if not (_plot.gt(0).any(axis=1)).any(): - raise ValueError(f"No positive values remain after selecting the '{plot_stat}' columns.") - - fig_width = max(12, min(0.22 * len(_plot.columns) + 12, 30)) - fig_height = max(6, min(0.35 * len(_plot.index), 24)) - fig, ax = plt.subplots(figsize=(fig_width, fig_height)) - - color_by_appointment_type = get_color_by_appointment_type(_plot.columns) - left = np.zeros(len(_plot.index), dtype=float) - y = np.arange(len(_plot.index)) - - for idx, appointment_type in enumerate(_plot.columns): - values = _plot[appointment_type] - mask = values.gt(0) & values.notna() - if not mask.any(): - continue - ax.barh( - y[mask.to_numpy()], - values.loc[mask].to_numpy(), - left=left[mask.to_numpy()], - color=color_by_appointment_type[appointment_type], - label=str(appointment_type), - ) - left[mask.to_numpy()] += values.loc[mask].to_numpy() - - ax.set_yticks(y) - ax.set_yticklabels([str(label) for label in _plot.index], fontsize=12) - #ax.set_xlabel("count") - #ax.set_ylabel(_df.columns.names[0] if _df.columns.names[0] is not None else "draw") - ax.invert_yaxis() - ax.legend( - title="Appointment type", - loc="lower right", - fontsize=19, - title_fontsize=20, - frameon=True, - ) - - fig.tight_layout() - return fig, ax - - def plot_hsi_counts_stacked_bar(_df: pd.DataFrame, plot_stat: str = "central"): """Plot horizontal stacked bars of HSI counts by draw for a selected summary statistic.""" if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels 
!= 2: @@ -422,30 +345,41 @@ def plot_hsi_counts_stacked_bar(_df: pd.DataFrame, plot_stat: str = "central"): return fig, ax -def plot_appointment_counts_by_period_for_draw( +def plot_hsi_counts_by_period_for_draw( _df: pd.DataFrame, draw: str, - period_labels: list[str], + _dfbaseline: pd.DataFrame ): """Plot central values with lower/upper intervals across period chunks for one draw.""" if not isinstance(_df.index, pd.MultiIndex) or _df.index.nlevels != 2: - raise ValueError("_df index must be a 2-level MultiIndex with levels for appt_type and period.") + raise ValueError("_df index must be a 2-level MultiIndex with levels for short_treatment_id and period.") if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels != 2: raise ValueError("_df columns must be a 2-level MultiIndex with levels for draw and stat.") if draw not in _df.columns.get_level_values(0): available_draws = sorted(set(_df.columns.get_level_values(0))) raise ValueError(f"Draw '{draw}' not found. Available draws: {available_draws}") - _plot = _df[draw].reindex( + + # Because the baseline includes all treatment ids, we have a large number of HSIs being delivered; + # We are only interested in the HSIs indicated by the draw name i,e. 
for the draw Hiv_Treament, we + # only want to compare the number of Hiv_Treament HSIs until 2025 and during the implementation period + _dfbaseline = _dfbaseline['Nothing'] # because baseline was run only for Nothing scenario + treatment_id_of_interest = draw.replace("_*", "") + print(f"Filtering baseline to treatment id of interest: '{treatment_id_of_interest}'") + _dfbaseline = _dfbaseline[_dfbaseline.index.get_level_values(0) == treatment_id_of_interest] + + _df = pd.concat([_df[draw], _dfbaseline]) + _plot = _df.reindex( pd.MultiIndex.from_product( [ - _df.index.get_level_values("appt_type").unique(), - period_labels, + _df.index.get_level_values(0).unique(), + _df.index.get_level_values(1).unique(), ], - names=["appt_type", "period"], + names=["treatment_id", "period"], ), fill_value=0.0, ) + period_labels = _plot.index.get_level_values(1).unique() _plot = _plot.loc[:, ["lower", "central", "upper"]] _plot = _plot.unstack("period") @@ -453,47 +387,49 @@ def plot_appointment_counts_by_period_for_draw( lower = _plot["lower"] upper = _plot["upper"] non_zero_mask = central.gt(0).any(axis=1) - central = central.loc[non_zero_mask, period_labels] - lower = lower.loc[non_zero_mask, period_labels] - upper = upper.loc[non_zero_mask, period_labels] + + ordered_period_labels, display_period_labels = _get_sorted_period_labels_and_display_labels(period_labels) + central = central.loc[non_zero_mask, ordered_period_labels] + lower = lower.loc[non_zero_mask, ordered_period_labels] + upper = upper.loc[non_zero_mask, ordered_period_labels] if central.empty: - raise ValueError(f"No non-zero appointment types remain for draw '{draw}'.") + raise ValueError(f"No non-zero treatment ids remain for draw '{draw}'.") - color_by_appointment_type = get_color_by_appointment_type(central.index) - x = np.arange(len(period_labels)) - fig_width = max(10, min(1.2 * len(period_labels) + 4, 20)) + x = np.arange(len(ordered_period_labels)) + fig_width = max(10, min(1.2 * len(ordered_period_labels) 
+ 4, 20)) fig_height = max(6, min(0.28 * len(central.index) + 6, 18)) fig, ax = plt.subplots(figsize=(fig_width, fig_height)) - for appointment_type in central.index: - central_values = central.loc[appointment_type].to_numpy() - lower_values = lower.loc[appointment_type].to_numpy() - upper_values = upper.loc[appointment_type].to_numpy() + for treatment_id in central.index: + central_values = central.loc[treatment_id].to_numpy() + lower_values = lower.loc[treatment_id].to_numpy() + upper_values = upper.loc[treatment_id].to_numpy() yerr = np.vstack([central_values - lower_values, upper_values - central_values]) + _, color = _get_short_treatment_id_and_color(treatment_id) ax.errorbar( x, central_values, yerr=yerr, fmt="o", - color=color_by_appointment_type[appointment_type], - ecolor=color_by_appointment_type[appointment_type], + color=color, + ecolor=color, elinewidth=1.2, capsize=2, markersize=4, - label=str(appointment_type), + label=str(treatment_id), ) ax.set_xticks(x) - ax.set_xticklabels(period_labels, rotation=45, ha="right") + ax.set_xticklabels(display_period_labels, rotation=45, ha="right") ax.set_xlabel("period") - ax.set_ylabel("appointment count") - ax.set_title(f"Appointment counts by period: {draw}") + ax.set_ylabel("HSI count") + ax.set_title(f"HSI counts by period: {draw}") ax.grid(axis="y") ax.spines["top"].set_visible(False) ax.spines["right"].set_visible(False) ax.legend( - title="Appointment type", + title="Treatment ID", loc="center left", bbox_to_anchor=(1.02, 0.5), fontsize=8, @@ -505,77 +441,71 @@ def plot_appointment_counts_by_period_for_draw( return fig, ax -def plot_hsi_counts_by_period_for_draw( - _df: pd.DataFrame, - draw: str, - period_labels: list[str], -): - """Plot central values with lower/upper intervals across period chunks for one draw.""" - if not isinstance(_df.index, pd.MultiIndex) or _df.index.nlevels != 2: - raise ValueError("_df index must be a 2-level MultiIndex with levels for short_treatment_id and period.") +def 
plot_population_by_year(_df: pd.DataFrame, _dfbaseline: pd.DataFrame): + """Plot yearly central population values for all draws plus baseline.""" if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels != 2: raise ValueError("_df columns must be a 2-level MultiIndex with levels for draw and stat.") - if draw not in _df.columns.get_level_values(0): - available_draws = sorted(set(_df.columns.get_level_values(0))) - raise ValueError(f"Draw '{draw}' not found. Available draws: {available_draws}") + if not isinstance(_dfbaseline.columns, pd.MultiIndex) or _dfbaseline.columns.nlevels != 2: + raise ValueError("_dfbaseline columns must be a 2-level MultiIndex with levels for draw and stat.") - _plot = _df[draw].reindex( - pd.MultiIndex.from_product( - [ - _df.index.get_level_values(0).unique(), - period_labels, - ], - names=["short_treatment_id", "period"], - ), - fill_value=0.0, - ) - _plot = _plot.loc[:, ["lower", "central", "upper"]] - _plot = _plot.unstack("period") + stat_level_name = "stat" if "stat" in _df.columns.names else _df.columns.names[1] + baseline_draw_level_name = "draw" if "draw" in _dfbaseline.columns.names else _dfbaseline.columns.names[0] - central = _plot["central"] - lower = _plot["lower"] - upper = _plot["upper"] - non_zero_mask = central.gt(0).any(axis=1) - central = central.loc[non_zero_mask, period_labels] - lower = lower.loc[non_zero_mask, period_labels] - upper = upper.loc[non_zero_mask, period_labels] + available_stats = pd.Index(_df.columns.get_level_values(stat_level_name).unique()) + if "central" not in available_stats: + raise ValueError(f"Statistic 'central' not found. Available stats: {available_stats.tolist()}") - if central.empty: - raise ValueError(f"No non-zero treatment ids remain for draw '{draw}'.") + baseline_draws = pd.Index(_dfbaseline.columns.get_level_values(baseline_draw_level_name).unique()) + if "Nothing" not in baseline_draws: + raise ValueError(f"Baseline draw 'Nothing' not found. 
Available baseline draws: {baseline_draws.tolist()}") - x = np.arange(len(period_labels)) - fig_width = max(10, min(1.2 * len(period_labels) + 4, 20)) - fig_height = max(6, min(0.28 * len(central.index) + 6, 18)) + implementation_central = _df.xs("central", axis=1, level=stat_level_name).copy() + baseline_central = _dfbaseline["Nothing"].loc[:, ["central"]].copy() + + implementation_central.columns = implementation_central.columns.to_series().str.replace(r"_\*$", "", regex=True) + baseline_central.columns = pd.Index(["Nothing"]) + + _plot = pd.concat([baseline_central, implementation_central], axis=1) + _plot = _plot.loc[:, ~_plot.columns.duplicated()] + _plot = _plot.sort_index() + + ordered_treatment_ids = _get_ordered_short_treatment_ids(_plot.columns) + _plot = _plot.loc[:, ordered_treatment_ids] + + if _plot.empty: + raise ValueError("No plottable population data remain after selecting central values.") + + years = pd.Index(_plot.index) + x = np.arange(len(years)) + fig_width = max(10, min(1.0 * len(years) + 4, 20)) + fig_height = 6 fig, ax = plt.subplots(figsize=(fig_width, fig_height)) - for treatment_id in central.index: - central_values = central.loc[treatment_id].to_numpy() - lower_values = lower.loc[treatment_id].to_numpy() - upper_values = upper.loc[treatment_id].to_numpy() - yerr = np.vstack([central_values - lower_values, upper_values - central_values]) - color = _get_color_for_treatment_id_prefix(treatment_id) - ax.errorbar( + for treatment_id in _plot.columns: + short_treatment_id, color = _get_short_treatment_id_and_color(treatment_id) + ax.plot( x, - central_values, - yerr=yerr, - fmt="o", - color=color, - ecolor=color, - elinewidth=1.2, - capsize=2, + _plot[treatment_id].to_numpy(), + marker="o", + linewidth=1.8, markersize=4, - label=str(treatment_id), + color=color, + label=short_treatment_id, ) ax.set_xticks(x) - ax.set_xticklabels(period_labels, rotation=45, ha="right") - ax.set_xlabel("period") - ax.set_ylabel("HSI count") - 
ax.set_title(f"HSI counts by period: {draw}") + ax.set_xticklabels([str(year) for year in years], rotation=45, ha="right") + ax.set_xlabel("Year") + ax.set_ylabel("Population size") + ax.set_title("Population size by year") ax.grid(axis="y") ax.spines["top"].set_visible(False) ax.spines["right"].set_visible(False) + handles, labels = ax.get_legend_handles_labels() + deduplicated_handles_by_label = dict(zip(labels, handles)) ax.legend( + handles=list(deduplicated_handles_by_label.values()), + labels=list(deduplicated_handles_by_label.keys()), title="Treatment ID", loc="center left", bbox_to_anchor=(1.02, 0.5), diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py index 18f60aad70..03202c8ce7 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py @@ -4,47 +4,43 @@ import zipfile from pathlib import Path import pickle -import numpy as np +import pandas as pd import matplotlib.pyplot as plt from scripts.calibration_analyses.analysis_scripts import plot_legends from scripts.lcoa_inputs_from_tlo_analyses.results_processing_utils import ( - extract_deaths_total, - format_scenario_name, - get_counts_of_appts, - get_counts_of_hsi_by_short_treatment_id, - get_num_dalys_by_cause_label, - get_num_deaths_by_cause_label, get_parameter_names_from_scenario_file, get_periods_within_target_period, - get_total_num_dalys_by_agegrp_and_label, - get_total_num_death_by_agegrp_and_label, - get_total_population_by_year, - make_get_num_dalys_by_cause_label_and_period, - set_param_names_as_column_index_level_0, + format_scenario_name, target_period, ) from scripts.lcoa_inputs_from_tlo_analyses.fig_utils import ( do_bar_plot_with_ci, - plot_multiindex_dot_with_interval, - plot_appointment_counts_by_period_for_draw, - plot_appointment_counts_heatmap, 
plot_deaths_by_period_for_cause, plot_hsi_counts_by_period_for_draw, - plot_hsi_counts_stacked_bar -) -from tlo.analysis.utils import ( - compute_summary_statistics, + plot_population_by_year, ) from tlo import Date TARGET_PERIOD = (Date(2026, 1, 1), Date(2041, 1, 1)) PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS = 5 -def apply(results_file: Path, output_folder: Path, resourcefilepath: Path = None): + +def load_results_files(results_files: list[Path]) -> dict[Path, dict]: + loaded = {} + for results_file in results_files: + with open(results_file, "rb") as f: + loaded[results_file] = pickle.load(f) + return loaded + +def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path = None): """Produce standard plots describing effect of each TREATMENT_ID.""" + def make_graph_file_name(stub): + filename = stub.replace('*', '_star_').replace(' ', '_').lower() + return output_folder / f"{filename}.png" + param_names = get_parameter_names_from_scenario_file() - make_graph_file_name = lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 + period_labels_for_bar_plots = [ label for label, _ in get_periods_within_target_period( @@ -52,68 +48,65 @@ def apply(results_file: Path, output_folder: Path, resourcefilepath: Path = None target_period_tuple=TARGET_PERIOD, ) ] - appointment_period_labels = [ - label - for label, _ in get_periods_within_target_period( - period_length_years=1, - target_period_tuple=TARGET_PERIOD, - ) - ] target_period_label = target_period(TARGET_PERIOD) - with open(results_file, "rb") as f: - results = pickle.load(f) - - # Plot number of appointments for each draw - counts_of_appts = results['counts_of_appts'] - fig, ax = plot_appointment_counts_heatmap(counts_of_appts) - fig.savefig(make_graph_file_name("appointment_counts_heatmap")) - plt.close(fig) - - counts_of_appts_by_period = results["counts_of_appts_by_period"] - for param in param_names: - draw = format_scenario_name(param) - name_of_plot = f"Yearly appointment 
counts for {draw}" - fig, ax = plot_appointment_counts_by_period_for_draw( - counts_of_appts_by_period, - draw=draw, - period_labels=appointment_period_labels, + all_results = load_results_files(results_files) + results = all_results[results_files[1]] + + counts_of_hsi_in_implementation_period = all_results[results_files[1]]['counts_of_hsi_by_short_treatment_id'] + + result_df = pd.DataFrame([ + {'treatment_id_included': draw, 'nonzero_hsis': treatment_id} + for draw in counts_of_hsi_in_implementation_period.columns.get_level_values(0).unique() + for treatment_id in ((counts_of_hsi_in_implementation_period[draw] != 0).any(axis=1))[(counts_of_hsi_in_implementation_period[draw] != 0).any(axis=1)].index + ]) + result_df['treatment_id_included'] = result_df['treatment_id_included'].str.replace('_\\*$', '', regex=True) + #133 rows here; + #result_df[result_df['treatment_id_included'] != result_df['nonzero_hsis']] + + + # Plot number of HSIs for each draw dropping the aggregate over the entire period + counts_of_hsi_in_baseline = all_results[results_files[0]]['counts_of_hsi_by_period'] + counts_of_hsi_in_baseline = counts_of_hsi_in_baseline.drop(['2010-2025'], level=1) + + counts_of_hsi_in_implementation_period = all_results[results_files[1]]['counts_of_hsi_by_period'] + counts_of_hsi_in_implementation_period = counts_of_hsi_in_implementation_period.drop(['2026-2041'], level=1) + result_df_by_period = pd.DataFrame([ + {'treatment_id_included': draw, 'nonzero_hsis': treatment_id, 'period': period} + for draw in counts_of_hsi_in_implementation_period.columns.get_level_values(0).unique() + for treatment_id, period in ( + ((counts_of_hsi_in_implementation_period[draw] != 0).any(axis=1))[ + (counts_of_hsi_in_implementation_period[draw] != 0).any(axis=1) + ].index ) - ax.set_title(name_of_plot) - fig.savefig(make_graph_file_name(name_of_plot)) - plt.close(fig) + ]) + result_df_by_period['treatment_id_included'] = result_df_by_period['treatment_id_included'].str.replace( + 
'_\\*$', '', regex=True + ) - - # Plot number of HSIs for each draw - counts_of_hsi_by_period = results["counts_of_hsi_by_period"] for param in param_names: draw = format_scenario_name(param) + print(f"Plotting HSI counts for {draw}...") name_of_plot = f"Yearly HSI counts for {draw}" fig, ax = plot_hsi_counts_by_period_for_draw( - counts_of_hsi_by_period, - draw=draw, - period_labels=appointment_period_labels, + counts_of_hsi_in_implementation_period, + draw, + counts_of_hsi_in_baseline, ) ax.set_title(name_of_plot) fig.savefig(make_graph_file_name(name_of_plot)) plt.close(fig) # Plot population growth - total_population_by_year = results['total_population_by_year'] - for year in [2026, 2031, 2036, 2040]: - fig, ax = plt.subplots() - name_of_plot = f"Population size in {year}" - plot_multiindex_dot_with_interval(total_population_by_year / 1e6, year, ax, 'median') - ax.set_title(name_of_plot) - ax.set_xlabel("Treatment included") - ax.set_ylabel("Population size (millions)") - ax.grid(axis="y") - ax.spines["top"].set_visible(False) - ax.spines["right"].set_visible(False) - fig.tight_layout() - fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) - plt.close(fig) + total_population_in_baseline = all_results[results_files[0]]['total_population_by_year'] + total_population_in_implementation = all_results[results_files[1]]['total_population_by_year'] + fig, ax = plot_population_by_year(total_population_in_implementation / 1e6, total_population_in_baseline / 1e6) + name_of_plot = "Population size by year" + ax.set_title(name_of_plot) + ax.set_ylabel("Population size (millions)") + fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) + plt.close(fig) # Plot number of deaths and DALYS by cause for each parameter, with confidence intervals, for the target period num_deaths_by_cause_label = results['num_deaths'] @@ -176,10 +169,10 @@ def apply(results_file: Path, output_folder: Path, resourcefilepath: Path = None if __name__ == "__main__": parser = 
argparse.ArgumentParser() - parser.add_argument("results_file", type=Path) - parser.add_argument("output_folder", type=Path, nargs="?", default=None) + parser.add_argument("results_files", type=Path, nargs="+") + parser.add_argument("--output-folder", type=Path, required=True) args = parser.parse_args() - apply(results_file=args.results_file, output_folder=args.output_folder, resourcefilepath=Path("./resources")) + apply(results_files=args.results_files, output_folder=args.output_folder, resourcefilepath=Path("./resources")) plot_legends.apply(results_folder=None, output_folder=args.output_folder, resourcefilepath=Path("./resources")) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py b/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py index 8762e2992f..06924dcd6d 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py @@ -28,8 +28,11 @@ def find_difference_relative_to_comparison(_ser: pd.Series, .drop(columns=([comparison] if drop_comparison else [])) \ .stack() -def get_total_population_by_year(_df): - years_needed = [i.year for i in TARGET_PERIOD] +def get_total_population_by_year( + _df: pd.DataFrame, + target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, +) -> pd.Series: + years_needed = [i.year for i in target_period_tuple] _df["year"] = pd.to_datetime(_df["date"]).dt.year return _df.loc[_df["year"].between(min(years_needed), max(years_needed)), ["year", "total"]].set_index("year")[ "total" diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py index 0a2ce0a67e..1d0314d5dd 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py @@ -28,12 +28,13 @@ from tlo.methods.fullmodel 
import fullmodel from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher from tlo.scenario import BaseScenario +from tlo.methods.individual_history_tracker import IndividualHistoryTracker class ScenarioDefinitions: @property def YEAR_OF_SERVICE_AVAILABILITY_SWITCH(self) -> int: - return 2026 + return 2011 def baseline(self) -> Dict: """Return the Dict with values for the parameter changes that define the baseline scenario.""" @@ -64,8 +65,8 @@ def __init__(self): super().__init__() self.seed = 0 self.start_date = Date(2010, 1, 1) - self.end_date = Date(2041, 1, 1) - self.pop_size = 250_000 + self.end_date = Date(2031, 1, 1) + self.pop_size = 1000 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) self.runs_per_draw = 5 @@ -80,6 +81,7 @@ def log_configuration(self): "tlo.methods.demography.detail": logging.WARNING, "tlo.methods.healthburden": logging.INFO, "tlo.methods.healthsystem.summary": logging.INFO, + "tlo.methods.individual_history_tracker": logging.INFO, }, } @@ -104,6 +106,8 @@ def _get_scenarios(self) -> Dict[str, Dict]: service_availability.update( {f"Only {treatment}": [treatment] for treatment in treatments} ) + ##service_availability = {"Only Rti_TetanusVaccine": ["Rti_TetanusVaccine"]} + scenario_definitions = ScenarioDefinitions() scenarios = { diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 074af82b90..3362b5c3ac 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -1438,6 +1438,10 @@ def get_equip_availability(self) -> str: def schedule_to_call_never_ran_on_date(self, hsi_event: "HSI_Event", tdate: datetime.datetime): """Function to schedule never_ran being called on a given date""" + if self.sim.date > tdate: + print(f"Warning: trying to schedule never_ran for date {tdate} in the past (current simulation date is {self.sim.date}). 
This event will not be scheduled.") + print(f"treament id is {hsi_event.TREATMENT_ID}") + self.sim.schedule_event(HSIEventWrapper(hsi_event=hsi_event, run_hsi=False), tdate) def get_mode_appt_constraints(self) -> int: From f5102b0ce3fe0ea428773999c28e03b11a78602c Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Fri, 20 Mar 2026 10:26:23 +0000 Subject: [PATCH 27/55] Scenario file with a single treatment id --- ...ario_effect_of_treatment_ids_no_suspend.py | 124 ++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids_no_suspend.py diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids_no_suspend.py b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids_no_suspend.py new file mode 100644 index 0000000000..9c078f9ea7 --- /dev/null +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids_no_suspend.py @@ -0,0 +1,124 @@ +""" +This file contains all the definitions of scenarios for the TLO-LCOA project. + +It runs the full model under a set of scenario in which only a single TREATMENT_ID is included. 
+ + +To check scenarios are generated correctly: +``` +tlo scenario-run --draw-only src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids_no_suspend.py +``` + +Run on the batch system using: + +``` +tlo batch-submit src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids_no_suspend.py +``` + +or locally using: +``` +tlo scenario-run src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids_no_suspend.py +``` + +""" + +from pathlib import Path +from typing import Dict, List +from tlo import Date, logging +from tlo.analysis.utils import mix_scenarios, get_parameters_for_status_quo +from tlo.methods.fullmodel import fullmodel +from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher +from tlo.scenario import BaseScenario + + + +class ScenarioDefinitions: + @property + def YEAR_OF_SERVICE_AVAILABILITY_SWITCH(self) -> int: + return 2026 + + def baseline(self) -> Dict: + """Return the Dict with values for the parameter changes that define the baseline scenario.""" + return mix_scenarios( + get_parameters_for_status_quo(), # <-- Parameters that have been the calibration targets + { + "HealthSystem": { + "cons_availability": "default", + "year_cons_availability_switch": self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, + "cons_availability_postSwitch": "all", + "mode_appt_constraints": 1, + "year_service_availability_switch": self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, + # allow historical HRH scaling to occur 2018-2024 + # 'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, + "yearly_HR_scaling_mode": "historical_scaling", + }, + "ImprovedHealthSystemAndCareSeekingScenarioSwitcher": { + "max_healthsystem_function": [False, True], # <-- switch from False to True mid-way + "max_healthcare_seeking": [False, True], # <-- switch from False to True mid-way + "year_of_switch": self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, + }, + }, + ) + + +class 
EffectOfEachTreatment(BaseScenario): + def __init__(self): + super().__init__() + self.seed = 0 + self.start_date = Date(2010, 1, 1) + self.end_date = Date(2041, 1, 1) + self.pop_size = 250_000 + self._scenarios = self._get_scenarios() + self.number_of_draws = len(self._scenarios) + self.runs_per_draw = 5 + + def log_configuration(self): + return { + "filename": "effect_of_each_treatment_id", + "directory": Path("./outputs"), + "custom_levels": { + "*": logging.WARNING, + "tlo.methods.demography": logging.INFO, + "tlo.methods.demography.detail": logging.WARNING, + "tlo.methods.healthburden": logging.INFO, + "tlo.methods.healthsystem.summary": logging.INFO, + }, + } + + def modules(self): + return fullmodel() + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher()] + + def draw_parameters(self, draw_number, rng): + if draw_number < len(self._scenarios): + return list(self._scenarios.values())[draw_number] + + def _get_scenarios(self) -> Dict[str, Dict]: + """Return the Dict with values for the parameter `Service_Availability` keyed by a name for the scenario. 
+ The sequences of scenarios systematically omits all but one TREATMENT_ID that is defined in the model.""" + + # Generate list of TREATMENT_IDs and filter to the resolution needed + treatments = ["Hiv_Treatment_*"] + # Return 'Service_Availability' values, with scenarios for nothing, and ones for which all but one + # treatment is omitted + service_availability = dict() + # For each treatment group, create scenarios keeping only one treatment from that group + service_availability.update( + {f"Only {treatment}": [treatment] for treatment in treatments} + ) + + scenario_definitions = ScenarioDefinitions() + + scenarios = { + key: mix_scenarios( + scenario_definitions.baseline(), {"HealthSystem": {"service_availability_postSwitch": value}} + ) + for key, value in service_availability.items() + } + + return scenarios + + +if __name__ == "__main__": + from tlo.cli import scenario_run + + scenario_run([__file__]) From 10a1554dfe5b406cf231c07b966ed44157936958 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Thu, 26 Mar 2026 15:40:24 +0000 Subject: [PATCH 28/55] Additional tests --- tests/test_healthsystem.py | 101 +++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/tests/test_healthsystem.py b/tests/test_healthsystem.py index c959162f0f..0853948c41 100644 --- a/tests/test_healthsystem.py +++ b/tests/test_healthsystem.py @@ -1415,6 +1415,23 @@ def test_is_treatment_id_allowed(): assert hs.is_treatment_id_allowed("Epi", ["Epi", "Epilepsy_*"]) assert hs.is_treatment_id_allowed("Epilepsy", ["Epi", "Epilepsy_*"]) + ## Service availability switch debugging + excluded_hsis = ["FirstAttendance_Emergency_*", "FirstAttendance_NonEmergency_*", "FirstAttendance_SpuriousEmergencyCare_*",] + treatments = get_filtered_treatment_ids(depth=None) + for treatment_allowed in treatments: + print(f"Allowed {treatment_allowed}") + for treatment_requested in treatments: + # If the only treatment allowed is treatment_allowed then all other treatments should 
return false + if not treatment_requested == treatment_allowed: + print(f"Requested {treatment_requested}") + if treatment_requested in excluded_hsis: + assert hs.is_treatment_id_allowed(treatment_requested, [treatment_allowed]) + elif treatment_requested.startswith(treatment_allowed.replace("_*", "")): + assert hs.is_treatment_id_allowed(treatment_requested, [treatment_allowed]) + else: + assert not hs.is_treatment_id_allowed(treatment_requested, [treatment_allowed]) + + def test_manipulation_of_service_availability(seed, tmpdir): """Check that the parameter `service_availability` can be used to allow/disallow certain `TREATMENT_ID`s. @@ -3130,3 +3147,87 @@ def apply(self, person_id, squeeze_factor): 0 ] assert nevents_did_not_run == nevents_with_withdrawn_ids + + +def test_service_availability_switch_without_assertions(tmpdir, seed): + """Test that the service availability is updated in the year specified. + Simultaneously check that the switch triggers related behaviors: + 1) compute and write to logs rescaling factors + 2) clear hsi event queue of any events scheduled to run after the switch + that need one of the unavailable services. + This test is for checking that events cannot be scheduled in the past + when assertions are quieted as on Azure. 
+ """ + + class DummyModuleGenericClinic(Module): + METADATA = {Metadata.DISEASE_MODULE, Metadata.USES_HEALTHSYSTEM} + + def read_parameters(self, data_folder): + pass + + def initialise_population(self, population): + pass + + def initialise_simulation(self, sim): + pass + + # Create a dummy HSI event class + class DummyHSIEvent(HSI_Event, IndividualScopeEventMixin): + def __init__(self, module, person_id, appt_type, level, treatment_id): + super().__init__(module, person_id=person_id) + self.TREATMENT_ID = treatment_id + self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({appt_type: 1}) + self.ACCEPTED_FACILITY_LEVEL = level + + def apply(self, person_id, squeeze_factor): + self.this_hsi_event_ran = True + + log_config = { + "filename": "log", + "directory": tmpdir, + "custom_levels": {"tlo.methods.healthsystem": logging.DEBUG}, + } + start_date = Date(2010, 1, 1) + end_date = Date(2012, 1, 1) + sim = Simulation(start_date=start_date, seed=0, log_config=log_config, resourcefilepath=resourcefilepath) + + sim.register( + demography.Demography(), + healthsystem.HealthSystem( + capabilities_coefficient=1.0, + mode_appt_constraints=1, + ignore_priority=False, + randomise_queue=True, + policy_name="", + use_funded_or_actual_staffing="funded_plus", + ), + DummyModuleGenericClinic(), + ) + + hs = sim.modules["HealthSystem"] + hs_params = hs.parameters + hs_params["Service_Availability"] = ["ThisEventShouldRun", "ThisEventShouldNotRun"] + sim.make_initial_population(n=popsize) + hsi = DummyHSIEvent( + module=sim.modules["DummyModuleGenericClinic"], + person_id=1, + appt_type="ConWithDCSA", + level="0", + treatment_id="ThisEventShouldNotRun", + ) + sim.modules["HealthSystem"].schedule_hsi_event(hsi, topen=sim.date, tclose=end_date, priority=1) + ## Once the event has been scheduled, update service availability to exclude its treatment id + ## updating the service availability in this way bypasses the clearing of queue that would happen + ## otherwise. 
+ hs.service_availability = ["ThisEventShouldRun"] + sim.simulate(end_date=end_date) + output = parse_log_file(sim.log_filepath, level=logging.DEBUG) + hsi_events = output["tlo.methods.healthsystem"]["HSI_Event"] + breakpoint() + nevents_ran = hsi_events.groupby("TREATMENT_ID")["did_run"].value_counts() + ## Expect that the event with treatment id ThisEventShouldNotRun should never have run. + never_ran_events = output["tlo.methods.healthsystem"]["Never_ran_HSI_Event"] + nevents_did_not_run = never_ran_events[never_ran_events["TREATMENT_ID"] == "ThisEventShouldNotRunPostSwitch"].shape[ + 0 + ] + assert nevents_did_not_run == nevents_with_withdrawn_ids From bbd33581719157969e438d753e53103f0f12cccd Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Thu, 26 Mar 2026 16:00:09 +0000 Subject: [PATCH 29/55] Record never ran and do not reschedule if treatment id unavailable --- src/tlo/methods/healthsystem.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index bead1ac777..d9d6f189c6 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -2192,7 +2192,8 @@ def run_individual_level_events_in_mode_1( # Check here that the treatment id is allowed at this point as service availability might have changed # since the event was scheduled if not self.is_treatment_id_allowed(event.TREATMENT_ID, self.service_availability): - ok_to_run = False + call_and_record_never_ran_hsi_event(hsi_event=event, priority=_priority) + continue if ok_to_run: From c3d3e763334e650ef3f2d2a60eb55757468928de Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Thu, 26 Mar 2026 16:05:27 +0000 Subject: [PATCH 30/55] Remove test with incorrect logic --- tests/test_healthsystem_general.py | 84 ------------------------------ 1 file changed, 84 deletions(-) diff --git a/tests/test_healthsystem_general.py b/tests/test_healthsystem_general.py index f80af56dec..906d652e36 100644 --- 
a/tests/test_healthsystem_general.py +++ b/tests/test_healthsystem_general.py @@ -1651,87 +1651,3 @@ def apply(self, person_id, squeeze_factor): 0 ] assert nevents_did_not_run == nevents_with_withdrawn_ids - - -def test_service_availability_switch_without_assertions(tmpdir, seed): - """Test that the service availability is updated in the year specified. - Simultaneously check that the switch triggers related behaviors: - 1) compute and write to logs rescaling factors - 2) clear hsi event queue of any events scheduled to run after the switch - that need one of the unavailable services. - This test is for checking that events cannot be scheduled in the past - when assertions are quieted as on Azure. - """ - - class DummyModuleGenericClinic(Module): - METADATA = {Metadata.DISEASE_MODULE, Metadata.USES_HEALTHSYSTEM} - - def read_parameters(self, data_folder): - pass - - def initialise_population(self, population): - pass - - def initialise_simulation(self, sim): - pass - - # Create a dummy HSI event class - class DummyHSIEvent(HSI_Event, IndividualScopeEventMixin): - def __init__(self, module, person_id, appt_type, level, treatment_id): - super().__init__(module, person_id=person_id) - self.TREATMENT_ID = treatment_id - self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({appt_type: 1}) - self.ACCEPTED_FACILITY_LEVEL = level - - def apply(self, person_id, squeeze_factor): - self.this_hsi_event_ran = True - - log_config = { - "filename": "log", - "directory": tmpdir, - "custom_levels": {"tlo.methods.healthsystem": logging.DEBUG}, - } - start_date = Date(2010, 1, 1) - end_date = Date(2012, 1, 1) - sim = Simulation(start_date=start_date, seed=0, log_config=log_config, resourcefilepath=resourcefilepath) - - sim.register( - demography.Demography(), - healthsystem.HealthSystem( - capabilities_coefficient=1.0, - mode_appt_constraints=1, - ignore_priority=False, - randomise_queue=True, - policy_name="", - use_funded_or_actual_staffing="funded_plus", - ), - 
DummyModuleGenericClinic(), - ) - - hs = sim.modules["HealthSystem"] - hs_params = hs.parameters - hs_params["Service_Availability"] = ["ThisEventShouldRun", "ThisEventShouldNotRun"] - sim.make_initial_population(n=popsize) - hsi = DummyHSIEvent( - module=sim.modules["DummyModuleGenericClinic"], - person_id=1, - appt_type="ConWithDCSA", - level="0", - treatment_id="ThisEventShouldNotRun", - ) - sim.modules["HealthSystem"].schedule_hsi_event(hsi, topen=sim.date, tclose=end_date, priority=1) - ## Once the event has been scheduled, update service availability to exclude its treatment id - ## updating the service availability in this way bypasses the clearing of queue that would happen - ## otherwise. - hs.service_availability = ["ThisEventShouldRun"] - sim.simulate(end_date=end_date) - output = parse_log_file(sim.log_filepath, level=logging.DEBUG) - hsi_events = output["tlo.methods.healthsystem"]["HSI_Event"] - breakpoint() - nevents_ran = hsi_events.groupby("TREATMENT_ID")["did_run"].value_counts() - ## Expect that the event with treatment id ThisEventShouldNotRun should never have run. 
- never_ran_events = output["tlo.methods.healthsystem"]["Never_ran_HSI_Event"] - nevents_did_not_run = never_ran_events[never_ran_events["TREATMENT_ID"] == "ThisEventShouldNotRunPostSwitch"].shape[ - 0 - ] - assert nevents_did_not_run == nevents_with_withdrawn_ids From 424025200c8a420503797ab55993c7e0a8fb8628 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Thu, 26 Mar 2026 16:10:10 +0000 Subject: [PATCH 31/55] Formatting --- tests/test_healthsystem_general.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/tests/test_healthsystem_general.py b/tests/test_healthsystem_general.py index 906d652e36..61ecf04525 100644 --- a/tests/test_healthsystem_general.py +++ b/tests/test_healthsystem_general.py @@ -516,21 +516,24 @@ def test_is_treatment_id_allowed(): assert hs.is_treatment_id_allowed("Epilepsy", ["Epi", "Epilepsy_*"]) ## Service availability switch debugging - excluded_hsis = ["FirstAttendance_Emergency_*", "FirstAttendance_NonEmergency_*", "FirstAttendance_SpuriousEmergencyCare_*",] + excluded_hsis = [ + "FirstAttendance_Emergency_*", + "FirstAttendance_NonEmergency_*", + "FirstAttendance_SpuriousEmergencyCare_*", + ] treatments = get_filtered_treatment_ids(depth=None) for treatment_allowed in treatments: - print(f"Allowed {treatment_allowed}") - for treatment_requested in treatments: - # If the only treatment allowed is treatment_allowed then all other treatments should return false - if not treatment_requested == treatment_allowed: - print(f"Requested {treatment_requested}") - if treatment_requested in excluded_hsis: - assert hs.is_treatment_id_allowed(treatment_requested, [treatment_allowed]) - elif treatment_requested.startswith(treatment_allowed.replace("_*", "")): - assert hs.is_treatment_id_allowed(treatment_requested, [treatment_allowed]) - else: - assert not hs.is_treatment_id_allowed(treatment_requested, [treatment_allowed]) - + print(f"Allowed {treatment_allowed}") + for treatment_requested in 
treatments: + # If the only treatment allowed is treatment_allowed then all other treatments should return false + if not treatment_requested == treatment_allowed: + print(f"Requested {treatment_requested}") + if treatment_requested in excluded_hsis: + assert hs.is_treatment_id_allowed(treatment_requested, [treatment_allowed]) + elif treatment_requested.startswith(treatment_allowed.replace("_*", "")): + assert hs.is_treatment_id_allowed(treatment_requested, [treatment_allowed]) + else: + assert not hs.is_treatment_id_allowed(treatment_requested, [treatment_allowed]) def test_manipulation_of_service_availability(seed, tmpdir): @@ -1548,7 +1551,6 @@ def schedule_hsi_events(ngenericclinic, nclinic1, sim): ), "Expected Clinic1 capabilities to be rescaled by factor of 2" - def test_service_availability_switch(tmpdir, seed): """Test that the service availability is updated in the year specified. Simultaneously check that the switch triggers related behaviors: From 055303a6173000521deab8bb1f9a1013780cfc80 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Thu, 26 Mar 2026 16:34:39 +0000 Subject: [PATCH 32/55] Test run with smaller pop size --- .../scenario_effect_of_treatment_ids_no_suspend.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids_no_suspend.py b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids_no_suspend.py index 9c078f9ea7..67c8d4f0ff 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids_no_suspend.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids_no_suspend.py @@ -66,8 +66,8 @@ def __init__(self): super().__init__() self.seed = 0 self.start_date = Date(2010, 1, 1) - self.end_date = Date(2041, 1, 1) - self.pop_size = 250_000 + self.end_date = Date(2031, 1, 1) + self.pop_size = 50_000 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) 
self.runs_per_draw = 5 From 4962fc491f974d159cf532bf8fa232db18bb7b66 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Fri, 27 Mar 2026 11:56:06 +0000 Subject: [PATCH 33/55] Test service availability switch with a recurring HSI event --- tests/test_healthsystem_general.py | 109 ++++++++++++++++++++++++++++- 1 file changed, 106 insertions(+), 3 deletions(-) diff --git a/tests/test_healthsystem_general.py b/tests/test_healthsystem_general.py index 61ecf04525..2f47eb4678 100644 --- a/tests/test_healthsystem_general.py +++ b/tests/test_healthsystem_general.py @@ -1611,8 +1611,8 @@ def apply(self, person_id, squeeze_factor): hs_params["service_availability_postSwitch"] = ["ThisEventShouldRun"] sim.make_initial_population(n=popsize) - ## Schedule 10 events that should run; 10 events that have a treatment id that is not available - ## after service availability switch. + # Schedule 10 events that should run; 10 events that have a treatment id + # that is not available after service availability switch. nevents_with_available_ids = 60 nevents_with_withdrawn_ids = 40 for i in range(0, nevents_with_available_ids): @@ -1635,7 +1635,7 @@ def apply(self, person_id, squeeze_factor): level="0", treatment_id="ThisEventShouldNotRunPostSwitch", ) - ## These events open after service availability switch + # These events open after service availability switch topen = pd.Timestamp(year_service_availability_switch, 1, 1) sim.modules["HealthSystem"].schedule_hsi_event( hsi, topen=topen, tclose=topen + pd.DateOffset(days=1), priority=1 @@ -1653,3 +1653,106 @@ def apply(self, person_id, squeeze_factor): 0 ] assert nevents_did_not_run == nevents_with_withdrawn_ids + + + + +def test_service_availability_with_rescheduling_hsi(tmpdir, seed): + """Test that an HSI that attempts to reschedule itself cannot go ahead + if service availability update has made its treatment id unavailable. 
+ """ + + class DummyModuleGenericClinic(Module): + METADATA = {Metadata.DISEASE_MODULE, Metadata.USES_HEALTHSYSTEM} + + def read_parameters(self, data_folder): + pass + + def initialise_population(self, population): + pass + + def initialise_simulation(self, sim): + pass + + # Create a dummy HSI event class + class DummyHSIEvent(HSI_Event, IndividualScopeEventMixin): + def __init__(self, module, person_id, appt_type, level, treatment_id): + super().__init__(module, person_id=person_id) + self.TREATMENT_ID = treatment_id + self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({appt_type: 1}) + self.ACCEPTED_FACILITY_LEVEL = level + + def apply(self, person_id, squeeze_factor): + self.this_hsi_event_ran = True + sim.modules["HealthSystem"].schedule_hsi_event( + self, topen=self.sim.date + pd.DateOffset(years=1), tclose=None, priority=1 + ) + sim.modules["HealthSystem"].schedule_hsi_event( + self, topen=self.sim.date + pd.DateOffset(years=2), tclose=None, priority=1 + ) + sim.modules["HealthSystem"].schedule_hsi_event( + self, topen=self.sim.date + pd.DateOffset(years=3), tclose=None, priority=1 + ) + sim.modules["HealthSystem"].schedule_hsi_event( + self, topen=self.sim.date + pd.DateOffset(years=4), tclose=None, priority=1 + ) + + + + log_config = { + "filename": "log", + "directory": tmpdir, + "custom_levels": {"tlo.methods.healthsystem": logging.DEBUG}, + } + start_date = Date(2010, 1, 1) + end_date = Date(2015, 1, 1) + sim = Simulation(start_date=start_date, seed=0, log_config=log_config, resourcefilepath=resourcefilepath) + + sim.register( + demography.Demography(), + healthsystem.HealthSystem( + capabilities_coefficient=1.0, + mode_appt_constraints=1, + ignore_priority=False, + randomise_queue=True, + policy_name="", + use_funded_or_actual_staffing="funded_plus", + ), + DummyModuleGenericClinic(), + ) + + hs_params = sim.modules["HealthSystem"].parameters + # First allow everything + hs_params["Service_Availability"] = ['*'] + 
year_service_availability_switch = 2011 + hs_params["year_service_availability_switch"] = year_service_availability_switch + # Post switch treatment id ThisEventShouldNotRunPostSwitch is unavailable + hs_params["service_availability_postSwitch"] = ["ThisEventShouldRunPostSwitch"] + + sim.make_initial_population(n=popsize) + # Schedule event with treatment id ThisEventShouldNotRunPostSwitch + # so that it runs successfully the first time, and reschedules itself. + hsi = DummyHSIEvent( + module=sim.modules["DummyModuleGenericClinic"], + person_id=1, + appt_type="ConWithDCSA", + level="0", + treatment_id="ThisEventShouldNotRunPostSwitch", + ) + sim.modules["HealthSystem"].schedule_hsi_event( + hsi, topen=start_date, tclose=end_date, priority=1 + ) + sim.simulate(end_date=end_date) + output = parse_log_file(sim.log_filepath, level=logging.DEBUG) + hsi_events = output["tlo.methods.healthsystem"]["HSI_Event"] + # Expect the first instance of this HSI to have run, since we scheduled it + # to run before service availability switch + nevents_ran = hsi_events.groupby("TREATMENT_ID")["did_run"].value_counts() + assert nevents_ran.loc[("ThisEventShouldNotRunPostSwitch", True)] == 1 + # and all subsequent instances to have not run. 
+ never_ran_events = output["tlo.methods.healthsystem"]["Never_ran_HSI_Event"] + nevents_did_not_run = never_ran_events[never_ran_events["TREATMENT_ID"] == "ThisEventShouldNotRunPostSwitch"].shape[ + 0 + ] + # Since we scheduled it in 4 years after the first successful run + assert nevents_did_not_run == 4 From 83870ab268febe9921de306869252ab067677559 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Mon, 30 Mar 2026 16:48:00 +0100 Subject: [PATCH 34/55] Rechecking results processing --- .../analysis_effect_of_treatment_ids.py | 95 ++++++++++--------- .../fig_utils.py | 38 +++++--- .../figures_effect_of_treatment_ids.py | 63 ++++++------ .../scenario_effect_of_treatment_ids.py | 1 + 4 files changed, 108 insertions(+), 89 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index 4b4218f163..80877241fc 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -58,9 +58,8 @@ summarize, ) # python src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z figs/ --target-start=2010-01-01 --target-end=2025-12-31 -# python src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-16T154500Z figs/ --target-start=2026-01-01 --target-end=2041-01-01 - -TARGET_PERIOD = (Date(2026, 1, 1), Date(2041, 1, 1)) +# python src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-16T154500Z figs/ --target-start=2025-01-01 --target-end=2041-01-01 +TARGET_PERIOD = (Date(2025, 1, 1), Date(2041, 1, 1)) PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS = 1 suspended_folder = 
Path("outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z") results_folder = Path("outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-16T154500Z") @@ -71,9 +70,9 @@ "FirstAttendance_Emergency", "FirstAttendance_NonEmergency", "FirstAttendance_SpuriousEmergencyCare", + "Inpatient_Care" ] - def parse_iso_date(value: str) -> Date: parsed = date.fromisoformat(value) return Date(parsed.year, parsed.month, parsed.day) @@ -119,6 +118,47 @@ def apply( total_population_by_year = compute_summary_statistics(total_population_by_year, central_measure='median') results['total_population_by_year'] = total_population_by_year + counts_of_hsi_by_short_treatment_id = ( + extract_results( + results_folder, + module="tlo.methods.healthsystem.summary", + key="HSI_Event", + custom_generate_series=lambda _df: get_counts_of_hsi_by_short_treatment_id(_df, target_period_tuple), + do_scaling=True, + suspended_results_folder=suspended_folder, + autodiscover=True, + ) + .pipe(set_param_names_as_column_index_level_0, param_names=param_names) + .fillna(0.0) + .sort_index() + ).drop(EXCLUDED_HSIs, errors='ignore') + + counts_of_hsi_by_short_treatment_id = ( + compute_summary_statistics(counts_of_hsi_by_short_treatment_id, 'median') + ) + + results['counts_of_hsi_by_short_treatment_id'] = counts_of_hsi_by_short_treatment_id + + counts_of_hsi_by_period = ( + extract_results( + results_folder, + module="tlo.methods.healthsystem.summary", + key="HSI_Event", + custom_generate_series=lambda _df: get_num_hsi_by_period(_df), + do_scaling=True, + suspended_results_folder=suspended_folder, + autodiscover=True, + ) + .pipe(set_param_names_as_column_index_level_0, param_names=param_names) + .fillna(0.0) + .sort_index() + ).drop(EXCLUDED_HSIs, level=0, errors='ignore') + + counts_of_hsi_by_period = ( + compute_summary_statistics(counts_of_hsi_by_period, 'median') + ) + results['counts_of_hsi_by_period'] = counts_of_hsi_by_period + print("Extracting total deaths and 
DALYs by label...") num_deaths = ( extract_results( @@ -137,14 +177,16 @@ def apply( find_difference_extra_relative_to_comparison(num_deaths.sum(), comparison='Nothing')).T ).iloc[0].unstack() - pc_deaths_averted = 100.0 * summarize( pd.DataFrame( find_difference_extra_relative_to_comparison(num_deaths.sum(), comparison='Nothing', scaled=True)).T ).iloc[0].unstack() num_deaths = compute_summary_statistics(num_deaths, central_measure='median') + results['num_deaths'] = num_deaths + results['num_deaths_averted'] = num_deaths_averted + results['pc_deaths_averted'] = pc_deaths_averted num_dalys = ( extract_results( @@ -170,46 +212,9 @@ def apply( num_dalys = compute_summary_statistics(num_dalys, central_measure='median') - counts_of_hsi_by_short_treatment_id = ( - extract_results( - results_folder, - module="tlo.methods.healthsystem.summary", - key="HSI_Event", - custom_generate_series=lambda _df: get_counts_of_hsi_by_short_treatment_id(_df, target_period_tuple), - do_scaling=True, - suspended_results_folder=suspended_folder, - autodiscover=True, - ) - .pipe(set_param_names_as_column_index_level_0, param_names=param_names) - .fillna(0.0) - .sort_index() - ).drop(EXCLUDED_HSIs, errors='ignore') - - counts_of_hsi_by_short_treatment_id = ( - compute_summary_statistics(counts_of_hsi_by_short_treatment_id, 'median') - ) - - results['counts_of_hsi_by_short_treatment_id'] = counts_of_hsi_by_short_treatment_id - - counts_of_hsi_by_period = ( - extract_results( - results_folder, - module="tlo.methods.healthsystem.summary", - key="HSI_Event", - custom_generate_series=lambda _df: get_num_hsi_by_period(_df), - do_scaling=True, - suspended_results_folder=suspended_folder, - autodiscover=True, - ) - .pipe(set_param_names_as_column_index_level_0, param_names=param_names) - .fillna(0.0) - .sort_index() - ).drop(EXCLUDED_HSIs, level=0, errors='ignore') - - counts_of_hsi_by_period = ( - compute_summary_statistics(counts_of_hsi_by_period, 'median') - ) - 
results['counts_of_hsi_by_period'] = counts_of_hsi_by_period + results['num_dalys'] = num_dalys + results['num_dalys_averted'] = num_dalys_averted + results['pc_dalys_averted'] = pc_dalys_averted return results diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py index 218e3f8efb..0ff9edbfea 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py @@ -22,6 +22,10 @@ APPOINTMENT_TYPE_PALETTE = list(plt.get_cmap("tab20").colors) + list(plt.get_cmap("Set2").colors) APPOINTMENT_TYPE_FIXED_COLORS = {"AccidentsandEmerg": "black"} +def make_graph_file_name(stub): + filename = stub.replace('*', '_star_').replace(' ', '_').lower() + return f"{filename}.png" + def get_color_by_appointment_type(appointment_types) -> dict: """Return a deterministic color map for appointment types.""" @@ -386,7 +390,8 @@ def plot_hsi_counts_by_period_for_draw( central = _plot["central"] lower = _plot["lower"] upper = _plot["upper"] - non_zero_mask = central.gt(0).any(axis=1) + periods_for_filtering = central.columns.difference(["2025-2025"], sort=False) + non_zero_mask = central.loc[:, periods_for_filtering].gt(0).any(axis=1) ordered_period_labels, display_period_labels = _get_sorted_period_labels_and_display_labels(period_labels) central = central.loc[non_zero_mask, ordered_period_labels] @@ -394,7 +399,7 @@ def plot_hsi_counts_by_period_for_draw( upper = upper.loc[non_zero_mask, ordered_period_labels] if central.empty: - raise ValueError(f"No non-zero treatment ids remain for draw '{draw}'.") + print(f"No non-zero treatment ids remain for draw '{draw}'.") x = np.arange(len(ordered_period_labels)) fig_width = max(10, min(1.2 * len(ordered_period_labels) + 4, 20)) @@ -441,31 +446,34 @@ def plot_hsi_counts_by_period_for_draw( return fig, ax -def plot_population_by_year(_df: pd.DataFrame, _dfbaseline: pd.DataFrame): - """Plot yearly central population 
values for all draws plus baseline.""" +def plot_population_by_year(_df: pd.DataFrame, _dfbaseline: pd.DataFrame | None = None): + """Plot yearly central population values for all draws, optionally with baseline.""" if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels != 2: raise ValueError("_df columns must be a 2-level MultiIndex with levels for draw and stat.") - if not isinstance(_dfbaseline.columns, pd.MultiIndex) or _dfbaseline.columns.nlevels != 2: - raise ValueError("_dfbaseline columns must be a 2-level MultiIndex with levels for draw and stat.") stat_level_name = "stat" if "stat" in _df.columns.names else _df.columns.names[1] - baseline_draw_level_name = "draw" if "draw" in _dfbaseline.columns.names else _dfbaseline.columns.names[0] available_stats = pd.Index(_df.columns.get_level_values(stat_level_name).unique()) if "central" not in available_stats: raise ValueError(f"Statistic 'central' not found. Available stats: {available_stats.tolist()}") - baseline_draws = pd.Index(_dfbaseline.columns.get_level_values(baseline_draw_level_name).unique()) - if "Nothing" not in baseline_draws: - raise ValueError(f"Baseline draw 'Nothing' not found. 
Available baseline draws: {baseline_draws.tolist()}") - implementation_central = _df.xs("central", axis=1, level=stat_level_name).copy() - baseline_central = _dfbaseline["Nothing"].loc[:, ["central"]].copy() - implementation_central.columns = implementation_central.columns.to_series().str.replace(r"_\*$", "", regex=True) - baseline_central.columns = pd.Index(["Nothing"]) - _plot = pd.concat([baseline_central, implementation_central], axis=1) + if _dfbaseline is None: + _plot = implementation_central + else: + if not isinstance(_dfbaseline.columns, pd.MultiIndex) or _dfbaseline.columns.nlevels != 2: + raise ValueError("_dfbaseline columns must be a 2-level MultiIndex with levels for draw and stat.") + baseline_draw_level_name = "draw" if "draw" in _dfbaseline.columns.names else _dfbaseline.columns.names[0] + baseline_draws = pd.Index(_dfbaseline.columns.get_level_values(baseline_draw_level_name).unique()) + if "Nothing" not in baseline_draws: + raise ValueError(f"Baseline draw 'Nothing' not found. 
Available baseline draws: {baseline_draws.tolist()}") + + baseline_central = _dfbaseline["Nothing"].loc[:, ["central"]].copy() + baseline_central.columns = pd.Index(["Nothing"]) + _plot = pd.concat([baseline_central, implementation_central], axis=1) + _plot = _plot.loc[:, ~_plot.columns.duplicated()] _plot = _plot.sort_index() diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py index 03202c8ce7..3e9c752b5e 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py @@ -14,6 +14,7 @@ target_period, ) from scripts.lcoa_inputs_from_tlo_analyses.fig_utils import ( + make_graph_file_name, do_bar_plot_with_ci, plot_deaths_by_period_for_cause, plot_hsi_counts_by_period_for_draw, @@ -21,8 +22,8 @@ ) from tlo import Date -TARGET_PERIOD = (Date(2026, 1, 1), Date(2041, 1, 1)) -PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS = 5 +TARGET_PERIOD = (Date(2025, 1, 1), Date(2041, 1, 1)) +PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS = 1 def load_results_files(results_files: list[Path]) -> dict[Path, dict]: @@ -35,10 +36,6 @@ def load_results_files(results_files: list[Path]) -> dict[Path, dict]: def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path = None): """Produce standard plots describing effect of each TREATMENT_ID.""" - def make_graph_file_name(stub): - filename = stub.replace('*', '_star_').replace(' ', '_').lower() - return output_folder / f"{filename}.png" - param_names = get_parameter_names_from_scenario_file() period_labels_for_bar_plots = [ @@ -56,22 +53,24 @@ def make_graph_file_name(stub): counts_of_hsi_in_implementation_period = all_results[results_files[1]]['counts_of_hsi_by_short_treatment_id'] - result_df = pd.DataFrame([ - {'treatment_id_included': draw, 'nonzero_hsis': treatment_id} - for draw in 
counts_of_hsi_in_implementation_period.columns.get_level_values(0).unique() - for treatment_id in ((counts_of_hsi_in_implementation_period[draw] != 0).any(axis=1))[(counts_of_hsi_in_implementation_period[draw] != 0).any(axis=1)].index - ]) - result_df['treatment_id_included'] = result_df['treatment_id_included'].str.replace('_\\*$', '', regex=True) - #133 rows here; - #result_df[result_df['treatment_id_included'] != result_df['nonzero_hsis']] - - - # Plot number of HSIs for each draw dropping the aggregate over the entire period counts_of_hsi_in_baseline = all_results[results_files[0]]['counts_of_hsi_by_period'] counts_of_hsi_in_baseline = counts_of_hsi_in_baseline.drop(['2010-2025'], level=1) counts_of_hsi_in_implementation_period = all_results[results_files[1]]['counts_of_hsi_by_period'] - counts_of_hsi_in_implementation_period = counts_of_hsi_in_implementation_period.drop(['2026-2041'], level=1) + counts_of_hsi_in_implementation_period = counts_of_hsi_in_implementation_period.drop(['2025-2041'], level=1) + # Values for the year 2025 have been logged in the implementation period; + # remove them from here and add them to the baseline dataframe. 
+ x = counts_of_hsi_in_implementation_period['Nothing'] + nothing_hsis_in_2025 = x.xs('2025-2025', level = 'period') + nothing_hsis_in_2025 = pd.concat({"2025-2025": nothing_hsis_in_2025}, names=["period"]).reorder_levels(["appt_type", "period"]) + nothing_hsis_in_2025.columns = pd.MultiIndex.from_tuples( + [("Nothing", col) for col in nothing_hsis_in_2025.columns], + names=["draw", "stat"] + ) + counts_of_hsi_in_baseline = pd.concat([counts_of_hsi_in_baseline, nothing_hsis_in_2025], axis=0).sort_index() + # now we can safely drop 2025-2025 from the implementation period dataframe + counts_of_hsi_in_implementation_period = counts_of_hsi_in_implementation_period.drop('2025-2025', level=1) + result_df_by_period = pd.DataFrame([ {'treatment_id_included': draw, 'nonzero_hsis': treatment_id, 'period': period} for draw in counts_of_hsi_in_implementation_period.columns.get_level_values(0).unique() @@ -86,6 +85,8 @@ def make_graph_file_name(stub): ) for param in param_names: + if param == "Nothing": + continue draw = format_scenario_name(param) print(f"Plotting HSI counts for {draw}...") name_of_plot = f"Yearly HSI counts for {draw}" @@ -95,7 +96,8 @@ def make_graph_file_name(stub): counts_of_hsi_in_baseline, ) ax.set_title(name_of_plot) - fig.savefig(make_graph_file_name(name_of_plot)) + outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) + fig.savefig(outfile) plt.close(fig) # Plot population growth @@ -109,13 +111,13 @@ def make_graph_file_name(stub): plt.close(fig) # Plot number of deaths and DALYS by cause for each parameter, with confidence intervals, for the target period - num_deaths_by_cause_label = results['num_deaths'] - deaths_averted = results['deaths_averted'] - pc_deaths_averted = results['pc_deaths_averted'] + num_deaths_by_cause_label = all_results[results_files[1]]['num_deaths'] + deaths_averted = all_results[results_files[1]]['num_deaths_averted'] + pc_deaths_averted = all_results[results_files[1]]['pc_deaths_averted'] - 
num_dalys_by_cause_label = results['num_dalys'] - dalys_averted = results['dalys_averted'] - pc_dalys_averted = results['pc_dalys_averted'] + num_dalys_by_cause_label = all_results[results_files[1]]['num_dalys'] + dalys_averted = all_results[results_files[1]]['num_dalys_averted'] + pc_dalys_averted = all_results[results_files[1]]['pc_dalys_averted'] for param in param_names: param_formatted = format_scenario_name(param) @@ -134,7 +136,8 @@ def make_graph_file_name(stub): ax.spines["top"].set_visible(False) ax.spines["right"].set_visible(False) fig.tight_layout() - fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) + outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) + fig.savefig(outfile) plt.close(fig) fig, ax = plt.subplots() @@ -152,7 +155,8 @@ def make_graph_file_name(stub): ax.spines["top"].set_visible(False) ax.spines["right"].set_visible(False) fig.tight_layout() - fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) + outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) + fig.savefig(outfile) plt.close(fig) cause_labels = num_deaths_by_cause_label.index.get_level_values("label").unique() @@ -161,7 +165,8 @@ def make_graph_file_name(stub): name_of_plot = f"Deaths Over Time for {cause_label}" ax.set_title(name_of_plot) ax.set_ylabel("Number of deaths (/1000)") - fig.savefig(make_graph_file_name(name_of_plot.replace(" ", "_"))) + outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) + fig.savefig(outfile) plt.close(fig) # Plot cost of each scenario, with confidence intervals, for the target period @@ -170,7 +175,7 @@ def make_graph_file_name(stub): if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("results_files", type=Path, nargs="+") - parser.add_argument("--output-folder", type=Path, required=True) + parser.add_argument("--output_folder", type=Path, required=True) args = parser.parse_args() apply(results_files=args.results_files, 
output_folder=args.output_folder, resourcefilepath=Path("./resources")) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py index 1d0314d5dd..64e5c79639 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py @@ -98,6 +98,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: # Generate list of TREATMENT_IDs and filter to the resolution needed treatments = get_filtered_treatment_ids(depth=None) + # Return 'Service_Availability' values, with scenarios for nothing, and ones for which all but one # treatment is omitted service_availability = dict({"Nothing": []}) From 0be5afc4b67246aceac6b8e8127a0625fa257d04 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Tue, 31 Mar 2026 10:58:42 +0100 Subject: [PATCH 35/55] More figure edits --- .../fig_utils.py | 4 +-- .../figures_effect_of_treatment_ids.py | 25 +++++++++++-------- .../results_processing_utils.py | 10 +++++--- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py index 0ff9edbfea..9e906650e1 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py @@ -111,8 +111,8 @@ def plot_deaths_by_period_for_cause( _plot.index.name = period_level_name try: - ordered_periods = pd.Index(_plot.index).astype(make_calendar_period_type()) - _plot = _plot.reindex(ordered_periods.sort_values().astype(str)) + ordered_period_labels, display_period_labels = _get_sorted_period_labels_and_display_labels(_plot.index) + _plot = _plot.reindex(ordered_period_labels) except (TypeError, ValueError): _plot = _plot.loc[pd.Index(_plot.index).drop_duplicates()] diff --git 
a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py index 3e9c752b5e..dbf846d40b 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py @@ -111,7 +111,7 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path plt.close(fig) # Plot number of deaths and DALYS by cause for each parameter, with confidence intervals, for the target period - num_deaths_by_cause_label = all_results[results_files[1]]['num_deaths'] + deaths_averted = all_results[results_files[1]]['num_deaths_averted'] pc_deaths_averted = all_results[results_files[1]]['pc_deaths_averted'] @@ -119,6 +119,20 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path dalys_averted = all_results[results_files[1]]['num_dalys_averted'] pc_dalys_averted = all_results[results_files[1]]['pc_dalys_averted'] + num_deaths_by_cause_label_baseline = all_results[results_files[0]]['num_deaths'].drop(['2010-2025'], level=1) + num_deaths_by_cause_label_implementation = all_results[results_files[1]]['num_deaths'].drop(['2025-2041'], level=1) + + cause_labels = num_deaths_by_cause_label.index.get_level_values("label").unique() + for cause_label in cause_labels: + fig, ax = plot_deaths_by_period_for_cause(num_deaths_by_cause_label / 1e3, cause_label=cause_label) + name_of_plot = f"Deaths Over Time for {cause_label}" + ax.set_title(name_of_plot) + ax.set_ylabel("Number of deaths (/1000)") + outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) + fig.savefig(outfile) + plt.close(fig) + + for param in param_names: param_formatted = format_scenario_name(param) print(f"Plotting for {param_formatted}...") @@ -159,15 +173,6 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path fig.savefig(outfile) plt.close(fig) - 
cause_labels = num_deaths_by_cause_label.index.get_level_values("label").unique() - for cause_label in cause_labels: - fig, ax = plot_deaths_by_period_for_cause(num_deaths_by_cause_label / 1e3, cause_label=cause_label) - name_of_plot = f"Deaths Over Time for {cause_label}" - ax.set_title(name_of_plot) - ax.set_ylabel("Number of deaths (/1000)") - outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) - fig.savefig(outfile) - plt.close(fig) # Plot cost of each scenario, with confidence intervals, for the target period diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py b/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py index 06924dcd6d..aeb28e938a 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py @@ -12,7 +12,7 @@ from tlo.analysis.utils import make_age_grp_types, summarize, to_age_group -TARGET_PERIOD = (Date(2026, 1, 1), Date(2041, 1, 1)) +TARGET_PERIOD = (Date(2025, 1, 1), Date(2041, 1, 1)) def find_difference_relative_to_comparison(_ser: pd.Series, comparison: str, @@ -66,18 +66,22 @@ def get_periods_within_target_period( def get_parameter_names_from_scenario_file() -> Tuple[str]: """Get tuple of scenario names from Scenario class used to create results.""" e = EffectOfEachTreatment() - return tuple(e._scenarios.keys()) + excluded = {"Only Hiv_Test_Selftest_*"} + # I think Hiv_test_Selftest has been added after I had submitted the draws, hence filtering it out. 
+ return tuple(name for name in e._scenarios.keys() if name not in excluded) def format_scenario_name(_sn: str) -> str: """Return reformatted scenario name ready for plotting.""" if _sn == "Nothing": return "Nothing" - return _sn.lstrip("Only ") + else: + return _sn.removeprefix("Only ") def set_param_names_as_column_index_level_0(_df: pd.DataFrame, param_names: tuple[str, ...]) -> pd.DataFrame: """Set columns index level 0 as scenario param names.""" + ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] assert len(names_of_cols_level0) == len(_df.columns.levels[0]) From 38369b77f8302ce31325d1ecd07d7b92b61fc230 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Wed, 1 Apr 2026 14:06:23 +0100 Subject: [PATCH 36/55] Scenario without full consumables availability and no improved healthcare seeking --- .../analysis_effect_of_treatment_ids.py | 6 +- .../fig_utils.py | 52 ++++++-- .../figures_effect_of_treatment_ids.py | 120 +++++++++++------- ...ario_effect_of_treatment_ids_no_suspend.py | 16 +-- 4 files changed, 128 insertions(+), 66 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index 80877241fc..1730994329 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -200,10 +200,10 @@ def apply( ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) ) - num_dalys_averted = ( + num_dalys_averted = summarize( pd.DataFrame( - find_difference_extra_relative_to_comparison(num_dalys.sum(), comparison='Nothing') - ).T.iloc[0].unstack(level='run')) + find_difference_extra_relative_to_comparison(num_dalys.sum(), comparison='Nothing')).T + ).iloc[0].unstack() pc_dalys_averted = 100.0 * 
summarize( pd.DataFrame( diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py index 9e906650e1..a9c8e1dfde 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py @@ -23,7 +23,7 @@ APPOINTMENT_TYPE_FIXED_COLORS = {"AccidentsandEmerg": "black"} def make_graph_file_name(stub): - filename = stub.replace('*', '_star_').replace(' ', '_').lower() + filename = stub.replace('*', '_star_').replace(' ', '_').replace('/', '').lower() return f"{filename}.png" @@ -85,27 +85,60 @@ def plot_deaths_by_period_for_cause( _df: pd.DataFrame, cause_label: str, plot_stat: str = "central", + _dfbaseline: pd.DataFrame = None, ): """Plot deaths over time for a single cause, with one line per short treatment id.""" if not isinstance(_df.index, pd.MultiIndex) or _df.index.nlevels != 2: raise ValueError("_df index must be a 2-level MultiIndex with levels for label and period.") if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels != 2: raise ValueError("_df columns must be a 2-level MultiIndex with levels for treatment id and stat.") + if _dfbaseline is None: + raise ValueError("_dfbaseline is required.") + if not isinstance(_dfbaseline.index, pd.MultiIndex) or _dfbaseline.index.nlevels != 2: + raise ValueError("_dfbaseline index must be a 2-level MultiIndex with levels for label and period.") + if not isinstance(_dfbaseline.columns, pd.MultiIndex) or _dfbaseline.columns.nlevels != 2: + raise ValueError("_dfbaseline columns must be a 2-level MultiIndex with levels for draw and stat.") label_level_name = "label" if "label" in _df.index.names else _df.index.names[0] period_level_name = "period" if "period" in _df.index.names else _df.index.names[1] stat_level_name = "stat" if "stat" in _df.columns.names else _df.columns.names[1] + baseline_label_level_name = "label" if "label" in _dfbaseline.index.names else _dfbaseline.index.names[0] + 
baseline_stat_level_name = "stat" if "stat" in _dfbaseline.columns.names else _dfbaseline.columns.names[1] + baseline_draw_level_name = "draw" if "draw" in _dfbaseline.columns.names else _dfbaseline.columns.names[0] available_causes = pd.Index(_df.index.get_level_values(label_level_name).unique()) if cause_label not in available_causes: raise ValueError(f"Cause label '{cause_label}' not found. Available causes: {available_causes.tolist()}") + available_baseline_causes = pd.Index(_dfbaseline.index.get_level_values(baseline_label_level_name).unique()) + if cause_label not in available_baseline_causes: + raise ValueError( + f"Cause label '{cause_label}' not found in _dfbaseline. " + f"Available causes: {available_baseline_causes.tolist()}" + ) available_stats = pd.Index(_df.columns.get_level_values(stat_level_name).unique()) if plot_stat not in available_stats: raise ValueError(f"Statistic '{plot_stat}' not found. Available stats: {available_stats.tolist()}") + available_baseline_stats = pd.Index(_dfbaseline.columns.get_level_values(baseline_stat_level_name).unique()) + if plot_stat not in available_baseline_stats: + raise ValueError( + f"Statistic '{plot_stat}' not found in _dfbaseline. " + f"Available stats: {available_baseline_stats.tolist()}" + ) + available_baseline_draws = pd.Index(_dfbaseline.columns.get_level_values(baseline_draw_level_name).unique()) + if "Nothing" not in available_baseline_draws: + raise ValueError( + f"Draw 'Nothing' not found in _dfbaseline. 
Available draws: {available_baseline_draws.tolist()}" + ) - _plot = _df.xs(cause_label, level=label_level_name) - _plot = _plot.xs(plot_stat, axis=1, level=stat_level_name) + _plot = _df.xs(cause_label, level=label_level_name).xs(plot_stat, axis=1, level=stat_level_name) + _plot_baseline = ( + _dfbaseline["Nothing"] + .xs(cause_label, level=baseline_label_level_name) + .loc[:, [plot_stat]] + .rename(columns={plot_stat: "Nothing"}) + ) + _plot = pd.concat([_plot_baseline, _plot]) if _plot.empty: raise ValueError(f"No plottable data remain for cause '{cause_label}' using stat '{plot_stat}'.") @@ -136,7 +169,7 @@ def plot_deaths_by_period_for_cause( ) ax.set_xticks(x) - ax.set_xticklabels([str(period) for period in _plot.index], rotation=45, ha="right") + ax.set_xticklabels(display_period_labels, rotation=45, ha="right") ax.set_xlabel("Period") ax.set_ylabel("Number of deaths") ax.set_title(str(cause_label)) @@ -151,8 +184,6 @@ def plot_deaths_by_period_for_cause( title_fontsize=9, frameon=True, ) - - fig.tight_layout() return fig, ax @@ -276,7 +307,13 @@ def plot_multiindex_dot_with_interval( def do_barh_plot_with_ci(_df: pd.DataFrame, _ax): """Make horizontal bar plot for each treatment id.""" errors = pd.concat([_df["mean"] - _df["lower"], _df["upper"] - _df["mean"]], axis=1).T.to_numpy() - _df.plot.barh(ax=_ax, y="mean", xerr=errors, legend=False, color=[get_color_short_treatment_id(_id) for _id in _df.index]) + _df.plot.barh( + ax=_ax, + y="mean", + xerr=errors, + legend=False, + color=[_get_short_treatment_id_and_color(_id)[1] for _id in _df.index], + ) def do_label_barh_plot(_df: pd.DataFrame, _ax): @@ -369,7 +406,6 @@ def plot_hsi_counts_by_period_for_draw( # only want to compare the number of Hiv_Treament HSIs until 2025 and during the implementation period _dfbaseline = _dfbaseline['Nothing'] # because baseline was run only for Nothing scenario treatment_id_of_interest = draw.replace("_*", "") - print(f"Filtering baseline to treatment id of interest: 
'{treatment_id_of_interest}'") _dfbaseline = _dfbaseline[_dfbaseline.index.get_level_values(0) == treatment_id_of_interest] _df = pd.concat([_df[draw], _dfbaseline]) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py index dbf846d40b..0ae03daef0 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py @@ -15,6 +15,7 @@ ) from scripts.lcoa_inputs_from_tlo_analyses.fig_utils import ( make_graph_file_name, + do_barh_plot_with_ci, do_bar_plot_with_ci, plot_deaths_by_period_for_cause, plot_hsi_counts_by_period_for_draw, @@ -88,7 +89,6 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path if param == "Nothing": continue draw = format_scenario_name(param) - print(f"Plotting HSI counts for {draw}...") name_of_plot = f"Yearly HSI counts for {draw}" fig, ax = plot_hsi_counts_by_period_for_draw( counts_of_hsi_in_implementation_period, @@ -112,19 +112,22 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path # Plot number of deaths and DALYS by cause for each parameter, with confidence intervals, for the target period - deaths_averted = all_results[results_files[1]]['num_deaths_averted'] - pc_deaths_averted = all_results[results_files[1]]['pc_deaths_averted'] - num_dalys_by_cause_label = all_results[results_files[1]]['num_dalys'] - dalys_averted = all_results[results_files[1]]['num_dalys_averted'] pc_dalys_averted = all_results[results_files[1]]['pc_dalys_averted'] + num_dalys_by_cause_label_baseline = all_results[results_files[0]]['num_dalys'].drop(['2010-2025'], level=1) + num_dalys_by_cause_label_implementation = all_results[results_files[1]]['num_dalys'].drop(['2025-2041'], level=1) + num_deaths_by_cause_label_baseline = all_results[results_files[0]]['num_deaths'].drop(['2010-2025'], 
level=1) num_deaths_by_cause_label_implementation = all_results[results_files[1]]['num_deaths'].drop(['2025-2041'], level=1) - cause_labels = num_deaths_by_cause_label.index.get_level_values("label").unique() + cause_labels = num_deaths_by_cause_label_implementation.index.get_level_values("label").unique() for cause_label in cause_labels: - fig, ax = plot_deaths_by_period_for_cause(num_deaths_by_cause_label / 1e3, cause_label=cause_label) + fig, ax = plot_deaths_by_period_for_cause( + num_deaths_by_cause_label_implementation / 1e3, + cause_label=cause_label, + _dfbaseline=num_deaths_by_cause_label_baseline / 1e3, + ) name_of_plot = f"Deaths Over Time for {cause_label}" ax.set_title(name_of_plot) ax.set_ylabel("Number of deaths (/1000)") @@ -132,50 +135,81 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path fig.savefig(outfile) plt.close(fig) - - for param in param_names: - param_formatted = format_scenario_name(param) - print(f"Plotting for {param_formatted}...") - fig, ax = plt.subplots() - name_of_plot = f"Deaths With {param_formatted}, {target_period_label}" - do_bar_plot_with_ci(num_deaths_by_cause_label / 1e3, param_formatted, ax, period_labels_for_bar_plots, target_period_label) - legend = ax.get_legend() - if legend is not None: - legend.remove() + fig, ax = plot_deaths_by_period_for_cause( + num_dalys_by_cause_label_implementation / 1e3, + cause_label=cause_label, + _dfbaseline=num_dalys_by_cause_label_baseline / 1e3, + ) + name_of_plot = f"DALYs Over Time for {cause_label}" ax.set_title(name_of_plot) - ax.set_xlabel("Cause of Death") - ax.set_ylabel("Number of Deaths (/1000)") - #ax.set_ylim(0, 500) - ax.grid(axis="y") - ax.spines["top"].set_visible(False) - ax.spines["right"].set_visible(False) - fig.tight_layout() + ax.set_ylabel("Number of DALYs (/1000)") outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) fig.savefig(outfile) plt.close(fig) - fig, ax = plt.subplots() - name_of_plot = f"DALYS With 
{param_formatted}, {target_period_label}" - do_bar_plot_with_ci(num_dalys_by_cause_label / 1e6, param_formatted, ax, period_labels_for_bar_plots, target_period_label) - legend = ax.get_legend() - if legend is not None: - legend.remove() - ax.set_title(name_of_plot) - ax.set_xlabel("Cause of Disability/Death") - ax.set_ylabel("Number of DALYS (/millions)") - #ax.set_ylim(0, 30) - ##ax.set_yticks(np.arange(0, 35, 5)) - ax.grid(axis="y") - ax.spines["top"].set_visible(False) - ax.spines["right"].set_visible(False) - fig.tight_layout() - outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) - fig.savefig(outfile) - plt.close(fig) + deaths_averted = all_results[results_files[1]]['num_deaths_averted'] + deaths_averted_sorted = (deaths_averted.sort_values(by="mean", ascending=True) / 1e3) + fig_height = max(6, min(0.28 * len(deaths_averted_sorted.index) + 4, 18)) + fig, ax = plt.subplots(figsize=(10, fig_height)) + name_of_plot = "Deaths Averted by Each Treatment ID" + do_barh_plot_with_ci(deaths_averted_sorted, ax) + ax.set_title(name_of_plot) + ax.set_xlabel("Number of deaths averted (/1000)") + ax.grid(axis="x") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) + fig.tight_layout() + fig.savefig(outfile) + plt.close(fig) + dalys_averted = all_results[results_files[1]]['num_dalys_averted'] + dalys_averted_sorted = (dalys_averted.sort_values(by="mean", ascending=True) / 1e3) + fig_height = max(6, min(0.28 * len(dalys_averted_sorted.index) + 4, 18)) + fig, ax = plt.subplots(figsize=(10, fig_height)) + name_of_plot = "DALYS Averted by Each Treatment ID" + do_barh_plot_with_ci(dalys_averted_sorted, ax) + ax.set_title(name_of_plot) + ax.set_xlabel("DALYs averted (/1000)") + ax.grid(axis="x") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) + 
fig.tight_layout() + fig.savefig(outfile) + plt.close(fig) - # Plot cost of each scenario, with confidence intervals, for the target period + pc_deaths_averted = all_results[results_files[1]]['pc_deaths_averted'] + pc_deaths_averted_sorted = (pc_deaths_averted.sort_values(by="mean", ascending=True)) + fig_height = max(6, min(0.28 * len(pc_deaths_averted_sorted.index) + 4, 18)) + fig, ax = plt.subplots(figsize=(10, fig_height)) + name_of_plot = "Percentage Deaths Averted by Each Treatment ID" + do_barh_plot_with_ci(pc_deaths_averted_sorted, ax) + ax.set_title(name_of_plot) + ax.set_xlabel("Percentage of deaths averted") + ax.grid(axis="x") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) + fig.tight_layout() + fig.savefig(outfile) + plt.close(fig) + pc_dalys_averted = all_results[results_files[1]]['pc_dalys_averted'] + pc_dalys_averted_sorted = (pc_dalys_averted.sort_values(by="mean", ascending=True)) + fig_height = max(6, min(0.28 * len(pc_dalys_averted_sorted.index) + 4, 18)) + fig, ax = plt.subplots(figsize=(10, fig_height)) + name_of_plot = "Percentage DALYs Averted by Each Treatment ID" + do_barh_plot_with_ci(pc_dalys_averted_sorted, ax) + ax.set_title(name_of_plot) + ax.set_xlabel("Percentage of DALYs averted") + ax.grid(axis="x") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) + fig.tight_layout() + fig.savefig(outfile) + plt.close(fig) if __name__ == "__main__": parser = argparse.ArgumentParser() diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids_no_suspend.py b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids_no_suspend.py index 67c8d4f0ff..9c0c3b7c63 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids_no_suspend.py +++ 
b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids_no_suspend.py @@ -27,7 +27,6 @@ from tlo import Date, logging from tlo.analysis.utils import mix_scenarios, get_parameters_for_status_quo from tlo.methods.fullmodel import fullmodel -from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher from tlo.scenario import BaseScenario @@ -44,19 +43,12 @@ def baseline(self) -> Dict: { "HealthSystem": { "cons_availability": "default", - "year_cons_availability_switch": self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, - "cons_availability_postSwitch": "all", "mode_appt_constraints": 1, "year_service_availability_switch": self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, # allow historical HRH scaling to occur 2018-2024 # 'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, "yearly_HR_scaling_mode": "historical_scaling", - }, - "ImprovedHealthSystemAndCareSeekingScenarioSwitcher": { - "max_healthsystem_function": [False, True], # <-- switch from False to True mid-way - "max_healthcare_seeking": [False, True], # <-- switch from False to True mid-way - "year_of_switch": self.YEAR_OF_SERVICE_AVAILABILITY_SWITCH, - }, + } }, ) @@ -66,7 +58,7 @@ def __init__(self): super().__init__() self.seed = 0 self.start_date = Date(2010, 1, 1) - self.end_date = Date(2031, 1, 1) + self.end_date = Date(2041, 1, 1) self.pop_size = 50_000 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) @@ -86,7 +78,7 @@ def log_configuration(self): } def modules(self): - return fullmodel() + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher()] + return fullmodel() def draw_parameters(self, draw_number, rng): if draw_number < len(self._scenarios): @@ -97,7 +89,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: The sequences of scenarios systematically omits all but one TREATMENT_ID that is defined in the model.""" # Generate list of TREATMENT_IDs and filter to the resolution needed - treatments = 
["Hiv_Treatment_*"] + treatments = ["Epilepsy_Treatment_Start_*"] # Return 'Service_Availability' values, with scenarios for nothing, and ones for which all but one # treatment is omitted service_availability = dict() From abfba20a508c72b89d73574f1c1e2d7226094961 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Thu, 2 Apr 2026 16:18:55 +0100 Subject: [PATCH 37/55] Combine pickle files from the two runs --- .../analysis_effect_of_treatment_ids.py | 22 ++- .../combine_suspended_and_resumed_pickles.py | 132 ++++++++++++++++++ .../fig_utils.py | 87 ++++++++++++ .../figures_effect_of_treatment_ids.py | 18 ++- .../results_processing_utils.py | 34 ++--- 5 files changed, 259 insertions(+), 34 deletions(-) create mode 100644 src/scripts/lcoa_inputs_from_tlo_analyses/combine_suspended_and_resumed_pickles.py diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index 1730994329..b7b7f5607a 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -59,7 +59,8 @@ ) # python src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z figs/ --target-start=2010-01-01 --target-end=2025-12-31 # python src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-16T154500Z figs/ --target-start=2025-01-01 --target-end=2041-01-01 -TARGET_PERIOD = (Date(2025, 1, 1), Date(2041, 1, 1)) +# python src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-combined --target-start=2010-01-01 --target-end=2041-01-01 + PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS = 1 suspended_folder = 
Path("outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z") results_folder = Path("outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-16T154500Z") @@ -81,8 +82,8 @@ def parse_iso_date(value: str) -> Date: def apply( results_folder: Path, output_folder: Path, - resourcefilepath: Path = None, - target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, + resourcefilepath: Path, + target_period_tuple: tuple[Date, Date] ): """Produce standard plots describing effect of each TREATMENT_ID.""" _, age_grp_lookup = make_age_grp_lookup() @@ -230,15 +231,12 @@ def apply( if (args.target_start is None) != (args.target_end is None): parser.error("Provide both --target-start and --target-end, or neither.") - if args.target_start is None: - target_period_tuple = TARGET_PERIOD - else: - target_period_tuple = ( - parse_iso_date(args.target_start), - parse_iso_date(args.target_end), - ) - if not target_period_tuple[0] < target_period_tuple[1]: - parser.error("--target-start must be earlier than --target-end.") + target_period_tuple = ( + parse_iso_date(args.target_start), + parse_iso_date(args.target_end), + ) + if not target_period_tuple[0] < target_period_tuple[1]: + parser.error("--target-start must be earlier than --target-end.") out = args.output_folder if args.output_folder is not None else args.results_folder results = apply( diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/combine_suspended_and_resumed_pickles.py b/src/scripts/lcoa_inputs_from_tlo_analyses/combine_suspended_and_resumed_pickles.py new file mode 100644 index 0000000000..530f449e64 --- /dev/null +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/combine_suspended_and_resumed_pickles.py @@ -0,0 +1,132 @@ +"""CLI helper to combine suspended and resumed pickle outputs.""" + +# python src/scripts/lcoa_inputs_from_tlo_analyses/combine_suspended_and_resumed_pickles.py --suspended_results_folder outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z 
--resumed_results_folder outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-16T154500Z_folder --output_folder outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-combined + + +import argparse +import pickle +import warnings +from pathlib import Path +from typing import Any + +import pandas as pd + +def _validate_input_output_paths( + suspended_results_folder: Path, + resumed_results_folder: Path, + output_folder: Path, +) -> None: + """Validate input/output path constraints for pickle combination helper.""" + suspended_resolved = suspended_results_folder.resolve() + resumed_resolved = resumed_results_folder.resolve() + output_resolved = output_folder.resolve() + + if output_resolved == suspended_resolved or output_resolved == resumed_resolved: + raise ValueError( + "output_folder must be different from both suspended_results_folder and resumed_results_folder." + ) + +def _combine_pickled_objects(suspended_obj: Any, resumed_obj: Any, context: str = "root") -> Any: + """Combine suspended and resumed objects with suspended object first.""" + if suspended_obj is None and resumed_obj is None: + return None + if isinstance(suspended_obj, dict) and isinstance(resumed_obj, dict): + combined = {} + for key, suspended_value in suspended_obj.items(): + if key in resumed_obj: + combined[key] = _combine_pickled_objects( + suspended_value, resumed_obj[key], context=f"{context}.{key}" + ) + else: + combined[key] = suspended_value + for key, resumed_value in resumed_obj.items(): + if key not in combined: + combined[key] = resumed_value + return combined + if isinstance(suspended_obj, pd.DataFrame) and isinstance(resumed_obj, pd.DataFrame): + return pd.concat([suspended_obj, resumed_obj], axis=0) + if isinstance(suspended_obj, pd.Series) and isinstance(resumed_obj, pd.Series): + return pd.concat([suspended_obj, resumed_obj], axis=0) + if isinstance(suspended_obj, list) and isinstance(resumed_obj, list): + return suspended_obj + resumed_obj + if 
isinstance(suspended_obj, tuple) and isinstance(resumed_obj, tuple): + return suspended_obj + resumed_obj + try: + return suspended_obj + resumed_obj + except TypeError as exc: + raise TypeError( + f"Unsupported combine operation at {context}: " + f"{type(suspended_obj).__name__} and {type(resumed_obj).__name__}." + ) from exc + + +def combine_suspended_and_resumed_pickles( + suspended_results_folder: Path, + resumed_results_folder: Path, + output_folder: Path, +) -> None: + """Combine corresponding suspended and resumed pickles into output folder.""" + _validate_input_output_paths(suspended_results_folder, resumed_results_folder, output_folder) + + draw_dirs = sorted([p for p in resumed_results_folder.iterdir() if p.is_dir()], key=lambda p: p.name) + for draw_dir in draw_dirs: + print(f"Processing draw directory: {draw_dir}...") + run_dirs = sorted([p for p in draw_dir.iterdir() if p.is_dir()], key=lambda p: p.name) + for run_dir in run_dirs: + print(f" Processing run directory: {run_dir}...") + pickles = sorted(run_dir.glob("*.pickle"), key=lambda p: p.name) + for resumed_pickle_path in pickles: + print(f" Processing pickle file: {resumed_pickle_path}...") + with resumed_pickle_path.open("rb") as resumed_file: + resumed_obj = pickle.load(resumed_file) + + suspended_pickle_path = ( + suspended_results_folder / "0" / run_dir.name / resumed_pickle_path.name + ) + if suspended_pickle_path.exists(): + with suspended_pickle_path.open("rb") as suspended_file: + suspended_obj = pickle.load(suspended_file) + try: + combined_obj = _combine_pickled_objects(suspended_obj, resumed_obj) + except TypeError as exc: + raise TypeError( + "Could not combine pickled objects for " + f"{resumed_pickle_path} with types " + f"{type(suspended_obj).__name__} and {type(resumed_obj).__name__}." 
+ ) from exc + else: + warnings.warn( + "No suspended counterpart found for " + f"{resumed_pickle_path} (expected at {suspended_pickle_path}); " + "copying resumed object to output unchanged.", + stacklevel=2, + ) + combined_obj = resumed_obj + + output_pickle_path = output_folder / draw_dir.name / run_dir.name / resumed_pickle_path.name + output_pickle_path.parent.mkdir(parents=True, exist_ok=True) + with output_pickle_path.open("wb") as output_file: + pickle.dump(combined_obj, output_file) + + +def main() -> None: + parser = argparse.ArgumentParser( + description=( + "Combine suspended and resumed pickle outputs into a new output folder, " + "with suspended content prepended where counterparts exist." + ) + ) + parser.add_argument("suspended_results_folder", type=Path) + parser.add_argument("resumed_results_folder", type=Path) + parser.add_argument("output_folder", type=Path) + args = parser.parse_args() + + combine_suspended_and_resumed_pickles( + suspended_results_folder=args.suspended_results_folder, + resumed_results_folder=args.resumed_results_folder, + output_folder=args.output_folder, + ) + + +if __name__ == "__main__": + main() diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py index a9c8e1dfde..1cc67a1337 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py @@ -187,6 +187,93 @@ def plot_deaths_by_period_for_cause( return fig, ax +def plot_deaths_by_period_for_draw( + _df: pd.DataFrame, + draw: str, + plot_stat: str = "central", + _dfbaseline: pd.DataFrame = None, +): + """Plot deaths over time for a single draw, with one line per cause label.""" + if not isinstance(_df.index, pd.MultiIndex) or _df.index.nlevels != 2: + raise ValueError("_df index must be a 2-level MultiIndex with levels for label and period.") + if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels != 2: + raise 
ValueError("_df columns must be a 2-level MultiIndex with levels for draw and stat.") + if _dfbaseline is None: + raise ValueError("_dfbaseline is required.") + if not isinstance(_dfbaseline.index, pd.MultiIndex) or _dfbaseline.index.nlevels != 2: + raise ValueError("_dfbaseline index must be a 2-level MultiIndex with levels for label and period.") + if not isinstance(_dfbaseline.columns, pd.MultiIndex) or _dfbaseline.columns.nlevels != 2: + raise ValueError("_dfbaseline columns must be a 2-level MultiIndex with levels for draw and stat.") + + label_level_name = "label" if "label" in _df.index.names else _df.index.names[0] + period_level_name = "period" if "period" in _df.index.names else _df.index.names[1] + draw_level_name = "draw" if "draw" in _df.columns.names else _df.columns.names[0] + stat_level_name = "stat" if "stat" in _df.columns.names else _df.columns.names[1] + baseline_stat_level_name = "stat" if "stat" in _dfbaseline.columns.names else _dfbaseline.columns.names[1] + baseline_draw_level_name = "draw" if "draw" in _dfbaseline.columns.names else _dfbaseline.columns.names[0] + + available_draws = pd.Index(_df.columns.get_level_values(draw_level_name).unique()) + if draw not in available_draws: + raise ValueError(f"Draw '{draw}' not found. Available draws: {available_draws.tolist()}") + available_stats = pd.Index(_df.columns.get_level_values(stat_level_name).unique()) + if plot_stat not in available_stats: + raise ValueError(f"Statistic '{plot_stat}' not found. Available stats: {available_stats.tolist()}") + available_baseline_stats = pd.Index(_dfbaseline.columns.get_level_values(baseline_stat_level_name).unique()) + if plot_stat not in available_baseline_stats: + raise ValueError( + f"Statistic '{plot_stat}' not found in _dfbaseline. 
" + f"Available stats: {available_baseline_stats.tolist()}" + ) + available_baseline_draws = pd.Index(_dfbaseline.columns.get_level_values(baseline_draw_level_name).unique()) + if "Nothing" not in available_baseline_draws: + raise ValueError( + f"Draw 'Nothing' not found in _dfbaseline. Available draws: {available_baseline_draws.tolist()}" + ) + + _plot_baseline = _dfbaseline["Nothing"].loc[:, [plot_stat]] + _plot_implementation = _df[draw].loc[:, [plot_stat]] + _plot = pd.concat([_plot_baseline, _plot_implementation]) + if _plot.empty: + raise ValueError(f"No plottable data remain for draw '{draw}' using stat '{plot_stat}'.") + + periods = _plot.index.get_level_values(1).unique() + ordered_period_labels, display_period_labels = _get_sorted_period_labels_and_display_labels(periods) + + fig_width = max(10, min(1.4 * len(_plot.index) + 4, 18)) + fig, ax = plt.subplots(figsize=(fig_width, 6)) + x = np.arange(len(periods)) + + for cause_label in CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.keys(): + print(f"************ {cause_label} *************") + ax.plot( + x, + _plot.xs(cause_label, level='label').to_numpy(), + marker="o", + linewidth=1.8, + markersize=4, + color=get_color_cause_of_death_or_daly_label(cause_label), + label=str(cause_label), + ) + + ax.set_xticks(x) + ax.set_xticklabels(display_period_labels, rotation=45, ha="right") + ax.set_xlabel("Period") + ax.set_ylabel("Number of deaths") + ax.set_title(str(draw)) + ax.grid(axis="y") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + ax.legend( + title="", + loc="center left", + bbox_to_anchor=(1.02, 0.5), + fontsize=8, + title_fontsize=9, + frameon=True, + ) + return fig, ax + + def do_bar_plot_with_ci( _df: pd.DataFrame, _param, diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py index 0ae03daef0..7aa09dcb71 100644 --- 
a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py @@ -18,6 +18,7 @@ do_barh_plot_with_ci, do_bar_plot_with_ci, plot_deaths_by_period_for_cause, + plot_deaths_by_period_for_draw, plot_hsi_counts_by_period_for_draw, plot_population_by_year, ) @@ -50,9 +51,6 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path target_period_label = target_period(TARGET_PERIOD) all_results = load_results_files(results_files) - results = all_results[results_files[1]] - - counts_of_hsi_in_implementation_period = all_results[results_files[1]]['counts_of_hsi_by_short_treatment_id'] counts_of_hsi_in_baseline = all_results[results_files[0]]['counts_of_hsi_by_period'] counts_of_hsi_in_baseline = counts_of_hsi_in_baseline.drop(['2010-2025'], level=1) @@ -121,6 +119,20 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path num_deaths_by_cause_label_baseline = all_results[results_files[0]]['num_deaths'].drop(['2010-2025'], level=1) num_deaths_by_cause_label_implementation = all_results[results_files[1]]['num_deaths'].drop(['2025-2041'], level=1) + for param in param_names: + draw = format_scenario_name(param) + fig, ax = plot_deaths_by_period_for_draw( + num_deaths_by_cause_label_implementation / 1e3, + draw, + _dfbaseline=num_deaths_by_cause_label_baseline / 1e3, + ) + name_of_plot = f"Deaths Over Time by Cause for {draw}" + ax.set_title(name_of_plot) + ax.set_ylabel("Number of deaths (/1000)") + outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) + fig.savefig(outfile) + plt.close(fig) + cause_labels = num_deaths_by_cause_label_implementation.index.get_level_values("label").unique() for cause_label in cause_labels: fig, ax = plot_deaths_by_period_for_cause( diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py 
b/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py index aeb28e938a..2fc505da11 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py @@ -1,6 +1,5 @@ """Utilities for extracting and processing results for treatment-id analyses.""" -from typing import Tuple import numpy as np import pandas as pd @@ -11,9 +10,6 @@ from tlo import Date from tlo.analysis.utils import make_age_grp_types, summarize, to_age_group - -TARGET_PERIOD = (Date(2025, 1, 1), Date(2041, 1, 1)) - def find_difference_relative_to_comparison(_ser: pd.Series, comparison: str, scaled: bool = False, @@ -30,7 +26,7 @@ def find_difference_relative_to_comparison(_ser: pd.Series, def get_total_population_by_year( _df: pd.DataFrame, - target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, + target_period_tuple: tuple[Date, Date], ) -> pd.Series: years_needed = [i.year for i in target_period_tuple] _df["year"] = pd.to_datetime(_df["date"]).dt.year @@ -43,14 +39,14 @@ def extract_deaths_total(df: pd.DataFrame) -> pd.Series: return pd.Series({"Total": len(df)}) -def target_period(target_period_tuple: tuple[Date, Date] = TARGET_PERIOD) -> str: +def target_period(target_period_tuple: tuple[Date, Date]) -> str: """Returns the target period as a string of the form YYYY-YYYY.""" return "-".join(str(t.year) for t in target_period_tuple) def get_periods_within_target_period( period_length_years: int, - target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, + target_period_tuple: tuple[Date, Date], ) -> list[tuple[str, tuple[int, int]]]: """Return chunks within target period as [(label, (start_year, end_year)), ...].""" if period_length_years <= 0: @@ -63,7 +59,7 @@ def get_periods_within_target_period( return periods -def get_parameter_names_from_scenario_file() -> Tuple[str]: +def get_parameter_names_from_scenario_file() -> tuple[str]: """Get tuple of scenario names from Scenario class used 
to create results.""" e = EffectOfEachTreatment() excluded = {"Only Hiv_Test_Selftest_*"} @@ -147,12 +143,12 @@ def find_mean_difference_extra_relative_to_comparison_dataframe( ) -def get_num_deaths_by_cause_label(_df: pd.DataFrame, target_period_tuple: tuple[Date, Date] = TARGET_PERIOD) -> pd.Series: +def get_num_deaths_by_cause_label(_df: pd.DataFrame, target_period_tuple: tuple[Date, Date]) -> pd.Series: """Return total deaths by label within target period.""" return _df.loc[pd.to_datetime(_df.date).between(*target_period_tuple)].groupby(_df["label"]).size() -def get_num_dalys_by_cause_label(_df: pd.DataFrame, target_period_tuple: tuple[Date, Date] = TARGET_PERIOD) -> pd.Series: +def get_num_dalys_by_cause_label(_df: pd.DataFrame, target_period_tuple: tuple[Date, Date]) -> pd.Series: """Return total DALYS by label within target period.""" return ( _df.loc[_df.year.between(*[i.year for i in target_period_tuple])] @@ -163,7 +159,7 @@ def get_num_dalys_by_cause_label(_df: pd.DataFrame, target_period_tuple: tuple[D def make_get_num_deaths_by_cause_label_and_period( period_length_years: int, - target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, + target_period_tuple: tuple[Date, Date] ): """Create helper that summarizes deaths by cause and period chunks + overall.""" periods = get_periods_within_target_period( @@ -194,7 +190,7 @@ def _get_num_deaths_by_cause_label_and_period(_df: pd.DataFrame) -> pd.Series: def make_get_num_dalys_by_cause_label_and_period( period_length_years: int, - target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, + target_period_tuple: tuple[Date, Date] ): """Create helper that summarizes DALYS by cause and period chunks + overall.""" periods = get_periods_within_target_period( @@ -230,7 +226,7 @@ def _get_num_dalys_by_cause_label_and_period(_df: pd.DataFrame) -> pd.Series: def get_num_deaths_by_age_group( _df: pd.DataFrame, age_grp_lookup: dict, - target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, + target_period_tuple: tuple[Date, 
Date], ): """Return total deaths by age-group in target period.""" return ( @@ -242,7 +238,7 @@ def get_num_deaths_by_age_group( def get_total_num_death_by_agegrp_and_label( _df: pd.DataFrame, - target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, + target_period_tuple: tuple[Date, Date], ) -> pd.Series: """Return deaths in target period by age-group and cause label.""" _df_limited_to_dates = _df.loc[_df["date"].between(*target_period_tuple)] @@ -252,7 +248,7 @@ def get_total_num_death_by_agegrp_and_label( def get_total_num_dalys_by_agegrp_and_label( _df: pd.DataFrame, - target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, + target_period_tuple: tuple[Date, Date], ) -> pd.Series: """Return DALYS in target period by age-group and cause label.""" return ( @@ -267,7 +263,7 @@ def get_total_num_dalys_by_agegrp_and_label( def get_counts_of_hsi_by_short_treatment_id( _df: pd.DataFrame, - target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, + target_period_tuple: tuple[Date, Date], ) -> pd.Series: """Get counts of short treatment ids occurring in target period.""" mask = pd.to_datetime(_df["date"]).between(*target_period_tuple) @@ -275,7 +271,7 @@ def get_counts_of_hsi_by_short_treatment_id( return _counts_by_treatment_id -def get_counts_of_appts(_df: pd.DataFrame, target_period_tuple: tuple[Date, Date] = TARGET_PERIOD) -> pd.Series: +def get_counts_of_appts(_df: pd.DataFrame, target_period_tuple: tuple[Date, Date]) -> pd.Series: """Get counts of appointments of each type being used in target period.""" return ( _df.loc[pd.to_datetime(_df["date"]).between(*target_period_tuple), "Number_By_Appt_Type_Code"] @@ -287,7 +283,7 @@ def get_counts_of_appts(_df: pd.DataFrame, target_period_tuple: tuple[Date, Date def make_get_counts_of_appts_by_period( period_length_years: int, - target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, + target_period_tuple: tuple[Date, Date], ): """Create helper that summarizes appointment counts by period chunks + overall.""" periods = 
get_periods_within_target_period( @@ -322,7 +318,7 @@ def _get_counts_of_appts_by_period(_df: pd.DataFrame) -> pd.Series: def make_get_counts_of_hsis_by_period( period_length_years: int, - target_period_tuple: tuple[Date, Date] = TARGET_PERIOD, + target_period_tuple: tuple[Date, Date], ): """Create helper that summarizes appointment counts by period chunks + overall.""" periods = get_periods_within_target_period( From dbf40a720453d54cbffbea50e49947c43923db12 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Thu, 16 Apr 2026 14:31:17 +0100 Subject: [PATCH 38/55] Costing code + working with collated susped/resume outputs --- .../analysis_effect_of_treatment_ids.py | 56 +++++++-- .../fig_utils.py | 119 ++++-------------- .../figures_effect_of_treatment_ids.py | 44 ++----- 3 files changed, 87 insertions(+), 132 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index b7b7f5607a..425dc83b23 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -1,5 +1,11 @@ """Produce plots to show the impact each set of treatments.""" +import warnings +from time import perf_counter +from pandas.errors import ( + PerformanceWarning, + SettingWithCopyWarning +) import argparse from datetime import date import glob @@ -62,8 +68,8 @@ # python src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-combined --target-start=2010-01-01 --target-end=2041-01-01 PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS = 1 -suspended_folder = Path("outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z") -results_folder = Path("outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-16T154500Z") +#suspended_folder = 
Path("outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z") +#results_folder = Path("outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-16T154500Z") # SCALING_FACTOR retrieved from the suspended run in # outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z # SCALING_FACTOR = 58.158436 @@ -102,6 +108,47 @@ def apply( target_period_tuple=target_period_tuple, ) results = {} + # Costs calculation + print("Calculating costs...") + discount_rate_cost = 0.03 + # Period relevant for costing + TARGET_PERIOD = (Date(2026, 1, 1), Date(2040, 12, 31)) # This is the period that is costed + relevant_period_for_costing = [i.year for i in TARGET_PERIOD] + list_of_relevant_years_for_costing = list(range(relevant_period_for_costing[0], relevant_period_for_costing[1] + 1)) + print("List of relevant years for costing:", list_of_relevant_years_for_costing) + start = perf_counter() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=PerformanceWarning) + warnings.filterwarnings("ignore", category=UserWarning) + warnings.filterwarnings("ignore", category=SettingWithCopyWarning) + input_costs = estimate_input_cost_of_scenarios( + results_folder, + resourcefilepath, + _years=list_of_relevant_years_for_costing, + cost_only_used_staff=True, + _discount_rate=discount_rate_cost, + _metric="median",) + + elapsed = perf_counter() - start + print(f"\n=== TIMING: estimate_input_cost_of_scenarios took {elapsed:.3f}s ===\n", flush=True) + results['input_costs'] = input_costs + + # Computing ICERs + print("Computing ICERs...") + start = perf_counter() + total_input_cost = input_costs.groupby(['draw', 'run'])['cost'].sum() + incremental_scenario_cost = (pd.DataFrame( + find_difference_relative_to_comparison( + total_input_cost, + comparison=0,) + ).T.iloc[0].unstack()).T + + elapsed = perf_counter() - start + print(f"\n=== TIMING: computing icers took {elapsed:.3f}s ===\n", flush=True) + + 
incremental_scenario_cost_summarized = summarize_cost_data(incremental_scenario_cost, _metric='median') + results['incremental_scenario_cost'] = incremental_scenario_cost_summarized + # Get total population by year print("Extracting population data...") total_population_by_year = ( @@ -111,7 +158,6 @@ def apply( key='population', custom_generate_series=lambda _df: get_total_population_by_year(_df, target_period_tuple), do_scaling=True, - suspended_results_folder=suspended_folder, autodiscover=True ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) ) @@ -126,7 +172,6 @@ def apply( key="HSI_Event", custom_generate_series=lambda _df: get_counts_of_hsi_by_short_treatment_id(_df, target_period_tuple), do_scaling=True, - suspended_results_folder=suspended_folder, autodiscover=True, ) .pipe(set_param_names_as_column_index_level_0, param_names=param_names) @@ -147,7 +192,6 @@ def apply( key="HSI_Event", custom_generate_series=lambda _df: get_num_hsi_by_period(_df), do_scaling=True, - suspended_results_folder=suspended_folder, autodiscover=True, ) .pipe(set_param_names_as_column_index_level_0, param_names=param_names) @@ -168,7 +212,6 @@ def apply( key="death", custom_generate_series=get_num_deaths_by_cause_label_and_period, do_scaling=True, - suspended_results_folder=suspended_folder, autodiscover=True, ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) ) @@ -196,7 +239,6 @@ def apply( key="dalys_stacked_by_age_and_time", custom_generate_series=get_num_dalys_by_cause_label_and_period, do_scaling=True, - suspended_results_folder=suspended_folder, autodiscover=True, ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) ) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py index 1cc67a1337..541d8303b9 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py @@ -85,60 
+85,26 @@ def plot_deaths_by_period_for_cause( _df: pd.DataFrame, cause_label: str, plot_stat: str = "central", - _dfbaseline: pd.DataFrame = None, ): """Plot deaths over time for a single cause, with one line per short treatment id.""" if not isinstance(_df.index, pd.MultiIndex) or _df.index.nlevels != 2: raise ValueError("_df index must be a 2-level MultiIndex with levels for label and period.") if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels != 2: raise ValueError("_df columns must be a 2-level MultiIndex with levels for treatment id and stat.") - if _dfbaseline is None: - raise ValueError("_dfbaseline is required.") - if not isinstance(_dfbaseline.index, pd.MultiIndex) or _dfbaseline.index.nlevels != 2: - raise ValueError("_dfbaseline index must be a 2-level MultiIndex with levels for label and period.") - if not isinstance(_dfbaseline.columns, pd.MultiIndex) or _dfbaseline.columns.nlevels != 2: - raise ValueError("_dfbaseline columns must be a 2-level MultiIndex with levels for draw and stat.") label_level_name = "label" if "label" in _df.index.names else _df.index.names[0] period_level_name = "period" if "period" in _df.index.names else _df.index.names[1] stat_level_name = "stat" if "stat" in _df.columns.names else _df.columns.names[1] - baseline_label_level_name = "label" if "label" in _dfbaseline.index.names else _dfbaseline.index.names[0] - baseline_stat_level_name = "stat" if "stat" in _dfbaseline.columns.names else _dfbaseline.columns.names[1] - baseline_draw_level_name = "draw" if "draw" in _dfbaseline.columns.names else _dfbaseline.columns.names[0] available_causes = pd.Index(_df.index.get_level_values(label_level_name).unique()) if cause_label not in available_causes: raise ValueError(f"Cause label '{cause_label}' not found. 
Available causes: {available_causes.tolist()}") - available_baseline_causes = pd.Index(_dfbaseline.index.get_level_values(baseline_label_level_name).unique()) - if cause_label not in available_baseline_causes: - raise ValueError( - f"Cause label '{cause_label}' not found in _dfbaseline. " - f"Available causes: {available_baseline_causes.tolist()}" - ) available_stats = pd.Index(_df.columns.get_level_values(stat_level_name).unique()) if plot_stat not in available_stats: raise ValueError(f"Statistic '{plot_stat}' not found. Available stats: {available_stats.tolist()}") - available_baseline_stats = pd.Index(_dfbaseline.columns.get_level_values(baseline_stat_level_name).unique()) - if plot_stat not in available_baseline_stats: - raise ValueError( - f"Statistic '{plot_stat}' not found in _dfbaseline. " - f"Available stats: {available_baseline_stats.tolist()}" - ) - available_baseline_draws = pd.Index(_dfbaseline.columns.get_level_values(baseline_draw_level_name).unique()) - if "Nothing" not in available_baseline_draws: - raise ValueError( - f"Draw 'Nothing' not found in _dfbaseline. 
Available draws: {available_baseline_draws.tolist()}" - ) _plot = _df.xs(cause_label, level=label_level_name).xs(plot_stat, axis=1, level=stat_level_name) - _plot_baseline = ( - _dfbaseline["Nothing"] - .xs(cause_label, level=baseline_label_level_name) - .loc[:, [plot_stat]] - .rename(columns={plot_stat: "Nothing"}) - ) - _plot = pd.concat([_plot_baseline, _plot]) if _plot.empty: raise ValueError(f"No plottable data remain for cause '{cause_label}' using stat '{plot_stat}'.") @@ -191,26 +157,17 @@ def plot_deaths_by_period_for_draw( _df: pd.DataFrame, draw: str, plot_stat: str = "central", - _dfbaseline: pd.DataFrame = None, ): """Plot deaths over time for a single draw, with one line per cause label.""" if not isinstance(_df.index, pd.MultiIndex) or _df.index.nlevels != 2: raise ValueError("_df index must be a 2-level MultiIndex with levels for label and period.") if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels != 2: raise ValueError("_df columns must be a 2-level MultiIndex with levels for draw and stat.") - if _dfbaseline is None: - raise ValueError("_dfbaseline is required.") - if not isinstance(_dfbaseline.index, pd.MultiIndex) or _dfbaseline.index.nlevels != 2: - raise ValueError("_dfbaseline index must be a 2-level MultiIndex with levels for label and period.") - if not isinstance(_dfbaseline.columns, pd.MultiIndex) or _dfbaseline.columns.nlevels != 2: - raise ValueError("_dfbaseline columns must be a 2-level MultiIndex with levels for draw and stat.") label_level_name = "label" if "label" in _df.index.names else _df.index.names[0] period_level_name = "period" if "period" in _df.index.names else _df.index.names[1] draw_level_name = "draw" if "draw" in _df.columns.names else _df.columns.names[0] stat_level_name = "stat" if "stat" in _df.columns.names else _df.columns.names[1] - baseline_stat_level_name = "stat" if "stat" in _dfbaseline.columns.names else _dfbaseline.columns.names[1] - baseline_draw_level_name = "draw" if "draw" in 
_dfbaseline.columns.names else _dfbaseline.columns.names[0] available_draws = pd.Index(_df.columns.get_level_values(draw_level_name).unique()) if draw not in available_draws: @@ -218,36 +175,36 @@ def plot_deaths_by_period_for_draw( available_stats = pd.Index(_df.columns.get_level_values(stat_level_name).unique()) if plot_stat not in available_stats: raise ValueError(f"Statistic '{plot_stat}' not found. Available stats: {available_stats.tolist()}") - available_baseline_stats = pd.Index(_dfbaseline.columns.get_level_values(baseline_stat_level_name).unique()) - if plot_stat not in available_baseline_stats: - raise ValueError( - f"Statistic '{plot_stat}' not found in _dfbaseline. " - f"Available stats: {available_baseline_stats.tolist()}" - ) - available_baseline_draws = pd.Index(_dfbaseline.columns.get_level_values(baseline_draw_level_name).unique()) - if "Nothing" not in available_baseline_draws: - raise ValueError( - f"Draw 'Nothing' not found in _dfbaseline. Available draws: {available_baseline_draws.tolist()}" - ) - _plot_baseline = _dfbaseline["Nothing"].loc[:, [plot_stat]] - _plot_implementation = _df[draw].loc[:, [plot_stat]] - _plot = pd.concat([_plot_baseline, _plot_implementation]) + _plot = _df[draw].loc[:, [plot_stat]] if _plot.empty: raise ValueError(f"No plottable data remain for draw '{draw}' using stat '{plot_stat}'.") - periods = _plot.index.get_level_values(1).unique() - ordered_period_labels, display_period_labels = _get_sorted_period_labels_and_display_labels(periods) + _plot = _plot[plot_stat].unstack(label_level_name) + ordered_causes = [ + cause_label for cause_label in CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.keys() + if cause_label in _plot.columns + ] + unordered_causes = sorted( + cause_label for cause_label in _plot.columns if cause_label not in CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP + ) + _plot = _plot.loc[:, ordered_causes + unordered_causes] - fig_width = max(10, min(1.4 * len(_plot.index) + 4, 18)) + ordered_period_labels, 
display_period_labels = _get_sorted_period_labels_and_display_labels(_plot.index.tolist()) + _plot = _plot.reindex(ordered_period_labels) + if _plot.empty: + raise ValueError(f"No plottable data remain for draw '{draw}' after reshaping by cause.") + + fig_width = max(10, min(1.4 * len(ordered_period_labels) + 4, 18)) fig, ax = plt.subplots(figsize=(fig_width, 6)) - x = np.arange(len(periods)) - for cause_label in CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.keys(): - print(f"************ {cause_label} *************") + for cause_label in _plot.columns: + cause_values = _plot[cause_label] + if cause_values.notna().sum() == 0: + continue ax.plot( - x, - _plot.xs(cause_label, level='label').to_numpy(), + ordered_period_labels, + cause_values.to_numpy(), marker="o", linewidth=1.8, markersize=4, @@ -255,7 +212,7 @@ def plot_deaths_by_period_for_draw( label=str(cause_label), ) - ax.set_xticks(x) + ax.set_xticks(ordered_period_labels) ax.set_xticklabels(display_period_labels, rotation=45, ha="right") ax.set_xlabel("Period") ax.set_ylabel("Number of deaths") @@ -476,7 +433,6 @@ def plot_hsi_counts_stacked_bar(_df: pd.DataFrame, plot_stat: str = "central"): def plot_hsi_counts_by_period_for_draw( _df: pd.DataFrame, draw: str, - _dfbaseline: pd.DataFrame ): """Plot central values with lower/upper intervals across period chunks for one draw.""" if not isinstance(_df.index, pd.MultiIndex) or _df.index.nlevels != 2: @@ -487,15 +443,7 @@ def plot_hsi_counts_by_period_for_draw( available_draws = sorted(set(_df.columns.get_level_values(0))) raise ValueError(f"Draw '{draw}' not found. Available draws: {available_draws}") - - # Because the baseline includes all treatment ids, we have a large number of HSIs being delivered; - # We are only interested in the HSIs indicated by the draw name i,e. 
for the draw Hiv_Treament, we - # only want to compare the number of Hiv_Treament HSIs until 2025 and during the implementation period - _dfbaseline = _dfbaseline['Nothing'] # because baseline was run only for Nothing scenario - treatment_id_of_interest = draw.replace("_*", "") - _dfbaseline = _dfbaseline[_dfbaseline.index.get_level_values(0) == treatment_id_of_interest] - - _df = pd.concat([_df[draw], _dfbaseline]) + _df = _df[draw] _plot = _df.reindex( pd.MultiIndex.from_product( [ @@ -569,8 +517,8 @@ def plot_hsi_counts_by_period_for_draw( return fig, ax -def plot_population_by_year(_df: pd.DataFrame, _dfbaseline: pd.DataFrame | None = None): - """Plot yearly central population values for all draws, optionally with baseline.""" +def plot_population_by_year(_df: pd.DataFrame): + """Plot yearly central population values for all draws.""" if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels != 2: raise ValueError("_df columns must be a 2-level MultiIndex with levels for draw and stat.") @@ -582,20 +530,7 @@ def plot_population_by_year(_df: pd.DataFrame, _dfbaseline: pd.DataFrame | None implementation_central = _df.xs("central", axis=1, level=stat_level_name).copy() implementation_central.columns = implementation_central.columns.to_series().str.replace(r"_\*$", "", regex=True) - - if _dfbaseline is None: - _plot = implementation_central - else: - if not isinstance(_dfbaseline.columns, pd.MultiIndex) or _dfbaseline.columns.nlevels != 2: - raise ValueError("_dfbaseline columns must be a 2-level MultiIndex with levels for draw and stat.") - baseline_draw_level_name = "draw" if "draw" in _dfbaseline.columns.names else _dfbaseline.columns.names[0] - baseline_draws = pd.Index(_dfbaseline.columns.get_level_values(baseline_draw_level_name).unique()) - if "Nothing" not in baseline_draws: - raise ValueError(f"Baseline draw 'Nothing' not found. 
Available baseline draws: {baseline_draws.tolist()}") - - baseline_central = _dfbaseline["Nothing"].loc[:, ["central"]].copy() - baseline_central.columns = pd.Index(["Nothing"]) - _plot = pd.concat([baseline_central, implementation_central], axis=1) + _plot = implementation_central _plot = _plot.loc[:, ~_plot.columns.duplicated()] _plot = _plot.sort_index() diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py index 7aa09dcb71..44fa7f8eb0 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py @@ -52,23 +52,8 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path all_results = load_results_files(results_files) - counts_of_hsi_in_baseline = all_results[results_files[0]]['counts_of_hsi_by_period'] - counts_of_hsi_in_baseline = counts_of_hsi_in_baseline.drop(['2010-2025'], level=1) - - counts_of_hsi_in_implementation_period = all_results[results_files[1]]['counts_of_hsi_by_period'] - counts_of_hsi_in_implementation_period = counts_of_hsi_in_implementation_period.drop(['2025-2041'], level=1) - # Values for the year 2025 have been logged in the implementation period; - # remove them from here and add them to the baseline dataframe. 
- x = counts_of_hsi_in_implementation_period['Nothing'] - nothing_hsis_in_2025 = x.xs('2025-2025', level = 'period') - nothing_hsis_in_2025 = pd.concat({"2025-2025": nothing_hsis_in_2025}, names=["period"]).reorder_levels(["appt_type", "period"]) - nothing_hsis_in_2025.columns = pd.MultiIndex.from_tuples( - [("Nothing", col) for col in nothing_hsis_in_2025.columns], - names=["draw", "stat"] - ) - counts_of_hsi_in_baseline = pd.concat([counts_of_hsi_in_baseline, nothing_hsis_in_2025], axis=0).sort_index() - # now we can safely drop 2025-2025 from the implementation period dataframe - counts_of_hsi_in_implementation_period = counts_of_hsi_in_implementation_period.drop('2025-2025', level=1) + counts_of_hsi_in_implementation_period = all_results[results_files[0]]['counts_of_hsi_by_period'] + counts_of_hsi_in_implementation_period = counts_of_hsi_in_implementation_period.drop(['2010-2041'], level=1) result_df_by_period = pd.DataFrame([ {'treatment_id_included': draw, 'nonzero_hsis': treatment_id, 'period': period} @@ -91,7 +76,6 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path fig, ax = plot_hsi_counts_by_period_for_draw( counts_of_hsi_in_implementation_period, draw, - counts_of_hsi_in_baseline, ) ax.set_title(name_of_plot) outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) @@ -99,9 +83,8 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path plt.close(fig) # Plot population growth - total_population_in_baseline = all_results[results_files[0]]['total_population_by_year'] - total_population_in_implementation = all_results[results_files[1]]['total_population_by_year'] - fig, ax = plot_population_by_year(total_population_in_implementation / 1e6, total_population_in_baseline / 1e6) + total_population_in_implementation = all_results[results_files[0]]['total_population_by_year'] + fig, ax = plot_population_by_year(total_population_in_implementation / 1e6) name_of_plot = "Population size by 
year" ax.set_title(name_of_plot) ax.set_ylabel("Population size (millions)") @@ -111,20 +94,17 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path # Plot number of deaths and DALYS by cause for each parameter, with confidence intervals, for the target period - pc_dalys_averted = all_results[results_files[1]]['pc_dalys_averted'] + pc_dalys_averted = all_results[results_files[0]]['pc_dalys_averted'] - num_dalys_by_cause_label_baseline = all_results[results_files[0]]['num_dalys'].drop(['2010-2025'], level=1) - num_dalys_by_cause_label_implementation = all_results[results_files[1]]['num_dalys'].drop(['2025-2041'], level=1) + num_dalys_by_cause_label_implementation = all_results[results_files[0]]['num_dalys'].drop(['2010-2041'], level=1) - num_deaths_by_cause_label_baseline = all_results[results_files[0]]['num_deaths'].drop(['2010-2025'], level=1) - num_deaths_by_cause_label_implementation = all_results[results_files[1]]['num_deaths'].drop(['2025-2041'], level=1) + num_deaths_by_cause_label_implementation = all_results[results_files[0]]['num_deaths'].drop(['2010-2041'], level=1) for param in param_names: draw = format_scenario_name(param) fig, ax = plot_deaths_by_period_for_draw( num_deaths_by_cause_label_implementation / 1e3, draw, - _dfbaseline=num_deaths_by_cause_label_baseline / 1e3, ) name_of_plot = f"Deaths Over Time by Cause for {draw}" ax.set_title(name_of_plot) @@ -138,7 +118,6 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path fig, ax = plot_deaths_by_period_for_cause( num_deaths_by_cause_label_implementation / 1e3, cause_label=cause_label, - _dfbaseline=num_deaths_by_cause_label_baseline / 1e3, ) name_of_plot = f"Deaths Over Time for {cause_label}" ax.set_title(name_of_plot) @@ -150,7 +129,6 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path fig, ax = plot_deaths_by_period_for_cause( num_dalys_by_cause_label_implementation / 1e3, cause_label=cause_label, - 
_dfbaseline=num_dalys_by_cause_label_baseline / 1e3, ) name_of_plot = f"DALYs Over Time for {cause_label}" ax.set_title(name_of_plot) @@ -159,7 +137,7 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path fig.savefig(outfile) plt.close(fig) - deaths_averted = all_results[results_files[1]]['num_deaths_averted'] + deaths_averted = all_results[results_files[0]]['num_deaths_averted'] deaths_averted_sorted = (deaths_averted.sort_values(by="mean", ascending=True) / 1e3) fig_height = max(6, min(0.28 * len(deaths_averted_sorted.index) + 4, 18)) fig, ax = plt.subplots(figsize=(10, fig_height)) @@ -175,7 +153,7 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path fig.savefig(outfile) plt.close(fig) - dalys_averted = all_results[results_files[1]]['num_dalys_averted'] + dalys_averted = all_results[results_files[0]]['num_dalys_averted'] dalys_averted_sorted = (dalys_averted.sort_values(by="mean", ascending=True) / 1e3) fig_height = max(6, min(0.28 * len(dalys_averted_sorted.index) + 4, 18)) fig, ax = plt.subplots(figsize=(10, fig_height)) @@ -191,7 +169,7 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path fig.savefig(outfile) plt.close(fig) - pc_deaths_averted = all_results[results_files[1]]['pc_deaths_averted'] + pc_deaths_averted = all_results[results_files[0]]['pc_deaths_averted'] pc_deaths_averted_sorted = (pc_deaths_averted.sort_values(by="mean", ascending=True)) fig_height = max(6, min(0.28 * len(pc_deaths_averted_sorted.index) + 4, 18)) fig, ax = plt.subplots(figsize=(10, fig_height)) @@ -207,7 +185,7 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path fig.savefig(outfile) plt.close(fig) - pc_dalys_averted = all_results[results_files[1]]['pc_dalys_averted'] + pc_dalys_averted = all_results[results_files[0]]['pc_dalys_averted'] pc_dalys_averted_sorted = (pc_dalys_averted.sort_values(by="mean", ascending=True)) fig_height = max(6, min(0.28 * 
len(pc_dalys_averted_sorted.index) + 4, 18)) fig, ax = plt.subplots(figsize=(10, fig_height)) From 9076f2c4819312b16874f3642524095c770b04f2 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Thu, 23 Apr 2026 16:17:30 +0100 Subject: [PATCH 39/55] Compute ICERs run by run --- .../analysis_effect_of_treatment_ids.py | 170 +++++++++++------ .../fig_utils.py | 95 ++++++++-- .../figures_effect_of_treatment_ids.py | 174 ++++++++++-------- 3 files changed, 294 insertions(+), 145 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index 425dc83b23..e0beb1d6a0 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -66,13 +66,10 @@ # python src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z figs/ --target-start=2010-01-01 --target-end=2025-12-31 # python src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-16T154500Z figs/ --target-start=2025-01-01 --target-end=2041-01-01 # python src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-combined --target-start=2010-01-01 --target-end=2041-01-01 - +# python src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-04-01T130709Z --target-start=2010-01-01 --target-end=2041-01-01 --do-comparison=False +# python src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-combined outputs/generated_outputs --target-start=2010-01-01 
--target-end=2041-01-01 --cost-checkpoint-profile=baseline --load-input-costs-from-checkpoint=True PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS = 1 -#suspended_folder = Path("outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z") -#results_folder = Path("outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-16T154500Z") -# SCALING_FACTOR retrieved from the suspended run in -# outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z -# SCALING_FACTOR = 58.158436 + EXCLUDED_HSIs = [ "FirstAttendance_Emergency", "FirstAttendance_NonEmergency", @@ -85,11 +82,25 @@ def parse_iso_date(value: str) -> Date: return Date(parsed.year, parsed.month, parsed.day) +def parse_bool(value: str) -> bool: + normalized = value.strip().lower() + if normalized in {"true", "t", "1", "yes", "y"}: + return True + if normalized in {"false", "f", "0", "no", "n"}: + return False + raise argparse.ArgumentTypeError( + f"Invalid boolean value '{value}'. Use True or False." 
+ ) + + def apply( results_folder: Path, output_folder: Path, resourcefilepath: Path, - target_period_tuple: tuple[Date, Date] + target_period_tuple: tuple[Date, Date], + do_comparison: bool = True, + cost_checkpoint_profile: str | None = None, + load_input_costs_from_checkpoint: bool | None = None, ): """Produce standard plots describing effect of each TREATMENT_ID.""" _, age_grp_lookup = make_age_grp_lookup() @@ -116,38 +127,65 @@ def apply( relevant_period_for_costing = [i.year for i in TARGET_PERIOD] list_of_relevant_years_for_costing = list(range(relevant_period_for_costing[0], relevant_period_for_costing[1] + 1)) print("List of relevant years for costing:", list_of_relevant_years_for_costing) - start = perf_counter() - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=PerformanceWarning) - warnings.filterwarnings("ignore", category=UserWarning) - warnings.filterwarnings("ignore", category=SettingWithCopyWarning) - input_costs = estimate_input_cost_of_scenarios( - results_folder, - resourcefilepath, - _years=list_of_relevant_years_for_costing, - cost_only_used_staff=True, - _discount_rate=discount_rate_cost, - _metric="median",) - - elapsed = perf_counter() - start - print(f"\n=== TIMING: estimate_input_cost_of_scenarios took {elapsed:.3f}s ===\n", flush=True) + checkpoint_path = None + if cost_checkpoint_profile is not None: + checkpoint_path = output_folder / "checkpoints" / f"input_costs_{cost_checkpoint_profile}.pkl" + + if checkpoint_path is not None and load_input_costs_from_checkpoint is True: + print(f"Loading input costs from checkpoint: {checkpoint_path}") + if not checkpoint_path.exists(): + raise FileNotFoundError( + f"Input-cost checkpoint not found at {checkpoint_path}. " + "Run once with --cost-checkpoint-profile and without " + "--load-input-costs-from-checkpoint to create it." 
+ ) + with open(checkpoint_path, "rb") as f: + input_costs = pickle.load(f) + else: + if checkpoint_path is None: + print("No cost checkpoint profile provided. Recomputing input costs.") + else: + print(f"Recomputing input costs") + start = perf_counter() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=PerformanceWarning) + warnings.filterwarnings("ignore", category=UserWarning) + warnings.filterwarnings("ignore", category=SettingWithCopyWarning) + input_costs = estimate_input_cost_of_scenarios( + results_folder, + resourcefilepath, + _years=list_of_relevant_years_for_costing, + cost_only_used_staff=True, + _discount_rate=discount_rate_cost, + _metric="median",) + + elapsed = perf_counter() - start + print(f"\n=== TIMING: estimate_input_cost_of_scenarios took {elapsed:.3f}s ===\n", flush=True) + if checkpoint_path is not None: + checkpoint_path.parent.mkdir(parents=True, exist_ok=True) + with open(checkpoint_path, "wb") as f: + pickle.dump(input_costs, f) + print(f"Saved input costs checkpoint to: {checkpoint_path}") results['input_costs'] = input_costs - # Computing ICERs - print("Computing ICERs...") - start = perf_counter() - total_input_cost = input_costs.groupby(['draw', 'run'])['cost'].sum() - incremental_scenario_cost = (pd.DataFrame( - find_difference_relative_to_comparison( - total_input_cost, - comparison=0,) - ).T.iloc[0].unstack()).T + # Computing incremental costs + if do_comparison: + print("Computing incremental_scenario_cost...") + start = perf_counter() + total_input_cost = input_costs.groupby(['draw', 'run'])['cost'].sum() + incremental_scenario_cost = (pd.DataFrame( + find_difference_relative_to_comparison( + total_input_cost, + comparison=0,) + ).T.iloc[0].unstack()).T + + elapsed = perf_counter() - start + print(f"\n=== TIMING: computing incremental_scenario_cost took {elapsed:.3f}s ===\n", flush=True) + + incremental_scenario_cost_summarized = summarize_cost_data(incremental_scenario_cost, _metric='median') + 
incremental_scenario_cost_summarized = incremental_scenario_cost_summarized.rename(columns = {'median':'central'}) - elapsed = perf_counter() - start - print(f"\n=== TIMING: computing icers took {elapsed:.3f}s ===\n", flush=True) - incremental_scenario_cost_summarized = summarize_cost_data(incremental_scenario_cost, _metric='median') - results['incremental_scenario_cost'] = incremental_scenario_cost_summarized # Get total population by year print("Extracting population data...") @@ -216,15 +254,21 @@ def apply( ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) ) - num_deaths_averted = summarize( - pd.DataFrame( - find_difference_extra_relative_to_comparison(num_deaths.sum(), comparison='Nothing')).T - ).iloc[0].unstack() - - pc_deaths_averted = 100.0 * summarize( - pd.DataFrame( - find_difference_extra_relative_to_comparison(num_deaths.sum(), comparison='Nothing', scaled=True)).T - ).iloc[0].unstack() + if do_comparison: + num_deaths_averted = compute_summary_statistics( + -1.0 * pd.DataFrame( + find_difference_extra_relative_to_comparison(num_deaths.sum(), comparison='Nothing')).T, + central_measure='median' + ).iloc[0].unstack() + + pc_deaths_averted = 100.0 * compute_summary_statistics( + -1.0 * pd.DataFrame( + find_difference_extra_relative_to_comparison(num_deaths.sum(), comparison='Nothing', scaled=True)).T, + central_measure='median' + ).iloc[0].unstack() + else: + num_deaths_averted = None + pc_deaths_averted = None num_deaths = compute_summary_statistics(num_deaths, central_measure='median') @@ -243,21 +287,29 @@ def apply( ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) ) - num_dalys_averted = summarize( - pd.DataFrame( - find_difference_extra_relative_to_comparison(num_dalys.sum(), comparison='Nothing')).T - ).iloc[0].unstack() + if do_comparison: + num_dalys_averted = ( + -1.0 * pd.DataFrame( + find_difference_extra_relative_to_comparison(num_dalys.sum(), comparison='Nothing')) - pc_dalys_averted = 100.0 * 
summarize( - pd.DataFrame( - find_difference_extra_relative_to_comparison(num_dalys.sum(), comparison='Nothing', scaled=True)).T - ).iloc[0].unstack() + ).T.iloc[0].unstack() + + pc_dalys_averted = 100.0 * compute_summary_statistics( + -1.0 * pd.DataFrame( + find_difference_extra_relative_to_comparison(num_dalys.sum(), comparison='Nothing', scaled=True)).T, + central_measure='median' + ).iloc[0].unstack() + incremental_scenario_cost_summarized.index = num_dalys_averted.index + icers_summarized = (incremental_scenario_cost_summarized /num_dalys_averted) + num_dalys_averted = compute_summary_statistics(num_dalys_averted.T, central_measure='median').iloc[0].unstack() num_dalys = compute_summary_statistics(num_dalys, central_measure='median') results['num_dalys'] = num_dalys - results['num_dalys_averted'] = num_dalys_averted - results['pc_dalys_averted'] = pc_dalys_averted + results['num_dalys_averted'] = num_dalys_averted if do_comparison else None + results['pc_dalys_averted'] = pc_dalys_averted if do_comparison else None + results['icers_summarized'] = icers_summarized if do_comparison else None + results['incremental_scenario_cost'] = incremental_scenario_cost_summarized if do_comparison else None return results @@ -268,6 +320,9 @@ def apply( parser.add_argument("output_folder", type=Path, nargs="?", default=None) parser.add_argument("--target-start", type=str, default=None) parser.add_argument("--target-end", type=str, default=None) + parser.add_argument("--do-comparison", type=parse_bool, default=True) + parser.add_argument("--cost-checkpoint-profile", type=str, default=None) + parser.add_argument("--load-input-costs-from-checkpoint", type=parse_bool, default=None) args = parser.parse_args() if (args.target_start is None) != (args.target_end is None): @@ -279,6 +334,10 @@ def apply( ) if not target_period_tuple[0] < target_period_tuple[1]: parser.error("--target-start must be earlier than --target-end.") + if args.load_input_costs_from_checkpoint is not None and 
args.cost_checkpoint_profile is None: + parser.error( + "Provide --cost-checkpoint-profile when using --load-input-costs-from-checkpoint." + ) out = args.output_folder if args.output_folder is not None else args.results_folder results = apply( @@ -286,6 +345,9 @@ def apply( output_folder=out, resourcefilepath=Path("./resources"), target_period_tuple=target_period_tuple, + do_comparison=args.do_comparison, + cost_checkpoint_profile=args.cost_checkpoint_profile, + load_input_costs_from_checkpoint=args.load_input_costs_from_checkpoint, ) outfile = ( f"{target_period_tuple[1].year:04d}-{target_period_tuple[1].month:02d}-{target_period_tuple[1].day:02d}" diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py index 541d8303b9..fe7638e9ae 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py @@ -81,6 +81,55 @@ def _get_sorted_period_labels_and_display_labels(period_labels: list[str]) -> tu return ordered_period_labels, display_labels +def _compute_sanitized_asymmetric_errors( + _df: pd.DataFrame, + central_col: str = "central", + lower_col: str = "lower", + upper_col: str = "upper", +) -> tuple[np.ndarray, list]: + """Return non-negative asymmetric errors and labels whose CI bounds were auto-corrected.""" + required_columns = {central_col, lower_col, upper_col} + missing_columns = required_columns.difference(set(_df.columns)) + if missing_columns: + raise ValueError(f"Missing required CI column(s): {sorted(missing_columns)}") + + ci = _df.loc[:, [central_col, lower_col, upper_col]].copy() + ci.columns = ["central", "lower", "upper"] + + swapped_bounds = ci["lower"] > ci["upper"] + if swapped_bounds.any(): + swapped = ci.loc[swapped_bounds, ["lower", "upper"]].copy() + ci.loc[swapped_bounds, "lower"] = swapped["upper"].to_numpy() + ci.loc[swapped_bounds, "upper"] = swapped["lower"].to_numpy() + + central_below_lower = 
ci["central"] < ci["lower"] + central_above_upper = ci["central"] > ci["upper"] + + lower_error = ci["central"] - ci["lower"] + upper_error = ci["upper"] - ci["central"] + lower_error = lower_error.where(~central_below_lower, 0.0).clip(lower=0.0) + upper_error = upper_error.where(~central_above_upper, 0.0).clip(lower=0.0) + + corrected_rows = swapped_bounds | central_below_lower | central_above_upper + errors = np.vstack([lower_error.to_numpy(dtype=float), upper_error.to_numpy(dtype=float)]) + return errors, list(ci.index[corrected_rows]) + + +def _warn_if_ci_corrected(plot_function_name: str, corrected_labels: list, max_examples: int = 5) -> None: + """Emit one warning with sample labels when CI bounds required correction.""" + unique_labels = list(dict.fromkeys(corrected_labels)) + if not unique_labels: + return + + sample = ", ".join(str(label) for label in unique_labels[:max_examples]) + sample_suffix = "..." if len(unique_labels) > max_examples else "" + warnings.warn( + f"{plot_function_name}: auto-corrected inconsistent CI values for {len(unique_labels)} row(s). 
" + f"Sample labels: {sample}{sample_suffix}", + stacklevel=2, + ) + + def plot_deaths_by_period_for_cause( _df: pd.DataFrame, cause_label: str, @@ -256,22 +305,32 @@ def do_bar_plot_with_ci( cause_labels = list(_df_nothing.index.get_level_values("label").unique()) + corrected_labels = [] for i, cause_label in enumerate(cause_labels): color = get_color_cause_of_death_or_daly_label(cause_label) one_cause = _df_nothing.xs(cause_label, level="label") bottom = 0.0 for j, period_label in enumerate(period_labels_for_bar_plots): - chunk_height = one_cause.loc[period_label, "mean"] if period_label in one_cause.index else 0.0 + chunk_height = one_cause.loc[period_label, "central"] if period_label in one_cause.index else 0.0 _ax.bar(i, chunk_height, bottom=bottom, color=color, alpha=0.9 if j % 2 == 0 else 0.35) bottom += chunk_height - mean_value = one_cause.loc[target_period_label, "mean"] - lower_value = one_cause.loc[target_period_label, "lower"] - upper_value = one_cause.loc[target_period_label, "upper"] - overall_yerr = np.array([[mean_value - lower_value], [upper_value - mean_value]]) + mean_value = one_cause.loc[target_period_label, "central"] + ci_row = pd.DataFrame( + { + "central": [mean_value], + "lower": [one_cause.loc[target_period_label, "lower"]], + "upper": [one_cause.loc[target_period_label, "upper"]], + }, + index=pd.Index([cause_label], name="label"), + ) + overall_yerr, corrected_row_labels = _compute_sanitized_asymmetric_errors(ci_row) + corrected_labels.extend(corrected_row_labels) _ax.errorbar(i, mean_value, yerr=overall_yerr, fmt="none", ecolor="black", capsize=2, linewidth=1.2) + _warn_if_ci_corrected("do_bar_plot_with_ci", corrected_labels) + _ax.set_xticks(range(len(cause_labels))) _ax.set_xticklabels(cause_labels, rotation=90) chunk_legend_handles = [ @@ -286,7 +345,7 @@ def plot_multiindex_dot_with_interval( _df: pd.DataFrame, year: int, _ax, - central_measure: str = "mean", + central_measure: str = "central", value_col: str = "population", sort: 
bool = True, x_label_rotation: int = 90, @@ -350,14 +409,15 @@ def plot_multiindex_dot_with_interval( def do_barh_plot_with_ci(_df: pd.DataFrame, _ax): """Make horizontal bar plot for each treatment id.""" - errors = pd.concat([_df["mean"] - _df["lower"], _df["upper"] - _df["mean"]], axis=1).T.to_numpy() + errors, corrected_labels = _compute_sanitized_asymmetric_errors(_df) _df.plot.barh( ax=_ax, - y="mean", + y="central", xerr=errors, legend=False, color=[_get_short_treatment_id_and_color(_id)[1] for _id in _df.index], ) + _warn_if_ci_corrected("do_barh_plot_with_ci", corrected_labels) def do_label_barh_plot(_df: pd.DataFrame, _ax): @@ -366,8 +426,8 @@ def do_label_barh_plot(_df: pd.DataFrame, _ax): pos_on_rhs = _ax.get_xticks()[-1] for label, row in _df.iterrows(): - if row["mean"] > 0: - annotation = f"{round(row['mean'], 1)} ({round(row['lower'])}-{round(row['upper'])}) %" + if row["central"] > 0: + annotation = f"{round(row['central'], 1)} ({round(row['lower'])}-{round(row['upper'])}) %" _ax.annotate( annotation, xy=(pos_on_rhs, y_cords.get(label)), @@ -477,11 +537,18 @@ def plot_hsi_counts_by_period_for_draw( fig_height = max(6, min(0.28 * len(central.index) + 6, 18)) fig, ax = plt.subplots(figsize=(fig_width, fig_height)) + corrected_labels = [] for treatment_id in central.index: central_values = central.loc[treatment_id].to_numpy() - lower_values = lower.loc[treatment_id].to_numpy() - upper_values = upper.loc[treatment_id].to_numpy() - yerr = np.vstack([central_values - lower_values, upper_values - central_values]) + ci_rows = pd.DataFrame( + { + "central": central.loc[treatment_id], + "lower": lower.loc[treatment_id], + "upper": upper.loc[treatment_id], + } + ) + yerr, corrected_periods = _compute_sanitized_asymmetric_errors(ci_rows) + corrected_labels.extend([f"{treatment_id}:{period}" for period in corrected_periods]) _, color = _get_short_treatment_id_and_color(treatment_id) ax.errorbar( x, @@ -496,6 +563,8 @@ def plot_hsi_counts_by_period_for_draw( 
label=str(treatment_id), ) + _warn_if_ci_corrected("plot_hsi_counts_by_period_for_draw", corrected_labels) + ax.set_xticks(x) ax.set_xticklabels(display_period_labels, rotation=45, ha="right") ax.set_xlabel("period") diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py index 44fa7f8eb0..87275f8a2e 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py @@ -24,6 +24,8 @@ ) from tlo import Date +# python src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py outputs/generated_outputs/2041-01-01_fullresults.pkl --output_folder=figs2 + TARGET_PERIOD = (Date(2025, 1, 1), Date(2041, 1, 1)) PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS = 1 @@ -35,24 +37,32 @@ def load_results_files(results_files: list[Path]) -> dict[Path, dict]: loaded[results_file] = pickle.load(f) return loaded + def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path = None): """Produce standard plots describing effect of each TREATMENT_ID.""" param_names = get_parameter_names_from_scenario_file() - period_labels_for_bar_plots = [ - label - for label, _ in get_periods_within_target_period( - period_length_years=PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS, - target_period_tuple=TARGET_PERIOD, - ) - ] - - target_period_label = target_period(TARGET_PERIOD) - all_results = load_results_files(results_files) + primary_results = all_results[results_files[0]] + + num_deaths_averted = primary_results.get('num_deaths_averted') + pc_deaths_averted = primary_results.get('pc_deaths_averted') + num_dalys_averted = primary_results.get('num_dalys_averted') + pc_dalys_averted = primary_results.get('pc_dalys_averted') + icers = primary_results.get('icers_summarized') + comparison_metrics_available = all( + metric is not None + for metric in ( + num_deaths_averted, + 
pc_deaths_averted, + num_dalys_averted, + pc_dalys_averted, + icers, + ) + ) - counts_of_hsi_in_implementation_period = all_results[results_files[0]]['counts_of_hsi_by_period'] + counts_of_hsi_in_implementation_period = primary_results['counts_of_hsi_by_period'] counts_of_hsi_in_implementation_period = counts_of_hsi_in_implementation_period.drop(['2010-2041'], level=1) result_df_by_period = pd.DataFrame([ @@ -83,7 +93,7 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path plt.close(fig) # Plot population growth - total_population_in_implementation = all_results[results_files[0]]['total_population_by_year'] + total_population_in_implementation = primary_results['total_population_by_year'] fig, ax = plot_population_by_year(total_population_in_implementation / 1e6) name_of_plot = "Population size by year" ax.set_title(name_of_plot) @@ -94,11 +104,9 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path # Plot number of deaths and DALYS by cause for each parameter, with confidence intervals, for the target period - pc_dalys_averted = all_results[results_files[0]]['pc_dalys_averted'] - - num_dalys_by_cause_label_implementation = all_results[results_files[0]]['num_dalys'].drop(['2010-2041'], level=1) + num_dalys_by_cause_label_implementation = primary_results['num_dalys'].drop(['2010-2041'], level=1) - num_deaths_by_cause_label_implementation = all_results[results_files[0]]['num_deaths'].drop(['2010-2041'], level=1) + num_deaths_by_cause_label_implementation = primary_results['num_deaths'].drop(['2010-2041'], level=1) for param in param_names: draw = format_scenario_name(param) @@ -137,69 +145,81 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path fig.savefig(outfile) plt.close(fig) - deaths_averted = all_results[results_files[0]]['num_deaths_averted'] - deaths_averted_sorted = (deaths_averted.sort_values(by="mean", ascending=True) / 1e3) - fig_height = max(6, min(0.28 * 
len(deaths_averted_sorted.index) + 4, 18)) - fig, ax = plt.subplots(figsize=(10, fig_height)) - name_of_plot = "Deaths Averted by Each Treatment ID" - do_barh_plot_with_ci(deaths_averted_sorted, ax) - ax.set_title(name_of_plot) - ax.set_xlabel("Number of deaths averted (/1000)") - ax.grid(axis="x") - ax.spines["top"].set_visible(False) - ax.spines["right"].set_visible(False) - outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) - fig.tight_layout() - fig.savefig(outfile) - plt.close(fig) + if comparison_metrics_available: + deaths_averted_sorted = (num_deaths_averted.sort_values(by="central", ascending=True) / 1e3) + fig_height = max(6, min(0.28 * len(deaths_averted_sorted.index) + 4, 18)) + fig, ax = plt.subplots(figsize=(10, fig_height)) + name_of_plot = "Deaths Averted by Each Treatment ID" + do_barh_plot_with_ci(deaths_averted_sorted, ax) + ax.set_title(name_of_plot) + ax.set_xlabel("Number of deaths averted (/1000)") + ax.grid(axis="x") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) + fig.tight_layout() + fig.savefig(outfile) + plt.close(fig) - dalys_averted = all_results[results_files[0]]['num_dalys_averted'] - dalys_averted_sorted = (dalys_averted.sort_values(by="mean", ascending=True) / 1e3) - fig_height = max(6, min(0.28 * len(dalys_averted_sorted.index) + 4, 18)) - fig, ax = plt.subplots(figsize=(10, fig_height)) - name_of_plot = "DALYS Averted by Each Treatment ID" - do_barh_plot_with_ci(dalys_averted_sorted, ax) - ax.set_title(name_of_plot) - ax.set_xlabel("DALYs averted (/1000)") - ax.grid(axis="x") - ax.spines["top"].set_visible(False) - ax.spines["right"].set_visible(False) - outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) - fig.tight_layout() - fig.savefig(outfile) - plt.close(fig) + dalys_averted_sorted = (num_dalys_averted.sort_values(by="central", ascending=True) / 1e3) + fig_height = max(6, 
min(0.28 * len(dalys_averted_sorted.index) + 4, 18)) + fig, ax = plt.subplots(figsize=(10, fig_height)) + name_of_plot = "DALYS Averted by Each Treatment ID" + do_barh_plot_with_ci(dalys_averted_sorted, ax) + ax.set_title(name_of_plot) + ax.set_xlabel("DALYs averted (/1000)") + ax.grid(axis="x") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) + fig.tight_layout() + fig.savefig(outfile) + plt.close(fig) - pc_deaths_averted = all_results[results_files[0]]['pc_deaths_averted'] - pc_deaths_averted_sorted = (pc_deaths_averted.sort_values(by="mean", ascending=True)) - fig_height = max(6, min(0.28 * len(pc_deaths_averted_sorted.index) + 4, 18)) - fig, ax = plt.subplots(figsize=(10, fig_height)) - name_of_plot = "Percentage Deaths Averted by Each Treatment ID" - do_barh_plot_with_ci(pc_deaths_averted_sorted, ax) - ax.set_title(name_of_plot) - ax.set_xlabel("Percentage of deaths averted") - ax.grid(axis="x") - ax.spines["top"].set_visible(False) - ax.spines["right"].set_visible(False) - outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) - fig.tight_layout() - fig.savefig(outfile) - plt.close(fig) + pc_deaths_averted_sorted = (pc_deaths_averted.sort_values(by="central", ascending=True)) + fig_height = max(6, min(0.28 * len(pc_deaths_averted_sorted.index) + 4, 18)) + fig, ax = plt.subplots(figsize=(10, fig_height)) + name_of_plot = "Percentage Deaths Averted by Each Treatment ID" + do_barh_plot_with_ci(pc_deaths_averted_sorted, ax) + ax.set_title(name_of_plot) + ax.set_xlabel("Percentage of deaths averted") + ax.grid(axis="x") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) + fig.tight_layout() + fig.savefig(outfile) + plt.close(fig) - pc_dalys_averted = all_results[results_files[0]]['pc_dalys_averted'] - pc_dalys_averted_sorted = 
(pc_dalys_averted.sort_values(by="mean", ascending=True)) - fig_height = max(6, min(0.28 * len(pc_dalys_averted_sorted.index) + 4, 18)) - fig, ax = plt.subplots(figsize=(10, fig_height)) - name_of_plot = "Percentage DALYs Averted by Each Treatment ID" - do_barh_plot_with_ci(pc_dalys_averted_sorted, ax) - ax.set_title(name_of_plot) - ax.set_xlabel("Percentage of DALYs averted") - ax.grid(axis="x") - ax.spines["top"].set_visible(False) - ax.spines["right"].set_visible(False) - outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) - fig.tight_layout() - fig.savefig(outfile) - plt.close(fig) + pc_dalys_averted_sorted = (pc_dalys_averted.sort_values(by="central", ascending=True)) + fig_height = max(6, min(0.28 * len(pc_dalys_averted_sorted.index) + 4, 18)) + fig, ax = plt.subplots(figsize=(10, fig_height)) + name_of_plot = "Percentage DALYs Averted by Each Treatment ID" + do_barh_plot_with_ci(pc_dalys_averted_sorted, ax) + ax.set_title(name_of_plot) + ax.set_xlabel("Percentage of DALYs averted") + ax.grid(axis="x") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) + fig.tight_layout() + fig.savefig(outfile) + plt.close(fig) + + icers_sorted = icers.sort_values(by="central", ascending=True) + fig_height = max(6, min(0.28 * len(icers_sorted.index) + 4, 18)) + fig, ax = plt.subplots(figsize=(10, fig_height)) + name_of_plot = "ICERs for Each Treatment ID" + do_barh_plot_with_ci(icers_sorted, ax) + ax.set_title(name_of_plot) + ax.set_xlabel("ICER (USD per DALY averted)") + ax.grid(axis="x") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) + fig.tight_layout() + fig.savefig(outfile) + plt.close(fig) if __name__ == "__main__": parser = argparse.ArgumentParser() @@ -208,5 +228,3 @@ def apply(results_files: list[Path], output_folder: Path, 
resourcefilepath: Path args = parser.parse_args() apply(results_files=args.results_files, output_folder=args.output_folder, resourcefilepath=Path("./resources")) - - plot_legends.apply(results_folder=None, output_folder=args.output_folder, resourcefilepath=Path("./resources")) From c4152c9046d92273a95c97747be57c45dcef5d3f Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Mon, 27 Apr 2026 14:13:06 +0100 Subject: [PATCH 40/55] ICER figures --- .../analysis_effect_of_treatment_ids.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index e0beb1d6a0..1e694a8e05 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -177,13 +177,17 @@ def apply( find_difference_relative_to_comparison( total_input_cost, comparison=0,) - ).T.iloc[0].unstack()).T + )) elapsed = perf_counter() - start print(f"\n=== TIMING: computing incremental_scenario_cost took {elapsed:.3f}s ===\n", flush=True) - incremental_scenario_cost_summarized = summarize_cost_data(incremental_scenario_cost, _metric='median') - incremental_scenario_cost_summarized = incremental_scenario_cost_summarized.rename(columns = {'median':'central'}) + incremental_scenario_cost = ( + incremental_scenario_cost.T.reorder_levels(["draw", "run"], axis=1).sort_index(axis=1) + ).pipe(set_param_names_as_column_index_level_0, param_names) + + incremental_scenario_cost_summarized = compute_summary_statistics(incremental_scenario_cost, 'median').iloc[0].unstack() + @@ -292,15 +296,16 @@ def apply( -1.0 * pd.DataFrame( find_difference_extra_relative_to_comparison(num_dalys.sum(), comparison='Nothing')) - ).T.iloc[0].unstack() + ) pc_dalys_averted = 100.0 * compute_summary_statistics( -1.0 * pd.DataFrame( 
find_difference_extra_relative_to_comparison(num_dalys.sum(), comparison='Nothing', scaled=True)).T, central_measure='median' ).iloc[0].unstack() - incremental_scenario_cost_summarized.index = num_dalys_averted.index - icers_summarized = (incremental_scenario_cost_summarized /num_dalys_averted) + # Run-by-run incremental cost-effectiveness ratio calculation + icers = incremental_scenario_cost.T /num_dalys_averted + icers_summarized = compute_summary_statistics(icers.T, central_measure='median').iloc[0].unstack() num_dalys_averted = compute_summary_statistics(num_dalys_averted.T, central_measure='median').iloc[0].unstack() num_dalys = compute_summary_statistics(num_dalys, central_measure='median') From d452444f126d55adecf9ed009db82738694295e7 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Tue, 28 Apr 2026 16:51:03 +0100 Subject: [PATCH 41/55] Extract number of HCWs and capacity used --- .../analysis_effect_of_treatment_ids.py | 54 ++++++- .../figures_effect_of_treatment_ids.py | 7 + .../results_processing_utils.py | 133 +++++++++++++++++- 3 files changed, 191 insertions(+), 3 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index 1e694a8e05..b8b328f439 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -40,7 +40,10 @@ set_param_names_as_column_index_level_0, target_period, find_difference_extra_relative_to_comparison, - find_difference_relative_to_comparison + find_difference_relative_to_comparison, + get_staff_count_by_facid_and_officer_type, + get_capacity_used_by_officer_type_and_facility_level, + melt_model_output_draws_and_runs ) from scripts.costing.cost_estimation import ( @@ -102,9 +105,13 @@ def apply( cost_checkpoint_profile: str | None = None, load_input_costs_from_checkpoint: bool | None = 
None, ): - """Produce standard plots describing effect of each TREATMENT_ID.""" + """Process results to produce objects needed for LCOA analysis.""" _, age_grp_lookup = make_age_grp_lookup() + # Extract districts and facility levels from the Master Facility List + mfl = pd.read_csv(resourcefilepath / "healthsystem" / "organisation" / "ResourceFile_Master_Facilities_List.csv") + facility_id_levels_dict = dict(zip(mfl['Facility_ID'], mfl['Facility_Level'])) + param_names = get_parameter_names_from_scenario_file() get_num_deaths_by_cause_label_and_period = make_get_num_deaths_by_cause_label_and_period( PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS, @@ -310,6 +317,49 @@ def apply( num_dalys = compute_summary_statistics(num_dalys, central_measure='median') + # Staff count by Facility ID + available_staff_count_by_facid_and_officertype = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='number_of_hcw_staff', + custom_generate_series=lambda _df:get_staff_count_by_facid_and_officer_type(_df), + do_scaling=True, + autodiscover=True, + ) + + # Update above series to get staff count by Facility_Level + available_staff_count_by_facid_and_officertype = available_staff_count_by_facid_and_officertype.reset_index().rename( + columns={'FacilityID': 'Facility_ID', 'Officer': 'OfficerType'}) + available_staff_count_by_facid_and_officertype['Facility_ID'] = pd.to_numeric( + available_staff_count_by_facid_and_officertype['Facility_ID']) + available_staff_count_by_facid_and_officertype['Facility_Level'] = available_staff_count_by_facid_and_officertype[ + 'Facility_ID'].map(facility_id_levels_dict) + idx = pd.IndexSlice + available_staff_count_by_level_and_officer_type = available_staff_count_by_facid_and_officertype.drop( + columns=[idx['Facility_ID']]).groupby([idx['year'], idx['Facility_Level'], idx['OfficerType']]).sum() + available_staff_count_by_level_and_officer_type = melt_model_output_draws_and_runs( + 
available_staff_count_by_level_and_officer_type.reset_index(), + id_vars=['year', 'Facility_Level', 'OfficerType']) + # make sure facility level is stored as string + available_staff_count_by_level_and_officer_type['Facility_Level'] = available_staff_count_by_level_and_officer_type[ + 'Facility_Level'].astype(str) + available_staff_count_by_level_and_officer_type = available_staff_count_by_level_and_officer_type.drop( + available_staff_count_by_level_and_officer_type[available_staff_count_by_level_and_officer_type[ + 'Facility_Level'] == '5'].index) # drop headquarters + # because we're only concerned with staff engaged in service delivery + available_staff_count_by_level_and_officer_type.rename(columns={'value': 'staff_count'}, inplace=True) + + + annual_capacity_used_by_cadre_and_level = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Capacity_By_FacID_and_Officer', + custom_generate_series=lambda df: get_capacity_used_by_officer_type_and_facility_level(df, facility_id_levels_dict), + do_scaling=True, + autodiscover=True, + ) + + results['num_dalys'] = num_dalys results['num_dalys_averted'] = num_dalys_averted if do_comparison else None results['pc_dalys_averted'] = pc_dalys_averted if do_comparison else None diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py index 87275f8a2e..2e3ab74d44 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py @@ -207,6 +207,13 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path plt.close(fig) icers_sorted = icers.sort_values(by="central", ascending=True) + # Do not plot treatment ids with very wide uncertainty + # CervicalCancer_Screening_Xpert_* -110.336087 -6.192826 5064.399284 + # BreastCancer_PalliativeCare_* -25.104866 
-5.740423 2611.046029 + # Hiv_Test_* -7335.183554 248.738016 856.794914 + + mask = ~icers_sorted.index.get_level_values("draw").isin(["Hiv_Test_*", "CervicalCancer_Screening_Xpert_*", "BreastCancer_PalliativeCare_*"]) + icers_sorted = icers_sorted[mask] fig_height = max(6, min(0.28 * len(icers_sorted.index) + 4, 18)) fig, ax = plt.subplots(figsize=(10, fig_height)) name_of_plot = "ICERs for Each Treatment ID" diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py b/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py index 2fc505da11..e62f3db4ac 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/results_processing_utils.py @@ -8,7 +8,12 @@ EffectOfEachTreatment, ) from tlo import Date -from tlo.analysis.utils import make_age_grp_types, summarize, to_age_group +from tlo.analysis.utils import ( + make_age_grp_types, + summarize, + to_age_group, + unflatten_flattened_multi_index_in_logging, +) def find_difference_relative_to_comparison(_ser: pd.Series, comparison: str, @@ -35,6 +40,7 @@ def get_total_population_by_year( ] + def extract_deaths_total(df: pd.DataFrame) -> pd.Series: return pd.Series({"Total": len(df)}) @@ -349,3 +355,128 @@ def _get_counts_of_hsis_by_period(_df: pd.DataFrame) -> pd.Series: return pd.concat([chunked, overall]).astype(int).sort_index() return _get_counts_of_hsis_by_period + + + + +# Get available staff count for each year and draw +def get_staff_count_by_facid_and_officer_type(_df: pd.DataFrame) -> pd.Series: + """ + Convert logged staff dictionary output into tidy format, + summing staff counts across all clinic columns. 
+ + Returns pd.Series indexed by: + (year, FacilityID, Officer) + """ + + df = _df.copy() + df["year"] = df["date"].dt.year + df = df.drop(columns=["date"]) + + clinic_cols = df.columns.difference(["year"]) + + long_frames = [] + + for clinic in clinic_cols: + expanded = df[[clinic, "year"]].copy() + expanded = expanded[expanded[clinic].notna()] + + expanded_dict = expanded[clinic].apply(pd.Series) + expanded_dict["year"] = expanded["year"].values + + long_frames.append(expanded_dict) + + # Combine all clinics + combined = pd.concat(long_frames, ignore_index=True) + + # Melt to long format + long_df = ( + combined + .melt(id_vars=["year"], + var_name="facility_officer", + value_name="count") + .dropna(subset=["count"]) + ) + + # Split FacilityID and Officer + parts = long_df["facility_officer"].str.split("_Officer_", expand=True) + + long_df["FacilityID"] = ( + parts[0] + .str.replace("FacilityID_", "", regex=False) + .astype(int) + ) + long_df["Officer"] = parts[1] + + # SUM ACROSS CLINICS HERE + result = ( + long_df + .groupby(["year", "FacilityID", "Officer"])["count"] + .sum() + .sort_index() + ) + + return result + +# Get list of cadres which were utilised in each run to get the count of staff used in the simulation +# Note that we still cost the full staff count for any cadre-Facility_Level combination that was ever used in a run, +# and not the amount of time which was used +def get_capacity_used_by_officer_type_and_facility_level( + _df: pd.DataFrame, + facility_id_levels_dict +) -> pd.Series: + """ + Parse logging output and return a Series indexed by: + (year, OfficerType, FacilityLevel) + + Collapses (sums) across clinics. + Uses facility_id_levels_dict to map FacilityID → FacilityLevel. + """ + + # ---- 1. Set year index ---- + _df = _df.set_axis(_df["date"].dt.year).drop(columns=["date"]) + _df.index.name = "year" + + # ---- 2. 
Unflatten logging columns ---- + _df = unflatten_flattened_multi_index_in_logging(_df) + + # Expect columns like: + # ('Clinic', 'facID_and_officer') + + col_df = _df.columns.to_frame(index=False) + + # ---- 3. Extract OfficerType ---- + col_df["OfficerType"] = ( + col_df["facID_and_officer"] + .str.split("_Officer_") + .str[-1] + ) + + # ---- 4. Extract FacilityID ---- + col_df["FacilityID"] = ( + col_df["facID_and_officer"] + .str.split("_Officer_") + .str[0] + .str.replace("FacilityID_", "", regex=False) + .astype(int) + ) + + # ---- 5. Map to FacilityLevel ---- + col_df["FacilityLevel"] = col_df["FacilityID"].map(facility_id_levels_dict) + + # ---- 6. Rebuild MultiIndex (drop clinic level) ---- + _df.columns = pd.MultiIndex.from_frame( + col_df[["OfficerType", "FacilityLevel"]] + ) + + # ---- 7. Collapse across clinics ---- + _df = _df.groupby(level=["OfficerType", "FacilityLevel"], axis=1).sum() + + # ---- 8. Return stacked format ---- + return _df.stack(["OfficerType", "FacilityLevel"]) + +def melt_model_output_draws_and_runs(_df, id_vars): + multi_index = pd.MultiIndex.from_tuples(_df.columns) + _df.columns = multi_index + melted_df = pd.melt(_df, id_vars=id_vars).rename(columns={'variable_0': 'draw', 'variable_1': 'run'}) + return melted_df From 08a8fbb256e445e91ef1b275faae689c5e879230 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Thu, 30 Apr 2026 15:42:52 +0100 Subject: [PATCH 42/55] Extract capacity used per cadre --- .../analysis_effect_of_treatment_ids.py | 66 +++++++------------ 1 file changed, 23 insertions(+), 43 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index b8b328f439..a8a9a86337 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -152,7 +152,7 @@ def apply( if checkpoint_path 
is None: print("No cost checkpoint profile provided. Recomputing input costs.") else: - print(f"Recomputing input costs") + print("Recomputing input costs") start = perf_counter() with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=PerformanceWarning) @@ -287,7 +287,7 @@ def apply( results['num_deaths_averted'] = num_deaths_averted results['pc_deaths_averted'] = pc_deaths_averted - num_dalys = ( + dalys = ( extract_results( results_folder, module="tlo.methods.healthburden", @@ -299,57 +299,29 @@ def apply( ) if do_comparison: - num_dalys_averted = ( + dalys_averted = ( -1.0 * pd.DataFrame( - find_difference_extra_relative_to_comparison(num_dalys.sum(), comparison='Nothing')) + find_difference_extra_relative_to_comparison(dalys.sum(), comparison='Nothing')) ) pc_dalys_averted = 100.0 * compute_summary_statistics( -1.0 * pd.DataFrame( - find_difference_extra_relative_to_comparison(num_dalys.sum(), comparison='Nothing', scaled=True)).T, + find_difference_extra_relative_to_comparison(dalys.sum(), comparison='Nothing', scaled=True)).T, central_measure='median' ).iloc[0].unstack() # Run-by-run incremental cost-effectiveness ratio calculation - icers = incremental_scenario_cost.T /num_dalys_averted + icers = incremental_scenario_cost.T /dalys_averted icers_summarized = compute_summary_statistics(icers.T, central_measure='median').iloc[0].unstack() - num_dalys_averted = compute_summary_statistics(num_dalys_averted.T, central_measure='median').iloc[0].unstack() + dalys_averted = compute_summary_statistics(dalys_averted.T, central_measure='median').iloc[0].unstack() - num_dalys = compute_summary_statistics(num_dalys, central_measure='median') - - # Staff count by Facility ID - available_staff_count_by_facid_and_officertype = extract_results( - results_folder, - module='tlo.methods.healthsystem.summary', - key='number_of_hcw_staff', - custom_generate_series=lambda _df:get_staff_count_by_facid_and_officer_type(_df), - do_scaling=True, - 
autodiscover=True, - ) - - # Update above series to get staff count by Facility_Level - available_staff_count_by_facid_and_officertype = available_staff_count_by_facid_and_officertype.reset_index().rename( - columns={'FacilityID': 'Facility_ID', 'Officer': 'OfficerType'}) - available_staff_count_by_facid_and_officertype['Facility_ID'] = pd.to_numeric( - available_staff_count_by_facid_and_officertype['Facility_ID']) - available_staff_count_by_facid_and_officertype['Facility_Level'] = available_staff_count_by_facid_and_officertype[ - 'Facility_ID'].map(facility_id_levels_dict) - idx = pd.IndexSlice - available_staff_count_by_level_and_officer_type = available_staff_count_by_facid_and_officertype.drop( - columns=[idx['Facility_ID']]).groupby([idx['year'], idx['Facility_Level'], idx['OfficerType']]).sum() - available_staff_count_by_level_and_officer_type = melt_model_output_draws_and_runs( - available_staff_count_by_level_and_officer_type.reset_index(), - id_vars=['year', 'Facility_Level', 'OfficerType']) - # make sure facility level is stored as string - available_staff_count_by_level_and_officer_type['Facility_Level'] = available_staff_count_by_level_and_officer_type[ - 'Facility_Level'].astype(str) - available_staff_count_by_level_and_officer_type = available_staff_count_by_level_and_officer_type.drop( - available_staff_count_by_level_and_officer_type[available_staff_count_by_level_and_officer_type[ - 'Facility_Level'] == '5'].index) # drop headquarters - # because we're only concerned with staff engaged in service delivery - available_staff_count_by_level_and_officer_type.rename(columns={'value': 'staff_count'}, inplace=True) + dalys = compute_summary_statistics(dalys, central_measure='median') + # This gives us the capacity used for each cadre and level, for each draw and run + # From this we will extract the run-wise delta in capacity used relative to the Nothing scenario, for each cadre + # and summarise. 
However since no HSIs are delivered in the Nothing scenario, the capacity used in that scenario is zero, + # so the delta relative to Nothing is just the capacity used in each scenario. annual_capacity_used_by_cadre_and_level = extract_results( results_folder, module='tlo.methods.healthsystem.summary', @@ -358,13 +330,21 @@ def apply( do_scaling=True, autodiscover=True, ) + # Sum across all years and facility levels; so we get the *total* capacity used over the whole period + # TODO: Check with Sakshi if this is what we want. + mask = annual_capacity_used_by_cadre_and_level.index.get_level_values(0).isin(range(2026, 2040)) + capacity_used_by_cadre = ( + annual_capacity_used_by_cadre_and_level[mask].groupby(['OfficerType']). + sum(). + pipe(set_param_names_as_column_index_level_0, param_names=param_names) + ) - - results['num_dalys'] = num_dalys - results['num_dalys_averted'] = num_dalys_averted if do_comparison else None + results['dalys'] = dalys + results['dalys_averted'] = dalys_averted if do_comparison else None results['pc_dalys_averted'] = pc_dalys_averted if do_comparison else None results['icers_summarized'] = icers_summarized if do_comparison else None results['incremental_scenario_cost'] = incremental_scenario_cost_summarized if do_comparison else None + results['capacity_used_by_cadre'] = capacity_used_by_cadre return results From 85823fa7412b44dfd52179bd7ca4d209f17cce84 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Fri, 1 May 2026 14:51:42 +0100 Subject: [PATCH 43/55] LCOA R script + python hook --- .../analysis_effect_of_treatment_ids.py | 14 +- .../fig_utils.py | 59 +++ .../figures_effect_of_treatment_ids.py | 62 ++- .../optimizer_preaggregated.R | 372 +++++++++++++++++ .../run_preaggregated_optimizer.py | 390 ++++++++++++++++++ 5 files changed, 883 insertions(+), 14 deletions(-) create mode 100644 src/scripts/lcoa_inputs_from_tlo_analyses/optimizer_preaggregated.R create mode 100644 
src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index a8a9a86337..6aeeb7fa51 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -14,15 +14,12 @@ import pickle from pathlib import Path import pandas as pd -import numpy as np -from matplotlib import pyplot as plt + + from tlo import Date from tlo.util import create_age_range_lookup -from scripts.lcoa_inputs_from_tlo_analyses.fig_utils import ( - do_bar_plot_with_ci, - plot_multiindex_dot_with_interval, -) + from scripts.lcoa_inputs_from_tlo_analyses.results_processing_utils import ( get_counts_of_appts, get_counts_of_hsi_by_short_treatment_id, @@ -63,7 +60,6 @@ extract_results, get_color_short_treatment_id, make_age_grp_lookup, - squarify_neat, summarize, ) # python src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-02-12T120859Z figs/ --target-start=2010-01-01 --target-end=2025-12-31 @@ -339,6 +335,10 @@ def apply( pipe(set_param_names_as_column_index_level_0, param_names=param_names) ) + capacity_used_by_cadre = ( + compute_summary_statistics(capacity_used_by_cadre, central_measure='median') + ) + results['dalys'] = dalys results['dalys_averted'] = dalys_averted if do_comparison else None results['pc_dalys_averted'] = pc_dalys_averted if do_comparison else None diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py index fe7638e9ae..46f550e904 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py @@ -437,6 +437,65 @@ def do_label_barh_plot(_df: pd.DataFrame, _ax): 
size=7, ) +def plot_cadre_time_by_draw_stacked( + _df: pd.DataFrame, + stat: str = "central", + figsize: tuple[float, float] | None = None, +): + """Plot horizontal stacked bars of cadre time use by draw for one summary stat.""" + if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels != 2: + raise ValueError("_df columns must be a 2-level MultiIndex with levels for draw and stat.") + + stat_level_name = "stat" if "stat" in _df.columns.names else _df.columns.names[1] + available_stats = pd.Index(_df.columns.get_level_values(stat_level_name).unique()) + if stat not in available_stats: + raise ValueError(f"Statistic '{stat}' not found. Available stats: {available_stats.tolist()}") + + _plot = _df.xs(stat, axis=1, level=stat_level_name).T.fillna(0.0) + if _plot.empty: + raise ValueError(f"No plottable data remain for stat '{stat}'.") + + _plot = _plot.loc[_plot.sum(axis=1).sort_values(ascending=True).index] + + if figsize is None: + fig_height = max(6, min(0.35 * len(_plot.index) + 3, 20)) + figsize = (12, fig_height) + fig, ax = plt.subplots(figsize=figsize) + + cadre_colors = list(plt.get_cmap("tab10").colors) + left = np.zeros(len(_plot.index), dtype=float) + y = np.arange(len(_plot.index)) + + for i, cadre in enumerate(_plot.columns): + values = _plot[cadre].to_numpy(dtype=float) + ax.barh( + y, + values, + left=left, + color=cadre_colors[i % len(cadre_colors)], + label=str(cadre), + ) + left += values + + ax.set_yticks(y) + ax.set_yticklabels([str(draw) for draw in _plot.index]) + ax.set_xlabel("Time used") + ax.set_ylabel("Draw") + ax.grid(axis="x") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + ax.legend( + loc="lower right", + fontsize=12, + handlelength=2.4, + handleheight=1.6, + borderpad=1.0, + labelspacing=0.8, + frameon=True, + ) + fig.tight_layout() + return fig, ax + def plot_hsi_counts_stacked_bar(_df: pd.DataFrame, plot_stat: str = "central"): """Plot horizontal stacked bars of HSI counts by draw for a 
selected summary statistic.""" if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels != 2: diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py index 2e3ab74d44..993fb202ba 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py @@ -17,6 +17,7 @@ make_graph_file_name, do_barh_plot_with_ci, do_bar_plot_with_ci, + plot_cadre_time_by_draw_stacked, plot_deaths_by_period_for_cause, plot_deaths_by_period_for_draw, plot_hsi_counts_by_period_for_draw, @@ -33,6 +34,7 @@ def load_results_files(results_files: list[Path]) -> dict[Path, dict]: loaded = {} for results_file in results_files: + print(f"Loading results file: {results_file}") with open(results_file, "rb") as f: loaded[results_file] = pickle.load(f) return loaded @@ -40,15 +42,19 @@ def load_results_files(results_files: list[Path]) -> dict[Path, dict]: def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path = None): """Produce standard plots describing effect of each TREATMENT_ID.""" + print("Starting figure generation for treatment-ID effects.") + print(f"Output folder: {output_folder}") param_names = get_parameter_names_from_scenario_file() + print(f"Loaded parameter names: {len(param_names)}") all_results = load_results_files(results_files) primary_results = all_results[results_files[0]] + print(f"Using primary results from: {results_files[0]}") num_deaths_averted = primary_results.get('num_deaths_averted') pc_deaths_averted = primary_results.get('pc_deaths_averted') - num_dalys_averted = primary_results.get('num_dalys_averted') + dalys_averted = primary_results.get('dalys_averted') pc_dalys_averted = primary_results.get('pc_dalys_averted') icers = primary_results.get('icers_summarized') comparison_metrics_available = all( @@ -56,14 +62,17 @@ def 
apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path for metric in ( num_deaths_averted, pc_deaths_averted, - num_dalys_averted, + dalys_averted, pc_dalys_averted, icers, ) ) + print(f"Comparison metrics available: {comparison_metrics_available}") counts_of_hsi_in_implementation_period = primary_results['counts_of_hsi_by_period'] counts_of_hsi_in_implementation_period = counts_of_hsi_in_implementation_period.drop(['2010-2041'], level=1) + capacity_used_by_cadre = primary_results.get("capacity_used_by_cadre") + result_df_by_period = pd.DataFrame([ {'treatment_id_included': draw, 'nonzero_hsis': treatment_id, 'period': period} @@ -82,9 +91,30 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path if param == "Nothing": continue draw = format_scenario_name(param) + print(f"Plotting yearly HSI counts for draw: {draw}") name_of_plot = f"Yearly HSI counts for {draw}" + # Since all HSIs will be delivered before the service availability switch + # retain only the treatment id of interest in this period to avoid plot + # clutter. + pre_switch_periods = ( + ['2010-2010', '2011-2011', '2012-2012', '2013-2013', + '2014-2014', '2015-2015', '2016-2016', '2017-2017', + '2018-2018', '2019-2019', '2020-2020', '2021-2021', + '2022-2022', '2023-2023', '2024-2024', '2025-2025'] + ) + mask_other_periods = ( + ~counts_of_hsi_in_implementation_period. + index. + get_level_values("period"). 
+ isin(pre_switch_periods) + ) + mask_early_periods = ( + counts_of_hsi_in_implementation_period.index.get_level_values("period").isin(pre_switch_periods) & + (counts_of_hsi_in_implementation_period.index.get_level_values("appt_type") == draw.replace("_*", "")) + ) + plot_this = counts_of_hsi_in_implementation_period[mask_other_periods | mask_early_periods] fig, ax = plot_hsi_counts_by_period_for_draw( - counts_of_hsi_in_implementation_period, + plot_this, draw, ) ax.set_title(name_of_plot) @@ -92,8 +122,17 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path fig.savefig(outfile) plt.close(fig) + print("Plotting capacity used by cadres across draws.") + fig, ax = plot_cadre_time_by_draw_stacked(capacity_used_by_cadre, stat="central") + name_of_plot = "Capacity Used by Cadres (2026-2040)" + ax.set_title(name_of_plot) + outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) + fig.savefig(outfile) + plt.close(fig) + # Plot population growth total_population_in_implementation = primary_results['total_population_by_year'] + print("Plotting population size by year.") fig, ax = plot_population_by_year(total_population_in_implementation / 1e6) name_of_plot = "Population size by year" ax.set_title(name_of_plot) @@ -102,14 +141,14 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path plt.close(fig) # Plot number of deaths and DALYS by cause for each parameter, with confidence intervals, for the target period - - - num_dalys_by_cause_label_implementation = primary_results['num_dalys'].drop(['2010-2041'], level=1) + num_dalys_by_cause_label_implementation = primary_results['dalys'].drop(['2010-2041'], level=1) num_deaths_by_cause_label_implementation = primary_results['num_deaths'].drop(['2010-2041'], level=1) + print("Prepared deaths and DALYs by cause for plotting.") for param in param_names: draw = format_scenario_name(param) + print(f"Plotting deaths over time by cause for draw: {draw}") fig, 
ax = plot_deaths_by_period_for_draw( num_deaths_by_cause_label_implementation / 1e3, draw, @@ -123,6 +162,7 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path cause_labels = num_deaths_by_cause_label_implementation.index.get_level_values("label").unique() for cause_label in cause_labels: + print(f"Plotting cause-specific time series for: {cause_label}") fig, ax = plot_deaths_by_period_for_cause( num_deaths_by_cause_label_implementation / 1e3, cause_label=cause_label, @@ -146,6 +186,7 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path plt.close(fig) if comparison_metrics_available: + print("Plotting comparison metrics: deaths/DALYs averted, percentages, and ICERs.") deaths_averted_sorted = (num_deaths_averted.sort_values(by="central", ascending=True) / 1e3) fig_height = max(6, min(0.28 * len(deaths_averted_sorted.index) + 4, 18)) fig, ax = plt.subplots(figsize=(10, fig_height)) @@ -160,8 +201,9 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path fig.tight_layout() fig.savefig(outfile) plt.close(fig) + print("Saved: Deaths Averted by Each Treatment ID") - dalys_averted_sorted = (num_dalys_averted.sort_values(by="central", ascending=True) / 1e3) + dalys_averted_sorted = (dalys_averted.sort_values(by="central", ascending=True) / 1e3) fig_height = max(6, min(0.28 * len(dalys_averted_sorted.index) + 4, 18)) fig, ax = plt.subplots(figsize=(10, fig_height)) name_of_plot = "DALYS Averted by Each Treatment ID" @@ -175,6 +217,7 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path fig.tight_layout() fig.savefig(outfile) plt.close(fig) + print("Saved: DALYS Averted by Each Treatment ID") pc_deaths_averted_sorted = (pc_deaths_averted.sort_values(by="central", ascending=True)) fig_height = max(6, min(0.28 * len(pc_deaths_averted_sorted.index) + 4, 18)) @@ -190,6 +233,7 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path 
fig.tight_layout() fig.savefig(outfile) plt.close(fig) + print("Saved: Percentage Deaths Averted by Each Treatment ID") pc_dalys_averted_sorted = (pc_dalys_averted.sort_values(by="central", ascending=True)) fig_height = max(6, min(0.28 * len(pc_dalys_averted_sorted.index) + 4, 18)) @@ -205,6 +249,7 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path fig.tight_layout() fig.savefig(outfile) plt.close(fig) + print("Saved: Percentage DALYs Averted by Each Treatment ID") icers_sorted = icers.sort_values(by="central", ascending=True) # Do not plot treatment ids with very wide uncertainty @@ -227,6 +272,9 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path fig.tight_layout() fig.savefig(outfile) plt.close(fig) + print("Saved: ICERs for Each Treatment ID") + + print("Finished generating figures.") if __name__ == "__main__": parser = argparse.ArgumentParser() diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/optimizer_preaggregated.R b/src/scripts/lcoa_inputs_from_tlo_analyses/optimizer_preaggregated.R new file mode 100644 index 0000000000..674d78b374 --- /dev/null +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/optimizer_preaggregated.R @@ -0,0 +1,372 @@ +# Standalone preaggregated optimizer for Python integration. +# Input contract: +# - ce_dalys, conscost, and hr_* are preaggregated totals at full implementation. +# - Decision variables represent fractions of each intervention implemented. +# - feascov and substitute/compulsory constraints still bound implementation shares. 
library(lpSolve)

# Solve the LCOA linear programme on preaggregated intervention totals.
#
# Arguments
#   inputs                      data.frame with one row per intervention and columns
#                               intcode, intervention, ce_dalys, conscost, feascov, ce_cost,
#                               hr_clin, hr_nur, hr_pharm, hr_lab, hr_ment, hr_nutri.
#   objective_input             "dalys" or "nethealth".
#   cet_input                   cost-effectiveness threshold used for net health.
#   drug_budget_input           drug/commodity budget; multiplied by drug_budget.scale.
#   hr.time.constraint          total minutes available per cadre (6 values, order as hr_* above).
#   hr.size                     workforce size per cadre (6 values).
#   hr.scale                    multiplier on workforce size per cadre (6 values).
#   use_feasiblecov_constraint  1 to cap each intervention at feascov * feascov_scale, 0 to cap at 1.
#   compcov_scale               scale on the minimum share of compulsory interventions.
#   compulsory_interventions    vector of intcodes that must be implemented.
#   substitutes                 list of intcode vectors; each group shares one coverage cap.
#   task_shifting_pharm         1 to allow nurses to take over pharmacy/nutrition tasks, else 0.
#
# Returns a named list of summary outputs of the optimal package.
# Stops with an error if a switch argument is invalid or the constraint system is inconsistent.
find_optimal_package <- function(inputs, objective_input, cet_input,
                                 drug_budget_input, drug_budget.scale,
                                 hr.time.constraint, hr.size, hr.scale,
                                 use_feasiblecov_constraint, feascov_scale, compcov_scale,
                                 compulsory_interventions, substitutes, task_shifting_pharm) { # % complements %

  # Fail fast on invalid switches instead of printing a message and carrying on
  if (!(objective_input %in% c("nethealth", "dalys"))) {
    stop("objective_input can take values dalys or nethealth")
  }
  if (!(use_feasiblecov_constraint %in% c(0, 1))) {
    stop("use_feasiblecov_constraint can take values 0 or 1")
  }
  if (!(task_shifting_pharm %in% c(0, 1))) {
    stop("task_shifting_pharm can take values 0 or 1")
  }

  ## Total DALYs averted based on CE evidence; this was per person in the original
  ## script but is cumulative in this version
  dalys <- as.numeric(as.character(inputs$ce_dalys))
  ## Cumulative cost of drugs and commodities
  drugcost <- as.numeric(as.character(inputs$conscost))
  maxcoverage <- as.numeric(as.character(inputs$feascov)) # Maximum possible coverage (demand constraint)
  ## Preaggregated mode: unit case scaling
  cases <- rep(1, length(dalys))
  ## Full cost based on CE evidence
  fullcost <- as.numeric(as.character(inputs$ce_cost))
  ## Health worker time required per intervention, by cadre.
  ## Converted column by column so that a single-row input keeps its data.frame shape.
  hr_cols <- c("hr_clin", "hr_nur", "hr_pharm", "hr_lab", "hr_ment", "hr_nutri")
  hrneed <- as.data.frame(lapply(inputs[hr_cols], function(col) as.numeric(as.character(col))))

  n <- length(dalys) # number of interventions included in the analysis

  ###################################
  # 3.1 Set up LPP
  ###################################

  # Objective - maximize DALYs
  #****************************************************
  # Define net health
  cet <- cet_input
  nethealth <- dalys - fullcost / cet

  # Define objective
  if (objective_input == "nethealth") {
    objective <- nethealth * cases
  } else {
    objective <- dalys * cases
  }

  # Constraints - 1. Drug Budget, 2. HR Requirements
  #****************************************************
  # 1. Drug Budget
  #----------------
  cons_drug <- drugcost * cases # Cost of drugs for the number of cases covered
  cons_drug.limit <- drug_budget_input * drug_budget.scale
  cons_drug.limit_base <- drug_budget_input # unscaled drug budget

  # 2. HR Constraints
  #---------------------
  ## HR minutes required to deliver intervention to all cases in need
  hr_minutes_need <- hrneed * cases[row(hrneed)]

  ## One joint constraint per cadre group
  clinicalstaff.need <- hr_minutes_need[c("hr_clin")]   # Medical officer + Clinical officer + Medical Assistant
  nursingstaff.need <- hr_minutes_need[c("hr_nur")]     # Nurse officer + Nurse midwife
  pharmstaff.need <- hr_minutes_need[c("hr_pharm")]     # Pharmacist + Pharmacist Technician + Pharmacist Assistant
  labstaff.need <- hr_minutes_need[c("hr_lab")]         # Lab officer + Lab technician + Lab assistant
  mentalstaff.need <- hr_minutes_need[c("hr_ment")]     # Mental health staff
  nutristaff.need <- hr_minutes_need[c("hr_nutri")]     # Nutrition staff

  # Total minutes available per cadre
  clinicalstaffmins.limit <- hr.time.constraint[1]
  nursingstaffmins.limit <- hr.time.constraint[2]
  pharmstaffmins.limit <- hr.time.constraint[3]
  labstaffmins.limit <- hr.time.constraint[4]
  mentalstaffmins.limit <- hr.time.constraint[5]
  nutristaffmins.limit <- hr.time.constraint[6]

  reps <- 4 # number of copies of the intervention set used to represent task-shifting options

  # Stack `reps` copies of a matrix on top of each other (the name is kept from the
  # original script; the copies are row-bound).
  duplicate_matrix_horizontally <- function(reps, matrix) {
    do.call(rbind, replicate(reps, matrix, simplify = FALSE))
  }

  if (task_shifting_pharm == 1) {
    # Copy 1: no shifting; copy 2: pharmacy tasks to nurses; copy 3: nutrition tasks to
    # nurses; copy 4: both pharmacy and nutrition tasks to nurses.
    clinicalstaff.need <-
      duplicate_matrix_horizontally(reps, as.matrix(clinicalstaff.need))
    nursingstaff.need <-
      rbind(as.matrix(nursingstaff.need),
            as.matrix(nursingstaff.need + pharmstaff.need),
            as.matrix(nursingstaff.need + nutristaff.need),
            as.matrix(nursingstaff.need + nutristaff.need + pharmstaff.need))
    pharmstaff.need <-
      rbind(as.matrix(pharmstaff.need), as.matrix(rep(0, n)),
            as.matrix(pharmstaff.need), as.matrix(rep(0, n)))
    labstaff.need <-
      duplicate_matrix_horizontally(reps, as.matrix(labstaff.need))
    mentalstaff.need <-
      duplicate_matrix_horizontally(reps, as.matrix(mentalstaff.need))
    nutristaff.need <-
      rbind(as.matrix(nutristaff.need), as.matrix(nutristaff.need),
            as.matrix(rep(0, n)), as.matrix(rep(0, n)))
  }

  # Total workforce size per cadre
  clinicalstaff.limit <- hr.size[1]
  nursingstaff.limit <- hr.size[2]
  pharmstaff.limit <- hr.size[3]
  labstaff.limit <- hr.size[4]
  mentalstaff.limit <- hr.size[5]
  nutristaff.limit <- hr.size[6]

  clinicalstaff.scale <- hr.scale[1]
  nursestaff.scale <- hr.scale[2]
  pharmstaff.scale <- hr.scale[3]
  labstaff.scale <- hr.scale[4]
  mentalstaff.scale <- hr.scale[5]
  nutristaff.scale <- hr.scale[6]

  ## Each column is the number of staff (of that cadre) needed to deliver each intervention:
  ## minutes needed divided by the minutes available per staff member
  ## (total minutes available / workforce size).
  cons_hr <-
    cbind(clinicalstaff.need / (clinicalstaffmins.limit / clinicalstaff.limit),
          nursingstaff.need / (nursingstaffmins.limit / nursingstaff.limit),
          pharmstaff.need / (pharmstaffmins.limit / pharmstaff.limit),
          labstaff.need / (labstaffmins.limit / labstaff.limit),
          mentalstaff.need / (mentalstaffmins.limit / mentalstaff.limit),
          nutristaff.need / (nutristaffmins.limit / nutristaff.limit))

  cons_hr.limit_base <-
    cbind(clinicalstaff.limit, nursingstaff.limit, pharmstaff.limit,
          labstaff.limit, mentalstaff.limit, nutristaff.limit)
  cons_hr.limit <-
    cbind(clinicalstaff.limit * clinicalstaff.scale,
          nursingstaff.limit * nursestaff.scale,
          pharmstaff.limit * pharmstaff.scale,
          labstaff.limit * labstaff.scale,
          mentalstaff.limit * mentalstaff.scale,
          nutristaff.limit * nutristaff.scale)

  colnames(cons_hr.limit) <- colnames(cons_hr)

  # Combine the constraints into one matrix
  #****************************************************
  # 1. HR
  #--------------------------------------
  cons_hr <- as.matrix(cons_hr)
  cons_hr.limit <- as.matrix(cons_hr.limit)

  # 2. Drug
  #--------------------------------------
  cons_drug <- as.matrix(cons_drug)
  cons_drug.limit <- as.matrix(cons_drug.limit)

  # 3. Max coverage
  #--------------------------------------
  cons.feascov <- diag(x = cases, n, n)
  if (use_feasiblecov_constraint == 1) {
    cons.feascov.limit <- as.matrix(maxcoverage * feascov_scale * cases)
  } else {
    cons.feascov.limit <- as.matrix(cases)
  }

  nonneg.lim <- as.matrix(rep(0, n))

  # 4. Compulsory interventions
  #--------------------------------------
  if (length(compulsory_interventions) > 0) {
    comp.count <- length(compulsory_interventions)
    cons_compulsory <- matrix(0L, comp.count, ncol = n)
    cons_compulsory.limit <- matrix(0L, comp.count, ncol = 1)
    for (i in seq_len(comp.count)) {
      a <- which(inputs$intcode == compulsory_interventions[i])
      cons_compulsory[i, a] <- cases[a]
      # Minimum share that must be implemented for a compulsory intervention
      cons_compulsory.limit[i] <- cases[a] * maxcoverage[a] * feascov_scale * compcov_scale
    }
  } else {
    # Placeholder all-zero row (0 >= 0) so the constraint block always has one row
    comp.count <- 1
    cons_compulsory <- matrix(0L, 1, ncol = n)
    cons_compulsory.limit <- matrix(0L, 1, ncol = 1)
  }
  cons_compulsory <- t(cons_compulsory)

  # placeholder#
  ###### % Complementary interventions code left out for now %

  # 5. Substitute interventions
  #--------------------------------------
  subs.count <- length(substitutes)
  cons_substitutes.limit <- matrix(0L, subs.count, ncol = 1)
  cons_substitutes <- matrix(0L, subs.count, ncol = n)

  # Upper bound on each intervention taken on its own
  if (use_feasiblecov_constraint == 1) {
    feasible_cases <- cases * maxcoverage * feascov_scale
  } else {
    feasible_cases <- cases
  }

  # For each substitute group, the combined implementation of its members may not exceed
  # the largest feasible value among them. seq_len() makes this a no-op when there are no
  # groups (1:0 would iterate over c(1, 0)).
  for (i in seq_len(subs.count)) {
    members <- which(inputs$intcode %in% unlist(substitutes[[i]]))
    cons_substitutes[i, members] <- cases[members]
    cons_substitutes.limit[i] <- max(0, feasible_cases[members])
  }
  cons_substitutes <- t(cons_substitutes)

  # Changes to constraints if task-shifting of pharmacist responsibility is allowed
  #--------------------------------------------------------------------------------
  if (task_shifting_pharm == 0) {
    print("No task shifting of pharmaceutical tasks")
  } else {
    # 1. Objective
    objective <- duplicate_matrix_horizontally(reps, as.matrix(objective))
    # 2. Drug budget constraint (cons_drug.limit does not need to be changed)
    cons_drug <- duplicate_matrix_horizontally(reps, as.matrix(cons_drug))
    # 3. Feasible coverage constraint
    cons.feascov <- duplicate_matrix_horizontally(reps, as.matrix(cons.feascov))
    # 4. Compulsory interventions
    cons_compulsory <- duplicate_matrix_horizontally(reps, as.matrix(cons_compulsory))
    # 5. Substitutes
    cons_substitutes <- duplicate_matrix_horizontally(reps, as.matrix(cons_substitutes))
  }

  # Combine constraints 1-5
  print(dim(t(cons_drug)))
  print(dim(t(cons_hr)))
  print(dim(t(cons.feascov)))
  print(dim(t(cons_compulsory)))
  print(dim(t(cons_substitutes)))
  # Row order: drug budget, HR cadres, coverage upper bounds, non-negativity,
  # compulsory minimums, substitute-group caps.
  cons.mat <- rbind(t(cons_drug), t(cons_hr), t(cons.feascov), t(cons.feascov), t(cons_compulsory), t(cons_substitutes)) # % cons_complements %
  cons.mat.limit <- rbind(cons_drug.limit, t(cons_hr.limit), cons.feascov.limit, nonneg.lim, cons_compulsory.limit, cons_substitutes.limit) # cons_complements.limit,
  print(dim(cons.mat))
  print(dim(cons.mat.limit))

  # Direction of relationship, in the same row order as cons.mat.
  # The number of HR rows is taken from cons_hr itself: a hard-coded count that does not
  # match the number of cadres shifts every later direction onto the wrong row.
  cons.dir <- c(rep("<=", 1 + ncol(cons_hr) + n),
                rep(">=", n),
                rep(">=", comp.count),
                rep("<=", subs.count))
  # % cons.dir <- c(cons.dir,rep("<=",length(complements))) %
  # Real assertion: `length(x) <- k` would silently truncate or pad with NA
  if (length(cons.dir) != nrow(cons.mat) || length(cons.dir) != nrow(cons.mat.limit)) {
    stop("Constraint directions (", length(cons.dir), ") do not match constraint rows (",
         nrow(cons.mat), ") and limits (", nrow(cons.mat.limit), ")")
  }

  ###################################
  # 3.2 - Run LPP
  ###################################
  solution.class <- lp("max", objective, cons.mat, cons.dir, cons.mat.limit, compute.sens = TRUE)
  if (solution.class$status != 0) {
    # lpSolve reports 0 on success and 2 when no feasible solution exists
    warning("lp() did not return an optimal solution (status ", solution.class$status, ")")
  }

  ###################################
  # 3.3 - Outputs
  ###################################
  solution <- as.data.frame(solution.class$solution)
  solution_hr <- as.data.frame(solution.class$solution) # uncollapsed version, used for HR use calculations below
  # Collapse solution by intervention (sum over the task-shifting copies)
  if (task_shifting_pharm == 1) {
    for (i in 1:length(dalys)) {
      for (j in 1:(reps - 1)) {
        solution[i, 1] <- solution[i, 1] + solution[i + length(dalys) * j, 1]
      }
    }
    solution <- as.data.frame(solution[1:length(dalys), 1])
  }

  # Number of interventions with a positive net health impact
  pos_nethealth.count <- sum(nethealth > 0)

  # Number of interventions in the optimal package
  intervention.count <- sum(solution != 0)

  # DALY burden averted as a % of avertible DALY burden
  solution_dalysaverted <- solution * cases * dalys # DALYs averted per intervention
  dalysavertible <- cases * dalys # Total DALYs that can be averted at maximum coverage
  dalys_averted.prop <- sum(unlist(lapply(solution_dalysaverted, sum))) / sum(unlist(lapply(dalysavertible, sum)))

  # Drugs and Commodities cost (% of budget available)
  solution_drugexp <- solution * cons_drug[1:length(dalys), ] # Drug budget required per intervention for the optimal solution
  total_drug_exp <- round(sum(unlist(lapply(solution_drugexp, sum))), 2) # Total drug budget required for the optimal solution
  drug_exp.prop <- total_drug_exp / cons_drug.limit_base

  # Total HR use (% of capacity)
  hr_cadres <- c("Clinical staff", "Nurse", "Pharmacist", "Lab", "Mental", "Nutrition")
  solution_hruse <- unlist(solution_hr) * cons_hr # Staff of each cadre used per intervention by the optimal solution
  if (task_shifting_pharm == 1) {
    for (i in 1:length(dalys)) {
      for (j in 1:(reps - 1)) {
        solution_hruse[i, ] <- solution_hruse[i, ] + solution_hruse[i + length(dalys) * j, ]
      }
    }
    solution_hruse <- solution_hruse[1:length(dalys), ]
  }
  total_hruse <- colSums(solution_hruse, na.rm = FALSE, dims = 1) # Staff of each cadre used by the optimal solution
  hruse.prop <- round(total_hruse / cons_hr.limit_base, 2)
  colnames(hruse.prop) <- hr_cadres

  # Cost-effectiveness Threshold: highest ICER among the interventions in the package
  icer <- fullcost / dalys
  temp <- cbind.data.frame(icer, solution, inputs$intervention)
  temp["solution.class$solution"] <- as.numeric(temp[[2]])
  temp["icer"] <- as.numeric(temp[[1]])
  cet_soln <- round(max(temp["icer"][temp["solution.class$solution"] > 0]), 2)

  list(
    "Total number of interventions in consideration" = length(dalys),
    "Number of interventions with positive net health impact" = pos_nethealth.count,
    "Number of interventions in the optimal package" = intervention.count,
    "Net DALYs averted" = solution.class$objval,
    "Total DALYs averted" = sum(unlist(lapply(solution_dalysaverted, sum))),
    "Proportion of DALY burden averted" = dalys_averted.prop,
    "Proportion of drug budget used" = drug_exp.prop,
    "Proportion of HR capacity used by cadre" = hruse.prop,
    "CET based on solution" = cet_soln
  )
}
a/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py b/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py new file mode 100644 index 0000000000..247a5bfde5 --- /dev/null +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py @@ -0,0 +1,390 @@ +"""Run the preaggregated R optimizer from Python inputs. + +This script: +1. Loads analysis outputs produced by analysis_effect_of_treatment_ids.py. +2. Builds and writes the optimizer intervention input CSV. +3. Loads optimizer constraints from a separate CSV. +4. Invokes optimizer_preaggregated.R::find_optimal_package via rpy2. +5. Writes optimizer outputs to JSON and optional CSV. +""" + +from __future__ import annotations + +from scripts.lcoa_inputs_from_tlo_analyses.results_processing_utils import format_scenario_name + +import argparse +import json +import pickle +from pathlib import Path +from typing import Any + +import numpy as np +import pandas as pd + + +OPTIMIZER_HR_COLS = ["hr_clin", "hr_nur", "hr_pharm", "hr_lab", "hr_ment", "hr_nutri"] +REQUIRED_OPT_INPUT_COLS = [ + "intcode", + "intervention", + "ce_dalys", + "conscost", + "feascov", + "ce_cost", + *OPTIMIZER_HR_COLS, +] + +GLOBAL_REQUIRED_KEYS = { + "objective_input", + "cet_input", + "drug_budget_input", + "drug_budget.scale", + "use_feasiblecov_constraint", + "feascov_scale", + "compcov_scale", + "task_shifting_pharm", +} + + +def _require_columns(df: pd.DataFrame, required: list[str], df_name: str) -> None: + missing = [c for c in required if c not in df.columns] + if missing: + raise ValueError(f"{df_name} is missing required columns: {missing}") + + +def _coerce_central_series(df: pd.DataFrame, name: str) -> pd.Series: + if "central" not in df.columns: + raise ValueError(f"{name} must contain a 'central' column.") + out = df["central"].copy() + out.index = out.index.map(format_scenario_name) + return out.astype(float) + + +def _build_hr_mapping(officer_types: list[str]) -> dict[str, 
str]: + """Map officer type labels from model output to optimizer cadre buckets. + + The mapping is deterministic and keyword-based. Unknown officer types are ignored. + """ + # TODO check with Sakshi + # This mapping silently ignores 'DCSA', 'Dental' and 'Radiography' cadres + mapping = ( + { + 'hr_clin': 'Clinical', + 'hr_lab': 'Laboratory', + 'hr_ment': 'Mental', + 'hr_nur': 'Nursing_and_Midwifery', + 'hr_nutri': 'Nutrition', + 'hr_pharm': 'Pharmacy', + } + ) + return mapping + + +def _aggregate_hr_by_intervention( + capacity_used_by_cadre: pd.DataFrame, + interventions: list[str], +) -> pd.DataFrame: + if "central" not in capacity_used_by_cadre.columns: + raise ValueError("results['capacity_used_by_cadre'] must contain a 'central' column.") + + if capacity_used_by_cadre.index.nlevels == 1: + officer_types = capacity_used_by_cadre.index.astype(str).tolist() + else: + officer_types = capacity_used_by_cadre.index.get_level_values(0).astype(str).tolist() + + mapping = _build_hr_mapping(officer_types) + if not mapping: + raise ValueError( + "Could not map any OfficerType values to optimizer HR buckets. " + "Check results['capacity_used_by_cadre'] index labels." 
+ ) + + total_by_bucket = {k: 0.0 for k in OPTIMIZER_HR_COLS} + for idx, value in capacity_used_by_cadre["central"].items(): + officer = str(idx[0] if isinstance(idx, tuple) else idx) + bucket = mapping.get(officer) + if bucket is not None: + total_by_bucket[bucket] += float(value) + + rows = [] + for intervention in interventions: + row = {"intervention": intervention, **total_by_bucket} + rows.append(row) + + return pd.DataFrame(rows) + + +def _build_optimizer_inputs(results: dict[str, Any], constraints_df: pd.DataFrame) -> pd.DataFrame: + dalys_averted = results.get("dalys_averted") + incremental_cost = results.get("incremental_scenario_cost") + capacity_used = results.get("capacity_used_by_cadre") + + if dalys_averted is None or incremental_cost is None or capacity_used is None: + raise ValueError( + "results pickle must contain 'dalys_averted', 'incremental_scenario_cost', and 'capacity_used_by_cadre'." + ) + + ce_dalys = _coerce_central_series(dalys_averted, "results['dalys_averted']") + ce_cost = _coerce_central_series(incremental_cost, "results['incremental_scenario_cost']") + + interventions = sorted(set(ce_dalys.index).intersection(set(ce_cost.index))) + if not interventions: + raise ValueError("No overlapping interventions found between DALYs and costs.") + + hr_df = _aggregate_hr_by_intervention(capacity_used, interventions) + + opt_df = pd.DataFrame( + { + "intcode": interventions, + "intervention": interventions, + "ce_dalys": [float(ce_dalys.loc[i]) for i in interventions], + "conscost": [float(ce_cost.loc[i]) for i in interventions], + "ce_cost": [float(ce_cost.loc[i]) for i in interventions], + } + ) + + opt_df = opt_df.merge(hr_df, on="intervention", how="left") + _require_columns(opt_df, REQUIRED_OPT_INPUT_COLS, "optimizer input dataframe") + + # Ensure numeric columns are numeric. 
+ numeric_cols = [c for c in REQUIRED_OPT_INPUT_COLS if c not in {"intcode", "intervention"}] + for col in numeric_cols: + opt_df[col] = pd.to_numeric(opt_df[col], errors="raise") + + return opt_df[REQUIRED_OPT_INPUT_COLS] + + +def _parse_constraints(constraints_df: pd.DataFrame, intervention_codes: list[str]) -> dict[str, Any]: + _require_columns(constraints_df, ["section"], "constraints CSV") + constraints_df = constraints_df.copy() + constraints_df["section"] = constraints_df["section"].astype(str).str.strip() + + global_df = constraints_df.loc[constraints_df["section"] == "global"].copy() + _require_columns(global_df, ["key", "value"], "global section") + globals_map = ( + global_df.dropna(subset=["key", "value"]) + .drop_duplicates(subset=["key"], keep="last") + .set_index("key")["value"] + .to_dict() + ) + + missing_global = sorted(k for k in GLOBAL_REQUIRED_KEYS if k not in globals_map) + if missing_global: + raise ValueError(f"Missing required global constraints: {missing_global}") + + def parse_vector(section_name: str) -> list[float]: + sec = constraints_df.loc[constraints_df["section"] == section_name].copy() + _require_columns(sec, ["key", "value"], f"{section_name} section") + order = OPTIMIZER_HR_COLS + sec = sec.dropna(subset=["key", "value"]).drop_duplicates(subset=["key"], keep="last") + sec_map = sec.set_index("key")["value"].to_dict() + missing = [k for k in order if k not in sec_map] + if missing: + raise ValueError(f"Missing {section_name} values for keys: {missing}") + return [float(sec_map[k]) for k in order] + + hr_time_constraint = parse_vector("hr_time_constraint") + hr_size = parse_vector("hr_size") + hr_scale = parse_vector("hr_scale") + + compulsory_df = constraints_df.loc[constraints_df["section"] == "compulsory"].copy() + compulsory_interventions: list[str] = [] + if not compulsory_df.empty: + _require_columns(compulsory_df, ["intcode"], "compulsory section") + compulsory_interventions = sorted( + { + format_scenario_name(i) + for i 
in compulsory_df["intcode"].dropna().astype(str).tolist() + } + ) + + unknown_compulsory = sorted(set(compulsory_interventions) - set(intervention_codes)) + if unknown_compulsory: + raise ValueError(f"Compulsory interventions not in optimizer input: {unknown_compulsory}") + + subs_df = constraints_df.loc[constraints_df["section"] == "substitute_group"].copy() + substitutes: list[list[str]] = [] + if not subs_df.empty: + _require_columns(subs_df, ["group_id", "intcode"], "substitute_group section") + subs_df = subs_df.dropna(subset=["group_id", "intcode"]).copy() + subs_df["intcode"] = subs_df["intcode"].astype(str).map(format_scenario_name) + for _, grp in subs_df.groupby("group_id"): + members = sorted(set(grp["intcode"].tolist())) + if len(members) > 0: + substitutes.append(members) + + unknown_subs = sorted({x for grp in substitutes for x in grp} - set(intervention_codes)) + if unknown_subs: + raise ValueError(f"Substitute interventions not in optimizer input: {unknown_subs}") + + return { + "objective_input": str(globals_map["objective_input"]), + "cet_input": float(globals_map["cet_input"]), + "drug_budget_input": float(globals_map["drug_budget_input"]), + "drug_budget.scale": float(globals_map["drug_budget.scale"]), + "use_feasiblecov_constraint": int(float(globals_map["use_feasiblecov_constraint"])), + "feascov_scale": float(globals_map["feascov_scale"]), + "compcov_scale": float(globals_map["compcov_scale"]), + "task_shifting_pharm": int(float(globals_map["task_shifting_pharm"])), + "hr.time.constraint": hr_time_constraint, + "hr.size": hr_size, + "hr.scale": hr_scale, + "compulsory_interventions": compulsory_interventions, + "substitutes": substitutes, + } + + +def _jsonify(value: Any) -> Any: + if isinstance(value, dict): + return {str(k): _jsonify(v) for k, v in value.items()} + if isinstance(value, (list, tuple)): + return [_jsonify(v) for v in value] + if isinstance(value, pd.Series): + return {str(k): _jsonify(v) for k, v in value.to_dict().items()} + 
if isinstance(value, pd.DataFrame): + return value.to_dict(orient="records") + if isinstance(value, np.ndarray): + return [_jsonify(v) for v in value.tolist()] + if isinstance(value, (np.integer,)): + return int(value) + if isinstance(value, (np.floating, float)): + return float(value) + if pd.isna(value): + return None + return value + + +def _flatten_optimizer_output_for_csv(result_obj: dict[str, Any]) -> pd.DataFrame: + rows: list[dict[str, Any]] = [] + for k, v in result_obj.items(): + if isinstance(v, dict): + for sk, sv in v.items(): + rows.append({"metric": str(k), "submetric": str(sk), "value": sv}) + elif isinstance(v, list): + rows.append({"metric": str(k), "submetric": "", "value": json.dumps(v)}) + else: + rows.append({"metric": str(k), "submetric": "", "value": v}) + return pd.DataFrame(rows) + + +def _run_optimizer_via_rpy2( + optimizer_inputs: pd.DataFrame, + constraints: dict[str, Any], + r_script_path: Path, +) -> dict[str, Any]: + try: + import rpy2.robjects as ro + from rpy2.robjects import pandas2ri + from rpy2.robjects.conversion import localconverter + from rpy2.robjects.vectors import FloatVector, IntVector, ListVector, StrVector + except ImportError as exc: + raise RuntimeError( + "rpy2 is required but not available. Install rpy2 in your Python environment." + ) from exc + + if not r_script_path.exists(): + raise FileNotFoundError(f"R script not found: {r_script_path}") + + ro.r["source"](str(r_script_path)) + r_func = ro.globalenv.find("find_optimal_package") + + with localconverter(ro.default_converter + pandas2ri.converter): + r_inputs = ro.conversion.py2rpy(optimizer_inputs) + + r_compulsory = StrVector(constraints["compulsory_interventions"]) + + # R code iterates as nested loops over substitutes[i], then k in j; this structure matches list(character vectors). 
+ r_subs = ListVector( + {str(i + 1): StrVector(group) for i, group in enumerate(constraints["substitutes"])} + ) + + result_r = r_func( + r_inputs, + constraints["objective_input"], + constraints["cet_input"], + constraints["drug_budget_input"], + constraints["drug_budget.scale"], + FloatVector(constraints["hr.time.constraint"]), + FloatVector(constraints["hr.size"]), + FloatVector(constraints["hr.scale"]), + IntVector([constraints["use_feasiblecov_constraint"]])[0], + constraints["feascov_scale"], + constraints["compcov_scale"], + r_compulsory, + r_subs, + IntVector([constraints["task_shifting_pharm"]])[0], + ) + + with localconverter(ro.default_converter + pandas2ri.converter): + result_py = ro.conversion.rpy2py(result_r) + + # rpy2 can return named list-like objects; normalize to dict. + if isinstance(result_py, dict): + return {str(k): _jsonify(v) for k, v in result_py.items()} + + if hasattr(result_r, "names"): + out: dict[str, Any] = {} + names = list(result_r.names) + for i, name in enumerate(names): + out[str(name)] = _jsonify(ro.conversion.rpy2py(result_r[i])) + return out + + raise RuntimeError("Unexpected optimizer result type from R.") + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser() + parser.add_argument("--analysis-results-pkl", type=Path, required=True) + parser.add_argument("--constraints-csv", type=Path, required=True) + parser.add_argument("--optimizer-input-csv", type=Path, required=True) + parser.add_argument("--optimizer-output-json", type=Path, required=True) + parser.add_argument("--optimizer-output-csv", type=Path, required=False, default=None) + parser.add_argument( + "--r-script-path", + type=Path, + default=Path("src/scripts/lcoa_inputs_from_tlo_analyses/optimizer_preaggregated.R"), + ) + return parser.parse_args() + + +def main() -> None: + args = _parse_args() + + if not args.analysis_results_pkl.exists(): + raise FileNotFoundError(f"Analysis results pickle not found: {args.analysis_results_pkl}") + if 
not args.constraints_csv.exists(): + raise FileNotFoundError(f"Constraints CSV not found: {args.constraints_csv}") + + with open(args.analysis_results_pkl, "rb") as f: + results = pickle.load(f) + + constraints_df = pd.read_csv(args.constraints_csv) + _require_columns(constraints_df, ["section"], "constraints CSV") + + optimizer_inputs = _build_optimizer_inputs(results, constraints_df) + args.optimizer_input_csv.parent.mkdir(parents=True, exist_ok=True) + optimizer_inputs.to_csv(args.optimizer_input_csv, index=False) + + constraints = _parse_constraints(constraints_df, intervention_codes=optimizer_inputs["intcode"].tolist()) + optimizer_output = _run_optimizer_via_rpy2( + optimizer_inputs=optimizer_inputs, + constraints=constraints, + r_script_path=args.r_script_path, + ) + + args.optimizer_output_json.parent.mkdir(parents=True, exist_ok=True) + with open(args.optimizer_output_json, "w", encoding="utf-8") as f: + json.dump(_jsonify(optimizer_output), f, indent=2, sort_keys=True) + + if args.optimizer_output_csv is not None: + flat_df = _flatten_optimizer_output_for_csv(optimizer_output) + args.optimizer_output_csv.parent.mkdir(parents=True, exist_ok=True) + flat_df.to_csv(args.optimizer_output_csv, index=False) + + print(f"Wrote optimizer input CSV: {args.optimizer_input_csv}") + print(f"Wrote optimizer output JSON: {args.optimizer_output_json}") + if args.optimizer_output_csv is not None: + print(f"Wrote optimizer output CSV: {args.optimizer_output_csv}") + + +if __name__ == "__main__": + main() From 3ecadebb6e9b8a5f2e1aec0a52a2d93b3348a947 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Tue, 5 May 2026 15:31:18 +0100 Subject: [PATCH 44/55] Retrieve annual capacity by cadre --- .../analysis_effect_of_treatment_ids.py | 30 +++++++++++++++---- .../figures_effect_of_treatment_ids.py | 10 ++----- .../run_preaggregated_optimizer.py | 5 +--- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git 
a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index 6aeeb7fa51..80932a7e62 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -172,6 +172,7 @@ def apply( results['input_costs'] = input_costs # Computing incremental costs + # TODO Check with Sakshi if these are annual costs; as everything else is annual. if do_comparison: print("Computing incremental_scenario_cost...") start = perf_counter() @@ -191,9 +192,6 @@ def apply( incremental_scenario_cost_summarized = compute_summary_statistics(incremental_scenario_cost, 'median').iloc[0].unstack() - - - # Get total population by year print("Extracting population data...") total_population_by_year = ( @@ -307,7 +305,7 @@ def apply( central_measure='median' ).iloc[0].unstack() # Run-by-run incremental cost-effectiveness ratio calculation - icers = incremental_scenario_cost.T /dalys_averted + icers = incremental_scenario_cost.T / dalys_averted icers_summarized = compute_summary_statistics(icers.T, central_measure='median').iloc[0].unstack() dalys_averted = compute_summary_statistics(dalys_averted.T, central_measure='median').iloc[0].unstack() @@ -318,6 +316,7 @@ def apply( # From this we will extract the run-wise delta in capacity used relative to the Nothing scenario, for each cadre # and summarise. However since no HSIs are delivered in the Nothing scenario, the capacity used in that scenario is zero, # so the delta relative to Nothing is just the capacity used in each scenario. + # TODO: Check if this should be scaled with population or used as is. 
annual_capacity_used_by_cadre_and_level = extract_results( results_folder, module='tlo.methods.healthsystem.summary', @@ -326,12 +325,14 @@ def apply( do_scaling=True, autodiscover=True, ) - # Sum across all years and facility levels; so we get the *total* capacity used over the whole period + # Sum across all facility levels and average across years; so we get the *average* annual capacity used over the whole period # TODO: Check with Sakshi if this is what we want. mask = annual_capacity_used_by_cadre_and_level.index.get_level_values(0).isin(range(2026, 2040)) capacity_used_by_cadre = ( - annual_capacity_used_by_cadre_and_level[mask].groupby(['OfficerType']). + annual_capacity_used_by_cadre_and_level[mask].groupby(['OfficerType', 'year']). sum(). + groupby(['OfficerType']). + mean(). pipe(set_param_names_as_column_index_level_0, param_names=param_names) ) @@ -339,12 +340,29 @@ def apply( compute_summary_statistics(capacity_used_by_cadre, central_measure='median') ) + # Get the total available caapacity by cadre needed for LCOA + # resources/healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv + daily_capacity_by_cadre_and_level = ( + pd.read_csv(resourcefilepath / "healthsystem" / "human_resources" / "actual" / "ResourceFile_Daily_Capabilities.csv") + ) + # This gives the total minutes available per day by cadre and facility level. 
+ # Sum across levels to get cadre specific constraints, and multiply by 365 to get annual capacity + annual_capacity_by_cadre = ( + daily_capacity_by_cadre_and_level.groupby('Officer_Category')['Total_Mins_Per_Day'].sum() * 365 + ) + + # Add consumables budget to this dictionary so that we have everything in one place + # USD 225,602,946 (203136642 from donors + 22466304 from the government) + # Ref Revision of Malawi’s Health Bene ts Package: A Critical Analysis of Policy Formulation and Implementation + results['annual_consumables_budget'] = 225602946 + results['dalys'] = dalys results['dalys_averted'] = dalys_averted if do_comparison else None results['pc_dalys_averted'] = pc_dalys_averted if do_comparison else None results['icers_summarized'] = icers_summarized if do_comparison else None results['incremental_scenario_cost'] = incremental_scenario_cost_summarized if do_comparison else None results['capacity_used_by_cadre'] = capacity_used_by_cadre + results['annual_capacity_by_cadre'] = annual_capacity_by_cadre return results diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py index 993fb202ba..9d87ea1357 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py @@ -6,28 +6,25 @@ import pickle import pandas as pd import matplotlib.pyplot as plt -from scripts.calibration_analyses.analysis_scripts import plot_legends + from scripts.lcoa_inputs_from_tlo_analyses.results_processing_utils import ( get_parameter_names_from_scenario_file, - get_periods_within_target_period, format_scenario_name, - target_period, ) from scripts.lcoa_inputs_from_tlo_analyses.fig_utils import ( make_graph_file_name, do_barh_plot_with_ci, - do_bar_plot_with_ci, plot_cadre_time_by_draw_stacked, plot_deaths_by_period_for_cause, plot_deaths_by_period_for_draw, 
plot_hsi_counts_by_period_for_draw, plot_population_by_year, ) -from tlo import Date + # python src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py outputs/generated_outputs/2041-01-01_fullresults.pkl --output_folder=figs2 -TARGET_PERIOD = (Date(2025, 1, 1), Date(2041, 1, 1)) + PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS = 1 @@ -73,7 +70,6 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path counts_of_hsi_in_implementation_period = counts_of_hsi_in_implementation_period.drop(['2010-2041'], level=1) capacity_used_by_cadre = primary_results.get("capacity_used_by_cadre") - result_df_by_period = pd.DataFrame([ {'treatment_id_included': draw, 'nonzero_hsis': treatment_id, 'period': period} for draw in counts_of_hsi_in_implementation_period.columns.get_level_values(0).unique() diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py b/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py index 247a5bfde5..a90c0120c6 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py @@ -86,10 +86,7 @@ def _aggregate_hr_by_intervention( if "central" not in capacity_used_by_cadre.columns: raise ValueError("results['capacity_used_by_cadre'] must contain a 'central' column.") - if capacity_used_by_cadre.index.nlevels == 1: - officer_types = capacity_used_by_cadre.index.astype(str).tolist() - else: - officer_types = capacity_used_by_cadre.index.get_level_values(0).astype(str).tolist() + officer_types = capacity_used_by_cadre.index.get_level_values(0).astype(str).tolist() mapping = _build_hr_mapping(officer_types) if not mapping: From c6fd68e7c7c3ad28a9346a694ba194de9c301146 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Mon, 11 May 2026 15:23:47 +0100 Subject: [PATCH 45/55] Run LCOA through a python script --- .../analysis_effect_of_treatment_ids.py | 13 +- 
.../optimizer_preaggregated.R | 15 +- .../run_preaggregated_optimizer.py | 259 ++++-------------- 3 files changed, 74 insertions(+), 213 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index 80932a7e62..cd5ee3a071 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -8,17 +8,12 @@ ) import argparse from datetime import date -import glob -import os -import zipfile import pickle from pathlib import Path import pandas as pd from tlo import Date -from tlo.util import create_age_range_lookup - from scripts.lcoa_inputs_from_tlo_analyses.results_processing_utils import ( get_counts_of_appts, @@ -351,9 +346,14 @@ def apply( daily_capacity_by_cadre_and_level.groupby('Officer_Category')['Total_Mins_Per_Day'].sum() * 365 ) + staff_count_by_cadre = ( + daily_capacity_by_cadre_and_level.groupby('Officer_Category')['Staff_Count'].sum() + ) + # Add consumables budget to this dictionary so that we have everything in one place # USD 225,602,946 (203136642 from donors + 22466304 from the government) - # Ref Revision of Malawi’s Health Bene ts Package: A Critical Analysis of Policy Formulation and Implementation + # Revision of Malawi’s Health Benefits Package: A Critical Analysis of Policy Formulation and Implementation + # https://doi.org/10.1016/j.vhri.2023.10.007 results['annual_consumables_budget'] = 225602946 results['dalys'] = dalys @@ -363,6 +363,7 @@ def apply( results['incremental_scenario_cost'] = incremental_scenario_cost_summarized if do_comparison else None results['capacity_used_by_cadre'] = capacity_used_by_cadre results['annual_capacity_by_cadre'] = annual_capacity_by_cadre + results['staff_count_by_cadre'] = staff_count_by_cadre return results diff --git 
a/src/scripts/lcoa_inputs_from_tlo_analyses/optimizer_preaggregated.R b/src/scripts/lcoa_inputs_from_tlo_analyses/optimizer_preaggregated.R index 674d78b374..fa43c848d2 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/optimizer_preaggregated.R +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/optimizer_preaggregated.R @@ -1,5 +1,4 @@ # Standalone preaggregated optimizer for Python integration. -# Input contract: # - ce_dalys, conscost, and hr_* are preaggregated totals at full implementation. # - Decision variables represent fractions of each intervention implemented. # - feascov and substitute/compulsory constraints still bound implementation shares. @@ -16,15 +15,15 @@ find_optimal_package <- function(inputs, objective_input, cet_input, ## script but is cumulative in this version dalys <- as.numeric(as.character(inputs$ce_dalys)) ## Cumulative cost of drugs and commodities - drugcost <- as.numeric(as.character(inputs$conscost)) + drugcost <- as.numeric(as.character(inputs$conscost)) maxcoverage <- as.numeric(as.character(inputs$feascov)) # Maximum possible coverage (demand constraint) ## Preaggregated mode: unit case scaling cases <- rep(1, length(dalys)) - ## Full cost per patient based on CE evidence + ## Full cost fullcost <- as.numeric(as.character(inputs$ce_cost)) - ## Number of minutes of health worker time requires per intervention per person + ## Number of minutes of health worker time required per intervention hrneed <- - inputs[c("hr_clin", "hr_nur", "hr_pharm", "hr_lab", "hr_ment", "hr_nutri")] + inputs[c("hr_clin", "hr_nur", "hr_pharm", "hr_lab", "hr_ment", "hr_nutri")] hrneed <- as.data.frame(apply(hrneed, 2, as.numeric)) n <- length(dalys) # number of interventions included in the analysis @@ -33,7 +32,7 @@ find_optimal_package <- function(inputs, objective_input, cet_input, # 3.1 Set up LPP ################################### - # Objective - maximize DALYs + # Objective - maximize DALYs #**************************************************** # 
Define net health cet <- cet_input @@ -59,7 +58,7 @@ find_optimal_package <- function(inputs, objective_input, cet_input, # 2. HR Constraints #--------------------- ## HR minutes required to deliver intervention to all cases in need - hr_minutes_need <- hrneed * cases[row(hrneed)] + hr_minutes_need <- hrneed * cases[row(hrneed)] ## Update HR constraints so that nurses, pharmacists, medical officers, etc. represent joint constraints ## Medical officer + Clinical officer + Medical Assistant @@ -69,7 +68,7 @@ find_optimal_package <- function(inputs, objective_input, cet_input, ## Pharmacist + Pharmacist Technician + Pharmacist Assistant pharmstaff.need <- hr_minutes_need[c("hr_pharm")] ## Lab officer + Lab technician + Lab assistant - labstaff.need <- hr_minutes_need[c("hr_lab")] + labstaff.need <- hr_minutes_need[c("hr_lab")] # remove CHW mentalstaff.need <- hr_minutes_need[c("hr_ment")] # Mental health staff nutristaff.need <- hr_minutes_need[c("hr_nutri")] # Nutrition staff diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py b/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py index a90c0120c6..09b36f252d 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py @@ -50,16 +50,14 @@ def _require_columns(df: pd.DataFrame, required: list[str], df_name: str) -> Non if missing: raise ValueError(f"{df_name} is missing required columns: {missing}") - -def _coerce_central_series(df: pd.DataFrame, name: str) -> pd.Series: - if "central" not in df.columns: - raise ValueError(f"{name} must contain a 'central' column.") +# TODO: Check with Sakshi if we only use the central value. 
+def _coerce_central_series(df: pd.DataFrame) -> pd.Series: out = df["central"].copy() out.index = out.index.map(format_scenario_name) return out.astype(float) -def _build_hr_mapping(officer_types: list[str]) -> dict[str, str]: +def _rename_hrh_map(_df): """Map officer type labels from model output to optimizer cadre buckets. The mapping is deterministic and keyword-based. Unknown officer types are ignored. @@ -68,166 +66,50 @@ def _build_hr_mapping(officer_types: list[str]) -> dict[str, str]: # This mapping silently ignores 'DCSA', 'Dental' and 'Radiography' cadres mapping = ( { - 'hr_clin': 'Clinical', - 'hr_lab': 'Laboratory', - 'hr_ment': 'Mental', - 'hr_nur': 'Nursing_and_Midwifery', - 'hr_nutri': 'Nutrition', - 'hr_pharm': 'Pharmacy', + 'Clinical': 'hr_clin', + 'Laboratory': 'hr_lab', + 'Mental': 'hr_ment', + 'Nursing_and_Midwifery': 'hr_nur', + 'Nutrition': 'hr_nutri', + 'Pharmacy': 'hr_pharm', } ) - return mapping - - -def _aggregate_hr_by_intervention( - capacity_used_by_cadre: pd.DataFrame, - interventions: list[str], -) -> pd.DataFrame: - if "central" not in capacity_used_by_cadre.columns: - raise ValueError("results['capacity_used_by_cadre'] must contain a 'central' column.") - - officer_types = capacity_used_by_cadre.index.get_level_values(0).astype(str).tolist() - - mapping = _build_hr_mapping(officer_types) - if not mapping: - raise ValueError( - "Could not map any OfficerType values to optimizer HR buckets. " - "Check results['capacity_used_by_cadre'] index labels." 
- ) - - total_by_bucket = {k: 0.0 for k in OPTIMIZER_HR_COLS} - for idx, value in capacity_used_by_cadre["central"].items(): - officer = str(idx[0] if isinstance(idx, tuple) else idx) - bucket = mapping.get(officer) - if bucket is not None: - total_by_bucket[bucket] += float(value) - - rows = [] - for intervention in interventions: - row = {"intervention": intervention, **total_by_bucket} - rows.append(row) + # Rename dataframes indexed by officer type to what they are called in the + # optimizer + renamed = _df.rename(index=mapping) + return renamed - return pd.DataFrame(rows) +def _build_optimizer_inputs(results: dict[str, Any]) -> pd.DataFrame: - -def _build_optimizer_inputs(results: dict[str, Any], constraints_df: pd.DataFrame) -> pd.DataFrame: dalys_averted = results.get("dalys_averted") incremental_cost = results.get("incremental_scenario_cost") - capacity_used = results.get("capacity_used_by_cadre") - - if dalys_averted is None or incremental_cost is None or capacity_used is None: - raise ValueError( - "results pickle must contain 'dalys_averted', 'incremental_scenario_cost', and 'capacity_used_by_cadre'." 
- ) + capacity_used = _rename_hrh_map(results.get("capacity_used_by_cadre")) - ce_dalys = _coerce_central_series(dalys_averted, "results['dalys_averted']") - ce_cost = _coerce_central_series(incremental_cost, "results['incremental_scenario_cost']") + ce_dalys = dalys_averted['central'] + ce_cost = incremental_cost['central'] + hr_needs = capacity_used.xs("central", level="stat", axis=1).T interventions = sorted(set(ce_dalys.index).intersection(set(ce_cost.index))) if not interventions: raise ValueError("No overlapping interventions found between DALYs and costs.") - hr_df = _aggregate_hr_by_intervention(capacity_used, interventions) - opt_df = pd.DataFrame( { - "intcode": interventions, + "intcode": range(1, len(interventions) + 1), "intervention": interventions, "ce_dalys": [float(ce_dalys.loc[i]) for i in interventions], - "conscost": [float(ce_cost.loc[i]) for i in interventions], "ce_cost": [float(ce_cost.loc[i]) for i in interventions], + "conscost": [float(ce_cost.loc[i]) for i in interventions], + "hr_clin": [float(hr_needs.loc[i, "hr_clin"]) for i in interventions], + "hr_nur": [float(hr_needs.loc[i, "hr_nur"]) for i in interventions], + "hr_pharm": [float(hr_needs.loc[i, "hr_pharm"]) for i in interventions], + "hr_lab": [float(hr_needs.loc[i, "hr_lab"]) for i in interventions], + "hr_ment": [float(hr_needs.loc[i, "hr_ment"]) for i in interventions], + "hr_nutri": [float(hr_needs.loc[i, "hr_nutri"]) for i in interventions], } ) - opt_df = opt_df.merge(hr_df, on="intervention", how="left") - _require_columns(opt_df, REQUIRED_OPT_INPUT_COLS, "optimizer input dataframe") - - # Ensure numeric columns are numeric. 
- numeric_cols = [c for c in REQUIRED_OPT_INPUT_COLS if c not in {"intcode", "intervention"}] - for col in numeric_cols: - opt_df[col] = pd.to_numeric(opt_df[col], errors="raise") - - return opt_df[REQUIRED_OPT_INPUT_COLS] - - -def _parse_constraints(constraints_df: pd.DataFrame, intervention_codes: list[str]) -> dict[str, Any]: - _require_columns(constraints_df, ["section"], "constraints CSV") - constraints_df = constraints_df.copy() - constraints_df["section"] = constraints_df["section"].astype(str).str.strip() - - global_df = constraints_df.loc[constraints_df["section"] == "global"].copy() - _require_columns(global_df, ["key", "value"], "global section") - globals_map = ( - global_df.dropna(subset=["key", "value"]) - .drop_duplicates(subset=["key"], keep="last") - .set_index("key")["value"] - .to_dict() - ) - - missing_global = sorted(k for k in GLOBAL_REQUIRED_KEYS if k not in globals_map) - if missing_global: - raise ValueError(f"Missing required global constraints: {missing_global}") - - def parse_vector(section_name: str) -> list[float]: - sec = constraints_df.loc[constraints_df["section"] == section_name].copy() - _require_columns(sec, ["key", "value"], f"{section_name} section") - order = OPTIMIZER_HR_COLS - sec = sec.dropna(subset=["key", "value"]).drop_duplicates(subset=["key"], keep="last") - sec_map = sec.set_index("key")["value"].to_dict() - missing = [k for k in order if k not in sec_map] - if missing: - raise ValueError(f"Missing {section_name} values for keys: {missing}") - return [float(sec_map[k]) for k in order] - - hr_time_constraint = parse_vector("hr_time_constraint") - hr_size = parse_vector("hr_size") - hr_scale = parse_vector("hr_scale") - - compulsory_df = constraints_df.loc[constraints_df["section"] == "compulsory"].copy() - compulsory_interventions: list[str] = [] - if not compulsory_df.empty: - _require_columns(compulsory_df, ["intcode"], "compulsory section") - compulsory_interventions = sorted( - { - format_scenario_name(i) - for i 
in compulsory_df["intcode"].dropna().astype(str).tolist() - } - ) - - unknown_compulsory = sorted(set(compulsory_interventions) - set(intervention_codes)) - if unknown_compulsory: - raise ValueError(f"Compulsory interventions not in optimizer input: {unknown_compulsory}") - - subs_df = constraints_df.loc[constraints_df["section"] == "substitute_group"].copy() - substitutes: list[list[str]] = [] - if not subs_df.empty: - _require_columns(subs_df, ["group_id", "intcode"], "substitute_group section") - subs_df = subs_df.dropna(subset=["group_id", "intcode"]).copy() - subs_df["intcode"] = subs_df["intcode"].astype(str).map(format_scenario_name) - for _, grp in subs_df.groupby("group_id"): - members = sorted(set(grp["intcode"].tolist())) - if len(members) > 0: - substitutes.append(members) - - unknown_subs = sorted({x for grp in substitutes for x in grp} - set(intervention_codes)) - if unknown_subs: - raise ValueError(f"Substitute interventions not in optimizer input: {unknown_subs}") - - return { - "objective_input": str(globals_map["objective_input"]), - "cet_input": float(globals_map["cet_input"]), - "drug_budget_input": float(globals_map["drug_budget_input"]), - "drug_budget.scale": float(globals_map["drug_budget.scale"]), - "use_feasiblecov_constraint": int(float(globals_map["use_feasiblecov_constraint"])), - "feascov_scale": float(globals_map["feascov_scale"]), - "compcov_scale": float(globals_map["compcov_scale"]), - "task_shifting_pharm": int(float(globals_map["task_shifting_pharm"])), - "hr.time.constraint": hr_time_constraint, - "hr.size": hr_size, - "hr.scale": hr_scale, - "compulsory_interventions": compulsory_interventions, - "substitutes": substitutes, - } + return opt_df def _jsonify(value: Any) -> Any: @@ -250,19 +132,6 @@ def _jsonify(value: Any) -> Any: return value -def _flatten_optimizer_output_for_csv(result_obj: dict[str, Any]) -> pd.DataFrame: - rows: list[dict[str, Any]] = [] - for k, v in result_obj.items(): - if isinstance(v, dict): - for sk, 
sv in v.items(): - rows.append({"metric": str(k), "submetric": str(sk), "value": sv}) - elif isinstance(v, list): - rows.append({"metric": str(k), "submetric": "", "value": json.dumps(v)}) - else: - rows.append({"metric": str(k), "submetric": "", "value": v}) - return pd.DataFrame(rows) - - def _run_optimizer_via_rpy2( optimizer_inputs: pd.DataFrame, constraints: dict[str, Any], @@ -272,7 +141,7 @@ def _run_optimizer_via_rpy2( import rpy2.robjects as ro from rpy2.robjects import pandas2ri from rpy2.robjects.conversion import localconverter - from rpy2.robjects.vectors import FloatVector, IntVector, ListVector, StrVector + from rpy2.robjects.vectors import FloatVector, ListVector, StrVector except ImportError as exc: raise RuntimeError( "rpy2 is required but not available. Install rpy2 in your Python environment." @@ -287,28 +156,37 @@ def _run_optimizer_via_rpy2( with localconverter(ro.default_converter + pandas2ri.converter): r_inputs = ro.conversion.py2rpy(optimizer_inputs) - r_compulsory = StrVector(constraints["compulsory_interventions"]) - - # R code iterates as nested loops over substitutes[i], then k in j; this structure matches list(character vectors). 
- r_subs = ListVector( - {str(i + 1): StrVector(group) for i, group in enumerate(constraints["substitutes"])} - ) + r_compulsory = StrVector([]) + r_subs = ListVector({}) result_r = r_func( r_inputs, - constraints["objective_input"], - constraints["cet_input"], - constraints["drug_budget_input"], - constraints["drug_budget.scale"], - FloatVector(constraints["hr.time.constraint"]), - FloatVector(constraints["hr.size"]), - FloatVector(constraints["hr.scale"]), - IntVector([constraints["use_feasiblecov_constraint"]])[0], - constraints["feascov_scale"], - constraints["compcov_scale"], + # whether we are maximizing DALYs or net health + "dalys", + # CET; I believe not relevant here but give a value anyway + 600, + # Drug budget input + constraints['annual_consumables_budget'], + # Drug budget scale set to 1 + 1, + # HR constraints; need to be clinical staff, nursing, pharmacy, lab, + # mental health, nutrition in that order + FloatVector(constraints["hr_time_constraint"]), + # HR size; same order as above + FloatVector(constraints["hr_size"]), + 1, + # use_feasiblecov_constraint; set to 0 to not use, 1 to use + 0, + # Feasible coverage scale; set to 1 + 1, + # Compulsory coverage scale; set to 1 + 1, + # Compulsory interventions; pass empty list, r_compulsory, + # substitutes; pass empty list r_subs, - IntVector([constraints["task_shifting_pharm"]])[0], + # task_shifting_pharm; set to 0 to not allow, 1 to allow + 0, ) with localconverter(ro.default_converter + pandas2ri.converter): @@ -331,10 +209,7 @@ def _run_optimizer_via_rpy2( def _parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser() parser.add_argument("--analysis-results-pkl", type=Path, required=True) - parser.add_argument("--constraints-csv", type=Path, required=True) - parser.add_argument("--optimizer-input-csv", type=Path, required=True) parser.add_argument("--optimizer-output-json", type=Path, required=True) - parser.add_argument("--optimizer-output-csv", type=Path, required=False, 
default=None) parser.add_argument( "--r-script-path", type=Path, @@ -348,20 +223,17 @@ def main() -> None: if not args.analysis_results_pkl.exists(): raise FileNotFoundError(f"Analysis results pickle not found: {args.analysis_results_pkl}") - if not args.constraints_csv.exists(): - raise FileNotFoundError(f"Constraints CSV not found: {args.constraints_csv}") with open(args.analysis_results_pkl, "rb") as f: results = pickle.load(f) - constraints_df = pd.read_csv(args.constraints_csv) - _require_columns(constraints_df, ["section"], "constraints CSV") - - optimizer_inputs = _build_optimizer_inputs(results, constraints_df) - args.optimizer_input_csv.parent.mkdir(parents=True, exist_ok=True) - optimizer_inputs.to_csv(args.optimizer_input_csv, index=False) + constraints = ({ + 'annual_consumables_budget': results.get("annual_consumables_budget"), + 'hr_time_constraint': _rename_hrh_map(results.get("annual_capacity_by_cadre")), + 'hr_size': _rename_hrh_map(results.get("staff_count_by_cadre")) + }) + optimizer_inputs = _build_optimizer_inputs(results) - constraints = _parse_constraints(constraints_df, intervention_codes=optimizer_inputs["intcode"].tolist()) optimizer_output = _run_optimizer_via_rpy2( optimizer_inputs=optimizer_inputs, constraints=constraints, @@ -372,16 +244,5 @@ def main() -> None: with open(args.optimizer_output_json, "w", encoding="utf-8") as f: json.dump(_jsonify(optimizer_output), f, indent=2, sort_keys=True) - if args.optimizer_output_csv is not None: - flat_df = _flatten_optimizer_output_for_csv(optimizer_output) - args.optimizer_output_csv.parent.mkdir(parents=True, exist_ok=True) - flat_df.to_csv(args.optimizer_output_csv, index=False) - - print(f"Wrote optimizer input CSV: {args.optimizer_input_csv}") - print(f"Wrote optimizer output JSON: {args.optimizer_output_json}") - if args.optimizer_output_csv is not None: - print(f"Wrote optimizer output CSV: {args.optimizer_output_csv}") - - if __name__ == "__main__": main() From 
fd01d1db4cf0bb71be84d9ec04f5166dcda31bfa Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Wed, 13 May 2026 16:22:17 +0100 Subject: [PATCH 46/55] TLO-LCOA pipeline --- .../optimizer_preaggregated.R | 65 ++++++- .../run_preaggregated_optimizer.py | 182 ++++-------------- 2 files changed, 95 insertions(+), 152 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/optimizer_preaggregated.R b/src/scripts/lcoa_inputs_from_tlo_analyses/optimizer_preaggregated.R index fa43c848d2..5465dabcd7 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/optimizer_preaggregated.R +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/optimizer_preaggregated.R @@ -4,6 +4,7 @@ # - feascov and substitute/compulsory constraints still bound implementation shares. library(lpSolve) +library(jsonlite) find_optimal_package <- function(inputs, objective_input, cet_input, drug_budget_input, drug_budget.scale, @@ -16,7 +17,7 @@ find_optimal_package <- function(inputs, objective_input, cet_input, dalys <- as.numeric(as.character(inputs$ce_dalys)) ## Cumulative cost of drugs and commodities drugcost <- as.numeric(as.character(inputs$conscost)) - maxcoverage <- as.numeric(as.character(inputs$feascov)) # Maximum possible coverage (demand constraint) + maxcoverage <- 1 ## SB: Changed as I am not using use_feasiblecov_constraint ## Preaggregated mode: unit case scaling cases <- rep(1, length(dalys)) ## Full cost @@ -131,7 +132,6 @@ find_optimal_package <- function(inputs, objective_input, cet_input, ## Each list here represents the number of staff (of each cadre) needed to deliver each intervention to all cases in need. ## Eg. for each cesarean section, 45 minutes of medical staff's time is needed (or 104,200 minutes for 2316 cases). On average 39,900 minutes are available per medical staff each year (257.3 million minutes in total divided by 6,400 medical staff). 
This means that for 2136 cases, 2.16 medical staff are needed (2316*45/(257.3m/6400)) - cons_hr <- cbind(clinicalstaff.need / (clinicalstaffmins.limit / clinicalstaff.limit), nursingstaff.need / (nursingstaffmins.limit / nursingstaff.limit), @@ -294,14 +294,19 @@ find_optimal_package <- function(inputs, objective_input, cet_input, ################################### # 3.2 - Run LPP ################################### + solution.class <- lp("max", objective, cons.mat, cons.dir, cons.mat.limit, compute.sens = TRUE) + print(solution.class$status) # 0 means optimal in lpSolve + print(solution.class$objval) + saveRDS(solution.class, file = "solution.rds") ################################### # 3.3 - Outputs ################################### # Export solution to a .csv file #------------------------------------ - solution <- as.data.frame(solution.class$solution) + solution <- as.data.frame(solution.class$solution) + solution_hr <- as.data.frame(solution.class$solution) # use this uncollapsed version of the dataframe for HR use calculations below # Collapse solution by intervention if (task_shifting_pharm == 1) { @@ -369,3 +374,57 @@ find_optimal_package <- function(inputs, objective_input, cet_input, "CET based on solution" = cet_soln ) } + + +run_optimizer_from_csv <- function() { + inputs <- readr::read_csv("optimizer_inputs.csv", show_col_types = TRUE) + constraints <- readr::read_csv("hr_constraints.csv", show_col_types = TRUE ) + hr.time.constraint <- c( + constraints$capacity[constraints$Officer_Category == "hr_clin"], + constraints$capacity[constraints$Officer_Category == "hr_nur"], + constraints$capacity[constraints$Officer_Category == "hr_pharm"], + constraints$capacity[constraints$Officer_Category == "hr_lab"], + constraints$capacity[constraints$Officer_Category == "hr_ment"], + constraints$capacity[constraints$Officer_Category == "hr_nutri"] + ) + + hr.size.constraint <- c( + constraints$staff_count[constraints$Officer_Category == "hr_clin"], + 
constraints$staff_count[constraints$Officer_Category == "hr_nur"], + constraints$staff_count[constraints$Officer_Category == "hr_pharm"], + constraints$staff_count[constraints$Officer_Category == "hr_lab"], + constraints$staff_count[constraints$Officer_Category == "hr_ment"], + constraints$staff_count[constraints$Officer_Category == "hr_nutri"] + ) + + + res <- find_optimal_package( + inputs = inputs, + objective_input = "dalys", + cet_input = 600, + drug_budget_input = 225602946, #TODO - get this from constaint + drug_budget.scale = 1, + hr.time.constraint = hr.time.constraint, + hr.size = hr.size.constraint, + hr.scale = rep(1, length(hr.time.constraint)), + use_feasiblecov_constraint = 0, + feascov_scale = 1, + compcov_scale = 1, + compulsory_interventions = c(), + substitutes = c(), + task_shifting_pharm = 0 + ) + + # Ensure JSON-safe output. + res_json <- lapply(res, function(x) { + if (is.matrix(x) || is.data.frame(x)) { + as.data.frame(x) + } else { + x + } + }) + + output_json_path <- "optimizer_results.json" + + jsonlite::write_json(res_json, path = output_json_path, auto_unbox = TRUE, pretty = TRUE, digits = NA) +} diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py b/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py index 09b36f252d..e8ca9e3c49 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py @@ -1,62 +1,30 @@ -"""Run the preaggregated R optimizer from Python inputs. - -This script: -1. Loads analysis outputs produced by analysis_effect_of_treatment_ids.py. -2. Builds and writes the optimizer intervention input CSV. -3. Loads optimizer constraints from a separate CSV. -4. Invokes optimizer_preaggregated.R::find_optimal_package via rpy2. -5. Writes optimizer outputs to JSON and optional CSV. 
-""" +"""Generate optimizer CSV inputs and run the preaggregated R optimizer via Rscript.""" from __future__ import annotations -from scripts.lcoa_inputs_from_tlo_analyses.results_processing_utils import format_scenario_name - import argparse -import json import pickle from pathlib import Path from typing import Any -import numpy as np import pandas as pd +from scripts.lcoa_inputs_from_tlo_analyses.results_processing_utils import format_scenario_name OPTIMIZER_HR_COLS = ["hr_clin", "hr_nur", "hr_pharm", "hr_lab", "hr_ment", "hr_nutri"] REQUIRED_OPT_INPUT_COLS = [ - "intcode", + "code", + "category", "intervention", "ce_dalys", - "conscost", - "feascov", "ce_cost", + "pop_size", + "pop_pin", + "feascov", + "conscost", *OPTIMIZER_HR_COLS, ] -GLOBAL_REQUIRED_KEYS = { - "objective_input", - "cet_input", - "drug_budget_input", - "drug_budget.scale", - "use_feasiblecov_constraint", - "feascov_scale", - "compcov_scale", - "task_shifting_pharm", -} - - -def _require_columns(df: pd.DataFrame, required: list[str], df_name: str) -> None: - missing = [c for c in required if c not in df.columns] - if missing: - raise ValueError(f"{df_name} is missing required columns: {missing}") - -# TODO: Check with Sakshi if we only use the central value. -def _coerce_central_series(df: pd.DataFrame) -> pd.Series: - out = df["central"].copy() - out.index = out.index.map(format_scenario_name) - return out.astype(float) - - def _rename_hrh_map(_df): """Map officer type labels from model output to optimizer cadre buckets. @@ -79,6 +47,13 @@ def _rename_hrh_map(_df): renamed = _df.rename(index=mapping) return renamed + +# TODO: Check with Sakshi if we only use the central value. 
+def _coerce_central_series(df: pd.DataFrame) -> pd.Series: + out = df["central"].copy() + out.index = out.index.map(format_scenario_name) + return out.astype(float) + def _build_optimizer_inputs(results: dict[str, Any]) -> pd.DataFrame: dalys_averted = results.get("dalys_averted") @@ -111,138 +86,47 @@ def _build_optimizer_inputs(results: dict[str, Any]) -> pd.DataFrame: return opt_df +def _build_hr_constraints_from_results(results: dict[str, Any]) -> pd.DataFrame: -def _jsonify(value: Any) -> Any: - if isinstance(value, dict): - return {str(k): _jsonify(v) for k, v in value.items()} - if isinstance(value, (list, tuple)): - return [_jsonify(v) for v in value] - if isinstance(value, pd.Series): - return {str(k): _jsonify(v) for k, v in value.to_dict().items()} - if isinstance(value, pd.DataFrame): - return value.to_dict(orient="records") - if isinstance(value, np.ndarray): - return [_jsonify(v) for v in value.tolist()] - if isinstance(value, (np.integer,)): - return int(value) - if isinstance(value, (np.floating, float)): - return float(value) - if pd.isna(value): - return None - return value - - -def _run_optimizer_via_rpy2( - optimizer_inputs: pd.DataFrame, - constraints: dict[str, Any], - r_script_path: Path, -) -> dict[str, Any]: - try: - import rpy2.robjects as ro - from rpy2.robjects import pandas2ri - from rpy2.robjects.conversion import localconverter - from rpy2.robjects.vectors import FloatVector, ListVector, StrVector - except ImportError as exc: - raise RuntimeError( - "rpy2 is required but not available. Install rpy2 in your Python environment." 
- ) from exc - - if not r_script_path.exists(): - raise FileNotFoundError(f"R script not found: {r_script_path}") - - ro.r["source"](str(r_script_path)) - r_func = ro.globalenv.find("find_optimal_package") - - with localconverter(ro.default_converter + pandas2ri.converter): - r_inputs = ro.conversion.py2rpy(optimizer_inputs) - - r_compulsory = StrVector([]) - r_subs = ListVector({}) - - result_r = r_func( - r_inputs, - # whether we are maximizing DALYs or net health - "dalys", - # CET; I believe not relevant here but give a value anyway - 600, - # Drug budget input - constraints['annual_consumables_budget'], - # Drug budget scale set to 1 - 1, - # HR constraints; need to be clinical staff, nursing, pharmacy, lab, - # mental health, nutrition in that order - FloatVector(constraints["hr_time_constraint"]), - # HR size; same order as above - FloatVector(constraints["hr_size"]), - 1, - # use_feasiblecov_constraint; set to 0 to not use, 1 to use - 0, - # Feasible coverage scale; set to 1 - 1, - # Compulsory coverage scale; set to 1 - 1, - # Compulsory interventions; pass empty list, - r_compulsory, - # substitutes; pass empty list - r_subs, - # task_shifting_pharm; set to 0 to not allow, 1 to allow - 0, + capacity_constraints = _rename_hrh_map(results['annual_capacity_by_cadre']) + count_constraints = _rename_hrh_map(results['staff_count_by_cadre']) + combined = ( + {'capacity': capacity_constraints, 'staff_count': count_constraints} ) + return pd.DataFrame(combined) - with localconverter(ro.default_converter + pandas2ri.converter): - result_py = ro.conversion.rpy2py(result_r) - - # rpy2 can return named list-like objects; normalize to dict. 
- if isinstance(result_py, dict): - return {str(k): _jsonify(v) for k, v in result_py.items()} - - if hasattr(result_r, "names"): - out: dict[str, Any] = {} - names = list(result_r.names) - for i, name in enumerate(names): - out[str(name)] = _jsonify(ro.conversion.rpy2py(result_r[i])) - return out - - raise RuntimeError("Unexpected optimizer result type from R.") def _parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser() parser.add_argument("--analysis-results-pkl", type=Path, required=True) - parser.add_argument("--optimizer-output-json", type=Path, required=True) parser.add_argument( "--r-script-path", type=Path, default=Path("src/scripts/lcoa_inputs_from_tlo_analyses/optimizer_preaggregated.R"), ) + parser.add_argument("--rscript-bin", type=str, default="Rscript") return parser.parse_args() def main() -> None: args = _parse_args() - if not args.analysis_results_pkl.exists(): - raise FileNotFoundError(f"Analysis results pickle not found: {args.analysis_results_pkl}") - with open(args.analysis_results_pkl, "rb") as f: results = pickle.load(f) - constraints = ({ - 'annual_consumables_budget': results.get("annual_consumables_budget"), - 'hr_time_constraint': _rename_hrh_map(results.get("annual_capacity_by_cadre")), - 'hr_size': _rename_hrh_map(results.get("staff_count_by_cadre")) - }) - optimizer_inputs = _build_optimizer_inputs(results) - - optimizer_output = _run_optimizer_via_rpy2( - optimizer_inputs=optimizer_inputs, - constraints=constraints, - r_script_path=args.r_script_path, - ) + opt_inputs = _build_optimizer_inputs(results) + hr_constraints = _build_hr_constraints_from_results(results) + + optimizer_input_csv = Path("src/scripts/lcoa_inputs_from_tlo_analyses/optimizer_inputs.csv") + hr_constraints_csv = Path("src/scripts/lcoa_inputs_from_tlo_analyses/hr_constraints.csv") + + opt_inputs.to_csv(optimizer_input_csv, index=True) + hr_constraints.to_csv(hr_constraints_csv, index=True) + + print(f"Wrote optimizer input CSV: 
{optimizer_input_csv}") + print(f"Wrote optimizer constraints CSV: {hr_constraints_csv}") - args.optimizer_output_json.parent.mkdir(parents=True, exist_ok=True) - with open(args.optimizer_output_json, "w", encoding="utf-8") as f: - json.dump(_jsonify(optimizer_output), f, indent=2, sort_keys=True) if __name__ == "__main__": main() From 935e602b4efb7e4a58fd96ed8b05d6eb2df25b04 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Sun, 31 May 2026 09:18:54 +0100 Subject: [PATCH 47/55] Make consistent the temporal aggregation of constraints and inputs --- .../analysis_effect_of_treatment_ids.py | 60 +++++++- .../figures_effect_of_treatment_ids.py | 134 ++++++++++++++++-- .../optimizer_preaggregated.R | 6 +- .../run_preaggregated_optimizer.py | 4 +- 4 files changed, 182 insertions(+), 22 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index cd5ee3a071..819673fc7e 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -62,6 +62,7 @@ # python src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-combined --target-start=2010-01-01 --target-end=2041-01-01 # python src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-2026-04-01T130709Z --target-start=2010-01-01 --target-end=2041-01-01 --do-comparison=False # python src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py outputs/s.bhatia@imperial.ac.uk/effect_of_each_treatment_id-combined outputs/generated_outputs --target-start=2010-01-01 --target-end=2041-01-01 --cost-checkpoint-profile=baseline --load-input-costs-from-checkpoint=True + PERIOD_LENGTH_YEARS_FOR_BAR_PLOTS = 1 
EXCLUDED_HSIs = [ @@ -166,6 +167,15 @@ def apply( print(f"Saved input costs checkpoint to: {checkpoint_path}") results['input_costs'] = input_costs + # TODO Ask Sakshi: the hrh costs are the same across all draw; therefore incremental costs and cost for medical consumables + # are the same for each draw. Does that make sense? + # Consumables cost per intervention + total_cons_cost = input_costs.groupby(['draw', 'run', 'cost_category'])['cost'].sum() + total_cons_cost = compute_summary_statistics(total_cons_cost.unstack(['draw', 'run']), 'median') + total_cons_cost = set_param_names_as_column_index_level_0(total_cons_cost, param_names) + + results['total_cons_cost'] = total_cons_cost + # Computing incremental costs # TODO Check with Sakshi if these are annual costs; as everything else is annual. if do_comparison: @@ -320,14 +330,12 @@ def apply( do_scaling=True, autodiscover=True, ) - # Sum across all facility levels and average across years; so we get the *average* annual capacity used over the whole period + # Sum across all facility levels and years; so we get the *total* capacity used over the whole period # TODO: Check with Sakshi if this is what we want. mask = annual_capacity_used_by_cadre_and_level.index.get_level_values(0).isin(range(2026, 2040)) capacity_used_by_cadre = ( - annual_capacity_used_by_cadre_and_level[mask].groupby(['OfficerType', 'year']). + annual_capacity_used_by_cadre_and_level[mask].groupby(['OfficerType']). sum(). - groupby(['OfficerType']). - mean(). pipe(set_param_names_as_column_index_level_0, param_names=param_names) ) @@ -342,8 +350,9 @@ def apply( ) # This gives the total minutes available per day by cadre and facility level. 
# Sum across levels to get cadre specific constraints, and multiply by 365 to get annual capacity + # and then by the length of the period annual_capacity_by_cadre = ( - daily_capacity_by_cadre_and_level.groupby('Officer_Category')['Total_Mins_Per_Day'].sum() * 365 + daily_capacity_by_cadre_and_level.groupby('Officer_Category')['Total_Mins_Per_Day'].sum() * 365 * 15 ) staff_count_by_cadre = ( @@ -365,6 +374,47 @@ def apply( results['annual_capacity_by_cadre'] = annual_capacity_by_cadre results['staff_count_by_cadre'] = staff_count_by_cadre + # Extract DALYs and costs from the LCOA input workbook (EHP_BasedOnLCOA sheet). + lcoa_workbook_path = Path(__file__).resolve().parent / "ResourceFile_PriorityRanking_ALLPOLICIES_EHP_dalys_costs.xlsx" + lcoa_df = pd.read_excel(lcoa_workbook_path, sheet_name="EHP_BasedOnLCOA") + col_a, col_i, col_j, col_k, col_l, col_m = ( + lcoa_df.columns[0], + lcoa_df.columns[8], + lcoa_df.columns[9], + lcoa_df.columns[10], + lcoa_df.columns[11], + lcoa_df.columns[12], + ) + dalys_and_costs_from_lcoa = lcoa_df[[col_a, col_i, col_j, col_k, col_l, col_m]].rename( + columns={ + col_a: "treatment_id", + col_i: "icer", + col_j: "dalys_per_patient", + col_k: "cost_per_case", + col_l: "eligible_cases", + col_m: "lcoa_flag", + } + ) + for numeric_col in ["icer", "dalys_per_patient", "cost_per_case", "eligible_cases"]: + dalys_and_costs_from_lcoa[numeric_col] = pd.to_numeric( + dalys_and_costs_from_lcoa[numeric_col], errors="coerce" + ) + dalys_and_costs_from_lcoa["overall_dalys"] = ( + dalys_and_costs_from_lcoa["dalys_per_patient"] * dalys_and_costs_from_lcoa["eligible_cases"] + ) + dalys_and_costs_from_lcoa["overall_costs"] = ( + dalys_and_costs_from_lcoa["cost_per_case"] * dalys_and_costs_from_lcoa["eligible_cases"] + ) + dalys_and_costs_from_lcoa = dalys_and_costs_from_lcoa[ + dalys_and_costs_from_lcoa["treatment_id"].notna() + & ( + dalys_and_costs_from_lcoa["overall_dalys"].notna() + | dalys_and_costs_from_lcoa["overall_costs"].notna() + | 
dalys_and_costs_from_lcoa["icer"].notna() + ) + ] + results["dalys_and_costs_from_lcoa"] = dalys_and_costs_from_lcoa + return results diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py index 9d87ea1357..14ad61f180 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py @@ -54,6 +54,9 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path dalys_averted = primary_results.get('dalys_averted') pc_dalys_averted = primary_results.get('pc_dalys_averted') icers = primary_results.get('icers_summarized') + incremental_scenario_cost = primary_results.get('incremental_scenario_cost') + dalys_and_costs_from_lcoa = primary_results.get('dalys_and_costs_from_lcoa') + comparison_metrics_available = all( metric is not None for metric in ( @@ -62,6 +65,7 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path dalys_averted, pc_dalys_averted, icers, + incremental_scenario_cost ) ) print(f"Comparison metrics available: {comparison_metrics_available}") @@ -96,14 +100,17 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path ['2010-2010', '2011-2011', '2012-2012', '2013-2013', '2014-2014', '2015-2015', '2016-2016', '2017-2017', '2018-2018', '2019-2019', '2020-2020', '2021-2021', - '2022-2022', '2023-2023', '2024-2024', '2025-2025'] + '2022-2022', '2023-2023', '2024-2024', '2025-2025', '2010-2041'] ) + # Filter rows to retain those in implementation period only mask_other_periods = ( ~counts_of_hsi_in_implementation_period. index. get_level_values("period"). 
- isin(pre_switch_periods) + isin(pre_switch_periods) & + (counts_of_hsi_in_implementation_period > 0).any(axis=1) ) + # In the pre-implentation period only retain the treatment id of interest to avoid plot clutter mask_early_periods = ( counts_of_hsi_in_implementation_period.index.get_level_values("period").isin(pre_switch_periods) & (counts_of_hsi_in_implementation_period.index.get_level_values("appt_type") == draw.replace("_*", "")) @@ -183,13 +190,14 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path if comparison_metrics_available: print("Plotting comparison metrics: deaths/DALYs averted, percentages, and ICERs.") - deaths_averted_sorted = (num_deaths_averted.sort_values(by="central", ascending=True) / 1e3) - fig_height = max(6, min(0.28 * len(deaths_averted_sorted.index) + 4, 18)) + dalys_averted_sorted = (dalys_averted.sort_values(by="central", ascending=True) / 1e3) + dalys_order = dalys_averted_sorted.index + fig_height = max(6, min(0.28 * len(dalys_averted_sorted.index) + 4, 18)) fig, ax = plt.subplots(figsize=(10, fig_height)) - name_of_plot = "Deaths Averted by Each Treatment ID" - do_barh_plot_with_ci(deaths_averted_sorted, ax) + name_of_plot = "DALYS Averted by Each Treatment ID" + do_barh_plot_with_ci(dalys_averted_sorted, ax) ax.set_title(name_of_plot) - ax.set_xlabel("Number of deaths averted (/1000)") + ax.set_xlabel("DALYs averted (/1000)") ax.grid(axis="x") ax.spines["top"].set_visible(False) ax.spines["right"].set_visible(False) @@ -197,15 +205,15 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path fig.tight_layout() fig.savefig(outfile) plt.close(fig) - print("Saved: Deaths Averted by Each Treatment ID") + print("Saved: DALYS Averted by Each Treatment ID") - dalys_averted_sorted = (dalys_averted.sort_values(by="central", ascending=True) / 1e3) - fig_height = max(6, min(0.28 * len(dalys_averted_sorted.index) + 4, 18)) + deaths_averted_sorted = (num_deaths_averted / 
1e3).reindex(dalys_order) + fig_height = max(6, min(0.28 * len(deaths_averted_sorted.index) + 4, 18)) fig, ax = plt.subplots(figsize=(10, fig_height)) - name_of_plot = "DALYS Averted by Each Treatment ID" - do_barh_plot_with_ci(dalys_averted_sorted, ax) + name_of_plot = "Deaths Averted by Each Treatment ID" + do_barh_plot_with_ci(deaths_averted_sorted, ax) ax.set_title(name_of_plot) - ax.set_xlabel("DALYs averted (/1000)") + ax.set_xlabel("Number of deaths averted (/1000)") ax.grid(axis="x") ax.spines["top"].set_visible(False) ax.spines["right"].set_visible(False) @@ -213,7 +221,8 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path fig.tight_layout() fig.savefig(outfile) plt.close(fig) - print("Saved: DALYS Averted by Each Treatment ID") + print("Saved: Deaths Averted by Each Treatment ID") + pc_deaths_averted_sorted = (pc_deaths_averted.sort_values(by="central", ascending=True)) fig_height = max(6, min(0.28 * len(pc_deaths_averted_sorted.index) + 4, 18)) @@ -255,6 +264,7 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path mask = ~icers_sorted.index.get_level_values("draw").isin(["Hiv_Test_*", "CervicalCancer_Screening_Xpert_*", "BreastCancer_PalliativeCare_*"]) icers_sorted = icers_sorted[mask] + icers_sorted = icers_sorted.reindex(dalys_order.intersection(icers_sorted.index)) fig_height = max(6, min(0.28 * len(icers_sorted.index) + 4, 18)) fig, ax = plt.subplots(figsize=(10, fig_height)) name_of_plot = "ICERs for Each Treatment ID" @@ -270,6 +280,102 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path plt.close(fig) print("Saved: ICERs for Each Treatment ID") + incremental_cost_sorted = incremental_scenario_cost.reindex(dalys_order) + fig_height = max(6, min(0.28 * len(incremental_cost_sorted.index) + 4, 18)) + fig, ax = plt.subplots(figsize=(10, fig_height)) + name_of_plot = "Incremental Cost for Each Treatment ID" + do_barh_plot_with_ci(incremental_cost_sorted, ax) + 
ax.set_title(name_of_plot) + ax.set_xlabel("Incremental cost (USD)") + ax.grid(axis="x") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) + fig.tight_layout() + fig.savefig(outfile) + plt.close(fig) + print("Saved: Incremental Cost for Each Treatment ID") + + facet_order = ( + dalys_order + .intersection(incremental_cost_sorted.dropna().index) + .intersection(icers_sorted.dropna().index) + ) + dalys_facet = dalys_averted_sorted.reindex(facet_order) + costs_facet = incremental_cost_sorted.reindex(facet_order) + icers_facet = icers_sorted.reindex(facet_order) + + fig_height = max(6, min(0.28 * len(facet_order) + 4, 18)) + fig, axes = plt.subplots(1, 3, figsize=(20, fig_height), sharey=True) + name_of_plot = "DALYs, Incremental Cost, and ICERs by Treatment ID" + + do_barh_plot_with_ci(dalys_facet, axes[0]) + axes[0].set_title("DALYs") + axes[0].set_xlabel("DALYs averted (/1000)") + + do_barh_plot_with_ci(costs_facet, axes[1]) + axes[1].set_title("Costs") + axes[1].set_xlabel("Incremental cost (USD)") + + do_barh_plot_with_ci(icers_facet, axes[2]) + axes[2].set_title("ICERs") + axes[2].set_xlabel("ICER (USD per DALY averted)") + + if isinstance(dalys_and_costs_from_lcoa, pd.DataFrame): + lcoa_overlay = ( + dalys_and_costs_from_lcoa[["treatment_id", "overall_dalys", "overall_costs", "icer"]] + .dropna(subset=["treatment_id"]) + .drop_duplicates(subset=["treatment_id"], keep="first") + .set_index("treatment_id") + ) + facet_overlay = pd.DataFrame({"draw": facet_order}) + facet_overlay["treatment_id"] = facet_overlay["draw"].str.replace(r"_\*$", "", regex=True) + facet_overlay = facet_overlay.join(lcoa_overlay, on="treatment_id") + + daly_overlay = facet_overlay["overall_dalys"].notna() + if daly_overlay.any(): + # DALY bars are plotted as /1000, so convert overlay values to the same units. 
+ axes[0].scatter( + facet_overlay.loc[daly_overlay, "overall_dalys"] / 1e3, + facet_overlay.index[daly_overlay], + c="black", + s=16, + zorder=10, + ) + + cost_overlay = facet_overlay["overall_costs"].notna() + if cost_overlay.any(): + axes[1].scatter( + facet_overlay.loc[cost_overlay, "overall_costs"], + facet_overlay.index[cost_overlay], + c="black", + s=16, + zorder=10, + ) + + icer_overlay = facet_overlay["icer"].notna() + if icer_overlay.any(): + axes[2].scatter( + facet_overlay.loc[icer_overlay, "icer"], + facet_overlay.index[icer_overlay], + c="black", + s=16, + zorder=10, + ) + + for ax in axes: + ax.grid(axis="x") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + + axes[0].set_ylabel("Treatment ID") + fig.suptitle(name_of_plot, y=1.02) + outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) + fig.tight_layout() + fig.savefig(outfile) + plt.close(fig) + print("Saved: DALYs, Incremental Cost, and ICERs by Treatment ID") + print("Finished generating figures.") if __name__ == "__main__": diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/optimizer_preaggregated.R b/src/scripts/lcoa_inputs_from_tlo_analyses/optimizer_preaggregated.R index 5465dabcd7..c6dee4761c 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/optimizer_preaggregated.R +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/optimizer_preaggregated.R @@ -397,12 +397,14 @@ run_optimizer_from_csv <- function() { constraints$staff_count[constraints$Officer_Category == "hr_nutri"] ) - + ## Multiply drug_budget_input by the length of the intervention period + ## because all other inputs are over the + ## intervention period res <- find_optimal_package( inputs = inputs, objective_input = "dalys", cet_input = 600, - drug_budget_input = 225602946, #TODO - get this from constaint + drug_budget_input = 225602946 * 15, #TODO - get this from constaint drug_budget.scale = 1, hr.time.constraint = hr.time.constraint, hr.size = hr.size.constraint, diff --git 
a/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py b/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py index e8ca9e3c49..32b6295964 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py @@ -59,10 +59,12 @@ def _build_optimizer_inputs(results: dict[str, Any]) -> pd.DataFrame: dalys_averted = results.get("dalys_averted") incremental_cost = results.get("incremental_scenario_cost") capacity_used = _rename_hrh_map(results.get("capacity_used_by_cadre")) + conscost = results.get('total_cons_cost') ce_dalys = dalys_averted['central'] ce_cost = incremental_cost['central'] hr_needs = capacity_used.xs("central", level="stat", axis=1).T + conscost = conscost.xs('central', level='stat', axis=1).T interventions = sorted(set(ce_dalys.index).intersection(set(ce_cost.index))) if not interventions: @@ -74,7 +76,7 @@ def _build_optimizer_inputs(results: dict[str, Any]) -> pd.DataFrame: "intervention": interventions, "ce_dalys": [float(ce_dalys.loc[i]) for i in interventions], "ce_cost": [float(ce_cost.loc[i]) for i in interventions], - "conscost": [float(ce_cost.loc[i]) for i in interventions], + "conscost": [float(conscost.loc[i, "medical consumables"]) for i in interventions], "hr_clin": [float(hr_needs.loc[i, "hr_clin"]) for i in interventions], "hr_nur": [float(hr_needs.loc[i, "hr_nur"]) for i in interventions], "hr_pharm": [float(hr_needs.loc[i, "hr_pharm"]) for i in interventions], From 0519c7e022806d5f49c1fa20cb5a073850920810 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Sun, 31 May 2026 09:30:46 +0100 Subject: [PATCH 48/55] Increase number of draws --- .../scenario_effect_of_treatment_ids.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py 
b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py index 64e5c79639..bbacda282d 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py @@ -28,13 +28,12 @@ from tlo.methods.fullmodel import fullmodel from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher from tlo.scenario import BaseScenario -from tlo.methods.individual_history_tracker import IndividualHistoryTracker class ScenarioDefinitions: @property def YEAR_OF_SERVICE_AVAILABILITY_SWITCH(self) -> int: - return 2011 + return 2026 def baseline(self) -> Dict: """Return the Dict with values for the parameter changes that define the baseline scenario.""" @@ -65,11 +64,11 @@ def __init__(self): super().__init__() self.seed = 0 self.start_date = Date(2010, 1, 1) - self.end_date = Date(2031, 1, 1) - self.pop_size = 1000 + self.end_date = Date(2041, 1, 1) + self.pop_size = 250_000 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 5 + self.runs_per_draw = 10 def log_configuration(self): return { @@ -81,7 +80,6 @@ def log_configuration(self): "tlo.methods.demography.detail": logging.WARNING, "tlo.methods.healthburden": logging.INFO, "tlo.methods.healthsystem.summary": logging.INFO, - "tlo.methods.individual_history_tracker": logging.INFO, }, } @@ -98,7 +96,6 @@ def _get_scenarios(self) -> Dict[str, Dict]: # Generate list of TREATMENT_IDs and filter to the resolution needed treatments = get_filtered_treatment_ids(depth=None) - # Return 'Service_Availability' values, with scenarios for nothing, and ones for which all but one # treatment is omitted service_availability = dict({"Nothing": []}) @@ -107,8 +104,12 @@ def _get_scenarios(self) -> Dict[str, Dict]: service_availability.update( {f"Only {treatment}": [treatment] for treatment in treatments} ) - ##service_availability = {"Only 
Rti_TetanusVaccine": ["Rti_TetanusVaccine"]} - + # overwrite service availability dictionary to run specific scenarios for testing + service_availability = dict( + {"Nothing": [], + "Only AntenatalCare_FollowUp_*": ['AntenatalCare_FollowUp_*'], + "Only BladderCancer_PalliativeCare_*": ['BladderCancer_PalliativeCare_*']} + ) scenario_definitions = ScenarioDefinitions() scenarios = { From 1dc2a3cd9fc5be520c61f9a85206b23e437772f5 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Wed, 3 Jun 2026 16:54:00 +0100 Subject: [PATCH 49/55] Only Nothing for suspend --- .../scenario_effect_of_treatment_ids.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py index bbacda282d..40cd43e65d 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py @@ -101,15 +101,9 @@ def _get_scenarios(self) -> Dict[str, Dict]: service_availability = dict({"Nothing": []}) # For each treatment group, create scenarios keeping only one treatment from that group # Commenting to allow draw 0 to be run and suspended. 
- service_availability.update( - {f"Only {treatment}": [treatment] for treatment in treatments} - ) - # overwrite service availability dictionary to run specific scenarios for testing - service_availability = dict( - {"Nothing": [], - "Only AntenatalCare_FollowUp_*": ['AntenatalCare_FollowUp_*'], - "Only BladderCancer_PalliativeCare_*": ['BladderCancer_PalliativeCare_*']} - ) + #service_availability.update( + # {f"Only {treatment}": [treatment] for treatment in treatments} + #) scenario_definitions = ScenarioDefinitions() scenarios = { From aae67d5c1178c42e3d2e35081d31f0bdb7d09e9b Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Fri, 5 Jun 2026 13:08:32 +0100 Subject: [PATCH 50/55] WIP --- .../analysis_effect_of_treatment_ids.py | 11 +++ .../figures_effect_of_treatment_ids.py | 89 +++++++++++++++++++ 2 files changed, 100 insertions(+) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index 819673fc7e..73028f42c5 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -322,6 +322,8 @@ def apply( # and summarise. However since no HSIs are delivered in the Nothing scenario, the capacity used in that scenario is zero, # so the delta relative to Nothing is just the capacity used in each scenario. # TODO: Check if this should be scaled with population or used as is. + # Note that Capacity_By_FacID_and_Officer logs the fraction of time used per officer type; not the absolute time used. + # To get the actual minutes, we need to multiply by the total available minutes. 
annual_capacity_used_by_cadre_and_level = extract_results( results_folder, module='tlo.methods.healthsystem.summary', @@ -359,6 +361,15 @@ def apply( daily_capacity_by_cadre_and_level.groupby('Officer_Category')['Staff_Count'].sum() ) + # Proportion of capacity used by year and cadre, relative to the total available capacity by cadre + proportion_capacity_used_by_cadre = ( + annual_capacity_used_by_cadre_and_level[mask].groupby(['OfficerType', 'year']). + sum(). + pipe(set_param_names_as_column_index_level_0, param_names=param_names) + ).div(annual_capacity_by_cadre / 15, axis=0, level=0) + proportion_capacity_used_by_cadre = compute_summary_statistics(proportion_capacity_used_by_cadre, central_measure='median') + results['proportion_capacity_used_by_cadre'] = proportion_capacity_used_by_cadre + # Add consumables budget to this dictionary so that we have everything in one place # USD 225,602,946 (203136642 from donors + 22466304 from the government) # Revision of Malawi’s Health Benefits Package: A Critical Analysis of Policy Formulation and Implementation diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py index 14ad61f180..00db6dde66 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py @@ -4,6 +4,7 @@ import zipfile from pathlib import Path import pickle +import numpy as np import pandas as pd import matplotlib.pyplot as plt @@ -37,6 +38,72 @@ def load_results_files(results_files: list[Path]) -> dict[Path, dict]: return loaded +def plot_proportion_capacity_used_by_cadre_over_time_for_draw(_df: pd.DataFrame, draw: str): + """Plot grouped bars by year for one draw; each bar-group decomposes by cadre with CI error bars.""" + if _df is None: + raise ValueError("`proportion_capacity_used_by_cadre` is None.") + if not 
isinstance(_df.index, pd.MultiIndex) or _df.index.nlevels != 2: + raise ValueError("Expected a 2-level index: (OfficerType, year).") + if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels != 2: + raise ValueError("Expected a 2-level columns index: (draw, stat).") + + officer_level_name = "OfficerType" if "OfficerType" in _df.index.names else _df.index.names[0] + year_level_name = "year" if "year" in _df.index.names else _df.index.names[1] + draw_level_name = "draw" if "draw" in _df.columns.names else _df.columns.names[0] + + available_draws = pd.Index(_df.columns.get_level_values(draw_level_name).unique()) + if draw not in available_draws: + raise ValueError(f"Draw '{draw}' not found. Available draws: {available_draws.tolist()}") + + draw_df = _df[draw].copy() + required_stats = {"central", "lower", "upper"} + if not required_stats.issubset(draw_df.columns): + raise ValueError( + f"Missing required stats {sorted(required_stats)} in draw '{draw}'. " + f"Found: {draw_df.columns.tolist()}" + ) + + years = sorted(pd.Index(draw_df.index.get_level_values(year_level_name).unique()).tolist()) + cadres = sorted(pd.Index(draw_df.index.get_level_values(officer_level_name).unique()).tolist()) + x = np.arange(len(years), dtype=float) + bar_width = min(0.8 / max(len(cadres), 1), 0.12) + offsets = (np.arange(len(cadres)) - (len(cadres) - 1) / 2) * bar_width + + fig_width = max(10, min(1.3 * len(years) + 6, 24)) + fig_height = max(6, min(0.22 * len(cadres) + 6, 14)) + fig, ax = plt.subplots(figsize=(fig_width, fig_height)) + + for i, cadre in enumerate(cadres): + cadre_df = draw_df.xs(cadre, level=officer_level_name).reindex(years) + central = pd.to_numeric(cadre_df["central"], errors="coerce").fillna(0.0).to_numpy() + lower = pd.to_numeric(cadre_df["lower"], errors="coerce").fillna(0.0).to_numpy() + upper = pd.to_numeric(cadre_df["upper"], errors="coerce").fillna(0.0).to_numpy() + lower_err = np.clip(central - lower, a_min=0.0, a_max=None) + upper_err = 
np.clip(upper - central, a_min=0.0, a_max=None) + + ax.bar( + x + offsets[i], + central, + width=bar_width, + label=str(cadre), + yerr=np.vstack([lower_err, upper_err]), + capsize=2, + error_kw={"elinewidth": 0.8, "capthick": 0.8}, + alpha=0.9, + ) + + ax.set_xticks(x) + ax.set_xticklabels([str(y) for y in years], rotation=45, ha="right") + ax.set_xlabel("Year") + ax.set_ylabel("Proportion of Capacity Used") + ax.grid(axis="y", alpha=0.3) + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + ax.legend(title="Cadre", loc="center left", bbox_to_anchor=(1.02, 0.5), fontsize=8, frameon=True) + fig.tight_layout() + return fig, ax + + def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path = None): """Produce standard plots describing effect of each TREATMENT_ID.""" print("Starting figure generation for treatment-ID effects.") @@ -73,6 +140,7 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path counts_of_hsi_in_implementation_period = primary_results['counts_of_hsi_by_period'] counts_of_hsi_in_implementation_period = counts_of_hsi_in_implementation_period.drop(['2010-2041'], level=1) capacity_used_by_cadre = primary_results.get("capacity_used_by_cadre") + proportion_capacity_used_by_cadre = primary_results.get("proportion_capacity_used_by_cadre") result_df_by_period = pd.DataFrame([ {'treatment_id_included': draw, 'nonzero_hsis': treatment_id, 'period': period} @@ -133,6 +201,27 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path fig.savefig(outfile) plt.close(fig) + if proportion_capacity_used_by_cadre is not None: + print("Plotting capacity used over time (one figure per treatment ID).") + for param in param_names: + if param == "Nothing": + continue + draw = format_scenario_name(param) + try: + fig, ax = plot_proportion_capacity_used_by_cadre_over_time_for_draw( + proportion_capacity_used_by_cadre, + draw, + ) + except ValueError as exc: + print(f"Skipping 
capacity-over-time plot for draw '{draw}': {exc}") + continue + + name_of_plot = f"Capacity Used Over Time by Cadre for {draw}" + ax.set_title(name_of_plot) + outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) + fig.savefig(outfile) + plt.close(fig) + # Plot population growth total_population_in_implementation = primary_results['total_population_by_year'] print("Plotting population size by year.") From d79defe8a15a2052774b33468e73caa98747d61b Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Tue, 9 Jun 2026 13:31:48 +0100 Subject: [PATCH 51/55] Staff salary per minute and additional figures --- .../ResourceFile_Annual_Salary_Per_Cadre.csv | 10 + .../costing/ResourceFile_Minute_Salary_HR.csv | 1198 +++++++++++++++++ .../analysis_effect_of_treatment_ids.py | 81 +- .../calculate_staff_salary_per_minute.py | 55 + .../fig_utils.py | 295 ++++ .../figures_effect_of_treatment_ids.py | 114 +- .../scenario_effect_of_treatment_ids.py | 6 +- 7 files changed, 1645 insertions(+), 114 deletions(-) create mode 100644 resources/costing/ResourceFile_Annual_Salary_Per_Cadre.csv create mode 100644 resources/costing/ResourceFile_Minute_Salary_HR.csv create mode 100644 src/scripts/lcoa_inputs_from_tlo_analyses/calculate_staff_salary_per_minute.py diff --git a/resources/costing/ResourceFile_Annual_Salary_Per_Cadre.csv b/resources/costing/ResourceFile_Annual_Salary_Per_Cadre.csv new file mode 100644 index 0000000000..b4fdecc1d3 --- /dev/null +++ b/resources/costing/ResourceFile_Annual_Salary_Per_Cadre.csv @@ -0,0 +1,10 @@ +Officer_Category,Annual_Salary_USD +Clinical,6147.453973 +Nursing_and_Midwifery,6023.793376 +Dental,5841.847691 +Laboratory,5769.19231 +Mental,7972.748442 +Nutrition,4769.682776 +Pharmacy,5069.39346 +Radiography,6351.98842 +DCSA,2360.992559 diff --git a/resources/costing/ResourceFile_Minute_Salary_HR.csv b/resources/costing/ResourceFile_Minute_Salary_HR.csv new file mode 100644 index 0000000000..b0e40d85b6 --- /dev/null +++ 
b/resources/costing/ResourceFile_Minute_Salary_HR.csv @@ -0,0 +1,1198 @@ +Facility_Level,Officer_Type_Code,Minute_Salary_USD,Facility_ID +0,Clinical,0.0,0 +0,Clinical,0.0,4 +0,Clinical,0.0,8 +0,Clinical,0.0,12 +0,Clinical,0.0,16 +0,Clinical,0.0,20 +0,Clinical,0.0,24 +0,Clinical,0.0,28 +0,Clinical,0.0,32 +0,Clinical,0.0,36 +0,Clinical,0.0,40 +0,Clinical,0.0,44 +0,Clinical,0.0,48 +0,Clinical,0.0,52 +0,Clinical,0.0,56 +0,Clinical,0.0,60 +0,Clinical,0.0,64 +0,Clinical,0.0,68 +0,Clinical,0.0,72 +0,Clinical,0.0,76 +0,Clinical,0.0,80 +0,Clinical,0.0,84 +0,Clinical,0.0,88 +0,Clinical,0.0,92 +0,Clinical,0.0,96 +0,Clinical,0.0,100 +0,Clinical,0.0,104 +0,Clinical,0.0,108 +0,Clinical,0.0,112 +0,Clinical,0.0,116 +0,Clinical,0.0,120 +0,Clinical,0.0,124 +0,DCSA,0.11578495447237816,0 +0,DCSA,0.11578495447237816,4 +0,DCSA,0.11578495447237816,8 +0,DCSA,0.11578495447237816,12 +0,DCSA,0.11578495447237816,16 +0,DCSA,0.11578495447237816,20 +0,DCSA,0.11578495447237816,24 +0,DCSA,0.11578495447237816,28 +0,DCSA,0.11578495447237816,32 +0,DCSA,0.11578495447237816,36 +0,DCSA,0.11578495447237816,40 +0,DCSA,0.11578495447237816,44 +0,DCSA,0.11578495447237816,48 +0,DCSA,0.11578495447237816,52 +0,DCSA,0.11578495447237816,56 +0,DCSA,0.11578495447237816,60 +0,DCSA,0.11578495447237816,64 +0,DCSA,0.11578495447237816,68 +0,DCSA,0.11578495447237816,72 +0,DCSA,0.11578495447237816,76 +0,DCSA,0.11578495447237816,80 +0,DCSA,0.11578495447237816,84 +0,DCSA,0.11578495447237816,88 +0,DCSA,0.11578495447237816,92 +0,DCSA,0.11578495447237816,96 +0,DCSA,0.11578495447237816,100 +0,DCSA,0.11578495447237816,104 +0,DCSA,0.11578495447237816,108 +0,DCSA,0.11578495447237816,112 +0,DCSA,0.11578495447237816,116 +0,DCSA,0.11578495447237816,120 +0,DCSA,0.11578495447237816,124 +0,Dental,0.0,0 +0,Dental,0.0,4 +0,Dental,0.0,8 +0,Dental,0.0,12 +0,Dental,0.0,16 +0,Dental,0.0,20 +0,Dental,0.0,24 +0,Dental,0.0,28 +0,Dental,0.0,32 +0,Dental,0.0,36 +0,Dental,0.0,40 +0,Dental,0.0,44 +0,Dental,0.0,48 +0,Dental,0.0,52 +0,Dental,0.0,56 
+0,Dental,0.0,60 +0,Dental,0.0,64 +0,Dental,0.0,68 +0,Dental,0.0,72 +0,Dental,0.0,76 +0,Dental,0.0,80 +0,Dental,0.0,84 +0,Dental,0.0,88 +0,Dental,0.0,92 +0,Dental,0.0,96 +0,Dental,0.0,100 +0,Dental,0.0,104 +0,Dental,0.0,108 +0,Dental,0.0,112 +0,Dental,0.0,116 +0,Dental,0.0,120 +0,Dental,0.0,124 +0,Laboratory,0.0,0 +0,Laboratory,0.0,4 +0,Laboratory,0.0,8 +0,Laboratory,0.0,12 +0,Laboratory,0.0,16 +0,Laboratory,0.0,20 +0,Laboratory,0.0,24 +0,Laboratory,0.0,28 +0,Laboratory,0.0,32 +0,Laboratory,0.0,36 +0,Laboratory,0.0,40 +0,Laboratory,0.0,44 +0,Laboratory,0.0,48 +0,Laboratory,0.0,52 +0,Laboratory,0.0,56 +0,Laboratory,0.0,60 +0,Laboratory,0.0,64 +0,Laboratory,0.0,68 +0,Laboratory,0.0,72 +0,Laboratory,0.0,76 +0,Laboratory,0.0,80 +0,Laboratory,0.0,84 +0,Laboratory,0.0,88 +0,Laboratory,0.0,92 +0,Laboratory,0.0,96 +0,Laboratory,0.0,100 +0,Laboratory,0.0,104 +0,Laboratory,0.0,108 +0,Laboratory,0.0,112 +0,Laboratory,0.0,116 +0,Laboratory,0.0,120 +0,Laboratory,0.0,124 +0,Mental,0.0,0 +0,Mental,0.0,4 +0,Mental,0.0,8 +0,Mental,0.0,12 +0,Mental,0.0,16 +0,Mental,0.0,20 +0,Mental,0.0,24 +0,Mental,0.0,28 +0,Mental,0.0,32 +0,Mental,0.0,36 +0,Mental,0.0,40 +0,Mental,0.0,44 +0,Mental,0.0,48 +0,Mental,0.0,52 +0,Mental,0.0,56 +0,Mental,0.0,60 +0,Mental,0.0,64 +0,Mental,0.0,68 +0,Mental,0.0,72 +0,Mental,0.0,76 +0,Mental,0.0,80 +0,Mental,0.0,84 +0,Mental,0.0,88 +0,Mental,0.0,92 +0,Mental,0.0,96 +0,Mental,0.0,100 +0,Mental,0.0,104 +0,Mental,0.0,108 +0,Mental,0.0,112 +0,Mental,0.0,116 +0,Mental,0.0,120 +0,Mental,0.0,124 +0,Nursing_and_Midwifery,0.0,0 +0,Nursing_and_Midwifery,0.0,4 +0,Nursing_and_Midwifery,0.0,8 +0,Nursing_and_Midwifery,0.0,12 +0,Nursing_and_Midwifery,0.0,16 +0,Nursing_and_Midwifery,0.0,20 +0,Nursing_and_Midwifery,0.0,24 +0,Nursing_and_Midwifery,0.0,28 +0,Nursing_and_Midwifery,0.0,32 +0,Nursing_and_Midwifery,0.0,36 +0,Nursing_and_Midwifery,0.0,40 +0,Nursing_and_Midwifery,0.0,44 +0,Nursing_and_Midwifery,0.0,48 +0,Nursing_and_Midwifery,0.0,52 +0,Nursing_and_Midwifery,0.0,56 
+0,Nursing_and_Midwifery,0.0,60 +0,Nursing_and_Midwifery,0.0,64 +0,Nursing_and_Midwifery,0.0,68 +0,Nursing_and_Midwifery,0.0,72 +0,Nursing_and_Midwifery,0.0,76 +0,Nursing_and_Midwifery,0.0,80 +0,Nursing_and_Midwifery,0.0,84 +0,Nursing_and_Midwifery,0.0,88 +0,Nursing_and_Midwifery,0.0,92 +0,Nursing_and_Midwifery,0.0,96 +0,Nursing_and_Midwifery,0.0,100 +0,Nursing_and_Midwifery,0.0,104 +0,Nursing_and_Midwifery,0.0,108 +0,Nursing_and_Midwifery,0.0,112 +0,Nursing_and_Midwifery,0.0,116 +0,Nursing_and_Midwifery,0.0,120 +0,Nursing_and_Midwifery,0.0,124 +0,Nutrition,0.0,0 +0,Nutrition,0.0,4 +0,Nutrition,0.0,8 +0,Nutrition,0.0,12 +0,Nutrition,0.0,16 +0,Nutrition,0.0,20 +0,Nutrition,0.0,24 +0,Nutrition,0.0,28 +0,Nutrition,0.0,32 +0,Nutrition,0.0,36 +0,Nutrition,0.0,40 +0,Nutrition,0.0,44 +0,Nutrition,0.0,48 +0,Nutrition,0.0,52 +0,Nutrition,0.0,56 +0,Nutrition,0.0,60 +0,Nutrition,0.0,64 +0,Nutrition,0.0,68 +0,Nutrition,0.0,72 +0,Nutrition,0.0,76 +0,Nutrition,0.0,80 +0,Nutrition,0.0,84 +0,Nutrition,0.0,88 +0,Nutrition,0.0,92 +0,Nutrition,0.0,96 +0,Nutrition,0.0,100 +0,Nutrition,0.0,104 +0,Nutrition,0.0,108 +0,Nutrition,0.0,112 +0,Nutrition,0.0,116 +0,Nutrition,0.0,120 +0,Nutrition,0.0,124 +0,Pharmacy,0.0,0 +0,Pharmacy,0.0,4 +0,Pharmacy,0.0,8 +0,Pharmacy,0.0,12 +0,Pharmacy,0.0,16 +0,Pharmacy,0.0,20 +0,Pharmacy,0.0,24 +0,Pharmacy,0.0,28 +0,Pharmacy,0.0,32 +0,Pharmacy,0.0,36 +0,Pharmacy,0.0,40 +0,Pharmacy,0.0,44 +0,Pharmacy,0.0,48 +0,Pharmacy,0.0,52 +0,Pharmacy,0.0,56 +0,Pharmacy,0.0,60 +0,Pharmacy,0.0,64 +0,Pharmacy,0.0,68 +0,Pharmacy,0.0,72 +0,Pharmacy,0.0,76 +0,Pharmacy,0.0,80 +0,Pharmacy,0.0,84 +0,Pharmacy,0.0,88 +0,Pharmacy,0.0,92 +0,Pharmacy,0.0,96 +0,Pharmacy,0.0,100 +0,Pharmacy,0.0,104 +0,Pharmacy,0.0,108 +0,Pharmacy,0.0,112 +0,Pharmacy,0.0,116 +0,Pharmacy,0.0,120 +0,Pharmacy,0.0,124 +0,Radiography,0.0,0 +0,Radiography,0.0,4 +0,Radiography,0.0,8 +0,Radiography,0.0,12 +0,Radiography,0.0,16 +0,Radiography,0.0,20 +0,Radiography,0.0,24 +0,Radiography,0.0,28 
+0,Radiography,0.0,32 +0,Radiography,0.0,36 +0,Radiography,0.0,40 +0,Radiography,0.0,44 +0,Radiography,0.0,48 +0,Radiography,0.0,52 +0,Radiography,0.0,56 +0,Radiography,0.0,60 +0,Radiography,0.0,64 +0,Radiography,0.0,68 +0,Radiography,0.0,72 +0,Radiography,0.0,76 +0,Radiography,0.0,80 +0,Radiography,0.0,84 +0,Radiography,0.0,88 +0,Radiography,0.0,92 +0,Radiography,0.0,96 +0,Radiography,0.0,100 +0,Radiography,0.0,104 +0,Radiography,0.0,108 +0,Radiography,0.0,112 +0,Radiography,0.0,116 +0,Radiography,0.0,120 +0,Radiography,0.0,124 +1a,Clinical,0.07821949136135514,1 +1a,Clinical,0.07821949136135514,5 +1a,Clinical,0.07821949136135514,9 +1a,Clinical,0.07821949136135514,13 +1a,Clinical,0.07821949136135514,17 +1a,Clinical,0.07821949136135514,21 +1a,Clinical,0.07821949136135514,25 +1a,Clinical,0.07821949136135514,29 +1a,Clinical,0.07821949136135514,33 +1a,Clinical,0.07821949136135514,37 +1a,Clinical,0.07821949136135514,41 +1a,Clinical,0.07821949136135514,45 +1a,Clinical,0.07821949136135514,49 +1a,Clinical,0.07821949136135514,53 +1a,Clinical,0.07821949136135514,57 +1a,Clinical,0.07821949136135514,61 +1a,Clinical,0.07821949136135514,65 +1a,Clinical,0.07821949136135514,69 +1a,Clinical,0.07821949136135514,73 +1a,Clinical,0.07821949136135514,77 +1a,Clinical,0.07821949136135514,81 +1a,Clinical,0.07821949136135514,85 +1a,Clinical,0.07821949136135514,89 +1a,Clinical,0.07821949136135514,93 +1a,Clinical,0.07821949136135514,97 +1a,Clinical,0.07821949136135514,101 +1a,Clinical,0.07821949136135514,105 +1a,Clinical,0.07821949136135514,109 +1a,Clinical,0.07821949136135514,113 +1a,Clinical,0.07821949136135514,117 +1a,Clinical,0.07821949136135514,121 +1a,Clinical,0.07821949136135514,125 +1a,DCSA,0.0,1 +1a,DCSA,0.0,5 +1a,DCSA,0.0,9 +1a,DCSA,0.0,13 +1a,DCSA,0.0,17 +1a,DCSA,0.0,21 +1a,DCSA,0.0,25 +1a,DCSA,0.0,29 +1a,DCSA,0.0,33 +1a,DCSA,0.0,37 +1a,DCSA,0.0,41 +1a,DCSA,0.0,45 +1a,DCSA,0.0,49 +1a,DCSA,0.0,53 +1a,DCSA,0.0,57 +1a,DCSA,0.0,61 +1a,DCSA,0.0,65 +1a,DCSA,0.0,69 +1a,DCSA,0.0,73 
+1a,DCSA,0.0,77 +1a,DCSA,0.0,81 +1a,DCSA,0.0,85 +1a,DCSA,0.0,89 +1a,DCSA,0.0,93 +1a,DCSA,0.0,97 +1a,DCSA,0.0,101 +1a,DCSA,0.0,105 +1a,DCSA,0.0,109 +1a,DCSA,0.0,113 +1a,DCSA,0.0,117 +1a,DCSA,0.0,121 +1a,DCSA,0.0,125 +1a,Dental,0.07829874543389978,1 +1a,Dental,0.07829874543389978,5 +1a,Dental,0.07829874543389978,9 +1a,Dental,0.07829874543389978,13 +1a,Dental,0.07829874543389978,17 +1a,Dental,0.07829874543389978,21 +1a,Dental,0.07829874543389978,25 +1a,Dental,0.07829874543389978,29 +1a,Dental,0.07829874543389978,33 +1a,Dental,0.07829874543389978,37 +1a,Dental,0.07829874543389978,41 +1a,Dental,0.07829874543389978,45 +1a,Dental,0.07829874543389978,49 +1a,Dental,0.07829874543389978,53 +1a,Dental,0.07829874543389978,57 +1a,Dental,0.07829874543389978,61 +1a,Dental,0.07829874543389978,65 +1a,Dental,0.07829874543389978,69 +1a,Dental,0.07829874543389978,73 +1a,Dental,0.07829874543389978,77 +1a,Dental,0.07829874543389978,81 +1a,Dental,0.07829874543389978,85 +1a,Dental,0.07829874543389978,89 +1a,Dental,0.07829874543389978,93 +1a,Dental,0.07829874543389978,97 +1a,Dental,0.07829874543389978,101 +1a,Dental,0.07829874543389978,105 +1a,Dental,0.07829874543389978,109 +1a,Dental,0.07829874543389978,113 +1a,Dental,0.07829874543389978,117 +1a,Dental,0.07829874543389978,121 +1a,Dental,0.07829874543389978,125 +1a,Laboratory,0.07846168148629579,1 +1a,Laboratory,0.07846168148629579,5 +1a,Laboratory,0.07846168148629579,9 +1a,Laboratory,0.07846168148629579,13 +1a,Laboratory,0.07846168148629579,17 +1a,Laboratory,0.07846168148629579,21 +1a,Laboratory,0.07846168148629579,25 +1a,Laboratory,0.07846168148629579,29 +1a,Laboratory,0.07846168148629579,33 +1a,Laboratory,0.07846168148629579,37 +1a,Laboratory,0.07846168148629579,41 +1a,Laboratory,0.07846168148629579,45 +1a,Laboratory,0.07846168148629579,49 +1a,Laboratory,0.07846168148629579,53 +1a,Laboratory,0.07846168148629579,57 +1a,Laboratory,0.07846168148629579,61 +1a,Laboratory,0.07846168148629579,65 +1a,Laboratory,0.07846168148629579,69 
+1a,Laboratory,0.07846168148629579,73 +1a,Laboratory,0.07846168148629579,77 +1a,Laboratory,0.07846168148629579,81 +1a,Laboratory,0.07846168148629579,85 +1a,Laboratory,0.07846168148629579,89 +1a,Laboratory,0.07846168148629579,93 +1a,Laboratory,0.07846168148629579,97 +1a,Laboratory,0.07846168148629579,101 +1a,Laboratory,0.07846168148629579,105 +1a,Laboratory,0.07846168148629579,109 +1a,Laboratory,0.07846168148629579,113 +1a,Laboratory,0.07846168148629579,117 +1a,Laboratory,0.07846168148629579,121 +1a,Laboratory,0.07846168148629579,125 +1a,Mental,0.10685937629466333,1 +1a,Mental,0.10685937629466333,5 +1a,Mental,0.10685937629466333,9 +1a,Mental,0.10685937629466333,13 +1a,Mental,0.10685937629466333,17 +1a,Mental,0.10685937629466333,21 +1a,Mental,0.10685937629466333,25 +1a,Mental,0.10685937629466333,29 +1a,Mental,0.10685937629466333,33 +1a,Mental,0.10685937629466333,37 +1a,Mental,0.10685937629466333,41 +1a,Mental,0.10685937629466333,45 +1a,Mental,0.10685937629466333,49 +1a,Mental,0.10685937629466333,53 +1a,Mental,0.10685937629466333,57 +1a,Mental,0.10685937629466333,61 +1a,Mental,0.10685937629466333,65 +1a,Mental,0.10685937629466333,69 +1a,Mental,0.10685937629466333,73 +1a,Mental,0.10685937629466333,77 +1a,Mental,0.10685937629466333,81 +1a,Mental,0.10685937629466333,85 +1a,Mental,0.10685937629466333,89 +1a,Mental,0.10685937629466333,93 +1a,Mental,0.10685937629466333,97 +1a,Mental,0.10685937629466333,101 +1a,Mental,0.10685937629466333,105 +1a,Mental,0.10685937629466333,109 +1a,Mental,0.10685937629466333,113 +1a,Mental,0.10685937629466333,117 +1a,Mental,0.10685937629466333,121 +1a,Mental,0.10685937629466333,125 +1a,Nursing_and_Midwifery,0.07892770288117427,1 +1a,Nursing_and_Midwifery,0.07892770288117427,5 +1a,Nursing_and_Midwifery,0.07892770288117427,9 +1a,Nursing_and_Midwifery,0.07892770288117427,13 +1a,Nursing_and_Midwifery,0.07892770288117427,17 +1a,Nursing_and_Midwifery,0.07892770288117427,21 +1a,Nursing_and_Midwifery,0.07892770288117427,25 
+1a,Nursing_and_Midwifery,0.07892770288117427,29 +1a,Nursing_and_Midwifery,0.07892770288117427,33 +1a,Nursing_and_Midwifery,0.07892770288117427,37 +1a,Nursing_and_Midwifery,0.07892770288117427,41 +1a,Nursing_and_Midwifery,0.07892770288117427,45 +1a,Nursing_and_Midwifery,0.07892770288117427,49 +1a,Nursing_and_Midwifery,0.07892770288117427,53 +1a,Nursing_and_Midwifery,0.07892770288117427,57 +1a,Nursing_and_Midwifery,0.07892770288117427,61 +1a,Nursing_and_Midwifery,0.07892770288117427,65 +1a,Nursing_and_Midwifery,0.07892770288117427,69 +1a,Nursing_and_Midwifery,0.07892770288117427,73 +1a,Nursing_and_Midwifery,0.07892770288117427,77 +1a,Nursing_and_Midwifery,0.07892770288117427,81 +1a,Nursing_and_Midwifery,0.07892770288117427,85 +1a,Nursing_and_Midwifery,0.07892770288117427,89 +1a,Nursing_and_Midwifery,0.07892770288117427,93 +1a,Nursing_and_Midwifery,0.07892770288117427,97 +1a,Nursing_and_Midwifery,0.07892770288117427,101 +1a,Nursing_and_Midwifery,0.07892770288117427,105 +1a,Nursing_and_Midwifery,0.07892770288117427,109 +1a,Nursing_and_Midwifery,0.07892770288117427,113 +1a,Nursing_and_Midwifery,0.07892770288117427,117 +1a,Nursing_and_Midwifery,0.07892770288117427,121 +1a,Nursing_and_Midwifery,0.07892770288117427,125 +1a,Nutrition,0.0639284345009262,1 +1a,Nutrition,0.0639284345009262,5 +1a,Nutrition,0.0639284345009262,9 +1a,Nutrition,0.0639284345009262,13 +1a,Nutrition,0.0639284345009262,17 +1a,Nutrition,0.0639284345009262,21 +1a,Nutrition,0.0639284345009262,25 +1a,Nutrition,0.0639284345009262,29 +1a,Nutrition,0.0639284345009262,33 +1a,Nutrition,0.0639284345009262,37 +1a,Nutrition,0.0639284345009262,41 +1a,Nutrition,0.0639284345009262,45 +1a,Nutrition,0.0639284345009262,49 +1a,Nutrition,0.0639284345009262,53 +1a,Nutrition,0.0639284345009262,57 +1a,Nutrition,0.0639284345009262,61 +1a,Nutrition,0.0639284345009262,65 +1a,Nutrition,0.0639284345009262,69 +1a,Nutrition,0.0639284345009262,73 +1a,Nutrition,0.0639284345009262,77 +1a,Nutrition,0.0639284345009262,81 
+1a,Nutrition,0.0639284345009262,85 +1a,Nutrition,0.0639284345009262,89 +1a,Nutrition,0.0639284345009262,93 +1a,Nutrition,0.0639284345009262,97 +1a,Nutrition,0.0639284345009262,101 +1a,Nutrition,0.0639284345009262,105 +1a,Nutrition,0.0639284345009262,109 +1a,Nutrition,0.0639284345009262,113 +1a,Nutrition,0.0639284345009262,117 +1a,Nutrition,0.0639284345009262,121 +1a,Nutrition,0.0639284345009262,125 +1a,Pharmacy,0.06948847292837491,1 +1a,Pharmacy,0.06948847292837491,5 +1a,Pharmacy,0.06948847292837491,9 +1a,Pharmacy,0.06948847292837491,13 +1a,Pharmacy,0.06948847292837491,17 +1a,Pharmacy,0.06948847292837491,21 +1a,Pharmacy,0.06948847292837491,25 +1a,Pharmacy,0.06948847292837491,29 +1a,Pharmacy,0.06948847292837491,33 +1a,Pharmacy,0.06948847292837491,37 +1a,Pharmacy,0.06948847292837491,41 +1a,Pharmacy,0.06948847292837491,45 +1a,Pharmacy,0.06948847292837491,49 +1a,Pharmacy,0.06948847292837491,53 +1a,Pharmacy,0.06948847292837491,57 +1a,Pharmacy,0.06948847292837491,61 +1a,Pharmacy,0.06948847292837491,65 +1a,Pharmacy,0.06948847292837491,69 +1a,Pharmacy,0.06948847292837491,73 +1a,Pharmacy,0.06948847292837491,77 +1a,Pharmacy,0.06948847292837491,81 +1a,Pharmacy,0.06948847292837491,85 +1a,Pharmacy,0.06948847292837491,89 +1a,Pharmacy,0.06948847292837491,93 +1a,Pharmacy,0.06948847292837491,97 +1a,Pharmacy,0.06948847292837491,101 +1a,Pharmacy,0.06948847292837491,105 +1a,Pharmacy,0.06948847292837491,109 +1a,Pharmacy,0.06948847292837491,113 +1a,Pharmacy,0.06948847292837491,117 +1a,Pharmacy,0.06948847292837491,121 +1a,Pharmacy,0.06948847292837491,125 +1a,Radiography,0.08571077482388902,1 +1a,Radiography,0.08571077482388902,5 +1a,Radiography,0.08571077482388902,9 +1a,Radiography,0.08571077482388902,13 +1a,Radiography,0.08571077482388902,17 +1a,Radiography,0.08571077482388902,21 +1a,Radiography,0.08571077482388902,25 +1a,Radiography,0.08571077482388902,29 +1a,Radiography,0.08571077482388902,33 +1a,Radiography,0.08571077482388902,37 +1a,Radiography,0.08571077482388902,41 
+1a,Radiography,0.08571077482388902,45 +1a,Radiography,0.08571077482388902,49 +1a,Radiography,0.08571077482388902,53 +1a,Radiography,0.08571077482388902,57 +1a,Radiography,0.08571077482388902,61 +1a,Radiography,0.08571077482388902,65 +1a,Radiography,0.08571077482388902,69 +1a,Radiography,0.08571077482388902,73 +1a,Radiography,0.08571077482388902,77 +1a,Radiography,0.08571077482388902,81 +1a,Radiography,0.08571077482388902,85 +1a,Radiography,0.08571077482388902,89 +1a,Radiography,0.08571077482388902,93 +1a,Radiography,0.08571077482388902,97 +1a,Radiography,0.08571077482388902,101 +1a,Radiography,0.08571077482388902,105 +1a,Radiography,0.08571077482388902,109 +1a,Radiography,0.08571077482388902,113 +1a,Radiography,0.08571077482388902,117 +1a,Radiography,0.08571077482388902,121 +1a,Radiography,0.08571077482388902,125 +1b,Clinical,0.08425025640663661,2 +1b,Clinical,0.08425025640663661,6 +1b,Clinical,0.08425025640663661,10 +1b,Clinical,0.08425025640663661,14 +1b,Clinical,0.08425025640663661,18 +1b,Clinical,0.08425025640663661,22 +1b,Clinical,0.08425025640663661,26 +1b,Clinical,0.08425025640663661,30 +1b,Clinical,0.08425025640663661,34 +1b,Clinical,0.08425025640663661,38 +1b,Clinical,0.08425025640663661,42 +1b,Clinical,0.08425025640663661,46 +1b,Clinical,0.08425025640663661,50 +1b,Clinical,0.08425025640663661,54 +1b,Clinical,0.08425025640663661,58 +1b,Clinical,0.08425025640663661,62 +1b,Clinical,0.08425025640663661,66 +1b,Clinical,0.08425025640663661,70 +1b,Clinical,0.08425025640663661,74 +1b,Clinical,0.08425025640663661,78 +1b,Clinical,0.08425025640663661,82 +1b,Clinical,0.08425025640663661,86 +1b,Clinical,0.08425025640663661,90 +1b,Clinical,0.08425025640663661,94 +1b,Clinical,0.08425025640663661,98 +1b,Clinical,0.08425025640663661,102 +1b,Clinical,0.08425025640663661,106 +1b,Clinical,0.08425025640663661,110 +1b,Clinical,0.08425025640663661,114 +1b,Clinical,0.08425025640663661,118 +1b,Clinical,0.08425025640663661,122 +1b,Clinical,0.08425025640663661,126 +1b,DCSA,0.0,2 
+1b,DCSA,0.0,6 +1b,DCSA,0.0,10 +1b,DCSA,0.0,14 +1b,DCSA,0.0,18 +1b,DCSA,0.0,22 +1b,DCSA,0.0,26 +1b,DCSA,0.0,30 +1b,DCSA,0.0,34 +1b,DCSA,0.0,38 +1b,DCSA,0.0,42 +1b,DCSA,0.0,46 +1b,DCSA,0.0,50 +1b,DCSA,0.0,54 +1b,DCSA,0.0,58 +1b,DCSA,0.0,62 +1b,DCSA,0.0,66 +1b,DCSA,0.0,70 +1b,DCSA,0.0,74 +1b,DCSA,0.0,78 +1b,DCSA,0.0,82 +1b,DCSA,0.0,86 +1b,DCSA,0.0,90 +1b,DCSA,0.0,94 +1b,DCSA,0.0,98 +1b,DCSA,0.0,102 +1b,DCSA,0.0,106 +1b,DCSA,0.0,110 +1b,DCSA,0.0,114 +1b,DCSA,0.0,118 +1b,DCSA,0.0,122 +1b,DCSA,0.0,126 +1b,Dental,0.07829874543389978,2 +1b,Dental,0.07829874543389978,6 +1b,Dental,0.07829874543389978,10 +1b,Dental,0.07829874543389978,14 +1b,Dental,0.07829874543389978,18 +1b,Dental,0.07829874543389978,22 +1b,Dental,0.07829874543389978,26 +1b,Dental,0.07829874543389978,30 +1b,Dental,0.07829874543389978,34 +1b,Dental,0.07829874543389978,38 +1b,Dental,0.07829874543389978,42 +1b,Dental,0.07829874543389978,46 +1b,Dental,0.07829874543389978,50 +1b,Dental,0.07829874543389978,54 +1b,Dental,0.07829874543389978,58 +1b,Dental,0.07829874543389978,62 +1b,Dental,0.07829874543389978,66 +1b,Dental,0.07829874543389978,70 +1b,Dental,0.07829874543389978,74 +1b,Dental,0.07829874543389978,78 +1b,Dental,0.07829874543389978,82 +1b,Dental,0.07829874543389978,86 +1b,Dental,0.07829874543389978,90 +1b,Dental,0.07829874543389978,94 +1b,Dental,0.07829874543389978,98 +1b,Dental,0.07829874543389978,102 +1b,Dental,0.07829874543389978,106 +1b,Dental,0.07829874543389978,110 +1b,Dental,0.07829874543389978,114 +1b,Dental,0.07829874543389978,118 +1b,Dental,0.07829874543389978,122 +1b,Dental,0.07829874543389978,126 +1b,Laboratory,0.07011174614698786,2 +1b,Laboratory,0.07011174614698786,6 +1b,Laboratory,0.07011174614698786,10 +1b,Laboratory,0.07011174614698786,14 +1b,Laboratory,0.07011174614698786,18 +1b,Laboratory,0.07011174614698786,22 +1b,Laboratory,0.07011174614698786,26 +1b,Laboratory,0.07011174614698786,30 +1b,Laboratory,0.07011174614698786,34 +1b,Laboratory,0.07011174614698786,38 
+1b,Laboratory,0.07011174614698786,42 +1b,Laboratory,0.07011174614698786,46 +1b,Laboratory,0.07011174614698786,50 +1b,Laboratory,0.07011174614698786,54 +1b,Laboratory,0.07011174614698786,58 +1b,Laboratory,0.07011174614698786,62 +1b,Laboratory,0.07011174614698786,66 +1b,Laboratory,0.07011174614698786,70 +1b,Laboratory,0.07011174614698786,74 +1b,Laboratory,0.07011174614698786,78 +1b,Laboratory,0.07011174614698786,82 +1b,Laboratory,0.07011174614698786,86 +1b,Laboratory,0.07011174614698786,90 +1b,Laboratory,0.07011174614698786,94 +1b,Laboratory,0.07011174614698786,98 +1b,Laboratory,0.07011174614698786,102 +1b,Laboratory,0.07011174614698786,106 +1b,Laboratory,0.07011174614698786,110 +1b,Laboratory,0.07011174614698786,114 +1b,Laboratory,0.07011174614698786,118 +1b,Laboratory,0.07011174614698786,122 +1b,Laboratory,0.07011174614698786,126 +1b,Mental,0.10685937629466333,2 +1b,Mental,0.10685937629466333,6 +1b,Mental,0.10685937629466333,10 +1b,Mental,0.10685937629466333,14 +1b,Mental,0.10685937629466333,18 +1b,Mental,0.10685937629466333,22 +1b,Mental,0.10685937629466333,26 +1b,Mental,0.10685937629466333,30 +1b,Mental,0.10685937629466333,34 +1b,Mental,0.10685937629466333,38 +1b,Mental,0.10685937629466333,42 +1b,Mental,0.10685937629466333,46 +1b,Mental,0.10685937629466333,50 +1b,Mental,0.10685937629466333,54 +1b,Mental,0.10685937629466333,58 +1b,Mental,0.10685937629466333,62 +1b,Mental,0.10685937629466333,66 +1b,Mental,0.10685937629466333,70 +1b,Mental,0.10685937629466333,74 +1b,Mental,0.10685937629466333,78 +1b,Mental,0.10685937629466333,82 +1b,Mental,0.10685937629466333,86 +1b,Mental,0.10685937629466333,90 +1b,Mental,0.10685937629466333,94 +1b,Mental,0.10685937629466333,98 +1b,Mental,0.10685937629466333,102 +1b,Mental,0.10685937629466333,106 +1b,Mental,0.10685937629466333,110 +1b,Mental,0.10685937629466333,114 +1b,Mental,0.10685937629466333,118 +1b,Mental,0.10685937629466333,122 +1b,Mental,0.10685937629466333,126 +1b,Nursing_and_Midwifery,0.08303644727374786,2 
+1b,Nursing_and_Midwifery,0.08303644727374786,6 +1b,Nursing_and_Midwifery,0.08303644727374786,10 +1b,Nursing_and_Midwifery,0.08303644727374786,14 +1b,Nursing_and_Midwifery,0.08303644727374786,18 +1b,Nursing_and_Midwifery,0.08303644727374786,22 +1b,Nursing_and_Midwifery,0.08303644727374786,26 +1b,Nursing_and_Midwifery,0.08303644727374786,30 +1b,Nursing_and_Midwifery,0.08303644727374786,34 +1b,Nursing_and_Midwifery,0.08303644727374786,38 +1b,Nursing_and_Midwifery,0.08303644727374786,42 +1b,Nursing_and_Midwifery,0.08303644727374786,46 +1b,Nursing_and_Midwifery,0.08303644727374786,50 +1b,Nursing_and_Midwifery,0.08303644727374786,54 +1b,Nursing_and_Midwifery,0.08303644727374786,58 +1b,Nursing_and_Midwifery,0.08303644727374786,62 +1b,Nursing_and_Midwifery,0.08303644727374786,66 +1b,Nursing_and_Midwifery,0.08303644727374786,70 +1b,Nursing_and_Midwifery,0.08303644727374786,74 +1b,Nursing_and_Midwifery,0.08303644727374786,78 +1b,Nursing_and_Midwifery,0.08303644727374786,82 +1b,Nursing_and_Midwifery,0.08303644727374786,86 +1b,Nursing_and_Midwifery,0.08303644727374786,90 +1b,Nursing_and_Midwifery,0.08303644727374786,94 +1b,Nursing_and_Midwifery,0.08303644727374786,98 +1b,Nursing_and_Midwifery,0.08303644727374786,102 +1b,Nursing_and_Midwifery,0.08303644727374786,106 +1b,Nursing_and_Midwifery,0.08303644727374786,110 +1b,Nursing_and_Midwifery,0.08303644727374786,114 +1b,Nursing_and_Midwifery,0.08303644727374786,118 +1b,Nursing_and_Midwifery,0.08303644727374786,122 +1b,Nursing_and_Midwifery,0.08303644727374786,126 +1b,Nutrition,0.0639284345009262,2 +1b,Nutrition,0.0639284345009262,6 +1b,Nutrition,0.0639284345009262,10 +1b,Nutrition,0.0639284345009262,14 +1b,Nutrition,0.0639284345009262,18 +1b,Nutrition,0.0639284345009262,22 +1b,Nutrition,0.0639284345009262,26 +1b,Nutrition,0.0639284345009262,30 +1b,Nutrition,0.0639284345009262,34 +1b,Nutrition,0.0639284345009262,38 +1b,Nutrition,0.0639284345009262,42 +1b,Nutrition,0.0639284345009262,46 +1b,Nutrition,0.0639284345009262,50 
+1b,Nutrition,0.0639284345009262,54 +1b,Nutrition,0.0639284345009262,58 +1b,Nutrition,0.0639284345009262,62 +1b,Nutrition,0.0639284345009262,66 +1b,Nutrition,0.0639284345009262,70 +1b,Nutrition,0.0639284345009262,74 +1b,Nutrition,0.0639284345009262,78 +1b,Nutrition,0.0639284345009262,82 +1b,Nutrition,0.0639284345009262,86 +1b,Nutrition,0.0639284345009262,90 +1b,Nutrition,0.0639284345009262,94 +1b,Nutrition,0.0639284345009262,98 +1b,Nutrition,0.0639284345009262,102 +1b,Nutrition,0.0639284345009262,106 +1b,Nutrition,0.0639284345009262,110 +1b,Nutrition,0.0639284345009262,114 +1b,Nutrition,0.0639284345009262,118 +1b,Nutrition,0.0639284345009262,122 +1b,Nutrition,0.0639284345009262,126 +1b,Pharmacy,0.06547484875642307,2 +1b,Pharmacy,0.06547484875642307,6 +1b,Pharmacy,0.06547484875642307,10 +1b,Pharmacy,0.06547484875642307,14 +1b,Pharmacy,0.06547484875642307,18 +1b,Pharmacy,0.06547484875642307,22 +1b,Pharmacy,0.06547484875642307,26 +1b,Pharmacy,0.06547484875642307,30 +1b,Pharmacy,0.06547484875642307,34 +1b,Pharmacy,0.06547484875642307,38 +1b,Pharmacy,0.06547484875642307,42 +1b,Pharmacy,0.06547484875642307,46 +1b,Pharmacy,0.06547484875642307,50 +1b,Pharmacy,0.06547484875642307,54 +1b,Pharmacy,0.06547484875642307,58 +1b,Pharmacy,0.06547484875642307,62 +1b,Pharmacy,0.06547484875642307,66 +1b,Pharmacy,0.06547484875642307,70 +1b,Pharmacy,0.06547484875642307,74 +1b,Pharmacy,0.06547484875642307,78 +1b,Pharmacy,0.06547484875642307,82 +1b,Pharmacy,0.06547484875642307,86 +1b,Pharmacy,0.06547484875642307,90 +1b,Pharmacy,0.06547484875642307,94 +1b,Pharmacy,0.06547484875642307,98 +1b,Pharmacy,0.06547484875642307,102 +1b,Pharmacy,0.06547484875642307,106 +1b,Pharmacy,0.06547484875642307,110 +1b,Pharmacy,0.06547484875642307,114 +1b,Pharmacy,0.06547484875642307,118 +1b,Pharmacy,0.06547484875642307,122 +1b,Pharmacy,0.06547484875642307,126 +1b,Radiography,0.08371257486853223,2 +1b,Radiography,0.08371257486853223,6 +1b,Radiography,0.08371257486853223,10 
+1b,Radiography,0.08371257486853223,14 +1b,Radiography,0.08371257486853223,18 +1b,Radiography,0.08371257486853223,22 +1b,Radiography,0.08371257486853223,26 +1b,Radiography,0.08371257486853223,30 +1b,Radiography,0.08371257486853223,34 +1b,Radiography,0.08371257486853223,38 +1b,Radiography,0.08371257486853223,42 +1b,Radiography,0.08371257486853223,46 +1b,Radiography,0.08371257486853223,50 +1b,Radiography,0.08371257486853223,54 +1b,Radiography,0.08371257486853223,58 +1b,Radiography,0.08371257486853223,62 +1b,Radiography,0.08371257486853223,66 +1b,Radiography,0.08371257486853223,70 +1b,Radiography,0.08371257486853223,74 +1b,Radiography,0.08371257486853223,78 +1b,Radiography,0.08371257486853223,82 +1b,Radiography,0.08371257486853223,86 +1b,Radiography,0.08371257486853223,90 +1b,Radiography,0.08371257486853223,94 +1b,Radiography,0.08371257486853223,98 +1b,Radiography,0.08371257486853223,102 +1b,Radiography,0.08371257486853223,106 +1b,Radiography,0.08371257486853223,110 +1b,Radiography,0.08371257486853223,114 +1b,Radiography,0.08371257486853223,118 +1b,Radiography,0.08371257486853223,122 +1b,Radiography,0.08371257486853223,126 +2,Clinical,0.09007068625805861,3 +2,Clinical,0.09007068625805861,7 +2,Clinical,0.09007068625805861,11 +2,Clinical,0.09007068625805861,15 +2,Clinical,0.09007068625805861,19 +2,Clinical,0.09007068625805861,23 +2,Clinical,0.09007068625805861,27 +2,Clinical,0.09007068625805861,31 +2,Clinical,0.09007068625805861,35 +2,Clinical,0.09007068625805861,39 +2,Clinical,0.09007068625805861,43 +2,Clinical,0.09007068625805861,47 +2,Clinical,0.09007068625805861,51 +2,Clinical,0.09007068625805861,55 +2,Clinical,0.09007068625805861,59 +2,Clinical,0.09007068625805861,63 +2,Clinical,0.09007068625805861,67 +2,Clinical,0.09007068625805861,71 +2,Clinical,0.09007068625805861,75 +2,Clinical,0.09007068625805861,79 +2,Clinical,0.09007068625805861,83 +2,Clinical,0.09007068625805861,87 +2,Clinical,0.09007068625805861,91 +2,Clinical,0.09007068625805861,95 
+2,Clinical,0.09007068625805861,99 +2,Clinical,0.09007068625805861,103 +2,Clinical,0.09007068625805861,107 +2,Clinical,0.09007068625805861,111 +2,Clinical,0.09007068625805861,115 +2,Clinical,0.09007068625805861,119 +2,Clinical,0.09007068625805861,123 +2,Clinical,0.09007068625805861,127 +2,DCSA,0.0,3 +2,DCSA,0.0,7 +2,DCSA,0.0,11 +2,DCSA,0.0,15 +2,DCSA,0.0,19 +2,DCSA,0.0,23 +2,DCSA,0.0,27 +2,DCSA,0.0,31 +2,DCSA,0.0,35 +2,DCSA,0.0,39 +2,DCSA,0.0,43 +2,DCSA,0.0,47 +2,DCSA,0.0,51 +2,DCSA,0.0,55 +2,DCSA,0.0,59 +2,DCSA,0.0,63 +2,DCSA,0.0,67 +2,DCSA,0.0,71 +2,DCSA,0.0,75 +2,DCSA,0.0,79 +2,DCSA,0.0,83 +2,DCSA,0.0,87 +2,DCSA,0.0,91 +2,DCSA,0.0,95 +2,DCSA,0.0,99 +2,DCSA,0.0,103 +2,DCSA,0.0,107 +2,DCSA,0.0,111 +2,DCSA,0.0,115 +2,DCSA,0.0,119 +2,DCSA,0.0,123 +2,DCSA,0.0,127 +2,Dental,0.07829874543389978,3 +2,Dental,0.07829874543389978,7 +2,Dental,0.07829874543389978,11 +2,Dental,0.07829874543389978,15 +2,Dental,0.07829874543389978,19 +2,Dental,0.07829874543389978,23 +2,Dental,0.07829874543389978,27 +2,Dental,0.07829874543389978,31 +2,Dental,0.07829874543389978,35 +2,Dental,0.07829874543389978,39 +2,Dental,0.07829874543389978,43 +2,Dental,0.07829874543389978,47 +2,Dental,0.07829874543389978,51 +2,Dental,0.07829874543389978,55 +2,Dental,0.07829874543389978,59 +2,Dental,0.07829874543389978,63 +2,Dental,0.07829874543389978,67 +2,Dental,0.07829874543389978,71 +2,Dental,0.07829874543389978,75 +2,Dental,0.07829874543389978,79 +2,Dental,0.07829874543389978,83 +2,Dental,0.07829874543389978,87 +2,Dental,0.07829874543389978,91 +2,Dental,0.07829874543389978,95 +2,Dental,0.07829874543389978,99 +2,Dental,0.07829874543389978,103 +2,Dental,0.07829874543389978,107 +2,Dental,0.07829874543389978,111 +2,Dental,0.07829874543389978,115 +2,Dental,0.07829874543389978,119 +2,Dental,0.07829874543389978,123 +2,Dental,0.07829874543389978,127 +2,Laboratory,0.07387026493555247,3 +2,Laboratory,0.07387026493555247,7 +2,Laboratory,0.07387026493555247,11 +2,Laboratory,0.07387026493555247,15 
+2,Laboratory,0.07387026493555247,19 +2,Laboratory,0.07387026493555247,23 +2,Laboratory,0.07387026493555247,27 +2,Laboratory,0.07387026493555247,31 +2,Laboratory,0.07387026493555247,35 +2,Laboratory,0.07387026493555247,39 +2,Laboratory,0.07387026493555247,43 +2,Laboratory,0.07387026493555247,47 +2,Laboratory,0.07387026493555247,51 +2,Laboratory,0.07387026493555247,55 +2,Laboratory,0.07387026493555247,59 +2,Laboratory,0.07387026493555247,63 +2,Laboratory,0.07387026493555247,67 +2,Laboratory,0.07387026493555247,71 +2,Laboratory,0.07387026493555247,75 +2,Laboratory,0.07387026493555247,79 +2,Laboratory,0.07387026493555247,83 +2,Laboratory,0.07387026493555247,87 +2,Laboratory,0.07387026493555247,91 +2,Laboratory,0.07387026493555247,95 +2,Laboratory,0.07387026493555247,99 +2,Laboratory,0.07387026493555247,103 +2,Laboratory,0.07387026493555247,107 +2,Laboratory,0.07387026493555247,111 +2,Laboratory,0.07387026493555247,115 +2,Laboratory,0.07387026493555247,119 +2,Laboratory,0.07387026493555247,123 +2,Laboratory,0.07387026493555247,127 +2,Mental,0.10685937629466331,3 +2,Mental,0.10685937629466331,7 +2,Mental,0.10685937629466331,11 +2,Mental,0.10685937629466331,15 +2,Mental,0.10685937629466331,19 +2,Mental,0.10685937629466331,23 +2,Mental,0.10685937629466331,27 +2,Mental,0.10685937629466331,31 +2,Mental,0.10685937629466331,35 +2,Mental,0.10685937629466331,39 +2,Mental,0.10685937629466331,43 +2,Mental,0.10685937629466331,47 +2,Mental,0.10685937629466331,51 +2,Mental,0.10685937629466331,55 +2,Mental,0.10685937629466331,59 +2,Mental,0.10685937629466331,63 +2,Mental,0.10685937629466331,67 +2,Mental,0.10685937629466331,71 +2,Mental,0.10685937629466331,75 +2,Mental,0.10685937629466331,79 +2,Mental,0.10685937629466331,83 +2,Mental,0.10685937629466331,87 +2,Mental,0.10685937629466331,91 +2,Mental,0.10685937629466331,95 +2,Mental,0.10685937629466331,99 +2,Mental,0.10685937629466331,103 +2,Mental,0.10685937629466331,107 +2,Mental,0.10685937629466331,111 
+2,Mental,0.10685937629466331,115 +2,Mental,0.10685937629466331,119 +2,Mental,0.10685937629466331,123 +2,Mental,0.10685937629466331,127 +2,Nursing_and_Midwifery,0.0807874215347857,3 +2,Nursing_and_Midwifery,0.0807874215347857,7 +2,Nursing_and_Midwifery,0.0807874215347857,11 +2,Nursing_and_Midwifery,0.0807874215347857,15 +2,Nursing_and_Midwifery,0.0807874215347857,19 +2,Nursing_and_Midwifery,0.0807874215347857,23 +2,Nursing_and_Midwifery,0.0807874215347857,27 +2,Nursing_and_Midwifery,0.0807874215347857,31 +2,Nursing_and_Midwifery,0.0807874215347857,35 +2,Nursing_and_Midwifery,0.0807874215347857,39 +2,Nursing_and_Midwifery,0.0807874215347857,43 +2,Nursing_and_Midwifery,0.0807874215347857,47 +2,Nursing_and_Midwifery,0.0807874215347857,51 +2,Nursing_and_Midwifery,0.0807874215347857,55 +2,Nursing_and_Midwifery,0.0807874215347857,59 +2,Nursing_and_Midwifery,0.0807874215347857,63 +2,Nursing_and_Midwifery,0.0807874215347857,67 +2,Nursing_and_Midwifery,0.0807874215347857,71 +2,Nursing_and_Midwifery,0.0807874215347857,75 +2,Nursing_and_Midwifery,0.0807874215347857,79 +2,Nursing_and_Midwifery,0.0807874215347857,83 +2,Nursing_and_Midwifery,0.0807874215347857,87 +2,Nursing_and_Midwifery,0.0807874215347857,91 +2,Nursing_and_Midwifery,0.0807874215347857,95 +2,Nursing_and_Midwifery,0.0807874215347857,99 +2,Nursing_and_Midwifery,0.0807874215347857,103 +2,Nursing_and_Midwifery,0.0807874215347857,107 +2,Nursing_and_Midwifery,0.0807874215347857,111 +2,Nursing_and_Midwifery,0.0807874215347857,115 +2,Nursing_and_Midwifery,0.0807874215347857,119 +2,Nursing_and_Midwifery,0.0807874215347857,123 +2,Nursing_and_Midwifery,0.0807874215347857,127 +2,Nutrition,0.0639284345009262,3 +2,Nutrition,0.0639284345009262,7 +2,Nutrition,0.0639284345009262,11 +2,Nutrition,0.0639284345009262,15 +2,Nutrition,0.0639284345009262,19 +2,Nutrition,0.0639284345009262,23 +2,Nutrition,0.0639284345009262,27 +2,Nutrition,0.0639284345009262,31 +2,Nutrition,0.0639284345009262,35 +2,Nutrition,0.0639284345009262,39 
+2,Nutrition,0.0639284345009262,43 +2,Nutrition,0.0639284345009262,47 +2,Nutrition,0.0639284345009262,51 +2,Nutrition,0.0639284345009262,55 +2,Nutrition,0.0639284345009262,59 +2,Nutrition,0.0639284345009262,63 +2,Nutrition,0.0639284345009262,67 +2,Nutrition,0.0639284345009262,71 +2,Nutrition,0.0639284345009262,75 +2,Nutrition,0.0639284345009262,79 +2,Nutrition,0.0639284345009262,83 +2,Nutrition,0.0639284345009262,87 +2,Nutrition,0.0639284345009262,91 +2,Nutrition,0.0639284345009262,95 +2,Nutrition,0.0639284345009262,99 +2,Nutrition,0.0639284345009262,103 +2,Nutrition,0.0639284345009262,107 +2,Nutrition,0.0639284345009262,111 +2,Nutrition,0.0639284345009262,115 +2,Nutrition,0.0639284345009262,119 +2,Nutrition,0.0639284345009262,123 +2,Nutrition,0.0639284345009262,127 +2,Pharmacy,0.07361036959357388,3 +2,Pharmacy,0.07361036959357388,7 +2,Pharmacy,0.07361036959357388,11 +2,Pharmacy,0.07361036959357388,15 +2,Pharmacy,0.07361036959357388,19 +2,Pharmacy,0.07361036959357388,23 +2,Pharmacy,0.07361036959357388,27 +2,Pharmacy,0.07361036959357388,31 +2,Pharmacy,0.07361036959357388,35 +2,Pharmacy,0.07361036959357388,39 +2,Pharmacy,0.07361036959357388,43 +2,Pharmacy,0.07361036959357388,47 +2,Pharmacy,0.07361036959357388,51 +2,Pharmacy,0.07361036959357388,55 +2,Pharmacy,0.07361036959357388,59 +2,Pharmacy,0.07361036959357388,63 +2,Pharmacy,0.07361036959357388,67 +2,Pharmacy,0.07361036959357388,71 +2,Pharmacy,0.07361036959357388,75 +2,Pharmacy,0.07361036959357388,79 +2,Pharmacy,0.07361036959357388,83 +2,Pharmacy,0.07361036959357388,87 +2,Pharmacy,0.07361036959357388,91 +2,Pharmacy,0.07361036959357388,95 +2,Pharmacy,0.07361036959357388,99 +2,Pharmacy,0.07361036959357388,103 +2,Pharmacy,0.07361036959357388,107 +2,Pharmacy,0.07361036959357388,111 +2,Pharmacy,0.07361036959357388,115 +2,Pharmacy,0.07361036959357388,119 +2,Pharmacy,0.07361036959357388,123 +2,Pharmacy,0.07361036959357388,127 +2,Radiography,0.08494872731157552,3 +2,Radiography,0.08494872731157552,7 
+2,Radiography,0.08494872731157552,11 +2,Radiography,0.08494872731157552,15 +2,Radiography,0.08494872731157552,19 +2,Radiography,0.08494872731157552,23 +2,Radiography,0.08494872731157552,27 +2,Radiography,0.08494872731157552,31 +2,Radiography,0.08494872731157552,35 +2,Radiography,0.08494872731157552,39 +2,Radiography,0.08494872731157552,43 +2,Radiography,0.08494872731157552,47 +2,Radiography,0.08494872731157552,51 +2,Radiography,0.08494872731157552,55 +2,Radiography,0.08494872731157552,59 +2,Radiography,0.08494872731157552,63 +2,Radiography,0.08494872731157552,67 +2,Radiography,0.08494872731157552,71 +2,Radiography,0.08494872731157552,75 +2,Radiography,0.08494872731157552,79 +2,Radiography,0.08494872731157552,83 +2,Radiography,0.08494872731157552,87 +2,Radiography,0.08494872731157552,91 +2,Radiography,0.08494872731157552,95 +2,Radiography,0.08494872731157552,99 +2,Radiography,0.08494872731157552,103 +2,Radiography,0.08494872731157552,107 +2,Radiography,0.08494872731157552,111 +2,Radiography,0.08494872731157552,115 +2,Radiography,0.08494872731157552,119 +2,Radiography,0.08494872731157552,123 +2,Radiography,0.08494872731157552,127 +3,Clinical,0.10529866543952451,128 +3,Clinical,0.10529866543952451,129 +3,Clinical,0.10529866543952451,130 +3,DCSA,0.0,128 +3,DCSA,0.0,129 +3,DCSA,0.0,130 +3,Dental,0.07829874543389978,128 +3,Dental,0.07829874543389978,129 +3,Dental,0.07829874543389978,130 +3,Laboratory,0.0738347576509901,128 +3,Laboratory,0.0738347576509901,129 +3,Laboratory,0.0738347576509901,130 +3,Mental,0.10685937629466333,128 +3,Mental,0.10685937629466333,129 +3,Mental,0.10685937629466333,130 +3,Nursing_and_Midwifery,0.08186628595521346,128 +3,Nursing_and_Midwifery,0.08186628595521346,129 +3,Nursing_and_Midwifery,0.08186628595521346,130 +3,Nutrition,0.0639284345009262,128 +3,Nutrition,0.0639284345009262,129 +3,Nutrition,0.0639284345009262,130 +3,Pharmacy,0.07357821789389614,128 +3,Pharmacy,0.07357821789389614,129 +3,Pharmacy,0.07357821789389614,130 
+3,Radiography,0.08448172509117155,128 +3,Radiography,0.08448172509117155,129 +3,Radiography,0.08448172509117155,130 +4,Clinical,0.09878735968086705,131 +4,DCSA,0.0,131 +4,Dental,0.0,131 +4,Laboratory,0.0,131 +4,Mental,0.10685937629466331,131 +4,Nursing_and_Midwifery,0.0811022714277672,131 +4,Nutrition,0.0,131 +4,Pharmacy,0.07364430296692356,131 +4,Radiography,0.0,131 +5,Clinical,0.10583786289783911,132 +5,DCSA,0.0,132 +5,Dental,0.0,132 +5,Laboratory,0.07383396219479044,132 +5,Mental,0.0,132 +5,Nursing_and_Midwifery,0.09092392823473701,132 +5,Nutrition,0.0639284345009262,132 +5,Pharmacy,0.07358379192020066,132 +5,Radiography,0.08207874817927362,132 diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index 73028f42c5..a71d52db47 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -317,11 +317,6 @@ def apply( dalys = compute_summary_statistics(dalys, central_measure='median') - # This gives us the capacity used for each cadre and level, for each draw and run - # From this we will extract the run-wise delta in capacity used relative to the Nothing scenario, for each cadre - # and summarise. However since no HSIs are delivered in the Nothing scenario, the capacity used in that scenario is zero, - # so the delta relative to Nothing is just the capacity used in each scenario. - # TODO: Check if this should be scaled with population or used as is. # Note that Capacity_By_FacID_and_Officer logs the fraction of time used per officer type; not the absolute time used. # To get the actual minutes, we need to multiply by the total available minutes. 
annual_capacity_used_by_cadre_and_level = extract_results( @@ -329,22 +324,15 @@ def apply( module='tlo.methods.healthsystem.summary', key='Capacity_By_FacID_and_Officer', custom_generate_series=lambda df: get_capacity_used_by_officer_type_and_facility_level(df, facility_id_levels_dict), - do_scaling=True, + do_scaling=False, # Not scaling as this is a fraction calculated during runtime. autodiscover=True, - ) - # Sum across all facility levels and years; so we get the *total* capacity used over the whole period - # TODO: Check with Sakshi if this is what we want. - mask = annual_capacity_used_by_cadre_and_level.index.get_level_values(0).isin(range(2026, 2040)) - capacity_used_by_cadre = ( - annual_capacity_used_by_cadre_and_level[mask].groupby(['OfficerType']). - sum(). - pipe(set_param_names_as_column_index_level_0, param_names=param_names) - ) + ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) - capacity_used_by_cadre = ( - compute_summary_statistics(capacity_used_by_cadre, central_measure='median') + results['annual_capacity_used_by_cadre_and_level'] = ( + compute_summary_statistics(annual_capacity_used_by_cadre_and_level, 'median') ) + # Get the total available caapacity by cadre needed for LCOA # resources/healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv daily_capacity_by_cadre_and_level = ( @@ -353,23 +341,58 @@ def apply( # This gives the total minutes available per day by cadre and facility level. 
# Sum across levels to get cadre specific constraints, and multiply by 365 to get annual capacity # and then by the length of the period - annual_capacity_by_cadre = ( - daily_capacity_by_cadre_and_level.groupby('Officer_Category')['Total_Mins_Per_Day'].sum() * 365 * 15 + annual_capacity_available_by_cadre_and_level = ( + daily_capacity_by_cadre_and_level.groupby(['Officer_Category', 'Facility_Level'])['Total_Mins_Per_Day'].sum() * 365 + ) + + annual_capacity_available_by_cadre_and_level.index = ( + annual_capacity_available_by_cadre_and_level.index.rename(['OfficerType', 'FacilityLevel']) ) + # Now we use the fraction of time used from the results to get the actual minutes used + series_reindexed = annual_capacity_available_by_cadre_and_level.reindex( + annual_capacity_used_by_cadre_and_level.index.droplevel('year') + ) + + series_reindexed.index = annual_capacity_used_by_cadre_and_level.index + + actual_capacity_used_by_cadre_and_level = annual_capacity_used_by_cadre_and_level.mul(series_reindexed, axis=0) + results['actual_annual_capacity_by_cadre'] = compute_summary_statistics(actual_capacity_used_by_cadre_and_level, 'median') + # Load the salary per minutes by cadre and facility level + # ResourceFile_Minute_Salary_HR.csv is produced using Bingling's code, + # except that she was dropping Facility_Level, which I have retained. 
+ salary_by_cadre_and_level = ( + pd.read_csv(resourcefilepath / "costing" / "ResourceFile_Minute_Salary_HR.csv") + ) + salary_by_cadre_and_level =( + salary_by_cadre_and_level + .groupby(['Facility_Level', 'Officer_Type_Code'])['Minute_Salary_USD'] + .mean() + .rename_axis(index={'Facility_Level': 'FacilityLevel', 'Officer_Type_Code': 'OfficerType'}) + ).reorder_levels(['OfficerType', 'FacilityLevel']) + # Multiply actual minutes used by the salary per minute to get the total + # salary cost by cadre and facility level + series_reindexed = salary_by_cadre_and_level.reindex( + actual_capacity_used_by_cadre_and_level.index.droplevel('year') + ) + + series_reindexed.index = actual_capacity_used_by_cadre_and_level.index + + actual_cost_by_cadre_and_level = ( + actual_capacity_used_by_cadre_and_level.mul(series_reindexed, axis=0) + ) + + # Finally we need it by cadre, so we sum across facility levels and then + # summarise + actual_cost_by_cadre_and_level = ( + actual_cost_by_cadre_and_level.groupby(['year', 'OfficerType']).sum() + ) + results['annual_cost_by_cadre'] = compute_summary_statistics(actual_cost_by_cadre_and_level, 'median') + staff_count_by_cadre = ( daily_capacity_by_cadre_and_level.groupby('Officer_Category')['Staff_Count'].sum() ) - # Proportion of capacity used by year and cadre, relative to the total available capacity by cadre - proportion_capacity_used_by_cadre = ( - annual_capacity_used_by_cadre_and_level[mask].groupby(['OfficerType', 'year']). - sum(). 
- pipe(set_param_names_as_column_index_level_0, param_names=param_names) - ).div(annual_capacity_by_cadre / 15, axis=0, level=0) - proportion_capacity_used_by_cadre = compute_summary_statistics(proportion_capacity_used_by_cadre, central_measure='median') - results['proportion_capacity_used_by_cadre'] = proportion_capacity_used_by_cadre - # Add consumables budget to this dictionary so that we have everything in one place # USD 225,602,946 (203136642 from donors + 22466304 from the government) # Revision of Malawi’s Health Benefits Package: A Critical Analysis of Policy Formulation and Implementation @@ -381,8 +404,6 @@ def apply( results['pc_dalys_averted'] = pc_dalys_averted if do_comparison else None results['icers_summarized'] = icers_summarized if do_comparison else None results['incremental_scenario_cost'] = incremental_scenario_cost_summarized if do_comparison else None - results['capacity_used_by_cadre'] = capacity_used_by_cadre - results['annual_capacity_by_cadre'] = annual_capacity_by_cadre results['staff_count_by_cadre'] = staff_count_by_cadre # Extract DALYs and costs from the LCOA input workbook (EHP_BasedOnLCOA sheet). diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/calculate_staff_salary_per_minute.py b/src/scripts/lcoa_inputs_from_tlo_analyses/calculate_staff_salary_per_minute.py new file mode 100644 index 0000000000..5327be6abd --- /dev/null +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/calculate_staff_salary_per_minute.py @@ -0,0 +1,55 @@ +""" +We calculate the salary cost of current and funded plus HCW. 
+""" +import itertools +from pathlib import Path + +import numpy as np +import pandas as pd + +resourcefilepath = Path('./resources') + +mfl = pd.read_csv(resourcefilepath / 'healthsystem' / 'organisation' / 'ResourceFile_Master_Facilities_List.csv') + +hr_salary = pd.read_csv(resourcefilepath / + 'costing' / 'ResourceFile_Annual_Salary_Per_Cadre.csv', index_col=False) +# hr_salary_per_level = pd.read_excel(resourcefilepath / +# 'costing' / 'ResourceFile_Costing.xlsx', sheet_name='human_resources') +# as of 2019 +hr_current = pd.read_csv(resourcefilepath / + 'healthsystem' / 'human_resources' / 'actual' / 'ResourceFile_Daily_Capabilities.csv') +hr_established = pd.read_csv(resourcefilepath / + 'healthsystem' / 'human_resources' / 'funded_plus' / 'ResourceFile_Daily_Capabilities.csv') +# for 2020-2024 +historical_scaling = pd.read_csv(resourcefilepath / + 'healthsystem' / 'human_resources' / 'scaling_capabilities' / + 'ResourceFile_dynamic_HR_scaling' / 'historical_scaling.csv' + ).set_index('year') +integrated_historical_scaling = ( + historical_scaling.loc[2020, 'dynamic_HR_scaling_factor'] * + historical_scaling.loc[2021, 'dynamic_HR_scaling_factor'] * + historical_scaling.loc[2022, 'dynamic_HR_scaling_factor'] * + historical_scaling.loc[2023, 'dynamic_HR_scaling_factor'] * + historical_scaling.loc[2024, 'dynamic_HR_scaling_factor'] +) + +# to get minute salary per cadre per level +Annual_PFT = hr_current.groupby(['Facility_Level', 'Officer_Category']).agg( + {'Total_Mins_Per_Day': 'sum', 'Staff_Count': 'sum'}).reset_index() +Annual_PFT['Annual_Mins_Per_Staff'] = 365.25 * Annual_PFT['Total_Mins_Per_Day']/Annual_PFT['Staff_Count'] + +# the hr salary by minute and facility id, as of 2019 +Minute_Salary = Annual_PFT.merge(hr_salary, on=['Officer_Category'], how='outer') +Minute_Salary['Minute_Salary_USD'] = Minute_Salary['Annual_Salary_USD']/Minute_Salary['Annual_Mins_Per_Staff'] +# store the minute salary by cadre and level +Minute_Salary_by_Cadre_Level = 
Minute_Salary[ + ['Facility_Level', 'Officer_Category', 'Minute_Salary_USD'] +].copy().fillna(0.0) +Minute_Salary = Minute_Salary[['Facility_Level', 'Officer_Category', 'Minute_Salary_USD']].merge( + mfl[['Facility_Level', 'Facility_ID']], on=['Facility_Level'], how='outer' +) + +Minute_Salary = Minute_Salary.fillna(0.0) +Minute_Salary.rename(columns={'Officer_Category': 'Officer_Type_Code'}, inplace=True) + +Minute_Salary.to_csv(resourcefilepath / 'costing' / 'ResourceFile_Minute_Salary_HR.csv', index=False) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py index 46f550e904..c5bc04db62 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/fig_utils.py @@ -710,3 +710,298 @@ def plot_population_by_year(_df: pd.DataFrame): fig.tight_layout() return fig, ax + + +def plot_proportion_capacity_used_by_cadre_over_time_for_draw(_df: pd.DataFrame, draw: str): + """Plot grouped bars by year for one draw; each bar-group decomposes by cadre with CI error bars.""" + if _df is None: + raise ValueError("`proportion_capacity_used_by_cadre` is None.") + if not isinstance(_df.index, pd.MultiIndex) or _df.index.nlevels != 2: + raise ValueError("Expected a 2-level index: (OfficerType, year).") + if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels != 2: + raise ValueError("Expected a 2-level columns index: (draw, stat).") + + officer_level_name = "OfficerType" if "OfficerType" in _df.index.names else _df.index.names[0] + year_level_name = "year" if "year" in _df.index.names else _df.index.names[1] + draw_level_name = "draw" if "draw" in _df.columns.names else _df.columns.names[0] + + available_draws = pd.Index(_df.columns.get_level_values(draw_level_name).unique()) + if draw not in available_draws: + raise ValueError(f"Draw '{draw}' not found. 
Available draws: {available_draws.tolist()}") + + draw_df = _df[draw].copy() + required_stats = {"central", "lower", "upper"} + if not required_stats.issubset(draw_df.columns): + raise ValueError( + f"Missing required stats {sorted(required_stats)} in draw '{draw}'. " + f"Found: {draw_df.columns.tolist()}" + ) + + years = sorted(pd.Index(draw_df.index.get_level_values(year_level_name).unique()).tolist()) + cadres = sorted(pd.Index(draw_df.index.get_level_values(officer_level_name).unique()).tolist()) + x = np.arange(len(years), dtype=float) + bar_width = min(0.8 / max(len(cadres), 1), 0.12) + offsets = (np.arange(len(cadres)) - (len(cadres) - 1) / 2) * bar_width + + fig_width = max(10, min(1.3 * len(years) + 6, 24)) + fig_height = max(6, min(0.22 * len(cadres) + 6, 14)) + fig, ax = plt.subplots(figsize=(fig_width, fig_height)) + + for i, cadre in enumerate(cadres): + cadre_df = draw_df.xs(cadre, level=officer_level_name).reindex(years) + central = pd.to_numeric(cadre_df["central"], errors="coerce").fillna(0.0).to_numpy() + lower = pd.to_numeric(cadre_df["lower"], errors="coerce").fillna(0.0).to_numpy() + upper = pd.to_numeric(cadre_df["upper"], errors="coerce").fillna(0.0).to_numpy() + lower_err = np.clip(central - lower, a_min=0.0, a_max=None) + upper_err = np.clip(upper - central, a_min=0.0, a_max=None) + + ax.bar( + x + offsets[i], + central, + width=bar_width, + label=str(cadre), + yerr=np.vstack([lower_err, upper_err]), + capsize=2, + error_kw={"elinewidth": 0.8, "capthick": 0.8}, + alpha=0.9, + ) + + ax.set_xticks(x) + ax.set_xticklabels([str(y) for y in years], rotation=45, ha="right") + ax.set_xlabel("Year") + ax.set_ylabel("Proportion of Capacity Used") + ax.grid(axis="y", alpha=0.3) + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + ax.legend(title="Cadre", loc="center left", bbox_to_anchor=(1.02, 0.5), fontsize=8, frameon=True) + fig.tight_layout() + return fig, ax + + +def plot_cost_by_cadre_over_time_for_draw( + _df: 
pd.DataFrame, + draw: str, + title: str | None = None, +): + """Plot capacity used over time for one draw, with one line per officer type.""" + if _df is None: + raise ValueError("`_df` is None.") + if not isinstance(_df.index, pd.MultiIndex) or _df.index.nlevels != 2: + raise ValueError("Expected a 2-level index: (year, OfficerType).") + if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels != 2: + raise ValueError("Expected a 2-level columns index: (draw, stat).") + + year_level_name = "year" if "year" in _df.index.names else _df.index.names[0] + officer_level_name = "OfficerType" if "OfficerType" in _df.index.names else _df.index.names[1] + draw_level_name = "draw" if "draw" in _df.columns.names else _df.columns.names[0] + + available_draws = pd.Index(_df.columns.get_level_values(draw_level_name).unique()) + if draw not in available_draws: + raise ValueError(f"Draw '{draw}' not found. Available draws: {available_draws.tolist()}") + + draw_df = _df[draw].copy() + required_stats = {"central", "lower", "upper"} + if not required_stats.issubset(draw_df.columns): + raise ValueError( + f"Missing required stats {sorted(required_stats)} in draw '{draw}'. 
" + f"Found: {draw_df.columns.tolist()}" + ) + + years = sorted(pd.Index(draw_df.index.get_level_values(year_level_name).unique()).tolist()) + officers = sorted(pd.Index(draw_df.index.get_level_values(officer_level_name).unique()).tolist(), key=str) + x = np.arange(len(years), dtype=float) + + fig_width = max(10, min(1.3 * len(years) + 4, 24)) + fig_height = max(6, min(0.28 * len(officers) + 4, 16)) + fig, ax = plt.subplots(figsize=(fig_width, fig_height)) + + officer_colors = list(plt.get_cmap("tab10").colors) + plotted_any = False + + for i, officer_type in enumerate(officers): + officer_df = draw_df.xs(officer_type, level=officer_level_name).reindex(years) + if officer_df.empty: + continue + + central = pd.to_numeric(officer_df["central"], errors="coerce").fillna(0.0).to_numpy() + lower = pd.to_numeric(officer_df["lower"], errors="coerce").fillna(0.0).to_numpy() + upper = pd.to_numeric(officer_df["upper"], errors="coerce").fillna(0.0).to_numpy() + + if not (central.any() or lower.any() or upper.any()): + continue + + color = officer_colors[i % len(officer_colors)] + ax.plot( + x, + central, + marker="o", + linewidth=1.8, + markersize=4, + color=color, + label=str(officer_type), + ) + ax.fill_between( + x, + lower, + upper, + color=color, + alpha=0.12, + linewidth=0, + ) + plotted_any = True + + if not plotted_any: + raise ValueError(f"No plottable officer types remain for draw '{draw}'.") + + ax.set_xticks(x) + ax.set_xticklabels([str(year) for year in years], rotation=45, ha="right") + ax.set_xlabel("Year") + ax.set_ylabel("Capacity used") + ax.grid(axis="y", alpha=0.3) + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + + handles, labels = ax.get_legend_handles_labels() + deduplicated_handles_by_label = dict(zip(labels, handles)) + ax.legend( + handles=list(deduplicated_handles_by_label.values()), + labels=list(deduplicated_handles_by_label.keys()), + title="OfficerType", + loc="center left", + bbox_to_anchor=(1.02, 0.5), + 
fontsize=8, + frameon=True, + ) + + if title is not None: + ax.set_title(title) + + fig.tight_layout() + return fig, ax + + +def plot_capacity_used_by_cadre_and_level_over_time_for_draw( + _df: pd.DataFrame, + draw: str, + title: str | None = None, +): + """Plot line charts by year for one draw, faceted by facility level.""" + if _df is None: + raise ValueError("`annual_capacity_used_by_cadre_and_level` is None.") + if not isinstance(_df.index, pd.MultiIndex) or _df.index.nlevels != 3: + raise ValueError("Expected a 3-level index: (year, OfficerType, FacilityLevel).") + if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels != 2: + raise ValueError("Expected a 2-level columns index: (draw, stat).") + + year_level_name = "year" if "year" in _df.index.names else _df.index.names[0] + officer_level_name = "OfficerType" if "OfficerType" in _df.index.names else _df.index.names[1] + facility_level_name = "FacilityLevel" if "FacilityLevel" in _df.index.names else _df.index.names[2] + draw_level_name = "draw" if "draw" in _df.columns.names else _df.columns.names[0] + + available_draws = pd.Index(_df.columns.get_level_values(draw_level_name).unique()) + if draw not in available_draws: + raise ValueError(f"Draw '{draw}' not found. Available draws: {available_draws.tolist()}") + + draw_df = _df[draw].copy() + required_stats = {"central", "lower", "upper"} + if not required_stats.issubset(draw_df.columns): + raise ValueError( + f"Missing required stats {sorted(required_stats)} in draw '{draw}'. 
" + f"Found: {draw_df.columns.tolist()}" + ) + + years = sorted(pd.Index(draw_df.index.get_level_values(year_level_name).unique()).tolist()) + cadres = sorted(pd.Index(draw_df.index.get_level_values(officer_level_name).unique()).tolist(), key=str) + facility_levels = sorted(pd.Index(draw_df.index.get_level_values(facility_level_name).unique()).tolist(), key=str) + + if not facility_levels: + raise ValueError(f"No facility levels found for draw '{draw}'.") + + x = np.arange(len(years), dtype=float) + fig_width = max(11, min(1.3 * len(years) + 6, 24)) + valid_facility_levels: list[str] = [] + facility_frames: dict[str, pd.DataFrame] = {} + for facility_level in facility_levels: + level_df = draw_df.xs(facility_level, level=facility_level_name).reindex( + pd.MultiIndex.from_product( + [years, cadres], + names=[year_level_name, officer_level_name], + ), + fill_value=0.0, + ) + if level_df.empty: + continue + if not level_df.loc[:, ["central", "lower", "upper"]].fillna(0.0).to_numpy().any(): + continue + valid_facility_levels.append(facility_level) + facility_frames[facility_level] = level_df + + if not valid_facility_levels: + raise ValueError(f"No plottable facility levels remain for draw '{draw}'.") + + fig_height = max(4.5 * len(valid_facility_levels), 5) + fig, axes = plt.subplots( + len(valid_facility_levels), + 1, + figsize=(fig_width, fig_height), + sharex=True, + ) + axes = np.atleast_1d(axes) + + cadre_colors = list(plt.get_cmap("tab10").colors) + + for axis, facility_level in zip(axes, valid_facility_levels): + level_df = facility_frames[facility_level] + + for i, cadre in enumerate(cadres): + cadre_df = level_df.xs(cadre, level=officer_level_name).reindex(years) + central = pd.to_numeric(cadre_df["central"], errors="coerce").fillna(0.0).to_numpy() + lower = pd.to_numeric(cadre_df["lower"], errors="coerce").fillna(0.0).to_numpy() + upper = pd.to_numeric(cadre_df["upper"], errors="coerce").fillna(0.0).to_numpy() + + axis.plot( + x, + central, + 
label=str(cadre), + color=cadre_colors[i % len(cadre_colors)], + marker="o", + linewidth=1.8, + markersize=4, + ) + axis.fill_between( + x, + lower, + upper, + color=cadre_colors[i % len(cadre_colors)], + alpha=0.12, + linewidth=0, + ) + + axis.set_ylabel("Fraction of available capacity used") + axis.set_title(f"Facility level {facility_level}") + axis.grid(axis="y", alpha=0.3) + axis.spines["top"].set_visible(False) + axis.spines["right"].set_visible(False) + + axes[-1].set_xticks(x) + axes[-1].set_xticklabels([str(y) for y in years], rotation=45, ha="right") + + + handles, labels = axes[0].get_legend_handles_labels() + deduplicated_handles_by_label = dict(zip(labels, handles)) + fig.legend( + deduplicated_handles_by_label.values(), + deduplicated_handles_by_label.keys(), + loc='upper left', + bbox_to_anchor=(0.75, 0.75), + ncols = 2, + fontsize=8, + frameon=True, + ) + + if title is not None: + fig.suptitle(title) + fig.tight_layout(rect=(0, 0, 0.86, 0.96)) + else: + fig.tight_layout() + return fig, axes[0] diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py index 00db6dde66..9aa98b91c2 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py @@ -15,11 +15,12 @@ from scripts.lcoa_inputs_from_tlo_analyses.fig_utils import ( make_graph_file_name, do_barh_plot_with_ci, - plot_cadre_time_by_draw_stacked, plot_deaths_by_period_for_cause, plot_deaths_by_period_for_draw, plot_hsi_counts_by_period_for_draw, plot_population_by_year, + plot_capacity_used_by_cadre_and_level_over_time_for_draw, + plot_cost_by_cadre_over_time_for_draw ) @@ -38,70 +39,6 @@ def load_results_files(results_files: list[Path]) -> dict[Path, dict]: return loaded -def plot_proportion_capacity_used_by_cadre_over_time_for_draw(_df: pd.DataFrame, draw: str): - """Plot 
grouped bars by year for one draw; each bar-group decomposes by cadre with CI error bars.""" - if _df is None: - raise ValueError("`proportion_capacity_used_by_cadre` is None.") - if not isinstance(_df.index, pd.MultiIndex) or _df.index.nlevels != 2: - raise ValueError("Expected a 2-level index: (OfficerType, year).") - if not isinstance(_df.columns, pd.MultiIndex) or _df.columns.nlevels != 2: - raise ValueError("Expected a 2-level columns index: (draw, stat).") - - officer_level_name = "OfficerType" if "OfficerType" in _df.index.names else _df.index.names[0] - year_level_name = "year" if "year" in _df.index.names else _df.index.names[1] - draw_level_name = "draw" if "draw" in _df.columns.names else _df.columns.names[0] - - available_draws = pd.Index(_df.columns.get_level_values(draw_level_name).unique()) - if draw not in available_draws: - raise ValueError(f"Draw '{draw}' not found. Available draws: {available_draws.tolist()}") - - draw_df = _df[draw].copy() - required_stats = {"central", "lower", "upper"} - if not required_stats.issubset(draw_df.columns): - raise ValueError( - f"Missing required stats {sorted(required_stats)} in draw '{draw}'. 
" - f"Found: {draw_df.columns.tolist()}" - ) - - years = sorted(pd.Index(draw_df.index.get_level_values(year_level_name).unique()).tolist()) - cadres = sorted(pd.Index(draw_df.index.get_level_values(officer_level_name).unique()).tolist()) - x = np.arange(len(years), dtype=float) - bar_width = min(0.8 / max(len(cadres), 1), 0.12) - offsets = (np.arange(len(cadres)) - (len(cadres) - 1) / 2) * bar_width - - fig_width = max(10, min(1.3 * len(years) + 6, 24)) - fig_height = max(6, min(0.22 * len(cadres) + 6, 14)) - fig, ax = plt.subplots(figsize=(fig_width, fig_height)) - - for i, cadre in enumerate(cadres): - cadre_df = draw_df.xs(cadre, level=officer_level_name).reindex(years) - central = pd.to_numeric(cadre_df["central"], errors="coerce").fillna(0.0).to_numpy() - lower = pd.to_numeric(cadre_df["lower"], errors="coerce").fillna(0.0).to_numpy() - upper = pd.to_numeric(cadre_df["upper"], errors="coerce").fillna(0.0).to_numpy() - lower_err = np.clip(central - lower, a_min=0.0, a_max=None) - upper_err = np.clip(upper - central, a_min=0.0, a_max=None) - - ax.bar( - x + offsets[i], - central, - width=bar_width, - label=str(cadre), - yerr=np.vstack([lower_err, upper_err]), - capsize=2, - error_kw={"elinewidth": 0.8, "capthick": 0.8}, - alpha=0.9, - ) - - ax.set_xticks(x) - ax.set_xticklabels([str(y) for y in years], rotation=45, ha="right") - ax.set_xlabel("Year") - ax.set_ylabel("Proportion of Capacity Used") - ax.grid(axis="y", alpha=0.3) - ax.spines["top"].set_visible(False) - ax.spines["right"].set_visible(False) - ax.legend(title="Cadre", loc="center left", bbox_to_anchor=(1.02, 0.5), fontsize=8, frameon=True) - fig.tight_layout() - return fig, ax def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path = None): @@ -123,6 +60,7 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path icers = primary_results.get('icers_summarized') incremental_scenario_cost = primary_results.get('incremental_scenario_cost') 
dalys_and_costs_from_lcoa = primary_results.get('dalys_and_costs_from_lcoa') + annual_cost_by_cadre = primary_results.get('annual_cost_by_cadre') comparison_metrics_available = all( metric is not None @@ -139,8 +77,8 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path counts_of_hsi_in_implementation_period = primary_results['counts_of_hsi_by_period'] counts_of_hsi_in_implementation_period = counts_of_hsi_in_implementation_period.drop(['2010-2041'], level=1) - capacity_used_by_cadre = primary_results.get("capacity_used_by_cadre") - proportion_capacity_used_by_cadre = primary_results.get("proportion_capacity_used_by_cadre") + annual_capacity_used_by_cadre_and_level = primary_results.get("annual_capacity_used_by_cadre_and_level") + result_df_by_period = pd.DataFrame([ {'treatment_id_included': draw, 'nonzero_hsis': treatment_id, 'period': period} @@ -193,31 +131,45 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path fig.savefig(outfile) plt.close(fig) - print("Plotting capacity used by cadres across draws.") - fig, ax = plot_cadre_time_by_draw_stacked(capacity_used_by_cadre, stat="central") - name_of_plot = "Capacity Used by Cadres (2026-2040)" - ax.set_title(name_of_plot) - outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) - fig.savefig(outfile) - plt.close(fig) + if annual_capacity_used_by_cadre_and_level is not None: + print("Plotting capacity used by cadre and facility level over time (one figure per treatment ID).") + for param in param_names: + if param == "Nothing": + continue + draw = format_scenario_name(param) + try: + name_of_plot = f"Capacity Used by Cadre and Facility Level Over Time for {draw}" + fig, ax = plot_capacity_used_by_cadre_and_level_over_time_for_draw( + annual_capacity_used_by_cadre_and_level, + draw, + title=name_of_plot, + ) + except ValueError as exc: + print(f"Skipping capacity-by-level plot for draw '{draw}': {exc}") + continue + + outfile = 
os.path.join(output_folder, make_graph_file_name(name_of_plot)) + fig.savefig(outfile) + plt.close(fig) - if proportion_capacity_used_by_cadre is not None: - print("Plotting capacity used over time (one figure per treatment ID).") + if annual_cost_by_cadre is not None: + print("Plotting annual costs by cadre and over time (one figure per treatment ID).") for param in param_names: + print(f"### {param}") if param == "Nothing": continue draw = format_scenario_name(param) try: - fig, ax = plot_proportion_capacity_used_by_cadre_over_time_for_draw( - proportion_capacity_used_by_cadre, + name_of_plot = f"Cost by Cadre Over Time for {draw}" + fig, ax = plot_cost_by_cadre_over_time_for_draw( + annual_cost_by_cadre, draw, + title=name_of_plot, ) except ValueError as exc: - print(f"Skipping capacity-over-time plot for draw '{draw}': {exc}") + print(f"Skipping capacity-by-level plot for draw '{draw}': {exc}") continue - name_of_plot = f"Capacity Used Over Time by Cadre for {draw}" - ax.set_title(name_of_plot) outfile = os.path.join(output_folder, make_graph_file_name(name_of_plot)) fig.savefig(outfile) plt.close(fig) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py index 40cd43e65d..82c00b9f50 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py @@ -101,9 +101,9 @@ def _get_scenarios(self) -> Dict[str, Dict]: service_availability = dict({"Nothing": []}) # For each treatment group, create scenarios keeping only one treatment from that group # Commenting to allow draw 0 to be run and suspended. 
- #service_availability.update( - # {f"Only {treatment}": [treatment] for treatment in treatments} - #) + service_availability.update( + {f"Only {treatment}": [treatment] for treatment in treatments} + ) scenario_definitions = ScenarioDefinitions() scenarios = { From 7e5a3f6602d39038a358dbf9459649fa2cfd8036 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Wed, 10 Jun 2026 11:47:22 +0100 Subject: [PATCH 52/55] Three treatment-ids for resume --- .../scenario_effect_of_treatment_ids.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py index 82c00b9f50..bbacda282d 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py @@ -102,7 +102,13 @@ def _get_scenarios(self) -> Dict[str, Dict]: # For each treatment group, create scenarios keeping only one treatment from that group # Commenting to allow draw 0 to be run and suspended. 
service_availability.update( - {f"Only {treatment}": [treatment] for treatment in treatments} + {f"Only {treatment}": [treatment] for treatment in treatments} + ) + # overwrite service availability dictionary to run specific scenarios for testing + service_availability = dict( + {"Nothing": [], + "Only AntenatalCare_FollowUp_*": ['AntenatalCare_FollowUp_*'], + "Only BladderCancer_PalliativeCare_*": ['BladderCancer_PalliativeCare_*']} ) scenario_definitions = ScenarioDefinitions() From 77a30992cc8c72c8bf460076f506d756efa694fa Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Fri, 12 Jun 2026 16:30:18 +0100 Subject: [PATCH 53/55] Add annual HRH costs to consumables costs --- .../analysis_effect_of_treatment_ids.py | 160 +++++++++++------- .../scenario_effect_of_treatment_ids.py | 8 +- 2 files changed, 104 insertions(+), 64 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index a71d52db47..534fe7282f 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -165,37 +165,15 @@ def apply( with open(checkpoint_path, "wb") as f: pickle.dump(input_costs, f) print(f"Saved input costs checkpoint to: {checkpoint_path}") - results['input_costs'] = input_costs - - # TODO Ask Sakshi: the hrh costs are the same across all draw; therefore incremental costs and cost for medical consumables - # are the same for each draw. Does that make sense? 
- # Consumables cost per intervention - total_cons_cost = input_costs.groupby(['draw', 'run', 'cost_category'])['cost'].sum() - total_cons_cost = compute_summary_statistics(total_cons_cost.unstack(['draw', 'run']), 'median') - total_cons_cost = set_param_names_as_column_index_level_0(total_cons_cost, param_names) - results['total_cons_cost'] = total_cons_cost - # Computing incremental costs - # TODO Check with Sakshi if these are annual costs; as everything else is annual. - if do_comparison: - print("Computing incremental_scenario_cost...") - start = perf_counter() - total_input_cost = input_costs.groupby(['draw', 'run'])['cost'].sum() - incremental_scenario_cost = (pd.DataFrame( - find_difference_relative_to_comparison( - total_input_cost, - comparison=0,) - )) - - elapsed = perf_counter() - start - print(f"\n=== TIMING: computing incremental_scenario_cost took {elapsed:.3f}s ===\n", flush=True) - - incremental_scenario_cost = ( - incremental_scenario_cost.T.reorder_levels(["draw", "run"], axis=1).sort_index(axis=1) - ).pipe(set_param_names_as_column_index_level_0, param_names) + # Map draw numbers to draw names in input costs + draw_lookup = dict(enumerate(param_names)) + input_costs.loc[:, "draw_name"] = input_costs["draw"].map(draw_lookup) + input_costs.drop(columns=["draw"], inplace=True) + input_costs.rename(columns={"draw_name": "draw"}, inplace=True) + results['input_costs'] = input_costs - incremental_scenario_cost_summarized = compute_summary_statistics(incremental_scenario_cost, 'median').iloc[0].unstack() # Get total population by year print("Extracting population data...") @@ -297,27 +275,9 @@ def apply( ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) ) - if do_comparison: - dalys_averted = ( - -1.0 * pd.DataFrame( - find_difference_extra_relative_to_comparison(dalys.sum(), comparison='Nothing')) - - ) - - pc_dalys_averted = 100.0 * compute_summary_statistics( - -1.0 * pd.DataFrame( - 
find_difference_extra_relative_to_comparison(dalys.sum(), comparison='Nothing', scaled=True)).T, - central_measure='median' - ).iloc[0].unstack() - # Run-by-run incremental cost-effectiveness ratio calculation - icers = incremental_scenario_cost.T / dalys_averted - icers_summarized = compute_summary_statistics(icers.T, central_measure='median').iloc[0].unstack() - dalys_averted = compute_summary_statistics(dalys_averted.T, central_measure='median').iloc[0].unstack() - - dalys = compute_summary_statistics(dalys, central_measure='median') + results['dalys'] = compute_summary_statistics(dalys, central_measure='median') - - # Note that Capacity_By_FacID_and_Officer logs the fraction of time used per officer type; not the absolute time used. + # Capacity_By_FacID_and_Officer logs the fraction of time used per officer type; not the absolute time used. # To get the actual minutes, we need to multiply by the total available minutes. annual_capacity_used_by_cadre_and_level = extract_results( results_folder, @@ -326,10 +286,10 @@ def apply( custom_generate_series=lambda df: get_capacity_used_by_officer_type_and_facility_level(df, facility_id_levels_dict), do_scaling=False, # Not scaling as this is a fraction calculated during runtime. 
autodiscover=True, - ).pipe(set_param_names_as_column_index_level_0, param_names=param_names) + ) results['annual_capacity_used_by_cadre_and_level'] = ( - compute_summary_statistics(annual_capacity_used_by_cadre_and_level, 'median') + compute_summary_statistics(annual_capacity_used_by_cadre_and_level.pipe(set_param_names_as_column_index_level_0, param_names=param_names), 'median') ) @@ -358,6 +318,18 @@ def apply( actual_capacity_used_by_cadre_and_level = annual_capacity_used_by_cadre_and_level.mul(series_reindexed, axis=0) results['actual_annual_capacity_by_cadre'] = compute_summary_statistics(actual_capacity_used_by_cadre_and_level, 'median') + # We need the time used by cadre in 2026 + actual_capacity_used_by_cadre = ( + actual_capacity_used_by_cadre_and_level + .groupby(['OfficerType', 'year']) + .sum() + ) + actual_capacity_used_by_cadre_2026 = actual_capacity_used_by_cadre.xs(2026, level='year', axis=0) + + results['actual_capacity_used_by_cadre_2026'] = ( + compute_summary_statistics(actual_capacity_used_by_cadre_2026, 'median') + ) + print(results['actual_capacity_used_by_cadre_2026']) # Load the salary per minutes by cadre and facility level # ResourceFile_Minute_Salary_HR.csv is produced using Bingling's code, # except that she was dropping Facility_Level, which I have retained. @@ -382,16 +354,90 @@ def apply( actual_capacity_used_by_cadre_and_level.mul(series_reindexed, axis=0) ) - # Finally we need it by cadre, so we sum across facility levels and then - # summarise + # The costing script calculates the HRH costs for the total cadre, not for the strength used. + # We will remove the HRH costs from the input_costs; this includes other things besides salary + # Then we will add to the remaining costs the salary costs calculated above. This will gives us + # the total input costs for the intervention. Note that for the moment, we are ignoring non-salary HRH costs. 
+ + # Wrangling to make the actual_cost_by_cadre_and_level the same same as input_costs actual_cost_by_cadre_and_level = ( - actual_cost_by_cadre_and_level.groupby(['year', 'OfficerType']).sum() + actual_cost_by_cadre_and_level + .stack(level=['draw', 'run']) + .rename('cost') + .reset_index() ) - results['annual_cost_by_cadre'] = compute_summary_statistics(actual_cost_by_cadre_and_level, 'median') - staff_count_by_cadre = ( - daily_capacity_by_cadre_and_level.groupby('Officer_Category')['Staff_Count'].sum() + actual_cost_by_cadre_and_level = actual_cost_by_cadre_and_level.rename(columns={ + 'OfficerType': 'cost_subgroup', + 'FacilityLevel': 'Facility_Level', + }) + actual_cost_by_cadre_and_level['cost_subcategory'] = 'salary_for_cadres_used' + actual_cost_by_cadre_and_level['cost_category'] = 'human resources for health' + + actual_cost_by_cadre_and_level = actual_cost_by_cadre_and_level[[ + 'draw', 'run', 'year', + 'cost_subcategory', + 'Facility_Level', + 'cost_subgroup', + 'cost', + 'cost_category', + ]] + + actual_cost_by_cadre_and_level = actual_cost_by_cadre_and_level.reset_index(drop=True) + + mask = actual_cost_by_cadre_and_level['year'] > 2025 + # Now combine with input_costs + input_costs = input_costs[input_costs['cost_category'] != 'human resources for health'] + input_costs_with_proportional_hrh_costs = pd.concat([input_costs, actual_cost_by_cadre_and_level[mask]]) + + # For LCOA we need annual consumables cost per intervention in the year of the switch i.e. 2026. 
+ annual_cons_cost = input_costs.groupby(['draw', 'run', 'cost_category', 'year'])['cost'].sum() + mask = ( + (annual_cons_cost.index.get_level_values('year') == 2026) & + (annual_cons_cost.index.get_level_values('cost_category') == 'medical consumables') ) + annual_cons_cost_2026 = annual_cons_cost[mask].droplevel(['year', 'cost_category']) + + results['annual_cons_cost_2026'] = (compute_summary_statistics(annual_cons_cost_2026, 'median')).T + + # Computing incremental costs + if do_comparison: + print("Computing incremental_scenario_cost...") + start = perf_counter() + total_input_cost = input_costs_with_proportional_hrh_costs.groupby(['draw', 'run'])['cost'].sum() + incremental_scenario_cost = (pd.DataFrame( + find_difference_relative_to_comparison( + total_input_cost, + comparison=0,) + )) + + elapsed = perf_counter() - start + print(f"\n=== TIMING: computing incremental_scenario_cost took {elapsed:.3f}s ===\n", flush=True) + + incremental_scenario_cost = ( + incremental_scenario_cost.T.reorder_levels(["draw", "run"], axis=1).sort_index(axis=1) + ) + + incremental_scenario_cost_summarized = compute_summary_statistics(incremental_scenario_cost, 'median').iloc[0].unstack() + + + if do_comparison: + dalys_averted = ( + -1.0 * pd.DataFrame( + find_difference_extra_relative_to_comparison(dalys.sum(), comparison='Nothing')) + + ) + + pc_dalys_averted = 100.0 * compute_summary_statistics( + -1.0 * pd.DataFrame( + find_difference_extra_relative_to_comparison(dalys.sum(), comparison='Nothing', scaled=True)).T, + central_measure='median' + ).iloc[0].unstack() + # Run-by-run incremental cost-effectiveness ratio calculation + icers = incremental_scenario_cost.T / dalys_averted + icers_summarized = compute_summary_statistics(icers.T, central_measure='median').iloc[0].unstack() + dalys_averted = compute_summary_statistics(dalys_averted.T, central_measure='median').iloc[0].unstack() + # Add consumables budget to this dictionary so that we have everything in one place # USD 
225,602,946 (203136642 from donors + 22466304 from the government) @@ -404,7 +450,7 @@ def apply( results['pc_dalys_averted'] = pc_dalys_averted if do_comparison else None results['icers_summarized'] = icers_summarized if do_comparison else None results['incremental_scenario_cost'] = incremental_scenario_cost_summarized if do_comparison else None - results['staff_count_by_cadre'] = staff_count_by_cadre + # Extract DALYs and costs from the LCOA input workbook (EHP_BasedOnLCOA sheet). lcoa_workbook_path = Path(__file__).resolve().parent / "ResourceFile_PriorityRanking_ALLPOLICIES_EHP_dalys_costs.xlsx" diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py index bbacda282d..82c00b9f50 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/scenario_effect_of_treatment_ids.py @@ -102,13 +102,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: # For each treatment group, create scenarios keeping only one treatment from that group # Commenting to allow draw 0 to be run and suspended. 
service_availability.update( - {f"Only {treatment}": [treatment] for treatment in treatments} - ) - # overwrite service availability dictionary to run specific scenarios for testing - service_availability = dict( - {"Nothing": [], - "Only AntenatalCare_FollowUp_*": ['AntenatalCare_FollowUp_*'], - "Only BladderCancer_PalliativeCare_*": ['BladderCancer_PalliativeCare_*']} + {f"Only {treatment}": [treatment] for treatment in treatments} ) scenario_definitions = ScenarioDefinitions() From 3512c4f53d5fc6632e13ea74e88ef3c10b37a2b5 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Sat, 13 Jun 2026 08:45:29 +0100 Subject: [PATCH 54/55] Minor tweaks to output LCOA inputs --- .../analysis_effect_of_treatment_ids.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index 534fe7282f..e6d0ed05a1 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -398,7 +398,7 @@ def apply( ) annual_cons_cost_2026 = annual_cons_cost[mask].droplevel(['year', 'cost_category']) - results['annual_cons_cost_2026'] = (compute_summary_statistics(annual_cons_cost_2026, 'median')).T + results['annual_cons_cost_2026'] = (compute_summary_statistics(annual_cons_cost_2026.to_frame().T, 'median')).T # Computing incremental costs if do_comparison: @@ -408,7 +408,7 @@ def apply( incremental_scenario_cost = (pd.DataFrame( find_difference_relative_to_comparison( total_input_cost, - comparison=0,) + comparison='Nothing',) )) elapsed = perf_counter() - start From 97995fceb486246d9d1c598b79f30f3435e443ab Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Sun, 14 Jun 2026 08:03:11 +0100 Subject: [PATCH 55/55] Minor tweaks to extract LCOA inputs --- .../analysis_effect_of_treatment_ids.py | 19 
+++++++++++++++++-- .../figures_effect_of_treatment_ids.py | 1 + .../run_preaggregated_optimizer.py | 9 +++++---- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py index e6d0ed05a1..17fd1d54a8 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/analysis_effect_of_treatment_ids.py @@ -16,6 +16,7 @@ from tlo import Date from scripts.lcoa_inputs_from_tlo_analyses.results_processing_utils import ( + format_scenario_name, get_counts_of_appts, get_counts_of_hsi_by_short_treatment_id, get_num_dalys_by_cause_label, @@ -169,10 +170,15 @@ def apply( # Map draw numbers to draw names in input costs draw_lookup = dict(enumerate(param_names)) - input_costs.loc[:, "draw_name"] = input_costs["draw"].map(draw_lookup) + formatted_draw_lookup = { + draw: format_scenario_name(name) + for draw, name in draw_lookup.items() + } + input_costs.loc[:, "draw_name"] = input_costs["draw"].map(formatted_draw_lookup) input_costs.drop(columns=["draw"], inplace=True) input_costs.rename(columns={"draw_name": "draw"}, inplace=True) results['input_costs'] = input_costs + print(input_costs.tail()) # Get total population by year @@ -298,6 +304,15 @@ def apply( daily_capacity_by_cadre_and_level = ( pd.read_csv(resourcefilepath / "healthsystem" / "human_resources" / "actual" / "ResourceFile_Daily_Capabilities.csv") ) + staff_count_by_cadre = ( + daily_capacity_by_cadre_and_level.groupby('Officer_Category')['Staff_Count'].sum() + ) + results['staff_count_by_cadre'] = staff_count_by_cadre + + annual_capacity_by_cadre = ( + daily_capacity_by_cadre_and_level.groupby('Officer_Category')['Total_Mins_Per_Day'].sum() * 365 + ) + results['annual_capacity_by_cadre'] = annual_capacity_by_cadre # This gives the total minutes available per day by cadre and 
facility level. # Sum across levels to get cadre specific constraints, and multiply by 365 to get annual capacity # and then by the length of the period @@ -445,7 +460,7 @@ def apply( # https://doi.org/10.1016/j.vhri.2023.10.007 results['annual_consumables_budget'] = 225602946 - results['dalys'] = dalys + results['dalys'] = compute_summary_statistics(dalys, 'median') results['dalys_averted'] = dalys_averted if do_comparison else None results['pc_dalys_averted'] = pc_dalys_averted if do_comparison else None results['icers_summarized'] = icers_summarized if do_comparison else None diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py index 9aa98b91c2..46e04fbb6d 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/figures_effect_of_treatment_ids.py @@ -351,6 +351,7 @@ def apply(results_files: list[Path], output_folder: Path, resourcefilepath: Path name_of_plot = "DALYs, Incremental Cost, and ICERs by Treatment ID" do_barh_plot_with_ci(dalys_facet, axes[0]) + axes[0].set_xscale('log') axes[0].set_title("DALYs") axes[0].set_xlabel("DALYs averted (/1000)") diff --git a/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py b/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py index 32b6295964..21977a3ab5 100644 --- a/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py +++ b/src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py @@ -10,6 +10,7 @@ import pandas as pd from scripts.lcoa_inputs_from_tlo_analyses.results_processing_utils import format_scenario_name +# python src/scripts/lcoa_inputs_from_tlo_analyses/run_preaggregated_optimizer.py --analysis-results-pkl outputs/generated_outputs/2041-01-01_fullresults.pkl OPTIMIZER_HR_COLS = ["hr_clin", "hr_nur", "hr_pharm", "hr_lab", "hr_ment", 
"hr_nutri"] REQUIRED_OPT_INPUT_COLS = [ @@ -58,13 +59,13 @@ def _build_optimizer_inputs(results: dict[str, Any]) -> pd.DataFrame: dalys_averted = results.get("dalys_averted") incremental_cost = results.get("incremental_scenario_cost") - capacity_used = _rename_hrh_map(results.get("capacity_used_by_cadre")) - conscost = results.get('total_cons_cost') + capacity_used = _rename_hrh_map(results.get('actual_capacity_used_by_cadre_2026')) + conscost = results.get('annual_cons_cost_2026') ce_dalys = dalys_averted['central'] ce_cost = incremental_cost['central'] hr_needs = capacity_used.xs("central", level="stat", axis=1).T - conscost = conscost.xs('central', level='stat', axis=1).T + conscost = conscost.xs('central', level='stat', axis=0) interventions = sorted(set(ce_dalys.index).intersection(set(ce_cost.index))) if not interventions: @@ -76,7 +77,7 @@ def _build_optimizer_inputs(results: dict[str, Any]) -> pd.DataFrame: "intervention": interventions, "ce_dalys": [float(ce_dalys.loc[i]) for i in interventions], "ce_cost": [float(ce_cost.loc[i]) for i in interventions], - "conscost": [float(conscost.loc[i, "medical consumables"]) for i in interventions], + "conscost": [float(conscost.loc[i, "cost"]) for i in interventions], "hr_clin": [float(hr_needs.loc[i, "hr_clin"]) for i in interventions], "hr_nur": [float(hr_needs.loc[i, "hr_nur"]) for i in interventions], "hr_pharm": [float(hr_needs.loc[i, "hr_pharm"]) for i in interventions],