diff --git a/resources/ResourceFile_Cervical_Cancer/parameter_values.csv b/resources/ResourceFile_Cervical_Cancer/parameter_values.csv index a277ea3972..0bd9e1e15b 100644 --- a/resources/ResourceFile_Cervical_Cancer/parameter_values.csv +++ b/resources/ResourceFile_Cervical_Cancer/parameter_values.csv @@ -52,4 +52,4 @@ months_to_next_appt_treated_less_than_12_mo_ago,3,local,1,36,vague prior,Missing months_to_next_appt_treated_12_to_36_mo_ago,6,local,1,36,vague prior,Missing months_to_next_appt_treated_36_to_60_mo_ago,12,local,1,36,vague prior,Missing palliative_care_interval_months,1,local,0.5,12,vague prior,Missing -main_polling_frequency_months,1,undetermined,1,12,design decision,N/A \ No newline at end of file +main_polling_frequency_months,1,undetermined,1,12,design decision,N/A diff --git a/resources/ResourceFile_HPV/parameter_values.csv b/resources/ResourceFile_HPV/parameter_values.csv new file mode 100644 index 0000000000..b3979379ba --- /dev/null +++ b/resources/ResourceFile_HPV/parameter_values.csv @@ -0,0 +1,17 @@ +parameter_name,value,param_label,prior_min,prior_max,prior_note,reference, +init_prev_hpv_hr1,0.05,undetermined,0,1,calibration,"Cubie HA, Morton D, Kawonga E, Mautanga M, Mwenitete I, Teakle N, Ngwira B, Walker H, Walker G, Kafwafwa S, Kabota B, Ter Haar R, Campbell C. HPV prevalence in women attending cervical screening in rural Malawi using the cartridge-based Xpert® HPV assay. J Clin Virol. 2017 Feb;87:1-4. doi: 10.1016/j.jcv.2016.11.014. ", +init_prev_hpv_hr2,0.1,undetermined,0,1,calibration,"Cubie HA, Morton D, Kawonga E, Mautanga M, Mwenitete I, Teakle N, Ngwira B, Walker H, Walker G, Kafwafwa S, Kabota B, Ter Haar R, Campbell C. HPV prevalence in women attending cervical screening in rural Malawi using the cartridge-based Xpert® HPV assay. J Clin Virol. 2017 Feb;87:1-4. doi: 10.1016/j.jcv.2016.11.014. ", +init_prev_hpv_hr3,0.06,undetermined,0,1,calibration,"Cubie HA, Morton D, Kawonga E, Mautanga M, Mwenitete I, Teakle N, Ngwira B, Walker H, Walker G, Kafwafwa S, Kabota B, Ter Haar R, Campbell C. HPV prevalence in women attending cervical screening in rural Malawi using the cartridge-based Xpert® HPV assay. J Clin Virol. 2017 Feb;87:1-4. doi: 10.1016/j.jcv.2016.11.014. ", +b_hpv,0.3,undetermined,0,1,calibration,N/A, +rr_hpv_hiv_no_art,2.6,undetermined,0,1,calibration,"Liu G, Sharma M, Tan N, Barnabas RV. HIV-positive women have higher risk of human papilloma virus infection, precancerous lesions, and cervical cancer. AIDS. 2018 Mar 27;32(6):795-808. doi: 10.1097/QAD.0000000000001765. PMID: 29369827; PMCID: PMC5854529.", +rr_hpv_hiv_art_unsuppressed,1.8,undetermined,0,1,calibration,N/A, +rr_hr1_vaccinated,0.1,undetermined,0,1,calibration,N/A, +rr_hr2_vaccinated,0.5,undetermined,0,1,calibration,N/A, +rr_hr3_vaccinated,0.8,undetermined,0,1,calibration,N/A, +rr_hpv_age50plus,0.5,undetermined,0,1,calibration,N/A, +median_clear_hr1,14,undetermined,0,1,calibration,N/A,"Muñoz N, Méndez F, Posso H, Molano M, van den Brule AJ, Ronderos M, Meijer C, Muñoz A; Instituto Nacional de Cancerologia HPV Study Group. Incidence, duration, and determinants of cervical human papillomavirus infection in a cohort of Colombian women with normal cytological results. J Infect Dis. 2004 Dec 15;190(12):2077-87. doi: 10.1086/425907. Epub 2004 Nov 22. PMID: 15551205." +median_clear_hr2,12,undetermined,0,1,calibration,N/A,"Muñoz N, Méndez F, Posso H, Molano M, van den Brule AJ, Ronderos M, Meijer C, Muñoz A; Instituto Nacional de Cancerologia HPV Study Group. Incidence, duration, and determinants of cervical human papillomavirus infection in a cohort of Colombian women with normal cytological results. J Infect Dis. 2004 Dec 15;190(12):2077-87. doi: 10.1086/425907. Epub 2004 Nov 22. PMID: 15551205." +median_clear_hr3,12,undetermined,0,1,calibration,N/A,"Muñoz N, Méndez F, Posso H, Molano M, van den Brule AJ, Ronderos M, Meijer C, Muñoz A; Instituto Nacional de Cancerologia HPV Study Group. Incidence, duration, and determinants of cervical human papillomavirus infection in a cohort of Colombian women with normal cytological results. J Infect Dis. 2004 Dec 15;190(12):2077-87. doi: 10.1086/425907. Epub 2004 Nov 22. PMID: 15551205." +clear_shape,1,undetermined,0,1,calibration,N/A, +rr_clear_hiv_no_art,0.65,undetermined,0,1,calibration,N/A, +rr_clear_hiv_art_unsuppressed,0.8,undetermined,0,1,calibration,N/A, diff --git a/src/scripts/HPV_Analyses/Analysis_HPV.py b/src/scripts/HPV_Analyses/Analysis_HPV.py new file mode 100644 index 0000000000..cf0c09f3f7 --- /dev/null +++ b/src/scripts/HPV_Analyses/Analysis_HPV.py @@ -0,0 +1,397 @@ +""" +Run the HPV modules + """ + +import datetime +import pickle +import random +import pandas as pd +import matplotlib.pyplot as plt +from pathlib import Path + +from tlo import Date, Simulation, logging +from tlo.analysis.utils import parse_log_file, extract_results +from tlo.methods import ( + demography, + enhanced_lifestyle, + epi, + healthburden, + healthseekingbehaviour, + healthsystem, + measles, + simplified_births, + symptommanager, + hpv, + hiv, + tb +) + +results_folder = Path("./outputs") + +# Where will outputs go +outputpath = Path("./outputs") # folder for convenience of storing outputs + +# date-stamp to label log files and any other outputs +datestamp = datetime.date.today().strftime("__%Y_%m_%d") + +# The resource files +resourcefilepath = './resources' + +# %% Run the simulation +start_date = Date(2010, 1, 1) +end_date = Date(2020, 1, 1) +popsize = 2000 + + +# set up the log config +log_config = { + "filename": "test_runs", + "directory": outputpath, + "custom_levels": { + "*": logging.WARNING, + "tlo.methods.hpv": logging.INFO, + }, +} +# +# # Register the appropriate modules +# # need to call epi before tb to get bcg vax +seed = random.randint(0, 50000) +# # seed = 41728 # set seed for reproducibility + +# HPV model labels +AGE_LABELS = ["15_19", "20_24", "25_34", "35_44", "45_54", "55plus"] +HPV_GROUPS = ["hr1", "hr2", "hr3"] +SEXES = ["M", "F"] + +# 1. Run simulation +sim = Simulation(start_date=start_date, seed=seed, log_config=log_config, + show_progress_bar=True, resourcefilepath=resourcefilepath) +sim.register( + demography.Demography(), + simplified_births.SimplifiedBirths(), + enhanced_lifestyle.Lifestyle(), + healthsystem.HealthSystem(service_availability=["*"], # all treatment allowed + mode_appt_constraints=1, # mode of constraints to do with officer numbers and time + cons_availability="default", # mode for consumable constraints (if ignored, all consumables available) + ignore_priority=False, # do not use the priority information in HSI event to schedule + capabilities_coefficient=1.0, # multiplier for the capabilities of health officers + use_funded_or_actual_staffing="actual", # actual: use numbers/distribution of staff available currently + disable=False, # disables the healthsystem (no constraints and no logging) and every HSI runs + disable_and_reject_all=False, # disable healthsystem and no HSI runs + ), + symptommanager.SymptomManager(), + healthseekingbehaviour.HealthSeekingBehaviour(), + healthburden.HealthBurden(), + epi.Epi(), + hiv.Hiv(), + measles.Measles(), + tb.Tb(), + hpv.HPV(), +) + +# # set the scenario +#sim.modules["HPV"].parameters["r_hpv"] = 0.01 +#sim.modules["HPV"].parameters["r_hpv_clear"] = 0.6 + +# # Run the simulation and flush the logger +sim.make_initial_population(n=popsize) +sim.simulate(end_date=end_date) + + +# 2. Parse and save results +output = parse_log_file(sim.log_filepath) + +# save the results, argument 'wb' means write using binary mode. use 'rb' for reading file +with open(outputpath / "default_run.pickle", "wb") as f: + # Pickle the 'data' dictionary using the highest protocol available. + pickle.dump(dict(output), f, pickle.HIGHEST_PROTOCOL) + +# load the results +with open(outputpath / "default_run.pickle", "rb") as f: + output = pickle.load(f) + +#Show the results +hpv_outputs = output["tlo.methods.hpv"]["summary"] +print(hpv_outputs) +# proportion_infected = extract_results( +# results_folder, +# module="tlo.methods.hpv", +# key="summary", +# column="PropInf", +# do_scaling=False, +# ) +# +# number_infected = extract_results( +# results_folder, +# module="tlo.methods.hpv", +# key="summary", +# column="TotalInf", +# do_scaling=True, +# ) + +hpv_outputs = output["tlo.methods.hpv"]["summary"] +hpv_df = pd.DataFrame(hpv_outputs) + +print(hpv_df) +print(hpv_df.columns) + + +# change Year / Month to Date +hpv_df["Year"] = pd.to_numeric(hpv_df["Year"], errors="coerce") +hpv_df["Month"] = pd.to_numeric(hpv_df["Month"], errors="coerce") + +hpv_df = hpv_df.dropna(subset=["Year", "Month"]).copy() +hpv_df["Year"] = hpv_df["Year"].astype(int) +hpv_df["Month"] = hpv_df["Month"].astype(int) + +hpv_df["Date"] = pd.to_datetime( + { + "year": hpv_df["Year"], + "month": hpv_df["Month"], + "day": 1, + } +) +hpv_df = hpv_df.sort_values("Date").reset_index(drop=True) + +# 4. Helper functions +def compute_group_prev_by_sex( + df: pd.DataFrame, + hpv_group: str, + sex: str, + age_labels: list[str], +) -> pd.Series: + + inf_cols = [f"{hpv_group}_{sex}_{age}_Inf" for age in age_labels] + n_cols = [f"Any_{sex}_{age}_N" for age in age_labels] + + missing_inf = [c for c in inf_cols if c not in df.columns] + missing_n = [c for c in n_cols if c not in df.columns] + + if missing_inf or missing_n: + print(f"\nCannot compute {hpv_group}_{sex}_TotalPrev.") + if missing_inf: + print("Missing infection columns:", missing_inf) + if missing_n: + print("Missing denominator columns:", missing_n) + + return pd.Series([float("nan")] * len(df), index=df.index) + + total_inf = df[inf_cols].sum(axis=1) + total_n = df[n_cols].sum(axis=1) + + return total_inf / total_n.replace(0, pd.NA) + +for sex in SEXES: + for group in HPV_GROUPS: + hpv_df[f"{group}_{sex}_TotalPrev"] = compute_group_prev_by_sex( + hpv_df, + hpv_group=group, + sex=sex, + age_labels=AGE_LABELS, + ) + +# Plot 1: Total infection +plt.figure(figsize=(8, 5)) +plt.plot(hpv_df["Date"], hpv_df["TotalPrev"], marker="o") +plt.xlabel("Date") +plt.ylabel("Total HPV prevalence") +plt.title("Total HPV prevalence over time") +plt.grid(True) +plt.tight_layout() +plt.savefig(outputpath / "hpv_total_prevalence.png", dpi=300) +plt.show() + +# 2. HPV prevalence in different gender +plt.figure(figsize=(8, 5)) +plt.plot(hpv_df["Date"], hpv_df["M_Prev"], marker="o", label="Male") +plt.plot(hpv_df["Date"], hpv_df["F_Prev"], marker="o", label="Female") +plt.xlabel("Date") +plt.ylabel("Any HPV prevalence") +plt.title("Any HPV prevalence by sex") +plt.legend() +plt.grid(True) +plt.tight_layout() +plt.savefig(outputpath / "hpv_prevalence_by_sex.png", dpi=300) +plt.show() + +# 3. Prevalence of different HPV groups in female +plt.figure(figsize=(8, 5)) +for group in HPV_GROUPS: + plt.plot( + hpv_df["Date"], + hpv_df[f"{group}_F_TotalPrev"], + marker="o", + label=group, + ) +plt.xlabel("Date") +plt.ylabel("Prevalence") +plt.title("Female HPV prevalence by group") +plt.legend() +plt.grid(True) +plt.tight_layout() +plt.savefig(outputpath / "female_hpv_group_prevalence.png", dpi=300) +plt.show() + +# 4. Prevalence of different HPV groups in male +plt.figure(figsize=(8, 5)) +for group in HPV_GROUPS: + plt.plot( + hpv_df["Date"], + hpv_df[f"{group}_M_TotalPrev"], + marker="o", + label=group, + ) +plt.xlabel("Date") +plt.ylabel("Prevalence") +plt.title("Male HPV prevalence by group") +plt.legend() +plt.grid(True) +plt.tight_layout() +plt.savefig(outputpath / "male_hpv_group_prevalence.png", dpi=300) +plt.show() + +# Plot 5: Multiplicity of infection +plt.figure(figsize=(8, 5)) +plt.plot(hpv_df["Date"], hpv_df["InfGroup1"], marker="o", label="1 HPV group") +plt.plot(hpv_df["Date"], hpv_df["InfGroup2"], marker="o", label="2 HPV groups") +plt.plot(hpv_df["Date"], hpv_df["InfGroup3"], marker="o", label="3 HPV groups") +plt.xlabel("Date") +plt.ylabel("Number of infected individuals") +plt.title("Multiplicity of HPV infection") +plt.legend() +plt.grid(True) +plt.tight_layout() +plt.savefig(outputpath / "hpv_multiplicity_over_time.png", dpi=300) +plt.show() + +# Plot 6: Multiplicity of infection by sex +plt.figure(figsize=(9, 5)) + +plt.plot(hpv_df["Date"], hpv_df["MaleGroup1"], marker="o", label="Male: 1 group") +plt.plot(hpv_df["Date"], hpv_df["MaleGroup2"], marker="o", label="Male: 2 groups") +plt.plot(hpv_df["Date"], hpv_df["MaleGroup3"], marker="o", label="Male: 3 groups") + +plt.plot(hpv_df["Date"], hpv_df["FemaleGroup1"], marker="o", label="Female: 1 group") +plt.plot(hpv_df["Date"], hpv_df["FemaleGroup2"], marker="o", label="Female: 2 groups") +plt.plot(hpv_df["Date"], hpv_df["FemaleGroup3"], marker="o", label="Female: 3 groups") + +plt.xlabel("Date") +plt.ylabel("Number of infected individuals") +plt.title("Multiplicity of HPV infection by sex") +plt.legend() +plt.grid(True) +plt.tight_layout() +plt.savefig(outputpath / "hpv_multiplicity_by_sex.png", dpi=300) +plt.show() + +# Plot 7: Persistent HPV infection prevalence +plt.figure(figsize=(8, 5)) + +for group in HPV_GROUPS: + plt.plot( + hpv_df["Date"], + hpv_df[f"{group}_Persistent12_Prev"], + marker="o", + label=group, + ) + +plt.xlabel("Date") +plt.ylabel("Persistent infection prevalence") +plt.title("Persistent HPV infection prevalence, >=12 months") +plt.legend() +plt.grid(True) +plt.tight_layout() +plt.savefig(outputpath / "hpv_persistent_prevalence.png", dpi=300) +plt.show() + +# Plot 8: Persistent HPV infection by sex +for group in HPV_GROUPS: + male_col = f"{group}_Persistent12_M_Prev" + female_col = f"{group}_Persistent12_F_Prev" + + required_cols = ["Date", male_col, female_col] + + plt.figure(figsize=(8, 5)) + plt.plot(hpv_df["Date"], hpv_df[male_col], marker="o", label="Male") + plt.plot(hpv_df["Date"], hpv_df[female_col], marker="o", label="Female") + + plt.xlabel("Date") + plt.ylabel("Persistent infection prevalence") + plt.title(f"{group} persistent infection prevalence by sex") + plt.legend() + plt.grid(True) + plt.tight_layout() + plt.savefig(outputpath / f"{group}_persistent_by_sex.png", dpi=300) + plt.show() + +# Plot 9: Female any HPV prevalence by age group +female_age_cols = [f"Any_F_{age}_Prev" for age in AGE_LABELS] + +plt.figure(figsize=(9, 5)) + +for age in AGE_LABELS: + plt.plot( + hpv_df["Date"], + hpv_df[f"Any_F_{age}_Prev"], + marker="o", + label=age, + ) + +plt.xlabel("Date") +plt.ylabel("Any HPV prevalence") +plt.title("Female any HPV prevalence by age group") +plt.legend() +plt.grid(True) +plt.tight_layout() +plt.savefig(outputpath / "female_any_hpv_by_age.png", dpi=300) +plt.show() + +# Plot 10: Male any HPV prevalence by age group +male_age_cols = [f"Any_M_{age}_Prev" for age in AGE_LABELS] + +plt.figure(figsize=(9, 5)) + +for age in AGE_LABELS: + plt.plot( + hpv_df["Date"], + hpv_df[f"Any_M_{age}_Prev"], + marker="o", + label=age, + ) + +plt.xlabel("Date") +plt.ylabel("Any HPV prevalence") +plt.title("Male any HPV prevalence by age group") +plt.legend() +plt.grid(True) +plt.tight_layout() +plt.savefig(outputpath / "male_any_hpv_by_age.png", dpi=300) +plt.show() + +# Plot 11: Any HPV prevalence by HIV/ART status +hiv_prev_cols = [ + "Any_HIVneg_Prev", + "Any_HIVpos_unknown_Prev", + "Any_HIVpos_noART_Prev", + "Any_HIVpos_unsupp_Prev", + "Any_HIVpos_supp_Prev", +] + +available_hiv_cols = [c for c in hiv_prev_cols if c in hpv_df.columns] + +if len(available_hiv_cols) > 0: + plt.figure(figsize=(9, 5)) + + for col in available_hiv_cols: + label = col.replace("Any_", "").replace("_Prev", "") + plt.plot(hpv_df["Date"], hpv_df[col], marker="o", label=label) + + plt.xlabel("Date") + plt.ylabel("Any HPV prevalence") + plt.title("Any HPV prevalence by HIV/ART status") + plt.legend() + plt.grid(True) + plt.tight_layout() + plt.savefig(outputpath / "hpv_prevalence_by_hiv_status.png", dpi=300) + plt.show() + diff --git a/src/scripts/analysis_example_Xin/scenario_impact_of_consumables_availability.py b/src/scripts/analysis_example_Xin/scenario_impact_of_consumables_availability.py new file mode 100644 index 0000000000..0d8f0e9a41 --- /dev/null +++ b/src/scripts/analysis_example_Xin/scenario_impact_of_consumables_availability.py @@ -0,0 +1,30 @@ +from tlo import Date, logging +from tlo.methods.fullmodel import fullmodel +from tlo.scenario import BaseScenario + +class ImpactOfConsumablesAvailability(BaseScenario): + def __init__(self): + super().__init__( + seed=0, + start_date=Date(2010, 1, 1), + end_date=Date(2015, 1, 1), + initial_population_size=10_000, + number_of_draws= 2, + runs_per_draw=2, + ) + def log_configuration(self): + return { + 'filename': 'impact_of_consumables_availability', + 'directory': './outputs', + 'custom_levels': { + '*': logging.WARNING, + 'tlo.methods.demography': logging.INFO, + 'tlo.methods.healthburden': logging.INFO, + } + } + def modules(self): + return fullmodel() + def draw_parameters(self, draw_number, rng): + return { + 'HealthSystem': {'cons_availability': ['default', 'all'][draw_number]} + } diff --git a/src/scripts/fullmodel_Xin/fullmodel_test_2DEC.py b/src/scripts/fullmodel_Xin/fullmodel_test_2DEC.py new file mode 100644 index 0000000000..4fedb2c4f0 --- /dev/null +++ b/src/scripts/fullmodel_Xin/fullmodel_test_2DEC.py @@ -0,0 +1,62 @@ +""" +Run the full model with intervention coverage specified at national level + """ +import datetime +import pickle +from pathlib import Path +from tlo import Date, Simulation, logging +from tlo.analysis.utils import parse_log_file +from tlo.methods.fullmodel import fullmodel +# Where will outputs go +outputpath = Path("./outputs") # folder for convenience of storing outputs +# date-stamp to label log files and any other outputs +datestamp = datetime.date.today().strftime("__%Y_%m_%d") +# The resource files +resourcefilepath = Path("./resources") +# %% Run the simulation +start_date = Date(2010, 1, 1) +end_date = Date(2012, 1, 1) +popsize = 500 +# set up the log config +# add deviance measure logger if needed +log_config = { + "filename": "hiv_test_runs", + "directory": outputpath, + "custom_levels": { + "*": logging.WARNING, + "tlo.methods.hiv": logging.INFO, + "tlo.methods.tb": logging.INFO, + "tlo.methods.demography": logging.INFO, + # "tlo.methods.healthsystem.summary": logging.INFO, + "tlo.methods.labour.detail": logging.WARNING, # this logger keeps outputting even when set to warning + }, +} +# Register the appropriate modules +# need to call epi before tb to get bcg vax +# seed = random.randint(0, 50000) +seed = 32 # set seed for reproducibility +sim = Simulation(start_date=start_date, seed=seed, log_config=log_config, + show_progress_bar=True, resourcefilepath=resourcefilepath) +sim.register(*fullmodel(use_simplified_births=False, + module_kwargs={ + "SymptomManager": {"spurious_symptoms": True}, + "HealthSystem": {"disable": False, + "service_availability": ["*"], + "mode_appt_constraints": 1, + "cons_availability": "default", + "beds_availability": "all", + "ignore_priority": False, + "use_funded_or_actual_staffing": "actual"}, + }, +)) +# Run the simulation and flush the logger +sim.make_initial_population(n=popsize) +sim.simulate(end_date=end_date) +# parse the results +output = parse_log_file(sim.log_filepath) +# save the results, argument 'wb' means write using binary mode. use 'rb' for reading file +with open(outputpath / "default_run.pickle", "wb") as f: + # Pickle the 'data' dictionary using the highest protocol available. + pickle.dump(dict(output), f, pickle.HIGHEST_PROTOCOL) +with open(outputpath / "default_run.pickle", "rb") as f: + output = pickle.load(f) diff --git a/src/scripts/hiv_tb_Xin/hiv_tb_test_2DEC.py b/src/scripts/hiv_tb_Xin/hiv_tb_test_2DEC.py new file mode 100644 index 0000000000..1c215b6e75 --- /dev/null +++ b/src/scripts/hiv_tb_Xin/hiv_tb_test_2DEC.py @@ -0,0 +1,88 @@ +""" +Run the HIV/TB modules with intervention coverage specified at national level +save outputs for plotting (file: output_plots_tb.py) + """ +import datetime +import pickle +import random +from pathlib import Path +from tlo import Date, Simulation, logging +from tlo.analysis.utils import parse_log_file +from tlo.methods import ( # deviance_measure, + demography, + enhanced_lifestyle, + epi, + healthburden, + healthseekingbehaviour, + healthsystem, + hiv, + simplified_births, + symptommanager, + tb, +) +# Where will outputs go +outputpath = Path("./outputs") # folder for convenience of storing outputs +# date-stamp to label log files and any other outputs +datestamp = datetime.date.today().strftime("__%Y_%m_%d") +# The resource files +resourcefilepath = './resources' +# %% Run the simulation +start_date = Date(2010, 1, 1) +end_date = Date(2030, 1, 1) +popsize = 5000 +# scenario = 1 +# set up the log config +log_config = { + "filename": "test_runs", + "directory": outputpath, + "custom_levels": { + "*": logging.WARNING, + # "tlo.methods.deviance_measure": logging.INFO, + # "tlo.methods.epi": logging.INFO, + "tlo.methods.hiv": logging.INFO, + "tlo.methods.tb": logging.INFO, + "tlo.methods.demography": logging.INFO, + # "tlo.methods.demography.detail": logging.WARNING, + "tlo.methods.healthsystem.summary": logging.INFO, + # "tlo.methods.healthsystem": logging.INFO, + # "tlo.methods.healthburden": logging.INFO, + }, +} +# Register the appropriate modules +seed = random.randint(0, 50000) +# seed = 41728 # set seed for reproducibility +sim = Simulation(start_date=start_date, seed=seed, log_config=log_config, + show_progress_bar=True, resourcefilepath=resourcefilepath) +sim.register( + demography.Demography(), + simplified_births.SimplifiedBirths(), + enhanced_lifestyle.Lifestyle(), + healthsystem.HealthSystem(service_availability=["*"], # all treatment allowed + mode_appt_constraints=1, # mode of constraints to do with officer numbers and time + cons_availability="default", # mode for consumable constraints (if ignored, all consumables available) + ignore_priority=False, # do not use the priority information in HSI event to schedule + capabilities_coefficient=1.0, # multiplier for the capabilities of health officers + use_funded_or_actual_staffing="actual", # actual: use numbers/distribution of staff available currently + disable=False, # disables the healthsystem (no constraints and no logging) and every HSI runs + disable_and_reject_all=False, # disable healthsystem and no HSI runs + ), + symptommanager.SymptomManager(), + healthseekingbehaviour.HealthSeekingBehaviour(), + healthburden.HealthBurden(), + epi.Epi(), + hiv.Hiv(run_with_checks=False), + tb.Tb(), + # deviance_measure.Deviance(resourcefilepath=resourcefilepath), +) +# Run the simulation and flush the logger +sim.make_initial_population(n=popsize) +sim.simulate(end_date=end_date) +# parse the results +output = parse_log_file(sim.log_filepath) +# save the results, argument 'wb' means write using binary mode. use 'rb' for reading file +with open(outputpath / "default_run.pickle", "wb") as f: + # Pickle the 'data' dictionary using the highest protocol available. + pickle.dump(dict(output), f, pickle.HIGHEST_PROTOCOL) +# load the results +with open(outputpath / "default_run.pickle", "rb") as f: + output = pickle.load(f) diff --git a/src/tlo/methods/hpv.py b/src/tlo/methods/hpv.py new file mode 100644 index 0000000000..25fc7884a0 --- /dev/null +++ b/src/tlo/methods/hpv.py @@ -0,0 +1,934 @@ +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING, List, Optional + +import pandas as pd +import math + +# from scripts.diarrhoea_analyses.analysis_diarrhoea_with_and_without_treatment import data +from tlo import DAYS_IN_YEAR, DateOffset, Module, Parameter, Property, Types, logging +from tlo.analysis.utils import get_counts_by_sex_and_age_group +from tlo.events import Event, IndividualScopeEventMixin, PopulationScopeEventMixin, RegularEvent +from tlo.lm import LinearModel, LinearModelType, Predictor +from tlo.methods import Metadata +from tlo.methods.causes import Cause +from tlo.methods.demography import InstantaneousDeath +from tlo.methods.hsi_event import HSI_Event +from tlo.methods.hsi_generic_first_appts import GenericFirstAppointmentsMixin +from tlo.methods.symptommanager import Symptom +from tlo.util import random_date, read_csv_files + +if TYPE_CHECKING: + from tlo.methods.hsi_generic_first_appts import HSIEventScheduler + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class HPV(Module, GenericFirstAppointmentsMixin): + """This is an HPV infection Process. + Groups: + g1 = HPV16/18 + g2 = other vaccine-covered high-risk HPV (31/33/45/52/58) + g3 = other high-risk HPV (35/39/51/56/59/68) + + It demonstrates the following behaviours in respect of the healthsystem module: + + - Registration of the disease module with healthsystem + - Reading DALY weights and reporting daly values related to this disease + - Health care seeking + - Usual HSI behaviour + - Restrictive requirements on the facility_level for the HSI_event + - Use of the SymptomManager + """ + + INIT_DEPENDENCIES = {'Demography', 'SymptomManager', 'Hiv'} + + OPTIONAL_INIT_DEPENDENCIES = {'HealthBurden'} + + # Declare Metadata + METADATA = { + Metadata.DISEASE_MODULE, + Metadata.USES_SYMPTOMMANAGER, + Metadata.USES_HEALTHSYSTEM, + Metadata.USES_HEALTHBURDEN, + Metadata.REPORTS_DISEASE_NUMBERS + } + + # Declare Causes of Death + CAUSES_OF_DEATH = {} + + # Declare Causes of Disability + CAUSES_OF_DISABILITY = {} + + HPV_GROUPS = ['hr1', 'hr2', 'hr3'] + AGE_BINS = [15, 20, 25, 35, 45, 55, 200] + AGE_LABELS = ['15_19', '20_24', '25_34', '35_44', '45_54', '55plus'] + + PARAMETERS = { + "init_prev_hpv_hr1": Parameter( + Types.REAL, + "Initial prevalence of hpv 16/18 infection", + ), + "init_prev_hpv_hr2": Parameter( + Types.REAL, + "Initial prevalence of HPV 31/33/45/52/58 infection", + ), + "init_prev_hpv_hr3": Parameter( + Types.REAL, + "Initial prevalence of other HR types" + ), + + # ------------------ HPV Transmission ------------------ # + # transmission coefficient for HPV Infection + "b_hpv": Parameter( + Types.REAL, + "Baseline transmission coefficient for HPV Infection", + ), + + # Modifiers + # "rr_hpv_hiv": Parameter( + # Types.REAL, + # "Relative risk for HPV infection among HIV positive people", + # ), + "rr_hpv_hiv_no_art": Parameter( + Types.REAL, + "Relative risk for HPV acquisition among HIV positive people not on ART", + ), + "rr_hpv_hiv_art_unsuppressed": Parameter( + Types.REAL, + "Relative risk for HPV acquisition among HIV positive people on ART but not virally suppressed", + ), + "rr_hr1_vaccinated": Parameter( + Types.REAL, + "Relative risk for hr1 infection if vaccinated", + ), + "rr_hr2_vaccinated": Parameter( + Types.REAL, + "Relative risk for hr2 infection if vaccinated", + ), + "rr_hr3_vaccinated": Parameter( + Types.REAL, + "Relative risk for hr3 infection if vaccinated", + ), + + "rr_hpv_age50plus": Parameter( + Types.REAL, + "Relative risk multiplier for age >=50", + ), + + # ------------------ HPV Self-clear ------------------ # + # Weibull baseline + "median_clear_hr1": Parameter( + Types.REAL, + "Median months to self-clear for hr1 infection", + ), + "median_clear_hr2": Parameter( + Types.REAL, + "Median months to self-clear for hr2 infection", + ), + "median_clear_hr3": Parameter( + Types.REAL, + "Median months to self-clear for hr3 infection", + ), + "clear_shape": Parameter( + Types.REAL, + "Weibull shape parameter for HPV clearance duration", + ), + + # Modifiers + "rr_clear_hiv_no_art": Parameter( + Types.REAL, + "Rate ratio for HPV clearance among PLWH not on ART", + ), + "rr_clear_hiv_art_unsuppressed": Parameter( + Types.REAL, + "Rate ratio for HPV clearance among PLWH on ART but not virally suppressed", + ), + + # As MC suggested, remove the immunity part + # "rr_immunity_hr1": Parameter( + # Types.REAL, + # "Relative risk for reinfection with hr1 if previously infected", + # ), + # "rr_immunity_hr2": Parameter( + # Types.REAL, + # "Relative risk for reinfection with hr2 if previously infected", + # ), + # "rr_immunity_hr3": Parameter( + # Types.REAL, + # "Relative risk for reinfection with hr3 if previously infected", + # ), + } + + PROPERTIES = { + 'hp_is_infected': Property( + Types.BOOL, 'Is infected with oncogenic hpv group'), + 'hp_infected_hr1': Property( + Types.BOOL, 'Current infected with hr1'), + 'hp_infected_hr2': Property( + Types.BOOL, 'Current infected with hr2'), + 'hp_infected_hr3': Property( + Types.BOOL, 'Current infected with hr3'), + 'hp_date_infected_hr1': Property( + Types.DATE, 'Date of infection of hr1'), + 'hp_date_infected_hr2': Property( + Types.DATE, 'Date of infection of hr2'), + 'hp_date_infected_hr3': Property( + Types.DATE, 'Date of infection of hr3'), + 'hp_date_first_infected': Property( + Types.DATE, 'Start date of current HPV infection'), + 'hp_duration_hr1': Property( + Types.INT, 'Duration for current hr1 infection'), + 'hp_duration_hr2': Property( + Types.INT, 'Duration for current hr2 infection'), + 'hp_duration_hr3': Property( + Types.INT, 'Duration for current hr3 infection'), + 'hp_duration_all_clear': Property( + Types.INT, 'Duration for current all HPV infection'), + # 'hp_date_clear_hr1': Property( + # Types.DATE, 'Scheduled clearance date of current hr1 infection'), + # 'hp_date_clear_hr2': Property( + # Types.DATE, 'Scheduled clearance date of current hr2 infection'), + # 'hp_date_clear_hr3': Property( + # Types.DATE, 'Scheduled clearance date of current hr3 infection'), + 'hp_ever_infected_hr1': Property( + Types.BOOL, 'Ever infected with hr1'), + 'hp_ever_infected_hr2': Property( + Types.BOOL, 'Ever infected with hr2'), + 'hp_ever_infected_hr3': Property( + Types.BOOL, 'Ever infected with hr3'), + + 'hp_persistent_hr1': Property( + Types.BOOL, 'Persistent hr1 infection, duration >= 12 months'), + 'hp_persistent_hr2': Property( + Types.BOOL, 'Persistent hr2 infection, duration >= 12 months'), + 'hp_persistent_hr3': Property( + Types.BOOL, 'Persistent hr3 infection, duration >= 12 months'), + + # "va_hpv": Property(Types.INT, "number of doses of hpv vaccine received"), + # "va_hpv_all_doses": Property(Types.BOOL, "whether all doses have been received of the HPV vaccine"), + # "hv_inf": Property(Types.BOOL,"Is person currently infected with HIV + # (NB. AIDS status is determined by presence of the AIDS Symptom.",), + # "hv_art": Property(Types.CATEGORICAL, + # "ART status of person, whether on ART or not; and whether viral load is suppressed or not if on ART.", + # categories=["not", "on_VL_suppressed", "on_not_VL_suppressed"],), + } + + def __init__(self, name=None): + # NB. Parameters passed to the module can be inserted in the __init__ definition. + super().__init__(name) + + def read_parameters(self, resourcefilepath: Optional[Path] = None): + """Read in parameters and do the registration of this module and its symptoms""" + self.load_parameters_from_dataframe( + read_csv_files(Path(resourcefilepath) / "ResourceFile_HPV", + files="parameter_values") + ) + + def initialise_population(self, population): + """Set our property values for the initial population. + + This method is called by the simulation when creating the initial population, and is + responsible for assigning initial values, for every individual, of those properties + 'owned' by this module, i.e. those declared in the PROPERTIES dictionary above. + + :param population: the population of individuals + """ + + df = population.props # a shortcut to the dataframe storing data for individuals + + # Set default for properties + df.loc[df.is_alive, 'hp_is_infected'] = False # default: no individuals infected + df.loc[df.is_alive, 'hp_infected_hr1'] = False + df.loc[df.is_alive, 'hp_infected_hr2'] = False + df.loc[df.is_alive, 'hp_infected_hr3'] = False + df.loc[df.is_alive, 'hp_date_infected_hr1'] = pd.NaT + df.loc[df.is_alive, 'hp_date_infected_hr2'] = pd.NaT + df.loc[df.is_alive, 'hp_date_infected_hr3'] = pd.NaT + df.loc[df.is_alive, 'hp_date_first_infected'] = pd.NaT + df.loc[df.is_alive, 'hp_duration_hr1'] = -1 + df.loc[df.is_alive, 'hp_duration_hr2'] = -1 + df.loc[df.is_alive, 'hp_duration_hr3'] = -1 + df.loc[df.is_alive, 'hp_duration_all_clear'] = -1 + # df.loc[df.is_alive, 'hp_date_clear_hr1'] = pd.NaT + # df.loc[df.is_alive, 'hp_date_clear_hr2'] = pd.NaT + # df.loc[df.is_alive, 'hp_date_clear_hr3'] = pd.NaT + df.loc[df.is_alive, 'hp_ever_infected_hr1'] = False + df.loc[df.is_alive, 'hp_ever_infected_hr2'] = False + df.loc[df.is_alive, 'hp_ever_infected_hr3'] = False + eligible = df.index[df.is_alive & (df.age_years >= 15)] + df.loc[df.is_alive, 'hp_persistent_hr1'] = False + df.loc[df.is_alive, 'hp_persistent_hr2'] = False + df.loc[df.is_alive, 'hp_persistent_hr3'] = False + + for group in self.HPV_GROUPS: + p_init = self.parameters[f'init_prev_hpv_{group}'] + u = self.rng.random(size=len(eligible)) + infected_this_group = eligible[u < p_init] + + for person_id in infected_this_group: + df.at[person_id, f'hp_infected_{group}'] = True + df.at[person_id, f'hp_ever_infected_{group}'] = True + df.at[person_id, 'hp_is_infected'] = True + + # randomly select an infection date for initial population + previous_infection = int(self.rng.randint(0, 24)) # 0-23month + infection_date = self.sim.date - DateOffset(months=previous_infection) + + df.at[person_id, f'hp_date_infected_{group}'] = infection_date + # df.at[person_id, f'hp_date_clear_{group}'] = pd.NaT + df.at[person_id, f'hp_duration_{group}'] = previous_infection + df.at[person_id, f'hp_persistent_{group}'] = previous_infection >= 12 + + initially_infected = df.index[df.is_alive & df.hp_is_infected] + for person_id in initially_infected: + group_dates = [] + + for group in self.HPV_GROUPS: + date = df.at[person_id, f'hp_date_infected_{group}'] + + if not pd.isna(date): + group_dates.append(date) + + if len(group_dates) > 0: + df.at[person_id, 'hp_date_first_infected'] = min(group_dates) + + def _get_age_group_series(self, ages): + return pd.cut( + ages, + bins=self.AGE_BINS, + labels=self.AGE_LABELS, + right=False # right side not included + ) + def _get_age_group(self, age_years): + for i in range(len(self.AGE_BINS)-1): + if self.AGE_BINS[i] <= age_years < self.AGE_BINS[i + 1]: + return self.AGE_LABELS[i] + return None + + def _build_age_mixing_matrix(self, within=0.80, adjacent=0.15, distant=0.05): + labels = self.AGE_LABELS + M = pd.DataFrame(0.0, index=labels, columns=labels, dtype=float) + + for i, label in enumerate(labels): + row = pd.Series(0.0, index=labels, dtype=float) + + # within-group + row[label] = within + + # adjacent groups + neighbors = [] + if i -1 >= 0: + neighbors.append(labels[i - 1]) + if i + 1 < len(labels): + neighbors.append(labels[i + 1]) + + if len(neighbors) > 0: + share_adj = adjacent / len(neighbors) + for nb in neighbors: + row[nb] = share_adj + + # distant group + distant_groups = [x for x in labels if x != label and x not in neighbors] + if len(distant_groups) > 0: + share_dist = distant / len(distant_groups) + for dg in distant_groups: + row[dg] = share_dist + + # normalize to exactly + row = row / row.sum() + M.loc[label] = row + return M + + + def _months_since(self,start_date,end_date=None): + if pd.isna(start_date): + return None + if pd.isna(end_date): + end_date = self.sim.date + + months = (end_date.year - start_date.year) * 12 + (end_date.month - start_date.month) + return max(0, int(months)) + + def _get_clearance_rr(self, person_id): + df = self.sim.population.props + p = self.parameters + + if 'hv_inf' not in df.columns: + return 1.0 + + hv_inf = df.at[person_id, 'hv_inf'] + + if pd.isna(hv_inf) or (hv_inf is False): + return 1.0 + + if 'hv_art' not in df.columns: + return 1.0 + + hv_art = df.at[person_id, 'hv_art'] + + if hv_art == 'not': + return p['rr_clear_hiv_no_art'] + elif hv_art == 'on_not_VL_suppressed': + return p ['rr_clear_hiv_art_unsuppressed'] + else: + return 1.0 + + def _get_clearance_probability(self, group, person_id, duration_months, interval_months = 6): + p = self.parameters + + median = p[f'median_clear_{group}'] + shape = p['clear_shape'] + + # median = scale * (ln 2)^(1/shape) + scale = median / (math.log(2) ** (1.0 / shape)) + + t1 = max(0.0, float(duration_months)) + t0 = max(0.0, t1 - float(interval_months)) + + # Weibull baseline cumulative hazard increment over [t0, t1] + H0_t0 = (t0 / scale) ** shape + H0_t1 = (t1 / scale) ** shape + delta_H0 = max(0.0, H0_t1 - H0_t0) + + rr = self._get_clearance_rr(person_id) + + # p = 1 - exp(- rr * delta_H0) + p_clear = 1.0 - math.exp(-rr * delta_H0) + + return min(max(p_clear, 0.0), 1.0) + + def _get_hpv_group_set(self, person_id): + df = self.sim.population.props + current_groups = set() + + for group in self.HPV_GROUPS: + if df.at[person_id, f'hp_infected_{group}']: + current_groups.add(group) + + return current_groups + + def _set_hpv_group_set(self,person_id, hpv_set): + df = self.sim.population.props + + for group in self.HPV_GROUPS: + df.at[person_id, f'hp_infected_{group}'] = (group in hpv_set) + + df.at[person_id, 'hp_is_infected'] = (len(hpv_set) > 0) + + def _add_new_infection_groups(self, person_id, new_groups): + if len(new_groups) == 0: + return + + df = self.sim.population.props + was_infected_before = df.at[person_id, 'hp_is_infected'] + current_groups = self._get_hpv_group_set(person_id) + updated_groups = current_groups.union(new_groups) + self._set_hpv_group_set(person_id, updated_groups) + + # set infection date for new groups + for group in new_groups: + df.at[person_id, f'hp_date_infected_{group}'] = self.sim.date + df.at[person_id, f'hp_ever_infected_{group}'] = True + df.at[person_id,f'hp_duration_{group}'] = 0 + + # start a new HPV infection process only if the person was uninfected/ self-clear + if not was_infected_before: + df.at[person_id, 'hp_date_first_infected'] = self.sim.date + df.at[person_id, 'hp_duration_all_clear'] = -1 + + # set first infection date + if pd.isna(df.at[person_id, 'hp_date_first_infected']): + df.at[person_id, 'hp_date_first_infected'] = self.sim.date + + + def _clear_single_group(self, person_id, group): + """clear a single HPV group for a person""" + df = self.sim.population.props + + df.at[person_id, f'hp_infected_{group}'] = False + df.at[person_id, f'hp_date_infected_{group}'] = pd.NaT + # df.at[person_id, f'hp_date_clear_{group}'] = pd.NaT + df.at[person_id, f'hp_duration_{group}'] = -1 + df.at[person_id, f'hp_persistent_{group}'] = False + + still_infected = any( + df.at[person_id, f'hp_infected_{group}'] for group in self.HPV_GROUPS + ) + df.at[person_id, 'hp_is_infected'] = still_infected + + if not still_infected: + start_date = df.at[person_id, 'hp_date_first_infected'] + if not pd.isna(start_date): + overall_duration = (self.sim.date.year - start_date.year) * 12 + (self.sim.date.month - start_date.month) + df.at[person_id, 'hp_duration_all_clear'] = overall_duration + + df.at[person_id, 'hp_date_first_infected'] = pd.NaT + + # def _sample_clear_duration(self,group): + # """Sample a infection duration for one HPV group using Weibull distribution""" + # p = self.parameters + # + # median = p[f'median_clear_{group}'] + # shape = p['clear_shape'] + # + # # WeibullMedian= Scale * (ln 2)^(1/shape) + # scale = median / (math.log(2) ** (1.0 / shape)) + # + # u = self.rng.random() + # duration = scale * ((-math.log(1.0 - u)) ** (1.0 / shape)) + # + # duration = max(1,int(round(duration))) + # duration = min(duration, 48) + # return duration + + def _update_persistence_status(self, threshold_months=12): + df = self.sim.population.props + eligible = df.is_alive & (df.age_years >= 15) + + for group in self.HPV_GROUPS: + inf_col = f'hp_infected_{group}' + date_col = f'hp_date_infected_{group}' + dur_col = f'hp_duration_{group}' + pers_col = f'hp_persistent_{group}' + + non_infected = eligible & ~df[inf_col].fillna(False) + df.loc[non_infected, dur_col] = -1 + df.loc[non_infected, pers_col] = False + + infected = eligible & df[inf_col].fillna(False) + + for person_id in df.index[infected]: + date_inf = df.at[person_id, date_col] + + if pd.isna(date_inf): + df.at[person_id, dur_col] = -1 + df.at[person_id, pers_col] = False + continue + + duration = self._months_since(date_inf, self.sim.date) + + df.at[person_id, dur_col] = duration + df.at[person_id, pers_col] = duration >= threshold_months + + def initialise_simulation(self, sim): + """Get ready for simulation start. + + This method is called just before the main simulation loop begins, and after all + modules have read their parameters and the initial population has been created. + It is a good place to add initial events to the event queue. + """ + p = self.parameters + self.lm = {} + self.age_mixing_matrix = self._build_age_mixing_matrix( + within=0.80, + adjacent=0.15, + distant=0.05 + ) + self._pre_logged_prev = {} + + for group in self.HPV_GROUPS: + self.lm[group] = LinearModel( + LinearModelType.MULTIPLICATIVE, + 1.0, + + Predictor('va_hpv') + .when(1, p[f'rr_{group}_vaccinated']) + .when(2, p[f'rr_{group}_vaccinated']), + + # Predictor(f'hp_ever_infected_{group}') + # .when(True, p[f'rr_immunity_{group}']) + + Predictor('age_years', conditions_are_mutually_exclusive=True) + .when('<15', 0.0) + .when('>=50', p['rr_hpv_age50plus']), + + Predictor() + .when('hv_inf & (hv_art =="not")', + p['rr_hpv_hiv_no_art'] + ) + .when( + 'hv_inf & (hv_art == "on_not_VL_suppressed")', + p['rr_hpv_hiv_art_unsuppressed'] + ), + ) + + # add the basic event + event = HpvInfectionEvent(self) + sim.schedule_event(event, sim.date + DateOffset(months=6)) + + # add an event to log to screen + sim.schedule_event(HpvLoggingEvent(self), sim.date + DateOffset(months=6, days=1)) + + def on_birth(self, mother_id, child_id): + """Initialise our properties for a newborn individual. + + This is called by the simulation whenever a new person is born. + + :param mother_id: the ID for the mother for this child + :param child_id: the ID for the new child + """ + + df = self.sim.population.props # shortcut to the population props dataframe + df.at[child_id, 'hp_is_infected'] = False + df.at[child_id, 'hp_infected_hr1'] = False + df.at[child_id, 'hp_infected_hr2'] = False + df.at[child_id, 'hp_infected_hr3'] = False + df.at[child_id, 'hp_date_infected_hr1'] = pd.NaT + df.at[child_id, 'hp_date_infected_hr2'] = pd.NaT + df.at[child_id, 'hp_date_infected_hr3'] = pd.NaT + df.at[child_id, 'hp_date_first_infected'] = pd.NaT + df.at[child_id, 'hp_duration_hr1'] = -1 + df.at[child_id, 'hp_duration_hr2'] = -1 + df.at[child_id, 'hp_duration_hr3'] = -1 + df.at[child_id, 'hp_duration_all_clear'] = -1 + # df.at[child_id, 'hp_date_clear_hr1'] = pd.NaT + # df.at[child_id, 'hp_date_clear_hr2'] = pd.NaT + # df.at[child_id, 'hp_date_clear_hr3'] = pd.NaT + df.at[child_id, 'hp_ever_infected_hr1'] = False + df.at[child_id, 'hp_ever_infected_hr2'] = False + df.at[child_id, 'hp_ever_infected_hr3'] = False + df.at[child_id, 'hp_persistent_hr1'] = False + df.at[child_id, 'hp_persistent_hr2'] = False + df.at[child_id, 'hp_persistent_hr3'] = False + + def report_daly_values(self): + # This must send back a pd.Series or pd.DataFrame that reports on the average daly-weights that have been + # experienced by persons in the previous month. Only rows for alive-persons must be returned. + # The names of the series of columns is taken to be the label of the cause of this disability. + # It will be recorded by the healthburden module as _. + + logger.debug(key="debug", data="This is hpv reporting my health values") + df = self.sim.population.props # shortcut to population properties dataframe + health_values = pd.Series(index=df.index[df.is_alive], data=0.0) + return health_values # returns the series + + def report_summary_stats(self): + df = self.sim.population.props + summary = { + 'infected_any': get_counts_by_sex_and_age_group(df, 'hp_is_infected')} + + for group in self.HPV_GROUPS: + summary[f'infected_{group}'] = get_counts_by_sex_and_age_group(df, f'hp_infected_{group}') + summary[f'persistent_{group}'] = get_counts_by_sex_and_age_group(df, f'hp_persistent_{group}') + + return summary + +class HpvInfectionEvent(RegularEvent, PopulationScopeEventMixin): + """This event is occurring regularly at one 6 months intervals and controls the infection process of HPV.""" + + def __init__(self, module): + super().__init__(module, frequency=DateOffset(months=6)) + assert isinstance(module, HPV) + + def apply(self, population): + logger.debug(key='debug', data='This is HpvInfectionEvent, tracking the disease progression of the population.') + df = population.props + module = self.module + now = self.sim.date + + # 1. define eligible population + eligible = df.index[df.is_alive & (df.age_years >= 15)] + if len(eligible) == 0: + return + + # 2. self-clearance + infected_idx = df.index[df.is_alive & df.hp_is_infected & (df.age_years >= 15)] + for person_id in infected_idx: + current_groups = module._get_hpv_group_set(person_id) + + for group in list(current_groups): + date_inf = df.at[person_id, f'hp_date_infected_{group}'] + if pd.isna(date_inf): + continue + + duration_months = module._months_since (date_inf, now) + if duration_months is None: + continue + + df.at[person_id, f'hp_duration_{group}'] = duration_months + + # Clear rate in 6 months + interval_months = 6 + p_clear = module._get_clearance_probability( + group=group, + person_id=person_id, + duration_months=duration_months, + interval_months=interval_months + ) + + if module.rng.random() < p_clear: + module._clear_single_group(person_id, group) + + + # 3. recalculate prevalence by HPV group after clearance + df_alive = df.loc[df.is_alive & (df.age_years >= 15)].copy() + df_alive['age_group'] = module._get_age_group_series(df_alive['age_years']) + + male_df = df_alive.loc[df_alive.sex == 'M'] + female_df = df_alive.loc[df_alive.sex == 'F'] + + prev_by_age_male = {} + prev_by_age_female = {} + + for group in module.HPV_GROUPS: + prev_by_age_male[group] = ( + male_df.groupby('age_group', observed=True)[f'hp_infected_{group}'] + .mean() + .reindex(module.AGE_LABELS, fill_value=0.0) + ) + prev_by_age_female[group] = ( + female_df.groupby('age_group', observed=True)[f'hp_infected_{group}'] + .mean() + .reindex(module.AGE_LABELS, fill_value=0.0) + ) + + # male_group_inf = df.loc[male_idx, f'hp_infected_{group}'].sum() + # female_group_inf = df.loc[female_idx, f'hp_infected_{group}'].sum() + # + # prev_male[group] = male_group_inf/len(male_idx) if len(male_idx) > 0 else 0 + # prev_female[group] = female_group_inf / len(female_idx) if len(female_idx) > 0 else 0 + + # 4. new infection + interval_years = 0.5 + + for person_id in eligible: + sex = df.at[person_id,'sex'] + current_groups = module._get_hpv_group_set(person_id) + new_group = set() + + if sex == 'F': + source_prev_by_age = prev_by_age_male + elif sex == 'M': + source_prev_by_age = prev_by_age_female + else: + continue + + my_age_group = module._get_age_group(df.at[person_id,'age_years']) + if my_age_group is None: + continue + + mix_row = module.age_mixing_matrix.loc[my_age_group] + + for group in module.HPV_GROUPS: + if group in current_groups: + continue + + weighted_prev = float((mix_row * source_prev_by_age[group]).sum()) + + beta_name = f'b_hpv_{group}' + beta = module.parameters[beta_name] if beta_name in module.parameters else module.parameters['b_hpv'] + + modifier = module.lm[group].predict(df.loc[[person_id]]).iloc[0] + + lambda_inf = beta * weighted_prev * modifier + lambda_inf = max(lambda_inf, 0.0) + + p_inf = 1.0 - math.exp(-lambda_inf * interval_years) + p_inf = min(max(p_inf, 0.0), 1.0) + + if module.rng.random() < p_inf: + new_group.add(group) + + if len(new_group) > 0: + module._add_new_infection_groups(person_id, new_group) + + module._update_persistence_status(threshold_months=12) + +class HpvLoggingEvent(RegularEvent, PopulationScopeEventMixin): + def __init__(self, module): + """Produce a summmary of the numbers of people with respect to their 'hpv status' + """ + # run this event every 6/12 month + self.repeat = 6 + super().__init__(module, frequency=DateOffset(months=self.repeat)) + assert isinstance(module, HPV) + + def apply(self, population): + # get some summary statistics + df = population.props + module = self.module + + eligible = df.index[df.is_alive & (df.age_years >= 15)] + log_data = { + 'Year':self.sim.date.year, + 'Month':self.sim.date.month, + 'EligibleN':int(len(eligible)), + } + + if len(eligible) == 0: + logger.info(key='summary', data=log_data) + return + + sub = df.loc[eligible].copy() + + sub['age_group'] = module._get_age_group_series(sub['age_years']) + sub['hiv_group'] = 'HIVneg' + + if 'hv_inf' in sub.columns: + sub.loc[sub['hv_inf'].fillna(False), 'hiv_group'] = 'HIVpos_unknown' + + if 'hv_art' in sub.columns: + sub.loc[sub['hv_inf'].fillna(False) & (sub['hv_art'] == 'not'), 'hiv_group'] = 'HIVpos_noART' + sub.loc[sub['hv_inf'].fillna(False) & ( + sub['hv_art'] == 'on_not_VL_suppressed'), 'hiv_group'] = 'HIVpos_unsupp' + sub.loc[ + sub['hv_inf'].fillna(False) & (sub['hv_art'] == 'on_VL_suppressed'), 'hiv_group'] = 'HIVpos_supp' + + # 1. Overall summary + total_inf = int(sub['hp_is_infected'].sum()) + log_data['TotalInf'] = total_inf + log_data['TotalPrev'] = sub['hp_is_infected'].mean() + + for sex_name, sex_df in [('M', sub.loc[sub.sex == 'M']), + ('F', sub.loc[sub.sex == 'F'])]: + n = len(sex_df) + log_data[f'{sex_name}_N'] = int(n) + log_data[f'{sex_name}_Inf'] = int(sex_df['hp_is_infected'].sum()) if n > 0 else 0 + log_data[f'{sex_name}_Prev'] = sex_df['hp_is_infected'].mean() if n > 0 else math.nan + + # 2. Prevalence by sex and age group + prev_snapshot = {} + + for sex_name, sex_df in [('All', sub), + ('M', sub.loc[sub.sex == 'M']), + ('F', sub.loc[sub.sex == 'F'])]: + + for age_group in module.AGE_LABELS: + age_df = sex_df.loc[sex_df['age_group'] == age_group] + n = len(age_df) + + log_data[f'Any_{sex_name}_{age_group}_N'] = int(n) + + if n == 0: + log_data[f'Any_{sex_name}_{age_group}_Inf'] = 0 + log_data[f'Any_{sex_name}_{age_group}_Prev'] = math.nan + for hpv_group in module.HPV_GROUPS: + log_data[f'{hpv_group}_{sex_name}_{age_group}_Inf'] = 0 + log_data[f'{hpv_group}_{sex_name}_{age_group}_Prev'] = math.nan + continue + + any_inf = int(age_df['hp_is_infected'].sum()) + any_prev = age_df['hp_is_infected'].mean() + + log_data[f'Any_{sex_name}_{age_group}_Inf'] = any_inf + log_data[f'Any_{sex_name}_{age_group}_Prev'] = any_prev + prev_snapshot[f'Any_{sex_name}_{age_group}_Prev'] = any_prev + + for hpv_group in module.HPV_GROUPS: + inf_n = int(age_df[f'hp_infected_{hpv_group}'].sum()) + prev = age_df[f'hp_infected_{hpv_group}'].mean() + + log_data[f'{hpv_group}_{sex_name}_{age_group}_Inf'] = inf_n + log_data[f'{hpv_group}_{sex_name}_{age_group}_Prev'] = prev + prev_snapshot[f'{hpv_group}_{sex_name}_{age_group}_Prev'] = prev + + # 3. HIV + for hiv_group, hiv_df in sub.groupby('hiv_group', observed=True): + n = len(hiv_df) + log_data[f'Any_{hiv_group}_N'] = int(n) + log_data[f'Any_{hiv_group}_Inf'] = int(hiv_df['hp_is_infected'].sum()) if n > 0 else 0 + log_data[f'Any_{hiv_group}_Prev'] = hiv_df['hp_is_infected'].mean() if n > 0 else math.nan + + for hpv_group in module.HPV_GROUPS: + log_data[f'{hpv_group}_{hiv_group}_Prev'] = ( + hiv_df[f'hp_infected_{hpv_group}'].mean() if n > 0 else math.nan + ) + + # 4. Delta + prev_logged = getattr(module, '_pre_logged_prev', {}) + for key, current_val in prev_snapshot.items(): + previous_val = prev_logged.get(key, math.nan) + if pd.isna(previous_val) or pd.isna(current_val): + log_data[f'{key}_Delta'] = math.nan + else: + log_data[f'{key}_Delta'] = current_val - previous_val + + module._pre_logged_prev = prev_snapshot + + # 5. multiplicity of infection + infection_people = df.index[df.is_alive & (df.age_years >= 15) & df.hp_is_infected] + n_group_1 = 0 + n_group_2 = 0 + n_group_3 = 0 + + male_n_group_1 = 0 + male_n_group_2 = 0 + male_n_group_3 = 0 + + female_n_group_1 = 0 + female_n_group_2 = 0 + female_n_group_3 = 0 + + for person_id in infection_people: + n_group = len(module._get_hpv_group_set(person_id)) + sex = df.at[person_id, 'sex'] + + if n_group == 1: + n_group_1 += 1 + if sex == 'M': + male_n_group_1 += 1 + elif sex =='F': + female_n_group_1 += 1 + + elif n_group == 2: + n_group_2 += 1 + if sex == 'M': + male_n_group_2 += 1 + elif sex =='F': + female_n_group_2 += 1 + + elif n_group == 3: + n_group_3 += 1 + if sex == 'M': + male_n_group_3 += 1 + elif sex =='F': + female_n_group_3 += 1 + + log_data['InfGroup1'] = n_group_1 + log_data['InfGroup2'] = n_group_2 + log_data['InfGroup3'] = n_group_3 + + log_data['MaleGroup1'] = male_n_group_1 + log_data['MaleGroup2'] = male_n_group_2 + log_data['MaleGroup3'] = male_n_group_3 + + log_data['FemaleGroup1'] = female_n_group_1 + log_data['FemaleGroup2'] = female_n_group_2 + log_data['FemaleGroup3'] = female_n_group_3 + + # 6. Persistent infection 统计 + for hpv_group in module.HPV_GROUPS: + pers_col = f'hp_persistent_{hpv_group}' + + if pers_col not in sub.columns: + continue + + persistent = sub[pers_col].fillna(False) + + log_data[f'{hpv_group}_Persistent12_N'] = int(persistent.sum()) + log_data[f'{hpv_group}_Persistent12_Prev'] = float(persistent.mean()) + + for sex_name, sex_df in [('M', sub.loc[sub.sex == 'M']), + ('F', sub.loc[sub.sex == 'F'])]: + n = len(sex_df) + if n > 0: + log_data[f'{hpv_group}_Persistent12_{sex_name}_Prev'] = float( + sex_df[pers_col].fillna(False).mean() + ) + else: + log_data[f'{hpv_group}_Persistent12_{sex_name}_Prev'] = math.nan + + for age_group in module.AGE_LABELS: + age_df = sub.loc[sub['age_group'] == age_group] + n = len(age_df) + if n > 0: + log_data[f'{hpv_group}_Persistent12_{age_group}_Prev'] = float( + age_df[pers_col].fillna(False).mean() + ) + else: + log_data[f'{hpv_group}_Persistent12_{age_group}_Prev'] = math.nan + + logger.info(key='summary', data=log_data) diff --git a/tests/test_docs_data/test_HPV.py b/tests/test_docs_data/test_HPV.py new file mode 100644 index 0000000000..f64b3e7eb7 --- /dev/null +++ b/tests/test_docs_data/test_HPV.py @@ -0,0 +1,170 @@ +import os +from pathlib import Path + +import pandas as pd +import pytest + +from tlo import Date, Simulation, logging +from tlo.methods import ( + demography, + enhanced_lifestyle, + epi, + healthburden, + healthseekingbehaviour, + healthsystem, + hpv, + hiv, + simplified_births, + symptommanager, +) + +try: + resourcefilepath = Path(os.path.dirname(__file__)) / "../resources" +except NameError: + resourcefilepath = "resources" + +def check_dtypes(simulation): + df = simulation.population.props + orig = simulation.population.new_row + assert (df.dtypes == orig.dtypes).all() + +log_config = { + "filename": "hpv_test", # The name of the output file (a timestamp will be appended). + "directory": "./outputs/", # The default output path is `./outputs`. Change it here, if necessary + "custom_levels": { # Customise the output of specific loggers. They are applied in order: + "*": logging.WARNING, # Asterisk matches all loggers - we set the default level to WARNING + "tlo.methods.hpv": logging.INFO, + "tlo.methods.healthsystem": logging.INFO, + "tlo.methods.demography": logging.INFO + } +} + +@pytest.fixture +def sim(seed): + start_date = Date(2010, 1, 1) + sim = Simulation(start_date=start_date, seed=seed, log_config=None, resourcefilepath=resourcefilepath) + + # Register the appropriate modules + sim.register( + demography.Demography(), + simplified_births.SimplifiedBirths(), + enhanced_lifestyle.Lifestyle(), + symptommanager.SymptomManager(), + healthseekingbehaviour.HealthSeekingBehaviour(), + healthburden.HealthBurden(), + healthsystem.HealthSystem( + disable=True, # disables the health system constraints so all HSI events run + ), + epi.Epi(), + hpv.HPV(), + hiv.Hiv + ) + + return sim + + +def test_single_person(sim): + """ + run sim for one person + assign infection + check symptoms scheduled + check symptoms resolved correctly + """ + # set high death rate - change all symptom probabilities to 1 + sim.modules['HPV'].parameters["symptom_prob"]["probability"] = 1 + + sim.make_initial_population(n=1) + df = sim.population.props + person_id = 0 + df.at[person_id, "hp_is_infected"] = True + + # HPV onset event + inf_event = hpv.HPV(person_id=person_id, module=sim.modules['HPV']) + inf_event.apply(person_id) + assert not pd.isnull(df.at[person_id, "me_date_measles"]) + + # check measles symptom resolve event and death scheduled + events_for_this_person = sim.find_events_for_person(person_id) + assert len(events_for_this_person) > 0 + next_event_date, next_event_obj = events_for_this_person[0] + assert isinstance(next_event_obj, (measles.MeaslesDeathEvent, measles.MeaslesSymptomResolveEvent)) + + +@pytest.mark.slow +def test_measles_cases_and_hsi_occurring(sim): + """ Run the measles module + check dtypes consistency + check infections occurring + check measles onset event scheduled + check symptoms assigned + check treatments occurring + """ + + end_date = Date(2011, 12, 31) + popsize = 1000 + + # set high transmission probability + sim.modules['Measles'].parameters['beta_baseline'] = 1.0 + + # set high death rate and change all symptom probabilities to 1 + cfr = sim.modules['Measles'].parameters["case_fatality_rate"] + sim.modules['Measles'].parameters["case_fatality_rate"] = {k: 1.0 for k, v in cfr.items()} + sim.modules['Measles'].parameters["symptom_prob"]["probability"] = 1 + + # Make the population + sim.make_initial_population(n=popsize) + + # check data types + check_dtypes(sim) + sim.simulate(end_date=end_date) + check_dtypes(sim) + + df = sim.population.props + + # check people getting measles + assert df['me_has_measles'].values.sum() > 0 # current cases of measles + + # check that everyone who is currently infected gets a measles onset or symptom resolve event + # they can have multiple symptom resolve events scheduled (by symptom onset and by treatment) + inf = df.loc[df.is_alive & df.me_has_measles].index.tolist() + + for idx in inf: + events_for_this_person = sim.find_events_for_person(idx) + assert len(events_for_this_person) > 0 + # assert measles event in event list for this person + assert "tlo.methods.measles" in str(events_for_this_person) + # find the first measles event + measles_event_date = [date for (date, event) in events_for_this_person if "tlo.methods.measles" in str(event)] + assert measles_event_date[0] >= df.loc[idx, "me_date_measles"] + + # check symptoms assigned + # there is an incubation period, so infected people may not have rash immediately + # if on treatment for measles, must have rash for diagnosis + has_rash = sim.modules['SymptomManager'].who_has('rash') + current_measles_tx = df.index[df.is_alive & df.me_has_measles & df.me_on_treatment] + if current_measles_tx.any(): + assert set(current_measles_tx) <= set(has_rash) + + # check if any measles deaths occurred + assert df.cause_of_death.loc[~df.is_alive].str.startswith('Measles').any() + + +@pytest.mark.slow +def test_measles_zero_death_rate(sim): + + end_date = Date(2010, 12, 31) + popsize = 10_000 + + # set zero death rate + cfr = sim.modules['Measles'].parameters["case_fatality_rate"] + sim.modules['Measles'].parameters["case_fatality_rate"] = {k: 0.0 for k, v in cfr.items()} + + sim.make_initial_population(n=popsize) + sim.simulate(end_date=end_date) + df = sim.population.props + + # no symptoms should equal no treatment (unless other rash has prompted incorrect tx: unlikely) + assert not (df.loc[df.is_alive, 'me_on_treatment']).all() + + # check that there have been no deaths caused by measles + assert not df.cause_of_death.loc[~df.is_alive].str.startswith('Measles').any()