From 6af9c1803aaf664e84e3c8eb3a03c498eb4f99e7 Mon Sep 17 00:00:00 2001
From: Liuxinac <liuxinac@hotmail.com>
Date: Mon, 2 Mar 2026 13:27:27 +0000
Subject: [PATCH 01/10] This is the first draft of HPV model

---
 .../parameter_values.csv                      |   2 +-
 src/scripts/HPV_Analyses/Analysis_HPV.py      |  93 ++++++++
 ...ario_impact_of_consumables_availability.py |  30 +++
 .../fullmodel_Xin/fullmodel_test_2DEC.py      |  62 +++++
 src/scripts/hiv_tb_Xin/hiv_tb_test_2DEC.py    |  88 +++++++
 src/tlo/methods/hpv.py                        | 216 ++++++++++++++++++
 6 files changed, 490 insertions(+), 1 deletion(-)
 create mode 100644 src/scripts/HPV_Analyses/Analysis_HPV.py
 create mode 100644 src/scripts/analysis_example_Xin/scenario_impact_of_consumables_availability.py
 create mode 100644 src/scripts/fullmodel_Xin/fullmodel_test_2DEC.py
 create mode 100644 src/scripts/hiv_tb_Xin/hiv_tb_test_2DEC.py
 create mode 100644 src/tlo/methods/hpv.py

diff --git a/resources/ResourceFile_Cervical_Cancer/parameter_values.csv b/resources/ResourceFile_Cervical_Cancer/parameter_values.csv
index a277ea3972..0bd9e1e15b 100644
--- a/resources/ResourceFile_Cervical_Cancer/parameter_values.csv
+++ b/resources/ResourceFile_Cervical_Cancer/parameter_values.csv
@@ -52,4 +52,4 @@ months_to_next_appt_treated_less_than_12_mo_ago,3,local,1,36,vague prior,Missing
 months_to_next_appt_treated_12_to_36_mo_ago,6,local,1,36,vague prior,Missing
 months_to_next_appt_treated_36_to_60_mo_ago,12,local,1,36,vague prior,Missing
 palliative_care_interval_months,1,local,0.5,12,vague prior,Missing
-main_polling_frequency_months,1,undetermined,1,12,design decision,N/A
\ No newline at end of file
+main_polling_frequency_months,1,undetermined,1,12,design decision,N/A
diff --git a/src/scripts/HPV_Analyses/Analysis_HPV.py b/src/scripts/HPV_Analyses/Analysis_HPV.py
new file mode 100644
index 0000000000..33bddf1436
--- /dev/null
+++ b/src/scripts/HPV_Analyses/Analysis_HPV.py
@@ -0,0 +1,93 @@
+"""
+Run the HPV modules
+ """
+
+import datetime
+import pickle
+import random
+from pathlib import Path
+
+from tlo import Date, Simulation, logging
+from tlo.analysis.utils import parse_log_file
+from tlo.methods import (
+    demography,
+    enhanced_lifestyle,
+    epi,
+    healthburden,
+    healthseekingbehaviour,
+    healthsystem,
+    simplified_births,
+    symptommanager,
+    hpv,
+)
+
+# Where will outputs go
+outputpath = Path("./outputs")  # folder for convenience of storing outputs
+
+# date-stamp to label log files and any other outputs
+datestamp = datetime.date.today().strftime("__%Y_%m_%d")
+
+# The resource files
+resourcefilepath = './resources'
+
+# %% Run the simulation
+start_date = Date(2010, 1, 1)
+end_date = Date(2012, 1, 1)
+popsize = 2000
+
+# scenario = 1
+
+# set up the log config
+log_config = {
+    "filename": "test_runs",
+    "directory": outputpath,
+    "custom_levels": {
+        "*": logging.WARNING,
+        "tlo.methods.hpv": logging.INFO,
+    },
+}
+
+# Register the appropriate modules
+# need to call epi before tb to get bcg vax
+seed = random.randint(0, 50000)
+# seed = 41728  # set seed for reproducibility
+sim = Simulation(start_date=start_date, seed=seed, log_config=log_config,
+                 show_progress_bar=True, resourcefilepath=resourcefilepath)
+sim.register(
+    demography.Demography(),
+    simplified_births.SimplifiedBirths(),
+    enhanced_lifestyle.Lifestyle(),
+    healthsystem.HealthSystem(service_availability=["*"],  # all treatment allowed
+        mode_appt_constraints=1,  # mode of constraints to do with officer numbers and time
+        cons_availability="default",  # mode for consumable constraints (if ignored, all consumables available)
+        ignore_priority=False,  # do not use the priority information in HSI event to schedule
+        capabilities_coefficient=1.0,  # multiplier for the capabilities of health officers
+        use_funded_or_actual_staffing="actual",  # actual: use numbers/distribution of staff available currently
+        disable=False,  # disables the healthsystem (no constraints and no logging) and every HSI runs
+        disable_and_reject_all=False,  # disable healthsystem and no HSI runs
+    ),
+    symptommanager.SymptomManager(),
+    healthseekingbehaviour.HealthSeekingBehaviour(),
+    healthburden.HealthBurden(),
+    epi.Epi(),
+    hpv.HPV(),
+)
+
+# set the scenario
+sim.modules["Hpv"].parameters["r_hpv"] = 0.4
+
+# Run the simulation and flush the logger
+sim.make_initial_population(n=popsize)
+sim.simulate(end_date=end_date)
+
+# parse the results
+output = parse_log_file(sim.log_filepath)
+
+# save the results, argument 'wb' means write using binary mode. use 'rb' for reading file
+with open(outputpath / "default_run.pickle", "wb") as f:
+    # Pickle the 'data' dictionary using the highest protocol available.
+    pickle.dump(dict(output), f, pickle.HIGHEST_PROTOCOL)
+
+# load the results
+with open(outputpath / "default_run.pickle", "rb") as f:
+    output = pickle.load(f)
diff --git a/src/scripts/analysis_example_Xin/scenario_impact_of_consumables_availability.py b/src/scripts/analysis_example_Xin/scenario_impact_of_consumables_availability.py
new file mode 100644
index 0000000000..0d8f0e9a41
--- /dev/null
+++ b/src/scripts/analysis_example_Xin/scenario_impact_of_consumables_availability.py
@@ -0,0 +1,30 @@
+from tlo import Date, logging
+from tlo.methods.fullmodel import fullmodel
+from tlo.scenario import BaseScenario
+
+class ImpactOfConsumablesAvailability(BaseScenario):
+    def __init__(self):
+        super().__init__(
+            seed=0,
+            start_date=Date(2010, 1, 1),
+            end_date=Date(2015, 1, 1),
+            initial_population_size=10_000,
+            number_of_draws= 2,
+            runs_per_draw=2,
+        )
+    def log_configuration(self):
+        return {
+            'filename': 'impact_of_consumables_availability',
+            'directory': './outputs',
+            'custom_levels': {
+                '*': logging.WARNING,
+                'tlo.methods.demography': logging.INFO,
+                'tlo.methods.healthburden': logging.INFO,
+            }
+        }
+    def modules(self):
+        return fullmodel()
+    def draw_parameters(self, draw_number, rng):
+        return {
+            'HealthSystem': {'cons_availability': ['default', 'all'][draw_number]}
+        }
diff --git a/src/scripts/fullmodel_Xin/fullmodel_test_2DEC.py b/src/scripts/fullmodel_Xin/fullmodel_test_2DEC.py
new file mode 100644
index 0000000000..4fedb2c4f0
--- /dev/null
+++ b/src/scripts/fullmodel_Xin/fullmodel_test_2DEC.py
@@ -0,0 +1,62 @@
+"""
+Run the full model with intervention coverage specified at national level
+ """
+import datetime
+import pickle
+from pathlib import Path
+from tlo import Date, Simulation, logging
+from tlo.analysis.utils import parse_log_file
+from tlo.methods.fullmodel import fullmodel
+# Where will outputs go
+outputpath = Path("./outputs")  # folder for convenience of storing outputs
+# date-stamp to label log files and any other outputs
+datestamp = datetime.date.today().strftime("__%Y_%m_%d")
+# The resource files
+resourcefilepath = Path("./resources")
+# %% Run the simulation
+start_date = Date(2010, 1, 1)
+end_date = Date(2012, 1, 1)
+popsize = 500
+# set up the log config
+# add deviance measure logger if needed
+log_config = {
+    "filename": "hiv_test_runs",
+    "directory": outputpath,
+    "custom_levels": {
+        "*": logging.WARNING,
+        "tlo.methods.hiv": logging.INFO,
+        "tlo.methods.tb": logging.INFO,
+        "tlo.methods.demography": logging.INFO,
+        # "tlo.methods.healthsystem.summary": logging.INFO,
+        "tlo.methods.labour.detail": logging.WARNING,  # this logger keeps outputting even when set to warning
+    },
+}
+# Register the appropriate modules
+# need to call epi before tb to get bcg vax
+# seed = random.randint(0, 50000)
+seed = 32  # set seed for reproducibility
+sim = Simulation(start_date=start_date, seed=seed, log_config=log_config,
+                 show_progress_bar=True, resourcefilepath=resourcefilepath)
+sim.register(*fullmodel(use_simplified_births=False,
+    module_kwargs={
+        "SymptomManager": {"spurious_symptoms": True},
+        "HealthSystem": {"disable": False,
+                         "service_availability": ["*"],
+                         "mode_appt_constraints": 1,
+                         "cons_availability": "default",
+                         "beds_availability": "all",
+                         "ignore_priority": False,
+                         "use_funded_or_actual_staffing": "actual"},
+    },
+))
+# Run the simulation and flush the logger
+sim.make_initial_population(n=popsize)
+sim.simulate(end_date=end_date)
+# parse the results
+output = parse_log_file(sim.log_filepath)
+# save the results, argument 'wb' means write using binary mode. use 'rb' for reading file
+with open(outputpath / "default_run.pickle", "wb") as f:
+    # Pickle the 'data' dictionary using the highest protocol available.
+    pickle.dump(dict(output), f, pickle.HIGHEST_PROTOCOL)
+with open(outputpath / "default_run.pickle", "rb") as f:
+    output = pickle.load(f)
diff --git a/src/scripts/hiv_tb_Xin/hiv_tb_test_2DEC.py b/src/scripts/hiv_tb_Xin/hiv_tb_test_2DEC.py
new file mode 100644
index 0000000000..1c215b6e75
--- /dev/null
+++ b/src/scripts/hiv_tb_Xin/hiv_tb_test_2DEC.py
@@ -0,0 +1,88 @@
+"""
+Run the HIV/TB modules with intervention coverage specified at national level
+save outputs for plotting (file: output_plots_tb.py)
+ """
+import datetime
+import pickle
+import random
+from pathlib import Path
+from tlo import Date, Simulation, logging
+from tlo.analysis.utils import parse_log_file
+from tlo.methods import (  # deviance_measure,
+    demography,
+    enhanced_lifestyle,
+    epi,
+    healthburden,
+    healthseekingbehaviour,
+    healthsystem,
+    hiv,
+    simplified_births,
+    symptommanager,
+    tb,
+)
+# Where will outputs go
+outputpath = Path("./outputs")  # folder for convenience of storing outputs
+# date-stamp to label log files and any other outputs
+datestamp = datetime.date.today().strftime("__%Y_%m_%d")
+# The resource files
+resourcefilepath = './resources'
+# %% Run the simulation
+start_date = Date(2010, 1, 1)
+end_date = Date(2030, 1, 1)
+popsize = 5000
+# scenario = 1
+# set up the log config
+log_config = {
+    "filename": "test_runs",
+    "directory": outputpath,
+    "custom_levels": {
+        "*": logging.WARNING,
+        # "tlo.methods.deviance_measure": logging.INFO,
+        # "tlo.methods.epi": logging.INFO,
+        "tlo.methods.hiv": logging.INFO,
+        "tlo.methods.tb": logging.INFO,
+        "tlo.methods.demography": logging.INFO,
+        # "tlo.methods.demography.detail": logging.WARNING,
+        "tlo.methods.healthsystem.summary": logging.INFO,
+        # "tlo.methods.healthsystem": logging.INFO,
+        # "tlo.methods.healthburden": logging.INFO,
+    },
+}
+# Register the appropriate modules
+seed = random.randint(0, 50000)
+# seed = 41728  # set seed for reproducibility
+sim = Simulation(start_date=start_date, seed=seed, log_config=log_config,
+                 show_progress_bar=True, resourcefilepath=resourcefilepath)
+sim.register(
+    demography.Demography(),
+    simplified_births.SimplifiedBirths(),
+    enhanced_lifestyle.Lifestyle(),
+    healthsystem.HealthSystem(service_availability=["*"],  # all treatment allowed
+        mode_appt_constraints=1,  # mode of constraints to do with officer numbers and time
+        cons_availability="default",  # mode for consumable constraints (if ignored, all consumables available)
+        ignore_priority=False,  # do not use the priority information in HSI event to schedule
+        capabilities_coefficient=1.0,  # multiplier for the capabilities of health officers
+        use_funded_or_actual_staffing="actual",  # actual: use numbers/distribution of staff available currently
+        disable=False,  # disables the healthsystem (no constraints and no logging) and every HSI runs
+        disable_and_reject_all=False,  # disable healthsystem and no HSI runs
+    ),
+    symptommanager.SymptomManager(),
+    healthseekingbehaviour.HealthSeekingBehaviour(),
+    healthburden.HealthBurden(),
+    epi.Epi(),
+    hiv.Hiv(run_with_checks=False),
+    tb.Tb(),
+    # deviance_measure.Deviance(resourcefilepath=resourcefilepath),
+)
+# Run the simulation and flush the logger
+sim.make_initial_population(n=popsize)
+sim.simulate(end_date=end_date)
+# parse the results
+output = parse_log_file(sim.log_filepath)
+# save the results, argument 'wb' means write using binary mode. use 'rb' for reading file
+with open(outputpath / "default_run.pickle", "wb") as f:
+    # Pickle the 'data' dictionary using the highest protocol available.
+    pickle.dump(dict(output), f, pickle.HIGHEST_PROTOCOL)
+# load the results
+with open(outputpath / "default_run.pickle", "rb") as f:
+    output = pickle.load(f)
diff --git a/src/tlo/methods/hpv.py b/src/tlo/methods/hpv.py
new file mode 100644
index 0000000000..a6d80ff0c5
--- /dev/null
+++ b/src/tlo/methods/hpv.py
@@ -0,0 +1,216 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING, List, Optional
+
+import pandas as pd
+
+from tlo import DAYS_IN_YEAR, DateOffset, Module, Parameter, Property, Types, logging
+from tlo.analysis.utils import get_counts_by_sex_and_age_group
+from tlo.events import Event, IndividualScopeEventMixin, PopulationScopeEventMixin, RegularEvent
+from tlo.methods import Metadata
+from tlo.methods.causes import Cause
+from tlo.methods.demography import InstantaneousDeath
+from tlo.methods.hsi_event import HSI_Event
+from tlo.methods.hsi_generic_first_appts import GenericFirstAppointmentsMixin
+from tlo.methods.symptommanager import Symptom
+from tlo.util import random_date, read_csv_files
+
+if TYPE_CHECKING:
+    from tlo.methods.hsi_generic_first_appts import HSIEventScheduler
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+class HPV(Module, GenericFirstAppointmentsMixin):
+    """This is a HPV infectious Process.
+
+    It demonstrates the following behaviours in respect of the healthsystem module:
+
+        - Registration of the disease module with healthsystem
+        - Reading DALY weights and reporting daly values related to this disease
+        - Health care seeking
+        - Usual HSI behaviour
+        - Restrictive requirements on the facility_level for the HSI_event
+        - Use of the SymptomManager
+    """
+
+    INIT_DEPENDENCIES = {'Demography', 'SymptomManager'}
+
+    OPTIONAL_INIT_DEPENDENCIES = {'HealthBurden'}
+
+    # Declare Metadata
+    METADATA = {
+        Metadata.DISEASE_MODULE,
+        Metadata.USES_SYMPTOMMANAGER,
+        Metadata.USES_HEALTHSYSTEM,
+        Metadata.USES_HEALTHBURDEN,
+        Metadata.REPORTS_DISEASE_NUMBERS
+    }
+
+    # Declare Causes of Death
+    CAUSES_OF_DEATH = {}
+
+    # Declare Causes of Disability
+    CAUSES_OF_DISABILITY = {}
+
+    PARAMETERS = {
+        "r_hpv": Parameter(
+            Types.REAL,
+            "probability per month of oncogenic hpv infection",
+        ),
+        "Init_prevalence": Parameter(
+            Types.REAL,
+            "Initial prevalence of oncogenic hpv infection",
+        ),
+    }
+
+    PROPERTIES = {
+        'hp_is_infected': Property(
+            Types.BOOL, 'Is infected with oncogenic hpv infection'),
+    }
+
+    def __init__(self, name=None):
+        # NB. Parameters passed to the module can be inserted in the __init__ definition.
+
+        super().__init__(name)
+
+    def read_parameters(self, resourcefilepath: Optional[Path] = None):
+        """Read in parameters and do the registration of this module and its symptoms"""
+        self.load_parameters_from_dataframe(
+            read_csv_files(Path(resourcefilepath) / "ResourceFile_HPV",
+                           files="parameter_values")
+        )
+
+    def initialise_population(self, population):
+        """Set our property values for the initial population.
+
+        This method is called by the simulation when creating the initial population, and is
+        responsible for assigning initial values, for every individual, of those properties
+        'owned' by this module, i.e. those declared in the PROPERTIES dictionary above.
+
+        :param population: the population of individuals
+        """
+
+        df = population.props  # a shortcut to the dataframe storing data for individiuals
+
+        # Set default for properties
+        df.loc[df.is_alive, 'hp_is_infected'] = False  # default: no individuals infected
+
+        Susceptible = df.loc[df.is_alive & (df.age_years >= 15)].index
+
+
+        # randomly selected some individuals as infected
+        initial_infected = self.parameters['initial_prevalence']
+        index_of_infected = self.rng.choice(Susceptible, size=initial_infected*Susceptible, replace=False)
+        df.loc[index_of_infected, 'hp_is_infected'] = True
+
+    def initialise_simulation(self, sim):
+
+        """Get ready for simulation start.
+
+        This method is called just before the main simulation loop begins, and after all
+        modules have read their parameters and the initial population has been created.
+        It is a good place to add initial events to the event queue.
+        """
+
+        # add the basic event
+        event = HpvInfectionEvent(self)
+        sim.schedule_event(event, sim.date + DateOffset(months=6))
+
+        # add an event to log to screen
+        sim.schedule_event(HpvLoggingEvent(self), sim.date + DateOffset(months=12))
+
+    def on_birth(self, mother_id, child_id):
+        """Initialise our properties for a newborn individual.
+
+        This is called by the simulation whenever a new person is born.
+
+        :param mother_id: the ID for the mother for this child
+        :param child_id: the ID for the new child
+        """
+
+        df = self.sim.population.props  # shortcut to the population props dataframe
+        df.at[child_id, 'hp_is_infected'] = False
+
+    def report_daly_values(self):
+        # This must send back a pd.Series or pd.DataFrame that reports on the average daly-weights that have been
+        # experienced by persons in the previous month. Only rows for alive-persons must be returned.
+        # The names of the series of columns is taken to be the label of the cause of this disability.
+        # It will be recorded by the healthburden module as <ModuleName>_<Cause>.
+
+        logger.debug(key="debug", data="This is hpv reporting my health values")
+
+        df = self.sim.population.props  # shortcut to population properties dataframe
+
+        health_values = pd.Series(index=df.index[df.is_alive], data=0)
+        return health_values  # returns the series
+
+    def report_summary_stats(self):
+        df = self.sim.population.props
+        prevalence_by_age_group_sex = get_counts_by_sex_and_age_group(df, 'hp_is_infected')
+        return {'infected': prevalence_by_age_group_sex}
+
+class HpvInfectionEvent(RegularEvent, PopulationScopeEventMixin):
+    """
+    This event is occurring regularly at one 6 months intervals and controls the infection process of HPV.
+    """
+
+    def __init__(self, module):
+        super().__init__(module, frequency=DateOffset(months=6))
+        assert isinstance(module, HPV)
+
+    def apply(self, population):
+
+        logger.debug(key='debug', data='This is HpvInfectionEvent, tracking the disease progression of the population.')
+
+        df = population.props
+
+        # 1. get (and hold) index of currently infected and uninfected individuals
+        currently_infected = df.index[df.hp_is_infected & df.is_alive]
+        currently_susc = df.index[df.is_alive & ~df.hp_is_infected]
+
+        if df.is_alive.sum():
+            prevalence = len(currently_infected) / (
+                len(currently_infected) + len(currently_susc))
+        else:
+            prevalence = 0
+
+        # 2. handle new infections
+        now_infected = self.module.rng.choice([True, False],
+                                              size=len(currently_susc),
+                                              p=[prevalence, 1 - prevalence])
+
+        # if any are newly infected...
+        if now_infected.sum():
+            infected_idx = currently_susc[now_infected]
+
+            df.loc[infected_idx, 'hp_is_infected'] = True
+
+            # schedule death events for newly infected individuals
+
+        else:
+            logger.debug(key='debug', data='This is HpvInfectionEvent, no one is newly infected.')
+
+class HpvLoggingEvent(RegularEvent, PopulationScopeEventMixin):
+    def __init__(self, module):
+        """Produce a summmary of the numbers of people with respect to their 'hpv status'
+        """
+        # run this event every month
+        self.repeat = 12
+        super().__init__(module, frequency=DateOffset(months=self.repeat))
+        assert isinstance(module, HPV)
+
+    def apply(self, population):
+        # get some summary statistics
+        df = population.props
+
+        infected_total = df.loc[df.is_alive, 'hp_is_infected'].sum()
+        proportion_infected = infected_total / len(df)
+
+        logger.info(key='summary',
+                    data={'TotalInf': infected_total,
+                          'PropInf': proportion_infected,
+                          })
+

From 8a7bb4987205acba210e894dce12f0d89d3d53f8 Mon Sep 17 00:00:00 2001
From: Liuxinac <liuxinac@hotmail.com>
Date: Wed, 25 Mar 2026 12:36:23 +0000
Subject: [PATCH 02/10] Add 3 HPV groups, simple sexual mixing, and
 self-clearance

---
 src/scripts/HPV_Analyses/Analysis_HPV.py |  48 ++++--
 src/tlo/methods/hpv.py                   | 192 ++++++++++++++++++-----
 2 files changed, 189 insertions(+), 51 deletions(-)

diff --git a/src/scripts/HPV_Analyses/Analysis_HPV.py b/src/scripts/HPV_Analyses/Analysis_HPV.py
index 33bddf1436..aac20b77b0 100644
--- a/src/scripts/HPV_Analyses/Analysis_HPV.py
+++ b/src/scripts/HPV_Analyses/Analysis_HPV.py
@@ -8,7 +8,7 @@
 from pathlib import Path
 
 from tlo import Date, Simulation, logging
-from tlo.analysis.utils import parse_log_file
+from tlo.analysis.utils import parse_log_file, extract_results
 from tlo.methods import (
     demography,
     enhanced_lifestyle,
@@ -16,11 +16,14 @@
     healthburden,
     healthseekingbehaviour,
     healthsystem,
+    measles,
     simplified_births,
     symptommanager,
     hpv,
 )
 
+results_folder = Path("./outputs")
+
 # Where will outputs go
 outputpath = Path("./outputs")  # folder for convenience of storing outputs
 
@@ -32,10 +35,9 @@
 
 # %% Run the simulation
 start_date = Date(2010, 1, 1)
-end_date = Date(2012, 1, 1)
+end_date = Date(2020, 1, 1)
 popsize = 2000
 
-# scenario = 1
 
 # set up the log config
 log_config = {
@@ -46,11 +48,11 @@
         "tlo.methods.hpv": logging.INFO,
     },
 }
-
-# Register the appropriate modules
-# need to call epi before tb to get bcg vax
+#
+# # Register the appropriate modules
+# # need to call epi before tb to get bcg vax
 seed = random.randint(0, 50000)
-# seed = 41728  # set seed for reproducibility
+# # seed = 41728  # set seed for reproducibility
 sim = Simulation(start_date=start_date, seed=seed, log_config=log_config,
                  show_progress_bar=True, resourcefilepath=resourcefilepath)
 sim.register(
@@ -71,15 +73,17 @@
     healthburden.HealthBurden(),
     epi.Epi(),
     hpv.HPV(),
+    measles.Measles(),
 )
-
-# set the scenario
-sim.modules["Hpv"].parameters["r_hpv"] = 0.4
-
-# Run the simulation and flush the logger
+#
+# # set the scenario
+#sim.modules["HPV"].parameters["r_hpv"] = 0.01
+#sim.modules["HPV"].parameters["r_hpv_clear"] = 0.6
+#
+# # Run the simulation and flush the logger
 sim.make_initial_population(n=popsize)
 sim.simulate(end_date=end_date)
-
+#
 # parse the results
 output = parse_log_file(sim.log_filepath)
 
@@ -91,3 +95,21 @@
 # load the results
 with open(outputpath / "default_run.pickle", "rb") as f:
     output = pickle.load(f)
+
+#Show the results
+hpv_outputs = output["tlo.methods.hpv"]["summary"]
+# proportion_infected = extract_results(
+#     results_folder,
+#     module="tlo.methods.hpv",
+#     key="summary",
+#     column="PropInf",
+#     do_scaling=False,
+# )
+#
+# number_infected = extract_results(
+#     results_folder,
+#     module="tlo.methods.hpv",
+#     key="summary",
+#     column="TotalInf",
+#     do_scaling=True,
+# )
diff --git a/src/tlo/methods/hpv.py b/src/tlo/methods/hpv.py
index a6d80ff0c5..8e656e10ee 100644
--- a/src/tlo/methods/hpv.py
+++ b/src/tlo/methods/hpv.py
@@ -25,6 +25,10 @@
 
 class HPV(Module, GenericFirstAppointmentsMixin):
     """This is a HPV infectious Process.
+    Groups:
+        g1 = HPV16/18
+        g2 = other vaccine-covered high-risk HPV (31/33/45/52/58)
+        g3 = other high-risk HPV (35/39/51/56/59/68)
 
     It demonstrates the following behaviours in respect of the healthsystem module:
 
@@ -55,25 +59,49 @@ class HPV(Module, GenericFirstAppointmentsMixin):
     # Declare Causes of Disability
     CAUSES_OF_DISABILITY = {}
 
+    HPV_GROUPS = ['hr1', 'hr2', 'hr3']
+
     PARAMETERS = {
-        "r_hpv": Parameter(
+        "init_prev_hpv_hr1": Parameter(
+            Types.REAL,
+            "Initial prevalence of hpv 16/18 infection",
+        ),
+        "init_prev_hpv_hr2": Parameter(
             Types.REAL,
-            "probability per month of oncogenic hpv infection",
+            "Initial prevalence of HPV 31/33/45/52/58 infection",
         ),
-        "Init_prevalence": Parameter(
+        "init_prev_hpv_hr3": Parameter(
             Types.REAL,
-            "Initial prevalence of oncogenic hpv infection",
+            "Initial prevalence of other HR types"
+        ),
+        # Transmission coefficients β
+        "b_hpv": Parameter(
+            Types.REAL,
+            "Baseline transmission coefficient for HPV Infection",
+        ),
+
+        # Clearance probabilities
+        "r_clear_12": Parameter(
+            Types.REAL,
+            "probability of clearing HPV after 12 month",
+        ),
+        "r_clear_24": Parameter(
+            Types.REAL,
+            "probability of clearing HPV after 24 month",
         ),
     }
 
     PROPERTIES = {
         'hp_is_infected': Property(
-            Types.BOOL, 'Is infected with oncogenic hpv infection'),
+            Types.BOOL, 'Is infected with oncogenic hpv group'),
+        'hp_group': Property(
+            Types.STRING, 'Current HPV types carried, separated by "|"'),
+        'hp_date_infected': Property(
+            Types.DATE, 'Date of most recent infection'),
     }
 
     def __init__(self, name=None):
         # NB. Parameters passed to the module can be inserted in the __init__ definition.
-
         super().__init__(name)
 
     def read_parameters(self, resourcefilepath: Optional[Path] = None):
@@ -97,14 +125,58 @@ def initialise_population(self, population):
 
         # Set default for properties
         df.loc[df.is_alive, 'hp_is_infected'] = False  # default: no individuals infected
+        df.loc[df.is_alive, 'hp_group'] = ''  # default: no hpy type
+        df.loc[df.is_alive, 'hp_date_infected'] = pd.NaT  # default: not a time
 
-        Susceptible = df.loc[df.is_alive & (df.age_years >= 15)].index
+        eligible = df.index[df.is_alive & (df.age_years >= 15)]
 
+        for group in self.HPV_GROUPS:
+            p_init = self.parameters[f'init_prev_hpv_{group}']
+            u = self.rng.random(size=len(eligible))
+            infected_this_group = eligible[u < p_init]
+
+            for person_id in infected_this_group:
+                current_groups = self._get_hpv_group_set(person_id)
+                current_groups.add(group)
+                self._set_hpv_group_set(person_id, current_groups)
+
+        initially_infected = df.index[df.is_alive & df.hp_is_infected]
+        if len(initially_infected) > 0:
+            df.loc[initially_infected, 'hp_date_infected'] = self.sim.date
+
+    def _get_hpv_group_set(self,person_id):
+        df = self.sim.population.props
+        hpv_str = df.at[person_id, 'hp_group']
+        if hpv_str is None or hpv_str == '':
+            return set()
+        return set(hpv_str.split('|'))
+
+    def _set_hpv_group_set(self,person_id, hpv_set):
+        df = self.sim.population.props
+
+        if len(hpv_set) == 0:
+            df.at[person_id, 'hp_group'] = ''
+            df.at[person_id, 'hp_is_infected'] = False
+        else:
+            df.at[person_id, 'hp_group'] = '|'.join(sorted(hpv_set))
+            df.at[person_id, 'hp_is_infected'] = True
+
+    def _add_new_infection_groups(self, person_id, new_groups):
+
+        if len(new_groups) == 0:
+            return
+        df = self.sim.population.props
+        current_groups = self._get_hpv_group_set(person_id)
+        updated_groups = current_groups.union(new_groups)
 
-        # randomly selected some individuals as infected
-        initial_infected = self.parameters['initial_prevalence']
-        index_of_infected = self.rng.choice(Susceptible, size=initial_infected*Susceptible, replace=False)
-        df.loc[index_of_infected, 'hp_is_infected'] = True
+        self._set_hpv_group_set(person_id, updated_groups)
+        df.at[person_id, 'hp_date_infected'] = self.sim.date
+
+    def _clear_all_infection(self, person_id):
+        """Clear all HPV groups for a person."""
+        df = self.sim.population.props
+        self._set_hpv_group_set(person_id, set())
+        df.at[person_id, 'hp_date_infected'] = pd.NaT
 
     def initialise_simulation(self, sim):
 
@@ -133,6 +205,8 @@ def on_birth(self, mother_id, child_id):
 
         df = self.sim.population.props  # shortcut to the population props dataframe
         df.at[child_id, 'hp_is_infected'] = False
+        df.at[child_id, 'hp_group'] = ''
+        df.at[child_id, 'hp_date_infected'] = pd.NaT
 
     def report_daly_values(self):
         # This must send back a pd.Series or pd.DataFrame that reports on the average daly-weights that have been
@@ -141,10 +215,8 @@ def report_daly_values(self):
         # It will be recorded by the healthburden module as <ModuleName>_<Cause>.
 
         logger.debug(key="debug", data="This is hpv reporting my health values")
-
         df = self.sim.population.props  # shortcut to population properties dataframe
-
-        health_values = pd.Series(index=df.index[df.is_alive], data=0)
+        health_values = pd.Series(index=df.index[df.is_alive], data=0.0)
         return health_values  # returns the series
 
     def report_summary_stats(self):
@@ -153,45 +225,89 @@ def report_summary_stats(self):
         return {'infected': prevalence_by_age_group_sex}
 
 class HpvInfectionEvent(RegularEvent, PopulationScopeEventMixin):
-    """
-    This event is occurring regularly at one 6 months intervals and controls the infection process of HPV.
-    """
+    """This event is occurring regularly at one 6 months intervals and controls the infection process of HPV."""
 
     def __init__(self, module):
         super().__init__(module, frequency=DateOffset(months=6))
         assert isinstance(module, HPV)
 
     def apply(self, population):
-
         logger.debug(key='debug', data='This is HpvInfectionEvent, tracking the disease progression of the population.')
-
         df = population.props
+        module = self.module
+        now = self.sim.date
 
-        # 1. get (and hold) index of currently infected and uninfected individuals
-        currently_infected = df.index[df.hp_is_infected & df.is_alive]
-        currently_susc = df.index[df.is_alive & ~df.hp_is_infected]
+        # 1. define eligible population
+        eligible = df.index[df.is_alive & (df.age_years >= 15)]
+        if len(eligible) == 0:
+            return
 
-        if df.is_alive.sum():
-            prevalence = len(currently_infected) / (
-                len(currently_infected) + len(currently_susc))
-        else:
-            prevalence = 0
+        # 2. self-clearance
+        infected_idx = df.index[df.is_alive & df.hp_is_infected & (df.age_years >= 15)]
+        for person_id in infected_idx:
+            date_inf = df.at[person_id, 'hp_date_infected']
+            #if data is missing. skip for safety
+            if pd.isna(date_inf):
+                continue
 
-        # 2. handle new infections
-        now_infected = self.module.rng.choice([True, False],
-                                              size=len(currently_susc),
-                                              p=[prevalence, 1 - prevalence])
+            duration = (now.year - date_inf.year) * 12 + (now.month - date_inf.month)
 
-        # if any are newly infected...
-        if now_infected.sum():
-            infected_idx = currently_susc[now_infected]
+            clear_now = False
 
-            df.loc[infected_idx, 'hp_is_infected'] = True
+            if duration >= 24:
+                p_clear = module.parameters['r_clear_24']
+                if module.rng.random() < p_clear:
+                    clear_now = True
 
-            # schedule death events for newly infected individuals
+            elif duration >= 12:
+                p_clear = module.parameters['r_clear_12']
+                if module.rng.random() < p_clear:
+                    clear_now = True
 
-        else:
-            logger.debug(key='debug', data='This is HpvInfectionEvent, no one is newly infected.')
+            if clear_now:
+                module._clear_all_infection(person_id)
+
+        # 3. recalculate prevalence by HPV group after clearance
+        male_idx = df.index[df.is_alive & (df.age_years >= 15) & (df.sex == 'M')]
+        female_idx = df.index[df.is_alive & (df.age_years >= 15) & (df.sex == 'F')]
+
+        prev_male = {}
+        prev_female = {}
+
+        for group in module.HPV_GROUPS:
+            male_carrier = male_idx[df.loc[male_idx, 'hp_group'].fillna('').str.contains(fr'(^|\|){group}(\||$)', regex=True)]
+            female_carrier = female_idx[df.loc[female_idx, 'hp_group'].fillna('').str.contains(fr'(^|\|){group}(\||$)', regex=True)]
+
+            prev_male[group] = len(male_carrier)/len(male_idx) if len(male_idx) > 0 else 0
+            prev_female[group] = len(female_carrier) / len(female_idx) if len(female_idx) > 0 else 0
+
+        #new infection
+        beta=module.parameters['b_hpv']
+
+        for person_id in eligible:
+            sex = df.at[person_id,'sex']
+            current_groups = module._get_hpv_group_set(person_id)
+            new_group = set()
+
+            if sex == 'F':
+                source_prev = prev_male
+            elif sex == 'M':
+                source_prev = prev_female
+            else:
+                continue
+
+            for group in module.HPV_GROUPS:
+                if group in current_groups:
+                    continue
+
+                risk = beta * source_prev[group]
+                risk = min(max(risk, 0.0), 1.0)
+
+                if module.rng.random() < risk:
+                    new_group.add(group)
+
+            if len(new_group) > 0:
+                module._add_new_infection_groups(person_id, new_group)
 
 class HpvLoggingEvent(RegularEvent, PopulationScopeEventMixin):
     def __init__(self, module):

From 8d221b9c336adbb2aa395d05d6356d47a9dcd238 Mon Sep 17 00:00:00 2001
From: Liuxinac <liuxinac@hotmail.com>
Date: Wed, 1 Apr 2026 10:26:41 +0100
Subject: [PATCH 03/10] add group-specific HPV self-clearance timing and annual
 logging

---
 src/tlo/methods/hpv.py | 193 +++++++++++++++++++++++++++++++++--------
 1 file changed, 159 insertions(+), 34 deletions(-)

diff --git a/src/tlo/methods/hpv.py b/src/tlo/methods/hpv.py
index 8e656e10ee..98b36f2ccd 100644
--- a/src/tlo/methods/hpv.py
+++ b/src/tlo/methods/hpv.py
@@ -5,6 +5,7 @@
 
 import pandas as pd
 
+# from scripts.diarrhoea_analyses.analysis_diarrhoea_with_and_without_treatment import data
 from tlo import DAYS_IN_YEAR, DateOffset, Module, Parameter, Property, Types, logging
 from tlo.analysis.utils import get_counts_by_sex_and_age_group
 from tlo.events import Event, IndividualScopeEventMixin, PopulationScopeEventMixin, RegularEvent
@@ -96,8 +97,14 @@ class HPV(Module, GenericFirstAppointmentsMixin):
             Types.BOOL, 'Is infected with oncogenic hpv group'),
         'hp_group': Property(
             Types.STRING, 'Current HPV types carried, separated by "|"'),
-        'hp_date_infected': Property(
-            Types.DATE, 'Date of most recent infection'),
+        'hp_date_infected_hr1': Property(
+            Types.DATE, 'Date of infection of hr1'),
+        'hp_date_infected_hr2': Property(
+            Types.DATE, 'Date of infection of hr2'),
+        'hp_date_infected_hr3': Property(
+            Types.DATE, 'Date of infection of hr3'),
+        'hp_date_first_infected': Property(
+            Types.DATE, 'Date of first HPV infection'),
     }
 
     def __init__(self, name=None):
@@ -121,13 +128,15 @@ def initialise_population(self, population):
         :param population: the population of individuals
         """
 
-        df = population.props  # a shortcut to the dataframe storing data for individiuals
+        df = population.props  # a shortcut to the dataframe storing data for individuals
 
         # Set default for properties
         df.loc[df.is_alive, 'hp_is_infected'] = False  # default: no individuals infected
-        df.loc[df.is_alive, 'hp_group'] = ''  # default: no hpy type
-        df.loc[df.is_alive, 'hp_date_infected'] = pd.NaT  # default: not a time
-
+        df.loc[df.is_alive, 'hp_group'] = ''  # default: no hpv type
+        df.loc[df.is_alive, 'hp_date_infected_hr1'] = pd.NaT
+        df.loc[df.is_alive, 'hp_date_infected_hr2'] = pd.NaT
+        df.loc[df.is_alive, 'hp_date_infected_hr3'] = pd.NaT
+        df.loc[df.is_alive, 'hp_date_first_infected'] = pd.NaT
         eligible = df.index[df.is_alive & (df.age_years >= 15)]
 
         for group in self.HPV_GROUPS:
@@ -140,9 +149,20 @@ def initialise_population(self, population):
                 current_groups.add(group)
                 self._set_hpv_group_set(person_id, current_groups)
 
+                # randomly select a infection date for initial population
+                previous_infection = self.rng.random_integers(1,25) #1-24month
+                infection_date = self.sim.date - DateOffset(months=int(previous_infection))
+                df.at[person_id, f'hp_date_infected_{group}'] = infection_date
+
         initially_infected = df.index[df.is_alive & df.hp_is_infected]
-        if len(initially_infected) > 0:
-            df.loc[initially_infected, 'hp_date_infected'] = self.sim.date
+        for person_id in initially_infected:
+            group_dates = [
+                df.at[person_id, f'hp_date_infected_{group}']
+                for group in self.HPV_GROUPS
+                if not pd.isna(df.at[person_id, f'hp_date_infected_{group}'])
+            ]
+            if len(group_dates) > 0:
+                df.loc[initially_infected, 'hp_date_first_infected'] = min(group_dates)
 
     def _get_hpv_group_set(self,person_id):
         df = self.sim.population.props
@@ -168,15 +188,30 @@ def _add_new_infection_groups(self, person_id, new_groups):
         df = self.sim.population.props
         current_groups = self._get_hpv_group_set(person_id)
         updated_groups = current_groups.union(new_groups)
-
         self._set_hpv_group_set(person_id, updated_groups)
-        df.at[person_id, 'hp_date_infected'] = self.sim.date
+
+        #set infection date for new groups
+        for group in new_groups:
+            df.at[person_id, f'hp_date_infected_{group}'] = self.sim.date
+
+        #set first infection date
+        if pd.isna(df.at[person_id, 'hp_date_first_infected']):
+            df.at[person_id, 'hp_date_first_infected'] = self.sim.date
 
     def _clear_all_infection(self, person_id):
         """Clear all HPV groups for a person."""
         df = self.sim.population.props
         self._set_hpv_group_set(person_id, set())
-        df.at[person_id, 'hp_date_infected'] = pd.NaT
+        for group in self.HPV_GROUPS:
+            df.at[person_id, f'hp_date_infected_{group}'] = pd.NaT
+
+    def _clear_single_group(self, person_id, group):
+        """clear a single HPV group for a person"""
+        df = self.sim.population.props
+        current_groups = self._get_hpv_group_set(person_id)
+        current_groups.discard(group)
+        self._set_hpv_group_set(person_id, current_groups)
+        df.at[person_id, f'hp_date_infected_{group}'] = pd.NaT
 
     def initialise_simulation(self, sim):
 
@@ -206,7 +241,10 @@ def on_birth(self, mother_id, child_id):
         df = self.sim.population.props  # shortcut to the population props dataframe
         df.at[child_id, 'hp_is_infected'] = False
         df.at[child_id, 'hp_group'] = ''
-        df.at[child_id, 'hp_date_infected'] = pd.NaT
+        df.at[child_id, 'hp_date_infected_hr1'] = pd.NaT
+        df.at[child_id, 'hp_date_infected_hr2'] = pd.NaT
+        df.at[child_id, 'hp_date_infected_hr3'] = pd.NaT
+        df.at[child_id, 'hp_date_first_infected'] = pd.NaT
 
     def report_daly_values(self):
         # This must send back a pd.Series or pd.DataFrame that reports on the average daly-weights that have been
@@ -245,27 +283,31 @@ def apply(self, population):
         # 2. self-clearance
         infected_idx = df.index[df.is_alive & df.hp_is_infected & (df.age_years >= 15)]
         for person_id in infected_idx:
-            date_inf = df.at[person_id, 'hp_date_infected']
-            #if data is missing. skip for safety
-            if pd.isna(date_inf):
-                continue
+            current_groups = module._get_hpv_group_set(person_id)
+            for group in list(current_groups):
+                date_col = f'hp_date_infected_{group}'
+                date_inf = df.at[person_id, date_col]
+
 
-            duration = (now.year - date_inf.year) * 12 + (now.month - date_inf.month)
+                if pd.isna(date_inf):#if data is missing. skip for safety
+                    continue
+
+                duration = (now.year - date_inf.year) * 12 + (now.month - date_inf.month)
 
-            clear_now = False
+                clear_now = False
 
-            if duration >= 24:
-                p_clear = module.parameters['r_clear_24']
-                if module.rng.random() < p_clear:
-                    clear_now = True
+                if duration >= 24:
+                    p_clear = module.parameters['r_clear_24']
+                    if module.rng.random() < p_clear:
+                        clear_now = True
 
-            elif duration >= 12:
-                p_clear = module.parameters['r_clear_12']
-                if module.rng.random() < p_clear:
-                    clear_now = True
+                elif duration >= 12:
+                    p_clear = module.parameters['r_clear_12']
+                    if module.rng.random() < p_clear:
+                        clear_now = True
 
-            if clear_now:
-                module._clear_all_infection(person_id)
+                if clear_now:
+                    module._clear_single_group(person_id, group)
 
         # 3. recalculate prevalence by HPV group after clearance
         male_idx = df.index[df.is_alive & (df.age_years >= 15) & (df.sex == 'M')]
@@ -313,7 +355,7 @@ class HpvLoggingEvent(RegularEvent, PopulationScopeEventMixin):
     def __init__(self, module):
         """Produce a summmary of the numbers of people with respect to their 'hpv status'
         """
-        # run this event every month
+        # run this event every 12 month
         self.repeat = 12
         super().__init__(module, frequency=DateOffset(months=self.repeat))
         assert isinstance(module, HPV)
@@ -321,12 +363,95 @@ def __init__(self, module):
     def apply(self, population):
         # get some summary statistics
         df = population.props
+        module = self.module
+
+        eligible = df.index[df.is_alive & (df.age_years >= 15)]
+        male_idx = df.index[df.is_alive & (df.age_years >= 15) & (df.sex == 'M')]
+        female_idx = df.index[df.is_alive & (df.age_years >= 15) & (df.sex == 'F')]
+
+        total_inf = df.loc[eligible, 'hp_is_infected'].sum()
+        total_prev = total_inf / len(eligible) if len(eligible) > 0 else 0
+
+        male_inf = df.loc[male_idx, 'hp_is_infected'].sum()
+        female_inf = df.loc[female_idx, 'hp_is_infected'].sum()
+
+        male_prev = male_inf / len(male_idx) if len(male_idx) > 0 else 0
+        female_prev = female_inf / len(female_idx) if len(female_idx) > 0 else 0
+
+        log_data= {
+            'Year':self.sim.date.year,
+            'TotalInf':int(total_inf),
+            'TotalPrev':total_prev,
+            'MaleInf':int(male_inf),
+            'FemaleInf':int(female_inf),
+            'MalePrev':male_prev,
+            'FemalePrev':female_prev,
+        }
+
+        # group-specific prevalence by sex
+        for group in module.HPV_GROUPS:
+            male_mask = df.loc[male_idx, 'hp_group'].fillna('').str.contains(fr'(^|\|){group}(\||$)', regex=True)
+            female_mask = df.loc[female_idx,'hp_group'].fillna('').str.contains(fr'(^|\|){group}(\||$)', regex=True)
+
+            male_group_inf = male_mask.sum()
+            female_group_inf = female_mask.sum()
+
+            log_data[f'{group}_MaleInf'] = int(male_group_inf)
+            log_data[f'{group}_MalePrev'] = male_group_inf / len(male_idx) if len(male_idx) > 0 else 0
+            log_data[f'{group}_FemaleInf'] = int(female_group_inf)
+            log_data[f'{group}_FemalePrev'] = female_group_inf / len(female_idx) if len(female_idx) > 0 else 0
+
+        # multiplicity of infection
+        infection_people = df.index[df.is_alive & (df.age_years>=15) &df.hp_is_infected]
+        n_group_1 = 0
+        n_group_2 = 0
+        n_group_3 = 0
+
+        male_n_group_1 = 0
+        male_n_group_2 = 0
+        male_n_group_3 = 0
+
+        female_n_group_1 = 0
+        female_n_group_2 = 0
+        female_n_group_3 = 0
+
+        for person_id in infection_people:
+            n_group = len(module._get_hpv_group_set(person_id))
+            sex = df.at[person_id,'sex']
 
-        infected_total = df.loc[df.is_alive, 'hp_is_infected'].sum()
-        proportion_infected = infected_total / len(df)
+            if n_group ==1:
+                n_group_1=1
+                if sex == 'M':
+                    male_n_group_1 += 1
+                elif sex =='F':
+                    female_n_group_1 += 1
+
+            elif n_group == 2:
+                n_group_2 += 1
+                if sex == 'M':
+                    male_n_group_2 += 1
+                elif sex =='F':
+                    female_n_group_2 += 1
+
+            elif n_group == 3:
+                n_group_3 += 1
+                if sex == 'M':
+                    male_n_group_3 += 1
+                elif sex =='F':
+                    female_n_group_3 += 1
+
+        log_data['InfGroup1'] = n_group_1
+        log_data['InfGroup2'] = n_group_2
+        log_data['InfGroup3'] = n_group_3
+
+        log_data['MaleGroup1'] = male_n_group_1
+        log_data['MaleGroup2'] = male_n_group_2
+        log_data['MaleGroup3'] = male_n_group_3
+
+        log_data['FemaleGroup1'] = female_n_group_1
+        log_data['FemaleGroup2'] = female_n_group_2
+        log_data['FemaleGroup3'] = female_n_group_3
 
         logger.info(key='summary',
-                    data={'TotalInf': infected_total,
-                          'PropInf': proportion_infected,
-                          })
+                    data=log_data)
 

From bb183fc9a462c9c9048ddf84b400de67946b10a0 Mon Sep 17 00:00:00 2001
From: Liuxinac <liuxinac@hotmail.com>
Date: Tue, 14 Apr 2026 16:24:10 +0100
Subject: [PATCH 04/10] Update HPV module: add natural immunity,
 HIV/age/vaccination modifiers, and link to HIV module

---
 src/scripts/HPV_Analyses/Analysis_HPV.py |   5 +
 src/tlo/methods/hpv.py                   | 203 +++++++++++++++++------
 2 files changed, 160 insertions(+), 48 deletions(-)

diff --git a/src/scripts/HPV_Analyses/Analysis_HPV.py b/src/scripts/HPV_Analyses/Analysis_HPV.py
index aac20b77b0..326244334c 100644
--- a/src/scripts/HPV_Analyses/Analysis_HPV.py
+++ b/src/scripts/HPV_Analyses/Analysis_HPV.py
@@ -20,6 +20,8 @@
     simplified_births,
     symptommanager,
     hpv,
+    hiv,
+    tb
 )
 
 results_folder = Path("./outputs")
@@ -74,6 +76,8 @@
     epi.Epi(),
     hpv.HPV(),
     measles.Measles(),
+    hiv.Hiv(),
+    tb.Tb(),
 )
 #
 # # set the scenario
@@ -98,6 +102,7 @@
 
 #Show the results
 hpv_outputs = output["tlo.methods.hpv"]["summary"]
+print(hpv_outputs)
 # proportion_infected = extract_results(
 #     results_folder,
 #     module="tlo.methods.hpv",
diff --git a/src/tlo/methods/hpv.py b/src/tlo/methods/hpv.py
index 98b36f2ccd..d6da7c0055 100644
--- a/src/tlo/methods/hpv.py
+++ b/src/tlo/methods/hpv.py
@@ -9,6 +9,7 @@
 from tlo import DAYS_IN_YEAR, DateOffset, Module, Parameter, Property, Types, logging
 from tlo.analysis.utils import get_counts_by_sex_and_age_group
 from tlo.events import Event, IndividualScopeEventMixin, PopulationScopeEventMixin, RegularEvent
+from tlo.lm import LinearModel, LinearModelType, Predictor
 from tlo.methods import Metadata
 from tlo.methods.causes import Cause
 from tlo.methods.demography import InstantaneousDeath
@@ -25,7 +26,7 @@
 
 
 class HPV(Module, GenericFirstAppointmentsMixin):
-    """This is a HPV infectious Process.
+    """This is an HPV infection Process.
     Groups:
         g1 = HPV16/18
         g2 = other vaccine-covered high-risk HPV (31/33/45/52/58)
@@ -41,7 +42,7 @@ class HPV(Module, GenericFirstAppointmentsMixin):
         - Use of the SymptomManager
     """
 
-    INIT_DEPENDENCIES = {'Demography', 'SymptomManager'}
+    INIT_DEPENDENCIES = {'Demography', 'SymptomManager', 'Hiv'} #Epi?
 
     OPTIONAL_INIT_DEPENDENCIES = {'HealthBurden'}
 
@@ -75,6 +76,7 @@ class HPV(Module, GenericFirstAppointmentsMixin):
             Types.REAL,
             "Initial prevalence of other HR types"
         ),
+
         # Transmission coefficients β
         "b_hpv": Parameter(
             Types.REAL,
@@ -90,13 +92,51 @@ class HPV(Module, GenericFirstAppointmentsMixin):
             Types.REAL,
             "probability of clearing HPV after 24 month",
         ),
+
+        # Modifiers
+        "rr_hpv_hiv": Parameter(
+            Types.REAL,
+            "Relative risk for HPV infection among HIV positive people",
+        ),
+        "rr_hr1_vaccinated": Parameter(
+            Types.REAL,
+            "Relative risk for hr1 infection if vaccinated",
+        ),
+        "rr_hr2_vaccinated": Parameter(
+            Types.REAL,
+            "Relative risk for hr2 infection if vaccinated",
+        ),
+        "rr_hr3_vaccinated": Parameter(
+            Types.REAL,
+            "Relative risk for hr1 infection if vaccinated",
+        ),
+        "rr_immunity_hr1": Parameter(
+            Types.REAL,
+            "Relative risk for reinfection with hr1 if previously infected",
+        ),
+        "rr_immunity_hr2": Parameter(
+            Types.REAL,
+            "Relative risk for reinfection with hr2 if previously infected",
+        ),
+        "rr_immunity_hr3": Parameter(
+            Types.REAL,
+            "Relative risk for reinfection with hr3 if previously infected",
+        ),
+        "rr_hpv_age50plus": Parameter(
+            Types.REAL,
+            "Relative risk multiplier for age >=50",
+        ),
     }
 
     PROPERTIES = {
         'hp_is_infected': Property(
             Types.BOOL, 'Is infected with oncogenic hpv group'),
-        'hp_group': Property(
-            Types.STRING, 'Current HPV types carried, separated by "|"'),
+        'hp_infected_hr1': Property(
+            Types.BOOL,'Current infected with hr1'),
+        'hp_infected_hr2': Property(
+            Types.BOOL, 'Current infected with hr2'),
+        'hp_infected_hr3': Property(
+            Types.BOOL, 'Current infected with hr3'),
         'hp_date_infected_hr1': Property(
             Types.DATE, 'Date of infection of hr1'),
         'hp_date_infected_hr2': Property(
@@ -105,6 +145,20 @@ class HPV(Module, GenericFirstAppointmentsMixin):
             Types.DATE, 'Date of infection of hr3'),
         'hp_date_first_infected': Property(
             Types.DATE, 'Date of first HPV infection'),
+        'hp_ever_infected_hr1': Property(
+            Types.BOOL, 'Ever infected with hr1'),
+        'hp_ever_infected_hr2': Property(
+            Types.BOOL, 'Ever infected with hr2'),
+        'hp_ever_infected_hr3': Property(
+            Types.BOOL, 'Ever infected with hr3'),
+
+        # "va_hpv": Property(Types.INT, "number of doses of hpv vaccine received"),
+        # "va_hpv_all_doses": Property(Types.BOOL, "whether all doses have been received of the HPV vaccine"),
+        # "hv_inf": Property(Types.BOOL,"Is person currently infected with HIV
+        #     (NB. AIDS status is determined by presence of the AIDS Symptom.",),
+        # "hv_art": Property(Types.CATEGORICAL,
+        #     "ART status of person, whether on ART or not; and whether viral load is suppressed or not if on ART.",
+        #     categories=["not", "on_VL_suppressed", "on_not_VL_suppressed"],),
     }
 
     def __init__(self, name=None):
@@ -132,11 +186,16 @@ def initialise_population(self, population):
 
         # Set default for properties
         df.loc[df.is_alive, 'hp_is_infected'] = False  # default: no individuals infected
-        df.loc[df.is_alive, 'hp_group'] = ''  # default: no hpv type
+        df.loc[df.is_alive, 'hp_infected_hr1'] = False
+        df.loc[df.is_alive, 'hp_infected_hr2'] = False
+        df.loc[df.is_alive, 'hp_infected_hr3'] = False
         df.loc[df.is_alive, 'hp_date_infected_hr1'] = pd.NaT
         df.loc[df.is_alive, 'hp_date_infected_hr2'] = pd.NaT
         df.loc[df.is_alive, 'hp_date_infected_hr3'] = pd.NaT
         df.loc[df.is_alive, 'hp_date_first_infected'] = pd.NaT
+        df.loc[df.is_alive, 'hp_ever_infected_hr1'] = False
+        df.loc[df.is_alive, 'hp_ever_infected_hr2'] = False
+        df.loc[df.is_alive, 'hp_ever_infected_hr3'] = False
         eligible = df.index[df.is_alive & (df.age_years >= 15)]
 
         for group in self.HPV_GROUPS:
@@ -145,13 +204,13 @@ def initialise_population(self, population):
             infected_this_group = eligible[u < p_init]
 
             for person_id in infected_this_group:
-                current_groups = self._get_hpv_group_set(person_id)
-                current_groups.add(group)
-                self._set_hpv_group_set(person_id, current_groups)
+                df.at[person_id, f'hp_infected_{group}'] = True
+                df.at[person_id, f'hp_ever_infected_{group}'] = True
+                df.at[person_id, 'hp_is_infected'] = True
 
                 # randomly select a infection date for initial population
-                previous_infection = self.rng.random_integers(1,25) #1-24month
-                infection_date = self.sim.date - DateOffset(months=int(previous_infection))
+                previous_infection = int(self.rng.randint(1,25)) #1-24month
+                infection_date = self.sim.date - DateOffset(months=previous_infection)
                 df.at[person_id, f'hp_date_infected_{group}'] = infection_date
 
         initially_infected = df.index[df.is_alive & df.hp_is_infected]
@@ -162,29 +221,51 @@ def initialise_population(self, population):
                 if not pd.isna(df.at[person_id, f'hp_date_infected_{group}'])
             ]
             if len(group_dates) > 0:
-                df.loc[initially_infected, 'hp_date_first_infected'] = min(group_dates)
+                df.at[person_id, 'hp_date_first_infected'] = min(group_dates)
+
+    def _get_age_group(self, age_years):
+        if age_years <15:
+            return '0_15'
+        elif age_years <20:
+            return '15_19'
+        elif age_years <25:
+            return '20_24'
+        elif age_years <30:
+            return '25_29'
+        elif age_years <35:
+            return '30_34'
+        elif age_years <40:
+            return '35_39'
+        elif age_years <45:
+            return '40_44'
+        elif age_years <50:
+            return '45_49'
+        else:
+            return '50plus'
 
-    def _get_hpv_group_set(self,person_id):
+    def _get_hpv_group_set(self, person_id):
         df = self.sim.population.props
-        hpv_str = df.at[person_id, 'hp_group']
-        if hpv_str is None or hpv_str == '':
-            return set()
-        return set(hpv_str.split('|'))
+        current_groups = set()
+
+        for group in self.HPV_GROUPS:
+            if df.at[person_id, f'hp_infected_{group}']:
+                current_groups.add(group)
+
+        return current_groups
 
     def _set_hpv_group_set(self,person_id, hpv_set):
         df = self.sim.population.props
 
-        if len(hpv_set) == 0:
-            df.at[person_id, 'hp_group'] = ''
-            df.at[person_id, 'hp_is_infected'] = False
-        else:
-            df.at[person_id, 'hp_group'] = '|'.join(sorted(hpv_set))
-            df.at[person_id, 'hp_is_infected'] = True
+        for group in self.HPV_GROUPS:
+            df.at[person_id, f'hp_infected_{group}'] = (group in hpv_set)
+
+        df.at[person_id, 'hp_is_infected'] = (len(hpv_set) > 0)
 
     def _add_new_infection_groups(self, person_id, new_groups):
 
         if len(new_groups) == 0:
             return
+
         df = self.sim.population.props
         current_groups = self._get_hpv_group_set(person_id)
         updated_groups = current_groups.union(new_groups)
@@ -193,34 +274,57 @@ def _add_new_infection_groups(self, person_id, new_groups):
         #set infection date for new groups
         for group in new_groups:
             df.at[person_id, f'hp_date_infected_{group}'] = self.sim.date
+            df.at[person_id, f'hp_ever_infected_{group}'] = True
 
         #set first infection date
         if pd.isna(df.at[person_id, 'hp_date_first_infected']):
             df.at[person_id, 'hp_date_first_infected'] = self.sim.date
 
-    def _clear_all_infection(self, person_id):
-        """Clear all HPV groups for a person."""
-        df = self.sim.population.props
-        self._set_hpv_group_set(person_id, set())
-        for group in self.HPV_GROUPS:
-            df.at[person_id, f'hp_date_infected_{group}'] = pd.NaT
-
     def _clear_single_group(self, person_id, group):
         """clear a single HPV group for a person"""
         df = self.sim.population.props
-        current_groups = self._get_hpv_group_set(person_id)
-        current_groups.discard(group)
-        self._set_hpv_group_set(person_id, current_groups)
+
+        df.at[person_id, f'hp_infected_{group}'] = False
         df.at[person_id, f'hp_date_infected_{group}'] = pd.NaT
 
-    def initialise_simulation(self, sim):
+        still_infected= any(
+            df.at[person_id, f'hp_infected_{g}'] for g in self.HPV_GROUPS
+        )
+        df.at[person_id, 'hp_is_infected'] = still_infected
 
+    def initialise_simulation(self, sim):
         """Get ready for simulation start.
 
         This method is called just before the main simulation loop begins, and after all
         modules have read their parameters and the initial population has been created.
         It is a good place to add initial events to the event queue.
         """
+        p = self.parameters
+        self.lm = {}
+
+        for group in self.HPV_GROUPS:
+            self.lm[group] = LinearModel(
+                LinearModelType.MULTIPLICATIVE,
+                1.0,
+
+                Predictor('va_hpv')
+                .when(1,p[f'rr_{group}_vaccinated'])
+                .when(2,p[f'rr_{group}_vaccinated'])
+                .otherwise(1.0),
+
+                Predictor(f'hp_ever_infected_{group}')
+                .when(True, p[f'rr_immunity_{group}'])
+                .otherwise(1.0),
+
+                Predictor('age_years', conditions_are_mutually_exclusive = True)
+                .when('.between(0,15)', 0.0)
+                .when('>=50', p['rr_hpv_age50plus'])
+                .otherwise(1.0),
+
+                Predictor('hv_inf')
+                .when(True,p['rr_hpv_hiv'])
+                .otherwise(1.0),
+                )
 
         # add the basic event
         event = HpvInfectionEvent(self)
@@ -240,11 +344,16 @@ def on_birth(self, mother_id, child_id):
 
         df = self.sim.population.props  # shortcut to the population props dataframe
         df.at[child_id, 'hp_is_infected'] = False
-        df.at[child_id, 'hp_group'] = ''
+        df.at[child_id, 'hp_infected_hr1'] = False
+        df.at[child_id, 'hp_infected_hr2'] = False
+        df.at[child_id, 'hp_infected_hr3'] = False
         df.at[child_id, 'hp_date_infected_hr1'] = pd.NaT
         df.at[child_id, 'hp_date_infected_hr2'] = pd.NaT
         df.at[child_id, 'hp_date_infected_hr3'] = pd.NaT
         df.at[child_id, 'hp_date_first_infected'] = pd.NaT
+        df.at[child_id, 'hp_ever_infected_hr1'] = False
+        df.at[child_id, 'hp_ever_infected_hr2'] = False
+        df.at[child_id, 'hp_ever_infected_hr3'] = False
 
     def report_daly_values(self):
         # This must send back a pd.Series or pd.DataFrame that reports on the average daly-weights that have been
@@ -289,7 +398,7 @@ def apply(self, population):
                 date_inf = df.at[person_id, date_col]
 
 
-                if pd.isna(date_inf):#if data is missing. skip for safety
+                if pd.isna(date_inf): #if data is missing. skip for safety
                     continue
 
                 duration = (now.year - date_inf.year) * 12 + (now.month - date_inf.month)
@@ -317,15 +426,13 @@ def apply(self, population):
         prev_female = {}
 
         for group in module.HPV_GROUPS:
-            male_carrier = male_idx[df.loc[male_idx, 'hp_group'].fillna('').str.contains(fr'(^|\|){group}(\||$)', regex=True)]
-            female_carrier = female_idx[df.loc[female_idx, 'hp_group'].fillna('').str.contains(fr'(^|\|){group}(\||$)', regex=True)]
+            male_group_inf = df.loc[male_idx, f'hp_infected_{group}'].sum()
+            female_group_inf = df.loc[female_idx, f'hp_infected_{group}'].sum()
 
-            prev_male[group] = len(male_carrier)/len(male_idx) if len(male_idx) > 0 else 0
-            prev_female[group] = len(female_carrier) / len(female_idx) if len(female_idx) > 0 else 0
+            prev_male[group] = male_group_inf/len(male_idx) if len(male_idx) > 0 else 0
+            prev_female[group] = female_group_inf / len(female_idx) if len(female_idx) > 0 else 0
 
         #new infection
-        beta=module.parameters['b_hpv']
-
         for person_id in eligible:
             sex = df.at[person_id,'sex']
             current_groups = module._get_hpv_group_set(person_id)
@@ -342,7 +449,10 @@ def apply(self, population):
                 if group in current_groups:
                     continue
 
-                risk = beta * source_prev[group]
+                baseline = module.parameters.get(f'b_hpv_{group}', module.parameters['b_hpv'])*source_prev[group]
+                modifier = module.lm[group].predict(df.loc[[person_id]]).iloc[0]
+
+                risk = baseline * modifier
                 risk = min(max(risk, 0.0), 1.0)
 
                 if module.rng.random() < risk:
@@ -390,11 +500,8 @@ def apply(self, population):
 
         # group-specific prevalence by sex
         for group in module.HPV_GROUPS:
-            male_mask = df.loc[male_idx, 'hp_group'].fillna('').str.contains(fr'(^|\|){group}(\||$)', regex=True)
-            female_mask = df.loc[female_idx,'hp_group'].fillna('').str.contains(fr'(^|\|){group}(\||$)', regex=True)
-
-            male_group_inf = male_mask.sum()
-            female_group_inf = female_mask.sum()
+            male_group_inf = df.loc[male_idx, f'hp_infected_{group}'].sum()
+            female_group_inf = df.loc[female_idx, f'hp_infected_{group}'].sum()
 
             log_data[f'{group}_MaleInf'] = int(male_group_inf)
             log_data[f'{group}_MalePrev'] = male_group_inf / len(male_idx) if len(male_idx) > 0 else 0
@@ -420,7 +527,7 @@ def apply(self, population):
             sex = df.at[person_id,'sex']
 
             if n_group ==1:
-                n_group_1=1
+                n_group_1 +=1
                 if sex == 'M':
                     male_n_group_1 += 1
                 elif sex =='F':

From b535f42923e7a92e4d5b82901ed47f9c98781c16 Mon Sep 17 00:00:00 2001
From: Liuxinac <liuxinac@hotmail.com>
Date: Tue, 14 Apr 2026 16:31:49 +0100
Subject: [PATCH 05/10] Add HPV resource files

---
 resources/ResourceFile_HPV/parameter_values.csv | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 resources/ResourceFile_HPV/parameter_values.csv

diff --git a/resources/ResourceFile_HPV/parameter_values.csv b/resources/ResourceFile_HPV/parameter_values.csv
new file mode 100644
index 0000000000..7a443d9eea
--- /dev/null
+++ b/resources/ResourceFile_HPV/parameter_values.csv
@@ -0,0 +1,15 @@
+parameter_name,value,param_label,prior_min,prior_max,prior_note,reference
+init_prev_hpv_hr1,0.03,undetermined,0,1,calibration,N/A
+init_prev_hpv_hr2,0.04,undetermined,0,1,calibration,N/A
+init_prev_hpv_hr3,0.05,undetermined,0,1,calibration,N/A
+b_hpv,0.3,undetermined,0,1,calibration,N/A
+r_clear_12,0.2,undetermined,0,1,calibration,N/A
+r_clear_24,0.3,undetermined,0,1,calibration,N/A
+rr_hpv_hiv,2,undetermined,0,1,calibration,N/A
+rr_hr1_vaccinated,0,undetermined,0,1,calibration,N/A
+rr_hr2_vaccinated,0.6,undetermined,0,1,calibration,N/A
+rr_hr3_vaccinated,0.3,undetermined,0,1,calibration,N/A
+rr_hpv_age50plus,0.1,undetermined,0,1,calibration,N/A
+rr_immunity_hr1,0.8,undetermined,0,1,calibration,N/A
+rr_immunity_hr2,0.8,undetermined,0,1,calibration,N/A
+rr_immunity_hr3,0.8,undetermined,0,1,calibration,N/A

From 3ccf6521f23a0e93ddae0506c1dd4f21510f15aa Mon Sep 17 00:00:00 2001
From: Liuxinac <liuxinac@hotmail.com>
Date: Wed, 15 Apr 2026 11:02:16 +0100
Subject: [PATCH 06/10] Update HPV module: add natural immunity,
 HIV/age/vaccination modifiers, and link to HIV module

---
 src/scripts/HPV_Analyses/Analysis_HPV.py | 61 ++++++++++++++++++++++++
 src/tlo/methods/hpv.py                   |  4 +-
 2 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/src/scripts/HPV_Analyses/Analysis_HPV.py b/src/scripts/HPV_Analyses/Analysis_HPV.py
index 326244334c..e39252e76a 100644
--- a/src/scripts/HPV_Analyses/Analysis_HPV.py
+++ b/src/scripts/HPV_Analyses/Analysis_HPV.py
@@ -5,6 +5,8 @@
 import datetime
 import pickle
 import random
+import pandas as pd
+import matplotlib.pyplot as plt
 from pathlib import Path
 
 from tlo import Date, Simulation, logging
@@ -118,3 +120,62 @@
 #     column="TotalInf",
 #     do_scaling=True,
 # )
+
+hpv_outputs = output["tlo.methods.hpv"]["summary"]
+hpv_df = pd.DataFrame(hpv_outputs)
+
+print(hpv_df)
+print(hpv_df.columns)
+
+# 1. Total infection
+plt.figure(figsize=(8, 5))
+plt.plot(hpv_df["Year"], hpv_df["TotalPrev"], marker="o")
+plt.xlabel("Year")
+plt.ylabel("Total HPV prevalence")
+plt.title("Total HPV prevalence over time")
+plt.grid(True)
+plt.tight_layout()
+plt.savefig(outputpath / "hpv_total_prevalence.png", dpi=300)
+plt.show()
+
+# 2. HPV  prevalence in different gender
+plt.figure(figsize=(8, 5))
+plt.plot(hpv_df["Year"], hpv_df["MalePrev"], marker="o", label="Male")
+plt.plot(hpv_df["Year"], hpv_df["FemalePrev"], marker="o", label="Female")
+plt.xlabel("Year")
+plt.ylabel("HPV prevalence")
+plt.title("HPV prevalence by sex")
+plt.legend()
+plt.grid(True)
+plt.tight_layout()
+plt.savefig(outputpath / "hpv_prevalence_by_sex.png", dpi=300)
+plt.show()
+
+# 3. Prevalence of different HPV groups in female
+plt.figure(figsize=(8, 5))
+plt.plot(hpv_df["Year"], hpv_df["hr1_FemalePrev"], marker="o", label="hr1")
+plt.plot(hpv_df["Year"], hpv_df["hr2_FemalePrev"], marker="o", label="hr2")
+plt.plot(hpv_df["Year"], hpv_df["hr3_FemalePrev"], marker="o", label="hr3")
+plt.xlabel("Year")
+plt.ylabel("Prevalence")
+plt.title("Female HPV prevalence by group")
+plt.legend()
+plt.grid(True)
+plt.tight_layout()
+plt.savefig(outputpath / "female_hpv_group_prevalence.png", dpi=300)
+plt.show()
+
+# 4. Prevalence of different HPV groups in male
+plt.figure(figsize=(8, 5))
+plt.plot(hpv_df["Year"], hpv_df["hr1_MalePrev"], marker="o", label="hr1")
+plt.plot(hpv_df["Year"], hpv_df["hr2_MalePrev"], marker="o", label="hr2")
+plt.plot(hpv_df["Year"], hpv_df["hr3_MalePrev"], marker="o", label="hr3")
+plt.xlabel("Year")
+plt.ylabel("Prevalence")
+plt.title("Male HPV prevalence by group")
+plt.legend()
+plt.grid(True)
+plt.tight_layout()
+plt.savefig(outputpath / "male_hpv_group_prevalence.png", dpi=300)
+plt.show()
+
diff --git a/src/tlo/methods/hpv.py b/src/tlo/methods/hpv.py
index d6da7c0055..e5e25e6886 100644
--- a/src/tlo/methods/hpv.py
+++ b/src/tlo/methods/hpv.py
@@ -25,7 +25,7 @@
 logger.setLevel(logging.INFO)
 
 
-class HPV(Module, GenericFirstAppointmentsMixin):
+class HPV(Module, GenericFirstAppointmentsMixin): #
     """This is an HPV infection Process.
     Groups:
         g1 = HPV16/18
@@ -213,7 +213,7 @@ def initialise_population(self, population):
                 infection_date = self.sim.date - DateOffset(months=previous_infection)
                 df.at[person_id, f'hp_date_infected_{group}'] = infection_date
 
-        initially_infected = df.index[df.is_alive & df.hp_is_infected]
+        initially_infected = df.index[df.is_alive & df.hp_is_infected] #can not make sure when is the first infection
         for person_id in initially_infected:
             group_dates = [
                 df.at[person_id, f'hp_date_infected_{group}']

From 42bfc28505837427737453cb8ad7428182b517d9 Mon Sep 17 00:00:00 2001
From: Liuxinac <liuxinac@hotmail.com>
Date: Wed, 15 Apr 2026 17:04:44 +0100
Subject: [PATCH 07/10] Update HPV self-clearance process

---
 .../ResourceFile_HPV/parameter_values.csv     |   4 +
 src/tlo/methods/hpv.py                        | 180 +++++++++++++-----
 2 files changed, 140 insertions(+), 44 deletions(-)

diff --git a/resources/ResourceFile_HPV/parameter_values.csv b/resources/ResourceFile_HPV/parameter_values.csv
index 7a443d9eea..92ec26091f 100644
--- a/resources/ResourceFile_HPV/parameter_values.csv
+++ b/resources/ResourceFile_HPV/parameter_values.csv
@@ -3,6 +3,10 @@ init_prev_hpv_hr1,0.03,undetermined,0,1,calibration,N/A
 init_prev_hpv_hr2,0.04,undetermined,0,1,calibration,N/A
 init_prev_hpv_hr3,0.05,undetermined,0,1,calibration,N/A
 b_hpv,0.3,undetermined,0,1,calibration,N/A
+median_clear_hr1,12,undetermined,0,1,calibration,N/A
+median_clear_hr2,12,undetermined,0,1,calibration,N/A
+median_clear_hr3,12,undetermined,0,1,calibration,N/A
+clear_shape,2,undetermined,0,1,calibration,N/A
 r_clear_12,0.2,undetermined,0,1,calibration,N/A
 r_clear_24,0.3,undetermined,0,1,calibration,N/A
 rr_hpv_hiv,2,undetermined,0,1,calibration,N/A
diff --git a/src/tlo/methods/hpv.py b/src/tlo/methods/hpv.py
index e5e25e6886..052cc28af6 100644
--- a/src/tlo/methods/hpv.py
+++ b/src/tlo/methods/hpv.py
@@ -4,6 +4,7 @@
 from typing import TYPE_CHECKING, List, Optional
 
 import pandas as pd
+import math
 
 # from scripts.diarrhoea_analyses.analysis_diarrhoea_with_and_without_treatment import data
 from tlo import DAYS_IN_YEAR, DateOffset, Module, Parameter, Property, Types, logging
@@ -25,7 +26,7 @@
 logger.setLevel(logging.INFO)
 
 
-class HPV(Module, GenericFirstAppointmentsMixin): #
+class HPV(Module, GenericFirstAppointmentsMixin):
     """This is an HPV infection Process.
     Groups:
         g1 = HPV16/18
@@ -42,7 +43,7 @@ class HPV(Module, GenericFirstAppointmentsMixin): #
         - Use of the SymptomManager
     """
 
-    INIT_DEPENDENCIES = {'Demography', 'SymptomManager', 'Hiv'} #Epi?
+    INIT_DEPENDENCIES = {'Demography', 'SymptomManager', 'Hiv'}
 
     OPTIONAL_INIT_DEPENDENCIES = {'HealthBurden'}
 
@@ -84,6 +85,23 @@ class HPV(Module, GenericFirstAppointmentsMixin): #
         ),
 
         # Clearance probabilities
+        "median_clear_hr1": Parameter(
+            Types.REAL,
+            "Median months to self-clear for hr1 infection",
+        ),
+        "median_clear_hr2": Parameter(
+            Types.REAL,
+            "Median months to self-clear for hr2 infection",
+        ),
+        "median_clear_hr3": Parameter(
+            Types.REAL,
+            "Median months to self-clear for hr3 infection",
+        ),
+        "clear_shape": Parameter(
+            Types.REAL,
+            "Weibull shape parameter for HPV clearance duration",
+        ),
+
         "r_clear_12": Parameter(
             Types.REAL,
             "probability of clearing HPV after 12 month",
@@ -108,7 +126,7 @@ class HPV(Module, GenericFirstAppointmentsMixin): #
         ),
         "rr_hr3_vaccinated": Parameter(
             Types.REAL,
-            "Relative risk for hr1 infection if vaccinated",
+            "Relative risk for hr3 infection if vaccinated",
         ),
         "rr_immunity_hr1": Parameter(
             Types.REAL,
@@ -132,7 +150,7 @@ class HPV(Module, GenericFirstAppointmentsMixin): #
         'hp_is_infected': Property(
             Types.BOOL, 'Is infected with oncogenic hpv group'),
         'hp_infected_hr1': Property(
-            Types.BOOL,'Current infected with hr1'),
+            Types.BOOL, 'Current infected with hr1'),
         'hp_infected_hr2': Property(
             Types.BOOL, 'Current infected with hr2'),
         'hp_infected_hr3': Property(
@@ -144,7 +162,21 @@ class HPV(Module, GenericFirstAppointmentsMixin): #
         'hp_date_infected_hr3': Property(
             Types.DATE, 'Date of infection of hr3'),
         'hp_date_first_infected': Property(
-            Types.DATE, 'Date of first HPV infection'),
+            Types.DATE, 'Start date of current HPV infection'),
+        'hp_duration_hr1': Property(
+            Types.INT,'Sampled duration for current hr1 infection'),
+        'hp_duration_hr2': Property(
+            Types.INT,'Sampled duration for current hr2 infection'),
+        'hp_duration_hr3': Property(
+            Types.INT,'Sampled duration for current hr3 infection'),
+        'hp_duration_all_clear': Property(
+            Types.INT, 'Duration for current all HPV infection'),
+        'hp_date_clear_hr1': Property(
+            Types.DATE, 'Scheduled clearance date of current hr1 infection'),
+        'hp_date_clear_hr2': Property(
+            Types.DATE, 'Scheduled clearance date of current hr2 infection'),
+        'hp_date_clear_hr3': Property(
+            Types.DATE, 'Scheduled clearance date of current hr3 infection'),
         'hp_ever_infected_hr1': Property(
             Types.BOOL, 'Ever infected with hr1'),
         'hp_ever_infected_hr2': Property(
@@ -193,6 +225,13 @@ def initialise_population(self, population):
         df.loc[df.is_alive, 'hp_date_infected_hr2'] = pd.NaT
         df.loc[df.is_alive, 'hp_date_infected_hr3'] = pd.NaT
         df.loc[df.is_alive, 'hp_date_first_infected'] = pd.NaT
+        df.loc[df.is_alive, 'hp_duration_hr1'] = -1
+        df.loc[df.is_alive, 'hp_duration_hr2'] = -1
+        df.loc[df.is_alive, 'hp_duration_hr3'] = -1
+        df.loc[df.is_alive, 'hp_duration_all_clear'] = -1
+        df.loc[df.is_alive, 'hp_date_clear_hr1'] = pd.NaT
+        df.loc[df.is_alive, 'hp_date_clear_hr2'] = pd.NaT
+        df.loc[df.is_alive, 'hp_date_clear_hr3'] = pd.NaT
         df.loc[df.is_alive, 'hp_ever_infected_hr1'] = False
         df.loc[df.is_alive, 'hp_ever_infected_hr2'] = False
         df.loc[df.is_alive, 'hp_ever_infected_hr3'] = False
@@ -208,12 +247,21 @@ def initialise_population(self, population):
                 df.at[person_id, f'hp_ever_infected_{group}'] = True
                 df.at[person_id, 'hp_is_infected'] = True
 
-                # randomly select a infection date for initial population
-                previous_infection = int(self.rng.randint(1,25)) #1-24month
+                # randomly select an infection date for initial population
+                previous_infection = int(self.rng.randint(0, 24))  # 0-23month
                 infection_date = self.sim.date - DateOffset(months=previous_infection)
+
+                # Sample a duration that longer that previous infection
+                previous_duration = self._sample_clear_duration(group)
+                while previous_duration <= previous_infection:
+                    previous_duration = self._sample_clear_duration(group)
+
+                clear_date = infection_date + DateOffset(months=previous_duration)
                 df.at[person_id, f'hp_date_infected_{group}'] = infection_date
+                df.at[person_id, f'hp_date_clear_{group}'] = clear_date
+                df.at[person_id, f'hp_duration_{group}'] = previous_duration
 
-        initially_infected = df.index[df.is_alive & df.hp_is_infected] #can not make sure when is the first infection
+        initially_infected = df.index[df.is_alive & df.hp_is_infected]
         for person_id in initially_infected:
             group_dates = [
                 df.at[person_id, f'hp_date_infected_{group}']
@@ -221,24 +269,25 @@ def initialise_population(self, population):
                 if not pd.isna(df.at[person_id, f'hp_date_infected_{group}'])
             ]
             if len(group_dates) > 0:
-                df.at[person_id, 'hp_date_first_infected'] = min(group_dates)
+                first_date = min(group_dates)
+                df.at[person_id, 'hp_date_first_infected'] = first_date
 
     def _get_age_group(self, age_years):
-        if age_years <15:
+        if age_years < 15:
             return '0_15'
-        elif age_years <20:
+        elif age_years < 20:
             return '15_19'
-        elif age_years <25:
+        elif age_years < 25:
             return '20_24'
-        elif age_years <30:
+        elif age_years < 30:
             return '25_29'
-        elif age_years <35:
+        elif age_years < 35:
             return '30_34'
-        elif age_years <40:
+        elif age_years < 40:
             return '35_39'
-        elif age_years <45:
+        elif age_years < 45:
             return '40_44'
-        elif age_years <50:
+        elif age_years < 50:
             return '45_49'
         else:
             return '50plus'
@@ -262,11 +311,11 @@ def _set_hpv_group_set(self,person_id, hpv_set):
         df.at[person_id, 'hp_is_infected'] = (len(hpv_set) > 0)
 
     def _add_new_infection_groups(self, person_id, new_groups):
-
         if len(new_groups) == 0:
             return
 
         df = self.sim.population.props
+        was_infected_before = df.at[person_id, 'hp_is_infected']
         current_groups = self._get_hpv_group_set(person_id)
         updated_groups = current_groups.union(new_groups)
         self._set_hpv_group_set(person_id, updated_groups)
@@ -276,22 +325,58 @@ def _add_new_infection_groups(self, person_id, new_groups):
             df.at[person_id, f'hp_date_infected_{group}'] = self.sim.date
             df.at[person_id, f'hp_ever_infected_{group}'] = True
 
+            duration = self._sample_clear_duration(group)
+            df.at[person_id, f'hp_date_clear_{group}'] = self.sim.date + DateOffset(months=duration)
+            df.at[person_id,f'hp_duration_{group}'] = duration
+
         #set first infection date
         if pd.isna(df.at[person_id, 'hp_date_first_infected']):
             df.at[person_id, 'hp_date_first_infected'] = self.sim.date
 
+        # start a new HPV infection process only if the person was uninfected/ self-clear
+        if not was_infected_before:
+            df.at[person_id, 'hp_date_first_infected'] = self.sim.date
+
+
     def _clear_single_group(self, person_id, group):
         """clear a single HPV group for a person"""
         df = self.sim.population.props
 
         df.at[person_id, f'hp_infected_{group}'] = False
         df.at[person_id, f'hp_date_infected_{group}'] = pd.NaT
+        df.at[person_id, f'hp_date_clear_{group}'] = pd.NaT
 
         still_infected= any(
             df.at[person_id, f'hp_infected_{g}'] for g in self.HPV_GROUPS
         )
         df.at[person_id, 'hp_is_infected'] = still_infected
 
+        if not still_infected:
+            start_date = df.at[person_id, 'hp_date_first_infected']
+            if not pd.isna(start_date):
+                overall_duration = (self.sim.date.year - start_date.year) * 12 + (self.sim.date.month - start_date.month)
+                df.at[person_id, 'hp_duration_all_clear'] = overall_duration
+
+            df.at[person_id, 'hp_date_first_infected'] = pd.NaT
+
+    def _sample_clear_duration(self,group):
+        """Sample a infection duration for one HPV group using Weibull distribution"""
+        p = self.parameters
+
+        median = p[f'median_clear_{group}']
+        shape = p['clear_shape']
+
+        # WeibullMedian= Scale * （ln 2)^(1/shape)
+        scale = median / (math.log(2) ** (1.0 / shape))
+
+        u = self.rng.random()
+        duration = scale * ((-math.log(1.0 - u)) ** (1.0 / shape))
+
+        duration =  max(1,int(round(duration)))
+        duration = min(duration, 48)
+
+        return duration
+
     def initialise_simulation(self, sim):
         """Get ready for simulation start.
 
@@ -316,12 +401,12 @@ def initialise_simulation(self, sim):
                 .when(True, p[f'rr_immunity_{group}'])
                 .otherwise(1.0),
 
-                Predictor('age_years', conditions_are_mutually_exclusive = True)
+                Predictor('age_years', conditions_are_mutually_exclusive=True)
                 .when('.between(0,15)', 0.0)
                 .when('>=50', p['rr_hpv_age50plus'])
                 .otherwise(1.0),
 
-                Predictor('hv_inf')
+                Predictor('hv_inf')  # Need to be updated with ART
                 .when(True,p['rr_hpv_hiv'])
                 .otherwise(1.0),
                 )
@@ -351,6 +436,13 @@ def on_birth(self, mother_id, child_id):
         df.at[child_id, 'hp_date_infected_hr2'] = pd.NaT
         df.at[child_id, 'hp_date_infected_hr3'] = pd.NaT
         df.at[child_id, 'hp_date_first_infected'] = pd.NaT
+        df.at[child_id, 'hp_duration_hr1'] = -1
+        df.at[child_id, 'hp_duration_hr2'] = -1
+        df.at[child_id, 'hp_duration_hr3'] = -1
+        df.at[child_id, 'hp_duration_all_clear'] = -1
+        df.at[child_id, 'hp_date_clear_hr1'] = pd.NaT
+        df.at[child_id, 'hp_date_clear_hr2'] = pd.NaT
+        df.at[child_id, 'hp_date_clear_hr3'] = pd.NaT
         df.at[child_id, 'hp_ever_infected_hr1'] = False
         df.at[child_id, 'hp_ever_infected_hr2'] = False
         df.at[child_id, 'hp_ever_infected_hr3'] = False
@@ -394,29 +486,29 @@ def apply(self, population):
         for person_id in infected_idx:
             current_groups = module._get_hpv_group_set(person_id)
             for group in list(current_groups):
-                date_col = f'hp_date_infected_{group}'
-                date_inf = df.at[person_id, date_col]
-
+                clear_date = df.at[person_id, f'hp_date_clear_{group}']
 
-                if pd.isna(date_inf): #if data is missing. skip for safety
+                if pd.isna(clear_date): #if data is missing. skip for safety
                     continue
 
-                duration = (now.year - date_inf.year) * 12 + (now.month - date_inf.month)
-
-                clear_now = False
+                if now >= clear_date:
+                    module._clear_single_group(person_id, group)
 
-                if duration >= 24:
-                    p_clear = module.parameters['r_clear_24']
-                    if module.rng.random() < p_clear:
-                        clear_now = True
+                # duration = (now.year - date_inf.year) * 12 + (now.month - date_inf.month)
+                #
+                # clear_now = False
+                #
+                # if duration >= 24:
+                #     p_clear = module.parameters['r_clear_24']
+                #     if module.rng.random() < p_clear:
+                #         clear_now = True
+                #
+                # elif duration >= 12:
+                #     p_clear = module.parameters['r_clear_12']
+                #     if module.rng.random() < p_clear:
+                #         clear_now = True
 
-                elif duration >= 12:
-                    p_clear = module.parameters['r_clear_12']
-                    if module.rng.random() < p_clear:
-                        clear_now = True
 
-                if clear_now:
-                    module._clear_single_group(person_id, group)
 
         # 3. recalculate prevalence by HPV group after clearance
         male_idx = df.index[df.is_alive & (df.age_years >= 15) & (df.sex == 'M')]
@@ -488,14 +580,14 @@ def apply(self, population):
         male_prev = male_inf / len(male_idx) if len(male_idx) > 0 else 0
         female_prev = female_inf / len(female_idx) if len(female_idx) > 0 else 0
 
-        log_data= {
+        log_data = {
             'Year':self.sim.date.year,
-            'TotalInf':int(total_inf),
-            'TotalPrev':total_prev,
-            'MaleInf':int(male_inf),
-            'FemaleInf':int(female_inf),
-            'MalePrev':male_prev,
-            'FemalePrev':female_prev,
+            'TotalInf': int(total_inf),
+            'TotalPrev': total_prev,
+            'MaleInf': int(male_inf),
+            'FemaleInf': int(female_inf),
+            'MalePrev': male_prev,
+            'FemalePrev': female_prev,
         }
 
         # group-specific prevalence by sex

From 68dbf747e84f8f55ec6609607aaca6416a6f54c9 Mon Sep 17 00:00:00 2001
From: Liuxinac <liuxinac@hotmail.com>
Date: Mon, 27 Apr 2026 16:33:39 +0100
Subject: [PATCH 08/10] Refine HPV self-clearance logic

---
 .../ResourceFile_HPV/parameter_values.csv     |  36 +-
 src/scripts/HPV_Analyses/Analysis_HPV.py      |   4 +-
 src/tlo/methods/hpv.py                        | 554 ++++++++++++------
 3 files changed, 394 insertions(+), 200 deletions(-)

diff --git a/resources/ResourceFile_HPV/parameter_values.csv b/resources/ResourceFile_HPV/parameter_values.csv
index 92ec26091f..b3979379ba 100644
--- a/resources/ResourceFile_HPV/parameter_values.csv
+++ b/resources/ResourceFile_HPV/parameter_values.csv
@@ -1,19 +1,17 @@
-parameter_name,value,param_label,prior_min,prior_max,prior_note,reference
-init_prev_hpv_hr1,0.03,undetermined,0,1,calibration,N/A
-init_prev_hpv_hr2,0.04,undetermined,0,1,calibration,N/A
-init_prev_hpv_hr3,0.05,undetermined,0,1,calibration,N/A
-b_hpv,0.3,undetermined,0,1,calibration,N/A
-median_clear_hr1,12,undetermined,0,1,calibration,N/A
-median_clear_hr2,12,undetermined,0,1,calibration,N/A
-median_clear_hr3,12,undetermined,0,1,calibration,N/A
-clear_shape,2,undetermined,0,1,calibration,N/A
-r_clear_12,0.2,undetermined,0,1,calibration,N/A
-r_clear_24,0.3,undetermined,0,1,calibration,N/A
-rr_hpv_hiv,2,undetermined,0,1,calibration,N/A
-rr_hr1_vaccinated,0,undetermined,0,1,calibration,N/A
-rr_hr2_vaccinated,0.6,undetermined,0,1,calibration,N/A
-rr_hr3_vaccinated,0.3,undetermined,0,1,calibration,N/A
-rr_hpv_age50plus,0.1,undetermined,0,1,calibration,N/A
-rr_immunity_hr1,0.8,undetermined,0,1,calibration,N/A
-rr_immunity_hr2,0.8,undetermined,0,1,calibration,N/A
-rr_immunity_hr3,0.8,undetermined,0,1,calibration,N/A
+parameter_name,value,param_label,prior_min,prior_max,prior_note,reference,
+init_prev_hpv_hr1,0.05,undetermined,0,1,calibration,"Cubie HA, Morton D, Kawonga E, Mautanga M, Mwenitete I, Teakle N, Ngwira B, Walker H, Walker G, Kafwafwa S, Kabota B, Ter Haar R, Campbell C. HPV prevalence in women attending cervical screening in rural Malawi using the cartridge-based Xpert® HPV assay. J Clin Virol. 2017 Feb;87:1-4. doi: 10.1016/j.jcv.2016.11.014. ",
+init_prev_hpv_hr2,0.1,undetermined,0,1,calibration,"Cubie HA, Morton D, Kawonga E, Mautanga M, Mwenitete I, Teakle N, Ngwira B, Walker H, Walker G, Kafwafwa S, Kabota B, Ter Haar R, Campbell C. HPV prevalence in women attending cervical screening in rural Malawi using the cartridge-based Xpert® HPV assay. J Clin Virol. 2017 Feb;87:1-4. doi: 10.1016/j.jcv.2016.11.014. ",
+init_prev_hpv_hr3,0.06,undetermined,0,1,calibration,"Cubie HA, Morton D, Kawonga E, Mautanga M, Mwenitete I, Teakle N, Ngwira B, Walker H, Walker G, Kafwafwa S, Kabota B, Ter Haar R, Campbell C. HPV prevalence in women attending cervical screening in rural Malawi using the cartridge-based Xpert® HPV assay. J Clin Virol. 2017 Feb;87:1-4. doi: 10.1016/j.jcv.2016.11.014. ",
+b_hpv,0.3,undetermined,0,1,calibration,N/A,
+rr_hpv_hiv_no_art,2.6,undetermined,0,1,calibration,"Liu G, Sharma M, Tan N, Barnabas RV. HIV-positive women have higher risk of human papilloma virus infection, precancerous lesions, and cervical cancer. AIDS. 2018 Mar 27;32(6):795-808. doi: 10.1097/QAD.0000000000001765. PMID: 29369827; PMCID: PMC5854529.",
+rr_hpv_hiv_art_unsuppressed,1.8,undetermined,0,1,calibration,N/A,
+rr_hr1_vaccinated,0.1,undetermined,0,1,calibration,N/A,
+rr_hr2_vaccinated,0.5,undetermined,0,1,calibration,N/A,
+rr_hr3_vaccinated,0.8,undetermined,0,1,calibration,N/A,
+rr_hpv_age50plus,0.5,undetermined,0,1,calibration,N/A,
+median_clear_hr1,14,undetermined,0,1,calibration,N/A,"Muñoz N, Méndez F, Posso H, Molano M, van den Brule AJ, Ronderos M, Meijer C, Muñoz A; Instituto Nacional de Cancerologia HPV Study Group. Incidence, duration, and determinants of cervical human papillomavirus infection in a cohort of Colombian women with normal cytological results. J Infect Dis. 2004 Dec 15;190(12):2077-87. doi: 10.1086/425907. Epub 2004 Nov 22. PMID: 15551205."
+median_clear_hr2,12,undetermined,0,1,calibration,N/A,"Muñoz N, Méndez F, Posso H, Molano M, van den Brule AJ, Ronderos M, Meijer C, Muñoz A; Instituto Nacional de Cancerologia HPV Study Group. Incidence, duration, and determinants of cervical human papillomavirus infection in a cohort of Colombian women with normal cytological results. J Infect Dis. 2004 Dec 15;190(12):2077-87. doi: 10.1086/425907. Epub 2004 Nov 22. PMID: 15551205."
+median_clear_hr3,12,undetermined,0,1,calibration,N/A,"Muñoz N, Méndez F, Posso H, Molano M, van den Brule AJ, Ronderos M, Meijer C, Muñoz A; Instituto Nacional de Cancerologia HPV Study Group. Incidence, duration, and determinants of cervical human papillomavirus infection in a cohort of Colombian women with normal cytological results. J Infect Dis. 2004 Dec 15;190(12):2077-87. doi: 10.1086/425907. Epub 2004 Nov 22. PMID: 15551205."
+clear_shape,1,undetermined,0,1,calibration,N/A,
+rr_clear_hiv_no_art,0.65,undetermined,0,1,calibration,N/A,
+rr_clear_hiv_art_unsuppressed,0.8,undetermined,0,1,calibration,N/A,
diff --git a/src/scripts/HPV_Analyses/Analysis_HPV.py b/src/scripts/HPV_Analyses/Analysis_HPV.py
index e39252e76a..baeda1c141 100644
--- a/src/scripts/HPV_Analyses/Analysis_HPV.py
+++ b/src/scripts/HPV_Analyses/Analysis_HPV.py
@@ -140,8 +140,8 @@
 
 # 2. HPV  prevalence in different gender
 plt.figure(figsize=(8, 5))
-plt.plot(hpv_df["Year"], hpv_df["MalePrev"], marker="o", label="Male")
-plt.plot(hpv_df["Year"], hpv_df["FemalePrev"], marker="o", label="Female")
+plt.plot(hpv_df["Year"], hpv_df["M_Prev"], marker="o", label="Male")
+plt.plot(hpv_df["Year"], hpv_df["F_Prev"], marker="o", label="Female")
 plt.xlabel("Year")
 plt.ylabel("HPV prevalence")
 plt.title("HPV prevalence by sex")
diff --git a/src/tlo/methods/hpv.py b/src/tlo/methods/hpv.py
index 052cc28af6..56288ed23d 100644
--- a/src/tlo/methods/hpv.py
+++ b/src/tlo/methods/hpv.py
@@ -63,6 +63,8 @@ class HPV(Module, GenericFirstAppointmentsMixin):
     CAUSES_OF_DISABILITY = {}
 
     HPV_GROUPS = ['hr1', 'hr2', 'hr3']
+    AGE_BINS = [15, 20, 25, 35, 45, 55, 200]
+    AGE_LABELS = ['15_19', '20_24', '25_34','35_44', '45_54', '55plus']
 
     PARAMETERS = {
         "init_prev_hpv_hr1": Parameter(
@@ -78,72 +80,86 @@ class HPV(Module, GenericFirstAppointmentsMixin):
             "Initial prevalence of other HR types"
         ),
 
-        # Transmission coefficients β
+        # ------------------  HPV Transmission  ------------------ #
+        # transmission coefficient for HPV Infection
         "b_hpv": Parameter(
             Types.REAL,
             "Baseline transmission coefficient for HPV Infection",
         ),
 
-        # Clearance probabilities
-        "median_clear_hr1": Parameter(
-            Types.REAL,
-            "Median months to self-clear for hr1 infection",
-        ),
-        "median_clear_hr2": Parameter(
+        # Modifiers
+        # "rr_hpv_hiv": Parameter(
+        #     Types.REAL,
+        #     "Relative risk for HPV infection among HIV positive people",
+        # ),
+        "rr_hpv_hiv_no_art": Parameter(
             Types.REAL,
-            "Median months to self-clear for hr2 infection",
+            "Relative risk for HPV acquisition among HIV positive people not on ART",
         ),
-        "median_clear_hr3": Parameter(
+        "rr_hpv_hiv_art_unsuppressed": Parameter(
             Types.REAL,
-            "Median months to self-clear for hr3 infection",
+            "Relative risk for HPV acquisition among HIV positive people on ART but not virally suppressed",
         ),
-        "clear_shape": Parameter(
+        "rr_hr1_vaccinated": Parameter(
             Types.REAL,
-            "Weibull shape parameter for HPV clearance duration",
+            "Relative risk for hr1 infection if vaccinated",
         ),
-
-        "r_clear_12": Parameter(
+        "rr_hr2_vaccinated": Parameter(
             Types.REAL,
-            "probability of clearing HPV after 12 month",
+            "Relative risk for hr2 infection if vaccinated",
         ),
-        "r_clear_24": Parameter(
+        "rr_hr3_vaccinated": Parameter(
             Types.REAL,
-            "probability of clearing HPV after 24 month",
+            "Relative risk for hr3 infection if vaccinated",
         ),
 
-        # Modifiers
-        "rr_hpv_hiv": Parameter(
-            Types.REAL,
-            "Relative risk for HPV infection among HIV positive people",
-        ),
-        "rr_hr1_vaccinated": Parameter(
+        "rr_hpv_age50plus": Parameter(
             Types.REAL,
-            "Relative risk for hr1 infection if vaccinated",
+            "Relative risk multiplier for age >=50",
         ),
-        "rr_hr2_vaccinated": Parameter(
+
+        # ------------------  HPV Self-clear  ------------------ #
+        # Weibull baseline
+        "median_clear_hr1": Parameter(
             Types.REAL,
-            "Relative risk for hr2 infection if vaccinated",
+            "Median months to self-clear for hr1 infection",
         ),
-        "rr_hr3_vaccinated": Parameter(
+        "median_clear_hr2": Parameter(
             Types.REAL,
-            "Relative risk for hr3 infection if vaccinated",
+            "Median months to self-clear for hr2 infection",
         ),
-        "rr_immunity_hr1": Parameter(
+        "median_clear_hr3": Parameter(
             Types.REAL,
-            "Relative risk for reinfection with hr1 if previously infected",
+            "Median months to self-clear for hr3 infection",
         ),
-        "rr_immunity_hr2": Parameter(
+        "clear_shape": Parameter(
             Types.REAL,
-            "Relative risk for reinfection with hr2 if previously infected",
+            "Weibull shape parameter for HPV clearance duration",
         ),
-        "rr_immunity_hr3": Parameter(
+
+        # Modifiers
+        "rr_clear_hiv_no_art": Parameter(
             Types.REAL,
-            "Relative risk for reinfection with hr3 if previously infected",
+            "Rate ratio for HPV clearance among PLWH not on ART",
         ),
-        "rr_hpv_age50plus": Parameter(
+        "rr_clear_hiv_art_unsuppressed": Parameter(
             Types.REAL,
-            "Relative risk multiplier for age >=50",
+            "Rate ratio for HPV clearance among PLWH on ART but not virally suppressed",
         ),
+
+        ## As MC suggested, remove the immunity part
+        # "rr_immunity_hr1": Parameter(
+        #     Types.REAL,
+        #     "Relative risk for reinfection with hr1 if previously infected",
+        # ),
+        # "rr_immunity_hr2": Parameter(
+        #     Types.REAL,
+        #     "Relative risk for reinfection with hr2 if previously infected",
+        # ),
+        # "rr_immunity_hr3": Parameter(
+        #     Types.REAL,
+        #     "Relative risk for reinfection with hr3 if previously infected",
+        # ),
     }
 
     PROPERTIES = {
@@ -164,19 +180,19 @@ class HPV(Module, GenericFirstAppointmentsMixin):
         'hp_date_first_infected': Property(
             Types.DATE, 'Start date of current HPV infection'),
         'hp_duration_hr1': Property(
-            Types.INT,'Sampled duration for current hr1 infection'),
+            Types.INT,'Duration for current hr1 infection'),
         'hp_duration_hr2': Property(
-            Types.INT,'Sampled duration for current hr2 infection'),
+            Types.INT,'Duration for current hr2 infection'),
         'hp_duration_hr3': Property(
-            Types.INT,'Sampled duration for current hr3 infection'),
+            Types.INT,'Duration for current hr3 infection'),
         'hp_duration_all_clear': Property(
             Types.INT, 'Duration for current all HPV infection'),
-        'hp_date_clear_hr1': Property(
-            Types.DATE, 'Scheduled clearance date of current hr1 infection'),
-        'hp_date_clear_hr2': Property(
-            Types.DATE, 'Scheduled clearance date of current hr2 infection'),
-        'hp_date_clear_hr3': Property(
-            Types.DATE, 'Scheduled clearance date of current hr3 infection'),
+        # 'hp_date_clear_hr1': Property(
+        #     Types.DATE, 'Scheduled clearance date of current hr1 infection'),
+        # 'hp_date_clear_hr2': Property(
+        #     Types.DATE, 'Scheduled clearance date of current hr2 infection'),
+        # 'hp_date_clear_hr3': Property(
+        #     Types.DATE, 'Scheduled clearance date of current hr3 infection'),
         'hp_ever_infected_hr1': Property(
             Types.BOOL, 'Ever infected with hr1'),
         'hp_ever_infected_hr2': Property(
@@ -229,9 +245,9 @@ def initialise_population(self, population):
         df.loc[df.is_alive, 'hp_duration_hr2'] = -1
         df.loc[df.is_alive, 'hp_duration_hr3'] = -1
         df.loc[df.is_alive, 'hp_duration_all_clear'] = -1
-        df.loc[df.is_alive, 'hp_date_clear_hr1'] = pd.NaT
-        df.loc[df.is_alive, 'hp_date_clear_hr2'] = pd.NaT
-        df.loc[df.is_alive, 'hp_date_clear_hr3'] = pd.NaT
+        # df.loc[df.is_alive, 'hp_date_clear_hr1'] = pd.NaT
+        # df.loc[df.is_alive, 'hp_date_clear_hr2'] = pd.NaT
+        # df.loc[df.is_alive, 'hp_date_clear_hr3'] = pd.NaT
         df.loc[df.is_alive, 'hp_ever_infected_hr1'] = False
         df.loc[df.is_alive, 'hp_ever_infected_hr2'] = False
         df.loc[df.is_alive, 'hp_ever_infected_hr3'] = False
@@ -251,15 +267,9 @@ def initialise_population(self, population):
                 previous_infection = int(self.rng.randint(0, 24))  # 0-23month
                 infection_date = self.sim.date - DateOffset(months=previous_infection)
 
-                # Sample a duration that longer that previous infection
-                previous_duration = self._sample_clear_duration(group)
-                while previous_duration <= previous_infection:
-                    previous_duration = self._sample_clear_duration(group)
-
-                clear_date = infection_date + DateOffset(months=previous_duration)
                 df.at[person_id, f'hp_date_infected_{group}'] = infection_date
-                df.at[person_id, f'hp_date_clear_{group}'] = clear_date
-                df.at[person_id, f'hp_duration_{group}'] = previous_duration
+                df.at[person_id, f'hp_date_clear_{group}'] = pd.NaT
+                df.at[person_id, f'hp_duration_{group}'] = previous_infection
 
         initially_infected = df.index[df.is_alive & df.hp_is_infected]
         for person_id in initially_infected:
@@ -269,28 +279,115 @@ def initialise_population(self, population):
                 if not pd.isna(df.at[person_id, f'hp_date_infected_{group}'])
             ]
             if len(group_dates) > 0:
-                first_date = min(group_dates)
-                df.at[person_id, 'hp_date_first_infected'] = first_date
-
-    def _get_age_group(self, age_years):
-        if age_years < 15:
-            return '0_15'
-        elif age_years < 20:
-            return '15_19'
-        elif age_years < 25:
-            return '20_24'
-        elif age_years < 30:
-            return '25_29'
-        elif age_years < 35:
-            return '30_34'
-        elif age_years < 40:
-            return '35_39'
-        elif age_years < 45:
-            return '40_44'
-        elif age_years < 50:
-            return '45_49'
+                df.at[person_id, 'hp_date_first_infected'] = min(group_dates)
+
+    def _get_age_group_series(self, ages):
+        return pd.cut(
+            ages,
+            bins=self.AGE_BINS,
+            labels=self.AGE_LABELS,
+            right=False  # right side not included
+        )
+    def _get_age_group(self,age_years):
+        for i in range(len(self.AGE_BINS)-1):
+            if self.AGE_BINS[i] <= age_years < self.AGE_BINS[i + 1]:
+                return self.AGE_LABELS[i]
+        return None
+
+    def _build_age_mixing_matrix(self, within=0.80, adjacent=0.15, distant=0.05):
+        labels = self.AGE_LABELS
+        M = pd.DataFrame(0.0, index=labels, columns=labels, dtype=float)
+
+        for i, label in enumerate(labels):
+            row = pd.Series(0.0, index=labels, dtype=float)
+
+            # within-group
+            row[label] = within
+
+            # adjacent groups
+            neighbors = []
+            if i -1 >= 0:
+                neighbors.append(labels[i - 1])
+            if i + 1 < len(labels):
+                neighbors.append(labels[i + 1])
+
+            if len(neighbors) > 0:
+                share_adj = adjacent / len(neighbors)
+                for nb in neighbors:
+                    row[nb] = share_adj
+
+            # distant group
+            distant_groups = [x for x in labels if x != label and x not in neighbors]
+            if len(distant_groups) > 0:
+                share_dist = distant / len(distant_groups)
+                for dg in distant_groups:
+                    row[dg] = share_dist
+
+            # normalize to exactly
+            row = row / row.sum()
+            M.loc[label] = row
+        return M
+
+
+    def _months_since(self,start_date,end_date=None):
+        if pd.isna(start_date):
+            return None
+        if pd.isna(end_date):
+            end_date = self.sim.date
+
+        months = (end_date.year - start_date.year) * 12 + (end_date.month - start_date.month)
+        return max(0, int(months))
+
+    def _get_clearance_rr(self, person_id):
+        df = self.sim.population.props
+        p = self.parameters
+
+        if 'hv_inf' not in df.columns:
+            return 1.0
+
+        hv_inf = df.at[person_id, 'hv_inf']
+
+        if pd.isna(hv_inf) or (hv_inf is False):
+            return 1.0
+
+        if 'hv_art' not in df.columns:
+            return 1.0
+
+        hv_art = df.at[person_id, 'hv_art']
+
+        if hv_art == 'not':
+            return p['rr_clear_hiv_no_art']
+        elif hv_art == 'on_not_VL_suppressed':
+            return p ['rr_clear_hiv_art_unsuppressed']
         else:
-            return '50plus'
+            return 1.0
+
+    def _get_clearance_probability(self, group, person_id, duration_months, interval_months = 6):
+        p = self.parameters
+
+        median = p[f'median_clear_{group}']
+        shape = p['clear_shape']
+
+        # median = scale * (ln 2)^(1/shape)
+        scale = median / (math.log(2) ** (1.0 / shape))
+
+        t1 = max(0.0, float(duration_months))
+        t0 = max(0.0, t1 - float(interval_months))
+
+        # Weibull baseline cumulative hazard increment over [t0, t1]
+        H0_t0 = (t0 / scale) ** shape
+        H0_t1 = (t1 / scale) ** shape
+        delta_H0 = max(0.0, H0_t1 - H0_t0)
+
+        rr = self._get_clearance_rr(person_id)
+
+        # p = 1 - exp(- rr * delta_H0)
+        p_clear = 1.0 - math.exp(-rr * delta_H0)
+
+        return min(max(p_clear, 0.0), 1.0)
+
+
+
 
     def _get_hpv_group_set(self, person_id):
         df = self.sim.population.props
@@ -324,19 +421,18 @@ def _add_new_infection_groups(self, person_id, new_groups):
         for group in new_groups:
             df.at[person_id, f'hp_date_infected_{group}'] = self.sim.date
             df.at[person_id, f'hp_ever_infected_{group}'] = True
+            df.at[person_id, f'hp_date_clear_{group}'] = pd.NaT
+            df.at[person_id,f'hp_duration_{group}'] = 0
 
-            duration = self._sample_clear_duration(group)
-            df.at[person_id, f'hp_date_clear_{group}'] = self.sim.date + DateOffset(months=duration)
-            df.at[person_id,f'hp_duration_{group}'] = duration
+        # start a new HPV infection process only if the person was uninfected/ self-clear
+        if not was_infected_before:
+            df.at[person_id, 'hp_date_first_infected'] = self.sim.date
+            df.at[person_id, 'hp_duration_all_clear'] = -1
 
         #set first infection date
         if pd.isna(df.at[person_id, 'hp_date_first_infected']):
             df.at[person_id, 'hp_date_first_infected'] = self.sim.date
 
-        # start a new HPV infection process only if the person was uninfected/ self-clear
-        if not was_infected_before:
-            df.at[person_id, 'hp_date_first_infected'] = self.sim.date
-
 
     def _clear_single_group(self, person_id, group):
         """clear a single HPV group for a person"""
@@ -345,6 +441,7 @@ def _clear_single_group(self, person_id, group):
         df.at[person_id, f'hp_infected_{group}'] = False
         df.at[person_id, f'hp_date_infected_{group}'] = pd.NaT
         df.at[person_id, f'hp_date_clear_{group}'] = pd.NaT
+        df.at[person_id, f'hp_duration_{group}'] = -1
 
         still_infected= any(
             df.at[person_id, f'hp_infected_{g}'] for g in self.HPV_GROUPS
@@ -359,23 +456,22 @@ def _clear_single_group(self, person_id, group):
 
             df.at[person_id, 'hp_date_first_infected'] = pd.NaT
 
-    def _sample_clear_duration(self,group):
-        """Sample a infection duration for one HPV group using Weibull distribution"""
-        p = self.parameters
-
-        median = p[f'median_clear_{group}']
-        shape = p['clear_shape']
-
-        # WeibullMedian= Scale * （ln 2)^(1/shape)
-        scale = median / (math.log(2) ** (1.0 / shape))
-
-        u = self.rng.random()
-        duration = scale * ((-math.log(1.0 - u)) ** (1.0 / shape))
-
-        duration =  max(1,int(round(duration)))
-        duration = min(duration, 48)
-
-        return duration
+    # def _sample_clear_duration(self,group):
+    #     """Sample a infection duration for one HPV group using Weibull distribution"""
+    #     p = self.parameters
+    #
+    #     median = p[f'median_clear_{group}']
+    #     shape = p['clear_shape']
+    #
+    #     # WeibullMedian= Scale * （ln 2)^(1/shape)
+    #     scale = median / (math.log(2) ** (1.0 / shape))
+    #
+    #     u = self.rng.random()
+    #     duration = scale * ((-math.log(1.0 - u)) ** (1.0 / shape))
+    #
+    #     duration = max(1,int(round(duration)))
+    #     duration = min(duration, 48)
+    #     return duration
 
     def initialise_simulation(self, sim):
         """Get ready for simulation start.
@@ -386,6 +482,12 @@ def initialise_simulation(self, sim):
         """
         p = self.parameters
         self.lm = {}
+        self.age_mixing_matrix = self._build_age_mixing_matrix(
+            within=0.80,
+            adjacent=0.15,
+            distant=0.05
+        )
+        self._prev_logged_prev = {}
 
         for group in self.HPV_GROUPS:
             self.lm[group] = LinearModel(
@@ -394,29 +496,31 @@ def initialise_simulation(self, sim):
 
                 Predictor('va_hpv')
                 .when(1,p[f'rr_{group}_vaccinated'])
-                .when(2,p[f'rr_{group}_vaccinated'])
-                .otherwise(1.0),
+                .when(2,p[f'rr_{group}_vaccinated']),
 
-                Predictor(f'hp_ever_infected_{group}')
-                .when(True, p[f'rr_immunity_{group}'])
-                .otherwise(1.0),
+                # Predictor(f'hp_ever_infected_{group}')
+                # .when(True, p[f'rr_immunity_{group}'])
 
                 Predictor('age_years', conditions_are_mutually_exclusive=True)
-                .when('.between(0,15)', 0.0)
-                .when('>=50', p['rr_hpv_age50plus'])
-                .otherwise(1.0),
+                .when('<15', 0.0)
+                .when('>=50', p['rr_hpv_age50plus']),
 
-                Predictor('hv_inf')  # Need to be updated with ART
-                .when(True,p['rr_hpv_hiv'])
-                .otherwise(1.0),
+                Predictor()
+                .when('hv_inf & (hv_art =="not")',
+                p['rr_hpv_hiv_no_art']
                 )
+                .when(
+                    'hv_inf & (hv_art == "on_not_VL_suppressed")',
+                    p['rr_hpv_hiv_art_unsuppressed']
+                ),
+            )
 
         # add the basic event
         event = HpvInfectionEvent(self)
         sim.schedule_event(event, sim.date + DateOffset(months=6))
 
         # add an event to log to screen
-        sim.schedule_event(HpvLoggingEvent(self), sim.date + DateOffset(months=12))
+        sim.schedule_event(HpvLoggingEvent(self), sim.date + DateOffset(months=6, days=1))
 
     def on_birth(self, mother_id, child_id):
         """Initialise our properties for a newborn individual.
@@ -440,9 +544,9 @@ def on_birth(self, mother_id, child_id):
         df.at[child_id, 'hp_duration_hr2'] = -1
         df.at[child_id, 'hp_duration_hr3'] = -1
         df.at[child_id, 'hp_duration_all_clear'] = -1
-        df.at[child_id, 'hp_date_clear_hr1'] = pd.NaT
-        df.at[child_id, 'hp_date_clear_hr2'] = pd.NaT
-        df.at[child_id, 'hp_date_clear_hr3'] = pd.NaT
+        # df.at[child_id, 'hp_date_clear_hr1'] = pd.NaT
+        # df.at[child_id, 'hp_date_clear_hr2'] = pd.NaT
+        # df.at[child_id, 'hp_date_clear_hr3'] = pd.NaT
         df.at[child_id, 'hp_ever_infected_hr1'] = False
         df.at[child_id, 'hp_ever_infected_hr2'] = False
         df.at[child_id, 'hp_ever_infected_hr3'] = False
@@ -485,69 +589,98 @@ def apply(self, population):
         infected_idx = df.index[df.is_alive & df.hp_is_infected & (df.age_years >= 15)]
         for person_id in infected_idx:
             current_groups = module._get_hpv_group_set(person_id)
+
             for group in list(current_groups):
-                clear_date = df.at[person_id, f'hp_date_clear_{group}']
+                date_inf = df.at[person_id, f'hp_date_infected_{group}']
+                if pd.isna(date_inf):
+                    continue
 
-                if pd.isna(clear_date): #if data is missing. skip for safety
+                duration_months = module._months_since (date_inf, now)
+                if duration_months is None:
                     continue
 
-                if now >= clear_date:
-                    module._clear_single_group(person_id, group)
+                df.at[person_id, f'hp_duration_{group}'] = duration_months
 
-                # duration = (now.year - date_inf.year) * 12 + (now.month - date_inf.month)
-                #
-                # clear_now = False
-                #
-                # if duration >= 24:
-                #     p_clear = module.parameters['r_clear_24']
-                #     if module.rng.random() < p_clear:
-                #         clear_now = True
-                #
-                # elif duration >= 12:
-                #     p_clear = module.parameters['r_clear_12']
-                #     if module.rng.random() < p_clear:
-                #         clear_now = True
+                # Clear rate in 6 months
+                interval_months = 6
+                p_clear = module._get_clearance_probability(
+                    group=group,
+                    person_id=person_id,
+                    duration_months=duration_months,
+                    interval_months=interval_months
+                )
 
+                if module.rng.random() < p_clear:
+                    module._clear_single_group(person_id, group)
 
 
         # 3. recalculate prevalence by HPV group after clearance
-        male_idx = df.index[df.is_alive & (df.age_years >= 15) & (df.sex == 'M')]
-        female_idx = df.index[df.is_alive & (df.age_years >= 15) & (df.sex == 'F')]
+        df_alive = df.loc[df.is_alive & (df.age_years >= 15)].copy()
+        df_alive['age_group'] = module._get_age_group_series(df_alive['age_years'])
 
-        prev_male = {}
-        prev_female = {}
+        male_df = df_alive.loc[df_alive.sex == 'M']
+        female_df = df_alive.loc[df_alive.sex == 'F']
 
-        for group in module.HPV_GROUPS:
-            male_group_inf = df.loc[male_idx, f'hp_infected_{group}'].sum()
-            female_group_inf = df.loc[female_idx, f'hp_infected_{group}'].sum()
+        prev_by_age_male = {}
+        prev_by_age_female = {}
 
-            prev_male[group] = male_group_inf/len(male_idx) if len(male_idx) > 0 else 0
-            prev_female[group] = female_group_inf / len(female_idx) if len(female_idx) > 0 else 0
+        for group in module.HPV_GROUPS:
+            prev_by_age_male[group] = (
+                male_df.groupby('age_group', observed=True)[f'hp_infected_{group}']
+                .mean()
+                .reindex(module.AGE_LABELS, fill_value=0.0)
+            )
+            prev_by_age_female[group] = (
+                female_df.groupby('age_group', observed=True)[f'hp_infected_{group}']
+                .mean()
+                .reindex(module.AGE_LABELS, fill_value=0.0)
+            )
+
+            # male_group_inf = df.loc[male_idx, f'hp_infected_{group}'].sum()
+            # female_group_inf = df.loc[female_idx, f'hp_infected_{group}'].sum()
+            #
+            # prev_male[group] = male_group_inf/len(male_idx) if len(male_idx) > 0 else 0
+            # prev_female[group] = female_group_inf / len(female_idx) if len(female_idx) > 0 else 0
+
+        # 4. new infection
+        interval_years = 0.5
 
-        #new infection
         for person_id in eligible:
             sex = df.at[person_id,'sex']
             current_groups = module._get_hpv_group_set(person_id)
             new_group = set()
 
             if sex == 'F':
-                source_prev = prev_male
+                source_prev_by_age = prev_by_age_male
             elif sex == 'M':
-                source_prev = prev_female
+                source_prev_by_age = prev_by_age_female
             else:
                 continue
 
+            my_age_group = module._get_age_group(df.at[person_id,'age_years'])
+            if my_age_group is None:
+                continue
+
+            mix_row = module.age_mixing_matrix.loc[my_age_group]
+
             for group in module.HPV_GROUPS:
                 if group in current_groups:
                     continue
 
-                baseline = module.parameters.get(f'b_hpv_{group}', module.parameters['b_hpv'])*source_prev[group]
+                weighted_prev = float((mix_row * source_prev_by_age[group]).sum())
+
+                beta_name = f'b_hpv_{group}'
+                beta = module.parameters[beta_name] if beta_name in module.parameters else module.parameters['b_hpv']
+
                 modifier = module.lm[group].predict(df.loc[[person_id]]).iloc[0]
 
-                risk = baseline * modifier
-                risk = min(max(risk, 0.0), 1.0)
+                lambda_inf = beta * weighted_prev * modifier
+                lambda_inf = max(lambda_inf, 0.0)
 
-                if module.rng.random() < risk:
+                p_inf = 1.0 - math.exp(-lambda_inf * interval_years)
+                p_inf = min(max(p_inf, 0.0), 1.0)
+
+                if module.rng.random() < p_inf:
                     new_group.add(group)
 
             if len(new_group) > 0:
@@ -557,8 +690,8 @@ class HpvLoggingEvent(RegularEvent, PopulationScopeEventMixin):
     def __init__(self, module):
         """Produce a summmary of the numbers of people with respect to their 'hpv status'
         """
-        # run this event every 12 month
-        self.repeat = 12
+        # run this event every 6/12 month
+        self.repeat = 6
         super().__init__(module, frequency=DateOffset(months=self.repeat))
         assert isinstance(module, HPV)
 
@@ -568,39 +701,103 @@ def apply(self, population):
         module = self.module
 
         eligible = df.index[df.is_alive & (df.age_years >= 15)]
-        male_idx = df.index[df.is_alive & (df.age_years >= 15) & (df.sex == 'M')]
-        female_idx = df.index[df.is_alive & (df.age_years >= 15) & (df.sex == 'F')]
+        log_data = {
+            'Year':self.sim.date.year,
+            'Month':self.sim.date.month,
+            'EligibleN':int(len(eligible)),
+        }
 
-        total_inf = df.loc[eligible, 'hp_is_infected'].sum()
-        total_prev = total_inf / len(eligible) if len(eligible) > 0 else 0
+        if len(eligible) == 0:
+            logger.info(key='summary', data=log_data)
+            return
 
-        male_inf = df.loc[male_idx, 'hp_is_infected'].sum()
-        female_inf = df.loc[female_idx, 'hp_is_infected'].sum()
+        sub = df.loc[eligible].copy()
 
-        male_prev = male_inf / len(male_idx) if len(male_idx) > 0 else 0
-        female_prev = female_inf / len(female_idx) if len(female_idx) > 0 else 0
+        sub['age_group'] = module._get_age_group_series(sub['age_years'])
+        sub['hiv_group'] = 'HIVneg'
 
-        log_data = {
-            'Year':self.sim.date.year,
-            'TotalInf': int(total_inf),
-            'TotalPrev': total_prev,
-            'MaleInf': int(male_inf),
-            'FemaleInf': int(female_inf),
-            'MalePrev': male_prev,
-            'FemalePrev': female_prev,
-        }
+        if 'hv_inf' in sub.columns:
+            sub.loc[sub['hv_inf'].fillna(False), 'hiv_group'] = 'HIVpos_unknown'
 
-        # group-specific prevalence by sex
-        for group in module.HPV_GROUPS:
-            male_group_inf = df.loc[male_idx, f'hp_infected_{group}'].sum()
-            female_group_inf = df.loc[female_idx, f'hp_infected_{group}'].sum()
+            if 'hv_art' in sub.columns:
+                sub.loc[sub['hv_inf'].fillna(False) & (sub['hv_art'] == 'not'), 'hiv_group'] = 'HIVpos_noART'
+                sub.loc[sub['hv_inf'].fillna(False) & (
+                        sub['hv_art'] == 'on_not_VL_suppressed'), 'hiv_group'] = 'HIVpos_unsupp'
+                sub.loc[
+                    sub['hv_inf'].fillna(False) & (sub['hv_art'] == 'on_VL_suppressed'), 'hiv_group'] = 'HIVpos_supp'
+
+        # 1. Overall summary
+        total_inf = int(sub['hp_is_infected'].sum())
+        log_data['TotalInf'] = total_inf
+        log_data['TotalPrev'] = sub['hp_is_infected'].mean()
+
+        for sex_name, sex_df in  [('M', sub.loc[sub.sex == 'M']),
+                                 ('F', sub.loc[sub.sex == 'F'])]:
+            n = len(sex_df)
+            log_data[f'{sex_name}_N'] = int(n)
+            log_data[f'{sex_name}_Inf'] = int(sex_df['hp_is_infected'].sum()) if n > 0 else 0
+            log_data[f'{sex_name}_Prev'] = sex_df['hp_is_infected'].mean() if n > 0 else math.nan
+
+        # 2. Prevalence by sex and age group
+        prev_snapshot = {}
+
+        for sex_name, sex_df in [('All', sub),
+                                 ('M', sub.loc[sub.sex == 'M']),
+                                 ('F', sub.loc[sub.sex == 'F'])]:
+
+            for age_group in module.AGE_LABELS:
+                age_df = sex_df.loc[sex_df['age_group'] == age_group]
+                n = len(age_df)
+
+                log_data[f'Any_{sex_name}_{age_group}_N'] = int(n)
+
+                if n == 0:
+                    log_data[f'Any_{sex_name}_{age_group}_Inf'] = 0
+                    log_data[f'Any_{sex_name}_{age_group}_Prev'] = math.nan
+                    for hpv_group in module.HPV_GROUPS:
+                        log_data[f'{hpv_group}_{sex_name}_{age_group}_Inf'] = 0
+                        log_data[f'{hpv_group}_{sex_name}_{age_group}_Prev'] = math.nan
+                    continue
+
+                any_inf = int(age_df['hp_is_infected'].sum())
+                any_prev = age_df['hp_is_infected'].mean()
+
+                log_data[f'Any_{sex_name}_{age_group}_Inf'] = any_inf
+                log_data[f'Any_{sex_name}_{age_group}_Prev'] = any_prev
+                prev_snapshot[f'Any_{sex_name}_{age_group}_Prev'] = any_prev
+
+                for hpv_group in module.HPV_GROUPS:
+                    inf_n = int(age_df[f'hp_infected_{hpv_group}'].sum())
+                    prev = age_df[f'hp_infected_{hpv_group}'].mean()
+
+                    log_data[f'{hpv_group}_{sex_name}_{age_group}_Inf'] = inf_n
+                    log_data[f'{hpv_group}_{sex_name}_{age_group}_Prev'] = prev
+                    prev_snapshot[f'{hpv_group}_{sex_name}_{age_group}_Prev'] = prev
+
+        # 3. HIV
+        for hiv_group, hiv_df in sub.groupby('hiv_group', observed=True):
+            n = len(hiv_df)
+            log_data[f'Any_{hiv_group}_N'] = int(n)
+            log_data[f'Any_{hiv_group}_Inf'] = int(hiv_df['hp_is_infected'].sum()) if n > 0 else 0
+            log_data[f'Any_{hiv_group}_Prev'] = hiv_df['hp_is_infected'].mean() if n > 0 else math.nan
+
+            for hpv_group in module.HPV_GROUPS:
+                log_data[f'{hpv_group}_{hiv_group}_Prev'] = (
+                    hiv_df[f'hp_infected_{hpv_group}'].mean() if n > 0 else math.nan
+                )
+
+        # 4. Delta
+        prev_logged = getattr(module, '_prev_logged_prev', {})
+        for key, current_val in prev_snapshot.items():
+            previous_val = prev_logged.get(key, math.nan)
+            if pd.isna(previous_val) or pd.isna(current_val):
+                log_data[f'{key}_Delta'] = math.nan
+            else:
+                log_data[f'{key}_Delta'] = current_val - previous_val
 
-            log_data[f'{group}_MaleInf'] = int(male_group_inf)
-            log_data[f'{group}_MalePrev'] = male_group_inf / len(male_idx) if len(male_idx) > 0 else 0
-            log_data[f'{group}_FemaleInf'] = int(female_group_inf)
-            log_data[f'{group}_FemalePrev'] = female_group_inf / len(female_idx) if len(female_idx) > 0 else 0
+        module._prev_logged_prev = prev_snapshot
 
-        # multiplicity of infection
+        # 5. multiplicity of infection
         infection_people = df.index[df.is_alive & (df.age_years>=15) &df.hp_is_infected]
         n_group_1 = 0
         n_group_2 = 0
@@ -651,6 +848,5 @@ def apply(self, population):
         log_data['FemaleGroup2'] = female_n_group_2
         log_data['FemaleGroup3'] = female_n_group_3
 
-        logger.info(key='summary',
-                    data=log_data)
+        logger.info(key='summary', data=log_data)
 

From f465f106b275518ed25891c4b4c60c808d0ea922 Mon Sep 17 00:00:00 2001
From: Liuxinac <liuxinac@hotmail.com>
Date: Thu, 30 Apr 2026 18:43:22 +0100
Subject: [PATCH 09/10] HPV Transmission Model Version 1.0

---
 src/scripts/HPV_Analyses/Analysis_HPV.py | 260 +++++++++++++++++++++--
 src/tlo/methods/hpv.py                   | 141 +++++++++---
 2 files changed, 350 insertions(+), 51 deletions(-)

diff --git a/src/scripts/HPV_Analyses/Analysis_HPV.py b/src/scripts/HPV_Analyses/Analysis_HPV.py
index baeda1c141..cf0c09f3f7 100644
--- a/src/scripts/HPV_Analyses/Analysis_HPV.py
+++ b/src/scripts/HPV_Analyses/Analysis_HPV.py
@@ -57,6 +57,13 @@
 # # need to call epi before tb to get bcg vax
 seed = random.randint(0, 50000)
 # # seed = 41728  # set seed for reproducibility
+
+# HPV model labels
+AGE_LABELS = ["15_19", "20_24", "25_34", "35_44", "45_54", "55plus"]
+HPV_GROUPS = ["hr1", "hr2", "hr3"]
+SEXES = ["M", "F"]
+
+# 1. Run simulation
 sim = Simulation(start_date=start_date, seed=seed, log_config=log_config,
                  show_progress_bar=True, resourcefilepath=resourcefilepath)
 sim.register(
@@ -76,21 +83,22 @@
     healthseekingbehaviour.HealthSeekingBehaviour(),
     healthburden.HealthBurden(),
     epi.Epi(),
-    hpv.HPV(),
-    measles.Measles(),
     hiv.Hiv(),
+    measles.Measles(),
     tb.Tb(),
+    hpv.HPV(),
 )
-#
+
 # # set the scenario
 #sim.modules["HPV"].parameters["r_hpv"] = 0.01
 #sim.modules["HPV"].parameters["r_hpv_clear"] = 0.6
-#
+
 # # Run the simulation and flush the logger
 sim.make_initial_population(n=popsize)
 sim.simulate(end_date=end_date)
-#
-# parse the results
+
+
+# 2. Parse and save results
 output = parse_log_file(sim.log_filepath)
 
 # save the results, argument 'wb' means write using binary mode. use 'rb' for reading file
@@ -127,10 +135,65 @@
 print(hpv_df)
 print(hpv_df.columns)
 
-# 1. Total infection
+
+# change Year / Month to Date
+hpv_df["Year"] = pd.to_numeric(hpv_df["Year"], errors="coerce")
+hpv_df["Month"] = pd.to_numeric(hpv_df["Month"], errors="coerce")
+
+hpv_df = hpv_df.dropna(subset=["Year", "Month"]).copy()
+hpv_df["Year"] = hpv_df["Year"].astype(int)
+hpv_df["Month"] = hpv_df["Month"].astype(int)
+
+hpv_df["Date"] = pd.to_datetime(
+    {
+        "year": hpv_df["Year"],
+        "month": hpv_df["Month"],
+        "day": 1,
+    }
+)
+hpv_df = hpv_df.sort_values("Date").reset_index(drop=True)
+
+# 4. Helper functions
+def compute_group_prev_by_sex(
+    df: pd.DataFrame,
+    hpv_group: str,
+    sex: str,
+    age_labels: list[str],
+) -> pd.Series:
+
+    inf_cols = [f"{hpv_group}_{sex}_{age}_Inf" for age in age_labels]
+    n_cols = [f"Any_{sex}_{age}_N" for age in age_labels]
+
+    missing_inf = [c for c in inf_cols if c not in df.columns]
+    missing_n = [c for c in n_cols if c not in df.columns]
+
+    if missing_inf or missing_n:
+        print(f"\nCannot compute {hpv_group}_{sex}_TotalPrev.")
+        if missing_inf:
+            print("Missing infection columns:", missing_inf)
+        if missing_n:
+            print("Missing denominator columns:", missing_n)
+
+        return pd.Series([float("nan")] * len(df), index=df.index)
+
+    total_inf = df[inf_cols].sum(axis=1)
+    total_n = df[n_cols].sum(axis=1)
+
+    return total_inf / total_n.replace(0, pd.NA)
+
+for sex in SEXES:
+    for group in HPV_GROUPS:
+        hpv_df[f"{group}_{sex}_TotalPrev"] = compute_group_prev_by_sex(
+            hpv_df,
+            hpv_group=group,
+            sex=sex,
+            age_labels=AGE_LABELS,
+        )
+
+# Plot 1: Total infection
 plt.figure(figsize=(8, 5))
-plt.plot(hpv_df["Year"], hpv_df["TotalPrev"], marker="o")
-plt.xlabel("Year")
+plt.plot(hpv_df["Date"], hpv_df["TotalPrev"], marker="o")
+plt.xlabel("Date")
 plt.ylabel("Total HPV prevalence")
 plt.title("Total HPV prevalence over time")
 plt.grid(True)
@@ -140,11 +203,11 @@
 
 # 2. HPV  prevalence in different gender
 plt.figure(figsize=(8, 5))
-plt.plot(hpv_df["Year"], hpv_df["M_Prev"], marker="o", label="Male")
-plt.plot(hpv_df["Year"], hpv_df["F_Prev"], marker="o", label="Female")
-plt.xlabel("Year")
-plt.ylabel("HPV prevalence")
-plt.title("HPV prevalence by sex")
+plt.plot(hpv_df["Date"], hpv_df["M_Prev"], marker="o", label="Male")
+plt.plot(hpv_df["Date"], hpv_df["F_Prev"], marker="o", label="Female")
+plt.xlabel("Date")
+plt.ylabel("Any HPV prevalence")
+plt.title("Any HPV prevalence by sex")
 plt.legend()
 plt.grid(True)
 plt.tight_layout()
@@ -153,10 +216,14 @@
 
 # 3. Prevalence of different HPV groups in female
 plt.figure(figsize=(8, 5))
-plt.plot(hpv_df["Year"], hpv_df["hr1_FemalePrev"], marker="o", label="hr1")
-plt.plot(hpv_df["Year"], hpv_df["hr2_FemalePrev"], marker="o", label="hr2")
-plt.plot(hpv_df["Year"], hpv_df["hr3_FemalePrev"], marker="o", label="hr3")
-plt.xlabel("Year")
+for group in HPV_GROUPS:
+    plt.plot(
+        hpv_df["Date"],
+        hpv_df[f"{group}_F_TotalPrev"],
+        marker="o",
+        label=group,
+    )
+plt.xlabel("Date")
 plt.ylabel("Prevalence")
 plt.title("Female HPV prevalence by group")
 plt.legend()
@@ -167,10 +234,14 @@
 
 # 4. Prevalence of different HPV groups in male
 plt.figure(figsize=(8, 5))
-plt.plot(hpv_df["Year"], hpv_df["hr1_MalePrev"], marker="o", label="hr1")
-plt.plot(hpv_df["Year"], hpv_df["hr2_MalePrev"], marker="o", label="hr2")
-plt.plot(hpv_df["Year"], hpv_df["hr3_MalePrev"], marker="o", label="hr3")
-plt.xlabel("Year")
+for group in HPV_GROUPS:
+    plt.plot(
+        hpv_df["Date"],
+        hpv_df[f"{group}_M_TotalPrev"],
+        marker="o",
+        label=group,
+    )
+plt.xlabel("Date")
 plt.ylabel("Prevalence")
 plt.title("Male HPV prevalence by group")
 plt.legend()
@@ -179,3 +250,148 @@
 plt.savefig(outputpath / "male_hpv_group_prevalence.png", dpi=300)
 plt.show()
 
+# Plot 5: Multiplicity of infection
+plt.figure(figsize=(8, 5))
+plt.plot(hpv_df["Date"], hpv_df["InfGroup1"], marker="o", label="1 HPV group")
+plt.plot(hpv_df["Date"], hpv_df["InfGroup2"], marker="o", label="2 HPV groups")
+plt.plot(hpv_df["Date"], hpv_df["InfGroup3"], marker="o", label="3 HPV groups")
+plt.xlabel("Date")
+plt.ylabel("Number of infected individuals")
+plt.title("Multiplicity of HPV infection")
+plt.legend()
+plt.grid(True)
+plt.tight_layout()
+plt.savefig(outputpath / "hpv_multiplicity_over_time.png", dpi=300)
+plt.show()
+
+# Plot 6: Multiplicity of infection by sex
+plt.figure(figsize=(9, 5))
+
+plt.plot(hpv_df["Date"], hpv_df["MaleGroup1"], marker="o", label="Male: 1 group")
+plt.plot(hpv_df["Date"], hpv_df["MaleGroup2"], marker="o", label="Male: 2 groups")
+plt.plot(hpv_df["Date"], hpv_df["MaleGroup3"], marker="o", label="Male: 3 groups")
+
+plt.plot(hpv_df["Date"], hpv_df["FemaleGroup1"], marker="o", label="Female: 1 group")
+plt.plot(hpv_df["Date"], hpv_df["FemaleGroup2"], marker="o", label="Female: 2 groups")
+plt.plot(hpv_df["Date"], hpv_df["FemaleGroup3"], marker="o", label="Female: 3 groups")
+
+plt.xlabel("Date")
+plt.ylabel("Number of infected individuals")
+plt.title("Multiplicity of HPV infection by sex")
+plt.legend()
+plt.grid(True)
+plt.tight_layout()
+plt.savefig(outputpath / "hpv_multiplicity_by_sex.png", dpi=300)
+plt.show()
+
+# Plot 7: Persistent HPV infection prevalence
+plt.figure(figsize=(8, 5))
+
+for group in HPV_GROUPS:
+    plt.plot(
+        hpv_df["Date"],
+        hpv_df[f"{group}_Persistent12_Prev"],
+        marker="o",
+        label=group,
+    )
+
+plt.xlabel("Date")
+plt.ylabel("Persistent infection prevalence")
+plt.title("Persistent HPV infection prevalence, >=12 months")
+plt.legend()
+plt.grid(True)
+plt.tight_layout()
+plt.savefig(outputpath / "hpv_persistent_prevalence.png", dpi=300)
+plt.show()
+
+# Plot 8: Persistent HPV infection by sex
+for group in HPV_GROUPS:
+    male_col = f"{group}_Persistent12_M_Prev"
+    female_col = f"{group}_Persistent12_F_Prev"
+
+    required_cols = ["Date", male_col, female_col]
+
+    plt.figure(figsize=(8, 5))
+    plt.plot(hpv_df["Date"], hpv_df[male_col], marker="o", label="Male")
+    plt.plot(hpv_df["Date"], hpv_df[female_col], marker="o", label="Female")
+
+    plt.xlabel("Date")
+    plt.ylabel("Persistent infection prevalence")
+    plt.title(f"{group} persistent infection prevalence by sex")
+    plt.legend()
+    plt.grid(True)
+    plt.tight_layout()
+    plt.savefig(outputpath / f"{group}_persistent_by_sex.png", dpi=300)
+    plt.show()
+
+# Plot 9: Female any HPV prevalence by age group
+female_age_cols = [f"Any_F_{age}_Prev" for age in AGE_LABELS]
+
+plt.figure(figsize=(9, 5))
+
+for age in AGE_LABELS:
+    plt.plot(
+        hpv_df["Date"],
+        hpv_df[f"Any_F_{age}_Prev"],
+        marker="o",
+        label=age,
+    )
+
+plt.xlabel("Date")
+plt.ylabel("Any HPV prevalence")
+plt.title("Female any HPV prevalence by age group")
+plt.legend()
+plt.grid(True)
+plt.tight_layout()
+plt.savefig(outputpath / "female_any_hpv_by_age.png", dpi=300)
+plt.show()
+
+# Plot 10: Male any HPV prevalence by age group
+male_age_cols = [f"Any_M_{age}_Prev" for age in AGE_LABELS]
+
+plt.figure(figsize=(9, 5))
+
+for age in AGE_LABELS:
+    plt.plot(
+        hpv_df["Date"],
+        hpv_df[f"Any_M_{age}_Prev"],
+        marker="o",
+        label=age,
+    )
+
+plt.xlabel("Date")
+plt.ylabel("Any HPV prevalence")
+plt.title("Male any HPV prevalence by age group")
+plt.legend()
+plt.grid(True)
+plt.tight_layout()
+plt.savefig(outputpath / "male_any_hpv_by_age.png", dpi=300)
+plt.show()
+
+# Plot 11: Any HPV prevalence by HIV/ART status
+hiv_prev_cols = [
+    "Any_HIVneg_Prev",
+    "Any_HIVpos_unknown_Prev",
+    "Any_HIVpos_noART_Prev",
+    "Any_HIVpos_unsupp_Prev",
+    "Any_HIVpos_supp_Prev",
+]
+
+available_hiv_cols = [c for c in hiv_prev_cols if c in hpv_df.columns]
+
+if len(available_hiv_cols) > 0:
+    plt.figure(figsize=(9, 5))
+
+    for col in available_hiv_cols:
+        label = col.replace("Any_", "").replace("_Prev", "")
+        plt.plot(hpv_df["Date"], hpv_df[col], marker="o", label=label)
+
+    plt.xlabel("Date")
+    plt.ylabel("Any HPV prevalence")
+    plt.title("Any HPV prevalence by HIV/ART status")
+    plt.legend()
+    plt.grid(True)
+    plt.tight_layout()
+    plt.savefig(outputpath / "hpv_prevalence_by_hiv_status.png", dpi=300)
+    plt.show()
+
diff --git a/src/tlo/methods/hpv.py b/src/tlo/methods/hpv.py
index 56288ed23d..237971f0ea 100644
--- a/src/tlo/methods/hpv.py
+++ b/src/tlo/methods/hpv.py
@@ -200,6 +200,13 @@ class HPV(Module, GenericFirstAppointmentsMixin):
         'hp_ever_infected_hr3': Property(
             Types.BOOL, 'Ever infected with hr3'),
 
+        'hp_persistent_hr1': Property(
+            Types.BOOL, 'Persistent hr1 infection, duration >= 12 months'),
+        'hp_persistent_hr2': Property(
+            Types.BOOL, 'Persistent hr2 infection, duration >= 12 months'),
+        'hp_persistent_hr3': Property(
+            Types.BOOL, 'Persistent hr3 infection, duration >= 12 months'),
+
         # "va_hpv": Property(Types.INT, "number of doses of hpv vaccine received"),
         # "va_hpv_all_doses": Property(Types.BOOL, "whether all doses have been received of the HPV vaccine"),
         # "hv_inf": Property(Types.BOOL,"Is person currently infected with HIV
@@ -252,6 +259,9 @@ def initialise_population(self, population):
         df.loc[df.is_alive, 'hp_ever_infected_hr2'] = False
         df.loc[df.is_alive, 'hp_ever_infected_hr3'] = False
         eligible = df.index[df.is_alive & (df.age_years >= 15)]
+        df.loc[df.is_alive, 'hp_persistent_hr1'] = False
+        df.loc[df.is_alive, 'hp_persistent_hr2'] = False
+        df.loc[df.is_alive, 'hp_persistent_hr3'] = False
 
         for group in self.HPV_GROUPS:
             p_init = self.parameters[f'init_prev_hpv_{group}']
@@ -268,16 +278,20 @@ def initialise_population(self, population):
                 infection_date = self.sim.date - DateOffset(months=previous_infection)
 
                 df.at[person_id, f'hp_date_infected_{group}'] = infection_date
-                df.at[person_id, f'hp_date_clear_{group}'] = pd.NaT
+                # df.at[person_id, f'hp_date_clear_{group}'] = pd.NaT
                 df.at[person_id, f'hp_duration_{group}'] = previous_infection
+                df.at[person_id, f'hp_persistent_{group}'] = previous_infection >= 12
 
         initially_infected = df.index[df.is_alive & df.hp_is_infected]
         for person_id in initially_infected:
-            group_dates = [
-                df.at[person_id, f'hp_date_infected_{group}']
-                for group in self.HPV_GROUPS
-                if not pd.isna(df.at[person_id, f'hp_date_infected_{group}'])
-            ]
+            group_dates = []
+
+            for group in self.HPV_GROUPS:
+                date = df.at[person_id, f'hp_date_infected_{group}']
+
+                if not pd.isna(date):
+                    group_dates.append(date)
+
             if len(group_dates) > 0:
                 df.at[person_id, 'hp_date_first_infected'] = min(group_dates)
 
@@ -386,9 +400,6 @@ def _get_clearance_probability(self, group, person_id, duration_months, interval
 
         return min(max(p_clear, 0.0), 1.0)
 
-
-
-
     def _get_hpv_group_set(self, person_id):
         df = self.sim.population.props
         current_groups = set()
@@ -417,11 +428,10 @@ def _add_new_infection_groups(self, person_id, new_groups):
         updated_groups = current_groups.union(new_groups)
         self._set_hpv_group_set(person_id, updated_groups)
 
-        #set infection date for new groups
+        # set infection date for new groups
         for group in new_groups:
             df.at[person_id, f'hp_date_infected_{group}'] = self.sim.date
             df.at[person_id, f'hp_ever_infected_{group}'] = True
-            df.at[person_id, f'hp_date_clear_{group}'] = pd.NaT
             df.at[person_id,f'hp_duration_{group}'] = 0
 
         # start a new HPV infection process only if the person was uninfected/ self-clear
@@ -429,7 +439,7 @@ def _add_new_infection_groups(self, person_id, new_groups):
             df.at[person_id, 'hp_date_first_infected'] = self.sim.date
             df.at[person_id, 'hp_duration_all_clear'] = -1
 
-        #set first infection date
+        # set first infection date
         if pd.isna(df.at[person_id, 'hp_date_first_infected']):
             df.at[person_id, 'hp_date_first_infected'] = self.sim.date
 
@@ -440,11 +450,12 @@ def _clear_single_group(self, person_id, group):
 
         df.at[person_id, f'hp_infected_{group}'] = False
         df.at[person_id, f'hp_date_infected_{group}'] = pd.NaT
-        df.at[person_id, f'hp_date_clear_{group}'] = pd.NaT
+        # df.at[person_id, f'hp_date_clear_{group}'] = pd.NaT
         df.at[person_id, f'hp_duration_{group}'] = -1
+        df.at[person_id, f'hp_persistent_{group}'] = False
 
-        still_infected= any(
-            df.at[person_id, f'hp_infected_{g}'] for g in self.HPV_GROUPS
+        still_infected = any(
+            df.at[person_id, f'hp_infected_{group}'] for group in self.HPV_GROUPS
         )
         df.at[person_id, 'hp_is_infected'] = still_infected
 
@@ -473,6 +484,35 @@ def _clear_single_group(self, person_id, group):
     #     duration = min(duration, 48)
     #     return duration
 
+    def _update_persistence_status(self, threshold_months=12):
+        df = self.sim.population.props
+        eligible = df.is_alive & (df.age_years >= 15)
+
+        for group in self.HPV_GROUPS:
+            inf_col = f'hp_infected_{group}'
+            date_col = f'hp_date_infected_{group}'
+            dur_col = f'hp_duration_{group}'
+            pers_col = f'hp_persistent_{group}'
+
+            non_infected = eligible & ~df[inf_col].fillna(False)
+            df.loc[non_infected, dur_col] = -1
+            df.loc[non_infected, pers_col] = False
+
+            infected = eligible & df[inf_col].fillna(False)
+
+            for person_id in df.index[infected]:
+                date_inf = df.at[person_id, date_col]
+
+                if pd.isna(date_inf):
+                    df.at[person_id, dur_col] = -1
+                    df.at[person_id, pers_col] = False
+                    continue
+
+                duration = self._months_since(date_inf, self.sim.date)
+
+                df.at[person_id, dur_col] = duration
+                df.at[person_id, pers_col] = duration >= threshold_months
+
     def initialise_simulation(self, sim):
         """Get ready for simulation start.
 
@@ -487,7 +527,7 @@ def initialise_simulation(self, sim):
             adjacent=0.15,
             distant=0.05
         )
-        self._prev_logged_prev = {}
+        self._pre_logged_prev = {}
 
         for group in self.HPV_GROUPS:
             self.lm[group] = LinearModel(
@@ -495,8 +535,8 @@ def initialise_simulation(self, sim):
                 1.0,
 
                 Predictor('va_hpv')
-                .when(1,p[f'rr_{group}_vaccinated'])
-                .when(2,p[f'rr_{group}_vaccinated']),
+                .when(1, p[f'rr_{group}_vaccinated'])
+                .when(2, p[f'rr_{group}_vaccinated']),
 
                 # Predictor(f'hp_ever_infected_{group}')
                 # .when(True, p[f'rr_immunity_{group}'])
@@ -550,6 +590,9 @@ def on_birth(self, mother_id, child_id):
         df.at[child_id, 'hp_ever_infected_hr1'] = False
         df.at[child_id, 'hp_ever_infected_hr2'] = False
         df.at[child_id, 'hp_ever_infected_hr3'] = False
+        df.at[child_id, 'hp_persistent_hr1'] = False
+        df.at[child_id, 'hp_persistent_hr2'] = False
+        df.at[child_id, 'hp_persistent_hr3'] = False
 
     def report_daly_values(self):
         # This must send back a pd.Series or pd.DataFrame that reports on the average daly-weights that have been
@@ -563,9 +606,16 @@ def report_daly_values(self):
         return health_values  # returns the series
 
     def report_summary_stats(self):
-        df = self.sim.population.props
-        prevalence_by_age_group_sex = get_counts_by_sex_and_age_group(df, 'hp_is_infected')
-        return {'infected': prevalence_by_age_group_sex}
+        def report_summary_stats(self):
+            df = self.sim.population.props
+            summary = {
+                'infected_any': get_counts_by_sex_and_age_group(df, 'hp_is_infected')}
+
+            for group in self.HPV_GROUPS:
+                summary[f'infected_{group}'] = get_counts_by_sex_and_age_group(df, f'hp_infected_{group}')
+                summary[f'persistent_{group}'] = get_counts_by_sex_and_age_group(df, f'hp_persistent_{group}')
+
+        return summary
 
 class HpvInfectionEvent(RegularEvent, PopulationScopeEventMixin):
     """This event is occurring regularly at one 6 months intervals and controls the infection process of HPV."""
@@ -686,6 +736,8 @@ def apply(self, population):
             if len(new_group) > 0:
                 module._add_new_infection_groups(person_id, new_group)
 
+        module._update_persistence_status(threshold_months=12)
+
 class HpvLoggingEvent(RegularEvent, PopulationScopeEventMixin):
     def __init__(self, module):
         """Produce a summmary of the numbers of people with respect to their 'hpv status'
@@ -731,7 +783,7 @@ def apply(self, population):
         log_data['TotalInf'] = total_inf
         log_data['TotalPrev'] = sub['hp_is_infected'].mean()
 
-        for sex_name, sex_df in  [('M', sub.loc[sub.sex == 'M']),
+        for sex_name, sex_df in [('M', sub.loc[sub.sex == 'M']),
                                  ('F', sub.loc[sub.sex == 'F'])]:
             n = len(sex_df)
             log_data[f'{sex_name}_N'] = int(n)
@@ -787,7 +839,7 @@ def apply(self, population):
                 )
 
         # 4. Delta
-        prev_logged = getattr(module, '_prev_logged_prev', {})
+        prev_logged = getattr(module, '_pre_logged_prev', {})
         for key, current_val in prev_snapshot.items():
             previous_val = prev_logged.get(key, math.nan)
             if pd.isna(previous_val) or pd.isna(current_val):
@@ -795,10 +847,10 @@ def apply(self, population):
             else:
                 log_data[f'{key}_Delta'] = current_val - previous_val
 
-        module._prev_logged_prev = prev_snapshot
+        module._pre_logged_prev = prev_snapshot
 
         # 5. multiplicity of infection
-        infection_people = df.index[df.is_alive & (df.age_years>=15) &df.hp_is_infected]
+        infection_people = df.index[df.is_alive & (df.age_years >= 15) & df.hp_is_infected]
         n_group_1 = 0
         n_group_2 = 0
         n_group_3 = 0
@@ -813,10 +865,10 @@ def apply(self, population):
 
         for person_id in infection_people:
             n_group = len(module._get_hpv_group_set(person_id))
-            sex = df.at[person_id,'sex']
+            sex = df.at[person_id, 'sex']
 
-            if n_group ==1:
-                n_group_1 +=1
+            if n_group == 1:
+                n_group_1 += 1
                 if sex == 'M':
                     male_n_group_1 += 1
                 elif sex =='F':
@@ -848,5 +900,36 @@ def apply(self, population):
         log_data['FemaleGroup2'] = female_n_group_2
         log_data['FemaleGroup3'] = female_n_group_3
 
-        logger.info(key='summary', data=log_data)
+        # 6. Persistent infection 统计
+        for hpv_group in module.HPV_GROUPS:
+            pers_col = f'hp_persistent_{hpv_group}'
 
+            if pers_col not in sub.columns:
+                continue
+
+            persistent = sub[pers_col].fillna(False)
+
+            log_data[f'{hpv_group}_Persistent12_N'] = int(persistent.sum())
+            log_data[f'{hpv_group}_Persistent12_Prev'] = float(persistent.mean())
+
+            for sex_name, sex_df in [('M', sub.loc[sub.sex == 'M']),
+                                     ('F', sub.loc[sub.sex == 'F'])]:
+                n = len(sex_df)
+                if n > 0:
+                    log_data[f'{hpv_group}_Persistent12_{sex_name}_Prev'] = float(
+                        sex_df[pers_col].fillna(False).mean()
+                    )
+                else:
+                    log_data[f'{hpv_group}_Persistent12_{sex_name}_Prev'] = math.nan
+
+            for age_group in module.AGE_LABELS:
+                age_df = sub.loc[sub['age_group'] == age_group]
+                n = len(age_df)
+                if n > 0:
+                    log_data[f'{hpv_group}_Persistent12_{age_group}_Prev'] = float(
+                        age_df[pers_col].fillna(False).mean()
+                    )
+                else:
+                    log_data[f'{hpv_group}_Persistent12_{age_group}_Prev'] = math.nan
+
+        logger.info(key='summary', data=log_data)

From 4e540d60052072d39377a8f3279e17d00a95cdfd Mon Sep 17 00:00:00 2001
From: Liuxinac <liuxinac@hotmail.com>
Date: Tue, 5 May 2026 17:53:37 +0100
Subject: [PATCH 10/10] HPV Transmission Model Version 1.0

---
 src/tlo/methods/hpv.py           |  25 +++--
 tests/test_docs_data/test_HPV.py | 170 +++++++++++++++++++++++++++++++
 2 files changed, 182 insertions(+), 13 deletions(-)
 create mode 100644 tests/test_docs_data/test_HPV.py

diff --git a/src/tlo/methods/hpv.py b/src/tlo/methods/hpv.py
index 237971f0ea..25fc7884a0 100644
--- a/src/tlo/methods/hpv.py
+++ b/src/tlo/methods/hpv.py
@@ -64,7 +64,7 @@ class HPV(Module, GenericFirstAppointmentsMixin):
 
     HPV_GROUPS = ['hr1', 'hr2', 'hr3']
     AGE_BINS = [15, 20, 25, 35, 45, 55, 200]
-    AGE_LABELS = ['15_19', '20_24', '25_34','35_44', '45_54', '55plus']
+    AGE_LABELS = ['15_19', '20_24', '25_34', '35_44', '45_54', '55plus']
 
     PARAMETERS = {
         "init_prev_hpv_hr1": Parameter(
@@ -147,7 +147,7 @@ class HPV(Module, GenericFirstAppointmentsMixin):
             "Rate ratio for HPV clearance among PLWH on ART but not virally suppressed",
         ),
 
-        ## As MC suggested, remove the immunity part
+        # As MC suggested, remove the immunity part
         # "rr_immunity_hr1": Parameter(
         #     Types.REAL,
         #     "Relative risk for reinfection with hr1 if previously infected",
@@ -180,11 +180,11 @@ class HPV(Module, GenericFirstAppointmentsMixin):
         'hp_date_first_infected': Property(
             Types.DATE, 'Start date of current HPV infection'),
         'hp_duration_hr1': Property(
-            Types.INT,'Duration for current hr1 infection'),
+            Types.INT, 'Duration for current hr1 infection'),
         'hp_duration_hr2': Property(
-            Types.INT,'Duration for current hr2 infection'),
+            Types.INT, 'Duration for current hr2 infection'),
         'hp_duration_hr3': Property(
-            Types.INT,'Duration for current hr3 infection'),
+            Types.INT, 'Duration for current hr3 infection'),
         'hp_duration_all_clear': Property(
             Types.INT, 'Duration for current all HPV infection'),
         # 'hp_date_clear_hr1': Property(
@@ -302,7 +302,7 @@ def _get_age_group_series(self, ages):
             labels=self.AGE_LABELS,
             right=False  # right side not included
         )
-    def _get_age_group(self,age_years):
+    def _get_age_group(self, age_years):
         for i in range(len(self.AGE_BINS)-1):
             if self.AGE_BINS[i] <= age_years < self.AGE_BINS[i + 1]:
                 return self.AGE_LABELS[i]
@@ -606,14 +606,13 @@ def report_daly_values(self):
         return health_values  # returns the series
 
     def report_summary_stats(self):
-        def report_summary_stats(self):
-            df = self.sim.population.props
-            summary = {
-                'infected_any': get_counts_by_sex_and_age_group(df, 'hp_is_infected')}
+        df = self.sim.population.props
+        summary = {
+            'infected_any': get_counts_by_sex_and_age_group(df, 'hp_is_infected')}
 
-            for group in self.HPV_GROUPS:
-                summary[f'infected_{group}'] = get_counts_by_sex_and_age_group(df, f'hp_infected_{group}')
-                summary[f'persistent_{group}'] = get_counts_by_sex_and_age_group(df, f'hp_persistent_{group}')
+        for group in self.HPV_GROUPS:
+            summary[f'infected_{group}'] = get_counts_by_sex_and_age_group(df, f'hp_infected_{group}')
+            summary[f'persistent_{group}'] = get_counts_by_sex_and_age_group(df, f'hp_persistent_{group}')
 
         return summary
 
diff --git a/tests/test_docs_data/test_HPV.py b/tests/test_docs_data/test_HPV.py
new file mode 100644
index 0000000000..f64b3e7eb7
--- /dev/null
+++ b/tests/test_docs_data/test_HPV.py
@@ -0,0 +1,170 @@
+import os
+from pathlib import Path
+
+import pandas as pd
+import pytest
+
+from tlo import Date, Simulation, logging
+from tlo.methods import (
+    demography,
+    enhanced_lifestyle,
+    epi,
+    healthburden,
+    healthseekingbehaviour,
+    healthsystem,
+    hpv,
+    hiv,
+    simplified_births,
+    symptommanager,
+)
+
+try:
+    resourcefilepath = Path(os.path.dirname(__file__)) / "../resources"
+except NameError:
+    resourcefilepath = "resources"
+
+def check_dtypes(simulation):
+    df = simulation.population.props
+    orig = simulation.population.new_row
+    assert (df.dtypes == orig.dtypes).all()
+
+log_config = {
+    "filename": "hpv_test",  # The name of the output file (a timestamp will be appended).
+    "directory": "./outputs/",  # The default output path is `./outputs`. Change it here, if necessary
+    "custom_levels": {  # Customise the output of specific loggers. They are applied in order:
+        "*": logging.WARNING,  # Asterisk matches all loggers - we set the default level to WARNING
+        "tlo.methods.hpv": logging.INFO,
+        "tlo.methods.healthsystem": logging.INFO,
+        "tlo.methods.demography": logging.INFO
+    }
+}
+
+@pytest.fixture
+def sim(seed):
+    start_date = Date(2010, 1, 1)
+    sim = Simulation(start_date=start_date, seed=seed, log_config=None, resourcefilepath=resourcefilepath)
+
+    # Register the appropriate modules
+    sim.register(
+        demography.Demography(),
+        simplified_births.SimplifiedBirths(),
+        enhanced_lifestyle.Lifestyle(),
+        symptommanager.SymptomManager(),
+        healthseekingbehaviour.HealthSeekingBehaviour(),
+        healthburden.HealthBurden(),
+        healthsystem.HealthSystem(
+            disable=True,  # disables the health system constraints so all HSI events run
+        ),
+        epi.Epi(),
+        hpv.HPV(),
+        hiv.Hiv
+    )
+
+    return sim
+
+
+def test_single_person(sim):
+    """
+    run sim for one person
+    assign infection
+    check symptoms scheduled
+    check symptoms resolved correctly
+    """
+    # set high death rate - change all symptom probabilities to 1
+    sim.modules['HPV'].parameters["symptom_prob"]["probability"] = 1
+
+    sim.make_initial_population(n=1)
+    df = sim.population.props
+    person_id = 0
+    df.at[person_id, "hp_is_infected"] = True
+
+    # HPV onset event
+    inf_event = hpv.HPV(person_id=person_id, module=sim.modules['HPV'])
+    inf_event.apply(person_id)
+    assert not pd.isnull(df.at[person_id, "me_date_measles"])
+
+    # check measles symptom resolve event and death scheduled
+    events_for_this_person = sim.find_events_for_person(person_id)
+    assert len(events_for_this_person) > 0
+    next_event_date, next_event_obj = events_for_this_person[0]
+    assert isinstance(next_event_obj, (measles.MeaslesDeathEvent, measles.MeaslesSymptomResolveEvent))
+
+
+@pytest.mark.slow
+def test_measles_cases_and_hsi_occurring(sim):
+    """ Run the measles module
+    check dtypes consistency
+    check infections occurring
+    check measles onset event scheduled
+    check symptoms assigned
+    check treatments occurring
+    """
+
+    end_date = Date(2011, 12, 31)
+    popsize = 1000
+
+    # set high transmission probability
+    sim.modules['Measles'].parameters['beta_baseline'] = 1.0
+
+    # set high death rate and change all symptom probabilities to 1
+    cfr = sim.modules['Measles'].parameters["case_fatality_rate"]
+    sim.modules['Measles'].parameters["case_fatality_rate"] = {k: 1.0 for k, v in cfr.items()}
+    sim.modules['Measles'].parameters["symptom_prob"]["probability"] = 1
+
+    # Make the population
+    sim.make_initial_population(n=popsize)
+
+    # check data types
+    check_dtypes(sim)
+    sim.simulate(end_date=end_date)
+    check_dtypes(sim)
+
+    df = sim.population.props
+
+    # check people getting measles
+    assert df['me_has_measles'].values.sum() > 0  # current cases of measles
+
+    # check that everyone who is currently infected gets a measles onset or symptom resolve event
+    # they can have multiple symptom resolve events scheduled (by symptom onset and by treatment)
+    inf = df.loc[df.is_alive & df.me_has_measles].index.tolist()
+
+    for idx in inf:
+        events_for_this_person = sim.find_events_for_person(idx)
+        assert len(events_for_this_person) > 0
+        # assert measles event in event list for this person
+        assert "tlo.methods.measles" in str(events_for_this_person)
+        # find the first measles event
+        measles_event_date = [date for (date, event) in events_for_this_person if "tlo.methods.measles" in str(event)]
+        assert measles_event_date[0] >= df.loc[idx, "me_date_measles"]
+
+    # check symptoms assigned
+    # there is an incubation period, so infected people may not have rash immediately
+    # if on treatment for measles, must have rash for diagnosis
+    has_rash = sim.modules['SymptomManager'].who_has('rash')
+    current_measles_tx = df.index[df.is_alive & df.me_has_measles & df.me_on_treatment]
+    if current_measles_tx.any():
+        assert set(current_measles_tx) <= set(has_rash)
+
+    # check if any measles deaths occurred
+    assert df.cause_of_death.loc[~df.is_alive].str.startswith('Measles').any()
+
+
+@pytest.mark.slow
+def test_measles_zero_death_rate(sim):
+
+    end_date = Date(2010, 12, 31)
+    popsize = 10_000
+
+    # set zero death rate
+    cfr = sim.modules['Measles'].parameters["case_fatality_rate"]
+    sim.modules['Measles'].parameters["case_fatality_rate"] = {k: 0.0 for k, v in cfr.items()}
+
+    sim.make_initial_population(n=popsize)
+    sim.simulate(end_date=end_date)
+    df = sim.population.props
+
+    # no symptoms should equal no treatment (unless other rash has prompted incorrect tx: unlikely)
+    assert not (df.loc[df.is_alive, 'me_on_treatment']).all()
+
+    # check that there have been no deaths caused by measles
+    assert not df.cause_of_death.loc[~df.is_alive].str.startswith('Measles').any()