22Module with helper functions
33"""
44
5- from renal_capacity_model .utils import get_logger
6- import pandas as pd
7- import numpy as np
8- from itertools import product
95import math
6+ from itertools import product
107from typing import TYPE_CHECKING
118
9+ import numpy as np
10+ import pandas as pd
11+
12+ from renal_capacity_model .utils import get_logger
13+
1214if TYPE_CHECKING :
1315 from renal_capacity_model .config import Config
1416
@@ -238,7 +240,7 @@ def adjust_next_modality(event_log: pd.DataFrame) -> pd.DataFrame:
238240
239241
240242def process_event_log (event_log : pd .DataFrame ) -> pd .DataFrame :
241- """Processes event log for easier validation and debugging
243+ """Processes event log for easier validation and debugging. Also removes unnecessary rows that were outdated by the HHD intervention if applied.
242244
243245 Args:
244246 event_log (pd.DataFrame): event log
@@ -247,6 +249,21 @@ def process_event_log(event_log: pd.DataFrame) -> pd.DataFrame:
247249 pd.DataFrame with additional columns ("year_start", "end_time", "year_end")
248250 and clearer information on which modality was next
249251 """
252+ ## loop through the patient ids and remove any rows where time_starting_activity_from is equal to time_starting_activity_from in the row below.
253+ for patient_id in event_log ["patient_id" ].unique ():
254+ if (
255+ event_log .loc [
256+ event_log ["patient_id" ] == patient_id , "time_starting_activity_from"
257+ ]
258+ .duplicated ()
259+ .any ()
260+ ):
261+ df = event_log .loc [
262+ event_log ["patient_id" ] == patient_id , "time_starting_activity_from"
263+ ]
264+ duplicates_mask = df .duplicated (keep = False )
265+ duplicate_index = df .index [duplicates_mask ].tolist ()[0 ]
266+ event_log = event_log .drop (duplicate_index )
250267 event_log ["year_start" ] = event_log ["time_starting_activity_from" ].apply (
251268 calculate_lookup_year
252269 )
0 commit comments