@@ -29,16 +29,21 @@ class DataFrameCreator:
2929 _config (dict): The configuration dictionary for the DataFrame.
3030 """
3131
32- def __init__ (self , config_dataframe : dict , h5_path : Path ) -> None :
32+ def __init__ (self , config_dataframe : dict , h5_path : Path ,
33+ is_first_file : bool = True , base_timestamp : pd .Timestamp = None ) -> None :
3334 """
3435 Initializes the DataFrameCreator class.
3536
3637 Args:
3738 config_dataframe (dict): The configuration dictionary with only the dataframe key.
3839 h5_path (Path): Path to the h5 file.
40+ is_first_file (bool): Whether this is the first file in a multi-file run.
41+ base_timestamp (pd.Timestamp): Base timestamp from the first file (for subsequent files).
3942 """
4043 self .h5_file = h5py .File (h5_path , "r" )
4144 self ._config = config_dataframe
45+ self .is_first_file = is_first_file
46+ self .base_timestamp = base_timestamp
4247
4348 index_alias = self ._config .get ("index" , ["countId" ])[0 ]
4449 # all values except the last as slow data starts from start of file
@@ -83,6 +88,19 @@ def get_dataset_array(
8388
8489 return dataset
8590
def get_base_timestamp(self) -> pd.Timestamp:
    """
    Extracts the base timestamp from the first file to be used for subsequent files.

    Reads the first entry of the dataset named by the
    ``first_event_time_stamp_key`` config entry and parses it with
    ``pd.to_datetime``.

    Returns:
        pd.Timestamp: The base timestamp from the first file.

    Raises:
        ValueError: If this instance was not created as the first file.
        KeyError: If ``first_event_time_stamp_key`` is not set in the config.
    """
    if not self.is_first_file:
        raise ValueError("get_base_timestamp() should only be called on the first file")

    # Validate the config key up front, instead of indexing the file with None.
    timestamp_key = self._config.get("first_event_time_stamp_key")
    if timestamp_key is None:
        raise KeyError(
            "'first_event_time_stamp_key' is missing from the dataframe config; "
            "cannot determine the base timestamp"
        )

    first_timestamp = self.h5_file[timestamp_key][0]
    # Only decode byte strings; a value that is already text is parsed as is.
    if isinstance(first_timestamp, bytes):
        first_timestamp = first_timestamp.decode()
    return pd.to_datetime(first_timestamp)
103+
86104 @property
87105 def df_electron (self ) -> pd .DataFrame :
88106 """
@@ -141,14 +159,72 @@ def df_train(self) -> pd.DataFrame:
141159 @property
142160 def df_timestamp (self ) -> pd .DataFrame :
143161 """
144- Uses the first_event_time_stamp_key to get initial timestamp and the
145- ms_markers_key which is a dataset of exposure times same size as the index."""
162+ For files with first_event_time_stamp_key: Uses that as initial timestamp.
163+ For files with only millis_counter_key: Uses that as absolute timestamp.
164+ Both use ms_markers_key for exposure times within the file.
165+ """
146166
147- first_timestamp = self .h5_file [self ._config .get ("first_event_time_stamp_key" )][
148- 0
149- ] # single value
150- ts_start = pd .to_datetime (first_timestamp .decode ())
151- # actually in seconds but using milliseconds for consistency with mpes loader
167+ # Try to determine which timestamp approach to use based on available data
168+ first_timestamp_key = self ._config .get ("first_event_time_stamp_key" )
169+ millis_counter_key = self ._config .get ("millis_counter_key" , "/DLD/millisecCounter" )
170+
171+ has_first_timestamp = (first_timestamp_key is not None and
172+ first_timestamp_key in self .h5_file and
173+ len (self .h5_file [first_timestamp_key ]) > 0 )
174+
175+ has_millis_counter = (millis_counter_key in self .h5_file and
176+ len (self .h5_file [millis_counter_key ]) > 0 )
177+
178+ # Log millisecond counter values for ALL files
179+ if has_millis_counter :
180+ millis_counter_values = self .h5_file [millis_counter_key ][()]
181+
182+ if self .is_first_file and has_first_timestamp :
183+ logger .warning ("DEBUG: Taking first file with scan start timestamp path" )
184+ # First file with scan start timestamp
185+ first_timestamp = self .h5_file [first_timestamp_key ][0 ]
186+ base_ts = pd .to_datetime (first_timestamp .decode ())
187+
188+ # Also log millisecond counter values for first file if available
189+ if has_millis_counter :
190+ millis_counter_values = self .h5_file [millis_counter_key ][()]
191+ millis_min = millis_counter_values [0 ] # First value
192+ millis_max = millis_counter_values [- 1 ] # Last value
193+
194+ # Add the first millisecond counter value to the base timestamp
195+ ts_start = base_ts + pd .Timedelta (milliseconds = millis_min )
196+
197+ # Calculate what these would be as timestamps
198+ ts_min_from_millis = base_ts + pd .Timedelta (milliseconds = millis_min )
199+ ts_max_from_millis = base_ts + pd .Timedelta (milliseconds = millis_max )
200+ else :
201+ # Fallback if no millisecond counter
202+ ts_start = base_ts
203+ elif not self .is_first_file and self .base_timestamp is not None and has_millis_counter :
204+ # Subsequent files: use base timestamp + millisecond counter offset
205+ millis_counter_values = self .h5_file [millis_counter_key ][()] # Get all values
206+
207+ # Get min (first) and max (last) millisecond values
208+ millis_min = millis_counter_values [0 ] # First value
209+ millis_max = millis_counter_values [- 1 ] # Last value
210+
211+ # Calculate timestamps for min and max
212+ ts_min = self .base_timestamp + pd .Timedelta (milliseconds = millis_min )
213+ ts_max = self .base_timestamp + pd .Timedelta (milliseconds = millis_max )
214+
215+ logger .warning (f"DEBUG: Timestamp for min: { ts_min } " )
216+ logger .warning (f"DEBUG: Timestamp for max: { ts_max } " )
217+
218+ # Use the first value (start time) for calculating offset
219+ millis_counter = millis_counter_values [0 ] # First element is the start time
220+ offset = pd .Timedelta (milliseconds = millis_counter )
221+ ts_start = self .base_timestamp + offset
222+ else :
223+ logger .warning ("DEBUG: Falling through to undefined ts_start - THIS IS THE PROBLEM!" )
224+ logger .warning (f"DEBUG: Condition 1: is_first_file={ self .is_first_file } AND has_first_timestamp={ has_first_timestamp } = { self .is_first_file and has_first_timestamp } " )
225+ logger .warning (f"DEBUG: Condition 2: not is_first_file={ not self .is_first_file } AND base_timestamp is not None={ self .base_timestamp is not None } AND has_millis_counter={ has_millis_counter } = { not self .is_first_file and self .base_timestamp is not None and has_millis_counter } " )
226+
227+ # Get exposure times (in seconds) for this file
152228 exposure_time = self .h5_file [self ._config .get ("ms_markers_key" )][()]
153229
154230 # Calculate cumulative exposure times
0 commit comments