Skip to content

Commit 2b9372e

Browse files
committed
Fix parquet index handling in buffer files, add get_count_rate_simple, and compute elapsed time from millisecCounter
1 parent 87ec0cf commit 2b9372e

2 files changed

Lines changed: 62 additions & 77 deletions

File tree

src/sed/loader/cfel/buffer_handler.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -277,13 +277,18 @@ def _save_buffer_file(self, file_set, is_first_file=True, base_timestamp=None, i
277277
# This maintains cumulative event counts across multiple files
278278
electron_df = df.dropna(subset=electron_channels).astype(dtypes)
279279
logger.debug(f"Saving electron buffer with shape: {electron_df.shape}")
280-
electron_df.to_parquet(paths["electron"])
280+
# Move the index into regular column(s) so it is stored as data; the parquet file is then written without a separate index
281+
electron_df = electron_df.reset_index()
282+
electron_df.to_parquet(paths["electron"], index=False)
281283

282284
# Create and save timed dataframe
283285
dtypes = get_dtypes(self._config, df_timed.columns.values)
284286
timed_df = df_timed.astype(dtypes)
285287
logger.debug(f"Saving timed buffer with shape: {timed_df.shape}")
286-
timed_df.to_parquet(paths["timed"])
288+
# Move the index into regular column(s) so it is stored as data; the parquet file is then written without a separate index
289+
timed_df = timed_df.reset_index()
290+
timed_df.to_parquet(paths["timed"], index=False)
291+
287292

288293
logger.debug(f"Processed {paths['raw'].stem} in {time.time() - start_time:.2f}s")
289294

src/sed/loader/cfel/loader.py

Lines changed: 55 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,7 @@ def get_count_rate_ms(
389389
*,
390390
mode: str = "file", # "file" or "point"
391391
first_files: int | None = None,
392+
**kwds,
392393
) -> tuple[np.ndarray, np.ndarray]:
393394
"""
394395
Count-rate calculation using millisecCounter and NumOfEvents.
@@ -510,39 +511,35 @@ def get_count_rate_ms(
510511
# -------------------------------
511512
# File-based count rate
512513
# -------------------------------
513-
# def get_count_rate(
514-
# self,
515-
# fids: Sequence[int] | None = None,
516-
# runs: Sequence[int] | None = None,
517-
# ) -> tuple[np.ndarray, np.ndarray]:
518-
# """
519-
# Returns count rate per file using the total number of events and elapsed time.
520-
# Calculates the count rate using the number of rows and elapsed time for each file.
521-
# Hence the resolution is not very high, but this method is very fast.
522-
523-
# Args:
524-
# fids (Sequence[int]): A sequence of file IDs. Defaults to all files.
514+
def get_count_rate_simple(
515+
self,
516+
fids: Sequence[int] | None = None,
517+
runs: Sequence[int] | None = None,
518+
) -> tuple[np.ndarray, np.ndarray]:
519+
"""
520+
Returns count rate per file using file statistics (coarse, fast method).
521+
Calculates the count rate using the number of rows and elapsed time for each file.
522+
This is a simple, fast method for coarse count rate evaluation.
525523
526-
# Keyword Args:
527-
# runs: A sequence of run IDs.
524+
Args:
525+
fids (Sequence[int]): A sequence of file IDs. Defaults to all files.
528526
529-
# Returns:
530-
# tuple[np.ndarray, np.ndarray]: The count rate and elapsed time in seconds.
527+
Keyword Args:
528+
runs: A sequence of run IDs.
531529
532-
# Raises:
533-
# KeyError: If the file statistics are missing.
534-
# """
535-
# fids_resolved = self._resolve_fids(fids=fids, runs=runs)
530+
Returns:
531+
tuple[np.ndarray, np.ndarray]: The count rate and elapsed time in seconds.
536532
537-
# all_counts = [self.metadata["file_statistics"]["electron"][str(fid)]["num_rows"] for fid in fids_resolved]
538-
# elapsed_times = [self.get_elapsed_time(fids=[fid]) for fid in fids_resolved]
539-
# print(elapsed_times,all_counts)
533+
Raises:
534+
KeyError: If the file statistics are missing.
535+
"""
536+
fids_resolved = self._resolve_fids(fids=fids, runs=runs)
540537

541-
# # count_rate = np.array(all_counts) / np.array(elapsed_times)
542-
# count_rate = np.array(all_counts) / np.array(elapsed_times).flatten()
543-
# print(f"Count rates: {count_rate}")
544-
# times = np.cumsum(elapsed_times)
545-
# return count_rate, times
538+
all_counts = [self.metadata["file_statistics"]["electron"][str(fid)]["num_rows"] for fid in fids_resolved]
539+
elapsed_times = self.get_elapsed_time(fids=fids_resolved)
540+
count_rate = np.array(all_counts) / np.array(elapsed_times)
541+
times = np.cumsum(elapsed_times)
542+
return count_rate, times
546543
def get_count_rate(
547544
self,
548545
fids: Sequence[int] | None = None,
@@ -642,10 +639,9 @@ def get_elapsed_time(
642639
aggregate: bool = False,
643640
) -> float | list[float]:
644641
"""
645-
Calculates the elapsed acquisition time.
642+
Calculates the elapsed acquisition time using millisecCounter.
646643
647-
Uses global timestamp / millisecCounter logic established in
648-
read_dataframe() and df_timestamp.
644+
Uses millisecCounter directly from H5 files for accurate duration calculation.
649645
650646
Parameters
651647
----------
@@ -665,17 +661,7 @@ def get_elapsed_time(
665661
Elapsed time(s) in seconds.
666662
"""
667663

668-
try:
669-
file_statistics = self.metadata["file_statistics"]["timed"]
670-
except Exception as exc:
671-
raise KeyError(
672-
"File statistics missing. Use 'read_dataframe' first."
673-
) from exc
674-
675-
ts_alias = self._config["dataframe"]["columns"].get(
676-
"timestamp",
677-
"timeStamp",
678-
)
664+
millis_key = self._config.get("millis_counter_key", "/DLD/millisecCounter")
679665

680666
# ----------------------------
681667
# Resolve files consistently
@@ -687,57 +673,51 @@ def get_elapsed_time(
687673
)
688674

689675
elapsed_per_file: list[float] = []
690-
prev_max_ts_s = None # Track previous file's max timestamp in seconds
691676

692-
for i, fid in enumerate(fids_resolved):
677+
for fid in fids_resolved:
693678
try:
694-
ts_info = file_statistics[str(fid)]["columns"][ts_alias]
695-
696-
max_ts = ts_info["max"]
697-
min_ts = ts_info["min"]
698-
699-
# Normalize to float seconds
700-
if hasattr(max_ts, "total_seconds"):
701-
max_ts_s = max_ts.total_seconds()
702-
else:
703-
max_ts_s = float(max_ts)
679+
with h5py.File(self.files[fid], "r") as h5:
680+
if millis_key not in h5:
681+
raise KeyError(f"millisecCounter not found in file {self.files[fid]}")
704682

705-
if hasattr(min_ts, "total_seconds"):
706-
min_ts_s = min_ts.total_seconds()
707-
else:
708-
min_ts_s = float(min_ts)
709-
710-
# Calculate elapsed time correctly for multi-file runs
711-
if i == 0:
712-
dt_s = max_ts_s - min_ts_s
713-
else:
714-
dt_s = max_ts_s - prev_max_ts_s
715-
716-
prev_max_ts_s = max_ts_s
717-
718-
if dt_s < 0:
719-
raise ValueError(
720-
f"Negative elapsed time in file {fid}: {dt_s}"
683+
ms = np.asarray(h5[millis_key], dtype=np.float64)
684+
685+
if len(ms) == 0:
686+
raise ValueError(f"Empty millisecCounter in file {self.files[fid]}")
687+
688+
# Duration is simply last - first millisecond value
689+
dt_ms = ms[-1] - ms[0]
690+
dt_s = dt_ms / 1000.0 # Convert to seconds
691+
692+
if dt_s < 0:
693+
raise ValueError(
694+
f"Negative elapsed time in file {fid}: {dt_s}s"
695+
)
696+
697+
elapsed_per_file.append(dt_s)
698+
699+
logger.debug(
700+
f"[get_elapsed_time] File {fid}: ms_min={ms[0]}, ms_max={ms[-1]}, "
701+
f"duration={dt_s:.2f}s"
721702
)
722-
703+
723704
except KeyError as exc:
724705
filename = (
725706
Path(self.files[fid]).name
726707
if fid < len(self.files)
727708
else f"file_{fid}"
728709
)
729710
raise KeyError(
730-
f"Timestamp metadata missing in file {filename} (fid={fid}). "
731-
"Add timestamp column and alias to config before loading."
711+
f"millisecCounter missing in file {filename} (fid={fid}). "
712+
"Ensure millisecCounter is available in the H5 file."
732713
) from exc
733714

734-
elapsed_per_file.append(dt_s)
735-
736715
if aggregate:
737716
return sum(elapsed_per_file)
738717

739718
return elapsed_per_file
740719

720+
741721
def read_dataframe(
742722
self,
743723
files: str | Sequence[str] = None,

0 commit comments

Comments
 (0)