Skip to content

Commit 79c0e34

Browse files
EliEli
authored and committed
Restored missing read function.
1 parent dd4b106 commit 79c0e34

1 file changed

Lines changed: 60 additions & 0 deletions

File tree

dms_datastore/reconcile_data.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -605,6 +605,66 @@ def _merge_screened_flags(
605605
return out
606606

607607

608+
def _read_csv_timeseries(path: str) -> pd.DataFrame:
    """Read a dms-datastore CSV file into a DataFrame.

    Parameters
    ----------
    path : str
        Path to a CSV file with a commented YAML-like header and a ``datetime``
        column.

    Returns
    -------
    df : pandas.DataFrame
        DataFrame indexed by a ``DatetimeIndex``.

    Raises
    ------
    ValueError
        If the file does not produce a DatetimeIndex or contains duplicate
        timestamps. Rows whose ``datetime`` field is missing parse to ``NaT``;
        two or more such rows are duplicates too and are reported as ``NaT``.

    Notes
    -----
    This reader forces ``dtype={'user_flag': str}`` so that screened flag columns do
    not become floats due to NA inference. Normalization to nullable Int64 is
    performed separately by :func:`_normalize_flag`.
    """
    # comment="#" makes pandas skip the "#"-prefixed header lines entirely.
    df = pd.read_csv(
        path,
        comment="#",
        parse_dates=["datetime"],
        index_col="datetime",
        dtype={"user_flag": str},
    )

    # If date parsing fails pandas leaves a plain (object) index; reject it.
    if not isinstance(df.index, pd.DatetimeIndex):
        raise ValueError(f"Expected datetime index in {path}")

    if df.index.has_duplicates:
        # Provide actionable detail while remaining fail-fast.
        # Report up to `max_examples` duplicate timestamps with multiplicities.
        max_examples = 25

        # dropna=False: `has_duplicates` also fires on repeated NaT values, but
        # value_counts() drops NaT by default, which would yield a report of
        # "0 duplicated timestamps" for files with several missing datetimes.
        counts = df.index.value_counts(dropna=False)
        dups = counts[counts > 1].sort_index()

        # Build a compact message; avoid dumping huge lists.
        # Extra rows = rows beyond the first occurrence of each timestamp.
        total_dup_rows = int((dups - 1).sum())
        n_dup_keys = int(dups.shape[0])

        preview = dups.iloc[:max_examples]
        preview_txt = ", ".join(
            f"{ts.isoformat()}×{int(cnt)}" for ts, cnt in preview.items()
        )
        more = (
            ""
            if n_dup_keys <= max_examples
            else f" (+{n_dup_keys - max_examples} more)"
        )

        raise ValueError(
            "Duplicate timestamps in "
            f"{path}: {n_dup_keys} duplicated timestamps, {total_dup_rows} extra rows. "
            f"Examples: {preview_txt}{more}"
        )

    return df
667+
608668
# -----------------------------
609669
# Public APIs
610670
# -----------------------------

0 commit comments

Comments
 (0)