Skip to content

Commit e86a888

Browse files
EliEli
authored and committed
Add inspect_duplicate_index plus format/import errors
1 parent bbcbe53 commit e86a888

File tree

4 files changed

+50
-2
lines changed

4 files changed

+50
-2
lines changed

vtools/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
__version__ = "unknown"
2424

2525
from vtools.data.gap import *
26+
from vtools.data.duplicate_index import inspect_duplicate_index
2627
from vtools.data.vtime import *
2728
from vtools.data.timeseries import *
2829
from vtools.data.dst import *

vtools/data/duplicate_index.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
2+
def inspect_duplicate_index(ts, label=None, max_times=5, max_rows=5):
    """
    Print a concise diagnostic for duplicate timestamps in a time series.

    The report is written to stdout, every line prefixed with
    ``[dup-debug]``. It lists how many distinct timestamps are duplicated,
    how many rows carry a duplicated timestamp, and then shows the rows for
    the first few duplicated timestamps.

    Parameters
    ----------
    ts : pandas.DataFrame or Series
        Time series with DatetimeIndex. If the index is of another type a
        one-line notice is printed and nothing else is done.
    label : str, optional
        Context label (e.g., filename or pattern). Printed only when truthy.
    max_times : int
        Number of duplicate timestamps to report.
    max_rows : int
        Number of rows per timestamp to print. Applies to both DataFrame
        and Series input.

    Returns
    -------
    None
        The function only prints; it does not modify or return ``ts``.
    """
    # Function-scope import, as in the original: this module has no
    # top-level imports.
    import pandas as pd

    idx = ts.index
    if not isinstance(idx, pd.DatetimeIndex):
        print(f"[dup-debug] Index is not DatetimeIndex ({type(idx)})")
        return

    # keep=False flags every member of a duplicated group, not just the
    # repeats, so the row count below covers all affected rows.
    dup_mask = idx.duplicated(keep=False)
    if not dup_mask.any():
        print("[dup-debug] No duplicate timestamps detected.")
        return

    dup_times = idx[dup_mask].unique()

    print("\n" + "=" * 60)
    print("[dup-debug] Duplicate timestamp diagnostic")
    if label:
        print(f"[dup-debug] Context: {label}")
    print(f"[dup-debug] Unique duplicate timestamps: {len(dup_times)}")
    print(f"[dup-debug] Total duplicate rows: {dup_mask.sum()}")

    # Show the first few duplicate timestamps.
    for t in dup_times[:max_times]:
        print(f"\n[dup-debug] Timestamp: {t}")
        # Select by boolean mask rather than ``ts.loc[t]``: the mask keeps
        # the container type (a Series stays a multi-row Series), so
        # ``max_rows`` is honoured for Series input as well as DataFrames.
        # NaT never compares equal to itself, so match it with isna().
        row_mask = idx.isna() if pd.isna(t) else idx == t
        print(ts.loc[row_mask].head(max_rows))

    print("=" * 60 + "\n")
49+

vtools/data/gap.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8 -*-
33
import pandas as pd
4-
import matplotlib.pyplot as plt
54

65
import pandas as pd
76
import numpy as np

vtools/functions/climatology.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,6 @@ def apply_climatology(climate, index=None, start=None, end=None, freq=None):
143143
Notes
144144
-----
145145
- If index is not provided, start, end, and freq must be specified to generate a DatetimeIndex using pandas.date_range.
146-
- Backward compatible: original behavior is preserved if index is provided.
147146
"""
148147
if len(climate) not in [12, 365, 366]:
149148
raise ValueError("Length of climatology must be 12, 365 or 366")

0 commit comments

Comments
 (0)