Skip to content

Commit b0bddc9

Browse files
EliEli
authored and committed
Added transition and improved extrapolation including notebooks and tests. Some improvements to merge notebook and gap visualization.
1 parent 589828a commit b0bddc9

9 files changed

Lines changed: 930 additions & 411 deletions

File tree

docs/doctrees/nbsphinx/notebooks/merge_splice.ipynb

Lines changed: 15 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@
55
"id": "436d5ccf",
66
"metadata": {},
77
"source": [
8-
"# Understanding `ts_merge` and `ts_splice`\n",
8+
"# Merging and Splicing Time Series\n",
99
"This tutorial demonstrates the usage and difference between `ts_merge` and `ts_splice`, two methods for folding together time series into a combined data structure.\n",
1010
"\n",
1111
"- **`ts_merge`** blends multiple time series together based on priority, filling missing values. It potentially uses all the input series at all timestamps.\n",
1212
"- **`ts_splice`** stitches together time series in sequential time **blocks** without mixing values.\n",
1313
"\n",
14-
"We will describe the effect on regularly sampled series (which have the `freq` attribute) and on irregular. We will also explore the **`names`** argument, which controls how columns are selected or renamed in the merging/splicing process.\n",
14+
"We will describe the effect on regularly sampled series (which have the `freq` attribute) and on irregular ones. We will also explore the **`names`** argument, which controls how columns are selected or renamed in the merging/splicing process. There is a file-level command line tool for this as well in the `dms_datastore` package.\n",
1515
"\n",
1616
"## Prioritized filling on regular series\n",
1717
"Let's begin by showing how `ts_merge` and `ts_splice` fold together two regular but gappy series \n",
@@ -22,95 +22,20 @@
2222
},
2323
{
2424
"cell_type": "code",
25-
"execution_count": 53,
25+
"execution_count": 1,
2626
"id": "e52fb077",
2727
"metadata": {},
2828
"outputs": [
2929
{
30-
"name": "stdout",
31-
"output_type": "stream",
32-
"text": [
33-
"Series 1 (Primary):\n"
34-
]
35-
},
36-
{
37-
"data": {
38-
"text/plain": [
39-
"2023-01-01 1.0\n",
40-
"2023-01-02 NaN\n",
41-
"2023-01-03 3.0\n",
42-
"2023-01-04 NaN\n",
43-
"2023-01-05 5.0\n",
44-
"2023-01-06 6.0\n",
45-
"2023-01-07 NaN\n",
46-
"2023-01-08 8.0\n",
47-
"2023-01-09 9.0\n",
48-
"2023-01-10 10.0\n",
49-
"Freq: D, Name: A, dtype: float64"
50-
]
51-
},
52-
"metadata": {},
53-
"output_type": "display_data"
54-
},
55-
{
56-
"name": "stdout",
57-
"output_type": "stream",
58-
"text": [
59-
"\n",
60-
"Series 2 (Secondary - Fills Gaps):\n"
61-
]
62-
},
63-
{
64-
"data": {
65-
"text/plain": [
66-
"2023-01-01 NaN\n",
67-
"2023-01-02 2.0\n",
68-
"2023-01-03 NaN\n",
69-
"2023-01-04 4.0\n",
70-
"2023-01-05 NaN\n",
71-
"2023-01-06 NaN\n",
72-
"2023-01-07 7.0\n",
73-
"2023-01-08 NaN\n",
74-
"2023-01-09 NaN\n",
75-
"2023-01-10 NaN\n",
76-
"2023-01-11 3.0\n",
77-
"2023-01-12 4.0\n",
78-
"Freq: D, Name: A, dtype: float64"
79-
]
80-
},
81-
"metadata": {},
82-
"output_type": "display_data"
83-
},
84-
{
85-
"name": "stdout",
86-
"output_type": "stream",
87-
"text": [
88-
"\n",
89-
"Series 3 (Tertiary - Fills Gaps):\n"
30+
"ename": "NameError",
31+
"evalue": "name 'pd' is not defined",
32+
"output_type": "error",
33+
"traceback": [
34+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
35+
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
36+
"Cell \u001b[1;32mIn[1], line 4\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# ========================================\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# 1️⃣ Creating Regular Time Series (1D Frequency with Missing Data)\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# ========================================\u001b[39;00m\n\u001b[1;32m----> 4\u001b[0m idx1 \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241m.\u001b[39mdate_range(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2023-01-01\u001b[39m\u001b[38;5;124m\"\u001b[39m, periods\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10\u001b[39m, freq\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m1D\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m idx2 \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mdate_range(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2023-01-01\u001b[39m\u001b[38;5;124m\"\u001b[39m, periods\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m12\u001b[39m, freq\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m1D\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 6\u001b[0m idx3 \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mdate_range(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2022-12-31\u001b[39m\u001b[38;5;124m\"\u001b[39m, periods\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m14\u001b[39m, freq\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m1D\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
37+
"\u001b[1;31mNameError\u001b[0m: name 'pd' is not defined"
9038
]
91-
},
92-
{
93-
"data": {
94-
"text/plain": [
95-
"2022-12-31 1000.0\n",
96-
"2023-01-01 1001.0\n",
97-
"2023-01-02 1002.0\n",
98-
"2023-01-03 NaN\n",
99-
"2023-01-04 1004.0\n",
100-
"2023-01-05 NaN\n",
101-
"2023-01-06 NaN\n",
102-
"2023-01-07 1007.0\n",
103-
"2023-01-08 NaN\n",
104-
"2023-01-09 NaN\n",
105-
"2023-01-10 NaN\n",
106-
"2023-01-11 1005.0\n",
107-
"2023-01-12 1006.0\n",
108-
"2023-01-13 1007.0\n",
109-
"Freq: D, Name: A, dtype: float64"
110-
]
111-
},
112-
"metadata": {},
113-
"output_type": "display_data"
11439
}
11540
],
11641
"source": [
@@ -148,7 +73,7 @@
14873
},
14974
{
15075
"cell_type": "code",
151-
"execution_count": 54,
76+
"execution_count": null,
15277
"id": "5dd08914",
15378
"metadata": {},
15479
"outputs": [
@@ -291,7 +216,7 @@
291216
},
292217
{
293218
"cell_type": "code",
294-
"execution_count": 61,
219+
"execution_count": null,
295220
"id": "9a1d0dae",
296221
"metadata": {},
297222
"outputs": [
@@ -429,7 +354,7 @@
429354
},
430355
{
431356
"cell_type": "code",
432-
"execution_count": 63,
357+
"execution_count": null,
433358
"id": "35cfc422",
434359
"metadata": {},
435360
"outputs": [
@@ -750,7 +675,7 @@
750675
},
751676
{
752677
"cell_type": "code",
753-
"execution_count": 62,
678+
"execution_count": null,
754679
"id": "c6949e66",
755680
"metadata": {},
756681
"outputs": [

docsrc/notebooks/transition.ipynb

Lines changed: 178 additions & 0 deletions
Large diffs are not rendered by default.

tests/test_extrapolation.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
2+
import pandas as pd
3+
import numpy as np
4+
import pytest
5+
from pandas.testing import assert_series_equal, assert_frame_equal
6+
7+
import pandas as pd
8+
import numpy as np
9+
import pytest
10+
from pandas.testing import assert_series_equal
11+
from vtools import extrapolate_ts
12+
13+
# Core test cases
14+
def test_constant_forward():
    """Forward extension with method='constant' pads the new tail with `val`."""
    src = pd.Series([1, 2, 3], index=pd.date_range("2020-01-01", periods=3, freq="d"))
    got = extrapolate_ts(src, end="2020-01-05", method="constant", val=10)
    want_index = pd.date_range("2020-01-01", periods=5, freq="d")
    want = pd.Series([1, 2, 3, 10, 10], index=want_index)
    assert_series_equal(got, want)
19+
20+
def test_constant_backward():
    """Backward extension with method='constant' pads the new head with `val`."""
    src = pd.Series([4, 5, 6], index=pd.date_range("2020-01-03", periods=3, freq="d"))
    got = extrapolate_ts(src, start="2020-01-01", method="constant", val=0)
    want_index = pd.date_range("2020-01-01", periods=5, freq="d")
    want = pd.Series([0, 0, 4, 5, 6], index=want_index)
    assert_series_equal(got, want)
25+
26+
def test_taper_forward():
    """Forward taper ramps linearly from the last value down to `val`."""
    src = pd.Series([5], index=pd.date_range("2020-01-01", periods=1, freq="D"))
    got = extrapolate_ts(src, end="2020-01-04", method="taper", val=0)
    want_index = pd.date_range("2020-01-01", periods=4, freq="D")
    want = pd.Series([5, 3.3333, 1.6667, 0], index=want_index)
    # Compare to 4 decimals; the ramp values are irrational fractions.
    assert_series_equal(got.round(4), want.round(4), check_dtype=False)
31+
32+
def test_taper_backward():
    """Backward taper ramps linearly from `val` up to the first value."""
    src = pd.Series([5], index=pd.date_range("2020-01-04", periods=1, freq="D"))
    got = extrapolate_ts(src, start="2020-01-01", method="taper", val=0)
    want_index = pd.date_range("2020-01-01", periods=4, freq="D")
    want = pd.Series([0, 1.6667, 3.3333, 5], index=want_index)
    # Compare to 4 decimals; the ramp values are irrational fractions.
    assert_series_equal(got.round(4), want.round(4), check_dtype=False)
37+
38+
def test_linear_slope_bidirectional():
    """linear_slope extends the fitted slope in both directions at once."""
    src = pd.Series([2, 4], index=pd.date_range("2020-01-02", periods=2, freq="D"))
    got = extrapolate_ts(src, start="2020-01-01", end="2020-01-04", method="linear_slope")
    want_index = pd.date_range("2020-01-01", periods=4, freq="D")
    want = pd.Series([0, 2, 4, 6], index=want_index)
    assert_series_equal(got.round(4), want.round(4), check_dtype=False)
43+
44+
# Contract violations
45+
def test_ffill_before_start_error():
    """ffill cannot fill backward, so a start before the series must raise."""
    src = pd.Series([1, 2, 3], index=pd.date_range("2020-01-03", periods=3, freq="D"))
    with pytest.raises(ValueError, match="ffill.*before start"):
        extrapolate_ts(src, start="2020-01-01", method="ffill")
49+
50+
def test_bfill_after_end_error():
    """bfill cannot fill forward, so an end past the series must raise."""
    src = pd.Series([1, 2, 3], index=pd.date_range("2020-01-01", periods=3, freq="D"))
    with pytest.raises(ValueError, match="bfill.*after end"):
        extrapolate_ts(src, end="2020-01-05", method="bfill")
54+
55+
def test_taper_without_val():
    """method='taper' has no target without `val`, so omitting it must raise."""
    src = pd.Series([1], index=pd.date_range("2020-01-01", periods=1, freq="D"))
    with pytest.raises(ValueError, match="requires 'val'"):
        extrapolate_ts(src, end="2020-01-02", method="taper")
59+
60+
def test_linear_with_val_error():
    """linear_slope derives its values from the data, so passing `val` must raise."""
    src = pd.Series([1, 2], index=pd.date_range("2020-01-01", periods=2, freq="d"))
    with pytest.raises(ValueError, match="does not use 'val'"):
        extrapolate_ts(src, start="2019-12-30", end="2020-01-03", method="linear_slope", val=99)
64+
65+
66+
def test_short_linear_error():
    """A slope needs two points; a single-sample series must raise."""
    src = pd.Series([1], index=pd.date_range("2020-01-01", periods=1, freq="D"))
    with pytest.raises(ValueError, match="2 data points.*required"):
        extrapolate_ts(src, start="2019-12-30", method="linear_slope")
70+
71+
# Regression test from thread
72+
def test_dtype_preservation():
    """Regression: ffill extension must keep the original int64 dtype."""
    src = pd.Series([1, 2], dtype="int64", index=pd.date_range("2020-01-02", periods=2, freq="D"))
    got = extrapolate_ts(src, end="2020-01-04", method="ffill")
    want_index = pd.date_range("2020-01-02", periods=3, freq="D")
    want = pd.Series([1, 2, 2], index=want_index, dtype="int64")
    assert_series_equal(got, want)
77+
78+
def test_fill_preserves_original():
    """Constant fill only touches the extension, never the original samples."""
    base = pd.Series([1, 2, 3], index=pd.date_range("2020-01-01", periods=3, freq="D"))
    extended = extrapolate_ts(base, end="2020-01-05", method="constant", val=5)
    # Original sample untouched; extension carries the fill value.
    assert extended.loc["2020-01-01"] == 1
    assert extended.loc["2020-01-04"] == 5
83+
84+
85+
86+
def generate_series(start, periods, freq, values=None):
    """Build a Series on a regular DatetimeIndex for tests.

    Parameters
    ----------
    start : str or Timestamp
        First timestamp of the index.
    periods : int
        Number of samples.
    freq : str
        Pandas frequency string (e.g. "15min", "h", "d").
    values : array-like, optional
        Data values; defaults to ``0 .. periods - 1``.

    Returns
    -------
    pandas.Series
    """
    if values is None:
        values = np.arange(periods)
    return pd.Series(values, index=pd.date_range(start=start, periods=periods, freq=freq))
91+
92+
93+
def test_taper_reaches_requested_end():
    """Taper extrapolation extends to exactly the requested end timestamp
    for several sampling frequencies.

    Renamed from ``test_taper_across_frequencies``: a second function with
    that exact name appears later in this module and shadowed this one, so
    pytest never collected or ran it. The distinct name makes both tests run.
    """
    freqs = ["15min", "h", "d"]
    for freq in freqs:
        ts = generate_series("2020-01-01", periods=2, freq=freq, values=[10, 20])
        interval = ts.index[1] - ts.index[0]
        end_time = ts.index[-1] + 3 * interval
        result = extrapolate_ts(ts, end=end_time, method="taper", val=0.0)
        # The extension must land exactly on the requested end.
        assert result.index[-1] == end_time
101+
102+
103+
def test_taper_across_frequencies():
    """Taper extension must preserve the input's sampling frequency."""
    by_freq = {}
    for freq in ["15min", "h", "d"]:
        series = generate_series("2020-01-01", periods=2, freq=freq, values=[10, 10])  # Ensure 2+ points
        delta = series.index[1] - series.index[0]
        target_end = series.index[-1] + 3 * delta
        extended = extrapolate_ts(series, end=target_end, method="taper", val=0.0)
        assert extended.index.freqstr.lower() == freq
        by_freq[freq] = extended
112+

vtools/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from vtools.data.vtime import *
1212
from vtools.data.timeseries import *
1313

14+
from vtools.functions.transition import *
1415
from vtools.functions.climatology import *
1516
from vtools.functions.interannual import *
1617
from vtools.functions.filter import *

vtools/data/gap.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,68 @@ def gap_distance(ts, disttype="count", to = "good"):
203203
raise ValueError("invalid input disttype, must be count or freq")
204204

205205

206+
import pandas as pd
207+
import numpy as np
208+
209+
def describe_series_gaps(s: pd.Series, name: str, context: int = 2):
    """
    Print gaps in a single Series s, showing `context` non-null points
    before and after each gap, with an ellipsis marker in between.
    """
    is_missing = s.isna().to_numpy()
    stamps = s.index.to_numpy()

    if not is_missing.any():
        print(f"{name}: no missing values\n")
        return

    # Differencing the 0/1 mask (padded with zeros on both ends) marks a
    # gap opening with +1 and a gap closing with -1.
    edges = np.diff(is_missing.astype(int), prepend=0, append=0)
    gap_starts = np.where(edges == 1)[0]
    gap_ends = np.where(edges == -1)[0] - 1

    n = len(is_missing)
    for gap_no, (lo, hi) in enumerate(zip(gap_starts, gap_ends), 1):
        print(f"\n{name} — gap #{gap_no}:")
        print(f" from {stamps[lo]} to {stamps[hi]} ({hi - lo + 1} samples missing)")

        # Up to `context` valid points immediately before the gap,
        # collected walking backward, then printed oldest-first.
        before = []
        pos = lo - 1
        while pos >= 0 and len(before) < context:
            if not is_missing[pos]:
                before.append(pos)
            pos -= 1
        for p in reversed(before):
            print(f" → {stamps[p]} : {s.iloc[p]}")

        # ellipsis marker between the two context runs
        print(" ... [ missing block ] ...")

        # Up to `context` valid points immediately after the gap, in order.
        after = []
        pos = hi + 1
        while pos < n and len(after) < context:
            if not is_missing[pos]:
                after.append(pos)
            pos += 1
        for p in after:
            print(f" ← {stamps[p]} : {s.iloc[p]}")
        print()
255+
256+
def describe_null(dset, name, context=2):
    """
    If dset is a DataFrame, run describe_series_gaps on each column.
    If it's a Series, just run it once.
    """
    if not isinstance(dset, pd.DataFrame):
        # Single series: report it directly under the given name.
        describe_series_gaps(dset, name, context=context)
        return
    # DataFrame: report each column, qualified as "<name>.<column>".
    for col in dset.columns:
        describe_series_gaps(dset[col], f"{name}.{col}", context=context)
266+
267+
206268

207269
def example_gap():
208270
import numpy as np

0 commit comments

Comments
 (0)