Skip to content

Commit ac64d4e

Browse files
EliEli
authored and committed
First cut rationalize_time_partitions.py that can take yaml spec.
Allows file to be used twice (A-B-A) when partitioning in splicable order. Allows argument to be a Path object, coerced immediately to string. Allow file updates based on new data on either end of series, not just during common period. CLI for reconcile. Should move this into main file -- it is a bit bulky. New cli for update repo and rationalize time partition. Added rationalize yaml file to config system. Added project info for new entry points.
1 parent a35d524 commit ac64d4e

File tree

10 files changed

+1207
-21
lines changed

10 files changed

+1207
-21
lines changed

dms_datastore/__main__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@
2525
from dms_datastore.merge_files import merge_files_cli
2626
from dms_datastore.dropbox_data import dropbox_cli
2727
from dms_datastore.coarsen_file import coarsen_ts_cli
28+
from dms_datastore.update_repo import update_repo
29+
from dms_datastore.update_flagged_data import update_flagged_data
30+
from dms_datastore.rationalize_time_partitions import rationalize_time_partitions_cli
2831

2932

3033
@click.group(help="DMS CLI tools for data processing and extraction.")
@@ -60,6 +63,9 @@ def cli():
6063
cli.add_command(data_cache_cli, "data_cache")
6164
cli.add_command(merge_files_cli, "merge_files")
6265
cli.add_command(coarsen_ts_cli, "coarsen")
66+
cli.add_command(update_flagged_data, "update_flagged_data")
67+
cli.add_command(update_repo,"update_repo")
68+
cli.add_command(rationalize_time_partitions_cli, "rationalize_time_partitions")
6369

6470
if __name__ == "__main__":
6571
cli()

dms_datastore/_reconcile_cli.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# dms_datastore/cli/_reconcile_cli.py
2+
from __future__ import annotations
3+
4+
import csv
5+
from dataclasses import asdict, is_dataclass
6+
from pathlib import Path
7+
from typing import Iterable, Optional
8+
9+
10+
def echo_actions_text(actions: Iterable[object]) -> None:
    """
    Print a one-line, space-separated summary of each action.

    Action objects are expected to expose the attributes:
    series_id, shard, action, reason, staged_path, repo_path
    (missing attributes are rendered as None).
    """
    all_actions = list(actions)
    if not all_actions:
        print("no actions (repo already up to date for inspected shards)")
        return

    for act in all_actions:
        staged_path = getattr(act, "staged_path", None)
        repo_path = getattr(act, "repo_path", None)

        fields = [
            str(getattr(act, "action", None)),
            f"series_id={getattr(act, 'series_id', None)}",
            f"shard={getattr(act, 'shard', None)}",
            f"reason={getattr(act, 'reason', None)}",
        ]
        # Paths are optional; only show them when present (truthy).
        for label, value in (("staged", staged_path), ("repo", repo_path)):
            if value:
                fields.append(f"{label}={value}")

        print(" ".join(fields))
41+
42+
43+
def write_actions_csv(actions: Iterable[object], out_csv: str) -> None:
    """
    Write reconcile actions to a CSV file.

    Parameters
    ----------
    actions : Iterable[object]
        Action records. Dataclass *instances* are serialized with
        :func:`dataclasses.asdict`; any other object is read via
        ``getattr`` with a ``None`` fallback for each column.
    out_csv : str
        Destination path. Parent directories are created as needed.

    Notes
    -----
    Columns are stable and explicit to support downstream parsing and
    fixtures.
    """
    out_path = Path(out_csv)
    out_path.parent.mkdir(parents=True, exist_ok=True)

    fieldnames = ["series_id", "shard", "action", "reason", "staged_path", "repo_path"]

    with out_path.open("w", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=fieldnames)
        w.writeheader()
        for a in actions:
            # is_dataclass() is True for dataclass *classes* as well, but
            # asdict() only accepts instances -- guard against a stray class
            # object so it falls through to the getattr path instead of
            # raising TypeError.
            if is_dataclass(a) and not isinstance(a, type):
                row = asdict(a)
            else:
                row = {
                    "series_id": getattr(a, "series_id", None),
                    "shard": getattr(a, "shard", None),
                    "action": getattr(a, "action", None),
                    "reason": getattr(a, "reason", None),
                    "staged_path": getattr(a, "staged_path", None),
                    "repo_path": getattr(a, "repo_path", None),
                }

            # enforce stable column ordering (and drop any extra fields
            # a dataclass might carry beyond the canonical schema)
            w.writerow({k: row.get(k, None) for k in fieldnames})
72+
73+
74+
def resolve_plan_flag(apply: bool, plan: bool) -> bool:
    """
    Decide whether the CLI runs in plan (dry-run) mode.

    Plan mode is the default; ``--apply`` turns it off, and an explicit
    ``--plan`` always wins (the caller is expected not to pass both).
    """
    # Truth table collapses to: plan set -> True; else apply set -> False;
    # else True (default).
    return bool(plan or not apply)
85+
86+
87+
def maybe_fail_if_changes(actions: list[object], fail_if_changes: bool) -> None:
    """
    Optionally abort with a distinctive exit code when changes are pending.

    Exit code convention:
        - 0: no actions (or apply mode)
        - 2: actions exist and fail_if_changes requested
    """
    if not fail_if_changes:
        return
    if actions:
        raise SystemExit(2)
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# YAML-driven time-partition rationalization for instrument files.
2+
#
3+
# Goal:
4+
# For selected pools of files (matched by a glob pattern), enforce a canonical
5+
# non-overlapping partition of time by slicing specific "include" files into
6+
# left-closed/right-open windows [start_k, start_{k+1}) and deleting "omit" files.
7+
#
8+
# Notes:
9+
# - All matching is done by exact basename equality within the pool.
10+
# - Time windows are left-closed/right-open to avoid duplicates at boundaries.
11+
# - This mode is intended to run *before* general "superset deletes subset" logic.
12+
13+
rationalize:
14+
- pattern: "des_mrz@upper*_ec_*.csv"
15+
# pattern defines the pool: all files matched by this glob form one candidate set.
16+
# If this pattern matches, the rule applies; if it matches nothing, that's an error.
17+
18+
include:
19+
# include is ordered and defines the canonical partitioning plan.
20+
#
21+
# Each entry has:
22+
# fname: exact basename of a file in the pool (may include ${LAST})
23+
# start: ISO date (YYYY-MM-DD) or ${START}
24+
#
25+
# start values must be strictly increasing once resolved.
26+
27+
- fname: "des_mrz@upper_40_ec_1983_1999.csv"
28+
start: "${START}"
29+
# ${START} means "from the beginning of this file" (no lower time bound).
30+
# Window becomes: [-inf, next_start)
31+
32+
- fname: "des_mrz@upper_40_ec_2000_2007.csv"
33+
start: "2000-01-01"
34+
# Window becomes: [2000-01-01, next_start)
35+
36+
- fname: "des_mrz@upper_40_ec_2004_2019.csv"
37+
start: "2004-09-21"
38+
# Example of intentionally re-partitioning an overlapping file:
39+
# you keep only from 2004-09-21 onward, and exclude the boundary at next_start.
40+
41+
- fname: "des_mrz@upper_40_ec_2007_2009.csv"
42+
start: "2007-10-01"
43+
# Example of intentionally re-partitioning an overlapping file:
44+
# you keep only from 2007-10-01 onward, and exclude the boundary at next_start.
45+
46+
47+
- fname: "des_mrz@upper_40_ec_2004_2019.csv"
48+
start: "2008-08-20"
49+
# Example of a repeated file
50+
51+
52+
- fname: "des_mrz@upper_40_ec_2020_${LAST}.csv"
53+
start: "2020-01-01"
54+
# ${LAST} is resolved from the pool's max eyear (parsed from filenames),
55+
# substituted into fname, and must then match exactly one pool file.
56+
# Last window becomes: [2020-01-01, +inf)
57+
58+
omit:
59+
# omit is optional but, if present, acts as a completeness assertion:
60+
# include + omit must cover the entire pool (disjoint union).
61+
#
62+
# omit items are either explicit basenames or special tokens.
63+
64+
#- "des_mrz@upper_40_ec_2007_2009.csv"
65+
#- "${SUPERSEDED}"
66+
# ${SUPERSEDED} expands to all pool files that are strict subsets (by year span)
67+
# of another pool file using the existing inclusive-year superset rule:
68+
# B supersedes A if B.syear <= A.syear and B.eyear >= A.eyear
69+
#
70+
# IMPORTANT: If any file would end up in both include and omit after expansion,
71+
# that is an error. Be explicit instead.

dms_datastore/config_data/dstore_config.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ sublocations: station_subloc.csv
99
file_deletion_list: non_15_min_files_checked.txt
1010
compare_excepts_formatted: compare_excepts_formatted.txt
1111

12+
des_time_partition: des_rationalize_cfg.yaml
13+
14+
1215
repo: "//cnrastore-bdo/Modeling_Data/repo/continuous/screened"
1316
screened: "//cnrastore-bdo/Modeling_Data/repo/continuous/screened"
1417
processed: "//cnrastore-bdo/Modeling_Data/repo/continuous/processed"

0 commit comments

Comments
 (0)