OceanStreamIO
diff --git a/‎oceanstream/echodata/__init__.py‎
Lines changed: 29 additions & 0 deletions b/‎oceanstream/echodata/__init__.py‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎oceanstream/echodata/calibrate/calibration.py‎
Lines changed: 69 additions & 24 deletions b/‎oceanstream/echodata/calibrate/calibration.py‎
Lines changed: 69 additions & 24 deletions
diff --git a/‎oceanstream/echodata/compute/sv.py‎
Lines changed: 11 additions & 2 deletions b/‎oceanstream/echodata/compute/sv.py‎
Lines changed: 11 additions & 2 deletions
diff --git a/‎oceanstream/echodata/concat.py‎
Lines changed: 142 additions & 2 deletions b/‎oceanstream/echodata/concat.py‎
Lines changed: 142 additions & 2 deletions
diff --git a/‎oceanstream/echodata/convert.py‎
Lines changed: 20 additions & 5 deletions b/‎oceanstream/echodata/convert.py‎
Lines changed: 20 additions & 5 deletions
@@ -104,6 +104,14 @@
     "DenoiseConfig",
     "MVBSConfig",
     "NASCConfig",
+    # Pydantic denoise parameter models
+    "MaskImpulseNoise",
+    "TransientNoiseMask",
+    "RemoveBackgroundNoise",
+    "MaskAttenuatedSignal",
+    "MVBSComputeOptions",
+    "NASCComputeOptions",
+    "fill_missing_frequency_params",
     # Consolidate (depth computation)
     "add_depth_to_sv",
     "choose_depth_flags",
@@ -161,6 +169,27 @@ def __getattr__(name: str):
         )
         return locals()[name]
 
+    # Pydantic denoise parameter models
+    if name in (
+        "MaskImpulseNoise",
+        "TransientNoiseMask",
+        "RemoveBackgroundNoise",
+        "MaskAttenuatedSignal",
+        "MVBSComputeOptions",
+        "NASCComputeOptions",
+        "fill_missing_frequency_params",
+    ):
+        from oceanstream.echodata.models import (
+            MaskImpulseNoise,
+            TransientNoiseMask,
+            RemoveBackgroundNoise,
+            MaskAttenuatedSignal,
+            MVBSComputeOptions,
+            NASCComputeOptions,
+            fill_missing_frequency_params,
+        )
+        return locals()[name]
+    
     # Convert functions
     if name in ("convert_raw_files", "convert_raw_file"):
         from oceanstream.echodata.convert import convert_raw_files, convert_raw_file
 
@@ -186,19 +186,23 @@ def validate_calibration_params(params: dict) -> bool:
     Validate calibration parameters dictionary.
     
     Args:
-        params: Dictionary of calibration parameters by frequency
+        params: Dictionary of calibration parameters by frequency.
+            Keys may be numeric (Hz) or string labels (e.g. "38kHz",
+            "38k_short").
         
     Returns:
         True if valid
         
     Raises:
-        ValueError: If invalid frequency type
+        ValueError: If parameter values are wrong
         TypeError: If parameter types are wrong
     """
     for freq_key, values in params.items():
-        # Frequency keys should be numeric (int)
-        if not isinstance(freq_key, (int, float)):
-            raise TypeError(f"Frequency key must be numeric, got {type(freq_key)}")
+        # Accept both numeric keys (int/float Hz) and string labels
+        if not isinstance(freq_key, (int, float, str)):
+            raise TypeError(
+                f"Frequency key must be numeric or string, got {type(freq_key)}"
+            )
 
         # Values should be a dict
         if not isinstance(values, dict):
@@ -208,34 +212,75 @@ def validate_calibration_params(params: dict) -> bool:
 
 
 def parse_ecs_file(ecs_file: Path) -> dict[int, dict]:
-    """
-    Parse Simrad ECS calibration file format.
-    
+    """Parse Simrad ECS calibration file format.
+
+    ECS files are INI-style text files with ``[ChannelN]`` sections.
+    This parser reads them using :mod:`configparser` and returns
+    calibration values keyed by frequency in Hz.
+
+    Falls back to XML parsing if INI parsing yields no sections (some
+    third-party tools export calibration as XML).
+
+    Both branches emit the same snake_case keys (``"gain"``,
+    ``"sa_correction"``, ...), so callers do not need to know which
+    on-disk format was parsed.
+
     Args:
         ecs_file: Path to .ecs file
-        
+
     Returns:
         Dictionary of calibration values keyed by frequency (Hz)
+        (empty if the file is neither valid INI nor valid XML).
     """
+    import configparser
+
+    config = configparser.ConfigParser()
+    try:
+        config.read(ecs_file)
+    except (configparser.Error, UnicodeDecodeError):
+        # Not INI format (or not decodable as text): discard anything that
+        # was parsed before the error and fall through to the XML fallback.
+        config = configparser.ConfigParser()
+
+    params: dict[int, dict] = {}
+    for section in config.sections():
+        if section.startswith("Channel"):
+            freq = config.getfloat(section, "Frequency", fallback=0)
+            # Values below 1000 are treated as kHz and scaled to Hz.
+            freq_hz = int(freq) if freq >= 1000 else int(freq * 1000)
+            params[freq_hz] = {
+                "gain": config.getfloat(section, "Gain", fallback=0),
+                "sa_correction": config.getfloat(section, "SaCorrection", fallback=0),
+                "beamwidth_alongship": config.getfloat(
+                    section, "BeamWidthAlongship", fallback=0
+                ),
+                "beamwidth_athwartship": config.getfloat(
+                    section, "BeamWidthAthwartship", fallback=0
+                ),
+                "angle_offset_alongship": config.getfloat(
+                    section, "AngleOffsetAlongship", fallback=0
+                ),
+                "angle_offset_athwartship": config.getfloat(
+                    section, "AngleOffsetAthwartship", fallback=0
+                ),
+            }
+
+    if params:
+        return params
+
+    # Fallback: try XML parsing for non-standard calibration files
     import xml.etree.ElementTree as ET
-    
-    tree = ET.parse(ecs_file)
+
+    try:
+        tree = ET.parse(ecs_file)
+    except ET.ParseError:
+        logger.warning(f"ECS file {ecs_file} is neither valid INI nor XML")
+        return {}
+
     root = tree.getroot()
-    
-    params = {}
-    
     for cal in root.findall(".//Calibration"):
         freq = int(cal.get("Frequency", 0))
-        params[freq] = {}
-        
-        gain_elem = cal.find("Gain")
-        if gain_elem is not None:
-            params[freq]["gain"] = float(gain_elem.text)
-        
-        sa_elem = cal.find("SaCorrection")
-        if sa_elem is not None:
-            params[freq]["sa_correction"] = float(sa_elem.text)
-    
+        entry: dict[str, float] = {}
+        # Explicit tag -> key mapping: deriving the key by lower-casing the
+        # first letter would yield "saCorrection", which does not match the
+        # "sa_correction" key used by the INI branch.
+        for tag, key in (("Gain", "gain"), ("SaCorrection", "sa_correction")):
+            elem = cal.find(tag)
+            if elem is not None and elem.text:
+                entry[key] = float(elem.text)
+        if entry:
+            params[freq] = entry
+
     return params
 
 
 
@@ -24,6 +24,7 @@ def compute_sv(
     use_dask: bool = True,
     add_depth: bool = True,
     add_location: bool = True,
+    depth_offset: float = 0.0,
     waveform_mode: str = "CW",
     encode_mode: str = "complex",
 ) -> "xr.Dataset":
@@ -40,6 +41,9 @@ def compute_sv(
         use_dask: Enable Dask for large files
         add_depth: Add depth coordinate to output
         add_location: Add lat/lon coordinates to output
+        depth_offset: Transducer depth offset in metres (e.g. depth below
+            waterline).  Passed to echopype ``add_depth`` when platform
+            metadata doesn't already provide it.
         waveform_mode: Waveform mode for EK80 ('CW' for narrowband, 'BB' for broadband)
         encode_mode: Encode mode for EK80 ('complex' or 'power')
         
@@ -90,10 +94,15 @@ def compute_sv_task(echodata_path: Path) -> xr.Dataset:
     logger.info("Computing Sv")
     ds_Sv = ep.calibrate.compute_Sv(echodata, **compute_kwargs)
 
-    # Add depth coordinate
+    # Add depth coordinate using platform-aware flag selection
     if add_depth:
         logger.info("Adding depth coordinate")
-        ds_Sv = ep.consolidate.add_depth(ds_Sv)
+        try:
+            flags = _choose_depth_flags(echodata, depth_offset=depth_offset)
+            ds_Sv = ep.consolidate.add_depth(ds_Sv, echodata, **flags)
+        except (KeyError, ValueError, TypeError) as e:
+            logger.warning(f"Depth-flag add_depth failed ({e}), using simple add_depth")
+            ds_Sv = ep.consolidate.add_depth(ds_Sv)
 
     # Add location coordinates
     if add_location:
 
@@ -2,14 +2,19 @@
 
 Groups raw files by UTC date and concatenates them for daily processing,
 enabling efficient denoising and MVBS/NASC computation over 24-hour periods.
+
+Includes utilities for:
+- Pulse-category splitting (short_pulse / long_pulse) based on frequency
+  combinations in processed Zarr stores.
+- Time-window batch grouping for multi-day concatenation windows.
 """
 
 from __future__ import annotations
 
 import logging
 import re
 from collections import defaultdict
-from datetime import datetime
+from datetime import datetime, timedelta
 from pathlib import Path
 from typing import TYPE_CHECKING, Optional
 
@@ -394,4 +399,139 @@ def merge_location_data(ds: "xr.Dataset", location_data: list[dict]) -> "xr.Data
         merged = merged.reset_coords("time", drop=True)
 
     return merged
-    return dt
+
+
+# ============================================================================
+# Pulse-category splitting utilities
+# ============================================================================
+
+# Well-known pulse categories for Saildrone EK80.
+# Key = category name; value = {"freq_key": ...} where ``freq_key`` is the
+# comma-joined, ascending list of frequency_nominal values formatted with one
+# decimal place, or None for the catch-all category.
+# Order matters: categories are tested in insertion order and a None
+# ``freq_key`` matches everything, so the catch-all must stay last.
+PULSE_CATEGORY_CONFIG: dict[str, dict[str, Optional[str]]] = {
+    "short_pulse": {"freq_key": "38000.0,200000.0"},
+    "long_pulse": {"freq_key": "38000.0"},
+    "exported_ds": {"freq_key": None},  # catch-all
+}
+
+
+def detect_pulse_category(ds: "xr.Dataset") -> str:
+    """Classify a Sv dataset into a pulse category.
+
+    Classification is based on the sorted, de-duplicated frequency values
+    read from ``frequency_nominal`` (or, if that is absent, from the
+    ``channel`` dimension), matching the Saildrone EK80 convention:
+
+    - ``"short_pulse"`` → 38 kHz + 200 kHz (dual-frequency, short CW pulse)
+    - ``"long_pulse"``  → 38 kHz only (single-frequency, long CW pulse)
+    - ``"exported_ds"``  → anything else
+
+    Values that cannot be interpreted as numbers (for example channel
+    labels that are names rather than frequencies) are classified as
+    ``"exported_ds"`` instead of raising.
+
+    Args:
+        ds: Sv xarray.Dataset with a ``frequency_nominal`` coordinate or
+            variable.
+
+    Returns:
+        One of ``"short_pulse"``, ``"long_pulse"``, or ``"exported_ds"``.
+    """
+    import numpy as np
+
+    if "frequency_nominal" in ds:
+        raw = ds["frequency_nominal"].values
+    elif "channel" in ds.dims:
+        raw = ds["channel"].values
+    else:
+        return "exported_ds"
+
+    try:
+        # np.unique already returns its result sorted ascending.
+        freqs = np.unique(np.asarray(raw).astype(float))
+    except (TypeError, ValueError):
+        # Non-numeric labels cannot match any frequency signature.
+        return "exported_ds"
+
+    freq_str = ",".join(f"{f:.1f}" for f in freqs)
+
+    for category, cfg in PULSE_CATEGORY_CONFIG.items():
+        if cfg["freq_key"] is None or freq_str == cfg["freq_key"]:
+            return category
+
+    return "exported_ds"
+
+
+def group_by_pulse_category(
+    paths: list[Path],
+) -> dict[str, list[Path]]:
+    """Group Zarr store paths by pulse category.
+
+    Opens each Zarr lazily to read ``frequency_nominal``, assigns the
+    file to a pulse category, and closes the store again before moving
+    on to the next path.
+
+    Classification is best-effort: a store that cannot be opened or
+    classified is logged (with traceback) and placed in ``"exported_ds"``.
+
+    Args:
+        paths: List of Sv Zarr store paths.
+
+    Returns:
+        ``{category: [path, ...]}`` mapping.
+    """
+    import xarray as xr
+
+    groups: dict[str, list[Path]] = defaultdict(list)
+    for p in paths:
+        try:
+            # The context manager closes the dataset once it has been
+            # classified, so handles do not accumulate across many stores.
+            with xr.open_zarr(p) as ds:
+                cat = detect_pulse_category(ds)
+        except Exception:
+            # Deliberately broad: one unreadable store must not abort the
+            # whole grouping pass, but keep the reason in the log.
+            logger.warning(
+                f"Could not classify {p}, assigning to exported_ds",
+                exc_info=True,
+            )
+            cat = "exported_ds"
+        groups[cat].append(p)
+    return dict(groups)
+
+
+# ============================================================================
+# Time-window batch grouping
+# ============================================================================
+
+def batch_key(
+    ts: datetime,
+    window_days: int = 1,
+) -> str:
+    """Return a filename-safe key that anchors *ts* to a fixed time window.
+
+    Multi-day windows lie on a fixed grid counted in whole days from
+    ``datetime.min`` (0001-01-01), so every timestamp inside the same
+    window produces the same key.  Only the calendar date of *ts* is
+    used; the time of day and any ``tzinfo`` are ignored.
+
+    Args:
+        ts: Timestamp (usually a file start time).
+        window_days: Width of the batching window in days.  Values of 1
+            or less select single-day windows.
+
+    Returns:
+        ``"YYYY-MM-DD"`` for single-day windows, or
+        ``"YYYY-MM-DD_to_YYYY-MM-DD"`` for multi-day windows.
+
+    Examples:
+        >>> batch_key(datetime(2023, 8, 10), 1)
+        '2023-08-10'
+        >>> batch_key(datetime(2023, 8, 10), 3)
+        '2023-08-10_to_2023-08-12'
+        >>> batch_key(datetime(2023, 8, 12), 3)
+        '2023-08-10_to_2023-08-12'
+    """
+    anchor = datetime(ts.year, ts.month, ts.day)
+
+    if window_days <= 1:
+        return f"{anchor:%Y-%m-%d}"
+
+    # Floor to the start of the fixed-grid window containing the date.
+    days_into_window = (anchor - datetime.min).days % window_days
+    start = anchor - timedelta(days=days_into_window)
+    end = start + timedelta(days=window_days - 1)
+    return f"{start:%Y-%m-%d}_to_{end:%Y-%m-%d}"
+
+
+def group_by_time_window(
+    files: list[tuple[Path, datetime]],
+    window_days: int = 1,
+) -> dict[str, list[Path]]:
+    """Bucket files by the time window their start time falls into.
+
+    Args:
+        files: List of ``(path, start_time)`` tuples.
+        window_days: Width of each batch window in days.
+
+    Returns:
+        ``{batch_key_str: [path, ...]}`` mapping, sorted by key.  Paths
+        keep their input order within each bucket.
+    """
+    buckets: dict[str, list[Path]] = {}
+    for file_path, start_time in files:
+        window = batch_key(start_time, window_days)
+        buckets.setdefault(window, []).append(file_path)
+
+    # Rebuild in ascending key order so iteration is chronological.
+    return {window: buckets[window] for window in sorted(buckets)}
@@ -221,12 +221,27 @@ def detect_sonar_model(raw_file: Path) -> str:
         "EK80" or "EK60" based on file contents
         
     Note:
-        Currently returns "EK80" as default since Saildrone uses EK80.
-        Future: implement actual detection based on file header.
+        Detection reads the first datagram header.
+        Falls back to "EK80" if the header is unrecognised.
     """
-    # TODO: Implement actual detection
-    # For now, default to EK80 (Saildrone standard)
-    logger.debug(f"Auto-detecting sonar model for {raw_file.name}, defaulting to EK80")
+    # Simrad raw files start with a datagram whose 4-byte type field
+    # identifies the instrument:
+    #   EK60 / ES60  → b'CON0' (little-endian ASCII at offset 4)
+    #   EK80 / ES80  → b'XML0'
+    try:
+        with open(raw_file, "rb") as fh:
+            # Skip the 4-byte datagram length and read the 4-byte type.
+            fh.read(4)  # datagram length
+            tag = fh.read(4)
+        if tag == b"CON0":
+            logger.debug(f"{raw_file.name}: detected EK60 (CON0 header)")
+            return "EK60"
+        if tag == b"XML0":
+            logger.debug(f"{raw_file.name}: detected EK80 (XML0 header)")
+            return "EK80"
+        logger.debug(f"{raw_file.name}: unrecognised header {tag!r}, defaulting to EK80")
+    except OSError:
+        logger.debug(f"Cannot read {raw_file.name}, defaulting to EK80")
     return "EK80"