|
| 1 | +""" |
| 2 | +Multi-frequency dB-differencing for acoustic target classification. |
| 3 | +
|
| 4 | +The dB-difference method computes Sv_1 − Sv_2 for a pair of frequencies |
| 5 | +and masks regions where the difference falls within a specified range. |
| 6 | +This is commonly used to discriminate species groups (e.g. krill vs fish) |
| 7 | +based on their known frequency response. |
| 8 | +
|
| 9 | +Ported from echopy (Ariza et al., 2020) with bug fixes and xarray support. |
| 10 | +
|
| 11 | +References: |
| 12 | + - Kloser et al. (2002), ICES Journal of Marine Science |
| 13 | + - De Robertis & Higginbottom (2007) |
| 14 | + - echopy: https://github.com/open-ocean-sounding/echopy |
| 15 | +""" |
| 16 | + |
| 17 | +from __future__ import annotations |
| 18 | + |
| 19 | +import logging |
| 20 | +from dataclasses import dataclass |
| 21 | +from typing import Optional |
| 22 | + |
| 23 | +import numpy as np |
| 24 | +import xarray as xr |
| 25 | + |
| 26 | +logger = logging.getLogger(__name__) |
| 27 | + |
| 28 | + |
| 29 | +@dataclass |
| 30 | +class FrequencyDifferencingResult: |
| 31 | + """Result of frequency differencing analysis. |
| 32 | +
|
| 33 | + Attributes: |
| 34 | + mask: Boolean DataArray where True = within threshold range. |
| 35 | + difference: DataArray of Sv_low − Sv_high values (dB). |
| 36 | + freq_low: Channel/frequency used as minuend. |
| 37 | + freq_high: Channel/frequency used as subtrahend. |
| 38 | + threshold: (low, high) dB threshold tuple applied. |
| 39 | + pixels_in_range: Number of pixels within threshold. |
| 40 | + pixels_total: Total non-NaN pixels. |
| 41 | + fraction_in_range: Fraction of pixels within threshold. |
| 42 | + """ |
| 43 | + |
| 44 | + mask: xr.DataArray |
| 45 | + difference: xr.DataArray |
| 46 | + freq_low: str |
| 47 | + freq_high: str |
| 48 | + threshold: tuple[float, float] |
| 49 | + pixels_in_range: int |
| 50 | + pixels_total: int |
| 51 | + fraction_in_range: float |
| 52 | + |
| 53 | + |
| 54 | +def _resolve_channel( |
| 55 | + ds: xr.Dataset, |
| 56 | + channel: str, |
| 57 | +) -> str: |
| 58 | + """Resolve a channel identifier to a matching channel label in the dataset. |
| 59 | +
|
| 60 | + Supports exact match or substring match (e.g. "38" matches |
| 61 | + "WBT 742057-15 ES38-18"). |
| 62 | +
|
| 63 | + Args: |
| 64 | + ds: Dataset with a ``channel`` dimension. |
| 65 | + channel: Exact or partial channel label. |
| 66 | +
|
| 67 | + Returns: |
| 68 | + Matched channel label string. |
| 69 | +
|
| 70 | + Raises: |
| 71 | + ValueError: If the channel cannot be found. |
| 72 | + """ |
| 73 | + channels = [str(c) for c in ds.channel.values] |
| 74 | + |
| 75 | + # Exact match |
| 76 | + if channel in channels: |
| 77 | + return channel |
| 78 | + |
| 79 | + # Substring/frequency match |
| 80 | + matches = [c for c in channels if channel in c] |
| 81 | + if matches: |
| 82 | + return matches[0] |
| 83 | + |
| 84 | + raise ValueError( |
| 85 | + f"Channel '{channel}' not found. Available: {channels}" |
| 86 | + ) |
| 87 | + |
| 88 | + |
| 89 | +def db_difference( |
| 90 | + ds: xr.Dataset, |
| 91 | + freq_low: str, |
| 92 | + freq_high: str, |
| 93 | + thr: tuple[float, float] = (-12.0, -2.0), |
| 94 | + sv_var: str = "Sv", |
| 95 | +) -> FrequencyDifferencingResult: |
| 96 | + """Compute dB difference between two frequencies and mask within threshold. |
| 97 | +
|
| 98 | + Calculates ``Sv(freq_low) − Sv(freq_high)`` and returns a boolean mask |
| 99 | + where the difference falls within the inclusive range ``[thr[0], thr[1]]``. |
| 100 | +
|
| 101 | + Args: |
| 102 | + ds: xarray Dataset with Sv data and a ``channel`` dimension. |
| 103 | + freq_low: Channel label (or substring like ``"38000"``) for the |
| 104 | + minuend frequency (lower frequency typically). |
| 105 | + freq_high: Channel label (or substring like ``"120000"``) for the |
| 106 | + subtrahend frequency (higher frequency typically). |
| 107 | + thr: Tuple of ``(lower_bound, upper_bound)`` in dB. Pixels where |
| 108 | + ``thr[0] <= (Sv_low - Sv_high) <= thr[1]`` are masked True. |
| 109 | + Default ``(-12, -2)`` is commonly used for krill identification. |
| 110 | + sv_var: Name of the Sv variable in the dataset. Default ``"Sv"``. |
| 111 | +
|
| 112 | + Returns: |
| 113 | + FrequencyDifferencingResult with mask, difference array, and diagnostics. |
| 114 | +
|
| 115 | + Raises: |
| 116 | + ValueError: If the dataset has no ``channel`` dimension, or if the |
| 117 | + requested channels cannot be found, or if ``thr[0] > thr[1]``. |
| 118 | +
|
| 119 | + Example: |
| 120 | + >>> result = db_difference(sv_ds, "38000", "120000", thr=(-12, -2)) |
| 121 | + >>> masked_sv = sv_ds[sv_var].where(~result.mask) |
| 122 | + """ |
| 123 | + if thr[0] > thr[1]: |
| 124 | + raise ValueError( |
| 125 | + f"Lower threshold ({thr[0]}) must be <= upper threshold ({thr[1]}). " |
| 126 | + f"Supply thr as (lower_bound, upper_bound)." |
| 127 | + ) |
| 128 | + |
| 129 | + if "channel" not in ds.dims: |
| 130 | + raise ValueError( |
| 131 | + "Dataset must have a 'channel' dimension for multi-frequency analysis. " |
| 132 | + "Got dimensions: " + str(list(ds.dims)) |
| 133 | + ) |
| 134 | + |
| 135 | + # Resolve channel labels |
| 136 | + ch_low = _resolve_channel(ds, freq_low) |
| 137 | + ch_high = _resolve_channel(ds, freq_high) |
| 138 | + |
| 139 | + if ch_low == ch_high: |
| 140 | + raise ValueError( |
| 141 | + f"freq_low and freq_high resolved to the same channel: '{ch_low}'. " |
| 142 | + f"Provide two distinct frequency channels." |
| 143 | + ) |
| 144 | + |
| 145 | + logger.info( |
| 146 | + "Computing dB difference: %s (low) − %s (high), threshold=[%.1f, %.1f]", |
| 147 | + ch_low, |
| 148 | + ch_high, |
| 149 | + thr[0], |
| 150 | + thr[1], |
| 151 | + ) |
| 152 | + |
| 153 | + # Extract single-channel Sv arrays |
| 154 | + sv_low = ds[sv_var].sel(channel=ch_low) |
| 155 | + sv_high = ds[sv_var].sel(channel=ch_high) |
| 156 | + |
| 157 | + # Compute difference |
| 158 | + difference = sv_low - sv_high |
| 159 | + |
| 160 | + # Build mask: True where difference is within [thr[0], thr[1]] |
| 161 | + # NOTE: echopy original had a bug where the second condition overwrote |
| 162 | + # the first. We fix this by properly combining both conditions. |
| 163 | + mask_low = difference >= thr[0] |
| 164 | + mask_high = difference <= thr[1] |
| 165 | + mask = mask_low & mask_high |
| 166 | + |
| 167 | + # Diagnostics |
| 168 | + valid = np.isfinite(difference.values) |
| 169 | + pixels_total = int(np.sum(valid)) |
| 170 | + pixels_in_range = int(np.sum(mask.values & valid)) |
| 171 | + fraction = pixels_in_range / pixels_total if pixels_total > 0 else 0.0 |
| 172 | + |
| 173 | + logger.info( |
| 174 | + "dB difference: %d/%d pixels (%.1f%%) within threshold", |
| 175 | + pixels_in_range, |
| 176 | + pixels_total, |
| 177 | + fraction * 100, |
| 178 | + ) |
| 179 | + |
| 180 | + return FrequencyDifferencingResult( |
| 181 | + mask=mask, |
| 182 | + difference=difference, |
| 183 | + freq_low=ch_low, |
| 184 | + freq_high=ch_high, |
| 185 | + threshold=thr, |
| 186 | + pixels_in_range=pixels_in_range, |
| 187 | + pixels_total=pixels_total, |
| 188 | + fraction_in_range=fraction, |
| 189 | + ) |
0 commit comments