From 5f2755d2974a7ccf6f1863f7821b4e43b4bb8c8f Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Thu, 28 Aug 2025 11:37:34 +0200 Subject: [PATCH 01/29] add normalization functions to audio_utils module --- src/osekit/utils/audio_utils.py | 22 ++++++++++++++++++++++ tests/test_spectro.py | 1 + 2 files changed, 23 insertions(+) diff --git a/src/osekit/utils/audio_utils.py b/src/osekit/utils/audio_utils.py index 2316ad3d..0ee40c0a 100644 --- a/src/osekit/utils/audio_utils.py +++ b/src/osekit/utils/audio_utils.py @@ -112,3 +112,25 @@ def resample(data: np.ndarray, origin_sr: float, target_sr: float) -> np.ndarray else resample_quality_settings["downsample"] ) return soxr.resample(data, origin_sr, target_sr, quality=quality) + + +def normalize_raw(values: np.ndarray) -> np.ndarray: + """No normalization of the audio data.""" + return values + + +def normalize_dc_reject(values: np.ndarray) -> np.ndarray: + """Reject the DC component of the audio data.""" + return values - values.mean() + + +def normalize_zscore(values: np.ndarray) -> np.ndarray: + """Return normalized zscore from the audio data.""" + return (values - values.mean()) / values.std() + + +normalizations = { + "raw": normalize_raw, + "dc_reject": normalize_dc_reject, + "zscore": normalize_zscore, +} diff --git a/tests/test_spectro.py b/tests/test_spectro.py index d65a98bd..d4b7e6dd 100644 --- a/tests/test_spectro.py +++ b/tests/test_spectro.py @@ -1006,6 +1006,7 @@ def mock_pcolormesh(self, time, freq, sx, **kwargs): assert (plot_kwargs["vmin"], plot_kwargs["vmax"]) == sd.v_lim assert plot_kwargs["cmap"] == sd.colormap + def test_spectro_default_v_lim(audio_files: pytest.fixture) -> None: files, _ = audio_files ad = AudioData.from_files(files) From 05afcc14f7c9642620fcbeedae23bf2b8f18f944 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Thu, 28 Aug 2025 11:44:57 +0200 Subject: [PATCH 02/29] add normalization util tests --- tests/test_utils.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff 
--git a/tests/test_utils.py b/tests/test_utils.py index e0f23908..e71868e7 100755 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,6 +3,7 @@ import time from pathlib import Path +import numpy as np import pandas as pd import pytest @@ -12,6 +13,7 @@ locked, nb_files_per_batch, ) +from osekit.utils.audio_utils import normalizations from osekit.utils.formatting_utils import aplose2raven from osekit.utils.path_utils import move_tree @@ -359,3 +361,30 @@ def test_get_closest_value_index( expected: int, ) -> None: assert get_closest_value_index(values=values, target=target) == expected + + +@pytest.mark.parametrize( + ("values", "normalization", "expected"), + [ + pytest.param( + np.array([0.0, 1.0, 2.0]), + "raw", + np.array([0.0, 1.0, 2.0]), + ), + pytest.param( + np.array([0.0, 1.0, 2.0]), + "dc_reject", + np.array([-1.0, 0.0, 1.0]), + ), + pytest.param( + np.array([0.0, 1.0, 2.0]), + "zscore", + np.array([-1.224744871391589, 0.0, 1.224744871391589]), + ), + ], +) +def test_normalization( + values: np.ndarray, normalization: str, expected: np.ndarray +) -> None: + normalized = normalizations[normalization](values) + assert np.array_equal(normalized, expected) From a0b9c917bf89653d9c55cb231a7a84c5eee03b3f Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Thu, 28 Aug 2025 11:54:08 +0200 Subject: [PATCH 03/29] add AudioData.normalization property --- src/osekit/core_api/audio_data.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/osekit/core_api/audio_data.py b/src/osekit/core_api/audio_data.py index 1eacbf92..e3105ddd 100644 --- a/src/osekit/core_api/audio_data.py +++ b/src/osekit/core_api/audio_data.py @@ -7,7 +7,7 @@ from __future__ import annotations from math import ceil -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal import numpy as np import soundfile as sf @@ -40,6 +40,7 @@ def __init__( end: Timestamp | None = None, sample_rate: int | None = None, instrument: Instrument | None = None, + 
normalization: Literal["raw", "reject_dc", "zscore"] = "raw", ) -> None: """Initialize an AudioData from a list of AudioItems. @@ -58,11 +59,14 @@ def __init__( instrument: Instrument | None Instrument that might be used to obtain acoustic pressure from the wav audio data. + normalization: Literal["raw","reject_dc","zscore"] + The type of normalization to apply to the audio data. """ super().__init__(items=items, begin=begin, end=end) self._set_sample_rate(sample_rate=sample_rate) self.instrument = instrument + self.normalization = normalization @property def nb_channels(self) -> int: @@ -77,6 +81,15 @@ def shape(self) -> tuple[int, ...] | int: data_length = round(self.sample_rate * self.duration.total_seconds()) return data_length if self.nb_channels <= 1 else (data_length, self.nb_channels) + @property + def normalization(self) -> Literal["raw", "dc_reject", "zscore"]: + """The type of normalization to apply to the audio data.""" + return self._normalization + + @normalization.setter + def normalization(self, value: Literal["raw", "dc_reject", "zscore"]) -> None: + self._normalization = value + def __eq__(self, other: AudioData) -> bool: """Override __eq__.""" return self.sample_rate == other.sample_rate and super().__eq__(other) From 454f972cfcc9cc3649533aa6df64b9fd0dc74a52 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Thu, 28 Aug 2025 12:06:56 +0200 Subject: [PATCH 04/29] remove AudioData.get_value reject_dc parameter --- src/osekit/core_api/audio_data.py | 23 ++++++----------------- src/osekit/core_api/spectro_data.py | 4 ++-- tests/test_spectro.py | 6 +----- 3 files changed, 9 insertions(+), 24 deletions(-) diff --git a/src/osekit/core_api/audio_data.py b/src/osekit/core_api/audio_data.py index e3105ddd..3a3d102d 100644 --- a/src/osekit/core_api/audio_data.py +++ b/src/osekit/core_api/audio_data.py @@ -20,7 +20,7 @@ from osekit.core_api.audio_item import AudioItem from osekit.core_api.base_data import BaseData from osekit.core_api.instrument import 
Instrument -from osekit.utils.audio_utils import resample +from osekit.utils.audio_utils import resample, normalizations if TYPE_CHECKING: from pathlib import Path @@ -113,16 +113,11 @@ def _set_sample_rate(self, sample_rate: int | None = None) -> None: return self.sample_rate = None - def get_value(self, reject_dc: bool = False) -> np.ndarray: + def get_value(self) -> np.ndarray: """Return the value of the audio data. The data from the audio file will be resampled if necessary. - Parameters - ---------- - reject_dc: bool - If True, the values will be centered on 0. - Returns ------- np.ndarray: @@ -136,28 +131,22 @@ def get_value(self, reject_dc: bool = False) -> np.ndarray: item_data = item_data[: min(item_data.shape[0], data.shape[0] - idx)] data[idx : idx + len(item_data)] = item_data idx += len(item_data) - if reject_dc: - data -= data.mean() - return data - def get_value_calibrated(self, reject_dc: bool = False) -> np.ndarray: + return normalizations[self.normalization](data) + + def get_value_calibrated(self) -> np.ndarray: """Return the value of the audio data accounting for the calibration factor. If the instrument parameter of the audio data is not None, the returned value is calibrated in units of Pa. - Parameters - ---------- - reject_dc: bool - If True, the values will be centered on 0. - Returns ------- np.ndarray: The calibrated value of the audio data. 
""" - raw_data = self.get_value(reject_dc=reject_dc) + raw_data = self.get_value() calibration_factor = ( 1.0 if self.instrument is None else self.instrument.end_to_end ) diff --git a/src/osekit/core_api/spectro_data.py b/src/osekit/core_api/spectro_data.py index 19d80e81..e7be3f3d 100644 --- a/src/osekit/core_api/spectro_data.py +++ b/src/osekit/core_api/spectro_data.py @@ -212,7 +212,7 @@ def get_value(self) -> np.ndarray: raise ValueError("SpectroData should have either items or audio_data.") sx = self.fft.stft( - self.audio_data.get_value_calibrated(reject_dc=True), + self.audio_data.get_value_calibrated(), padding="zeros", ) @@ -261,7 +261,7 @@ def get_welch( nfft = self.fft.mfft _, sx = welch( - self.audio_data.get_value_calibrated(reject_dc=True), + self.audio_data.get_value_calibrated(), fs=self.audio_data.sample_rate, window=window, nperseg=nperseg, diff --git a/tests/test_spectro.py b/tests/test_spectro.py index d4b7e6dd..73284e9b 100644 --- a/tests/test_spectro.py +++ b/tests/test_spectro.py @@ -283,7 +283,7 @@ def test_spectrogram_from_npz_files( for spectro in sd_split: spectro.write(tmp_path / "output") - centered_data = spectro.audio_data.get_value(reject_dc=True) + centered_data = spectro.audio_data.get_value() (tmp_path / "audio").mkdir(exist_ok=True) sf.write( file=tmp_path / "audio" / f"{spectro.audio_data}.wav", @@ -294,10 +294,6 @@ def test_spectrogram_from_npz_files( assert len(list((tmp_path / "output").glob("*.npz"))) == nb_chunks - # Since we reject the DC of audio data before computing Sx values of each chunk, - # we must compare the concatenated chunks with an AudioData made from the - # DC-free parts. 
- afs = [ AudioFile(f, strptime_format=TIMESTAMP_FORMAT_EXPORTED_FILES_UNLOCALIZED) for f in (tmp_path / "audio").glob("*.wav") From 29804184a105157ab32a0345dd685db3a593fc9f Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Thu, 28 Aug 2025 12:10:43 +0200 Subject: [PATCH 05/29] add AudioData normalization serialization --- src/osekit/core_api/audio_data.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/osekit/core_api/audio_data.py b/src/osekit/core_api/audio_data.py index 3a3d102d..484c0c19 100644 --- a/src/osekit/core_api/audio_data.py +++ b/src/osekit/core_api/audio_data.py @@ -290,6 +290,7 @@ def to_dict(self) -> dict: | instrument_dict | { "sample_rate": self.sample_rate, + "normalization": self.normalization, } ) @@ -317,6 +318,7 @@ def from_dict(cls, dictionary: dict) -> AudioData: return cls.from_base_data( data=base_data, sample_rate=dictionary["sample_rate"], + normalization=dictionary["normalization"], instrument=instrument, ) @@ -328,6 +330,7 @@ def from_files( end: Timestamp | None = None, sample_rate: float | None = None, instrument: Instrument | None = None, + normalization: Literal["raw", "reject_dc", "zscore"] = "raw", ) -> AudioData: """Return an AudioData object from a list of AudioFiles. @@ -346,6 +349,8 @@ def from_files( instrument: Instrument | None Instrument that might be used to obtain acoustic pressure from the wav audio data. + normalization: Literal["raw","reject_dc","zscore"] + The type of normalization to apply to the audio data. Returns ------- @@ -357,6 +362,7 @@ def from_files( data=BaseData.from_files(files, begin, end), sample_rate=sample_rate, instrument=instrument, + normalization=normalization, ) @classmethod @@ -365,6 +371,7 @@ def from_base_data( data: BaseData, sample_rate: float | None = None, instrument: Instrument | None = None, + normalization: Literal["raw", "reject_dc", "zscore"] = "raw", ) -> AudioData: """Return an AudioData object from a BaseData object. 
@@ -377,6 +384,8 @@ def from_base_data( instrument: Instrument | None Instrument that might be used to obtain acoustic pressure from the wav audio data. + normalization: Literal["raw","reject_dc","zscore"] + The type of normalization to apply to the audio data. Returns ------- @@ -388,4 +397,5 @@ def from_base_data( items=[AudioItem.from_base_item(item) for item in data.items], sample_rate=sample_rate, instrument=instrument, + normalization=normalization, ) From 6b88cab5b706f8cbacf128e16ed608ba39c103b3 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Thu, 28 Aug 2025 12:14:59 +0200 Subject: [PATCH 06/29] add AudioData normalization serialization tests --- src/osekit/core_api/audio_data.py | 12 +++++----- tests/test_serialization.py | 40 +++++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/src/osekit/core_api/audio_data.py b/src/osekit/core_api/audio_data.py index 484c0c19..5611e77a 100644 --- a/src/osekit/core_api/audio_data.py +++ b/src/osekit/core_api/audio_data.py @@ -40,7 +40,7 @@ def __init__( end: Timestamp | None = None, sample_rate: int | None = None, instrument: Instrument | None = None, - normalization: Literal["raw", "reject_dc", "zscore"] = "raw", + normalization: Literal["raw", "dc_reject", "zscore"] = "raw", ) -> None: """Initialize an AudioData from a list of AudioItems. @@ -59,7 +59,7 @@ def __init__( instrument: Instrument | None Instrument that might be used to obtain acoustic pressure from the wav audio data. - normalization: Literal["raw","reject_dc","zscore"] + normalization: Literal["raw","dc_reject","zscore"] The type of normalization to apply to the audio data. """ @@ -330,7 +330,7 @@ def from_files( end: Timestamp | None = None, sample_rate: float | None = None, instrument: Instrument | None = None, - normalization: Literal["raw", "reject_dc", "zscore"] = "raw", + normalization: Literal["raw", "dc_reject", "zscore"] = "raw", ) -> AudioData: """Return an AudioData object from a list of AudioFiles. 
@@ -349,7 +349,7 @@ def from_files( instrument: Instrument | None Instrument that might be used to obtain acoustic pressure from the wav audio data. - normalization: Literal["raw","reject_dc","zscore"] + normalization: Literal["raw","dc_reject","zscore"] The type of normalization to apply to the audio data. Returns @@ -371,7 +371,7 @@ def from_base_data( data: BaseData, sample_rate: float | None = None, instrument: Instrument | None = None, - normalization: Literal["raw", "reject_dc", "zscore"] = "raw", + normalization: Literal["raw", "dc_reject", "zscore"] = "raw", ) -> AudioData: """Return an AudioData object from a BaseData object. @@ -384,7 +384,7 @@ def from_base_data( instrument: Instrument | None Instrument that might be used to obtain acoustic pressure from the wav audio data. - normalization: Literal["raw","reject_dc","zscore"] + normalization: Literal["raw","dc_reject","zscore"] The type of normalization to apply to the audio data. Returns diff --git a/tests/test_serialization.py b/tests/test_serialization.py index cc3f6e8d..fa4d994c 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal import numpy as np import pytest @@ -27,7 +27,7 @@ @pytest.mark.parametrize( - ("audio_files", "begin", "end", "sample_rate"), + ("audio_files", "begin", "end", "sample_rate", "normalization"), [ pytest.param( { @@ -39,8 +39,22 @@ None, None, 48_000, + "raw", id="full_file_no_resample", ), + pytest.param( + { + "duration": 1, + "sample_rate": 48_000, + "nb_files": 1, + "date_begin": Timestamp("2024-01-01 12:00:00"), + }, + None, + None, + 48_000, + "zscore", + id="normalized_audio", + ), pytest.param( { "duration": 1, @@ -51,6 +65,7 @@ None, None, 24_000, + "raw", id="full_file_downsample", ), pytest.param( @@ -63,6 +78,7 @@ None, None, 96_000, + "raw", id="full_file_upsample", ), pytest.param( @@ -75,6 +91,7 @@ 
Timestamp("2024-01-01 12:00:01"), Timestamp("2024-01-01 12:00:02"), 48_000, + "raw", id="file_part", ), pytest.param( @@ -87,6 +104,7 @@ Timestamp("2024-01-01 12:00:01"), Timestamp("2024-01-01 12:00:02"), 24_000, + "raw", id="two_files_with_resample", ), pytest.param( @@ -100,6 +118,7 @@ Timestamp("2024-01-01 12:00:01"), Timestamp("2024-01-01 12:00:04"), 48_000, + "raw", id="two_files_with_gap", ), pytest.param( @@ -113,8 +132,23 @@ Timestamp("2024-01-01 12:00:01+0200"), Timestamp("2024-01-01 12:00:04+0200"), 48_000, + "raw", id="localized_files", ), + pytest.param( + { + "duration": 2, + "sample_rate": 48_000, + "nb_files": 2, + "inter_file_duration": 1, + "date_begin": Timestamp("2024-01-01 12:00:00+0200"), + }, + Timestamp("2024-01-01 12:00:01+0200"), + Timestamp("2024-01-01 12:00:04+0200"), + 48_000, + "dc_reject", + id="localized_normalized_files", + ), ], indirect=["audio_files"], ) @@ -124,6 +158,7 @@ def test_audio_data_serialization( begin: Timestamp | None, end: Timestamp | None, sample_rate: float, + normalization: Literal["raw", "dc_reject", "zscore"], ) -> None: audio_files, _ = audio_files @@ -132,6 +167,7 @@ def test_audio_data_serialization( begin=begin, end=end, sample_rate=sample_rate, + normalization=normalization, ) assert np.array_equal(ad.get_value(), AudioData.from_dict(ad.to_dict()).get_value()) From c4b2752131db5f57c56c6d6791e0cac7adef8f1f Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Thu, 28 Aug 2025 12:18:50 +0200 Subject: [PATCH 07/29] add normalization to AudioDataset --- src/osekit/core_api/audio_dataset.py | 32 ++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/src/osekit/core_api/audio_dataset.py b/src/osekit/core_api/audio_dataset.py index 595e348c..7ea6cdfc 100644 --- a/src/osekit/core_api/audio_dataset.py +++ b/src/osekit/core_api/audio_dataset.py @@ -73,6 +73,19 @@ def sample_rate(self, sample_rate: float) -> None: for data in self.data: data.sample_rate = sample_rate + @property + def 
normalization(self) -> Literal["raw", "dc_reject", "zscore"]: + """Return the most frequent normalization among those of this dataset data.""" + normalizations = [data.normalization for data in self.data] + return max(set(normalizations), key=normalizations.count) + + @normalization.setter + def normalization( + self, normalization: Literal["raw", "dc_reject", "zscore"] + ) -> None: + for data in self.data: + data.normalization = normalization + @property def instrument(self) -> Instrument | None: """Instrument that can be used to get acoustic pressure from wav audio data.""" @@ -166,6 +179,7 @@ def from_folder( # noqa: PLR0913 data_duration: Timedelta | None = None, name: str | None = None, instrument: Instrument | None = None, + normalization: Literal["raw", "dc_reject", "zscore"] = "raw", **kwargs: any, ) -> AudioDataset: """Return an AudioDataset from a folder containing the audio files. @@ -207,6 +221,8 @@ def from_folder( # noqa: PLR0913 instrument: Instrument | None Instrument that might be used to obtain acoustic pressure from the wav audio data. + normalization: Literal["raw","dc_reject","zscore"] + The type of normalization to apply to the audio data. kwargs: any Keyword arguments passed to the BaseDataset.from_folder classmethod. @@ -233,6 +249,7 @@ def from_folder( # noqa: PLR0913 base_dataset=base_dataset, name=name, instrument=instrument, + normalization=normalization, ) @classmethod @@ -245,6 +262,7 @@ def from_files( # noqa: PLR0913 data_duration: Timedelta | None = None, name: str | None = None, instrument: Instrument | None = None, + normalization: Literal["raw", "dc_reject", "zscore"] = "raw", ) -> AudioDataset: """Return an AudioDataset object from a list of AudioFiles. @@ -277,6 +295,8 @@ def from_files( # noqa: PLR0913 instrument: Instrument | None Instrument that might be used to obtain acoustic pressure from the wav audio data. + normalization: Literal["raw","dc_reject","zscore"] + The type of normalization to apply to the audio data. 
Returns ------- @@ -291,7 +311,9 @@ def from_files( # noqa: PLR0913 mode=mode, data_duration=data_duration, ) - return cls.from_base_dataset(base, name=name, instrument=instrument) + return cls.from_base_dataset( + base, name=name, instrument=instrument, normalization=normalization + ) @classmethod def from_base_dataset( @@ -300,10 +322,16 @@ def from_base_dataset( sample_rate: float | None = None, name: str | None = None, instrument: Instrument | None = None, + normalization: Literal["raw", "dc_reject", "zscore"] = "raw", ) -> AudioDataset: """Return an AudioDataset object from a BaseDataset object.""" return cls( - [AudioData.from_base_data(data, sample_rate) for data in base_dataset.data], + [ + AudioData.from_base_data( + data=data, sample_rate=sample_rate, normalization=normalization + ) + for data in base_dataset.data + ], name=name, instrument=instrument, ) From 8f2f6b3b529f6fb3f5b5b30bf50b111ca262c150 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Fri, 29 Aug 2025 11:36:45 +0200 Subject: [PATCH 08/29] add AudioData and AudioDataset normalization tests --- tests/test_audio.py | 65 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/tests/test_audio.py b/tests/test_audio.py index 2acf01c3..191ce751 100644 --- a/tests/test_audio.py +++ b/tests/test_audio.py @@ -23,7 +23,7 @@ from osekit.core_api.audio_file import AudioFile from osekit.core_api.audio_item import AudioItem from osekit.utils import audio_utils -from osekit.utils.audio_utils import generate_sample_audio +from osekit.utils.audio_utils import generate_sample_audio, normalizations @pytest.mark.parametrize( @@ -756,6 +756,69 @@ def resample_mkptch( ) == (upsampling_quality if upsampling_quality is not None else upsampling_default) +@pytest.mark.parametrize( + ("audio_files", "normalization"), + [ + pytest.param( + { + "duration": 1, + "sample_rate": 10, + "nb_files": 1, + "date_begin": pd.Timestamp("2024-01-01 12:00:00"), + "series_type": 
"increase", + }, + "raw", + id="no_normalization", + ), + pytest.param( + { + "duration": 1, + "sample_rate": 10, + "nb_files": 1, + "date_begin": pd.Timestamp("2024-01-01 12:00:00"), + "series_type": "increase", + }, + "dc_reject", + id="dc_reject", + ), + pytest.param( + { + "duration": 1, + "sample_rate": 10, + "nb_files": 1, + "date_begin": pd.Timestamp("2024-01-01 12:00:00"), + "series_type": "increase", + }, + "zscore", + id="z_score", + ), + ], + indirect=["audio_files"], +) +def test_normalize_audio_data( + audio_files: tuple[list[AudioFile], pytest.fixtures.Subrequest], + normalization: Literal["raw", "dc_reject", "zscore"], +) -> None: + afs, _ = audio_files + + raw_data = np.linspace(0.0, 1.0, 10) + normalized_data = normalizations[normalization](raw_data) + + if normalization == "raw": + assert np.array_equal(raw_data, normalized_data) + else: + assert not np.array_equal(raw_data, normalized_data) + + # AudioData + ad = AudioData.from_files(afs, normalization=normalization) + assert np.array_equal(ad.get_value(), normalized_data) + + # AudioDataset + ads = AudioDataset.from_files(afs, normalization=normalization) + assert ads.data[0].normalization == normalization + assert np.array_equal(ads.data[0].get_value(), normalized_data) + + @pytest.mark.parametrize( ("audio_files", "begin", "end", "mode", "duration", "expected_audio_data"), [ From b74a395cd4b16de12d3048088504503c155593a8 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Fri, 29 Aug 2025 11:53:14 +0200 Subject: [PATCH 09/29] add AudioDataset normalization serialization tests --- tests/test_serialization.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/test_serialization.py b/tests/test_serialization.py index fa4d994c..a8b71dd2 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -174,7 +174,7 @@ def test_audio_data_serialization( @pytest.mark.parametrize( - ("audio_files", "data_duration", "sample_rate", "name"), + ("audio_files", 
"data_duration", "sample_rate", "normalization", "name"), [ pytest.param( { @@ -184,6 +184,7 @@ def test_audio_data_serialization( }, Timedelta(seconds=1), 48_000, + "raw", None, id="one_audio_data_one_file_no_resample", ), @@ -195,6 +196,7 @@ def test_audio_data_serialization( }, Timedelta(seconds=2), 48_000, + "raw", None, id="one_audio_data_two_files_no_resample", ), @@ -206,6 +208,7 @@ def test_audio_data_serialization( }, Timedelta(seconds=2), 24_000, + "raw", None, id="one_audio_data_two_files_downsample", ), @@ -217,6 +220,7 @@ def test_audio_data_serialization( }, Timedelta(seconds=1), [12_000, 24_000, 48_000, 96_000], + "raw", None, id="multiple_audio_data_different_sample_rates", ), @@ -228,6 +232,7 @@ def test_audio_data_serialization( }, Timedelta(seconds=1), 48_000, + "raw", "merriweather post pavilion", id="named_ads", ), @@ -240,6 +245,7 @@ def test_audio_data_serialization( }, Timedelta(seconds=1), 48_000, + "raw", "merriweather post pavilion", id="localized_ads", ), @@ -251,6 +257,7 @@ def test_audio_dataset_serialization( audio_files: tuple[list[AudioFile], pytest.fixtures.Subrequest], data_duration: Timestamp | None, sample_rate: float | list[float], + normalization: Literal["raw", "dc_reject", "zscore"], name: str | None, ) -> None: audio_files, request = audio_files @@ -266,6 +273,7 @@ def test_audio_dataset_serialization( tmp_path, strptime_format=strptime_format, data_duration=data_duration, + normalization=normalization, name=name, ) @@ -295,6 +303,7 @@ def test_audio_dataset_serialization( assert ads.has_default_name == ads2.has_default_name assert ads.sample_rate == ads2.sample_rate assert ads.begin == ads2.begin + assert ads.normalization == ads2.normalization assert all( np.array_equal(ad.get_value(), ad2.get_value()) From 0ec5ebadd72bb926839456bf8a7786c78f890f11 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Fri, 29 Aug 2025 12:04:34 +0200 Subject: [PATCH 10/29] add public_api normalization in analysis --- 
src/osekit/public_api/analysis.py | 4 ++++ src/osekit/public_api/dataset.py | 1 + tests/test_public_api.py | 33 +++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+) diff --git a/src/osekit/public_api/analysis.py b/src/osekit/public_api/analysis.py index 0a2a5e64..af97bd95 100644 --- a/src/osekit/public_api/analysis.py +++ b/src/osekit/public_api/analysis.py @@ -68,6 +68,7 @@ def __init__( data_duration: Timedelta | None = None, mode: Literal["files", "timedelta_total", "timedelta_file"] = "timedelta_total", sample_rate: float | None = None, + normalization: Literal["raw", "dc_reject", "zscore"] = "raw", name: str | None = None, subtype: str | None = None, fft: ShortTimeFFT | None = None, @@ -106,6 +107,8 @@ def __init__( Sample rate of the new analysis data. Audio data will be resampled if provided, else the sample rate will be set to the one of the original dataset. + normalization: Literal["raw", "dc_reject", "zscore"] + The type of normalization to apply to the audio data. name: str | None Name of the analysis dataset. Defaulted as the begin timestamp of the analysis dataset. 
@@ -143,6 +146,7 @@ def __init__( self.mode = mode self.sample_rate = sample_rate self.name = name + self.normalization = normalization self.subtype = subtype self.fft = fft self.v_lim = v_lim diff --git a/src/osekit/public_api/dataset.py b/src/osekit/public_api/dataset.py index 875f4166..c2d73ce2 100644 --- a/src/osekit/public_api/dataset.py +++ b/src/osekit/public_api/dataset.py @@ -207,6 +207,7 @@ def get_analysis_audiodataset(self, analysis: Analysis) -> AudioDataset: end=analysis.end, data_duration=analysis.data_duration, mode=analysis.mode, + normalization=analysis.normalization, name=analysis.name, instrument=self.instrument, ) diff --git a/tests/test_public_api.py b/tests/test_public_api.py index fa3a5836..a24b2479 100644 --- a/tests/test_public_api.py +++ b/tests/test_public_api.py @@ -910,6 +910,32 @@ def test_analysis_is_spectro(analysis: Analysis, expected: bool) -> None: ], id="full_reshape", ), + pytest.param( + { + "duration": 5, + "sample_rate": 48_000, + "nb_files": 1, + "date_begin": Timestamp("2024-01-01 12:00:00"), + }, + None, + Analysis( + analysis_type=AnalysisType.AUDIO, + name=None, + begin=None, + end=None, + data_duration=None, + sample_rate=None, + normalization="zscore", + subtype="DOUBLE", + ), + [ + Event( + begin=Timestamp("2024-01-01 12:00:00"), + end=Timestamp("2024-01-01 12:00:05"), + ), + ], + id="normalized_data", + ), ], indirect=["audio_files"], ) @@ -950,6 +976,8 @@ def test_get_analysis_audiodataset( assert analysis_ds.instrument is dataset.instrument + assert analysis_ds.normalization == analysis.normalization + @pytest.mark.parametrize( ("audio_files", "instrument", "analysis", "expected_data"), @@ -1060,6 +1088,7 @@ def test_edit_analysis_before_run( new_name = "new_analysis" new_instrument = Instrument(end_to_end_db=100) new_data = ads.data[::2] + new_normalization = "zscore" ads.sample_rate = new_sr analysis.sample_rate = new_sr @@ -1067,6 +1096,7 @@ def test_edit_analysis_before_run( ads.name = new_name ads.instrument 
= new_instrument ads.data = new_data + ads.normalization = new_normalization dataset.run_analysis(analysis, audio_dataset=ads) @@ -1091,5 +1121,8 @@ def test_edit_analysis_before_run( assert analysis_ads.sample_rate == new_sr assert analysis_sds.fft.fs == new_sr + # Analyses have the edited normalization + assert analysis_ads.normalization == new_normalization + # Instrument has been edited assert analysis_ads.instrument.end_to_end_db == new_instrument.end_to_end_db From 1db63405e8e1b762865901cf79c41bde6506c8c7 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Mon, 1 Sep 2025 09:30:59 +0200 Subject: [PATCH 11/29] add AudioData.normalization in the docs --- docs/source/coreapi_usage.rst | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/docs/source/coreapi_usage.rst b/docs/source/coreapi_usage.rst index 261e8aa6..b2f93d80 100644 --- a/docs/source/coreapi_usage.rst +++ b/docs/source/coreapi_usage.rst @@ -110,6 +110,35 @@ The data is fetched seamlessly on-demand from the audio file(s). The opening/clo Eventual time gap between audio items are filled with ``0.`` values. +Normalization +""""""""""""" + +The fetched audio data can be normalized according to the following presets: + +.. list-table:: Normalization presets + :widths: 10 10 + :header-rows: 1 + + * - Name + - Description + * - ``raw`` + - :math:`x` + * - ``dc_reject`` + - :math:`x-\overline{ x }` + * - ``zscore`` + - :math:`\frac{ x-\overline{x} }{\sigma (x)}` + +To normalize the data, simply set the :attr:`osekit.core_api.audio_data.AudioData.normalization` property to the +requested normalization name: + +.. code-block:: python + + from osekit.core_api.audio_data import AudioData + + ad = AudioData(...) 
+ ad.normalization = "zscore" # Note: normalization is also a parameter of the AudioData initializer + + v = ad.get_value() # The fetched data will then be normalized Calibration """"""""""" @@ -366,4 +395,4 @@ should be provided: ltas.plot() plt.show() -A ``SpectroData`` object can be turned into a ``LTASData`` thanks to the :meth:`osekit.core_api.ltas_data.LTASData.from_spectro_data` method. \ No newline at end of file +A ``SpectroData`` object can be turned into a ``LTASData`` thanks to the :meth:`osekit.core_api.ltas_data.LTASData.from_spectro_data` method. From 640ff7fe8d926db175a1ee65e3bddecf08fbc6dc Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Mon, 1 Sep 2025 09:33:18 +0200 Subject: [PATCH 12/29] add AudioDataset.normalization in the docs --- docs/source/coreapi_usage.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/source/coreapi_usage.rst b/docs/source/coreapi_usage.rst index b2f93d80..9d6f98b4 100644 --- a/docs/source/coreapi_usage.rst +++ b/docs/source/coreapi_usage.rst @@ -153,8 +153,8 @@ allows for retrieving the data in the shape of the recorded acoustic pressure. .. code-block:: python - from osekit.core_api.instrument import Instrument from osekit.core_api.audio_data import AudioData + from osekit.core_api.instrument import Instrument import numpy as np instrument = Instrument(end_to_end_db = 150) # The raw 1. 
WAV value equals 150 dB SPL re 1 uPa @@ -199,6 +199,7 @@ an ``AudioDataset`` from a given folder containing audio files: from pathlib import Path from osekit.core_api.audio_dataset import AudioDataset + from osekit.core_api.instrument import Instrument from pandas import Timestamp, Timedelta folder = Path(r"...") @@ -208,7 +209,9 @@ an ``AudioDataset`` from a given folder containing audio files: strptime_format="%y_%m_%d_%H_%M_%S", # To parse the files begin Timestamp begin=Timestamp("2009-01-06 12:00:00"), end=Timestamp("2009-01-06 14:00:00"), - data_duration=Timedelta("10s") + data_duration=Timedelta("10s"), + instrument=Instrument(end_to_end_db=150), + normalization="dc_reject" ) The resulting ``AudioDataset`` will contain 10s-long ``AudioData`` ranging from ``2009-01-06 12:00:00`` to ``2009-01-06 14:00:00``. From fe4a0d0c86a04a252c5646befd9fcda6254a82b0 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Mon, 1 Sep 2025 09:46:00 +0200 Subject: [PATCH 13/29] add public API normalization in doc --- docs/source/publicapi_usage.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/source/publicapi_usage.rst b/docs/source/publicapi_usage.rst index 24f53d50..71979cfd 100644 --- a/docs/source/publicapi_usage.rst +++ b/docs/source/publicapi_usage.rst @@ -261,6 +261,8 @@ Let's now say we want to export audio, spectrum matrices and spectrograms with t - ``10 s`` * - Sample rate - ``48 kHz`` + * - Audio data normalization + - ``dc_reject`` (removes the audio DC component) * - FFT - ``hamming window``, ``1024 points``, ``40% overlap`` @@ -291,6 +293,7 @@ Then we are all set for running the analysis: end=dataset.origin_dataset.begin + Timedelta(hours=1.5), # 1h30 after the begin of the original dataset data_duration=Timedelta("10s"), # Duration of the output data sample_rate=48_000, # Sample rate of the output data + normalization="dc_reject", name="full_analysis", # You can name the analysis, or keep the default name. 
fft=sft, # The FFT parameters ) From 65e0aa9d7df8493375448ac52aa1449e7d6ff0c5 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Mon, 1 Sep 2025 10:30:53 +0200 Subject: [PATCH 14/29] add AudioDataset.from_folder sample_rate parameter --- src/osekit/core_api/audio_dataset.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/osekit/core_api/audio_dataset.py b/src/osekit/core_api/audio_dataset.py index 7ea6cdfc..25246eb4 100644 --- a/src/osekit/core_api/audio_dataset.py +++ b/src/osekit/core_api/audio_dataset.py @@ -177,6 +177,7 @@ def from_folder( # noqa: PLR0913 timezone: str | pytz.timezone | None = None, mode: Literal["files", "timedelta_total", "timedelta_file"] = "timedelta_total", data_duration: Timedelta | None = None, + sample_rate: float | None = None, name: str | None = None, instrument: Instrument | None = None, normalization: Literal["raw", "dc_reject", "zscore"] = "raw", @@ -216,6 +217,8 @@ def from_folder( # noqa: PLR0913 If mode is set to "files", this parameter has no effect. If provided, audio data will be evenly distributed between begin and end. Else, one data object will cover the whole time period. + sample_rate: float | None + Sample rate of the audio data objects. name: str|None Name of the dataset. instrument: Instrument | None @@ -249,6 +252,7 @@ def from_folder( # noqa: PLR0913 base_dataset=base_dataset, name=name, instrument=instrument, + sample_rate=sample_rate, normalization=normalization, ) @@ -260,6 +264,7 @@ def from_files( # noqa: PLR0913 end: Timestamp | None = None, mode: Literal["files", "timedelta_total", "timedelta_file"] = "timedelta_total", data_duration: Timedelta | None = None, + sample_rate: float | None = None, name: str | None = None, instrument: Instrument | None = None, normalization: Literal["raw", "dc_reject", "zscore"] = "raw", @@ -290,6 +295,8 @@ def from_files( # noqa: PLR0913 If mode is set to "files", this parameter has no effect. 
If provided, data will be evenly distributed between begin and end. Else, one data object will cover the whole time period. + sample_rate: float | None + Sample rate of the audio data objects. name: str|None Name of the dataset. instrument: Instrument | None @@ -312,7 +319,11 @@ def from_files( # noqa: PLR0913 data_duration=data_duration, ) return cls.from_base_dataset( - base, name=name, instrument=instrument, normalization=normalization + base, + name=name, + sample_rate=sample_rate, + instrument=instrument, + normalization=normalization, ) @classmethod From 76b7a40d625cce5ab559f549211769bfad3153c4 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Mon, 1 Sep 2025 10:41:08 +0200 Subject: [PATCH 15/29] add normalization in doc notebooks --- docs/source/example_ltas.rst | 1 + docs/source/example_ltas_core.ipynb | 5 ++++- docs/source/example_ltas_public.ipynb | 1 + docs/source/example_multiple_spectrograms.rst | 1 + docs/source/example_multiple_spectrograms_core.ipynb | 4 +++- docs/source/example_multiple_spectrograms_public.ipynb | 1 + docs/source/example_reshaping_multiple_files.rst | 1 + docs/source/example_reshaping_multiple_files_core.ipynb | 2 ++ docs/source/example_reshaping_multiple_files_public.ipynb | 2 ++ docs/source/example_reshaping_one_file.ipynb | 5 +++-- docs/source/example_spectrogram.ipynb | 1 + 11 files changed, 20 insertions(+), 4 deletions(-) diff --git a/docs/source/example_ltas.rst b/docs/source/example_ltas.rst index fa12e02f..0716d7bc 100644 --- a/docs/source/example_ltas.rst +++ b/docs/source/example_ltas.rst @@ -13,6 +13,7 @@ This LTAS will: * Start at the begin of the first audio file * End at the end of the last audio file * Be downsampled at ``24 kHz`` +* Have its DC component removed | The FFT used for computing the spectrograms will use a ``1024 samples``-long hamming window. | The ``hop`` of LTAS ``ShortTimeFFT`` objects is forced to the size of the window (no overlap). 
diff --git a/docs/source/example_ltas_core.ipynb b/docs/source/example_ltas_core.ipynb index 00bc17ed..32f55310 100644 --- a/docs/source/example_ltas_core.ipynb +++ b/docs/source/example_ltas_core.ipynb @@ -60,7 +60,10 @@ ").data[0]\n", "\n", "# Resampling at 24 kHz\n", - "audio_data.sample_rate = 24_000" + "audio_data.sample_rate = 24_000\n", + "\n", + "# Removing the DC component\n", + "audio_data.normalization = \"dc_reject\"" ] }, { diff --git a/docs/source/example_ltas_public.ipynb b/docs/source/example_ltas_public.ipynb index 270b92f8..00fb0aa8 100644 --- a/docs/source/example_ltas_public.ipynb +++ b/docs/source/example_ltas_public.ipynb @@ -141,6 +141,7 @@ " | AnalysisType.MATRIX, # we want to export both the spectrogram and the sx matrix\n", " nb_ltas_time_bins=3000, # This will turn the regular spectrum computation in a LTAS\n", " sample_rate=sample_rate,\n", + " normalization=\"dc_reject\", # Removes the DC component\n", " fft=sft,\n", " v_lim=(0.0, 150.0), # Boundaries of the spectrograms\n", " colormap=\"viridis\", # Default value\n", diff --git a/docs/source/example_multiple_spectrograms.rst b/docs/source/example_multiple_spectrograms.rst index 42eaffa3..e5919de4 100644 --- a/docs/source/example_multiple_spectrograms.rst +++ b/docs/source/example_multiple_spectrograms.rst @@ -9,6 +9,7 @@ In this example, we want to export spectrograms drawn from the sample audio data * Last spectrogram ends at ``2022-09-25 22:36:25`` * Spectrograms represent ``5 s``-long audio data * Audio data are downsampled sampled at ``24 kHz`` before spectrograms are computed +* The DC component of the audio data is rejected before spectrograms are computed * Spectrograms that are in the gap between recordings should be skipped The FFT used for computing the spectrograms will use a ``1024 samples``-long hamming window, with a ``128 samples``-long hop. 
diff --git a/docs/source/example_multiple_spectrograms_core.ipynb b/docs/source/example_multiple_spectrograms_core.ipynb index c3cef480..267c0ffa 100644 --- a/docs/source/example_multiple_spectrograms_core.ipynb +++ b/docs/source/example_multiple_spectrograms_core.ipynb @@ -61,6 +61,8 @@ " end=Timestamp(\"2022-09-25 22:36:25\"),\n", " data_duration=Timedelta(seconds=5),\n", " instrument=Instrument(end_to_end_db=150.0),\n", + " sample_rate=24_000,\n", + " normalization=\"dc_reject\",\n", ")" ] }, @@ -192,7 +194,7 @@ "source": [ "import matplotlib.pyplot as plt\n", "\n", - "spectro_dataset.data[0].plot()\n", + "spectro_dataset.data[1].plot()\n", "plt.show()" ] }, diff --git a/docs/source/example_multiple_spectrograms_public.ipynb b/docs/source/example_multiple_spectrograms_public.ipynb index df6bb0f4..969aea10 100644 --- a/docs/source/example_multiple_spectrograms_public.ipynb +++ b/docs/source/example_multiple_spectrograms_public.ipynb @@ -143,6 +143,7 @@ " end=Timestamp(\"2022-09-25 22:36:25\"),\n", " data_duration=Timedelta(seconds=5),\n", " sample_rate=sample_rate,\n", + " normalization=\"dc_reject\",\n", " fft=sft,\n", " v_lim=(0.0, 150.0), # Boundaries of the spectrograms\n", " colormap=\"viridis\", # Default value\n", diff --git a/docs/source/example_reshaping_multiple_files.rst b/docs/source/example_reshaping_multiple_files.rst index 70cc2cad..ec4b20fd 100644 --- a/docs/source/example_reshaping_multiple_files.rst +++ b/docs/source/example_reshaping_multiple_files.rst @@ -9,6 +9,7 @@ In this example, we want to export reshaped files from the sample audio dataset * Last file ends at ``2022-09-25 22:36:25`` * Files are ``5 s``-long * Files are sampled at ``24 kHz`` +* Files are DC-filtered * Files that are in the gap between recordings should be skipped .. 
toctree:: diff --git a/docs/source/example_reshaping_multiple_files_core.ipynb b/docs/source/example_reshaping_multiple_files_core.ipynb index c97b97e7..8999d8e8 100644 --- a/docs/source/example_reshaping_multiple_files_core.ipynb +++ b/docs/source/example_reshaping_multiple_files_core.ipynb @@ -55,6 +55,8 @@ " begin=Timestamp(\"2022-09-25 22:35:15\"),\n", " end=Timestamp(\"2022-09-25 22:36:25\"),\n", " data_duration=Timedelta(seconds=5),\n", + " sample_rate=24_000,\n", + " normalization=\"dc_reject\",\n", ")" ], "outputs": [], diff --git a/docs/source/example_reshaping_multiple_files_public.ipynb b/docs/source/example_reshaping_multiple_files_public.ipynb index 971b5627..9773d6cb 100644 --- a/docs/source/example_reshaping_multiple_files_public.ipynb +++ b/docs/source/example_reshaping_multiple_files_public.ipynb @@ -111,6 +111,8 @@ " begin=Timestamp(\"2022-09-25 22:35:15\"),\n", " end=Timestamp(\"2022-09-25 22:36:25\"),\n", " data_duration=Timedelta(seconds=5),\n", + " sample_rate=24_000,\n", + " normalization=\"dc_reject\",\n", " name=\"reshape_example\",\n", ")" ], diff --git a/docs/source/example_reshaping_one_file.ipynb b/docs/source/example_reshaping_one_file.ipynb index f3aeb200..e64ebb64 100644 --- a/docs/source/example_reshaping_one_file.ipynb +++ b/docs/source/example_reshaping_one_file.ipynb @@ -86,14 +86,15 @@ "cell_type": "markdown", "id": "8ae4db7363a92148", "metadata": {}, - "source": "Simply resample the `AudioData` by setting this property:" + "source": "Simply resample and normalize the `AudioData` by setting the corresponding properties:" }, { "cell_type": "code", "id": "8c672c04078d395e", "metadata": {}, "source": [ - "audio_data.sample_rate = 24_000" + "audio_data.sample_rate = 24_000\n", + "audio_data.normalization = \"dc_reject\"" ], "outputs": [], "execution_count": null diff --git a/docs/source/example_spectrogram.ipynb b/docs/source/example_spectrogram.ipynb index facc552b..d3810c09 100644 --- a/docs/source/example_spectrogram.ipynb +++ 
b/docs/source/example_spectrogram.ipynb @@ -79,6 +79,7 @@ " begin=Timestamp(\"2022-09-25 22:34:55\"),\n", " end=Timestamp(\"2022-09-25 22:35:05\"),\n", " sample_rate=40000,\n", + " normalization=\"dc_reject\",\n", " instrument=Instrument(end_to_end_db=150.0),\n", ")" ] From cb04f1364a33c06271ad2c4ebc720d5ff63d30fe Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Mon, 1 Sep 2025 10:42:41 +0200 Subject: [PATCH 16/29] remove reset cell from public LTAS notebook --- docs/source/example_ltas_public.ipynb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/source/example_ltas_public.ipynb b/docs/source/example_ltas_public.ipynb index 00fb0aa8..8bf52ae6 100644 --- a/docs/source/example_ltas_public.ipynb +++ b/docs/source/example_ltas_public.ipynb @@ -197,7 +197,11 @@ "cell_type": "code", "execution_count": null, "id": "e05d653bc1e8bfe2", - "metadata": {}, + "metadata": { + "tags": [ + "remove-cell" + ] + }, "outputs": [], "source": [ "# Reset the dataset to get all files back to place.\n", From 0861c5812ed98a8517a25b1eb423d82e3c718b3f Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Tue, 2 Sep 2025 17:18:23 +0200 Subject: [PATCH 17/29] change normalization to a Flag --- src/osekit/utils/audio_utils.py | 38 ++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/src/osekit/utils/audio_utils.py b/src/osekit/utils/audio_utils.py index 0ee40c0a..e0ba863d 100644 --- a/src/osekit/utils/audio_utils.py +++ b/src/osekit/utils/audio_utils.py @@ -1,5 +1,6 @@ from __future__ import annotations +import enum from typing import Literal import numpy as np @@ -124,13 +125,40 @@ def normalize_dc_reject(values: np.ndarray) -> np.ndarray: return values - values.mean() +def normalize_peak(values: np.ndarray) -> np.ndarray: + """Return values normalized so that the peak value is 1.0.""" + return values / values.max() + + def normalize_zscore(values: np.ndarray) -> np.ndarray: """Return normalized zscore from the audio data.""" return 
(values - values.mean()) / values.std() -normalizations = { - "raw": normalize_raw, - "dc_reject": normalize_dc_reject, - "zscore": normalize_zscore, -} +class Normalization(enum.Flag): + RAW = enum.auto() + DC_REJECT = enum.auto() + PEAK = enum.auto() + ZSCORE = enum.auto() + + def __or__(self, other) -> enum.Flag: + combined = super().__or__(other) + + # Only REJECT_DC can be combined with other normalizations + mask = combined.value & ~Normalization.DC_REJECT.value + if mask & (mask - 1): + message = "Combined normalizations can only be DC_REJECT combined with exactly one other normalization type." + raise ValueError(message) + + return combined + + +def normalize(values: np.ndarray, normalization: Normalization) -> np.ndarray: + """Normalize the audio data.""" + if Normalization.DC_REJECT in normalization: + values = normalize_dc_reject(values) + if Normalization.PEAK in normalization: + values = normalize_peak(values) + if Normalization.ZSCORE in normalization: + values = normalize_zscore(values) + return values From cf32e72f91a22b2ae5f5b69eefe68a67f6108e79 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Tue, 2 Sep 2025 17:22:27 +0200 Subject: [PATCH 18/29] adapt normalization test to new normalization system --- tests/test_utils.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index e71868e7..8831cab8 100755 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -13,7 +13,7 @@ locked, nb_files_per_batch, ) -from osekit.utils.audio_utils import normalizations +from osekit.utils.audio_utils import Normalization, normalize from osekit.utils.formatting_utils import aplose2raven from osekit.utils.path_utils import move_tree @@ -368,23 +368,32 @@ def test_get_closest_value_index( [ pytest.param( np.array([0.0, 1.0, 2.0]), - "raw", + Normalization.RAW, np.array([0.0, 1.0, 2.0]), + id="raw", ), pytest.param( np.array([0.0, 1.0, 2.0]), - "dc_reject", + Normalization.DC_REJECT, 
np.array([-1.0, 0.0, 1.0]), + id="dc_reject", ), pytest.param( np.array([0.0, 1.0, 2.0]), - "zscore", + Normalization.PEAK, + np.array([0.0, 0.5, 1.0]), + id="peak", + ), + pytest.param( + np.array([0.0, 1.0, 2.0]), + Normalization.ZSCORE, np.array([-1.224744871391589, 0.0, 1.224744871391589]), + id="zscore", ), ], ) def test_normalization( - values: np.ndarray, normalization: str, expected: np.ndarray + values: np.ndarray, normalization: Normalization, expected: np.ndarray ) -> None: - normalized = normalizations[normalization](values) + normalized = normalize(values=values, normalization=normalization) assert np.array_equal(normalized, expected) From 9cd31f2c925f79325a1e6791ec3e682c5326f196 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Tue, 2 Sep 2025 17:25:37 +0200 Subject: [PATCH 19/29] add combined normalization test --- tests/test_utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_utils.py b/tests/test_utils.py index 8831cab8..51a98999 100755 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -390,6 +390,12 @@ def test_get_closest_value_index( np.array([-1.224744871391589, 0.0, 1.224744871391589]), id="zscore", ), + pytest.param( + np.array([0.0, 2.0, 4.0]), + Normalization.DC_REJECT | Normalization.PEAK, + np.array([-1.0, 0.0, 1.0]), + id="dc_reject_and_peak", + ), ], ) def test_normalization( From 3df6e01c4154c694ef160cdec89649dea4a00dfc Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Wed, 3 Sep 2025 11:29:14 +0200 Subject: [PATCH 20/29] use metaclass to check normalization validity on call --- src/osekit/utils/audio_utils.py | 26 +++++++----- tests/test_utils.py | 73 +++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 10 deletions(-) diff --git a/src/osekit/utils/audio_utils.py b/src/osekit/utils/audio_utils.py index e0ba863d..7fa7da05 100644 --- a/src/osekit/utils/audio_utils.py +++ b/src/osekit/utils/audio_utils.py @@ -135,22 +135,28 @@ def normalize_zscore(values: np.ndarray) -> np.ndarray: return (values - 
values.mean()) / values.std() -class Normalization(enum.Flag): - RAW = enum.auto() - DC_REJECT = enum.auto() - PEAK = enum.auto() - ZSCORE = enum.auto() +class NormalizationValider(enum.EnumMeta): + """ + Metaclass used for validating the normalization flag, + as only REJECT_DC can be combined with (exactly) one other normalization. + """ - def __or__(self, other) -> enum.Flag: - combined = super().__or__(other) + def __call__(cls, *args, **kwargs): + instance = super().__call__(*args, **kwargs) - # Only REJECT_DC can be combined with other normalizations - mask = combined.value & ~Normalization.DC_REJECT.value + mask = instance.value & ~Normalization.DC_REJECT.value if mask & (mask - 1): message = "Combined normalizations can only be DC_REJECT combined with exactly one other normalization type." raise ValueError(message) - return combined + return instance + + +class Normalization(enum.Flag, metaclass=NormalizationValider): + RAW = enum.auto() + DC_REJECT = enum.auto() + PEAK = enum.auto() + ZSCORE = enum.auto() def normalize(values: np.ndarray, normalization: Normalization) -> np.ndarray: diff --git a/tests/test_utils.py b/tests/test_utils.py index 51a98999..e5e543c6 100755 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,7 +1,9 @@ from __future__ import annotations import time +from contextlib import nullcontext from pathlib import Path +from typing import Union import numpy as np import pandas as pd @@ -363,6 +365,77 @@ def test_get_closest_value_index( assert get_closest_value_index(values=values, target=target) == expected +@pytest.mark.parametrize( + ("normalizations", "expected"), + [ + pytest.param( + [Normalization.RAW], + nullcontext(Normalization.RAW), + id="raw_is_fine", + ), + pytest.param( + [1], + nullcontext(Normalization.RAW), + id="int_raw_is_fine", + ), + pytest.param( + [Normalization.DC_REJECT], + nullcontext(Normalization.DC_REJECT), + id="dc_reject_is_fine", + ), + pytest.param( + [Normalization.DC_REJECT, Normalization.PEAK], + 
nullcontext(Normalization.DC_REJECT | Normalization.PEAK), + id="dc_and_peak_is_fine", + ), + pytest.param( + [Normalization.DC_REJECT, Normalization.ZSCORE], + nullcontext(Normalization.DC_REJECT | Normalization.ZSCORE), + id="dc_and_zscore_is_fine", + ), + pytest.param( + [10], + nullcontext(Normalization.DC_REJECT | Normalization.ZSCORE), + id="int_dc_and_zscore_is_fine", + ), + pytest.param( + [Normalization.DC_REJECT, Normalization.PEAK, Normalization.ZSCORE], + pytest.raises(ValueError), + id="dc_reject_can_be_combined_with_only_one_other_value", + ), + pytest.param( + [Normalization.PEAK, Normalization.ZSCORE], + pytest.raises(ValueError), + id="combination_without_dc_raises", + ), + pytest.param( + [4, 8], + pytest.raises(ValueError), + id="int_combination_without_dc_raises", + ), + pytest.param( + [12], + pytest.raises(ValueError), + id="int_direct_combination_without_dc_raises", + ), + ], +) +def test_combined_normalization( + normalizations: list[Union[Normalization, int]], expected +) -> None: + def combine_normalizations(normalizations: list[Union[Normalization, int]]): + normalizations = [ + Normalization(n) if type(n) is int else n for n in normalizations + ] + output = normalizations[0] + for n in normalizations[1:]: + output = output | n + return output + + with expected as e: + assert combine_normalizations(normalizations) == e + + @pytest.mark.parametrize( ("values", "normalization", "expected"), [ From bf0a84d1b700c5ff4fc57ac529955c8363f81896 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Wed, 3 Sep 2025 11:33:34 +0200 Subject: [PATCH 21/29] use new Normalization flag in AudioData --- src/osekit/core_api/audio_data.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/osekit/core_api/audio_data.py b/src/osekit/core_api/audio_data.py index 5611e77a..af7eb521 100644 --- a/src/osekit/core_api/audio_data.py +++ b/src/osekit/core_api/audio_data.py @@ -7,7 +7,7 @@ from __future__ import annotations from 
math import ceil -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING import numpy as np import soundfile as sf @@ -20,7 +20,7 @@ from osekit.core_api.audio_item import AudioItem from osekit.core_api.base_data import BaseData from osekit.core_api.instrument import Instrument -from osekit.utils.audio_utils import resample, normalizations +from osekit.utils.audio_utils import resample, Normalization, normalize if TYPE_CHECKING: from pathlib import Path @@ -40,7 +40,7 @@ def __init__( end: Timestamp | None = None, sample_rate: int | None = None, instrument: Instrument | None = None, - normalization: Literal["raw", "dc_reject", "zscore"] = "raw", + normalization: Normalization = Normalization.RAW, ) -> None: """Initialize an AudioData from a list of AudioItems. @@ -59,7 +59,7 @@ def __init__( instrument: Instrument | None Instrument that might be used to obtain acoustic pressure from the wav audio data. - normalization: Literal["raw","dc_reject","zscore"] + normalization: Normalization The type of normalization to apply to the audio data. """ @@ -82,12 +82,12 @@ def shape(self) -> tuple[int, ...] 
| int: return data_length if self.nb_channels <= 1 else (data_length, self.nb_channels) @property - def normalization(self) -> Literal["raw", "dc_reject", "zscore"]: + def normalization(self) -> Normalization: """The type of normalization to apply to the audio data.""" return self._normalization @normalization.setter - def normalization(self, value: Literal["raw", "dc_reject", "zscore"]) -> None: + def normalization(self, value: Normalization) -> None: self._normalization = value def __eq__(self, other: AudioData) -> bool: @@ -132,7 +132,7 @@ def get_value(self) -> np.ndarray: data[idx : idx + len(item_data)] = item_data idx += len(item_data) - return normalizations[self.normalization](data) + return normalize(data, self.normalization) def get_value_calibrated(self) -> np.ndarray: """Return the value of the audio data accounting for the calibration factor. @@ -290,7 +290,7 @@ def to_dict(self) -> dict: | instrument_dict | { "sample_rate": self.sample_rate, - "normalization": self.normalization, + "normalization": self.normalization.value, } ) @@ -318,7 +318,7 @@ def from_dict(cls, dictionary: dict) -> AudioData: return cls.from_base_data( data=base_data, sample_rate=dictionary["sample_rate"], - normalization=dictionary["normalization"], + normalization=Normalization(dictionary["normalization"]), instrument=instrument, ) @@ -330,7 +330,7 @@ def from_files( end: Timestamp | None = None, sample_rate: float | None = None, instrument: Instrument | None = None, - normalization: Literal["raw", "dc_reject", "zscore"] = "raw", + normalization: Normalization = Normalization.RAW, ) -> AudioData: """Return an AudioData object from a list of AudioFiles. @@ -349,7 +349,7 @@ def from_files( instrument: Instrument | None Instrument that might be used to obtain acoustic pressure from the wav audio data. - normalization: Literal["raw","dc_reject","zscore"] + normalization: Normalization The type of normalization to apply to the audio data. 
Returns @@ -371,7 +371,7 @@ def from_base_data( data: BaseData, sample_rate: float | None = None, instrument: Instrument | None = None, - normalization: Literal["raw", "dc_reject", "zscore"] = "raw", + normalization: Normalization = Normalization.RAW, ) -> AudioData: """Return an AudioData object from a BaseData object. From 91472771ad2edaa5f472d0d27beb4f68d6f6353c Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Wed, 3 Sep 2025 11:38:51 +0200 Subject: [PATCH 22/29] use new Normalization flag in AudioDataset --- src/osekit/core_api/audio_dataset.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/osekit/core_api/audio_dataset.py b/src/osekit/core_api/audio_dataset.py index 25246eb4..bf38d4ed 100644 --- a/src/osekit/core_api/audio_dataset.py +++ b/src/osekit/core_api/audio_dataset.py @@ -14,6 +14,7 @@ from osekit.core_api.audio_file import AudioFile from osekit.core_api.base_dataset import BaseDataset from osekit.core_api.json_serializer import deserialize_json +from osekit.utils.audio_utils import Normalization from osekit.utils.multiprocess_utils import multiprocess if TYPE_CHECKING: @@ -74,15 +75,13 @@ def sample_rate(self, sample_rate: float) -> None: data.sample_rate = sample_rate @property - def normalization(self) -> Literal["raw", "dc_reject", "zscore"]: + def normalization(self) -> Normalization: """Return the most frequent normalization among those of this dataset data.""" normalizations = [data.normalization for data in self.data] return max(set(normalizations), key=normalizations.count) @normalization.setter - def normalization( - self, normalization: Literal["raw", "dc_reject", "zscore"] - ) -> None: + def normalization(self, normalization: Normalization) -> None: for data in self.data: data.normalization = normalization @@ -180,7 +179,7 @@ def from_folder( # noqa: PLR0913 sample_rate: float | None = None, name: str | None = None, instrument: Instrument | None = None, - normalization: Literal["raw", "dc_reject", 
"zscore"] = "raw", + normalization: Normalization = Normalization.RAW, **kwargs: any, ) -> AudioDataset: """Return an AudioDataset from a folder containing the audio files. @@ -224,7 +223,7 @@ def from_folder( # noqa: PLR0913 instrument: Instrument | None Instrument that might be used to obtain acoustic pressure from the wav audio data. - normalization: Literal["raw","dc_reject","zscore"] + normalization: Normalization The type of normalization to apply to the audio data. kwargs: any Keyword arguments passed to the BaseDataset.from_folder classmethod. @@ -267,7 +266,7 @@ def from_files( # noqa: PLR0913 sample_rate: float | None = None, name: str | None = None, instrument: Instrument | None = None, - normalization: Literal["raw", "dc_reject", "zscore"] = "raw", + normalization: Normalization = Normalization.RAW, ) -> AudioDataset: """Return an AudioDataset object from a list of AudioFiles. @@ -302,7 +301,7 @@ def from_files( # noqa: PLR0913 instrument: Instrument | None Instrument that might be used to obtain acoustic pressure from the wav audio data. - normalization: Literal["raw","dc_reject","zscore"] + normalization: Normalization The type of normalization to apply to the audio data. 
Returns @@ -333,7 +332,7 @@ def from_base_dataset( sample_rate: float | None = None, name: str | None = None, instrument: Instrument | None = None, - normalization: Literal["raw", "dc_reject", "zscore"] = "raw", + normalization: Normalization = Normalization.RAW, ) -> AudioDataset: """Return an AudioDataset object from a BaseDataset object.""" return cls( From 0392ee4c13cd06a1d5f439c6c1c414daa227ecf3 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Wed, 3 Sep 2025 14:11:40 +0200 Subject: [PATCH 23/29] use Normalization flag in the public API --- src/osekit/public_api/analysis.py | 5 +++-- tests/test_audio.py | 14 ++++++------ tests/test_public_api.py | 3 ++- tests/test_serialization.py | 37 ++++++++++++++++--------------- 4 files changed, 31 insertions(+), 28 deletions(-) diff --git a/src/osekit/public_api/analysis.py b/src/osekit/public_api/analysis.py index af97bd95..4d8d3c51 100644 --- a/src/osekit/public_api/analysis.py +++ b/src/osekit/public_api/analysis.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Literal from osekit.core_api.frequency_scale import Scale +from osekit.utils.audio_utils import Normalization if TYPE_CHECKING: from pandas import Timedelta, Timestamp @@ -68,7 +69,7 @@ def __init__( data_duration: Timedelta | None = None, mode: Literal["files", "timedelta_total", "timedelta_file"] = "timedelta_total", sample_rate: float | None = None, - normalization: Literal["raw", "dc_reject", "zscore"] = "raw", + normalization: Normalization = Normalization.RAW, name: str | None = None, subtype: str | None = None, fft: ShortTimeFFT | None = None, @@ -107,7 +108,7 @@ def __init__( Sample rate of the new analysis data. Audio data will be resampled if provided, else the sample rate will be set to the one of the original dataset. - normalization: Literal["raw", "dc_reject", "zscore"] + normalization: Normalization The type of normalization to apply to the audio data. name: str | None Name of the analysis dataset. 
diff --git a/tests/test_audio.py b/tests/test_audio.py index 191ce751..9e67d4b1 100644 --- a/tests/test_audio.py +++ b/tests/test_audio.py @@ -23,7 +23,7 @@ from osekit.core_api.audio_file import AudioFile from osekit.core_api.audio_item import AudioItem from osekit.utils import audio_utils -from osekit.utils.audio_utils import generate_sample_audio, normalizations +from osekit.utils.audio_utils import generate_sample_audio, Normalization, normalize @pytest.mark.parametrize( @@ -767,7 +767,7 @@ def resample_mkptch( "date_begin": pd.Timestamp("2024-01-01 12:00:00"), "series_type": "increase", }, - "raw", + Normalization.RAW, id="no_normalization", ), pytest.param( @@ -778,7 +778,7 @@ def resample_mkptch( "date_begin": pd.Timestamp("2024-01-01 12:00:00"), "series_type": "increase", }, - "dc_reject", + Normalization.DC_REJECT, id="dc_reject", ), pytest.param( @@ -789,7 +789,7 @@ def resample_mkptch( "date_begin": pd.Timestamp("2024-01-01 12:00:00"), "series_type": "increase", }, - "zscore", + Normalization.ZSCORE, id="z_score", ), ], @@ -797,14 +797,14 @@ def resample_mkptch( ) def test_normalize_audio_data( audio_files: tuple[list[AudioFile], pytest.fixtures.Subrequest], - normalization: Literal["raw", "dc_reject", "zscore"], + normalization: Normalization, ) -> None: afs, _ = audio_files raw_data = np.linspace(0.0, 1.0, 10) - normalized_data = normalizations[normalization](raw_data) + normalized_data = normalize(values=raw_data, normalization=normalization) - if normalization == "raw": + if normalization == Normalization.RAW: assert np.array_equal(raw_data, normalized_data) else: assert not np.array_equal(raw_data, normalized_data) diff --git a/tests/test_public_api.py b/tests/test_public_api.py index a24b2479..4e8912b8 100644 --- a/tests/test_public_api.py +++ b/tests/test_public_api.py @@ -19,6 +19,7 @@ from osekit.core_api.spectro_dataset import SpectroDataset from osekit.public_api.analysis import Analysis, AnalysisType from osekit.public_api.dataset import 
Dataset +from osekit.utils.audio_utils import Normalization @pytest.mark.parametrize( @@ -1088,7 +1089,7 @@ def test_edit_analysis_before_run( new_name = "new_analysis" new_instrument = Instrument(end_to_end_db=100) new_data = ads.data[::2] - new_normalization = "zscore" + new_normalization = Normalization.ZSCORE ads.sample_rate = new_sr analysis.sample_rate = new_sr diff --git a/tests/test_serialization.py b/tests/test_serialization.py index a8b71dd2..474c5116 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING import numpy as np import pytest @@ -21,6 +21,7 @@ from osekit.core_api.spectro_data import SpectroData from osekit.core_api.spectro_dataset import SpectroDataset from osekit.core_api.spectro_file import SpectroFile +from osekit.utils.audio_utils import Normalization if TYPE_CHECKING: from pathlib import Path @@ -39,7 +40,7 @@ None, None, 48_000, - "raw", + Normalization.RAW, id="full_file_no_resample", ), pytest.param( @@ -52,7 +53,7 @@ None, None, 48_000, - "zscore", + Normalization.ZSCORE, id="normalized_audio", ), pytest.param( @@ -65,7 +66,7 @@ None, None, 24_000, - "raw", + Normalization.RAW, id="full_file_downsample", ), pytest.param( @@ -78,7 +79,7 @@ None, None, 96_000, - "raw", + Normalization.RAW, id="full_file_upsample", ), pytest.param( @@ -91,7 +92,7 @@ Timestamp("2024-01-01 12:00:01"), Timestamp("2024-01-01 12:00:02"), 48_000, - "raw", + Normalization.RAW, id="file_part", ), pytest.param( @@ -104,7 +105,7 @@ Timestamp("2024-01-01 12:00:01"), Timestamp("2024-01-01 12:00:02"), 24_000, - "raw", + Normalization.RAW, id="two_files_with_resample", ), pytest.param( @@ -118,7 +119,7 @@ Timestamp("2024-01-01 12:00:01"), Timestamp("2024-01-01 12:00:04"), 48_000, - "raw", + Normalization.RAW, id="two_files_with_gap", ), pytest.param( @@ -132,7 +133,7 @@ Timestamp("2024-01-01 12:00:01+0200"), 
Timestamp("2024-01-01 12:00:04+0200"), 48_000, - "raw", + Normalization.RAW, id="localized_files", ), pytest.param( @@ -146,7 +147,7 @@ Timestamp("2024-01-01 12:00:01+0200"), Timestamp("2024-01-01 12:00:04+0200"), 48_000, - "dc_reject", + Normalization.DC_REJECT, id="localized_normalized_files", ), ], @@ -158,7 +159,7 @@ def test_audio_data_serialization( begin: Timestamp | None, end: Timestamp | None, sample_rate: float, - normalization: Literal["raw", "dc_reject", "zscore"], + normalization: Normalization, ) -> None: audio_files, _ = audio_files @@ -184,7 +185,7 @@ def test_audio_data_serialization( }, Timedelta(seconds=1), 48_000, - "raw", + Normalization.RAW, None, id="one_audio_data_one_file_no_resample", ), @@ -196,7 +197,7 @@ def test_audio_data_serialization( }, Timedelta(seconds=2), 48_000, - "raw", + Normalization.RAW, None, id="one_audio_data_two_files_no_resample", ), @@ -208,7 +209,7 @@ def test_audio_data_serialization( }, Timedelta(seconds=2), 24_000, - "raw", + Normalization.RAW, None, id="one_audio_data_two_files_downsample", ), @@ -220,7 +221,7 @@ def test_audio_data_serialization( }, Timedelta(seconds=1), [12_000, 24_000, 48_000, 96_000], - "raw", + Normalization.RAW, None, id="multiple_audio_data_different_sample_rates", ), @@ -232,7 +233,7 @@ def test_audio_data_serialization( }, Timedelta(seconds=1), 48_000, - "raw", + Normalization.RAW, "merriweather post pavilion", id="named_ads", ), @@ -245,7 +246,7 @@ def test_audio_data_serialization( }, Timedelta(seconds=1), 48_000, - "raw", + Normalization.RAW, "merriweather post pavilion", id="localized_ads", ), @@ -257,7 +258,7 @@ def test_audio_dataset_serialization( audio_files: tuple[list[AudioFile], pytest.fixtures.Subrequest], data_duration: Timestamp | None, sample_rate: float | list[float], - normalization: Literal["raw", "dc_reject", "zscore"], + normalization: Normalization, name: str | None, ) -> None: audio_files, request = audio_files From b99efcf9ee95e42c9322608f29e4a1f75825aefc Mon Sep 
17 00:00:00 2001 From: Gautzilla Date: Wed, 3 Sep 2025 14:20:04 +0200 Subject: [PATCH 24/29] use Normalization flag in example notebooks --- docs/source/example_ltas_core.ipynb | 3 +- docs/source/example_ltas_public.ipynb | 3 +- .../example_multiple_spectrograms_core.ipynb | 3 +- ...example_multiple_spectrograms_public.ipynb | 3 +- ...xample_reshaping_multiple_files_core.ipynb | 3 +- ...mple_reshaping_multiple_files_public.ipynb | 52 ++++++++++--------- docs/source/example_reshaping_one_file.ipynb | 46 ++++++++-------- docs/source/example_spectrogram.ipynb | 3 +- 8 files changed, 63 insertions(+), 53 deletions(-) diff --git a/docs/source/example_ltas_core.ipynb b/docs/source/example_ltas_core.ipynb index 32f55310..a963ec87 100644 --- a/docs/source/example_ltas_core.ipynb +++ b/docs/source/example_ltas_core.ipynb @@ -51,6 +51,7 @@ "audio_folder = Path(r\"_static/sample_audio\")\n", "\n", "from osekit.core_api.audio_dataset import AudioDataset\n", + "from osekit.utils.audio_utils import Normalization\n", "from osekit.core_api.instrument import Instrument\n", "\n", "audio_data = AudioDataset.from_folder(\n", @@ -63,7 +64,7 @@ "audio_data.sample_rate = 24_000\n", "\n", "# Removing the DC component\n", - "audio_data.normalization = \"dc_reject\"" + "audio_data.normalization = Normalization.DC_REJECT" ] }, { diff --git a/docs/source/example_ltas_public.ipynb b/docs/source/example_ltas_public.ipynb index 8bf52ae6..e6fd8a76 100644 --- a/docs/source/example_ltas_public.ipynb +++ b/docs/source/example_ltas_public.ipynb @@ -134,6 +134,7 @@ "metadata": {}, "outputs": [], "source": [ + "from osekit.utils.audio_utils import Normalization\n", "from osekit.public_api.analysis import Analysis, AnalysisType\n", "\n", "analysis = Analysis(\n", @@ -141,7 +142,7 @@ " | AnalysisType.MATRIX, # we want to export both the spectrogram and the sx matrix\n", " nb_ltas_time_bins=3000, # This will turn the regular spectrum computation in a LTAS\n", " sample_rate=sample_rate,\n", - " 
normalization=\"dc_reject\", # Removes the DC component\n", + " normalization=Normalization.DC_REJECT, # Removes the DC component\n", " fft=sft,\n", " v_lim=(0.0, 150.0), # Boundaries of the spectrograms\n", " colormap=\"viridis\", # Default value\n", diff --git a/docs/source/example_multiple_spectrograms_core.ipynb b/docs/source/example_multiple_spectrograms_core.ipynb index 267c0ffa..c1e6f337 100644 --- a/docs/source/example_multiple_spectrograms_core.ipynb +++ b/docs/source/example_multiple_spectrograms_core.ipynb @@ -52,6 +52,7 @@ "\n", "from osekit.core_api.audio_dataset import AudioDataset\n", "from osekit.core_api.instrument import Instrument\n", + "from osekit.utils.audio_utils import Normalization\n", "from pandas import Timestamp, Timedelta\n", "\n", "audio_dataset = AudioDataset.from_folder(\n", @@ -62,7 +63,7 @@ " data_duration=Timedelta(seconds=5),\n", " instrument=Instrument(end_to_end_db=150.0),\n", " sample_rate=24_000,\n", - " normalization=\"dc_reject\",\n", + " normalization=Normalization.DC_REJECT,\n", ")" ] }, diff --git a/docs/source/example_multiple_spectrograms_public.ipynb b/docs/source/example_multiple_spectrograms_public.ipynb index 969aea10..20e808e8 100644 --- a/docs/source/example_multiple_spectrograms_public.ipynb +++ b/docs/source/example_multiple_spectrograms_public.ipynb @@ -133,6 +133,7 @@ "outputs": [], "source": [ "from osekit.public_api.analysis import Analysis, AnalysisType\n", + "from osekit.utils.audio_utils import Normalization\n", "from pandas import Timestamp, Timedelta\n", "\n", "analysis = Analysis(\n", @@ -143,7 +144,7 @@ " end=Timestamp(\"2022-09-25 22:36:25\"),\n", " data_duration=Timedelta(seconds=5),\n", " sample_rate=sample_rate,\n", - " normalization=\"dc_reject\",\n", + " normalization=Normalization.DC_REJECT,\n", " fft=sft,\n", " v_lim=(0.0, 150.0), # Boundaries of the spectrograms\n", " colormap=\"viridis\", # Default value\n", diff --git a/docs/source/example_reshaping_multiple_files_core.ipynb 
b/docs/source/example_reshaping_multiple_files_core.ipynb index 8999d8e8..3651f295 100644 --- a/docs/source/example_reshaping_multiple_files_core.ipynb +++ b/docs/source/example_reshaping_multiple_files_core.ipynb @@ -47,6 +47,7 @@ "audio_folder = Path(r\"_static/sample_audio\")\n", "\n", "from osekit.core_api.audio_dataset import AudioDataset\n", + "from osekit.utils.audio_utils import Normalization\n", "from pandas import Timestamp, Timedelta\n", "\n", "audio_dataset = AudioDataset.from_folder(\n", @@ -56,7 +57,7 @@ " end=Timestamp(\"2022-09-25 22:36:25\"),\n", " data_duration=Timedelta(seconds=5),\n", " sample_rate=24_000,\n", - " normalization=\"dc_reject\",\n", + " normalization=Normalization.DC_REJECT,\n", ")" ], "outputs": [], diff --git a/docs/source/example_reshaping_multiple_files_public.ipynb b/docs/source/example_reshaping_multiple_files_public.ipynb index 9773d6cb..66a89228 100644 --- a/docs/source/example_reshaping_multiple_files_public.ipynb +++ b/docs/source/example_reshaping_multiple_files_public.ipynb @@ -2,20 +2,22 @@ "cells": [ { "cell_type": "code", + "execution_count": null, "id": "dc7ebca70b3b5da", "metadata": { "tags": [ "remove-cell" ] }, + "outputs": [], "source": [ "# Executing this cell will disable all TQDM outputs in stdout.\n", "import os\n", "\n", + "from osekit.utils.audio_utils import Normalization\n", + "\n", "os.environ[\"DISABLE_TQDM\"] = \"True\"" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -35,10 +37,12 @@ }, { "cell_type": "code", + "execution_count": null, "id": "bb002105fc9632e8", "metadata": { "tags": [] }, + "outputs": [], "source": [ "from pathlib import Path\n", "\n", @@ -52,9 +56,7 @@ ")\n", "\n", "dataset.build()" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -64,8 +66,10 @@ }, { "cell_type": "code", + "execution_count": null, "id": "a29c761d4bbd5303", "metadata": {}, + "outputs": [], "source": [ "print(f\"{' DATASET ':#^60}\")\n", 
"print(f\"{'Begin:':<30}{str(dataset.origin_dataset.begin):>30}\")\n", @@ -86,9 +90,7 @@ " for f in dataset.origin_files\n", " ],\n", ").set_index(\"Name\")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -98,10 +100,12 @@ }, { "cell_type": "code", + "execution_count": null, "id": "b4c2c3857ffcb60f", "metadata": { "tags": [] }, + "outputs": [], "source": [ "from osekit.public_api.analysis import Analysis, AnalysisType\n", "from pandas import Timestamp, Timedelta\n", @@ -112,12 +116,10 @@ " end=Timestamp(\"2022-09-25 22:36:25\"),\n", " data_duration=Timedelta(seconds=5),\n", " sample_rate=24_000,\n", - " normalization=\"dc_reject\",\n", + " normalization=Normalization.DC_REJECT,\n", " name=\"reshape_example\",\n", ")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -130,19 +132,19 @@ }, { "cell_type": "code", + "execution_count": null, "id": "2f799a26d626e418", "metadata": { "tags": [] }, + "outputs": [], "source": [ "# Returns a Core API AudioDataset that matches the analysis\n", "audio_dataset = dataset.get_analysis_audiodataset(analysis=analysis)\n", "\n", "# Filter the returned AudioDataset\n", "audio_dataset.data = [ad for ad in audio_dataset.data if not ad.is_empty]" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -152,15 +154,15 @@ }, { "cell_type": "code", + "execution_count": null, "id": "9b65cfdc720d50e6", "metadata": { "tags": [] }, + "outputs": [], "source": [ "dataset.run_analysis(analysis=analysis, audio_dataset=audio_dataset)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -170,8 +172,10 @@ }, { "cell_type": "code", + "execution_count": null, "id": "3cb0adbb96d2251a", "metadata": {}, + "outputs": [], "source": [ "pd.DataFrame(\n", " [\n", @@ -184,25 +188,23 @@ " for ad in dataset.get_dataset(analysis.name).data\n", " ],\n", ").set_index(\"Exported file\")" - ], - "outputs": [], - "execution_count": null 
+ ] }, { "cell_type": "code", + "execution_count": null, "id": "58b7aec2d8863a02", "metadata": { "tags": [ "remove-cell" ] }, + "outputs": [], "source": [ "# Reset the dataset to get all files back to place.\n", "\n", "dataset.reset()" - ], - "outputs": [], - "execution_count": null + ] } ], "metadata": { diff --git a/docs/source/example_reshaping_one_file.ipynb b/docs/source/example_reshaping_one_file.ipynb index e64ebb64..37dbd833 100644 --- a/docs/source/example_reshaping_one_file.ipynb +++ b/docs/source/example_reshaping_one_file.ipynb @@ -24,10 +24,12 @@ }, { "cell_type": "code", + "execution_count": null, "id": "initial_id", "metadata": { "collapsed": true }, + "outputs": [], "source": [ "from pathlib import Path\n", "from osekit.core_api.audio_file import AudioFile\n", @@ -36,9 +38,7 @@ " path=Path(r\"_static/sample_audio/sample_220925_223450.wav\"),\n", " strptime_format=\"%y%m%d_%H%M%S\",\n", ")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -48,16 +48,16 @@ }, { "cell_type": "code", + "execution_count": null, "id": "8df490fff812332", "metadata": {}, + "outputs": [], "source": [ "print(f\"{' FILE ':#^60}\")\n", "print(f\"{'Begin:':<30}{str(audio_file.begin):>30}\")\n", "print(f\"{'End:':<30}{str(audio_file.end):>30}\")\n", "print(f\"{'Sample rate:':<30}{str(audio_file.sample_rate):>30}\")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -67,8 +67,10 @@ }, { "cell_type": "code", + "execution_count": null, "id": "63edf1d81811bf4b", "metadata": {}, + "outputs": [], "source": [ "from osekit.core_api.audio_data import AudioData\n", "from pandas import Timestamp\n", @@ -78,9 +80,7 @@ " begin=Timestamp(\"2022-09-25 22:34:52\"),\n", " end=Timestamp(\"2022-09-25 22:34:56\"),\n", ")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -90,14 +90,16 @@ }, { "cell_type": "code", + "execution_count": null, "id": "8c672c04078d395e", "metadata": {}, + "outputs": 
[], "source": [ + "from osekit.utils.audio_utils import Normalization\n", + "\n", "audio_data.sample_rate = 24_000\n", - "audio_data.normalization = \"dc_reject\"" - ], - "outputs": [], - "execution_count": null + "audio_data.normalization = Normalization.DC_REJECT # Removes the DC component" + ] }, { "cell_type": "markdown", @@ -107,16 +109,16 @@ }, { "cell_type": "code", + "execution_count": null, "id": "9b3faf84f6e45cf2", "metadata": {}, + "outputs": [], "source": [ "print(f\"{' AUDIO DATA ':#^60}\")\n", "print(f\"{'Begin:':<30}{str(audio_data.begin):>30}\")\n", "print(f\"{'End:':<30}{str(audio_data.end):>30}\")\n", "print(f\"{'Sample rate:':<30}{str(audio_data.sample_rate):>30}\")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -126,17 +128,17 @@ }, { "cell_type": "code", + "execution_count": null, "id": "ddc9e9c82bca1c41", "metadata": {}, + "outputs": [], "source": [ "wav_data = audio_data.get_value()\n", "\n", "print(\n", " f\"WAV data should be {int(audio_data.duration.total_seconds())}*{audio_data.sample_rate:_} samples long: {len(wav_data):_} == {int(audio_data.duration.total_seconds()) * audio_data.sample_rate:_} samples\"\n", ")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -146,17 +148,17 @@ }, { "cell_type": "code", + "execution_count": null, "id": "2d35931b8c9def2e", "metadata": { "tags": [ "skip-execution" ] }, + "outputs": [], "source": [ "audio_data.write(Path(r\"../docs/source/_static/sample_audio/exported_files/\"))" - ], - "outputs": [], - "execution_count": null + ] } ], "metadata": { diff --git a/docs/source/example_spectrogram.ipynb b/docs/source/example_spectrogram.ipynb index d3810c09..436d89fb 100644 --- a/docs/source/example_spectrogram.ipynb +++ b/docs/source/example_spectrogram.ipynb @@ -72,6 +72,7 @@ "source": [ "from osekit.core_api.audio_data import AudioData\n", "from osekit.core_api.instrument import Instrument\n", + "from osekit.utils.audio_utils import 
Normalization\n", "from pandas import Timestamp\n", "\n", "audio_data = AudioData.from_files(\n", @@ -79,7 +80,7 @@ " begin=Timestamp(\"2022-09-25 22:34:55\"),\n", " end=Timestamp(\"2022-09-25 22:35:05\"),\n", " sample_rate=40000,\n", - " normalization=\"dc_reject\",\n", + " normalization=Normalization.DC_REJECT, # Rejects the DC component\n", " instrument=Instrument(end_to_end_db=150.0),\n", ")" ] From 93f82b4f45553ef9aefcc2a8da1c25ef43bf0d78 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Wed, 3 Sep 2025 14:26:20 +0200 Subject: [PATCH 25/29] update docs with Normalization flag --- docs/source/coreapi_usage.rst | 36 +++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/docs/source/coreapi_usage.rst b/docs/source/coreapi_usage.rst index 9d6f98b4..5d857cd8 100644 --- a/docs/source/coreapi_usage.rst +++ b/docs/source/coreapi_usage.rst @@ -113,7 +113,7 @@ Eventual time gap between audio items are filled with ``0.`` values. Normalization """"""""""""" -The fetched audio data can be normalized according to the following presets: +The fetched audio data can be normalized according to the presets given by the :class:`osekit.utils.audio_utils.normalization` flag: .. list-table:: Normalization presets :widths: 10 10 @@ -121,25 +121,49 @@ The fetched audio data can be normalized according to the following presets: * - Name - Description - * - ``raw`` + * - ``Normalization.RAW`` - :math:`x` - * - ``dc_reject`` + * - ``Normalization.DC_REJECT`` - :math:`x-\overline{ x }` - * - ``zscore`` + * - ``Normalization.PEAK`` + - :math:`\frac{x}{x_\text{max}}` + * - ``Normalization.ZSCORE`` - :math:`\frac{ x-\overline{x} }{\sigma (x)}` To normalize the data, simply set the :attr:`osekit.core_api.audio_data.AudioData.normalization` property to the -requested normalization name: +requested normalization flag: .. 
code-block:: python from osekit.core_api.audio_data.AudioData import AudioData + from osekit.utils.audio_utils import Normalization ad = AudioData(...) - ad.normalization = "zscore" # Note: normalization also is a parameter of the AudioData initializer + ad.normalization = Normalization.ZSCORE # Note: normalization also is a parameter of the AudioData initializer v = ad.get_value() # The fetched data will then be normalized +.. note:: + + The ``Normalization.DC_REJECT`` normalization can be combined with any single other normalization: + + .. code-block:: python + + from osekit.utils.audio_utils import Normalization + + dc_peak = Normalization.DC_REJECT | Normalization.PEAK + +.. warning:: + + Instantiating another combination of normalizations will raise an error: + + .. code-block:: python + + from osekit.utils.audio_utils import Normalization + + incorrect_normalization = Normalization.RAW | Normalization.PEAK + incorrect_normalization = Normalization.DC_REJECT | Normalization.RAW | Normalization.PEAK + Calibration """"""""""" From 54f8ccd8b73ebdec10c70b526268bd296cee988b Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Wed, 3 Sep 2025 14:37:17 +0200 Subject: [PATCH 26/29] add Normalization flag to API doc --- docs/source/api.rst | 3 ++- docs/source/coreapi_usage.rst | 2 +- docs/source/normalization.rst | 7 +++++++ docs/source/utils.rst | 9 +++++++++ 4 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 docs/source/normalization.rst create mode 100644 docs/source/utils.rst diff --git a/docs/source/api.rst b/docs/source/api.rst index c27a52f3..b4e33a21 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -5,4 +5,5 @@ :maxdepth: 2 publicapi - coreapi \ No newline at end of file + coreapi + utils diff --git a/docs/source/coreapi_usage.rst b/docs/source/coreapi_usage.rst index 5d857cd8..76e75963 100644 --- a/docs/source/coreapi_usage.rst +++ b/docs/source/coreapi_usage.rst @@ -113,7 +113,7 @@ Eventual
time gap between audio items are filled with ``0.`` values. Normalization """"""""""""" -The fetched audio data can be normalized according to the presets given by the :class:`osekit.utils.audio_utils.normalization` flag: +The fetched audio data can be normalized according to the presets given by the :class:`osekit.utils.audio_utils.Normalization` flag: .. list-table:: Normalization presets :widths: 10 10 diff --git a/docs/source/normalization.rst b/docs/source/normalization.rst new file mode 100644 index 00000000..6c6cbfd3 --- /dev/null +++ b/docs/source/normalization.rst @@ -0,0 +1,7 @@ +Normalization +------------- + +.. _normalization: + +.. autoclass:: osekit.utils.audio_utils.Normalization + :members: diff --git a/docs/source/utils.rst b/docs/source/utils.rst new file mode 100644 index 00000000..0d130483 --- /dev/null +++ b/docs/source/utils.rst @@ -0,0 +1,9 @@ +Utils +----- + +.. _utils: + +.. toctree:: + :maxdepth: 1 + + normalization From f62386cd965e8221fed84c48f4320288b094543e Mon Sep 17 00:00:00 2001 From: Gauthier BERTHOMIEU Date: Fri, 5 Sep 2025 11:36:59 +0200 Subject: [PATCH 27/29] add negative peak tests --- tests/test_utils.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test_utils.py b/tests/test_utils.py index e5e543c6..22cb1686 100755 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -457,6 +457,18 @@ def combine_normalizations(normalizations: list[Union[Normalization, int]]): np.array([0.0, 0.5, 1.0]), id="peak", ), + pytest.param( + np.array([-0.25, 0.5, 0.0]), + Normalization.PEAK, + np.array([-0.5, 1.0, 0.0]), + id="peak_with_negative_values", + ), + pytest.param( + np.array([-0.5, 0.25, 0.0]), + Normalization.PEAK, + np.array([-1.0, 0.5, 0.0]), + id="peak_with_negative_max", + ), pytest.param( np.array([0.0, 1.0, 2.0]), Normalization.ZSCORE, From c04c0f533096d5ca23f5320d09ac9c1a9ae10ec9 Mon Sep 17 00:00:00 2001 From: Gauthier BERTHOMIEU Date: Fri, 5 Sep 2025 11:39:23 +0200 Subject: [PATCH 28/29] fix peak 
normalization with negative values --- src/osekit/utils/audio_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/osekit/utils/audio_utils.py b/src/osekit/utils/audio_utils.py index 7fa7da05..fc70c3bc 100644 --- a/src/osekit/utils/audio_utils.py +++ b/src/osekit/utils/audio_utils.py @@ -127,7 +127,7 @@ def normalize_dc_reject(values: np.ndarray) -> np.ndarray: def normalize_peak(values: np.ndarray) -> np.ndarray: """Return values normalized so that the peak value is 1.0.""" - return values / values.max() + return values / max(abs(values)) def normalize_zscore(values: np.ndarray) -> np.ndarray: From 60c0634955ab16f57a59c02aca5e97a329248b75 Mon Sep 17 00:00:00 2001 From: Gauthier BERTHOMIEU Date: Thu, 11 Sep 2025 17:12:22 +0200 Subject: [PATCH 29/29] move Normalization import to idoine cell --- docs/source/example_reshaping_multiple_files_public.ipynb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/source/example_reshaping_multiple_files_public.ipynb b/docs/source/example_reshaping_multiple_files_public.ipynb index 66a89228..3d44c60e 100644 --- a/docs/source/example_reshaping_multiple_files_public.ipynb +++ b/docs/source/example_reshaping_multiple_files_public.ipynb @@ -14,8 +14,6 @@ "# Executing this cell will disable all TQDM outputs in stdout.\n", "import os\n", "\n", - "from osekit.utils.audio_utils import Normalization\n", - "\n", "os.environ[\"DISABLE_TQDM\"] = \"True\"" ] }, @@ -108,6 +106,7 @@ "outputs": [], "source": [ "from osekit.public_api.analysis import Analysis, AnalysisType\n", + "from osekit.utils.audio_utils import Normalization\n", "from pandas import Timestamp, Timedelta\n", "\n", "analysis = Analysis(\n",