diff --git a/docs/source/api.rst b/docs/source/api.rst index c27a52f3..b4e33a21 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -5,4 +5,5 @@ :maxdepth: 2 publicapi - coreapi \ No newline at end of file + coreapi + utils diff --git a/docs/source/coreapi_usage.rst b/docs/source/coreapi_usage.rst index 261e8aa6..76e75963 100644 --- a/docs/source/coreapi_usage.rst +++ b/docs/source/coreapi_usage.rst @@ -110,6 +110,59 @@ The data is fetched seamlessly on-demand from the audio file(s). The opening/clo Eventual time gap between audio items are filled with ``0.`` values. +Normalization +""""""""""""" + +The fetched audio data can be normalized according to the presets given by the :class:`osekit.utils.audio_utils.Normalization` flag: + +.. list-table:: Normalization presets + :widths: 10 10 + :header-rows: 1 + + * - Name + - Description + * - ``Normalization.RAW`` + - :math:`x` + * - ``Normalization.DC_REJECT`` + - :math:`x-\overline{ x }` + * - ``Normalization.PEAK`` + - :math:`\frac{x}{\max |x|}` + * - ``Normalization.ZSCORE`` + - :math:`\frac{ x-\overline{x} }{\sigma (x)}` + +To normalize the data, simply set the :attr:`osekit.core_api.audio_data.AudioData.normalization` property to the +requested normalization flag: + +.. code-block:: python + + from osekit.core_api.audio_data import AudioData + from osekit.utils.audio_utils import Normalization + + ad = AudioData(...) + ad.normalization = Normalization.ZSCORE # Note: normalization is also a parameter of the AudioData initializer + + v = ad.get_value() # The fetched data will then be normalized + +.. note:: + + The ``Normalization.DC_REJECT`` normalization can be combined with any single other normalization: + + .. code-block:: python + + from osekit.utils.audio_utils import Normalization + + dc_peak = Normalization.DC_REJECT | Normalization.PEAK + +.. warning:: + + Instantiating any other combination of normalizations will raise an error: + + .. 
code-block:: python + + from osekit.utils.audio_utils import Normalization + + incorrect_normalization = Normalization.RAW | Normalization.PEAK + incorrect_normalization = Normalization.DC_REJECT | Normalization.RAW | Normalization.PEAK Calibration """"""""""" @@ -124,8 +177,8 @@ allows for retrieving the data in the shape of the recorded acoustic pressure. .. code-block:: python - from osekit.core_api.instrument import Instrument from osekit.core_api.audio_data import AudioData + from osekit.core_api.instrument import Instrument import numpy as np instrument = Instrument(end_to_end_db = 150) # The raw 1. WAV value equals 150 dB SPL re 1 uPa @@ -170,6 +223,8 @@ an ``AudioDataset`` from a given folder containing audio files: from pathlib import Path from osekit.core_api.audio_dataset import AudioDataset + from osekit.core_api.instrument import Instrument + from osekit.utils.audio_utils import Normalization from pandas import Timestamp, Timedelta folder = Path(r"...") @@ -179,7 +234,9 @@ an ``AudioDataset`` from a given folder containing audio files: strptime_format="%y_%m_%d_%H_%M_%S", # To parse the files begin Timestamp begin=Timestamp("2009-01-06 12:00:00"), end=Timestamp("2009-01-06 14:00:00"), - data_duration=Timedelta("10s") + data_duration=Timedelta("10s"), + instrument=Instrument(end_to_end_db=150), + normalization=Normalization.DC_REJECT ) The resulting ``AudioDataset`` will contain 10s-long ``AudioData`` ranging from ``2009-01-06 12:00:00`` to ``2009-01-06 14:00:00``. @@ -366,4 +423,4 @@ should be provided: ltas.plot() plt.show() -A ``SpectroData`` object can be turned into a ``LTASData`` thanks to the :meth:`osekit.core_api.ltas_data.LTASData.from_spectro_data` method. \ No newline at end of file +A ``SpectroData`` object can be turned into a ``LTASData`` thanks to the :meth:`osekit.core_api.ltas_data.LTASData.from_spectro_data` method. 
diff --git a/docs/source/example_ltas.rst b/docs/source/example_ltas.rst index fa12e02f..0716d7bc 100644 --- a/docs/source/example_ltas.rst +++ b/docs/source/example_ltas.rst @@ -13,6 +13,7 @@ This LTAS will: * Start at the begin of the first audio file * End at the end of the last audio file * Be downsampled at ``24 kHz`` +* Have its DC component removed | The FFT used for computing the spectrograms will use a ``1024 samples``-long hamming window. | The ``hop`` of LTAS ``ShortTimeFFT`` objects is forced to the size of the window (no overlap). diff --git a/docs/source/example_ltas_core.ipynb b/docs/source/example_ltas_core.ipynb index 00bc17ed..a963ec87 100644 --- a/docs/source/example_ltas_core.ipynb +++ b/docs/source/example_ltas_core.ipynb @@ -51,6 +51,7 @@ "audio_folder = Path(r\"_static/sample_audio\")\n", "\n", "from osekit.core_api.audio_dataset import AudioDataset\n", + "from osekit.utils.audio_utils import Normalization\n", "from osekit.core_api.instrument import Instrument\n", "\n", "audio_data = AudioDataset.from_folder(\n", @@ -60,7 +61,10 @@ ").data[0]\n", "\n", "# Resampling at 24 kHz\n", - "audio_data.sample_rate = 24_000" + "audio_data.sample_rate = 24_000\n", + "\n", + "# Removing the DC component\n", + "audio_data.normalization = Normalization.DC_REJECT" ] }, { diff --git a/docs/source/example_ltas_public.ipynb b/docs/source/example_ltas_public.ipynb index 11e7edf3..dce99f89 100644 --- a/docs/source/example_ltas_public.ipynb +++ b/docs/source/example_ltas_public.ipynb @@ -141,6 +141,7 @@ "metadata": {}, "outputs": [], "source": [ + "from osekit.utils.audio_utils import Normalization\n", "from osekit.public_api.analysis import Analysis, AnalysisType\n", "\n", "analysis = Analysis(\n", @@ -148,6 +149,7 @@ " | AnalysisType.MATRIX, # we want to export both the spectrogram and the sx matrix\n", " nb_ltas_time_bins=3000, # This will turn the regular spectrum computation in a LTAS\n", " sample_rate=sample_rate,\n", + " 
normalization=Normalization.DC_REJECT, # Removes the DC component\n", " fft=sft,\n", " v_lim=(0.0, 150.0), # Boundaries of the spectrograms\n", " colormap=\"viridis\", # Default value\n", @@ -203,7 +205,11 @@ "cell_type": "code", "execution_count": null, "id": "e05d653bc1e8bfe2", - "metadata": {}, + "metadata": { + "tags": [ + "remove-cell" + ] + }, "outputs": [], "source": [ "# Reset the dataset to get all files back to place.\n", diff --git a/docs/source/example_multiple_spectrograms.rst b/docs/source/example_multiple_spectrograms.rst index 42eaffa3..e5919de4 100644 --- a/docs/source/example_multiple_spectrograms.rst +++ b/docs/source/example_multiple_spectrograms.rst @@ -9,6 +9,7 @@ In this example, we want to export spectrograms drawn from the sample audio data * Last spectrogram ends at ``2022-09-25 22:36:25`` * Spectrograms represent ``5 s``-long audio data * Audio data are downsampled sampled at ``24 kHz`` before spectrograms are computed +* The DC component of the audio data is rejected before spectrograms are computed * Spectrograms that are in the gap between recordings should be skipped The FFT used for computing the spectrograms will use a ``1024 samples``-long hamming window, with a ``128 samples``-long hop. 
diff --git a/docs/source/example_multiple_spectrograms_core.ipynb b/docs/source/example_multiple_spectrograms_core.ipynb index c3cef480..c1e6f337 100644 --- a/docs/source/example_multiple_spectrograms_core.ipynb +++ b/docs/source/example_multiple_spectrograms_core.ipynb @@ -52,6 +52,7 @@ "\n", "from osekit.core_api.audio_dataset import AudioDataset\n", "from osekit.core_api.instrument import Instrument\n", + "from osekit.utils.audio_utils import Normalization\n", "from pandas import Timestamp, Timedelta\n", "\n", "audio_dataset = AudioDataset.from_folder(\n", @@ -61,6 +62,8 @@ " end=Timestamp(\"2022-09-25 22:36:25\"),\n", " data_duration=Timedelta(seconds=5),\n", " instrument=Instrument(end_to_end_db=150.0),\n", + " sample_rate=24_000,\n", + " normalization=Normalization.DC_REJECT,\n", ")" ] }, @@ -192,7 +195,7 @@ "source": [ "import matplotlib.pyplot as plt\n", "\n", - "spectro_dataset.data[0].plot()\n", + "spectro_dataset.data[1].plot()\n", "plt.show()" ] }, diff --git a/docs/source/example_multiple_spectrograms_public.ipynb b/docs/source/example_multiple_spectrograms_public.ipynb index 90f50f07..27ee99b0 100644 --- a/docs/source/example_multiple_spectrograms_public.ipynb +++ b/docs/source/example_multiple_spectrograms_public.ipynb @@ -140,6 +140,7 @@ "outputs": [], "source": [ "from osekit.public_api.analysis import Analysis, AnalysisType\n", + "from osekit.utils.audio_utils import Normalization\n", "from pandas import Timestamp, Timedelta\n", "\n", "analysis = Analysis(\n", @@ -150,6 +151,7 @@ " end=Timestamp(\"2022-09-25 22:36:25\"),\n", " data_duration=Timedelta(seconds=5),\n", " sample_rate=sample_rate,\n", + " normalization=Normalization.DC_REJECT,\n", " fft=sft,\n", " v_lim=(0.0, 150.0), # Boundaries of the spectrograms\n", " colormap=\"viridis\", # Default value\n", diff --git a/docs/source/example_reshaping_multiple_files.rst b/docs/source/example_reshaping_multiple_files.rst index 70cc2cad..ec4b20fd 100644 --- 
a/docs/source/example_reshaping_multiple_files.rst +++ b/docs/source/example_reshaping_multiple_files.rst @@ -9,6 +9,7 @@ In this example, we want to export reshaped files from the sample audio dataset * Last file ends at ``2022-09-25 22:36:25`` * Files are ``5 s``-long * Files are sampled at ``24 kHz`` +* Files are DC-filtered * Files that are in the gap between recordings should be skipped .. toctree:: diff --git a/docs/source/example_reshaping_multiple_files_core.ipynb b/docs/source/example_reshaping_multiple_files_core.ipynb index c97b97e7..3651f295 100644 --- a/docs/source/example_reshaping_multiple_files_core.ipynb +++ b/docs/source/example_reshaping_multiple_files_core.ipynb @@ -47,6 +47,7 @@ "audio_folder = Path(r\"_static/sample_audio\")\n", "\n", "from osekit.core_api.audio_dataset import AudioDataset\n", + "from osekit.utils.audio_utils import Normalization\n", "from pandas import Timestamp, Timedelta\n", "\n", "audio_dataset = AudioDataset.from_folder(\n", @@ -55,6 +56,8 @@ " begin=Timestamp(\"2022-09-25 22:35:15\"),\n", " end=Timestamp(\"2022-09-25 22:36:25\"),\n", " data_duration=Timedelta(seconds=5),\n", + " sample_rate=24_000,\n", + " normalization=Normalization.DC_REJECT,\n", ")" ], "outputs": [], diff --git a/docs/source/example_reshaping_multiple_files_public.ipynb b/docs/source/example_reshaping_multiple_files_public.ipynb index ec96f0b6..5c0651a3 100644 --- a/docs/source/example_reshaping_multiple_files_public.ipynb +++ b/docs/source/example_reshaping_multiple_files_public.ipynb @@ -113,6 +113,7 @@ "outputs": [], "source": [ "from osekit.public_api.analysis import Analysis, AnalysisType\n", + "from osekit.utils.audio_utils import Normalization\n", "from pandas import Timestamp, Timedelta\n", "\n", "analysis = Analysis(\n", @@ -120,6 +121,8 @@ " begin=Timestamp(\"2022-09-25 22:35:15\"),\n", " end=Timestamp(\"2022-09-25 22:36:25\"),\n", " data_duration=Timedelta(seconds=5),\n", + " sample_rate=24_000,\n", + " 
normalization=Normalization.DC_REJECT,\n", " name=\"reshape_example\",\n", ")" ] diff --git a/docs/source/example_reshaping_one_file.ipynb b/docs/source/example_reshaping_one_file.ipynb index f3aeb200..37dbd833 100644 --- a/docs/source/example_reshaping_one_file.ipynb +++ b/docs/source/example_reshaping_one_file.ipynb @@ -24,10 +24,12 @@ }, { "cell_type": "code", + "execution_count": null, "id": "initial_id", "metadata": { "collapsed": true }, + "outputs": [], "source": [ "from pathlib import Path\n", "from osekit.core_api.audio_file import AudioFile\n", @@ -36,9 +38,7 @@ " path=Path(r\"_static/sample_audio/sample_220925_223450.wav\"),\n", " strptime_format=\"%y%m%d_%H%M%S\",\n", ")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -48,16 +48,16 @@ }, { "cell_type": "code", + "execution_count": null, "id": "8df490fff812332", "metadata": {}, + "outputs": [], "source": [ "print(f\"{' FILE ':#^60}\")\n", "print(f\"{'Begin:':<30}{str(audio_file.begin):>30}\")\n", "print(f\"{'End:':<30}{str(audio_file.end):>30}\")\n", "print(f\"{'Sample rate:':<30}{str(audio_file.sample_rate):>30}\")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -67,8 +67,10 @@ }, { "cell_type": "code", + "execution_count": null, "id": "63edf1d81811bf4b", "metadata": {}, + "outputs": [], "source": [ "from osekit.core_api.audio_data import AudioData\n", "from pandas import Timestamp\n", @@ -78,25 +80,26 @@ " begin=Timestamp(\"2022-09-25 22:34:52\"),\n", " end=Timestamp(\"2022-09-25 22:34:56\"),\n", ")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", "id": "8ae4db7363a92148", "metadata": {}, - "source": "Simply resample the `AudioData` by setting this property:" + "source": "Simply resample and normalize the `AudioData` by setting the corresponding properties:" }, { "cell_type": "code", + "execution_count": null, "id": "8c672c04078d395e", "metadata": {}, - "source": [ - "audio_data.sample_rate = 
24_000" - ], "outputs": [], - "execution_count": null + "source": [ + "from osekit.utils.audio_utils import Normalization\n", + "\n", + "audio_data.sample_rate = 24_000\n", + "audio_data.normalization = Normalization.DC_REJECT # Removes the DC component" + ] }, { "cell_type": "markdown", @@ -106,16 +109,16 @@ }, { "cell_type": "code", + "execution_count": null, "id": "9b3faf84f6e45cf2", "metadata": {}, + "outputs": [], "source": [ "print(f\"{' AUDIO DATA ':#^60}\")\n", "print(f\"{'Begin:':<30}{str(audio_data.begin):>30}\")\n", "print(f\"{'End:':<30}{str(audio_data.end):>30}\")\n", "print(f\"{'Sample rate:':<30}{str(audio_data.sample_rate):>30}\")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -125,17 +128,17 @@ }, { "cell_type": "code", + "execution_count": null, "id": "ddc9e9c82bca1c41", "metadata": {}, + "outputs": [], "source": [ "wav_data = audio_data.get_value()\n", "\n", "print(\n", " f\"WAV data should be {int(audio_data.duration.total_seconds())}*{audio_data.sample_rate:_} samples long: {len(wav_data):_} == {int(audio_data.duration.total_seconds()) * audio_data.sample_rate:_} samples\"\n", ")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -145,17 +148,17 @@ }, { "cell_type": "code", + "execution_count": null, "id": "2d35931b8c9def2e", "metadata": { "tags": [ "skip-execution" ] }, + "outputs": [], "source": [ "audio_data.write(Path(r\"../docs/source/_static/sample_audio/exported_files/\"))" - ], - "outputs": [], - "execution_count": null + ] } ], "metadata": { diff --git a/docs/source/example_spectrogram.ipynb b/docs/source/example_spectrogram.ipynb index facc552b..436d89fb 100644 --- a/docs/source/example_spectrogram.ipynb +++ b/docs/source/example_spectrogram.ipynb @@ -72,6 +72,7 @@ "source": [ "from osekit.core_api.audio_data import AudioData\n", "from osekit.core_api.instrument import Instrument\n", + "from osekit.utils.audio_utils import Normalization\n", "from pandas import 
Timestamp\n", "\n", "audio_data = AudioData.from_files(\n", @@ -79,6 +80,7 @@ " begin=Timestamp(\"2022-09-25 22:34:55\"),\n", " end=Timestamp(\"2022-09-25 22:35:05\"),\n", " sample_rate=40000,\n", + " normalization=Normalization.DC_REJECT, # Rejects the DC component\n", " instrument=Instrument(end_to_end_db=150.0),\n", ")" ] diff --git a/docs/source/normalization.rst b/docs/source/normalization.rst new file mode 100644 index 00000000..6c6cbfd3 --- /dev/null +++ b/docs/source/normalization.rst @@ -0,0 +1,7 @@ +Normalization +------------- + +.. _normalization: + +.. autoclass:: osekit.utils.audio_utils.Normalization + :members: diff --git a/docs/source/publicapi_usage.rst b/docs/source/publicapi_usage.rst index 24f53d50..71979cfd 100644 --- a/docs/source/publicapi_usage.rst +++ b/docs/source/publicapi_usage.rst @@ -261,6 +261,8 @@ Let's now say we want to export audio, spectrum matrices and spectrograms with t - ``10 s`` * - Sample rate - ``48 kHz`` + * - Audio data normalization + - ``Normalization.DC_REJECT`` (removes the audio DC component) * - FFT - ``hamming window``, ``1024 points``, ``40% overlap`` @@ -291,6 +293,7 @@ Then we are all set for running the analysis: end=dataset.origin_dataset.begin + Timedelta(hours=1.5), # 1h30 after the begin of the original dataset data_duration=Timedelta("10s"), # Duration of the output data sample_rate=48_000, # Sample rate of the output data + normalization=Normalization.DC_REJECT, # Removes the DC component (Normalization is imported from osekit.utils.audio_utils) name="full_analysis", # You can name the analysis, or keep the default name. fft=sft, # The FFT parameters ) diff --git a/docs/source/utils.rst b/docs/source/utils.rst new file mode 100644 index 00000000..0d130483 --- /dev/null +++ b/docs/source/utils.rst @@ -0,0 +1,9 @@ +Utils +----- + +.. _utils: + +.. 
toctree:: + :maxdepth: 1 + + normalization diff --git a/src/osekit/core_api/audio_data.py b/src/osekit/core_api/audio_data.py index 1eacbf92..af7eb521 100644 --- a/src/osekit/core_api/audio_data.py +++ b/src/osekit/core_api/audio_data.py @@ -20,7 +20,7 @@ from osekit.core_api.audio_item import AudioItem from osekit.core_api.base_data import BaseData from osekit.core_api.instrument import Instrument -from osekit.utils.audio_utils import resample +from osekit.utils.audio_utils import resample, Normalization, normalize if TYPE_CHECKING: from pathlib import Path @@ -40,6 +40,7 @@ def __init__( end: Timestamp | None = None, sample_rate: int | None = None, instrument: Instrument | None = None, + normalization: Normalization = Normalization.RAW, ) -> None: """Initialize an AudioData from a list of AudioItems. @@ -58,11 +59,14 @@ def __init__( instrument: Instrument | None Instrument that might be used to obtain acoustic pressure from the wav audio data. + normalization: Normalization + The type of normalization to apply to the audio data. """ super().__init__(items=items, begin=begin, end=end) self._set_sample_rate(sample_rate=sample_rate) self.instrument = instrument + self.normalization = normalization @property def nb_channels(self) -> int: @@ -77,6 +81,15 @@ def shape(self) -> tuple[int, ...] 
| int: data_length = round(self.sample_rate * self.duration.total_seconds()) return data_length if self.nb_channels <= 1 else (data_length, self.nb_channels) + @property + def normalization(self) -> Normalization: + """The type of normalization to apply to the audio data.""" + return self._normalization + + @normalization.setter + def normalization(self, value: Normalization) -> None: + self._normalization = value + def __eq__(self, other: AudioData) -> bool: """Override __eq__.""" return self.sample_rate == other.sample_rate and super().__eq__(other) @@ -100,16 +113,11 @@ def _set_sample_rate(self, sample_rate: int | None = None) -> None: return self.sample_rate = None - def get_value(self, reject_dc: bool = False) -> np.ndarray: + def get_value(self) -> np.ndarray: """Return the value of the audio data. The data from the audio file will be resampled if necessary. - Parameters - ---------- - reject_dc: bool - If True, the values will be centered on 0. - Returns ------- np.ndarray: @@ -123,28 +131,22 @@ def get_value(self, reject_dc: bool = False) -> np.ndarray: item_data = item_data[: min(item_data.shape[0], data.shape[0] - idx)] data[idx : idx + len(item_data)] = item_data idx += len(item_data) - if reject_dc: - data -= data.mean() - return data - def get_value_calibrated(self, reject_dc: bool = False) -> np.ndarray: + return normalize(data, self.normalization) + + def get_value_calibrated(self) -> np.ndarray: """Return the value of the audio data accounting for the calibration factor. If the instrument parameter of the audio data is not None, the returned value is calibrated in units of Pa. - Parameters - ---------- - reject_dc: bool - If True, the values will be centered on 0. - Returns ------- np.ndarray: The calibrated value of the audio data. 
""" - raw_data = self.get_value(reject_dc=reject_dc) + raw_data = self.get_value() calibration_factor = ( 1.0 if self.instrument is None else self.instrument.end_to_end ) @@ -288,6 +290,7 @@ def to_dict(self) -> dict: | instrument_dict | { "sample_rate": self.sample_rate, + "normalization": self.normalization.value, } ) @@ -315,6 +318,7 @@ def from_dict(cls, dictionary: dict) -> AudioData: return cls.from_base_data( data=base_data, sample_rate=dictionary["sample_rate"], + normalization=Normalization(dictionary["normalization"]), instrument=instrument, ) @@ -326,6 +330,7 @@ def from_files( end: Timestamp | None = None, sample_rate: float | None = None, instrument: Instrument | None = None, + normalization: Normalization = Normalization.RAW, ) -> AudioData: """Return an AudioData object from a list of AudioFiles. @@ -344,6 +349,8 @@ def from_files( instrument: Instrument | None Instrument that might be used to obtain acoustic pressure from the wav audio data. + normalization: Normalization + The type of normalization to apply to the audio data. Returns ------- @@ -355,6 +362,7 @@ def from_files( data=BaseData.from_files(files, begin, end), sample_rate=sample_rate, instrument=instrument, + normalization=normalization, ) @classmethod @@ -363,6 +371,7 @@ def from_base_data( data: BaseData, sample_rate: float | None = None, instrument: Instrument | None = None, + normalization: Normalization = Normalization.RAW, ) -> AudioData: """Return an AudioData object from a BaseData object. @@ -375,6 +384,8 @@ def from_base_data( instrument: Instrument | None Instrument that might be used to obtain acoustic pressure from the wav audio data. + normalization: Literal["raw","dc_reject","zscore"] + The type of normalization to apply to the audio data. 
Returns ------- @@ -386,4 +397,5 @@ def from_base_data( items=[AudioItem.from_base_item(item) for item in data.items], sample_rate=sample_rate, instrument=instrument, + normalization=normalization, ) diff --git a/src/osekit/core_api/audio_dataset.py b/src/osekit/core_api/audio_dataset.py index 595e348c..bf38d4ed 100644 --- a/src/osekit/core_api/audio_dataset.py +++ b/src/osekit/core_api/audio_dataset.py @@ -14,6 +14,7 @@ from osekit.core_api.audio_file import AudioFile from osekit.core_api.base_dataset import BaseDataset from osekit.core_api.json_serializer import deserialize_json +from osekit.utils.audio_utils import Normalization from osekit.utils.multiprocess_utils import multiprocess if TYPE_CHECKING: @@ -73,6 +74,17 @@ def sample_rate(self, sample_rate: float) -> None: for data in self.data: data.sample_rate = sample_rate + @property + def normalization(self) -> Normalization: + """Return the most frequent normalization among those of this dataset data.""" + normalizations = [data.normalization for data in self.data] + return max(set(normalizations), key=normalizations.count) + + @normalization.setter + def normalization(self, normalization: Normalization) -> None: + for data in self.data: + data.normalization = normalization + @property def instrument(self) -> Instrument | None: """Instrument that can be used to get acoustic pressure from wav audio data.""" @@ -164,8 +176,10 @@ def from_folder( # noqa: PLR0913 timezone: str | pytz.timezone | None = None, mode: Literal["files", "timedelta_total", "timedelta_file"] = "timedelta_total", data_duration: Timedelta | None = None, + sample_rate: float | None = None, name: str | None = None, instrument: Instrument | None = None, + normalization: Normalization = Normalization.RAW, **kwargs: any, ) -> AudioDataset: """Return an AudioDataset from a folder containing the audio files. @@ -202,11 +216,15 @@ def from_folder( # noqa: PLR0913 If mode is set to "files", this parameter has no effect. 
If provided, audio data will be evenly distributed between begin and end. Else, one data object will cover the whole time period. + sample_rate: float | None + Sample rate of the audio data objects. name: str|None Name of the dataset. instrument: Instrument | None Instrument that might be used to obtain acoustic pressure from the wav audio data. + normalization: Normalization + The type of normalization to apply to the audio data. kwargs: any Keyword arguments passed to the BaseDataset.from_folder classmethod. @@ -233,6 +251,8 @@ def from_folder( # noqa: PLR0913 base_dataset=base_dataset, name=name, instrument=instrument, + sample_rate=sample_rate, + normalization=normalization, ) @classmethod @@ -243,8 +263,10 @@ def from_files( # noqa: PLR0913 end: Timestamp | None = None, mode: Literal["files", "timedelta_total", "timedelta_file"] = "timedelta_total", data_duration: Timedelta | None = None, + sample_rate: float | None = None, name: str | None = None, instrument: Instrument | None = None, + normalization: Normalization = Normalization.RAW, ) -> AudioDataset: """Return an AudioDataset object from a list of AudioFiles. @@ -272,11 +294,15 @@ def from_files( # noqa: PLR0913 If mode is set to "files", this parameter has no effect. If provided, data will be evenly distributed between begin and end. Else, one data object will cover the whole time period. + sample_rate: float | None + Sample rate of the audio data objects. name: str|None Name of the dataset. instrument: Instrument | None Instrument that might be used to obtain acoustic pressure from the wav audio data. + normalization: Normalization + The type of normalization to apply to the audio data. 
Returns ------- @@ -291,7 +317,13 @@ def from_files( # noqa: PLR0913 mode=mode, data_duration=data_duration, ) - return cls.from_base_dataset(base, name=name, instrument=instrument) + return cls.from_base_dataset( + base, + name=name, + sample_rate=sample_rate, + instrument=instrument, + normalization=normalization, + ) @classmethod def from_base_dataset( @@ -300,10 +332,16 @@ def from_base_dataset( sample_rate: float | None = None, name: str | None = None, instrument: Instrument | None = None, + normalization: Normalization = Normalization.RAW, ) -> AudioDataset: """Return an AudioDataset object from a BaseDataset object.""" return cls( - [AudioData.from_base_data(data, sample_rate) for data in base_dataset.data], + [ + AudioData.from_base_data( + data=data, sample_rate=sample_rate, normalization=normalization + ) + for data in base_dataset.data + ], name=name, instrument=instrument, ) diff --git a/src/osekit/core_api/spectro_data.py b/src/osekit/core_api/spectro_data.py index 19d80e81..e7be3f3d 100644 --- a/src/osekit/core_api/spectro_data.py +++ b/src/osekit/core_api/spectro_data.py @@ -212,7 +212,7 @@ def get_value(self) -> np.ndarray: raise ValueError("SpectroData should have either items or audio_data.") sx = self.fft.stft( - self.audio_data.get_value_calibrated(reject_dc=True), + self.audio_data.get_value_calibrated(), padding="zeros", ) @@ -261,7 +261,7 @@ def get_welch( nfft = self.fft.mfft _, sx = welch( - self.audio_data.get_value_calibrated(reject_dc=True), + self.audio_data.get_value_calibrated(), fs=self.audio_data.sample_rate, window=window, nperseg=nperseg, diff --git a/src/osekit/public_api/analysis.py b/src/osekit/public_api/analysis.py index 0a2a5e64..4d8d3c51 100644 --- a/src/osekit/public_api/analysis.py +++ b/src/osekit/public_api/analysis.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Literal from osekit.core_api.frequency_scale import Scale +from osekit.utils.audio_utils import Normalization if TYPE_CHECKING: from pandas import 
Timedelta, Timestamp @@ -68,6 +69,7 @@ def __init__( data_duration: Timedelta | None = None, mode: Literal["files", "timedelta_total", "timedelta_file"] = "timedelta_total", sample_rate: float | None = None, + normalization: Normalization = Normalization.RAW, name: str | None = None, subtype: str | None = None, fft: ShortTimeFFT | None = None, @@ -106,6 +108,8 @@ def __init__( Sample rate of the new analysis data. Audio data will be resampled if provided, else the sample rate will be set to the one of the original dataset. + normalization: Normalization + The type of normalization to apply to the audio data. name: str | None Name of the analysis dataset. Defaulted as the begin timestamp of the analysis dataset. @@ -143,6 +147,7 @@ def __init__( self.mode = mode self.sample_rate = sample_rate self.name = name + self.normalization = normalization self.subtype = subtype self.fft = fft self.v_lim = v_lim diff --git a/src/osekit/public_api/dataset.py b/src/osekit/public_api/dataset.py index d8acf245..a8cc8d84 100644 --- a/src/osekit/public_api/dataset.py +++ b/src/osekit/public_api/dataset.py @@ -233,6 +233,7 @@ def get_analysis_audiodataset(self, analysis: Analysis) -> AudioDataset: end=analysis.end, data_duration=analysis.data_duration, mode=analysis.mode, + normalization=analysis.normalization, name=analysis.name, instrument=self.instrument, ) diff --git a/src/osekit/utils/audio_utils.py b/src/osekit/utils/audio_utils.py index 2316ad3d..fc70c3bc 100644 --- a/src/osekit/utils/audio_utils.py +++ b/src/osekit/utils/audio_utils.py @@ -1,5 +1,6 @@ from __future__ import annotations +import enum from typing import Literal import numpy as np @@ -112,3 +113,58 @@ def resample(data: np.ndarray, origin_sr: float, target_sr: float) -> np.ndarray else resample_quality_settings["downsample"] ) return soxr.resample(data, origin_sr, target_sr, quality=quality) + + +def normalize_raw(values: np.ndarray) -> np.ndarray: + """No normalization of the audio data.""" + return values + + 
+def normalize_dc_reject(values: np.ndarray) -> np.ndarray: + """Reject the DC component of the audio data.""" + return values - values.mean() + + +def normalize_peak(values: np.ndarray) -> np.ndarray: + """Return values normalized so that the peak value is 1.0.""" + return values / max(abs(values)) + + +def normalize_zscore(values: np.ndarray) -> np.ndarray: + """Return normalized zscore from the audio data.""" + return (values - values.mean()) / values.std() + + +class NormalizationValider(enum.EnumMeta): + """ + Metaclass used for validating the normalization flag, + as only REJECT_DC can be combined with (exactly) one other normalization. + """ + + def __call__(cls, *args, **kwargs): + instance = super().__call__(*args, **kwargs) + + mask = instance.value & ~Normalization.DC_REJECT.value + if mask & (mask - 1): + message = "Combined normalizations can only be DC_REJECT combined with exactly one other normalization type." + raise ValueError(message) + + return instance + + +class Normalization(enum.Flag, metaclass=NormalizationValider): + RAW = enum.auto() + DC_REJECT = enum.auto() + PEAK = enum.auto() + ZSCORE = enum.auto() + + +def normalize(values: np.ndarray, normalization: Normalization) -> np.ndarray: + """Normalize the audio data.""" + if Normalization.DC_REJECT in normalization: + values = normalize_dc_reject(values) + if Normalization.PEAK in normalization: + values = normalize_peak(values) + if Normalization.ZSCORE in normalization: + values = normalize_zscore(values) + return values diff --git a/tests/test_audio.py b/tests/test_audio.py index 2acf01c3..9e67d4b1 100644 --- a/tests/test_audio.py +++ b/tests/test_audio.py @@ -23,7 +23,7 @@ from osekit.core_api.audio_file import AudioFile from osekit.core_api.audio_item import AudioItem from osekit.utils import audio_utils -from osekit.utils.audio_utils import generate_sample_audio +from osekit.utils.audio_utils import generate_sample_audio, Normalization, normalize @pytest.mark.parametrize( @@ -756,6 
+756,69 @@ def resample_mkptch( ) == (upsampling_quality if upsampling_quality is not None else upsampling_default) +@pytest.mark.parametrize( + ("audio_files", "normalization"), + [ + pytest.param( + { + "duration": 1, + "sample_rate": 10, + "nb_files": 1, + "date_begin": pd.Timestamp("2024-01-01 12:00:00"), + "series_type": "increase", + }, + Normalization.RAW, + id="no_normalization", + ), + pytest.param( + { + "duration": 1, + "sample_rate": 10, + "nb_files": 1, + "date_begin": pd.Timestamp("2024-01-01 12:00:00"), + "series_type": "increase", + }, + Normalization.DC_REJECT, + id="dc_reject", + ), + pytest.param( + { + "duration": 1, + "sample_rate": 10, + "nb_files": 1, + "date_begin": pd.Timestamp("2024-01-01 12:00:00"), + "series_type": "increase", + }, + Normalization.ZSCORE, + id="z_score", + ), + ], + indirect=["audio_files"], +) +def test_normalize_audio_data( + audio_files: tuple[list[AudioFile], pytest.fixtures.Subrequest], + normalization: Normalization, +) -> None: + afs, _ = audio_files + + raw_data = np.linspace(0.0, 1.0, 10) + normalized_data = normalize(values=raw_data, normalization=normalization) + + if normalization == Normalization.RAW: + assert np.array_equal(raw_data, normalized_data) + else: + assert not np.array_equal(raw_data, normalized_data) + + # AudioData + ad = AudioData.from_files(afs, normalization=normalization) + assert np.array_equal(ad.get_value(), normalized_data) + + # AudioDataset + ads = AudioDataset.from_files(afs, normalization=normalization) + assert ads.data[0].normalization == normalization + assert np.array_equal(ads.data[0].get_value(), normalized_data) + + @pytest.mark.parametrize( ("audio_files", "begin", "end", "mode", "duration", "expected_audio_data"), [ diff --git a/tests/test_public_api.py b/tests/test_public_api.py index 058c2b5f..54f181a5 100644 --- a/tests/test_public_api.py +++ b/tests/test_public_api.py @@ -19,6 +19,7 @@ from osekit.core_api.spectro_dataset import SpectroDataset from 
osekit.public_api.analysis import Analysis, AnalysisType from osekit.public_api.dataset import Dataset +from osekit.utils.audio_utils import Normalization @pytest.mark.parametrize( @@ -910,6 +911,32 @@ def test_analysis_is_spectro(analysis: Analysis, expected: bool) -> None: ], id="full_reshape", ), + pytest.param( + { + "duration": 5, + "sample_rate": 48_000, + "nb_files": 1, + "date_begin": Timestamp("2024-01-01 12:00:00"), + }, + None, + Analysis( + analysis_type=AnalysisType.AUDIO, + name=None, + begin=None, + end=None, + data_duration=None, + sample_rate=None, + normalization="zscore", + subtype="DOUBLE", + ), + [ + Event( + begin=Timestamp("2024-01-01 12:00:00"), + end=Timestamp("2024-01-01 12:00:05"), + ), + ], + id="normalized_data", + ), ], indirect=["audio_files"], ) @@ -950,6 +977,8 @@ def test_get_analysis_audiodataset( assert analysis_ds.instrument is dataset.instrument + assert analysis_ds.normalization == analysis.normalization + @pytest.mark.parametrize( ("audio_files", "instrument", "analysis", "expected_data"), @@ -1060,6 +1089,7 @@ def test_edit_analysis_before_run( new_name = "new_analysis" new_instrument = Instrument(end_to_end_db=100) new_data = ads.data[::2] + new_normalization = Normalization.ZSCORE ads.sample_rate = new_sr analysis.sample_rate = new_sr @@ -1067,6 +1097,7 @@ def test_edit_analysis_before_run( ads.name = new_name ads.instrument = new_instrument ads.data = new_data + ads.normalization = new_normalization dataset.run_analysis(analysis, audio_dataset=ads) @@ -1091,6 +1122,9 @@ def test_edit_analysis_before_run( assert analysis_ads.sample_rate == new_sr assert analysis_sds.fft.fs == new_sr + # Analyses have the edited normalization + assert analysis_ads.normalization == new_normalization + # Instrument has been edited assert analysis_ads.instrument.end_to_end_db == new_instrument.end_to_end_db diff --git a/tests/test_serialization.py b/tests/test_serialization.py index cc3f6e8d..474c5116 100644 --- a/tests/test_serialization.py 
+++ b/tests/test_serialization.py @@ -21,13 +21,14 @@ from osekit.core_api.spectro_data import SpectroData from osekit.core_api.spectro_dataset import SpectroDataset from osekit.core_api.spectro_file import SpectroFile +from osekit.utils.audio_utils import Normalization if TYPE_CHECKING: from pathlib import Path @pytest.mark.parametrize( - ("audio_files", "begin", "end", "sample_rate"), + ("audio_files", "begin", "end", "sample_rate", "normalization"), [ pytest.param( { @@ -39,8 +40,22 @@ None, None, 48_000, + Normalization.RAW, id="full_file_no_resample", ), + pytest.param( + { + "duration": 1, + "sample_rate": 48_000, + "nb_files": 1, + "date_begin": Timestamp("2024-01-01 12:00:00"), + }, + None, + None, + 48_000, + Normalization.ZSCORE, + id="normalized_audio", + ), pytest.param( { "duration": 1, @@ -51,6 +66,7 @@ None, None, 24_000, + Normalization.RAW, id="full_file_downsample", ), pytest.param( @@ -63,6 +79,7 @@ None, None, 96_000, + Normalization.RAW, id="full_file_upsample", ), pytest.param( @@ -75,6 +92,7 @@ Timestamp("2024-01-01 12:00:01"), Timestamp("2024-01-01 12:00:02"), 48_000, + Normalization.RAW, id="file_part", ), pytest.param( @@ -87,6 +105,7 @@ Timestamp("2024-01-01 12:00:01"), Timestamp("2024-01-01 12:00:02"), 24_000, + Normalization.RAW, id="two_files_with_resample", ), pytest.param( @@ -100,6 +119,7 @@ Timestamp("2024-01-01 12:00:01"), Timestamp("2024-01-01 12:00:04"), 48_000, + Normalization.RAW, id="two_files_with_gap", ), pytest.param( @@ -113,8 +133,23 @@ Timestamp("2024-01-01 12:00:01+0200"), Timestamp("2024-01-01 12:00:04+0200"), 48_000, + Normalization.RAW, id="localized_files", ), + pytest.param( + { + "duration": 2, + "sample_rate": 48_000, + "nb_files": 2, + "inter_file_duration": 1, + "date_begin": Timestamp("2024-01-01 12:00:00+0200"), + }, + Timestamp("2024-01-01 12:00:01+0200"), + Timestamp("2024-01-01 12:00:04+0200"), + 48_000, + Normalization.DC_REJECT, + id="localized_normalized_files", + ), ], indirect=["audio_files"], ) @@ 
-124,6 +159,7 @@ def test_audio_data_serialization( begin: Timestamp | None, end: Timestamp | None, sample_rate: float, + normalization: Normalization, ) -> None: audio_files, _ = audio_files @@ -132,13 +168,14 @@ def test_audio_data_serialization( begin=begin, end=end, sample_rate=sample_rate, + normalization=normalization, ) assert np.array_equal(ad.get_value(), AudioData.from_dict(ad.to_dict()).get_value()) @pytest.mark.parametrize( - ("audio_files", "data_duration", "sample_rate", "name"), + ("audio_files", "data_duration", "sample_rate", "normalization", "name"), [ pytest.param( { @@ -148,6 +185,7 @@ def test_audio_data_serialization( }, Timedelta(seconds=1), 48_000, + Normalization.RAW, None, id="one_audio_data_one_file_no_resample", ), @@ -159,6 +197,7 @@ def test_audio_data_serialization( }, Timedelta(seconds=2), 48_000, + Normalization.RAW, None, id="one_audio_data_two_files_no_resample", ), @@ -170,6 +209,7 @@ def test_audio_data_serialization( }, Timedelta(seconds=2), 24_000, + Normalization.RAW, None, id="one_audio_data_two_files_downsample", ), @@ -181,6 +221,7 @@ def test_audio_data_serialization( }, Timedelta(seconds=1), [12_000, 24_000, 48_000, 96_000], + Normalization.RAW, None, id="multiple_audio_data_different_sample_rates", ), @@ -192,6 +233,7 @@ def test_audio_data_serialization( }, Timedelta(seconds=1), 48_000, + Normalization.RAW, "merriweather post pavilion", id="named_ads", ), @@ -204,6 +246,7 @@ def test_audio_data_serialization( }, Timedelta(seconds=1), 48_000, + Normalization.RAW, "merriweather post pavilion", id="localized_ads", ), @@ -215,6 +258,7 @@ def test_audio_dataset_serialization( audio_files: tuple[list[AudioFile], pytest.fixtures.Subrequest], data_duration: Timestamp | None, sample_rate: float | list[float], + normalization: Normalization, name: str | None, ) -> None: audio_files, request = audio_files @@ -230,6 +274,7 @@ def test_audio_dataset_serialization( tmp_path, strptime_format=strptime_format, 
data_duration=data_duration, + normalization=normalization, name=name, ) @@ -259,6 +304,7 @@ def test_audio_dataset_serialization( assert ads.has_default_name == ads2.has_default_name assert ads.sample_rate == ads2.sample_rate assert ads.begin == ads2.begin + assert ads.normalization == ads2.normalization assert all( np.array_equal(ad.get_value(), ad2.get_value()) diff --git a/tests/test_spectro.py b/tests/test_spectro.py index d4b7e6dd..73284e9b 100644 --- a/tests/test_spectro.py +++ b/tests/test_spectro.py @@ -283,7 +283,7 @@ def test_spectrogram_from_npz_files( for spectro in sd_split: spectro.write(tmp_path / "output") - centered_data = spectro.audio_data.get_value(reject_dc=True) + centered_data = spectro.audio_data.get_value() (tmp_path / "audio").mkdir(exist_ok=True) sf.write( file=tmp_path / "audio" / f"{spectro.audio_data}.wav", @@ -294,10 +294,6 @@ def test_spectrogram_from_npz_files( assert len(list((tmp_path / "output").glob("*.npz"))) == nb_chunks - # Since we reject the DC of audio data before computing Sx values of each chunk, - # we must compare the concatenated chunks with an AudioData made from the - # DC-free parts. 
# --- Patch residue carried over verbatim from the original lines ---
# -
#          afs = [
#              AudioFile(f, strptime_format=TIMESTAMP_FORMAT_EXPORTED_FILES_UNLOCALIZED)
#              for f in (tmp_path / "audio").glob("*.wav")
# diff --git a/tests/test_utils.py b/tests/test_utils.py
# index 1263748a..9326865e 100755
# --- a/tests/test_utils.py
# +++ b/tests/test_utils.py
# @@ -1,8 +1,11 @@
#  from __future__ import annotations
#
#  import time
# +from contextlib import nullcontext
#  from pathlib import Path
# +from typing import Union
#
# +import numpy as np
#  import pandas as pd
#  import pytest
#
# @@ -12,6 +15,7 @@
#      locked,
#      nb_files_per_batch,
#  )
# +from osekit.utils.audio_utils import Normalization, normalize
#  from osekit.utils.formatting_utils import aplose2raven
#  from osekit.utils.path_utils import move_tree
#  from pandas import Timedelta
# @@ -363,3 +367,128 @@ def test_get_closest_value_index(
#      expected: int,
#  ) -> None:
#      assert get_closest_value_index(values=values, target=target) == expected


@pytest.mark.parametrize(
    ("normalizations", "expected"),
    [
        pytest.param(
            [Normalization.RAW],
            nullcontext(Normalization.RAW),
            id="raw_is_fine",
        ),
        pytest.param(
            [1],
            nullcontext(Normalization.RAW),
            id="int_raw_is_fine",
        ),
        pytest.param(
            [Normalization.DC_REJECT],
            nullcontext(Normalization.DC_REJECT),
            id="dc_reject_is_fine",
        ),
        pytest.param(
            [Normalization.DC_REJECT, Normalization.PEAK],
            nullcontext(Normalization.DC_REJECT | Normalization.PEAK),
            id="dc_and_peak_is_fine",
        ),
        pytest.param(
            [Normalization.DC_REJECT, Normalization.ZSCORE],
            nullcontext(Normalization.DC_REJECT | Normalization.ZSCORE),
            id="dc_and_zscore_is_fine",
        ),
        pytest.param(
            [10],
            nullcontext(Normalization.DC_REJECT | Normalization.ZSCORE),
            id="int_dc_and_zscore_is_fine",
        ),
        pytest.param(
            [Normalization.DC_REJECT, Normalization.PEAK, Normalization.ZSCORE],
            pytest.raises(ValueError),
            id="dc_reject_can_be_combined_with_only_one_other_value",
        ),
        pytest.param(
            [Normalization.PEAK, Normalization.ZSCORE],
            pytest.raises(ValueError),
            id="combination_without_dc_raises",
        ),
        pytest.param(
            [4, 8],
            pytest.raises(ValueError),
            id="int_combination_without_dc_raises",
        ),
        pytest.param(
            [12],
            pytest.raises(ValueError),
            id="int_direct_combination_without_dc_raises",
        ),
    ],
)
def test_combined_normalization(
    normalizations: list[Union[Normalization, int]], expected
) -> None:
    """Check which combinations of normalization flags are accepted.

    ``expected`` is a context manager: either ``nullcontext(flag)``, yielding
    the flag the combination must be equal to, or ``pytest.raises(ValueError)``
    for the combinations that must be rejected.
    """
    # Local imports keep this test self-contained.
    from functools import reduce
    from operator import or_

    def combine_normalizations(
        normalizations: list[Union[Normalization, int]],
    ) -> Normalization:
        """OR all items together, integers being converted to flags first."""
        members = [
            Normalization(n) if isinstance(n, int) else n for n in normalizations
        ]
        return reduce(or_, members)

    with expected as e:
        # With pytest.raises, the combination raises before the comparison.
        assert combine_normalizations(normalizations) == e


@pytest.mark.parametrize(
    ("values", "normalization", "expected"),
    [
        pytest.param(
            np.array([0.0, 1.0, 2.0]),
            Normalization.RAW,
            np.array([0.0, 1.0, 2.0]),
            id="raw",
        ),
        pytest.param(
            np.array([0.0, 1.0, 2.0]),
            Normalization.DC_REJECT,
            np.array([-1.0, 0.0, 1.0]),
            id="dc_reject",
        ),
        pytest.param(
            np.array([0.0, 1.0, 2.0]),
            Normalization.PEAK,
            np.array([0.0, 0.5, 1.0]),
            id="peak",
        ),
        pytest.param(
            np.array([-0.25, 0.5, 0.0]),
            Normalization.PEAK,
            np.array([-0.5, 1.0, 0.0]),
            id="peak_with_negative_values",
        ),
        pytest.param(
            np.array([-0.5, 0.25, 0.0]),
            Normalization.PEAK,
            np.array([-1.0, 0.5, 0.0]),
            id="peak_with_negative_max",
        ),
        pytest.param(
            np.array([0.0, 1.0, 2.0]),
            Normalization.ZSCORE,
            np.array([-1.224744871391589, 0.0, 1.224744871391589]),
            id="zscore",
        ),
        pytest.param(
            np.array([0.0, 2.0, 4.0]),
            Normalization.DC_REJECT | Normalization.PEAK,
            np.array([-1.0, 0.0, 1.0]),
            id="dc_reject_and_peak",
        ),
    ],
)
def test_normalization(
    values: np.ndarray, normalization: Normalization, expected: np.ndarray
) -> None:
    """Check the values returned by ``normalize`` for each preset."""
    normalized = normalize(values=values, normalization=normalization)

    # Tolerance-based comparison: expectations such as the z-score are
    # irrational numbers, for which a bit-exact match would depend on the
    # order of the floating point operations.
    np.testing.assert_allclose(normalized, expected, rtol=1e-12, atol=1e-12)