Skip to content

Commit 1ce8864

Browse files
MaelleTtrt and Maëlle TORTEROTOT authored
Fix aplose2raven (#263)
* Partial fix of raven time related issue for duty cycled data * apply duty cycle corr * fix test_utils * fix aplose2raven and adapt test_utils * ruff fix * fix aplose2raven for det_end in OFF duty cycle phase --------- Co-authored-by: Maëlle TORTEROTOT <maelle.torterotot@ensta.fr>
1 parent 5079ed4 commit 1ce8864

2 files changed

Lines changed: 74 additions & 42 deletions

File tree

src/osekit/utils/formatting_utils.py

Lines changed: 45 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
from __future__ import annotations
22

33
import numpy as np
4-
import pandas as pd
5-
4+
from pandas import DataFrame, Timedelta, Timestamp
65

76
def aplose2raven(
8-
aplose_result: pd.DataFrame,
9-
audio_datetimes: list[pd.Timestamp],
7+
aplose_result: DataFrame,
8+
audio_datetimes: list[Timestamp],
109
audio_durations: list[float],
11-
) -> pd.DataFrame:
10+
) -> DataFrame:
1211
r"""Format an APLOSE result DataFrame to a Raven result DataFrame.
1312
1413
The list of audio files and durations considered for the Raven campaign should be
@@ -53,35 +52,65 @@ def aplose2raven(
5352
- 1
5453
)
5554

55+
# Add the begin datetime of the wav file matched to each detection
56+
aplose_result["wav_timestamp"] = [audio_datetimes[i] for i in index_detection]
57+
5658
# Time differences between consecutive audio datetimes, compared against the wav durations
5759
filename_diff = [td.total_seconds() for td in np.diff(audio_datetimes).tolist()]
5860
adjust = [0]
59-
adjust.extend([t1 - t2 for (t1, t2) in zip(audio_durations[:-1], filename_diff)])
61+
adjust.extend([t1 - t2 for (t1, t2) in zip(audio_durations[:-1], filename_diff, strict=False)])
6062
cumsum_adjust = list(np.cumsum(adjust))
6163

6264
# Adjusted datetimes to match Raven's annoying behaviour
63-
begin_datetime_adjusted = [
64-
det + pd.Timedelta(seconds=cumsum_adjust[ind])
65-
for (det, ind) in zip(aplose_result["start_datetime"], index_detection)
66-
]
67-
end_datetime_adjusted = [
68-
det + pd.Timedelta(seconds=cumsum_adjust[ind])
69-
for (det, ind) in zip(aplose_result["end_datetime"], index_detection)
70-
]
65+
begin_datetime_adjusted = []
66+
end_datetime_adjusted = []
67+
for (beg_det, end_det, beg_wav, ind) in (zip(aplose_result["start_datetime"], aplose_result["end_datetime"],
68+
aplose_result["wav_timestamp"], index_detection, strict=False)):
69+
"""
70+
For duty-cycled data, if the aplose_result detections were reshaped (e.g. to a 60-second duration),
71+
the start or end of a detection might virtually be located in an OFF duty cycle phase.
72+
This would cause an issue in Raven, because the OFF phases are not represented,
73+
and the detection start would be located on the previous wav file.
74+
The following 'if' conditions apply the appropriate correction to make the Raven box (1) start or (2) end
75+
at the appropriate time in Raven (i.e. at the beginning or end of a wav file).
76+
"""
77+
78+
if (beg_wav + Timedelta(seconds=audio_durations[ind])) < beg_det < (beg_wav + Timedelta(seconds = filename_diff[ind])):
79+
corr_dur = (audio_datetimes[ind + 1] - beg_det).total_seconds()
80+
begin_datetime_adjusted.append(beg_det + Timedelta(seconds=cumsum_adjust[ind + 1]) + Timedelta(seconds=corr_dur))
81+
end_datetime_adjusted.append(end_det + Timedelta(seconds=cumsum_adjust[ind + 1]))
82+
elif (beg_wav + Timedelta(seconds=audio_durations[ind])) < end_det < (beg_wav + Timedelta(seconds = filename_diff[ind])):
83+
begin_datetime_adjusted.append(
84+
beg_det + Timedelta(seconds=cumsum_adjust[ind])
85+
)
86+
corr_dur = (end_det-beg_det).total_seconds() - ((beg_wav + Timedelta(seconds=audio_durations[ind])) -beg_det).total_seconds()
87+
end_datetime_adjusted.append(end_det + Timedelta(seconds=cumsum_adjust[ind]) - Timedelta(seconds=corr_dur))
88+
89+
else:
90+
# Otherwise, apply the normal Raven time correction
91+
begin_datetime_adjusted.append(
92+
beg_det + Timedelta(seconds=cumsum_adjust[ind])
93+
)
94+
end_datetime_adjusted.append(
95+
end_det + Timedelta(seconds=cumsum_adjust[ind])
96+
)
97+
98+
# Convert the datetimes to seconds from the start of the first wav file (Raven format)
7199
begin_time_adjusted = [
72100
(d - audio_datetimes[0]).total_seconds() for d in begin_datetime_adjusted
73101
]
74102
end_time_adjusted = [
75103
(d - audio_datetimes[0]).total_seconds() for d in end_datetime_adjusted
76104
]
77-
78-
raven_result = pd.DataFrame()
105+
# Build corrected Raven selection table
106+
raven_result = DataFrame()
79107
raven_result["Selection"] = list(range(1, len(aplose_result) + 1))
80108
raven_result["View"] = [1] * len(aplose_result)
81109
raven_result["Channel"] = [1] * len(aplose_result)
82110
raven_result["Begin Time (s)"] = begin_time_adjusted
83111
raven_result["End Time (s)"] = end_time_adjusted
84112
raven_result["Low Freq (Hz)"] = aplose_result["start_frequency"]
85113
raven_result["High Freq (Hz)"] = aplose_result["end_frequency"]
114+
raven_result["Begin Date Time Real"] = aplose_result["start_datetime"]
86115

87116
return raven_result

tests/test_utils.py

Lines changed: 29 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,33 +20,35 @@
2020
def aplose_dataframe() -> pd.DataFrame:
2121
data = pd.DataFrame(
2222
{
23-
"dataset": ["dataset_test", "dataset_test", "dataset_test"],
24-
"filename": ["file1.wav", "file2.wav", "file3.wav"],
25-
"start_time": [0, 0, 5.9],
26-
"end_time": [60, 60, 8.1],
27-
"start_frequency": [0, 0, 18500.0],
28-
"end_frequency": [96000, 96000, 53000.0],
29-
"annotation": ["boat", "boat", "boat"],
30-
"annotator": ["bbjuni", "bbjuni", "bbjuni"],
23+
"dataset": ["dataset_test", "dataset_test", "dataset_test", "dataset_test"],
24+
"filename": ["file1.wav", "file2.wav", "file3.wav", "file4.wav"],
25+
"start_time": [0, 0, 5.9, 0],
26+
"end_time": [30, 30, 8.1, 30],
27+
"start_frequency": [0, 0, 18500.0, 0],
28+
"end_frequency": [96000, 96000, 53000.0, 96000],
29+
"annotation": ["boat", "boat", "boat", "boat"],
30+
"annotator": ["bbjuni", "bbjuni", "bbjuni", "bbjuni"],
3131
"start_datetime": [
3232
pd.Timestamp("2020-05-29T11:30:00.000+00:00"),
3333
pd.Timestamp("2020-05-29T11:31:00.000+00:00"),
3434
pd.Timestamp("2020-05-29T11:31:05.900+00:00"),
35+
pd.Timestamp("2020-05-29T11:32:50.000+00:00"),
3536
],
3637
"end_datetime": [
37-
pd.Timestamp("2020-05-29T11:31:00.000+00:00"),
38-
pd.Timestamp("2020-05-29T11:32:00.000+00:00"),
39-
pd.Timestamp("2020-05-29T11:32:08.100+00:00"),
38+
pd.Timestamp("2020-05-29T11:30:30.000+00:00"),
39+
pd.Timestamp("2020-05-29T11:31:30.000+00:00"),
40+
pd.Timestamp("2020-05-29T11:31:08.100+00:00"),
41+
pd.Timestamp("2020-05-29T11:33:20.000+00:00"),
4042
],
41-
"is_box": [0, 0, 1],
43+
"is_box": [0, 0, 1, 0],
4244
},
4345
)
4446

4547
return data.reset_index(drop=True)
4648

4749

4850
@pytest.fixture
49-
def raven_timestamps() -> list:
51+
def audio_timestamps() -> list:
5052
return list(
5153
pd.date_range(
5254
start="2020-05-29T11:30:00.000+00:00",
@@ -57,31 +59,32 @@ def raven_timestamps() -> list:
5759

5860

5961
@pytest.fixture
60-
def raven_durations(raven_timestamps: pytest.fixture) -> list:
61-
return [60] * len(raven_timestamps)
62+
def audio_durations(audio_timestamps: pytest.fixture) -> list:
63+
return [30] * len(audio_timestamps)
6264

6365

6466
@pytest.mark.unit
6567
def test_aplose2raven(
6668
aplose_dataframe: pytest.fixture,
67-
raven_timestamps: pytest.fixture,
68-
raven_durations: pytest.fixture,
69+
audio_timestamps: pytest.fixture,
70+
audio_durations: pytest.fixture,
6971
) -> None:
7072
raven_dataframe = aplose2raven(
7173
aplose_result=aplose_dataframe,
72-
audio_datetimes=raven_timestamps,
73-
audio_durations=raven_durations,
74+
audio_datetimes=audio_timestamps,
75+
audio_durations=audio_durations,
7476
)
7577

7678
expected_raven_dataframe = pd.DataFrame(
7779
{
78-
"Selection": [1, 2, 3],
79-
"View": [1, 1, 1],
80-
"Channel": [1, 1, 1],
81-
"Begin Time (s)": [0.0, 60.0, 65.9],
82-
"End Time (s)": [60.0, 120.0, 128.1],
83-
"Low Freq (Hz)": [0.0, 0.0, 18500.0],
84-
"High Freq (Hz)": [96000.0, 96000.0, 53000.0],
80+
"Selection": [1, 2, 3, 4],
81+
"View": [1, 1, 1, 1],
82+
"Channel": [1, 1, 1, 1],
83+
"Begin Time (s)": [0.0, 30.0, 35.9, 90.0],
84+
"End Time (s)": [30.0, 60.0, 38.1, 110.0],
85+
"Low Freq (Hz)": [0.0, 0.0, 18500.0, 0.0],
86+
"High Freq (Hz)": [96000.0, 96000.0, 53000.0, 96000.0],
87+
"Begin Date Time Real": aplose_dataframe["start_datetime"],
8588
},
8689
)
8790

0 commit comments

Comments
 (0)