33import numpy as np
44from pandas import DataFrame , Timedelta , Timestamp
55
6+
67def aplose2raven (
78 aplose_result : DataFrame ,
8- audio_datetimes : list [Timestamp ],
9- audio_durations : list [float ],
9+ list_audio_begin_time : list [Timestamp ],
10+ audio_durations : list [Timedelta ],
1011) -> DataFrame :
1112 r"""Format an APLOSE result DataFrame to a Raven result DataFrame.
1213
@@ -19,89 +20,141 @@ def aplose2raven(
1920 aplose_result: DataFrame
2021 APLOSE formatted result DataFrame.
2122
22- audio_datetimes : list[pd. Timestamp]
23- list of tz-aware timestamps from considered audio files.
23+ list_audio_begin_time : list[Timestamp]
24+ list of tz-aware begin timestamps of the considered audio files.
2425
25- audio_durations: list[float ]
26- list of all considered audio file durations in seconds .
26+ audio_durations: list[Timedelta ]
27+ list of all considered audio file durations.
2728
2829 Returns
2930 -------
3031 Raven formatted DataFrame.
3132
3233 Example of use
3334 --------------
34- aplose_file = Path("path/to/aplose/result/file")
35- timestamp_list = list(filenames)
36- duration_list = list(durations)
37-
38- aplose_result = (
39- pd.read_csv(aplose_file, parse_dates=["start_datetime", "end_datetime"] )
40- .sort_values("start_datetime")
41- .reset_index(drop=True)
42- )
43- raven_result = aplose2raven(aplose_result, filename_list, duration_list )
35+ >>> from pathlib import Path
36+ >>> from pandas import read_csv
37+ >>> from osekit.core_api.audio_dataset import AudioDataset
38+ >>> from osekit.utils.formatting_utils import aplose2raven
39+
40+ >>> dataset_folder = Path(r"path\to\audio\folder" )
41+ >>> dataset = AudioDataset.from_folder(dataset_folder,
42+ ...     strptime_format="strptime_format",
43+ ...     timezone='utc',
44+ ... )
4445
45- # export to Raven format: tab-separated files with a txt extension
46- raven_result.to_csv('path/to/result/file.txt', sep='\t', index=False)
46+ >>> files = sorted(dataset.files, key=lambda f: f.begin)
47+ >>> begin_list, duration_list = [f.begin for f in files], [f.duration for f in files]
48+
49+ >>> csv = Path(r"path\to\result\csv")
50+ >>> df = (read_csv(csv,
51+ ...     parse_dates=["start_datetime", "end_datetime"])
52+ ...     .sort_values("start_datetime")
53+ ...     .reset_index(drop=True))
54+
55+ >>> df_raven = aplose2raven(df, begin_list, duration_list)
56+ >>> df_raven.to_csv('path/to/result/file.txt', sep='\t', index=False)
4757
4858 """
49- # index of the corresponding wav file for each detection
59+ # index of the corresponding audio file for each detection
5060 index_detection = (
51- np .searchsorted (audio_datetimes , aplose_result ["start_datetime" ], side = "right" )
61+ np .searchsorted (list_audio_begin_time ,
62+ aplose_result ["start_datetime" ],
63+ side = "right"
64+ )
5265 - 1
5366 )
5467
55- # Add beg datetime of the wavfile
56- aplose_result [ "wav_timestamp" ] = [ audio_datetimes [ i ] for i in index_detection ]
57-
58- # time differences between consecutive datetimes and add wav_duration
59- filename_diff = [ td . total_seconds () for td in np . diff ( audio_datetimes ). tolist ()]
60- adjust = [ 0 ]
61- adjust . extend ([ t1 - t2 for ( t1 , t2 ) in zip ( audio_durations [: - 1 ], filename_diff , strict = False )])
62- cumsum_adjust = list ( np . cumsum ( adjust ))
68+ """
69+ The following time adjustment is necessary because Raven does not account
70+ for the duty cycle, nor for any potential offset between the end of one
71+ file and the start of the next. To ensure that detection timestamps in
72+ APLOSE format align with the spectrograms displayed by Raven, a correction
73+ of the number of seconds is required, since the software only uses the
74+ elapsed time from the beginning of the first file to generate the bounding boxes.
75+ """
6376
64- # adjusted datetimes to match Raven annoying functioning
65- begin_datetime_adjusted = []
66- end_datetime_adjusted = []
67- for (beg_det , end_det , beg_wav , ind ) in (zip (aplose_result ["start_datetime" ], aplose_result ["end_datetime" ],
68- aplose_result ["wav_timestamp" ], index_detection , strict = False )):
77+ # Add the begin time of the audio file corresponding to each detection
78+ aplose_result ["wav_timestamp" ] = [list_audio_begin_time [i ] for i in index_detection ]
79+
80+ # Compute the time gaps between consecutive audio file begin times
81+ audio_begin_timegap = list (np .diff (list_audio_begin_time ).tolist ())
82+
83+ # Adjustment values: difference between each file's duration
84+ # and the gap until the next file.
85+ # (Required to account for potential gaps/overlaps between files)
86+ adjustment_values = [Timedelta (0 )]
87+ adjustment_values .extend (
88+ [t1 - t2 for (t1 , t2 ) in zip (audio_durations [:- 1 ],
89+ audio_begin_timegap , strict = False )
90+ ]
91+ )
92+
93+ # Cumulative adjustment (as Timedelta), to realign all detection timestamps consistently
94+ cumsum_adjust = list (np .cumsum (adjustment_values ))
95+
96+ detection_begin_datetime_adjusted = []
97+ detection_end_datetime_adjusted = []
98+ for i in range (len (aplose_result )):
99+ detection_begin_time = aplose_result ["start_datetime" ].iloc [i ]
100+ detection_end_time = aplose_result ["end_datetime" ].iloc [i ]
101+ audio_begin_time = aplose_result ["wav_timestamp" ].iloc [i ]
102+ ind = index_detection [i ]
69103 """
70- For duty cycled data, if the aplose_result detections were reshaped (eg : to 60-second duration),
104+ For duty cycled data, if aplose_result detections were reshaped (eg to 60s duration),
71105 the start or end of the detection might virtually be located in an OFF duty cycle phase.
72106 This would cause issues in Raven, because the OFF parts are not represented,
73- and the detection start will be located on the previous wav file.
74- The following 'if' conditions apply the appropriate correction to make the Raven box (1)starts or (2) ends
75- at the appropriate timing in Raven (ie at the begining or end of a wav file).
107+ and the detection start will be located on the previous audio file.
108+ The 2 following 'if' conditions apply the appropriate correction
109+ to make the Raven box (1) start or (2) end
110+ at the appropriate timing in Raven (ie at the beginning or end of an audio file).
76111 """
77112
78- if (beg_wav + Timedelta (seconds = audio_durations [ind ])) < beg_det < (beg_wav + Timedelta (seconds = filename_diff [ind ])):
79- corr_dur = (audio_datetimes [ind + 1 ] - beg_det ).total_seconds ()
80- begin_datetime_adjusted .append (beg_det + Timedelta (seconds = cumsum_adjust [ind + 1 ]) + Timedelta (seconds = corr_dur ))
81- end_datetime_adjusted .append (end_det + Timedelta (seconds = cumsum_adjust [ind + 1 ]))
82- elif (beg_wav + Timedelta (seconds = audio_durations [ind ])) < end_det < (beg_wav + Timedelta (seconds = filename_diff [ind ])):
83- begin_datetime_adjusted .append (
84- beg_det + Timedelta (seconds = cumsum_adjust [ind ])
113+ audio_begin_time_adjusted = audio_begin_time + audio_durations [ind ]
114+
115+ if ind < len (audio_begin_timegap ):
116+ next_audio_begin_time_adjusted = audio_begin_time + audio_begin_timegap [ind ]
117+ else :
118+ next_audio_begin_time_adjusted += audio_durations [ind ]
119+
120+
121+ if audio_begin_time_adjusted < detection_begin_time < next_audio_begin_time_adjusted :
122+ correction_duration = (list_audio_begin_time [ind + 1 ] - detection_begin_time )
123+ detection_begin_datetime_adjusted .append (detection_begin_time
124+ + cumsum_adjust [ind + 1 ]
125+ + correction_duration
126+ )
127+ detection_end_datetime_adjusted .append (detection_end_time
128+ + cumsum_adjust [ind + 1 ]
129+ )
130+ elif audio_begin_time_adjusted < detection_end_time < next_audio_begin_time_adjusted :
131+ detection_begin_datetime_adjusted .append (
132+ detection_begin_time + cumsum_adjust [ind ]
85133 )
86- corr_dur = (end_det - beg_det ).total_seconds () - ((beg_wav + Timedelta (seconds = audio_durations [ind ])) - beg_det ).total_seconds ()
87- end_datetime_adjusted .append (end_det + Timedelta (seconds = cumsum_adjust [ind ]) - Timedelta (seconds = corr_dur ))
134+ correction_duration = ((detection_end_time - detection_begin_time ) -
135+ ((audio_begin_time + audio_durations [ind ])
136+ - detection_begin_time ))
137+ detection_end_datetime_adjusted .append (detection_end_time +
138+ cumsum_adjust [ind ] -
139+ correction_duration )
88140
89141 else :
90- # Else, apply normal raven time correction
91- begin_datetime_adjusted .append (
92- beg_det + Timedelta ( seconds = cumsum_adjust [ind ])
142+ # Else, apply normal Raven time correction
143+ detection_begin_datetime_adjusted .append (
144+ detection_begin_time + cumsum_adjust [ind ]
93145 )
94- end_datetime_adjusted .append (
95- end_det + Timedelta ( seconds = cumsum_adjust [ind ])
146+ detection_end_datetime_adjusted .append (
147+ detection_end_time + cumsum_adjust [ind ]
96148 )
97149
98- # Convert the datetimes to seconds from the start of first wav (raven format)
150+ # Convert the datetimes to seconds from the start of first audio (raven format)
99151 begin_time_adjusted = [
100- (d - audio_datetimes [0 ]).total_seconds () for d in begin_datetime_adjusted
152+ (d - list_audio_begin_time [0 ]).total_seconds () for d in detection_begin_datetime_adjusted
101153 ]
102154 end_time_adjusted = [
103- (d - audio_datetimes [0 ]).total_seconds () for d in end_datetime_adjusted
155+ (d - list_audio_begin_time [0 ]).total_seconds () for d in detection_end_datetime_adjusted
104156 ]
157+
105158 # Build corrected Raven selection table
106159 raven_result = DataFrame ()
107160 raven_result ["Selection" ] = list (range (1 , len (aplose_result ) + 1 ))
0 commit comments