Skip to content

Commit bda6a6a

Browse files
authored
Export Annotations Task (#518)
* initial exporting annotations task * modify indentation * adding flat csv and json to export zip file
1 parent dced8a1 commit bda6a6a

5 files changed

Lines changed: 256 additions & 3 deletions

File tree

bats_ai/core/tasks/export_task.py

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,14 @@
44
from datetime import timedelta
55
from io import BytesIO, StringIO
66
import json
7+
from urllib.parse import urljoin
78
import zipfile
89

10+
from django.conf import settings
911
from django.contrib.auth.models import User
1012
from django.core.files import File
13+
from django.core.files.storage import default_storage
14+
from django.db.models import Prefetch
1115
from django.utils.timezone import now
1216

1317
from bats_ai.celery import app
@@ -18,6 +22,28 @@
1822
RecordingTag,
1923
SequenceAnnotations,
2024
)
25+
from bats_ai.core.models.recording_annotation import RecordingAnnotationSpecies
26+
27+
# Written to the export manifest under the "schema_version" key.
RECORDING_ANNOTATION_EXPORT_SCHEMA_VERSION = 1


# Column names (and column order) of the flat CSV export. Each flat row is
# projected onto exactly these keys before it is written.
RECORDING_ANNOTATION_FLAT_FIELDNAMES = [
    "recording_id",
    "filename",
    "grts_cell_id",
    "sample_frame_id",
    "id",
    "owner",
    "comments",
    "created",
    "model",
    "species",
    "species_codes",
    "confidence",
    "submitted",
    "additional_data",
    "spectrogram_url",
    "wav_download_url",
]
2147

2248

2349
def build_filters(filters, *, has_confidence=False):
@@ -179,6 +205,175 @@ def export_tag_annotation_summary_task(self, export_id: int):
179205
raise
180206

181207

208+
@app.task(bind=True)
def export_recording_annotation_hierarchy_task(self, export_id: int):
    """Build the recording-annotation zip and attach it to an export record.

    Args:
        export_id: Primary key of the ``ExportedAnnotationFile`` to populate.

    On success the record gets the zip file, its download URL, status
    ``"complete"`` and an expiry 24 hours from now. On any exception the
    record is saved with status ``"failed"`` and the exception is re-raised.
    """
    export_record = ExportedAnnotationFile.objects.get(pk=export_id)
    try:
        recordings_payload, flat_rows, manifest = _collect_recording_annotations_export()

        # The archive is assembled fully in memory before being handed to storage.
        archive = BytesIO()
        with zipfile.ZipFile(archive, mode="w", compression=zipfile.ZIP_DEFLATED) as zipf:
            _write_recording_annotations_zip(zipf, recordings_payload, flat_rows, manifest)
        archive.seek(0)

        # save=False: the model row is persisted once, below, with all fields set.
        export_record.file.save(
            f"recording-annotations-{export_id}.zip",
            File(archive),
            save=False,
        )
        export_record.download_url = export_record.file.url
        export_record.status = "complete"
        export_record.expires_at = now() + timedelta(hours=24)
        export_record.save()
    except Exception:
        export_record.status = "failed"
        export_record.save()
        raise
234+
235+
236+
def _recording_species_lists(annotation):
    """Return parallel lists of common names and species codes for *annotation*.

    Reads ``annotation.ordered_species_links`` and, keeping its order, takes
    ``common_name`` and ``species_code`` from each link's ``species``.

    Returns:
        A ``(common_names, species_codes)`` tuple of two lists.
    """
    linked_species = [link.species for link in annotation.ordered_species_links]
    names = [entry.common_name for entry in linked_species]
    codes = [entry.species_code for entry in linked_species]
    return names, codes
244+
245+
246+
def _recording_annotation_entry_dict(annotation, species, species_codes):
    """Serialize one annotation into the dict used by the export payloads.

    Args:
        annotation: Object exposing ``id``, ``owner.username``, ``comments``,
            ``created``, ``model``, ``confidence``, ``additional_data`` and
            ``submitted``.
        species: Value stored under the ``"species"`` key.
        species_codes: Value stored under the ``"species_codes"`` key.

    Returns:
        A dict with the annotation's fields; ``created`` is rendered with
        ``isoformat()`` and ``owner`` is the owner's username.
    """
    owner_name = annotation.owner.username
    created_iso = annotation.created.isoformat()
    entry = {
        "id": annotation.id,
        "owner": owner_name,
        "comments": annotation.comments,
        "created": created_iso,
        "model": annotation.model,
        "species": species,
        "species_codes": species_codes,
        "confidence": annotation.confidence,
        "additional_data": annotation.additional_data,
        "submitted": annotation.submitted,
    }
    return entry
259+
260+
261+
def _recording_export_metadata(recording):
    """Return the recording-level fields shared by every export row.

    Args:
        recording: Object exposing ``id``, ``name``, ``grts_cell_id``,
            ``sample_frame_id`` and ``audio_file``.

    Returns:
        A dict with the recording identifiers, a spectrogram page URL built
        from ``settings.BATAI_WEB_URL``, and a storage URL for the audio file
        (``None`` when the recording has no audio file).
    """
    metadata = {
        "recording_id": recording.id,
        "filename": recording.name,
        "grts_cell_id": recording.grts_cell_id,
        "sample_frame_id": recording.sample_frame_id,
    }

    # NOTE(review): the path starts with "/", so urljoin drops any path
    # component of BATAI_WEB_URL -- confirm the web app is served from the root.
    spectrogram_path = f"/recording/{recording.id}/spectrogram"
    metadata["spectrogram_url"] = urljoin(settings.BATAI_WEB_URL, spectrogram_path)

    audio_file = recording.audio_file
    if audio_file:
        metadata["wav_download_url"] = default_storage.url(audio_file.name)
    else:
        metadata["wav_download_url"] = None
    return metadata
275+
276+
277+
def _collect_recording_annotations_export():
    """Gather all recording annotations into hierarchical and flat structures.

    Annotations are read in ``(recording_id, id)`` order with their recording,
    owner and ordered species links loaded up front.

    Returns:
        A ``(recordings_payload, flat_rows, manifest)`` tuple:

        * ``recordings_payload`` -- one dict per recording that has at least
          one annotation, sorted by ``recording_id``, holding the recording
          metadata, submitted/unsubmitted counters and an ``annotations`` list.
        * ``flat_rows`` -- one dict per annotation combining the recording
          metadata with the annotation entry.
        * ``manifest`` -- export type, schema version, timestamp and totals.
    """
    species_links_prefetch = Prefetch(
        "recordingannotationspecies_set",
        queryset=RecordingAnnotationSpecies.objects.select_related("species").order_by("order"),
        to_attr="ordered_species_links",
    )
    annotations = (
        RecordingAnnotation.objects.select_related("recording", "owner")
        .prefetch_related(species_links_prefetch)
        .order_by("recording_id", "id")
    )

    recordings_by_id = {}
    # Recording metadata is computed once per recording and reused for every
    # flat row, so the storage URL lookup is not repeated per annotation and
    # the hierarchical and flat outputs always carry identical URLs.
    metadata_by_recording_id = {}
    flat_rows = []
    submitted_annotation_count = 0
    unsubmitted_annotation_count = 0

    for annotation in annotations:
        recording = annotation.recording
        recording_entry = recordings_by_id.get(recording.id)
        if recording_entry is None:
            recording_metadata = _recording_export_metadata(recording)
            metadata_by_recording_id[recording.id] = recording_metadata
            recording_entry = {
                **recording_metadata,
                "submitted_annotations": 0,
                "unsubmitted_annotations": 0,
                "annotations": [],
            }
            recordings_by_id[recording.id] = recording_entry
        else:
            recording_metadata = metadata_by_recording_id[recording.id]

        if annotation.submitted:
            recording_entry["submitted_annotations"] += 1
            submitted_annotation_count += 1
        else:
            recording_entry["unsubmitted_annotations"] += 1
            unsubmitted_annotation_count += 1

        species, species_codes = _recording_species_lists(annotation)
        annotation_entry = _recording_annotation_entry_dict(
            annotation,
            species,
            species_codes,
        )
        recording_entry["annotations"].append(annotation_entry)
        flat_rows.append({**recording_metadata, **annotation_entry})

    recordings_payload = sorted(
        recordings_by_id.values(),
        key=lambda recording: recording["recording_id"],
    )
    annotation_count = submitted_annotation_count + unsubmitted_annotation_count
    manifest = {
        "export_type": "recording_annotation_hierarchy",
        "schema_version": RECORDING_ANNOTATION_EXPORT_SCHEMA_VERSION,
        "exported_at": now().isoformat(),
        "recording_count": len(recordings_by_id),
        "annotation_count": annotation_count,
        "submitted_annotation_count": submitted_annotation_count,
        "unsubmitted_annotation_count": unsubmitted_annotation_count,
    }
    return recordings_payload, flat_rows, manifest
343+
344+
345+
def _flat_row_for_csv(row):
    """Project a flat export row onto the CSV columns with CSV-safe values.

    Args:
        row: Mapping for one annotation; keys missing from it become ``None``.

    Returns:
        A dict keyed by ``RECORDING_ANNOTATION_FLAT_FIELDNAMES`` where list
        values of ``species`` / ``species_codes`` are joined with ``"|"``
        (``None`` items become empty strings) and a non-``None``
        ``additional_data`` is JSON-encoded.
    """
    csv_row = {}
    for fieldname in RECORDING_ANNOTATION_FLAT_FIELDNAMES:
        value = row.get(fieldname)
        if fieldname in ("species", "species_codes"):
            if isinstance(value, list):
                value = "|".join("" if item is None else str(item) for item in value)
        elif fieldname == "additional_data" and value is not None:
            value = json.dumps(value)
        csv_row[fieldname] = value
    return csv_row
354+
355+
356+
def _write_recording_annotations_zip(zipf, recordings_payload, flat_rows, manifest):
    """Write the export members into an open zip archive.

    Args:
        zipf: Writable ``zipfile.ZipFile``.
        recordings_payload: Hierarchical data, stored under ``"recordings"``
            in ``recording_annotations.json``.
        flat_rows: Per-annotation rows for the flat JSON and CSV members.
        manifest: Data stored in ``export_manifest.json``.

    The manifest and hierarchical JSON are always written. The flat JSON and
    CSV members are written only when ``flat_rows`` is non-empty.
    """
    zipf.writestr("export_manifest.json", json.dumps(manifest))
    hierarchy_json = json.dumps({"recordings": recordings_payload})
    zipf.writestr("recording_annotations.json", hierarchy_json)

    if flat_rows:
        zipf.writestr("recording_annotations_flat.json", json.dumps(flat_rows))

        csv_buf = StringIO()
        writer = csv.DictWriter(csv_buf, fieldnames=RECORDING_ANNOTATION_FLAT_FIELDNAMES)
        writer.writeheader()
        writer.writerows(_flat_row_for_csv(row) for row in flat_rows)
        zipf.writestr("recording_annotations_flat.csv", csv_buf.getvalue())
375+
376+
182377
def _collect_tag_summary_rows():
183378
tag_rows = []
184379
tag_user_rows = []

bats_ai/core/views/configuration.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@
99
from ninja.pagination import RouterPaginated
1010

1111
from bats_ai.core.models import Configuration, ExportedAnnotationFile
12-
from bats_ai.core.tasks.export_task import export_tag_annotation_summary_task
12+
from bats_ai.core.tasks.export_task import (
13+
export_recording_annotation_hierarchy_task,
14+
export_tag_annotation_summary_task,
15+
)
1316

1417
logger = logging.getLogger(__name__)
1518

@@ -99,3 +102,20 @@ def export_tag_summary(request):
99102
)
100103
export_tag_annotation_summary_task.delay(export.id)
101104
return {"exportId": export.id}
105+
106+
107+
@router.post(
    "/export-recording-annotations",
    response=ExportTagSummaryResponse,
)
def export_recording_annotations(request):
    """Queue a recording-annotation export; superusers only.

    Returns a 403 JSON response unless the requester is an authenticated
    superuser. Otherwise creates a pending ``ExportedAnnotationFile`` that
    expires in 24 hours, dispatches the export task for it, and returns the
    new export's id under ``"exportId"``.
    """
    user = request.user
    if not (user.is_authenticated and user.is_superuser):
        return JsonResponse({"error": "Permission denied"}, status=403)

    export = ExportedAnnotationFile.objects.create(
        # The "type" marker is how this export kind is recognised later.
        filters_applied={"type": "recording_annotation_hierarchy"},
        status="pending",
        expires_at=now() + timedelta(hours=24),
    )
    export_recording_annotation_hierarchy_task.delay(export.id)
    return {"exportId": export.id}

bats_ai/core/views/export_annotation.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,22 @@ def _is_tag_annotation_summary_export(export: ExportedAnnotationFile) -> bool:
2323
)
2424

2525

26+
def _is_recording_annotation_hierarchy_export(
    export: ExportedAnnotationFile,
) -> bool:
    """Return True when *export* is a recording-annotation hierarchy export.

    An export qualifies when its ``filters_applied`` is a dict whose
    ``"type"`` entry equals ``"recording_annotation_hierarchy"``.
    """
    filters_applied = export.filters_applied
    if not isinstance(filters_applied, dict):
        return False
    return filters_applied.get("type") == "recording_annotation_hierarchy"
34+
35+
2636
def _can_access_export(request, export: ExportedAnnotationFile) -> bool:
    """Return whether the requesting user may access *export*.

    Tag annotation summary exports and recording annotation hierarchy exports
    are restricted to authenticated superusers; every other export is
    accessible to any requester.
    """
    # Tag annotation summary exports include user-level aggregate stats,
    # so only admins can access them; recording annotation hierarchy exports
    # are gated the same way.
    admin_only = _is_tag_annotation_summary_export(
        export
    ) or _is_recording_annotation_hierarchy_export(export)
    if not admin_only:
        return True
    return request.user.is_authenticated and request.user.is_superuser
3244

client/src/api/api.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -787,6 +787,13 @@ async function exportTagSummary(): Promise<ExportTagSummaryResponse> {
787787
return result.data;
788788
}
789789

790+
/**
 * Starts a recording-annotations export by POSTing to
 * /configuration/export-recording-annotations and resolves with the
 * response body.
 */
async function exportRecordingAnnotations(): Promise<ExportTagSummaryResponse> {
  const { data } = await axiosInstance.post<ExportTagSummaryResponse>(
    "/configuration/export-recording-annotations",
  );
  return data;
}
796+
790797
export interface VettingDetails {
791798
id: number;
792799
user_id: number;
@@ -911,6 +918,7 @@ export {
911918
getFileAnnotationDetails,
912919
getExportStatus,
913920
exportTagSummary,
921+
exportRecordingAnnotations,
914922
getRecordingTags,
915923
getUnsubmittedNeighbors,
916924
getComputedPulseContour,

client/src/views/Admin.vue

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
<script lang="ts">
22
import { reactive, defineComponent, watch, ref, type Ref } from "vue";
33
import useState from "@use/useState";
4-
import { exportTagSummary, patchConfiguration } from "../api/api";
4+
import {
5+
exportRecordingAnnotations,
6+
exportTagSummary,
7+
patchConfiguration,
8+
} from "../api/api";
59
import NABatAdmin from "./NABat/NABatAdmin.vue";
610
import ColorPickerMenu from "@components/ColorPickerMenu.vue";
711
import ColorSchemeSelect from "@components/ColorSchemeSelect.vue";
@@ -86,6 +90,11 @@ export default defineComponent({
8690
exportId.value = result.exportId;
8791
};
8892
93+
const runRecordingAnnotationsExport = async () => {
  // Kick off the recording-annotations export and keep the id it returns.
  const { exportId: startedExportId } = await exportRecordingAnnotations();
  exportId.value = startedExportId;
};
97+
8998
const clearExport = () => {
9099
exportId.value = null;
91100
};
@@ -99,6 +108,7 @@ export default defineComponent({
99108
defaultColorScheme,
100109
exportId,
101110
runTagSummaryExport,
111+
runRecordingAnnotationsExport,
102112
clearExport,
103113
};
104114
},
@@ -208,6 +218,14 @@ export default defineComponent({
208218
>
209219
Export Tag Annotation Summary
210220
</v-btn>
221+
<v-btn
222+
color="secondary"
223+
variant="outlined"
224+
class="mx-2"
225+
@click="runRecordingAnnotationsExport"
226+
>
227+
Export Recording Annotations
228+
</v-btn>
211229
</v-row>
212230
</v-card-actions>
213231
<Exporting

0 commit comments

Comments
 (0)