Export tag summary (#505)

BryonLewis · web-flow · commit 917495f86b9a · 2026-05-07T13:57:13.000-04:00
* unify dev containers and docker compose up

* add export task for tags

* add endpoint for exporting tag summary

* front-end for exporting tag summaries
diff --git a/bats_ai/core/tasks/__init__.py b/bats_ai/core/tasks/__init__.py
@@ -0,0 +1,4 @@
+from __future__ import annotations
+
+# Import task modules so Celery autodiscovery registers decorated tasks.
+from . import export_task, periodic, tasks  # noqa: F401
diff --git a/bats_ai/core/tasks/export_task.py b/bats_ai/core/tasks/export_task.py
@@ -2,10 +2,11 @@
 
 import csv
 from datetime import timedelta
-from io import BytesIO
+from io import BytesIO, StringIO
 import json
 import zipfile
 
+from django.contrib.auth.models import User
 from django.core.files import File
 from django.utils.timezone import now
 
@@ -14,6 +15,7 @@
     Annotations,
     ExportedAnnotationFile,
     RecordingAnnotation,
+    RecordingTag,
     SequenceAnnotations,
 )
 
@@ -152,3 +154,214 @@ def export_annotations_task(filters: dict, annotation_types: list, export_id: in
         export_record.status = "failed"
         export_record.save()
         raise
+
+
+@app.task(bind=True)
+def export_tag_annotation_summary_task(self, export_id: int):  # build tag-summary zip export
+    export_record = ExportedAnnotationFile.objects.get(pk=export_id)  # fetched before the try block
+    try:
+        tag_rows, tag_user_rows = _collect_tag_summary_rows()
+
+        buffer = BytesIO()  # the archive is assembled in memory
+        with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zipf:
+            _write_tag_exports(zipf, tag_rows, tag_user_rows)  # two CSVs plus one JSON file
+
+        buffer.seek(0)  # rewind so the archive is read from its first byte
+        filename = f"tag-annotation-summary-{export_id}.zip"
+        export_record.file.save(filename, File(buffer), save=False)  # row is saved once, below
+        export_record.download_url = export_record.file.url
+        export_record.status = "complete"
+        export_record.expires_at = now() + timedelta(hours=24)  # 24 h from this point
+        export_record.save()
+    except Exception:
+        export_record.status = "failed"  # persist the failure, then re-raise
+        export_record.save()
+        raise
+
+
+def _collect_tag_summary_rows():  # -> (tag_rows, tag_user_rows), two lists of dicts
+    tag_rows = []
+    tag_user_rows = []
+    users = list(User.objects.order_by("username").values("id", "username"))  # id/username dicts
+    tags = RecordingTag.objects.select_related("user").prefetch_related(
+        "recording_set__recordingannotation_set__owner"  # relations read by the row builders
+    )
+
+    for tag in tags:
+        tag_row, user_rows = _build_rows_for_tag(tag, users)
+        tag_rows.append(tag_row)  # one summary row per tag
+        tag_user_rows.extend(user_rows)  # one row per (tag, user) pair
+
+    return tag_rows, tag_user_rows
+
+
+def _build_rows_for_tag(tag, users):  # -> (tag_row, user_rows) for one tag
+    recordings = list(tag.recording_set.all())
+    total_recordings = len(recordings)
+    annotations_by_user = _group_recording_annotations_by_user(recordings)  # keyed by owner_id
+    annotated_total, submitted_total, unsubmitted_total = _collect_total_sets(annotations_by_user)
+
+    tag_row = {
+        "tag_id": tag.id,
+        "tag_text": tag.text,
+        "tag_owner": tag.user.username,
+        "total_recordings": total_recordings,
+        "annotated_recordings": len(annotated_total),  # distinct recordings, any user
+        "submitted_recordings": len(submitted_total),
+        "unsubmitted_recordings": len(unsubmitted_total),
+        "remaining_recordings": total_recordings - len(annotated_total),  # not annotated by anyone
+    }
+    user_rows = _build_user_rows(
+        tag,
+        total_recordings,
+        annotations_by_user,
+        users,
+    )
+    return tag_row, user_rows
+
+
+def _group_recording_annotations_by_user(recordings):  # -> {owner_id: stats dict}
+    annotations_by_user = {}
+    for recording in recordings:
+        for annotation in recording.recordingannotation_set.all():
+            key = annotation.owner_id
+            if key not in annotations_by_user:  # first annotation seen for this owner
+                annotations_by_user[key] = {
+                    "username": annotation.owner.username,
+                    "annotated_recordings": set(),  # sets of recording ids, so repeats collapse
+                    "submitted_recordings": set(),
+                    "unsubmitted_recordings": set(),
+                }
+            user_stats = annotations_by_user[key]
+            user_stats["annotated_recordings"].add(recording.id)  # added whatever the submitted flag
+            if annotation.submitted:
+                user_stats["submitted_recordings"].add(recording.id)
+            else:
+                user_stats["unsubmitted_recordings"].add(recording.id)
+    return annotations_by_user
+
+
+def _collect_total_sets(annotations_by_user):  # -> (annotated, submitted, unsubmitted) id sets
+    annotated_total = set()
+    submitted_total = set()
+    unsubmitted_total = set()
+    for user_stats in annotations_by_user.values():
+        annotated_total.update(user_stats["annotated_recordings"])  # union across users
+        submitted_total.update(user_stats["submitted_recordings"])
+        unsubmitted_total.update(user_stats["unsubmitted_recordings"])
+    return annotated_total, submitted_total, unsubmitted_total
+
+
+def _build_user_rows(tag, total_recordings, annotations_by_user, users):  # one row per user
+    user_rows = []
+    for user in users:
+        owner_id = user["id"]
+        username = user["username"]
+        user_stats = annotations_by_user.get(owner_id)
+        if user_stats is None:  # no annotations by this user under the tag: use empty sets
+            user_stats = {
+                "username": username,
+                "annotated_recordings": set(),
+                "submitted_recordings": set(),
+                "unsubmitted_recordings": set(),
+            }
+        annotated_count = len(user_stats["annotated_recordings"])
+        user_rows.append(
+            {
+                "tag_id": tag.id,
+                "tag_text": tag.text,
+                "tag_owner": tag.user.username,
+                "user_id": owner_id,
+                "username": username,  # taken from the users list, not from user_stats
+                "total_recordings": total_recordings,
+                "annotated_recordings": annotated_count,
+                "submitted_recordings": len(user_stats["submitted_recordings"]),
+                "unsubmitted_recordings": len(user_stats["unsubmitted_recordings"]),
+                "remaining_recordings": total_recordings - annotated_count,  # for this user
+            }
+        )
+    return user_rows
+
+
+def _write_tag_exports(zipf, tag_rows, tag_user_rows):  # add two CSVs and one JSON file to zipf
+    tag_fieldnames = [  # column order of tag_summary.csv
+        "tag_id",
+        "tag_text",
+        "tag_owner",
+        "total_recordings",
+        "annotated_recordings",
+        "submitted_recordings",
+        "unsubmitted_recordings",
+        "remaining_recordings",
+    ]
+    tag_user_fieldnames = [  # column order of tag_summary_by_user.csv
+        "tag_id",
+        "tag_text",
+        "tag_owner",
+        "user_id",
+        "username",
+        "total_recordings",
+        "annotated_recordings",
+        "submitted_recordings",
+        "unsubmitted_recordings",
+        "remaining_recordings",
+    ]
+
+    tag_csv_buf = StringIO()  # CSV text is buffered here, then stored in the zip as a string
+    tag_writer = csv.DictWriter(tag_csv_buf, fieldnames=tag_fieldnames)
+    tag_writer.writeheader()
+    for row in tag_rows:
+        tag_writer.writerow(row)
+    zipf.writestr("tag_summary.csv", tag_csv_buf.getvalue())
+
+    tag_user_csv_buf = StringIO()  # same pattern for the per-user rows
+    tag_user_writer = csv.DictWriter(tag_user_csv_buf, fieldnames=tag_user_fieldnames)
+    tag_user_writer.writeheader()
+    for row in tag_user_rows:
+        tag_user_writer.writerow(row)
+    zipf.writestr("tag_summary_by_user.csv", tag_user_csv_buf.getvalue())
+
+    users_payload = _build_users_payload(tag_user_rows)  # same rows, grouped per user
+    zipf.writestr(
+        "tag_annotation_summary.json",
+        json.dumps(
+            {
+                "users": users_payload,
+            },
+            indent=2,
+        ),
+    )
+
+
+def _build_users_payload(tag_user_rows):  # -> list of user dicts, sorted by username
+    users_by_id = {}
+    for row in tag_user_rows:
+        user_id = row["user_id"]
+        if user_id not in users_by_id:  # first row seen for this user
+            users_by_id[user_id] = {
+                "user_id": user_id,
+                "username": row["username"],
+                "tags": [],
+            }
+
+        tag_entry = {
+            "tag_id": row["tag_id"],
+            "tag_text": row["tag_text"],
+            "tag_owner": row["tag_owner"],
+            "has_annotations": row["annotated_recordings"] > 0,
+        }
+        if row["annotated_recordings"] > 0:  # full counts only when the user annotated this tag
+            tag_entry.update(
+                {
+                    "total_recordings": row["total_recordings"],
+                    "annotated_recordings": row["annotated_recordings"],
+                    "submitted_recordings": row["submitted_recordings"],
+                    "unsubmitted_recordings": row["unsubmitted_recordings"],
+                    "remaining_recordings": row["remaining_recordings"],
+                }
+            )
+        else:
+            tag_entry["annotated_recordings"] = 0  # the other count keys are omitted
+        users_by_id[user_id]["tags"].append(tag_entry)
+
+    return sorted(users_by_id.values(), key=lambda user: user["username"])
diff --git a/bats_ai/core/views/configuration.py b/bats_ai/core/views/configuration.py
@@ -1,12 +1,15 @@
 from __future__ import annotations
 
+from datetime import timedelta
 import logging
 
 from django.http import JsonResponse
+from django.utils.timezone import now
 from ninja import Schema
 from ninja.pagination import RouterPaginated
 
-from bats_ai.core.models import Configuration
+from bats_ai.core.models import Configuration, ExportedAnnotationFile
+from bats_ai.core.tasks.export_task import export_tag_annotation_summary_task
 
 logger = logging.getLogger(__name__)
 
@@ -41,9 +44,9 @@ def get_configuration(request):
         spectrogram_x_stretch=config.spectrogram_x_stretch,
         spectrogram_view=config.spectrogram_view,
         default_color_scheme=config.default_color_scheme,
-        default_spectrogram_background_color=config.default_spectrogram_background_color,
+        default_spectrogram_background_color=(config.default_spectrogram_background_color),
         non_admin_upload_enabled=config.non_admin_upload_enabled,
-        mark_annotations_completed_enabled=config.mark_annotations_completed_enabled,
+        mark_annotations_completed_enabled=(config.mark_annotations_completed_enabled),
         is_admin=request.user.is_authenticated and request.user.is_superuser,
     )
 
@@ -78,3 +81,21 @@ def get_current_user(request):
             "id": request.user.id,
         }
     return {"email": "", "name": ""}
+
+
+class ExportTagSummaryResponse(Schema):  # response schema of POST /export-tag-summary
+    exportId: int  # id of the ExportedAnnotationFile row created for the export
+
+
+@router.post("/export-tag-summary", response=ExportTagSummaryResponse)
+def export_tag_summary(request):  # superuser-only: queue a tag annotation summary export
+    if not request.user.is_authenticated or not request.user.is_superuser:
+        return JsonResponse({"error": "Permission denied"}, status=403)
+
+    export = ExportedAnnotationFile.objects.create(
+        filters_applied={"type": "tag_annotation_summary"},  # marker used by access checks
+        status="pending",
+        expires_at=now() + timedelta(hours=24),  # set again by the task on completion
+    )
+    export_tag_annotation_summary_task.delay(export.id)  # the zip is built by the task
+    return {"exportId": export.id}
diff --git a/bats_ai/core/views/export_annotation.py b/bats_ai/core/views/export_annotation.py
@@ -15,6 +15,22 @@
 router = Router()
 
 
+def _is_tag_annotation_summary_export(export: ExportedAnnotationFile) -> bool:  # by type marker
+    filters_applied = export.filters_applied
+    return (
+        isinstance(filters_applied, dict)  # guard: .get() below needs a dict
+        and filters_applied.get("type") == "tag_annotation_summary"
+    )
+
+
+def _can_access_export(request, export: ExportedAnnotationFile) -> bool:  # per-request check
+    # Tag annotation summary exports include user-level aggregate stats,
+    # so only authenticated superusers can access them.
+    if _is_tag_annotation_summary_export(export):
+        return request.user.is_authenticated and request.user.is_superuser
+    return True  # every other export passes this check
+
+
 class ExportedAnnotationFileSchema(BaseModel):
     id: int
     status: str
@@ -37,16 +53,19 @@ def list_exports(request):
             expiresAt=e.expires_at,
         )
         for e in exports
+        if _can_access_export(request, e)
     ]
 
 
 @router.get("/{export_id}", response=ExportedAnnotationFileSchema)
 def get_export_status(request, export_id: int):
     export = get_object_or_404(ExportedAnnotationFile, pk=export_id)
+    if not _can_access_export(request, export):
+        return JsonResponse({"error": "Permission denied"}, status=403)
     return ExportedAnnotationFileSchema(
         id=export.id,
         status=export.status,
-        downloadUrl=export.download_url if export.status == "complete" else None,
+        downloadUrl=(export.download_url if export.status == "complete" else None),
         created=export.created,
         expiresAt=export.expires_at,
     )
@@ -55,6 +74,8 @@ def get_export_status(request, export_id: int):
 @router.delete("/{export_id}")
 def delete_export(request, export_id: int):
     export = get_object_or_404(ExportedAnnotationFile, pk=export_id)
+    if not _can_access_export(request, export):
+        return JsonResponse({"error": "Permission denied"}, status=403)
 
     # Optional: block deleting exports still in progress
     if export.status not in ("complete", "failed", "expired"):
diff --git a/client/src/api/api.ts b/client/src/api/api.ts
@@ -769,13 +769,24 @@ export interface ExportStatus {
   expiresAt: string;
 }
 
+export interface ExportTagSummaryResponse { // body returned by exportTagSummary()
+  exportId: number; // id of the export record created by the server
+}
+
 async function getExportStatus(exportId: number) {
   const result = await axiosInstance.get<ExportStatus>(
     `/export-annotation/${exportId}`,
   );
   return result.data;
 }
 
+async function exportTagSummary(): Promise<ExportTagSummaryResponse> {
+  const result = await axiosInstance.post<ExportTagSummaryResponse>(
+    "/configuration/export-tag-summary", // POST with no request body
+  );
+  return result.data; // { exportId } of the export the server queued
+}
+
 export interface VettingDetails {
   id: number;
   user_id: number;
@@ -899,6 +910,7 @@ export {
   getFilteredProcessingTasks,
   getFileAnnotationDetails,
   getExportStatus,
+  exportTagSummary,
   getRecordingTags,
   getUnsubmittedNeighbors,
   getComputedPulseContour,
diff --git a/client/src/views/Admin.vue b/client/src/views/Admin.vue
diff --git a/docker-compose.override.yml b/docker-compose.override.yml