@@ -110,29 +110,35 @@ Additional Specs:
 - CLI Validation: `uv run videoannotator validate-emotion path/to/file.emotion.json` returns non-zero exit on failure
 Client tools (e.g. the Video Annotation Viewer) should rely on those sources or the `/api/v1/pipelines` endpoint rather than hard-coding pipeline assumptions.
 
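For example, a client can pair the validation CLI with the pipelines endpoint instead of hard-coding assumptions. The sketch below is illustrative rather than repository code: it assumes the API server is reachable on port 18011 (the port used in the Docker example) and makes no assumption about the response schema.

```python
import subprocess

import requests

# The validate command signals failure through a non-zero exit code.
result = subprocess.run(
    ["uv", "run", "videoannotator", "validate-emotion", "path/to/file.emotion.json"]
)
print("emotion file valid" if result.returncode == 0 else "emotion file failed validation")

# Ask the running server which pipelines it actually exposes.
resp = requests.get("http://localhost:18011/api/v1/pipelines", timeout=10)
resp.raise_for_status()
print(resp.json())  # inspect the documented schema rather than assuming one
```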
-### **Person Tracking Pipeline**
+### Person Tracking (1 pipeline)
 
-- **Technology**: YOLO11 + ByteTrack multi-object tracking
-- **Outputs**: Bounding boxes, pose keypoints, persistent person IDs
-- **Use cases**: Movement analysis, social interaction tracking, activity recognition
+| Pipeline | Technology | Outputs | Stability |
+|----------|------------|---------|-----------|
+| **Person Tracking & Pose** | YOLO11 + ByteTrack | COCO bounding boxes, 17-point pose keypoints, persistent person IDs | beta |
 
-### **Face Analysis Pipeline**
+### Face Analysis (3 pipelines)
 
-- **Technology**: [OpenFace 3.0](https://github.com/CMU-MultiComp-Lab/OpenFace-3.0), LAION Face ([LAION](https://laion.ai/)), OpenCV backends
-- **Outputs**: 68-point landmarks, emotions, action units, gaze direction, head pose
-- **Use cases**: Emotional analysis, attention tracking, facial expression studies
+| Pipeline | Technology | Outputs | Stability |
+|----------|------------|---------|-----------|
+| **Face Analysis** | DeepFace (TensorFlow/OpenCV) | Emotion labels, age/gender, action units | stable |
+| **LAION CLIP Face Embedding** | LAION CLIP-derived model | 512-D semantic embeddings, zero-shot attribute & emotion tagging | experimental |
+| **OpenFace3 Face Embedding** | OpenFace 3.0 (ONNX/PyTorch) | 512-D face embeddings for recognition or clustering | experimental |
 
-### **Scene Detection Pipeline**
+### Scene Detection (1 pipeline)
 
-- **Technology**: PySceneDetect + CLIP environment classification
-- **Outputs**: Scene boundaries, environment labels, temporal segmentation
-- **Use cases**: Context analysis, setting classification, behavioral context
+| Pipeline | Technology | Outputs | Stability |
+|----------|------------|---------|-----------|
+| **Scene Detection** | PySceneDetect + CLIP | Scene boundaries, environment classification, temporal segmentation | beta |
 
-### **Audio Processing Pipeline**
+### Audio Processing (4 pipelines + 1 combined)
 
-- **Technology**: OpenAI Whisper + pyannote speaker diarization
-- **Outputs**: Speech transcripts, speaker identification, voice emotions
-- **Use cases**: Conversation analysis, language development, vocal behavior
+| Pipeline | Technology | Outputs | Stability |
+|----------|------------|---------|-----------|
+| **Speech Recognition** | OpenAI Whisper | WebVTT transcripts with word-level timestamps | stable |
+| **Speaker Diarization** | pyannote.audio | RTTM speaker turns with timestamps | stable |
+| **Audio Processing** | Whisper + pyannote (combined) | WebVTT transcripts + RTTM speaker turns | beta |
+| **LAION Empathic Voice** | LAION Empathic Insight + Whisper embeddings | Emotion segments, empathic scores, emotion timeline | stable |
+| **Voice Emotion Baseline** | Spectral CNN over Whisper embeddings | _(planned; not yet implemented)_ | experimental |
 
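The transcript and diarization outputs above use plain-text standards (WebVTT and RTTM), so downstream tools can read them without VideoAnnotator-specific code. The following sketch is illustrative only; the file name is hypothetical, and it assumes the conventional ten-field RTTM layout for `SPEAKER` records.

```python
from pathlib import Path


def read_rttm(path: str) -> list[dict]:
    """Collect speaker turns (onset, duration, speaker id) from an RTTM file."""
    turns = []
    for line in Path(path).read_text().splitlines():
        fields = line.split()
        if not fields or fields[0] != "SPEAKER":
            continue  # skip blank lines and non-speaker records
        turns.append({
            "onset": float(fields[3]),     # turn start in seconds
            "duration": float(fields[4]),  # turn length in seconds
            "speaker": fields[7],          # speaker label assigned by diarization
        })
    return turns


# Example use: total speaking time per speaker (hypothetical output file name).
totals: dict[str, float] = {}
for turn in read_rttm("path/to/file.diarization.rttm"):
    totals[turn["speaker"]] = totals.get(turn["speaker"], 0.0) + turn["duration"]
print(totals)
```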
 ## 💡 Why VideoAnnotator?
 
@@ -275,8 +281,11 @@ docker run -p 18011:18011 --gpus all videoannotator:dev
 
 - **FastAPI** - High-performance REST API with automatic documentation
 - **YOLO11** - State-of-the-art object detection and pose estimation
-- **OpenFace 3.0** - Comprehensive facial behavior analysis
+- **DeepFace / OpenFace 3.0 / LAION CLIP** - Facial analysis, embeddings, and emotion recognition
 - **Whisper** - Robust speech recognition and transcription
+- **pyannote.audio** - Speaker diarization and segmentation
+- **LAION Empathic Insight** - Voice emotion analysis from Whisper embeddings
+- **PySceneDetect + CLIP** - Scene boundary detection and environment classification
 - **PyTorch** - GPU-accelerated machine learning inference
 
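Because inference runs on PyTorch and the Docker example passes `--gpus all`, it is worth confirming that the environment actually sees a GPU. This is a generic PyTorch check, not VideoAnnotator-specific code, and it does not assume how the pipelines choose their device.

```python
import torch

# Report whether CUDA is visible; per-pipeline device selection is not assumed here.
if torch.cuda.is_available():
    print(f"CUDA device available: {torch.cuda.get_device_name(0)}")
else:
    print("No CUDA device visible; inference will fall back to CPU where pipelines support it.")
```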
 ### **Performance Characteristics**
@@ -340,9 +349,13 @@ MIT License - Full terms in [LICENSE](LICENSE)
 
 Built with and grateful to:
 
-- **[YOLO & Ultralytics](https://ultralytics.com/)** - Object detection and tracking
-- **[OpenFace 3.0](https://github.com/CMU-MultiComp-Lab/OpenFace-3.0)** - Facial behavior analysis
+- **[YOLO & Ultralytics](https://ultralytics.com/)** - Object detection, tracking, and pose estimation
+- **[DeepFace](https://github.com/serengil/deepface)** - Face detection and emotion recognition
+- **[OpenFace 3.0](https://github.com/CMU-MultiComp-Lab/OpenFace-3.0)** - Facial behavior analysis and embeddings
+- **[LAION](https://laion.ai/)** - CLIP face embeddings and empathic voice emotion models
 - **[OpenAI Whisper](https://github.com/openai/whisper)** - Speech recognition
+- **[pyannote.audio](https://github.com/pyannote/pyannote-audio)** - Speaker diarization
+- **[PySceneDetect](https://www.scenedetect.com/)** - Scene boundary detection
 - **[FastAPI](https://github.com/tiangolo/fastapi)** - Modern web framework
 - **[PyTorch](https://pytorch.org/)** - Machine learning infrastructure
 