just-be-dev
diff --git a/‎.github/workflows/ci.yml‎
Lines changed: 3 additions & 0 deletions b/‎.github/workflows/ci.yml‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎mise.toml‎
Lines changed: 7 additions & 0 deletions b/‎mise.toml‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎scripts/transcribe.py‎
Lines changed: 104 additions & 0 deletions b/‎scripts/transcribe.py‎
Lines changed: 104 additions & 0 deletions
diff --git a/‎scripts/upload-assets.ts‎
Lines changed: 9 additions & 7 deletions b/‎scripts/upload-assets.ts‎
Lines changed: 9 additions & 7 deletions
@@ -3,6 +3,9 @@ name: CI
 on:
   push:
 
+env:
+  MISE_DISABLE_TOOLS: ffmpeg
+
 jobs:
   build-and-test:
     runs-on: ubuntu-latest
 
@@ -1,6 +1,9 @@
 [tools]
 bun = "1.3.5"
 lefthook = "2.0.13"
+python = "3.14.2"
+ffmpeg = "latest"
+uv = "latest"
 
 [settings]
 experimental = true
@@ -43,3 +46,7 @@ description = "Upload assets (images, audio) to R2 and update manifest"
 [tasks.deploy]
 run = "wrangler deploy"
 depends = ["build"]
+
+[tasks.transcribe]
+run = "uv run --with openai-whisper python scripts/transcribe.py"
+description = "Generate word-level transcript from audio/video"
@@ -0,0 +1,104 @@
+#!/usr/bin/env python3
+"""
+Generate word-level transcript from audio/video using OpenAI Whisper.
+
+Usage:
+  python scripts/transcribe.py <input-file> <output-json> [model-size]
+
+Example:
+  python scripts/transcribe.py \
+    public/assets/talks/codegen-in-rust/audio.m4a \
+    public/transcripts/t2468.json \
+    medium
+"""
+
+import json
+import sys
+from pathlib import Path
+from typing import Optional
+
+def transcribe_file(input_path: str, output_path: str, model_size: str = "base"):
+    """
+    Transcribe audio/video file with word-level timestamps.
+
+    Args:
+        input_path: Path to audio/video file
+        output_path: Path to output JSON file
+        model_size: Whisper model size (tiny, base, small, medium, large)
+    """
+    try:
+        import whisper
+    except ImportError:
+        print("Error: openai-whisper is not installed.")
+        print("Install it with: pip install openai-whisper")
+        sys.exit(1)
+
+    print(f"Loading Whisper model: {model_size}")
+    model = whisper.load_model(model_size)
+
+    print(f"Transcribing: {input_path}")
+    print("This may take a while depending on file length and model size...")
+
+    result = model.transcribe(
+        input_path,
+        word_timestamps=True,  # Critical for word-level highlighting
+        language="en"  # Can be removed for auto-detection
+    )
+
+    # Build transcript structure
+    transcript = {
+        "language": result.get("language", "en"),
+        "duration": 0,
+        "segments": []
+    }
+
+    total_words = 0
+
+    for segment in result.get("segments", []):
+        seg_data = {
+            "start": round(segment["start"], 3),
+            "end": round(segment["end"], 3),
+            "text": segment["text"].strip(),
+            "words": []
+        }
+
+        # Extract word-level timestamps if available
+        if "words" in segment:
+            for word in segment["words"]:
+                seg_data["words"].append({
+                    "word": word["word"].strip(),
+                    "start": round(word["start"], 3),
+                    "end": round(word["end"], 3)
+                })
+                total_words += 1
+
+        transcript["segments"].append(seg_data)
+        transcript["duration"] = max(transcript["duration"], seg_data["end"])
+
+    # Ensure output directory exists
+    output_file = Path(output_path)
+    output_file.parent.mkdir(parents=True, exist_ok=True)
+
+    # Write JSON
+    with open(output_file, 'w', encoding='utf-8') as f:
+        json.dump(transcript, f, indent=2, ensure_ascii=False)
+
+    print(f"\n✓ Transcript saved to: {output_path}")
+    print(f"  Language: {transcript['language']}")
+    print(f"  Duration: {transcript['duration']:.1f}s ({transcript['duration'] / 60:.1f}min)")
+    print(f"  Segments: {len(transcript['segments'])}")
+    print(f"  Words: {total_words}")
+
+if __name__ == "__main__":
+    if len(sys.argv) < 3:
+        print(__doc__)
+        sys.exit(1)
+
+    input_file = sys.argv[1]
+    output_file = sys.argv[2]
+    model = sys.argv[3] if len(sys.argv) > 3 else "base"
+
+    if not Path(input_file).exists():
+        print(f"Error: Input file not found: {input_file}")
+        sys.exit(1)
+
+    transcribe_file(input_file, output_file, model)
@@ -1,10 +1,10 @@
 #!/usr/bin/env bun
 
 /**
- * Upload assets (images, audio) from public/assets to Cloudflare R2
+ * Upload assets (images, audio, transcripts) from public/assets to Cloudflare R2
  *
  * This script:
- * - Scans public/assets for media files (images: png, jpg, jpeg, webp, gif, svg; audio: m4a, mp3, wav, ogg)
+ * - Scans public/assets for media files (images: png, jpg, jpeg, webp, gif, svg; audio: m4a, mp3, wav, ogg, aac, flac; transcripts: json)
  * - Generates SHA-256 hash for each file
  * - Uploads to R2 with content-addressable key (hash.ext)
  * - Creates manifest mapping original paths to R2 URLs
@@ -30,11 +30,11 @@ import { join, relative } from "path";
 const BUCKET_NAME = "just-be-dev-assets";
 const CUSTOM_DOMAIN = "https://assets.just-be.dev";
 const ASSETS_DIR = join(import.meta.dir, "../public/assets");
-const MANIFEST_PATH = join(import.meta.dir, "../src/content/image-manifest.json");
+const MANIFEST_PATH = join(import.meta.dir, "../src/content/manifest.json");
 
 interface AssetManifest {
   version: string;
-  images: Record<string, {
+  assets: Record<string, {
     hash: string;
     size: number;
     ext: string;
@@ -51,7 +51,9 @@ async function findAssets(dir: string): Promise<string[]> {
     // Images
     '.png', '.jpg', '.jpeg', '.webp', '.gif', '.svg',
     // Audio
-    '.m4a', '.mp3', '.wav', '.ogg', '.aac', '.flac'
+    '.m4a', '.mp3', '.wav', '.ogg', '.aac', '.flac',
+    // Transcripts
+    '.json'
   ];
   const results: string[] = [];
 
@@ -96,7 +98,7 @@ async function uploadAssets() {
 
   const manifest: AssetManifest = {
     version: "1.0",
-    images: {},
+    assets: {},
   };
 
   let uploadCount = 0;
@@ -122,7 +124,7 @@ async function uploadAssets() {
     }
 
     const stats = await Bun.file(assetPath).stat();
-    manifest.images[originalPath] = {
+    manifest.assets[originalPath] = {
       hash,
       size: stats.size,
       ext,