Skip to content

Commit b785804

Browse files
aramb-dev and claude committed
feat(transcribe): migrate to AssemblyAI SDK, fix playback and error UX
- Switch from raw REST API calls to official AssemblyAI JS SDK (assemblyai v4.23.0) for transcribe submit, polling, and sentences
- Add null guards on nullable SDK fields (.words, .timestamps) to prevent .map() on undefined during polling
- Fix audio playback: use <video> instead of <audio> so MP4/video containers play correctly in the Studio player
- Add feature-language error detection: parse AssemblyAI errors like "iab_categories is not supported for language code 'ar'" and show a clear message with a one-click "Retry without [Feature]" button
- Preserve last audio URL and options across error screen for retry
- Show selected transcription options (language, diarize, AI features) as badges on the result screen
- Make polling more resilient: 5s interval, retry up to 5 consecutive errors before failing, read error response bodies

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 23fa529 commit b785804

11 files changed

Lines changed: 413 additions & 360 deletions

File tree

bun.lock

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
"@radix-ui/react-tabs": "^1.1.13",
2727
"@tabler/icons-react": "^3.36.1",
2828
"@tailwindcss/typography": "^0.5.19",
29+
"assemblyai": "^4.23.0",
2930
"clarity-js": "^0.8.54",
3031
"class-variance-authority": "^0.7.1",
3132
"clsx": "^2.1.1",

src/app/api/prediction/[id]/route.ts

Lines changed: 58 additions & 132 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
import { NextRequest, NextResponse } from "next/server"
2-
3-
const ASSEMBLYAI_API_KEY = process.env.ASSEMBLYAI_API_KEY
2+
import { assemblyai } from "@/lib/assemblyai-client"
43

54
interface RouteParams {
65
params: Promise<{ id: string }>
@@ -22,155 +21,84 @@ function mapStatus(assemblyStatus: string): string {
2221
}
2322
}
2423

25-
interface AssemblyAIWord {
26-
text: string
27-
start: number
28-
end: number
29-
confidence: number
30-
speaker?: string
31-
}
32-
33-
interface AssemblyAIUtterance {
34-
text: string
35-
start: number
36-
end: number
37-
confidence: number
38-
speaker: string
39-
words: AssemblyAIWord[]
40-
}
41-
42-
interface AssemblyAISentence {
43-
text: string
44-
start: number
45-
end: number
46-
confidence: number
47-
words: AssemblyAIWord[]
48-
}
49-
50-
// Convert ms timestamps to seconds for a word array
51-
function convertWords(words: AssemblyAIWord[]) {
52-
return words.map((w) => ({
53-
word: w.text,
54-
start: w.start / 1000,
55-
end: w.end / 1000,
56-
}))
57-
}
58-
5924
export async function GET(_request: NextRequest, { params }: RouteParams) {
6025
const { id } = await params
61-
const transcriptId = id
6226

63-
if (!transcriptId) {
27+
if (!id) {
6428
return NextResponse.json(
6529
{ error: "Missing prediction ID" },
6630
{ status: 400 },
6731
)
6832
}
6933

70-
if (!ASSEMBLYAI_API_KEY) {
71-
return NextResponse.json(
72-
{ error: "AssemblyAI API key not configured" },
73-
{ status: 500 },
74-
)
75-
}
76-
7734
try {
78-
console.log(`Checking transcription status for ID: ${transcriptId}`)
79-
80-
const response = await fetch(
81-
`https://api.assemblyai.com/v2/transcript/${transcriptId}`,
82-
{
83-
headers: {
84-
Authorization: ASSEMBLYAI_API_KEY,
85-
},
86-
},
87-
)
88-
89-
const data = await response.json()
90-
91-
if (!response.ok) {
92-
console.error(
93-
`Error checking transcription status: ${response.status}`,
94-
data,
95-
)
96-
return NextResponse.json(
97-
{
98-
error: `Error checking transcription: ${response.status}`,
99-
details: data,
100-
},
101-
{ status: response.status },
102-
)
103-
}
35+
console.log(`Checking transcription status for ID: ${id}`)
10436

105-
const mappedStatus = mapStatus(data.status)
106-
console.log(`Transcription ${transcriptId} status: ${data.status} -> ${mappedStatus}`)
37+
const transcript = await assemblyai.transcripts.get(id)
38+
const mappedStatus = mapStatus(transcript.status)
39+
console.log(`Transcription ${id} status: ${transcript.status} -> ${mappedStatus}`)
10740

108-
// Build normalized response
10941
const result: Record<string, unknown> = {
110-
id: transcriptId,
42+
id,
11143
status: mappedStatus,
11244
}
11345

11446
if (mappedStatus === "failed") {
115-
result.error = data.error || "Unknown transcription error"
47+
result.error = transcript.error || "Unknown transcription error"
11648
}
11749

11850
if (mappedStatus === "succeeded") {
119-
// Normalize output to match WhisperX format: { segments, detected_language }
120-
let segments: { start: number, end: number, text: string, speaker?: string, words?: { word: string, start: number, end: number }[] }[]
121-
122-
if (data.utterances && data.utterances.length > 0) {
51+
// Build segments from utterances (diarization) or sentences
52+
let segments: {
53+
start: number
54+
end: number
55+
text: string
56+
speaker?: string
57+
words?: { word: string; start: number; end: number }[]
58+
}[]
59+
60+
if (transcript.utterances && transcript.utterances.length > 0) {
12361
// Diarization was enabled — use utterances for speaker-labeled segments
124-
segments = data.utterances.map((u: AssemblyAIUtterance) => ({
62+
segments = transcript.utterances.map((u) => ({
12563
start: u.start / 1000,
12664
end: u.end / 1000,
12765
text: u.text,
12866
speaker: u.speaker,
129-
words: convertWords(u.words),
67+
words: u.words?.map((w) => ({
68+
word: w.text,
69+
start: w.start / 1000,
70+
end: w.end / 1000,
71+
})) || [],
13072
}))
13173
} else {
132-
// No diarization — fetch sentences for segment-level output
74+
// No diarization — fetch sentences via SDK
13375
try {
134-
const sentencesRes = await fetch(
135-
`https://api.assemblyai.com/v2/transcript/${transcriptId}/sentences`,
136-
{
137-
headers: {
138-
Authorization: ASSEMBLYAI_API_KEY,
139-
},
140-
},
141-
)
142-
143-
if (sentencesRes.ok) {
144-
const sentencesData = await sentencesRes.json()
145-
segments = sentencesData.sentences.map((s: AssemblyAISentence) => ({
146-
start: s.start / 1000,
147-
end: s.end / 1000,
148-
text: s.text,
149-
words: convertWords(s.words),
150-
}))
151-
} else {
152-
// Fallback: single segment from full text
153-
segments = [{
154-
start: 0,
155-
end: (data.audio_duration || 0) / 1000,
156-
text: data.text || "",
157-
}]
158-
}
76+
const sentencesResponse = await assemblyai.transcripts.sentences(id)
77+
segments = sentencesResponse.sentences.map((s) => ({
78+
start: s.start / 1000,
79+
end: s.end / 1000,
80+
text: s.text,
81+
words: s.words?.map((w) => ({
82+
word: w.text,
83+
start: w.start / 1000,
84+
end: w.end / 1000,
85+
})) || [],
86+
}))
15987
} catch (sentenceError) {
16088
console.error("Error fetching sentences:", sentenceError)
16189
segments = [{
16290
start: 0,
163-
end: (data.audio_duration || 0) / 1000,
164-
text: data.text || "",
91+
end: (transcript.audio_duration || 0),
92+
text: transcript.text || "",
16593
}]
16694
}
16795
}
16896

169-
// Extract AI intelligence data (all timestamps ms→s)
97+
// Extract AI intelligence data
17098
const intelligence: Record<string, unknown> = {}
17199

172-
if (data.chapters && Array.isArray(data.chapters)) {
173-
intelligence.chapters = data.chapters.map((ch: { gist: string, headline: string, summary: string, start: number, end: number }) => ({
100+
if (transcript.chapters && Array.isArray(transcript.chapters)) {
101+
intelligence.chapters = transcript.chapters.map((ch) => ({
174102
gist: ch.gist,
175103
headline: ch.headline,
176104
summary: ch.summary,
@@ -179,12 +107,12 @@ export async function GET(_request: NextRequest, { params }: RouteParams) {
179107
}))
180108
}
181109

182-
if (data.summary) {
183-
intelligence.summary = data.summary
110+
if (transcript.summary) {
111+
intelligence.summary = transcript.summary
184112
}
185113

186-
if (data.sentiment_analysis_results && Array.isArray(data.sentiment_analysis_results)) {
187-
intelligence.sentimentAnalysis = data.sentiment_analysis_results.map((s: { text: string, start: number, end: number, sentiment: string, confidence: number, speaker?: string }) => ({
114+
if (transcript.sentiment_analysis_results && Array.isArray(transcript.sentiment_analysis_results)) {
115+
intelligence.sentimentAnalysis = transcript.sentiment_analysis_results.map((s) => ({
188116
text: s.text,
189117
start: s.start / 1000,
190118
end: s.end / 1000,
@@ -194,44 +122,44 @@ export async function GET(_request: NextRequest, { params }: RouteParams) {
194122
}))
195123
}
196124

197-
if (data.entities && Array.isArray(data.entities)) {
198-
intelligence.entities = data.entities.map((e: { entity_type: string, text: string, start: number, end: number }) => ({
125+
if (transcript.entities && Array.isArray(transcript.entities)) {
126+
intelligence.entities = transcript.entities.map((e) => ({
199127
entityType: e.entity_type,
200128
text: e.text,
201129
start: e.start / 1000,
202130
end: e.end / 1000,
203131
}))
204132
}
205133

206-
if (data.auto_highlights_result?.results && Array.isArray(data.auto_highlights_result.results)) {
207-
intelligence.keyPhrases = data.auto_highlights_result.results.map((h: { text: string, count: number, rank: number, timestamps: { start: number, end: number }[] }) => ({
134+
if (transcript.auto_highlights_result?.results && Array.isArray(transcript.auto_highlights_result.results)) {
135+
intelligence.keyPhrases = transcript.auto_highlights_result.results.map((h) => ({
208136
text: h.text,
209137
count: h.count,
210138
rank: h.rank,
211-
timestamps: h.timestamps.map((t: { start: number, end: number }) => ({
139+
timestamps: (h.timestamps || []).map((t) => ({
212140
start: t.start / 1000,
213141
end: t.end / 1000,
214142
})),
215143
}))
216144
}
217145

218-
if (data.content_safety_labels) {
146+
if (transcript.content_safety_labels) {
219147
intelligence.contentSafety = {
220-
results: data.content_safety_labels.results || [],
221-
summary: data.content_safety_labels.summary || {},
148+
results: transcript.content_safety_labels.results || [],
149+
summary: transcript.content_safety_labels.summary || {},
222150
}
223151
}
224152

225-
if (data.iab_categories_result) {
153+
if (transcript.iab_categories_result) {
226154
intelligence.topics = {
227-
results: data.iab_categories_result.results || [],
228-
summary: data.iab_categories_result.summary || {},
155+
results: transcript.iab_categories_result.results || [],
156+
summary: transcript.iab_categories_result.summary || {},
229157
}
230158
}
231159

232160
result.output = {
233161
segments,
234-
detected_language: data.language_code || null,
162+
detected_language: transcript.language_code || null,
235163
intelligence: Object.keys(intelligence).length > 0 ? intelligence : undefined,
236164
}
237165
}
@@ -242,9 +170,7 @@ export async function GET(_request: NextRequest, { params }: RouteParams) {
242170
const errorMessage =
243171
error instanceof Error ? error.message : "Unknown error"
244172
return NextResponse.json(
245-
{
246-
error: errorMessage,
247-
},
173+
{ error: errorMessage },
248174
{ status: 500 },
249175
)
250176
}

0 commit comments

Comments
 (0)