11# api.py
2- # FastAPI REST API for TranscriptAI
2+ # FastAPI REST API for TranscriptAI — v2
33#
4- # This makes TranscriptAI enterprise-ready:
5- # - Any CRM, HR system, or dashboard can call this endpoint
6- # - Streamlit app continues working unchanged (calls analyzer.py directly)
7- # - This API layer is for external integrations
4+ # v2 FIXES:
5+ # FIX-1: analyze_transcript() wrapped in asyncio.to_thread() — was blocking
6+ # the entire FastAPI event loop on every request (sync call inside async route).
7+ # Now truly non-blocking: multiple users can hit /analyze simultaneously.
8+ # FIX-2: utils import path corrected (was utils.utils, now utils directly).
9+ # FIX-3: Temperature 0.1 + top_p 0.85 applied in analyzer.py — api.py inherits
10+ # these automatically since it calls analyze_transcript().
11+ # FIX-4: Batch endpoint now uses asyncio.gather() for true parallel execution
12+ # instead of sequential await in a loop.
813#
914# Run with:
10- # pip install fastapi uvicorn
15+ # pip install fastapi uvicorn httpx
1116# uvicorn api:app --reload --port 8000
1217#
13- # Then call:
14- # POST http://localhost:8000/analyze
15- # GET http://localhost:8000/health
18+ # Docs: http://localhost:8000/docs
1619
17- from fastapi import FastAPI , HTTPException , BackgroundTasks
20+ import asyncio
21+ import uuid
22+ from datetime import datetime
23+ from typing import Optional
24+
25+ from fastapi import FastAPI , HTTPException
1826from fastapi .middleware .cors import CORSMiddleware
1927from pydantic import BaseModel , Field
20- from typing import Optional
21- from datetime import datetime
22- import uuid
23- import json
2428
2529from analysis .analyzer import analyze_transcript
26- from utils . utils import detect_language , clean_text
30+ from utils import detect_language , clean_text
2731
28- # Optional modules
32+ # ── Optional modules ──────────────────────────────────────────────────────────
2933try :
3034 from transcription .pii_masker import mask_transcript , restore_pii_in_result , get_pii_report
3135 PII_AVAILABLE = True
# Application object. The title, description and version given here are the
# metadata FastAPI publishes in the generated OpenAPI schema and /docs page.
app = FastAPI(
    title="TranscriptAI API",
    description=(
        "Japanese Business Intelligence — Meeting Transcript Analyzer. "
        "Extracts action items, sentiment, speaker breakdown, and Japan-specific "
        "insights. APPI compliant via local PII masking before any LLM call."
    ),
    version="2.0.0",
)
5761
5862app .add_middleware (
6266 allow_headers = ["*" ],
6367)
6468
69+
6570# ── REQUEST / RESPONSE MODELS ─────────────────────────────────────────────────
6671class AnalyzeRequest (BaseModel ):
6772 transcript : str = Field (
6873 ...,
6974 min_length = 20 ,
70- description = "The meeting transcript text . Supports Japanese, English, mixed JA/EN ."
75+ description = "Meeting transcript. Supports Japanese, English, Hindi, mixed ."
7176 )
7277 language : Optional [str ] = Field (
7378 None ,
74- description = "Force language: 'ja', 'en', or 'mixed'. Leave null for auto-detect."
79+ description = "Force language: 'ja', 'en', 'hi', 'mixed'. Null = auto-detect."
7580 )
7681 mask_pii : bool = Field (
7782 True ,
78- description = "Anonymize PII before analysis (APPI compliance). Recommended: true."
83+ description = "Anonymize PII before LLM (APPI compliance). Recommended: true."
7984 )
8085 include_soft_rejections : bool = Field (
8186 True ,
@@ -94,136 +99,158 @@ class Config:
9499
95100
class AnalyzeResponse(BaseModel):
    """Response envelope returned by POST /analyze."""

    # First 8 characters of a freshly generated UUID4 string.
    request_id: str
    # ISO-8601 timestamp taken when the response is built.
    timestamp: str
    # Language actually used for analysis: the caller-forced language when
    # one was supplied, otherwise the auto-detected one.
    language_detected: str
    # True only when masking was requested AND the PII masker is available.
    pii_masked: bool
    # "total_pii_found" from the PII report; 0 when masking did not run.
    pii_items_found: int
    # Time spent handling the request, in milliseconds, rounded to 1 decimal.
    processing_time_ms: float
    # Analyzer output (PII restored when it was masked); may also carry a
    # "soft_rejections" entry when that detector ran.
    result: dict
104109
105110
106111# ── HEALTH CHECK ──────────────────────────────────────────────────────────────
@app.get("/health")
async def health():
    """Check API status and available modules."""
    # Imported locally, as in the original: only this handler needs `os`.
    import os

    # Only the presence of a non-blank key is reported, never its value.
    groq_key_present = bool(os.getenv("GROQ_API_KEY", "").strip())

    return {
        "status": "healthy",
        # Read from the app object so this can never drift from the version
        # passed to the FastAPI(...) constructor.
        "version": app.version,
        # Availability flags for the optional modules.
        "modules": {
            "pii_masker": PII_AVAILABLE,
            "soft_rejection": SOFT_REJECTION_AVAILABLE,
            "hallucination_guard": HALLUCINATION_GUARD_AVAILABLE,
        },
        # NOTE(review): "mock" is what this endpoint reports when no key is
        # set; confirm the analyzer really falls back to a mock provider.
        "provider": "groq" if groq_key_present else "mock",
        "groq_key": groq_key_present,
        "appi_compliant": PII_AVAILABLE,
        "async_mode": True,
    }
121130
122131
123- # ── MAIN ANALYZE ENDPOINT ── ───────────────────────────────────────────────────
132+ # ── SINGLE ANALYZE ENDPOINT ───────────────────────────────────────────────────
@app.post("/analyze", response_model=AnalyzeResponse)
async def analyze(request: AnalyzeRequest):
    """
    Analyze a meeting transcript — returns structured intelligence.

    Non-blocking: the analysis runs in a worker thread via
    asyncio.to_thread(), so multiple requests can be served concurrently.
    Responds with 400 when the transcript is shorter than 20 characters
    after cleaning, and with 500 when the analysis itself fails.
    """
    # Local import (same pattern as health()): keeps this block independent
    # of the module-level import list.
    import time

    # Monotonic clock: a wall-clock datetime difference can jump, or even go
    # negative, if the system clock is adjusted while the request is running.
    started = time.perf_counter()
    request_id = str(uuid.uuid4())[:8]

    # Clean first, then re-validate: cleaning may shorten the text below the
    # minimum that the request model already enforced on the raw input.
    transcript = clean_text(request.transcript)
    if len(transcript.strip()) < 20:
        raise HTTPException(
            status_code=400,
            detail="Transcript too short (minimum 20 characters after cleaning)",
        )

    # A caller-forced language takes precedence over auto-detection.
    detected_lang = detect_language(transcript)
    active_lang = request.language or detected_lang

    # PII masking happens before the text is handed to the analyzer.
    pii_items_found = 0
    pii_mask = None
    text_to_analyze = transcript

    if request.mask_pii and PII_AVAILABLE:
        # Called directly on the event loop; the original author considers
        # mask_transcript fast enough for that — TODO confirm on long inputs.
        text_to_analyze, pii_mask = mask_transcript(transcript)
        pii_report = get_pii_report(pii_mask)
        pii_items_found = pii_report.get("total_pii_found", 0)

    # analyze_transcript is a synchronous call; running it in a worker thread
    # keeps the event loop free for other requests.
    try:
        result = await asyncio.to_thread(
            analyze_transcript,
            text_to_analyze,
            active_lang,
        )
    except Exception as e:
        # Chain the cause so the original traceback stays visible in logs.
        raise HTTPException(
            status_code=500,
            detail=f"Analysis failed: {e}",
        ) from e

    # Put the original PII back into the result when it was masked.
    if pii_mask is not None:
        result = restore_pii_in_result(result, pii_mask)

    # Soft-rejection detection runs on the cleaned, unmasked transcript.
    if request.include_soft_rejections and SOFT_REJECTION_AVAILABLE:
        result["soft_rejections"] = detect_soft_rejections(transcript)

    elapsed_ms = (time.perf_counter() - started) * 1000

    return AnalyzeResponse(
        request_id=request_id,
        timestamp=datetime.now().isoformat(),
        language_detected=active_lang,
        pii_masked=request.mask_pii and PII_AVAILABLE,
        pii_items_found=pii_items_found,
        processing_time_ms=round(elapsed_ms, 1),
        result=result,
    )
183199
184200
185201# ── BATCH ENDPOINT ────────────────────────────────────────────────────────────
# Upper bound on the number of transcripts accepted by one batch call.
_MAX_BATCH_SIZE = 10


@app.post("/analyze/batch")
async def analyze_batch(requests: list[AnalyzeRequest]):
    """
    Analyze multiple transcripts in parallel.

    All items are scheduled together with asyncio.gather(); a failure in one
    item is reported in that item's entry and does not abort the others.
    Max 10 per batch. For 10,000+/day use Redis Queue + vLLM.
    """
    if len(requests) > _MAX_BATCH_SIZE:
        raise HTTPException(
            status_code=400,
            detail=(
                f"Batch limit is {_MAX_BATCH_SIZE}. "
                "For larger volumes use the async job queue."
            ),
        )

    async def _safe_analyze(req: AnalyzeRequest) -> dict:
        """Run analyze() for one item and convert any failure into an entry."""
        try:
            result = await analyze(req)
        except HTTPException as e:
            # Surface the handler's own message rather than the exception repr.
            return {"status": "error", "error": e.detail}
        except Exception as e:
            return {"status": "error", "error": str(e)}
        # Pydantic v2 deprecates .dict() in favour of .model_dump(); use the
        # new name when it exists and fall back for Pydantic v1.
        if hasattr(result, "model_dump"):
            payload = result.model_dump()
        else:
            payload = result.dict()
        return {"status": "success", "data": payload}

    # gather() preserves input order, so results[i] corresponds to requests[i].
    results = await asyncio.gather(*(_safe_analyze(req) for req in requests))

    successful = sum(1 for entry in results if entry["status"] == "success")

    return {
        "batch_size": len(requests),
        "successful": successful,
        # Every entry is either "success" or "error", so the rest failed.
        "failed": len(results) - successful,
        "results": list(results),
    }
212235
213236
214237# ── PATTERNS ENDPOINT ─────────────────────────────────────────────────────────
@app.get("/patterns/soft-rejections")
async def get_soft_rejection_patterns():
    """Returns the full soft rejection pattern dictionary with cultural explanations."""
    # Responds with 503 when the optional detector module is unavailable.
    if not SOFT_REJECTION_AVAILABLE:
        raise HTTPException(
            status_code=503,
            detail="soft_rejection_detector.py not found",
        )

    # Imported only after the availability check, so a missing optional
    # module cannot break import of this API module.
    from analysis.soft_rejection_detector import SOFT_REJECTION_PATTERNS

    return {
        "total_patterns": len(SOFT_REJECTION_PATTERNS),
        "patterns": SOFT_REJECTION_PATTERNS,
        "cultural_context": (
            "Japanese business communication avoids direct refusal. "
            "These patterns encode the speaker's true intent through indirect language. "
            "Examples: 検討いたします (likely rejection), 難しいかもしれません (high rejection signal)."
        ),
    }
229256
0 commit comments