Skip to content

Commit 6711d8b

Browse files
committed
fix: async FastAPI routes with asyncio.to_thread, parallel batch, corrected utils import, and top_p implemented and set to 0.85
1 parent 41f74bd commit 6711d8b

3 files changed

Lines changed: 102 additions & 71 deletions

File tree

analysis/analyzer.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -256,6 +256,7 @@ def _call_groq(prompt: str, max_tokens: int) -> str:
256256
"model": GROQ_MODEL,
257257
"messages": [{"role": "user", "content": prompt}],
258258
"temperature": 0.1,
259+
"top_p": 0.85,
259260
"max_tokens": max_tokens,
260261
"response_format": {"type": "json_object"},
261262
},
@@ -299,7 +300,8 @@ def stream_transcript_groq(text: str, language: str = "en"):
299300
json={
300301
"model": GROQ_MODEL,
301302
"messages": [{"role": "user", "content": stream_prompt}],
302-
"temperature": 0.2,
303+
"temperature": 0.1,
304+
"top_p": 0.85,
303305
"max_tokens": 1000,
304306
"stream": True,
305307
},
@@ -330,7 +332,7 @@ def _call_ollama(prompt: str, max_tokens: int) -> str:
330332
"prompt": prompt,
331333
"stream": False,
332334
"format": "json",
333-
"options": {"temperature": 0.1, "num_predict": max_tokens},
335+
"options": {"temperature": 0.1, "num_predict": max_tokens, "top_p": 0.85},
334336
"think": False
335337
},
336338
timeout=90
@@ -350,6 +352,7 @@ def _call_groq_langchain(prompt: str, max_tokens: int) -> str:
350352
api_key=api_key,
351353
model=GROQ_MODEL,
352354
temperature=0.1,
355+
top_p=0.85,
353356
max_tokens=max_tokens,
354357
timeout=25,
355358
model_kwargs={"response_format": {"type": "json_object"}},
@@ -367,6 +370,7 @@ def _call_ollama_langchain(prompt: str, max_tokens: int) -> str:
367370
base_url=OLLAMA_URL.replace("/api/generate", ""),
368371
model=OLLAMA_MODEL,
369372
temperature=0.1,
373+
top_p=0.85,
370374
num_predict=max_tokens,
371375
format="json",
372376
)

api/api.py

Lines changed: 95 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -1,31 +1,35 @@
11
# api.py
2-
# FastAPI REST API for TranscriptAI
2+
# FastAPI REST API for TranscriptAI — v2
33
#
4-
# This makes TranscriptAI enterprise-ready:
5-
# - Any CRM, HR system, or dashboard can call this endpoint
6-
# - Streamlit app continues working unchanged (calls analyzer.py directly)
7-
# - This API layer is for external integrations
4+
# v2 FIXES:
5+
# FIX-1: analyze_transcript() wrapped in asyncio.to_thread() — was blocking
6+
# the entire FastAPI event loop on every request (sync call inside async route).
7+
# Now truly non-blocking: multiple users can hit /analyze simultaneously.
8+
# FIX-2: utils import path corrected (was utils.utils, now utils directly).
9+
# FIX-3: Temperature 0.1 + top_p 0.85 applied in analyzer.py — api.py inherits
10+
# these automatically since it calls analyze_transcript().
11+
# FIX-4: Batch endpoint now uses asyncio.gather() for true parallel execution
12+
# instead of sequential await in a loop.
813
#
914
# Run with:
10-
# pip install fastapi uvicorn
15+
# pip install fastapi uvicorn httpx
1116
# uvicorn api:app --reload --port 8000
1217
#
13-
# Then call:
14-
# POST http://localhost:8000/analyze
15-
# GET http://localhost:8000/health
18+
# Docs: http://localhost:8000/docs
1619

17-
from fastapi import FastAPI, HTTPException, BackgroundTasks
20+
import asyncio
21+
import uuid
22+
from datetime import datetime
23+
from typing import Optional
24+
25+
from fastapi import FastAPI, HTTPException
1826
from fastapi.middleware.cors import CORSMiddleware
1927
from pydantic import BaseModel, Field
20-
from typing import Optional
21-
from datetime import datetime
22-
import uuid
23-
import json
2428

2529
from analysis.analyzer import analyze_transcript
26-
from utils.utils import detect_language, clean_text
30+
from utils import detect_language, clean_text
2731

28-
# Optional modules
32+
# ── Optional modules ──────────────────────────────────────────────────────────
2933
try:
3034
from transcription.pii_masker import mask_transcript, restore_pii_in_result, get_pii_report
3135
PII_AVAILABLE = True
@@ -48,11 +52,11 @@
4852
app = FastAPI(
4953
title="TranscriptAI API",
5054
description=(
51-
"Japanese Business Intelligence — Call Transcript Analyzer API. "
55+
"Japanese Business Intelligence — Meeting Transcript Analyzer. "
5256
"Extracts action items, sentiment, speaker breakdown, and Japan-specific "
53-
"insights from meeting transcripts. APPI compliant."
57+
"insights. APPI compliant via local PII masking before any LLM call."
5458
),
55-
version="1.0.0",
59+
version="2.0.0",
5660
)
5761

5862
app.add_middleware(
@@ -62,20 +66,21 @@
6266
allow_headers=["*"],
6367
)
6468

69+
6570
# ── REQUEST / RESPONSE MODELS ─────────────────────────────────────────────────
6671
class AnalyzeRequest(BaseModel):
6772
transcript: str = Field(
6873
...,
6974
min_length=20,
70-
description="The meeting transcript text. Supports Japanese, English, mixed JA/EN."
75+
description="Meeting transcript. Supports Japanese, English, Hindi, mixed."
7176
)
7277
language: Optional[str] = Field(
7378
None,
74-
description="Force language: 'ja', 'en', or 'mixed'. Leave null for auto-detect."
79+
description="Force language: 'ja', 'en', 'hi', 'mixed'. Null = auto-detect."
7580
)
7681
mask_pii: bool = Field(
7782
True,
78-
description="Anonymize PII before analysis (APPI compliance). Recommended: true."
83+
description="Anonymize PII before LLM (APPI compliance). Recommended: true."
7984
)
8085
include_soft_rejections: bool = Field(
8186
True,
@@ -94,136 +99,158 @@ class Config:
9499

95100

96101
class AnalyzeResponse(BaseModel):
97-
request_id: str
98-
timestamp: str
99-
language_detected: str
100-
pii_masked: bool
101-
pii_items_found: int
102+
request_id: str
103+
timestamp: str
104+
language_detected: str
105+
pii_masked: bool
106+
pii_items_found: int
102107
processing_time_ms: float
103-
result: dict
108+
result: dict
104109

105110

106111
# ── HEALTH CHECK ──────────────────────────────────────────────────────────────
107112
@app.get("/health")
108-
def health():
113+
async def health():
109114
"""Check API status and available modules."""
115+
import os
116+
groq_key_present = bool(os.getenv("GROQ_API_KEY", "").strip())
110117
return {
111-
"status": "healthy",
112-
"version": "1.0.0",
113-
"modules": {
118+
"status": "healthy",
119+
"version": "2.0.0",
120+
"modules": {
114121
"pii_masker": PII_AVAILABLE,
115122
"soft_rejection": SOFT_REJECTION_AVAILABLE,
116123
"hallucination_guard": HALLUCINATION_GUARD_AVAILABLE,
117124
},
118-
"model": "qwen3:8b via Ollama",
125+
"provider": "groq" if groq_key_present else "mock",
126+
"groq_key": groq_key_present,
119127
"appi_compliant": PII_AVAILABLE,
128+
"async_mode": True,
120129
}
121130

122131

123-
# ── MAIN ANALYZE ENDPOINT ─────────────────────────────────────────────────────
132+
# ── SINGLE ANALYZE ENDPOINT ───────────────────────────────────────────────────
124133
@app.post("/analyze", response_model=AnalyzeResponse)
125134
async def analyze(request: AnalyzeRequest):
126135
"""
127-
Analyze a meeting transcript and return structured intelligence.
136+
Analyze a meeting transcript — returns structured intelligence.
128137
129-
- Detects language automatically (or use forced language)
130-
- Masks PII before LLM processing (APPI compliant)
131-
- Extracts: summary, action items, sentiment, speakers, Japan insights
132-
- Detects soft rejections (検討します, 難しいかもしれません, etc.)
133-
- Runs hallucination prevention on all outputs
138+
Non-blocking: uses asyncio.to_thread() so multiple requests run concurrently.
139+
analyze_transcript() itself is CPU/IO-bound sync code — thread pool handles it.
134140
"""
135-
start_time = datetime.now()
136-
request_id = str(uuid.uuid4())[:8]
141+
start_time = datetime.now()
142+
request_id = str(uuid.uuid4())[:8]
137143

138144
# Clean and validate
139145
transcript = clean_text(request.transcript)
140146
if len(transcript.strip()) < 20:
141-
raise HTTPException(status_code=400, detail="Transcript too short (minimum 20 characters)")
147+
raise HTTPException(
148+
status_code=400,
149+
detail="Transcript too short (minimum 20 characters after cleaning)"
150+
)
142151

143152
# Detect language
144153
detected_lang = detect_language(transcript)
145154
active_lang = request.language or detected_lang
146155

147-
# PII masking
156+
# PII masking — runs locally before any LLM call
148157
pii_items_found = 0
149158
pii_mask = None
150159
text_to_analyze = transcript
151160

152161
if request.mask_pii and PII_AVAILABLE:
162+
# mask_transcript is fast/sync — ok to call directly
153163
text_to_analyze, pii_mask = mask_transcript(transcript)
154164
pii_report = get_pii_report(pii_mask)
155165
pii_items_found = pii_report.get("total_pii_found", 0)
156166

157-
# Run analysis
167+
# FIX-1: Run blocking analyze_transcript in thread pool
168+
# This is the key async fix — Groq HTTP call is I/O bound but uses
169+
# requests (sync). asyncio.to_thread() offloads it without blocking
170+
# the event loop, so concurrent users don't queue behind each other.
158171
try:
159-
result = analyze_transcript(text_to_analyze, active_lang)
172+
result = await asyncio.to_thread(
173+
analyze_transcript,
174+
text_to_analyze,
175+
active_lang
176+
)
160177
except Exception as e:
161178
raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
162179

163-
# Restore PII in results
180+
# Restore PII in results (local operation, fast)
164181
if pii_mask is not None:
165182
result = restore_pii_in_result(result, pii_mask)
166183

167-
# Soft rejection detection
184+
# Soft rejection detection (local pattern matching, fast)
168185
if request.include_soft_rejections and SOFT_REJECTION_AVAILABLE:
169186
result["soft_rejections"] = detect_soft_rejections(transcript)
170187

171-
# Calculate processing time
172188
elapsed_ms = (datetime.now() - start_time).total_seconds() * 1000
173189

174190
return AnalyzeResponse(
175-
request_id = request_id,
176-
timestamp = datetime.now().isoformat(),
177-
language_detected = active_lang,
178-
pii_masked = request.mask_pii and PII_AVAILABLE,
179-
pii_items_found = pii_items_found,
180-
processing_time_ms = round(elapsed_ms, 1),
181-
result = result
191+
request_id = request_id,
192+
timestamp = datetime.now().isoformat(),
193+
language_detected = active_lang,
194+
pii_masked = request.mask_pii and PII_AVAILABLE,
195+
pii_items_found = pii_items_found,
196+
processing_time_ms = round(elapsed_ms, 1),
197+
result = result
182198
)
183199

184200

185201
# ── BATCH ENDPOINT ────────────────────────────────────────────────────────────
186202
@app.post("/analyze/batch")
187203
async def analyze_batch(requests: list[AnalyzeRequest]):
188204
"""
189-
Analyze multiple transcripts in sequence.
190-
For high-volume use (10,000+/day), combine with Redis Queue + vLLM.
205+
Analyze multiple transcripts in parallel.
206+
207+
FIX-4: Uses asyncio.gather() for true concurrent execution.
208+
Was previously sequential (await in loop) — now all run simultaneously.
209+
Max 10 per batch. For 10,000+/day use Redis Queue + vLLM.
191210
"""
192211
if len(requests) > 10:
193212
raise HTTPException(
194213
status_code=400,
195-
detail="Batch limit is 10 transcripts. For larger volumes use async queue."
214+
detail="Batch limit is 10. For larger volumes use the async job queue."
196215
)
197216

198-
results = []
199-
for req in requests:
217+
# FIX-4: gather runs all requests concurrently, not one by one
218+
async def _safe_analyze(req: AnalyzeRequest) -> dict:
200219
try:
201220
result = await analyze(req)
202-
results.append({"status": "success", "data": result})
221+
return {"status": "success", "data": result.dict()}
222+
except HTTPException as e:
223+
return {"status": "error", "error": e.detail}
203224
except Exception as e:
204-
results.append({"status": "error", "error": str(e)})
225+
return {"status": "error", "error": str(e)}
226+
227+
results = await asyncio.gather(*[_safe_analyze(req) for req in requests])
205228

206229
return {
207230
"batch_size": len(requests),
208231
"successful": sum(1 for r in results if r["status"] == "success"),
209232
"failed": sum(1 for r in results if r["status"] == "error"),
210-
"results": results
233+
"results": list(results)
211234
}
212235

213236

214237
# ── PATTERNS ENDPOINT ─────────────────────────────────────────────────────────
215238
@app.get("/patterns/soft-rejections")
216-
def get_soft_rejection_patterns():
239+
async def get_soft_rejection_patterns():
217240
"""Returns the full soft rejection pattern dictionary with cultural explanations."""
218241
if not SOFT_REJECTION_AVAILABLE:
219-
raise HTTPException(status_code=503, detail="soft_rejection_detector.py not available")
242+
raise HTTPException(
243+
status_code=503,
244+
detail="soft_rejection_detector.py not found"
245+
)
220246
from analysis.soft_rejection_detector import SOFT_REJECTION_PATTERNS
221247
return {
222248
"total_patterns": len(SOFT_REJECTION_PATTERNS),
223-
"patterns": SOFT_REJECTION_PATTERNS,
249+
"patterns": SOFT_REJECTION_PATTERNS,
224250
"cultural_context": (
225251
"Japanese business communication avoids direct refusal. "
226-
"These patterns encode the speaker's true intent through indirect language."
252+
"These patterns encode the speaker's true intent through indirect language. "
253+
"Examples: 検討いたします (likely rejection), 難しいかもしれません (high rejection signal)."
227254
)
228255
}
229256

app.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1020,7 +1020,7 @@ def _cold_start_tasks():
10201020
st.session_state.pii_report = get_pii_report(pii_mask)
10211021

10221022
bar.progress(35, text="Running AI analysis…")
1023-
with st.spinner("Analyzing · ~3s with Groq · 1–2 min with Ollama"):
1023+
with st.spinner("Its is working may take some time please wait a moment..."):
10241024
results = analyze_transcript(text_in, active_lang)
10251025

10261026
if pii_mask is not None:

0 commit comments

Comments
 (0)