-
Notifications
You must be signed in to change notification settings - Fork 129
Expand file tree
/
Copy pathmetrics_tts.py
More file actions
109 lines (89 loc) · 3.15 KB
/
Copy pathmetrics_tts.py
File metadata and controls
109 lines (89 loc) · 3.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
"""
---
title: TTS Metrics
category: metrics
tags: [metrics, deepgram, openai, cartesia]
difficulty: beginner
description: Shows how to use the TTS metrics to log metrics to the console.
demonstrates:
- Using the TTS metrics to log metrics to the console.
- This includes:
- TTFB
- Duration
- Audio Duration
- Cancelled
- Characters Count
- Streamed
- Speech ID
- Error
---
"""
import logging
import asyncio
from dotenv import load_dotenv
from livekit.agents import JobContext, JobProcess, AgentServer, cli, Agent, AgentSession, inference
from livekit.agents.metrics import TTSMetrics
from livekit.plugins import silero
from rich.console import Console
from rich.table import Table
from rich import box
from datetime import datetime
load_dotenv()
logger = logging.getLogger("metrics-tts")
logger.setLevel(logging.INFO)
console = Console()
class TTSMetricsAgent(Agent):
def __init__(self) -> None:
super().__init__(
instructions="You are a helpful agent."
)
async def on_enter(self):
self.session.generate_reply()
async def display_tts_metrics(metrics: TTSMetrics):
table = Table(
title="[bold blue]TTS Metrics Report[/bold blue]",
box=box.ROUNDED,
highlight=True,
show_header=True,
header_style="bold cyan"
)
table.add_column("Metric", style="bold green")
table.add_column("Value", style="yellow")
timestamp = datetime.fromtimestamp(metrics.timestamp).strftime('%Y-%m-%d %H:%M:%S')
table.add_row("Type", str(metrics.type))
table.add_row("Label", str(metrics.label))
table.add_row("Request ID", str(metrics.request_id))
table.add_row("Timestamp", timestamp)
table.add_row("TTFB", f"[white]{metrics.ttfb:.4f}[/white]s")
table.add_row("Duration", f"[white]{metrics.duration:.4f}[/white]s")
table.add_row("Audio Duration", f"[white]{metrics.audio_duration:.4f}[/white]s")
table.add_row("Cancelled", "✓" if metrics.cancelled else "✗")
table.add_row("Characters Count", str(metrics.characters_count))
table.add_row("Streamed", "✓" if metrics.streamed else "✗")
table.add_row("Speech ID", str(metrics.speech_id))
table.add_row("Error", str(metrics.error))
console.print("\n")
console.print(table)
console.print("\n")
server = AgentServer()
def prewarm(proc: JobProcess):
proc.userdata["vad"] = silero.VAD.load()
server.setup_fnc = prewarm
@server.rtc_session()
async def entrypoint(ctx: JobContext):
ctx.log_context_fields = {"room": ctx.room.name}
tts_instance = inference.TTS(model="cartesia/sonic-3", voice="9626c31c-bec5-4cca-baa8-f8ba9e84c8bc")
def on_tts_metrics(metrics: TTSMetrics):
asyncio.create_task(display_tts_metrics(metrics))
tts_instance.on("metrics_collected", on_tts_metrics)
session = AgentSession(
stt=inference.STT(model="deepgram/nova-3-general"),
llm=inference.LLM(model="openai/gpt-5-mini"),
tts=tts_instance,
vad=ctx.proc.userdata["vad"],
preemptive_generation=True,
)
await session.start(agent=TTSMetricsAgent(), room=ctx.room)
await ctx.connect()
if __name__ == "__main__":
cli.run_app(server)