Skip to content

Commit ce05355

Browse files
committed
add vad example
1 parent e76dc54 commit ce05355

5 files changed

Lines changed: 187 additions & 114 deletions

File tree

examples/vad_example.py

Lines changed: 0 additions & 114 deletions
This file was deleted.

examples/vad_silero/README.md

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Stream × Silero — VAD Bot
2+
3+
Detect when people speak in a Stream Video call.
4+
5+
This minimal example starts a bot that listens to the call, runs every audio
6+
frame through the [Silero VAD](https://github.com/snakers4/silero-vad) model
7+
and prints each speech turn with its duration.
8+
9+
---
10+
11+
## Quick start
12+
13+
```bash
14+
cd examples/vad_silero
15+
16+
# install deps (choose one)
17+
pip install -e .
18+
uv venv .venv && source .venv/bin/activate && uv sync # fast ⚡️
19+
20+
cp ../stt_deepgram_transcription/env.example .env # fill STREAM_* keys
21+
python main.py # or: uv run python main.py
22+
```
23+
24+
Speak in the browser tab – you'll see logs like:
25+
26+
```
27+
🤖 VAD bot starting – speak in the call and watch the console.
28+
📞 Call ready: 7a1f…
29+
[12:03:45] Speech from user-d1e8… — 1.32s
30+
[12:03:49] Speech from user-d1e8… — 0.87s
31+
```
32+
33+
---
34+
35+
## How it works
36+
37+
1. Creates two temporary Stream users (human + `vad-bot-*`).
38+
2. Opens the call URL so you can join immediately.
39+
3. Every incoming PCM frame goes to `Silero.process_audio()`.
40+
4. The plugin emits `partial` (in-progress) and `audio` (end-of-turn) events.
41+
5. On **Ctrl-C** the bot leaves the call and temporary users are deleted.
42+
43+
< 120 lines of Python 🐍

examples/vad_silero/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+

examples/vad_silero/main.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Example: Voice-Activity-Detection bot (Silero VAD)
4+
5+
The script joins a Stream video call with a bot that detects when anyone
6+
speaks, using the Silero VAD plugin (`getstream.plugins.vad.silero`).
7+
Each complete speech turn is logged with a timestamp and duration.
8+
9+
Run:
10+
python main.py
11+
12+
Environment: copy `examples/env.example` to `examples/.env` and fill in
13+
`STREAM_API_KEY`, `STREAM_API_SECRET` (and optionally `STREAM_BASE_URL`).
14+
"""
15+
16+
from __future__ import annotations
17+
18+
import asyncio
19+
import logging
20+
import os
21+
import time
22+
from typing import Any
23+
from uuid import uuid4
24+
25+
from dotenv import load_dotenv
26+
27+
from examples.utils import create_user, open_browser
28+
from getstream.stream import Stream
29+
from getstream.video import rtc
30+
from getstream.video.rtc.track_util import PcmData
31+
from getstream.plugins.vad.silero import Silero
32+
33+
# ---------------------------------------------------------------------------
34+
# Logging setup – INFO level so we see joins / leaves, etc.
35+
# ---------------------------------------------------------------------------
36+
# Configure the root logger once at import time: INFO level, with every record
# prefixed by its timestamp and level name.
logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
40+
41+
42+
async def main() -> None:
    """Create a call, join it with a bot and report every detected speech turn.

    Flow:
      1. Load environment variables and build the Stream client from them.
      2. Create two temporary users (a human and the bot) and a fresh call.
      3. Open the call in the browser, join it as the bot and forward each
         incoming audio frame to the Silero VAD instance.
      4. On shutdown close the VAD, print a summary of the collected speech
         segments and delete the temporary users.

    Cancellation and Ctrl-C are swallowed so the script exits quietly; any
    other exception still propagates, but only after cleanup has run.
    """
    here = os.path.dirname(__file__)
    # The README places `.env` next to this script, while the module docstring
    # points at `examples/.env`.  Read both: load_dotenv does not override
    # variables that are already set, so the local file takes precedence and
    # the shared one only fills in what is missing.
    load_dotenv(os.path.join(here, ".env"))
    load_dotenv(os.path.join(here, "..", ".env"))

    client = Stream.from_env()

    human_id = f"user-{uuid4()}"
    bot_id = f"vad-bot-{uuid4()}"

    create_user(client, human_id, "Human")
    create_user(client, bot_id, "VAD Bot")

    token = client.create_token(human_id, expiration=3600)

    call_id = str(uuid4())
    call = client.video.call("default", call_id)
    call.get_or_create(data={"created_by_id": bot_id})

    logging.info("📞 Call ready: %s", call_id)

    open_browser(client.api_key, token, call_id)

    vad = Silero()

    print("\n🤖 VAD bot starting – speak in the call and watch the console.\n")

    # One entry per completed speech turn; summarised during shutdown.
    speech_segments: list[dict[str, Any]] = []

    try:
        async with await rtc.join(call, bot_id) as connection:
            logging.info("🤖 Bot joined call: %s", call_id)

            # Forward audio frames to the VAD engine
            @connection.on("audio")
            async def _on_pcm(pcm: PcmData, user):
                await vad.process_audio(pcm, user)

            # Complete speech turns
            @vad.on("audio")  # type: ignore[arg-type]
            async def _on_turn(pcm: PcmData, user):
                duration = pcm.duration
                ts = time.strftime("%H:%M:%S")
                # Separator keeps the user and the duration from running
                # together (matches the sample output in the README).
                print(f"[{ts}] Speech from {user} — {duration:.2f}s")
                speech_segments.append({
                    "timestamp": ts,
                    "duration": duration,
                    "user": user,
                })

            # Optional: in-progress indicator
            @vad.on("partial")  # type: ignore[arg-type]
            async def _on_partial(_: PcmData, user):
                # "\r" rewrites the same console line instead of scrolling.
                print(f"  {user} … speaking", end="\r")

            print("🎧 Listening… press Ctrl-C to stop")
            await connection.wait()

    except (asyncio.CancelledError, KeyboardInterrupt):
        print("\n⏹️  Stopping VAD bot…")
    finally:
        try:
            await vad.close()
        finally:
            # Runs even when the session (or vad.close()) failed, so the
            # temporary users are not left behind after an error.
            print(f"Detected {len(speech_segments)} speech segments")
            total_duration = sum(segment["duration"] for segment in speech_segments)
            print(f"Total speech duration: {total_duration:.2f} seconds")

            client.delete_users([human_id, bot_id])
            print("🧹 Cleanup completed")
112+
113+
114+
if __name__ == "__main__":
    # Script entry point: run the async bot until it finishes or is interrupted.
    bot_session = main()
    asyncio.run(bot_session)

examples/vad_silero/pyproject.toml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
[project]
2+
name = "vad-silero-example"
3+
version = "0.1.0"
4+
description = "Stream Video + Silero VAD demo."
5+
readme = "README.md"
6+
requires-python = ">=3.9"
7+
license = { text = "MIT" }
8+
9+
dependencies = [
10+
"python-dotenv>=1.0.0",
11+
"torch>=1.11.0",
12+
"numpy>=1.20.0",
13+
"aiortc>=1.10.1",
14+
]
15+
16+
[project.optional-dependencies]
17+
dev = [
18+
"pytest>=7.0.0",
19+
"pytest-asyncio>=0.21.0",
20+
]
21+
22+
[build-system]
23+
requires = ["setuptools>=61.0", "wheel"]
24+
build-backend = "setuptools.build_meta"
25+
26+
[tool.uv.sources]
27+
getstream = { workspace = true }
28+
getstream-plugins-vad-silero = { workspace = true }

0 commit comments

Comments
 (0)