Skip to content

Commit 4fe467a

Browse files
committed
Launch PolyTalk Community Edition
0 parents  commit 4fe467a

92 files changed

Lines changed: 19569 additions & 0 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.coveragerc

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
[run]
2+
source = app
3+
omit =
4+
app/tests/*
5+
app/log/*
6+
*/migrations/*
7+
*/__pycache__/*
8+
*/site-packages/*
9+
branch = True
10+
11+
[report]
12+
exclude_lines =
13+
pragma: no cover
14+
def __repr__
15+
raise AssertionError
16+
raise NotImplementedError
17+
if __name__ == .__main__.:
18+
if TYPE_CHECKING:
19+
@abstractmethod
20+
21+
precision = 2
22+
show_missing = True
23+
fail_under = 0
24+
25+
[html]
26+
directory = htmlcov
27+
show_contexts = True
28+
29+
[xml]
30+
output = coverage.xml
31+
32+
[lcov]
33+
output = lcov.info

.dockerignore

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Python
2+
__pycache__
3+
*.py[cod]
4+
*$py.class
5+
*.so
6+
.Python
7+
.eggs
8+
*.egg-info
9+
dist
10+
build
11+
12+
# Virtual environments
13+
venv
14+
env
15+
.venv
16+
17+
# IDE
18+
.idea
19+
.vscode
20+
*.swp
21+
*.swo
22+
23+
# Testing
24+
.pytest_cache
25+
.ruff_cache
26+
.coverage
27+
coverage.xml
28+
lcov.info
29+
htmlcov
30+
.tox
31+
32+
33+
# Local configuration and generated data
34+
.env
35+
.env.*
36+
!.env.example
37+
config/config.yaml
38+
media
39+
tts/voices/*
40+
!tts/voices/.gitkeep
41+
42+
# Git
43+
.git
44+
.gitignore
45+
46+
# Docker
47+
Dockerfile
48+
docker-compose*.yml
49+
.dockerignore
50+
51+
# Documentation
52+
*.md
53+
docs
54+
55+
# Development files
56+
.env.local
57+
*.local
58+
59+
# Tests (not needed in container)
60+
tests/

.env.example

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
# PolyTalk Environment Variables
2+
# Copy this file to .env and update values as needed
3+
# All ${VAR} references in config/config.yaml will use these values
4+
5+
# ============================================================================
6+
# APPLICATION LOGGING
7+
# ============================================================================
8+
# Logging level: DEBUG, INFO, WARNING, ERROR, CRITICAL
9+
LOG_LEVEL=INFO
10+
11+
# ============================================================================
12+
# STT SERVICE (Local Speech-to-Text with faster-whisper)
13+
# ============================================================================
14+
# STT model to use: small, small-v3, medium, large-v3
15+
STT_MODEL=small
16+
17+
# Device to run STT on: cpu or cuda
18+
STT_DEVICE=cpu
19+
20+
# Compute type: int8 (CPU) or float16 (CUDA)
21+
STT_COMPUTE_TYPE=int8
22+
23+
# Number of STT web workers. Each worker loads its own Whisper model.
24+
STT_WORKERS=1
25+
26+
# Load the Whisper model during STT service startup instead of on first stream.
27+
STT_PRELOAD_MODEL=true
28+
29+
# Max file upload size in MB
30+
STT_MAX_UPLOAD_MB=200
31+
32+
# Streaming audio window in seconds. Lower values reduce latency but can reduce
33+
# transcript stability. 3.0 gives Whisper more context while pause flush handles utterance endings.
34+
STT_STREAM_CHUNK_SECONDS=3.0
35+
36+
# Audio overlap between STT windows. Helps avoid missing words at chunk boundaries.
37+
# Keep this modest; too much overlap can increase repeated/hallucinated text.
38+
STT_CHUNK_OVERLAP_SECONDS=0.25
39+
40+
# Parallel STT queue workers. Increase this when STT inference is slower than
41+
# incoming audio windows and the GPU has spare compute.
42+
STT_TRANSCRIBE_WORKERS=2
43+
STT_TRANSCRIBE_QUEUE_SIZE=8
44+
STT_MODEL_WORKERS=2
45+
46+
# Transcript emit batching. STT may infer more often than it emits to PolyTalk.
47+
# Increase these values if live transcript/translation/TTS chunks are too small.
48+
STT_EMIT_MIN_CHARS=120
49+
STT_EMIT_INTERVAL_SECONDS=4.5
50+
# Flush the current speech window after this much trailing silence, even if the
51+
# normal stream window or emit thresholds have not been reached. Set 0 to disable.
52+
STT_PAUSE_FLUSH_SECONDS=1.2
53+
54+
# Silence/hallucination guards for streaming STT. These balanced defaults work
55+
# well for typical microphone input: raise RMS/no-speech strictness if Whisper
56+
# hallucinates during silence; lower them if quiet speech is missed.
57+
STT_SILENCE_RMS_THRESHOLD=0.003
58+
STT_NO_SPEECH_PROB_THRESHOLD=0.50
59+
STT_LOG_PROB_THRESHOLD=-1.0
60+
STT_MAX_CROSS_DELTA_WORD_REPEATS=6
61+
62+
# faster-whisper decoding/VAD knobs. Keep previous-text conditioning disabled
63+
# by default for streaming because it can repeat or invent text during silence.
64+
STT_VAD_FILTER=true
65+
STT_VAD_MIN_SILENCE_MS=500
66+
STT_VAD_SPEECH_PAD_MS=200
67+
STT_WORD_TIMESTAMPS=true
68+
STT_CONDITION_ON_PREVIOUS_TEXT=false
69+
STT_TEMPERATURE=0.0
70+
# Optional domain/context prompt for Whisper, for example names or product terms.
71+
# STT_INITIAL_PROMPT=
72+
73+
# ============================================================================
74+
# WHISPER SERVICE CONFIGURATION (Points to local STT or external)
75+
# ============================================================================
76+
# Base URL for Whisper API (defaults to local STT service in Docker)
77+
# For external service, use: https://whisper.your-domain.com
78+
WHISPER_BASE_URL=http://stt:8000
79+
80+
# WebSocket endpoint for streaming transcription (used by PolyTalk)
81+
WHISPER_WS_ENDPOINT=/v1/stream/transcriptions
82+
83+
# Optional: API key for external Whisper API (e.g., OpenAI, custom deployment)
84+
# Set this if your Whisper service requires authentication
85+
# WHISPER_API_KEY=your-api-key-here
86+
87+
# ============================================================================
88+
# TRANSLATION SERVICE (AI Translation)
89+
# ============================================================================
90+
# Translation API format: openai_chat, openai_responses, anthropic_messages,
91+
# or gemini_generate_content.
92+
TRANSLATION_API_FORMAT=openai_chat
93+
94+
# Base URL and endpoint for Translation API. Use your self-hosted AI server
95+
# URL here, or an OpenAI-compatible provider URL.
96+
TRANSLATION_BASE_URL=https://ai.example.com
97+
TRANSLATION_ENDPOINT=/v1/chat/completions
98+
99+
# API key for Translation service
100+
TRANSLATION_API_KEY=your_translation_api_key_here
101+
102+
# AI model to use for translation. For self-hosted translation, use models such
103+
# as qwen3-8b, TranslateGemma, or other open-source/open-weight models supported
104+
# by your model server.
105+
TRANSLATION_MODEL=qwen3-8b
106+
107+
# Maximum translation output tokens. Keep this bounded for live streaming, but
108+
# allow enough room for Indic-script targets and longer sentence buffers.
109+
TRANSLATION_MAX_TOKENS=240
110+
111+
# ============================================================================
112+
# TTS SERVICE (Local Text-to-Speech with Piper)
113+
# ============================================================================
114+
# Piper model to use (voice model name in tts/voices directory)
115+
TTS_MODEL=en_GB-jenny_dioco-medium
116+
117+
# Base URL for TTS API (local Piper service in Docker)
118+
# For external service, use: https://tts.your-domain.com
119+
TTS_BASE_URL=http://tts:5000
120+
121+
# ============================================================================
122+
# APPLICATION SETTINGS
123+
# ============================================================================
124+
# Host to bind the application to (0.0.0.0 for all interfaces)
125+
APP_HOST=0.0.0.0
126+
127+
# Port to run the application on
128+
APP_PORT=9000
129+
130+
# Enable debug mode (set to false for production)
131+
APP_DEBUG=true
132+
133+
# Comma-separated browser origins allowed to call the app.
134+
# Use the exact HTTPS origin in production, for example:
135+
# ALLOWED_ORIGINS=https://polytalk.example.com
136+
ALLOWED_ORIGINS=http://localhost:9000,http://127.0.0.1:9000
137+
138+
# Translate partial speech after this many buffered characters or seconds.
139+
# Lower values reduce latency; higher values improve context and quality.
140+
TRANSLATION_FLUSH_CHARS=300
141+
TRANSLATION_FLUSH_SECONDS=5.0
142+
TRANSLATION_FLUSH_MIN_CHARS=120
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
---
2+
name: Bug report
3+
about: Report a reproducible PolyTalk problem
4+
title: "[Bug]: "
5+
labels: bug
6+
assignees: ""
7+
---
8+
9+
## Summary
10+
11+
Describe the bug clearly.
12+
13+
## Environment
14+
15+
- PolyTalk commit/release:
16+
- Deployment mode: local Python / Docker CPU / Docker GPU / external services
17+
- Browser and OS:
18+
- STT provider/model:
19+
- Translation provider/model:
20+
- TTS provider/voice:
21+
22+
## Configuration
23+
24+
Paste relevant `.env` and `config/config.yaml` values with secrets removed.
25+
26+
## Steps to Reproduce
27+
28+
1.
29+
2.
30+
3.
31+
32+
## Expected Behavior
33+
34+
What should happen?
35+
36+
## Actual Behavior
37+
38+
What happened instead?
39+
40+
## Logs
41+
42+
Paste relevant logs with secrets and user data removed.

.github/ISSUE_TEMPLATE/config.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
blank_issues_enabled: true
2+
contact_links:
3+
- name: Security report
4+
url: mailto:security@bizzappdev.com
5+
about: Please report vulnerabilities privately.
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
---
2+
name: Feature request
3+
about: Suggest an improvement for PolyTalk
4+
title: "[Feature]: "
5+
labels: enhancement
6+
assignees: ""
7+
---
8+
9+
## Problem
10+
11+
What problem should this solve?
12+
13+
## Proposal
14+
15+
Describe the behavior or interface you want.
16+
17+
## Alternatives
18+
19+
What workarounds or alternatives have you considered?
20+
21+
## Deployment Impact
22+
23+
Does this affect STT, translation, TTS, Docker, frontend, or documentation?

.github/PULL_REQUEST_TEMPLATE.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
## Summary
2+
3+
-
4+
5+
## Testing
6+
7+
- [ ] `pre-commit run --all-files`
8+
- [ ] `pytest tests/ -v`
9+
- [ ] Manual Docker or browser testing, if relevant
10+
11+
## Configuration Impact
12+
13+
Describe any `.env`, `config/config.yaml`, Docker, model, or deployment changes.
14+
15+
## Checklist
16+
17+
- [ ] I did not commit secrets, generated media, downloaded voices, or local config.
18+
- [ ] Documentation was updated for user-facing or deployment-facing changes.
19+
- [ ] Logs are appropriate for the configured log level.

.github/workflows/ci.yml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
name: CI
2+
3+
on:
4+
push:
5+
pull_request:
6+
7+
permissions:
8+
contents: read
9+
10+
jobs:
11+
pre-commit:
12+
name: Pre-commit
13+
runs-on: ubuntu-latest
14+
15+
steps:
16+
- name: Check out repository
17+
uses: actions/checkout@v4
18+
19+
- name: Set up Python
20+
uses: actions/setup-python@v5
21+
with:
22+
python-version: "3.12"
23+
cache: pip
24+
25+
- name: Install pre-commit
26+
run: |
27+
python -m pip install --upgrade pip
28+
pip install pre-commit
29+
30+
- name: Run pre-commit
31+
run: pre-commit run --all-files
32+
33+
tests:
34+
name: Tests
35+
runs-on: ubuntu-latest
36+
37+
steps:
38+
- name: Check out repository
39+
uses: actions/checkout@v4
40+
41+
- name: Set up Python
42+
uses: actions/setup-python@v5
43+
with:
44+
python-version: "3.12"
45+
cache: pip
46+
47+
- name: Install dependencies
48+
run: |
49+
python -m pip install --upgrade pip
50+
pip install -r requirements.txt
51+
pip install -r test-requirements.txt
52+
53+
- name: Run tests
54+
run: pytest tests/ -v

0 commit comments

Comments
 (0)