|
1 | 1 | // SPDX-FileCopyrightText: 2025 LiveKit, Inc. |
2 | 2 | // |
3 | 3 | // SPDX-License-Identifier: Apache-2.0 |
4 | | -import { beforeAll, describe, expect, it } from 'vitest'; |
| 4 | +import { beforeAll, describe, expect, it, vi } from 'vitest'; |
5 | 5 | import { normalizeLanguage } from '../language.js'; |
6 | 6 | import { initializeLogger } from '../log.js'; |
7 | 7 | import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js'; |
8 | | -import { TTS, type TTSFallbackModel, normalizeTTSFallback, parseTTSModelString } from './tts.js'; |
| 8 | +import { |
| 9 | + TTS, |
| 10 | + type TTSFallbackModel, |
| 11 | + hasAlignedTranscript, |
| 12 | + normalizeTTSFallback, |
| 13 | + parseTTSModelString, |
| 14 | +} from './tts.js'; |
9 | 15 |
|
10 | 16 | beforeAll(() => { |
11 | 17 | initializeLogger({ level: 'silent', pretty: false }); |
@@ -352,3 +358,114 @@ describe('TTS provider modelOptions parity', () => { |
352 | 358 | expect(tts['opts'].modelOptions).toEqual(modelOptions); |
353 | 359 | }); |
354 | 360 | }); |
| 361 | + |
/**
 * Pins the expected result of `hasAlignedTranscript(model, modelOptions)` for
 * each provider prefix and opt-in flag exercised below.
 */
describe('hasAlignedTranscript', () => {
  it('returns false for unknown provider', () => {
    // Opt-in flags from other providers are expected to be ignored here.
    const rimeResult = hasAlignedTranscript('rime/mistv2', { add_timestamps: true });
    expect(rimeResult).toBe(false);

    const deepgramResult = hasAlignedTranscript('deepgram/aura-2', { sync_alignment: true });
    expect(deepgramResult).toBe(false);
  });

  it('returns false for an empty options payload', () => {
    const emptyOptions = hasAlignedTranscript('cartesia/sonic', {});
    expect(emptyOptions).toBe(false);

    const missingOptions = hasAlignedTranscript('elevenlabs/eleven_flash_v2', undefined);
    expect(missingOptions).toBe(false);

    // A missing model is also expected to yield false, even with a flag set.
    const missingModel = hasAlignedTranscript(undefined, { add_timestamps: true });
    expect(missingModel).toBe(false);
  });

  it('detects Cartesia add_timestamps opt-in', () => {
    const enabled = hasAlignedTranscript('cartesia/sonic', { add_timestamps: true });
    expect(enabled).toBe(true);

    const disabled = hasAlignedTranscript('cartesia/sonic-3', { add_timestamps: false });
    expect(disabled).toBe(false);
  });

  it('detects ElevenLabs sync_alignment opt-in', () => {
    const enabled = hasAlignedTranscript('elevenlabs/eleven_flash_v2', { sync_alignment: true });
    expect(enabled).toBe(true);

    const disabled = hasAlignedTranscript('elevenlabs/eleven_multilingual_v2', {
      sync_alignment: false,
    });
    expect(disabled).toBe(false);
  });

  it('detects Inworld WORD/CHARACTER timestamp types', () => {
    const wordLevel = hasAlignedTranscript('inworld/inworld-tts-1', { timestamp_type: 'WORD' });
    expect(wordLevel).toBe(true);

    const characterLevel = hasAlignedTranscript('inworld/inworld-tts-1', {
      timestamp_type: 'CHARACTER',
    });
    expect(characterLevel).toBe(true);

    // The unspecified enum value is expected to count as "no alignment".
    const unspecified = hasAlignedTranscript('inworld/inworld-tts-1', {
      timestamp_type: 'TIMESTAMP_TYPE_UNSPECIFIED',
    });
    expect(unspecified).toBe(false);
  });
});
| 398 | + |
/**
 * Checks that a TTS instance's `capabilities.alignedTranscript` follows its
 * model / modelOptions, both at construction and after `updateOptions`, and
 * that `updateOptions` invalidates the connection pool when options change.
 */
describe('TTS alignedTranscript capability', () => {
  it('defaults to alignedTranscript=false when no opt-in is provided', () => {
    const { alignedTranscript } = makeTts().capabilities;
    expect(alignedTranscript).toBe(false);
  });

  it('reports alignedTranscript=true when Cartesia add_timestamps is set', () => {
    const { alignedTranscript } = makeTts({
      model: 'cartesia/sonic',
      modelOptions: { add_timestamps: true },
    }).capabilities;
    expect(alignedTranscript).toBe(true);
  });

  it('reports alignedTranscript=true when ElevenLabs sync_alignment is set', () => {
    const { alignedTranscript } = makeTts({
      model: 'elevenlabs/eleven_flash_v2',
      modelOptions: { sync_alignment: true },
    }).capabilities;
    expect(alignedTranscript).toBe(true);
  });

  it('reports alignedTranscript=true when Inworld timestamp_type is WORD', () => {
    const { alignedTranscript } = makeTts({
      model: 'inworld/inworld-tts-1',
      modelOptions: { timestamp_type: 'WORD' },
    }).capabilities;
    expect(alignedTranscript).toBe(true);
  });

  it('recomputes alignedTranscript when updateOptions changes modelOptions', () => {
    const instance = makeTts({ model: 'cartesia/sonic' });
    // Read the capability afresh after every update rather than caching it.
    expect(instance.capabilities.alignedTranscript).toBe(false);

    instance.updateOptions({ modelOptions: { add_timestamps: true } });
    expect(instance.capabilities.alignedTranscript).toBe(true);

    instance.updateOptions({ modelOptions: { add_timestamps: false } });
    expect(instance.capabilities.alignedTranscript).toBe(false);
  });

  it('recomputes alignedTranscript when updateOptions changes the model', () => {
    // sync_alignment paired with a Cartesia model is expected to report false.
    const instance = makeTts({
      model: 'cartesia/sonic',
      modelOptions: { sync_alignment: true },
    });
    expect(instance.capabilities.alignedTranscript).toBe(false);

    // Only the model changes here; the test expects the capability to flip to true.
    instance.updateOptions({ model: 'elevenlabs/eleven_flash_v2' });
    expect(instance.capabilities.alignedTranscript).toBe(true);
  });

  it('invalidates the connection pool when session-affecting options change', () => {
    const instance = makeTts({ model: 'cartesia/sonic' });
    const poolInvalidate = vi.spyOn(instance.pool, 'invalidate');

    // Each of the four option kinds below is expected to add one invalidation.
    instance.updateOptions({ modelOptions: { add_timestamps: true } });
    expect(poolInvalidate).toHaveBeenCalledTimes(1);

    instance.updateOptions({ model: 'elevenlabs/eleven_flash_v2' });
    expect(poolInvalidate).toHaveBeenCalledTimes(2);

    instance.updateOptions({ voice: 'narrator' });
    expect(poolInvalidate).toHaveBeenCalledTimes(3);

    instance.updateOptions({ language: 'en' });
    expect(poolInvalidate).toHaveBeenCalledTimes(4);

    // Empty update should not churn the pool.
    instance.updateOptions({});
    expect(poolInvalidate).toHaveBeenCalledTimes(4);
  });
});
0 commit comments