Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/groq-tts-adapter.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@tanstack/ai-groq': minor
---

Add tree-shakeable Text-to-Speech (TTS) adapter for Groq with English and Arabic Orpheus voices, multiple output formats (default WAV), configurable speed and sample rate, model metadata, and unit tests.
168 changes: 168 additions & 0 deletions packages/ai-groq/src/adapters/tts.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import OpenAI from 'openai'
import { BaseTTSAdapter } from '@tanstack/ai/adapters'
import { toRunErrorPayload } from '@tanstack/ai/adapter-internals'
import { arrayBufferToBase64, generateId } from '@tanstack/ai-utils'
import { getGroqApiKeyFromEnv, withGroqDefaults } from '../utils/client'
import { validateAudioInput } from '../audio/audio-provider-options'
import type { TTSOptions, TTSResult } from '@tanstack/ai'
import type OpenAI_SDK from 'openai'
import type { GroqTTSModel } from '../model-meta'
import type { GroqTTSProviderOptions } from '../audio/tts-provider-options'
import type { GroqClientConfig } from '../utils'

/**
 * Configuration accepted by the Groq TTS adapter.
 *
 * Adds no fields of its own on top of `GroqClientConfig`; it provides a
 * TTS-specific name for the adapter constructor and factory signatures.
 */
export interface GroqTTSConfig extends GroqClientConfig {}

/**
 * Groq Text-to-Speech Adapter
 *
 * Tree-shakeable adapter for Groq TTS functionality. Groq exposes an
 * OpenAI-compatible `/audio/speech` endpoint, so the adapter drives it with
 * the OpenAI SDK via a `baseURL` override (the same pattern as the Groq text
 * adapter).
 *
 * Supports `canopylabs/orpheus-v1-english` and
 * `canopylabs/orpheus-arabic-saudi`.
 *
 * Features:
 * - English voices: autumn(f), diana(f), hannah(f), austin(m), daniel(m), troy(m)
 * - Arabic voices: fahad(m), sultan(m), lulwa(f), noura(f)
 * - Default voice follows the model language: `autumn` for English,
 *   `fahad` for Arabic
 * - Output formats: flac, mp3, mulaw, ogg, wav (default wav)
 * - Speed control
 * - Configurable sample rate via `modelOptions`
 */
export class GroqTTSAdapter<TModel extends GroqTTSModel> extends BaseTTSAdapter<
  TModel,
  GroqTTSProviderOptions
> {
  readonly name = 'groq' as const

  protected client: OpenAI

  /**
   * @param config - Groq client configuration, merged with Groq defaults
   *   before being handed to the OpenAI SDK client.
   * @param model - The TTS model this adapter instance is bound to.
   */
  constructor(config: GroqTTSConfig, model: TModel) {
    super(model, {})
    this.client = new OpenAI(withGroqDefaults(config))
  }

  /**
   * Synthesizes speech for `options.text` and returns it base64-encoded.
   *
   * @param options - Request options; `voice` defaults to a voice matching
   *   the model language, `format` defaults to `wav`.
   * @returns The generated audio with its format and MIME content type.
   * @throws Error when the input text fails `validateAudioInput`; any error
   *   raised by the SDK call is logged and rethrown unchanged.
   */
  async generateSpeech(
    options: TTSOptions<GroqTTSProviderOptions>,
  ): Promise<TTSResult> {
    const { model, text, voice, format, speed, modelOptions } = options

    validateAudioInput({ input: text, model: this.model })

    // The fallback voice must belong to the requested model's voice set:
    // English voices such as `autumn` are not listed for the Arabic model.
    const defaultVoice =
      model === 'canopylabs/orpheus-arabic-saudi' ? 'fahad' : 'autumn'

    // Spreading optional inputs conditionally keeps the request compatible
    // with the vendor SDK shape under exactOptionalPropertyTypes. `sample_rate`
    // is a Groq-only body field carried via modelOptions.
    const request: OpenAI_SDK.Audio.SpeechCreateParams = {
      model,
      input: text,
      voice: voice ?? defaultVoice,
      response_format: format ?? 'wav',
      ...(speed !== undefined && { speed }),
      ...(modelOptions ?? {}),
    }

    try {
      options.logger.request(
        `activity=tts provider=${this.name} model=${model} format=${request.response_format ?? 'default'} voice=${request.voice}`,
        { provider: this.name, model },
      )
      const response = await this.client.audio.speech.create(request)

      const arrayBuffer = await response.arrayBuffer()
      const base64 = arrayBufferToBase64(arrayBuffer)

      // `response_format` is optional in the SDK type, so the fallback is
      // needed for typing even though the request above always sets it.
      const outputFormat = request.response_format ?? 'wav'
      const contentType = this.getContentType(outputFormat)

      return {
        id: generateId(this.name),
        model,
        audio: base64,
        format: outputFormat,
        contentType,
      }
    } catch (error: unknown) {
      // Narrow before logging: raw SDK errors can carry request metadata
      // (including auth headers) which we must never surface to user loggers.
      options.logger.errors(`${this.name}.generateSpeech fatal`, {
        error: toRunErrorPayload(error, `${this.name}.generateSpeech failed`),
        source: `${this.name}.generateSpeech`,
      })
      throw error
    }
  }

  /**
   * Maps an output format name to its MIME content type.
   * Unknown formats fall back to `audio/wav`.
   */
  private getContentType(format: string): string {
    const contentTypes: Record<string, string> = {
      flac: 'audio/flac',
      mp3: 'audio/mpeg',
      mulaw: 'audio/basic',
      ogg: 'audio/ogg',
      wav: 'audio/wav',
    }
    return contentTypes[format] ?? 'audio/wav'
  }
}

/**
 * Builds a Groq speech adapter from an explicitly supplied API key.
 * The model type parameter is resolved at the call site.
 *
 * @param model - The model name (e.g., 'canopylabs/orpheus-v1-english')
 * @param apiKey - Your Groq API key
 * @param config - Optional extra client configuration (everything except `apiKey`)
 * @returns A Groq speech adapter bound to `model`
 *
 * @example
 * ```typescript
 * const adapter = createGroqSpeech('canopylabs/orpheus-v1-english', 'gsk_...')
 *
 * const result = await generateSpeech({
 *   adapter,
 *   text: 'Hello, world!',
 *   voice: 'autumn',
 * })
 * ```
 */
export function createGroqSpeech<TModel extends GroqTTSModel>(
  model: TModel,
  apiKey: string,
  config?: Omit<GroqTTSConfig, 'apiKey'>,
): GroqTTSAdapter<TModel> {
  // The explicit key goes first; `config` cannot carry its own `apiKey`.
  const adapterConfig: GroqTTSConfig = { apiKey, ...config }
  return new GroqTTSAdapter(adapterConfig, model)
}

/**
 * Builds a Groq speech adapter using an API key read from the environment
 * (`GROQ_API_KEY`) via `getGroqApiKeyFromEnv`.
 *
 * @param model - The model name (e.g., 'canopylabs/orpheus-v1-english')
 * @param config - Optional configuration (excluding apiKey, which is auto-detected)
 * @returns A Groq speech adapter bound to `model`
 * @throws Error if GROQ_API_KEY is not found in environment
 *
 * @example
 * ```typescript
 * const adapter = groqSpeech('canopylabs/orpheus-v1-english')
 *
 * const result = await generateSpeech({
 *   adapter,
 *   text: 'Welcome to TanStack AI!',
 *   voice: 'autumn',
 *   format: 'wav',
 * })
 * ```
 */
export function groqSpeech<TModel extends GroqTTSModel>(
  model: TModel,
  config?: Omit<GroqTTSConfig, 'apiKey'>,
): GroqTTSAdapter<TModel> {
  return createGroqSpeech(model, getGroqApiKeyFromEnv(), config)
}
25 changes: 25 additions & 0 deletions packages/ai-groq/src/audio/audio-provider-options.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/**
 * Options shared by Groq audio endpoints.
 */
export interface AudioProviderOptions {
  /**
   * The text to generate audio for.
   * Maximum length is 200 characters.
   * Use [directions] for vocal control (English voices only).
   */
  input: string
  /**
   * The audio model to use for generation.
   */
  model: string
}

/**
 * Rejects audio input text that is longer than the allowed maximum.
 *
 * @param options - The audio request options; only `input` is inspected.
 * @throws Error if input text exceeds 200 characters
 */
export const validateAudioInput = (options: AudioProviderOptions): void => {
  const maxLength = 200
  const { input } = options
  if (input.length <= maxLength) {
    return
  }
  throw new Error('Input text exceeds maximum length of 200 characters.')
}
48 changes: 48 additions & 0 deletions packages/ai-groq/src/audio/tts-provider-options.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/**
 * Groq TTS voice options for English models.
 *
 * Female voices: autumn, diana, hannah. Male voices: austin, daniel, troy.
 */
export type GroqTTSEnglishVoice =
| 'autumn'
| 'diana'
| 'hannah'
| 'austin'
| 'daniel'
| 'troy'

/**
 * Groq TTS voice options for Arabic models.
 *
 * Male voices: fahad, sultan. Female voices: lulwa, noura.
 */
export type GroqTTSArabicVoice = 'fahad' | 'sultan' | 'lulwa' | 'noura'

/**
 * Union of all Groq TTS voice options (English and Arabic).
 *
 * The union does not tie a voice to a model, so it alone does not prevent
 * pairing an English voice with the Arabic model or vice versa.
 */
export type GroqTTSVoice = GroqTTSEnglishVoice | GroqTTSArabicVoice

/**
 * Groq TTS output format options.
 *
 * The Groq TTS adapter uses `wav` when no format is supplied.
 */
export type GroqTTSFormat = 'flac' | 'mp3' | 'mulaw' | 'ogg' | 'wav'
Comment thread
dhamivibez marked this conversation as resolved.

/**
 * Groq TTS sample rate options, in Hz.
 *
 * Used as the type of `GroqTTSProviderOptions.sample_rate`.
 */
export type GroqTTSSampleRate =
| 8000
| 16000
| 22050
| 24000
| 32000
| 44100
| 48000

/**
 * Provider-specific options for Groq TTS.
 * These options are passed via `modelOptions` when calling `generateSpeech`.
 *
 * Field names use the wire format (snake_case) because the Groq TTS adapter
 * spreads `modelOptions` directly into the request body.
 */
export interface GroqTTSProviderOptions {
/**
 * The sample rate of the generated audio in Hz.
 * Optional; when omitted no `sample_rate` field is sent by the adapter.
 */
sample_rate?: GroqTTSSampleRate
}
22 changes: 20 additions & 2 deletions packages/ai-groq/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* @module @tanstack/ai-groq
*
* Groq provider adapter for TanStack AI.
* Provides tree-shakeable adapters for Groq's Chat Completions API.
* Provides tree-shakeable adapters for Groq's Chat Completions API and TTS API.
*/

// Text (Chat) adapter
Expand All @@ -14,16 +14,34 @@ export {
type GroqTextProviderOptions,
} from './adapters/text'

// TTS adapter - for text-to-speech
export {
GroqTTSAdapter,
createGroqSpeech,
groqSpeech,
type GroqTTSConfig,
} from './adapters/tts'
export type {
GroqTTSProviderOptions,
GroqTTSVoice,
GroqTTSEnglishVoice,
GroqTTSArabicVoice,
GroqTTSFormat,
GroqTTSSampleRate,
} from './audio/tts-provider-options'

// Types
export type {
GroqChatModelProviderOptionsByName,
GroqTTSModelProviderOptionsByName,
GroqChatModelToolCapabilitiesByName,
GroqModelInputModalitiesByName,
ResolveProviderOptions,
ResolveInputModalities,
GroqChatModels,
GroqTTSModel,
} from './model-meta'
export { GROQ_CHAT_MODELS } from './model-meta'
export { GROQ_CHAT_MODELS, GROQ_TTS_MODELS } from './model-meta'
export type {
GroqTextMetadata,
GroqImageMetadata,
Expand Down
65 changes: 61 additions & 4 deletions packages/ai-groq/src/model-meta.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import type { GroqTextProviderOptions } from './text/text-provider-options'
import type { GroqTTSProviderOptions } from './audio/tts-provider-options'

/**
* Internal metadata structure describing a Groq model's capabilities and pricing.
Expand Down Expand Up @@ -385,14 +386,23 @@ export type GroqChatModelToolCapabilitiesByName = {
[QWEN3_32B.name]: typeof QWEN3_32B.supports.tools
}

/**
 * Type-only map from Groq TTS model name to its provider options type.
 *
 * Every TTS model currently maps to the same `GroqTTSProviderOptions`.
 */
export type GroqTTSModelProviderOptionsByName = {
[K in GroqTTSModel]: GroqTTSProviderOptions
}

/**
* Resolves the provider options type for a specific Groq model.
* Falls back to generic GroqTextProviderOptions for unknown models.
* Checks TTS models first, then chat models, then falls back to generic options.
*/
export type ResolveProviderOptions<TModel extends string> =
TModel extends keyof GroqChatModelProviderOptionsByName
? GroqChatModelProviderOptionsByName[TModel]
: GroqTextProviderOptions
TModel extends GroqTTSModel
? GroqTTSProviderOptions
: TModel extends keyof GroqChatModelProviderOptionsByName
? GroqChatModelProviderOptionsByName[TModel]
: GroqTextProviderOptions

/**
* Resolve input modalities for a specific model.
Expand All @@ -402,3 +412,50 @@ export type ResolveInputModalities<TModel extends string> =
TModel extends keyof GroqModelInputModalitiesByName
? GroqModelInputModalitiesByName[TModel]
: readonly ['text']

// ============================================================================
// TTS Models
// ============================================================================

/**
 * Metadata for the English Orpheus TTS model: text input, audio output,
 * served on the `tts` endpoint, with no extra feature flags.
 *
 * NOTE(review): the unit of `pricing.input.normal` is not stated here —
 * presumably it follows the convention used by the other `ModelMeta`
 * entries in this file; confirm against Groq's published pricing.
 */
const ORPHEUS_V1_ENGLISH = {
name: 'canopylabs/orpheus-v1-english',
pricing: {
input: {
normal: 22,
},
},
supports: {
input: ['text'],
output: ['audio'],
endpoints: ['tts'],
features: [],
},
} as const satisfies ModelMeta<GroqTTSProviderOptions>

/**
 * Metadata for the Arabic (Saudi) Orpheus TTS model: text input, audio
 * output, served on the `tts` endpoint, with no extra feature flags.
 *
 * NOTE(review): the unit of `pricing.input.normal` is not stated here —
 * presumably it follows the convention used by the other `ModelMeta`
 * entries in this file; confirm against Groq's published pricing.
 */
const ORPHEUS_ARABIC_SAUDI = {
name: 'canopylabs/orpheus-arabic-saudi',
pricing: {
input: {
normal: 40,
},
},
supports: {
input: ['text'],
output: ['audio'],
endpoints: ['tts'],
features: [],
},
} as const satisfies ModelMeta<GroqTTSProviderOptions>

/**
 * All supported Groq TTS model identifiers.
 *
 * Declared `as const` so the `GroqTTSModel` union can be derived from the
 * tuple's element types.
 */
export const GROQ_TTS_MODELS = [
ORPHEUS_V1_ENGLISH.name,
ORPHEUS_ARABIC_SAUDI.name,
] as const

/**
 * Union type of all supported Groq TTS model names, derived from the
 * elements of `GROQ_TTS_MODELS` so the two cannot drift apart.
 */
export type GroqTTSModel = (typeof GROQ_TTS_MODELS)[number]
Comment thread
dhamivibez marked this conversation as resolved.
Loading