Skip to content
200 changes: 199 additions & 1 deletion docs.json
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,12 @@
"tutorials/partner-nodes/bytedance/seedream-5-lite"
]
},
{
"group": "ElevenLabs",
"pages": [
"tutorials/partner-nodes/elevenlabs/elevenlabs"
]
},
{
"group": "Google",
"pages": [
Expand All @@ -326,6 +332,42 @@
"tutorials/partner-nodes/google/nano-banana-2"
]
},
{
"group": "Grok",
"pages": [
"tutorials/partner-nodes/grok/grok"
]
},
{
"group": "HitPaw",
"pages": [
"tutorials/partner-nodes/hitpaw/hitpaw"
]
},
{
"group": "Magnific",
"pages": [
"tutorials/partner-nodes/magnific/magnific"
]
},
{
"group": "MiniMax",
"pages": [
"tutorials/partner-nodes/minimax/minimax"
]
},
{
"group": "PixVerse",
"pages": [
"tutorials/partner-nodes/pixverse/pixverse"
]
},
{
"group": "Quiver",
"pages": [
"tutorials/partner-nodes/quiver/quiver"
]
},
{
"group": "Stability AI",
"pages": [
Expand Down Expand Up @@ -391,6 +433,12 @@
"tutorials/partner-nodes/rodin/model-generation"
]
},
{
"group": "Topaz",
"pages": [
"tutorials/partner-nodes/topaz/topaz"
]
},
{
"group": "Tripo",
"pages": [
Expand Down Expand Up @@ -421,6 +469,18 @@
"tutorials/partner-nodes/reve/reve-image"
]
},
{
"group": "Veo",
"pages": [
"tutorials/partner-nodes/veo/veo"
]
},
{
"group": "Vidu",
"pages": [
"tutorials/partner-nodes/vidu/vidu"
]
},
{
"group": "Wan",
"pages": [
Expand All @@ -432,6 +492,12 @@
"pages": [
"tutorials/partner-nodes/sonilo/video-to-music"
]
},
{
"group": "WaveSpeed",
"pages": [
"tutorials/partner-nodes/wavespeed/wavespeed"
]
}
]
}
Expand Down Expand Up @@ -2414,6 +2480,12 @@
"zh/tutorials/partner-nodes/bytedance/seedream-5-lite"
]
},
{
"group": "ElevenLabs",
"pages": [
"zh/tutorials/partner-nodes/elevenlabs/elevenlabs"
]
},
{
"group": "Google",
"pages": [
Expand All @@ -2422,6 +2494,42 @@
"zh/tutorials/partner-nodes/google/nano-banana-2"
]
},
{
"group": "Grok",
"pages": [
"zh/tutorials/partner-nodes/grok/grok"
]
},
{
"group": "HitPaw",
"pages": [
"zh/tutorials/partner-nodes/hitpaw/hitpaw"
]
},
{
"group": "Magnific",
"pages": [
"zh/tutorials/partner-nodes/magnific/magnific"
]
},
{
"group": "MiniMax",
"pages": [
"zh/tutorials/partner-nodes/minimax/minimax"
]
},
{
"group": "PixVerse",
"pages": [
"zh/tutorials/partner-nodes/pixverse/pixverse"
]
},
{
"group": "Quiver",
"pages": [
"zh/tutorials/partner-nodes/quiver/quiver"
]
},
{
"group": "Stability AI",
"pages": [
Expand Down Expand Up @@ -2487,6 +2595,12 @@
"zh/tutorials/partner-nodes/rodin/model-generation"
]
},
{
"group": "Topaz",
"pages": [
"zh/tutorials/partner-nodes/topaz/topaz"
]
},
{
"group": "Tripo",
"pages": [
Expand Down Expand Up @@ -2517,6 +2631,18 @@
"zh/tutorials/partner-nodes/reve/reve-image"
]
},
{
"group": "Veo",
"pages": [
"zh/tutorials/partner-nodes/veo/veo"
]
},
{
"group": "Vidu",
"pages": [
"zh/tutorials/partner-nodes/vidu/vidu"
]
},
{
"group": "Wan",
"pages": [
Expand All @@ -2528,6 +2654,12 @@
"pages": [
"zh/tutorials/partner-nodes/sonilo/video-to-music"
]
},
{
"group": "WaveSpeed",
"pages": [
"zh/tutorials/partner-nodes/wavespeed/wavespeed"
]
}
]
}
Expand Down Expand Up @@ -4515,6 +4647,12 @@
"ja/tutorials/partner-nodes/bytedance/seedream-5-lite"
]
},
{
"group": "ElevenLabs",
"pages": [
"ja/tutorials/partner-nodes/elevenlabs/elevenlabs"
]
},
{
"group": "Google",
"pages": [
Expand All @@ -4523,6 +4661,42 @@
"ja/tutorials/partner-nodes/google/nano-banana-2"
]
},
{
"group": "Grok",
"pages": [
"ja/tutorials/partner-nodes/grok/grok"
]
},
{
"group": "HitPaw",
"pages": [
"ja/tutorials/partner-nodes/hitpaw/hitpaw"
]
},
{
"group": "Magnific",
"pages": [
"ja/tutorials/partner-nodes/magnific/magnific"
]
},
{
"group": "MiniMax",
"pages": [
"ja/tutorials/partner-nodes/minimax/minimax"
]
},
{
"group": "PixVerse",
"pages": [
"ja/tutorials/partner-nodes/pixverse/pixverse"
]
},
{
"group": "Quiver",
"pages": [
"ja/tutorials/partner-nodes/quiver/quiver"
]
},
{
"group": "Stability AI",
"pages": [
Expand Down Expand Up @@ -4588,6 +4762,12 @@
"ja/tutorials/partner-nodes/rodin/model-generation"
]
},
{
"group": "Topaz",
"pages": [
"ja/tutorials/partner-nodes/topaz/topaz"
]
},
{
"group": "Tripo",
"pages": [
Expand Down Expand Up @@ -4618,6 +4798,18 @@
"ja/tutorials/partner-nodes/reve/reve-image"
]
},
{
"group": "Veo",
"pages": [
"ja/tutorials/partner-nodes/veo/veo"
]
},
{
"group": "Vidu",
"pages": [
"ja/tutorials/partner-nodes/vidu/vidu"
]
},
{
"group": "Wan",
"pages": [
Expand All @@ -4629,6 +4821,12 @@
"pages": [
"ja/tutorials/partner-nodes/sonilo/video-to-music"
]
},
{
"group": "WaveSpeed",
"pages": [
"ja/tutorials/partner-nodes/wavespeed/wavespeed"
]
}
]
}
Expand Down Expand Up @@ -6599,4 +6797,4 @@
"destination": "/zh/:slug*"
}
]
}
}
31 changes: 31 additions & 0 deletions tutorials/partner-nodes/elevenlabs/eleven-multilingual-v2.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
---
title: "ElevenLabs Text-to-Speech — Eleven v3 / Multilingual v2 / Flash v2.5"
description: "Generate lifelike, emotionally expressive speech from text using ElevenLabs' TTS models in ComfyUI."
sidebarTitle: "Eleven Multilingual v2 / v3"
---

import ReqHint from "/snippets/tutorials/partner-nodes/req-hint.mdx";
import UpdateReminder from "/snippets/tutorials/update-reminder.mdx";

Generate speech from text using ElevenLabs' flagship Text-to-Speech models. Choose from the high-quality **Eleven v3**, the stable **Multilingual v2** supporting 29 languages, or the low-latency **Flash v2.5** for performance-critical applications.

## Key capabilities

- **Emotionally expressive speech** — Natural intonation, pacing, and tone across all three model tiers
- **Multi-language support** — 29 languages with Multilingual v2, broader coverage with Flash v2.5
- **Low-latency option** — Flash v2.5 for real-time and performance-sensitive use cases
- **Voice customization** — Use the ElevenLabs voice library or clone custom voices

<ReqHint/>
<UpdateReminder/>

## Available workflows

<CardGroup cols={2}>
<Card title="Run on Cloud" icon="cloud" href="https://cloud.comfy.org/?template=api_elevenlabs_text_to_speech&utm_source=docs">
Generate speech from text directly in your browser.
</Card>
<Card title="Download workflow" icon="download" href="https://github.com/Comfy-Org/workflow_templates/blob/main/templates/api_elevenlabs_text_to_speech.json">
Download the workflow JSON.
</Card>
</CardGroup>
47 changes: 47 additions & 0 deletions tutorials/partner-nodes/elevenlabs/overview.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
---
title: "ElevenLabs Text-to-Speech, Speech-to-Text, Sound Effects & Voice Tools"
description: "Generate natural, emotionally rich speech, transcribe audio, create sound effects, and isolate voices using ElevenLabs in ComfyUI."
sidebarTitle: "ElevenLabs"
---

import ReqHint from "/snippets/tutorials/partner-nodes/req-hint.mdx";
import UpdateReminder from "/snippets/tutorials/update-reminder.mdx";

ElevenLabs brings world-class voice AI to ComfyUI, covering text-to-speech, speech-to-text, sound effects, voice isolation, and voice transformation. Its **Eleven v3** and **Multilingual v2** models deliver emotionally expressive speech across 29 languages, while **Scribe v2** provides fast, accurate transcription with speaker diarization and entity detection.

## Key capabilities

- **Text to Speech** — Generate lifelike audio from text with three model tiers (Eleven v3, Multilingual v2, Flash v2.5)
- **Speech to Speech** — Transform a source audio clip into a different voice while keeping the original intonation and pacing
- **Speech to Text** — Transcribe audio with Scribe v2: 90+ languages, 32-speaker diarization, entity detection, keyword boost
- **Sound Effects** — Generate audio effects from text descriptions
- **Voice Isolation** — Separate voice from background noise, music, or ambient sound
- **Multi-Character Dialogue** — Generate conversations between multiple distinct voices in a single pass

<ReqHint/>
<UpdateReminder/>

## Models

ElevenLabs offers a suite of specialized voice AI models, each optimized for a distinct task. From high-fidelity speech generation to real-time transcription and audio editing, the following models are available as workflow-powered nodes in ComfyUI.

<CardGroup cols={2}>
<Card title="Eleven Multilingual v2 / v3" icon="microphone" href="./eleven-multilingual-v2">
High-fidelity text-to-speech with three model tiers.
</Card>
<Card title="Scribe v2" icon="document-text" href="./scribe-v2">
Fast, accurate speech-to-text with speaker diarization.
</Card>
<Card title="Speech to Speech" icon="shuffle" href="./speech-to-speech">
Transform audio into a different voice while preserving delivery.
</Card>
<Card title="Text to Sound Effects" icon="musical-note" href="./text-to-sound-effects">
Generate sound effects and ambient audio from text.
</Card>
<Card title="Text to Dialogue" icon="chat" href="./text-to-dialogue">
Multi-character dialogue with distinct voices in one pass.
</Card>
<Card title="Voice Isolation" icon="mic-off" href="./voice-isolation">
Separate voice from background noise and music.
</Card>
</CardGroup>
Loading
Loading