ComfyUI-Runware/modules/audioInferenceReferenceVoices.py at 283d383c29af9d40b7ef46a6daabffdcc1e4d7cb · Runware/ComfyUI-Runware · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
"""
Runware Audio Inference Inputs Reference Audio node.
Builds inputs.referenceVoices for zero-shot voice cloning (up to 4 entries).
"""

from typing import Any, Dict, List

from .utils import runwareUtils as rwUtils


class RunwareAudioInferenceReferenceVoices:
    """Node that assembles the ``inputs.referenceVoices`` list for TTS models.

    The node exposes ``MAX_REFERENCE_VOICES`` numbered slots. Each slot has
    an enable toggle, an audio reference and a transcript; enabled and fully
    filled slots become ``{"audio": ..., "text": ...}`` entries of the output
    list (intended for Fish Audio and other TTS models).
    """

    # Number of numbered reference-voice slots offered by the node.
    MAX_REFERENCE_VOICES = 4

    # Node metadata attributes.
    CATEGORY = "Runware/Audio"
    FUNCTION = "createReferenceVoices"
    RETURN_TYPES = ("RUNWAREAUDIOINFERENCEREFERENCEVOICES",)
    RETURN_NAMES = ("referenceVoices",)
    DESCRIPTION = (
        "Configure inputs.referenceVoices for zero-shot voice cloning (up to 4 entries). "
        'Each entry: { "audio": "<UUID/URL/base64>", "text": "<transcript>" }. '
        "Connect to Runware Audio Inference Inputs."
    )

    @classmethod
    def INPUT_TYPES(cls):
        """Describe the node's inputs: three optional widgets per voice slot.

        For every slot ``i`` in ``1..MAX_REFERENCE_VOICES`` the optional
        section receives, in this order, ``useReferenceVoice{i}`` (BOOLEAN),
        ``audio{i}`` (STRING) and ``text{i}`` (multiline STRING). The tooltips
        embed the label returned by ``rwUtils.getOrdinal(i)``.

        Returns:
            A dict with an empty ``"required"`` section and the generated
            ``"optional"`` section.
        """
        slot_widgets = {}
        for slot in range(1, cls.MAX_REFERENCE_VOICES + 1):
            nth = rwUtils.getOrdinal(slot)
            toggle_spec = ("BOOLEAN", {
                "default": False,
                "tooltip": f"Enable to include the {nth} reference voice in inputs.referenceVoices.",
            })
            audio_spec = ("STRING", {
                "default": "",
                "tooltip": f"Reference audio clip ({nth}) as media UUID, URL, or base64. Required when enabled.",
            })
            transcript_spec = ("STRING", {
                "multiline": True,
                "default": "",
                "tooltip": f"Transcript of the {nth} reference audio clip (1–1000 characters). Required when enabled.",
            })
            # Insertion order is kept as toggle -> audio -> text for each slot.
            slot_widgets.update({
                f"useReferenceVoice{slot}": toggle_spec,
                f"audio{slot}": audio_spec,
                f"text{slot}": transcript_spec,
            })

        return {"required": {}, "optional": slot_widgets}

    def createReferenceVoices(self, **kwargs) -> tuple[List[Dict[str, Any]]]:
        """Collect the enabled, fully specified reference voices.

        Args:
            **kwargs: Widget values keyed ``useReferenceVoice{i}``,
                ``audio{i}`` and ``text{i}`` for ``i`` in
                ``1..MAX_REFERENCE_VOICES``. Missing keys are treated as
                disabled / empty; keys for other slot numbers are ignored.

        Returns:
            A one-element tuple holding the list of
            ``{"audio": ..., "text": ...}`` dicts in slot order. A slot is
            left out when its toggle is falsy or when its audio or text is
            empty after stripping surrounding whitespace. No transcript
            length check is performed here.
        """
        collected: List[Dict[str, Any]] = []

        for slot in range(1, self.MAX_REFERENCE_VOICES + 1):
            enabled = kwargs.get(f"useReferenceVoice{slot}", False)
            if enabled:
                # ``or ""`` turns None (and other falsy values) into an empty string.
                clip = (kwargs.get(f"audio{slot}") or "").strip()
                transcript = (kwargs.get(f"text{slot}") or "").strip()
                if clip and transcript:
                    collected.append({"audio": clip, "text": transcript})

        return (collected,)


# Node identifier -> implementing class.
# NOTE(review): presumably consumed by the ComfyUI node loader — confirm.
NODE_CLASS_MAPPINGS = {}
NODE_CLASS_MAPPINGS["RunwareAudioInferenceReferenceVoices"] = (
    RunwareAudioInferenceReferenceVoices
)

# Node identifier -> human-readable display name for the same node.
NODE_DISPLAY_NAME_MAPPINGS = {}
NODE_DISPLAY_NAME_MAPPINGS["RunwareAudioInferenceReferenceVoices"] = (
    "Runware Audio Inference Inputs Reference Audio"
)