diff --git a/internal/transcription/adapters/voxtral_adapter.go b/internal/transcription/adapters/voxtral_adapter.go
index 9c5e1097..2cb138e5 100644
--- a/internal/transcription/adapters/voxtral_adapter.go
+++ b/internal/transcription/adapters/voxtral_adapter.go
@@ -33,7 +33,11 @@ func NewVoxtralAdapter(envPath string) *VoxtralAdapter {
Description: "Mistral's multilingual audio transcription model",
Version: "1.0.0",
SupportedLanguages: []string{
- "en", "es", "fr", "de", "it", "pt", "nl", "pl", "ru", "zh", "ja", "ko",
+ "af", "ar", "hy", "az", "be", "bs", "bg", "ca", "zh", "hr", "cs", "da", "nl",
+ "en", "et", "fi", "fr", "gl", "de", "el", "he", "hi", "hu", "is", "id", "it",
+ "ja", "kn", "kk", "ko", "lv", "lt", "mk", "ms", "mr", "mi", "ne", "no", "fa",
+ "pl", "pt", "ro", "ru", "sr", "sk", "sl", "es", "sw", "sv", "tl", "ta", "th",
+ "tr", "uk", "ur", "vi", "cy", "auto",
// Voxtral supports many languages
},
SupportedFormats: []string{"wav", "mp3", "flac", "m4a", "ogg"},
@@ -59,11 +63,17 @@ func NewVoxtralAdapter(envPath string) *VoxtralAdapter {
schema := []interfaces.ParameterSchema{
// Language selection
{
- Name: "language",
- Type: "string",
- Required: false,
- Default: "en",
- Options: []string{"en", "es", "fr", "de", "it", "pt", "nl", "pl", "ru", "zh", "ja", "ko"},
+ Name: "language",
+ Type: "string",
+ Required: false,
+ Default: "en",
+ Options: []string{
+ "af", "ar", "hy", "az", "be", "bs", "bg", "ca", "zh", "hr", "cs", "da", "nl",
+ "en", "et", "fi", "fr", "gl", "de", "el", "he", "hi", "hu", "is", "id", "it",
+ "ja", "kn", "kk", "ko", "lv", "lt", "mk", "ms", "mr", "mi", "ne", "no", "fa",
+ "pl", "pt", "ro", "ru", "sr", "sk", "sl", "es", "sw", "sv", "tl", "ta", "th",
+ "tr", "uk", "ur", "vi", "cy", "auto",
+ },
Description: "Language of the audio",
Group: "basic",
},
@@ -277,7 +287,7 @@ func (v *VoxtralAdapter) buildVoxtralArgs(input interfaces.AudioInput, params ma
// Determine if we should use buffered mode based on audio duration
// Voxtral handles 30-40 minutes natively, use buffered mode for longer files
- useBuffered := input.Duration > 30*60 // More than 30 minutes
+ useBuffered := input.Duration > 30*time.Minute // More than 30 minutes
var scriptPath string
if useBuffered {
@@ -294,7 +304,7 @@ func (v *VoxtralAdapter) buildVoxtralArgs(input interfaces.AudioInput, params ma
}
// Add language
- if language := v.GetStringParameter(params, "language"); language != "" {
+ if language := v.GetStringParameter(params, "language"); language != "" && language != "auto" {
args = append(args, "--language", language)
}
diff --git a/web/frontend/src/components/transcription/TranscriptionConfigDialog.tsx b/web/frontend/src/components/transcription/TranscriptionConfigDialog.tsx
index 3e80d189..01f5da56 100644
--- a/web/frontend/src/components/transcription/TranscriptionConfigDialog.tsx
+++ b/web/frontend/src/components/transcription/TranscriptionConfigDialog.tsx
@@ -216,32 +216,6 @@ const CANARY_LANGUAGES = [
{ value: "fr", label: "French" },
];
-const VOXTRAL_LANGUAGES = [
- { value: "auto", label: "Auto-detect" },
- { value: "en", label: "English" },
- { value: "zh", label: "Chinese" },
- { value: "de", label: "German" },
- { value: "es", label: "Spanish" },
- { value: "ru", label: "Russian" },
- { value: "ko", label: "Korean" },
- { value: "fr", label: "French" },
- { value: "ja", label: "Japanese" },
- { value: "pt", label: "Portuguese" },
- { value: "tr", label: "Turkish" },
- { value: "pl", label: "Polish" },
- { value: "nl", label: "Dutch" },
- { value: "ar", label: "Arabic" },
- { value: "sv", label: "Swedish" },
- { value: "it", label: "Italian" },
- { value: "id", label: "Indonesian" },
- { value: "hi", label: "Hindi" },
- { value: "fi", label: "Finnish" },
- { value: "vi", label: "Vietnamese" },
- { value: "he", label: "Hebrew" },
- { value: "uk", label: "Ukrainian" },
- { value: "el", label: "Greek" },
-];
-
const PARAM_DESCRIPTIONS = {
model: "Size of the Whisper model. Larger = more accurate but slower.",
language: "Source language. Auto-detect works for most cases.",
@@ -1171,7 +1145,7 @@ function VoxtralConfig({ params, updateParam }: ConfigProps) {
- {VOXTRAL_LANGUAGES.map((l) => (
+ {LANGUAGES.map((l) => (
{l.label}
))}