@@ -7,16 +7,17 @@ function Request-AudioTranscription {
77 [string ]$File ,
88
99 [Parameter ()]
10- [Completions (' whisper-1' , ' gpt-4o-transcribe' , ' gpt-4o-mini-transcribe' )]
10+ [Completions (' whisper-1' , ' gpt-4o-transcribe' , ' gpt-4o-mini-transcribe' , ' gpt-4o-transcribe-diarize ' )]
1111 [string ]$Model = ' whisper-1' ,
1212
1313 [Parameter ()]
1414 [string ]$Prompt ,
1515
1616 [Parameter ()]
1717 [Alias (' response_format' )]
18- [ValidateSet (' json' , ' text' , ' srt' , ' verbose_json' , ' vtt' )]
19- [string ]$Format = ' text' ,
18+ [Alias (' Format' )] # for backward compatibility
19+ [ValidateSet (' json' , ' text' , ' srt' , ' verbose_json' , ' vtt' , ' diarized_json' )]
20+ [string ]$ResponseFormat = ' text' ,
2021
2122 [Parameter ()]
2223 [ValidateRange (0.0 , 1.0 )]
@@ -26,6 +27,29 @@ function Request-AudioTranscription {
2627 [Completions (' logprobs' )]
2728 [string []]$Include ,
2829
30+ [Parameter ()]
31+ [Alias (' known_speaker_names' )]
32+ [string []]$KnownSpeakerNames ,
33+
34+ [Parameter ()]
35+ [Alias (' known_speaker_references' )]
36+ [string []]$KnownSpeakerReferences ,
37+
38+ [Parameter ()]
39+ [ValidateSet (' auto' , ' server_vad' )]
40+ [Alias (' chunking_strategy' )]
41+ [string ]$ChunkingStrategy = ' auto' ,
42+
43+ [Parameter ()]
44+ [ValidateRange (0.0 , 1.0 )]
45+ [float ]$ChunkingStrategyThreshold ,
46+
47+ [Parameter ()]
48+ [uint16 ]$ChunkingStrategyPrefixPadding ,
49+
50+ [Parameter ()]
51+ [uint16 ]$ChunkingStrategySilenceDuration ,
52+
2953 [Parameter ()]
3054 [ValidateSet (' word' , ' segment' )]
3155 [Alias (' timestamp_granularities' )]
@@ -108,8 +132,8 @@ function Request-AudioTranscription {
108132 $PostBody.model = $Model
109133 }
110134 $PostBody.file = $FileInfo
111- if ($Format ) {
112- $PostBody.response_format = $Format
135+ if ($ResponseFormat ) {
136+ $PostBody.response_format = $ResponseFormat
113137 }
114138 if ($PSBoundParameters.ContainsKey (' Prompt' )) {
115139 $PostBody.prompt = $Prompt
@@ -123,6 +147,51 @@ function Request-AudioTranscription {
123147 if ($PSBoundParameters.ContainsKey (' Include' )) {
124148 $PostBody .' include[]' = $Include
125149 }
150+
151+ if ($PSBoundParameters.ContainsKey (' KnownSpeakerNames' )) {
152+ $PostBody .' known_speaker_names[]' = $KnownSpeakerNames
153+ }
154+ if ($PSBoundParameters.ContainsKey (' KnownSpeakerReferences' )) {
155+ $KnownSpeakerReferencesFileInfoList = @ ()
156+ foreach ($ref in $KnownSpeakerReferences ) {
157+ $KnownSpeakerReferencesFileInfoList += Resolve-FileInfo $ref
158+ }
159+ if ($KnownSpeakerReferencesFileInfoList.Count -gt 0 ) {
160+ $PostBody .' known_speaker_references[]' = $KnownSpeakerReferencesFileInfoList
161+ }
162+ }
163+
#region Chunking Strategy
# Builds the 'chunking_strategy' form field. It is sent either as the literal
# string 'auto' or as a compact JSON object describing server-side VAD:
#   {"type":"server_vad","threshold":..,"prefix_padding_ms":..,"silence_duration_ms":..}
#
# Resolution order:
#   1. -ChunkingStrategy auto (explicit)         -> 'auto'; VAD tuning values are not sent.
#   2. -ChunkingStrategy server_vad and/or any
#      ChunkingStrategy* tuning parameter        -> server_vad JSON object.
#   3. Nothing specified, diarization model      -> 'auto' (the field is required there).
#   4. Otherwise                                 -> field is omitted.
$IsChunkingStrategyBound = $PSBoundParameters.ContainsKey('ChunkingStrategy')
$IsExplicitAuto = $IsChunkingStrategyBound -and ($ChunkingStrategy -eq 'auto')

$ChunkingStrategyOptions = @{}
if (-not $IsExplicitAuto) {
    if ($IsChunkingStrategyBound) {
        # ValidateSet only allows 'auto' or 'server_vad', so this is 'server_vad'.
        $ChunkingStrategyOptions.type = 'server_vad'
    }
    # The tuning values must be collected even when -ChunkingStrategy server_vad
    # was passed explicitly; previously they were only read when it was omitted,
    # so an explicit 'server_vad' silently dropped them.
    if ($PSBoundParameters.ContainsKey('ChunkingStrategyThreshold')) {
        $ChunkingStrategyOptions.threshold = $ChunkingStrategyThreshold
    }
    if ($PSBoundParameters.ContainsKey('ChunkingStrategyPrefixPadding')) {
        $ChunkingStrategyOptions.prefix_padding_ms = $ChunkingStrategyPrefixPadding
    }
    if ($PSBoundParameters.ContainsKey('ChunkingStrategySilenceDuration')) {
        $ChunkingStrategyOptions.silence_duration_ms = $ChunkingStrategySilenceDuration
    }
}

if ($IsExplicitAuto) {
    $PostBody.chunking_strategy = 'auto'
}
elseif ($ChunkingStrategyOptions.Keys.Count -gt 0) {
    # Supplying any tuning value implies server_vad, even without -ChunkingStrategy.
    $ChunkingStrategyOptions.type = 'server_vad'
    $PostBody.chunking_strategy = ConvertTo-Json $ChunkingStrategyOptions -Compress
}
elseif ($Model -like '*diarize*') {
    # chunking_strategy parameter is required when using diarization models
    $PostBody.chunking_strategy = 'auto'
}
#endregion Chunking Strategy
194+
126195 if ($Language ) {
127196 $PostBody.language = $Language
128197 }
0 commit comments