Skip to content

Commit 54e8108

Browse files
dharmab and claude committed
Pass location names to recognizer initial prompt
Adds a WithLocations functional option to NewWhisperRecognizer and the OpenAI recognizers. When set, location names are appended to the initial prompt so Whisper biases transcription towards known place names (e.g. "Incirlik" instead of "INSULIC"). The application wires this up automatically from config.Locations.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 458af9d commit 54e8108

5 files changed

Lines changed: 61 additions & 26 deletions

File tree

internal/application/app.go

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -153,26 +153,28 @@ func NewApplication(config conf.Configuration) (*Application, error) {
153153
)
154154
}
155155

156+
locationNames := make([]string, 0)
157+
for _, loc := range config.Locations {
158+
locationNames = append(locationNames, loc.Names...)
159+
}
160+
161+
recognizerOpts := []recognizer.Option{recognizer.WithLocations(locationNames)}
162+
156163
log.Info().Msg("constructing speech-to-text recognizer")
157164
var speechRecognizer recognizer.Recognizer
158165
switch config.Recognizer {
159166
case conf.WhisperLocal:
160-
speechRecognizer = recognizer.NewWhisperRecognizer(config.WhisperModel, config.Callsign)
167+
speechRecognizer = recognizer.NewWhisperRecognizer(config.WhisperModel, config.Callsign, recognizerOpts...)
161168
case conf.WhisperAPI:
162-
speechRecognizer = recognizer.NewWhisperAPIRecognizer(config.OpenAIAPIKey, config.Callsign)
169+
speechRecognizer = recognizer.NewWhisperAPIRecognizer(config.OpenAIAPIKey, config.Callsign, recognizerOpts...)
163170
case conf.GPT4o:
164-
speechRecognizer = recognizer.NewGPT4oRecognizer(config.OpenAIAPIKey, config.Callsign)
171+
speechRecognizer = recognizer.NewGPT4oRecognizer(config.OpenAIAPIKey, config.Callsign, recognizerOpts...)
165172
case conf.GPT4oMini:
166-
speechRecognizer = recognizer.NewGPT4oMiniRecognizer(config.OpenAIAPIKey, config.Callsign)
173+
speechRecognizer = recognizer.NewGPT4oMiniRecognizer(config.OpenAIAPIKey, config.Callsign, recognizerOpts...)
167174
default:
168175
return nil, fmt.Errorf("failed to construct application: unrecognized recognizer %q", config.Recognizer)
169176
}
170177

171-
locationNames := make([]string, 0)
172-
for _, loc := range config.Locations {
173-
locationNames = append(locationNames, loc.Names...)
174-
}
175-
176178
log.Info().Msg("constructing request parser")
177179
requestParser := parser.New(config.Callsign, locationNames, config.EnableTranscriptionLogging)
178180

pkg/recognizer/openai.go

Lines changed: 16 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -18,39 +18,44 @@ type openAIRecognizer struct {
1818
callsign string
1919
client *openai.Client
2020
model string
21+
recognizerOptions
2122
}
2223

2324
var _ Recognizer = &openAIRecognizer{}
2425

25-
func newOpenAIRecognizer(apiKey, model, callsign string) Recognizer {
26-
return &openAIRecognizer{
26+
func newOpenAIRecognizer(apiKey, model, callsign string, opts ...Option) Recognizer {
27+
r := &openAIRecognizer{
2728
callsign: callsign,
2829
client: openai.NewClient(
2930
option.WithAPIKey(apiKey),
3031
),
3132
model: model,
3233
}
34+
for _, opt := range opts {
35+
opt(&r.recognizerOptions)
36+
}
37+
return r
3338
}
3439

35-
func NewWhisperAPIRecognizer(apiKey, callsign string) Recognizer {
36-
return newOpenAIRecognizer(apiKey, "whisper-1", callsign)
40+
func NewWhisperAPIRecognizer(apiKey, callsign string, opts ...Option) Recognizer {
41+
return newOpenAIRecognizer(apiKey, "whisper-1", callsign, opts...)
3742
}
3843

3944
// NewGPT4oRecognizer creates a new recognizer using OpenAI Platform's GPT-4o model.
40-
func NewGPT4oRecognizer(apiKey, callsign string) Recognizer {
41-
return newOpenAIRecognizer(apiKey, "gpt-4o-transcribe", callsign)
45+
func NewGPT4oRecognizer(apiKey, callsign string, opts ...Option) Recognizer {
46+
return newOpenAIRecognizer(apiKey, "gpt-4o-transcribe", callsign, opts...)
4247
}
4348

4449
// NewGPT4oMiniRecognizer creates a new recognizer using OpenAI Platform's GPT-4o Mini model.
45-
func NewGPT4oMiniRecognizer(apiKey, callsign string) Recognizer {
46-
return newOpenAIRecognizer(apiKey, "gpt-4o-mini-transcribe", callsign)
50+
func NewGPT4oMiniRecognizer(apiKey, callsign string, opts ...Option) Recognizer {
51+
return newOpenAIRecognizer(apiKey, "gpt-4o-mini-transcribe", callsign, opts...)
4752
}
4853

4954
// NewOpenAIRecognizer creates a new recognizer using OpenAI Platform.
5055
//
5156
// Deprecated: Use NewWhisperAPIRecognizer, NewGPT4oRecognizer, or NewGPT4oMiniRecognizer instead.
52-
func NewOpenAIRecognizer(apiKey, callsign string) Recognizer { // nolint: revive // Ignore deprecated function
53-
return NewWhisperAPIRecognizer(apiKey, callsign)
57+
func NewOpenAIRecognizer(apiKey, callsign string, opts ...Option) Recognizer { // nolint: revive // Ignore deprecated function
58+
return NewWhisperAPIRecognizer(apiKey, callsign, opts...)
5459
}
5560

5661
// Recognize implements [Recognizer.Recognize] using OpenAI Platform's hosted GPT4 transcription model.
@@ -65,7 +70,7 @@ func (r *openAIRecognizer) Recognize(ctx context.Context, sample []float32, _ bo
6570
File: openai.FileParam(buf, "audio.wav", "audio/wav"),
6671
Model: openai.String(r.model),
6772
Language: openai.String("en"),
68-
Prompt: openai.String(prompt(r.callsign)),
73+
Prompt: openai.String(prompt(r.callsign, r.locations)),
6974
}
7075

7176
log.Info().Str("model", r.model).Msg("calling OpenAI Audio Transcriptions API")

pkg/recognizer/options.go

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,16 @@
1+
package recognizer
2+
3+
type recognizerOptions struct {
4+
locations []string
5+
}
6+
7+
// Option configures a recognizer.
8+
type Option func(*recognizerOptions)
9+
10+
// WithLocations adds location names to the recognizer's initial prompt,
11+
// improving transcription accuracy for place names.
12+
func WithLocations(locations []string) Option {
13+
return func(o *recognizerOptions) {
14+
o.locations = locations
15+
}
16+
}

pkg/recognizer/prompt.go

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,15 @@
11
package recognizer
22

3-
import "fmt"
3+
import (
4+
"fmt"
5+
"strings"
6+
)
47

58
// prompt constructs a prompt for OpenAI's audio transcription models. See https://platform.openai.com/docs/guides/speech-to-text#prompting
6-
func prompt(callsign string) string {
7-
return fmt.Sprintf("Either ANYFACE or %s, PILOT CALLSIGN, DIGITS, one of 'RADIO' 'ALPHA' 'BOGEY' 'PICTURE' 'DECLARE' 'SNAPLOCK' 'SPIKED', ARGUMENTS such as BULLSEYE, BRAA, numbers or digits.", callsign)
9+
func prompt(callsign string, locations []string) string {
10+
s := fmt.Sprintf("Either ANYFACE or %s, PILOT CALLSIGN, DIGITS, one of 'RADIO' 'ALPHA' 'BOGEY' 'PICTURE' 'DECLARE' 'SNAPLOCK' 'SPIKED', ARGUMENTS such as BULLSEYE, BRAA, numbers or digits.", callsign)
11+
if len(locations) > 0 {
12+
s += " Locations: " + strings.Join(locations, ", ") + "."
13+
}
14+
return s
815
}

pkg/recognizer/whisper.go

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,16 +14,21 @@ import (
1414
type whisperRecognizer struct {
1515
model whisper.Model
1616
callsign string
17+
recognizerOptions
1718
}
1819

1920
var _ Recognizer = &whisperRecognizer{}
2021

2122
// NewWhisperRecognizer creates a new recognizer using OpenAI Whisper.
22-
func NewWhisperRecognizer(model *whisper.Model, callsign string) Recognizer {
23-
return &whisperRecognizer{
23+
func NewWhisperRecognizer(model *whisper.Model, callsign string, opts ...Option) Recognizer {
24+
r := &whisperRecognizer{
2425
model: *model,
2526
callsign: callsign,
2627
}
28+
for _, opt := range opts {
29+
opt(&r.recognizerOptions)
30+
}
31+
return r
2732
}
2833

2934
const maxSize = 256 * 1024
@@ -40,7 +45,7 @@ func (r *whisperRecognizer) Recognize(ctx context.Context, sample []float32, ena
4045
return "", fmt.Errorf("error creating whisper context: %w", err)
4146
}
4247

43-
wCtx.SetInitialPrompt(prompt(r.callsign))
48+
wCtx.SetInitialPrompt(prompt(r.callsign, r.locations))
4449

4550
if wCtx.IsMultilingual() {
4651
_ = wCtx.SetLanguage("en")

0 commit comments

Comments
 (0)