-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathWhisperTextInjectionPipeline.cs
More file actions
105 lines (90 loc) · 3.23 KB
/
Copy pathWhisperTextInjectionPipeline.cs
File metadata and controls
105 lines (90 loc) · 3.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
using SharpHook;
using SharpHook.Data;
namespace PrimeDictate;
/// <summary>
/// Transcribes through the selected engine, then updates the focused control via final-only Unicode input.
/// Target injection is intentionally final-only: partial hypotheses are not typed into editors because repeated
/// correction loops fight autocomplete, caret movement, and slow input targets.
/// </summary>
/// <remarks>
/// Implements <see cref="IAsyncDisposable"/> so the pipeline can be used with <c>await using</c>; disposal is
/// forwarded to the owned transcription engine host.
/// </remarks>
internal sealed class WhisperTextInjectionPipeline : IAsyncDisposable
{
    private readonly TranscriptionEngineHost transcriptionEngines = new();
    private readonly EventSimulator eventSimulator = new();

    /// <summary>
    /// Configuration summary reported by the underlying transcription engine host.
    /// </summary>
    public string ConfigurationSummary => this.transcriptionEngines.ConfigurationSummary;

    /// <summary>
    /// Forwards the requested transcription settings, unchanged, to the transcription engine host.
    /// </summary>
    /// <param name="transcriptionBackend">Backend kind to pass to the engine host.</param>
    /// <param name="transcriptionComputeInterface">Compute interface to pass to the engine host.</param>
    /// <param name="selectedModelId">Selected model identifier, or <see langword="null"/>.</param>
    /// <param name="configuredModelPath">Configured model path, or <see langword="null"/>.</param>
    public void UpdateConfiguration(
        TranscriptionBackendKind transcriptionBackend,
        TranscriptionComputeInterface transcriptionComputeInterface,
        string? selectedModelId,
        string? configuredModelPath)
    {
        this.transcriptionEngines.UpdateConfiguration(
            transcriptionBackend,
            transcriptionComputeInterface,
            selectedModelId,
            configuredModelPath);
    }

    /// <summary>
    /// Full-buffer transcription with no target mutation.
    /// </summary>
    /// <param name="audio">Audio to transcribe. An empty buffer short-circuits without calling the engine.</param>
    /// <param name="cancellationToken">Token passed through to the transcription engine host.</param>
    /// <param name="logTranscript">
    /// When <see langword="true"/>, the request details and the outcome are written to <c>AppLog</c>.
    /// Note that the success entry includes the transcribed text itself.
    /// </param>
    /// <returns>
    /// The transcript with leading and trailing whitespace removed, or <see cref="string.Empty"/> when the
    /// audio buffer is empty or the engine produced no text.
    /// </returns>
    public async ValueTask<string> TranscribeAsync(
        PcmAudioBuffer audio,
        CancellationToken cancellationToken = default,
        bool logTranscript = true)
    {
        if (audio.IsEmpty)
        {
            return string.Empty;
        }

        // Captured before the await so the request and the result log entries name the same backend.
        var backend = this.transcriptionEngines.ConfiguredBackendName;
        if (logTranscript)
        {
            AppLog.Info(
                $"Transcription request: {this.ConfigurationSummary}; audio={audio.Duration.TotalSeconds:0.00}s, bytes={audio.Pcm16KhzMono.Length:N0}.");
        }

        var rawText = await this.transcriptionEngines.TranscribeAsync(audio, cancellationToken).ConfigureAwait(false);

        // Trim null-tolerantly: a missing result is treated the same as an empty one instead of
        // throwing before the emptiness check can run. After trimming, "whitespace only" and
        // "empty" are the same condition.
        var text = rawText?.Trim();
        if (string.IsNullOrEmpty(text))
        {
            if (logTranscript)
            {
                AppLog.Info($"{backend} returned no text for this audio buffer.");
            }

            return string.Empty;
        }

        if (logTranscript)
        {
            AppLog.Info($"Transcribed ({backend}, {text.Length:N0} chars): {text}");
        }

        return text;
    }

    /// <summary>
    /// Sends the trimmed text to the current input target. Text that is empty after trimming is ignored.
    /// On Windows the text goes through <c>WindowsUnicodeInput.SendText</c>; on other platforms it goes
    /// through the SharpHook event simulator.
    /// </summary>
    /// <param name="text">Text to inject.</param>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is <see langword="null"/>.</exception>
    /// <exception cref="InvalidOperationException">
    /// The event simulator reported a status other than <c>UioHookResult.Success</c> (non-Windows path).
    /// </exception>
    public void InjectTextToTarget(string text)
    {
        ArgumentNullException.ThrowIfNull(text);

        var target = text.Trim();
        if (target.Length == 0)
        {
            return;
        }

        if (OperatingSystem.IsWindows())
        {
            WindowsUnicodeInput.SendText(target);
            return;
        }

        var textResult = this.eventSimulator.SimulateTextEntry(target);
        if (textResult != UioHookResult.Success)
        {
            throw new InvalidOperationException($"Text injection failed with status {textResult}.");
        }
    }

    /// <summary>
    /// Simulates an Enter key stroke through the SharpHook event simulator.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The event simulator reported a status other than <c>UioHookResult.Success</c>.
    /// </exception>
    public void SendEnterToTarget()
    {
        var keyResult = this.eventSimulator.SimulateKeyStroke(new[] { KeyCode.VcEnter });
        if (keyResult != UioHookResult.Success)
        {
            throw new InvalidOperationException($"Enter key simulation failed with status {keyResult}.");
        }
    }

    /// <summary>
    /// Asynchronously disposes the owned transcription engine host.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        await this.transcriptionEngines.DisposeAsync().ConfigureAwait(false);
    }
}