Skip to content

Commit 92faa32

Browse files
authored
Merge pull request #9 from n-n-code/decoupling_whisper_2
Decoupling work continues
2 parents 5926687 + 4d14bf7 commit 92faa32

11 files changed

Lines changed: 496 additions & 110 deletions

src/app/applicationcommands.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,21 @@ int runOnce(QGuiApplication &app, const AppConfig &config, double seconds)
6161
{
6262
AudioRecorder recorder(config.audio);
6363
const std::shared_ptr<const TranscriptionEngine> transcriptionEngine = createTranscriptionEngine(config.transcriber);
64-
std::unique_ptr<TranscriptionSession> transcriber = transcriptionEngine->createSession();
64+
RuntimeError runtimeError;
65+
const std::shared_ptr<const TranscriptionModelHandle> model = transcriptionEngine->loadModel(&runtimeError);
66+
if (model == nullptr) {
67+
qCCritical(appLog) << "Failed to load transcription model:" << runtimeError.message;
68+
return 1;
69+
}
70+
71+
std::unique_ptr<TranscriptionSession> transcriber = transcriptionEngine->createSession(model);
72+
if (transcriber == nullptr) {
73+
qCCritical(appLog) << "Failed to create transcription session";
74+
return 1;
75+
}
6576
ClipboardWriter clipboardWriter(QGuiApplication::clipboard());
6677

6778
if (config.transcriber.warmupOnStart) {
68-
RuntimeError runtimeError;
6979
if (!transcriber->warmup(&runtimeError)) {
7080
qCCritical(appLog) << "Failed to warm up transcriber:" << runtimeError.message;
7181
return 1;

src/service.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ QJsonObject MutterkeyService::diagnostics() const
8080
object.insert(QStringLiteral("transcriptions_completed"), m_transcriptionsCompleted);
8181
object.insert(QStringLiteral("transcriber_backend"),
8282
m_transcriptionWorker != nullptr ? m_transcriptionWorker->backendName() : QStringLiteral("unconfigured"));
83+
object.insert(QStringLiteral("transcriber_model"),
84+
m_transcriptionWorker != nullptr ? m_transcriptionWorker->loadedModelDescription() : QString());
8385
const BackendCapabilities capabilities =
8486
m_transcriptionWorker != nullptr ? m_transcriptionWorker->capabilities() : m_transcriptionEngine->capabilities();
8587
object.insert(QStringLiteral("transcriber_runtime"), capabilities.runtimeDescription);

src/transcription/transcriptionengine.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,15 @@ class WhisperCppTranscriptionEngine final : public TranscriptionEngine
1919
return WhisperCppTranscriber::capabilitiesStatic();
2020
}
2121

22-
[[nodiscard]] std::unique_ptr<TranscriptionSession> createSession() const override
22+
// Loads the model for this engine by delegating to
// WhisperCppTranscriber::loadModelHandle with the engine's stored m_config.
// `error` is forwarded unchanged; the returned handle is whatever the helper
// produces (callers in this commit treat nullptr as a load failure).
[[nodiscard]] std::shared_ptr<const TranscriptionModelHandle> loadModel(RuntimeError *error) const override
2323
{
24-
return std::make_unique<WhisperCppTranscriber>(m_config);
24+
return WhisperCppTranscriber::loadModelHandle(m_config, error);
25+
}
26+
27+
// Creates a session by delegating to WhisperCppTranscriber::createSession with
// the engine's stored m_config. `model` is taken by value and moved into the
// helper, so the caller's copy of the shared_ptr is left untouched.
[[nodiscard]] std::unique_ptr<TranscriptionSession>
28+
createSession(std::shared_ptr<const TranscriptionModelHandle> model) const override
29+
{
30+
return WhisperCppTranscriber::createSession(m_config, std::move(model));
2531
}
2632

2733
private:

src/transcription/transcriptionengine.h

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,37 @@ struct Recording;
1212
* @brief Stable engine/session boundary for embedded transcription backends.
1313
*/
1414

15+
/**
16+
* @brief Immutable loaded-model interface created by a transcription engine.
17+
*
18+
* Handles own validated backend assets and may be shared across multiple
19+
* independent sessions without exposing backend-specific state to app code.
20+
*/
21+
// Abstract interface: both accessors below are pure virtual, so only a
// backend-specific subclass can be instantiated.
class TranscriptionModelHandle
22+
{
23+
public:
24+
// Virtual destructor so a derived handle can be destroyed through this base type.
virtual ~TranscriptionModelHandle() = default;
25+
// Copy and move operations are all deleted: a handle cannot be copied or
// moved once constructed.
TranscriptionModelHandle(const TranscriptionModelHandle &) = delete;
26+
TranscriptionModelHandle &operator=(const TranscriptionModelHandle &) = delete;
27+
TranscriptionModelHandle(TranscriptionModelHandle &&) = delete;
28+
TranscriptionModelHandle &operator=(TranscriptionModelHandle &&) = delete;
29+
30+
/**
31+
* @brief Returns the backend identifier for this loaded model.
32+
* @return Short backend name used in diagnostics.
33+
*/
34+
[[nodiscard]] virtual QString backendName() const = 0;
35+
36+
/**
37+
* @brief Returns a human-readable description of the loaded model.
38+
* @return Diagnostic model description such as the resolved model path.
39+
*/
40+
[[nodiscard]] virtual QString modelDescription() const = 0;
41+
42+
protected:
43+
// Protected default constructor: construction is reserved for derived classes.
TranscriptionModelHandle() = default;
44+
};
45+
1546
/**
1647
* @brief Mutable per-session transcription interface.
1748
*
@@ -47,6 +78,14 @@ class TranscriptionSession
4778
*/
4879
[[nodiscard]] virtual TranscriptionResult transcribe(const Recording &recording) = 0;
4980

81+
/**
82+
* @brief Requests cooperative cancellation of any active decode.
83+
*
84+
* Implementations should stop in-flight backend work best-effort without
85+
* using thread interruption.
86+
*/
87+
virtual void cancel() = 0;
88+
5089
protected:
5190
TranscriptionSession() = default;
5291
};
@@ -73,10 +112,19 @@ class TranscriptionEngine
73112
[[nodiscard]] virtual BackendCapabilities capabilities() const = 0;
74113

75114
/**
76-
* @brief Creates a new isolated transcription session.
77-
* @return Newly constructed session that owns its backend state.
115+
* @brief Loads an immutable validated model handle for this engine.
116+
* @param error Optional destination for a structured failure reason.
117+
* @return Shared loaded-model handle suitable for multiple sessions.
118+
*/
119+
[[nodiscard]] virtual std::shared_ptr<const TranscriptionModelHandle> loadModel(RuntimeError *error = nullptr) const = 0;
120+
121+
/**
122+
* @brief Creates a new isolated transcription session from a loaded model.
123+
* @param model Shared immutable model handle created by this engine.
124+
* @return Newly constructed session that owns only mutable backend state.
78125
*/
79-
[[nodiscard]] virtual std::unique_ptr<TranscriptionSession> createSession() const = 0;
126+
[[nodiscard]] virtual std::unique_ptr<TranscriptionSession>
127+
createSession(std::shared_ptr<const TranscriptionModelHandle> model) const = 0;
80128

81129
protected:
82130
TranscriptionEngine() = default;

src/transcription/transcriptiontypes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
*/
1717
enum class RuntimeErrorCode : std::uint8_t {
1818
None,
19+
Cancelled,
1920
InvalidConfig,
2021
ModelNotFound,
2122
ModelLoadFailed,

src/transcription/transcriptionworker.cpp

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,27 @@ BackendCapabilities TranscriptionWorker::capabilities() const
3838
return m_capabilities;
3939
}
4040

41+
// Returns the description of the currently loaded model, or an empty QString
// when no model handle has been loaded yet.
// NOTE(review): m_model is assigned in ensureModel(); this accessor reads it
// without any visible synchronization. If diagnostics are gathered from a
// thread other than the one running the worker, confirm this read is safe.
QString TranscriptionWorker::loadedModelDescription() const
42+
{
43+
// No model loaded (yet): report an empty description rather than dereferencing.
if (m_model == nullptr) {
44+
return {};
45+
}
46+
47+
return m_model->modelDescription();
48+
}
49+
4150
// Ensures a session exists and asks it to warm up.
// Returns false if the session could not be obtained or if the session's own
// warmup fails; `error` is forwarded to both steps.
bool TranscriptionWorker::warmup(RuntimeError *error)
4251
{
4352
if (!ensureSession(error)) {
4453
return false;
4554
}
4655

47-
return m_transcriber->warmup(error);
56+
const bool ready = m_transcriber->warmup(error);
57+
// On failure, drop the session when the caller supplied no error object
// (the failure cannot be classified) or when the reported error code is one
// that shouldDiscardSession() marks as requiring a fresh session.
if (!ready && (error == nullptr || shouldDiscardSession(*error))) {
58+
m_transcriber.reset();
59+
}
60+
61+
return ready;
4862
}
4963

5064
void TranscriptionWorker::transcribe(const Recording &recording)
@@ -57,6 +71,9 @@ void TranscriptionWorker::transcribe(const Recording &recording)
5771

5872
const TranscriptionResult result = m_transcriber->transcribe(recording);
5973
if (!result.success) {
74+
if (shouldDiscardSession(result.error)) {
75+
m_transcriber.reset();
76+
}
6077
emit transcriptionFailed(result.error);
6178
return;
6279
}
@@ -70,6 +87,10 @@ bool TranscriptionWorker::ensureSession(RuntimeError *error)
7087
return true;
7188
}
7289

90+
if (!ensureModel(error)) {
91+
return false;
92+
}
93+
7394
if (m_engine == nullptr) {
7495
if (error != nullptr) {
7596
*error = makeRuntimeError(RuntimeErrorCode::InternalRuntimeError,
@@ -78,7 +99,7 @@ bool TranscriptionWorker::ensureSession(RuntimeError *error)
7899
return false;
79100
}
80101

81-
m_transcriber = m_engine->createSession();
102+
m_transcriber = m_engine->createSession(m_model);
82103
if (m_transcriber == nullptr) {
83104
if (error != nullptr) {
84105
*error = makeRuntimeError(RuntimeErrorCode::InternalRuntimeError,
@@ -88,3 +109,36 @@ bool TranscriptionWorker::ensureSession(RuntimeError *error)
88109
}
89110
return true;
90111
}
112+
113+
// Makes sure m_model holds a loaded model handle, loading it from m_engine on
// first use. Returns true when a model is already cached or no engine is
// configured; otherwise returns whether m_engine->loadModel(error) produced a
// non-null handle.
bool TranscriptionWorker::ensureModel(RuntimeError *error)
114+
{
115+
// Already loaded: reuse the cached handle.
if (m_model != nullptr) {
116+
return true;
117+
}
118+
119+
// A missing engine is deliberately not reported here: returning true lets
// ensureSession() continue to its own m_engine == nullptr check, which fills
// in the error for that case.
if (m_engine == nullptr) {
120+
return true;
121+
}
122+
123+
m_model = m_engine->loadModel(error);
124+
return m_model != nullptr;
125+
}
126+
127+
// Classifies a runtime error by whether the current session should be dropped
// after it. warmup() and transcribe() reset m_transcriber when this returns true.
bool TranscriptionWorker::shouldDiscardSession(const RuntimeError &error)
128+
{
129+
switch (error.code) {
130+
// Codes after which the session is discarded.
case RuntimeErrorCode::Cancelled:
131+
case RuntimeErrorCode::ModelLoadFailed:
132+
case RuntimeErrorCode::DecodeFailed:
133+
case RuntimeErrorCode::InternalRuntimeError:
134+
return true;
135+
// Codes after which the existing session is kept.
case RuntimeErrorCode::None:
136+
case RuntimeErrorCode::InvalidConfig:
137+
case RuntimeErrorCode::ModelNotFound:
138+
case RuntimeErrorCode::AudioNormalizationFailed:
139+
case RuntimeErrorCode::UnsupportedLanguage:
140+
return false;
141+
}
142+
143+
// Reached only for a code value not listed above; such values discard the session.
return true;
144+
}

src/transcription/transcriptionworker.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,12 @@ class TranscriptionWorker final : public QObject
5151
*/
5252
[[nodiscard]] BackendCapabilities capabilities() const;
5353

54+
/**
55+
* @brief Returns the currently loaded model description, if available.
56+
* @return Human-readable loaded model description or an empty string.
57+
*/
58+
[[nodiscard]] QString loadedModelDescription() const;
59+
5460
/**
5561
* @brief Eagerly initializes backend state before the first real transcription.
5662
* @param error Optional output for warmup failures.
@@ -78,10 +84,15 @@ class TranscriptionWorker final : public QObject
7884
void transcriptionFailed(const RuntimeError &error);
7985

8086
private:
87+
static bool shouldDiscardSession(const RuntimeError &error);
88+
89+
bool ensureModel(RuntimeError *error = nullptr);
8190
bool ensureSession(RuntimeError *error = nullptr);
8291

8392
/// Shared immutable engine used to create the live session lazily on the worker thread.
8493
std::shared_ptr<const TranscriptionEngine> m_engine;
94+
/// Shared immutable loaded model handle reused across session instances.
95+
std::shared_ptr<const TranscriptionModelHandle> m_model;
8596
/// Capability snapshot reported even before the first session exists.
8697
BackendCapabilities m_capabilities;
8798
/// Owned transcription backend implementation.

0 commit comments

Comments
 (0)