Skip to content

Commit 7ab2456

Browse files
Merge pull request #1615 from CapSoftware/cursor/keyboard-and-captions-tracks-8d45
Keyboard and captions tracks
2 parents 75206ff + f515eae commit 7ab2456

121 files changed

Lines changed: 5594 additions & 1130 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Cargo.lock

Lines changed: 33 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

apps/desktop/src-tauri/src/captions.rs

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextPar
2222

2323
pub use cap_project::{CaptionSegment, CaptionSettings, CaptionWord};
2424

25-
use crate::http_client;
25+
use crate::{general_settings::GeneralSettingsStore, http_client};
2626

2727
#[derive(Debug, Serialize, Deserialize, Type, Clone)]
2828
pub struct CaptionData {
@@ -529,6 +529,7 @@ fn process_with_whisper(
529529
audio_path: &PathBuf,
530530
context: Arc<WhisperContext>,
531531
language: &str,
532+
transcription_hints: &[String],
532533
) -> Result<CaptionData, String> {
533534
log::info!("=== WHISPER TRANSCRIPTION START ===");
534535
log::info!("Processing audio file: {audio_path:?}");
@@ -544,6 +545,10 @@ fn process_with_whisper(
544545
params.set_language(Some(if language == "auto" { "auto" } else { language }));
545546
params.set_max_len(i32::MAX);
546547

548+
if let Some(initial_prompt) = build_initial_prompt(transcription_hints) {
549+
params.set_initial_prompt(&initial_prompt);
550+
}
551+
547552
log::info!("Whisper params - translate: false, token_timestamps: true, max_len: MAX");
548553

549554
let mut audio_file = File::open(audio_path)
@@ -783,10 +788,32 @@ fn process_with_whisper(
783788
})
784789
}
785790

791+
/// Builds the initial prompt handed to Whisper from the user's transcription hints.
///
/// Every hint has NUL characters removed and surrounding whitespace trimmed.
/// Hints that end up empty, or that exactly match an earlier cleaned hint, are
/// dropped; the remaining hints keep their original order.
///
/// Returns `None` when no usable hint remains, otherwise a single sentence that
/// lists the hints separated by `"; "`.
fn build_initial_prompt(transcription_hints: &[String]) -> Option<String> {
    let cleaned = transcription_hints
        .iter()
        .fold(Vec::<String>::new(), |mut kept, raw| {
            let without_nul = raw.replace('\0', "");
            let candidate = without_nul.trim();
            // Comparison is exact (case-sensitive); the first occurrence wins.
            let already_kept = kept.iter().any(|seen| seen.as_str() == candidate);
            if !candidate.is_empty() && !already_kept {
                kept.push(candidate.to_string());
            }
            kept
        });

    if cleaned.is_empty() {
        return None;
    }

    let joined = cleaned.join("; ");
    Some(format!(
        "Preferred spellings, names, and capitalization for this transcript: {}",
        joined
    ))
}
811+
786812
#[tauri::command]
787813
#[specta::specta]
788814
#[instrument]
789815
pub async fn transcribe_audio(
816+
app: AppHandle,
790817
video_path: String,
791818
model_path: String,
792819
language: String,
@@ -843,11 +870,18 @@ pub async fn transcribe_audio(
843870
}
844871
};
845872

873+
let transcription_hints = GeneralSettingsStore::get(&app)
874+
.ok()
875+
.flatten()
876+
.map(|settings| settings.transcription_hints)
877+
.unwrap_or_default();
878+
846879
log::info!("Starting Whisper transcription in blocking task...");
847-
let whisper_result =
848-
tokio::task::spawn_blocking(move || process_with_whisper(&audio_path, context, &language))
849-
.await
850-
.map_err(|e| format!("Whisper task panicked: {e}"))?;
880+
let whisper_result = tokio::task::spawn_blocking(move || {
881+
process_with_whisper(&audio_path, context, &language, &transcription_hints)
882+
})
883+
.await
884+
.map_err(|e| format!("Whisper task panicked: {e}"))?;
851885

852886
match whisper_result {
853887
Ok(captions) => {

apps/desktop/src-tauri/src/export.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,7 @@ pub async fn generate_export_preview(
293293
.iter()
294294
.map(|s| RenderSegment {
295295
cursor: s.cursor.clone(),
296+
keyboard: s.keyboard.clone(),
296297
decoders: s.decoders.clone(),
297298
})
298299
.collect();

apps/desktop/src-tauri/src/general_settings.rs

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,10 @@ pub struct GeneralSettingsStore {
121121
skip_serializing_if = "no"
122122
)]
123123
pub enable_native_camera_preview: bool,
124-
#[serde(default)]
124+
#[serde(default = "default_true")]
125125
pub auto_zoom_on_clicks: bool,
126+
#[serde(default = "default_true")]
127+
pub capture_keyboard_events: bool,
126128
#[serde(default)]
127129
pub post_deletion_behaviour: PostDeletionBehaviour,
128130
#[serde(default = "default_excluded_windows")]
@@ -137,6 +139,8 @@ pub struct GeneralSettingsStore {
137139
pub crash_recovery_recording: bool,
138140
#[serde(default = "default_max_fps")]
139141
pub max_fps: u32,
142+
#[serde(default = "default_transcription_hints")]
143+
pub transcription_hints: Vec<String>,
140144
#[serde(default)]
141145
pub editor_preview_quality: EditorPreviewQuality,
142146
#[serde(default)]
@@ -167,6 +171,15 @@ fn default_max_fps() -> u32 {
167171
60
168172
}
169173

174+
/// Default value for the `transcription_hints` setting.
///
/// Used both as the serde default when the field is missing from stored
/// settings and by the `Default` implementation of the settings store.
fn default_transcription_hints() -> Vec<String> {
    const DEFAULT_HINTS: [&str; 4] = ["Cap", "TypeScript", "My Brand Name", "mywebsite.com"];

    DEFAULT_HINTS.iter().copied().map(String::from).collect()
}
182+
170183
fn default_server_url() -> String {
171184
std::option_env!("VITE_SERVER_URL")
172185
.unwrap_or("https://cap.so")
@@ -202,14 +215,16 @@ impl Default for GeneralSettingsStore {
202215
server_url: default_server_url(),
203216
recording_countdown: Some(3),
204217
enable_native_camera_preview: default_enable_native_camera_preview(),
205-
auto_zoom_on_clicks: false,
218+
auto_zoom_on_clicks: true,
219+
capture_keyboard_events: true,
206220
post_deletion_behaviour: PostDeletionBehaviour::DoNothing,
207221
excluded_windows: default_excluded_windows(),
208222
delete_instant_recordings_after_upload: false,
209223
instant_mode_max_resolution: 1920,
210224
default_project_name_template: None,
211225
crash_recovery_recording: true,
212226
max_fps: 60,
227+
transcription_hints: default_transcription_hints(),
213228
editor_preview_quality: EditorPreviewQuality::Half,
214229
main_window_position: None,
215230
camera_window_position: None,

apps/desktop/src-tauri/src/import.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,7 @@ pub async fn start_video_import(app: AppHandle, source_path: PathBuf) -> Result<
506506
mic: None,
507507
system_audio: None,
508508
cursor: None,
509+
keyboard: None,
509510
}],
510511
cursors: Cursors::default(),
511512
status: Some(StudioRecordingStatus::InProgress),
@@ -599,6 +600,7 @@ pub async fn start_video_import(app: AppHandle, source_path: PathBuf) -> Result<
599600
mic: None,
600601
system_audio,
601602
cursor: None,
603+
keyboard: None,
602604
}],
603605
cursors: Cursors::default(),
604606
status: Some(StudioRecordingStatus::Complete),

apps/desktop/src-tauri/src/lib.rs

Lines changed: 68 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2123,6 +2123,51 @@ async fn generate_zoom_segments_from_clicks(
21232123
Ok(zoom_segments)
21242124
}
21252125

2126+
#[tauri::command]
2127+
#[specta::specta]
2128+
#[instrument(skip(editor_instance))]
2129+
async fn generate_keyboard_segments(
2130+
editor_instance: WindowEditorInstance,
2131+
grouping_threshold_ms: f64,
2132+
linger_duration_ms: f64,
2133+
show_modifiers: bool,
2134+
show_special_keys: bool,
2135+
) -> Result<Vec<cap_project::KeyboardTrackSegment>, String> {
2136+
let meta = editor_instance.meta();
2137+
2138+
let RecordingMetaInner::Studio(studio_meta) = &meta.inner else {
2139+
return Ok(vec![]);
2140+
};
2141+
2142+
let segments = match studio_meta.as_ref() {
2143+
StudioRecordingMeta::MultipleSegments { inner, .. } => &inner.segments,
2144+
_ => return Ok(vec![]),
2145+
};
2146+
2147+
let mut all_events = cap_project::KeyboardEvents { presses: vec![] };
2148+
2149+
for segment in segments {
2150+
let events = segment.keyboard_events(meta);
2151+
all_events.presses.extend(events.presses);
2152+
}
2153+
2154+
all_events.presses.sort_by(|a, b| {
2155+
a.time_ms
2156+
.partial_cmp(&b.time_ms)
2157+
.unwrap_or(std::cmp::Ordering::Equal)
2158+
});
2159+
2160+
let grouped = cap_project::group_key_events(
2161+
&all_events,
2162+
grouping_threshold_ms,
2163+
linger_duration_ms,
2164+
show_modifiers,
2165+
show_special_keys,
2166+
);
2167+
2168+
Ok(grouped)
2169+
}
2170+
21262171
#[tauri::command]
21272172
#[specta::specta]
21282173
#[instrument]
@@ -3105,6 +3150,7 @@ pub async fn run(recording_logging_handle: LoggingHandle, logs_dir: PathBuf) {
31053150
set_project_config,
31063151
update_project_config_in_memory,
31073152
generate_zoom_segments_from_clicks,
3153+
generate_keyboard_segments,
31083154
permissions::open_permission_settings,
31093155
permissions::do_permissions_check,
31103156
permissions::request_permission,
@@ -3673,13 +3719,14 @@ pub async fn run(recording_logging_handle: LoggingHandle, logs_dir: PathBuf) {
36733719
id,
36743720
CapWindowId::TargetSelectOverlay { .. }
36753721
| CapWindowId::Main
3676-
| CapWindowId::Camera
36773722
)
36783723
{
36793724
let _ = window.show();
36803725
}
36813726
}
36823727

3728+
restore_camera_window(app);
3729+
36833730
#[cfg(target_os = "windows")]
36843731
if !has_open_editor_window(app) {
36853732
reopen_main_window(app);
@@ -3694,12 +3741,12 @@ pub async fn run(recording_logging_handle: LoggingHandle, logs_dir: PathBuf) {
36943741
id,
36953742
CapWindowId::TargetSelectOverlay { .. }
36963743
| CapWindowId::Main
3697-
| CapWindowId::Camera
36983744
)
36993745
{
37003746
let _ = window.show();
37013747
}
37023748
}
3749+
restore_camera_window(app);
37033750
return;
37043751
}
37053752
CapWindowId::TargetSelectOverlay { display_id } => {
@@ -3901,9 +3948,25 @@ fn restore_main_windows_if_no_editors(app: &AppHandle) {
39013948
if let Some(main) = CapWindowId::Main.get(app) {
39023949
let _ = main.show();
39033950
}
3904-
if let Some(camera) = CapWindowId::Camera.get(app) {
3905-
let _ = camera.show();
3906-
}
3951+
3952+
restore_camera_window(app);
3953+
}
3954+
}
3955+
3956+
fn restore_camera_window(app: &AppHandle) {
3957+
let should_restore_camera = app
3958+
.state::<ArcLock<App>>()
3959+
.try_read()
3960+
.map(|state| state.selected_camera_id.is_some())
3961+
.unwrap_or(false);
3962+
3963+
if should_restore_camera {
3964+
let app = app.clone();
3965+
tokio::spawn(async move {
3966+
let operation_lock = app.state::<CameraWindowOperationLock>();
3967+
let _operation_guard = operation_lock.lock().await;
3968+
let _ = ShowCapWindow::Camera { centered: false }.show(&app).await;
3969+
});
39073970
}
39083971
}
39093972

apps/desktop/src-tauri/src/recording.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -878,6 +878,12 @@ pub async fn start_recording(
878878
.map(|s| s.custom_cursor_capture)
879879
.unwrap_or_default(),
880880
)
881+
.with_keyboard_capture(
882+
general_settings
883+
.as_ref()
884+
.map(|s| s.capture_keyboard_events)
885+
.unwrap_or(true),
886+
)
881887
.with_fragmented(
882888
general_settings
883889
.as_ref()
@@ -2370,6 +2376,8 @@ fn project_config_from_recording(
23702376
scene_segments: Vec::new(),
23712377
mask_segments: Vec::new(),
23722378
text_segments: Vec::new(),
2379+
caption_segments: Vec::new(),
2380+
keyboard_segments: Vec::new(),
23732381
});
23742382

23752383
config
@@ -2391,7 +2399,7 @@ pub fn needs_fragment_remux(recording_dir: &Path, meta: &StudioRecordingMeta) ->
23912399
}
23922400

23932401
pub fn remux_fragmented_recording(recording_dir: &Path) -> Result<(), String> {
2394-
let incomplete_recording = RecoveryManager::find_incomplete_single(recording_dir);
2402+
let incomplete_recording = RecoveryManager::inspect_recording(recording_dir);
23952403

23962404
if let Some(recording) = incomplete_recording {
23972405
RecoveryManager::recover(&recording)

0 commit comments

Comments
 (0)