Skip to content

Commit 5acfa18

Browse files
committed
[gobby-cli-#444] fix: apply remaining CodeRabbit triage
1 parent 52d2167 commit 5acfa18

18 files changed

Lines changed: 487 additions & 218 deletions

File tree

crates/gcore/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ qdrant = ["dep:reqwest", "dep:urlencoding"]
1919
indexing = ["dep:ignore", "dep:sha2"]
2020
search = []
2121
ai = ["dep:reqwest", "dep:base64", "dep:bytes", "dep:httpdate", "dep:rand", "reqwest/multipart"]
22+
test-helpers = []
2223
full = ["postgres", "falkor", "qdrant", "indexing", "search", "ai"]
2324

2425
[dependencies]

crates/gcore/src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,5 +50,5 @@ pub mod indexing;
5050
#[cfg(feature = "search")]
5151
pub mod search;
5252

53-
#[cfg(test)]
54-
pub(crate) mod test_http;
53+
#[cfg(any(test, feature = "test-helpers"))]
54+
pub mod test_http;

crates/gcore/src/test_http.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@ use std::net::TcpListener;
55
use std::thread;
66
use std::time::Duration;
77

8-
pub(crate) type RequestHandle = thread::JoinHandle<io::Result<String>>;
8+
pub type RequestHandle = thread::JoinHandle<io::Result<String>>;
99

10-
pub(crate) fn spawn_json_response(body: impl Into<String>) -> io::Result<(String, RequestHandle)> {
10+
pub fn spawn_json_response(body: impl Into<String>) -> io::Result<(String, RequestHandle)> {
1111
spawn_response(200, "OK", "application/json", body.into())
1212
}
1313

14-
pub(crate) fn spawn_json_response_with_status(
14+
pub fn spawn_json_response_with_status(
1515
status: u16,
1616
body: impl Into<String>,
1717
) -> io::Result<(String, RequestHandle)> {

crates/gwiki/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,3 +57,6 @@ pdfium-render = { version = "0.8.37", default-features = false, features = ["pdf
5757
png = { version = "0.17", optional = true }
5858
quick-xml = { version = "0.38", optional = true }
5959
zip = { version = "4", default-features = false, features = ["deflate"], optional = true }
60+
61+
[dev-dependencies]
62+
gobby-core = { path = "../gcore", version = "0.3.0", features = ["postgres", "falkor", "qdrant", "indexing", "search", "ai", "test-helpers"] }

crates/gwiki/src/ai/chunk.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,8 +129,9 @@ fn transcribe_chunks(
129129
_original_request: &TranscriptionRequest<'_>,
130130
client: &dyn TranscriptionClient,
131131
mode: ChunkTranscriptionMode<'_>,
132-
chunks: Vec<AudioChunk>,
132+
mut chunks: Vec<AudioChunk>,
133133
) -> Result<ChunkedTranscription, WikiError> {
134+
chunks.sort_by_key(|chunk| chunk.start_ms);
134135
let mut aggregate = empty_output();
135136
let mut completed_ranges = Vec::new();
136137
let mut missing_ranges = Vec::new();
@@ -354,7 +355,7 @@ mod tests {
354355
)),
355356
Ok(output("en", &[(0, 500, "overlap"), (500, 1_500, "beta")])),
356357
]);
357-
let chunker = FakeChunker::new(vec![chunk(0, 10_000), chunk(9_000, 19_000)]);
358+
let chunker = FakeChunker::new(vec![chunk(9_000, 19_000), chunk(0, 10_000)]);
358359

359360
let stitched = transcribe_audio_request_with_chunker(
360361
&request,

crates/gwiki/src/collect.rs

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -311,19 +311,21 @@ fn accept_item(
311311
};
312312

313313
if let Err(error) = fs::remove_file(&path) {
314-
let _ = fs::remove_file(vault_root.join(&raw_path));
314+
let mut cleanup_errors = Vec::new();
315+
cleanup_collect_file(vault_root, &raw_path, &mut cleanup_errors);
315316
if let Some(asset_path) = &asset_path {
316-
let _ = fs::remove_file(vault_root.join(asset_path));
317+
cleanup_collect_file(vault_root, asset_path, &mut cleanup_errors);
317318
}
318319
let original_error = io_error("remove accepted inbox item", &path, error);
319320
if let Err(rollback_error) = previous_manifest.write(vault_root) {
320321
return Err(WikiError::Config {
321322
detail: format!(
322-
"failed to roll back source manifest after inbox removal failed: {rollback_error}; original error: {original_error}"
323+
"failed to roll back source manifest after inbox removal failed: {rollback_error}; original error: {original_error}{}",
324+
collect_cleanup_detail(&cleanup_errors)
323325
),
324326
});
325327
}
326-
return Err(original_error);
328+
return collect_error_with_cleanup(original_error, cleanup_errors);
327329
}
328330
report.accepted.push(CollectAction {
329331
inbox_path: relative,
@@ -342,20 +344,54 @@ fn rollback_registered_collect_source<T>(
342344
asset_path: Option<&PathBuf>,
343345
original_error: WikiError,
344346
) -> Result<T, WikiError> {
347+
let mut cleanup_errors = Vec::new();
345348
if let Some(raw_path) = raw_path {
346-
let _ = fs::remove_file(vault_root.join(raw_path));
349+
cleanup_collect_file(vault_root, raw_path, &mut cleanup_errors);
347350
}
348351
if let Some(asset_path) = asset_path {
349-
let _ = fs::remove_file(vault_root.join(asset_path));
352+
cleanup_collect_file(vault_root, asset_path, &mut cleanup_errors);
350353
}
351354
if let Err(rollback_error) = previous_manifest.write(vault_root) {
352355
return Err(WikiError::Config {
353356
detail: format!(
354-
"failed to roll back source manifest after collect write failure: {rollback_error}; original error: {original_error}"
357+
"failed to roll back source manifest after collect write failure: {rollback_error}; original error: {original_error}{}",
358+
collect_cleanup_detail(&cleanup_errors)
355359
),
356360
});
357361
}
358-
Err(original_error)
362+
collect_error_with_cleanup(original_error, cleanup_errors)
363+
}
364+
365+
fn cleanup_collect_file(vault_root: &Path, relative_path: &Path, cleanup_errors: &mut Vec<String>) {
366+
let path = vault_root.join(relative_path);
367+
match fs::remove_file(&path) {
368+
Ok(()) => {}
369+
Err(error) if error.kind() == std::io::ErrorKind::NotFound => {}
370+
Err(error) => cleanup_errors.push(format!("{}: {error}", path.display())),
371+
}
372+
}
373+
374+
fn collect_error_with_cleanup<T>(
375+
original_error: WikiError,
376+
cleanup_errors: Vec<String>,
377+
) -> Result<T, WikiError> {
378+
if cleanup_errors.is_empty() {
379+
return Err(original_error);
380+
}
381+
Err(WikiError::Config {
382+
detail: format!(
383+
"{original_error}; cleanup failures: {}",
384+
cleanup_errors.join("; ")
385+
),
386+
})
387+
}
388+
389+
fn collect_cleanup_detail(cleanup_errors: &[String]) -> String {
390+
if cleanup_errors.is_empty() {
391+
String::new()
392+
} else {
393+
format!("; cleanup failures: {}", cleanup_errors.join("; "))
394+
}
359395
}
360396

361397
fn skip_item(

crates/gwiki/src/commands/refresh.rs

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -356,11 +356,6 @@ fn refresh_changed_url_source(
356356
previous_paths.extend(source_asset_paths_for_id(vault_root, &previous.id)?);
357357

358358
let result = ingest::url::ingest_snapshot_without_index(vault_root, snapshot)?;
359-
SourceManifest::update(vault_root, |manifest| {
360-
let before = manifest.entries.len();
361-
manifest.entries.retain(|entry| entry.id != previous.id);
362-
Ok(manifest.entries.len() != before)
363-
})?;
364359

365360
let mut removed_paths = Vec::new();
366361
for path in previous_paths {
@@ -377,6 +372,12 @@ fn refresh_changed_url_source(
377372
}
378373
}
379374

375+
SourceManifest::update(vault_root, |manifest| {
376+
let before = manifest.entries.len();
377+
manifest.entries.retain(|entry| entry.id != previous.id);
378+
Ok(manifest.entries.len() != before)
379+
})?;
380+
380381
Ok(ChangedRefresh {
381382
result,
382383
previous_raw_path,
@@ -402,13 +403,6 @@ fn refresh_changed_local_source(
402403
vault_root, scope, ai_context, options, path, fetched_at,
403404
)?;
404405
let result = local_result.result;
405-
if previous.id != result.record.id {
406-
SourceManifest::update(vault_root, |manifest| {
407-
let before = manifest.entries.len();
408-
manifest.entries.retain(|entry| entry.id != previous.id);
409-
Ok(manifest.entries.len() != before)
410-
})?;
411-
}
412406

413407
let mut removed_paths = Vec::new();
414408
for path in previous_paths {
@@ -425,6 +419,14 @@ fn refresh_changed_local_source(
425419
}
426420
}
427421

422+
if previous.id != result.record.id {
423+
SourceManifest::update(vault_root, |manifest| {
424+
let before = manifest.entries.len();
425+
manifest.entries.retain(|entry| entry.id != previous.id);
426+
Ok(manifest.entries.len() != before)
427+
})?;
428+
}
429+
428430
Ok(ChangedRefresh {
429431
result,
430432
previous_raw_path,

crates/gwiki/src/commands/sources.rs

Lines changed: 62 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,15 +56,15 @@ pub(crate) fn execute_remove(
5656
scope.root(),
5757
&raw_path,
5858
source.raw_exists,
59-
dry_run,
59+
true,
6060
&mut path_changes,
6161
)?;
6262

6363
if let Some(asset_path) = asset_path {
6464
stage_source_asset(
6565
scope.root(),
6666
&asset_path,
67-
dry_run,
67+
true,
6868
keep_asset,
6969
&mut path_changes,
7070
&mut degradations,
@@ -75,9 +75,20 @@ pub(crate) fn execute_remove(
7575
IndexStatus::not_run()
7676
} else {
7777
match SourceManifest::remove(scope.root(), &record.id) {
78-
Ok(Some(_)) => {}
79-
Ok(None) => degradations.push("manifest_entry_missing_after_plan".to_string()),
80-
Err(error) => degradations.push(format!("manifest_remove_failed:{error}")),
78+
Ok(Some(removed)) => {
79+
if let Err(error) = remove_staged_source_files(scope.root(), &mut path_changes) {
80+
rollback_removed_source(scope.root(), removed, &error)?;
81+
return Err(error);
82+
}
83+
}
84+
Ok(None) => {
85+
degradations.push("manifest_entry_missing_after_plan".to_string());
86+
path_changes.removed_paths.clear();
87+
}
88+
Err(error) => {
89+
degradations.push(format!("manifest_remove_failed:{error}"));
90+
path_changes.removed_paths.clear();
91+
}
8192
}
8293
match index::index_resolved_scope(&scope) {
8394
Ok(counts) => IndexStatus::indexed(counts),
@@ -326,6 +337,52 @@ fn stage_source_asset(
326337
Ok(())
327338
}
328339

340+
fn remove_staged_source_files(
341+
vault_root: &Path,
342+
path_changes: &mut PathChanges,
343+
) -> Result<(), WikiError> {
344+
let planned = std::mem::take(&mut path_changes.removed_paths);
345+
let mut removed = Vec::with_capacity(planned.len());
346+
for relative in planned {
347+
let full_path = vault_root.join(&relative);
348+
match fs::remove_file(&full_path) {
349+
Ok(()) => removed.push(relative),
350+
Err(error) if error.kind() == std::io::ErrorKind::NotFound => {
351+
path_changes.missing_paths.push(relative);
352+
}
353+
Err(source) => {
354+
path_changes.removed_paths = removed;
355+
return Err(WikiError::Io {
356+
action: "remove source file",
357+
path: Some(full_path),
358+
source,
359+
});
360+
}
361+
}
362+
}
363+
path_changes.removed_paths = removed;
364+
Ok(())
365+
}
366+
367+
fn rollback_removed_source(
368+
vault_root: &Path,
369+
record: SourceRecord,
370+
original_error: &WikiError,
371+
) -> Result<(), WikiError> {
372+
SourceManifest::update(vault_root, |manifest| {
373+
if manifest.entries.iter().any(|entry| entry.id == record.id) {
374+
return Ok(false);
375+
}
376+
manifest.entries.push(record);
377+
Ok(true)
378+
})
379+
.map_err(|rollback_error| WikiError::Config {
380+
detail: format!(
381+
"failed to roll back source manifest after source file removal failed: {rollback_error}; original error: {original_error}"
382+
),
383+
})
384+
}
385+
329386
fn raw_source_path(id: &str) -> Result<PathBuf, WikiError> {
330387
if id.trim().is_empty()
331388
|| id.contains('/')

crates/gwiki/src/health.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,7 @@ fn source_reference_patterns(needle: &str) -> Vec<String> {
329329
]
330330
}
331331

332+
#[cfg(test)]
332333
fn source_reference_is_present(markdown: &str, needle: &str) -> bool {
333334
let needle = needle.trim();
334335
if needle.is_empty() {

crates/gwiki/src/ingest/audio.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -523,8 +523,8 @@ mod tests {
523523
#[cfg(feature = "ai")]
524524
fn spawn_transcription_server(
525525
response: &'static str,
526-
) -> (String, std::thread::JoinHandle<String>) {
527-
crate::test_http::spawn_json_response(response)
526+
) -> (String, gobby_core::test_http::RequestHandle) {
527+
gobby_core::test_http::spawn_json_response(response).expect("spawn test server")
528528
}
529529

530530
#[cfg(feature = "ai")]
@@ -595,7 +595,7 @@ mod tests {
595595
)
596596
.expect("ingest audio with production transcript");
597597

598-
let request = request.join().expect("request");
598+
let request = request.join().expect("request").expect("request ok");
599599
assert!(request.starts_with("POST /v1/audio/transcriptions HTTP/1.1"));
600600
assert!(result.transcription_degradation.is_none());
601601

@@ -628,7 +628,7 @@ mod tests {
628628
)
629629
.expect("ingest translated audio");
630630

631-
let request = request.join().expect("request");
631+
let request = request.join().expect("request").expect("request ok");
632632
assert!(request.starts_with("POST /v1/audio/translations HTTP/1.1"));
633633
assert!(result.transcription_degradation.is_none());
634634

0 commit comments

Comments
 (0)