Skip to content

Commit 68f3f5c

Browse files
fmassot and claude authored
truncate split lists in error logs using PrettySample (#6315)
* truncate split lists in error logs using PrettySample

  When many splits fail during a search query, the error logs dump every failed split (with its full error message) into a single log line. At scale (hundreds of splits per query), this produces multi-KB unreadable log lines.

  Use PrettySample(5) to show only the first 5 items and a count of the rest, matching the pattern already used for split_offsets in leaf search. Also adds num_failed_splits/num_splits fields for easy filtering without parsing the sample.

  Files changed:
  - root.rs: failed_splits in leaf search response (debug + error)
  - list_terms.rs: failed_splits in list terms response
  - fetch_docs.rs: split_ids on fetch docs error

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fetch_docs: tag each split future with its split_id in error context

  Only one split fails when try_join_all returns an error; logging all split IDs was misleading. Now each future carries its own split_id via map_err context, so the error message identifies the exact failing split.

  Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fmt: apply nightly rustfmt to fetch_docs.rs

  Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 243960d commit 68f3f5c

3 files changed

Lines changed: 25 additions & 29 deletions

File tree

quickwit/quickwit-search/src/fetch_docs.rs

Lines changed: 20 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ use std::collections::{BTreeMap, HashMap};
1616
use std::sync::Arc;
1717

1818
use anyhow::{Context, Ok};
19-
use futures::{StreamExt, TryStreamExt};
19+
use futures::{StreamExt, TryFutureExt, TryStreamExt};
2020
use itertools::Itertools;
2121
use quickwit_doc_mapper::DocMapper;
2222
use quickwit_proto::search::{
@@ -65,32 +65,27 @@ async fn fetch_docs_to_map(
6565
let split_and_offset = split_offsets_map
6666
.get(split_id)
6767
.ok_or_else(|| anyhow::anyhow!("failed to find offset for split {}", split_id))?;
68-
split_fetch_docs_futures.push(fetch_docs_in_split(
69-
searcher_context.clone(),
70-
global_doc_addrs,
71-
index_storage.clone(),
72-
split_and_offset,
73-
doc_mapper.clone(),
74-
snippet_request_opt,
75-
));
68+
let split_id = split_id.to_string();
69+
split_fetch_docs_futures.push(
70+
fetch_docs_in_split(
71+
searcher_context.clone(),
72+
global_doc_addrs,
73+
index_storage.clone(),
74+
split_and_offset,
75+
doc_mapper.clone(),
76+
snippet_request_opt,
77+
)
78+
.map_err(move |e| e.context(format!("split_id={split_id}"))),
79+
);
7680
}
7781

78-
let split_fetch_docs: Vec<Vec<(GlobalDocAddress, Document)>> = futures::future::try_join_all(
79-
split_fetch_docs_futures,
80-
)
81-
.await
82-
.map_err(|error| {
83-
let split_ids = splits
84-
.iter()
85-
.map(|split| split.split_id.clone())
86-
.collect_vec();
87-
error!(split_ids = ?split_ids, error = ?error, "error when fetching docs in splits");
88-
anyhow::anyhow!(
89-
"error when fetching docs for splits {:?}: {:?}",
90-
split_ids,
91-
error
92-
)
93-
})?;
82+
let split_fetch_docs: Vec<Vec<(GlobalDocAddress, Document)>> =
83+
futures::future::try_join_all(split_fetch_docs_futures)
84+
.await
85+
.map_err(|error| {
86+
error!(error = ?error, "error when fetching docs in a split");
87+
error
88+
})?;
9489

9590
let global_doc_addr_to_doc_json: HashMap<GlobalDocAddress, Document> = split_fetch_docs
9691
.into_iter()

quickwit/quickwit-search/src/list_terms.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -141,7 +141,7 @@ pub async fn root_list_terms(
141141
.collect();
142142

143143
if !failed_splits.is_empty() {
144-
error!(failed_splits = ?failed_splits, "leaf search response contains at least one failed split");
144+
error!(num_failed_splits = failed_splits.len(), failed_splits = ?PrettySample::new(&failed_splits, 5), "leaf search response contains failed splits");
145145
let errors: String = failed_splits
146146
.iter()
147147
.map(|splits| splits.to_string())

quickwit/quickwit-search/src/root.rs

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -778,10 +778,11 @@ pub(crate) async fn search_partial_hits_phase(
778778
.map_err(|error: TantivyError| crate::SearchError::Internal(error.to_string()))?;
779779
debug!(
780780
num_hits = leaf_search_response.num_hits,
781-
failed_splits = ?leaf_search_response.failed_splits,
781+
num_failed_splits = leaf_search_response.failed_splits.len(),
782+
failed_splits = ?PrettySample::new(&leaf_search_response.failed_splits, 5),
782783
num_attempted_splits = leaf_search_response.num_attempted_splits,
783784
has_intermediate_aggregation_result = leaf_search_response.intermediate_aggregation_result.is_some(),
784-
"Merged leaf search response."
785+
"merged leaf search response"
785786
);
786787

787788
if let Some(resource_stats) = &leaf_search_response.resource_stats
@@ -800,7 +801,7 @@ pub(crate) async fn search_partial_hits_phase(
800801
}
801802

802803
if !leaf_search_response.failed_splits.is_empty() {
803-
quickwit_common::rate_limited_error!(limit_per_min=6, failed_splits = ?leaf_search_response.failed_splits, "leaf search response contains at least one failed split");
804+
quickwit_common::rate_limited_error!(limit_per_min=6, num_failed_splits = leaf_search_response.failed_splits.len(), failed_splits = ?PrettySample::new(&leaf_search_response.failed_splits, 5), "leaf search response contains failed splits");
804805
}
805806

806807
Ok(leaf_search_response)

0 commit comments

Comments
 (0)