@@ -6,6 +6,7 @@ use libdd_common::Endpoint;
66use libdd_trace_utils:: {
77 config_utils:: trace_intake_url_prefixed,
88 send_data:: SendData ,
9+ send_with_retry:: { RetryBackoffType , RetryStrategy } ,
910 trace_utils:: { self } ,
1011 tracer_payload:: TracerPayloadCollection ,
1112} ;
@@ -14,11 +15,25 @@ use std::sync::Arc;
1415use tokio:: task:: JoinSet ;
1516use tracing:: { debug, error} ;
1617
18+ use crate :: FLUSH_RETRY_COUNT ;
1719use crate :: config:: Config ;
1820use crate :: lifecycle:: invocation:: processor:: S_TO_MS ;
1921use crate :: traces:: http_client:: HttpClient ;
2022use crate :: traces:: trace_aggregator_service:: AggregatorHandle ;
2123
24+ /// Retry strategy for trace flushing using the shared `FLUSH_RETRY_COUNT`
25+ /// with no delay between attempts. In Lambda, every millisecond of wall-clock
26+ /// time matters, and the per-attempt request timeout already bounds how long
27+ /// each retry can take.
28+ fn trace_retry_strategy ( ) -> RetryStrategy {
29+ RetryStrategy :: new (
30+ u32:: try_from ( FLUSH_RETRY_COUNT ) . unwrap_or ( 3 ) ,
31+ 0 ,
32+ RetryBackoffType :: Constant ,
33+ None ,
34+ )
35+ }
36+
2237pub struct TraceFlusher {
2338 pub aggregator_handle : AggregatorHandle ,
2439 pub config : Arc < Config > ,
@@ -113,7 +128,11 @@ impl TraceFlusher {
113128 let traces_with_tags: Vec < _ > = trace_builders
114129 . into_iter ( )
115130 . map ( |info| {
116- let trace = info. builder . with_api_key ( api_key. as_str ( ) ) . build ( ) ;
131+ let trace = info
132+ . builder
133+ . with_api_key ( api_key. as_str ( ) )
134+ . with_retry_strategy ( trace_retry_strategy ( ) )
135+ . build ( ) ;
117136 ( trace, info. header_tags )
118137 } )
119138 . collect ( ) ;
@@ -125,12 +144,16 @@ impl TraceFlusher {
125144 let additional_traces: Vec < _ > = traces_with_tags
126145 . iter ( )
127146 . filter_map ( |( trace, tags) | match trace. get_payloads ( ) {
128- TracerPayloadCollection :: V07 ( payloads) => Some ( SendData :: new (
129- trace. len ( ) ,
130- TracerPayloadCollection :: V07 ( payloads. clone ( ) ) ,
131- tags. to_tracer_header_tags ( ) ,
132- & endpoint,
133- ) ) ,
147+ TracerPayloadCollection :: V07 ( payloads) => {
148+ let mut send_data = SendData :: new (
149+ trace. len ( ) ,
150+ TracerPayloadCollection :: V07 ( payloads. clone ( ) ) ,
151+ tags. to_tracer_header_tags ( ) ,
152+ & endpoint,
153+ ) ;
154+ send_data. set_retry_strategy ( trace_retry_strategy ( ) ) ;
155+ Some ( send_data)
156+ }
134157 // All payloads in the extension are V07 (produced by
135158 // collect_pb_trace_chunks), so this branch is unreachable.
136159 _ => None ,
@@ -174,12 +197,23 @@ impl TraceFlusher {
174197 debug ! ( "TRACES | Flushing {} traces" , coalesced_traces. len( ) ) ;
175198
176199 for trace in & coalesced_traces {
177- let send_result = trace. send ( & http_client) . await . last_result ;
178-
179- if let Err ( e) = send_result {
180- error ! ( "TRACES | Request failed: {e:?}" ) ;
200+ let result = trace. send ( & http_client) . await ;
201+
202+ if let Err ( e) = & result. last_result {
203+ error ! (
204+ "TRACES | Request failed after {} attempts ({} timeouts, {} network errors, {} status code errors): {e:?}" ,
205+ result. requests_count,
206+ result. errors_timeout,
207+ result. errors_network,
208+ result. errors_status_code,
209+ ) ;
181210 return Some ( coalesced_traces) ;
182211 }
212+
213+ debug ! (
214+ "TRACES | Successfully sent trace ({} attempts, {} bytes)" ,
215+ result. requests_count, result. bytes_sent,
216+ ) ;
183217 }
184218
185219 debug ! ( "TRACES | Flushing took {} ms" , start. elapsed( ) . as_millis( ) ) ;
0 commit comments