@@ -17,9 +17,10 @@ use uuid::Uuid;
1717use crate :: {
1818 api_secret:: ApiSecret ,
1919 caps:: Caps ,
20+ logs:: LogCollector ,
2021 net_diagnostics:: { DiagnosticsReport , checks:: run_diagnostics} ,
2122 protocol:: {
22- ALPN , Auth , IrohServicesClient , NameEndpoint , Ping , Pong , PutMetrics ,
23+ ALPN , Auth , IrohServicesClient , NameEndpoint , Ping , Pong , PutLogs , PutMetrics ,
2324 PutNetworkDiagnostics , RemoteError ,
2425 } ,
2526} ;
@@ -54,6 +55,7 @@ pub struct Client {
5455 endpoint : Endpoint ,
5556 message_channel : tokio:: sync:: mpsc:: Sender < ClientActorMessage > ,
5657 _actor_task : Arc < AbortOnDropHandle < ( ) > > ,
58+ _log_flush_task : Option < Arc < AbortOnDropHandle < ( ) > > > ,
5759}
5860
5961/// ClientBuilder configures and builds an iroh-services client, typically
@@ -67,11 +69,19 @@ pub struct ClientBuilder {
6769 metrics_interval : Option < Duration > ,
6870 remote : Option < EndpointAddr > ,
6971 registry : Registry ,
72+ log_collector : Option < LogCollector > ,
73+ log_flush_interval : Duration ,
74+ log_max_batch : usize ,
7075}
7176
7277const DEFAULT_CAP_EXPIRY : Duration = Duration :: from_secs ( 60 * 60 * 24 * 30 ) ; // 1 month
7378pub const API_SECRET_ENV_VAR_NAME : & str = "IROH_SERVICES_API_SECRET" ;
7479
80+ /// Default interval between log batch flushes when log collection is enabled.
81+ pub const DEFAULT_LOG_FLUSH_INTERVAL : Duration = Duration :: from_secs ( 1 ) ;
82+ /// Default maximum batch size pushed in a single PutLogs request.
83+ pub const DEFAULT_LOG_MAX_BATCH : usize = 200 ;
84+
7585impl ClientBuilder {
7686 pub fn new ( endpoint : & Endpoint ) -> Self {
7787 let mut registry = Registry :: default ( ) ;
@@ -85,9 +95,35 @@ impl ClientBuilder {
8595 metrics_interval : Some ( Duration :: from_secs ( 60 ) ) ,
8696 remote : None ,
8797 registry,
98+ log_collector : None ,
99+ log_flush_interval : DEFAULT_LOG_FLUSH_INTERVAL ,
100+ log_max_batch : DEFAULT_LOG_MAX_BATCH ,
88101 }
89102 }
90103
104+ /// Enables periodic shipment of buffered log lines to iroh-services.
105+ ///
106+ /// The collector is shared with [`crate::client_host::ClientHost`] when
107+ /// runtime log-level overrides are needed; clone it before passing so both
108+ /// sides hold a handle.
109+ pub fn with_log_collection ( mut self , collector : LogCollector ) -> Self {
110+ self . log_collector = Some ( collector) ;
111+ self
112+ }
113+
114+ /// Override the log batch flush interval. Defaults to one second.
115+ pub fn log_flush_interval ( mut self , interval : Duration ) -> Self {
116+ self . log_flush_interval = interval;
117+ self
118+ }
119+
120+ /// Override the maximum number of lines included in a single PutLogs
121+ /// request. Defaults to [`DEFAULT_LOG_MAX_BATCH`].
122+ pub fn log_max_batch ( mut self , max : usize ) -> Self {
123+ self . log_max_batch = max;
124+ self
125+ }
126+
91127 /// Register a metrics group to forward to iroh-services
92128 ///
93129 /// The default registered metrics uses only the endpoint
@@ -213,22 +249,37 @@ impl ClientBuilder {
213249 let conn = IrohLazyRemoteConnection :: new ( self . endpoint . clone ( ) , remote, ALPN . to_vec ( ) ) ;
214250 let irpc_client = IrohServicesClient :: boxed ( conn) ;
215251
216- let ( tx, rx) = tokio:: sync:: mpsc:: channel ( 1 ) ;
252+ let session_id = Uuid :: new_v4 ( ) ;
253+ // The actor mailbox is only used for control-plane messages (auth,
254+ // ping, name, grant_cap) plus the periodic metrics + log flush. A
255+ // small buffer is enough but `1` head-of-line-blocks log flushes
256+ // behind metrics ticks, so leave a little room.
257+ let ( tx, rx) = tokio:: sync:: mpsc:: channel ( 8 ) ;
217258 let actor_task = AbortOnDropHandle :: new ( n0_future:: task:: spawn (
218259 ClientActor {
219260 capabilities,
220261 client : irpc_client,
221262 name : self . name . clone ( ) ,
222- session_id : Uuid :: new_v4 ( ) ,
263+ session_id,
223264 authorized : false ,
224265 }
225266 . run ( self . name , self . registry , self . metrics_interval , rx) ,
226267 ) ) ;
227268
269+ let log_flush_task = self . log_collector . map ( |collector| {
270+ let message_channel = tx. clone ( ) ;
271+ let interval = self . log_flush_interval ;
272+ let max_batch = self . log_max_batch ;
273+ Arc :: new ( AbortOnDropHandle :: new ( n0_future:: task:: spawn (
274+ run_log_flush ( message_channel, collector, interval, max_batch, session_id) ,
275+ ) ) )
276+ } ) ;
277+
228278 Ok ( Client {
229279 endpoint : self . endpoint ,
230280 message_channel : tx,
231281 _actor_task : Arc :: new ( actor_task) ,
282+ _log_flush_task : log_flush_task,
232283 } )
233284 }
234285}
@@ -425,6 +476,10 @@ enum ClientActorMessage {
425476 report : Box < DiagnosticsReport > ,
426477 done : oneshot:: Sender < Result < ( ) , Error > > ,
427478 } ,
479+ PutLogs {
480+ request : PutLogs ,
481+ done : oneshot:: Sender < Result < ( ) , Error > > ,
482+ } ,
428483 ReadName {
429484 done : oneshot:: Sender < Option < String > > ,
430485 } ,
@@ -505,6 +560,13 @@ impl ClientActor {
505560 warn!( "failed to publish network diagnostics: {:#?}" , err) ;
506561 }
507562 }
563+ ClientActorMessage :: PutLogs { request, done } => {
564+ let res = self . put_logs( request) . await ;
565+ if let Err ( err) = done. send( res) {
566+ debug!( "failed to publish logs: {:#?}" , err) ;
567+ self . authorized = false ;
568+ }
569+ }
508570 }
509571 }
510572 _ = async {
@@ -613,6 +675,77 @@ impl ClientActor {
613675
614676 Ok ( ( ) )
615677 }
678+
679+ async fn put_logs ( & mut self , request : PutLogs ) -> Result < ( ) , Error > {
680+ trace ! (
681+ lines = request. lines. len( ) ,
682+ dropped = request. dropped,
683+ "client actor put logs"
684+ ) ;
685+ self . auth ( ) . await ?;
686+
687+ self . client
688+ . rpc ( request)
689+ . await
690+ . map_err ( |_| RemoteError :: InternalServerError ) ??;
691+
692+ Ok ( ( ) )
693+ }
694+ }
695+
696+ async fn run_log_flush (
697+ message_channel : tokio:: sync:: mpsc:: Sender < ClientActorMessage > ,
698+ collector : LogCollector ,
699+ interval : Duration ,
700+ max_batch : usize ,
701+ session_id : Uuid ,
702+ ) {
703+ const INITIAL_BACKOFF : Duration = Duration :: from_millis ( 500 ) ;
704+ const MAX_BACKOFF : Duration = Duration :: from_secs ( 30 ) ;
705+
706+ let mut ticker = n0_future:: time:: interval ( interval) ;
707+ // After a slow RPC the default `Burst` behavior would fire several
708+ // ticks back-to-back; `Delay` waits a full interval from the previous
709+ // completed tick.
710+ ticker. set_missed_tick_behavior ( tokio:: time:: MissedTickBehavior :: Delay ) ;
711+ let mut backoff = INITIAL_BACKOFF ;
712+ loop {
713+ ticker. tick ( ) . await ;
714+ let ( lines, dropped) = collector. drain ( max_batch) ;
715+ if lines. is_empty ( ) && dropped == 0 {
716+ backoff = INITIAL_BACKOFF ;
717+ continue ;
718+ }
719+ let request = PutLogs {
720+ session_id,
721+ lines,
722+ dropped,
723+ } ;
724+ let ( tx, rx) = oneshot:: channel ( ) ;
725+ if message_channel
726+ . send ( ClientActorMessage :: PutLogs { request, done : tx } )
727+ . await
728+ . is_err ( )
729+ {
730+ // Mailbox closed only when the actor task has terminated; that
731+ // means the entire client is gone and there is nothing to do.
732+ debug ! ( "log flush stopped: client actor channel closed" ) ;
733+ return ;
734+ }
735+ match rx. await {
736+ Ok ( Ok ( ( ) ) ) => {
737+ backoff = INITIAL_BACKOFF ;
738+ }
739+ // Either the RPC failed (Ok(Err)) or the actor dropped the
740+ // response sender mid-handoff (Err(_)). Both are transient: keep
741+ // ticking and back off so the next attempt happens later.
742+ other => {
743+ debug ! ( ?other, ?backoff, "log flush attempt failed; backing off" ) ;
744+ n0_future:: time:: sleep ( backoff) . await ;
745+ backoff = ( backoff * 2 ) . min ( MAX_BACKOFF ) ;
746+ }
747+ }
748+ }
616749}
617750
618751async fn set_name_inner (
0 commit comments