Skip to content

Commit 0ef17e7

Browse files
authored
fix rmb calls timeout (#202)
* feat: app kill on listener thread panic with backoff strategy Signed-off-by: nabil salah <nabil.salah203@gmail.com> * fix: linting Signed-off-by: nabil salah <nabil.salah203@gmail.com> * fix: backoff reset at success Signed-off-by: nabil salah <nabil.salah203@gmail.com> --------- Signed-off-by: nabil salah <nabil.salah203@gmail.com>
1 parent 11b1c05 commit 0ef17e7

2 files changed

Lines changed: 57 additions & 9 deletions

File tree

src/bins/rmb-relay.rs

Lines changed: 54 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use rmb::relay::{
1111
limiter::{FixedWindowOptions, Limiters},
1212
};
1313
use rmb::twin::SubstrateTwinDB;
14+
use tokio::sync::oneshot;
1415

1516
/// A peer requires only which relay to connect to, and
1617
/// which identity (mnemonics)
@@ -92,7 +93,7 @@ fn set_limits() -> Result<()> {
9293
Ok(())
9394
}
9495

95-
async fn app(args: Args) -> Result<()> {
96+
async fn app(args: Args, tx: oneshot::Sender<()>) -> Result<()> {
9697
if args.workers == 0 {
9798
anyhow::bail!("number of workers cannot be zero");
9899
}
@@ -172,10 +173,42 @@ async fn app(args: Args) -> Result<()> {
172173

173174
let mut l = events::Listener::new(args.substrate, redis_cache).await?;
174175
tokio::spawn(async move {
175-
l.listen()
176-
.await
177-
.context("failed to listen to chain events")
178-
.unwrap();
176+
let max_retries = 9; // backoff doubles from 1 s: longest single wait is 2^8 = 256 s, 511 s (~8.5 minutes) in total
177+
let mut attempt = 0;
178+
let mut backoff = Duration::from_secs(1);
179+
let mut got_hit = false;
180+
181+
loop {
182+
match l
183+
.listen(&mut got_hit)
184+
.await
185+
.context("failed to listen to chain events")
186+
{
187+
Ok(_) => break,
188+
Err(e) => {
189+
if got_hit {
190+
log::warn!("Listener got a hit, but failed to listen to chain events before no attempts will be reset");
191+
got_hit = false;
192+
attempt = 0;
193+
backoff = Duration::from_secs(1);
194+
}
195+
attempt += 1;
196+
if attempt > max_retries {
197+
log::error!("Listener failed after {} attempts: {:?}", attempt - 1, e);
198+
let _ = tx.send(());
199+
break;
200+
}
201+
log::warn!(
202+
"Listener failed on attempt {}: {:?}. Retrying in {:?}...",
203+
attempt,
204+
e,
205+
backoff
206+
);
207+
tokio::time::sleep(backoff).await;
208+
backoff *= 2;
209+
}
210+
}
211+
}
179212
});
180213

181214
r.start(&args.listen).await.unwrap();
@@ -185,8 +218,21 @@ async fn app(args: Args) -> Result<()> {
185218
#[tokio::main]
186219
async fn main() {
187220
let args = Args::parse();
188-
if let Err(e) = app(args).await {
189-
eprintln!("{:#}", e);
190-
std::process::exit(1);
221+
let (tx, rx) = oneshot::channel();
222+
let app_handle = tokio::spawn(async move {
223+
if let Err(e) = app(args, tx).await {
224+
eprintln!("{:#}", e);
225+
std::process::exit(1);
226+
}
227+
});
228+
229+
tokio::select! {
230+
_ = app_handle => {
231+
log::info!("Application is closing successfully.");
232+
}
233+
_ = rx => {
234+
log::error!("Listener shutdown signal received. Exiting application.");
235+
std::process::exit(1);
236+
}
191237
}
192238
}

src/events/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ where
6060
anyhow::bail!("failed to connect to substrate using the provided urls")
6161
}
6262

63-
pub async fn listen(&mut self) -> Result<()> {
63+
pub async fn listen(&mut self, got_hit: &mut bool) -> Result<()> {
6464
loop {
6565
// always flush in case some blocks were finalized before reconnecting
6666
if let Err(err) = self.cache.flush().await {
@@ -73,6 +73,8 @@ where
7373
if let Some(subxt::Error::Rpc(_)) = err.downcast_ref::<subxt::Error>() {
7474
self.api = Self::connect(&mut self.substrate_urls).await?;
7575
}
76+
} else {
77+
*got_hit = true
7678
}
7779
}
7880
}

0 commit comments

Comments
 (0)