Skip to content

Commit 734d0b2

Browse files
committed
Retry component-graph setup in LogicalMeterHandle::try_new
Wrap the entire fetch-and-build sequence (list components, list connections, build the component graph) in a single retry loop that sleeps 3 seconds between attempts and keeps trying until it succeeds. This way, transient API failures and graph-build errors caused by incomplete server-side data both resolve on their own instead of aborting the calling app at startup.

Signed-off-by: Sahas Subramanian <sahas.subramanian@proton.me>
1 parent 3fe51dd commit 734d0b2

1 file changed

Lines changed: 46 additions & 15 deletions

File tree

src/logical_meter/logical_meter_handle.rs

Lines changed: 46 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use crate::{
1313
};
1414
use frequenz_microgrid_component_graph::{self, ComponentGraph};
1515
use std::collections::BTreeSet;
16+
use std::time::Duration;
1617
use tokio::sync::mpsc;
1718

1819
use super::{LogicalMeterConfig, logical_meter_actor::LogicalMeterActor};
@@ -26,6 +27,11 @@ pub struct LogicalMeterHandle {
2627

2728
impl LogicalMeterHandle {
2829
/// Creates a new LogicalMeter instance.
30+
///
31+
/// Listing the components and connections from the API and building the
32+
/// component graph is retried indefinitely with a fixed 3-second delay, so
33+
/// this call blocks until the server is reachable and returns data that
34+
/// forms a valid graph. Returns an error only if `config` is invalid.
2935
pub async fn try_new(
3036
client: MicrogridClientHandle,
3137
config: LogicalMeterConfig,
@@ -39,21 +45,19 @@ impl LogicalMeterHandle {
3945
clock: C,
4046
) -> Result<Self, Error> {
4147
let (sender, receiver) = mpsc::channel(8);
42-
let graph = ComponentGraph::try_new(
43-
client.list_electrical_components(vec![], vec![]).await?,
44-
client
45-
.list_electrical_component_connections(vec![], vec![])
46-
.await?,
47-
frequenz_microgrid_component_graph::ComponentGraphConfig {
48-
allow_component_validation_failures: true,
49-
allow_unconnected_components: true,
50-
allow_unspecified_inverters: false,
51-
disable_fallback_components: false,
52-
},
53-
)
54-
.map_err(|e| {
55-
Error::component_graph_error(format!("Unable to create a component graph: {e}"))
56-
})?;
48+
const RETRY_DELAY: Duration = Duration::from_secs(3);
49+
let graph = loop {
50+
match build_component_graph(&client).await {
51+
Ok(g) => break g,
52+
Err(reason) => {
53+
tracing::warn!(
54+
"Microgrid logical-meter setup failed, retrying in {:?}: {reason}",
55+
RETRY_DELAY
56+
);
57+
tokio::time::sleep(RETRY_DELAY).await;
58+
}
59+
}
60+
};
5761

5862
let logical_meter = LogicalMeterActor::try_new(receiver, client, config, clock)?;
5963

@@ -174,6 +178,33 @@ impl LogicalMeterHandle {
174178
}
175179
}
176180

181+
/// Lists the components and connections from the API and builds the
182+
/// component graph. Errors from each step are stringified with a prefix so
183+
/// the retry loop can log a concise reason.
///
/// The steps run one after the other: components are listed first, then
/// connections, then the graph is built. The first step that fails ends the
/// call, so later steps are not attempted. Both listing calls are made with
/// two empty vectors (presumably meaning "no filtering" -- confirm against
/// the client API).
///
/// # Errors
///
/// Returns a `String` made of one of the prefixes below followed by the
/// underlying error's `Display` output:
///
/// - `fetching components failed: `
/// - `fetching component connections failed: `
/// - `building component graph failed: `
184+
async fn build_component_graph(
185+
client: &MicrogridClientHandle,
186+
) -> Result<ComponentGraph<ElectricalComponent, ElectricalComponentConnection>, String> {
187+
let components = client
188+
.list_electrical_components(vec![], vec![])
189+
.await
190+
.map_err(|e| format!("fetching components failed: {e}"))?;
191+
let connections = client
192+
.list_electrical_component_connections(vec![], vec![])
193+
.await
194+
.map_err(|e| format!("fetching component connections failed: {e}"))?;
// The result of `try_new` is the function's return value; only its error is
// mapped to a prefixed string.
195+
ComponentGraph::try_new(
196+
components,
197+
connections,
198+
frequenz_microgrid_component_graph::ComponentGraphConfig {
199+
allow_component_validation_failures: true,
200+
allow_unconnected_components: true,
201+
allow_unspecified_inverters: false,
202+
disable_fallback_components: false,
203+
},
204+
)
205+
.map_err(|e| format!("building component graph failed: {e}"))
206+
}
207+
177208
#[cfg(test)]
178209
mod tests {
179210
use chrono::TimeDelta;

0 commit comments

Comments
 (0)