Skip to content

Commit e4cb769

Browse files
authored
Extend provider load control (match config) to substreams (#4339)
- Fixes issue #4338 - Adds rules to ProviderDetails::Firehose - FirehoseEndpoints::random now checks for capacity before returning an endpoint
1 parent 22e94d1 commit e4cb769

6 files changed

Lines changed: 299 additions & 17 deletions

File tree

chain/ethereum/examples/firehose.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use anyhow::Error;
22
use graph::{
33
env::env_var,
4+
firehose::SubgraphLimit,
45
prelude::{prost, tokio, tonic},
56
{firehose, firehose::FirehoseEndpoint},
67
};
@@ -25,6 +26,7 @@ async fn main() -> Result<(), Error> {
2526
token,
2627
false,
2728
false,
29+
SubgraphLimit::Unlimited,
2830
));
2931

3032
loop {

chain/substreams/examples/substreams.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use anyhow::{format_err, Context, Error};
22
use graph::blockchain::block_stream::BlockStreamEvent;
33
use graph::blockchain::substreams_block_stream::SubstreamsBlockStream;
4+
use graph::firehose::SubgraphLimit;
45
use graph::prelude::{info, tokio, DeploymentHash, Registry};
56
use graph::tokio_stream::StreamExt;
67
use graph::{env::env_var, firehose::FirehoseEndpoint, log::logger, substreams};
@@ -46,6 +47,7 @@ async fn main() -> Result<(), Error> {
4647
token,
4748
false,
4849
false,
50+
SubgraphLimit::Unlimited,
4951
));
5052

5153
let mut stream: SubstreamsBlockStream<graph_chain_substreams::Chain> =

graph/src/firehose/endpoints.rs

Lines changed: 96 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use crate::{
99
substreams,
1010
};
1111

12+
use anyhow::bail;
1213
use futures03::StreamExt;
1314
use http::uri::{Scheme, Uri};
1415
use slog::Logger;
@@ -22,17 +23,28 @@ use tonic::{
2223

2324
use super::codec as firehose;
2425

25-
const SUBGRAPHS_PER_CONN: usize = 100;
26+
/// This is constant because we found this magic number of connections after
27+
/// which the grpc connections start to hang.
28+
/// For more details see: https://github.com/graphprotocol/graph-node/issues/3879
29+
pub const SUBGRAPHS_PER_CONN: usize = 100;
2630

2731
#[derive(Clone, Debug)]
2832
pub struct FirehoseEndpoint {
2933
pub provider: String,
3034
pub token: Option<String>,
3135
pub filters_enabled: bool,
3236
pub compression_enabled: bool,
37+
pub subgraph_limit: usize,
3338
channel: Channel,
3439
}
3540

41+
#[derive(Clone, Debug)]
42+
pub enum SubgraphLimit {
43+
Unlimited,
44+
Limit(usize),
45+
NoTraffic,
46+
}
47+
3648
impl Display for FirehoseEndpoint {
3749
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
3850
Display::fmt(self.provider.as_str(), f)
@@ -46,6 +58,7 @@ impl FirehoseEndpoint {
4658
token: Option<String>,
4759
filters_enabled: bool,
4860
compression_enabled: bool,
61+
subgraph_limit: SubgraphLimit,
4962
) -> Self {
5063
let uri = url
5164
.as_ref()
@@ -78,15 +91,31 @@ impl FirehoseEndpoint {
7891
// Timeout on each request, so the timeout to estabilish each 'Blocks' stream.
7992
.timeout(Duration::from_secs(120));
8093

94+
let subgraph_limit = match subgraph_limit {
95+
// See the comment on the constant
96+
SubgraphLimit::Unlimited => SUBGRAPHS_PER_CONN,
97+
// This is checked when parsing from config but doesn't hurt to be defensive.
98+
SubgraphLimit::Limit(limit) => limit.min(SUBGRAPHS_PER_CONN),
99+
SubgraphLimit::NoTraffic => 0,
100+
};
101+
81102
FirehoseEndpoint {
82103
provider: provider.as_ref().to_string(),
83104
channel: endpoint.connect_lazy(),
84105
token,
85106
filters_enabled,
86107
compression_enabled,
108+
subgraph_limit,
87109
}
88110
}
89111

112+
// The SUBGRAPHS_PER_CONN upper bound was already limited so we leave it the same
113+
// we need to use inclusive limits (<=) because there will always be a reference
114+
// inside FirehoseEndpoints that is not used (is always cloned).
115+
pub fn has_subgraph_capacity(self: &Arc<Self>) -> bool {
116+
Arc::strong_count(&self) <= self.subgraph_limit
117+
}
118+
90119
pub async fn get_block<M>(
91120
&self,
92121
cursor: FirehoseCursor,
@@ -327,8 +356,8 @@ impl FirehoseEndpoints {
327356
.iter()
328357
.min_by_key(|x| Arc::strong_count(x))
329358
.ok_or(anyhow!("no available firehose endpoints"))?;
330-
if Arc::strong_count(endpoint) > SUBGRAPHS_PER_CONN {
331-
return Err(anyhow!("all connections saturated with {} connections, increase the firehose conn_pool_size", SUBGRAPHS_PER_CONN));
359+
if !endpoint.has_subgraph_capacity() {
360+
bail!("all connections saturated with {} connections, increase the firehose conn_pool_size or limit for the node", SUBGRAPHS_PER_CONN);
332361
}
333362

334363
// Cloning here ensure we have the correct count at any given time, if we return a reference it can be cloned later
@@ -396,22 +425,22 @@ impl FirehoseNetworks {
396425

397426
#[cfg(test)]
398427
mod test {
399-
use std::{mem, str::FromStr, sync::Arc};
428+
use std::{mem, sync::Arc};
400429

401-
use http::Uri;
402-
use tonic::transport::Channel;
430+
use crate::firehose::SubgraphLimit;
403431

404432
use super::{FirehoseEndpoint, FirehoseEndpoints, SUBGRAPHS_PER_CONN};
405433

406434
#[tokio::test]
407435
async fn firehose_endpoint_errors() {
408-
let endpoint = vec![Arc::new(FirehoseEndpoint {
409-
provider: String::new(),
410-
token: None,
411-
filters_enabled: true,
412-
compression_enabled: true,
413-
channel: Channel::builder(Uri::from_str("http://127.0.0.1").unwrap()).connect_lazy(),
414-
})];
436+
let endpoint = vec![Arc::new(FirehoseEndpoint::new(
437+
String::new(),
438+
"http://127.0.0.1".to_string(),
439+
None,
440+
false,
441+
false,
442+
SubgraphLimit::Unlimited,
443+
))];
415444

416445
let mut endpoints = FirehoseEndpoints::from(endpoint);
417446

@@ -432,4 +461,58 @@ mod test {
432461
let err = endpoints.random().unwrap_err();
433462
assert!(err.to_string().contains("no available firehose endpoints"));
434463
}
464+
465+
#[tokio::test]
466+
async fn firehose_endpoint_with_limit() {
467+
let endpoint = vec![Arc::new(FirehoseEndpoint::new(
468+
String::new(),
469+
"http://127.0.0.1".to_string(),
470+
None,
471+
false,
472+
false,
473+
SubgraphLimit::Limit(2),
474+
))];
475+
476+
let mut endpoints = FirehoseEndpoints::from(endpoint);
477+
478+
let mut keep = vec![];
479+
for _ in 0..2 {
480+
keep.push(endpoints.random().unwrap());
481+
}
482+
483+
let err = endpoints.random().unwrap_err();
484+
assert!(err.to_string().contains("conn_pool_size"));
485+
486+
mem::drop(keep);
487+
endpoints.random().unwrap();
488+
489+
// Fails when empty too
490+
endpoints.remove("");
491+
492+
let err = endpoints.random().unwrap_err();
493+
assert!(err.to_string().contains("no available firehose endpoints"));
494+
}
495+
496+
#[tokio::test]
497+
async fn firehose_endpoint_no_traffic() {
498+
let endpoint = vec![Arc::new(FirehoseEndpoint::new(
499+
String::new(),
500+
"http://127.0.0.1".to_string(),
501+
None,
502+
false,
503+
false,
504+
SubgraphLimit::NoTraffic,
505+
))];
506+
507+
let mut endpoints = FirehoseEndpoints::from(endpoint);
508+
509+
let err = endpoints.random().unwrap_err();
510+
assert!(err.to_string().contains("conn_pool_size"));
511+
512+
// Fails when empty too
513+
endpoints.remove("");
514+
515+
let err = endpoints.random().unwrap_err();
516+
assert!(err.to_string().contains("no available firehose endpoints"));
517+
}
435518
}

node/src/chain.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use futures::TryFutureExt;
55
use graph::anyhow::Error;
66
use graph::blockchain::{Block as BlockchainBlock, BlockchainKind, ChainIdentifier};
77
use graph::cheap_clone::CheapClone;
8-
use graph::firehose::{FirehoseEndpoint, FirehoseNetworks};
8+
use graph::firehose::{FirehoseEndpoint, FirehoseNetworks, SubgraphLimit};
99
use graph::ipfs_client::IpfsClient;
1010
use graph::prelude::{anyhow, tokio};
1111
use graph::prelude::{prost, MetricsRegistry as MetricsRegistryTrait};
@@ -137,6 +137,7 @@ pub fn create_substreams_networks(
137137
firehose.token.clone(),
138138
firehose.filters_enabled(),
139139
firehose.compression_enabled(),
140+
SubgraphLimit::Unlimited,
140141
)),
141142
);
142143
}
@@ -168,10 +169,22 @@ pub fn create_firehose_networks(
168169
"Configuring firehose endpoint";
169170
"provider" => &provider.label,
170171
);
172+
let subgraph_limit = match firehose.limit_for(&config.node) {
173+
Some(limit) if limit == 0 => SubgraphLimit::Unlimited,
174+
Some(limit) => SubgraphLimit::Limit(limit),
175+
None => SubgraphLimit::NoTraffic,
176+
};
171177

172178
let parsed_networks = networks_by_kind
173179
.entry(chain.protocol)
174180
.or_insert_with(|| FirehoseNetworks::new());
181+
182+
// Create n FirehoseEndpoints where n is the size of the pool. If a
183+
// subgraph limit is defined for this endpoint then each endpoint
184+
// instance will have their own subgraph limit.
185+
// eg: pool_size = 3 and sg_limit 2 will result in 3 separate instances
186+
// of FirehoseEndpoint and each of those instance can be used in 2 different
187+
// SubgraphInstances.
175188
for i in 0..firehose.conn_pool_size {
176189
parsed_networks.insert(
177190
name.to_string(),
@@ -181,6 +194,7 @@ pub fn create_firehose_networks(
181194
firehose.token.clone(),
182195
firehose.filters_enabled(),
183196
firehose.compression_enabled(),
197+
subgraph_limit.clone(),
184198
)),
185199
);
186200
}

0 commit comments

Comments
 (0)