@@ -19,6 +19,7 @@ use graph::data::subgraph::{
1919use graph:: data_source:: {
2020 offchain, DataSource , DataSourceCreationError , DataSourceTemplate , TriggerData ,
2121} ;
22+ use graph:: env:: EnvVars ;
2223use graph:: prelude:: * ;
2324use graph:: util:: { backoff:: ExponentialBackoff , lfu_cache:: LfuCache } ;
2425use std:: sync:: Arc ;
@@ -28,7 +29,7 @@ const MINUTE: Duration = Duration::from_secs(60);
2829
/// Five minutes, expressed as `60 * 5` seconds.
/// NOTE(review): presumably the interval compared against `skip_ptr_updates_timer`;
/// the use site is outside this excerpt — confirm.
2930const SKIP_PTR_UPDATES_THRESHOLD : Duration = Duration :: from_secs ( 60 * 5 ) ;
3031
31- pub ( crate ) struct SubgraphRunner < C , T >
32+ pub struct SubgraphRunner < C , T >
3233where
3334 C : Blockchain ,
3435 T : RuntimeHostBuilder < C > ,
5051 ctx : IndexingContext < C , T > ,
5152 logger : Logger ,
5253 metrics : RunnerMetrics ,
54+ env_vars : Arc < EnvVars > ,
5355 ) -> Self {
5456 Self {
5557 inputs : Arc :: new ( inputs) ,
5961 synced : false ,
6062 skip_ptr_updates_timer : Instant :: now ( ) ,
6163 backoff : ExponentialBackoff :: new (
62- ( MINUTE * 2 ) . min ( ENV_VARS . subgraph_error_retry_ceil ) ,
63- ENV_VARS . subgraph_error_retry_ceil ,
64+ ( MINUTE * 2 ) . min ( env_vars . subgraph_error_retry_ceil ) ,
65+ env_vars . subgraph_error_retry_ceil ,
6466 ) ,
6567 entity_lfu_cache : LfuCache :: new ( ) ,
6668 } ,
6971 }
7072 }
7173
72- pub async fn run ( mut self ) -> Result < ( ) , Error > {
74+ /// Revert the runner's in-memory state to a previous block. When handling revert
75+ /// operations or failed block processing, it is necessary to remove part of the
76+ /// existing in-memory state to keep it consistent with DB changes.
77+ /// During block processing new dynamic data sources are added directly to the
78+ /// SubgraphInstance of the runner. This means that if, for whatever reason,
79+ /// the changes don't complete then the remnants of that block processing must
80+ /// be removed. The same thing also applies to the entity cache, which is reset here.
81+ /// This function must be called before continuing to process in order to avoid
82+ /// duplicated host insertion and POI issues with dirty entity changes.
83+ fn revert_state ( & mut self , block_number : BlockNumber ) -> Result < ( ) , Error > {
84+ self . state . entity_lfu_cache = LfuCache :: new ( ) ;
85+ // The cache is replaced first, so it stays reset even if the revert below returns an error.
86+ // 1. Revert all hosts (created by DDS) up to block_number inclusively.
87+ // 2. Unmark any offchain data sources that were marked done on the blocks being removed.
88+ // When no offchain data sources are present, 2. should be a noop.
89+ self . ctx . revert_data_sources ( block_number) ?;
90+ Ok ( ( ) )
91+ }
92+
/// Borrow the runner's indexing context (`self.ctx`).
///
/// Only compiled when debug assertions are enabled.
93+ #[ cfg( debug_assertions) ]
94+ pub fn context ( & self ) -> & IndexingContext < C , T > {
95+ & self . ctx
96+ }
97+
/// Run the subgraph through `run_inner`, forwarding `break_on_restart` unchanged.
///
/// In the visible part of `run_inner`, a `Restart` action with `break_on_restart`
/// set flushes the store and returns the runner instead of restarting.
/// Consumes the runner and hands it back on success. Only compiled when debug
/// assertions are enabled.
98+ #[ cfg( debug_assertions) ]
99+ pub async fn run_for_test ( self , break_on_restart : bool ) -> Result < Self , Error > {
100+ self . run_inner ( break_on_restart) . await
101+ }
102+
/// Run the subgraph by delegating to `run_inner` with `break_on_restart` set to
/// `false`, so a `Restart` action is not turned into an early return.
///
/// Consumes the runner and returns it on success; errors from `run_inner` are
/// passed through unchanged.
103+ pub async fn run ( self ) -> Result < Self , Error > {
104+ self . run_inner ( false ) . await
105+ }
106+
107+ async fn run_inner ( mut self , break_on_restart : bool ) -> Result < Self , Error > {
73108 // If a subgraph failed for deterministic reasons, before start indexing, we first
74109 // revert the deployment head. It should lead to the same result since the error was
75110 // deterministic.
@@ -134,7 +169,12 @@ where
134169 Action :: Stop => {
135170 info ! ( self . logger, "Stopping subgraph" ) ;
136171 self . inputs . store . flush ( ) . await ?;
137- return Ok ( ( ) ) ;
172+ return Ok ( self ) ;
173+ }
174+ Action :: Restart if break_on_restart => {
175+ info ! ( self . logger, "Stopping subgraph on break" ) ;
176+ self . inputs . store . flush ( ) . await ?;
177+ return Ok ( self ) ;
138178 }
139179 Action :: Restart => break ,
140180 } ;
@@ -799,16 +839,8 @@ where
799839
800840 // Handle unexpected stream errors by marking the subgraph as failed.
801841 Err ( e) => {
802- // Clear entity cache when a subgraph fails.
803- //
804- // This is done to be safe and sure that there's no state that's
805- // out of sync from the database.
806- //
807- // Without it, POI changes on failure would be kept in the entity cache
808- // and be transacted incorrectly in the next run.
809- self . state . entity_lfu_cache = LfuCache :: new ( ) ;
810-
811842 self . metrics . stream . deployment_failed . set ( 1.0 ) ;
843+ self . revert_state ( block_ptr. block_number ( ) ) ?;
812844
813845 let message = format ! ( "{:#}" , e) . replace ( "\n " , "\t " ) ;
814846 let err = anyhow ! ( "{}, code: {}" , message, LogCode :: SubgraphSyncingFailure ) ;
@@ -920,11 +952,7 @@ where
920952 . deployment_head
921953 . set ( subgraph_ptr. number as f64 ) ;
922954
923- // Revert the in-memory state:
924- // - Revert any dynamic data sources.
925- // - Clear the entity cache.
926- self . ctx . revert_data_sources ( subgraph_ptr. number ) ;
927- self . state . entity_lfu_cache = LfuCache :: new ( ) ;
955+ self . revert_state ( subgraph_ptr. number ) ?;
928956
929957 Ok ( Action :: Continue )
930958 }
0 commit comments