@@ -6,15 +6,18 @@ use crate::app::background::Activator;
66use crate :: app:: background:: BackgroundTask ;
77use crate :: app:: background:: tasks:: fm_sitrep_load:: CurrentSitrep ;
88use anyhow:: Context ;
9+ use chrono:: Utc ;
910use futures:: future:: BoxFuture ;
1011use nexus_db_queries:: context:: OpContext ;
1112use nexus_db_queries:: db:: DataStore ;
13+ use nexus_db_queries:: db:: datastore;
1214use nexus_db_queries:: db:: pagination:: Paginator ;
1315use nexus_fm as fm;
1416use nexus_types:: internal_api:: background:: FmAnalysisStatus ;
1517use nexus_types:: internal_api:: background:: fm_analysis as status;
1618use nexus_types:: inventory;
1719use omicron_uuid_kinds:: GenericUuid ;
20+ use omicron_uuid_kinds:: OmicronZoneUuid ;
1821use serde_json:: json;
1922use slog_error_chain:: InlineErrorChain ;
2023use std:: sync:: Arc ;
@@ -26,6 +29,15 @@ pub struct FmAnalysis {
2629 sitrep_rx : watch:: Receiver < Option < CurrentSitrep > > ,
2730 inv_rx : watch:: Receiver < Option < Arc < inventory:: Collection > > > ,
2831 sitrep_loader : Activator ,
32+ sitrep_gc : Activator ,
33+ nexus_id : OmicronZoneUuid ,
34+ }
35+
36+ /// This is just because I don't like it when a constructor takes multiple
37+ /// positional arguments of the same type...
38+ pub struct Activators {
39+ pub sitrep_loader : Activator ,
40+ pub sitrep_gc : Activator ,
2941}
3042
3143impl BackgroundTask for FmAnalysis {
@@ -54,9 +66,18 @@ impl FmAnalysis {
5466 datastore : Arc < DataStore > ,
5567 sitrep_rx : watch:: Receiver < Option < CurrentSitrep > > ,
5668 inv_rx : watch:: Receiver < Option < Arc < inventory:: Collection > > > ,
57- sitrep_loader : Activator ,
69+ activators : Activators ,
70+ nexus_id : OmicronZoneUuid ,
5871 ) -> Self {
59- Self { datastore, sitrep_rx, inv_rx, sitrep_loader }
72+ let Activators { sitrep_loader, sitrep_gc } = activators;
73+ Self {
74+ datastore,
75+ sitrep_rx,
76+ inv_rx,
77+ sitrep_loader,
78+ sitrep_gc,
79+ nexus_id,
80+ }
6081 }
6182
6283 async fn actually_activate (
@@ -112,25 +133,15 @@ impl FmAnalysis {
112133 } ;
113134
114135 // Okay, actually run analysis and generate a new sitrep.
115- let outcome = self
116- . analyze ( & opctx, inputs)
117- . await
118- . unwrap_or_else ( |err| {
119- let error = InlineErrorChain :: new ( & * err) ;
120- slog:: error!( opctx. log, "fault management analysis failed!" ; & error) ;
121- status:: AnalysisOutcome :: Error ( error. to_string ( ) )
122- } ) ;
123-
124- if let status:: AnalysisOutcome :: Committed { .. } = & outcome {
125- // If we committed a new sitrep, we ought to go ahead and load it
126- // now...
127- self . sitrep_loader . activate ( ) ;
128- }
136+ let outcome = self . analyze ( & opctx, inputs) . await ;
129137
130138 FmAnalysisStatus {
131139 parent_sitrep_id,
132140 inv_collection_id : Some ( inv_collection_id) ,
133- outcome : status:: Outcome :: RanAnalysis { prep_status, outcome } ,
141+ outcome : status:: Outcome :: RanAnalysis {
142+ prep_status,
143+ analysis_status : outcome,
144+ } ,
134145 }
135146 }
136147
@@ -205,9 +216,91 @@ impl FmAnalysis {
205216
206217 async fn analyze (
207218 & mut self ,
208- _opctx : & OpContext ,
209- _inputs : fm:: analysis_input:: Input ,
210- ) -> anyhow:: Result < status:: AnalysisOutcome > {
211- anyhow:: bail!( "FM analysis is not yet implemented" )
219+ opctx : & OpContext ,
220+ inputs : fm:: analysis_input:: Input ,
221+ ) -> status:: AnalysisStatus {
222+ let start_time = Utc :: now ( ) ;
223+ let mut sitrep_builder = fm:: SitrepBuilder :: new ( & opctx. log , & inputs) ;
224+ let result = fm:: diagnosis:: analyze ( & inputs, & mut sitrep_builder) ;
225+ let end_time = Utc :: now ( ) ;
226+ let ( sitrep, report) = sitrep_builder. build ( self . nexus_id , end_time) ;
227+
228+ // Did it work?
229+ if let Err ( e) = result {
230+ let err = InlineErrorChain :: new ( & * e) ;
231+ slog:: error!( & opctx. log, "fault management analysis failed" ; "err" => %err) ;
232+ return status:: AnalysisStatus {
233+ start_time,
234+ end_time,
235+ report,
236+ outcome : status:: AnalysisOutcome :: Error ( e. to_string ( ) ) ,
237+ } ;
238+ }
239+
240+ // TODO(eliza): diff the sitrep against the parent, and return
241+ // `Unchanged` if it's the same.
242+ let unchanged = true ;
243+ if unchanged {
244+ slog:: info!(
245+ & opctx. log,
246+ "fault management analysis produced no changes from the \
247+ current sitrep"
248+ ) ;
249+ return status:: AnalysisStatus {
250+ start_time,
251+ end_time,
252+ report,
253+ outcome : status:: AnalysisOutcome :: Unchanged ,
254+ } ;
255+ }
256+
257+ let sitrep_id = sitrep. id ( ) ;
258+ match self . datastore . fm_sitrep_insert ( opctx, sitrep) . await {
259+ Ok ( ( ) ) => {
260+ slog:: info!( & opctx. log, "updated the current sitrep!" ) ;
261+ // If we committed a new sitrep, we ought to go ahead and load it
262+ // now...
263+ self . sitrep_loader . activate ( ) ;
264+ status:: AnalysisStatus {
265+ start_time,
266+ end_time,
267+ report,
268+ outcome : status:: AnalysisOutcome :: Committed { sitrep_id } ,
269+ }
270+ }
271+ Err ( datastore:: fm:: InsertSitrepError :: ParentNotCurrent ( _) ) => {
272+ slog:: info!(
273+ & opctx. log,
274+ "new sitrep was not committed as the parent sitrep was \
275+ out of date";
276+ ) ;
277+ // We are behind, activate the sitrep loader to try and catch up!
278+ self . sitrep_loader . activate ( ) ;
279+ // Also, we should probably clean up after ourselves...
280+ self . sitrep_gc . activate ( ) ;
281+
282+ status:: AnalysisStatus {
283+ start_time,
284+ end_time,
285+ report,
286+ outcome : status:: AnalysisOutcome :: NotCommitted {
287+ sitrep_id,
288+ } ,
289+ }
290+ }
291+ Err ( datastore:: fm:: InsertSitrepError :: Other ( e) ) => {
292+ let err = InlineErrorChain :: new ( & e) ;
293+ slog:: error!( & opctx. log, "failed to insert sitrep" ; "err" => %err) ;
294+ status:: AnalysisStatus {
295+ start_time,
296+ end_time,
297+ report,
298+ outcome : status:: AnalysisOutcome :: CommitFailed {
299+ sitrep_id,
300+ error : e. to_string ( ) ,
301+ } ,
302+ }
303+ }
304+ }
212305 }
213306}
0 commit comments