-
Notifications
You must be signed in to change notification settings - Fork 79
[fm] more analysis plumbing #10258
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[fm] more analysis plumbing #10258
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| // This Source Code Form is subject to the terms of the Mozilla Public | ||
| // License, v. 2.0. If a copy of the MPL was not distributed with this | ||
| // file, You can obtain one at https://mozilla.org/MPL/2.0/. | ||
|
|
||
| use crate::SitrepBuilder; | ||
| use crate::analysis_input::Input; | ||
|
|
||
| pub fn analyze( | ||
| _input: &Input, | ||
| _builder: &mut SitrepBuilder<'_>, | ||
| ) -> anyhow::Result<()> { | ||
| anyhow::bail!("FM analysis is not yet implemented") | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,15 +6,18 @@ use crate::app::background::Activator; | |
| use crate::app::background::BackgroundTask; | ||
| use crate::app::background::tasks::fm_sitrep_load::CurrentSitrep; | ||
| use anyhow::Context; | ||
| use chrono::Utc; | ||
| use futures::future::BoxFuture; | ||
| use nexus_db_queries::context::OpContext; | ||
| use nexus_db_queries::db::DataStore; | ||
| use nexus_db_queries::db::datastore; | ||
| use nexus_db_queries::db::pagination::Paginator; | ||
| use nexus_fm as fm; | ||
| use nexus_types::internal_api::background::FmAnalysisStatus; | ||
| use nexus_types::internal_api::background::fm_analysis as status; | ||
| use nexus_types::inventory; | ||
| use omicron_uuid_kinds::GenericUuid; | ||
| use omicron_uuid_kinds::OmicronZoneUuid; | ||
| use serde_json::json; | ||
| use slog_error_chain::InlineErrorChain; | ||
| use std::sync::Arc; | ||
|
|
@@ -26,6 +29,15 @@ pub struct FmAnalysis { | |
| sitrep_rx: watch::Receiver<Option<CurrentSitrep>>, | ||
| inv_rx: watch::Receiver<Option<Arc<inventory::Collection>>>, | ||
| sitrep_loader: Activator, | ||
| sitrep_gc: Activator, | ||
| nexus_id: OmicronZoneUuid, | ||
| } | ||
|
|
||
| /// This is just because I don't like it when a constructor takes multiple | ||
| /// positional arguments of the same type... | ||
| pub struct Activators { | ||
| pub sitrep_loader: Activator, | ||
| pub sitrep_gc: Activator, | ||
| } | ||
|
|
||
| impl BackgroundTask for FmAnalysis { | ||
|
|
@@ -54,9 +66,18 @@ impl FmAnalysis { | |
| datastore: Arc<DataStore>, | ||
| sitrep_rx: watch::Receiver<Option<CurrentSitrep>>, | ||
| inv_rx: watch::Receiver<Option<Arc<inventory::Collection>>>, | ||
| sitrep_loader: Activator, | ||
| activators: Activators, | ||
| nexus_id: OmicronZoneUuid, | ||
| ) -> Self { | ||
| Self { datastore, sitrep_rx, inv_rx, sitrep_loader } | ||
| let Activators { sitrep_loader, sitrep_gc } = activators; | ||
| Self { | ||
| datastore, | ||
| sitrep_rx, | ||
| inv_rx, | ||
| sitrep_loader, | ||
| sitrep_gc, | ||
| nexus_id, | ||
| } | ||
| } | ||
|
|
||
| async fn actually_activate( | ||
|
|
@@ -112,25 +133,15 @@ impl FmAnalysis { | |
| }; | ||
|
|
||
| // Okay, actually run analysis and generate a new sitrep. | ||
| let outcome = self | ||
| .analyze(&opctx, inputs) | ||
| .await | ||
| .unwrap_or_else(|err| { | ||
| let error = InlineErrorChain::new(&*err); | ||
| slog::error!(opctx.log, "fault management analysis failed!"; &error); | ||
| status::AnalysisOutcome::Error(error.to_string()) | ||
| }); | ||
|
|
||
| if let status::AnalysisOutcome::Committed { .. } = &outcome { | ||
| // If we committed a new sitrep, we ought to go ahead and load it | ||
| // now... | ||
| self.sitrep_loader.activate(); | ||
| } | ||
| let outcome = self.analyze(&opctx, inputs).await; | ||
|
|
||
| FmAnalysisStatus { | ||
| parent_sitrep_id, | ||
| inv_collection_id: Some(inv_collection_id), | ||
| outcome: status::Outcome::RanAnalysis { prep_status, outcome }, | ||
| outcome: status::Outcome::RanAnalysis { | ||
| prep_status, | ||
| analysis_status: outcome, | ||
| }, | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -205,9 +216,91 @@ impl FmAnalysis { | |
|
|
||
| async fn analyze( | ||
| &mut self, | ||
| _opctx: &OpContext, | ||
| _inputs: fm::analysis_input::Input, | ||
| ) -> anyhow::Result<status::AnalysisOutcome> { | ||
| anyhow::bail!("FM analysis is not yet implemented") | ||
| opctx: &OpContext, | ||
| inputs: fm::analysis_input::Input, | ||
| ) -> status::AnalysisStatus { | ||
| let start_time = Utc::now(); | ||
| let mut sitrep_builder = fm::SitrepBuilder::new(&opctx.log, &inputs); | ||
| let result = fm::diagnosis::analyze(&inputs, &mut sitrep_builder); | ||
| let end_time = Utc::now(); | ||
| let (sitrep, report) = sitrep_builder.build(self.nexus_id, end_time); | ||
|
|
||
| // Did it work? | ||
| if let Err(e) = result { | ||
| let err = InlineErrorChain::new(&*e); | ||
| slog::error!(&opctx.log, "fault management analysis failed"; "err" => %err); | ||
| return status::AnalysisStatus { | ||
| start_time, | ||
| end_time, | ||
| report, | ||
| outcome: status::AnalysisOutcome::Error(e.to_string()), | ||
| }; | ||
| } | ||
|
|
||
| // TODO(eliza): diff the sitrep against the parent, and return | ||
| // `Unchanged` if it's the same. | ||
| let unchanged = true; | ||
| if unchanged { | ||
| slog::info!( | ||
| &opctx.log, | ||
| "fault management analysis produced no changes from the \ | ||
| current sitrep" | ||
| ); | ||
| return status::AnalysisStatus { | ||
| start_time, | ||
| end_time, | ||
| report, | ||
| outcome: status::AnalysisOutcome::Unchanged, | ||
| }; | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The compiler doesn't yell at you about code after this line being unreachable?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, I wondered the same - feels a little sketchy to have "impossible-to-reach" code down here that's definitely untested, but also this is probably fine for now as long as the build isn't broken?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah...I couldn't really think of a better way to do this incrementally while the diffing code and the sitrep-generating code both currently don't exist. I suppose we could just not add this part at all, but I wanted to make sure to remember to put it there when the necessary stuff is added. Once we actually have enough code to start trying to produce sitreps, any tests for that will start failing due to this until the diffing code is added, which felt like the right way to make sure we remembered to do that part... What do you guys think?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. eh. I vote move forward for now. don't want to get hung around the wheel too much, but agree that we should not leave it in this weird intermediate state for too long.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would also like to avoid that! :) |
||
|
|
||
| let sitrep_id = sitrep.id(); | ||
| match self.datastore.fm_sitrep_insert(opctx, sitrep).await { | ||
| Ok(()) => { | ||
| slog::info!(&opctx.log, "updated the current sitrep!"); | ||
| // If we committed a new sitrep, we ought to go ahead and load it | ||
| // now... | ||
| self.sitrep_loader.activate(); | ||
| status::AnalysisStatus { | ||
| start_time, | ||
| end_time, | ||
| report, | ||
| outcome: status::AnalysisOutcome::Committed { sitrep_id }, | ||
| } | ||
| } | ||
| Err(datastore::fm::InsertSitrepError::ParentNotCurrent(_)) => { | ||
| slog::info!( | ||
| &opctx.log, | ||
| "new sitrep was not committed as the parent sitrep was \ | ||
| out of date"; | ||
| ); | ||
| // We are behind, activate the sitrep loader to try and catch up! | ||
| self.sitrep_loader.activate(); | ||
| // Also, we should probably clean up after ourselves... | ||
| self.sitrep_gc.activate(); | ||
|
|
||
| status::AnalysisStatus { | ||
| start_time, | ||
| end_time, | ||
| report, | ||
| outcome: status::AnalysisOutcome::NotCommitted { | ||
| sitrep_id, | ||
| }, | ||
| } | ||
| } | ||
| Err(datastore::fm::InsertSitrepError::Other(e)) => { | ||
| let err = InlineErrorChain::new(&e); | ||
| slog::error!(&opctx.log, "failed to insert sitrep"; "err" => %err); | ||
| status::AnalysisStatus { | ||
| start_time, | ||
| end_time, | ||
| report, | ||
| outcome: status::AnalysisOutcome::CommitFailed { | ||
| sitrep_id, | ||
| error: e.to_string(), | ||
| }, | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.