|
| 1 | +// This Source Code Form is subject to the terms of the Mozilla Public |
| 2 | +// License, v. 2.0. If a copy of the MPL was not distributed with this |
| 3 | +// file, You can obtain one at https://mozilla.org/MPL/2.0/. |
| 4 | + |
| 5 | +//! `omdb support-bundle collect` — collect a support bundle locally, |
| 6 | +//! without going through Nexus. |
| 7 | +//! |
| 8 | +//! Unlike the Nexus background task, this path: |
| 9 | +//! |
| 10 | +//! - Does not register a row in the `support_bundle` table. |
| 11 | +//! - Does not transfer the resulting bundle to a sled-agent for durable |
| 12 | +//! storage. The zip is written to a local file path. |
| 13 | +//! - Does not require Nexus to be up. It only needs CRDB, internal |
| 14 | +//! DNS, MGS, and the rack's sled-agents reachable on the underlay. |
| 15 | +//! |
| 16 | +//! This is intended for incident response, where the operator may need |
| 17 | +//! to collect a bundle precisely because Nexus is unhealthy. |
| 18 | +
|
| 19 | +use crate::Omdb; |
| 20 | +use crate::db::DbUrlOptions; |
| 21 | +use anyhow::Context; |
| 22 | +use camino::Utf8PathBuf; |
| 23 | +use camino_tempfile::tempdir_in; |
| 24 | +use clap::Args; |
| 25 | +use clap::Subcommand; |
| 26 | +use clap::ValueEnum; |
| 27 | +use nexus_db_queries::context::OpContext; |
| 28 | +use nexus_db_queries::db::DataStore; |
| 29 | +use nexus_types::fm::ereport::EreportFilters; |
| 30 | +use nexus_types::support_bundle::BundleDataSelection; |
| 31 | +use omicron_uuid_kinds::SupportBundleUuid; |
| 32 | +use std::io::Seek; |
| 33 | +use std::io::SeekFrom; |
| 34 | +use std::sync::Arc; |
| 35 | +use support_bundle_collection::BundleCollection; |
| 36 | +use support_bundle_collection::BundleInfo; |
| 37 | +use support_bundle_collection::zip::bundle_to_zipfile; |
| 38 | + |
| 39 | +/// Categories of data the bundle collector knows how to gather. |
| 40 | +/// |
| 41 | +/// Mirrors `nexus_types::support_bundle::BundleDataCategory`, but is |
| 42 | +/// declared here so it can derive `clap::ValueEnum` without making |
| 43 | +/// `nexus-types` depend on clap. |
| 44 | +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, ValueEnum)] |
| 45 | +enum BundleCategory { |
| 46 | + Reconfigurator, |
| 47 | + HostInfo, |
| 48 | + SledCubbyInfo, |
| 49 | + SpDumps, |
| 50 | + Ereports, |
| 51 | +} |
| 52 | + |
/// Arguments to the "omdb support-bundle" subcommand
#[derive(Debug, Args)]
pub struct SupportBundleArgs {
    // The operation selected on the command line; `run_cmd` matches on
    // this to decide what to execute.
    #[command(subcommand)]
    command: SupportBundleCommands,
}
| 59 | + |
// Operations available under `omdb support-bundle`. There is currently a
// single one; `SupportBundleArgs::run_cmd` dispatches on this enum.
#[derive(Debug, Subcommand)]
enum SupportBundleCommands {
    /// Collect a support bundle without involving Nexus.
    ///
    /// Connects directly to CockroachDB, internal DNS, MGS, and the
    /// rack's sled-agents — none of which depend on Nexus being up.
    /// The bundle is written to a local zip file. No row is created
    /// in the `support_bundle` table.
    Collect(CollectArgs),
}
| 70 | + |
| 71 | +#[derive(Debug, Args)] |
| 72 | +struct CollectArgs { |
| 73 | + #[command(flatten)] |
| 74 | + db_url_opts: DbUrlOptions, |
| 75 | + |
| 76 | + /// Path where the resulting bundle zip will be written. |
| 77 | + #[clap(long, short = 'o')] |
| 78 | + output: Utf8PathBuf, |
| 79 | + |
| 80 | + /// Reason recorded inside the bundle's metadata. |
| 81 | + #[clap(long, default_value = "collected via omdb")] |
| 82 | + reason: String, |
| 83 | + |
| 84 | + /// Directory to use for staging the bundle contents before zipping. |
| 85 | + #[clap(long, default_value = "/var/tmp")] |
| 86 | + tempdir: Utf8PathBuf, |
| 87 | + |
| 88 | + /// Categories of data to collect. May be supplied multiple times. |
| 89 | + /// Defaults to all categories. |
| 90 | + #[clap(long, value_enum)] |
| 91 | + include: Vec<BundleCategory>, |
| 92 | +} |
| 93 | + |
| 94 | +impl CollectArgs { |
| 95 | + fn data_selection(&self) -> BundleDataSelection { |
| 96 | + let categories: &[BundleCategory] = if self.include.is_empty() { |
| 97 | + BundleCategory::value_variants() |
| 98 | + } else { |
| 99 | + self.include.as_slice() |
| 100 | + }; |
| 101 | + |
| 102 | + let mut sel = BundleDataSelection::new(); |
| 103 | + for category in categories { |
| 104 | + sel = match category { |
| 105 | + BundleCategory::Reconfigurator => sel.with_reconfigurator(), |
| 106 | + BundleCategory::HostInfo => sel.with_all_sleds(), |
| 107 | + BundleCategory::SledCubbyInfo => sel.with_sled_cubby_info(), |
| 108 | + BundleCategory::SpDumps => sel.with_sp_dumps(), |
| 109 | + BundleCategory::Ereports => sel.with_ereports( |
| 110 | + EreportFilters::new() |
| 111 | + .with_start_time( |
| 112 | + omicron_common::now_db_precision() |
| 113 | + - chrono::Days::new(7), |
| 114 | + ) |
| 115 | + .expect("no end time set, cannot fail"), |
| 116 | + ), |
| 117 | + }; |
| 118 | + } |
| 119 | + sel |
| 120 | + } |
| 121 | +} |
| 122 | + |
| 123 | +impl SupportBundleArgs { |
| 124 | + pub async fn run_cmd( |
| 125 | + &self, |
| 126 | + omdb: &Omdb, |
| 127 | + log: &slog::Logger, |
| 128 | + ) -> anyhow::Result<()> { |
| 129 | + match &self.command { |
| 130 | + SupportBundleCommands::Collect(args) => args.run(omdb, log).await, |
| 131 | + } |
| 132 | + } |
| 133 | +} |
| 134 | + |
| 135 | +impl CollectArgs { |
| 136 | + async fn run(&self, omdb: &Omdb, log: &slog::Logger) -> anyhow::Result<()> { |
| 137 | + self.db_url_opts |
| 138 | + .with_datastore(omdb, log, async |opctx, datastore| { |
| 139 | + self.collect(omdb, log, opctx, datastore).await |
| 140 | + }) |
| 141 | + .await |
| 142 | + } |
| 143 | + |
| 144 | + async fn collect( |
| 145 | + &self, |
| 146 | + omdb: &Omdb, |
| 147 | + log: &slog::Logger, |
| 148 | + opctx: OpContext, |
| 149 | + datastore: Arc<DataStore>, |
| 150 | + ) -> anyhow::Result<()> { |
| 151 | + let resolver = omdb.dns_resolver(log.clone()).await?; |
| 152 | + |
| 153 | + let bundle = BundleInfo { |
| 154 | + id: SupportBundleUuid::new_v4(), |
| 155 | + reason_for_creation: self.reason.clone(), |
| 156 | + }; |
| 157 | + let bundle_log = log.new(slog::o!("bundle" => bundle.id.to_string())); |
| 158 | + eprintln!("Collecting support bundle {}", bundle.id); |
| 159 | + |
| 160 | + let collection = Arc::new(BundleCollection::new( |
| 161 | + datastore, |
| 162 | + resolver, |
| 163 | + bundle_log, |
| 164 | + opctx, |
| 165 | + self.data_selection(), |
| 166 | + bundle, |
| 167 | + )); |
| 168 | + |
| 169 | + // Wire Ctrl-C to cancel the in-flight collection. |
| 170 | + let cancel_handle = tokio::spawn({ |
| 171 | + let token = collection.cancellation_token().clone(); |
| 172 | + async move { |
| 173 | + let _ = tokio::signal::ctrl_c().await; |
| 174 | + eprintln!("\nCtrl-C received — cancelling bundle collection."); |
| 175 | + token.cancel(); |
| 176 | + } |
| 177 | + }); |
| 178 | + |
| 179 | + let dir = tempdir_in(&self.tempdir).with_context(|| { |
| 180 | + format!("creating temp dir under {}", self.tempdir) |
| 181 | + })?; |
| 182 | + let collect_result = collection.collect_bundle_locally(&dir).await; |
| 183 | + cancel_handle.abort(); |
| 184 | + let _ = cancel_handle.await; |
| 185 | + let report = collect_result?; |
| 186 | + |
| 187 | + let zip_tempdir = self.tempdir.clone(); |
| 188 | + let output = self.output.clone(); |
| 189 | + tokio::task::spawn_blocking(move || -> anyhow::Result<()> { |
| 190 | + let mut tempfile = bundle_to_zipfile(&dir, &zip_tempdir)?; |
| 191 | + tempfile.seek(SeekFrom::Start(0))?; |
| 192 | + let mut out = std::fs::File::create(&output) |
| 193 | + .with_context(|| format!("creating {output}"))?; |
| 194 | + std::io::copy(&mut tempfile, &mut out)?; |
| 195 | + Ok(()) |
| 196 | + }) |
| 197 | + .await |
| 198 | + .context("zip task panicked")??; |
| 199 | + |
| 200 | + eprintln!("Wrote bundle to {}", self.output); |
| 201 | + eprintln!("{} steps executed:", report.steps.len()); |
| 202 | + for step in &report.steps { |
| 203 | + let dur = step.end - step.start; |
| 204 | + eprintln!( |
| 205 | + " {:>9}ms {:?} {}", |
| 206 | + dur.num_milliseconds(), |
| 207 | + step.status, |
| 208 | + step.name, |
| 209 | + ); |
| 210 | + } |
| 211 | + if let Some(ereports) = &report.ereports { |
| 212 | + eprintln!( |
| 213 | + "ereports: {} found, {} collected, {} errors", |
| 214 | + ereports.n_found, |
| 215 | + ereports.n_collected, |
| 216 | + ereports.errors.len(), |
| 217 | + ); |
| 218 | + } |
| 219 | + Ok(()) |
| 220 | + } |
| 221 | +} |
0 commit comments