|
| 1 | +// This Source Code Form is subject to the terms of the Mozilla Public |
| 2 | +// License, v. 2.0. If a copy of the MPL was not distributed with this |
| 3 | +// file, You can obtain one at https://mozilla.org/MPL/2.0/. |
| 4 | + |
| 5 | +//! `omdb support-bundle collect` — collect a support bundle locally, |
| 6 | +//! without going through Nexus. |
| 7 | +//! |
| 8 | +//! Unlike the Nexus background task, this path: |
| 9 | +//! |
| 10 | +//! - Does not register a row in the `support_bundle` table. |
| 11 | +//! - Does not transfer the resulting bundle to a sled-agent for durable |
| 12 | +//! storage. The zip is written to a local file path. |
| 13 | +//! - Does not require Nexus to be up. It only needs CRDB, internal |
| 14 | +//! DNS, MGS, and the rack's sled-agents reachable on the underlay. |
| 15 | +//! |
| 16 | +//! This is intended for incident response, where the operator may need |
| 17 | +//! to collect a bundle precisely because Nexus is unhealthy. |
| 18 | +
|
| 19 | +use crate::Omdb; |
| 20 | +use crate::db::DbUrlOptions; |
| 21 | +use anyhow::Context; |
| 22 | +use camino::Utf8PathBuf; |
| 23 | +use camino_tempfile::tempdir_in; |
| 24 | +use clap::Args; |
| 25 | +use clap::Subcommand; |
| 26 | +use nexus_db_queries::context::OpContext; |
| 27 | +use nexus_db_queries::db::DataStore; |
| 28 | +use nexus_types::support_bundle::BundleDataSelection; |
| 29 | +use omicron_uuid_kinds::SupportBundleUuid; |
| 30 | +use std::io::Seek; |
| 31 | +use std::io::SeekFrom; |
| 32 | +use std::sync::Arc; |
| 33 | +use support_bundle_collection::BundleCollection; |
| 34 | +use support_bundle_collection::BundleInfo; |
| 35 | +use support_bundle_collection::zip::bundle_to_zipfile; |
| 36 | + |
| 37 | +/// Arguments to the "omdb support-bundle" subcommand |
| 38 | +#[derive(Debug, Args)] |
| 39 | +pub struct SupportBundleArgs { |
| 40 | + #[command(subcommand)] |
| 41 | + command: SupportBundleCommands, |
| 42 | +} |
| 43 | + |
| 44 | +#[derive(Debug, Subcommand)] |
| 45 | +enum SupportBundleCommands { |
| 46 | + /// Collect a support bundle without involving Nexus. |
| 47 | + /// |
| 48 | + /// Connects directly to CockroachDB, internal DNS, MGS, and the |
| 49 | + /// rack's sled-agents — none of which depend on Nexus being up. |
| 50 | + /// The bundle is written to a local zip file. No row is created |
| 51 | + /// in the `support_bundle` table. |
| 52 | + Collect(CollectArgs), |
| 53 | +} |
| 54 | + |
| 55 | +#[derive(Debug, Args)] |
| 56 | +struct CollectArgs { |
| 57 | + #[command(flatten)] |
| 58 | + db_url_opts: DbUrlOptions, |
| 59 | + |
| 60 | + /// Path where the resulting bundle zip will be written. |
| 61 | + #[clap(long, short = 'o')] |
| 62 | + output: Utf8PathBuf, |
| 63 | + |
| 64 | + /// Reason recorded inside the bundle's metadata. |
| 65 | + #[clap(long, default_value = "collected via omdb")] |
| 66 | + reason: String, |
| 67 | + |
| 68 | + /// Directory to use for staging the bundle contents before zipping. |
| 69 | + #[clap(long, default_value = "/var/tmp")] |
| 70 | + tempdir: Utf8PathBuf, |
| 71 | +} |
| 72 | + |
| 73 | +impl SupportBundleArgs { |
| 74 | + pub async fn run_cmd( |
| 75 | + &self, |
| 76 | + omdb: &Omdb, |
| 77 | + log: &slog::Logger, |
| 78 | + ) -> anyhow::Result<()> { |
| 79 | + match &self.command { |
| 80 | + SupportBundleCommands::Collect(args) => args.run(omdb, log).await, |
| 81 | + } |
| 82 | + } |
| 83 | +} |
| 84 | + |
| 85 | +impl CollectArgs { |
| 86 | + async fn run(&self, omdb: &Omdb, log: &slog::Logger) -> anyhow::Result<()> { |
| 87 | + self.db_url_opts |
| 88 | + .with_datastore(omdb, log, async |opctx, datastore| { |
| 89 | + self.collect(omdb, log, opctx, datastore).await |
| 90 | + }) |
| 91 | + .await |
| 92 | + } |
| 93 | + |
| 94 | + async fn collect( |
| 95 | + &self, |
| 96 | + omdb: &Omdb, |
| 97 | + log: &slog::Logger, |
| 98 | + opctx: OpContext, |
| 99 | + datastore: Arc<DataStore>, |
| 100 | + ) -> anyhow::Result<()> { |
| 101 | + let resolver = omdb.dns_resolver(log.clone()).await?; |
| 102 | + |
| 103 | + let bundle = BundleInfo { |
| 104 | + id: SupportBundleUuid::new_v4(), |
| 105 | + reason_for_creation: self.reason.clone(), |
| 106 | + }; |
| 107 | + let bundle_log = log.new(slog::o!("bundle" => bundle.id.to_string())); |
| 108 | + eprintln!("Collecting support bundle {}", bundle.id); |
| 109 | + |
| 110 | + let collection = Arc::new(BundleCollection::new( |
| 111 | + datastore, |
| 112 | + resolver, |
| 113 | + bundle_log, |
| 114 | + opctx, |
| 115 | + BundleDataSelection::all(), |
| 116 | + bundle, |
| 117 | + )); |
| 118 | + |
| 119 | + // Wire Ctrl-C to cancel the in-flight collection. |
| 120 | + let cancel_handle = tokio::spawn({ |
| 121 | + let token = collection.cancellation_token().clone(); |
| 122 | + async move { |
| 123 | + let _ = tokio::signal::ctrl_c().await; |
| 124 | + eprintln!("\nCtrl-C received — cancelling bundle collection."); |
| 125 | + token.cancel(); |
| 126 | + } |
| 127 | + }); |
| 128 | + |
| 129 | + let dir = tempdir_in(&self.tempdir).with_context(|| { |
| 130 | + format!("creating temp dir under {}", self.tempdir) |
| 131 | + })?; |
| 132 | + let collect_result = collection.collect_bundle_locally(&dir).await; |
| 133 | + cancel_handle.abort(); |
| 134 | + let _ = cancel_handle.await; |
| 135 | + let report = collect_result?; |
| 136 | + |
| 137 | + let zip_tempdir = self.tempdir.clone(); |
| 138 | + let output = self.output.clone(); |
| 139 | + tokio::task::spawn_blocking(move || -> anyhow::Result<()> { |
| 140 | + let mut tempfile = bundle_to_zipfile(&dir, &zip_tempdir)?; |
| 141 | + tempfile.seek(SeekFrom::Start(0))?; |
| 142 | + let mut out = std::fs::File::create(&output) |
| 143 | + .with_context(|| format!("creating {output}"))?; |
| 144 | + std::io::copy(&mut tempfile, &mut out)?; |
| 145 | + Ok(()) |
| 146 | + }) |
| 147 | + .await |
| 148 | + .context("zip task panicked")??; |
| 149 | + |
| 150 | + eprintln!("Wrote bundle to {}", self.output); |
| 151 | + eprintln!("{} steps executed:", report.steps.len()); |
| 152 | + for step in &report.steps { |
| 153 | + let dur = step.end - step.start; |
| 154 | + eprintln!( |
| 155 | + " {:>9}ms {:?} {}", |
| 156 | + dur.num_milliseconds(), |
| 157 | + step.status, |
| 158 | + step.name, |
| 159 | + ); |
| 160 | + } |
| 161 | + if let Some(ereports) = &report.ereports { |
| 162 | + eprintln!( |
| 163 | + "ereports: {} found, {} collected, {} errors", |
| 164 | + ereports.n_found, |
| 165 | + ereports.n_collected, |
| 166 | + ereports.errors.len(), |
| 167 | + ); |
| 168 | + } |
| 169 | + Ok(()) |
| 170 | + } |
| 171 | +} |
0 commit comments