Skip to content

Commit 185b7c6

Browse files
authored
DebugCollector: first class file archival (#9555)
1 parent 4d5a14f commit 185b7c6

12 files changed

Lines changed: 2265 additions & 218 deletions

File tree

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

sled-agent/config-reconciler/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ ntp-admin-client.workspace = true
3030
omicron-common.workspace = true
3131
omicron-uuid-kinds.workspace = true
3232
rand.workspace = true
33+
regex.workspace = true
3334
serde.workspace = true
3435
sha2.workspace = true
3536
sled-agent-api.workspace = true
@@ -38,6 +39,7 @@ sled-hardware.workspace = true
3839
sled-storage.workspace = true
3940
slog.workspace = true
4041
slog-error-chain.workspace = true
42+
strum.workspace = true
4143
thiserror.workspace = true
4244
tokio.workspace = true
4345
tufaceous-artifact.workspace = true
Lines changed: 273 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
// This Source Code Form is subject to the terms of the Mozilla Public
2+
// License, v. 2.0. If a copy of the MPL was not distributed with this
3+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4+
5+
//! Execution of file archival
6+
//!
7+
//! As much as possible, behavior should **not** live here, but in the planning
8+
//! module instead so that it can be tested without touching the filesystem.
9+
10+
use super::filesystem::FileLister;
11+
use super::planning::ArchiveStep;
12+
use anyhow::Context;
13+
use camino::Utf8Path;
14+
use slog::debug;
15+
16+
pub(crate) async fn execute_archive_step<'a>(
17+
log: &slog::Logger,
18+
step: ArchiveStep<'a>,
19+
lister: &'a (dyn FileLister + Send + Sync),
20+
) -> Result<(), anyhow::Error> {
21+
match step {
22+
ArchiveStep::Mkdir { output_directory } => {
23+
// We assume that the parent of all output directories
24+
// already exists. That's because in practice it should be
25+
// true: all of the output directories are one level below
26+
// the debug dataset itself. (The test suite verifies
27+
// this.) So if we find at runtime that this isn't true,
28+
// that's a bad sign. Maybe somebody has unmounted the
29+
// debug dataset and deleted its mountpoint? We don't want
30+
// to start spewing stuff to the wrong place. That's why we
31+
// don't use create_dir_all() here.
32+
debug!(
33+
log,
34+
"create directory";
35+
"directory" => %output_directory
36+
);
37+
tokio::fs::create_dir(&output_directory)
38+
.await
39+
.or_else(|error| {
40+
if error.kind() == std::io::ErrorKind::AlreadyExists {
41+
Ok(())
42+
} else {
43+
Err(error)
44+
}
45+
})
46+
.with_context(|| format!("mkdir {output_directory:?}"))
47+
}
48+
ArchiveStep::ArchiveFile(archive_file) => {
49+
match archive_file.choose_filename(lister) {
50+
Err(error) => Err(error),
51+
Ok(output_filename) => {
52+
let input_path = &archive_file.input_path;
53+
let output_path = archive_file
54+
.output_directory
55+
.join(output_filename.as_ref());
56+
debug!(
57+
log,
58+
"archive file";
59+
"input_path" => %input_path,
60+
"output_path" => %output_path,
61+
"delete_original" =>
62+
archive_file.delete_original,
63+
);
64+
archive_one(
65+
&input_path,
66+
&output_path,
67+
archive_file.delete_original,
68+
)
69+
.await
70+
.with_context(|| {
71+
format!("archive {input_path:?} to {output_path:?}")
72+
})
73+
}
74+
}
75+
}
76+
}
77+
}
78+
79+
async fn archive_one(
80+
source: &Utf8Path,
81+
dest: &Utf8Path,
82+
delete_original: bool,
83+
) -> tokio::io::Result<()> {
84+
let mut dest_f = tokio::fs::File::create(&dest).await?;
85+
let mut src_f = tokio::fs::File::open(&source).await?;
86+
87+
tokio::io::copy(&mut src_f, &mut dest_f).await?;
88+
89+
dest_f.sync_all().await?;
90+
if let Some(parent) = dest.parent() {
91+
let file = tokio::fs::File::open(&parent).await?;
92+
file.sync_all().await?;
93+
}
94+
95+
drop(src_f);
96+
drop(dest_f);
97+
98+
if delete_original {
99+
tokio::fs::remove_file(source).await?;
100+
}
101+
102+
Ok(())
103+
}
104+
105+
#[cfg(test)]
mod test {
    use crate::debug_collector::file_archiver;
    use anyhow::Context;
    use camino::Utf8Path;
    use camino_tempfile::Utf8TempDir;
    use chrono::DateTime;
    use chrono::Utc;
    use file_archiver::planning::ArchiveKind;
    use file_archiver::planning::ArchivePlanner;
    use omicron_test_utils::dev::test_setup_log;
    use slog::info;

    /// Smoke test of archival against a real (temporary) filesystem.
    ///
    /// Populates a fake zone log directory and a cores directory, runs a
    /// complete `ArchiveKind::Final` archive twice, and checks that:
    ///
    /// * the expected output files exist with the expected contents,
    /// * rotated logs and core files are removed from the input tree while
    ///   the live log is left in place,
    /// * on the second run, previously archived log files are not clobbered
    ///   while the core file is overwritten.
    #[tokio::test]
    async fn test_real_archival() {
        // Set up the test.
        let logctx = test_setup_log("test_real_archival");
        let log = &logctx.log;

        // Create a temporary directory in which to store some output files.
        let tempdir = Utf8TempDir::new().unwrap();
        info!(log, "temporary directory"; "tempdir" => %tempdir.path());

        // Populate it with a couple of files.
        //
        // Note that all of the interesting cases around generating archive
        // steps are covered elsewhere.  We really only need to smoke check
        // basic behavior here.
        let outdir = tempdir.path().join("out");
        let zone_name = "an-example-zone";
        let zone_root = tempdir.path().join(zone_name);
        let logdir = zone_root.join("var/svc/log");
        let file1_live = logdir.join("svc1.log");
        let file2_rotated = logdir.join("svc1.log.0");
        let file3_rotated = logdir.join("svc2.log.0");
        let coredir = tempdir.path().join("crash");
        let file4_core = coredir.join("core.123");

        // (Re)creates every input file with contents of the form
        // "<file name>-<contents>" so that we can tell which run produced
        // a given output file.
        let populate_input = |contents: &str| {
            std::fs::create_dir_all(&logdir).unwrap();
            std::fs::create_dir_all(&coredir).unwrap();
            for file in
                [&file1_live, &file2_rotated, &file3_rotated, &file4_core]
            {
                let contents =
                    format!("{}-{contents}", file.file_name().unwrap());
                std::fs::write(file, contents).unwrap();
            }
        };

        populate_input("first");

        // Compute the expected filenames.  These depend on the mtimes that
        // the files wound up with.
        let expected_filename = |base: &str, input: &Utf8Path| {
            let found_mtime = input.metadata().unwrap().modified().unwrap();
            let mtime: DateTime<Utc> = DateTime::from(found_mtime);
            format!("{base}{}", mtime.timestamp())
        };
        let file1_expected = expected_filename("svc1.", &file1_live);
        let file2_expected = expected_filename("svc1.log.", &file2_rotated);
        let file3_expected = expected_filename("svc2.log.", &file3_rotated);

        // Run a complete archive.
        std::fs::create_dir(&outdir).unwrap();
        let mut planner =
            ArchivePlanner::new(log, ArchiveKind::Final, &outdir);
        planner.include_cores_directory(&coredir);
        planner.include_zone(zone_name, &zone_root);
        let () = planner.execute().await.expect("successful execution");

        // Check each of the output log files.  This is a little annoying
        // because we don't necessarily know what names they were given,
        // since it depends on the mtime on the input file.
        let verify_logs = |unchanged| {
            for (input_path, expected_filename, deleted_original) in [
                (&file1_live, &file1_expected, false),
                (&file2_rotated, &file2_expected, true),
                (&file3_rotated, &file3_expected, true),
            ] {
                let expected_path =
                    outdir.join(zone_name).join(expected_filename);
                let contents = std::fs::read_to_string(&expected_path)
                    .with_context(|| {
                        format!("read expected output file {expected_path:?}")
                    })
                    .unwrap();
                assert!(contents.starts_with(input_path.file_name().unwrap()));
                assert!(contents.ends_with("-first"));

                if deleted_original {
                    // Check that the original file is gone.
                    assert!(!input_path.exists());
                } else {
                    // The input file should exist.  It may or may not match
                    // what it originally did, depending on what the caller
                    // says.
                    let input_contents = std::fs::read_to_string(input_path)
                        .with_context(|| {
                            format!("read expected input file {input_path:?}")
                        })
                        .unwrap();
                    if unchanged {
                        assert_eq!(contents, input_contents);
                    }
                }
            }
        };

        verify_logs(true);

        // Check the output core file, too.
        let file4_output = outdir.join("core.123");
        let contents = std::fs::read_to_string(&file4_output)
            .with_context(|| {
                format!("read expected output file {file4_output:?}")
            })
            .unwrap();
        assert_eq!(contents, "core.123-first");
        assert!(!file4_core.exists());

        // Now, check the behavior for file collisions.
        //
        // First, re-populate the input tree, but with new data so that we
        // can tell when things have been clobbered.
        populate_input("second");

        // Run another archive.
        let mut planner =
            ArchivePlanner::new(log, ArchiveKind::Final, &outdir);
        planner.include_cores_directory(&coredir);
        planner.include_zone(zone_name, &zone_root);
        let () = planner.execute().await.expect("successful execution");

        // The previously archived log files should still exist, still have
        // the same (original) contents, and the rotated input files should
        // be gone again.
        verify_logs(false);

        // Every file in the zone's output directory is either one of the
        // files archived by the first run (and must still have the first
        // run's contents) or a new file produced by the second run (and
        // must have the new contents).
        for result in outdir.join(zone_name).read_dir_utf8().unwrap() {
            let entry = result.unwrap();
            let contents = std::fs::read_to_string(entry.path())
                .with_context(|| {
                    format!("read output file {:?}", entry.path())
                })
                .unwrap();

            if entry.file_name() == &file1_expected
                || entry.file_name() == &file2_expected
                || entry.file_name() == &file3_expected
            {
                assert!(contents.ends_with("-first"));
            } else {
                assert!(contents.ends_with("-second"));
            }
        }

        // The core file should have been completely overwritten with new
        // contents.
        assert!(!file4_core.exists());
        let contents = std::fs::read_to_string(&file4_output)
            .with_context(|| {
                format!("read expected output file {file4_output:?}")
            })
            .unwrap();
        assert_eq!(contents, "core.123-second");

        logctx.cleanup_successful();
    }
}

0 commit comments

Comments
 (0)