Skip to content

Commit c12feb7

Browse files
authored
Implement TqSecretRetriever for Trust Quorum secret retrieval (#9649)
- Implements `TqSecretRetriever` analogous to `LrtqSecretRetriever`, but using Trust Quorum's `NodeTaskHandle` instead of bootstore's `NodeHandle`. - Builds atop this to implement `TqOrLrtqSecretRetriever` which dynamically switches between them based on rack state. - Switches the `SecretRetriever` trait to use `&mut self`, which eliminates the need for interior mutability, but which complicates the previous use of a `OnceLock` to do late-binding of the secret retriever (needed because of order of initialization during boot). - Eliminates that global `OnceLock` and instead threads the late-binding of secret retriever through long running task handles to achieve the same effect. This closes #9586.
1 parent 5daa89e commit c12feb7

16 files changed

Lines changed: 557 additions & 227 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

key-manager/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,3 @@ thiserror.workspace = true
1818
tokio.workspace = true
1919
zeroize.workspace = true
2020
omicron-workspace-hack.workspace = true
21-

key-manager/src/lib.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -306,18 +306,23 @@ pub enum SecretRetrieverError {
306306

307307
#[error("Bootstore error: {0}")]
308308
Bootstore(String),
309+
310+
#[error("Trust quorum error: {0}")]
311+
TrustQuorum(String),
309312
}
310313

311314
/// A mechanism for retrieving secrets to use as input key material to HKDF-
312315
/// Extract.
313316
#[async_trait]
314-
pub trait SecretRetriever {
317+
pub trait SecretRetriever: Send + Sync + 'static {
315318
/// Return the latest secret
316319
///
317320
/// This is useful when a new entity is being encrypted and there is no need
318321
/// for a reconfiguration. When an entity is already encrypted, and needs to
319322
/// be decrypted, the user should instead call the [`SecretRetriever::get`].
320-
async fn get_latest(&self) -> Result<VersionedIkm, SecretRetrieverError>;
323+
async fn get_latest(
324+
&mut self,
325+
) -> Result<VersionedIkm, SecretRetrieverError>;
321326

322327
/// Get the secret for the given epoch
323328
///
@@ -331,7 +336,7 @@ pub trait SecretRetriever {
331336
/// Return an error if it's not possible to recover the old secret given the
332337
/// latest secret.
333338
async fn get(
334-
&self,
339+
&mut self,
335340
epoch: u64,
336341
) -> Result<SecretState, SecretRetrieverError>;
337342
}
@@ -363,15 +368,15 @@ mod tests {
363368
#[async_trait]
364369
impl SecretRetriever for TestSecretRetriever {
365370
async fn get_latest(
366-
&self,
371+
&mut self,
367372
) -> Result<VersionedIkm, SecretRetrieverError> {
368373
let salt = [0u8; 32];
369374
let (epoch, bytes) = self.ikms.last_key_value().unwrap();
370375
Ok(VersionedIkm::new(*epoch, salt, bytes))
371376
}
372377

373378
async fn get(
374-
&self,
379+
&mut self,
375380
epoch: u64,
376381
) -> Result<SecretState, SecretRetrieverError> {
377382
let salt = [0u8; 32];

sled-agent/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ sled-hardware.workspace = true
9797
sled-hardware-types.workspace = true
9898
sled-storage.workspace = true
9999
sp-sim.workspace = true
100+
secrecy.workspace = true
100101
slog.workspace = true
101102
slog-async.workspace = true
102103
slog-dtrace.workspace = true

sled-agent/config-reconciler/src/dataset_serialization_task.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2356,7 +2356,7 @@ mod illumos_tests {
23562356
#[async_trait::async_trait]
23572357
impl SecretRetriever for HardcodedSecretRetriever {
23582358
async fn get_latest(
2359-
&self,
2359+
&mut self,
23602360
) -> Result<key_manager::VersionedIkm, SecretRetrieverError> {
23612361
let epoch = 0;
23622362
let salt = [0u8; 32];
@@ -2366,7 +2366,7 @@ mod illumos_tests {
23662366
}
23672367

23682368
async fn get(
2369-
&self,
2369+
&mut self,
23702370
epoch: u64,
23712371
) -> Result<key_manager::SecretState, SecretRetrieverError> {
23722372
if epoch != 0 {

sled-agent/src/bootstrap/secret_retriever.rs

Lines changed: 0 additions & 204 deletions
This file was deleted.
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
// This Source Code Form is subject to the terms of the Mozilla Public
2+
// License, v. 2.0. If a copy of the MPL was not distributed with this
3+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4+
5+
//! A secret retriever that waits to be configured before use.
6+
//!
7+
//! Created early in boot (before we know which retriever type to use),
8+
//! configured later once we have the sled agent request.
9+
10+
use async_trait::async_trait;
11+
use key_manager::{
12+
SecretRetriever, SecretRetrieverError, SecretState, VersionedIkm,
13+
};
14+
use std::sync::{Arc, Mutex};
15+
use tokio::sync::oneshot;
16+
17+
/// A secret retriever that waits to be configured before use.
18+
///
19+
/// Created early in boot (before we know which retriever type to use),
20+
/// configured later once we have the sled agent request.
///
/// The real implementation arrives over a one-shot channel whose sending
/// half is held by the handle returned from `new`.
21+
pub struct ConfigurableSecretRetriever {
/// The configured implementation; `None` until it has been received.
22+
inner: Option<Box<dyn SecretRetriever>>,
/// Receiving half of the channel that delivers the implementation.
23+
config_rx: Option<oneshot::Receiver<Box<dyn SecretRetriever>>>,
24+
}
25+
26+
/// Handle to configure a [`ConfigurableSecretRetriever`].
27+
///
28+
/// Cloneable, but `init` can only succeed once (panics on second call).
29+
#[derive(Clone)]
30+
pub struct ConfigurableSecretRetrieverHandle {
31+
#[allow(clippy::type_complexity)]
32+
tx: Arc<Mutex<Option<oneshot::Sender<Box<dyn SecretRetriever>>>>>,
33+
}
34+
35+
impl ConfigurableSecretRetriever {
36+
pub fn new() -> (Self, ConfigurableSecretRetrieverHandle) {
37+
let (tx, rx) = oneshot::channel();
38+
(
39+
Self { inner: None, config_rx: Some(rx) },
40+
ConfigurableSecretRetrieverHandle {
41+
tx: Arc::new(Mutex::new(Some(tx))),
42+
},
43+
)
44+
}
45+
}
46+
47+
impl ConfigurableSecretRetrieverHandle {
48+
/// Configure the pending retriever with the actual implementation.
49+
///
50+
/// Panics if called twice or if the corresponding
51+
/// [`ConfigurableSecretRetriever`] was dropped.
52+
pub fn init(&self, retriever: impl SecretRetriever) {
53+
self.tx
54+
.lock()
55+
.unwrap()
56+
.take()
57+
.expect("PendingSecretRetriever already configured")
58+
.send(Box::new(retriever))
59+
.unwrap_or_else(|_| {
60+
panic!("PendingSecretRetriever dropped before configure")
61+
});
62+
}
63+
}
64+
65+
#[async_trait]
66+
impl SecretRetriever for ConfigurableSecretRetriever {
67+
async fn get_latest(
68+
&mut self,
69+
) -> Result<VersionedIkm, SecretRetrieverError> {
70+
self.ensure_configured().await?;
71+
self.inner.as_mut().unwrap().get_latest().await
72+
}
73+
74+
async fn get(
75+
&mut self,
76+
epoch: u64,
77+
) -> Result<SecretState, SecretRetrieverError> {
78+
self.ensure_configured().await?;
79+
self.inner.as_mut().unwrap().get(epoch).await
80+
}
81+
}
82+
83+
impl ConfigurableSecretRetriever {
84+
async fn ensure_configured(&mut self) -> Result<(), SecretRetrieverError> {
85+
if self.inner.is_none() {
86+
let rx = self
87+
.config_rx
88+
.take()
89+
.ok_or(SecretRetrieverError::RackNotInitialized)?;
90+
self.inner = Some(
91+
rx.await
92+
.map_err(|_| SecretRetrieverError::RackNotInitialized)?,
93+
);
94+
}
95+
Ok(())
96+
}
97+
}

0 commit comments

Comments
 (0)