Skip to content

Commit fe47987

Browse files
mvp vm attestation (#1091)
Co-authored-by: iximeow <iximeow@oxide.computer>
1 parent 7e5ed24 commit fe47987

File tree

20 files changed

+2184
-268
lines changed

20 files changed

+2184
-268
lines changed

Cargo.lock

Lines changed: 1414 additions & 243 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,10 @@ sled-agent-client = { git = "https://github.com/oxidecomputer/omicron", branch =
9595
crucible = { git = "https://github.com/oxidecomputer/crucible", rev = "ae1da83e66c648574827298f4bc444632bf4d047" }
9696
crucible-client-types = { git = "https://github.com/oxidecomputer/crucible", rev = "ae1da83e66c648574827298f4bc444632bf4d047" }
9797

98+
# Attestation
99+
dice-verifier = { git = "https://github.com/oxidecomputer/dice-util", rev = "1d3084b514389847e8e0f5d966d2be4f18d02d32", features = ["sled-agent"] }
100+
vm-attest = { git = "https://github.com/oxidecomputer/vm-attest", rev = "2cdd17580a4fc6c871d24797016af8dbaac9421d", default-features = false }
101+
98102
# External dependencies
99103
anyhow = "1.0"
100104
async-trait = "0.1.88"
@@ -163,6 +167,7 @@ serde_arrays = "0.1"
163167
serde_derive = "1.0"
164168
serde_json = "1.0"
165169
serde_test = "1.0.138"
170+
sha2 = "0.10.9"
166171
slog = "2.7"
167172
slog-async = "2.8"
168173
slog-bunyan = "2.4.0"

bin/propolis-server/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ rgb_frame.workspace = true
6969
rfb = { workspace = true, features = ["tungstenite"] }
7070
uuid.workspace = true
7171
usdt.workspace = true
72+
vm-attest.workspace = true
7273
base64.workspace = true
7374
schemars = { workspace = true, features = ["chrono", "uuid1"] }
7475

bin/propolis-server/src/lib/initializer.rs

Lines changed: 127 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
use std::convert::TryInto;
66
use std::fs::File;
7-
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
87
use std::num::{NonZeroU8, NonZeroUsize};
98
use std::os::unix::fs::FileTypeExt;
109
use std::sync::Arc;
@@ -25,6 +24,9 @@ use crucible_client_types::VolumeConstructionRequest;
2524
pub use nexus_client::Client as NexusClient;
2625
use oximeter::types::ProducerRegistry;
2726
use oximeter_instruments::kstat::KstatSampler;
27+
use propolis::attestation;
28+
use propolis::attestation::server::AttestationServerConfig;
29+
use propolis::attestation::server::AttestationSock;
2830
use propolis::block;
2931
use propolis::chardev::{self, BlockingSource, Source};
3032
use propolis::common::{Lifecycle, GB, MB, PAGE_SIZE};
@@ -96,6 +98,12 @@ pub enum MachineInitError {
9698
#[error("boot order entry {0:?} does not refer to an attached disk")]
9799
BootOrderEntryWithoutDevice(SpecKey),
98100

101+
#[error(
102+
"disk device {device_id:?} refers to a \
103+
non-existent block backend {backend_id:?}"
104+
)]
105+
DeviceWithoutBlockBackend { device_id: SpecKey, backend_id: SpecKey },
106+
99107
#[error("boot entry {0:?} refers to a device on non-zero PCI bus {1}")]
100108
BootDeviceOnDownstreamPciBus(SpecKey, u8),
101109

@@ -105,6 +113,9 @@ pub enum MachineInitError {
105113
#[error("failed to specialize CPUID for vcpu {0}")]
106114
CpuidSpecializationFailed(i32, #[source] propolis::cpuid::SpecializeError),
107115

116+
#[error("failed to start attestation server")]
117+
AttestationServer(#[source] std::io::Error),
118+
108119
#[cfg(feature = "falcon")]
109120
#[error("softnpu p9 device missing")]
110121
SoftNpuP9Missing,
@@ -478,31 +489,25 @@ impl MachineInitializer<'_> {
478489
Ok(())
479490
}
480491

481-
pub fn initialize_vsock(
492+
pub async fn initialize_vsock(
482493
&mut self,
483494
chipset: &RegisteredChipset,
484-
) -> Result<(), MachineInitError> {
495+
attest_cfg: Option<AttestationServerConfig>,
496+
) -> Result<Option<AttestationSock>, MachineInitError> {
485497
use propolis::vsock::proxy::VsockPortMapping;
486498

487-
// OANA Port 605 - VM Attestation RFD 605
488-
const ATTESTATION_PORT: u16 = 605;
489-
const ATTESTATION_ADDR: SocketAddr = SocketAddr::new(
490-
IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)),
491-
ATTESTATION_PORT,
492-
);
493-
494499
if let Some(vsock) = &self.spec.vsock {
495500
let bdf: pci::Bdf = vsock.spec.pci_path.into();
496501

497502
let mappings = vec![VsockPortMapping::new(
498-
ATTESTATION_PORT.into(),
499-
ATTESTATION_ADDR,
503+
attestation::ATTESTATION_PORT.into(),
504+
attestation::ATTESTATION_ADDR,
500505
)];
501506

502507
let guest_cid = GuestCid::try_from(vsock.spec.guest_cid)
503-
.context("guest cid")?;
508+
.context("could not parse guest cid")?;
504509
// While the spec does not recommend how large the virtio descriptor
505-
// table should be we sized this appropriately in testing so
510+
// table should be, we sized this appropriately in testing, so
506511
// that the guest is able to move vsock packets at a reasonable
507512
// throughput without the need to be much larger.
508513
let num_queues = 256;
@@ -516,9 +521,23 @@ impl MachineInitializer<'_> {
516521

517522
self.devices.insert(vsock.id.clone(), device.clone());
518523
chipset.pci_attach(bdf, device);
524+
525+
// Spawn attestation server that will go over the vsock device
526+
if let Some(cfg) = attest_cfg {
527+
let attest = AttestationSock::new(
528+
self.log.new(slog::o!("component" => "attestation-server")),
529+
cfg.sled_agent_addr,
530+
)
531+
.await
532+
.map_err(MachineInitError::AttestationServer)?;
533+
return Ok(Some(attest));
534+
}
535+
} else {
536+
info!(self.log, "no vsock device in instance spec");
537+
return Ok(None);
519538
}
520539

521-
Ok(())
540+
Ok(None)
522541
}
523542

524543
async fn create_storage_backend_from_spec(
@@ -672,6 +691,99 @@ impl MachineInitializer<'_> {
672691
}
673692
}
674693

694+
/// Collect the necessary information out of the VM under construction into
695+
/// the provided `AttestationSock`. This is expected to populate
696+
/// `vm_rot` with information so the caller can spawn off
697+
/// `AttestationSockInit::run`.
698+
pub fn prepare_rot_initializer(
699+
&self,
700+
vm_rot: &mut AttestationSock,
701+
) -> Result<(), MachineInitError> {
702+
let uuid = self.properties.id;
703+
704+
// The first boot entry is a key into `self.spec.disks`, which is how
705+
// we'll get to a Crucible volume backing this boot option.
706+
let boot_disk_entry =
707+
self.spec.boot_settings.as_ref().and_then(|settings| {
708+
if settings.order.len() >= 2 {
709+
// In a rack we only configure propolis-server with zero or
710+
// one boot disks. It's possible to provide a fuller list,
711+
// and in the future the product may actually expose such a
712+
// capability. At that time, we'll need to have a reckoning
713+
// for what "boot disk measurement" from the RoT actually
714+
// means; it probably "should" be "the measurement of the
715+
// disk that EDK2 decided to boot into", but that
716+
// communication to and from the guest is a little more
717+
// complicated than we want or need to build out today.
718+
//
719+
// Since, as the system exists, we either have no specific
720+
// boot disk (and don't know where the guest is expected to
721+
// end up), or one boot disk (and can determine which disk
722+
// to collect a measurement of before even running guest
723+
// firmware), we encode this expectation up front. If the
724+
// product has changed such that this panic is reached,
725+
// "that's exciting!" and "sorry for crashing your
726+
// Propolis".
727+
panic!(
728+
"Unsupported VM RoT configuration: \
729+
more than one boot disk"
730+
);
731+
}
732+
733+
settings.order.first()
734+
});
735+
736+
let crucible_volume = if let Some(entry) = boot_disk_entry {
737+
let disk_dev =
738+
self.spec.disks.get(&entry.device_id).ok_or_else(|| {
739+
MachineInitError::BootOrderEntryWithoutDevice(
740+
entry.device_id.clone(),
741+
)
742+
})?;
743+
744+
let backend_id = match &disk_dev.device_spec {
745+
spec::StorageDevice::Virtio(disk) => &disk.backend_id,
746+
spec::StorageDevice::Nvme(disk) => &disk.backend_id,
747+
};
748+
749+
let Some(block_backend) = self.block_backends.get(backend_id)
750+
else {
751+
return Err(MachineInitError::DeviceWithoutBlockBackend {
752+
device_id: entry.device_id.to_owned(),
753+
backend_id: backend_id.to_owned(),
754+
});
755+
};
756+
757+
if let Some(backend) =
758+
block_backend.as_any().downcast_ref::<block::CrucibleBackend>()
759+
{
760+
if backend.is_read_only() {
761+
Some(backend.clone_volume())
762+
} else {
763+
// Disk must be read-only to be used for attestation.
764+
slog::info!(
765+
self.log,
766+
"boot disk is not read-only (and will not be used for attestations)",
767+
);
768+
None
769+
}
770+
} else {
771+
// Probably fine, just not handled right now.
772+
slog::warn!(
773+
self.log,
774+
"VM RoT ignoring boot disk: not a Crucible volume"
775+
);
776+
None
777+
}
778+
} else {
779+
None
780+
};
781+
782+
vm_rot.prepare_instance_conf(uuid, crucible_volume);
783+
784+
Ok(())
785+
}
786+
675787
/// Initializes the storage devices and backends listed in this
676788
/// initializer's instance spec.
677789
///

bin/propolis-server/src/lib/server.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ use internal_dns_resolver::{ResolveError, Resolver};
3636
use internal_dns_types::names::ServiceName;
3737
pub use nexus_client::Client as NexusClient;
3838
use oximeter::types::ProducerRegistry;
39+
use propolis::attestation::server::AttestationServerConfig;
3940
use propolis_api_types::disk::{
4041
InstanceVCRReplace, SnapshotRequestPathParams, VCRRequestPathParams,
4142
VolumeStatus, VolumeStatusPathParams,
@@ -95,6 +96,9 @@ pub struct StaticConfig {
9596
/// The configuration to use when setting up this server's Oximeter
9697
/// endpoint.
9798
metrics: Option<MetricsEndpointConfig>,
99+
100+
/// Configuration for the VM attestation server, if one should be started.
101+
attest_config: Option<AttestationServerConfig>,
98102
}
99103

100104
/// Context accessible from HTTP callbacks.
@@ -113,6 +117,7 @@ impl DropshotEndpointContext {
113117
use_reservoir: bool,
114118
log: slog::Logger,
115119
metric_config: Option<MetricsEndpointConfig>,
120+
attest_config: Option<AttestationServerConfig>,
116121
) -> Self {
117122
let vnc_server = VncServer::new(log.clone());
118123
Self {
@@ -121,6 +126,7 @@ impl DropshotEndpointContext {
121126
bootrom_version,
122127
use_reservoir,
123128
metrics: metric_config,
129+
attest_config,
124130
},
125131
vnc_server,
126132
vm: crate::vm::Vm::new(&log),
@@ -245,6 +251,7 @@ impl PropolisServerApi for PropolisServerImpl {
245251
nexus_client,
246252
vnc_server: server_context.vnc_server.clone(),
247253
local_server_addr: rqctx.server.local_addr,
254+
attest_config: server_context.static_config.attest_config,
248255
};
249256

250257
let vm_init = match init {

bin/propolis-server/src/lib/vm/ensure.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -563,7 +563,8 @@ async fn initialize_vm_objects(
563563
&properties,
564564
))?;
565565
init.initialize_network_devices(&chipset).await?;
566-
init.initialize_vsock(&chipset)?;
566+
let mut attest_handle =
567+
init.initialize_vsock(&chipset, options.attest_config).await?;
567568

568569
#[cfg(feature = "failure-injection")]
569570
init.initialize_test_devices();
@@ -581,6 +582,14 @@ async fn initialize_vm_objects(
581582
let ramfb =
582583
init.initialize_fwcfg(spec.board.cpus, &options.bootrom_version)?;
583584

585+
// If we have a VM RoT, that RoT needs to be able to collect some
586+
// information about the guest before it can actually be usable. It will do
587+
// that asynchronously, but we have to provide references to the necessary
588+
// initial VM state.
589+
if let Some(attest_handle) = attest_handle.as_mut() {
590+
init.prepare_rot_initializer(attest_handle)?;
591+
}
592+
584593
init.register_guest_hv_interface(guest_hv_lifecycle);
585594
init.initialize_cpus().await?;
586595

@@ -642,6 +651,7 @@ async fn initialize_vm_objects(
642651
com1,
643652
framebuffer: Some(ramfb),
644653
ps2ctrl,
654+
attest_handle,
645655
};
646656

647657
// Another really terrible hack. As we've found in Propolis#1008, brk()

bin/propolis-server/src/lib/vm/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ use state_publisher::StatePublisher;
100100
use tokio::sync::{oneshot, watch, RwLock, RwLockReadGuard};
101101

102102
use crate::{server::MetricsEndpointConfig, spec::Spec, vnc::VncServer};
103+
use propolis::attestation::server::AttestationServerConfig;
103104

104105
mod active;
105106
pub(crate) mod ensure;
@@ -309,6 +310,8 @@ pub(super) struct EnsureOptions {
309310
/// The address of this Propolis process, used by the live migration
310311
/// protocol to transfer serial console connections.
311312
pub(super) local_server_addr: SocketAddr,
313+
314+
pub(super) attest_config: Option<AttestationServerConfig>,
312315
}
313316

314317
impl Vm {

bin/propolis-server/src/lib/vm/objects.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use std::{
1313

1414
use futures::{future::BoxFuture, stream::FuturesUnordered, StreamExt};
1515
use propolis::{
16+
attestation,
1617
hw::{ps2::ctrl::PS2Ctrl, qemu::ramfb::RamFb, uart::LpcUart},
1718
vmm::VmmHdl,
1819
Machine,
@@ -51,6 +52,7 @@ pub(super) struct InputVmObjects {
5152
pub com1: Arc<Serial<LpcUart>>,
5253
pub framebuffer: Option<Arc<RamFb>>,
5354
pub ps2ctrl: Arc<PS2Ctrl>,
55+
pub attest_handle: Option<attestation::server::AttestationSock>,
5456
}
5557

5658
/// The collection of objects and state that make up a Propolis instance.
@@ -86,6 +88,9 @@ pub(crate) struct VmObjectsLocked {
8688

8789
/// A handle to the VM's PS/2 controller.
8890
ps2ctrl: Arc<PS2Ctrl>,
91+
92+
/// A handle to the VM's attestation server.
93+
attest_handle: Option<attestation::server::AttestationSock>,
8994
}
9095

9196
impl VmObjects {
@@ -126,6 +131,7 @@ impl VmObjectsLocked {
126131
com1: input.com1,
127132
framebuffer: input.framebuffer,
128133
ps2ctrl: input.ps2ctrl,
134+
attest_handle: input.attest_handle,
129135
}
130136
}
131137

@@ -371,7 +377,7 @@ impl VmObjectsLocked {
371377

372378
/// Stops all of a VM's devices and detaches its block backends from their
373379
/// devices.
374-
async fn halt_devices(&self) {
380+
async fn halt_devices(&mut self) {
375381
// Take care not to wedge the runtime with any device halt
376382
// implementations which might block.
377383
tokio::task::block_in_place(|| {
@@ -386,6 +392,10 @@ impl VmObjectsLocked {
386392
backend.stop().await;
387393
backend.attachment().detach();
388394
}
395+
396+
if let Some(attest_handle) = self.attest_handle.take() {
397+
attest_handle.halt().await;
398+
}
389399
}
390400

391401
/// Resets a VM's kernel vCPU objects to their initial states.

0 commit comments

Comments
 (0)