@@ -47,6 +47,7 @@ use super::{
4747 ACTION_GENERATE_ID , ActionRegistry , NexusActionContext , NexusSaga ,
4848 SagaInitError ,
4949} ;
50+ use crate :: app:: db:: datastore;
5051use crate :: app:: db:: datastore:: ExistingTarget ;
5152use crate :: app:: db:: datastore:: ReplacementTarget ;
5253use crate :: app:: db:: datastore:: VolumeReplaceResult ;
@@ -55,12 +56,13 @@ use crate::app::db::datastore::VolumeWithTarget;
5556use crate :: app:: sagas:: declare_saga_actions;
5657use crate :: app:: { authn, authz, db} ;
5758use nexus_db_lookup:: LookupPath ;
59+ use nexus_db_model:: UserDataExport ;
5860use nexus_db_model:: VmmState ;
61+ use nexus_types:: identity:: Asset ;
5962use nexus_types:: saga:: saga_action_failed;
6063use omicron_common:: api:: external:: Error ;
6164use omicron_uuid_kinds:: GenericUuid ;
6265use omicron_uuid_kinds:: VolumeUuid ;
63- use propolis_client:: types:: ReplaceResult ;
6466use serde:: Deserialize ;
6567use serde:: Serialize ;
6668use sled_agent_client:: CrucibleOpts ;
@@ -395,63 +397,35 @@ async fn rsrss_replace_snapshot_in_volume_undo(
395397 Ok ( ( ) )
396398}
397399
398- async fn rsrss_notify_upstairs (
400+ async fn notify_potential_propolis_upstairs (
399401 sagactx : NexusActionContext ,
402+ disk : datastore:: CrucibleDisk ,
400403) -> Result < ( ) , ActionError > {
401404 let osagactx = sagactx. user_data ( ) ;
405+ let datastore = osagactx. datastore ( ) ;
402406 let params = sagactx. saga_params :: < Params > ( ) ?;
403407 let log = sagactx. user_data ( ) . log ( ) ;
404408
405- // If the associated volume was deleted, then skip this notification step as
406- // there is no Upstairs to talk to. Continue with the saga to transition the
407- // step request to Complete, and then perform the associated clean up.
408-
409- let volume_replace_snapshot_result = sagactx
410- . lookup :: < VolumeReplaceResult > ( "volume_replace_snapshot_result" ) ?;
411- if matches ! (
412- volume_replace_snapshot_result,
413- VolumeReplaceResult :: ExistingVolumeSoftDeleted
414- | VolumeReplaceResult :: ExistingVolumeHardDeleted
415- ) {
416- return Ok ( ( ) ) ;
417- }
418-
419- // Make an effort to notify a Propolis if one was booted for this volume.
420- // This is best effort: if there is a failure, this saga will unwind and be
421- // triggered again for the same request. If there is no Propolis booted for
422- // this volume, then there's nothing to be done: any future Propolis will
423- // receive the updated Volume.
424- //
425- // Unlike for region replacement, there's no step required here if there
426- // isn't an active Propolis: any Upstairs created after the snapshot_addr
427- // is replaced will reference the cloned data.
409+ let opctx = crate :: context:: op_context_for_saga_action (
410+ & sagactx,
411+ & params. serialized_authn ,
412+ ) ;
428413
429- let Some ( disk) = osagactx
430- . datastore ( )
431- . disk_for_volume_id ( params. request . volume_id ( ) )
432- . await
433- . map_err ( saga_action_failed) ?
434- else {
435- return Ok ( ( ) ) ;
436- } ;
414+ // Bail out if this disk is not attached to an instance
437415
438416 let Some ( instance_id) = disk. runtime ( ) . attach_instance_id else {
439417 return Ok ( ( ) ) ;
440418 } ;
441419
442- let opctx = crate :: context:: op_context_for_saga_action (
443- & sagactx,
444- & params. serialized_authn ,
445- ) ;
420+ // Bail if there is no active VMM
446421
447- let ( .., authz_instance) = LookupPath :: new ( & opctx, osagactx . datastore ( ) )
422+ let ( .., authz_instance) = LookupPath :: new ( & opctx, datastore)
448423 . instance_id ( instance_id)
449424 . lookup_for ( authz:: Action :: Read )
450425 . await
451426 . map_err ( saga_action_failed) ?;
452427
453- let instance_and_vmm = osagactx
454- . datastore ( )
428+ let instance_and_vmm = datastore
455429 . instance_fetch_with_vmm ( & opctx, & authz_instance)
456430 . await
457431 . map_err ( saga_action_failed) ?;
@@ -464,15 +438,17 @@ async fn rsrss_notify_upstairs(
464438
465439 info ! (
466440 log,
467- "volume associated with disk attached to instance with vmm in \
468- state {state}";
441+ "volume associated with disk attached to instance with vmm in state \
442+ {state}";
469443 "request id" => %params. request. id,
470444 "volume id" => %params. request. volume_id( ) ,
471445 "disk id" => ?disk. id( ) ,
472446 "instance id" => ?instance_id,
473447 "vmm id" => ?vmm. id,
474448 ) ;
475449
450+ // Bail if the VMM is not in a state to receive requests
451+
476452 match & state {
477453 VmmState :: Running | VmmState :: Rebooting => {
478454 // Propolis server is ok to receive the volume replacement request.
@@ -495,8 +471,9 @@ async fn rsrss_notify_upstairs(
495471 }
496472 }
497473
498- let new_volume_vcr = match osagactx
499- . datastore ( )
474+ // Send the new VCR via a replacement request
475+
476+ let new_volume_vcr = match datastore
500477 . volume_get ( params. request . volume_id ( ) )
501478 . await
502479 . map_err ( saga_action_failed) ?
@@ -511,7 +488,7 @@ async fn rsrss_notify_upstairs(
511488 } ;
512489
513490 let instance_lookup =
514- LookupPath :: new ( & opctx, osagactx . datastore ( ) ) . instance_id ( instance_id) ;
491+ LookupPath :: new ( & opctx, datastore) . instance_id ( instance_id) ;
515492
516493 let ( vmm, client) = osagactx
517494 . nexus ( )
@@ -568,27 +545,27 @@ async fn rsrss_notify_upstairs(
568545 ) ;
569546
570547 match & replace_result {
571- ReplaceResult :: Started => {
548+ propolis_client :: types :: ReplaceResult :: Started => {
572549 // This saga's call just started the replacement
573550 }
574551
575- ReplaceResult :: StartedAlready => {
552+ propolis_client :: types :: ReplaceResult :: StartedAlready => {
576553 // A previous run of this saga (or saga node) started the
577554 // replacement
578555 }
579556
580- ReplaceResult :: CompletedAlready => {
557+ propolis_client :: types :: ReplaceResult :: CompletedAlready => {
581558 // It's done! We see this if the same propolis that received the
582559 // original replace request started and finished the replacement.
583560 }
584561
585- ReplaceResult :: VcrMatches => {
562+ propolis_client :: types :: ReplaceResult :: VcrMatches => {
586563 // This propolis booted with the updated VCR
587564 }
588565
589- ReplaceResult :: Missing => {
590- // The volume does not contain the region to be replaced. This is an
591- // error!
566+ propolis_client :: types :: ReplaceResult :: Missing => {
567+ // The volume does not contain the read-only target to be replaced.
568+ // This is an error!
592569 return Err ( saga_action_failed ( Error :: internal_error (
593570 "saw ReplaceResult::Missing" ,
594571 ) ) ) ;
@@ -598,6 +575,175 @@ async fn rsrss_notify_upstairs(
598575 Ok ( ( ) )
599576}
600577
578+ async fn notify_pantry_upstairs (
579+ sagactx : NexusActionContext ,
580+ pantry_address : SocketAddrV6 ,
581+ attachment_id : Uuid ,
582+ ) -> Result < ( ) , ActionError > {
583+ let osagactx = sagactx. user_data ( ) ;
584+ let datastore = osagactx. datastore ( ) ;
585+ let params = sagactx. saga_params :: < Params > ( ) ?;
586+ let log = sagactx. user_data ( ) . log ( ) ;
587+
588+ info ! (
589+ log,
590+ "volume attached to pantry {pantry_address} with id {attachment_id}" ;
591+ "request id" => %params. request. id,
592+ "volume id" => %params. request. volume_id( ) ,
593+ ) ;
594+
595+ // Grab the new volume's VCR
596+
597+ let volume_construction_request = match datastore
598+ . volume_get ( params. request . volume_id ( ) )
599+ . await
600+ . map_err ( saga_action_failed) ?
601+ {
602+ Some ( volume) => serde_json:: from_str ( & volume. data ( ) ) . map_err ( |e| {
603+ saga_action_failed ( Error :: internal_error ( & format ! (
604+ "failed to deserialize volume {} data: {e}" ,
605+ volume. id( )
606+ ) ) )
607+ } ) ?,
608+
609+ None => {
610+ return Err ( saga_action_failed ( Error :: internal_error (
611+ "new volume is gone!" ,
612+ ) ) ) ;
613+ }
614+ } ;
615+
616+ let endpoint = format ! ( "http://{}" , pantry_address) ;
617+ let client = crucible_pantry_client:: Client :: new ( & endpoint) ;
618+
619+ let replace_request = crucible_pantry_client:: types:: ReplaceRequest {
620+ volume_construction_request,
621+ } ;
622+
623+ let replace_result = client
624+ . replace ( & attachment_id. to_string ( ) , & replace_request)
625+ . await
626+ . map_err ( |e| {
627+ saga_action_failed ( Error :: internal_error ( & e. to_string ( ) ) )
628+ } ) ?;
629+
630+ match replace_result. into_inner ( ) {
631+ crucible_pantry_client:: types:: ReplaceResult :: Started => {
632+ // This saga's call just started the replacement
633+ }
634+
635+ crucible_pantry_client:: types:: ReplaceResult :: StartedAlready => {
636+ // A previous run of this saga (or saga node) started the
637+ // replacement
638+ }
639+
640+ crucible_pantry_client:: types:: ReplaceResult :: CompletedAlready => {
641+ // It's done! We see this if the same pantry that received the
642+ // original replace request started and finished the replacement.
643+ }
644+
645+ crucible_pantry_client:: types:: ReplaceResult :: VcrMatches => {
646+ // This pantry booted with the updated VCR
647+ }
648+
649+ crucible_pantry_client:: types:: ReplaceResult :: Missing => {
650+ // The volume does not contain the read-only target to be replaced.
651+ // This is an error!
652+ return Err ( saga_action_failed ( Error :: internal_error (
653+ String :: from ( "saw ReplaceResult::Missing" ) ,
654+ ) ) ) ;
655+ }
656+ }
657+
658+ Ok ( ( ) )
659+ }
660+
661+ async fn rsrss_notify_upstairs (
662+ sagactx : NexusActionContext ,
663+ ) -> Result < ( ) , ActionError > {
664+ let osagactx = sagactx. user_data ( ) ;
665+ let params = sagactx. saga_params :: < Params > ( ) ?;
666+
667+ let opctx = crate :: context:: op_context_for_saga_action (
668+ & sagactx,
669+ & params. serialized_authn ,
670+ ) ;
671+
672+ // If the associated volume was deleted, then skip this notification step as
673+ // there is no Upstairs to talk to. Continue with the saga to transition the
674+ // step request to Complete, and then perform the associated clean up.
675+
676+ let volume_replace_snapshot_result = sagactx
677+ . lookup :: < VolumeReplaceResult > ( "volume_replace_snapshot_result" ) ?;
678+ if matches ! (
679+ volume_replace_snapshot_result,
680+ VolumeReplaceResult :: ExistingVolumeSoftDeleted
681+ | VolumeReplaceResult :: ExistingVolumeHardDeleted
682+ ) {
683+ return Ok ( ( ) ) ;
684+ }
685+
686+ // Make an effort to notify an Upstairs if one was constructed for this
687+ // volume. This is best effort: if there is a failure, this saga will unwind
688+ // and be triggered again for the same request. If there is no Upstairs
689+ // constructed for this volume, then there's nothing to be done: any future
690+ // construction will receive the updated Volume.
691+ //
692+ // Unlike for region replacement, there's no step required here if there
693+ // isn't an active Upstairs: any Upstairs created after the snapshot_addr is
694+ // replaced will reference the cloned data.
695+
696+ let maybe_disk = osagactx
697+ . datastore ( )
698+ . disk_for_volume_id ( params. request . volume_id ( ) )
699+ . await
700+ . map_err ( saga_action_failed) ?;
701+
702+ let maybe_user_data_export = osagactx
703+ . datastore ( )
704+ . user_data_export_lookup_by_volume_id (
705+ & opctx,
706+ params. request . volume_id ( ) ,
707+ )
708+ . await
709+ . map_err ( saga_action_failed) ?;
710+
711+ if let Some ( disk) = maybe_disk {
712+ notify_potential_propolis_upstairs ( sagactx, disk) . await ?;
713+ } else if let Some ( record) = maybe_user_data_export {
714+ let ( pantry_address, volume_id) = match record. is_live ( ) {
715+ Err ( s) => {
716+ // There was an error with a Live user data export record that
717+ // means we have to unwind here. This will likely require
718+ // support intervention, as the record is in state Live but does
719+ // not have either a Pantry address or volume id.
720+ return Err ( saga_action_failed ( Error :: internal_error (
721+ s. to_string ( ) ,
722+ ) ) ) ;
723+ }
724+
725+ Ok ( UserDataExport :: NotLive ) => {
726+ // The user data export is not Live, meaning no notification is
727+ // required.
728+ return Ok ( ( ) ) ;
729+ }
730+
731+ Ok ( UserDataExport :: Live { pantry_address, volume_id } ) => {
732+ ( pantry_address, volume_id)
733+ }
734+ } ;
735+
736+ notify_pantry_upstairs (
737+ sagactx,
738+ pantry_address,
739+ volume_id. into_untyped_uuid ( ) ,
740+ )
741+ . await ?;
742+ }
743+
744+ Ok ( ( ) )
745+ }
746+
601747async fn rsrss_update_request_record (
602748 sagactx : NexusActionContext ,
603749) -> Result < ( ) , ActionError > {
0 commit comments