diff --git a/s5_node/src/tasks/restore.rs b/s5_node/src/tasks/restore.rs index fffe68e..a2c694b 100644 --- a/s5_node/src/tasks/restore.rs +++ b/s5_node/src/tasks/restore.rs @@ -183,10 +183,6 @@ pub async fn run_restore( /// 6. Parse Node → extract snapshot parts /// 7. Build Snapshot using the remote store as read backend /// 8. Restore files to target path -// Variables resolved at the top of the function are used only by the -// unreachable code below (kept as a starting point for the v3 rewrite), -// so silence the unused warnings at function scope. -#[allow(unused_variables)] pub async fn run_remote_restore( ctx: &TaskExecutorContext, age_secret_key: &str, @@ -196,87 +192,77 @@ pub async fn run_remote_restore( reporter: TaskReporter, _cancel: CancellationToken, ) -> anyhow::Result<()> { + use super::publish::{derive_vault_id, recovery_signing_key}; + use ed25519_dalek::VerifyingKey; + use s5_fs_v2::snapshot::KEY_SLOT_RECOVERY; + let registry = ctx .registry .as_ref() - .ok_or_else(|| anyhow!("no registry configured — cannot perform remote restore"))?; + .ok_or_else(|| anyhow!("no registry configured, cannot perform remote restore"))?; let blob_store = resolve_store(&ctx.stores, blob_store_name)?; - // -- Step 1: paper-only recovery is not yet wired for the v3 schema -- - // - // The v3 recovery flow: - // 1. Derive recovery_age_secret = argon2id(paper_passphrase, …) - // 2. Fetch the vault root blob from a configured store - // (e.g. the relay S3 bucket holding meta blobs) - // 3. age-decrypt the vault root with recovery_age_secret - // 4. Read KEY_SLOT_RECOVERY from its TraversalContext.keys - // 5. Derive vault_id + recovery_signing_key from that secret - // (see s5_node::tasks::publish::{derive_vault_id, recovery_signing_key}) - // 6. registry.get(StreamKey::Vault { pubkey: recovery_pubkey, vault_id }) - // → its payload holds a device's signing pubkey - // 7. registry.get(StreamKey::Vault { pubkey: device_pubkey, vault_id }) - // → current snapshot HEAD hash - // - // The legacy `(age_secret, vault_name)` lookup that this function - // used in the pre-v3 schema produces wrong stream keys for v3 - // vaults, so rather than silently returning empty, we surface a - // clear error until the new flow is implemented. - let _ = age_secret_key; - return Err(anyhow!( - "remote restore for vault '{vault_name}' is not yet supported on \ - the v3 schema — the recovery flow needs to fetch and \ - age-decrypt the vault root first to derive vault_id from \ - KEY_SLOT_RECOVERY (see snapshot-publication.md § Vault ID \ - derivation)" - )); - - // The unreachable code below preserves the shape of the legacy - // restore so the v3 reimplementation has a starting point. - #[allow(unreachable_code)] - let recovery_stream_key: StreamKey = unreachable!(); - #[allow(unreachable_code)] + // Step 1: discover the vault root by enumerating blobs in the + // store and trying age-decrypt with the paper key. The first blob + // that decrypts and parses as a Node carrying KEY_SLOT_RECOVERY is + // a vault root. Note: this finds *some* vault root for the paper + // recipient (which gives us recovery_secret); the actual current + // HEAD comes from the registry lookup in step 4 below, so even an + // older snap's vault root is fine here. + tracing::info!( + vault = vault_name, + "scanning relay store for a vault root decryptable with the paper key" + ); + let recovery_secret = discover_recovery_secret(blob_store, age_secret_key).await?; + tracing::info!(vault = vault_name, "found a vault root, deriving vault_id"); + + // Step 2: derive vault_id + recovery_signing_key from the recovered secret. + let vault_id = derive_vault_id(&recovery_secret); + let recovery_key = recovery_signing_key(&recovery_secret); + let recovery_verifying: VerifyingKey = (&recovery_key).into(); + let recovery_stream_key = StreamKey::Vault { + pubkey: recovery_verifying.to_bytes(), + vault_id, + }; + + // Step 3: recovery entry → device's signing pubkey. let recovery_entry = registry .get(&recovery_stream_key) .await .context("fetching recovery registry entry")? .ok_or_else(|| { anyhow!( - "no recovery entry found for vault '{}' — \ - was this vault ever published with a recovery key?", - vault_name + "no recovery entry found for vault '{vault_name}' (vault_id={}). \ + Was this vault ever published?", + hex::encode(vault_id) ) })?; - - // The recovery entry's hash field stores the device's signing pubkey. let device_pubkey_bytes: [u8; 32] = *recovery_entry.hash.as_bytes(); - let vault_stream_key = StreamKey::Vault { - pubkey: device_pubkey_bytes, - vault_id: [0u8; 16], - }; - tracing::info!( vault = vault_name, - vault_pubkey = hex::encode(device_pubkey_bytes), - "found vault pubkey via recovery entry" + device_pubkey = hex::encode(device_pubkey_bytes), + "resolved device signing pubkey via recovery entry" ); - // -- Step 3: Vault entry → encrypted TN hash -- + // Step 4: vault entry under (device_pubkey, vault_id) → latest TN hash. + let vault_stream_key = StreamKey::Vault { + pubkey: device_pubkey_bytes, + vault_id, + }; let vault_entry = registry .get(&vault_stream_key) .await .context("fetching vault registry entry")? .ok_or_else(|| { anyhow!( - "no published snapshot found for vault '{}' — \ - vault pubkey {} has no registry entry", - vault_name, - hex::encode(device_pubkey_bytes) + "no published snapshot found for vault '{vault_name}' under \ + device pubkey {} + vault_id {}", + hex::encode(device_pubkey_bytes), + hex::encode(vault_id), ) })?; - let encrypted_tn_hash = vault_entry.hash; - tracing::info!( vault = vault_name, revision = vault_entry.revision, @@ -284,32 +270,38 @@ pub async fn run_remote_restore( "found latest published snapshot" ); - // -- Step 4: Download encrypted TN -- + // Step 5: fetch + age-decrypt the latest TN. let encrypted_bytes = blob_store .blob_download(encrypted_tn_hash) .await .map_err(|e| anyhow!("downloading encrypted Transparent Node: {e}"))?; - - tracing::info!( - vault = vault_name, - size = encrypted_bytes.len(), - "downloaded encrypted Transparent Node" - ); - - // -- Step 5: Age-decrypt with paper key -- let cbor = age_decrypt_with_secret_key(&encrypted_bytes, age_secret_key) - .context("decrypting Transparent Node with paper key")?; + .context("decrypting latest Transparent Node with paper key")?; - // -- Step 6: Parse Node → snapshot parts -- - // The published TN may have history entries (timestamps → previous hashes). - // We only care about the current entry at "". + // Step 6: Parse Node → snapshot parts. The published TN may have + // history entries (timestamps → previous hashes); only the "" entry + // (current snapshot) matters here. let node = Node::from_bytes(&cbor).map_err(|e| anyhow!("CBOR decode Transparent Node: {e}"))?; - let (root, root_plaintext_hash, context) = node_to_snapshot_parts(&node) .context("extracting snapshot from published Transparent Node")?; - let history_count = node.entries.len() - 1; // exclude "" + // Defence: the freshly-fetched TN must carry the same recovery_secret + // we derived vault_id from. A mismatch means the registry entry + // points at a substituted blob (recovery_secret would be different) + // and we should not splice that into the restore. + let fetched_recovery_secret = context + .keys + .as_ref() + .and_then(|m| m.get(&KEY_SLOT_RECOVERY).copied()) + .ok_or_else(|| anyhow!("latest TN has no KEY_SLOT_RECOVERY slot"))?; + if fetched_recovery_secret != recovery_secret { + return Err(anyhow!( + "vault root substitution detected: latest TN's KEY_SLOT_RECOVERY \ + does not match the discovered vault root's. Refusing to restore." + )); + } + let history_count = node.entries.len() - 1; tracing::info!( vault = vault_name, root = %root.fmt_short(), @@ -318,7 +310,7 @@ pub async fn run_remote_restore( "decrypted snapshot metadata" ); - // -- Step 7: Build Snapshot using remote store -- + // Step 7: build Snapshot using the remote store as read backend. let read_store: Arc = Arc::new(blob_store.clone()); let snapshot = Snapshot::new(root, read_store, context, root_plaintext_hash); @@ -377,3 +369,67 @@ pub async fn run_remote_restore( Ok(()) } + +/// Bootstrap step for paper-only recovery: enumerate every blob in the +/// store, try to age-decrypt with the paper key, and return the +/// `recovery_secret` from the first blob that decrypts and parses as a +/// vault root (a `Node` whose `TraversalContext.keys` carries +/// `KEY_SLOT_RECOVERY`). +/// +/// Once we have `recovery_secret` we can derive `vault_id` and look up +/// the registry entry for the actual current HEAD — see +/// [`run_remote_restore`]. The blob found here may be an older snap; +/// that's fine, we only need it for `recovery_secret`. +/// +/// O(N) over the relay's blob count. For the M3 demo (single vault, +/// single device) the relay holds dozens of blobs and the first hit +/// is typically the encrypted TN. Enumeration order is whatever the +/// underlying `Store::list` returns; we stop at the first match. +async fn discover_recovery_secret( + blob_store: &s5_core::blob::BlobStore, + age_secret_key: &str, +) -> anyhow::Result<[u8; 32]> { + use s5_fs_v2::snapshot::KEY_SLOT_RECOVERY; + + let hashes = blob_store + .list_hashes() + .await + .map_err(|e| anyhow!("listing blobs in relay store: {e}"))?; + + let total = hashes.len(); + for hash in hashes { + let bytes = match blob_store.blob_download(hash).await { + Ok(b) => b, + Err(_) => continue, + }; + // Most blobs are vault-encrypted leaves, not age-encrypted. + // age_decrypt_with_secret_key fails fast on those; we just skip. + let cbor = match age_decrypt_with_secret_key(&bytes, age_secret_key) { + Ok(c) => c, + Err(_) => continue, + }; + let node = match Node::from_bytes(&cbor) { + Ok(n) => n, + Err(_) => continue, + }; + let entry = match node.transparent_entry() { + Some(e) => e, + None => continue, + }; + let recovery_secret = entry + .child_context + .as_ref() + .and_then(|ctx| ctx.keys.as_ref()) + .and_then(|keys| keys.get(&KEY_SLOT_RECOVERY).copied()); + if let Some(secret) = recovery_secret { + return Ok(secret); + } + } + + Err(anyhow!( + "scanned {total} blob(s) in the relay store, found no vault root \ + decryptable with the supplied paper key. Either the wrong store \ + is configured, or no snapshot of any vault has been published \ + to it yet for this paper recipient." + )) +} diff --git a/s5_node/tests/async_relay.rs b/s5_node/tests/async_relay.rs new file mode 100644 index 0000000..4f81798 --- /dev/null +++ b/s5_node/tests/async_relay.rs @@ -0,0 +1,361 @@ +//! Async-relay end-to-end: device A publishes via a remote store, device B +//! pulls from the same remote store after A is offline. No iroh between A and +//! B; the only shared resource is a `Store` that both can reach. +//! +//! Steps: +//! 1. A creates files +//! 2. A snaps → file blobs + encrypted Transparent Node land in the relay +//! store; the registry entry mirrors to the relay via a `Multi` +//! registry whose backends are `[Memory, Store(relay)]`. +//! 3. A's runtime is dropped (offline simulation). +//! 4. B starts up with its own (empty) memory backend + the same relay +//! store. `RemoteRestore` finds the recovery + vault registry entries +//! via the Multi-registry fallback to relay, downloads the encrypted +//! Transparent Node from the relay, decrypts with the shared paper +//! age key, and restores. +//! 5. Test asserts that the SHA-256 of every restored file matches its +//! original. + +use std::collections::{BTreeMap, HashMap}; +use std::sync::Arc; +use std::time::Duration; + +use age::secrecy::ExposeSecret; +use anyhow::{Context, Result, anyhow}; +use s5_core::RegistryApi; +use s5_core::blob::BlobStore; +use s5_core::store::Store; +use s5_node::config::{ + NodeConfigIdentity, NodeConfigKey, NodeConfigSource, NodeConfigVault, S5NodeConfig, TaskSpec, +}; +use s5_node::tasks::{TaskExecutor, TaskExecutorContext}; +use s5_node_api::TaskState; +use s5_registry::{MemoryRegistry, MultiRegistry, WritePolicy}; +use s5_registry_store::StoreRegistry; +use s5_store_local::{LocalStore, LocalStoreConfig}; +use sha2::{Digest, Sha256}; +use tempfile::tempdir; +use tokio::sync::RwLock; + +/// Build a `TaskExecutorContext` with `relay` (primary, file blobs + TN +/// primary copy) and `mirror` (encrypted-TN mirror target via meta_targets). +/// The Multi registry has `[Memory, Store(relay)]` backends so the relay +/// store serves both as a CAS for blobs and as a key-value backend for the +/// registry — the "remote store as relay" shape exercised end-to-end. +/// +/// `node_secret` lets the test distinguish A from B: each gets its own +/// derivation of `vault_signing_key`, but the recovery bridge +/// (paper-age-derived) is the same on both sides, which is what makes B's +/// `RemoteRestore` resolve A's vault root. +fn build_ctx( + config: S5NodeConfig, + relay_blob: BlobStore, + mirror_blob: BlobStore, + relay_raw: Arc, + node_secret: [u8; 32], +) -> Arc { + let mut stores = HashMap::new(); + stores.insert("relay".to_string(), relay_blob); + stores.insert("mirror".to_string(), mirror_blob); + + let memory: Arc = Arc::new(MemoryRegistry::new()); + let store_reg: Arc = + Arc::new(StoreRegistry::new(relay_raw, None)); + let multi = MultiRegistry::with_policy(vec![memory, store_reg], WritePolicy::All); + + Arc::new(TaskExecutorContext { + config: Arc::new(RwLock::new(config)), + stores, + node_secret, + registry: Some(Arc::new(multi)), + }) +} + +/// Minimal config for a vault that publishes everything to `relay`. +fn vault_config( + vault_root: &str, + paper_recipient: &str, + paper_identity_file: &str, + source_paths: Vec, +) -> S5NodeConfig { + let mut key = BTreeMap::new(); + key.insert( + "paper".to_string(), + NodeConfigKey { + public_key: paper_recipient.to_string(), + identity_file: Some(paper_identity_file.to_string()), + }, + ); + + let mut source = BTreeMap::new(); + source.insert( + "docs".to_string(), + NodeConfigSource { + paths: source_paths, + include_caches: false, + skip_hidden: false, + respect_ignore_files: false, + exclude: Vec::new(), + one_file_system: false, + }, + ); + + let mut vault = BTreeMap::new(); + vault.insert( + "test".to_string(), + NodeConfigVault { + root_path: vault_root.to_string(), + key: "paper".to_string(), + blob_stores: vec!["relay".to_string()], + preset: None, + recipients: vec!["paper".to_string()], + sources: vec!["docs".to_string()], + // Distinct from blob_stores: forces publish through the + // meta_targets mirror loop (otherwise it dedups the duplicate). + meta_targets: vec!["mirror".to_string()], + plaintext_tree: false, + watch: false, + }, + ); + + S5NodeConfig { + identity: NodeConfigIdentity { + secret_key_file: None, + secret_key: None, + encrypted_with: None, + }, + key, + store: BTreeMap::new(), + registry: BTreeMap::new(), + source, + vault, + task: BTreeMap::new(), + } +} + +/// Spawn `spec` and block until the task reaches a terminal state. +async fn run_to_completion(executor: &TaskExecutor, spec: TaskSpec) -> Result<()> { + let (id, _) = executor.spawn(spec).await?; + let mut rx = executor + .watch_status(id) + .await + .ok_or_else(|| anyhow!("task vanished after spawn"))?; + loop { + let state = rx.borrow().state.clone(); + match state { + TaskState::Completed => return Ok(()), + TaskState::Failed { error } => return Err(anyhow!("task {id} failed: {error}")), + TaskState::Cancelled => return Err(anyhow!("task {id} cancelled")), + _ => { + rx.changed() + .await + .map_err(|_| anyhow!("task {id} status channel closed"))?; + } + } + } +} + +/// SHA-256 every regular file under `dir` (relative paths, sorted). Symlinks +/// are skipped so the assertion doesn't depend on follow semantics. +fn hash_tree(dir: &std::path::Path) -> Result> { + let mut out = BTreeMap::new(); + walk_files(dir, dir, &mut out)?; + Ok(out) +} + +fn walk_files( + base: &std::path::Path, + dir: &std::path::Path, + out: &mut BTreeMap, +) -> Result<()> { + for entry in std::fs::read_dir(dir).with_context(|| format!("read_dir {}", dir.display()))? { + let entry = entry?; + let path = entry.path(); + let ft = entry.file_type()?; + if ft.is_dir() { + walk_files(base, &path, out)?; + } else if ft.is_file() { + let rel = path + .strip_prefix(base) + .unwrap() + .to_string_lossy() + .into_owned(); + let bytes = std::fs::read(&path)?; + let mut hasher = Sha256::new(); + hasher.update(&bytes); + let digest: [u8; 32] = hasher.finalize().into(); + out.insert(rel, digest); + } + // skip symlinks; not part of this test's surface + } + Ok(()) +} + +#[tokio::test] +async fn async_relay_via_remote_store() -> Result<()> { + // ---- shared ground ----------------------------------------------------- + let relay_dir = tempdir()?; + let mirror_dir = tempdir()?; + let source_dir = tempdir()?; + let a_vault_dir = tempdir()?; + let b_vault_dir = tempdir()?; + let restore_dir = tempdir()?; + let identity_dir = tempdir()?; + + // Author the source tree A is going to back up. + std::fs::create_dir_all(source_dir.path().join("nested"))?; + std::fs::write( + source_dir.path().join("readme.md"), + b"# vault test\n\nSome text content.\n", + )?; + std::fs::write( + source_dir.path().join("nested/data.bin"), + // Non-trivial size so we exercise CDC + chunk upload, not just inline. + vec![0xAB; 256 * 1024], + )?; + std::fs::write(source_dir.path().join("nested/hello.txt"), b"hello world")?; + + // Paper age key — the only shared secret between A and B. + let paper = age::x25519::Identity::generate(); + let paper_recipient = paper.to_public().to_string(); + let paper_secret = paper.to_string().expose_secret().to_string(); + let identity_path = identity_dir.path().join("paper.txt"); + std::fs::write(&identity_path, &paper_secret)?; + let identity_path_str = identity_path.to_string_lossy().into_owned(); + + // The relay + mirror stores: A writes here, B reads from here. Same + // backing dirs, separate Store instances per role so we don't share + // runtime state across the offline boundary. + let relay_a_raw: Arc = Arc::new(LocalStore::create(LocalStoreConfig { + base_path: relay_dir.path().to_string_lossy().into_owned(), + })); + let relay_a_blob = BlobStore::from_arc(Arc::clone(&relay_a_raw)); + let mirror_a_blob = BlobStore::new(LocalStore::create(LocalStoreConfig { + base_path: mirror_dir.path().to_string_lossy().into_owned(), + })); + + // ---- A publishes ------------------------------------------------------- + let a_cfg = vault_config( + &a_vault_dir.path().to_string_lossy(), + &paper_recipient, + &identity_path_str, + vec![source_dir.path().to_string_lossy().into_owned()], + ); + + let a_node_secret = [0x11u8; 32]; + let a_ctx = build_ctx( + a_cfg, + relay_a_blob, + mirror_a_blob, + Arc::clone(&relay_a_raw), + a_node_secret, + ); + let a_executor = TaskExecutor::new(Arc::clone(&a_ctx)); + + run_to_completion( + &a_executor, + TaskSpec::Backup { + vault: "test".to_string(), + source: "docs".to_string(), + blob_store: "relay".to_string(), + keys: vec!["paper".to_string()], + target_path: None, + }, + ) + .await?; + + // ---- A goes offline ---------------------------------------------------- + drop(a_executor); + drop(a_ctx); + drop(relay_a_raw); + + // Sanity check: registry entries did land on the relay store. The Store + // backing the relay is independent of A, so we can read it directly here. + let relay_check: Arc = Arc::new(LocalStore::create(LocalStoreConfig { + base_path: relay_dir.path().to_string_lossy().into_owned(), + })); + let mut count = 0u32; + let mut stream = relay_check.list().await?; + use futures_util::StreamExt; + while let Some(item) = stream.next().await { + let _ = item?; + count += 1; + } + assert!( + count > 0, + "relay store has no entries — publish never wrote to it" + ); + + // ---- B restores -------------------------------------------------------- + let relay_b_raw: Arc = Arc::new(LocalStore::create(LocalStoreConfig { + base_path: relay_dir.path().to_string_lossy().into_owned(), + })); + let relay_b_blob = BlobStore::from_arc(Arc::clone(&relay_b_raw)); + let mirror_b_blob = BlobStore::new(LocalStore::create(LocalStoreConfig { + base_path: mirror_dir.path().to_string_lossy().into_owned(), + })); + + // B's vault config still references the same vault name (used for + // signing-key derivation in the recovery flow) and the same paper key. + let b_cfg = vault_config( + &b_vault_dir.path().to_string_lossy(), + &paper_recipient, + &identity_path_str, + Vec::new(), + ); + + let b_node_secret = [0x22u8; 32]; // distinct from A's — important + let b_ctx = build_ctx( + b_cfg, + relay_b_blob, + mirror_b_blob.clone(), + Arc::clone(&relay_b_raw), + b_node_secret, + ); + let b_executor = TaskExecutor::new(Arc::clone(&b_ctx)); + + run_to_completion( + &b_executor, + TaskSpec::RemoteRestore { + vault: "test".to_string(), + age_secret_key: paper_secret.clone(), + blob_store: "relay".to_string(), + target_path: restore_dir.path().to_string_lossy().into_owned(), + }, + ) + .await?; + + // Allow filesystem flushes (atomic renames) to settle before walking. + tokio::time::sleep(Duration::from_millis(50)).await; + + // ---- Verify meta_targets mirror ----------------------------------------- + // The mirror store should have received its own copy of the encrypted + // Transparent Node — proving the publish path mirrored to meta_targets, + // not just to blob_stores[0]. + let mirror_hashes = mirror_b_blob.list_hashes().await?; + assert!( + !mirror_hashes.is_empty(), + "mirror store has no blobs — meta_targets mirror loop did not run", + ); + + // ---- Verify content round-trip ----------------------------------------- + let originals = hash_tree(source_dir.path())?; + let restored = hash_tree(restore_dir.path())?; + + assert_eq!( + originals.keys().collect::>(), + restored.keys().collect::>(), + "restored file set does not match original", + ); + for (path, original_hash) in &originals { + let restored_hash = restored + .get(path) + .ok_or_else(|| anyhow!("missing on restore: {path}"))?; + assert_eq!( + original_hash, restored_hash, + "SHA-256 mismatch for restored file {path}", + ); + } + + Ok(()) +}