diff --git a/cli/src/commands/auth/mod.rs b/cli/src/commands/auth/mod.rs index 43582bb0..567b25ac 100644 --- a/cli/src/commands/auth/mod.rs +++ b/cli/src/commands/auth/mod.rs @@ -12,6 +12,7 @@ mod list; pub(crate) mod login; mod logout; +mod status; use crate::config::AppConfig; use clap::Subcommand; @@ -79,6 +80,13 @@ pub enum AuthCommands { #[arg(long, short)] profile: Option<String>, }, + + /// Show active profile, configured credentials, and API reachability + Status { + /// Filter by profile + #[arg(long, short)] + profile: Option<String>, + }, } impl AuthCommands { @@ -121,6 +129,9 @@ impl AuthCommands { logout::handle_logout(&config_dir, provider.as_deref(), profile.as_deref()) } AuthCommands::List { profile } => list::handle_list(&config_dir, profile.as_deref()), + AuthCommands::Status { profile } => { + status::handle_status(&config_dir, &config, profile.as_deref()).await + } } } } diff --git a/cli/src/commands/auth/status.rs b/cli/src/commands/auth/status.rs new file mode 100644 index 00000000..73a036d8 --- /dev/null +++ b/cli/src/commands/auth/status.rs @@ -0,0 +1,254 @@ +//! Auth status command — verify configured credentials and API reachability. + +use super::collect_all_credentials; +use crate::config::AppConfig; +use stakpak_shared::models::auth::ProviderAuth; +use stakpak_shared::oauth::ProviderRegistry; +use std::path::Path; + +/// Handle the auth status command. +/// +/// Prints configured profiles/providers (mirroring `auth list`), highlights the +/// currently active profile, and pings the Stakpak API to verify reachability +/// and credentials. Returns Err when the API check fails so callers/scripts can +/// detect broken auth via exit code. 
+pub async fn handle_status( + config_dir: &Path, + config: &AppConfig, + profile: Option<&str>, +) -> Result<(), String> { + let registry = ProviderRegistry::new(); + let all_credentials = collect_all_credentials(config_dir); + let active_profile = config.profile_name.as_str(); + + if all_credentials.is_empty() { + println!("No credentials configured."); + println!(); + println!("Run 'stakpak auth login' to add credentials."); + return Ok(()); + } + + println!("Active profile: {}", active_profile); + println!("Config file: {}", config.config_path); + println!("API endpoint: {}", config.api_endpoint); + if config.api_endpoint.starts_with("http://") { + println!(" ⚠ endpoint is plaintext http:// — credentials sent in clear"); + } + println!(); + + let mut profile_names: Vec<_> = all_credentials.keys().collect(); + profile_names.sort_by(|a, b| { + if *a == "all" { + std::cmp::Ordering::Less + } else if *b == "all" { + std::cmp::Ordering::Greater + } else { + a.cmp(b) + } + }); + + for profile_name in profile_names { + if let Some(filter) = profile + && profile_name != filter + && profile_name != "all" + { + continue; + } + + let Some(providers) = all_credentials.get(profile_name) else { + continue; + }; + if providers.is_empty() { + continue; + } + + let label = if profile_name == "all" { + "shared (all profiles)".to_string() + } else if profile_name == active_profile { + format!("profile '{}' (active)", profile_name) + } else { + format!("profile '{}'", profile_name) + }; + println!(" {}:", label); + + let mut provider_ids: Vec<_> = providers.keys().collect(); + provider_ids.sort(); + for provider_id in provider_ids { + let Some((auth, _source)) = providers.get(provider_id) else { + continue; + }; + let provider_name = registry + .get(provider_id) + .map(|p| p.name()) + .unwrap_or(provider_id.as_str()); + + println!( + " - {} ({}){}{}", + provider_name, + auth.auth_type_display(), + credential_suffix(auth), + expiry_suffix(auth), + ); + } + println!(); + } + + // 
API reachability check — probe the filtered profile if one was passed, + // otherwise the active profile. Falls back to shared "all" credentials. + let probe_profile = profile.unwrap_or(active_profile); + let active_stakpak_auth = all_credentials + .get(probe_profile) + .and_then(|providers| providers.get("stakpak")) + .or_else(|| { + all_credentials + .get("all") + .and_then(|providers| providers.get("stakpak")) + }) + .map(|(auth, _source)| auth.clone()); + + let Some(auth) = active_stakpak_auth else { + println!( + "API check: skipped (no stakpak credential on profile '{}')", + probe_profile + ); + return Ok(()); + }; + + if auth.is_expired() { + let msg = "access token expired (run `stakpak auth login`)"; + eprintln!("API check: ✗ {}", msg); + return Err(msg.to_string()); + } + + match probe_api(&config.api_endpoint, &auth).await { + Ok(identity) => { + println!("API check: ✓ reachable as {}", identity); + Ok(()) + } + Err(error) => { + eprintln!("API check: ✗ {}", error); + Err(error) + } + } +} + +/// Print " key=…XXXX" for API keys. Returns empty string for OAuth (access +/// token suffixes are sensitive enough that exposing tail bytes isn't worth +/// the debug value). +fn credential_suffix(auth: &ProviderAuth) -> String { + match auth.api_key_value() { + Some(key) => format!(" key=…{}", mask_tail(key)), + None => String::new(), + } +} + +/// Return the last 4 chars of `secret`, but only when the secret is long +/// enough that revealing them does not meaningfully expose the key. Short +/// strings (<12 chars) collapse to "????" so a malformed/test credential +/// can't leak in full. 
+fn mask_tail(secret: &str) -> String { + let count = secret.chars().count(); + if count < 12 { + return "????".to_string(); + } + secret.chars().skip(count - 4).collect() +} + +fn expiry_suffix(auth: &ProviderAuth) -> &'static str { + if auth.is_oauth() { + if auth.is_expired() { + " (expired)" + } else if auth.needs_refresh() { + " (needs refresh)" + } else { + "" + } + } else { + "" + } +} + +async fn probe_api(endpoint: &str, auth: &ProviderAuth) -> Result<String, String> { + let token = match auth { + ProviderAuth::Api { key } => key.clone(), + ProviderAuth::OAuth { access, .. } => access.clone(), + }; + + let url = format!("{}/v1/account", endpoint.trim_end_matches('/')); + let response = reqwest::Client::new() + .get(&url) + .header("Authorization", format!("Bearer {}", token)) + .timeout(std::time::Duration::from_secs(5)) + .send() + .await + .map_err(|e| format!("request failed: {}", e))?; + + let status = response.status(); + if !status.is_success() { + return Err(format!("HTTP {} from {}", status.as_u16(), url)); + } + + let body: serde_json::Value = response + .json() + .await + .map_err(|e| format!("malformed response: {}", e))?; + + let username = body + .get("username") + .and_then(|v| v.as_str()) + .unwrap_or(""); + let email = body.get("email").and_then(|v| v.as_str()).unwrap_or(""); + if email.is_empty() { + Ok(username.to_string()) + } else { + Ok(format!("{} <{}>", username, email)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn mask_tail_collapses_short_secrets() { + assert_eq!(mask_tail(""), "????"); + assert_eq!(mask_tail("abc"), "????"); + assert_eq!(mask_tail("eleven_char"), "????"); + } + + #[test] + fn mask_tail_reveals_last_four_for_long_secrets() { + assert_eq!(mask_tail("twelve_chars"), "hars"); + assert_eq!(mask_tail("sk-test-1234567890ABCD"), "ABCD"); + } + + #[test] + fn mask_tail_handles_multibyte_chars() { + let secret = "prefix-αβγδ-ABCD"; + assert_eq!(mask_tail(secret), "ABCD"); + } + + #[test] + fn 
credential_suffix_for_api_key() { + let auth = ProviderAuth::api_key("sk-proj-very-long-key-1234"); + assert_eq!(credential_suffix(&auth), " key=…1234"); + } + + #[test] + fn credential_suffix_empty_for_oauth() { + let auth = ProviderAuth::oauth("access", "refresh", i64::MAX); + assert_eq!(credential_suffix(&auth), ""); + } + + #[test] + fn expiry_suffix_states() { + let api = ProviderAuth::api_key("k"); + assert_eq!(expiry_suffix(&api), ""); + + let expired = ProviderAuth::oauth("a", "r", 0); + assert_eq!(expiry_suffix(&expired), " (expired)"); + + let fresh = ProviderAuth::oauth("a", "r", i64::MAX); + assert_eq!(expiry_suffix(&fresh), ""); + } +} diff --git a/cli/src/commands/autopilot/mod.rs b/cli/src/commands/autopilot/mod.rs index 33f35637..43407444 100644 --- a/cli/src/commands/autopilot/mod.rs +++ b/cli/src/commands/autopilot/mod.rs @@ -1014,17 +1014,21 @@ async fn start_autopilot(config: &mut AppConfig, options: StartOptions) -> Resul // Clear spinner line before printing error print!("\r\x1b[2K"); let _ = std::io::Write::flush(&mut std::io::stdout()); - println!( + eprintln!( " ✗ Timed out waiting for autopilot to become healthy ({}s)", max_wait.as_secs() ); - println!(); - println!(" Troubleshoot:"); - println!(" stakpak autopilot logs -c server View server logs"); - println!(" stakpak autopilot status Check component health"); + eprintln!(); + eprintln!(" Troubleshoot:"); + eprintln!(" stakpak autopilot logs -c server View server logs"); + eprintln!(" stakpak autopilot status Check component health"); if expects_sandbox { - println!(" docker ps Verify sandbox container"); + eprintln!(" docker ps Verify sandbox container"); } + return Err(format!( + "Autopilot did not become healthy within {}s", + max_wait.as_secs() + )); } // Clean post-start status summary diff --git a/cli/src/commands/autopilot/probes.rs b/cli/src/commands/autopilot/probes.rs index 8105ba4b..ea2b4dd1 100644 --- a/cli/src/commands/autopilot/probes.rs +++ 
b/cli/src/commands/autopilot/probes.rs @@ -129,6 +129,41 @@ pub trait ProbeEnvironment: Send + Sync { fn can_read_path(&self, path: &Path) -> Result<(), String>; fn current_username(&self) -> Option<String>; fn can_bind_addr(&self, addr: &str) -> Result<(), String>; + /// Linux distro ID parsed from /etc/os-release (e.g. "amzn", "ubuntu", "debian", "fedora"). + /// Returns None on non-Linux hosts or when /etc/os-release is unavailable/unparseable. + fn os_id(&self) -> Option<String> { + let contents = self.read_to_string(Path::new("/etc/os-release")).ok()?; + parse_os_release_id(&contents) + } +} + +pub(crate) fn parse_os_release_id(contents: &str) -> Option<String> { + for line in contents.lines() { + if let Some(value) = line.strip_prefix("ID=") { + let trimmed = value.trim().trim_matches('"').trim_matches('\''); + if !trimmed.is_empty() { + return Some(trimmed.to_ascii_lowercase()); + } + } + } + None +} + +fn docker_install_remediation(os_id: Option<&str>) -> (String, String) { + match os_id { + Some("amzn" | "rhel" | "fedora" | "rocky" | "almalinux" | "centos") => ( + "Install Docker, then rerun stakpak up".to_string(), + "sudo dnf install -y docker && sudo systemctl enable --now docker && sudo usermod -aG docker $USER".to_string(), + ), + Some("ubuntu" | "debian") => ( + "Install Docker, then rerun stakpak up".to_string(), + "sudo apt-get update && sudo apt-get install -y docker.io && sudo usermod -aG docker $USER".to_string(), + ), + _ => ( + "Install Docker for your distribution, then rerun stakpak up".to_string(), + "See https://docs.docker.com/engine/install/ — after install, run: sudo usermod -aG docker $USER".to_string(), + ), + } } pub struct RealProbeEnvironment; @@ -197,7 +232,7 @@ pub fn run_autopilot_probes( let docker_installed_ok = docker_installed.status == ProbeStatus::Pass; results.push(docker_installed); - results.push(if docker_installed_ok { + let docker_accessible = if docker_installed_ok { probe_docker_accessible(env) } else { ProbeResult { @@ -209,7 +244,15 @@ 
pub fn run_autopilot_probes( details: None, remediation: None, } - }); + }; + let docker_accessible_ok = docker_accessible.status == ProbeStatus::Pass; + results.push(docker_accessible); + + // ProbeMode currently only has Startup/Doctor — both want this check, so no + // mode gating needed today. Revisit if a lighter mode (e.g. Healthcheck) is added. + if docker_accessible_ok { + results.push(probe_docker_user_systemd(env)); + } results.push(probe_memory(env)); @@ -268,29 +311,33 @@ pub fn probe_credentials(ctx: &AutopilotProbeContext<'_>) -> ProbeResult { } pub fn probe_docker_installed(env: &dyn ProbeEnvironment) -> ProbeResult { - match env.command_output("docker", &["--version"]) { - Ok(snapshot) if snapshot.success => ProbeResult { + let snapshot = env.command_output("docker", &["--version"]); + if let Ok(ref snap) = snapshot + && snap.success + { + return ProbeResult { id: "docker_installed", title: "Docker", severity: ProbeSeverity::Blocking, status: ProbeStatus::Pass, summary: "Docker is installed".to_string(), - details: first_non_empty_line(&snapshot.stdout), + details: first_non_empty_line(&snap.stdout), remediation: None, - }, - Ok(snapshot) => ProbeResult { + }; + } + + let (summary, command) = docker_install_remediation(env.os_id().as_deref()); + match snapshot { + Ok(snap) => ProbeResult { id: "docker_installed", title: "Docker", severity: ProbeSeverity::Blocking, status: ProbeStatus::Fail, summary: "Docker is installed but failed to report its version".to_string(), - details: command_details(&snapshot), + details: command_details(&snap), remediation: Some(Remediation::Manual { - summary: "Reinstall or repair Docker, then rerun autopilot".to_string(), - command: Some( - "sudo apt-get install -y docker.io && sudo usermod -aG docker $USER" - .to_string(), - ), + summary: format!("Reinstall or repair Docker. 
{summary}"), + command: Some(command), }), }, Err(error) => ProbeResult { @@ -301,11 +348,8 @@ pub fn probe_docker_installed(env: &dyn ProbeEnvironment) -> ProbeResult { summary: "Docker is not installed".to_string(), details: Some(format!("Command error: {error}")), remediation: Some(Remediation::Manual { - summary: "Install Docker, then rerun stakpak up".to_string(), - command: Some( - "sudo apt-get install -y docker.io && sudo usermod -aG docker $USER" - .to_string(), - ), + summary, + command: Some(command), }), }, } @@ -511,6 +555,84 @@ pub fn probe_bind_port( } } +/// Detect the silent-failure case where the calling shell can reach the Docker +/// daemon but the user's systemd manager cannot — typically after `usermod -aG +/// docker $USER` was run without restarting `user@UID.service`. The systemd +/// manager retains the old (group-less) credentials, so any service it launches +/// (including autopilot) hits "permission denied on /var/run/docker.sock" and +/// crash-loops without surfacing a useful error. 
+pub fn probe_docker_user_systemd(env: &dyn ProbeEnvironment) -> ProbeResult { + let make = |severity, status, summary: String, details, remediation| ProbeResult { + id: "docker_user_systemd", + title: "Docker access via systemd user manager", + severity, + status, + summary, + details, + remediation, + }; + + if !env.path_exists(Path::new("/etc/os-release")) { + return make( + ProbeSeverity::Info, + ProbeStatus::Skip, + "Probe is only available on Linux hosts".to_string(), + None, + None, + ); + } + + let snapshot = env.command_output( + "systemd-run", + &[ + "--user", + "--pipe", + "--wait", + "--quiet", + "--collect", + "docker", + "ps", + ], + ); + match snapshot { + Ok(snap) if snap.success => make( + ProbeSeverity::Blocking, + ProbeStatus::Pass, + "systemd user manager can reach the Docker daemon".to_string(), + None, + None, + ), + Ok(snap) if combined_output(&snap).to_ascii_lowercase().contains("permission denied") => { + make( + ProbeSeverity::Blocking, + ProbeStatus::Fail, + "systemd user manager cannot reach Docker (likely stale group membership)".to_string(), + command_details(&snap), + Some(Remediation::Manual { + summary: "Restart the user systemd manager so it picks up docker group membership".to_string(), + command: Some( + "sudo systemctl restart user@$(id -u).service && systemd-run --user --pipe --wait --quiet docker ps".to_string(), + ), + }), + ) + } + Ok(snap) => make( + ProbeSeverity::Info, + ProbeStatus::Skip, + "Unable to verify Docker access from systemd user manager".to_string(), + command_details(&snap), + None, + ), + Err(error) => make( + ProbeSeverity::Info, + ProbeStatus::Skip, + "systemd-run is unavailable; skipping check".to_string(), + Some(error), + None, + ), + } +} + pub fn probe_systemd_linger(env: &dyn ProbeEnvironment) -> ProbeResult { let username = match env.current_username() { Some(value) => value, @@ -1013,6 +1135,117 @@ mod tests { } } + #[test] + fn docker_user_systemd_flags_stale_group_membership() { + let env = 
MockProbeEnvironment::default() + .with_file("/etc/os-release", "ID=ubuntu\n") + .with_command( + "systemd-run", + &[ + "--user", + "--pipe", + "--wait", + "--quiet", + "--collect", + "docker", + "ps", + ], + Ok(CommandSnapshot { + success: false, + stdout: String::new(), + stderr: "permission denied while trying to connect to the Docker daemon socket" + .to_string(), + }), + ); + + let result = probe_docker_user_systemd(&env); + assert!(result.is_blocking_failure()); + assert!(result.summary.contains("stale group membership")); + let remediation = result.remediation.expect("remediation"); + match remediation { + Remediation::Manual { command, .. } => { + let command = command.expect("command"); + assert!(command.contains("systemctl restart user@")); + } + Remediation::Suggested { .. } => panic!("expected manual remediation"), + } + } + + #[test] + fn docker_user_systemd_skips_when_systemd_run_missing() { + let env = MockProbeEnvironment::default().with_file("/etc/os-release", "ID=ubuntu\n"); + + let result = probe_docker_user_systemd(&env); + assert_eq!(result.status, ProbeStatus::Skip); + } + + #[test] + fn docker_user_systemd_skips_on_non_linux() { + let env = MockProbeEnvironment::default(); + + let result = probe_docker_user_systemd(&env); + assert_eq!(result.status, ProbeStatus::Skip); + assert!(result.summary.contains("only available on Linux")); + } + + #[test] + fn parse_os_release_id_handles_quoting_and_case() { + assert_eq!( + parse_os_release_id("NAME=Ubuntu\nID=ubuntu\nVERSION=\"22.04\"\n").as_deref(), + Some("ubuntu") + ); + assert_eq!( + parse_os_release_id("ID=\"amzn\"\n").as_deref(), + Some("amzn") + ); + assert_eq!( + parse_os_release_id("ID='fedora'\n").as_deref(), + Some("fedora") + ); + assert_eq!( + parse_os_release_id("ID=Debian\n").as_deref(), + Some("debian"), + "ID values should be lowercased for matching" + ); + } + + #[test] + fn parse_os_release_id_returns_none_when_missing() { + assert_eq!(parse_os_release_id(""), None); + 
assert_eq!(parse_os_release_id("NAME=Foo\n"), None); + assert_eq!(parse_os_release_id("ID=\n"), None); + } + + #[test] + fn docker_install_remediation_picks_dnf_for_rhel_family() { + for id in ["amzn", "rhel", "fedora", "rocky", "almalinux", "centos"] { + let (_, command) = docker_install_remediation(Some(id)); + assert!(command.contains("dnf install"), "{id} should use dnf"); + } + } + + #[test] + fn docker_install_remediation_picks_apt_for_debian_family() { + for id in ["ubuntu", "debian"] { + let (_, command) = docker_install_remediation(Some(id)); + assert!( + command.contains("apt-get install"), + "{id} should use apt-get" + ); + assert!( + command.contains("apt-get update"), + "{id} should refresh apt cache before install" + ); + } + } + + #[test] + fn docker_install_remediation_falls_back_to_docs_link() { + let (summary, command) = docker_install_remediation(None); + assert!(summary.contains("for your distribution")); + assert!(command.contains("docs.docker.com/engine/install")); + } + #[test] fn memory_blocks_on_small_host_without_swap() { let env = MockProbeEnvironment::default().with_file( diff --git a/cli/src/main.rs b/cli/src/main.rs index 29ad8894..3358a5d2 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -74,6 +74,7 @@ fn config_has_any_auth_flags(has_stakpak_key: bool, has_provider_keys: bool) -> #[derive(Parser, PartialEq)] #[command(name = "stakpak")] +#[command(version = env!("CARGO_PKG_VERSION"))] #[command(about = "Stakpak CLI tool", long_about = None)] struct Cli { /// Run the agent for a single step and print the response diff --git a/cli/src/prompts/system_prompt.v1.md b/cli/src/prompts/system_prompt.v1.md index 8ea6199f..bf93a56f 100644 --- a/cli/src/prompts/system_prompt.v1.md +++ b/cli/src/prompts/system_prompt.v1.md @@ -462,66 +462,7 @@ stakpak autopilot channel add telegram --token $TELEGRAM_BOT_TOKEN stakpak autopilot channel add discord --token $DISCORD_BOT_TOKEN ``` -### Slack App Setup (recommended: use manifest) -When helping 
users set up a Slack channel, **always recommend the manifest-based approach** — it's faster and less error-prone than manual scope/event configuration. - -**Steps to guide users through:** -1. Go to [api.slack.com/apps](https://api.slack.com/apps) → **Create New App** → **From an app manifest** -2. Select the target workspace -3. Paste the following Slack App Manifest YAML: - -```yaml -display_information: - name: Stakpak - description: AI agent for infrastructure operations - background_color: "#1a1a2e" - -features: - bot_user: - display_name: Stakpak - always_online: true - app_home: - home_tab_enabled: false - messages_tab_enabled: true - messages_tab_read_only_enabled: false - -oauth_config: - scopes: - bot: - - chat:write - - reactions:read - - reactions:write - - channels:read - - groups:read - - im:read - - mpim:read - - channels:history - - groups:history - - im:history - - mpim:history - - app_mentions:read - -settings: - event_subscriptions: - bot_events: - - message.channels - - message.groups - - message.im - - app_mention - interactivity: - is_enabled: true - org_deploy_enabled: false - socket_mode_enabled: true - token_rotation_enabled: false -``` - -4. Review and create the app -5. **Basic Information** → **App-Level Tokens** → generate a token with `connections:write` scope → this is the `xapp-*` token -6. **Install to Workspace** → copy the **Bot User OAuth Token** (`xoxb-*`) -7. Run: `stakpak autopilot channel add slack --bot-token "$SLACK_BOT_TOKEN" --app-token "$SLACK_APP_TOKEN"` -8. Verify: `stakpak autopilot channel test` - -**If the user already has a Slack app** and just needs to fix permissions, direct them to add the missing scopes under OAuth & Permissions and re-install the app. +**Token setup:** before asking the user for any channel token, run `stakpak autopilot channel add --help` and walk them through the `HOW TO GET TOKENS` section. After setup, verify with `stakpak autopilot channel test`. 
**IMPORTANT:** Always use `stakpak up` to start and install the system service. Do NOT manually create systemd unit files or launchd plist files. diff --git a/libs/api/src/local/hooks/task_board_context/system_prompt.txt b/libs/api/src/local/hooks/task_board_context/system_prompt.txt index d28de309..4ea8574e 100644 --- a/libs/api/src/local/hooks/task_board_context/system_prompt.txt +++ b/libs/api/src/local/hooks/task_board_context/system_prompt.txt @@ -455,6 +455,8 @@ stakpak autopilot channel add telegram --token $TELEGRAM_BOT_TOKEN stakpak autopilot channel add discord --token $DISCORD_BOT_TOKEN ``` +**Token setup:** before asking the user for any channel token, run `stakpak autopilot channel add --help` and walk them through the `HOW TO GET TOKENS` section. After setup, verify with `stakpak autopilot channel test`. + **IMPORTANT:** Always use `stakpak up` to start and install the system service. Do NOT manually create systemd unit files or launchd plist files. **Production trust model:** When setting up autopilot on production servers, recommend starting with **read-only IAM permissions** (e.g., `ReadOnlyAccess`, `ViewOnlyAccess`, or equivalent). This lets the user build confidence in autopilot's behavior before granting write access. Escalate permissions only after the user explicitly requests mutating actions (e.g., auto-remediation, scaling).