diff --git a/.claude/skills/tools-speckit-init/SKILL.md b/.claude/skills/tools-speckit-init/SKILL.md new file mode 100644 index 00000000..04f9bc7a --- /dev/null +++ b/.claude/skills/tools-speckit-init/SKILL.md @@ -0,0 +1,50 @@ +--- +name: tools-speckit-init +description: Initialize a new directory with spec-kit (specify) and install the spec-kit-sync extension +argument-hint: <directory-name> (e.g. my-new-component) +--- + +This skill creates a new directory, initializes it with `specify init . --ai claude`, and installs the `spec-kit-sync` extension. + +## Input + +The user must provide a directory name as an argument (e.g. `/tools-speckit-init my-new-component`). + +If no argument is provided, show the hint and stop. + +## Prerequisites + +- `specify` must be installed and available on PATH (`which specify` should succeed) +- If not installed, inform the user that `specify` is required and stop + +## Steps + +1. Verify that `specify` is installed by running `which specify`. If it fails, tell the user to install specify first and stop. + +2. Check that the target directory does not already exist. If it does, inform the user and stop. + +3. Create the new directory: + ```bash + mkdir -p <directory-name> + ``` + +4. Initialize spec-kit in the new directory: + ```bash + cd <directory-name> && specify init . --ai claude + ``` + +5. Install the spec-kit-sync extension: + ```bash + cd <directory-name> && specify extension add spec-kit-sync --from https://github.com/bgervin/spec-kit-sync/archive/refs/heads/master.zip + ``` + +6. Report success and tell the user: + - The directory has been created and initialized + - spec-kit-sync extension has been installed + - They can now use specify commands in the new directory (e.g. 
`/speckit-specify`, `/speckit-clarify`) + - The path to the new directory + +## Notes + +- The directory is created relative to the current working directory +- If `specify init` or the extension install fails, report the error to the user with the command output diff --git a/Cargo.toml b/Cargo.toml index a8a55432..8f01317a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,7 +28,8 @@ members = [ "apps/gpu-bb-vs-p2p", "certus-connector", "components/example-helloworld-dylib", - "apps/dynamic-loading-example" + "apps/dynamic-loading-example", + "apps/certus-server-yaml" ] # SPDK crates require pre-built native libraries at deps/spdk-build/. diff --git a/apps/certus-server-yaml/Cargo.toml b/apps/certus-server-yaml/Cargo.toml new file mode 100644 index 00000000..d9eb19e7 --- /dev/null +++ b/apps/certus-server-yaml/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "certus-server-yaml" +version = "0.1.0" +edition.workspace = true +rust-version.workspace = true +publish = false + +[dependencies] +interfaces = { workspace = true, features = ["spdk"] } +component-framework.workspace = true +component-core.workspace = true +dispatcher.workspace = true +dispatch-map.workspace = true +memory-tier.workspace = true +block-device-spdk-nvme.workspace = true +extent-manager.workspace = true +spdk-env.workspace = true +gpu-services = { workspace = true, features = ["gpu"] } +logger.workspace = true + +tonic = { version = "0.12", features = ["tls"] } +prost = "0.13" +tokio = { version = "1", features = ["full"] } +clap = { version = "4", features = ["derive"] } + +[build-dependencies] +serde = { version = "1", features = ["derive"] } +serde_yaml = "0.9" +tonic-build = "0.12" diff --git a/apps/certus-server-yaml/build.rs b/apps/certus-server-yaml/build.rs new file mode 100644 index 00000000..dd682322 --- /dev/null +++ b/apps/certus-server-yaml/build.rs @@ -0,0 +1,390 @@ +use std::collections::{HashMap, HashSet}; +use std::fmt::Write as FmtWrite; +use std::path::PathBuf; +use 
std::process::Command; +use std::{env, fs}; + +use serde::Deserialize; + +// --- YAML schema types --- + +#[derive(Deserialize)] +struct ProfileManifest { + profile: ProfileMeta, + components: HashMap, + wiring: Vec, + init_order: Vec, + exports: Vec, +} + +#[derive(Deserialize)] +struct ProfileMeta { + name: String, + description: String, +} + +#[derive(Deserialize)] +struct ComponentDecl { + #[serde(rename = "crate")] + crate_name: String, + factory: String, + provides: Vec, + /// Override the Rust module path for the provided trait(s). + /// Defaults to "interfaces" if not specified. + trait_path: Option, + init_hook: Option, +} + +#[derive(Deserialize)] +struct WiringEntry { + target: String, + source: String, +} + +#[derive(Deserialize)] +struct ExportEntry { + component: String, + interface: String, +} + +// --- Validation --- + +fn validate_manifest(manifest: &ProfileManifest) { + let component_names: HashSet<&str> = manifest.components.keys().map(|s| s.as_str()).collect(); + + // Validate wiring references + for entry in &manifest.wiring { + let parts: Vec<&str> = entry.target.split('.').collect(); + if parts.len() != 2 { + panic!( + "invalid wiring target '{}': expected 'component.receptacle'", + entry.target + ); + } + if !component_names.contains(parts[0]) { + panic!( + "wiring target references unknown component '{}'", + parts[0] + ); + } + if !component_names.contains(entry.source.as_str()) { + panic!( + "wiring source references unknown component '{}'", + entry.source + ); + } + } + + // Validate init_order covers all components with hooks + let init_set: HashSet<&str> = manifest.init_order.iter().map(|s| s.as_str()).collect(); + for (name, decl) in &manifest.components { + if decl.init_hook.is_some() && !init_set.contains(name.as_str()) { + panic!( + "component '{}' has init_hook but is missing from init_order", + name + ); + } + if !init_set.contains(name.as_str()) && decl.init_hook.is_none() { + // OK — components without hooks don't need to be in 
init_order + } + } + + // Validate init_order only references declared components + for name in &manifest.init_order { + if !component_names.contains(name.as_str()) { + panic!( + "init_order references unknown component '{}'", + name + ); + } + } + + // Validate exports + for export in &manifest.exports { + if !component_names.contains(export.component.as_str()) { + panic!( + "export references unknown component '{}'", + export.component + ); + } + let decl = &manifest.components[&export.component]; + if !decl.provides.contains(&export.interface) { + panic!( + "export interface '{}' not in component '{}' provides list", + export.interface, export.component + ); + } + } +} + +// --- Code generation --- + +fn rust_crate_ident(crate_name: &str) -> String { + crate_name.replace('-', "_") +} + +fn interface_to_trait(iface: &str) -> String { + iface.to_string() +} + +fn ensure_callable(factory: &str) -> String { + let trimmed = factory.trim(); + if trimmed.ends_with(')') { + trimmed.to_string() + } else { + format!("{trimmed}()") + } +} + +fn generate_composition(manifest: &ProfileManifest) -> String { + let mut code = String::new(); + + writeln!( + code, + "// AUTO-GENERATED from profiles/{}.yaml — do not edit manually", + manifest.profile.name + ) + .unwrap(); + writeln!(code, "// Profile: {} — {}", manifest.profile.name, manifest.profile.description).unwrap(); + writeln!(code).unwrap(); + writeln!(code, "use component_core::query_interface;").unwrap(); + writeln!(code).unwrap(); + + // Generate ComponentStack struct + writeln!(code, "pub struct ComponentStack {{").unwrap(); + for export in &manifest.exports { + let trait_name = interface_to_trait(&export.interface); + let export_decl = &manifest.components[&export.component]; + let trait_mod = export_decl + .trait_path + .as_deref() + .unwrap_or("interfaces"); + let trait_mod = rust_crate_ident(trait_mod); + writeln!( + code, + " pub {}: std::sync::Arc,", + export.component, trait_mod, trait_name + ) + .unwrap(); + } 
+ writeln!(code, "}}").unwrap(); + writeln!(code).unwrap(); + + // Generate build_stack function + writeln!(code, "#[allow(unused_imports, unused_variables)]").unwrap(); + writeln!( + code, + "pub fn build_stack(config: &crate::config::StackConfig) -> Result {{" + ) + .unwrap(); + + // --- Instantiation phase --- + writeln!(code, " // --- Instantiate components ---").unwrap(); + for name in &manifest.init_order { + let decl = &manifest.components[name]; + let crate_ident = rust_crate_ident(&decl.crate_name); + let factory_call = ensure_callable(&decl.factory); + writeln!( + code, + " let comp_{name} = {crate_ident}::{factory_call};", + ) + .unwrap(); + } + // Instantiate components not in init_order (those without hooks) + for (name, decl) in &manifest.components { + if !manifest.init_order.contains(name) { + let crate_ident = rust_crate_ident(&decl.crate_name); + let factory_call = ensure_callable(&decl.factory); + writeln!( + code, + " let comp_{name} = {crate_ident}::{factory_call};", + ) + .unwrap(); + } + } + writeln!(code).unwrap(); + + // --- Query interfaces phase --- + writeln!(code, " // --- Query interfaces ---").unwrap(); + for (name, decl) in &manifest.components { + let trait_mod = decl + .trait_path + .as_deref() + .unwrap_or("interfaces"); + let trait_mod = rust_crate_ident(trait_mod); + for iface in &decl.provides { + let trait_name = interface_to_trait(iface); + let var_name = format!("iface_{}_{}", name, iface.to_lowercase().trim_start_matches('i')); + writeln!( + code, + " let {var_name}: std::sync::Arc = query_interface!(comp_{name}, {trait_mod}::{trait_name})" + ) + .unwrap(); + writeln!( + code, + " .ok_or(\"failed to query {trait_name} from {name}\")?;" + ) + .unwrap(); + } + } + writeln!(code).unwrap(); + + // --- Wiring phase --- + writeln!(code, " // --- Wire receptacles ---").unwrap(); + for entry in &manifest.wiring { + let parts: Vec<&str> = entry.target.split('.').collect(); + let target_comp = parts[0]; + let receptacle = parts[1]; + 
let source_comp = &entry.source; + + // Find the interface provided by the source that matches + let source_decl = &manifest.components[source_comp]; + let source_iface = &source_decl.provides[0]; // Use first provided interface + let iface_var = format!( + "iface_{}_{}", + source_comp, + source_iface.to_lowercase().trim_start_matches('i') + ); + + writeln!( + code, + " comp_{target_comp}.{receptacle}.connect(std::sync::Arc::clone(&{iface_var}))" + ) + .unwrap(); + writeln!( + code, + " .map_err(|e| format!(\"{target_comp}.{receptacle}: {{e}}\"))?;", + ) + .unwrap(); + } + writeln!(code).unwrap(); + + // --- Initialization phase --- + writeln!(code, " // --- Initialize (in declared order) ---").unwrap(); + for name in &manifest.init_order { + let decl = &manifest.components[name]; + if let Some(hook) = &decl.init_hook { + let iface = &decl.provides[0]; + let iface_var = format!( + "iface_{}_{}", + name, + iface.to_lowercase().trim_start_matches('i') + ); + writeln!( + code, + " crate::hooks::{hook}(&{iface_var}, config)?;" + ) + .unwrap(); + } + } + writeln!(code).unwrap(); + + // --- Return exports --- + writeln!(code, " Ok(ComponentStack {{").unwrap(); + for export in &manifest.exports { + let iface_var = format!( + "iface_{}_{}", + export.component, + export.interface.to_lowercase().trim_start_matches('i') + ); + writeln!(code, " {}: {iface_var},", export.component).unwrap(); + } + writeln!(code, " }})").unwrap(); + writeln!(code, "}}").unwrap(); + + code +} + +// --- Protoc helpers (from certus-server) --- + +const PROTOC_VERSION: &str = "25.1"; + +fn find_protoc() -> Option { + if let Ok(p) = env::var("PROTOC") { + let path = PathBuf::from(&p); + if path.exists() { + return Some(path); + } + } + if let Ok(output) = Command::new("which").arg("protoc").output() { + if output.status.success() { + let path = PathBuf::from(String::from_utf8_lossy(&output.stdout).trim()); + if path.exists() { + return Some(path); + } + } + } + None +} + +fn download_protoc() -> 
PathBuf { + let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); + let protoc_dir = out_dir.join("protoc"); + let protoc_bin = protoc_dir.join("bin").join("protoc"); + + if protoc_bin.exists() { + return protoc_bin; + } + + let url = format!( + "https://github.com/protocolbuffers/protobuf/releases/download/v{}/protoc-{}-linux-x86_64.zip", + PROTOC_VERSION, PROTOC_VERSION + ); + + let zip_path = out_dir.join("protoc.zip"); + + let status = Command::new("curl") + .args(["-fsSL", "-o"]) // -f: exit non-zero on HTTP errors instead of saving the error page as protoc.zip; -S: still print the error in silent mode + .arg(&zip_path) + .arg(&url) + .status() + .expect("failed to run curl"); + assert!(status.success(), "failed to download protoc from {url}"); + + fs::create_dir_all(&protoc_dir).unwrap(); + let status = Command::new("unzip") + .args(["-q", "-o"]) + .arg(&zip_path) + .arg("-d") + .arg(&protoc_dir) + .status() + .expect("failed to run unzip"); + assert!(status.success(), "failed to unzip protoc"); + + fs::remove_file(&zip_path).ok(); + protoc_bin +} + +// --- Main --- + +fn main() -> Result<(), Box> { + // 1. Determine profile + let profile = env::var("CERTUS_PROFILE").unwrap_or_else(|_| "full".into()); + let manifest_path = format!("profiles/{profile}.yaml"); + + println!("cargo:rerun-if-changed={manifest_path}"); + println!("cargo:rerun-if-env-changed=CERTUS_PROFILE"); + println!("cargo:rerun-if-changed=proto/dispatcher.proto"); + + // 2. Parse and validate manifest + let yaml_content = fs::read_to_string(&manifest_path) + .unwrap_or_else(|e| panic!("cannot read profile '{manifest_path}': {e}")); + let manifest: ProfileManifest = serde_yaml::from_str(&yaml_content) + .unwrap_or_else(|e| panic!("invalid YAML in '{manifest_path}': {e}")); + validate_manifest(&manifest); + + // 3. Generate composition code + let composition_code = generate_composition(&manifest); + let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); + fs::write(out_dir.join("composition.rs"), &composition_code)?; + + // 4. 
Compile protobuf + let protoc = find_protoc().unwrap_or_else(|| { + println!("cargo:warning=protoc not found, downloading v{PROTOC_VERSION}..."); // stdout, not stderr: Cargo only parses build-script directives from stdout + download_protoc() + }); + env::set_var("PROTOC", &protoc); + tonic_build::compile_protos("proto/dispatcher.proto")?; + + Ok(()) +} diff --git a/apps/certus-server-yaml/profiles/full.yaml b/apps/certus-server-yaml/profiles/full.yaml new file mode 100644 index 00000000..ebeefe7d --- /dev/null +++ b/apps/certus-server-yaml/profiles/full.yaml @@ -0,0 +1,72 @@ +profile: + name: full + description: "Production profile with SPDK NVMe + GPU acceleration" + +components: + logger: + crate: logger + factory: "LoggerComponent::new_default" + provides: [ILogger] + + spdk_env: + crate: spdk-env + factory: "SPDKEnvComponent::new_default" + provides: [ISPDKEnv] + trait_path: spdk-env + init_hook: init_spdk_env + + gpu: + crate: gpu-services + factory: "GpuServicesComponent::new_default" + provides: [IGpuServices] + init_hook: init_gpu + + dispatch_map: + crate: dispatch-map + factory: "DispatchMapComponent::new(Default::default())" + provides: [IDispatchMap] + init_hook: init_dispatch_map + + memory_tier: + crate: memory-tier + factory: "MemoryTierComponent::new_default" + provides: [IMemoryTier] + init_hook: init_memory_tier + + dispatcher: + crate: dispatcher + factory: "DispatcherComponent::new_default" + provides: [IDispatcher] + init_hook: init_dispatcher + +wiring: + - target: gpu.logger + source: logger + - target: dispatch_map.logger + source: logger + - target: memory_tier.logger + source: logger + - target: dispatcher.logger + source: logger + - target: dispatcher.dispatch_map + source: dispatch_map + - target: dispatcher.memory_tier + source: memory_tier + - target: dispatcher.gpu_services + source: gpu + - target: dispatcher.spdk_env + source: spdk_env + +init_order: + - logger + - spdk_env + - gpu + - dispatch_map + - memory_tier + - dispatcher + +exports: + - component: logger + interface: ILogger + - component: 
dispatcher + interface: IDispatcher diff --git a/apps/certus-server-yaml/profiles/minimal.yaml b/apps/certus-server-yaml/profiles/minimal.yaml new file mode 100644 index 00000000..731789a0 --- /dev/null +++ b/apps/certus-server-yaml/profiles/minimal.yaml @@ -0,0 +1,18 @@ +profile: + name: minimal + description: "Minimal profile — logger only, no hardware dependencies" + +components: + logger: + crate: logger + factory: "LoggerComponent::new_default" + provides: [ILogger] + +wiring: [] + +init_order: + - logger + +exports: + - component: logger + interface: ILogger diff --git a/apps/certus-server-yaml/proto/dispatcher.proto b/apps/certus-server-yaml/proto/dispatcher.proto new file mode 100644 index 00000000..5251ffa3 --- /dev/null +++ b/apps/certus-server-yaml/proto/dispatcher.proto @@ -0,0 +1,126 @@ +syntax = "proto3"; + +package certus.dispatcher.v1; + +// Service exposing IDispatcher data operations as batch-capable RPCs. +service Dispatcher { + // Populate cache entries by DMA-copying from GPU memory. + rpc Populate(BatchPopulateRequest) returns (BatchPopulateResponse); + + // Look up cache entries and DMA-copy data to client GPU memory. + rpc Lookup(BatchLookupRequest) returns (BatchLookupResponse); + + // Check whether cache entries exist without transferring data. + rpc Check(BatchCheckRequest) returns (BatchCheckResponse); + + // Remove cache entries, freeing associated resources. + rpc Remove(BatchRemoveRequest) returns (BatchRemoveResponse); + + // Touch cache entries, updating eviction timestamps without DMA. + rpc Touch(BatchTouchRequest) returns (BatchTouchResponse); + + // Clear all entries from the memory-tier cache pool. + rpc ClearMemoryTier(ClearMemoryTierRequest) returns (ClearMemoryTierResponse); +} + +// --- Common types --- + +message IpcHandle { + // CUDA IPC memory handle (64 bytes from cudaIpcGetMemHandle) for cross-process GPU sharing. + // The server opens this handle to obtain a device pointer in its own CUDA context. 
+ bytes cuda_ipc_handle = 1; + uint32 size = 2; // Data size in bytes + int32 gpu_device_id = 3; // CUDA device ordinal that allocated this memory +} + +enum ErrorCode { + ERROR_CODE_UNSPECIFIED = 0; + ERROR_CODE_NOT_INITIALIZED = 1; + ERROR_CODE_KEY_NOT_FOUND = 2; + ERROR_CODE_ALREADY_EXISTS = 3; + ERROR_CODE_ALLOCATION_FAILED = 4; + ERROR_CODE_IO_ERROR = 5; + ERROR_CODE_TIMEOUT = 6; + ERROR_CODE_INVALID_PARAMETER = 7; + ERROR_CODE_DUPLICATE_KEY = 8; +} + +message EntryResult { + uint64 key = 1; + bool success = 2; + ErrorCode error_code = 3; + string error_message = 4; +} + +// --- Populate --- + +message PopulateEntry { + uint64 key = 1; + IpcHandle ipc_handle = 2; +} + +message BatchPopulateRequest { + repeated PopulateEntry entries = 1; +} + +message BatchPopulateResponse { + repeated EntryResult results = 1; +} + +// --- Lookup --- + +message LookupEntry { + uint64 key = 1; + IpcHandle ipc_handle = 2; +} + +message BatchLookupRequest { + repeated LookupEntry entries = 1; +} + +message BatchLookupResponse { + repeated EntryResult results = 1; +} + +// --- Check --- + +message BatchCheckRequest { + repeated uint64 keys = 1; +} + +message CheckResult { + uint64 key = 1; + bool exists = 2; +} + +message BatchCheckResponse { + repeated CheckResult results = 1; +} + +// --- Remove --- + +message BatchRemoveRequest { + repeated uint64 keys = 1; +} + +message BatchRemoveResponse { + repeated EntryResult results = 1; +} + +// --- Touch --- + +message BatchTouchRequest { + repeated uint64 keys = 1; +} + +message BatchTouchResponse { + repeated EntryResult results = 1; +} + +// --- ClearMemoryTier --- + +message ClearMemoryTierRequest {} + +message ClearMemoryTierResponse { + uint64 entries_cleared = 1; +} diff --git a/apps/certus-server-yaml/src/config.rs b/apps/certus-server-yaml/src/config.rs new file mode 100644 index 00000000..02c05a03 --- /dev/null +++ b/apps/certus-server-yaml/src/config.rs @@ -0,0 +1,15 @@ +use std::cell::RefCell; + +/// Runtime configuration 
passed to component init hooks. +/// +/// `resolved_pci_addrs` is populated by `init_spdk_env` after device discovery +/// and consumed by `init_dispatcher`. Uses interior mutability because the +/// generated composition code passes `&StackConfig` (shared ref) to all hooks. +pub struct StackConfig { + pub device_pci: Vec, + pub drive_count: Option, + pub memory_tier_size: usize, + pub format: bool, + pub poller_base_cpu: Option, + pub resolved_pci_addrs: RefCell>, +} diff --git a/apps/certus-server-yaml/src/hooks.rs b/apps/certus-server-yaml/src/hooks.rs new file mode 100644 index 00000000..ae045500 --- /dev/null +++ b/apps/certus-server-yaml/src/hooks.rs @@ -0,0 +1,112 @@ +//! Init hooks — typed functions called by the generated composition code. +//! +//! Each hook receives the queried interface Arc and the runtime StackConfig, +//! performing component-specific initialization logic. + +use std::sync::Arc; + +use interfaces::{ + DmaAllocFn, DmaBuffer, DispatcherConfig, IDispatchMap, IDispatcher, IGpuServices, IMemoryTier, +}; +use spdk_env::ISPDKEnv; + +use crate::config::StackConfig; + +const NVME_CLASS_CODE: u32 = 0x010802; + +pub fn init_spdk_env( + iface: &Arc, + config: &StackConfig, +) -> Result<(), String> { + iface.init().map_err(|e| format!("SPDK init failed: {e}"))?; + + // Resolve device addresses: explicit list or auto-discover via SPDK. 
+ let addrs = if !config.device_pci.is_empty() { + config.device_pci.clone() + } else { + let count = config.drive_count.unwrap_or(1); + let devices = iface.devices(); + let mut nvme_devices: Vec<_> = devices + .iter() + .filter(|d| d.id.class_id == NVME_CLASS_CODE) + .collect(); + nvme_devices.sort_by_key(|d| if d.numa_node == 0 { 0 } else { 1 }); + if nvme_devices.len() < count { + return Err(format!( + "--drive-count={count} but only {} NVMe device(s) discovered", + nvme_devices.len() + )); + } + nvme_devices[..count] + .iter() + .map(|d| d.address.to_string()) + .collect() + }; + + *config.resolved_pci_addrs.borrow_mut() = addrs; + Ok(()) +} + +pub fn init_gpu( + iface: &Arc, + _config: &StackConfig, +) -> Result<(), String> { + iface.initialize().map_err(|e| format!("GPU init failed: {e}")) +} + +pub fn init_dispatch_map( + iface: &Arc, + _config: &StackConfig, +) -> Result<(), String> { + let dma_alloc: DmaAllocFn = Arc::new(move |size, align, _numa| { + DmaBuffer::new(size, align, None).map_err(|e| e.to_string()) + }); + iface.set_dma_alloc(dma_alloc); + iface + .initialize() + .map_err(|e| format!("DispatchMap init failed: {e}")) +} + +pub fn init_memory_tier( + iface: &Arc, + config: &StackConfig, +) -> Result<(), String> { + iface + .initialize(config.memory_tier_size) + .map_err(|e| format!("MemoryTier init failed: {e}"))?; + + // Register the memory-tier pool with CUDA for pinned DMA transfers. 
+ if let Some((pool_ptr, pool_size)) = iface.pool_info() { + let err = unsafe { + gpu_services::cuda_ffi::cudaHostRegister( + pool_ptr as *mut std::ffi::c_void, + pool_size, + 0, + ) + }; + if err != gpu_services::cuda_ffi::CUDA_SUCCESS { + eprintln!( + "warning: cudaHostRegister failed (err={err}), \ + memory-tier transfers will use staged path" + ); + } + } + + Ok(()) +} + +pub fn init_dispatcher( + iface: &Arc, + config: &StackConfig, +) -> Result<(), String> { + let data_pci_addrs = config.resolved_pci_addrs.borrow().clone(); + + iface + .initialize(DispatcherConfig { + data_pci_addrs, + format_on_init: config.format, + poller_base_cpu: config.poller_base_cpu, + ..Default::default() + }) + .map_err(|e| format!("Dispatcher init failed: {e}")) +} diff --git a/apps/certus-server-yaml/src/main.rs b/apps/certus-server-yaml/src/main.rs new file mode 100644 index 00000000..e0c3d7d7 --- /dev/null +++ b/apps/certus-server-yaml/src/main.rs @@ -0,0 +1,177 @@ +//! Certus gRPC Server — YAML-Composed +//! +//! Drop-in replacement for certus-server whose component graph is +//! declared in a YAML profile manifest and assembled at compile time +//! by build.rs code generation. + +mod config; +mod hooks; +mod service; + +// Include the generated composition code (build_stack + ComponentStack). +include!(concat!(env!("OUT_DIR"), "/composition.rs")); + +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; + +use clap::Parser; +use tonic::transport::{Identity, Server, ServerTlsConfig}; + +use config::StackConfig; +use service::DispatcherService; + +/// Certus gRPC server (YAML-composed) exposing the IDispatcher interface. +#[derive(Parser)] +#[command( + name = "certus-server-yaml", + about = "Certus dispatcher gRPC server — compile-time composed via YAML profiles" +)] +struct Cli { + /// PCI address(es) of NVMe device(s) — may be specified multiple times. + /// Mutually exclusive with --drive-count. 
#[arg(long = "device-pci")] + device_pci: Vec, + + /// Use the first N discovered NVMe drives (alternative to --device-pci). + #[arg(long = "drive-count", conflicts_with = "device_pci")] + drive_count: Option, + + /// gRPC listen address + #[arg(long = "listen", default_value = "0.0.0.0:50051")] + listen: String, + + /// Memory-tier pool size (e.g. 256M, 1G, 512K). Defaults to 2G. + #[arg(long = "memory-tier-size", value_parser = parse_size)] + memory_tier_size: Option, + + /// Format extent managers on startup (destroys existing data). + #[arg(long = "format")] + format: bool, + + /// Path to TLS certificate file (enables TLS; must be given together with --tls-key) + #[arg(long = "tls-cert", requires = "tls_key")] + tls_cert: Option, + + /// Path to TLS private key file (enables TLS; must be given together with --tls-cert) + #[arg(long = "tls-key", requires = "tls_cert")] + tls_key: Option, + + /// Pin each NVMe poller thread to a dedicated CPU core. + #[arg(long = "poller-base-cpu")] + poller_base_cpu: Option, +} + +fn parse_size(s: &str) -> Result { + let s = s.trim(); + if s.is_empty() { + return Err("empty size string".into()); + } + let (num_str, multiplier) = match s.as_bytes().last() { + Some(b'K' | b'k') => (&s[..s.len() - 1], 1024usize), + Some(b'M' | b'm') => (&s[..s.len() - 1], 1024 * 1024), + Some(b'G' | b'g') => (&s[..s.len() - 1], 1024 * 1024 * 1024), + _ => (s, 1usize), + }; + let num: usize = num_str + .parse() + .map_err(|_| format!("invalid size number: '{num_str}'"))?; + num.checked_mul(multiplier) + .ok_or_else(|| format!("size overflow: '{s}'")) +} + +fn validate_pci_address(addr: &str) -> Result<(), String> { + let parts: Vec<&str> = addr.split(':').collect(); + if parts.len() != 3 { + return Err(format!( + "invalid PCI address format '{addr}': expected DDDD:BB:DD.F" + )); + } + u32::from_str_radix(parts[0], 16) + .map_err(|_| format!("invalid PCI domain in '{addr}'"))?; + u8::from_str_radix(parts[1], 16) + .map_err(|_| format!("invalid PCI bus in '{addr}'"))?; + let dev_func: Vec<&str> = 
parts[2].split('.').collect(); + if dev_func.len() != 2 { + return Err(format!( + "invalid PCI dev.func in '{addr}': expected DD.F" + )); + } + u8::from_str_radix(dev_func[0], 16) + .map_err(|_| format!("invalid PCI device in '{addr}'"))?; + u8::from_str_radix(dev_func[1], 16) + .map_err(|_| format!("invalid PCI function in '{addr}'"))?; + Ok(()) +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + let cli = Cli::parse(); + + // Validate PCI addresses + for addr in &cli.device_pci { + validate_pci_address(addr).map_err(Box::::from)?; + } + if cli.device_pci.is_empty() && cli.drive_count.is_none() { + return Err("either --device-pci or --drive-count must be specified".into()); + } + + const DEFAULT_MEMORY_TIER_SIZE: usize = 2 * 1024 * 1024 * 1024; // 2 GiB + + let stack_config = StackConfig { + device_pci: cli.device_pci.clone(), + drive_count: cli.drive_count, + memory_tier_size: cli.memory_tier_size.unwrap_or(DEFAULT_MEMORY_TIER_SIZE), + format: cli.format, + poller_base_cpu: cli.poller_base_cpu, + resolved_pci_addrs: std::cell::RefCell::new(Vec::new()), + }; + + // Build the component stack from the YAML-generated composition + let stack = build_stack(&stack_config)?; + + let logger = &stack.logger; + logger.info(&format!( + "certus-server-yaml: composed from profile, devices={:?}", + cli.device_pci + )); + logger.info(&format!( + "certus-server-yaml: memory-tier-size={} MiB", + stack_config.memory_tier_size / (1024 * 1024) + )); + + let svc = DispatcherService::new(Arc::clone(&stack.dispatcher)); + let addr = cli.listen.parse()?; + + // Build server with optional TLS + let mut server = Server::builder(); + if let (Some(cert_path), Some(key_path)) = (&cli.tls_cert, &cli.tls_key) { + let cert = tokio::fs::read(cert_path).await?; + let key = tokio::fs::read(key_path).await?; + let identity = Identity::from_pem(cert, key); + server = server.tls_config(ServerTlsConfig::new().identity(identity))?; + logger.info("certus-server-yaml: TLS enabled"); + } + + 
logger.info(&format!("certus-server-yaml: listening on {addr}")); + + let shutdown_flag = Arc::new(AtomicBool::new(false)); + let flag_clone = Arc::clone(&shutdown_flag); + + server + .add_service(service::dispatcher_server(svc)) + .serve_with_shutdown(addr, async move { + use tokio::signal::unix::{signal, SignalKind}; + let mut sigterm = signal(SignalKind::terminate()).expect("failed to register SIGTERM"); + tokio::select! { + _ = tokio::signal::ctrl_c() => {}, + _ = sigterm.recv() => {}, + } + flag_clone.store(true, Ordering::Release); + }) + .await?; + + let _ = stack.dispatcher.shutdown(); + stack.logger.info("certus-server-yaml: shutdown complete"); + + Ok(()) +} diff --git a/apps/certus-server-yaml/src/service.rs b/apps/certus-server-yaml/src/service.rs new file mode 100644 index 00000000..4f0ea71a --- /dev/null +++ b/apps/certus-server-yaml/src/service.rs @@ -0,0 +1,475 @@ +//! gRPC service implementation for the Certus Dispatcher. + +use std::collections::{HashMap, HashSet}; +use std::sync::{Arc, Mutex}; + +use tonic::{Request, Response, Status}; + +use gpu_services::cuda_ffi; +use interfaces::{DispatcherError, IDispatcher, IpcHandle}; + +pub mod proto { + tonic::include_proto!("certus.dispatcher.v1"); +} + +use proto::dispatcher_server::{Dispatcher, DispatcherServer}; +use proto::{ + BatchCheckRequest, BatchCheckResponse, BatchLookupRequest, BatchLookupResponse, + BatchPopulateRequest, BatchPopulateResponse, BatchRemoveRequest, BatchRemoveResponse, + BatchTouchRequest, BatchTouchResponse, CheckResult, ClearMemoryTierRequest, + ClearMemoryTierResponse, EntryResult, ErrorCode, +}; + +pub fn dispatcher_server(svc: DispatcherService) -> DispatcherServer { + DispatcherServer::new(svc) +} + +struct IpcCacheEntry { + dev_ptr: *mut std::ffi::c_void, + #[allow(dead_code)] + gpu_device_id: i32, + refcount: usize, +} + +// SAFETY: dev_ptr is a CUDA device pointer only used from blocking threads. 
+unsafe impl Send for IpcCacheEntry {} +unsafe impl Sync for IpcCacheEntry {} + +type IpcCache = Arc>>; + +pub struct DispatcherService { + dispatcher: Arc, + ipc_cache: IpcCache, +} + +impl DispatcherService { + pub fn new(dispatcher: Arc) -> Self { + Self { + dispatcher, + ipc_cache: Arc::new(Mutex::new(HashMap::new())), + } + } +} + +fn ipc_cache_open( + cache: &IpcCache, + handle_bytes: &[u8; 64], + gpu_device_id: i32, +) -> Result<*mut std::ffi::c_void, String> { + let mut map = cache.lock().unwrap_or_else(|e| e.into_inner()); + if let Some(entry) = map.get_mut(handle_bytes) { + entry.refcount += 1; + return Ok(entry.dev_ptr); + } + + if gpu_device_id >= 0 { + let err = unsafe { cuda_ffi::cudaSetDevice(gpu_device_id) }; + if err != cuda_ffi::CUDA_SUCCESS { + return Err(format!( + "cudaSetDevice({}) failed: {}", + gpu_device_id, + cuda_ffi::cuda_error_string(err) + )); + } + } + + let cuda_handle = cuda_ffi::cudaIpcMemHandle_t { + reserved: *handle_bytes, + }; + let mut dev_ptr: *mut std::ffi::c_void = std::ptr::null_mut(); + let err = unsafe { + cuda_ffi::cudaIpcOpenMemHandle( + &mut dev_ptr, + cuda_handle, + cuda_ffi::CUDA_IPC_MEM_LAZY_ENABLE_PEER_ACCESS, + ) + }; + if err != cuda_ffi::CUDA_SUCCESS { + return Err(format!( + "cudaIpcOpenMemHandle failed: {}", + cuda_ffi::cuda_error_string(err) + )); + } + if dev_ptr.is_null() { + return Err("cudaIpcOpenMemHandle returned null".to_string()); + } + + map.insert(*handle_bytes, IpcCacheEntry { + dev_ptr, + gpu_device_id, + refcount: 1, + }); + Ok(dev_ptr) +} + +fn ipc_cache_close(cache: &IpcCache, handle_bytes: &[u8; 64]) { + let mut map = cache.lock().unwrap_or_else(|e| e.into_inner()); + if let Some(entry) = map.get_mut(handle_bytes) { + entry.refcount -= 1; + if entry.refcount == 0 { + unsafe { + cuda_ffi::cudaIpcCloseMemHandle(entry.dev_ptr); + } + map.remove(handle_bytes); + } + } +} + +#[allow(clippy::result_large_err)] +fn check_duplicate_keys(keys: &[u64]) -> Result<(), Status> { + let mut seen = 
HashSet::with_capacity(keys.len()); + for &key in keys { + if !seen.insert(key) { + return Err(Status::invalid_argument(format!( + "duplicate key in batch: {key}" + ))); + } + } + Ok(()) +} + +fn map_dispatcher_error(err: &DispatcherError) -> (ErrorCode, String) { + match err { + DispatcherError::NotInitialized(msg) => (ErrorCode::NotInitialized, msg.clone()), + DispatcherError::KeyNotFound(k) => { + (ErrorCode::KeyNotFound, format!("key not found: {k}")) + } + DispatcherError::AlreadyExists(k) => { + (ErrorCode::AlreadyExists, format!("key already exists: {k}")) + } + DispatcherError::AllocationFailed(msg) => (ErrorCode::AllocationFailed, msg.clone()), + DispatcherError::IoError(msg) => (ErrorCode::IoError, msg.clone()), + DispatcherError::Timeout(msg) => (ErrorCode::Timeout, msg.clone()), + DispatcherError::InvalidParameter(msg) => (ErrorCode::InvalidParameter, msg.clone()), + } +} + +fn success_result(key: u64) -> EntryResult { + EntryResult { + key, + success: true, + error_code: ErrorCode::Unspecified.into(), + error_message: String::new(), + } +} + +fn error_result(key: u64, err: &DispatcherError) -> EntryResult { + let (code, msg) = map_dispatcher_error(err); + EntryResult { + key, + success: false, + error_code: code.into(), + error_message: msg, + } +} + +#[tonic::async_trait] +impl Dispatcher for DispatcherService { + async fn populate( + &self, + request: Request, + ) -> Result, Status> { + let req = request.into_inner(); + let keys: Vec = req.entries.iter().map(|e| e.key).collect(); + check_duplicate_keys(&keys)?; + + let dispatcher = Arc::clone(&self.dispatcher); + let cache = Arc::clone(&self.ipc_cache); + let results = tokio::task::spawn_blocking(move || { + let mut opened_keys: Vec<[u8; 64]> = Vec::new(); + let mut pre_errors: Vec> = vec![None; req.entries.len()]; + + let mut local_ptrs: HashMap<[u8; 64], *mut std::ffi::c_void> = HashMap::new(); + for (i, entry) in req.entries.iter().enumerate() { + let handle = match entry.ipc_handle.as_ref() { + 
Some(h) => h, + None => { + pre_errors[i] = Some(error_result( + entry.key, + &DispatcherError::InvalidParameter("missing ipc_handle".into()), + )); + continue; + } + }; + let key: [u8; 64] = match handle.cuda_ipc_handle.as_slice().try_into() { + Ok(k) => k, + Err(_) => { + pre_errors[i] = Some(error_result( + entry.key, + &DispatcherError::InvalidParameter(format!( + "cuda_ipc_handle must be 64 bytes, got {}", + handle.cuda_ipc_handle.len() + )), + )); + continue; + } + }; + if let std::collections::hash_map::Entry::Vacant(slot) = local_ptrs.entry(key) { + match ipc_cache_open(&cache, &key, handle.gpu_device_id) { + Ok(ptr) => { + slot.insert(ptr); + opened_keys.push(key); + } + Err(e) => { + pre_errors[i] = Some(error_result( + entry.key, + &DispatcherError::IoError(format!("IPC open failed: {e}")), + )); + } + } + } + } + + let results: Vec = req + .entries + .iter() + .enumerate() + .map(|(i, entry)| { + if let Some(err) = pre_errors[i].take() { + return err; + } + let handle = entry.ipc_handle.as_ref().unwrap(); + let key: [u8; 64] = handle.cuda_ipc_handle.as_slice().try_into().unwrap(); + let dev_ptr = match local_ptrs.get(&key) { + Some(&ptr) => ptr, + None => { + return error_result( + entry.key, + &DispatcherError::IoError("IPC handle not cached".into()), + ) + } + }; + let ipc = IpcHandle { + address: dev_ptr as *mut u8, + size: handle.size, + }; + match dispatcher.populate(entry.key, ipc) { + Ok(()) => success_result(entry.key), + Err(e) => error_result(entry.key, &e), + } + }) + .collect(); + + for key in &opened_keys { + ipc_cache_close(&cache, key); + } + results + }) + .await + .map_err(|e| Status::internal(format!("task join error: {e}")))?; + + Ok(Response::new(BatchPopulateResponse { results })) + } + + async fn lookup( + &self, + request: Request, + ) -> Result, Status> { + let req = request.into_inner(); + let keys: Vec = req.entries.iter().map(|e| e.key).collect(); + check_duplicate_keys(&keys)?; + + let dispatcher = 
Arc::clone(&self.dispatcher); + let cache = Arc::clone(&self.ipc_cache); + let results = tokio::task::spawn_blocking(move || { + let mut opened_keys: Vec<[u8; 64]> = Vec::new(); + let mut batch_entries: Vec<(u64, IpcHandle)> = Vec::with_capacity(req.entries.len()); + let mut pre_errors: Vec> = vec![None; req.entries.len()]; + let mut local_ptrs: HashMap<[u8; 64], *mut std::ffi::c_void> = HashMap::new(); + + for (i, entry) in req.entries.iter().enumerate() { + let handle = match entry.ipc_handle.as_ref() { + Some(h) => h, + None => { + pre_errors[i] = Some(error_result( + entry.key, + &DispatcherError::InvalidParameter("missing ipc_handle".into()), + )); + batch_entries.push(( + entry.key, + IpcHandle { + address: std::ptr::null_mut(), + size: 0, + }, + )); + continue; + } + }; + let handle_key: [u8; 64] = match handle.cuda_ipc_handle.as_slice().try_into() { + Ok(k) => k, + Err(_) => { + pre_errors[i] = Some(error_result( + entry.key, + &DispatcherError::InvalidParameter(format!( + "cuda_ipc_handle must be 64 bytes, got {}", + handle.cuda_ipc_handle.len() + )), + )); + batch_entries.push(( + entry.key, + IpcHandle { + address: std::ptr::null_mut(), + size: 0, + }, + )); + continue; + } + }; + let dev_ptr = match local_ptrs.get(&handle_key) { + Some(&ptr) => ptr, + None => match ipc_cache_open(&cache, &handle_key, handle.gpu_device_id) { + Ok(ptr) => { + local_ptrs.insert(handle_key, ptr); + opened_keys.push(handle_key); + ptr + } + Err(e) => { + pre_errors[i] = Some(error_result( + entry.key, + &DispatcherError::IoError(format!("IPC open failed: {e}")), + )); + batch_entries.push(( + entry.key, + IpcHandle { + address: std::ptr::null_mut(), + size: 0, + }, + )); + continue; + } + }, + }; + batch_entries.push(( + entry.key, + IpcHandle { + address: dev_ptr as *mut u8, + size: handle.size, + }, + )); + } + + let valid_indices: Vec = (0..batch_entries.len()) + .filter(|&i| pre_errors[i].is_none()) + .collect(); + let valid_batch: Vec<(u64, IpcHandle)> = valid_indices + 
.iter() + .map(|&i| { + let (key, ref ipc) = batch_entries[i]; + (key, IpcHandle { + address: ipc.address, + size: ipc.size, + }) + }) + .collect(); + + let batch_results = dispatcher.batch_lookup(&valid_batch); + + let mut results: Vec = Vec::with_capacity(req.entries.len()); + let mut batch_iter = batch_results.into_iter(); + for (i, entry) in req.entries.iter().enumerate() { + if let Some(err_result) = pre_errors[i].take() { + results.push(err_result); + } else { + match batch_iter.next().unwrap() { + Ok(()) => results.push(success_result(entry.key)), + Err(e) => results.push(error_result(entry.key, &e)), + } + } + } + + for key in &opened_keys { + ipc_cache_close(&cache, key); + } + + results + }) + .await + .map_err(|e| Status::internal(format!("task join error: {e}")))?; + + Ok(Response::new(BatchLookupResponse { results })) + } + + async fn check( + &self, + request: Request, + ) -> Result, Status> { + let req = request.into_inner(); + check_duplicate_keys(&req.keys)?; + + let dispatcher = Arc::clone(&self.dispatcher); + let results = tokio::task::spawn_blocking(move || { + req.keys + .iter() + .map(|&key| { + let exists: bool = dispatcher.check(key).unwrap_or_default(); + CheckResult { key, exists } + }) + .collect::>() + }) + .await + .map_err(|e| Status::internal(format!("task join error: {e}")))?; + + Ok(Response::new(BatchCheckResponse { results })) + } + + async fn remove( + &self, + request: Request, + ) -> Result, Status> { + let req = request.into_inner(); + check_duplicate_keys(&req.keys)?; + + let dispatcher = Arc::clone(&self.dispatcher); + let results = tokio::task::spawn_blocking(move || { + req.keys + .iter() + .map(|&key| match dispatcher.remove(key) { + Ok(()) => success_result(key), + Err(e) => error_result(key, &e), + }) + .collect::>() + }) + .await + .map_err(|e| Status::internal(format!("task join error: {e}")))?; + + Ok(Response::new(BatchRemoveResponse { results })) + } + + async fn touch( + &self, + request: Request, + ) -> Result, 
Status> { + let req = request.into_inner(); + check_duplicate_keys(&req.keys)?; + + let dispatcher = Arc::clone(&self.dispatcher); + let results = tokio::task::spawn_blocking(move || { + req.keys + .iter() + .map(|&key| match dispatcher.touch(key) { + Ok(()) => success_result(key), + Err(e) => error_result(key, &e), + }) + .collect::>() + }) + .await + .map_err(|e| Status::internal(format!("task join error: {e}")))?; + + Ok(Response::new(BatchTouchResponse { results })) + } + + async fn clear_memory_tier( + &self, + _request: Request, + ) -> Result, Status> { + let dispatcher = Arc::clone(&self.dispatcher); + let entries_cleared = tokio::task::spawn_blocking(move || dispatcher.clear_memory_tier()) + .await + .map_err(|e| Status::internal(format!("task join error: {e}")))? + .map_err(|e| Status::internal(format!("clear_memory_tier failed: {e}")))?; + + Ok(Response::new(ClearMemoryTierResponse { + entries_cleared: entries_cleared as u64, + })) + } +} diff --git a/tools/rdma-test/.claude/skills/speckit-agent-context-update/SKILL.md b/tools/rdma-test/.claude/skills/speckit-agent-context-update/SKILL.md new file mode 100644 index 00000000..667e84bb --- /dev/null +++ b/tools/rdma-test/.claude/skills/speckit-agent-context-update/SKILL.md @@ -0,0 +1,31 @@ +--- +name: speckit-agent-context-update +description: Refresh the managed Spec Kit section in the coding agent context file +compatibility: Requires spec-kit project structure with .specify/ directory +metadata: + author: github-spec-kit + source: agent-context:commands/speckit.agent-context.update.md +--- + +# Update Coding Agent Context + +Refresh the managed Spec Kit section inside the active coding agent's context/instruction file (e.g. `CLAUDE.md`, `.github/copilot-instructions.md`, `AGENTS.md`). + +## Behavior + +The script reads the agent-context extension config at +`.specify/extensions/agent-context/agent-context-config.yml` to discover: + +- `context_file` — the path of the coding agent context file to manage. 
+- `context_markers.start` / `.end` — the delimiters surrounding the managed section. Defaults to `<!-- SPECKIT START -->` and `<!-- SPECKIT END -->` when the field is missing. + +It then creates, replaces, or appends the managed block so that the section points at the most recent plan path when one can be discovered (`specs/<feature>/plan.md`). + +If `context_file` is empty or the file cannot be located, the command reports nothing to do and exits successfully. + +## Execution + +- **Bash**: `.specify/extensions/agent-context/scripts/bash/update-agent-context.sh [plan_path]` +- **PowerShell**: `.specify/extensions/agent-context/scripts/powershell/update-agent-context.ps1 [plan_path]` + +When `plan_path` is omitted, the script auto-detects the most recently modified `specs/*/plan.md`. \ No newline at end of file diff --git a/tools/rdma-test/.claude/skills/speckit-analyze/SKILL.md b/tools/rdma-test/.claude/skills/speckit-analyze/SKILL.md new file mode 100644 index 00000000..58f2348b --- /dev/null +++ b/tools/rdma-test/.claude/skills/speckit-analyze/SKILL.md @@ -0,0 +1,260 @@ +--- +name: "speckit-analyze" +description: "Perform a non-destructive cross-artifact consistency and quality analysis across spec.md, plan.md, and tasks.md after task generation." +argument-hint: "Optional focus areas for analysis" +compatibility: "Requires spec-kit project structure with .specify/ directory" +metadata: + author: "github-spec-kit" + source: "templates/commands/analyze.md" +user-invocable: true +disable-model-invocation: false +--- + + +## User Input + +```text +$ARGUMENTS +``` + +You **MUST** consider the user input before proceeding (if not empty). + +## Pre-Execution Checks + +**Check for extension hooks (before analysis)**: +- Check if `.specify/extensions.yml` exists in the project root. +- If it exists, read it and look for entries under the `hooks.before_analyze` key +- If the YAML cannot be parsed or is invalid, skip hook checking silently and continue normally +- Filter out hooks where `enabled` is explicitly `false`. 
Treat hooks without an `enabled` field as enabled by default. +- For each remaining hook, do **not** attempt to interpret or evaluate hook `condition` expressions: + - If the hook has no `condition` field, or it is null/empty, treat the hook as executable + - If the hook defines a non-empty `condition`, skip the hook and leave condition evaluation to the HookExecutor implementation +- When constructing slash commands from hook command names, replace dots (`.`) with hyphens (`-`). For example, `speckit.git.commit` → `/speckit-git-commit`. +- For each executable hook, output the following based on its `optional` flag: + - **Optional hook** (`optional: true`): + ``` + ## Extension Hooks + + **Optional Pre-Hook**: {extension} + Command: `/{command}` + Description: {description} + + Prompt: {prompt} + To execute: `/{command}` + ``` + - **Mandatory hook** (`optional: false`): + ``` + ## Extension Hooks + + **Automatic Pre-Hook**: {extension} + Executing: `/{command}` + EXECUTE_COMMAND: {command} + + Wait for the result of the hook command before proceeding to the Goal. + ``` +- If no hooks are registered or `.specify/extensions.yml` does not exist, skip silently + +## Goal + +Identify inconsistencies, duplications, ambiguities, and underspecified items across the three core artifacts (`spec.md`, `plan.md`, `tasks.md`) before implementation. This command MUST run only after `/speckit-tasks` has successfully produced a complete `tasks.md`. + +## Operating Constraints + +**STRICTLY READ-ONLY**: Do **not** modify any files. Output a structured analysis report. Offer an optional remediation plan (user must explicitly approve before any follow-up editing commands would be invoked manually). + +**Constitution Authority**: The project constitution (`.specify/memory/constitution.md`) is **non-negotiable** within this analysis scope. 
Constitution conflicts are automatically CRITICAL and require adjustment of the spec, plan, or tasks—not dilution, reinterpretation, or silent ignoring of the principle. If a principle itself needs to change, that must occur in a separate, explicit constitution update outside `/speckit-analyze`. + +## Execution Steps + +### 1. Initialize Analysis Context + +Run `.specify/scripts/bash/check-prerequisites.sh --json --require-tasks --include-tasks` once from repo root and parse JSON for FEATURE_DIR and AVAILABLE_DOCS. Derive absolute paths: + +- SPEC = FEATURE_DIR/spec.md +- PLAN = FEATURE_DIR/plan.md +- TASKS = FEATURE_DIR/tasks.md + +Abort with an error message if any required file is missing (instruct the user to run the missing prerequisite command). +For single quotes in args like "I'm Groot", use escape syntax: e.g., 'I'\''m Groot' (or double-quote if possible: "I'm Groot"). + +### 2. Load Artifacts (Progressive Disclosure) + +Load only the minimal necessary context from each artifact: + +**From spec.md:** + +- Overview/Context +- Functional Requirements +- Success Criteria (measurable outcomes — e.g., performance, security, availability, user success, business impact) +- User Stories +- Edge Cases (if present) + +**From plan.md:** + +- Architecture/stack choices +- Data Model references +- Phases +- Technical constraints + +**From tasks.md:** + +- Task IDs +- Descriptions +- Phase grouping +- Parallel markers [P] +- Referenced file paths + +**From constitution:** + +- Load `.specify/memory/constitution.md` for principle validation + +### 3. Build Semantic Models + +Create internal representations (do not include raw artifacts in output): + +- **Requirements inventory**: For each Functional Requirement (FR-###) and Success Criterion (SC-###), record a stable key. Use the explicit FR-/SC- identifier as the primary key when present, and optionally also derive an imperative-phrase slug for readability (e.g., "User can upload file" → `user-can-upload-file`). 
Include only Success Criteria items that require buildable work (e.g., load-testing infrastructure, security audit tooling), and exclude post-launch outcome metrics and business KPIs (e.g., "Reduce support tickets by 50%"). +- **User story/action inventory**: Discrete user actions with acceptance criteria +- **Task coverage mapping**: Map each task to one or more requirements or stories (inference by keyword / explicit reference patterns like IDs or key phrases) +- **Constitution rule set**: Extract principle names and MUST/SHOULD normative statements + +### 4. Detection Passes (Token-Efficient Analysis) + +Focus on high-signal findings. Limit to 50 findings total; aggregate remainder in overflow summary. + +#### A. Duplication Detection + +- Identify near-duplicate requirements +- Mark lower-quality phrasing for consolidation + +#### B. Ambiguity Detection + +- Flag vague adjectives (fast, scalable, secure, intuitive, robust) lacking measurable criteria +- Flag unresolved placeholders (TODO, TKTK, ???, `<placeholder>`, etc.) + +#### C. Underspecification + +- Requirements with verbs but missing object or measurable outcome +- User stories missing acceptance criteria alignment +- Tasks referencing files or components not defined in spec/plan + +#### D. Constitution Alignment + +- Any requirement or plan element conflicting with a MUST principle +- Missing mandated sections or quality gates from constitution + +#### E. Coverage Gaps + +- Requirements with zero associated tasks +- Tasks with no mapped requirement/story +- Success Criteria requiring buildable work (performance, security, availability) not reflected in tasks + +#### F. 
Inconsistency + +- Terminology drift (same concept named differently across files) +- Data entities referenced in plan but absent in spec (or vice versa) +- Task ordering contradictions (e.g., integration tasks before foundational setup tasks without dependency note) +- Conflicting requirements (e.g., one requires Next.js while other specifies Vue) + +### 5. Severity Assignment + +Use this heuristic to prioritize findings: + +- **CRITICAL**: Violates constitution MUST, missing core spec artifact, or requirement with zero coverage that blocks baseline functionality +- **HIGH**: Duplicate or conflicting requirement, ambiguous security/performance attribute, untestable acceptance criterion +- **MEDIUM**: Terminology drift, missing non-functional task coverage, underspecified edge case +- **LOW**: Style/wording improvements, minor redundancy not affecting execution order + +### 6. Produce Compact Analysis Report + +Output a Markdown report (no file writes) with the following structure: + +## Specification Analysis Report + +| ID | Category | Severity | Location(s) | Summary | Recommendation | +|----|----------|----------|-------------|---------|----------------| +| A1 | Duplication | HIGH | spec.md:L120-134 | Two similar requirements ... | Merge phrasing; keep clearer version | + +(Add one row per finding; generate stable IDs prefixed by category initial.) + +**Coverage Summary Table:** + +| Requirement Key | Has Task? | Task IDs | Notes | +|-----------------|-----------|----------|-------| + +**Constitution Alignment Issues:** (if any) + +**Unmapped Tasks:** (if any) + +**Metrics:** + +- Total Requirements +- Total Tasks +- Coverage % (requirements with >=1 task) +- Ambiguity Count +- Duplication Count +- Critical Issues Count + +### 7. 
Provide Next Actions + +At end of report, output a concise Next Actions block: + +- If CRITICAL issues exist: Recommend resolving before `/speckit-implement` +- If only LOW/MEDIUM: User may proceed, but provide improvement suggestions +- Provide explicit command suggestions: e.g., "Run /speckit-specify with refinement", "Run /speckit-plan to adjust architecture", "Manually edit tasks.md to add coverage for 'performance-metrics'" + +### 8. Offer Remediation + +Ask the user: "Would you like me to suggest concrete remediation edits for the top N issues?" (Do NOT apply them automatically.) + +### 9. Check for extension hooks + +After reporting, check if `.specify/extensions.yml` exists in the project root. +- If it exists, read it and look for entries under the `hooks.after_analyze` key +- If the YAML cannot be parsed or is invalid, skip hook checking silently and continue normally +- Filter out hooks where `enabled` is explicitly `false`. Treat hooks without an `enabled` field as enabled by default. +- For each remaining hook, do **not** attempt to interpret or evaluate hook `condition` expressions: + - If the hook has no `condition` field, or it is null/empty, treat the hook as executable + - If the hook defines a non-empty `condition`, skip the hook and leave condition evaluation to the HookExecutor implementation +- When constructing slash commands from hook command names, replace dots (`.`) with hyphens (`-`). For example, `speckit.git.commit` → `/speckit-git-commit`. 
+- For each executable hook, output the following based on its `optional` flag: + - **Optional hook** (`optional: true`): + ``` + ## Extension Hooks + + **Optional Hook**: {extension} + Command: `/{command}` + Description: {description} + + Prompt: {prompt} + To execute: `/{command}` + ``` + - **Mandatory hook** (`optional: false`): + ``` + ## Extension Hooks + + **Automatic Hook**: {extension} + Executing: `/{command}` + EXECUTE_COMMAND: {command} + ``` +- If no hooks are registered or `.specify/extensions.yml` does not exist, skip silently + +## Operating Principles + +### Context Efficiency + +- **Minimal high-signal tokens**: Focus on actionable findings, not exhaustive documentation +- **Progressive disclosure**: Load artifacts incrementally; don't dump all content into analysis +- **Token-efficient output**: Limit findings table to 50 rows; summarize overflow +- **Deterministic results**: Rerunning without changes should produce consistent IDs and counts + +### Analysis Guidelines + +- **NEVER modify files** (this is read-only analysis) +- **NEVER hallucinate missing sections** (if absent, report them accurately) +- **Prioritize constitution violations** (these are always CRITICAL) +- **Use examples over exhaustive rules** (cite specific instances, not generic patterns) +- **Report zero issues gracefully** (emit success report with coverage statistics) + +## Context + +$ARGUMENTS diff --git a/tools/rdma-test/.claude/skills/speckit-checklist/SKILL.md b/tools/rdma-test/.claude/skills/speckit-checklist/SKILL.md new file mode 100644 index 00000000..eb6d580f --- /dev/null +++ b/tools/rdma-test/.claude/skills/speckit-checklist/SKILL.md @@ -0,0 +1,374 @@ +--- +name: "speckit-checklist" +description: "Generate a custom checklist for the current feature based on user requirements." 
+argument-hint: "Domain or focus area for the checklist" +compatibility: "Requires spec-kit project structure with .specify/ directory" +metadata: + author: "github-spec-kit" + source: "templates/commands/checklist.md" +user-invocable: true +disable-model-invocation: false +--- + + +## Checklist Purpose: "Unit Tests for English" + +**CRITICAL CONCEPT**: Checklists are **UNIT TESTS FOR REQUIREMENTS WRITING** - they validate the quality, clarity, and completeness of requirements in a given domain. + +**NOT for verification/testing**: + +- ❌ NOT "Verify the button clicks correctly" +- ❌ NOT "Test error handling works" +- ❌ NOT "Confirm the API returns 200" +- ❌ NOT checking if code/implementation matches the spec + +**FOR requirements quality validation**: + +- ✅ "Are visual hierarchy requirements defined for all card types?" (completeness) +- ✅ "Is 'prominent display' quantified with specific sizing/positioning?" (clarity) +- ✅ "Are hover state requirements consistent across all interactive elements?" (consistency) +- ✅ "Are accessibility requirements defined for keyboard navigation?" (coverage) +- ✅ "Does the spec define what happens when logo image fails to load?" (edge cases) + +**Metaphor**: If your spec is code written in English, the checklist is its unit test suite. You're testing whether the requirements are well-written, complete, unambiguous, and ready for implementation - NOT whether the implementation works. + +## User Input + +```text +$ARGUMENTS +``` + +You **MUST** consider the user input before proceeding (if not empty). + +## Pre-Execution Checks + +**Check for extension hooks (before checklist generation)**: +- Check if `.specify/extensions.yml` exists in the project root. +- If it exists, read it and look for entries under the `hooks.before_checklist` key +- If the YAML cannot be parsed or is invalid, skip hook checking silently and continue normally +- Filter out hooks where `enabled` is explicitly `false`. 
Treat hooks without an `enabled` field as enabled by default. +- For each remaining hook, do **not** attempt to interpret or evaluate hook `condition` expressions: + - If the hook has no `condition` field, or it is null/empty, treat the hook as executable + - If the hook defines a non-empty `condition`, skip the hook and leave condition evaluation to the HookExecutor implementation +- When constructing slash commands from hook command names, replace dots (`.`) with hyphens (`-`). For example, `speckit.git.commit` → `/speckit-git-commit`. +- For each executable hook, output the following based on its `optional` flag: + - **Optional hook** (`optional: true`): + ``` + ## Extension Hooks + + **Optional Pre-Hook**: {extension} + Command: `/{command}` + Description: {description} + + Prompt: {prompt} + To execute: `/{command}` + ``` + - **Mandatory hook** (`optional: false`): + ``` + ## Extension Hooks + + **Automatic Pre-Hook**: {extension} + Executing: `/{command}` + EXECUTE_COMMAND: {command} + + Wait for the result of the hook command before proceeding to the Execution Steps. + ``` +- If no hooks are registered or `.specify/extensions.yml` does not exist, skip silently + +## Execution Steps + +1. **Setup**: Run `.specify/scripts/bash/check-prerequisites.sh --json` from repo root and parse JSON for FEATURE_DIR and AVAILABLE_DOCS list. + - All file paths must be absolute. + - For single quotes in args like "I'm Groot", use escape syntax: e.g., 'I'\''m Groot' (or double-quote if possible: "I'm Groot"). + +2. **IF EXISTS**: Load `.specify/memory/constitution.md` for project principles and governance constraints. + +3. **Clarify intent (dynamic)**: Derive up to THREE initial contextual clarifying questions (no pre-baked catalog). 
They MUST: + - Be generated from the user's phrasing + extracted signals from spec/plan/tasks + - Only ask about information that materially changes checklist content + - Be skipped individually if already unambiguous in `$ARGUMENTS` + - Prefer precision over breadth + + Generation algorithm: + 1. Extract signals: feature domain keywords (e.g., auth, latency, UX, API), risk indicators ("critical", "must", "compliance"), stakeholder hints ("QA", "review", "security team"), and explicit deliverables ("a11y", "rollback", "contracts"). + 2. Cluster signals into candidate focus areas (max 4) ranked by relevance. + 3. Identify probable audience & timing (author, reviewer, QA, release) if not explicit. + 4. Detect missing dimensions: scope breadth, depth/rigor, risk emphasis, exclusion boundaries, measurable acceptance criteria. + 5. Formulate questions chosen from these archetypes: + - Scope refinement (e.g., "Should this include integration touchpoints with X and Y or stay limited to local module correctness?") + - Risk prioritization (e.g., "Which of these potential risk areas should receive mandatory gating checks?") + - Depth calibration (e.g., "Is this a lightweight pre-commit sanity list or a formal release gate?") + - Audience framing (e.g., "Will this be used by the author only or peers during PR review?") + - Boundary exclusion (e.g., "Should we explicitly exclude performance tuning items this round?") + - Scenario class gap (e.g., "No recovery flows detected—are rollback / partial failure paths in scope?") + + Question formatting rules: + - If presenting options, generate a compact table with columns: Option | Candidate | Why It Matters + - Limit to A–E options maximum; omit table if a free-form answer is clearer + - Never ask the user to restate what they already said + - Avoid speculative categories (no hallucination). If uncertain, ask explicitly: "Confirm whether X belongs in scope." 
+ + Defaults when interaction impossible: + - Depth: Standard + - Audience: Reviewer (PR) if code-related; Author otherwise + - Focus: Top 2 relevance clusters + + Output the questions (label Q1/Q2/Q3). After answers: if ≥2 scenario classes (Alternate / Exception / Recovery / Non-Functional domain) remain unclear, you MAY ask up to TWO more targeted follow‑ups (Q4/Q5) with a one-line justification each (e.g., "Unresolved recovery path risk"). Do not exceed five total questions. Skip escalation if user explicitly declines more. + +4. **Understand user request**: Combine `$ARGUMENTS` + clarifying answers: + - Derive checklist theme (e.g., security, review, deploy, ux) + - Consolidate explicit must-have items mentioned by user + - Map focus selections to category scaffolding + - Infer any missing context from spec/plan/tasks (do NOT hallucinate) + +5. **Load feature context**: Read from FEATURE_DIR: + - spec.md: Feature requirements and scope + - plan.md (if exists): Technical details, dependencies + - tasks.md (if exists): Implementation tasks + + **Context Loading Strategy**: + - Load only necessary portions relevant to active focus areas (avoid full-file dumping) + - Prefer summarizing long sections into concise scenario/requirement bullets + - Use progressive disclosure: add follow-on retrieval only if gaps detected + - If source docs are large, generate interim summary items instead of embedding raw text + +6. 
**Generate checklist** - Create "Unit Tests for Requirements": + - Create `FEATURE_DIR/checklists/` directory if it doesn't exist + - Generate unique checklist filename: + - Use short, descriptive name based on domain (e.g., `ux.md`, `api.md`, `security.md`) + - Format: `[domain].md` + - File handling behavior: + - If file does NOT exist: Create new file and number items starting from CHK001 + - If file exists: Append new items to existing file, continuing from the last CHK ID (e.g., if last item is CHK015, start new items at CHK016) + - Never delete or replace existing checklist content - always preserve and append + + **CORE PRINCIPLE - Test the Requirements, Not the Implementation**: + Every checklist item MUST evaluate the REQUIREMENTS THEMSELVES for: + - **Completeness**: Are all necessary requirements present? + - **Clarity**: Are requirements unambiguous and specific? + - **Consistency**: Do requirements align with each other? + - **Measurability**: Can requirements be objectively verified? + - **Coverage**: Are all scenarios/edge cases addressed? + + **Category Structure** - Group items by requirement quality dimensions: + - **Requirement Completeness** (Are all necessary requirements documented?) + - **Requirement Clarity** (Are requirements specific and unambiguous?) + - **Requirement Consistency** (Do requirements align without conflicts?) + - **Acceptance Criteria Quality** (Are success criteria measurable?) + - **Scenario Coverage** (Are all flows/cases addressed?) + - **Edge Case Coverage** (Are boundary conditions defined?) + - **Non-Functional Requirements** (Performance, Security, Accessibility, etc. - are they specified?) + - **Dependencies & Assumptions** (Are they documented and validated?) + - **Ambiguities & Conflicts** (What needs clarification?) 
+ + **HOW TO WRITE CHECKLIST ITEMS - "Unit Tests for English"**: + + ❌ **WRONG** (Testing implementation): + - "Verify landing page displays 3 episode cards" + - "Test hover states work on desktop" + - "Confirm logo click navigates home" + + ✅ **CORRECT** (Testing requirements quality): + - "Are the exact number and layout of featured episodes specified?" [Completeness] + - "Is 'prominent display' quantified with specific sizing/positioning?" [Clarity] + - "Are hover state requirements consistent across all interactive elements?" [Consistency] + - "Are keyboard navigation requirements defined for all interactive UI?" [Coverage] + - "Is the fallback behavior specified when logo image fails to load?" [Edge Cases] + - "Are loading states defined for asynchronous episode data?" [Completeness] + - "Does the spec define visual hierarchy for competing UI elements?" [Clarity] + + **ITEM STRUCTURE**: + Each item should follow this pattern: + - Question format asking about requirement quality + - Focus on what's WRITTEN (or not written) in the spec/plan + - Include quality dimension in brackets [Completeness/Clarity/Consistency/etc.] + - Reference spec section `[Spec §X.Y]` when checking existing requirements + - Use `[Gap]` marker when checking for missing requirements + + **EXAMPLES BY QUALITY DIMENSION**: + + Completeness: + - "Are error handling requirements defined for all API failure modes? [Gap]" + - "Are accessibility requirements specified for all interactive elements? [Completeness]" + - "Are mobile breakpoint requirements defined for responsive layouts? [Gap]" + + Clarity: + - "Is 'fast loading' quantified with specific timing thresholds? [Clarity, Spec §NFR-2]" + - "Are 'related episodes' selection criteria explicitly defined? [Clarity, Spec §FR-5]" + - "Is 'prominent' defined with measurable visual properties? [Ambiguity, Spec §FR-4]" + + Consistency: + - "Do navigation requirements align across all pages? 
[Consistency, Spec §FR-10]" + - "Are card component requirements consistent between landing and detail pages? [Consistency]" + + Coverage: + - "Are requirements defined for zero-state scenarios (no episodes)? [Coverage, Edge Case]" + - "Are concurrent user interaction scenarios addressed? [Coverage, Gap]" + - "Are requirements specified for partial data loading failures? [Coverage, Exception Flow]" + + Measurability: + - "Are visual hierarchy requirements measurable/testable? [Acceptance Criteria, Spec §FR-1]" + - "Can 'balanced visual weight' be objectively verified? [Measurability, Spec §FR-2]" + + **Scenario Classification & Coverage** (Requirements Quality Focus): + - Check if requirements exist for: Primary, Alternate, Exception/Error, Recovery, Non-Functional scenarios + - For each scenario class, ask: "Are [scenario type] requirements complete, clear, and consistent?" + - If scenario class missing: "Are [scenario type] requirements intentionally excluded or missing? [Gap]" + - Include resilience/rollback when state mutation occurs: "Are rollback requirements defined for migration failures? [Gap]" + + **Traceability Requirements**: + - MINIMUM: ≥80% of items MUST include at least one traceability reference + - Each item should reference: spec section `[Spec §X.Y]`, or use markers: `[Gap]`, `[Ambiguity]`, `[Conflict]`, `[Assumption]` + - If no ID system exists: "Is a requirement & acceptance criteria ID scheme established? [Traceability]" + + **Surface & Resolve Issues** (Requirements Quality Problems): + Ask questions about the requirements themselves: + - Ambiguities: "Is the term 'fast' quantified with specific metrics? [Ambiguity, Spec §NFR-1]" + - Conflicts: "Do navigation requirements conflict between §FR-10 and §FR-10a? [Conflict]" + - Assumptions: "Is the assumption of 'always available podcast API' validated? [Assumption]" + - Dependencies: "Are external podcast API requirements documented? 
[Dependency, Gap]" + - Missing definitions: "Is 'visual hierarchy' defined with measurable criteria? [Gap]" + + **Content Consolidation**: + - Soft cap: If raw candidate items > 40, prioritize by risk/impact + - Merge near-duplicates checking the same requirement aspect + - If >5 low-impact edge cases, create one item: "Are edge cases X, Y, Z addressed in requirements? [Coverage]" + + **🚫 ABSOLUTELY PROHIBITED** - These make it an implementation test, not a requirements test: + - ❌ Any item starting with "Verify", "Test", "Confirm", "Check" + implementation behavior + - ❌ References to code execution, user actions, system behavior + - ❌ "Displays correctly", "works properly", "functions as expected" + - ❌ "Click", "navigate", "render", "load", "execute" + - ❌ Test cases, test plans, QA procedures + - ❌ Implementation details (frameworks, APIs, algorithms) + + **✅ REQUIRED PATTERNS** - These test requirements quality: + - ✅ "Are [requirement type] defined/specified/documented for [scenario]?" + - ✅ "Is [vague term] quantified/clarified with specific criteria?" + - ✅ "Are requirements consistent between [section A] and [section B]?" + - ✅ "Can [requirement] be objectively measured/verified?" + - ✅ "Are [edge cases/scenarios] addressed in requirements?" + - ✅ "Does the spec define [missing aspect]?" + +7. **Structure Reference**: Generate the checklist following the canonical template in `.specify/templates/checklist-template.md` for title, meta section, category headings, and ID formatting. If template is unavailable, use: H1 title, purpose/created meta lines, `##` category sections containing `- [ ] CHK### ` lines with globally incrementing IDs starting at CHK001. + +8. **Report**: Output full path to checklist file, item count, and summarize whether the run created a new file or appended to an existing one. 
Summarize: + - Focus areas selected + - Depth level + - Actor/timing + - Any explicit user-specified must-have items incorporated + +**Important**: Each `/speckit-checklist` command invocation uses a short, descriptive checklist filename and either creates a new file or appends to an existing one. This allows: + +- Multiple checklists of different types (e.g., `ux.md`, `test.md`, `security.md`) +- Simple, memorable filenames that indicate checklist purpose +- Easy identification and navigation in the `checklists/` folder + +To avoid clutter, use descriptive types and clean up obsolete checklists when done. + +## Example Checklist Types & Sample Items + +**UX Requirements Quality:** `ux.md` + +Sample items (testing the requirements, NOT the implementation): + +- "Are visual hierarchy requirements defined with measurable criteria? [Clarity, Spec §FR-1]" +- "Are the number and positioning of UI elements explicitly specified? [Completeness, Spec §FR-1]" +- "Are interaction state requirements (hover, focus, active) consistently defined? [Consistency]" +- "Are accessibility requirements specified for all interactive elements? [Coverage, Gap]" +- "Is fallback behavior defined when images fail to load? [Edge Case, Gap]" +- "Can 'prominent display' be objectively measured? [Measurability, Spec §FR-4]" + +**API Requirements Quality:** `api.md` + +Sample items: + +- "Are error response formats specified for all failure scenarios? [Completeness]" +- "Are rate limiting requirements quantified with specific thresholds? [Clarity]" +- "Are authentication requirements consistent across all endpoints? [Consistency]" +- "Are retry/timeout requirements defined for external dependencies? [Coverage, Gap]" +- "Is the versioning strategy documented in requirements? [Gap]" + +**Performance Requirements Quality:** `performance.md` + +Sample items: + +- "Are performance requirements quantified with specific metrics? [Clarity]" +- "Are performance targets defined for all critical user journeys?
[Coverage]" +- "Are performance requirements under different load conditions specified? [Completeness]" +- "Can performance requirements be objectively measured? [Measurability]" +- "Are degradation requirements defined for high-load scenarios? [Edge Case, Gap]" + +**Security Requirements Quality:** `security.md` + +Sample items: + +- "Are authentication requirements specified for all protected resources? [Coverage]" +- "Are data protection requirements defined for sensitive information? [Completeness]" +- "Is the threat model documented, and are requirements aligned to it? [Traceability]" +- "Are security requirements consistent with compliance obligations? [Consistency]" +- "Are security failure/breach response requirements defined? [Gap, Exception Flow]" + +## Anti-Examples: What NOT To Do + +**❌ WRONG - These test implementation, not requirements:** + +```markdown +- [ ] CHK001 - Verify landing page displays 3 episode cards [Spec §FR-001] +- [ ] CHK002 - Test hover states work correctly on desktop [Spec §FR-003] +- [ ] CHK003 - Confirm logo click navigates to home page [Spec §FR-010] +- [ ] CHK004 - Check that related episodes section shows 3-5 items [Spec §FR-005] +``` + +**✅ CORRECT - These test requirements quality:** + +```markdown +- [ ] CHK001 - Are the number and layout of featured episodes explicitly specified? [Completeness, Spec §FR-001] +- [ ] CHK002 - Are hover state requirements consistently defined for all interactive elements? [Consistency, Spec §FR-003] +- [ ] CHK003 - Are navigation requirements clear for all clickable brand elements? [Clarity, Spec §FR-010] +- [ ] CHK004 - Are the selection criteria for related episodes documented? [Gap, Spec §FR-005] +- [ ] CHK005 - Are loading state requirements defined for asynchronous episode data? [Gap] +- [ ] CHK006 - Can "visual hierarchy" requirements be objectively measured?
[Measurability, Spec §FR-001] +``` + +**Key Differences:** + +- Wrong: Tests if the system works correctly +- Correct: Tests if the requirements are written correctly +- Wrong: Verification of behavior +- Correct: Validation of requirement quality +- Wrong: "Does it do X?" +- Correct: "Is X clearly specified?" + +## Post-Execution Checks + +**Check for extension hooks (after checklist generation)**: +Check if `.specify/extensions.yml` exists in the project root. +- If it exists, read it and look for entries under the `hooks.after_checklist` key +- If the YAML cannot be parsed or is invalid, skip hook checking silently and continue normally +- Filter out hooks where `enabled` is explicitly `false`. Treat hooks without an `enabled` field as enabled by default. +- For each remaining hook, do **not** attempt to interpret or evaluate hook `condition` expressions: + - If the hook has no `condition` field, or it is null/empty, treat the hook as executable + - If the hook defines a non-empty `condition`, skip the hook and leave condition evaluation to the HookExecutor implementation +- When constructing slash commands from hook command names, replace dots (`.`) with hyphens (`-`). For example, `speckit.git.commit` → `/speckit-git-commit`. 
+- For each executable hook, output the following based on its `optional` flag: + - **Optional hook** (`optional: true`): + ``` + ## Extension Hooks + + **Optional Hook**: {extension} + Command: `/{command}` + Description: {description} + + Prompt: {prompt} + To execute: `/{command}` + ``` + - **Mandatory hook** (`optional: false`): + ``` + ## Extension Hooks + + **Automatic Hook**: {extension} + Executing: `/{command}` + EXECUTE_COMMAND: {command} + ``` +- If no hooks are registered or `.specify/extensions.yml` does not exist, skip silently diff --git a/tools/rdma-test/.claude/skills/speckit-clarify/SKILL.md b/tools/rdma-test/.claude/skills/speckit-clarify/SKILL.md new file mode 100644 index 00000000..5e71c210 --- /dev/null +++ b/tools/rdma-test/.claude/skills/speckit-clarify/SKILL.md @@ -0,0 +1,286 @@ +--- +name: "speckit-clarify" +description: "Identify underspecified areas in the current feature spec by asking up to 5 highly targeted clarification questions and encoding answers back into the spec." +argument-hint: "Optional areas to clarify in the spec" +compatibility: "Requires spec-kit project structure with .specify/ directory" +metadata: + author: "github-spec-kit" + source: "templates/commands/clarify.md" +user-invocable: true +disable-model-invocation: false +--- + + +## User Input + +```text +$ARGUMENTS +``` + +You **MUST** consider the user input before proceeding (if not empty). + +## Pre-Execution Checks + +**Check for extension hooks (before clarification)**: +- Check if `.specify/extensions.yml` exists in the project root. +- If it exists, read it and look for entries under the `hooks.before_clarify` key +- If the YAML cannot be parsed or is invalid, skip hook checking silently and continue normally +- Filter out hooks where `enabled` is explicitly `false`. Treat hooks without an `enabled` field as enabled by default. 
+- For each remaining hook, do **not** attempt to interpret or evaluate hook `condition` expressions: + - If the hook has no `condition` field, or it is null/empty, treat the hook as executable + - If the hook defines a non-empty `condition`, skip the hook and leave condition evaluation to the HookExecutor implementation +- When constructing slash commands from hook command names, replace dots (`.`) with hyphens (`-`). For example, `speckit.git.commit` → `/speckit-git-commit`. +- For each executable hook, output the following based on its `optional` flag: + - **Optional hook** (`optional: true`): + ``` + ## Extension Hooks + + **Optional Pre-Hook**: {extension} + Command: `/{command}` + Description: {description} + + Prompt: {prompt} + To execute: `/{command}` + ``` + - **Mandatory hook** (`optional: false`): + ``` + ## Extension Hooks + + **Automatic Pre-Hook**: {extension} + Executing: `/{command}` + EXECUTE_COMMAND: {command} + + Wait for the result of the hook command before proceeding to the Outline. + ``` +- If no hooks are registered or `.specify/extensions.yml` does not exist, skip silently + +## Outline + +Goal: Detect and reduce ambiguity or missing decision points in the active feature specification and record the clarifications directly in the spec file. + +Note: This clarification workflow is expected to run (and be completed) BEFORE invoking `/speckit-plan`. If the user explicitly states they are skipping clarification (e.g., exploratory spike), you may proceed, but must warn that downstream rework risk increases. + +Execution steps: + +1. Run `.specify/scripts/bash/check-prerequisites.sh --json --paths-only` from repo root **once** (combined `--json --paths-only` mode / `-Json -PathsOnly`). Parse minimal JSON payload fields: + - `FEATURE_DIR` + - `FEATURE_SPEC` + - (Optionally capture `IMPL_PLAN`, `TASKS` for future chained flows.) + - If JSON parsing fails, abort and instruct user to re-run `/speckit-specify` or verify feature branch environment. 
+ - For single quotes in args like "I'm Groot", use escape syntax: e.g., 'I'\''m Groot' (or double-quote if possible: "I'm Groot"). + +2. **IF EXISTS**: Load `.specify/memory/constitution.md` for project principles and governance constraints. + +3. Load the current spec file. Perform a structured ambiguity & coverage scan using this taxonomy. For each category, mark status: Clear / Partial / Missing. Produce an internal coverage map used for prioritization (do not output the raw map unless no questions will be asked). + + Functional Scope & Behavior: + - Core user goals & success criteria + - Explicit out-of-scope declarations + - User roles / personas differentiation + + Domain & Data Model: + - Entities, attributes, relationships + - Identity & uniqueness rules + - Lifecycle/state transitions + - Data volume / scale assumptions + + Interaction & UX Flow: + - Critical user journeys / sequences + - Error/empty/loading states + - Accessibility or localization notes + + Non-Functional Quality Attributes: + - Performance (latency, throughput targets) + - Scalability (horizontal/vertical, limits) + - Reliability & availability (uptime, recovery expectations) + - Observability (logging, metrics, tracing signals) + - Security & privacy (authN/Z, data protection, threat assumptions) + - Compliance / regulatory constraints (if any) + + Integration & External Dependencies: + - External services/APIs and failure modes + - Data import/export formats + - Protocol/versioning assumptions + + Edge Cases & Failure Handling: + - Negative scenarios + - Rate limiting / throttling + - Conflict resolution (e.g., concurrent edits) + + Constraints & Tradeoffs: + - Technical constraints (language, storage, hosting) + - Explicit tradeoffs or rejected alternatives + + Terminology & Consistency: + - Canonical glossary terms + - Avoided synonyms / deprecated terms + + Completion Signals: + - Acceptance criteria testability + - Measurable Definition of Done style indicators + + Misc / Placeholders:
+ - TODO markers / unresolved decisions + - Ambiguous adjectives ("robust", "intuitive") lacking quantification + + For each category with Partial or Missing status, add a candidate question opportunity unless: + - Clarification would not materially change implementation or validation strategy + - Information is better deferred to planning phase (note internally) + +4. Generate (internally) a prioritized queue of candidate clarification questions (maximum 5). Do NOT output them all at once. Apply these constraints: + - Maximum of 5 total questions across the whole session. + - Each question must be answerable with EITHER: + - A short multiple‑choice selection (2–5 distinct, mutually exclusive options), OR + - A one-word / short‑phrase answer (explicitly constrain: "Answer in <=5 words"). + - Only include questions whose answers materially impact architecture, data modeling, task decomposition, test design, UX behavior, operational readiness, or compliance validation. + - Ensure category coverage balance: attempt to cover the highest impact unresolved categories first; avoid asking two low-impact questions when a single high-impact area (e.g., security posture) is unresolved. + - Exclude questions already answered, trivial stylistic preferences, or plan-level execution details (unless blocking correctness). + - Favor clarifications that reduce downstream rework risk or prevent misaligned acceptance tests. + - If more than 5 categories remain unresolved, select the top 5 by (Impact * Uncertainty) heuristic. + +5. Sequential questioning loop (interactive): + - Present EXACTLY ONE question at a time. 
+ - For multiple‑choice questions: + - **Analyze all options** and determine the **most suitable option** based on: + - Best practices for the project type + - Common patterns in similar implementations + - Risk reduction (security, performance, maintainability) + - Alignment with any explicit project goals or constraints visible in the spec + - Present your **recommended option prominently** at the top with clear reasoning (1-2 sentences explaining why this is the best choice). + - Format as: `**Recommended:** Option [X] - ` + - Then render all options as a Markdown table: + + | Option | Description | + |--------|-------------| + | A |