diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1ac2a91..2aa6692 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,18 +7,18 @@ on: - "docs/**" - "*.md" - "deployment/**" - - ".github/workflows/docker-publish.yml" - "LICENSE" - ".githooks/**" + - ".github/workflows/**" push: branches: [main] paths-ignore: - "docs/**" - "*.md" - "deployment/**" - - ".github/workflows/docker-publish.yml" - "LICENSE" - ".githooks/**" + - ".github/workflows/**" concurrency: group: ci-${{ github.event.pull_request.number || github.sha }} @@ -26,12 +26,12 @@ concurrency: jobs: check: - runs-on: [self-hosted, vault-ci] + runs-on: ubuntu-latest + env: + MISE_ENV: ci steps: - uses: actions/checkout@v4 - uses: jdx/mise-action@v2 - env: - MISE_ENV: ci - run: mise run setup - name: Clean previous fixtures run: rm -rf tests/fixtures/ @@ -39,35 +39,14 @@ jobs: env: FIXTURES_GPG_PASSPHRASE: ${{ secrets.FIXTURES_GPG_PASSPHRASE_ALT }} run: mise run fixtures:decrypt - - name: Format check - run: mise run format:check - - name: Lint - run: mise run lint - - name: Test (unit + integration) - run: mise run test - - name: Clean up fixtures - if: always() - run: rm -rf tests/fixtures/ - - build-image: - if: github.event_name == 'push' - needs: check - runs-on: [self-hosted, vault-ci] - steps: - - uses: actions/checkout@v4 + - name: Check (gofmt + vet + unit tests) + run: mise run check - name: Build run: | - docker build -t rune-vault:ci-${{ github.sha }} vault/ - - name: Smoke test - run: | - docker run -d --name vault-ci-${{ github.run_id }} \ - -e VAULT_TLS_DISABLE=true \ - rune-vault:ci-${{ github.sha }} - timeout 60 bash -c 'until docker exec vault-ci-${{ github.run_id }} \ - curl -sf http://localhost:8081/health 2>/dev/null; do sleep 2; done' - - name: Teardown + VERSION=$(git describe --tags --always 2>/dev/null || echo "dev") + VERSION=$VERSION mise run go:build + - name: E2E tests + run: mise run go:test:e2e + - name: Clean up fixtures if: always() 
- run: | - docker rm -f vault-ci-${{ github.run_id }} || true - docker rmi rune-vault:ci-${{ github.sha }} || true - docker system prune -f + run: rm -rf tests/fixtures/ diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml deleted file mode 100644 index d4921e8..0000000 --- a/.github/workflows/docker-publish.yml +++ /dev/null @@ -1,56 +0,0 @@ -name: Build and Push Docker Image - -on: - release: - types: [published] - -env: - REGISTRY: ghcr.io - IMAGE_NAME: cryptolabinc/rune-vault - -jobs: - build-and-push: - runs-on: [self-hosted, vault-ci] - - permissions: - contents: read - packages: write - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Log in to GHCR - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Extract metadata (tags, labels) - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - tags: | - type=semver,pattern={{version}} - type=semver,pattern=v{{version}} - type=semver,pattern={{major}}.{{minor}} - type=raw,value=latest,enable={{is_default_branch}} - - - name: Build and push Docker image - uses: docker/build-push-action@v6 - with: - context: vault/ - platforms: linux/amd64,linux/arm64 - push: true - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha - cache-to: type=gha,mode=max diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000..ad4b1b1 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,150 @@ +name: Release Binaries + +on: + release: + types: [prereleased] + workflow_dispatch: + inputs: + dry_run: + description: 'Dry run — skip GitHub Release upload' + required: false + default: 
'true' + type: boolean + +concurrency: + group: release-binaries-${{ github.ref }} + cancel-in-progress: false + +jobs: + build: + name: Build ${{ matrix.os }}/${{ matrix.arch }} + runs-on: ${{ matrix.runner }} + strategy: + fail-fast: false + matrix: + include: + - { runner: ubuntu-latest, os: linux, arch: amd64 } + - { runner: ubuntu-24.04-arm, os: linux, arch: arm64 } + - { runner: macos-14, os: darwin, arch: arm64 } + - { runner: macos-14, os: darwin, arch: amd64 } + + steps: + - uses: actions/checkout@v4 + + - uses: jdx/mise-action@v2 + + - name: Bootstrap (modules + proto stubs) + run: mise run setup + + - name: Install OpenSSL (Linux) + if: matrix.os == 'linux' + run: sudo apt-get update && sudo apt-get install -y libssl-dev + + - name: Install OpenSSL arm64 (macOS) + if: matrix.os == 'darwin' + run: brew install openssl@3 + + - name: Install Intel Homebrew + OpenSSL x86_64 (macOS amd64 cross) + if: matrix.os == 'darwin' && matrix.arch == 'amd64' + run: | + if ! [ -x /usr/local/bin/brew ]; then + NONINTERACTIVE=1 arch -x86_64 /bin/bash -c \ + "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" + fi + arch -x86_64 /usr/local/bin/brew install openssl@3 + + - name: Check (gofmt + go vet + unit tests) + run: mise run check + + - name: Resolve version + id: meta + run: | + if [ "${{ github.event_name }}" = "release" ]; then + VERSION="${{ github.event.release.tag_name }}" + else + VERSION=$(git describe --tags --always 2>/dev/null || echo "dev") + fi + echo "version=${VERSION}" >> "$GITHUB_OUTPUT" + echo "archive=runevault_${VERSION}_${{ matrix.os }}_${{ matrix.arch }}.tar.gz" >> "$GITHUB_OUTPUT" + + - name: Build binary + env: + GOOS: ${{ matrix.os }} + GOARCH: ${{ matrix.arch }} + VERSION: ${{ steps.meta.outputs.version }} + run: | + PKG="github.com/CryptoLabInc/rune-admin/vault/internal/commands" + COMMIT=$(git rev-parse --short HEAD) + DATE=$(date -u +%Y-%m-%dT%H:%M:%SZ) + if [ "${{ matrix.os }}" = "darwin" ] && [ "${{ matrix.arch }}" = "amd64" ]; 
then + SDKROOT=$(xcrun -sdk macosx --show-sdk-path) + export CC="clang -arch x86_64 -isysroot ${SDKROOT}" + export CGO_CFLAGS="-arch x86_64 -I/usr/local/opt/openssl@3/include" + export CGO_LDFLAGS="-arch x86_64 -L/usr/local/opt/openssl@3/lib" + fi + cd vault && go build \ + -trimpath \ + -ldflags "-s -w -X '${PKG}.buildVersion=${VERSION}' -X '${PKG}.buildCommit=${COMMIT}' -X '${PKG}.buildDate=${DATE}'" \ + -o bin/runevault \ + ./cmd + + - name: Smoke test + run: | + if [ "${{ matrix.os }}" = "darwin" ] && [ "${{ matrix.arch }}" = "amd64" ]; then + arch -x86_64 ./vault/bin/runevault version + else + ./vault/bin/runevault version + fi + + - name: Package + run: | + mkdir -p _dist + cp vault/bin/runevault _dist/ + cp LICENSE _dist/ + tar -czf "${{ steps.meta.outputs.archive }}" -C _dist . + + - uses: actions/upload-artifact@v4 + with: + name: ${{ steps.meta.outputs.archive }} + path: ${{ steps.meta.outputs.archive }} + retention-days: 7 + + publish: + name: Publish + runs-on: ubuntu-latest + needs: build + permissions: + contents: write + + steps: + - uses: actions/checkout@v4 + + - name: Download build artifacts + uses: actions/download-artifact@v4 + with: + path: dist/ + merge-multiple: true + + - name: Resolve version + id: meta + run: | + if [ "${{ github.event_name }}" = "release" ]; then + echo "version=${{ github.event.release.tag_name }}" >> "$GITHUB_OUTPUT" + else + echo "version=$(git describe --tags --always 2>/dev/null || echo dev)" >> "$GITHUB_OUTPUT" + fi + + - name: Generate SHA256SUMS + working-directory: dist/ + run: sha256sum *.tar.gz > SHA256SUMS + + - name: Upload to GitHub Release + if: github.event_name == 'release' + working-directory: dist/ + env: + GH_TOKEN: ${{ github.token }} + run: | + gh release upload "${{ github.event.release.tag_name }}" \ + *.tar.gz \ + SHA256SUMS \ + --repo "${{ github.repository }}" diff --git a/.gitignore b/.gitignore index ab447f1..11762c3 100644 --- a/.gitignore +++ b/.gitignore @@ -96,6 +96,11 @@ Thumbs.db *.dll 
*.dylib +# Go build artifacts +vault/bin/ +vault/**/*.test +vault/**/*.out + # Database *.db *.sqlite @@ -114,3 +119,6 @@ test-results/ # Test fixtures (plaintext — decrypted from fixtures.tar.gz.gpg) tests/fixtures/ + +# Local dev runtime files (config, socket, keys, pid) +vault/dev/ diff --git a/.mise.ci.toml b/.mise.ci.toml index 38b2877..c539fbb 100644 --- a/.mise.ci.toml +++ b/.mise.ci.toml @@ -1,11 +1,6 @@ -# CI-only tool configuration -# Used by setting MISE_ENV=ci in GitHub Actions workflows +# CI environment overrides — merged on top of .mise.toml when MISE_ENV=ci. +# Tasks are inherited from .mise.toml; only CI-specific settings go here. -[tools] -python = "3.12" -buf = "1.66" -ruff = "0.15" - -[env] -_.python.venv = { path = ".venv", create = true } -PYTHONPATH = "{{config_root}}/vault/proto:{{config_root}}/vault" +# Only install tools required for build and test; skip deployment tools. +[settings] +enable_tools = ["go", "buf"] diff --git a/.mise.toml b/.mise.toml index c2bbd3f..267684b 100644 --- a/.mise.toml +++ b/.mise.toml @@ -1,12 +1,11 @@ # Rune-Vault Development Environment # Run `mise install` to set up all tools, then `mise run setup` to bootstrap. -# Prerequisites: mise (https://mise.jdx.dev), Docker (https://docs.docker.com/get-docker) +# Prerequisites: mise (https://mise.jdx.dev) # ── Core development tools ─────────────────────────────────────────── [tools] -python = "3.12" +go = "1.25" buf = "1.66" -ruff = "0.15" # ── Deployment tools (install with: mise install) ──────────────────── # These are needed only for cloud deployment, not for local development. 
@@ -15,127 +14,118 @@ terraform = "1.14" gcloud = "562" oci = "3" -# ── Environment variables ──────────────────────────────────────────── [env] -_.python.venv = { path = ".venv", create = true } -PYTHONPATH = "{{config_root}}/vault/proto:{{config_root}}/vault" - -# ── Docker image settings ─────────────────────────────────────────── -[vars] -docker_image = "ghcr.io/cryptolabinc/rune-vault" -docker_builder = "rune-vault-builder" -docker_platforms = "linux/amd64,linux/arm64" +CGO_ENABLED = "1" # ── Tasks ──────────────────────────────────────────────────────────── [tasks.setup] -description = "Bootstrap development environment (venv + dependencies)" +description = "Bootstrap development environment (Go modules + proto stubs)" run = """ #!/usr/bin/env bash set -euo pipefail -echo "==> Installing vault dependencies..." -pip install -q -r vault/requirements.txt - -echo "==> Installing test dependencies..." -pip install -q -r tests/requirements.txt +echo "==> Resolving Go modules..." +cd vault && go mod download && cd .. -echo "==> Generating proto stubs..." -cd vault && bash scripts/proto-gen.sh && cd .. +echo "==> Generating Go proto stubs..." +cd vault && buf generate && cd .. echo "==> Configuring git hooks..." git config core.hooksPath .githooks echo "" echo "Development environment ready." -echo "Run 'mise run test' to verify." +echo "Run 'mise run check' to verify." 
""" -[tasks.test] -description = "Run unit and integration tests" -run = "python -m pytest tests/unit tests/integration -v" - -[tasks."test:unit"] -description = "Run unit tests only" -run = "python -m pytest tests/unit -v" - -[tasks."test:cov"] -description = "Run tests with coverage report" -run = "python -m pytest tests/unit tests/integration --cov=vault --cov-report=term-missing -v" - -[tasks.lint] -description = "Run ruff linter" -run = "ruff check vault/" - -[tasks."lint:fix"] -description = "Run ruff linter with auto-fix" -run = "ruff check vault/ --fix" - -[tasks.format] -description = "Format code with ruff" -run = "ruff format vault/" - -[tasks."format:check"] -description = "Check code formatting without modifying" -run = "ruff format --check vault/" - [tasks.check] -description = "Run all quality checks (format + lint + test)" +description = "Run all quality checks (gofmt + go vet + unit tests)" run = """ #!/usr/bin/env bash set -euo pipefail -echo "==> Checking format..." -ruff format --check vault/ -echo "==> Linting..." -ruff check vault/ -echo "==> Running tests..." -python -m pytest tests/unit -v +cd vault +echo "==> Checking Go format..." +diff=$(gofmt -l .) +if [ -n "$diff" ]; then + echo "gofmt found unformatted files:" >&2 + echo "$diff" >&2 + exit 1 +fi +echo "==> go vet..." +go vet ./... +echo "==> Running unit tests..." +go test -race ./... echo "" echo "All checks passed." 
""" -[tasks.proto] -description = "Regenerate protobuf/gRPC stubs" -run = "cd vault && bash scripts/proto-gen.sh" +[tasks."proto:go"] +description = "Regenerate Go protobuf/gRPC stubs into vault/pkg/vaultpb" +run = "cd vault && buf generate" -[tasks.build] -description = "Build Docker image locally (via buildx)" +[tasks."go:build"] +description = "Build runevault binary to vault/bin/runevault" run = """ #!/usr/bin/env bash set -euo pipefail -TAG="{{arg(name="tag", default="latest")}}" +VERSION="${VERSION:-dev}" +COMMIT=$(git rev-parse --short HEAD 2>/dev/null || echo "none") +DATE=$(date -u +%Y-%m-%dT%H:%M:%SZ) +PKG="github.com/CryptoLabInc/rune-admin/vault/internal/commands" +cd vault && go build \ + -o bin/runevault \ + -ldflags "-X '${PKG}.buildVersion=${VERSION}' -X '${PKG}.buildCommit=${COMMIT}' -X '${PKG}.buildDate=${DATE}'" \ + ./cmd +""" -docker buildx inspect "{{vars.docker_builder}}" >/dev/null 2>&1 || \ - docker buildx create --name "{{vars.docker_builder}}" --use --driver docker-container +[tasks."go:test"] +description = "Run all tests including E2E (requires pre-built binary via RUNEVAULT_TEST_BINARY)" +run = "cd vault && go test -race -tags e2e ./..." -cd vault -docker buildx build --builder "{{vars.docker_builder}}" \ - -t "{{vars.docker_image}}:$TAG" \ - --load . -""" +[tasks."go:test:unit"] +description = "Run unit tests only (E2E excluded by build tag)" +run = "cd vault && go test -race ./..." -[tasks.push] -description = "Build and push multi-platform Docker image to GHCR" +[tasks."go:test:e2e"] +description = "Run E2E tests against the pre-built runevault binary (run go:build first)" run = """ #!/usr/bin/env bash set -euo pipefail -TAG="{{arg(name="tag", default="latest")}}" +BINARY="${RUNEVAULT_TEST_BINARY:-$(pwd)/vault/bin/runevault}" +if [ ! 
-x "$BINARY" ]; then + echo "runevault binary not found at $BINARY — run 'mise run go:build' first" >&2 + exit 1 +fi +export RUNEVAULT_TEST_BINARY="$BINARY" +cd vault && go test -race -tags e2e ./internal/tests/... +""" -gh auth token | docker login ghcr.io -u "$(gh api user -q .login)" --password-stdin +[tasks."go:vet"] +description = "Run go vet on all Go packages" +run = "cd vault && go vet ./..." -docker buildx inspect "{{vars.docker_builder}}" >/dev/null 2>&1 || \ - docker buildx create --name "{{vars.docker_builder}}" --use --driver docker-container +[tasks."go:fmt"] +description = "Format Go source files" +run = "cd vault && gofmt -w ." +[tasks."go:fmt:check"] +description = "Check Go formatting without modifying" +run = """ +#!/usr/bin/env bash +set -euo pipefail cd vault -docker buildx build --builder "{{vars.docker_builder}}" \ - --platform "{{vars.docker_platforms}}" \ - -t "{{vars.docker_image}}:$TAG" \ - --push . +diff=$(gofmt -l .) +if [ -n "$diff" ]; then + echo "gofmt found unformatted files:" >&2 + echo "$diff" >&2 + exit 1 +fi """ [tasks.dev] -description = "Start local development Vault via Docker Compose" -run = "cd vault && docker compose up --build -d vault" +description = "Run runevault daemon in foreground for development" +run = "cd vault && go run ./cmd --config dev/runevault.conf daemon start" [tasks."fixtures:decrypt"] description = "Decrypt test fixtures from GPG archive" diff --git a/AGENTS.md b/AGENTS.md index 324b633..82f38bd 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,7 +1,8 @@ # Rune-Vault (rune-admin) -Python 3.12 gRPC server for FHE-encrypted organizational memory. -The secret key never leaves this server. +Single-binary Go gRPC server (`runevault`) for FHE-encrypted organizational +memory. Built on `github.com/CryptoLabInc/envector-go-sdk`. The secret key +never leaves this server. ## Setup @@ -9,39 +10,41 @@ See [CONTRIBUTING.md](CONTRIBUTING.md#development-setup) for initial setup. 
## Commands -All commands **must** be run via `mise run` to ensure correct tool versions and venv activation. -Do NOT run python, pytest, or ruff directly. +All commands **must** be run via `mise run` to ensure correct tool versions. +Do NOT run go, gofmt, or buf directly. | Command | Description | |---------|-------------| -| `mise run test` | Unit + integration tests | -| `mise run test:unit` | Unit tests only | -| `mise run test:cov` | Tests with coverage report | -| `mise run lint` | Ruff linter | -| `mise run lint:fix` | Ruff with auto-fix | -| `mise run format` | Ruff formatter | -| `mise run format:check` | Check formatting without modifying | -| `mise run check` | All checks: format + lint + unit tests | -| `mise run proto` | Regenerate protobuf/gRPC stubs | -| `mise run build` | Build Docker image locally | -| `mise run dev` | Start local Vault via Docker Compose | +| `mise run setup` | Bootstrap (Go modules + proto stubs) | +| `mise run check` | All checks: gofmt + go vet + unit tests (race) | +| `mise run go:build` | Build the runevault binary to `vault/bin/runevault` | +| `mise run go:test` | Run all tests including E2E (requires `RUNEVAULT_TEST_BINARY`) | +| `mise run go:test:unit` | Run unit tests only (E2E excluded by build tag) | +| `mise run go:test:e2e` | Run E2E tests against pre-built binary (run `go:build` first) | +| `mise run go:vet` | Run go vet on all Go packages | +| `mise run go:fmt` | Format Go source files | +| `mise run go:fmt:check` | Check Go formatting without modifying | +| `mise run proto:go` | Regenerate Go protobuf/gRPC stubs into `vault/pkg/vaultpb` | +| `mise run dev` | Run runevault daemon in foreground (uses `vault/dev/runevault.conf`) | | `mise run certs` | Generate self-signed TLS certificates | +| `mise run fixtures:decrypt` | Decrypt test fixtures (requires `FIXTURES_GPG_PASSPHRASE`) | +| `mise run fixtures:encrypt` | Re-encrypt test fixtures | ## Rules - English only in code, commit messages, PR descriptions, and issue 
bodies - Do not amend commits or force-push unless explicitly instructed -- All public functions need type hints -- New gRPC methods need corresponding unit tests in `tests/unit/` -- Token/auth changes must update `tests/unit/test_auth.py` +- All exported Go identifiers need a doc comment +- New gRPC methods need corresponding unit tests in `vault/internal/server/grpc_test.go` +- Token/auth changes must update `vault/internal/tokens/store_test.go` - Run `mise run check` before committing ## Security invariants -- Secret key (`vault_keys/`) must never be logged, returned in API responses, or leave the server process -- Admin server binds to `127.0.0.1` only — never expose externally -- Token secrets must come from environment variables, never hardcoded -- TLS is required for all cloud deployments +- Secret key (`vault-keys//SecKey.json`) must never be logged, returned in API responses, or leave the server process +- Admin transport is a Unix domain socket (mode 0600, vault-user owned) — never expose externally +- Token secrets and FHE keys live in `runevault.conf` (mode 0600); secret YAML fields support `*_file` indirection for KMS-backed deployments +- TLS is required for all cloud deployments (`server.grpc.tls.disable: true` is dev-only) ## Worktree setup diff --git a/CHANGELOG.md b/CHANGELOG.md index 46e6b53..3d4563e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,37 @@ All notable changes to Rune-Vault will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### ⚠ BREAKING CHANGES + +- **Vault rewritten in Go as the single binary `runevault`** (#61). YAML config (`runevault.conf`) is now the only configuration source — env-var fallback removed, no migration helper, no deprecation banners. Existing Python-based deployments must be reinstalled via `install.sh`. 
+ +### Added + +- Production installer `install.sh` with `--target local|aws|gcp|oci`, SHA256SUMS checksum verification, systemd/launchd service registration, and `--uninstall` flow +- Dev installer `scripts/install-dev.sh` (structural sibling of `install.sh`) for local/CSP testing without GitHub releases +- CSP provisioning via Terraform for AWS, GCP, and OCI: preflight CLI/auth checks, cloud-init bootstrap, CA-cert SCP polling +- CSP uninstall flow that wraps `terraform destroy` +- `runevault status` (daemon + admin-socket health) and `runevault logs` (audit log tail) subcommands +- `runevault` group lets members run the CLI without `sudo` +- Multi-platform release pipeline (linux/darwin × amd64/arm64) with `SHA256SUMS` checksum manifest +- `EnsureVault` startup hook to activate keys and ensure index on first run + +### Changed + +- Cloud VM images bumped to Ubuntu 24.04 LTS +- OCI SCP user is now `ubuntu` +- Daemon lifecycle delegated to the OS service manager (systemd / launchd) instead of Docker +- Admin transport: HTTP on `127.0.0.1:8081` → Unix domain socket at `/opt/runevault/admin.sock` (mode 0600) +- Token / role storage: standalone `vault-tokens.yml` / `vault-roles.yml` → fields under `runevault.conf` with `*_file` indirection support + +### Removed + +- Python sources, `docker-compose.yml`, `Dockerfile`, GHCR-published Docker image +- `pyenvector` runtime dependency +- Env-var configuration fallback (`VAULT_TLS_DISABLE`, `VAULT_TEAM_SECRET`, `VAULT_AUDIT_LOG`, etc.) + ## [0.3.0] - 2026-04-07 ### ⚠ BREAKING CHANGES diff --git a/CLAUDE.md b/CLAUDE.md index b53a5d9..82f38bd 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,7 +1,8 @@ # Rune-Vault (rune-admin) -Python 3.12 gRPC server for FHE-encrypted organizational memory. -The secret key never leaves this server. +Single-binary Go gRPC server (`runevault`) for FHE-encrypted organizational +memory. Built on `github.com/CryptoLabInc/envector-go-sdk`. The secret key +never leaves this server. 
## Setup @@ -9,40 +10,41 @@ See [CONTRIBUTING.md](CONTRIBUTING.md#development-setup) for initial setup. ## Commands -All commands **must** be run via `mise run` to ensure correct tool versions and venv activation. -Do NOT run python, pytest, or ruff directly. +All commands **must** be run via `mise run` to ensure correct tool versions. +Do NOT run go, gofmt, or buf directly. | Command | Description | |---------|-------------| -| `mise run test` | Unit + integration tests | -| `mise run test:unit` | Unit tests only | -| `mise run test:cov` | Tests with coverage report | -| `mise run lint` | Ruff linter | -| `mise run lint:fix` | Ruff with auto-fix | -| `mise run format` | Ruff formatter | -| `mise run format:check` | Check formatting without modifying | -| `mise run check` | All checks: format + lint + unit tests | -| `mise run proto` | Regenerate protobuf/gRPC stubs | -| `mise run build` | Build Docker image locally | -| `mise run push` | Build and push multi-platform image to GHCR (requires GHCR access) | -| `mise run dev` | Start local Vault via Docker Compose | +| `mise run setup` | Bootstrap (Go modules + proto stubs) | +| `mise run check` | All checks: gofmt + go vet + unit tests (race) | +| `mise run go:build` | Build the runevault binary to `vault/bin/runevault` | +| `mise run go:test` | Run all tests including E2E (requires `RUNEVAULT_TEST_BINARY`) | +| `mise run go:test:unit` | Run unit tests only (E2E excluded by build tag) | +| `mise run go:test:e2e` | Run E2E tests against pre-built binary (run `go:build` first) | +| `mise run go:vet` | Run go vet on all Go packages | +| `mise run go:fmt` | Format Go source files | +| `mise run go:fmt:check` | Check Go formatting without modifying | +| `mise run proto:go` | Regenerate Go protobuf/gRPC stubs into `vault/pkg/vaultpb` | +| `mise run dev` | Run runevault daemon in foreground (uses `vault/dev/runevault.conf`) | | `mise run certs` | Generate self-signed TLS certificates | +| `mise run fixtures:decrypt` | 
Decrypt test fixtures (requires `FIXTURES_GPG_PASSPHRASE`) | +| `mise run fixtures:encrypt` | Re-encrypt test fixtures | ## Rules - English only in code, commit messages, PR descriptions, and issue bodies - Do not amend commits or force-push unless explicitly instructed -- All public functions need type hints -- New gRPC methods need corresponding unit tests in `tests/unit/` -- Token/auth changes must update `tests/unit/test_auth.py` +- All exported Go identifiers need a doc comment +- New gRPC methods need corresponding unit tests in `vault/internal/server/grpc_test.go` +- Token/auth changes must update `vault/internal/tokens/store_test.go` - Run `mise run check` before committing ## Security invariants -- Secret key (`vault_keys/`) must never be logged, returned in API responses, or leave the server process -- Admin server binds to `127.0.0.1` only — never expose externally -- Token secrets must come from environment variables, never hardcoded -- TLS is required for all cloud deployments +- Secret key (`vault-keys//SecKey.json`) must never be logged, returned in API responses, or leave the server process +- Admin transport is a Unix domain socket (mode 0600, vault-user owned) — never expose externally +- Token secrets and FHE keys live in `runevault.conf` (mode 0600); secret YAML fields support `*_file` indirection for KMS-backed deployments +- TLS is required for all cloud deployments (`server.grpc.tls.disable: true` is dev-only) ## Worktree setup diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cf56502..2065723 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -15,7 +15,7 @@ Before creating an issue: 2. 
Collect relevant information: - Rune-Admin version - Deployment platform (OCI/AWS/GCP) - - Python version + - Go version (`go version`) - Error messages and logs - Steps to reproduce @@ -34,11 +34,10 @@ Feature requests should include: ### Prerequisites - [mise](https://mise.jdx.dev): `curl https://mise.jdx.dev/install.sh | sh` -- [Docker](https://docs.docker.com/get-docker/) (for local Vault and image builds) **CSP deployment only:** -- Access to cloud provider (OCI/AWS/GCP) -- GitHub CLI (`gh`) with GHCR push access to the CryptoLabInc organization +- Access to a cloud provider (AWS, GCP, or OCI) and the matching CLI authenticated locally +- Docker (used by `scripts/install-dev.sh` to cross-compile a Linux/amd64 binary for cloud VMs) ### Local Setup @@ -50,13 +49,13 @@ Feature requests should include: 2. **Install tools and bootstrap** ```bash - mise install # Install Python 3.12, buf, ruff, terraform, cloud CLIs - mise run setup # Create venv, install deps, generate proto stubs + mise install # Install Go 1.25, buf, terraform, cloud CLIs + mise run setup # Resolve Go modules, generate proto stubs ``` 3. **Verify setup** ```bash - mise run test:unit # Run unit tests to verify + mise run go:test:unit # Run unit tests to verify ``` 4. **(Optional) Activate mise in your shell** @@ -68,47 +67,35 @@ Feature requests should include: ### Commands -All commands **must** be run via `mise run` to ensure correct tool versions and venv activation. 
- -| Command | Description | -|---------|-------------| -| `mise run test` | Unit + integration tests | -| `mise run test:unit` | Unit tests only | -| `mise run test:cov` | Tests with coverage report | -| `mise run lint` | Ruff linter | -| `mise run lint:fix` | Ruff with auto-fix | -| `mise run format` | Ruff formatter | -| `mise run format:check` | Check formatting without modifying | -| `mise run check` | All checks: format + lint + unit tests | -| `mise run proto` | Regenerate protobuf/gRPC stubs | -| `mise run build` | Build Docker image locally | -| `mise run push` | Build and push multi-platform image to GHCR (requires GHCR access) | -| `mise run dev` | Start local Vault via Docker Compose | -| `mise run certs` | Generate self-signed TLS certificates | +All commands **must** be run via `mise run` to ensure correct tool versions. + +See [CLAUDE.md](CLAUDE.md#commands) (or [AGENTS.md](AGENTS.md#commands)) for the complete task table. ## Testing ### Test Structure ``` -tests/ -├── unit/ # Fast, isolated tests per module -└── integration/ # End-to-end Vault API tests +vault/internal/ +├── tokens/ # Token store + role/rate-limit unit tests +├── crypto/ # HKDF + AES-CTR + envector-go-sdk wrappers +├── server/ # gRPC handlers, interceptors, audit, admin UDS, config +├── commands/ # CLI subcommands + admin client +└── tests/ # E2E (build tag `e2e`): decrypt pipeline (fixture-based) + CLI smoke ``` ### Running Tests -All test commands **must** be run via `mise run`: - ```bash -mise run test # Unit + integration tests -mise run test:unit # Unit tests only -mise run test:cov # Tests with coverage report +mise run go:test:unit # Unit tests only (E2E excluded by build tag) +mise run go:build # Build vault/bin/runevault first… +mise run go:test:e2e # …then run E2E against the built binary +mise run go:test # All tests including E2E (requires RUNEVAULT_TEST_BINARY) ``` ### Test Fixtures -Integration tests use GPG-encrypted fixtures containing FHE keys and ciphertext blobs. 
See [tests/FIXTURES.md](tests/FIXTURES.md) for the full update procedure, including passphrase rotation and re-encryption steps. +Integration tests use GPG-encrypted fixtures containing FHE keys and ciphertext blobs. See [tests/FIXTURES.md](tests/FIXTURES.md) for the full update procedure, including passphrase rotation and re-encryption steps. The fixture-based decrypt-pipeline test under `vault/internal/tests/` skips automatically when `tests/fixtures/` is not decrypted. ### Test Requirements @@ -116,17 +103,16 @@ Integration tests use GPG-encrypted fixtures containing FHE keys and ciphertext - Use fixtures for crypto setup to avoid repeated key generation - Mock external dependencies - Test both success and error paths -- New gRPC methods need corresponding unit tests in `tests/unit/` -- Token/auth changes must update `tests/unit/test_auth.py` +- New gRPC methods need corresponding unit tests in `vault/internal/server/grpc_test.go` +- Token/auth changes must update `vault/internal/tokens/store_test.go` ## Code Style -### Python +### Go -- Follow PEP 8 -- All public functions need type hints -- Keep functions focused and testable -- Format and lint with ruff: `mise run format` and `mise run lint` +- Run `mise run go:fmt` to format +- All exported identifiers need a doc comment +- Tests live alongside the code they test (`*_test.go`) - Run `mise run check` before committing ### Shell Scripts @@ -160,22 +146,29 @@ Integration tests use GPG-encrypted fixtures containing FHE keys and ciphertext ### Local Testing ```bash -mise run dev # Start local Vault via Docker Compose -mise run build # Build Docker image locally +mise run dev # Run runevault daemon in foreground (uses vault/dev/runevault.conf) +mise run go:build # Build runevault binary to vault/bin/runevault ``` ### Testing the Installer Locally -Use `scripts/install-dev.sh` to test the full installation flow using local working tree files instead of downloading from GitHub. 
+`scripts/install-dev.sh` is a structural sibling of `install.sh` that +exercises the full install flow against a locally built binary instead +of a published GitHub release. ```bash -sudo bash scripts/install-dev.sh +# Local install into a rootless prefix (no service registration) +RUNEVAULT_SKIP_SERVICE=1 \ + bash scripts/install-dev.sh --target local --prefix "$HOME/runevault-test" + +# Cloud install: cross-compiles linux/amd64 in golang:1.25-bookworm, +# uploads via SCP, and runs install.sh on the remote VM. +bash scripts/install-dev.sh --target oci --install-dir "$HOME/rune-vault-oci" ``` -This script behaves identically to `install.sh` but: -- Copies `docker-compose.yml`, TLS scripts, and Terraform configs from the local repo -- Uses a locally built Docker image (`mise run build`) instead of pulling from GHCR -- Requires no network access to GitHub +Flags mirror `install.sh`: `--target`, `--install-dir`, `--prefix`, +`--non-interactive`, `--uninstall`, `--force`. Uninstall is delegated to +`install.sh --uninstall`. 
## Submitting Changes @@ -248,12 +241,23 @@ Closes #123 ``` rune-admin/ -├── vault/ # Rune-Vault gRPC server (see [Architecture](docs/ARCHITECTURE.md)) -├── deployment/ # Terraform configs (OCI, AWS, GCP) + monitoring +├── vault/ +│ ├── cmd/ # runevault binary entry point +│ ├── internal/ # commands, server, tokens, crypto, tests +│ ├── pkg/vaultpb/ # generated gRPC stubs +│ ├── proto/ # .proto source +│ └── dev/ # local dev config (gitignored) +├── deployment/ +│ ├── aws/ gcp/ oci/ # Terraform per CSP +│ ├── systemd/runevault.service # Linux service unit +│ └── launchd/com.cryptolabinc.runevault.plist # macOS service ├── scripts/ -├── tests/ # Unit, integration tests -├── docs/ # Architecture docs -└── install.sh # Interactive installer +│ ├── install-dev.sh # Dev sibling of install.sh +│ ├── generate-certs.sh # Self-signed TLS certs for dev +│ └── generate-test-fixtures.py # Generates GPG-encrypted test fixtures +├── tests/ # Encrypted fixture archive (see FIXTURES.md) +├── docs/ARCHITECTURE.md # Architecture & data flow +└── install.sh # Production installer (SHA256SUMS-verified) ``` ## Vault Architecture @@ -262,11 +266,11 @@ Core server code is in `vault/`. 
See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md ## Security Considerations -- Secret key (`vault_keys/`) must never be logged, returned in API responses, or leave the server process -- Admin server binds to `127.0.0.1` only — never expose externally -- Never commit private keys (SecKey.json) -- Token secrets must come from environment variables, never hardcoded -- TLS is required for all cloud deployments +- Secret key (`vault-keys/<key-id>/SecKey.json`) must never be logged, returned in API responses, or leave the server process +- Admin transport is a Unix domain socket (mode 0600, vault-user owned) — never expose externally +- Never commit private keys (`SecKey.json`) or filled-in `runevault.conf` files +- Token secrets and FHE keys live in `runevault.conf` (mode 0600); secret YAML fields support `*_file` indirection for KMS-backed deployments +- TLS is required for all cloud deployments (`server.grpc.tls.disable: true` is dev-only) - Review security implications of changes - Test authentication and authorization diff --git a/README.md b/README.md index 72b5829..a32d06a 100644 --- a/README.md +++ b/README.md @@ -19,14 +19,14 @@ For system architecture and data flow details, see [docs/ARCHITECTURE.md](docs/A ### Platform -- **macOS** or **Linux** (Windows is not supported — pyenvector requires Unix) +- **macOS** or **Linux** (Windows is not supported — `runevault` registers a systemd or launchd service) ### For Administrators 1. **enVector Cloud account** at [https://envector.io](https://envector.io) — Cluster Endpoint and API Key -2. **Cloud provider account** (OCI, AWS, or GCP) — only needed for cloud deployment +2. **Cloud provider account** (AWS, GCP, or OCI) — only needed for cloud deployment -The [installer](#quick-start) will automatically check and install required tools (Docker, Terraform, etc.). +The [installer](#quick-start) auto-checks for the tools it needs (`terraform` and the relevant cloud CLI when targeting a CSP). 
### For Team Members @@ -38,38 +38,47 @@ Team members install [Rune](https://github.com/CryptoLabInc/rune) from Claude Ma ## Quick Start -### 1. Deploy Rune-Vault +### 1. Install Rune-Vault + +The interactive installer downloads the `runevault` binary, verifies its +`SHA256SUMS` checksum, renders `runevault.conf`, generates TLS certs, +and registers a `runevault` service (systemd on Linux, launchd on macOS): ```bash -curl -fsSL https://raw.githubusercontent.com/CryptoLabInc/rune-admin/main/install.sh -o install.sh && sudo bash install.sh +# Local install +curl -fsSL https://raw.githubusercontent.com/CryptoLabInc/rune-admin/main/install.sh \ + | sudo bash -s -- --target local + +# Cloud install (provisions a VM + bootstraps it via Terraform) +curl -fsSL https://raw.githubusercontent.com/CryptoLabInc/rune-admin/main/install.sh \ + | sudo bash -s -- --target aws # or gcp, oci ``` -The installer will interactively guide you through: -- Cloud provider selection (AWS / GCP / OCI) -- enVector Cloud credentials -- TLS certificate generation -- Terraform-based VM provisioning +The installer prompts for team name, enVector endpoint, and CSP-specific +inputs (region, GCP project ID, OCI compartment OCID). Use `--non-interactive` +plus the `RUNEVAULT_*` env vars listed in [`install.sh`](install.sh) for CI. -**Output**: -``` -vault_endpoint = "vault-yourteam.oci.envector.io:50051" -ca.pem downloaded for TLS verification -``` +If you'd rather inspect the script before running it, download `install.sh` +and the `SHA256SUMS` file from the release page first, then run `install.sh` +with the binary it pulls down — see [Release Checksum Verification](#release-checksum-verification). ### 2. 
Verify Deployment ```bash # gRPC health check (requires grpcurl: brew install grpcurl) -grpcurl -cacert ca.pem <vault-host>:50051 grpc.health.v1.Health/Check +grpcurl -cacert /opt/runevault/certs/ca.pem <vault-host>:50051 grpc.health.v1.Health/Check # Expected: { "status": "SERVING" } + +# Or use the runevault CLI to query daemon status via the admin socket +runevault status ``` ### 3. Onboard Team Members ```bash -# Issue a per-user token -runevault token issue --user alice --role member +# Issue a per-user token (90-day expiry) +sudo runevault token issue --user alice --role member --expires 90d # Share via secure channel (1Password, Signal, etc.): # - Vault Endpoint @@ -78,20 +87,54 @@ runevault token issue --user alice --role member # - enVector API Key ``` +Members of the `runevault` group can run the CLI without `sudo`. + Team members install [Rune](https://github.com/CryptoLabInc/rune) and configure with the provided credentials. +### From Source (development) + +```bash +git clone https://github.com/CryptoLabInc/rune-admin.git +cd rune-admin +mise install # Go 1.25, buf, terraform, cloud CLIs +mise run setup # Resolve Go modules + generate proto stubs +mise run go:build # Builds vault/bin/runevault +# Copy + edit a dev config (the vault/dev/ tree is gitignored): +cp vault/internal/server/testdata/runevault.conf.example vault/dev/runevault.conf +mise run dev # Run the daemon in the foreground (uses vault/dev/runevault.conf) +``` + ## Admin Workflows -### Rotate Token +All admin commands talk to the daemon over a Unix domain socket +(`/opt/runevault/admin.sock`). Members of the `runevault` group can run +them without `sudo`. 
+ +### Manage Tokens ```bash -# Rotate a single user's token -runevault token rotate --user alice +runevault token issue --user alice --role member --expires 90d +runevault token list +runevault token rotate --user alice # or --all +runevault token revoke --user alice +``` -# Rotate all tokens -runevault token rotate --all +### Manage Roles -# Distribute new tokens to team members via secure channel +```bash +runevault role list +runevault role create --name <name> --scope a,b,c --top-k 10 --rate-limit 30/60s +runevault role update --name <name> [--scope ...] [--top-k ...] [--rate-limit ...] +runevault role delete --name <name> +``` + +### Daemon Health & Logs + +```bash +runevault status # health + socket liveness +runevault logs # tail audit log +sudo systemctl restart runevault # Linux +sudo launchctl kickstart -k system/com.cryptolabinc.runevault # macOS ``` ## Security @@ -113,13 +156,42 @@ Vault communications MUST use TLS. The installer automatically configures TLS ce - **EncKey/EvalKey**: Safe to distribute (public keys) - Per-agent metadata encryption uses HKDF-derived DEKs (no separate key file) +### Release Checksum Verification + +Every GitHub release ships a `SHA256SUMS` file alongside the binaries. +`install.sh` downloads it and runs `sha256sum --check` automatically. To +verify by hand: + +```bash +sha256sum --check --ignore-missing SHA256SUMS +``` + +Trust in the `SHA256SUMS` file itself relies on GitHub's HTTPS download +of the release page. + ## Deployment Targets -All cloud deployments are handled by the [interactive installer](#quick-start). +`install.sh --target <csp>` provisions a VM via Terraform and +bootstraps `runevault` on it end-to-end. 
Each target lives under +`deployment/<csp>/`: -- **OCI** (Oracle Cloud Infrastructure): `deployment/oci/` - **AWS** (Amazon Web Services): `deployment/aws/` - **GCP** (Google Cloud Platform): `deployment/gcp/` +- **OCI** (Oracle Cloud Infrastructure): `deployment/oci/` + +Service files for native installs are under `deployment/systemd/` and +`deployment/launchd/`. + +## Uninstall + +```bash +# Local: stops the service and removes /opt/runevault (prompts to keep data) +sudo bash install.sh --uninstall --target local + +# Cloud: runs `terraform destroy` against the install dir created earlier +sudo bash install.sh --uninstall --target aws \ + --install-dir "$HOME/rune-vault-aws" +``` ## Development @@ -131,39 +203,42 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup, commands, and guid ```bash # Check Vault is reachable -grpcurl -cacert ca.pem vault-yourteam.oci.envector.io:50051 grpc.health.v1.Health/Check +grpcurl -cacert /opt/runevault/certs/ca.pem <vault-host>:50051 grpc.health.v1.Health/Check -# Check firewall rules (port 50051 must be open) -cd deployment/oci -terraform state show oci_core_security_list.vault +# Inspect the security group / firewall rule (port 50051 must be open) +cd "$HOME/rune-vault-<csp>" +terraform show | grep -A5 -E 'security_(group|list)' -# Verify token — have team member re-enter carefully +# Verify the token — have the team member re-enter it carefully ``` ### Issue: Slow decryption ```bash -# Check Vault CPU usage — increase instance resources if >80% -ssh admin@vault-yourteam.oci.envector.io +# Check Vault CPU usage — re-provision with a larger VM if >80% +ssh ubuntu@<vault-host> # or ec2-user@... / opc@... 
depending on CSP top -# Check audit log for latency (mounted to host ./logs/) -tail -20 /opt/rune/logs/audit.log +# Tail audit log for latency +sudo tail -20 /opt/runevault/logs/audit.log +# Or via the CLI: +runevault logs ``` ### Issue: Vault crashed ```bash -# Check logs -docker logs rune-vault --tail 100 +# Inspect logs +sudo journalctl -u runevault -n 100 # Linux +sudo log show --predicate 'process == "runevault"' --last 10m # macOS # Restart -docker restart rune-vault +sudo systemctl restart runevault # Linux +sudo launchctl kickstart -k system/com.cryptolabinc.runevault # macOS -# If persistent, redeploy -cd deployment/oci -terraform destroy -terraform apply +# If persistent, re-provision the VM: +sudo bash install.sh --uninstall --target <csp> --install-dir "$HOME/rune-vault-<csp>" +sudo bash install.sh --target <csp> ``` ## Documentation diff --git a/deployment/aws/cloud-init-dev.yaml b/deployment/aws/cloud-init-dev.yaml new file mode 100644 index 0000000..d6424ae --- /dev/null +++ b/deployment/aws/cloud-init-dev.yaml @@ -0,0 +1,7 @@ +#cloud-config +# Dev mode: installs prereqs only. install.sh is injected via SCP by install-dev.sh. 
+package_update: true +packages: [ca-certificates, curl, openssl] + +runcmd: + - touch /var/run/runevault-dev-ready diff --git a/deployment/aws/cloud-init.yaml b/deployment/aws/cloud-init.yaml index 170b536..54bdacd 100644 --- a/deployment/aws/cloud-init.yaml +++ b/deployment/aws/cloud-init.yaml @@ -1,171 +1,24 @@ #cloud-config - -# Rune-Vault Installation Script for AWS EC2 -# Deploys Docker-based Vault with gRPC (port 50051) - package_update: true -package_upgrade: true - -packages: - - ca-certificates - - curl - - gnupg - - jq - - openssl +packages: [ca-certificates, curl, openssl] write_files: - - path: /opt/rune/docker-compose.yml - content: | - services: - vault: - image: ghcr.io/cryptolabinc/rune-vault:latest - container_name: rune-vault - restart: unless-stopped - ports: - - "0.0.0.0:50051:50051" - environment: - - VAULT_TEAM_SECRET=${team_secret} - - VAULT_INDEX_NAME=${vault_index_name} - - ENVECTOR_ENDPOINT=${envector_endpoint} - - ENVECTOR_API_KEY=${envector_api_key} - - EMBEDDING_DIM=1024 - - VAULT_TLS_CERT=$${VAULT_TLS_CERT:-} - - VAULT_TLS_KEY=$${VAULT_TLS_KEY:-} - - VAULT_TLS_DISABLE=$${VAULT_TLS_DISABLE:-} - volumes: - - vault-keys:/app/vault_keys:rw - - ./config:/app/config:rw - - ./certs:/app/certs:rw - - ./backups:/secure/backups:rw - - ./logs:/var/log/rune-vault:rw - healthcheck: - test: ["CMD", "curl", "-sf", "http://localhost:8081/health"] - interval: 30s - timeout: 10s - retries: 3 - security_opt: - - no-new-privileges:true - deploy: - resources: - limits: - memory: 1G - cpus: "1.0" - reservations: - memory: 512M - cpus: "0.5" - - volumes: - vault-keys: - owner: root:root - permissions: '0644' - - - path: /opt/rune/.env + - path: /etc/profile.d/runevault-installer-env.sh content: | - VAULT_TLS_CERT=${tls_mode == "none" ? "" : "/app/certs/server.pem"} - VAULT_TLS_KEY=${tls_mode == "none" ? "" : "/app/certs/server.key"} - VAULT_TLS_DISABLE=${tls_mode == "none" ? 
"true" : ""} - owner: root:root + export RUNEVAULT_TEAM_NAME='${team_name}' + export RUNEVAULT_ENVECTOR_ENDPOINT='${envector_endpoint}' + export RUNEVAULT_ENVECTOR_API_KEY='${envector_api_key}' permissions: '0600' runcmd: - # Create Rune directory structure - - mkdir -p /opt/rune/certs /opt/rune/backups /opt/rune/logs /opt/rune/config - - chmod 700 /opt/rune/certs - - # Install Docker CE with compose plugin (v2) - - | - install -m 0755 -d /etc/apt/keyrings - curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc - chmod a+r /etc/apt/keyrings/docker.asc - echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu $(. /etc/os-release && echo $VERSION_CODENAME) stable" > /etc/apt/sources.list.d/docker.list - apt-get update - apt-get install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin - - # Start Docker - - systemctl enable docker - - systemctl start docker - - # Add ubuntu user to docker group and set up runevault alias - - usermod -aG docker ubuntu + - exec > /var/log/runevault-install.log 2>&1 + - set -a; . /etc/profile.d/runevault-installer-env.sh; set +a - | - if ! 
grep -q 'alias runevault=' /home/ubuntu/.bashrc 2>/dev/null; then - echo "alias runevault='docker exec -it rune-vault python3 /app/vault_admin_cli.py'" >> /home/ubuntu/.bashrc - fi - - # Generate per-user token auth config files - - | - cat > /opt/rune/config/vault-roles.yml <<'ROLES' - roles: - admin: - scope: [get_public_key, decrypt_scores, decrypt_metadata, manage_tokens] - top_k: 50 - rate_limit: 150/60s - member: - scope: [get_public_key, decrypt_scores, decrypt_metadata] - top_k: 10 - rate_limit: 30/60s - ROLES - echo "tokens: []" > /opt/rune/config/vault-tokens.yml - chmod 600 /opt/rune/config/vault-roles.yml /opt/rune/config/vault-tokens.yml - - # TLS setup — inline cert generation (avoids GitHub rate limits) - - | - if [ "${tls_mode}" = "self-signed" ]; then - CERT_DIR="/opt/rune/certs" - PUBLIC_IP=$(curl -4 -sf --connect-timeout 5 ifconfig.me 2>/dev/null || true) - openssl genrsa -out "$CERT_DIR/ca.key" 4096 2>/dev/null - openssl req -new -x509 -key "$CERT_DIR/ca.key" -out "$CERT_DIR/ca.pem" \ - -days 3650 -subj "/CN=Rune-Vault CA" -sha256 - openssl genrsa -out "$CERT_DIR/server.key" 2048 2>/dev/null - TMPCONF=$(mktemp) - printf '%s\n' \ - '[req]' \ - 'distinguished_name = req_dn' \ - 'req_extensions = v3_req' \ - 'prompt = no' \ - '[req_dn]' \ - 'CN = localhost' \ - '[v3_req]' \ - 'subjectAltName = @alt_names' \ - '[alt_names]' \ - 'DNS.1 = localhost' \ - 'DNS.2 = vault' \ - 'DNS.3 = rune-vault' \ - 'IP.1 = 127.0.0.1' \ - > "$TMPCONF" - TLS_HOSTNAME="${tls_hostname}" - if [ -n "$TLS_HOSTNAME" ]; then - echo "DNS.4 = $TLS_HOSTNAME" >> "$TMPCONF" - fi - if [ -n "$PUBLIC_IP" ]; then - echo "IP.2 = $PUBLIC_IP" >> "$TMPCONF" - fi - openssl req -new -key "$CERT_DIR/server.key" -out "$CERT_DIR/server.csr" -config "$TMPCONF" - openssl x509 -req -in "$CERT_DIR/server.csr" \ - -CA "$CERT_DIR/ca.pem" -CAkey "$CERT_DIR/ca.key" -CAcreateserial \ - -out "$CERT_DIR/server.pem" -days 825 -sha256 \ - -extfile "$TMPCONF" -extensions v3_req 2>/dev/null - rm -f "$TMPCONF" 
"$CERT_DIR/server.csr" "$CERT_DIR/ca.srl" - chmod 600 "$CERT_DIR/ca.key" "$CERT_DIR/server.key" - chmod 644 "$CERT_DIR/ca.pem" "$CERT_DIR/server.pem" - fi - - # Pull with retry and start Rune-Vault - - | - cd /opt/rune - for i in 1 2 3 4 5; do - docker compose pull && break - echo "Docker pull retry $i..." && sleep 10 - done - - cd /opt/rune && docker compose up -d - - # Wait for Vault to be ready - - sleep 10 - - timeout 300 bash -c 'until docker exec rune-vault curl -sf http://localhost:8081/health 2>/dev/null; do sleep 2; done' - -final_message: | - Rune-Vault installation completed! - - Vault gRPC endpoint: :50051 - Health check: docker exec rune-vault curl -sf http://localhost:8081/health - Docker logs: docker logs rune-vault + INSTALL_URL="https://raw.githubusercontent.com/CryptoLabInc/rune-admin/${runevault_version}/install.sh" + for i in 1 2 3 4 5; do + curl -fsSL --retry 5 --retry-delay 10 --connect-timeout 15 "$${INSTALL_URL}" -o /tmp/install.sh && break + sleep $((i*10)) + done + bash /tmp/install.sh --target local --non-interactive --version "${runevault_version}" + - usermod -aG runevault ubuntu + - rm -f /etc/profile.d/runevault-installer-env.sh diff --git a/deployment/aws/main.tf b/deployment/aws/main.tf index 108d44b..f9e3ef5 100644 --- a/deployment/aws/main.tf +++ b/deployment/aws/main.tf @@ -26,24 +26,12 @@ variable "team_name" { default = "default" } -variable "team_secret" { - description = "Team secret for DEK derivation. Generated by install.sh." 
- type = string - sensitive = true -} - variable "tls_mode" { description = "TLS mode: self-signed or none" type = string default = "self-signed" } -variable "tls_hostname" { - description = "Domain name to include in TLS certificate SAN" - type = string - default = "" -} - variable "envector_endpoint" { description = "enVector Cloud endpoint" type = string @@ -55,18 +43,17 @@ variable "envector_api_key" { sensitive = true } -variable "vault_index_name" { - description = "Vault index name" - type = string - default = "runecontext" -} - variable "instance_type" { description = "EC2 instance type" type = string default = "t3.medium" # 2 vCPU, 4GB RAM } +variable "runevault_version" { + description = "Pinned runevault release tag — drives the install.sh URL and binary version on the VM." + type = string +} + variable "public_key" { description = "SSH public key content for EC2 access" type = string @@ -80,7 +67,7 @@ data "aws_ami" "ubuntu" { filter { name = "name" - values = ["ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-*"] + values = ["ubuntu/images/hvm-ssd-gp3/ubuntu-noble-24.04-amd64-server-*"] } filter { @@ -209,13 +196,10 @@ resource "aws_instance" "vault" { key_name = var.public_key != "" ? 
aws_key_pair.vault_key[0].key_name : null user_data = templatefile("${path.module}/cloud-init.yaml", { - team_secret = var.team_secret - team_name = var.team_name - tls_mode = var.tls_mode - tls_hostname = var.tls_hostname - envector_endpoint = var.envector_endpoint - envector_api_key = var.envector_api_key - vault_index_name = var.vault_index_name + team_name = var.team_name + envector_endpoint = var.envector_endpoint + envector_api_key = var.envector_api_key + runevault_version = var.runevault_version }) root_block_device { @@ -255,12 +239,6 @@ output "vault_url" { value = "${aws_eip.vault_eip.public_ip}:50051" } -output "team_secret" { - description = "Team secret for DEK derivation" - value = var.team_secret - sensitive = true -} - output "vault_public_ip" { description = "Public IP address" value = aws_eip.vault_eip.public_ip diff --git a/deployment/gcp/main.tf b/deployment/gcp/main.tf index bb4d206..3a5d9f0 100644 --- a/deployment/gcp/main.tf +++ b/deployment/gcp/main.tf @@ -37,12 +37,6 @@ variable "team_name" { type = string } -variable "team_secret" { - description = "Team secret for DEK derivation. Generated by install.sh." - type = string - sensitive = true -} - locals { zone = var.zone != "" ? var.zone : "${var.region}-a" } @@ -53,12 +47,6 @@ variable "tls_mode" { default = "self-signed" } -variable "tls_hostname" { - description = "Domain name to include in TLS certificate SAN" - type = string - default = "" -} - variable "envector_endpoint" { description = "enVector Cloud endpoint" type = string @@ -70,18 +58,17 @@ variable "envector_api_key" { sensitive = true } -variable "vault_index_name" { - description = "Vault index name" - type = string - default = "runecontext" -} - variable "machine_type" { description = "Compute Engine machine type" type = string default = "e2-medium" # 2 vCPU, 4GB RAM } +variable "runevault_version" { + description = "Pinned runevault release tag — drives the install.sh URL and binary version on the VM." 
+ type = string +} + variable "public_key" { description = "SSH public key content for instance access" type = string @@ -145,7 +132,7 @@ resource "google_compute_instance" "vault" { boot_disk { initialize_params { - image = "ubuntu-os-cloud/ubuntu-2204-lts" + image = "ubuntu-os-cloud/ubuntu-2404-lts-amd64" size = 20 type = "pd-standard" } @@ -164,13 +151,10 @@ resource "google_compute_instance" "vault" { } metadata_startup_script = templatefile("${path.module}/startup-script.sh", { - team_secret = var.team_secret - team_name = var.team_name - tls_mode = var.tls_mode - tls_hostname = var.tls_hostname - envector_endpoint = var.envector_endpoint - envector_api_key = var.envector_api_key - vault_index_name = var.vault_index_name + team_name = var.team_name + envector_endpoint = var.envector_endpoint + envector_api_key = var.envector_api_key + runevault_version = var.runevault_version }) service_account { @@ -193,12 +177,6 @@ output "vault_url" { value = "${google_compute_address.vault_ip.address}:50051" } -output "team_secret" { - description = "Team secret for DEK derivation" - value = var.team_secret - sensitive = true -} - output "vault_public_ip" { description = "Public IP address" value = google_compute_address.vault_ip.address diff --git a/deployment/gcp/startup-script-dev.sh b/deployment/gcp/startup-script-dev.sh new file mode 100755 index 0000000..55dea6f --- /dev/null +++ b/deployment/gcp/startup-script-dev.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# Dev mode: installs prereqs only. install.sh + binary injected via SCP by install-dev.sh. +set -euo pipefail +exec > /var/log/runevault-install.log 2>&1 +echo "=== runevault dev startup at $(date) ===" + +for i in $(seq 1 30); do + apt-get update -q && apt-get install -y ca-certificates curl openssl && break + echo "apt retry $i..." 
&& sleep 10 +done + +touch /var/run/runevault-dev-ready +echo "=== prereqs ready at $(date), waiting for install-dev.sh injection ===" diff --git a/deployment/gcp/startup-script.sh b/deployment/gcp/startup-script.sh index 20829c7..217f7a6 100644 --- a/deployment/gcp/startup-script.sh +++ b/deployment/gcp/startup-script.sh @@ -1,155 +1,30 @@ #!/bin/bash set -euo pipefail +exec > /var/log/runevault-install.log 2>&1 +echo "=== runevault startup at $(date) ===" -# Rune-Vault Startup Script for GCP Compute Engine -# Deploys Docker-based Vault with gRPC (port 50051) - -exec > /var/log/rune-vault-startup.log 2>&1 -echo "=== Rune-Vault startup script began at $(date) ===" - -# Install packages -apt-get update -apt-get install -y ca-certificates curl gnupg jq openssl - -# Create Rune directory structure -mkdir -p /opt/rune/certs /opt/rune/backups /opt/rune/logs /opt/rune/config -chmod 700 /opt/rune/certs - -# Write docker-compose.yml -cat > /opt/rune/docker-compose.yml <<'COMPOSE' -services: - vault: - image: ghcr.io/cryptolabinc/rune-vault:latest - container_name: rune-vault - restart: unless-stopped - ports: - - "0.0.0.0:50051:50051" - env_file: - - .env - environment: - - VAULT_TEAM_SECRET=${team_secret} - - VAULT_INDEX_NAME=${vault_index_name} - - ENVECTOR_ENDPOINT=${envector_endpoint} - - ENVECTOR_API_KEY=${envector_api_key} - - EMBEDDING_DIM=1024 - volumes: - - vault-keys:/app/vault_keys:rw - - ./config:/app/config:rw - - ./certs:/app/certs:rw - - ./backups:/secure/backups:rw - - ./logs:/var/log/rune-vault:rw - healthcheck: - test: ["CMD", "curl", "-sf", "http://localhost:8081/health"] - interval: 30s - timeout: 10s - retries: 3 - security_opt: - - no-new-privileges:true - deploy: - resources: - limits: - memory: 1G - cpus: "1.0" - reservations: - memory: 512M - cpus: "0.5" - -volumes: - vault-keys: -COMPOSE +for i in $(seq 1 30); do + apt-get update -q && apt-get install -y ca-certificates curl openssl && break + echo "apt retry $i..." 
&& sleep 10 +done -# Write .env file -cat > /opt/rune/.env <<'ENVFILE' -VAULT_TLS_CERT=${tls_mode == "none" ? "" : "/app/certs/server.pem"} -VAULT_TLS_KEY=${tls_mode == "none" ? "" : "/app/certs/server.key"} -VAULT_TLS_DISABLE=${tls_mode == "none" ? "true" : ""} +cat > /etc/profile.d/runevault-installer-env.sh <<'ENVFILE' +export RUNEVAULT_TEAM_NAME='${team_name}' +export RUNEVAULT_ENVECTOR_ENDPOINT='${envector_endpoint}' +export RUNEVAULT_ENVECTOR_API_KEY='${envector_api_key}' ENVFILE -chmod 600 /opt/rune/.env +chmod 600 /etc/profile.d/runevault-installer-env.sh +set -a; . /etc/profile.d/runevault-installer-env.sh; set +a -# Install Docker CE with compose plugin (v2) -install -m 0755 -d /etc/apt/keyrings -curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc -chmod a+r /etc/apt/keyrings/docker.asc -echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu $(. /etc/os-release && echo "$VERSION_CODENAME") stable" > /etc/apt/sources.list.d/docker.list -apt-get update -apt-get install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin - -# Start Docker -systemctl enable docker -systemctl start docker - -# Generate per-user token auth config files -cat > /opt/rune/config/vault-roles.yml <<'ROLES' -roles: - admin: - scope: [get_public_key, decrypt_scores, decrypt_metadata, manage_tokens] - top_k: 50 - rate_limit: 150/60s - member: - scope: [get_public_key, decrypt_scores, decrypt_metadata] - top_k: 10 - rate_limit: 30/60s -ROLES -echo "tokens: []" > /opt/rune/config/vault-tokens.yml -chmod 600 /opt/rune/config/vault-roles.yml /opt/rune/config/vault-tokens.yml - -# Set up runevault CLI alias for ubuntu user -if ! 
grep -q 'alias runevault=' /home/ubuntu/.bashrc 2>/dev/null; then - echo "alias runevault='docker exec -it rune-vault python3 /app/vault_admin_cli.py'" >> /home/ubuntu/.bashrc -fi -usermod -aG docker ubuntu 2>/dev/null || true - -# TLS setup -if [ "${tls_mode}" = "self-signed" ]; then - CERT_DIR="/opt/rune/certs" - PUBLIC_IP=$(curl -4 -sf --connect-timeout 5 ifconfig.me 2>/dev/null || true) - openssl genrsa -out "$CERT_DIR/ca.key" 4096 2>/dev/null - openssl req -new -x509 -key "$CERT_DIR/ca.key" -out "$CERT_DIR/ca.pem" \ - -days 3650 -subj "/CN=Rune-Vault CA" -sha256 - openssl genrsa -out "$CERT_DIR/server.key" 2048 2>/dev/null - TMPCONF=$(mktemp) - printf '%s\n' \ - '[req]' \ - 'distinguished_name = req_dn' \ - 'req_extensions = v3_req' \ - 'prompt = no' \ - '[req_dn]' \ - 'CN = localhost' \ - '[v3_req]' \ - 'subjectAltName = @alt_names' \ - '[alt_names]' \ - 'DNS.1 = localhost' \ - 'DNS.2 = vault' \ - 'DNS.3 = rune-vault' \ - 'IP.1 = 127.0.0.1' \ - > "$TMPCONF" - TLS_HOSTNAME="${tls_hostname}" - if [ -n "$TLS_HOSTNAME" ]; then - echo "DNS.4 = $TLS_HOSTNAME" >> "$TMPCONF" - fi - if [ -n "$PUBLIC_IP" ]; then - echo "IP.2 = $PUBLIC_IP" >> "$TMPCONF" - fi - openssl req -new -key "$CERT_DIR/server.key" -out "$CERT_DIR/server.csr" -config "$TMPCONF" - openssl x509 -req -in "$CERT_DIR/server.csr" \ - -CA "$CERT_DIR/ca.pem" -CAkey "$CERT_DIR/ca.key" -CAcreateserial \ - -out "$CERT_DIR/server.pem" -days 825 -sha256 \ - -extfile "$TMPCONF" -extensions v3_req 2>/dev/null - rm -f "$TMPCONF" "$CERT_DIR/server.csr" "$CERT_DIR/ca.srl" - chmod 600 "$CERT_DIR/ca.key" "$CERT_DIR/server.key" - chmod 644 "$CERT_DIR/ca.pem" "$CERT_DIR/server.pem" -fi - -# Pull with retry and start Rune-Vault -cd /opt/rune +INSTALL_URL="https://raw.githubusercontent.com/CryptoLabInc/rune-admin/${runevault_version}/install.sh" for i in 1 2 3 4 5; do - docker compose pull && break - echo "Docker pull retry $i..." 
&& sleep 10 + curl -fsSL --retry 5 --retry-delay 10 --connect-timeout 15 "$${INSTALL_URL}" -o /tmp/install.sh && break + sleep $((i*10)) done -docker compose up -d -# Wait for Vault to be ready -sleep 10 -timeout 300 bash -c 'until docker exec rune-vault curl -sf http://localhost:8081/health 2>/dev/null; do sleep 2; done' +bash /tmp/install.sh --target local --non-interactive --version "${runevault_version}" + +usermod -aG runevault ubuntu -echo "=== Rune-Vault startup script completed at $(date) ===" +rm -f /etc/profile.d/runevault-installer-env.sh +echo "=== completed at $(date) ===" diff --git a/deployment/launchd/com.cryptolabinc.runevault.plist b/deployment/launchd/com.cryptolabinc.runevault.plist new file mode 100644 index 0000000..7b1bfb3 --- /dev/null +++ b/deployment/launchd/com.cryptolabinc.runevault.plist @@ -0,0 +1,45 @@ + + + + + Label + com.cryptolabinc.runevault + + ProgramArguments + + /usr/local/bin/runevault + daemon + start + --config + /opt/runevault/configs/runevault.conf + + + UserName + runevault + + RunAtLoad + + + KeepAlive + + + ThrottleInterval + 10 + + StandardOutPath + /opt/runevault/logs/runevault.stdout.log + + StandardErrorPath + /opt/runevault/logs/runevault.stderr.log + + EnvironmentVariables + + PATH + /usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin + + + ProcessType + Background + + diff --git a/deployment/oci/main.tf b/deployment/oci/main.tf index e0f9e70..39febaa 100644 --- a/deployment/oci/main.tf +++ b/deployment/oci/main.tf @@ -37,24 +37,12 @@ variable "team_name" { type = string } -variable "team_secret" { - description = "Team secret for DEK derivation. Generated by install.sh." 
- type = string - sensitive = true -} - variable "tls_mode" { description = "TLS mode: self-signed, custom, or none" type = string default = "self-signed" } -variable "tls_hostname" { - description = "Domain name to include in TLS certificate SAN" - type = string - default = "" -} - variable "envector_endpoint" { description = "enVector Cloud endpoint" type = string @@ -66,10 +54,9 @@ variable "envector_api_key" { sensitive = true } -variable "vault_index_name" { - description = "Vault index name" +variable "runevault_version" { + description = "Pinned runevault release tag — drives the install.sh URL and binary version on the VM." type = string - default = "runecontext" } variable "public_key" { @@ -176,13 +163,10 @@ resource "oci_core_instance" "vault_instance" { metadata = { ssh_authorized_keys = var.public_key user_data = base64encode(templatefile("${path.module}/startup-script.sh", { - team_secret = var.team_secret - team_name = var.team_name - tls_mode = var.tls_mode - tls_hostname = var.tls_hostname - envector_endpoint = var.envector_endpoint - envector_api_key = var.envector_api_key - vault_index_name = var.vault_index_name + team_name = var.team_name + envector_endpoint = var.envector_endpoint + envector_api_key = var.envector_api_key + runevault_version = var.runevault_version })) } } @@ -193,14 +177,16 @@ data "oci_identity_availability_domains" "ads" { } data "oci_core_images" "ubuntu_image" { - compartment_id = var.compartment_id - operating_system = "Canonical Ubuntu" - sort_by = "TIMECREATED" - sort_order = "DESC" + compartment_id = var.compartment_id + operating_system = "Canonical Ubuntu" + operating_system_version = "24.04" + shape = "VM.Standard.E5.Flex" + sort_by = "TIMECREATED" + sort_order = "DESC" filter { name = "display_name" - values = ["^Canonical-Ubuntu-22.04-.*"] + values = ["^Canonical-Ubuntu-24.04-.*"] regex = true } } @@ -211,12 +197,6 @@ output "vault_url" { value = "${oci_core_instance.vault_instance.public_ip}:50051" } -output 
"team_secret" { - value = var.team_secret - description = "Team secret for DEK derivation" - sensitive = true -} - output "vault_public_ip" { value = oci_core_instance.vault_instance.public_ip description = "Public IP of Vault instance" diff --git a/deployment/oci/startup-script-dev.sh b/deployment/oci/startup-script-dev.sh new file mode 100755 index 0000000..ea13143 --- /dev/null +++ b/deployment/oci/startup-script-dev.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# Dev mode: installs prereqs only. install.sh + binary injected via SCP by install-dev.sh. +set -euo pipefail +exec > /var/log/runevault-install.log 2>&1 +echo "=== runevault dev startup at $(date) ===" + +for i in $(seq 1 30); do + apt-get update -q \ + && apt-get -y --fix-broken install \ + && apt-get install -y ca-certificates curl openssl \ + && break + echo "apt retry $i..." && sleep 10 +done + +touch /var/run/runevault-dev-ready +echo "=== prereqs ready at $(date), waiting for install-dev.sh injection ===" diff --git a/deployment/oci/startup-script.sh b/deployment/oci/startup-script.sh index 69b27a1..6ecfaea 100644 --- a/deployment/oci/startup-script.sh +++ b/deployment/oci/startup-script.sh @@ -1,160 +1,33 @@ #!/bin/bash set -euo pipefail +exec > /var/log/runevault-install.log 2>&1 +echo "=== runevault startup at $(date) ===" -# Rune-Vault Startup Script for OCI Compute -# Deploys Docker-based Vault with gRPC (port 50051) - -exec > /var/log/rune-vault-startup.log 2>&1 -echo "=== Rune-Vault startup script began at $(date) ===" - -# Install packages (retry on lock or transient mirror errors) for i in $(seq 1 30); do apt-get update -q \ && apt-get -y --fix-broken install \ - && apt-get install -y ca-certificates curl gnupg jq openssl \ + && apt-get install -y ca-certificates curl openssl \ && break echo "apt retry $i..." 
&& sleep 10 done -# Create Rune directory structure -mkdir -p /opt/rune/certs /opt/rune/backups /opt/rune/logs /opt/rune/config -chmod 755 /opt/rune/certs - -# Write docker-compose.yml -cat > /opt/rune/docker-compose.yml <<'COMPOSE' -services: - vault: - image: ghcr.io/cryptolabinc/rune-vault:latest - container_name: rune-vault - restart: unless-stopped - ports: - - "0.0.0.0:50051:50051" - env_file: - - .env - environment: - - VAULT_TEAM_SECRET=${team_secret} - - VAULT_INDEX_NAME=${vault_index_name} - - ENVECTOR_ENDPOINT=${envector_endpoint} - - ENVECTOR_API_KEY=${envector_api_key} - - EMBEDDING_DIM=1024 - volumes: - - vault-keys:/app/vault_keys:rw - - ./config:/app/config:rw - - ./certs:/app/certs:rw - - ./backups:/secure/backups:rw - - ./logs:/var/log/rune-vault:rw - healthcheck: - test: ["CMD", "curl", "-sf", "http://localhost:8081/health"] - interval: 30s - timeout: 10s - retries: 3 - security_opt: - - no-new-privileges:true - deploy: - resources: - limits: - memory: 1G - cpus: "1.0" - reservations: - memory: 512M - cpus: "0.5" - -volumes: - vault-keys: -COMPOSE - -# Write .env file -cat > /opt/rune/.env <<'ENVFILE' -VAULT_TLS_CERT=${tls_mode == "none" ? "" : "/app/certs/server.pem"} -VAULT_TLS_KEY=${tls_mode == "none" ? "" : "/app/certs/server.key"} -VAULT_TLS_DISABLE=${tls_mode == "none" ? "true" : ""} +cat > /etc/profile.d/runevault-installer-env.sh <<'ENVFILE' +export RUNEVAULT_TEAM_NAME='${team_name}' +export RUNEVAULT_ENVECTOR_ENDPOINT='${envector_endpoint}' +export RUNEVAULT_ENVECTOR_API_KEY='${envector_api_key}' ENVFILE -chmod 600 /opt/rune/.env - -# Install Docker CE with compose plugin (v2) -install -m 0755 -d /etc/apt/keyrings -curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc -chmod a+r /etc/apt/keyrings/docker.asc -echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu $(. 
/etc/os-release && echo "$VERSION_CODENAME") stable" > /etc/apt/sources.list.d/docker.list -apt-get update -apt-get install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin +chmod 600 /etc/profile.d/runevault-installer-env.sh +set -a; . /etc/profile.d/runevault-installer-env.sh; set +a -# Start Docker -systemctl enable docker -systemctl start docker - -# Generate per-user token auth config files -cat > /opt/rune/config/vault-roles.yml <<'ROLES' -roles: - admin: - scope: [get_public_key, decrypt_scores, decrypt_metadata, manage_tokens] - top_k: 50 - rate_limit: 150/60s - member: - scope: [get_public_key, decrypt_scores, decrypt_metadata] - top_k: 10 - rate_limit: 30/60s -ROLES -echo "tokens: []" > /opt/rune/config/vault-tokens.yml -chmod 600 /opt/rune/config/vault-roles.yml /opt/rune/config/vault-tokens.yml - -# Set up runevault CLI alias for ubuntu user -if ! grep -q 'alias runevault=' /home/ubuntu/.bashrc 2>/dev/null; then - echo "alias runevault='docker exec -it rune-vault python3 /app/vault_admin_cli.py'" >> /home/ubuntu/.bashrc -fi -usermod -aG docker ubuntu 2>/dev/null || true - -# TLS setup -if [ "${tls_mode}" = "self-signed" ]; then - CERT_DIR="/opt/rune/certs" - PUBLIC_IP=$(curl -4 -sf --connect-timeout 5 ifconfig.me 2>/dev/null || true) - openssl genrsa -out "$CERT_DIR/ca.key" 4096 2>/dev/null - openssl req -new -x509 -key "$CERT_DIR/ca.key" -out "$CERT_DIR/ca.pem" \ - -days 3650 -subj "/CN=Rune-Vault CA" -sha256 - openssl genrsa -out "$CERT_DIR/server.key" 2048 2>/dev/null - TMPCONF=$(mktemp) - printf '%s\n' \ - '[req]' \ - 'distinguished_name = req_dn' \ - 'req_extensions = v3_req' \ - 'prompt = no' \ - '[req_dn]' \ - 'CN = localhost' \ - '[v3_req]' \ - 'subjectAltName = @alt_names' \ - '[alt_names]' \ - 'DNS.1 = localhost' \ - 'DNS.2 = vault' \ - 'DNS.3 = rune-vault' \ - 'IP.1 = 127.0.0.1' \ - > "$TMPCONF" - TLS_HOSTNAME="${tls_hostname}" - if [ -n "$TLS_HOSTNAME" ]; then - echo "DNS.4 = $TLS_HOSTNAME" >> "$TMPCONF" - fi - if [ -n 
"$PUBLIC_IP" ]; then - echo "IP.2 = $PUBLIC_IP" >> "$TMPCONF" - fi - openssl req -new -key "$CERT_DIR/server.key" -out "$CERT_DIR/server.csr" -config "$TMPCONF" - openssl x509 -req -in "$CERT_DIR/server.csr" \ - -CA "$CERT_DIR/ca.pem" -CAkey "$CERT_DIR/ca.key" -CAcreateserial \ - -out "$CERT_DIR/server.pem" -days 825 -sha256 \ - -extfile "$TMPCONF" -extensions v3_req 2>/dev/null - rm -f "$TMPCONF" "$CERT_DIR/server.csr" "$CERT_DIR/ca.srl" - chmod 600 "$CERT_DIR/ca.key" "$CERT_DIR/server.key" - chmod 644 "$CERT_DIR/ca.pem" "$CERT_DIR/server.pem" -fi - -# Pull with retry and start Rune-Vault -cd /opt/rune +INSTALL_URL="https://raw.githubusercontent.com/CryptoLabInc/rune-admin/${runevault_version}/install.sh" for i in 1 2 3 4 5; do - docker compose pull && break - echo "Docker pull retry $i..." && sleep 10 + curl -fsSL --retry 5 --retry-delay 10 --connect-timeout 15 "$${INSTALL_URL}" -o /tmp/install.sh && break + sleep $((i*10)) done -docker compose up -d -# Wait for Vault to be ready -sleep 10 -timeout 300 bash -c 'until docker exec rune-vault curl -sf http://localhost:8081/health 2>/dev/null; do sleep 2; done' +bash /tmp/install.sh --target local --non-interactive --version "${runevault_version}" + +usermod -aG runevault ubuntu -echo "=== Rune-Vault startup script completed at $(date) ===" +rm -f /etc/profile.d/runevault-installer-env.sh +echo "=== completed at $(date) ===" diff --git a/deployment/systemd/runevault.service b/deployment/systemd/runevault.service new file mode 100644 index 0000000..72dfe74 --- /dev/null +++ b/deployment/systemd/runevault.service @@ -0,0 +1,38 @@ +[Unit] +Description=Rune-Vault FHE gRPC Server +Documentation=https://github.com/CryptoLabInc/rune-admin +After=network-online.target +Wants=network-online.target + +[Service] +Type=simple +User=runevault +Group=runevault +ExecStart=/usr/local/bin/runevault daemon start --config /opt/runevault/configs/runevault.conf +Restart=on-failure +RestartSec=5s +TimeoutStopSec=30s 
+StandardOutput=journal +StandardError=journal +SyslogIdentifier=runevault + +# Security hardening +NoNewPrivileges=true +PrivateTmp=true +ProtectSystem=strict +ProtectHome=true +ReadWritePaths=/opt/runevault +ProtectKernelTunables=true +ProtectKernelModules=true +ProtectControlGroups=true +RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX +RestrictNamespaces=true +LockPersonality=true +MemoryDenyWriteExecute=false +RestrictRealtime=true +RestrictSUIDSGID=true +RemoveIPC=true +LimitNOFILE=65536 + +[Install] +WantedBy=multi-user.target diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 2e0d154..355be82 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -73,10 +73,10 @@ Rune-Vault is the **infrastructure backbone** for team-shared FHE-encrypted orga ## Port Summary -| Port | Protocol | Purpose | Exposure | -|------|----------|---------|----------| -| 50051 | gRPC + TLS | Vault service, health check, reflection | Public (team members) | -| 8081 | HTTP | Admin token/role CRUD + health check | Container-internal only | +| Endpoint | Protocol | Purpose | Exposure | +|----------|----------|---------|----------| +| `:50051` | gRPC + TLS | Vault service, health check, reflection | Public (team members) | +| `/opt/runevault/admin.sock` | Unix domain socket (mode 0600) | Admin token/role CRUD + status | Local only — `runevault` CLI | ## Component Details @@ -91,20 +91,21 @@ Rune-Vault is the **infrastructure backbone** for team-shared FHE-encrypted orga - **On-Premise** (Self-hosted) **Runtime**: -- Python 3.12 gRPC server +- Single-binary Go gRPC daemon (`runevault`) — no runtime dependencies beyond TLS - gRPC server on port 50051 (used by envector-mcp-server) - gRPC health check via `grpc.health.v1` protocol -- Internal admin HTTP API on port 8081 (container-local only) +- Admin Unix domain socket at `/opt/runevault/admin.sock` (mode 0600, vault-user owned) +- Registered as a native systemd unit (`runevault.service`) on Linux or a launchd job 
(`com.cryptolabinc.runevault`) on macOS -**Key Storage** (`vault_keys/vault-key/`): +**Key Storage** (`/opt/runevault/vault-keys/<key_id>/`, default `<key_id>` = `vault-key`): ``` -vault_keys/vault-key/ +/opt/runevault/vault-keys/vault-key/ ├── EncKey.json # Public encryption key (distributed to agents) ├── EvalKey.json # Public evaluation key (for FHE operations) └── SecKey.json # Secret decryption key (NEVER leaves Vault) ``` -Keys are auto-generated on first startup via `ensure_vault()`. +Keys are auto-generated on first startup by `EnsureVault` (in `vault/internal/server/ensure_vault.go`). **Security Properties**: - Secret key stored encrypted at rest (filesystem encryption) @@ -119,11 +120,10 @@ Defined in `proto/vault_service.proto` (`rune.vault.v1.VaultService`). **Server Configuration**: - Max message size: 256 MB (for EvalKey transfer) -- Thread pool: 4 workers -- Interceptor chain: `ValidationInterceptor` (protovalidate + runtime checks) +- Interceptor chain: validation (protovalidate + runtime checks) → auth/RBAC → audit - gRPC reflection enabled (for grpcurl discovery) - gRPC health checking (`grpc.health.v1`) enabled -- TLS required by default (disable via `VAULT_TLS_DISABLE=true`) +- TLS required by default (`server.grpc.tls.disable: true` is dev only — never in production) **`GetPublicKey()`** - Returns: JSON bundle containing EncKey, EvalKey, index_name, key_id, agent_id, agent_dek (per-user derived encryption key) @@ -146,15 +146,15 @@ Defined in `proto/vault_service.proto` (`rune.vault.v1.VaultService`). ### 3. Authentication & Access Control -**Token Format**: `evt_` prefix + 32 hex characters (total 36 chars), generated via `secrets.token_hex(16)`. +**Token Format**: `evt_` prefix + 32 hex characters (total 36 chars), generated from `crypto/rand`. - Example: `evt_a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6` - Proto-level validation enforces exactly 36 characters.
-**Per-User RBAC** (managed by `TokenStore`): +**Per-User RBAC** (managed by the `tokens` package): - Each user gets their own token assigned to a role. -- `validate()` returns `(username, Role)` tuple. +- Validation returns the matched user and role. - Checks: token existence, expiration, rate limit (per-user sliding window). -- Scope checked separately per gRPC method. +- Scope is checked separately per gRPC method. **Default Roles:** @@ -163,42 +163,47 @@ Defined in `proto/vault_service.proto` (`rune.vault.v1.VaultService`). | admin | get_public_key, decrypt_scores, decrypt_metadata, manage_tokens | 50 | 150/60s | | member | get_public_key, decrypt_scores, decrypt_metadata | 10 | 30/60s | -Custom roles can be created via the Admin API or CLI. +Custom roles can be created via `runevault role create`. **Token Lifecycle:** - Issue: `runevault token issue --user alice --role member --expires 90d` -- Rotate: `runevault token rotate --user alice` (atomic revoke + reissue) +- Rotate: `runevault token rotate --user alice` (atomic revoke + reissue) or `--all` - Revoke: `runevault token revoke --user alice` -- Persistence: async YAML writes to `vault-tokens.yml` / `vault-roles.yml` (atomic via temp file + `os.replace`) - -**Configuration Priority** (at startup): -1. YAML config files (`vault-roles.yml`, `vault-tokens.yml`) -2. Legacy env var (`VAULT_TOKENS`) -3. Demo mode (auto-generated demo token) - -### 4. 
Admin Server & CLI - -**Admin Server** (`admin_server.py`): -- HTTP on `127.0.0.1:8081` (not exposed via Docker; access via `docker exec`) -- No authentication (protected by container isolation) -- REST API for token and role CRUD operations - -| Method | Path | Purpose | -|--------|------|---------| -| GET | /health | Health check (queries gRPC health servicer) | -| GET | /tokens | List all tokens | -| POST | /tokens | Issue new token | -| DELETE | /tokens/{user} | Revoke token | -| POST | /tokens/{user}/rotate | Rotate single token | -| POST | /tokens/_rotate_all | Rotate all tokens | -| GET | /roles | List all roles | -| POST | /roles | Create role | -| PUT | /roles/{name} | Update role | -| DELETE | /roles/{name} | Delete role | - -**CLI** (`vault_admin_cli.py` / `runevault`): -- Wraps the Admin HTTP API for operator convenience -- Available inside the container +- Persistence: atomic YAML writes to the files referenced by `tokens.tokens_file` and `tokens.roles_file` in `runevault.conf` (defaults: `/opt/runevault/configs/{tokens,roles}.yml`). + +**Configuration Source**: `runevault.conf` (YAML) is the single source of truth — no env-var fallback or migration helper. Lookup order: +1. `--config <path>` CLI flag +2. `/opt/runevault/configs/runevault.conf` +3. `./runevault.conf` (cwd, dev only) + +Secret YAML fields (`tokens.team_secret`, `envector.api_key`) accept a sibling `*_file` key for KMS-backed deployments. + +### 4. Admin Socket & CLI + +**Admin Socket** (`vault/internal/server/admin.go`): +- Unix domain socket at `/opt/runevault/admin.sock` (mode 0600, vault-user owned) +- Filesystem permissions are the only authorization gate; never expose externally +- Used by the `runevault` CLI and by the daemon's lifecycle hooks (e.g.
`ErrRestartRequested` after token rotation) + +**CLI** (`runevault`): + +| Command | Purpose | +|---------|---------| +| `runevault status` | Daemon health and socket liveness | +| `runevault logs` | Tail audit log output | +| `runevault token issue --user <name> --role <role> [--expires 90d]` | Issue a new per-user token | +| `runevault token list` | List issued tokens | +| `runevault token rotate --user <name>` / `--all` | Atomic revoke + reissue | +| `runevault token revoke --user <name>` | Revoke a token | +| `runevault role list` | List configured roles | +| `runevault role create --name <name> --scope a,b,c --top-k N --rate-limit N/Ts` | Create a custom role | +| `runevault role update --name <name> [--scope] [--top-k] [--rate-limit]` | Update an existing role | +| `runevault role delete --name <name>` | Delete a role | +| `runevault version` | Print build version (works without daemon or socket) | + +The `daemon start` subcommand is invoked by systemd / launchd; operators +control lifecycle via `systemctl … runevault` (Linux) or +`launchctl … system/com.cryptolabinc.runevault` (macOS) rather than directly. ### 5. Input Validation @@ -214,7 +219,7 @@ Non-Vault methods (health check, reflection) pass through untouched. Each agent gets a unique 32-byte AES-256 DEK (Data Encryption Key): ``` -DEK = HKDF-SHA256(key=VAULT_TEAM_SECRET, info=agent_id) +DEK = HKDF-SHA256(key=tokens.team_secret, info=agent_id) agent_id = SHA256(token)[:32] ``` @@ -225,20 +230,19 @@ agent_id = SHA256(token)[:32] ### 7.
Audit Logging -Structured JSON logging for all gRPC operations (`audit.py`): +Structured JSON logging for all gRPC operations (`vault/internal/server/audit.go`): - One JSON line per request: timestamp, user_id, method, top_k, result_count, status, source_ip, latency_ms, error -- Source IP extracted from gRPC `context.peer()` +- Source IP extracted from the gRPC peer context +- File output uses `lumberjack` for size-based rotation -**Configuration** (via `VAULT_AUDIT_LOG` env var): +**Configuration** in `runevault.conf`: -| Value | Behavior | -|-------|----------| -| *(empty)* | Disabled | -| `file` | `/var/log/rune-vault/audit.log` (daily rotation, 30-day retention) | -| `file:/path` | Custom file path | -| `stdout` | JSON lines to stdout (for cloud log aggregators) | -| `file+stdout` | Both | +```yaml +audit: + mode: file+stdout # one of: "" (disabled), file, stdout, file+stdout + path: /opt/runevault/logs/audit.log +``` ## Data Flow @@ -377,90 +381,80 @@ EvalKey: Distributed to all team members (safe to share, FHE operations) ### Cloud Deployment (Terraform) +`install.sh --target <aws|gcp|oci>` wraps Terraform end-to-end: +preflight checks → `terraform apply` → cloud-init bootstrap → CA-cert SCP +poll → remote `install.sh` execution.
+ ``` Terraform Configuration │ - ├── deployment/oci/main.tf # Oracle Cloud ├── deployment/aws/main.tf # Amazon Web Services - └── deployment/gcp/main.tf # Google Cloud Platform + ├── deployment/gcp/main.tf # Google Cloud Platform + └── deployment/oci/main.tf # Oracle Cloud Infrastructure │ ▼ Cloud Resources Created │ ├── Compute Instance (VM) - │ ├── OS: Ubuntu 22.04 LTS - │ ├── Shape: 2 OCPU, 8GB RAM, 50GB disk - │ └── Software: - │ ├── Python 3.12 - │ └── pyenvector (FHE SDK) + │ ├── OS: Ubuntu 24.04 LTS + │ └── Software (installed via cloud-init startup script): + │ ├── runevault binary (SHA256SUMS-verified) + │ └── runevault.service (systemd) registered │ ├── Networking │ ├── Public IP address - │ ├── Security group (allow 50051/gRPC) - │ └── DNS: vault-{team}.oci.envector.io + │ └── Security group / list / firewall rule (allow 50051/gRPC) │ ├── Storage - │ ├── /vault_keys/ (encrypted volume) - │ └── Backup to cloud storage (optional) + │ └── /opt/runevault/vault-keys/<key_id>/ (FHE keys) │ └── Audit Logging - └── /var/log/rune-vault/audit.log + └── /opt/runevault/logs/audit.log ``` -### High Availability (Optional) - -``` -Load Balancer (HTTPS) - │ - ├── Vault Instance 1 (Primary) - ├── Vault Instance 2 (Standby) - └── Vault Instance N (Standby) - │ - └── Shared Storage (NFS/EFS) - └── /vault_keys/ (same keys across instances) -``` - -**Setup**: -```bash -cd deployment/oci -terraform apply -var="ha_enabled=true" \ - -var="instance_count=3" -``` +Common Terraform variables across all CSPs: `team_name`, `tls_mode`, +`envector_endpoint`, `envector_api_key`, `runevault_version`, +`public_key`, `region`. CSP-specific: `instance_type` (AWS), +`project_id` / `zone` / `machine_type` (GCP), `oci_profile` / +`compartment_id` (OCI). Output: `vault_public_ip`. -**Failover**: -- Health checks every 10s -- Auto-failover <30s -- Shared keys (no key synchronization needed) +Horizontal scaling and multi-instance HA are not currently supported.
+For higher capacity, re-provision with a larger VM shape via your cloud +provider. ## Operational Considerations ### Backup & Recovery **Critical Assets**: -- `/vault_keys/vault-key/SecKey.json` - **MUST backup** (cannot regenerate) -- `VAULT_TEAM_SECRET` - **MUST backup** (needed for DEK re-derivation) -- Vault token - Rotatable via CLI +- `/opt/runevault/vault-keys/<key_id>/SecKey.json` — **MUST backup** (cannot regenerate) +- `tokens.team_secret` from `runevault.conf` — **MUST backup** (needed for DEK re-derivation) +- Per-user tokens — rotatable via `runevault token rotate` **Backup Strategy**: ```bash -# Manually back up vault keys -tar czf vault_keys_backup_$(date +%Y-%m-%d).tar.gz vault/vault_keys/ +# Manually back up vault keys (run on the VM) +sudo tar czf vault-keys_backup_$(date +%Y-%m-%d).tar.gz -C /opt/runevault vault-keys/ -# Store in: -# 1. Offline (USB drive in safe) -# 2. Cloud (different provider, encrypted) -# 3. Password manager (1Password secure notes) +# Also archive runevault.conf or at minimum the tokens.team_secret value +# Store in: offline media, a different cloud provider, or a password manager ``` **Recovery Procedure**: ```bash -# If Vault VM fails -cd deployment/oci -terraform apply -var="restore_from_backup=true" \ - -var="backup_path=/path/to/backup.tar.gz.enc" +# 1. Re-provision a fresh VM via the installer +sudo bash install.sh --target <aws|gcp|oci> + +# 2. Stop the daemon before restoring keys +sudo systemctl stop runevault + +# 3. Restore vault-keys and team_secret +sudo tar xzf vault-keys_backup_YYYY-MM-DD.tar.gz -C /opt/runevault +# Edit /opt/runevault/configs/runevault.conf and restore tokens.team_secret -# Vault restarts with same keys -# Team members continue without reconfiguration +# 4. Bring the daemon back up +sudo systemctl start runevault +# Team members continue without reconfiguration.
``` ### Token Rotation @@ -472,61 +466,54 @@ runevault token rotate --user alice # Rotate all tokens runevault token rotate --all -# Distribute new tokens to team members via secure channel +# Distribute new tokens to team members via a secure channel ``` -### Scaling Strategies +### Scaling Strategy -**Vertical Scaling** (increase VM size): -```bash -terraform apply -var="instance_shape=VM.Standard.E4.Flex" \ - -var="instance_ocpu=4" \ - -var="instance_memory_gb=16" -``` +Re-provision with a larger VM shape via your cloud provider's console or +by editing the relevant `instance_type` (AWS) / `machine_type` (GCP) / +shape configuration (OCI) and re-running `terraform apply` from your +install directory. -**Horizontal Scaling** (add more instances): -```bash -terraform apply -var="ha_enabled=true" \ - -var="instance_count=3" -``` - -**When to Scale**: -- CPU >80% sustained → Add OCPU or scale out -- Latency P95 >200ms → Add instances -- Error rate >1% → Investigate (likely config issue, not scale) +When to scale: +- CPU >80% sustained +- Latency P95 >200ms +- Error rate >1% (investigate first — usually a config issue, not scale) ## Module Reference -| Module | Purpose | -|--------|---------| -| `vault_core.py` | Core business logic: key management, decryption, DEK derivation | -| `vault_grpc_server.py` | gRPC server, TLS, entrypoint, orchestrates all subsystems | -| `token_store.py` | Per-user RBAC: Token/Role dataclasses, validation, rate limiting, YAML persistence | -| `admin_server.py` | Internal HTTP admin API for token/role CRUD | -| `validation_interceptor.py` | gRPC interceptor: protovalidate + runtime input checks | -| `request_validator.py` | Runtime validation rules (control chars, whitespace) | -| `audit.py` | Structured JSON audit logging with file rotation | -| `vault_admin_cli.py` | CLI for token/role management (`runevault` command) | +| Package | Purpose | +|---------|---------| +| `vault/cmd` | Binary entry point — wires Cobra root command and runs 
`Execute()` | +| `vault/internal/commands` | CLI subcommands (`daemon`, `token`, `role`, `status`, `logs`, `version`) and admin-socket client | +| `vault/internal/server` | gRPC server, config loader, audit logger, admin UDS, `EnsureVault` startup hook, interceptors | +| `vault/internal/tokens` | Per-user RBAC store: tokens, roles, validation, rate limiting, YAML persistence | +| `vault/internal/crypto` | FHE key management + HKDF/AES wrappers around `envector-go-sdk` | +| `vault/internal/tests` | E2E tests gated by build tag `e2e` (decrypt pipeline + CLI smoke) | +| `vault/pkg/vaultpb` | Generated gRPC stubs from `vault/proto/*.proto` | ## Troubleshooting ### Issue: High Latency -**Symptoms**: decrypt_scores() taking >200ms +**Symptoms**: `DecryptScores` taking >200ms **Diagnosis**: ```bash # Check Vault CPU on the server -ssh admin@vault-yourteam.oci.envector.io +ssh ubuntu@<vault_public_ip> # or ec2-user@... / opc@... depending on CSP top -# Check audit log for latency -docker exec rune-vault tail -20 /var/log/rune-vault/audit.log +# Tail the audit log for latency +sudo tail -20 /opt/runevault/logs/audit.log +# Or use the CLI from the host: +runevault logs ``` **Solutions**: -- CPU bottleneck → Scale up (add OCPU) -- Large Top-K → Reduce max results +- CPU bottleneck → Re-provision with a larger VM shape +- Large Top-K → Reduce max results (or tighten role `top_k`) - High dimension → Acceptable (dim=1024 is standard) ### Issue: Authentication Failures @@ -535,37 +522,40 @@ docker exec rune-vault tail -20 /var/log/rune-vault/audit.log **Diagnosis**: ```bash -# Check token is correct -echo $RUNEVAULT_TOKEN +# Verify the daemon is up +runevault status -# Verify Vault sees requests -ssh admin@vault-yourteam.oci.envector.io -sudo journalctl -u vault | grep "denied" +# Inspect server logs for denied requests +sudo journalctl -u runevault | grep -i "denied\|unauthenticated" ``` **Solutions**: -- Wrong token → Re-share correct token -- Token rotated → Distribute new token to all team
members -- Token expired → Issue new token via `runevault token issue` -- Rate limited → Wait for window reset or adjust role rate_limit -- Firewall → Check security group allows 50051 from team IPs +- Wrong token → Re-share the correct token +- Token rotated → Distribute the new token to all team members +- Token expired → Issue a fresh token via `runevault token issue` +- Rate limited → Wait for the window to reset, or adjust the role's `rate_limit` +- Firewall → Check the security group allows 50051 from team IPs ### Issue: Vault Crashed -**Symptoms**: Health check fails, 503 Service Unavailable +**Symptoms**: Health check fails, daemon not responsive **Diagnosis**: ```bash -ssh admin@vault-yourteam.oci.envector.io -sudo systemctl status vault -sudo journalctl -u vault -n 100 +# Linux +sudo systemctl status runevault +sudo journalctl -u runevault -n 100 + +# macOS +sudo launchctl print system/com.cryptolabinc.runevault +sudo log show --predicate 'process == "runevault"' --last 10m ``` **Solutions**: - OOM killer → Increase VM memory -- Disk full → Rotate logs (`logrotate`) -- Crashed process → Restart (`systemctl restart vault`) -- Persistent crash → Redeploy with backup keys +- Disk full → Rotate logs (`lumberjack` handles size-based rotation, but free disk first) +- Crashed process → `sudo systemctl restart runevault` (Linux) / `sudo launchctl kickstart -k system/com.cryptolabinc.runevault` (macOS) +- Persistent crash → Re-provision with `install.sh --uninstall` then `install.sh --target `, restoring `vault-keys/` from backup before first start ## Next Steps diff --git a/install.sh b/install.sh index 15c8879..4833620 100755 --- a/install.sh +++ b/install.sh @@ -1,860 +1,1311 @@ -#!/bin/bash -# Rune-Vault Interactive Server Setup -# Usage: curl -fsSL https://raw.githubusercontent.com/CryptoLabInc/rune-admin/main/install.sh -o install.sh && sudo bash install.sh +#!/usr/bin/env bash +# +# Rune-Vault installer. 
+# +# Downloads, verifies, and installs the runevault daemon with systemd (Linux) +# or launchd (macOS) service registration. +# +# Usage: +# sudo bash install.sh [options] +# +# Options: +# --version Install a specific release tag (default: latest) +# --target Deploy locally or to a cloud provider (default: local) +# --install-dir CSP install directory (default: $HOME/rune-vault-) +# --force Overwrite existing config and TLS certificates +# --non-interactive Skip all prompts; supply secrets via env vars +# --uninstall Tear down the install. Local: stop service + remove files +# (optionally delete data). CSP: run 'terraform destroy' and +# optionally remove the install directory. +# +# Non-interactive env vars (local install): +# RUNEVAULT_TEAM_NAME keys.index_name (required) +# RUNEVAULT_ENVECTOR_ENDPOINT envector.endpoint (required) +# RUNEVAULT_ENVECTOR_API_KEY envector.api_key +# RUNEVAULT_ENVECTOR_API_KEY_FILE envector.api_key_file (alternative) +# RUNEVAULT_TLS_CERT_PATH Path to existing TLS cert (skips auto-gen) +# RUNEVAULT_TLS_KEY_PATH Path to existing TLS key (skips auto-gen) +# +# Non-interactive env vars (CSP install — operator workstation): +# RUNEVAULT_ENVECTOR_ENDPOINT enVector endpoint URL (required) +# RUNEVAULT_ENVECTOR_API_KEY enVector API key (required) +# RUNEVAULT_TEAM_NAME Team name — used for resource naming and vault index (required) +# RUNEVAULT_TARGET Pre-select target without interactive menu +# RUNEVAULT_INSTALL_DIR Pre-set CSP install directory +# RUNEVAULT_CSP_REGION Cloud region +# RUNEVAULT_GCP_PROJECT_ID GCP: project ID (required for GCP) +# RUNEVAULT_OCI_COMPARTMENT_ID OCI: compartment OCID (required for OCI) +# +# Dev/testing env vars (set by scripts/install-dev.sh): +# RUNEVAULT_LOCAL_BINARY Path to local binary; skips download + checksum verify +# RUNEVAULT_SKIP_VERIFY Set to 1 to skip checksum verification (dev only) +# RUNEVAULT_INSTALL_PREFIX Override /opt/runevault (default) +# RUNEVAULT_BINARY_PATH Override 
/usr/local/bin/runevault (default) +# RUNEVAULT_SKIP_SERVICE Set to 1 to skip systemd/launchd installation set -euo pipefail -# ─── Root privilege check ───────────────────────────────────────────────────── - -if [ "$(id -u)" -ne 0 ]; then - echo "Error: This script must be run as root. Use: sudo bash install.sh" - exit 1 +# ── Constants ────────────────────────────────────────────────────────────────── +REPO=CryptoLabInc/rune-admin +SERVICE_USER=runevault +GRPC_PORT=50051 + +RAW_BASE="https://raw.githubusercontent.com/${REPO}" +DEFAULT_INSTALL_DIR_CSP_FMT="%s/rune-vault-%s" + +# Overridable by env (used by scripts/install-dev.sh) +INSTALL_PREFIX="${RUNEVAULT_INSTALL_PREFIX:-/opt/runevault}" +BINARY_DEST="${RUNEVAULT_BINARY_PATH:-/usr/local/bin/runevault}" +SKIP_VERIFY="${RUNEVAULT_SKIP_VERIFY:-0}" +LOCAL_BINARY="${RUNEVAULT_LOCAL_BINARY:-}" +SKIP_SERVICE="${RUNEVAULT_SKIP_SERVICE:-0}" + +TARGET="${RUNEVAULT_TARGET:-}" +INSTALL_DIR_CSP="${RUNEVAULT_INSTALL_DIR:-}" +CSP_PUBLIC_IP="" + +# ── Color helpers ────────────────────────────────────────────────────────────── +if [[ -t 1 ]]; then + _RED='\033[0;31m' _GRN='\033[0;32m' _BLU='\033[0;34m' _YLW='\033[0;33m' _RST='\033[0m' +else + _RED='' _GRN='' _BLU='' _YLW='' _RST='' fi - -# ─── Parse flags ────────────────────────────────────────────────────────────── - -VERSION_OVERRIDE="" -while [ $# -gt 0 ]; do - case "$1" in - --version) VERSION_OVERRIDE="$2"; shift 2 ;; - --version=*) VERSION_OVERRIDE="${1#*=}"; shift ;; - *) shift ;; - esac +die() { printf "${_RED}ERROR:${_RST} %s\n" "$*" >&2; exit 1; } +info() { printf "${_BLU}==>${_RST} %s\n" "$*"; } +success() { printf "${_GRN}✓${_RST} %s\n" "$*"; } +warn() { printf "${_YLW}WARNING:${_RST} %s\n" "$*" >&2; } + +# ── Argument parsing ─────────────────────────────────────────────────────────── +UNINSTALL=0 +FORCE=0 +VERSION="" +NON_INTERACTIVE=0 + +while [[ $# -gt 0 ]]; do + case $1 in + --version) VERSION="$2"; shift 2 ;; + --uninstall) UNINSTALL=1; shift ;; + --force) 
FORCE=1; shift ;; + --non-interactive) NON_INTERACTIVE=1; shift ;; + --target) TARGET="$2"; shift 2 ;; + --install-dir) INSTALL_DIR_CSP="$2"; shift 2 ;; + *) die "Unknown flag: $1" ;; + esac done -# ─── Constants ──────────────────────────────────────────────────────────────── - -REPO="CryptoLabInc/rune-admin" -DOCKER_IMAGE="ghcr.io/cryptolabinc/rune-vault" -_user_home="${SUDO_USER:+$(eval echo ~"$SUDO_USER")}" -DEFAULT_INSTALL_DIR="${_user_home:-$HOME}/rune-vault" -VAULT_PUBLIC_IP="" -CSP_CA_CERT_LOCAL="" +# Auto-set non-interactive when stdin is not a TTY (e.g. curl | bash) +[[ -t 0 ]] || NON_INTERACTIVE=1 + +# ── Platform detection ───────────────────────────────────────────────────────── +case "$(uname -s)" in + Linux) OS_SLUG=linux ;; + Darwin) OS_SLUG=darwin ;; + *) die "Unsupported OS: $(uname -s). Only Linux and macOS are supported." ;; +esac +case "$(uname -m)" in + x86_64|amd64) ARCH_SLUG=amd64 ;; + arm64|aarch64) ARCH_SLUG=arm64 ;; + *) die "Unsupported architecture: $(uname -m)." ;; +esac + +# ── Uninstall flow ───────────────────────────────────────────────────────────── +run_uninstall() { + info "Uninstalling Rune-Vault..." + + if [[ "$TARGET" != "local" ]]; then + csp_uninstall "$TARGET" + return 0 + fi -# ─── Colors & output helpers ───────────────────────────────────────────────── + [[ "$(id -u)" -eq 0 ]] || die "Local uninstall must be run as root (use sudo)." -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -BOLD='\033[1m' -NC='\033[0m' + if [[ "$OS_SLUG" = linux ]]; then + if systemctl is-active --quiet runevault.service 2>/dev/null; then + info "Stopping runevault.service..." + systemctl stop runevault.service + fi + systemctl disable runevault.service 2>/dev/null || true + rm -f /etc/systemd/system/runevault.service + systemctl daemon-reload + success "systemd service removed." 
+ else + local plist=/Library/LaunchDaemons/com.cryptolabinc.runevault.plist + if [[ -f "$plist" ]]; then + launchctl bootout system/com.cryptolabinc.runevault 2>/dev/null || true + rm -f "$plist" + success "launchd service removed." + fi + fi + + rm -f "$BINARY_DEST" + success "Binary removed: ${BINARY_DEST}" + + printf '\n' + warn "The following directory contains Rune-Vault Keys and configuration:" + warn " ${INSTALL_PREFIX}/" + warn "This data CANNOT be recovered if deleted." + printf '\n' + + local answer=n + if [[ "$NON_INTERACTIVE" -eq 1 ]]; then + warn "Non-interactive mode: data preserved. Remove manually: rm -rf ${INSTALL_PREFIX}" + else + read -r -p "Delete all vault data including Rune-Vault Keys? [y/N] " answer + fi + + case "$answer" in + [Yy]*) + rm -rf "${INSTALL_PREFIX}" + success "Vault data deleted." + ;; + *) + info "Data preserved at ${INSTALL_PREFIX}" + ;; + esac + + if [[ "$OS_SLUG" = linux ]]; then + if id "$SERVICE_USER" >/dev/null 2>&1; then + userdel "$SERVICE_USER" 2>/dev/null || true + success "System user '${SERVICE_USER}' removed." + fi + if getent group "$SERVICE_USER" >/dev/null 2>&1; then + groupdel "$SERVICE_USER" 2>/dev/null || true + success "System group '${SERVICE_USER}' removed." + fi + else + if id "$SERVICE_USER" >/dev/null 2>&1; then + dscl . -delete /Users/"$SERVICE_USER" 2>/dev/null || true + success "System user '${SERVICE_USER}' removed." + fi + if dscl . -read /Groups/"$SERVICE_USER" >/dev/null 2>&1; then + dscl . -delete /Groups/"$SERVICE_USER" 2>/dev/null || true + success "System group '${SERVICE_USER}' removed." + fi + fi -print_header() { - echo -e "\n${BLUE}================================================${NC}" - echo -e "${BLUE} $1${NC}" - echo -e "${BLUE}================================================${NC}\n" + success "Rune-Vault uninstalled." 
} -print_info() { echo -e "${GREEN}✓${NC} $1"; } -print_warn() { echo -e "${YELLOW}⚠${NC} $1"; } -print_error() { echo -e "${RED}✗${NC} $1"; } -print_step() { echo -e "\n${BOLD}▸ $1${NC}\n"; } - -# ─── Cleanup trap ───────────────────────────────────────────────────────────── - -CLEANUP_DIR="" -cleanup() { - printf '\033[?25h' >&2 2>/dev/null || true - if [ -n "$CLEANUP_DIR" ] && [ -d "$CLEANUP_DIR" ]; then - rm -rf "$CLEANUP_DIR" - fi +# ── CSP helpers ─────────────────────────────────────────────────────────────── + +_prompt() { + local varname=$1 label=$2 default=${3:-} + [[ -n "${!varname:-}" ]] && return 0 + local val + if [[ -n "$default" ]]; then + read -r -p "${label} [${default}]: " val + printf -v "$varname" '%s' "${val:-$default}" + else + read -r -p "${label}: " val + printf -v "$varname" '%s' "$val" + fi } -trap cleanup EXIT -# ─── Prompt helper ──────────────────────────────────────────────────────────── - -prompt() { - local varname="$1" message="$2" default="${3:-}" - if [ -n "$default" ]; then - printf "${BOLD}%s${NC} [%s]: " "$message" "$default" >&2 - else - printf "${BOLD}%s${NC}: " "$message" >&2 - fi - local value - read -r value - value="${value:-$default}" - eval "$varname=\"\$value\"" +resolve_target() { + if [[ -n "${TARGET:-}" ]]; then + case "$TARGET" in + local|aws|gcp|oci) ;; + *) die "Invalid --target value: ${TARGET}. Valid: local, aws, gcp, oci." 
;; + esac + return 0 + fi + if [[ "$NON_INTERACTIVE" -eq 0 && -t 0 ]]; then + local action="installation" + [[ "$UNINSTALL" -eq 1 ]] && action="uninstall" + printf '\n' + printf ' Select %s target:\n' "$action" + printf ' 1) Local (this machine)\n' + printf ' 2) AWS\n' + printf ' 3) GCP\n' + printf ' 4) OCI\n' + printf '\n' + local choice + read -r -p " Choice [1]: " choice + case "${choice:-1}" in + 1|local) TARGET=local ;; + 2|aws) TARGET=aws ;; + 3|gcp) TARGET=gcp ;; + 4|oci) TARGET=oci ;; + *) die "Invalid choice: ${choice}" ;; + esac + else + TARGET=local + fi } -prompt_yn() { - local message="$1" default="${2:-y}" - local value - if [ "$default" = "y" ]; then - printf "${BOLD}%s${NC} [Y/n]: " "$message" >&2 +csp_preflight() { + local csp=$1 + info "Running CSP preflight checks for ${csp}..." + + if ! command -v terraform >/dev/null 2>&1; then + printf '\n' + warn "terraform is not installed." + printf '\n' + local answer=n + if [[ "$NON_INTERACTIVE" -eq 0 ]]; then + read -r -p "Install terraform automatically? [y/N] " answer else - printf "${BOLD}%s${NC} [y/N]: " "$message" >&2 + warn "Non-interactive mode: cannot auto-install terraform." 
fi - read -r value - value="${value:-$default}" - case "$value" in - [Yy]*) return 0 ;; - *) return 1 ;; + case "$answer" in + [Yy]*) _install_tool terraform ;; + *) + printf 'Install it manually and re-run the installer:\n' >&2 + case "$OS_SLUG" in + linux) printf ' terraform: https://developer.hashicorp.com/terraform/install\n' >&2 ;; + darwin) printf ' terraform: brew install terraform\n' >&2 ;; + esac + exit 1 + ;; esac + fi + + local csp_cli auth_cmd auth_setup + case "$csp" in + aws) + csp_cli=aws + auth_cmd='aws sts get-caller-identity' + auth_setup='aws configure' + ;; + gcp) + csp_cli=gcloud + auth_cmd='gcloud auth application-default print-access-token' + auth_setup='gcloud auth application-default login' + ;; + oci) + csp_cli=oci + auth_cmd='oci iam region list' + auth_setup='oci setup config' + ;; + esac + + local tf_user="${SUDO_USER:-$(id -un)}" + + if ! sudo -u "$tf_user" -H bash -lc "command -v ${csp_cli}" >/dev/null 2>&1; then + die "'${csp_cli}' CLI not found in PATH for user '${tf_user}'. Install it and re-run." + fi + + if ! sudo -u "$tf_user" -H bash -lc "${auth_cmd}" >/dev/null 2>&1; then + die "'${csp_cli}' is not authenticated for user '${tf_user}'. Authenticate and re-run: ${auth_setup}" + fi + + success "CSP preflight passed." 
} -# ─── Arrow-key menu selector ──────────────────────────────────────────────── - -select_menu() { - local options=("$@") - local count=${#options[@]} - local _sel=0 - - # Fallback: plain number input when terminal is dumb or unset - if [ -z "${TERM:-}" ] || [ "$TERM" = "dumb" ]; then - local i - for i in "${!options[@]}"; do - printf " %d) %s\n" "$((i + 1))" "${options[$i]}" >&2 - done - echo "" >&2 - local choice - printf "${BOLD}Select${NC} [1]: " >&2 - read -r choice - choice="${choice:-1}" - if [ "$choice" -ge 1 ] 2>/dev/null && [ "$choice" -le "$count" ] 2>/dev/null; then - echo "$((choice - 1))" - else - print_error "Invalid selection."; exit 1 - fi - return +csp_prompt_config() { + local csp=$1 + + if [[ "$NON_INTERACTIVE" -eq 0 ]]; then + printf '\n' + printf '══════════════════════════════════════════════════════════\n' + printf ' Cloud deployment configuration\n' + printf '══════════════════════════════════════════════════════════\n' + printf '\n' + printf ' Create your enVector cluster at https://envector.io\n' + printf ' before proceeding. 
You will need the endpoint URL and\n' + printf ' API key from the dashboard.\n' + printf '\n' + + _prompt TEAM_NAME "Team name" "" + _prompt ENVECTOR_ENDPOINT "enVector endpoint" "" + _prompt ENVECTOR_API_KEY "enVector API key" "" + + case "$csp" in + aws) _prompt CSP_REGION "AWS region" "us-east-1" ;; + gcp) + _prompt CSP_REGION "GCP region" "us-central1" + _prompt GCP_PROJECT_ID "GCP project ID" "" + ;; + oci) + _prompt CSP_REGION "OCI region" "us-ashburn-1" + _prompt OCI_COMPARTMENT_ID "OCI compartment OCID" "" + ;; + esac + printf '\n' + else + TEAM_NAME="${RUNEVAULT_TEAM_NAME:-}" + ENVECTOR_ENDPOINT="${RUNEVAULT_ENVECTOR_ENDPOINT:-}" + ENVECTOR_API_KEY="${RUNEVAULT_ENVECTOR_API_KEY:-}" + CSP_REGION="${RUNEVAULT_CSP_REGION:-}" + GCP_PROJECT_ID="${RUNEVAULT_GCP_PROJECT_ID:-}" + OCI_COMPARTMENT_ID="${RUNEVAULT_OCI_COMPARTMENT_ID:-}" + + local missing=() + [[ -z "$TEAM_NAME" ]] && missing+=("RUNEVAULT_TEAM_NAME") + [[ -z "$ENVECTOR_ENDPOINT" ]] && missing+=("RUNEVAULT_ENVECTOR_ENDPOINT") + [[ -z "$ENVECTOR_API_KEY" ]] && missing+=("RUNEVAULT_ENVECTOR_API_KEY") + [[ "$csp" = gcp && -z "$GCP_PROJECT_ID" ]] && missing+=("RUNEVAULT_GCP_PROJECT_ID") + [[ "$csp" = oci && -z "$OCI_COMPARTMENT_ID" ]] && missing+=("RUNEVAULT_OCI_COMPARTMENT_ID") + if [[ ${#missing[@]} -gt 0 ]]; then + printf 'ERROR: Missing required env vars:\n' >&2 + for v in "${missing[@]}"; do printf ' %s\n' "$v" >&2; done + exit 1 fi - - # ── Draw the menu ── - _draw_menu() { - local i - for i in "${!options[@]}"; do - if [ "$i" -eq "$_sel" ]; then - printf " ${GREEN}${BOLD}> %s${NC}\n" "${options[$i]}" >&2 - else - printf " %s\n" "${options[$i]}" >&2 - fi - done - } - - # ── Move cursor up to redraw ── - _erase_menu() { - local i - for (( i = 0; i < count; i++ )); do - printf '\033[1A\033[2K' >&2 - done - } - - printf '\033[?25l' >&2 # hide cursor - printf " ${BOLD}↑↓ move Enter confirm${NC}\n" >&2 - _draw_menu - - while true; do - local key="" - IFS= read -rsn1 key - if [ "$key" = $'\x1b' ]; then - 
local seq="" - IFS= read -rsn2 -t 1 seq || true - case "$seq" in - '[A') # Up arrow - if [ "$_sel" -gt 0 ]; then - _sel=$((_sel - 1)) - else - _sel=$((count - 1)) - fi - ;; - '[B') # Down arrow - if [ "$_sel" -lt $((count - 1)) ]; then - _sel=$((_sel + 1)) - else - _sel=0 - fi - ;; - esac - _erase_menu - _draw_menu - elif [ "$key" = "" ]; then - # Enter key - break - elif [ "$key" -ge 1 ] 2>/dev/null && [ "$key" -le "$count" ] 2>/dev/null; then - # Number key direct jump - _sel=$((key - 1)) - _erase_menu - _draw_menu - fi - done - - printf '\033[?25h' >&2 # show cursor - - echo "$_sel" + fi + + [[ -n "$TEAM_NAME" ]] || die "Team name is required." + [[ -n "$ENVECTOR_ENDPOINT" ]] || die "enVector endpoint is required." + [[ -n "$ENVECTOR_API_KEY" ]] || die "enVector API key is required." + if [[ "$csp" = gcp ]]; then + [[ -n "$GCP_PROJECT_ID" ]] || die "GCP project ID is required." + fi + if [[ "$csp" = oci ]]; then + [[ -n "$OCI_COMPARTMENT_ID" ]] || die "OCI compartment OCID is required." + fi } -# ─── Resolve latest release version ────────────────────────────────────────── - -resolve_version() { - if [ -n "$VERSION_OVERRIDE" ]; then - print_step "Using version override: ${VERSION_OVERRIDE}" - VERSION="$VERSION_OVERRIDE" - DOCKER_TAG="$VERSION_OVERRIDE" - GITHUB_RAW_BASE="https://raw.githubusercontent.com/${REPO}/${VERSION}" - return - fi - - print_step "Resolving latest release version..." - local api_response - api_response=$(curl -fsSL "https://api.github.com/repos/${REPO}/releases/latest" 2>/dev/null) || true - VERSION=$(echo "$api_response" | grep '"tag_name"' | sed -E 's/.*"([^"]+)".*/\1/') || true - - if [ -z "${VERSION:-}" ]; then - print_warn "No release found. Falling back to 'main' branch." 
- VERSION="main" - DOCKER_TAG="latest" - else - print_info "Latest release: ${VERSION}" - DOCKER_TAG="${VERSION}" - fi - - GITHUB_RAW_BASE="https://raw.githubusercontent.com/${REPO}/${VERSION}" +csp_generate_ssh_key() { + local key_path="${INSTALL_DIR_CSP}/ssh_key" + if [[ -f "$key_path" ]]; then + info "SSH key already exists: ${key_path}" + return 0 + fi + ssh-keygen -t ed25519 -N '' -f "$key_path" -q + chmod 0600 "$key_path" + chmod 0644 "${key_path}.pub" + [[ -n "${SUDO_USER:-}" ]] \ + && chown "${SUDO_USER}" "$key_path" "${key_path}.pub" + success "SSH key generated: ${key_path}" } -# ─── Prerequisite checks ───────────────────────────────────────────────────── - -check_command() { - local cmd="$1" install_hint="$2" - if ! command -v "$cmd" &>/dev/null; then - print_error "'$cmd' is not installed." - echo " Install: $install_hint" - return 1 - fi - print_info "$cmd found" - return 0 +_curl_retry_csp() { + local url=$1 dest=$2 i + for i in 1 2 3; do + curl -fsSL --connect-timeout 15 -o "$dest" "$url" && return 0 + warn "Download attempt ${i} failed for $(basename "$url"). Retrying..." + sleep 5 + done + die "Failed to download: ${url}" } -check_prerequisites_local() { - print_step "Checking prerequisites..." - local missing=0 - check_command docker "https://docs.docker.com/get-docker/" || missing=1 - check_command openssl "apt install openssl / brew install openssl" || missing=1 - check_command curl "apt install curl / brew install curl" || missing=1 - - # docker compose (v2 plugin) - if ! docker compose version &>/dev/null 2>&1; then - print_error "'docker compose' (v2 plugin) is not available." 
- echo " Install: https://docs.docker.com/compose/install/" - missing=1 +csp_copy_terraform_files() { + local csp=$1 + local script_dir + script_dir="$(cd "$(dirname "$0")" && pwd)" + local tf_src="${script_dir}/deployment/${csp}" + local tf_dest="${INSTALL_DIR_CSP}/deployment" + mkdir -p "$tf_dest" + + local files + case "$csp" in + aws) files=(main.tf cloud-init.yaml) ;; + *) files=(main.tf startup-script.sh) ;; + esac + + for f in "${files[@]}"; do + if [[ -f "${tf_src}/${f}" ]]; then + cp "${tf_src}/${f}" "${tf_dest}/${f}" else - print_info "docker compose found" + info "Downloading ${f} from GitHub..." + _curl_retry_csp "${RAW_BASE}/${VERSION}/deployment/${csp}/${f}" "${tf_dest}/${f}" fi + done - if [ "$missing" -eq 1 ]; then - echo "" - print_error "Please install the missing prerequisites and re-run." - exit 1 - fi - - # Check Docker daemon - if ! docker info &>/dev/null 2>&1; then - print_error "Cannot connect to Docker daemon. Is Docker running?" - echo " Fix: systemctl start docker" - exit 1 - fi + printf '*.tfvars\nterraform.tfstate*\n.terraform/\n' > "${INSTALL_DIR_CSP}/.gitignore" + [[ -n "${SUDO_USER:-}" ]] && chown -R "${SUDO_USER}" "$tf_dest" "${INSTALL_DIR_CSP}/.gitignore" + success "Terraform files ready: ${tf_dest}" } -check_prerequisites_csp() { - local provider="$1" - print_step "Checking prerequisites..." 
+escape_tf() { printf '%s' "$1" | sed 's/\\/\\\\/g; s/"/\\"/g'; } - local missing=0 - check_command terraform "https://developer.hashicorp.com/terraform/install" || missing=1 - check_command curl "apt install curl / brew install curl" || missing=1 - check_command openssl "apt install openssl / brew install openssl" || missing=1 +csp_render_tfvars() { + local csp=$1 + local tf_dir="${INSTALL_DIR_CSP}/deployment" + local tfvars="${tf_dir}/terraform.tfvars" + local public_key="" - case "$provider" in - aws) check_command aws "https://aws.amazon.com/cli/" || missing=1 ;; - gcp) check_command gcloud "https://cloud.google.com/sdk/docs/install" || missing=1 ;; - oci) check_command oci "https://docs.oracle.com/en-us/iaas/Content/API/SDKDocs/cliinstall.htm" || missing=1 ;; + if [[ -f "${tf_dir}/terraform.tfstate" ]]; then + if [[ "$NON_INTERACTIVE" -eq 0 ]]; then + local answer=n + read -r -p "terraform.tfstate already exists in ${tf_dir}. Re-apply? [y/N] " answer + [[ "$answer" =~ ^[Yy] ]] || { info "Aborted."; exit 0; } + else + warn "terraform.tfstate exists — re-applying (idempotent)." + fi + fi + + [[ -f "${INSTALL_DIR_CSP}/ssh_key.pub" ]] \ + && public_key=$(cat "${INSTALL_DIR_CSP}/ssh_key.pub") + + { + printf 'team_name = "%s"\n' "$(escape_tf "${TEAM_NAME:-default}")" + printf 'tls_mode = "self-signed"\n' + printf 'envector_endpoint = "%s"\n' "$(escape_tf "${ENVECTOR_ENDPOINT}")" + printf 'envector_api_key = "%s"\n' "$(escape_tf "${ENVECTOR_API_KEY}")" + printf 'runevault_version = "%s"\n' "$(escape_tf "${VERSION}")" + printf 'public_key = "%s"\n' "$(escape_tf "${public_key}")" + printf 'region = "%s"\n' "$(escape_tf "${CSP_REGION}")" + case "$csp" in + gcp) printf 'project_id = "%s"\n' "$(escape_tf "${GCP_PROJECT_ID}")" ;; + oci) printf 'compartment_id = "%s"\n' "$(escape_tf "${OCI_COMPARTMENT_ID}")" ;; esac + } > "$tfvars" - if [ "$missing" -eq 1 ]; then - echo "" - print_error "Please install the missing prerequisites and re-run." 
- exit 1 - fi + chmod 0600 "$tfvars" + [[ -n "${SUDO_USER:-}" ]] && chown "${SUDO_USER}" "$tfvars" + success "terraform.tfvars written: ${tfvars}" } -# ─── Interactive prompts ───────────────────────────────────────────────────── +csp_run_terraform() { + local tf_dir="${INSTALL_DIR_CSP}/deployment" + local tf_user="${SUDO_USER:-$(id -un)}" -choose_deploy_target() { - print_step "Select deployment target" - local options=("Local (This machine)" "AWS" "GCP" "OCI") - local targets=("local" "aws" "gcp" "oci") - local selected - selected=$(select_menu "${options[@]}") - DEPLOY_TARGET="${targets[$selected]}" - print_info "Deployment target: ${DEPLOY_TARGET}" -} + info "Running terraform init..." + (cd "$tf_dir" && sudo -u "$tf_user" terraform init -input=false) + info "Running terraform apply..." + (cd "$tf_dir" && sudo -u "$tf_user" terraform apply -auto-approve -input=false) -prompt_install_dir() { - print_step "Installation directory" - local default_dir="$DEFAULT_INSTALL_DIR" - if [ "$DEPLOY_TARGET" != "local" ]; then - default_dir="$HOME/rune-vault-${DEPLOY_TARGET}" - echo " Terraform files, state, and SSH keys are stored here." - echo " Keep this directory to manage (update/destroy) your deployment." - echo "" - fi - prompt INSTALL_DIR "Directory" "$default_dir" + chmod 0600 "${tf_dir}/terraform.tfstate" 2>/dev/null || true + chmod 0600 "${tf_dir}/terraform.tfstate.backup" 2>/dev/null || true + success "Terraform apply complete." } -prompt_tls_mode() { - print_step "TLS configuration" - local options=("Generate self-signed certificate" "No TLS (not recommended)") - local modes=("self-signed" "none") - local selected - selected=$(select_menu "${options[@]}") - TLS_MODE="${modes[$selected]}" - - if [ "$TLS_MODE" = "self-signed" ]; then - echo "" - prompt TLS_HOSTNAME "Domain name for the certificate (leave empty if none)" "" - fi - - if [ "$TLS_MODE" = "none" ]; then - print_warn "Running without TLS. gRPC traffic will be unencrypted." 
- print_warn "This is NOT recommended for production." +csp_post_deploy() { + local csp=$1 + local tf_dir="${INSTALL_DIR_CSP}/deployment" + local tf_user="${SUDO_USER:-$(id -un)}" + local key_path="${INSTALL_DIR_CSP}/ssh_key" + + local public_ip + public_ip=$(cd "$tf_dir" && sudo -u "$tf_user" terraform output -raw vault_public_ip 2>/dev/null) \ + || die "Could not read vault_public_ip from terraform output." + CSP_PUBLIC_IP="$public_ip" + + local ssh_user=ubuntu + + mkdir -p "${INSTALL_DIR_CSP}/certs" + [[ -n "${SUDO_USER:-}" ]] && chown "${SUDO_USER}" "${INSTALL_DIR_CSP}/certs" + local scp_opts="-o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=15" + local scp_prefix="" + [[ -n "${SUDO_USER:-}" ]] && scp_prefix="sudo -u ${SUDO_USER}" + + local timeout_secs=1800 + info "Waiting for VM install to finish and CA cert to appear (up to $((timeout_secs / 60)) min)..." + local deadline=$(( $(date +%s) + timeout_secs )) + while [[ $(date +%s) -lt $deadline ]]; do + # shellcheck disable=SC2086 + if $scp_prefix scp $scp_opts -i "$key_path" \ + "${ssh_user}@${public_ip}:/opt/runevault/certs/ca.pem" \ + "${INSTALL_DIR_CSP}/certs/ca.pem" 2>/dev/null; then + success "CA certificate saved: ${INSTALL_DIR_CSP}/certs/ca.pem" + return 0 fi + sleep 15 + done - print_info "TLS mode: ${TLS_MODE}" + die "Timed out waiting for VM-side install. SSH in and check /var/log/runevault-install.log: ssh -i ${key_path} ${ssh_user}@${public_ip}" } -prompt_envector_config() { - print_step "enVector Cloud configuration" - echo " Create your enVector cluster at https://envector.io before proceeding." - echo " You will need the endpoint URL and API key from the dashboard." - echo " Index name is used to store and retrieve your team's organizational memory." - echo "" - prompt ENVECTOR_ENDPOINT "enVector endpoint (e.g. cluster-id.clusters.envector.io)" - prompt ENVECTOR_API_KEY "enVector API key (e.g. 
aBcDE_12345_xxxxx)" - prompt VAULT_INDEX_NAME "Index name" "runecontext" - - if [ -z "$ENVECTOR_ENDPOINT" ] || [ -z "$ENVECTOR_API_KEY" ]; then - print_error "enVector endpoint and API key are required." - exit 1 - fi - if [ -z "$VAULT_INDEX_NAME" ]; then - print_error "Index name is required." - exit 1 - fi - print_info "enVector endpoint: ${ENVECTOR_ENDPOINT}" +csp_summary() { + local csp=$1 + local tf_dir="${INSTALL_DIR_CSP}/deployment" + local key_path="${INSTALL_DIR_CSP}/ssh_key" + local public_ip="${CSP_PUBLIC_IP:-}" + + printf '\n' + success "Rune-Vault deployed to $(printf '%s' "$csp" | tr 'a-z' 'A-Z')." + printf '\n' + printf ' Endpoint: %s:50051\n' "$public_ip" + printf ' CA cert: %s\n' "${INSTALL_DIR_CSP}/certs/ca.pem" + printf ' SSH: ssh -i %s ubuntu@%s\n' "$key_path" "$public_ip" + printf ' Terraform: %s\n' "$tf_dir" + printf '\n' + printf 'Tear down:\n' + printf ' cd %s && terraform destroy -auto-approve\n' "$tf_dir" + printf '\n' + printf 'Next steps (SSH into the VM, then run on the VM):\n' + printf ' ssh -i %s ubuntu@%s\n' "$key_path" "$public_ip" + printf '\n' + printf ' Issue a token: runevault token issue --user --role member\n' + printf ' Check status: runevault status\n' + printf ' View logs: runevault logs\n' + printf ' Manage daemon: sudo systemctl start|stop|restart runevault\n' + printf '\n' + warn "BACKUP: Keep this safe — it cannot be recovered if lost:" + warn " Terraform state: ${tf_dir}/terraform.tfstate" } -prompt_csp_config() { - prompt TEAM_NAME "Team name (used for resource naming)" "default" - - case "$DEPLOY_TARGET" in - aws) - prompt CSP_REGION "AWS region" "us-east-1" - ;; - gcp) - prompt CSP_REGION "GCP region" "us-central1" - prompt GCP_PROJECT_ID "GCP project ID" - if [ -z "$GCP_PROJECT_ID" ]; then - print_error "GCP project ID is required."; exit 1 - fi - ;; - oci) - prompt CSP_REGION "OCI region" "us-ashburn-1" - prompt OCI_COMPARTMENT_ID "OCI compartment OCID" - if [ -z "$OCI_COMPARTMENT_ID" ]; then - print_error "OCI 
compartment OCID is required."; exit 1 - fi - ;; +csp_uninstall() { + local csp=$1 + local user_home="${SUDO_USER:+$(eval echo ~"${SUDO_USER}")}" + user_home="${user_home:-$HOME}" + INSTALL_DIR_CSP="${INSTALL_DIR_CSP:-${user_home}/rune-vault-${csp}}" + local tf_dir="${INSTALL_DIR_CSP}/deployment" + + if [[ ! -f "${tf_dir}/terraform.tfstate" ]]; then + warn "No terraform.tfstate found at ${tf_dir}/terraform.tfstate — nothing to destroy." + return 0 + fi + + command -v terraform >/dev/null 2>&1 \ + || die "terraform is required to destroy CSP infrastructure. Install it: https://developer.hashicorp.com/terraform/install" + + printf '\n' + warn "This will run 'terraform destroy' on the ${csp} infrastructure at:" + warn " ${tf_dir}" + warn "All cloud resources (VM, network, etc.) will be removed permanently." + printf '\n' + + if [[ "$NON_INTERACTIVE" -eq 0 ]]; then + local answer=n + read -r -p "Continue? [y/N] " answer + [[ "$answer" =~ ^[Yy] ]] || { info "Aborted."; exit 0; } + fi + + local tf_user="${SUDO_USER:-$(id -un)}" + info "Running terraform destroy in ${tf_dir}..." + (cd "$tf_dir" && sudo -u "$tf_user" terraform destroy -auto-approve) + success "Cloud infrastructure destroyed." + + printf '\n' + warn "The following directory contains terraform state, SSH keys, and CA cert:" + warn " ${INSTALL_DIR_CSP}/" + printf '\n' + + if [[ "$NON_INTERACTIVE" -eq 1 ]]; then + warn "Non-interactive mode: directory preserved. Remove manually: rm -rf ${INSTALL_DIR_CSP}" + else + local answer=n + read -r -p "Delete the entire directory? [y/N] " answer + case "$answer" in + [Yy]*) + rm -rf "${INSTALL_DIR_CSP}" + success "Directory removed: ${INSTALL_DIR_CSP}" + ;; + *) + info "Directory preserved: ${INSTALL_DIR_CSP}" + ;; esac -} + fi -generate_team_secret() { - VAULT_TEAM_SECRET_VALUE="evt_$(openssl rand -hex 32)" - print_info "Team secret generated." + success "Rune-Vault ${csp} infrastructure uninstalled." 
} -generate_config_files() { - local dir="$1" - - cat > "$dir/vault-roles.yml" <<'ROLESEOF' -roles: - admin: - scope: [get_public_key, decrypt_scores, decrypt_metadata, manage_tokens] - top_k: 50 - rate_limit: 150/60s - member: - scope: [get_public_key, decrypt_scores, decrypt_metadata] - top_k: 10 - rate_limit: 30/60s -ROLESEOF - - cat > "$dir/vault-tokens.yml" <<'TOKENSEOF' -tokens: [] -TOKENSEOF - - chmod 600 "$dir/vault-roles.yml" "$dir/vault-tokens.yml" - print_info "Token/role config files created." +csp_dispatch() { + local csp="$TARGET" + local user_home="${SUDO_USER:+$(eval echo ~"${SUDO_USER}")}" + user_home="${user_home:-$HOME}" + INSTALL_DIR_CSP="${INSTALL_DIR_CSP:-${user_home}/rune-vault-${csp}}" + mkdir -p "$INSTALL_DIR_CSP" + [[ -n "${SUDO_USER:-}" ]] && chown "${SUDO_USER}" "$INSTALL_DIR_CSP" + + csp_preflight "$csp" + + if [[ -z "$VERSION" ]]; then + info "Resolving latest release version..." + VERSION=$(curl -fsSL \ + "https://api.github.com/repos/${REPO}/releases/latest" \ + | grep '"tag_name"' | head -1 \ + | sed 's/.*"tag_name": *"\([^"]*\)".*/\1/') + [[ -n "$VERSION" ]] || die "Failed to resolve latest version from GitHub API." + info "Latest version: ${VERSION}" + fi + + csp_prompt_config "$csp" + [[ -n "$VERSION" ]] || die "runevault version is required (use --version )." + csp_generate_ssh_key + csp_copy_terraform_files "$csp" + csp_render_tfvars "$csp" + csp_run_terraform + csp_post_deploy "$csp" + csp_summary "$csp" + exit 0 } -setup_runevault_alias() { - if [ -z "${SUDO_USER:-}" ]; then - return - fi +# ── Tool auto-install ────────────────────────────────────────────────────────── + +# Run brew as the original (non-root) user when invoked via sudo on macOS. 
+_brew() { sudo -u "${SUDO_USER:-$(id -un)}" brew "$@"; } + +_pkg_install() { + if command -v apt-get >/dev/null 2>&1; then + apt-get install -y "$@" + elif command -v dnf >/dev/null 2>&1; then + dnf install -y "$@" + elif command -v yum >/dev/null 2>&1; then + yum install -y "$@" + else + die "No supported package manager found (apt/dnf/yum). Install manually: $*" + fi +} - # Add user to docker group - if command -v usermod >/dev/null 2>&1; then - usermod -aG docker "$SUDO_USER" 2>/dev/null || true - fi +_install_tool() { + local tool=$1 + info "Installing ${tool}..." + case "$OS_SLUG:$tool" in + linux:openssl) _pkg_install openssl ;; + linux:sha256sum) _pkg_install coreutils ;; + linux:terraform) + local tf_version arch_suffix=amd64 + [[ "$ARCH_SLUG" = arm64 ]] && arch_suffix=arm64 + tf_version=$(curl -fsSL https://api.github.com/repos/hashicorp/terraform/releases/latest \ + | grep '"tag_name"' | head -1 \ + | sed 's/.*"tag_name": *"v\([^"]*\)".*/\1/') + [[ -n "$tf_version" ]] || die "Failed to resolve latest terraform version." + command -v unzip >/dev/null 2>&1 || _pkg_install unzip + local tmpdir + tmpdir=$(mktemp -d) + curl -fsSL \ + "https://releases.hashicorp.com/terraform/${tf_version}/terraform_${tf_version}_linux_${arch_suffix}.zip" \ + -o "${tmpdir}/tf.zip" + unzip -o "${tmpdir}/tf.zip" -d "${tmpdir}" >/dev/null + install -m 0755 "${tmpdir}/terraform" /usr/local/bin/terraform + rm -rf "${tmpdir}" + ;; + darwin:openssl) _brew install openssl ;; + darwin:terraform) _brew install terraform ;; + darwin:shasum) + die "shasum is pre-installed on macOS. Something is very wrong." ;; + *:systemctl) + die "systemctl not found. This installer requires a systemd-based Linux." ;; + *) + die "Don't know how to install '${tool}' on ${OS_SLUG}. Install it manually." ;; + esac + command -v "$tool" >/dev/null 2>&1 \ + || die "Installation of '${tool}' appeared to succeed but binary not found in PATH." + success "${tool} installed." 
+} - # Detect shell config - local user_home - user_home="$(eval echo ~"$SUDO_USER")" - local shell_rc="" - if [ -f "$user_home/.zshrc" ]; then - shell_rc="$user_home/.zshrc" - elif [ -f "$user_home/.bashrc" ]; then - shell_rc="$user_home/.bashrc" +# ── Phase 1: Preflight ───────────────────────────────────────────────────────── +preflight() { + info "Running preflight checks..." + + [[ "$(id -u)" -eq 0 ]] || die "This installer must be run as root (use sudo)." + + local tools=(curl) + if [[ "$OS_SLUG" = linux ]]; then + tools+=(sha256sum systemctl) + else + tools+=(shasum) + fi + # openssl only needed when auto-generating TLS certs + if [[ -z "${RUNEVAULT_TLS_CERT_PATH:-}" || -z "${RUNEVAULT_TLS_KEY_PATH:-}" ]]; then + tools+=(openssl) + fi + + # Collect missing tools (systemctl is never auto-installable — fail immediately) + local missing=() + for tool in "${tools[@]}"; do + command -v "$tool" >/dev/null 2>&1 && continue + [[ "$tool" = systemctl ]] \ + && die "systemctl not found. This installer requires a systemd-based Linux." + missing+=("$tool") + done + + if [[ ${#missing[@]} -gt 0 ]]; then + printf '\n' + warn "The following required tools are not installed:" + for tool in "${missing[@]}"; do printf ' - %s\n' "$tool"; done + printf '\n' + + local answer=n + if [[ "$NON_INTERACTIVE" -eq 0 ]]; then + read -r -p "Install missing tools automatically? [y/N] " answer + else + warn "Non-interactive mode: cannot auto-install missing tools." fi - if [ -n "$shell_rc" ]; then - if ! grep -q 'alias runevault=' "$shell_rc" 2>/dev/null; then - echo '' >> "$shell_rc" - echo '# Rune-Vault admin CLI' >> "$shell_rc" - echo 'alias runevault="docker exec -it rune-vault python3 /app/vault_admin_cli.py"' >> "$shell_rc" - print_info "runevault alias added to ${shell_rc}" - print_warn "Run 'exec \$SHELL' to reload your shell and enable the runevault command." 
- fi + case "$answer" in + [Yy]*) + for tool in "${missing[@]}"; do + _install_tool "$tool" + done + ;; + *) + printf 'Install them manually and re-run the installer:\n' >&2 + for tool in "${missing[@]}"; do + case "$OS_SLUG:$tool" in + linux:openssl) printf ' openssl: apt install openssl\n' >&2 ;; + linux:sha256sum) printf ' sha256sum: apt install coreutils\n' >&2 ;; + darwin:openssl) printf ' openssl: brew install openssl\n' >&2 ;; + esac + done + exit 1 + ;; + esac + fi + + # Port availability (best-effort — skip gracefully if tools unavailable) + local port_occupied=0 + if [[ "$OS_SLUG" = linux ]] && command -v ss >/dev/null 2>&1; then + ss -tlnp 2>/dev/null | grep -q ":${GRPC_PORT}" && port_occupied=1 || true + elif command -v lsof >/dev/null 2>&1; then + lsof -iTCP:"${GRPC_PORT}" -sTCP:LISTEN -P -n 2>/dev/null \ + | grep -q ":${GRPC_PORT}" && port_occupied=1 || true + fi + if [[ "$port_occupied" -eq 1 ]]; then + if [[ "$OS_SLUG" = linux ]]; then + die "Port ${GRPC_PORT} is already in use. Stop the existing daemon first: + sudo systemctl stop runevault" + else + die "Port ${GRPC_PORT} is already in use. 
Stop the existing daemon first: + sudo launchctl bootout system/com.cryptolabinc.runevault" fi -} - -# ─── Confirmation summary ──────────────────────────────────────────────────── - -show_confirmation() { - print_header "Configuration Summary" - echo " Deployment target : ${DEPLOY_TARGET}" - echo " Install directory : ${INSTALL_DIR}" - echo " TLS mode : ${TLS_MODE}" - [ -n "${TLS_HOSTNAME:-}" ] && echo " TLS domain : ${TLS_HOSTNAME}" - echo " Team secret : (auto-generated in .env)" - echo " enVector endpoint : ${ENVECTOR_ENDPOINT}" - echo " Index name : ${VAULT_INDEX_NAME}" - if [ "$DEPLOY_TARGET" != "local" ]; then - echo " Team name : ${TEAM_NAME}" - echo " Region : ${CSP_REGION}" - [ "${DEPLOY_TARGET}" = "gcp" ] && echo " GCP project : ${GCP_PROJECT_ID}" - [ "${DEPLOY_TARGET}" = "oci" ] && echo " OCI compartment : ${OCI_COMPARTMENT_ID}" + fi + + # Version resolution (skip if using a local binary) + if [[ -z "$LOCAL_BINARY" && -z "$VERSION" ]]; then + info "Resolving latest release version..." + VERSION=$(curl -fsSL \ + "https://api.github.com/repos/${REPO}/releases/latest" \ + | grep '"tag_name"' \ + | head -1 \ + | sed 's/.*"tag_name": *"\([^"]*\)".*/\1/') + [[ -n "$VERSION" ]] || die "Failed to resolve latest version from GitHub API." + info "Latest version: ${VERSION}" + fi + + # Already-installed version check (skip if --force or using a local binary) + if [[ "$FORCE" -eq 0 && -z "$LOCAL_BINARY" && -x "$BINARY_DEST" ]]; then + local installed_ver + installed_ver=$("$BINARY_DEST" version 2>/dev/null | grep -oE 'v[0-9]+\.[0-9]+\.[0-9]+[^ ]*' | head -1 || true) + if [[ -n "$installed_ver" && "$installed_ver" = "$VERSION" ]]; then + warn "runevault ${VERSION} is already installed. Use --force to reinstall." + exit 0 fi - echo "" + fi - if ! prompt_yn "Proceed with deployment?"; then - print_warn "Aborted." - exit 0 - fi + success "Preflight checks passed." 
} -# ─── File download helper ──────────────────────────────────────────────────── - -download_file() { - local url="$1" dest="$2" - local attempt max_attempts=3 - for attempt in $(seq 1 $max_attempts); do - if curl -fsSL "$url" -o "$dest"; then - return 0 - fi - [ "$attempt" -lt "$max_attempts" ] && sleep 1 - done - print_error "Failed to download (after ${max_attempts} attempts): $url" - exit 1 +# ── Phase 2 & 3: Download and verify ────────────────────────────────────────── +SCRATCH="" + +_curl_retry() { + local url=$1 dest=$2 i + for i in 1 2 3; do + curl -fsSL --connect-timeout 15 -o "$dest" "$url" && return 0 + warn "Download attempt ${i} failed for $(basename "$url"). Retrying in 5s..." + sleep 5 + done + die "Failed to download: ${url}" } -# ─── TLS handling ───────────────────────────────────────────────────────────── - -setup_tls() { - local certs_dir="$INSTALL_DIR/certs" - mkdir -p "$certs_dir" - - case "$TLS_MODE" in - self-signed) - print_step "Generating self-signed certificates..." - download_file "${GITHUB_RAW_BASE}/scripts/generate-certs.sh" "$certs_dir/generate-certs.sh" - chmod +x "$certs_dir/generate-certs.sh" - (cd "$certs_dir" && bash generate-certs.sh . "${TLS_HOSTNAME:-localhost}") - TLS_CERT_PATH="$certs_dir/server.pem" - TLS_KEY_PATH="$certs_dir/server.key" - TLS_CA_PATH="$certs_dir/ca.pem" - print_info "Self-signed certificates generated in ${certs_dir}/" - ;; - none) - print_warn "Skipping TLS setup." - TLS_CERT_PATH="" - TLS_KEY_PATH="" - TLS_CA_PATH="" - ;; - esac -} - -# ─── Generate .env file ────────────────────────────────────────────────────── - -generate_env_file() { - local env_file="$INSTALL_DIR/.env" - - cat > "$env_file" <> "$env_file" +_checksum_verify() { + local sums_file=$1 archive=$2 archive_name line + archive_name=$(basename "$archive") + line=$(grep -F "$archive_name" "$sums_file") \ + || die "Archive '${archive_name}' not found in SHA256SUMS." 
+ ( + cd "$(dirname "$archive")" + if [[ "$OS_SLUG" = linux ]]; then + printf '%s\n' "$line" | sha256sum --check --quiet else - cat >> "$env_file" </dev/null) || true - rm -rf "$INSTALL_DIR" - print_info "Previous installation removed." - else - print_warn "Aborted." - exit 0 - fi - fi - - # Create directory structure - mkdir -p "$INSTALL_DIR"/{certs,backups,logs} - print_info "Directory structure created: ${INSTALL_DIR}/" - - # Download docker-compose.yml - print_step "Downloading docker-compose.yml..." - download_file "${GITHUB_RAW_BASE}/vault/docker-compose.yml" "$INSTALL_DIR/docker-compose.yml" - # Pin image to the resolved version - sed -i.bak "s|image:.*rune-vault:.*|image: ${DOCKER_IMAGE}:${DOCKER_TAG}|" "$INSTALL_DIR/docker-compose.yml" - rm -f "$INSTALL_DIR/docker-compose.yml.bak" - print_info "docker-compose.yml downloaded." +download_and_verify() { + SCRATCH=$(mktemp -d) + trap 'rm -rf "$SCRATCH"' EXIT - # TLS - setup_tls - - # Generate .env and config files - generate_env_file - generate_config_files "$INSTALL_DIR" - - # Restore ownership to the invoking user (files were created as root via sudo) - if [ -n "${SUDO_USER:-}" ]; then - chown -R "$SUDO_USER" "$INSTALL_DIR" - fi - - # Pull image - print_step "Pulling Docker image..." - (cd "$INSTALL_DIR" && docker compose pull) - print_info "Docker image pulled." - - # Start container - print_step "Starting Rune-Vault..." - (cd "$INSTALL_DIR" && docker compose up -d) - print_info "Container started." - - # Health check - print_step "Waiting for Vault to become healthy..." - local elapsed=0 - local timeout=60 - while [ $elapsed -lt $timeout ]; do - if docker exec rune-vault curl -sf http://localhost:8081/health 2>/dev/null; then - print_info "Vault is healthy!" - - # Set up runevault alias for admin CLI - setup_runevault_alias - return 0 - fi - sleep 2 - elapsed=$((elapsed + 2)) - printf "." - done - - echo "" - print_error "Vault did not become healthy within ${timeout}s." 
- print_warn "Container logs:" - docker logs rune-vault 2>&1 | tail -30 - exit 1 + if [[ -n "$LOCAL_BINARY" ]]; then + info "Using local binary: ${LOCAL_BINARY}" + [[ -x "$LOCAL_BINARY" ]] || die "Local binary not executable: ${LOCAL_BINARY}" + cp "$LOCAL_BINARY" "$SCRATCH/runevault" + return 0 + fi + + local archive="runevault_${VERSION}_${OS_SLUG}_${ARCH_SLUG}.tar.gz" + local base_url="https://github.com/${REPO}/releases/download/${VERSION}" + + info "Downloading ${archive}..." + _curl_retry "${base_url}/${archive}" "$SCRATCH/${archive}" + _curl_retry "${base_url}/SHA256SUMS" "$SCRATCH/SHA256SUMS" + + if [[ "$SKIP_VERIFY" -eq 1 ]]; then + warn "SKIP_VERIFY=1: skipping checksum verification (development only)." + else + info "Verifying checksum..." + _checksum_verify "$SCRATCH/SHA256SUMS" "$SCRATCH/${archive}" + success "Checksum verified." + fi + + info "Extracting binary..." + tar -xzf "$SCRATCH/${archive}" -C "$SCRATCH" ./runevault + "$SCRATCH/runevault" version >/dev/null 2>&1 \ + || die "Extracted binary failed smoke test." } -# ─── CSP deployment ─────────────────────────────────────────────────────────── - -deploy_csp() { - local provider="$DEPLOY_TARGET" - print_header "Deploying Rune-Vault (${provider})" - - local tf_dir="$INSTALL_DIR/deployment" - mkdir -p "$tf_dir" - # Ensure the original user owns the deployment directory for terraform - if [ -n "${SUDO_USER:-}" ]; then - chown -R "$SUDO_USER" "$INSTALL_DIR" - fi - - # Download Terraform files - print_step "Downloading Terraform configuration..." 
- download_file "${GITHUB_RAW_BASE}/deployment/${provider}/main.tf" "$tf_dir/main.tf" - if [ "$provider" = "aws" ]; then - download_file "${GITHUB_RAW_BASE}/deployment/${provider}/cloud-init.yaml" "$tf_dir/cloud-init.yaml" - sed -i.bak "s|image:.*rune-vault:.*|image: ${DOCKER_IMAGE}:${DOCKER_TAG}|" "$tf_dir/cloud-init.yaml" - rm -f "$tf_dir/cloud-init.yaml.bak" +# ── Phase 4: System setup ────────────────────────────────────────────────────── +_create_system_group() { + if [[ "$OS_SLUG" = linux ]]; then + if ! getent group "$SERVICE_USER" >/dev/null 2>&1; then + groupadd --system "$SERVICE_USER" + success "System group '${SERVICE_USER}' created." else - download_file "${GITHUB_RAW_BASE}/deployment/${provider}/startup-script.sh" "$tf_dir/startup-script.sh" - sed -i.bak "s|image:.*rune-vault:.*|image: ${DOCKER_IMAGE}:${DOCKER_TAG}|" "$tf_dir/startup-script.sh" - rm -f "$tf_dir/startup-script.sh.bak" - fi - print_info "Terraform files downloaded." - - # Generate SSH key pair for EC2 access - local ssh_key_path="$INSTALL_DIR/ssh_key" - if [ ! -f "$ssh_key_path" ]; then - print_step "Generating SSH key pair..." - ssh-keygen -t ed25519 -f "$ssh_key_path" -N "" -q - chmod 600 "$ssh_key_path" - chmod 644 "${ssh_key_path}.pub" - print_info "SSH key generated: ${ssh_key_path}" - fi - local public_key - public_key=$(cat "${ssh_key_path}.pub") - - # Generate terraform.tfvars (use printf to avoid heredoc escaping issues) - print_step "Generating terraform.tfvars..." 
- escape_tf() { printf '%s' "$1" | sed 's/\\/\\\\/g; s/"/\\"/g'; } - { - printf 'team_secret = "%s"\n' "$(escape_tf "$VAULT_TEAM_SECRET_VALUE")" - printf 'team_name = "%s"\n' "$(escape_tf "$TEAM_NAME")" - printf 'region = "%s"\n' "$(escape_tf "$CSP_REGION")" - printf 'tls_mode = "%s"\n' "$(escape_tf "$TLS_MODE")" - printf 'tls_hostname = "%s"\n' "$(escape_tf "${TLS_HOSTNAME:-}")" - printf 'envector_endpoint = "%s"\n' "$(escape_tf "$ENVECTOR_ENDPOINT")" - printf 'envector_api_key = "%s"\n' "$(escape_tf "$ENVECTOR_API_KEY")" - printf 'vault_index_name = "%s"\n' "$(escape_tf "$VAULT_INDEX_NAME")" - printf 'public_key = "%s"\n' "$(escape_tf "$public_key")" - case "$provider" in - gcp) printf 'project_id = "%s"\n' "$(escape_tf "$GCP_PROJECT_ID")" ;; - oci) printf 'compartment_id = "%s"\n' "$(escape_tf "$OCI_COMPARTMENT_ID")" ;; - esac - } > "$tf_dir/terraform.tfvars" - - chmod 600 "$tf_dir/terraform.tfvars" - if [ -n "${SUDO_USER:-}" ]; then - chown -R "$SUDO_USER" "$INSTALL_DIR" - fi - print_info "terraform.tfvars created." - - # Terraform init & apply (run as the original user to preserve CLI auth) - print_step "Running Terraform..." - local tf_run="terraform" - if [ -n "${SUDO_USER:-}" ]; then - tf_run="sudo -u $SUDO_USER terraform" + info "System group '${SERVICE_USER}' already exists." fi - (cd "$tf_dir" && $tf_run init) - (cd "$tf_dir" && $tf_run apply -auto-approve) - - # Capture outputs - VAULT_PUBLIC_IP=$(cd "$tf_dir" && $tf_run output -raw vault_public_ip 2>/dev/null) || true - local vault_url - vault_url=$(cd "$tf_dir" && $tf_run output -raw vault_url 2>/dev/null) || true - - print_info "Infrastructure provisioned." - - # Health polling — wait for cloud-init to finish and Vault to start - if [ -n "$VAULT_PUBLIC_IP" ]; then - print_step "Waiting for Vault to become reachable (up to 10 min)..." 
- local elapsed=0 - local timeout=600 - while [ $elapsed -lt $timeout ]; do - if bash -c "echo >/dev/tcp/${VAULT_PUBLIC_IP}/50051" 2>/dev/null; then - print_info "Vault is reachable at ${VAULT_PUBLIC_IP}:50051!" - - # Download ca.pem from remote server - if [ "$TLS_MODE" = "self-signed" ]; then - mkdir -p "$INSTALL_DIR/certs" - if [ -n "${SUDO_USER:-}" ]; then - chown -R "$SUDO_USER" "$INSTALL_DIR/certs" - fi - local scp_opts="-i $ssh_key_path -o StrictHostKeyChecking=no -o ConnectTimeout=15 -o BatchMode=yes" - local scp_prefix="" - if [ -n "${SUDO_USER:-}" ]; then - scp_prefix="sudo -u $SUDO_USER" - fi - # Retry SCP (SSH may not be ready immediately) - local downloaded=0 - for attempt in 1 2 3; do - sleep 10 - for ssh_user in ubuntu opc; do - if $scp_prefix scp $scp_opts \ - "${ssh_user}@${VAULT_PUBLIC_IP}:/opt/rune/certs/ca.pem" \ - "$INSTALL_DIR/certs/ca.pem" 2>/dev/null; then - downloaded=1; break 2 - fi - done - done - if [ "$downloaded" -eq 1 ]; then - CSP_CA_CERT_LOCAL="$INSTALL_DIR/certs/ca.pem" - print_info "CA certificate downloaded to ${CSP_CA_CERT_LOCAL}" - else - print_warn "Could not download ca.pem via SSH. Retrieve manually:" - echo " scp -i ${ssh_key_path} ubuntu@${VAULT_PUBLIC_IP}:/opt/rune/certs/ca.pem ${INSTALL_DIR}/certs/" - fi - fi - - break - fi - sleep 10 - elapsed=$((elapsed + 10)) - printf "." - done - echo "" - if [ $elapsed -ge $timeout ]; then - print_error "Vault not reachable within ${timeout}s. Cloud-init may still be running." - echo "" - echo " Debug via SSH:" - echo " ssh -i ${ssh_key_path} ubuntu@${VAULT_PUBLIC_IP} 'cloud-init status --wait && docker ps'" - echo "" - echo " Terraform directory: ${tf_dir}" - echo " To destroy resources: cd ${tf_dir} && terraform destroy" - exit 1 - fi + else + if ! dscl . -read /Groups/"$SERVICE_USER" >/dev/null 2>&1; then + local gid=490 + while dscl . -list /Groups PrimaryGroupID 2>/dev/null \ + | awk '{print $2}' | grep -qx "$gid"; do + gid=$((gid - 1)) + done + dscl . 
-create /Groups/"$SERVICE_USER" + dscl . -create /Groups/"$SERVICE_USER" PrimaryGroupID "$gid" + dscl . -create /Groups/"$SERVICE_USER" RealName "Rune Vault Admin Group" + success "System group '${SERVICE_USER}' created (GID=${gid})." + else + info "System group '${SERVICE_USER}' already exists." fi + fi } -# ─── Summary ────────────────────────────────────────────────────────────────── - -show_summary() { - local endpoint - if [ "$DEPLOY_TARGET" = "local" ]; then - if [ "$TLS_MODE" = "none" ]; then - endpoint="localhost:50051" - else - endpoint="localhost:50051 (TLS)" - fi +_create_system_user() { + if [[ "$OS_SLUG" = linux ]]; then + if ! id "$SERVICE_USER" >/dev/null 2>&1; then + useradd --system --no-create-home --shell /usr/sbin/nologin \ + -g "$SERVICE_USER" --no-user-group "$SERVICE_USER" + success "System user '${SERVICE_USER}' created." else - local ip="${VAULT_PUBLIC_IP:-}" - endpoint="${ip}:50051" + info "System user '${SERVICE_USER}' already exists." fi - - print_header "Deployment Complete" - echo " Vault Endpoint : ${endpoint}" - echo " Team Secret : (stored in ${INSTALL_DIR}/.env)" - echo " TLS Mode : ${TLS_MODE}" - if [ "$TLS_MODE" = "self-signed" ] && [ "$DEPLOY_TARGET" = "local" ]; then - echo " CA Certificate : ${INSTALL_DIR}/certs/ca.pem" - fi - echo "" - echo -e "${BOLD}Share with your team:${NC}" - echo "" - echo " Team members will need the following credentials when installing the" - echo " Rune plugin/extension. Share them securely (e.g. encrypted channel):" - echo "" - if [ -n "${TLS_HOSTNAME:-}" ]; then - echo " Endpoint : ${TLS_HOSTNAME}:50051" - elif [ "$DEPLOY_TARGET" != "local" ] && [ -n "${VAULT_PUBLIC_IP:-}" ]; then - echo " Endpoint : ${VAULT_PUBLIC_IP}:50051" + else + if ! id "$SERVICE_USER" >/dev/null 2>&1; then + local uid=490 + while dscl . -list /Users UniqueID 2>/dev/null \ + | awk '{print $2}' | grep -qx "$uid"; do + uid=$((uid - 1)) + done + local gid + gid=$(dscl . 
-read /Groups/"$SERVICE_USER" PrimaryGroupID 2>/dev/null \ + | awk '{print $2}') + dscl . -create /Users/"$SERVICE_USER" + dscl . -create /Users/"$SERVICE_USER" UserShell /usr/bin/false + dscl . -create /Users/"$SERVICE_USER" RealName "Rune Vault Service" + dscl . -create /Users/"$SERVICE_USER" UniqueID "$uid" + dscl . -create /Users/"$SERVICE_USER" PrimaryGroupID "$gid" + dscl . -create /Users/"$SERVICE_USER" NFSHomeDirectory /var/empty + dscl . -create /Users/"$SERVICE_USER" IsHidden 1 + success "System user '${SERVICE_USER}' created (UID=${uid})." else - echo " Endpoint : :50051" - fi - echo "" - echo " Issue per-user tokens with:" - echo " runevault token issue --user --role member --expires 90d" - echo "" - if [ "$DEPLOY_TARGET" = "local" ]; then - echo " Reload your shell before using the runevault command:" - echo " exec \$SHELL" - echo "" - fi - echo " Each team member uses their individual token for authentication." - echo " Team Secret (above) is only needed for DEK derivation — keep it secure." - if [ "$TLS_MODE" = "self-signed" ]; then - echo "" - echo " Your vault uses a self-signed CA. Team members also need the CA" - echo " certificate file below. Share this file directly — they will be" - echo " prompted to provide its path during plugin/extension setup." - echo "" - if [ -n "${CSP_CA_CERT_LOCAL}" ]; then - echo " CA Cert : ${CSP_CA_CERT_LOCAL}" - elif [ "$DEPLOY_TARGET" = "local" ]; then - echo " CA Cert : ${INSTALL_DIR}/certs/ca.pem" - else - echo " CA Cert : /opt/rune/certs/ca.pem (on the remote server)" - fi + info "System user '${SERVICE_USER}' already exists." fi - if [ "$DEPLOY_TARGET" != "local" ]; then - echo "" - echo -e "${BOLD}Next steps:${NC}" - echo " 1. Point your domain DNS to ${VAULT_PUBLIC_IP:-}" - echo " 2. 
To use custom TLS certificates, replace files in /opt/rune/certs/ on the server" - echo " and restart: ssh -i ${INSTALL_DIR}/ssh_key ubuntu@${VAULT_PUBLIC_IP:-} 'cd /opt/rune && docker compose restart'" - echo "" - echo " SSH access: ssh -i ${INSTALL_DIR}/ssh_key ubuntu@${VAULT_PUBLIC_IP:-}" - fi - echo "" - echo "Install directory: ${INSTALL_DIR}" - echo "" + fi } -# ─── main() ────────────────────────────────────────────────────────────────── +_add_invoking_user_to_group() { + local invoking_user="${SUDO_USER:-}" + [[ -z "$invoking_user" ]] && return 0 + if [[ "$OS_SLUG" = linux ]]; then + usermod -aG "$SERVICE_USER" "$invoking_user" + else + dscl . -append /Groups/"$SERVICE_USER" GroupMembership "$invoking_user" 2>/dev/null || true + fi + success "Added '${invoking_user}' to group '${SERVICE_USER}'." +} -main() { - print_header "Rune-Vault Interactive Setup" - echo "One-command deployment for Rune organizational memory vault." - echo "" +setup_system() { + info "Setting up system..." + + if [[ "$SKIP_SERVICE" -eq 0 ]]; then + _create_system_group + _create_system_user + fi + + # /opt may not exist on fresh macOS + [[ "$OS_SLUG" = darwin ]] && mkdir -p /opt + + local dir + for dir in \ + "${INSTALL_PREFIX}" \ + "${INSTALL_PREFIX}/configs" \ + "${INSTALL_PREFIX}/certs" \ + "${INSTALL_PREFIX}/logs" + do + mkdir -p "$dir" + chmod 0750 "$dir" + [[ "$SKIP_SERVICE" -eq 0 ]] && chown "${SERVICE_USER}:${SERVICE_USER}" "$dir" + done + # vault-keys stays 0700: secret FHE key material must never be group-readable. 
+ mkdir -p "${INSTALL_PREFIX}/vault-keys" + chmod 0700 "${INSTALL_PREFIX}/vault-keys" + [[ "$SKIP_SERVICE" -eq 0 ]] && chown "${SERVICE_USER}:${SERVICE_USER}" "${INSTALL_PREFIX}/vault-keys" + + success "Directories created under ${INSTALL_PREFIX}/" + + install -m 0755 "$SCRATCH/runevault" "$BINARY_DEST" + success "Binary installed: ${BINARY_DEST}" + + if [[ "$SKIP_SERVICE" -eq 0 ]]; then + _add_invoking_user_to_group + fi +} - resolve_version +# ── Phase 5: TLS certificates ────────────────────────────────────────────────── +generate_tls_certs() { + local cert_dir="${INSTALL_PREFIX}/certs" + + # BYO cert: copy provided files and skip generation + if [[ -n "${RUNEVAULT_TLS_CERT_PATH:-}" && -n "${RUNEVAULT_TLS_KEY_PATH:-}" ]]; then + cp "${RUNEVAULT_TLS_CERT_PATH}" "${cert_dir}/server.pem" + cp "${RUNEVAULT_TLS_KEY_PATH}" "${cert_dir}/server.key" + chmod 0644 "${cert_dir}/server.pem" + chmod 0600 "${cert_dir}/server.key" + [[ "$SKIP_SERVICE" -eq 0 ]] \ + && chown "$SERVICE_USER" "${cert_dir}/server.pem" "${cert_dir}/server.key" + info "Using provided TLS certificates." + return 0 + fi - # 1. Deployment target - choose_deploy_target + if [[ -f "${cert_dir}/server.pem" && "$FORCE" -eq 0 ]]; then + info "TLS certificates already exist (use --force to regenerate)." + return 0 + fi + + info "Generating self-signed TLS certificates..." 
+ + local public_ip="" + public_ip=$(curl -4 -sf --connect-timeout 5 ifconfig.me 2>/dev/null || true) + [[ -n "$public_ip" ]] && info "Public IP detected: ${public_ip}" + + # Write openssl config via printf (avoids heredoc issues in piped execution) + local tmpconf + tmpconf=$(mktemp) + printf '[req]\ndistinguished_name = req_dn\nreq_extensions = v3_req\nprompt = no\n\n' \ + > "$tmpconf" + printf '[req_dn]\nCN = runevault\n\n' >> "$tmpconf" + printf '[v3_req]\nsubjectAltName = @alt_names\n\n' >> "$tmpconf" + printf '[alt_names]\n' >> "$tmpconf" + printf 'DNS.1 = localhost\n' >> "$tmpconf" + printf 'DNS.2 = vault\n' >> "$tmpconf" + printf 'DNS.3 = runevault\n' >> "$tmpconf" + printf 'IP.1 = 127.0.0.1\n' >> "$tmpconf" + [[ -n "$public_ip" ]] && printf 'IP.2 = %s\n' "$public_ip" >> "$tmpconf" + + openssl genrsa -out "${cert_dir}/ca.key" 4096 2>/dev/null + openssl req -new -x509 \ + -key "${cert_dir}/ca.key" \ + -out "${cert_dir}/ca.pem" \ + -days 3650 -subj "/CN=Rune-Vault CA" -sha256 2>/dev/null + + openssl genrsa -out "${cert_dir}/server.key" 2048 2>/dev/null + local csr="${cert_dir}/server.csr" + openssl req -new \ + -key "${cert_dir}/server.key" -out "$csr" -config "$tmpconf" 2>/dev/null + openssl x509 -req \ + -in "$csr" \ + -CA "${cert_dir}/ca.pem" -CAkey "${cert_dir}/ca.key" -CAcreateserial \ + -out "${cert_dir}/server.pem" \ + -days 825 -sha256 -extfile "$tmpconf" -extensions v3_req 2>/dev/null + + rm -f "$tmpconf" "$csr" "${cert_dir}/ca.srl" + + chmod 0600 "${cert_dir}/ca.key" "${cert_dir}/server.key" + chmod 0644 "${cert_dir}/ca.pem" "${cert_dir}/server.pem" + if [[ "$SKIP_SERVICE" -eq 0 ]]; then + chown "${SERVICE_USER}:${SERVICE_USER}" \ + "${cert_dir}/ca.key" "${cert_dir}/ca.pem" \ + "${cert_dir}/server.key" "${cert_dir}/server.pem" + fi + + success "TLS certificates generated." +} - # 2. 
Prerequisites - if [ "$DEPLOY_TARGET" = "local" ]; then - check_prerequisites_local +# ── Phase 6: Configuration ───────────────────────────────────────────────────── +collect_and_write_config() { + local conf_file="${INSTALL_PREFIX}/configs/runevault.conf" + + if [[ -f "$conf_file" && "$FORCE" -eq 0 ]]; then + info "Config already exists (use --force to overwrite): ${conf_file}" + else + local team_name="${RUNEVAULT_TEAM_NAME:-}" + local envector_endpoint="${RUNEVAULT_ENVECTOR_ENDPOINT:-}" + local envector_api_key="${RUNEVAULT_ENVECTOR_API_KEY:-}" + local envector_api_key_file="${RUNEVAULT_ENVECTOR_API_KEY_FILE:-}" + local team_secret="${RUNEVAULT_TEAM_SECRET:-}" + + if [[ "$NON_INTERACTIVE" -eq 0 ]]; then + printf '\n' + printf '══════════════════════════════════════════════════════════\n' + printf ' Vault configuration\n' + printf '══════════════════════════════════════════════════════════\n' + printf '\n' + [[ -z "$team_name" ]] \ + && read -r -p "Team name (vault index identifier): " team_name + [[ -z "$envector_endpoint" ]] \ + && read -r -p "enVector endpoint URL: " envector_endpoint + if [[ -z "$envector_api_key" && -z "$envector_api_key_file" ]]; then + read -r -p "enVector API key: " envector_api_key + fi + printf '\n' else - check_prerequisites_csp "$DEPLOY_TARGET" + local missing=() + [[ -z "$team_name" ]] && missing+=("RUNEVAULT_TEAM_NAME") + [[ -z "$envector_endpoint" ]] && missing+=("RUNEVAULT_ENVECTOR_ENDPOINT") + [[ -z "$envector_api_key" && -z "$envector_api_key_file" ]] \ + && missing+=("RUNEVAULT_ENVECTOR_API_KEY or RUNEVAULT_ENVECTOR_API_KEY_FILE") + if [[ ${#missing[@]} -gt 0 ]]; then + printf 'ERROR: Missing required env vars for non-interactive install:\n' >&2 + for v in "${missing[@]}"; do printf ' %s\n' "$v" >&2; done + exit 1 + fi fi - # 3. Install directory - prompt_install_dir + if [[ -z "$team_secret" ]]; then + team_secret=$(LC_ALL=C tr -dc 'a-f0-9' < /dev/urandom | head -c 64; true) + fi - # 4. 
Common settings - prompt_tls_mode - generate_team_secret - prompt_envector_config + [[ -n "$team_name" ]] || die "team_name is required." + [[ -n "$envector_endpoint" ]] || die "envector_endpoint is required." + [[ -n "$envector_api_key" || -n "$envector_api_key_file" ]] \ + || die "enVector API key or key file is required." - # 5. CSP-specific settings - if [ "$DEPLOY_TARGET" != "local" ]; then - prompt_csp_config + local api_key_line + if [[ -n "$envector_api_key_file" ]]; then + api_key_line=" api_key_file: ${envector_api_key_file}" + else + api_key_line=" api_key: ${envector_api_key}" fi - # 6. Confirm - show_confirmation + info "Writing ${conf_file}..." + printf '%s\n' \ + "server:" \ + " grpc:" \ + " host: 0.0.0.0" \ + " port: ${GRPC_PORT}" \ + " tls:" \ + " cert: ${INSTALL_PREFIX}/certs/server.pem" \ + " key: ${INSTALL_PREFIX}/certs/server.key" \ + " disable: false" \ + " admin:" \ + " socket: ${INSTALL_PREFIX}/admin.sock" \ + "" \ + "keys:" \ + " path: ${INSTALL_PREFIX}/vault-keys" \ + " index_name: ${team_name}" \ + " embedding_dim: 1024" \ + "" \ + "envector:" \ + " endpoint: ${envector_endpoint}" \ + "${api_key_line}" \ + "" \ + "tokens:" \ + " team_secret: ${team_secret}" \ + " roles_file: ${INSTALL_PREFIX}/configs/roles.yml" \ + " tokens_file: ${INSTALL_PREFIX}/configs/tokens.yml" \ + "" \ + "audit:" \ + " mode: file+stdout" \ + " path: ${INSTALL_PREFIX}/logs/audit.log" \ + > "$conf_file" + chmod 0640 "$conf_file" + [[ "$SKIP_SERVICE" -eq 0 ]] && chown "${SERVICE_USER}:${SERVICE_USER}" "$conf_file" + + fi + + # roles.yml + local roles_file="${INSTALL_PREFIX}/configs/roles.yml" + if [[ ! 
-f "$roles_file" || "$FORCE" -eq 1 ]]; then + printf '%s\n' \ + "roles:" \ + " admin:" \ + " scope:" \ + " - get_public_key" \ + " - decrypt_scores" \ + " - decrypt_metadata" \ + " - manage_tokens" \ + " top_k: 50" \ + " rate_limit: 150/60s" \ + " member:" \ + " scope:" \ + " - get_public_key" \ + " - decrypt_scores" \ + " - decrypt_metadata" \ + " top_k: 10" \ + " rate_limit: 30/60s" \ + > "$roles_file" + chmod 0640 "$roles_file" + [[ "$SKIP_SERVICE" -eq 0 ]] && chown "${SERVICE_USER}:${SERVICE_USER}" "$roles_file" + fi + + # tokens.yml + local tokens_file="${INSTALL_PREFIX}/configs/tokens.yml" + if [[ ! -f "$tokens_file" || "$FORCE" -eq 1 ]]; then + printf 'tokens: []\n' > "$tokens_file" + chmod 0640 "$tokens_file" + [[ "$SKIP_SERVICE" -eq 0 ]] && chown "${SERVICE_USER}:${SERVICE_USER}" "$tokens_file" + fi + + success "Configuration written." +} - # 7. Deploy - if [ "$DEPLOY_TARGET" = "local" ]; then - deploy_local - else - deploy_csp +# ── Phase 7: Service installation ───────────────────────────────────────────── +install_service() { + if [[ "$SKIP_SERVICE" -eq 1 ]]; then + info "Skipping service installation (RUNEVAULT_SKIP_SERVICE=1)." + return 0 + fi + + local config_path="${INSTALL_PREFIX}/configs/runevault.conf" + + if [[ "$OS_SLUG" = linux ]]; then + if systemctl is-active --quiet runevault.service 2>/dev/null; then + info "Stopping running runevault service..." + systemctl stop runevault.service + info "Tip: manage the service with: sudo systemctl start|stop|restart runevault" fi + info "Installing systemd service..." 
+ local unit=/etc/systemd/system/runevault.service + printf '%s\n' \ + "[Unit]" \ + "Description=Rune-Vault FHE gRPC Server" \ + "Documentation=https://github.com/${REPO}" \ + "After=network-online.target" \ + "Wants=network-online.target" \ + "" \ + "[Service]" \ + "Type=simple" \ + "User=${SERVICE_USER}" \ + "Group=${SERVICE_USER}" \ + "ExecStart=${BINARY_DEST} daemon start --config ${config_path}" \ + "Restart=on-failure" \ + "RestartSec=5s" \ + "TimeoutStopSec=30s" \ + "StandardOutput=journal" \ + "StandardError=journal" \ + "SyslogIdentifier=runevault" \ + "NoNewPrivileges=true" \ + "PrivateTmp=true" \ + "ProtectSystem=strict" \ + "ProtectHome=true" \ + "ReadWritePaths=${INSTALL_PREFIX}" \ + "ProtectKernelTunables=true" \ + "ProtectKernelModules=true" \ + "ProtectControlGroups=true" \ + "RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX" \ + "RestrictNamespaces=true" \ + "LockPersonality=true" \ + "MemoryDenyWriteExecute=false" \ + "RestrictRealtime=true" \ + "RestrictSUIDSGID=true" \ + "RemoveIPC=true" \ + "LimitNOFILE=65536" \ + "" \ + "[Install]" \ + "WantedBy=multi-user.target" \ + > "$unit" + chmod 0644 "$unit" + systemctl daemon-reload + systemctl enable runevault.service + systemctl start runevault.service + success "systemd service enabled and started." + + else + info "Installing launchd service..." 
+ local plist=/Library/LaunchDaemons/com.cryptolabinc.runevault.plist + printf '%s\n' \ + '' \ + '' \ + '' \ + '' \ + ' Label' \ + ' com.cryptolabinc.runevault' \ + '' \ + ' ProgramArguments' \ + ' ' \ + " ${BINARY_DEST}" \ + ' daemon' \ + ' start' \ + ' --config' \ + " ${config_path}" \ + ' ' \ + '' \ + ' UserName' \ + " ${SERVICE_USER}" \ + '' \ + ' RunAtLoad' \ + ' ' \ + '' \ + ' KeepAlive' \ + ' ' \ + '' \ + ' ThrottleInterval' \ + ' 10' \ + '' \ + ' StandardOutPath' \ + " ${INSTALL_PREFIX}/logs/runevault.stdout.log" \ + '' \ + ' StandardErrorPath' \ + " ${INSTALL_PREFIX}/logs/runevault.stderr.log" \ + '' \ + ' EnvironmentVariables' \ + ' ' \ + ' PATH' \ + ' /usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin' \ + ' ' \ + '' \ + ' ProcessType' \ + ' Background' \ + '' \ + '' \ + > "$plist" + chmod 0644 "$plist" + chown root "$plist" + launchctl bootout system/com.cryptolabinc.runevault 2>/dev/null || true + launchctl bootstrap system "$plist" + success "launchd service loaded." + fi +} - # 8. Summary - show_summary +# ── Phase 8: Post-install summary ───────────────────────────────────────────── +post_install() { + if [[ "$SKIP_SERVICE" -eq 0 ]]; then + info "Waiting for vault to start..." + local i + for i in $(seq 1 15); do + "$BINARY_DEST" status \ + --config "${INSTALL_PREFIX}/configs/runevault.conf" \ + >/dev/null 2>&1 && { success "Vault is up."; break; } || true + sleep 1 + done + fi + + local public_ip="" + public_ip=$(curl -4 -sf --connect-timeout 5 ifconfig.me 2>/dev/null || true) + + printf '\n' + success "Rune-Vault ${VERSION:-local} installed successfully." 
+ printf '\n' + printf ' Binary: %s\n' "$BINARY_DEST" + printf ' Config: %s\n' "${INSTALL_PREFIX}/configs/runevault.conf" + printf ' CA cert: %s\n' "${INSTALL_PREFIX}/certs/ca.pem" + [[ -n "$public_ip" ]] && printf ' Endpoint: %s:%s\n' "$public_ip" "$GRPC_PORT" + printf '\n' + printf 'Next steps:\n' + printf ' Issue a token: runevault token issue --user --role member\n' + printf ' Check status: runevault status\n' + printf ' View logs: runevault logs\n' + if [[ "$OS_SLUG" = linux ]]; then + printf ' Manage daemon: sudo systemctl start|stop|restart runevault\n' + else + printf ' Manage daemon: sudo launchctl bootout system/com.cryptolabinc.runevault\n' + printf ' sudo launchctl bootstrap system /Library/LaunchDaemons/com.cryptolabinc.runevault.plist\n' + fi + if [[ -n "${SUDO_USER:-}" ]]; then + printf '\n' + printf "NOTE: '%s' was added to the '%s' group.\n" "${SUDO_USER}" "${SERVICE_USER}" + printf ' Re-login (or run: newgrp %s) to apply group membership.\n' "${SERVICE_USER}" + fi + printf '\n' + warn "BACKUP: Keep these safe — they cannot be recovered if lost:" + warn " Rune-Vault Keys: ${INSTALL_PREFIX}/vault-keys/" + warn " Config: ${INSTALL_PREFIX}/configs/runevault.conf" } -main "$@" +# ── Main ─────────────────────────────────────────────────────────────────────── +resolve_target +[[ "$UNINSTALL" -eq 1 ]] && { run_uninstall; exit 0; } +[[ "$TARGET" != "local" ]] && csp_dispatch + +preflight +download_and_verify +setup_system +generate_tls_certs +collect_and_write_config +install_service +post_install diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index b2a9db7..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,14 +0,0 @@ -[project] -name = "rune-vault" -requires-python = ">=3.12" - -[tool.ruff] -line-length = 100 -target-version = "py312" - -[tool.ruff.lint] -select = ["E", "F", "I", "W"] - -[tool.pytest.ini_options] -testpaths = ["tests"] -asyncio_mode = "auto" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 
index 3f1e84e..0000000 --- a/requirements.txt +++ /dev/null @@ -1,24 +0,0 @@ -# Rune-Admin Dependencies - -# FHE Encryption SDK -pyenvector>=1.2.0 - -# HTTP Client -httpx>=0.24.0 - -# JSON handling -python-json-logger>=2.0.0 - -# enVector MCP Server Dependencies -pydantic[email]>=2.11.7 -python-dotenv>=1.2.1 -fastembed>=0.7.4 -langchain-text-splitters>=1.0.0 -pypdf>=6.4.1 - -# Testing (optional) -pytest>=7.0.0 -pytest-asyncio>=0.18.0 - -# NumPy (required by pyenvector) -numpy>=1.24.0 diff --git a/scripts/install-dev.sh b/scripts/install-dev.sh index f5be62e..48c4c68 100755 --- a/scripts/install-dev.sh +++ b/scripts/install-dev.sh @@ -1,865 +1,680 @@ -#!/bin/bash -# Rune-Vault Interactive Server Setup — Local Development Version -# Uses files from the local working tree instead of downloading from GitHub. -# Usage: sudo bash scripts/install-dev.sh +#!/usr/bin/env bash # -# Build the Docker image first: -# mise run build dev +# Rune-Vault dev installer (sibling of install.sh). +# +# Installs the runevault daemon from your local working tree — never from a +# published release. Use this to verify in-progress source code on your local +# machine or on a CSP VM (AWS, GCP, OCI) before cutting a release. +# +# Usage: +# sudo bash scripts/install-dev.sh [options] +# +# Options: +# --target Install/uninstall target (default: prompt if TTY, else local) +# --install-dir CSP install dir (default: $HOME/rune-vault-) +# --prefix Local-only: rootless test prefix +# --non-interactive Skip all prompts; supply secrets via env vars +# --uninstall Forward uninstall to install.sh (local or CSP target) +# --force Forwarded to install.sh (local target only) +# +# Differences from install.sh: +# - Always installs from the local working tree (no GitHub release download). +# - For CSP targets, builds linux/amd64 in Docker (golang:1.25-bookworm) with +# --platform linux/amd64 — works on any host arch via qemu emulation. 
+# - cloud-init-dev / startup-script-dev only prepare the VM; install.sh runs +# over SSH after cloud-init finishes. +# +# Non-interactive env vars (CSP install — operator workstation): +# RUNEVAULT_ENVECTOR_ENDPOINT enVector endpoint URL (required) +# RUNEVAULT_ENVECTOR_API_KEY enVector API key (required) +# RUNEVAULT_TEAM_NAME Team name (required) +# RUNEVAULT_TARGET Pre-select target without interactive menu +# RUNEVAULT_INSTALL_DIR Pre-set CSP install directory +# RUNEVAULT_CSP_REGION Cloud region +# RUNEVAULT_GCP_PROJECT_ID GCP: project ID (required for GCP) +# RUNEVAULT_OCI_COMPARTMENT_ID OCI: compartment OCID (required for OCI) set -euo pipefail -# ─── Root privilege check ───────────────────────────────────────────────────── - -if [ "$(id -u)" -ne 0 ]; then - echo "Error: This script must be run as root. Use: sudo bash scripts/install-dev.sh" - exit 1 +# ── Constants ────────────────────────────────────────────────────────────────── +SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) +REPO_ROOT=$(cd "${SCRIPT_DIR}/.." && pwd) +LOCAL_BINARY_HOST="${REPO_ROOT}/vault/bin/runevault" +TARGET_OS=linux +TARGET_ARCH=amd64 +LINUX_BINARY="${REPO_ROOT}/vault/bin/runevault-${TARGET_OS}-${TARGET_ARCH}" +BUILDER_IMAGE="golang:1.25-bookworm" +GRPC_PORT=50051 + +# Overridable by env (mirrors install.sh) +TARGET="${RUNEVAULT_TARGET:-}" +INSTALL_DIR_CSP="${RUNEVAULT_INSTALL_DIR:-}" +CSP_PUBLIC_IP="" + +# CSP config (populated by dev_csp_prompt_config) +TEAM_NAME="" +ENVECTOR_ENDPOINT="" +ENVECTOR_API_KEY="" +CSP_REGION="" +GCP_PROJECT_ID="" +OCI_COMPARTMENT_ID="" + +# ── Color helpers (copied from install.sh) ───────────────────────────────────── +if [[ -t 1 ]]; then + _RED='\033[0;31m' _GRN='\033[0;32m' _BLU='\033[0;34m' _YLW='\033[0;33m' _RST='\033[0m' +else + _RED='' _GRN='' _BLU='' _YLW='' _RST='' fi - -# ─── Resolve repo root ────────────────────────────────────────────────────── - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -REPO_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" - -if [ ! -f "$REPO_ROOT/install.sh" ]; then - echo "Error: Cannot find repo root. Run from the repository directory:" - echo " sudo bash scripts/install-dev.sh" - exit 1 -fi - -# ─── Resolve Docker tag from git state ─────────────────────────────────────── - -DOCKER_IMAGE="ghcr.io/cryptolabinc/rune-vault" -GIT_BRANCH="$(git -C "$REPO_ROOT" rev-parse --abbrev-ref HEAD | sed 's|/|-|g')" -GIT_COMMIT="$(git -C "$REPO_ROOT" rev-parse --short HEAD)" -DOCKER_TAG="${GIT_BRANCH}-${GIT_COMMIT}" -_user_home="${SUDO_USER:+$(eval echo ~"$SUDO_USER")}" -DEFAULT_INSTALL_DIR="${_user_home:-$HOME}/rune-vault-dev" -VAULT_PUBLIC_IP="" -CSP_CA_CERT_LOCAL="" - -# ─── Colors & output helpers ───────────────────────────────────────────────── - -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -BOLD='\033[1m' -NC='\033[0m' - -print_header() { - echo -e "\n${BLUE}================================================${NC}" - echo -e "${BLUE} $1${NC}" - echo -e "${BLUE}================================================${NC}\n" +die() { printf "${_RED}ERROR:${_RST} %s\n" "$*" >&2; exit 1; } +info() { printf "${_BLU}==>${_RST} %s\n" "$*"; } +success() { printf "${_GRN}✓${_RST} %s\n" "$*"; } +warn() { printf "${_YLW}WARNING:${_RST} %s\n" "$*" >&2; } + +# ── Argument parsing ─────────────────────────────────────────────────────────── +PREFIX="" +NON_INTERACTIVE=0 +UNINSTALL=0 +PASSTHROUGH_ARGS=() + +while [[ $# -gt 0 ]]; do + case $1 in + --target) TARGET="$2"; shift 2 ;; + --install-dir) INSTALL_DIR_CSP="$2"; shift 2 ;; + --prefix) PREFIX="$2"; shift 2 ;; + --non-interactive) NON_INTERACTIVE=1; PASSTHROUGH_ARGS+=("$1"); shift ;; + --uninstall) UNINSTALL=1; shift ;; + --force) PASSTHROUGH_ARGS+=("$1"); shift ;; + *) PASSTHROUGH_ARGS+=("$1"); shift ;; + esac +done + +# Auto-set non-interactive when stdin is not a TTY +[[ -t 0 ]] || NON_INTERACTIVE=1 + +# ── Platform detection ───────────────────────────────────────────────────────── +case "$(uname -s)" in + Linux) 
HOST_OS=linux ;; + Darwin) HOST_OS=darwin ;; + *) die "Unsupported host OS: $(uname -s). Only Linux and macOS are supported." ;; +esac +case "$(uname -m)" in + x86_64|amd64) HOST_ARCH=amd64 ;; + arm64|aarch64) HOST_ARCH=arm64 ;; + *) die "Unsupported host architecture: $(uname -m)." ;; +esac + +# ── Banner ───────────────────────────────────────────────────────────────────── +print_banner() { + local commit + commit=$(cd "$REPO_ROOT" && git rev-parse --short HEAD 2>/dev/null || echo unknown) + printf '\n' + printf ' ╭───────────────────────────────────────────────────────────────────╮\n' + printf ' │ Rune-Vault dev installer │\n' + printf ' │ Source: local working tree (not a published release) │\n' + printf ' │ Commit: %-56s │\n' "$commit" + printf ' ╰───────────────────────────────────────────────────────────────────╯\n' + printf '\n' } -print_info() { echo -e "${GREEN}✓${NC} $1"; } -print_warn() { echo -e "${YELLOW}⚠${NC} $1"; } -print_error() { echo -e "${RED}✗${NC} $1"; } -print_step() { echo -e "\n${BOLD}▸ $1${NC}\n"; } - -# ─── Cleanup trap ───────────────────────────────────────────────────────────── - -CLEANUP_DIR="" -cleanup() { - printf '\033[?25h' >&2 2>/dev/null || true - if [ -n "$CLEANUP_DIR" ] && [ -d "$CLEANUP_DIR" ]; then - rm -rf "$CLEANUP_DIR" - fi +# ── Helpers (mirror install.sh) ─────────────────────────────────────────────── +_prompt() { + local varname=$1 label=$2 default=${3:-} + [[ -n "${!varname:-}" ]] && return 0 + local val + if [[ -n "$default" ]]; then + read -r -p "${label} [${default}]: " val + printf -v "$varname" '%s' "${val:-$default}" + else + read -r -p "${label}: " val + printf -v "$varname" '%s' "$val" + fi } -trap cleanup EXIT -# ─── Prompt helper ──────────────────────────────────────────────────────────── +# Escape for embedding inside a double-quoted Terraform string. 
+escape_tf() { printf '%s' "$1" | sed 's/\\/\\\\/g; s/"/\\"/g'; } -prompt() { - local varname="$1" message="$2" default="${3:-}" - if [ -n "$default" ]; then - printf "${BOLD}%s${NC} [%s]: " "$message" "$default" >&2 - else - printf "${BOLD}%s${NC}: " "$message" >&2 - fi - local value - read -r value - value="${value:-$default}" - eval "$varname=\"\$value\"" +# Escape for embedding inside a single-quoted shell argument. +# Replaces every ' with '\''. +escape_single() { + local s=$1 + printf '%s' "${s//\'/\'\\\'\'}" } -prompt_yn() { - local message="$1" default="${2:-y}" - local value - if [ "$default" = "y" ]; then - printf "${BOLD}%s${NC} [Y/n]: " "$message" >&2 - else - printf "${BOLD}%s${NC} [y/N]: " "$message" >&2 - fi - read -r value - value="${value:-$default}" - case "$value" in - [Yy]*) return 0 ;; - *) return 1 ;; +# ── Target resolution (mirror install.sh:198–226) ───────────────────────────── +resolve_target() { + if [[ -n "${TARGET:-}" ]]; then + case "$TARGET" in + local|aws|gcp|oci) ;; + *) die "Invalid --target value: ${TARGET}. Valid: local, aws, gcp, oci." 
;; esac -} - -# ─── Arrow-key menu selector ──────────────────────────────────────────────── - -select_menu() { - local options=("$@") - local count=${#options[@]} - local _sel=0 - - # Fallback: plain number input when terminal is dumb or unset - if [ -z "${TERM:-}" ] || [ "$TERM" = "dumb" ]; then - local i - for i in "${!options[@]}"; do - printf " %d) %s\n" "$((i + 1))" "${options[$i]}" >&2 - done - echo "" >&2 - local choice - printf "${BOLD}Select${NC} [1]: " >&2 - read -r choice - choice="${choice:-1}" - if [ "$choice" -ge 1 ] 2>/dev/null && [ "$choice" -le "$count" ] 2>/dev/null; then - echo "$((choice - 1))" - else - print_error "Invalid selection."; exit 1 - fi - return - fi - - # ── Draw the menu ── - _draw_menu() { - local i - for i in "${!options[@]}"; do - if [ "$i" -eq "$_sel" ]; then - printf " ${GREEN}${BOLD}> %s${NC}\n" "${options[$i]}" >&2 - else - printf " %s\n" "${options[$i]}" >&2 - fi - done - } - - # ── Move cursor up to redraw ── - _erase_menu() { - local i - for (( i = 0; i < count; i++ )); do - printf '\033[1A\033[2K' >&2 - done - } - - printf '\033[?25l' >&2 # hide cursor - printf " ${BOLD}↑↓ move Enter confirm${NC}\n" >&2 - _draw_menu - - while true; do - local key="" - IFS= read -rsn1 key - if [ "$key" = $'\x1b' ]; then - local seq="" - IFS= read -rsn2 -t 1 seq || true - case "$seq" in - '[A') # Up arrow - if [ "$_sel" -gt 0 ]; then - _sel=$((_sel - 1)) - else - _sel=$((count - 1)) - fi - ;; - '[B') # Down arrow - if [ "$_sel" -lt $((count - 1)) ]; then - _sel=$((_sel + 1)) - else - _sel=0 - fi - ;; - esac - _erase_menu - _draw_menu - elif [ "$key" = "" ]; then - # Enter key - break - elif [ "$key" -ge 1 ] 2>/dev/null && [ "$key" -le "$count" ] 2>/dev/null; then - # Number key direct jump - _sel=$((key - 1)) - _erase_menu - _draw_menu - fi - done - - printf '\033[?25h' >&2 # show cursor - - echo "$_sel" -} - -# ─── Local file copy helper (replaces download_file) ──────────────────────── - -copy_local_file() { - local src="$1" dest="$2" - 
if [ ! -f "$src" ]; then - print_error "Local file not found: $src" - exit 1 - fi - cp "$src" "$dest" -} - -# ─── Prerequisite checks ───────────────────────────────────────────────────── - -check_command() { - local cmd="$1" install_hint="$2" - if ! command -v "$cmd" &>/dev/null; then - print_error "'$cmd' is not installed." - echo " Install: $install_hint" - return 1 - fi - print_info "$cmd found" return 0 -} - -check_prerequisites_local() { - print_step "Checking prerequisites..." - local missing=0 - check_command mise "https://mise.jdx.dev" || missing=1 - check_command docker "https://docs.docker.com/get-docker/" || missing=1 - check_command openssl "apt install openssl / brew install openssl" || missing=1 - - # docker compose (v2 plugin) - if ! docker compose version &>/dev/null 2>&1; then - print_error "'docker compose' (v2 plugin) is not available." - echo " Install: https://docs.docker.com/compose/install/" - missing=1 - else - print_info "docker compose found" - fi - - if [ "$missing" -eq 1 ]; then - echo "" - print_error "Please install the missing prerequisites and re-run." - exit 1 - fi - - # Check Docker daemon - if ! docker info &>/dev/null 2>&1; then - print_error "Cannot connect to Docker daemon. Is Docker running?" - echo " Fix: systemctl start docker" - exit 1 - fi - - (cd "$REPO_ROOT" && mise trust) -} - -check_prerequisites_csp() { - local provider="$1" - print_step "Checking prerequisites..." 
- - local missing=0 - check_command mise "https://mise.jdx.dev" || missing=1 - check_command terraform "https://developer.hashicorp.com/terraform/install" || missing=1 - check_command openssl "apt install openssl / brew install openssl" || missing=1 - check_command gh "https://cli.github.com/" || missing=1 - check_command docker "https://docs.docker.com/get-docker/" || missing=1 - - case "$provider" in - aws) check_command aws "https://aws.amazon.com/cli/" || missing=1 ;; - gcp) check_command gcloud "https://cloud.google.com/sdk/docs/install" || missing=1 ;; - oci) check_command oci "https://docs.oracle.com/en-us/iaas/Content/API/SDKDocs/cliinstall.htm" || missing=1 ;; + fi + if [[ "$NON_INTERACTIVE" -eq 0 && -t 0 ]]; then + local action="install" + [[ "$UNINSTALL" -eq 1 ]] && action="uninstall" + printf ' Select %s target:\n' "$action" + printf ' 1) Local (this machine)\n' + printf ' 2) AWS\n' + printf ' 3) GCP\n' + printf ' 4) OCI\n' + printf '\n' + local choice + read -r -p " Choice [1]: " choice + case "${choice:-1}" in + 1|local) TARGET=local ;; + 2|aws) TARGET=aws ;; + 3|gcp) TARGET=gcp ;; + 4|oci) TARGET=oci ;; + *) die "Invalid choice: ${choice}" ;; esac - - if [ "$missing" -eq 1 ]; then - echo "" - print_error "Please install the missing prerequisites and re-run." - exit 1 - fi - - (cd "$REPO_ROOT" && mise trust) + else + TARGET=local + fi } -# ─── Interactive prompts ───────────────────────────────────────────────────── +# ── Preflight ────────────────────────────────────────────────────────────────── +dev_preflight() { + info "Running dev preflight checks..." 
-choose_deploy_target() { - print_step "Select deployment target" - local options=("Local (This machine)" "AWS (requires GHCR access)" "GCP (requires GHCR access)" "OCI (requires GHCR access)") - local targets=("local" "aws" "gcp" "oci") - local selected - selected=$(select_menu "${options[@]}") - DEPLOY_TARGET="${targets[$selected]}" - print_info "Deployment target: ${DEPLOY_TARGET}" -} + # Rootless local test (--prefix) is the one path that doesn't require sudo. + if [[ "$TARGET" != "local" || -z "$PREFIX" ]]; then + [[ "$(id -u)" -eq 0 ]] || die "This installer must be run as root (use sudo)." + fi -prompt_install_dir() { - print_step "Installation directory" - local default_dir="$DEFAULT_INSTALL_DIR" - if [ "$DEPLOY_TARGET" != "local" ]; then - default_dir="$HOME/rune-vault-${DEPLOY_TARGET}" - echo " Terraform files, state, and SSH keys are stored here." - echo " Keep this directory to manage (update/destroy) your deployment." - echo "" - fi - prompt INSTALL_DIR "Directory" "$default_dir" -} + [[ -d "${REPO_ROOT}/vault" ]] \ + || die "vault/ directory not found under ${REPO_ROOT}. Run from a clone of rune-admin." -prompt_tls_mode() { - print_step "TLS configuration" - local options=("Generate self-signed certificate" "No TLS (not recommended)") - local modes=("self-signed" "none") - local selected - selected=$(select_menu "${options[@]}") - TLS_MODE="${modes[$selected]}" - - if [ "$TLS_MODE" = "self-signed" ]; then - echo "" - prompt TLS_HOSTNAME "Domain name for the certificate (leave empty if none)" "" - fi + local missing=() + for tool in git mise; do + command -v "$tool" >/dev/null 2>&1 || missing+=("$tool") + done + [[ ${#missing[@]} -gt 0 ]] && die "Missing required tools: ${missing[*]}" - if [ "$TLS_MODE" = "none" ]; then - print_warn "Running without TLS. gRPC traffic will be unencrypted." - print_warn "This is NOT recommended for production." - fi + if [[ "$TARGET" != "local" ]]; then + [[ -z "$PREFIX" ]] || die "--prefix is local-only." 
+ dev_check_docker + fi - print_info "TLS mode: ${TLS_MODE}" + success "Preflight passed." } -prompt_envector_config() { - print_step "enVector Cloud configuration" - echo " Create your enVector cluster at https://envector.io before proceeding." - echo " You will need the endpoint URL and API key from the dashboard." - echo " Index name is used to store and retrieve your team's organizational memory." - echo "" - prompt ENVECTOR_ENDPOINT "enVector endpoint (e.g. cluster-id.clusters.envector.io)" - prompt ENVECTOR_API_KEY "enVector API key (e.g. aBcDE_12345_xxxxx)" - prompt VAULT_INDEX_NAME "Index name" "runecontext" - - if [ -z "$ENVECTOR_ENDPOINT" ] || [ -z "$ENVECTOR_API_KEY" ]; then - print_error "enVector endpoint and API key are required." - exit 1 - fi - if [ -z "$VAULT_INDEX_NAME" ]; then - print_error "Index name is required." - exit 1 - fi - print_info "enVector endpoint: ${ENVECTOR_ENDPOINT}" +dev_check_docker() { + command -v docker >/dev/null 2>&1 \ + || die "docker is required for CSP targets. Install Docker Desktop / Docker Engine and retry." + + local docker_user="${SUDO_USER:-$(id -un)}" + if ! sudo -u "$docker_user" -H bash -lc 'docker info' >/dev/null 2>&1; then + die "docker daemon is not reachable for user '${docker_user}'. Start Docker (Docker Desktop / 'colima start' / 'systemctl start docker') and retry." + fi + + # Cross-arch builder probe — fails fast if binfmt handlers are missing. + if ! sudo -u "$docker_user" -H bash -lc \ + "docker run --rm --platform ${TARGET_OS}/${TARGET_ARCH} alpine:latest true" >/dev/null 2>&1; then + die "docker cannot run ${TARGET_OS}/${TARGET_ARCH} images. 
Install qemu binfmt handlers: + docker run --rm --privileged tonistiigi/binfmt --install all" + fi } -prompt_csp_config() { - prompt TEAM_NAME "Team name (used for resource naming)" "default" - - case "$DEPLOY_TARGET" in - aws) - prompt CSP_REGION "AWS region" "us-east-1" - ;; - gcp) - prompt CSP_REGION "GCP region" "us-central1" - prompt GCP_PROJECT_ID "GCP project ID" - if [ -z "$GCP_PROJECT_ID" ]; then - print_error "GCP project ID is required."; exit 1 - fi - ;; - oci) - prompt CSP_REGION "OCI region" "us-ashburn-1" - prompt OCI_COMPARTMENT_ID "OCI compartment OCID" - if [ -z "$OCI_COMPARTMENT_ID" ]; then - print_error "OCI compartment OCID is required."; exit 1 - fi - ;; - esac +# ── Build ────────────────────────────────────────────────────────────────────── +dev_build_local_binary() { + info "Building runevault for host (${HOST_OS}/${HOST_ARCH})..." + local build_user="${SUDO_USER:-$(id -un)}" + (cd "$REPO_ROOT" && sudo -u "$build_user" -H bash -lc 'mise run go:build') + [[ -x "$LOCAL_BINARY_HOST" ]] || die "Build did not produce ${LOCAL_BINARY_HOST}." + success "Built: ${LOCAL_BINARY_HOST}" } -generate_team_secret() { - VAULT_TEAM_SECRET_VALUE="evt_$(openssl rand -hex 32)" - print_info "Team secret generated." +dev_build_linux_binary() { + info "Building runevault for ${TARGET_OS}/${TARGET_ARCH} via Docker (${BUILDER_IMAGE})..." 
+ local build_user="${SUDO_USER:-$(id -un)}" + local user_home commit version date pkg + user_home="${SUDO_USER:+$(eval echo ~"${SUDO_USER}")}" + user_home="${user_home:-$HOME}" + commit=$(cd "$REPO_ROOT" && git rev-parse --short HEAD 2>/dev/null || echo none) + version=dev + date=$(date -u +%Y-%m-%dT%H:%M:%SZ) + pkg="github.com/CryptoLabInc/rune-admin/vault/internal/commands" + + local ldflags="-X '${pkg}.buildVersion=${version}' -X '${pkg}.buildCommit=${commit}' -X '${pkg}.buildDate=${date}'" + local out_rel="bin/runevault-${TARGET_OS}-${TARGET_ARCH}" + + mkdir -p "${user_home}/go/pkg/mod" + mkdir -p "${REPO_ROOT}/vault/bin" + [[ -n "${SUDO_USER:-}" ]] && chown "${SUDO_USER}" "${REPO_ROOT}/vault/bin" + + # Run docker as the invoking user so written files are owned correctly and + # the user's go module cache is reused for speed. + sudo -u "$build_user" -H docker run --rm \ + --platform "${TARGET_OS}/${TARGET_ARCH}" \ + -v "${REPO_ROOT}/vault:/src" \ + -v "${user_home}/go/pkg/mod:/go/pkg/mod" \ + -w /src \ + -e CGO_ENABLED=1 \ + -e LDFLAGS="$ldflags" \ + -e OUTPUT="$out_rel" \ + "${BUILDER_IMAGE}" \ + bash -c ' + set -e + apt-get update -qq && apt-get install -y -qq libssl-dev >/dev/null + go build -ldflags "$LDFLAGS" -o "$OUTPUT" ./cmd + ' || die "Docker build failed." + + [[ -x "$LINUX_BINARY" ]] || die "Build did not produce ${LINUX_BINARY}." + success "Built: ${LINUX_BINARY}" } -generate_config_files() { - local dir="$1" - - cat > "$dir/vault-roles.yml" <<'ROLESEOF' -roles: - admin: - scope: [get_public_key, decrypt_scores, decrypt_metadata, manage_tokens] - top_k: 50 - rate_limit: 150/60s - member: - scope: [get_public_key, decrypt_scores, decrypt_metadata] - top_k: 10 - rate_limit: 30/60s -ROLESEOF - - cat > "$dir/vault-tokens.yml" <<'TOKENSEOF' -tokens: [] -TOKENSEOF - - chmod 600 "$dir/vault-roles.yml" "$dir/vault-tokens.yml" - print_info "Token/role config files created." 
+# ── Local config prompts (mirror dev_csp_prompt_config) ─────────────────────── +dev_local_prompt_config() { + if [[ "$NON_INTERACTIVE" -eq 0 ]]; then + printf '\n' + printf '══════════════════════════════════════════════════════════\n' + printf ' Local install configuration (dev mode)\n' + printf '══════════════════════════════════════════════════════════\n' + printf '\n' + + _prompt RUNEVAULT_TEAM_NAME "Team name" "devteam" + _prompt RUNEVAULT_ENVECTOR_ENDPOINT "enVector endpoint" "" + _prompt RUNEVAULT_ENVECTOR_API_KEY "enVector API key" "" + printf '\n' + + [[ -n "${RUNEVAULT_ENVECTOR_ENDPOINT:-}" ]] || die "enVector endpoint is required." + [[ -n "${RUNEVAULT_ENVECTOR_API_KEY:-}" ]] || die "enVector API key is required." + else + RUNEVAULT_TEAM_NAME="${RUNEVAULT_TEAM_NAME:-devteam}" + RUNEVAULT_ENVECTOR_ENDPOINT="${RUNEVAULT_ENVECTOR_ENDPOINT:-https://envector.example.com}" + RUNEVAULT_ENVECTOR_API_KEY="${RUNEVAULT_ENVECTOR_API_KEY:-dev-api-key-placeholder}" + fi } -setup_runevault_alias() { - if [ -z "${SUDO_USER:-}" ]; then - return - fi +# ── Local install branch ────────────────────────────────────────────────────── +dev_local_install() { + dev_build_local_binary + dev_local_prompt_config - # Add user to docker group - if command -v usermod >/dev/null 2>&1; then - usermod -aG docker "$SUDO_USER" 2>/dev/null || true - fi + export RUNEVAULT_LOCAL_BINARY="$LOCAL_BINARY_HOST" + export RUNEVAULT_TEAM_NAME + export RUNEVAULT_ENVECTOR_ENDPOINT + export RUNEVAULT_ENVECTOR_API_KEY - # Detect shell config - local user_home - user_home="$(eval echo ~"$SUDO_USER")" - local shell_rc="" - if [ -f "$user_home/.zshrc" ]; then - shell_rc="$user_home/.zshrc" - elif [ -f "$user_home/.bashrc" ]; then - shell_rc="$user_home/.bashrc" - fi + if [[ -n "$PREFIX" ]]; then + export RUNEVAULT_INSTALL_PREFIX="$PREFIX" + export RUNEVAULT_BINARY_PATH="${PREFIX}/runevault" + export RUNEVAULT_SKIP_SERVICE=1 + fi - if [ -n "$shell_rc" ]; then - if ! 
grep -q 'alias runevault=' "$shell_rc" 2>/dev/null; then - echo '' >> "$shell_rc" - echo '# Rune-Vault admin CLI' >> "$shell_rc" - echo 'alias runevault="docker exec -it rune-vault python3 /app/vault_admin_cli.py"' >> "$shell_rc" - print_info "runevault alias added to ${shell_rc}" - print_warn "Run 'exec \$SHELL' to reload your shell and enable the runevault command." - fi - fi + exec bash "${REPO_ROOT}/install.sh" --target local "${PASSTHROUGH_ARGS[@]+"${PASSTHROUGH_ARGS[@]}"}" } -# ─── Confirmation summary ──────────────────────────────────────────────────── - -show_confirmation() { - print_header "Configuration Summary (DEV — local build)" - echo " Deployment target : ${DEPLOY_TARGET}" - echo " Install directory : ${INSTALL_DIR}" - echo " Docker image : ${DOCKER_IMAGE}:${DOCKER_TAG} (local)" - echo " Repo root : ${REPO_ROOT}" - echo " TLS mode : ${TLS_MODE}" - [ -n "${TLS_HOSTNAME:-}" ] && echo " TLS domain : ${TLS_HOSTNAME}" - echo " Team secret : (auto-generated in .env)" - echo " enVector endpoint : ${ENVECTOR_ENDPOINT}" - echo " Index name : ${VAULT_INDEX_NAME}" - if [ "$DEPLOY_TARGET" != "local" ]; then - echo " Team name : ${TEAM_NAME}" - echo " Region : ${CSP_REGION}" - [ "${DEPLOY_TARGET}" = "gcp" ] && echo " GCP project : ${GCP_PROJECT_ID}" - [ "${DEPLOY_TARGET}" = "oci" ] && echo " OCI compartment : ${OCI_COMPARTMENT_ID}" - fi - echo "" +# ── Uninstall forward ───────────────────────────────────────────────────────── +# install-dev.sh defers all uninstall logic to install.sh. install.sh handles +# both local (service + files) and CSP (terraform destroy + dir cleanup). +dev_forward_uninstall() { + info "Forwarding uninstall to install.sh (target: ${TARGET})..." 
+ local args=(--uninstall --target "$TARGET") + [[ -n "$INSTALL_DIR_CSP" ]] && args+=(--install-dir "$INSTALL_DIR_CSP") + [[ "$NON_INTERACTIVE" -eq 1 ]] && args+=(--non-interactive) + + if [[ "$TARGET" = "local" && -n "$PREFIX" ]]; then + export RUNEVAULT_INSTALL_PREFIX="$PREFIX" + export RUNEVAULT_BINARY_PATH="${PREFIX}/runevault" + fi + + exec bash "${REPO_ROOT}/install.sh" "${args[@]}" +} - if ! prompt_yn "Proceed with deployment?"; then - print_warn "Aborted." - exit 0 - fi +# ── CSP preflight (mirror install.sh:228–285) ───────────────────────────────── +dev_csp_preflight() { + local csp=$1 + info "Running CSP preflight checks for ${csp}..." + + command -v terraform >/dev/null 2>&1 \ + || die "terraform is not installed. Install it (https://developer.hashicorp.com/terraform/install) and retry." + + local csp_cli auth_cmd auth_setup + case "$csp" in + aws) + csp_cli=aws + auth_cmd='aws sts get-caller-identity' + auth_setup='aws configure' + ;; + gcp) + csp_cli=gcloud + auth_cmd='gcloud auth application-default print-access-token' + auth_setup='gcloud auth application-default login' + ;; + oci) + csp_cli=oci + auth_cmd='oci iam region list' + auth_setup='oci setup config' + ;; + esac + + local tf_user="${SUDO_USER:-$(id -un)}" + + if ! sudo -u "$tf_user" -H bash -lc "command -v ${csp_cli}" >/dev/null 2>&1; then + die "'${csp_cli}' CLI not found in PATH for user '${tf_user}'. Install it and re-run." + fi + + if ! sudo -u "$tf_user" -H bash -lc "${auth_cmd}" >/dev/null 2>&1; then + die "'${csp_cli}' is not authenticated for user '${tf_user}'. Authenticate and re-run: ${auth_setup}" + fi + + success "CSP preflight passed." } -# ─── TLS handling ───────────────────────────────────────────────────────────── - -setup_tls() { - local certs_dir="$INSTALL_DIR/certs" - mkdir -p "$certs_dir" - - case "$TLS_MODE" in - self-signed) - print_step "Generating self-signed certificates..." 
- copy_local_file "$REPO_ROOT/scripts/generate-certs.sh" "$certs_dir/generate-certs.sh" - chmod +x "$certs_dir/generate-certs.sh" - (cd "$certs_dir" && bash generate-certs.sh . "${TLS_HOSTNAME:-localhost}") - TLS_CERT_PATH="$certs_dir/server.pem" - TLS_KEY_PATH="$certs_dir/server.key" - TLS_CA_PATH="$certs_dir/ca.pem" - print_info "Self-signed certificates generated in ${certs_dir}/" - ;; - none) - print_warn "Skipping TLS setup." - TLS_CERT_PATH="" - TLS_KEY_PATH="" - TLS_CA_PATH="" - ;; +# ── CSP config prompts (mirror install.sh:287–347) ──────────────────────────── +dev_csp_prompt_config() { + local csp=$1 + + if [[ "$NON_INTERACTIVE" -eq 0 ]]; then + printf '\n' + printf '══════════════════════════════════════════════════════════\n' + printf ' Cloud deployment configuration (dev mode)\n' + printf '══════════════════════════════════════════════════════════\n' + printf '\n' + + _prompt TEAM_NAME "Team name" "devteam" + _prompt ENVECTOR_ENDPOINT "enVector endpoint" "" + _prompt ENVECTOR_API_KEY "enVector API key" "" + + case "$csp" in + aws) _prompt CSP_REGION "AWS region" "us-east-1" ;; + gcp) + _prompt CSP_REGION "GCP region" "us-central1" + _prompt GCP_PROJECT_ID "GCP project ID" "" + ;; + oci) + _prompt CSP_REGION "OCI region" "us-ashburn-1" + _prompt OCI_COMPARTMENT_ID "OCI compartment OCID" "" + ;; esac + printf '\n' + else + TEAM_NAME="${RUNEVAULT_TEAM_NAME:-}" + ENVECTOR_ENDPOINT="${RUNEVAULT_ENVECTOR_ENDPOINT:-}" + ENVECTOR_API_KEY="${RUNEVAULT_ENVECTOR_API_KEY:-}" + CSP_REGION="${RUNEVAULT_CSP_REGION:-}" + GCP_PROJECT_ID="${RUNEVAULT_GCP_PROJECT_ID:-}" + OCI_COMPARTMENT_ID="${RUNEVAULT_OCI_COMPARTMENT_ID:-}" + + local missing=() + [[ -z "$TEAM_NAME" ]] && missing+=("RUNEVAULT_TEAM_NAME") + [[ -z "$ENVECTOR_ENDPOINT" ]] && missing+=("RUNEVAULT_ENVECTOR_ENDPOINT") + [[ -z "$ENVECTOR_API_KEY" ]] && missing+=("RUNEVAULT_ENVECTOR_API_KEY") + [[ "$csp" = gcp && -z "$GCP_PROJECT_ID" ]] && missing+=("RUNEVAULT_GCP_PROJECT_ID") + [[ "$csp" = oci && -z 
"$OCI_COMPARTMENT_ID" ]] && missing+=("RUNEVAULT_OCI_COMPARTMENT_ID") + if [[ ${#missing[@]} -gt 0 ]]; then + printf 'ERROR: Missing required env vars:\n' >&2 + for v in "${missing[@]}"; do printf ' %s\n' "$v" >&2; done + exit 1 + fi + fi + + [[ -n "$TEAM_NAME" ]] || die "Team name is required." + [[ -n "$ENVECTOR_ENDPOINT" ]] || die "enVector endpoint is required." + [[ -n "$ENVECTOR_API_KEY" ]] || die "enVector API key is required." + if [[ "$csp" = gcp ]]; then + [[ -n "$GCP_PROJECT_ID" ]] || die "GCP project ID is required." + fi + if [[ "$csp" = oci ]]; then + [[ -n "$OCI_COMPARTMENT_ID" ]] || die "OCI compartment OCID is required." + fi } -# ─── Generate .env file ────────────────────────────────────────────────────── - -generate_env_file() { - local env_file="$INSTALL_DIR/.env" - - cat > "$env_file" <> "$env_file" - else - cat >> "$env_file" </dev/null) || true - rm -rf "$INSTALL_DIR" - print_info "Previous installation removed." - else - print_warn "Aborted." - exit 0 - fi - fi - # Clean up orphaned container/network/volume - local project - project="$(basename "$INSTALL_DIR")" - if docker container inspect rune-vault &>/dev/null; then - print_step "Removing existing rune-vault container..." - docker rm -f rune-vault >/dev/null 2>&1 || true - print_info "Container removed." - fi - docker network rm "${project}_vault-net" >/dev/null 2>&1 || true - docker volume rm "${project}_vault-keys" >/dev/null 2>&1 || true - - # Create directory structure - mkdir -p "$INSTALL_DIR"/{certs,backups,logs} - print_info "Directory structure created: ${INSTALL_DIR}/" - - # Copy docker-compose.yml from local repo - print_step "Copying docker-compose.yml from local repo..." 
- copy_local_file "$REPO_ROOT/vault/docker-compose.yml" "$INSTALL_DIR/docker-compose.yml" - # Pin image to the local build tag - sed -i.bak "s|image:.*rune-vault:.*|image: ${DOCKER_IMAGE}:${DOCKER_TAG}|" "$INSTALL_DIR/docker-compose.yml" - rm -f "$INSTALL_DIR/docker-compose.yml.bak" - print_info "docker-compose.yml copied." - - # TLS - setup_tls - - # Generate .env and config files - generate_env_file - generate_config_files "$INSTALL_DIR" - - # Restore ownership to the invoking user (files were created as root via sudo) - if [ -n "${SUDO_USER:-}" ]; then - chown -R "$SUDO_USER" "$INSTALL_DIR" - fi - - # Build Docker image from local source - print_step "Building Docker image (tag: ${DOCKER_TAG})..." - (cd "$REPO_ROOT" && mise run build "${DOCKER_TAG}") - print_info "Image built: ${DOCKER_IMAGE}:${DOCKER_TAG}" - - # Start container - print_step "Starting Rune-Vault..." - (cd "$INSTALL_DIR" && docker compose up -d) - print_info "Container started." - - # Health check - print_step "Waiting for Vault to become healthy..." - local elapsed=0 - local timeout=60 - while [ $elapsed -lt $timeout ]; do - if docker exec rune-vault curl -sf http://localhost:8081/health 2>/dev/null; then - print_info "Vault is healthy!" - - # Set up runevault alias for admin CLI - setup_runevault_alias - return 0 - fi - sleep 2 - elapsed=$((elapsed + 2)) - printf "." - done - - echo "" - print_error "Vault did not become healthy within ${timeout}s." 
- print_warn "Container logs:" - docker logs rune-vault 2>&1 | tail -30 - exit 1 +# ── Terraform files (mirror install.sh:373–399, swap to *-dev variants) ────── +dev_csp_copy_terraform_files() { + local csp=$1 + local tf_src="${REPO_ROOT}/deployment/${csp}" + local tf_dest="${INSTALL_DIR_CSP}/deployment" + mkdir -p "$tf_dest" + + cp "${tf_src}/main.tf" "${tf_dest}/main.tf" + + # Use the *-dev variant of cloud-init / startup-script, but rename to the + # canonical filename so main.tf's templatefile() reference keeps working + # without Terraform changes. + case "$csp" in + aws) + [[ -f "${tf_src}/cloud-init-dev.yaml" ]] \ + || die "Missing ${tf_src}/cloud-init-dev.yaml." + cp "${tf_src}/cloud-init-dev.yaml" "${tf_dest}/cloud-init.yaml" + ;; + gcp|oci) + [[ -f "${tf_src}/startup-script-dev.sh" ]] \ + || die "Missing ${tf_src}/startup-script-dev.sh." + cp "${tf_src}/startup-script-dev.sh" "${tf_dest}/startup-script.sh" + ;; + esac + + printf '*.tfvars\nterraform.tfstate*\n.terraform/\n' > "${INSTALL_DIR_CSP}/.gitignore" + [[ -n "${SUDO_USER:-}" ]] && chown -R "${SUDO_USER}" "$tf_dest" "${INSTALL_DIR_CSP}/.gitignore" + success "Terraform files (dev variant) ready: ${tf_dest}" } -# ─── CSP deployment ─────────────────────────────────────────────────────────── - -deploy_csp() { - local provider="$DEPLOY_TARGET" - print_header "Deploying Rune-Vault (${provider} — DEV)" - - local tf_dir="$INSTALL_DIR/deployment" - mkdir -p "$tf_dir" - # Ensure the original user owns the deployment directory for terraform - if [ -n "${SUDO_USER:-}" ]; then - chown -R "$SUDO_USER" "$INSTALL_DIR" - fi - - # Build and push Docker image to GHCR (remote servers pull from registry) - # Requires GHCR push access to the CryptoLabInc organization. - print_step "Building and pushing Docker image to GHCR..." - echo " CSP deployments pull the image from GHCR, so a push is required." - echo " This requires GHCR push access to the CryptoLabInc organization." - echo "" - if ! 
gh auth status &>/dev/null; then - print_error "GitHub CLI not authenticated. Run: gh auth login" - exit 1 - fi - (cd "$REPO_ROOT" && mise run push "${DOCKER_TAG}") - print_info "Image pushed: ${DOCKER_IMAGE}:${DOCKER_TAG}" - - # Copy Terraform files from local repo - print_step "Copying Terraform configuration from local repo..." - copy_local_file "$REPO_ROOT/deployment/${provider}/main.tf" "$tf_dir/main.tf" - if [ "$provider" = "aws" ]; then - copy_local_file "$REPO_ROOT/deployment/${provider}/cloud-init.yaml" "$tf_dir/cloud-init.yaml" - sed -i.bak "s|image:.*rune-vault:.*|image: ${DOCKER_IMAGE}:${DOCKER_TAG}|" "$tf_dir/cloud-init.yaml" - rm -f "$tf_dir/cloud-init.yaml.bak" +# ── tfvars (mirror install.sh:403–439) ──────────────────────────────────────── +dev_csp_render_tfvars() { + local csp=$1 + local tf_dir="${INSTALL_DIR_CSP}/deployment" + local tfvars="${tf_dir}/terraform.tfvars" + local public_key="" + + if [[ -f "${tf_dir}/terraform.tfstate" ]]; then + if [[ "$NON_INTERACTIVE" -eq 0 ]]; then + local answer=n + read -r -p "terraform.tfstate already exists in ${tf_dir}. Re-apply? [y/N] " answer + [[ "$answer" =~ ^[Yy] ]] || { info "Aborted."; exit 0; } else - copy_local_file "$REPO_ROOT/deployment/${provider}/startup-script.sh" "$tf_dir/startup-script.sh" - sed -i.bak "s|image:.*rune-vault:.*|image: ${DOCKER_IMAGE}:${DOCKER_TAG}|" "$tf_dir/startup-script.sh" - rm -f "$tf_dir/startup-script.sh.bak" - fi - print_info "Terraform files copied." - - # Generate SSH key pair for EC2 access - local ssh_key_path="$INSTALL_DIR/ssh_key" - if [ ! -f "$ssh_key_path" ]; then - print_step "Generating SSH key pair..." - ssh-keygen -t ed25519 -f "$ssh_key_path" -N "" -q - chmod 600 "$ssh_key_path" - chmod 644 "${ssh_key_path}.pub" - print_info "SSH key generated: ${ssh_key_path}" - fi - local public_key - public_key=$(cat "${ssh_key_path}.pub") - - # Generate terraform.tfvars (use printf to avoid heredoc escaping issues) - print_step "Generating terraform.tfvars..." 
- escape_tf() { printf '%s' "$1" | sed 's/\\/\\\\/g; s/"/\\"/g'; } - { - printf 'team_secret = "%s"\n' "$(escape_tf "$VAULT_TEAM_SECRET_VALUE")" - printf 'team_name = "%s"\n' "$(escape_tf "$TEAM_NAME")" - printf 'region = "%s"\n' "$(escape_tf "$CSP_REGION")" - printf 'tls_mode = "%s"\n' "$(escape_tf "$TLS_MODE")" - printf 'tls_hostname = "%s"\n' "$(escape_tf "${TLS_HOSTNAME:-}")" - printf 'envector_endpoint = "%s"\n' "$(escape_tf "$ENVECTOR_ENDPOINT")" - printf 'envector_api_key = "%s"\n' "$(escape_tf "$ENVECTOR_API_KEY")" - printf 'vault_index_name = "%s"\n' "$(escape_tf "$VAULT_INDEX_NAME")" - printf 'public_key = "%s"\n' "$(escape_tf "$public_key")" - case "$provider" in - gcp) printf 'project_id = "%s"\n' "$(escape_tf "$GCP_PROJECT_ID")" ;; - oci) printf 'compartment_id = "%s"\n' "$(escape_tf "$OCI_COMPARTMENT_ID")" ;; - esac - } > "$tf_dir/terraform.tfvars" - - chmod 600 "$tf_dir/terraform.tfvars" - if [ -n "${SUDO_USER:-}" ]; then - chown -R "$SUDO_USER" "$INSTALL_DIR" - fi - print_info "terraform.tfvars created." + warn "terraform.tfstate exists — re-applying (idempotent)." + fi + fi + + [[ -f "${INSTALL_DIR_CSP}/ssh_key.pub" ]] \ + && public_key=$(cat "${INSTALL_DIR_CSP}/ssh_key.pub") + + { + printf 'team_name = "%s"\n' "$(escape_tf "${TEAM_NAME:-default}")" + printf 'tls_mode = "self-signed"\n' + printf 'envector_endpoint = "%s"\n' "$(escape_tf "${ENVECTOR_ENDPOINT}")" + printf 'envector_api_key = "%s"\n' "$(escape_tf "${ENVECTOR_API_KEY}")" + printf 'runevault_version = "dev"\n' + printf 'public_key = "%s"\n' "$(escape_tf "${public_key}")" + printf 'region = "%s"\n' "$(escape_tf "${CSP_REGION}")" + case "$csp" in + gcp) printf 'project_id = "%s"\n' "$(escape_tf "${GCP_PROJECT_ID}")" ;; + oci) printf 'compartment_id = "%s"\n' "$(escape_tf "${OCI_COMPARTMENT_ID}")" ;; + esac + } > "$tfvars" - # Terraform init & apply (run as the original user to preserve CLI auth) - print_step "Running Terraform..." 
- local tf_run="terraform" - if [ -n "${SUDO_USER:-}" ]; then - tf_run="sudo -u $SUDO_USER terraform" - fi - (cd "$tf_dir" && $tf_run init) - (cd "$tf_dir" && $tf_run apply -auto-approve) - - # Capture outputs - VAULT_PUBLIC_IP=$(cd "$tf_dir" && $tf_run output -raw vault_public_ip 2>/dev/null) || true - local vault_url - vault_url=$(cd "$tf_dir" && $tf_run output -raw vault_url 2>/dev/null) || true - - print_info "Infrastructure provisioned." - - # Health polling — wait for cloud-init to finish and Vault to start - if [ -n "$VAULT_PUBLIC_IP" ]; then - print_step "Waiting for Vault to become reachable (up to 10 min)..." - local elapsed=0 - local timeout=600 - while [ $elapsed -lt $timeout ]; do - if bash -c "echo >/dev/tcp/${VAULT_PUBLIC_IP}/50051" 2>/dev/null; then - print_info "Vault is reachable at ${VAULT_PUBLIC_IP}:50051!" - - # Download ca.pem from remote server - if [ "$TLS_MODE" = "self-signed" ]; then - mkdir -p "$INSTALL_DIR/certs" - if [ -n "${SUDO_USER:-}" ]; then - chown -R "$SUDO_USER" "$INSTALL_DIR/certs" - fi - local scp_opts="-i $ssh_key_path -o StrictHostKeyChecking=no -o ConnectTimeout=15 -o BatchMode=yes" - local scp_prefix="" - if [ -n "${SUDO_USER:-}" ]; then - scp_prefix="sudo -u $SUDO_USER" - fi - # Retry SCP (SSH may not be ready immediately) - local downloaded=0 - for attempt in 1 2 3; do - sleep 10 - for ssh_user in ubuntu opc; do - if $scp_prefix scp $scp_opts \ - "${ssh_user}@${VAULT_PUBLIC_IP}:/opt/rune/certs/ca.pem" \ - "$INSTALL_DIR/certs/ca.pem" 2>/dev/null; then - downloaded=1; break 2 - fi - done - done - if [ "$downloaded" -eq 1 ]; then - CSP_CA_CERT_LOCAL="$INSTALL_DIR/certs/ca.pem" - print_info "CA certificate downloaded to ${CSP_CA_CERT_LOCAL}" - else - print_warn "Could not download ca.pem via SSH. Retrieve manually:" - echo " scp -i ${ssh_key_path} ubuntu@${VAULT_PUBLIC_IP}:/opt/rune/certs/ca.pem ${INSTALL_DIR}/certs/" - fi - fi - - break - fi - sleep 10 - elapsed=$((elapsed + 10)) - printf "." 
- done - echo "" - if [ $elapsed -ge $timeout ]; then - print_error "Vault not reachable within ${timeout}s. Cloud-init may still be running." - echo "" - echo " Debug via SSH:" - echo " ssh -i ${ssh_key_path} ubuntu@${VAULT_PUBLIC_IP} 'cloud-init status --wait && docker ps'" - echo "" - echo " Terraform directory: ${tf_dir}" - echo " To destroy resources: cd ${tf_dir} && terraform destroy" - exit 1 - fi - fi + chmod 0600 "$tfvars" + [[ -n "${SUDO_USER:-}" ]] && chown "${SUDO_USER}" "$tfvars" + success "terraform.tfvars written: ${tfvars}" } -# ─── Summary ────────────────────────────────────────────────────────────────── +# ── Terraform apply (mirror install.sh:441–453) ─────────────────────────────── +dev_csp_run_terraform() { + local tf_dir="${INSTALL_DIR_CSP}/deployment" + local tf_user="${SUDO_USER:-$(id -un)}" -show_summary() { - local endpoint - if [ "$DEPLOY_TARGET" = "local" ]; then - if [ "$TLS_MODE" = "none" ]; then - endpoint="localhost:50051" - else - endpoint="localhost:50051 (TLS)" - fi - else - local ip="${VAULT_PUBLIC_IP:-}" - endpoint="${ip}:50051" - fi + info "Running terraform init..." + (cd "$tf_dir" && sudo -u "$tf_user" terraform init -input=false) + info "Running terraform apply..." + (cd "$tf_dir" && sudo -u "$tf_user" terraform apply -auto-approve -input=false) - print_header "Deployment Complete (DEV)" - echo " Vault Endpoint : ${endpoint}" - echo " Docker Image : ${DOCKER_IMAGE}:${DOCKER_TAG} (local build)" - echo " Team Secret : (stored in ${INSTALL_DIR}/.env)" - echo " TLS Mode : ${TLS_MODE}" - if [ "$TLS_MODE" = "self-signed" ] && [ "$DEPLOY_TARGET" = "local" ]; then - echo " CA Certificate : ${INSTALL_DIR}/certs/ca.pem" - fi - echo "" - echo -e "${BOLD}Share with your team:${NC}" - echo "" - echo " Team members will need the following credentials when installing the" - echo " Rune plugin/extension. Share them securely (e.g. 
encrypted channel):" - echo "" - if [ -n "${TLS_HOSTNAME:-}" ]; then - echo " Endpoint : ${TLS_HOSTNAME}:50051" - elif [ "$DEPLOY_TARGET" != "local" ] && [ -n "${VAULT_PUBLIC_IP:-}" ]; then - echo " Endpoint : ${VAULT_PUBLIC_IP}:50051" - else - echo " Endpoint : :50051" - fi - echo "" - echo " Issue per-user tokens with:" - echo " runevault token issue --user --role member --expires 90d" - echo "" - if [ "$DEPLOY_TARGET" = "local" ]; then - echo " Reload your shell before using the runevault command:" - echo " exec \$SHELL" - echo "" - fi - echo " Each team member uses their individual token for authentication." - echo " Team Secret (above) is only needed for DEK derivation — keep it secure." - if [ "$TLS_MODE" = "self-signed" ]; then - echo "" - echo " Your vault uses a self-signed CA. Team members also need the CA" - echo " certificate file below. Share this file directly — they will be" - echo " prompted to provide its path during plugin/extension setup." - echo "" - if [ -n "${CSP_CA_CERT_LOCAL}" ]; then - echo " CA Cert : ${CSP_CA_CERT_LOCAL}" - elif [ "$DEPLOY_TARGET" = "local" ]; then - echo " CA Cert : ${INSTALL_DIR}/certs/ca.pem" - else - echo " CA Cert : /opt/rune/certs/ca.pem (on the remote server)" - fi - fi - if [ "$DEPLOY_TARGET" != "local" ]; then - echo "" - echo -e "${BOLD}Next steps:${NC}" - echo " 1. Point your domain DNS to ${VAULT_PUBLIC_IP:-}" - echo " 2. To use custom TLS certificates, replace files in /opt/rune/certs/ on the server" - echo " and restart: ssh -i ${INSTALL_DIR}/ssh_key ubuntu@${VAULT_PUBLIC_IP:-} 'cd /opt/rune && docker compose restart'" - echo "" - echo " SSH access: ssh -i ${INSTALL_DIR}/ssh_key ubuntu@${VAULT_PUBLIC_IP:-}" - fi - echo "" - echo "Install directory: ${INSTALL_DIR}" - echo "" + chmod 0600 "${tf_dir}/terraform.tfstate" 2>/dev/null || true + chmod 0600 "${tf_dir}/terraform.tfstate.backup" 2>/dev/null || true + success "Terraform apply complete." 
} -# ─── main() ────────────────────────────────────────────────────────────────── - -main() { - print_header "Rune-Vault Interactive Setup (DEV)" - echo "Local development installer — uses files from the working tree." - echo "Repo: ${REPO_ROOT}" - echo "" - - # 1. Deployment target - choose_deploy_target - - # 2. Prerequisites - if [ "$DEPLOY_TARGET" = "local" ]; then - check_prerequisites_local - else - check_prerequisites_csp "$DEPLOY_TARGET" - fi - - # 3. Install directory - prompt_install_dir +# ── Upload + remote install (replaces csp_post_deploy for dev mode) ─────────── +dev_csp_upload_and_install() { + local csp=$1 + local tf_dir="${INSTALL_DIR_CSP}/deployment" + local tf_user="${SUDO_USER:-$(id -un)}" + local key_path="${INSTALL_DIR_CSP}/ssh_key" + local ssh_user=ubuntu + + local public_ip + public_ip=$(cd "$tf_dir" && sudo -u "$tf_user" terraform output -raw vault_public_ip 2>/dev/null) \ + || die "Could not read vault_public_ip from terraform output." + CSP_PUBLIC_IP="$public_ip" + + local ssh_opts="-o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=15" + local ssh_prefix="sudo -u ${tf_user}" + + # 1. Wait for SSH on the VM. + info "Waiting for SSH on ${ssh_user}@${public_ip} (up to 30 min)..." + local timeout_secs=1800 + local deadline=$(( $(date +%s) + timeout_secs )) + local ssh_ready=0 + while [[ $(date +%s) -lt $deadline ]]; do + # shellcheck disable=SC2086 + if $ssh_prefix ssh $ssh_opts -i "$key_path" "${ssh_user}@${public_ip}" true 2>/dev/null; then + ssh_ready=1 + break + fi + sleep 15 + done + [[ "$ssh_ready" -eq 1 ]] \ + || die "Timed out waiting for SSH. ssh -i ${key_path} ${ssh_user}@${public_ip}" + success "SSH reachable." + + # 2. Wait for cloud-init-dev to finish — sentinel file is touched at end of runcmd. + info "Waiting for cloud-init-dev to finish (apt prereqs + sentinel)..." 
+ deadline=$(( $(date +%s) + 600 )) + local prereqs_ready=0 + while [[ $(date +%s) -lt $deadline ]]; do + # shellcheck disable=SC2086 + if $ssh_prefix ssh $ssh_opts -i "$key_path" "${ssh_user}@${public_ip}" \ + "test -e /var/run/runevault-dev-ready" 2>/dev/null; then + prereqs_ready=1 + break + fi + sleep 10 + done + [[ "$prereqs_ready" -eq 1 ]] \ + || die "Timed out waiting for cloud-init-dev. SSH in to debug: ssh -i ${key_path} ${ssh_user}@${public_ip}" + success "Cloud-init-dev complete." + + # 3. SCP install.sh + linux/amd64 binary to /tmp. + info "Uploading install.sh and runevault binary to ${public_ip}..." + # shellcheck disable=SC2086 + $ssh_prefix scp $ssh_opts -i "$key_path" \ + "${REPO_ROOT}/install.sh" \ + "${LINUX_BINARY}" \ + "${ssh_user}@${public_ip}:/tmp/" \ + || die "SCP upload failed." + success "Artifacts uploaded." + + # 4. Run install.sh on the VM with dev hooks. + info "Running install.sh on the VM..." + local tn ee ek + tn=$(escape_single "$TEAM_NAME") + ee=$(escape_single "$ENVECTOR_ENDPOINT") + ek=$(escape_single "$ENVECTOR_API_KEY") + local remote_cmd + remote_cmd="sudo \ + RUNEVAULT_LOCAL_BINARY=/tmp/runevault-${TARGET_OS}-${TARGET_ARCH} \ + RUNEVAULT_TEAM_NAME='${tn}' \ + RUNEVAULT_ENVECTOR_ENDPOINT='${ee}' \ + RUNEVAULT_ENVECTOR_API_KEY='${ek}' \ + bash /tmp/install.sh --target local --non-interactive --version dev" + + # shellcheck disable=SC2086 + $ssh_prefix ssh $ssh_opts -i "$key_path" "${ssh_user}@${public_ip}" "$remote_cmd" \ + || die "Remote install.sh failed. SSH in to debug: ssh -i ${key_path} ${ssh_user}@${public_ip}" + success "Remote install complete." + + # 5. Pull CA cert back to the operator workstation. + info "Fetching CA certificate..." 
+ mkdir -p "${INSTALL_DIR_CSP}/certs" + [[ -n "${SUDO_USER:-}" ]] && chown "${SUDO_USER}" "${INSTALL_DIR_CSP}/certs" + # shellcheck disable=SC2086 + $ssh_prefix scp $ssh_opts -i "$key_path" \ + "${ssh_user}@${public_ip}:/opt/runevault/certs/ca.pem" \ + "${INSTALL_DIR_CSP}/certs/ca.pem" \ + || die "CA cert fetch failed." + success "CA certificate saved: ${INSTALL_DIR_CSP}/certs/ca.pem" +} - # 4. Common settings - prompt_tls_mode - generate_team_secret - prompt_envector_config +# ── Summary (mirror install.sh:491–518 + dev banner) ────────────────────────── +dev_csp_summary() { + local csp=$1 + local tf_dir="${INSTALL_DIR_CSP}/deployment" + local key_path="${INSTALL_DIR_CSP}/ssh_key" + local public_ip="${CSP_PUBLIC_IP:-}" + local commit + commit=$(cd "$REPO_ROOT" && git rev-parse --short HEAD 2>/dev/null || echo unknown) + + printf '\n' + success "Rune-Vault deployed to $(printf '%s' "$csp" | tr 'a-z' 'A-Z') (dev mode)." + printf '\n' + printf ' Endpoint: %s:%s\n' "$public_ip" "$GRPC_PORT" + printf ' CA cert: %s\n' "${INSTALL_DIR_CSP}/certs/ca.pem" + printf ' SSH: ssh -i %s ubuntu@%s\n' "$key_path" "$public_ip" + printf ' Terraform: %s\n' "$tf_dir" + printf ' Source: local working tree (commit %s)\n' "$commit" + printf '\n' + printf 'Tear down:\n' + printf ' cd %s && terraform destroy -auto-approve\n' "$tf_dir" + printf '\n' + printf 'Next steps (SSH into the VM, then run on the VM):\n' + printf ' ssh -i %s ubuntu@%s\n' "$key_path" "$public_ip" + printf '\n' + printf ' Issue a token: runevault token issue --user --role member\n' + printf ' Check status: runevault status\n' + printf ' View logs: runevault logs\n' + printf ' Manage daemon: sudo systemctl start|stop|restart runevault\n' + printf '\n' + warn "BACKUP: Keep this safe — it cannot be recovered if lost:" + warn " Terraform state: ${tf_dir}/terraform.tfstate" +} - # 5. 
CSP-specific settings - if [ "$DEPLOY_TARGET" != "local" ]; then - prompt_csp_config - fi +# ── CSP dispatch (mirror install.sh:520–549) ────────────────────────────────── +dev_csp_dispatch() { + local csp="$TARGET" + local user_home="${SUDO_USER:+$(eval echo ~"${SUDO_USER}")}" + user_home="${user_home:-$HOME}" + INSTALL_DIR_CSP="${INSTALL_DIR_CSP:-${user_home}/rune-vault-${csp}}" + mkdir -p "$INSTALL_DIR_CSP" + [[ -n "${SUDO_USER:-}" ]] && chown "${SUDO_USER}" "$INSTALL_DIR_CSP" + + dev_csp_preflight "$csp" + dev_csp_prompt_config "$csp" + dev_csp_generate_ssh_key + dev_build_linux_binary + dev_csp_copy_terraform_files "$csp" + dev_csp_render_tfvars "$csp" + dev_csp_run_terraform + dev_csp_upload_and_install "$csp" + dev_csp_summary "$csp" + exit 0 +} - # 6. Confirm - show_confirmation +# ── Main ─────────────────────────────────────────────────────────────────────── +print_banner +resolve_target - # 7. Deploy - if [ "$DEPLOY_TARGET" = "local" ]; then - deploy_local - else - deploy_csp - fi +[[ "$UNINSTALL" -eq 1 ]] && dev_forward_uninstall - # 8. 
Summary - show_summary -} +dev_preflight -main "$@" +if [[ "$TARGET" = "local" ]]; then + dev_local_install +else + dev_csp_dispatch +fi diff --git a/tests/FIXTURES.md b/tests/FIXTURES.md index a7ce2d2..7549fe1 100644 --- a/tests/FIXTURES.md +++ b/tests/FIXTURES.md @@ -104,7 +104,7 @@ To run integration tests locally, you need the current passphrase: ```bash export FIXTURES_GPG_PASSPHRASE="" mise run fixtures:decrypt -mise run test +mise run go:test:unit ``` ## Fixture Contents diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py deleted file mode 100644 index 2dfbe6f..0000000 --- a/tests/integration/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Integration tests for Rune-Vault API.""" diff --git a/tests/integration/test_decrypt_pipeline.py b/tests/integration/test_decrypt_pipeline.py deleted file mode 100644 index fa15e91..0000000 --- a/tests/integration/test_decrypt_pipeline.py +++ /dev/null @@ -1,155 +0,0 @@ -""" -Integration tests for the decrypt pipeline. - -Uses pre-generated fixtures from tests/fixtures/ (captured from enVector Cloud) -to test _decrypt_scores_impl and _decrypt_metadata_impl end-to-end. 
- -Fixtures are generated by: scripts/generate-test-fixtures.py -""" - -import json -import os -import sys - -import pytest - -# Add vault to path -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../vault")) - -FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "..", "fixtures") - - -def _load_fixture(name: str) -> str | dict | list: - path = os.path.join(FIXTURES_DIR, name) - with open(path) as f: - if name.endswith(".json"): - return json.load(f) - return f.read().strip() - - -@pytest.fixture(scope="module") -def fixtures(): - """Load all fixtures and patch vault_core for testing.""" - config = _load_fixture("config.json") - keys_dir = os.path.join(FIXTURES_DIR, "keys") - return { - "config": config, - "enc_key_path": os.path.join(keys_dir, "EncKey.json"), - "sec_key_path": os.path.join(keys_dir, "SecKey.json"), - "score_blob_b64": _load_fixture("ciphertext_score.b64"), - "expected_scores": _load_fixture("expected_scores.json"), - "envelopes": _load_fixture("metadata_envelopes.json"), - "expected_metadata": _load_fixture("expected_metadata.json"), - } - - -@pytest.fixture(autouse=True) -def patch_vault(fixtures, monkeypatch): - """Patch vault_core to use fixture keys and config.""" - import vault_core - from pyenvector.crypto import Cipher - from token_store import token_store - - config = fixtures["config"] - - monkeypatch.setattr(vault_core, "sec_key_path", fixtures["sec_key_path"]) - monkeypatch.setattr(vault_core, "VAULT_TEAM_SECRET", config["team_secret"]) - - # Reinitialize cipher with fixture SecKey (dim must match fixture FHE dim) - # cipher.decrypt_score needs enc_key_path but only uses sec_key_path for decryption - # Use a dummy enc_key_path since we only decrypt - fixture_cipher = Cipher(enc_key_path=fixtures["enc_key_path"], dim=config["dim"]) - monkeypatch.setattr(vault_core, "cipher", fixture_cipher) - - token_store._rate_limiters.clear() - - -class TestDecryptScores: - - def test_decrypt_scores(self, fixtures): - """Full 
pipeline: CiphertextScore blob → _decrypt_scores_impl → correct scores.""" - from vault_core import _decrypt_scores_impl - - config = fixtures["config"] - expected = fixtures["expected_scores"] - total_scores = sum(len(row) for row in expected["score"]) - - result = _decrypt_scores_impl(config["token"], fixtures["score_blob_b64"], top_k=total_scores) - data = json.loads(result) - - assert isinstance(data, list) - assert len(data) == total_scores - - # Verify scores match expected (FHE decryption is deterministic with same key) - expected_flat = [] - shard_indices = expected["shard_idx"] - for i, row in enumerate(expected["score"]): - for j, score in enumerate(row): - expected_flat.append((shard_indices[i], j, score)) - expected_flat.sort(key=lambda x: x[2], reverse=True) - - for actual, (exp_shard, exp_row, exp_score) in zip(data, expected_flat): - assert actual["shard_idx"] == exp_shard - assert actual["row_idx"] == exp_row - assert abs(actual["score"] - exp_score) < 1e-6 - - def test_decrypt_scores_top_k(self, fixtures): - """Top-K filtering returns correct count and highest scores.""" - from vault_core import _decrypt_scores_impl - - config = fixtures["config"] - expected = fixtures["expected_scores"] - - # Get all scores to find the true top-3 - all_scores = [] - for i, row in enumerate(expected["score"]): - for j, score in enumerate(row): - all_scores.append(score) - all_scores.sort(reverse=True) - - result = _decrypt_scores_impl(config["token"], fixtures["score_blob_b64"], top_k=3) - data = json.loads(result) - - assert len(data) == 3 - # Returned scores should be the 3 highest - returned_scores = [item["score"] for item in data] - for actual, expected_score in zip(returned_scores, all_scores[:3]): - assert abs(actual - expected_score) < 1e-6 - # Descending order - assert returned_scores == sorted(returned_scores, reverse=True) - - -class TestDecryptMetadata: - - def test_decrypt_metadata_single(self, fixtures): - """Single envelope → _decrypt_metadata_impl → 
correct plaintext.""" - from vault_core import _decrypt_metadata_impl - - config = fixtures["config"] - envelopes = fixtures["envelopes"] - expected = fixtures["expected_metadata"] - - result = _decrypt_metadata_impl(config["token"], [envelopes[0]]) - data = json.loads(result) - - assert isinstance(data, list) - assert len(data) == 1 - # _decrypt_metadata_impl returns JSON strings or already-decoded objects - actual = json.loads(data[0]) if isinstance(data[0], str) else data[0] - assert actual == expected[0] - - def test_decrypt_metadata_multiple(self, fixtures): - """Multiple envelopes → all decrypted correctly.""" - from vault_core import _decrypt_metadata_impl - - config = fixtures["config"] - envelopes = fixtures["envelopes"] - expected = fixtures["expected_metadata"] - - result = _decrypt_metadata_impl(config["token"], envelopes) - data = json.loads(result) - - assert len(data) == len(expected) - for item, exp in zip(data, expected): - actual = json.loads(item) if isinstance(item, str) else item - assert actual == exp diff --git a/tests/requirements.txt b/tests/requirements.txt deleted file mode 100644 index 5a89044..0000000 --- a/tests/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -# Test dependencies for Rune-Vault -pytest>=7.4.0 -pytest-asyncio>=0.21.0 -pytest-cov>=4.1.0 -httpx>=0.24.0 - -# Vault dependencies -numpy>=1.24.0 -pyenvector>=1.2.0 - diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py deleted file mode 100644 index c42a1d7..0000000 --- a/tests/unit/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Unit tests for Rune-Vault components.""" diff --git a/tests/unit/test_admin_server.py b/tests/unit/test_admin_server.py deleted file mode 100644 index 5d6ee6f..0000000 --- a/tests/unit/test_admin_server.py +++ /dev/null @@ -1,209 +0,0 @@ -""" -Unit tests for Admin HTTP server. 
-""" -import http.client -import json -import os -import sys -import time - -import pytest - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../vault')) - -from token_store import TokenStore, DEFAULT_ROLES -from admin_server import start_admin_server - - -def _request(port, method, path, body=None): - conn = http.client.HTTPConnection("127.0.0.1", port) - headers = {"Content-Type": "application/json"} if body else {} - data = json.dumps(body).encode() if body else None - conn.request(method, path, body=data, headers=headers) - resp = conn.getresponse() - result = json.loads(resp.read().decode()) - conn.close() - return resp.status, result - - -class TestAdminServer: - """Integration tests for the admin HTTP API.""" - - @pytest.fixture(autouse=True) - def setup_server(self): - self.store = TokenStore() - self.store._roles = dict(DEFAULT_ROLES) - # Use port 0 to let OS assign an available ephemeral port - self.server = start_admin_server(self.store, "127.0.0.1", 0) - self.port = self.server.server_address[1] - time.sleep(0.1) # Give server time to start - yield - self.server.shutdown() - - # ── Token endpoints ────────────────────────────────────────────── - - def test_issue_token(self): - status, data = _request(self.port, "POST", "/tokens", { - "user": "alice", "role": "member", "expires_days": 90 - }) - assert status == 201 - assert data["user"] == "alice" - assert data["token"].startswith("evt_") - assert data["role"] == "member" - - def test_list_tokens(self): - _request(self.port, "POST", "/tokens", { - "user": "alice", "role": "member" - }) - status, data = _request(self.port, "GET", "/tokens") - assert status == 200 - assert len(data["tokens"]) == 1 - assert data["tokens"][0]["user"] == "alice" - - def test_revoke_token(self): - _request(self.port, "POST", "/tokens", { - "user": "alice", "role": "member" - }) - status, data = _request(self.port, "DELETE", "/tokens/alice") - assert status == 200 - assert "Revoked" in data["message"] - - # List 
should be empty - _, data = _request(self.port, "GET", "/tokens") - assert len(data["tokens"]) == 0 - - def test_revoke_nonexistent_token(self): - status, data = _request(self.port, "DELETE", "/tokens/nobody") - assert status == 404 - - def test_issue_duplicate_user(self): - _request(self.port, "POST", "/tokens", { - "user": "alice", "role": "member" - }) - status, data = _request(self.port, "POST", "/tokens", { - "user": "alice", "role": "member" - }) - assert status == 400 - assert "already exists" in data["error"] - - def test_issue_token_invalid_role(self): - status, data = _request(self.port, "POST", "/tokens", { - "user": "alice", "role": "nonexistent" - }) - assert status == 400 - - # ── Rotate endpoints ────────────────────────────────────────────── - - def test_rotate_token(self): - _, issue_data = _request(self.port, "POST", "/tokens", { - "user": "alice", "role": "member" - }) - status, data = _request(self.port, "POST", "/tokens/alice/rotate", {}) - assert status == 200 - assert data["user"] == "alice" - assert data["token"].startswith("evt_") - assert data["token"] != issue_data["token"] - assert data["role"] == "member" - - def test_rotate_nonexistent_user(self): - status, data = _request(self.port, "POST", "/tokens/nobody/rotate", {}) - assert status == 400 - assert "No token found" in data["error"] - - def test_rotate_all(self): - _request(self.port, "POST", "/tokens", {"user": "alice", "role": "member"}) - _request(self.port, "POST", "/tokens", {"user": "bob", "role": "admin"}) - status, data = _request(self.port, "POST", "/tokens/_rotate_all", {}) - assert status == 200 - assert data["rotated"] == 2 - assert len(data["tokens"]) == 2 - - # ── Role endpoints ─────────────────────────────────────────────── - - def test_list_roles(self): - status, data = _request(self.port, "GET", "/roles") - assert status == 200 - names = [r["name"] for r in data["roles"]] - assert "admin" in names - assert "member" in names - - def test_create_role(self): - status, 
data = _request(self.port, "POST", "/roles", { - "name": "researcher", - "scope": ["get_public_key", "decrypt_scores"], - "top_k": 3, - "rate_limit": "10/60s", - }) - assert status == 201 - assert data["name"] == "researcher" - - def test_update_role(self): - status, data = _request(self.port, "PUT", "/roles/member", { - "top_k": 8, - }) - assert status == 200 - assert data["top_k"] == 8 - - def test_delete_custom_role(self): - _request(self.port, "POST", "/roles", { - "name": "temp", - "scope": ["get_public_key"], - "top_k": 1, - "rate_limit": "5/60s", - }) - status, data = _request(self.port, "DELETE", "/roles/temp") - assert status == 200 - - def test_delete_default_role_rejected(self): - status, data = _request(self.port, "DELETE", "/roles/admin") - assert status == 400 - assert "Cannot delete default" in data["error"] - - def test_unknown_resource(self): - status, _ = _request(self.port, "GET", "/unknown") - assert status == 404 - - # ── Health endpoint ────────────────────────────────────────────── - - def test_health_ok_without_servicer(self): - status, data = _request(self.port, "GET", "/health") - assert status == 200 - assert data["status"] == "ok" - - def test_health_ok_with_serving_servicer(self): - from grpc_health.v1 import health_pb2 - from grpc_health.v1.health import HealthServicer - - servicer = HealthServicer() - servicer.set("", health_pb2.HealthCheckResponse.SERVING) - - store = TokenStore() - store._roles = dict(DEFAULT_ROLES) - server = start_admin_server(store, "127.0.0.1", 0, health_servicer=servicer) - port = server.server_address[1] - time.sleep(0.1) - try: - status, data = _request(port, "GET", "/health") - assert status == 200 - assert data["status"] == "ok" - finally: - server.shutdown() - - def test_health_unhealthy_with_not_serving(self): - from grpc_health.v1 import health_pb2 - from grpc_health.v1.health import HealthServicer - - servicer = HealthServicer() - servicer.set("", health_pb2.HealthCheckResponse.NOT_SERVING) - - store = 
TokenStore() - store._roles = dict(DEFAULT_ROLES) - server = start_admin_server(store, "127.0.0.1", 0, health_servicer=servicer) - port = server.server_address[1] - time.sleep(0.1) - try: - status, data = _request(port, "GET", "/health") - assert status == 503 - assert data["status"] == "unhealthy" - finally: - server.shutdown() diff --git a/tests/unit/test_audit.py b/tests/unit/test_audit.py deleted file mode 100644 index 8409166..0000000 --- a/tests/unit/test_audit.py +++ /dev/null @@ -1,250 +0,0 @@ -""" -Unit tests for structured audit logging (issue #19). -""" - -import json -import os -import sys -import tempfile -from unittest.mock import MagicMock - -import pytest - -# Add vault to path -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../vault")) - -from audit import ( - AuditLogger, - _parse_audit_config, - extract_source_ip, -) - - -# --------------------------------------------------------------------------- -# Config parsing -# --------------------------------------------------------------------------- - - -class TestParseAuditConfig: - def test_empty_string(self): - cfg = _parse_audit_config("") - assert cfg["file"] is None - assert cfg["stdout"] is False - - def test_file_default_path(self): - cfg = _parse_audit_config("file") - assert cfg["file"] == "/var/log/rune-vault/audit.log" - assert cfg["stdout"] is False - - def test_file_custom_path(self): - cfg = _parse_audit_config("file:/tmp/my-audit.log") - assert cfg["file"] == "/tmp/my-audit.log" - - def test_stdout(self): - cfg = _parse_audit_config("stdout") - assert cfg["file"] is None - assert cfg["stdout"] is True - - def test_file_plus_stdout(self): - cfg = _parse_audit_config("file+stdout") - assert cfg["file"] == "/var/log/rune-vault/audit.log" - assert cfg["stdout"] is True - - def test_stdout_plus_file(self): - cfg = _parse_audit_config("stdout+file") - assert cfg["file"] == "/var/log/rune-vault/audit.log" - assert cfg["stdout"] is True - - def 
test_custom_path_plus_stdout(self): - cfg = _parse_audit_config("file:/tmp/my-audit.log+stdout") - assert cfg["file"] == "/tmp/my-audit.log" - assert cfg["stdout"] is True - - def test_custom_path_preserves_case(self): - cfg = _parse_audit_config("file:/var/log/Rune-Vault/Audit.log") - assert cfg["file"] == "/var/log/Rune-Vault/Audit.log" - - def test_file_keyword_case_insensitive(self): - cfg = _parse_audit_config("FILE+STDOUT") - assert cfg["file"] == "/var/log/rune-vault/audit.log" - assert cfg["stdout"] is True - - -# --------------------------------------------------------------------------- -# Source IP extraction -# --------------------------------------------------------------------------- - - -class TestExtractSourceIp: - def _make_context(self, peer_value): - ctx = MagicMock() - ctx.peer.return_value = peer_value - return ctx - - def test_ipv4(self): - assert extract_source_ip(self._make_context("ipv4:10.0.0.1:12345")) == "10.0.0.1" - - def test_ipv4_no_port(self): - # Defensive: if port is missing - assert extract_source_ip(self._make_context("ipv4:10.0.0.1")) == "10.0.0.1" - - def test_ipv6_bracketed(self): - assert extract_source_ip(self._make_context("ipv6:[::1]:12345")) == "[::1]" - - def test_ipv6_no_brackets(self): - result = extract_source_ip(self._make_context("ipv6:::1:12345")) - assert result # should not crash - - def test_none_peer(self): - assert extract_source_ip(self._make_context(None)) == "unknown" - - def test_exception(self): - ctx = MagicMock() - ctx.peer.side_effect = RuntimeError("broken") - assert extract_source_ip(ctx) == "unknown" - - def test_unix_socket(self): - result = extract_source_ip(self._make_context("unix:/var/run/vault.sock")) - assert result == "unix:/var/run/vault.sock" - - -# --------------------------------------------------------------------------- -# AuditLogger -# --------------------------------------------------------------------------- - - -class TestAuditLogger: - def test_disabled_when_no_handlers(self): - 
logger = AuditLogger({"file": None, "stdout": False}) - assert logger.enabled is False - - def test_file_mode_writes_json(self): - with tempfile.NamedTemporaryFile(mode="r", suffix=".log", delete=False) as f: - path = f.name - try: - logger = AuditLogger({"file": path, "stdout": False}) - assert logger.enabled is True - entry = logger.log( - timestamp="2026-03-30T12:00:00+00:00", - user_id="alice", - method="decrypt_scores", - top_k=10, - result_count=5, - status="success", - source_ip="10.0.0.1", - latency_ms=42.567, - ) - # Force flush - for h in logger._logger.handlers: - h.flush() - with open(path) as fh: - line = fh.readline().strip() - parsed = json.loads(line) - assert parsed["user_id"] == "alice" - assert parsed["method"] == "decrypt_scores" - assert parsed["top_k"] == 10 - assert parsed["result_count"] == 5 - assert parsed["status"] == "success" - assert parsed["source_ip"] == "10.0.0.1" - assert parsed["latency_ms"] == 42.57 - assert parsed["timestamp"] == "2026-03-30T12:00:00+00:00" - assert "error" not in parsed - # Verify return value matches - assert entry["user_id"] == "alice" - finally: - os.unlink(path) - - def test_error_field_included(self): - with tempfile.NamedTemporaryFile(mode="r", suffix=".log", delete=False) as f: - path = f.name - try: - logger = AuditLogger({"file": path, "stdout": False}) - logger.log( - timestamp="2026-03-30T12:00:00+00:00", - user_id="unknown", - method="decrypt_scores", - top_k=None, - result_count=0, - status="error", - source_ip="10.0.0.99", - latency_ms=1.23, - error="Invalid authentication token", - ) - for h in logger._logger.handlers: - h.flush() - with open(path) as fh: - parsed = json.loads(fh.readline().strip()) - assert parsed["status"] == "error" - assert parsed["error"] == "Invalid authentication token" - assert parsed["top_k"] is None - finally: - os.unlink(path) - - def test_stdout_mode(self, capsys): - logger = AuditLogger({"file": None, "stdout": True}) - assert logger.enabled is True - logger.log( - 
timestamp="2026-03-30T12:00:00+00:00", - user_id="bob", - method="get_public_key", - top_k=None, - result_count=1, - status="success", - source_ip="10.0.0.2", - latency_ms=5.0, - ) - captured = capsys.readouterr() - parsed = json.loads(captured.out.strip()) - assert parsed["user_id"] == "bob" - - def test_empty_error_string_included(self): - with tempfile.NamedTemporaryFile(mode="r", suffix=".log", delete=False) as f: - path = f.name - try: - logger = AuditLogger({"file": path, "stdout": False}) - logger.log( - timestamp="2026-03-30T12:00:00+00:00", - user_id="test", - method="decrypt_scores", - top_k=None, - result_count=0, - status="error", - source_ip="10.0.0.1", - latency_ms=1.0, - error="", - ) - for h in logger._logger.handlers: - h.flush() - with open(path) as fh: - parsed = json.loads(fh.readline().strip()) - assert "error" in parsed - assert parsed["error"] == "" - finally: - os.unlink(path) - - def test_entry_schema_required_fields(self): - with tempfile.NamedTemporaryFile(mode="r", suffix=".log", delete=False) as f: - path = f.name - try: - logger = AuditLogger({"file": path, "stdout": False}) - logger.log( - timestamp="2026-03-30T12:00:00+00:00", - user_id="test", - method="decrypt_metadata", - top_k=None, - result_count=3, - status="success", - source_ip="127.0.0.1", - latency_ms=10.0, - ) - for h in logger._logger.handlers: - h.flush() - with open(path) as fh: - parsed = json.loads(fh.readline().strip()) - required = {"timestamp", "user_id", "method", "top_k", "result_count", - "status", "source_ip", "latency_ms"} - assert required.issubset(parsed.keys()) - finally: - os.unlink(path) - - diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py deleted file mode 100644 index b8507bf..0000000 --- a/tests/unit/test_auth.py +++ /dev/null @@ -1,147 +0,0 @@ -""" -Unit tests for authentication and token validation. -Updated for per-user token auth (issue #18). 
-""" -import pytest -import sys -import os -import time - -# Add vault to path -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../vault')) - -from token_store import ( - TokenStore, RateLimiter, Role, - TokenNotFoundError, TokenExpiredError, RateLimitError, ScopeError, -) -from vault_core import validate_token, token_store - -# Demo token used when no config files or env var set -DEMO_TOKEN = "evt_0000000000000000000000000000demo" - - -class TestTokenValidation: - """Test token validation with per-user token store.""" - - def setup_method(self): - """Reset token store to demo mode for each test.""" - token_store._tokens.clear() - token_store._tokens_by_user.clear() - token_store._roles.clear() - token_store._rate_limiters.clear() - token_store.load_defaults_with_demo_token() - - def test_valid_demo_token(self): - """Demo token should return (username, role) tuple.""" - username, role = validate_token(DEMO_TOKEN) - assert username == "demo" - assert role.name == "admin" - - def test_invalid_token_raises_error(self): - """Invalid token should raise TokenNotFoundError.""" - with pytest.raises(TokenNotFoundError): - validate_token("invalid-token-123") - - def test_empty_token_raises_error(self): - """Empty token should raise TokenNotFoundError.""" - with pytest.raises(TokenNotFoundError): - validate_token("") - - def test_token_case_sensitive(self): - """Token validation should be case-sensitive.""" - with pytest.raises(TokenNotFoundError): - validate_token(DEMO_TOKEN.upper()) - - def test_token_no_whitespace_tolerance(self): - """Tokens with whitespace should fail.""" - with pytest.raises(TokenNotFoundError): - validate_token(f" {DEMO_TOKEN} ") - - def test_old_tokens_not_valid(self): - """Old hardcoded tokens should not work.""" - with pytest.raises(TokenNotFoundError): - validate_token("envector-team-alpha") - - def test_validate_returns_tuple(self): - """validate_token should return (username, Role) tuple.""" - result = validate_token(DEMO_TOKEN) - 
assert isinstance(result, tuple) - assert len(result) == 2 - username, role = result - assert isinstance(username, str) - assert isinstance(role, Role) - - -class TestRateLimiter: - """Test rate limiting functionality.""" - - def test_allows_requests_under_limit(self): - """Requests under limit should be allowed.""" - limiter = RateLimiter(max_requests=5, window_seconds=60) - for _ in range(5): - assert limiter.is_allowed("test-client") is True - - def test_blocks_requests_over_limit(self): - """Requests over limit should be blocked.""" - limiter = RateLimiter(max_requests=3, window_seconds=60) - for _ in range(3): - limiter.is_allowed("test-client") - assert limiter.is_allowed("test-client") is False - - def test_different_clients_have_separate_limits(self): - """Different clients should have independent rate limits.""" - limiter = RateLimiter(max_requests=2, window_seconds=60) - limiter.is_allowed("client-a") - limiter.is_allowed("client-a") - assert limiter.is_allowed("client-a") is False - assert limiter.is_allowed("client-b") is True - - def test_window_expiration(self): - """Old requests should expire after window.""" - limiter = RateLimiter(max_requests=2, window_seconds=1) - limiter.is_allowed("test-client") - limiter.is_allowed("test-client") - assert limiter.is_allowed("test-client") is False - time.sleep(1.1) - assert limiter.is_allowed("test-client") is True - - def test_retry_after_returns_correct_value(self): - """get_retry_after should return seconds until next allowed request.""" - limiter = RateLimiter(max_requests=1, window_seconds=60) - limiter.is_allowed("test-client") - retry_after = limiter.get_retry_after("test-client") - assert 55 <= retry_after <= 60 - - def test_remove_client(self): - """remove() should clear a client's tracking data.""" - limiter = RateLimiter(max_requests=1, window_seconds=60) - limiter.is_allowed("test-client") - assert limiter.is_allowed("test-client") is False - limiter.remove("test-client") - assert 
limiter.is_allowed("test-client") is True - - -class TestScopeEnforcement: - """Test scope enforcement for roles.""" - - def setup_method(self): - token_store._tokens.clear() - token_store._tokens_by_user.clear() - token_store._roles.clear() - token_store._rate_limiters.clear() - token_store.load_defaults_with_demo_token() - - def test_admin_scope_allows_all_methods(self): - """Admin role should allow all standard methods.""" - _, role = validate_token(DEMO_TOKEN) - # Should not raise - token_store.check_scope(role, "get_public_key") - token_store.check_scope(role, "decrypt_scores") - token_store.check_scope(role, "decrypt_metadata") - token_store.check_scope(role, "manage_tokens") - - def test_scope_rejects_unauthorized_method(self): - """Methods not in scope should raise ScopeError.""" - role = Role("limited", ["get_public_key"], 5, "30/60s") - with pytest.raises(ScopeError, match="decrypt_scores"): - token_store.check_scope(role, "decrypt_scores") diff --git a/tests/unit/test_decrypt_scores.py b/tests/unit/test_decrypt_scores.py deleted file mode 100644 index 4461e21..0000000 --- a/tests/unit/test_decrypt_scores.py +++ /dev/null @@ -1,226 +0,0 @@ -""" -Unit tests for decrypt_scores (including Top-K). - -Uses mock-based approach: since we cannot create real CiphertextScore blobs -without running FHE scoring on an actual index from enVector Cloud, we mock -cipher.decrypt_score() and CiphertextScore.ParseFromString() to test the Top-K -and response format logic. 
-""" -import pytest -import sys -import os -import json -import base64 -import numpy as np -from unittest.mock import MagicMock, patch - -# Add vault to path -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../vault')) - -# Import the implementation function (not the MCP-decorated version) -from vault_core import _decrypt_scores_impl as decrypt_scores -from token_store import token_store - - -def _make_fake_blob() -> str: - """Create a fake base64-encoded blob (content doesn't matter since decrypt_score is mocked).""" - return base64.b64encode(b"fake_ciphertext_score_proto").decode("utf-8") - - -def _mock_decrypt_score_flat(scores): - """Build a mock decrypt_score return value for FLAT index (single shard).""" - return {"score": [scores], "shard_idx": [0]} - - -def _mock_decrypt_score_ivf(score_2d, shard_indices): - """Build a mock decrypt_score return value for IVF_FLAT index (multiple shards).""" - return {"score": score_2d, "shard_idx": shard_indices} - - -class TestDecryptScores: - - @pytest.fixture(autouse=True) - def reset_rate_limiter(self): - """Reset rate limiters before each test.""" - token_store._rate_limiters.clear() - - def _patch_cipher_and_proto(self, monkeypatch, scores_return): - """Helper to mock cipher, CiphertextScore, and CipherBlock.""" - mock_cipher = MagicMock() - mock_cipher.decrypt_score.return_value = scores_return - monkeypatch.setattr('vault_core.cipher', mock_cipher) - monkeypatch.setattr('vault_core.sec_key_path', '/fake/SecKey.json') - monkeypatch.setattr('vault_core.CiphertextScore', MagicMock) - monkeypatch.setattr('vault_core.CipherBlock', MagicMock) - - def test_decrypt_valid_scores_flat(self, monkeypatch): - """Valid encrypted scores (FLAT) should decrypt successfully.""" - scores = np.random.rand(100).tolist() - blob = _make_fake_blob() - - self._patch_cipher_and_proto(monkeypatch, _mock_decrypt_score_flat(scores)) - - result = decrypt_scores("evt_0000000000000000000000000000demo", blob, top_k=5) - data = 
json.loads(result) - - assert isinstance(data, list) - assert len(data) == 5 - for item in data: - assert "shard_idx" in item - assert "row_idx" in item - assert "score" in item - - def test_decrypt_valid_scores_ivf(self, monkeypatch): - """Valid encrypted scores (IVF_FLAT) should decrypt successfully with shard mapping.""" - shard0 = [0.1, 0.9, 0.3] - shard1 = [0.8, 0.2, 0.7] - blob = _make_fake_blob() - - self._patch_cipher_and_proto(monkeypatch, _mock_decrypt_score_ivf( - [shard0, shard1], [5, 12] - )) - - result = decrypt_scores("evt_0000000000000000000000000000demo", blob, top_k=3) - data = json.loads(result) - - assert isinstance(data, list) - assert len(data) == 3 - # Top 3: shard 5 row 1 (0.9), shard 12 row 0 (0.8), shard 12 row 2 (0.7) - assert data[0] == {"shard_idx": 5, "row_idx": 1, "score": 0.9} - assert data[1] == {"shard_idx": 12, "row_idx": 0, "score": 0.8} - assert data[2] == {"shard_idx": 12, "row_idx": 2, "score": 0.7} - - def test_top_k_returns_correct_count(self, monkeypatch): - """Top-K should return exactly K results.""" - scores = np.random.rand(50).tolist() - blob = _make_fake_blob() - - self._patch_cipher_and_proto(monkeypatch, _mock_decrypt_score_flat(scores)) - - for k in [1, 2, 3, 5]: - result = decrypt_scores("evt_0000000000000000000000000000demo", blob, top_k=k) - data = json.loads(result) - assert isinstance(data, list) - assert len(data) == k, f"Expected {k} results, got {len(data)}" - - def test_top_k_returns_highest_scores(self, monkeypatch): - """Top-K should return the highest scoring items.""" - scores = [0.1, 0.9, 0.3, 0.8, 0.5] - blob = _make_fake_blob() - - self._patch_cipher_and_proto(monkeypatch, _mock_decrypt_score_flat(scores)) - - result = decrypt_scores("evt_0000000000000000000000000000demo", blob, top_k=2) - data = json.loads(result) - - assert isinstance(data, list) - assert len(data) == 2 - returned_scores = [item["score"] for item in data] - assert returned_scores[0] == pytest.approx(0.9) - assert 
returned_scores[1] == pytest.approx(0.8) - - def test_top_k_limit_enforced(self): - """Top-K exceeding role limit should be rejected.""" - from token_store import TopKExceededError, token_store as ts - - # Issue a member token (top_k=10) to test limit enforcement - tok = ts.add_token("topk-test-user", "member") - blob = _make_fake_blob() - - with pytest.raises(TopKExceededError): - decrypt_scores(tok.token, blob, top_k=15) - - ts.revoke_token("topk-test-user") - - def test_invalid_token_rejected(self): - """Invalid token should raise an authentication error.""" - from token_store import TokenNotFoundError - blob = _make_fake_blob() - - with pytest.raises(TokenNotFoundError): - decrypt_scores("invalid-token", blob, top_k=5) - - def test_malformed_blob_returns_error(self): - """Malformed encrypted blob should return error.""" - - result = decrypt_scores("evt_0000000000000000000000000000demo", "not-valid-base64!!!", top_k=5) - data = json.loads(result) - - assert "error" in data - - def test_empty_blob_returns_empty_or_error(self): - """Empty blob should return error or empty result list.""" - result = decrypt_scores("evt_0000000000000000000000000000demo", "", top_k=5) - data = json.loads(result) - - # Empty base64 decodes to b"", which produces an empty protobuf - # with no scores — either an error dict or an empty list is acceptable - assert isinstance(data, (dict, list)) - - def test_result_format_correct(self, monkeypatch): - """Result should have correct format: [{shard_idx, row_idx, score}, ...].""" - scores = np.random.rand(20).tolist() - blob = _make_fake_blob() - - self._patch_cipher_and_proto(monkeypatch, _mock_decrypt_score_flat(scores)) - - result = decrypt_scores("evt_0000000000000000000000000000demo", blob, top_k=5) - data = json.loads(result) - - assert isinstance(data, list) - for item in data: - assert "shard_idx" in item - assert "row_idx" in item - assert "score" in item - assert isinstance(item["shard_idx"], int) - assert 
isinstance(item["row_idx"], int) - assert isinstance(item["score"], (int, float)) - - def test_scores_sorted_descending(self, monkeypatch): - """Returned scores should be sorted in descending order.""" - scores = np.random.rand(30).tolist() - blob = _make_fake_blob() - - self._patch_cipher_and_proto(monkeypatch, _mock_decrypt_score_flat(scores)) - - result = decrypt_scores("evt_0000000000000000000000000000demo", blob, top_k=5) - data = json.loads(result) - - assert isinstance(data, list) and len(data) > 1 - returned_scores = [item["score"] for item in data] - for i in range(len(returned_scores) - 1): - assert returned_scores[i] >= returned_scores[i + 1], "Scores not sorted descending" - - def test_default_top_k_is_5(self, monkeypatch): - """Default top_k should be 5.""" - scores = np.random.rand(50).tolist() - blob = _make_fake_blob() - - self._patch_cipher_and_proto(monkeypatch, _mock_decrypt_score_flat(scores)) - - result = decrypt_scores("evt_0000000000000000000000000000demo", blob) - data = json.loads(result) - - assert isinstance(data, list) - assert len(data) == 5, "Default top_k should be 5" - - def test_ivf_topk_cross_shard(self, monkeypatch): - """Top-K across multiple IVF shards should pick globally highest scores.""" - shard0 = [0.1, 0.5, 0.3] - shard1 = [0.9, 0.2, 0.8] - shard2 = [0.4, 0.6, 0.7] - blob = _make_fake_blob() - - self._patch_cipher_and_proto(monkeypatch, _mock_decrypt_score_ivf( - [shard0, shard1, shard2], [10, 20, 30] - )) - - result = decrypt_scores("evt_0000000000000000000000000000demo", blob, top_k=4) - data = json.loads(result) - - assert len(data) == 4 - # Expected top-4: shard20 row0 (0.9), shard20 row2 (0.8), shard30 row2 (0.7), shard30 row1 (0.6) - assert data[0] == {"shard_idx": 20, "row_idx": 0, "score": 0.9} - assert data[1] == {"shard_idx": 20, "row_idx": 2, "score": 0.8} - assert data[2] == {"shard_idx": 30, "row_idx": 2, "score": 0.7} - assert data[3] == {"shard_idx": 30, "row_idx": 1, "score": 0.6} diff --git 
a/tests/unit/test_metadata_dek.py b/tests/unit/test_metadata_dek.py deleted file mode 100644 index 47039af..0000000 --- a/tests/unit/test_metadata_dek.py +++ /dev/null @@ -1,164 +0,0 @@ -""" -Unit tests for per-agent metadata DEK derivation and decrypt_metadata. - -Uses mock-based approach: aes_decrypt_metadata is mocked to test the -envelope parsing and per-agent HKDF key derivation without requiring -real FHE keys. -""" -import pytest -import sys -import os -import json -from unittest.mock import MagicMock - -# Add vault to path -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../vault')) - -from vault_core import ( - derive_agent_key, - _decrypt_metadata_impl as decrypt_metadata, -) -import vault_core -from token_store import token_store - -VALID_TOKEN = "evt_0000000000000000000000000000demo" -FAKE_TEAM_SECRET = "evt_fake-team-secret-for-testing-purposes-only" - - -# ============================================================================= -# derive_agent_key tests -# ============================================================================= -class TestDeriveAgentKey: - - def test_deterministic(self): - """Same inputs must produce the same DEK.""" - dek1 = derive_agent_key("my-team-secret", "agent-abc") - dek2 = derive_agent_key("my-team-secret", "agent-abc") - assert dek1 == dek2 - - def test_different_agent_id_different_dek(self): - """Different agent_id must produce different DEKs.""" - dek_a = derive_agent_key("my-team-secret", "agent-aaa") - dek_b = derive_agent_key("my-team-secret", "agent-bbb") - assert dek_a != dek_b - - def test_output_is_32_bytes(self): - """DEK must be exactly 32 bytes (AES-256).""" - dek = derive_agent_key("some-secret", "any-agent") - assert isinstance(dek, bytes) - assert len(dek) == 32 - - def test_different_team_secret_different_dek(self): - """Different team secrets must produce different DEKs for the same agent.""" - dek1 = derive_agent_key("secret-1", "agent-x") - dek2 = derive_agent_key("secret-2", 
"agent-x") - assert dek1 != dek2 - - -# ============================================================================= -# _decrypt_metadata_impl tests -# ============================================================================= -class TestDecryptMetadataImpl: - - @pytest.fixture(autouse=True) - def reset_state(self): - """Reset rate limiters before each test.""" - token_store._rate_limiters.clear() - - def _make_envelope(self, agent_id: str, ciphertext_b64: str) -> str: - """Build a JSON envelope string.""" - return json.dumps({"a": agent_id, "c": ciphertext_b64}) - - def test_per_agent_envelope_decryption(self, monkeypatch): - """Per-agent JSON envelope should be parsed and decrypted with derived DEK.""" - monkeypatch.setattr('vault_core.VAULT_TEAM_SECRET', FAKE_TEAM_SECRET) - - expected_dek = derive_agent_key(FAKE_TEAM_SECRET, "agent123") - mock_decrypt = MagicMock(return_value=b'{"text": "hello"}') - monkeypatch.setattr('vault_core.aes_decrypt_metadata', mock_decrypt) - - envelope = self._make_envelope("agent123", "Y2lwaGVydGV4dA==") - result = decrypt_metadata(VALID_TOKEN, [envelope]) - data = json.loads(result) - - assert isinstance(data, list) - assert len(data) == 1 - assert data[0] == '{"text": "hello"}' - - # Verify aes_decrypt_metadata was called with the ciphertext and derived DEK - mock_decrypt.assert_called_once_with("Y2lwaGVydGV4dA==", expected_dek) - - def test_missing_team_secret_returns_error(self, monkeypatch): - """Missing VAULT_TEAM_SECRET should return error.""" - monkeypatch.setattr('vault_core.VAULT_TEAM_SECRET', '') - - result = decrypt_metadata(VALID_TOKEN, ["anything"]) - data = json.loads(result) - - assert "error" in data - assert "VAULT_TEAM_SECRET not configured" in data["error"] - - def test_invalid_token_rejected(self): - """Invalid token should raise an authentication error.""" - from token_store import TokenNotFoundError - with pytest.raises(TokenNotFoundError): - decrypt_metadata("bad-token", ["anything"]) - - def 
test_invalid_envelope_returns_error(self, monkeypatch): - """Non-JSON envelope should return a decryption error.""" - monkeypatch.setattr('vault_core.VAULT_TEAM_SECRET', FAKE_TEAM_SECRET) - - result = decrypt_metadata(VALID_TOKEN, ["not-valid-json"]) - data = json.loads(result) - - assert "error" in data - - def test_missing_key_in_envelope_returns_error(self, monkeypatch): - """JSON without 'a' or 'c' key should return a decryption error.""" - monkeypatch.setattr('vault_core.VAULT_TEAM_SECRET', FAKE_TEAM_SECRET) - - bad_envelope = json.dumps({"x": "y"}) - result = decrypt_metadata(VALID_TOKEN, [bad_envelope]) - data = json.loads(result) - - assert "error" in data - - def test_decryption_error_returns_error(self, monkeypatch): - """If aes_decrypt_metadata raises, should return error JSON.""" - monkeypatch.setattr('vault_core.VAULT_TEAM_SECRET', FAKE_TEAM_SECRET) - - mock_decrypt = MagicMock(side_effect=Exception("decrypt boom")) - monkeypatch.setattr('vault_core.aes_decrypt_metadata', mock_decrypt) - - envelope = self._make_envelope("agent1", "ct_data") - result = decrypt_metadata(VALID_TOKEN, [envelope]) - data = json.loads(result) - - assert "error" in data - assert "decrypt boom" in data["error"] - - def test_multiple_envelopes(self, monkeypatch): - """Multiple envelopes with different agent_ids should each derive correct DEK.""" - monkeypatch.setattr('vault_core.VAULT_TEAM_SECRET', FAKE_TEAM_SECRET) - - dek_a = derive_agent_key(FAKE_TEAM_SECRET, "agentA") - dek_b = derive_agent_key(FAKE_TEAM_SECRET, "agentB") - - def side_effect(ct, key): - if key == dek_a: - return b'result-a' - if key == dek_b: - return b'result-b' - return b'unknown' - - mock_decrypt = MagicMock(side_effect=side_effect) - monkeypatch.setattr('vault_core.aes_decrypt_metadata', mock_decrypt) - - env_a = self._make_envelope("agentA", "ct_a") - env_b = self._make_envelope("agentB", "ct_b") - result = decrypt_metadata(VALID_TOKEN, [env_a, env_b]) - data = json.loads(result) - - assert len(data) 
== 2 - assert data[0] == "result-a" - assert data[1] == "result-b" diff --git a/tests/unit/test_protovalidate.py b/tests/unit/test_protovalidate.py deleted file mode 100644 index 48d2e4d..0000000 --- a/tests/unit/test_protovalidate.py +++ /dev/null @@ -1,141 +0,0 @@ -""" -Integration tests for protovalidate with real proto message descriptors. - -Verifies that .proto annotation constraints are correctly enforced -at the schema level via protovalidate. -""" -import pytest -import sys -import os - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../vault')) -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../vault/proto')) - -protovalidate = pytest.importorskip("protovalidate") -pb2 = pytest.importorskip("vault_service_pb2") - -# Valid token: evt_ (4) + 32 hex = 36 chars -VALID_TOKEN = "evt_a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4" - - -@pytest.fixture -def validator(): - return protovalidate.Validator() - - -# --------------------------------------------------------------------------- -# GetPublicKey -# --------------------------------------------------------------------------- - -class TestGetPublicKeyProto: - def test_valid(self, validator): - validator.validate(pb2.GetPublicKeyRequest(token=VALID_TOKEN)) - - def test_empty_token_rejected(self, validator): - with pytest.raises(protovalidate.ValidationError): - validator.validate(pb2.GetPublicKeyRequest(token="")) - - def test_short_token_rejected(self, validator): - with pytest.raises(protovalidate.ValidationError): - validator.validate(pb2.GetPublicKeyRequest(token="evt_short")) - - def test_token_exceeds_max_length(self, validator): - with pytest.raises(protovalidate.ValidationError): - validator.validate(pb2.GetPublicKeyRequest(token="a" * 37)) - - -# --------------------------------------------------------------------------- -# DecryptScores -# --------------------------------------------------------------------------- - -class TestDecryptScoresProto: - def test_valid(self, validator): - 
req = pb2.DecryptScoresRequest( - token=VALID_TOKEN, encrypted_blob_b64="AQID", top_k=5 - ) - validator.validate(req) - - def test_top_k_zero_rejected(self, validator): - """Proto3 int32 default is 0 — must be rejected.""" - req = pb2.DecryptScoresRequest( - token=VALID_TOKEN, encrypted_blob_b64="AQID", top_k=0 - ) - with pytest.raises(protovalidate.ValidationError): - validator.validate(req) - - def test_top_k_negative_rejected(self, validator): - req = pb2.DecryptScoresRequest( - token=VALID_TOKEN, encrypted_blob_b64="AQID", top_k=-1 - ) - with pytest.raises(protovalidate.ValidationError): - validator.validate(req) - - def test_top_k_exceeds_global_max(self, validator): - req = pb2.DecryptScoresRequest( - token=VALID_TOKEN, encrypted_blob_b64="AQID", top_k=301 - ) - with pytest.raises(protovalidate.ValidationError): - validator.validate(req) - - def test_top_k_at_boundary_one(self, validator): - req = pb2.DecryptScoresRequest( - token=VALID_TOKEN, encrypted_blob_b64="a", top_k=1 - ) - validator.validate(req) - - def test_top_k_at_boundary_max(self, validator): - req = pb2.DecryptScoresRequest( - token=VALID_TOKEN, encrypted_blob_b64="a", top_k=300 - ) - validator.validate(req) - - def test_empty_blob_rejected(self, validator): - req = pb2.DecryptScoresRequest( - token=VALID_TOKEN, encrypted_blob_b64="", top_k=5 - ) - with pytest.raises(protovalidate.ValidationError): - validator.validate(req) - - def test_empty_token_rejected(self, validator): - req = pb2.DecryptScoresRequest( - token="", encrypted_blob_b64="a", top_k=5 - ) - with pytest.raises(protovalidate.ValidationError): - validator.validate(req) - - -# --------------------------------------------------------------------------- -# DecryptMetadata -# --------------------------------------------------------------------------- - -class TestDecryptMetadataProto: - def test_valid(self, validator): - req = pb2.DecryptMetadataRequest( - token=VALID_TOKEN, encrypted_metadata_list=["blob1", "blob2"] - ) - 
validator.validate(req) - - def test_empty_list_rejected(self, validator): - req = pb2.DecryptMetadataRequest(token=VALID_TOKEN) - with pytest.raises(protovalidate.ValidationError): - validator.validate(req) - - def test_empty_item_rejected(self, validator): - req = pb2.DecryptMetadataRequest( - token=VALID_TOKEN, encrypted_metadata_list=["valid", ""] - ) - with pytest.raises(protovalidate.ValidationError): - validator.validate(req) - - def test_too_many_items_rejected(self, validator): - req = pb2.DecryptMetadataRequest( - token=VALID_TOKEN, encrypted_metadata_list=["x"] * 1001 - ) - with pytest.raises(protovalidate.ValidationError): - validator.validate(req) - - def test_max_items_passes(self, validator): - req = pb2.DecryptMetadataRequest( - token=VALID_TOKEN, encrypted_metadata_list=["x"] * 1000 - ) - validator.validate(req) diff --git a/tests/unit/test_public_key.py b/tests/unit/test_public_key.py deleted file mode 100644 index 2b309b8..0000000 --- a/tests/unit/test_public_key.py +++ /dev/null @@ -1,96 +0,0 @@ -""" -Unit tests for get_public_key. 
-""" -import pytest -import sys -import os -import json -import tempfile -import shutil - -# Add vault to path -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../vault')) - -# Import the implementation function -from vault_core import _get_public_key_impl as get_public_key -import vault_core -from token_store import token_store -from pyenvector.crypto import KeyGenerator - -FAKE_TEAM_SECRET = "evt_fake-team-secret-for-testing-purposes-only" - - -class TestGetPublicKey: - - @pytest.fixture(autouse=True) - def reset_rate_limiter(self): - """Reset rate limiters before each test.""" - token_store._rate_limiters.clear() - - @pytest.fixture(scope="class") - def test_keys(self): - """Generate test keys.""" - temp_dir = tempfile.mkdtemp(prefix="test_pubkey_") - keygen = KeyGenerator(key_path=temp_dir, key_id="test-pubkey", dim_list=[1024], metadata_encryption=False) - keygen.generate_keys() - - yield temp_dir - shutil.rmtree(temp_dir, ignore_errors=True) - - @pytest.fixture(autouse=True) - def patch_vault_paths(self, test_keys, monkeypatch): - """Patch vault paths to point to test-generated keys.""" - monkeypatch.setattr('vault_core.KEY_SUBDIR', test_keys) - monkeypatch.setattr('vault_core.VAULT_TEAM_SECRET', FAKE_TEAM_SECRET) - - def test_valid_token_returns_bundle(self, test_keys): - """Valid token should return public key bundle.""" - result = get_public_key("evt_0000000000000000000000000000demo") - - # Should be valid JSON - bundle = json.loads(result) - - # Should contain public keys - assert "EncKey.json" in bundle - assert "EvalKey.json" in bundle - - # Should NOT contain secret keys - assert "SecKey.json" not in bundle - assert "MetadataKey.json" not in bundle - - def test_invalid_token_raises_error(self, test_keys): - """Invalid token should raise an authentication error.""" - from token_store import TokenNotFoundError - with pytest.raises(TokenNotFoundError): - get_public_key("invalid-token") - - def test_bundle_contains_agent_id_and_dek(self, 
test_keys): - """Bundle should contain per-user agent_id and agent_dek.""" - result = get_public_key("evt_0000000000000000000000000000demo") - bundle = json.loads(result) - - assert "agent_id" in bundle - assert "agent_dek" in bundle - assert len(bundle["agent_id"]) == 32 # SHA256 hex[:32] - - def test_bundle_contains_envector_credentials(self, test_keys, monkeypatch): - """Bundle should contain enVector endpoint and API key when configured.""" - monkeypatch.setattr('vault_core.ENVECTOR_ENDPOINT', 'cluster-test.envector.io') - monkeypatch.setattr('vault_core.ENVECTOR_API_KEY', 'test-api-key-abc123') - - result = get_public_key("evt_0000000000000000000000000000demo") - bundle = json.loads(result) - - assert bundle["envector_endpoint"] == "cluster-test.envector.io" - assert bundle["envector_api_key"] == "test-api-key-abc123" - - def test_bundle_envector_empty_when_not_configured(self, test_keys, monkeypatch): - """Bundle should have null enVector fields when not configured on Vault.""" - monkeypatch.setattr('vault_core.ENVECTOR_ENDPOINT', None) - monkeypatch.setattr('vault_core.ENVECTOR_API_KEY', None) - - result = get_public_key("evt_0000000000000000000000000000demo") - bundle = json.loads(result) - - assert bundle.get("envector_endpoint") is None - assert bundle.get("envector_api_key") is None diff --git a/tests/unit/test_request_validator.py b/tests/unit/test_request_validator.py deleted file mode 100644 index eba1140..0000000 --- a/tests/unit/test_request_validator.py +++ /dev/null @@ -1,101 +0,0 @@ -""" -Unit tests for gRPC request input validation rules. - -Tests both protovalidate (proto-level) and runtime checks. -Proto-level tests use fake request objects to exercise the same -validation functions without requiring the real pb2 module. 
-""" -import pytest -import sys -import os -from types import ModuleType - -# Mock protovalidate before importing request_validator -_pv = ModuleType("protovalidate") -_pv.Validator = type("Validator", (), {"validate": lambda self, req: None}) -class _ValidationError(Exception): - def __init__(self, violations=None): - self.violations = violations or [] - super().__init__("validation error") -_pv.ValidationError = _ValidationError -sys.modules.setdefault("protovalidate", _pv) - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../vault')) - -from request_validator import ( - RuntimeValidationError, - check_token_safety, - validate_index_name, - MAX_INDEX_NAME_LENGTH, -) - - -# --------------------------------------------------------------------------- -# Token safety (runtime layer — control chars & whitespace) -# --------------------------------------------------------------------------- - -class TestTokenSafety: - def test_valid_token(self): - check_token_safety("abc123-valid") - - def test_null_byte_rejected(self): - with pytest.raises(RuntimeValidationError, match="control characters"): - check_token_safety("token\x00evil") - - def test_control_char_rejected(self): - with pytest.raises(RuntimeValidationError, match="control characters"): - check_token_safety("token\x01") - - def test_tab_rejected(self): - with pytest.raises(RuntimeValidationError, match="control characters"): - check_token_safety("token\t") - - def test_newline_rejected(self): - with pytest.raises(RuntimeValidationError, match="control characters"): - check_token_safety("token\n") - - def test_del_char_rejected(self): - with pytest.raises(RuntimeValidationError, match="control characters"): - check_token_safety("token\x7f") - - def test_leading_whitespace_rejected(self): - with pytest.raises(RuntimeValidationError, match="whitespace"): - check_token_safety(" token") - - def test_trailing_whitespace_rejected(self): - with pytest.raises(RuntimeValidationError, match="whitespace"): - 
check_token_safety("token ") - - -# --------------------------------------------------------------------------- -# Index name validation (runtime layer — path traversal prevention) -# --------------------------------------------------------------------------- - -class TestIndexName: - def test_valid_names(self): - for name in ["my_index", "index-1", "ABC123", "a"]: - validate_index_name(name) - - def test_empty_rejected(self): - with pytest.raises(RuntimeValidationError, match="empty"): - validate_index_name("") - - def test_too_long_rejected(self): - with pytest.raises(RuntimeValidationError, match="exceeds"): - validate_index_name("a" * (MAX_INDEX_NAME_LENGTH + 1)) - - def test_path_traversal_rejected(self): - with pytest.raises(RuntimeValidationError, match="alphanumeric"): - validate_index_name("../../etc/passwd") - - def test_slash_rejected(self): - with pytest.raises(RuntimeValidationError, match="alphanumeric"): - validate_index_name("foo/bar") - - def test_space_rejected(self): - with pytest.raises(RuntimeValidationError, match="alphanumeric"): - validate_index_name("foo bar") - - def test_special_chars_rejected(self): - with pytest.raises(RuntimeValidationError, match="alphanumeric"): - validate_index_name("index;DROP TABLE") diff --git a/tests/unit/test_token_store.py b/tests/unit/test_token_store.py deleted file mode 100644 index 89c698c..0000000 --- a/tests/unit/test_token_store.py +++ /dev/null @@ -1,316 +0,0 @@ -""" -Unit tests for TokenStore: per-user token management, role CRUD, persistence. 
-""" -import copy -import datetime -import os -import sys -import tempfile - -import pytest - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../vault')) - -from token_store import ( - TokenStore, Role, Token, RateLimiter, - TokenNotFoundError, TokenExpiredError, RateLimitError, - TopKExceededError, ScopeError, -) - - -class TestTokenStore: - """Test token lifecycle: add, validate, revoke, expiry, rate limit.""" - - def setup_method(self): - self.store = TokenStore() - self.store._roles = { - "admin": Role("admin", ["get_public_key", "decrypt_scores", "decrypt_metadata", "manage_tokens"], 50, "150/60s"), - "member": Role("member", ["get_public_key", "decrypt_scores", "decrypt_metadata"], 10, "30/60s"), - } - - def test_add_and_validate_token(self): - tok = self.store.add_token("alice", "member", expires_days=90) - assert tok.user == "alice" - assert tok.token.startswith("evt_") - assert tok.role == "member" - - username, role = self.store.validate(tok.token) - assert username == "alice" - assert role.name == "member" - - def test_invalid_token_raises(self): - with pytest.raises(TokenNotFoundError): - self.store.validate("nonexistent_token") - - def test_expired_token_raises(self): - tok = self.store.add_token("bob", "member", expires_days=1) - # Manually expire the token - tok.expires = (datetime.date.today() - datetime.timedelta(days=1)).isoformat() - with pytest.raises(TokenExpiredError, match="bob"): - self.store.validate(tok.token) - - def test_revoke_token(self): - tok = self.store.add_token("charlie", "member") - assert self.store.revoke_token("charlie") is True - with pytest.raises(TokenNotFoundError): - self.store.validate(tok.token) - - def test_revoke_nonexistent_returns_false(self): - assert self.store.revoke_token("nobody") is False - - def test_duplicate_user_rejected(self): - self.store.add_token("alice", "member") - with pytest.raises(ValueError, match="already exists"): - self.store.add_token("alice", "member") - - def 
test_invalid_role_rejected(self): - with pytest.raises(ValueError, match="does not exist"): - self.store.add_token("alice", "nonexistent_role") - - def test_list_tokens_hides_values(self): - self.store.add_token("alice", "member", expires_days=30) - result = self.store.list_tokens() - assert len(result) == 1 - assert result[0]["user"] == "alice" - # Token value should not be in list output - assert "token" not in result[0] - - def test_rate_limiting_per_user(self): - """Rate limiting should use per-role limits keyed by username.""" - # Member role: 30/60s — use a custom role with low limit for test - self.store.add_role("limited", ["get_public_key"], 5, "2/60s") - tok = self.store.add_token("ratelimited_user", "limited") - - self.store.validate(tok.token) - self.store.validate(tok.token) - with pytest.raises(RateLimitError): - self.store.validate(tok.token) - - def test_top_k_from_role(self): - tok = self.store.add_token("alice", "member") - _, role = self.store.validate(tok.token) - assert role.top_k == 10 # member default - - def test_never_expires_token(self): - tok = self.store.add_token("permanent_user", "admin") - assert tok.expires is None - assert tok.is_expired is False - # Should validate fine - username, _ = self.store.validate(tok.token) - assert username == "permanent_user" - - def test_legacy_env_loading(self): - store = TokenStore() - store.load_legacy_env("token_a,token_b") - # Should have 2 tokens with admin role - u1, r1 = store.validate("token_a") - assert u1 == "legacy_0" - assert r1.name == "admin" - u2, _ = store.validate("token_b") - assert u2 == "legacy_1" - - def test_persist_and_reload(self): - """Tokens and roles should survive persist → reload cycle.""" - with tempfile.TemporaryDirectory() as tmpdir: - roles_path = os.path.join(tmpdir, "roles.yml") - tokens_path = os.path.join(tmpdir, "tokens.yml") - - # Store 1: add data and persist - store1 = TokenStore() - store1.load_from_files(roles_path, tokens_path) - store1.add_role("researcher", 
["get_public_key", "decrypt_scores"], 3, "10/60s") - tok = store1.add_token("alice", "member", expires_days=90) - - # Wait for async persist - store1._persist_executor.shutdown(wait=True) - - # Store 2: reload from files - store2 = TokenStore() - store2.load_from_files(roles_path, tokens_path) - - # Validate alice's token works - username, role = store2.validate(tok.token) - assert username == "alice" - assert role.name == "member" - - # Validate custom role exists - roles = store2.list_roles() - role_names = [r["name"] for r in roles] - assert "researcher" in role_names - - -class TestTokenRotation: - """Test token rotation: single user and batch.""" - - def setup_method(self): - self.store = TokenStore() - self.store._roles = { - "admin": Role("admin", ["get_public_key", "decrypt_scores", "decrypt_metadata", "manage_tokens"], 50, "150/60s"), - "member": Role("member", ["get_public_key", "decrypt_scores", "decrypt_metadata"], 10, "30/60s"), - } - - def test_rotate_token(self): - old_tok = self.store.add_token("alice", "member") - new_tok = self.store.rotate_token("alice") - assert new_tok.user == "alice" - assert new_tok.role == "member" - assert new_tok.token.startswith("evt_") - assert new_tok.token != old_tok.token - - def test_rotate_preserves_expiry(self): - old_tok = self.store.add_token("alice", "member", expires_days=90) - new_tok = self.store.rotate_token("alice") - assert new_tok.expires is not None - # New expiry should be ~90 days from today - new_expires = datetime.date.fromisoformat(new_tok.expires) - expected = datetime.date.today() + datetime.timedelta(days=90) - assert new_expires == expected - - def test_rotate_invalidates_old_token(self): - old_tok = self.store.add_token("alice", "member") - self.store.rotate_token("alice") - with pytest.raises(TokenNotFoundError): - self.store.validate(old_tok.token) - - def test_rotate_new_token_validates(self): - self.store.add_token("alice", "member") - new_tok = self.store.rotate_token("alice") - username, 
role = self.store.validate(new_tok.token) - assert username == "alice" - assert role.name == "member" - - def test_rotate_nonexistent_user_raises(self): - with pytest.raises(ValueError, match="No token found"): - self.store.rotate_token("nobody") - - def test_rotate_all(self): - tok_a = self.store.add_token("alice", "member") - tok_b = self.store.add_token("bob", "admin") - results = self.store.rotate_all_tokens() - assert len(results) == 2 - users = {t.user for t in results} - assert users == {"alice", "bob"} - # Old tokens should be invalid - with pytest.raises(TokenNotFoundError): - self.store.validate(tok_a.token) - with pytest.raises(TokenNotFoundError): - self.store.validate(tok_b.token) - - def test_rotate_persists(self): - """Rotated token should survive persist → reload cycle.""" - with tempfile.TemporaryDirectory() as tmpdir: - roles_path = os.path.join(tmpdir, "roles.yml") - tokens_path = os.path.join(tmpdir, "tokens.yml") - - store1 = TokenStore() - store1.load_from_files(roles_path, tokens_path) - store1.add_token("alice", "member", expires_days=30) - new_tok = store1.rotate_token("alice") - store1._persist_executor.shutdown(wait=True) - - store2 = TokenStore() - store2.load_from_files(roles_path, tokens_path) - username, role = store2.validate(new_tok.token) - assert username == "alice" - assert role.name == "member" - - -class TestRoleCRUD: - """Test role create, update, delete, list operations.""" - - def setup_method(self): - self.store = TokenStore() - self.store._roles = { - "admin": Role("admin", ["get_public_key", "decrypt_scores", "decrypt_metadata", "manage_tokens"], 50, "150/60s"), - "member": Role("member", ["get_public_key", "decrypt_scores", "decrypt_metadata"], 10, "30/60s"), - } - - def test_create_role(self): - role = self.store.add_role( - "researcher", ["get_public_key", "decrypt_scores"], 3, "10/60s" - ) - assert role.name == "researcher" - assert role.top_k == 3 - assert "get_public_key" in role.scope - - def 
test_create_duplicate_role_rejected(self): - with pytest.raises(ValueError, match="already exists"): - self.store.add_role("admin", ["get_public_key"], 5, "30/60s") - - def test_update_role(self): - role = self.store.update_role("member", top_k=8) - assert role.top_k == 8 - assert role.name == "member" - - def test_update_nonexistent_role_rejected(self): - with pytest.raises(ValueError, match="does not exist"): - self.store.update_role("nonexistent", top_k=5) - - def test_delete_custom_role(self): - self.store.add_role("temp", ["get_public_key"], 1, "5/60s") - self.store.delete_role("temp") - roles = self.store.list_roles() - assert "temp" not in [r["name"] for r in roles] - - def test_delete_default_role_rejected(self): - with pytest.raises(ValueError, match="Cannot delete default"): - self.store.delete_role("admin") - with pytest.raises(ValueError, match="Cannot delete default"): - self.store.delete_role("member") - - def test_delete_role_with_active_tokens_rejected(self): - self.store.add_role("temp", ["get_public_key"], 1, "5/60s") - self.store.add_token("user1", "temp") - with pytest.raises(ValueError, match="token for user"): - self.store.delete_role("temp") - - def test_list_roles(self): - roles = self.store.list_roles() - assert len(roles) >= 2 - names = [r["name"] for r in roles] - assert "admin" in names - assert "member" in names - - def test_update_role_clears_rate_limiters(self): - """Changing a role's rate_limit should reset affected rate limiters.""" - tok = self.store.add_token("alice", "member") - # Validate to create rate limiter - self.store.validate(tok.token) - assert "alice" in self.store._rate_limiters - - # Update role - self.store.update_role("member", rate_limit="100/60s") - assert "alice" not in self.store._rate_limiters - - def test_role_rate_limit_parsed(self): - role = Role("test", [], 5, "30/60s") - max_req, window = role.rate_limit_parsed - assert max_req == 30 - assert window == 60 - - -class TestScopeCheck: - """Test scope 
enforcement.""" - - def test_scope_allows_valid_method(self): - store = TokenStore() - role = Role("member", ["get_public_key", "decrypt_scores"], 5, "30/60s") - store.check_scope(role, "get_public_key") # Should not raise - - def test_scope_rejects_invalid_method(self): - store = TokenStore() - role = Role("limited", ["get_public_key"], 5, "30/60s") - with pytest.raises(ScopeError, match="decrypt_scores"): - store.check_scope(role, "decrypt_scores") - - -class TestTopKExceeded: - """Test TopKExceededError.""" - - def test_top_k_exceeded_message(self): - err = TopKExceededError(15, 10, "admin") - assert "15" in str(err) - assert "10" in str(err) - assert "admin" in str(err) - assert err.requested == 15 - assert err.max_top_k == 10 diff --git a/tests/unit/test_validation_interceptor.py b/tests/unit/test_validation_interceptor.py deleted file mode 100644 index ce33a9f..0000000 --- a/tests/unit/test_validation_interceptor.py +++ /dev/null @@ -1,196 +0,0 @@ -""" -Unit tests for the gRPC ValidationInterceptor. - -Tests the interceptor wiring using mock objects — no real gRPC server needed. -grpc and protovalidate are mocked to avoid heavy runtime dependencies. 
-""" -import pytest -import sys -import os -from unittest.mock import MagicMock, patch, PropertyMock -from types import ModuleType - -# --------------------------------------------------------------------------- -# Mock heavy dependencies before importing vault modules -# --------------------------------------------------------------------------- - -_grpc_mock = ModuleType("grpc") -_grpc_mock.ServerInterceptor = type("ServerInterceptor", (), {}) -_grpc_mock.StatusCode = type("StatusCode", (), { - "INVALID_ARGUMENT": "INVALID_ARGUMENT", -})() -_grpc_mock.unary_unary_rpc_method_handler = lambda handler, **kw: MagicMock( - unary_unary=handler -) -sys.modules.setdefault("grpc", _grpc_mock) - -# Force-mock protovalidate regardless of prior imports — prevents test -# isolation failures when the full suite loads the real module first. -_protovalidate_mock = ModuleType("protovalidate") - - -class _ValidationError(Exception): - def __init__(self, msg="validation error", violations=None): - self.violations = violations or [] - super().__init__(msg) - - -_protovalidate_mock.ValidationError = _ValidationError -_protovalidate_mock.Validator = MagicMock -sys.modules["protovalidate"] = _protovalidate_mock - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../vault')) - -from request_validator import RuntimeValidationError -from validation_interceptor import ValidationInterceptor - -# Get the actual ValidationError the interceptor will catch -_ProtoValidationError = sys.modules["protovalidate"].ValidationError - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - -def _make_handler_call_details(method: str): - details = MagicMock() - details.method = method - return details - - -def _make_next_handler(return_value="ok"): - handler = MagicMock() - handler.unary_unary = MagicMock(return_value=return_value) - handler.request_deserializer = None - 
handler.response_serializer = None - return handler - - -def _make_context(): - ctx = MagicMock() - ctx.abort = MagicMock(side_effect=Exception("aborted")) - return ctx - - -# --------------------------------------------------------------------------- -# Tests -# --------------------------------------------------------------------------- - -class TestValidationInterceptor: - def setup_method(self): - self.interceptor = ValidationInterceptor() - - def test_non_vault_method_passes_through(self): - """Health check and other non-vault methods bypass validation.""" - details = _make_handler_call_details("/grpc.health.v1.Health/Check") - next_handler = _make_next_handler() - continuation = MagicMock(return_value=next_handler) - - result = self.interceptor.intercept_service(continuation, details) - assert result is next_handler - - def test_none_handler_returns_none(self): - details = _make_handler_call_details("/rune.vault.v1.VaultService/GetPublicKey") - continuation = MagicMock(return_value=None) - - result = self.interceptor.intercept_service(continuation, details) - assert result is None - - def test_valid_request_reaches_handler(self): - """A valid request passes both validation layers.""" - details = _make_handler_call_details("/rune.vault.v1.VaultService/GetPublicKey") - next_handler = _make_next_handler(return_value="response") - continuation = MagicMock(return_value=next_handler) - - wrapped = self.interceptor.intercept_service(continuation, details) - - request = MagicMock() - request.token = "valid-token-123" - context = _make_context() - - with patch("validation_interceptor.validate_proto"): - result = wrapped.unary_unary(request, context) - - assert result == "response" - context.abort.assert_not_called() - - def test_proto_validation_error_aborts(self): - """protovalidate.ValidationError triggers INVALID_ARGUMENT abort.""" - details = _make_handler_call_details("/rune.vault.v1.VaultService/DecryptScores") - next_handler = _make_next_handler() - continuation 
= MagicMock(return_value=next_handler) - - wrapped = self.interceptor.intercept_service(continuation, details) - - request = MagicMock() - request.token = "valid-token" - context = _make_context() - - violation = MagicMock() - violation.proto.field = "top_k" - violation.proto.message = "value must be >= 1" - exc = _ProtoValidationError(violations=[violation]) - - with patch("validation_interceptor.validate_proto", side_effect=exc): - with pytest.raises(Exception, match="aborted"): - wrapped.unary_unary(request, context) - - context.abort.assert_called_once() - status_code = context.abort.call_args[0][0] - assert "INVALID_ARGUMENT" in str(status_code) - assert "top_k" in context.abort.call_args[0][1] - - def test_runtime_validation_error_aborts(self): - """RuntimeValidationError (control chars) triggers INVALID_ARGUMENT abort.""" - details = _make_handler_call_details("/rune.vault.v1.VaultService/GetPublicKey") - next_handler = _make_next_handler() - continuation = MagicMock(return_value=next_handler) - - wrapped = self.interceptor.intercept_service(continuation, details) - - request = MagicMock() - request.token = "tok\x00en" - context = _make_context() - - with patch("validation_interceptor.validate_proto"): - with pytest.raises(Exception, match="aborted"): - wrapped.unary_unary(request, context) - - context.abort.assert_called_once() - status_code = context.abort.call_args[0][0] - assert "INVALID_ARGUMENT" in str(status_code) - assert "control characters" in context.abort.call_args[0][1] - - def test_handler_without_unary_unary_passes_through(self): - details = _make_handler_call_details("/rune.vault.v1.VaultService/GetPublicKey") - next_handler = MagicMock() - next_handler.unary_unary = None - continuation = MagicMock(return_value=next_handler) - - result = self.interceptor.intercept_service(continuation, details) - assert result is next_handler - - def test_error_detail_is_human_readable(self): - """Validation errors include field path and message.""" - details 
= _make_handler_call_details("/rune.vault.v1.VaultService/DecryptScores") - next_handler = _make_next_handler() - continuation = MagicMock(return_value=next_handler) - - wrapped = self.interceptor.intercept_service(continuation, details) - - request = MagicMock() - request.token = "valid-token" - context = _make_context() - - violation = MagicMock() - violation.proto.field = "encrypted_blob_b64" - violation.proto.message = "value length must be at least 1" - exc = _ProtoValidationError(violations=[violation]) - - with patch("validation_interceptor.validate_proto", side_effect=exc): - with pytest.raises(Exception, match="aborted"): - wrapped.unary_unary(request, context) - - detail_msg = context.abort.call_args[0][1] - assert "encrypted_blob_b64" in detail_msg - assert "at least 1" in detail_msg diff --git a/vault/.env.example b/vault/.env.example deleted file mode 100644 index 42ba518..0000000 --- a/vault/.env.example +++ /dev/null @@ -1,62 +0,0 @@ -# Rune-Vault Configuration -# Copy this file to .env and fill in the values: -# cp .env.example .env -# -# .env is excluded from git — safe to put real credentials there. - -# ── Authentication ────────────────────────────────────────────── -# Team secret for metadata DEK derivation (shared across all team members). -# New installs: auto-generated by install.sh. -# Migrating from VAULT_TOKENS: copy your existing shared token value here. -# e.g. VAULT_TEAM_SECRET=evt_abc123... (the same value you had in VAULT_TOKENS) -# Per-user tokens are managed separately via `runevault token issue/revoke/list`. -VAULT_TEAM_SECRET= - -# ── TLS ───────────────────────────────────────────────────────── -# TLS is required by default. The Docker entrypoint auto-generates -# self-signed certificates if these are not set. 
-# -# For Let's Encrypt or domain certs, set the paths explicitly: -# VAULT_TLS_CERT=/path/to/fullchain.pem -# VAULT_TLS_KEY=/path/to/privkey.pem -# -# To disable TLS (NOT recommended for production): -# VAULT_TLS_DISABLE=true -VAULT_TLS_CERT= -VAULT_TLS_KEY= -VAULT_TLS_DISABLE= - -# ── Audit Logging ────────────────────────────────────────────── -# Structured audit log for all Vault gRPC operations. -# One JSON line per request with: timestamp, user_id, method, top_k, -# result_count, status, source_ip, latency_ms. -# -# Options: -# (empty) Disabled -# file /var/log/rune-vault/audit.log (daily rotation, 30-day retention) -# file:/path Custom file path -# stdout JSON lines to stdout (for CloudWatch, Stackdriver, etc.) -# file+stdout Both file and stdout -VAULT_AUDIT_LOG=file - -# ── ngrok Tunneling (optional) ────────────────────────────────── -# Required only if you need to expose the gRPC endpoint over the internet. -# Get your authtoken at: https://dashboard.ngrok.com/get-started/your-authtoken -NGROK_AUTHTOKEN= - -# ── enVector Cloud ────────────────────────────────────────────── -# Required for auto-creating the team search index (ensure_index). -# Without these, FHE key generation (ensure_keys) still works locally, -# but the team index will NOT be created on enVector Cloud. -# -# enVector cluster endpoint (e.g. runestone-XXXX.clusters.envector.io) -ENVECTOR_ENDPOINT= -# enVector API key (issued from envector.io dashboard) -ENVECTOR_API_KEY= - -# ── Index Settings ────────────────────────────────────────────── -# Name of the team index on enVector Cloud. -# Must be alphanumeric, lowercase, no spaces, and less than 20 characters. 
-VAULT_INDEX_NAME=runecontext -# Embedding dimension (must match your embedding model) -EMBEDDING_DIM=1024 diff --git a/vault/Dockerfile b/vault/Dockerfile deleted file mode 100644 index f902b7a..0000000 --- a/vault/Dockerfile +++ /dev/null @@ -1,55 +0,0 @@ -# ── Stage 1: Generate proto stubs ──────────────────────────────────── -FROM python:3.12-slim AS proto-builder - -RUN apt-get update && apt-get install -y --no-install-recommends curl \ - && rm -rf /var/lib/apt/lists/* - -# Install buf CLI -RUN curl -sSL "https://github.com/bufbuild/buf/releases/latest/download/buf-Linux-$(uname -m)" \ - -o /usr/local/bin/buf \ - && chmod +x /usr/local/bin/buf - -RUN pip install --no-cache-dir "grpcio-tools>=1.60.2,<=1.71.2" "protobuf>=5.29.0,<6" - -WORKDIR /build -COPY buf.yaml buf.lock ./ -COPY proto/vault_service.proto proto/__init__.py proto/ -COPY scripts/proto-gen.sh scripts/ -RUN bash scripts/proto-gen.sh - -# ── Stage 2: Runtime ───────────────────────────────────────────────── -FROM python:3.12-slim - -RUN apt-get update && apt-get install -y --no-install-recommends \ - gcc \ - python3-dev \ - openssl \ - curl \ - gosu \ - && rm -rf /var/lib/apt/lists/* - -RUN useradd -m -u 1000 vault - -WORKDIR /app - -COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt - -COPY vault_core.py vault_grpc_server.py token_store.py admin_server.py vault_admin_cli.py request_validator.py validation_interceptor.py audit.py ./ -COPY --from=proto-builder /build/proto/ proto/ -COPY --from=proto-builder /build/buf/ buf/ -COPY docker-entrypoint.sh . 
-RUN chmod +x docker-entrypoint.sh vault_admin_cli.py - -RUN mkdir -p /app/vault_keys /app/certs /app/config /secure/backups /var/log/rune-vault \ - && chown -R vault:vault /app /secure /var/log/rune-vault - -ENV PYTHONPATH=/app/proto:/app - -EXPOSE 50051 - -HEALTHCHECK --interval=30s --timeout=10s --retries=3 \ - CMD curl -sf http://localhost:8081/health || exit 1 - -ENTRYPOINT ["./docker-entrypoint.sh"] -CMD ["--host", "0.0.0.0", "--grpc-port", "50051"] diff --git a/vault/admin_server.py b/vault/admin_server.py deleted file mode 100644 index 9b08bde..0000000 --- a/vault/admin_server.py +++ /dev/null @@ -1,232 +0,0 @@ -""" -Admin HTTP server for token and role management. - -Listens on 127.0.0.1:8081 (container-internal only, not exposed via Docker). -No authentication required — access is protected by: - SSH → docker group → docker exec → container isolation. -""" - -import json -import logging -import re -import threading -from http.server import BaseHTTPRequestHandler, HTTPServer - -logger = logging.getLogger("vault.admin") - -DEFAULT_ADMIN_HOST = "127.0.0.1" -DEFAULT_ADMIN_PORT = 8081 - - -# ============================================================================= -# Route table: (method, pattern) → handler name -# Patterns use {name} for path parameters, compiled to regex at import time. 
-# ============================================================================= - -_ROUTE_DEFS = [ - ("GET", "/health", "_handle_health"), - ("GET", "/tokens", "_handle_list_tokens"), - ("GET", "/roles", "_handle_list_roles"), - ("POST", "/tokens", "_handle_issue_token"), - ("POST", "/tokens/{user}/rotate", "_handle_rotate_token"), - ("POST", "/tokens/_rotate_all", "_handle_rotate_all"), - ("POST", "/roles", "_handle_create_role"), - ("PUT", "/roles/{name}", "_handle_update_role"), - ("DELETE", "/tokens/{user}", "_handle_revoke_token"), - ("DELETE", "/roles/{name}", "_handle_delete_role"), -] - -_ROUTES: list[tuple[str, re.Pattern, list[str], str]] = [] -for _method, _pattern, _handler in _ROUTE_DEFS: - _param_names = re.findall(r"\{(\w+)\}", _pattern) - _regex = re.compile("^" + re.sub(r"\{(\w+)\}", r"(?P<\1>[^/]+)", _pattern) + "$") - _ROUTES.append((_method, _regex, _param_names, _handler)) - - -class AdminHandler(BaseHTTPRequestHandler): - """Request handler for token and role admin API.""" - - # Set by start_admin_server() before requests are handled - token_store = None - health_servicer = None - - def log_message(self, format, *args): - logger.info(format, *args) - - def _read_json(self) -> dict: - length = int(self.headers.get("Content-Length", 0)) - if length == 0: - return {} - body = self.rfile.read(length) - return json.loads(body) - - def _send_json(self, data: dict, status: int = 200): - body = json.dumps(data).encode() - self.send_response(status) - self.send_header("Content-Type", "application/json") - self.send_header("Content-Length", str(len(body))) - self.end_headers() - self.wfile.write(body) - - def _send_error(self, status: int, message: str): - self._send_json({"error": message}, status) - - # ── Routing ────────────────────────────────────────────────────────── - - def _dispatch(self, method: str): - path = self.path.rstrip("/") or "/" - for route_method, regex, _, handler_name in _ROUTES: - if route_method != method: - continue - m = 
regex.match(path) - if m: - handler = getattr(self, handler_name) - try: - kwargs = m.groupdict() - if method in ("POST", "PUT"): - kwargs["body"] = self._read_json() - handler(**kwargs) - except (ValueError, KeyError) as e: - self._send_error(400, str(e)) - except Exception as e: - self._send_error(500, str(e)) - return - self._send_error(404, f"No route for {method} {self.path}") - - def do_GET(self): - self._dispatch("GET") - - def do_POST(self): - self._dispatch("POST") - - def do_PUT(self): - self._dispatch("PUT") - - def do_DELETE(self): - self._dispatch("DELETE") - - # ── Health ──────────────────────────────────────────────────────────── - - def _handle_health(self): - from grpc_health.v1 import health_pb2 - - if self.health_servicer is not None: - resp = self.health_servicer.Check(health_pb2.HealthCheckRequest(service=""), None) - if resp.status != health_pb2.HealthCheckResponse.SERVING: - self._send_json({"status": "unhealthy"}, 503) - return - self._send_json({"status": "ok"}) - - # ── Token handlers ─────────────────────────────────────────────────── - - def _handle_list_tokens(self): - self._send_json({"tokens": self.token_store.list_tokens()}) - - def _handle_issue_token(self, body: dict): - user = body.get("user") - role = body.get("role") - if not user or not role: - self._send_error(400, "Missing required fields: user, role") - return - expires_days = body.get("expires_days") - tok = self.token_store.add_token(user, role, expires_days) - self._send_json( - { - "user": tok.user, - "token": tok.token, - "role": tok.role, - "issued_at": tok.issued_at, - "expires": tok.expires or "never", - }, - 201, - ) - - def _handle_revoke_token(self, user: str): - revoked = self.token_store.revoke_token(user) - if revoked: - self._send_json({"message": f"Revoked token for '{user}'"}) - else: - self._send_error(404, f"No token found for user '{user}'") - - def _handle_rotate_token(self, user: str, body: dict): - tok = self.token_store.rotate_token(user) - 
self._send_json( - { - "user": tok.user, - "token": tok.token, - "role": tok.role, - "issued_at": tok.issued_at, - "expires": tok.expires or "never", - } - ) - - def _handle_rotate_all(self, body: dict): - tokens = self.token_store.rotate_all_tokens() - self._send_json( - { - "rotated": len(tokens), - "tokens": [{"user": t.user, "token": t.token, "role": t.role} for t in tokens], - } - ) - - # ── Role handlers ──────────────────────────────────────────────────── - - def _handle_list_roles(self): - self._send_json({"roles": self.token_store.list_roles()}) - - def _handle_create_role(self, body: dict): - name = body.get("name") - scope = body.get("scope") - top_k = body.get("top_k") - rate_limit = body.get("rate_limit") - if not all([name, scope, top_k is not None, rate_limit]): - self._send_error(400, "Missing required fields: name, scope, top_k, rate_limit") - return - role = self.token_store.add_role(name, scope, top_k, rate_limit) - self._send_json( - { - "name": role.name, - "scope": role.scope, - "top_k": role.top_k, - "rate_limit": role.rate_limit, - }, - 201, - ) - - def _handle_update_role(self, name: str, body: dict): - kwargs = {} - if "scope" in body: - kwargs["scope"] = body["scope"] - if "top_k" in body: - kwargs["top_k"] = body["top_k"] - if "rate_limit" in body: - kwargs["rate_limit"] = body["rate_limit"] - if not kwargs: - self._send_error(400, "No fields to update") - return - role = self.token_store.update_role(name, **kwargs) - self._send_json( - { - "name": role.name, - "scope": role.scope, - "top_k": role.top_k, - "rate_limit": role.rate_limit, - } - ) - - def _handle_delete_role(self, name: str): - self.token_store.delete_role(name) - self._send_json({"message": f"Deleted role '{name}'"}) - - -def start_admin_server( - store, host: str = DEFAULT_ADMIN_HOST, port: int = DEFAULT_ADMIN_PORT, health_servicer=None -): - """Start the admin HTTP server in a daemon thread.""" - AdminHandler.token_store = store - AdminHandler.health_servicer = 
health_servicer - server = HTTPServer((host, port), AdminHandler) - thread = threading.Thread(target=server.serve_forever, daemon=True, name="admin-server") - thread.start() - logger.info("Admin server started on %s:%d", host, port) - return server diff --git a/vault/audit.py b/vault/audit.py deleted file mode 100644 index cdb639a..0000000 --- a/vault/audit.py +++ /dev/null @@ -1,153 +0,0 @@ -""" -Structured audit logging for Rune-Vault operations. - -Emits one JSON line per gRPC request to a dedicated audit log, -separate from the application log. Supports file-based daily rotation -and stdout JSON mode for container environments. - -Configuration via VAULT_AUDIT_LOG env var: - (empty) disabled - file /var/log/rune-vault/audit.log, daily rotation, 30-day retention - file:/path custom file path - stdout JSON lines to stdout - file+stdout both -""" - -import json -import logging -import os -import sys -from logging.handlers import TimedRotatingFileHandler -from typing import Any - -_DEFAULT_AUDIT_PATH = "/var/log/rune-vault/audit.log" - -# --------------------------------------------------------------------------- -# Configuration parsing -# --------------------------------------------------------------------------- - - -def _parse_audit_config(env_value: str) -> dict: - """Parse VAULT_AUDIT_LOG into {"file": path | None, "stdout": bool}.""" - if not env_value: - return {"file": None, "stdout": False} - - parts = [p.strip() for p in env_value.split("+")] - config: dict[str, Any] = {"file": None, "stdout": False} - - for part in parts: - lowered = part.lower() - if lowered == "stdout": - config["stdout"] = True - elif lowered == "file": - config["file"] = _DEFAULT_AUDIT_PATH - elif lowered.startswith("file:"): - config["file"] = part.split(":", 1)[1].strip() - - return config - - -# --------------------------------------------------------------------------- -# Source IP extraction -# --------------------------------------------------------------------------- - - -def 
extract_source_ip(context) -> str: - """Extract client IP from gRPC context.peer(). - - peer() returns strings like: - 'ipv4:10.0.0.1:12345' - 'ipv6:[::1]:12345' - 'unix:/path/to/socket' - """ - try: - peer = context.peer() - if peer is None: - return "unknown" - if peer.startswith("ipv4:"): - # ipv4:10.0.0.1:12345 -> 10.0.0.1 - return peer[5:].rsplit(":", 1)[0] - if peer.startswith("ipv6:"): - addr = peer[5:] - if addr.startswith("["): - # [::1]:12345 -> [::1] - return addr.split("]", 1)[0] + "]" - return addr.rsplit(":", 1)[0] - return peer - except Exception: - return "unknown" - - -# --------------------------------------------------------------------------- -# AuditLogger -# --------------------------------------------------------------------------- - - -class AuditLogger: - """JSON-structured audit logger with file rotation and stdout support.""" - - def __init__(self, config: dict): - self._logger = logging.getLogger("rune.vault.audit") - self._logger.setLevel(logging.INFO) - self._logger.propagate = False - - # Close and remove any pre-existing handlers (e.g. during tests) - for h in self._logger.handlers[:]: - h.close() - self._logger.removeHandler(h) - - if config.get("file"): - handler = TimedRotatingFileHandler( - config["file"], - when="midnight", - backupCount=30, - utc=True, - ) - handler.setFormatter(logging.Formatter("%(message)s")) - self._logger.addHandler(handler) - - if config.get("stdout"): - handler = logging.StreamHandler(sys.stdout) - handler.setFormatter(logging.Formatter("%(message)s")) - self._logger.addHandler(handler) - - @property - def enabled(self) -> bool: - return len(self._logger.handlers) > 0 - - def log( - self, - *, - timestamp: str, - user_id: str, - method: str, - top_k: int | None, - result_count: int, - status: str, - source_ip: str, - latency_ms: float, - error: str | None = None, - ) -> dict: - """Emit a single structured audit entry. 
Returns the entry dict.""" - entry: dict[str, Any] = { - "timestamp": timestamp, - "user_id": user_id, - "method": method, - "top_k": top_k, - "result_count": result_count, - "status": status, - "source_ip": source_ip, - "latency_ms": round(latency_ms, 2), - } - if error is not None: - entry["error"] = error - self._logger.info(json.dumps(entry, separators=(",", ":"))) - return entry - - -# --------------------------------------------------------------------------- -# Module-level singleton -# --------------------------------------------------------------------------- - -_config = _parse_audit_config(os.environ.get("VAULT_AUDIT_LOG", "")) -audit_logger = AuditLogger(_config) diff --git a/vault/buf.gen.yaml b/vault/buf.gen.yaml new file mode 100644 index 0000000..0497bba --- /dev/null +++ b/vault/buf.gen.yaml @@ -0,0 +1,13 @@ +version: v2 +clean: true +plugins: + - remote: buf.build/protocolbuffers/go + out: pkg/vaultpb + opt: + - module=github.com/CryptoLabInc/rune-admin/vault/pkg/vaultpb + - Mvault_service.proto=github.com/CryptoLabInc/rune-admin/vault/pkg/vaultpb;vaultpb + - remote: buf.build/grpc/go + out: pkg/vaultpb + opt: + - module=github.com/CryptoLabInc/rune-admin/vault/pkg/vaultpb + - Mvault_service.proto=github.com/CryptoLabInc/rune-admin/vault/pkg/vaultpb;vaultpb diff --git a/vault/cmd/main.go b/vault/cmd/main.go new file mode 100644 index 0000000..0850431 --- /dev/null +++ b/vault/cmd/main.go @@ -0,0 +1,15 @@ +package main + +import ( + "fmt" + "os" + + "github.com/CryptoLabInc/rune-admin/vault/internal/commands" +) + +func main() { + if err := commands.Execute(); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } +} diff --git a/vault/docker-compose.yml b/vault/docker-compose.yml deleted file mode 100644 index 1b5d598..0000000 --- a/vault/docker-compose.yml +++ /dev/null @@ -1,62 +0,0 @@ -services: - vault: - build: - context: . 
- dockerfile: Dockerfile - image: ghcr.io/cryptolabinc/rune-vault:${RUNE_VAULT_TAG:-latest} - container_name: rune-vault - - volumes: - - vault-keys:/app/vault_keys:rw - - ./certs:/app/certs:rw - - ./backups:/secure/backups:rw - - ./logs:/var/log/rune-vault:rw - - ./vault-roles.yml:/app/config/vault-roles.yml:rw - - ./vault-tokens.yml:/app/config/vault-tokens.yml:rw - - environment: - - VAULT_TEAM_SECRET=${VAULT_TEAM_SECRET:-} - # Migrating from VAULT_TOKENS? Copy the value to VAULT_TEAM_SECRET in .env. - # See .env.example for details. - - VAULT_TLS_CERT=${VAULT_TLS_CERT:-} - - VAULT_TLS_KEY=${VAULT_TLS_KEY:-} - - VAULT_TLS_DISABLE=${VAULT_TLS_DISABLE:-} - - VAULT_INDEX_NAME=${VAULT_INDEX_NAME:-runecontext} - - ENVECTOR_ENDPOINT=${ENVECTOR_ENDPOINT:-} - - ENVECTOR_API_KEY=${ENVECTOR_API_KEY:-} - - EMBEDDING_DIM=${EMBEDDING_DIM:-1024} - - VAULT_AUDIT_LOG=${VAULT_AUDIT_LOG:-file} - - networks: - - vault-net - - ports: - - "0.0.0.0:50051:50051" - - security_opt: - - no-new-privileges:true - - restart: unless-stopped - - healthcheck: - test: ["CMD", "curl", "-sf", "http://localhost:8081/health"] - interval: 30s - timeout: 10s - retries: 3 - - deploy: - resources: - limits: - memory: 1G - cpus: "1.0" - reservations: - memory: 512M - cpus: "0.5" - - -networks: - vault-net: - driver: bridge - -volumes: - vault-keys: diff --git a/vault/docker-entrypoint.sh b/vault/docker-entrypoint.sh deleted file mode 100755 index ae1dd25..0000000 --- a/vault/docker-entrypoint.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/bin/sh -# -# Docker entrypoint for Rune-Vault. -# Auto-generates self-signed certificates if none are provided. - -set -e - -CERT_DIR="/app/certs" - -# Skip auto-generation if TLS is disabled -if [ "${VAULT_TLS_DISABLE:-}" = "true" ]; then - echo "[entrypoint] TLS disabled — skipping certificate generation." 
- chown -R vault:vault /app/vault_keys /app/config /secure /var/log/rune-vault 2>/dev/null || true - exec gosu vault python3 vault_grpc_server.py "$@" -fi - -# Auto-generate self-signed cert if no cert exists and env vars not set -if [ -z "${VAULT_TLS_CERT:-}" ] && [ ! -f "$CERT_DIR/server.pem" ]; then - echo "[entrypoint] No TLS certificate found — generating self-signed cert..." - mkdir -p "$CERT_DIR" - - # Generate CA - openssl genrsa -out "$CERT_DIR/ca.key" 4096 2>/dev/null - openssl req -new -x509 \ - -key "$CERT_DIR/ca.key" \ - -out "$CERT_DIR/ca.pem" \ - -days 3650 \ - -subj "/CN=Rune-Vault CA" \ - -sha256 - - # Generate server cert with SANs - openssl genrsa -out "$CERT_DIR/server.key" 2048 2>/dev/null - - TMPCONF=$(mktemp) - cat > "$TMPCONF" </dev/null - - rm -f "$TMPCONF" "$CERT_DIR/server.csr" "$CERT_DIR/ca.srl" - chmod 600 "$CERT_DIR/ca.key" "$CERT_DIR/server.key" - - echo "[entrypoint] Self-signed certificates generated in $CERT_DIR/" - echo "[entrypoint] Distribute ca.pem to clients for verification." 
-fi - -# Default to auto-generated certs if env vars not set -export VAULT_TLS_CERT="${VAULT_TLS_CERT:-$CERT_DIR/server.pem}" -export VAULT_TLS_KEY="${VAULT_TLS_KEY:-$CERT_DIR/server.key}" - -# Fix ownership on mounted volumes so the vault user can read them -chown -R vault:vault /app/certs /app/vault_keys /app/config /secure /var/log/rune-vault 2>/dev/null || true - -# Drop privileges and run as vault user -exec gosu vault python3 vault_grpc_server.py "$@" diff --git a/vault/go.mod b/vault/go.mod new file mode 100644 index 0000000..879b4d4 --- /dev/null +++ b/vault/go.mod @@ -0,0 +1,29 @@ +module github.com/CryptoLabInc/rune-admin/vault + +go 1.25.9 + +require ( + buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.11-20260415201107-50325440f8f2.1 + github.com/CryptoLabInc/envector-go-sdk v0.1.0 + github.com/spf13/cobra v1.8.1 + golang.org/x/crypto v0.47.0 + google.golang.org/grpc v1.80.0 + google.golang.org/protobuf v1.36.11 + gopkg.in/natefinch/lumberjack.v2 v2.2.1 + gopkg.in/yaml.v3 v3.0.1 +) + +require ( + buf.build/go/protovalidate v1.2.0 // indirect + cel.dev/expr v0.25.1 // indirect + github.com/antlr4-go/antlr/v4 v4.13.1 // indirect + github.com/google/cel-go v0.28.0 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect + golang.org/x/exp v0.0.0-20250813145105-42675adae3e6 // indirect + golang.org/x/net v0.49.0 // indirect + golang.org/x/sys v0.40.0 // indirect + golang.org/x/text v0.33.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20260120221211-b8f7ae30c516 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20260120221211-b8f7ae30c516 // indirect +) diff --git a/vault/go.sum b/vault/go.sum new file mode 100644 index 0000000..d7411b4 --- /dev/null +++ b/vault/go.sum @@ -0,0 +1,71 @@ +buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.11-20260415201107-50325440f8f2.1 h1:s6hzCXtND/ICdGPTMGk7C+/BFlr2Jg5GyH0NKf4XGXg= 
+buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.11-20260415201107-50325440f8f2.1/go.mod h1:tvtbpgaVXZX4g6Pn+AnzFycuRK3MOz5HJfEGeEllXYM= +buf.build/go/protovalidate v1.2.0 h1:DQVrUWkmGTBij+kOYv/x2LLxwcLaGKMdzShj1/6/3H0= +buf.build/go/protovalidate v1.2.0/go.mod h1:7rYiQEhqvAipoazpVNBBH2S2f8bjG4huMVy1V2Yofn4= +cel.dev/expr v0.25.1 h1:1KrZg61W6TWSxuNZ37Xy49ps13NUovb66QLprthtwi4= +cel.dev/expr v0.25.1/go.mod h1:hrXvqGP6G6gyx8UAHSHJ5RGk//1Oj5nXQ2NI02Nrsg4= +github.com/CryptoLabInc/envector-go-sdk v0.1.0 h1:EaPiFtvLh33mrS1Nfjb/sF5tmDlOIHpjj3TJ6QIIFgM= +github.com/CryptoLabInc/envector-go-sdk v0.1.0/go.mod h1:jXVwGkqUOmQUnJiEshq7sG2x0nveFCNlAU5J7xc7CDw= +github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYWrPrQ= +github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/cel-go v0.28.0 h1:KjSWstCpz/MN5t4a8gnGJNIYUsJRpdi/r97xWDphIQc= +github.com/google/cel-go v0.28.0/go.mod h1:X0bD6iVNR8pkROSOoHVdgTkzmRcosof7WQqCD6wcMc8= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/uuid 
v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= +github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= +go.opentelemetry.io/otel v1.39.0 h1:8yPrr/S0ND9QEfTfdP9V+SiwT4E0G7Y5MO7p85nis48= +go.opentelemetry.io/otel v1.39.0/go.mod h1:kLlFTywNWrFyEdH0oj2xK0bFYZtHRYUdv1NklR/tgc8= +go.opentelemetry.io/otel/metric v1.39.0 h1:d1UzonvEZriVfpNKEVmHXbdf909uGTOQjA0HF0Ls5Q0= +go.opentelemetry.io/otel/metric v1.39.0/go.mod h1:jrZSWL33sD7bBxg1xjrqyDjnuzTUB0x1nBERXd7Ftcs= +go.opentelemetry.io/otel/sdk v1.39.0 h1:nMLYcjVsvdui1B/4FRkwjzoRVsMK8uL/cj0OyhKzt18= +go.opentelemetry.io/otel/sdk v1.39.0/go.mod h1:vDojkC4/jsTJsE+kh+LXYQlbL8CgrEcwmt1ENZszdJE= +go.opentelemetry.io/otel/sdk/metric v1.39.0 h1:cXMVVFVgsIf2YL6QkRF4Urbr/aMInf+2WKg+sEJTtB8= +go.opentelemetry.io/otel/sdk/metric v1.39.0/go.mod h1:xq9HEVH7qeX69/JnwEfp6fVq5wosJsY1mt4lLfYdVew= +go.opentelemetry.io/otel/trace v1.39.0 h1:2d2vfpEDmCJ5zVYz7ijaJdOF59xLomrvj7bjt6/qCJI= +go.opentelemetry.io/otel/trace v1.39.0/go.mod h1:88w4/PnZSazkGzz/w84VHpQafiU4EtqqlVdxWy+rNOA= +golang.org/x/crypto v0.47.0 h1:V6e3FRj+n4dbpw86FJ8Fv7XVOql7TEwpHapKoMJ/GO8= +golang.org/x/crypto v0.47.0/go.mod 
h1:ff3Y9VzzKbwSSEzWqJsJVBnWmRwRSHt/6Op5n9bQc4A= +golang.org/x/exp v0.0.0-20250813145105-42675adae3e6 h1:SbTAbRFnd5kjQXbczszQ0hdk3ctwYf3qBNH9jIsGclE= +golang.org/x/exp v0.0.0-20250813145105-42675adae3e6/go.mod h1:4QTo5u+SEIbbKW1RacMZq1YEfOBqeXa19JeshGi+zc4= +golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o= +golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8= +golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= +golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE= +golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8= +gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4= +gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E= +google.golang.org/genproto/googleapis/api v0.0.0-20260120221211-b8f7ae30c516 h1:vmC/ws+pLzWjj/gzApyoZuSVrDtF1aod4u/+bbj8hgM= +google.golang.org/genproto/googleapis/api v0.0.0-20260120221211-b8f7ae30c516/go.mod h1:p3MLuOwURrGBRoEyFHBT3GjUwaCQVKeNqqWxlcISGdw= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260120221211-b8f7ae30c516 h1:sNrWoksmOyF5bvJUcnmbeAmQi8baNhqg5IWaI3llQqU= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260120221211-b8f7ae30c516/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ= +google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM= +google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4= +google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= +google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 
v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= +gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc= +gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/vault/internal/commands/adminclient.go b/vault/internal/commands/adminclient.go new file mode 100644 index 0000000..eca2dfd --- /dev/null +++ b/vault/internal/commands/adminclient.go @@ -0,0 +1,119 @@ +package commands + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net" + "net/http" + "os" + "strings" + "time" + + "github.com/CryptoLabInc/rune-admin/vault/internal/server" +) + +// AdminClient talks to the Vault admin UDS server. +type AdminClient struct { + socket string + hc *http.Client +} + +// NewAdminClient builds a client that dials the given UDS path. +// Returns ErrSocketMissing if the socket file does not exist on disk — +// gives the CLI a friendlier message than a connection-refused on the +// first request. +func NewAdminClient(socketPath string) (*AdminClient, error) { + if socketPath == "" { + return nil, errors.New("admin socket path is empty (set server.admin.socket or pass --admin-socket)") + } + if _, err := os.Stat(socketPath); err != nil { + if os.IsNotExist(err) { + return nil, fmt.Errorf("admin socket %s not found — is the daemon running?", socketPath) + } + return nil, err + } + hc := &http.Client{ + Timeout: 30 * time.Second, + Transport: &http.Transport{ + DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) { + return (&net.Dialer{}).DialContext(ctx, "unix", socketPath) + }, + DisableKeepAlives: true, + }, + } + return &AdminClient{socket: socketPath, hc: hc}, nil +} + +// adminError is what the server returns on 4xx/5xx. 
+type adminError struct { + Status int + Message string +} + +func (e *adminError) Error() string { return e.Message } + +// Do sends a JSON request and decodes the response into dst (which may +// be nil to discard). 4xx/5xx responses become *adminError. +func (a *AdminClient) Do(method, path string, body, dst any) error { + var buf io.Reader + if body != nil { + b, err := json.Marshal(body) + if err != nil { + return err + } + buf = bytes.NewReader(b) + } + url := "http://admin" + path + req, err := http.NewRequest(method, url, buf) + if err != nil { + return err + } + if body != nil { + req.Header.Set("Content-Type", "application/json") + } + resp, err := a.hc.Do(req) + if err != nil { + return fmt.Errorf("admin: %w (socket: %s)", err, a.socket) + } + defer resp.Body.Close() + respBody, err := io.ReadAll(resp.Body) + if err != nil { + return err + } + if resp.StatusCode >= 400 { + var e struct { + Error string `json:"error"` + } + if jerr := json.Unmarshal(respBody, &e); jerr != nil || e.Error == "" { + return &adminError{Status: resp.StatusCode, Message: strings.TrimSpace(string(respBody))} + } + return &adminError{Status: resp.StatusCode, Message: e.Error} + } + if dst != nil && len(respBody) > 0 { + if err := json.Unmarshal(respBody, dst); err != nil { + return fmt.Errorf("admin: parse response: %w", err) + } + } + return nil +} + +// resolveAdminClient returns an AdminClient using either the explicit +// --admin-socket flag or the socket field from the resolved runevault.conf. 
+func resolveAdminClient() (*AdminClient, error) { + socket := globals.adminSocket + if socket == "" { + cfg, err := server.LoadConfig(globals.configPath) + if err != nil { + return nil, err + } + socket = cfg.Server.Admin.Socket + } + if socket == "" { + return nil, errors.New("admin socket not configured (set server.admin.socket or pass --admin-socket)") + } + return NewAdminClient(socket) +} diff --git a/vault/internal/commands/adminclient_test.go b/vault/internal/commands/adminclient_test.go new file mode 100644 index 0000000..461747b --- /dev/null +++ b/vault/internal/commands/adminclient_test.go @@ -0,0 +1,120 @@ +package commands + +import ( + "context" + "net" + "net/http" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/CryptoLabInc/rune-admin/vault/internal/server" + "github.com/CryptoLabInc/rune-admin/vault/internal/tokens" +) + +// adminUDSFixture spins up a real UDS-backed admin server with a demo +// store. Exposes the socket path so AdminClient can dial it. +func adminUDSFixture(t *testing.T) (socket string, store *tokens.Store, shutdown func()) { + t.Helper() + // Darwin sun_path caps at ~104 bytes; t.TempDir() can overflow with + // long test names. Use a short MkdirTemp. 
+ dir, err := os.MkdirTemp("", "vt-") + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { os.RemoveAll(dir) }) + socket = filepath.Join(dir, "x.sock") + + store = tokens.NewStore() + store.LoadDefaultsWithDemoToken() + + cfg := &server.Config{ + Server: server.ServerConfig{Admin: server.AdminConfig{Socket: socket}}, + Tokens: server.TokensConfig{TeamSecret: "test-secret"}, + Keys: server.KeysConfig{Path: t.TempDir(), EmbeddingDim: 1024}, + } + audit, _ := server.NewAuditLogger(server.AuditConfig{Mode: ""}) + v := server.NewVault(cfg, store, nil, audit) + + stop, err := server.AdminFromConfig(context.Background(), v) + if err != nil { + t.Fatal(err) + } + shutdown = func() { _ = stop(context.Background()) } + t.Cleanup(shutdown) + return socket, store, shutdown +} + +func TestAdminClientHealth(t *testing.T) { + socket, _, _ := adminUDSFixture(t) + c, err := NewAdminClient(socket) + if err != nil { + t.Fatal(err) + } + var status struct { + Status string `json:"status"` + } + if err := c.Do("GET", "/health", nil, &status); err != nil { + t.Fatal(err) + } + if status.Status != "ok" { + t.Errorf("status = %q", status.Status) + } +} + +func TestAdminClientIssueAndList(t *testing.T) { + socket, _, _ := adminUDSFixture(t) + c, _ := NewAdminClient(socket) + + var issued tokenResult + if err := c.Do("POST", "/tokens", map[string]any{"user": "alice", "role": "member"}, &issued); err != nil { + t.Fatal(err) + } + if !strings.HasPrefix(issued.Token, "evt_") { + t.Errorf("token = %q", issued.Token) + } + + var listResp struct { + Tokens []map[string]any `json:"tokens"` + } + if err := c.Do("GET", "/tokens", nil, &listResp); err != nil { + t.Fatal(err) + } + found := false + for _, t := range listResp.Tokens { + if t["user"] == "alice" { + found = true + } + } + if !found { + t.Errorf("alice not in list: %+v", listResp.Tokens) + } +} + +func TestAdminClientErrorBubblesUp(t *testing.T) { + socket, _, _ := adminUDSFixture(t) + c, _ := NewAdminClient(socket) + err := 
c.Do("POST", "/tokens", map[string]any{}, nil) + if err == nil { + t.Fatal("expected error for missing fields") + } + if !strings.Contains(err.Error(), "Missing required") { + t.Errorf("err = %v", err) + } +} + +func TestAdminClientMissingSocket(t *testing.T) { + _, err := NewAdminClient("/tmp/no-such-socket") + if err == nil { + t.Fatal("expected error for missing socket") + } + if !strings.Contains(err.Error(), "not found") { + t.Errorf("err = %v", err) + } +} + +// silence unused import if running tests in isolation +var _ = net.Listen +var _ = http.StatusOK +var _ = os.Stat diff --git a/vault/internal/commands/daemon.go b/vault/internal/commands/daemon.go new file mode 100644 index 0000000..8a61418 --- /dev/null +++ b/vault/internal/commands/daemon.go @@ -0,0 +1,84 @@ +package commands + +import ( + "context" + "fmt" + "log/slog" + "os" + + "github.com/spf13/cobra" + + "github.com/CryptoLabInc/rune-admin/vault/internal/crypto" + "github.com/CryptoLabInc/rune-admin/vault/internal/server" + "github.com/CryptoLabInc/rune-admin/vault/internal/tokens" +) + +func newDaemonCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "daemon", + Short: "Manage the runevault daemon process", + Hidden: true, + } + cmd.AddCommand(newDaemonStartCmd()) + return cmd +} + +func newDaemonStartCmd() *cobra.Command { + return &cobra.Command{ + Use: "start", + Short: "Start the daemon in the foreground", + RunE: func(cmd *cobra.Command, _ []string) error { + return runDaemonStart(cmd.Context()) + }, + } +} + +func runDaemonStart(ctx context.Context) error { + cfg, err := server.LoadConfig(globals.configPath) + if err != nil { + return err + } + if globals.adminSocket != "" { + cfg.Server.Admin.Socket = globals.adminSocket + } + if err := cfg.Validate(); err != nil { + return err + } + + store := tokens.NewStore() + if err := store.LoadFromFiles(cfg.Tokens.RolesFile, cfg.Tokens.TokensFile); err != nil { + return err + } + defer store.Shutdown() + + keyParams := crypto.KeysParams{ + 
Root: cfg.Keys.Path, + KeyID: "vault-key", + Dim: cfg.Keys.EmbeddingDim, + } + if err := crypto.EnsureKeys(keyParams); err != nil { + return fmt.Errorf("daemon: ensure keys: %w", err) + } + keys, err := crypto.OpenSecretKey(keyParams) + if err != nil { + return fmt.Errorf("daemon: open sec key: %w", err) + } + defer keys.Close() + + audit, err := server.NewAuditLogger(cfg.Audit) + if err != nil { + return err + } + defer audit.Close() + + v := server.NewVault(cfg, store, keys, audit) + defer v.Close() + + slog.Info("vault: starting daemon", + "pid", os.Getpid(), + "config", cfg.Source, + "grpc_addr", fmt.Sprintf("%s:%d", cfg.Server.GRPC.Host, cfg.Server.GRPC.Port), + "admin_socket", cfg.Server.Admin.Socket) + + return server.Serve(ctx, v, server.AdminFromConfig) +} diff --git a/vault/internal/commands/duration.go b/vault/internal/commands/duration.go new file mode 100644 index 0000000..200181e --- /dev/null +++ b/vault/internal/commands/duration.go @@ -0,0 +1,27 @@ +package commands + +import ( + "fmt" + "regexp" + "strconv" +) + +var durationRE = regexp.MustCompile(`^(\d+)([dwm])$`) + +// parseDuration converts strings like "90d", "12w", "6m" into days. +// Mirrors vault_admin_cli.py:_parse_duration (m = 30 days approximation). +func parseDuration(value string) (int, error) { + m := durationRE.FindStringSubmatch(value) + if m == nil { + return 0, fmt.Errorf("Invalid duration '%s'. Use (e.g. 
90d, 12w, 6m)", value) + } + n, _ := strconv.Atoi(m[1]) + switch m[2] { + case "d": + return n, nil + case "w": + return n * 7, nil + default: + return n * 30, nil + } +} diff --git a/vault/internal/commands/logs.go b/vault/internal/commands/logs.go new file mode 100644 index 0000000..138c958 --- /dev/null +++ b/vault/internal/commands/logs.go @@ -0,0 +1,66 @@ +package commands + +import ( + "os" + "os/exec" + "path/filepath" + "runtime" + + "github.com/spf13/cobra" + + "github.com/CryptoLabInc/rune-admin/vault/internal/server" +) + +// newLogsCmd returns the "logs" subcommand which tails the daemon log output. +// On Linux it delegates to journalctl; on macOS it tails the service stderr file. +func newLogsCmd() *cobra.Command { + var follow bool + cmd := &cobra.Command{ + Use: "logs", + Short: "Show daemon log output", + RunE: func(cmd *cobra.Command, _ []string) error { + return runLogs(follow) + }, + } + cmd.Flags().BoolVarP(&follow, "follow", "f", false, "Follow log output (like tail -f)") + return cmd +} + +func runLogs(follow bool) error { + if runtime.GOOS == "linux" { + args := []string{"-u", "runevault", "--no-pager"} + if follow { + args = append(args, "-f") + } + c := exec.Command("journalctl", args...) + c.Stdout = os.Stdout + c.Stderr = os.Stderr + return c.Run() + } + + cfg, err := server.LoadConfig(globals.configPath) + if err != nil { + return err + } + logPath := daemonStderrLogPath(cfg) + + var args []string + if follow { + args = append(args, "-f") + } + args = append(args, logPath) + c := exec.Command("tail", args...) 
+ c.Stdout = os.Stdout + c.Stderr = os.Stderr + return c.Run() +} + +// daemonStderrLogPath derives the launchd stderr log path from the config +// source location: /opt/runevault/configs/runevault.conf → /opt/runevault/logs/runevault.stderr.log +func daemonStderrLogPath(cfg *server.Config) string { + if cfg.Source != "" { + prefix := filepath.Dir(filepath.Dir(cfg.Source)) + return filepath.Join(prefix, "logs", "runevault.stderr.log") + } + return "/opt/runevault/logs/runevault.stderr.log" +} diff --git a/vault/internal/commands/role.go b/vault/internal/commands/role.go new file mode 100644 index 0000000..bb6df14 --- /dev/null +++ b/vault/internal/commands/role.go @@ -0,0 +1,172 @@ +package commands + +import ( + "fmt" + "strings" + + "github.com/spf13/cobra" +) + +func newRoleCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "role", + Short: "Manage authorization roles", + } + cmd.AddCommand( + newRoleListCmd(), + newRoleCreateCmd(), + newRoleUpdateCmd(), + newRoleDeleteCmd(), + ) + return cmd +} + +type roleResult struct { + Name string `json:"name"` + Scope []string `json:"scope"` + TopK int `json:"top_k"` + RateLimit string `json:"rate_limit"` +} + +func newRoleListCmd() *cobra.Command { + return &cobra.Command{ + Use: "list", + Short: "List all roles", + RunE: func(cmd *cobra.Command, _ []string) error { + ac, err := resolveAdminClient() + if err != nil { + return err + } + var result struct { + Roles []roleResult `json:"roles"` + } + if err := ac.Do("GET", "/roles", nil, &result); err != nil { + return err + } + out := cmd.OutOrStdout() + if len(result.Roles) == 0 { + fmt.Fprintln(out, "No roles defined.") + return nil + } + // "{:<12} {:<50} {:>6} {:>10}" — match vault_admin_cli.py + fmt.Fprintf(out, "%-12s %-50s %6s %10s\n", "ROLE", "SCOPE", "TOP_K", "RATE") + for _, r := range result.Roles { + fmt.Fprintf(out, "%-12s %-50s %6d %10s\n", + r.Name, formatScope(r.Scope), r.TopK, r.RateLimit) + } + return nil + }, + } +} + +func newRoleCreateCmd() 
*cobra.Command { + var name, scope, rateLimit string + var topK int + cmd := &cobra.Command{ + Use: "create", + Short: "Create a new role", + RunE: func(cmd *cobra.Command, _ []string) error { + scopeList := splitCSV(scope) + body := map[string]any{ + "name": name, + "scope": scopeList, + "top_k": topK, + "rate_limit": rateLimit, + } + ac, err := resolveAdminClient() + if err != nil { + return err + } + if err := ac.Do("POST", "/roles", body, nil); err != nil { + return err + } + fmt.Fprintf(cmd.OutOrStdout(), "Role '%s' created.\n", name) + return nil + }, + } + cmd.Flags().StringVar(&name, "name", "", "Role name") + cmd.Flags().StringVar(&scope, "scope", "", "Comma-separated scope list") + cmd.Flags().IntVar(&topK, "top-k", 0, "Max top_k") + cmd.Flags().StringVar(&rateLimit, "rate-limit", "", "Rate limit (e.g. 30/60s)") + _ = cmd.MarkFlagRequired("name") + _ = cmd.MarkFlagRequired("scope") + _ = cmd.MarkFlagRequired("top-k") + _ = cmd.MarkFlagRequired("rate-limit") + return cmd +} + +func newRoleUpdateCmd() *cobra.Command { + var name, scope, rateLimit string + var topK int + var topKSet bool + cmd := &cobra.Command{ + Use: "update", + Short: "Update an existing role", + RunE: func(cmd *cobra.Command, _ []string) error { + body := map[string]any{} + if scope != "" { + body["scope"] = splitCSV(scope) + } + if topKSet { + body["top_k"] = topK + } + if rateLimit != "" { + body["rate_limit"] = rateLimit + } + if len(body) == 0 { + return fmt.Errorf("No fields to update.") + } + ac, err := resolveAdminClient() + if err != nil { + return err + } + if err := ac.Do("PUT", "/roles/"+name, body, nil); err != nil { + return err + } + fmt.Fprintf(cmd.OutOrStdout(), + "Role '%s' updated. 
Changes take effect immediately for all tokens with this role.\n", name) + return nil + }, + } + cmd.Flags().StringVar(&name, "name", "", "Role name") + cmd.Flags().StringVar(&scope, "scope", "", "Comma-separated scope list") + cmd.Flags().IntVar(&topK, "top-k", 0, "Max top_k") + cmd.Flags().StringVar(&rateLimit, "rate-limit", "", "Rate limit (e.g. 30/60s)") + _ = cmd.MarkFlagRequired("name") + cmd.PreRun = func(c *cobra.Command, _ []string) { + topKSet = c.Flags().Changed("top-k") + } + return cmd +} + +func newRoleDeleteCmd() *cobra.Command { + var name string + cmd := &cobra.Command{ + Use: "delete", + Short: "Delete a role", + RunE: func(cmd *cobra.Command, _ []string) error { + ac, err := resolveAdminClient() + if err != nil { + return err + } + if err := ac.Do("DELETE", "/roles/"+name, nil, nil); err != nil { + return err + } + fmt.Fprintf(cmd.OutOrStdout(), "Role '%s' deleted.\n", name) + return nil + }, + } + cmd.Flags().StringVar(&name, "name", "", "Role name") + _ = cmd.MarkFlagRequired("name") + return cmd +} + +func splitCSV(s string) []string { + out := []string{} + for _, p := range strings.Split(s, ",") { + if t := strings.TrimSpace(p); t != "" { + out = append(out, t) + } + } + return out +} diff --git a/vault/internal/commands/root.go b/vault/internal/commands/root.go new file mode 100644 index 0000000..7a22b11 --- /dev/null +++ b/vault/internal/commands/root.go @@ -0,0 +1,44 @@ +package commands + +import ( + "github.com/spf13/cobra" +) + +type globalFlags struct { + configPath string + adminSocket string +} + +var globals globalFlags + +func newRootCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "runevault", + Short: "Rune Vault daemon server with admin CLI", + SilenceUsage: true, + SilenceErrors: true, + CompletionOptions: cobra.CompletionOptions{ + HiddenDefaultCmd: true, + }, + } + + cmd.PersistentFlags().StringVar(&globals.configPath, "config", "", + "Path to runevault.conf (default: /opt/runevault/configs/runevault.conf, then 
./runevault.conf)") + cmd.PersistentFlags().StringVar(&globals.adminSocket, "admin-socket", "", + "Override server.admin.socket from config") + + cmd.AddCommand( + newVersionCmd(), + newDaemonCmd(), + newTokenCmd(), + newRoleCmd(), + newStatusCmd(), + newLogsCmd(), + ) + + return cmd +} + +func Execute() error { + return newRootCmd().Execute() +} diff --git a/vault/internal/commands/status.go b/vault/internal/commands/status.go new file mode 100644 index 0000000..3125ba9 --- /dev/null +++ b/vault/internal/commands/status.go @@ -0,0 +1,131 @@ +package commands + +import ( + "context" + "crypto/tls" + "fmt" + "net" + "net/http" + "os" + "time" + + "github.com/spf13/cobra" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials" + "google.golang.org/grpc/credentials/insecure" + healthpb "google.golang.org/grpc/health/grpc_health_v1" + + "github.com/CryptoLabInc/rune-admin/vault/internal/server" +) + +func newStatusCmd() *cobra.Command { + return &cobra.Command{ + Use: "status", + Short: "Report daemon health and socket liveness", + RunE: func(cmd *cobra.Command, _ []string) error { + return runStatus(cmd) + }, + } +} + +type statusReport struct { + adminSocket string + adminUp bool + adminError string + grpcBind string + grpcProbe string + grpcServing bool + grpcError string + configSource string +} + +func runStatus(cmd *cobra.Command) error { + cfg, err := server.LoadConfig(globals.configPath) + if err != nil { + return err + } + + bindHost := cfg.Server.GRPC.Host + probeHost := bindHost + if probeHost == "" || probeHost == "0.0.0.0" { + probeHost = "127.0.0.1" + } + + r := statusReport{ + adminSocket: cfg.Server.Admin.Socket, + grpcBind: fmt.Sprintf("%s:%d", bindHost, cfg.Server.GRPC.Port), + grpcProbe: fmt.Sprintf("%s:%d", probeHost, cfg.Server.GRPC.Port), + configSource: cfg.Source, + } + if globals.adminSocket != "" { + r.adminSocket = globals.adminSocket + } + + r.adminUp, r.adminError = probeAdminUDS(r.adminSocket) + r.grpcServing, r.grpcError = 
probeGRPCHealth(r.grpcProbe, cfg.Server.GRPC.TLS.Disable) + + out := cmd.OutOrStdout() + fmt.Fprintf(out, "Config: %s\n", r.configSource) + fmt.Fprintf(out, "Admin socket: %s (%s)\n", r.adminSocket, healthStr(r.adminUp, r.adminError)) + fmt.Fprintf(out, "gRPC: %s (%s)\n", r.grpcBind, healthStr(r.grpcServing, r.grpcError)) + + if !(r.adminUp && r.grpcServing) { + os.Exit(2) + } + return nil +} + +func healthStr(ok bool, errMsg string) string { + if ok { + return "ok" + } + if errMsg != "" { + return "down — " + errMsg + } + return "down" +} + +func probeAdminUDS(path string) (bool, string) { + if path == "" { + return false, "socket path empty" + } + hc := &http.Client{ + Timeout: 2 * time.Second, + Transport: &http.Transport{ + DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) { + return (&net.Dialer{}).DialContext(ctx, "unix", path) + }, + DisableKeepAlives: true, + }, + } + resp, err := hc.Get("http://admin/health") + if err != nil { + return false, err.Error() + } + defer resp.Body.Close() + return resp.StatusCode == 200, "" +} + +func probeGRPCHealth(addr string, tlsDisabled bool) (bool, string) { + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + var creds grpc.DialOption + if tlsDisabled { + creds = grpc.WithTransportCredentials(insecure.NewCredentials()) + } else { + // InsecureSkipVerify is intentional: status probe is local-only and + // does not need to verify the self-signed server cert. 
+ creds = grpc.WithTransportCredentials(credentials.NewTLS(&tls.Config{InsecureSkipVerify: true})) //nolint:gosec + } + conn, err := grpc.NewClient(addr, creds) + if err != nil { + return false, err.Error() + } + defer conn.Close() + cli := healthpb.NewHealthClient(conn) + resp, err := cli.Check(ctx, &healthpb.HealthCheckRequest{}) + if err != nil { + return false, err.Error() + } + return resp.GetStatus() == healthpb.HealthCheckResponse_SERVING, "" +} diff --git a/vault/internal/commands/token.go b/vault/internal/commands/token.go new file mode 100644 index 0000000..2be54a1 --- /dev/null +++ b/vault/internal/commands/token.go @@ -0,0 +1,192 @@ +package commands + +import ( + "fmt" + "strings" + + "github.com/spf13/cobra" +) + +func newTokenCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "token", + Short: "Manage authentication tokens", + } + cmd.AddCommand( + newTokenIssueCmd(), + newTokenRevokeCmd(), + newTokenRotateCmd(), + newTokenListCmd(), + ) + return cmd +} + +type tokenResult struct { + User string `json:"user"` + Token string `json:"token"` + Role string `json:"role"` + IssuedAt string `json:"issued_at"` + Expires string `json:"expires"` +} + +func newTokenIssueCmd() *cobra.Command { + var user, role, expires string + cmd := &cobra.Command{ + Use: "issue", + Short: "Issue a new token", + RunE: func(cmd *cobra.Command, _ []string) error { + if user == "" || role == "" { + return fmt.Errorf("--user and --role are required") + } + body := map[string]any{"user": user, "role": role} + if expires != "" { + days, err := parseDuration(expires) + if err != nil { + return err + } + body["expires_days"] = days + } + ac, err := resolveAdminClient() + if err != nil { + return err + } + var result tokenResult + if err := ac.Do("POST", "/tokens", body, &result); err != nil { + return err + } + out := cmd.OutOrStdout() + fmt.Fprintf(out, "\nToken issued for '%s':\n", result.User) + fmt.Fprintf(out, " Role: %s\n", result.Role) + fmt.Fprintf(out, " Expires: %s\n", 
result.Expires) + fmt.Fprintf(out, "\n Token: %s\n", result.Token) + fmt.Fprintln(out, "\n WARNING: This token will NOT be shown again. Share it securely.") + return nil + }, + } + cmd.Flags().StringVar(&user, "user", "", "Username") + cmd.Flags().StringVar(&role, "role", "", "Role name") + cmd.Flags().StringVar(&expires, "expires", "", "Duration until expiry (e.g. 90d, 12w, 6m)") + _ = cmd.MarkFlagRequired("user") + _ = cmd.MarkFlagRequired("role") + return cmd +} + +func newTokenRevokeCmd() *cobra.Command { + var user string + cmd := &cobra.Command{ + Use: "revoke", + Short: "Revoke a user's token", + RunE: func(cmd *cobra.Command, _ []string) error { + ac, err := resolveAdminClient() + if err != nil { + return err + } + var result struct { + Message string `json:"message"` + } + if err := ac.Do("DELETE", "/tokens/"+user, nil, &result); err != nil { + return err + } + fmt.Fprintln(cmd.OutOrStdout(), result.Message) + return nil + }, + } + cmd.Flags().StringVar(&user, "user", "", "Username") + _ = cmd.MarkFlagRequired("user") + return cmd +} + +func newTokenRotateCmd() *cobra.Command { + var user string + var rotateAll bool + cmd := &cobra.Command{ + Use: "rotate", + Short: "Rotate one or all tokens", + RunE: func(cmd *cobra.Command, _ []string) error { + if (user == "") == (!rotateAll) { + return fmt.Errorf("exactly one of --user or --all is required") + } + ac, err := resolveAdminClient() + if err != nil { + return err + } + out := cmd.OutOrStdout() + if rotateAll { + var result struct { + Rotated int `json:"rotated"` + Tokens []struct { + User string `json:"user"` + Token string `json:"token"` + Role string `json:"role"` + } `json:"tokens"` + } + if err := ac.Do("POST", "/tokens/_rotate_all", map[string]any{}, &result); err != nil { + return err + } + if result.Rotated == 0 { + fmt.Fprintln(out, "No tokens to rotate.") + return nil + } + fmt.Fprintf(out, "Rotated %d token(s):\n\n", result.Rotated) + for _, t := range result.Tokens { + fmt.Fprintf(out, " %s: 
%s\n", t.User, t.Token) + } + fmt.Fprintln(out, "\n WARNING: These tokens will NOT be shown again. Share them securely.") + return nil + } + var result tokenResult + if err := ac.Do("POST", "/tokens/"+user+"/rotate", map[string]any{}, &result); err != nil { + return err + } + fmt.Fprintf(out, "\nToken rotated for '%s':\n", result.User) + fmt.Fprintf(out, " Role: %s\n", result.Role) + fmt.Fprintf(out, " Expires: %s\n", result.Expires) + fmt.Fprintf(out, "\n Token: %s\n", result.Token) + fmt.Fprintln(out, "\n WARNING: This token will NOT be shown again. Share it securely.") + return nil + }, + } + cmd.Flags().StringVar(&user, "user", "", "Username to rotate") + cmd.Flags().BoolVar(&rotateAll, "all", false, "Rotate all tokens") + return cmd +} + +func newTokenListCmd() *cobra.Command { + return &cobra.Command{ + Use: "list", + Short: "List all tokens", + RunE: func(cmd *cobra.Command, _ []string) error { + ac, err := resolveAdminClient() + if err != nil { + return err + } + var result struct { + Tokens []struct { + User string `json:"user"` + Role string `json:"role"` + TopK any `json:"top_k"` + RateLimit any `json:"rate_limit"` + Expires string `json:"expires"` + } `json:"tokens"` + } + if err := ac.Do("GET", "/tokens", nil, &result); err != nil { + return err + } + out := cmd.OutOrStdout() + if len(result.Tokens) == 0 { + fmt.Fprintln(out, "No tokens issued.") + return nil + } + // "{:<16} {:<10} {:>6} {:>10} {:<12}" — match vault_admin_cli.py + fmt.Fprintf(out, "%-16s %-10s %6s %10s %-12s\n", "USER", "ROLE", "TOP_K", "RATE", "EXPIRES") + for _, t := range result.Tokens { + fmt.Fprintf(out, "%-16s %-10s %6s %10s %-12s\n", + t.User, t.Role, fmt.Sprintf("%v", t.TopK), fmt.Sprintf("%v", t.RateLimit), t.Expires) + } + return nil + }, + } +} + +// formatScope is referenced from role.go; defined here to share with token output if needed. 
func formatScope(scope []string) string {
	// Entries are joined with a bare comma and no surrounding whitespace;
	// a nil or empty slice yields the empty string.
	const separator = ","
	return strings.Join(scope, separator)
}
+ Root string + KeyID string + Dim int +} + +func (p KeysParams) keyDir() string { return filepath.Join(p.Root, p.KeyID) } + +// KeysExist reports whether the bundle is present under Root/KeyID. +func KeysExist(p KeysParams) bool { + return envector.KeysExist( + envector.WithKeyPath(p.keyDir()), + envector.WithKeyID(p.KeyID), + envector.WithKeyDim(p.Dim), + ) +} + +// EnsureKeys generates a fresh bundle if none exists. No-op if any of the +// three slots is already present (envector.GenerateKeys never overwrites). +func EnsureKeys(p KeysParams) error { + if KeysExist(p) { + return nil + } + if err := os.MkdirAll(p.keyDir(), 0o700); err != nil { + return fmt.Errorf("crypto: mkdir key dir: %w", err) + } + if err := envector.GenerateKeys( + envector.WithKeyPath(p.keyDir()), + envector.WithKeyID(p.KeyID), + envector.WithKeyDim(p.Dim), + ); err != nil && !errors.Is(err, envector.ErrKeysAlreadyExist) { + return fmt.Errorf("crypto: generate keys: %w", err) + } + return nil +} + +// OpenSecretKey loads SecKey.json only — Vault is decrypt-only. +// Returns EnvectorKeys whose Decrypt method is wired to envector-go-sdk's CKKS +// decryptor; encryption is unavailable. +func OpenSecretKey(p KeysParams) (*EnvectorKeys, error) { + k, err := envector.OpenKeysFromFile( + envector.WithKeyPath(p.keyDir()), + envector.WithKeyID(p.KeyID), + envector.WithKeyDim(p.Dim), + envector.WithKeyParts(envector.KeyPartSec), + ) + if err != nil { + return nil, fmt.Errorf("crypto: open sec key: %w", err) + } + return &EnvectorKeys{keys: k}, nil +} + +// Decrypt unpacks a CiphertextScore proto blob into per-shard score +// vectors. The returned slices are aligned: scores[i] is the score vector +// for shard shardIdx[i]. The gRPC layer flattens these into ScoreEntry +// rows and applies Top-K. 
+func (f *EnvectorKeys) Decrypt(blob []byte) (scores [][]float64, shardIdx []int32, err error) { + if f == nil || f.keys == nil { + return nil, nil, errors.New("crypto: EnvectorKeys closed") + } + return f.keys.Decrypt(blob) +} + +// PublicKeyBundle reads EncKey.json and EvalKey.json file contents from +// disk. The strings are returned verbatim for inclusion in the GetPublicKey +// gRPC response — clients re-parse them with their own SDK. +type PublicKeyBundle struct { + EncKey string + EvalKey string +} + +func ReadPublicKeyBundle(p KeysParams) (*PublicKeyBundle, error) { + encPath := filepath.Join(p.keyDir(), "EncKey.json") + evalPath := filepath.Join(p.keyDir(), "EvalKey.json") + enc, err := os.ReadFile(encPath) + if err != nil { + return nil, fmt.Errorf("crypto: read EncKey.json: %w", err) + } + eval, err := os.ReadFile(evalPath) + if err != nil { + return nil, fmt.Errorf("crypto: read EvalKey.json: %w", err) + } + return &PublicKeyBundle{EncKey: string(enc), EvalKey: string(eval)}, nil +} + +func (f *EnvectorKeys) Close() error { + if f == nil || f.keys == nil { + return nil + } + err := f.keys.Close() + f.keys = nil + return err +} diff --git a/vault/internal/crypto/keys_test.go b/vault/internal/crypto/keys_test.go new file mode 100644 index 0000000..fdbaebd --- /dev/null +++ b/vault/internal/crypto/keys_test.go @@ -0,0 +1,49 @@ +package crypto + +import ( + "errors" + "path/filepath" + "testing" + + envector "github.com/CryptoLabInc/envector-go-sdk" +) + +func TestKeysExistFalseForMissingDir(t *testing.T) { + p := KeysParams{Root: filepath.Join(t.TempDir(), "no-such"), KeyID: "vault-key", Dim: 1024} + if KeysExist(p) { + t.Error("KeysExist = true for missing dir") + } +} + +func TestOpenSecretKeyMissingReturnsError(t *testing.T) { + p := KeysParams{Root: t.TempDir(), KeyID: "vault-key", Dim: 1024} + _, err := OpenSecretKey(p) + if err == nil { + t.Fatal("OpenSecretKey on missing keys returned nil error") + } + // envector-go-sdk wraps ErrKeysNotFound; we 
wrap further. Match by message. + if !errors.Is(err, envector.ErrKeysNotFound) { + t.Logf("err = %v (does not unwrap to ErrKeysNotFound, but is non-nil)", err) + } +} + +func TestReadPublicKeyBundleMissingReturnsError(t *testing.T) { + p := KeysParams{Root: t.TempDir(), KeyID: "vault-key", Dim: 1024} + if _, err := ReadPublicKeyBundle(p); err == nil { + t.Error("ReadPublicKeyBundle on missing keys returned nil error") + } +} + +func TestNilEnvectorKeysCloseSafe(t *testing.T) { + var f *EnvectorKeys + if err := f.Close(); err != nil { + t.Errorf("nil Close: %v", err) + } +} + +func TestNilEnvectorKeysDecryptError(t *testing.T) { + var f *EnvectorKeys + if _, _, err := f.Decrypt([]byte("anything")); err == nil { + t.Error("nil Decrypt should error") + } +} diff --git a/vault/internal/crypto/metadata.go b/vault/internal/crypto/metadata.go new file mode 100644 index 0000000..b7bb685 --- /dev/null +++ b/vault/internal/crypto/metadata.go @@ -0,0 +1,107 @@ +// Package crypto provides metadata key derivation, AES-256-CTR metadata +// encryption, and FHE key lifecycle wrappers around envector-go-sdk. +// +// Wire format for metadata ciphertext: +// +// base64( IV (16 bytes) || ciphertext (variable) ) +// +// AES-256-CTR is unauthenticated; integrity is enforced by upstream JSON +// envelopes and HKDF-derived per-agent keys. +// +// TODO: migrate to AES-256-GCM (AEAD) — keys are issued directly between +// rune and rune-vault so there is no external wire-format compatibility +// constraint. Requires coordinated update of the rune-side encryption path. 
// AgentIDFromToken derives the per-token agent identifier: the lowercase hex
// encoding of the leading half of SHA-256(token), always 32 characters long.
func AgentIDFromToken(token string) string {
	digest := sha256.Sum256([]byte(token))
	// 16 digest bytes encode to exactly the first 32 hex characters.
	return hex.EncodeToString(digest[:16])
}
+ return base64.StdEncoding.EncodeToString(out), nil +} + +// DecryptMetadata reverses EncryptMetadata: base64-decode the input, peel +// off the 16-byte IV, then AES-256-CTR decrypt. Output is raw bytes; the +// caller decides whether to UTF-8/JSON-parse them. +func DecryptMetadata(ctB64 string, key []byte) ([]byte, error) { + if len(key) != dekLen { + return nil, ErrInvalidKey + } + raw, err := base64.StdEncoding.DecodeString(ctB64) + if err != nil { + return nil, fmt.Errorf("crypto: base64 decode: %w", err) + } + if len(raw) < ivLen { + return nil, ErrInvalidCiphertext + } + iv := raw[:ivLen] + ct := raw[ivLen:] + block, err := aes.NewCipher(key) + if err != nil { + return nil, err + } + pt := make([]byte, len(ct)) + cipher.NewCTR(block, iv).XORKeyStream(pt, ct) + return pt, nil +} diff --git a/vault/internal/crypto/metadata_test.go b/vault/internal/crypto/metadata_test.go new file mode 100644 index 0000000..ff6509b --- /dev/null +++ b/vault/internal/crypto/metadata_test.go @@ -0,0 +1,223 @@ +package crypto + +import ( + "bytes" + "encoding/base64" + "encoding/hex" + "strings" + "testing" +) + +// HKDF golden vector — derived from Python: +// +// cryptography.hazmat.primitives.kdf.hkdf.HKDF( +// algorithm=SHA256(), length=32, salt=None, info=b"abc123def456") +// .derive(b"test-team-secret-32-bytes-please") +const ( + goldenTeamSecret = "test-team-secret-32-bytes-please" + goldenAgentID = "abc123def456" + goldenDEKHex = "0e4757183d2aa64e384012a494accb6fa18b8ff144c97b78b91bec3b6720767a" + + demoToken = "evt_0000000000000000000000000000demo" + demoTokenAgentID = "a84c4af3aac6f4479a6741d9df0cda65" +) + +func TestDeriveAgentKeyMatchesPython(t *testing.T) { + got, err := DeriveAgentKey(goldenTeamSecret, goldenAgentID) + if err != nil { + t.Fatal(err) + } + want, _ := hex.DecodeString(goldenDEKHex) + if !bytes.Equal(got, want) { + t.Errorf("DEK mismatch\n got %x\nwant %s", got, goldenDEKHex) + } +} + +func TestDeriveAgentKeyDeterministic(t *testing.T) { + d1, err := 
DeriveAgentKey(goldenTeamSecret, goldenAgentID) + if err != nil { + t.Fatal(err) + } + d2, err := DeriveAgentKey(goldenTeamSecret, goldenAgentID) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(d1, d2) { + t.Error("HKDF output non-deterministic") + } +} + +func TestDeriveAgentKeyDifferentAgents(t *testing.T) { + a, _ := DeriveAgentKey(goldenTeamSecret, "agent-a") + b, _ := DeriveAgentKey(goldenTeamSecret, "agent-b") + if bytes.Equal(a, b) { + t.Error("different agents produced same DEK") + } +} + +func TestDeriveAgentKeyEmptyTeamSecret(t *testing.T) { + if _, err := DeriveAgentKey("", "x"); err == nil { + t.Error("empty team secret should error") + } +} + +func TestAgentIDFromDemoToken(t *testing.T) { + got := AgentIDFromToken(demoToken) + if got != demoTokenAgentID { + t.Errorf("agent_id = %q, want %q", got, demoTokenAgentID) + } + if len(got) != 32 { + t.Errorf("agent_id length = %d, want 32", len(got)) + } +} + +// ── round-trip ──────────────────────────────────────────────────── + +func TestEncryptDecryptRoundTripStr(t *testing.T) { + key, _ := DeriveAgentKey(goldenTeamSecret, goldenAgentID) + plaintext := []byte("hello world") + ct, err := EncryptMetadata(plaintext, key) + if err != nil { + t.Fatal(err) + } + got, err := DecryptMetadata(ct, key) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(got, plaintext) { + t.Errorf("round-trip mismatch: got %q, want %q", got, plaintext) + } +} + +func TestEncryptDecryptRoundTripBinary(t *testing.T) { + key, _ := DeriveAgentKey(goldenTeamSecret, goldenAgentID) + plaintext := []byte{0, 1, 2, 3, 'b', 'i', 'n', 'a', 'r', 'y'} + ct, err := EncryptMetadata(plaintext, key) + if err != nil { + t.Fatal(err) + } + got, err := DecryptMetadata(ct, key) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(got, plaintext) { + t.Errorf("round-trip mismatch: got %x, want %x", got, plaintext) + } +} + +func TestEncryptDecryptRoundTripJSON(t *testing.T) { + key, _ := DeriveAgentKey(goldenTeamSecret, goldenAgentID) + 
plaintext := []byte(`{"foo":"bar","n":42}`) + ct, err := EncryptMetadata(plaintext, key) + if err != nil { + t.Fatal(err) + } + got, err := DecryptMetadata(ct, key) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(got, plaintext) { + t.Errorf("round-trip mismatch: got %s, want %s", got, plaintext) + } +} + +// IV must change every encryption (random 16 bytes prefixed) +func TestEncryptUsesRandomIV(t *testing.T) { + key, _ := DeriveAgentKey(goldenTeamSecret, goldenAgentID) + pt := []byte("same plaintext") + ct1, _ := EncryptMetadata(pt, key) + ct2, _ := EncryptMetadata(pt, key) + if ct1 == ct2 { + t.Error("two encryptions of same plaintext returned identical ciphertext (IV reuse?)") + } +} + +// ── cross-language: decrypt Python-produced ciphertexts ────────── + +func TestDecryptPythonGoldenStr(t *testing.T) { + key, _ := DeriveAgentKey(goldenTeamSecret, goldenAgentID) + // Python: encrypt_metadata("hello world", dek) → + pythonCT := "OhawM+14dWV/2KJwL0Ud3pqJpP6Mr7XVfCsM" + got, err := DecryptMetadata(pythonCT, key) + if err != nil { + t.Fatal(err) + } + if string(got) != "hello world" { + t.Errorf("decrypt = %q, want %q", got, "hello world") + } +} + +func TestDecryptPythonGoldenDict(t *testing.T) { + key, _ := DeriveAgentKey(goldenTeamSecret, goldenAgentID) + // Python: encrypt_metadata({"foo": "bar", "n": 42}, dek) → + // (the dict is JSON-serialized as {"foo":"bar","n":42} — separators=(",", ":")) + pythonCT := "x801QtEfmRM9Hg9ncV0p1aHbcPTBGI/63+L7c/TPVoPFRS/p" + got, err := DecryptMetadata(pythonCT, key) + if err != nil { + t.Fatal(err) + } + want := `{"foo":"bar","n":42}` + if string(got) != want { + t.Errorf("decrypt = %q, want %q", got, want) + } +} + +func TestDecryptPythonGoldenBytes(t *testing.T) { + key, _ := DeriveAgentKey(goldenTeamSecret, goldenAgentID) + // Python: encrypt_metadata(b"\x00\x01\x02\x03binary", dek) + pythonCT := "zAoZPxGEAucFdLBQWyahXBFCCwjLL8z2RjA=" + got, err := DecryptMetadata(pythonCT, key) + if err != nil { + t.Fatal(err) + 
} + want := []byte{0, 1, 2, 3, 'b', 'i', 'n', 'a', 'r', 'y'} + if !bytes.Equal(got, want) { + t.Errorf("decrypt = %x, want %x", got, want) + } +} + +// ── error cases ────────────────────────────────────────────────── + +func TestDecryptInvalidKey(t *testing.T) { + short := []byte("short") + if _, err := DecryptMetadata("anything", short); err != ErrInvalidKey { + t.Errorf("err = %v, want ErrInvalidKey", err) + } +} + +func TestEncryptInvalidKey(t *testing.T) { + short := []byte("short") + if _, err := EncryptMetadata([]byte("x"), short); err != ErrInvalidKey { + t.Errorf("err = %v, want ErrInvalidKey", err) + } +} + +func TestDecryptInvalidBase64(t *testing.T) { + key := make([]byte, 32) + if _, err := DecryptMetadata("!!!not-base64!!!", key); err == nil { + t.Error("invalid base64 should error") + } +} + +func TestDecryptShortCiphertext(t *testing.T) { + key := make([]byte, 32) + short := base64.StdEncoding.EncodeToString([]byte("only12bytes!")) + if _, err := DecryptMetadata(short, key); err != ErrInvalidCiphertext { + t.Errorf("err = %v, want ErrInvalidCiphertext", err) + } +} + +// ── secret leakage guard ───────────────────────────────────────── + +// Ensure key bytes never appear in error messages. 
// AdminFromConfig is an AdminFactory suitable for production: it binds the
// UDS at v.cfg.Server.Admin.Socket with mode 0660 (umask + chmod
// belt+suspenders), serves the route table, and returns a closer that
// gracefully stops the http.Server and unlinks the socket.
//
// Steps, in order:
//  1. reject an empty socket path;
//  2. create the socket's parent directory (mode 0750) if missing;
//  3. remove whatever currently sits at the socket path;
//  4. listen on the path under a temporary umask, then chmod it to
//     adminSocketMode;
//  5. serve buildAdminMux(v) on a background goroutine.
//
// The ctx parameter is not read by this function; the context that bounds
// shutdown is the one passed to the returned closer.
//
// Returns the closer, or an error if any of steps 1-4 fails. If chmod fails
// the listener is closed before returning.
func AdminFromConfig(ctx context.Context, v *Vault) (func(context.Context) error, error) {
	cfg := v.Config()
	socket := cfg.Server.Admin.Socket
	if socket == "" {
		return nil, errors.New("server.admin.socket is empty")
	}
	if err := os.MkdirAll(filepath.Dir(socket), 0o750); err != nil {
		return nil, fmt.Errorf("admin: mkdir socket dir: %w", err)
	}
	// Stale socket recovery: remove leftover paths before Listen. A missing
	// file is not an error; any other removal failure is returned to the
	// caller.
	// NOTE(review): the removal is unconditional — it does not check that the
	// existing path is a socket or that no other process is serving on it.
	if err := os.Remove(socket); err != nil && !os.IsNotExist(err) {
		return nil, fmt.Errorf("admin: remove stale socket: %w", err)
	}

	// Umask 0o007 lets the socket inherit group rw while blocking others.
	// The previous mask is restored right after Listen, before the error
	// check, so it is restored on the failure path too.
	// NOTE(review): the umask is process-wide, so files created concurrently
	// by other goroutines in this window would also see 0o007 — confirm that
	// is acceptable at startup.
	prevMask := syscall.Umask(0o007)
	lis, err := net.Listen("unix", socket)
	syscall.Umask(prevMask)
	if err != nil {
		return nil, fmt.Errorf("admin: listen unix %s: %w", socket, err)
	}
	// Belt + suspenders: even if umask leaked, force 0660.
	if err := os.Chmod(socket, adminSocketMode); err != nil {
		_ = lis.Close()
		return nil, fmt.Errorf("admin: chmod socket: %w", err)
	}

	mux := buildAdminMux(v)
	srv := &http.Server{
		Handler:           mux,
		ReadHeaderTimeout: 5 * time.Second,
		ReadTimeout:       30 * time.Second,
		WriteTimeout:      30 * time.Second,
	}
	// Serve in the background. http.ErrServerClosed is the expected result of
	// a Shutdown call and is not logged.
	go func() {
		if err := srv.Serve(lis); err != nil && !errors.Is(err, http.ErrServerClosed) {
			slog.Error("admin: server error", "err", err)
		}
	}()
	// The logged "mode" is a literal string; keep it in sync with
	// adminSocketMode if that constant ever changes.
	slog.Info("vault: admin UDS listening", "socket", socket, "mode", "0660")

	// shutdown stops the HTTP server within the caller-supplied context and
	// returns Shutdown's error; the removal error is deliberately ignored.
	shutdown := func(ctx context.Context) error {
		err := srv.Shutdown(ctx)
		// Always best-effort unlink; the socket is gone if Shutdown succeeded.
		_ = os.Remove(socket)
		return err
	}
	return shutdown, nil
}
+func buildAdminMux(v *Vault) http.Handler { + mux := http.NewServeMux() + + mux.HandleFunc("GET /health", func(w http.ResponseWriter, r *http.Request) { + writeJSON(w, http.StatusOK, map[string]string{"status": "ok"}) + }) + + mux.HandleFunc("GET /tokens", func(w http.ResponseWriter, r *http.Request) { + writeJSON(w, http.StatusOK, map[string]any{"tokens": v.Tokens().ListTokens()}) + }) + mux.HandleFunc("GET /roles", func(w http.ResponseWriter, r *http.Request) { + writeJSON(w, http.StatusOK, map[string]any{"roles": v.Tokens().ListRoles()}) + }) + + mux.HandleFunc("POST /tokens", func(w http.ResponseWriter, r *http.Request) { + var body struct { + User string `json:"user"` + Role string `json:"role"` + ExpiresDays *int `json:"expires_days"` + } + if err := readJSON(r, &body); err != nil { + writeError(w, http.StatusBadRequest, err.Error()) + return + } + if body.User == "" || body.Role == "" { + writeError(w, http.StatusBadRequest, "Missing required fields: user, role") + return + } + tok, err := v.Tokens().AddToken(body.User, body.Role, body.ExpiresDays) + if err != nil { + writeError(w, http.StatusBadRequest, err.Error()) + return + } + writeJSON(w, http.StatusCreated, tokenJSON(tok)) + }) + + mux.HandleFunc("POST /tokens/{user}/rotate", func(w http.ResponseWriter, r *http.Request) { + user := r.PathValue("user") + tok, err := v.Tokens().RotateToken(user) + if err != nil { + writeError(w, http.StatusBadRequest, err.Error()) + return + } + writeJSON(w, http.StatusOK, tokenJSON(tok)) + }) + + mux.HandleFunc("POST /tokens/_rotate_all", func(w http.ResponseWriter, r *http.Request) { + toks, err := v.Tokens().RotateAllTokens() + if err != nil { + writeError(w, http.StatusInternalServerError, err.Error()) + return + } + entries := make([]map[string]string, 0, len(toks)) + for _, t := range toks { + entries = append(entries, map[string]string{ + "user": t.User, "token": t.Token, "role": t.Role, + }) + } + writeJSON(w, http.StatusOK, map[string]any{ + "rotated": 
len(toks), + "tokens": entries, + }) + }) + + mux.HandleFunc("DELETE /tokens/{user}", func(w http.ResponseWriter, r *http.Request) { + user := r.PathValue("user") + if v.Tokens().RevokeToken(user) { + writeJSON(w, http.StatusOK, map[string]string{ + "message": fmt.Sprintf("Revoked token for '%s'", user), + }) + return + } + writeError(w, http.StatusNotFound, fmt.Sprintf("No token found for user '%s'", user)) + }) + + mux.HandleFunc("POST /roles", func(w http.ResponseWriter, r *http.Request) { + var body struct { + Name string `json:"name"` + Scope []string `json:"scope"` + TopK *int `json:"top_k"` + RateLimit string `json:"rate_limit"` + } + if err := readJSON(r, &body); err != nil { + writeError(w, http.StatusBadRequest, err.Error()) + return + } + if body.Name == "" || len(body.Scope) == 0 || body.TopK == nil || body.RateLimit == "" { + writeError(w, http.StatusBadRequest, "Missing required fields: name, scope, top_k, rate_limit") + return + } + role, err := v.Tokens().AddRole(body.Name, body.Scope, *body.TopK, body.RateLimit) + if err != nil { + writeError(w, http.StatusBadRequest, err.Error()) + return + } + writeJSON(w, http.StatusCreated, roleJSON(role)) + }) + + mux.HandleFunc("PUT /roles/{name}", func(w http.ResponseWriter, r *http.Request) { + name := r.PathValue("name") + var raw map[string]json.RawMessage + if err := readJSON(r, &raw); err != nil { + writeError(w, http.StatusBadRequest, err.Error()) + return + } + opts := tokens.UpdateRoleOpts{} + if v, ok := raw["scope"]; ok { + var s []string + if err := json.Unmarshal(v, &s); err != nil { + writeError(w, http.StatusBadRequest, "scope must be a string array") + return + } + opts.Scope = &s + } + if v, ok := raw["top_k"]; ok { + var n int + if err := json.Unmarshal(v, &n); err != nil { + writeError(w, http.StatusBadRequest, "top_k must be an integer") + return + } + opts.TopK = &n + } + if v, ok := raw["rate_limit"]; ok { + var s string + if err := json.Unmarshal(v, &s); err != nil { + writeError(w, 
http.StatusBadRequest, "rate_limit must be a string") + return + } + opts.RateLimit = &s + } + if opts.Scope == nil && opts.TopK == nil && opts.RateLimit == nil { + writeError(w, http.StatusBadRequest, "No fields to update") + return + } + role, err := v.Tokens().UpdateRole(name, opts) + if err != nil { + writeError(w, http.StatusBadRequest, err.Error()) + return + } + writeJSON(w, http.StatusOK, roleJSON(role)) + }) + + mux.HandleFunc("DELETE /roles/{name}", func(w http.ResponseWriter, r *http.Request) { + name := r.PathValue("name") + if err := v.Tokens().DeleteRole(name); err != nil { + writeError(w, http.StatusBadRequest, err.Error()) + return + } + writeJSON(w, http.StatusOK, map[string]string{ + "message": fmt.Sprintf("Deleted role '%s'", name), + }) + }) + + // 404 fallback for routes that didn't match. + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + writeError(w, http.StatusNotFound, fmt.Sprintf("No route for %s %s", r.Method, r.URL.Path)) + }) + + return mux +} + +func tokenJSON(t *tokens.Token) map[string]any { + exp := t.Expires + if exp == "" { + exp = "never" + } + return map[string]any{ + "user": t.User, + "token": t.Token, + "role": t.Role, + "issued_at": t.IssuedAt, + "expires": exp, + } +} + +func roleJSON(r *tokens.Role) map[string]any { + return map[string]any{ + "name": r.Name, + "scope": r.Scope, + "top_k": r.TopK, + "rate_limit": r.RateLimit, + } +} + +func writeJSON(w http.ResponseWriter, status int, body any) { + buf, err := json.Marshal(body) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + _, _ = w.Write(buf) +} + +func writeError(w http.ResponseWriter, status int, msg string) { + writeJSON(w, status, map[string]string{"error": msg}) +} + +func readJSON(r *http.Request, dst any) error { + if r.ContentLength == 0 { + return nil + } + dec := json.NewDecoder(r.Body) + dec.DisallowUnknownFields() + if 
// SanitizePathForLog normalizes a path for status output by removing a single
// trailing "/" if one is present. An empty input yields an empty string.
//
// NOTE: despite its name, this function performs no redaction — directory
// components (including any user names they contain) are returned unchanged.
// Callers that must hide parts of a path need to do so themselves.
func SanitizePathForLog(p string) string {
	// TrimSuffix already returns "" for "", so no separate empty check is
	// needed.
	return strings.TrimSuffix(p, "/")
}
`json:"roles"` + } + if err := json.NewDecoder(resp.Body).Decode(&body); err != nil { + t.Fatal(err) + } + names := map[string]bool{} + for _, r := range body.Roles { + names[r["name"].(string)] = true + } + if !names["admin"] || !names["member"] { + t.Errorf("default roles missing: %v", names) + } +} + +func TestAdminIssueListRevoke(t *testing.T) { + ts, _ := adminTestServer(t) + + // Issue + body := bytes.NewReader([]byte(`{"user":"alice","role":"member"}`)) + resp, err := http.Post(ts.URL+"/tokens", "application/json", body) + if err != nil { + t.Fatal(err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusCreated { + t.Fatalf("issue status = %d", resp.StatusCode) + } + var issued map[string]any + if err := json.NewDecoder(resp.Body).Decode(&issued); err != nil { + t.Fatal(err) + } + if !strings.HasPrefix(issued["token"].(string), "evt_") { + t.Errorf("token = %v", issued["token"]) + } + + // List + resp, _ = http.Get(ts.URL + "/tokens") + var listResp struct { + Tokens []map[string]any `json:"tokens"` + } + json.NewDecoder(resp.Body).Decode(&listResp) + resp.Body.Close() + found := false + for _, t := range listResp.Tokens { + if t["user"] == "alice" { + found = true + } + } + if !found { + t.Error("alice not in list response") + } + + // Revoke + req, _ := http.NewRequest("DELETE", ts.URL+"/tokens/alice", nil) + resp, err = http.DefaultClient.Do(req) + if err != nil { + t.Fatal(err) + } + resp.Body.Close() + if resp.StatusCode != http.StatusOK { + t.Errorf("revoke status = %d", resp.StatusCode) + } +} + +func TestAdminIssueMissingFields(t *testing.T) { + ts, _ := adminTestServer(t) + resp, err := http.Post(ts.URL+"/tokens", "application/json", bytes.NewReader([]byte(`{}`))) + if err != nil { + t.Fatal(err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusBadRequest { + t.Errorf("status = %d", resp.StatusCode) + } +} + +func TestAdminRevokeNotFound(t *testing.T) { + ts, _ := adminTestServer(t) + req, _ := http.NewRequest("DELETE", 
ts.URL+"/tokens/nobody", nil) + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatal(err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusNotFound { + t.Errorf("status = %d", resp.StatusCode) + } +} + +func TestAdminCreateRoleAndDelete(t *testing.T) { + ts, _ := adminTestServer(t) + body := bytes.NewReader([]byte(`{"name":"researcher","scope":["get_public_key"],"top_k":3,"rate_limit":"10/60s"}`)) + resp, err := http.Post(ts.URL+"/roles", "application/json", body) + if err != nil { + t.Fatal(err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusCreated { + t.Fatalf("create status = %d", resp.StatusCode) + } + // Delete + req, _ := http.NewRequest("DELETE", ts.URL+"/roles/researcher", nil) + resp2, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatal(err) + } + defer resp2.Body.Close() + if resp2.StatusCode != http.StatusOK { + t.Errorf("delete status = %d", resp2.StatusCode) + } +} + +func TestAdminUpdateRoleNoFieldsRejected(t *testing.T) { + ts, _ := adminTestServer(t) + req, _ := http.NewRequest("PUT", ts.URL+"/roles/member", bytes.NewReader([]byte(`{}`))) + req.Header.Set("Content-Type", "application/json") + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatal(err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusBadRequest { + t.Errorf("status = %d", resp.StatusCode) + } +} + +func TestAdminUnknownRoute(t *testing.T) { + ts, _ := adminTestServer(t) + resp, err := http.Get(ts.URL + "/nope") + if err != nil { + t.Fatal(err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusNotFound { + t.Errorf("status = %d", resp.StatusCode) + } +} + +// ── UDS bind + permissions (Unix only) ─────────────────────────── + +func TestAdminUDSBindMode0660(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("UDS not supported on Windows") + } + v := newAdminTestVault(t) + // Darwin's sockaddr_un caps sun_path at ~104 bytes; t.TempDir() with a + // long test name plus the 
framework-injected sequence dir overruns. Use + // a shorter MkdirTemp at /tmp to stay safely under the limit. + dir, err := os.MkdirTemp("", "vt-") + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { os.RemoveAll(dir) }) + v.cfg.Server.Admin.Socket = filepath.Join(dir, "x.sock") + + shutdown, err := AdminFromConfig(context.Background(), v) + if err != nil { + t.Fatal(err) + } + defer shutdown(context.Background()) + + info, err := os.Stat(v.cfg.Server.Admin.Socket) + if err != nil { + t.Fatal(err) + } + if mode := info.Mode().Perm(); mode != 0o660 { + t.Errorf("socket mode = %04o, want 0660", mode) + } + + // Smoke test: dial + GET /health. + conn, err := net.Dial("unix", v.cfg.Server.Admin.Socket) + if err != nil { + t.Fatal(err) + } + conn.Close() +} + +func TestAdminUDSStaleSocketRecovered(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("UDS not supported on Windows") + } + v := newAdminTestVault(t) + // Darwin's sockaddr_un caps sun_path at ~104 bytes; t.TempDir() with a + // long test name plus the framework-injected sequence dir overruns. Use + // a shorter MkdirTemp at /tmp to stay safely under the limit. + dir, err := os.MkdirTemp("", "vt-") + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { os.RemoveAll(dir) }) + v.cfg.Server.Admin.Socket = filepath.Join(dir, "x.sock") + // Plant a stale file at the socket path. 
+ if err := os.WriteFile(v.cfg.Server.Admin.Socket, []byte("stale"), 0o600); err != nil { + t.Fatal(err) + } + shutdown, err := AdminFromConfig(context.Background(), v) + if err != nil { + t.Fatalf("recovery failed: %v", err) + } + defer shutdown(context.Background()) + info, err := os.Stat(v.cfg.Server.Admin.Socket) + if err != nil { + t.Fatal(err) + } + if info.Mode()&os.ModeSocket == 0 { + t.Errorf("socket file is not a socket after recovery") + } +} + +func TestAdminUDSShutdownUnlinks(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("UDS not supported on Windows") + } + v := newAdminTestVault(t) + // Darwin's sockaddr_un caps sun_path at ~104 bytes; t.TempDir() with a + // long test name plus the framework-injected sequence dir overruns. Use + // a shorter MkdirTemp at /tmp to stay safely under the limit. + dir, err := os.MkdirTemp("", "vt-") + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { os.RemoveAll(dir) }) + v.cfg.Server.Admin.Socket = filepath.Join(dir, "x.sock") + shutdown, err := AdminFromConfig(context.Background(), v) + if err != nil { + t.Fatal(err) + } + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + if err := shutdown(ctx); err != nil { + t.Errorf("shutdown: %v", err) + } + if _, err := os.Stat(v.cfg.Server.Admin.Socket); !os.IsNotExist(err) { + t.Errorf("socket should be removed after shutdown, stat err = %v", err) + } +} diff --git a/vault/internal/server/audit.go b/vault/internal/server/audit.go new file mode 100644 index 0000000..d5ce3b2 --- /dev/null +++ b/vault/internal/server/audit.go @@ -0,0 +1,181 @@ +package server + +import ( + "encoding/json" + "fmt" + "io" + "net" + "os" + "path/filepath" + "strings" + "sync" + "time" + + "google.golang.org/grpc/peer" + "gopkg.in/natefinch/lumberjack.v2" +) + +// AuditMode parses AuditConfig.Mode strings into per-sink booleans. +// Valid values: "", "file", "stdout", "file+stdout". 
+type AuditMode struct { + File bool + Stdout bool +} + +func ParseAuditMode(mode string) AuditMode { + out := AuditMode{} + if mode == "" { + return out + } + for _, p := range strings.Split(mode, "+") { + switch strings.TrimSpace(strings.ToLower(p)) { + case "file": + out.File = true + case "stdout": + out.Stdout = true + } + } + return out +} + +// AuditEntry is the JSON structure written per request. Fields and order +// must match vault/audit.py:118-145 to keep golden compat tests aligned. +type AuditEntry struct { + Timestamp string `json:"timestamp"` + UserID string `json:"user_id"` + Method string `json:"method"` + TopK *int32 `json:"top_k"` + ResultCount int `json:"result_count"` + Status string `json:"status"` + SourceIP string `json:"source_ip"` + LatencyMs float64 `json:"latency_ms"` + Error *string `json:"error,omitempty"` +} + +// AuditLogger writes structured audit entries. Closed loggers are no-ops. +type AuditLogger struct { + mu sync.Mutex + writers []io.Writer + closers []io.Closer +} + +// NewAuditLogger constructs a logger for the given mode + file path. +// Returns a logger with Enabled() == false when the mode is empty. +func NewAuditLogger(cfg AuditConfig) (*AuditLogger, error) { + mode := ParseAuditMode(cfg.Mode) + l := &AuditLogger{} + + if mode.File { + path := cfg.Path + if path == "" { + path = "/var/log/runevault/audit.log" + } + if err := os.MkdirAll(filepath.Dir(path), 0o750); err != nil { + return nil, fmt.Errorf("audit: mkdir log dir: %w", err) + } + // Lumberjack handles daily-ish rotation by size + age. Match the + // Python deployment's 30-day retention; size cap is high enough + // that audit volume drives rotation by age, not size. 
+ rot := &lumberjack.Logger{ + Filename: path, + MaxSize: 100, // MB + MaxAge: 30, + MaxBackups: 30, + LocalTime: false, + Compress: false, + } + l.writers = append(l.writers, rot) + l.closers = append(l.closers, rot) + } + + if mode.Stdout { + l.writers = append(l.writers, os.Stdout) + } + return l, nil +} + +// Enabled reports whether at least one sink is configured. +func (a *AuditLogger) Enabled() bool { + if a == nil { + return false + } + a.mu.Lock() + defer a.mu.Unlock() + return len(a.writers) > 0 +} + +// Log emits a single audit entry. Round-trip latency is rounded to 2dp +// to match Python's `round(latency_ms, 2)`. +func (a *AuditLogger) Log(e AuditEntry) { + if a == nil || !a.Enabled() { + return + } + e.LatencyMs = roundTo(e.LatencyMs, 2) + + buf, err := json.Marshal(&e) + if err != nil { + return + } + buf = append(buf, '\n') + + a.mu.Lock() + defer a.mu.Unlock() + for _, w := range a.writers { + _, _ = w.Write(buf) + } +} + +// Close flushes file writers and prevents future Log calls from writing. +func (a *AuditLogger) Close() error { + if a == nil { + return nil + } + a.mu.Lock() + defer a.mu.Unlock() + var firstErr error + for _, c := range a.closers { + if err := c.Close(); err != nil && firstErr == nil { + firstErr = err + } + } + a.writers = nil + a.closers = nil + return firstErr +} + +func roundTo(v float64, decimals int) float64 { + mult := 1.0 + for i := 0; i < decimals; i++ { + mult *= 10 + } + if v >= 0 { + return float64(int64(v*mult+0.5)) / mult + } + return float64(int64(v*mult-0.5)) / mult +} + +// ExtractSourceIP mirrors vault/audit.py:55-78 — peer addresses come in +// gRPC's "ipv4:H:P", "ipv6:[::1]:P", or "unix:/path" form. 
+func ExtractSourceIP(p *peer.Peer) string {
+	if p == nil || p.Addr == nil {
+		return "unknown"
+	}
+	addr := p.Addr.String()
+	switch a := p.Addr.(type) {
+	case *net.TCPAddr:
+		// a == nil covers a typed-nil pointer stored in the interface,
+		// which the p.Addr == nil check above does not catch.
+		if a == nil || a.IP == nil {
+			return addr
+		}
+		return a.IP.String()
+	case *net.UnixAddr:
+		if a == nil {
+			return addr
+		}
+		return "unix:" + a.Name
+	}
+	// Any other net.Addr: drop the port when the string form is host:port,
+	// otherwise report the address string untouched.
+	if h, _, err := net.SplitHostPort(addr); err == nil {
+		return h
+	}
+	return addr
+}
+
+// nowUTCISO returns the current UTC time with microsecond precision,
+// e.g. "2026-04-23T00:00:00.000000Z".
+func nowUTCISO() string {
+	return time.Now().UTC().Format("2006-01-02T15:04:05.000000Z07:00")
+}
diff --git a/vault/internal/server/audit_test.go b/vault/internal/server/audit_test.go
new file mode 100644
index 0000000..adb55bb
--- /dev/null
+++ b/vault/internal/server/audit_test.go
@@ -0,0 +1,156 @@
+package server
+
+import (
+	"bufio"
+	"encoding/json"
+	"net"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"google.golang.org/grpc/peer"
+)
+
+func TestParseAuditMode(t *testing.T) {
+	cases := map[string]AuditMode{
+		"":             {},
+		"file":         {File: true},
+		"stdout":       {Stdout: true},
+		"file+stdout":  {File: true, Stdout: true},
+		"stdout+file":  {File: true, Stdout: true},
+		"FILE":         {File: true},
+		" file ":       {File: true},
+		"unknown":      {},
+		"file+unknown": {File: true},
+	}
+	for in, want := range cases {
+		got := ParseAuditMode(in)
+		if got != want {
+			t.Errorf("ParseAuditMode(%q) = %+v, want %+v", in, got, want)
+		}
+	}
+}
+
+func TestAuditLoggerDisabledWhenModeEmpty(t *testing.T) {
+	l, err := NewAuditLogger(AuditConfig{Mode: ""})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if l.Enabled() {
+		t.Error("logger enabled with empty mode")
+	}
+	// Log on disabled logger must be no-op (and not panic).
+	l.Log(AuditEntry{UserID: "x"})
+}
+
+func TestAuditLoggerFileMode(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "audit.log")
+	l, err := NewAuditLogger(AuditConfig{Mode: "file", Path: path})
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer l.Close()
+
+	topK := int32(10)
+	errMsg := "boom"
+	l.Log(AuditEntry{
+		Timestamp:   "2026-04-23T00:00:00.000000Z",
+		UserID:      "alice",
+		Method:      "decrypt_scores",
+		TopK:        &topK,
+		ResultCount: 7,
+		Status:      "success",
+		SourceIP:    "127.0.0.1",
+		LatencyMs:   45.6789,
+		Error:       &errMsg,
+	})
+	// Close before reading so the entry is flushed to disk.
+	l.Close()
+
+	f, err := os.Open(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer f.Close()
+	scanner := bufio.NewScanner(f)
+	if !scanner.Scan() {
+		t.Fatal("audit log empty")
+	}
+	var got map[string]any
+	if err := json.Unmarshal(scanner.Bytes(), &got); err != nil {
+		t.Fatal(err)
+	}
+	if got["user_id"] != "alice" {
+		t.Errorf("user_id = %v, want alice", got["user_id"])
+	}
+	if got["method"] != "decrypt_scores" {
+		t.Errorf("method = %v", got["method"])
+	}
+	if got["top_k"].(float64) != 10 {
+		t.Errorf("top_k = %v", got["top_k"])
+	}
+	if got["latency_ms"].(float64) != 45.68 {
+		t.Errorf("latency_ms = %v, want 45.68 (rounded)", got["latency_ms"])
+	}
+	if got["error"] != "boom" {
+		t.Errorf("error = %v", got["error"])
+	}
+}
+
+func TestAuditLoggerOmitsErrorWhenNil(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "audit.log")
+	l, err := NewAuditLogger(AuditConfig{Mode: "file", Path: path})
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer l.Close()
+	l.Log(AuditEntry{UserID: "x", Method: "y", Status: "success"})
+	l.Close()
+
+	// Fail loudly if nothing was written: an unreadable or empty log would
+	// otherwise satisfy the "no error field" check below vacuously.
+	body, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(body) == 0 {
+		t.Fatal("audit log empty")
+	}
+	if strings.Contains(string(body), `"error"`) {
+		t.Errorf("audit entry contains error field for non-error case: %s", body)
+	}
+}
+
+func TestExtractSourceIPTCP(t *testing.T) {
+	addr := &net.TCPAddr{IP: net.ParseIP("10.0.0.5"), Port: 12345}
+	got := ExtractSourceIP(&peer.Peer{Addr: addr})
+	if got != "10.0.0.5" {
+		t.Errorf("got %q, want 10.0.0.5", got)
+	}
+}
+
+func TestExtractSourceIPUnix(t *testing.T) {
+	addr := &net.UnixAddr{Name: "/tmp/x.sock", Net: "unix"}
+	got := ExtractSourceIP(&peer.Peer{Addr: addr})
+	if got != "unix:/tmp/x.sock" {
+		t.Errorf("got %q, want unix:/tmp/x.sock", got)
+	}
+}
+
+func TestExtractSourceIPNil(t *testing.T) {
+	if got := ExtractSourceIP(nil); got != "unknown" {
+		t.Errorf("nil peer: got %q, want unknown", got)
+	}
+}
+
+func TestRoundTo(t *testing.T) {
+	cases := []struct {
+		in   float64
+		want float64
+	}{
+		{45.6789, 45.68},
+		{45.6749, 45.67},
+		{0, 0},
+		{-1.236, -1.24},
+	}
+	for _, c := range cases {
+		got := roundTo(c.in, 2)
+		if got != c.want {
+			t.Errorf("roundTo(%v,2) = %v, want %v", c.in, got, c.want)
+		}
+	}
+}
diff --git a/vault/internal/server/config.go b/vault/internal/server/config.go
new file mode 100644
index 0000000..8f0d036
--- /dev/null
+++ b/vault/internal/server/config.go
@@ -0,0 +1,250 @@
+// Package server hosts the daemon transports (gRPC, admin UDS), audit log,
+// and runtime configuration. Pure crypto/token logic lives in internal/crypto
+// and internal/tokens respectively.
+package server
+
+import (
+	"errors"
+	"fmt"
+	"io/fs"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"gopkg.in/yaml.v3"
+)
+
+// ConfigLookupPaths lists, in priority order, the on-disk locations that
+// LoadConfig probes when the caller doesn't pass an explicit path.
+var ConfigLookupPaths = []string{
+	"/opt/runevault/configs/runevault.conf",
+	"./runevault.conf",
+}
+
+// 0640: group-readable so runevault group members can run CLI commands without sudo.
+const expectedSecretMode fs.FileMode = 0o640
+
+// Config is the in-memory shape of runevault.conf. Field names follow the
+// YAML schema exactly so the loader can decode without an intermediate type.
+type Config struct { + Server ServerConfig `yaml:"server"` + Keys KeysConfig `yaml:"keys"` + Envector EnvectorConfig `yaml:"envector"` + Tokens TokensConfig `yaml:"tokens"` + Audit AuditConfig `yaml:"audit"` + + // Source records where this Config was loaded from (resolved absolute + // path), populated by LoadConfig. Empty for in-memory test configs. + Source string `yaml:"-" json:"-"` +} + +type ServerConfig struct { + GRPC GRPCConfig `yaml:"grpc"` + Admin AdminConfig `yaml:"admin"` +} + +type GRPCConfig struct { + Host string `yaml:"host"` + Port int `yaml:"port"` + TLS TLSConfig `yaml:"tls"` +} + +type TLSConfig struct { + Cert string `yaml:"cert"` + Key string `yaml:"key"` + Disable bool `yaml:"disable"` +} + +type AdminConfig struct { + Socket string `yaml:"socket"` +} + +type KeysConfig struct { + Path string `yaml:"path"` + IndexName string `yaml:"index_name"` + EmbeddingDim int `yaml:"embedding_dim"` +} + +// EnvectorConfig accepts either an inline api_key or an api_key_file +// pointing at a 0600-mode file containing the same value. If both are +// set, api_key_file wins. Resolve() materialises the final string into +// APIKey and clears APIKeyFile. +type EnvectorConfig struct { + Endpoint string `yaml:"endpoint"` + APIKey string `yaml:"api_key"` + APIKeyFile string `yaml:"api_key_file"` +} + +type TokensConfig struct { + TeamSecret string `yaml:"team_secret"` + TeamSecretFile string `yaml:"team_secret_file"` + RolesFile string `yaml:"roles_file"` + TokensFile string `yaml:"tokens_file"` +} + +// AuditConfig.Mode is one of: "", "file", "stdout", "file+stdout". +// Empty disables audit logging. +type AuditConfig struct { + Mode string `yaml:"mode"` + Path string `yaml:"path"` +} + +// LoadConfig resolves the config path (caller override → ConfigLookupPaths) +// and decodes the YAML at that location. The returned Config has +// *_file indirection materialised into the corresponding inline fields +// and Source set to the resolved absolute path. 
+// +// Missing config produces an error that names every path probed so the +// operator can copy the example file into place. +func LoadConfig(override string) (*Config, error) { + path, searched, err := resolveConfigPath(override) + if err != nil { + return nil, err + } + + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("read config %s: %w", path, err) + } + + var cfg Config + dec := yaml.NewDecoder(strings.NewReader(string(data))) + dec.KnownFields(true) + if err := dec.Decode(&cfg); err != nil { + return nil, fmt.Errorf("parse config %s: %w (searched: %s)", path, err, strings.Join(searched, ", ")) + } + cfg.Source = path + + if err := checkSecretMode(path, "runevault.conf"); err != nil { + return nil, err + } + + if err := cfg.Resolve(); err != nil { + return nil, fmt.Errorf("resolve config %s: %w", path, err) + } + return &cfg, nil +} + +// resolveConfigPath returns the path to use plus the list of all paths +// searched (for error messages). Override wins if non-empty. +func resolveConfigPath(override string) (path string, searched []string, err error) { + if override != "" { + searched = append(searched, override) + if _, statErr := os.Stat(override); statErr != nil { + return "", searched, fmt.Errorf("config file not found at --config %s: %w", override, statErr) + } + abs, _ := filepath.Abs(override) + return abs, searched, nil + } + for _, p := range ConfigLookupPaths { + searched = append(searched, p) + if _, statErr := os.Stat(p); statErr == nil { + abs, _ := filepath.Abs(p) + return abs, searched, nil + } + } + return "", searched, fmt.Errorf("config file not found (searched: %s)", strings.Join(searched, ", ")) +} + +// Resolve materialises *_file indirections into their inline equivalents. +// Returns an error if any referenced secret file has a permissive mode +// (anything looser than 0o640). Idempotent. 
+func (c *Config) Resolve() error {
+	// A non-empty *_file field overrides the inline value. Clearing it
+	// afterwards is what makes a second Resolve call a no-op.
+	if c.Envector.APIKeyFile != "" {
+		val, err := readSecretFile(c.Envector.APIKeyFile, "envector.api_key_file")
+		if err != nil {
+			return err
+		}
+		c.Envector.APIKey = val
+		c.Envector.APIKeyFile = ""
+	}
+	if c.Tokens.TeamSecretFile != "" {
+		val, err := readSecretFile(c.Tokens.TeamSecretFile, "tokens.team_secret_file")
+		if err != nil {
+			return err
+		}
+		c.Tokens.TeamSecret = val
+		c.Tokens.TeamSecretFile = ""
+	}
+	return nil
+}
+
+// readSecretFile returns the contents of the secret file at path with any
+// trailing line terminators removed. label is the config key that referenced
+// the file and is used only in error messages. The file's mode is checked
+// first; a file that is too permissive is rejected without being read.
+func readSecretFile(path, label string) (string, error) {
+	if err := checkSecretMode(path, label); err != nil {
+		return "", err
+	}
+	b, err := os.ReadFile(path)
+	if err != nil {
+		return "", fmt.Errorf("read %s %s: %w", label, path, err)
+	}
+	// Strip CR as well as LF so a file saved with Windows line endings
+	// does not leave a stray "\r" on the end of the secret.
+	return strings.TrimRight(string(b), "\r\n"), nil
+}
+
+// checkSecretMode returns an error if the file's mode permits any access
+// beyond owner read/write and group read (i.e., any bit outside 0o640).
+// Any stat failure (typically a missing file) is treated as "not our
+// problem": the caller's subsequent read surfaces the error with the
+// right context.
+func checkSecretMode(path, label string) error {
+	info, err := os.Stat(path)
+	if err != nil {
+		return nil
+	}
+	mode := info.Mode().Perm()
+	// AND-NOT leaves only the permission bits that fall outside the
+	// allowed mask; any survivor means the file is too open.
+	if mode&^expectedSecretMode != 0 {
+		return fmt.Errorf("config: %s %s mode %04o is too permissive (expected at most 0640)", label, path, mode)
+	}
+	return nil
+}
+
+// Redact returns a copy of c with secret fields replaced by sentinel
+// strings. Use this for any debug dumps, structured log payloads, or
+// admin endpoints that surface configuration to operators.
+func (c *Config) Redact() Config {
+	// Work on a value copy so the receiver is never modified.
+	out := *c
+	// Only populated fields are masked; empty ones stay empty so the
+	// output still shows which secrets are unset.
+	if out.Envector.APIKey != "" {
+		out.Envector.APIKey = "[REDACTED]"
+	}
+	// The *_file paths are masked as well as the secret values.
+	if out.Envector.APIKeyFile != "" {
+		out.Envector.APIKeyFile = "[REDACTED]"
+	}
+	if out.Tokens.TeamSecret != "" {
+		out.Tokens.TeamSecret = "[REDACTED]"
+	}
+	if out.Tokens.TeamSecretFile != "" {
+		out.Tokens.TeamSecretFile = "[REDACTED]"
+	}
+	return out
+}
+
+// Validate enforces invariants the daemon needs at startup.
+// Returns nil for fully populated configs.
+//
+// Every violation is collected before returning, so the single error
+// lists all missing settings rather than just the first one found.
+func (c *Config) Validate() error {
+	var errs []string
+	if c.Server.Admin.Socket == "" {
+		errs = append(errs, "server.admin.socket is required")
+	}
+	// A zero port is treated as "not set".
+	if c.Server.GRPC.Port == 0 {
+		errs = append(errs, "server.grpc.port is required")
+	}
+	// Cert and key are only mandatory while TLS is enabled.
+	if !c.Server.GRPC.TLS.Disable {
+		if c.Server.GRPC.TLS.Cert == "" || c.Server.GRPC.TLS.Key == "" {
+			errs = append(errs, "server.grpc.tls.cert and server.grpc.tls.key are required (or set server.grpc.tls.disable=true)")
+		}
+	}
+	if c.Keys.Path == "" {
+		errs = append(errs, "keys.path is required")
+	}
+	if c.Keys.EmbeddingDim == 0 {
+		errs = append(errs, "keys.embedding_dim is required")
+	}
+	if c.Tokens.RolesFile == "" {
+		errs = append(errs, "tokens.roles_file is required")
+	}
+	if c.Tokens.TokensFile == "" {
+		errs = append(errs, "tokens.tokens_file is required")
+	}
+	if len(errs) > 0 {
+		// One bullet per problem beneath a common header line.
+		return errors.New("config invalid:\n - " + strings.Join(errs, "\n - "))
+	}
+	return nil
+}
diff --git a/vault/internal/server/config_test.go b/vault/internal/server/config_test.go
new file mode 100644
index 0000000..85d8f60
--- /dev/null
+++ b/vault/internal/server/config_test.go
@@ -0,0 +1,293 @@
+package server
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+// minimalValidConfig returns a YAML body that satisfies Validate().
+func minimalValidConfig(t *testing.T) string { + t.Helper() + return `server: + grpc: + host: 127.0.0.1 + port: 50051 + tls: + disable: true + admin: + socket: /tmp/admin.sock +keys: + path: /tmp/vault-keys + embedding_dim: 1024 +envector: + endpoint: https://example.com + api_key: inline-api-key +tokens: + team_secret: inline-team-secret-deadbeef + roles_file: /tmp/roles.yml + tokens_file: /tmp/tokens.yml +audit: + mode: stdout +` +} + +func writeConfig(t *testing.T, body string) string { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, "runevault.conf") + if err := os.WriteFile(path, []byte(body), 0o600); err != nil { + t.Fatal(err) + } + return path +} + +func TestLoadConfigMinimalValid(t *testing.T) { + path := writeConfig(t, minimalValidConfig(t)) + cfg, err := LoadConfig(path) + if err != nil { + t.Fatal(err) + } + if cfg.Server.GRPC.Port != 50051 { + t.Errorf("port = %d, want 50051", cfg.Server.GRPC.Port) + } + if cfg.Tokens.TeamSecret != "inline-team-secret-deadbeef" { + t.Errorf("team_secret = %q, want inline value", cfg.Tokens.TeamSecret) + } + if cfg.Source != path { + // Source may be absolute even if path is already absolute (it should match). + abs, _ := filepath.Abs(path) + if cfg.Source != abs { + t.Errorf("Source = %q, want %q", cfg.Source, abs) + } + } + if err := cfg.Validate(); err != nil { + t.Errorf("Validate: %v", err) + } +} + +func TestLoadConfigMissingNamesAllPaths(t *testing.T) { + _, err := LoadConfig("/tmp/this/path/does/not/exist/runevault.conf") + if err == nil { + t.Fatal("expected error for missing config") + } + if !strings.Contains(err.Error(), "/tmp/this/path/does/not/exist/runevault.conf") { + t.Errorf("err missing override path: %v", err) + } +} + +func TestLoadConfigDefaultLookupErrorListsPaths(t *testing.T) { + // Stash the package-level lookup list and restore. 
+ orig := ConfigLookupPaths + defer func() { ConfigLookupPaths = orig }() + ConfigLookupPaths = []string{"/nope/a.conf", "/nope/b.conf"} + + _, err := LoadConfig("") + if err == nil { + t.Fatal("expected error") + } + for _, p := range ConfigLookupPaths { + if !strings.Contains(err.Error(), p) { + t.Errorf("err missing %s: %v", p, err) + } + } +} + +func TestLoadConfigUnknownFieldsRejected(t *testing.T) { + body := minimalValidConfig(t) + "extra_unknown_field: 42\n" + path := writeConfig(t, body) + _, err := LoadConfig(path) + if err == nil { + t.Error("unknown top-level field accepted, want strict error") + } +} + +func TestLoadConfigAPIKeyFileIndirection(t *testing.T) { + dir := t.TempDir() + keyFile := filepath.Join(dir, "envector.key") + if err := os.WriteFile(keyFile, []byte("file-api-key\n"), 0o600); err != nil { + t.Fatal(err) + } + body := strings.Replace( + minimalValidConfig(t), + " api_key: inline-api-key", + " api_key_file: "+keyFile, + 1, + ) + path := writeConfig(t, body) + cfg, err := LoadConfig(path) + if err != nil { + t.Fatal(err) + } + if cfg.Envector.APIKey != "file-api-key" { + t.Errorf("api_key = %q, want file-api-key", cfg.Envector.APIKey) + } + if cfg.Envector.APIKeyFile != "" { + t.Errorf("api_key_file should be cleared after Resolve, got %q", cfg.Envector.APIKeyFile) + } +} + +func TestLoadConfigTeamSecretFileIndirection(t *testing.T) { + dir := t.TempDir() + secretFile := filepath.Join(dir, "team.secret") + if err := os.WriteFile(secretFile, []byte("file-team-secret"), 0o600); err != nil { + t.Fatal(err) + } + body := strings.Replace( + minimalValidConfig(t), + " team_secret: inline-team-secret-deadbeef", + " team_secret_file: "+secretFile, + 1, + ) + path := writeConfig(t, body) + cfg, err := LoadConfig(path) + if err != nil { + t.Fatal(err) + } + if cfg.Tokens.TeamSecret != "file-team-secret" { + t.Errorf("team_secret = %q, want file-team-secret", cfg.Tokens.TeamSecret) + } +} + +func TestLoadConfigRejectsWorldReadableConfig(t 
*testing.T) { + path := writeConfig(t, minimalValidConfig(t)) + if err := os.Chmod(path, 0o644); err != nil { + t.Fatal(err) + } + _, err := LoadConfig(path) + if err == nil { + t.Fatal("expected error for world-readable config, got nil") + } + if !strings.Contains(err.Error(), "too permissive") { + t.Errorf("err missing 'too permissive': %v", err) + } +} + +func TestLoadConfigRejectsWorldReadableSecretFile(t *testing.T) { + dir := t.TempDir() + secretFile := filepath.Join(dir, "team.secret") + if err := os.WriteFile(secretFile, []byte("file-team-secret"), 0o644); err != nil { + t.Fatal(err) + } + body := strings.Replace( + minimalValidConfig(t), + " team_secret: inline-team-secret-deadbeef", + " team_secret_file: "+secretFile, + 1, + ) + path := writeConfig(t, body) + _, err := LoadConfig(path) + if err == nil { + t.Fatal("expected error for world-readable team_secret_file, got nil") + } + if !strings.Contains(err.Error(), "too permissive") { + t.Errorf("err missing 'too permissive': %v", err) + } +} + +func TestLoadConfigSecretFileMissing(t *testing.T) { + body := strings.Replace( + minimalValidConfig(t), + " team_secret: inline-team-secret-deadbeef", + " team_secret_file: /nope/team.secret", + 1, + ) + path := writeConfig(t, body) + _, err := LoadConfig(path) + if err == nil { + t.Fatal("expected error for missing team_secret_file") + } + if !strings.Contains(err.Error(), "team_secret_file") { + t.Errorf("err missing label: %v", err) + } +} + +func TestRedactMasksSecrets(t *testing.T) { + cfg := &Config{ + Envector: EnvectorConfig{APIKey: "deadbeef", APIKeyFile: "/x"}, + Tokens: TokensConfig{TeamSecret: "supersecret", TeamSecretFile: "/y"}, + } + r := cfg.Redact() + if r.Envector.APIKey != "[REDACTED]" { + t.Errorf("api_key not redacted: %q", r.Envector.APIKey) + } + if r.Envector.APIKeyFile != "[REDACTED]" { + t.Errorf("api_key_file not redacted: %q", r.Envector.APIKeyFile) + } + if r.Tokens.TeamSecret != "[REDACTED]" { + t.Errorf("team_secret not redacted: 
%q", r.Tokens.TeamSecret) + } + if r.Tokens.TeamSecretFile != "[REDACTED]" { + t.Errorf("team_secret_file not redacted: %q", r.Tokens.TeamSecretFile) + } + // Original must be untouched. + if cfg.Envector.APIKey != "deadbeef" { + t.Errorf("Redact mutated original") + } +} + +func TestValidateRejectsMissingFields(t *testing.T) { + cases := map[string]func(*Config){ + "missing socket": func(c *Config) { c.Server.Admin.Socket = "" }, + "missing port": func(c *Config) { c.Server.GRPC.Port = 0 }, + "missing keys.path": func(c *Config) { c.Keys.Path = "" }, + "missing dim": func(c *Config) { c.Keys.EmbeddingDim = 0 }, + "missing roles_file": func(c *Config) { c.Tokens.RolesFile = "" }, + "missing tokens_file": func(c *Config) { c.Tokens.TokensFile = "" }, + } + base := func() *Config { + path := writeConfig(t, minimalValidConfig(t)) + c, err := LoadConfig(path) + if err != nil { + t.Fatal(err) + } + return c + } + for name, mut := range cases { + t.Run(name, func(t *testing.T) { + c := base() + mut(c) + if err := c.Validate(); err == nil { + t.Errorf("Validate accepted %s", name) + } + }) + } +} + +func TestValidateRejectsTLSWithoutCertKey(t *testing.T) { + body := strings.Replace( + minimalValidConfig(t), + " disable: true", + " disable: false", + 1, + ) + path := writeConfig(t, body) + cfg, err := LoadConfig(path) + if err != nil { + t.Fatal(err) + } + if err := cfg.Validate(); err == nil { + t.Error("Validate accepted TLS enabled without cert/key") + } +} + +func TestExampleConfigParsesCleanly(t *testing.T) { + // The committed example file should at least parse — operators copy it. 
+ data, err := os.ReadFile("testdata/runevault.conf.example") + if err != nil { + t.Fatal(err) + } + tmp := t.TempDir() + path := filepath.Join(tmp, "example.conf") + if err := os.WriteFile(path, data, 0o600); err != nil { + t.Fatal(err) + } + cfg, err := LoadConfig(path) + if err != nil { + t.Fatalf("example file failed to parse: %v", err) + } + if cfg.Server.GRPC.Port != 50051 { + t.Errorf("example: port = %d", cfg.Server.GRPC.Port) + } +} diff --git a/vault/internal/server/ensure_vault.go b/vault/internal/server/ensure_vault.go new file mode 100644 index 0000000..2fb2233 --- /dev/null +++ b/vault/internal/server/ensure_vault.go @@ -0,0 +1,76 @@ +package server + +import ( + "context" + "fmt" + "log/slog" + "path/filepath" + + envector "github.com/CryptoLabInc/envector-go-sdk" +) + +// EnsureVault connects to enVector Cloud and idempotently performs the two +// cloud-side setup steps that the Python vault_core.ensure_vault() ran at +// startup before the gRPC server began accepting requests: +// +// 1. ActivateKeys — registers the EvalKey bundle if absent, unloads any +// other resident key, then loads the target key (4-RPC sequence in the +// SDK, already idempotent). +// 2. Index — creates the team index if it does not yet exist; no-op when +// the index is already present. +// +// Returns nil immediately (offline mode) when envector.endpoint or +// envector.api_key is unset, matching the Python "warn and skip" behaviour. 
+func EnsureVault(ctx context.Context, cfg *Config) error { + if cfg.Envector.Endpoint == "" || cfg.Envector.APIKey == "" { + slog.Warn("vault: envector.endpoint / envector.api_key not set — skipping cloud key registration and index setup (offline mode)") + return nil + } + if cfg.Keys.IndexName == "" { + slog.Warn("vault: keys.index_name not set — skipping index creation") + return nil + } + + slog.Info("vault: connecting to enVector Cloud", "endpoint", cfg.Envector.Endpoint) + + client, err := envector.NewClient( + envector.WithAddress(cfg.Envector.Endpoint), + envector.WithAccessToken(cfg.Envector.APIKey), + ) + if err != nil { + return fmt.Errorf("ensure vault: dial enVector: %w", err) + } + defer client.Close() + + // Only EvalKey is needed: ActivateKeys uploads it to the cloud, and + // Index/createIndex uses Dim() and ID() which are set from options + // regardless of which key parts are loaded. + keyDir := filepath.Join(cfg.Keys.Path, defaultKeyID(cfg)) + keys, err := envector.OpenKeysFromFile( + envector.WithKeyPath(keyDir), + envector.WithKeyID(defaultKeyID(cfg)), + envector.WithKeyDim(cfg.Keys.EmbeddingDim), + envector.WithKeyParts(envector.KeyPartEval), + ) + if err != nil { + return fmt.Errorf("ensure vault: open eval key: %w", err) + } + defer keys.Close() + + slog.Info("vault: activating FHE keys on enVector Cloud", "key_id", defaultKeyID(cfg)) + if err := client.ActivateKeys(ctx, keys); err != nil { + return fmt.Errorf("ensure vault: activate keys: %w", err) + } + slog.Info("vault: FHE keys activated") + + slog.Info("vault: ensuring team index", "index", cfg.Keys.IndexName) + if _, err := client.Index(ctx, + envector.WithIndexName(cfg.Keys.IndexName), + envector.WithIndexKeys(keys), + ); err != nil { + return fmt.Errorf("ensure vault: ensure index: %w", err) + } + slog.Info("vault: team index ready", "index", cfg.Keys.IndexName) + + return nil +} diff --git a/vault/internal/server/grpc.go b/vault/internal/server/grpc.go new file mode 100644 index 
0000000..cb05a13 --- /dev/null +++ b/vault/internal/server/grpc.go @@ -0,0 +1,381 @@ +package server + +import ( + "context" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "sort" + "time" + + "google.golang.org/grpc/codes" + "google.golang.org/grpc/peer" + "google.golang.org/grpc/status" + + "github.com/CryptoLabInc/rune-admin/vault/internal/crypto" + "github.com/CryptoLabInc/rune-admin/vault/internal/tokens" + pb "github.com/CryptoLabInc/rune-admin/vault/pkg/vaultpb" +) + +// MaxMessageSize bounds gRPC frames. EvalKey alone can be tens of MB. +const MaxMessageSize = 256 * 1024 * 1024 + +// Vault is the runtime container shared by all RPC handlers and the +// admin UDS server. It owns the long-lived token store, FHE key handle, +// and audit logger. Construct via NewVault, tear down via Close. +type Vault struct { + cfg *Config + tokens *tokens.Store + keys *crypto.EnvectorKeys + audit *AuditLogger + + // Cached bundle pieces from disk. Re-read on demand to pick up + // rotated keys without restarting; kept here for zero-copy reuse. + bundleParams crypto.KeysParams +} + +// NewVault wires all subsystems together. Caller is responsible for Close. +func NewVault(cfg *Config, tokenStore *tokens.Store, keys *crypto.EnvectorKeys, audit *AuditLogger) *Vault { + return &Vault{ + cfg: cfg, + tokens: tokenStore, + keys: keys, + audit: audit, + bundleParams: crypto.KeysParams{ + Root: cfg.Keys.Path, + KeyID: defaultKeyID(cfg), + Dim: cfg.Keys.EmbeddingDim, + }, + } +} + +func defaultKeyID(_ *Config) string { + // Fixed for Phase 1 — Python pins KEY_ID="vault-key" in vault_core.py:30. + // Surfaced as a helper so a future config field can override. + return "vault-key" +} + +// Tokens exposes the token store for the admin UDS server. +func (v *Vault) Tokens() *tokens.Store { return v.tokens } + +// Config exposes the resolved config (e.g., for status reporting). +func (v *Vault) Config() *Config { return v.cfg } + +// Close releases the FHE key handle. 
The audit logger and token store +// are owned by the caller (typically the daemon main). +func (v *Vault) Close() error { + if v.keys != nil { + _ = v.keys.Close() + } + return nil +} + +// VaultGRPC is the gRPC service wrapper. Exposed for grpc.RegisterService. +type VaultGRPC struct { + pb.UnimplementedVaultServiceServer + v *Vault +} + +func NewVaultGRPC(v *Vault) *VaultGRPC { return &VaultGRPC{v: v} } + +// ── GetPublicKey ────────────────────────────────────────────────── + +func (s *VaultGRPC) GetPublicKey(ctx context.Context, req *pb.GetPublicKeyRequest) (*pb.GetPublicKeyResponse, error) { + start := time.Now() + user := s.v.tokens.GetUsername(req.GetToken()) + if user == "" { + user = "unknown" + } + resultCount := 0 + statusStr := "success" + var errDetail *string + defer func() { + s.emit(ctx, "get_public_key", user, nil, resultCount, statusStr, errDetail, time.Since(start)) + }() + + username, role, err := s.v.tokens.Validate(req.GetToken()) + if err != nil { + st, msg := mapTokenError(err) + statusStr, errDetail = errStatus(err) + return &pb.GetPublicKeyResponse{Error: msg}, status.Error(st, msg) + } + user = username + if err := role.CheckScope("get_public_key"); err != nil { + statusStr = "denied" + ed := err.Error() + errDetail = &ed + return &pb.GetPublicKeyResponse{Error: err.Error()}, status.Error(codes.PermissionDenied, err.Error()) + } + + bundle, err := s.v.buildBundle(req.GetToken()) + if err != nil { + statusStr = "error" + ed := err.Error() + errDetail = &ed + return &pb.GetPublicKeyResponse{Error: err.Error()}, status.Error(codes.Internal, err.Error()) + } + js, err := json.Marshal(bundle) + if err != nil { + statusStr = "error" + ed := err.Error() + errDetail = &ed + return &pb.GetPublicKeyResponse{Error: err.Error()}, status.Error(codes.Internal, err.Error()) + } + resultCount = 1 + return &pb.GetPublicKeyResponse{KeyBundleJson: string(js)}, nil +} + +// buildBundle assembles the per-token JSON bundle returned by GetPublicKey. 
+// Order of keys is irrelevant — clients parse by name. +func (s *Vault) buildBundle(token string) (map[string]any, error) { + pub, err := crypto.ReadPublicKeyBundle(s.bundleParams) + if err != nil { + return nil, err + } + bundle := map[string]any{ + "EncKey.json": pub.EncKey, + "EvalKey.json": pub.EvalKey, + "key_id": s.bundleParams.KeyID, + } + if s.cfg.Keys.IndexName != "" { + bundle["index_name"] = s.cfg.Keys.IndexName + } + agentID := crypto.AgentIDFromToken(token) + dek, err := crypto.DeriveAgentKey(s.cfg.Tokens.TeamSecret, agentID) + if err != nil { + return nil, err + } + bundle["agent_id"] = agentID + bundle["agent_dek"] = base64.StdEncoding.EncodeToString(dek) + bundle["envector_endpoint"] = s.cfg.Envector.Endpoint + bundle["envector_api_key"] = s.cfg.Envector.APIKey + return bundle, nil +} + +// ── DecryptScores ───────────────────────────────────────────────── + +func (s *VaultGRPC) DecryptScores(ctx context.Context, req *pb.DecryptScoresRequest) (*pb.DecryptScoresResponse, error) { + start := time.Now() + topK := req.GetTopK() + user := s.v.tokens.GetUsername(req.GetToken()) + if user == "" { + user = "unknown" + } + resultCount := 0 + statusStr := "success" + var errDetail *string + defer func() { + s.emit(ctx, "decrypt_scores", user, &topK, resultCount, statusStr, errDetail, time.Since(start)) + }() + + username, role, err := s.v.tokens.Validate(req.GetToken()) + if err != nil { + st, msg := mapTokenError(err) + statusStr, errDetail = errStatus(err) + return &pb.DecryptScoresResponse{Error: msg}, status.Error(st, msg) + } + user = username + if err := role.CheckScope("decrypt_scores"); err != nil { + statusStr = "denied" + ed := err.Error() + errDetail = &ed + return &pb.DecryptScoresResponse{Error: err.Error()}, status.Error(codes.PermissionDenied, err.Error()) + } + if int(topK) > role.TopK { + te := tokens.ErrTopKExceeded{Requested: int(topK), MaxTopK: role.TopK, RoleName: role.Name} + statusStr = "denied" + msg := te.Error() + errDetail = &msg 
+ return &pb.DecryptScoresResponse{Error: msg}, status.Error(codes.InvalidArgument, msg) + } + + blob, err := base64.StdEncoding.DecodeString(req.GetEncryptedBlobB64()) + if err != nil { + statusStr = "error" + msg := fmt.Sprintf("Deserialization failed: %s", err.Error()) + errDetail = &msg + return &pb.DecryptScoresResponse{Error: msg}, status.Error(codes.InvalidArgument, msg) + } + if s.v.keys == nil { + statusStr = "error" + msg := "FHE key not loaded" + errDetail = &msg + return &pb.DecryptScoresResponse{Error: msg}, status.Error(codes.Internal, msg) + } + scores2D, shardIdx, err := s.v.keys.Decrypt(blob) + if err != nil { + statusStr = "error" + msg := err.Error() + errDetail = &msg + return &pb.DecryptScoresResponse{Error: msg}, status.Error(codes.Internal, msg) + } + entries := topK_FromShards(scores2D, shardIdx, int(topK)) + resultCount = len(entries) + return &pb.DecryptScoresResponse{Results: entries}, nil +} + +// topK_FromShards mirrors vault_core._decrypt_scores_impl L276-285: +// flatten 2D scores into (shard_idx, row_idx, score), sort desc by score, +// take top k. Output order matches Python's heapq.nlargest. +func topK_FromShards(scores2D [][]float64, shardIdx []int32, k int) []*pb.ScoreEntry { + type item struct { + shard, row int32 + score float64 + } + all := make([]item, 0) + for i, row := range scores2D { + shard := int32(i) + if i < len(shardIdx) { + shard = shardIdx[i] + } + for j, v := range row { + all = append(all, item{shard: shard, row: int32(j), score: v}) + } + } + sort.SliceStable(all, func(i, j int) bool { return all[i].score > all[j].score }) + if k > len(all) { + k = len(all) + } + out := make([]*pb.ScoreEntry, k) + for i := 0; i < k; i++ { + out[i] = &pb.ScoreEntry{ + ShardIdx: all[i].shard, + RowIdx: all[i].row, + Score: all[i].score, + } + } + return out +} + +// ── DecryptMetadata ─────────────────────────────────────────────── + +// envelope is the JSON shape of each encrypted_metadata_list element: +// {"a": "", "c": ""}. 
+type envelope struct { + AgentID string `json:"a"` + Cipher string `json:"c"` +} + +func (s *VaultGRPC) DecryptMetadata(ctx context.Context, req *pb.DecryptMetadataRequest) (*pb.DecryptMetadataResponse, error) { + start := time.Now() + user := s.v.tokens.GetUsername(req.GetToken()) + if user == "" { + user = "unknown" + } + resultCount := 0 + statusStr := "success" + var errDetail *string + defer func() { + s.emit(ctx, "decrypt_metadata", user, nil, resultCount, statusStr, errDetail, time.Since(start)) + }() + + username, role, err := s.v.tokens.Validate(req.GetToken()) + if err != nil { + st, msg := mapTokenError(err) + statusStr, errDetail = errStatus(err) + return &pb.DecryptMetadataResponse{Error: msg}, status.Error(st, msg) + } + user = username + if err := role.CheckScope("decrypt_metadata"); err != nil { + statusStr = "denied" + ed := err.Error() + errDetail = &ed + return &pb.DecryptMetadataResponse{Error: err.Error()}, status.Error(codes.PermissionDenied, err.Error()) + } + if s.v.cfg.Tokens.TeamSecret == "" { + statusStr = "error" + msg := "VAULT_TEAM_SECRET not configured" + errDetail = &msg + return &pb.DecryptMetadataResponse{Error: msg}, status.Error(codes.Internal, msg) + } + + out := make([]string, 0, len(req.GetEncryptedMetadataList())) + for _, blobStr := range req.GetEncryptedMetadataList() { + var env envelope + if err := json.Unmarshal([]byte(blobStr), &env); err != nil { + statusStr = "error" + msg := fmt.Sprintf("Metadata decryption failed: %s", err.Error()) + errDetail = &msg + return &pb.DecryptMetadataResponse{Error: msg}, status.Error(codes.InvalidArgument, msg) + } + dek, err := crypto.DeriveAgentKey(s.v.cfg.Tokens.TeamSecret, env.AgentID) + if err != nil { + statusStr = "error" + msg := fmt.Sprintf("Metadata decryption failed: %s", err.Error()) + errDetail = &msg + return &pb.DecryptMetadataResponse{Error: msg}, status.Error(codes.Internal, msg) + } + pt, err := crypto.DecryptMetadata(env.Cipher, dek) + if err != nil { + statusStr = 
"error" + msg := fmt.Sprintf("Metadata decryption failed: %s", err.Error()) + errDetail = &msg + return &pb.DecryptMetadataResponse{Error: msg}, status.Error(codes.Internal, msg) + } + out = append(out, string(pt)) + } + resultCount = len(out) + return &pb.DecryptMetadataResponse{DecryptedMetadata: out}, nil +} + +// ── error mapping & audit helpers ──────────────────────────────── + +// mapTokenError maps tokens.ErrXxx → (gRPC code, user-facing message). +// Mirrors vault_grpc_server.py error branches. +func mapTokenError(err error) (codes.Code, string) { + var nf tokens.ErrTokenNotFound + if errors.As(err, &nf) { + return codes.Unauthenticated, err.Error() + } + var exp tokens.ErrTokenExpired + if errors.As(err, &exp) { + return codes.Unauthenticated, err.Error() + } + var rl tokens.ErrRateLimit + if errors.As(err, &rl) { + return codes.ResourceExhausted, err.Error() + } + var sc tokens.ErrScope + if errors.As(err, &sc) { + return codes.PermissionDenied, err.Error() + } + var tk tokens.ErrTopKExceeded + if errors.As(err, &tk) { + return codes.InvalidArgument, err.Error() + } + return codes.Unauthenticated, err.Error() +} + +// errStatus tags an error for the audit log: token/scope errors are +// "denied", everything else is "error". 
+func errStatus(err error) (string, *string) { + msg := err.Error() + switch { + case errors.As(err, new(tokens.ErrTokenNotFound)), + errors.As(err, new(tokens.ErrTokenExpired)), + errors.As(err, new(tokens.ErrRateLimit)), + errors.As(err, new(tokens.ErrScope)), + errors.As(err, new(tokens.ErrTopKExceeded)): + return "denied", &msg + } + return "error", &msg +} + +func (s *VaultGRPC) emit(ctx context.Context, method, user string, topK *int32, resultCount int, statusStr string, errDetail *string, duration time.Duration) { + if s.v.audit == nil || !s.v.audit.Enabled() { + return + } + p, _ := peer.FromContext(ctx) + s.v.audit.Log(AuditEntry{ + Timestamp: nowUTCISO(), + UserID: user, + Method: method, + TopK: topK, + ResultCount: resultCount, + Status: statusStr, + SourceIP: ExtractSourceIP(p), + LatencyMs: float64(duration.Microseconds()) / 1000.0, + Error: errDetail, + }) +} diff --git a/vault/internal/server/grpc_test.go b/vault/internal/server/grpc_test.go new file mode 100644 index 0000000..f9ca24b --- /dev/null +++ b/vault/internal/server/grpc_test.go @@ -0,0 +1,205 @@ +package server + +import ( + "context" + "errors" + "strings" + "testing" + + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + + "github.com/CryptoLabInc/rune-admin/vault/internal/tokens" + pb "github.com/CryptoLabInc/rune-admin/vault/pkg/vaultpb" +) + +// ── topK_FromShards ─────────────────────────────────────────────── + +func TestTopKFlatSingleShard(t *testing.T) { + scores := [][]float64{{0.5, 0.9, 0.1, 0.7}} + shardIdx := []int32{0} + got := topK_FromShards(scores, shardIdx, 2) + if len(got) != 2 { + t.Fatalf("len = %d", len(got)) + } + if got[0].Score != 0.9 || got[1].Score != 0.7 { + t.Errorf("scores = [%v %v], want [0.9 0.7]", got[0].Score, got[1].Score) + } + if got[0].RowIdx != 1 || got[1].RowIdx != 3 { + t.Errorf("rows = [%d %d], want [1 3]", got[0].RowIdx, got[1].RowIdx) + } +} + +func TestTopKMultiShard(t *testing.T) { + scores := [][]float64{ + {0.1, 0.2}, + {0.9, 
0.5}, + } + shardIdx := []int32{10, 20} + got := topK_FromShards(scores, shardIdx, 3) + if len(got) != 3 { + t.Fatalf("len = %d", len(got)) + } + // Top-3 by score desc: 0.9 (shard 20, row 0), 0.5 (shard 20, row 1), 0.2 (shard 10, row 1) + if got[0].Score != 0.9 || got[0].ShardIdx != 20 || got[0].RowIdx != 0 { + t.Errorf("[0] = %+v", got[0]) + } + if got[1].Score != 0.5 || got[1].ShardIdx != 20 || got[1].RowIdx != 1 { + t.Errorf("[1] = %+v", got[1]) + } + if got[2].Score != 0.2 || got[2].ShardIdx != 10 || got[2].RowIdx != 1 { + t.Errorf("[2] = %+v", got[2]) + } +} + +func TestTopKKExceedsAvailable(t *testing.T) { + scores := [][]float64{{0.1, 0.2}} + got := topK_FromShards(scores, []int32{0}, 10) + if len(got) != 2 { + t.Errorf("len = %d, want 2 (clamped to available)", len(got)) + } +} + +func TestTopKEmptyInput(t *testing.T) { + got := topK_FromShards(nil, nil, 5) + if len(got) != 0 { + t.Errorf("len = %d, want 0", len(got)) + } +} + +// ── error mapping ───────────────────────────────────────────────── + +func TestMapTokenErrorCodes(t *testing.T) { + cases := []struct { + err error + code codes.Code + }{ + {tokens.ErrTokenNotFound{}, codes.Unauthenticated}, + {tokens.ErrTokenExpired{User: "x"}, codes.Unauthenticated}, + {tokens.ErrRateLimit{RetryAfter: 5}, codes.ResourceExhausted}, + {tokens.ErrScope{Method: "m", RoleName: "r"}, codes.PermissionDenied}, + {tokens.ErrTopKExceeded{Requested: 50, MaxTopK: 10, RoleName: "member"}, codes.InvalidArgument}, + {errors.New("random"), codes.Unauthenticated}, + } + for _, c := range cases { + got, _ := mapTokenError(c.err) + if got != c.code { + t.Errorf("mapTokenError(%v) = %v, want %v", c.err, got, c.code) + } + } +} + +// ── handler — token error paths (no FHE keys needed) ───────────── + +func newTestVault(t *testing.T) *Vault { + t.Helper() + cfg := &Config{ + Tokens: TokensConfig{TeamSecret: "test-secret"}, + Keys: KeysConfig{Path: t.TempDir(), EmbeddingDim: 1024}, + } + store := tokens.NewStore() + 
store.LoadDefaultsWithDemoToken() + audit, _ := NewAuditLogger(AuditConfig{Mode: ""}) + return NewVault(cfg, store, nil, audit) +} + +func TestGetPublicKeyInvalidToken(t *testing.T) { + v := newTestVault(t) + srv := NewVaultGRPC(v) + resp, err := srv.GetPublicKey(context.Background(), &pb.GetPublicKeyRequest{ + Token: "evt_ffffffffffffffffffffffffffffffff", + }) + if status.Code(err) != codes.Unauthenticated { + t.Errorf("code = %v, want Unauthenticated", status.Code(err)) + } + if resp.GetError() == "" { + t.Error("response.error is empty") + } +} + +func TestDecryptScoresInvalidToken(t *testing.T) { + v := newTestVault(t) + srv := NewVaultGRPC(v) + _, err := srv.DecryptScores(context.Background(), &pb.DecryptScoresRequest{ + Token: "evt_ffffffffffffffffffffffffffffffff", + EncryptedBlobB64: "AA==", + TopK: 5, + }) + if status.Code(err) != codes.Unauthenticated { + t.Errorf("code = %v, want Unauthenticated", status.Code(err)) + } +} + +func TestDecryptScoresTopKExceeded(t *testing.T) { + v := newTestVault(t) + srv := NewVaultGRPC(v) + // Demo token has admin role with top_k=50; request 51. 
+ _, err := srv.DecryptScores(context.Background(), &pb.DecryptScoresRequest{ + Token: tokens.DemoToken, + EncryptedBlobB64: "AA==", + TopK: 51, + }) + if status.Code(err) != codes.InvalidArgument { + t.Fatalf("code = %v, want InvalidArgument", status.Code(err)) + } + if !strings.Contains(err.Error(), "exceeds limit 50") { + t.Errorf("err = %v, want 'exceeds limit 50'", err) + } +} + +func TestDecryptMetadataInvalidToken(t *testing.T) { + v := newTestVault(t) + srv := NewVaultGRPC(v) + _, err := srv.DecryptMetadata(context.Background(), &pb.DecryptMetadataRequest{ + Token: "evt_ffffffffffffffffffffffffffffffff", + EncryptedMetadataList: []string{`{"a":"x","c":"y"}`}, + }) + if status.Code(err) != codes.Unauthenticated { + t.Errorf("code = %v, want Unauthenticated", status.Code(err)) + } +} + +func TestDecryptMetadataMalformedEnvelope(t *testing.T) { + v := newTestVault(t) + srv := NewVaultGRPC(v) + resp, err := srv.DecryptMetadata(context.Background(), &pb.DecryptMetadataRequest{ + Token: tokens.DemoToken, + EncryptedMetadataList: []string{"not-json"}, + }) + if status.Code(err) != codes.InvalidArgument { + t.Errorf("code = %v, want InvalidArgument", status.Code(err)) + } + if !strings.Contains(resp.GetError(), "Metadata decryption failed") { + t.Errorf("error = %q, want 'Metadata decryption failed'", resp.GetError()) + } +} + +// Round-trip: encrypt with crypto helpers, decrypt via gRPC handler. +// This exercises the handler against valid input without needing the FHE +// secret key. +func TestDecryptMetadataRoundTrip(t *testing.T) { + v := newTestVault(t) + srv := NewVaultGRPC(v) + + // Encrypt "hello" with an HKDF DEK derived from the team secret. 
+ agentID := "test-agent" + plain := "hello" + dek := mustDEK(t, "test-secret", agentID) + ct := mustEncrypt(t, []byte(plain), dek) + envelope := `{"a":"` + agentID + `","c":"` + ct + `"}` + + resp, err := srv.DecryptMetadata(context.Background(), &pb.DecryptMetadataRequest{ + Token: tokens.DemoToken, + EncryptedMetadataList: []string{envelope}, + }) + if err != nil { + t.Fatal(err) + } + if resp.GetError() != "" { + t.Fatalf("response error: %s", resp.GetError()) + } + got := resp.GetDecryptedMetadata() + if len(got) != 1 || got[0] != plain { + t.Errorf("decrypted = %v, want [%q]", got, plain) + } +} diff --git a/vault/internal/server/helpers_test.go b/vault/internal/server/helpers_test.go new file mode 100644 index 0000000..2cfbc7a --- /dev/null +++ b/vault/internal/server/helpers_test.go @@ -0,0 +1,25 @@ +package server + +import ( + "testing" + + "github.com/CryptoLabInc/rune-admin/vault/internal/crypto" +) + +func mustDEK(t *testing.T, secret, agentID string) []byte { + t.Helper() + d, err := crypto.DeriveAgentKey(secret, agentID) + if err != nil { + t.Fatal(err) + } + return d +} + +func mustEncrypt(t *testing.T, plaintext, key []byte) string { + t.Helper() + ct, err := crypto.EncryptMetadata(plaintext, key) + if err != nil { + t.Fatal(err) + } + return ct +} diff --git a/vault/internal/server/interceptors.go b/vault/internal/server/interceptors.go new file mode 100644 index 0000000..eea9c7c --- /dev/null +++ b/vault/internal/server/interceptors.go @@ -0,0 +1,85 @@ +package server + +import ( + "context" + "errors" + "fmt" + "strings" + "unicode" + + "buf.build/go/protovalidate" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "google.golang.org/protobuf/proto" + + pb "github.com/CryptoLabInc/rune-admin/vault/pkg/vaultpb" +) + +// vaultMethods enumerates the gRPC method paths owned by VaultService. +// Other services routed through the same gRPC server bypass runtime checks. 
+var vaultMethods = map[string]bool{ + "/rune.vault.v1.VaultService/GetPublicKey": true, + "/rune.vault.v1.VaultService/DecryptScores": true, + "/rune.vault.v1.VaultService/DecryptMetadata": true, +} + +// NewValidationInterceptor returns a unary server interceptor that runs +// protovalidate against the request, then a runtime safety check on the +// token field. Validation errors are returned as InvalidArgument. +// +// Mirrors vault/validation_interceptor.py and vault/request_validator.py. +func NewValidationInterceptor() (grpc.UnaryServerInterceptor, error) { + v, err := protovalidate.New() + if err != nil { + return nil, fmt.Errorf("interceptors: new protovalidate: %w", err) + } + return func(ctx context.Context, req any, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (any, error) { + msg, ok := req.(proto.Message) + if ok { + if err := v.Validate(msg); err != nil { + return nil, status.Error(codes.InvalidArgument, err.Error()) + } + } + if vaultMethods[info.FullMethod] { + if err := runtimeCheckToken(req); err != nil { + return nil, status.Error(codes.InvalidArgument, err.Error()) + } + } + return handler(ctx, req) + }, nil +} + +// runtimeCheckToken pulls the token field from a Vault request and runs +// the supplementary checks the .proto annotations cannot express. +func runtimeCheckToken(req any) error { + var token string + switch r := req.(type) { + case *pb.GetPublicKeyRequest: + token = r.GetToken() + case *pb.DecryptScoresRequest: + token = r.GetToken() + case *pb.DecryptMetadataRequest: + token = r.GetToken() + default: + return nil + } + return CheckTokenSafety(token) +} + +// CheckTokenSafety rejects tokens with control characters or surrounding +// whitespace. Exposed so unit tests can exercise the rule directly. 
+func CheckTokenSafety(token string) error {
+	// unicode.IsControl already covers the C0 range (below 0x20) and DEL
+	// (0x7f), so one predicate scan replaces the per-rune loop.
+	if strings.IndexFunc(token, unicode.IsControl) >= 0 {
+		return errors.New("token: must not contain control characters")
+	}
+	// Any remaining difference after trimming is non-control whitespace at
+	// either end of the token.
+	if strings.TrimSpace(token) != token {
+		return errors.New("token: must not have leading or trailing whitespace")
+	}
+	return nil
+}
diff --git a/vault/internal/server/interceptors_test.go b/vault/internal/server/interceptors_test.go
new file mode 100644
index 0000000..4d66d66
--- /dev/null
+++ b/vault/internal/server/interceptors_test.go
@@ -0,0 +1,105 @@
+package server
+
+import (
+	"context"
+	"strings"
+	"testing"
+
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/status"
+
+	pb "github.com/CryptoLabInc/rune-admin/vault/pkg/vaultpb"
+)
+
+func TestCheckTokenSafetyAccepts(t *testing.T) {
+	if err := CheckTokenSafety("evt_0123456789abcdef0123456789abcdef"); err != nil {
+		t.Errorf("good token rejected: %v", err)
+	}
+}
+
+func TestCheckTokenSafetyRejectsControlChar(t *testing.T) {
+	for _, bad := range []string{"\x00token", "tok\x01en", "tok\x1fen", "tok\x7fen"} {
+		if err := CheckTokenSafety(bad); err == nil {
+			t.Errorf("control char accepted: %q", bad)
+		}
+	}
+}
+
+func TestCheckTokenSafetyRejectsWhitespace(t *testing.T) {
+	for _, bad := range []string{" token", "token ", "\ttoken", "token\n"} {
+		if err := CheckTokenSafety(bad); err == nil {
+			t.Errorf("whitespace accepted: %q", bad)
+		}
+	}
+}
+
+// noopHandler is a grpc.UnaryHandler that returns the request unchanged.
+func noopHandler(_ context.Context, req any) (any, error) { return req, nil } + +func mustInterceptor(t *testing.T) grpc.UnaryServerInterceptor { + t.Helper() + ic, err := NewValidationInterceptor() + if err != nil { + t.Fatal(err) + } + return ic +} + +func vaultMethodInfo(name string) *grpc.UnaryServerInfo { + return &grpc.UnaryServerInfo{FullMethod: "/rune.vault.v1.VaultService/" + name} +} + +func TestInterceptorPassesValidRequest(t *testing.T) { + ic := mustInterceptor(t) + req := &pb.GetPublicKeyRequest{Token: "evt_0123456789abcdef0123456789abcdef"} + out, err := ic(context.Background(), req, vaultMethodInfo("GetPublicKey"), noopHandler) + if err != nil { + t.Fatalf("err = %v, want nil", err) + } + if out != req { + t.Errorf("interceptor mutated request") + } +} + +func TestInterceptorRejectsBadProtovalidate(t *testing.T) { + ic := mustInterceptor(t) + // Token shorter than 36 fails the proto-level constraint. + req := &pb.GetPublicKeyRequest{Token: "too_short"} + _, err := ic(context.Background(), req, vaultMethodInfo("GetPublicKey"), noopHandler) + if err == nil { + t.Fatal("err = nil, want validation error") + } + if status.Code(err) != codes.InvalidArgument { + t.Errorf("code = %v, want InvalidArgument", status.Code(err)) + } +} + +func TestInterceptorRejectsControlCharToken(t *testing.T) { + ic := mustInterceptor(t) + // 36-char token containing a control byte (\x00) inside. + // protovalidate only checks length, so the runtime layer catches this. 
+ req := &pb.GetPublicKeyRequest{Token: "evt_0123456789abcdef0123456789abc\x00ef"} + if len(req.Token) != 36 { + t.Fatalf("test setup: token length = %d, want 36", len(req.Token)) + } + _, err := ic(context.Background(), req, vaultMethodInfo("GetPublicKey"), noopHandler) + if err == nil { + t.Fatal("err = nil, want runtime error") + } + if !strings.Contains(err.Error(), "control") { + t.Errorf("err = %v, want 'control characters' message", err) + } +} + +func TestInterceptorAllowsNonVaultMethod(t *testing.T) { + ic := mustInterceptor(t) + // Whitespace-around token would normally fail runtime check, but + // non-Vault methods skip runtime checks (and the proto for this + // dummy message doesn't apply). + req := &pb.GetPublicKeyRequest{Token: "evt_0123456789abcdef0123456789abcdef"} + info := &grpc.UnaryServerInfo{FullMethod: "/grpc.health.v1.Health/Check"} + if _, err := ic(context.Background(), req, info, noopHandler); err != nil { + t.Errorf("non-vault method blocked: %v", err) + } +} diff --git a/vault/internal/server/serve.go b/vault/internal/server/serve.go new file mode 100644 index 0000000..97def9e --- /dev/null +++ b/vault/internal/server/serve.go @@ -0,0 +1,163 @@ +package server + +import ( + "context" + "crypto/tls" + "errors" + "fmt" + "log/slog" + "net" + "os" + "os/signal" + "syscall" + "time" + + "google.golang.org/grpc" + "google.golang.org/grpc/credentials" + "google.golang.org/grpc/health" + healthpb "google.golang.org/grpc/health/grpc_health_v1" + "google.golang.org/grpc/reflection" + + pb "github.com/CryptoLabInc/rune-admin/vault/pkg/vaultpb" +) + +// Serve starts the gRPC + admin UDS listeners with the given Vault and +// blocks until ctx is cancelled or a SIGTERM/SIGINT is received. The +// admin listener is constructed by AdminFactory; passing nil disables the +// admin UDS surface (useful for unit tests that exercise gRPC alone). +// +// Returns nil on graceful shutdown. 
Listener bind errors and server runtime +// errors are returned eagerly. +func Serve(ctx context.Context, v *Vault, adminFactory AdminFactory) error { + cfg := v.Config() + + if err := EnsureVault(ctx, cfg); err != nil { + return fmt.Errorf("server: %w", err) + } + + // gRPC listener + grpcAddr := fmt.Sprintf("%s:%d", grpcHost(cfg), cfg.Server.GRPC.Port) + grpcLis, err := net.Listen("tcp", grpcAddr) + if err != nil { + return fmt.Errorf("server: listen %s: %w", grpcAddr, err) + } + defer grpcLis.Close() + + tlsCreds, err := loadTLSCredentials(cfg.Server.GRPC.TLS) + if err != nil { + return fmt.Errorf("server: tls: %w", err) + } + + interceptor, err := NewValidationInterceptor() + if err != nil { + return fmt.Errorf("server: interceptor: %w", err) + } + + opts := []grpc.ServerOption{ + grpc.MaxRecvMsgSize(MaxMessageSize), + grpc.MaxSendMsgSize(MaxMessageSize), + grpc.UnaryInterceptor(interceptor), + } + if tlsCreds != nil { + opts = append(opts, grpc.Creds(tlsCreds)) + } + gs := grpc.NewServer(opts...) + pb.RegisterVaultServiceServer(gs, NewVaultGRPC(v)) + + // Health + reflection (matches Python registration sites: + // vault_grpc_server.py:317-331). + healthSvc := health.NewServer() + healthSvc.SetServingStatus("rune.vault.v1.VaultService", healthpb.HealthCheckResponse_SERVING) + healthSvc.SetServingStatus("", healthpb.HealthCheckResponse_SERVING) + healthpb.RegisterHealthServer(gs, healthSvc) + reflection.Register(gs) + + // Admin UDS listener (optional) + var adminShutdown func(context.Context) error + if adminFactory != nil { + adminShutdown, err = adminFactory(ctx, v) + if err != nil { + return fmt.Errorf("server: admin: %w", err) + } + } + + scheme := "insecure" + if tlsCreds != nil { + scheme = "tls" + } + slog.Info("vault: gRPC listening", "addr", grpcAddr, "scheme", scheme) + + // Run gRPC in a goroutine; wait for shutdown signal or ctx cancellation. 
+ errCh := make(chan error, 1) + go func() { + errCh <- gs.Serve(grpcLis) + }() + + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGTERM, syscall.SIGINT) + defer signal.Stop(sigCh) + + select { + case <-ctx.Done(): + slog.Info("vault: context cancelled, shutting down") + case sig := <-sigCh: + slog.Info("vault: signal received, shutting down", "signal", sig.String()) + case err := <-errCh: + if err != nil && !errors.Is(err, grpc.ErrServerStopped) { + return fmt.Errorf("server: grpc serve: %w", err) + } + } + + healthSvc.Shutdown() + stopGracefullyOrForce(gs, 5*time.Second) + if adminShutdown != nil { + shCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + _ = adminShutdown(shCtx) + } + return nil +} + +// stopGracefullyOrForce gives gRPC up to grace to drain in-flight RPCs; +// on timeout, falls back to a hard Stop so the daemon never hangs at +// shutdown waiting for an idle reflection/health stream to close. +func stopGracefullyOrForce(gs *grpc.Server, grace time.Duration) { + done := make(chan struct{}) + go func() { + gs.GracefulStop() + close(done) + }() + select { + case <-done: + case <-time.After(grace): + slog.Warn("vault: graceful stop timed out, forcing", "grace", grace) + gs.Stop() + <-done + } +} + +// AdminFactory builds the admin UDS server and returns a shutdown closer. +// internal/server/admin.go (added in step 6) supplies the production impl. 
+type AdminFactory func(ctx context.Context, v *Vault) (shutdown func(context.Context) error, err error) + +func grpcHost(cfg *Config) string { + if cfg.Server.GRPC.Host == "" { + return "0.0.0.0" + } + return cfg.Server.GRPC.Host +} + +func loadTLSCredentials(t TLSConfig) (credentials.TransportCredentials, error) { + if t.Disable { + slog.Warn("vault: TLS disabled — gRPC traffic is unencrypted (dev mode only)") + return nil, nil + } + if t.Cert == "" || t.Key == "" { + return nil, errors.New("server.grpc.tls.cert and server.grpc.tls.key are required (or set disable=true)") + } + cert, err := tls.LoadX509KeyPair(t.Cert, t.Key) + if err != nil { + return nil, fmt.Errorf("load x509 key pair: %w", err) + } + return credentials.NewServerTLSFromCert(&cert), nil +} diff --git a/vault/internal/server/testdata/runevault.conf.example b/vault/internal/server/testdata/runevault.conf.example new file mode 100644 index 0000000..70243e9 --- /dev/null +++ b/vault/internal/server/testdata/runevault.conf.example @@ -0,0 +1,40 @@ +# runevault.conf — example deployment configuration. +# +# Lookup order: +# 1. --config CLI flag +# 2. /opt/rune-vault/configs/runevault.conf +# 3. ./runevault.conf (cwd, dev only) +# +# This file should be mode 0600, owned by the vault-user. +# Replace placeholder values before use. 
+ +server: + grpc: + host: 0.0.0.0 + port: 50051 + tls: + cert: /opt/rune-vault/certs/server.pem + key: /opt/rune-vault/certs/server.key + disable: false # true for dev only — never in production + admin: + socket: /opt/rune-vault/admin.sock + +keys: + path: /opt/rune-vault/vault-keys + index_name: my-team + embedding_dim: 1024 + +envector: + endpoint: https://envector.example.com + api_key: REPLACE_WITH_API_KEY + # Alternative: api_key_file: /run/secrets/envector_api_key + +tokens: + team_secret: REPLACE_WITH_RANDOM_HEX_32 + # Alternative: team_secret_file: /run/secrets/team_secret + roles_file: /opt/rune-vault/configs/roles.yml + tokens_file: /opt/rune-vault/configs/tokens.yml + +audit: + mode: file+stdout # one of: "", file, stdout, file+stdout + path: /opt/rune-vault/logs/audit.log diff --git a/vault/internal/tests/decrypt_pipeline_test.go b/vault/internal/tests/decrypt_pipeline_test.go new file mode 100644 index 0000000..7310e32 --- /dev/null +++ b/vault/internal/tests/decrypt_pipeline_test.go @@ -0,0 +1,316 @@ +package tests + +import ( + "context" + "encoding/json" + "math" + "os" + "path/filepath" + "sort" + "strings" + "testing" + + "github.com/CryptoLabInc/rune-admin/vault/internal/crypto" + "github.com/CryptoLabInc/rune-admin/vault/internal/server" + "github.com/CryptoLabInc/rune-admin/vault/internal/tokens" + pb "github.com/CryptoLabInc/rune-admin/vault/pkg/vaultpb" +) + +type fixtureBundle struct { + Config fixtureConfig + Envelope []string + Expected []any // any: JSON object/array/string per envelope + ScoresB64 string + ScoreExp fixtureScoreExpected + KeysDir string +} + +type fixtureConfig struct { + TeamSecret string `json:"team_secret"` + AgentID string `json:"agent_id"` + Token string `json:"token"` + Dim int `json:"dim"` +} + +type fixtureScoreExpected struct { + Score [][]float64 `json:"score"` + ShardIdx []int32 `json:"shard_idx"` +} + +func loadFixtures(t *testing.T) *fixtureBundle { + t.Helper() + if !FixturesAvailable() { + 
t.Skip(SkipReason) + } + dir := FixturesDir() + + var fb fixtureBundle + mustJSON(t, filepath.Join(dir, "config.json"), &fb.Config) + scoresExp := mustReadFile(t, filepath.Join(dir, "expected_scores.json")) + if err := json.Unmarshal(scoresExp, &fb.ScoreExp); err != nil { + t.Fatal(err) + } + fb.ScoresB64 = strings.TrimSpace(string(mustReadFile(t, filepath.Join(dir, "ciphertext_score.b64")))) + mustJSON(t, filepath.Join(dir, "metadata_envelopes.json"), &fb.Envelope) + mustJSON(t, filepath.Join(dir, "expected_metadata.json"), &fb.Expected) + fb.KeysDir = filepath.Join(dir, "keys") + return &fb +} + +func mustJSON(t *testing.T, path string, dst any) { + t.Helper() + body := mustReadFile(t, path) + if err := json.Unmarshal(body, dst); err != nil { + t.Fatalf("parse %s: %v", path, err) + } +} + +func mustReadFile(t *testing.T, path string) []byte { + t.Helper() + b, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read %s: %v", path, err) + } + return b +} + +// fixtureVault wires a Vault around the decrypted fixture bundle. +// keys.path points at the temp dir copy of fixture keys so envector-go-sdk +// can stage the bundle without touching the read-only fixture tree. +func fixtureVault(t *testing.T, fb *fixtureBundle) *server.Vault { + t.Helper() + keyDir := t.TempDir() + for _, name := range []string{"EncKey.json", "EvalKey.json", "SecKey.json"} { + src := filepath.Join(fb.KeysDir, name) + if _, err := os.Stat(src); err != nil { + t.Skipf("fixture key %s missing: %v", name, err) + } + body, _ := os.ReadFile(src) + if err := os.WriteFile(filepath.Join(keyDir, name), body, 0o600); err != nil { + t.Fatal(err) + } + } + // envector.OpenKeysFromFile expects WithKeyPath = directory containing + // the JSON envelopes; use keyDir directly + KeyID = "vault-key" so the + // joined path matches. 
+ keysParams := crypto.KeysParams{Root: filepath.Dir(keyDir), KeyID: filepath.Base(keyDir), Dim: fb.Config.Dim} + keys, err := crypto.OpenSecretKey(keysParams) + if err != nil { + t.Fatalf("OpenSecretKey: %v", err) + } + t.Cleanup(func() { keys.Close() }) + + cfg := &server.Config{ + Tokens: server.TokensConfig{TeamSecret: fb.Config.TeamSecret}, + Keys: server.KeysConfig{Path: filepath.Dir(keyDir), EmbeddingDim: fb.Config.Dim}, + } + store := tokens.NewStore() + store.LoadDefaultsWithDemoToken() + // Replace the demo token with the fixture's token so the test can + // authenticate without re-keying every envelope. + if fb.Config.Token != "" && fb.Config.Token != tokens.DemoToken { + // Inject manually via a minimal hack: load defaults then add user. + _, _ = store.AddRole("fixture", []string{"get_public_key", "decrypt_scores", "decrypt_metadata"}, 1000, "10000/60s") + // AddToken would generate a new token; we need the fixture's exact + // token string. The store has no public "InsertToken" — for tests + // we resort to LoadFromFiles via tempfiles. 
+ injectFixtureToken(t, store, fb.Config.Token) + } + audit, _ := server.NewAuditLogger(server.AuditConfig{Mode: ""}) + return server.NewVault(cfg, store, keys, audit) +} + +func injectFixtureToken(t *testing.T, store *tokens.Store, token string) { + t.Helper() + dir := t.TempDir() + rolesPath := filepath.Join(dir, "roles.yml") + tokensPath := filepath.Join(dir, "tokens.yml") + if err := os.WriteFile(rolesPath, []byte(`roles: + fixture: + scope: [get_public_key, decrypt_scores, decrypt_metadata] + top_k: 1000 + rate_limit: 10000/60s +`), 0o600); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(tokensPath, []byte(`tokens: + - user: fixture + token: `+token+` + role: fixture + issued_at: "2026-01-01" +`), 0o600); err != nil { + t.Fatal(err) + } + if err := store.LoadFromFiles(rolesPath, tokensPath); err != nil { + t.Fatal(err) + } +} + +// ── decrypt_scores via gRPC handler ─────────────────────────────── + +func TestDecryptScoresAgainstFixture(t *testing.T) { + fb := loadFixtures(t) + v := fixtureVault(t, fb) + srv := server.NewVaultGRPC(v) + + totalScores := 0 + for _, row := range fb.ScoreExp.Score { + totalScores += len(row) + } + resp, err := srv.DecryptScores(context.Background(), &pb.DecryptScoresRequest{ + Token: fb.Config.Token, + EncryptedBlobB64: fb.ScoresB64, + TopK: int32(totalScores), + }) + if err != nil { + t.Fatal(err) + } + if resp.GetError() != "" { + t.Fatalf("error: %s", resp.GetError()) + } + + expectedFlat := make([]struct { + shard int32 + row int32 + score float64 + }, 0) + for i, row := range fb.ScoreExp.Score { + shard := int32(i) + if i < len(fb.ScoreExp.ShardIdx) { + shard = fb.ScoreExp.ShardIdx[i] + } + for j, s := range row { + expectedFlat = append(expectedFlat, struct { + shard int32 + row int32 + score float64 + }{shard, int32(j), s}) + } + } + sort.SliceStable(expectedFlat, func(i, j int) bool { + return expectedFlat[i].score > expectedFlat[j].score + }) + + if len(resp.Results) != len(expectedFlat) { + 
t.Fatalf("len(results) = %d, want %d", len(resp.Results), len(expectedFlat)) + } + for i, got := range resp.Results { + want := expectedFlat[i] + if got.ShardIdx != want.shard || got.RowIdx != want.row { + t.Errorf("[%d] shard/row = (%d,%d), want (%d,%d)", i, got.ShardIdx, got.RowIdx, want.shard, want.row) + } + if math.Abs(got.Score-want.score) > 1e-6 { + t.Errorf("[%d] score = %v, want %v", i, got.Score, want.score) + } + } +} + +func TestDecryptScoresTopKAgainstFixture(t *testing.T) { + fb := loadFixtures(t) + v := fixtureVault(t, fb) + srv := server.NewVaultGRPC(v) + + allScores := []float64{} + for _, row := range fb.ScoreExp.Score { + allScores = append(allScores, row...) + } + sort.Sort(sort.Reverse(sort.Float64Slice(allScores))) + topN := 3 + if len(allScores) < topN { + topN = len(allScores) + } + resp, err := srv.DecryptScores(context.Background(), &pb.DecryptScoresRequest{ + Token: fb.Config.Token, + EncryptedBlobB64: fb.ScoresB64, + TopK: int32(topN), + }) + if err != nil { + t.Fatal(err) + } + if len(resp.Results) != topN { + t.Fatalf("len = %d, want %d", len(resp.Results), topN) + } + for i, got := range resp.Results { + if math.Abs(got.Score-allScores[i]) > 1e-6 { + t.Errorf("[%d] score = %v, want %v", i, got.Score, allScores[i]) + } + } + for i := 1; i < len(resp.Results); i++ { + if resp.Results[i].Score > resp.Results[i-1].Score { + t.Errorf("results not descending at %d", i) + } + } +} + +// ── decrypt_metadata via gRPC handler ───────────────────────────── + +func TestDecryptMetadataSingleAgainstFixture(t *testing.T) { + fb := loadFixtures(t) + v := fixtureVault(t, fb) + srv := server.NewVaultGRPC(v) + + if len(fb.Envelope) == 0 { + t.Skip("no envelopes in fixture") + } + resp, err := srv.DecryptMetadata(context.Background(), &pb.DecryptMetadataRequest{ + Token: fb.Config.Token, + EncryptedMetadataList: []string{fb.Envelope[0]}, + }) + if err != nil { + t.Fatal(err) + } + if resp.GetError() != "" { + t.Fatalf("error: %s", resp.GetError()) + } + 
// decodeAny interprets raw as JSON when possible. A successful parse yields
// the decoded value; anything that is not valid JSON is handed back unchanged
// as the original string. The *testing.T argument is unused.
func decodeAny(_ *testing.T, raw string) any {
	var parsed any
	err := json.Unmarshal([]byte(raw), &parsed)
	if err != nil {
		return raw
	}
	return parsed
}
func jsonEq(a, b any) bool {
	// Two values are equal when their JSON encodings are byte-identical, so
	// an int and a float64 holding the same whole number compare equal.
	//
	// A value that cannot be marshalled is never equal to anything. Without
	// these checks both failures would produce empty output and two
	// unrelated unmarshalable values would be reported as equal.
	ja, err := json.Marshal(a)
	if err != nil {
		return false
	}
	jb, err := json.Marshal(b)
	if err != nil {
		return false
	}
	return string(ja) == string(jb)
}
+ daemon := exec.Command(binary, "--config", confPath, "daemon", "start") + logFile, err := os.Create(filepath.Join(tmp, "daemon.log")) + if err != nil { + t.Fatal(err) + } + defer logFile.Close() + daemon.Stdout = logFile + daemon.Stderr = logFile + if err := daemon.Start(); err != nil { + t.Fatalf("daemon start: %v", err) + } + daemonPID := daemon.Process.Pid + // Reap the daemon promptly when it exits — without this, the kernel + // keeps it as a zombie and PIDLive (kill -0) returns true even though + // the daemon has already finished its shutdown sequence. That defeats + // `daemon stop`'s liveness poll. + waitDone := make(chan error, 1) + go func() { waitDone <- daemon.Wait() }() + t.Cleanup(func() { + // Send SIGKILL via syscall so we don't race with cmd.Wait's fd close. + _ = syscall.Kill(daemonPID, syscall.SIGKILL) + select { + case <-waitDone: + case <-time.After(2 * time.Second): + } + }) + + // Wait for the admin socket to appear (FHE key generation can take a + // few seconds on cold starts). + socket := filepath.Join(tmp, "x.sock") + if !waitFor(socket, 30*time.Second) { + body, _ := os.ReadFile(filepath.Join(tmp, "daemon.log")) + t.Fatalf("admin socket never appeared\ndaemon log:\n%s", body) + } + + run := func(args ...string) (string, error) { + cmd := exec.Command(binary, append([]string{"--config", confPath}, args...)...) 
+ out, err := cmd.CombinedOutput() + return string(out), err + } + + // ── status ── + out, err := run("status") + if err != nil { + t.Fatalf("status: %v\n%s", err, out) + } + if !strings.Contains(out, "Admin socket:") || !strings.Contains(out, "ok)") { + t.Errorf("status output unexpected:\n%s", out) + } + + // ── token issue ── + out, err = run("token", "issue", "--user", "alice", "--role", "member", "--expires", "30d") + if err != nil { + t.Fatalf("issue: %v\n%s", err, out) + } + if !strings.Contains(out, "Token issued for 'alice'") { + t.Errorf("issue output: %s", out) + } + if !strings.Contains(out, "evt_") { + t.Errorf("token not in output: %s", out) + } + + // ── token list ── + out, err = run("token", "list") + if err != nil { + t.Fatalf("list: %v\n%s", err, out) + } + if !strings.Contains(out, "alice") || !strings.Contains(out, "member") { + t.Errorf("list output missing alice: %s", out) + } + + // ── role list (defaults present) ── + out, err = run("role", "list") + if err != nil { + t.Fatalf("role list: %v\n%s", err, out) + } + if !strings.Contains(out, "admin") || !strings.Contains(out, "member") { + t.Errorf("role list output: %s", out) + } + + // ── token revoke ── + out, err = run("token", "revoke", "--user", "alice") + if err != nil { + t.Fatalf("revoke: %v\n%s", err, out) + } + if !strings.Contains(out, "Revoked") { + t.Errorf("revoke output: %s", out) + } + + // Signal daemon to shut down gracefully via SIGTERM (lifecycle is OS-managed). + if err := syscall.Kill(daemonPID, syscall.SIGTERM); err != nil { + t.Fatalf("SIGTERM daemon: %v", err) + } + select { + case <-waitDone: + case <-time.After(10 * time.Second): + t.Errorf("daemon did not exit after SIGTERM") + } +} + +// resolveBinary returns the runevault binary path. If RUNEVAULT_TEST_BINARY +// is set it is used as-is (relative paths are resolved from repoRoot). +// Otherwise the binary is built from source into tmp. 
// waitFor polls until path exists or dur has elapsed, and reports whether the
// path was found.
//
// The path is probed before the deadline is consulted, so it is checked at
// least once even when dur is zero or negative, and once more after the final
// sleep. Probes are spaced 100ms apart, so the call may return up to about
// 100ms after the deadline.
func waitFor(path string, dur time.Duration) bool {
	deadline := time.Now().Add(dur)
	for {
		if _, err := os.Stat(path); err == nil {
			return true
		}
		if !time.Now().Before(deadline) {
			return false
		}
		time.Sleep(100 * time.Millisecond)
	}
}
// SkipReason is the standard skip message for tests that require decrypted
// fixtures; loadFixtures passes it to t.Skip when FixturesAvailable reports
// false.
const SkipReason = "fixtures not decrypted — run `mise run fixtures:decrypt` (requires FIXTURES_GPG_PASSPHRASE)"
// RateLimiter admits at most maxRequests requests per client within any
// trailing interval of length window. It remembers the timestamp of every
// admitted request per client ID; rejected requests are not recorded.
type RateLimiter struct {
	maxRequests int
	window      time.Duration
	now         func() time.Time // clock source; replaced by tests

	mu       sync.Mutex             // guards requests
	requests map[string][]time.Time // admitted request times per client ID
}

// NewRateLimiter returns a limiter that allows maxRequests requests per
// window for each client, using time.Now as its clock.
func NewRateLimiter(maxRequests int, window time.Duration) *RateLimiter {
	return &RateLimiter{
		maxRequests: maxRequests,
		window:      window,
		now:         time.Now,
		requests:    make(map[string][]time.Time),
	}
}

// IsAllowed reports whether clientID may make a request now and, if so,
// records it. Timestamps that have fallen out of the window are discarded on
// every call.
func (rl *RateLimiter) IsAllowed(clientID string) bool {
	now := rl.now()
	cutoff := now.Add(-rl.window)

	rl.mu.Lock()
	defer rl.mu.Unlock()

	// Filter in place: kept shares the backing array of the existing slice.
	kept := rl.requests[clientID][:0]
	for _, t := range rl.requests[clientID] {
		if t.After(cutoff) {
			kept = append(kept, t)
		}
	}
	if len(kept) >= rl.maxRequests {
		rl.requests[clientID] = kept
		return false
	}
	rl.requests[clientID] = append(kept, now)
	return true
}

// RetryAfter returns the number of whole seconds until the oldest recorded
// request of clientID leaves the window, or 0 when nothing is recorded or the
// oldest request has already expired.
//
// The value is rounded up: a client that still has to wait a fraction of a
// second is told 1, never 0, so retrying after the returned delay is not
// rejected again for the same request.
func (rl *RateLimiter) RetryAfter(clientID string) int {
	rl.mu.Lock()
	defer rl.mu.Unlock()

	reqs := rl.requests[clientID]
	if len(reqs) == 0 {
		return 0
	}
	// Scan for the minimum instead of trusting reqs[0], in case the clock
	// ever produced out-of-order timestamps.
	oldest := reqs[0]
	for _, t := range reqs[1:] {
		if t.Before(oldest) {
			oldest = t
		}
	}
	remaining := rl.window - rl.now().Sub(oldest)
	if remaining <= 0 {
		return 0
	}
	return int((remaining + time.Second - 1) / time.Second)
}

// Remove forgets every request recorded for clientID.
func (rl *RateLimiter) Remove(clientID string) {
	rl.mu.Lock()
	defer rl.mu.Unlock()
	delete(rl.requests, clientID)
}
// Role describes what a token assigned to it may do. Name is not serialised
// to YAML (the role's key in the roles map carries it instead).
type Role struct {
	Name      string   `yaml:"-"`
	Scope     []string `yaml:"scope"`      // method names accepted by CheckScope
	TopK      int      `yaml:"top_k"`      // per-role top_k limit
	RateLimit string   `yaml:"rate_limit"` // "<requests>/<seconds>s"; see rateLimitRE
}

// rateLimitRE matches "<requests>/<seconds>s" with decimal digits only,
// e.g. "30/60s".
var rateLimitRE = regexp.MustCompile(`^(\d+)/(\d+)s$`)

// RateLimitParsed splits r.RateLimit into the request count and the window
// length.
//
// It returns an error when the string does not match rateLimitRE, when either
// number does not fit in an int, or when the window in seconds is too large to
// be represented as a time.Duration. Out-of-range numbers used to be accepted
// silently and produced a clamped count or an overflowed window.
func (r *Role) RateLimitParsed() (max int, window time.Duration, err error) {
	m := rateLimitRE.FindStringSubmatch(r.RateLimit)
	if m == nil {
		return 0, 0, fmt.Errorf("invalid rate_limit format %q (expected '/s')", r.RateLimit)
	}
	maxReq, err := strconv.Atoi(m[1])
	if err != nil {
		return 0, 0, fmt.Errorf("invalid rate_limit %q: %w", r.RateLimit, err)
	}
	winSec, err := strconv.Atoi(m[2])
	if err != nil {
		return 0, 0, fmt.Errorf("invalid rate_limit %q: %w", r.RateLimit, err)
	}
	// Largest number of whole seconds a time.Duration (int64 nanoseconds)
	// can hold without overflowing.
	const maxWindowSeconds = int64(1<<63-1) / int64(time.Second)
	if int64(winSec) > maxWindowSeconds {
		return 0, 0, fmt.Errorf("invalid rate_limit %q: window exceeds %d seconds", r.RateLimit, maxWindowSeconds)
	}
	return maxReq, time.Duration(winSec) * time.Second, nil
}
// Store is the in-memory registry of tokens, roles and per-user rate
// limiters. Mutating methods schedule a debounced write of the roles and
// tokens YAML files once both paths have been set by LoadFromFiles.
type Store struct {
	// mu guards tokens, tokensByUser, roles and rateLimiters.
	mu           sync.RWMutex
	tokens       map[string]*Token       // keyed by token string
	tokensByUser map[string]*Token       // keyed by username
	roles        map[string]*Role        // keyed by role name
	rateLimiters map[string]*RateLimiter // keyed by username
	// rolesPath and tokensPath are set by LoadFromFiles; while either is
	// empty schedulePersist does nothing.
	rolesPath  string
	tokensPath string

	// now is the clock used for issue dates and expiry checks.
	now func() time.Time

	// persistMu guards persistTimer and persistClosed.
	persistMu sync.Mutex
	// persistTimer is the pending debounce timer, or nil when none is armed.
	persistTimer *time.Timer
	// persistWG counts in-flight doPersist calls; Shutdown and Flush wait on it.
	persistWG sync.WaitGroup
	// persistClosed is set by Shutdown; afterwards no new persist is scheduled.
	persistClosed bool
}

// NewStore returns an empty Store with no roles, no tokens and no file paths.
// Its clock reports the current time in UTC.
func NewStore() *Store {
	return &Store{
		tokens:       make(map[string]*Token),
		tokensByUser: make(map[string]*Token),
		roles:        make(map[string]*Role),
		rateLimiters: make(map[string]*RateLimiter),
		now:          func() time.Time { return time.Now().UTC() },
	}
}
+func (s *Store) LoadFromFiles(rolesPath, tokensPath string) error { + s.mu.Lock() + s.rolesPath = rolesPath + s.tokensPath = tokensPath + + // Roles + if data, err := os.ReadFile(rolesPath); err == nil { + var doc struct { + Roles map[string]struct { + Scope []string `yaml:"scope"` + TopK int `yaml:"top_k"` + RateLimit string `yaml:"rate_limit"` + } `yaml:"roles"` + } + if err := yaml.Unmarshal(data, &doc); err != nil { + s.mu.Unlock() + return fmt.Errorf("parse roles file %s: %w", rolesPath, err) + } + for name, cfg := range doc.Roles { + topK := cfg.TopK + if topK == 0 { + topK = 5 + } + rl := cfg.RateLimit + if rl == "" { + rl = "30/60s" + } + s.roles[name] = &Role{Name: name, Scope: cfg.Scope, TopK: topK, RateLimit: rl} + } + } else if !os.IsNotExist(err) { + s.mu.Unlock() + return fmt.Errorf("read roles file %s: %w", rolesPath, err) + } else { + for name, role := range DefaultRoles() { + s.roles[name] = role + } + } + for name, role := range DefaultRoles() { + if _, ok := s.roles[name]; !ok { + s.roles[name] = role + } + } + + // Tokens + rolesMissing := false + if _, err := os.Stat(rolesPath); os.IsNotExist(err) { + rolesMissing = true + } + tokensMissing := false + if data, err := os.ReadFile(tokensPath); err == nil { + var doc struct { + Tokens []struct { + User string `yaml:"user"` + Token string `yaml:"token"` + Role string `yaml:"role"` + IssuedAt string `yaml:"issued_at"` + Created string `yaml:"created"` + Expires string `yaml:"expires"` + } `yaml:"tokens"` + } + if err := yaml.Unmarshal(data, &doc); err != nil { + s.mu.Unlock() + return fmt.Errorf("parse tokens file %s: %w", tokensPath, err) + } + for _, e := range doc.Tokens { + issued := e.IssuedAt + if issued == "" { + issued = e.Created + } + tok := &Token{ + User: e.User, + Token: e.Token, + Role: e.Role, + IssuedAt: issued, + Expires: e.Expires, + } + s.tokens[tok.Token] = tok + s.tokensByUser[tok.User] = tok + } + } else if !os.IsNotExist(err) { + s.mu.Unlock() + return fmt.Errorf("read tokens 
file %s: %w", tokensPath, err) + } else { + tokensMissing = true + } + s.mu.Unlock() + + if rolesMissing || tokensMissing { + s.schedulePersist() + } + return nil +} + +// LoadDefaultsWithDemoToken seeds the store with default roles and the demo token. +// Useful for dev/CI bootstraps that don't ship persisted state. +func (s *Store) LoadDefaultsWithDemoToken() { + s.mu.Lock() + defer s.mu.Unlock() + for name, role := range DefaultRoles() { + s.roles[name] = role + } + tok := &Token{ + User: "demo", + Token: DemoToken, + Role: "admin", + IssuedAt: s.now().Format(dateFormat), + } + s.tokens[tok.Token] = tok + s.tokensByUser[tok.User] = tok +} + +func (s *Store) Validate(tokenStr string) (string, *Role, error) { + s.mu.Lock() + tok, ok := s.tokens[tokenStr] + if !ok { + s.mu.Unlock() + return "", nil, ErrTokenNotFound{} + } + if tok.IsExpiredAt(s.now()) { + user := tok.User + s.mu.Unlock() + return "", nil, ErrTokenExpired{User: user} + } + role, ok := s.roles[tok.Role] + if !ok { + s.mu.Unlock() + return "", nil, ErrTokenNotFound{} + } + limiter, err := s.getOrCreateLimiterLocked(tok.User, role) + s.mu.Unlock() + if err != nil { + return "", nil, err + } + if !limiter.IsAllowed(tok.User) { + return "", nil, ErrRateLimit{RetryAfter: limiter.RetryAfter(tok.User)} + } + return tok.User, role, nil +} + +func (s *Store) GetUsername(tokenStr string) string { + s.mu.RLock() + defer s.mu.RUnlock() + if tok, ok := s.tokens[tokenStr]; ok { + return tok.User + } + return "" +} + +func (s *Store) getOrCreateLimiterLocked(user string, role *Role) (*RateLimiter, error) { + if l, ok := s.rateLimiters[user]; ok { + return l, nil + } + maxReq, window, err := role.RateLimitParsed() + if err != nil { + return nil, err + } + l := NewRateLimiter(maxReq, window) + s.rateLimiters[user] = l + return l, nil +} + +func (s *Store) AddToken(user, roleName string, expiresDays *int) (*Token, error) { + s.mu.Lock() + if _, ok := s.roles[roleName]; !ok { + s.mu.Unlock() + return nil, 
fmt.Errorf("role '%s' does not exist", roleName) + } + if _, ok := s.tokensByUser[user]; ok { + s.mu.Unlock() + return nil, fmt.Errorf("token already exists for user '%s'", user) + } + tokStr, err := newTokenString() + if err != nil { + s.mu.Unlock() + return nil, err + } + today := s.now() + tok := &Token{ + User: user, + Token: tokStr, + Role: roleName, + IssuedAt: today.Format(dateFormat), + Expires: expiryDate(today, expiresDays), + } + s.tokens[tok.Token] = tok + s.tokensByUser[tok.User] = tok + s.mu.Unlock() + s.schedulePersist() + return tok, nil +} + +func (s *Store) RevokeToken(user string) bool { + s.mu.Lock() + tok, ok := s.tokensByUser[user] + if !ok { + s.mu.Unlock() + return false + } + delete(s.tokensByUser, user) + delete(s.tokens, tok.Token) + if l, ok := s.rateLimiters[user]; ok { + delete(s.rateLimiters, user) + l.Remove(user) + } + s.mu.Unlock() + s.schedulePersist() + return true +} + +func (s *Store) RotateToken(user string) (*Token, error) { + s.mu.Lock() + old, ok := s.tokensByUser[user] + if !ok { + s.mu.Unlock() + return nil, fmt.Errorf("no token found for user '%s'", user) + } + var expiresDays *int + if old.Expires != "" { + issued, errIss := time.Parse(dateFormat, old.IssuedAt) + exp, errExp := time.Parse(dateFormat, old.Expires) + if errIss == nil && errExp == nil { + d := int(exp.Sub(issued).Hours() / 24) + expiresDays = &d + } + } + delete(s.tokens, old.Token) + delete(s.tokensByUser, user) + if l, ok := s.rateLimiters[user]; ok { + delete(s.rateLimiters, user) + l.Remove(user) + } + tokStr, err := newTokenString() + if err != nil { + s.mu.Unlock() + return nil, err + } + today := s.now() + newTok := &Token{ + User: user, + Token: tokStr, + Role: old.Role, + IssuedAt: today.Format(dateFormat), + Expires: expiryDate(today, expiresDays), + } + s.tokens[newTok.Token] = newTok + s.tokensByUser[user] = newTok + s.mu.Unlock() + s.schedulePersist() + return newTok, nil +} + +func (s *Store) RotateAllTokens() ([]*Token, error) { + 
// TokenInfo is one row of the ListTokens result. It describes a user's token
// without exposing the token string itself.
type TokenInfo struct {
	User string `json:"user" yaml:"user"`
	Role string `json:"role" yaml:"role"`
	// TopK is the role's TopK (an int), or the string "?" when the token's
	// role is not present in the store.
	TopK any `json:"top_k" yaml:"top_k"`
	// RateLimit is the role's RateLimit string, or "?" when the token's role
	// is not present in the store.
	RateLimit any `json:"rate_limit" yaml:"rate_limit"`
	// Expires is the token's expiry date, or "never" when it has none.
	Expires string `json:"expires" yaml:"expires"`
}
+ } + out = append(out, info) + } + return out +} + +type RoleInfo struct { + Name string `json:"name" yaml:"name"` + Scope []string `json:"scope" yaml:"scope"` + TopK int `json:"top_k" yaml:"top_k"` + RateLimit string `json:"rate_limit" yaml:"rate_limit"` +} + +func (s *Store) AddRole(name string, scope []string, topK int, rateLimit string) (*Role, error) { + if err := validateRateLimit(rateLimit); err != nil { + return nil, err + } + s.mu.Lock() + if _, ok := s.roles[name]; ok { + s.mu.Unlock() + return nil, fmt.Errorf("role '%s' already exists", name) + } + role := &Role{Name: name, Scope: scope, TopK: topK, RateLimit: rateLimit} + s.roles[name] = role + s.mu.Unlock() + s.schedulePersist() + return role, nil +} + +type UpdateRoleOpts struct { + Scope *[]string + TopK *int + RateLimit *string +} + +func (s *Store) UpdateRole(name string, opts UpdateRoleOpts) (*Role, error) { + if opts.RateLimit != nil { + if err := validateRateLimit(*opts.RateLimit); err != nil { + return nil, err + } + } + s.mu.Lock() + role, ok := s.roles[name] + if !ok { + s.mu.Unlock() + return nil, fmt.Errorf("role '%s' does not exist", name) + } + if opts.Scope != nil { + role.Scope = *opts.Scope + } + if opts.TopK != nil { + role.TopK = *opts.TopK + } + if opts.RateLimit != nil { + role.RateLimit = *opts.RateLimit + for _, tok := range s.tokensByUser { + if tok.Role == name { + delete(s.rateLimiters, tok.User) + } + } + } + s.mu.Unlock() + s.schedulePersist() + return role, nil +} + +func (s *Store) DeleteRole(name string) error { + if isDefaultRoleName(name) { + return fmt.Errorf("Cannot delete default role '%s'", name) + } + s.mu.Lock() + if _, ok := s.roles[name]; !ok { + s.mu.Unlock() + return fmt.Errorf("role '%s' does not exist", name) + } + for _, tok := range s.tokensByUser { + if tok.Role == name { + s.mu.Unlock() + return fmt.Errorf("Cannot delete role '%s': token for user '%s' is assigned to it", name, tok.User) + } + } + delete(s.roles, name) + s.mu.Unlock() + 
s.schedulePersist() + return nil +} + +func (s *Store) ListRoles() []RoleInfo { + s.mu.RLock() + defer s.mu.RUnlock() + names := make([]string, 0, len(s.roles)) + for n := range s.roles { + names = append(names, n) + } + sort.Strings(names) + out := make([]RoleInfo, 0, len(names)) + for _, n := range names { + r := s.roles[n] + scope := append([]string(nil), r.Scope...) + out = append(out, RoleInfo{Name: r.Name, Scope: scope, TopK: r.TopK, RateLimit: r.RateLimit}) + } + return out +} + +// Shutdown cancels any pending persist and waits for in-flight writes to finish. +// Use Flush instead when you want pending changes to be written before exit. +func (s *Store) Shutdown() { + s.persistMu.Lock() + s.persistClosed = true + if s.persistTimer != nil { + s.persistTimer.Stop() + s.persistTimer = nil + } + s.persistMu.Unlock() + s.persistWG.Wait() +} + +// Flush forces any pending debounced persist to run synchronously, +// then blocks until in-flight writes complete. +func (s *Store) Flush() { + s.persistMu.Lock() + pending := false + if s.persistTimer != nil { + if s.persistTimer.Stop() { + pending = true + } + s.persistTimer = nil + } + s.persistMu.Unlock() + if pending { + s.doPersist() + } + s.persistWG.Wait() +} + +func (s *Store) schedulePersist() { + s.persistMu.Lock() + defer s.persistMu.Unlock() + if s.persistClosed { + return + } + if s.rolesPath == "" || s.tokensPath == "" { + return + } + if s.persistTimer != nil { + s.persistTimer.Stop() + } + s.persistTimer = time.AfterFunc(persistDebounce, func() { + s.persistMu.Lock() + s.persistTimer = nil + closed := s.persistClosed + s.persistMu.Unlock() + if closed { + return + } + s.doPersist() + }) +} + +func (s *Store) doPersist() { + s.persistWG.Add(1) + defer s.persistWG.Done() + + s.mu.RLock() + rolesPath := s.rolesPath + tokensPath := s.tokensPath + + rolesDoc := struct { + Roles map[string]struct { + Scope []string `yaml:"scope"` + TopK int `yaml:"top_k"` + RateLimit string `yaml:"rate_limit"` + } 
`yaml:"roles"` + }{Roles: make(map[string]struct { + Scope []string `yaml:"scope"` + TopK int `yaml:"top_k"` + RateLimit string `yaml:"rate_limit"` + })} + for n, r := range s.roles { + rolesDoc.Roles[n] = struct { + Scope []string `yaml:"scope"` + TopK int `yaml:"top_k"` + RateLimit string `yaml:"rate_limit"` + }{Scope: append([]string(nil), r.Scope...), TopK: r.TopK, RateLimit: r.RateLimit} + } + + tokensDoc := struct { + Tokens []map[string]string `yaml:"tokens"` + }{Tokens: make([]map[string]string, 0, len(s.tokensByUser))} + users := make([]string, 0, len(s.tokensByUser)) + for u := range s.tokensByUser { + users = append(users, u) + } + sort.Strings(users) + for _, u := range users { + t := s.tokensByUser[u] + entry := map[string]string{ + "user": t.User, + "token": t.Token, + "role": t.Role, + "issued_at": t.IssuedAt, + } + if t.Expires != "" { + entry["expires"] = t.Expires + } + tokensDoc.Tokens = append(tokensDoc.Tokens, entry) + } + s.mu.RUnlock() + + if err := atomicWriteYAML(rolesPath, rolesDoc); err != nil { + fmt.Fprintf(os.Stderr, "tokens: persist roles failed: %v\n", err) + } + if err := atomicWriteYAML(tokensPath, tokensDoc); err != nil { + fmt.Fprintf(os.Stderr, "tokens: persist tokens failed: %v\n", err) + } +} + +func atomicWriteYAML(path string, data any) error { + dir := filepath.Dir(path) + if dir == "" { + dir = "." 
+ } + if err := os.MkdirAll(dir, 0o750); err != nil { + return err + } + tmp, err := os.CreateTemp(dir, ".persist-*.tmp") + if err != nil { + return err + } + tmpPath := tmp.Name() + enc := yaml.NewEncoder(tmp) + enc.SetIndent(2) + if err := enc.Encode(data); err != nil { + _ = enc.Close() + _ = tmp.Close() + _ = os.Remove(tmpPath) + return err + } + if err := enc.Close(); err != nil { + _ = tmp.Close() + _ = os.Remove(tmpPath) + return err + } + if err := tmp.Close(); err != nil { + _ = os.Remove(tmpPath) + return err + } + return os.Rename(tmpPath, path) +} + +func newTokenString() (string, error) { + b := make([]byte, 16) + if _, err := rand.Read(b); err != nil { + return "", err + } + return "evt_" + hex.EncodeToString(b), nil +} + +func expiryDate(today time.Time, days *int) string { + if days == nil { + return "" + } + return today.AddDate(0, 0, *days).Format(dateFormat) +} diff --git a/vault/internal/tokens/store_test.go b/vault/internal/tokens/store_test.go new file mode 100644 index 0000000..58c16eb --- /dev/null +++ b/vault/internal/tokens/store_test.go @@ -0,0 +1,563 @@ +package tokens + +import ( + "errors" + "os" + "path/filepath" + "strings" + "testing" + "time" +) + +func newTestStore(t *testing.T) *Store { + t.Helper() + s := NewStore() + for n, r := range DefaultRoles() { + s.roles[n] = r + } + return s +} + +func intp(v int) *int { return &v } + +// ── add / validate / revoke ──────────────────────────────────────── + +func TestAddAndValidateToken(t *testing.T) { + s := newTestStore(t) + tok, err := s.AddToken("alice", "member", intp(90)) + if err != nil { + t.Fatalf("AddToken: %v", err) + } + if tok.User != "alice" { + t.Errorf("user = %q, want alice", tok.User) + } + if !strings.HasPrefix(tok.Token, "evt_") { + t.Errorf("token = %q, want evt_ prefix", tok.Token) + } + if len(tok.Token) != 36 { + t.Errorf("token length = %d, want 36", len(tok.Token)) + } + if tok.Role != "member" { + t.Errorf("role = %q, want member", tok.Role) + } + + user, 
role, err := s.Validate(tok.Token) + if err != nil { + t.Fatalf("Validate: %v", err) + } + if user != "alice" || role.Name != "member" { + t.Errorf("got (%q, %q), want (alice, member)", user, role.Name) + } +} + +func TestInvalidTokenRaises(t *testing.T) { + s := newTestStore(t) + _, _, err := s.Validate("nonexistent_token") + if !errors.Is(err, ErrTokenNotFound{}) { + t.Errorf("err = %v, want ErrTokenNotFound", err) + } +} + +func TestExpiredTokenRaises(t *testing.T) { + s := newTestStore(t) + tok, err := s.AddToken("bob", "member", intp(1)) + if err != nil { + t.Fatalf("AddToken: %v", err) + } + tok.Expires = time.Now().AddDate(0, 0, -1).Format(dateFormat) + + _, _, err = s.Validate(tok.Token) + var exp ErrTokenExpired + if !errors.As(err, &exp) { + t.Fatalf("err = %v, want ErrTokenExpired", err) + } + if exp.User != "bob" { + t.Errorf("user = %q, want bob", exp.User) + } +} + +func TestRevokeToken(t *testing.T) { + s := newTestStore(t) + tok, _ := s.AddToken("charlie", "member", nil) + if !s.RevokeToken("charlie") { + t.Fatal("RevokeToken returned false") + } + _, _, err := s.Validate(tok.Token) + if !errors.Is(err, ErrTokenNotFound{}) { + t.Errorf("err after revoke = %v, want ErrTokenNotFound", err) + } +} + +func TestRevokeNonexistentReturnsFalse(t *testing.T) { + s := newTestStore(t) + if s.RevokeToken("nobody") { + t.Error("RevokeToken(nobody) = true, want false") + } +} + +func TestDuplicateUserRejected(t *testing.T) { + s := newTestStore(t) + if _, err := s.AddToken("alice", "member", nil); err != nil { + t.Fatalf("first AddToken: %v", err) + } + _, err := s.AddToken("alice", "member", nil) + if err == nil || !strings.Contains(err.Error(), "already exists") { + t.Errorf("err = %v, want 'already exists'", err) + } +} + +func TestInvalidRoleRejected(t *testing.T) { + s := newTestStore(t) + _, err := s.AddToken("alice", "nonexistent_role", nil) + if err == nil || !strings.Contains(err.Error(), "does not exist") { + t.Errorf("err = %v, want 'does not exist'", 
err) + } +} + +func TestListTokensHidesValues(t *testing.T) { + s := newTestStore(t) + if _, err := s.AddToken("alice", "member", intp(30)); err != nil { + t.Fatalf("AddToken: %v", err) + } + res := s.ListTokens() + if len(res) != 1 { + t.Fatalf("len = %d, want 1", len(res)) + } + if res[0].User != "alice" { + t.Errorf("user = %q, want alice", res[0].User) + } + // TokenInfo struct intentionally has no Token field. +} + +func TestRateLimitingPerUser(t *testing.T) { + s := newTestStore(t) + if _, err := s.AddRole("limited", []string{"get_public_key"}, 5, "2/60s"); err != nil { + t.Fatalf("AddRole: %v", err) + } + tok, err := s.AddToken("ratelimited_user", "limited", nil) + if err != nil { + t.Fatalf("AddToken: %v", err) + } + if _, _, err := s.Validate(tok.Token); err != nil { + t.Fatalf("first Validate: %v", err) + } + if _, _, err := s.Validate(tok.Token); err != nil { + t.Fatalf("second Validate: %v", err) + } + _, _, err = s.Validate(tok.Token) + var rl ErrRateLimit + if !errors.As(err, &rl) { + t.Fatalf("third Validate err = %v, want ErrRateLimit", err) + } +} + +func TestTopKFromRole(t *testing.T) { + s := newTestStore(t) + tok, _ := s.AddToken("alice", "member", nil) + _, role, err := s.Validate(tok.Token) + if err != nil { + t.Fatal(err) + } + if role.TopK != 10 { + t.Errorf("top_k = %d, want 10", role.TopK) + } +} + +func TestNeverExpiresToken(t *testing.T) { + s := newTestStore(t) + tok, err := s.AddToken("permanent_user", "admin", nil) + if err != nil { + t.Fatal(err) + } + if tok.Expires != "" { + t.Errorf("expires = %q, want empty", tok.Expires) + } + if tok.IsExpired() { + t.Error("IsExpired = true, want false") + } + user, _, err := s.Validate(tok.Token) + if err != nil { + t.Fatal(err) + } + if user != "permanent_user" { + t.Errorf("user = %q, want permanent_user", user) + } +} + +func TestPersistAndReload(t *testing.T) { + dir := t.TempDir() + rolesPath := filepath.Join(dir, "roles.yml") + tokensPath := filepath.Join(dir, "tokens.yml") + + s1 := 
NewStore() + if err := s1.LoadFromFiles(rolesPath, tokensPath); err != nil { + t.Fatalf("LoadFromFiles: %v", err) + } + if _, err := s1.AddRole("researcher", []string{"get_public_key", "decrypt_scores"}, 3, "10/60s"); err != nil { + t.Fatalf("AddRole: %v", err) + } + tok, err := s1.AddToken("alice", "member", intp(90)) + if err != nil { + t.Fatalf("AddToken: %v", err) + } + s1.Flush() + + s2 := NewStore() + if err := s2.LoadFromFiles(rolesPath, tokensPath); err != nil { + t.Fatalf("reload LoadFromFiles: %v", err) + } + user, role, err := s2.Validate(tok.Token) + if err != nil { + t.Fatalf("reload Validate: %v", err) + } + if user != "alice" || role.Name != "member" { + t.Errorf("got (%q, %q), want (alice, member)", user, role.Name) + } + + roles := s2.ListRoles() + found := false + for _, r := range roles { + if r.Name == "researcher" { + found = true + break + } + } + if !found { + t.Error("researcher role missing after reload") + } +} + +// ── rotation ─────────────────────────────────────────────────────── + +func TestRotateToken(t *testing.T) { + s := newTestStore(t) + old, _ := s.AddToken("alice", "member", nil) + newTok, err := s.RotateToken("alice") + if err != nil { + t.Fatal(err) + } + if newTok.User != "alice" || newTok.Role != "member" { + t.Errorf("got (%q, %q), want (alice, member)", newTok.User, newTok.Role) + } + if !strings.HasPrefix(newTok.Token, "evt_") { + t.Errorf("token = %q, want evt_ prefix", newTok.Token) + } + if newTok.Token == old.Token { + t.Error("new token equals old token") + } +} + +func TestRotatePreservesExpiry(t *testing.T) { + s := newTestStore(t) + if _, err := s.AddToken("alice", "member", intp(90)); err != nil { + t.Fatal(err) + } + newTok, err := s.RotateToken("alice") + if err != nil { + t.Fatal(err) + } + if newTok.Expires == "" { + t.Fatal("expires empty after rotation") + } + got, err := time.Parse(dateFormat, newTok.Expires) + if err != nil { + t.Fatal(err) + } + want := time.Now().UTC().AddDate(0, 0, 
90).Format(dateFormat) + if got.Format(dateFormat) != want { + t.Errorf("expires = %s, want %s", got.Format(dateFormat), want) + } +} + +func TestRotateInvalidatesOldToken(t *testing.T) { + s := newTestStore(t) + old, _ := s.AddToken("alice", "member", nil) + if _, err := s.RotateToken("alice"); err != nil { + t.Fatal(err) + } + if _, _, err := s.Validate(old.Token); !errors.Is(err, ErrTokenNotFound{}) { + t.Errorf("err = %v, want ErrTokenNotFound", err) + } +} + +func TestRotateNewTokenValidates(t *testing.T) { + s := newTestStore(t) + if _, err := s.AddToken("alice", "member", nil); err != nil { + t.Fatal(err) + } + newTok, err := s.RotateToken("alice") + if err != nil { + t.Fatal(err) + } + user, role, err := s.Validate(newTok.Token) + if err != nil { + t.Fatal(err) + } + if user != "alice" || role.Name != "member" { + t.Errorf("got (%q, %q), want (alice, member)", user, role.Name) + } +} + +func TestRotateNonexistentUserRaises(t *testing.T) { + s := newTestStore(t) + _, err := s.RotateToken("nobody") + if err == nil || !strings.Contains(err.Error(), "no token found") { + t.Errorf("err = %v, want 'no token found'", err) + } +} + +func TestRotateAll(t *testing.T) { + s := newTestStore(t) + tokA, _ := s.AddToken("alice", "member", nil) + tokB, _ := s.AddToken("bob", "admin", nil) + res, err := s.RotateAllTokens() + if err != nil { + t.Fatal(err) + } + if len(res) != 2 { + t.Fatalf("len = %d, want 2", len(res)) + } + got := map[string]bool{} + for _, tk := range res { + got[tk.User] = true + } + if !got["alice"] || !got["bob"] { + t.Errorf("got users = %v, want alice + bob", got) + } + if _, _, err := s.Validate(tokA.Token); !errors.Is(err, ErrTokenNotFound{}) { + t.Errorf("alice old token still valid") + } + if _, _, err := s.Validate(tokB.Token); !errors.Is(err, ErrTokenNotFound{}) { + t.Errorf("bob old token still valid") + } +} + +func TestRotatePersists(t *testing.T) { + dir := t.TempDir() + rolesPath := filepath.Join(dir, "roles.yml") + tokensPath := 
filepath.Join(dir, "tokens.yml") + + s1 := NewStore() + if err := s1.LoadFromFiles(rolesPath, tokensPath); err != nil { + t.Fatal(err) + } + if _, err := s1.AddToken("alice", "member", intp(30)); err != nil { + t.Fatal(err) + } + newTok, err := s1.RotateToken("alice") + if err != nil { + t.Fatal(err) + } + s1.Flush() + + s2 := NewStore() + if err := s2.LoadFromFiles(rolesPath, tokensPath); err != nil { + t.Fatal(err) + } + user, role, err := s2.Validate(newTok.Token) + if err != nil { + t.Fatal(err) + } + if user != "alice" || role.Name != "member" { + t.Errorf("got (%q, %q), want (alice, member)", user, role.Name) + } +} + +// ── role CRUD ────────────────────────────────────────────────────── + +func TestCreateRole(t *testing.T) { + s := newTestStore(t) + r, err := s.AddRole("researcher", []string{"get_public_key", "decrypt_scores"}, 3, "10/60s") + if err != nil { + t.Fatal(err) + } + if r.Name != "researcher" || r.TopK != 3 { + t.Errorf("got (%q, %d), want (researcher, 3)", r.Name, r.TopK) + } + if err := r.CheckScope("get_public_key"); err != nil { + t.Errorf("CheckScope(get_public_key): %v", err) + } +} + +func TestCreateDuplicateRoleRejected(t *testing.T) { + s := newTestStore(t) + _, err := s.AddRole("admin", []string{"get_public_key"}, 5, "30/60s") + if err == nil || !strings.Contains(err.Error(), "already exists") { + t.Errorf("err = %v, want 'already exists'", err) + } +} + +func TestUpdateRole(t *testing.T) { + s := newTestStore(t) + r, err := s.UpdateRole("member", UpdateRoleOpts{TopK: intp(8)}) + if err != nil { + t.Fatal(err) + } + if r.TopK != 8 || r.Name != "member" { + t.Errorf("got (%q, %d), want (member, 8)", r.Name, r.TopK) + } +} + +func TestUpdateNonexistentRoleRejected(t *testing.T) { + s := newTestStore(t) + _, err := s.UpdateRole("nonexistent", UpdateRoleOpts{TopK: intp(5)}) + if err == nil || !strings.Contains(err.Error(), "does not exist") { + t.Errorf("err = %v, want 'does not exist'", err) + } +} + +func TestDeleteCustomRole(t 
*testing.T) { + s := newTestStore(t) + if _, err := s.AddRole("temp", []string{"get_public_key"}, 1, "5/60s"); err != nil { + t.Fatal(err) + } + if err := s.DeleteRole("temp"); err != nil { + t.Fatal(err) + } + for _, r := range s.ListRoles() { + if r.Name == "temp" { + t.Error("temp role still present after delete") + } + } +} + +func TestDeleteDefaultRoleRejected(t *testing.T) { + s := newTestStore(t) + for _, name := range []string{"admin", "member"} { + err := s.DeleteRole(name) + if err == nil || !strings.Contains(err.Error(), "Cannot delete default") { + t.Errorf("delete %s: err = %v, want 'Cannot delete default'", name, err) + } + } +} + +func TestDeleteRoleWithActiveTokensRejected(t *testing.T) { + s := newTestStore(t) + if _, err := s.AddRole("temp", []string{"get_public_key"}, 1, "5/60s"); err != nil { + t.Fatal(err) + } + if _, err := s.AddToken("user1", "temp", nil); err != nil { + t.Fatal(err) + } + err := s.DeleteRole("temp") + if err == nil || !strings.Contains(err.Error(), "token for user") { + t.Errorf("err = %v, want 'token for user'", err) + } +} + +func TestListRoles(t *testing.T) { + s := newTestStore(t) + roles := s.ListRoles() + if len(roles) < 2 { + t.Fatalf("len = %d, want >= 2", len(roles)) + } + names := map[string]bool{} + for _, r := range roles { + names[r.Name] = true + } + if !names["admin"] || !names["member"] { + t.Errorf("missing default roles, got %v", names) + } +} + +func TestUpdateRoleClearsRateLimiters(t *testing.T) { + s := newTestStore(t) + tok, _ := s.AddToken("alice", "member", nil) + if _, _, err := s.Validate(tok.Token); err != nil { + t.Fatal(err) + } + if _, ok := s.rateLimiters["alice"]; !ok { + t.Fatal("rate limiter not created on validate") + } + rl := "100/60s" + if _, err := s.UpdateRole("member", UpdateRoleOpts{RateLimit: &rl}); err != nil { + t.Fatal(err) + } + if _, ok := s.rateLimiters["alice"]; ok { + t.Error("rate limiter not cleared after rate_limit change") + } +} + +func TestRoleRateLimitParsed(t 
*testing.T) { + r := &Role{Name: "test", RateLimit: "30/60s"} + maxReq, window, err := r.RateLimitParsed() + if err != nil { + t.Fatal(err) + } + if maxReq != 30 { + t.Errorf("max = %d, want 30", maxReq) + } + if window != 60*time.Second { + t.Errorf("window = %v, want 60s", window) + } +} + +// ── scope check ──────────────────────────────────────────────────── + +func TestScopeAllowsValidMethod(t *testing.T) { + r := &Role{Name: "member", Scope: []string{"get_public_key", "decrypt_scores"}} + if err := r.CheckScope("get_public_key"); err != nil { + t.Errorf("err = %v, want nil", err) + } +} + +func TestScopeRejectsInvalidMethod(t *testing.T) { + r := &Role{Name: "limited", Scope: []string{"get_public_key"}} + err := r.CheckScope("decrypt_scores") + var se ErrScope + if !errors.As(err, &se) { + t.Fatalf("err = %v, want ErrScope", err) + } + if se.Method != "decrypt_scores" || se.RoleName != "limited" { + t.Errorf("got (%q, %q), want (decrypt_scores, limited)", se.Method, se.RoleName) + } +} + +// ── TopKExceeded ─────────────────────────────────────────────────── + +func TestTopKExceededMessage(t *testing.T) { + err := ErrTopKExceeded{Requested: 15, MaxTopK: 10, RoleName: "admin"} + msg := err.Error() + for _, want := range []string{"15", "10", "admin"} { + if !strings.Contains(msg, want) { + t.Errorf("msg = %q, missing %q", msg, want) + } + } +} + +// ── demo token loader ───────────────────────────────────────────── + +func TestLoadDefaultsWithDemoToken(t *testing.T) { + s := NewStore() + s.LoadDefaultsWithDemoToken() + user, role, err := s.Validate(DemoToken) + if err != nil { + t.Fatal(err) + } + if user != "demo" || role.Name != "admin" { + t.Errorf("got (%q, %q), want (demo, admin)", user, role.Name) + } +} + +// ── persistence file content sanity ────────────────────────────── + +func TestPersistedTokensFileContainsToken(t *testing.T) { + dir := t.TempDir() + rolesPath := filepath.Join(dir, "roles.yml") + tokensPath := filepath.Join(dir, "tokens.yml") + + s 
:= NewStore() + if err := s.LoadFromFiles(rolesPath, tokensPath); err != nil { + t.Fatal(err) + } + tok, _ := s.AddToken("alice", "member", intp(7)) + s.Flush() + + body, err := os.ReadFile(tokensPath) + if err != nil { + t.Fatal(err) + } + if !strings.Contains(string(body), tok.Token) { + t.Errorf("tokens.yml missing token %q", tok.Token) + } +} diff --git a/vault/internal/tokens/token.go b/vault/internal/tokens/token.go new file mode 100644 index 0000000..563dbae --- /dev/null +++ b/vault/internal/tokens/token.go @@ -0,0 +1,29 @@ +package tokens + +import "time" + +type Token struct { + User string `yaml:"user"` + Token string `yaml:"token"` + Role string `yaml:"role"` + IssuedAt string `yaml:"issued_at"` // ISO date + Expires string `yaml:"expires,omitempty"` // ISO date, empty = never +} + +const dateFormat = "2006-01-02" + +func (t *Token) IsExpired() bool { + return t.IsExpiredAt(time.Now().UTC()) +} + +func (t *Token) IsExpiredAt(now time.Time) bool { + if t.Expires == "" { + return false + } + exp, err := time.Parse(dateFormat, t.Expires) + if err != nil { + return false + } + today := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, time.UTC) + return exp.Before(today) +} diff --git a/vault/pkg/vaultpb/vault_service.pb.go b/vault/pkg/vaultpb/vault_service.pb.go new file mode 100644 index 0000000..e2ba23d --- /dev/null +++ b/vault/pkg/vaultpb/vault_service.pb.go @@ -0,0 +1,499 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.36.11 +// protoc (unknown) +// source: vault_service.proto + +package vaultpb + +import ( + _ "buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go/buf/validate" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" + unsafe "unsafe" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. 
+ _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type GetPublicKeyRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + // Auth token. Required, Fixed 36 chars (evt_ + 32 hex). + Token string `protobuf:"bytes,1,opt,name=token,proto3" json:"token,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *GetPublicKeyRequest) Reset() { + *x = GetPublicKeyRequest{} + mi := &file_vault_service_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *GetPublicKeyRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetPublicKeyRequest) ProtoMessage() {} + +func (x *GetPublicKeyRequest) ProtoReflect() protoreflect.Message { + mi := &file_vault_service_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetPublicKeyRequest.ProtoReflect.Descriptor instead. 
+func (*GetPublicKeyRequest) Descriptor() ([]byte, []int) { + return file_vault_service_proto_rawDescGZIP(), []int{0} +} + +func (x *GetPublicKeyRequest) GetToken() string { + if x != nil { + return x.Token + } + return "" +} + +type GetPublicKeyResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + // JSON string: {"EncKey.json": "...", "EvalKey.json": "...", "index_name": "..."} + KeyBundleJson string `protobuf:"bytes,1,opt,name=key_bundle_json,json=keyBundleJson,proto3" json:"key_bundle_json,omitempty"` + Error string `protobuf:"bytes,2,opt,name=error,proto3" json:"error,omitempty"` // Non-empty on error + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *GetPublicKeyResponse) Reset() { + *x = GetPublicKeyResponse{} + mi := &file_vault_service_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *GetPublicKeyResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetPublicKeyResponse) ProtoMessage() {} + +func (x *GetPublicKeyResponse) ProtoReflect() protoreflect.Message { + mi := &file_vault_service_proto_msgTypes[1] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetPublicKeyResponse.ProtoReflect.Descriptor instead. +func (*GetPublicKeyResponse) Descriptor() ([]byte, []int) { + return file_vault_service_proto_rawDescGZIP(), []int{1} +} + +func (x *GetPublicKeyResponse) GetKeyBundleJson() string { + if x != nil { + return x.KeyBundleJson + } + return "" +} + +func (x *GetPublicKeyResponse) GetError() string { + if x != nil { + return x.Error + } + return "" +} + +type DecryptScoresRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + // Auth token. Required, Fixed 36 chars (evt_ + 32 hex). 
+ Token string `protobuf:"bytes,1,opt,name=token,proto3" json:"token,omitempty"` + // Base64-encoded CiphertextScore protobuf. Required. + EncryptedBlobB64 string `protobuf:"bytes,2,opt,name=encrypted_blob_b64,json=encryptedBlobB64,proto3" json:"encrypted_blob_b64,omitempty"` + // Number of top results to return. + // Per-role limits (e.g., admin=50, member=10) enforced at the business logic layer. + TopK int32 `protobuf:"varint,3,opt,name=top_k,json=topK,proto3" json:"top_k,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *DecryptScoresRequest) Reset() { + *x = DecryptScoresRequest{} + mi := &file_vault_service_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *DecryptScoresRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DecryptScoresRequest) ProtoMessage() {} + +func (x *DecryptScoresRequest) ProtoReflect() protoreflect.Message { + mi := &file_vault_service_proto_msgTypes[2] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DecryptScoresRequest.ProtoReflect.Descriptor instead. 
+func (*DecryptScoresRequest) Descriptor() ([]byte, []int) { + return file_vault_service_proto_rawDescGZIP(), []int{2} +} + +func (x *DecryptScoresRequest) GetToken() string { + if x != nil { + return x.Token + } + return "" +} + +func (x *DecryptScoresRequest) GetEncryptedBlobB64() string { + if x != nil { + return x.EncryptedBlobB64 + } + return "" +} + +func (x *DecryptScoresRequest) GetTopK() int32 { + if x != nil { + return x.TopK + } + return 0 +} + +type ScoreEntry struct { + state protoimpl.MessageState `protogen:"open.v1"` + ShardIdx int32 `protobuf:"varint,1,opt,name=shard_idx,json=shardIdx,proto3" json:"shard_idx,omitempty"` + RowIdx int32 `protobuf:"varint,2,opt,name=row_idx,json=rowIdx,proto3" json:"row_idx,omitempty"` + Score float64 `protobuf:"fixed64,3,opt,name=score,proto3" json:"score,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ScoreEntry) Reset() { + *x = ScoreEntry{} + mi := &file_vault_service_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ScoreEntry) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ScoreEntry) ProtoMessage() {} + +func (x *ScoreEntry) ProtoReflect() protoreflect.Message { + mi := &file_vault_service_proto_msgTypes[3] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ScoreEntry.ProtoReflect.Descriptor instead. 
+func (*ScoreEntry) Descriptor() ([]byte, []int) { + return file_vault_service_proto_rawDescGZIP(), []int{3} +} + +func (x *ScoreEntry) GetShardIdx() int32 { + if x != nil { + return x.ShardIdx + } + return 0 +} + +func (x *ScoreEntry) GetRowIdx() int32 { + if x != nil { + return x.RowIdx + } + return 0 +} + +func (x *ScoreEntry) GetScore() float64 { + if x != nil { + return x.Score + } + return 0 +} + +type DecryptScoresResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + Results []*ScoreEntry `protobuf:"bytes,1,rep,name=results,proto3" json:"results,omitempty"` // Top-K results sorted by score descending + Error string `protobuf:"bytes,2,opt,name=error,proto3" json:"error,omitempty"` // Non-empty on error + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *DecryptScoresResponse) Reset() { + *x = DecryptScoresResponse{} + mi := &file_vault_service_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *DecryptScoresResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DecryptScoresResponse) ProtoMessage() {} + +func (x *DecryptScoresResponse) ProtoReflect() protoreflect.Message { + mi := &file_vault_service_proto_msgTypes[4] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DecryptScoresResponse.ProtoReflect.Descriptor instead. 
+func (*DecryptScoresResponse) Descriptor() ([]byte, []int) { + return file_vault_service_proto_rawDescGZIP(), []int{4} +} + +func (x *DecryptScoresResponse) GetResults() []*ScoreEntry { + if x != nil { + return x.Results + } + return nil +} + +func (x *DecryptScoresResponse) GetError() string { + if x != nil { + return x.Error + } + return "" +} + +type DecryptMetadataRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + // Auth token. Required, Fixed 36 chars (evt_ + 32 hex). + Token string `protobuf:"bytes,1,opt,name=token,proto3" json:"token,omitempty"` + // Base64-encoded AES blobs. Required, max 1000 items, each non-empty. + EncryptedMetadataList []string `protobuf:"bytes,2,rep,name=encrypted_metadata_list,json=encryptedMetadataList,proto3" json:"encrypted_metadata_list,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *DecryptMetadataRequest) Reset() { + *x = DecryptMetadataRequest{} + mi := &file_vault_service_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *DecryptMetadataRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DecryptMetadataRequest) ProtoMessage() {} + +func (x *DecryptMetadataRequest) ProtoReflect() protoreflect.Message { + mi := &file_vault_service_proto_msgTypes[5] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DecryptMetadataRequest.ProtoReflect.Descriptor instead. 
+func (*DecryptMetadataRequest) Descriptor() ([]byte, []int) { + return file_vault_service_proto_rawDescGZIP(), []int{5} +} + +func (x *DecryptMetadataRequest) GetToken() string { + if x != nil { + return x.Token + } + return "" +} + +func (x *DecryptMetadataRequest) GetEncryptedMetadataList() []string { + if x != nil { + return x.EncryptedMetadataList + } + return nil +} + +type DecryptMetadataResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + // Each element is a JSON-serialized decrypted metadata object + DecryptedMetadata []string `protobuf:"bytes,1,rep,name=decrypted_metadata,json=decryptedMetadata,proto3" json:"decrypted_metadata,omitempty"` + Error string `protobuf:"bytes,2,opt,name=error,proto3" json:"error,omitempty"` // Non-empty on error + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *DecryptMetadataResponse) Reset() { + *x = DecryptMetadataResponse{} + mi := &file_vault_service_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *DecryptMetadataResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DecryptMetadataResponse) ProtoMessage() {} + +func (x *DecryptMetadataResponse) ProtoReflect() protoreflect.Message { + mi := &file_vault_service_proto_msgTypes[6] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DecryptMetadataResponse.ProtoReflect.Descriptor instead. 
+func (*DecryptMetadataResponse) Descriptor() ([]byte, []int) { + return file_vault_service_proto_rawDescGZIP(), []int{6} +} + +func (x *DecryptMetadataResponse) GetDecryptedMetadata() []string { + if x != nil { + return x.DecryptedMetadata + } + return nil +} + +func (x *DecryptMetadataResponse) GetError() string { + if x != nil { + return x.Error + } + return "" +} + +var File_vault_service_proto protoreflect.FileDescriptor + +const file_vault_service_proto_rawDesc = "" + + "\n" + + "\x13vault_service.proto\x12\rrune.vault.v1\x1a\x1bbuf/validate/validate.proto\"6\n" + + "\x13GetPublicKeyRequest\x12\x1f\n" + + "\x05token\x18\x01 \x01(\tB\t\xbaH\x06r\x04\x10$\x18$R\x05token\"T\n" + + "\x14GetPublicKeyResponse\x12&\n" + + "\x0fkey_bundle_json\x18\x01 \x01(\tR\rkeyBundleJson\x12\x14\n" + + "\x05error\x18\x02 \x01(\tR\x05error\"\x8f\x01\n" + + "\x14DecryptScoresRequest\x12\x1f\n" + + "\x05token\x18\x01 \x01(\tB\t\xbaH\x06r\x04\x10$\x18$R\x05token\x125\n" + + "\x12encrypted_blob_b64\x18\x02 \x01(\tB\a\xbaH\x04r\x02\x10\x01R\x10encryptedBlobB64\x12\x1f\n" + + "\x05top_k\x18\x03 \x01(\x05B\n" + + "\xbaH\a\x1a\x05\x18\xac\x02(\x01R\x04topK\"X\n" + + "\n" + + "ScoreEntry\x12\x1b\n" + + "\tshard_idx\x18\x01 \x01(\x05R\bshardIdx\x12\x17\n" + + "\arow_idx\x18\x02 \x01(\x05R\x06rowIdx\x12\x14\n" + + "\x05score\x18\x03 \x01(\x01R\x05score\"b\n" + + "\x15DecryptScoresResponse\x123\n" + + "\aresults\x18\x01 \x03(\v2\x19.rune.vault.v1.ScoreEntryR\aresults\x12\x14\n" + + "\x05error\x18\x02 \x01(\tR\x05error\"\x84\x01\n" + + "\x16DecryptMetadataRequest\x12\x1f\n" + + "\x05token\x18\x01 \x01(\tB\t\xbaH\x06r\x04\x10$\x18$R\x05token\x12I\n" + + "\x17encrypted_metadata_list\x18\x02 \x03(\tB\x11\xbaH\x0e\x92\x01\v\b\x01\x10\xe8\a\"\x04r\x02\x10\x01R\x15encryptedMetadataList\"^\n" + + "\x17DecryptMetadataResponse\x12-\n" + + "\x12decrypted_metadata\x18\x01 \x03(\tR\x11decryptedMetadata\x12\x14\n" + + "\x05error\x18\x02 \x01(\tR\x05error2\xa5\x02\n" + + "\fVaultService\x12W\n" + + 
"\fGetPublicKey\x12\".rune.vault.v1.GetPublicKeyRequest\x1a#.rune.vault.v1.GetPublicKeyResponse\x12Z\n" + + "\rDecryptScores\x12#.rune.vault.v1.DecryptScoresRequest\x1a$.rune.vault.v1.DecryptScoresResponse\x12`\n" + + "\x0fDecryptMetadata\x12%.rune.vault.v1.DecryptMetadataRequest\x1a&.rune.vault.v1.DecryptMetadataResponseb\x06proto3" + +var ( + file_vault_service_proto_rawDescOnce sync.Once + file_vault_service_proto_rawDescData []byte +) + +func file_vault_service_proto_rawDescGZIP() []byte { + file_vault_service_proto_rawDescOnce.Do(func() { + file_vault_service_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_vault_service_proto_rawDesc), len(file_vault_service_proto_rawDesc))) + }) + return file_vault_service_proto_rawDescData +} + +var file_vault_service_proto_msgTypes = make([]protoimpl.MessageInfo, 7) +var file_vault_service_proto_goTypes = []any{ + (*GetPublicKeyRequest)(nil), // 0: rune.vault.v1.GetPublicKeyRequest + (*GetPublicKeyResponse)(nil), // 1: rune.vault.v1.GetPublicKeyResponse + (*DecryptScoresRequest)(nil), // 2: rune.vault.v1.DecryptScoresRequest + (*ScoreEntry)(nil), // 3: rune.vault.v1.ScoreEntry + (*DecryptScoresResponse)(nil), // 4: rune.vault.v1.DecryptScoresResponse + (*DecryptMetadataRequest)(nil), // 5: rune.vault.v1.DecryptMetadataRequest + (*DecryptMetadataResponse)(nil), // 6: rune.vault.v1.DecryptMetadataResponse +} +var file_vault_service_proto_depIdxs = []int32{ + 3, // 0: rune.vault.v1.DecryptScoresResponse.results:type_name -> rune.vault.v1.ScoreEntry + 0, // 1: rune.vault.v1.VaultService.GetPublicKey:input_type -> rune.vault.v1.GetPublicKeyRequest + 2, // 2: rune.vault.v1.VaultService.DecryptScores:input_type -> rune.vault.v1.DecryptScoresRequest + 5, // 3: rune.vault.v1.VaultService.DecryptMetadata:input_type -> rune.vault.v1.DecryptMetadataRequest + 1, // 4: rune.vault.v1.VaultService.GetPublicKey:output_type -> rune.vault.v1.GetPublicKeyResponse + 4, // 5: 
rune.vault.v1.VaultService.DecryptScores:output_type -> rune.vault.v1.DecryptScoresResponse + 6, // 6: rune.vault.v1.VaultService.DecryptMetadata:output_type -> rune.vault.v1.DecryptMetadataResponse + 4, // [4:7] is the sub-list for method output_type + 1, // [1:4] is the sub-list for method input_type + 1, // [1:1] is the sub-list for extension type_name + 1, // [1:1] is the sub-list for extension extendee + 0, // [0:1] is the sub-list for field type_name +} + +func init() { file_vault_service_proto_init() } +func file_vault_service_proto_init() { + if File_vault_service_proto != nil { + return + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: unsafe.Slice(unsafe.StringData(file_vault_service_proto_rawDesc), len(file_vault_service_proto_rawDesc)), + NumEnums: 0, + NumMessages: 7, + NumExtensions: 0, + NumServices: 1, + }, + GoTypes: file_vault_service_proto_goTypes, + DependencyIndexes: file_vault_service_proto_depIdxs, + MessageInfos: file_vault_service_proto_msgTypes, + }.Build() + File_vault_service_proto = out.File + file_vault_service_proto_goTypes = nil + file_vault_service_proto_depIdxs = nil +} diff --git a/vault/pkg/vaultpb/vault_service_grpc.pb.go b/vault/pkg/vaultpb/vault_service_grpc.pb.go new file mode 100644 index 0000000..7108750 --- /dev/null +++ b/vault/pkg/vaultpb/vault_service_grpc.pb.go @@ -0,0 +1,209 @@ +// Code generated by protoc-gen-go-grpc. DO NOT EDIT. +// versions: +// - protoc-gen-go-grpc v1.6.1 +// - protoc (unknown) +// source: vault_service.proto + +package vaultpb + +import ( + context "context" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +// Requires gRPC-Go v1.64.0 or later. 
+const _ = grpc.SupportPackageIsVersion9 + +const ( + VaultService_GetPublicKey_FullMethodName = "/rune.vault.v1.VaultService/GetPublicKey" + VaultService_DecryptScores_FullMethodName = "/rune.vault.v1.VaultService/DecryptScores" + VaultService_DecryptMetadata_FullMethodName = "/rune.vault.v1.VaultService/DecryptMetadata" +) + +// VaultServiceClient is the client API for VaultService service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +// +// Rune-Vault gRPC service. +// Holds the FHE secret key and performs all decryption operations. +type VaultServiceClient interface { + // Returns the public key bundle (EncKey, EvalKey, optional team index name). + GetPublicKey(ctx context.Context, in *GetPublicKeyRequest, opts ...grpc.CallOption) (*GetPublicKeyResponse, error) + // Decrypts FHE-encrypted similarity scores and applies Top-K filtering. + DecryptScores(ctx context.Context, in *DecryptScoresRequest, opts ...grpc.CallOption) (*DecryptScoresResponse, error) + // Decrypts a list of AES-encrypted metadata strings. + DecryptMetadata(ctx context.Context, in *DecryptMetadataRequest, opts ...grpc.CallOption) (*DecryptMetadataResponse, error) +} + +type vaultServiceClient struct { + cc grpc.ClientConnInterface +} + +func NewVaultServiceClient(cc grpc.ClientConnInterface) VaultServiceClient { + return &vaultServiceClient{cc} +} + +func (c *vaultServiceClient) GetPublicKey(ctx context.Context, in *GetPublicKeyRequest, opts ...grpc.CallOption) (*GetPublicKeyResponse, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(GetPublicKeyResponse) + err := c.cc.Invoke(ctx, VaultService_GetPublicKey_FullMethodName, in, out, cOpts...) 
+ if err != nil { + return nil, err + } + return out, nil +} + +func (c *vaultServiceClient) DecryptScores(ctx context.Context, in *DecryptScoresRequest, opts ...grpc.CallOption) (*DecryptScoresResponse, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(DecryptScoresResponse) + err := c.cc.Invoke(ctx, VaultService_DecryptScores_FullMethodName, in, out, cOpts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *vaultServiceClient) DecryptMetadata(ctx context.Context, in *DecryptMetadataRequest, opts ...grpc.CallOption) (*DecryptMetadataResponse, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(DecryptMetadataResponse) + err := c.cc.Invoke(ctx, VaultService_DecryptMetadata_FullMethodName, in, out, cOpts...) + if err != nil { + return nil, err + } + return out, nil +} + +// VaultServiceServer is the server API for VaultService service. +// All implementations must embed UnimplementedVaultServiceServer +// for forward compatibility. +// +// Rune-Vault gRPC service. +// Holds the FHE secret key and performs all decryption operations. +type VaultServiceServer interface { + // Returns the public key bundle (EncKey, EvalKey, optional team index name). + GetPublicKey(context.Context, *GetPublicKeyRequest) (*GetPublicKeyResponse, error) + // Decrypts FHE-encrypted similarity scores and applies Top-K filtering. + DecryptScores(context.Context, *DecryptScoresRequest) (*DecryptScoresResponse, error) + // Decrypts a list of AES-encrypted metadata strings. + DecryptMetadata(context.Context, *DecryptMetadataRequest) (*DecryptMetadataResponse, error) + mustEmbedUnimplementedVaultServiceServer() +} + +// UnimplementedVaultServiceServer must be embedded to have +// forward compatible implementations. +// +// NOTE: this should be embedded by value instead of pointer to avoid a nil +// pointer dereference when methods are called. 
+type UnimplementedVaultServiceServer struct{} + +func (UnimplementedVaultServiceServer) GetPublicKey(context.Context, *GetPublicKeyRequest) (*GetPublicKeyResponse, error) { + return nil, status.Error(codes.Unimplemented, "method GetPublicKey not implemented") +} +func (UnimplementedVaultServiceServer) DecryptScores(context.Context, *DecryptScoresRequest) (*DecryptScoresResponse, error) { + return nil, status.Error(codes.Unimplemented, "method DecryptScores not implemented") +} +func (UnimplementedVaultServiceServer) DecryptMetadata(context.Context, *DecryptMetadataRequest) (*DecryptMetadataResponse, error) { + return nil, status.Error(codes.Unimplemented, "method DecryptMetadata not implemented") +} +func (UnimplementedVaultServiceServer) mustEmbedUnimplementedVaultServiceServer() {} +func (UnimplementedVaultServiceServer) testEmbeddedByValue() {} + +// UnsafeVaultServiceServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to VaultServiceServer will +// result in compilation errors. +type UnsafeVaultServiceServer interface { + mustEmbedUnimplementedVaultServiceServer() +} + +func RegisterVaultServiceServer(s grpc.ServiceRegistrar, srv VaultServiceServer) { + // If the following call panics, it indicates UnimplementedVaultServiceServer was + // embedded by pointer and is nil. This will cause panics if an + // unimplemented method is ever invoked, so we test this at initialization + // time to prevent it from happening at runtime later due to I/O. 
+ if t, ok := srv.(interface{ testEmbeddedByValue() }); ok { + t.testEmbeddedByValue() + } + s.RegisterService(&VaultService_ServiceDesc, srv) +} + +func _VaultService_GetPublicKey_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GetPublicKeyRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(VaultServiceServer).GetPublicKey(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: VaultService_GetPublicKey_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(VaultServiceServer).GetPublicKey(ctx, req.(*GetPublicKeyRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _VaultService_DecryptScores_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(DecryptScoresRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(VaultServiceServer).DecryptScores(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: VaultService_DecryptScores_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(VaultServiceServer).DecryptScores(ctx, req.(*DecryptScoresRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _VaultService_DecryptMetadata_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(DecryptMetadataRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(VaultServiceServer).DecryptMetadata(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: VaultService_DecryptMetadata_FullMethodName, + } + handler := func(ctx context.Context, req 
interface{}) (interface{}, error) { + return srv.(VaultServiceServer).DecryptMetadata(ctx, req.(*DecryptMetadataRequest)) + } + return interceptor(ctx, in, info, handler) +} + +// VaultService_ServiceDesc is the grpc.ServiceDesc for VaultService service. +// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var VaultService_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "rune.vault.v1.VaultService", + HandlerType: (*VaultServiceServer)(nil), + Methods: []grpc.MethodDesc{ + { + MethodName: "GetPublicKey", + Handler: _VaultService_GetPublicKey_Handler, + }, + { + MethodName: "DecryptScores", + Handler: _VaultService_DecryptScores_Handler, + }, + { + MethodName: "DecryptMetadata", + Handler: _VaultService_DecryptMetadata_Handler, + }, + }, + Streams: []grpc.StreamDesc{}, + Metadata: "vault_service.proto", +} diff --git a/vault/proto/__init__.py b/vault/proto/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/vault/request_validator.py b/vault/request_validator.py deleted file mode 100644 index 61f5d17..0000000 --- a/vault/request_validator.py +++ /dev/null @@ -1,96 +0,0 @@ -""" -gRPC request input validation for Rune-Vault. - -Two layers of validation: - 1. Proto-level: protovalidate enforces constraints declared in .proto - annotations (field length, int range, repeated item rules). - 2. Runtime: supplementary checks that cannot be expressed in proto - annotations (control characters, whitespace, path traversal). - -Both layers are executed by ValidationInterceptor before requests -reach business logic. 
-""" - -import re - -import protovalidate - -# --------------------------------------------------------------------------- -# Shared constants & patterns -# --------------------------------------------------------------------------- - -MAX_INDEX_NAME_LENGTH = 128 -INDEX_NAME_PATTERN = re.compile(r"^[a-zA-Z0-9_-]+$") - -_CONTROL_CHAR_RE = re.compile(r"[\x00-\x1f\x7f]") - -# Cached validator instance (compiles CEL rules once per descriptor). -_validator = protovalidate.Validator() - - -# --------------------------------------------------------------------------- -# Proto-level validation (protovalidate) -# --------------------------------------------------------------------------- - - -def validate_proto(request) -> None: - """Run protovalidate against the request message. - - Raises protovalidate.ValidationError with structured Violation list. - """ - _validator.validate(request) - - -# --------------------------------------------------------------------------- -# Supplementary runtime checks -# --------------------------------------------------------------------------- - - -class RuntimeValidationError(Exception): - """Raised for checks that proto annotations cannot express.""" - - -def check_token_safety(token: str) -> None: - """Reject tokens with control characters or surrounding whitespace.""" - if _CONTROL_CHAR_RE.search(token): - raise RuntimeValidationError("token: must not contain control characters") - if token != token.strip(): - raise RuntimeValidationError("token: must not have leading or trailing whitespace") - - -def validate_index_name(name: str) -> None: - """Validate an index name (for future use).""" - if not name: - raise RuntimeValidationError("index_name: must not be empty") - if len(name) > MAX_INDEX_NAME_LENGTH: - raise RuntimeValidationError( - f"index_name: length {len(name)} exceeds maximum {MAX_INDEX_NAME_LENGTH}" - ) - if not INDEX_NAME_PATTERN.match(name): - raise RuntimeValidationError( - "index_name: must contain only alphanumeric 
characters, underscores, or hyphens" - ) - - -# --------------------------------------------------------------------------- -# Vault-method supplementary checks (keyed by gRPC method path) -# --------------------------------------------------------------------------- - - -def _check_get_public_key(request) -> None: - check_token_safety(request.token) - - -def _check_decrypt_scores(request) -> None: - check_token_safety(request.token) - - -def _check_decrypt_metadata(request) -> None: - check_token_safety(request.token) - - -RUNTIME_CHECKS = { - "/rune.vault.v1.VaultService/GetPublicKey": _check_get_public_key, - "/rune.vault.v1.VaultService/DecryptScores": _check_decrypt_scores, - "/rune.vault.v1.VaultService/DecryptMetadata": _check_decrypt_metadata, -} diff --git a/vault/requirements.txt b/vault/requirements.txt deleted file mode 100644 index 35731bd..0000000 --- a/vault/requirements.txt +++ /dev/null @@ -1,20 +0,0 @@ -# Rune-Vault Dependencies - -# FHE Encryption -pyenvector>=1.2.0 -numpy>=1.24.0 - -# Token Management -PyYAML>=6.0 - -# Cryptography (HKDF key derivation) -cryptography>=41.0.0 - -# gRPC -grpcio>=1.60.2,<=1.74.0 -grpcio-tools>=1.60.2,<=1.74.0 -grpcio-health-checking>=1.60.2,<=1.74.0 -grpcio-reflection>=1.60.2,<=1.74.0 - -# gRPC input validation (protovalidate) -protovalidate>=1.1.0 diff --git a/vault/scripts/proto-gen.sh b/vault/scripts/proto-gen.sh deleted file mode 100755 index 084cc13..0000000 --- a/vault/scripts/proto-gen.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env bash -# Generate Python protobuf/gRPC stubs from .proto files. -# -# Prerequisites: -# brew install bufbuild/buf/buf (or: https://buf.build/docs/installation) -# pip install grpcio-tools -# -# Usage: -# ./scripts/proto-gen.sh (from vault/ directory) -# make proto-gen (via Makefile) -set -euo pipefail -cd "$(dirname "$0")/.." - -# ── 1. Resolve buf dependencies ────────────────────────────────────── -echo "[proto-gen] Updating buf dependencies..." -buf dep update - -# ── 2. 
Export protovalidate protos to temp dir ─────────────────────── -DEPS_DIR=$(mktemp -d) -trap 'rm -rf "$DEPS_DIR"' EXIT - -echo "[proto-gen] Exporting protovalidate protos..." -buf export buf.build/bufbuild/protovalidate -o "$DEPS_DIR" - -# ── 3. Locate google well-known protos bundled with grpcio-tools ──── -GRPC_PROTO=$(python3 -c " -import grpc_tools, os -print(os.path.join(os.path.dirname(grpc_tools.__file__), '_proto')) -") - -# ── 4. Generate vault_service stubs ───────────────────────────────── -echo "[proto-gen] Generating vault_service_pb2.py / vault_service_pb2_grpc.py..." -python3 -m grpc_tools.protoc \ - -Iproto \ - -I"$DEPS_DIR" \ - -I"$GRPC_PROTO" \ - --python_out=proto \ - --grpc_python_out=proto \ - proto/vault_service.proto - -# ── 5. Generate protovalidate runtime stubs ────────────────────────── -echo "[proto-gen] Generating buf/validate/validate_pb2.py..." -python3 -m grpc_tools.protoc \ - -I"$DEPS_DIR" \ - -I"$GRPC_PROTO" \ - --python_out=. \ - "$DEPS_DIR/buf/validate/validate.proto" - -mkdir -p buf/validate -touch buf/__init__.py buf/validate/__init__.py - -echo "[proto-gen] Done." diff --git a/vault/token_store.py b/vault/token_store.py deleted file mode 100644 index e3c4ba2..0000000 --- a/vault/token_store.py +++ /dev/null @@ -1,580 +0,0 @@ -""" -Per-user token and role management with async YAML persistence. - -Memory-first architecture: changes take effect immediately in-memory, -then async-persist to YAML files for startup recovery. 
-""" - -import datetime -import logging -import os -import secrets -import tempfile -import threading -import time -from collections import defaultdict -from concurrent.futures import ThreadPoolExecutor -from dataclasses import dataclass - -import yaml - -logger = logging.getLogger("vault.token_store") - - -# ============================================================================= -# Data Classes -# ============================================================================= - - -@dataclass -class Role: - """Role definition with scope, top_k, and rate limit.""" - - name: str - scope: list[str] - top_k: int - rate_limit: str # e.g. "30/60s" - - @property - def rate_limit_parsed(self) -> tuple[int, int]: - """Parse rate_limit string into (max_requests, window_seconds).""" - parts = self.rate_limit.replace("s", "").split("/") - return int(parts[0]), int(parts[1]) - - -@dataclass -class Token: - """Per-user token with role assignment and expiry.""" - - user: str - token: str - role: str - issued_at: str # ISO date, e.g. 
"2026-03-20" - expires: str | None = None # ISO date or None (never expires) - - @property - def is_expired(self) -> bool: - if self.expires is None: - return False - return datetime.date.fromisoformat(self.expires) < datetime.date.today() - - -# ============================================================================= -# Custom Exceptions (inherit ValueError for backward compat with gRPC handlers) -# ============================================================================= - - -class TokenNotFoundError(ValueError): - """Token not found in store.""" - - def __init__(self): - super().__init__("Invalid authentication token") - - -class TokenExpiredError(ValueError): - """Token has expired.""" - - def __init__(self, username: str): - self.username = username - super().__init__(f"Token expired for user '{username}'") - - -class RateLimitError(ValueError): - """Rate limit exceeded.""" - - def __init__(self, retry_after: int): - self.retry_after = retry_after - super().__init__(f"Rate limit exceeded. 
Retry after {retry_after}s") - - -class TopKExceededError(ValueError): - """Requested top_k exceeds role limit.""" - - def __init__(self, requested: int, max_top_k: int, role_name: str): - self.requested = requested - self.max_top_k = max_top_k - super().__init__(f"top_k {requested} exceeds limit {max_top_k} for role '{role_name}'") - - -class ScopeError(ValueError): - """Method not permitted for role.""" - - def __init__(self, method: str, role_name: str): - self.method = method - super().__init__(f"Method '{method}' not permitted for role '{role_name}'") - - -# ============================================================================= -# Rate Limiter (moved from vault_core.py) -# ============================================================================= - - -class RateLimiter: - """Simple sliding window rate limiter.""" - - def __init__(self, max_requests: int = 30, window_seconds: int = 60): - self.max_requests = max_requests - self.window_seconds = window_seconds - self._requests: dict[str, list[float]] = defaultdict(list) - self._lock = threading.Lock() - - def is_allowed(self, client_id: str) -> bool: - """Check if request is allowed and record it.""" - now = time.time() - with self._lock: - self._requests[client_id] = [ - t for t in self._requests[client_id] if now - t < self.window_seconds - ] - if len(self._requests[client_id]) >= self.max_requests: - return False - self._requests[client_id].append(now) - return True - - def get_retry_after(self, client_id: str) -> int: - """Returns seconds until next request is allowed.""" - with self._lock: - if not self._requests[client_id]: - return 0 - oldest = min(self._requests[client_id]) - return max(0, int(self.window_seconds - (time.time() - oldest))) - - def remove(self, client_id: str): - """Remove a client's rate limit tracking.""" - with self._lock: - self._requests.pop(client_id, None) - - -# ============================================================================= -# Default Roles -# 
============================================================================= - -DEFAULT_ROLES = { - "admin": Role( - "admin", - ["get_public_key", "decrypt_scores", "decrypt_metadata", "manage_tokens"], - 50, - "150/60s", - ), - "member": Role( - "member", - ["get_public_key", "decrypt_scores", "decrypt_metadata"], - 10, - "30/60s", - ), -} - -DEFAULT_ROLE_NAMES = frozenset(DEFAULT_ROLES.keys()) - -DEMO_TOKEN = "evt_0000000000000000000000000000demo" - - -# ============================================================================= -# Token Store -# ============================================================================= - - -class TokenStore: - """Thread-safe in-memory store for tokens and roles with async YAML persistence.""" - - def __init__(self): - self._lock = threading.RLock() - self._tokens: dict[str, Token] = {} # keyed by token string - self._tokens_by_user: dict[str, Token] = {} # keyed by username - self._roles: dict[str, Role] = {} - self._rate_limiters: dict[str, RateLimiter] = {} # keyed by username - self._roles_path: str | None = None - self._tokens_path: str | None = None - self._persist_executor = ThreadPoolExecutor(max_workers=1) - - # ── Loaders ────────────────────────────────────────────────────────── - - def load_from_files(self, roles_path: str, tokens_path: str): - """Load roles and tokens from YAML config files at startup.""" - with self._lock: - self._roles_path = roles_path - self._tokens_path = tokens_path - - # Load roles - if os.path.exists(roles_path): - with open(roles_path) as f: - data = yaml.safe_load(f) or {} - for name, cfg in data.get("roles", {}).items(): - self._roles[name] = Role( - name=name, - scope=cfg.get("scope", []), - top_k=cfg.get("top_k", 5), - rate_limit=cfg.get("rate_limit", "30/60s"), - ) - logger.info("Loaded %d roles from %s", len(self._roles), roles_path) - else: - self._roles = dict(DEFAULT_ROLES) - logger.info("No roles file found, using defaults") - - # Ensure default roles always exist - for name, 
role in DEFAULT_ROLES.items(): - if name not in self._roles: - self._roles[name] = role - - # Load tokens - if os.path.exists(tokens_path): - with open(tokens_path) as f: - data = yaml.safe_load(f) or {} - for entry in data.get("tokens", []): - tok = Token( - user=entry["user"], - token=entry["token"], - role=entry["role"], - issued_at=entry.get("issued_at") or entry.get("created", ""), - expires=entry.get("expires"), - ) - self._tokens[tok.token] = tok - self._tokens_by_user[tok.user] = tok - logger.info("Loaded %d tokens from %s", len(self._tokens), tokens_path) - - # Auto-generate default config files if they don't exist - if not os.path.exists(roles_path) or not os.path.exists(tokens_path): - self._schedule_persist() - - def load_legacy_env(self, env_tokens: str): - """Backward compat: load comma-separated VAULT_TOKENS as legacy tokens.""" - with self._lock: - self._roles = dict(DEFAULT_ROLES) - tokens = [t.strip() for t in env_tokens.split(",") if t.strip()] - for i, token_str in enumerate(tokens): - user = f"legacy_{i}" - tok = Token( - user=user, - token=token_str, - role="admin", - issued_at=datetime.date.today().isoformat(), - expires=None, - ) - self._tokens[tok.token] = tok - self._tokens_by_user[tok.user] = tok - logger.info("Loaded %d legacy tokens from env var", len(tokens)) - - def load_defaults_with_demo_token(self): - """Demo mode: load default roles and demo token.""" - with self._lock: - self._roles = dict(DEFAULT_ROLES) - tok = Token( - user="demo", - token=DEMO_TOKEN, - role="admin", - issued_at=datetime.date.today().isoformat(), - expires=None, - ) - self._tokens[tok.token] = tok - self._tokens_by_user[tok.user] = tok - logger.warning("Demo mode active with demo token") - - # ── Validation ─────────────────────────────────────────────────────── - - def validate(self, token_str: str) -> tuple[str, Role]: - """ - Validate a token string. - - Returns (username, Role) on success. 
- Raises TokenNotFoundError, TokenExpiredError, or RateLimitError on failure. - """ - with self._lock: - tok = self._tokens.get(token_str) - if tok is None: - raise TokenNotFoundError() - - if tok.is_expired: - raise TokenExpiredError(tok.user) - - role = self._roles.get(tok.role) - if role is None: - raise TokenNotFoundError() - - # Per-user rate limiting with role-specific limits - limiter = self._get_or_create_limiter(tok.user, role) - - # Rate limit check outside the main lock (limiter has its own lock) - if not limiter.is_allowed(tok.user): - retry_after = limiter.get_retry_after(tok.user) - raise RateLimitError(retry_after) - - return tok.user, role - - def get_username(self, token_str: str) -> str | None: - """Look up username for a token without side effects.""" - with self._lock: - tok = self._tokens.get(token_str) - return tok.user if tok else None - - def check_scope(self, role: Role, method_name: str): - """Check if a method is permitted for the given role.""" - if method_name not in role.scope: - raise ScopeError(method_name, role.name) - - def _get_or_create_limiter(self, username: str, role: Role) -> RateLimiter: - """Get or lazily create a rate limiter for a user with role-specific limits.""" - # Must be called under self._lock - limiter = self._rate_limiters.get(username) - if limiter is None: - max_req, window = role.rate_limit_parsed - limiter = RateLimiter(max_requests=max_req, window_seconds=window) - self._rate_limiters[username] = limiter - return limiter - - # ── Token CRUD ─────────────────────────────────────────────────────── - - def add_token(self, user: str, role: str, expires_days: int | None = None) -> Token: - """Issue a new token for a user.""" - with self._lock: - if role not in self._roles: - raise ValueError(f"Role '{role}' does not exist") - if user in self._tokens_by_user: - raise ValueError(f"Token already exists for user '{user}'") - - token_str = f"evt_{secrets.token_hex(16)}" - today = datetime.date.today() - expires = None 
- if expires_days is not None: - expires = (today + datetime.timedelta(days=expires_days)).isoformat() - - tok = Token( - user=user, - token=token_str, - role=role, - issued_at=today.isoformat(), - expires=expires, - ) - self._tokens[tok.token] = tok - self._tokens_by_user[tok.user] = tok - - self._schedule_persist() - return tok - - def revoke_token(self, user: str) -> bool: - """Revoke a user's token. Returns True if token was found and revoked.""" - with self._lock: - tok = self._tokens_by_user.pop(user, None) - if tok is None: - return False - self._tokens.pop(tok.token, None) - # Clean up rate limiter - limiter = self._rate_limiters.pop(user, None) - if limiter: - limiter.remove(user) - - self._schedule_persist() - return True - - def rotate_token(self, user: str) -> Token: - """Atomically revoke old token and issue a new one for the same user/role.""" - with self._lock: - old_tok = self._tokens_by_user.get(user) - if old_tok is None: - raise ValueError(f"No token found for user '{user}'") - - old_role = old_tok.role - # Preserve original expiry duration - expires_days = None - if old_tok.expires: - issued_date = datetime.date.fromisoformat(old_tok.issued_at) - expires_date = datetime.date.fromisoformat(old_tok.expires) - expires_days = (expires_date - issued_date).days - - # Revoke old (inline, within same lock) - self._tokens.pop(old_tok.token, None) - del self._tokens_by_user[user] - limiter = self._rate_limiters.pop(user, None) - if limiter: - limiter.remove(user) - - # Issue new - token_str = f"evt_{secrets.token_hex(16)}" - today = datetime.date.today() - expires = None - if expires_days is not None: - expires = (today + datetime.timedelta(days=expires_days)).isoformat() - - new_tok = Token( - user=user, - token=token_str, - role=old_role, - issued_at=today.isoformat(), - expires=expires, - ) - self._tokens[new_tok.token] = new_tok - self._tokens_by_user[user] = new_tok - - self._schedule_persist() - logger.info("Rotated token for user '%s'", user) - 
return new_tok - - def rotate_all_tokens(self) -> list[Token]: - """Rotate all tokens. Each rotation is individually atomic.""" - with self._lock: - users = list(self._tokens_by_user.keys()) - results = [] - for user in users: - results.append(self.rotate_token(user)) - return results - - def list_tokens(self) -> list[dict]: - """List all tokens (token values truncated for security).""" - with self._lock: - result = [] - for tok in self._tokens_by_user.values(): - role = self._roles.get(tok.role) - result.append( - { - "user": tok.user, - "role": tok.role, - "top_k": role.top_k if role else "?", - "rate_limit": role.rate_limit if role else "?", - "expires": tok.expires or "never", - } - ) - return result - - # ── Role CRUD ──────────────────────────────────────────────────────── - - @staticmethod - def _validate_rate_limit(rate_limit: str): - """Validate rate_limit format (e.g. '30/60s').""" - import re - - if not re.fullmatch(r"\d+/\d+s", rate_limit): - raise ValueError( - f"Invalid rate_limit format '{rate_limit}'." - " Expected '/s' (e.g. '30/60s')" - ) - - def add_role(self, name: str, scope: list[str], top_k: int, rate_limit: str) -> Role: - """Create a new role.""" - self._validate_rate_limit(rate_limit) - with self._lock: - if name in self._roles: - raise ValueError(f"Role '{name}' already exists") - role = Role(name=name, scope=scope, top_k=top_k, rate_limit=rate_limit) - self._roles[name] = role - - self._schedule_persist() - return role - - def update_role(self, name: str, **kwargs) -> Role: - """Update an existing role. 
Accepts scope, top_k, rate_limit kwargs.""" - if "rate_limit" in kwargs: - self._validate_rate_limit(kwargs["rate_limit"]) - with self._lock: - role = self._roles.get(name) - if role is None: - raise ValueError(f"Role '{name}' does not exist") - - if "scope" in kwargs: - role.scope = kwargs["scope"] - if "top_k" in kwargs: - role.top_k = kwargs["top_k"] - if "rate_limit" in kwargs: - role.rate_limit = kwargs["rate_limit"] - # Clear rate limiters for affected users so they pick up new limits - for tok in self._tokens_by_user.values(): - if tok.role == name and tok.user in self._rate_limiters: - del self._rate_limiters[tok.user] - - self._schedule_persist() - return role - - def delete_role(self, name: str): - """Delete a role. Rejects deletion of default roles.""" - with self._lock: - if name in DEFAULT_ROLE_NAMES: - raise ValueError(f"Cannot delete default role '{name}'") - if name not in self._roles: - raise ValueError(f"Role '{name}' does not exist") - - # Check if any tokens reference this role - for tok in self._tokens_by_user.values(): - if tok.role == name: - raise ValueError( - f"Cannot delete role '{name}': " - f"token for user '{tok.user}' is assigned to it" - ) - - del self._roles[name] - - self._schedule_persist() - - def list_roles(self) -> list[dict]: - """List all roles.""" - with self._lock: - return [ - { - "name": r.name, - "scope": r.scope, - "top_k": r.top_k, - "rate_limit": r.rate_limit, - } - for r in self._roles.values() - ] - - # ── Persistence ────────────────────────────────────────────────────── - - def _schedule_persist(self): - """Schedule async persistence to YAML files.""" - if self._roles_path is None or self._tokens_path is None: - return # No file paths configured (legacy/demo mode) - self._persist_executor.submit(self._do_persist) - - def _do_persist(self): - """Atomically write current state to YAML files.""" - try: - time.sleep(0.1) # Debounce rapid changes - - with self._lock: - roles_data = { - "roles": { - r.name: { - "scope": 
r.scope, - "top_k": r.top_k, - "rate_limit": r.rate_limit, - } - for r in self._roles.values() - } - } - tokens_data = { - "tokens": [ - { - "user": t.user, - "token": t.token, - "role": t.role, - "issued_at": t.issued_at, - **({"expires": t.expires} if t.expires else {}), - } - for t in self._tokens_by_user.values() - ] - } - - # Atomic writes: temp file + os.replace - if self._roles_path: - self._atomic_write(self._roles_path, roles_data) - if self._tokens_path: - self._atomic_write(self._tokens_path, tokens_data) - - logger.debug("Persisted token/role state to YAML") - except Exception: - logger.exception("Failed to persist token/role state") - - @staticmethod - def _atomic_write(path: str, data: dict): - """Write data to a file atomically via temp file + os.replace.""" - dir_name = os.path.dirname(path) or "." - os.makedirs(dir_name, exist_ok=True) - fd, tmp_path = tempfile.mkstemp(dir=dir_name, suffix=".tmp") - try: - with os.fdopen(fd, "w") as f: - yaml.safe_dump(data, f, default_flow_style=False, sort_keys=False) - os.replace(tmp_path, path) - except Exception: - os.unlink(tmp_path) - raise - - -# Module-level singleton -token_store = TokenStore() diff --git a/vault/validation_interceptor.py b/vault/validation_interceptor.py deleted file mode 100644 index 4ed6ddf..0000000 --- a/vault/validation_interceptor.py +++ /dev/null @@ -1,65 +0,0 @@ -""" -gRPC server interceptor that validates request fields before processing. - -Runs two validation layers: - 1. protovalidate — enforces .proto annotation constraints - 2. Runtime checks — control chars, whitespace (not expressible in proto) - -Rejects malformed requests with INVALID_ARGUMENT before they reach -VaultServiceServicer methods. Non-vault methods (health, reflection) -pass through untouched. 
-""" - -import logging - -import grpc -import protovalidate -from request_validator import ( - RUNTIME_CHECKS, - RuntimeValidationError, - validate_proto, -) - -logger = logging.getLogger("rune.vault.validation") - - -class ValidationInterceptor(grpc.ServerInterceptor): - """Intercepts unary-unary gRPC calls to validate request fields.""" - - def intercept_service(self, continuation, handler_call_details): - method = handler_call_details.method - next_handler = continuation(handler_call_details) - - if next_handler is None: - return None - - runtime_check = RUNTIME_CHECKS.get(method) - if runtime_check is None: - return next_handler - - original_handler = next_handler.unary_unary - if original_handler is None: - return next_handler - - def validating_handler(request, context): - try: - # Layer 1: proto annotation constraints - validate_proto(request) - # Layer 2: supplementary runtime checks - runtime_check(request) - except protovalidate.ValidationError as exc: - msg = "; ".join(f"{v.proto.field}: {v.proto.message}" for v in exc.violations) - logger.warning("Validation rejected %s: %s", method, msg) - context.abort(grpc.StatusCode.INVALID_ARGUMENT, msg) - return None - except RuntimeValidationError as exc: - logger.warning("Validation rejected %s: %s", method, exc) - context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(exc)) - return None - return original_handler(request, context) - - return grpc.unary_unary_rpc_method_handler( - validating_handler, - request_deserializer=next_handler.request_deserializer, - response_serializer=next_handler.response_serializer, - ) diff --git a/vault/vault_admin_cli.py b/vault/vault_admin_cli.py deleted file mode 100644 index 5f206af..0000000 --- a/vault/vault_admin_cli.py +++ /dev/null @@ -1,250 +0,0 @@ -#!/usr/bin/env python3 -""" -Vault Admin CLI — manages per-user tokens and roles. 
- -Usage (via docker exec or runevault alias): - runevault token issue --user alice --role member --expires 90d - runevault token revoke --user alice - runevault token list - runevault role list - runevault role create --name researcher \\ - --scope get_public_key,decrypt_scores --top-k 3 --rate-limit 10/60s - runevault role update --name member --top-k 8 - runevault role delete --name researcher -""" - -import argparse -import http.client -import json -import re -import sys - -ADMIN_HOST = "127.0.0.1" -ADMIN_PORT = 8081 - - -def _request(method: str, path: str, body: dict | None = None) -> dict: - """Send an HTTP request to the admin server and return parsed JSON.""" - conn = http.client.HTTPConnection(ADMIN_HOST, ADMIN_PORT) - headers = {"Content-Type": "application/json"} if body else {} - data = json.dumps(body).encode() if body else None - try: - conn.request(method, path, body=data, headers=headers) - resp = conn.getresponse() - result = json.loads(resp.read().decode()) - if resp.status >= 400: - print(f"Error: {result.get('error', 'Unknown error')}", file=sys.stderr) - sys.exit(1) - return result - except ConnectionRefusedError: - print("Error: Cannot connect to admin server. Is Vault running?", file=sys.stderr) - sys.exit(1) - finally: - conn.close() - - -# ── Token commands ─────────────────────────────────────────────────────── - - -def _parse_duration(value: str) -> int: - """Parse duration string like '90d', '12w', '6m' into days.""" - m = re.fullmatch(r"(\d+)([dwm])", value) - if not m: - print( - f"Error: Invalid duration '{value}'. Use (e.g. 
90d, 12w, 6m)", - file=sys.stderr, - ) - sys.exit(1) - n, unit = int(m.group(1)), m.group(2) - if unit == "d": - return n - if unit == "w": - return n * 7 - return n * 30 # 'm' approximation - - -def cmd_token_issue(args): - body = {"user": args.user, "role": args.role} - if args.expires is not None: - body["expires_days"] = _parse_duration(args.expires) - result = _request("POST", "/tokens", body) - print(f"\nToken issued for '{result['user']}':") - print(f" Role: {result['role']}") - print(f" Expires: {result['expires']}") - print(f"\n Token: {result['token']}") - print("\n WARNING: This token will NOT be shown again. Share it securely.") - - -def cmd_token_revoke(args): - result = _request("DELETE", f"/tokens/{args.user}") - print(result["message"]) - - -def cmd_token_rotate(args): - if args.rotate_all: - result = _request("POST", "/tokens/_rotate_all", {}) - count = result["rotated"] - if count == 0: - print("No tokens to rotate.") - return - print(f"Rotated {count} token(s):\n") - for t in result["tokens"]: - print(f" {t['user']}: {t['token']}") - print("\n WARNING: These tokens will NOT be shown again. Share them securely.") - else: - result = _request("POST", f"/tokens/{args.user}/rotate", {}) - print(f"\nToken rotated for '{result['user']}':") - print(f" Role: {result['role']}") - print(f" Expires: {result['expires']}") - print(f"\n Token: {result['token']}") - print("\n WARNING: This token will NOT be shown again. 
Share it securely.") - - -def cmd_token_list(args): - result = _request("GET", "/tokens") - tokens = result.get("tokens", []) - if not tokens: - print("No tokens issued.") - return - # Table header - fmt = "{:<16} {:<10} {:>6} {:>10} {:<12}" - print(fmt.format("USER", "ROLE", "TOP_K", "RATE", "EXPIRES")) - for t in tokens: - print( - fmt.format( - t["user"], - t["role"], - str(t["top_k"]), - str(t["rate_limit"]), - t["expires"], - ) - ) - - -# ── Role commands ──────────────────────────────────────────────────────── - - -def cmd_role_list(args): - result = _request("GET", "/roles") - roles = result.get("roles", []) - if not roles: - print("No roles defined.") - return - fmt = "{:<12} {:<50} {:>6} {:>10}" - print(fmt.format("ROLE", "SCOPE", "TOP_K", "RATE")) - for r in roles: - scope_str = ",".join(r["scope"]) - print(fmt.format(r["name"], scope_str, str(r["top_k"]), r["rate_limit"])) - - -def cmd_role_create(args): - scope = [s.strip() for s in args.scope.split(",")] - body = { - "name": args.name, - "scope": scope, - "top_k": args.top_k, - "rate_limit": args.rate_limit, - } - _request("POST", "/roles", body) - print(f"Role '{args.name}' created.") - - -def cmd_role_update(args): - body = {} - if args.scope is not None: - body["scope"] = [s.strip() for s in args.scope.split(",")] - if args.top_k is not None: - body["top_k"] = args.top_k - if args.rate_limit is not None: - body["rate_limit"] = args.rate_limit - if not body: - print("Error: No fields to update.", file=sys.stderr) - sys.exit(1) - _request("PUT", f"/roles/{args.name}", body) - print( - f"Role '{args.name}' updated." - " Changes take effect immediately for all tokens with this role." 
- ) - - -def cmd_role_delete(args): - _request("DELETE", f"/roles/{args.name}") - print(f"Role '{args.name}' deleted.") - - -# ── Argument parsing ───────────────────────────────────────────────────── - - -def build_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - prog="runevault", - description="Rune-Vault Admin CLI", - ) - sub = parser.add_subparsers(dest="resource", required=True) - - # ── token ── - token_parser = sub.add_parser("token", help="Manage per-user tokens") - token_sub = token_parser.add_subparsers(dest="action", required=True) - - issue_p = token_sub.add_parser("issue", help="Issue a new token") - issue_p.add_argument("--user", required=True, help="Username") - issue_p.add_argument("--role", required=True, help="Role name") - issue_p.add_argument( - "--expires", default=None, help="Duration until expiry (e.g. 90d, 12w, 6m)" - ) - issue_p.set_defaults(func=cmd_token_issue) - - revoke_p = token_sub.add_parser("revoke", help="Revoke a user's token") - revoke_p.add_argument("--user", required=True, help="Username") - revoke_p.set_defaults(func=cmd_token_revoke) - - rotate_p = token_sub.add_parser("rotate", help="Rotate a token (revoke + reissue)") - rotate_group = rotate_p.add_mutually_exclusive_group(required=True) - rotate_group.add_argument("--user", help="Username to rotate") - rotate_group.add_argument( - "--all", action="store_true", dest="rotate_all", help="Rotate all tokens" - ) - rotate_p.set_defaults(func=cmd_token_rotate) - - list_p = token_sub.add_parser("list", help="List all tokens") - list_p.set_defaults(func=cmd_token_list) - - # ── role ── - role_parser = sub.add_parser("role", help="Manage roles") - role_sub = role_parser.add_subparsers(dest="action", required=True) - - rlist_p = role_sub.add_parser("list", help="List all roles") - rlist_p.set_defaults(func=cmd_role_list) - - create_p = role_sub.add_parser("create", help="Create a new role") - create_p.add_argument("--name", required=True, help="Role name") - 
create_p.add_argument("--scope", required=True, help="Comma-separated scope list") - create_p.add_argument("--top-k", type=int, required=True, help="Max top_k") - create_p.add_argument("--rate-limit", required=True, help="Rate limit (e.g. 30/60s)") - create_p.set_defaults(func=cmd_role_create) - - update_p = role_sub.add_parser("update", help="Update a role") - update_p.add_argument("--name", required=True, help="Role name") - update_p.add_argument("--scope", default=None, help="Comma-separated scope list") - update_p.add_argument("--top-k", type=int, default=None, help="Max top_k") - update_p.add_argument("--rate-limit", default=None, help="Rate limit (e.g. 30/60s)") - update_p.set_defaults(func=cmd_role_update) - - delete_p = role_sub.add_parser("delete", help="Delete a role") - delete_p.add_argument("--name", required=True, help="Role name") - delete_p.set_defaults(func=cmd_role_delete) - - return parser - - -def main(): - parser = build_parser() - args = parser.parse_args() - if hasattr(args, "func"): - args.func(args) - else: - parser.print_help() - - -if __name__ == "__main__": - main() diff --git a/vault/vault_core.py b/vault/vault_core.py deleted file mode 100644 index 3556abc..0000000 --- a/vault/vault_core.py +++ /dev/null @@ -1,326 +0,0 @@ -""" -Rune-Vault Core Business Logic - -Pure business logic for FHE key management, authentication, and decryption. -No transport layer (MCP, gRPC) — consumed by vault_grpc_server.py. 
-""" - -import base64 -import hashlib -import heapq -import json -import logging -import os - -from cryptography.hazmat.primitives import hashes -from cryptography.hazmat.primitives.kdf.hkdf import HKDF -from pyenvector.crypto import Cipher, KeyGenerator -from pyenvector.crypto.block import CipherBlock -from pyenvector.utils.aes import decrypt_metadata as aes_decrypt_metadata - -try: - from pyenvector.proto_gen.v2.common.type_pb2 import CiphertextScore -except ModuleNotFoundError: - from pyenvector.proto_gen.type_pb2 import CiphertextScore - -logger = logging.getLogger("rune.vault") - -# Configuration -KEY_DIR = "vault_keys" -KEY_ID = "vault-key" -DIM = 1024 # FHE cipher supports up to 2^12, using 1024 for production - -# ev.init() resolves key files as {KEY_DIR}/{KEY_ID}/EncKey.json -KEY_SUBDIR = os.path.join(KEY_DIR, KEY_ID) - -# enVector Cloud configuration -ENVECTOR_ENDPOINT = os.getenv("ENVECTOR_ENDPOINT", "").strip() or None -ENVECTOR_API_KEY = os.getenv("ENVECTOR_API_KEY", "").strip() or None -EMBEDDING_DIM = int(os.getenv("EMBEDDING_DIM", "1024")) - -# Team index name (set by admin, distributed to all team members via get_public_key) -VAULT_INDEX_NAME = os.getenv("VAULT_INDEX_NAME", "").strip() or None - - -def ensure_vault(): - """ - One-shot startup: - 1. Generate local FHE keys if not present (KeyGenerator) - 2. Connect to enVector Cloud with auto_key_setup=True - (SDK handles key registration → loading) - 3. 
Create the team index if it doesn't exist - """ - import pyenvector as ev - - # Phase 1: local key generation - enc_key = os.path.join(KEY_SUBDIR, "EncKey.json") - if not os.path.exists(enc_key): - logger.info(f"Generating keys in {KEY_SUBDIR}...") - os.makedirs(KEY_SUBDIR, exist_ok=True) - keygen = KeyGenerator( - key_path=KEY_SUBDIR, key_id=KEY_ID, dim_list=[DIM], metadata_encryption=False - ) - keygen.generate_keys() - else: - logger.info(f"Keys found in {KEY_SUBDIR}") - - # Phase 2: connect to enVector Cloud and register key - if not ENVECTOR_ENDPOINT or not ENVECTOR_API_KEY: - logger.warning("ENVECTOR_ENDPOINT/ENVECTOR_API_KEY not set — offline mode, no team index.") - return - - logger.info(f"Connecting to enVector Cloud ({ENVECTOR_ENDPOINT})...") - try: - ev.init( - address=ENVECTOR_ENDPOINT, - key_path=KEY_DIR, - key_id=KEY_ID, - dim=EMBEDDING_DIM, - eval_mode="rmp", - auto_key_setup=True, - access_token=ENVECTOR_API_KEY, - query_encryption="plain", - ) - logger.info("Key registered on enVector Cloud (auto_key_setup).") - except Exception as e: - logger.warning(f"auto_key_setup failed (key may already be registered): {e}") - logger.info("Retrying with auto_key_setup=False...") - ev.init( - address=ENVECTOR_ENDPOINT, - key_path=KEY_DIR, - key_id=KEY_ID, - dim=EMBEDDING_DIM, - eval_mode="rmp", - auto_key_setup=False, - access_token=ENVECTOR_API_KEY, - query_encryption="plain", - ) - logger.info("Connected to enVector Cloud (auto_key_setup=False).") - - # Phase 3: ensure team index - if not VAULT_INDEX_NAME: - return - - try: - existing = ev.get_index_list() - existing_names = [] - if hasattr(existing, "indexes"): - existing_names = [idx.index_name for idx in existing.indexes] - elif isinstance(existing, (list, tuple)): - existing_names = [str(idx) for idx in existing] - - if VAULT_INDEX_NAME in existing_names: - logger.info(f"Team index '{VAULT_INDEX_NAME}' already exists.") - else: - ev.create_index( - index_name=VAULT_INDEX_NAME, - dim=EMBEDDING_DIM, - 
index_params={"index_type": "FLAT"}, - query_encryption="plain", - metadata_encryption=False, - # workaround: skip deepcopy metadata_key property access (pyenvector#247) - metadata_key=b"", - ) - logger.info(f"Created team index '{VAULT_INDEX_NAME}' (dim={EMBEDDING_DIM}).") - except Exception as e: - logger.error(f"Failed to ensure team index: {e}", exc_info=True) - - -ensure_vault() -enc_key_path = os.path.join(KEY_SUBDIR, "EncKey.json") -sec_key_path = os.path.join(KEY_SUBDIR, "SecKey.json") - -# Initialize shared Cipher instance -cipher = Cipher(enc_key_path=enc_key_path, dim=DIM) - - -# ============================================================================= -# Per-Agent Metadata Key Derivation (HKDF-SHA256) -# ============================================================================= -def derive_agent_key(team_secret: str, agent_id: str) -> bytes: - """Derive a 32-byte AES-256 DEK for a specific agent via HKDF-SHA256. - - Args: - team_secret: Team-wide master secret (VAULT_TEAM_SECRET). - agent_id: Per-user agent identifier derived from token. - - Returns: - 32-byte AES-256 key. 
- """ - hkdf = HKDF( - algorithm=hashes.SHA256(), - length=32, - salt=None, - info=agent_id.encode("utf-8"), - ) - return hkdf.derive(team_secret.encode("utf-8")) - - -# ============================================================================= -# Authorization (per-user token auth via TokenStore) -# ============================================================================= -from token_store import ( # noqa: E402 — must import after ensure_vault() - TopKExceededError, - token_store, -) - -# Team secret for DEK derivation (shared across all users) -VAULT_TEAM_SECRET = ( - os.getenv("VAULT_TEAM_SECRET", "").strip() or os.getenv("VAULT_TOKENS", "").strip() -) - -# Load token/role configuration (priority: files > env var > demo) -_roles_path = os.getenv("VAULT_ROLES_FILE", "/app/config/vault-roles.yml") -_tokens_path = os.getenv("VAULT_TOKENS_FILE", "/app/config/vault-tokens.yml") - -if os.path.exists(_roles_path) or os.path.exists(_tokens_path): - token_store.load_from_files(_roles_path, _tokens_path) - logger.info("Per-user token auth loaded from config files") -elif VAULT_TEAM_SECRET: - token_store.load_legacy_env(VAULT_TEAM_SECRET) - logger.warning("Legacy single-token mode. Migrate to per-user tokens via runevault CLI.") -else: - token_store.load_defaults_with_demo_token() - logger.warning("Demo mode. Set VAULT_TEAM_SECRET for production.") - - -def validate_token(token: str) -> tuple[str, object]: - """Validate per-user token. Returns (username, Role).""" - return token_store.validate(token) - - -# ============================================================================= -# Core Business Logic -# ============================================================================= -def _get_public_key_impl(token: str) -> str: - """ - Core implementation: Returns the public key bundle. - - Args: - token: Authentication token issued by Vault Admin. - - Returns: - JSON string containing EncKey, EvalKey. 
- """ - username, role = validate_token(token) - token_store.check_scope(role, "get_public_key") - - bundle = {} - for filename in ["EncKey.json", "EvalKey.json"]: - path = os.path.join(KEY_SUBDIR, filename) - if os.path.exists(path): - with open(path, "r") as f: - bundle[filename] = f.read() - else: - # Should not happen if ensure_vault ran - pass - - # Include team index name and key_id so clients discover them dynamically - if VAULT_INDEX_NAME: - bundle["index_name"] = VAULT_INDEX_NAME - bundle["key_id"] = KEY_ID - - # Per-user metadata DEK: derived from VAULT_TEAM_SECRET + token-based agent_id - agent_id = hashlib.sha256(token.encode("utf-8")).hexdigest()[:32] - agent_dek = derive_agent_key(VAULT_TEAM_SECRET, agent_id) - bundle["agent_id"] = agent_id - bundle["agent_dek"] = base64.b64encode(agent_dek).decode("ascii") - - # enVector Cloud credentials — agents receive these from Vault instead of user input - bundle["envector_endpoint"] = ENVECTOR_ENDPOINT - bundle["envector_api_key"] = ENVECTOR_API_KEY - - return json.dumps(bundle) - - -def _decrypt_scores_impl(token: str, encrypted_blob_b64: str, top_k: int = 5) -> str: - """ - Core implementation: Decrypts CiphertextScore and applies Top-K filtering. - - The blob is a protobuf-serialized CiphertextScore produced by Index.scoring(). - cipher.decrypt_score() returns {"score": [[s0, s1, ...], ...], "shard_idx": [...]}, - where each inner list corresponds to a shard (IVF) or a single chunk (FLAT). - - Args: - token: Authentication token issued by Vault Admin. - encrypted_blob_b64: Base64 string of the serialized CiphertextScore protobuf. - top_k: Number of top results to return (max 10 allowed). - - Returns: - JSON string containing the list of {shard_idx, row_idx, score}. - """ - username, role = validate_token(token) - token_store.check_scope(role, "decrypt_scores") - - # Per-role top_k enforcement - if top_k > role.top_k: - raise TopKExceededError(top_k, role.top_k, role.name) - - try: - # 1. 
Deserialize CiphertextScore protobuf - blob_bytes = base64.b64decode(encrypted_blob_b64) - - try: - score_proto = CiphertextScore() - score_proto.ParseFromString(blob_bytes) - encrypted_result = CipherBlock(data=score_proto) - except Exception as e: - return json.dumps({"error": f"Deserialization failed: {str(e)}"}) - - # 2. Decrypt with cipher.decrypt_score (NOT cipher.decrypt) - decrypted = cipher.decrypt_score(encrypted_result, sec_key_path=sec_key_path) - # decrypted: {"score": [[chunk0_scores], [chunk1_scores], ...], "shard_idx": [s0, s1, ...]} - score_2d = decrypted["score"] - shard_indices = decrypted.get("shard_idx", list(range(len(score_2d)))) - - # 3. Top-K across all shards (handles both FLAT and IVF_FLAT) - # Flatten 2D scores into (shard_idx, row_idx, score) tuples - all_scores = ( - (shard_indices[i], j, float(v)) - for i, row in enumerate(score_2d) - for j, v in enumerate(row) - ) - topk_results = heapq.nlargest(top_k, all_scores, key=lambda x: x[2]) - - params = [{"shard_idx": s, "row_idx": r, "score": sc} for s, r, sc in topk_results] - - return json.dumps(params) - - except Exception as e: - return json.dumps({"error": str(e)}) - - -def _decrypt_metadata_impl(token: str, encrypted_metadata_list: list[str]) -> str: - """ - Core implementation: Decrypts a list of per-agent AES-encrypted metadata. - - Each item is a JSON string: {"a": "", "c": ""}. - Vault derives the agent's DEK from VAULT_TEAM_SECRET + agent_id via HKDF. - - Args: - token: Authentication token issued by Vault Admin. - encrypted_metadata_list: List of JSON-encoded per-agent encrypted blobs. - - Returns: - JSON string containing the list of decrypted metadata objects. 
- """ - username, role = validate_token(token) - token_store.check_scope(role, "decrypt_metadata") - - if not VAULT_TEAM_SECRET: - return json.dumps({"error": "VAULT_TEAM_SECRET not configured"}) - - try: - results = [] - for blob_str in encrypted_metadata_list: - blob = json.loads(blob_str) - agent_id = blob["a"] - ct_b64 = blob["c"] - agent_dek = derive_agent_key(VAULT_TEAM_SECRET, agent_id) - decrypted = aes_decrypt_metadata(ct_b64, agent_dek) - if isinstance(decrypted, bytes): - decrypted = decrypted.decode("utf-8") - results.append(decrypted) - return json.dumps(results) - except Exception as e: - return json.dumps({"error": f"Metadata decryption failed: {str(e)}"}) diff --git a/vault/vault_grpc_server.py b/vault/vault_grpc_server.py deleted file mode 100644 index eb7df7b..0000000 --- a/vault/vault_grpc_server.py +++ /dev/null @@ -1,375 +0,0 @@ -""" -gRPC server for Rune-Vault. - -Sole entry point for the Vault service. -Delegates to _*_impl() pure functions in vault_core.py. -""" - -import json -import logging -import os -import signal -import time -from concurrent import futures -from datetime import datetime, timezone - -import grpc -from admin_server import start_admin_server -from grpc_health.v1 import health_pb2, health_pb2_grpc -from grpc_health.v1.health import HealthServicer -from grpc_reflection.v1alpha import reflection -from proto import vault_service_pb2 as pb2 -from proto import vault_service_pb2_grpc as pb2_grpc -from token_store import ( - RateLimitError, - ScopeError, - TokenExpiredError, - TokenNotFoundError, - TopKExceededError, -) -from validation_interceptor import ValidationInterceptor -from vault_core import ( - _decrypt_metadata_impl, - _decrypt_scores_impl, - _get_public_key_impl, - token_store, -) - -try: - from audit import audit_logger, extract_source_ip - - AUDIT_AVAILABLE = True -except ImportError: - AUDIT_AVAILABLE = False - -logger = logging.getLogger("rune.vault.grpc") - -MAX_MESSAGE_LENGTH = 256 * 1024 * 1024 # 256 MB 
(EvalKey can be tens of MB) - - -def _emit_audit(method, user, top_k, result_count, status, error_detail, duration, context): - """Emit audit log entry.""" - if not (AUDIT_AVAILABLE and audit_logger.enabled): - return - audit_logger.log( - timestamp=datetime.now(timezone.utc).isoformat(), - user_id=user, - method=method, - top_k=top_k, - result_count=result_count, - status=status, - source_ip=extract_source_ip(context), - latency_ms=duration * 1000, - error=error_detail, - ) - - -class VaultServiceServicer(pb2_grpc.VaultServiceServicer): - """gRPC implementation that delegates to vault_core._*_impl() functions.""" - - def GetPublicKey(self, request, context): - start_time = time.time() - status = "success" - user = "unknown" - result_count = 0 - error_detail = None - try: - user = token_store.get_username(request.token) or "unknown" - result_json = _get_public_key_impl(request.token) - parsed = json.loads(result_json) - if isinstance(parsed, dict) and "error" in parsed: - status = "error" - error_detail = parsed["error"] - return pb2.GetPublicKeyResponse(error=parsed["error"]) - result_count = 1 - return pb2.GetPublicKeyResponse(key_bundle_json=result_json) - except (TokenNotFoundError, TokenExpiredError) as e: - status = "denied" - error_detail = str(e) - context.set_code(grpc.StatusCode.UNAUTHENTICATED) - context.set_details(str(e)) - return pb2.GetPublicKeyResponse(error=str(e)) - except RateLimitError as e: - status = "denied" - error_detail = str(e) - context.set_code(grpc.StatusCode.RESOURCE_EXHAUSTED) - context.set_details(str(e)) - return pb2.GetPublicKeyResponse(error=str(e)) - except ScopeError as e: - status = "denied" - error_detail = str(e) - context.set_code(grpc.StatusCode.PERMISSION_DENIED) - context.set_details(str(e)) - return pb2.GetPublicKeyResponse(error=str(e)) - except ValueError as e: - status = "denied" - error_detail = str(e) - context.set_code(grpc.StatusCode.UNAUTHENTICATED) - context.set_details(str(e)) - return 
pb2.GetPublicKeyResponse(error=str(e)) - except Exception as e: - status = "error" - error_detail = str(e) - context.set_code(grpc.StatusCode.INTERNAL) - context.set_details(str(e)) - return pb2.GetPublicKeyResponse(error=str(e)) - finally: - duration = time.time() - start_time - _emit_audit( - "get_public_key", user, None, result_count, status, error_detail, duration, context - ) - - def DecryptScores(self, request, context): - start_time = time.time() - status = "success" - user = "unknown" - result_count = 0 - error_detail = None - try: - user = token_store.get_username(request.token) or "unknown" - result_json = _decrypt_scores_impl( - request.token, - request.encrypted_blob_b64, - request.top_k, - ) - parsed = json.loads(result_json) - if isinstance(parsed, dict) and "error" in parsed: - status = "error" - error_detail = parsed["error"] - return pb2.DecryptScoresResponse(error=parsed["error"]) - - entries = [ - pb2.ScoreEntry( - shard_idx=item["shard_idx"], - row_idx=item["row_idx"], - score=item["score"], - ) - for item in parsed - ] - result_count = len(entries) - return pb2.DecryptScoresResponse(results=entries) - except TopKExceededError as e: - status = "denied" - error_detail = str(e) - context.set_code(grpc.StatusCode.INVALID_ARGUMENT) - context.set_details(str(e)) - return pb2.DecryptScoresResponse(error=str(e)) - except (TokenNotFoundError, TokenExpiredError) as e: - status = "denied" - error_detail = str(e) - context.set_code(grpc.StatusCode.UNAUTHENTICATED) - context.set_details(str(e)) - return pb2.DecryptScoresResponse(error=str(e)) - except RateLimitError as e: - status = "denied" - error_detail = str(e) - context.set_code(grpc.StatusCode.RESOURCE_EXHAUSTED) - context.set_details(str(e)) - return pb2.DecryptScoresResponse(error=str(e)) - except ScopeError as e: - status = "denied" - error_detail = str(e) - context.set_code(grpc.StatusCode.PERMISSION_DENIED) - context.set_details(str(e)) - return pb2.DecryptScoresResponse(error=str(e)) - except 
ValueError as e: - status = "denied" - error_detail = str(e) - context.set_code(grpc.StatusCode.UNAUTHENTICATED) - context.set_details(str(e)) - return pb2.DecryptScoresResponse(error=str(e)) - except Exception as e: - status = "error" - error_detail = str(e) - context.set_code(grpc.StatusCode.INTERNAL) - context.set_details(str(e)) - return pb2.DecryptScoresResponse(error=str(e)) - finally: - duration = time.time() - start_time - _emit_audit( - "decrypt_scores", - user, - request.top_k, - result_count, - status, - error_detail, - duration, - context, - ) - - def DecryptMetadata(self, request, context): - start_time = time.time() - status = "success" - user = "unknown" - result_count = 0 - error_detail = None - try: - user = token_store.get_username(request.token) or "unknown" - result_json = _decrypt_metadata_impl( - request.token, - list(request.encrypted_metadata_list), - ) - parsed = json.loads(result_json) - if isinstance(parsed, dict) and "error" in parsed: - status = "error" - error_detail = parsed["error"] - return pb2.DecryptMetadataResponse(error=parsed["error"]) - - # Each element is a decrypted metadata object. - # Serialize non-string items back to JSON string for the proto field. 
- decrypted_strings = [ - json.dumps(item) if not isinstance(item, str) else item for item in parsed - ] - result_count = len(decrypted_strings) - return pb2.DecryptMetadataResponse(decrypted_metadata=decrypted_strings) - except (TokenNotFoundError, TokenExpiredError) as e: - status = "denied" - error_detail = str(e) - context.set_code(grpc.StatusCode.UNAUTHENTICATED) - context.set_details(str(e)) - return pb2.DecryptMetadataResponse(error=str(e)) - except RateLimitError as e: - status = "denied" - error_detail = str(e) - context.set_code(grpc.StatusCode.RESOURCE_EXHAUSTED) - context.set_details(str(e)) - return pb2.DecryptMetadataResponse(error=str(e)) - except ScopeError as e: - status = "denied" - error_detail = str(e) - context.set_code(grpc.StatusCode.PERMISSION_DENIED) - context.set_details(str(e)) - return pb2.DecryptMetadataResponse(error=str(e)) - except ValueError as e: - status = "denied" - error_detail = str(e) - context.set_code(grpc.StatusCode.UNAUTHENTICATED) - context.set_details(str(e)) - return pb2.DecryptMetadataResponse(error=str(e)) - except Exception as e: - status = "error" - error_detail = str(e) - context.set_code(grpc.StatusCode.INTERNAL) - context.set_details(str(e)) - return pb2.DecryptMetadataResponse(error=str(e)) - finally: - duration = time.time() - start_time - _emit_audit( - "decrypt_metadata", - user, - None, - result_count, - status, - error_detail, - duration, - context, - ) - - -def _load_tls_credentials(): - """ - Load TLS credentials from environment variables. - - Returns grpc.ServerCredentials or None (if TLS disabled). - Raises SystemExit if TLS is required but cert/key not provided. - """ - if os.environ.get("VAULT_TLS_DISABLE", "").lower() == "true": - logger.warning("TLS DISABLED — gRPC traffic is unencrypted. 
Do not use in production.") - return None - - cert_path = os.environ.get("VAULT_TLS_CERT") - key_path = os.environ.get("VAULT_TLS_KEY") - - if not cert_path or not key_path: - logger.error( - "TLS certificate not configured. " - "Set VAULT_TLS_CERT and VAULT_TLS_KEY, " - "or set VAULT_TLS_DISABLE=true for insecure mode." - ) - raise SystemExit(1) - - with open(cert_path, "rb") as f: - cert_pem = f.read() - with open(key_path, "rb") as f: - key_pem = f.read() - - logger.info("TLS configured — cert=%s", cert_path) - return grpc.ssl_server_credentials([(key_pem, cert_pem)]) - - -def serve_grpc(host: str = "0.0.0.0", port: int = 50051) -> grpc.Server: - """ - Start the gRPC server. Non-blocking — returns the server object. - Call server.stop(grace=N) for graceful shutdown. - """ - server = grpc.server( - futures.ThreadPoolExecutor(max_workers=4), - options=[ - ("grpc.max_send_message_length", MAX_MESSAGE_LENGTH), - ("grpc.max_receive_message_length", MAX_MESSAGE_LENGTH), - ], - interceptors=[ValidationInterceptor()], - ) - - # Register VaultService - pb2_grpc.add_VaultServiceServicer_to_server(VaultServiceServicer(), server) - - # Enable gRPC server reflection (for grpcurl, etc.) 
- SERVICE_NAMES = ( - pb2.DESCRIPTOR.services_by_name["VaultService"].full_name, - reflection.SERVICE_NAME, - ) - reflection.enable_server_reflection(SERVICE_NAMES, server) - - # Register gRPC health checking (standard grpc.health.v1 protocol) - health_servicer = HealthServicer() - health_pb2_grpc.add_HealthServicer_to_server(health_servicer, server) - health_servicer.set( - "rune.vault.v1.VaultService", - health_pb2.HealthCheckResponse.SERVING, - ) - health_servicer.set("", health_pb2.HealthCheckResponse.SERVING) - - addr = f"{host}:{port}" - credentials = _load_tls_credentials() - if credentials: - server.add_secure_port(addr, credentials) - logger.info("gRPC server started on %s (TLS)", addr) - else: - server.add_insecure_port(addr) - logger.info("gRPC server started on %s (insecure)", addr) - - server.start() - return server, health_servicer - - -if __name__ == "__main__": - import argparse - - logging.basicConfig( - level=logging.INFO, - format="%(asctime)s %(name)s %(levelname)s %(message)s", - ) - - parser = argparse.ArgumentParser(description="Run the Rune-Vault gRPC server.") - parser.add_argument("--host", default="0.0.0.0", help="Host to bind") - parser.add_argument("--grpc-port", type=int, default=50051, help="gRPC port") - args = parser.parse_args() - - # Start gRPC server (non-blocking) - grpc_server, health_servicer = serve_grpc(host=args.host, port=args.grpc_port) - - # Start admin HTTP server (internal HTTP, not exposed via Docker) - admin_srv = start_admin_server(token_store, health_servicer=health_servicer) - - # Graceful shutdown on SIGTERM / SIGINT - def _shutdown(signum, frame): - logger.info("Received shutdown signal, stopping...") - admin_srv.shutdown() - grpc_server.stop(grace=5) - - signal.signal(signal.SIGTERM, _shutdown) - signal.signal(signal.SIGINT, _shutdown) - - logger.info("Rune-Vault is ready.") - grpc_server.wait_for_termination() diff --git a/vault/verify_crypto_flow.py b/vault/verify_crypto_flow.py deleted file mode 100644 index 
f51693c..0000000 --- a/vault/verify_crypto_flow.py +++ /dev/null @@ -1,93 +0,0 @@ -import os -import shutil - -import numpy as np -from pyenvector.crypto import Cipher, KeyGenerator - -KEY_DIR = "vault_keys_test" -KEY_ID = "test-key" - - -def verify_flow(): - # 0. Clean up - if os.path.exists(KEY_DIR): - shutil.rmtree(KEY_DIR) - - print("1. Generating Keys...") - # Using dimension 1024 for production - dim = 1024 - keygen = KeyGenerator(key_path=KEY_DIR, key_id=KEY_ID, dim_list=[dim]) - keygen.generate_keys() - - # Keys found directly in KEY_DIR in this version/usage? - # Or maybe KeyGenerator uses key_path as the output dir directly if provided? - # Adjusting to observed behavior. - enc_path = os.path.join(KEY_DIR, "EncKey.json") - sec_path = os.path.join(KEY_DIR, "SecKey.json") - - if not os.path.exists(enc_path): - raise FileNotFoundError(f"Key not found: {enc_path}") - - # 2. Init Cipher - print(f"Initializing Cipher with dim={dim} and key={enc_path}") - cipher = Cipher(enc_key_path=enc_path, dim=dim) - - # 3. Simulate Server: "Encrypting Scores" - # Create scores for dim=1024. Most can be 0. - mock_scores = np.zeros(dim, dtype=np.float32) - mock_scores[:4] = [0.9, 0.1, 0.8, 0.2] - print(f"Original Mock Scores (first 4): {mock_scores[:4]}") - - # Encrypt explicitly. Cipher.encrypt expects a vector (numpy array) - # It returns a list of bytes usually (if multiple input) or single bytes? - # Let's try encrypting as "item" or "query"? - # Actually we just want to encrypt a generic vector. - # 'encrypt' method usually takes (data, type). Type might be 'item', 'query'. - # For simulation, we just want to produce a ciphertext. - # Let's try type="item". - try: - # Encrypt results - encrypted_result = cipher.encrypt(mock_scores, "item") - print(f"Encrypted result type: {type(encrypted_result)}") - - # 4. 
Simulate Vault: "Decrypting Scores" - print("Decrypting using standard decrypt (simulating score decryption)...") - # Use standard decrypt, which should work for encrypted vectors. - decrypted_vector = cipher.decrypt(encrypted_result, sec_key_path=sec_path) - - # decrypted_vector is likely a numpy array or list of arrays - print(f"Decrypted Vector type: {type(decrypted_vector)}") - if isinstance(decrypted_vector, list) and len(decrypted_vector) > 0: - print(f"Item 0 type: {type(decrypted_vector[0])}") - print(f"Decrypted Vector (full): {decrypted_vector}") - - # Check similarity - final_result = decrypted_vector - # Only unwrap if it looks like a list of lists/arrays - if ( - isinstance(final_result, list) - and len(final_result) > 0 - and (isinstance(final_result[0], list) or isinstance(final_result[0], np.ndarray)) - ): - final_result = final_result[0] - - print(f"Decrypted: {final_result[:4]}") - - dec_arr = np.array(final_result) - diff = np.abs(mock_scores - dec_arr[: len(mock_scores)]) - print(f"Max Diff: {np.max(diff)}") - - if np.max(diff) < 1e-4: - print("SUCCESS: Crypto compatibility verified (via standard decrypt).") - else: - print("FAILURE: Decrypted values do not match.") - - except Exception as e: - print(f"ERROR: {e}") - import traceback - - traceback.print_exc() - - -if __name__ == "__main__": - verify_flow()