diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bfff1cc5..8d2fae00 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -128,7 +128,7 @@ jobs: - name: Run golangci-lint run: | go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.1.0 - golangci-lint run --timeout=5m --disable=noctx + golangci-lint run --timeout=5m # ------------------------------------------------------------------------- # 5. Tests — race detector, coverage threshold, test shuffling. @@ -155,8 +155,8 @@ jobs: echo "COVERAGE=${coverage}" >> "$GITHUB_ENV" - name: Coverage threshold (minimum 50%) run: | - if (( $(echo "${COVERAGE} < 50" | bc -l) )); then - echo "::error::Coverage ${COVERAGE}% is below minimum 50%" + if (( $(echo "${COVERAGE} < 60" | bc -l) )); then + echo "::error::Coverage ${COVERAGE}% is below minimum 60%" exit 1 fi - name: Upload coverage @@ -216,12 +216,12 @@ jobs: cache: true - name: govulncheck run: | - go install golang.org/x/vuln/cmd/govulncheck@latest + go install golang.org/x/vuln/cmd/govulncheck@v1.1.4 govulncheck ./... - name: gosec (report only) continue-on-error: true run: | - go install github.com/securego/gosec/v2/cmd/gosec@latest + go install github.com/securego/gosec/v2/cmd/gosec@v2.22.4 gosec -exclude=G104,G703,G704,G101,G107,G112,G114,G115,G201,G202,G203,G204,G301,G302,G304,G305,G306,G307,G401,G402,G403,G404,G501,G502,G503,G504,G505,G601,G602 -confidence=medium -severity=high ./... # ------------------------------------------------------------------------- @@ -264,7 +264,7 @@ jobs: cache: true - name: deadcode run: | - go install golang.org/x/tools/cmd/deadcode@latest + go install golang.org/x/tools/cmd/deadcode@v0.30.0 deadcode ./... 2>&1 | head -50 # ------------------------------------------------------------------------- @@ -323,6 +323,31 @@ jobs: fi rm -f /tmp/hawk-bin + # ------------------------------------------------------------------------- + # Fuzz — short corpus runs to catch panics in fuzz targets. + # ------------------------------------------------------------------------- + fuzz: + name: fuzz (60s) + runs-on: ubuntu-latest + needs: [test] + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: ./.github/actions/checkout-eyrie + with: + ref: ${{ github.head_ref || github.ref_name }} + - uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 + with: + go-version: ${{ env.GO_VERSION }} + cache: true + - name: Run fuzz targets + run: | + go test -fuzz=FuzzScanForAIComments -fuzztime=60s ./cmd/... || true + go test -fuzz=FuzzValidateSettings -fuzztime=60s ./internal/config/... || true + go test -fuzz=FuzzIsSuspicious -fuzztime=60s ./internal/tool/... || true + go test -fuzz=FuzzIsSafeGitCommit -fuzztime=60s ./internal/tool/... || true + go test -fuzz=FuzzParseMessage -fuzztime=60s ./internal/session/... || true + go test -fuzz=FuzzParseSessionMeta -fuzztime=60s ./internal/session/... || true + # ------------------------------------------------------------------------- # 10. Smoke — build hawk and verify ecosystem CLI wiring. # ------------------------------------------------------------------------- diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 23fd6f62..72b60691 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -26,12 +26,15 @@ jobs: steps: - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + - name: Set up QEMU + uses: docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 # v3.6.0 + - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0 - name: Log in to GHCR if: github.event_name != 'pull_request' - uses: docker/login-action@v3 + uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} @@ -39,7 +42,7 @@ jobs: - name: Docker metadata id: meta - uses: docker/metadata-action@v5 + uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0 with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} tags: | @@ -49,9 +52,10 @@ jobs: type=sha,prefix=sha- - name: Build and push - uses: docker/build-push-action@v6 + uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1 # v6.16.0 with: context: . + platforms: linux/amd64,linux/arm64 push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} @@ -61,3 +65,19 @@ jobs: VERSION=${{ github.ref_name }} COMMIT=${{ github.sha }} BUILD_DATE=${{ github.event.head_commit.timestamp }} + + - name: Scan image with Trivy + if: github.event_name != 'pull_request' + uses: aquasecurity/trivy-action@ed142fd0673e97e23eac54620cfb913e5ce36c25 # v0.36.0 + with: + image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:sha-${{ github.sha }} + format: sarif + output: trivy-image.sarif + severity: CRITICAL,HIGH + exit-code: '0' + + - name: Upload Trivy image scan results + if: github.event_name != 'pull_request' && always() + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: trivy-image.sarif diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f99c7d72..6d049b34 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -34,7 +34,7 @@ jobs: uses: goreleaser/goreleaser-action@1a80836c5c9d9e5755a25cb59ec6f45a3b5f41a8 # v7.2.1 with: distribution: goreleaser - version: "~> v2" + version: "v6.3.0" args: release --clean env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml new file mode 100644 index 00000000..f7862f7a --- /dev/null +++ b/.github/workflows/scorecard.yml @@ -0,0 +1,42 @@ +name: Scorecard + +on: + branch_protection_rule: + schedule: + - cron: '37 9 * * 1' + push: + branches: [main] + +permissions: + security-events: write + id-token: write + contents: read + +jobs: + analysis: + name: Scorecard analysis + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Run analysis + uses: ossf/scorecard-action@f49aabe0b5af0936a0987cfb85d86b75b098e755 # v2.4.1 + with: + results_file: scorecard-results.sarif + results_format: sarif + publish_results: true + + - name: Upload artifact + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: SARIF file + path: scorecard-results.sarif + retention-days: 5 + + - name: Upload to code-scanning + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: scorecard-results.sarif diff --git a/.goreleaser.yml b/.goreleaser.yml index ced71e53..3338d6f6 100644 --- a/.goreleaser.yml +++ b/.goreleaser.yml @@ -123,6 +123,11 @@ changelog: # Release — auto-detect prereleases (rc/beta tags). Created on the repo # itself (not a separate release repo). # --------------------------------------------------------------------------- +sboms: + - artifacts: archive + documents: + - "${artifact}.spdx.sbom.json" + release: draft: false prerelease: auto diff --git a/Dockerfile b/Dockerfile index e1d51d84..151b661a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,15 +12,14 @@ COPY go.mod go.sum ./ RUN go mod download COPY . . -# Add replace after source copy so it doesn't get overwritten -RUN echo "" >> go.mod && echo "replace github.com/GrayCodeAI/eyrie => /eyrie" >> go.mod && go mod tidy - -RUN CGO_ENABLED=0 GOOS=linux go build -trimpath -mod=mod \ +# Use go.work to resolve the local eyrie checkout — avoids mutating go.mod at build time. +RUN go work init . /eyrie && \ + CGO_ENABLED=0 GOOS=linux go build -trimpath \ -ldflags="-s -w -X main.Version=$(git describe --tags --always 2>/dev/null || echo dev)" \ -o hawk ./cmd/hawk -# Runtime stage — minimal image -FROM alpine:3.20 +# Runtime stage — Alpine (hawk requires git + bash for workspace operations; distroless excluded) +FROM alpine:3.21 RUN apk add --no-cache ca-certificates git bash tini && \ adduser -D -u 1000 -h /home/hawk hawk diff --git a/cmd/errors.go b/cmd/errors.go index 85a3c319..5b855ecc 100644 --- a/cmd/errors.go +++ b/cmd/errors.go @@ -232,7 +232,7 @@ func panicRecovery(saveFn func()) { _, _ = fmt.Fprintf(os.Stderr, "Details logged to ~/.hawk/crash.log\n") _, _ = fmt.Fprintf(os.Stderr, "Please report this at: https://github.com/GrayCodeAI/hawk/issues\n\n") _, _ = fmt.Fprintf(os.Stderr, "panic: %v\n", r) - os.Exit(1) + os.Exit(1) // os.Exit intentional: panic recovery, defer already unwound } } @@ -269,7 +269,7 @@ func signalHandler(saveFn func()) { } _, _ = fmt.Fprintf(os.Stderr, "Goodbye.\n") - os.Exit(0) + os.Exit(0) // os.Exit intentional: signal handler, must terminate process }() } diff --git a/cmd/inspect.go b/cmd/inspect.go index ef2f7df3..84f3522d 100644 --- a/cmd/inspect.go +++ b/cmd/inspect.go @@ -91,7 +91,7 @@ Examples: } if report.Failed() { - os.Exit(1) + return fmt.Errorf("inspect: one or more checks failed") } return nil }, diff --git a/cmd/sight.go b/cmd/sight.go index ec4dafa2..e8be88b2 100644 --- a/cmd/sight.go +++ b/cmd/sight.go @@ -168,7 +168,7 @@ func runSightReview(ctx context.Context, bridge *hawkSight.Bridge, diff string) } if result.Failed() { - os.Exit(1) + return fmt.Errorf("sight: one or more checks failed") } return nil } diff --git a/cmd/tok.go b/cmd/tok.go new file mode 100644 index 00000000..257ad641 --- /dev/null +++ b/cmd/tok.go @@ -0,0 +1,253 @@ +package cmd + +import ( + "encoding/json" + "fmt" + "io" + "os" + "strings" + + "github.com/GrayCodeAI/tok" + "github.com/spf13/cobra" +) + +// hawk embeds the tok library directly (see internal/engine/token/tokenizer.go). +// These verbs expose tok's compression, token-estimation, and secret-scanning +// surface through the hawk CLI — tok ships no standalone binary. + +var ( + tokInput string + tokFormat string + + // compress flags + tokIntensity string + tokBudget int + tokPrompt bool + tokStats bool + + // estimate flags + tokModel string + + // scan flags + tokRedact bool +) + +var tokCmd = &cobra.Command{ + Use: "tok", + Short: "Token compression, estimation, and secret scanning (embedded tok library)", + Long: `tok exposes hawk's embedded token-efficiency library: + + hawk tok compress shrink prose/prompts or fit text to a token budget + hawk tok estimate count tokens and estimate cost for a model + hawk tok scan detect (and optionally redact) secrets in text + +Input is read from --input , a trailing argument, or stdin. +tok has no standalone binary — these verbs run the library in-process.`, +} + +// readTokInput resolves input from --input , the first positional arg, +// or stdin (in that order). +func readTokInput(args []string) (string, error) { + if tokInput != "" { + b, err := os.ReadFile(tokInput) + if err != nil { + return "", fmt.Errorf("read --input %q: %w", tokInput, err) + } + return string(b), nil + } + if len(args) > 0 { + return strings.Join(args, " "), nil + } + b, err := io.ReadAll(os.Stdin) + if err != nil { + return "", fmt.Errorf("read stdin: %w", err) + } + if len(b) == 0 { + return "", fmt.Errorf("no input: provide text as an argument, --input , or via stdin") + } + return string(b), nil +} + +var tokCompressCmd = &cobra.Command{ + Use: "compress [text]", + Short: "Compress prompts/prose or fit text to a token budget", + Long: `Compress text with the tok pipeline. + +By default uses prompt/prose compression at the chosen --intensity (lite, full, +ultra). Pass --budget to instead run the full output pipeline targeting a token +budget. Use --stats for a savings summary.`, + Args: cobra.ArbitraryArgs, + RunE: func(cmd *cobra.Command, args []string) error { + text, err := readTokInput(args) + if err != nil { + return err + } + + // --budget runs the token-budget output pipeline, unless --prompt forces + // prose/prompt compression. + if tokBudget > 0 && !tokPrompt { + out, stats := tok.Compress(text, tok.WithBudget(tokBudget)) + return emitCompress(cmd, out, &stats, nil) + } + + intensity, err := parseIntensity(tokIntensity) + if err != nil { + return err + } + out, pstats := tok.PromptCompress(text, intensity) + return emitCompress(cmd, out, nil, &pstats) + }, +} + +func parseIntensity(s string) (tok.Intensity, error) { + switch strings.ToLower(strings.TrimSpace(s)) { + case "", "full": + return tok.IntensityFull, nil + case "lite": + return tok.IntensityLite, nil + case "ultra": + return tok.IntensityUltra, nil + default: + return tok.IntensityFull, fmt.Errorf("invalid --intensity %q: use lite, full, or ultra", s) + } +} + +// emitCompress prints compressed output, optionally with stats. Exactly one of +// stats / pstats is non-nil depending on which compression path ran. +func emitCompress(cmd *cobra.Command, out string, stats *tok.Stats, pstats *tok.PromptStats) error { + if tokFormat == "json" { + payload := map[string]any{"compressed": out} + switch { + case stats != nil: + payload["stats"] = stats + case pstats != nil: + payload["stats"] = pstats + } + return writeJSON(cmd, payload) + } + + cmd.Println(out) + if tokStats { + switch { + case stats != nil: + cmd.Println() + cmd.Print(tok.FormatStats(*stats)) + case pstats != nil: + cmd.Println() + cmd.Printf("intensity=%v bytes %d → %d (%.1f%% off)\n", + pstats.Intensity, pstats.OriginalBytes, pstats.CompressedBytes, pstats.PercentOff) + } + } + return nil +} + +var tokEstimateCmd = &cobra.Command{ + Use: "estimate [text]", + Short: "Estimate token count and cost for a model", + Args: cobra.ArbitraryArgs, + RunE: func(cmd *cobra.Command, args []string) error { + text, err := readTokInput(args) + if err != nil { + return err + } + + tokens := tok.EstimateTokensForModel(text, tokModel) + + var costUSD float64 + var priced bool + if pricing, ok := tok.GetModelPricing(tokModel); ok { + priced = true + costUSD = float64(tokens) / 1000.0 * pricing.InputPricePer1K + } + + if tokFormat == "json" { + payload := map[string]any{"tokens": tokens, "model": tokModel} + if priced { + payload["input_cost_usd"] = costUSD + } + return writeJSON(cmd, payload) + } + + cmd.Printf("%d tokens (model: %s)\n", tokens, tokModel) + if priced { + cmd.Printf("≈ $%.6f input cost\n", costUSD) + } else { + cmd.Printf("(no pricing registered for %q — token count only)\n", tokModel) + } + return nil + }, +} + +var tokScanCmd = &cobra.Command{ + Use: "scan [text]", + Short: "Detect (and optionally redact) secrets in text", + Long: "Scan input for credentials, keys, and other secrets. Use --redact to print the input with secrets masked.", + Args: cobra.ArbitraryArgs, + RunE: func(cmd *cobra.Command, args []string) error { + text, err := readTokInput(args) + if err != nil { + return err + } + + detector := tok.NewSecretDetector() + + if tokRedact { + redacted := detector.RedactSecrets(text) + if tokFormat == "json" { + return writeJSON(cmd, map[string]any{"redacted": redacted}) + } + cmd.Print(redacted) + if !strings.HasSuffix(redacted, "\n") { + cmd.Println() + } + return nil + } + + findings := detector.DetectSecrets(text) + + if tokFormat == "json" { + return writeJSON(cmd, map[string]any{ + "count": len(findings), + "secrets": findings, + }) + } + + if len(findings) == 0 { + cmd.Println("no secrets detected") + return nil + } + cmd.Printf("%d secret(s) detected:\n", len(findings)) + for _, f := range findings { + cmd.Printf(" - %s: %s\n", f.Type, f.Masked) + } + // Non-zero exit so callers (CI, hooks) can gate on detection. + return fmt.Errorf("tok scan: %d secret(s) detected", len(findings)) + }, +} + +// writeJSON encodes v as indented JSON to stdout. +func writeJSON(cmd *cobra.Command, v any) error { + enc := json.NewEncoder(cmd.OutOrStdout()) + enc.SetIndent("", " ") + return enc.Encode(v) +} + +func init() { + // Shared input/output flags on each subcommand. + for _, c := range []*cobra.Command{tokCompressCmd, tokEstimateCmd, tokScanCmd} { + c.Flags().StringVar(&tokInput, "input", "", "read input from this file instead of stdin/args") + c.Flags().StringVar(&tokFormat, "format", "text", "output format: text, json") + } + + tokCompressCmd.Flags().StringVar(&tokIntensity, "intensity", "full", "prompt compression intensity: lite, full, ultra") + tokCompressCmd.Flags().IntVar(&tokBudget, "budget", 0, "compress to fit within this many tokens (uses the output pipeline)") + tokCompressCmd.Flags().BoolVar(&tokPrompt, "prompt", false, "force prompt/prose compression even when --budget is set") + tokCompressCmd.Flags().BoolVar(&tokStats, "stats", false, "print a compression savings summary") + + tokEstimateCmd.Flags().StringVar(&tokModel, "model", "gpt-4o", "model to estimate tokens/cost for") + + tokScanCmd.Flags().BoolVar(&tokRedact, "redact", false, "print input with secrets masked instead of listing them") + + tokCmd.AddCommand(tokCompressCmd, tokEstimateCmd, tokScanCmd) + rootCmd.AddCommand(tokCmd) +} diff --git a/external/eyrie b/external/eyrie index ef8fb9a9..6aed6ff5 160000 --- a/external/eyrie +++ b/external/eyrie @@ -1 +1 @@ -Subproject commit ef8fb9a9a04577409d130107ced3e951106e8e8c +Subproject commit 6aed6ff55434ba4a85b8d238896535bf25ab2d56 diff --git a/external/inspect b/external/inspect index 73651ad6..930e88f0 160000 --- a/external/inspect +++ b/external/inspect @@ -1 +1 @@ -Subproject commit 73651ad699be8619c9ca432a994f3e6aef6f507d +Subproject commit 930e88f06f61a61112845fc2488595c620eefb40 diff --git a/external/sight b/external/sight index 6d3b83ab..3585769f 160000 --- a/external/sight +++ b/external/sight @@ -1 +1 @@ -Subproject commit 6d3b83ab628d552f3ae5794ccff412cc58c31cc3 +Subproject commit 3585769f1142fd585067c7935d58daf1090a49d7 diff --git a/external/tok b/external/tok index 4e404ac8..44347603 160000 --- a/external/tok +++ b/external/tok @@ -1 +1 @@ -Subproject commit 4e404ac85a01312fd9591a9d49ba3e1c3484a3dc +Subproject commit 4434760329434de5983874770b4a0da5b6746cc0 diff --git a/external/trace b/external/trace index 4dfa0a2d..1623acc0 160000 --- a/external/trace +++ b/external/trace @@ -1 +1 @@ -Subproject commit 4dfa0a2da431763081ce3c0088fbc6162f01dbcc +Subproject commit 1623acc02c0af898a3a8d51eebf6ac0f9879a953 diff --git a/external/yaad b/external/yaad index 163b1aa9..3416e181 160000 --- a/external/yaad +++ b/external/yaad @@ -1 +1 @@ -Subproject commit 163b1aa99b7b43b94a1c582c3f4894ec0b278ad8 +Subproject commit 3416e181ff2d4195f63965dda47bc61d90ccde34 diff --git a/go.mod b/go.mod index b8bba081..84713748 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/GrayCodeAI/hawk -go 1.26.3 +go 1.26.4 require ( github.com/GrayCodeAI/eyrie v0.1.0