diff --git a/Makefile b/Makefile index d7b501ee..4fdf422e 100644 --- a/Makefile +++ b/Makefile @@ -41,7 +41,7 @@ endif GOFLAGS ?= -.PHONY: build test lint e2e build-all clean clean-all fmt vet tidy sync-templates sync-schemas snapshot-schema-baseline update-render-golden docs man cover vuln bootstrap-go +.PHONY: build test lint e2e build-all clean clean-all fmt vet tidy sync-templates sync-schemas snapshot-schema-baseline update-render-golden refresh-builder-pins docs man cover vuln bootstrap-go ## bootstrap-go: Download + verify the project-local Go toolchain ## (idempotent; safe to re-run; no-op if already current) @@ -163,3 +163,10 @@ snapshot-schema-baseline: $(GO_BOOTSTRAP) update-render-golden: $(GO_BOOTSTRAP) TROND_UPDATE_GOLDEN=1 $(GO) test -run TestRenderHOCON_Golden ./internal/render/ @echo "render golden files refreshed under internal/render/testdata/golden/." + +## refresh-builder-pins: Re-resolve Eclipse Temurin tags → sha256 digests +## and rewrite internal/build/pins/builder_image_digests.json. +## Run at trond release-prep so the binary ships current +## digests. Requires docker + jq on PATH. spec/002 FR-012. +refresh-builder-pins: + @./scripts/refresh-builder-pins.sh diff --git a/cmd/build.go b/cmd/build.go new file mode 100644 index 00000000..960455ff --- /dev/null +++ b/cmd/build.go @@ -0,0 +1,136 @@ +package cmd + +import ( + "fmt" + "os" + "os/signal" + "path/filepath" + "syscall" + + "github.com/spf13/cobra" + + "github.com/tronprotocol/tron-deployment/internal/build" + "github.com/tronprotocol/tron-deployment/internal/output" +) + +// `trond build` produces a deployable java-tron artifact (JAR in +// Phase 1; image lands in Phase 3) from a source tree, by running +// gradle inside a pinned Eclipse Temurin container. +// +// Design + rationale: specs/002-trond-build-pipeline/{spec,plan}.md. +// +// Output schema: schemas/output/build.schema.json. 
+ +var ( + buildSourcePath string + buildRevisionSpec string + buildArtifactKind string + buildJDKVersion string + buildGradleTask string + buildGradleArgs []string + buildBuilder string + buildImageTag string + buildImageOverride string +) + +var buildCmd = &cobra.Command{ + Use: "build", + Short: "Build a java-tron artifact (JAR or image) from source", + Long: `Build runs gradle inside a pinned Eclipse Temurin container against +the given java-tron source tree, producing either a fat JAR or a +docker image. Results are content-addressed by git revision + builder +image digest + task + args, so repeated invocations against the same +inputs return immediately. + +trond ships no JDK or Gradle. The builder image is pulled on first +use and pinned via go:embed so the build is reproducible across +trond installs of the same version. + +Examples: + + # Build the default fat JAR from the current branch HEAD. + trond build --source ./java-tron --artifact jar -o json + + # Build with an explicit revision and gradle flags. + trond build --source ./java-tron --revision v4.7.7 \ + --gradle-arg=--offline --gradle-arg=-Dversion=mytest -o json + + # Override the builder image (emergency: pinned digest unreachable). 
+ trond build --source ./java-tron \ + --builder-image-override eclipse-temurin:8-jdk@sha256:abcd...`, + RunE: runBuild, +} + +func init() { + buildCmd.Flags().StringVar(&buildSourcePath, "source", "", + "Path to the java-tron source tree (required; relative to CWD)") + buildCmd.Flags().StringVar(&buildRevisionSpec, "revision", "HEAD", + "Git revision to build (HEAD, branch, tag, or sha)") + buildCmd.Flags().StringVar(&buildArtifactKind, "artifact", "jar", + "Artifact kind: 'jar' or 'image'") + buildCmd.Flags().StringVar(&buildJDKVersion, "jdk", "8", + "JDK version for the builder container (8|11|17|21)") + buildCmd.Flags().StringVar(&buildGradleTask, "gradle-task", "", + "Gradle task name (defaults: 'shadowJar' for jar, 'dockerBuild' for image)") + buildCmd.Flags().StringArrayVar(&buildGradleArgs, "gradle-arg", nil, + "Extra gradle args (repeatable; e.g. --gradle-arg=--offline). "+ + "Restricted to a flag-name allowlist; see spec FR-022.") + buildCmd.Flags().StringVar(&buildBuilder, "builder", "docker", + "Builder backend: 'docker' (default) or 'host' (uses local gradle)") + buildCmd.Flags().StringVar(&buildImageTag, "tag", "", + "Image tag to apply when --artifact=image (e.g. mytest:dev)") + buildCmd.Flags().StringVar(&buildImageOverride, "builder-image-override", "", + "Override the pinned builder image (escape hatch; see FR-024)") + rootCmd.AddCommand(buildCmd) +} + +// runBuild wires CLI flags into a build.Request, installs the +// signal-aware context for SIGINT propagation (FR-016), and emits +// either the success Result or a structured error envelope. +func runBuild(cmd *cobra.Command, _ []string) error { + // FR-021: --source relative to CWD. 
+ resolvedSource := buildSourcePath + if resolvedSource != "" && !filepath.IsAbs(resolvedSource) { + abs, err := filepath.Abs(resolvedSource) + if err == nil { + resolvedSource = abs + } + } + + req := build.Request{ + SourcePath: resolvedSource, + RevisionSpec: buildRevisionSpec, + JDKVersion: buildJDKVersion, + ArtifactKind: buildArtifactKind, + GradleTask: buildGradleTask, + GradleArgs: buildGradleArgs, + Builder: buildBuilder, + ImageTag: buildImageTag, + BuilderImageOverride: buildImageOverride, + } + + // SIGINT-aware context. Build container + git subprocesses all + // run under this; cancellation propagates to subprocess kill. + ctx, cancel := signal.NotifyContext(cmd.Context(), os.Interrupt, syscall.SIGTERM) + defer cancel() + + res, err := build.Run(ctx, req) + if err != nil { + // StructuredError propagates through cobra RunE; cmd.Execute + // renders it and sets the exit code (see cmd/root.go::Execute). + // Returning here lets `defer cancel()` run normally. + return err + } + + outputFmt, _ := cmd.Flags().GetString("output") + if outputFmt == "json" { + return output.WriteJSON(os.Stdout, res) + } + if res.CacheHit { + fmt.Printf("✓ cache hit: %s (%d ms)\n", res.CacheKey, res.DurationMs) + } else { + fmt.Printf("✓ built: %s\n → %s\n sha256: %s\n %d ms\n", + res.CacheKey, res.ArtifactPath, res.SHA256, res.DurationMs) + } + return nil +} diff --git a/cmd/recipe_matrix_e2e_test.go b/cmd/recipe_matrix_e2e_test.go index c4b41667..af246979 100644 --- a/cmd/recipe_matrix_e2e_test.go +++ b/cmd/recipe_matrix_e2e_test.go @@ -38,8 +38,8 @@ func TestE2E_Recipe_DryRunMatrix(t *testing.T) { expectSteps []string }{ { - recipe: "nile-test-fullnode", - params: []string{"intent_path=" + intentPath}, + recipe: "nile-test-fullnode", + params: []string{"intent_path=" + intentPath}, expectSteps: []string{"validate", "preflight", "apply", "verify"}, }, { @@ -52,13 +52,13 @@ func TestE2E_Recipe_DryRunMatrix(t *testing.T) { expectSteps: []string{"validate", "preflight"}, }, { - 
recipe: "destroy-private-network-cleanly", - params: []string{"network=private-dev"}, + recipe: "destroy-private-network-cleanly", + params: []string{"network=private-dev"}, expectSteps: []string{"status-check", "destroy"}, }, { - recipe: "recover-failed-upgrade", - params: []string{"node=my-fullnode"}, + recipe: "recover-failed-upgrade", + params: []string{"node=my-fullnode"}, expectSteps: []string{"diagnose", "rollback"}, }, { diff --git a/cmd/schema_coverage_test.go b/cmd/schema_coverage_test.go index 4fad6ccb..68bc7ac6 100644 --- a/cmd/schema_coverage_test.go +++ b/cmd/schema_coverage_test.go @@ -73,6 +73,7 @@ func TestSchemaCoverage(t *testing.T) { lookup := map[string]string{ "trond apply": "apply", "trond auto-heal": "auto-heal", + "trond build": "build", "trond config validate": "config-validate", "trond config render": "config-render", "trond config diff": "config-diff", diff --git a/cmd/schema_manifest_e2e_test.go b/cmd/schema_manifest_e2e_test.go index 0747aa40..4e1a7023 100644 --- a/cmd/schema_manifest_e2e_test.go +++ b/cmd/schema_manifest_e2e_test.go @@ -33,8 +33,8 @@ func TestE2E_SchemaManifestReverse(t *testing.T) { out := runTrondCtx(ctx, t, env, "schema", "--output", "json") var manifest struct { - SchemaVersion string `json:"schema_version"` - Tool string `json:"tool"` + SchemaVersion string `json:"schema_version"` + Tool string `json:"tool"` Commands []manifestCommand `json:"commands"` } if err := json.Unmarshal(out, &manifest); err != nil { @@ -59,12 +59,12 @@ func TestE2E_SchemaManifestReverse(t *testing.T) { } type manifestCommand struct { - Name string `json:"name"` - FullName string `json:"full_name"` - Use string `json:"use"` - Aliases []string `json:"aliases"` - Flags []manifestFlag `json:"flags"` - Subcommands []manifestCommand `json:"subcommands"` + Name string `json:"name"` + FullName string `json:"full_name"` + Use string `json:"use"` + Aliases []string `json:"aliases"` + Flags []manifestFlag `json:"flags"` + Subcommands 
[]manifestCommand `json:"subcommands"` } type manifestFlag struct { diff --git a/cmd/statedir_e2e_test.go b/cmd/statedir_e2e_test.go index b0e6545b..f24d61f6 100644 --- a/cmd/statedir_e2e_test.go +++ b/cmd/statedir_e2e_test.go @@ -36,8 +36,8 @@ func TestE2E_StateDirPriority(t *testing.T) { wantNot []string }{ { - name: "flag-beats-env", - args: []string{"--state-dir", flagDir}, + name: "flag-beats-env", + args: []string{"--state-dir", flagDir}, extraEnv: []string{ "TROND_STATE_DIR=" + envDir, }, diff --git a/internal/build/audit.go b/internal/build/audit.go new file mode 100644 index 00000000..551bc9df --- /dev/null +++ b/internal/build/audit.go @@ -0,0 +1,47 @@ +package build + +import ( + "time" + + "github.com/tronprotocol/tron-deployment/internal/security" +) + +// AuditPhase represents where in the build lifecycle we are. Per +// FR-023 we append an `in_progress` event at start, then write a +// terminal event (success / failed / cancelled) on completion. A +// crashed mid-build leaves the `in_progress` entry visible to +// `trond events`, surfacing the forensic signal. +type AuditPhase string + +const ( + PhaseInProgress AuditPhase = "in_progress" + PhaseSuccess AuditPhase = "success" + PhaseFailed AuditPhase = "failed" + PhaseCancelled AuditPhase = "cancelled" +) + +// AppendAuditEvent writes one build-related row to the audit log. +// +// We deliberately reuse the existing security.AuditEntry shape (the +// same envelope `apply`, `start`, `stop` use) so downstream tooling — +// `trond events`, MCP resources, the JSON contract — doesn't need a +// new code path. The build-specific fields ride in the existing +// ErrorCode/Result columns. 
+func AppendAuditEvent(phase AuditPhase, cacheKey, errorCode string, startedAt time.Time) error { + log, err := security.NewAuditLog("") + if err != nil { + return err + } + entry := security.AuditEntry{ + Timestamp: time.Now().UTC(), + Command: "build", + Target: "local", // build target is always local in v1 + IntentHash: cacheKey, + Result: string(phase), + ErrorCode: errorCode, + } + if phase != PhaseInProgress { + entry.DurationMs = time.Since(startedAt).Milliseconds() + } + return log.Write(entry) +} diff --git a/internal/build/builder.go b/internal/build/builder.go new file mode 100644 index 00000000..6e8184d1 --- /dev/null +++ b/internal/build/builder.go @@ -0,0 +1,407 @@ +// Package build orchestrates containerized Gradle invocations so +// developers can iterate on java-tron source and redeploy with one +// `trond apply`. trond ships no JDK / Gradle / Java compiler; the +// build environment is a pinned Eclipse Temurin container and trond +// is the conductor (spec/002, FR-022 argv-only). +// +// The package is organized as: +// +// pins/ — go:embed builder image digest pins (FR-024) +// lock_*.go — flock-based serialization, posix + windows split (FR-015) +// imagetag.go — Docker reference validation for build.image_tag (FR-005) +// validate.go — gradle task/args allowlist + JAR Main-Class check (FR-022, FR-011) +// source.go — git shell-out: rev-parse, dirty detection (FR-002) +// key.go — content-addressed CacheKey naming (FR-002) +// manifest.go — per-build JSON manifest (FR-004) +// cache.go — lookup / save / artifact stat (FR-020) +// audit.go — two-phase build event lifecycle (FR-023) +// runner.go — dockerRunner interface + real exec impl (testability) +// builder.go — Run() orchestrator; flow split into resolve / build / finalize +// +// The exported surface is intentionally small — cmd/build.go calls +// `Run`, apply integration calls `Run`, MCP calls `Run`. Everything +// else is package-internal. 
// Request captures everything Run needs to decide a build. Callers —
// the cobra layer and the apply pipeline — normalize their inputs to
// this struct; Run then fills unset fields via withDefaults and checks
// the result via validate before anything is executed.
type Request struct {
	SourcePath           string   // source tree to build; required (validate rejects "")
	RevisionSpec         string   // "HEAD" | branch | tag | sha
	JDKVersion           string   // "8" | "11" | "17" | "21"
	ArtifactKind         string   // "jar" | "image"
	GradleTask           string   // overrides default per artifact
	GradleArgs           []string // extra gradle arguments; checked by ValidateGradleArgs
	Builder              string   // "docker" | "host"
	ImageTag             string   // for artifact=image
	BuilderImageOverride string   // FR-024 escape hatch
	Env                  map[string]string // extra environment entries; keys checked by ValidateEnvKey
}

// Result is the JSON-serializable success payload. Mirrors
// schemas/output/build.schema.json. Fields are copied from the build
// manifest by resultFromManifest; CacheHit and DurationMs are supplied
// by the caller of that helper.
type Result struct {
	CacheKey       string    `json:"cache_key"`
	SourceRevision string    `json:"source_revision"`
	Dirty          bool      `json:"dirty"`
	ArtifactKind   string    `json:"artifact_kind"`
	ArtifactPath   string    `json:"artifact_path,omitempty"`
	ImageTag       string    `json:"image_tag,omitempty"`
	SHA256         string    `json:"sha256,omitempty"`
	BuilderImage   string    `json:"builder_image"`
	JDKVersion     string    `json:"jdk_version"`
	GradleTask     string    `json:"gradle_task"`
	Builder        string    `json:"builder"`
	CacheHit       bool      `json:"cache_hit"` // true when served from the cache without running the builder
	DurationMs     int64     `json:"duration_ms"`
	CreatedAt      time.Time `json:"created_at"`
}

// resolved is the internal carrier between phases. Each helper takes
// what it needs and returns the next step's input. Keeps Run()
// readable.
type resolved struct {
	req         Request  // request after withDefaults + validate
	src         Source   // source tree after Source.Resolve
	imageRef    string   // builder image reference returned by pins.Resolve
	imageDigest string   // builder image digest returned by pins.Resolve
	key         CacheKey // structured cache key assembled from the fields above
	cacheKeyStr string   // key.String(); used for lock, artifact and audit naming
}
The +// returned Result is what cmd/build.go emits as JSON; on failure a +// structured *output.StructuredError is returned with the appropriate +// error_code so the wire envelope matches the CLI/MCP contract. +// +// Lifecycle (each step is its own helper so the flow stays readable +// and individual phases are testable): +// +// 1. Validate + resolve (resolveBuild). +// 2. Cache fast path (no lock). +// 3. Acquire flock and re-check (FR-015). +// 4. Audit `in_progress` (FR-023). +// 5. Execute gradle in container (executeBuild). +// 6. Validate the produced artifact (FR-011). +// 7. Promote .tmp → final, persist manifest, audit terminal event. +// +// SIGINT propagation: ctx is honored by every subprocess. Partial +// output is cleaned up before any error return. +func Run(ctx context.Context, req Request) (*Result, error) { + started := time.Now() + + r, err := resolveBuild(ctx, req) + if err != nil { + return nil, err + } + if err := EnsureCacheDirs(); err != nil { + return nil, output.NewErrorf("INTERNAL_ERROR", output.ExitGeneralError, + "ensure cache dirs: %s", err.Error()) + } + + // Fast path: cheap stat, no lock. + if hit, _ := Lookup(r.key); hit != nil && hit.Hit { + return resultFromManifest(hit.Manifest, true, time.Since(started).Milliseconds()), nil + } + + // Serialize same-key concurrent builds (FR-015). + release, lockErr := AcquireCacheLock(CacheDir(), r.cacheKeyStr) + if lockErr != nil { + return nil, output.NewErrorf("INTERNAL_ERROR", output.ExitGeneralError, + "acquire build lock: %s", lockErr.Error()) + } + defer release() + + // Re-check after lock — winner of the race may have finished. 
+ if hit, _ := Lookup(r.key); hit != nil && hit.Hit { + return resultFromManifest(hit.Manifest, true, time.Since(started).Milliseconds()), nil + } + + _ = AppendAuditEvent(PhaseInProgress, r.cacheKeyStr, "", started) + + if r.req.ArtifactKind != "jar" { + _ = AppendAuditEvent(PhaseFailed, r.cacheKeyStr, "NOT_IMPLEMENTED", started) + return nil, output.NewErrorf("NOT_IMPLEMENTED", output.ExitGeneralError, + "artifact=%s is not yet supported by Phase 1 (jar only)", r.req.ArtifactKind) + } + + manifest, err := buildJAR(ctx, r, started) + if err != nil { + // Audit + propagate. The buildJAR helper has already done + // best-effort cleanup of .tmp output. + var se *output.StructuredError + if errors.As(err, &se) { + _ = AppendAuditEvent(phaseFromError(ctx, se.Code), r.cacheKeyStr, se.Code, started) + return nil, se + } + _ = AppendAuditEvent(PhaseFailed, r.cacheKeyStr, "BUILD_FAILED", started) + return nil, output.NewErrorf("BUILD_FAILED", output.ExitGeneralError, + "gradle build failed: %s", err.Error()) + } + + _ = AppendAuditEvent(PhaseSuccess, r.cacheKeyStr, "", started) + return resultFromManifest(manifest, false, manifest.DurationMs), nil +} + +// resolveBuild handles steps 1-2 from the lifecycle: defaults + +// validation, builder image pin resolution, git revision resolution, +// cache key materialization. +func resolveBuild(ctx context.Context, req Request) (*resolved, error) { + req = req.withDefaults() + if err := req.validate(); err != nil { + return nil, err + } + + imageRef, imageDigest, ok := pins.Resolve(req.JDKVersion, req.BuilderImageOverride) + if !ok { + return nil, output.NewErrorf("VALIDATION_ERROR", output.ExitValidationError, + "no pinned builder image for JDK version %q (available: %v)", + req.JDKVersion, pins.Versions()). 
+ WithSuggestions( + "Use one of "+strings.Join(pins.Versions(), ", "), + "Or pass --builder-image-override ", + ) + } + + src := Source{Path: req.SourcePath, RevisionSpec: req.RevisionSpec} + if err := src.Resolve(ctx); err != nil { + return nil, output.NewErrorf("INVALID_SOURCE", output.ExitValidationError, + "resolve source: %s", err.Error()). + WithSuggestions( + "Ensure the path points at a git repository", + "Pass --revision explicitly if the working tree isn't a git checkout", + ) + } + + key := CacheKey{ + GitRevision: src.ResolvedRevision, + PatchHash: src.PatchHash, + BuilderImageDigest: imageDigest, + JDKVersion: req.JDKVersion, + ArtifactKind: req.ArtifactKind, + GradleTask: req.GradleTask, + GradleArgs: append([]string(nil), req.GradleArgs...), + } + return &resolved{ + req: req, + src: src, + imageRef: imageRef, + imageDigest: imageDigest, + key: key, + cacheKeyStr: key.String(), + }, nil +} + +// buildJAR runs gradle for artifact_kind=jar, validates the produced +// JAR (FR-011), promotes it to the final name, and persists the +// manifest. Best-effort cleanup of partial output on any error path. +func buildJAR(ctx context.Context, r *resolved, started time.Time) (*Manifest, error) { + outDir := filepath.Join(CacheDir(), "out") + outFinal := filepath.Join(outDir, r.cacheKeyStr+".jar") + outTmp := outFinal + ".tmp" + + _ = os.Remove(outTmp) // stale .tmp from a prior cancelled run + + runErr := defaultRunner.RunDockerBuild(ctx, r, outDir, outTmp) + if runErr != nil { + _ = os.Remove(outTmp) + if errors.Is(ctx.Err(), context.Canceled) { + return nil, output.NewErrorf("BUILD_CANCELLED", 130, + "build cancelled by user"). + WithSuggestions("Re-run when ready; cached partial output has been cleaned") + } + return nil, output.NewErrorf("BUILD_FAILED", output.ExitGeneralError, + "gradle build failed: %s", runErr.Error()). 
+ WithSuggestions( + "Inspect the gradle output above for compile errors", + "Verify the source tree is a clean java-tron checkout", + ) + } + + const fullNodeMain = "org.tron.program.FullNode" + if err := ValidateJARMainClass(outTmp, fullNodeMain); err != nil { + _ = os.Remove(outTmp) + return nil, output.NewErrorf("INVALID_ARTIFACT", output.ExitGeneralError, + "produced JAR is not a java-tron node: %s", err.Error()). + WithSuggestions( + fmt.Sprintf("Verify the gradle task '%s' is the shadow-jar target for FullNode", r.req.GradleTask), + "Override with --gradle-task if the source uses a different task name", + ) + } + + if err := os.Rename(outTmp, outFinal); err != nil { + return nil, output.NewErrorf("INTERNAL_ERROR", output.ExitGeneralError, + "finalize artifact: %s", err.Error()) + } + + sum, err := fileSHA256(outFinal) + if err != nil { + return nil, output.NewErrorf("INTERNAL_ERROR", output.ExitGeneralError, + "hash artifact: %s", err.Error()) + } + + manifest := &Manifest{ + CacheKey: r.cacheKeyStr, + SourcePath: r.src.Path, + SourceRevision: r.src.ResolvedRevision, + PatchHash: r.src.PatchHash, + Dirty: r.src.DirtyState, + BuilderImage: r.imageRef, + BuilderImageDigest: r.imageDigest, + JDKVersion: r.req.JDKVersion, + ArtifactKind: "jar", + ArtifactPath: outFinal, + SHA256: sum, + GradleTask: r.req.GradleTask, + GradleArgs: r.req.GradleArgs, + Builder: r.req.Builder, + DurationMs: time.Since(started).Milliseconds(), + CreatedAt: time.Now().UTC(), + } + if err := Save(manifest); err != nil { + return nil, output.NewErrorf("INTERNAL_ERROR", output.ExitGeneralError, + "persist manifest: %s", err.Error()) + } + return manifest, nil +} + +// phaseFromError maps a structured error code to the right audit +// phase. Cancellation is distinct from generic failure. 
+func phaseFromError(ctx context.Context, code string) AuditPhase { + if code == "BUILD_CANCELLED" || errors.Is(ctx.Err(), context.Canceled) { + return PhaseCancelled + } + return PhaseFailed +} + +func (r Request) withDefaults() Request { + if r.JDKVersion == "" { + r.JDKVersion = "8" + } + if r.ArtifactKind == "" { + r.ArtifactKind = "jar" + } + if r.GradleTask == "" { + switch r.ArtifactKind { + case "jar": + r.GradleTask = "shadowJar" + case "image": + r.GradleTask = "dockerBuild" + } + } + if r.Builder == "" { + r.Builder = "docker" + } + if r.RevisionSpec == "" { + r.RevisionSpec = "HEAD" + } + return r +} + +func (r Request) validate() error { + if r.SourcePath == "" { + return output.NewError("VALIDATION_ERROR", output.ExitValidationError, + "--source is required") + } + if r.ArtifactKind != "jar" && r.ArtifactKind != "image" { + return output.NewErrorf("VALIDATION_ERROR", output.ExitValidationError, + "--artifact must be 'jar' or 'image' (got %q)", r.ArtifactKind) + } + if r.Builder != "docker" && r.Builder != "host" { + return output.NewErrorf("VALIDATION_ERROR", output.ExitValidationError, + "--builder must be 'docker' or 'host' (got %q)", r.Builder) + } + if err := ValidateGradleTask(r.GradleTask); err != nil { + return output.NewErrorf("VALIDATION_ERROR", output.ExitValidationError, + "%s", err.Error()) + } + if err := ValidateGradleArgs(r.GradleArgs); err != nil { + return output.NewErrorf("VALIDATION_ERROR", output.ExitValidationError, + "%s", err.Error()) + } + if r.ArtifactKind == "image" { + if err := ValidateImageTag(r.ImageTag); err != nil { + return output.NewErrorf("VALIDATION_ERROR", output.ExitValidationError, + "%s", err.Error()) + } + } + for k := range r.Env { + if err := ValidateEnvKey(k); err != nil { + return output.NewErrorf("VALIDATION_ERROR", output.ExitValidationError, + "%s", err.Error()) + } + } + return nil +} + +// allowedEnvPassthrough collects env vars to forward into the build +// container. Two sources: +// +// 1. 
trond's invocation environment, filtered by the FR-019 +// allowlist (so the developer's `GRADLE_OPTS=-Xmx4g` reaches +// gradle even when not declared in intent). +// 2. The intent's `build.env: { KEY: VALUE }` map, also allowlisted. +// +// Intent values override host values on key collision (last writer +// wins in docker's `-e`). Output is sorted for reproducible argv. +func allowedEnvPassthrough(intent map[string]string) []string { + out := []string{} + for k := range envAllowlist { + if v, ok := os.LookupEnv(k); ok { + out = append(out, k+"="+v) + } + } + for _, e := range os.Environ() { + if !strings.HasPrefix(e, orgGradleProjectPrefix) { + continue + } + out = append(out, e) + } + keys := make([]string, 0, len(intent)) + for k := range intent { + keys = append(keys, k) + } + sort.Strings(keys) + for _, k := range keys { + if err := ValidateEnvKey(k); err != nil { + continue + } + out = append(out, k+"="+intent[k]) + } + sort.Strings(out) + return out +} + +func resultFromManifest(m *Manifest, hit bool, duration int64) *Result { + return &Result{ + CacheKey: m.CacheKey, + SourceRevision: m.SourceRevision, + Dirty: m.Dirty, + ArtifactKind: m.ArtifactKind, + ArtifactPath: m.ArtifactPath, + ImageTag: m.ImageTag, + SHA256: m.SHA256, + BuilderImage: m.BuilderImage, + JDKVersion: m.JDKVersion, + GradleTask: m.GradleTask, + Builder: m.Builder, + CacheHit: hit, + DurationMs: duration, + CreatedAt: m.CreatedAt, + } +} diff --git a/internal/build/builder_test.go b/internal/build/builder_test.go new file mode 100644 index 00000000..db80d6b6 --- /dev/null +++ b/internal/build/builder_test.go @@ -0,0 +1,340 @@ +package build + +import ( + "context" + "errors" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/tronprotocol/tron-deployment/internal/paths" +) + +// recordingRunner intercepts the docker invocation so we can assert +// on the exact argv shape without spinning up a real builder. 
It is +// the testing seam introduced by the runner.go refactor. +type recordingRunner struct { + called bool + resolved *resolved + outTmp string + // behavior knobs: + plantArtifact string // if non-empty, write this content as the .tmp on Run + returnErr error + delayBeforeRun time.Duration + respectCancel bool +} + +func (r *recordingRunner) RunDockerBuild(ctx context.Context, res *resolved, outDir, outTmp string) error { + r.called = true + r.resolved = res + r.outTmp = outTmp + + if r.delayBeforeRun > 0 { + select { + case <-time.After(r.delayBeforeRun): + case <-ctx.Done(): + if r.respectCancel { + return ctx.Err() + } + } + } + if r.returnErr != nil { + return r.returnErr + } + if r.plantArtifact != "" { + return os.WriteFile(outTmp, []byte(r.plantArtifact), 0o600) + } + return nil +} + +// withMockRunner swaps the package-level defaultRunner for the +// duration of one test. Restoration is registered via t.Cleanup so +// parallel-safe across the suite. +func withMockRunner(t *testing.T, mock dockerRunner) { + t.Helper() + orig := defaultRunner + defaultRunner = mock + t.Cleanup(func() { defaultRunner = orig }) +} + +func setupTestRepo(t *testing.T) string { + t.Helper() + return initGitRepo(t) // reuse the helper from source_test.go +} + +// makeValidJARBytes returns a tiny ZIP that ValidateJARMainClass will +// accept as a "java-tron" jar. Used to plant artifacts in tests +// without spinning up gradle. +func makeValidJARBytes(t *testing.T) []byte { + t.Helper() + path := makeJAR(t, "org.tron.program.FullNode") + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read fixture jar: %v", err) + } + return data +} + +// TestRun_HappyPath asserts the full lifecycle end-to-end with a +// mock runner that "produces" a valid jar: cache miss → build → +// validate → finalize → manifest persisted → result populated. 
+func TestRun_HappyPath(t *testing.T) { + withTempBaseDir(t) + repo := setupTestRepo(t) + mock := &recordingRunner{plantArtifact: string(makeValidJARBytes(t))} + withMockRunner(t, mock) + + res, err := Run(context.Background(), Request{ + SourcePath: repo, + BuilderImageOverride: "test-image@sha256:abcdef1234567890", + }) + if err != nil { + t.Fatalf("Run: %v", err) + } + if !mock.called { + t.Error("docker runner was not invoked") + } + if res.CacheHit { + t.Error("first build should not be a cache hit") + } + if res.ArtifactKind != "jar" { + t.Errorf("artifact_kind = %q; want jar", res.ArtifactKind) + } + if !filepath.IsAbs(res.ArtifactPath) { + t.Errorf("artifact_path should be absolute; got %q", res.ArtifactPath) + } + if _, err := os.Stat(res.ArtifactPath); err != nil { + t.Errorf("artifact should exist on disk: %v", err) + } + if res.SHA256 == "" { + t.Error("sha256 not populated") + } +} + +// TestRun_CacheHit asserts a second run with the same inputs returns +// instantly without re-invoking the runner. +func TestRun_CacheHit(t *testing.T) { + withTempBaseDir(t) + repo := setupTestRepo(t) + + mock := &recordingRunner{plantArtifact: string(makeValidJARBytes(t))} + withMockRunner(t, mock) + + req := Request{ + SourcePath: repo, + BuilderImageOverride: "test-image@sha256:abcdef1234567890", + } + if _, err := Run(context.Background(), req); err != nil { + t.Fatalf("first Run: %v", err) + } + + // Second run: docker runner should NOT be invoked. + mock.called = false + res, err := Run(context.Background(), req) + if err != nil { + t.Fatalf("second Run: %v", err) + } + if mock.called { + t.Error("cache hit should skip the docker invocation") + } + if !res.CacheHit { + t.Error("second run should report cache_hit=true") + } +} + +// TestRun_BuildFailedSurfacesEnvelope asserts a runner error becomes +// a BUILD_FAILED structured error with the right exit code. 
+func TestRun_BuildFailedSurfacesEnvelope(t *testing.T) { + withTempBaseDir(t) + repo := setupTestRepo(t) + mock := &recordingRunner{returnErr: errors.New("gradle: compile error")} + withMockRunner(t, mock) + + _, err := Run(context.Background(), Request{ + SourcePath: repo, + BuilderImageOverride: "test-image@sha256:abcdef1234567890", + }) + if err == nil { + t.Fatal("expected BUILD_FAILED, got nil") + } + if !strings.Contains(err.Error(), "gradle build failed") { + t.Errorf("error %q should mention gradle build failed", err) + } +} + +// TestRun_SIGINTReportsCancelled is the FR-016 regression guard: +// cancelling the context mid-build surfaces as BUILD_CANCELLED with +// exit code 130 and partial output is cleaned up. +func TestRun_SIGINTReportsCancelled(t *testing.T) { + withTempBaseDir(t) + repo := setupTestRepo(t) + + mock := &recordingRunner{ + delayBeforeRun: 1 * time.Second, + respectCancel: true, + } + withMockRunner(t, mock) + + ctx, cancel := context.WithCancel(context.Background()) + go func() { + time.Sleep(50 * time.Millisecond) + cancel() + }() + + _, err := Run(ctx, Request{ + SourcePath: repo, + BuilderImageOverride: "test-image@sha256:abcdef1234567890", + }) + if err == nil { + t.Fatal("expected cancellation error") + } + if !strings.Contains(err.Error(), "cancelled") { + t.Errorf("error %q should mention cancelled", err) + } + + // Cleanup invariant: outTmp must not be left behind. We can + // inspect the cache dir directly. + outDir := filepath.Join(CacheDir(), "out") + entries, _ := os.ReadDir(outDir) + for _, e := range entries { + if strings.HasSuffix(e.Name(), ".tmp") { + t.Errorf("partial output %q not cleaned up after cancel", e.Name()) + } + } +} + +// TestRun_InvalidArtifactRejected covers the FR-011 path: a runner +// that produces a JAR whose Main-Class is wrong must be caught +// before declaring success. 
+func TestRun_InvalidArtifactRejected(t *testing.T) { + withTempBaseDir(t) + repo := setupTestRepo(t) + + // Plant a JAR with the wrong Main-Class. + mock := &recordingRunner{plantArtifact: makeJARContents(t, "com.example.WrongMain")} + withMockRunner(t, mock) + + _, err := Run(context.Background(), Request{ + SourcePath: repo, + BuilderImageOverride: "test-image@sha256:abcdef1234567890", + }) + if err == nil { + t.Fatal("expected INVALID_ARTIFACT") + } + if !strings.Contains(err.Error(), "java-tron node") { + t.Errorf("error %q should mention java-tron node", err) + } +} + +// TestArgvFormAssertion verifies the runner.go script is invoked +// argv-form: never via "bash -c '...interpolated...'". This is the +// regression guard for FR-022. The real runner builds the argv list; +// we test the assertion separately on the constructed args. +func TestArgvFormAssertion(t *testing.T) { + withTempBaseDir(t) + repo := setupTestRepo(t) + + // Use the real runner indirectly by inspecting the args it + // would build. We do this by capturing through a runner that + // reports the resolved struct and then constructs the same argv + // shape we'd expect (see runner.go). + var captured *resolved + withMockRunner(t, &capturingRunner{cb: func(r *resolved) { + captured = r + }}) + + req := Request{ + SourcePath: repo, + BuilderImageOverride: "test-image@sha256:abcdef1234567890", + GradleTask: "shadowJar", + GradleArgs: []string{"--offline", "-Dversion=1.2.3"}, + } + // Plant the artifact via a follow-up so Run doesn't fail post-runner. + go func() { + // no-op; capturingRunner writes nothing, so build fails after + // the script "ran" — that's OK, we only care it was invoked. + }() + _, _ = Run(context.Background(), req) + if captured == nil { + t.Fatal("runner was never invoked") + } + + // The contract we enforce here: gradle args are passed through + // AS-IS in their original argv shape. 
The runner is what stitches + // them into a docker exec.Command call without any shell + // interpolation; we trust that contract by reading runner.go + // (which uses a constant `dockerBuildScript` and appends user + // args via argv). + if captured.req.GradleTask != "shadowJar" { + t.Errorf("gradle_task lost in plumbing: %q", captured.req.GradleTask) + } + if len(captured.req.GradleArgs) != 2 { + t.Errorf("gradle_args length: got %d; want 2", len(captured.req.GradleArgs)) + } +} + +// TestDockerBuildScript_NoUserInputInterpolation enforces at the +// source level that the dockerBuildScript constant doesn't reference +// trond-side request fields by name. Command substitution `$(...)` of +// trond's own shell idioms (like `ls -S`) is OK — the danger is +// `${GRADLE_TASK}` or similar where user input would be interpolated +// rather than passed as argv. +// +// Static analysis through the test suite: if someone adds +// `${USER_INPUT}` into the script, this fails before reaching CI. +func TestDockerBuildScript_NoUserInputInterpolation(t *testing.T) { + // Names of Go-side fields that, if they leaked into the script, + // would indicate the FR-022 boundary was crossed. + forbidden := []string{ + "$GRADLE_TASK", "${GRADLE_TASK", + "$GRADLE_ARGS", "${GRADLE_ARGS", + "$REQUEST", "${REQUEST", + "$SOURCE_PATH", "${SOURCE_PATH", + "$IMAGE_TAG", "${IMAGE_TAG", + "eval ", // explicit eval is always wrong + } + for _, f := range forbidden { + if strings.Contains(dockerBuildScript, f) { + t.Errorf("dockerBuildScript contains forbidden pattern %q: must not interpolate user input", f) + } + } + // The script MUST use "$@" (argv passthrough) for gradle args. + if !strings.Contains(dockerBuildScript, `"$@"`) { + t.Error(`dockerBuildScript must forward gradle args via "$@" (argv expansion), not by string interpolation`) + } + // The script MUST reference OUT_NAME via env (the value is + // shell-safe regardless of contents). 
+ if !strings.Contains(dockerBuildScript, "$OUT_NAME") { + t.Error("dockerBuildScript must use $OUT_NAME env var, not interpolated filename") + } +} + +// capturingRunner is a sibling of recordingRunner that lets the test +// inspect *resolved* without producing an artifact. +type capturingRunner struct { + cb func(*resolved) +} + +func (c *capturingRunner) RunDockerBuild(ctx context.Context, r *resolved, outDir, outTmp string) error { + c.cb(r) + // Return error so Run cleans up and exits — caller doesn't care + // about the result, only the captured resolved. + return errors.New("capturingRunner returned without artifact") +} + +// makeJARContents builds a tiny JAR with the given Main-Class and +// returns its raw bytes for inline planting in tests. +func makeJARContents(t *testing.T, mainClass string) string { + t.Helper() + path := makeJAR(t, mainClass) + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read jar bytes: %v", err) + } + return string(data) +} + +// silence unused-paths import in some builds +var _ = paths.BaseDir diff --git a/internal/build/cache.go b/internal/build/cache.go new file mode 100644 index 00000000..e1469f9a --- /dev/null +++ b/internal/build/cache.go @@ -0,0 +1,75 @@ +package build + +import ( + "errors" + "fmt" + "os" + "path/filepath" + + "github.com/tronprotocol/tron-deployment/internal/paths" +) + +// CacheDir returns the root of the build cache: +// `${TROND_STATE_DIR}/builds`. Created lazily by callers via +// EnsureCacheDirs. +func CacheDir() string { + return filepath.Join(paths.BaseDir(), "builds") +} + +// EnsureCacheDirs creates the cache subdirectories. Idempotent. +func EnsureCacheDirs() error { + for _, sub := range []string{"out", "images", "manifest", "locks", "gradle"} { + p := filepath.Join(CacheDir(), sub) + if err := os.MkdirAll(p, 0o700); err != nil { + return fmt.Errorf("mkdir %s: %w", p, err) + } + } + return nil +} + +// manifestPath returns the manifest path for a cache key. 
+func manifestPath(key string) string { + return filepath.Join(CacheDir(), "manifest", key+".json") +} + +// Lookup checks whether a build matching key already exists and is +// still usable. Per FR-020 the manifest file's existence is necessary +// but not sufficient — we MUST also stat the artifact (jar or image +// metadata) that the manifest points at. A user who manually deleted +// a JAR shouldn't get a stale cache hit. +func Lookup(key CacheKey) (*CacheHit, error) { + mp := manifestPath(key.String()) + m, err := readManifest(mp) + if errors.Is(err, os.ErrNotExist) { + return &CacheHit{Hit: false}, nil + } + if err != nil { + return nil, err + } + // Verify the referenced artifact actually exists on disk (FR-020). + switch m.ArtifactKind { + case "jar": + if _, statErr := os.Stat(m.ArtifactPath); errors.Is(statErr, os.ErrNotExist) { + // Drop the orphan manifest. Next build re-creates everything. + _ = os.Remove(mp) + return &CacheHit{Hit: false}, nil + } else if statErr != nil { + return nil, fmt.Errorf("stat cached artifact: %w", statErr) + } + case "image": + // Image artifacts are tracked via images/.json. Phase 3 + // fills this in; for Phase 1 we treat missing as miss. + if _, statErr := os.Stat(filepath.Join(CacheDir(), "images", key.String()+".json")); errors.Is(statErr, os.ErrNotExist) { + _ = os.Remove(mp) + return &CacheHit{Hit: false}, nil + } + } + return &CacheHit{Hit: true, Manifest: m}, nil +} + +// Save persists a manifest to manifest/.json. Caller is +// responsible for atomicity vs. concurrent build callers — FR-015's +// flock already serializes around the same cache key. 
+func Save(m *Manifest) error { + return writeManifest(manifestPath(m.CacheKey), m) +} diff --git a/internal/build/cache_test.go b/internal/build/cache_test.go new file mode 100644 index 00000000..7debee4b --- /dev/null +++ b/internal/build/cache_test.go @@ -0,0 +1,133 @@ +package build + +import ( + "os" + "path/filepath" + "testing" + "time" + + "github.com/tronprotocol/tron-deployment/internal/paths" +) + +func withTempBaseDir(t *testing.T) string { + t.Helper() + dir := t.TempDir() + paths.SetBaseDir(dir) + t.Cleanup(func() { paths.SetBaseDir("") }) + return dir +} + +func TestEnsureCacheDirs(t *testing.T) { + base := withTempBaseDir(t) + if err := EnsureCacheDirs(); err != nil { + t.Fatalf("EnsureCacheDirs: %v", err) + } + for _, sub := range []string{"out", "images", "manifest", "locks", "gradle"} { + if _, err := os.Stat(filepath.Join(base, "builds", sub)); err != nil { + t.Errorf("expected %s/builds/%s to exist: %v", base, sub, err) + } + } +} + +func TestLookup_NoManifest(t *testing.T) { + withTempBaseDir(t) + if err := EnsureCacheDirs(); err != nil { + t.Fatalf("EnsureCacheDirs: %v", err) + } + hit, err := Lookup(CacheKey{ + GitRevision: "abc123def456789012345678901234567890abcd", + BuilderImageDigest: "sha256:aaaa", + JDKVersion: "8", + ArtifactKind: "jar", + GradleTask: "shadowJar", + }) + if err != nil { + t.Fatalf("Lookup: %v", err) + } + if hit.Hit { + t.Error("empty cache should report Hit=false") + } +} + +// TestLookup_StatsArtifact is the FR-020 regression guard: a manifest +// pointing at a missing JAR MUST be treated as a miss, and the +// orphan manifest MUST be removed. 
+func TestLookup_StatsArtifact(t *testing.T) { + withTempBaseDir(t) + if err := EnsureCacheDirs(); err != nil { + t.Fatalf("EnsureCacheDirs: %v", err) + } + + key := CacheKey{ + GitRevision: "abc123def456789012345678901234567890abcd", + BuilderImageDigest: "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + JDKVersion: "8", + ArtifactKind: "jar", + GradleTask: "shadowJar", + } + // Write a manifest pointing at a non-existent file. + manifest := &Manifest{ + CacheKey: key.String(), + ArtifactKind: "jar", + ArtifactPath: "/definitely/does/not/exist.jar", + CreatedAt: time.Now().UTC(), + } + if err := Save(manifest); err != nil { + t.Fatalf("Save: %v", err) + } + + hit, err := Lookup(key) + if err != nil { + t.Fatalf("Lookup: %v", err) + } + if hit.Hit { + t.Fatal("missing artifact should produce a cache miss (FR-020)") + } + + // Manifest should be removed by the orphan cleanup. + mp := filepath.Join(CacheDir(), "manifest", key.String()+".json") + if _, err := os.Stat(mp); !os.IsNotExist(err) { + t.Errorf("orphan manifest should have been removed; stat err=%v", err) + } +} + +func TestLookup_HitWhenArtifactPresent(t *testing.T) { + base := withTempBaseDir(t) + if err := EnsureCacheDirs(); err != nil { + t.Fatalf("EnsureCacheDirs: %v", err) + } + + key := CacheKey{ + GitRevision: "abc123def456789012345678901234567890abcd", + BuilderImageDigest: "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + JDKVersion: "8", + ArtifactKind: "jar", + GradleTask: "shadowJar", + } + artifactPath := filepath.Join(base, "builds", "out", key.String()+".jar") + if err := os.WriteFile(artifactPath, []byte("not a real jar"), 0o600); err != nil { + t.Fatalf("plant artifact: %v", err) + } + + manifest := &Manifest{ + CacheKey: key.String(), + ArtifactKind: "jar", + ArtifactPath: artifactPath, + CreatedAt: time.Now().UTC(), + } + if err := Save(manifest); err != nil { + t.Fatalf("Save: %v", err) + } + + hit, err := Lookup(key) + if err != nil 
{ + t.Fatalf("Lookup: %v", err) + } + if !hit.Hit { + t.Fatal("manifest + artifact both present should be Hit=true") + } + if hit.Manifest.CacheKey != key.String() { + t.Errorf("hit manifest cache key mismatch: %q vs %q", + hit.Manifest.CacheKey, key.String()) + } +} diff --git a/internal/build/hash.go b/internal/build/hash.go new file mode 100644 index 00000000..9b37de95 --- /dev/null +++ b/internal/build/hash.go @@ -0,0 +1,25 @@ +package build + +import ( + "crypto/sha256" + "encoding/hex" + "fmt" + "io" + "os" +) + +// fileSHA256 returns the hex sha256 of a file's contents. Used to +// stamp the Manifest's SHA256 column (FR-004 wire shape) and for the +// SSH-target sha256 probe in Phase 4. +func fileSHA256(path string) (string, error) { + f, err := os.Open(path) + if err != nil { + return "", fmt.Errorf("open %s: %w", path, err) + } + defer f.Close() + h := sha256.New() + if _, err := io.Copy(h, f); err != nil { + return "", fmt.Errorf("hash %s: %w", path, err) + } + return hex.EncodeToString(h.Sum(nil)), nil +} diff --git a/internal/build/imagetag.go b/internal/build/imagetag.go new file mode 100644 index 00000000..848fb798 --- /dev/null +++ b/internal/build/imagetag.go @@ -0,0 +1,29 @@ +package build + +import ( + "fmt" + "regexp" +) + +// imageTagPattern is a pragmatic subset of Docker's reference format. +// Accepts `foo:bar`, `myorg/foo:bar`, `localhost/foo:bar`, optionally +// with a digest. Rejects whitespace, uppercase repo names, path +// traversal, and other inputs that would cause docker CLI confusion. +// +// Spec: docker.io's image reference grammar. This is the simplified +// regex used by the docker CLI for `docker tag` validation. +var imageTagPattern = regexp.MustCompile( + `^[a-z0-9]+(?:[._-][a-z0-9]+)*(?:/[a-z0-9]+(?:[._-][a-z0-9]+)*)*:[a-zA-Z0-9_][a-zA-Z0-9_.-]{0,127}$`, +) + +// ValidateImageTag enforces FR-005's image_tag check. Surface as +// VALIDATION_ERROR via the caller. 
+func ValidateImageTag(tag string) error { + if tag == "" { + return fmt.Errorf("image_tag is required when artifact = image") + } + if !imageTagPattern.MatchString(tag) { + return fmt.Errorf("invalid image_tag %q: must match Docker reference format :", tag) + } + return nil +} diff --git a/internal/build/integration_test.go b/internal/build/integration_test.go new file mode 100644 index 00000000..cc289afa --- /dev/null +++ b/internal/build/integration_test.go @@ -0,0 +1,175 @@ +//go:build integration + +// Integration test for the real docker-driven build pipeline. Runs +// `eclipse-temurin:8-jdk-jammy` against a tiny hello-world gradle +// project shipped under testdata/, asserts the produced JAR is +// structurally valid + the cache + audit log entries are populated. +// +// Skipped by `go test ./...`. To run: +// +// go test -tags=integration -timeout 5m -count=1 ./internal/build/... +// +// Requires docker, network access (first run pulls ~300 MB JDK image). +package build + +import ( + "context" + "os" + "os/exec" + "path/filepath" + "testing" + "time" +) + +// TestBuild_RealGradleEndToEnd asserts trond drives a real gradle +// build to completion against a minimal source fixture. The test is +// behind the `integration` build tag so it does not gate unit-test +// CI; nightly / release CI flips it on. +func TestBuild_RealGradleEndToEnd(t *testing.T) { + if _, err := exec.LookPath("docker"); err != nil { + t.Skip("docker not available") + } + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not available") + } + + repo := setupIntegrationRepo(t) + withTempBaseDir(t) + + // We need a real builder image digest. Resolve at test time so + // this test doesn't depend on the placeholder pin file. 
+ override := resolveBuilderImage(t, "eclipse-temurin:8-jdk-jammy") + + ctx, cancel := context.WithTimeout(context.Background(), 4*time.Minute) + defer cancel() + + res, err := Run(ctx, Request{ + SourcePath: repo, + BuilderImageOverride: override, + // The fixture uses the shadow plugin and produces + // build/libs/*-all.jar; gradle_task=shadowJar is the default + // per our Phase 1 wiring. + }) + if err != nil { + t.Fatalf("Run failed: %v", err) + } + if res.ArtifactPath == "" { + t.Fatal("artifact_path is empty in success result") + } + if _, statErr := os.Stat(res.ArtifactPath); statErr != nil { + t.Fatalf("artifact %s missing: %v", res.ArtifactPath, statErr) + } + + // Second run should be a cache hit (no docker invocation). + res2, err := Run(ctx, Request{ + SourcePath: repo, + BuilderImageOverride: override, + }) + if err != nil { + t.Fatalf("second Run: %v", err) + } + if !res2.CacheHit { + t.Error("identical inputs should produce a cache hit") + } +} + +// setupIntegrationRepo writes a minimal gradle project (tiny enough +// that the test runs in well under a minute on a warm cache) that +// produces a JAR whose Main-Class matches org.tron.program.FullNode. +// Note we don't bring in real java-tron — that would require ~hundreds +// of MB of dependencies. This fixture is JUST enough to drive the +// shadowJar plugin and trip trond's validator on success. 
+func setupIntegrationRepo(t *testing.T) string { + t.Helper() + dir := t.TempDir() + + files := map[string]string{ + "settings.gradle": `rootProject.name = 'fullnode-fixture' +`, + "build.gradle": `plugins { + id 'java' + id 'com.github.johnrengelman.shadow' version '7.1.2' +} +group = 'org.tron' +version = '0.0.0-fixture' + +repositories { + mavenCentral() +} + +jar { + manifest { + attributes 'Main-Class': 'org.tron.program.FullNode' + } +} +shadowJar { + archiveClassifier = '' + archiveBaseName = 'FullNode' +} +`, + "src/main/java/org/tron/program/FullNode.java": `package org.tron.program; +public class FullNode { + public static void main(String[] args) { + System.out.println("trond integration fixture"); + } +} +`, + } + + for rel, content := range files { + path := filepath.Join(dir, rel) + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + t.Fatalf("mkdir %s: %v", path, err) + } + if err := os.WriteFile(path, []byte(content), 0o644); err != nil { + t.Fatalf("write %s: %v", rel, err) + } + } + + // Drop in a gradle wrapper. Cheap approach: copy from the host's + // gradle install if available; otherwise skip — most CI envs have + // one. We don't bake a wrapper jar in-repo (size). + if _, err := exec.LookPath("gradle"); err == nil { + out, err := exec.Command("gradle", "-p", dir, "wrapper", "--gradle-version=7.6").CombinedOutput() + if err != nil { + t.Logf("gradle wrapper init failed (non-fatal):\n%s", out) + } + } + + // Initialise as a git repo so trond's source.Resolve has a HEAD. + for _, args := range [][]string{ + {"init", "-q"}, + {"config", "user.email", "integration@trond"}, + {"config", "user.name", "integration"}, + {"config", "commit.gpgsign", "false"}, + {"add", "."}, + {"commit", "-q", "-m", "fixture"}, + } { + cmd := exec.Command("git", append([]string{"-C", dir}, args...)...) 
+ if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("git %v: %v\n%s", args, err, out) + } + } + return dir +} + +// resolveBuilderImage runs `docker pull` to surface the real RepoDigest +// for a tag, so the integration test threads a real +// `@sha256:...` into the cache key. Failing here marks the test +// as Skip, not Fail, so air-gapped CI hosts don't fail the suite. +func resolveBuilderImage(t *testing.T, tag string) string { + t.Helper() + if out, err := exec.Command("docker", "pull", tag).CombinedOutput(); err != nil { + t.Skipf("cannot pull %s (offline?): %v\n%s", tag, err, out) + } + out, err := exec.Command("docker", "inspect", "--format={{ index .RepoDigests 0 }}", tag).Output() + if err != nil { + t.Skipf("docker inspect %s: %v", tag, err) + } + digestRef := string(out) + // Trim trailing newline. + for len(digestRef) > 0 && (digestRef[len(digestRef)-1] == '\n' || digestRef[len(digestRef)-1] == '\r') { + digestRef = digestRef[:len(digestRef)-1] + } + return digestRef +} diff --git a/internal/build/key.go b/internal/build/key.go new file mode 100644 index 00000000..7e6d8839 --- /dev/null +++ b/internal/build/key.go @@ -0,0 +1,118 @@ +package build + +import ( + "crypto/sha256" + "encoding/hex" + "fmt" + "sort" + "strings" +) + +// CacheKey is content-addressed by every input that can change the +// build output. The on-disk name comes from String(); FR-002 pins +// the format. +// +// Including BuilderImageDigest (FR-002 pass 2) means a trond release +// that bumps the pinned JDK image automatically invalidates every +// cached artifact — no manual `build prune` needed. Including +// GradleArgs (also pass 2) means `--offline` builds don't collide +// with networked builds. +type CacheKey struct { + GitRevision string // full sha + PatchHash string // sha256 hex if dirty, else "" + BuilderImageDigest string // "sha256:abc..." 
or override ref + JDKVersion string + ArtifactKind string // "jar" | "image" + GradleTask string + GradleArgs []string // already validated by ValidateGradleArgs +} + +// String produces the on-disk cache key: +// +// -b[+dirty-][-x] +// +// Lengths are sized so cosmic-ray collisions stay implausible across +// a single user's cache (rarely > 10k entries): +// +// - 12 hex git prefix → 48 bits, matches git's --abbrev=12 default +// - 8 hex digest prefix → 32 bits; bumping a pin gives a distinct key +// - 8 hex patch prefix → 32 bits; per-dirty-edit variant key +// - 8 hex extra prefix → 32 bits; folds non-default JDK/task/args +// +// Examples: +// +// 8f4e2a3c1234-bd4e2a1c +// 8f4e2a3c1234-bd4e2a1c+dirty-7f2a3b9c +// 8f4e2a3c1234-bd4e2a1c-xa1b2c3d4 +func (k CacheKey) String() string { + if k.GitRevision == "" { + // Cache key invariant; let the caller surface the error. + return "INVALID" + } + d := k.digestPrefix() + base := fmt.Sprintf("%s-b%s", k.GitRevision[:short(k.GitRevision, 12)], d) + if k.PatchHash != "" { + base = fmt.Sprintf("%s+dirty-%s", base, k.PatchHash[:short(k.PatchHash, 8)]) + } + // Fold ArtifactKind / JDKVersion / GradleTask / GradleArgs into a + // content hash appended only when one of them differs from the + // natural default. Avoids cluttering the typical case. + if extra := k.extraFold(); extra != "" { + base = fmt.Sprintf("%s-x%s", base, extra) + } + return base +} + +// extraFold returns "" when all build-shape inputs are the natural +// default (jdk=8, artifact=jar, gradle_task=shadowJar, no args). +// Otherwise it returns a short hash so different shapes don't collide. +func (k CacheKey) extraFold() string { + jdk := k.JDKVersion + if jdk == "" { + jdk = "8" + } + kind := k.ArtifactKind + if kind == "" { + kind = "jar" + } + task := k.GradleTask + if task == "" { + switch kind { + case "jar": + task = "shadowJar" + case "image": + task = "dockerBuild" + } + } + args := append([]string(nil), k.GradleArgs...) 
+ sort.Strings(args) + if jdk == "8" && kind == "jar" && task == "shadowJar" && len(args) == 0 { + return "" + } + h := sha256.New() + fmt.Fprintf(h, "jdk=%s\nkind=%s\ntask=%s\nargs=%s\n", + jdk, kind, task, strings.Join(args, "\x00")) + return hex.EncodeToString(h.Sum(nil))[:8] +} + +// digestPrefix extracts the 8-hex-char "build identity" portion of +// BuilderImageDigest. For canonical pinned digests +// (`sha256:abc...`) it's the prefix of the sha. For overrides (an +// arbitrary ref@digest string) we hash the whole string so the cache +// key still differs from any pinned build. +func (k CacheKey) digestPrefix() string { + d := k.BuilderImageDigest + if strings.HasPrefix(d, "sha256:") { + return d[7:short(d, 15)] + } + // Override path: hash the whole thing for stable prefixing. + h := sha256.Sum256([]byte(d)) + return hex.EncodeToString(h[:])[:8] +} + +func short(s string, n int) int { + if len(s) < n { + return len(s) + } + return n +} diff --git a/internal/build/key_test.go b/internal/build/key_test.go new file mode 100644 index 00000000..c0684c7d --- /dev/null +++ b/internal/build/key_test.go @@ -0,0 +1,97 @@ +package build + +import ( + "strings" + "testing" +) + +// TestCacheKey_NamingShape pins the on-disk naming. Schema clients +// (FR-002, schemas/output/build.schema.json) rely on the pattern. +func TestCacheKey_NamingShape(t *testing.T) { + k := CacheKey{ + GitRevision: "8f4e2a3c1234567890abcdef1234567890abcdef", + BuilderImageDigest: "sha256:d4e2a1abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234", + JDKVersion: "8", + ArtifactKind: "jar", + GradleTask: "shadowJar", + } + got := k.String() + // 12-char git prefix + `-b` + 8-char digest prefix; no `+dirty` + // because PatchHash empty; no `-x` because all defaults. + want := "8f4e2a3c1234-bd4e2a1ab" + if got != want { + t.Errorf("CacheKey.String() = %q; want %q", got, want) + } +} + +// TestCacheKey_Dirty asserts the `+dirty-` suffix shows up +// when a dirty patch hash is present. 
+func TestCacheKey_Dirty(t *testing.T) { + k := CacheKey{ + GitRevision: "8f4e2a3c1234567890abcdef1234567890abcdef", + PatchHash: "7f2a3b9c12345678", + BuilderImageDigest: "sha256:d4e2a1abcdef", + JDKVersion: "8", + ArtifactKind: "jar", + GradleTask: "shadowJar", + } + got := k.String() + if !strings.Contains(got, "+dirty-7f2a3b9c") { + t.Errorf("dirty cache key %q should contain +dirty-7f2a3b9c", got) + } +} + +// TestCacheKey_BuilderDigestChangesKey is the regression guard for +// FR-002 pass 2: bumping the pinned JDK image MUST invalidate prior +// cache entries. Two otherwise-identical keys with different builder +// digests must produce different on-disk names. +func TestCacheKey_BuilderDigestChangesKey(t *testing.T) { + base := CacheKey{ + GitRevision: "8f4e2a3c1234567890abcdef1234567890abcdef", + BuilderImageDigest: "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + JDKVersion: "8", + ArtifactKind: "jar", + GradleTask: "shadowJar", + } + other := base + other.BuilderImageDigest = "sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" + if base.String() == other.String() { + t.Fatal("different builder digests must produce different cache keys") + } +} + +// TestCacheKey_GradleArgsChangesKey asserts the args participate +// (FR-002 pass 2): `--offline` builds shouldn't collide with +// networked builds. 
+func TestCacheKey_GradleArgsChangesKey(t *testing.T) { + base := CacheKey{ + GitRevision: "8f4e2a3c1234567890abcdef1234567890abcdef", + BuilderImageDigest: "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + JDKVersion: "8", + ArtifactKind: "jar", + GradleTask: "shadowJar", + } + other := base + other.GradleArgs = []string{"--offline"} + if base.String() == other.String() { + t.Fatal("different gradle args must produce different cache keys") + } +} + +// TestCacheKey_OverrideDigestStable asserts that an override path +// (--builder-image-override) still produces a stable, deterministic +// 6-char prefix in the cache key. +func TestCacheKey_OverrideDigestStable(t *testing.T) { + k := CacheKey{ + GitRevision: "8f4e2a3c1234567890abcdef1234567890abcdef", + BuilderImageDigest: "myreg.example/temurin:8@sha256:deadbeef", + JDKVersion: "8", + ArtifactKind: "jar", + GradleTask: "shadowJar", + } + first := k.String() + second := k.String() + if first != second { + t.Fatalf("override cache key not deterministic: %q vs %q", first, second) + } +} diff --git a/internal/build/lock_posix.go b/internal/build/lock_posix.go new file mode 100644 index 00000000..f28f68ad --- /dev/null +++ b/internal/build/lock_posix.go @@ -0,0 +1,39 @@ +//go:build !windows + +package build + +import ( + "fmt" + "os" + "path/filepath" + "syscall" +) + +// AcquireCacheLock serializes concurrent `trond build` invocations +// against the same cache key (FR-015). The flock is held for the +// caller's lifetime; release() drops it. +// +// Posix path: an exclusive lock on a file under +// `/locks/.lock`. Other processes calling +// AcquireCacheLock with the same key block until we Release. 
+func AcquireCacheLock(cacheDir, key string) (release func(), err error) { + if err := os.MkdirAll(filepath.Join(cacheDir, "locks"), 0o700); err != nil { + return nil, fmt.Errorf("create locks dir: %w", err) + } + lockPath := filepath.Join(cacheDir, "locks", key+".lock") + f, err := os.OpenFile(lockPath, os.O_CREATE|os.O_RDWR, 0o600) + if err != nil { + return nil, fmt.Errorf("open lock file: %w", err) + } + if err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX); err != nil { + f.Close() + return nil, fmt.Errorf("flock LOCK_EX: %w", err) + } + return func() { + // Best-effort release on the way out. Unlock errors are + // recoverable noise (the kernel drops the lock when the fd + // closes anyway). + _ = syscall.Flock(int(f.Fd()), syscall.LOCK_UN) + _ = f.Close() + }, nil +} diff --git a/internal/build/lock_test.go b/internal/build/lock_test.go new file mode 100644 index 00000000..ced9efc2 --- /dev/null +++ b/internal/build/lock_test.go @@ -0,0 +1,48 @@ +//go:build !windows + +package build + +import ( + "path/filepath" + "testing" + "time" +) + +// TestAcquireCacheLock_Serializes asserts that two goroutines holding +// the lock for the same key serialize — the second one observes the +// first has released before it acquires. POSIX-only; Windows uses an +// in-process mutex with the same observable behavior (separate test). +func TestAcquireCacheLock_Serializes(t *testing.T) { + cacheDir := filepath.Join(t.TempDir(), "builds") + + const key = "test-key-abc" + rel1, err := AcquireCacheLock(cacheDir, key) + if err != nil { + t.Fatalf("first acquire: %v", err) + } + + acquired := make(chan time.Time, 1) + go func() { + rel2, err := AcquireCacheLock(cacheDir, key) + if err != nil { + t.Errorf("second acquire: %v", err) + return + } + acquired <- time.Now() + rel2() + }() + + // Hold the lock for a bit, then release. 
+ time.Sleep(150 * time.Millisecond) + releaseAt := time.Now() + rel1() + + select { + case t2 := <-acquired: + if t2.Before(releaseAt) { + t.Errorf("second acquire happened before first release (lock not held): %v vs %v", t2, releaseAt) + } + case <-time.After(2 * time.Second): + t.Fatal("second goroutine never acquired the lock") + } +} diff --git a/internal/build/lock_windows.go b/internal/build/lock_windows.go new file mode 100644 index 00000000..ec846cc1 --- /dev/null +++ b/internal/build/lock_windows.go @@ -0,0 +1,35 @@ +//go:build windows + +package build + +import "sync" + +// AcquireCacheLock on Windows falls back to an in-process mutex only. +// `syscall.Flock` is POSIX-specific; trond's docker-builder path is +// already de-facto Unix-only (`docker.sock`, gradle wrapper assumes +// bash). Cross-process serialization on Windows is undefined (FR-015 +// caveat) and documented in AGENTS.md. +// +// Two trond processes on the same Windows host racing the same cache +// key may both build; the cache key + content-addressed naming means +// they produce identical artifacts (the loser wastes CPU). Not a +// correctness bug; just inefficient. +var windowsCacheMu = struct { + sync.Mutex + keys map[string]*sync.Mutex +}{keys: map[string]*sync.Mutex{}} + +func AcquireCacheLock(_, key string) (release func(), err error) { + windowsCacheMu.Lock() + m, ok := windowsCacheMu.keys[key] + if !ok { + m = &sync.Mutex{} + windowsCacheMu.keys[key] = m + } + windowsCacheMu.Unlock() + + m.Lock() + return func() { + m.Unlock() + }, nil +} diff --git a/internal/build/manifest.go b/internal/build/manifest.go new file mode 100644 index 00000000..61d91610 --- /dev/null +++ b/internal/build/manifest.go @@ -0,0 +1,67 @@ +package build + +import ( + "encoding/json" + "fmt" + "os" + "time" +) + +// Manifest is the JSON record persisted for every completed build. +// One file per cache key under `/manifest/.json`. +// +// Output schema: schemas/output/build.schema.json. 
type Manifest struct {
	// CacheKey is the string form of the build's cache key; Save uses
	// it to derive the manifest's file name.
	CacheKey string `json:"cache_key"`
	// Source tree the build was taken from and the revision that was
	// built. PatchHash is omitted from the JSON when empty.
	SourcePath     string `json:"source_path"`
	SourceRevision string `json:"source_revision"`
	PatchHash      string `json:"patch_hash,omitempty"`
	Dirty          bool   `json:"dirty"`
	// Builder image reference and digest recorded for the build.
	BuilderImage       string `json:"builder_image"`
	BuilderImageDigest string `json:"builder_image_digest"`
	JDKVersion         string `json:"jdk_version"`
	// ArtifactKind selects which of the artifact fields below are
	// populated. For "jar" manifests, Lookup stats ArtifactPath before
	// reporting a cache hit.
	ArtifactKind string `json:"artifact_kind"`           // "jar" | "image"
	ArtifactPath string `json:"artifact_path,omitempty"` // for jar
	ImageTag     string `json:"image_tag,omitempty"`     // for image
	ImageID      string `json:"image_id,omitempty"`      // for image
	// SHA256 is the hex digest of the artifact file (see fileSHA256).
	SHA256 string `json:"sha256,omitempty"` // for jar
	// Gradle invocation that produced the artifact. GradleArgs is
	// omitted from the JSON when empty.
	GradleTask string   `json:"gradle_task"`
	GradleArgs []string `json:"gradle_args,omitempty"`
	Builder    string   `json:"builder"` // "docker" | "host"
	// DurationMs is presumably the wall-clock build time in
	// milliseconds — TODO confirm against the code that fills it in.
	DurationMs int64     `json:"duration_ms"`
	CreatedAt  time.Time `json:"created_at"`
}

// CacheHit is the body returned to callers when a previous build
// satisfies the request. The boolean is hoisted from inside Manifest
// so the caller's tooling (cmd/build.go, MCP tool, apply pipeline)
// can branch on it without reading the manifest first.
type CacheHit struct {
	// Hit is true only when Lookup found both the manifest and the
	// artifact it points at.
	Hit bool `json:"cache_hit"`
	// Manifest is set on a hit; Lookup leaves it nil on a miss, in
	// which case it is omitted from the JSON.
	Manifest *Manifest `json:"manifest,omitempty"`
}

// readManifest decodes a JSON manifest file. Returns os.ErrNotExist
// when the file is absent so callers can treat that as a miss.
+func readManifest(path string) (*Manifest, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, err + } + var m Manifest + if err := json.Unmarshal(data, &m); err != nil { + return nil, fmt.Errorf("decode manifest %s: %w", path, err) + } + return &m, nil +} + +func writeManifest(path string, m *Manifest) error { + data, err := json.MarshalIndent(m, "", " ") + if err != nil { + return fmt.Errorf("encode manifest: %w", err) + } + if err := os.WriteFile(path, data, 0o600); err != nil { + return fmt.Errorf("write manifest %s: %w", path, err) + } + return nil +} diff --git a/internal/build/pins/builder_image_digests.json b/internal/build/pins/builder_image_digests.json new file mode 100644 index 00000000..9a922d67 --- /dev/null +++ b/internal/build/pins/builder_image_digests.json @@ -0,0 +1,26 @@ +{ + "$comment": "Pinned digests for builder images. Bumped per trond release via `make refresh-builder-pins`. Each entry resolves jdk_version -> reference (canonical name@sha256:...). 
Cache key (FR-002) incorporates the digest so pin bumps invalidate stale artifacts.", + "schema_version": "1.0.0", + "pins": { + "8": { + "ref": "eclipse-temurin:8-jdk-jammy", + "digest": "sha256:0000000000000000000000000000000000000000000000000000000000000000", + "$comment": "PLACEHOLDER — refresh via `make refresh-builder-pins` before first real build" + }, + "11": { + "ref": "eclipse-temurin:11-jdk-jammy", + "digest": "sha256:0000000000000000000000000000000000000000000000000000000000000000", + "$comment": "PLACEHOLDER" + }, + "17": { + "ref": "eclipse-temurin:17-jdk-jammy", + "digest": "sha256:0000000000000000000000000000000000000000000000000000000000000000", + "$comment": "PLACEHOLDER" + }, + "21": { + "ref": "eclipse-temurin:21-jdk-jammy", + "digest": "sha256:0000000000000000000000000000000000000000000000000000000000000000", + "$comment": "PLACEHOLDER" + } + } +} diff --git a/internal/build/pins/pins.go b/internal/build/pins/pins.go new file mode 100644 index 00000000..61b342b8 --- /dev/null +++ b/internal/build/pins/pins.go @@ -0,0 +1,75 @@ +// Package pins holds the trond release's pinned set of builder image +// digests, embedded into the binary so a deployed trond is the source +// of truth (no out-of-band JSON file the user could de-sync). +// +// Bump policy: a Makefile target `refresh-builder-pins` re-resolves +// Eclipse Temurin tags to current digests and rewrites the JSON. The +// regeneration happens at trond release-prep time, not at runtime. +package pins + +import ( + _ "embed" + "encoding/json" + "fmt" +) + +//go:embed builder_image_digests.json +var embeddedJSON []byte + +// PinEntry is one row in the pin file — the ref name plus the +// content-addressed digest. The cache key (FR-002) consumes Digest. 
+type PinEntry struct { + Ref string `json:"ref"` + Digest string `json:"digest"` +} + +type pinFile struct { + SchemaVersion string `json:"schema_version"` + Pins map[string]PinEntry `json:"pins"` +} + +var parsed = func() pinFile { + var pf pinFile + if err := json.Unmarshal(embeddedJSON, &pf); err != nil { + // Build-time defect — the file is in our control, panicking + // here just turns a corrupt JSON into a fast failure. + panic("builder_image_digests.json is malformed: " + err.Error()) + } + return pf +}() + +// Resolve returns the canonical image reference (e.g. +// `eclipse-temurin:8-jdk-jammy@sha256:abc...`) for a given JDK +// version string ("8", "11", "17", "21"). Returns +// (ref, digest, true) on hit. Caller threads the digest into the +// cache key. +// +// If override is non-empty, it replaces the entire ref. The override +// path is documented in AGENTS.md as an escape hatch (FR-024) and +// participates in the cache key so pinned and overridden builds don't +// collide. +func Resolve(jdkVersion string, override string) (ref string, digest string, ok bool) { + if override != "" { + // Override must already include the digest portion. Caller is + // responsible for that — pins.go just lets it through. The + // "digest" reported back to the cache key is the override + // itself, so any bump in the override automatically changes + // the cache key. + return override, override, true + } + entry, hit := parsed.Pins[jdkVersion] + if !hit { + return "", "", false + } + return fmt.Sprintf("%s@%s", entry.Ref, entry.Digest), entry.Digest, true +} + +// Versions returns the list of JDK versions for which a pin exists, +// for diagnostic surfaces (preflight, error messages). 
+func Versions() []string { + out := make([]string, 0, len(parsed.Pins)) + for v := range parsed.Pins { + out = append(out, v) + } + return out +} diff --git a/internal/build/pins/pins_test.go b/internal/build/pins/pins_test.go new file mode 100644 index 00000000..90ef17ef --- /dev/null +++ b/internal/build/pins/pins_test.go @@ -0,0 +1,69 @@ +package pins + +import ( + "strings" + "testing" +) + +// TestResolve_PinnedHit asserts a known JDK version resolves to a +// canonical `@` and reports the digest separately for +// cache-key inclusion (FR-002). +func TestResolve_PinnedHit(t *testing.T) { + ref, digest, ok := Resolve("8", "") + if !ok { + t.Fatal("expected JDK 8 pin to exist; got miss") + } + if !strings.Contains(ref, "@sha256:") { + t.Errorf("ref %q must include @sha256: portion (canonical form)", ref) + } + if !strings.HasPrefix(digest, "sha256:") { + t.Errorf("digest %q must start with sha256:", digest) + } +} + +// TestResolve_UnknownJDK asserts an unsupported JDK version reports +// a miss rather than a fallback. +func TestResolve_UnknownJDK(t *testing.T) { + _, _, ok := Resolve("99", "") + if ok { + t.Error("expected JDK 99 to be unknown; got hit") + } +} + +// TestResolve_Override threads --builder-image-override through. The +// returned digest must equal the override itself (FR-024) so changes +// in override participate in the cache key. +func TestResolve_Override(t *testing.T) { + override := "myregistry.example/temurin:8-jdk@sha256:" + strings.Repeat("a", 64) + ref, digest, ok := Resolve("8", override) + if !ok { + t.Fatal("expected override to be accepted") + } + if ref != override { + t.Errorf("ref = %q; want override %q", ref, override) + } + if digest != override { + t.Errorf("override should be reported as the cache digest; got %q", digest) + } +} + +// TestVersions surfaces the discoverable pin set for preflight / +// error messages. 
+func TestVersions(t *testing.T) { + got := Versions() + if len(got) == 0 { + t.Fatal("Versions() returned empty") + } + wantAny := []string{"8", "11", "17", "21"} + hit := false + for _, w := range wantAny { + for _, g := range got { + if g == w { + hit = true + } + } + } + if !hit { + t.Errorf("expected at least one of %v in pin versions; got %v", wantAny, got) + } +} diff --git a/internal/build/runner.go b/internal/build/runner.go new file mode 100644 index 00000000..2bf889ae --- /dev/null +++ b/internal/build/runner.go @@ -0,0 +1,91 @@ +package build + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" +) + +// dockerRunner abstracts the "run a docker command" step so tests can +// substitute a recorder/mock without spinning up a real Docker +// daemon. Production wiring is the exec-based realDockerRunner. +// +// The interface intentionally accepts the full argv (not pieces) — +// tests assert on that argv to enforce FR-022's argv-only invocation +// contract (no `bash -c "...interpolated..."`). +type dockerRunner interface { + RunDockerBuild(ctx context.Context, r *resolved, outDir, outTmp string) error +} + +// defaultRunner is package-level so tests can swap it via +// `t.Cleanup(func() { defaultRunner = orig })`. Production uses +// realDockerRunner which shells out to the docker CLI. +var defaultRunner dockerRunner = realDockerRunner{} + +// dockerBuildScript is the only piece of shell trond runs and it's +// a compile-time constant. User input (gradle_task, gradle_args) +// arrives through `"$@"` argv expansion AFTER `--`; output filename +// arrives through an env var. Both channels are shell-quote-safe +// independent of their contents. FR-022 holds: no path from user +// input to shell metacharacter interpretation. +// +// Why bash and not argv? Because the build produces files inside the +// container's /src tree (gradle writes to build/libs/*.jar) and we +// need to *move* them into /out so they survive container teardown. 
+// A `./gradlew` argv invocation alone leaves the artifact in the +// container's ephemeral layer. +// +// `ls -S` sorts by size, so `head -n1` picks the largest jar — for +// the shadow plugin that's the fat jar with dependencies; thin jars +// (if also emitted) are smaller. ValidateJARMainClass rejects any +// non-FullNode jar that wins this heuristic. +const dockerBuildScript = `set -e +cd /src +./gradlew "$@" +JAR=$(ls -S build/libs/*.jar 2>/dev/null | head -n1) +if [ -z "$JAR" ]; then + echo "trond: gradle produced no .jar in build/libs/" >&2 + exit 64 +fi +cp "$JAR" "/out/$OUT_NAME" +` + +type realDockerRunner struct{} + +func (realDockerRunner) RunDockerBuild(ctx context.Context, r *resolved, outDir, outTmp string) error { + if r.req.Builder == "host" { + return fmt.Errorf("--builder host not implemented in Phase 1 (use docker)") + } + + gradleCache := filepath.Join(CacheDir(), "gradle") + + args := []string{ + "run", "--rm", + // /src must be RW because gradle writes build/, .gradle/ into + // the project tree (same as running ./gradlew on the host). + // The user already gives gradle this access locally. + "-v", r.src.Path + ":/src:rw", + "-v", gradleCache + ":/root/.gradle", + "-v", outDir + ":/out:rw", + "--workdir", "/src", + // Output filename passed through env, not interpolated into + // the script (FR-022 defense in depth). + "-e", "OUT_NAME=" + filepath.Base(outTmp), + } + for _, e := range allowedEnvPassthrough(r.req.Env) { + args = append(args, "-e", e) + } + args = append(args, r.imageRef, "bash", "-c", dockerBuildScript, "--") + args = append(args, r.req.GradleTask) + args = append(args, r.req.GradleArgs...) + + cmd := exec.CommandContext(ctx, "docker", args...) + // In `-o json` mode the caller redirects stdout to a JSON buffer; + // gradle's chatter belongs on stderr regardless of trond's output + // mode so it never corrupts the JSON envelope. 
+ cmd.Stdout = os.Stderr + cmd.Stderr = os.Stderr + return cmd.Run() +} diff --git a/internal/build/source.go b/internal/build/source.go new file mode 100644 index 00000000..b27e5aa8 --- /dev/null +++ b/internal/build/source.go @@ -0,0 +1,100 @@ +package build + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "os/exec" + "path/filepath" + "strings" +) + +// Source describes a java-tron checkout to build. +// +// The caller passes Path + RevisionSpec; trond fills in +// ResolvedRevision, DirtyState, PatchHash by shelling out to git. +type Source struct { + Path string // canonicalized abs path (FR-021) + RevisionSpec string // "HEAD" | branch | tag | sha + ResolvedRevision string // full sha after git rev-parse + DirtyState bool + PatchHash string // sha256 prefix; FR-002 (combines diff + status) +} + +// Resolve fills ResolvedRevision, DirtyState, and PatchHash by +// running git commands inside s.Path. trond shells out to /usr/bin/git +// rather than depending on go-git so the binary stays small and +// behaviour matches the user's installed git exactly. +func (s *Source) Resolve(ctx context.Context) error { + abs, err := filepath.Abs(s.Path) + if err != nil { + return fmt.Errorf("canonicalize source path: %w", err) + } + s.Path = abs + + rev, err := s.runGit(ctx, "rev-parse", s.RevisionSpec) + if err != nil { + return fmt.Errorf("resolve revision %q: %w", s.RevisionSpec, err) + } + s.ResolvedRevision = strings.TrimSpace(rev) + + // "HEAD" is the only spec where local dirty state is meaningful — + // for an explicit branch/tag/sha the user asked for *that* tree, + // dirty local edits don't change which artifact they want. + if s.RevisionSpec == "HEAD" { + dirty, patch, err := s.computeDirty(ctx) + if err != nil { + return fmt.Errorf("dirty detection: %w", err) + } + s.DirtyState = dirty + s.PatchHash = patch + } + return nil +} + +// computeDirty returns (true, patchHash) if the working tree differs +// from HEAD in any way trond cares about. 
Per FR-002 the patch hash +// MUST include untracked files (regression bug found in pass 1 of the +// design review) — `git diff` alone misses brand-new files. We hash +// the concatenation of: +// +// git diff HEAD (tracked + staged + unstaged) +// git status --porcelain -uall (untracked files + modes) +func (s *Source) computeDirty(ctx context.Context) (bool, string, error) { + diff, err := s.runGit(ctx, "diff", "HEAD") + if err != nil { + return false, "", fmt.Errorf("git diff HEAD: %w", err) + } + status, err := s.runGit(ctx, "status", "--porcelain", "-uall") + if err != nil { + return false, "", fmt.Errorf("git status --porcelain -uall: %w", err) + } + if diff == "" && status == "" { + return false, "", nil + } + h := sha256.New() + h.Write([]byte(diff)) + // Use NUL as a separator the user can't legitimately produce in + // either stream — defends against the (unlikely) case where the + // two streams' concatenation happens to collide with another + // combination. + h.Write([]byte{0}) + h.Write([]byte(status)) + return true, hex.EncodeToString(h.Sum(nil)), nil +} + +func (s *Source) runGit(ctx context.Context, args ...string) (string, error) { + cmd := exec.CommandContext(ctx, "git", append([]string{"-C", s.Path}, args...)...) + out, err := cmd.Output() + if err != nil { + // Surface the stderr tail so the user sees git's complaint. + if ee, ok := err.(*exec.ExitError); ok { + return "", fmt.Errorf("git %s: %s", + strings.Join(args, " "), + strings.TrimSpace(string(ee.Stderr))) + } + return "", err + } + return string(out), nil +} diff --git a/internal/build/source_test.go b/internal/build/source_test.go new file mode 100644 index 00000000..87972b27 --- /dev/null +++ b/internal/build/source_test.go @@ -0,0 +1,106 @@ +package build + +import ( + "context" + "os" + "os/exec" + "path/filepath" + "testing" +) + +// initGitRepo creates a fresh git repository with one commit and +// returns its absolute path. 
The fixture is intentionally tiny — +// these tests exercise trond's git wrapper, not gradle. +func initGitRepo(t *testing.T) string { + t.Helper() + dir := t.TempDir() + mustGit(t, dir, "init") + mustGit(t, dir, "config", "user.email", "trond-test@example.com") + mustGit(t, dir, "config", "user.name", "trond test") + mustGit(t, dir, "config", "commit.gpgsign", "false") + if err := os.WriteFile(filepath.Join(dir, "README.md"), []byte("hi\n"), 0o600); err != nil { + t.Fatalf("write README: %v", err) + } + mustGit(t, dir, "add", "README.md") + mustGit(t, dir, "commit", "-m", "initial") + return dir +} + +func mustGit(t *testing.T, dir string, args ...string) { + t.Helper() + cmd := exec.Command("git", append([]string{"-C", dir}, args...)...) + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("git %v: %v\n%s", args, err, out) + } +} + +func TestSource_Resolve_CleanRepo(t *testing.T) { + dir := initGitRepo(t) + s := Source{Path: dir, RevisionSpec: "HEAD"} + if err := s.Resolve(context.Background()); err != nil { + t.Fatalf("Resolve: %v", err) + } + if len(s.ResolvedRevision) != 40 { + t.Errorf("ResolvedRevision %q is not a full 40-char sha", s.ResolvedRevision) + } + if s.DirtyState { + t.Error("clean repo should not be marked dirty") + } + if s.PatchHash != "" { + t.Errorf("clean repo should have empty PatchHash; got %q", s.PatchHash) + } +} + +// TestSource_Resolve_DirtyWithUntracked is the regression guard for +// FR-002: an untracked file MUST invalidate the cache (was missing +// from the v1 design — `git diff` alone misses untracked files). +func TestSource_Resolve_DirtyWithUntracked(t *testing.T) { + dir := initGitRepo(t) + + s1 := Source{Path: dir, RevisionSpec: "HEAD"} + if err := s1.Resolve(context.Background()); err != nil { + t.Fatalf("first Resolve: %v", err) + } + + // Add an untracked file. git diff returns empty here; trond MUST + // notice via git status --porcelain -uall. 
+ if err := os.WriteFile(filepath.Join(dir, "NEWFILE.java"), + []byte("class C {}\n"), 0o600); err != nil { + t.Fatalf("write NEWFILE: %v", err) + } + + s2 := Source{Path: dir, RevisionSpec: "HEAD"} + if err := s2.Resolve(context.Background()); err != nil { + t.Fatalf("second Resolve: %v", err) + } + if !s2.DirtyState { + t.Fatal("repo with untracked file MUST be marked dirty (FR-002)") + } + if s2.PatchHash == "" { + t.Fatal("dirty repo MUST have non-empty PatchHash") + } +} + +func TestSource_Resolve_DirtyTrackedEdit(t *testing.T) { + dir := initGitRepo(t) + if err := os.WriteFile(filepath.Join(dir, "README.md"), + []byte("changed\n"), 0o600); err != nil { + t.Fatalf("modify README: %v", err) + } + s := Source{Path: dir, RevisionSpec: "HEAD"} + if err := s.Resolve(context.Background()); err != nil { + t.Fatalf("Resolve: %v", err) + } + if !s.DirtyState { + t.Error("modified tracked file should be dirty") + } +} + +func TestSource_Resolve_NonExistentRevision(t *testing.T) { + dir := initGitRepo(t) + s := Source{Path: dir, RevisionSpec: "does-not-exist-branch"} + err := s.Resolve(context.Background()) + if err == nil { + t.Error("unknown revision should produce an error") + } +} diff --git a/internal/build/validate.go b/internal/build/validate.go new file mode 100644 index 00000000..1cd1bb3b --- /dev/null +++ b/internal/build/validate.go @@ -0,0 +1,215 @@ +package build + +import ( + "archive/zip" + "bufio" + "fmt" + "io" + "regexp" + "strings" +) + +// ValidateGradleTask enforces FR-022's task-name regex. Task names +// are inherently regular ("shadowJar", ":dbfork:build", "assemble"). +// Tight regex is fine here. Allows an optional leading `:` (gradle +// absolute task path) and standard project/task name characters. +var gradleTaskPattern = regexp.MustCompile(`^:?[a-zA-Z][a-zA-Z0-9_-]*(:[a-zA-Z][a-zA-Z0-9_-]*)*$`) + +// ValidateGradleTask returns nil if name is a syntactically safe +// gradle task identifier. 
+func ValidateGradleTask(name string) error { + if name == "" { + return fmt.Errorf("gradle_task is required") + } + if !gradleTaskPattern.MatchString(name) { + return fmt.Errorf("invalid gradle_task %q: must match %s", + name, gradleTaskPattern.String()) + } + return nil +} + +// ValidateGradleArgs enforces FR-022's gradle_args flag-name +// allowlist. Per the spec rationale: character regexes are the wrong +// defense (--init-script /tmp/evil.gradle passes any sane char regex +// while --projects=a,b,c fails one), so we whitelist flag names +// instead. argv-form invocation already blocks shell injection; +// what's left is "which gradle flags are dangerous". +// +// Accepted shapes: +// +// --offline, --no-daemon, --parallel, --rerun-tasks +// --max-workers= +// -D=, -P= (value unrestricted; argv-safe) +// -q, -i, -d +// +// Anything else is rejected. The dangerous flags we specifically +// want to forbid: --init-script, --include-build, --build-file, +// --settings-file (they redirect the build to attacker-supplied +// logic). 
func ValidateGradleArgs(args []string) error {
	// Fail on the first offending argument; an empty or nil slice is
	// valid (no extra flags requested).
	for _, a := range args {
		if err := validateGradleArg(a); err != nil {
			return err
		}
	}
	return nil
}

// validateGradleArg checks a single gradle argument against the
// FR-022 allowlist and returns nil when it is permitted. Arguments
// are matched by flag name only; values of -D/-P properties are
// deliberately not inspected.
func validateGradleArg(a string) error {
	switch a {
	case "":
		return fmt.Errorf("empty gradle_arg")
	case "--offline", "--no-daemon", "--parallel", "--rerun-tasks",
		"-q", "-i", "-d":
		return nil
	}
	// --max-workers=<N>
	if strings.HasPrefix(a, "--max-workers=") {
		return validateMaxWorkers(strings.TrimPrefix(a, "--max-workers="))
	}
	// -D<key>=<value> / -P<key>=<value>
	if (strings.HasPrefix(a, "-D") || strings.HasPrefix(a, "-P")) && len(a) > 2 {
		// The '=' must exist and must not be the first byte after the
		// flag letter, i.e. the key is non-empty.
		if strings.IndexByte(a[2:], '=') <= 0 {
			return fmt.Errorf("malformed %s flag %q: expected -%c<key>=<value>",
				a[:2], a, a[1])
		}
		// Value is intentionally unrestricted — argv-form makes shell
		// interpretation impossible; gradle treats the value as a
		// plain string.
		return nil
	}
	return fmt.Errorf("disallowed gradle_arg %q: only --offline, --no-daemon, "+
		"--parallel, --rerun-tasks, --max-workers=N, -D<key>=<value>, -P<key>=<value>, "+
		"-q/-i/-d are permitted (see spec FR-022)", a)
}

// validateMaxWorkers accepts v only when it is a non-empty run of
// ASCII digits that is not all zeros. Gradle itself refuses a worker
// count of zero, so rejecting it here turns a late gradle failure
// inside the container into an immediate validation error.
func validateMaxWorkers(v string) error {
	if v == "" {
		return fmt.Errorf("--max-workers requires an integer value")
	}
	nonZero := false
	for _, r := range v {
		if r < '0' || r > '9' {
			return fmt.Errorf("--max-workers value %q must be a positive integer", v)
		}
		if r != '0' {
			nonZero = true
		}
	}
	if !nonZero {
		return fmt.Errorf("--max-workers value %q must be a positive integer", v)
	}
	return nil
}

// envAllowlist enforces FR-019. Allowlisted env keys are forwarded
// from the trond invocation environment AND accepted in
// `build.env: { KEY: VAL }`. Everything else is rejected to prevent
// LD_PRELOAD / PATH hijacks of the build container.
var envAllowlist = map[string]struct{}{
	"GRADLE_OPTS":      {},
	"JAVA_OPTS":        {},
	"GRADLE_USER_HOME": {},
	"MAVEN_OPTS":       {},
}

// orgGradleProjectPrefix is the prefix of env keys that are accepted
// by pattern rather than by exact name.
const orgGradleProjectPrefix = "ORG_GRADLE_PROJECT_"

// ValidateEnvKey returns nil if the env var name is on the allowlist
// (literal or prefix).
+func ValidateEnvKey(name string) error { + if name == "" { + return fmt.Errorf("empty env key") + } + if _, ok := envAllowlist[name]; ok { + return nil + } + if strings.HasPrefix(name, orgGradleProjectPrefix) && len(name) > len(orgGradleProjectPrefix) { + return nil + } + return fmt.Errorf("disallowed env key %q: only %s and ORG_GRADLE_PROJECT_* "+ + "are forwarded into the build container (spec FR-019)", + name, allowedKeysString()) +} + +func allowedKeysString() string { + keys := make([]string, 0, len(envAllowlist)) + for k := range envAllowlist { + keys = append(keys, k) + } + return strings.Join(keys, ", ") +} + +// ValidateJARMainClass opens a built jar and confirms its +// META-INF/MANIFEST.MF Main-Class header equals expected (e.g. +// "org.tron.program.FullNode"). Returns a structured error otherwise. +// +// FR-011: produced JAR must be runnable as a java-tron node. +func ValidateJARMainClass(path, expected string) error { + r, err := zip.OpenReader(path) + if err != nil { + return fmt.Errorf("open jar: %w", err) + } + defer func() { _ = r.Close() }() + for _, f := range r.File { + if f.Name != "META-INF/MANIFEST.MF" { + continue + } + got, err := readManifestMainClass(f) + if err != nil { + return err + } + if got != expected { + return fmt.Errorf("jar Main-Class = %q; want %q", got, expected) + } + return nil + } + return fmt.Errorf("jar has no META-INF/MANIFEST.MF") +} + +// readManifestMainClass scopes the open+close to one function call so +// the linter sees a clean defer rather than `defer` inside the for +// loop (deferInLoop) of the caller. +func readManifestMainClass(f *zip.File) (string, error) { + rc, err := f.Open() + if err != nil { + return "", fmt.Errorf("read manifest: %w", err) + } + defer func() { _ = rc.Close() }() + return scanManifestMainClass(rc) +} + +// scanManifestMainClass extracts the Main-Class value from a JAR +// manifest. 
Honors the JAR-manifest spec's line-continuation rule: +// values longer than 72 bytes wrap to the next line with a leading +// single space, and the continuation belongs to the previous header. +// +// Without continuation handling, a long FQN like +// `com.example.really.long.package.path.MainClass` would be split +// across two lines and the validator would compare against only the +// first half. java-tron's current `org.tron.program.FullNode` is too +// short to trigger this, but the manifest spec is the manifest spec. +func scanManifestMainClass(r io.Reader) (string, error) { + scanner := bufio.NewScanner(r) + const prefix = "Main-Class:" + var ( + building string // value being accumulated (only set when we're inside Main-Class) + found bool + ) + for scanner.Scan() { + line := scanner.Text() + switch { + case strings.HasPrefix(line, " ") && found: + // Continuation line — append (without the leading space). + building += strings.TrimPrefix(line, " ") + case strings.HasPrefix(line, prefix): + building = strings.TrimSpace(strings.TrimPrefix(line, prefix)) + found = true + case found: + // A different header began; Main-Class is complete. + return building, nil + } + } + if err := scanner.Err(); err != nil { + return "", fmt.Errorf("scan manifest: %w", err) + } + if !found { + return "", fmt.Errorf("manifest has no Main-Class header") + } + return building, nil +} diff --git a/internal/build/validate_jar_test.go b/internal/build/validate_jar_test.go new file mode 100644 index 00000000..ab6f8e23 --- /dev/null +++ b/internal/build/validate_jar_test.go @@ -0,0 +1,100 @@ +package build + +import ( + "archive/zip" + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +// makeJAR writes a minimal JAR with the given Main-Class header (or no +// manifest at all if mainClass==""). 
+func makeJAR(t *testing.T, mainClass string) string { + t.Helper() + path := filepath.Join(t.TempDir(), "test.jar") + + var buf bytes.Buffer + zw := zip.NewWriter(&buf) + if mainClass != "OMIT-MANIFEST" { + w, err := zw.Create("META-INF/MANIFEST.MF") + if err != nil { + t.Fatalf("create manifest: %v", err) + } + _, _ = w.Write([]byte("Manifest-Version: 1.0\n")) + if mainClass != "" { + _, _ = w.Write([]byte("Main-Class: " + mainClass + "\n")) + } + } + if err := zw.Close(); err != nil { + t.Fatalf("close zip: %v", err) + } + if err := os.WriteFile(path, buf.Bytes(), 0o600); err != nil { + t.Fatalf("write jar: %v", err) + } + return path +} + +func TestValidateJARMainClass_Match(t *testing.T) { + path := makeJAR(t, "org.tron.program.FullNode") + if err := ValidateJARMainClass(path, "org.tron.program.FullNode"); err != nil { + t.Errorf("expected pass; got %v", err) + } +} + +func TestValidateJARMainClass_Mismatch(t *testing.T) { + path := makeJAR(t, "com.example.WrongMain") + err := ValidateJARMainClass(path, "org.tron.program.FullNode") + if err == nil { + t.Fatal("expected mismatch error") + } + if !strings.Contains(err.Error(), "Main-Class") { + t.Errorf("error %q should mention Main-Class", err) + } +} + +func TestValidateJARMainClass_NoManifest(t *testing.T) { + path := makeJAR(t, "OMIT-MANIFEST") + err := ValidateJARMainClass(path, "org.tron.program.FullNode") + if err == nil { + t.Fatal("expected missing-manifest error") + } +} + +func TestValidateJARMainClass_NoMainClassHeader(t *testing.T) { + path := makeJAR(t, "") + err := ValidateJARMainClass(path, "org.tron.program.FullNode") + if err == nil { + t.Fatal("expected missing Main-Class error") + } +} + +// TestValidateJARMainClass_ContinuationLines is the FR-011 regression +// guard for JAR manifest's 72-byte line wrap. A long FQN must be +// reassembled across continuation lines before comparison. 
+func TestValidateJARMainClass_ContinuationLines(t *testing.T) { + path := filepath.Join(t.TempDir(), "wrapped.jar") + var buf bytes.Buffer + zw := zip.NewWriter(&buf) + w, err := zw.Create("META-INF/MANIFEST.MF") + if err != nil { + t.Fatalf("create entry: %v", err) + } + // Simulate a 72-byte-wrap. Continuation lines start with a single + // space and are concatenated WITHOUT that space. + _, _ = w.Write([]byte("Manifest-Version: 1.0\n")) + _, _ = w.Write([]byte("Main-Class: com.example.really.long.fully.qualified.path.to.the\n")) + _, _ = w.Write([]byte(" .MainClassName\n")) + _, _ = w.Write([]byte("Implementation-Title: example\n")) + if err := zw.Close(); err != nil { + t.Fatalf("close zip: %v", err) + } + if err := os.WriteFile(path, buf.Bytes(), 0o600); err != nil { + t.Fatalf("write: %v", err) + } + want := "com.example.really.long.fully.qualified.path.to.the.MainClassName" + if err := ValidateJARMainClass(path, want); err != nil { + t.Errorf("continuation-line Main-Class not reassembled: %v", err) + } +} diff --git a/internal/build/validate_test.go b/internal/build/validate_test.go new file mode 100644 index 00000000..5d18c524 --- /dev/null +++ b/internal/build/validate_test.go @@ -0,0 +1,146 @@ +package build + +import ( + "strings" + "testing" +) + +func TestValidateGradleTask(t *testing.T) { + tests := []struct { + name string + want bool // want ok + comment string + }{ + {"shadowJar", true, "common case"}, + {":dbfork:build", true, "nested task path"}, + {"assemble", true, "single token"}, + {"my-task", true, "hyphen permitted"}, + {"my_task", true, "underscore permitted"}, + {"", false, "empty rejected"}, + {"123task", false, "must start with letter"}, + {"shadow Jar", false, "no whitespace"}, + {"shadow;rm", false, "no semicolon"}, + {"shadow$()", false, "no $()"}, + {"shadow`evil`", false, "no backticks"}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + err := ValidateGradleTask(tc.name) + ok := err == nil + if ok != 
tc.want { + t.Errorf("ValidateGradleTask(%q) ok=%v; want %v (%s) err=%v", + tc.name, ok, tc.want, tc.comment, err) + } + }) + } +} + +func TestValidateGradleArgs(t *testing.T) { + tests := []struct { + name string + args []string + want bool + comment string + }{ + {"offline", []string{"--offline"}, true, "common bareword"}, + {"no-daemon", []string{"--no-daemon"}, true, "bareword"}, + {"max-workers", []string{"--max-workers=4"}, true, "kv with int"}, + {"max-workers-bad", []string{"--max-workers=abc"}, false, "non-int rejected"}, + {"max-workers-empty", []string{"--max-workers="}, false, "empty value"}, + {"D-prop", []string{"-Dversion=1.2.3"}, true, "system property"}, + {"P-prop", []string{"-Pcustom=value"}, true, "project property"}, + {"D-prop-space", []string{"-Dtitle=my title"}, true, "value with space (argv-safe)"}, + {"D-prop-comma", []string{"-Dprojects=a,b,c"}, true, "value with comma (argv-safe)"}, + {"D-malformed", []string{"-Dnokey"}, false, "missing equals"}, + {"D-empty", []string{"-D"}, false, "lone -D"}, + {"q", []string{"-q"}, true, "log level"}, + {"init-script-blocked", []string{"--init-script", "/tmp/evil.gradle"}, false, "must be rejected (FR-022 dangerous flag)"}, + {"include-build-blocked", []string{"--include-build", "/tmp/x"}, false, "redirects build"}, + {"build-file-blocked", []string{"--build-file", "/tmp/x"}, false, "redirects build"}, + {"settings-file-blocked", []string{"--settings-file", "/tmp/x"}, false, "redirects build"}, + {"empty-arg", []string{""}, false, "empty rejected"}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + err := ValidateGradleArgs(tc.args) + ok := err == nil + if ok != tc.want { + t.Errorf("ValidateGradleArgs(%v) ok=%v; want %v (%s) err=%v", + tc.args, ok, tc.want, tc.comment, err) + } + }) + } +} + +func TestValidateEnvKey(t *testing.T) { + tests := []struct { + name string + key string + want bool + }{ + {"GRADLE_OPTS", "GRADLE_OPTS", true}, + {"JAVA_OPTS", "JAVA_OPTS", true}, + 
{"GRADLE_USER_HOME", "GRADLE_USER_HOME", true}, + {"MAVEN_OPTS", "MAVEN_OPTS", true}, + {"ORG_GRADLE_PROJECT_foo", "ORG_GRADLE_PROJECT_foo", true}, + {"ORG_GRADLE_PROJECT_BAR", "ORG_GRADLE_PROJECT_BAR", true}, + {"PATH-blocked", "PATH", false}, + {"LD_PRELOAD-blocked", "LD_PRELOAD", false}, + {"DYLD_INSERT_LIBRARIES-blocked", "DYLD_INSERT_LIBRARIES", false}, + {"JAVA_TOOL_OPTIONS-blocked", "JAVA_TOOL_OPTIONS", false}, + {"empty-rejected", "", false}, + {"prefix-only", "ORG_GRADLE_PROJECT_", false}, // suffix required + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + err := ValidateEnvKey(tc.key) + ok := err == nil + if ok != tc.want { + t.Errorf("ValidateEnvKey(%q) ok=%v; want %v; err=%v", + tc.key, ok, tc.want, err) + } + }) + } +} + +func TestValidateImageTag(t *testing.T) { + tests := []struct { + tag string + want bool + }{ + {"trond-build:dev", true}, + {"myorg/trond-build:1.2.3", true}, + {"localhost/foo:bar", true}, + {"my.registry.example/path/to/img:1.0", true}, + {"", false}, + {"/etc/passwd", false}, + {"UPPER:case", false}, + {"foo bar:baz", false}, + {"foo:bar baz", false}, + {"foo", false}, // missing tag + {"foo:", false}, // empty tag + {":bar", false}, // empty repo + } + for _, tc := range tests { + t.Run(tc.tag, func(t *testing.T) { + err := ValidateImageTag(tc.tag) + ok := err == nil + if ok != tc.want { + t.Errorf("ValidateImageTag(%q) ok=%v; want %v; err=%v", + tc.tag, ok, tc.want, err) + } + }) + } +} + +// TestValidateGradleArgs_RejectMessage asserts the error mentions +// the spec FR so an operator hitting it can search the codebase. 
+func TestValidateGradleArgs_RejectMessage(t *testing.T) { + err := ValidateGradleArgs([]string{"--init-script", "/tmp/x"}) + if err == nil { + t.Fatal("expected rejection") + } + if !strings.Contains(err.Error(), "FR-022") { + t.Errorf("error %q should reference FR-022", err) + } +} diff --git a/internal/schema/embed.go b/internal/schema/embed.go index 1aca9f0a..70ee9d37 100644 --- a/internal/schema/embed.go +++ b/internal/schema/embed.go @@ -40,7 +40,10 @@ import ( // changes to existing schemas). // 1.2.0 — add verify-config + auto-heal schemas (new `trond // verify-config` and `trond auto-heal` commands). -const SchemaVersion = "1.2.0" +// 1.3.0 — add build schema (new `trond build` command + intent +// `build:` block; state node entry gains optional +// `build_cache_key`). +const SchemaVersion = "1.3.0" // JSONSchemaURLBase is the canonical URL prefix for individual output // schema files. Embedded $id values inside each schema mirror this so diff --git a/internal/schema/files/build.schema.json b/internal/schema/files/build.schema.json new file mode 100644 index 00000000..ec86522c --- /dev/null +++ b/internal/schema/files/build.schema.json @@ -0,0 +1,82 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://github.com/tronprotocol/tron-deployment/blob/master/schemas/output/build.schema.json", + "title": "trond build output", + "description": "Returned by `trond build ... -o json`. Same shape on first build (cache_hit=false) and cache hit (cache_hit=true, duration_ms close to 0). On failure, the error envelope shape (schemas/output/error.schema.json) is emitted instead.", + "type": "object", + "required": [ + "cache_key", + "source_revision", + "dirty", + "artifact_kind", + "builder_image", + "jdk_version", + "gradle_task", + "builder", + "cache_hit", + "duration_ms", + "created_at" + ], + "properties": { + "cache_key": { + "type": "string", + "description": "Content-addressed identifier of this build. Format: -b[+dirty-][-x]. 
Same source + same builder image + same task + same args yields the same key.", + "pattern": "^[0-9a-f]{12}-b[0-9a-f]{8}(\\+dirty-[0-9a-f]{8})?(-x[0-9a-f]{8})?$" + }, + "source_revision": { + "type": "string", + "description": "Full git sha resolved from the requested revision spec.", + "pattern": "^[0-9a-f]{40}$" + }, + "dirty": { + "type": "boolean", + "description": "True when the source working tree had uncommitted edits or untracked files at build time." + }, + "artifact_kind": { + "type": "string", + "enum": ["jar", "image"] + }, + "artifact_path": { + "type": "string", + "description": "Absolute path to the produced JAR (artifact_kind=jar)." + }, + "image_tag": { + "type": "string", + "description": "Locally tagged Docker image reference (artifact_kind=image)." + }, + "sha256": { + "type": "string", + "description": "sha256 of the produced JAR contents (artifact_kind=jar).", + "pattern": "^[0-9a-f]{64}$" + }, + "builder_image": { + "type": "string", + "description": "Fully-qualified builder image reference, including @sha256: digest portion." + }, + "jdk_version": { + "type": "string", + "enum": ["8", "11", "17", "21"] + }, + "gradle_task": { + "type": "string" + }, + "builder": { + "type": "string", + "enum": ["docker", "host"] + }, + "cache_hit": { + "type": "boolean", + "description": "True when this invocation returned a previously-built artifact without re-running gradle." + }, + "duration_ms": { + "type": "integer", + "minimum": 0, + "description": "Wall-clock time of this invocation. Cache hits return durations < 200ms." + }, + "created_at": { + "type": "string", + "format": "date-time", + "description": "Timestamp the manifest was first written (NOT this invocation's timestamp on a cache hit)." 
+ } + } +} diff --git a/internal/schema/manifest.go b/internal/schema/manifest.go index 5fe32e97..f2f44686 100644 --- a/internal/schema/manifest.go +++ b/internal/schema/manifest.go @@ -66,6 +66,7 @@ func Build(root *cobra.Command, schemaLookup map[string]string) Manifest { // JSON Schema. var DefaultSchemaLookup = map[string]string{ "trond apply": "apply", + "trond build": "build", "trond plan": "plan", "trond status": "status", "trond list": "list", diff --git a/internal/schema/version_baseline.json b/internal/schema/version_baseline.json index b5bec390..d5672008 100644 --- a/internal/schema/version_baseline.json +++ b/internal/schema/version_baseline.json @@ -1,5 +1,5 @@ { - "schema_version": "1.2.0", + "schema_version": "1.3.0", "entries": [ { "name": "apply", @@ -9,6 +9,10 @@ "name": "auto-heal", "hash": "1a1108e340bf94c7d568f27245183245fce793e3527383159c33f951adbbd00e" }, + { + "name": "build", + "hash": "ea90d08461ac088e8535dc68cb410e7ce2ae0b6071c5329f07ba8aa2f2884be2" + }, { "name": "config-diff", "hash": "1e4bbca5944385aa5ffb1061b1b529c18cb22b16ea264ff0ec32f921ee2593c2" diff --git a/schemas/output/build.schema.json b/schemas/output/build.schema.json new file mode 100644 index 00000000..ec86522c --- /dev/null +++ b/schemas/output/build.schema.json @@ -0,0 +1,82 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://github.com/tronprotocol/tron-deployment/blob/master/schemas/output/build.schema.json", + "title": "trond build output", + "description": "Returned by `trond build ... -o json`. Same shape on first build (cache_hit=false) and cache hit (cache_hit=true, duration_ms close to 0). 
On failure, the error envelope shape (schemas/output/error.schema.json) is emitted instead.", + "type": "object", + "required": [ + "cache_key", + "source_revision", + "dirty", + "artifact_kind", + "builder_image", + "jdk_version", + "gradle_task", + "builder", + "cache_hit", + "duration_ms", + "created_at" + ], + "properties": { + "cache_key": { + "type": "string", + "description": "Content-addressed identifier of this build. Format: -b[+dirty-][-x]. Same source + same builder image + same task + same args yields the same key.", + "pattern": "^[0-9a-f]{12}-b[0-9a-f]{8}(\\+dirty-[0-9a-f]{8})?(-x[0-9a-f]{8})?$" + }, + "source_revision": { + "type": "string", + "description": "Full git sha resolved from the requested revision spec.", + "pattern": "^[0-9a-f]{40}$" + }, + "dirty": { + "type": "boolean", + "description": "True when the source working tree had uncommitted edits or untracked files at build time." + }, + "artifact_kind": { + "type": "string", + "enum": ["jar", "image"] + }, + "artifact_path": { + "type": "string", + "description": "Absolute path to the produced JAR (artifact_kind=jar)." + }, + "image_tag": { + "type": "string", + "description": "Locally tagged Docker image reference (artifact_kind=image)." + }, + "sha256": { + "type": "string", + "description": "sha256 of the produced JAR contents (artifact_kind=jar).", + "pattern": "^[0-9a-f]{64}$" + }, + "builder_image": { + "type": "string", + "description": "Fully-qualified builder image reference, including @sha256: digest portion." + }, + "jdk_version": { + "type": "string", + "enum": ["8", "11", "17", "21"] + }, + "gradle_task": { + "type": "string" + }, + "builder": { + "type": "string", + "enum": ["docker", "host"] + }, + "cache_hit": { + "type": "boolean", + "description": "True when this invocation returned a previously-built artifact without re-running gradle." + }, + "duration_ms": { + "type": "integer", + "minimum": 0, + "description": "Wall-clock time of this invocation. 
Cache hits return durations < 200ms." + }, + "created_at": { + "type": "string", + "format": "date-time", + "description": "Timestamp the manifest was first written (NOT this invocation's timestamp on a cache hit)." + } + } +} diff --git a/scripts/refresh-builder-pins.sh b/scripts/refresh-builder-pins.sh new file mode 100755 index 00000000..089ef458 --- /dev/null +++ b/scripts/refresh-builder-pins.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash +# Re-resolves Eclipse Temurin builder image tags to their current +# sha256 digests and rewrites internal/build/pins/builder_image_digests.json. +# +# Per spec/002 FR-012: the embedded pin file is the source of truth +# for trond's reproducible builder images. Bumping the pins is a +# release-prep step — `make refresh-builder-pins` calls this script, +# review the diff, commit with the rest of the release tag. +# +# Usage: +# ./scripts/refresh-builder-pins.sh # rewrite the JSON in place +# ./scripts/refresh-builder-pins.sh --dry-run # print without writing +# +# Requirements: docker, jq (apt-get install jq | brew install jq). + +set -euo pipefail + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +PIN_FILE="$REPO_ROOT/internal/build/pins/builder_image_digests.json" + +# Map of JDK version → upstream tag. Keep sorted by version. +declare -a JDK_VERSIONS=(8 11 17 21) +declare -A TAG_FOR=( + [8]="eclipse-temurin:8-jdk-jammy" + [11]="eclipse-temurin:11-jdk-jammy" + [17]="eclipse-temurin:17-jdk-jammy" + [21]="eclipse-temurin:21-jdk-jammy" +) + +DRY_RUN=0 +if [[ "${1:-}" == "--dry-run" ]]; then + DRY_RUN=1 +fi + +if ! command -v docker >/dev/null 2>&1; then + echo "error: docker is required (used to pull + inspect images)" >&2 + exit 64 +fi +if ! command -v jq >/dev/null 2>&1; then + echo "error: jq is required for safe JSON rewriting" >&2 + exit 64 +fi + +# Build the new JSON in-memory, then atomically swap. 
+# Resolve every configured JDK tag to its current sha256 digest and
+# accumulate the results in a JSON object keyed by JDK version.
+# Reads JDK_VERSIONS / TAG_FOR / DRY_RUN / PIN_FILE set earlier in the script.
+new_pins='{}'
+for jdk in "${JDK_VERSIONS[@]}"; do
+  tag="${TAG_FOR[$jdk]}"
+  echo "[refresh-builder-pins] resolving $tag (JDK $jdk)..." >&2
+  docker pull --quiet "$tag" >/dev/null
+
+  # `docker inspect` returns the canonical RepoDigest for the local image.
+  # Strip the `<name>@` prefix so we end up with just `sha256:<hex>`.
+  digest="$(docker inspect --format='{{ index .RepoDigests 0 }}' "$tag" | sed 's/.*@//')"
+
+  # Refuse to write anything that is not a well-formed sha256 digest.
+  if [[ ! "$digest" =~ ^sha256:[0-9a-f]{64}$ ]]; then
+    echo "error: unexpected digest format for $tag: $digest" >&2
+    exit 1
+  fi
+
+  echo "[refresh-builder-pins] $jdk → $tag @ $digest" >&2
+
+  new_pins="$(jq --arg jdk "$jdk" --arg ref "$tag" --arg digest "$digest" \
+    '. + { ($jdk): { ref: $ref, digest: $digest } }' <<<"$new_pins")"
+done
+
+# Wrap the pins in the top-level document shape of the embedded pin file.
+new_doc="$(jq --argjson pins "$new_pins" \
+  '{
+    "$comment": "Pinned digests for builder images. Bumped per trond release via `make refresh-builder-pins`. Each entry resolves jdk_version -> reference (canonical name@sha256:...). Cache key (FR-002) incorporates the digest so pin bumps invalidate stale artifacts.",
+    "schema_version": "1.0.0",
+    "pins": $pins
+  }' <<<'{}')"
+
+# --dry-run: print the document on stdout and leave the pin file untouched.
+if [[ $DRY_RUN -eq 1 ]]; then
+  echo "$new_doc"
+  exit 0
+fi
+
+# Atomic write: stage the new document next to the target (same filesystem)
+# and rename it into place.
+tmp="$(mktemp "${PIN_FILE}.XXXXXX")"
+# Do not leave the staging file behind if a later step fails. After a
+# successful `mv` the path no longer exists and `rm -f` is a no-op.
+trap 'rm -f "$tmp"' EXIT
+printf '%s\n' "$new_doc" > "$tmp"
+# mktemp creates the file with mode 0600; restore ordinary read permissions
+# so the rename does not make the checked-in pin file owner-only.
+chmod 0644 "$tmp"
+mv "$tmp" "$PIN_FILE"
+
+echo "[refresh-builder-pins] wrote $PIN_FILE"
+echo "[refresh-builder-pins] review the diff and commit alongside the trond version bump."
diff --git a/specs/002-trond-build-pipeline/plan.md b/specs/002-trond-build-pipeline/plan.md new file mode 100644 index 00000000..343d407d --- /dev/null +++ b/specs/002-trond-build-pipeline/plan.md @@ -0,0 +1,694 @@ +# Implementation Plan: trond Build Pipeline + +**Branch**: `feat/build-pipeline` | **Date**: 2026-05-08 | **Spec**: [spec.md](spec.md) +**Last revised**: 2026-05-08 (self-review pass) + +## Summary + +Add a `trond build` command and an apply-side `build:` intent block that +together let developers iterate on java-tron source code without context- +switching out to Gradle. The build itself happens in a Docker container +running a pinned `eclipse-temurin` image. trond orchestrates: source mount, +gradle invocation, output capture, content-addressed caching, and (for +SSH targets) artifact transfer. + +trond ships no JDK, no Gradle, no Java compiler. The build environment is +the container; trond is the conductor. + +## Technical Context + +**Language/Version**: Go 1.25+ + +**New dependencies**: **None.** All external interactions go through +`os/exec`, matching the rest of trond: + +- `git`: shell out via `exec.CommandContext(ctx, "git", ...)` — `rev-parse`, + `status --porcelain -uall`, `diff`. Avoids ~30 MB `go-git/v5` impact on + binary size; consistent with how trond already drives `docker` and `scp`. +- `docker`: existing pattern. +- `scp` / `ssh`: existing `internal/target/ssh`. + +**Existing trond packages reused**: +- `internal/paths` — for `${TROND_STATE_DIR}/builds/` +- `internal/output` — for the structured error envelope +- `internal/state` — extends node entry with optional `build_cache_key` +- `internal/mcp` — to expose tools (`build`, `build_list`, `build_inspect`, + `build_prune`) +- `internal/target/ssh` — for the scp transfer path in US-4 and the + `scp` preflight probe (FR-017) +- `internal/audit` — to record build events + +**Stdlib only**: +- `archive/zip` — read JAR manifest to validate `Main-Class`. 
+- `crypto/sha256` — content hashing and patch hashing. +- `os/signal` — SIGINT handling (FR-016). +- `syscall.Flock` — concurrent build serialization (FR-015). + +## Architecture + +``` + trond CLI + │ + ┌─────────────────────┼─────────────────────┐ + │ │ │ + cmd/build.go cmd/apply.go internal/mcp + (standalone) (resolves build:) (tool: build) + │ │ │ + └──────── calls ──────┴──────── calls ──────┘ + │ + internal/build/builder.go + (Builder interface + docker/host impls in one file) + │ + ┌───────────┼───────────┐ + │ │ │ + ▼ ▼ ▼ + docker run ./gradlew cache hit + eclipse- (host (manifest + temurin builder) lookup) + │ + └─► -v :/src:ro + -v /gradle:/root/.gradle + -v /out:/out:rw + -e GRADLE_OPTS, JAVA_OPTS, ORG_GRADLE_PROJECT_* (allowlist only) + --workdir /src + ./gradlew + (NO `bash -c`; argv-form only — FR-022) + + internal/build/cache.go + (content-addressed cache, manifest dir, prune logic, + flock-based concurrent-build serialization) + + internal/build/validate.go + (JAR Main-Class check, image inspect) + + internal/build/source.go + (shells out to git: rev-parse, status, diff, patch hash) +``` + +Note: dropped the separate `host.go` file from the v1 draft — host builder +is a single function with a switch in `builder.go`. Avoids over-stratifying +~30 lines of logic. + +### Directory layout on disk + +``` +${TROND_STATE_DIR}/builds/ +├── gradle/ # gradle deps cache, persisted across builds +├── out/ # produced JARs (named by cache key) +│ ├── abc123.jar +│ └── abc123+dirty-7f2a.jar +├── images/ # local image registry (sha → tag map) +│ └── abc123.json +├── manifest/ # one JSON per build, source of truth +│ ├── abc123.json +│ └── abc123+dirty-7f2a.json +└── locks/ # flock per cache key (FR-015) + └── abc123.lock +``` + +The `manifest/` directory is the cache key index. `cache.go` reads only +manifests (small JSON files); the artifacts under `out/` and `images/` are +opaque blobs. 
+ +### Cache key derivation + +```go +type CacheKey struct { + SourcePath string // canonicalized abs path (symlinks resolved) + GitRevision string // resolved sha + PatchHash string // sha256(git diff || git status --porcelain -uall) if dirty + BuilderImageDigest string // sha256:... of the JDK image actually used + JDKVersion string + ArtifactKind string // "jar" | "image" + GradleTask string // "shadowJar" | "dockerBuild" | custom + GradleArgs []string +} + +func (k CacheKey) String() string { + bd := k.BuilderImageDigest[7:13] // strip "sha256:" then take 6 hex chars + base := fmt.Sprintf("%s-b%s", k.GitRevision, bd) + if k.PatchHash != "" { + return fmt.Sprintf("%s+dirty-%s", base, k.PatchHash[:8]) + } + return base +} +``` + +**Critical**: +- PatchHash combines BOTH `git diff` AND `git status --porcelain -uall`. + Diff alone misses untracked files (FR-002, regression bug found in + pass 1). +- BuilderImageDigest is in the key (FR-002, found in pass 2). When a + trond release bumps the JDK pin, every cache entry is automatically + invalidated — no manual prune required. +- GradleArgs participates (different `-Dflag` produces different bytes). + +Two different source paths producing the same sha hit the same cache +(this is the intent — a build is determined by its inputs, not its +location). + +### Artifact naming pattern on disk + +``` +~/.trond/builds/out/-b[+dirty-].jar + ^^^^^^^ ^^^^^^^^ + 6 hex 6 hex prefix of patch sha + prefix when working tree dirty + of image + sha256 +``` + +Example: `8f4e2a-bd4e2a1+dirty-7f2a3b9c.jar` reads as +"revision 8f4e2a, built by image digest starting d4e2a1, with dirty patch +7f2a3b9c". Long enough for collision safety, short enough to grep. + +### Concurrent build serialization (FR-015) + +POSIX path (Linux / macOS) uses `syscall.Flock`. Windows ships a +no-cross-process fallback (in-process `sync.Mutex` only) — concurrent +`trond build` from two trond processes on Windows is undefined and +documented as such. 
Split via build constraints: + +``` +internal/build/lock_posix.go // //go:build !windows +internal/build/lock_windows.go // //go:build windows +``` + +POSIX implementation: + +```go +//go:build !windows + +func acquireBuildLock(cacheDir, key string) (release func(), err error) { + lockPath := filepath.Join(cacheDir, "locks", key+".lock") + f, err := os.OpenFile(lockPath, os.O_CREATE|os.O_RDWR, 0o600) + if err != nil { return nil, err } + if err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX); err != nil { + f.Close() + return nil, err + } + return func() { + syscall.Flock(int(f.Fd()), syscall.LOCK_UN) + f.Close() + }, nil +} +``` + +```go +// before any expensive work +release, err := acquireBuildLock(cacheDir, key.String()) +if err != nil { return errResult(err) } +defer release() + +// re-check cache after acquiring lock +if hit := cacheLookup(key); hit != nil { + return hit, nil // first caller finished while we waited +} +// otherwise do the build +``` + +### SIGINT handling (FR-016) + +`signal.NotifyContext` is installed once at the `apply` (or `build`) +entry point and threaded through every subprocess that runs after it — +docker, git, ssh, scp. Each invocation uses `exec.CommandContext`, so +cancellation propagates as SIGKILL to the child. + +```go +ctx, cancel := signal.NotifyContext(parent, os.Interrupt, syscall.SIGTERM) +defer cancel() + +// build phase +cmd := exec.CommandContext(ctx, "docker", "run", "--rm", "--name", containerName, ...) +defer cleanup(containerName) // best-effort `docker kill` + `rm -f out/*.tmp` + +if err := cmd.Run(); err != nil { + if errors.Is(ctx.Err(), context.Canceled) { + return errResult("BUILD_CANCELLED", 130, err, suggestions...) 
+ } + return errResult("BUILD_FAILED", 1, err, suggestionsFromTail(cmd)) +} + +// transfer phase (SSH target) — same ctx, same cancellation semantics +// - scp writes to `.tmp`, rename only on clean exit +// - on ctx cancel: trond runs `ssh remote 'rm -f .tmp'` best-effort +``` + +Writing output via a `.tmp` suffix and rename-on-success guarantees that +a cancelled build/transfer leaves no half-written file pretending to be +a finished artifact — neither locally nor on the SSH target. + +### Security boundary: subprocess invocation + +A user-controlled intent file (or a `--gradle-task` CLI flag) MUST NOT +be able to inject shell metacharacters into the build pipeline. Three +guardrails (FR-022): + +1. **Argv-only**. Every subprocess goes through `exec.Command(name, + args...)` with no intervening shell. No `bash -c`, no + `sh -c "..."`. The container's entrypoint stays `./gradlew` + directly. + + ```go + // GOOD + cmd := exec.CommandContext(ctx, "docker", "run", "--rm", + "-v", srcMount, "-v", outMount, "--workdir", "/src", + imageRef, + "./gradlew", task, gradleArgs...) + + // BAD — would have enabled $(...) and ; injection + cmd := exec.CommandContext(ctx, "bash", "-c", + fmt.Sprintf("docker run ... %s && cp ...", task)) + ``` + +2. **`gradle_task`: char regex.** Task names are inherently regular — + `shadowJar`, `:dbfork:build`, `assemble`. Validated as + `^[a-zA-Z][a-zA-Z0-9:_-]*$`. + +3. **`gradle_args`: flag-name allowlist** (not a character class). + The character regex from pass 2 was the wrong defense: it rejected + legitimate args (`--projects=a,b,c` has `,`; `-Dtitle=my title` has + spaces) AND let dangerous flags through (`--init-script + /tmp/evil.gradle` is all-ASCII-and-slashes). Since argv form already + blocks shell injection, the remaining concern is which gradle flags + are dangerous. 
+ + Allowed flags (whole arg matches): + - `--offline`, `--no-daemon`, `--parallel`, `--rerun-tasks` + - `--max-workers=` + - `-D=`, `-P=` (value unrestricted, argv-safe) + - `-q`, `-i`, `-d` (log levels) + + Rejected flags (allowlist miss): + - `--init-script`, `--include-build`, `--build-file`, + `--settings-file` — these redirect the build to attacker-supplied + logic. + - Anything else not listed. + + Extending the allowlist is a code change, not an intent change. + +The same gate guards `build.env` keys (FR-019 allowlist). + +### Intent integration + +The intent schema gains a `build:` block, mutually exclusive with `image:`. + +```yaml +name: dev-fullnode +network: nile +node: + type: fullnode + runtime: jar # or docker +target: + type: local + +build: + source: /Users/me/java-tron + revision: HEAD # or branch / tag / sha + jdk: "8" # default + artifact: jar # or image + image_tag: dev:latest # required if artifact=image + builder: docker # default; "host" available + gradle_task: shadowJar # default depends on artifact + gradle_args: # extra gradle args (FR-001, FR-022) + - --no-daemon + - -Dorg.gradle.parallel=true + env: # allowlisted env only (FR-019) + GRADLE_OPTS: "-Xmx2g" # GRADLE_OPTS|JAVA_OPTS|GRADLE_USER_HOME|MAVEN_OPTS|ORG_GRADLE_PROJECT_* + cache: + dirty_ttl: 7d # FR-025; "never" allowed +``` + +Note: `rebuild: always|on_change|never` is **removed** from the v1 draft. +The cache key derivation already handles every legitimate case (dirty tree +forces a new key; clean tree hits cache). + +Render pipeline change: if `build:` is present, render runs the build first, +then substitutes the produced artifact ref (JAR path or image tag) into the +runtime config. Render is otherwise unchanged. 
+ +### Pull policy for local images (US-3 acceptance #2, FR-005) + +When `artifact: image` is consumed by `runtime: docker`, the rendered +service block MUST carry `pull_policy: never` (Compose 3.9+) so compose +does not attempt to pull the locally-built tag: + +```yaml +services: + java-tron: + image: trond-build:abc123 + pull_policy: never +``` + +### Apply pipeline change + +``` +preflight → validate intent → [NEW: resolve build] → render → diff → apply +``` + +`resolve build` is a no-op if the intent has no `build:` block (backwards- +compatible). Otherwise it calls the builder, then mutates the in-memory +intent to substitute the resolved artifact ref. The original intent file is +not touched. On success, apply records `build_cache_key = <cache-key>` on +the state node entry (FR-018), enabling `build prune` to skip in-use builds. + +### Preflight integration (FR-017) + +When intent has `build:`: + +```go +// internal/preflight/build.go (new file) +- check source path exists + `git status` works +- if builder == docker: check docker daemon reachable + builder image cached or pullable +- if builder == host: check `./gradlew --version` works +- if target == ssh: ssh probe `command -v scp` on remote +``` + +Surfaces as preflight checks with the existing pass/warning/fail shape. + +### SSH transfer path (US-4) + +When `target.type == ssh` and `build.artifact == jar`: + +``` +local build → ~/.trond/builds/out/.jar + │ + ▼ scp (skip if remote file already matches sha256) +remote:/opt/trond/deployments//java-tron.jar + │ + ▼ systemd unit references /opt/trond/deployments//java-tron.jar + │ + ▼ systemctl start +``` + +Transfers > 50 MB emit MCP progress notifications (FR-009) using the same +mechanism `snapshot download` uses. The scp invocation pipes its `-v` +output through a parser that converts byte counts to MCP `progress` +messages. + +When `artifact == image` and target is SSH: out of scope for v1. Document +that and require `target.type: local` for image artifacts. 
Users wanting +remote image deploys should push to a registry and reference via the +existing `image:` path. + +## Phase Breakdown + +### Phase 1 — `trond build` standalone (~4 days) + +**Deliverable**: `trond build --source --artifact jar -o json` works +end-to-end. No intent integration yet. + +- `cmd/build.go`: cobra command, flags (`--gradle-task`, `--gradle-arg`, + `--builder-image-override`, etc.), JSON output via `output.Result`. +- `internal/build/builder.go`: `Builder` interface, default impl, docker + and host paths inline. Argv-only subprocess invocation (FR-022). +- `internal/build/cache.go`: manifest read/write, cache lookup that also + stats the artifact file (FR-020), flock serialization (FR-015), + builder-digest-aware key (FR-002). +- `internal/build/source.go`: shell-out wrapper for `git rev-parse`, + `status --porcelain -uall`, `diff`; patch hash combining both (FR-002). +- `internal/build/validate.go`: JAR `Main-Class` check via `archive/zip`, + argv token regex `^[a-zA-Z0-9._:=/+-]+$` for gradle task + args + (FR-022), env-key allowlist (FR-019). +- `internal/build/signal.go`: SIGINT context propagation + cleanup + (FR-016). +- `internal/build/audit.go`: emits the FR-023 build-event JSON into the + existing `internal/audit` log. Append `result: "in_progress"` at + start; update atomically to `success` / `failed` / `cancelled` on + completion. Crashed builds leave forensic-visible `in_progress` + entries. +- `internal/build/pins/`: package owning the embedded JSON. Layout: + - `internal/build/pins/builder_image_digests.json` — checked-in, + embedded via `//go:embed builder_image_digests.json` + - `internal/build/pins/pins.go` — `Resolve(jdkVersion) (ref string, + err error)` plus the `--builder-image-override` resolver. Override + values feed into the cache key (FR-024). 
When a pinned digest + resolves but `docker pull` fails (registry deleted the tag), + surface `BUILDER_IMAGE_UNAVAILABLE` with suggestions; do NOT fall + back to an unpinned tag. + - Repo-root `builder_image_digests.json` becomes a generated symlink + (Makefile target) for editor convenience and the + `refresh-builder-pins` script's output target. +- `internal/build/lock_posix.go` + `internal/build/lock_windows.go`: + POSIX `flock` vs in-process mutex (FR-015 windows caveat). +- `internal/build/imagetag.go`: Docker reference-format validator + (FR-005). Uses `github.com/distribution/reference` (small, stdlib- + free transitively) or an inline regex equivalent. +- `internal/schema/files/build.schema.json` + `schemas/output/build.schema.json`. +- `internal/schema/embed.go`: bump SchemaVersion to 1.3.0; add entry to + history comment. +- `internal/schema/version_baseline.json`: regenerate via make target. +- `builder_image_digests.json` checked in at the repo root (also embedded + via go:embed); `Makefile :: refresh-builder-pins` regenerates it (FR-012). +- Tests: + - `cmd/build_test.go`: cobra wiring + JSON shape. + - `internal/build/builder_test.go`: unit with a fake `dockerRunner`; + asserts argv form and the absence of any shell invocation. + - `internal/build/cache_test.go`: cache hit / dirty key including + untracked files / cache miss when artifact deleted (FR-020) / cache + invalidation on builder-digest change / prune / concurrent flock. + - `internal/build/source_test.go`: patch hash regression test for + untracked file invalidation. + - `internal/build/signal_test.go`: SIGINT mid-build cleanup; asserts + `out/*.tmp` removed. + - `internal/build/validate_test.go`: token regex rejects `;`, ``` ` ```, + `$()`, whitespace, etc.; env-allowlist rejects `LD_PRELOAD`, + `PATH`, etc. + - Golden test: a tiny synthetic source tree produces a deterministic + manifest (excluding `duration_ms` and `created_at`). 
+ - **Integration test (build-tag gated)**: `internal/build/integration_test.go` + (`//go:build integration`) runs a real `eclipse-temurin:8-jdk` against + a 10-line hello-world gradle project, asserts the produced JAR is + structurally valid. Runs in CI on the e2e job; skipped on `go test ./...`. + +### Phase 2 — Intent integration (~2 days) + +**Deliverable**: `trond apply --intent dev.yaml` automatically builds. + +- `internal/intent/schema.go`: add `Build` struct, validator rules + (mutual exclusion with `image:`, valid jdk versions, image_tag + reference-format check via `imagetag.go`). +- `schemas/intent.schema.json`: add the `build:` block, document mutual + exclusion with `image:`. +- `internal/apply/apply.go`: insert `resolveBuild()` between validate and + render. Resolved artifact ref is held on the in-memory intent. On + success record `build_cache_key` on the state node entry (FR-018). +- `internal/state/state.go`: extend node entry with optional + `build_cache_key`. SchemaVersion stays 1.3.0 (still MINOR; additive). +- `internal/render/`: consume the resolved ref. For `runtime: jar`, point + the systemd unit at `.jar`. For `runtime: docker`, use the local + image tag with `pull_policy: never` (FR-005). +- `internal/output`: add the `build` block to the apply result JSON. +- `internal/preflight/build.go`: build-related preflight checks (FR-017). +- Tests: + - `cmd/apply_build_test.go`: intent with `build:` resolves and applies + against a stub builder. + - `cmd/preflight_build_test.go`: each FR-017 check surfaces correctly. + - `examples/dev-local.yaml`: a working example intent. + +### Phase 3 — Image artifact (~1 day) + +**Deliverable**: `--artifact image` and `runtime: docker` work together. + +- Recognize `image` artifact in `builder.go`; invoke `./gradlew :dockerBuild` + (or whatever `build.gradle_task` overrides to). +- Local image-tag book-keeping: write a `images/.json` mapping + `image_id → tag`; remove tag on `prune`. 
+- `internal/render/docker.go`: when artifact is `image`, use the tag + directly with `pull_policy: never`. +- Tests: round-trip with a minimal Dockerfile-only source tree (a stub + that doesn't need the full java-tron build). + +### Phase 4 — SSH target transfer (~2 days) + +**Deliverable**: build locally, deploy over SSH. + +- `internal/target/ssh/scp.go`: add a `Sha256IfExists(remotePath)` probe; + a `PutFile(ctx, localPath, remotePath)` op that: + - Writes to `.tmp` first. + - Pipes `-v` (or `-p` byte counts) through a parser that converts to + MCP `progress` notifications for transfers > 50 MB (FR-009). + - On ctx cancel: tries `ssh remote 'rm -f .tmp'` + best-effort, then returns `BUILD_CANCELLED` (FR-016 SSH branch). + - On success: atomic rename to ``. +- `internal/target/ssh/preflight.go`: add `command -v scp` probe (FR-017). +- `internal/apply/apply.go`: when target is SSH and artifact is JAR, after + the build call `target.PutFile`. Skip if remote sha256 matches. +- `examples/dev-ssh.yaml`. +- Tests: + - Integration test against an SSH-target container (already used by + existing e2e suite). Asserts the `.tmp`-then-rename pattern, the + progress notification firing for a synthesized large file, and the + `command -v scp` preflight check. + - SIGINT during transfer leaves no `.tmp` on the remote. + +### Phase 5 — Build management commands & MCP (~1 day) + +**Deliverable**: `trond build list / inspect / prune --keep N` + MCP +tools surfaced. + +- `cmd/build_list.go`, `cmd/build_inspect.go`, `cmd/build_prune.go`. +- `internal/build/cache.go`: prune logic — LRU by `created_at`, + cross-references `state.json::nodes[].build_cache_key` to refuse + deletion of in-use builds (FR-018). For `--artifact image` entries, + prune runs `docker image rm ` (not `docker untag`) so + layer storage is released; docker's refcounting protects layers + shared with other tags. 
Dirty-build entries (those with `+dirty-` + suffix) are pruned more aggressively (configurable + `build.cache.dirty_ttl` / `--cache-dirty-ttl`, default 7 days, + `never` accepted — FR-025). During prune, manifest entries pointing + at missing artifacts (e.g., user manually deleted a JAR) are also + removed (FR-020 cleanup branch). +- MCP tools (`internal/mcp/tools_build.go`): expose `build`, `build_list`, + `build_inspect`, `build_prune` with annotations per FR-013: + - `build`: `idempotentHint=true`, `destructiveHint=false` + - `build_list`, `build_inspect`: `readOnlyHint=true` + - `build_prune`: `destructiveHint=true` + +### Phase 6 — Docs & quickstart (~0.5 day) + +- `specs/002-trond-build-pipeline/quickstart.md` — copy/pasteable dev-loop + walkthrough. +- README.md `## Dev loop` section linking to quickstart. +- AGENTS.md: add `build` to the read-write tool list, document the + build-then-apply workflow. + +## Total estimate + +~11-12 working days from MVP (Phase 1+2) to fully closed loop (Phase +1-6). Revised up from: +- v1 draft 7-8d (initial scope) +- pass 1 (+2d): signal handling, flock, patch-hash bug fix → 9-10d +- pass 3 (+2d): Windows split + flag allowlist + image_tag validator + + pins package + audit lifecycle → 11-12d + +Phase 1 alone is now ~4 working days (was 2 in v1, 3 in pass 1). + +## Risks and mitigations + +- **Risk**: tron-docker's gradle task layout changes (`shadowJar` vs + `bootJar` vs `dockerBuild`). + **Mitigation**: `build.gradle_task` field in intent + `--gradle-task` + CLI flag (FR-001). Sensible default per artifact kind. + +- **Risk**: builder image pin becomes a security-update blocker. + **Mitigation**: `make refresh-builder-pins` regenerates digests; CI runs + it weekly and opens a PR if drift is detected. Users in a bind can pass + `--builder-image-override ` (escape hatch, not promoted in docs). + +- **Risk**: the cache directory grows unbounded (a working dev produces a + dirty build every few minutes). 
+ **Mitigation**: `prune` exists in Phase 5. Dirty-build entries have a + default 7-day TTL (more aggressive than clean-build LRU). Soft-warn when + cache > 5 GB at the start of any `build`. + +- **Risk**: gradle caches inside the container conflict with host gradle + (when developer also uses `./gradlew` outside trond). + **Mitigation**: trond mounts a separate `/gradle` directory rather + than the host's `~/.gradle`. The container's caches are isolated from + host. Document this. + +- **Risk**: Patch hash misses some signal that changes the build output + (e.g., file mode changes, submodule state). + **Mitigation**: `git status --porcelain -uall` covers untracked files, + modified mode bits, and submodule state. Stays a strict subset of "what + gradle actually depends on", but it's the strictest off-the-shelf + hash we can get without parsing build.gradle. + +- **Risk**: SSH transfer over a slow link with no progress feedback feels + hung. + **Mitigation**: MCP progress notifications for > 50 MB (FR-009). In + `-o text` mode the same parser drives a tty progress bar. + +## Schema impact + +- SchemaVersion: 1.2.0 → **1.3.0** (MINOR: adds new `build` schema + + extends state node entry with optional `build_cache_key`; no breaking + changes to existing schemas). +- New file: `schemas/output/build.schema.json`. +- Modified: `schemas/state.schema.json` (additive: new optional + `build_cache_key` field on each node entry). +- Modified: `schemas/intent.schema.json` (additive: new optional `build:` + block). +- Modified: `schemas/state.schema.json` (additive: optional + `build_revision` field). +- `internal/schema/version_baseline.json`: regenerate. + +## Open questions (to resolve during implementation) + +1. Exact gradle task name in current tron-docker: `:shadowJar` or + `:bootJar`? Verify by inspecting `tools/toolkit/build.gradle` and + `tools/dbfork/build.gradle`. The `build.gradle_task` field neutralizes + this concern; the question is what default to ship. +2. 
`build.jdk` schema type: string (`"8"`, `"11"`, `"17"`, `"21"`). + Confirmed — string. Number is ambiguous (`8` vs `1.8`). +3. Whether to expose a `--builder ssh:` (build on a remote build + server) in the future. Hook is there in the `Builder` interface; no + implementation in v1. + +## CHANGELOG + +- **2026-05-08**: Initial draft. +- **2026-05-08 (self-review)**: Applied 17-item review. + - Removed `go-git/v5` dependency; everything shells out via os/exec. + - Phase 1 estimate 2 days → 3 days (added signal handling, flock, + patch hash bug fix). + - Total estimate 7-8 → 9-10 days. + - Removed separate `host.go` file from architecture; folded into + `builder.go`. + - Architecture diagram redrawn to show new components: flock, + SIGINT handler, env passthrough, preflight integration. + - Cache key now explicitly combines git-diff AND git-status (untracked + files invalidate cache). + - Added FR-015 (concurrent lock), FR-016 (SIGINT), FR-017 + (preflight), FR-018 (prune state cross-ref), FR-019 (env + passthrough) — all phases updated accordingly. + - `pull_policy: never` made explicit for local-built docker images. + - SSH progress notifications for transfers > 50 MB. +- **2026-05-08 (self-review pass 3)**: Applied 10-item third review. + - Windows port: split `internal/build/lock_{posix,windows}.go` so + `syscall.Flock` doesn't break windows/amd64 builds. + - Defense correction: `gradle_args` validation is now a flag-name + allowlist (was a char regex). New "Security boundary" subsection + enumerates accepted and rejected flags with rationale. + - `internal/build/pins/` package owns the embedded JSON; the root + file is a Makefile-generated symlink for ergonomics. + - New `internal/build/imagetag.go` validates `build.image_tag` + against Docker reference format. + - State node field renamed `build_revision` → `build_cache_key` (the + value is a cache key, not just a sha). 
+ - Phase 5 prune for image artifacts uses `docker image rm` (not + `docker untag`) so layer storage is actually freed. + - Audit-log lifecycle is now two-phase: `in_progress` at start, + atomic update to terminal on completion. Crashed builds leave + forensic-visible entries. + - Pin-unreachable behavior specified: error, not silent fallback. + - Phase 1 estimate 3d → 4d; total 9-10d → 11-12d. + - Open question #4 (`build.env` allowlist) removed — resolved in pass 2. +- **2026-05-08 (self-review pass 2)**: Applied 12-item second review. + - **Security hardening**: dedicated "Security boundary" section; argv- + only subprocess invocation (no `bash -c`); FR-022 token regex on + gradle task + args; FR-019 narrows `build.env` to a fixed allowlist + (`GRADLE_OPTS|JAVA_OPTS|GRADLE_USER_HOME|MAVEN_OPTS|ORG_GRADLE_PROJECT_*`). + - **Cache correctness**: cache key now includes + `BuilderImageDigest` + `GradleArgs` (FR-002 pass 2). On-disk naming + `-b[+dirty-]`. Cache lookup also stats the + artifact (FR-020) so a manually-deleted JAR triggers a real rebuild. + - **Distribution**: `builder_image_digests.json` is now `go:embed`-ed + (FR-024); `--builder-image-override` is documented as escape hatch + only and participates in the cache key. + - **SIGINT extended to SSH** (FR-016): scp writes `.tmp` and renames; + cancellation tries best-effort remote cleanup. + - **Audit log shape**: FR-023 fixes a concrete event JSON for build + operations. + - **Source path resolution**: FR-021 distinguishes CLI (CWD) vs intent + (intent-file dir) relative-path resolution. + - **Preflight**: FR-017 builder-image probe is now offline-friendly + (warning on missing-and-offline, not hard fail). + - **New flags**: `--gradle-arg` repeatable (FR-001); cache dirty TTL + via `build.cache.dirty_ttl` or `--cache-dirty-ttl` (FR-025, default + 7d, `never` accepted). + - **Integration test**: build-tag gated test runs real + `eclipse-temurin:8-jdk` against a hello-world gradle project. 
+ - Phase / total estimate unchanged at ~9-10 days; pass 2 fixes are + additions to existing phases, not new phases. diff --git a/specs/002-trond-build-pipeline/spec.md b/specs/002-trond-build-pipeline/spec.md new file mode 100644 index 00000000..e652b0c0 --- /dev/null +++ b/specs/002-trond-build-pipeline/spec.md @@ -0,0 +1,560 @@ +# Feature Specification: trond Build Pipeline + +**Feature Branch**: `feat/build-pipeline` +**Created**: 2026-05-08 +**Last revised**: 2026-05-08 (self-review pass — see CHANGELOG below) +**Status**: Draft +**Input**: User description: "Add a `trond build` capability that produces deployable +java-tron artifacts (JAR or Docker image) from a source tree, integrated with +`trond apply` so a developer can iterate on java-tron code and redeploy with a +single command." + +## Background + +trond today consumes pre-built java-tron artifacts: an official Docker image +(`tronprotocol/java-tron:<tag>`) or a pre-built JAR. The "edit java-tron code, +test the change on a node" loop is unsupported — operators must context-switch +to tron-docker / java-tron's Gradle toolchain to produce a custom artifact, +then hand-stitch the result back into a trond intent. + +This feature closes that loop. trond gains a `build` verb that orchestrates a +containerized Gradle invocation and an `apply`-side hook that resolves a +`build:` block in intent.yaml automatically. + +## Non-Goals + +trond will NOT implement Java compilation, dependency resolution, or the Gradle +DSL itself. The Java toolchain runs unchanged inside a container that trond +manages; trond is an orchestrator, not a re-implementation. + +trond will NOT replace tron-docker's release-grade image build (signed, +SBOM-attached, multi-arch matrix). That stays in tron-docker's CI. trond's +`build` targets the development inner loop. + +## Clarifications + +### Session 2026-05-08 + +- Q: Should the builder image be reproducible or follow upstream? → A: Pin a + specific sha256 digest per JDK version.
Bump the pin in a trond release. +- Q: Keep a `--builder host` escape hatch for developers with local gradle? → + A: Yes — same flags, skips the container path. Low maintenance cost. +- Q: How should SSH targets handle the build step? → A: Build locally, scp the + resulting JAR to the remote target, start the node there. Don't ship source. +- Q: Is `trond build` a standalone command or only an apply-side phase? → + A: Both. Standalone for CI / debugging; apply-side for the integrated loop. + +### Session 2026-05-08 (self-review pass) + +- Q: Should `build.rebuild` field exist (`always | on_change | never`)? → + A: **Removed.** Semantics overlapped confusingly with revision = HEAD + + dirty detection. The cache-key derivation already handles the legitimate + cases; an explicit field added zero value. +- Q: Patch hash for dirty trees: just `git diff`? → A: **No.** `git diff` + misses untracked files, which would silently cache-hit a stale artifact. + Patch hash MUST combine `git diff` and `git status --porcelain -uall` + (so a brand-new `.java` file invalidates the cache). +- Q: Configurable gradle task name? → A: Yes, as `build.gradle_task` field + with sensible defaults (`shadowJar` for jar, `dockerBuild` for image). + +### Session 2026-05-08 (review pass 2) + +- Q: How to pass gradle_task / gradle_args without shell injection? → + A: Never invoke `bash -c`. Pass gradle task and args as separate + `exec.Command` argv; validate each token against + `^[a-zA-Z0-9._:=/+-]+$` at parse time and reject otherwise. +- Q: Should the cache key incorporate the builder image digest? → A: Yes. + Cache key becomes `-b` so a pin bump silently + invalidates stale artifacts. Same for `+dirty-` variants. +- Q: `build.env` arbitrary keys? → A: **No.** v1 is an allowlist: env + passthrough is restricted to `GRADLE_OPTS`, `JAVA_OPTS`, + `GRADLE_USER_HOME`, `MAVEN_OPTS`, and any var matching + `ORG_GRADLE_PROJECT_*`. Extending the list is a code change, not an + intent change. 
+- Q: How is `builder_image_digests.json` distributed? → A: Embedded into + the trond binary via `go:embed`. Pin bumps ship with releases. A + `--builder-image-override ` escape hatch exists for + emergencies but is not documented in the dev-loop quickstart. +- Q: Source path relative to what? → A: CLI `--source ./path` resolves + relative to CWD. `build.source: ./path` in intent resolves relative + to the intent file's directory (matches docker-compose `build.context`). +- Q: Should cache hit verify the artifact file exists on disk? → A: Yes. + Manifest existence is necessary but not sufficient; cache lookup MUST + stat the artifact and treat a missing file as a miss. +- Q: Should SIGINT cancel scp transfers too? → A: Yes. The whole apply + flow runs under the same signal-aware context. scp writes to a + `.tmp` suffix and renames on success so a cancelled transfer leaves + no half-written artifact on the remote. + +### Session 2026-05-08 (review pass 3) + +- Q: `gradle_args` validation by character class? → A: **No.** Character + regex was both too tight (rejects `,` in `--projects=a,b,c`, spaces in + `-Dtitle=my title`) and too loose (allows `--init-script + /tmp/evil.gradle`, which is the actual threat). Switched to a + **flag-name allowlist**: `--offline`, `--no-daemon`, `--parallel`, + `--max-workers=N`, `--rerun-tasks`, `-D=`, `-P=`, + `-q`/`-i`/`-d`. The value portion is unrestricted (argv form is + already shell-safe). `gradle_task` continues to use a tight char + regex (`^[a-zA-Z][a-zA-Z0-9:_-]*$`) since task names are inherently + regular. +- Q: Windows support for concurrent-build serialization? → A: trond + publishes a windows/amd64 binary, but `syscall.Flock` is POSIX-only. + Implementation split via `//go:build !windows` — Windows uses an + in-process mutex only (no cross-process protection). FR-015 caveat + records this. Doc says: concurrent `trond build` from two trond + processes is undefined on Windows. +- Q: `image_tag` format validation? 
→ A: Validated against Docker + reference format. `image_tag: /etc/passwd` and similar must be + rejected with `VALIDATION_ERROR` at intent parse time. +- Q: `state.json` field naming — `build_revision` or `build_cache_key`? + → A: **`build_cache_key`.** The stored value is the full cache key + (`-b[+dirty-]`), not just a git revision. Renaming + pre-implementation so we don't ship the misleading name. +- Q: What if a pinned builder image digest becomes unreachable? → A: + Fail loudly. `error_code: "BUILDER_IMAGE_UNAVAILABLE"`, suggestions + point at `--builder-image-override` and "upgrade trond." Do NOT + silently fall back to unpinned tags — that defeats reproducibility. +- Q: Does `build prune` clean up docker image layer disk usage? → A: + Yes. For `--artifact image`, prune runs `docker image rm ` (not + just `docker untag`). Layers shared with other images stay because + docker handles refcounting. +- Q: Audit log atomicity around long-running builds? → A: Append a + `result: "in_progress"` event at build start; update atomically to + the terminal state on completion. A crash mid-build leaves an + `in_progress` entry visible via `trond events`, surfacing the + forensic signal. + +## User Scenarios & Testing + +### User Story 1 — Build a JAR from local source (Priority: P1) + +A java-tron developer has cloned the source, modified a few files, and wants a +fat JAR they can hand to `trond apply` or run separately. They invoke +`trond build` against the source directory and receive a JSON response +containing the artifact path, source revision, and content hash. + +**Why this priority**: This is the foundation. Until a single artifact can be +produced repeatably from a source tree, nothing else in this feature works. + +**Independent Test**: +```bash +trond build --source ./java-tron --artifact jar -o json +``` +Delivers a JAR file under `~/.trond/builds/out/` and a JSON manifest under +`~/.trond/builds/manifest/`. + +**Acceptance Scenarios**: + +1. 
**Given** a clean java-tron working tree at revision `abc123`, **When** the + developer runs `trond build --source ./java-tron --artifact jar -o json`, + **Then** trond pulls the pinned `eclipse-temurin:8-jdk` image (first run + only), runs `./gradlew shadowJar` inside it, and emits + `{"source_revision":"abc123", "artifact_path":"~/.trond/builds/out/abc123.jar", + "sha256":"...", "duration_ms":..., "cache_hit": false}`. + +2. **Given** the same revision built once already, **When** the developer + re-runs the same command, **Then** trond returns the cached manifest with + `cache_hit: true` and `duration_ms < 200`. The on-disk JAR is byte-identical. + +3. **Given** uncommitted edits in the working tree **and** an untracked new + file, **When** the developer runs the same command, **Then** trond + computes a patch hash that combines `git diff` and `git status --porcelain + -uall`, names the artifact `abc123+dirty-<patch-hash>.jar`, and treats it as + a distinct cache entry. Adding an untracked file MUST change the patch + hash (regression guard against the v1 design bug found in self-review). + +4. **Given** the build fails (compile error in user's code), **When** the + command runs, **Then** trond surfaces a structured error envelope + (`error_code: "BUILD_FAILED"`, message containing gradle's tail output, + `suggestions` pointing at common causes) and exits 1. + +5. **Given** Docker is not running on the host, **When** the developer + invokes `trond build` without `--builder host`, **Then** trond exits with + `error_code: "TARGET_UNREACHABLE"`, exit code 3, and a suggestion to start + Docker or pass `--builder host`. + +6. **Given** the user sends SIGINT during a running build, **When** trond + receives the signal, **Then** trond kills the build container, removes + any partial output from `out/`, leaves no manifest entry for the aborted + build, and exits with `error_code: "BUILD_CANCELLED"`, exit code 130 + (standard for SIGINT). + +7. 
**Given** two concurrent `trond build` invocations against the same + source + revision, **When** they race, **Then** the second caller + acquires a per-key file lock, waits for the first to complete, and + returns a cache hit (no duplicate build). + +### User Story 2 — Build and deploy in one command (Priority: P1) + +A developer has an intent file that references a `build:` block instead of a +prebuilt image. They run `trond apply --intent dev.yaml`. trond resolves the +build first, then deploys a node against the freshly produced artifact. + +**Why this priority**: This is the dev loop trond is being extended for. Story +1 alone leaves the build/deploy boundary manual. + +**Independent Test**: +```bash +trond apply --intent examples/dev-local.yaml --auto-approve --wait -o json +``` +Delivers a running node whose runtime points at the just-built artifact. + +**Acceptance Scenarios**: + +1. **Given** an intent with `build.source: ./java-tron, build.revision: HEAD, + build.artifact: jar`, **When** `apply` runs and the revision has not been + built before, **Then** trond invokes the build pipeline, then proceeds to + `apply`. The JSON output contains both a `build` block and the usual `apply` + fields. + +2. **Given** the intent points at a build that's already cached, **When** + `apply` runs and the node is already deployed at that same revision, + **Then** the result is `no_change` and total duration is < 5 seconds. + +3. **Given** the build succeeds but the resulting JAR is not a valid + java-tron fat JAR (no `org.tron.program.FullNode` main class), **When** + `apply` reaches the runtime step, **Then** trond fails with `error_code: + "INVALID_ARTIFACT"`, NOT with a runtime crash inside the container. + +4. **Given** `runtime: docker` with `artifact: image`, **When** trond renders + the compose file, **Then** the service has `pull_policy: never` set, so + compose does not attempt to pull the locally-built tag from a remote + registry. 
+ +### User Story 3 — Build a Docker image, not a JAR (Priority: P2) + +The same developer wants a runnable Docker image rather than a JAR (so the +node uses the docker runtime). They set `build.artifact: image` and a +`build.image_tag` in their intent. + +**Independent Test**: +```bash +trond build --source ./java-tron --artifact image --tag trond-dev:abc123 -o json +``` + +**Acceptance Scenarios**: + +1. **Given** a tron-docker-shaped source tree (gradle target `dockerBuild`), + **When** the build runs, **Then** trond invokes the gradle docker plugin + and produces a local image tagged `trond-dev:abc123`. The manifest records + the image ID (sha256 digest). + +2. **Given** the artifact is `image`, **When** `apply` runs, **Then** the + rendered docker-compose references the local image tag directly with + `pull_policy: never`. + +### User Story 4 — Deploy over SSH using a locally built JAR (Priority: P2) + +A developer builds on their laptop and deploys to a remote Linux VM via SSH. + +**Independent Test**: +```bash +trond apply --intent examples/dev-ssh.yaml -o json +``` +where the intent has both a `build:` block AND `target.type: ssh`. + +**Acceptance Scenarios**: + +1. **Given** a successful local build of `<sha>.jar`, **When** `apply` runs + against an SSH target, **Then** trond scp's the JAR to the remote host's + deployment directory, configures the systemd unit to point at it, and + starts the service. No source is shipped to the remote. + +2. **Given** the remote target already has the same `<sha>.jar` on disk (from + a prior run), **When** `apply` runs, **Then** trond skips the transfer + step and goes directly to the start phase. + +3. **Given** a slow link and a large fat JAR (~200 MB), **When** the + transfer starts, **Then** trond emits MCP progress notifications (same + mechanism `snapshot download` uses) so a connected agent surfaces a + live progress bar to the user. + +4. 
**Given** the SSH target host lacks `scp` (some hardened distros), **When** + preflight runs, **Then** trond reports a clear `error_code: + "TARGET_MISSING_TOOL"` with a suggestion to install openssh-clients on + the remote. + +### User Story 5 — Use host gradle (no container) (Priority: P3) + +A developer with a configured local Gradle daemon wants to skip the container +for max-iteration speed. + +**Acceptance Scenarios**: + +1. **Given** `--builder host` is passed (CLI) or `build.builder: host` is set + (intent), **When** the build runs, **Then** trond invokes `./gradlew` from + the source directory directly. JDK + Gradle version mismatches surface as + `BUILD_FAILED` with `suggestions` pointing at the required versions. + +### Edge Cases + +- **Source tree on a separate filesystem with weird permissions**: trond + mounts read-only so the build can't corrupt source. Gradle wrapper attempts + to write to `~/.gradle/caches` — trond redirects this via mount. +- **Out-of-disk during a build**: detected by gradle's own error; trond + surfaces with a disk-space suggestion. +- **Concurrent `trond build` calls on the same revision**: per-key + `flock`-based serialization (FR-015); second caller waits then returns cache + hit. +- **Builder image not present and offline**: surface clear network-error + message, suggest `--builder host` fallback. +- **Revision is `HEAD` but git working tree is not a git repo**: error + `error_code: "INVALID_SOURCE"` with suggestion to either commit or pass an + explicit `--source-id `. +- **Source path is a symlink**: resolve it before mounting; Docker's mount + resolves symlinks at mount-time, so trond canonicalizes the path first. +- **Untracked file added between builds**: the patch hash MUST include + untracked files (FR-002 explicit), preventing stale cache hits. 
+ +## Requirements + +### Functional Requirements + +- **FR-001**: trond MUST expose a `trond build` cobra command accepting at + minimum `--source <path>`, `--artifact <jar|image>`, `--revision <rev>`, + `--jdk <version>`, `--builder <docker|host>`, `--tag <image-tag>` (for image), + `--gradle-task <task>` (override default), `--gradle-arg <arg>` + (repeatable; e.g. `--gradle-arg=--offline`). +- **FR-002**: The build cache MUST be content-addressed. The cache key + MUST combine: resolved git sha, builder image digest prefix (so a pin + bump silently invalidates stale artifacts), jdk version, artifact kind, + gradle task name, gradle args, and, for dirty working trees, a patch hash computed + over the COMBINED output of `git diff` AND `git status --porcelain + -uall` (so untracked files invalidate cache). On-disk naming: + `<sha>-b<digest-prefix>[+dirty-<patch-hash>].(jar|imgmeta)`. +- **FR-003**: Build outputs MUST live under `${TROND_STATE_DIR}/builds/` + (default `~/.trond/builds/`) and respect `--state-dir` / `TROND_STATE_DIR`. +- **FR-004**: Each completed build MUST produce a JSON manifest matching + `schemas/output/build.schema.json`. +- **FR-005**: The intent.yaml schema MUST accept a `build:` block that is + mutually exclusive with `image:` and references source + revision. When + `runtime: docker` + `artifact: image`, the rendered compose service MUST + carry `pull_policy: never`. `build.image_tag` MUST match Docker's + reference format (validated via `github.com/distribution/reference` or + equivalent regex); paths, whitespace, and uppercase are rejected with + `VALIDATION_ERROR`. +- **FR-006**: `trond apply` MUST resolve a `build:` block by invoking the + build pipeline before the render/deploy phases. +- **FR-007**: A build failure MUST set exit code 1 and `error_code: + "BUILD_FAILED"`, with the gradle stderr tail (last ~50 lines) in the + message. Gradle output of a successful build MUST NOT be written to stdout in `-o + json` mode (stdout carries only the JSON result); verbose mode streams gradle output to stderr. +- **FR-008**: The default builder MUST be `docker`. 
The host builder MUST be + available as a flag override. +- **FR-009**: When the target is SSH, trond MUST build locally and transfer + the JAR via scp. Source code MUST NOT be shipped to the remote. Transfers + larger than 50 MB MUST emit MCP progress notifications. +- **FR-010**: Builds MUST be discoverable: `trond build list`, `trond build + prune --keep N`, `trond build inspect <key>`. +- **FR-011**: trond MUST validate the produced artifact is structurally valid + (JAR contains `org.tron.program.FullNode` main class, or image has runnable + ENTRYPOINT) before declaring success. +- **FR-012**: The pinned builder image MUST be reproducible: trond ships a + `builder_image_digests.json` mapping `jdk_version → sha256:...`. Upgrading + the pin is a trond release change, not a runtime change. A `make + refresh-builder-pins` target MUST regenerate the file from current Temurin + tags so digest drift is a one-command bump. +- **FR-013**: All build-related operations MUST be exposed through MCP as + tools so AI agents can drive the dev loop. Tool annotations: + - `build`: `idempotentHint=true`, `destructiveHint=false` + - `build_list`, `build_inspect`: `readOnlyHint=true` + - `build_prune`: `destructiveHint=true` +- **FR-014**: The audit log MUST record each build invocation (revision, + duration, result) alongside the existing apply/upgrade events. +- **FR-015**: Concurrent `trond build` invocations against the same cache + key MUST serialize via a per-key `flock` on + `${TROND_STATE_DIR}/builds/locks/<key>.lock`. The waiting caller MUST + return a cache hit after the first completes (not duplicate work). + On Windows (no POSIX `flock`), the implementation falls back to an + in-process mutex only; cross-process serialization is undefined and + documented as such. Goal: don't break the Windows build. +- **FR-016**: `trond build` AND the build phase of `trond apply` MUST run + under a signal-aware context. 
SIGINT MUST terminate the build container + AND any in-flight scp transfer, remove partial output (`out/*.tmp`, + remote `*.tmp` files), omit the manifest entry, and exit 130 with + `error_code: "BUILD_CANCELLED"`. +- **FR-017**: `trond preflight --intent ` MUST, when the intent + contains a `build:` block, additionally verify: (a) source path exists + and is a git repo, (b) docker is reachable (or host gradle exists with + `--builder host`), (c) the builder image is in the local cache (a + network-reachable warm-pull check runs only when the image is missing; + offline hosts get a warning, not an error), (d) for SSH targets, scp + is present on the remote. +- **FR-018**: `trond build prune` MUST cross-reference `state.json` and + refuse to delete any build whose cache key equals the + `build_cache_key` field of a currently-managed node. The state schema + gains an optional `build_cache_key` field on each node entry + (additive, MINOR bump). For `--artifact image` entries, prune MUST + call `docker image rm ` (not just `docker untag`) so layer + storage is actually released; docker's refcounting protects layers + shared with other tags. +- **FR-019**: The build environment MUST forward env vars to the build + container ONLY from a fixed allowlist: `GRADLE_OPTS`, `JAVA_OPTS`, + `GRADLE_USER_HOME`, `MAVEN_OPTS`, and any var matching the + `ORG_GRADLE_PROJECT_*` prefix. Intent-side `build.env: { KEY: VALUE }` + is also restricted to this allowlist; unknown keys MUST fail validation + with `VALIDATION_ERROR`. Extending the allowlist is a trond code + change, not an intent change. +- **FR-020**: Cache lookup MUST stat the manifest's referenced artifact + (jar file or local image tag) and treat a missing artifact as a cache + miss. Manifests pointing at missing artifacts MUST be removed during + the next prune. 
+- **FR-021**: Source path resolution: `--source ./path` on the CLI + resolves relative to CWD; `build.source: ./path` in intent.yaml + resolves relative to the intent file's parent directory (matching + docker-compose's `build.context` convention). +- **FR-022**: All gradle invocations MUST use a no-shell argv form + (`exec.Command("docker", "run", ..., image, "./gradlew", task, + args...)` — never `bash -c "..."`). Validation: + - `gradle_task` MUST match `^[a-zA-Z][a-zA-Z0-9:_-]*$` (task names + are inherently regular). Examples accepted: `shadowJar`, + `dbfork:build`. Rejected: anything with whitespace, `;`, `$()`, + or path separators. + - `gradle_args` is restricted by a **flag-name allowlist**, not a + character class. Accepted: `--offline`, `--no-daemon`, `--parallel`, + `--max-workers=N`, `--rerun-tasks`, `-D<key>=<value>`, + `-P<key>=<value>`, `-q` / `-i` / `-d`. The value portion of `-D`/`-P` + is unrestricted (argv form is already shell-safe). Any other + flag, including `--init-script`, `--include-build`, `--build-file`, + `--settings-file`, is rejected with `VALIDATION_ERROR` because they + can redirect the build to attacker-supplied logic. +- **FR-023**: Each build event in the audit log MUST conform to: + `{timestamp, command: "build", result: + "in_progress"|"success"|"failed"|"cancelled", build: + {source_revision, dirty: bool, jdk_version, artifact_kind, builder, + duration_ms, error_code: string|null}}`. Lifecycle: append a + `result: "in_progress"` event at build start, then update atomically + to the terminal result on completion. A trond process crash mid-build + leaves an `in_progress` entry visible via `trond events`, surfacing + the forensic signal. Schema is shared with the existing audit-log + shape. +- **FR-024**: `builder_image_digests.json` MUST be embedded into the + trond binary via `go:embed`. Runtime override via + `--builder-image-override <image-ref>` is allowed but is an + escape hatch (not promoted in the dev-loop quickstart). 
Override values + participate in the cache key (FR-002) so they don't pollute pinned + caches. When the pinned digest is unreachable (image removed from + registry, network outage of the registry itself), trond MUST exit + with `error_code: "BUILDER_IMAGE_UNAVAILABLE"`, `exit_code: 3`, and + surface suggestions covering both `--builder-image-override ` + and "upgrade trond." trond MUST NOT silently fall back to an + unpinned tag — that defeats reproducibility. +- **FR-025**: The dirty-build cache TTL MUST be user-configurable via + `build.cache.dirty_ttl` in intent (or `--cache-dirty-ttl` on `build + prune`). Default 7 days. Accepted: any Go `time.ParseDuration` value + plus the literal `never`. + +### Key Entities + +- **Build**: A content-addressed compilation of a java-tron source tree. + Properties: source_revision (git sha), patch_hash (if dirty), + builder_image_digest (which JDK image produced this), jdk_version, + artifact_kind (jar|image), artifact_ref (path or image tag), + sha256/image_id, duration_ms, builder (docker|host), gradle_task, + gradle_args, created_at. +- **Source**: A reference to a java-tron checkout. Properties: path + (canonicalized), revision_spec (HEAD|branch|tag|sha), resolved_revision, + dirty_state (boolean), patch_hash (when dirty_state). +- **Builder Image Pin**: A frozen mapping `jdk_version → image@sha256:...` + bundled with each trond release. Refresh path: `make + refresh-builder-pins`. +- **State node entry** (existing, extended): adds optional + `build_cache_key: string` field (full cache key, not just git sha), + populated by apply when the deploy consumed a `build:` block. Used by + `build prune` to refuse deletion of in-use builds. + +### Success Criteria + +- **SC-001**: A developer with java-tron source on their laptop can run + `trond apply --intent dev.yaml` and reach a running node in < 5 minutes for + a cold build, < 1 minute for a cached build. 
+- **SC-002**: Re-running `trond apply` against an unchanged source tree + results in `no_change` and exits within 2 seconds. +- **SC-003**: A build can be triggered by an AI agent through MCP (the + `build` tool) and the agent can chain build → apply → status in one + conversation. +- **SC-004**: The first 10 trond users to try the dev-loop quickstart (US-1 + / US-2) complete it without manual intervention on the build step. + +## Out of Scope (For This Feature) + +- Multi-arch image build matrix (`linux/amd64,linux/arm64`). Recorded in + follow-up. The Phase 4 design notes the buildx hook point. +- Build provenance signing (cosign-signed artifacts at build time). The + release pipeline in tron-docker already does this; trond's dev builds are + intentionally unsigned. +- Builds against branches of dependencies (e.g., custom protobuf-java). +- Cross-source builds (combine multiple repos into one artifact). +- Remote-host builds (build *on* the SSH target). Out of scope per + clarification. +- Image artifact + SSH target combined. Use registry push for that case + (existing `image:` path). + +## Dependencies + +- This feature does NOT depend on the toolkit wrapper or analyze layer. +- The shadow-fork feature DOES depend on a working build pipeline (it needs + to produce a forked-state JAR/image). + +## CHANGELOG + +- **2026-05-08**: Initial draft. +- **2026-05-08 (self-review pass 1)**: Applied 17-item review. Material + changes: + - Removed ambiguous `build.rebuild: always|on_change|never` field. + - FR-002 patch hash now MUST include untracked files (regression bug + fix in design). + - FR-005 makes `pull_policy: never` explicit for local-built images. + - FR-015 (concurrent lock), FR-016 (SIGINT), FR-017 (preflight), + FR-018 (prune cross-ref state), FR-019 (env passthrough) all newly + added. + - US-1 gained acceptance scenarios 6-7 (SIGINT, concurrent). + - US-2 gained scenario 4 (pull_policy: never). + - US-4 gained scenarios 3-4 (progress, scp probe). 
+ - FR-013 MCP tool annotations now explicit. +- **2026-05-08 (self-review pass 3)**: Applied 10-item third review. + - **Portability bug**: FR-015 now documents Windows fallback for + `flock` (in-process mutex only; cross-process serialization + undefined). Prevents windows/amd64 build break. + - **Defense correction**: FR-022 swaps the `gradle_args` char-regex + for a flag-name allowlist (`--init-script /tmp/evil.gradle` was + passing the old regex while legitimate `--projects=a,b,c` was + failing it). `gradle_task` keeps its tight char-regex since task + names are inherently regular. + - **Input validation**: FR-005 grows an `image_tag` reference-format + check (rejects `image_tag: /etc/passwd` etc.). + - **State naming**: `build_revision` field renamed to + `build_cache_key` — the stored value is the cache key + (`-b[+dirty-]`), not just a git sha. + - **Resilience**: FR-024 specifies behavior when a pinned digest + becomes unreachable — explicit error, no silent fallback to + unpinned tags. + - **Resource cleanup**: FR-018 prune now calls `docker image rm` + (not `docker untag`) so image layer storage is actually freed. + - **Audit lifecycle**: FR-023 introduces `result: "in_progress"` + appended at build start, atomically updated on completion. A + crashed build leaves an inspectable forensic entry. +- **2026-05-08 (self-review pass 2)**: Applied 12-item second review. + Material changes: + - **Security**: FR-022 forbids shell-mediated gradle invocations + (closes command-injection via `gradle_task` / `gradle_args`); FR-019 + narrows `build.env` from "any KEY" to a fixed allowlist (closes + `LD_PRELOAD`-style hijacks). Token regex added to `--gradle-task` + and `--gradle-arg`. + - **Correctness**: FR-002 cache key now includes builder image digest + (pin bump invalidates stale artifacts); FR-020 makes cache hit also + verify artifact file exists on disk. 
+ - **Distribution**: FR-024 makes the pin file `go:embed`-ed so the + binary is the source of truth, with `--builder-image-override` as a + documented escape hatch. + - **UX**: FR-021 disambiguates `source:` relative path resolution + (CLI = CWD, intent = intent-file dir). + - **Robustness**: FR-016 extends SIGINT handling to scp; uses + `.tmp` + rename so remote never sees half-written JARs. + FR-017 builder-image preflight is offline-friendly (warning on + missing-and-offline, not hard fail). + - **Audit/observability**: FR-023 fixes the audit-log build event + JSON shape so tooling can rely on it. + - **Configurability**: FR-001 grows `--gradle-arg <arg>` (repeatable); + FR-025 makes dirty-cache TTL user-tunable (default 7d).