Skip to content

Commit 0108d89

Browse files
committed
feat: add benchmark quality and gate why mode
1 parent 08b1c92 commit 0108d89

26 files changed

Lines changed: 1160 additions & 96 deletions

.github/workflows/release-dry-run.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,5 @@ jobs:
5353
release-manifest.json
5454
charts/sheaft/**
5555
.tmp/smoke-examples/**
56+
.tmp/benchmark-slice/**
5657
if-no-files-found: error

.github/workflows/release.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ jobs:
5151
- name: Package default config pack
5252
run: make default-config-pack APP_VERSION=${APP_VERSION#v}
5353

54+
- name: Run fixed benchmark slice
55+
run: make benchmark-slice BENCHMARK_OUT_DIR=dist/benchmark-slice
56+
5457
- name: Publish OCI image
5558
run: |
5659
OWNER_LC="$(printf '%s' "${GITHUB_REPOSITORY_OWNER}" | tr '[:upper:]' '[:lower:]')"
@@ -114,6 +117,9 @@ jobs:
114117
find dist/charts -maxdepth 1 -type f -name 'sheaft-*.tgz'
115118
} | sort
116119
)
120+
if [ -f dist/benchmark-slice/quality-report.json ]; then
121+
files+=("dist/benchmark-slice/quality-report.json")
122+
fi
117123
files+=("compatibility-manifest.json")
118124
files+=("release-manifest.json")
119125
gh release upload "${TAG}" "${files[@]}" --clobber

CHANGELOG.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,32 @@
11
# Changelog
22

3+
## v0.2.4 - 2026-05-31
4+
5+
Technical-preview feature release focused on trustable gate evidence, benchmark packaging, and explainable gate decisions for the `v0.2.x` line.
6+
7+
Included in this release:
8+
9+
- added a fixed Sheaft-on-Bering benchmark slice with `make benchmark-slice` and a versioned manifest under `benchmarks/fixed-slice`
10+
- added benchmark quality reporting with repeatability, confidence, advanced-metric availability, baseline-diff, contract, and cross-profile checks
11+
- added machine-readable gate decision reasons in `report.json` and a Why section in `summary.md`
12+
- added `sheaft gate --why` and `sheaft run --why` for human-readable gate explanations
13+
- added the benchmark manifest to the default config pack and the benchmark slice to release dry-run validation
14+
- refreshed roadmap, release tracking, and assumptions documentation after closing R4.1, R4.2, R4.3, and R9.1
15+
16+
Stable within the `v0.2.4` preview:
17+
18+
- the same strict `1.0.0` and `1.1.0` Bering contract acceptance introduced in `v0.2.0`
19+
- deterministic batch analysis, baseline comparison, and CI gate behavior from the `v0.2.0` line
20+
- release validation now includes the fixed benchmark slice and quality-report generation
21+
- gate decisions now carry threshold, aggregate, and assertion causes for release review
22+
23+
Still experimental in `v0.2.4`:
24+
25+
- long-running `serve` / `watch` service mode remains a technical-preview surface
26+
- local `discover` helper
27+
- broader operator-facing packaging and operational conventions around image/chart deployment
28+
- benchmark scale and external quality datasets remain outside this repository
29+
330
## v0.2.3 - 2026-05-01
431

532
Technical-preview patch release focused on Kubernetes handoff reliability for the `v0.2.x` line.

Makefile

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,9 @@ BUILD_DATE ?= $(shell git log -1 --format=%cI 2>/dev/null || date -u +"%Y-%m-%dT
3030
endif
3131
DIST_DIR ?= dist
3232
PLATFORMS ?= linux/amd64,linux/arm64
33+
BENCHMARK_OUT_DIR ?= .tmp/benchmark-slice
3334

34-
.PHONY: help build test lint smoke-examples docker-build docker-run-sample sample compatibility-manifest validate-compatibility-manifest validate-chart default-config-pack release-tools release-build image-dry-run image-local chart-package chart-publish-local release-manifest validate-release-manifest validate-release-assets release-dry-run release-local clean clean-dist
35+
.PHONY: help build test lint smoke-examples benchmark-slice docker-build docker-run-sample sample compatibility-manifest validate-compatibility-manifest validate-chart default-config-pack release-tools release-build image-dry-run image-local chart-package chart-publish-local release-manifest validate-release-manifest validate-release-assets release-dry-run release-local clean clean-dist
3536

3637
ifeq ($(OS),Windows_NT)
3738
define MKDIR_P
@@ -57,6 +58,7 @@ help:
5758
@echo " test Run Go tests"
5859
@echo " lint Run go vet"
5960
@echo " smoke-examples Build the CLI and smoke checked-in examples"
61+
@echo " benchmark-slice Run the fixed Sheaft-on-Bering benchmark slice"
6062
@echo " docker-build Build the local container image"
6163
@echo " docker-run-sample Run sample pipeline in the container image"
6264
@echo " compatibility-manifest Generate compatibility-manifest.json from strict contract pins"
@@ -85,6 +87,9 @@ else
8587
"$(POSIX_SH)" scripts/ci/smoke-examples.sh .tmp/smoke-examples ./bin/sheaft$(EXEEXT)
8688
endif
8789

90+
benchmark-slice:
91+
go run ./cmd/releasectl benchmark-slice --manifest benchmarks/fixed-slice/manifest.json --out-dir $(BENCHMARK_OUT_DIR)
92+
8893
docker-build:
8994
docker build -f build/Dockerfile -t $(IMAGE) .
9095

@@ -156,7 +161,7 @@ validate-release-manifest:
156161

157162
validate-release-assets: validate-compatibility-manifest validate-chart validate-release-manifest
158163

159-
release-dry-run: compatibility-manifest test smoke-examples release-build default-config-pack image-dry-run chart-package release-manifest validate-release-assets
164+
release-dry-run: compatibility-manifest test smoke-examples benchmark-slice release-build default-config-pack image-dry-run chart-package release-manifest validate-release-assets
160165

161166
release-local: compatibility-manifest test smoke-examples release-build
162167
ifeq ($(OS),Windows_NT)

README.md

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
[![ci-template-smoke](https://img.shields.io/github/actions/workflow/status/MB3R-Lab/Sheaft/ci-template-smoke.yml?branch=main&label=ci-template-smoke)](https://github.com/MB3R-Lab/Sheaft/actions/workflows/ci-template-smoke.yml)
66
[![schema-contract](https://img.shields.io/github/actions/workflow/status/MB3R-Lab/Sheaft/schema-contract.yml?branch=main&label=schema-contract)](https://github.com/MB3R-Lab/Sheaft/actions/workflows/schema-contract.yml)
77
[![Go version](https://img.shields.io/github/go-mod/go-version/MB3R-Lab/Sheaft)](https://github.com/MB3R-Lab/Sheaft/blob/main/go.mod)
8-
[![Technical preview](https://img.shields.io/badge/preview-v0.2.3-orange)](https://github.com/MB3R-Lab/Sheaft/releases/tag/v0.2.3)
8+
[![Technical preview](https://img.shields.io/badge/preview-v0.2.4-orange)](https://github.com/MB3R-Lab/Sheaft/releases/tag/v0.2.4)
99
[![Bering support](https://img.shields.io/badge/Bering-1.0%20%7C%201.1-blue)](https://github.com/MB3R-Lab/Sheaft/blob/main/docs/compatibility-matrix.md)
1010

1111
Sheaft is a downstream resilience posture engine and CI/CD gate for model artifacts produced by Bering or another compatible upstream producer.
@@ -21,16 +21,18 @@ It stays downstream of topology discovery. The public surface in this repository
2121

2222
## Stability / Release Status
2323

24-
The current public release is `v0.2.3`. The `v0.2.x` line is an experimental public release and should be treated as a technical preview, not a stable GA release.
24+
The current public release is `v0.2.4`. The `v0.2.x` line is an experimental public release and should be treated as a technical preview, not a stable GA release.
2525

26-
Stable within the `v0.2.3` technical preview:
26+
Stable within the `v0.2.4` technical preview:
2727

2828
- strict acceptance of the baseline Bering contract line: `io.mb3r.bering.model@1.0.0` and `io.mb3r.bering.snapshot@1.0.0`
2929
- strict acceptance of the advanced Bering contract line: `io.mb3r.bering.model@1.1.0` and `io.mb3r.bering.snapshot@1.1.0`
3030
- batch CLI command names and core flow: `simulate`, `gate`, `run`
3131
- deterministic batch execution for a fixed seed and config
3232
- cross-line baseline comparison through `analysis.baselines`
3333
- additive advanced analysis when `1.1.0` metadata exists
34+
- gate decision reasons in `report.json`, `summary.md`, and `sheaft gate --why` / `sheaft run --why`
35+
- fixed benchmark slice and release-quality `quality-report.json` generation
3436
- release archives for Linux and macOS on `amd64` and `arm64`
3537

3638
Experimental in `v0.2.x`:
@@ -181,6 +183,7 @@ Sheaft is intentionally downstream of Bering artifacts and schemas.
181183
- `serve` and its watch loop are suitable for technical-preview evaluation, not yet for a stable long-term operational contract.
182184
- The richer analysis surface is available, but its configuration ergonomics and operational conventions may still change in later `0.x` releases.
183185
- Release automation is designed around GitHub Releases, release manifests, OCI image publication, and an OCI Helm chart; Windows release archives can be built, but they are not the primary tested surface in this preview.
186+
- Before using a Sheaft report as blocking release evidence, review the do-not-trust signal catalogue in [Assumptions and Limitations](docs/assumptions-and-limitations.md).
184187

185188
## Development
186189

@@ -191,6 +194,7 @@ make build
191194
make test
192195
make lint
193196
make smoke-examples
197+
make benchmark-slice
194198
```
195199

196200
Direct command equivalents:
@@ -211,6 +215,7 @@ go vet ./...
211215
- [Methodology](docs/methodology.md)
212216
- [Configuration and Schemas](docs/configuration.md)
213217
- [CI Gate](docs/ci-gate.md)
218+
- [Fixed Benchmark Slice](docs/benchmark-slice.md)
214219
- [Consumer Semantics v1](docs/consumer-semantics-v1.md)
215220
- [Consumer Semantics v2](docs/consumer-semantics-v2.md)
216221
- [Versioning](VERSIONING.md)

RELEASING.md

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ A successful tagged release publishes:
4545
- `compatibility-manifest.json`
4646
- `release-manifest.json`
4747
- default config pack archive
48+
- fixed benchmark quality report (`quality-report.json`)
4849
- versioned GitHub release notes from `release/<tag>.md`, when present
4950

5051
## Local Validation
@@ -82,12 +83,13 @@ On tag push it:
8283

8384
1. runs tests;
8485
2. runs smoke checks against checked-in examples;
85-
3. builds release archives with GoReleaser;
86-
4. publishes the OCI image;
87-
5. publishes the OCI Helm chart;
88-
6. generates `release-manifest.json`;
89-
7. creates or updates the GitHub Release using `release/<tag>.md` when available;
90-
8. uploads the canonical payload to the GitHub Release.
86+
3. runs the fixed benchmark slice;
87+
4. builds release archives with GoReleaser;
88+
5. publishes the OCI image;
89+
6. publishes the OCI Helm chart;
90+
7. generates `release-manifest.json`;
91+
8. creates or updates the GitHub Release using `release/<tag>.md` when available;
92+
9. uploads the canonical payload to the GitHub Release.
9193

9294
## Generic CI Reuse
9395

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
{
2+
"$schema": "https://json-schema.org/draft/2020-12/schema",
3+
"$id": "https://github.com/MB3R-Lab/Sheaft/api/schema/benchmark-quality.schema.json",
4+
"title": "SheaftBenchmarkQualityReport",
5+
"type": "object",
6+
"required": [
7+
"schema_version",
8+
"benchmark_name",
9+
"status",
10+
"generated_at",
11+
"inputs",
12+
"outputs",
13+
"metrics",
14+
"checks"
15+
],
16+
"properties": {
17+
"schema_version": {
18+
"type": "string"
19+
},
20+
"benchmark_name": {
21+
"type": "string"
22+
},
23+
"status": {
24+
"type": "string",
25+
"enum": [
26+
"pass",
27+
"fail"
28+
]
29+
},
30+
"generated_at": {
31+
"type": "string"
32+
},
33+
"inputs": {
34+
"type": "object",
35+
"required": [
36+
"artifact",
37+
"analysis"
38+
],
39+
"properties": {
40+
"artifact": {
41+
"type": "string"
42+
},
43+
"analysis": {
44+
"type": "string"
45+
}
46+
}
47+
},
48+
"outputs": {
49+
"type": "object",
50+
"required": [
51+
"model",
52+
"report",
53+
"summary",
54+
"quality_report"
55+
],
56+
"properties": {
57+
"model": {
58+
"type": "string"
59+
},
60+
"report": {
61+
"type": "string"
62+
},
63+
"summary": {
64+
"type": "string"
65+
},
66+
"quality_report": {
67+
"type": "string"
68+
}
69+
}
70+
},
71+
"metrics": {
72+
"type": "object",
73+
"required": [
74+
"decision",
75+
"profile_count",
76+
"confidence",
77+
"cross_profile_weighted_availability",
78+
"risk_score",
79+
"unavailable_advanced_metrics",
80+
"baseline_diff_count",
81+
"stable_report_sha256",
82+
"repeat_stable_report_sha256"
83+
],
84+
"properties": {
85+
"decision": {
86+
"type": "string"
87+
},
88+
"profile_count": {
89+
"type": "integer"
90+
},
91+
"confidence": {
92+
"type": "number"
93+
},
94+
"cross_profile_weighted_availability": {
95+
"type": "number"
96+
},
97+
"risk_score": {
98+
"type": "number"
99+
},
100+
"unavailable_advanced_metrics": {
101+
"type": "integer"
102+
},
103+
"baseline_diff_count": {
104+
"type": "integer"
105+
},
106+
"stable_report_sha256": {
107+
"type": "string"
108+
},
109+
"repeat_stable_report_sha256": {
110+
"type": "string"
111+
}
112+
}
113+
},
114+
"checks": {
115+
"type": "array",
116+
"items": {
117+
"type": "object",
118+
"required": [
119+
"id",
120+
"status",
121+
"expected",
122+
"actual",
123+
"message"
124+
],
125+
"properties": {
126+
"id": {
127+
"type": "string"
128+
},
129+
"status": {
130+
"type": "string",
131+
"enum": [
132+
"pass",
133+
"fail"
134+
]
135+
},
136+
"expected": {
137+
"type": "string"
138+
},
139+
"actual": {
140+
"type": "string"
141+
},
142+
"message": {
143+
"type": "string"
144+
}
145+
}
146+
}
147+
}
148+
}
149+
}

0 commit comments

Comments
 (0)