feat(ci): overhaul benchmark releases

spencer-tb · spencer-tb · commit 1a8f05608d26 · 2026-06-04T12:16:47.000+01:00
diff --git a/.github/configs/feature.yaml b/.github/configs/feature.yaml
@@ -11,13 +11,7 @@ tests:
 
 benchmark:
   evm-type: benchmark
-  fill-params: --fork=Osaka --generate-all-formats --gas-benchmark-values 1,5,10,30,60,100,150 ./tests/benchmark/compute --maxprocesses=30 --dist=worksteal
-  feature_only: true
-
-benchmark_fast:
-  evm-type: benchmark
-  fill-params: --fork=Osaka --generate-all-formats --gas-benchmark-values 100 ./tests/benchmark/compute
-  feature_only: true
+  fill-params: --fork=Amsterdam --gas-benchmark-values 1,10,30,60,100,150 ./tests/benchmark
 
 # Shared entry for all `<feat>-devnet` releases; matched by `-devnet` suffix.
 devnet:
diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml
@@ -83,18 +83,3 @@ jobs:
 
   # TODO: Add execute remote tests with --gas-benchmark-values
   # TODO: Add execute remote tests with --fixed-opcode-count
-
-  build-artifact:
-    name: Build Benchmark Fixture Artifact
-    needs: [sanity-checks] # TODO: Add execute remote jobs when implemented
-    if: github.event_name == 'push'
-    runs-on: [self-hosted-ghr, size-gigachungus-x64]
-    timeout-minutes: 720
-    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-        with:
-          submodules: true
-
-      - uses: ./.github/actions/build-fixtures
-        with:
-          release_name: benchmark_fast
diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_benchmarking.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_benchmarking.py
@@ -193,7 +193,7 @@ def test_benchmarking_mode_configured_with_option(
     )
 
     assert result.ret == 0
-    assert any("6 tests collected" in line for line in result.outlines)
+    assert any("3 tests collected" in line for line in result.outlines)
     # Check that the test names include the benchmark gas values
     assert any("benchmark-gas-value_10M" in line for line in result.outlines)
     assert any("benchmark-gas-value_20M" in line for line in result.outlines)
@@ -379,8 +379,8 @@ def test_benchmarking_mode_not_configured_without_option(
     )
 
     assert result.ret == 0
-    # Should generate normal test variants (2) without parametrization
-    assert any("2 tests collected" in line for line in result.outlines)
+    # Should generate the single normal test variant without parametrization
+    assert any("1 test collected" in line for line in result.outlines)
     assert not any(
         "benchmark-gas-value_10M" in line for line in result.outlines
     )
@@ -523,7 +523,7 @@ def test_repricing_marker_with_kwargs_filters_parametrized_tests(
     ]
 
     # test_parametrized_with_repricing_kwargs should only have ADD variants
-    # (multiple test types like blockchain_test and blockchain_test_engine)
+    # (the blockchain_test fixture type)
     kwargs_test_lines = [
         line
         for line in collected_lines
@@ -863,29 +863,29 @@ def test_fixed_opcode_count_invalid_regex_raises_error() -> None:
 @pytest.mark.parametrize(
     "config_counts,expected_tests,expected_ids",
     [
-        pytest.param([1], 2, ["opcount_1"], id="single_int"),
+        pytest.param([1], 1, ["opcount_1"], id="single_int"),
         pytest.param(
             [1, 2, 3],
-            6,
+            3,
             ["opcount_1", "opcount_2", "opcount_3"],
             id="multiple_ints",
         ),
-        pytest.param([0.5], 2, ["opcount_0.5"], id="single_float"),
+        pytest.param([0.5], 1, ["opcount_0.5"], id="single_float"),
         pytest.param(
             [0.5, 1, 2],
-            6,
+            3,
             ["opcount_0.5", "opcount_1", "opcount_2"],
             id="multiple_floats",
         ),
         pytest.param(
             [1, 0.5, 2],
-            6,
+            3,
             ["opcount_1", "opcount_0.5", "opcount_2"],
             id="mixed_int_float",
         ),
         pytest.param(
             [1, 2, 3, 5],
-            8,
+            4,
             ["opcount_1", "opcount_2", "opcount_3", "opcount_5"],
             id="four_ints",
         ),
@@ -943,7 +943,7 @@ def test_fixed_opcode_count_config_file_parametrized(
     )
 
     assert result.ret == 0
-    # Check expected number of tests (2 test types * len(counts))
+    # Check expected number of tests (1 test type * len(counts))
     assert any(f"{expected_tests} passed" in line for line in result.outlines)
     # Check opcode count IDs are present
     for expected_id in expected_ids:
diff --git a/packages/testing/src/execution_testing/specs/benchmark.py b/packages/testing/src/execution_testing/specs/benchmark.py
@@ -31,7 +31,6 @@
     TransactionPost,
 )
 from execution_testing.fixtures import (
-    BlockchainEngineFixture,
     BlockchainEngineXFixture,
     BlockchainFixture,
     FixtureFormat,
@@ -316,7 +315,6 @@ class BenchmarkTest(BaseTest):
         Sequence[FixtureFormat | LabeledFixtureFormat]
     ] = [
         BlockchainFixture,
-        BlockchainEngineFixture,
         BlockchainEngineXFixture,
     ]
 
@@ -329,9 +327,6 @@ class BenchmarkTest(BaseTest):
     ]
 
     supported_markers: ClassVar[Dict[str, str]] = {
-        "blockchain_test_engine_only": (
-            "Only generate a blockchain test engine fixture"
-        ),
         "blockchain_test_only": "Only generate a blockchain test fixture",
         "repricing": "Mark test as reference test for gas repricing analysis",
     }
@@ -430,8 +425,6 @@ def discard_fixture_format_by_marks(
 
         if "blockchain_test_only" in [m.name for m in markers]:
             return fixture_format != BlockchainFixture
-        if "blockchain_test_engine_only" in [m.name for m in markers]:
-            return fixture_format != BlockchainEngineFixture
         return False
 
     def get_genesis_environment(self) -> Environment: