chore(ci): defer benchmark changes to separate PR

spencer-tb · spencer-tb · commit bfafa909dcb2 · 2026-06-03T17:05:51.000+01:00
diff --git a/.github/configs/feature.yaml b/.github/configs/feature.yaml
@@ -11,7 +11,13 @@ tests:
 
 benchmark:
   evm-type: benchmark
-  fill-params: --fork=Amsterdam --gas-benchmark-values 1,10,30,60,100,150 ./tests/benchmark
+  fill-params: --fork=Osaka --generate-all-formats --gas-benchmark-values 1,5,10,30,60,100,150 ./tests/benchmark/compute --maxprocesses=30 --dist=worksteal
+  feature_only: true
+
+benchmark_fast:
+  evm-type: benchmark
+  fill-params: --fork=Osaka --generate-all-formats --gas-benchmark-values 100 ./tests/benchmark/compute
+  feature_only: true
 
 # Shared entry for all `<feat>-devnet` releases; matched by `-devnet` suffix.
 devnet:
diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml
@@ -83,3 +83,18 @@ jobs:
 
   # TODO: Add execute remote tests with --gas-benchmark-values
   # TODO: Add execute remote tests with --fixed-opcode-count
+
+  build-artifact:
+    name: Build Benchmark Fixture Artifact
+    needs: [sanity-checks] # TODO: Add execute remote jobs when implemented
+    if: github.event_name == 'push'
+    runs-on: [self-hosted-ghr, size-gigachungus-x64]
+    timeout-minutes: 720
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          submodules: true
+
+      - uses: ./.github/actions/build-fixtures
+        with:
+          release_name: benchmark_fast
diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_benchmarking.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_benchmarking.py
@@ -193,7 +193,7 @@ def test_benchmarking_mode_configured_with_option(
     )
 
     assert result.ret == 0
-    assert any("3 tests collected" in line for line in result.outlines)
+    assert any("6 tests collected" in line for line in result.outlines)
     # Check that the test names include the benchmark gas values
     assert any("benchmark-gas-value_10M" in line for line in result.outlines)
     assert any("benchmark-gas-value_20M" in line for line in result.outlines)
@@ -379,8 +379,8 @@ def test_benchmarking_mode_not_configured_without_option(
     )
 
     assert result.ret == 0
-    # Should generate normal test variants (1) without parametrization
-    assert any("1 test collected" in line for line in result.outlines)
+    # Should generate normal test variants (2) without parametrization
+    assert any("2 tests collected" in line for line in result.outlines)
     assert not any(
         "benchmark-gas-value_10M" in line for line in result.outlines
     )
@@ -523,7 +523,7 @@ def test_repricing_marker_with_kwargs_filters_parametrized_tests(
     ]
 
     # test_parametrized_with_repricing_kwargs should only have ADD variants
-    # (the blockchain_test fixture type)
+    # (across test types, e.g. blockchain_test and blockchain_test_engine)
     kwargs_test_lines = [
         line
         for line in collected_lines
@@ -863,29 +863,29 @@ def test_fixed_opcode_count_invalid_regex_raises_error() -> None:
 @pytest.mark.parametrize(
     "config_counts,expected_tests,expected_ids",
     [
-        pytest.param([1], 1, ["opcount_1"], id="single_int"),
+        pytest.param([1], 2, ["opcount_1"], id="single_int"),
         pytest.param(
             [1, 2, 3],
-            3,
+            6,
             ["opcount_1", "opcount_2", "opcount_3"],
             id="multiple_ints",
         ),
-        pytest.param([0.5], 1, ["opcount_0.5"], id="single_float"),
+        pytest.param([0.5], 2, ["opcount_0.5"], id="single_float"),
         pytest.param(
             [0.5, 1, 2],
-            3,
+            6,
             ["opcount_0.5", "opcount_1", "opcount_2"],
             id="multiple_floats",
         ),
         pytest.param(
             [1, 0.5, 2],
-            3,
+            6,
             ["opcount_1", "opcount_0.5", "opcount_2"],
             id="mixed_int_float",
         ),
         pytest.param(
             [1, 2, 3, 5],
-            4,
+            8,
             ["opcount_1", "opcount_2", "opcount_3", "opcount_5"],
             id="four_ints",
         ),
@@ -943,7 +943,7 @@ def test_fixed_opcode_count_config_file_parametrized(
     )
 
     assert result.ret == 0
-    # Check expected number of tests (1 test type * len(counts))
+    # Check expected number of tests (2 test types * len(counts))
     assert any(f"{expected_tests} passed" in line for line in result.outlines)
     # Check opcode count IDs are present
     for expected_id in expected_ids:
diff --git a/packages/testing/src/execution_testing/specs/benchmark.py b/packages/testing/src/execution_testing/specs/benchmark.py
@@ -31,6 +31,7 @@
     TransactionPost,
 )
 from execution_testing.fixtures import (
+    BlockchainEngineFixture,
     BlockchainEngineXFixture,
     BlockchainFixture,
     FixtureFormat,
@@ -315,6 +316,7 @@ class BenchmarkTest(BaseTest):
         Sequence[FixtureFormat | LabeledFixtureFormat]
     ] = [
         BlockchainFixture,
+        BlockchainEngineFixture,
         BlockchainEngineXFixture,
     ]
 
@@ -327,6 +329,9 @@ class BenchmarkTest(BaseTest):
     ]
 
     supported_markers: ClassVar[Dict[str, str]] = {
+        "blockchain_test_engine_only": (
+            "Only generate a blockchain test engine fixture"
+        ),
         "blockchain_test_only": "Only generate a blockchain test fixture",
         "repricing": "Mark test as reference test for gas repricing analysis",
     }
@@ -425,6 +430,8 @@ def discard_fixture_format_by_marks(
 
         if "blockchain_test_only" in [m.name for m in markers]:
             return fixture_format != BlockchainFixture
+        if "blockchain_test_engine_only" in [m.name for m in markers]:
+            return fixture_format != BlockchainEngineFixture
         return False
 
     def get_genesis_environment(self) -> Environment: