temp

LouisTsai-Csie · LouisTsai-Csie · commit e03b40fc1cc6 · 2026-04-09T20:47:44.000+08:00
diff --git a/packages/testing/src/execution_testing/__init__.py b/packages/testing/src/execution_testing/__init__.py
@@ -40,8 +40,12 @@
     Block,
     BlockchainTest,
     BlockchainTestFiller,
+    BlockVerification,
+    ColdSloadExpected,
     Header,
+    NoTraceErrors,
     OpcodeTarget,
+    ReceiptStatusExpected,
     StateTest,
     StateTestFiller,
     TransactionTest,
@@ -140,11 +144,13 @@
     "BlobsTest",
     "BlobsTestFiller",
     "Block",
+    "BlockVerification",
     "BlockAccessList",
     "BlockAccessListExpectation",
     "BlockchainTest",
     "BlockchainTestFiller",
     "BlockException",
+    "ColdSloadExpected",
     "Bytecode",
     "Bytes",
     "BytesConcatenation",
@@ -174,7 +180,9 @@
     "Macros",
     "MemoryVariable",
     "NetworkWrappedTransaction",
+    "NoTraceErrors",
     "OpcodeTarget",
+    "ReceiptStatusExpected",
     "Op",
     "Opcode",
     "OpcodeCallArg",
diff --git a/packages/testing/src/execution_testing/specs/__init__.py b/packages/testing/src/execution_testing/specs/__init__.py
@@ -1,6 +1,13 @@
 """Test spec definitions and utilities."""
 
-from .base import BaseTest, TestSpec
+from .base import (
+    BaseTest,
+    BlockVerification,
+    ColdSloadExpected,
+    NoTraceErrors,
+    ReceiptStatusExpected,
+    TestSpec,
+)
 from .base_static import BaseStaticTest
 from .benchmark import (
     BenchmarkTest,
@@ -27,6 +34,7 @@
 __all__ = (
     "BaseStaticTest",
     "BaseTest",
+    "BlockVerification",
     "BenchmarkTest",
     "BenchmarkTestFiller",
     "BenchmarkTestSpec",
@@ -39,8 +47,11 @@
     "BlockchainTestFiller",
     "BlockchainTestSpec",
     "Block",
+    "ColdSloadExpected",
     "Header",
+    "NoTraceErrors",
     "OpcodeTarget",
+    "ReceiptStatusExpected",
     "StateStaticTest",
     "StateTest",
     "StateTestFiller",
diff --git a/packages/testing/src/execution_testing/specs/base.py b/packages/testing/src/execution_testing/specs/base.py
@@ -2,7 +2,8 @@
 Base test class and helper functions for Ethereum state and blockchain tests.
 """
 
-from abc import abstractmethod
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
 from enum import StrEnum, unique
 from functools import reduce
 from typing import (
@@ -37,9 +38,6 @@
 from execution_testing.forks import Fork, TransitionFork
 from execution_testing.forks.base_fork import BaseFork
 from execution_testing.test_types import Environment, Withdrawal
-from execution_testing.test_types.receipt_types import (
-    TransactionReceipt,
-)
 
 
 class HashMismatchExceptionError(Exception):
@@ -99,6 +97,130 @@ class FillResult(BaseModel):
     metadata: Dict[str, Any] = Field(default_factory=dict)
 
 
+@dataclass
+class BlockVerification(ABC):
+    """
+    Base class for block-level verification rules.
+
+    Each rule inspects the transition tool result for a single block and
+    raises on failure. Add new rules by subclassing and implementing
+    ``verify``, which is called with keyword-only arguments.
+    """
+
+    @abstractmethod
+    def verify(
+        self,
+        *,
+        result: Result,
+        block_number: int,
+    ) -> None:
+        """Check ``result`` for block ``block_number``; raise on failure."""
+        ...
+
+
+@dataclass
+class NoTraceErrors(BlockVerification):
+    """
+    Verify that no trace line of any transaction contains an error.
+
+    Catches silent subcall failures, out of gas, invalid jumps, and stack
+    errors. Passes without checking when ``result.traces`` is ``None``.
+    """
+
+    def verify(
+        self,
+        *,
+        result: Result,
+        block_number: int,
+    ) -> None:
+        """Raise on the first trace line whose ``error`` is set."""
+        if result.traces is None:
+            return
+        for tx_idx, tx in enumerate(result.traces.root):
+            for step, line in enumerate(tx.traces):
+                if line.error is not None:
+                    raise Exception(
+                        f"Trace error in block "
+                        f"{block_number}, "
+                        f"tx {tx_idx}, "
+                        f"step {step} "
+                        f"(pc={line.pc}, "
+                        f"op={line.op_name}, "
+                        f"depth={line.depth}): "
+                        f"{line.error}"
+                    )
+
+
+@dataclass
+class ReceiptStatusExpected(BlockVerification):
+    """
+    Verify every transaction receipt has the expected status.
+
+    Defaults to success (``status=1``). Receipts whose status is ``None``
+    are skipped. Catches silent OOG failures that roll back state and
+    invalidate benchmarks.
+    """
+
+    status: int = 1
+
+    def verify(
+        self,
+        *,
+        result: Result,
+        block_number: int,
+    ) -> None:
+        """Raise on the first receipt whose status differs from ``status``."""
+        for i, receipt in enumerate(result.receipts):
+            if receipt.status is not None and (
+                int(receipt.status) != self.status
+            ):
+                raise Exception(
+                    f"Transaction {i} in block "
+                    f"{block_number} has receipt "
+                    f"status {int(receipt.status)}, "
+                    f"expected {self.status}."
+                )
+
+
+@dataclass
+class ColdSloadExpected(BlockVerification):
+    """
+    Verify every SLOAD in the trace is a cold access.
+
+    Raises when a traced SLOAD costs less than ``min_gas_cost`` (default
+    2100); lines without a gas cost and results without traces are skipped.
+    Useful for benchmarks measuring cold storage performance.
+    """
+
+    min_gas_cost: int = 2100
+
+    def verify(
+        self,
+        *,
+        result: Result,
+        block_number: int,
+    ) -> None:
+        """Raise on the first SLOAD costing less than ``min_gas_cost``."""
+        if result.traces is None:
+            return
+        for tx_idx, tx in enumerate(result.traces.root):
+            for step, line in enumerate(tx.traces):
+                if (
+                    line.op_name == "SLOAD"
+                    and line.gas_cost is not None
+                    and int(line.gas_cost) < self.min_gas_cost
+                ):
+                    raise Exception(
+                        f"Warm SLOAD in block "
+                        f"{block_number}, "
+                        f"tx {tx_idx}, step {step} "
+                        f"(pc={line.pc}, "
+                        f"gas_cost={line.gas_cost}, "
+                        f"expected >= "
+                        f"{self.min_gas_cost})"
+                    )
+
+
 class BaseTest(BaseModel):
     """
     Represents a base Ethereum test which must return a single test fixture.
@@ -115,7 +237,7 @@ class BaseTest(BaseModel):
     gas_optimization_max_gas_limit: int | None = None
     expected_benchmark_gas_used: int | None = None
     skip_gas_used_validation: bool = False
-    expected_receipt_status: int | None = None
+    verifications: List[BlockVerification] = Field(default_factory=list)
     is_tx_gas_heavy_test: bool = False
     is_exception_test: bool = False
 
@@ -295,32 +417,23 @@ def validate_benchmark_gas(
             f"{gas_benchmark_value}"
         )
 
-    def validate_receipt_status(
+    def run_block_verifications(
         self,
         *,
-        receipts: List[TransactionReceipt],
+        result: Result,
         block_number: int,
     ) -> None:
         """
-        Validate receipt status for every transaction in a block.
+        Run all block verification rules.
 
-        When expected_receipt_status is set, verify that all
-        receipts match. Catches silent OOG failures that roll
-        back state and invalidate benchmarks.
+        Call each rule in ``self.verifications`` in order with ``result``
+        and ``block_number``; the first rule that raises aborts the run.
         """
-        if "expected_receipt_status" not in self.model_fields_set:
-            return
-        for i, receipt in enumerate(receipts):
-            if receipt.status is not None and (
-                int(receipt.status) != self.expected_receipt_status
-            ):
-                raise Exception(
-                    f"Transaction {i} in block "
-                    f"{block_number} has receipt "
-                    f"status {int(receipt.status)}, "
-                    f"expected "
-                    f"{self.expected_receipt_status}."
-                )
+        for v in self.verifications:
+            v.verify(
+                result=result,
+                block_number=block_number,
+            )
 
 
 TestSpec = Callable[[Fork], Generator[BaseTest, None, None]]
diff --git a/packages/testing/src/execution_testing/specs/benchmark.py b/packages/testing/src/execution_testing/specs/benchmark.py
@@ -41,7 +41,11 @@
 from execution_testing.test_types import Alloc, Environment, Transaction
 from execution_testing.vm import Bytecode, Op
 
-from .base import BaseTest, FillResult
+from .base import (
+    BaseTest,
+    BlockVerification,
+    FillResult,
+)
 from .blockchain import Block, BlockchainTest
 
 
@@ -307,6 +311,7 @@ class BenchmarkTest(BaseTest):
     fixed_opcode_count: float | None = None
     target_opcode: Op | OpcodeTarget | None = None
     code_generator: BenchmarkCodeGenerator | None = None
+    verifications: List[BlockVerification] = Field(default_factory=list)
     # By default, benchmark tests require neither of these
     include_full_post_state_in_output: bool = False
     include_tx_receipts_in_output: bool = False
diff --git a/packages/testing/src/execution_testing/specs/blockchain.py b/packages/testing/src/execution_testing/specs/blockchain.py
@@ -867,9 +867,9 @@ def make_fixture(
             if is_last_block and self.operation_mode == OpMode.BENCHMARKING:
                 benchmark_gas_used = int(built_block.result.gas_used)
                 benchmark_opcode_count = built_block.result.opcode_count
-            if built_block.result.receipts:
-                self.validate_receipt_status(
-                    receipts=built_block.result.receipts,
+            if block.exception is None:
+                self.run_block_verifications(
+                    result=built_block.result,
                     block_number=block_number,
                 )
             include_receipts = (
@@ -966,9 +966,9 @@ def make_hive_fixture(
             if is_last_block and self.operation_mode == OpMode.BENCHMARKING:
                 benchmark_gas_used = int(built_block.result.gas_used)
                 benchmark_opcode_count = built_block.result.opcode_count
-            if built_block.result.receipts:
-                self.validate_receipt_status(
-                    receipts=built_block.result.receipts,
+            if block.exception is None:
+                self.run_block_verifications(
+                    result=built_block.result,
                     block_number=block_number,
                 )
             fixture_payloads.append(
diff --git a/tests/benchmark/stateful/bloatnet/test_multi_opcode.py b/tests/benchmark/stateful/bloatnet/test_multi_opcode.py
@@ -17,6 +17,7 @@
     Create2PreimageLayout,
     Fork,
     Op,
+    ReceiptStatusExpected,
     Transaction,
     While,
 )
@@ -563,5 +564,5 @@ def test_mixed_sload_sstore(
         pre=pre,
         blocks=[Block(txs=txs)],
         skip_gas_used_validation=True,
-        expected_receipt_status=True,
+        verifications=[ReceiptStatusExpected()],
     )
diff --git a/tests/benchmark/stateful/bloatnet/test_single_opcode.py b/tests/benchmark/stateful/bloatnet/test_single_opcode.py
@@ -27,6 +27,7 @@
     IteratingBytecode,
     JumpLoopGenerator,
     Op,
+    ReceiptStatusExpected,
     SequentialAddressLayout,
     Storage,
     TestPhaseManager,
@@ -141,7 +142,7 @@ def run_bloated_eoa_benchmark(
         pre=pre,
         blocks=blocks,
         skip_gas_used_validation=True,
-        expected_receipt_status=True,
+        verifications=[ReceiptStatusExpected()],
     )
 
 
@@ -334,7 +335,7 @@ def test_sload_erc20_generic(
         pre=pre,
         blocks=blocks,
         skip_gas_used_validation=True,
-        expected_receipt_status=True,
+        verifications=[ReceiptStatusExpected()],
     )
 
 
@@ -640,7 +641,7 @@ def test_sstore_erc20_generic(
         pre=pre,
         blocks=blocks,
         skip_gas_used_validation=True,
-        expected_receipt_status=True,
+        verifications=[ReceiptStatusExpected()],
     )
 
 
@@ -2135,5 +2136,5 @@ def calldata(iteration_count: int, start_iteration: int) -> bytes:
         blocks=blocks,
         target_opcode=opcode,
         skip_gas_used_validation=True,
-        expected_receipt_status=1,
+        verifications=[ReceiptStatusExpected()],
     )

Original file line number	Diff line number	Diff line change
`@@ -17,6 +17,7 @@`
`17`	`17`	`Create2PreimageLayout,`
`18`	`18`	`Fork,`
`19`	`19`	`Op,`
	`20`	`+ ReceiptStatusExpected,`
`20`	`21`	`Transaction,`
`21`	`22`	`While,`
`22`	`23`	`)`
`@@ -563,5 +564,5 @@ def test_mixed_sload_sstore(`
`563`	`564`	`pre=pre,`
`564`	`565`	`blocks=[Block(txs=txs)],`
`565`	`566`	`skip_gas_used_validation=True,`
`566`		`- expected_receipt_status=True,`
	`567`	`+ verifications=[ReceiptStatusExpected()],`
`567`	`568`	`)`