runtimeverification · automergerpr-permission-manager · Apr 17, 2025 · Apr 16, 2025 · Apr 16, 2025 · Apr 16, 2025
diff --git a/.github/workflows/test-pr.yml b/.github/workflows/test-pr.yml
@@ -72,6 +72,8 @@ jobs:
         run: docker exec -u github-user kevm-ci-concrete-${{ github.sha }} /bin/bash -c 'make test-integration'
       - name: 'Test conformance'
         run: docker exec -u github-user kevm-ci-concrete-${{ github.sha }} /bin/bash -c 'make test-conformance'
+      - name: 'Test execution-spec-tests'  # runs the Makefile's test-fixtures target inside the CI container
+        run: docker exec -u github-user kevm-ci-concrete-${{ github.sha }} /bin/bash -c 'make test-fixtures'
       - name: 'Test llvm krun'
         run: docker exec -u github-user kevm-ci-concrete-${{ github.sha }} /bin/bash -c 'make test-interactive'
       - name: 'Tear down Docker'

diff --git a/.gitignore b/.gitignore
@@ -17,6 +17,7 @@
 /tests/specs/opcodes/evm-optimizations-spec.md
 /tests/specs/**/*.prove.out
 /tests/specs/**/*.sol.json
+/tests/execution-spec-tests/fixtures
 /tests/vm/*.out
 .DS_Store
 .idea/

diff --git a/Makefile b/Makefile
@@ -15,7 +15,7 @@ POETRY       := poetry -C $(KEVM_PYK_DIR)
 POETRY_RUN   := $(POETRY) run --
 
 
-.PHONY: poetry-env
+.PHONY: poetry-env download-json-fixtures test-fixtures fixtures-failing-list
 poetry-env:
 	$(POETRY) env use --no-cache $(PYTHON_BIN)
 
@@ -51,6 +51,24 @@ conformance-failing-list: poetry
 		sed -i '1{/^[[:space:]]*$$/d;}' tests/failing.llvm ;\
 	fi
 
+download-json-fixtures:  # wipe fixtures/, then run get_execution_spec_tests.sh from its own directory (presumably refills fixtures/)
+	rm -rf tests/execution-spec-tests/fixtures
+	cd tests/execution-spec-tests && bash get_execution_spec_tests.sh
+
+test-fixtures: poetry download-json-fixtures  # kevm-pyk integration run filtered with -k test_execution_spec_tests.py
+	$(MAKE) -C kevm-pyk/ test-integration PYTEST_ARGS+="-k test_execution_spec_tests.py"
+
+fixtures-failing-list: poetry download-json-fixtures  # rebuild tests/execution-spec-tests/failing.llvm; test failures are ignored
+	cat /dev/null > tests/execution-spec-tests/failing.llvm
+	- $(MAKE) -C kevm-pyk/ test-integration PYTEST_ARGS+="-k test_execution_spec_tests.py --save-failing --maxfail=10000"
+	LC_ALL=en_US.UTF-8 sort -f -d -o tests/execution-spec-tests/failing.llvm tests/execution-spec-tests/failing.llvm
+	if [ "$(shell uname)" = "Darwin" ]; then \
+		sed -i '' '1{/^[[:space:]]*$$/d;}' tests/execution-spec-tests/failing.llvm ;\
+		echo >> tests/execution-spec-tests/failing.llvm ;\
+	else \
+		sed -i '1{/^[[:space:]]*$$/d;}' tests/execution-spec-tests/failing.llvm ;\
+	fi
+
 test-vm: poetry
 	$(MAKE) -C kevm-pyk/ test-integration PYTEST_ARGS+="-k test_vm"
 

diff --git a/kevm-pyk/src/kevm_pyk/gst_to_kore.py b/kevm-pyk/src/kevm_pyk/gst_to_kore.py
@@ -38,6 +38,7 @@
         'chainname',
         'lastblockhash',
         'hasBigInt',
+        'config',
     ]
 )
 _GST_LOAD_KEYS: Final = frozenset(

diff --git a/kevm-pyk/src/kevm_pyk/interpreter.py b/kevm-pyk/src/kevm_pyk/interpreter.py
@@ -15,5 +15,8 @@
 
 def interpret(gst_data: Any, schedule: str, mode: str, chainid: int, usegas: bool, *, check: bool = True) -> Pattern:
     """Interpret the given GST data using the LLVM backend."""
+    if 'config' in gst_data:  # a `config` entry overrides the caller-supplied schedule and chain id
+        schedule = gst_data['config']['network'].upper()
+        chainid = int(gst_data['config']['chainid'], 16)  # parsed as a base-16 string
     init_kore = gst_to_kore(filter_gst_keys(gst_data), schedule, mode, chainid, usegas)
     return llvm_interpret(kdist.get('evm-semantics.llvm'), init_kore, check=check)
diff --git a/kevm-pyk/src/tests/integration/test_conformance.py b/kevm-pyk/src/tests/integration/test_conformance.py
@@ -1,27 +1,17 @@
 from __future__ import annotations
 
-import csv
-import json
 import logging
 import sys
-from pathlib import Path
 from typing import TYPE_CHECKING
 
 import pytest
-from pyk.kdist import kdist
-from pyk.kore.prelude import int_dv
-from pyk.kore.syntax import App
-from pyk.kore.tools import PrintOutput, kore_print
 
-from kevm_pyk.interpreter import interpret
-
-from ..utils import REPO_ROOT
+from ..utils import REPO_ROOT, _skipped_tests, _test
 
 if TYPE_CHECKING:
+    from pathlib import Path
     from typing import Final
 
-    from pyk.kore.syntax import Pattern
-
 
 _LOGGER: Final = logging.getLogger(__name__)
 
@@ -34,76 +24,12 @@
 SLOW_TESTS_FILE: Final = REPO_ROOT / 'tests/slow.llvm'
 
 
-def _test(gst_file: Path, *, schedule: str, mode: str, usegas: bool, save_failing: bool) -> None:
-    skipped_gst_tests = SKIPPED_TESTS.get(gst_file, [])
-    if '*' in skipped_gst_tests:
-        pytest.skip()
-
-    failing_tests: list[str] = []
-    gst_file_relative_path: Final[str] = str(gst_file.relative_to(TEST_DIR))
-    chainid = 0 if gst_file_relative_path in TEST_FILES_WITH_CID_0 else 1
-
-    with gst_file.open() as f:
-        gst_data = json.load(f)
-
-    for test_name, test in gst_data.items():
-        _LOGGER.info(f'Running test: {gst_file} - {test_name}')
-        if test_name in skipped_gst_tests:
-            continue
-        res = interpret({test_name: test}, schedule, mode, chainid, usegas, check=False)
-
-        try:
-            _assert_exit_code_zero(res)
-        except AssertionError:
-            if not save_failing:
-                raise
-            failing_tests.append(test_name)
-
-    if not failing_tests:
-        return
-    if save_failing:
-        with FAILING_TESTS_FILE.open('a', newline='') as ff:
-            writer = csv.writer(ff)
-            if len(failing_tests) == len(gst_data):
-                writer.writerow([gst_file_relative_path, '*'])
-            else:
-                for test_name in sorted(failing_tests):
-                    writer.writerow([gst_file_relative_path, test_name])
-    raise AssertionError(f'Found failing tests in GST file {gst_file_relative_path}: {failing_tests}')
-
-
-def _assert_exit_code_zero(pattern: Pattern) -> None:
-    assert type(pattern) is App
-    kevm_cell = pattern.args[0]
-    assert type(kevm_cell) is App
-    exit_code_cell = kevm_cell.args[1]
-    assert type(exit_code_cell) is App
-
-    exit_code = exit_code_cell.args[0]
-    if exit_code == int_dv(0):
-        return
-
-    pretty = kore_print(pattern, definition_dir=kdist.get('evm-semantics.llvm'), output=PrintOutput.PRETTY)
-    assert pretty == GOLDEN
-
-
-def _skipped_tests() -> dict[Path, list[str]]:
-    slow_tests = read_csv_file(SLOW_TESTS_FILE)
-    failing_tests = read_csv_file(FAILING_TESTS_FILE)
-    skipped: dict[Path, list[str]] = {}
-    for test_file, test in slow_tests + failing_tests:
-        test_file = TEST_DIR / test_file
-        skipped.setdefault(test_file, []).append(test)
-    return skipped
-
+SKIPPED_TESTS: Final = _skipped_tests(TEST_DIR, SLOW_TESTS_FILE, FAILING_TESTS_FILE)
 
-def read_csv_file(csv_file: Path) -> tuple[tuple[Path, str], ...]:
-    with csv_file.open(newline='') as file:
-        reader = csv.reader(file)
-        return tuple((Path(row[0]), row[1]) for row in reader)
 
+def compute_chain_id(gst_file: str) -> int:  # chain id 0 for files in TEST_FILES_WITH_CID_0, 1 otherwise
+    return 1 if gst_file not in TEST_FILES_WITH_CID_0 else 0
 
-SKIPPED_TESTS: Final = _skipped_tests()
 
 VM_TEST_DIR: Final = TEST_DIR / 'BlockchainTests/GeneralStateTests/VMTests'
 VM_TESTS: Final = tuple(VM_TEST_DIR.glob('*/*.json'))
@@ -116,7 +42,17 @@ def read_csv_file(csv_file: Path) -> tuple[tuple[Path, str], ...]:
     ids=[str(test_file.relative_to(VM_TEST_DIR)) for test_file in VM_TESTS],
 )
 def test_vm(test_file: Path, save_failing: bool) -> None:
-    _test(test_file, schedule='DEFAULT', mode='VMTESTS', usegas=True, save_failing=save_failing)
+    _test(
+        test_file,
+        schedule='DEFAULT',
+        mode='VMTESTS',
+        usegas=True,
+        save_failing=save_failing,
+        compute_chain_id=compute_chain_id,
+        skipped_tests=SKIPPED_TESTS,
+        test_dir=TEST_DIR,
+        failing_tests_file=FAILING_TESTS_FILE,
+    )
 
 
 @pytest.mark.skip(reason='failing / slow VM tests')
@@ -126,7 +62,17 @@ def test_vm(test_file: Path, save_failing: bool) -> None:
     ids=[str(test_file.relative_to(VM_TEST_DIR)) for test_file in SKIPPED_VM_TESTS],
 )
 def test_rest_vm(test_file: Path, save_failing: bool) -> None:
-    _test(test_file, schedule='DEFAULT', mode='VMTESTS', usegas=True, save_failing=save_failing)
+    _test(
+        test_file,
+        schedule='DEFAULT',
+        mode='VMTESTS',
+        usegas=True,
+        save_failing=save_failing,
+        compute_chain_id=compute_chain_id,
+        skipped_tests=SKIPPED_TESTS,
+        test_dir=TEST_DIR,
+        failing_tests_file=FAILING_TESTS_FILE,
+    )
 
 
 ALL_TEST_DIR: Final = TEST_DIR / 'BlockchainTests/GeneralStateTests'
@@ -141,7 +87,17 @@ def test_rest_vm(test_file: Path, save_failing: bool) -> None:
     ids=[str(test_file.relative_to(ALL_TEST_DIR)) for test_file in BCHAIN_TESTS],
 )
 def test_bchain(test_file: Path, save_failing: bool) -> None:
-    _test(test_file, schedule='CANCUN', mode='NORMAL', usegas=True, save_failing=save_failing)
+    _test(
+        test_file,
+        schedule='CANCUN',
+        mode='NORMAL',
+        usegas=True,
+        save_failing=save_failing,
+        compute_chain_id=compute_chain_id,
+        skipped_tests=SKIPPED_TESTS,
+        test_dir=TEST_DIR,
+        failing_tests_file=FAILING_TESTS_FILE,
+    )
 
 
 @pytest.mark.skip(reason='failing / slow blockchain tests')
@@ -151,4 +107,14 @@ def test_bchain(test_file: Path, save_failing: bool) -> None:
     ids=[str(test_file.relative_to(ALL_TEST_DIR)) for test_file in SKIPPED_BCHAIN_TESTS],
 )
 def test_rest_bchain(test_file: Path, save_failing: bool) -> None:
-    _test(test_file, schedule='CANCUN', mode='NORMAL', usegas=True, save_failing=save_failing)
+    _test(
+        test_file,
+        schedule='CANCUN',
+        mode='NORMAL',
+        usegas=True,
+        save_failing=save_failing,
+        compute_chain_id=compute_chain_id,
+        skipped_tests=SKIPPED_TESTS,
+        test_dir=TEST_DIR,
+        failing_tests_file=FAILING_TESTS_FILE,
+    )
diff --git a/kevm-pyk/src/tests/integration/test_execution_spec_tests.py b/kevm-pyk/src/tests/integration/test_execution_spec_tests.py
@@ -0,0 +1,111 @@
+from __future__ import annotations
+
+import logging
+import sys
+from typing import TYPE_CHECKING
+
+import pytest
+
+from ..utils import REPO_ROOT, _skipped_tests, _test
+
+if TYPE_CHECKING:
+    from pathlib import Path
+    from typing import Final
+
+
+_LOGGER: Final = logging.getLogger(__name__)
+
+sys.setrecursionlimit(10**8)
+
+# `fixtures/` is gitignored; it is expected to be created by `make download-json-fixtures`.
+WORK_DIR: Final = REPO_ROOT / 'tests/execution-spec-tests'
+TEST_DIR: Final = WORK_DIR / 'fixtures'
+FAILING_TESTS_FILE: Final = WORK_DIR / 'failing.llvm'
+SLOW_TESTS_FILE: Final = WORK_DIR / 'slow.llvm'
+
+SKIPPED_TESTS: Final = _skipped_tests(TEST_DIR, SLOW_TESTS_FILE, FAILING_TESTS_FILE)
+
+
+def _json_fixtures(fixture_dir: Path) -> tuple[Path, ...]:
+    """Return every ``*.json`` file below ``fixture_dir`` in sorted order.
+
+    Sorting keeps the parametrization order independent of directory listing order.
+    """
+    # `rglob` already searches recursively, so the pattern needs no leading `**/`.
+    return tuple(sorted(fixture_dir.rglob('*.json')))
+
+
+def chain_id_always_one(_file: str) -> int:
+    """Return chain id 1 for every fixture file; the argument is ignored."""
+    return 1
+
+
+def _run_fixture(test_file: Path, save_failing: bool) -> None:
+    """Pass one fixture file to ``_test`` with the settings shared by all suites in this module."""
+    _test(
+        test_file,
+        schedule='CANCUN',
+        mode='NORMAL',
+        usegas=True,
+        save_failing=save_failing,
+        compute_chain_id=chain_id_always_one,
+        skipped_tests=SKIPPED_TESTS,
+        test_dir=TEST_DIR,
+        failing_tests_file=FAILING_TESTS_FILE,
+    )
+
+
+BCHAIN_TEST_DIR: Final = TEST_DIR / 'blockchain_tests'
+BCHAIN_TESTS: Final = _json_fixtures(BCHAIN_TEST_DIR)
+
+
+@pytest.mark.parametrize(
+    'test_file',
+    BCHAIN_TESTS,
+    ids=[str(test_file.relative_to(BCHAIN_TEST_DIR)) for test_file in BCHAIN_TESTS],
+)
+def test_bchain(test_file: Path, save_failing: bool) -> None:
+    """Run every fixture file found under ``blockchain_tests``."""
+    _run_fixture(test_file, save_failing)
+
+
+BCHAIN_ENGINE_TEST_DIR: Final = TEST_DIR / 'blockchain_tests_engine'
+BCHAIN_ENGINE_TESTS: Final = _json_fixtures(BCHAIN_ENGINE_TEST_DIR)
+
+
+@pytest.mark.parametrize(
+    'test_file',
+    BCHAIN_ENGINE_TESTS,
+    ids=[str(test_file.relative_to(BCHAIN_ENGINE_TEST_DIR)) for test_file in BCHAIN_ENGINE_TESTS],
+)
+def test_bchain_engine(test_file: Path, save_failing: bool) -> None:
+    """Run every fixture file found under ``blockchain_tests_engine``."""
+    _run_fixture(test_file, save_failing)
+
+
+STATE_TEST_DIR: Final = TEST_DIR / 'state_tests'
+STATE_TESTS: Final = _json_fixtures(STATE_TEST_DIR)
+
+
+@pytest.mark.parametrize(
+    'test_file',
+    STATE_TESTS,
+    ids=[str(test_file.relative_to(STATE_TEST_DIR)) for test_file in STATE_TESTS],
+)
+def test_state(test_file: Path, save_failing: bool) -> None:
+    """Run every fixture file found under ``state_tests``."""
+    _run_fixture(test_file, save_failing)
+
+
+TRANSACTION_TEST_DIR: Final = TEST_DIR / 'transaction_tests'
+TRANSACTION_TESTS: Final = _json_fixtures(TRANSACTION_TEST_DIR)
+
+
+@pytest.mark.parametrize(
+    'test_file',
+    TRANSACTION_TESTS,
+    ids=[str(test_file.relative_to(TRANSACTION_TEST_DIR)) for test_file in TRANSACTION_TESTS],
+)
+def test_transaction(test_file: Path, save_failing: bool) -> None:
+    """Run every fixture file found under ``transaction_tests``."""
+    _run_fixture(test_file, save_failing)