diff --git a/Justfile b/Justfile index e7aa4247c5e..0d3372c9464 100644 --- a/Justfile +++ b/Justfile @@ -219,7 +219,7 @@ bench-gas *args: @mkdir -p "{{ output_dir }}/bench-gas/tmp" "{{ output_dir }}/bench-gas/logs" uv run fill \ --evm-bin="{{ evm_bin }}" \ - --gas-benchmark-values 1 \ + --gas-benchmark-values 0.5 \ --generate-pre-alloc-groups \ --fork Osaka \ -m "not slow" \ diff --git a/docs/running_tests/releases.md b/docs/running_tests/releases.md index 057f2302bc3..0f66fae8eff 100644 --- a/docs/running_tests/releases.md +++ b/docs/running_tests/releases.md @@ -52,15 +52,16 @@ follow the same layout. When filling with `--gas-benchmark-values`, benchmark tests additionally include the gas limit in the subdirectory name (`for_{fork}_at_{gas}M`, -where `{gas}` is in millions, zero-padded to four digits), with one +where `{gas}` is in millions and the integer part is zero-padded to four +digits), with one subdirectory per gas value: ```text fixtures/ └── blockchain_tests/ - ├── for_osaka_at_0001M/ # 1M gas benchmark + ├── for_osaka_at_0000.5M/ # 0.5M gas benchmark │ └── benchmark/compute/... - └── for_osaka_at_0002M/ # 2M gas benchmark + └── for_osaka_at_0001.0M/ # 1M gas benchmark └── benchmark/compute/... ``` diff --git a/docs/templates/base.md.j2 b/docs/templates/base.md.j2 index 0316ca26f1c..46e259c4650 100644 --- a/docs/templates/base.md.j2 +++ b/docs/templates/base.md.j2 @@ -10,7 +10,7 @@ Documentation for [`{{ pytest_node_id }}@{{ short_git_ref }}`]({{ source_code_ur ```console {% if is_benchmark %} - fill -v {{ pytest_node_id }} --gas-benchmark-values 1 + fill -v {{ pytest_node_id }} --gas-benchmark-values 0.5 {% else %} fill -v {{ pytest_node_id }} --fork {{ target_or_valid_fork }} {% endif %} @@ -19,4 +19,4 @@ Documentation for [`{{ pytest_node_id }}@{{ short_git_ref }}`]({{ source_code_ur {% block additional_content %} {# templates that inherit from base can add an additional_content block here #} -{% endblock %} \ No newline at end of file +{% endblock %} diff --git a/docs/writing_tests/benchmarks.md b/docs/writing_tests/benchmarks.md index 4d81f3ba87e..6019fc57501 100644 --- a/docs/writing_tests/benchmarks.md +++ b/docs/writing_tests/benchmarks.md @@ -62,7 +62,7 @@ This mode is primarily used for gas repricing analysis, where it enables: ### Worst-Case Mode In worst-case mode, users specify a target block gas limit instead of an opcode count. -By providing `--gas-benchmark-values N` (where N denotes the gas limit in millions), the benchmark construction process packs each block with as many instances as possible of the selected operation. +By providing `--gas-benchmark-values N` (where N denotes the gas limit in millions), the benchmark construction process packs each block with as many instances as possible of the selected operation. Fractional Mgas values such as `0.5` are also supported. This mode is designed for gas limit testing, and gas repricing, where it enables: @@ -71,6 +71,7 @@ This mode is designed for gas limit testing, and gas repricing, where it enables **Note:** For both benchmark modes, users may supply multiple values in a single invocation. For example: +- `--gas-benchmark-values 0.5,1,2` runs the test with 0.5M, 1M, and 2M block gas limits - `--gas-benchmark-values 1,2,3` runs the test with 1M, 2M, and 3M block gas limits - `--fixed-opcode-count 4,5` runs the test with approximately 4K and 5K opcode executions @@ -79,18 +80,18 @@ This mode is designed for gas limit testing, and gas repricing, where it enables ```text / blockchain_tests/ - for_osaka_at_0001M/... - for_osaka_at_0002M/... + for_osaka_at_0000.5M/... + for_osaka_at_0001.0M/... blockchain_tests_engine/ - for_osaka_at_0001M/... - for_osaka_at_0002M/... + for_osaka_at_0000.5M/... + for_osaka_at_0001.0M/... blockchain_tests_engine_x/ pre_alloc/... - for_osaka_at_0001M/... - for_osaka_at_0002M/... + for_osaka_at_0000.5M/... + for_osaka_at_0001.0M/... ``` -The subdirectory name follows the pattern `for_{fork}_at_{gas}M` (see [Fixture Output Directory Structure](../running_tests/releases.md#fixture-output-directory-structure) for details). Non-benchmark (consensus) fixtures use `for_{fork}` without the gas limit suffix. +The subdirectory name follows the pattern `for_{fork}_at_{gas}M` (see [Fixture Output Directory Structure](../running_tests/releases.md#fixture-output-directory-structure) for details). Integer-only inputs keep the existing `0001M` style; fractional inputs add a decimal suffix while keeping the integer part zero-padded for sorting. Non-benchmark (consensus) fixtures use `for_{fork}` without the gas limit suffix. **Output layout with fixed opcode counts:** When `--fixed-opcode-count` is provided, the subdirectory name uses the opcode count instead of the gas limit (`for_{fork}_at_opcount_{N}K`): @@ -123,7 +124,7 @@ def test_benchmark( ... ``` -For example, running the test with `--gas-benchmark-values 1,10,45,60`. will execute the test 4 times, passing `gas_benchmark_value` as 1M, 10M, 45M, and 60M respectively. +For example, running the test with `--gas-benchmark-values 0.5,1,10,45`. will execute the test 4 times, passing `gas_benchmark_value` as 500k, 1M, 10M, and 45M gas respectively. Never configure the transaction / block gas limit to `env.gas_limit`. When running in benchmark mode, the test framework sets this value to a very large number (e.g., `1_000_000_000_000`), this setup allows the framework to reuse a single genesis file for all specified gas limits. I.e., the example below should be avoided: diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_benchmarking.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_benchmarking.py index f611ccdb4d1..c2e7c354ac7 100644 --- a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_benchmarking.py +++ b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_benchmarking.py @@ -10,11 +10,14 @@ import pytest from execution_testing.cli.pytest_commands.plugins.shared.benchmarking import ( + GasBenchmarkValues, OpcodeCountsConfig, + format_gas_limit_label, ) from execution_testing.cli.pytest_commands.plugins.shared.fixture_output import ( # noqa: E501 FORK_SUBDIR_PREFIX, SUBFOLDER_LEVEL_SEPARATOR, + format_gas_limit_prefix, format_fork_subdir, ) @@ -186,7 +189,7 @@ def test_benchmarking_mode_configured_with_option( "--fork", "Prague", "--gas-benchmark-values", - "10,20,30", + "0.5,1,2", "tests/benchmark/dummy_test_module/", "--collect-only", "-q", @@ -195,9 +198,74 @@ def test_benchmarking_mode_configured_with_option( assert result.ret == 0 assert any("6 tests collected" in line for line in result.outlines) # Check that the test names include the benchmark gas values - assert any("benchmark-gas-value_10M" in line for line in result.outlines) - assert any("benchmark-gas-value_20M" in line for line in result.outlines) - assert any("benchmark-gas-value_30M" in line for line in result.outlines) + assert any("benchmark-gas-value_0.5M" in line for line in result.outlines) + assert any("benchmark-gas-value_1M" in line for line in result.outlines) + assert any("benchmark-gas-value_2M" in line for line in result.outlines) + + +@pytest.mark.parametrize( + "cli_input,expected_gas_values,expected_ids", + [ + pytest.param( + "0.5", + [500_000], + ["benchmark-gas-value_0.5M"], + id="single_fractional", + ), + pytest.param( + "0.5,1,2", + [500_000, 1_000_000, 2_000_000], + [ + "benchmark-gas-value_0.5M", + "benchmark-gas-value_1M", + "benchmark-gas-value_2M", + ], + id="mixed_fractional_and_integer", + ), + pytest.param( + "1.25", + [1_250_000], + ["benchmark-gas-value_1.25M"], + id="fractional_precision", + ), + ], +) +def test_gas_benchmark_values_valid_input( + cli_input: str, + expected_gas_values: list[int], + expected_ids: list[str], +) -> None: + """Valid decimal Mgas inputs are converted into integer gas budgets.""" + mock_config = MagicMock() + + result = GasBenchmarkValues.from_parameter_value(mock_config, cli_input) + assert result is not None + assert result.root == expected_gas_values + + params = result.get_test_parameters("test_benchmark") + assert [param.values[0] for param in params] == expected_gas_values + assert [param.id for param in params] == expected_ids + + +@pytest.mark.parametrize("cli_input", ["", "0", "-1", "abc", "0.0000001"]) +def test_gas_benchmark_values_invalid_input(cli_input: str) -> None: + """Invalid decimal Mgas inputs raise a usage error.""" + mock_config = MagicMock() + + with pytest.raises(pytest.UsageError): + GasBenchmarkValues.from_parameter_value(mock_config, cli_input) + + +def test_gas_benchmark_value_formatting() -> None: + """Format decimal gas benchmark labels and subdirectories consistently.""" + gas_values = [500_000, 1_000_000, 1_250_000] + + assert format_gas_limit_label(500_000) == "0.5" + assert format_gas_limit_label(1_000_000) == "1" + assert format_gas_limit_label(1_250_000) == "1.25" + assert format_gas_limit_prefix(500_000, gas_values) == "0000.50M" + assert format_gas_limit_prefix(1_000_000, gas_values) == "0001.00M" + assert format_gas_limit_prefix(1_250_000, gas_values) == "0001.25M" def test_benchmark_gas_values_split_into_subdirs( @@ -301,6 +369,62 @@ def _assert_keys( ) +def test_fractional_benchmark_gas_values_split_into_subdirs( + pytester: pytest.Pytester, tmp_path: Path +) -> None: + """Ensure fractional Mgas outputs use distinct sortable directories.""" + setup_test_directory_structure( + pytester, test_module_dummy, "test_dummy_benchmark.py" + ) + + output_dir = tmp_path / "fixtures" + result = pytester.runpytest( + "-c", + "pytest-fill.ini", + "--fork", + "Prague", + "--gas-benchmark-values", + "0.5,1", + "-m", + "blockchain_test and not derived_test", + "--no-html", + "--skip-index", + f"--output={output_dir}", + "tests/benchmark/dummy_test_module/", + "-q", + ) + + assert result.ret == 0, f"Fill command failed:\n{result.outlines}" + + gas_half_subdir = format_fork_subdir("Prague", "0000.5M") + gas_one_subdir = format_fork_subdir("Prague", "0001.0M") + gas_half_dir = output_dir / "blockchain_tests" / gas_half_subdir + gas_one_dir = output_dir / "blockchain_tests" / gas_one_subdir + assert gas_half_dir.exists() + assert gas_one_dir.exists() + + gas_half_files = list(gas_half_dir.rglob("*.json")) + gas_one_files = list(gas_one_dir.rglob("*.json")) + assert gas_half_files, f"Expected fixtures under {gas_half_subdir}" + assert gas_one_files, f"Expected fixtures under {gas_one_subdir}" + + for file_path in gas_half_files: + data = json.loads(file_path.read_text()) + for key in data: + assert "benchmark-gas-value_0.5M" in key, ( + f"Expected benchmark-gas-value_0.5M in key {key} " + f"({file_path})" + ) + + for file_path in gas_one_files: + data = json.loads(file_path.read_text()) + for key in data: + assert "benchmark-gas-value_1M" in key, ( + f"Expected benchmark-gas-value_1M in key {key} " + f"({file_path})" + ) + + def test_fixed_opcode_count_split_into_subdirs( pytester: pytest.Pytester, tmp_path: Path ) -> None: @@ -382,13 +506,13 @@ def test_benchmarking_mode_not_configured_without_option( # Should generate normal test variants (2) without parametrization assert any("2 tests collected" in line for line in result.outlines) assert not any( - "benchmark-gas-value_10M" in line for line in result.outlines + "benchmark-gas-value_0.5M" in line for line in result.outlines ) assert not any( - "benchmark-gas-value_20M" in line for line in result.outlines + "benchmark-gas-value_1M" in line for line in result.outlines ) assert not any( - "benchmark-gas-value_30M" in line for line in result.outlines + "benchmark-gas-value_2M" in line for line in result.outlines ) diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_filler.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_filler.py index 73e97d70f62..c568a53e2ad 100644 --- a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_filler.py +++ b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_filler.py @@ -1118,7 +1118,7 @@ def test_benchmark_default_uses_gas_benchmark_mode( [ pytest.param( "--gas-benchmark-values", - "1", + "0.5", id="gas-benchmark-values", ), pytest.param( @@ -1253,7 +1253,7 @@ def test_execute_benchmark_default_collects_without_flags( [ pytest.param( "--gas-benchmark-values", - "1", + "0.5", id="gas-benchmark-values", ), pytest.param( diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/benchmarking.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/benchmarking.py index 2a47ed80365..d2bcccdbad4 100644 --- a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/benchmarking.py +++ b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/benchmarking.py @@ -2,6 +2,7 @@ import re from abc import ABC, abstractmethod +from decimal import Decimal, InvalidOperation from pathlib import Path from typing import ClassVar, Dict, List, Self @@ -19,6 +20,8 @@ format_gas_limit_prefix, ) +GAS_PER_MEGAGAS = Decimal("1000000") + def pytest_addoption(parser: pytest.Parser) -> None: """Add command line options for benchmark tests.""" @@ -33,7 +36,7 @@ def pytest_addoption(parser: pytest.Parser) -> None: default=None, help=( "Gas limits (in millions) for benchmark tests. " - "Example: '100,500' runs tests with 100M and 500M gas. " + "Example: '0.5,1,2' runs tests with 0.5M, 1M, and 2M gas. " "Benchmark outputs are grouped under " f"{FORK_SUBDIR_PREFIX}{{fork}}" f"{SUBFOLDER_LEVEL_SEPARATOR}XXXXM/ subdirectories. " @@ -185,14 +188,18 @@ def from_parameter_value( cls, _config: pytest.Config, value: str ) -> Self | None: """Given the parameter value and config, return the expected object.""" - return cls.model_validate(value.split(",")) + gas_values = [] + for raw_value in value.split(","): + gas_values.append(_parse_gas_limit_millions(raw_value)) + + return cls.model_validate(gas_values) def get_test_parameters(self, _test_name: str) -> list[ParameterSet]: """Get benchmark values. All tests have the same list.""" return [ pytest.param( - gas_value * 1_000_000, - id=f"benchmark-gas-value_{gas_value}M", + gas_value, + id=f"benchmark-gas-value_{format_gas_limit_label(gas_value)}M", marks=[ pytest.mark.fixture_subfolder( level=1, @@ -204,6 +211,47 @@ def get_test_parameters(self, _test_name: str) -> list[ParameterSet]: ] +def _parse_gas_limit_millions(value: str) -> int: + """Parse a gas benchmark value expressed in millions into gas units.""" + stripped = value.strip() + if stripped == "": + raise pytest.UsageError( + "Invalid value for --gas-benchmark-values: empty entry found. " + "Expected comma-separated positive numbers such as '0.5,1,2'." + ) + + try: + gas_limit_millions = Decimal(stripped) + except InvalidOperation as exc: + raise pytest.UsageError( + f"Invalid value for --gas-benchmark-values: '{value}'. " + "Expected comma-separated positive numbers such as '0.5,1,2'." + ) from exc + + if not gas_limit_millions.is_finite() or gas_limit_millions <= 0: + raise pytest.UsageError( + f"Invalid value for --gas-benchmark-values: '{value}'. " + "Each gas benchmark value must be a positive number." + ) + + gas_limit = gas_limit_millions * GAS_PER_MEGAGAS + if gas_limit != gas_limit.to_integral_value(): + raise pytest.UsageError( + f"Invalid value for --gas-benchmark-values: '{value}'. " + "Each gas benchmark value must resolve to a whole number of gas " + "units after multiplying by 1_000_000." + ) + + return int(gas_limit) + + +def format_gas_limit_label(gas_limit: int) -> str: + """Format a gas benchmark value as a human-readable Mgas label.""" + return format(Decimal(gas_limit) / GAS_PER_MEGAGAS, "f").rstrip( + "0" + ).rstrip(".") + + class OpcodeCountsConfig(BaseModel, BenchmarkParametrizer): """Opcode counts configuration object.""" diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/fixture_output.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/fixture_output.py index 166744f15fe..743cc02f3b3 100644 --- a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/fixture_output.py +++ b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/fixture_output.py @@ -4,6 +4,7 @@ import subprocess import tarfile import warnings +from decimal import Decimal from pathlib import Path import pytest @@ -312,15 +313,42 @@ def from_config(cls, config: pytest.Config) -> "FixtureOutput": FORK_SUBDIR_PREFIX = "for_" SUBFOLDER_LEVEL_SEPARATOR = "_at_" +GAS_PER_MEGAGAS = Decimal("1000000") def format_gas_limit_prefix( - gas_value_millions: int, all_values_millions: list[int] + gas_value: int, all_values: list[int] ) -> str: """Return a stable, sortable gas-limit prefix for a fixture subfolder.""" - max_value = max(all_values_millions) if all_values_millions else 0 - width = max(4, len(str(max_value))) - return f"{gas_value_millions:0{width}d}M" + width = max( + 4, + max( + len(_split_gas_limit_millions(value)[0]) + for value in (all_values or [gas_value]) + ), + ) + precision = max( + _split_gas_limit_millions(value)[1] + for value in (all_values or [gas_value]) + ) + mgas = Decimal(gas_value) / GAS_PER_MEGAGAS + if precision == 0: + return f"{int(mgas):0{width}d}M" + + formatted = format(mgas, f".{precision}f") + integer_part, fractional_part = formatted.split(".") + return f"{int(integer_part):0{width}d}.{fractional_part}M" + + +def _split_gas_limit_millions(gas_value: int) -> tuple[str, int]: + """Return the integer part width and fractional precision in Mgas.""" + formatted = format(Decimal(gas_value) / GAS_PER_MEGAGAS, "f") + trimmed = formatted.rstrip("0").rstrip(".") + if "." not in trimmed: + return trimmed, 0 + + integer_part, fractional_part = trimmed.split(".") + return integer_part, len(fractional_part) def format_fork_subdir(