Skip to content

Commit 30ffc8f

Browse files
committed
replace marker-based e2e sharding
1 parent 6fd4dde commit 30ffc8f

File tree

5 files changed

+230
-35
lines changed

5 files changed

+230
-35
lines changed

.github/workflows/ci.yml

Lines changed: 60 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -98,18 +98,22 @@ jobs:
9898
fail-fast: false
9999
matrix:
100100
include:
101-
- shard-name: core
102-
test-root: tests/e2e
103-
parallel-marker: "e2e_core and not serial_e2e"
104-
serial-marker: "e2e_core and serial_e2e"
105-
- shard-name: data
106-
test-root: tests/e2e
107-
parallel-marker: "e2e_data and not serial_e2e"
108-
serial-marker: "e2e_data and serial_e2e"
109-
- shard-name: live-provider
110-
test-root: tests/live_provider
111-
parallel-marker: "live_provider"
112-
serial-marker: ""
101+
- suite: e2e
102+
job_name: E2E shard 1 tests on Python 3.13
103+
shard_name: shard-1
104+
shard_index: 0
105+
shard_count: 2
106+
cache_writer: true
107+
- suite: e2e
108+
job_name: E2E shard 2 tests on Python 3.13
109+
shard_name: shard-2
110+
shard_index: 1
111+
shard_count: 2
112+
cache_writer: false
113+
- suite: live_provider
114+
job_name: E2E live-provider tests on Python 3.13
115+
shard_name: live-provider
116+
cache_writer: false
113117
env:
114118
LANGFUSE_BASE_URL: "http://localhost:3000"
115119
LANGFUSE_PUBLIC_KEY: "pk-lf-1234567890"
@@ -130,7 +134,7 @@ jobs:
130134
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
131135
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
132136

133-
name: E2E ${{ matrix.shard-name }} tests on Python 3.13
137+
name: ${{ matrix.job_name }}
134138
steps:
135139
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
136140
- name: Install uv and set Python version
@@ -199,7 +203,7 @@ jobs:
199203
docker compose up -d
200204
echo "::endgroup::"
201205
- name: Save langfuse server images to cache
202-
if: ${{ steps.docker-image-cache.outputs.cache-hit != 'true' && matrix.shard-name == 'core' }}
206+
if: ${{ steps.docker-image-cache.outputs.cache-hit != 'true' && matrix.cache_writer }}
203207
run: |
204208
mkdir -p ./langfuse-server/docker-image-cache
205209
while read -r image; do
@@ -228,15 +232,53 @@ jobs:
228232
done
229233
echo "Langfuse server is up and running!"
230234
231-
- name: Run the end-to-end tests
235+
- name: Select e2e shard files
236+
if: ${{ matrix.suite == 'e2e' }}
237+
run: |
238+
uv run --frozen python scripts/select_e2e_shard.py \
239+
--shard-index ${{ matrix.shard_index }} \
240+
--shard-count ${{ matrix.shard_count }} \
241+
--json
242+
uv run --frozen python scripts/select_e2e_shard.py \
243+
--shard-index ${{ matrix.shard_index }} \
244+
--shard-count ${{ matrix.shard_count }} \
245+
> "$RUNNER_TEMP/e2e-shard-files.txt"
246+
cat "$RUNNER_TEMP/e2e-shard-files.txt"
247+
248+
- name: Run the parallel end-to-end tests
249+
if: ${{ matrix.suite == 'e2e' }}
232250
run: |
233251
uv run --frozen python --version
234-
uv run --frozen pytest -n 4 --dist worksteal -s -v --log-cli-level=INFO ${{ matrix.test-root }} -m "${{ matrix.parallel-marker }}"
252+
mapfile -t e2e_files < "$RUNNER_TEMP/e2e-shard-files.txt"
253+
set +e
254+
uv run --frozen pytest -n 4 --dist worksteal -s -v --log-cli-level=INFO "${e2e_files[@]}" -m "not serial_e2e"
255+
status=$?
256+
set -e
257+
if [ "$status" -eq 5 ]; then
258+
echo "No parallel e2e tests selected for this shard."
259+
elif [ "$status" -ne 0 ]; then
260+
exit "$status"
261+
fi
235262
236263
- name: Run serial end-to-end tests
237-
if: ${{ matrix.serial-marker != '' }}
264+
if: ${{ matrix.suite == 'e2e' }}
238265
run: |
239-
uv run --frozen pytest -s -v --log-cli-level=INFO ${{ matrix.test-root }} -m "${{ matrix.serial-marker }}"
266+
mapfile -t e2e_files < "$RUNNER_TEMP/e2e-shard-files.txt"
267+
set +e
268+
uv run --frozen pytest -s -v --log-cli-level=INFO "${e2e_files[@]}" -m "serial_e2e"
269+
status=$?
270+
set -e
271+
if [ "$status" -eq 5 ]; then
272+
echo "No serial e2e tests selected for this shard."
273+
elif [ "$status" -ne 0 ]; then
274+
exit "$status"
275+
fi
276+
277+
- name: Run live-provider tests
278+
if: ${{ matrix.suite == 'live_provider' }}
279+
run: |
280+
uv run --frozen python --version
281+
uv run --frozen pytest -n 4 --dist worksteal -s -v --log-cli-level=INFO tests/live_provider -m "live_provider"
240282
241283
all-tests-passed:
242284
# This allows us to have a branch protection rule for tests and deploys with matrix

pyproject.toml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,8 @@ log_cli = true
5555
markers = [
5656
"unit: deterministic tests that run without a Langfuse server",
5757
"e2e: tests that require a real Langfuse server or persisted backend behaviour",
58-
"e2e_core: the explicitly curated core e2e shard",
59-
"e2e_data: the catch-all e2e shard for everything not in e2e_core",
6058
"serial_e2e: e2e tests that must not share server concurrency with the rest of the suite",
61-
"live_provider: tests that call live model providers and are kept out of default CI",
59+
"live_provider: tests that call live model providers and run as a dedicated CI suite",
6260
]
6361

6462
[tool.mypy]

scripts/select_e2e_shard.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
import argparse
2+
import ast
3+
import json
4+
from pathlib import Path
5+
6+
# Repository root: the parent of the directory that contains this script.
REPO_ROOT = Path(__file__).resolve().parents[1]
# Directory whose test modules are distributed across the e2e shards.
E2E_ROOT = REPO_ROOT.joinpath("tests", "e2e")

# Per-file weights, keyed by repo-relative POSIX path. They keep the shard
# balance close to the observed runtime split; a file that is not listed here
# is weighted by its local test count instead.
HISTORICAL_WEIGHTS = {
    "tests/e2e/test_batch_evaluation.py": 41,
    "tests/e2e/test_core_sdk.py": 53,
    "tests/e2e/test_datasets.py": 7,
    "tests/e2e/test_decorators.py": 32,
    "tests/e2e/test_experiments.py": 17,
    "tests/e2e/test_media.py": 1,
    "tests/e2e/test_prompt.py": 27,
}
20+
21+
22+
def relative_test_path(path: Path) -> str:
    """Return *path* relative to ``REPO_ROOT`` as a POSIX-style string.

    Raises:
        ValueError: If *path* is not located under ``REPO_ROOT``.
    """
    repo_relative = path.relative_to(REPO_ROOT)
    return repo_relative.as_posix()
24+
25+
26+
def discover_e2e_files() -> list[Path]:
    """Return the ``test_*.py`` files directly inside ``E2E_ROOT``, sorted."""
    test_files = list(E2E_ROOT.glob("test_*.py"))
    test_files.sort()
    return test_files
28+
29+
30+
def count_test_functions(path: Path) -> int:
31+
module = ast.parse(path.read_text(encoding="utf-8"))
32+
return sum(
33+
1
34+
for node in module.body
35+
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
36+
and node.name.startswith("test_")
37+
)
38+
39+
40+
def estimate_weight(path: Path) -> int:
    """Return the scheduling weight of the test file at *path*.

    A file listed in ``HISTORICAL_WEIGHTS`` (keyed by repo-relative path) gets
    its recorded weight. Any other file, including one that lies outside the
    repository root, is weighted by its number of test functions, with a
    minimum of 1.
    """
    try:
        weight_key = relative_test_path(path)
    except ValueError:
        # Not under the repository root, so it cannot have a recorded weight.
        pass
    else:
        if weight_key in HISTORICAL_WEIGHTS:
            return HISTORICAL_WEIGHTS[weight_key]

    return max(1, count_test_functions(path))
49+
50+
51+
def assign_shards(
    paths: list[Path], shard_count: int
) -> tuple[list[list[str]], list[int]]:
    """Greedily distribute test files across *shard_count* shards by weight.

    Files are visited from heaviest to lightest (ties broken by path) and each
    one is placed on the currently least-loaded shard (ties broken by the
    lowest shard index), so the result is deterministic for a given input.

    Args:
        paths: Test files to distribute; each must lie under the repo root.
        shard_count: Number of shards to fill; must be at least 1.

    Returns:
        A ``(shards, shard_loads)`` pair: ``shards[i]`` is the sorted list of
        repo-relative POSIX paths assigned to shard ``i`` and
        ``shard_loads[i]`` is the summed weight of those files.

    Raises:
        ValueError: If *shard_count* is below 1, or if a path is not located
            under the repository root.
    """
    if shard_count < 1:
        # Fail with a clear message instead of min()'s "empty sequence" error
        # (or a silent zero-shard result when there are no files).
        raise ValueError("shard_count must be at least 1")

    shard_loads = [0] * shard_count
    shards: list[list[str]] = [[] for _ in range(shard_count)]

    # Heaviest first; the path is a secondary key to keep ordering stable.
    weighted_paths = sorted(
        ((estimate_weight(path), relative_test_path(path)) for path in paths),
        key=lambda item: (-item[0], item[1]),
    )

    for weight, relative_path in weighted_paths:
        lightest = min(
            range(shard_count), key=lambda index: (shard_loads[index], index)
        )
        shards[lightest].append(relative_path)
        shard_loads[lightest] += weight

    return [sorted(shard) for shard in shards], shard_loads
70+
71+
72+
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse the shard-selection command-line options.

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the command-line entry point
            relies on; passing a list allows programmatic use and testing.

    Returns:
        A namespace with ``shard_index`` (int, required), ``shard_count``
        (int, default 2) and ``json`` (bool, default ``False``).
    """
    parser = argparse.ArgumentParser(
        description="Select the files for one e2e CI shard."
    )
    parser.add_argument(
        "--shard-index",
        required=True,
        type=int,
        help="Zero-based index of the shard to select.",
    )
    parser.add_argument(
        "--shard-count",
        default=2,
        type=int,
        help="Total number of shards (default: %(default)s).",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Print a JSON summary instead of one file path per line.",
    )
    return parser.parse_args(argv)
80+
81+
82+
def main() -> int:
    """Print the files selected for the requested shard.

    With ``--json`` a single JSON object is printed (shard count, shard index,
    selected files and the load of every shard); otherwise each selected file
    path is printed on its own line.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: With an explanatory message when the shard count is below
            1 or the shard index lies outside ``[0, shard_count)``.
    """
    args = parse_args()
    shard_index, shard_count = args.shard_index, args.shard_count

    if shard_count < 1:
        raise SystemExit("--shard-count must be at least 1")

    if not 0 <= shard_index < shard_count:
        raise SystemExit("--shard-index must be within the configured shard count")

    shards, shard_loads = assign_shards(discover_e2e_files(), shard_count)
    selected_files = shards[shard_index]

    if not args.json:
        for selected in selected_files:
            print(selected)
        return 0

    summary = {
        "shard_count": shard_count,
        "shard_index": shard_index,
        "selected_files": selected_files,
        "shard_loads": shard_loads,
    }
    print(json.dumps(summary))
    return 0
111+
112+
113+
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)

tests/conftest.py

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,6 @@
1010
from langfuse._client.client import Langfuse
1111
from langfuse._client.resource_manager import LangfuseResourceManager
1212

13-
CORE_E2E_FILENAMES = {
14-
"test_core_sdk.py",
15-
"test_decorators.py",
16-
"test_media.py",
17-
}
18-
1913
SERIAL_E2E_NODEIDS = {
2014
"tests/e2e/test_core_sdk.py::test_create_trace",
2115
"tests/e2e/test_core_sdk.py::test_create_boolean_score",
@@ -55,21 +49,14 @@ def clear(self) -> None:
5549

5650
def pytest_collection_modifyitems(items: list[pytest.Item]) -> None:
5751
for item in items:
58-
file_path = Path(str(item.fspath))
59-
test_group = file_path.parent.name
52+
test_group = Path(str(item.fspath)).parent.name
6053

6154
if test_group == "unit":
6255
item.add_marker(pytest.mark.unit)
6356
continue
6457

6558
if test_group == "e2e":
6659
item.add_marker(pytest.mark.e2e)
67-
# Keep the data shard as the default so new tests under tests/e2e
68-
# are picked up automatically unless we explicitly promote them.
69-
if file_path.name in CORE_E2E_FILENAMES:
70-
item.add_marker(pytest.mark.e2e_core)
71-
else:
72-
item.add_marker(pytest.mark.e2e_data)
7360
if item.nodeid in SERIAL_E2E_NODEIDS:
7461
item.add_marker(pytest.mark.serial_e2e)
7562
continue

tests/unit/test_e2e_sharding.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import importlib.util
2+
from pathlib import Path
3+
4+
# Repository root: two levels above the directory containing this test module.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Location of the shard-selection script exercised by these tests.
SCRIPT_PATH = REPO_ROOT.joinpath("scripts", "select_e2e_shard.py")
6+
7+
8+
def load_shard_script():
    """Load the shard-selection script from ``SCRIPT_PATH`` as a module.

    The script is executed from its file path under the module name
    ``select_e2e_shard`` and the resulting module object is returned.

    Raises:
        AssertionError: If no import spec or loader can be built for the path.
    """
    spec = importlib.util.spec_from_file_location("select_e2e_shard", SCRIPT_PATH)
    loader = None if spec is None else spec.loader
    if loader is None:
        raise AssertionError(f"Unable to load shard selector from {SCRIPT_PATH}")

    shard_module = importlib.util.module_from_spec(spec)
    loader.exec_module(shard_module)
    return shard_module
16+
17+
18+
def test_e2e_shards_cover_all_files_once():
    """Two shards together contain every e2e test file exactly once."""
    shard_script = load_shard_script()

    e2e_dir = REPO_ROOT / "tests" / "e2e"
    expected_files = sorted(
        candidate.relative_to(REPO_ROOT).as_posix()
        for candidate in e2e_dir.glob("test_*.py")
    )

    discovered = shard_script.discover_e2e_files()
    shards, shard_loads = shard_script.assign_shards(discovered, shard_count=2)

    assert len(shards) == 2
    # No file may be scheduled on both shards.
    assert set(shards[0]).isdisjoint(shards[1])
    # The union of the shards is exactly the set of e2e files on disk.
    assigned_files = [path for shard in shards for path in shard]
    assert sorted(assigned_files) == expected_files
    # Every shard received some work.
    assert all(load > 0 for load in shard_loads)
34+
35+
36+
def test_unknown_file_weight_falls_back_to_test_count(tmp_path: Path):
    """A file without a recorded weight is weighted by its test-function count."""
    shard_script = load_shard_script()

    # One sync and one async test, written outside the repository tree.
    source_lines = (
        "def test_one():",
        " pass",
        "",
        "async def test_two():",
        " pass",
    )
    test_file = tmp_path / "test_future_suite.py"
    test_file.write_text("\n".join(source_lines), encoding="utf-8")

    assert shard_script.count_test_functions(test_file) == 2
    assert shard_script.estimate_weight(test_file) == 2

0 commit comments

Comments
 (0)