Skip to content

Commit 3c3b264

Browse files
committed
stabilize remaining e2e checks
1 parent 4cd23c3 commit 3c3b264

8 files changed

Lines changed: 635 additions & 305 deletions

File tree

.github/workflows/ci.yml

Lines changed: 14 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -98,6 +98,8 @@ jobs:
9898
LANGFUSE_BASE_URL: "http://localhost:3000"
9999
LANGFUSE_PUBLIC_KEY: "pk-lf-1234567890"
100100
LANGFUSE_SECRET_KEY: "sk-lf-1234567890"
101+
LANGFUSE_E2E_READ_TIMEOUT_SECONDS: "30"
102+
LANGFUSE_E2E_READ_INTERVAL_SECONDS: "0.5"
101103
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
102104
# SERPAPI_API_KEY: ${{ secrets.SERPAPI_API_KEY }}
103105
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
@@ -106,6 +108,16 @@ jobs:
106108
name: E2E tests on Python 3.13
107109
steps:
108110
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
111+
- name: Install uv and set Python version
112+
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8
113+
with:
114+
version: "0.11.2"
115+
python-version: "3.13"
116+
enable-cache: true
117+
- name: Install the project dependencies
118+
run: uv sync --locked
119+
- name: Check uv Python version
120+
run: uv run --frozen python --version
109121
- uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5
110122
with:
111123
version: 10.33.0
@@ -183,23 +195,10 @@ jobs:
183195
done
184196
echo "Langfuse server is up and running!"
185197
186-
- name: Install uv and set Python version
187-
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8
188-
with:
189-
version: "0.11.2"
190-
python-version: "3.13"
191-
enable-cache: true
192-
193-
- name: Check Python version
194-
run: python --version
195-
196-
- name: Install the project dependencies
197-
run: uv sync --locked
198-
199198
- name: Run the end-to-end tests
200199
run: |
201-
python --version
202-
uv run --frozen pytest -n auto --dist loadfile -s -v --log-cli-level=INFO tests/e2e
200+
uv run --frozen python --version
201+
uv run --frozen pytest -n 4 --dist loadfile -s -v --log-cli-level=INFO tests/e2e
203202
204203
all-tests-passed:
205204
# This allows us to have a branch protection rule for tests and deploys with matrix

tests/e2e/test_core_sdk.py

Lines changed: 92 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,8 @@
1313
from tests.support.utils import (
1414
create_uuid,
1515
get_api,
16+
wait_for_result,
17+
wait_for_trace,
1618
)
1719

1820

@@ -228,7 +230,7 @@ def test_create_boolean_score():
228230

229231
# Ensure data is sent
230232
langfuse.flush()
231-
sleep(2)
233+
api_wrapper.get_trace(trace_id)
232234

233235
# Create a boolean score
234236
score_id = create_uuid()
@@ -251,10 +253,14 @@ def test_create_boolean_score():
251253

252254
# Ensure data is sent
253255
langfuse.flush()
254-
sleep(2)
255256

256257
# Retrieve and verify
257-
trace = api_wrapper.get_trace(trace_id)
258+
trace = api_wrapper.get_trace(
259+
trace_id,
260+
is_result_ready=lambda trace: any(
261+
score["name"] == "this-is-a-score" for score in trace.get("scores", [])
262+
),
263+
)
258264

259265
# Find the score we created by name
260266
created_score = next(
@@ -283,7 +289,7 @@ def test_create_categorical_score():
283289

284290
# Ensure data is sent
285291
langfuse.flush()
286-
sleep(2)
292+
api_wrapper.get_trace(trace_id)
287293

288294
# Create a categorical score
289295
score_id = create_uuid()
@@ -305,10 +311,14 @@ def test_create_categorical_score():
305311

306312
# Ensure data is sent
307313
langfuse.flush()
308-
sleep(2)
309314

310315
# Retrieve and verify
311-
trace = api_wrapper.get_trace(trace_id)
316+
trace = api_wrapper.get_trace(
317+
trace_id,
318+
is_result_ready=lambda trace: any(
319+
score["name"] == "this-is-a-score" for score in trace.get("scores", [])
320+
),
321+
)
312322

313323
# Find the score we created by name
314324
created_score = next(
@@ -337,7 +347,7 @@ def test_create_score_with_custom_timestamp():
337347

338348
# Ensure data is sent
339349
langfuse.flush()
340-
sleep(2)
350+
api_wrapper.get_trace(trace_id)
341351

342352
custom_timestamp = datetime.now(timezone.utc) - timedelta(hours=1)
343353
score_id = create_uuid()
@@ -352,10 +362,15 @@ def test_create_score_with_custom_timestamp():
352362

353363
# Ensure data is sent
354364
langfuse.flush()
355-
sleep(2)
356365

357366
# Retrieve and verify
358-
trace = api_wrapper.get_trace(trace_id)
367+
trace = api_wrapper.get_trace(
368+
trace_id,
369+
is_result_ready=lambda trace: any(
370+
score["name"] == "custom-timestamp-score"
371+
for score in trace.get("scores", [])
372+
),
373+
)
359374

360375
# Find the score we created by name
361376
created_score = next(
@@ -398,10 +413,18 @@ def test_create_trace():
398413

399414
# Ensure data is sent to the API
400415
langfuse.flush()
401-
sleep(2)
402416

403417
# Retrieve the trace from the API
404-
trace = LangfuseAPI().get_trace(trace_id)
418+
trace = LangfuseAPI().get_trace(
419+
trace_id,
420+
is_result_ready=lambda trace: (
421+
trace.get("name") == trace_name
422+
and trace.get("userId") == "test"
423+
and trace.get("metadata", {}).get("key") == "value"
424+
and trace.get("tags") == ["tag1", "tag2"]
425+
and trace.get("public") is True
426+
),
427+
)
405428

406429
# Verify all trace properties
407430
assert trace["name"] == trace_name
@@ -437,11 +460,20 @@ def test_create_update_trace():
437460

438461
# Ensure data is sent to the API
439462
langfuse.flush()
440-
sleep(2)
441463

442464
assert isinstance(trace_id, str)
443465
# Retrieve and verify trace
444-
trace = get_api().trace.get(trace_id)
466+
trace = wait_for_trace(
467+
trace_id,
468+
is_result_ready=lambda trace: (
469+
trace.name == trace_name
470+
and trace.user_id == "test"
471+
and trace.metadata is not None
472+
and trace.metadata.get("key") == "value"
473+
and trace.metadata.get("key2") == "value2"
474+
and trace.public is True
475+
),
476+
)
445477

446478
assert trace.name == trace_name
447479
assert trace.user_id == "test"
@@ -1735,16 +1767,20 @@ def test_fetch_traces():
17351767

17361768
# Ensure data is sent
17371769
langfuse.flush()
1738-
sleep(3)
17391770

1740-
# Fetch all traces with the same name
1741-
# Note: Using session_id in the query is causing a server error,
1742-
# but we keep the session_id in the trace data to ensure it's being stored correctly
1743-
all_traces = get_api().trace.list(name=name, limit=10)
1771+
expected_trace_ids = set(trace_ids)
1772+
api = get_api(retry=False)
1773+
1774+
# Fetch all traces with the same name.
1775+
all_traces = wait_for_result(
1776+
lambda: api.trace.list(name=name, limit=10),
1777+
is_result_ready=lambda response: (
1778+
{trace.id for trace in response.data} == expected_trace_ids
1779+
),
1780+
)
17441781

17451782
# Verify we got all traces
17461783
assert len(all_traces.data) == 3
1747-
assert all_traces.meta.total_items == 3
17481784

17491785
# Verify trace properties
17501786
for trace in all_traces.data:
@@ -1753,11 +1789,19 @@ def test_fetch_traces():
17531789
assert trace.input == {"key": "value"}
17541790
assert trace.output == "output-value"
17551791

1756-
# Test pagination by fetching just one trace
1757-
paginated_response = get_api().trace.list(name=name, limit=1, page=2)
1758-
assert len(paginated_response.data) == 1
1759-
assert paginated_response.meta.total_items == 3
1760-
assert paginated_response.meta.total_pages == 3
1792+
# Test pagination by fetching the first three pages one at a time and
1793+
# confirming they collectively cover the created traces.
1794+
paginated_ids = set()
1795+
for page in range(1, 4):
1796+
paginated_response = wait_for_result(
1797+
lambda page=page: api.trace.list(name=name, limit=1, page=page),
1798+
is_result_ready=lambda response: (
1799+
len(response.data) == 1 and response.data[0].id in expected_trace_ids
1800+
),
1801+
)
1802+
paginated_ids.add(paginated_response.data[0].id)
1803+
1804+
assert paginated_ids == expected_trace_ids
17611805

17621806

17631807
def test_get_observation():
@@ -1812,10 +1856,16 @@ def test_get_observations():
18121856

18131857
# Ensure data is sent
18141858
langfuse.flush()
1815-
sleep(2)
1859+
api = get_api(retry=False)
18161860

18171861
# Fetch observations using the API
1818-
observations = get_api().legacy.observations_v1.get_many(name=name, limit=10)
1862+
expected_generation_ids = {gen1_id, gen2_id}
1863+
observations = wait_for_result(
1864+
lambda: api.legacy.observations_v1.get_many(name=name, limit=10),
1865+
is_result_ready=lambda response: expected_generation_ids.issubset(
1866+
{obs.id for obs in response.data}
1867+
),
1868+
)
18191869

18201870
# Verify fetched observations
18211871
assert len(observations.data) == 2
@@ -1829,13 +1879,22 @@ def test_get_observations():
18291879
assert gen1_id in gen_ids
18301880
assert gen2_id in gen_ids
18311881

1832-
# Test pagination
1833-
paginated_response = get_api().legacy.observations_v1.get_many(
1834-
name=name, limit=1, page=2
1835-
)
1836-
assert len(paginated_response.data) == 1
1837-
assert paginated_response.meta.total_items == 2 # Parent span + 2 generations
1838-
assert paginated_response.meta.total_pages == 2
1882+
# Test pagination by confirming both created generations can be reached
1883+
# across separate pages.
1884+
paginated_ids = set()
1885+
for page in range(1, 3):
1886+
paginated_response = wait_for_result(
1887+
lambda page=page: api.legacy.observations_v1.get_many(
1888+
name=name, limit=1, page=page
1889+
),
1890+
is_result_ready=lambda response: (
1891+
len(response.data) == 1
1892+
and response.data[0].id in expected_generation_ids
1893+
),
1894+
)
1895+
paginated_ids.add(paginated_response.data[0].id)
1896+
1897+
assert paginated_ids == expected_generation_ids
18391898

18401899

18411900
def test_get_trace_not_found():

0 commit comments

Comments (0)