|
10 | 10 | from typing import Any, Dict |
11 | 11 |
|
12 | 12 | import pytest |
| 13 | +import requests |
13 | 14 |
|
14 | 15 | from eval_protocol.models import EvaluationRow, InputMetadata, Message |
15 | 16 |
|
16 | 17 |
|
def _skip_on_external_dataset_network_error(exc: Exception, provider: str) -> None:
    """Skip the running test when ``exc`` looks like an upstream network outage.

    Real-data E2E tests depend on third-party dataset services; when such a
    service is unreachable the test should be skipped rather than failed.
    The whole exception chain (``__cause__`` / ``__context__``) is inspected,
    because a client library may re-raise the low-level network error wrapped
    in an exception type of its own.

    Args:
        exc: The exception caught while talking to the dataset service.
        provider: Human-readable service name used in the skip message.

    Returns:
        None when nothing in the chain looks like a network problem; the
        caller is expected to re-raise ``exc`` in that case.

    Raises:
        pytest.skip.Exception: Raised through ``pytest.skip`` when a network
            problem is detected anywhere in the chain.
    """
    # Lower-cased fragments of error messages that indicate a transient outage.
    transient_markers = (
        "read timed out",
        "connection reset",
        "connection aborted",
        "temporary failure",
        "failed to establish a new connection",
        "max retries exceeded",
        "name or service not known",
        "service unavailable",
    )
    # Built-in ConnectionError / TimeoutError cover failures that are raised
    # without going through ``requests`` and whose text matches no marker.
    network_error_types = (
        requests.exceptions.RequestException,
        ConnectionError,
        TimeoutError,
    )

    visited = set()  # ids of exceptions already inspected; guards against cyclic chains
    current = exc
    while current is not None and id(current) not in visited:
        visited.add(id(current))

        message = str(current).lower()
        if isinstance(current, network_error_types) or any(
            marker in message for marker in transient_markers
        ):
            # Always report the outermost exception so the skip reason matches
            # what the test actually saw.
            pytest.skip(f"Skipping due to {provider} network issue: {exc}")

        # Follow the explicit cause first, then the implicit context unless it
        # was deliberately suppressed with ``raise ... from None``.
        if current.__cause__ is not None:
            current = current.__cause__
        elif not current.__suppress_context__:
            current = current.__context__
        else:
            current = None
| 36 | + |
| 37 | + |
17 | 38 | class TestLangfuseAdapterE2E: |
18 | 39 | """End-to-end tests for Langfuse adapter with real deployment.""" |
19 | 40 |
|
@@ -295,13 +316,17 @@ def math_transform(row: Dict[str, Any]) -> Dict[str, Any]: |
295 | 316 | } |
296 | 317 |
|
297 | 318 | # Create adapter |
298 | | - adapter = create_huggingface_adapter( |
299 | | - dataset_id="SuperSecureHuman/competition_math_hf_dataset", |
300 | | - transform_fn=math_transform, |
301 | | - ) |
| 319 | + try: |
| 320 | + adapter = create_huggingface_adapter( |
| 321 | + dataset_id="SuperSecureHuman/competition_math_hf_dataset", |
| 322 | + transform_fn=math_transform, |
| 323 | + ) |
302 | 324 |
|
303 | | - # Test loading data |
304 | | - rows = list(adapter.get_evaluation_rows(split="test", limit=3)) |
| 325 | + # Test loading data |
| 326 | + rows = list(adapter.get_evaluation_rows(split="test", limit=3)) |
| 327 | + except Exception as exc: |
| 328 | + _skip_on_external_dataset_network_error(exc, provider="Hugging Face") |
| 329 | + raise |
305 | 330 |
|
306 | 331 | # Verify data |
307 | 332 | assert len(rows) > 0, "Should retrieve MATH dataset data" |
|
0 commit comments