Skip to content

Commit 6bf86a8

Browse files
ulixius9 and claude committed
fix(ssrs): streamline test connection, stream reports, and retry transient failures
- Test connection's GetDashboards step no longer paginates every report; it now issues a single $top=1 probe to /Reports, matching the existing CheckAccess probe against /Folders.
- SsrsClient.get_reports is now a generator that yields reports page by page as each page is fetched, so ingestion memory stays flat regardless of report count.
- Session retries transient failures (connect errors, read timeouts, HTTP 500/502/503/504) up to 2 times with exponential backoff to survive flaky SSRS endpoints.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 285eb8a commit 6bf86a8

6 files changed

Lines changed: 134 additions & 20 deletions

File tree

ingestion/src/metadata/ingestion/source/dashboard/ssrs/client.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,12 @@
1212
SSRS REST client
1313
"""
1414
import traceback
15-
from typing import List, Optional, Union
15+
from typing import Iterator, List, Optional, Union
1616

1717
import requests
18+
from requests.adapters import HTTPAdapter
1819
from requests_ntlm import HttpNtlmAuth
20+
from urllib3.util.retry import Retry
1921

2022
from metadata.generated.schema.entity.services.connections.dashboard.ssrsConnection import (
2123
SsrsConnection,
@@ -35,6 +37,9 @@
3537
API_VERSION = "api/v2.0"
3638
DEFAULT_TIMEOUT = 30
3739
PAGE_SIZE = 100
40+
MAX_RETRIES = 2
41+
BACKOFF_FACTOR = 1
42+
RETRY_STATUS_CODES = (500, 502, 503, 504)
3843

3944

4045
class SsrsClient:
@@ -53,6 +58,19 @@ def __init__(
5358
self.session.headers.update({"Accept": "application/json"})
5459
if verify_ssl is not None:
5560
self.session.verify = verify_ssl
61+
retry = Retry(
62+
total=MAX_RETRIES,
63+
connect=MAX_RETRIES,
64+
read=MAX_RETRIES,
65+
status=MAX_RETRIES,
66+
backoff_factor=BACKOFF_FACTOR,
67+
status_forcelist=RETRY_STATUS_CODES,
68+
allowed_methods=frozenset(["GET"]),
69+
raise_on_status=False,
70+
)
71+
adapter = HTTPAdapter(max_retries=retry)
72+
self.session.mount("http://", adapter)
73+
self.session.mount("https://", adapter)
5674

5775
def close(self) -> None:
5876
if self.session:
@@ -72,6 +90,14 @@ def test_access(self) -> None:
7290
f"Failed to connect to SSRS: {exc}"
7391
) from exc
7492

93+
def test_get_reports(self) -> None:
    """Probe the ``/Reports`` endpoint with a single-item request.

    Only one report is requested (``$top=1``), so the check does not page
    through the whole catalogue.

    Raises:
        SourceConnectionException: if the underlying ``_get`` call raises
            anything; the original exception is chained as the cause.
    """
    probe_params = {"$top": "1"}
    try:
        self._get("/Reports", params=probe_params)
    except Exception as exc:
        message = f"Failed to fetch SSRS reports: {exc}"
        raise SourceConnectionException(message) from exc
100+
75101
def get_folders(self) -> List[SsrsFolder]:
76102
try:
77103
results: List[SsrsFolder] = []
@@ -91,21 +117,18 @@ def get_folders(self) -> List[SsrsFolder]:
91117
logger.warning("Failed to fetch SSRS folders: %s", exc)
92118
return []
93119

94-
def get_reports(self) -> List[SsrsReport]:
120+
def get_reports(self) -> Iterator[SsrsReport]:
95121
try:
96-
results: List[SsrsReport] = []
97122
skip = 0
98123
while True:
99124
data = self._get(
100125
"/Reports", params={"$top": str(PAGE_SIZE), "$skip": str(skip)}
101126
)
102127
response = SsrsReportListResponse(**data)
103-
results.extend(response.value)
128+
yield from response.value
104129
if len(response.value) < PAGE_SIZE:
105130
break
106131
skip += PAGE_SIZE
107-
return results
108132
except Exception as exc:
109133
logger.debug(traceback.format_exc())
110134
logger.warning("Failed to fetch SSRS reports: %s", exc)
111-
return []

ingestion/src/metadata/ingestion/source/dashboard/ssrs/connection.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def test_connection(
4646
) -> TestConnectionResult:
4747
test_fn = {
4848
"CheckAccess": client.test_access,
49-
"GetDashboards": client.get_reports,
49+
"GetDashboards": client.test_get_reports,
5050
}
5151

5252
return test_connection_steps(

ingestion/src/metadata/ingestion/source/dashboard/ssrs/metadata.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
SSRS source module
1313
"""
1414
import traceback
15-
from typing import Any, Dict, Iterable, List, Optional
15+
from typing import Any, Dict, Iterable, Optional
1616

1717
from metadata.generated.schema.api.data.createChart import CreateChartRequest
1818
from metadata.generated.schema.api.data.createDashboard import CreateDashboardRequest
@@ -81,9 +81,10 @@ def prepare(self):
8181
self.folder_path_map = {folder.path: folder.name for folder in folders}
8282
return super().prepare()
8383

84-
def get_dashboards_list(self) -> Optional[List[SsrsReport]]:
85-
reports = self.client.get_reports()
86-
return [r for r in reports if not r.hidden]
84+
def get_dashboards_list(self) -> Iterable[SsrsReport]:
85+
for report in self.client.get_reports():
86+
if not report.hidden:
87+
yield report
8788

8889
def get_dashboard_name(self, dashboard: SsrsReport) -> str:
8990
return dashboard.name

ingestion/tests/integration/connections/test_ssrs_connection.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,29 @@ def log_message(self, format, *args):
3838
pass
3939

4040

41+
class _FlakyHandler(BaseHTTPRequestHandler):
42+
failures_remaining = 2
43+
request_count = 0
44+
45+
def do_GET(self):
46+
type(self).request_count += 1
47+
if type(self).failures_remaining > 0:
48+
type(self).failures_remaining -= 1
49+
self.send_response(503)
50+
self.send_header("Content-Length", "0")
51+
self.end_headers()
52+
return
53+
body = json.dumps({"value": []}).encode()
54+
self.send_response(200)
55+
self.send_header("Content-Type", "application/json")
56+
self.send_header("Content-Length", str(len(body)))
57+
self.end_headers()
58+
self.wfile.write(body)
59+
60+
def log_message(self, format, *args):
61+
pass
62+
63+
4164
@pytest.fixture(scope="module")
4265
def ssrs_mock_url():
4366
server = HTTPServer(("127.0.0.1", 0), _MockHandler)
@@ -48,6 +71,18 @@ def ssrs_mock_url():
4871
server.shutdown()
4972

5073

74+
@pytest.fixture()
def ssrs_flaky_url():
    """Serve ``_FlakyHandler`` on an ephemeral localhost port for one test.

    The handler's class-level counters are reset first, so each test starts
    with two pending 503 responses and a request count of zero.

    Yields:
        URL of the throwaway server, ending in ``/reports``.
    """
    _FlakyHandler.failures_remaining = 2
    _FlakyHandler.request_count = 0
    server = HTTPServer(("127.0.0.1", 0), _FlakyHandler)
    port = server.server_address[1]
    thread = threading.Thread(target=server.serve_forever, daemon=True)
    thread.start()
    try:
        yield f"http://127.0.0.1:{port}/reports"
    finally:
        server.shutdown()
        # shutdown() only stops the serve_forever() loop; server_close() is
        # what actually releases the listening socket.
        server.server_close()
        thread.join(timeout=5)
84+
85+
5186
@pytest.mark.integration
5287
class TestSsrsConnection:
5388
def test_get_connection(self, ssrs_mock_url):
@@ -64,10 +99,34 @@ def test_get_connection_test_access(self, ssrs_mock_url):
6499
client = get_connection(connection)
65100
client.test_access()
66101

102+
def test_get_connection_test_get_reports(self, ssrs_mock_url):
    """The reports probe completes without raising against the mock server."""
    ssrs_client = get_connection(
        SsrsConnection(
            hostPort=ssrs_mock_url, username="test_user", password="test_pass"
        )
    )
    ssrs_client.test_get_reports()
108+
67109
def test_connection_bad_host(self):
68110
connection = SsrsConnection(
69111
hostPort="http://localhost:1", username="test_user", password="test_pass"
70112
)
71113
client = get_connection(connection)
72114
with pytest.raises(SourceConnectionException):
73115
client.test_access()
116+
117+
def test_connection_bad_host_get_reports(self):
    """The reports probe raises SourceConnectionException for a bad host."""
    unreachable = SsrsConnection(
        hostPort="http://localhost:1", username="test_user", password="test_pass"
    )
    ssrs_client = get_connection(unreachable)

    with pytest.raises(SourceConnectionException):
        ssrs_client.test_get_reports()
124+
125+
def test_get_reports_retries_transient_failures(self, ssrs_flaky_url):
    """Listing reports against the flaky server succeeds after three requests."""
    flaky_connection = SsrsConnection(
        hostPort=ssrs_flaky_url, username="test_user", password="test_pass"
    )
    ssrs_client = get_connection(flaky_connection)

    fetched = list(ssrs_client.get_reports())

    assert fetched == []
    # Three requests in total reached the flaky handler.
    assert _FlakyHandler.request_count == 3

ingestion/tests/integration/ssrs/test_metadata.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def test_client_get_reports(self, ssrs_service):
2626
hostPort=ssrs_service, username="test_user", password="test_pass"
2727
)
2828
client = SsrsClient(connection)
29-
reports = client.get_reports()
29+
reports = list(client.get_reports())
3030
assert len(reports) == 4
3131
assert reports[0].name == "Report 1"
3232
assert reports[0].path == "/TestFolder/Report 1"
@@ -52,7 +52,7 @@ def test_hidden_reports_present_in_raw(self, ssrs_service):
5252
hostPort=ssrs_service, username="test_user", password="test_pass"
5353
)
5454
client = SsrsClient(connection)
55-
reports = client.get_reports()
55+
reports = list(client.get_reports())
5656
assert any(r.hidden for r in reports)
5757
visible = [r for r in reports if not r.hidden]
5858
assert len(visible) == 3

ingestion/tests/unit/topology/dashboard/test_ssrs.py

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -173,14 +173,14 @@ def test_dashboard_details(self, ssrs_source):
173173
assert ssrs_source.get_dashboard_details(report) == report
174174

175175
def test_dashboards_list(self, ssrs_source):
176-
ssrs_source.client.get_reports = lambda: MOCK_REPORTS
177-
result = ssrs_source.get_dashboards_list()
176+
ssrs_source.client.get_reports = lambda: iter(MOCK_REPORTS)
177+
result = list(ssrs_source.get_dashboards_list())
178178
assert result == MOCK_REPORTS
179179
assert len(result) == 3
180180

181181
def test_dashboards_list_filters_hidden(self, ssrs_source):
182-
ssrs_source.client.get_reports = lambda: MOCK_REPORTS_WITH_HIDDEN
183-
result = ssrs_source.get_dashboards_list()
182+
ssrs_source.client.get_reports = lambda: iter(MOCK_REPORTS_WITH_HIDDEN)
183+
result = list(ssrs_source.get_dashboards_list())
184184
assert len(result) == 3
185185
assert all(not r.hidden for r in result)
186186

@@ -313,7 +313,7 @@ def test_get_reports_single_page(self):
313313
]
314314
}
315315
)
316-
reports = client.get_reports()
316+
reports = list(client.get_reports())
317317
assert len(reports) == 3
318318
client._get.assert_called_once()
319319

@@ -343,14 +343,45 @@ def test_get_reports_multi_page(self):
343343
}
344344
client._get = MagicMock(side_effect=[page1, page2])
345345

346-
reports = client.get_reports()
346+
reports = list(client.get_reports())
347347
assert len(reports) == 150
348348
assert client._get.call_count == 2
349349
_, kwargs1 = client._get.call_args_list[0]
350350
_, kwargs2 = client._get.call_args_list[1]
351351
assert kwargs1["params"]["$skip"] == "0"
352352
assert kwargs2["params"]["$skip"] == "100"
353353

354+
def test_get_reports_streams_lazily(self):
    """Pulling the first report must trigger exactly one ``_get`` call."""

    def build_page(indices):
        # One raw /Reports response page holding a report per index.
        return {
            "value": [
                {
                    "Id": f"r-{idx}",
                    "Name": f"Report {idx}",
                    "Path": f"/Reports/Report {idx}",
                }
                for idx in indices
            ]
        }

    client = MagicMock(spec=SsrsClient)
    client.get_reports = SsrsClient.get_reports.__get__(client)
    # A 100-item first page followed by a 50-item second page.
    client._get = MagicMock(
        side_effect=[build_page(range(100)), build_page(range(100, 150))]
    )

    stream = client.get_reports()
    first = next(stream)

    assert first.id == "r-0"
    # The second page must not have been requested yet.
    assert client._get.call_count == 1
384+
354385
def test_get_folders_multi_page(self):
355386
client = MagicMock(spec=SsrsClient)
356387
client.get_folders = SsrsClient.get_folders.__get__(client)
@@ -377,6 +408,6 @@ def test_get_reports_empty(self):
377408
client = MagicMock(spec=SsrsClient)
378409
client.get_reports = SsrsClient.get_reports.__get__(client)
379410
client._get = MagicMock(return_value={"value": []})
380-
reports = client.get_reports()
411+
reports = list(client.get_reports())
381412
assert len(reports) == 0
382413
client._get.assert_called_once()

0 commit comments

Comments
 (0)