Skip to content

Commit 81305d0

Browse files
authored
Don't collect Prometheus metrics on container-based backends (#2605)
Fixes: #2565
1 parent 61eb095 commit 81305d0

File tree

3 files changed

+29
-4
lines changed

3 files changed

+29
-4
lines changed

pytest.ini

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,3 +7,4 @@ addopts =
77
--allow-unix-socket
88
markers =
99
shim_version
10+
dockerized

src/dstack/_internal/server/background/tasks/process_prometheus_metrics.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -99,11 +99,14 @@ async def _collect_jobs_metrics(job_models: list[JobModel], collected_at: dateti
9999

100100

101101
async def _collect_job_metrics(job_model: JobModel) -> Optional[str]:
102-
ssh_private_keys = get_instance_ssh_private_keys(get_or_error(job_model.instance))
103102
jpd = get_job_provisioning_data(job_model)
104-
jrd = get_job_runtime_data(job_model)
105103
if jpd is None:
106104
return None
105+
if not jpd.dockerized:
106+
# Container-based backend, no shim
107+
return None
108+
ssh_private_keys = get_instance_ssh_private_keys(get_or_error(job_model.instance))
109+
jrd = get_job_runtime_data(job_model)
107110
try:
108111
res = await run_async(
109112
_pull_job_metrics,

src/tests/_internal/server/background/tasks/test_process_prometheus_metrics.py

Lines changed: 23 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,13 @@
3434
@pytest.mark.usefixtures("test_db", "image_config_mock")
3535
class TestCollectPrometheusMetrics:
3636
@pytest_asyncio.fixture
37-
async def job(self, session: AsyncSession) -> JobModel:
37+
async def job(self, request: pytest.FixtureRequest, session: AsyncSession) -> JobModel:
38+
dockerized: bool
39+
marker = request.node.get_closest_marker("dockerized")
40+
if marker is None:
41+
dockerized = True
42+
else:
43+
dockerized = marker.args[0]
3844
user = await create_user(session=session, global_role=GlobalRole.USER)
3945
project = await create_project(session=session, owner=user)
4046
await add_project_member(
@@ -59,7 +65,7 @@ async def job(self, session: AsyncSession) -> JobModel:
5965
session=session,
6066
run=run,
6167
status=JobStatus.RUNNING,
62-
job_provisioning_data=get_job_provisioning_data(),
68+
job_provisioning_data=get_job_provisioning_data(dockerized=dockerized),
6369
instance_assigned=True,
6470
instance=instance,
6571
)
@@ -142,6 +148,21 @@ async def test_skips_recently_updated(
142148
assert metrics.text == "# prom old response"
143149
assert metrics.collected_at == datetime(2023, 1, 2, 3, 5, 15)
144150

151+
@freeze_time(datetime(2023, 1, 2, 3, 5, 20, tzinfo=timezone.utc))
152+
@pytest.mark.dockerized(False)
153+
async def test_skips_non_dockerized_jobs(
154+
self, session: AsyncSession, job: JobModel, ssh_tunnel_mock: Mock, shim_client_mock: Mock
155+
):
156+
await collect_prometheus_metrics()
157+
158+
ssh_tunnel_mock.assert_not_called()
159+
shim_client_mock.get_task_metrics.assert_not_called()
160+
res = await session.execute(
161+
select(JobPrometheusMetrics).where(JobPrometheusMetrics.job_id == job.id)
162+
)
163+
metrics = res.scalar_one_or_none()
164+
assert metrics is None
165+
145166

146167
@pytest.mark.asyncio
147168
@pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)

0 commit comments

Comments
 (0)