Skip to content

Commit da2bfcc

Browse files
authored
Merge pull request #1534 from PolicyEngine/add-modal-concurrent-inputs-and-max-cap
Set Modal worker concurrency to 5 and cap autoscale at 100
2 parents 5b98c3a + e059c85 commit da2bfcc

3 files changed

Lines changed: 64 additions & 0 deletions

File tree

changelog.d/1533.changed.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Enable Modal `@modal.concurrent(max_inputs=5, target_inputs=4)` (5x warm-pool capacity, autoscaler aims for 80% steady-state utilisation, no new container cost) and cap autoscale at `max_containers=100` to prevent runaway scaling.

policyengine_household_api/modal_release/worker_app.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,13 @@ def worker_function_options(
4646
"timeout": 180,
4747
"scaledown_window": 300,
4848
"enable_memory_snapshot": True,
49+
# Hard cap on autoscale. Without this Modal is bounded only by the
50+
# workspace quota, so a runaway traffic spike (or a buggy partner
51+
# client) could scale to hundreds of containers and rack up cost.
52+
# 100 covers any realistic partner burst we expect today (100 containers
53+
# x 5 concurrent inputs each = 500 in-flight) while keeping accidents
54+
# bounded.
55+
"max_containers": 100,
4956
}
5057
if environment == "main":
5158
options["min_containers"] = 3
@@ -54,7 +61,21 @@ def worker_function_options(
5461
return options
5562

5663

64+
def worker_concurrency_options() -> dict[str, int]:
    """Return the keyword arguments for the ``@modal.concurrent`` decorator.

    ``max_inputs`` lets each container process up to 5 requests in
    parallel. With ~3s of CPU per request on a 1-core container, 5-way
    sharing gives ~15s wall-time per request when fully saturated, and it
    multiplies effective warm-pool capacity 5x with no additional
    container cost.

    ``target_inputs`` is the autoscaler's steady-state goal: average
    utilisation is kept at 80% so each container retains one free slot to
    absorb single-request spikes without waiting on a cold start.
    Containers still burst up to ``max_inputs`` under load before queueing.
    """
    # Hard per-container ceiling on requests handled in parallel.
    per_container_ceiling = 5
    # Steady-state load the autoscaler aims for (4 of 5 slots = 80%).
    autoscaler_goal = 4
    return dict(
        max_inputs=per_container_ceiling,
        target_inputs=autoscaler_goal,
    )
75+
76+
5777
@app.cls(**worker_function_options())
78+
@modal.concurrent(**worker_concurrency_options())
5879
class HouseholdWorker:
5980
"""Worker class for handling household API requests.
6081

tests/unit/modal_release/test_worker_app.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,48 @@ def test_household_worker_exposes_post_snapshot_reset_hook(worker_app):
8484
assert hasattr(worker_cls, "reset_post_snapshot_state")
8585

8686

87+
def test_worker_concurrency_options_set_max_inputs(worker_app):
    """Pin the per-container parallelism at 5 requests.

    The value is kept consistent across environments so that staging
    behavior mirrors production and concurrency-related issues surface in
    staging tests.
    """
    concurrency = worker_app.worker_concurrency_options()
    assert concurrency["max_inputs"] == 5
92+
93+
94+
def test_worker_concurrency_options_set_target_inputs(worker_app):
    """Pin the autoscaler's steady-state target at 4 inputs per container.

    `target_inputs=4` keeps the autoscaler aiming for 80% utilisation, so
    each container retains a free slot to absorb a single-request spike
    without waiting on a cold start.
    """
    concurrency = worker_app.worker_concurrency_options()
    assert concurrency["target_inputs"] == 4
99+
100+
101+
def test_worker_function_options_do_not_use_deprecated_concurrency_kwarg(
    worker_app,
):
    """Check no environment's options carry `allow_concurrent_inputs`.

    Concurrency is configured through `@modal.concurrent`, so the
    deprecated keyword must be absent for every environment.
    """
    environments = ("main", "staging", "testing")
    for environment in environments:
        options = worker_app.worker_function_options(
            modal_environment=environment
        )
        failure_message = (
            "`allow_concurrent_inputs` is deprecated; use "
            f"`@modal.concurrent` for `{environment}` worker concurrency"
        )
        uses_deprecated_kwarg = "allow_concurrent_inputs" in options
        assert not uses_deprecated_kwarg, failure_message
112+
113+
114+
def test_worker_function_options_max_containers_capped_in_all_envs(
    worker_app,
):
    """Check every environment caps autoscale at 100 containers.

    A hard ceiling prevents runaway scaling from a buggy client or traffic
    spike from racking up unbounded cost.
    """
    environments = ("main", "staging", "testing")
    for environment in environments:
        options = worker_app.worker_function_options(
            modal_environment=environment
        )
        # Index directly so a missing key fails the test with a KeyError.
        container_cap = options["max_containers"]
        failure_message = (
            f"max_containers must be 100 in `{environment}` to bound "
            "autoscale cost"
        )
        assert container_cap == 100, failure_message
127+
128+
87129
def test_country_package_install_specs_use_release_package_versions_only():
88130
assert country_package_install_specs(
89131
{

0 commit comments

Comments
 (0)