Skip to content

Commit 5cc60b7

Browse files
Migrate service model base url (#3560)
* - Assign `service.model.base_url` to `service.url` + `prefix` (e.g. `/v1`) if `model` has `openai` format. - Add CORS support to the gateway service endpoint (if `model` has `openai` format) * - Ensure CORS logic works even if project has multiple services with the same model name * Add gateway services state migration for CORS support in services for backward compatibility (with older versions of gateway) * Update SKILL.md to reflect service.model.base_url changes Now that service.model.base_url points to service.url + /v1 for openai-format models, it is no longer deprecated and can be recommended as the model endpoint. Co-authored-by: Cursor <cursoragent@cursor.com> * Shorten SKILL.md model endpoint wording Co-authored-by: Cursor <cursoragent@cursor.com> * PR feedback: Refactor `get_nginx_service_config` to use `service.cors_enabled`.
1 parent ad9fd69 commit 5cc60b7

File tree

10 files changed

+114
-23
lines changed

10 files changed

+114
-23
lines changed

skills/dstack/SKILL.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ resources:
222222
- Without gateway: `<dstack server URL>/proxy/services/f/<run name>/`
223223
- With gateway: `https://<run name>.<gateway domain>/`
224224
- Authentication: Unless `auth` is `false`, include `Authorization: Bearer <DSTACK_TOKEN>` on all service requests.
225-
- OpenAI-compatible models: Use `service.url` from `dstack run get <run name> --json` and append `/v1` as the base URL; do **not** use deprecated `service.model.base_url` for requests.
225+
- Model endpoint: If `model` is set, `service.model.base_url` from `dstack run get <run name> --json` provides the model endpoint. For OpenAI-compatible models (the default, unless format is set otherwise), this will be `service.url` + `/v1`.
226226
- Example (with gateway):
227227
```bash
228228
curl -sS -X POST "https://<run name>.<gateway domain>/v1/chat/completions" \

src/dstack/_internal/core/models/configurations.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,7 @@
5757
DEFAULT_PROBE_UNTIL_READY = False
5858
MAX_PROBE_URL_LEN = 2048
5959
DEFAULT_REPLICA_GROUP_NAME = "0"
60-
DEFAULT_MODEL_PROBE_TIMEOUT = 30
61-
DEFAULT_MODEL_PROBE_URL = "/v1/chat/completions"
60+
OPENAI_MODEL_PROBE_TIMEOUT = 30
6261

6362

6463
class RunConfigurationType(str, Enum):

src/dstack/_internal/proxy/gateway/resources/nginx/service.jinja2

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,17 @@ server {
2424

2525
{% for location in locations %}
2626
location {{ location.prefix }} {
27+
{% if cors_enabled %}
28+
# Handle CORS preflight before auth (rewrite phase runs before access phase)
29+
if ($request_method = 'OPTIONS') {
30+
add_header 'Access-Control-Allow-Origin' '*' always;
31+
add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, PATCH, OPTIONS, HEAD' always;
32+
add_header 'Access-Control-Allow-Headers' '*' always;
33+
add_header 'Access-Control-Max-Age' '600' always;
34+
return 204;
35+
}
36+
{% endif %}
37+
2738
{% if auth %}
2839
auth_request /_dstack_auth;
2940
{% endif %}
@@ -46,6 +57,15 @@ server {
4657
location @websocket {
4758
set $dstack_replica_hit 1;
4859
{% if replicas %}
60+
{% if cors_enabled %}
61+
proxy_hide_header 'Access-Control-Allow-Origin';
62+
proxy_hide_header 'Access-Control-Allow-Methods';
63+
proxy_hide_header 'Access-Control-Allow-Headers';
64+
proxy_hide_header 'Access-Control-Allow-Credentials';
65+
add_header 'Access-Control-Allow-Origin' '*' always;
66+
add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, PATCH, OPTIONS, HEAD' always;
67+
add_header 'Access-Control-Allow-Headers' '*' always;
68+
{% endif %}
4969
proxy_pass http://{{ domain }}.upstream;
5070
proxy_set_header X-Real-IP $remote_addr;
5171
proxy_set_header Host $host;
@@ -60,6 +80,15 @@ server {
6080
location @ {
6181
set $dstack_replica_hit 1;
6282
{% if replicas %}
83+
{% if cors_enabled %}
84+
proxy_hide_header 'Access-Control-Allow-Origin';
85+
proxy_hide_header 'Access-Control-Allow-Methods';
86+
proxy_hide_header 'Access-Control-Allow-Headers';
87+
proxy_hide_header 'Access-Control-Allow-Credentials';
88+
add_header 'Access-Control-Allow-Origin' '*' always;
89+
add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, PATCH, OPTIONS, HEAD' always;
90+
add_header 'Access-Control-Allow-Headers' '*' always;
91+
{% endif %}
6392
proxy_pass http://{{ domain }}.upstream;
6493
proxy_set_header X-Real-IP $remote_addr;
6594
proxy_set_header Host $host;

src/dstack/_internal/proxy/gateway/services/nginx.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ class ServiceConfig(SiteConfig):
7272
replicas: list[ReplicaConfig]
7373
router: Optional[AnyRouterConfig] = None
7474
router_port: Optional[int] = None
75+
cors_enabled: bool = False
7576

7677

7778
class ModelEntrypointConfig(SiteConfig):

src/dstack/_internal/proxy/gateway/services/registry.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ async def register_service(
4747
service_conn_pool: ServiceConnectionPool,
4848
router: Optional[AnyRouterConfig] = None,
4949
) -> None:
50+
cors_enabled = model is not None and model.type == "chat" and model.format == "openai"
5051
service = models.Service(
5152
project_name=project_name,
5253
run_name=run_name,
@@ -57,6 +58,7 @@ async def register_service(
5758
client_max_body_size=client_max_body_size,
5859
replicas=(),
5960
router=router,
61+
cors_enabled=cors_enabled,
6062
)
6163

6264
async with lock:
@@ -374,6 +376,7 @@ async def get_nginx_service_config(
374376
locations=locations,
375377
replicas=sorted(replicas, key=lambda r: r.id), # sort for reproducible configs
376378
router=service.router,
379+
cors_enabled=service.cors_enabled,
377380
)
378381

379382

@@ -389,9 +392,34 @@ async def apply_entrypoint(
389392
await nginx.register(config, acme)
390393

391394

395+
async def _migrate_cors_enabled(repo: GatewayProxyRepo) -> None:
396+
"""Migrate services registered before the cors_enabled field was added.
397+
398+
Old gateway versions didn't persist cors_enabled on services. This derives it
399+
from the associated model's format so that CORS is enabled for openai-format
400+
models on gateway restart without requiring service re-registration.
401+
"""
402+
services = await repo.list_services()
403+
openai_run_names: set[tuple[str, str]] = set()
404+
for service in services:
405+
for model in await repo.list_models(service.project_name):
406+
if model.run_name == service.run_name and isinstance(
407+
model.format_spec, models.OpenAIChatModelFormat
408+
):
409+
openai_run_names.add((service.project_name, service.run_name))
410+
for service in services:
411+
if (
412+
not service.cors_enabled
413+
and (service.project_name, service.run_name) in openai_run_names
414+
):
415+
updated = models.Service(**{**service.dict(), "cors_enabled": True})
416+
await repo.set_service(updated)
417+
418+
392419
async def apply_all(
393420
repo: GatewayProxyRepo, nginx: Nginx, service_conn_pool: ServiceConnectionPool
394421
) -> None:
422+
await _migrate_cors_enabled(repo)
395423
service_tasks = [
396424
apply_service(
397425
service=service,

src/dstack/_internal/proxy/lib/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ class Service(ImmutableModel):
5959
strip_prefix: bool = True # only used in-server
6060
replicas: tuple[Replica, ...]
6161
router: Optional[AnyRouterConfig] = None
62+
cors_enabled: bool = False # only used on gateways; enabled for openai-format models
6263

6364
@property
6465
def domain_safe(self) -> str:

src/dstack/_internal/server/services/jobs/configurators/base.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
from dstack._internal.core.errors import DockerRegistryError, ServerClientError
1313
from dstack._internal.core.models.common import RegistryAuth
1414
from dstack._internal.core.models.configurations import (
15-
DEFAULT_MODEL_PROBE_TIMEOUT,
16-
DEFAULT_MODEL_PROBE_URL,
1715
DEFAULT_PROBE_INTERVAL,
1816
DEFAULT_PROBE_METHOD,
1917
DEFAULT_PROBE_READY_AFTER,
@@ -22,6 +20,7 @@
2220
DEFAULT_PROBE_URL,
2321
DEFAULT_REPLICA_GROUP_NAME,
2422
LEGACY_REPO_DIR,
23+
OPENAI_MODEL_PROBE_TIMEOUT,
2524
HTTPHeaderSpec,
2625
PortMapping,
2726
ProbeConfig,
@@ -406,7 +405,7 @@ def _probes(self) -> list[ProbeSpec]:
406405
# Generate default probe if model is set
407406
model = self.run_spec.configuration.model
408407
if isinstance(model, OpenAIChatModel):
409-
return [_default_model_probe_spec(model.name)]
408+
return [_openai_model_probe_spec(model.name, model.prefix)]
410409
return []
411410

412411

@@ -460,7 +459,7 @@ def _probe_config_to_spec(c: ProbeConfig) -> ProbeSpec:
460459
)
461460

462461

463-
def _default_model_probe_spec(model_name: str) -> ProbeSpec:
462+
def _openai_model_probe_spec(model_name: str, prefix: str) -> ProbeSpec:
464463
body = orjson.dumps(
465464
{
466465
"model": model_name,
@@ -471,12 +470,12 @@ def _default_model_probe_spec(model_name: str) -> ProbeSpec:
471470
return ProbeSpec(
472471
type="http",
473472
method="post",
474-
url=DEFAULT_MODEL_PROBE_URL,
473+
url=prefix.rstrip("/") + "/chat/completions",
475474
headers=[
476475
HTTPHeaderSpec(name="Content-Type", value="application/json"),
477476
],
478477
body=body,
479-
timeout=DEFAULT_MODEL_PROBE_TIMEOUT,
478+
timeout=OPENAI_MODEL_PROBE_TIMEOUT,
480479
interval=DEFAULT_PROBE_INTERVAL,
481480
ready_after=DEFAULT_PROBE_READY_AFTER,
482481
)

src/dstack/_internal/server/services/services/__init__.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from dstack._internal.core.models.gateways import GatewayConfiguration, GatewayStatus
2828
from dstack._internal.core.models.instances import SSHConnectionParams
2929
from dstack._internal.core.models.runs import JobSpec, Run, RunSpec, ServiceModelSpec, ServiceSpec
30+
from dstack._internal.core.models.services import OpenAIChatModel
3031
from dstack._internal.server import settings
3132
from dstack._internal.server.models import GatewayModel, JobModel, ProjectModel, RunModel
3233
from dstack._internal.server.services import events
@@ -106,10 +107,15 @@ async def _register_service_in_gateway(
106107
wildcard_domain = gateway.wildcard_domain.lstrip("*.") if gateway.wildcard_domain else None
107108
if wildcard_domain is None:
108109
raise ServerClientError("Domain is required for gateway")
110+
service_url = f"{service_protocol}://{run_model.run_name}.{wildcard_domain}"
111+
if isinstance(run_spec.configuration.model, OpenAIChatModel):
112+
model_url = service_url + run_spec.configuration.model.prefix
113+
else:
114+
model_url = f"{gateway_protocol}://gateway.{wildcard_domain}"
109115
service_spec = get_service_spec(
110116
configuration=run_spec.configuration,
111-
service_url=f"{service_protocol}://{run_model.run_name}.{wildcard_domain}",
112-
model_url=f"{gateway_protocol}://gateway.{wildcard_domain}",
117+
service_url=service_url,
118+
model_url=model_url,
113119
)
114120

115121
domain = service_spec.get_domain()
@@ -173,10 +179,15 @@ def _register_service_in_server(run_model: RunModel, run_spec: RunSpec) -> Servi
173179
"Rate limits are not supported when running services without a gateway."
174180
" Please configure a gateway or remove `rate_limits` from the service configuration"
175181
)
182+
service_url = f"/proxy/services/{run_model.project.name}/{run_model.run_name}/"
183+
if isinstance(run_spec.configuration.model, OpenAIChatModel):
184+
model_url = service_url.rstrip("/") + run_spec.configuration.model.prefix
185+
else:
186+
model_url = f"/proxy/models/{run_model.project.name}/"
176187
return get_service_spec(
177188
configuration=run_spec.configuration,
178-
service_url=f"/proxy/services/{run_model.project.name}/{run_model.run_name}/",
179-
model_url=f"/proxy/models/{run_model.project.name}/",
189+
service_url=service_url,
190+
model_url=model_url,
180191
)
181192

182193

src/tests/_internal/server/routers/test_runs.py

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -588,14 +588,15 @@ def get_service_run_spec(
588588
repo_id: str,
589589
run_name: Optional[str] = None,
590590
gateway: Optional[Union[bool, str]] = None,
591+
model: Union[str, dict] = "test-model",
591592
) -> dict:
592593
return {
593594
"configuration": {
594595
"type": "service",
595596
"commands": ["python -m http.server"],
596597
"port": 8000,
597598
"gateway": gateway,
598-
"model": "test-model",
599+
"model": model,
599600
"repos": [
600601
{
601602
"url": "https://github.com/dstackai/dstack",
@@ -2303,48 +2304,69 @@ def mock_gateway_connections(self) -> Generator[None, None, None]:
23032304
"expected_service_url",
23042305
"expected_model_url",
23052306
"is_gateway",
2307+
"model",
23062308
),
23072309
[
23082310
pytest.param(
23092311
[("default-gateway", True), ("non-default-gateway", False)],
23102312
None,
23112313
"https://test-service.default-gateway.example",
2312-
"https://gateway.default-gateway.example",
2314+
"https://test-service.default-gateway.example/v1",
23132315
True,
2316+
"test-model",
23142317
id="submits-to-default-gateway",
23152318
),
23162319
pytest.param(
23172320
[("default-gateway", True), ("non-default-gateway", False)],
23182321
True,
23192322
"https://test-service.default-gateway.example",
2320-
"https://gateway.default-gateway.example",
2323+
"https://test-service.default-gateway.example/v1",
23212324
True,
2325+
"test-model",
23222326
id="submits-to-default-gateway-when-gateway-true",
23232327
),
23242328
pytest.param(
23252329
[("default-gateway", True), ("non-default-gateway", False)],
23262330
"non-default-gateway",
23272331
"https://test-service.non-default-gateway.example",
2328-
"https://gateway.non-default-gateway.example",
2332+
"https://test-service.non-default-gateway.example/v1",
23292333
True,
2334+
"test-model",
23302335
id="submits-to-specified-gateway",
23312336
),
23322337
pytest.param(
23332338
[("non-default-gateway", False)],
23342339
None,
23352340
"/proxy/services/test-project/test-service/",
2336-
"/proxy/models/test-project/",
2341+
"/proxy/services/test-project/test-service/v1",
23372342
False,
2343+
"test-model",
23382344
id="submits-in-server-when-no-default-gateway",
23392345
),
23402346
pytest.param(
23412347
[("default-gateway", True)],
23422348
False,
23432349
"/proxy/services/test-project/test-service/",
2344-
"/proxy/models/test-project/",
2350+
"/proxy/services/test-project/test-service/v1",
23452351
False,
2352+
"test-model",
23462353
id="submits-in-server-when-specified",
23472354
),
2355+
pytest.param(
2356+
[("default-gateway", True)],
2357+
None,
2358+
"https://test-service.default-gateway.example",
2359+
"https://gateway.default-gateway.example",
2360+
True,
2361+
{
2362+
"type": "chat",
2363+
"name": "test-model",
2364+
"format": "tgi",
2365+
"chat_template": "test",
2366+
"eos_token": "</s>",
2367+
},
2368+
id="submits-tgi-model-to-gateway",
2369+
),
23482370
],
23492371
)
23502372
async def test_submit_to_correct_proxy(
@@ -2357,6 +2379,7 @@ async def test_submit_to_correct_proxy(
23572379
expected_service_url: str,
23582380
expected_model_url: str,
23592381
is_gateway: bool,
2382+
model: Union[str, dict],
23602383
) -> None:
23612384
user = await create_user(session=session, global_role=GlobalRole.USER)
23622385
project = await create_project(session=session, owner=user, name="test-project")
@@ -2386,6 +2409,7 @@ async def test_submit_to_correct_proxy(
23862409
repo_id=repo.name,
23872410
run_name="test-service",
23882411
gateway=specified_gateway_in_run_conf,
2412+
model=model,
23892413
)
23902414
response = await client.post(
23912415
f"/api/project/{project.name}/runs/submit",

src/tests/_internal/server/services/jobs/configurators/test_service.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
import pytest
22

33
from dstack._internal.core.models.configurations import (
4-
DEFAULT_MODEL_PROBE_TIMEOUT,
5-
DEFAULT_MODEL_PROBE_URL,
4+
OPENAI_MODEL_PROBE_TIMEOUT,
65
ProbeConfig,
76
ServiceConfiguration,
87
)
@@ -35,8 +34,8 @@ async def test_default_probe_when_model_set(self):
3534
probe = probes[0]
3635
assert probe.type == "http"
3736
assert probe.method == "post"
38-
assert probe.url == DEFAULT_MODEL_PROBE_URL
39-
assert probe.timeout == DEFAULT_MODEL_PROBE_TIMEOUT
37+
assert probe.url == "/v1/chat/completions"
38+
assert probe.timeout == OPENAI_MODEL_PROBE_TIMEOUT
4039
assert len(probe.headers) == 1
4140
assert probe.headers[0].name == "Content-Type"
4241
assert probe.headers[0].value == "application/json"

0 commit comments

Comments
 (0)