Skip to content

Commit ad98a6d

Browse files
committed
feat: add api runtime server and unified bootstrap entrypoint
1 parent f1814dd commit ad98a6d

9 files changed

Lines changed: 448 additions & 20 deletions

File tree

.github/workflows/runtimes.yml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,3 +133,36 @@ jobs:
133133
runpod/queue:py${{ matrix.python-version }}-${{ env.TAG_SUFFIX }}
134134
cache-from: type=gha,scope=queue-py${{ matrix.python-version }}
135135
cache-to: type=gha,mode=max,scope=queue-py${{ matrix.python-version }}
136+
137+
build-api:
138+
runs-on: ubuntu-latest
139+
needs: [test-runner]
140+
strategy:
141+
matrix:
142+
python-version: ["3.10", "3.11", "3.12"]
143+
steps:
144+
- uses: actions/checkout@v7
145+
146+
- uses: docker/setup-buildx-action@v3
147+
148+
- name: Login to Docker Hub
149+
if: github.event_name != 'pull_request'
150+
uses: docker/login-action@v3
151+
with:
152+
registry: ${{ env.REGISTRY }}
153+
username: ${{ secrets.DOCKERHUB_USERNAME }}
154+
password: ${{ secrets.DOCKERHUB_TOKEN }}
155+
156+
- name: Build and push api image
157+
uses: docker/build-push-action@v6
158+
with:
159+
context: .
160+
file: runpod/runtimes/api/Dockerfile
161+
platforms: linux/amd64
162+
build-args: |
163+
PYTHON_VERSION=${{ matrix.python-version }}
164+
push: ${{ github.event_name != 'pull_request' }}
165+
tags: |
166+
runpod/api:py${{ matrix.python-version }}-${{ env.TAG_SUFFIX }}
167+
cache-from: type=gha,scope=api-py${{ matrix.python-version }}
168+
cache-to: type=gha,mode=max,scope=api-py${{ matrix.python-version }}

runpod/apps/dev.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@
3434
DEFAULT_IMAGES = {
3535
("queue", False): f"runpod/queue:py3.12-{_TAG}",
3636
("queue", True): f"runpod/queue:py3.12-{_TAG}",
37-
("api", False): "runpod/flash-lb:py3.12-latest",
38-
("api", True): "runpod/flash-lb-cpu:py3.12-latest",
37+
("api", False): f"runpod/api:py3.12-{_TAG}",
38+
("api", True): f"runpod/api:py3.12-{_TAG}",
3939
}
4040

4141

@@ -60,7 +60,7 @@ def _image_for(spec: ResourceSpec) -> str:
6060

6161
def _bootstrap_source() -> str:
6262
return (
63-
Path(__file__).parent.parent / "runtimes" / "queue" / "bootstrap.py"
63+
Path(__file__).parent.parent / "runtimes" / "bootstrap.py"
6464
).read_text()
6565

6666

@@ -120,11 +120,16 @@ def _endpoint_input(app: App, spec: ResourceSpec, generation: int = 1) -> Dict:
120120
if spec.image:
121121
# custom image: inject the bootstrap so the worker runtime starts
122122
# regardless of what the image contains
123-
payload["template"]["env"].append(
124-
{
125-
"key": "RUNPOD_BOOTSTRAP_B64",
126-
"value": base64.b64encode(_bootstrap_source().encode()).decode(),
127-
}
123+
payload["template"]["env"].extend(
124+
[
125+
{
126+
"key": "RUNPOD_BOOTSTRAP_B64",
127+
"value": base64.b64encode(
128+
_bootstrap_source().encode()
129+
).decode(),
130+
},
131+
{"key": "RUNPOD_RUNTIME_KIND", "value": spec.kind.value},
132+
]
128133
)
129134
payload["template"]["dockerArgs"] = _bootstrap_docker_args()
130135
if spec.kind is ResourceKind.API:

runpod/runtimes/api/Dockerfile

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
ARG PYTHON_VERSION=3.12
2+
FROM python:${PYTHON_VERSION}-slim
3+
4+
ENV DEBIAN_FRONTEND=noninteractive \
5+
TZ=Etc/UTC \
6+
PYTHONUNBUFFERED=1 \
7+
RUNPOD_RUNTIME_KIND=api
8+
9+
RUN apt-get update && apt-get install -y --no-install-recommends \
10+
git ca-certificates \
11+
&& apt-get clean && rm -rf /var/lib/apt/lists/*
12+
13+
# install the runpod package (worker loop + runtimes) from the build context
14+
COPY . /src
15+
RUN pip install --no-cache-dir /src cloudpickle "uvicorn>=0.30" && rm -rf /src
16+
17+
EXPOSE 80
18+
CMD ["python", "-m", "runpod.runtimes.bootstrap"]

runpod/runtimes/api/__init__.py

Whitespace-only changes.

runpod/runtimes/api/server.py

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
"""generic api (load-balanced) server for app endpoints.
2+
3+
serves an asgi app on the port runpod's load balancer routes to
4+
(PORT env, default 80), with /ping kept healthy for LB health checks.
5+
6+
two serving modes, chosen at startup:
7+
8+
deployed mode (rp deploy):
9+
the build artifact is unpacked at RUNPOD_APP_DIR and
10+
FLASH_RESOURCE_NAME (or RUNPOD_RESOURCE_NAME) identifies this resource. the server imports
11+
the user's module, finds the ApiHandle, and builds the asgi app:
12+
- class-based api: instantiate the class, run its @init method
13+
before /ping reports healthy, mount each @get/@post route
14+
- asgi factory: call the factory, serve what it returns
15+
16+
live mode (rp dev):
17+
no artifact. serves /execute, which runs FunctionRequest payloads
18+
(source per request) via the task runner's execute_request.
19+
"""
20+
21+
import importlib
22+
import inspect
23+
import json
24+
import logging
25+
import os
26+
import sys
27+
from typing import Any, Optional
28+
29+
log = logging.getLogger("runpod.runtimes.api")
30+
31+
APP_DIR = os.environ.get("RUNPOD_APP_DIR", "/app")
32+
MANIFEST_NAME = "runpod_manifest.json"
33+
PORT = int(os.environ.get("PORT", "80"))
34+
35+
36+
def _resource_name() -> str:
    """return the name of the resource this server is running for.

    a non-empty FLASH_RESOURCE_NAME takes precedence; otherwise the value
    of RUNPOD_RESOURCE_NAME is returned, or "" when that is unset too.
    """
    flash_name = os.environ.get("FLASH_RESOURCE_NAME")
    if flash_name:
        return flash_name
    return os.environ.get("RUNPOD_RESOURCE_NAME", "")
40+
41+
42+
def _is_deployed() -> bool:
    """report whether the server should run in deployed mode.

    deployed mode needs both a resource name in the environment and a
    manifest file present under APP_DIR.
    """
    if not _resource_name():
        return False
    manifest_path = os.path.join(APP_DIR, MANIFEST_NAME)
    return os.path.isfile(manifest_path)
46+
47+
48+
def _load_api_handle():
    """import the user's module and return the ApiHandle for this resource.

    reads the manifest under APP_DIR, picks the entry whose "name" matches
    this resource, imports the module that entry points at (with APP_DIR
    on sys.path), and returns the module-level ApiHandle whose spec name
    matches.

    raises RuntimeError when the resource is not in the manifest, when its
    entry names no module, or when the module defines no matching handle.
    """
    manifest_path = os.path.join(APP_DIR, MANIFEST_NAME)
    # explicit encoding: do not depend on the container's locale
    with open(manifest_path, encoding="utf-8") as f:
        manifest = json.load(f)

    name = _resource_name()
    resources = manifest.get("resources", [])
    entry = next((r for r in resources if r.get("name") == name), None)
    if entry is None:
        raise RuntimeError(
            f"resource '{name}' not in manifest "
            f"(has: {[r.get('name') for r in resources]})"
        )

    module_name = entry.get("module")
    if not module_name:
        # surface a readable error instead of a bare KeyError
        raise RuntimeError(
            f"manifest entry for resource '{name}' has no 'module'"
        )

    # the unpacked artifact must be importable as top-level modules
    if APP_DIR not in sys.path:
        sys.path.insert(0, APP_DIR)
    module = importlib.import_module(module_name)

    from runpod.apps.handles import ApiHandle

    for attr in vars(module).values():
        if isinstance(attr, ApiHandle) and attr.spec.name == name:
            return attr
    raise RuntimeError(
        f"no @app.api handle named '{name}' found in module '{module_name}'"
    )
76+
77+
78+
async def _maybe_await(value: Any) -> Any:
    """return value unchanged, or its awaited result if it is awaitable.

    lets callers treat sync and async user callables the same way.
    """
    if not inspect.isawaitable(value):
        return value
    return await value
82+
83+
84+
def _build_class_app(handle) -> Any:
    """construct a fastapi app from an ApiHandle's decorated class.

    the class is instantiated once, when the app is built. its @init
    method (handle._init_name, if set) runs during the asgi lifespan
    startup, and /ping only reports healthy once that has completed.

    each entry in handle.spec.routes is mounted as a route that calls the
    matching bound method on the instance. for POST/PUT/PATCH/DELETE the
    parsed json body is passed as the single argument when one is present;
    otherwise the method is called with no arguments.
    """
    from contextlib import asynccontextmanager

    from fastapi import FastAPI, Request, Response

    cls = handle._cls
    instance = cls()
    ready = {"ok": False}

    @asynccontextmanager
    async def lifespan(_app):
        if handle._init_name:
            await _maybe_await(getattr(instance, handle._init_name)())
        ready["ok"] = True
        yield

    app = FastAPI(title=handle.spec.name, lifespan=lifespan)

    @app.get("/ping")
    async def ping():
        if not ready["ok"]:
            # 204 responses must not carry a body; sending json here makes
            # the asgi server reject the response instead of signalling
            # "initializing"
            return Response(status_code=204)
        return {"status": "healthy"}

    def route_field(route, attr, key):
        # routes may be objects (attribute access) or mappings (key access)
        return getattr(route, attr, None) or route[key]

    def make_endpoint(fn):
        # factory so each endpoint closes over its own bound method
        async def endpoint(request: Request):
            body = None
            if request.method in ("POST", "PUT", "PATCH", "DELETE"):
                try:
                    body = await request.json()
                except Exception:  # noqa: BLE001 - empty/non-json body
                    body = None
            if body is not None:
                return await _maybe_await(fn(body))
            return await _maybe_await(fn())

        return endpoint

    for route in handle.spec.routes:
        method = route_field(route, "method", "method")
        path = route_field(route, "path", "path")
        handler_name = route_field(route, "handler_name", "handler")
        bound = getattr(instance, handler_name)
        app.add_api_route(
            path, make_endpoint(bound), methods=[method], name=handler_name
        )

    return app
142+
143+
144+
def _with_ping(app) -> Any:
    """wrap a bare asgi callable so GET /ping answers 200 healthy."""
    payload = b'{"status":"healthy"}'
    headers = [
        (b"content-type", b"application/json"),
        (b"content-length", str(len(payload)).encode()),
    ]

    async def wrapped(scope, receive, send):
        is_ping = (
            scope.get("type") == "http"
            and scope.get("path") == "/ping"
            and scope.get("method") == "GET"
        )
        if not is_ping:
            # everything else (including lifespan) goes to the user's app
            await app(scope, receive, send)
            return
        await send(
            {"type": "http.response.start", "status": 200, "headers": headers}
        )
        await send({"type": "http.response.body", "body": payload})

    return wrapped


def _build_factory_app(handle) -> Any:
    """call the user's asgi factory and ensure /ping exists.

    - if the returned app already exposes a "/ping" route it is returned
      untouched.
    - if it offers a fastapi-style ``get`` decorator, a /ping route is
      registered on it.
    - otherwise (a plain asgi callable) it is wrapped so GET /ping is
      answered before delegating everything else to the user's app.
    """
    app = handle._asgi_factory()

    routes = getattr(app, "routes", None) or []
    if any(getattr(r, "path", None) == "/ping" for r in routes):
        return app

    register = getattr(app, "get", None)
    if callable(register):

        async def ping():
            return {"status": "healthy"}

        register("/ping")(ping)
        return app

    # no route-registration api available: intercept /ping at the asgi layer
    return _with_ping(app)
156+
157+
158+
def _build_live_app() -> Any:
    """generic /execute server for dev sessions (source per request).

    exposes /ping (always healthy) and POST /execute, which hands the
    request's "input" value (or the whole body when there is no "input"
    key) to the task runner's execute_request and returns its result.
    """
    import threading

    from fastapi import FastAPI

    app = FastAPI(title="runpod-live-api")
    # one execution at a time, matching the behaviour of the previous
    # event-loop-bound handler
    run_lock = threading.Lock()

    @app.get("/ping")
    async def ping():
        return {"status": "healthy"}

    # plain ``def`` so fastapi runs it in its threadpool: execute_request is
    # a blocking call, and running it on the event loop would stall /ping
    # for the whole duration of a request
    @app.post("/execute")
    def execute(request: dict):
        from runpod.runtimes.task.runner import execute_request

        with run_lock:
            return execute_request(request.get("input", request))

    return app
175+
176+
177+
def build_app() -> Any:
    """pick the serving mode and return the asgi app to run.

    without a deployed artifact the live /execute app is returned.
    otherwise the resource's ApiHandle is loaded and served either as a
    class-based api (handle._cls set) or via its asgi factory.
    """
    if not _is_deployed():
        return _build_live_app()

    handle = _load_api_handle()
    builder = _build_factory_app if handle._cls is None else _build_class_app
    return builder(handle)
184+
185+
186+
def main() -> None:
    """build the asgi app and serve it with uvicorn on all interfaces."""
    import uvicorn

    app = build_app()
    server_options = {
        "host": "0.0.0.0",
        "port": PORT,
        "timeout_keep_alive": 600,
        "log_level": "info",
    }
    uvicorn.run(app, **server_options)
196+
197+
198+
if __name__ == "__main__":
199+
main()
Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""stdlib-only bootstrap for queue endpoints on custom images.
1+
"""stdlib-only bootstrap for serverless endpoints on custom images.
22
33
deployed code reaches the pod via the host's artifact delivery (any pod
44
in a flash environment gets the build tarball, regardless of image).
@@ -7,15 +7,18 @@
77
phase unpack wait for and extract the artifact into /app
88
phase runtime pip install the runpod package if missing
99
phase deps pip install this resource's manifest dependencies
10-
phase worker exec the real worker (runpod.runtimes.queue.worker)
10+
phase worker exec the real runtime: runpod.runtimes.queue.worker
11+
or runpod.runtimes.api.server, selected by
12+
RUNPOD_RUNTIME_KIND (queue | api)
1113
1214
if any phase fails, instead of crash-looping silently, a minimal
1315
job-take loop starts and answers every job with the structured error so
1416
the failure is visible in job responses, not just container logs.
1517
16-
delivery: baked into the runtime images, or injected base64 in a
17-
template env var and booted via dockerArgs on custom images. stdlib
18-
only; must run on any image with a python3 binary.
18+
this module is the entrypoint for every serverless runtime image
19+
(runpod/queue, runpod/api) and for custom images, where it is injected
20+
base64 in a template env var and booted via dockerArgs. stdlib only;
21+
must run on any image with a python3 binary.
1922
"""
2023

2124
import json
@@ -84,6 +87,10 @@ def _pip_install(packages, phase):
8487
raise PhaseError(phase, result.stderr[-3000:])
8588

8689

90+
def _runtime_kind():
    """return the runtime kind selected by RUNPOD_RUNTIME_KIND.

    the value is stripped and lower-cased so "API" or " api " still
    select the api runtime; an unset or blank variable means "queue".
    """
    kind = os.environ.get("RUNPOD_RUNTIME_KIND", "").strip().lower()
    return kind or "queue"
92+
93+
8794
def _ensure_runtime():
8895
try:
8996
import runpod.runtimes.queue.worker # noqa: F401
@@ -95,7 +102,10 @@ def _ensure_runtime():
95102
# prerelease testing); defaults to the published package
96103
spec = os.environ.get("RUNPOD_PACKAGE_SPEC", "runpod")
97104
_log(f"worker runtime not in image, installing {spec}")
98-
_pip_install([spec, "cloudpickle"], "runtime")
105+
packages = [spec, "cloudpickle"]
106+
if _runtime_kind() == "api":
107+
packages.append("uvicorn>=0.30")
108+
_pip_install(packages, "runtime")
99109

100110

101111
def _resource_entry():
@@ -126,10 +136,13 @@ def _install_deps(entry):
126136

127137

128138
def _run_worker():
129-
env = dict(os.environ, RUNPOD_APP_DIR=APP_DIR)
130-
result = subprocess.run(
131-
[sys.executable, "-m", "runpod.runtimes.queue.worker"], env=env
139+
module = (
140+
"runpod.runtimes.api.server"
141+
if _runtime_kind() == "api"
142+
else "runpod.runtimes.queue.worker"
132143
)
144+
env = dict(os.environ, RUNPOD_APP_DIR=APP_DIR)
145+
result = subprocess.run([sys.executable, "-m", module], env=env)
133146
sys.exit(result.returncode)
134147

135148

runpod/runtimes/queue/Dockerfile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@ FROM python:${PYTHON_VERSION}-slim
33

44
ENV DEBIAN_FRONTEND=noninteractive \
55
TZ=Etc/UTC \
6-
PYTHONUNBUFFERED=1
6+
PYTHONUNBUFFERED=1 \
7+
RUNPOD_RUNTIME_KIND=queue
78

89
RUN apt-get update && apt-get install -y --no-install-recommends \
910
git ca-certificates \
@@ -13,4 +14,4 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
1314
COPY . /src
1415
RUN pip install --no-cache-dir /src cloudpickle && rm -rf /src
1516

16-
CMD ["python", "-m", "runpod.runtimes.queue.bootstrap"]
17+
CMD ["python", "-m", "runpod.runtimes.bootstrap"]

0 commit comments

Comments
 (0)