Skip to content

Commit feb7482

Browse files
committed
create a round robin proxy
Signed-off-by: Samuel Shen <slshen@uchicago.edu>
1 parent ecb11e6 commit feb7482

5 files changed

Lines changed: 56 additions & 0 deletions

File tree

2-serving-engines/flat/centralized-cache/configs/redis.yaml

Whitespace-only changes.
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
from fastapi import FastAPI, Request
2+
from fastapi.responses import StreamingResponse
3+
from contextlib import asynccontextmanager
4+
import httpx
5+
6+
TARGET_BASE_URLS = [
7+
"http://localhost:8001"
8+
]
9+
10+
current_index = 0
11+
12+
@asynccontextmanager
13+
async def lifespan(app: FastAPI):
14+
app.state.client = httpx.AsyncClient()
15+
yield
16+
await app.state.client.aclose()
17+
18+
app = FastAPI(lifespan=lifespan)
19+
20+
async def stream_response(request, backend_url, client):
21+
async with client.stream(
22+
method = request.method,
23+
url = backend_url,
24+
headers = dict(request.headers),
25+
content = await request.body(),
26+
) as backend_response:
27+
28+
yield backend_response.headers, backend_response.status_code
29+
30+
async for chunk in backend_response.aiter_bytes():
31+
yield chunk
32+
33+
34+
@app.api_route("/{full_path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH"])
35+
async def proxy(full_path: str, request: Request):
36+
global current_index
37+
38+
target_base = TARGET_BASE_URLS[current_index]
39+
current_index = (current_index + 1) % len(TARGET_BASE_URLS)
40+
target_url = f"{target_base}/{full_path}"
41+
42+
method = request.method
43+
headers = dict(request.headers)
44+
headers.pop("host", None)
45+
body = await request.body()
46+
client: httpx.AsyncClient = request.app.state.client
47+
48+
stream_generator = stream_response(request, target_url, client)
49+
50+
headeres, status_code = await anext(stream_generator)
51+
52+
return StreamingResponse(
53+
stream_generator,
54+
status_code = status_code,
55+
headers = {key: value for key, value in headers.items() if key.lower() not in {"transfer-encoding", "content-length"}},
56+
)

2-serving-engines/flat/centralized-cache/run-infinistore-llama8B.sh

Whitespace-only changes.

2-serving-engines/flat/centralized-cache/run-mooncake-llama8B.sh

Whitespace-only changes.

2-serving-engines/flat/centralized-cache/run-redis-llama8B.sh

Whitespace-only changes.

0 commit comments

Comments
 (0)