Skip to content

Commit 17e373d

Browse files
authored
add v1/models && qwen3_coder stream fc (#1252)
1 parent 93dd2a6 commit 17e373d

7 files changed

Lines changed: 932 additions & 138 deletions

File tree

lightllm/common/mamba_cache_mem_manager/__init__.py

Whitespace-only changes.

lightllm/server/api_cli.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,12 @@ def make_argument_parser() -> argparse.ArgumentParser:
9898
default="default_model_name",
9999
help="just help to distinguish internal model name, use 'host:port/get_model_name' to get",
100100
)
101+
parser.add_argument(
102+
"--model_owner",
103+
type=str,
104+
default=None,
105+
help="the model owner, if not set, will use lightllm",
106+
)
101107

102108
parser.add_argument(
103109
"--model_dir",

lightllm/server/api_http.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import asyncio
2020
import collections
2121
import time
22+
2223
import uvloop
2324
import requests
2425
import base64
@@ -57,6 +58,8 @@
5758
ChatCompletionResponse,
5859
CompletionRequest,
5960
CompletionResponse,
61+
ModelCard,
62+
ModelListResponse,
6063
)
6164
from .build_prompt import build_prompt, init_tokenizer
6265

@@ -72,6 +75,9 @@ class G_Objs:
7275
g_generate_stream_func: Callable = None
7376
httpserver_manager: Union[HttpServerManager, HttpServerManagerForPDMaster] = None
7477
shared_token_load: TokenLoad = None
78+
# OpenAI-compatible "created" timestamp for /v1/models.
79+
# Should be stable for the lifetime of this server process.
80+
model_created: int = None
7581

7682
def set_args(self, args: StartArgs):
7783
self.args = args
@@ -101,6 +107,8 @@ def set_args(self, args: StartArgs):
101107
self.httpserver_manager = HttpServerManager(args=args)
102108
dp_size_in_node = max(1, args.dp // args.nnodes) # 兼容多机纯tp的运行模式,这时候 1 // 2 == 0, 需要兼容
103109
self.shared_token_load = TokenLoad(f"{get_unique_server_name()}_shared_token_load", dp_size_in_node)
110+
if self.model_created is None:
111+
self.model_created = int(time.time())
104112

105113

106114
g_objs = G_Objs()
@@ -258,6 +266,26 @@ async def completions(request: CompletionRequest, raw_request: Request) -> Respo
258266
return resp
259267

260268

269+
@app.get("/v1/models", response_model=ModelListResponse)
@app.post("/v1/models", response_model=ModelListResponse)
async def get_models(raw_request: Request):
    """OpenAI-compatible model listing endpoint.

    Returns a single-entry model list describing the model served by this
    process. The `created` timestamp is `g_objs.model_created`, fixed once in
    `G_Objs.set_args` so it stays stable for the lifetime of the server.

    Args:
        raw_request: the incoming FastAPI request (unused; kept for the
            standard endpoint signature).

    Returns:
        ModelListResponse with one ModelCard for the loaded model.
    """
    model_name = g_objs.args.model_name
    max_model_len = g_objs.args.max_req_total_len
    # When the user did not override --model_name, fall back to the last path
    # component of --model_dir so the card shows something meaningful.
    if model_name == "default_model_name" and g_objs.args.model_dir:
        model_name = os.path.basename(g_objs.args.model_dir.rstrip("/"))

    # --model_owner defaults to None (see api_cli.py), but ModelCard.owned_by
    # is a plain `str`; passing None would fail pydantic validation. Use the
    # documented fallback "lightllm" instead.
    owner = g_objs.args.model_owner or "lightllm"

    return ModelListResponse(
        data=[
            ModelCard(
                id=model_name,
                created=g_objs.model_created,
                max_model_len=max_model_len,
                owned_by=owner,
            )
        ]
    )
288+
261289
@app.get("/tokens")
262290
@app.post("/tokens")
263291
async def tokens(request: Request):

lightllm/server/api_models.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ class ToolCall(BaseModel):
8787

8888
id: Optional[str] = None
8989
index: Optional[int] = None
90-
type: Literal["function"] = "function"
90+
type: Optional[Literal["function"]] = None
9191
function: FunctionResponse
9292

9393

@@ -370,3 +370,16 @@ class CompletionStreamResponse(BaseModel):
370370
@field_validator("id", mode="before")
371371
def ensure_id_is_str(cls, v):
372372
return str(v)
373+
374+
375+
class ModelCard(BaseModel):
    """One entry in the OpenAI-compatible ``/v1/models`` listing.

    Field order is part of the serialized wire format and is kept as-is.
    """

    # Model identifier reported to clients (e.g. the served model's name).
    id: str
    # OpenAI API discriminator; always the literal "model".
    object: str = "model"
    # Unix timestamp; defaults to instantiation time when the caller does
    # not supply a stable server-level value.
    created: int = Field(default_factory=lambda: int(time.time()))
    owned_by: str = "lightllm"
    # Maximum total request length, when known; not part of the strict
    # OpenAI schema, exposed as an extension.
    max_model_len: Optional[int] = None
382+
383+
class ModelListResponse(BaseModel):
    """OpenAI-style list envelope returned by ``/v1/models``."""

    # OpenAI API discriminator; always the literal "list".
    object: str = "list"
    # The model cards being listed (this server returns exactly one).
    data: List[ModelCard]

0 commit comments

Comments
 (0)