1919import asyncio
2020import collections
2121import time
22+
2223import uvloop
2324import requests
2425import base64
5758 ChatCompletionResponse ,
5859 CompletionRequest ,
5960 CompletionResponse ,
61+ ModelCard ,
62+ ModelListResponse ,
6063)
6164from .build_prompt import build_prompt , init_tokenizer
6265
@@ -72,6 +75,9 @@ class G_Objs:
7275 g_generate_stream_func : Callable = None
7376 httpserver_manager : Union [HttpServerManager , HttpServerManagerForPDMaster ] = None
7477 shared_token_load : TokenLoad = None
78+ # OpenAI-compatible "created" timestamp for /v1/models.
79+ # Should be stable for the lifetime of this server process.
80+ model_created : int = None
7581
7682 def set_args (self , args : StartArgs ):
7783 self .args = args
@@ -101,6 +107,8 @@ def set_args(self, args: StartArgs):
101107 self .httpserver_manager = HttpServerManager (args = args )
102108 dp_size_in_node = max (1 , args .dp // args .nnodes ) # 兼容多机纯tp的运行模式,这时候 1 // 2 == 0, 需要兼容
103109 self .shared_token_load = TokenLoad (f"{ get_unique_server_name ()} _shared_token_load" , dp_size_in_node )
110+ if self .model_created is None :
111+ self .model_created = int (time .time ())
104112
105113
106114g_objs = G_Objs ()
@@ -258,6 +266,26 @@ async def completions(request: CompletionRequest, raw_request: Request) -> Respo
258266 return resp
259267
260268
@app.get("/v1/models", response_model=ModelListResponse)
@app.post("/v1/models", response_model=ModelListResponse)
async def get_models(raw_request: Request) -> ModelListResponse:
    """Return the OpenAI-style model list for this server.

    The list always contains exactly one ``ModelCard`` built from the start
    arguments stored on ``g_objs``.

    Args:
        raw_request: The incoming request. It is not read by this handler.

    Returns:
        A ``ModelListResponse`` whose single card carries the model id, the
        ``g_objs.model_created`` timestamp, ``max_req_total_len`` as
        ``max_model_len`` and ``model_owner`` as ``owned_by``.
    """
    args = g_objs.args
    model_name = args.model_name

    if model_name == "default_model_name" and args.model_dir:
        # The name still equals the literal "default_model_name", so report
        # the last component of the model directory instead.
        dir_name = os.path.basename(args.model_dir.rstrip("/"))
        # A model_dir made only of slashes (e.g. "/") yields an empty
        # basename; keep the configured name rather than advertising a
        # model with an empty id.
        if dir_name:
            model_name = dir_name

    card = ModelCard(
        id=model_name,
        created=g_objs.model_created,
        max_model_len=args.max_req_total_len,
        owned_by=args.model_owner,
    )
    return ModelListResponse(data=[card])
287+
288+
261289@app .get ("/tokens" )
262290@app .post ("/tokens" )
263291async def tokens (request : Request ):
0 commit comments