Skip to content

Commit d94fa35

Browse files
committed
Change default workers and max-concurrency when launching api-server
1 parent 6847891 commit d94fa35

2 files changed

Lines changed: 6 additions & 3 deletions

File tree

fastdeploy/entrypoints/api_server.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ def main():
136136
parser = FlexibleArgumentParser()
137137
parser.add_argument("--port", default=9904, type=int, help="port to the http server")
138138
parser.add_argument("--host", default="0.0.0.0", type=str, help="host to the http server")
139-
parser.add_argument("--workers", default=1, type=int, help="number of workers")
139+
parser.add_argument("--workers", default=4, type=int, help="number of workers")
140140
parser = EngineArgs.add_cli_args(parser)
141141
args = parser.parse_args()
142142
launch_api_server(args)

fastdeploy/entrypoints/openai/utils.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -341,9 +341,10 @@ async def close(self):
341341

342342

343343
def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
344+
_is_multi_server = os.environ.get("FD_ENABLE_MULTI_API_SERVER") == "1"
344345
parser.add_argument("--port", default=8000, type=int, help="port to the http server")
345346
parser.add_argument("--host", default="0.0.0.0", type=str, help="host to the http server")
346-
parser.add_argument("--workers", default=1, type=int, help="number of workers")
347+
parser.add_argument("--workers", default=1 if _is_multi_server else 4, type=int, help="number of workers")
347348
parser.add_argument("--metrics-port", default=None, type=int, help="port for metrics server")
348349
parser.add_argument("--controller-port", default=-1, type=int, help="port for controller server")
349350
parser.add_argument(
@@ -352,7 +353,9 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
352353
type=int,
353354
help="max waiting time for connection, if set value -1 means no waiting time limit",
354355
)
355-
parser.add_argument("--max-concurrency", default=512, type=int, help="max concurrency")
356+
parser.add_argument(
357+
"--max-concurrency", default=512 if _is_multi_server else 2048, type=int, help="max concurrency"
358+
)
356359

357360
parser.add_argument(
358361
"--enable-mm-output", action="store_true", help="Enable 'multimodal_content' field in response output. "

0 commit comments

Comments
 (0)