File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 2020
2121import uvicorn
2222from fastapi import FastAPI , HTTPException
23- from tensorrt_llm ._torch .auto_deploy import LLM , AutoDeployConfig
23+ from tensorrt_llm ._torch .auto_deploy import LLM
2424from tensorrt_llm .builder import BuildConfig
2525from tensorrt_llm .llmapi .llm import RequestOutput
2626from tensorrt_llm .sampling_params import SamplingParams
@@ -48,8 +48,7 @@ def build_runner_from_config(args) -> LLM:
4848 build_config = BuildConfig (max_seq_len = args .max_seq_len , max_batch_size = args .max_batch_size )
4949 build_config .plugin_config .tokens_per_block = args .max_seq_len
5050
51- # setup AD config
52- ad_config = AutoDeployConfig (
51+ llm = LLM (
5352 model = args .ckpt_path ,
5453 compile_backend = args .compile_backend ,
5554 device = args .device ,
@@ -58,9 +57,8 @@ def build_runner_from_config(args) -> LLM:
5857 max_seq_len = args .max_seq_len ,
5958 max_num_tokens = args .max_num_tokens ,
6059 model_kwargs = model_kwargs ,
61- attn_backend = "triton " ,
60+ attn_backend = "flashinfer " ,
6261 )
63- llm = LLM (** ad_config .to_llm_kwargs ())
6462
6563 return llm
6664
You can’t perform that action at this time.
0 commit comments