diff --git a/configs/inference/sglang.json b/configs/inference/sglang.json index 4eb85eab..785e78b9 100644 --- a/configs/inference/sglang.json +++ b/configs/inference/sglang.json @@ -6,9 +6,9 @@ "engine_images": [ { "compute_type": "gpu", - "image": "opencsghq/sglang:v0.5.9-cu130", + "image": "opencsghq/sglang:v0.5.11-cu130", "driver_version": "13.0", - "engine_version": "v0.5.9", + "engine_version": "v0.5.11", "extra_archs": [ "ApertusForCausalLM", "ArceeForCausalLM", @@ -19,10 +19,11 @@ "BailingMoeV2_5ForCausalLM", "BertForSequenceClassification", "BertModel", - "ChatGLMForConditionalGeneration", "Contriever", - "DeepseekOCR2ForCausalLM", + "ChatGLMForConditionalGeneration", + "DeepseekOCRForCausalLM", "DeepseekV32ForCausalLM", + "DotsOCRForCausalLM", "DotsVLMForCausalLM", "Ernie4_5_ForCausalLM", "Ernie4_5_MoeForCausalLM", @@ -31,47 +32,72 @@ "FalconH1ForCausalLM", "GPT2LMHeadModel", "GPTBigCodeForCausalLM", + "GPTJForCausalLM", + "Gemma4ForCausalLM", + "Gemma4ForConditionalGeneration", "Glm4ForCausalLM", "Glm4MoeForCausalLM", "Glm4MoeForCausalLMNextN", + "Glm4MoeLiteForCausalLM", "Glm4vForConditionalGeneration", "Glm4vMoeForConditionalGeneration", + "GlmAsrForConditionalGeneration", "GlmMoeDsaForCausalLM", "GlmOcrForConditionalGeneration", + "GlmOcrForConditionalGenerationNextN", "GptOssForCausalLM", "GraniteMoeHybridForCausalLM", "HunYuanDenseV1ForCausalLM", "HunYuanMoEV1ForCausalLM", "JetNemotronForCausalLM", "JetVLMForConditionalGeneration", + "KimiK25ForConditionalGeneration", "KimiLinearForCausalLM", + "KimiVLForConditionalGeneration", "LLaDA2MoeModelLM", + "LlavaForConditionalGeneration", "LlavaLlamaForCausalLM", "LlavaMistralForCausalLM", "LlavaQwenForCausalLM", "LongcatFlashForCausalLM", "LongcatFlashForCausalLMNextN", + "MiMoV2FlashForCausalLM", + "MiMoV2ForCausalLM", + "MiMoV2MTP", "MindSporeForCausalLM", "MiniMaxM2ForCausalLM", "Ministral3ForCausalLM", + "MistralForCausalLMEagle", "MistralLarge3ForCausalLM", "MistralLarge3ForCausalLMEagle", - "NemotronForCausalLM", "NemotronHForCausalLM", + "NemotronHForCausalLMMTP", + "NemotronH_Nano_Omni_Reasoning_V3", "NemotronH_Nano_VL_V2", "OPTForCausalLM", "OrionForCausalLM", "POINTSV15ChatModel", "PaddleOCRVLForConditionalGeneration", "Phi4FlashForCausalLM", - "Qwen3NextForCausalLM", + "Qwen2ForRewardModel", + "Qwen2ForSequenceClassification", "Qwen3_5MoeForConditionalGeneration", + "Qwen3ASRForConditionalGeneration", + "Qwen3ForRewardModel", + "Qwen3ForSequenceClassification", + "Qwen3NextForCausalLM", + "Qwen3NextForCausalLMMTP", + "Qwen3OmniMoeForConditionalGeneration", + "Qwen3VLForConditionalGeneration", + "Qwen3VLMoeForConditionalGeneration", + "Qwen3_5ForCausalLMMTP", "Qwen3_5ForConditionalGeneration", "Sarashina2VisionForCausalLM", "SmolLM3ForCausalLM", "SolarForCausalLM", "Starcoder2ForCausalLM", "Step3p5ForCausalLM", + "Step3p5MTP", "StepVLForConditionalGeneration", "TeleFLMForCausalLM", "VILAForConditionalGeneration" diff --git a/docker/inference/Dockerfile.sglang b/docker/inference/Dockerfile.sglang index 6fb574de..4f8b54c1 100644 --- a/docker/inference/Dockerfile.sglang +++ b/docker/inference/Dockerfile.sglang @@ -1,4 +1,4 @@ -FROM lmsysorg/sglang:v0.5.9-cu130 +FROM lmsysorg/sglang:v0.5.11-cu130 RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple RUN sed -i 's|http://archive.ubuntu.com/ubuntu|http://mirrors.aliyun.com/ubuntu|g' /etc/apt/sources.list && \ sed -i 's|http://security.ubuntu.com/ubuntu|http://mirrors.aliyun.com/ubuntu|g' /etc/apt/sources.list && \