Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions config/models/MiniMaxAI/MiniMax-M2.5.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# config/models/MiniMaxAI/MiniMax-M2.5.yaml
# Cluster-scoped base-model registration for MiniMaxAI/MiniMax-M2.5.
apiVersion: ome.io/v1beta1
kind: ClusterBaseModel
metadata:
  name: minimax-m2-5
spec:
  modelCapabilities:
    - TEXT_TO_TEXT
  vendor: MiniMaxAI
  displayName: minimax.minimax-m2-5
  modelArchitecture: MiniMaxM2ForCausalLM
  disabled: false
  # Version strings are quoted so they are never re-typed as numbers.
  version: "1.0.0"
  modelFormat:
    name: safetensors
    version: "1.0.0"
  modelFramework:
    name: transformers
    version: "4.46.1"
  modelParameterSize: 229B
  quantization: fp8
  storage:
    # Source of the weights and the node-local directory they are placed in.
    storageUri: hf://MiniMaxAI/MiniMax-M2.5
    path: /raid/models/MiniMaxAI/MiniMax-M2.5
    # NOTE(review): presumably the name of the secret holding the
    # Hugging Face access token - confirm against the cluster setup.
    key: hf-token
24 changes: 24 additions & 0 deletions config/models/mistralai/Devstral-2-123B-Instruct-2512.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# config/models/mistralai/Devstral-2-123B-Instruct-2512.yaml
# Cluster-scoped base-model registration for
# mistralai/Devstral-2-123B-Instruct-2512.
apiVersion: ome.io/v1beta1
kind: ClusterBaseModel
metadata:
  name: devstral-2-123b-instruct-2512
spec:
  modelCapabilities:
    - TEXT_TO_TEXT
  vendor: mistralai
  displayName: mistralai.devstral-2-123b-instruct-2512
  modelArchitecture: Ministral3ForCausalLM
  disabled: false
  # Version strings are quoted so they are never re-typed as numbers.
  version: "1.0.0"
  modelFormat:
    name: safetensors
    version: "1.0.0"
  modelFramework:
    name: transformers
    # Pre-release framework version; keep it quoted.
    version: "5.0.0.dev0"
  modelParameterSize: 123B
  quantization: fp8
  storage:
    # Source of the weights and the node-local directory they are placed in.
    storageUri: hf://mistralai/Devstral-2-123B-Instruct-2512
    path: /raid/models/mistralai/Devstral-2-123B-Instruct-2512
    # NOTE(review): presumably the name of the secret holding the
    # Hugging Face access token - confirm against the cluster setup.
    key: hf-token
25 changes: 25 additions & 0 deletions config/models/mistralai/Devstral-Small-2-24B-Instruct-2512.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# config/models/mistralai/Devstral-Small-2-24B-Instruct-2512.yaml
# Cluster-scoped base-model registration for
# mistralai/Devstral-Small-2-24B-Instruct-2512.
apiVersion: ome.io/v1beta1
kind: ClusterBaseModel
metadata:
  name: devstral-small-2-24b-instruct-2512
spec:
  modelCapabilities:
    - IMAGE_TEXT_TO_TEXT
    # NOTE(review): confirm this checkpoint and its runtime actually
    # accept video input before advertising the capability.
    - VIDEO_TEXT_TO_TEXT
  vendor: mistralai
  displayName: mistralai.devstral-small-2-24b-instruct-2512
  modelArchitecture: Mistral3ForConditionalGeneration
  disabled: false
  # Version strings are quoted so they are never re-typed as numbers.
  version: "1.0.0"
  modelFormat:
    name: safetensors
    version: "1.0.0"
  modelFramework:
    name: transformers
    # Pre-release framework version; keep it quoted.
    version: "5.0.0.dev0"
  modelParameterSize: 24B
  quantization: fp8
  storage:
    # Source of the weights and the node-local directory they are placed in.
    storageUri: hf://mistralai/Devstral-Small-2-24B-Instruct-2512
    path: /raid/models/mistralai/Devstral-Small-2-24B-Instruct-2512
    # NOTE(review): presumably the name of the secret holding the
    # Hugging Face access token - confirm against the cluster setup.
    key: hf-token
8 changes: 8 additions & 0 deletions config/models/zai-org/GLM-4.5V.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,16 @@ spec:
- VIDEO_TEXT_TO_TEXT
vendor: zai-org
displayName: zai-org.glm-4.5v
modelArchitecture: Glm4vMoeForConditionalGeneration
disabled: false
version: "1.0.0"
modelFormat:
name: safetensors
version: "1.0.0"
modelFramework:
name: transformers
version: "4.57.1"
modelParameterSize: 106B
storage:
storageUri: hf://zai-org/GLM-4.5V
path: /raid/models/zai-org/GLM-4.5V
Expand Down
24 changes: 24 additions & 0 deletions config/models/zai-org/GLM-4.6-FP8.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# config/models/zai-org/GLM-4.6-FP8.yaml
# Cluster-scoped base-model registration for zai-org/GLM-4.6-FP8.
apiVersion: ome.io/v1beta1
kind: ClusterBaseModel
metadata:
  name: glm-4-6-fp8
spec:
  modelCapabilities:
    - TEXT_TO_TEXT
  vendor: zai-org
  displayName: zai-org.glm-4.6-fp8
  modelArchitecture: Glm4MoeForCausalLM
  disabled: false
  # Version strings are quoted so they are never re-typed as numbers.
  version: "1.0.0"
  modelFormat:
    name: safetensors
    version: "1.0.0"
  modelFramework:
    name: transformers
    version: "4.54.0"
  modelParameterSize: 355B
  quantization: fp8
  storage:
    # Source of the weights and the node-local directory they are placed in.
    storageUri: hf://zai-org/GLM-4.6-FP8
    path: /raid/models/zai-org/GLM-4.6-FP8
    # NOTE(review): presumably the name of the secret holding the
    # Hugging Face access token - confirm against the cluster setup.
    key: hf-token
24 changes: 24 additions & 0 deletions config/models/zai-org/GLM-4.7-FP8.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# config/models/zai-org/GLM-4.7-FP8.yaml
# Cluster-scoped base-model registration for zai-org/GLM-4.7-FP8.
apiVersion: ome.io/v1beta1
kind: ClusterBaseModel
metadata:
  name: glm-4-7-fp8
spec:
  modelCapabilities:
    - TEXT_TO_TEXT
  vendor: zai-org
  displayName: zai-org.glm-4.7-fp8
  modelArchitecture: Glm4MoeForCausalLM
  disabled: false
  # Version strings are quoted so they are never re-typed as numbers.
  version: "1.0.0"
  modelFormat:
    name: safetensors
    version: "1.0.0"
  modelFramework:
    name: transformers
    # NOTE(review): this is lower than the 4.54.0 recorded for
    # GLM-4.6-FP8, which declares the same Glm4MoeForCausalLM
    # architecture - confirm against the checkpoint's own config.
    version: "4.52.3"
  modelParameterSize: 355B
  quantization: fp8
  storage:
    # Source of the weights and the node-local directory they are placed in.
    storageUri: hf://zai-org/GLM-4.7-FP8
    path: /raid/models/zai-org/GLM-4.7-FP8
    # NOTE(review): presumably the name of the secret holding the
    # Hugging Face access token - confirm against the cluster setup.
    key: hf-token
5 changes: 5 additions & 0 deletions config/runtimes/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ resources:
- srt/mistral-7b-instruct-rt.yaml
- srt/mixtral-8x7b-instruct-pd-rt.yaml
- srt/mixtral-8x7b-instruct-rt.yaml
- srt/zai-org/glm-text-fp8-tp8-rt.yaml
# vLLM runtimes
- vllm/e5-mistral-7b-instruct-rt.yaml
- vllm/llama-3-1-405b-instruct-fp8-rt.yaml
Expand All @@ -50,3 +51,7 @@ resources:
- vllm/llama-4-scout-17b-16e-instruct-rt.yaml
- vllm/mistral-7b-instruct-rt.yaml
- vllm/mixtral-8x7b-instruct-rt.yaml
- vllm/zai-org/glm-vl-tp4-rt.yaml
- vllm/minimax/minimax-m2-fp8-tp8-rt.yaml
- vllm/mistralai/devstral-2-123b-instruct-fp8-tp4-rt.yaml
- vllm/mistralai/devstral-small-2-24b-instruct-tp2-rt.yaml
160 changes: 0 additions & 160 deletions config/runtimes/srt/zai-org/glm-4-5v-rt.yaml

This file was deleted.

Loading
Loading