We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 2c3fe8e · commit b26b54d · Copy full SHA for b26b54d
1 file changed
configs/inference/vllm.json
@@ -235,6 +235,11 @@
235
"value": "0",
236
"format": "--cpu-offload-gb %s"
237
},
238
+ {
239
+ "name": "data-parallel-size",
240
+ "value": "1",
241
+ "format": "--data-parallel-size %s"
242
+ },
243
{
244
"name": "pipeline-parallel-size",
245
"value": "1",
@@ -285,6 +290,11 @@
285
290
"value": "disable",
286
291
"format": "--enable-auto-tool-choice"
287
292
},
293
+ {
294
+ "name": "enable-expert-parallel",
295
+ "value": "disable",
296
+ "format": "--enable-expert-parallel"
297
+ },
288
298
{
289
299
"name": "limit-mm-per-prompt",
300
"value": "image=5,video=5",
0 commit comments