Skip to content

Commit 8b3fccf

Browse files
authored
Merge pull request MCSManager#2074 from TamakiIroha3/master
add docker gpu support
2 parents f19392e + 01e41cf commit 8b3fccf

File tree

11 files changed

+349
-7
lines changed

11 files changed

+349
-7
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ production-code/
1717
test.js
1818
.idea/
1919
ai-api.key
20+
.vs/
2021

2122
# Diagnostic reports (https://nodejs.org/api/report.html)
2223
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json

common/global.d.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,14 @@ declare global {
102102
uploadSpeedLimit?: number;
103103
/** Download speed limit in KB/s */
104104
downloadSpeedLimit?: number;
105+
/** Whether to enable GPU passthrough */
106+
gpuEnabled?: boolean;
107+
/** GPU count: -1 = all GPUs, 0 = none, positive integer = specific count */
108+
gpuCount?: number;
109+
/** Specific GPU device IDs, e.g. ["0","1"] or ["GPU-xxxx"]. Mutually exclusive with a positive gpuCount (gpuCount > 0) */
110+
gpuDeviceIds?: string[];
111+
/** GPU driver name, default "nvidia" */
112+
gpuDriver?: string;
105113
}
106114

107115
interface IPanelResponseProtocol {

daemon/src/entity/instance/Instance_config.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,11 @@ export default class InstanceConfig implements IGlobalInstanceConfig {
8686
workingDir: "/data",
8787
env: [],
8888
changeWorkdir: true,
89-
labels: []
89+
labels: [],
90+
gpuEnabled: false,
91+
gpuCount: -1,
92+
gpuDeviceIds: [],
93+
gpuDriver: "nvidia"
9094
};
9195

9296
public pingConfig = {

daemon/src/entity/instance/instance.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,10 @@ export default class Instance extends EventEmitter {
246246
configureEntityParams(this.config.docker, cfg.docker, "capDrop");
247247
configureEntityParams(this.config.docker, cfg.docker, "devices");
248248
configureEntityParams(this.config.docker, cfg.docker, "privileged", Boolean);
249+
configureEntityParams(this.config.docker, cfg.docker, "gpuEnabled", Boolean);
250+
configureEntityParams(this.config.docker, cfg.docker, "gpuCount", Number);
251+
configureEntityParams(this.config.docker, cfg.docker, "gpuDeviceIds");
252+
configureEntityParams(this.config.docker, cfg.docker, "gpuDriver", String);
249253
}
250254
if (cfg.pingConfig) {
251255
configureEntityParams(this.config.pingConfig, cfg.pingConfig, "ip", String);

daemon/src/service/docker_process_service.ts

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,80 @@ export class SetupDockerContainer extends AsyncTask {
191191

192192
const privileged = dockerConfig.privileged || false;
193193

194+
// GPU DeviceRequests
195+
let gpuDeviceRequests: any[] | undefined = undefined;
196+
if (dockerConfig.gpuEnabled) {
197+
const gpuCount = dockerConfig.gpuCount ?? -1;
198+
const gpuDeviceIds = dockerConfig.gpuDeviceIds ?? [];
199+
const gpuDriver = dockerConfig.gpuDriver ?? "";
200+
201+
// Validate gpuCount: must be integer >= -1 and <= 128 (reasonable upper bound)
202+
if (!Number.isInteger(gpuCount) || gpuCount < -1 || gpuCount > 128) {
203+
throw new Error(
204+
$t("TXT_CODE_gpu_invalid_count", { v: String(gpuCount) })
205+
);
206+
}
207+
208+
// Validate gpuDeviceIds: at most 128 items; each must be a non-empty string of at most 128 characters containing only [a-zA-Z0-9_-]
209+
if (gpuDeviceIds.length > 128) {
210+
throw new Error(
211+
$t("TXT_CODE_gpu_invalid_device_id", { v: `(${gpuDeviceIds.length} items)` })
212+
);
213+
}
214+
const gpuIdPattern = /^[a-zA-Z0-9_-]+$/;
215+
for (const id of gpuDeviceIds) {
216+
if (typeof id !== "string" || !id.trim() || id.length > 128 || !gpuIdPattern.test(id)) {
217+
throw new Error(
218+
$t("TXT_CODE_gpu_invalid_device_id", { v: id })
219+
);
220+
}
221+
}
222+
223+
// Validate gpuDriver: if set, must contain only letters and digits, max 32 chars
224+
if (gpuDriver && (gpuDriver.length > 32 || !/^[a-zA-Z0-9]+$/.test(gpuDriver))) {
225+
throw new Error(
226+
$t("TXT_CODE_gpu_invalid_driver", { v: gpuDriver })
227+
);
228+
}
229+
230+
// Conflict check: gpuDeviceIds and gpuCount > 0 are mutually exclusive
231+
if (gpuDeviceIds.length > 0 && gpuCount > 0) {
232+
throw new Error($t("TXT_CODE_gpu_conflict_count_and_ids"));
233+
}
234+
235+
// gpuCount === 0 with no deviceIds => GPU is effectively disabled: log a warning and skip building the device request
236+
if (gpuCount === 0 && gpuDeviceIds.length === 0) {
237+
logger.warn(
238+
`[SetupDockerContainer] GPU enabled but gpuCount=0 and no deviceIds specified, GPU will not be allocated. Instance: ${instance.instanceUuid}`
239+
);
240+
} else {
241+
// Warn if privileged mode is also enabled
242+
if (privileged) {
243+
logger.warn(
244+
`[SetupDockerContainer] GPU passthrough is configured alongside privileged mode. ` +
245+
`In privileged mode the container already has access to all host devices. Instance: ${instance.instanceUuid}`
246+
);
247+
}
248+
249+
const deviceRequest: any = {
250+
Driver: gpuDriver,
251+
Capabilities: [["gpu"]],
252+
Options: {}
253+
};
254+
255+
if (gpuDeviceIds.length > 0) {
256+
// Specific device IDs take priority, Count must be 0
257+
deviceRequest.DeviceIDs = gpuDeviceIds;
258+
deviceRequest.Count = 0;
259+
} else {
260+
// Allocate by count (-1 = all, positive integer = specific count)
261+
deviceRequest.Count = gpuCount;
262+
}
263+
264+
gpuDeviceRequests = [deviceRequest];
265+
}
266+
}
267+
194268
let cwd = instance.absoluteCwdPath();
195269
const defaultInstanceDir = InstanceSubsystem.getInstanceDataDir();
196270
const hostRealPath = toText(process.env.MCSM_DOCKER_WORKSPACE_PATH);
@@ -233,6 +307,9 @@ export class SetupDockerContainer extends AsyncTask {
233307
memorySwap ? (memorySwap / 1024 / 1024).toFixed(2) : "--"
234308
} MB`
235309
);
310+
logger.info(
311+
`GPU: ${gpuDeviceRequests ? JSON.stringify(gpuDeviceRequests) : "disabled"}`
312+
);
236313

237314
if (workingDir) {
238315
instance.println("INFO", $t("TXT_CODE_e76e49e9") + cwd + " --> " + workingDir + "\n");
@@ -325,7 +402,8 @@ export class SetupDockerContainer extends AsyncTask {
325402
CapAdd: capAdd,
326403
CapDrop: capDrop,
327404
Devices: parsedDevices,
328-
Privileged: privileged
405+
Privileged: privileged,
406+
DeviceRequests: gpuDeviceRequests
329407
},
330408
// Only set NetworkingConfig for non-host network modes
331409
// host mode uses the host's network stack and doesn't support EndpointsConfig

frontend/src/types/const.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,11 @@ export const defaultDockerConfig: IGlobalInstanceDockerConfig = {
145145
changeWorkdir: true,
146146
memorySwap: undefined,
147147
memorySwappiness: undefined,
148-
labels: []
148+
labels: [],
149+
gpuEnabled: false,
150+
gpuCount: -1,
151+
gpuDeviceIds: [],
152+
gpuDriver: "nvidia"
149153
};
150154

151155
export const defaultInstanceJavaConfig: IInstanceJavaConfig = {

frontend/src/widgets/instance/dialogs/InstanceDetail.vue

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,8 @@ const initFormDetail = () => {
184184
imageSelectMethod: "SELECT"
185185
};
186186
}
187+
initGpuAllocMode();
188+
initGpuDeviceIdsText();
187189
};
188190
189191
const VNodes = defineComponent({
@@ -229,6 +231,46 @@ const isGlobalTerminal = computed(() => {
229231
230232
const isDockerMode = computed(() => formData?.value?.instance?.config?.processType === "docker");
231233
234+
// GPU allocation mode: "all" | "count" | "deviceIds"
235+
const gpuAllocMode = ref<"all" | "count" | "deviceIds">("all");
236+
237+
const initGpuAllocMode = () => {
238+
const docker = formData.value.instance?.config?.docker;
239+
if (!docker) return;
240+
if (docker.gpuDeviceIds && docker.gpuDeviceIds.length > 0) {
241+
gpuAllocMode.value = "deviceIds";
242+
} else if (
243+
typeof docker.gpuCount === "number" &&
244+
docker.gpuCount >= 1
245+
) {
246+
gpuAllocMode.value = "count";
247+
} else {
248+
gpuAllocMode.value = "all";
249+
}
250+
};
251+
252+
const gpuDeviceIdsText = ref("");
253+
const initGpuDeviceIdsText = () => {
254+
const docker = formData.value.instance?.config?.docker;
255+
gpuDeviceIdsText.value = docker?.gpuDeviceIds?.join(",") || "";
256+
};
257+
258+
const onGpuAllocModeChange = () => {
259+
const docker = formData.value.instance?.config?.docker;
260+
if (!docker) return;
261+
if (gpuAllocMode.value === "all") {
262+
docker.gpuCount = -1;
263+
docker.gpuDeviceIds = [];
264+
gpuDeviceIdsText.value = "";
265+
} else if (gpuAllocMode.value === "count") {
266+
docker.gpuCount = 1;
267+
docker.gpuDeviceIds = [];
268+
gpuDeviceIdsText.value = "";
269+
} else if (gpuAllocMode.value === "deviceIds") {
270+
docker.gpuCount = 0;
271+
}
272+
};
273+
232274
const loadNetworkModes = async () => {
233275
try {
234276
const modes = await executeGetNetworkModeList({
@@ -306,6 +348,28 @@ const encodeFormData = () => {
306348
?.split(",")
307349
?.map((v) => v.trim())
308350
?.filter((v) => v !== "");
351+
352+
// Encode GPU device IDs from comma-separated text
353+
if (gpuAllocMode.value === "deviceIds") {
354+
postData.config.docker.gpuDeviceIds = gpuDeviceIdsText.value
355+
.split(",")
356+
.map((v) => v.trim())
357+
.filter((v) => v !== "");
358+
postData.config.docker.gpuCount = 0;
359+
} else if (gpuAllocMode.value === "all") {
360+
postData.config.docker.gpuCount = -1;
361+
postData.config.docker.gpuDeviceIds = [];
362+
} else {
363+
// count mode: gpuCount is already set via input
364+
postData.config.docker.gpuDeviceIds = [];
365+
}
366+
367+
// If GPU is disabled, reset gpuCount and gpuDeviceIds to their defaults (gpuDriver is left unchanged)
368+
if (!postData.config.docker.gpuEnabled) {
369+
postData.config.docker.gpuCount = -1;
370+
postData.config.docker.gpuDeviceIds = [];
371+
}
372+
309373
return postData;
310374
}
311375
throw new Error("Ref Options is null");
@@ -1459,6 +1523,99 @@ defineExpose({
14591523
/>
14601524
</a-form-item>
14611525
</a-col>
1526+
1527+
<!-- GPU Configuration -->
1528+
<a-col :xs="24" :offset="0">
1529+
<a-divider orientation="left">{{ t("TXT_CODE_gpu_section_title") }}</a-divider>
1530+
</a-col>
1531+
<a-col :xs="24" :lg="8" :offset="0">
1532+
<a-form-item>
1533+
<a-typography-title :level="5">{{ t("TXT_CODE_gpu_enable") }}</a-typography-title>
1534+
<a-typography-paragraph>
1535+
<a-tooltip :title="t('TXT_CODE_gpu_enable_help')" placement="top">
1536+
<a-typography-text type="secondary" class="typography-text-ellipsis">
1537+
{{ t("TXT_CODE_gpu_enable_help") }}
1538+
</a-typography-text>
1539+
</a-tooltip>
1540+
</a-typography-paragraph>
1541+
<a-switch
1542+
v-model:checked="formData.instance.config.docker.gpuEnabled"
1543+
:checked-children="t('TXT_CODE_gpu_enabled')"
1544+
:un-checked-children="t('TXT_CODE_gpu_disabled')"
1545+
/>
1546+
</a-form-item>
1547+
</a-col>
1548+
<template v-if="formData.instance?.config?.docker?.gpuEnabled">
1549+
<a-col :xs="24" :lg="8" :offset="0">
1550+
<a-form-item>
1551+
<a-typography-title :level="5">{{ t("TXT_CODE_gpu_driver") }}</a-typography-title>
1552+
<a-typography-paragraph>
1553+
<a-tooltip :title="t('TXT_CODE_gpu_driver_help')" placement="top">
1554+
<a-typography-text type="secondary" class="typography-text-ellipsis">
1555+
{{ t("TXT_CODE_gpu_driver_help") }}
1556+
</a-typography-text>
1557+
</a-tooltip>
1558+
</a-typography-paragraph>
1559+
<a-input
1560+
v-model:value="formData.instance.config.docker.gpuDriver"
1561+
:placeholder="t('TXT_CODE_gpu_driver_placeholder')"
1562+
/>
1563+
</a-form-item>
1564+
</a-col>
1565+
<a-col :xs="24" :lg="8" :offset="0">
1566+
<a-form-item>
1567+
<a-typography-title :level="5">{{ t("TXT_CODE_gpu_alloc_mode") }}</a-typography-title>
1568+
<a-typography-paragraph>
1569+
<a-tooltip :title="t('TXT_CODE_gpu_alloc_mode_help')" placement="top">
1570+
<a-typography-text type="secondary" class="typography-text-ellipsis">
1571+
{{ t("TXT_CODE_gpu_alloc_mode_help") }}
1572+
</a-typography-text>
1573+
</a-tooltip>
1574+
</a-typography-paragraph>
1575+
<a-radio-group v-model:value="gpuAllocMode" @change="onGpuAllocModeChange">
1576+
<a-radio-button value="all">{{ t("TXT_CODE_gpu_alloc_all") }}</a-radio-button>
1577+
<a-radio-button value="count">{{ t("TXT_CODE_gpu_alloc_count") }}</a-radio-button>
1578+
<a-radio-button value="deviceIds">{{ t("TXT_CODE_gpu_alloc_device_ids") }}</a-radio-button>
1579+
</a-radio-group>
1580+
</a-form-item>
1581+
</a-col>
1582+
<a-col v-if="gpuAllocMode === 'count'" :xs="24" :lg="8" :offset="0">
1583+
<a-form-item>
1584+
<a-typography-title :level="5">{{ t("TXT_CODE_gpu_count") }}</a-typography-title>
1585+
<a-typography-paragraph>
1586+
<a-tooltip :title="t('TXT_CODE_gpu_count_help')" placement="top">
1587+
<a-typography-text type="secondary" class="typography-text-ellipsis">
1588+
{{ t("TXT_CODE_gpu_count_help") }}
1589+
</a-typography-text>
1590+
</a-tooltip>
1591+
</a-typography-paragraph>
1592+
<a-input-number
1593+
v-model:value="formData.instance.config.docker.gpuCount"
1594+
:min="1"
1595+
:max="128"
1596+
:precision="0"
1597+
style="width: 100%"
1598+
:placeholder="t('TXT_CODE_gpu_count_placeholder')"
1599+
/>
1600+
</a-form-item>
1601+
</a-col>
1602+
<a-col v-if="gpuAllocMode === 'deviceIds'" :xs="24" :lg="16" :offset="0">
1603+
<a-form-item>
1604+
<a-typography-title :level="5">{{ t("TXT_CODE_gpu_device_ids") }}</a-typography-title>
1605+
<a-typography-paragraph>
1606+
<a-tooltip :title="t('TXT_CODE_gpu_device_ids_help')" placement="top">
1607+
<a-typography-text type="secondary" class="typography-text-ellipsis">
1608+
{{ t("TXT_CODE_gpu_device_ids_help") }}
1609+
</a-typography-text>
1610+
</a-tooltip>
1611+
</a-typography-paragraph>
1612+
<a-input
1613+
v-model:value="gpuDeviceIdsText"
1614+
:placeholder="t('TXT_CODE_gpu_device_ids_placeholder')"
1615+
/>
1616+
</a-form-item>
1617+
</a-col>
1618+
</template>
14621619
</a-row>
14631620
</a-tab-pane>
14641621
</a-tabs>

languages/en_US.json

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3400,5 +3400,28 @@
34003400
"TXT_CODE_SSO_ERROR_AUTH_FAILED": "SSO Authentication Failed",
34013401
"TXT_CODE_SSO_ERROR_SESSION_EXPIRED": "SSO Session Expired",
34023402
"TXT_CODE_bdb9f7bb": "Network rate limiting failed to enable! Please ensure the control panel can read system commands or network adapter information, such as tc, ip, modprobe, ifb, etc., or choose to disable network rate limiting for this instance in the settings.",
3403-
"TXT_CODE_49731eec": "Network rate limiting failed to enable! Please ensure the control panel can read container information or network adapter information, or choose to disable network rate limiting for this instance in the settings."
3403+
"TXT_CODE_49731eec": "Network rate limiting failed to enable! Please ensure the control panel can read container information or network adapter information, or choose to disable network rate limiting for this instance in the settings.",
3404+
"TXT_CODE_gpu_section_title": "GPU Configuration",
3405+
"TXT_CODE_gpu_enable": "Enable GPU",
3406+
"TXT_CODE_gpu_enable_help": "Enable GPU passthrough for this Docker container. Requires NVIDIA Container Toolkit installed on the host.",
3407+
"TXT_CODE_gpu_enabled": "Enabled",
3408+
"TXT_CODE_gpu_disabled": "Disabled",
3409+
"TXT_CODE_gpu_driver": "GPU Driver",
3410+
"TXT_CODE_gpu_driver_help": "GPU driver name, typically nvidia. Only letters and digits are allowed.",
3411+
"TXT_CODE_gpu_driver_placeholder": "e.g., nvidia",
3412+
"TXT_CODE_gpu_alloc_mode": "GPU Allocation Mode",
3413+
"TXT_CODE_gpu_alloc_mode_help": "Choose how to allocate GPUs: all available GPUs, a specific count, or specific device IDs. These options are mutually exclusive.",
3414+
"TXT_CODE_gpu_alloc_all": "All GPUs",
3415+
"TXT_CODE_gpu_alloc_count": "By Count",
3416+
"TXT_CODE_gpu_alloc_device_ids": "By Device ID",
3417+
"TXT_CODE_gpu_count": "GPU Count",
3418+
"TXT_CODE_gpu_count_help": "Number of GPUs to allocate, must be a positive integer.",
3419+
"TXT_CODE_gpu_count_placeholder": "e.g., 1",
3420+
"TXT_CODE_gpu_device_ids": "GPU Device IDs",
3421+
"TXT_CODE_gpu_device_ids_help": "Comma-separated GPU device IDs. Each ID may only contain letters, digits, hyphens, and underscores. e.g., 0,1 or GPU-xxxx",
3422+
"TXT_CODE_gpu_device_ids_placeholder": "e.g., 0,1 or GPU-abc123",
3423+
"TXT_CODE_gpu_invalid_count": "Invalid GPU count: {{v}}. Must be an integer >= -1.",
3424+
"TXT_CODE_gpu_invalid_device_id": "Invalid GPU device ID: {{v}}. Only letters, digits, hyphens and underscores are allowed.",
3425+
"TXT_CODE_gpu_invalid_driver": "Invalid GPU driver name: {{v}}. Only letters and digits are allowed.",
3426+
"TXT_CODE_gpu_conflict_count_and_ids": "GPU configuration conflict: cannot specify both GPU count and device IDs at the same time."
34043427
}

0 commit comments

Comments
 (0)