-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgpu-preflight.service.ts
More file actions
106 lines (91 loc) · 2.82 KB
/
gpu-preflight.service.ts
File metadata and controls
106 lines (91 loc) · 2.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import type { HostCheckResult } from '../types/doctor.types.js';
import { runCommand } from '../utils/process.js';
/**
 * Subset of the `docker info --format '{{json .}}'` payload that the GPU
 * detection helpers in this file read. All fields are optional because the
 * daemon may omit them — presumably older Docker releases or non-CDI setups;
 * TODO(review): confirm against real `docker info` output.
 */
interface DockerInfoPayload {
  // CDI compute devices advertised by the daemon; matched by ID below.
  DiscoveredDevices?: Array<{
    ID?: string;
  }>;
  // Configured OCI runtimes, keyed by runtime name (e.g. "nvidia").
  Runtimes?: Record<string, unknown>;
}
/**
 * Fails fast when the host does not expose an NVIDIA GPU to Docker.
 *
 * @throws Error carrying the failing check's detail message when the GPU
 *         preflight check does not pass.
 */
export async function assertNvidiaGpuRuntime(): Promise<void> {
  const { ok, detail } = await checkNvidiaGpuRuntime();
  if (ok) {
    return;
  }
  throw new Error(detail);
}
/**
 * Validates that Atlas Lab can access an NVIDIA GPU from Docker for Ollama inference.
 *
 * Runs two host-scoped probes in order: `nvidia-smi` to enumerate GPUs, then
 * `docker info` to confirm the daemon exposes an NVIDIA runtime or CDI device.
 *
 * @returns A named check result; `ok` is false with a human-readable `detail`
 *          on the first probe that fails.
 */
export async function checkNvidiaGpuRuntime(): Promise<HostCheckResult> {
  // Shared shape for every failure path of this check.
  const fail = (detail: string): HostCheckResult => ({
    name: 'NVIDIA GPU',
    ok: false,
    detail
  });

  const smiProbe = await runCommand(
    'nvidia-smi',
    ['--query-gpu=name', '--format=csv,noheader'],
    {
      allowFailure: true,
      captureOutput: true,
      scope: 'host'
    }
  );
  const detectedGpus = parseNvidiaGpuNames(smiProbe.stdout);
  // Treat "command failed" and "command succeeded but listed nothing" alike.
  if (smiProbe.exitCode !== 0 || detectedGpus.length === 0) {
    return fail('No NVIDIA GPU detected on the host. Atlas Lab now expects GPU-backed Ollama by default.');
  }

  const infoProbe = await runCommand('docker', ['info', '--format', '{{json .}}'], {
    allowFailure: true,
    captureOutput: true,
    scope: 'host'
  });
  if (infoProbe.exitCode !== 0) {
    // Prefer stderr, fall back to stdout, then a generic message.
    return fail(infoProbe.stderr.trim() || infoProbe.stdout.trim() || 'Could not inspect Docker GPU capabilities.');
  }
  if (!dockerInfoSupportsNvidiaGpu(infoProbe.stdout)) {
    return fail(`Host GPU detected (${detectedGpus.join(', ')}), but Docker does not expose an NVIDIA compute device. Enable NVIDIA GPU support in Docker before starting Atlas Lab.`);
  }

  return {
    name: 'NVIDIA GPU',
    ok: true,
    detail: `${detectedGpus.join(', ')} available for Ollama inference`
  };
}
/**
 * Parses the GPU names reported by `nvidia-smi --query-gpu=name --format=csv,noheader`.
 *
 * @param stdout - Raw process output; one GPU name per line.
 * @returns Trimmed, non-empty lines in their original order.
 */
export function parseNvidiaGpuNames(stdout: string): string[] {
  const names: string[] = [];
  for (const rawLine of stdout.split(/\r?\n/gu)) {
    const trimmed = rawLine.trim();
    if (trimmed.length > 0) {
      names.push(trimmed);
    }
  }
  return names;
}
/**
 * Detects whether Docker advertises an NVIDIA-capable runtime or CDI compute device.
 *
 * @param stdout - Raw `docker info --format '{{json .}}'` output.
 * @returns True when a runtime named "nvidia" exists or a discovered device ID
 *          matches an NVIDIA/Docker GPU pattern; false on unparseable input.
 */
export function dockerInfoSupportsNvidiaGpu(stdout: string): boolean {
  let parsed: unknown;
  try {
    parsed = JSON.parse(stdout);
  } catch {
    // Unparseable daemon output means we cannot prove GPU support.
    return false;
  }
  const info = parsed as {
    DiscoveredDevices?: Array<{ ID?: string }>;
    Runtimes?: Record<string, unknown>;
  };

  // A configured runtime literally named "nvidia" is sufficient on its own.
  const hasNvidiaRuntime = Object.keys(info.Runtimes ?? {}).some(
    (runtimeName) => runtimeName.toLowerCase() === 'nvidia'
  );
  if (hasNvidiaRuntime) {
    return true;
  }

  // Otherwise look for a CDI compute device, excluding Docker's WebGPU IDs.
  const devices = info.DiscoveredDevices ?? [];
  return devices.some((device) => {
    const id = device.ID?.toLowerCase() ?? '';
    return /nvidia\.com\/gpu/iu.test(id) || /docker\.com\/gpu=(?!webgpu)/iu.test(id);
  });
}