Skip to content

Commit 1a7e17b

Browse files
committed
Fix Host's GPU metrics calculation
1 parent 4f316bc commit 1a7e17b

7 files changed

Lines changed: 95 additions & 149 deletions

File tree

plugins/hypervisors/simulator/src/main/java/com/cloud/agent/manager/MockAgentManagerImpl.java

Lines changed: 28 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -245,66 +245,32 @@ public Map<AgentResourceBase, Map<String, String>> createServerResources(Map<Str
245245
*/
246246
private void createPhysicalGpuDevices(long hostId) {
247247
// 1. Create passthrough-only GPU devices across different NUMA nodes and PCI roots
248-
createPassthroughGpu(hostId, "00:01.0", "1234", "5678", "Apache CloudStack Simulator",
249-
"Simulated Graphics Card Pro", 0, "pci0000:00", 8192L, 4096L, 2160L, 1L);
250-
createPassthroughGpu(hostId, "00:02.0", "1234", "5678", "Apache CloudStack Simulator",
251-
"Simulated Graphics Card Pro", 0, "pci0000:00", 8192L, 4096L, 2160L, 1L);
252-
createPassthroughGpu(hostId, "00:03.0", "1234", "6789", "Apache CloudStack Simulator",
253-
"Simulated Graphics Card Basic", 1, "pci0000:00", 4096L, 1920L, 1080L, 2L);
254-
createPassthroughGpu(hostId, "00:04.0", "1234", "6789", "Apache CloudStack Simulator",
255-
"Simulated Graphics Card Basic", 1, "pci0000:00", 4096L, 1920L, 1080L, 2L);
256-
248+
createPassthroughGpu(hostId, "00:01.0", "1234", "5678", "ACS",
249+
"Simulated Graphics Card Basic", 0, "pci0000:00", 8192L, 4096L, 2160L, 1L);
250+
createPassthroughGpu(hostId, "00:02.0", "1234", "5678", "ACS",
251+
"Simulated Graphics Card Basic", 0, "pci0000:00", 8192L, 4096L, 2160L, 1L);
257252
// Additional passthrough GPUs on different PCI roots
258-
createPassthroughGpu(hostId, "17:00.0", "1234", "5678", "Apache CloudStack Simulator",
259-
"Simulated Graphics Card Pro", 2, "pci0000:17", 8192L, 4096L, 2160L, 1L);
260-
createPassthroughGpu(hostId, "3a:00.0", "1234", "6789", "Apache CloudStack Simulator",
261-
"Simulated Graphics Card Basic", 3, "pci0000:3a", 4096L, 1920L, 1080L, 2L);
262-
createPassthroughGpu(hostId, "5d:00.0", "1234", "5678", "Apache CloudStack Simulator",
263-
"Simulated Graphics Card Pro", 2, "pci0000:5d", 8192L, 4096L, 2160L, 1L);
264-
createPassthroughGpu(hostId, "80:00.0", "1234", "6789", "Apache CloudStack Simulator",
265-
"Simulated Graphics Card Basic", 3, "pci0000:80", 4096L, 1920L, 1080L, 2L);
266-
267-
// 2. Create GPUs with Virtual Functions support on different NUMA nodes
268-
MockGpuDeviceVO vfParentDevice1 = createVfCapableGpu(hostId, "00:05.0", "1234", "789a", "Apache CloudStack Simulator",
269-
"Simulated Graphics Card VF", 0, "pci0000:00");
270-
createVirtualFunctions(hostId, vfParentDevice1);
271-
272-
MockGpuDeviceVO vfParentDevice2 = createVfCapableGpu(hostId, "00:06.0", "1234", "789a", "Apache CloudStack Simulator",
273-
"Simulated Graphics Card VF", 1, "pci0000:00");
274-
createVirtualFunctions(hostId, vfParentDevice2);
275-
276-
MockGpuDeviceVO vfParentDevice3 = createVfCapableGpu(hostId, "17:01.0", "1234", "789a", "Apache CloudStack Simulator",
277-
"Simulated Graphics Card VF", 2, "pci0000:17");
278-
createVirtualFunctions(hostId, vfParentDevice3);
279-
280-
MockGpuDeviceVO vfParentDevice4 = createVfCapableGpu(hostId, "3a:01.0", "1234", "789a", "Apache CloudStack Simulator",
281-
"Simulated Graphics Card VF", 3, "pci0000:3a");
282-
createVirtualFunctions(hostId, vfParentDevice4);
253+
createPassthroughGpu(hostId, "00:03.0", "1234", "5678", "ACS",
254+
"Simulated Graphics Card Basic", 1, "pci0000:17", 8192L, 4096L, 2160L, 1L);
255+
createPassthroughGpu(hostId, "00:04.0", "1234", "5678", "ACS",
256+
"Simulated Graphics Card Basic", 1, "pci0000:5d", 8192L, 4096L, 2160L, 1L);
283257

284258
// 3. Create GPUs with MDEV support across different NUMA nodes
285-
MockGpuDeviceVO mdevParentDevice1 = createMdevCapableGpu(hostId, "00:07.0", "1234", "89ab", "Apache CloudStack Simulator",
286-
"Simulated Graphics Card MDEV", 0, "pci0000:00");
259+
MockGpuDeviceVO mdevParentDevice1 = createMdevCapableGpu(hostId, "00:05.0", "1234", "89ab", "ACS",
260+
"Simulated Graphics Card Pro", 0, "pci0000:00");
287261
createMdevDevices(hostId, mdevParentDevice1);
288262

289-
MockGpuDeviceVO mdevParentDevice2 = createMdevCapableGpu(hostId, "00:08.0", "1234", "89ab", "Apache CloudStack Simulator",
290-
"Simulated Graphics Card MDEV", 1, "pci0000:00");
263+
MockGpuDeviceVO mdevParentDevice2 = createMdevCapableGpu(hostId, "00:06.0", "1234", "89ab", "ACS",
264+
"Simulated Graphics Card Pro", 0, "pci0000:00");
291265
createMdevDevices(hostId, mdevParentDevice2);
292266

293-
MockGpuDeviceVO mdevParentDevice3 = createMdevCapableGpu(hostId, "17:02.0", "1234", "89ab", "Apache CloudStack Simulator",
294-
"Simulated Graphics Card MDEV", 2, "pci0000:17");
267+
MockGpuDeviceVO mdevParentDevice3 = createMdevCapableGpu(hostId, "00:07.0", "1234", "89ab", "ACS",
268+
"Simulated Graphics Card Pro", 1, "pci0000:17");
295269
createMdevDevices(hostId, mdevParentDevice3);
296270

297-
MockGpuDeviceVO mdevParentDevice4 = createMdevCapableGpu(hostId, "3a:02.0", "1234", "89ab", "Apache CloudStack Simulator",
298-
"Simulated Graphics Card MDEV", 3, "pci0000:3a");
271+
MockGpuDeviceVO mdevParentDevice4 = createMdevCapableGpu(hostId, "00:08.0", "1234", "89ab", "ACS",
272+
"Simulated Graphics Card Pro", 1, "pci0000:3a");
299273
createMdevDevices(hostId, mdevParentDevice4);
300-
301-
MockGpuDeviceVO mdevParentDevice5 = createMdevCapableGpu(hostId, "5d:01.0", "1234", "89ab", "Apache CloudStack Simulator",
302-
"Simulated Graphics Card MDEV", 2, "pci0000:5d");
303-
createMdevDevices(hostId, mdevParentDevice5);
304-
305-
MockGpuDeviceVO mdevParentDevice6 = createMdevCapableGpu(hostId, "80:01.0", "1234", "89ab", "Apache CloudStack Simulator",
306-
"Simulated Graphics Card MDEV", 3, "pci0000:80");
307-
createMdevDevices(hostId, mdevParentDevice6);
308274
}
309275

310276
/**
@@ -332,67 +298,6 @@ private void createPassthroughGpu(long hostId, String busAddress, String vendorI
332298
_mockGpuDeviceDao.persist(device);
333299
}
334300

335-
/**
336-
* Creates a GPU device capable of Virtual Functions
337-
*/
338-
private MockGpuDeviceVO createVfCapableGpu(long hostId, String busAddress, String vendorId, String deviceId,
339-
String vendorName, String deviceName, Integer numaNode, String pciRoot) {
340-
MockGpuDeviceVO device = new MockGpuDeviceVO();
341-
device.setBusAddress(busAddress);
342-
device.setVendorId(vendorId);
343-
device.setDeviceId(deviceId);
344-
device.setVendorName(vendorName);
345-
device.setDeviceName(deviceName);
346-
device.setHostId(hostId);
347-
device.setState(MockGpuDevice.State.Available);
348-
device.setDeviceType(GpuDevice.DeviceType.PCI);
349-
device.setProfileName("passthrough");
350-
device.setPassthroughEnabled(false);
351-
device.setNumaNode(numaNode);
352-
device.setPciRoot(pciRoot);
353-
MockGpuDeviceVO savedDevice = _mockGpuDeviceDao.persist(device);
354-
return savedDevice;
355-
}
356-
357-
/**
358-
* Creates Virtual Function devices as children of a VF-capable GPU
359-
*/
360-
private void createVirtualFunctions(long hostId, MockGpuDeviceVO parentDevice) {
361-
// Create 3 Virtual Function devices
362-
String[] vfProfiles = {"VF-P1", "VF-P2", "VF-P2"};
363-
Long[] maxVgpuPerGpu = {2L, 4L, 4L};
364-
String parentBussAdressPrefix = parentDevice.getBusAddress().substring(0, parentDevice.getBusAddress().length() - 1);
365-
String[] vfAddresses = {parentBussAdressPrefix + "1", parentBussAdressPrefix + "2", parentBussAdressPrefix + "3"};
366-
Long[] videoRam = {8192L, 4096L, 4096L};
367-
Long[] resoultionX = {4096L, 1920L, 1920L};
368-
Long[] resoultionY = {2160L, 1080L, 1080L};
369-
Long[] maxHeads = {1L, 2L, 2L};
370-
371-
for (int i = 0; i < vfAddresses.length; i++) {
372-
MockGpuDeviceVO vfDevice = new MockGpuDeviceVO();
373-
vfDevice.setBusAddress(vfAddresses[i]);
374-
vfDevice.setVendorId("1234");
375-
vfDevice.setDeviceId("789a"); // Different device ID for VF
376-
vfDevice.setVendorName("Apache CloudStack Simulator");
377-
vfDevice.setDeviceName("Simulated Graphics Card VF");
378-
vfDevice.setHostId(hostId);
379-
vfDevice.setState(MockGpuDevice.State.Available);
380-
vfDevice.setDeviceType(GpuDevice.DeviceType.PCI);
381-
vfDevice.setParentDeviceId(parentDevice.getId());
382-
vfDevice.setProfileName(vfProfiles[i]);
383-
vfDevice.setPassthroughEnabled(true);
384-
vfDevice.setMaxVgpuPerPgpu(maxVgpuPerGpu[i]);
385-
vfDevice.setVideoRam(videoRam[i]);
386-
vfDevice.setMaxResolutionX(resoultionX[i]);
387-
vfDevice.setMaxResolutionY(resoultionY[i]);
388-
vfDevice.setMaxHeads(maxHeads[i]);
389-
// VF devices inherit NUMA node and PCI root from parent
390-
vfDevice.setNumaNode(parentDevice.getNumaNode());
391-
vfDevice.setPciRoot(parentDevice.getPciRoot());
392-
_mockGpuDeviceDao.persist(vfDevice);
393-
}
394-
}
395-
396301
/**
397302
* Creates a GPU device capable of MDEV (Mediated Devices)
398303
*/
@@ -411,31 +316,30 @@ private MockGpuDeviceVO createMdevCapableGpu(long hostId, String busAddress, Str
411316
device.setPassthroughEnabled(false); // MDEV parent doesn't use passthrough
412317
device.setNumaNode(numaNode);
413318
device.setPciRoot(pciRoot);
414-
MockGpuDeviceVO savedDevice = _mockGpuDeviceDao.persist(device);
415-
return savedDevice;
319+
return _mockGpuDeviceDao.persist(device);
416320
}
417321

418322
/**
419323
* Creates MDEV devices with different profiles
420324
*/
421325
private void createMdevDevices(long hostId, MockGpuDeviceVO parentDevice) {
422326
// Create MDEV devices with different profiles (similar to NVIDIA/Intel naming)
423-
String[] mdevProfiles = { "sim-8q", "sim-4q", "sim-2q", "sim-1q", "sim-1q" };
327+
String[] mdevProfiles = { "sim-8q", "sim-4q", "sim-2q", "sim-2q"};
424328
String[] mdevUuids = {UUID.randomUUID().toString(), UUID.randomUUID().toString(), UUID.randomUUID().toString(),
425-
UUID.randomUUID().toString(), UUID.randomUUID().toString()};
426-
Long[] videoRam = {8192L, 4096L, 2048L, 1024L, 1024L};
427-
Long[] resoultionX = {4096L, 1920L, 1920L, 1920L, 1920L};
428-
Long[] resoultionY = {2160L, 1080L, 1080L, 1080L, 1080L};
429-
Long[] maxVgpuPerGpu = {2L, 4L, 8L, 16L, 16L};
430-
Long[] maxHeads = {1L, 2L, 4L, 8L, 8L};
329+
UUID.randomUUID().toString()};
330+
Long[] videoRam = {8192L, 4096L, 2048L, 2048L};
331+
Long[] resoultionX = {4096L, 1920L, 1920L, 1920L};
332+
Long[] resoultionY = {2160L, 1080L, 1080L, 1080L};
333+
Long[] maxVgpuPerGpu = {2L, 4L, 8L, 8L};
334+
Long[] maxHeads = {4L, 2L, 1L, 1L};
431335

432336
for (int i = 0; i < mdevProfiles.length; i++) {
433337
MockGpuDeviceVO mdevDevice = new MockGpuDeviceVO();
434338
mdevDevice.setBusAddress(mdevUuids[i]); // MDEV uses UUID as bus address
435-
mdevDevice.setVendorId("1234");
436-
mdevDevice.setDeviceId("89ab"); // Different device ID for MDEV
437-
mdevDevice.setVendorName("Apache CloudStack Simulator");
438-
mdevDevice.setDeviceName("Simulated Graphics Card MDEV");
339+
mdevDevice.setVendorId(parentDevice.getVendorId());
340+
mdevDevice.setDeviceId(parentDevice.getDeviceId());
341+
mdevDevice.setVendorName(parentDevice.getVendorName());
342+
mdevDevice.setDeviceName(parentDevice.getDeviceName() + " " + mdevProfiles[i]);
439343
mdevDevice.setHostId(hostId);
440344
mdevDevice.setState(MockGpuDevice.State.Available);
441345
mdevDevice.setDeviceType(GpuDevice.DeviceType.MDEV);

server/src/main/java/com/cloud/api/ApiResponseHelper.java

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2156,15 +2156,13 @@ public List<CapacityResponse> createCapacityResponse(List<? extends Capacity> re
21562156
result.get(0).getPodId(), result.get(0).getClusterId())) != null) {
21572157
HashMap<String, Long> vgpuVMs = ApiDBUtils.getVgpuVmsCount(result.get(0).getDataCenterId(), result.get(0).getPodId(), result.get(0).getClusterId());
21582158

2159-
float capacityUsed = 0;
2159+
long capacityUsed = 0;
21602160
long capacityMax = 0;
21612161
for (VgpuTypesInfo capacity : gpuCapacities) {
21622162
if (vgpuVMs.containsKey(capacity.getGroupName().concat(capacity.getModelName()))) {
2163-
capacityUsed += (float)vgpuVMs.get(capacity.getGroupName().concat(capacity.getModelName())) / capacity.getMaxVpuPerGpu();
2164-
}
2165-
if (capacity.getModelName().equals(GPU.GPUType.passthrough.toString())) {
2166-
capacityMax += capacity.getMaxCapacity();
2163+
capacityUsed += vgpuVMs.get(capacity.getGroupName().concat(capacity.getModelName()));
21672164
}
2165+
capacityMax += capacity.getMaxCapacity();
21682166
}
21692167

21702168
DataCenter zone = ApiDBUtils.findZoneById(result.get(0).getDataCenterId());
@@ -2185,11 +2183,11 @@ public List<CapacityResponse> createCapacityResponse(List<? extends Capacity> re
21852183
}
21862184
capacityResponse.setCapacityType(Capacity.CAPACITY_TYPE_GPU);
21872185
capacityResponse.setCapacityName(CapacityVO.getCapacityName(Capacity.CAPACITY_TYPE_GPU));
2188-
capacityResponse.setCapacityUsed((long)Math.ceil(capacityUsed));
2189-
capacityResponse.setCapacityAllocated((long)Math.ceil(capacityUsed));
2186+
capacityResponse.setCapacityUsed(capacityUsed);
2187+
capacityResponse.setCapacityAllocated(capacityUsed);
21902188
capacityResponse.setCapacityTotal(capacityMax);
21912189
if (capacityMax > 0) {
2192-
capacityResponse.setPercentUsed(format.format(capacityUsed / capacityMax * 100f));
2190+
capacityResponse.setPercentUsed(format.format((float)capacityUsed / capacityMax * 100f));
21932191
} else {
21942192
capacityResponse.setPercentUsed(format.format(0));
21952193
}

server/src/main/java/org/apache/cloudstack/gpu/GpuServiceImpl.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -827,10 +827,6 @@ public HashMap<String, HashMap<String, VgpuTypesInfo>> getGpuGroupDetailsFromGpu
827827
maxCapacity = 0L;
828828
remainingCapacity = 0L;
829829
}
830-
if (GpuDevice.State.Free.equals(device.getState()) && GpuDevice.ManagedState.Managed.equals(
831-
device.getManagedState())) {
832-
gpuDeviceInfo.setRemainingCapacity(gpuDeviceInfo.getRemainingCapacity() + 1);
833-
}
834830
gpuDeviceInfo.setRemainingCapacity(gpuDeviceInfo.getRemainingCapacity() + remainingCapacity);
835831
gpuDeviceInfo.setMaxVmCapacity(gpuDeviceInfo.getMaxCapacity() + maxCapacity);
836832
}

ui/public/locales/en.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1087,6 +1087,8 @@
10871087
"label.gpu.display": "GPU Display",
10881088
"label.gpulimit": "GPU limits",
10891089
"label.gpu.summary": "Summary",
1090+
"label.gputotal": "GPU Total",
1091+
"label.gpuused": "GPU Used",
10901092
"label.chart.info": "Information about the charts",
10911093
"label.group": "Group",
10921094
"label.group.optional": "Group (Optional)",

ui/src/components/view/GPUDevicesTab.vue

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@
8282
:rowKey="record => record.id"
8383
:childrenColumnName="'children'"
8484
:defaultExpandAllRows="true"
85+
:expandedRowKeys="expandedRowKeys"
86+
@expand="onExpand"
8587
:rowSelection="{
8688
selectedRowKeys: selectedGpuDeviceIds,
8789
onChange: onGpuDeviceSelectionChange
@@ -165,18 +167,22 @@
165167
</template>
166168
<template v-else-if="column.key === 'managedstate'">
167169
<Status
170+
v-if="!record.children || record.children.length === 0"
168171
:text="record.managedstate"
169172
:displayText="true"
170173
/>
174+
<span v-else></span>
171175
</template>
172176
<template v-else-if="column.key === 'state'">
173177
<Status
178+
v-if="!record.children || record.children.length === 0"
174179
:text="record.state"
175180
:displayText="true"
176181
/>
182+
<span v-else></span>
177183
</template>
178184
<template v-else-if="column.key === 'actions'">
179-
<a-space>
185+
<a-space v-if="!record.children || record.children.length === 0">
180186
<!-- Manage/Unmanage Action -->
181187
<a-popconfirm
182188
v-if="record.managedstate && record.managedstate.toLowerCase() === 'unmanaged'"
@@ -369,6 +375,7 @@ export default {
369375
selectedColumnKeys: [],
370376
columns: [],
371377
items: [],
378+
expandedRowKeys: [],
372379
selectedGpuDeviceIds: [],
373380
updateGpuDeviceModalVisible: false,
374381
selectedGpuDevice: null,
@@ -423,6 +430,9 @@ export default {
423430
if (!this.resource.id) {
424431
return
425432
}
433+
// Reset expanded keys when fetching new data
434+
this.expandedRowKeys = []
435+
426436
const params = {}
427437
if (this.resourceType === 'Host') {
428438
params.hostid = this.resource.id
@@ -474,15 +484,23 @@ export default {
474484
})
475485
})
476486
477-
// Build tree structure
487+
// Build tree structure and collect parent IDs that have children
488+
const expandedKeys = []
478489
const treeData = parentDevices.map(parent => {
479490
const children = vgpusByParent[parent.id] || []
480-
return {
481-
...parent,
482-
children: children.length > 0 ? children : undefined
491+
if (children.length > 0) {
492+
expandedKeys.push(parent.id)
493+
return {
494+
...parent,
495+
children: children
496+
}
483497
}
498+
return parent
484499
})
485500
501+
// Set expanded row keys for all parents with children
502+
this.expandedRowKeys = expandedKeys
503+
486504
if (treeData.length === 0) {
487505
// Sort standalone vGPU devices by busaddress
488506
return vgpuDevices.sort((a, b) => {
@@ -554,6 +572,15 @@ export default {
554572
onGpuDeviceSelectionChange (keys) {
555573
this.selectedGpuDeviceIds = keys
556574
},
575+
onExpand (expanded, record) {
576+
if (expanded) {
577+
if (!this.expandedRowKeys.includes(record.id)) {
578+
this.expandedRowKeys.push(record.id)
579+
}
580+
} else {
581+
this.expandedRowKeys = this.expandedRowKeys.filter(key => key !== record.id)
582+
}
583+
},
557584
customRowProps (record) {
558585
return {
559586
class: record.parentgpudeviceid ? 'vgpu-row' : 'parent-gpu-row'

0 commit comments

Comments
 (0)