Skip to content

Commit 4f316bc

Browse files
committed
address comments
1 parent b84f02c commit 4f316bc

17 files changed

Lines changed: 477 additions & 43 deletions

File tree

api/src/main/java/org/apache/cloudstack/gpu/GpuDevice.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
public interface GpuDevice extends InternalIdentity, Identity {
2626

2727
enum State {
28-
Allocated, Free, Error,
28+
Allocated, Free, Error, PartiallyAllocated,
2929
}
3030

3131
enum ManagedState {

engine/schema/src/main/java/com/cloud/gpu/dao/GpuDeviceDao.java

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -66,4 +66,6 @@ Pair<List<GpuDeviceVO>, Integer> searchAndCountGpuDevices(
6666
List<Long> getDistinctGpuCardIds();
6767

6868
List<Long> getDistinctVgpuProfileIds();
69+
70+
List<GpuDeviceVO> listByParentGpuDeviceId(Long parentGpuDeviceId);
6971
}

engine/schema/src/main/java/com/cloud/gpu/dao/GpuDeviceDaoImpl.java

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -257,4 +257,11 @@ public List<Long> getDistinctVgpuProfileIds() {
257257
.distinct()
258258
.collect(Collectors.toList());
259259
}
260+
261+
@Override
262+
public List<GpuDeviceVO> listByParentGpuDeviceId(Long parentGpuDeviceId) {
263+
SearchCriteria<GpuDeviceVO> sc = allFieldSearch.create();
264+
sc.setParameters(PARENT_GPU_DEVICE_ID, parentGpuDeviceId);
265+
return listBy(sc);
266+
}
260267
}

plugins/hypervisors/simulator/src/main/java/com/cloud/agent/manager/MockAgentManagerImpl.java

Lines changed: 33 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -246,23 +246,23 @@ public Map<AgentResourceBase, Map<String, String>> createServerResources(Map<Str
246246
private void createPhysicalGpuDevices(long hostId) {
247247
// 1. Create passthrough-only GPU devices across different NUMA nodes and PCI roots
248248
createPassthroughGpu(hostId, "00:01.0", "1234", "5678", "Apache CloudStack Simulator",
249-
"Simulated Graphics Card Pro", 0, "pci0000:00");
249+
"Simulated Graphics Card Pro", 0, "pci0000:00", 8192L, 4096L, 2160L, 1L);
250250
createPassthroughGpu(hostId, "00:02.0", "1234", "5678", "Apache CloudStack Simulator",
251-
"Simulated Graphics Card Pro", 0, "pci0000:00");
251+
"Simulated Graphics Card Pro", 0, "pci0000:00", 8192L, 4096L, 2160L, 1L);
252252
createPassthroughGpu(hostId, "00:03.0", "1234", "6789", "Apache CloudStack Simulator",
253-
"Simulated Graphics Card Basic", 1, "pci0000:00");
253+
"Simulated Graphics Card Basic", 1, "pci0000:00", 4096L, 1920L, 1080L, 2L);
254254
createPassthroughGpu(hostId, "00:04.0", "1234", "6789", "Apache CloudStack Simulator",
255-
"Simulated Graphics Card Basic", 1, "pci0000:00");
255+
"Simulated Graphics Card Basic", 1, "pci0000:00", 4096L, 1920L, 1080L, 2L);
256256

257257
// Additional passthrough GPUs on different PCI roots
258258
createPassthroughGpu(hostId, "17:00.0", "1234", "5678", "Apache CloudStack Simulator",
259-
"Simulated Graphics Card Pro", 2, "pci0000:17");
259+
"Simulated Graphics Card Pro", 2, "pci0000:17", 8192L, 4096L, 2160L, 1L);
260260
createPassthroughGpu(hostId, "3a:00.0", "1234", "6789", "Apache CloudStack Simulator",
261-
"Simulated Graphics Card Basic", 3, "pci0000:3a");
261+
"Simulated Graphics Card Basic", 3, "pci0000:3a", 4096L, 1920L, 1080L, 2L);
262262
createPassthroughGpu(hostId, "5d:00.0", "1234", "5678", "Apache CloudStack Simulator",
263-
"Simulated Graphics Card Pro", 2, "pci0000:5d");
263+
"Simulated Graphics Card Pro", 2, "pci0000:5d", 8192L, 4096L, 2160L, 1L);
264264
createPassthroughGpu(hostId, "80:00.0", "1234", "6789", "Apache CloudStack Simulator",
265-
"Simulated Graphics Card Basic", 3, "pci0000:80");
265+
"Simulated Graphics Card Basic", 3, "pci0000:80", 4096L, 1920L, 1080L, 2L);
266266

267267
// 2. Create GPUs with Virtual Functions support on different NUMA nodes
268268
MockGpuDeviceVO vfParentDevice1 = createVfCapableGpu(hostId, "00:05.0", "1234", "789a", "Apache CloudStack Simulator",
@@ -311,7 +311,7 @@ private void createPhysicalGpuDevices(long hostId) {
311311
* Creates a basic passthrough-only GPU device
312312
*/
313313
private void createPassthroughGpu(long hostId, String busAddress, String vendorId, String deviceId,
314-
String vendorName, String deviceName, Integer numaNode, String pciRoot) {
314+
String vendorName, String deviceName, Integer numaNode, String pciRoot, Long videoRam, Long resolutionX, Long resolutionY, Long maxHeads) {
315315
MockGpuDeviceVO device = new MockGpuDeviceVO();
316316
device.setBusAddress(busAddress);
317317
device.setVendorId(vendorId);
@@ -325,6 +325,10 @@ private void createPassthroughGpu(long hostId, String busAddress, String vendorI
325325
device.setPassthroughEnabled(true);
326326
device.setNumaNode(numaNode);
327327
device.setPciRoot(pciRoot);
328+
device.setVideoRam(videoRam);
329+
device.setMaxResolutionX(resolutionX);
330+
device.setMaxResolutionY(resolutionY);
331+
device.setMaxHeads(maxHeads);
328332
_mockGpuDeviceDao.persist(device);
329333
}
330334

@@ -359,6 +363,10 @@ private void createVirtualFunctions(long hostId, MockGpuDeviceVO parentDevice) {
359363
Long[] maxVgpuPerGpu = {2L, 4L, 4L};
360364
String parentBussAdressPrefix = parentDevice.getBusAddress().substring(0, parentDevice.getBusAddress().length() - 1);
361365
String[] vfAddresses = {parentBussAdressPrefix + "1", parentBussAdressPrefix + "2", parentBussAdressPrefix + "3"};
366+
Long[] videoRam = {8192L, 4096L, 4096L};
367+
Long[] resoultionX = {4096L, 1920L, 1920L};
368+
Long[] resoultionY = {2160L, 1080L, 1080L};
369+
Long[] maxHeads = {1L, 2L, 2L};
362370

363371
for (int i = 0; i < vfAddresses.length; i++) {
364372
MockGpuDeviceVO vfDevice = new MockGpuDeviceVO();
@@ -374,6 +382,10 @@ private void createVirtualFunctions(long hostId, MockGpuDeviceVO parentDevice) {
374382
vfDevice.setProfileName(vfProfiles[i]);
375383
vfDevice.setPassthroughEnabled(true);
376384
vfDevice.setMaxVgpuPerPgpu(maxVgpuPerGpu[i]);
385+
vfDevice.setVideoRam(videoRam[i]);
386+
vfDevice.setMaxResolutionX(resoultionX[i]);
387+
vfDevice.setMaxResolutionY(resoultionY[i]);
388+
vfDevice.setMaxHeads(maxHeads[i]);
377389
// VF devices inherit NUMA node and PCI root from parent
378390
vfDevice.setNumaNode(parentDevice.getNumaNode());
379391
vfDevice.setPciRoot(parentDevice.getPciRoot());
@@ -411,7 +423,11 @@ private void createMdevDevices(long hostId, MockGpuDeviceVO parentDevice) {
411423
String[] mdevProfiles = { "sim-8q", "sim-4q", "sim-2q", "sim-1q", "sim-1q" };
412424
String[] mdevUuids = {UUID.randomUUID().toString(), UUID.randomUUID().toString(), UUID.randomUUID().toString(),
413425
UUID.randomUUID().toString(), UUID.randomUUID().toString()};
426+
Long[] videoRam = {8192L, 4096L, 2048L, 1024L, 1024L};
427+
Long[] resoultionX = {4096L, 1920L, 1920L, 1920L, 1920L};
428+
Long[] resoultionY = {2160L, 1080L, 1080L, 1080L, 1080L};
414429
Long[] maxVgpuPerGpu = {2L, 4L, 8L, 16L, 16L};
430+
Long[] maxHeads = {1L, 2L, 4L, 8L, 8L};
415431

416432
for (int i = 0; i < mdevProfiles.length; i++) {
417433
MockGpuDeviceVO mdevDevice = new MockGpuDeviceVO();
@@ -427,6 +443,10 @@ private void createMdevDevices(long hostId, MockGpuDeviceVO parentDevice) {
427443
mdevDevice.setProfileName(mdevProfiles[i]);
428444
mdevDevice.setPassthroughEnabled(true); // MDEV devices don't use passthrough
429445
mdevDevice.setMaxVgpuPerPgpu(maxVgpuPerGpu[i]);
446+
mdevDevice.setVideoRam(videoRam[i]);
447+
mdevDevice.setMaxResolutionX(resoultionX[i]);
448+
mdevDevice.setMaxResolutionY(resoultionY[i]);
449+
mdevDevice.setMaxHeads(maxHeads[i]);
430450
// MDEV devices inherit NUMA node and PCI root from parent
431451
mdevDevice.setNumaNode(parentDevice.getNumaNode());
432452
mdevDevice.setPciRoot(parentDevice.getPciRoot());
@@ -721,6 +741,10 @@ public List<VgpuTypesInfo> getGPUDevices(long hostId) {
721741
deviceId, deviceName, numaNode.toString(), pciRoot);
722742
vgpuTypesInfo.setPassthroughEnabled(isPassthrough);
723743
vgpuTypesInfo.setMaxVgpuPerGpu(maxVgpuPerGpu);
744+
vgpuTypesInfo.setVideoRam(mockGpuDevice.getVideoRam());
745+
vgpuTypesInfo.setMaxResolutionX(mockGpuDevice.getMaxResolutionX());
746+
vgpuTypesInfo.setMaxResolutionY(mockGpuDevice.getMaxResolutionY());
747+
vgpuTypesInfo.setMaxHeads(mockGpuDevice.getMaxHeads());
724748

725749
if (mockGpuDevice.getVmId() != null) {
726750
MockVMVO mockVm = _mockVmDao.findById(mockGpuDevice.getVmId());

plugins/hypervisors/simulator/src/main/java/com/cloud/simulator/MockGpuDeviceVO.java

Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,18 @@ public class MockGpuDeviceVO implements MockGpuDevice, InternalIdentity {
6161
@Column(name = "max_vgpu_per_pgpu", nullable = false)
6262
private Long maxVgpuPerPgpu = 1L;
6363

64+
@Column(name = "video_ram", nullable = false)
65+
private Long videoRam = 0L;
66+
67+
@Column(name = "max_resolution_x", nullable = false)
68+
private Long maxResolutionX = 0L;
69+
70+
@Column(name = "max_resolution_y", nullable = false)
71+
private Long maxResolutionY = 0L;
72+
73+
@Column(name = "max_heads", nullable = false)
74+
private Long maxHeads = 1L;
75+
6476
@Column(name = "state")
6577
@Enumerated(EnumType.STRING)
6678
private State state;
@@ -190,6 +202,38 @@ public void setMaxVgpuPerPgpu(Long maxVgpuPerGpu) {
190202
this.maxVgpuPerPgpu = maxVgpuPerGpu;
191203
}
192204

205+
public Long getVideoRam() {
206+
return videoRam;
207+
}
208+
209+
public void setVideoRam(Long videoRam) {
210+
this.videoRam = videoRam;
211+
}
212+
213+
public Long getMaxResolutionX() {
214+
return maxResolutionX;
215+
}
216+
217+
public void setMaxResolutionX(Long maxResolutionX) {
218+
this.maxResolutionX = maxResolutionX;
219+
}
220+
221+
public Long getMaxResolutionY() {
222+
return maxResolutionY;
223+
}
224+
225+
public void setMaxResolutionY(Long maxResolutionY) {
226+
this.maxResolutionY = maxResolutionY;
227+
}
228+
229+
public Long getMaxHeads() {
230+
return maxHeads;
231+
}
232+
233+
public void setMaxHeads(Long maxHeads) {
234+
this.maxHeads = maxHeads;
235+
}
236+
193237
public GpuDevice.DeviceType getDeviceType() {
194238
return deviceType;
195239
}

server/src/main/java/org/apache/cloudstack/gpu/GpuServiceImpl.java

Lines changed: 50 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -643,6 +643,33 @@ public void deallocateGpuDevicesForVmOnHost(long vmId) {
643643
device.setState(GpuDevice.State.Free);
644644
device.setVmId(null);
645645
gpuDeviceDao.persist(device);
646+
checkAndUpdateParentGpuDeviceState(device.getParentGpuDeviceId());
647+
}
648+
}
649+
}
650+
651+
protected void checkAndUpdateParentGpuDeviceState(Long parentGpuDeviceId) {
652+
if (parentGpuDeviceId != null) {
653+
GpuDeviceVO parentGpuDevice = gpuDeviceDao.findById(parentGpuDeviceId);
654+
checkAndUpdateParentGpuDeviceState(parentGpuDevice);
655+
}
656+
}
657+
658+
protected void checkAndUpdateParentGpuDeviceState(GpuDeviceVO parentDevice) {
659+
if (parentDevice != null) {
660+
List<GpuDeviceVO> childDevices = gpuDeviceDao.listByParentGpuDeviceId(parentDevice.getId());
661+
GpuDevice.State finalState = GpuDevice.State.Free;
662+
for (GpuDeviceVO childDevice : childDevices) {
663+
if (childDevice.getState().equals(GpuDevice.State.Allocated)) {
664+
finalState = GpuDevice.State.PartiallyAllocated;
665+
} else if (childDevice.getState().equals(GpuDevice.State.Error)) {
666+
finalState = GpuDevice.State.Error;
667+
break;
668+
}
669+
}
670+
if (!finalState.equals(parentDevice.getState())) {
671+
parentDevice.setState(finalState);
672+
gpuDeviceDao.update(parentDevice.getId(), parentDevice);
646673
}
647674
}
648675
}
@@ -662,6 +689,7 @@ public void doInTransactionWithoutResult(TransactionStatus status) {
662689
device.setState(GpuDevice.State.Allocated);
663690
device.setVmId(vmId);
664691
gpuDeviceDao.persist(device);
692+
checkAndUpdateParentGpuDeviceState(device.getParentGpuDeviceId());
665693
} else {
666694
throw new CloudRuntimeException(
667695
String.format("GPU device not found for VM %d on host %d", vmId, hostId));
@@ -736,14 +764,17 @@ public GPUDeviceTO doInTransaction(TransactionStatus status) {
736764
gpuDevice.setState(GpuDevice.State.Allocated);
737765
gpuDevice.setVmId(vm.getId());
738766
gpuDeviceDao.persist(gpuDevice);
767+
739768
VgpuTypesInfo vgpuInfo = new VgpuTypesInfo(gpuDevice.getType(), gpuCard.getName(),
740769
vgpuProfile.getName(), gpuDevice.getBusAddress(), gpuCard.getVendorId(),
741770
gpuCard.getVendorName(), gpuCard.getDeviceId(), gpuCard.getDeviceName());
742771
vgpuInfo.setDisplay(serviceOffering.getGpuDisplay());
772+
743773
if (gpuDevice.getParentGpuDeviceId() != null) {
744774
GpuDeviceVO parentGpuDevice = gpuDeviceDao.findById(gpuDevice.getParentGpuDeviceId());
745775
if (parentGpuDevice != null) {
746776
vgpuInfo.setParentBusAddress(parentGpuDevice.getBusAddress());
777+
checkAndUpdateParentGpuDeviceState(parentGpuDevice);
747778
}
748779
}
749780
vgpuInfoList.add(vgpuInfo);
@@ -840,9 +871,10 @@ public void addGpuDevicesToHost(final Host host, final List<VgpuTypesInfo> newGp
840871
createAndAddGpuDeviceToHost(deviceInfo, host, card, vgpuProfile);
841872
} else {
842873
// Update the device's info
874+
GpuDeviceVO parentGpuDevice = null;
843875
if (existingDevice.getParentGpuDeviceId() == null
844876
&& deviceInfo.getParentBusAddress() != null) {
845-
GpuDeviceVO parentGpuDevice = gpuDeviceDao.findByHostIdAndBusAddress(host.getId(),
877+
parentGpuDevice = gpuDeviceDao.findByHostIdAndBusAddress(host.getId(),
846878
deviceInfo.getParentBusAddress());
847879
if (parentGpuDevice != null) {
848880
existingDevice.setParentGpuDeviceId(parentGpuDevice.getId());
@@ -851,8 +883,9 @@ public void addGpuDevicesToHost(final Host host, final List<VgpuTypesInfo> newGp
851883
if (existingDevice.getPciRoot() == null) {
852884
existingDevice.setPciRoot(deviceInfo.getPciRoot());
853885
}
854-
setStateAndVmName(deviceInfo, existingDevice);
886+
setStateAndVmName(deviceInfo, existingDevice, parentGpuDevice);
855887
gpuDeviceDao.update(existingDevice.getId(), existingDevice);
888+
checkAndUpdateParentGpuDeviceState(existingDevice.getParentGpuDeviceId());
856889
}
857890
gpuDevicesToDisableMap.remove(deviceInfo.getBusAddress());
858891
}
@@ -863,6 +896,7 @@ public void addGpuDevicesToHost(final Host host, final List<VgpuTypesInfo> newGp
863896
device.setState(GpuDevice.State.Error);
864897
device.setManagedState(GpuDevice.ManagedState.Unmanaged);
865898
gpuDeviceDao.update(device.getId(), device);
899+
checkAndUpdateParentGpuDeviceState(device.getParentGpuDeviceId());
866900
}
867901
} finally {
868902
lock.unlock();
@@ -921,6 +955,10 @@ private GpuCardVO getGpuCardAndUpdateMap(VgpuTypesInfo deviceInfo, Map<String, G
921955

922956
// Create default passthrough profile for the new card
923957
VgpuProfileVO passthroughProfile = new VgpuProfileVO("passthrough", "passthrough", card.getId(), 1L);
958+
passthroughProfile.setVideoRam(deviceInfo.getVideoRam());
959+
passthroughProfile.setMaxResolutionX(deviceInfo.getMaxResolutionX());
960+
passthroughProfile.setMaxResolutionY(deviceInfo.getMaxResolutionY());
961+
passthroughProfile.setMaxHeads(deviceInfo.getMaxHeads());
924962
passthroughProfile = vgpuProfileDao.persist(passthroughProfile);
925963

926964
String vgpuProfileKey = card.getUuid() + " | " + deviceInfo.getModelName();
@@ -945,6 +983,10 @@ private VgpuProfileVO getVgpuProfileAndUpdateMap(VgpuTypesInfo deviceInfo, GpuCa
945983

946984
vgpuProfile = new VgpuProfileVO(deviceInfo.getModelName(), deviceInfo.getModelName(), card.getId(),
947985
deviceInfo.getMaxVpuPerGpu() != null ? deviceInfo.getMaxVpuPerGpu() : 1L);
986+
vgpuProfile.setVideoRam(deviceInfo.getVideoRam());
987+
vgpuProfile.setMaxResolutionX(deviceInfo.getMaxResolutionX());
988+
vgpuProfile.setMaxResolutionY(deviceInfo.getMaxResolutionY());
989+
vgpuProfile.setMaxHeads(deviceInfo.getMaxHeads());
948990
vgpuProfile = vgpuProfileDao.persist(vgpuProfile);
949991

950992
logger.info("Created vGPU profile: {}", vgpuProfile);
@@ -957,8 +999,9 @@ private VgpuProfileVO getVgpuProfileAndUpdateMap(VgpuTypesInfo deviceInfo, GpuCa
957999
private void createAndAddGpuDeviceToHost(VgpuTypesInfo deviceInfo, Host host, GpuCardVO card,
9581000
VgpuProfileVO vgpuProfile) {
9591001
Long parentGpuDeviceId = null;
1002+
GpuDeviceVO parentGpuDevice = null;
9601003
if (deviceInfo.getParentBusAddress() != null) {
961-
GpuDeviceVO parentGpuDevice = gpuDeviceDao.findByHostIdAndBusAddress(host.getId(),
1004+
parentGpuDevice = gpuDeviceDao.findByHostIdAndBusAddress(host.getId(),
9621005
deviceInfo.getParentBusAddress());
9631006
if (parentGpuDevice != null) {
9641007
parentGpuDeviceId = parentGpuDevice.getId();
@@ -969,16 +1012,17 @@ private void createAndAddGpuDeviceToHost(VgpuTypesInfo deviceInfo, Host host, Gp
9691012
gpuDevice.setHostId(host.getId());
9701013
gpuDevice.setBusAddress(deviceInfo.getBusAddress());
9711014
gpuDevice.setCardId(card.getId());
972-
setStateAndVmName(deviceInfo, gpuDevice);
1015+
setStateAndVmName(deviceInfo, gpuDevice, parentGpuDevice);
9731016
if (!deviceInfo.isPassthroughEnabled()) {
9741017
gpuDevice.setType(GpuDevice.DeviceType.VGPUOnly);
9751018
}
9761019

9771020
gpuDevice = gpuDeviceDao.persist(gpuDevice);
1021+
checkAndUpdateParentGpuDeviceState(parentGpuDevice);
9781022
logger.info("Added new GPU device {} to host {}", gpuDevice, host);
9791023
}
9801024

981-
private void setStateAndVmName(VgpuTypesInfo deviceInfo, GpuDeviceVO device) {
1025+
private void setStateAndVmName(VgpuTypesInfo deviceInfo, GpuDeviceVO device, GpuDeviceVO parentGpuDevice) {
9821026
if (StringUtils.isNotBlank(deviceInfo.getVmName())) {
9831027
VMInstanceVO vm = vmInstanceDao.findVMByInstanceName(deviceInfo.getVmName());
9841028
if (vm != null) {
@@ -991,7 +1035,7 @@ private void setStateAndVmName(VgpuTypesInfo deviceInfo, GpuDeviceVO device) {
9911035
}
9921036
} else {
9931037
// If no VM name is provided, it's possible that the device is allocated to a stopped VM or not allocated at all.
994-
if (device.getVmId() == null) {
1038+
if (device.getVmId() == null && !device.getState().equals(GpuDevice.State.PartiallyAllocated)) {
9951039
device.setState(GpuDevice.State.Free);
9961040
} else {
9971041
VMInstanceVO vm = vmInstanceDao.findById(device.getVmId());

0 commit comments

Comments
 (0)