Skip to content

Commit 723bf14

Browse files
committed
kvm/flasharray: address review feedback on NVMe-TCP PR
Apply the review comments from the first round on #13061: * FlashArrayAdapter.snapshot() and both getSnapshot() entry points now wrap the returned FlashArrayVolume in withAddressType(). Without this, snapshots taken against an NVMe-TCP pool had the constructor-default AddressType.FIBERWWN and ProviderSnapshot.getAddress() emitted an FC style WWN instead of the NVMe EUI-128, which the adaptive driver then persisted as the snapshot path. Verified end-to-end against Purity 6.7.7: a fresh NVMe-TCP snapshot now lands with install_path starting 006c... , matching the source volume's EUI (previously it was 6-24a9370...). * FlashArrayAdapter.attach() - retry path after 'Connection already exists' no longer requires a hostgroup-scoped match for NVMe-TCP. If hostgroup is not configured, or the existing connection is host-scoped, fall back to matching by host name, same as the Fibre Channel branch. Also normalize the 'volume lun is not found' message when no connection list is returned. * FlashArrayAdapter.attach() - initial 'Volume attach did not return lun information' exception message now mentions both lun (FC) and nsid (NVMe-TCP) so the error is not misleading on NVMe deployments. * FlashArrayAdapter.getVolumeByAddress() - validate the EUI-128 length before slicing. A short/malformed address used to throw StringIndexOutOfBoundsException deep inside getFlashArrayItem and be swallowed as 'not found'; now a clear RuntimeException is raised with the expected vs actual length. * FlashArrayVolume.getAddress() - same defensive check when building an EUI-128 from the FlashArray volume serial; if the serial is shorter than 24 hex chars, fail with a clear message instead of SIOOBE. * MultipathNVMeOFAdapterBase.connectPhysicalDisk() - Integer.parseInt of the STORAGE_POOL_DISK_WAIT detail is now guarded; a non-numeric value falls back to the default rather than aborting the connect. * MultipathNVMeOFAdapterBase.rescanAllControllers() - honour the boolean return from Process.waitFor(). If an nvme ns-rescan invocation does not complete in NS_RESCAN_TIMEOUT_SECS we destroyForcibly() it, so hung nvme-cli processes do not accumulate while the namespace poll loop retries. * NVMeTCPAdapter - rename LOGGER_NVMETCP to LOGGER to match the naming convention used in the other KVM adapters. Signed-off-by: Eugenio Grosso <eugenio.grosso@gmail.com>
1 parent c0cdfa4 commit 723bf14

4 files changed

Lines changed: 57 additions & 11 deletions

File tree

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/MultipathNVMeOFAdapterBase.java

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,13 @@ private boolean connectPhysicalDisk(AddressInfo address, KVMStoragePool pool, Ma
185185
if (details != null && details.containsKey(com.cloud.storage.StorageManager.STORAGE_POOL_DISK_WAIT.toString())) {
186186
String waitTime = details.get(com.cloud.storage.StorageManager.STORAGE_POOL_DISK_WAIT.toString());
187187
if (StringUtils.isNotEmpty(waitTime)) {
188-
waitSecs = Integer.parseInt(waitTime);
188+
try {
189+
waitSecs = Integer.parseInt(waitTime);
190+
} catch (NumberFormatException e) {
191+
LOGGER.warn("Ignoring non-numeric " + com.cloud.storage.StorageManager.STORAGE_POOL_DISK_WAIT.toString()
192+
+ "=[" + waitTime + "] on pool " + pool.getUuid() + ", falling back to default "
193+
+ DEFAULT_DISK_WAIT_SECS + "s");
194+
}
189195
}
190196
}
191197
return waitForNamespace(address, pool, waitSecs);
@@ -234,7 +240,15 @@ private void rescanAllControllers() {
234240
for (File ctrl : ctrls) {
235241
Process p = new ProcessBuilder("nvme", "ns-rescan", "/dev/" + ctrl.getName())
236242
.redirectErrorStream(true).start();
237-
p.waitFor(NS_RESCAN_TIMEOUT_SECS, TimeUnit.SECONDS);
243+
if (!p.waitFor(NS_RESCAN_TIMEOUT_SECS, TimeUnit.SECONDS)) {
244+
// Kill runaway nvme-cli invocations so they do not pile
245+
// up under the JVM on every poll iteration while we
246+
// are still waiting for the namespace to appear.
247+
LOGGER.debug("nvme ns-rescan /dev/" + ctrl.getName()
248+
+ " did not complete within " + NS_RESCAN_TIMEOUT_SECS
249+
+ "s; terminating");
250+
p.destroyForcibly();
251+
}
238252
}
239253
} catch (Exception e) {
240254
LOGGER.debug("nvme ns-rescan attempt failed: " + e.getMessage());

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/NVMeTCPAdapter.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,10 @@
2828
* {@link KVMStoragePoolManager} can find it via reflection.
2929
*/
3030
public class NVMeTCPAdapter extends MultipathNVMeOFAdapterBase {
31-
private static final Logger LOGGER_NVMETCP = LogManager.getLogger(NVMeTCPAdapter.class);
31+
private static final Logger LOGGER = LogManager.getLogger(NVMeTCPAdapter.class);
3232

3333
public NVMeTCPAdapter() {
34-
LOGGER_NVMETCP.info("Loaded NVMeTCPAdapter for StorageLayer");
34+
LOGGER.info("Loaded NVMeTCPAdapter for StorageLayer");
3535
}
3636

3737
@Override

plugins/storage/volume/flasharray/src/main/java/org/apache/cloudstack/storage/datastore/adapter/flasharray/FlashArrayAdapter.java

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,8 @@ public String attach(ProviderAdapterContext context, ProviderAdapterDataObject d
160160
}
161161

162162
if (list == null || list.getItems() == null || list.getItems().size() == 0) {
163-
throw new RuntimeException("Volume attach did not return lun information");
163+
throw new RuntimeException("Volume attach did not return connection information "
164+
+ "(expected lun for Fibre Channel or nsid for NVMe-TCP)");
164165
}
165166

166167
FlashArrayConnection connection = (FlashArrayConnection) this.getFlashArrayItem(list);
@@ -186,10 +187,22 @@ public String attach(ProviderAdapterContext context, ProviderAdapterDataObject d
186187
if (list != null && list.getItems() != null) {
187188
for (FlashArrayConnection conn : list.getItems()) {
188189
if (AddressType.NVMETCP.equals(volumeAddressType)) {
189-
if (conn.getHostGroup() != null && conn.getHostGroup().getName() != null
190+
// Prefer a hostgroup-scoped match when a hostgroup is configured
191+
// on the pool; otherwise fall through to matching the connection
192+
// by host like the Fibre Channel branch below. Covers both
193+
// transport=nvme-tcp deployments with and without hostgroup=.
194+
if (hostgroup != null && conn.getHostGroup() != null
195+
&& conn.getHostGroup().getName() != null
190196
&& conn.getHostGroup().getName().equals(hostgroup)) {
191197
return conn.getNsid() != null ? "" + conn.getNsid() : "1";
192198
}
199+
if (conn.getHost() != null && conn.getHost().getName() != null
200+
&& (conn.getHost().getName().equals(hostname)
201+
|| (hostname.indexOf('.') > 0
202+
&& conn.getHost().getName()
203+
.equals(hostname.substring(0, hostname.indexOf('.')))))) {
204+
return conn.getNsid() != null ? "" + conn.getNsid() : "1";
205+
}
193206
} else if (conn.getHost() != null && conn.getHost().getName() != null &&
194207
(conn.getHost().getName().equals(hostname) || conn.getHost().getName().equals(hostname.substring(0, hostname.indexOf('.')))) &&
195208
conn.getLun() != null) {
@@ -198,7 +211,7 @@ public String attach(ProviderAdapterContext context, ProviderAdapterDataObject d
198211
}
199212
throw new RuntimeException("Volume connection identifier (lun/nsid) not found in existing connection");
200213
} else {
201-
throw new RuntimeException("Volume lun is not found in existing connection");
214+
throw new RuntimeException("Volume connection is not found in existing connection list");
202215
}
203216
} else {
204217
throw e;
@@ -291,6 +304,11 @@ public ProviderVolume getVolumeByAddress(ProviderAdapterContext context, Address
291304
// Reverse the EUI-128 layout: serial = eui[2:16] + eui[22:32], after
292305
// stripping the optional "eui." prefix that appears in udev paths.
293306
String eui = address.startsWith("eui.") ? address.substring(4) : address;
307+
if (eui == null || eui.length() != 32) {
308+
throw new RuntimeException("Invalid NVMe-TCP EUI-128 address ["
309+
+ address + "]: expected 32 hex characters, got "
310+
+ (eui == null ? "null" : String.valueOf(eui.length())));
311+
}
294312
serial = (eui.substring(2, 16) + eui.substring(22)).toUpperCase();
295313
} else {
296314
throw new RuntimeException(
@@ -346,8 +364,11 @@ public ProviderSnapshot snapshot(ProviderAdapterContext context, ProviderAdapter
346364
"/volume-snapshots?source_names=" + sourceDataObject.getExternalName(), null,
347365
new TypeReference<FlashArrayList<FlashArrayVolume>>() {
348366
});
349-
350-
return (FlashArrayVolume) getFlashArrayItem(list);
367+
// Stamp the pool's volume address type so ProviderSnapshot.getAddress()
368+
// emits an NVMe EUI-128 on NVMe-TCP pools. Without this, the adaptive
369+
// driver persists the snapshot with an FC-style WWN and subsequent
370+
// revert/list operations cannot locate the namespace.
371+
return withAddressType((FlashArrayVolume) getFlashArrayItem(list));
351372
}
352373

353374
/**
@@ -390,7 +411,12 @@ public ProviderSnapshot getSnapshot(ProviderAdapterContext context, ProviderAdap
390411
"/volume-snapshots?names=" + dataObject.getExternalName(),
391412
new TypeReference<FlashArrayList<FlashArrayVolume>>() {
392413
});
393-
return (FlashArrayVolume) getFlashArrayItem(list);
414+
// Stamp the pool's volume address type so ProviderSnapshot.getAddress()
415+
// emits an NVMe EUI-128 on NVMe-TCP pools instead of the FIBERWWN
416+
// default. Without this, the adaptive driver persists the snapshot
417+
// path with an FC-style WWN and revert/list fails to locate the
418+
// namespace on the host.
419+
return withAddressType((FlashArrayVolume) getFlashArrayItem(list));
394420
}
395421

396422
@Override
@@ -813,7 +839,7 @@ private FlashArrayVolume getSnapshot(String snapshotName) {
813839
FlashArrayList<FlashArrayVolume> list = GET("/volume-snapshots?names=" + snapshotName,
814840
new TypeReference<FlashArrayList<FlashArrayVolume>>() {
815841
});
816-
return (FlashArrayVolume) getFlashArrayItem(list);
842+
return withAddressType((FlashArrayVolume) getFlashArrayItem(list));
817843
}
818844

819845
private FlashArrayVolume withAddressType(FlashArrayVolume vol) {

plugins/storage/volume/flasharray/src/main/java/org/apache/cloudstack/storage/datastore/adapter/flasharray/FlashArrayVolume.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,12 @@ public String getAddress() {
116116
// 00 + serial[0:14] + <Pure OUI (24a937)> + serial[14:24]
117117
// This is the value the Linux kernel exposes as
118118
// /dev/disk/by-id/nvme-eui.<result>
119+
if (serial.length() < 24) {
120+
throw new RuntimeException("FlashArray serial [" + serial
121+
+ "] is too short to build an NVMe EUI-128 address "
122+
+ "(expected at least 24 hex characters, got "
123+
+ serial.length() + ")");
124+
}
119125
return ("00" + serial.substring(0, 14) + PURE_OUI_EUI + serial.substring(14)).toLowerCase();
120126
}
121127
return ("6" + PURE_OUI + serial).toLowerCase();

0 commit comments

Comments
 (0)