Skip to content

Commit e084c20

Browse files
author
Eugenio Grosso
committed
kvm: add MultipathNVMeOFAdapterBase and NVMeTCPAdapter
Introduce an NVMe-over-Fabrics counterpart to the existing MultipathSCSIAdapterBase / FiberChannelAdapter pair. NVMe-oF is conceptually distinct from SCSI - it speaks the NVMe command set, identifies namespaces by EUI-128 NGUIDs, and is multipathed by the kernel natively rather than by device-mapper - so keeping it out of the SCSI code path avoids special-casing inside every method that handles volume paths, connect, disconnect, or size lookup. MultipathNVMeOFAdapterBase (abstract) * Parses volume paths of the form type=NVMETCP; address=<eui>; connid.<host>=<nsid>; ... into an AddressInfo whose path is /dev/disk/by-id/nvme-eui.<eui> which is the udev symlink the kernel emits for every NVMe namespace. * connectPhysicalDisk polls the udev path and, on every iteration, triggers nvme ns-rescan on all local NVMe controllers, to cover target/firmware combinations that do not send an asynchronous event notification when a new namespace is mapped. * disconnectPhysicalDisk is a no-op; the kernel drops the namespace when the target removes the host-group connection. The ByPath variant only claims paths starting with /dev/disk/by-id/nvme-eui. so foreign paths still fall through to other adapters. * Delegates getPhysicalDisk, isConnected, and getPhysicalDiskSize to plain test -b / blockdev --getsize64 calls - no SCSI rescan, no dm multipath, no multipath-map cleanup timer. * createPhysicalDisk / createTemplateFromDisk / listPhysicalDisks / copyPhysicalDisk all throw UnsupportedOperationException - these are the responsibility of the storage provider, not the KVM adapter, same as the SCSI base. MultipathNVMeOFPool * KVMStoragePool mirror of MultipathSCSIPool. Defaults to Storage.StoragePoolType.NVMeTCP in the parameterless-fallback constructor. NVMeTCPAdapter * Concrete adapter that registers itself for Storage.StoragePoolType.NVMeTCP via the reflection-based scan in KVMStoragePoolManager. Carries no logic of its own beyond binding the base to the pool type. 
A similar MultipathNVMeOFAdapterBase-derived NVMeRoCEAdapter (or NVMeFCAdapter) can later be added by adding one concrete subclass and a new pool-type value; the base does not assume any particular fabric-level transport.
1 parent 160aad5 commit e084c20

3 files changed

Lines changed: 604 additions & 0 deletions

File tree

Lines changed: 396 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,396 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
package com.cloud.hypervisor.kvm.storage;
19+
20+
import java.io.File;
21+
import java.util.HashMap;
22+
import java.util.List;
23+
import java.util.Map;
24+
import java.util.concurrent.TimeUnit;
25+
26+
import org.apache.cloudstack.utils.qemu.QemuImg;
27+
import org.apache.cloudstack.utils.qemu.QemuImg.PhysicalDiskFormat;
28+
29+
import com.cloud.storage.Storage;
30+
import com.cloud.utils.exception.CloudRuntimeException;
31+
import com.cloud.utils.script.OutputInterpreter;
32+
import com.cloud.utils.script.Script;
33+
import org.apache.commons.lang3.StringUtils;
34+
import org.apache.logging.log4j.LogManager;
35+
import org.apache.logging.log4j.Logger;
36+
37+
/**
 * Base class for KVM storage adapters that surface remote block volumes over
 * NVMe-over-Fabrics (NVMe-oF). It is the NVMe-oF counterpart of
 * {@link MultipathSCSIAdapterBase}: it does not drive device-mapper multipath
 * and does not rescan the SCSI bus, because NVMe-oF has its own multipath
 * (the kernel's native NVMe multipath). Namespaces normally show up via
 * asynchronous event notifications as soon as the target grants access;
 * because some targets do not send those notifications reliably, connecting
 * a disk also issues {@code nvme ns-rescan} on the local controllers while
 * polling for the device.
 *
 * Volumes are identified on the host by their EUI-128 NGUID, which udev
 * exposes as {@code /dev/disk/by-id/nvme-eui.<eui>}.
 */
48+
public abstract class MultipathNVMeOFAdapterBase implements StorageAdaptor {
49+
// Class-wide logger; it is also passed to the Script helpers used by
// isConnected() and getPhysicalDiskSize().
protected static Logger LOGGER = LogManager.getLogger(MultipathNVMeOFAdapterBase.class);
50+
// Registry of storage pools keyed by pool UUID. Static, so it is shared by
// every instance of this base class.
// NOTE(review): plain HashMap with no synchronization visible in this class -
// confirm callers never touch it concurrently.
static final Map<String, KVMStoragePool> MapStorageUuidToStoragePool = new HashMap<>();
51+
52+
// Seconds to wait for a namespace to appear when the connect details carry
// no STORAGE_POOL_DISK_WAIT override.
static final int DEFAULT_DISK_WAIT_SECS = 240;
53+
// Upper bound, in seconds, on waiting for one "nvme ns-rescan" child process.
static final long NS_RESCAN_TIMEOUT_SECS = 5;
54+
// Pause, in milliseconds, between two polls for the namespace device.
private static final long POLL_INTERVAL_MS = 2000;
55+
56+
@Override
57+
public KVMStoragePool getStoragePool(String uuid) {
58+
KVMStoragePool pool = MapStorageUuidToStoragePool.get(uuid);
59+
if (pool == null) {
60+
// Dummy pool - adapters that dispatch per-volume don't need
61+
// connectivity information on the pool itself.
62+
pool = new MultipathNVMeOFPool(uuid, this);
63+
MapStorageUuidToStoragePool.put(uuid, pool);
64+
}
65+
return pool;
66+
}
67+
68+
@Override
69+
public KVMStoragePool getStoragePool(String uuid, boolean refreshInfo) {
70+
return getStoragePool(uuid);
71+
}
72+
73+
/**
 * Name of the concrete adapter.
 * NOTE(review): presumably a human-readable identifier; nothing in this
 * class reads it - confirm against the callers.
 */
public abstract String getName();
74+
75+
/**
 * Storage pool type served by the concrete adapter.
 */
@Override
76+
public abstract Storage.StoragePoolType getStoragePoolType();
77+
78+
/**
 * Tells whether the concrete adapter can handle pools of the given type.
 *
 * @param type pool type to test
 * @return {@code true} when the type is supported
 */
public abstract boolean isStoragePoolTypeSupported(Storage.StoragePoolType type);
79+
80+
/**
81+
* Parse a {@code type=NVMETCP; address=<eui>; connid.<host>=<nsid>; ...}
82+
* volume path and produce an {@link AddressInfo} with the host-side device
83+
* path set to {@code /dev/disk/by-id/nvme-eui.<eui>}.
84+
*/
85+
public AddressInfo parseAndValidatePath(String inPath) {
86+
String type = null;
87+
String address = null;
88+
String connectionId = null;
89+
String path = null;
90+
String hostname = resolveHostnameShort();
91+
String hostnameFq = resolveHostnameFq();
92+
String[] parts = inPath.split(";");
93+
for (String part : parts) {
94+
String[] pair = part.split("=");
95+
if (pair.length != 2) {
96+
continue;
97+
}
98+
String key = pair[0].trim();
99+
String value = pair[1].trim();
100+
if (key.equals("type")) {
101+
type = value.toUpperCase();
102+
} else if (key.equals("address")) {
103+
address = value;
104+
} else if (key.equals("connid")) {
105+
connectionId = value;
106+
} else if (key.startsWith("connid.")) {
107+
String inHostname = key.substring("connid.".length());
108+
if (inHostname.equals(hostname) || inHostname.equals(hostnameFq)) {
109+
connectionId = value;
110+
}
111+
}
112+
}
113+
114+
if (!"NVMETCP".equals(type)) {
115+
throw new CloudRuntimeException("Invalid address type provided for NVMe-oF target disk: " + type);
116+
}
117+
if (address == null) {
118+
throw new CloudRuntimeException("NVMe-oF volume path is missing the required address field");
119+
}
120+
path = "/dev/disk/by-id/nvme-eui." + address.toLowerCase();
121+
return new AddressInfo(type, address, connectionId, path);
122+
}
123+
124+
@Override
125+
public KVMPhysicalDisk getPhysicalDisk(String volumePath, KVMStoragePool pool) {
126+
if (StringUtils.isEmpty(volumePath) || pool == null) {
127+
LOGGER.error("Unable to get physical disk, volume path or pool not specified");
128+
return null;
129+
}
130+
return getPhysicalDisk(parseAndValidatePath(volumePath), pool);
131+
}
132+
133+
private KVMPhysicalDisk getPhysicalDisk(AddressInfo address, KVMStoragePool pool) {
134+
KVMPhysicalDisk disk = new KVMPhysicalDisk(address.getPath(), address.toString(), pool);
135+
disk.setFormat(QemuImg.PhysicalDiskFormat.RAW);
136+
137+
if (!isConnected(address.getPath())) {
138+
if (!connectPhysicalDisk(address, pool, null)) {
139+
throw new CloudRuntimeException("Unable to connect to NVMe namespace at " + address.getPath());
140+
}
141+
}
142+
long diskSize = getPhysicalDiskSize(address.getPath());
143+
disk.setSize(diskSize);
144+
disk.setVirtualSize(diskSize);
145+
return disk;
146+
}
147+
148+
@Override
149+
public KVMStoragePool createStoragePool(String uuid, String host, int port, String path, String userInfo, Storage.StoragePoolType type, Map<String, String> details, boolean isPrimaryStorage) {
150+
LOGGER.info(String.format("createStoragePool(uuid,host,port,path,type) called with args (%s, %s, %d, %s, %s)", uuid, host, port, path, type));
151+
MultipathNVMeOFPool pool = new MultipathNVMeOFPool(uuid, host, port, path, type, details, this);
152+
MapStorageUuidToStoragePool.put(uuid, pool);
153+
return pool;
154+
}
155+
156+
@Override
157+
public boolean deleteStoragePool(String uuid) {
158+
MapStorageUuidToStoragePool.remove(uuid);
159+
return true;
160+
}
161+
162+
@Override
163+
public boolean deleteStoragePool(KVMStoragePool pool) {
164+
return deleteStoragePool(pool.getUuid());
165+
}
166+
167+
@Override
168+
public boolean connectPhysicalDisk(String volumePath, KVMStoragePool pool, Map<String, String> details, boolean isVMMigrate) {
169+
if (StringUtils.isEmpty(volumePath) || pool == null) {
170+
LOGGER.error("Unable to connect NVMe-oF physical disk: insufficient arguments");
171+
return false;
172+
}
173+
return connectPhysicalDisk(parseAndValidatePath(volumePath), pool, details);
174+
}
175+
176+
private boolean connectPhysicalDisk(AddressInfo address, KVMStoragePool pool, Map<String, String> details) {
177+
if (address.getConnectionId() == null) {
178+
LOGGER.error("NVMe-oF volume " + address.getPath() + " on pool " + pool.getUuid() + " is missing a connid.<host> token in its path");
179+
return false;
180+
}
181+
long waitSecs = DEFAULT_DISK_WAIT_SECS;
182+
if (details != null && details.containsKey(com.cloud.storage.StorageManager.STORAGE_POOL_DISK_WAIT.toString())) {
183+
String waitTime = details.get(com.cloud.storage.StorageManager.STORAGE_POOL_DISK_WAIT.toString());
184+
if (StringUtils.isNotEmpty(waitTime)) {
185+
waitSecs = Integer.parseInt(waitTime);
186+
}
187+
}
188+
return waitForNamespace(address, pool, waitSecs);
189+
}
190+
191+
/**
192+
* Poll for the EUI-keyed udev symlink to show up. On every iteration also
193+
* nudge the kernel with {@code nvme ns-rescan} on every local NVMe
194+
* controller, to cover arrays / firmware combinations that do not emit a
195+
* reliable asynchronous event notification when a new namespace is
196+
* mapped.
197+
*/
198+
private boolean waitForNamespace(AddressInfo address, KVMStoragePool pool, long waitSecs) {
199+
if (waitSecs < 60) {
200+
waitSecs = 60;
201+
}
202+
long deadline = System.currentTimeMillis() + (waitSecs * 1000);
203+
File dev = new File(address.getPath());
204+
while (System.currentTimeMillis() < deadline) {
205+
if (dev.exists() && isConnected(address.getPath())) {
206+
long size = getPhysicalDiskSize(address.getPath());
207+
if (size > 0) {
208+
LOGGER.debug("Found NVMe namespace at " + address.getPath());
209+
return true;
210+
}
211+
}
212+
rescanAllControllers();
213+
try {
214+
Thread.sleep(POLL_INTERVAL_MS);
215+
} catch (InterruptedException ie) {
216+
Thread.currentThread().interrupt();
217+
return false;
218+
}
219+
}
220+
LOGGER.debug("NVMe namespace did not appear at " + address.getPath() + " within " + waitSecs + "s");
221+
return false;
222+
}
223+
224+
private void rescanAllControllers() {
225+
try {
226+
File sysClass = new File("/sys/class/nvme");
227+
File[] ctrls = sysClass.listFiles();
228+
if (ctrls == null) {
229+
return;
230+
}
231+
for (File ctrl : ctrls) {
232+
Process p = new ProcessBuilder("nvme", "ns-rescan", "/dev/" + ctrl.getName())
233+
.redirectErrorStream(true).start();
234+
p.waitFor(NS_RESCAN_TIMEOUT_SECS, TimeUnit.SECONDS);
235+
}
236+
} catch (Exception e) {
237+
LOGGER.debug("nvme ns-rescan attempt failed: " + e.getMessage());
238+
}
239+
}
240+
241+
@Override
242+
public boolean disconnectPhysicalDisk(String volumePath, KVMStoragePool pool) {
243+
// NVMe-oF: the kernel drops the namespace as soon as the target
244+
// removes the host(-group) connection. No host-side action needed.
245+
return true;
246+
}
247+
248+
@Override
249+
public boolean disconnectPhysicalDisk(Map<String, String> volumeToDisconnect) {
250+
return true;
251+
}
252+
253+
@Override
254+
public boolean disconnectPhysicalDiskByPath(String localPath) {
255+
// Same rationale as disconnectPhysicalDisk above. Only claim paths
256+
// that look like NVMe EUI symlinks so we don't swallow foreign paths.
257+
return localPath != null && localPath.startsWith("/dev/disk/by-id/nvme-eui.");
258+
}
259+
260+
@Override
261+
public boolean deletePhysicalDisk(String uuid, KVMStoragePool pool, Storage.ImageFormat format) {
262+
throw new UnsupportedOperationException("Deletion of NVMe namespaces is the storage provider's responsibility");
263+
}
264+
265+
@Override
266+
public KVMPhysicalDisk createPhysicalDisk(String name, KVMStoragePool pool, PhysicalDiskFormat format,
267+
Storage.ProvisioningType provisioningType, long size, byte[] passphrase) {
268+
throw new UnsupportedOperationException("Unimplemented method 'createPhysicalDisk'");
269+
}
270+
271+
@Override
272+
public KVMPhysicalDisk createTemplateFromDisk(KVMPhysicalDisk disk, String name, QemuImg.PhysicalDiskFormat format, long size, KVMStoragePool destPool) {
273+
throw new UnsupportedOperationException("Unimplemented method 'createTemplateFromDisk'");
274+
}
275+
276+
@Override
277+
public List<KVMPhysicalDisk> listPhysicalDisks(String storagePoolUuid, KVMStoragePool pool) {
278+
throw new UnsupportedOperationException("Unimplemented method 'listPhysicalDisks'");
279+
}
280+
281+
@Override
282+
public KVMPhysicalDisk copyPhysicalDisk(KVMPhysicalDisk disk, String name, KVMStoragePool destPool, int timeout) {
283+
throw new UnsupportedOperationException("Unimplemented method 'copyPhysicalDisk'");
284+
}
285+
286+
@Override
287+
public KVMPhysicalDisk copyPhysicalDisk(KVMPhysicalDisk disk, String name, KVMStoragePool destPool, int timeout, byte[] srcPassphrase, byte[] destPassphrase, Storage.ProvisioningType provisioningType) {
288+
throw new UnsupportedOperationException("Unimplemented method 'copyPhysicalDisk'");
289+
}
290+
291+
@Override
292+
public KVMPhysicalDisk createDiskFromTemplate(KVMPhysicalDisk template, String name, PhysicalDiskFormat format, Storage.ProvisioningType provisioningType, long size, KVMStoragePool destPool, int timeout, byte[] passphrase) {
293+
throw new UnsupportedOperationException("Unimplemented method 'createDiskFromTemplate'");
294+
}
295+
296+
@Override
297+
public KVMPhysicalDisk createDiskFromTemplateBacking(KVMPhysicalDisk template, String name, PhysicalDiskFormat format, long size, KVMStoragePool destPool, int timeout, byte[] passphrase) {
298+
throw new UnsupportedOperationException("Unimplemented method 'createDiskFromTemplateBacking'");
299+
}
300+
301+
@Override
302+
public KVMPhysicalDisk createTemplateFromDirectDownloadFile(String templateFilePath, String destTemplatePath, KVMStoragePool destPool, Storage.ImageFormat format, int timeout) {
303+
throw new UnsupportedOperationException("Unimplemented method 'createTemplateFromDirectDownloadFile'");
304+
}
305+
306+
@Override
307+
public boolean refresh(KVMStoragePool pool) {
308+
return true;
309+
}
310+
311+
@Override
312+
public boolean createFolder(String uuid, String path) {
313+
throw new UnsupportedOperationException("Unimplemented method 'createFolder'");
314+
}
315+
316+
@Override
317+
public boolean createFolder(String uuid, String path, String localPath) {
318+
throw new UnsupportedOperationException("Unimplemented method 'createFolder'");
319+
}
320+
321+
public void resize(String path, String vmName, long newSize) {
322+
throw new UnsupportedOperationException("Volume resize on NVMe-oF pools is driven by the storage provider, not the KVM adapter");
323+
}
324+
325+
boolean isConnected(String path) {
326+
Script test = new Script("/bin/test", LOGGER);
327+
test.add("-b", path);
328+
test.execute();
329+
return test.getExitValue() == 0;
330+
}
331+
332+
long getPhysicalDiskSize(String diskPath) {
333+
if (StringUtils.isEmpty(diskPath)) {
334+
return 0;
335+
}
336+
Script cmd = new Script("blockdev", LOGGER);
337+
cmd.add("--getsize64", diskPath);
338+
OutputInterpreter.OneLineParser parser = new OutputInterpreter.OneLineParser();
339+
String result = cmd.execute(parser);
340+
if (result != null) {
341+
LOGGER.debug("Unable to get the disk size at path: " + diskPath);
342+
return 0;
343+
}
344+
try {
345+
return Long.parseLong(parser.getLine());
346+
} catch (NumberFormatException e) {
347+
return 0;
348+
}
349+
}
350+
351+
private static String resolveHostnameShort() {
352+
try {
353+
String h = java.net.InetAddress.getLocalHost().getHostName();
354+
int dot = h.indexOf('.');
355+
return dot > 0 ? h.substring(0, dot) : h;
356+
} catch (Exception e) {
357+
return null;
358+
}
359+
}
360+
361+
private static String resolveHostnameFq() {
362+
try {
363+
return java.net.InetAddress.getLocalHost().getCanonicalHostName();
364+
} catch (Exception e) {
365+
return null;
366+
}
367+
}
368+
369+
/**
370+
* Same shape as {@link MultipathSCSIAdapterBase.AddressInfo}. Kept
371+
* separate so this class can be consumed by adapters that don't share the
372+
* SCSI base.
373+
*/
374+
public static final class AddressInfo {
375+
String type;
376+
String address;
377+
String connectionId;
378+
String path;
379+
380+
public AddressInfo(String type, String address, String connectionId, String path) {
381+
this.type = type;
382+
this.address = address;
383+
this.connectionId = connectionId;
384+
this.path = path;
385+
}
386+
387+
public String getType() { return type; }
388+
public String getAddress() { return address; }
389+
public String getConnectionId() { return connectionId; }
390+
public String getPath() { return path; }
391+
392+
public String toString() {
393+
return String.format("AddressInfo %s [address=%s, connectionId=%s, path=%s]", type, address, connectionId, path);
394+
}
395+
}
396+
}

0 commit comments

Comments
 (0)