Skip to content

Commit 21af88d

Browse files
committed
NVIDIA: SAUCE: Patch NVMe/NVMeoF driver to support GDS on Linux 6.17 Kernel
BugLink: https://bugs.launchpad.net/bugs/2134960 With this change, the NVMe and NVMeoF drivers are enabled to support GPUDirect Storage (GDS). The NVMe driver introduced a way to use the blk_rq_dma_map API to DMA-map requests instead of using scatter-gather lists. With these changes, the GDS path adopts a similar framework: we introduce blk-based APIs (nvfs_blk_rq_dma_map_iter_start and nvfs_blk_rq_dma_map_iter_next) to map a DMA request. The NVMeoF path remains the same as in previous releases. Signed-off-by: Sourab Gupta <sougupta@nvidia.com> Reviewed-by: Kiran Modukuri <kmodukuri@nvidia.com>
1 parent 505ecd9 commit 21af88d

8 files changed

Lines changed: 666 additions & 7 deletions

File tree

drivers/nvme/host/Makefile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# SPDX-License-Identifier: GPL-2.0
22

33
ccflags-y += -I$(src)
4-
4+
ccflags-y += -DCONFIG_NVFS
55
obj-$(CONFIG_NVME_CORE) += nvme-core.o
66
obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
77
obj-$(CONFIG_NVME_FABRICS) += nvme-fabrics.o
@@ -20,10 +20,11 @@ nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o
2020
nvme-core-$(CONFIG_NVME_HOST_AUTH) += auth.o
2121

2222
nvme-y += pci.o
23-
23+
nvme-y += nvfs-dma.o
2424
nvme-fabrics-y += fabrics.o
2525

2626
nvme-rdma-y += rdma.o
27+
nvme-rdma-y += nvfs-rdma.o
2728

2829
nvme-fc-y += fc.o
2930

drivers/nvme/host/nvfs-dma.c

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
*
5+
* This program is free software; you can redistribute it and/or modify it
6+
* under the terms and conditions of the GNU General Public License,
7+
* version 2, as published by the Free Software Foundation.
8+
*
9+
* This program is distributed in the hope it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12+
* more details.
13+
*/
14+
#ifdef CONFIG_NVFS
15+
#define NVFS_USE_DMA_ITER_API
16+
#define MODULE_PREFIX nvme_v2
17+
#include "nvfs.h"
18+
19+
struct nvfs_dma_rw_blk_iter_ops *nvfs_ops = NULL;
20+
21+
atomic_t nvfs_shutdown = ATOMIC_INIT(1);
22+
23+
DEFINE_PER_CPU(long, nvfs_n_ops);
24+
25+
#define NVIDIA_FS_COMPAT_FT(ops) \
26+
(NVIDIA_FS_CHECK_FT_BLK_DMA_MAP_ITER_START(ops) && NVIDIA_FS_CHECK_FT_BLK_DMA_MAP_ITER_NEXT(ops))
27+
28+
// protected via nvfs_module_mutex
29+
int REGISTER_FUNC(struct nvfs_dma_rw_blk_iter_ops *ops)
30+
{
31+
if (NVIDIA_FS_COMPAT_FT(ops)) {
32+
nvfs_ops = ops;
33+
atomic_set(&nvfs_shutdown, 0);
34+
return 0;
35+
} else
36+
return -EOPNOTSUPP;
37+
38+
}
39+
EXPORT_SYMBOL_GPL(REGISTER_FUNC);
40+
41+
// protected via nvfs_module_mutex
42+
void UNREGISTER_FUNC(void)
43+
{
44+
(void) atomic_cmpxchg(&nvfs_shutdown, 0, 1);
45+
do {
46+
msleep(NVFS_HOLD_TIME_MS);
47+
} while(nvfs_count_ops());
48+
nvfs_ops = NULL;
49+
}
50+
EXPORT_SYMBOL_GPL(UNREGISTER_FUNC);
51+
#endif

drivers/nvme/host/nvfs-dma.h

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/*
3+
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
*
5+
* This program is free software; you can redistribute it and/or modify it
6+
* under the terms and conditions of the GNU General Public License,
7+
* version 2, as published by the Free Software Foundation.
8+
*
9+
* This program is distributed in the hope it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12+
* more details.
13+
*/
14+
15+
#ifndef NVFS_DMA_H
16+
#define NVFS_DMA_H
17+
18+
/* Forward declarations for functions from pci.c that we need */
19+
static blk_status_t nvme_pci_setup_data_prp(struct request *req,
20+
struct blk_dma_iter *iter);
21+
static blk_status_t nvme_pci_setup_data_sgl(struct request *req,
22+
struct blk_dma_iter *iter);
23+
static inline struct dma_pool *nvme_dma_pool(struct nvme_queue *nvmeq,
24+
struct nvme_iod *iod);
25+
static inline dma_addr_t nvme_pci_first_desc_dma_addr(struct nvme_command *cmd);
26+
27+
static inline bool nvme_nvfs_unmap_sgls(struct request *req)
28+
{
29+
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
30+
struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
31+
struct device *dma_dev = nvmeq->dev->dev;
32+
dma_addr_t sqe_dma_addr = le64_to_cpu(iod->cmd.common.dptr.sgl.addr);
33+
unsigned int sqe_dma_len = le32_to_cpu(iod->cmd.common.dptr.sgl.length);
34+
struct nvme_sgl_desc *sg_list = iod->descriptors[0];
35+
enum dma_data_direction dir = rq_dma_dir(req);
36+
37+
if (iod->nr_descriptors) {
38+
unsigned int nr_entries = sqe_dma_len / sizeof(*sg_list), i;
39+
40+
for (i = 0; i < nr_entries; i++) {
41+
nvfs_ops->nvfs_dma_unmap_page(dma_dev,
42+
iod->nvfs_cookie,
43+
le64_to_cpu(sg_list[i].addr),
44+
le32_to_cpu(sg_list[i].length),
45+
dir);
46+
}
47+
} else
48+
nvfs_ops->nvfs_dma_unmap_page(dma_dev, iod->nvfs_cookie, sqe_dma_addr, sqe_dma_len, dir);
49+
50+
51+
52+
return true;
53+
}
54+
55+
static inline bool nvme_nvfs_unmap_prps(struct request *req)
56+
{
57+
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
58+
struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
59+
struct device *dma_dev = nvmeq->dev->dev;
60+
enum dma_data_direction dma_dir = rq_dma_dir(req);
61+
unsigned int i;
62+
63+
/* Check if dma_vecs was allocated - if setup failed early, it might be NULL */
64+
if (!iod->dma_vecs)
65+
return true;
66+
67+
/* Unmap all DMA vectors - pass page pointer from dma_vecs */
68+
for (i = 0; i < iod->nr_dma_vecs; i++) {
69+
nvfs_ops->nvfs_dma_unmap_page(dma_dev,
70+
iod->nvfs_cookie,
71+
iod->dma_vecs[i].addr,
72+
iod->dma_vecs[i].len,
73+
dma_dir);
74+
}
75+
76+
/* Free the dma_vecs mempool allocation */
77+
mempool_free(iod->dma_vecs, nvmeq->dev->dmavec_mempool);
78+
iod->dma_vecs = NULL;
79+
iod->nr_dma_vecs = 0;
80+
81+
return true;
82+
}
83+
84+
static inline void nvme_nvfs_free_descriptors(struct request *req)
85+
{
86+
struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
87+
const int last_prp = NVME_CTRL_PAGE_SIZE / sizeof(__le64) - 1;
88+
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
89+
dma_addr_t dma_addr = nvme_pci_first_desc_dma_addr(&iod->cmd);
90+
int i;
91+
92+
if (iod->nr_descriptors == 1) {
93+
dma_pool_free(nvme_dma_pool(nvmeq, iod), iod->descriptors[0],
94+
dma_addr);
95+
return;
96+
}
97+
98+
for (i = 0; i < iod->nr_descriptors; i++) {
99+
__le64 *prp_list = iod->descriptors[i];
100+
dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]);
101+
102+
dma_pool_free(nvmeq->descriptor_pools.large, prp_list,
103+
dma_addr);
104+
dma_addr = next_dma_addr;
105+
}
106+
}
107+
108+
static inline bool nvme_nvfs_unmap_data(struct request *req)
109+
{
110+
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
111+
bool ret;
112+
113+
/* Check if this was an NVFS I/O by checking the IOD_NVFS_IO flag */
114+
if (!(iod->flags & IOD_NVFS_IO))
115+
return false;
116+
117+
/* Clear the NVFS flag */
118+
iod->flags &= ~IOD_NVFS_IO;
119+
120+
/* Call appropriate unmap function based on command type */
121+
if (nvme_pci_cmd_use_sgl(&iod->cmd))
122+
ret = nvme_nvfs_unmap_sgls(req);
123+
else
124+
ret = nvme_nvfs_unmap_prps(req);
125+
126+
if (iod->nr_descriptors)
127+
nvme_nvfs_free_descriptors(req);
128+
129+
nvfs_put_ops();
130+
return ret;
131+
}
132+
133+
static inline blk_status_t nvme_nvfs_map_data(struct request *req,
134+
bool *is_nvfs_io)
135+
{
136+
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
137+
struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
138+
struct nvme_dev *dev = nvmeq->dev;
139+
struct device *dma_dev = nvmeq->dev->dev;
140+
enum nvme_use_sgl use_sgl = nvme_pci_use_sgls(dev, req);
141+
struct blk_dma_iter iter;
142+
blk_status_t ret = BLK_STS_RESOURCE;
143+
144+
*is_nvfs_io = false;
145+
146+
/* Check integrity and try to get nvfs_ops */
147+
if (blk_integrity_rq(req) || !nvfs_get_ops()) {
148+
return ret;
149+
}
150+
151+
/* Initialize total_len for this request */
152+
iod->total_len = 0;
153+
154+
if (!nvfs_ops->nvfs_blk_rq_dma_map_iter_start(req, dma_dev,
155+
&iod->dma_state, &iter, &iod->nvfs_cookie)) {
156+
nvfs_put_ops();
157+
if (iter.status == BLK_STS_IOERR) {
158+
/* GPU DMA error — do not fall through to CPU path */
159+
*is_nvfs_io = true;
160+
ret = iter.status;
161+
}
162+
/* else: CPU page, let caller fall through to CPU path */
163+
return ret;
164+
}
165+
166+
/* NVFS can handle this request, set the flag */
167+
*is_nvfs_io = true;
168+
iod->flags |= IOD_NVFS_IO;
169+
170+
if (use_sgl == SGL_FORCED ||
171+
(use_sgl == SGL_SUPPORTED &&
172+
(sgl_threshold && nvme_pci_avg_seg_size(req) >= sgl_threshold)))
173+
ret = nvme_pci_setup_data_sgl(req, &iter);
174+
else
175+
ret = nvme_pci_setup_data_prp(req, &iter);
176+
177+
/* If setup failed, cleanup: unmap DMA, clear flag, release ops */
178+
if (ret != BLK_STS_OK) {
179+
nvme_nvfs_unmap_data(req);
180+
}
181+
182+
return ret;
183+
}
184+
185+
#endif /* NVFS_DMA_H */

drivers/nvme/host/nvfs-rdma.c

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
*
5+
* This program is free software; you can redistribute it and/or modify it
6+
* under the terms and conditions of the GNU General Public License,
7+
* version 2, as published by the Free Software Foundation.
8+
*
9+
* This program is distributed in the hope it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12+
* more details.
13+
*/
14+
15+
#ifdef CONFIG_NVFS
16+
#define MODULE_PREFIX nvme_rdma_v1
17+
#include "nvfs.h"
18+
19+
struct nvfs_dma_rw_ops *nvfs_ops;
20+
21+
atomic_t nvfs_shutdown = ATOMIC_INIT(1);
22+
23+
DEFINE_PER_CPU(long, nvfs_n_ops);
24+
25+
// must have for compatability
26+
#define NVIDIA_FS_COMPAT_FT(ops) \
27+
(NVIDIA_FS_CHECK_FT_SGLIST_PREP(ops) && NVIDIA_FS_CHECK_FT_SGLIST_DMA(ops))
28+
29+
// protected via nvfs_module_mutex
30+
int REGISTER_FUNC(struct nvfs_dma_rw_ops *ops)
31+
{
32+
if (NVIDIA_FS_COMPAT_FT(ops)) {
33+
nvfs_ops = ops;
34+
atomic_set(&nvfs_shutdown, 0);
35+
return 0;
36+
} else
37+
return -EOPNOTSUPP;
38+
39+
}
40+
EXPORT_SYMBOL_GPL(REGISTER_FUNC);
41+
42+
// protected via nvfs_module_mutex
43+
void UNREGISTER_FUNC(void)
44+
{
45+
(void) atomic_cmpxchg(&nvfs_shutdown, 0, 1);
46+
do {
47+
msleep(NVFS_HOLD_TIME_MS);
48+
} while(nvfs_count_ops());
49+
nvfs_ops = NULL;
50+
}
51+
EXPORT_SYMBOL_GPL(UNREGISTER_FUNC);
52+
#endif

0 commit comments

Comments
 (0)