Skip to content

Commit 743331e

Browse files
committed
NFS: Patch NFS driver to support GDS with 6.2 Kernel
With this change, the NFS driver is able to support GPUDirect Storage (GDS). The change is centered on frwr_map and frwr_mr_unmap in the NFS driver: each I/O request is first intercepted to check for GDS pages. If the pages are GDS pages, the request is served by the GDS driver component called nvidia-fs; otherwise, the request is served by the standard NFS driver code.
1 parent 3d28f6c commit 743331e

5 files changed

Lines changed: 262 additions & 4 deletions

File tree

net/sunrpc/xprtrdma/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
# SPDX-License-Identifier: GPL-2.0
2+
ccflags-y += -DCONFIG_NVFS
23
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
34

45
rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o \
56
svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
67
svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \
78
svc_rdma_pcl.o module.o
9+
rpcrdma-y += nvfs_rpc_rdma.o
810
rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o

net/sunrpc/xprtrdma/frwr_ops.c

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@
4444

4545
#include "xprt_rdma.h"
4646
#include <trace/events/rpcrdma.h>
47+
#ifdef CONFIG_NVFS
48+
#define NVFS_FRWR
49+
#include "nvfs.h"
50+
#include "nvfs_rpc_rdma.h"
51+
#endif
4752

4853
static void frwr_cid_init(struct rpcrdma_ep *ep,
4954
struct rpcrdma_mr *mr)
@@ -58,6 +63,13 @@ static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
5863
{
5964
if (mr->mr_device) {
6065
trace_xprtrdma_mr_unmap(mr);
66+
#ifdef CONFIG_NVFS
67+
if (rpcrdma_nvfs_unmap_data(mr->mr_device->dma_device,
68+
mr->mr_sg, mr->mr_nents, mr->mr_dir))
69+
pr_debug("rpcrdma_nvfs_unmap_data device %s mr->mr_sg: %p , nents: %d\n",
70+
mr->mr_device->name, mr->mr_sg, mr->mr_nents);
71+
else
72+
#endif
6173
ib_dma_unmap_sg(mr->mr_device, mr->mr_sg, mr->mr_nents,
6274
mr->mr_dir);
6375
mr->mr_device = NULL;
@@ -286,6 +298,9 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
286298
int nsegs, bool writing, __be32 xid,
287299
struct rpcrdma_mr *mr)
288300
{
301+
#ifdef CONFIG_NVFS
302+
bool is_nvfs_io = false;
303+
#endif
289304
struct rpcrdma_ep *ep = r_xprt->rx_ep;
290305
struct ib_reg_wr *reg_wr;
291306
int i, n, dma_nents;
@@ -308,11 +323,23 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
308323
}
309324
mr->mr_dir = rpcrdma_data_dir(writing);
310325
mr->mr_nents = i;
311-
312-
dma_nents = ib_dma_map_sg(ep->re_id->device, mr->mr_sg, mr->mr_nents,
313-
mr->mr_dir);
314-
if (!dma_nents)
326+
#ifdef CONFIG_NVFS
327+
dma_nents = rpcrdma_nvfs_map_data(ep->re_id->device->dma_device,
328+
mr->mr_sg, i, mr->mr_dir,
329+
&is_nvfs_io);
330+
if (dma_nents == -EIO) {
315331
goto out_dmamap_err;
332+
} else if (is_nvfs_io) {
333+
pr_debug("rpcrdma_nvfs_map_data device %s mr->mr_sg: %p , nents: %d\n",
334+
ep->re_id->device->name, mr->mr_sg, mr->mr_nents);
335+
} else
336+
#endif
337+
{
338+
dma_nents = ib_dma_map_sg(ep->re_id->device, mr->mr_sg, mr->mr_nents,
339+
mr->mr_dir);
340+
if (!dma_nents)
341+
goto out_dmamap_err;
342+
}
316343
mr->mr_device = ep->re_id->device;
317344

318345
ibmr = mr->mr_ibmr;

net/sunrpc/xprtrdma/nvfs.h

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/*
3+
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
*
5+
* This program is free software; you can redistribute it and/or modify it
6+
* under the terms and conditions of the GNU General Public License,
7+
* version 2, as published by the Free Software Foundation.
8+
*
9+
* This program is distributed in the hope it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12+
* more details.
13+
*/
14+
15+
#ifndef NVFS_H
16+
#define NVFS_H
17+
18+
#include <linux/types.h>
19+
#include <linux/delay.h>
20+
#include <linux/blkdev.h>
21+
#include <linux/cpumask.h>
22+
#include <linux/scatterlist.h>
23+
#include <linux/percpu-defs.h>
24+
#include <linux/dma-direction.h>
25+
26+
#define REGSTR2(x) x##_register_nvfs_dma_ops
27+
#define REGSTR(x) REGSTR2(x)
28+
29+
#define UNREGSTR2(x) x##_unregister_nvfs_dma_ops
30+
#define UNREGSTR(x) UNREGSTR2(x)
31+
32+
#define REGISTER_FUNC REGSTR(MODULE_PREFIX)
33+
#define UNREGISTER_FUNC UNREGSTR(MODULE_PREFIX)
34+
35+
/*
 * Sentinel values compared against the result of the
 * nvfs_dma_map_sg_attrs() callback. The negative constants are
 * parenthesized so they expand safely inside any larger expression.
 */
#define NVFS_IO_ERR	(-1)
#define NVFS_CPU_REQ	(-2)
37+
38+
#define NVFS_HOLD_TIME_MS 1000
39+
40+
extern struct nvfs_dma_rw_ops *nvfs_ops;
41+
42+
extern atomic_t nvfs_shutdown;
43+
44+
DECLARE_PER_CPU(long, nvfs_n_ops);
45+
46+
static inline long nvfs_count_ops(void)
47+
{
48+
int i;
49+
long sum = 0;
50+
51+
for_each_possible_cpu(i)
52+
sum += per_cpu(nvfs_n_ops, i);
53+
return sum;
54+
}
55+
56+
static inline bool nvfs_get_ops(void)
57+
{
58+
if (nvfs_ops && !atomic_read(&nvfs_shutdown)) {
59+
this_cpu_inc(nvfs_n_ops);
60+
return true;
61+
}
62+
return false;
63+
}
64+
65+
static inline void nvfs_put_ops(void)
66+
{
67+
this_cpu_dec(nvfs_n_ops);
68+
}
69+
70+
/*
 * Table of callbacks handed over through the register function and
 * published via the global nvfs_ops pointer.
 */
struct nvfs_dma_rw_ops {
	/* Feature bitmap; tested by the NVIDIA_FS_CHECK_FT_* macros. */
	unsigned long long ft_bmap;

	int (*nvfs_blk_rq_map_sg)(struct request_queue *q, struct request *req,
				  struct scatterlist *sglist);

	/*
	 * Callers in this patch compare the result against NVFS_IO_ERR
	 * and NVFS_CPU_REQ before using it as a mapped-entry count.
	 */
	int (*nvfs_dma_map_sg_attrs)(struct device *device,
				     struct scatterlist *sglist, int nents,
				     enum dma_data_direction dma_dir,
				     unsigned long attrs);

	/* Callers in this patch treat a positive result as "handled". */
	int (*nvfs_dma_unmap_sg)(struct device *device,
				 struct scatterlist *sglist, int nents,
				 enum dma_data_direction dma_dir);

	bool (*nvfs_is_gpu_page)(struct page *page);

	unsigned int (*nvfs_gpu_index)(struct page *page);

	unsigned int (*nvfs_device_priority)(struct device *dev,
					     unsigned int gpu_index);
};
94+
95+
/*
 * Feature flags for nvfs_dma_rw_ops.ft_bmap. Each enumerator is a
 * single-bit mask (bits 0 through 3).
 */
enum ft_bits {
	nvfs_ft_prep_sglist     = 0x1,
	nvfs_ft_map_sglist      = 0x2,
	nvfs_ft_is_gpu_page     = 0x4,
	nvfs_ft_device_priority = 0x8,
};
102+
103+
// check features for use in registration with vendor drivers
104+
#define NVIDIA_FS_CHECK_FT_SGLIST_PREP(ops) ((ops)->ft_bmap & nvfs_ft_prep_sglist)
105+
#define NVIDIA_FS_CHECK_FT_SGLIST_DMA(ops) ((ops)->ft_bmap & nvfs_ft_map_sglist)
106+
#define NVIDIA_FS_CHECK_FT_GPU_PAGE(ops) ((ops)->ft_bmap & nvfs_ft_is_gpu_page)
107+
#define NVIDIA_FS_CHECK_FT_DEVICE_PRIORITY(ops) ((ops)->ft_bmap & nvfs_ft_device_priority)
108+
109+
int REGISTER_FUNC(struct nvfs_dma_rw_ops *ops);
110+
111+
void UNREGISTER_FUNC(void);
112+
113+
#endif /* NVFS_H */
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
*
5+
* This program is free software; you can redistribute it and/or modify it
6+
* under the terms and conditions of the GNU General Public License,
7+
* version 2, as published by the Free Software Foundation.
8+
*
9+
* This program is distributed in the hope it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12+
* more details.
13+
*/
14+
15+
#ifdef CONFIG_NVFS
16+
#define MODULE_PREFIX rpcrdma
17+
#include "nvfs.h"
18+
19+
struct nvfs_dma_rw_ops *nvfs_ops;
20+
21+
atomic_t nvfs_shutdown = ATOMIC_INIT(1);
22+
23+
DEFINE_PER_CPU(long, nvfs_n_ops);
24+
25+
// must have for compatibility
26+
#define NVIDIA_FS_COMPAT_FT(ops) \
27+
((NVIDIA_FS_CHECK_FT_SGLIST_PREP(ops)) && (NVIDIA_FS_CHECK_FT_SGLIST_DMA(ops)))
28+
29+
// protected via nvfs_module_mutex
30+
int REGISTER_FUNC(struct nvfs_dma_rw_ops *ops)
31+
{
32+
if (NVIDIA_FS_COMPAT_FT(ops)) {
33+
nvfs_ops = ops;
34+
atomic_set(&nvfs_shutdown, 0);
35+
return 0;
36+
}
37+
return -EOPNOTSUPP;
38+
}
39+
EXPORT_SYMBOL(REGISTER_FUNC);
40+
41+
// protected via nvfs_module_mutex
42+
void UNREGISTER_FUNC(void)
43+
{
44+
(void)atomic_cmpxchg(&nvfs_shutdown, 0, 1);
45+
do {
46+
msleep(NVFS_HOLD_TIME_MS);
47+
} while (nvfs_count_ops());
48+
nvfs_ops = NULL;
49+
}
50+
EXPORT_SYMBOL(UNREGISTER_FUNC);
51+
#endif
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/*
3+
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
*
5+
* This program is free software; you can redistribute it and/or modify it
6+
* under the terms and conditions of the GNU General Public License,
7+
* version 2, as published by the Free Software Foundation.
8+
*
9+
* This program is distributed in the hope it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12+
* more details.
13+
*/
14+
15+
#ifndef NVFS_RPCRDMA_H
16+
#define NVFS_RPCRDMA_H
17+
18+
#ifdef NVFS_FRWR
19+
static int rpcrdma_nvfs_map_data(struct device *dev, struct scatterlist *sg,
20+
int nents, enum dma_data_direction dma_dir,
21+
bool *is_nvfs_io)
22+
{
23+
int count;
24+
25+
*is_nvfs_io = false;
26+
count = 0;
27+
if (nvfs_get_ops()) {
28+
count = nvfs_ops->nvfs_dma_map_sg_attrs(dev,
29+
sg,
30+
nents,
31+
dma_dir,
32+
DMA_ATTR_NO_WARN);
33+
34+
if (unlikely(count == NVFS_IO_ERR)) {
35+
nvfs_put_ops();
36+
return -EIO;
37+
}
38+
39+
if (unlikely(count == NVFS_CPU_REQ)) {
40+
nvfs_put_ops();
41+
return 0;
42+
}
43+
*is_nvfs_io = true;
44+
}
45+
return count;
46+
}
47+
#endif
48+
49+
static bool rpcrdma_nvfs_unmap_data(struct device *dev, struct scatterlist *sg,
50+
int nents, enum dma_data_direction dma_dir)
51+
{
52+
int count;
53+
54+
if (nvfs_ops != NULL) {
55+
count = nvfs_ops->nvfs_dma_unmap_sg(dev, sg, nents,
56+
dma_dir);
57+
if (count > 0) {
58+
nvfs_put_ops();
59+
return true;
60+
}
61+
}
62+
return false;
63+
}
64+
65+
#endif /* NVFS_RPCRDMA_H */

0 commit comments

Comments
 (0)