Skip to content

Commit 4b8d49a

Browse files
SriMNvidiakobak2026
authored and committed
NVIDIA: VR: SAUCE: cxl: Add memory offlining and cache flush helpers
BugLink: https://bugs.launchpad.net/bugs/2143032 Add infrastructure for quiescing the CXL data path before reset: - Memory offlining: check if CXL-backed memory is online and offline it via offline_and_remove_memory() before reset, per CXL spec requirement to quiesce all CXL.mem transactions before issuing CXL Reset. - CPU cache flush: invalidate cache lines before reset as a safety measure after memory offline. Signed-off-by: Srirangan Madhavan <smadhavan@nvidia.com> (cherry picked from https://lore.kernel.org/linux-cxl/20260306092322.148765-1-smadhavan@nvidia.com/) Signed-off-by: Jiandi An <jan@nvidia.com> Acked-by: Jamie Nguyen <jamien@nvidia.com> Acked-by: Nirmoy Das <nirmoyd@nvidia.com> Acked-by: Carol L Soto <csoto@nvidia.com> Acked-by: Matthew R. Ochs <mochs@nvidia.com> Signed-off-by: Brad Figg <bfigg@nvidia.com> (backported from commit 98bfbf9 nv-kernels/24.04_linux-nvidia-6.17-next) [koba: Use a real System RAM walker callback so resource walks never invoke a NULL function pointer.] Signed-off-by: Koba Ko <kobak@nvidia.com>
1 parent 0dd6ce1 commit 4b8d49a

1 file changed

Lines changed: 120 additions & 0 deletions

File tree

drivers/cxl/core/pci.c

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
#include <linux/io-64-nonatomic-lo-hi.h>
55
#include <linux/device.h>
66
#include <linux/delay.h>
7+
#include <linux/memory_hotplug.h>
8+
#include <linux/memregion.h>
79
#include <linux/pci.h>
810
#include <linux/pci-doe.h>
911
#include <cxl/pci.h>
@@ -927,3 +929,121 @@ int cxl_port_get_possible_dports(struct cxl_port *port)
927929

928930
return ctx.count;
929931
}
932+
933+
/*
934+
* CXL Reset support - core-provided reset logic for CXL devices.
935+
*
936+
* These functions implement the CXL reset sequence.
937+
*/
938+
939+
/*
940+
* If CXL memory backed by this decoder is online as System RAM, offline
941+
* and remove it per CXL spec requirements before issuing CXL Reset.
942+
* Returns 0 if memory was not online or was successfully offlined.
943+
*/
944+
static int cxl_is_system_ram(struct resource *res, void *arg)
945+
{
946+
return 1;
947+
}
948+
949+
static int __maybe_unused cxl_offline_memory(struct device *dev, void *data)
950+
{
951+
struct cxl_endpoint_decoder *cxled;
952+
struct cxl_region *cxlr;
953+
struct cxl_region_params *p;
954+
int rc;
955+
956+
if (!is_endpoint_decoder(dev))
957+
return 0;
958+
959+
cxled = to_cxl_endpoint_decoder(dev);
960+
guard(rwsem_read)(&cxl_rwsem.region);
961+
962+
cxlr = cxled->cxld.region;
963+
if (!cxlr)
964+
return 0;
965+
966+
p = &cxlr->params;
967+
if (!p->res)
968+
return 0;
969+
970+
if (walk_iomem_res_desc(IORES_DESC_NONE,
971+
IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
972+
p->res->start, p->res->end, NULL,
973+
cxl_is_system_ram) <= 0)
974+
return 0;
975+
976+
dev_info(dev, "Offlining CXL memory [%pr] for reset\n", p->res);
977+
978+
#ifdef CONFIG_MEMORY_HOTREMOVE
979+
rc = offline_and_remove_memory(p->res->start, resource_size(p->res));
980+
if (rc) {
981+
dev_err(dev,
982+
"Failed to offline CXL memory [%pr]: %d\n",
983+
p->res, rc);
984+
return rc;
985+
}
986+
#else
987+
dev_err(dev, "Memory hotremove not supported, cannot offline CXL memory\n");
988+
rc = -EOPNOTSUPP;
989+
return rc;
990+
#endif
991+
992+
return 0;
993+
}
994+
995+
/*
 * Quiesce CXL memory behind @cxlmd ahead of a reset by running
 * cxl_offline_memory() on every child device of the memdev's endpoint port.
 *
 * Returns -ENODEV if @cxlmd or its device state is missing, 0 if the memdev
 * has no usable endpoint port (nothing to offline), otherwise the first
 * non-zero value returned by cxl_offline_memory(), or 0 if every child
 * succeeded.
 */
static int __maybe_unused cxl_reset_prepare_memdev(struct cxl_memdev *cxlmd)
{
	struct cxl_port *endpoint;

	if (!cxlmd || !cxlmd->cxlds)
		return -ENODEV;

	endpoint = cxlmd->endpoint;

	/*
	 * The endpoint pointer may be an ERR_PTR rather than NULL when no
	 * endpoint port is attached; cxl_reset_flush_cpu_caches() guards
	 * against that as well. Reject both before touching endpoint->dev.
	 */
	if (IS_ERR_OR_NULL(endpoint))
		return 0;

	return device_for_each_child(&endpoint->dev, NULL,
				     cxl_offline_memory);
}
1011+
1012+
/*
 * device_for_each_child() callback: invalidate CPU cache lines covering the
 * region resource attached to an endpoint decoder.
 *
 * Children that are not endpoint decoders, decoders without a region, and
 * regions without an assigned resource are skipped. Always returns 0 so the
 * iteration continues over all children.
 */
static int __maybe_unused cxl_decoder_flush_cache(struct device *dev, void *data)
{
	struct cxl_endpoint_decoder *cxled;
	struct cxl_region *region;

	if (!is_endpoint_decoder(dev))
		return 0;

	cxled = to_cxl_endpoint_decoder(dev);

	/* Held until return so the region and its resource are read under the lock. */
	guard(rwsem_read)(&cxl_rwsem.region);

	region = cxled->cxld.region;
	if (region && region->params.res) {
		struct resource *range = region->params.res;

		cpu_cache_invalidate_memregion(range->start,
					       resource_size(range));
	}

	return 0;
}
1032+
1033+
/*
 * Invalidate CPU caches for every region reachable through the endpoint
 * decoders of @cxlmd.
 *
 * This is a no-op when @cxlmd is NULL, when it has no valid endpoint port
 * (NULL or ERR_PTR), or when cpu_cache_has_invalidate_memregion() reports
 * false. Always returns 0.
 */
static int __maybe_unused cxl_reset_flush_cpu_caches(struct cxl_memdev *cxlmd)
{
	struct cxl_port *port = cxlmd ? cxlmd->endpoint : NULL;

	if (IS_ERR_OR_NULL(port))
		return 0;

	if (cpu_cache_has_invalidate_memregion())
		device_for_each_child(&port->dev, NULL,
				      cxl_decoder_flush_cache);

	return 0;
}

0 commit comments

Comments
 (0)