Skip to content

Commit fc781ce

Browse files
kvaneesh authored and smb49 committed
mm/vmemmap/devdax: fix kernel crash when probing devdax devices
BugLink: https://bugs.launchpad.net/bugs/2028979 commit 87a7ae7 upstream. commit 4917f55 ("mm/sparse-vmemmap: improve memory savings for compound devmaps") added support for using optimized vmemmap for devdax devices. But how vmemmap mappings are created is architecture specific. For example, powerpc with hash translation doesn't have vmemmap mappings in the init_mm page table; instead they are bolted table entries in the hardware page table. vmemmap_populate_compound_pages(), used by the vmemmap optimization code, is not aware of these architecture-specific mappings. Hence allow architectures to opt in to this feature. I selected architectures supporting the HUGETLB_PAGE_OPTIMIZE_VMEMMAP option as also supporting this feature. This patch fixes the below crash on ppc64. BUG: Unable to handle kernel data access on write at 0xc00c000100400038 Faulting instruction address: 0xc000000001269d90 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Modules linked in: CPU: 7 PID: 1 Comm: swapper/0 Not tainted 6.3.0-rc5-150500.34-default+ #2 5c90a668b6bbd142599890245c2fb5de19d7d28a Hardware name: IBM,9009-42G POWER9 (raw) 0x4e0202 0xf000005 of:IBM,FW950.40 (VL950_099) hv:phyp pSeries NIP: c000000001269d90 LR: c0000000004c57d4 CTR: 0000000000000000 REGS: c000000003632c30 TRAP: 0300 Not tainted (6.3.0-rc5-150500.34-default+) MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR: 24842228 XER: 00000000 CFAR: c0000000004c57d0 DAR: c00c000100400038 DSISR: 42000000 IRQMASK: 0 .... 
NIP [c000000001269d90] __init_single_page.isra.74+0x14/0x4c LR [c0000000004c57d4] __init_zone_device_page+0x44/0xd0 Call Trace: [c000000003632ed0] [c000000003632f60] 0xc000000003632f60 (unreliable) [c000000003632f10] [c0000000004c5ca0] memmap_init_zone_device+0x170/0x250 [c000000003632fe0] [c0000000005575f8] memremap_pages+0x2c8/0x7f0 [c0000000036330c0] [c000000000557b5c] devm_memremap_pages+0x3c/0xa0 [c000000003633100] [c000000000d458a8] dev_dax_probe+0x108/0x3e0 [c0000000036331a0] [c000000000d41430] dax_bus_probe+0xb0/0x140 [c0000000036331d0] [c000000000cef27c] really_probe+0x19c/0x520 [c000000003633260] [c000000000cef6b4] __driver_probe_device+0xb4/0x230 [c0000000036332e0] [c000000000cef888] driver_probe_device+0x58/0x120 [c000000003633320] [c000000000cefa6c] __device_attach_driver+0x11c/0x1e0 [c0000000036333a0] [c000000000cebc58] bus_for_each_drv+0xa8/0x130 [c000000003633400] [c000000000ceefcc] __device_attach+0x15c/0x250 [c0000000036334a0] [c000000000ced458] bus_probe_device+0x108/0x110 [c0000000036334f0] [c000000000ce92dc] device_add+0x7fc/0xa10 [c0000000036335b0] [c000000000d447c8] devm_create_dev_dax+0x1d8/0x530 [c000000003633640] [c000000000d46b60] __dax_pmem_probe+0x200/0x270 [c0000000036337b0] [c000000000d46bf0] dax_pmem_probe+0x20/0x70 [c0000000036337d0] [c000000000d2279c] nvdimm_bus_probe+0xac/0x2b0 [c000000003633860] [c000000000cef27c] really_probe+0x19c/0x520 [c0000000036338f0] [c000000000cef6b4] __driver_probe_device+0xb4/0x230 [c000000003633970] [c000000000cef888] driver_probe_device+0x58/0x120 [c0000000036339b0] [c000000000cefd08] __driver_attach+0x1d8/0x240 [c000000003633a30] [c000000000cebb04] bus_for_each_dev+0xb4/0x130 [c000000003633a90] [c000000000cee564] driver_attach+0x34/0x50 [c000000003633ab0] [c000000000ced878] bus_add_driver+0x218/0x300 [c000000003633b40] [c000000000cf1144] driver_register+0xa4/0x1b0 [c000000003633bb0] [c000000000d21a0c] __nd_driver_register+0x5c/0x100 [c000000003633c10] [c00000000206a2e8] dax_pmem_init+0x34/0x48 
[c000000003633c30] [c0000000000132d0] do_one_initcall+0x60/0x320 [c000000003633d00] [c0000000020051b0] kernel_init_freeable+0x360/0x400 [c000000003633de0] [c000000000013764] kernel_init+0x34/0x1d0 [c000000003633e50] [c00000000000de14] ret_from_kernel_thread+0x5c/0x64 Link: https://lkml.kernel.org/r/20230411142214.64464-1-aneesh.kumar@linux.ibm.com Fixes: 4917f55 ("mm/sparse-vmemmap: improve memory savings for compound devmaps") Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> Reported-by: Tarun Sahu <tsahu@linux.ibm.com> Reviewed-by: Joao Martins <joao.m.martins@oracle.com> Cc: Muchun Song <songmuchun@bytedance.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Piyush Sachdeva <piyushs@linux.ibm.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Signed-off-by: Kamal Mostafa <kamal@canonical.com> Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
1 parent d724e3d commit fc781ce

3 files changed

Lines changed: 23 additions & 6 deletions

File tree

include/linux/mm.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3439,6 +3439,22 @@ void vmemmap_populate_print_last(void);
34393439
void vmemmap_free(unsigned long start, unsigned long end,
34403440
struct vmem_altmap *altmap);
34413441
#endif
3442+
3443+
#ifdef CONFIG_ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
3444+
static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap,
3445+
struct dev_pagemap *pgmap)
3446+
{
3447+
return is_power_of_2(sizeof(struct page)) &&
3448+
pgmap && (pgmap_vmemmap_nr(pgmap) > 1) && !altmap;
3449+
}
3450+
#else
3451+
static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap,
3452+
struct dev_pagemap *pgmap)
3453+
{
3454+
return false;
3455+
}
3456+
#endif
3457+
34423458
void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
34433459
unsigned long nr_pages);
34443460

mm/page_alloc.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6861,10 +6861,12 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
68616861
* of an altmap. See vmemmap_populate_compound_pages().
68626862
*/
68636863
static inline unsigned long compound_nr_pages(struct vmem_altmap *altmap,
6864-
unsigned long nr_pages)
6864+
struct dev_pagemap *pgmap)
68656865
{
6866-
return is_power_of_2(sizeof(struct page)) &&
6867-
!altmap ? 2 * (PAGE_SIZE / sizeof(struct page)) : nr_pages;
6866+
if (!vmemmap_can_optimize(altmap, pgmap))
6867+
return pgmap_vmemmap_nr(pgmap);
6868+
6869+
return 2 * (PAGE_SIZE / sizeof(struct page));
68686870
}
68696871

68706872
static void __ref memmap_init_compound(struct page *head,
@@ -6929,7 +6931,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
69296931
continue;
69306932

69316933
memmap_init_compound(page, pfn, zone_idx, nid, pgmap,
6932-
compound_nr_pages(altmap, pfns_per_compound));
6934+
compound_nr_pages(altmap, pgmap));
69336935
}
69346936

69356937
pr_info("%s initialised %lu pages in %ums\n", __func__,

mm/sparse-vmemmap.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -458,8 +458,7 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn,
458458
!IS_ALIGNED(nr_pages, PAGES_PER_SUBSECTION)))
459459
return NULL;
460460

461-
if (is_power_of_2(sizeof(struct page)) &&
462-
pgmap && pgmap_vmemmap_nr(pgmap) > 1 && !altmap)
461+
if (vmemmap_can_optimize(altmap, pgmap))
463462
r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap);
464463
else
465464
r = vmemmap_populate(start, end, nid, altmap);

0 commit comments

Comments
 (0)