Skip to content

Commit 17c9590

Browse files
yhuang-intel authored and opsiff committed
mm and cache_info: remove unnecessary CPU cache info update
[ Upstream commit 5cec4eb ] Conflict: none. For each CPU hotplug event, we currently update the per-CPU data slice size and the corresponding PCP configuration for every online CPU, to keep the implementation simple. But Kyle reported that this takes tens of seconds during boot on a machine with 34 zones and 3840 CPUs. So, in this patch, for each CPU hotplug event, we only update the per-CPU data slice size and the corresponding PCP configuration for the CPUs that share caches with the hotplugged CPU. With the patch, the system boot time is reduced by 67 seconds on the machine. Intel-SIG: commit 5cec4eb mm and cache_info: remove unnecessary CPU cache info update. Backport Auto-tune per-CPU pageset size. Link: https://lkml.kernel.org/r/20240126081944.414520-1-ying.huang@intel.com Fixes: 362d37a ("mm, pcp: reduce lock contention for draining high-order pages") Signed-off-by: "Huang, Ying" <ying.huang@intel.com> Originally-by: Kyle Meyer <kyle.meyer@hpe.com> Reported-and-tested-by: Kyle Meyer <kyle.meyer@hpe.com> Cc: Sudeep Holla <sudeep.holla@arm.com> Cc: Mel Gorman <mgorman@techsingularity.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> [ Aubrey Li: amend commit log ] Signed-off-by: Aubrey Li <aubrey.li@linux.intel.com>
1 parent d8dc792 commit 17c9590

3 files changed

Lines changed: 63 additions & 28 deletions

File tree

drivers/base/cacheinfo.c

Lines changed: 44 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -900,6 +900,37 @@ static int cache_add_dev(unsigned int cpu)
900900
return rc;
901901
}
902902

903+
static unsigned int cpu_map_shared_cache(bool online, unsigned int cpu,
904+
cpumask_t **map)
905+
{
906+
struct cacheinfo *llc, *sib_llc;
907+
unsigned int sibling;
908+
909+
if (!last_level_cache_is_valid(cpu))
910+
return 0;
911+
912+
llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1);
913+
914+
if (llc->type != CACHE_TYPE_DATA && llc->type != CACHE_TYPE_UNIFIED)
915+
return 0;
916+
917+
if (online) {
918+
*map = &llc->shared_cpu_map;
919+
return cpumask_weight(*map);
920+
}
921+
922+
/* shared_cpu_map of offlined CPU will be cleared, so use sibling map */
923+
for_each_cpu(sibling, &llc->shared_cpu_map) {
924+
if (sibling == cpu || !last_level_cache_is_valid(sibling))
925+
continue;
926+
sib_llc = per_cpu_cacheinfo_idx(sibling, cache_leaves(sibling) - 1);
927+
*map = &sib_llc->shared_cpu_map;
928+
return cpumask_weight(*map);
929+
}
930+
931+
return 0;
932+
}
933+
903934
/*
904935
* Calculate the size of the per-CPU data cache slice. This can be
905936
* used to estimate the size of the data cache slice that can be used
@@ -931,28 +962,31 @@ static void update_per_cpu_data_slice_size_cpu(unsigned int cpu)
931962
ci->per_cpu_data_slice_size = llc->size / nr_shared;
932963
}
933964

934-
static void update_per_cpu_data_slice_size(bool cpu_online, unsigned int cpu)
965+
static void update_per_cpu_data_slice_size(bool cpu_online, unsigned int cpu,
966+
cpumask_t *cpu_map)
935967
{
936968
unsigned int icpu;
937969

938-
for_each_online_cpu(icpu) {
970+
for_each_cpu(icpu, cpu_map) {
939971
if (!cpu_online && icpu == cpu)
940972
continue;
941973
update_per_cpu_data_slice_size_cpu(icpu);
974+
setup_pcp_cacheinfo(icpu);
942975
}
943976
}
944977

945978
static int cacheinfo_cpu_online(unsigned int cpu)
946979
{
947980
int rc = detect_cache_attributes(cpu);
981+
cpumask_t *cpu_map;
948982

949983
if (rc)
950984
return rc;
951985
rc = cache_add_dev(cpu);
952986
if (rc)
953987
goto err;
954-
update_per_cpu_data_slice_size(true, cpu);
955-
setup_pcp_cacheinfo();
988+
if (cpu_map_shared_cache(true, cpu, &cpu_map))
989+
update_per_cpu_data_slice_size(true, cpu, cpu_map);
956990
return 0;
957991
err:
958992
free_cache_attributes(cpu);
@@ -961,12 +995,16 @@ static int cacheinfo_cpu_online(unsigned int cpu)
961995

962996
static int cacheinfo_cpu_pre_down(unsigned int cpu)
963997
{
998+
cpumask_t *cpu_map;
999+
unsigned int nr_shared;
1000+
1001+
nr_shared = cpu_map_shared_cache(false, cpu, &cpu_map);
9641002
if (cpumask_test_and_clear_cpu(cpu, &cache_dev_map))
9651003
cpu_cache_sysfs_exit(cpu);
9661004

9671005
free_cache_attributes(cpu);
968-
update_per_cpu_data_slice_size(false, cpu);
969-
setup_pcp_cacheinfo();
1006+
if (nr_shared > 1)
1007+
update_per_cpu_data_slice_size(false, cpu, cpu_map);
9701008
return 0;
9711009
}
9721010

include/linux/gfp.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -326,7 +326,7 @@ void drain_all_pages(struct zone *zone);
326326
void drain_local_pages(struct zone *zone);
327327

328328
void page_alloc_init_late(void);
329-
void setup_pcp_cacheinfo(void);
329+
void setup_pcp_cacheinfo(unsigned int cpu);
330330

331331
/*
332332
* gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what

mm/page_alloc.c

Lines changed: 18 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -5594,37 +5594,34 @@ static void zone_pcp_update(struct zone *zone, int cpu_online)
55945594
mutex_unlock(&pcp_batch_high_lock);
55955595
}
55965596

5597-
static void zone_pcp_update_cacheinfo(struct zone *zone)
5597+
static void zone_pcp_update_cacheinfo(struct zone *zone, unsigned int cpu)
55985598
{
5599-
int cpu;
56005599
struct per_cpu_pages *pcp;
56015600
struct cpu_cacheinfo *cci;
56025601

5603-
for_each_online_cpu(cpu) {
5604-
pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
5605-
cci = get_cpu_cacheinfo(cpu);
5606-
/*
5607-
* If data cache slice of CPU is large enough, "pcp->batch"
5608-
* pages can be preserved in PCP before draining PCP for
5609-
* consecutive high-order pages freeing without allocation.
5610-
* This can reduce zone lock contention without hurting
5611-
* cache-hot pages sharing.
5612-
*/
5613-
spin_lock(&pcp->lock);
5614-
if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch)
5615-
pcp->flags |= PCPF_FREE_HIGH_BATCH;
5616-
else
5617-
pcp->flags &= ~PCPF_FREE_HIGH_BATCH;
5618-
spin_unlock(&pcp->lock);
5619-
}
5602+
pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
5603+
cci = get_cpu_cacheinfo(cpu);
5604+
/*
5605+
* If data cache slice of CPU is large enough, "pcp->batch"
5606+
* pages can be preserved in PCP before draining PCP for
5607+
* consecutive high-order pages freeing without allocation.
5608+
* This can reduce zone lock contention without hurting
5609+
* cache-hot pages sharing.
5610+
*/
5611+
spin_lock(&pcp->lock);
5612+
if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch)
5613+
pcp->flags |= PCPF_FREE_HIGH_BATCH;
5614+
else
5615+
pcp->flags &= ~PCPF_FREE_HIGH_BATCH;
5616+
spin_unlock(&pcp->lock);
56205617
}
56215618

5622-
void setup_pcp_cacheinfo(void)
5619+
void setup_pcp_cacheinfo(unsigned int cpu)
56235620
{
56245621
struct zone *zone;
56255622

56265623
for_each_populated_zone(zone)
5627-
zone_pcp_update_cacheinfo(zone);
5624+
zone_pcp_update_cacheinfo(zone, cpu);
56285625
}
56295626

56305627
/*

0 commit comments

Comments
 (0)