Skip to content

Commit d8dc792

Browse files
yhuang-intel authored and opsiff committed
mm, pcp: reduce detecting time of consecutive high order page freeing
[ Upstream commit 6ccdcb6 ] Conflict: none In the current PCP auto-tuning design, if the number of pages allocated is much greater than the number of pages freed on a CPU, the PCP high may reach its maximal value even if the allocating/freeing depth is small, for example, in the sender of network workloads. If a CPU was originally used as a sender and is then used as a receiver after context switching, we need to fill the whole PCP up to the maximal high before PCP draining is triggered for consecutive high order freeing. This will hurt the performance of some network workloads. To solve the issue, in this patch, we track consecutive page freeing with a counter instead of relying on PCP draining. So, we can detect consecutive page freeing much earlier. On a 2-socket Intel server with 128 logical CPUs, we tested the SCTP_STREAM_MANY test case of the netperf test suite with 64-pair processes. With the patch, the network bandwidth improves by 5.0%. This recovers the performance drop caused by PCP auto-tuning. Intel-SIG: commit 6ccdcb6 mm, pcp: reduce detecting time of consecutive high order page freeing. Backport Auto-tune per-CPU pageset size. Link: https://lkml.kernel.org/r/20231016053002.756205-10-ying.huang@intel.com Signed-off-by: "Huang, Ying" <ying.huang@intel.com> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: David Hildenbrand <david@redhat.com> Cc: Johannes Weiner <jweiner@redhat.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Michal Hocko <mhocko@suse.com> Cc: Pavel Tatashin <pasha.tatashin@soleen.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Christoph Lameter <cl@linux.com> Cc: Arjan van de Ven <arjan@linux.intel.com> Cc: Sudeep Holla <sudeep.holla@arm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> [ Aubrey Li: amend commit log ] Signed-off-by: Aubrey Li <aubrey.li@linux.intel.com>
1 parent fc6a128 commit d8dc792

2 files changed

Lines changed: 16 additions & 13 deletions

File tree

include/linux/mmzone.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -710,10 +710,10 @@ struct per_cpu_pages {
710710
int batch; /* chunk size for buddy add/remove */
711711
u8 flags; /* protected by pcp->lock */
712712
u8 alloc_factor; /* batch scaling factor during allocate */
713-
u8 free_factor; /* batch scaling factor during free */
714713
#ifdef CONFIG_NUMA
715714
u8 expire; /* When 0, remote pagesets are drained */
716715
#endif
716+
short free_count; /* consecutive free count */
717717

718718
/* Lists of pages, one per migrate type stored on the pcp-lists */
719719
struct list_head lists[NR_PCP_LISTS];

mm/page_alloc.c

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2394,13 +2394,10 @@ static int nr_pcp_free(struct per_cpu_pages *pcp, int batch, int high, bool free
23942394
max_nr_free = high - batch;
23952395

23962396
/*
2397-
* Double the number of pages freed each time there is subsequent
2398-
* freeing of pages without any allocation.
2397+
* Increase the batch number to the number of the consecutive
2398+
* freed pages to reduce zone lock contention.
23992399
*/
2400-
batch <<= pcp->free_factor;
2401-
if (batch <= max_nr_free && pcp->free_factor < CONFIG_PCP_BATCH_SCALE_MAX)
2402-
pcp->free_factor++;
2403-
batch = clamp(batch, min_nr_free, max_nr_free);
2400+
batch = clamp_t(int, pcp->free_count, min_nr_free, max_nr_free);
24042401

24052402
return batch;
24062403
}
@@ -2428,18 +2425,22 @@ static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone,
24282425
* stored on pcp lists
24292426
*/
24302427
if (test_bit(ZONE_RECLAIM_ACTIVE, &zone->flags)) {
2431-
pcp->high = max(high - (batch << pcp->free_factor), high_min);
2428+
int free_count = max_t(int, pcp->free_count, batch);
2429+
2430+
pcp->high = max(high - free_count, high_min);
24322431
return min(batch << 2, pcp->high);
24332432
}
24342433

24352434
if (high_min == high_max)
24362435
return high;
24372436

24382437
if (test_bit(ZONE_BELOW_HIGH, &zone->flags)) {
2439-
pcp->high = max(high - (batch << pcp->free_factor), high_min);
2438+
int free_count = max_t(int, pcp->free_count, batch);
2439+
2440+
pcp->high = max(high - free_count, high_min);
24402441
high = max(pcp->count, high_min);
24412442
} else if (pcp->count >= high) {
2442-
int need_high = (batch << pcp->free_factor) + batch;
2443+
int need_high = pcp->free_count + batch;
24432444

24442445
/* pcp->high should be large enough to hold batch freed pages */
24452446
if (pcp->high < need_high)
@@ -2476,14 +2477,16 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
24762477
* stops will be drained from vmstat refresh context.
24772478
*/
24782479
if (order && order <= PAGE_ALLOC_COSTLY_ORDER) {
2479-
free_high = (pcp->free_factor &&
2480+
free_high = (pcp->free_count >= batch &&
24802481
(pcp->flags & PCPF_PREV_FREE_HIGH_ORDER) &&
24812482
(!(pcp->flags & PCPF_FREE_HIGH_BATCH) ||
24822483
pcp->count >= READ_ONCE(batch)));
24832484
pcp->flags |= PCPF_PREV_FREE_HIGH_ORDER;
24842485
} else if (pcp->flags & PCPF_PREV_FREE_HIGH_ORDER) {
24852486
pcp->flags &= ~PCPF_PREV_FREE_HIGH_ORDER;
24862487
}
2488+
if (pcp->free_count < (batch << CONFIG_PCP_BATCH_SCALE_MAX))
2489+
pcp->free_count += (1 << order);
24872490
high = nr_pcp_high(pcp, zone, batch, free_high);
24882491
if (pcp->count >= high) {
24892492
free_pcppages_bulk(zone, nr_pcp_free(pcp, batch, high, free_high),
@@ -2880,7 +2883,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
28802883
* See nr_pcp_free() where free_factor is increased for subsequent
28812884
* frees.
28822885
*/
2883-
pcp->free_factor >>= 1;
2886+
pcp->free_count >>= 1;
28842887
list = &pcp->lists[order_to_pindex(migratetype, order)];
28852888
page = __rmqueue_pcplist(zone, order, migratetype, alloc_flags, pcp, list);
28862889
pcp_spin_unlock(pcp);
@@ -5508,7 +5511,7 @@ static void per_cpu_pages_init(struct per_cpu_pages *pcp, struct per_cpu_zonesta
55085511
pcp->high_min = BOOT_PAGESET_HIGH;
55095512
pcp->high_max = BOOT_PAGESET_HIGH;
55105513
pcp->batch = BOOT_PAGESET_BATCH;
5511-
pcp->free_factor = 0;
5514+
pcp->free_count = 0;
55125515
}
55135516

55145517
static void __zone_set_pageset_high_and_batch(struct zone *zone, unsigned long high_min,

0 commit comments

Comments
 (0)