From ce0dba2eb054760300620c08d89395fb2d9ed3b6 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:25:17 +0800 Subject: [PATCH 01/50] drm/ttm: introduce pool_shrink_rwsem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from mainline-v6.7-rc1 category: performance Currently, synchronize_shrinkers() is only used by TTM pool. It only requires that no shrinkers run in parallel. After we use RCU+refcount method to implement the lockless slab shrink, we can not use shrinker_rwsem or synchronize_rcu() to guarantee that all shrinker invocations have seen an update before freeing memory. So we introduce a new pool_shrink_rwsem to implement a private ttm_pool_synchronize_shrinkers(), so as to achieve the same purpose. Link: https://lkml.kernel.org/r/20230911092517.64141-5-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Reviewed-by: Christian König Acked-by: Daniel Vetter Cc: Christian Brauner Cc: Chuck Lever Cc: Daniel Vetter Cc: Darrick J. Wong Cc: Dave Chinner Cc: Greg Kroah-Hartman Cc: Joel Fernandes Cc: Kirill Tkhai Cc: Paul E. McKenney Cc: Roman Gushchin Cc: Sergey Senozhatsky Cc: Steven Price Cc: Theodore Ts'o Cc: Vlastimil Babka Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Coly Li Cc: Dai Ngo Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Sean Paul Cc: Song Liu Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 0b2f5ea1aa39c0ed34bdadb53faf519e3d84ac4a) Signed-off-by: Wentao Guan --- drivers/gpu/drm/ttm/ttm_pool.c | 17 ++++++++++++++++- include/linux/shrinker.h | 1 - mm/shrinker.c | 15 --------------- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 80ba34cabca36..c1e0a355cfa4a 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -74,6 +74,7 @@ static struct ttm_pool_type global_dma32_uncached[NR_PAGE_ORDERS]; static spinlock_t shrinker_lock; static struct list_head shrinker_list; static struct shrinker mm_shrinker; +static DECLARE_RWSEM(pool_shrink_rwsem); /* Allocate pages of size 1 << order with the given gfp_flags */ static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags, @@ -323,6 +324,7 @@ static unsigned int ttm_pool_shrink(void) unsigned int num_pages; struct page *p; + down_read(&pool_shrink_rwsem); spin_lock(&shrinker_lock); pt = list_first_entry(&shrinker_list, typeof(*pt), shrinker_list); list_move_tail(&pt->shrinker_list, &shrinker_list); @@ -335,6 +337,7 @@ static unsigned int ttm_pool_shrink(void) } else { num_pages = 0; } + up_read(&pool_shrink_rwsem); return num_pages; } @@ -584,6 +587,18 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, } EXPORT_SYMBOL(ttm_pool_init); +/** + * ttm_pool_synchronize_shrinkers - Wait for all running shrinkers to complete. + * + * This is useful to guarantee that all shrinker invocations have seen an + * update, before freeing memory, similar to rcu. + */ +static void ttm_pool_synchronize_shrinkers(void) +{ + down_write(&pool_shrink_rwsem); + up_write(&pool_shrink_rwsem); +} + /** * ttm_pool_fini - Cleanup a pool * @@ -611,7 +626,7 @@ void ttm_pool_fini(struct ttm_pool *pool) /* We removed the pool types from the LRU, but we need to also make sure * that no shrinker is concurrently freeing pages from the pool. */ - synchronize_shrinkers(); + ttm_pool_synchronize_shrinkers(); } EXPORT_SYMBOL(ttm_pool_fini); diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 3aa2119c3bc3c..e652372ebd8bf 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -110,7 +110,6 @@ extern int __printf(2, 3) register_shrinker(struct shrinker *shrinker, const char *fmt, ...); extern void unregister_shrinker(struct shrinker *shrinker); extern void free_prealloced_shrinker(struct shrinker *shrinker); -extern void synchronize_shrinkers(void); #ifdef CONFIG_SHRINKER_DEBUG extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, diff --git a/mm/shrinker.c b/mm/shrinker.c index 043c87ccfab41..a16cd448b9243 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -692,18 +692,3 @@ void unregister_shrinker(struct shrinker *shrinker) shrinker->nr_deferred = NULL; } EXPORT_SYMBOL(unregister_shrinker); - -/** - * synchronize_shrinkers - Wait for all running shrinkers to complete. - * - * This is equivalent to calling unregister_shrink() and register_shrinker(), - * but atomically and with less overhead. This is useful to guarantee that all - * shrinker invocations have seen an update, before freeing memory, similar to - * rcu. - */ -void synchronize_shrinkers(void) -{ - down_write(&shrinker_rwsem); - up_write(&shrinker_rwsem); -} -EXPORT_SYMBOL(synchronize_shrinkers); From 08cfa87c6d5536a00fb644c5639cf13e6b55f8c8 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:00 +0800 Subject: [PATCH 02/50] mm: shrinker: add infrastructure for dynamically allocating shrinker mainline inclusion from mainline-v6.7-rc1 category: performance Patch series "use refcount+RCU method to implement lockless slab shrink", v6. 1. Background ============= We used to implement the lockless slab shrink with SRCU [1], but then kernel test robot reported -88.8% regression in stress-ng.ramfs.ops_per_sec test case [2], so we reverted it [3]. This patch series aims to re-implement the lockless slab shrink using the refcount+RCU method proposed by Dave Chinner [4]. [1]. https://lore.kernel.org/lkml/20230313112819.38938-1-zhengqi.arch@bytedance.com/ [2]. https://lore.kernel.org/lkml/202305230837.db2c233f-yujie.liu@intel.com/ [3]. https://lore.kernel.org/all/20230609081518.3039120-1-qi.zheng@linux.dev/ [4]. https://lore.kernel.org/lkml/ZIJhou1d55d4H1s0@dread.disaster.area/ 2. Implementation ================= Currently, the shrinker instances can be divided into the following three types: a) global shrinker instance statically defined in the kernel, such as workingset_shadow_shrinker. b) global shrinker instance statically defined in the kernel modules, such as mmu_shrinker in x86. c) shrinker instance embedded in other structures. For case a, the memory of shrinker instance is never freed. For case b, the memory of shrinker instance will be freed after synchronize_rcu() when the module is unloaded. For case c, the memory of shrinker instance will be freed along with the structure it is embedded in. In preparation for implementing lockless slab shrink, we need to dynamically allocate those shrinker instances in case c, then the memory can be dynamically freed alone by calling kfree_rcu(). This patchset adds the following new APIs for dynamically allocating shrinker, and add a private_data field to struct shrinker to record and get the original embedded structure. 1. shrinker_alloc() 2. shrinker_register() 3. shrinker_free() In order to simplify shrinker-related APIs and make shrinker more independent of other kernel mechanisms, this patchset uses the above APIs to convert all shrinkers (including case a and b) to dynamically allocated, and then remove all existing APIs. This will also have another advantage mentioned by Dave Chinner: ``` The other advantage of this is that it will break all the existing out of tree code and third party modules using the old API and will no longer work with a kernel using lockless slab shrinkers. They need to break (both at the source and binary levels) to stop bad things from happening due to using uncoverted shrinkers in the new setup. ``` Then we free the shrinker by calling call_rcu(), and use rcu_read_{lock,unlock}() to ensure that the shrinker instance is valid. And the shrinker::refcount mechanism ensures that the shrinker instance will not be run again after unregistration. So the structure that records the pointer of shrinker instance can be safely freed without waiting for the RCU read-side critical section. In this way, while we implement the lockless slab shrink, we don't need to be blocked in unregister_shrinker() to wait RCU read-side critical section. PATCH 1: introduce new APIs PATCH 2~38: convert all shrinnkers to use new APIs PATCH 39: remove old APIs PATCH 40~41: some cleanups and preparations PATCH 42-43: implement the lockless slab shrink PATCH 44~45: convert shrinker_rwsem to mutex 3. Testing ========== 3.1 slab shrink stress test --------------------------- We can reproduce the down_read_trylock() hotspot through the following script: ``` DIR="/root/shrinker/memcg/mnt" do_create() { mkdir -p /sys/fs/cgroup/memory/test echo 4G > /sys/fs/cgroup/memory/test/memory.limit_in_bytes for i in `seq 0 $1`; do mkdir -p /sys/fs/cgroup/memory/test/$i; echo $$ > /sys/fs/cgroup/memory/test/$i/cgroup.procs; mkdir -p $DIR/$i; done } do_mount() { for i in `seq $1 $2`; do mount -t tmpfs $i $DIR/$i; done } do_touch() { for i in `seq $1 $2`; do echo $$ > /sys/fs/cgroup/memory/test/$i/cgroup.procs; dd if=/dev/zero of=$DIR/$i/file$i bs=1M count=1 & done } case "$1" in touch) do_touch $2 $3 ;; test) do_create 4000 do_mount 0 4000 do_touch 0 3000 ;; *) exit 1 ;; esac ``` Save the above script, then run test and touch commands. Then we can use the following perf command to view hotspots: perf top -U -F 999 1) Before applying this patchset: 33.15% [kernel] [k] down_read_trylock 25.38% [kernel] [k] shrink_slab 21.75% [kernel] [k] up_read 4.45% [kernel] [k] _find_next_bit 2.27% [kernel] [k] do_shrink_slab 1.80% [kernel] [k] intel_idle_irq 1.79% [kernel] [k] shrink_lruvec 0.67% [kernel] [k] xas_descend 0.41% [kernel] [k] mem_cgroup_iter 0.40% [kernel] [k] shrink_node 0.38% [kernel] [k] list_lru_count_one 2) After applying this patchset: 64.56% [kernel] [k] shrink_slab 12.18% [kernel] [k] do_shrink_slab 3.30% [kernel] [k] __rcu_read_unlock 2.61% [kernel] [k] shrink_lruvec 2.49% [kernel] [k] __rcu_read_lock 1.93% [kernel] [k] intel_idle_irq 0.89% [kernel] [k] shrink_node 0.81% [kernel] [k] mem_cgroup_iter 0.77% [kernel] [k] mem_cgroup_calculate_protection 0.66% [kernel] [k] list_lru_count_one We can see that the first perf hotspot becomes shrink_slab, which is what we expect. 3.2 registration and unregistration stress test ----------------------------------------------- Run the command below to test: stress-ng --timeout 60 --times --verify --metrics-brief --ramfs 9 & 1) Before applying this patchset: setting to a 60 second run per stressor dispatching hogs: 9 ramfs stressor bogo ops real time usr time sys time bogo ops/s bogo ops/s (secs) (secs) (secs) (real time) (usr+sys time) ramfs 473062 60.00 8.00 279.13 7884.12 1647.59 for a 60.01s run time: 1440.34s available CPU time 7.99s user time ( 0.55%) 279.13s system time ( 19.38%) 287.12s total time ( 19.93%) load average: 7.12 2.99 1.15 successful run completed in 60.01s (1 min, 0.01 secs) 2) After applying this patchset: setting to a 60 second run per stressor dispatching hogs: 9 ramfs stressor bogo ops real time usr time sys time bogo ops/s bogo ops/s (secs) (secs) (secs) (real time) (usr+sys time) ramfs 477165 60.00 8.13 281.34 7952.55 1648.40 for a 60.01s run time: 1440.33s available CPU time 8.12s user time ( 0.56%) 281.34s system time ( 19.53%) 289.46s total time ( 20.10%) load average: 6.98 3.03 1.19 successful run completed in 60.01s (1 min, 0.01 secs) We can see that the ops/s has hardly changed. This patch (of 45): Currently, the shrinker instances can be divided into the following three types: a) global shrinker instance statically defined in the kernel, such as workingset_shadow_shrinker. b) global shrinker instance statically defined in the kernel modules, such as mmu_shrinker in x86. c) shrinker instance embedded in other structures. For case a, the memory of shrinker instance is never freed. For case b, the memory of shrinker instance will be freed after synchronize_rcu() when the module is unloaded. For case c, the memory of shrinker instance will be freed along with the structure it is embedded in. In preparation for implementing lockless slab shrink, we need to dynamically allocate those shrinker instances in case c, then the memory can be dynamically freed alone by calling kfree_rcu(). So this commit adds the following new APIs for dynamically allocating shrinker, and add a private_data field to struct shrinker to record and get the original embedded structure. 1. shrinker_alloc() Used to allocate shrinker instance itself and related memory, it will return a pointer to the shrinker instance on success and NULL on failure. 2. shrinker_register() Used to register the shrinker instance, which is same as the current register_shrinker_prepared(). 3. shrinker_free() Used to unregister (if needed) and free the shrinker instance. In order to simplify shrinker-related APIs and make shrinker more independent of other kernel mechanisms, subsequent submissions will use the above API to convert all shrinkers (including case a and b) to dynamically allocated, and then remove all existing APIs. This will also have another advantage mentioned by Dave Chinner: ``` The other advantage of this is that it will break all the existing out of tree code and third party modules using the old API and will no longer work with a kernel using lockless slab shrinkers. They need to break (both at the source and binary levels) to stop bad things from happening due to using unconverted shrinkers in the new setup. ``` [zhengqi.arch@bytedance.com: mm: shrinker: some cleanup] Link: https://lkml.kernel.org/r/20230919024607.65463-1-zhengqi.arch@bytedance.com Link: https://lkml.kernel.org/r/20230911094444.68966-1-zhengqi.arch@bytedance.com Link: https://lkml.kernel.org/r/20230911094444.68966-2-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Christian Brauner Cc: Chuck Lever Cc: Darrick J. Wong Cc: Dave Chinner Cc: Greg Kroah-Hartman Cc: Kirill Tkhai Cc: Paul E. McKenney Cc: Roman Gushchin Cc: Sergey Senozhatsky Cc: Steven Price Cc: Theodore Ts'o Cc: Vlastimil Babka Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Koenig Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Sean Paul Cc: Song Liu Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit c42d50aefd17a6bad3ed617769edbbb579137545) Signed-off-by: Wentao Guan --- include/linux/shrinker.h | 19 +++++-- mm/internal.h | 22 ++++++++ mm/shrinker.c | 106 +++++++++++++++++++++++++++++++++++++++ mm/shrinker_debug.c | 3 -- 4 files changed, 142 insertions(+), 8 deletions(-) diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index e652372ebd8bf..c88f42f75dc0f 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -74,6 +74,8 @@ struct shrinker { int seeks; /* seeks to recreate an obj */ unsigned flags; + void *private_data; + /* These are for internal use */ struct list_head list; #ifdef CONFIG_MEMCG @@ -93,15 +95,22 @@ struct shrinker { }; #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ -/* Flags */ -#define SHRINKER_REGISTERED (1 << 0) -#define SHRINKER_NUMA_AWARE (1 << 1) -#define SHRINKER_MEMCG_AWARE (1 << 2) +/* Internal flags */ +#define SHRINKER_REGISTERED BIT(0) +#define SHRINKER_ALLOCATED BIT(1) + +/* Flags for users to use */ +#define SHRINKER_NUMA_AWARE BIT(2) +#define SHRINKER_MEMCG_AWARE BIT(3) /* * It just makes sense when the shrinker is also MEMCG_AWARE for now, * non-MEMCG_AWARE shrinker should not have this flag set. */ -#define SHRINKER_NONSLAB (1 << 3) +#define SHRINKER_NONSLAB BIT(4) + +struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...); +void shrinker_register(struct shrinker *shrinker); +void shrinker_free(struct shrinker *shrinker); extern int __printf(2, 3) prealloc_shrinker(struct shrinker *shrinker, const char *fmt, ...); diff --git a/mm/internal.h b/mm/internal.h index b9be9ff122dc4..d95461953559d 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1291,6 +1291,20 @@ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, int priority); #ifdef CONFIG_SHRINKER_DEBUG +static inline int shrinker_debugfs_name_alloc(struct shrinker *shrinker, + const char *fmt, va_list ap) +{ + shrinker->name = kvasprintf_const(GFP_KERNEL, fmt, ap); + + return shrinker->name ? 0 : -ENOMEM; +} + +static inline void shrinker_debugfs_name_free(struct shrinker *shrinker) +{ + kfree_const(shrinker->name); + shrinker->name = NULL; +} + extern int shrinker_debugfs_add(struct shrinker *shrinker); extern struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, int *debugfs_id); @@ -1301,6 +1315,14 @@ static inline int shrinker_debugfs_add(struct shrinker *shrinker) { return 0; } +static inline int shrinker_debugfs_name_alloc(struct shrinker *shrinker, + const char *fmt, va_list ap) +{ + return 0; +} +static inline void shrinker_debugfs_name_free(struct shrinker *shrinker) +{ +} static inline struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, int *debugfs_id) { diff --git a/mm/shrinker.c b/mm/shrinker.c index a16cd448b9243..d1032a4d56845 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -550,6 +550,112 @@ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, return freed; } +struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...) +{ + struct shrinker *shrinker; + unsigned int size; + va_list ap; + int err; + + shrinker = kzalloc(sizeof(struct shrinker), GFP_KERNEL); + if (!shrinker) + return NULL; + + va_start(ap, fmt); + err = shrinker_debugfs_name_alloc(shrinker, fmt, ap); + va_end(ap); + if (err) + goto err_name; + + shrinker->flags = flags | SHRINKER_ALLOCATED; + shrinker->seeks = DEFAULT_SEEKS; + + if (flags & SHRINKER_MEMCG_AWARE) { + err = prealloc_memcg_shrinker(shrinker); + if (err == -ENOSYS) { + /* Memcg is not supported, fallback to non-memcg-aware shrinker. */ + shrinker->flags &= ~SHRINKER_MEMCG_AWARE; + goto non_memcg; + } + + if (err) + goto err_flags; + + return shrinker; + } + +non_memcg: + /* + * The nr_deferred is available on per memcg level for memcg aware + * shrinkers, so only allocate nr_deferred in the following cases: + * - non-memcg-aware shrinkers + * - !CONFIG_MEMCG + * - memcg is disabled by kernel command line + */ + size = sizeof(*shrinker->nr_deferred); + if (flags & SHRINKER_NUMA_AWARE) + size *= nr_node_ids; + + shrinker->nr_deferred = kzalloc(size, GFP_KERNEL); + if (!shrinker->nr_deferred) + goto err_flags; + + return shrinker; + +err_flags: + shrinker_debugfs_name_free(shrinker); +err_name: + kfree(shrinker); + return NULL; +} +EXPORT_SYMBOL_GPL(shrinker_alloc); + +void shrinker_register(struct shrinker *shrinker) +{ + if (unlikely(!(shrinker->flags & SHRINKER_ALLOCATED))) { + pr_warn("Must use shrinker_alloc() to dynamically allocate the shrinker"); + return; + } + + down_write(&shrinker_rwsem); + list_add_tail(&shrinker->list, &shrinker_list); + shrinker->flags |= SHRINKER_REGISTERED; + shrinker_debugfs_add(shrinker); + up_write(&shrinker_rwsem); +} +EXPORT_SYMBOL_GPL(shrinker_register); + +void shrinker_free(struct shrinker *shrinker) +{ + struct dentry *debugfs_entry = NULL; + int debugfs_id; + + if (!shrinker) + return; + + down_write(&shrinker_rwsem); + if (shrinker->flags & SHRINKER_REGISTERED) { + list_del(&shrinker->list); + debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id); + shrinker->flags &= ~SHRINKER_REGISTERED; + } + + shrinker_debugfs_name_free(shrinker); + + if (shrinker->flags & SHRINKER_MEMCG_AWARE) + unregister_memcg_shrinker(shrinker); + up_write(&shrinker_rwsem); + + if (debugfs_entry) + shrinker_debugfs_remove(debugfs_entry, debugfs_id); + + kfree(shrinker->nr_deferred); + shrinker->nr_deferred = NULL; + + kfree(shrinker); +} +EXPORT_SYMBOL_GPL(shrinker_free); + /* * Add a shrinker callback to be called from the vm. */ diff --git a/mm/shrinker_debug.c b/mm/shrinker_debug.c index e4ce509f619ec..24aebe7c24cc5 100644 --- a/mm/shrinker_debug.c +++ b/mm/shrinker_debug.c @@ -241,9 +241,6 @@ struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, lockdep_assert_held(&shrinker_rwsem); - kfree_const(shrinker->name); - shrinker->name = NULL; - *debugfs_id = entry ? shrinker->debugfs_id : -1; shrinker->debugfs_entry = NULL; From 62dda9ab0915eb3d2cf310cd34d77007bac74dd5 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:01 +0800 Subject: [PATCH 03/50] kvm: mmu: dynamically allocate the x86-mmu shrinker mainline inclusion from mainline-v6.7-rc1 category: performance Use new APIs to dynamically allocate the x86-mmu shrinker. Link: https://lkml.kernel.org/r/20230911094444.68966-3-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit e5985c40987640717e3985e2ae02bf707c12f74a) Signed-off-by: Wentao Guan --- arch/x86/kvm/mmu/mmu.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 8385176968b5f..fcea2333a61bf 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6791,11 +6791,7 @@ static unsigned long mmu_shrink_count(struct shrinker *shrink, return percpu_counter_read_positive(&kvm_total_used_mmu_pages); } -static struct shrinker mmu_shrinker = { - .count_objects = mmu_shrink_count, - .scan_objects = mmu_shrink_scan, - .seeks = DEFAULT_SEEKS * 10, -}; +static struct shrinker *mmu_shrinker; static void mmu_destroy_caches(void) { @@ -6928,10 +6924,16 @@ int kvm_mmu_vendor_module_init(void) if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL)) goto out; - ret = register_shrinker(&mmu_shrinker, "x86-mmu"); - if (ret) + mmu_shrinker = shrinker_alloc(0, "x86-mmu"); + if (!mmu_shrinker) goto out_shrinker; + mmu_shrinker->count_objects = mmu_shrink_count; + mmu_shrinker->scan_objects = mmu_shrink_scan; + mmu_shrinker->seeks = DEFAULT_SEEKS * 10; + + shrinker_register(mmu_shrinker); + return 0; out_shrinker: @@ -6953,7 +6955,7 @@ void kvm_mmu_vendor_module_exit(void) { mmu_destroy_caches(); percpu_counter_destroy(&kvm_total_used_mmu_pages); - unregister_shrinker(&mmu_shrinker); + shrinker_free(mmu_shrinker); } /* From 31a27a39e251bb4192a888e75283ad93c99fe045 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:02 +0800 Subject: [PATCH 04/50] binder: dynamically allocate the android-binder shrinker mainline inclusion from mainline-v6.7-rc1 category: performance Use new APIs to dynamically allocate the android-binder shrinker. Link: https://lkml.kernel.org/r/20230911094444.68966-4-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Acked-by: Carlos Llamas Cc: Greg Kroah-Hartman Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 95a542da5322ef2db193a546d2d888cf4dbaa219) Signed-off-by: Wentao Guan --- drivers/android/binder_alloc.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 8695c0953f9f0..3a6d129918e2b 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -1055,11 +1055,7 @@ binder_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) NULL, sc->nr_to_scan); } -static struct shrinker binder_shrinker = { - .count_objects = binder_shrink_count, - .scan_objects = binder_shrink_scan, - .seeks = DEFAULT_SEEKS, -}; +static struct shrinker *binder_shrinker; /** * binder_alloc_init() - called by binder_open() for per-proc initialization @@ -1079,19 +1075,29 @@ void binder_alloc_init(struct binder_alloc *alloc) int binder_alloc_shrinker_init(void) { - int ret = list_lru_init(&binder_alloc_lru); + int ret; - if (ret == 0) { - ret = register_shrinker(&binder_shrinker, "android-binder"); - if (ret) - list_lru_destroy(&binder_alloc_lru); + ret = list_lru_init(&binder_alloc_lru); + if (ret) + return ret; + + binder_shrinker = shrinker_alloc(0, "android-binder"); + if (!binder_shrinker) { + list_lru_destroy(&binder_alloc_lru); + return -ENOMEM; } - return ret; + + binder_shrinker->count_objects = binder_shrink_count; + binder_shrinker->scan_objects = binder_shrink_scan; + + shrinker_register(binder_shrinker); + + return 0; } void binder_alloc_shrinker_exit(void) { - unregister_shrinker(&binder_shrinker); + shrinker_free(binder_shrinker); list_lru_destroy(&binder_alloc_lru); } From 0bdd948966b7e415ccc1e1ec6d2f02a2c121d4c8 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:03 +0800 Subject: [PATCH 05/50] drm/ttm: dynamically allocate the drm-ttm_pool shrinker mainline inclusion from mainline-v6.7-rc1 category: performance Use new APIs to dynamically allocate the drm-ttm_pool shrinker. Link: https://lkml.kernel.org/r/20230911094444.68966-5-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Acked-by: Daniel Vetter Cc: Christian Koenig Cc: Huang Rui Cc: David Airlie Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit d35b5c98c1f1ea44d4652a78163ce6d0e6ec2b78) Signed-off-by: Wentao Guan --- drivers/gpu/drm/ttm/ttm_pool.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index c1e0a355cfa4a..32b5781d499f4 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -73,7 +73,7 @@ static struct ttm_pool_type global_dma32_uncached[NR_PAGE_ORDERS]; static spinlock_t shrinker_lock; static struct list_head shrinker_list; -static struct shrinker mm_shrinker; +static struct shrinker *mm_shrinker; static DECLARE_RWSEM(pool_shrink_rwsem); /* Allocate pages of size 1 << order with the given gfp_flags */ @@ -769,8 +769,8 @@ static int ttm_pool_debugfs_shrink_show(struct seq_file *m, void *data) struct shrink_control sc = { .gfp_mask = GFP_NOFS }; fs_reclaim_acquire(GFP_KERNEL); - seq_printf(m, "%lu/%lu\n", ttm_pool_shrinker_count(&mm_shrinker, &sc), - ttm_pool_shrinker_scan(&mm_shrinker, &sc)); + seq_printf(m, "%lu/%lu\n", ttm_pool_shrinker_count(mm_shrinker, &sc), + ttm_pool_shrinker_scan(mm_shrinker, &sc)); fs_reclaim_release(GFP_KERNEL); return 0; @@ -814,10 +814,17 @@ int ttm_pool_mgr_init(unsigned long num_pages) &ttm_pool_debugfs_shrink_fops); #endif - mm_shrinker.count_objects = ttm_pool_shrinker_count; - mm_shrinker.scan_objects = ttm_pool_shrinker_scan; - mm_shrinker.seeks = 1; - return register_shrinker(&mm_shrinker, "drm-ttm_pool"); + mm_shrinker = shrinker_alloc(0, "drm-ttm_pool"); + if (!mm_shrinker) + return -ENOMEM; + + mm_shrinker->count_objects = ttm_pool_shrinker_count; + mm_shrinker->scan_objects = ttm_pool_shrinker_scan; + mm_shrinker->seeks = 1; + + shrinker_register(mm_shrinker); + + return 0; } /** @@ -837,6 +844,6 @@ void ttm_pool_mgr_fini(void) ttm_pool_type_fini(&global_dma32_uncached[i]); } - unregister_shrinker(&mm_shrinker); + shrinker_free(mm_shrinker); WARN_ON(!list_empty(&shrinker_list)); } From 2809d15ee26ed9e42b5020f44750005375895a8a Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:04 +0800 Subject: [PATCH 06/50] xenbus/backend: dynamically allocate the xen-backend shrinker mainline inclusion from mainline-v6.7-rc1 category: performance Use new APIs to dynamically allocate the xen-backend shrinker. Link: https://lkml.kernel.org/r/20230911094444.68966-6-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Acked-by: Juergen Gross Cc: Stefano Stabellini Cc: Oleksandr Tyshchenko Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 1ec016bfa10f2873c125018c4efcf09c141645ff) Signed-off-by: Wentao Guan --- drivers/xen/xenbus/xenbus_probe_backend.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index da96c260e26b4..5ebb7233076f7 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -284,13 +284,9 @@ static unsigned long backend_shrink_memory_count(struct shrinker *shrinker, return 0; } -static struct shrinker backend_memory_shrinker = { - .count_objects = backend_shrink_memory_count, - .seeks = DEFAULT_SEEKS, -}; - static int __init xenbus_probe_backend_init(void) { + struct shrinker *backend_memory_shrinker; static struct notifier_block xenstore_notifier = { .notifier_call = backend_probe_and_watch }; @@ -305,8 +301,15 @@ static int __init xenbus_probe_backend_init(void) register_xenstore_notifier(&xenstore_notifier); - if (register_shrinker(&backend_memory_shrinker, "xen-backend")) - pr_warn("shrinker registration failed\n"); + backend_memory_shrinker = shrinker_alloc(0, "xen-backend"); + if (!backend_memory_shrinker) { + pr_warn("shrinker allocation failed\n"); + return 0; + } + + backend_memory_shrinker->count_objects = backend_shrink_memory_count; + + shrinker_register(backend_memory_shrinker); return 0; } From 6650b21b09d56e3ae89b05ef2c509b2a534d3ef7 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:05 +0800 Subject: [PATCH 07/50] erofs: dynamically allocate the erofs-shrinker mainline inclusion from mainline-v6.7-rc1 category: performance Use new APIs to dynamically allocate the erofs-shrinker. Link: https://lkml.kernel.org/r/20230911094444.68966-7-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Reviewed-by: Chao Yu Reviewed-by: Gao Xiang Cc: Yue Hu Cc: Jeffle Xu Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Signed-off-by: Andrew Morton (cherry picked from commit 557936ee8dc6bd0d5e078f415441d9f2a3fcca23) Signed-off-by: Wentao Guan --- fs/erofs/utils.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c index 4256a85719a1d..5dea308764b45 100644 --- a/fs/erofs/utils.c +++ b/fs/erofs/utils.c @@ -264,19 +264,24 @@ static unsigned long erofs_shrink_scan(struct shrinker *shrink, return freed; } -static struct shrinker erofs_shrinker_info = { - .scan_objects = erofs_shrink_scan, - .count_objects = erofs_shrink_count, - .seeks = DEFAULT_SEEKS, -}; +static struct shrinker *erofs_shrinker_info; int __init erofs_init_shrinker(void) { - return register_shrinker(&erofs_shrinker_info, "erofs-shrinker"); + erofs_shrinker_info = shrinker_alloc(0, "erofs-shrinker"); + if (!erofs_shrinker_info) + return -ENOMEM; + + erofs_shrinker_info->count_objects = erofs_shrink_count; + erofs_shrinker_info->scan_objects = erofs_shrink_scan; + + shrinker_register(erofs_shrinker_info); + + return 0; } void erofs_exit_shrinker(void) { - unregister_shrinker(&erofs_shrinker_info); + shrinker_free(erofs_shrinker_info); } #endif /* !CONFIG_EROFS_FS_ZIP */ From da699ba5946ff4874d086ac634c7429e9f743c36 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:06 +0800 Subject: [PATCH 08/50] f2fs: dynamically allocate the f2fs-shrinker mainline inclusion from mainline-v6.7-rc1 category: performance Use new APIs to dynamically allocate the f2fs-shrinker. Link: https://lkml.kernel.org/r/20230911094444.68966-8-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Reviewed-by: Chao Yu Cc: Jaegeuk Kim Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton Conflicts: fs/f2fs/super.c (cherry picked from commit bfcba5ba39cbce8957b4908c9dd570dd6097fff3) Signed-off-by: Wentao Guan --- fs/f2fs/super.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index cff8706958457..e486c0a9b9edf 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -93,11 +93,26 @@ int f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned long rate, #endif /* f2fs-wide shrinker description */ -static struct shrinker f2fs_shrinker_info = { - .scan_objects = f2fs_shrink_scan, - .count_objects = f2fs_shrink_count, - .seeks = DEFAULT_SEEKS, -}; +static struct shrinker *f2fs_shrinker_info; + +static int __init f2fs_init_shrinker(void) +{ + f2fs_shrinker_info = shrinker_alloc(0, "f2fs-shrinker"); + if (!f2fs_shrinker_info) + return -ENOMEM; + + f2fs_shrinker_info->count_objects = f2fs_shrink_count; + f2fs_shrinker_info->scan_objects = f2fs_shrink_scan; + + shrinker_register(f2fs_shrinker_info); + + return 0; +} + +static void f2fs_exit_shrinker(void) +{ + shrinker_free(f2fs_shrinker_info); +} enum { Opt_gc_background, @@ -4960,7 +4975,7 @@ static int __init init_f2fs_fs(void) err = f2fs_init_sysfs(); if (err) goto free_garbage_collection_cache; - err = register_shrinker(&f2fs_shrinker_info, "f2fs-shrinker"); + err = f2fs_init_shrinker(); if (err) goto free_sysfs; f2fs_create_root_stats(); @@ -5010,7 +5025,7 @@ static int __init init_f2fs_fs(void) f2fs_destroy_post_read_processing(); free_root_stats: f2fs_destroy_root_stats(); - unregister_shrinker(&f2fs_shrinker_info); + f2fs_exit_shrinker(); free_sysfs: f2fs_exit_sysfs(); free_garbage_collection_cache: @@ -5043,7 +5058,7 @@ static void __exit exit_f2fs_fs(void) f2fs_destroy_iostat_processing(); f2fs_destroy_post_read_processing(); f2fs_destroy_root_stats(); - unregister_shrinker(&f2fs_shrinker_info); + f2fs_exit_shrinker(); f2fs_exit_sysfs(); f2fs_destroy_garbage_collection_cache(); f2fs_destroy_extent_cache(); From aff0368b7436f47631c187a249f7db8530b88868 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:07 +0800 Subject: [PATCH 09/50] gfs2: dynamically allocate the gfs2-glock shrinker mainline inclusion from mainline-v6.7-rc1 category: performance Use new APIs to dynamically allocate the gfs2-glock shrinker. Link: https://lkml.kernel.org/r/20230911094444.68966-9-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Bob Peterson Cc: Andreas Gruenbacher Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit a304c23cd6c96ecdf5f96df66e809993d5e96bf0) Signed-off-by: Wentao Guan --- fs/gfs2/glock.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 9265262807f09..868dc8fa0785b 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -2101,11 +2101,7 @@ static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink, return vfs_pressure_ratio(atomic_read(&lru_count)); } -static struct shrinker glock_shrinker = { - .seeks = DEFAULT_SEEKS, - .count_objects = gfs2_glock_shrink_count, - .scan_objects = gfs2_glock_shrink_scan, -}; +static struct shrinker *glock_shrinker; /** * glock_hash_walk - Call a function for glock in a hash bucket @@ -2530,13 +2526,18 @@ int __init gfs2_glock_init(void) return -ENOMEM; } - ret = register_shrinker(&glock_shrinker, "gfs2-glock"); - if (ret) { + glock_shrinker = shrinker_alloc(0, "gfs2-glock"); + if (!glock_shrinker) { destroy_workqueue(glock_workqueue); rhashtable_destroy(&gl_hash_table); - return ret; + return -ENOMEM; } + glock_shrinker->count_objects = gfs2_glock_shrink_count; + glock_shrinker->scan_objects = gfs2_glock_shrink_scan; + + shrinker_register(glock_shrinker); + for (i = 0; i < GLOCK_WAIT_TABLE_SIZE; i++) init_waitqueue_head(glock_wait_table + i); @@ -2545,7 +2546,7 @@ int __init gfs2_glock_init(void) void gfs2_glock_exit(void) { - unregister_shrinker(&glock_shrinker); + shrinker_free(glock_shrinker); rhashtable_destroy(&gl_hash_table); destroy_workqueue(glock_workqueue); } From 3d041ba7208ad388ef88706c6064eaaa6069a85b Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:08 +0800 Subject: [PATCH 10/50] gfs2: dynamically allocate the gfs2-qd shrinker mainline inclusion from mainline-v6.7-rc1 category: performance Use new APIs to dynamically allocate the gfs2-qd shrinker. Link: https://lkml.kernel.org/r/20230911094444.68966-10-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Bob Peterson Cc: Andreas Gruenbacher Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 8ee0fd9c1085d952ea62c1879620eaa01e4c6332) Signed-off-by: Wentao Guan --- fs/gfs2/main.c | 6 +++--- fs/gfs2/quota.c | 25 +++++++++++++++++++------ fs/gfs2/quota.h | 3 ++- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 66eb98b690a27..79be0cdc730c0 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -147,7 +147,7 @@ static int __init init_gfs2_fs(void) if (!gfs2_trans_cachep) goto fail_cachep8; - error = register_shrinker(&gfs2_qd_shrinker, "gfs2-qd"); + error = gfs2_qd_shrinker_init(); if (error) goto fail_shrinker; @@ -196,7 +196,7 @@ static int __init init_gfs2_fs(void) fail_wq2: destroy_workqueue(gfs2_recovery_wq); fail_wq1: - unregister_shrinker(&gfs2_qd_shrinker); + gfs2_qd_shrinker_exit(); fail_shrinker: kmem_cache_destroy(gfs2_trans_cachep); fail_cachep8: @@ -229,7 +229,7 @@ static int __init init_gfs2_fs(void) static void __exit exit_gfs2_fs(void) { - unregister_shrinker(&gfs2_qd_shrinker); + gfs2_qd_shrinker_exit(); gfs2_glock_exit(); gfs2_unregister_debugfs(); unregister_filesystem(&gfs2_fs_type); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index c537e1d02cf3a..ae317d1951e7a 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -193,13 +193,26 @@ static unsigned long gfs2_qd_shrink_count(struct shrinker *shrink, return vfs_pressure_ratio(list_lru_shrink_count(&gfs2_qd_lru, sc)); } -struct shrinker gfs2_qd_shrinker = { - .count_objects = gfs2_qd_shrink_count, - .scan_objects = gfs2_qd_shrink_scan, - .seeks = DEFAULT_SEEKS, - .flags = SHRINKER_NUMA_AWARE, -}; +static struct shrinker *gfs2_qd_shrinker; + +int __init gfs2_qd_shrinker_init(void) +{ + gfs2_qd_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE, "gfs2-qd"); + if (!gfs2_qd_shrinker) + return -ENOMEM; + + gfs2_qd_shrinker->count_objects = gfs2_qd_shrink_count; + gfs2_qd_shrinker->scan_objects = gfs2_qd_shrink_scan; + + shrinker_register(gfs2_qd_shrinker); + return 0; +} + +void gfs2_qd_shrinker_exit(void) +{ + shrinker_free(gfs2_qd_shrinker); +} static u64 qd2index(struct gfs2_quota_data *qd) { diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index e4a2fdb552cd7..b98164c94a34e 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -60,7 +60,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip, } extern const struct quotactl_ops gfs2_quotactl_ops; -extern struct shrinker gfs2_qd_shrinker; +int __init gfs2_qd_shrinker_init(void); +void gfs2_qd_shrinker_exit(void); extern struct list_lru gfs2_qd_lru; void __init gfs2_quota_hash_init(void); From 86b5d4fd7694f31bd14b3981a3cc00da914cff93 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:09 +0800 Subject: [PATCH 11/50] NFSv4.2: dynamically allocate the nfs-xattr shrinkers mainline inclusion from mainline-v6.7-rc1 category: performance Use new APIs to dynamically allocate the nfs-xattr shrinkers. Link: https://lkml.kernel.org/r/20230911094444.68966-11-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Trond Myklebust Cc: Anna Schumaker Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit d5dad4929fb42e7f610cda5e1ea51cd76be998d4) Signed-off-by: Wentao Guan --- fs/nfs/nfs42xattr.c | 87 +++++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 43 deletions(-) diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c index 911f634ba3da7..2ad66a8922f42 100644 --- a/fs/nfs/nfs42xattr.c +++ b/fs/nfs/nfs42xattr.c @@ -796,28 +796,9 @@ static unsigned long nfs4_xattr_cache_scan(struct shrinker *shrink, static unsigned long nfs4_xattr_entry_scan(struct shrinker *shrink, struct shrink_control *sc); -static struct shrinker nfs4_xattr_cache_shrinker = { - .count_objects = nfs4_xattr_cache_count, - .scan_objects = nfs4_xattr_cache_scan, - .seeks = DEFAULT_SEEKS, - .flags = SHRINKER_MEMCG_AWARE, -}; - -static struct shrinker nfs4_xattr_entry_shrinker = { - .count_objects = nfs4_xattr_entry_count, - .scan_objects = nfs4_xattr_entry_scan, - .seeks = DEFAULT_SEEKS, - .batch = 512, - .flags = SHRINKER_MEMCG_AWARE, -}; - -static struct shrinker nfs4_xattr_large_entry_shrinker = { - .count_objects = nfs4_xattr_entry_count, - .scan_objects = nfs4_xattr_entry_scan, - .seeks = 1, - .batch = 512, - .flags = SHRINKER_MEMCG_AWARE, -}; +static struct shrinker *nfs4_xattr_cache_shrinker; +static struct shrinker *nfs4_xattr_entry_shrinker; +static struct shrinker *nfs4_xattr_large_entry_shrinker; static enum lru_status cache_lru_isolate(struct list_head *item, @@ -943,7 +924,7 @@ nfs4_xattr_entry_scan(struct shrinker *shrink, struct shrink_control *sc) struct nfs4_xattr_entry *entry; struct list_lru *lru; - lru = (shrink == &nfs4_xattr_large_entry_shrinker) ? + lru = (shrink == nfs4_xattr_large_entry_shrinker) ? &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; freed = list_lru_shrink_walk(lru, sc, entry_lru_isolate, &dispose); @@ -971,7 +952,7 @@ nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc) unsigned long count; struct list_lru *lru; - lru = (shrink == &nfs4_xattr_large_entry_shrinker) ? + lru = (shrink == nfs4_xattr_large_entry_shrinker) ? &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; count = list_lru_shrink_count(lru, sc); @@ -991,18 +972,34 @@ static void nfs4_xattr_cache_init_once(void *p) INIT_LIST_HEAD(&cache->dispose); } -static int nfs4_xattr_shrinker_init(struct shrinker *shrinker, - struct list_lru *lru, const char *name) +typedef unsigned long (*count_objects_cb)(struct shrinker *s, + struct shrink_control *sc); +typedef unsigned long (*scan_objects_cb)(struct shrinker *s, + struct shrink_control *sc); + +static int __init nfs4_xattr_shrinker_init(struct shrinker **shrinker, + struct list_lru *lru, const char *name, + count_objects_cb count, + scan_objects_cb scan, long batch, int seeks) { - int ret = 0; + int ret; - ret = register_shrinker(shrinker, name); - if (ret) + *shrinker = shrinker_alloc(SHRINKER_MEMCG_AWARE, name); + if (!*shrinker) + return -ENOMEM; + + ret = list_lru_init_memcg(lru, *shrinker); + if (ret) { + shrinker_free(*shrinker); return ret; + } - ret = list_lru_init_memcg(lru, shrinker); - if (ret) - unregister_shrinker(shrinker); + (*shrinker)->count_objects = count; + (*shrinker)->scan_objects = scan; + (*shrinker)->batch = batch; + (*shrinker)->seeks = seeks; + + shrinker_register(*shrinker); return ret; } @@ -1010,7 +1007,7 @@ static int nfs4_xattr_shrinker_init(struct shrinker *shrinker, static void nfs4_xattr_shrinker_destroy(struct shrinker *shrinker, struct list_lru *lru) { - unregister_shrinker(shrinker); + shrinker_free(shrinker); list_lru_destroy(lru); } @@ -1026,27 +1023,31 @@ int __init nfs4_xattr_cache_init(void) return -ENOMEM; ret = nfs4_xattr_shrinker_init(&nfs4_xattr_cache_shrinker, - &nfs4_xattr_cache_lru, - "nfs-xattr_cache"); + &nfs4_xattr_cache_lru, "nfs-xattr_cache", + nfs4_xattr_cache_count, + nfs4_xattr_cache_scan, 0, DEFAULT_SEEKS); if (ret) goto out1; ret = nfs4_xattr_shrinker_init(&nfs4_xattr_entry_shrinker, - &nfs4_xattr_entry_lru, - "nfs-xattr_entry"); + &nfs4_xattr_entry_lru, "nfs-xattr_entry", + nfs4_xattr_entry_count, + nfs4_xattr_entry_scan, 512, DEFAULT_SEEKS); if (ret) goto out2; ret = nfs4_xattr_shrinker_init(&nfs4_xattr_large_entry_shrinker, &nfs4_xattr_large_entry_lru, - "nfs-xattr_large_entry"); + "nfs-xattr_large_entry", + nfs4_xattr_entry_count, + nfs4_xattr_entry_scan, 512, 1); if (!ret) return 0; - nfs4_xattr_shrinker_destroy(&nfs4_xattr_entry_shrinker, + nfs4_xattr_shrinker_destroy(nfs4_xattr_entry_shrinker, &nfs4_xattr_entry_lru); out2: - nfs4_xattr_shrinker_destroy(&nfs4_xattr_cache_shrinker, + nfs4_xattr_shrinker_destroy(nfs4_xattr_cache_shrinker, &nfs4_xattr_cache_lru); out1: kmem_cache_destroy(nfs4_xattr_cache_cachep); @@ -1056,11 +1057,11 @@ int __init nfs4_xattr_cache_init(void) void nfs4_xattr_cache_exit(void) { - nfs4_xattr_shrinker_destroy(&nfs4_xattr_large_entry_shrinker, + nfs4_xattr_shrinker_destroy(nfs4_xattr_large_entry_shrinker, &nfs4_xattr_large_entry_lru); - nfs4_xattr_shrinker_destroy(&nfs4_xattr_entry_shrinker, + nfs4_xattr_shrinker_destroy(nfs4_xattr_entry_shrinker, &nfs4_xattr_entry_lru); - nfs4_xattr_shrinker_destroy(&nfs4_xattr_cache_shrinker, + nfs4_xattr_shrinker_destroy(nfs4_xattr_cache_shrinker, &nfs4_xattr_cache_lru); kmem_cache_destroy(nfs4_xattr_cache_cachep); } From 45877616af8a83b76a92bb0f496a00e04834a8ef Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:10 +0800 Subject: [PATCH 12/50] nfs: dynamically allocate the nfs-acl shrinker mainline inclusion from mainline-v6.7-rc1 category: performance Use new APIs to dynamically allocate the nfs-acl shrinker. Link: https://lkml.kernel.org/r/20230911094444.68966-12-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Trond Myklebust Cc: Anna Schumaker Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 777fc8f1b4b9a0c5a558f1ab49ddfa5137bfaeb3) Signed-off-by: Wentao Guan --- fs/nfs/super.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 5c14c30a84c0e..437634e57b5be 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -130,11 +130,7 @@ static void nfs_ssc_unregister_ops(void) } #endif /* CONFIG_NFS_V4_2 */ -static struct shrinker acl_shrinker = { - .count_objects = nfs_access_cache_count, - .scan_objects = nfs_access_cache_scan, - .seeks = DEFAULT_SEEKS, -}; +static struct shrinker *acl_shrinker; /* * Register the NFS filesystems @@ -154,9 +150,18 @@ int __init register_nfs_fs(void) ret = nfs_register_sysctl(); if (ret < 0) goto error_2; - ret = register_shrinker(&acl_shrinker, "nfs-acl"); - if (ret < 0) + + acl_shrinker = shrinker_alloc(0, "nfs-acl"); + if (!acl_shrinker) { + ret = -ENOMEM; goto error_3; + } + + acl_shrinker->count_objects = nfs_access_cache_count; + acl_shrinker->scan_objects = nfs_access_cache_scan; + + shrinker_register(acl_shrinker); + #ifdef CONFIG_NFS_V4_2 nfs_ssc_register_ops(); #endif @@ -176,7 +181,7 @@ int __init register_nfs_fs(void) */ void __exit unregister_nfs_fs(void) { - unregister_shrinker(&acl_shrinker); + shrinker_free(acl_shrinker); nfs_unregister_sysctl(); unregister_nfs4_fs(); #ifdef CONFIG_NFS_V4_2 From 797c4b9e510bef19bd3c22255a51cdd4f1942a82 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:11 +0800 Subject: [PATCH 13/50] nfsd: dynamically allocate the nfsd-filecache shrinker mainline inclusion from mainline-v6.7-rc1 category: performance Use new APIs to dynamically allocate the nfsd-filecache shrinker. Link: https://lkml.kernel.org/r/20230911094444.68966-13-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Jeff Layton Cc: Neil Brown Cc: Olga Kornievskaia Cc: Dai Ngo Cc: Tom Talpey Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Oleksandr Tyshchenko Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 856e594965414a1cc63d0f3e709ecb6869bc685e) Signed-off-by: Wentao Guan --- fs/nfsd/filecache.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 0ff07d53931f2..2419cf3302105 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -523,11 +523,7 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) return ret; } -static struct shrinker nfsd_file_shrinker = { - .scan_objects = nfsd_file_lru_scan, - .count_objects = nfsd_file_lru_count, - .seeks = 1, -}; +static struct shrinker *nfsd_file_shrinker; /** * nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file @@ -748,12 +744,19 @@ nfsd_file_cache_init(void) goto out_err; } - ret = register_shrinker(&nfsd_file_shrinker, "nfsd-filecache"); - if (ret) { - pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret); + nfsd_file_shrinker = shrinker_alloc(0, "nfsd-filecache"); + if (!nfsd_file_shrinker) { + ret = -ENOMEM; + pr_err("nfsd: failed to allocate nfsd_file_shrinker\n"); goto out_lru; } + nfsd_file_shrinker->count_objects = nfsd_file_lru_count; + nfsd_file_shrinker->scan_objects = nfsd_file_lru_scan; + nfsd_file_shrinker->seeks = 1; + + shrinker_register(nfsd_file_shrinker); + ret = lease_register_notifier(&nfsd_file_lease_notifier); if (ret) { pr_err("nfsd: unable to register lease notifier: %d\n", ret); @@ -778,7 +781,7 @@ nfsd_file_cache_init(void) out_notifier: lease_unregister_notifier(&nfsd_file_lease_notifier); out_shrinker: - unregister_shrinker(&nfsd_file_shrinker); + shrinker_free(nfsd_file_shrinker); out_lru: list_lru_destroy(&nfsd_file_lru); out_err: @@ -895,7 +898,7 @@ nfsd_file_cache_shutdown(void) return; lease_unregister_notifier(&nfsd_file_lease_notifier); - unregister_shrinker(&nfsd_file_shrinker); + shrinker_free(nfsd_file_shrinker); /* * make sure all callers of nfsd_file_lru_cb are done before * calling nfsd_file_cache_purge From 83beda54556c55c6e9acc857cdfc5111d1205d92 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:12 +0800 Subject: [PATCH 14/50] quota: dynamically allocate the dquota-cache shrinker mainline inclusion from mainline-v6.7-rc1 category: performance Use new APIs to dynamically allocate the dquota-cache shrinker. Link: https://lkml.kernel.org/r/20230911094444.68966-14-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Acked-by: Jan Kara Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit eab477e883b52a82bd7b4437111bbc6264694349) Signed-off-by: Wentao Guan --- fs/quota/dquot.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 7562ee4ff2929..4c69ba48c774e 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -833,12 +833,6 @@ dqcache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS])); } -static struct shrinker dqcache_shrinker = { - .count_objects = dqcache_shrink_count, - .scan_objects = dqcache_shrink_scan, - .seeks = DEFAULT_SEEKS, -}; - /* * Safely release dquot and put reference to dquot. */ @@ -3034,6 +3028,7 @@ static int __init dquot_init(void) { int i, ret; unsigned long nr_hash, order; + struct shrinker *dqcache_shrinker; printk(KERN_NOTICE "VFS: Disk quotas %s\n", __DQUOT_VERSION__); @@ -3068,8 +3063,14 @@ static int __init dquot_init(void) pr_info("VFS: Dquot-cache hash table entries: %ld (order %ld," " %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order)); - if (register_shrinker(&dqcache_shrinker, "dquota-cache")) - panic("Cannot register dquot shrinker"); + dqcache_shrinker = shrinker_alloc(0, "dquota-cache"); + if (!dqcache_shrinker) + panic("Cannot allocate dquot shrinker"); + + dqcache_shrinker->count_objects = dqcache_shrink_count; + dqcache_shrinker->scan_objects = dqcache_shrink_scan; + + shrinker_register(dqcache_shrinker); quota_unbound_wq = alloc_workqueue("quota_events_unbound", WQ_UNBOUND | WQ_MEM_RECLAIM, WQ_MAX_ACTIVE); From bc993390016b06caca9c71addad13653fca34c2a Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:13 +0800 Subject: [PATCH 15/50] ubifs: dynamically allocate the ubifs-slab shrinker mainline inclusion from mainline-v6.7-rc1 category: performance Use new APIs to dynamically allocate the ubifs-slab shrinker. Link: https://lkml.kernel.org/r/20230911094444.68966-15-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Richard Weinberger Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 827a34f90e5a27b83f188b914512d81e36688dec) Signed-off-by: Wentao Guan --- fs/ubifs/super.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 78f31bddff746..86643605e149f 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -54,11 +54,7 @@ module_param_cb(default_version, &ubifs_default_version_ops, &ubifs_default_vers static struct kmem_cache *ubifs_inode_slab; /* UBIFS TNC shrinker description */ -static struct shrinker ubifs_shrinker_info = { - .scan_objects = ubifs_shrink_scan, - .count_objects = ubifs_shrink_count, - .seeks = DEFAULT_SEEKS, -}; +static struct shrinker *ubifs_shrinker_info; /** * validate_inode - validate inode. @@ -2373,7 +2369,7 @@ static void inode_slab_ctor(void *obj) static int __init ubifs_init(void) { - int err; + int err = -ENOMEM; BUILD_BUG_ON(sizeof(struct ubifs_ch) != 24); @@ -2439,10 +2435,15 @@ static int __init ubifs_init(void) if (!ubifs_inode_slab) return -ENOMEM; - err = register_shrinker(&ubifs_shrinker_info, "ubifs-slab"); - if (err) + ubifs_shrinker_info = shrinker_alloc(0, "ubifs-slab"); + if (!ubifs_shrinker_info) goto out_slab; + ubifs_shrinker_info->count_objects = ubifs_shrink_count; + ubifs_shrinker_info->scan_objects = ubifs_shrink_scan; + + shrinker_register(ubifs_shrinker_info); + err = ubifs_compressors_init(); if (err) goto out_shrinker; @@ -2467,7 +2468,7 @@ static int __init ubifs_init(void) dbg_debugfs_exit(); ubifs_compressors_exit(); out_shrinker: - unregister_shrinker(&ubifs_shrinker_info); + shrinker_free(ubifs_shrinker_info); out_slab: kmem_cache_destroy(ubifs_inode_slab); return err; @@ -2483,7 +2484,7 @@ static void __exit ubifs_exit(void) dbg_debugfs_exit(); ubifs_sysfs_exit(); ubifs_compressors_exit(); - unregister_shrinker(&ubifs_shrinker_info); + shrinker_free(ubifs_shrinker_info); /* * Make sure all delayed rcu free inodes are flushed before we From 823c2f504f57a1956f6b1519ffc6821ec23c44db Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:14 +0800 Subject: [PATCH 16/50] rcu: dynamically allocate the rcu-lazy shrinker mainline inclusion from mainline-v6.7-rc1 category: performance Use new APIs to dynamically allocate the rcu-lazy shrinker. Link: https://lkml.kernel.org/r/20230911094444.68966-16-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Joel Fernandes (Google) Acked-by: Muchun Song Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 2fbacff0cbf58530fa459c3f28ba9125df33ad86) Signed-off-by: Wentao Guan --- kernel/rcu/tree_nocb.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index a3d613f063a8f..7617fd9a35b80 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -1383,13 +1383,6 @@ lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) return count ? count : SHRINK_STOP; } - -static struct shrinker lazy_rcu_shrinker = { - .count_objects = lazy_rcu_shrink_count, - .scan_objects = lazy_rcu_shrink_scan, - .batch = 0, - .seeks = DEFAULT_SEEKS, -}; #endif // #ifdef CONFIG_RCU_LAZY void __init rcu_init_nohz(void) @@ -1397,6 +1390,7 @@ void __init rcu_init_nohz(void) int cpu; struct rcu_data *rdp; const struct cpumask *cpumask = NULL; + struct shrinker * __maybe_unused lazy_rcu_shrinker; #if defined(CONFIG_NO_HZ_FULL) if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask)) @@ -1423,8 +1417,15 @@ void __init rcu_init_nohz(void) return; #ifdef CONFIG_RCU_LAZY - if (register_shrinker(&lazy_rcu_shrinker, "rcu-lazy")) - pr_err("Failed to register lazy_rcu shrinker!\n"); + lazy_rcu_shrinker = shrinker_alloc(0, "rcu-lazy"); + if (!lazy_rcu_shrinker) { + pr_err("Failed to allocate lazy_rcu shrinker!\n"); + } else { + lazy_rcu_shrinker->count_objects = lazy_rcu_shrink_count; + lazy_rcu_shrinker->scan_objects = lazy_rcu_shrink_scan; + + shrinker_register(lazy_rcu_shrinker); + } #endif // #ifdef CONFIG_RCU_LAZY if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) { From 5fbfa75b9fa6304c684f1a6ebe77b861a57e1a4f Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:15 +0800 Subject: [PATCH 17/50] rcu: dynamically allocate the rcu-kfree shrinker mainline inclusion from mainline-v6.7-rc1 category: performance Use new APIs to dynamically allocate the rcu-kfree shrinker. Link: https://lkml.kernel.org/r/20230911094444.68966-17-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Joel Fernandes (Google) Reviewed-by: Muchun Song Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 21e0b932fb5d72bb6cefdf56e22bfddea968cf32) Signed-off-by: Wentao Guan --- kernel/rcu/tree.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index c9b72f4171e09..cf540eb2aa592 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3542,13 +3542,6 @@ kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) return freed == 0 ? SHRINK_STOP : freed; } -static struct shrinker kfree_rcu_shrinker = { - .count_objects = kfree_rcu_shrink_count, - .scan_objects = kfree_rcu_shrink_scan, - .batch = 0, - .seeks = DEFAULT_SEEKS, -}; - void __init kfree_rcu_scheduler_running(void) { int cpu; @@ -5041,6 +5034,7 @@ static void __init kfree_rcu_batch_init(void) { int cpu; int i, j; + struct shrinker *kfree_rcu_shrinker; /* Clamp it to [0:100] seconds interval. */ if (rcu_delay_page_cache_fill_msec < 0 || @@ -5072,8 +5066,17 @@ static void __init kfree_rcu_batch_init(void) INIT_DELAYED_WORK(&krcp->page_cache_work, fill_page_cache_func); krcp->initialized = true; } - if (register_shrinker(&kfree_rcu_shrinker, "rcu-kfree")) - pr_err("Failed to register kfree_rcu() shrinker!\n"); + + kfree_rcu_shrinker = shrinker_alloc(0, "rcu-kfree"); + if (!kfree_rcu_shrinker) { + pr_err("Failed to allocate kfree_rcu() shrinker!\n"); + return; + } + + kfree_rcu_shrinker->count_objects = kfree_rcu_shrink_count; + kfree_rcu_shrinker->scan_objects = kfree_rcu_shrink_scan; + + shrinker_register(kfree_rcu_shrinker); } void __init rcu_init(void) From edd8fa9d76e9071f9aeecd7d99a0c065ae888c8d Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:16 +0800 Subject: [PATCH 18/50] mm: thp: dynamically allocate the thp-related shrinkers mainline inclusion from mainline-v6.7-rc1 category: performance Use new APIs to dynamically allocate the thp-zero and thp-deferred_split shrinkers. Link: https://lkml.kernel.org/r/20230911094444.68966-18-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 54d917295b8366545e6ee940c93dffe1452e6480) Signed-off-by: Wentao Guan --- mm/huge_memory.c | 67 +++++++++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 54ef65277379a..fcc5644c8a6d4 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -66,7 +66,11 @@ unsigned long transparent_hugepage_flags __read_mostly = (1<count_objects = shrink_huge_zero_page_count; + huge_zero_page_shrinker->scan_objects = shrink_huge_zero_page_scan; + shrinker_register(huge_zero_page_shrinker); + + deferred_split_shrinker->count_objects = deferred_split_count; + deferred_split_shrinker->scan_objects = deferred_split_scan; + shrinker_register(deferred_split_shrinker); + + return 0; +} + +static void __init thp_shrinker_exit(void) +{ + shrinker_free(huge_zero_page_shrinker); + shrinker_free(deferred_split_shrinker); +} + static int __init hugepage_init(void) { int err; @@ -478,12 +510,9 @@ static int __init hugepage_init(void) if (err) goto err_slab; - err = register_shrinker(&huge_zero_page_shrinker, "thp-zero"); - if (err) - goto err_hzp_shrinker; - err = register_shrinker(&deferred_split_shrinker, "thp-deferred_split"); + err = thp_shrinker_init(); if (err) - goto err_split_shrinker; + goto err_shrinker; /* * By default disable transparent hugepages on smaller systems, @@ -501,10 +530,8 @@ static int __init hugepage_init(void) return 0; err_khugepaged: - unregister_shrinker(&deferred_split_shrinker); -err_split_shrinker: - unregister_shrinker(&huge_zero_page_shrinker); -err_hzp_shrinker: + thp_shrinker_exit(); +err_shrinker: khugepaged_destroy(); err_slab: hugepage_exit_sysfs(hugepage_kobj); @@ -2843,7 +2870,7 @@ void deferred_split_folio(struct folio *folio) #ifdef CONFIG_MEMCG if (memcg) set_shrinker_bit(memcg, folio_nid(folio), - deferred_split_shrinker.id); + deferred_split_shrinker->id); #endif } spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); @@ -2917,14 +2944,6 @@ static unsigned long deferred_split_scan(struct shrinker *shrink, return split; } -static struct shrinker deferred_split_shrinker = { - .count_objects = deferred_split_count, - .scan_objects = deferred_split_scan, - .seeks = DEFAULT_SEEKS, - .flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE | - SHRINKER_NONSLAB, -}; - #ifdef CONFIG_DEBUG_FS static void split_huge_pages_all(void) { From 09449effee73ce413cf9d1a299d9032b62e42c48 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:17 +0800 Subject: [PATCH 19/50] sunrpc: dynamically allocate the sunrpc_cred shrinker mainline inclusion from mainline-v6.7-rc1 category: performance Use new APIs to dynamically allocate the sunrpc_cred shrinker. Link: https://lkml.kernel.org/r/20230911094444.68966-19-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Jeff Layton Cc: Neil Brown Cc: Olga Kornievskaia Cc: Dai Ngo Cc: Tom Talpey Cc: Trond Myklebust Cc: Anna Schumaker Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Oleksandr Tyshchenko Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit abe0c269f9820c637223ad88d77d174e8d85c58f) Signed-off-by: Wentao Guan --- net/sunrpc/auth.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 7204bdb018f7f..cc4eb575068e6 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -883,11 +883,7 @@ rpcauth_uptodatecred(struct rpc_task *task) test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0; } -static struct shrinker rpc_cred_shrinker = { - .count_objects = rpcauth_cache_shrink_count, - .scan_objects = rpcauth_cache_shrink_scan, - .seeks = DEFAULT_SEEKS, -}; +static struct shrinker *rpc_cred_shrinker; int __init rpcauth_init_module(void) { @@ -901,9 +897,17 @@ int __init rpcauth_init_module(void) err = rpc_init_authunix(); if (err < 0) goto out1; - err = register_shrinker(&rpc_cred_shrinker, "sunrpc_cred"); - if (err < 0) + rpc_cred_shrinker = shrinker_alloc(0, "sunrpc_cred"); + if (!rpc_cred_shrinker) { + err = -ENOMEM; goto out2; + } + + rpc_cred_shrinker->count_objects = rpcauth_cache_shrink_count; + rpc_cred_shrinker->scan_objects = rpcauth_cache_shrink_scan; + + shrinker_register(rpc_cred_shrinker); + return 0; out2: rpc_destroy_authunix(); @@ -917,5 +921,5 @@ void rpcauth_remove_module(void) abort_creds(machine_cred); #endif rpc_destroy_authunix(); - unregister_shrinker(&rpc_cred_shrinker); + shrinker_free(rpc_cred_shrinker); } From c9e70d071c7f43b317cc1fec1b53208a31953db9 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:18 +0800 Subject: [PATCH 20/50] mm: workingset: dynamically allocate the mm-shadow shrinker mainline inclusion from mainline-v6.7-rc1 category: performance Use new APIs to dynamically allocate the mm-shadow shrinker. Link: https://lkml.kernel.org/r/20230911094444.68966-20-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Acked-by: Muchun Song Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 219c666eb2854bb9ca94d04a53ceb14aaf81cb28) Signed-off-by: Wentao Guan --- mm/workingset.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/mm/workingset.c b/mm/workingset.c index 1cdbc1fb58731..1aec494b7d24f 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -787,13 +787,6 @@ static unsigned long scan_shadow_nodes(struct shrinker *shrinker, NULL); } -static struct shrinker workingset_shadow_shrinker = { - .count_objects = count_shadow_nodes, - .scan_objects = scan_shadow_nodes, - .seeks = 0, /* ->count reports only fully expendable nodes */ - .flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, -}; - /* * Our list_lru->lock is IRQ-safe as it nests inside the IRQ-safe * i_pages lock. @@ -802,9 +795,10 @@ static struct lock_class_key shadow_nodes_key; static int __init workingset_init(void) { + struct shrinker *workingset_shadow_shrinker; unsigned int timestamp_bits; unsigned int max_order; - int ret; + int ret = -ENOMEM; BUILD_BUG_ON(BITS_PER_LONG < EVICTION_SHIFT); /* @@ -821,17 +815,26 @@ static int __init workingset_init(void) pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", timestamp_bits, max_order, bucket_order); - ret = prealloc_shrinker(&workingset_shadow_shrinker, "mm-shadow"); - if (ret) + workingset_shadow_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE | + SHRINKER_MEMCG_AWARE, + "mm-shadow"); + if (!workingset_shadow_shrinker) goto err; + ret = __list_lru_init(&shadow_nodes, true, &shadow_nodes_key, - &workingset_shadow_shrinker); + workingset_shadow_shrinker); if (ret) goto err_list_lru; - register_shrinker_prepared(&workingset_shadow_shrinker); + + workingset_shadow_shrinker->count_objects = count_shadow_nodes; + workingset_shadow_shrinker->scan_objects = scan_shadow_nodes; + /* ->count reports only fully expendable nodes */ + workingset_shadow_shrinker->seeks = 0; + + shrinker_register(workingset_shadow_shrinker); return 0; err_list_lru: - free_prealloced_shrinker(&workingset_shadow_shrinker); + shrinker_free(workingset_shadow_shrinker); err: return ret; } From 2e344e6186779598c7a22c1b086ea840fc8128cd Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:19 +0800 Subject: [PATCH 21/50] drm/i915: dynamically allocate the i915_gem_mm shrinker mainline inclusion from mainline-v6.7-rc1 category: performance In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the i915_gem_mm shrinker, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct drm_i915_private. Link: https://lkml.kernel.org/r/20230911094444.68966-21-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Acked-by: Daniel Vetter Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: David Airlie Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 583cc9e41095292e2ebf33c977d8ba1e64308892) Signed-off-by: Wentao Guan --- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 29 +++++++++++--------- drivers/gpu/drm/i915/i915_drv.h | 2 +- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index 9cb7bbfb42782..d166052eb2ce3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -288,8 +288,7 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *i915) static unsigned long i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) { - struct drm_i915_private *i915 = - container_of(shrinker, struct drm_i915_private, mm.shrinker); + struct drm_i915_private *i915 = shrinker->private_data; unsigned long num_objects; unsigned long count; @@ -306,8 +305,8 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) if (num_objects) { unsigned long avg = 2 * count / num_objects; - i915->mm.shrinker.batch = - max((i915->mm.shrinker.batch + avg) >> 1, + i915->mm.shrinker->batch = + max((i915->mm.shrinker->batch + avg) >> 1, 128ul /* default SHRINK_BATCH */); } @@ -317,8 +316,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) static unsigned long i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { - struct drm_i915_private *i915 = - container_of(shrinker, struct drm_i915_private, mm.shrinker); + struct drm_i915_private *i915 = shrinker->private_data; unsigned long freed; sc->nr_scanned = 0; @@ -430,12 +428,17 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr void i915_gem_driver_register__shrinker(struct drm_i915_private *i915) { - i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan; - i915->mm.shrinker.count_objects = i915_gem_shrinker_count; - i915->mm.shrinker.seeks = DEFAULT_SEEKS; - i915->mm.shrinker.batch = 4096; - drm_WARN_ON(&i915->drm, register_shrinker(&i915->mm.shrinker, - "drm-i915_gem")); + i915->mm.shrinker = shrinker_alloc(0, "drm-i915_gem"); + if (!i915->mm.shrinker) { + drm_WARN_ON(&i915->drm, 1); + } else { + i915->mm.shrinker->scan_objects = i915_gem_shrinker_scan; + i915->mm.shrinker->count_objects = i915_gem_shrinker_count; + i915->mm.shrinker->batch = 4096; + i915->mm.shrinker->private_data = i915; + + shrinker_register(i915->mm.shrinker); + } i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom; drm_WARN_ON(&i915->drm, register_oom_notifier(&i915->mm.oom_notifier)); @@ -451,7 +454,7 @@ void i915_gem_driver_unregister__shrinker(struct drm_i915_private *i915) unregister_vmap_purge_notifier(&i915->mm.vmap_notifier)); drm_WARN_ON(&i915->drm, unregister_oom_notifier(&i915->mm.oom_notifier)); - unregister_shrinker(&i915->mm.shrinker); + shrinker_free(i915->mm.shrinker); } void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f4ad8885a9cf2..4ec4240d18d8d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -163,7 +163,7 @@ struct i915_gem_mm { struct notifier_block oom_notifier; struct notifier_block vmap_notifier; - struct shrinker shrinker; + struct shrinker *shrinker; #ifdef CONFIG_MMU_NOTIFIER /** From 7af10100f23f1560acac130a75ae516228d46156 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:20 +0800 Subject: [PATCH 22/50] drm/msm: dynamically allocate the drm-msm_gem shrinker mainline inclusion from mainline-v6.7-rc1 category: performance In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the drm-msm_gem shrinker, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct msm_drm_private. Link: https://lkml.kernel.org/r/20230911094444.68966-22-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Acked-by: Daniel Vetter Cc: Rob Clark Cc: Abhinav Kumar Cc: Dmitry Baryshkov Cc: Sean Paul Cc: Marijn Suijten Cc: David Airlie Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Hildenbrand Cc: David Sterba Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit cd61a76c210afd324c345ff62d950e2ff7947f11) Signed-off-by: Wentao Guan --- drivers/gpu/drm/msm/msm_drv.c | 4 +++- drivers/gpu/drm/msm/msm_drv.h | 4 ++-- drivers/gpu/drm/msm/msm_gem_shrinker.c | 33 ++++++++++++++++---------- 3 files changed, 25 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 4bd028fa75006..7f20249d60715 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -462,7 +462,9 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) if (ret) goto err_msm_uninit; - msm_gem_shrinker_init(ddev); + ret = msm_gem_shrinker_init(ddev); + if (ret) + goto err_msm_uninit; if (priv->kms_init) { ret = priv->kms_init(ddev); diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 223bf904235a8..c3c0aef6a5cbc 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -209,7 +209,7 @@ struct msm_drm_private { } vram; struct notifier_block vmap_notifier; - struct shrinker shrinker; + struct shrinker *shrinker; struct drm_atomic_state *pm_state; @@ -271,7 +271,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, unsigned long msm_gem_shrinker_shrink(struct drm_device *dev, unsigned long nr_to_scan); #endif -void msm_gem_shrinker_init(struct drm_device *dev); +int msm_gem_shrinker_init(struct drm_device *dev); void msm_gem_shrinker_cleanup(struct drm_device *dev); struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 5d5cf51cde65a..89c0ea8ddeaac 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -33,8 +33,7 @@ static bool can_block(struct shrink_control *sc) static unsigned long msm_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) { - struct msm_drm_private *priv = - container_of(shrinker, struct msm_drm_private, shrinker); + struct msm_drm_private *priv = shrinker->private_data; unsigned count = priv->lru.dontneed.count; if (can_swap()) @@ -99,8 +98,7 @@ active_evict(struct drm_gem_object *obj) static unsigned long msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { - struct msm_drm_private *priv = - container_of(shrinker, struct msm_drm_private, shrinker); + struct msm_drm_private *priv = shrinker->private_data; struct { struct drm_gem_lru *lru; bool (*shrink)(struct drm_gem_object *obj); @@ -147,10 +145,11 @@ msm_gem_shrinker_shrink(struct drm_device *dev, unsigned long nr_to_scan) struct shrink_control sc = { .nr_to_scan = nr_to_scan, }; - int ret; + unsigned long ret = SHRINK_STOP; fs_reclaim_acquire(GFP_KERNEL); - ret = msm_gem_shrinker_scan(&priv->shrinker, &sc); + if (priv->shrinker) + ret = msm_gem_shrinker_scan(priv->shrinker, &sc); fs_reclaim_release(GFP_KERNEL); return ret; @@ -209,16 +208,24 @@ msm_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr) * * This function registers and sets up the msm shrinker. */ -void msm_gem_shrinker_init(struct drm_device *dev) +int msm_gem_shrinker_init(struct drm_device *dev) { struct msm_drm_private *priv = dev->dev_private; - priv->shrinker.count_objects = msm_gem_shrinker_count; - priv->shrinker.scan_objects = msm_gem_shrinker_scan; - priv->shrinker.seeks = DEFAULT_SEEKS; - WARN_ON(register_shrinker(&priv->shrinker, "drm-msm_gem")); + + priv->shrinker = shrinker_alloc(0, "drm-msm_gem"); + if (!priv->shrinker) + return -ENOMEM; + + priv->shrinker->count_objects = msm_gem_shrinker_count; + priv->shrinker->scan_objects = msm_gem_shrinker_scan; + priv->shrinker->private_data = priv; + + shrinker_register(priv->shrinker); priv->vmap_notifier.notifier_call = msm_gem_shrinker_vmap; WARN_ON(register_vmap_purge_notifier(&priv->vmap_notifier)); + + return 0; } /** @@ -231,8 +238,8 @@ void msm_gem_shrinker_cleanup(struct drm_device *dev) { struct msm_drm_private *priv = dev->dev_private; - if (priv->shrinker.nr_deferred) { + if (priv->shrinker) { WARN_ON(unregister_vmap_purge_notifier(&priv->vmap_notifier)); - unregister_shrinker(&priv->shrinker); + shrinker_free(priv->shrinker); } } From f18fcc622d7479e9497c71893fed4ae24692adeb Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:21 +0800 Subject: [PATCH 23/50] drm/panfrost: dynamically allocate the drm-panfrost shrinker mainline inclusion from mainline-v6.7-rc1 category: performance In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the drm-panfrost shrinker, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct panfrost_device. Link: https://lkml.kernel.org/r/20230911094444.68966-23-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Steven Price Acked-by: Daniel Vetter Cc: Rob Herring Cc: Tomeu Vizoso Cc: Alyssa Rosenzweig Cc: David Airlie Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit e11c4f3acbbb776992425867df894b8c0f131f50) Signed-off-by: Wentao Guan --- drivers/gpu/drm/panfrost/panfrost_device.h | 2 +- drivers/gpu/drm/panfrost/panfrost_drv.c | 6 +++- drivers/gpu/drm/panfrost/panfrost_gem.h | 2 +- .../gpu/drm/panfrost/panfrost_gem_shrinker.c | 29 +++++++++++-------- 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h index b0126b9fbadca..e667e56893536 100644 --- a/drivers/gpu/drm/panfrost/panfrost_device.h +++ b/drivers/gpu/drm/panfrost/panfrost_device.h @@ -118,7 +118,7 @@ struct panfrost_device { struct mutex shrinker_lock; struct list_head shrinker_list; - struct shrinker shrinker; + struct shrinker *shrinker; struct panfrost_devfreq pfdevfreq; }; diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 0c30310da50e2..16480ad296b94 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -603,10 +603,14 @@ static int panfrost_probe(struct platform_device *pdev) if (err < 0) goto err_out1; - panfrost_gem_shrinker_init(ddev); + err = panfrost_gem_shrinker_init(ddev); + if (err) + goto err_out2; return 0; +err_out2: + drm_dev_unregister(ddev); err_out1: pm_runtime_disable(pfdev->dev); panfrost_device_fini(pfdev); diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h index ad2877eeeccdf..863d2ec8d4f01 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.h +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h @@ -81,7 +81,7 @@ panfrost_gem_mapping_get(struct panfrost_gem_object *bo, void panfrost_gem_mapping_put(struct panfrost_gem_mapping *mapping); void panfrost_gem_teardown_mappings_locked(struct panfrost_gem_object *bo); -void panfrost_gem_shrinker_init(struct drm_device *dev); +int panfrost_gem_shrinker_init(struct drm_device *dev); void panfrost_gem_shrinker_cleanup(struct drm_device *dev); #endif /* __PANFROST_GEM_H__ */ diff --git a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c index 6a71a2555f85c..3d9f51bd48b61 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c @@ -18,8 +18,7 @@ static unsigned long panfrost_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) { - struct panfrost_device *pfdev = - container_of(shrinker, struct panfrost_device, shrinker); + struct panfrost_device *pfdev = shrinker->private_data; struct drm_gem_shmem_object *shmem; unsigned long count = 0; @@ -65,8 +64,7 @@ static bool panfrost_gem_purge(struct drm_gem_object *obj) static unsigned long panfrost_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { - struct panfrost_device *pfdev = - container_of(shrinker, struct panfrost_device, shrinker); + struct panfrost_device *pfdev = shrinker->private_data; struct drm_gem_shmem_object *shmem, *tmp; unsigned long freed = 0; @@ -97,13 +95,21 @@ panfrost_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) * * This function registers and sets up the panfrost shrinker. */ -void panfrost_gem_shrinker_init(struct drm_device *dev) +int panfrost_gem_shrinker_init(struct drm_device *dev) { struct panfrost_device *pfdev = dev->dev_private; - pfdev->shrinker.count_objects = panfrost_gem_shrinker_count; - pfdev->shrinker.scan_objects = panfrost_gem_shrinker_scan; - pfdev->shrinker.seeks = DEFAULT_SEEKS; - WARN_ON(register_shrinker(&pfdev->shrinker, "drm-panfrost")); + + pfdev->shrinker = shrinker_alloc(0, "drm-panfrost"); + if (!pfdev->shrinker) + return -ENOMEM; + + pfdev->shrinker->count_objects = panfrost_gem_shrinker_count; + pfdev->shrinker->scan_objects = panfrost_gem_shrinker_scan; + pfdev->shrinker->private_data = pfdev; + + shrinker_register(pfdev->shrinker); + + return 0; } /** @@ -116,7 +122,6 @@ void panfrost_gem_shrinker_cleanup(struct drm_device *dev) { struct panfrost_device *pfdev = dev->dev_private; - if (pfdev->shrinker.nr_deferred) { - unregister_shrinker(&pfdev->shrinker); - } + if (pfdev->shrinker) + shrinker_free(pfdev->shrinker); } From a9871479ae100215328be0d6bf470d1eccb36925 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:22 +0800 Subject: [PATCH 24/50] dm: dynamically allocate the dm-bufio shrinker mainline inclusion from mainline-v6.7-rc1 category: performance In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the dm-bufio shrinker, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct dm_bufio_client. Link: https://lkml.kernel.org/r/20230911094444.68966-24-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Alasdair Kergon Cc: Mike Snitzer Cc: Abhinav Kumar Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 1f1d459c9a2f0c8618b137fe845c09e405ae59fc) Signed-off-by: Wentao Guan --- drivers/md/dm-bufio.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 24d5ade3462d7..ded85d8d4c9eb 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1003,7 +1003,7 @@ struct dm_bufio_client { sector_t start; - struct shrinker shrinker; + struct shrinker *shrinker; struct work_struct shrink_work; atomic_long_t need_shrink; @@ -2414,7 +2414,7 @@ static unsigned long dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink { struct dm_bufio_client *c; - c = container_of(shrink, struct dm_bufio_client, shrinker); + c = shrink->private_data; atomic_long_add(sc->nr_to_scan, &c->need_shrink); queue_work(dm_bufio_wq, &c->shrink_work); @@ -2423,7 +2423,7 @@ static unsigned long dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink static unsigned long dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { - struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker); + struct dm_bufio_client *c = shrink->private_data; unsigned long count = cache_total(&c->cache); unsigned long retain_target = get_retain_buffers(c); unsigned long queued_for_cleanup = atomic_long_read(&c->need_shrink); @@ -2540,14 +2540,20 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign INIT_WORK(&c->shrink_work, shrink_work); atomic_long_set(&c->need_shrink, 0); - c->shrinker.count_objects = dm_bufio_shrink_count; - c->shrinker.scan_objects = dm_bufio_shrink_scan; - c->shrinker.seeks = 1; - c->shrinker.batch = 0; - r = register_shrinker(&c->shrinker, "dm-bufio:(%u:%u)", - MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev)); - if (r) + c->shrinker = shrinker_alloc(0, "dm-bufio:(%u:%u)", + MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev)); + if (!c->shrinker) { + r = -ENOMEM; goto bad; + } + + c->shrinker->count_objects = dm_bufio_shrink_count; + c->shrinker->scan_objects = dm_bufio_shrink_scan; + c->shrinker->seeks = 1; + c->shrinker->batch = 0; + c->shrinker->private_data = c; + + shrinker_register(c->shrinker); mutex_lock(&dm_bufio_clients_lock); dm_bufio_client_count++; @@ -2587,7 +2593,7 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c) drop_buffers(c); - unregister_shrinker(&c->shrinker); + shrinker_free(c->shrinker); flush_work(&c->shrink_work); mutex_lock(&dm_bufio_clients_lock); From f204a3c09d061e6eace107b740a0f1407688f0d5 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:23 +0800 Subject: [PATCH 25/50] dm zoned: dynamically allocate the dm-zoned-meta shrinker mainline inclusion from mainline-v6.7-rc1 category: performance In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the dm-zoned-meta shrinker, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct dmz_metadata. Link: https://lkml.kernel.org/r/20230911094444.68966-25-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Alasdair Kergon Cc: Mike Snitzer Cc: Abhinav Kumar Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit ba3d6acafd26416c3001adb4cc3cedcd6a419c73) Signed-off-by: Wentao Guan --- drivers/md/dm-zoned-metadata.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 9d3cca8e3dc9e..60a4dc01ea187 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -187,7 +187,7 @@ struct dmz_metadata { struct rb_root mblk_rbtree; struct list_head mblk_lru_list; struct list_head mblk_dirty_list; - struct shrinker mblk_shrinker; + struct shrinker *mblk_shrinker; /* Zone allocation management */ struct mutex map_lock; @@ -615,7 +615,7 @@ static unsigned long dmz_shrink_mblock_cache(struct dmz_metadata *zmd, static unsigned long dmz_mblock_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { - struct dmz_metadata *zmd = container_of(shrink, struct dmz_metadata, mblk_shrinker); + struct dmz_metadata *zmd = shrink->private_data; return atomic_read(&zmd->nr_mblks); } @@ -626,7 +626,7 @@ static unsigned long dmz_mblock_shrinker_count(struct shrinker *shrink, static unsigned long dmz_mblock_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) { - struct dmz_metadata *zmd = container_of(shrink, struct dmz_metadata, mblk_shrinker); + struct dmz_metadata *zmd = shrink->private_data; unsigned long count; spin_lock(&zmd->mblk_lock); @@ -2936,19 +2936,23 @@ int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev, */ zmd->min_nr_mblks = 2 + zmd->nr_map_blocks + zmd->zone_nr_bitmap_blocks * 16; zmd->max_nr_mblks = zmd->min_nr_mblks + 512; - zmd->mblk_shrinker.count_objects = dmz_mblock_shrinker_count; - zmd->mblk_shrinker.scan_objects = dmz_mblock_shrinker_scan; - zmd->mblk_shrinker.seeks = DEFAULT_SEEKS; /* Metadata cache shrinker */ - ret = register_shrinker(&zmd->mblk_shrinker, "dm-zoned-meta:(%u:%u)", - MAJOR(dev->bdev->bd_dev), - MINOR(dev->bdev->bd_dev)); - if (ret) { - dmz_zmd_err(zmd, "Register metadata cache shrinker failed"); + zmd->mblk_shrinker = shrinker_alloc(0, "dm-zoned-meta:(%u:%u)", + MAJOR(dev->bdev->bd_dev), + MINOR(dev->bdev->bd_dev)); + if (!zmd->mblk_shrinker) { + ret = -ENOMEM; + dmz_zmd_err(zmd, "Allocate metadata cache shrinker failed"); goto err; } + zmd->mblk_shrinker->count_objects = dmz_mblock_shrinker_count; + zmd->mblk_shrinker->scan_objects = dmz_mblock_shrinker_scan; + zmd->mblk_shrinker->private_data = zmd; + + shrinker_register(zmd->mblk_shrinker); + dmz_zmd_info(zmd, "DM-Zoned metadata version %d", zmd->sb_version); for (i = 0; i < zmd->nr_devs; i++) dmz_print_dev(zmd, i); @@ -2995,7 +2999,7 @@ int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev, */ void dmz_dtr_metadata(struct dmz_metadata *zmd) { - unregister_shrinker(&zmd->mblk_shrinker); + shrinker_free(zmd->mblk_shrinker); dmz_cleanup_metadata(zmd); kfree(zmd); } From d6a6158a53305f49f5c113e1428a079068aa16e6 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:24 +0800 Subject: [PATCH 26/50] md/raid5: dynamically allocate the md-raid5 shrinker mainline inclusion from mainline-v6.7-rc1 category: performance In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the md-raid5 shrinker, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct r5conf. Link: https://lkml.kernel.org/r/20230911094444.68966-26-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Reviewed-by: Song Liu Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton Conflicts: drivers/md/raid5.c (cherry picked from commit 86298d8b8ceacc17d0192cd6412d2773ff51b27f) Signed-off-by: Wentao Guan --- drivers/md/raid5.c | 26 +++++++++++++++----------- drivers/md/raid5.h | 2 +- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 6eb94e466f904..7f4bb4656ce88 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7430,7 +7430,7 @@ static void free_conf(struct r5conf *conf) log_exit(conf); - unregister_shrinker(&conf->shrinker); + shrinker_free(conf->shrinker); free_thread_groups(conf); shrink_stripes(conf); raid5_free_percpu(conf); @@ -7478,7 +7478,7 @@ static int raid5_alloc_percpu(struct r5conf *conf) static unsigned long raid5_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { - struct r5conf *conf = container_of(shrink, struct r5conf, shrinker); + struct r5conf *conf = shrink->private_data; unsigned long ret = SHRINK_STOP; if (mutex_trylock(&conf->cache_size_mutex)) { @@ -7499,7 +7499,7 @@ static unsigned long raid5_cache_scan(struct shrinker *shrink, static unsigned long raid5_cache_count(struct shrinker *shrink, struct shrink_control *sc) { - struct r5conf *conf = container_of(shrink, struct r5conf, shrinker); + struct r5conf *conf = shrink->private_data; int max_stripes = READ_ONCE(conf->max_nr_stripes); int min_stripes = READ_ONCE(conf->min_nr_stripes); @@ -7736,18 +7736,22 @@ static struct r5conf *setup_conf(struct mddev *mddev) * it reduces the queue depth and so can hurt throughput. * So set it rather large, scaled by number of devices. */ - conf->shrinker.seeks = DEFAULT_SEEKS * conf->raid_disks * 4; - conf->shrinker.scan_objects = raid5_cache_scan; - conf->shrinker.count_objects = raid5_cache_count; - conf->shrinker.batch = 128; - conf->shrinker.flags = 0; - ret = register_shrinker(&conf->shrinker, "md-raid5:%s", mdname(mddev)); - if (ret) { - pr_warn("md/raid:%s: couldn't register shrinker.\n", + conf->shrinker = shrinker_alloc(0, "md-raid5:%s", mdname(mddev)); + if (!conf->shrinker) { + ret = -ENOMEM; + pr_warn("md/raid:%s: couldn't allocate shrinker.\n", mdname(mddev)); goto abort; } + conf->shrinker->seeks = DEFAULT_SEEKS * conf->raid_disks * 4; + conf->shrinker->scan_objects = raid5_cache_scan; + conf->shrinker->count_objects = raid5_cache_count; + conf->shrinker->batch = 128; + conf->shrinker->private_data = conf; + + shrinker_register(conf->shrinker); + sprintf(pers_name, "raid%d", mddev->new_level); rcu_assign_pointer(conf->thread, md_register_thread(raid5d, mddev, pers_name)); diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index fd61715538800..f796b23c2b1be 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -666,7 +666,7 @@ struct r5conf { wait_queue_head_t wait_for_stripe; wait_queue_head_t wait_for_overlap; unsigned long cache_state; - struct shrinker shrinker; + struct shrinker *shrinker; int pool_size; /* number of disks in stripeheads in pool */ spinlock_t device_lock; struct disk_info *disks; From f425de13618d778dc932eaf84ae01c3be532d23b Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:25 +0800 Subject: [PATCH 27/50] bcache: dynamically allocate the md-bcache shrinker mainline inclusion from mainline-v6.7-rc1 category: performance In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the md-bcache shrinker, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct cache_set. Link: https://lkml.kernel.org/r/20230911094444.68966-27-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Acked-by: Muchun Song Cc: Coly Li Cc: Kent Overstreet Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit a6a1eb6214cf7adfd1e2da33d819df17fe8a64d7) Signed-off-by: Wentao Guan --- drivers/md/bcache/bcache.h | 2 +- drivers/md/bcache/btree.c | 27 ++++++++++++++++----------- drivers/md/bcache/sysfs.c | 3 ++- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 83eb7f27db3d4..38f2cb259321e 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -542,7 +542,7 @@ struct cache_set { struct bio_set bio_split; /* For the btree cache */ - struct shrinker shrink; + struct shrinker *shrink; /* For the btree cache and anything allocation related */ struct mutex bucket_lock; diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 30d6973de258b..5f8e10e84f3c2 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -667,7 +667,7 @@ static int mca_reap(struct btree *b, unsigned int min_order, bool flush) static unsigned long bch_mca_scan(struct shrinker *shrink, struct shrink_control *sc) { - struct cache_set *c = container_of(shrink, struct cache_set, shrink); + struct cache_set *c = shrink->private_data; struct btree *b, *t; unsigned long i, nr = sc->nr_to_scan; unsigned long freed = 0; @@ -734,7 +734,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink, static unsigned long bch_mca_count(struct shrinker *shrink, struct shrink_control *sc) { - struct cache_set *c = container_of(shrink, struct cache_set, shrink); + struct cache_set *c = shrink->private_data; if (c->shrinker_disabled) return 0; @@ -752,8 +752,8 @@ void bch_btree_cache_free(struct cache_set *c) closure_init_stack(&cl); - if (c->shrink.list.next) - unregister_shrinker(&c->shrink); + if (c->shrink) + shrinker_free(c->shrink); mutex_lock(&c->bucket_lock); @@ -828,14 +828,19 @@ int bch_btree_cache_alloc(struct cache_set *c) c->verify_data = NULL; #endif - c->shrink.count_objects = bch_mca_count; - c->shrink.scan_objects = bch_mca_scan; - c->shrink.seeks = 4; - c->shrink.batch = c->btree_pages * 2; + c->shrink = shrinker_alloc(0, "md-bcache:%pU", c->set_uuid); + if (!c->shrink) { + pr_warn("bcache: %s: could not allocate shrinker\n", __func__); + return 0; + } + + c->shrink->count_objects = bch_mca_count; + c->shrink->scan_objects = bch_mca_scan; + c->shrink->seeks = 4; + c->shrink->batch = c->btree_pages * 2; + c->shrink->private_data = c; - if (register_shrinker(&c->shrink, "md-bcache:%pU", c->set_uuid)) - pr_warn("bcache: %s: could not register shrinker\n", - __func__); + shrinker_register(c->shrink); return 0; } diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index b3a34f3ac081c..c4633425acc4b 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -866,7 +866,8 @@ STORE(__bch_cache_set) sc.gfp_mask = GFP_KERNEL; sc.nr_to_scan = strtoul_or_return(buf); - c->shrink.scan_objects(&c->shrink, &sc); + if (c->shrink) + c->shrink->scan_objects(c->shrink, &sc); } sysfs_strtoul_clamp(congested_read_threshold_us, From 1d461c87a3806858a7c5d418b973ac35dafd26c0 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:26 +0800 Subject: [PATCH 28/50] vmw_balloon: dynamically allocate the vmw-balloon shrinker mainline inclusion from mainline-v6.7-rc1 category: performance In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the vmw-balloon shrinker, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct vmballoon. And we can simply exit vmballoon_init() when registering the shrinker fails. So the shrinker_registered indication is redundant, just remove it. Link: https://lkml.kernel.org/r/20230911094444.68966-28-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Acked-by: Nadav Amit Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 17c4eb036a37f079ce2857be5914f839623b4eb2) Signed-off-by: Wentao Guan --- drivers/misc/vmw_balloon.c | 38 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index ad41e9af8102d..6653fc53c951c 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -380,16 +380,7 @@ struct vmballoon { /** * @shrinker: shrinker interface that is used to avoid over-inflation. */ - struct shrinker shrinker; - - /** - * @shrinker_registered: whether the shrinker was registered. - * - * The shrinker interface does not handle gracefully the removal of - * shrinker that was not registered before. This indication allows to - * simplify the unregistration process. - */ - bool shrinker_registered; + struct shrinker *shrinker; }; static struct vmballoon balloon; @@ -1568,29 +1559,27 @@ static unsigned long vmballoon_shrinker_count(struct shrinker *shrinker, static void vmballoon_unregister_shrinker(struct vmballoon *b) { - if (b->shrinker_registered) - unregister_shrinker(&b->shrinker); - b->shrinker_registered = false; + shrinker_free(b->shrinker); + b->shrinker = NULL; } static int vmballoon_register_shrinker(struct vmballoon *b) { - int r; - /* Do nothing if the shrinker is not enabled */ if (!vmwballoon_shrinker_enable) return 0; - b->shrinker.scan_objects = vmballoon_shrinker_scan; - b->shrinker.count_objects = vmballoon_shrinker_count; - b->shrinker.seeks = DEFAULT_SEEKS; + b->shrinker = shrinker_alloc(0, "vmw-balloon"); + if (!b->shrinker) + return -ENOMEM; - r = register_shrinker(&b->shrinker, "vmw-balloon"); + b->shrinker->scan_objects = vmballoon_shrinker_scan; + b->shrinker->count_objects = vmballoon_shrinker_count; + b->shrinker->private_data = b; - if (r == 0) - b->shrinker_registered = true; + shrinker_register(b->shrinker); - return r; + return 0; } /* @@ -1882,7 +1871,7 @@ static int __init vmballoon_init(void) error = vmballoon_register_shrinker(&balloon); if (error) - goto fail; + return error; /* * Initialization of compaction must be done after the call to @@ -1904,9 +1893,6 @@ static int __init vmballoon_init(void) vmballoon_debugfs_init(&balloon); return 0; -fail: - vmballoon_unregister_shrinker(&balloon); - return error; } /* From ecc73c366c10c21aca0a356d9e80cf04363536be Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:27 +0800 Subject: [PATCH 29/50] virtio_balloon: dynamically allocate the virtio-balloon shrinker mainline inclusion from mainline-v6.7-rc1 category: performance In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the virtio-balloon shrinker, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct virtio_balloon. Link: https://lkml.kernel.org/r/20230911094444.68966-29-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: "Michael S. Tsirkin" Cc: David Hildenbrand Cc: Jason Wang Cc: Xuan Zhuo Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 0fbb99698b165444e3cf5421f96de01d9d56baf4) Signed-off-by: Wentao Guan --- drivers/virtio/virtio_balloon.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 6cb7ccb21b715..562972cec9225 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -111,7 +111,7 @@ struct virtio_balloon { struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; /* Shrinker to return free pages - VIRTIO_BALLOON_F_FREE_PAGE_HINT */ - struct shrinker shrinker; + struct shrinker *shrinker; /* OOM notifier to deflate on OOM - VIRTIO_BALLOON_F_DEFLATE_ON_OOM */ struct notifier_block oom_nb; @@ -818,8 +818,7 @@ static unsigned long shrink_free_pages(struct virtio_balloon *vb, static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { - struct virtio_balloon *vb = container_of(shrinker, - struct virtio_balloon, shrinker); + struct virtio_balloon *vb = shrinker->private_data; return shrink_free_pages(vb, sc->nr_to_scan); } @@ -827,8 +826,7 @@ static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker, static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) { - struct virtio_balloon *vb = container_of(shrinker, - struct virtio_balloon, shrinker); + struct virtio_balloon *vb = shrinker->private_data; return vb->num_free_page_blocks * VIRTIO_BALLOON_HINT_BLOCK_PAGES; } @@ -849,16 +847,22 @@ static int virtio_balloon_oom_notify(struct notifier_block *nb, static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb) { - unregister_shrinker(&vb->shrinker); + shrinker_free(vb->shrinker); } static int virtio_balloon_register_shrinker(struct virtio_balloon *vb) { - vb->shrinker.scan_objects = virtio_balloon_shrinker_scan; - vb->shrinker.count_objects = virtio_balloon_shrinker_count; - vb->shrinker.seeks = DEFAULT_SEEKS; + vb->shrinker = shrinker_alloc(0, "virtio-balloon"); + if (!vb->shrinker) + return -ENOMEM; - return register_shrinker(&vb->shrinker, "virtio-balloon"); + vb->shrinker->scan_objects = virtio_balloon_shrinker_scan; + vb->shrinker->count_objects = virtio_balloon_shrinker_count; + vb->shrinker->private_data = vb; + + shrinker_register(vb->shrinker); + + return 0; } static int virtballoon_probe(struct virtio_device *vdev) From 40b8816190ec4e9a5d66e078cfc4477beb3f8e7d Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:28 +0800 Subject: [PATCH 30/50] mbcache: dynamically allocate the mbcache shrinker mainline inclusion from mainline-v6.7-rc1 category: performance In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the mbcache shrinker, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct mb_cache. Link: https://lkml.kernel.org/r/20230911094444.68966-30-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Alexander Viro Cc: Christian Brauner Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 714e5bde00a58ba13b03b68547931e011f93de58) Signed-off-by: Wentao Guan --- fs/mbcache.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/mbcache.c b/fs/mbcache.c index 2a4b8b549e934..82aa7a35db26b 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -37,7 +37,7 @@ struct mb_cache { struct list_head c_list; /* Number of entries in cache */ unsigned long c_entry_count; - struct shrinker c_shrink; + struct shrinker *c_shrink; /* Work for shrinking when the cache has too many entries */ struct work_struct c_shrink_work; }; @@ -293,8 +293,7 @@ EXPORT_SYMBOL(mb_cache_entry_touch); static unsigned long mb_cache_count(struct shrinker *shrink, struct shrink_control *sc) { - struct mb_cache *cache = container_of(shrink, struct mb_cache, - c_shrink); + struct mb_cache *cache = shrink->private_data; return cache->c_entry_count; } @@ -333,8 +332,7 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache, static unsigned long mb_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { - struct mb_cache *cache = container_of(shrink, struct mb_cache, - c_shrink); + struct mb_cache *cache = shrink->private_data; return mb_cache_shrink(cache, sc->nr_to_scan); } @@ -377,15 +375,19 @@ struct mb_cache *mb_cache_create(int bucket_bits) for (i = 0; i < bucket_count; i++) INIT_HLIST_BL_HEAD(&cache->c_hash[i]); - cache->c_shrink.count_objects = mb_cache_count; - cache->c_shrink.scan_objects = mb_cache_scan; - cache->c_shrink.seeks = DEFAULT_SEEKS; - if (register_shrinker(&cache->c_shrink, "mbcache-shrinker")) { + cache->c_shrink = shrinker_alloc(0, "mbcache-shrinker"); + if (!cache->c_shrink) { kfree(cache->c_hash); kfree(cache); goto err_out; } + cache->c_shrink->count_objects = mb_cache_count; + cache->c_shrink->scan_objects = mb_cache_scan; + cache->c_shrink->private_data = cache; + + shrinker_register(cache->c_shrink); + INIT_WORK(&cache->c_shrink_work, mb_cache_shrink_worker); return cache; @@ -406,7 +408,7 @@ void mb_cache_destroy(struct mb_cache *cache) { struct mb_cache_entry *entry, *next; - unregister_shrinker(&cache->c_shrink); + shrinker_free(cache->c_shrink); /* * We don't bother with any locking. Cache must not be used at this From bcf2c5cd62ed9c844cfe980dbbb7253d00922a97 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:29 +0800 Subject: [PATCH 31/50] ext4: dynamically allocate the ext4-es shrinker mainline inclusion from mainline-v6.7-rc1 category: performance In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the ext4-es shrinker, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct ext4_sb_info. Link: https://lkml.kernel.org/r/20230911094444.68966-31-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: "Theodore Ts'o" Cc: Andreas Dilger Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 4d09d75d8b8cb3c8087b46585924777bc8737663) Signed-off-by: Wentao Guan --- fs/ext4/ext4.h | 2 +- fs/ext4/extents_status.c | 23 +++++++++++++---------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 317c2c0c015b2..a7f305fe77a11 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1665,7 +1665,7 @@ struct ext4_sb_info { __u32 s_csum_seed; /* Reclaim extents from extent status tree */ - struct shrinker s_es_shrinker; + struct shrinker *s_es_shrinker; struct list_head s_es_list; /* List of inodes with reclaimable extents */ long s_es_nr_inode; struct ext4_es_stats s_es_stats; diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index d9d5cfb9c951a..3a53dbb85e15b 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -1634,7 +1634,7 @@ static unsigned long ext4_es_count(struct shrinker *shrink, unsigned long nr; struct ext4_sb_info *sbi; - sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); + sbi = shrink->private_data; nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt); trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr); return nr; @@ -1643,8 +1643,7 @@ static unsigned long ext4_es_count(struct shrinker *shrink, static unsigned long ext4_es_scan(struct shrinker *shrink, struct shrink_control *sc) { - struct ext4_sb_info *sbi = container_of(shrink, - struct ext4_sb_info, s_es_shrinker); + struct ext4_sb_info *sbi = shrink->private_data; int nr_to_scan = sc->nr_to_scan; int ret, nr_shrunk; @@ -1728,13 +1727,17 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) if (err) goto err3; - sbi->s_es_shrinker.scan_objects = ext4_es_scan; - sbi->s_es_shrinker.count_objects = ext4_es_count; - sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; - err = register_shrinker(&sbi->s_es_shrinker, "ext4-es:%s", - sbi->s_sb->s_id); - if (err) + sbi->s_es_shrinker = shrinker_alloc(0, "ext4-es:%s", sbi->s_sb->s_id); + if (!sbi->s_es_shrinker) { + err = -ENOMEM; goto err4; + } + + sbi->s_es_shrinker->scan_objects = ext4_es_scan; + sbi->s_es_shrinker->count_objects = ext4_es_count; + sbi->s_es_shrinker->private_data = sbi; + + shrinker_register(sbi->s_es_shrinker); return 0; err4: @@ -1754,7 +1757,7 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses); percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt); - unregister_shrinker(&sbi->s_es_shrinker); + shrinker_free(sbi->s_es_shrinker); } /* From 552857b5f7d777cc851f386e7fae8c5c93b9e57b Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:30 +0800 Subject: [PATCH 32/50] jbd2,ext4: dynamically allocate the jbd2-journal shrinker mainline inclusion from mainline-v6.7-rc1 category: performance In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the jbd2-journal shrinker, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct journal_s. Link: https://lkml.kernel.org/r/20230911094444.68966-32-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Acked-by: Jan Kara Cc: "Theodore Ts'o" Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 4b403dfa8ea8dfbc9d317b25a0433c1ac6765f7f) Signed-off-by: Wentao Guan --- fs/jbd2/journal.c | 29 ++++++++++++++++++----------- include/linux/jbd2.h | 2 +- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 88ac2639ad61e..b6e8bea40dbb0 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1290,7 +1290,7 @@ static int jbd2_min_tag_size(void) static unsigned long jbd2_journal_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { - journal_t *journal = container_of(shrink, journal_t, j_shrinker); + journal_t *journal = shrink->private_data; unsigned long nr_to_scan = sc->nr_to_scan; unsigned long nr_shrunk; unsigned long count; @@ -1316,7 +1316,7 @@ static unsigned long jbd2_journal_shrink_scan(struct shrinker *shrink, static unsigned long jbd2_journal_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { - journal_t *journal = container_of(shrink, journal_t, j_shrinker); + journal_t *journal = shrink->private_data; unsigned long count; count = percpu_counter_read_positive(&journal->j_checkpoint_jh_count); @@ -1630,14 +1630,21 @@ static journal_t *journal_init_common(struct block_device *bdev, goto err_cleanup; journal->j_shrink_transaction = NULL; - journal->j_shrinker.scan_objects = jbd2_journal_shrink_scan; - journal->j_shrinker.count_objects = jbd2_journal_shrink_count; - journal->j_shrinker.seeks = DEFAULT_SEEKS; - journal->j_shrinker.batch = journal->j_max_transaction_buffers; - err = register_shrinker(&journal->j_shrinker, "jbd2-journal:(%u:%u)", - MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev)); - if (err) + + journal->j_shrinker = shrinker_alloc(0, "jbd2-journal:(%u:%u)", + MAJOR(bdev->bd_dev), + MINOR(bdev->bd_dev)); + if (!journal->j_shrinker) { + err = -ENOMEM; goto err_cleanup; + } + + journal->j_shrinker->scan_objects = jbd2_journal_shrink_scan; + journal->j_shrinker->count_objects = jbd2_journal_shrink_count; + journal->j_shrinker->batch = journal->j_max_transaction_buffers; + journal->j_shrinker->private_data = journal; + + shrinker_register(journal->j_shrinker); return journal; @@ -2217,9 +2224,9 @@ int jbd2_journal_destroy(journal_t *journal) brelse(journal->j_sb_buffer); } - if (journal->j_shrinker.flags & SHRINKER_REGISTERED) { + if (journal->j_shrinker) { percpu_counter_destroy(&journal->j_checkpoint_jh_count); - unregister_shrinker(&journal->j_shrinker); + shrinker_free(journal->j_shrinker); } if (journal->j_proc_entry) jbd2_stats_proc_exit(journal); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 476e92a02bd29..a1fd41415342e 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -899,7 +899,7 @@ struct journal_s * Journal head shrinker, reclaim buffer's journal head which * has been written back. */ - struct shrinker j_shrinker; + struct shrinker *j_shrinker; /** * @j_checkpoint_jh_count: From 50a2e3bb7a9e03a60a1f1e1b714508aa17c3d09d Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:31 +0800 Subject: [PATCH 33/50] nfsd: dynamically allocate the nfsd-client shrinker mainline inclusion from mainline-v6.7-rc1 category: performance In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the nfsd-client shrinker, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct nfsd_net. Link: https://lkml.kernel.org/r/20230911094444.68966-33-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Acked-by: Chuck Lever Acked-by: Jeff Layton Cc: Jeff Layton Cc: Olga Kornievskaia Cc: Dai Ngo Cc: Tom Talpey Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton Conflicts: fs/nfsd/nfs4state.c (cherry picked from commit d17452aa33a6fd0e9cf2bd3d43237f2d2c294a7e) Signed-off-by: Wentao Guan --- fs/nfsd/netns.h | 2 +- fs/nfsd/nfs4state.c | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 0ecc5527a8f1a..23b5e64934d02 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -215,7 +215,7 @@ struct nfsd_net { int nfs4_max_clients; atomic_t nfsd_courtesy_clients; - struct shrinker nfsd_client_shrinker; + struct shrinker *nfsd_client_shrinker; struct work_struct nfsd_shrinker_work; }; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c0dd50b59ad16..06f1fb8ebc4eb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4427,8 +4427,7 @@ static unsigned long nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { int count; - struct nfsd_net *nn = container_of(shrink, - struct nfsd_net, nfsd_client_shrinker); + struct nfsd_net *nn = shrink->private_data; count = atomic_read(&nn->nfsd_courtesy_clients); if (!count) @@ -8245,12 +8244,16 @@ static int nfs4_state_create_net(struct net *net) INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); get_net(net); - nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; - nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count; - nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; - - if (register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client")) + nn->nfsd_client_shrinker = shrinker_alloc(0, "nfsd-client"); + if (!nn->nfsd_client_shrinker) goto err_shrinker; + + nn->nfsd_client_shrinker->scan_objects = nfsd4_state_shrinker_scan; + nn->nfsd_client_shrinker->count_objects = nfsd4_state_shrinker_count; + nn->nfsd_client_shrinker->private_data = nn; + + shrinker_register(nn->nfsd_client_shrinker); + return 0; err_shrinker: @@ -8352,7 +8355,7 @@ nfs4_state_shutdown_net(struct net *net) struct list_head *pos, *next, reaplist; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - unregister_shrinker(&nn->nfsd_client_shrinker); + shrinker_free(nn->nfsd_client_shrinker); cancel_work_sync(&nn->nfsd_shrinker_work); spin_lock(&nn->client_lock); nn->client_tracking_active = false; From 7c7ecf1f7d91782530b3a585509373d9db8faa4e Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:32 +0800 Subject: [PATCH 34/50] nfsd: dynamically allocate the nfsd-reply shrinker mainline inclusion from mainline-v6.7-rc1 category: performance In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the nfsd-reply shrinker, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct nfsd_net. Link: https://lkml.kernel.org/r/20230911094444.68966-34-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Acked-by: Chuck Lever Acked-by: Jeff Layton Acked-by: Muchun Song Cc: Neil Brown Cc: Olga Kornievskaia Cc: Dai Ngo Cc: Tom Talpey Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Nadav Amit Cc: Oleksandr Tyshchenko Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 8eea99a81c6f67c08fedd7db2ccd09aa93db69a7) Signed-off-by: Wentao Guan --- fs/nfsd/netns.h | 2 +- fs/nfsd/nfscache.c | 31 ++++++++++++++++--------------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 23b5e64934d02..f8d1a7c13d108 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -197,7 +197,7 @@ struct nfsd_net { /* size of cache when we saw the longest hash chain */ unsigned int longest_chain_cachesize; - struct shrinker nfsd_reply_cache_shrinker; + struct shrinker *nfsd_reply_cache_shrinker; /* tracking server-to-server copy mounts */ spinlock_t nfsd_ssc_lock; diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index c52132ecb339d..622830a09ec37 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -180,26 +180,29 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) { unsigned int hashsize; unsigned int i; - int status = 0; nn->max_drc_entries = nfsd_cache_size_limit(); atomic_set(&nn->num_drc_entries, 0); hashsize = nfsd_hashsize(nn->max_drc_entries); nn->maskbits = ilog2(hashsize); - nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan; - nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count; - nn->nfsd_reply_cache_shrinker.seeks = 1; - status = register_shrinker(&nn->nfsd_reply_cache_shrinker, - "nfsd-reply:%s", nn->nfsd_name); - if (status) - return status; - nn->drc_hashtbl = kvzalloc(array_size(hashsize, sizeof(*nn->drc_hashtbl)), GFP_KERNEL); if (!nn->drc_hashtbl) + return -ENOMEM; + + nn->nfsd_reply_cache_shrinker = shrinker_alloc(0, "nfsd-reply:%s", + nn->nfsd_name); + if (!nn->nfsd_reply_cache_shrinker) goto out_shrinker; + nn->nfsd_reply_cache_shrinker->scan_objects = nfsd_reply_cache_scan; + nn->nfsd_reply_cache_shrinker->count_objects = nfsd_reply_cache_count; + nn->nfsd_reply_cache_shrinker->seeks = 1; + nn->nfsd_reply_cache_shrinker->private_data = nn; + + shrinker_register(nn->nfsd_reply_cache_shrinker); + for (i = 0; i < hashsize; i++) { INIT_LIST_HEAD(&nn->drc_hashtbl[i].lru_head); spin_lock_init(&nn->drc_hashtbl[i].cache_lock); @@ -208,7 +211,7 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) return 0; out_shrinker: - unregister_shrinker(&nn->nfsd_reply_cache_shrinker); + kvfree(nn->drc_hashtbl); printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); return -ENOMEM; } @@ -218,7 +221,7 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn) struct nfsd_cacherep *rp; unsigned int i; - unregister_shrinker(&nn->nfsd_reply_cache_shrinker); + shrinker_free(nn->nfsd_reply_cache_shrinker); for (i = 0; i < nn->drc_hashsize; i++) { struct list_head *head = &nn->drc_hashtbl[i].lru_head; @@ -302,8 +305,7 @@ nfsd_prune_bucket_locked(struct nfsd_net *nn, struct nfsd_drc_bucket *b, static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) { - struct nfsd_net *nn = container_of(shrink, - struct nfsd_net, nfsd_reply_cache_shrinker); + struct nfsd_net *nn = shrink->private_data; return atomic_read(&nn->num_drc_entries); } @@ -322,8 +324,7 @@ nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { - struct nfsd_net *nn = container_of(shrink, - struct nfsd_net, nfsd_reply_cache_shrinker); + struct nfsd_net *nn = shrink->private_data; unsigned long freed = 0; LIST_HEAD(dispose); unsigned int i; From 2c96ace5bfc060254c66765e71d2e2eb225c4d39 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:33 +0800 Subject: [PATCH 35/50] xfs: dynamically allocate the xfs-buf shrinker mainline inclusion from mainline-v6.7-rc1 category: performance In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the xfs-buf shrinker, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct xfs_buftarg. Link: https://lkml.kernel.org/r/20230911094444.68966-35-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Chandan Babu R Cc: "Darrick J. Wong" Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 17e7a00e60c8fff31082d5216b5c7d06d156f972) Signed-off-by: Wentao Guan --- fs/xfs/xfs_buf.c | 24 +++++++++++++----------- fs/xfs/xfs_buf.h | 2 +- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 1181108f80746..9c9a3a5548b8a 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1913,8 +1913,7 @@ xfs_buftarg_shrink_scan( struct shrinker *shrink, struct shrink_control *sc) { - struct xfs_buftarg *btp = container_of(shrink, - struct xfs_buftarg, bt_shrinker); + struct xfs_buftarg *btp = shrink->private_data; LIST_HEAD(dispose); unsigned long freed; @@ -1936,8 +1935,7 @@ xfs_buftarg_shrink_count( struct shrinker *shrink, struct shrink_control *sc) { - struct xfs_buftarg *btp = container_of(shrink, - struct xfs_buftarg, bt_shrinker); + struct xfs_buftarg *btp = shrink->private_data; return list_lru_shrink_count(&btp->bt_lru, sc); } @@ -1947,7 +1945,7 @@ xfs_free_buftarg( { struct block_device *bdev = btp->bt_bdev; - unregister_shrinker(&btp->bt_shrinker); + shrinker_free(btp->bt_shrinker); ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0); percpu_counter_destroy(&btp->bt_io_count); list_lru_destroy(&btp->bt_lru); @@ -2031,13 +2029,17 @@ xfs_alloc_buftarg( if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL)) goto error_lru; - btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count; - btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan; - btp->bt_shrinker.seeks = DEFAULT_SEEKS; - btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE; - if (register_shrinker(&btp->bt_shrinker, "xfs-buf:%s", - mp->m_super->s_id)) + btp->bt_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE, "xfs-buf:%s", + mp->m_super->s_id); + if (!btp->bt_shrinker) goto error_pcpu; + + btp->bt_shrinker->count_objects = xfs_buftarg_shrink_count; + btp->bt_shrinker->scan_objects = xfs_buftarg_shrink_scan; + btp->bt_shrinker->private_data = btp; + + shrinker_register(btp->bt_shrinker); + return btp; error_pcpu: diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 5896b58c5f4db..b21ec7234dbe7 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -108,7 +108,7 @@ typedef struct xfs_buftarg { size_t bt_logical_sectormask; /* LRU control structures */ - struct shrinker bt_shrinker; + struct shrinker *bt_shrinker; struct list_lru bt_lru; struct percpu_counter bt_io_count; From 46e378c862f4784bd4291981dcece5e3087135a3 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:34 +0800 Subject: [PATCH 36/50] xfs: dynamically allocate the xfs-inodegc shrinker mainline inclusion from mainline-v6.7-rc1 category: performance In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the xfs-inodegc shrinker, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct xfs_mount. Link: https://lkml.kernel.org/r/20230911094444.68966-36-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Chandan Babu R Cc: "Darrick J. Wong" Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 1a86a53da46778a583cd866465e18d662d866f30) Signed-off-by: Wentao Guan --- fs/xfs/xfs_icache.c | 26 +++++++++++++++----------- fs/xfs/xfs_mount.c | 4 ++-- fs/xfs/xfs_mount.h | 2 +- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index c54a7c60e0633..f79b35755a398 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -2176,8 +2176,7 @@ xfs_inodegc_shrinker_count( struct shrinker *shrink, struct shrink_control *sc) { - struct xfs_mount *mp = container_of(shrink, struct xfs_mount, - m_inodegc_shrinker); + struct xfs_mount *mp = shrink->private_data; struct xfs_inodegc *gc; int cpu; @@ -2198,8 +2197,7 @@ xfs_inodegc_shrinker_scan( struct shrinker *shrink, struct shrink_control *sc) { - struct xfs_mount *mp = container_of(shrink, struct xfs_mount, - m_inodegc_shrinker); + struct xfs_mount *mp = shrink->private_data; struct xfs_inodegc *gc; int cpu; bool no_items = true; @@ -2235,13 +2233,19 @@ int xfs_inodegc_register_shrinker( struct xfs_mount *mp) { - struct shrinker *shrink = &mp->m_inodegc_shrinker; + mp->m_inodegc_shrinker = shrinker_alloc(SHRINKER_NONSLAB, + "xfs-inodegc:%s", + mp->m_super->s_id); + if (!mp->m_inodegc_shrinker) + return -ENOMEM; + + mp->m_inodegc_shrinker->count_objects = xfs_inodegc_shrinker_count; + mp->m_inodegc_shrinker->scan_objects = xfs_inodegc_shrinker_scan; + mp->m_inodegc_shrinker->seeks = 0; + mp->m_inodegc_shrinker->batch = XFS_INODEGC_SHRINKER_BATCH; + mp->m_inodegc_shrinker->private_data = mp; - shrink->count_objects = xfs_inodegc_shrinker_count; - shrink->scan_objects = xfs_inodegc_shrinker_scan; - shrink->seeks = 0; - shrink->flags = SHRINKER_NONSLAB; - shrink->batch = XFS_INODEGC_SHRINKER_BATCH; + shrinker_register(mp->m_inodegc_shrinker); - return register_shrinker(shrink, "xfs-inodegc:%s", mp->m_super->s_id); + return 0; } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 44c686d57e7bb..e026c0ee60134 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1022,7 +1022,7 @@ xfs_mountfs( out_log_dealloc: xfs_log_mount_cancel(mp); out_inodegc_shrinker: - unregister_shrinker(&mp->m_inodegc_shrinker); + shrinker_free(mp->m_inodegc_shrinker); out_fail_wait: if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) xfs_buftarg_drain(mp->m_logdev_targp); @@ -1105,7 +1105,7 @@ xfs_unmountfs( #if defined(DEBUG) xfs_errortag_clearall(mp); #endif - unregister_shrinker(&mp->m_inodegc_shrinker); + shrinker_free(mp->m_inodegc_shrinker); xfs_free_perag(mp); xfs_errortag_del(mp); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index d19cca099bc3a..219681d29fbc4 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -219,7 +219,7 @@ typedef struct xfs_mount { atomic_t m_agirotor; /* last ag dir inode alloced */ /* Memory shrinker to throttle and reprioritize inodegc */ - struct shrinker m_inodegc_shrinker; + struct shrinker *m_inodegc_shrinker; /* * Workqueue item so that we can coalesce multiple inode flush attempts * into a single flush. From 3ba2ae65bdb9135edee9680f9adc1dc2f1d27307 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:35 +0800 Subject: [PATCH 37/50] xfs: dynamically allocate the xfs-qm shrinker mainline inclusion from mainline-v6.7-rc1 category: performance In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the xfs-qm shrinker, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct xfs_quotainfo. Link: https://lkml.kernel.org/r/20230911094444.68966-37-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Chandan Babu R Cc: "Darrick J. Wong" Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit fe88527852be8a6f1391c961db38d342cc46461d) Signed-off-by: Wentao Guan --- fs/xfs/xfs_qm.c | 27 ++++++++++++++------------- fs/xfs/xfs_qm.h | 2 +- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 086e78a6143aa..94a7932ac5700 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -504,8 +504,7 @@ xfs_qm_shrink_scan( struct shrinker *shrink, struct shrink_control *sc) { - struct xfs_quotainfo *qi = container_of(shrink, - struct xfs_quotainfo, qi_shrinker); + struct xfs_quotainfo *qi = shrink->private_data; struct xfs_qm_isolate isol; unsigned long freed; int error; @@ -539,8 +538,7 @@ xfs_qm_shrink_count( struct shrinker *shrink, struct shrink_control *sc) { - struct xfs_quotainfo *qi = container_of(shrink, - struct xfs_quotainfo, qi_shrinker); + struct xfs_quotainfo *qi = shrink->private_data; return list_lru_shrink_count(&qi->qi_lru, sc); } @@ -680,15 +678,18 @@ xfs_qm_init_quotainfo( if (XFS_IS_PQUOTA_ON(mp)) xfs_qm_set_defquota(mp, XFS_DQTYPE_PROJ, qinf); - qinf->qi_shrinker.count_objects = xfs_qm_shrink_count; - qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan; - qinf->qi_shrinker.seeks = DEFAULT_SEEKS; - qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE; - - error = register_shrinker(&qinf->qi_shrinker, "xfs-qm:%s", - mp->m_super->s_id); - if (error) + qinf->qi_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE, "xfs-qm:%s", + mp->m_super->s_id); + if (!qinf->qi_shrinker) { + error = -ENOMEM; goto out_free_inos; + } + + qinf->qi_shrinker->count_objects = xfs_qm_shrink_count; + qinf->qi_shrinker->scan_objects = xfs_qm_shrink_scan; + qinf->qi_shrinker->private_data = qinf; + + shrinker_register(qinf->qi_shrinker); return 0; @@ -718,7 +719,7 @@ xfs_qm_destroy_quotainfo( qi = mp->m_quotainfo; ASSERT(qi != NULL); - unregister_shrinker(&qi->qi_shrinker); + shrinker_free(qi->qi_shrinker); list_lru_destroy(&qi->qi_lru); xfs_qm_destroy_quotainos(qi); mutex_destroy(&qi->qi_tree_lock); diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 9683f0457d192..d5c9fc4ba591e 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -63,7 +63,7 @@ struct xfs_quotainfo { struct xfs_def_quota qi_usr_default; struct xfs_def_quota qi_grp_default; struct xfs_def_quota qi_prj_default; - struct shrinker qi_shrinker; + struct shrinker *qi_shrinker; /* Minimum and maximum quota expiration timestamp values. */ time64_t qi_expiry_min; From 4cbb603816e906d4c440527a8b30bdfc2d96bcff Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:36 +0800 Subject: [PATCH 38/50] zsmalloc: dynamically allocate the mm-zspool shrinker mainline inclusion from mainline-v6.7-rc1 category: performance In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the mm-zspool shrinker, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct zs_pool. Link: https://lkml.kernel.org/r/20230911094444.68966-38-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Reviewed-by: Sergey Senozhatsky Cc: Minchan Kim Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit c19b548b493455cfb80895074687152869e77742) Signed-off-by: Wentao Guan --- mm/zsmalloc.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 14327fc34aa71..e8ba3627638bf 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -227,7 +227,7 @@ struct zs_pool { struct zs_pool_stats stats; /* Compact classes */ - struct shrinker shrinker; + struct shrinker *shrinker; #ifdef CONFIG_ZSMALLOC_STAT struct dentry *stat_dentry; @@ -2087,8 +2087,7 @@ static unsigned long zs_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { unsigned long pages_freed; - struct zs_pool *pool = container_of(shrinker, struct zs_pool, - shrinker); + struct zs_pool *pool = shrinker->private_data; /* * Compact classes and calculate compaction delta. @@ -2106,8 +2105,7 @@ static unsigned long zs_shrinker_count(struct shrinker *shrinker, int i; struct size_class *class; unsigned long pages_to_free = 0; - struct zs_pool *pool = container_of(shrinker, struct zs_pool, - shrinker); + struct zs_pool *pool = shrinker->private_data; for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) { class = pool->size_class[i]; @@ -2122,18 +2120,23 @@ static unsigned long zs_shrinker_count(struct shrinker *shrinker, static void zs_unregister_shrinker(struct zs_pool *pool) { - unregister_shrinker(&pool->shrinker); + shrinker_free(pool->shrinker); } static int zs_register_shrinker(struct zs_pool *pool) { - pool->shrinker.scan_objects = zs_shrinker_scan; - pool->shrinker.count_objects = zs_shrinker_count; - pool->shrinker.batch = 0; - pool->shrinker.seeks = DEFAULT_SEEKS; + pool->shrinker = shrinker_alloc(0, "mm-zspool:%s", pool->name); + if (!pool->shrinker) + return -ENOMEM; + + pool->shrinker->scan_objects = zs_shrinker_scan; + pool->shrinker->count_objects = zs_shrinker_count; + pool->shrinker->batch = 0; + pool->shrinker->private_data = pool; - return register_shrinker(&pool->shrinker, "mm-zspool:%s", - pool->name); + shrinker_register(pool->shrinker); + + return 0; } static int calculate_zspage_chain_size(int class_size) From 4a5f43e95c4e0147108ae4e9acb905869d40a2cd Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:37 +0800 Subject: [PATCH 39/50] fs: super: dynamically allocate the s_shrink mainline inclusion from mainline-v6.7-rc1 category: performance In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the s_shrink, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct super_block. Link: https://lkml.kernel.org/r/20230911094444.68966-39-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Acked-by: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: Alexander Viro Cc: Christian Brauner Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 1720f5dd8d3af34d6023fb9e8c35e5e60e8b6643) Signed-off-by: Wentao Guan --- fs/btrfs/super.c | 2 +- fs/kernfs/mount.c | 2 +- fs/proc/root.c | 2 +- fs/super.c | 33 ++++++++++++++++++--------------- include/linux/fs.h | 2 +- 5 files changed, 22 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2045ac3d94c4d..55dfa57d51a78 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1521,7 +1521,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, error = -EBUSY; } else { snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); - shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s", fs_type->name, + shrinker_debugfs_rename(s->s_shrink, "sb-%s:%s", fs_type->name, s->s_id); btrfs_sb(s)->bdev_holder = fs_type; error = btrfs_fill_super(s, fs_devices, data); diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index d1a2e44df3c05..63d768332151c 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -266,7 +266,7 @@ static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *k sb->s_time_gran = 1; /* sysfs dentries and inodes don't require IO to create */ - sb->s_shrink.seeks = 0; + sb->s_shrink->seeks = 0; /* get root inode, initialize and unlock it */ down_read(&kf_root->kernfs_rwsem); diff --git a/fs/proc/root.c b/fs/proc/root.c index 9191248f2dacb..b55dbc70287b4 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -188,7 +188,7 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc) s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; /* procfs dentries and inodes don't require IO to create */ - s->s_shrink.seeks = 0; + s->s_shrink->seeks = 0; pde_get(&proc_root); root_inode = proc_get_inode(s, &proc_root); diff --git a/fs/super.c b/fs/super.c index b142e71eb8dfd..6555164b453c7 100644 --- a/fs/super.c +++ b/fs/super.c @@ -191,7 +191,7 @@ static unsigned long super_cache_scan(struct shrinker *shrink, long dentries; long inodes; - sb = container_of(shrink, struct super_block, s_shrink); + sb = shrink->private_data; /* * Deadlock avoidance. We may hold various FS locks, and we don't want @@ -244,7 +244,7 @@ static unsigned long super_cache_count(struct shrinker *shrink, struct super_block *sb; long total_objects = 0; - sb = container_of(shrink, struct super_block, s_shrink); + sb = shrink->private_data; /* * We don't call super_trylock_shared() here as it is a scalability @@ -306,7 +306,7 @@ static void destroy_unused_super(struct super_block *s) security_sb_free(s); put_user_ns(s->s_user_ns); kfree(s->s_subtype); - free_prealloced_shrinker(&s->s_shrink); + shrinker_free(s->s_shrink); /* no delays needed */ destroy_super_work(&s->destroy_work); } @@ -383,16 +383,19 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, s->s_time_min = TIME64_MIN; s->s_time_max = TIME64_MAX; - s->s_shrink.seeks = DEFAULT_SEEKS; - s->s_shrink.scan_objects = super_cache_scan; - s->s_shrink.count_objects = super_cache_count; - s->s_shrink.batch = 1024; - s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE; - if (prealloc_shrinker(&s->s_shrink, "sb-%s", type->name)) + s->s_shrink = shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, + "sb-%s", type->name); + if (!s->s_shrink) goto fail; - if (list_lru_init_memcg(&s->s_dentry_lru, &s->s_shrink)) + + s->s_shrink->scan_objects = super_cache_scan; + s->s_shrink->count_objects = super_cache_count; + s->s_shrink->batch = 1024; + s->s_shrink->private_data = s; + + if (list_lru_init_memcg(&s->s_dentry_lru, s->s_shrink)) goto fail; - if (list_lru_init_memcg(&s->s_inode_lru, &s->s_shrink)) + if (list_lru_init_memcg(&s->s_inode_lru, s->s_shrink)) goto fail; return s; @@ -477,7 +480,7 @@ void deactivate_locked_super(struct super_block *s) { struct file_system_type *fs = s->s_type; if (atomic_dec_and_test(&s->s_active)) { - unregister_shrinker(&s->s_shrink); + shrinker_free(s->s_shrink); fs->kill_sb(s); kill_super_notify(s); @@ -829,7 +832,7 @@ struct super_block *sget_fc(struct fs_context *fc, hlist_add_head(&s->s_instances, &s->s_type->fs_supers); spin_unlock(&sb_lock); get_filesystem(s->s_type); - register_shrinker_prepared(&s->s_shrink); + shrinker_register(s->s_shrink); return s; share_extant_sb: @@ -912,7 +915,7 @@ struct super_block *sget(struct file_system_type *type, hlist_add_head(&s->s_instances, &type->fs_supers); spin_unlock(&sb_lock); get_filesystem(type); - register_shrinker_prepared(&s->s_shrink); + shrinker_register(s->s_shrink); return s; } EXPORT_SYMBOL(sget); @@ -1536,7 +1539,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags, mutex_unlock(&bdev->bd_fsfreeze_mutex); snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev); - shrinker_debugfs_rename(&sb->s_shrink, "sb-%s:%s", sb->s_type->name, + shrinker_debugfs_rename(sb->s_shrink, "sb-%s:%s", sb->s_type->name, sb->s_id); sb_set_blocksize(sb, block_size(bdev)); return 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index 9d903573358e3..d104f393cd276 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1300,7 +1300,7 @@ struct super_block { const struct dentry_operations *s_d_op; /* default d_op for dentries */ - struct shrinker s_shrink; /* per-sb shrinker handle */ + struct shrinker *s_shrink; /* per-sb shrinker handle */ /* Number of inodes with nlink == 0 but still referenced */ atomic_long_t s_remove_count; From 8e53cd65c148cf455dc1dd133fc1021adb22b2d5 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:38 +0800 Subject: [PATCH 40/50] mm: shrinker: remove old APIs mainline inclusion from mainline-v6.7-rc1 category: performance Now no users are using the old APIs, just remove them. Link: https://lkml.kernel.org/r/20230911094444.68966-40-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit f2383e01507eeee8a1c1283d61a117a97d6c4ebe) Signed-off-by: Wentao Guan --- include/linux/shrinker.h | 8 --- mm/shrinker.c | 143 --------------------------------------- 2 files changed, 151 deletions(-) diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index c88f42f75dc0f..dc012b06f5e2d 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -112,14 +112,6 @@ struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...); void shrinker_register(struct shrinker *shrinker); void shrinker_free(struct shrinker *shrinker); -extern int __printf(2, 3) prealloc_shrinker(struct shrinker *shrinker, - const char *fmt, ...); -extern void register_shrinker_prepared(struct shrinker *shrinker); -extern int __printf(2, 3) register_shrinker(struct shrinker *shrinker, - const char *fmt, ...); -extern void unregister_shrinker(struct shrinker *shrinker); -extern void free_prealloced_shrinker(struct shrinker *shrinker); - #ifdef CONFIG_SHRINKER_DEBUG extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...); diff --git a/mm/shrinker.c b/mm/shrinker.c index d1032a4d56845..736fa67e84549 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -655,146 +655,3 @@ void shrinker_free(struct shrinker *shrinker) kfree(shrinker); } EXPORT_SYMBOL_GPL(shrinker_free); - -/* - * Add a shrinker callback to be called from the vm. - */ -static int __prealloc_shrinker(struct shrinker *shrinker) -{ - unsigned int size; - int err; - - if (shrinker->flags & SHRINKER_MEMCG_AWARE) { - err = prealloc_memcg_shrinker(shrinker); - if (err != -ENOSYS) - return err; - - shrinker->flags &= ~SHRINKER_MEMCG_AWARE; - } - - size = sizeof(*shrinker->nr_deferred); - if (shrinker->flags & SHRINKER_NUMA_AWARE) - size *= nr_node_ids; - - shrinker->nr_deferred = kzalloc(size, GFP_KERNEL); - if (!shrinker->nr_deferred) - return -ENOMEM; - - return 0; -} - -#ifdef CONFIG_SHRINKER_DEBUG -int prealloc_shrinker(struct shrinker *shrinker, const char *fmt, ...) -{ - va_list ap; - int err; - - va_start(ap, fmt); - shrinker->name = kvasprintf_const(GFP_KERNEL, fmt, ap); - va_end(ap); - if (!shrinker->name) - return -ENOMEM; - - err = __prealloc_shrinker(shrinker); - if (err) { - kfree_const(shrinker->name); - shrinker->name = NULL; - } - - return err; -} -#else -int prealloc_shrinker(struct shrinker *shrinker, const char *fmt, ...) -{ - return __prealloc_shrinker(shrinker); -} -#endif - -void free_prealloced_shrinker(struct shrinker *shrinker) -{ -#ifdef CONFIG_SHRINKER_DEBUG - kfree_const(shrinker->name); - shrinker->name = NULL; -#endif - if (shrinker->flags & SHRINKER_MEMCG_AWARE) { - down_write(&shrinker_rwsem); - unregister_memcg_shrinker(shrinker); - up_write(&shrinker_rwsem); - return; - } - - kfree(shrinker->nr_deferred); - shrinker->nr_deferred = NULL; -} - -void register_shrinker_prepared(struct shrinker *shrinker) -{ - down_write(&shrinker_rwsem); - list_add_tail(&shrinker->list, &shrinker_list); - shrinker->flags |= SHRINKER_REGISTERED; - shrinker_debugfs_add(shrinker); - up_write(&shrinker_rwsem); -} - -static int __register_shrinker(struct shrinker *shrinker) -{ - int err = __prealloc_shrinker(shrinker); - - if (err) - return err; - register_shrinker_prepared(shrinker); - return 0; -} - -#ifdef CONFIG_SHRINKER_DEBUG -int register_shrinker(struct shrinker *shrinker, const char *fmt, ...) -{ - va_list ap; - int err; - - va_start(ap, fmt); - shrinker->name = kvasprintf_const(GFP_KERNEL, fmt, ap); - va_end(ap); - if (!shrinker->name) - return -ENOMEM; - - err = __register_shrinker(shrinker); - if (err) { - kfree_const(shrinker->name); - shrinker->name = NULL; - } - return err; -} -#else -int register_shrinker(struct shrinker *shrinker, const char *fmt, ...) -{ - return __register_shrinker(shrinker); -} -#endif -EXPORT_SYMBOL(register_shrinker); - -/* - * Remove one - */ -void unregister_shrinker(struct shrinker *shrinker) -{ - struct dentry *debugfs_entry; - int debugfs_id; - - if (!(shrinker->flags & SHRINKER_REGISTERED)) - return; - - down_write(&shrinker_rwsem); - list_del(&shrinker->list); - shrinker->flags &= ~SHRINKER_REGISTERED; - if (shrinker->flags & SHRINKER_MEMCG_AWARE) - unregister_memcg_shrinker(shrinker); - debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id); - up_write(&shrinker_rwsem); - - shrinker_debugfs_remove(debugfs_entry, debugfs_id); - - kfree(shrinker->nr_deferred); - shrinker->nr_deferred = NULL; -} -EXPORT_SYMBOL(unregister_shrinker); From 1f5a2834729e157ce04ba150ccf1ea08fd617629 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:39 +0800 Subject: [PATCH 41/50] mm: shrinker: add a secondary array for shrinker_info::{map, nr_deferred} mainline inclusion from mainline-v6.7-rc1 category: performance Currently, we maintain two linear arrays per node per memcg, which are shrinker_info::map and shrinker_info::nr_deferred. And we need to resize them when the shrinker_nr_max is exceeded, that is, allocate a new array, and then copy the old array to the new array, and finally free the old array by RCU. For shrinker_info::map, we do set_bit() under the RCU lock, so we may set the value into the old map which is about to be freed. This may cause the value set to be lost. The current solution is not to copy the old map when resizing, but to set all the corresponding bits in the new map to 1. This solves the data loss problem, but bring the overhead of more pointless loops while doing memcg slab shrink. For shrinker_info::nr_deferred, we will only modify it under the read lock of shrinker_rwsem, so it will not run concurrently with the resizing. But after we make memcg slab shrink lockless, there will be the same data loss problem as shrinker_info::map, and we can't work around it like the map. For such resizable arrays, the most straightforward idea is to change it to xarray, like we did for list_lru [1]. We need to do xa_store() in the list_lru_add()-->set_shrinker_bit(), but this will cause memory allocation, and the list_lru_add() doesn't accept failure. A possible solution is to pre-allocate, but the location of pre-allocation is not well determined (such as deferred_split_shrinker case). Therefore, this commit chooses to introduce the following secondary array for shrinker_info::{map, nr_deferred}: +---------------+--------+--------+-----+ | shrinker_info | unit 0 | unit 1 | ... | (secondary array) +---------------+--------+--------+-----+ | v +---------------+-----+ | nr_deferred[] | map | (leaf array) +---------------+-----+ (shrinker_info_unit) The leaf array is never freed unless the memcg is destroyed. The secondary array will be resized every time the shrinker id exceeds shrinker_nr_max. So the shrinker_info_unit can be indexed from both the old and the new shrinker_info->unit[x]. Then even if we get the old secondary array under the RCU lock, the found map and nr_deferred are also true, so the updated nr_deferred and map will not be lost. [1]. https://lore.kernel.org/all/20220228122126.37293-13-songmuchun@bytedance.com/ [zhengqi.arch@bytedance.com: unlock the &shrinker_rwsem before the call to free_shrinker_info()] Link: https://lkml.kernel.org/r/20230928141517.12164-1-zhengqi.arch@bytedance.com Link: https://lkml.kernel.org/r/20230911094444.68966-41-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton Conflicts: include/linux/memcontrol.h (cherry picked from commit 307bececcd1205bcb67a3c0d53a69db237ccc9d4) Signed-off-by: Wentao Guan --- include/linux/memcontrol.h | 15 +-- include/linux/shrinker.h | 17 +++ mm/shrinker.c | 250 +++++++++++++++++++++++-------------- 3 files changed, 172 insertions(+), 110 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8ceda226fc2bf..99566b12a20f1 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -21,6 +21,7 @@ #include #include #include +#include #include struct mem_cgroup; @@ -95,20 +96,6 @@ struct mem_cgroup_reclaim_iter { DEEPIN_KABI_RESERVE(3) }; -/* - * Bitmap and deferred work of shrinker::id corresponding to memcg-aware - * shrinkers, which have elements charged to this memcg. - */ -struct shrinker_info { - struct rcu_head rcu; - atomic_long_t *nr_deferred; - unsigned long *map; - int map_nr_max; - DEEPIN_KABI_RESERVE(1) - DEEPIN_KABI_RESERVE(2) - DEEPIN_KABI_RESERVE(3) -}; - struct lruvec_stats_percpu { /* Local (CPU and cgroup) state */ long state[NR_VM_NODE_STAT_ITEMS]; diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index dc012b06f5e2d..383afb6a69133 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -6,6 +6,23 @@ #include #include +#define SHRINKER_UNIT_BITS BITS_PER_LONG + +/* + * Bitmap and deferred work of shrinker::id corresponding to memcg-aware + * shrinkers, which have elements charged to the memcg. + */ +struct shrinker_info_unit { + atomic_long_t nr_deferred[SHRINKER_UNIT_BITS]; + DECLARE_BITMAP(map, SHRINKER_UNIT_BITS); +}; + +struct shrinker_info { + struct rcu_head rcu; + int map_nr_max; + struct shrinker_info_unit *unit[]; +}; + /* * This struct is used to pass information from page reclaim to the shrinkers. * We consolidate the values for easier extension later. diff --git a/mm/shrinker.c b/mm/shrinker.c index 736fa67e84549..e9644cda80b53 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -12,15 +12,50 @@ DECLARE_RWSEM(shrinker_rwsem); #ifdef CONFIG_MEMCG static int shrinker_nr_max; -/* The shrinker_info is expanded in a batch of BITS_PER_LONG */ -static inline int shrinker_map_size(int nr_items) +static inline int shrinker_unit_size(int nr_items) { - return (DIV_ROUND_UP(nr_items, BITS_PER_LONG) * sizeof(unsigned long)); + return (DIV_ROUND_UP(nr_items, SHRINKER_UNIT_BITS) * sizeof(struct shrinker_info_unit *)); } -static inline int shrinker_defer_size(int nr_items) +static inline void shrinker_unit_free(struct shrinker_info *info, int start) { - return (round_up(nr_items, BITS_PER_LONG) * sizeof(atomic_long_t)); + struct shrinker_info_unit **unit; + int nr, i; + + if (!info) + return; + + unit = info->unit; + nr = DIV_ROUND_UP(info->map_nr_max, SHRINKER_UNIT_BITS); + + for (i = start; i < nr; i++) { + if (!unit[i]) + break; + + kfree(unit[i]); + unit[i] = NULL; + } +} + +static inline int shrinker_unit_alloc(struct shrinker_info *new, + struct shrinker_info *old, int nid) +{ + struct shrinker_info_unit *unit; + int nr = DIV_ROUND_UP(new->map_nr_max, SHRINKER_UNIT_BITS); + int start = old ? DIV_ROUND_UP(old->map_nr_max, SHRINKER_UNIT_BITS) : 0; + int i; + + for (i = start; i < nr; i++) { + unit = kzalloc_node(sizeof(*unit), GFP_KERNEL, nid); + if (!unit) { + shrinker_unit_free(new, start); + return -ENOMEM; + } + + new->unit[i] = unit; + } + + return 0; } void free_shrinker_info(struct mem_cgroup *memcg) @@ -32,6 +67,7 @@ void free_shrinker_info(struct mem_cgroup *memcg) for_each_node(nid) { pn = memcg->nodeinfo[nid]; info = rcu_dereference_protected(pn->shrinker_info, true); + shrinker_unit_free(info, 0); kvfree(info); rcu_assign_pointer(pn->shrinker_info, NULL); } @@ -40,28 +76,28 @@ void free_shrinker_info(struct mem_cgroup *memcg) int alloc_shrinker_info(struct mem_cgroup *memcg) { struct shrinker_info *info; - int nid, size, ret = 0; - int map_size, defer_size = 0; + int nid, ret = 0; + int array_size = 0; down_write(&shrinker_rwsem); - map_size = shrinker_map_size(shrinker_nr_max); - defer_size = shrinker_defer_size(shrinker_nr_max); - size = map_size + defer_size; + array_size = shrinker_unit_size(shrinker_nr_max); for_each_node(nid) { - info = kvzalloc_node(sizeof(*info) + size, GFP_KERNEL, nid); - if (!info) { - free_shrinker_info(memcg); - ret = -ENOMEM; - break; - } - info->nr_deferred = (atomic_long_t *)(info + 1); - info->map = (void *)info->nr_deferred + defer_size; + info = kvzalloc_node(sizeof(*info) + array_size, GFP_KERNEL, nid); + if (!info) + goto err; info->map_nr_max = shrinker_nr_max; + if (shrinker_unit_alloc(info, NULL, nid)) + goto err; rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info); } up_write(&shrinker_rwsem); return ret; + +err: + up_write(&shrinker_rwsem); + free_shrinker_info(memcg); + return -ENOMEM; } static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg, @@ -71,15 +107,12 @@ static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg, lockdep_is_held(&shrinker_rwsem)); } -static int expand_one_shrinker_info(struct mem_cgroup *memcg, - int map_size, int defer_size, - int old_map_size, int old_defer_size, - int new_nr_max) +static int expand_one_shrinker_info(struct mem_cgroup *memcg, int new_size, + int old_size, int new_nr_max) { struct shrinker_info *new, *old; struct mem_cgroup_per_node *pn; int nid; - int size = map_size + defer_size; for_each_node(nid) { pn = memcg->nodeinfo[nid]; @@ -92,21 +125,17 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg, if (new_nr_max <= old->map_nr_max) continue; - new = kvmalloc_node(sizeof(*new) + size, GFP_KERNEL, nid); + new = kvmalloc_node(sizeof(*new) + new_size, GFP_KERNEL, nid); if (!new) return -ENOMEM; - new->nr_deferred = (atomic_long_t *)(new + 1); - new->map = (void *)new->nr_deferred + defer_size; new->map_nr_max = new_nr_max; - /* map: set all old bits, clear all new bits */ - memset(new->map, (int)0xff, old_map_size); - memset((void *)new->map + old_map_size, 0, map_size - old_map_size); - /* nr_deferred: copy old values, clear all new values */ - memcpy(new->nr_deferred, old->nr_deferred, old_defer_size); - memset((void *)new->nr_deferred + old_defer_size, 0, - defer_size - old_defer_size); + memcpy(new->unit, old->unit, old_size); + if (shrinker_unit_alloc(new, old, nid)) { + kvfree(new); + return -ENOMEM; + } rcu_assign_pointer(pn->shrinker_info, new); kvfree_rcu(old, rcu); @@ -118,9 +147,8 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg, static int expand_shrinker_info(int new_id) { int ret = 0; - int new_nr_max = round_up(new_id + 1, BITS_PER_LONG); - int map_size, defer_size = 0; - int old_map_size, old_defer_size = 0; + int new_nr_max = round_up(new_id + 1, SHRINKER_UNIT_BITS); + int new_size, old_size = 0; struct mem_cgroup *memcg; if (!root_mem_cgroup) @@ -128,15 +156,12 @@ static int expand_shrinker_info(int new_id) lockdep_assert_held(&shrinker_rwsem); - map_size = shrinker_map_size(new_nr_max); - defer_size = shrinker_defer_size(new_nr_max); - old_map_size = shrinker_map_size(shrinker_nr_max); - old_defer_size = shrinker_defer_size(shrinker_nr_max); + new_size = shrinker_unit_size(new_nr_max); + old_size = shrinker_unit_size(shrinker_nr_max); memcg = mem_cgroup_iter(NULL, NULL, NULL); do { - ret = expand_one_shrinker_info(memcg, map_size, defer_size, - old_map_size, old_defer_size, + ret = expand_one_shrinker_info(memcg, new_size, old_size, new_nr_max); if (ret) { mem_cgroup_iter_break(NULL, memcg); @@ -150,17 +175,34 @@ static int expand_shrinker_info(int new_id) return ret; } +static inline int shrinker_id_to_index(int shrinker_id) +{ + return shrinker_id / SHRINKER_UNIT_BITS; +} + +static inline int shrinker_id_to_offset(int shrinker_id) +{ + return shrinker_id % SHRINKER_UNIT_BITS; +} + +static inline int calc_shrinker_id(int index, int offset) +{ + return index * SHRINKER_UNIT_BITS + offset; +} + void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id) { if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) { struct shrinker_info *info; + struct shrinker_info_unit *unit; rcu_read_lock(); info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info); + unit = info->unit[shrinker_id_to_index(shrinker_id)]; if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) { /* Pairs with smp mb in shrink_slab() */ smp_mb__before_atomic(); - set_bit(shrinker_id, info->map); + set_bit(shrinker_id_to_offset(shrinker_id), unit->map); } rcu_read_unlock(); } @@ -209,26 +251,31 @@ static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker, struct mem_cgroup *memcg) { struct shrinker_info *info; + struct shrinker_info_unit *unit; info = shrinker_info_protected(memcg, nid); - return atomic_long_xchg(&info->nr_deferred[shrinker->id], 0); + unit = info->unit[shrinker_id_to_index(shrinker->id)]; + return atomic_long_xchg(&unit->nr_deferred[shrinker_id_to_offset(shrinker->id)], 0); } static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker, struct mem_cgroup *memcg) { struct shrinker_info *info; + struct shrinker_info_unit *unit; info = shrinker_info_protected(memcg, nid); - return atomic_long_add_return(nr, &info->nr_deferred[shrinker->id]); + unit = info->unit[shrinker_id_to_index(shrinker->id)]; + return atomic_long_add_return(nr, &unit->nr_deferred[shrinker_id_to_offset(shrinker->id)]); } void reparent_shrinker_deferred(struct mem_cgroup *memcg) { - int i, nid; + int nid, index, offset; long nr; struct mem_cgroup *parent; struct shrinker_info *child_info, *parent_info; + struct shrinker_info_unit *child_unit, *parent_unit; parent = parent_mem_cgroup(memcg); if (!parent) @@ -239,9 +286,13 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg) for_each_node(nid) { child_info = shrinker_info_protected(memcg, nid); parent_info = shrinker_info_protected(parent, nid); - for (i = 0; i < child_info->map_nr_max; i++) { - nr = atomic_long_read(&child_info->nr_deferred[i]); - atomic_long_add(nr, &parent_info->nr_deferred[i]); + for (index = 0; index < shrinker_id_to_index(child_info->map_nr_max); index++) { + child_unit = child_info->unit[index]; + parent_unit = parent_info->unit[index]; + for (offset = 0; offset < SHRINKER_UNIT_BITS; offset++) { + nr = atomic_long_read(&child_unit->nr_deferred[offset]); + atomic_long_add(nr, &parent_unit->nr_deferred[offset]); + } } } up_read(&shrinker_rwsem); @@ -407,7 +458,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, { struct shrinker_info *info; unsigned long ret, freed = 0; - int i; + int offset, index = 0; if (!mem_cgroup_online(memcg)) return 0; @@ -419,56 +470,63 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, if (unlikely(!info)) goto unlock; - for_each_set_bit(i, info->map, info->map_nr_max) { - struct shrink_control sc = { - .gfp_mask = gfp_mask, - .nid = nid, - .memcg = memcg, - }; - struct shrinker *shrinker; + for (; index < shrinker_id_to_index(info->map_nr_max); index++) { + struct shrinker_info_unit *unit; - shrinker = idr_find(&shrinker_idr, i); - if (unlikely(!shrinker || !(shrinker->flags & SHRINKER_REGISTERED))) { - if (!shrinker) - clear_bit(i, info->map); - continue; - } + unit = info->unit[index]; - /* Call non-slab shrinkers even though kmem is disabled */ - if (!memcg_kmem_online() && - !(shrinker->flags & SHRINKER_NONSLAB)) - continue; + for_each_set_bit(offset, unit->map, SHRINKER_UNIT_BITS) { + struct shrink_control sc = { + .gfp_mask = gfp_mask, + .nid = nid, + .memcg = memcg, + }; + struct shrinker *shrinker; + int shrinker_id = calc_shrinker_id(index, offset); - ret = do_shrink_slab(&sc, shrinker, priority); - if (ret == SHRINK_EMPTY) { - clear_bit(i, info->map); - /* - * After the shrinker reported that it had no objects to - * free, but before we cleared the corresponding bit in - * the memcg shrinker map, a new object might have been - * added. To make sure, we have the bit set in this - * case, we invoke the shrinker one more time and reset - * the bit if it reports that it is not empty anymore. - * The memory barrier here pairs with the barrier in - * set_shrinker_bit(): - * - * list_lru_add() shrink_slab_memcg() - * list_add_tail() clear_bit() - * - * set_bit() do_shrink_slab() - */ - smp_mb__after_atomic(); - ret = do_shrink_slab(&sc, shrinker, priority); - if (ret == SHRINK_EMPTY) - ret = 0; - else - set_shrinker_bit(memcg, nid, i); - } - freed += ret; + shrinker = idr_find(&shrinker_idr, shrinker_id); + if (unlikely(!shrinker || !(shrinker->flags & SHRINKER_REGISTERED))) { + if (!shrinker) + clear_bit(offset, unit->map); + continue; + } - if (rwsem_is_contended(&shrinker_rwsem)) { - freed = freed ? : 1; - break; + /* Call non-slab shrinkers even though kmem is disabled */ + if (!memcg_kmem_online() && + !(shrinker->flags & SHRINKER_NONSLAB)) + continue; + + ret = do_shrink_slab(&sc, shrinker, priority); + if (ret == SHRINK_EMPTY) { + clear_bit(offset, unit->map); + /* + * After the shrinker reported that it had no objects to + * free, but before we cleared the corresponding bit in + * the memcg shrinker map, a new object might have been + * added. To make sure, we have the bit set in this + * case, we invoke the shrinker one more time and reset + * the bit if it reports that it is not empty anymore. + * The memory barrier here pairs with the barrier in + * set_shrinker_bit(): + * + * list_lru_add() shrink_slab_memcg() + * list_add_tail() clear_bit() + * + * set_bit() do_shrink_slab() + */ + smp_mb__after_atomic(); + ret = do_shrink_slab(&sc, shrinker, priority); + if (ret == SHRINK_EMPTY) + ret = 0; + else + set_shrinker_bit(memcg, nid, shrinker_id); + } + freed += ret; + + if (rwsem_is_contended(&shrinker_rwsem)) { + freed = freed ? : 1; + goto unlock; + } } } unlock: From 5a90fbfeeb057f4145fa6d9aceb36069e15c7c16 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:40 +0800 Subject: [PATCH 42/50] mm: shrinker: rename {prealloc|unregister}_memcg_shrinker() to shrinker_memcg_{alloc|remove}() mainline inclusion from mainline-v6.7-rc1 category: performance With the new shrinker APIs, there is no action such as prealloc, so rename {prealloc|unregister}_memcg_shrinker() to shrinker_memcg_{alloc|remove}(), which corresponds to the idr_{alloc|remove}() inside the function. Link: https://lkml.kernel.org/r/20230911094444.68966-42-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 48a7a0996a0089f4161ad437a810a10596e8d278) Signed-off-by: Wentao Guan --- mm/shrinker.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/shrinker.c b/mm/shrinker.c index e9644cda80b53..e2e832b158036 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -210,7 +210,7 @@ void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id) static DEFINE_IDR(shrinker_idr); -static int prealloc_memcg_shrinker(struct shrinker *shrinker) +static int shrinker_memcg_alloc(struct shrinker *shrinker) { int id, ret = -ENOMEM; @@ -236,7 +236,7 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker) return ret; } -static void unregister_memcg_shrinker(struct shrinker *shrinker) +static void shrinker_memcg_remove(struct shrinker *shrinker) { int id = shrinker->id; @@ -298,12 +298,12 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg) up_read(&shrinker_rwsem); } #else -static int prealloc_memcg_shrinker(struct shrinker *shrinker) +static int shrinker_memcg_alloc(struct shrinker *shrinker) { return -ENOSYS; } -static void unregister_memcg_shrinker(struct shrinker *shrinker) +static void shrinker_memcg_remove(struct shrinker *shrinker) { } @@ -629,7 +629,7 @@ struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...) shrinker->seeks = DEFAULT_SEEKS; if (flags & SHRINKER_MEMCG_AWARE) { - err = prealloc_memcg_shrinker(shrinker); + err = shrinker_memcg_alloc(shrinker); if (err == -ENOSYS) { /* Memcg is not supported, fallback to non-memcg-aware shrinker. */ shrinker->flags &= ~SHRINKER_MEMCG_AWARE; @@ -701,7 +701,7 @@ void shrinker_free(struct shrinker *shrinker) shrinker_debugfs_name_free(shrinker); if (shrinker->flags & SHRINKER_MEMCG_AWARE) - unregister_memcg_shrinker(shrinker); + shrinker_memcg_remove(shrinker); up_write(&shrinker_rwsem); if (debugfs_entry) From 197f0674bc04eb88e3d568e39808208005831cf6 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:41 +0800 Subject: [PATCH 43/50] mm: shrinker: make global slab shrink lockless mainline inclusion from mainline-v6.7-rc1 category: performance The shrinker_rwsem is a global read-write lock in shrinkers subsystem, which protects most operations such as slab shrink, registration and unregistration of shrinkers, etc. This can easily cause problems in the following cases. 1) When the memory pressure is high and there are many filesystems mounted or unmounted at the same time, slab shrink will be affected (down_read_trylock() failed). Such as the real workload mentioned by Kirill Tkhai: ``` One of the real workloads from my experience is start of an overcommitted node containing many starting containers after node crash (or many resuming containers after reboot for kernel update). In these cases memory pressure is huge, and the node goes round in long reclaim. ``` 2) If a shrinker is blocked (such as the case mentioned in [1]) and a writer comes in (such as mount a fs), then this writer will be blocked and cause all subsequent shrinker-related operations to be blocked. Even if there is no competitor when shrinking slab, there may still be a problem. The down_read_trylock() may become a perf hotspot with frequent calls to shrink_slab(). Because of the poor multicore scalability of atomic operations, this can lead to a significant drop in IPC (instructions per cycle). We used to implement the lockless slab shrink with SRCU [2], but then kernel test robot reported -88.8% regression in stress-ng.ramfs.ops_per_sec test case [3], so we reverted it [4]. This commit uses the refcount+RCU method [5] proposed by Dave Chinner to re-implement the lockless global slab shrink. The memcg slab shrink is handled in the subsequent patch. For now, all shrinker instances are converted to dynamically allocated and will be freed by call_rcu(). So we can use rcu_read_{lock,unlock}() to ensure that the shrinker instance is valid. And the shrinker instance will not be run again after unregistration. So the structure that records the pointer of shrinker instance can be safely freed without waiting for the RCU read-side critical section. In this way, while we implement the lockless slab shrink, we don't need to be blocked in unregister_shrinker(). The following are the test results: stress-ng --timeout 60 --times --verify --metrics-brief --ramfs 9 & 1) Before applying this patchset: setting to a 60 second run per stressor dispatching hogs: 9 ramfs stressor bogo ops real time usr time sys time bogo ops/s bogo ops/s (secs) (secs) (secs) (real time) (usr+sys time) ramfs 473062 60.00 8.00 279.13 7884.12 1647.59 for a 60.01s run time: 1440.34s available CPU time 7.99s user time ( 0.55%) 279.13s system time ( 19.38%) 287.12s total time ( 19.93%) load average: 7.12 2.99 1.15 successful run completed in 60.01s (1 min, 0.01 secs) 2) After applying this patchset: setting to a 60 second run per stressor dispatching hogs: 9 ramfs stressor bogo ops real time usr time sys time bogo ops/s bogo ops/s (secs) (secs) (secs) (real time) (usr+sys time) ramfs 477165 60.00 8.13 281.34 7952.55 1648.40 for a 60.01s run time: 1440.33s available CPU time 8.12s user time ( 0.56%) 281.34s system time ( 19.53%) 289.46s total time ( 20.10%) load average: 6.98 3.03 1.19 successful run completed in 60.01s (1 min, 0.01 secs) We can see that the ops/s has hardly changed. [1]. https://lore.kernel.org/lkml/20191129214541.3110-1-ptikhomirov@virtuozzo.com/ [2]. https://lore.kernel.org/lkml/20230313112819.38938-1-zhengqi.arch@bytedance.com/ [3]. https://lore.kernel.org/lkml/202305230837.db2c233f-yujie.liu@intel.com/ [4]. https://lore.kernel.org/all/20230609081518.3039120-1-qi.zheng@linux.dev/ [5]. https://lore.kernel.org/lkml/ZIJhou1d55d4H1s0@dread.disaster.area/ Link: https://lkml.kernel.org/r/20230911094444.68966-43-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton Conflicts: include/linux/shrinker.h (cherry picked from commit ca1d36b823944f24b5755311e95883fb5fdb807b) Signed-off-by: Wentao Guan --- include/linux/shrinker.h | 24 +++++++++++ mm/shrinker.c | 89 ++++++++++++++++++++++++++++++---------- 2 files changed, 92 insertions(+), 21 deletions(-) diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 383afb6a69133..1a30bd6ba801b 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -4,6 +4,8 @@ #include #include +#include +#include #include #define SHRINKER_UNIT_BITS BITS_PER_LONG @@ -91,6 +93,17 @@ struct shrinker { int seeks; /* seeks to recreate an obj */ unsigned flags; + /* + * The reference count of this shrinker. Registered shrinker have an + * initial refcount of 1, then the lookup operations are now allowed + * to use it via shrinker_try_get(). Later in the unregistration step, + * the initial refcount will be discarded, and will free the shrinker + * asynchronously via RCU after its refcount reaches 0. + */ + refcount_t refcount; + struct completion done; /* use to wait for refcount to reach 0 */ + struct rcu_head rcu; + void *private_data; /* These are for internal use */ @@ -129,6 +142,17 @@ struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...); void shrinker_register(struct shrinker *shrinker); void shrinker_free(struct shrinker *shrinker); +static inline bool shrinker_try_get(struct shrinker *shrinker) +{ + return refcount_inc_not_zero(&shrinker->refcount); +} + +static inline void shrinker_put(struct shrinker *shrinker) +{ + if (refcount_dec_and_test(&shrinker->refcount)) + complete(&shrinker->done); +} + #ifdef CONFIG_SHRINKER_DEBUG extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...); diff --git a/mm/shrinker.c b/mm/shrinker.c index e2e832b158036..9bb2e61092b36 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "internal.h" @@ -577,33 +578,50 @@ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg)) return shrink_slab_memcg(gfp_mask, nid, memcg, priority); - if (!down_read_trylock(&shrinker_rwsem)) - goto out; - - list_for_each_entry(shrinker, &shrinker_list, list) { + /* + * lockless algorithm of global shrink. + * + * In the unregistration setp, the shrinker will be freed asynchronously + * via RCU after its refcount reaches 0. So both rcu_read_lock() and + * shrinker_try_get() can be used to ensure the existence of the shrinker. + * + * So in the global shrink: + * step 1: use rcu_read_lock() to guarantee existence of the shrinker + * and the validity of the shrinker_list walk. + * step 2: use shrinker_try_get() to try get the refcount, if successful, + * then the existence of the shrinker can also be guaranteed, + * so we can release the RCU lock to do do_shrink_slab() that + * may sleep. + * step 3: *MUST* to reacquire the RCU lock before calling shrinker_put(), + * which ensures that neither this shrinker nor the next shrinker + * will be freed in the next traversal operation. + * step 4: do shrinker_put() paired with step 2 to put the refcount, + * if the refcount reaches 0, then wake up the waiter in + * shrinker_free() by calling complete(). + */ + rcu_read_lock(); + list_for_each_entry_rcu(shrinker, &shrinker_list, list) { struct shrink_control sc = { .gfp_mask = gfp_mask, .nid = nid, .memcg = memcg, }; + if (!shrinker_try_get(shrinker)) + continue; + + rcu_read_unlock(); + ret = do_shrink_slab(&sc, shrinker, priority); if (ret == SHRINK_EMPTY) ret = 0; freed += ret; - /* - * Bail out if someone want to register a new shrinker to - * prevent the registration from being stalled for long periods - * by parallel ongoing shrinking. - */ - if (rwsem_is_contended(&shrinker_rwsem)) { - freed = freed ? : 1; - break; - } + + rcu_read_lock(); + shrinker_put(shrinker); } - up_read(&shrinker_rwsem); -out: + rcu_read_unlock(); cond_resched(); return freed; } @@ -676,13 +694,29 @@ void shrinker_register(struct shrinker *shrinker) } down_write(&shrinker_rwsem); - list_add_tail(&shrinker->list, &shrinker_list); + list_add_tail_rcu(&shrinker->list, &shrinker_list); shrinker->flags |= SHRINKER_REGISTERED; shrinker_debugfs_add(shrinker); up_write(&shrinker_rwsem); + + init_completion(&shrinker->done); + /* + * Now the shrinker is fully set up, take the first reference to it to + * indicate that lookup operations are now allowed to use it via + * shrinker_try_get(). + */ + refcount_set(&shrinker->refcount, 1); } EXPORT_SYMBOL_GPL(shrinker_register); +static void shrinker_free_rcu_cb(struct rcu_head *head) +{ + struct shrinker *shrinker = container_of(head, struct shrinker, rcu); + + kfree(shrinker->nr_deferred); + kfree(shrinker); +} + void shrinker_free(struct shrinker *shrinker) { struct dentry *debugfs_entry = NULL; @@ -691,9 +725,25 @@ void shrinker_free(struct shrinker *shrinker) if (!shrinker) return; + if (shrinker->flags & SHRINKER_REGISTERED) { + /* drop the initial refcount */ + shrinker_put(shrinker); + /* + * Wait for all lookups of the shrinker to complete, after that, + * no shrinker is running or will run again, then we can safely + * free it asynchronously via RCU and safely free the structure + * where the shrinker is located, such as super_block etc. + */ + wait_for_completion(&shrinker->done); + } + down_write(&shrinker_rwsem); if (shrinker->flags & SHRINKER_REGISTERED) { - list_del(&shrinker->list); + /* + * Now we can safely remove it from the shrinker_list and then + * free it. + */ + list_del_rcu(&shrinker->list); debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id); shrinker->flags &= ~SHRINKER_REGISTERED; } @@ -707,9 +757,6 @@ void shrinker_free(struct shrinker *shrinker) if (debugfs_entry) shrinker_debugfs_remove(debugfs_entry, debugfs_id); - kfree(shrinker->nr_deferred); - shrinker->nr_deferred = NULL; - - kfree(shrinker); + call_rcu(&shrinker->rcu, shrinker_free_rcu_cb); } EXPORT_SYMBOL_GPL(shrinker_free); From 1b4c87eacd2e2932b00bab2c86c58f3f1d46d021 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:42 +0800 Subject: [PATCH 44/50] mm: shrinker: make memcg slab shrink lockless mainline inclusion from mainline-v6.7-rc1 category: performance Like global slab shrink, this commit also uses refcount+RCU method to make memcg slab shrink lockless. Use the following script to do slab shrink stress test: ``` DIR="/root/shrinker/memcg/mnt" do_create() { mkdir -p /sys/fs/cgroup/memory/test echo 4G > /sys/fs/cgroup/memory/test/memory.limit_in_bytes for i in `seq 0 $1`; do mkdir -p /sys/fs/cgroup/memory/test/$i; echo $$ > /sys/fs/cgroup/memory/test/$i/cgroup.procs; mkdir -p $DIR/$i; done } do_mount() { for i in `seq $1 $2`; do mount -t tmpfs $i $DIR/$i; done } do_touch() { for i in `seq $1 $2`; do echo $$ > /sys/fs/cgroup/memory/test/$i/cgroup.procs; dd if=/dev/zero of=$DIR/$i/file$i bs=1M count=1 & done } case "$1" in touch) do_touch $2 $3 ;; test) do_create 4000 do_mount 0 4000 do_touch 0 3000 ;; *) exit 1 ;; esac ``` Save the above script, then run test and touch commands. Then we can use the following perf command to view hotspots: perf top -U -F 999 1) Before applying this patchset: 33.15% [kernel] [k] down_read_trylock 25.38% [kernel] [k] shrink_slab 21.75% [kernel] [k] up_read 4.45% [kernel] [k] _find_next_bit 2.27% [kernel] [k] do_shrink_slab 1.80% [kernel] [k] intel_idle_irq 1.79% [kernel] [k] shrink_lruvec 0.67% [kernel] [k] xas_descend 0.41% [kernel] [k] mem_cgroup_iter 0.40% [kernel] [k] shrink_node 0.38% [kernel] [k] list_lru_count_one 2) After applying this patchset: 64.56% [kernel] [k] shrink_slab 12.18% [kernel] [k] do_shrink_slab 3.30% [kernel] [k] __rcu_read_unlock 2.61% [kernel] [k] shrink_lruvec 2.49% [kernel] [k] __rcu_read_lock 1.93% [kernel] [k] intel_idle_irq 0.89% [kernel] [k] shrink_node 0.81% [kernel] [k] mem_cgroup_iter 0.77% [kernel] [k] mem_cgroup_calculate_protection 0.66% [kernel] [k] list_lru_count_one We can see that the first perf hotspot becomes shrink_slab, which is what we expect. Link: https://lkml.kernel.org/r/20230911094444.68966-44-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 50d09da8e1199092a128eebd8a986d1fb0335fe4) Signed-off-by: Wentao Guan --- mm/shrinker.c | 85 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 66 insertions(+), 19 deletions(-) diff --git a/mm/shrinker.c b/mm/shrinker.c index 9bb2e61092b36..d1b34a79c7a7b 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -219,7 +219,6 @@ static int shrinker_memcg_alloc(struct shrinker *shrinker) return -ENOSYS; down_write(&shrinker_rwsem); - /* This may call shrinker, so it must use down_read_trylock() */ id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL); if (id < 0) goto unlock; @@ -253,10 +252,15 @@ static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker, { struct shrinker_info *info; struct shrinker_info_unit *unit; + long nr_deferred; - info = shrinker_info_protected(memcg, nid); + rcu_read_lock(); + info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info); unit = info->unit[shrinker_id_to_index(shrinker->id)]; - return atomic_long_xchg(&unit->nr_deferred[shrinker_id_to_offset(shrinker->id)], 0); + nr_deferred = atomic_long_xchg(&unit->nr_deferred[shrinker_id_to_offset(shrinker->id)], 0); + rcu_read_unlock(); + + return nr_deferred; } static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker, @@ -264,10 +268,16 @@ static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker, { struct shrinker_info *info; struct shrinker_info_unit *unit; + long nr_deferred; - info = shrinker_info_protected(memcg, nid); + rcu_read_lock(); + info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info); unit = info->unit[shrinker_id_to_index(shrinker->id)]; - return atomic_long_add_return(nr, &unit->nr_deferred[shrinker_id_to_offset(shrinker->id)]); + nr_deferred = + atomic_long_add_return(nr, &unit->nr_deferred[shrinker_id_to_offset(shrinker->id)]); + rcu_read_unlock(); + + return nr_deferred; } void reparent_shrinker_deferred(struct mem_cgroup *memcg) @@ -464,18 +474,54 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, if (!mem_cgroup_online(memcg)) return 0; - if (!down_read_trylock(&shrinker_rwsem)) - return 0; - - info = shrinker_info_protected(memcg, nid); + /* + * lockless algorithm of memcg shrink. + * + * The shrinker_info may be freed asynchronously via RCU in the + * expand_one_shrinker_info(), so the rcu_read_lock() needs to be used + * to ensure the existence of the shrinker_info. + * + * The shrinker_info_unit is never freed unless its corresponding memcg + * is destroyed. Here we already hold the refcount of memcg, so the + * memcg will not be destroyed, and of course shrinker_info_unit will + * not be freed. + * + * So in the memcg shrink: + * step 1: use rcu_read_lock() to guarantee existence of the + * shrinker_info. + * step 2: after getting shrinker_info_unit we can safely release the + * RCU lock. + * step 3: traverse the bitmap and calculate shrinker_id + * step 4: use rcu_read_lock() to guarantee existence of the shrinker. + * step 5: use shrinker_id to find the shrinker, then use + * shrinker_try_get() to guarantee existence of the shrinker, + * then we can release the RCU lock to do do_shrink_slab() that + * may sleep. + * step 6: do shrinker_put() paired with step 5 to put the refcount, + * if the refcount reaches 0, then wake up the waiter in + * shrinker_free() by calling complete(). + * Note: here is different from the global shrink, we don't + * need to acquire the RCU lock to guarantee existence of + * the shrinker, because we don't need to use this + * shrinker to traverse the next shrinker in the bitmap. + * step 7: we have already exited the read-side of rcu critical section + * before calling do_shrink_slab(), the shrinker_info may be + * released in expand_one_shrinker_info(), so go back to step 1 + * to reacquire the shrinker_info. + */ +again: + rcu_read_lock(); + info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info); if (unlikely(!info)) goto unlock; - for (; index < shrinker_id_to_index(info->map_nr_max); index++) { + if (index < shrinker_id_to_index(info->map_nr_max)) { struct shrinker_info_unit *unit; unit = info->unit[index]; + rcu_read_unlock(); + for_each_set_bit(offset, unit->map, SHRINKER_UNIT_BITS) { struct shrink_control sc = { .gfp_mask = gfp_mask, @@ -485,12 +531,14 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, struct shrinker *shrinker; int shrinker_id = calc_shrinker_id(index, offset); + rcu_read_lock(); shrinker = idr_find(&shrinker_idr, shrinker_id); - if (unlikely(!shrinker || !(shrinker->flags & SHRINKER_REGISTERED))) { - if (!shrinker) - clear_bit(offset, unit->map); + if (unlikely(!shrinker || !shrinker_try_get(shrinker))) { + clear_bit(offset, unit->map); + rcu_read_unlock(); continue; } + rcu_read_unlock(); /* Call non-slab shrinkers even though kmem is disabled */ if (!memcg_kmem_online() && @@ -523,15 +571,14 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, set_shrinker_bit(memcg, nid, shrinker_id); } freed += ret; - - if (rwsem_is_contended(&shrinker_rwsem)) { - freed = freed ? : 1; - goto unlock; - } + shrinker_put(shrinker); } + + index++; + goto again; } unlock: - up_read(&shrinker_rwsem); + rcu_read_unlock(); return freed; } #else /* !CONFIG_MEMCG */ From 8208597d34415e9f91c9f9e02b8264daea5bb64a Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:43 +0800 Subject: [PATCH 45/50] mm: shrinker: hold write lock to reparent shrinker nr_deferred mainline inclusion from mainline-v6.7-rc1 category: performance For now, reparent_shrinker_deferred() is the only holder of read lock of shrinker_rwsem. And it already holds the global cgroup_mutex, so it will not be called in parallel. Therefore, in order to convert shrinker_rwsem to shrinker_mutex later, here we change to hold the write lock of shrinker_rwsem to reparent. Link: https://lkml.kernel.org/r/20230911094444.68966-45-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 604b8b655020816b66ae474681d1100c5c0ba8c4) Signed-off-by: Wentao Guan --- mm/shrinker.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/shrinker.c b/mm/shrinker.c index d1b34a79c7a7b..ef22f20a97cbd 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -293,7 +293,7 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg) parent = root_mem_cgroup; /* Prevent from concurrent shrinker_info expand */ - down_read(&shrinker_rwsem); + down_write(&shrinker_rwsem); for_each_node(nid) { child_info = shrinker_info_protected(memcg, nid); parent_info = shrinker_info_protected(parent, nid); @@ -306,7 +306,7 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg) } } } - up_read(&shrinker_rwsem); + up_write(&shrinker_rwsem); } #else static int shrinker_memcg_alloc(struct shrinker *shrinker) From 412781fdc0d82419f77b7afeed8d4e6ab5bab983 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:44 +0800 Subject: [PATCH 46/50] mm: shrinker: convert shrinker_rwsem to mutex mainline inclusion from mainline-v6.7-rc1 category: performance Now there are no readers of shrinker_rwsem, so we can simply replace it with mutex lock. [akpm@linux-foundation.org: update the fix to alloc_shrinker_info()] Link: https://lkml.kernel.org/r/20230911094444.68966-46-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton (cherry picked from commit 8a0e8bb112af28362514624fc46e20426b4bdf1f) Signed-off-by: Wentao Guan --- drivers/md/dm-cache-metadata.c | 2 +- fs/super.c | 2 +- mm/shrinker.c | 30 +++++++++++++++--------------- mm/shrinker_debug.c | 14 +++++++------- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 74078b90653f3..ac65d0652d69f 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -1823,7 +1823,7 @@ int dm_cache_metadata_abort(struct dm_cache_metadata *cmd) * Replacement block manager (new_bm) is created and old_bm destroyed outside of * cmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of * shrinker associated with the block manager's bufio client vs cmd root_lock). - * - must take shrinker_rwsem without holding cmd->root_lock + * - must take shrinker_mutex without holding cmd->root_lock */ new_bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT, CACHE_MAX_CONCURRENT_LOCKS); diff --git a/fs/super.c b/fs/super.c index 6555164b453c7..e1cc7e9e534c0 100644 --- a/fs/super.c +++ b/fs/super.c @@ -178,7 +178,7 @@ static void super_wake(struct super_block *sb, unsigned int flag) * One thing we have to be careful of with a per-sb shrinker is that we don't * drop the last active reference to the superblock from within the shrinker. * If that happens we could trigger unregistering the shrinker from within the - * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we + * shrinker path and that leads to deadlock on the shrinker_mutex. Hence we * take a passive reference to the superblock to avoid this from occurring. */ static unsigned long super_cache_scan(struct shrinker *shrink, diff --git a/mm/shrinker.c b/mm/shrinker.c index ef22f20a97cbd..dd91eab43ed30 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -8,7 +8,7 @@ #include "internal.h" LIST_HEAD(shrinker_list); -DECLARE_RWSEM(shrinker_rwsem); +DEFINE_MUTEX(shrinker_mutex); #ifdef CONFIG_MEMCG static int shrinker_nr_max; @@ -80,7 +80,7 @@ int alloc_shrinker_info(struct mem_cgroup *memcg) int nid, ret = 0; int array_size = 0; - down_write(&shrinker_rwsem); + mutex_lock(&shrinker_mutex); array_size = shrinker_unit_size(shrinker_nr_max); for_each_node(nid) { info = kvzalloc_node(sizeof(*info) + array_size, GFP_KERNEL, nid); @@ -91,12 +91,12 @@ int alloc_shrinker_info(struct mem_cgroup *memcg) goto err; rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info); } - up_write(&shrinker_rwsem); + mutex_unlock(&shrinker_mutex); return ret; err: - up_write(&shrinker_rwsem); + mutex_unlock(&shrinker_mutex); free_shrinker_info(memcg); return -ENOMEM; } @@ -105,7 +105,7 @@ static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg, int nid) { return rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info, - lockdep_is_held(&shrinker_rwsem)); + lockdep_is_held(&shrinker_mutex)); } static int expand_one_shrinker_info(struct mem_cgroup *memcg, int new_size, @@ -155,7 +155,7 @@ static int expand_shrinker_info(int new_id) if (!root_mem_cgroup) goto out; - lockdep_assert_held(&shrinker_rwsem); + lockdep_assert_held(&shrinker_mutex); new_size = shrinker_unit_size(new_nr_max); old_size = shrinker_unit_size(shrinker_nr_max); @@ -218,7 +218,7 @@ static int shrinker_memcg_alloc(struct shrinker *shrinker) if (mem_cgroup_disabled()) return -ENOSYS; - down_write(&shrinker_rwsem); + mutex_lock(&shrinker_mutex); id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL); if (id < 0) goto unlock; @@ -232,7 +232,7 @@ static int shrinker_memcg_alloc(struct shrinker *shrinker) shrinker->id = id; ret = 0; unlock: - up_write(&shrinker_rwsem); + mutex_unlock(&shrinker_mutex); return ret; } @@ -242,7 +242,7 @@ static void shrinker_memcg_remove(struct shrinker *shrinker) BUG_ON(id < 0); - lockdep_assert_held(&shrinker_rwsem); + lockdep_assert_held(&shrinker_mutex); idr_remove(&shrinker_idr, id); } @@ -293,7 +293,7 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg) parent = root_mem_cgroup; /* Prevent from concurrent shrinker_info expand */ - down_write(&shrinker_rwsem); + mutex_lock(&shrinker_mutex); for_each_node(nid) { child_info = shrinker_info_protected(memcg, nid); parent_info = shrinker_info_protected(parent, nid); @@ -306,7 +306,7 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg) } } } - up_write(&shrinker_rwsem); + mutex_unlock(&shrinker_mutex); } #else static int shrinker_memcg_alloc(struct shrinker *shrinker) @@ -740,11 +740,11 @@ void shrinker_register(struct shrinker *shrinker) return; } - down_write(&shrinker_rwsem); + mutex_lock(&shrinker_mutex); list_add_tail_rcu(&shrinker->list, &shrinker_list); shrinker->flags |= SHRINKER_REGISTERED; shrinker_debugfs_add(shrinker); - up_write(&shrinker_rwsem); + mutex_unlock(&shrinker_mutex); init_completion(&shrinker->done); /* @@ -784,7 +784,7 @@ void shrinker_free(struct shrinker *shrinker) wait_for_completion(&shrinker->done); } - down_write(&shrinker_rwsem); + mutex_lock(&shrinker_mutex); if (shrinker->flags & SHRINKER_REGISTERED) { /* * Now we can safely remove it from the shrinker_list and then @@ -799,7 +799,7 @@ void shrinker_free(struct shrinker *shrinker) if (shrinker->flags & SHRINKER_MEMCG_AWARE) shrinker_memcg_remove(shrinker); - up_write(&shrinker_rwsem); + mutex_unlock(&shrinker_mutex); if (debugfs_entry) shrinker_debugfs_remove(debugfs_entry, debugfs_id); diff --git a/mm/shrinker_debug.c b/mm/shrinker_debug.c index 24aebe7c24cc5..12ea5486a3e95 100644 --- a/mm/shrinker_debug.c +++ b/mm/shrinker_debug.c @@ -9,7 +9,7 @@ #include "internal.h" /* defined in vmscan.c */ -extern struct rw_semaphore shrinker_rwsem; +extern struct mutex shrinker_mutex; extern struct list_head shrinker_list; static DEFINE_IDA(shrinker_debugfs_ida); @@ -165,7 +165,7 @@ int shrinker_debugfs_add(struct shrinker *shrinker) char buf[128]; int id; - lockdep_assert_held(&shrinker_rwsem); + lockdep_assert_held(&shrinker_mutex); /* debugfs isn't initialized yet, add debugfs entries later. */ if (!shrinker_debugfs_root) @@ -208,7 +208,7 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) if (!new) return -ENOMEM; - down_write(&shrinker_rwsem); + mutex_lock(&shrinker_mutex); old = shrinker->name; shrinker->name = new; @@ -226,7 +226,7 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) shrinker->debugfs_entry = entry; } - up_write(&shrinker_rwsem); + mutex_unlock(&shrinker_mutex); kfree_const(old); @@ -239,7 +239,7 @@ struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, { struct dentry *entry = shrinker->debugfs_entry; - lockdep_assert_held(&shrinker_rwsem); + lockdep_assert_held(&shrinker_mutex); *debugfs_id = entry ? shrinker->debugfs_id : -1; shrinker->debugfs_entry = NULL; @@ -265,14 +265,14 @@ static int __init shrinker_debugfs_init(void) shrinker_debugfs_root = dentry; /* Create debugfs entries for shrinkers registered at boot */ - down_write(&shrinker_rwsem); + mutex_lock(&shrinker_mutex); list_for_each_entry(shrinker, &shrinker_list, list) if (!shrinker->debugfs_entry) { ret = shrinker_debugfs_add(shrinker); if (ret) break; } - up_write(&shrinker_rwsem); + mutex_unlock(&shrinker_mutex); return ret; } From 058957faba4a6fefa7593ea76a5735f2cb718368 Mon Sep 17 00:00:00 2001 From: Lucy Mielke Date: Fri, 6 Oct 2023 22:30:51 +0200 Subject: [PATCH 47/50] mm: add printf attribute to shrinker_debugfs_name_alloc mainline inclusion from mainline-v6.7-rc1 category: bugfix This fixes a compiler warning when compiling an allyesconfig with W=1: mm/internal.h:1235:9: error: function might be a candidate for `gnu_printf' format attribute [-Werror=suggest-attribute=format] [akpm@linux-foundation.org: fix shrinker_alloc() as welll per Qi Zheng] Link: https://lkml.kernel.org/r/822387b7-4895-4e64-5806-0f56b5d6c447@bytedance.com Link: https://lkml.kernel.org/r/ZSBue-3kM6gI6jCr@mainframe Fixes: c42d50aefd17 ("mm: shrinker: add infrastructure for dynamically allocating shrinker") Signed-off-by: Lucy Mielke Cc: Qi Zheng Signed-off-by: Andrew Morton (cherry picked from commit f04eba134e598d4a5af2eeecff0562df5042cbfc) Signed-off-by: Wentao Guan --- include/linux/shrinker.h | 1 + mm/internal.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 1a30bd6ba801b..3b034a8c35b18 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -138,6 +138,7 @@ struct shrinker { */ #define SHRINKER_NONSLAB BIT(4) +__printf(2, 3) struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...); void shrinker_register(struct shrinker *shrinker); void shrinker_free(struct shrinker *shrinker); diff --git a/mm/internal.h b/mm/internal.h index d95461953559d..31014e38ba5f9 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1291,8 +1291,8 @@ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, int priority); #ifdef CONFIG_SHRINKER_DEBUG -static inline int shrinker_debugfs_name_alloc(struct shrinker *shrinker, - const char *fmt, va_list ap) +static inline __printf(2, 0) int shrinker_debugfs_name_alloc( + struct shrinker *shrinker, const char *fmt, va_list ap) { shrinker->name = kvasprintf_const(GFP_KERNEL, fmt, ap); From 280b7714bfadf2bde49ac48873b531241cf11a80 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 3 Jan 2024 10:52:22 +0900 Subject: [PATCH 48/50] mm: shrinker: use kvzalloc_node() from expand_one_shrinker_info() mainline inclusion from mainline-v6.7 category: bugfix syzbot is reporting uninit-value at shrinker_alloc(), for commit 307bececcd12 ("mm: shrinker: add a secondary array for shrinker_info::{map, nr_deferred}") which assumed that the ->unit was allocated with __GFP_ZERO forgot to replace kvmalloc_node() in expand_one_shrinker_info() with kvzalloc_node(). Link: https://lkml.kernel.org/r/9226cc0a-10e0-4489-80c5-58c3b5b4359c@I-love.SAKURA.ne.jp Reported-by: syzbot Closes: https://syzkaller.appspot.com/bug?extid=1e0ed05798af62917464 Fixes: 307bececcd12 ("mm: shrinker: add a secondary array for shrinker_info::{map, nr_deferred}") Signed-off-by: Tetsuo Handa Acked-by: Qi Zheng Cc: Muchun Song Signed-off-by: Andrew Morton (cherry picked from commit 7fba9420b726561966e1671004df60a08b39beb3) Signed-off-by: Wentao Guan --- mm/shrinker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/shrinker.c b/mm/shrinker.c index dd91eab43ed30..dc5d2a6fcfc41 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -126,7 +126,7 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg, int new_size, if (new_nr_max <= old->map_nr_max) continue; - new = kvmalloc_node(sizeof(*new) + new_size, GFP_KERNEL, nid); + new = kvzalloc_node(sizeof(*new) + new_size, GFP_KERNEL, nid); if (!new) return -ENOMEM; From 97afe5641a2713fc43210634421334151816ad9f Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Fri, 25 Oct 2024 06:09:42 +0000 Subject: [PATCH 49/50] mm: shrinker: avoid memleak in alloc_shrinker_info mainline inclusion from mainline-v6.12-rc6 category: bugfix A memleak was found as below: unreferenced object 0xffff8881010d2a80 (size 32): comm "mkdir", pid 1559, jiffies 4294932666 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 40 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 @............... backtrace (crc 2e7ef6fa): [] __kmalloc_node_noprof+0x394/0x470 [] alloc_shrinker_info+0x7b/0x1a0 [] mem_cgroup_css_online+0x11a/0x3b0 [] online_css+0x29/0xa0 [] cgroup_apply_control_enable+0x20d/0x360 [] cgroup_mkdir+0x168/0x5f0 [] kernfs_iop_mkdir+0x5e/0x90 [] vfs_mkdir+0x144/0x220 [] do_mkdirat+0x87/0x130 [] __x64_sys_mkdir+0x49/0x70 [] do_syscall_64+0x68/0x140 [] entry_SYSCALL_64_after_hwframe+0x76/0x7e alloc_shrinker_info(), when shrinker_unit_alloc() returns an errer, the info won't be freed. Just fix it. Link: https://lkml.kernel.org/r/20241025060942.1049263-1-chenridong@huaweicloud.com Fixes: 307bececcd12 ("mm: shrinker: add a secondary array for shrinker_info::{map, nr_deferred}") Signed-off-by: Chen Ridong Acked-by: Qi Zheng Acked-by: Roman Gushchin Acked-by: Vlastimil Babka Acked-by: Kirill A. Shutemov Reviewed-by: Dave Chinner Cc: Anshuman Khandual Cc: Muchun Song Cc: Wang Weiyang Cc: Signed-off-by: Andrew Morton (cherry picked from commit 15e8156713cc38031642fafc8baf7d53f19f2e83) Signed-off-by: Wentao Guan --- mm/shrinker.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mm/shrinker.c b/mm/shrinker.c index dc5d2a6fcfc41..4a93fd433689a 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -76,19 +76,21 @@ void free_shrinker_info(struct mem_cgroup *memcg) int alloc_shrinker_info(struct mem_cgroup *memcg) { - struct shrinker_info *info; int nid, ret = 0; int array_size = 0; mutex_lock(&shrinker_mutex); array_size = shrinker_unit_size(shrinker_nr_max); for_each_node(nid) { - info = kvzalloc_node(sizeof(*info) + array_size, GFP_KERNEL, nid); + struct shrinker_info *info = kvzalloc_node(sizeof(*info) + array_size, + GFP_KERNEL, nid); if (!info) goto err; info->map_nr_max = shrinker_nr_max; - if (shrinker_unit_alloc(info, NULL, nid)) + if (shrinker_unit_alloc(info, NULL, nid)) { + kvfree(info); goto err; + } rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info); } mutex_unlock(&shrinker_mutex); From 0b9813302beb69bcdc44c2bd3ced151b8560387f Mon Sep 17 00:00:00 2001 From: Altan Hacigumus Date: Tue, 3 Feb 2026 19:35:53 -0800 Subject: [PATCH 50/50] mm/shrinker: fix refcount leak in shrink_slab_memcg() mainline inclusion from mainline-v7.1-rc1 category: bugfix When kmem is disabled for memcg, slab-backed shrinkers are skipped. However, shrink_slab_memcg() doesn't drop the reference acquired via shrinker_try_get() before continuing. Add the missing shrinker_put(). Also, since memcg_kmem_online() and shrinker flags cannot change dynamically, remove the shrinker from the bitmap to avoid unnecessary future scans. Link: https://lkml.kernel.org/r/20260204033553.50039-1-ahacigu.linux@gmail.com Fixes: 50d09da8e119 ("mm: shrinker: make memcg slab shrink lockless") Signed-off-by: Altan Hacigumus Acked-by: Qi Zheng Link: https://lore.kernel.org/r/20260203073757.135088-1-ahacigu.linux@gmail.com Reviewed-by: Muchun Song Cc: Dave Chinner Cc: Roman Gushchin Signed-off-by: Andrew Morton (cherry picked from commit 260d70819c37cf59383286f8dc6566eaa32776bb) Signed-off-by: Wentao Guan --- mm/shrinker.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/shrinker.c b/mm/shrinker.c index 4a93fd433689a..68dc7b4242f2a 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -544,8 +544,11 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, /* Call non-slab shrinkers even though kmem is disabled */ if (!memcg_kmem_online() && - !(shrinker->flags & SHRINKER_NONSLAB)) + !(shrinker->flags & SHRINKER_NONSLAB)) { + clear_bit(offset, unit->map); + shrinker_put(shrinker); continue; + } ret = do_shrink_slab(&sc, shrinker, priority); if (ret == SHRINK_EMPTY) {