diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 8385176968b5f..fcea2333a61bf 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6791,11 +6791,7 @@ static unsigned long mmu_shrink_count(struct shrinker *shrink, return percpu_counter_read_positive(&kvm_total_used_mmu_pages); } -static struct shrinker mmu_shrinker = { - .count_objects = mmu_shrink_count, - .scan_objects = mmu_shrink_scan, - .seeks = DEFAULT_SEEKS * 10, -}; +static struct shrinker *mmu_shrinker; static void mmu_destroy_caches(void) { @@ -6928,10 +6924,16 @@ int kvm_mmu_vendor_module_init(void) if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL)) goto out; - ret = register_shrinker(&mmu_shrinker, "x86-mmu"); - if (ret) + mmu_shrinker = shrinker_alloc(0, "x86-mmu"); + if (!mmu_shrinker) goto out_shrinker; + mmu_shrinker->count_objects = mmu_shrink_count; + mmu_shrinker->scan_objects = mmu_shrink_scan; + mmu_shrinker->seeks = DEFAULT_SEEKS * 10; + + shrinker_register(mmu_shrinker); + return 0; out_shrinker: @@ -6953,7 +6955,7 @@ void kvm_mmu_vendor_module_exit(void) { mmu_destroy_caches(); percpu_counter_destroy(&kvm_total_used_mmu_pages); - unregister_shrinker(&mmu_shrinker); + shrinker_free(mmu_shrinker); } /* diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 8695c0953f9f0..3a6d129918e2b 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -1055,11 +1055,7 @@ binder_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) NULL, sc->nr_to_scan); } -static struct shrinker binder_shrinker = { - .count_objects = binder_shrink_count, - .scan_objects = binder_shrink_scan, - .seeks = DEFAULT_SEEKS, -}; +static struct shrinker *binder_shrinker; /** * binder_alloc_init() - called by binder_open() for per-proc initialization @@ -1079,19 +1075,29 @@ void binder_alloc_init(struct binder_alloc *alloc) int binder_alloc_shrinker_init(void) { - int ret = list_lru_init(&binder_alloc_lru); + int ret; - if (ret == 0) { - ret = register_shrinker(&binder_shrinker, "android-binder"); - if (ret) - list_lru_destroy(&binder_alloc_lru); + ret = list_lru_init(&binder_alloc_lru); + if (ret) + return ret; + + binder_shrinker = shrinker_alloc(0, "android-binder"); + if (!binder_shrinker) { + list_lru_destroy(&binder_alloc_lru); + return -ENOMEM; } - return ret; + + binder_shrinker->count_objects = binder_shrink_count; + binder_shrinker->scan_objects = binder_shrink_scan; + + shrinker_register(binder_shrinker); + + return 0; } void binder_alloc_shrinker_exit(void) { - unregister_shrinker(&binder_shrinker); + shrinker_free(binder_shrinker); list_lru_destroy(&binder_alloc_lru); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index 9cb7bbfb42782..d166052eb2ce3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -288,8 +288,7 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *i915) static unsigned long i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) { - struct drm_i915_private *i915 = - container_of(shrinker, struct drm_i915_private, mm.shrinker); + struct drm_i915_private *i915 = shrinker->private_data; unsigned long num_objects; unsigned long count; @@ -306,8 +305,8 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) if (num_objects) { unsigned long avg = 2 * count / num_objects; - i915->mm.shrinker.batch = - max((i915->mm.shrinker.batch + avg) >> 1, + i915->mm.shrinker->batch = + max((i915->mm.shrinker->batch + avg) >> 1, 128ul /* default SHRINK_BATCH */); } @@ -317,8 +316,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) static unsigned long i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { - struct drm_i915_private *i915 = - container_of(shrinker, struct drm_i915_private, mm.shrinker); + struct drm_i915_private *i915 = shrinker->private_data; unsigned long freed; sc->nr_scanned = 0; @@ -430,12 +428,17 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr void i915_gem_driver_register__shrinker(struct drm_i915_private *i915) { - i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan; - i915->mm.shrinker.count_objects = i915_gem_shrinker_count; - i915->mm.shrinker.seeks = DEFAULT_SEEKS; - i915->mm.shrinker.batch = 4096; - drm_WARN_ON(&i915->drm, register_shrinker(&i915->mm.shrinker, - "drm-i915_gem")); + i915->mm.shrinker = shrinker_alloc(0, "drm-i915_gem"); + if (!i915->mm.shrinker) { + drm_WARN_ON(&i915->drm, 1); + } else { + i915->mm.shrinker->scan_objects = i915_gem_shrinker_scan; + i915->mm.shrinker->count_objects = i915_gem_shrinker_count; + i915->mm.shrinker->batch = 4096; + i915->mm.shrinker->private_data = i915; + + shrinker_register(i915->mm.shrinker); + } i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom; drm_WARN_ON(&i915->drm, register_oom_notifier(&i915->mm.oom_notifier)); @@ -451,7 +454,7 @@ void i915_gem_driver_unregister__shrinker(struct drm_i915_private *i915) unregister_vmap_purge_notifier(&i915->mm.vmap_notifier)); drm_WARN_ON(&i915->drm, unregister_oom_notifier(&i915->mm.oom_notifier)); - unregister_shrinker(&i915->mm.shrinker); + shrinker_free(i915->mm.shrinker); } void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f4ad8885a9cf2..4ec4240d18d8d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -163,7 +163,7 @@ struct i915_gem_mm { struct notifier_block oom_notifier; struct notifier_block vmap_notifier; - struct shrinker shrinker; + struct shrinker *shrinker; #ifdef CONFIG_MMU_NOTIFIER /** diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 4bd028fa75006..7f20249d60715 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -462,7 +462,9 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) if (ret) goto err_msm_uninit; - msm_gem_shrinker_init(ddev); + ret = msm_gem_shrinker_init(ddev); + if (ret) + goto err_msm_uninit; if (priv->kms_init) { ret = priv->kms_init(ddev); diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 223bf904235a8..c3c0aef6a5cbc 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -209,7 +209,7 @@ struct msm_drm_private { } vram; struct notifier_block vmap_notifier; - struct shrinker shrinker; + struct shrinker *shrinker; struct drm_atomic_state *pm_state; @@ -271,7 +271,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, unsigned long msm_gem_shrinker_shrink(struct drm_device *dev, unsigned long nr_to_scan); #endif -void msm_gem_shrinker_init(struct drm_device *dev); +int msm_gem_shrinker_init(struct drm_device *dev); void msm_gem_shrinker_cleanup(struct drm_device *dev); struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 5d5cf51cde65a..89c0ea8ddeaac 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -33,8 +33,7 @@ static bool can_block(struct shrink_control *sc) static unsigned long msm_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) { - struct msm_drm_private *priv = - container_of(shrinker, struct msm_drm_private, shrinker); + struct msm_drm_private *priv = shrinker->private_data; unsigned count = priv->lru.dontneed.count; if (can_swap()) @@ -99,8 +98,7 @@ active_evict(struct drm_gem_object *obj) static unsigned long msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { - struct msm_drm_private *priv = - container_of(shrinker, struct msm_drm_private, shrinker); + struct msm_drm_private *priv = shrinker->private_data; struct { struct drm_gem_lru *lru; bool (*shrink)(struct drm_gem_object *obj); @@ -147,10 +145,11 @@ msm_gem_shrinker_shrink(struct drm_device *dev, unsigned long nr_to_scan) struct shrink_control sc = { .nr_to_scan = nr_to_scan, }; - int ret; + unsigned long ret = SHRINK_STOP; fs_reclaim_acquire(GFP_KERNEL); - ret = msm_gem_shrinker_scan(&priv->shrinker, &sc); + if (priv->shrinker) + ret = msm_gem_shrinker_scan(priv->shrinker, &sc); fs_reclaim_release(GFP_KERNEL); return ret; @@ -209,16 +208,24 @@ msm_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr) * * This function registers and sets up the msm shrinker. */ -void msm_gem_shrinker_init(struct drm_device *dev) +int msm_gem_shrinker_init(struct drm_device *dev) { struct msm_drm_private *priv = dev->dev_private; - priv->shrinker.count_objects = msm_gem_shrinker_count; - priv->shrinker.scan_objects = msm_gem_shrinker_scan; - priv->shrinker.seeks = DEFAULT_SEEKS; - WARN_ON(register_shrinker(&priv->shrinker, "drm-msm_gem")); + + priv->shrinker = shrinker_alloc(0, "drm-msm_gem"); + if (!priv->shrinker) + return -ENOMEM; + + priv->shrinker->count_objects = msm_gem_shrinker_count; + priv->shrinker->scan_objects = msm_gem_shrinker_scan; + priv->shrinker->private_data = priv; + + shrinker_register(priv->shrinker); priv->vmap_notifier.notifier_call = msm_gem_shrinker_vmap; WARN_ON(register_vmap_purge_notifier(&priv->vmap_notifier)); + + return 0; } /** @@ -231,8 +238,8 @@ void msm_gem_shrinker_cleanup(struct drm_device *dev) { struct msm_drm_private *priv = dev->dev_private; - if (priv->shrinker.nr_deferred) { + if (priv->shrinker) { WARN_ON(unregister_vmap_purge_notifier(&priv->vmap_notifier)); - unregister_shrinker(&priv->shrinker); + shrinker_free(priv->shrinker); } } diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h index b0126b9fbadca..e667e56893536 100644 --- a/drivers/gpu/drm/panfrost/panfrost_device.h +++ b/drivers/gpu/drm/panfrost/panfrost_device.h @@ -118,7 +118,7 @@ struct panfrost_device { struct mutex shrinker_lock; struct list_head shrinker_list; - struct shrinker shrinker; + struct shrinker *shrinker; struct panfrost_devfreq pfdevfreq; }; diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 0c30310da50e2..16480ad296b94 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -603,10 +603,14 @@ static int panfrost_probe(struct platform_device *pdev) if (err < 0) goto err_out1; - panfrost_gem_shrinker_init(ddev); + err = panfrost_gem_shrinker_init(ddev); + if (err) + goto err_out2; return 0; +err_out2: + drm_dev_unregister(ddev); err_out1: pm_runtime_disable(pfdev->dev); panfrost_device_fini(pfdev); diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h index ad2877eeeccdf..863d2ec8d4f01 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.h +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h @@ -81,7 +81,7 @@ panfrost_gem_mapping_get(struct panfrost_gem_object *bo, void panfrost_gem_mapping_put(struct panfrost_gem_mapping *mapping); void panfrost_gem_teardown_mappings_locked(struct panfrost_gem_object *bo); -void panfrost_gem_shrinker_init(struct drm_device *dev); +int panfrost_gem_shrinker_init(struct drm_device *dev); void panfrost_gem_shrinker_cleanup(struct drm_device *dev); #endif /* __PANFROST_GEM_H__ */ diff --git a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c index 6a71a2555f85c..3d9f51bd48b61 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c @@ -18,8 +18,7 @@ static unsigned long panfrost_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) { - struct panfrost_device *pfdev = - container_of(shrinker, struct panfrost_device, shrinker); + struct panfrost_device *pfdev = shrinker->private_data; struct drm_gem_shmem_object *shmem; unsigned long count = 0; @@ -65,8 +64,7 @@ static bool panfrost_gem_purge(struct drm_gem_object *obj) static unsigned long panfrost_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { - struct panfrost_device *pfdev = - container_of(shrinker, struct panfrost_device, shrinker); + struct panfrost_device *pfdev = shrinker->private_data; struct drm_gem_shmem_object *shmem, *tmp; unsigned long freed = 0; @@ -97,13 +95,21 @@ panfrost_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) * * This function registers and sets up the panfrost shrinker. */ -void panfrost_gem_shrinker_init(struct drm_device *dev) +int panfrost_gem_shrinker_init(struct drm_device *dev) { struct panfrost_device *pfdev = dev->dev_private; - pfdev->shrinker.count_objects = panfrost_gem_shrinker_count; - pfdev->shrinker.scan_objects = panfrost_gem_shrinker_scan; - pfdev->shrinker.seeks = DEFAULT_SEEKS; - WARN_ON(register_shrinker(&pfdev->shrinker, "drm-panfrost")); + + pfdev->shrinker = shrinker_alloc(0, "drm-panfrost"); + if (!pfdev->shrinker) + return -ENOMEM; + + pfdev->shrinker->count_objects = panfrost_gem_shrinker_count; + pfdev->shrinker->scan_objects = panfrost_gem_shrinker_scan; + pfdev->shrinker->private_data = pfdev; + + shrinker_register(pfdev->shrinker); + + return 0; } /** @@ -116,7 +122,6 @@ void panfrost_gem_shrinker_cleanup(struct drm_device *dev) { struct panfrost_device *pfdev = dev->dev_private; - if (pfdev->shrinker.nr_deferred) { - unregister_shrinker(&pfdev->shrinker); - } + if (pfdev->shrinker) + shrinker_free(pfdev->shrinker); } diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 80ba34cabca36..32b5781d499f4 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -73,7 +73,8 @@ static struct ttm_pool_type global_dma32_uncached[NR_PAGE_ORDERS]; static spinlock_t shrinker_lock; static struct list_head shrinker_list; -static struct shrinker mm_shrinker; +static struct shrinker *mm_shrinker; +static DECLARE_RWSEM(pool_shrink_rwsem); /* Allocate pages of size 1 << order with the given gfp_flags */ static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags, @@ -323,6 +324,7 @@ static unsigned int ttm_pool_shrink(void) unsigned int num_pages; struct page *p; + down_read(&pool_shrink_rwsem); spin_lock(&shrinker_lock); pt = list_first_entry(&shrinker_list, typeof(*pt), shrinker_list); list_move_tail(&pt->shrinker_list, &shrinker_list); @@ -335,6 +337,7 @@ static unsigned int ttm_pool_shrink(void) } else { num_pages = 0; } + up_read(&pool_shrink_rwsem); return num_pages; } @@ -584,6 +587,18 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, } EXPORT_SYMBOL(ttm_pool_init); +/** + * ttm_pool_synchronize_shrinkers - Wait for all running shrinkers to complete. + * + * This is useful to guarantee that all shrinker invocations have seen an + * update, before freeing memory, similar to rcu. + */ +static void ttm_pool_synchronize_shrinkers(void) +{ + down_write(&pool_shrink_rwsem); + up_write(&pool_shrink_rwsem); +} + /** * ttm_pool_fini - Cleanup a pool * @@ -611,7 +626,7 @@ void ttm_pool_fini(struct ttm_pool *pool) /* We removed the pool types from the LRU, but we need to also make sure * that no shrinker is concurrently freeing pages from the pool. */ - synchronize_shrinkers(); + ttm_pool_synchronize_shrinkers(); } EXPORT_SYMBOL(ttm_pool_fini); @@ -754,8 +769,8 @@ static int ttm_pool_debugfs_shrink_show(struct seq_file *m, void *data) struct shrink_control sc = { .gfp_mask = GFP_NOFS }; fs_reclaim_acquire(GFP_KERNEL); - seq_printf(m, "%lu/%lu\n", ttm_pool_shrinker_count(&mm_shrinker, &sc), - ttm_pool_shrinker_scan(&mm_shrinker, &sc)); + seq_printf(m, "%lu/%lu\n", ttm_pool_shrinker_count(mm_shrinker, &sc), + ttm_pool_shrinker_scan(mm_shrinker, &sc)); fs_reclaim_release(GFP_KERNEL); return 0; @@ -799,10 +814,17 @@ int ttm_pool_mgr_init(unsigned long num_pages) &ttm_pool_debugfs_shrink_fops); #endif - mm_shrinker.count_objects = ttm_pool_shrinker_count; - mm_shrinker.scan_objects = ttm_pool_shrinker_scan; - mm_shrinker.seeks = 1; - return register_shrinker(&mm_shrinker, "drm-ttm_pool"); + mm_shrinker = shrinker_alloc(0, "drm-ttm_pool"); + if (!mm_shrinker) + return -ENOMEM; + + mm_shrinker->count_objects = ttm_pool_shrinker_count; + mm_shrinker->scan_objects = ttm_pool_shrinker_scan; + mm_shrinker->seeks = 1; + + shrinker_register(mm_shrinker); + + return 0; } /** @@ -822,6 +844,6 @@ void ttm_pool_mgr_fini(void) ttm_pool_type_fini(&global_dma32_uncached[i]); } - unregister_shrinker(&mm_shrinker); + shrinker_free(mm_shrinker); WARN_ON(!list_empty(&shrinker_list)); } diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 83eb7f27db3d4..38f2cb259321e 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -542,7 +542,7 @@ struct cache_set { struct bio_set bio_split; /* For the btree cache */ - struct shrinker shrink; + struct shrinker *shrink; /* For the btree cache and anything allocation related */ struct mutex bucket_lock; diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 30d6973de258b..5f8e10e84f3c2 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -667,7 +667,7 @@ static int mca_reap(struct btree *b, unsigned int min_order, bool flush) static unsigned long bch_mca_scan(struct shrinker *shrink, struct shrink_control *sc) { - struct cache_set *c = container_of(shrink, struct cache_set, shrink); + struct cache_set *c = shrink->private_data; struct btree *b, *t; unsigned long i, nr = sc->nr_to_scan; unsigned long freed = 0; @@ -734,7 +734,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink, static unsigned long bch_mca_count(struct shrinker *shrink, struct shrink_control *sc) { - struct cache_set *c = container_of(shrink, struct cache_set, shrink); + struct cache_set *c = shrink->private_data; if (c->shrinker_disabled) return 0; @@ -752,8 +752,8 @@ void bch_btree_cache_free(struct cache_set *c) closure_init_stack(&cl); - if (c->shrink.list.next) - unregister_shrinker(&c->shrink); + if (c->shrink) + shrinker_free(c->shrink); mutex_lock(&c->bucket_lock); @@ -828,14 +828,19 @@ int bch_btree_cache_alloc(struct cache_set *c) c->verify_data = NULL; #endif - c->shrink.count_objects = bch_mca_count; - c->shrink.scan_objects = bch_mca_scan; - c->shrink.seeks = 4; - c->shrink.batch = c->btree_pages * 2; + c->shrink = shrinker_alloc(0, "md-bcache:%pU", c->set_uuid); + if (!c->shrink) { + pr_warn("bcache: %s: could not allocate shrinker\n", __func__); + return 0; + } + + c->shrink->count_objects = bch_mca_count; + c->shrink->scan_objects = bch_mca_scan; + c->shrink->seeks = 4; + c->shrink->batch = c->btree_pages * 2; + c->shrink->private_data = c; - if (register_shrinker(&c->shrink, "md-bcache:%pU", c->set_uuid)) - pr_warn("bcache: %s: could not register shrinker\n", - __func__); + shrinker_register(c->shrink); return 0; } diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index b3a34f3ac081c..c4633425acc4b 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -866,7 +866,8 @@ STORE(__bch_cache_set) sc.gfp_mask = GFP_KERNEL; sc.nr_to_scan = strtoul_or_return(buf); - c->shrink.scan_objects(&c->shrink, &sc); + if (c->shrink) + c->shrink->scan_objects(c->shrink, &sc); } sysfs_strtoul_clamp(congested_read_threshold_us, diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 24d5ade3462d7..ded85d8d4c9eb 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1003,7 +1003,7 @@ struct dm_bufio_client { sector_t start; - struct shrinker shrinker; + struct shrinker *shrinker; struct work_struct shrink_work; atomic_long_t need_shrink; @@ -2414,7 +2414,7 @@ static unsigned long dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink { struct dm_bufio_client *c; - c = container_of(shrink, struct dm_bufio_client, shrinker); + c = shrink->private_data; atomic_long_add(sc->nr_to_scan, &c->need_shrink); queue_work(dm_bufio_wq, &c->shrink_work); @@ -2423,7 +2423,7 @@ static unsigned long dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink static unsigned long dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { - struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker); + struct dm_bufio_client *c = shrink->private_data; unsigned long count = cache_total(&c->cache); unsigned long retain_target = get_retain_buffers(c); unsigned long queued_for_cleanup = atomic_long_read(&c->need_shrink); @@ -2540,14 +2540,20 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign INIT_WORK(&c->shrink_work, shrink_work); atomic_long_set(&c->need_shrink, 0); - c->shrinker.count_objects = dm_bufio_shrink_count; - c->shrinker.scan_objects = dm_bufio_shrink_scan; - c->shrinker.seeks = 1; - c->shrinker.batch = 0; - r = register_shrinker(&c->shrinker, "dm-bufio:(%u:%u)", - MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev)); - if (r) + c->shrinker = shrinker_alloc(0, "dm-bufio:(%u:%u)", + MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev)); + if (!c->shrinker) { + r = -ENOMEM; goto bad; + } + + c->shrinker->count_objects = dm_bufio_shrink_count; + c->shrinker->scan_objects = dm_bufio_shrink_scan; + c->shrinker->seeks = 1; + c->shrinker->batch = 0; + c->shrinker->private_data = c; + + shrinker_register(c->shrinker); mutex_lock(&dm_bufio_clients_lock); dm_bufio_client_count++; @@ -2587,7 +2593,7 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c) drop_buffers(c); - unregister_shrinker(&c->shrinker); + shrinker_free(c->shrinker); flush_work(&c->shrink_work); mutex_lock(&dm_bufio_clients_lock); diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 74078b90653f3..ac65d0652d69f 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -1823,7 +1823,7 @@ int dm_cache_metadata_abort(struct dm_cache_metadata *cmd) * Replacement block manager (new_bm) is created and old_bm destroyed outside of * cmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of * shrinker associated with the block manager's bufio client vs cmd root_lock). - * - must take shrinker_rwsem without holding cmd->root_lock + * - must take shrinker_mutex without holding cmd->root_lock */ new_bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT, CACHE_MAX_CONCURRENT_LOCKS); diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 9d3cca8e3dc9e..60a4dc01ea187 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -187,7 +187,7 @@ struct dmz_metadata { struct rb_root mblk_rbtree; struct list_head mblk_lru_list; struct list_head mblk_dirty_list; - struct shrinker mblk_shrinker; + struct shrinker *mblk_shrinker; /* Zone allocation management */ struct mutex map_lock; @@ -615,7 +615,7 @@ static unsigned long dmz_shrink_mblock_cache(struct dmz_metadata *zmd, static unsigned long dmz_mblock_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { - struct dmz_metadata *zmd = container_of(shrink, struct dmz_metadata, mblk_shrinker); + struct dmz_metadata *zmd = shrink->private_data; return atomic_read(&zmd->nr_mblks); } @@ -626,7 +626,7 @@ static unsigned long dmz_mblock_shrinker_count(struct shrinker *shrink, static unsigned long dmz_mblock_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) { - struct dmz_metadata *zmd = container_of(shrink, struct dmz_metadata, mblk_shrinker); + struct dmz_metadata *zmd = shrink->private_data; unsigned long count; spin_lock(&zmd->mblk_lock); @@ -2936,19 +2936,23 @@ int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev, */ zmd->min_nr_mblks = 2 + zmd->nr_map_blocks + zmd->zone_nr_bitmap_blocks * 16; zmd->max_nr_mblks = zmd->min_nr_mblks + 512; - zmd->mblk_shrinker.count_objects = dmz_mblock_shrinker_count; - zmd->mblk_shrinker.scan_objects = dmz_mblock_shrinker_scan; - zmd->mblk_shrinker.seeks = DEFAULT_SEEKS; /* Metadata cache shrinker */ - ret = register_shrinker(&zmd->mblk_shrinker, "dm-zoned-meta:(%u:%u)", - MAJOR(dev->bdev->bd_dev), - MINOR(dev->bdev->bd_dev)); - if (ret) { - dmz_zmd_err(zmd, "Register metadata cache shrinker failed"); + zmd->mblk_shrinker = shrinker_alloc(0, "dm-zoned-meta:(%u:%u)", + MAJOR(dev->bdev->bd_dev), + MINOR(dev->bdev->bd_dev)); + if (!zmd->mblk_shrinker) { + ret = -ENOMEM; + dmz_zmd_err(zmd, "Allocate metadata cache shrinker failed"); goto err; } + zmd->mblk_shrinker->count_objects = dmz_mblock_shrinker_count; + zmd->mblk_shrinker->scan_objects = dmz_mblock_shrinker_scan; + zmd->mblk_shrinker->private_data = zmd; + + shrinker_register(zmd->mblk_shrinker); + dmz_zmd_info(zmd, "DM-Zoned metadata version %d", zmd->sb_version); for (i = 0; i < zmd->nr_devs; i++) dmz_print_dev(zmd, i); @@ -2995,7 +2999,7 @@ int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev, */ void dmz_dtr_metadata(struct dmz_metadata *zmd) { - unregister_shrinker(&zmd->mblk_shrinker); + shrinker_free(zmd->mblk_shrinker); dmz_cleanup_metadata(zmd); kfree(zmd); } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 6eb94e466f904..7f4bb4656ce88 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7430,7 +7430,7 @@ static void free_conf(struct r5conf *conf) log_exit(conf); - unregister_shrinker(&conf->shrinker); + shrinker_free(conf->shrinker); free_thread_groups(conf); shrink_stripes(conf); raid5_free_percpu(conf); @@ -7478,7 +7478,7 @@ static int raid5_alloc_percpu(struct r5conf *conf) static unsigned long raid5_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { - struct r5conf *conf = container_of(shrink, struct r5conf, shrinker); + struct r5conf *conf = shrink->private_data; unsigned long ret = SHRINK_STOP; if (mutex_trylock(&conf->cache_size_mutex)) { @@ -7499,7 +7499,7 @@ static unsigned long raid5_cache_scan(struct shrinker *shrink, static unsigned long raid5_cache_count(struct shrinker *shrink, struct shrink_control *sc) { - struct r5conf *conf = container_of(shrink, struct r5conf, shrinker); + struct r5conf *conf = shrink->private_data; int max_stripes = READ_ONCE(conf->max_nr_stripes); int min_stripes = READ_ONCE(conf->min_nr_stripes); @@ -7736,18 +7736,22 @@ static struct r5conf *setup_conf(struct mddev *mddev) * it reduces the queue depth and so can hurt throughput. * So set it rather large, scaled by number of devices. */ - conf->shrinker.seeks = DEFAULT_SEEKS * conf->raid_disks * 4; - conf->shrinker.scan_objects = raid5_cache_scan; - conf->shrinker.count_objects = raid5_cache_count; - conf->shrinker.batch = 128; - conf->shrinker.flags = 0; - ret = register_shrinker(&conf->shrinker, "md-raid5:%s", mdname(mddev)); - if (ret) { - pr_warn("md/raid:%s: couldn't register shrinker.\n", + conf->shrinker = shrinker_alloc(0, "md-raid5:%s", mdname(mddev)); + if (!conf->shrinker) { + ret = -ENOMEM; + pr_warn("md/raid:%s: couldn't allocate shrinker.\n", mdname(mddev)); goto abort; } + conf->shrinker->seeks = DEFAULT_SEEKS * conf->raid_disks * 4; + conf->shrinker->scan_objects = raid5_cache_scan; + conf->shrinker->count_objects = raid5_cache_count; + conf->shrinker->batch = 128; + conf->shrinker->private_data = conf; + + shrinker_register(conf->shrinker); + sprintf(pers_name, "raid%d", mddev->new_level); rcu_assign_pointer(conf->thread, md_register_thread(raid5d, mddev, pers_name)); diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index fd61715538800..f796b23c2b1be 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -666,7 +666,7 @@ struct r5conf { wait_queue_head_t wait_for_stripe; wait_queue_head_t wait_for_overlap; unsigned long cache_state; - struct shrinker shrinker; + struct shrinker *shrinker; int pool_size; /* number of disks in stripeheads in pool */ spinlock_t device_lock; struct disk_info *disks; diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index ad41e9af8102d..6653fc53c951c 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -380,16 +380,7 @@ struct vmballoon { /** * @shrinker: shrinker interface that is used to avoid over-inflation. */ - struct shrinker shrinker; - - /** - * @shrinker_registered: whether the shrinker was registered. - * - * The shrinker interface does not handle gracefully the removal of - * shrinker that was not registered before. This indication allows to - * simplify the unregistration process. - */ - bool shrinker_registered; + struct shrinker *shrinker; }; static struct vmballoon balloon; @@ -1568,29 +1559,27 @@ static unsigned long vmballoon_shrinker_count(struct shrinker *shrinker, static void vmballoon_unregister_shrinker(struct vmballoon *b) { - if (b->shrinker_registered) - unregister_shrinker(&b->shrinker); - b->shrinker_registered = false; + shrinker_free(b->shrinker); + b->shrinker = NULL; } static int vmballoon_register_shrinker(struct vmballoon *b) { - int r; - /* Do nothing if the shrinker is not enabled */ if (!vmwballoon_shrinker_enable) return 0; - b->shrinker.scan_objects = vmballoon_shrinker_scan; - b->shrinker.count_objects = vmballoon_shrinker_count; - b->shrinker.seeks = DEFAULT_SEEKS; + b->shrinker = shrinker_alloc(0, "vmw-balloon"); + if (!b->shrinker) + return -ENOMEM; - r = register_shrinker(&b->shrinker, "vmw-balloon"); + b->shrinker->scan_objects = vmballoon_shrinker_scan; + b->shrinker->count_objects = vmballoon_shrinker_count; + b->shrinker->private_data = b; - if (r == 0) - b->shrinker_registered = true; + shrinker_register(b->shrinker); - return r; + return 0; } /* @@ -1882,7 +1871,7 @@ static int __init vmballoon_init(void) error = vmballoon_register_shrinker(&balloon); if (error) - goto fail; + return error; /* * Initialization of compaction must be done after the call to @@ -1904,9 +1893,6 @@ static int __init vmballoon_init(void) vmballoon_debugfs_init(&balloon); return 0; -fail: - vmballoon_unregister_shrinker(&balloon); - return error; } /* diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 6cb7ccb21b715..562972cec9225 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -111,7 +111,7 @@ struct virtio_balloon { struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; /* Shrinker to return free pages - VIRTIO_BALLOON_F_FREE_PAGE_HINT */ - struct shrinker shrinker; + struct shrinker *shrinker; /* OOM notifier to deflate on OOM - VIRTIO_BALLOON_F_DEFLATE_ON_OOM */ struct notifier_block oom_nb; @@ -818,8 +818,7 @@ static unsigned long shrink_free_pages(struct virtio_balloon *vb, static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { - struct virtio_balloon *vb = container_of(shrinker, - struct virtio_balloon, shrinker); + struct virtio_balloon *vb = shrinker->private_data; return shrink_free_pages(vb, sc->nr_to_scan); } @@ -827,8 +826,7 @@ static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker, static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) { - struct virtio_balloon *vb = container_of(shrinker, - struct virtio_balloon, shrinker); + struct virtio_balloon *vb = shrinker->private_data; return vb->num_free_page_blocks * VIRTIO_BALLOON_HINT_BLOCK_PAGES; } @@ -849,16 +847,22 @@ static int virtio_balloon_oom_notify(struct notifier_block *nb, static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb) { - unregister_shrinker(&vb->shrinker); + shrinker_free(vb->shrinker); } static int virtio_balloon_register_shrinker(struct virtio_balloon *vb) { - vb->shrinker.scan_objects = virtio_balloon_shrinker_scan; - vb->shrinker.count_objects = virtio_balloon_shrinker_count; - vb->shrinker.seeks = DEFAULT_SEEKS; + vb->shrinker = shrinker_alloc(0, "virtio-balloon"); + if (!vb->shrinker) + return -ENOMEM; - return register_shrinker(&vb->shrinker, "virtio-balloon"); + vb->shrinker->scan_objects = virtio_balloon_shrinker_scan; + vb->shrinker->count_objects = virtio_balloon_shrinker_count; + vb->shrinker->private_data = vb; + + shrinker_register(vb->shrinker); + + return 0; } static int virtballoon_probe(struct virtio_device *vdev) diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index da96c260e26b4..5ebb7233076f7 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -284,13 +284,9 @@ static unsigned long backend_shrink_memory_count(struct shrinker *shrinker, return 0; } -static struct shrinker backend_memory_shrinker = { - .count_objects = backend_shrink_memory_count, - .seeks = DEFAULT_SEEKS, -}; - static int __init xenbus_probe_backend_init(void) { + struct shrinker *backend_memory_shrinker; static struct notifier_block xenstore_notifier = { .notifier_call = backend_probe_and_watch }; @@ -305,8 +301,15 @@ static int __init xenbus_probe_backend_init(void) register_xenstore_notifier(&xenstore_notifier); - if (register_shrinker(&backend_memory_shrinker, "xen-backend")) - pr_warn("shrinker registration failed\n"); + backend_memory_shrinker = shrinker_alloc(0, "xen-backend"); + if (!backend_memory_shrinker) { + pr_warn("shrinker allocation failed\n"); + return 0; + } + + backend_memory_shrinker->count_objects = backend_shrink_memory_count; + + shrinker_register(backend_memory_shrinker); return 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2045ac3d94c4d..55dfa57d51a78 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1521,7 +1521,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, error = -EBUSY; } else { snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); - shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s", fs_type->name, + shrinker_debugfs_rename(s->s_shrink, "sb-%s:%s", fs_type->name, s->s_id); btrfs_sb(s)->bdev_holder = fs_type; error = btrfs_fill_super(s, fs_devices, data); diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c index 4256a85719a1d..5dea308764b45 100644 --- a/fs/erofs/utils.c +++ b/fs/erofs/utils.c @@ -264,19 +264,24 @@ static unsigned long erofs_shrink_scan(struct shrinker *shrink, return freed; } -static struct shrinker erofs_shrinker_info = { - .scan_objects = erofs_shrink_scan, - .count_objects = erofs_shrink_count, - .seeks = DEFAULT_SEEKS, -}; +static struct shrinker *erofs_shrinker_info; int __init erofs_init_shrinker(void) { - return register_shrinker(&erofs_shrinker_info, "erofs-shrinker"); + erofs_shrinker_info = shrinker_alloc(0, "erofs-shrinker"); + if (!erofs_shrinker_info) + return -ENOMEM; + + erofs_shrinker_info->count_objects = erofs_shrink_count; + erofs_shrinker_info->scan_objects = erofs_shrink_scan; + + shrinker_register(erofs_shrinker_info); + + return 0; } void erofs_exit_shrinker(void) { - unregister_shrinker(&erofs_shrinker_info); + shrinker_free(erofs_shrinker_info); } #endif /* !CONFIG_EROFS_FS_ZIP */ diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 317c2c0c015b2..a7f305fe77a11 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1665,7 +1665,7 @@ struct ext4_sb_info { __u32 s_csum_seed; /* Reclaim extents from extent status tree */ - struct shrinker s_es_shrinker; + struct shrinker *s_es_shrinker; struct list_head s_es_list; /* List of inodes with reclaimable extents */ long s_es_nr_inode; struct ext4_es_stats s_es_stats; diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index d9d5cfb9c951a..3a53dbb85e15b 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -1634,7 +1634,7 @@ static unsigned long ext4_es_count(struct shrinker *shrink, unsigned long nr; struct ext4_sb_info *sbi; - sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); + sbi = shrink->private_data; nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt); trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr); return nr; @@ -1643,8 +1643,7 @@ static unsigned long ext4_es_count(struct shrinker *shrink, static unsigned long ext4_es_scan(struct shrinker *shrink, struct shrink_control *sc) { - struct ext4_sb_info *sbi = container_of(shrink, - struct ext4_sb_info, s_es_shrinker); + struct ext4_sb_info *sbi = shrink->private_data; int nr_to_scan = sc->nr_to_scan; int ret, nr_shrunk; @@ -1728,13 +1727,17 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) if (err) goto err3; - sbi->s_es_shrinker.scan_objects = ext4_es_scan; - sbi->s_es_shrinker.count_objects = ext4_es_count; - sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; - err = register_shrinker(&sbi->s_es_shrinker, "ext4-es:%s", - sbi->s_sb->s_id); - if (err) + sbi->s_es_shrinker = shrinker_alloc(0, "ext4-es:%s", sbi->s_sb->s_id); + if (!sbi->s_es_shrinker) { + err = -ENOMEM; goto err4; + } + + sbi->s_es_shrinker->scan_objects = ext4_es_scan; + sbi->s_es_shrinker->count_objects = ext4_es_count; + sbi->s_es_shrinker->private_data = sbi; + + shrinker_register(sbi->s_es_shrinker); return 0; err4: @@ -1754,7 +1757,7 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses); percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt); - unregister_shrinker(&sbi->s_es_shrinker); + shrinker_free(sbi->s_es_shrinker); } /* diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index cff8706958457..e486c0a9b9edf 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -93,11 +93,26 @@ int f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned long rate, #endif /* f2fs-wide shrinker description */ -static struct shrinker f2fs_shrinker_info = { - .scan_objects = f2fs_shrink_scan, - .count_objects = f2fs_shrink_count, - .seeks = DEFAULT_SEEKS, -}; +static struct shrinker *f2fs_shrinker_info; + +static int __init f2fs_init_shrinker(void) +{ + f2fs_shrinker_info = shrinker_alloc(0, "f2fs-shrinker"); + if (!f2fs_shrinker_info) + return -ENOMEM; + + f2fs_shrinker_info->count_objects = f2fs_shrink_count; + f2fs_shrinker_info->scan_objects = f2fs_shrink_scan; + + shrinker_register(f2fs_shrinker_info); + + return 0; +} + +static void f2fs_exit_shrinker(void) +{ + shrinker_free(f2fs_shrinker_info); +} enum { Opt_gc_background, @@ -4960,7 +4975,7 @@ static int __init init_f2fs_fs(void) err = f2fs_init_sysfs(); if (err) goto free_garbage_collection_cache; - err = register_shrinker(&f2fs_shrinker_info, "f2fs-shrinker"); + err = f2fs_init_shrinker(); if (err) goto free_sysfs; f2fs_create_root_stats(); @@ -5010,7 +5025,7 @@ static int __init init_f2fs_fs(void) f2fs_destroy_post_read_processing(); free_root_stats: f2fs_destroy_root_stats(); - unregister_shrinker(&f2fs_shrinker_info); + f2fs_exit_shrinker(); free_sysfs: f2fs_exit_sysfs(); free_garbage_collection_cache: @@ -5043,7 +5058,7 @@ static void __exit exit_f2fs_fs(void) f2fs_destroy_iostat_processing(); f2fs_destroy_post_read_processing(); f2fs_destroy_root_stats(); - unregister_shrinker(&f2fs_shrinker_info); + f2fs_exit_shrinker(); f2fs_exit_sysfs(); f2fs_destroy_garbage_collection_cache(); f2fs_destroy_extent_cache(); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 9265262807f09..868dc8fa0785b 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -2101,11 +2101,7 @@ static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink, return vfs_pressure_ratio(atomic_read(&lru_count)); } -static struct shrinker glock_shrinker = { - .seeks = DEFAULT_SEEKS, - .count_objects = gfs2_glock_shrink_count, - .scan_objects = gfs2_glock_shrink_scan, -}; +static struct shrinker *glock_shrinker; /** * glock_hash_walk - Call a function for glock in a hash bucket @@ -2530,13 +2526,18 @@ int __init gfs2_glock_init(void) return -ENOMEM; } - ret = register_shrinker(&glock_shrinker, "gfs2-glock"); - if (ret) { + glock_shrinker = shrinker_alloc(0, "gfs2-glock"); + if (!glock_shrinker) { destroy_workqueue(glock_workqueue); rhashtable_destroy(&gl_hash_table); - return ret; + return -ENOMEM; } + glock_shrinker->count_objects = gfs2_glock_shrink_count; + glock_shrinker->scan_objects = gfs2_glock_shrink_scan; + + shrinker_register(glock_shrinker); + for (i = 0; i < GLOCK_WAIT_TABLE_SIZE; i++) init_waitqueue_head(glock_wait_table + i); @@ -2545,7 +2546,7 @@ int __init gfs2_glock_init(void) void gfs2_glock_exit(void) { - unregister_shrinker(&glock_shrinker); + shrinker_free(glock_shrinker); rhashtable_destroy(&gl_hash_table); destroy_workqueue(glock_workqueue); } diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 66eb98b690a27..79be0cdc730c0 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -147,7 +147,7 @@ static int __init init_gfs2_fs(void) if (!gfs2_trans_cachep) goto fail_cachep8; - error = register_shrinker(&gfs2_qd_shrinker, "gfs2-qd"); + error = gfs2_qd_shrinker_init(); if (error) goto fail_shrinker; @@ -196,7 +196,7 @@ static int __init init_gfs2_fs(void) fail_wq2: destroy_workqueue(gfs2_recovery_wq); fail_wq1: - unregister_shrinker(&gfs2_qd_shrinker); + gfs2_qd_shrinker_exit(); fail_shrinker: kmem_cache_destroy(gfs2_trans_cachep); fail_cachep8: @@ -229,7 +229,7 @@ static int __init init_gfs2_fs(void) static void __exit exit_gfs2_fs(void) { - unregister_shrinker(&gfs2_qd_shrinker); + gfs2_qd_shrinker_exit(); gfs2_glock_exit(); gfs2_unregister_debugfs(); unregister_filesystem(&gfs2_fs_type); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index c537e1d02cf3a..ae317d1951e7a 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -193,13 +193,26 @@ static unsigned long gfs2_qd_shrink_count(struct shrinker *shrink, return vfs_pressure_ratio(list_lru_shrink_count(&gfs2_qd_lru, sc)); } -struct shrinker gfs2_qd_shrinker = { - .count_objects = gfs2_qd_shrink_count, - .scan_objects = gfs2_qd_shrink_scan, - .seeks = DEFAULT_SEEKS, - .flags = SHRINKER_NUMA_AWARE, -}; +static struct shrinker *gfs2_qd_shrinker; + +int __init gfs2_qd_shrinker_init(void) +{ + gfs2_qd_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE, "gfs2-qd"); + if (!gfs2_qd_shrinker) + return -ENOMEM; + + gfs2_qd_shrinker->count_objects = gfs2_qd_shrink_count; + gfs2_qd_shrinker->scan_objects = gfs2_qd_shrink_scan; + + shrinker_register(gfs2_qd_shrinker); + return 0; +} + +void gfs2_qd_shrinker_exit(void) +{ + shrinker_free(gfs2_qd_shrinker); +} static u64 qd2index(struct gfs2_quota_data *qd) { diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index e4a2fdb552cd7..b98164c94a34e 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -60,7 +60,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip, } extern const struct quotactl_ops gfs2_quotactl_ops; -extern struct shrinker gfs2_qd_shrinker; +int __init gfs2_qd_shrinker_init(void); +void gfs2_qd_shrinker_exit(void); extern struct list_lru gfs2_qd_lru; void __init gfs2_quota_hash_init(void); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 88ac2639ad61e..b6e8bea40dbb0 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1290,7 +1290,7 @@ static int jbd2_min_tag_size(void) static unsigned long jbd2_journal_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { - journal_t *journal = container_of(shrink, journal_t, j_shrinker); + journal_t *journal = shrink->private_data; unsigned long nr_to_scan = sc->nr_to_scan; unsigned long nr_shrunk; unsigned long count; @@ -1316,7 +1316,7 @@ static unsigned long jbd2_journal_shrink_scan(struct shrinker *shrink, static unsigned long jbd2_journal_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { - journal_t *journal = container_of(shrink, journal_t, j_shrinker); + journal_t *journal = shrink->private_data; unsigned long count; count = percpu_counter_read_positive(&journal->j_checkpoint_jh_count); @@ -1630,14 +1630,21 @@ static journal_t *journal_init_common(struct block_device *bdev, goto err_cleanup; journal->j_shrink_transaction = NULL; - journal->j_shrinker.scan_objects = jbd2_journal_shrink_scan; - journal->j_shrinker.count_objects = jbd2_journal_shrink_count; - journal->j_shrinker.seeks = DEFAULT_SEEKS; - journal->j_shrinker.batch = journal->j_max_transaction_buffers; - err = register_shrinker(&journal->j_shrinker, "jbd2-journal:(%u:%u)", - MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev)); - if (err) + + journal->j_shrinker = shrinker_alloc(0, "jbd2-journal:(%u:%u)", + MAJOR(bdev->bd_dev), + MINOR(bdev->bd_dev)); + if (!journal->j_shrinker) { + err = -ENOMEM; goto err_cleanup; + } + + journal->j_shrinker->scan_objects = jbd2_journal_shrink_scan; + journal->j_shrinker->count_objects = jbd2_journal_shrink_count; + journal->j_shrinker->batch = journal->j_max_transaction_buffers; + journal->j_shrinker->private_data = journal; + + shrinker_register(journal->j_shrinker); return journal; @@ -2217,9 +2224,9 @@ int jbd2_journal_destroy(journal_t *journal) brelse(journal->j_sb_buffer); } - if (journal->j_shrinker.flags & SHRINKER_REGISTERED) { + if (journal->j_shrinker) { percpu_counter_destroy(&journal->j_checkpoint_jh_count); - unregister_shrinker(&journal->j_shrinker); + shrinker_free(journal->j_shrinker); } if (journal->j_proc_entry) jbd2_stats_proc_exit(journal); diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index d1a2e44df3c05..63d768332151c 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -266,7 +266,7 @@ static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *k sb->s_time_gran = 1; /* sysfs dentries and inodes don't require IO to create */ - sb->s_shrink.seeks = 0; + sb->s_shrink->seeks = 0; /* get root inode, initialize and unlock it */ down_read(&kf_root->kernfs_rwsem); diff --git a/fs/mbcache.c b/fs/mbcache.c index 2a4b8b549e934..82aa7a35db26b 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -37,7 +37,7 @@ struct mb_cache { struct list_head c_list; /* Number of entries in cache */ unsigned long c_entry_count; - struct shrinker c_shrink; + struct shrinker *c_shrink; /* Work for shrinking when the cache has too many entries */ struct work_struct c_shrink_work; }; @@ -293,8 +293,7 @@ EXPORT_SYMBOL(mb_cache_entry_touch); static unsigned long mb_cache_count(struct shrinker *shrink, struct shrink_control *sc) { - struct mb_cache *cache = container_of(shrink, struct mb_cache, - c_shrink); + struct mb_cache *cache = shrink->private_data; return cache->c_entry_count; } @@ -333,8 +332,7 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache, static unsigned long mb_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { - struct mb_cache *cache = container_of(shrink, struct mb_cache, - c_shrink); + struct mb_cache *cache = shrink->private_data; return mb_cache_shrink(cache, sc->nr_to_scan); } @@ -377,15 +375,19 @@ struct mb_cache *mb_cache_create(int bucket_bits) for (i = 0; i < bucket_count; i++) INIT_HLIST_BL_HEAD(&cache->c_hash[i]); - cache->c_shrink.count_objects = mb_cache_count; - cache->c_shrink.scan_objects = mb_cache_scan; - cache->c_shrink.seeks = DEFAULT_SEEKS; - if (register_shrinker(&cache->c_shrink, "mbcache-shrinker")) { + cache->c_shrink = shrinker_alloc(0, "mbcache-shrinker"); + if (!cache->c_shrink) { kfree(cache->c_hash); kfree(cache); goto err_out; } + cache->c_shrink->count_objects = mb_cache_count; + cache->c_shrink->scan_objects = mb_cache_scan; + cache->c_shrink->private_data = cache; + + shrinker_register(cache->c_shrink); + INIT_WORK(&cache->c_shrink_work, mb_cache_shrink_worker); return cache; @@ -406,7 +408,7 @@ void mb_cache_destroy(struct mb_cache *cache) { struct mb_cache_entry *entry, *next; - unregister_shrinker(&cache->c_shrink); + shrinker_free(cache->c_shrink); /* * We don't bother with any locking. Cache must not be used at this diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c index 911f634ba3da7..2ad66a8922f42 100644 --- a/fs/nfs/nfs42xattr.c +++ b/fs/nfs/nfs42xattr.c @@ -796,28 +796,9 @@ static unsigned long nfs4_xattr_cache_scan(struct shrinker *shrink, static unsigned long nfs4_xattr_entry_scan(struct shrinker *shrink, struct shrink_control *sc); -static struct shrinker nfs4_xattr_cache_shrinker = { - .count_objects = nfs4_xattr_cache_count, - .scan_objects = nfs4_xattr_cache_scan, - .seeks = DEFAULT_SEEKS, - .flags = SHRINKER_MEMCG_AWARE, -}; - -static struct shrinker nfs4_xattr_entry_shrinker = { - .count_objects = nfs4_xattr_entry_count, - .scan_objects = nfs4_xattr_entry_scan, - .seeks = DEFAULT_SEEKS, - .batch = 512, - .flags = SHRINKER_MEMCG_AWARE, -}; - -static struct shrinker nfs4_xattr_large_entry_shrinker = { - .count_objects = nfs4_xattr_entry_count, - .scan_objects = nfs4_xattr_entry_scan, - .seeks = 1, - .batch = 512, - .flags = SHRINKER_MEMCG_AWARE, -}; +static struct shrinker *nfs4_xattr_cache_shrinker; +static struct shrinker *nfs4_xattr_entry_shrinker; +static struct shrinker *nfs4_xattr_large_entry_shrinker; static enum lru_status cache_lru_isolate(struct list_head *item, @@ -943,7 +924,7 @@ nfs4_xattr_entry_scan(struct shrinker *shrink, struct shrink_control *sc) struct nfs4_xattr_entry *entry; struct list_lru *lru; - lru = (shrink == &nfs4_xattr_large_entry_shrinker) ? + lru = (shrink == nfs4_xattr_large_entry_shrinker) ? &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; freed = list_lru_shrink_walk(lru, sc, entry_lru_isolate, &dispose); @@ -971,7 +952,7 @@ nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc) unsigned long count; struct list_lru *lru; - lru = (shrink == &nfs4_xattr_large_entry_shrinker) ? + lru = (shrink == nfs4_xattr_large_entry_shrinker) ? &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; count = list_lru_shrink_count(lru, sc); @@ -991,18 +972,34 @@ static void nfs4_xattr_cache_init_once(void *p) INIT_LIST_HEAD(&cache->dispose); } -static int nfs4_xattr_shrinker_init(struct shrinker *shrinker, - struct list_lru *lru, const char *name) +typedef unsigned long (*count_objects_cb)(struct shrinker *s, + struct shrink_control *sc); +typedef unsigned long (*scan_objects_cb)(struct shrinker *s, + struct shrink_control *sc); + +static int __init nfs4_xattr_shrinker_init(struct shrinker **shrinker, + struct list_lru *lru, const char *name, + count_objects_cb count, + scan_objects_cb scan, long batch, int seeks) { - int ret = 0; + int ret; - ret = register_shrinker(shrinker, name); - if (ret) + *shrinker = shrinker_alloc(SHRINKER_MEMCG_AWARE, name); + if (!*shrinker) + return -ENOMEM; + + ret = list_lru_init_memcg(lru, *shrinker); + if (ret) { + shrinker_free(*shrinker); return ret; + } - ret = list_lru_init_memcg(lru, shrinker); - if (ret) - unregister_shrinker(shrinker); + (*shrinker)->count_objects = count; + (*shrinker)->scan_objects = scan; + (*shrinker)->batch = batch; + (*shrinker)->seeks = seeks; + + shrinker_register(*shrinker); return ret; } @@ -1010,7 +1007,7 @@ static int nfs4_xattr_shrinker_init(struct shrinker *shrinker, static void nfs4_xattr_shrinker_destroy(struct shrinker *shrinker, struct list_lru *lru) { - unregister_shrinker(shrinker); + shrinker_free(shrinker); list_lru_destroy(lru); } @@ -1026,27 +1023,31 @@ int __init nfs4_xattr_cache_init(void) return -ENOMEM; ret = nfs4_xattr_shrinker_init(&nfs4_xattr_cache_shrinker, - &nfs4_xattr_cache_lru, - "nfs-xattr_cache"); + &nfs4_xattr_cache_lru, "nfs-xattr_cache", + nfs4_xattr_cache_count, + nfs4_xattr_cache_scan, 0, DEFAULT_SEEKS); if (ret) goto out1; ret = nfs4_xattr_shrinker_init(&nfs4_xattr_entry_shrinker, - &nfs4_xattr_entry_lru, - "nfs-xattr_entry"); + &nfs4_xattr_entry_lru, "nfs-xattr_entry", + nfs4_xattr_entry_count, + nfs4_xattr_entry_scan, 512, DEFAULT_SEEKS); if (ret) goto out2; ret = nfs4_xattr_shrinker_init(&nfs4_xattr_large_entry_shrinker, &nfs4_xattr_large_entry_lru, - "nfs-xattr_large_entry"); + "nfs-xattr_large_entry", + nfs4_xattr_entry_count, + nfs4_xattr_entry_scan, 512, 1); if (!ret) return 0; - nfs4_xattr_shrinker_destroy(&nfs4_xattr_entry_shrinker, + nfs4_xattr_shrinker_destroy(nfs4_xattr_entry_shrinker, &nfs4_xattr_entry_lru); out2: - nfs4_xattr_shrinker_destroy(&nfs4_xattr_cache_shrinker, + nfs4_xattr_shrinker_destroy(nfs4_xattr_cache_shrinker, &nfs4_xattr_cache_lru); out1: kmem_cache_destroy(nfs4_xattr_cache_cachep); @@ -1056,11 +1057,11 @@ int __init nfs4_xattr_cache_init(void) void nfs4_xattr_cache_exit(void) { - nfs4_xattr_shrinker_destroy(&nfs4_xattr_large_entry_shrinker, + nfs4_xattr_shrinker_destroy(nfs4_xattr_large_entry_shrinker, &nfs4_xattr_large_entry_lru); - nfs4_xattr_shrinker_destroy(&nfs4_xattr_entry_shrinker, + nfs4_xattr_shrinker_destroy(nfs4_xattr_entry_shrinker, &nfs4_xattr_entry_lru); - nfs4_xattr_shrinker_destroy(&nfs4_xattr_cache_shrinker, + nfs4_xattr_shrinker_destroy(nfs4_xattr_cache_shrinker, &nfs4_xattr_cache_lru); kmem_cache_destroy(nfs4_xattr_cache_cachep); } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 5c14c30a84c0e..437634e57b5be 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -130,11 +130,7 @@ static void nfs_ssc_unregister_ops(void) } #endif /* CONFIG_NFS_V4_2 */ -static struct shrinker acl_shrinker = { - .count_objects = nfs_access_cache_count, - .scan_objects = nfs_access_cache_scan, - .seeks = DEFAULT_SEEKS, -}; +static struct shrinker *acl_shrinker; /* * Register the NFS filesystems @@ -154,9 +150,18 @@ int __init register_nfs_fs(void) ret = nfs_register_sysctl(); if (ret < 0) goto error_2; - ret = register_shrinker(&acl_shrinker, "nfs-acl"); - if (ret < 0) + + acl_shrinker = shrinker_alloc(0, "nfs-acl"); + if (!acl_shrinker) { + ret = -ENOMEM; goto error_3; + } + + acl_shrinker->count_objects = nfs_access_cache_count; + acl_shrinker->scan_objects = nfs_access_cache_scan; + + shrinker_register(acl_shrinker); + #ifdef CONFIG_NFS_V4_2 nfs_ssc_register_ops(); #endif @@ -176,7 +181,7 @@ int __init register_nfs_fs(void) */ void __exit unregister_nfs_fs(void) { - unregister_shrinker(&acl_shrinker); + shrinker_free(acl_shrinker); nfs_unregister_sysctl(); unregister_nfs4_fs(); #ifdef CONFIG_NFS_V4_2 diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 0ff07d53931f2..2419cf3302105 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -523,11 +523,7 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) return ret; } -static struct shrinker nfsd_file_shrinker = { - .scan_objects = nfsd_file_lru_scan, - .count_objects = nfsd_file_lru_count, - .seeks = 1, -}; +static struct shrinker *nfsd_file_shrinker; /** * nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file @@ -748,12 +744,19 @@ nfsd_file_cache_init(void) goto out_err; } - ret = register_shrinker(&nfsd_file_shrinker, "nfsd-filecache"); - if (ret) { - pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret); + nfsd_file_shrinker = shrinker_alloc(0, "nfsd-filecache"); + if (!nfsd_file_shrinker) { + ret = -ENOMEM; + pr_err("nfsd: failed to allocate nfsd_file_shrinker\n"); goto out_lru; } + nfsd_file_shrinker->count_objects = nfsd_file_lru_count; + nfsd_file_shrinker->scan_objects = nfsd_file_lru_scan; + nfsd_file_shrinker->seeks = 1; + + shrinker_register(nfsd_file_shrinker); + ret = lease_register_notifier(&nfsd_file_lease_notifier); if (ret) { pr_err("nfsd: unable to register lease notifier: %d\n", ret); @@ -778,7 +781,7 @@ nfsd_file_cache_init(void) out_notifier: lease_unregister_notifier(&nfsd_file_lease_notifier); out_shrinker: - unregister_shrinker(&nfsd_file_shrinker); + shrinker_free(nfsd_file_shrinker); out_lru: list_lru_destroy(&nfsd_file_lru); out_err: @@ -895,7 +898,7 @@ nfsd_file_cache_shutdown(void) return; lease_unregister_notifier(&nfsd_file_lease_notifier); - unregister_shrinker(&nfsd_file_shrinker); + shrinker_free(nfsd_file_shrinker); /* * make sure all callers of nfsd_file_lru_cb are done before * calling nfsd_file_cache_purge diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 0ecc5527a8f1a..f8d1a7c13d108 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -197,7 +197,7 @@ struct nfsd_net { /* size of cache when we saw the longest hash chain */ unsigned int longest_chain_cachesize; - struct shrinker nfsd_reply_cache_shrinker; + struct shrinker *nfsd_reply_cache_shrinker; /* tracking server-to-server copy mounts */ spinlock_t nfsd_ssc_lock; @@ -215,7 +215,7 @@ struct nfsd_net { int nfs4_max_clients; atomic_t nfsd_courtesy_clients; - struct shrinker nfsd_client_shrinker; + struct shrinker *nfsd_client_shrinker; struct work_struct nfsd_shrinker_work; }; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c0dd50b59ad16..06f1fb8ebc4eb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4427,8 +4427,7 @@ static unsigned long nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { int count; - struct nfsd_net *nn = container_of(shrink, - struct nfsd_net, nfsd_client_shrinker); + struct nfsd_net *nn = shrink->private_data; count = atomic_read(&nn->nfsd_courtesy_clients); if (!count) @@ -8245,12 +8244,16 @@ static int nfs4_state_create_net(struct net *net) INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); get_net(net); - nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; - nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count; - nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; - - if (register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client")) + nn->nfsd_client_shrinker = shrinker_alloc(0, "nfsd-client"); + if (!nn->nfsd_client_shrinker) goto err_shrinker; + + nn->nfsd_client_shrinker->scan_objects = nfsd4_state_shrinker_scan; + nn->nfsd_client_shrinker->count_objects = nfsd4_state_shrinker_count; + nn->nfsd_client_shrinker->private_data = nn; + + shrinker_register(nn->nfsd_client_shrinker); + return 0; err_shrinker: @@ -8352,7 +8355,7 @@ nfs4_state_shutdown_net(struct net *net) struct list_head *pos, *next, reaplist; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - unregister_shrinker(&nn->nfsd_client_shrinker); + shrinker_free(nn->nfsd_client_shrinker); cancel_work_sync(&nn->nfsd_shrinker_work); spin_lock(&nn->client_lock); nn->client_tracking_active = false; diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index c52132ecb339d..622830a09ec37 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -180,26 +180,29 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) { unsigned int hashsize; unsigned int i; - int status = 0; nn->max_drc_entries = nfsd_cache_size_limit(); atomic_set(&nn->num_drc_entries, 0); hashsize = nfsd_hashsize(nn->max_drc_entries); nn->maskbits = ilog2(hashsize); - nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan; - nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count; - nn->nfsd_reply_cache_shrinker.seeks = 1; - status = register_shrinker(&nn->nfsd_reply_cache_shrinker, - "nfsd-reply:%s", nn->nfsd_name); - if (status) - return status; - nn->drc_hashtbl = kvzalloc(array_size(hashsize, sizeof(*nn->drc_hashtbl)), GFP_KERNEL); if (!nn->drc_hashtbl) + return -ENOMEM; + + nn->nfsd_reply_cache_shrinker = shrinker_alloc(0, "nfsd-reply:%s", + nn->nfsd_name); + if (!nn->nfsd_reply_cache_shrinker) goto out_shrinker; + nn->nfsd_reply_cache_shrinker->scan_objects = nfsd_reply_cache_scan; + nn->nfsd_reply_cache_shrinker->count_objects = nfsd_reply_cache_count; + nn->nfsd_reply_cache_shrinker->seeks = 1; + nn->nfsd_reply_cache_shrinker->private_data = nn; + + shrinker_register(nn->nfsd_reply_cache_shrinker); + for (i = 0; i < hashsize; i++) { INIT_LIST_HEAD(&nn->drc_hashtbl[i].lru_head); spin_lock_init(&nn->drc_hashtbl[i].cache_lock); @@ -208,7 +211,7 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) return 0; out_shrinker: - unregister_shrinker(&nn->nfsd_reply_cache_shrinker); + kvfree(nn->drc_hashtbl); printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); return -ENOMEM; } @@ -218,7 +221,7 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn) struct nfsd_cacherep *rp; unsigned int i; - unregister_shrinker(&nn->nfsd_reply_cache_shrinker); + shrinker_free(nn->nfsd_reply_cache_shrinker); for (i = 0; i < nn->drc_hashsize; i++) { struct list_head *head = &nn->drc_hashtbl[i].lru_head; @@ -302,8 +305,7 @@ nfsd_prune_bucket_locked(struct nfsd_net *nn, struct nfsd_drc_bucket *b, static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) { - struct nfsd_net *nn = container_of(shrink, - struct nfsd_net, nfsd_reply_cache_shrinker); + struct nfsd_net *nn = shrink->private_data; return atomic_read(&nn->num_drc_entries); } @@ -322,8 +324,7 @@ nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { - struct nfsd_net *nn = container_of(shrink, - struct nfsd_net, nfsd_reply_cache_shrinker); + struct nfsd_net *nn = shrink->private_data; unsigned long freed = 0; LIST_HEAD(dispose); unsigned int i; diff --git a/fs/proc/root.c b/fs/proc/root.c index 9191248f2dacb..b55dbc70287b4 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -188,7 +188,7 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc) s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; /* procfs dentries and inodes don't require IO to create */ - s->s_shrink.seeks = 0; + s->s_shrink->seeks = 0; pde_get(&proc_root); root_inode = proc_get_inode(s, &proc_root); diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 7562ee4ff2929..4c69ba48c774e 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -833,12 +833,6 @@ dqcache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS])); } -static struct shrinker dqcache_shrinker = { - .count_objects = dqcache_shrink_count, - .scan_objects = dqcache_shrink_scan, - .seeks = DEFAULT_SEEKS, -}; - /* * Safely release dquot and put reference to dquot. */ @@ -3034,6 +3028,7 @@ static int __init dquot_init(void) { int i, ret; unsigned long nr_hash, order; + struct shrinker *dqcache_shrinker; printk(KERN_NOTICE "VFS: Disk quotas %s\n", __DQUOT_VERSION__); @@ -3068,8 +3063,14 @@ static int __init dquot_init(void) pr_info("VFS: Dquot-cache hash table entries: %ld (order %ld," " %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order)); - if (register_shrinker(&dqcache_shrinker, "dquota-cache")) - panic("Cannot register dquot shrinker"); + dqcache_shrinker = shrinker_alloc(0, "dquota-cache"); + if (!dqcache_shrinker) + panic("Cannot allocate dquot shrinker"); + + dqcache_shrinker->count_objects = dqcache_shrink_count; + dqcache_shrinker->scan_objects = dqcache_shrink_scan; + + shrinker_register(dqcache_shrinker); quota_unbound_wq = alloc_workqueue("quota_events_unbound", WQ_UNBOUND | WQ_MEM_RECLAIM, WQ_MAX_ACTIVE); diff --git a/fs/super.c b/fs/super.c index b142e71eb8dfd..e1cc7e9e534c0 100644 --- a/fs/super.c +++ b/fs/super.c @@ -178,7 +178,7 @@ static void super_wake(struct super_block *sb, unsigned int flag) * One thing we have to be careful of with a per-sb shrinker is that we don't * drop the last active reference to the superblock from within the shrinker. * If that happens we could trigger unregistering the shrinker from within the - * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we + * shrinker path and that leads to deadlock on the shrinker_mutex. Hence we * take a passive reference to the superblock to avoid this from occurring. */ static unsigned long super_cache_scan(struct shrinker *shrink, @@ -191,7 +191,7 @@ static unsigned long super_cache_scan(struct shrinker *shrink, long dentries; long inodes; - sb = container_of(shrink, struct super_block, s_shrink); + sb = shrink->private_data; /* * Deadlock avoidance. We may hold various FS locks, and we don't want @@ -244,7 +244,7 @@ static unsigned long super_cache_count(struct shrinker *shrink, struct super_block *sb; long total_objects = 0; - sb = container_of(shrink, struct super_block, s_shrink); + sb = shrink->private_data; /* * We don't call super_trylock_shared() here as it is a scalability @@ -306,7 +306,7 @@ static void destroy_unused_super(struct super_block *s) security_sb_free(s); put_user_ns(s->s_user_ns); kfree(s->s_subtype); - free_prealloced_shrinker(&s->s_shrink); + shrinker_free(s->s_shrink); /* no delays needed */ destroy_super_work(&s->destroy_work); } @@ -383,16 +383,19 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, s->s_time_min = TIME64_MIN; s->s_time_max = TIME64_MAX; - s->s_shrink.seeks = DEFAULT_SEEKS; - s->s_shrink.scan_objects = super_cache_scan; - s->s_shrink.count_objects = super_cache_count; - s->s_shrink.batch = 1024; - s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE; - if (prealloc_shrinker(&s->s_shrink, "sb-%s", type->name)) + s->s_shrink = shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, + "sb-%s", type->name); + if (!s->s_shrink) goto fail; - if (list_lru_init_memcg(&s->s_dentry_lru, &s->s_shrink)) + + s->s_shrink->scan_objects = super_cache_scan; + s->s_shrink->count_objects = super_cache_count; + s->s_shrink->batch = 1024; + s->s_shrink->private_data = s; + + if (list_lru_init_memcg(&s->s_dentry_lru, s->s_shrink)) goto fail; - if (list_lru_init_memcg(&s->s_inode_lru, &s->s_shrink)) + if (list_lru_init_memcg(&s->s_inode_lru, s->s_shrink)) goto fail; return s; @@ -477,7 +480,7 @@ void deactivate_locked_super(struct super_block *s) { struct file_system_type *fs = s->s_type; if (atomic_dec_and_test(&s->s_active)) { - unregister_shrinker(&s->s_shrink); + shrinker_free(s->s_shrink); fs->kill_sb(s); kill_super_notify(s); @@ -829,7 +832,7 @@ struct super_block *sget_fc(struct fs_context *fc, hlist_add_head(&s->s_instances, &s->s_type->fs_supers); spin_unlock(&sb_lock); get_filesystem(s->s_type); - register_shrinker_prepared(&s->s_shrink); + shrinker_register(s->s_shrink); return s; share_extant_sb: @@ -912,7 +915,7 @@ struct super_block *sget(struct file_system_type *type, hlist_add_head(&s->s_instances, &type->fs_supers); spin_unlock(&sb_lock); get_filesystem(type); - register_shrinker_prepared(&s->s_shrink); + shrinker_register(s->s_shrink); return s; } EXPORT_SYMBOL(sget); @@ -1536,7 +1539,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags, mutex_unlock(&bdev->bd_fsfreeze_mutex); snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev); - shrinker_debugfs_rename(&sb->s_shrink, "sb-%s:%s", sb->s_type->name, + shrinker_debugfs_rename(sb->s_shrink, "sb-%s:%s", sb->s_type->name, sb->s_id); sb_set_blocksize(sb, block_size(bdev)); return 0; diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 78f31bddff746..86643605e149f 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -54,11 +54,7 @@ module_param_cb(default_version, &ubifs_default_version_ops, &ubifs_default_vers static struct kmem_cache *ubifs_inode_slab; /* UBIFS TNC shrinker description */ -static struct shrinker ubifs_shrinker_info = { - .scan_objects = ubifs_shrink_scan, - .count_objects = ubifs_shrink_count, - .seeks = DEFAULT_SEEKS, -}; +static struct shrinker *ubifs_shrinker_info; /** * validate_inode - validate inode. @@ -2373,7 +2369,7 @@ static void inode_slab_ctor(void *obj) static int __init ubifs_init(void) { - int err; + int err = -ENOMEM; BUILD_BUG_ON(sizeof(struct ubifs_ch) != 24); @@ -2439,10 +2435,15 @@ static int __init ubifs_init(void) if (!ubifs_inode_slab) return -ENOMEM; - err = register_shrinker(&ubifs_shrinker_info, "ubifs-slab"); - if (err) + ubifs_shrinker_info = shrinker_alloc(0, "ubifs-slab"); + if (!ubifs_shrinker_info) goto out_slab; + ubifs_shrinker_info->count_objects = ubifs_shrink_count; + ubifs_shrinker_info->scan_objects = ubifs_shrink_scan; + + shrinker_register(ubifs_shrinker_info); + err = ubifs_compressors_init(); if (err) goto out_shrinker; @@ -2467,7 +2468,7 @@ static int __init ubifs_init(void) dbg_debugfs_exit(); ubifs_compressors_exit(); out_shrinker: - unregister_shrinker(&ubifs_shrinker_info); + shrinker_free(ubifs_shrinker_info); out_slab: kmem_cache_destroy(ubifs_inode_slab); return err; @@ -2483,7 +2484,7 @@ static void __exit ubifs_exit(void) dbg_debugfs_exit(); ubifs_sysfs_exit(); ubifs_compressors_exit(); - unregister_shrinker(&ubifs_shrinker_info); + shrinker_free(ubifs_shrinker_info); /* * Make sure all delayed rcu free inodes are flushed before we diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 1181108f80746..9c9a3a5548b8a 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1913,8 +1913,7 @@ xfs_buftarg_shrink_scan( struct shrinker *shrink, struct shrink_control *sc) { - struct xfs_buftarg *btp = container_of(shrink, - struct xfs_buftarg, bt_shrinker); + struct xfs_buftarg *btp = shrink->private_data; LIST_HEAD(dispose); unsigned long freed; @@ -1936,8 +1935,7 @@ xfs_buftarg_shrink_count( struct shrinker *shrink, struct shrink_control *sc) { - struct xfs_buftarg *btp = container_of(shrink, - struct xfs_buftarg, bt_shrinker); + struct xfs_buftarg *btp = shrink->private_data; return list_lru_shrink_count(&btp->bt_lru, sc); } @@ -1947,7 +1945,7 @@ xfs_free_buftarg( { struct block_device *bdev = btp->bt_bdev; - unregister_shrinker(&btp->bt_shrinker); + shrinker_free(btp->bt_shrinker); ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0); percpu_counter_destroy(&btp->bt_io_count); list_lru_destroy(&btp->bt_lru); @@ -2031,13 +2029,17 @@ xfs_alloc_buftarg( if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL)) goto error_lru; - btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count; - btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan; - btp->bt_shrinker.seeks = DEFAULT_SEEKS; - btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE; - if (register_shrinker(&btp->bt_shrinker, "xfs-buf:%s", - mp->m_super->s_id)) + btp->bt_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE, "xfs-buf:%s", + mp->m_super->s_id); + if (!btp->bt_shrinker) goto error_pcpu; + + btp->bt_shrinker->count_objects = xfs_buftarg_shrink_count; + btp->bt_shrinker->scan_objects = xfs_buftarg_shrink_scan; + btp->bt_shrinker->private_data = btp; + + shrinker_register(btp->bt_shrinker); + return btp; error_pcpu: diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 5896b58c5f4db..b21ec7234dbe7 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -108,7 +108,7 @@ typedef struct xfs_buftarg { size_t bt_logical_sectormask; /* LRU control structures */ - struct shrinker bt_shrinker; + struct shrinker *bt_shrinker; struct list_lru bt_lru; struct percpu_counter bt_io_count; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index c54a7c60e0633..f79b35755a398 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -2176,8 +2176,7 @@ xfs_inodegc_shrinker_count( struct shrinker *shrink, struct shrink_control *sc) { - struct xfs_mount *mp = container_of(shrink, struct xfs_mount, - m_inodegc_shrinker); + struct xfs_mount *mp = shrink->private_data; struct xfs_inodegc *gc; int cpu; @@ -2198,8 +2197,7 @@ xfs_inodegc_shrinker_scan( struct shrinker *shrink, struct shrink_control *sc) { - struct xfs_mount *mp = container_of(shrink, struct xfs_mount, - m_inodegc_shrinker); + struct xfs_mount *mp = shrink->private_data; struct xfs_inodegc *gc; int cpu; bool no_items = true; @@ -2235,13 +2233,19 @@ int xfs_inodegc_register_shrinker( struct xfs_mount *mp) { - struct shrinker *shrink = &mp->m_inodegc_shrinker; + mp->m_inodegc_shrinker = shrinker_alloc(SHRINKER_NONSLAB, + "xfs-inodegc:%s", + mp->m_super->s_id); + if (!mp->m_inodegc_shrinker) + return -ENOMEM; + + mp->m_inodegc_shrinker->count_objects = xfs_inodegc_shrinker_count; + mp->m_inodegc_shrinker->scan_objects = xfs_inodegc_shrinker_scan; + mp->m_inodegc_shrinker->seeks = 0; + mp->m_inodegc_shrinker->batch = XFS_INODEGC_SHRINKER_BATCH; + mp->m_inodegc_shrinker->private_data = mp; - shrink->count_objects = xfs_inodegc_shrinker_count; - shrink->scan_objects = xfs_inodegc_shrinker_scan; - shrink->seeks = 0; - shrink->flags = SHRINKER_NONSLAB; - shrink->batch = XFS_INODEGC_SHRINKER_BATCH; + shrinker_register(mp->m_inodegc_shrinker); - return register_shrinker(shrink, "xfs-inodegc:%s", mp->m_super->s_id); + return 0; } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 44c686d57e7bb..e026c0ee60134 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1022,7 +1022,7 @@ xfs_mountfs( out_log_dealloc: xfs_log_mount_cancel(mp); out_inodegc_shrinker: - unregister_shrinker(&mp->m_inodegc_shrinker); + shrinker_free(mp->m_inodegc_shrinker); out_fail_wait: if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) xfs_buftarg_drain(mp->m_logdev_targp); @@ -1105,7 +1105,7 @@ xfs_unmountfs( #if defined(DEBUG) xfs_errortag_clearall(mp); #endif - unregister_shrinker(&mp->m_inodegc_shrinker); + shrinker_free(mp->m_inodegc_shrinker); xfs_free_perag(mp); xfs_errortag_del(mp); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index d19cca099bc3a..219681d29fbc4 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -219,7 +219,7 @@ typedef struct xfs_mount { atomic_t m_agirotor; /* last ag dir inode alloced */ /* Memory shrinker to throttle and reprioritize inodegc */ - struct shrinker m_inodegc_shrinker; + struct shrinker *m_inodegc_shrinker; /* * Workqueue item so that we can coalesce multiple inode flush attempts * into a single flush. diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 086e78a6143aa..94a7932ac5700 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -504,8 +504,7 @@ xfs_qm_shrink_scan( struct shrinker *shrink, struct shrink_control *sc) { - struct xfs_quotainfo *qi = container_of(shrink, - struct xfs_quotainfo, qi_shrinker); + struct xfs_quotainfo *qi = shrink->private_data; struct xfs_qm_isolate isol; unsigned long freed; int error; @@ -539,8 +538,7 @@ xfs_qm_shrink_count( struct shrinker *shrink, struct shrink_control *sc) { - struct xfs_quotainfo *qi = container_of(shrink, - struct xfs_quotainfo, qi_shrinker); + struct xfs_quotainfo *qi = shrink->private_data; return list_lru_shrink_count(&qi->qi_lru, sc); } @@ -680,15 +678,18 @@ xfs_qm_init_quotainfo( if (XFS_IS_PQUOTA_ON(mp)) xfs_qm_set_defquota(mp, XFS_DQTYPE_PROJ, qinf); - qinf->qi_shrinker.count_objects = xfs_qm_shrink_count; - qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan; - qinf->qi_shrinker.seeks = DEFAULT_SEEKS; - qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE; - - error = register_shrinker(&qinf->qi_shrinker, "xfs-qm:%s", - mp->m_super->s_id); - if (error) + qinf->qi_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE, "xfs-qm:%s", + mp->m_super->s_id); + if (!qinf->qi_shrinker) { + error = -ENOMEM; goto out_free_inos; + } + + qinf->qi_shrinker->count_objects = xfs_qm_shrink_count; + qinf->qi_shrinker->scan_objects = xfs_qm_shrink_scan; + qinf->qi_shrinker->private_data = qinf; + + shrinker_register(qinf->qi_shrinker); return 0; @@ -718,7 +719,7 @@ xfs_qm_destroy_quotainfo( qi = mp->m_quotainfo; ASSERT(qi != NULL); - unregister_shrinker(&qi->qi_shrinker); + shrinker_free(qi->qi_shrinker); list_lru_destroy(&qi->qi_lru); xfs_qm_destroy_quotainos(qi); mutex_destroy(&qi->qi_tree_lock); diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 9683f0457d192..d5c9fc4ba591e 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -63,7 +63,7 @@ struct xfs_quotainfo { struct xfs_def_quota qi_usr_default; struct xfs_def_quota qi_grp_default; struct xfs_def_quota qi_prj_default; - struct shrinker qi_shrinker; + struct shrinker *qi_shrinker; /* Minimum and maximum quota expiration timestamp values. */ time64_t qi_expiry_min; diff --git a/include/linux/fs.h b/include/linux/fs.h index 9d903573358e3..d104f393cd276 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1300,7 +1300,7 @@ struct super_block { const struct dentry_operations *s_d_op; /* default d_op for dentries */ - struct shrinker s_shrink; /* per-sb shrinker handle */ + struct shrinker *s_shrink; /* per-sb shrinker handle */ /* Number of inodes with nlink == 0 but still referenced */ atomic_long_t s_remove_count; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 476e92a02bd29..a1fd41415342e 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -899,7 +899,7 @@ struct journal_s * Journal head shrinker, reclaim buffer's journal head which * has been written back. */ - struct shrinker j_shrinker; + struct shrinker *j_shrinker; /** * @j_checkpoint_jh_count: diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8ceda226fc2bf..99566b12a20f1 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -21,6 +21,7 @@ #include #include #include +#include #include struct mem_cgroup; @@ -95,20 +96,6 @@ struct mem_cgroup_reclaim_iter { DEEPIN_KABI_RESERVE(3) }; -/* - * Bitmap and deferred work of shrinker::id corresponding to memcg-aware - * shrinkers, which have elements charged to this memcg. - */ -struct shrinker_info { - struct rcu_head rcu; - atomic_long_t *nr_deferred; - unsigned long *map; - int map_nr_max; - DEEPIN_KABI_RESERVE(1) - DEEPIN_KABI_RESERVE(2) - DEEPIN_KABI_RESERVE(3) -}; - struct lruvec_stats_percpu { /* Local (CPU and cgroup) state */ long state[NR_VM_NODE_STAT_ITEMS]; diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 3aa2119c3bc3c..3b034a8c35b18 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -4,8 +4,27 @@ #include #include +#include +#include #include +#define SHRINKER_UNIT_BITS BITS_PER_LONG + +/* + * Bitmap and deferred work of shrinker::id corresponding to memcg-aware + * shrinkers, which have elements charged to the memcg. + */ +struct shrinker_info_unit { + atomic_long_t nr_deferred[SHRINKER_UNIT_BITS]; + DECLARE_BITMAP(map, SHRINKER_UNIT_BITS); +}; + +struct shrinker_info { + struct rcu_head rcu; + int map_nr_max; + struct shrinker_info_unit *unit[]; +}; + /* * This struct is used to pass information from page reclaim to the shrinkers. * We consolidate the values for easier extension later. @@ -74,6 +93,19 @@ struct shrinker { int seeks; /* seeks to recreate an obj */ unsigned flags; + /* + * The reference count of this shrinker. Registered shrinker have an + * initial refcount of 1, then the lookup operations are now allowed + * to use it via shrinker_try_get(). Later in the unregistration step, + * the initial refcount will be discarded, and will free the shrinker + * asynchronously via RCU after its refcount reaches 0. + */ + refcount_t refcount; + struct completion done; /* use to wait for refcount to reach 0 */ + struct rcu_head rcu; + + void *private_data; + /* These are for internal use */ struct list_head list; #ifdef CONFIG_MEMCG @@ -93,24 +125,34 @@ struct shrinker { }; #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ -/* Flags */ -#define SHRINKER_REGISTERED (1 << 0) -#define SHRINKER_NUMA_AWARE (1 << 1) -#define SHRINKER_MEMCG_AWARE (1 << 2) +/* Internal flags */ +#define SHRINKER_REGISTERED BIT(0) +#define SHRINKER_ALLOCATED BIT(1) + +/* Flags for users to use */ +#define SHRINKER_NUMA_AWARE BIT(2) +#define SHRINKER_MEMCG_AWARE BIT(3) /* * It just makes sense when the shrinker is also MEMCG_AWARE for now, * non-MEMCG_AWARE shrinker should not have this flag set. */ -#define SHRINKER_NONSLAB (1 << 3) - -extern int __printf(2, 3) prealloc_shrinker(struct shrinker *shrinker, - const char *fmt, ...); -extern void register_shrinker_prepared(struct shrinker *shrinker); -extern int __printf(2, 3) register_shrinker(struct shrinker *shrinker, - const char *fmt, ...); -extern void unregister_shrinker(struct shrinker *shrinker); -extern void free_prealloced_shrinker(struct shrinker *shrinker); -extern void synchronize_shrinkers(void); +#define SHRINKER_NONSLAB BIT(4) + +__printf(2, 3) +struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...); +void shrinker_register(struct shrinker *shrinker); +void shrinker_free(struct shrinker *shrinker); + +static inline bool shrinker_try_get(struct shrinker *shrinker) +{ + return refcount_inc_not_zero(&shrinker->refcount); +} + +static inline void shrinker_put(struct shrinker *shrinker) +{ + if (refcount_dec_and_test(&shrinker->refcount)) + complete(&shrinker->done); +} #ifdef CONFIG_SHRINKER_DEBUG extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index c9b72f4171e09..cf540eb2aa592 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3542,13 +3542,6 @@ kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) return freed == 0 ? SHRINK_STOP : freed; } -static struct shrinker kfree_rcu_shrinker = { - .count_objects = kfree_rcu_shrink_count, - .scan_objects = kfree_rcu_shrink_scan, - .batch = 0, - .seeks = DEFAULT_SEEKS, -}; - void __init kfree_rcu_scheduler_running(void) { int cpu; @@ -5041,6 +5034,7 @@ static void __init kfree_rcu_batch_init(void) { int cpu; int i, j; + struct shrinker *kfree_rcu_shrinker; /* Clamp it to [0:100] seconds interval. */ if (rcu_delay_page_cache_fill_msec < 0 || @@ -5072,8 +5066,17 @@ static void __init kfree_rcu_batch_init(void) INIT_DELAYED_WORK(&krcp->page_cache_work, fill_page_cache_func); krcp->initialized = true; } - if (register_shrinker(&kfree_rcu_shrinker, "rcu-kfree")) - pr_err("Failed to register kfree_rcu() shrinker!\n"); + + kfree_rcu_shrinker = shrinker_alloc(0, "rcu-kfree"); + if (!kfree_rcu_shrinker) { + pr_err("Failed to allocate kfree_rcu() shrinker!\n"); + return; + } + + kfree_rcu_shrinker->count_objects = kfree_rcu_shrink_count; + kfree_rcu_shrinker->scan_objects = kfree_rcu_shrink_scan; + + shrinker_register(kfree_rcu_shrinker); } void __init rcu_init(void) diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index a3d613f063a8f..7617fd9a35b80 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -1383,13 +1383,6 @@ lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) return count ? count : SHRINK_STOP; } - -static struct shrinker lazy_rcu_shrinker = { - .count_objects = lazy_rcu_shrink_count, - .scan_objects = lazy_rcu_shrink_scan, - .batch = 0, - .seeks = DEFAULT_SEEKS, -}; #endif // #ifdef CONFIG_RCU_LAZY void __init rcu_init_nohz(void) @@ -1397,6 +1390,7 @@ void __init rcu_init_nohz(void) int cpu; struct rcu_data *rdp; const struct cpumask *cpumask = NULL; + struct shrinker * __maybe_unused lazy_rcu_shrinker; #if defined(CONFIG_NO_HZ_FULL) if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask)) @@ -1423,8 +1417,15 @@ void __init rcu_init_nohz(void) return; #ifdef CONFIG_RCU_LAZY - if (register_shrinker(&lazy_rcu_shrinker, "rcu-lazy")) - pr_err("Failed to register lazy_rcu shrinker!\n"); + lazy_rcu_shrinker = shrinker_alloc(0, "rcu-lazy"); + if (!lazy_rcu_shrinker) { + pr_err("Failed to allocate lazy_rcu shrinker!\n"); + } else { + lazy_rcu_shrinker->count_objects = lazy_rcu_shrink_count; + lazy_rcu_shrinker->scan_objects = lazy_rcu_shrink_scan; + + shrinker_register(lazy_rcu_shrinker); + } #endif // #ifdef CONFIG_RCU_LAZY if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) { diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 54ef65277379a..fcc5644c8a6d4 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -66,7 +66,11 @@ unsigned long transparent_hugepage_flags __read_mostly = (1<count_objects = shrink_huge_zero_page_count; + huge_zero_page_shrinker->scan_objects = shrink_huge_zero_page_scan; + shrinker_register(huge_zero_page_shrinker); + + deferred_split_shrinker->count_objects = deferred_split_count; + deferred_split_shrinker->scan_objects = deferred_split_scan; + shrinker_register(deferred_split_shrinker); + + return 0; +} + +static void __init thp_shrinker_exit(void) +{ + shrinker_free(huge_zero_page_shrinker); + shrinker_free(deferred_split_shrinker); +} + static int __init hugepage_init(void) { int err; @@ -478,12 +510,9 @@ static int __init hugepage_init(void) if (err) goto err_slab; - err = register_shrinker(&huge_zero_page_shrinker, "thp-zero"); - if (err) - goto err_hzp_shrinker; - err = register_shrinker(&deferred_split_shrinker, "thp-deferred_split"); + err = thp_shrinker_init(); if (err) - goto err_split_shrinker; + goto err_shrinker; /* * By default disable transparent hugepages on smaller systems, @@ -501,10 +530,8 @@ static int __init hugepage_init(void) return 0; err_khugepaged: - unregister_shrinker(&deferred_split_shrinker); -err_split_shrinker: - unregister_shrinker(&huge_zero_page_shrinker); -err_hzp_shrinker: + thp_shrinker_exit(); +err_shrinker: khugepaged_destroy(); err_slab: hugepage_exit_sysfs(hugepage_kobj); @@ -2843,7 +2870,7 @@ void deferred_split_folio(struct folio *folio) #ifdef CONFIG_MEMCG if (memcg) set_shrinker_bit(memcg, folio_nid(folio), - deferred_split_shrinker.id); + deferred_split_shrinker->id); #endif } spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); @@ -2917,14 +2944,6 @@ static unsigned long deferred_split_scan(struct shrinker *shrink, return split; } -static struct shrinker deferred_split_shrinker = { - .count_objects = deferred_split_count, - .scan_objects = deferred_split_scan, - .seeks = DEFAULT_SEEKS, - .flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE | - SHRINKER_NONSLAB, -}; - #ifdef CONFIG_DEBUG_FS static void split_huge_pages_all(void) { diff --git a/mm/internal.h b/mm/internal.h index b9be9ff122dc4..31014e38ba5f9 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1291,6 +1291,20 @@ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, int priority); #ifdef CONFIG_SHRINKER_DEBUG +static inline __printf(2, 0) int shrinker_debugfs_name_alloc( + struct shrinker *shrinker, const char *fmt, va_list ap) +{ + shrinker->name = kvasprintf_const(GFP_KERNEL, fmt, ap); + + return shrinker->name ? 0 : -ENOMEM; +} + +static inline void shrinker_debugfs_name_free(struct shrinker *shrinker) +{ + kfree_const(shrinker->name); + shrinker->name = NULL; +} + extern int shrinker_debugfs_add(struct shrinker *shrinker); extern struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, int *debugfs_id); @@ -1301,6 +1315,14 @@ static inline int shrinker_debugfs_add(struct shrinker *shrinker) { return 0; } +static inline int shrinker_debugfs_name_alloc(struct shrinker *shrinker, + const char *fmt, va_list ap) +{ + return 0; +} +static inline void shrinker_debugfs_name_free(struct shrinker *shrinker) +{ +} static inline struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, int *debugfs_id) { diff --git a/mm/shrinker.c b/mm/shrinker.c index 043c87ccfab41..68dc7b4242f2a 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -2,25 +2,61 @@ #include #include #include +#include #include #include "internal.h" LIST_HEAD(shrinker_list); -DECLARE_RWSEM(shrinker_rwsem); +DEFINE_MUTEX(shrinker_mutex); #ifdef CONFIG_MEMCG static int shrinker_nr_max; -/* The shrinker_info is expanded in a batch of BITS_PER_LONG */ -static inline int shrinker_map_size(int nr_items) +static inline int shrinker_unit_size(int nr_items) { - return (DIV_ROUND_UP(nr_items, BITS_PER_LONG) * sizeof(unsigned long)); + return (DIV_ROUND_UP(nr_items, SHRINKER_UNIT_BITS) * sizeof(struct shrinker_info_unit *)); } -static inline int shrinker_defer_size(int nr_items) +static inline void shrinker_unit_free(struct shrinker_info *info, int start) { - return (round_up(nr_items, BITS_PER_LONG) * sizeof(atomic_long_t)); + struct shrinker_info_unit **unit; + int nr, i; + + if (!info) + return; + + unit = info->unit; + nr = DIV_ROUND_UP(info->map_nr_max, SHRINKER_UNIT_BITS); + + for (i = start; i < nr; i++) { + if (!unit[i]) + break; + + kfree(unit[i]); + unit[i] = NULL; + } +} + +static inline int shrinker_unit_alloc(struct shrinker_info *new, + struct shrinker_info *old, int nid) +{ + struct shrinker_info_unit *unit; + int nr = DIV_ROUND_UP(new->map_nr_max, SHRINKER_UNIT_BITS); + int start = old ? DIV_ROUND_UP(old->map_nr_max, SHRINKER_UNIT_BITS) : 0; + int i; + + for (i = start; i < nr; i++) { + unit = kzalloc_node(sizeof(*unit), GFP_KERNEL, nid); + if (!unit) { + shrinker_unit_free(new, start); + return -ENOMEM; + } + + new->unit[i] = unit; + } + + return 0; } void free_shrinker_info(struct mem_cgroup *memcg) @@ -32,6 +68,7 @@ void free_shrinker_info(struct mem_cgroup *memcg) for_each_node(nid) { pn = memcg->nodeinfo[nid]; info = rcu_dereference_protected(pn->shrinker_info, true); + shrinker_unit_free(info, 0); kvfree(info); rcu_assign_pointer(pn->shrinker_info, NULL); } @@ -39,47 +76,46 @@ void free_shrinker_info(struct mem_cgroup *memcg) int alloc_shrinker_info(struct mem_cgroup *memcg) { - struct shrinker_info *info; - int nid, size, ret = 0; - int map_size, defer_size = 0; + int nid, ret = 0; + int array_size = 0; - down_write(&shrinker_rwsem); - map_size = shrinker_map_size(shrinker_nr_max); - defer_size = shrinker_defer_size(shrinker_nr_max); - size = map_size + defer_size; + mutex_lock(&shrinker_mutex); + array_size = shrinker_unit_size(shrinker_nr_max); for_each_node(nid) { - info = kvzalloc_node(sizeof(*info) + size, GFP_KERNEL, nid); - if (!info) { - free_shrinker_info(memcg); - ret = -ENOMEM; - break; - } - info->nr_deferred = (atomic_long_t *)(info + 1); - info->map = (void *)info->nr_deferred + defer_size; + struct shrinker_info *info = kvzalloc_node(sizeof(*info) + array_size, + GFP_KERNEL, nid); + if (!info) + goto err; info->map_nr_max = shrinker_nr_max; + if (shrinker_unit_alloc(info, NULL, nid)) { + kvfree(info); + goto err; + } rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info); } - up_write(&shrinker_rwsem); + mutex_unlock(&shrinker_mutex); return ret; + +err: + mutex_unlock(&shrinker_mutex); + free_shrinker_info(memcg); + return -ENOMEM; } static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg, int nid) { return rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info, - lockdep_is_held(&shrinker_rwsem)); + lockdep_is_held(&shrinker_mutex)); } -static int expand_one_shrinker_info(struct mem_cgroup *memcg, - int map_size, int defer_size, - int old_map_size, int old_defer_size, - int new_nr_max) +static int expand_one_shrinker_info(struct mem_cgroup *memcg, int new_size, + int old_size, int new_nr_max) { struct shrinker_info *new, *old; struct mem_cgroup_per_node *pn; int nid; - int size = map_size + defer_size; for_each_node(nid) { pn = memcg->nodeinfo[nid]; @@ -92,21 +128,17 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg, if (new_nr_max <= old->map_nr_max) continue; - new = kvmalloc_node(sizeof(*new) + size, GFP_KERNEL, nid); + new = kvzalloc_node(sizeof(*new) + new_size, GFP_KERNEL, nid); if (!new) return -ENOMEM; - new->nr_deferred = (atomic_long_t *)(new + 1); - new->map = (void *)new->nr_deferred + defer_size; new->map_nr_max = new_nr_max; - /* map: set all old bits, clear all new bits */ - memset(new->map, (int)0xff, old_map_size); - memset((void *)new->map + old_map_size, 0, map_size - old_map_size); - /* nr_deferred: copy old values, clear all new values */ - memcpy(new->nr_deferred, old->nr_deferred, old_defer_size); - memset((void *)new->nr_deferred + old_defer_size, 0, - defer_size - old_defer_size); + memcpy(new->unit, old->unit, old_size); + if (shrinker_unit_alloc(new, old, nid)) { + kvfree(new); + return -ENOMEM; + } rcu_assign_pointer(pn->shrinker_info, new); kvfree_rcu(old, rcu); @@ -118,25 +150,21 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg, static int expand_shrinker_info(int new_id) { int ret = 0; - int new_nr_max = round_up(new_id + 1, BITS_PER_LONG); - int map_size, defer_size = 0; - int old_map_size, old_defer_size = 0; + int new_nr_max = round_up(new_id + 1, SHRINKER_UNIT_BITS); + int new_size, old_size = 0; struct mem_cgroup *memcg; if (!root_mem_cgroup) goto out; - lockdep_assert_held(&shrinker_rwsem); + lockdep_assert_held(&shrinker_mutex); - map_size = shrinker_map_size(new_nr_max); - defer_size = shrinker_defer_size(new_nr_max); - old_map_size = shrinker_map_size(shrinker_nr_max); - old_defer_size = shrinker_defer_size(shrinker_nr_max); + new_size = shrinker_unit_size(new_nr_max); + old_size = shrinker_unit_size(shrinker_nr_max); memcg = mem_cgroup_iter(NULL, NULL, NULL); do { - ret = expand_one_shrinker_info(memcg, map_size, defer_size, - old_map_size, old_defer_size, + ret = expand_one_shrinker_info(memcg, new_size, old_size, new_nr_max); if (ret) { mem_cgroup_iter_break(NULL, memcg); @@ -150,17 +178,34 @@ static int expand_shrinker_info(int new_id) return ret; } +static inline int shrinker_id_to_index(int shrinker_id) +{ + return shrinker_id / SHRINKER_UNIT_BITS; +} + +static inline int shrinker_id_to_offset(int shrinker_id) +{ + return shrinker_id % SHRINKER_UNIT_BITS; +} + +static inline int calc_shrinker_id(int index, int offset) +{ + return index * SHRINKER_UNIT_BITS + offset; +} + void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id) { if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) { struct shrinker_info *info; + struct shrinker_info_unit *unit; rcu_read_lock(); info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info); + unit = info->unit[shrinker_id_to_index(shrinker_id)]; if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) { /* Pairs with smp mb in shrink_slab() */ smp_mb__before_atomic(); - set_bit(shrinker_id, info->map); + set_bit(shrinker_id_to_offset(shrinker_id), unit->map); } rcu_read_unlock(); } @@ -168,15 +213,14 @@ void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id) static DEFINE_IDR(shrinker_idr); -static int prealloc_memcg_shrinker(struct shrinker *shrinker) +static int shrinker_memcg_alloc(struct shrinker *shrinker) { int id, ret = -ENOMEM; if (mem_cgroup_disabled()) return -ENOSYS; - down_write(&shrinker_rwsem); - /* This may call shrinker, so it must use down_read_trylock() */ + mutex_lock(&shrinker_mutex); id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL); if (id < 0) goto unlock; @@ -190,17 +234,17 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker) shrinker->id = id; ret = 0; unlock: - up_write(&shrinker_rwsem); + mutex_unlock(&shrinker_mutex); return ret; } -static void unregister_memcg_shrinker(struct shrinker *shrinker) +static void shrinker_memcg_remove(struct shrinker *shrinker) { int id = shrinker->id; BUG_ON(id < 0); - lockdep_assert_held(&shrinker_rwsem); + lockdep_assert_held(&shrinker_mutex); idr_remove(&shrinker_idr, id); } @@ -209,50 +253,70 @@ static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker, struct mem_cgroup *memcg) { struct shrinker_info *info; + struct shrinker_info_unit *unit; + long nr_deferred; + + rcu_read_lock(); + info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info); + unit = info->unit[shrinker_id_to_index(shrinker->id)]; + nr_deferred = atomic_long_xchg(&unit->nr_deferred[shrinker_id_to_offset(shrinker->id)], 0); + rcu_read_unlock(); - info = shrinker_info_protected(memcg, nid); - return atomic_long_xchg(&info->nr_deferred[shrinker->id], 0); + return nr_deferred; } static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker, struct mem_cgroup *memcg) { struct shrinker_info *info; + struct shrinker_info_unit *unit; + long nr_deferred; - info = shrinker_info_protected(memcg, nid); - return atomic_long_add_return(nr, &info->nr_deferred[shrinker->id]); + rcu_read_lock(); + info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info); + unit = info->unit[shrinker_id_to_index(shrinker->id)]; + nr_deferred = + atomic_long_add_return(nr, &unit->nr_deferred[shrinker_id_to_offset(shrinker->id)]); + rcu_read_unlock(); + + return nr_deferred; } void reparent_shrinker_deferred(struct mem_cgroup *memcg) { - int i, nid; + int nid, index, offset; long nr; struct mem_cgroup *parent; struct shrinker_info *child_info, *parent_info; + struct shrinker_info_unit *child_unit, *parent_unit; parent = parent_mem_cgroup(memcg); if (!parent) parent = root_mem_cgroup; /* Prevent from concurrent shrinker_info expand */ - down_read(&shrinker_rwsem); + mutex_lock(&shrinker_mutex); for_each_node(nid) { child_info = shrinker_info_protected(memcg, nid); parent_info = shrinker_info_protected(parent, nid); - for (i = 0; i < child_info->map_nr_max; i++) { - nr = atomic_long_read(&child_info->nr_deferred[i]); - atomic_long_add(nr, &parent_info->nr_deferred[i]); + for (index = 0; index < shrinker_id_to_index(child_info->map_nr_max); index++) { + child_unit = child_info->unit[index]; + parent_unit = parent_info->unit[index]; + for (offset = 0; offset < SHRINKER_UNIT_BITS; offset++) { + nr = atomic_long_read(&child_unit->nr_deferred[offset]); + atomic_long_add(nr, &parent_unit->nr_deferred[offset]); + } } } - up_read(&shrinker_rwsem); + mutex_unlock(&shrinker_mutex); } #else -static int prealloc_memcg_shrinker(struct shrinker *shrinker) +static int shrinker_memcg_alloc(struct shrinker *shrinker) { return -ENOSYS; } -static void unregister_memcg_shrinker(struct shrinker *shrinker) +static void shrinker_memcg_remove(struct shrinker *shrinker) { } @@ -407,72 +471,119 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, { struct shrinker_info *info; unsigned long ret, freed = 0; - int i; + int offset, index = 0; if (!mem_cgroup_online(memcg)) return 0; - if (!down_read_trylock(&shrinker_rwsem)) - return 0; - - info = shrinker_info_protected(memcg, nid); + /* + * lockless algorithm of memcg shrink. + * + * The shrinker_info may be freed asynchronously via RCU in the + * expand_one_shrinker_info(), so the rcu_read_lock() needs to be used + * to ensure the existence of the shrinker_info. + * + * The shrinker_info_unit is never freed unless its corresponding memcg + * is destroyed. Here we already hold the refcount of memcg, so the + * memcg will not be destroyed, and of course shrinker_info_unit will + * not be freed. + * + * So in the memcg shrink: + * step 1: use rcu_read_lock() to guarantee existence of the + * shrinker_info. + * step 2: after getting shrinker_info_unit we can safely release the + * RCU lock. + * step 3: traverse the bitmap and calculate shrinker_id + * step 4: use rcu_read_lock() to guarantee existence of the shrinker. + * step 5: use shrinker_id to find the shrinker, then use + * shrinker_try_get() to guarantee existence of the shrinker, + * then we can release the RCU lock to do do_shrink_slab() that + * may sleep. + * step 6: do shrinker_put() paired with step 5 to put the refcount, + * if the refcount reaches 0, then wake up the waiter in + * shrinker_free() by calling complete(). + * Note: here is different from the global shrink, we don't + * need to acquire the RCU lock to guarantee existence of + * the shrinker, because we don't need to use this + * shrinker to traverse the next shrinker in the bitmap. + * step 7: we have already exited the read-side of rcu critical section + * before calling do_shrink_slab(), the shrinker_info may be + * released in expand_one_shrinker_info(), so go back to step 1 + * to reacquire the shrinker_info. + */ +again: + rcu_read_lock(); + info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info); if (unlikely(!info)) goto unlock; - for_each_set_bit(i, info->map, info->map_nr_max) { - struct shrink_control sc = { - .gfp_mask = gfp_mask, - .nid = nid, - .memcg = memcg, - }; - struct shrinker *shrinker; + if (index < shrinker_id_to_index(info->map_nr_max)) { + struct shrinker_info_unit *unit; - shrinker = idr_find(&shrinker_idr, i); - if (unlikely(!shrinker || !(shrinker->flags & SHRINKER_REGISTERED))) { - if (!shrinker) - clear_bit(i, info->map); - continue; - } + unit = info->unit[index]; - /* Call non-slab shrinkers even though kmem is disabled */ - if (!memcg_kmem_online() && - !(shrinker->flags & SHRINKER_NONSLAB)) - continue; + rcu_read_unlock(); + + for_each_set_bit(offset, unit->map, SHRINKER_UNIT_BITS) { + struct shrink_control sc = { + .gfp_mask = gfp_mask, + .nid = nid, + .memcg = memcg, + }; + struct shrinker *shrinker; + int shrinker_id = calc_shrinker_id(index, offset); + + rcu_read_lock(); + shrinker = idr_find(&shrinker_idr, shrinker_id); + if (unlikely(!shrinker || !shrinker_try_get(shrinker))) { + clear_bit(offset, unit->map); + rcu_read_unlock(); + continue; + } + rcu_read_unlock(); + + /* Call non-slab shrinkers even though kmem is disabled */ + if (!memcg_kmem_online() && + !(shrinker->flags & SHRINKER_NONSLAB)) { + clear_bit(offset, unit->map); + shrinker_put(shrinker); + continue; + } - ret = do_shrink_slab(&sc, shrinker, priority); - if (ret == SHRINK_EMPTY) { - clear_bit(i, info->map); - /* - * After the shrinker reported that it had no objects to - * free, but before we cleared the corresponding bit in - * the memcg shrinker map, a new object might have been - * added. To make sure, we have the bit set in this - * case, we invoke the shrinker one more time and reset - * the bit if it reports that it is not empty anymore. - * The memory barrier here pairs with the barrier in - * set_shrinker_bit(): - * - * list_lru_add() shrink_slab_memcg() - * list_add_tail() clear_bit() - * - * set_bit() do_shrink_slab() - */ - smp_mb__after_atomic(); ret = do_shrink_slab(&sc, shrinker, priority); - if (ret == SHRINK_EMPTY) - ret = 0; - else - set_shrinker_bit(memcg, nid, i); + if (ret == SHRINK_EMPTY) { + clear_bit(offset, unit->map); + /* + * After the shrinker reported that it had no objects to + * free, but before we cleared the corresponding bit in + * the memcg shrinker map, a new object might have been + * added. To make sure, we have the bit set in this + * case, we invoke the shrinker one more time and reset + * the bit if it reports that it is not empty anymore. + * The memory barrier here pairs with the barrier in + * set_shrinker_bit(): + * + * list_lru_add() shrink_slab_memcg() + * list_add_tail() clear_bit() + * + * set_bit() do_shrink_slab() + */ + smp_mb__after_atomic(); + ret = do_shrink_slab(&sc, shrinker, priority); + if (ret == SHRINK_EMPTY) + ret = 0; + else + set_shrinker_bit(memcg, nid, shrinker_id); + } + freed += ret; + shrinker_put(shrinker); } - freed += ret; - if (rwsem_is_contended(&shrinker_rwsem)) { - freed = freed ? : 1; - break; - } + index++; + goto again; } unlock: - up_read(&shrinker_rwsem); + rcu_read_unlock(); return freed; } #else /* !CONFIG_MEMCG */ @@ -519,191 +630,185 @@ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg)) return shrink_slab_memcg(gfp_mask, nid, memcg, priority); - if (!down_read_trylock(&shrinker_rwsem)) - goto out; - - list_for_each_entry(shrinker, &shrinker_list, list) { + /* + * lockless algorithm of global shrink. + * + * In the unregistration setp, the shrinker will be freed asynchronously + * via RCU after its refcount reaches 0. So both rcu_read_lock() and + * shrinker_try_get() can be used to ensure the existence of the shrinker. + * + * So in the global shrink: + * step 1: use rcu_read_lock() to guarantee existence of the shrinker + * and the validity of the shrinker_list walk. + * step 2: use shrinker_try_get() to try get the refcount, if successful, + * then the existence of the shrinker can also be guaranteed, + * so we can release the RCU lock to do do_shrink_slab() that + * may sleep. + * step 3: *MUST* to reacquire the RCU lock before calling shrinker_put(), + * which ensures that neither this shrinker nor the next shrinker + * will be freed in the next traversal operation. + * step 4: do shrinker_put() paired with step 2 to put the refcount, + * if the refcount reaches 0, then wake up the waiter in + * shrinker_free() by calling complete(). + */ + rcu_read_lock(); + list_for_each_entry_rcu(shrinker, &shrinker_list, list) { struct shrink_control sc = { .gfp_mask = gfp_mask, .nid = nid, .memcg = memcg, }; + if (!shrinker_try_get(shrinker)) + continue; + + rcu_read_unlock(); + ret = do_shrink_slab(&sc, shrinker, priority); if (ret == SHRINK_EMPTY) ret = 0; freed += ret; - /* - * Bail out if someone want to register a new shrinker to - * prevent the registration from being stalled for long periods - * by parallel ongoing shrinking. - */ - if (rwsem_is_contended(&shrinker_rwsem)) { - freed = freed ? : 1; - break; - } + + rcu_read_lock(); + shrinker_put(shrinker); } - up_read(&shrinker_rwsem); -out: + rcu_read_unlock(); cond_resched(); return freed; } -/* - * Add a shrinker callback to be called from the vm. - */ -static int __prealloc_shrinker(struct shrinker *shrinker) +struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...) { + struct shrinker *shrinker; unsigned int size; + va_list ap; int err; - if (shrinker->flags & SHRINKER_MEMCG_AWARE) { - err = prealloc_memcg_shrinker(shrinker); - if (err != -ENOSYS) - return err; + shrinker = kzalloc(sizeof(struct shrinker), GFP_KERNEL); + if (!shrinker) + return NULL; - shrinker->flags &= ~SHRINKER_MEMCG_AWARE; + va_start(ap, fmt); + err = shrinker_debugfs_name_alloc(shrinker, fmt, ap); + va_end(ap); + if (err) + goto err_name; + + shrinker->flags = flags | SHRINKER_ALLOCATED; + shrinker->seeks = DEFAULT_SEEKS; + + if (flags & SHRINKER_MEMCG_AWARE) { + err = shrinker_memcg_alloc(shrinker); + if (err == -ENOSYS) { + /* Memcg is not supported, fallback to non-memcg-aware shrinker. */ + shrinker->flags &= ~SHRINKER_MEMCG_AWARE; + goto non_memcg; + } + + if (err) + goto err_flags; + + return shrinker; } +non_memcg: + /* + * The nr_deferred is available on per memcg level for memcg aware + * shrinkers, so only allocate nr_deferred in the following cases: + * - non-memcg-aware shrinkers + * - !CONFIG_MEMCG + * - memcg is disabled by kernel command line + */ size = sizeof(*shrinker->nr_deferred); - if (shrinker->flags & SHRINKER_NUMA_AWARE) + if (flags & SHRINKER_NUMA_AWARE) size *= nr_node_ids; shrinker->nr_deferred = kzalloc(size, GFP_KERNEL); if (!shrinker->nr_deferred) - return -ENOMEM; + goto err_flags; - return 0; -} + return shrinker; -#ifdef CONFIG_SHRINKER_DEBUG -int prealloc_shrinker(struct shrinker *shrinker, const char *fmt, ...) -{ - va_list ap; - int err; - - va_start(ap, fmt); - shrinker->name = kvasprintf_const(GFP_KERNEL, fmt, ap); - va_end(ap); - if (!shrinker->name) - return -ENOMEM; - - err = __prealloc_shrinker(shrinker); - if (err) { - kfree_const(shrinker->name); - shrinker->name = NULL; - } - - return err; -} -#else -int prealloc_shrinker(struct shrinker *shrinker, const char *fmt, ...) -{ - return __prealloc_shrinker(shrinker); +err_flags: + shrinker_debugfs_name_free(shrinker); +err_name: + kfree(shrinker); + return NULL; } -#endif +EXPORT_SYMBOL_GPL(shrinker_alloc); -void free_prealloced_shrinker(struct shrinker *shrinker) +void shrinker_register(struct shrinker *shrinker) { -#ifdef CONFIG_SHRINKER_DEBUG - kfree_const(shrinker->name); - shrinker->name = NULL; -#endif - if (shrinker->flags & SHRINKER_MEMCG_AWARE) { - down_write(&shrinker_rwsem); - unregister_memcg_shrinker(shrinker); - up_write(&shrinker_rwsem); + if (unlikely(!(shrinker->flags & SHRINKER_ALLOCATED))) { + pr_warn("Must use shrinker_alloc() to dynamically allocate the shrinker"); return; } - kfree(shrinker->nr_deferred); - shrinker->nr_deferred = NULL; -} - -void register_shrinker_prepared(struct shrinker *shrinker) -{ - down_write(&shrinker_rwsem); - list_add_tail(&shrinker->list, &shrinker_list); + mutex_lock(&shrinker_mutex); + list_add_tail_rcu(&shrinker->list, &shrinker_list); shrinker->flags |= SHRINKER_REGISTERED; shrinker_debugfs_add(shrinker); - up_write(&shrinker_rwsem); + mutex_unlock(&shrinker_mutex); + + init_completion(&shrinker->done); + /* + * Now the shrinker is fully set up, take the first reference to it to + * indicate that lookup operations are now allowed to use it via + * shrinker_try_get(). + */ + refcount_set(&shrinker->refcount, 1); } +EXPORT_SYMBOL_GPL(shrinker_register); -static int __register_shrinker(struct shrinker *shrinker) +static void shrinker_free_rcu_cb(struct rcu_head *head) { - int err = __prealloc_shrinker(shrinker); + struct shrinker *shrinker = container_of(head, struct shrinker, rcu); - if (err) - return err; - register_shrinker_prepared(shrinker); - return 0; + kfree(shrinker->nr_deferred); + kfree(shrinker); } -#ifdef CONFIG_SHRINKER_DEBUG -int register_shrinker(struct shrinker *shrinker, const char *fmt, ...) +void shrinker_free(struct shrinker *shrinker) { - va_list ap; - int err; + struct dentry *debugfs_entry = NULL; + int debugfs_id; - va_start(ap, fmt); - shrinker->name = kvasprintf_const(GFP_KERNEL, fmt, ap); - va_end(ap); - if (!shrinker->name) - return -ENOMEM; + if (!shrinker) + return; - err = __register_shrinker(shrinker); - if (err) { - kfree_const(shrinker->name); - shrinker->name = NULL; + if (shrinker->flags & SHRINKER_REGISTERED) { + /* drop the initial refcount */ + shrinker_put(shrinker); + /* + * Wait for all lookups of the shrinker to complete, after that, + * no shrinker is running or will run again, then we can safely + * free it asynchronously via RCU and safely free the structure + * where the shrinker is located, such as super_block etc. + */ + wait_for_completion(&shrinker->done); } - return err; -} -#else -int register_shrinker(struct shrinker *shrinker, const char *fmt, ...) -{ - return __register_shrinker(shrinker); -} -#endif -EXPORT_SYMBOL(register_shrinker); -/* - * Remove one - */ -void unregister_shrinker(struct shrinker *shrinker) -{ - struct dentry *debugfs_entry; - int debugfs_id; + mutex_lock(&shrinker_mutex); + if (shrinker->flags & SHRINKER_REGISTERED) { + /* + * Now we can safely remove it from the shrinker_list and then + * free it. + */ + list_del_rcu(&shrinker->list); + debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id); + shrinker->flags &= ~SHRINKER_REGISTERED; + } - if (!(shrinker->flags & SHRINKER_REGISTERED)) - return; + shrinker_debugfs_name_free(shrinker); - down_write(&shrinker_rwsem); - list_del(&shrinker->list); - shrinker->flags &= ~SHRINKER_REGISTERED; if (shrinker->flags & SHRINKER_MEMCG_AWARE) - unregister_memcg_shrinker(shrinker); - debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id); - up_write(&shrinker_rwsem); - - shrinker_debugfs_remove(debugfs_entry, debugfs_id); + shrinker_memcg_remove(shrinker); + mutex_unlock(&shrinker_mutex); - kfree(shrinker->nr_deferred); - shrinker->nr_deferred = NULL; -} -EXPORT_SYMBOL(unregister_shrinker); + if (debugfs_entry) + shrinker_debugfs_remove(debugfs_entry, debugfs_id); -/** - * synchronize_shrinkers - Wait for all running shrinkers to complete. - * - * This is equivalent to calling unregister_shrink() and register_shrinker(), - * but atomically and with less overhead. This is useful to guarantee that all - * shrinker invocations have seen an update, before freeing memory, similar to - * rcu. - */ -void synchronize_shrinkers(void) -{ - down_write(&shrinker_rwsem); - up_write(&shrinker_rwsem); + call_rcu(&shrinker->rcu, shrinker_free_rcu_cb); } -EXPORT_SYMBOL(synchronize_shrinkers); +EXPORT_SYMBOL_GPL(shrinker_free); diff --git a/mm/shrinker_debug.c b/mm/shrinker_debug.c index e4ce509f619ec..12ea5486a3e95 100644 --- a/mm/shrinker_debug.c +++ b/mm/shrinker_debug.c @@ -9,7 +9,7 @@ #include "internal.h" /* defined in vmscan.c */ -extern struct rw_semaphore shrinker_rwsem; +extern struct mutex shrinker_mutex; extern struct list_head shrinker_list; static DEFINE_IDA(shrinker_debugfs_ida); @@ -165,7 +165,7 @@ int shrinker_debugfs_add(struct shrinker *shrinker) char buf[128]; int id; - lockdep_assert_held(&shrinker_rwsem); + lockdep_assert_held(&shrinker_mutex); /* debugfs isn't initialized yet, add debugfs entries later. */ if (!shrinker_debugfs_root) @@ -208,7 +208,7 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) if (!new) return -ENOMEM; - down_write(&shrinker_rwsem); + mutex_lock(&shrinker_mutex); old = shrinker->name; shrinker->name = new; @@ -226,7 +226,7 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) shrinker->debugfs_entry = entry; } - up_write(&shrinker_rwsem); + mutex_unlock(&shrinker_mutex); kfree_const(old); @@ -239,10 +239,7 @@ struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, { struct dentry *entry = shrinker->debugfs_entry; - lockdep_assert_held(&shrinker_rwsem); - - kfree_const(shrinker->name); - shrinker->name = NULL; + lockdep_assert_held(&shrinker_mutex); *debugfs_id = entry ? shrinker->debugfs_id : -1; shrinker->debugfs_entry = NULL; @@ -268,14 +265,14 @@ static int __init shrinker_debugfs_init(void) shrinker_debugfs_root = dentry; /* Create debugfs entries for shrinkers registered at boot */ - down_write(&shrinker_rwsem); + mutex_lock(&shrinker_mutex); list_for_each_entry(shrinker, &shrinker_list, list) if (!shrinker->debugfs_entry) { ret = shrinker_debugfs_add(shrinker); if (ret) break; } - up_write(&shrinker_rwsem); + mutex_unlock(&shrinker_mutex); return ret; } diff --git a/mm/workingset.c b/mm/workingset.c index 1cdbc1fb58731..1aec494b7d24f 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -787,13 +787,6 @@ static unsigned long scan_shadow_nodes(struct shrinker *shrinker, NULL); } -static struct shrinker workingset_shadow_shrinker = { - .count_objects = count_shadow_nodes, - .scan_objects = scan_shadow_nodes, - .seeks = 0, /* ->count reports only fully expendable nodes */ - .flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, -}; - /* * Our list_lru->lock is IRQ-safe as it nests inside the IRQ-safe * i_pages lock. @@ -802,9 +795,10 @@ static struct lock_class_key shadow_nodes_key; static int __init workingset_init(void) { + struct shrinker *workingset_shadow_shrinker; unsigned int timestamp_bits; unsigned int max_order; - int ret; + int ret = -ENOMEM; BUILD_BUG_ON(BITS_PER_LONG < EVICTION_SHIFT); /* @@ -821,17 +815,26 @@ static int __init workingset_init(void) pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", timestamp_bits, max_order, bucket_order); - ret = prealloc_shrinker(&workingset_shadow_shrinker, "mm-shadow"); - if (ret) + workingset_shadow_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE | + SHRINKER_MEMCG_AWARE, + "mm-shadow"); + if (!workingset_shadow_shrinker) goto err; + ret = __list_lru_init(&shadow_nodes, true, &shadow_nodes_key, - &workingset_shadow_shrinker); + workingset_shadow_shrinker); if (ret) goto err_list_lru; - register_shrinker_prepared(&workingset_shadow_shrinker); + + workingset_shadow_shrinker->count_objects = count_shadow_nodes; + workingset_shadow_shrinker->scan_objects = scan_shadow_nodes; + /* ->count reports only fully expendable nodes */ + workingset_shadow_shrinker->seeks = 0; + + shrinker_register(workingset_shadow_shrinker); return 0; err_list_lru: - free_prealloced_shrinker(&workingset_shadow_shrinker); + shrinker_free(workingset_shadow_shrinker); err: return ret; } diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 14327fc34aa71..e8ba3627638bf 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -227,7 +227,7 @@ struct zs_pool { struct zs_pool_stats stats; /* Compact classes */ - struct shrinker shrinker; + struct shrinker *shrinker; #ifdef CONFIG_ZSMALLOC_STAT struct dentry *stat_dentry; @@ -2087,8 +2087,7 @@ static unsigned long zs_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { unsigned long pages_freed; - struct zs_pool *pool = container_of(shrinker, struct zs_pool, - shrinker); + struct zs_pool *pool = shrinker->private_data; /* * Compact classes and calculate compaction delta. @@ -2106,8 +2105,7 @@ static unsigned long zs_shrinker_count(struct shrinker *shrinker, int i; struct size_class *class; unsigned long pages_to_free = 0; - struct zs_pool *pool = container_of(shrinker, struct zs_pool, - shrinker); + struct zs_pool *pool = shrinker->private_data; for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) { class = pool->size_class[i]; @@ -2122,18 +2120,23 @@ static unsigned long zs_shrinker_count(struct shrinker *shrinker, static void zs_unregister_shrinker(struct zs_pool *pool) { - unregister_shrinker(&pool->shrinker); + shrinker_free(pool->shrinker); } static int zs_register_shrinker(struct zs_pool *pool) { - pool->shrinker.scan_objects = zs_shrinker_scan; - pool->shrinker.count_objects = zs_shrinker_count; - pool->shrinker.batch = 0; - pool->shrinker.seeks = DEFAULT_SEEKS; + pool->shrinker = shrinker_alloc(0, "mm-zspool:%s", pool->name); + if (!pool->shrinker) + return -ENOMEM; + + pool->shrinker->scan_objects = zs_shrinker_scan; + pool->shrinker->count_objects = zs_shrinker_count; + pool->shrinker->batch = 0; + pool->shrinker->private_data = pool; - return register_shrinker(&pool->shrinker, "mm-zspool:%s", - pool->name); + shrinker_register(pool->shrinker); + + return 0; } static int calculate_zspage_chain_size(int class_size) diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 7204bdb018f7f..cc4eb575068e6 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -883,11 +883,7 @@ rpcauth_uptodatecred(struct rpc_task *task) test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0; } -static struct shrinker rpc_cred_shrinker = { - .count_objects = rpcauth_cache_shrink_count, - .scan_objects = rpcauth_cache_shrink_scan, - .seeks = DEFAULT_SEEKS, -}; +static struct shrinker *rpc_cred_shrinker; int __init rpcauth_init_module(void) { @@ -901,9 +897,17 @@ int __init rpcauth_init_module(void) err = rpc_init_authunix(); if (err < 0) goto out1; - err = register_shrinker(&rpc_cred_shrinker, "sunrpc_cred"); - if (err < 0) + rpc_cred_shrinker = shrinker_alloc(0, "sunrpc_cred"); + if (!rpc_cred_shrinker) { + err = -ENOMEM; goto out2; + } + + rpc_cred_shrinker->count_objects = rpcauth_cache_shrink_count; + rpc_cred_shrinker->scan_objects = rpcauth_cache_shrink_scan; + + shrinker_register(rpc_cred_shrinker); + return 0; out2: rpc_destroy_authunix(); @@ -917,5 +921,5 @@ void rpcauth_remove_module(void) abort_creds(machine_cred); #endif rpc_destroy_authunix(); - unregister_shrinker(&rpc_cred_shrinker); + shrinker_free(rpc_cred_shrinker); }