[2/3] mm, lru_gen: move pages in bulk when aging
Commit Message
From: Kairui Song <kasong@tencent.com>
Another overhead of aging is page moving. Actually, in most cases,
pages are being moved to the same gen after folio_inc_gen is called,
especially the protected pages. So it's better to move them in bulk.
This also has a good effect on LRU ordering. Currently when MGLRU
ages, it walks the LRU backward, and the protected pages are moved to
the tail of newer gen one by one, which reverses the order of pages in
LRU. Moving them in batches can help keep their order, only in a small
scope though due to the scan limit of MAX_LRU_BATCH pages.
After this commit, we can see a performance gain:
Tested in a 4G memcg on a EPYC 7K62 with:
memcached -u nobody -m 16384 -s /tmp/memcached.socket \
-a 0766 -t 16 -B binary &
memtier_benchmark -S /tmp/memcached.socket \
-P memcache_binary -n allkeys \
--key-minimum=1 --key-maximum=16000000 -d 1024 \
--ratio=1:0 --key-pattern=P:P -c 2 -t 16 --pipeline 8 -x 6
Average result of 18 test runs:
Before: 44017.78 Ops/sec
After patch 1-2: 44810.01 Ops/sec (+1.8%)
Signed-off-by: Kairui Song <kasong@tencent.com>
---
mm/vmscan.c | 84 ++++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 71 insertions(+), 13 deletions(-)
Comments
Hi Kairui,
kernel test robot noticed the following build warnings:
[auto build test WARNING on akpm-mm/mm-everything]
url: https://github.com/intel-lab-lkp/linux/commits/Kairui-Song/mm-lru_gen-batch-update-counters-on-againg/20231222-184601
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link: https://lore.kernel.org/r/20231222102255.56993-3-ryncsn%40gmail.com
patch subject: [PATCH 2/3] mm, lru_gen: move pages in bulk when aging
config: arc-randconfig-002-20231223 (https://download.01.org/0day-ci/archive/20231223/202312231555.KTX84YjF-lkp@intel.com/config)
compiler: arc-elf-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231223/202312231555.KTX84YjF-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202312231555.KTX84YjF-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> mm/vmscan.c:3108:1: warning: 'inline' is not at beginning of declaration [-Wold-style-declaration]
3108 | static void inline lru_gen_inc_bulk_finish(struct lru_gen_folio *lrugen,
| ^~~~~~
mm/vmscan.c:3127:1: warning: 'inline' is not at beginning of declaration [-Wold-style-declaration]
3127 | static void inline lru_gen_try_inc_bulk(struct lru_gen_folio *lrugen, struct folio *folio,
| ^~~~~~
vim +/inline +3108 mm/vmscan.c
3107
> 3108 static void inline lru_gen_inc_bulk_finish(struct lru_gen_folio *lrugen,
3109 int bulk_gen, bool type, int zone,
3110 struct gen_update_batch *batch)
3111 {
3112 if (!batch->head)
3113 return;
3114
3115 list_bulk_move_tail(&lrugen->folios[bulk_gen][type][zone],
3116 &batch->head->lru,
3117 &batch->tail->lru);
3118
3119 batch->head = NULL;
3120 }
3121
On Fri, Dec 22, 2023 at 3:24 AM Kairui Song <ryncsn@gmail.com> wrote:
>
> From: Kairui Song <kasong@tencent.com>
>
> Another overhead of aging is page moving. Actually, in most cases,
> pages are being moved to the same gen after folio_inc_gen is called,
> especially the protected pages. So it's better to move them in bulk.
>
> This also has a good effect on LRU ordering. Currently when MGLRU
> ages, it walks the LRU backward, and the protected pages are moved to
> the tail of newer gen one by one, which reverses the order of pages in
> LRU. Moving them in batches can help keep their order, only in a small
> scope though due to the scan limit of MAX_LRU_BATCH pages.
>
> After this commit, we can see a performance gain:
>
> Tested in a 4G memcg on a EPYC 7K62 with:
>
> memcached -u nobody -m 16384 -s /tmp/memcached.socket \
> -a 0766 -t 16 -B binary &
>
> memtier_benchmark -S /tmp/memcached.socket \
> -P memcache_binary -n allkeys \
> --key-minimum=1 --key-maximum=16000000 -d 1024 \
> --ratio=1:0 --key-pattern=P:P -c 2 -t 16 --pipeline 8 -x 6
>
> Average result of 18 test runs:
>
> Before: 44017.78 Ops/sec
> After patch 1-2: 44810.01 Ops/sec (+1.8%)
Was it tested with CONFIG_DEBUG_LIST=y?
Also, the (44810.01-44687.08)/44687.08=0.0027 improvement also sounded
like a noise to me.
> Signed-off-by: Kairui Song <kasong@tencent.com>
> ---
> mm/vmscan.c | 84 ++++++++++++++++++++++++++++++++++++++++++++---------
> 1 file changed, 71 insertions(+), 13 deletions(-)
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index e3b4797b9729..af1266129c1b 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -3102,9 +3102,46 @@ static int folio_update_gen(struct folio *folio, int gen)
> */
> struct gen_update_batch {
> int delta[MAX_NR_GENS];
> + struct folio *head, *tail;
> };
>
> -static void lru_gen_update_batch(struct lruvec *lruvec, bool type, int zone,
> +static void inline lru_gen_inc_bulk_finish(struct lru_gen_folio *lrugen,
> + int bulk_gen, bool type, int zone,
> + struct gen_update_batch *batch)
> +{
> + if (!batch->head)
> + return;
> +
> + list_bulk_move_tail(&lrugen->folios[bulk_gen][type][zone],
> + &batch->head->lru,
> + &batch->tail->lru);
> +
> + batch->head = NULL;
> +}
> +
> +/*
> + * When aging, protected pages will go to the tail of the same higher
> + * gen, so the can be moved in batches. Besides reduced overhead, this
> + * also avoids changing their LRU order in a small scope.
> + */
> +static void inline lru_gen_try_inc_bulk(struct lru_gen_folio *lrugen, struct folio *folio,
> + int bulk_gen, int gen, bool type, int zone,
> + struct gen_update_batch *batch)
> +{
> + /*
> + * If folio not moving to the bulk_gen, it's raced with promotion
> + * so it need to go to the head of another LRU.
> + */
> + if (bulk_gen != gen)
> + list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
> +
> + if (!batch->head)
> + batch->tail = folio;
> +
> + batch->head = folio;
> +}
> +
> +static void lru_gen_update_batch(struct lruvec *lruvec, int bulk_gen, bool type, int zone,
> struct gen_update_batch *batch)
> {
> int gen;
> @@ -3112,6 +3149,8 @@ static void lru_gen_update_batch(struct lruvec *lruvec, bool type, int zone,
> struct lru_gen_folio *lrugen = &lruvec->lrugen;
> enum lru_list lru = type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
>
> + lru_gen_inc_bulk_finish(lrugen, bulk_gen, type, zone, batch);
> +
> for (gen = 0; gen < MAX_NR_GENS; gen++) {
> int delta = batch->delta[gen];
>
> @@ -3705,6 +3744,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
> struct gen_update_batch batch = { };
> struct lru_gen_folio *lrugen = &lruvec->lrugen;
> int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
> + int bulk_gen = (old_gen + 1) % MAX_NR_GENS;
>
> if (type == LRU_GEN_ANON && !can_swap)
> goto done;
> @@ -3712,24 +3752,33 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
> /* prevent cold/hot inversion if force_scan is true */
> for (zone = 0; zone < MAX_NR_ZONES; zone++) {
> struct list_head *head = &lrugen->folios[old_gen][type][zone];
> + struct folio *prev = NULL;
>
> - while (!list_empty(head)) {
> - struct folio *folio = lru_to_folio(head);
> + if (!list_empty(head))
> + prev = lru_to_folio(head);
>
> + while (prev) {
> + struct folio *folio = prev;
> VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
> VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
> VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
>
> + if (unlikely(list_is_first(&folio->lru, head)))
> + prev = NULL;
> + else
> + prev = lru_to_folio(&folio->lru);
> +
> new_gen = folio_inc_gen(lruvec, folio, false, &batch);
> - list_move_tail(&folio->lru, &lrugen->folios[new_gen][type][zone]);
> + lru_gen_try_inc_bulk(lrugen, folio, bulk_gen, new_gen, type, zone, &batch);
>
> if (!--remaining) {
> - lru_gen_update_batch(lruvec, type, zone, &batch);
> + lru_gen_update_batch(lruvec, bulk_gen, type, zone, &batch);
> return false;
> }
> }
> - lru_gen_update_batch(lruvec, type, zone, &batch);
> +
> + lru_gen_update_batch(lruvec, bulk_gen, type, zone, &batch);
> }
> done:
> reset_ctrl_pos(lruvec, type, true);
> @@ -4240,7 +4289,7 @@ static int lru_gen_memcg_seg(struct lruvec *lruvec)
> ******************************************************************************/
>
> static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_control *sc,
> - int tier_idx, struct gen_update_batch *batch)
> + int tier_idx, int bulk_gen, struct gen_update_batch *batch)
> {
> bool success;
> int gen = folio_lru_gen(folio);
> @@ -4283,7 +4332,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
> int hist = lru_hist_from_seq(lrugen->min_seq[type]);
>
> gen = folio_inc_gen(lruvec, folio, false, batch);
> - list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> + lru_gen_try_inc_bulk(lrugen, folio, bulk_gen, gen, type, zone, batch);
>
> WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
> lrugen->protected[hist][type][tier - 1] + delta);
> @@ -4293,7 +4342,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
> /* ineligible */
> if (zone > sc->reclaim_idx || skip_cma(folio, sc)) {
> gen = folio_inc_gen(lruvec, folio, false, batch);
> - list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> + lru_gen_try_inc_bulk(lrugen, folio, bulk_gen, gen, type, zone, batch);
> return true;
> }
>
> @@ -4367,11 +4416,16 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
> LIST_HEAD(moved);
> int skipped_zone = 0;
> struct gen_update_batch batch = { };
> + int bulk_gen = (gen + 1) % MAX_NR_GENS;
> int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
> struct list_head *head = &lrugen->folios[gen][type][zone];
> + struct folio *prev = NULL;
>
> - while (!list_empty(head)) {
> - struct folio *folio = lru_to_folio(head);
> + if (!list_empty(head))
> + prev = lru_to_folio(head);
> +
> + while (prev) {
> + struct folio *folio = prev;
> int delta = folio_nr_pages(folio);
>
> VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
> @@ -4380,8 +4434,12 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
> VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
>
> scanned += delta;
> + if (unlikely(list_is_first(&folio->lru, head)))
> + prev = NULL;
> + else
> + prev = lru_to_folio(&folio->lru);
>
> - if (sort_folio(lruvec, folio, sc, tier, &batch))
> + if (sort_folio(lruvec, folio, sc, tier, bulk_gen, &batch))
> sorted += delta;
> else if (isolate_folio(lruvec, folio, sc)) {
> list_add(&folio->lru, list);
> @@ -4401,7 +4459,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
> skipped += skipped_zone;
> }
>
> - lru_gen_update_batch(lruvec, type, zone, &batch);
> + lru_gen_update_batch(lruvec, bulk_gen, type, zone, &batch);
>
> if (!remaining || isolated >= MIN_LRU_BATCH)
> break;
> --
> 2.43.0
>
Yu Zhao <yuzhao@google.com> 于2023年12月25日周一 14:58写道:
>
> On Fri, Dec 22, 2023 at 3:24 AM Kairui Song <ryncsn@gmail.com> wrote:
> >
> > From: Kairui Song <kasong@tencent.com>
> >
> > Another overhead of aging is page moving. Actually, in most cases,
> > pages are being moved to the same gen after folio_inc_gen is called,
> > especially the protected pages. So it's better to move them in bulk.
> >
> > This also has a good effect on LRU ordering. Currently when MGLRU
> > ages, it walks the LRU backward, and the protected pages are moved to
> > the tail of newer gen one by one, which reverses the order of pages in
> > LRU. Moving them in batches can help keep their order, only in a small
> > scope though due to the scan limit of MAX_LRU_BATCH pages.
> >
> > After this commit, we can see a performance gain:
> >
> > Tested in a 4G memcg on a EPYC 7K62 with:
> >
> > memcached -u nobody -m 16384 -s /tmp/memcached.socket \
> > -a 0766 -t 16 -B binary &
> >
> > memtier_benchmark -S /tmp/memcached.socket \
> > -P memcache_binary -n allkeys \
> > --key-minimum=1 --key-maximum=16000000 -d 1024 \
> > --ratio=1:0 --key-pattern=P:P -c 2 -t 16 --pipeline 8 -x 6
> >
> > Average result of 18 test runs:
> >
> > Before: 44017.78 Ops/sec
> > After patch 1-2: 44810.01 Ops/sec (+1.8%)
>
> Was it tested with CONFIG_DEBUG_LIST=y?
>
Hi, CONFIG_DEBUG_LIST is disabled here.
> Also, the (44810.01-44687.08)/44687.08=0.0027 improvement also sounded
> like a noise to me.
>
@@ -3102,9 +3102,46 @@ static int folio_update_gen(struct folio *folio, int gen)
*/
struct gen_update_batch {
int delta[MAX_NR_GENS];
+ struct folio *head, *tail;
};
-static void lru_gen_update_batch(struct lruvec *lruvec, bool type, int zone,
+static void inline lru_gen_inc_bulk_finish(struct lru_gen_folio *lrugen,
+ int bulk_gen, bool type, int zone,
+ struct gen_update_batch *batch)
+{
+ if (!batch->head)
+ return;
+
+ list_bulk_move_tail(&lrugen->folios[bulk_gen][type][zone],
+ &batch->head->lru,
+ &batch->tail->lru);
+
+ batch->head = NULL;
+}
+
+/*
+ * When aging, protected pages will go to the tail of the same higher
+ * gen, so the can be moved in batches. Besides reduced overhead, this
+ * also avoids changing their LRU order in a small scope.
+ */
+static void inline lru_gen_try_inc_bulk(struct lru_gen_folio *lrugen, struct folio *folio,
+ int bulk_gen, int gen, bool type, int zone,
+ struct gen_update_batch *batch)
+{
+ /*
+ * If folio not moving to the bulk_gen, it's raced with promotion
+ * so it need to go to the head of another LRU.
+ */
+ if (bulk_gen != gen)
+ list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
+
+ if (!batch->head)
+ batch->tail = folio;
+
+ batch->head = folio;
+}
+
+static void lru_gen_update_batch(struct lruvec *lruvec, int bulk_gen, bool type, int zone,
struct gen_update_batch *batch)
{
int gen;
@@ -3112,6 +3149,8 @@ static void lru_gen_update_batch(struct lruvec *lruvec, bool type, int zone,
struct lru_gen_folio *lrugen = &lruvec->lrugen;
enum lru_list lru = type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
+ lru_gen_inc_bulk_finish(lrugen, bulk_gen, type, zone, batch);
+
for (gen = 0; gen < MAX_NR_GENS; gen++) {
int delta = batch->delta[gen];
@@ -3705,6 +3744,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
struct gen_update_batch batch = { };
struct lru_gen_folio *lrugen = &lruvec->lrugen;
int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
+ int bulk_gen = (old_gen + 1) % MAX_NR_GENS;
if (type == LRU_GEN_ANON && !can_swap)
goto done;
@@ -3712,24 +3752,33 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
/* prevent cold/hot inversion if force_scan is true */
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
struct list_head *head = &lrugen->folios[old_gen][type][zone];
+ struct folio *prev = NULL;
- while (!list_empty(head)) {
- struct folio *folio = lru_to_folio(head);
+ if (!list_empty(head))
+ prev = lru_to_folio(head);
+ while (prev) {
+ struct folio *folio = prev;
VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
+ if (unlikely(list_is_first(&folio->lru, head)))
+ prev = NULL;
+ else
+ prev = lru_to_folio(&folio->lru);
+
new_gen = folio_inc_gen(lruvec, folio, false, &batch);
- list_move_tail(&folio->lru, &lrugen->folios[new_gen][type][zone]);
+ lru_gen_try_inc_bulk(lrugen, folio, bulk_gen, new_gen, type, zone, &batch);
if (!--remaining) {
- lru_gen_update_batch(lruvec, type, zone, &batch);
+ lru_gen_update_batch(lruvec, bulk_gen, type, zone, &batch);
return false;
}
}
- lru_gen_update_batch(lruvec, type, zone, &batch);
+
+ lru_gen_update_batch(lruvec, bulk_gen, type, zone, &batch);
}
done:
reset_ctrl_pos(lruvec, type, true);
@@ -4240,7 +4289,7 @@ static int lru_gen_memcg_seg(struct lruvec *lruvec)
******************************************************************************/
static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_control *sc,
- int tier_idx, struct gen_update_batch *batch)
+ int tier_idx, int bulk_gen, struct gen_update_batch *batch)
{
bool success;
int gen = folio_lru_gen(folio);
@@ -4283,7 +4332,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
int hist = lru_hist_from_seq(lrugen->min_seq[type]);
gen = folio_inc_gen(lruvec, folio, false, batch);
- list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
+ lru_gen_try_inc_bulk(lrugen, folio, bulk_gen, gen, type, zone, batch);
WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
lrugen->protected[hist][type][tier - 1] + delta);
@@ -4293,7 +4342,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
/* ineligible */
if (zone > sc->reclaim_idx || skip_cma(folio, sc)) {
gen = folio_inc_gen(lruvec, folio, false, batch);
- list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
+ lru_gen_try_inc_bulk(lrugen, folio, bulk_gen, gen, type, zone, batch);
return true;
}
@@ -4367,11 +4416,16 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
LIST_HEAD(moved);
int skipped_zone = 0;
struct gen_update_batch batch = { };
+ int bulk_gen = (gen + 1) % MAX_NR_GENS;
int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
struct list_head *head = &lrugen->folios[gen][type][zone];
+ struct folio *prev = NULL;
- while (!list_empty(head)) {
- struct folio *folio = lru_to_folio(head);
+ if (!list_empty(head))
+ prev = lru_to_folio(head);
+
+ while (prev) {
+ struct folio *folio = prev;
int delta = folio_nr_pages(folio);
VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
@@ -4380,8 +4434,12 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
scanned += delta;
+ if (unlikely(list_is_first(&folio->lru, head)))
+ prev = NULL;
+ else
+ prev = lru_to_folio(&folio->lru);
- if (sort_folio(lruvec, folio, sc, tier, &batch))
+ if (sort_folio(lruvec, folio, sc, tier, bulk_gen, &batch))
sorted += delta;
else if (isolate_folio(lruvec, folio, sc)) {
list_add(&folio->lru, list);
@@ -4401,7 +4459,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
skipped += skipped_zone;
}
- lru_gen_update_batch(lruvec, type, zone, &batch);
+ lru_gen_update_batch(lruvec, bulk_gen, type, zone, &batch);
if (!remaining || isolated >= MIN_LRU_BATCH)
break;