dm-crypt: allocate compound pages if possible
Commit Message
It was reported that allocating pages for the write buffer in dm-crypt
causes measurable overhead [1].
This patch changes dm-crypt to allocate compound pages if they are
available. If not, we fall back to the mempool.
[1] https://listman.redhat.com/archives/dm-devel/2023-February/053284.html
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
drivers/md/dm-crypt.c | 50 ++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 42 insertions(+), 8 deletions(-)
Comments
On Thu, Feb 16, 2023 at 12:47:08PM -0500, Mikulas Patocka wrote:
> + while (order > 0) {
> + page = alloc_pages(gfp_mask
> + | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN, order);
... | __GFP_COMP
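That is, the high-order attempt would also pass __GFP_COMP (the suggested call, sketched):

	page = alloc_pages(gfp_mask | __GFP_COMP
		| __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN, order);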
> page = mempool_alloc(&cc->page_pool, gfp_mask);
> if (!page) {
> crypt_free_buffer_pages(cc, clone);
> bio_put(clone);
> gfp_mask |= __GFP_DIRECT_RECLAIM;
> + order = 0;
> goto retry;
> }
>
> - len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size;
> -
> - bio_add_page(clone, page, len, 0);
> +have_pages:
> + page->compound_order = order;
No. You'll corrupt the next page if page is order-0, which it is if it
came from the mempool. Also we've deleted page->compound_order in -next
so you can't make this mistake. Using __GFP_COMP will set this field
for you, so you can just drop this line.
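For illustration, with __GFP_COMP the allocator records the order in the head page, so there is nothing to store by hand:

	page = alloc_pages(gfp_mask | __GFP_COMP | __GFP_NOWARN, order);
	/* compound_order(page) now returns 'order'; no manual bookkeeping */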
> - remaining_size -= len;
> + for (o = 0; o < 1U << order; o++) {
> + unsigned len = min((unsigned)PAGE_SIZE, remaining_size);
> + bio_add_page(clone, page, len, 0);
> + remaining_size -= len;
> + page++;
You can add multiple pages at once, whether they're compound or not. So
replace this entire loop with:
bio_add_page(clone, page, remaining_size, 0);
> @@ -1711,10 +1732,23 @@ static void crypt_free_buffer_pages(stru
> {
> struct bio_vec *bv;
> struct bvec_iter_all iter_all;
> + unsigned skip_entries = 0;
>
> bio_for_each_segment_all(bv, clone, iter_all) {
> - BUG_ON(!bv->bv_page);
> - mempool_free(bv->bv_page, &cc->page_pool);
> + unsigned order;
> + struct page *page = bv->bv_page;
> + BUG_ON(!page);
> + if (skip_entries) {
> + skip_entries--;
> + continue;
> + }
> + order = page->compound_order;
> + if (order) {
> + __free_pages(page, order);
> + skip_entries = (1U << order) - 1;
> + } else {
> + mempool_free(page, &cc->page_pool);
> + }
You can simplify this by using the folio code.
struct folio_iter fi;
bio_for_each_folio_all(fi, bio) {
if (folio_test_large(fi.folio))
	folio_put(fi.folio);
else
	mempool_free(&fi.folio->page, &cc->page_pool);
}
(further work would actually convert this driver to use folios instead
of pages)
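A hypothetical folio-based allocation path could then look like this (a sketch only, reusing the names from the current function; note that folio_alloc() implies __GFP_COMP):

	struct folio *folio;

	folio = folio_alloc(gfp_mask | __GFP_NOMEMALLOC | __GFP_NORETRY
			    | __GFP_NOWARN, order);
	if (folio) {
		unsigned len = min_t(unsigned, folio_size(folio), remaining_size);

		bio_add_folio(clone, folio, len, 0);
		remaining_size -= len;
	}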
On Thu, 16 Feb 2023, Matthew Wilcox wrote:
> > - len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size;
> > -
> > - bio_add_page(clone, page, len, 0);
> > +have_pages:
> > + page->compound_order = order;
>
> No. You'll corrupt the next page if page is order-0, which it is if it
> came from the mempool. Also we've deleted page->compound_order in -next
> so you can't make this mistake. Using __GFP_COMP will set this field
> for you, so you can just drop this line.
OK
> > - remaining_size -= len;
> > + for (o = 0; o < 1U << order; o++) {
> > + unsigned len = min((unsigned)PAGE_SIZE, remaining_size);
> > + bio_add_page(clone, page, len, 0);
> > + remaining_size -= len;
> > + page++;
>
> You can add multiple pages at once, whether they're compound or not. So
> replace this entire loop with:
>
> bio_add_page(clone, page, remaining_size, 0);
This should be min((unsigned)PAGE_SIZE << order, remaining_size), because a
single allocation may be smaller than remaining_size.
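So the loop collapses to a single call along these lines (a sketch of the corrected form):

	unsigned len = min((unsigned)PAGE_SIZE << order, remaining_size);

	bio_add_page(clone, page, len, 0);
	remaining_size -= len;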
> > @@ -1711,10 +1732,23 @@ static void crypt_free_buffer_pages(stru
> > {
> > struct bio_vec *bv;
> > struct bvec_iter_all iter_all;
> > + unsigned skip_entries = 0;
> >
> > bio_for_each_segment_all(bv, clone, iter_all) {
> > - BUG_ON(!bv->bv_page);
> > - mempool_free(bv->bv_page, &cc->page_pool);
> > + unsigned order;
> > + struct page *page = bv->bv_page;
> > + BUG_ON(!page);
> > + if (skip_entries) {
> > + skip_entries--;
> > + continue;
> > + }
> > + order = page->compound_order;
> > + if (order) {
> > + __free_pages(page, order);
> > + skip_entries = (1U << order) - 1;
> > + } else {
> > + mempool_free(page, &cc->page_pool);
> > + }
>
> You can simplify this by using the folio code.
>
> struct folio_iter fi;
>
> bio_for_each_folio_all(fi, bio) {
> if (folio_test_large(fi.folio))
> 	folio_put(fi.folio);
> else
> 	mempool_free(&fi.folio->page, &cc->page_pool);
> }
OK. I'm sending version 2 of the patch.
> (further work would actually convert this driver to use folios instead
> of pages)
Mikulas
===================================================================
@@ -1657,6 +1657,9 @@ static void crypt_free_buffer_pages(stru
* In order to not degrade performance with excessive locking, we try
* non-blocking allocations without a mutex first but on failure we fallback
* to blocking allocations with a mutex.
+ *
+ * In order to reduce allocation overhead, we try to allocate compound pages in
+ * the first pass. If they are not available, we fall back to the mempool.
*/
static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
{
@@ -1664,8 +1667,9 @@ static struct bio *crypt_alloc_buffer(st
struct bio *clone;
unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
- unsigned i, len, remaining_size;
+ unsigned remaining_size;
struct page *page;
+ unsigned order = MAX_ORDER - 1;
retry:
if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
@@ -1678,20 +1682,37 @@ retry:
remaining_size = size;
- for (i = 0; i < nr_iovecs; i++) {
+ while (remaining_size) {
+ unsigned o;
+ unsigned remaining_order = __fls((remaining_size + PAGE_SIZE - 1) >> PAGE_SHIFT);
+ order = min(order, remaining_order);
+
+ while (order > 0) {
+ page = alloc_pages(gfp_mask
+ | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN, order);
+ if (likely(page != NULL))
+ goto have_pages;
+ order--;
+ }
+
page = mempool_alloc(&cc->page_pool, gfp_mask);
if (!page) {
crypt_free_buffer_pages(cc, clone);
bio_put(clone);
gfp_mask |= __GFP_DIRECT_RECLAIM;
+ order = 0;
goto retry;
}
- len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size;
-
- bio_add_page(clone, page, len, 0);
+have_pages:
+ page->compound_order = order;
- remaining_size -= len;
+ for (o = 0; o < 1U << order; o++) {
+ unsigned len = min((unsigned)PAGE_SIZE, remaining_size);
+ bio_add_page(clone, page, len, 0);
+ remaining_size -= len;
+ page++;
+ }
}
/* Allocate space for integrity tags */
@@ -1711,10 +1732,23 @@ static void crypt_free_buffer_pages(stru
{
struct bio_vec *bv;
struct bvec_iter_all iter_all;
+ unsigned skip_entries = 0;
bio_for_each_segment_all(bv, clone, iter_all) {
- BUG_ON(!bv->bv_page);
- mempool_free(bv->bv_page, &cc->page_pool);
+ unsigned order;
+ struct page *page = bv->bv_page;
+ BUG_ON(!page);
+ if (skip_entries) {
+ skip_entries--;
+ continue;
+ }
+ order = page->compound_order;
+ if (order) {
+ __free_pages(page, order);
+ skip_entries = (1U << order) - 1;
+ } else {
+ mempool_free(page, &cc->page_pool);
+ }
}
}
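For reference, folding all of the review feedback into the allocation loop gives roughly the following shape (a sketch under the review's suggestions, not the actual v2 patch): __GFP_COMP on the high-order attempt, no manual compound_order store, and one bio_add_page() call per allocation.

	while (remaining_size) {
		unsigned len;
		unsigned remaining_order = __fls((remaining_size + PAGE_SIZE - 1) >> PAGE_SHIFT);
		order = min(order, remaining_order);

		while (order > 0) {
			page = alloc_pages(gfp_mask | __GFP_COMP
				| __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN, order);
			if (likely(page != NULL))
				goto have_pages;
			order--;
		}

		page = mempool_alloc(&cc->page_pool, gfp_mask);
		if (!page) {
			crypt_free_buffer_pages(cc, clone);
			bio_put(clone);
			gfp_mask |= __GFP_DIRECT_RECLAIM;
			order = 0;
			goto retry;
		}

have_pages:
		/* one bio segment covers the whole allocation, compound or not */
		len = min((unsigned)PAGE_SIZE << order, remaining_size);
		bio_add_page(clone, page, len, 0);
		remaining_size -= len;
	}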