[v2,1/4] mm: kmsan: handle alloc failures in kmsan_vmap_pages_range_noflush()
Commit Message
As reported by Dipanjan Das, when KMSAN is used together with kernel
fault injection (or, generally, even without the latter), calls to
kcalloc() or __vmap_pages_range_noflush() may fail, leaving the
metadata mappings for the virtual mapping in an inconsistent state.
When these metadata mappings are accessed later, the kernel crashes.
To address the problem, we return a non-zero error code from
kmsan_vmap_pages_range_noflush() in the case of any allocation/mapping
failure inside it, and make vmap_pages_range_noflush() return an error
if KMSAN fails to allocate the metadata.
This patch also removes KMSAN_WARN_ON() from vmap_pages_range_noflush(),
as these allocation failures are not fatal anymore.
Reported-by: Dipanjan Das <mail.dipanjan.das@gmail.com>
Link: https://lore.kernel.org/linux-mm/CANX2M5ZRrRA64k0hOif02TjmY9kbbO2aCBPyq79es34RXZ=cAw@mail.gmail.com/
Fixes: b073d7f8aee4 ("mm: kmsan: maintain KMSAN metadata for page operations")
Signed-off-by: Alexander Potapenko <glider@google.com>
---
v2:
-- return 0 from the inline version of kmsan_vmap_pages_range_noflush()
(spotted by kernel test robot <lkp@intel.com>)
---
include/linux/kmsan.h | 20 +++++++++++---------
mm/kmsan/shadow.c | 27 ++++++++++++++++++---------
mm/vmalloc.c | 6 +++++-
3 files changed, 34 insertions(+), 19 deletions(-)
Comments
On Thu, 13 Apr 2023 at 15:12, Alexander Potapenko <glider@google.com> wrote:
>
> As reported by Dipanjan Das, when KMSAN is used together with kernel
> fault injection (or, generally, even without the latter), calls to
> kcalloc() or __vmap_pages_range_noflush() may fail, leaving the
> metadata mappings for the virtual mapping in an inconsistent state.
> When these metadata mappings are accessed later, the kernel crashes.
>
> To address the problem, we return a non-zero error code from
> kmsan_vmap_pages_range_noflush() in the case of any allocation/mapping
> failure inside it, and make vmap_pages_range_noflush() return an error
> if KMSAN fails to allocate the metadata.
>
> This patch also removes KMSAN_WARN_ON() from vmap_pages_range_noflush(),
> as these allocation failures are not fatal anymore.
>
> Reported-by: Dipanjan Das <mail.dipanjan.das@gmail.com>
> Link: https://lore.kernel.org/linux-mm/CANX2M5ZRrRA64k0hOif02TjmY9kbbO2aCBPyq79es34RXZ=cAw@mail.gmail.com/
> Fixes: b073d7f8aee4 ("mm: kmsan: maintain KMSAN metadata for page operations")
> Signed-off-by: Alexander Potapenko <glider@google.com>
Reviewed-by: Marco Elver <elver@google.com>
Looks reasonable, thanks.
> ---
> v2:
> -- return 0 from the inline version of kmsan_vmap_pages_range_noflush()
> (spotted by kernel test robot <lkp@intel.com>)
> ---
> include/linux/kmsan.h | 20 +++++++++++---------
> mm/kmsan/shadow.c | 27 ++++++++++++++++++---------
> mm/vmalloc.c | 6 +++++-
> 3 files changed, 34 insertions(+), 19 deletions(-)
>
> diff --git a/include/linux/kmsan.h b/include/linux/kmsan.h
> index e38ae3c346184..c7ff3aefc5a13 100644
> --- a/include/linux/kmsan.h
> +++ b/include/linux/kmsan.h
> @@ -134,11 +134,12 @@ void kmsan_kfree_large(const void *ptr);
> * @page_shift: page_shift passed to vmap_range_noflush().
> *
> * KMSAN maps shadow and origin pages of @pages into contiguous ranges in
> - * vmalloc metadata address range.
> + * vmalloc metadata address range. Returns 0 on success, callers must check
> + * for non-zero return value.
> */
> -void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
> - pgprot_t prot, struct page **pages,
> - unsigned int page_shift);
> +int kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
> + pgprot_t prot, struct page **pages,
> + unsigned int page_shift);
>
> /**
> * kmsan_vunmap_kernel_range_noflush() - Notify KMSAN about a vunmap.
> @@ -281,12 +282,13 @@ static inline void kmsan_kfree_large(const void *ptr)
> {
> }
>
> -static inline void kmsan_vmap_pages_range_noflush(unsigned long start,
> - unsigned long end,
> - pgprot_t prot,
> - struct page **pages,
> - unsigned int page_shift)
> +static inline int kmsan_vmap_pages_range_noflush(unsigned long start,
> + unsigned long end,
> + pgprot_t prot,
> + struct page **pages,
> + unsigned int page_shift)
> {
> + return 0;
> }
>
> static inline void kmsan_vunmap_range_noflush(unsigned long start,
> diff --git a/mm/kmsan/shadow.c b/mm/kmsan/shadow.c
> index a787c04e9583c..b8bb95eea5e3d 100644
> --- a/mm/kmsan/shadow.c
> +++ b/mm/kmsan/shadow.c
> @@ -216,27 +216,29 @@ void kmsan_free_page(struct page *page, unsigned int order)
> kmsan_leave_runtime();
> }
>
> -void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
> - pgprot_t prot, struct page **pages,
> - unsigned int page_shift)
> +int kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
> + pgprot_t prot, struct page **pages,
> + unsigned int page_shift)
> {
> unsigned long shadow_start, origin_start, shadow_end, origin_end;
> struct page **s_pages, **o_pages;
> - int nr, mapped;
> + int nr, mapped, err = 0;
>
> if (!kmsan_enabled)
> - return;
> + return 0;
>
> shadow_start = vmalloc_meta((void *)start, KMSAN_META_SHADOW);
> shadow_end = vmalloc_meta((void *)end, KMSAN_META_SHADOW);
> if (!shadow_start)
> - return;
> + return 0;
>
> nr = (end - start) / PAGE_SIZE;
> s_pages = kcalloc(nr, sizeof(*s_pages), GFP_KERNEL);
> o_pages = kcalloc(nr, sizeof(*o_pages), GFP_KERNEL);
> - if (!s_pages || !o_pages)
> + if (!s_pages || !o_pages) {
> + err = -ENOMEM;
> goto ret;
> + }
> for (int i = 0; i < nr; i++) {
> s_pages[i] = shadow_page_for(pages[i]);
> o_pages[i] = origin_page_for(pages[i]);
> @@ -249,10 +251,16 @@ void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
> kmsan_enter_runtime();
> mapped = __vmap_pages_range_noflush(shadow_start, shadow_end, prot,
> s_pages, page_shift);
> - KMSAN_WARN_ON(mapped);
> + if (mapped) {
> + err = mapped;
> + goto ret;
> + }
> mapped = __vmap_pages_range_noflush(origin_start, origin_end, prot,
> o_pages, page_shift);
> - KMSAN_WARN_ON(mapped);
> + if (mapped) {
> + err = mapped;
> + goto ret;
> + }
> kmsan_leave_runtime();
> flush_tlb_kernel_range(shadow_start, shadow_end);
> flush_tlb_kernel_range(origin_start, origin_end);
> @@ -262,6 +270,7 @@ void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
> ret:
> kfree(s_pages);
> kfree(o_pages);
> + return err;
> }
>
> /* Allocate metadata for pages allocated at boot time. */
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index a50072066221a..1355d95cce1ca 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -605,7 +605,11 @@ int __vmap_pages_range_noflush(unsigned long addr, unsigned long end,
> int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
> pgprot_t prot, struct page **pages, unsigned int page_shift)
> {
> - kmsan_vmap_pages_range_noflush(addr, end, prot, pages, page_shift);
> + int ret = kmsan_vmap_pages_range_noflush(addr, end, prot, pages,
> + page_shift);
> +
> + if (ret)
> + return ret;
> return __vmap_pages_range_noflush(addr, end, prot, pages, page_shift);
> }
>
> --
> 2.40.0.577.gac1e443424-goog
>
@@ -134,11 +134,12 @@ void kmsan_kfree_large(const void *ptr);
* @page_shift: page_shift passed to vmap_range_noflush().
*
* KMSAN maps shadow and origin pages of @pages into contiguous ranges in
- * vmalloc metadata address range.
+ * vmalloc metadata address range. Returns 0 on success, callers must check
+ * for non-zero return value.
*/
-void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
- pgprot_t prot, struct page **pages,
- unsigned int page_shift);
+int kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
+ pgprot_t prot, struct page **pages,
+ unsigned int page_shift);
/**
* kmsan_vunmap_kernel_range_noflush() - Notify KMSAN about a vunmap.
@@ -281,12 +282,13 @@ static inline void kmsan_kfree_large(const void *ptr)
{
}
-static inline void kmsan_vmap_pages_range_noflush(unsigned long start,
- unsigned long end,
- pgprot_t prot,
- struct page **pages,
- unsigned int page_shift)
+static inline int kmsan_vmap_pages_range_noflush(unsigned long start,
+ unsigned long end,
+ pgprot_t prot,
+ struct page **pages,
+ unsigned int page_shift)
{
+ return 0;
}
static inline void kmsan_vunmap_range_noflush(unsigned long start,
@@ -216,27 +216,29 @@ void kmsan_free_page(struct page *page, unsigned int order)
kmsan_leave_runtime();
}
-void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
- pgprot_t prot, struct page **pages,
- unsigned int page_shift)
+int kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
+ pgprot_t prot, struct page **pages,
+ unsigned int page_shift)
{
unsigned long shadow_start, origin_start, shadow_end, origin_end;
struct page **s_pages, **o_pages;
- int nr, mapped;
+ int nr, mapped, err = 0;
if (!kmsan_enabled)
- return;
+ return 0;
shadow_start = vmalloc_meta((void *)start, KMSAN_META_SHADOW);
shadow_end = vmalloc_meta((void *)end, KMSAN_META_SHADOW);
if (!shadow_start)
- return;
+ return 0;
nr = (end - start) / PAGE_SIZE;
s_pages = kcalloc(nr, sizeof(*s_pages), GFP_KERNEL);
o_pages = kcalloc(nr, sizeof(*o_pages), GFP_KERNEL);
- if (!s_pages || !o_pages)
+ if (!s_pages || !o_pages) {
+ err = -ENOMEM;
goto ret;
+ }
for (int i = 0; i < nr; i++) {
s_pages[i] = shadow_page_for(pages[i]);
o_pages[i] = origin_page_for(pages[i]);
@@ -249,10 +251,16 @@ void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
kmsan_enter_runtime();
mapped = __vmap_pages_range_noflush(shadow_start, shadow_end, prot,
s_pages, page_shift);
- KMSAN_WARN_ON(mapped);
+ if (mapped) {
+ err = mapped;
+ goto ret;
+ }
mapped = __vmap_pages_range_noflush(origin_start, origin_end, prot,
o_pages, page_shift);
- KMSAN_WARN_ON(mapped);
+ if (mapped) {
+ err = mapped;
+ goto ret;
+ }
kmsan_leave_runtime();
flush_tlb_kernel_range(shadow_start, shadow_end);
flush_tlb_kernel_range(origin_start, origin_end);
@@ -262,6 +270,7 @@ void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
ret:
kfree(s_pages);
kfree(o_pages);
+ return err;
}
/* Allocate metadata for pages allocated at boot time. */
@@ -605,7 +605,11 @@ int __vmap_pages_range_noflush(unsigned long addr, unsigned long end,
int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
pgprot_t prot, struct page **pages, unsigned int page_shift)
{
- kmsan_vmap_pages_range_noflush(addr, end, prot, pages, page_shift);
+ int ret = kmsan_vmap_pages_range_noflush(addr, end, prot, pages,
+ page_shift);
+
+ if (ret)
+ return ret;
return __vmap_pages_range_noflush(addr, end, prot, pages, page_shift);
}