[v7,1/3] mm/slub: only zero requested size of buffer for kzalloc when debug enabled

Message ID 20221021032405.1825078-2-feng.tang@intel.com
State New
Headers
Series mm/slub: extend redzone check for kmalloc objects |

Commit Message

Feng Tang Oct. 21, 2022, 3:24 a.m. UTC
  kzalloc/kmalloc round up the requested size to a fixed size
(mostly a power of 2), so the allocated memory can be larger than
requested. Currently the kzalloc family of APIs zeroes all of the
allocated memory.

To detect out-of-bounds usage of the extra allocated memory, only
zero the requested part, so that a redzone sanity check can be
added to the extra space later.

kzalloc users who call ksize() later and make use of this extra
space should be aware that the space is no longer zeroed when
debug is enabled. (Thanks to Kees Cook's effort to sanitize all
ksize() use cases [1], this won't be a big issue.)

[1]. https://lore.kernel.org/all/20220922031013.2150682-1-keescook@chromium.org/#r
Signed-off-by: Feng Tang <feng.tang@intel.com>
---
 mm/slab.c |  7 ++++---
 mm/slab.h | 18 ++++++++++++++++--
 mm/slub.c | 10 +++++++---
 3 files changed, 27 insertions(+), 8 deletions(-)
  

Comments

Hyeonggon Yoo Oct. 24, 2022, 2 p.m. UTC | #1
On Fri, Oct 21, 2022 at 11:24:03AM +0800, Feng Tang wrote:
> kzalloc/kmalloc will round up the request size to a fixed size
> (mostly power of 2), so the allocated memory could be more than
> requested. Currently kzalloc family APIs will zero all the
> allocated memory.
> 
> To detect out-of-bound usage of the extra allocated memory, only
> zero the requested part, so that redzone sanity check could be
> added to the extra space later.
> 
> For kzalloc users who will call ksize() later and utilize this
> extra space, please be aware that the space is not zeroed any
> more when debug is enabled. (Thanks to Kees Cook's effort to
> sanitize all ksize() user cases [1], this won't be a big issue).
> 
> [1]. https://lore.kernel.org/all/20220922031013.2150682-1-keescook@chromium.org/#r
> Signed-off-by: Feng Tang <feng.tang@intel.com>
> ---
>  mm/slab.c |  7 ++++---
>  mm/slab.h | 18 ++++++++++++++++--
>  mm/slub.c | 10 +++++++---
>  3 files changed, 27 insertions(+), 8 deletions(-)
> 
> diff --git a/mm/slab.c b/mm/slab.c
> index a5486ff8362a..4594de0e3d6b 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -3253,7 +3253,8 @@ slab_alloc_node(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags,
>  	init = slab_want_init_on_alloc(flags, cachep);
>  
>  out:
> -	slab_post_alloc_hook(cachep, objcg, flags, 1, &objp, init);
> +	slab_post_alloc_hook(cachep, objcg, flags, 1, &objp, init,
> +				cachep->object_size);
>  	return objp;
>  }
>  
> @@ -3506,13 +3507,13 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
>  	 * Done outside of the IRQ disabled section.
>  	 */
>  	slab_post_alloc_hook(s, objcg, flags, size, p,
> -				slab_want_init_on_alloc(flags, s));
> +			slab_want_init_on_alloc(flags, s), s->object_size);
>  	/* FIXME: Trace call missing. Christoph would like a bulk variant */
>  	return size;
>  error:
>  	local_irq_enable();
>  	cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_);
> -	slab_post_alloc_hook(s, objcg, flags, i, p, false);
> +	slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
>  	kmem_cache_free_bulk(s, i, p);
>  	return 0;
>  }
> diff --git a/mm/slab.h b/mm/slab.h
> index 0202a8c2f0d2..8b4ee02fc14a 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -720,12 +720,26 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
>  
>  static inline void slab_post_alloc_hook(struct kmem_cache *s,
>  					struct obj_cgroup *objcg, gfp_t flags,
> -					size_t size, void **p, bool init)
> +					size_t size, void **p, bool init,
> +					unsigned int orig_size)
>  {
> +	unsigned int zero_size = s->object_size;
>  	size_t i;
>  
>  	flags &= gfp_allowed_mask;
>  
> +	/*
> +	 * For kmalloc object, the allocated memory size(object_size) is likely
> +	 * larger than the requested size(orig_size). If redzone check is
> +	 * enabled for the extra space, don't zero it, as it will be redzoned
> +	 * soon. The redzone operation for this extra space could be seen as a
> +	 * replacement of current poisoning under certain debug option, and
> +	 * won't break other sanity checks.
> +	 */
> +	if (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
> +	    (s->flags & SLAB_KMALLOC))
> +		zero_size = orig_size;
> +
>  	/*
>  	 * As memory initialization might be integrated into KASAN,
>  	 * kasan_slab_alloc and initialization memset must be
> @@ -736,7 +750,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
>  	for (i = 0; i < size; i++) {
>  		p[i] = kasan_slab_alloc(s, p[i], flags, init);
>  		if (p[i] && init && !kasan_has_integrated_init())
> -			memset(p[i], 0, s->object_size);
> +			memset(p[i], 0, zero_size);
>  		kmemleak_alloc_recursive(p[i], s->object_size, 1,
>  					 s->flags, flags);
>  		kmsan_slab_alloc(s, p[i], flags);
> diff --git a/mm/slub.c b/mm/slub.c
> index 12354fb8d6e4..17292c2d3eee 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -3395,7 +3395,11 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_l
>  	init = slab_want_init_on_alloc(gfpflags, s);
>  
>  out:
> -	slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init);
> +	/*
> +	 * When init equals 'true', like for kzalloc() family, only
> +	 * @orig_size bytes will be zeroed instead of s->object_size
> +	 */
> +	slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init, orig_size);
>  
>  	return object;
>  }
> @@ -3852,11 +3856,11 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
>  	 * Done outside of the IRQ disabled fastpath loop.
>  	 */
>  	slab_post_alloc_hook(s, objcg, flags, size, p,
> -				slab_want_init_on_alloc(flags, s));
> +			slab_want_init_on_alloc(flags, s), s->object_size);
>  	return i;
>  error:
>  	slub_put_cpu_ptr(s->cpu_slab);
> -	slab_post_alloc_hook(s, objcg, flags, i, p, false);
> +	slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
>  	kmem_cache_free_bulk(s, i, p);
>  	return 0;
>  }
> -- 
> 2.34.1

Looks good to me.

Acked-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
  
Andrey Konovalov Oct. 27, 2022, 7:27 p.m. UTC | #2
On Fri, Oct 21, 2022 at 5:24 AM Feng Tang <feng.tang@intel.com> wrote:
>
> kzalloc/kmalloc will round up the request size to a fixed size
> (mostly power of 2), so the allocated memory could be more than
> requested. Currently kzalloc family APIs will zero all the
> allocated memory.
>
> To detect out-of-bound usage of the extra allocated memory, only
> zero the requested part, so that redzone sanity check could be
> added to the extra space later.
>
> For kzalloc users who will call ksize() later and utilize this
> extra space, please be aware that the space is not zeroed any
> more when debug is enabled. (Thanks to Kees Cook's effort to
> sanitize all ksize() user cases [1], this won't be a big issue).
>
> [1]. https://lore.kernel.org/all/20220922031013.2150682-1-keescook@chromium.org/#r
> Signed-off-by: Feng Tang <feng.tang@intel.com>
> ---
>  mm/slab.c |  7 ++++---
>  mm/slab.h | 18 ++++++++++++++++--
>  mm/slub.c | 10 +++++++---
>  3 files changed, 27 insertions(+), 8 deletions(-)
>
> diff --git a/mm/slab.c b/mm/slab.c
> index a5486ff8362a..4594de0e3d6b 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -3253,7 +3253,8 @@ slab_alloc_node(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags,
>         init = slab_want_init_on_alloc(flags, cachep);
>
>  out:
> -       slab_post_alloc_hook(cachep, objcg, flags, 1, &objp, init);
> +       slab_post_alloc_hook(cachep, objcg, flags, 1, &objp, init,
> +                               cachep->object_size);
>         return objp;
>  }
>
> @@ -3506,13 +3507,13 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
>          * Done outside of the IRQ disabled section.
>          */
>         slab_post_alloc_hook(s, objcg, flags, size, p,
> -                               slab_want_init_on_alloc(flags, s));
> +                       slab_want_init_on_alloc(flags, s), s->object_size);
>         /* FIXME: Trace call missing. Christoph would like a bulk variant */
>         return size;
>  error:
>         local_irq_enable();
>         cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_);
> -       slab_post_alloc_hook(s, objcg, flags, i, p, false);
> +       slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
>         kmem_cache_free_bulk(s, i, p);
>         return 0;
>  }
> diff --git a/mm/slab.h b/mm/slab.h
> index 0202a8c2f0d2..8b4ee02fc14a 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -720,12 +720,26 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
>
>  static inline void slab_post_alloc_hook(struct kmem_cache *s,
>                                         struct obj_cgroup *objcg, gfp_t flags,
> -                                       size_t size, void **p, bool init)
> +                                       size_t size, void **p, bool init,
> +                                       unsigned int orig_size)
>  {
> +       unsigned int zero_size = s->object_size;
>         size_t i;
>
>         flags &= gfp_allowed_mask;
>
> +       /*
> +        * For kmalloc object, the allocated memory size(object_size) is likely
> +        * larger than the requested size(orig_size). If redzone check is
> +        * enabled for the extra space, don't zero it, as it will be redzoned
> +        * soon. The redzone operation for this extra space could be seen as a
> +        * replacement of current poisoning under certain debug option, and
> +        * won't break other sanity checks.
> +        */
> +       if (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
> +           (s->flags & SLAB_KMALLOC))
> +               zero_size = orig_size;
> +
>         /*
>          * As memory initialization might be integrated into KASAN,
>          * kasan_slab_alloc and initialization memset must be
> @@ -736,7 +750,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
>         for (i = 0; i < size; i++) {
>                 p[i] = kasan_slab_alloc(s, p[i], flags, init);
>                 if (p[i] && init && !kasan_has_integrated_init())
> -                       memset(p[i], 0, s->object_size);
> +                       memset(p[i], 0, zero_size);
>                 kmemleak_alloc_recursive(p[i], s->object_size, 1,
>                                          s->flags, flags);
>                 kmsan_slab_alloc(s, p[i], flags);
> diff --git a/mm/slub.c b/mm/slub.c
> index 12354fb8d6e4..17292c2d3eee 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -3395,7 +3395,11 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_l
>         init = slab_want_init_on_alloc(gfpflags, s);
>
>  out:
> -       slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init);
> +       /*
> +        * When init equals 'true', like for kzalloc() family, only
> +        * @orig_size bytes will be zeroed instead of s->object_size
> +        */
> +       slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init, orig_size);
>
>         return object;
>  }
> @@ -3852,11 +3856,11 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
>          * Done outside of the IRQ disabled fastpath loop.
>          */
>         slab_post_alloc_hook(s, objcg, flags, size, p,
> -                               slab_want_init_on_alloc(flags, s));
> +                       slab_want_init_on_alloc(flags, s), s->object_size);
>         return i;
>  error:
>         slub_put_cpu_ptr(s->cpu_slab);
> -       slab_post_alloc_hook(s, objcg, flags, i, p, false);
> +       slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
>         kmem_cache_free_bulk(s, i, p);
>         return 0;
>  }
> --
> 2.34.1
>

For the KASAN part:

Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>

Thanks!
  
Vlastimil Babka Nov. 9, 2022, 2:28 p.m. UTC | #3
On 10/21/22 05:24, Feng Tang wrote:
> kzalloc/kmalloc will round up the request size to a fixed size
> (mostly power of 2), so the allocated memory could be more than
> requested. Currently kzalloc family APIs will zero all the
> allocated memory.
> 
> To detect out-of-bound usage of the extra allocated memory, only
> zero the requested part, so that redzone sanity check could be
> added to the extra space later.
> 
> For kzalloc users who will call ksize() later and utilize this
> extra space, please be aware that the space is not zeroed any
> more when debug is enabled. (Thanks to Kees Cook's effort to
> sanitize all ksize() user cases [1], this won't be a big issue).
> 
> [1]. https://lore.kernel.org/all/20220922031013.2150682-1-keescook@chromium.org/#r
> Signed-off-by: Feng Tang <feng.tang@intel.com>
> ---
>  mm/slab.c |  7 ++++---
>  mm/slab.h | 18 ++++++++++++++++--
>  mm/slub.c | 10 +++++++---
>  3 files changed, 27 insertions(+), 8 deletions(-)
> 
> diff --git a/mm/slab.c b/mm/slab.c
> index a5486ff8362a..4594de0e3d6b 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -3253,7 +3253,8 @@ slab_alloc_node(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags,
>  	init = slab_want_init_on_alloc(flags, cachep);
>  
>  out:
> -	slab_post_alloc_hook(cachep, objcg, flags, 1, &objp, init);
> +	slab_post_alloc_hook(cachep, objcg, flags, 1, &objp, init,
> +				cachep->object_size);
>  	return objp;
>  }
>  
> @@ -3506,13 +3507,13 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
>  	 * Done outside of the IRQ disabled section.
>  	 */
>  	slab_post_alloc_hook(s, objcg, flags, size, p,
> -				slab_want_init_on_alloc(flags, s));
> +			slab_want_init_on_alloc(flags, s), s->object_size);
>  	/* FIXME: Trace call missing. Christoph would like a bulk variant */
>  	return size;
>  error:
>  	local_irq_enable();
>  	cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_);
> -	slab_post_alloc_hook(s, objcg, flags, i, p, false);
> +	slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
>  	kmem_cache_free_bulk(s, i, p);
>  	return 0;
>  }
> diff --git a/mm/slab.h b/mm/slab.h
> index 0202a8c2f0d2..8b4ee02fc14a 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -720,12 +720,26 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
>  
>  static inline void slab_post_alloc_hook(struct kmem_cache *s,
>  					struct obj_cgroup *objcg, gfp_t flags,
> -					size_t size, void **p, bool init)
> +					size_t size, void **p, bool init,
> +					unsigned int orig_size)
>  {
> +	unsigned int zero_size = s->object_size;
>  	size_t i;
>  
>  	flags &= gfp_allowed_mask;
>  
> +	/*
> +	 * For kmalloc object, the allocated memory size(object_size) is likely
> +	 * larger than the requested size(orig_size). If redzone check is
> +	 * enabled for the extra space, don't zero it, as it will be redzoned
> +	 * soon. The redzone operation for this extra space could be seen as a
> +	 * replacement of current poisoning under certain debug option, and
> +	 * won't break other sanity checks.
> +	 */
> +	if (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&

Shouldn't we check SLAB_RED_ZONE instead? Otherwise debugging could be
configured so that SLAB_STORE_USER is set but SLAB_RED_ZONE is not?

> +	    (s->flags & SLAB_KMALLOC))
> +		zero_size = orig_size;
> +
>  	/*
>  	 * As memory initialization might be integrated into KASAN,
>  	 * kasan_slab_alloc and initialization memset must be
> @@ -736,7 +750,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
>  	for (i = 0; i < size; i++) {
>  		p[i] = kasan_slab_alloc(s, p[i], flags, init);
>  		if (p[i] && init && !kasan_has_integrated_init())
> -			memset(p[i], 0, s->object_size);
> +			memset(p[i], 0, zero_size);
>  		kmemleak_alloc_recursive(p[i], s->object_size, 1,
>  					 s->flags, flags);
>  		kmsan_slab_alloc(s, p[i], flags);
> diff --git a/mm/slub.c b/mm/slub.c
> index 12354fb8d6e4..17292c2d3eee 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -3395,7 +3395,11 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_l
>  	init = slab_want_init_on_alloc(gfpflags, s);
>  
>  out:
> -	slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init);
> +	/*
> +	 * When init equals 'true', like for kzalloc() family, only
> +	 * @orig_size bytes will be zeroed instead of s->object_size

s/will be/might be/ because it depends on the debug options?

> +	 */
> +	slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init, orig_size);
>  
>  	return object;
>  }
> @@ -3852,11 +3856,11 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
>  	 * Done outside of the IRQ disabled fastpath loop.
>  	 */
>  	slab_post_alloc_hook(s, objcg, flags, size, p,
> -				slab_want_init_on_alloc(flags, s));
> +			slab_want_init_on_alloc(flags, s), s->object_size);
>  	return i;
>  error:
>  	slub_put_cpu_ptr(s->cpu_slab);
> -	slab_post_alloc_hook(s, objcg, flags, i, p, false);
> +	slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
>  	kmem_cache_free_bulk(s, i, p);
>  	return 0;
>  }
  
Feng Tang Nov. 10, 2022, 3:20 a.m. UTC | #4
On Wed, Nov 09, 2022 at 03:28:19PM +0100, Vlastimil Babka wrote:
> On 10/21/22 05:24, Feng Tang wrote:
> > kzalloc/kmalloc will round up the request size to a fixed size
> > (mostly power of 2), so the allocated memory could be more than
> > requested. Currently kzalloc family APIs will zero all the
> > allocated memory.
> > 
> > To detect out-of-bound usage of the extra allocated memory, only
> > zero the requested part, so that redzone sanity check could be
> > added to the extra space later.
> > 
> > For kzalloc users who will call ksize() later and utilize this
> > extra space, please be aware that the space is not zeroed any
> > more when debug is enabled. (Thanks to Kees Cook's effort to
> > sanitize all ksize() user cases [1], this won't be a big issue).
> > 
> > [1]. https://lore.kernel.org/all/20220922031013.2150682-1-keescook@chromium.org/#r
> > Signed-off-by: Feng Tang <feng.tang@intel.com>
> > ---
[...]
> >  static inline void slab_post_alloc_hook(struct kmem_cache *s,
> >  					struct obj_cgroup *objcg, gfp_t flags,
> > -					size_t size, void **p, bool init)
> > +					size_t size, void **p, bool init,
> > +					unsigned int orig_size)
> >  {
> > +	unsigned int zero_size = s->object_size;
> >  	size_t i;
> >  
> >  	flags &= gfp_allowed_mask;
> >  
> > +	/*
> > +	 * For kmalloc object, the allocated memory size(object_size) is likely
> > +	 * larger than the requested size(orig_size). If redzone check is
> > +	 * enabled for the extra space, don't zero it, as it will be redzoned
> > +	 * soon. The redzone operation for this extra space could be seen as a
> > +	 * replacement of current poisoning under certain debug option, and
> > +	 * won't break other sanity checks.
> > +	 */
> > +	if (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
> 
> Shouldn't we check SLAB_RED_ZONE instead? Otherwise a debugging could be
> specified so that SLAB_RED_ZONE is set but SLAB_STORE_USER?

Thanks for the catch!

I will add a check for SLAB_RED_ZONE. The SLAB_STORE_USER check is for
determining whether the 'orig_size' field exists. In an earlier discussion,
we made 'orig_size' depend on STORE_USER: https://lore.kernel.org/lkml/1b0fa66c-f855-1c00-e024-b2b823b18678@suse.cz/

> > +	    (s->flags & SLAB_KMALLOC))
> > +		zero_size = orig_size;
> > +
> >  	/*
> >  	 * As memory initialization might be integrated into KASAN,
> >  	 * kasan_slab_alloc and initialization memset must be
> > @@ -736,7 +750,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
> >  	for (i = 0; i < size; i++) {
> >  		p[i] = kasan_slab_alloc(s, p[i], flags, init);
> >  		if (p[i] && init && !kasan_has_integrated_init())
> > -			memset(p[i], 0, s->object_size);
> > +			memset(p[i], 0, zero_size);
> >  		kmemleak_alloc_recursive(p[i], s->object_size, 1,
> >  					 s->flags, flags);
> >  		kmsan_slab_alloc(s, p[i], flags);
> > diff --git a/mm/slub.c b/mm/slub.c
> > index 12354fb8d6e4..17292c2d3eee 100644
> > --- a/mm/slub.c
> > +++ b/mm/slub.c
> > @@ -3395,7 +3395,11 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_l
> >  	init = slab_want_init_on_alloc(gfpflags, s);
> >  
> >  out:
> > -	slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init);
> > +	/*
> > +	 * When init equals 'true', like for kzalloc() family, only
> > +	 * @orig_size bytes will be zeroed instead of s->object_size
> 
> s/will be/might be/ because it depends on the debugging?

Yes, will change.

Thanks,
Feng
  
Feng Tang Nov. 10, 2022, 12:57 p.m. UTC | #5
On Thu, Nov 10, 2022 at 11:20:34AM +0800, Tang, Feng wrote:
> On Wed, Nov 09, 2022 at 03:28:19PM +0100, Vlastimil Babka wrote:
[...]
> > > +	/*
> > > +	 * For kmalloc object, the allocated memory size(object_size) is likely
> > > +	 * larger than the requested size(orig_size). If redzone check is
> > > +	 * enabled for the extra space, don't zero it, as it will be redzoned
> > > +	 * soon. The redzone operation for this extra space could be seen as a
> > > +	 * replacement of current poisoning under certain debug option, and
> > > +	 * won't break other sanity checks.
> > > +	 */
> > > +	if (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
> > 
> > Shouldn't we check SLAB_RED_ZONE instead? Otherwise a debugging could be
> > specified so that SLAB_RED_ZONE is set but SLAB_STORE_USER?
> 
> Thanks for the catch!
> 
> I will add check for SLAB_RED_ZONE. The SLAB_STORE_USER is for
> checking whether 'orig_size' field exists. In earlier discussion,
> we make 'orig_size' depend on STORE_USER, https://lore.kernel.org/lkml/1b0fa66c-f855-1c00-e024-b2b823b18678@suse.cz/ 

Below is the updated patch, please review, thanks! 

- Feng

-----8>----
From b2a92f0c2518ef80fcda340f1ad37b418ee32d85 Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Thu, 20 Oct 2022 20:47:31 +0800
Subject: [PATCH 1/3] mm/slub: only zero requested size of buffer for kzalloc
 when debug enabled

kzalloc/kmalloc round up the requested size to a fixed size
(mostly a power of 2), so the allocated memory can be larger than
requested. Currently the kzalloc family of APIs zeroes all of the
allocated memory.

To detect out-of-bounds usage of the extra allocated memory, only
zero the requested part, so that a redzone sanity check can be
added to the extra space later.

kzalloc users who call ksize() later and make use of this extra
space should be aware that the space is no longer zeroed when
debug is enabled. (Thanks to Kees Cook's effort to sanitize all
ksize() use cases [1], this won't be a big issue.)

[1]. https://lore.kernel.org/all/20220922031013.2150682-1-keescook@chromium.org/#r
Signed-off-by: Feng Tang <feng.tang@intel.com>
Acked-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
---
 mm/slab.c |  7 ++++---
 mm/slab.h | 19 +++++++++++++++++--
 mm/slub.c | 10 +++++++---
 3 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index 4b265174b6d5..1eddec4a50e4 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3258,7 +3258,8 @@ slab_alloc_node(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags,
 	init = slab_want_init_on_alloc(flags, cachep);
 
 out:
-	slab_post_alloc_hook(cachep, objcg, flags, 1, &objp, init);
+	slab_post_alloc_hook(cachep, objcg, flags, 1, &objp, init,
+				cachep->object_size);
 	return objp;
 }
 
@@ -3511,13 +3512,13 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 	 * Done outside of the IRQ disabled section.
 	 */
 	slab_post_alloc_hook(s, objcg, flags, size, p,
-				slab_want_init_on_alloc(flags, s));
+			slab_want_init_on_alloc(flags, s), s->object_size);
 	/* FIXME: Trace call missing. Christoph would like a bulk variant */
 	return size;
 error:
 	local_irq_enable();
 	cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_);
-	slab_post_alloc_hook(s, objcg, flags, i, p, false);
+	slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
 	kmem_cache_free_bulk(s, i, p);
 	return 0;
 }
diff --git a/mm/slab.h b/mm/slab.h
index 8c4aafb00bd6..2551214392c7 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -730,12 +730,27 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
 
 static inline void slab_post_alloc_hook(struct kmem_cache *s,
 					struct obj_cgroup *objcg, gfp_t flags,
-					size_t size, void **p, bool init)
+					size_t size, void **p, bool init,
+					unsigned int orig_size)
 {
+	unsigned int zero_size = s->object_size;
 	size_t i;
 
 	flags &= gfp_allowed_mask;
 
+	/*
+	 * For kmalloc object, the allocated memory size(object_size) is likely
+	 * larger than the requested size(orig_size). If redzone check is
+	 * enabled for the extra space, don't zero it, as it will be redzoned
+	 * soon. The redzone operation for this extra space could be seen as a
+	 * replacement of current poisoning under certain debug option, and
+	 * won't break other sanity checks.
+	 */
+	if (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
+	    (s->flags & SLAB_RED_ZONE) &&
+	    (s->flags & SLAB_KMALLOC))
+		zero_size = orig_size;
+
 	/*
 	 * As memory initialization might be integrated into KASAN,
 	 * kasan_slab_alloc and initialization memset must be
@@ -746,7 +761,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
 	for (i = 0; i < size; i++) {
 		p[i] = kasan_slab_alloc(s, p[i], flags, init);
 		if (p[i] && init && !kasan_has_integrated_init())
-			memset(p[i], 0, s->object_size);
+			memset(p[i], 0, zero_size);
 		kmemleak_alloc_recursive(p[i], s->object_size, 1,
 					 s->flags, flags);
 		kmsan_slab_alloc(s, p[i], flags);
diff --git a/mm/slub.c b/mm/slub.c
index 0a14e7bc278c..13490f317f5f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3387,7 +3387,11 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_l
 	init = slab_want_init_on_alloc(gfpflags, s);
 
 out:
-	slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init);
+	/*
+	 * When init equals 'true', like for kzalloc() family, only
+	 * @orig_size bytes might be zeroed instead of s->object_size
+	 */
+	slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init, orig_size);
 
 	return object;
 }
@@ -3844,11 +3848,11 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 	 * Done outside of the IRQ disabled fastpath loop.
 	 */
 	slab_post_alloc_hook(s, objcg, flags, size, p,
-				slab_want_init_on_alloc(flags, s));
+			slab_want_init_on_alloc(flags, s), s->object_size);
 	return i;
 error:
 	slub_put_cpu_ptr(s->cpu_slab);
-	slab_post_alloc_hook(s, objcg, flags, i, p, false);
+	slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
 	kmem_cache_free_bulk(s, i, p);
 	return 0;
 }
  
Vlastimil Babka Nov. 10, 2022, 3:44 p.m. UTC | #6
On 11/10/22 13:57, Feng Tang wrote:
> On Thu, Nov 10, 2022 at 11:20:34AM +0800, Tang, Feng wrote:
>> On Wed, Nov 09, 2022 at 03:28:19PM +0100, Vlastimil Babka wrote:
> [...]
>> > > +	/*
>> > > +	 * For kmalloc object, the allocated memory size(object_size) is likely
>> > > +	 * larger than the requested size(orig_size). If redzone check is
>> > > +	 * enabled for the extra space, don't zero it, as it will be redzoned
>> > > +	 * soon. The redzone operation for this extra space could be seen as a
>> > > +	 * replacement of current poisoning under certain debug option, and
>> > > +	 * won't break other sanity checks.
>> > > +	 */
>> > > +	if (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
>> > 
>> > Shouldn't we check SLAB_RED_ZONE instead? Otherwise a debugging could be
>> > specified so that SLAB_RED_ZONE is set but SLAB_STORE_USER?
>> 
>> Thanks for the catch!
>> 
>> I will add check for SLAB_RED_ZONE. The SLAB_STORE_USER is for
>> checking whether 'orig_size' field exists. In earlier discussion,
>> we make 'orig_size' depend on STORE_USER, https://lore.kernel.org/lkml/1b0fa66c-f855-1c00-e024-b2b823b18678@suse.cz/ 
> 
> Below is the updated patch, please review, thanks! 

Thanks, grabbing it including Andrey's review, with a small change below:

> - Feng
> 
> -----8>----
> From b2a92f0c2518ef80fcda340f1ad37b418ee32d85 Mon Sep 17 00:00:00 2001
> From: Feng Tang <feng.tang@intel.com>
> Date: Thu, 20 Oct 2022 20:47:31 +0800
> Subject: [PATCH 1/3] mm/slub: only zero requested size of buffer for kzalloc
>  when debug enabled
> 
> kzalloc/kmalloc will round up the request size to a fixed size
> (mostly power of 2), so the allocated memory could be more than
> requested. Currently kzalloc family APIs will zero all the
> allocated memory.
> 
> To detect out-of-bound usage of the extra allocated memory, only
> zero the requested part, so that redzone sanity check could be
> added to the extra space later.
> 
> For kzalloc users who will call ksize() later and utilize this
> extra space, please be aware that the space is not zeroed any
> more when debug is enabled. (Thanks to Kees Cook's effort to
> sanitize all ksize() user cases [1], this won't be a big issue).
> 
> [1]. https://lore.kernel.org/all/20220922031013.2150682-1-keescook@chromium.org/#r
> Signed-off-by: Feng Tang <feng.tang@intel.com>
> Acked-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> ---
>  mm/slab.c |  7 ++++---
>  mm/slab.h | 19 +++++++++++++++++--
>  mm/slub.c | 10 +++++++---
>  3 files changed, 28 insertions(+), 8 deletions(-)
> 
> diff --git a/mm/slab.c b/mm/slab.c
> index 4b265174b6d5..1eddec4a50e4 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -3258,7 +3258,8 @@ slab_alloc_node(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags,
>  	init = slab_want_init_on_alloc(flags, cachep);
>  
>  out:
> -	slab_post_alloc_hook(cachep, objcg, flags, 1, &objp, init);
> +	slab_post_alloc_hook(cachep, objcg, flags, 1, &objp, init,
> +				cachep->object_size);
>  	return objp;
>  }
>  
> @@ -3511,13 +3512,13 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
>  	 * Done outside of the IRQ disabled section.
>  	 */
>  	slab_post_alloc_hook(s, objcg, flags, size, p,
> -				slab_want_init_on_alloc(flags, s));
> +			slab_want_init_on_alloc(flags, s), s->object_size);
>  	/* FIXME: Trace call missing. Christoph would like a bulk variant */
>  	return size;
>  error:
>  	local_irq_enable();
>  	cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_);
> -	slab_post_alloc_hook(s, objcg, flags, i, p, false);
> +	slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
>  	kmem_cache_free_bulk(s, i, p);
>  	return 0;
>  }
> diff --git a/mm/slab.h b/mm/slab.h
> index 8c4aafb00bd6..2551214392c7 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -730,12 +730,27 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
>  
>  static inline void slab_post_alloc_hook(struct kmem_cache *s,
>  					struct obj_cgroup *objcg, gfp_t flags,
> -					size_t size, void **p, bool init)
> +					size_t size, void **p, bool init,
> +					unsigned int orig_size)
>  {
> +	unsigned int zero_size = s->object_size;
>  	size_t i;
>  
>  	flags &= gfp_allowed_mask;
>  
> +	/*
> +	 * For kmalloc object, the allocated memory size(object_size) is likely
> +	 * larger than the requested size(orig_size). If redzone check is
> +	 * enabled for the extra space, don't zero it, as it will be redzoned
> +	 * soon. The redzone operation for this extra space could be seen as a
> +	 * replacement of current poisoning under certain debug option, and
> +	 * won't break other sanity checks.
> +	 */
> +	if (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
> +	    (s->flags & SLAB_RED_ZONE) &&

Combined the two conditions above into:

  if (kmem_cache_debug_flags(s, SLAB_STORE_USER | SLAB_RED_ZONE) &&

> +	    (s->flags & SLAB_KMALLOC))
> +		zero_size = orig_size;
> +
>  	/*
>  	 * As memory initialization might be integrated into KASAN,
>  	 * kasan_slab_alloc and initialization memset must be
> @@ -746,7 +761,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
>  	for (i = 0; i < size; i++) {
>  		p[i] = kasan_slab_alloc(s, p[i], flags, init);
>  		if (p[i] && init && !kasan_has_integrated_init())
> -			memset(p[i], 0, s->object_size);
> +			memset(p[i], 0, zero_size);
>  		kmemleak_alloc_recursive(p[i], s->object_size, 1,
>  					 s->flags, flags);
>  		kmsan_slab_alloc(s, p[i], flags);
> diff --git a/mm/slub.c b/mm/slub.c
> index 0a14e7bc278c..13490f317f5f 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -3387,7 +3387,11 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_l
>  	init = slab_want_init_on_alloc(gfpflags, s);
>  
>  out:
> -	slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init);
> +	/*
> +	 * When init equals 'true', like for kzalloc() family, only
> +	 * @orig_size bytes might be zeroed instead of s->object_size
> +	 */
> +	slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init, orig_size);
>  
>  	return object;
>  }
> @@ -3844,11 +3848,11 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
>  	 * Done outside of the IRQ disabled fastpath loop.
>  	 */
>  	slab_post_alloc_hook(s, objcg, flags, size, p,
> -				slab_want_init_on_alloc(flags, s));
> +			slab_want_init_on_alloc(flags, s), s->object_size);
>  	return i;
>  error:
>  	slub_put_cpu_ptr(s->cpu_slab);
> -	slab_post_alloc_hook(s, objcg, flags, i, p, false);
> +	slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
>  	kmem_cache_free_bulk(s, i, p);
>  	return 0;
>  }
  
Feng Tang Nov. 11, 2022, 6:19 a.m. UTC | #7
On Thu, Nov 10, 2022 at 04:44:59PM +0100, Vlastimil Babka wrote:
> On 11/10/22 13:57, Feng Tang wrote:
> > On Thu, Nov 10, 2022 at 11:20:34AM +0800, Tang, Feng wrote:
> >> On Wed, Nov 09, 2022 at 03:28:19PM +0100, Vlastimil Babka wrote:
> > [...]
> >> > > +	/*
> >> > > +	 * For kmalloc object, the allocated memory size(object_size) is likely
> >> > > +	 * larger than the requested size(orig_size). If redzone check is
> >> > > +	 * enabled for the extra space, don't zero it, as it will be redzoned
> >> > > +	 * soon. The redzone operation for this extra space could be seen as a
> >> > > +	 * replacement of current poisoning under certain debug option, and
> >> > > +	 * won't break other sanity checks.
> >> > > +	 */
> >> > > +	if (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
> >> > 
> >> > Shouldn't we check SLAB_RED_ZONE instead? Otherwise a debugging could be
> >> > specified so that SLAB_RED_ZONE is set but SLAB_STORE_USER?
> >> 
> >> Thanks for the catch!
> >> 
> >> I will add check for SLAB_RED_ZONE. The SLAB_STORE_USER is for
> >> checking whether 'orig_size' field exists. In earlier discussion,
> >> we make 'orig_size' depend on STORE_USER, https://lore.kernel.org/lkml/1b0fa66c-f855-1c00-e024-b2b823b18678@suse.cz/ 
> > 
> > Below is the updated patch, please review, thanks! 
> 
> Thanks, grabbing it including Andrey's review, with a small change below:
> 
> > - Feng
> > 
> > -----8>----
> > From b2a92f0c2518ef80fcda340f1ad37b418ee32d85 Mon Sep 17 00:00:00 2001
> > From: Feng Tang <feng.tang@intel.com>
> > Date: Thu, 20 Oct 2022 20:47:31 +0800
> > Subject: [PATCH 1/3] mm/slub: only zero requested size of buffer for kzalloc
> >  when debug enabled
[...]
> > +	/*
> > +	 * For kmalloc object, the allocated memory size(object_size) is likely
> > +	 * larger than the requested size(orig_size). If redzone check is
> > +	 * enabled for the extra space, don't zero it, as it will be redzoned
> > +	 * soon. The redzone operation for this extra space could be seen as a
> > +	 * replacement of current poisoning under certain debug option, and
> > +	 * won't break other sanity checks.
> > +	 */
> > +	if (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
> > +	    (s->flags & SLAB_RED_ZONE) &&
> 
> Combined the two above to:
> 
>   if (kmem_cache_debug_flags(s, SLAB_STORE_USER | SLAB_RED_ZONE)

Yes, this is cleaner, thanks!

- Feng
  

Patch

diff --git a/mm/slab.c b/mm/slab.c
index a5486ff8362a..4594de0e3d6b 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3253,7 +3253,8 @@  slab_alloc_node(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags,
 	init = slab_want_init_on_alloc(flags, cachep);
 
 out:
-	slab_post_alloc_hook(cachep, objcg, flags, 1, &objp, init);
+	slab_post_alloc_hook(cachep, objcg, flags, 1, &objp, init,
+				cachep->object_size);
 	return objp;
 }
 
@@ -3506,13 +3507,13 @@  int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 	 * Done outside of the IRQ disabled section.
 	 */
 	slab_post_alloc_hook(s, objcg, flags, size, p,
-				slab_want_init_on_alloc(flags, s));
+			slab_want_init_on_alloc(flags, s), s->object_size);
 	/* FIXME: Trace call missing. Christoph would like a bulk variant */
 	return size;
 error:
 	local_irq_enable();
 	cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_);
-	slab_post_alloc_hook(s, objcg, flags, i, p, false);
+	slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
 	kmem_cache_free_bulk(s, i, p);
 	return 0;
 }
diff --git a/mm/slab.h b/mm/slab.h
index 0202a8c2f0d2..8b4ee02fc14a 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -720,12 +720,26 @@  static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
 
 static inline void slab_post_alloc_hook(struct kmem_cache *s,
 					struct obj_cgroup *objcg, gfp_t flags,
-					size_t size, void **p, bool init)
+					size_t size, void **p, bool init,
+					unsigned int orig_size)
 {
+	unsigned int zero_size = s->object_size;
 	size_t i;
 
 	flags &= gfp_allowed_mask;
 
+	/*
+	 * For kmalloc object, the allocated memory size(object_size) is likely
+	 * larger than the requested size(orig_size). If redzone check is
+	 * enabled for the extra space, don't zero it, as it will be redzoned
+	 * soon. The redzone operation for this extra space could be seen as a
+	 * replacement of current poisoning under certain debug option, and
+	 * won't break other sanity checks.
+	 */
+	if (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
+	    (s->flags & SLAB_KMALLOC))
+		zero_size = orig_size;
+
 	/*
 	 * As memory initialization might be integrated into KASAN,
 	 * kasan_slab_alloc and initialization memset must be
@@ -736,7 +750,7 @@  static inline void slab_post_alloc_hook(struct kmem_cache *s,
 	for (i = 0; i < size; i++) {
 		p[i] = kasan_slab_alloc(s, p[i], flags, init);
 		if (p[i] && init && !kasan_has_integrated_init())
-			memset(p[i], 0, s->object_size);
+			memset(p[i], 0, zero_size);
 		kmemleak_alloc_recursive(p[i], s->object_size, 1,
 					 s->flags, flags);
 		kmsan_slab_alloc(s, p[i], flags);
diff --git a/mm/slub.c b/mm/slub.c
index 12354fb8d6e4..17292c2d3eee 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3395,7 +3395,11 @@  static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_l
 	init = slab_want_init_on_alloc(gfpflags, s);
 
 out:
-	slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init);
+	/*
+	 * When init equals 'true', like for kzalloc() family, only
+	 * @orig_size bytes will be zeroed instead of s->object_size
+	 */
+	slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init, orig_size);
 
 	return object;
 }
@@ -3852,11 +3856,11 @@  int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 	 * Done outside of the IRQ disabled fastpath loop.
 	 */
 	slab_post_alloc_hook(s, objcg, flags, size, p,
-				slab_want_init_on_alloc(flags, s));
+			slab_want_init_on_alloc(flags, s), s->object_size);
 	return i;
 error:
 	slub_put_cpu_ptr(s->cpu_slab);
-	slab_post_alloc_hook(s, objcg, flags, i, p, false);
+	slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
 	kmem_cache_free_bulk(s, i, p);
 	return 0;
 }