[RFC] string: Allow 2-argument strscpy()

Message ID 20240129202901.work.282-kees@kernel.org
State New
Headers
Series [RFC] string: Allow 2-argument strscpy() |

Commit Message

Kees Cook Jan. 29, 2024, 8:29 p.m. UTC
  Using sizeof(dst) is the overwhelmingly common case for strscpy().
Instead of requiring this everywhere, allow a 2-argument version to be
used that will use the sizeof() internally.

Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Andy Shevchenko <andy@kernel.org>
Cc: linux-hardening@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
What do people think of this idea? It's usually very redundant to
include the 3rd argument, so this might improve readability (and
perhaps make things more robust by avoiding mistakes when the
destination name changes).
---
 include/linux/fortify-string.h | 4 ++--
 include/linux/string.h         | 9 ++++++++-
 lib/string.c                   | 4 ++--
 3 files changed, 12 insertions(+), 5 deletions(-)
  

Comments

Justin Stitt Jan. 29, 2024, 9:55 p.m. UTC | #1
Hi,

On Mon, Jan 29, 2024 at 12:29:04PM -0800, Kees Cook wrote:
> Using sizeof(dst) is the overwhelmingly common case for strscpy().
> Instead of requiring this everywhere, allow a 2-argument version to be
> used that will use the sizeof() internally.

Yeah, this is definitely the case. I have a ton of patches replacing
strncpy with strscpy [1] and many of them match the pattern of:
| strscpy(dest, src, sizeof(dest))

BTW, this hack for function overloading is insane. Never really looked into
it before.

>
> Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
> Cc: Andy Shevchenko <andy@kernel.org>
> Cc: linux-hardening@vger.kernel.org
> Signed-off-by: Kees Cook <keescook@chromium.org>
> ---
> What do people think of this idea? It's usually very redundant to
> include the 3rd argument, so this might improve readability (and
> perhaps make things more robust by avoiding mistakes when the
> destination name changes).

I like this, though, should you include documentation changes/additions?

Reviewed-by: Justin Stitt <justinstitt@google.com>

> ---
>  include/linux/fortify-string.h | 4 ++--
>  include/linux/string.h         | 9 ++++++++-
>  lib/string.c                   | 4 ++--
>  3 files changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
> index 89a6888f2f9e..56be4d4a5dea 100644
> --- a/include/linux/fortify-string.h
> +++ b/include/linux/fortify-string.h
> @@ -215,7 +215,7 @@ __kernel_size_t __fortify_strlen(const char * const POS p)
>  }
>
>  /* Defined after fortified strnlen() to reuse it. */
> -extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy);
> +extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(sized_strscpy);
>  /**
>   * strscpy - Copy a C-string into a sized buffer
>   *
> @@ -234,7 +234,7 @@ extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy);
>   * Returns the number of characters copied in @p (not including the
>   * trailing %NUL) or -E2BIG if @size is 0 or the copy of @q was truncated.
>   */
> -__FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, size_t size)
> +__FORTIFY_INLINE ssize_t sized_strscpy(char * const POS p, const char * const POS q, size_t size)
>  {
>  	/* Use string size rather than possible enclosing struct size. */
>  	const size_t p_size = __member_size(p);
> diff --git a/include/linux/string.h b/include/linux/string.h
> index ab148d8dbfc1..0bb1c8d05f18 100644
> --- a/include/linux/string.h
> +++ b/include/linux/string.h
> @@ -67,9 +67,16 @@ extern char * strcpy(char *,const char *);
>  extern char * strncpy(char *,const char *, __kernel_size_t);
>  #endif
>  #ifndef __HAVE_ARCH_STRSCPY
> -ssize_t strscpy(char *, const char *, size_t);
> +ssize_t sized_strscpy(char *, const char *, size_t);
>  #endif
>
> +#define __strscpy0(dst, src, ...)	sized_strscpy(dst, src, sizeof(dst))
> +
> +#define __strscpy1(dst, src, size)	sized_strscpy(dst, src, size)
> +
> +#define strscpy(dst, src, ...)	\
> +	CONCATENATE(__strscpy, COUNT_ARGS(__VA_ARGS__))(dst, src, __VA_ARGS__)
> +
>  /* Wraps calls to strscpy()/memset(), no arch specific code required */
>  ssize_t strscpy_pad(char *dest, const char *src, size_t count);
>
> diff --git a/lib/string.c b/lib/string.c
> index 6891d15ce991..2869895a1180 100644
> --- a/lib/string.c
> +++ b/lib/string.c
> @@ -104,7 +104,7 @@ EXPORT_SYMBOL(strncpy);
>  #endif
>
>  #ifndef __HAVE_ARCH_STRSCPY
> -ssize_t strscpy(char *dest, const char *src, size_t count)
> +ssize_t sized_strscpy(char *dest, const char *src, size_t count)
>  {
>  	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
>  	size_t max = count;
> @@ -170,7 +170,7 @@ ssize_t strscpy(char *dest, const char *src, size_t count)
>
>  	return -E2BIG;
>  }
> -EXPORT_SYMBOL(strscpy);
> +EXPORT_SYMBOL(sized_strscpy);
>  #endif
>
>  /**
> --
> 2.34.1
>

[1]: https://lore.kernel.org/all/?q=f%3A%22justinstitt%40google.com%22+AND+b%3Astrscpy+AND+NOT+s%3A%22Re%22

Thanks
Justin
  
Kees Cook Jan. 29, 2024, 10:02 p.m. UTC | #2
On Mon, Jan 29, 2024 at 09:55:25PM +0000, Justin Stitt wrote:
> Hi,
> 
> On Mon, Jan 29, 2024 at 12:29:04PM -0800, Kees Cook wrote:
> > Using sizeof(dst) is the overwhelmingly common case for strscpy().
> > Instead of requiring this everywhere, allow a 2-argument version to be
> > used that will use the sizeof() internally.
> 
> Yeah, this is definitely the case. I have a ton of patches replacing
> strncpy with strscpy [1] and many of them match the pattern of:
> | strscpy(dest, src, sizeof(dest))
> 
> BTW, this hack for function overloading is insane. Never really looked into
> it before.

It very much is. :P Hence the RFC nature of this patch. I don't think we
have any other API in the kernel that does this (though there are plenty
of wild macro wrappers to do similar tricks, like the syscall wrappers).

> > Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
> > Cc: Andy Shevchenko <andy@kernel.org>
> > Cc: linux-hardening@vger.kernel.org
> > Signed-off-by: Kees Cook <keescook@chromium.org>
> > ---
> > What do people think of this idea? It's usually very redundant to
> > include the 3rd argument, so this might improve readability (and
> > perhaps make things more robust by avoiding mistakes when the
> > destination name changes).
> 
> I like this, though, should you include documentation changes/additions?

Yeah, though I'm not sure how to do this -- kerndoc expects a fixed
number of arguments. :P Maybe I can just do something like add
"optional" to @size:

 * strscpy - Copy a C-string into a sized buffer
 * @p: Where to copy the string to
 * @q: Where to copy the string from
 * @size: Size of destination buffer (optional)

> 
> Reviewed-by: Justin Stitt <justinstitt@google.com>
> 
> > ---
> >  include/linux/fortify-string.h | 4 ++--
> >  include/linux/string.h         | 9 ++++++++-
> >  lib/string.c                   | 4 ++--
> >  3 files changed, 12 insertions(+), 5 deletions(-)
> >
> > diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
> > index 89a6888f2f9e..56be4d4a5dea 100644
> > --- a/include/linux/fortify-string.h
> > +++ b/include/linux/fortify-string.h
> > @@ -215,7 +215,7 @@ __kernel_size_t __fortify_strlen(const char * const POS p)
> >  }
> >
> >  /* Defined after fortified strnlen() to reuse it. */
> > -extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy);
> > +extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(sized_strscpy);
> >  /**
> >   * strscpy - Copy a C-string into a sized buffer
> >   *
> > @@ -234,7 +234,7 @@ extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy);
> >   * Returns the number of characters copied in @p (not including the
> >   * trailing %NUL) or -E2BIG if @size is 0 or the copy of @q was truncated.
> >   */
> > -__FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, size_t size)
> > +__FORTIFY_INLINE ssize_t sized_strscpy(char * const POS p, const char * const POS q, size_t size)
> >  {
> >  	/* Use string size rather than possible enclosing struct size. */
> >  	const size_t p_size = __member_size(p);
> > diff --git a/include/linux/string.h b/include/linux/string.h
> > index ab148d8dbfc1..0bb1c8d05f18 100644
> > --- a/include/linux/string.h
> > +++ b/include/linux/string.h
> > @@ -67,9 +67,16 @@ extern char * strcpy(char *,const char *);
> >  extern char * strncpy(char *,const char *, __kernel_size_t);
> >  #endif
> >  #ifndef __HAVE_ARCH_STRSCPY
> > -ssize_t strscpy(char *, const char *, size_t);
> > +ssize_t sized_strscpy(char *, const char *, size_t);
> >  #endif
> >
> > +#define __strscpy0(dst, src, ...)	sized_strscpy(dst, src, sizeof(dst))

In thinking about this slightly longer, I realize that the size may be
better as: sizeof(dst) + __must_be_array(dst)

otherwise a "char *" will be allowed as a dst for the 2-arg method, and
will get the size of the pointer (sizeof(char *)) rather than the size
of the buffer it points to. :)

> > +
> > +#define __strscpy1(dst, src, size)	sized_strscpy(dst, src, size)
> > +

And I should probably relocate the kern-doc to here...

> > +#define strscpy(dst, src, ...)	\
> > +	CONCATENATE(__strscpy, COUNT_ARGS(__VA_ARGS__))(dst, src, __VA_ARGS__)
> > +
> >  /* Wraps calls to strscpy()/memset(), no arch specific code required */
> >  ssize_t strscpy_pad(char *dest, const char *src, size_t count);
> >
> > diff --git a/lib/string.c b/lib/string.c
> > index 6891d15ce991..2869895a1180 100644
> > --- a/lib/string.c
> > +++ b/lib/string.c
> > @@ -104,7 +104,7 @@ EXPORT_SYMBOL(strncpy);
> >  #endif
> >
> >  #ifndef __HAVE_ARCH_STRSCPY
> > -ssize_t strscpy(char *dest, const char *src, size_t count)
> > +ssize_t sized_strscpy(char *dest, const char *src, size_t count)
> >  {
> >  	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
> >  	size_t max = count;
> > @@ -170,7 +170,7 @@ ssize_t strscpy(char *dest, const char *src, size_t count)
> >
> >  	return -E2BIG;
> >  }
> > -EXPORT_SYMBOL(strscpy);
> > +EXPORT_SYMBOL(sized_strscpy);
> >  #endif
> >
> >  /**
> > --
> > 2.34.1
> >
> 
> [1]: https://lore.kernel.org/all/?q=f%3A%22justinstitt%40google.com%22+AND+b%3Astrscpy+AND+NOT+s%3A%22Re%22
> 
> Thanks
> Justin
  
Andy Shevchenko Jan. 29, 2024, 11:27 p.m. UTC | #3
On Tue, Jan 30, 2024 at 12:03 AM Kees Cook <keescook@chromium.org> wrote:
> On Mon, Jan 29, 2024 at 09:55:25PM +0000, Justin Stitt wrote:
> > On Mon, Jan 29, 2024 at 12:29:04PM -0800, Kees Cook wrote:

..

> > BTW, this hack for function overloading is insane. Never really looked into
> > it before.
>
> It very much is. :P Hence the RFC nature of this patch. I don't think we
> have any other API in the kernel that does this (though there are plenty
> of wild macro wrappers to do similar tricks, like the syscall wrappers).

PCI has a couple of such.

> > I like this, though, should you include documentation changes/additions?
>
> Yeah, though I'm not sure how to do this -- kerndoc expects a fixed
> number of arguments. :P Maybe I can just do something like add
> "optional" to @size:

Use ... See again PCI header (pci.h). One of the macros there has a
valid kernel-doc.

>  * strscpy - Copy a C-string into a sized buffer
>  * @p: Where to copy the string to
>  * @q: Where to copy the string from
>  * @size: Size of destination buffer (optional)
  
Andy Shevchenko Jan. 29, 2024, 11:30 p.m. UTC | #4
On Tue, Jan 30, 2024 at 1:27 AM Andy Shevchenko
<andy.shevchenko@gmail.com> wrote:
> On Tue, Jan 30, 2024 at 12:03 AM Kees Cook <keescook@chromium.org> wrote:
> > On Mon, Jan 29, 2024 at 09:55:25PM +0000, Justin Stitt wrote:
> > > On Mon, Jan 29, 2024 at 12:29:04PM -0800, Kees Cook wrote:

..

> > > BTW, this hack for function overloading is insane. Never really looked into
> > > it before.
> >
> > It very much is. :P Hence the RFC nature of this patch. I don't think we
> > have any other API in the kernel that does this (though there are plenty
> > of wild macro wrappers to do similar tricks, like the syscall wrappers).
>
> PCI has a couple of such.
>
> > > I like this, though, should you include documentation changes/additions?
> >
> > Yeah, though I'm not sure how to do this -- kerndoc expects a fixed
> > number of arguments. :P Maybe I can just do something like add
> > "optional" to @size:
>
> Use ... See again PCI header (pci.h). One of the macros there has a
> valid kernel-doc.
>
> >  * strscpy - Copy a C-string into a sized buffer
> >  * @p: Where to copy the string to
> >  * @q: Where to copy the string from
> >  * @size: Size of destination buffer (optional)

FWIW,
https://elixir.bootlin.com/linux/latest/source/include/linux/pci.h#L1517
  
David Laight Feb. 1, 2024, 10:29 p.m. UTC | #5
From: Kees Cook
> Sent: 29 January 2024 20:29
> 
> Using sizeof(dst) is the overwhelmingly common case for strscpy().
> Instead of requiring this everywhere, allow a 2-argument version to be
> used that will use the sizeof() internally.

You may want to (try to) add a check that the first argument is
actually an array rather than just a pointer.

But the cpp output bloat can get silly and slow the build down.
I guess no one would do:
	x = min(strscpy(a, b), strscpy(c, d));
but you can never tell :-)

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
  
Pavel Machek Feb. 7, 2024, 5:51 p.m. UTC | #6
Hi!

> > Using sizeof(dst) is the overwhelmingly common case for strscpy().
> > Instead of requiring this everywhere, allow a 2-argument version to be
> > used that will use the sizeof() internally.
> 
> Yeah, this is definitely the case. I have a ton of patches replacing
> strncpy with strscpy [1] and many of them match the pattern of:
> | strscpy(dest, src, sizeof(dest))
> 
> BTW, this hack for function overloading is insane. Never really looked into
> it before.

This hack is insane, but this is also highly confusing, please don't
do this.

BR,
									Pavel
  
David Laight Feb. 10, 2024, 1:02 p.m. UTC | #7
From: Pavel Machek
> Sent: 07 February 2024 17:52
> 
> > > Using sizeof(dst) is the overwhelmingly common case for strscpy().
> > > Instead of requiring this everywhere, allow a 2-argument version to be
> > > used that will use the sizeof() internally.
> >
> > Yeah, this is definitely the case. I have a ton of patches replacing
> > strncpy with strscpy [1] and many of them match the pattern of:
> > | strscpy(dest, src, sizeof(dest))
> >
> > BTW, this hack for function overloading is insane. Never really looked into
> > it before.
> 
> This hack is insane, but this is also highly confusing, please don't
> do this.

An alternative would be to convert xxx(tgt, src, 0) to
xxx(tgt, src, sizeof (tgt)) - that is, when the specified
length is a compile-time constant zero.

Either with:
	(__builtin_constant_p(len) && (len) == 0 ? sizeof (dst) : (len))
Or, leveraging is_constexpr() and doing (I've probably got the syntax wrong):
	_Generic(0 ? (void *)(len) : (int *)0,
		void *: len,
		int *: sizeof (dst))

That probably needs a helper:
	is_constzero(value, if_zero, if_non_zero)
to make it more generally useful.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
  
David Laight Feb. 10, 2024, 1:51 p.m. UTC | #8
From: Pavel Machek
> Sent: 07 February 2024 17:52
> > > Using sizeof(dst) is the overwhelmingly common case for strscpy().
> > > Instead of requiring this everywhere, allow a 2-argument version to be
> > > used that will use the sizeof() internally.
> >
> > Yeah, this is definitely the case. I have a ton of patches replacing
> > strncpy with strscpy [1] and many of them match the pattern of:
> > | strscpy(dest, src, sizeof(dest))
> >
> > BTW, this hack for function overloading is insane. Never really looked into
> > it before.
> 
> This hack is insane, but this is also highly confusing, please don't
> do this.

A much simpler 'hack' - here defaulting to 16.
Although you'd probably want a compile-time check on the
number of arguments.

And convert sizeof 'non-array' to (say) -1 and error out on
a length that is a constant -1.

	David

/* The three-argument function that both call forms below expand to. */
extern int func_2(void *, const void *, int);

/*
 * func_1() forwards only its first three arguments to func_2(); anything
 * after 'len' is captured by '...' and dropped.
 */
#define func_1(_p0, _p1, len, ...) func_2(_p0, _p1, len)
/*
 * func() appends the default length (16) after whatever the caller passed:
 *   func(p0, p1)      -> func_1(p0, p1, 16)      -> func_2(p0, p1, 16)
 *   func(p0, p1, len) -> func_1(p0, p1, len, 16) -> func_2(p0, p1, len)
 * NOTE(review): in the two-argument form func_1()'s '...' receives no
 * arguments, which strict ISO C before C23 does not permit (GNU C accepts
 * it) - confirm this is acceptable for the compilers in use.
 */
#define func(_p0, ...) func_1(_p0, __VA_ARGS__, 16)

/* Two-argument call: the length falls back to the default of 16. */
int f1(void *p0, const void *p1)
{
    return func(p0, p1);
}

/* Three-argument call: the caller's 'len' is used and the default is dropped. */
int f2(void *p0, const void *p1, int len)
{
    return func(p0, p1, len);
}

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
  

Patch

diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index 89a6888f2f9e..56be4d4a5dea 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -215,7 +215,7 @@  __kernel_size_t __fortify_strlen(const char * const POS p)
 }
 
 /* Defined after fortified strnlen() to reuse it. */
-extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy);
+extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(sized_strscpy);
 /**
  * strscpy - Copy a C-string into a sized buffer
  *
@@ -234,7 +234,7 @@  extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy);
  * Returns the number of characters copied in @p (not including the
  * trailing %NUL) or -E2BIG if @size is 0 or the copy of @q was truncated.
  */
-__FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, size_t size)
+__FORTIFY_INLINE ssize_t sized_strscpy(char * const POS p, const char * const POS q, size_t size)
 {
 	/* Use string size rather than possible enclosing struct size. */
 	const size_t p_size = __member_size(p);
diff --git a/include/linux/string.h b/include/linux/string.h
index ab148d8dbfc1..0bb1c8d05f18 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -67,9 +67,16 @@  extern char * strcpy(char *,const char *);
 extern char * strncpy(char *,const char *, __kernel_size_t);
 #endif
 #ifndef __HAVE_ARCH_STRSCPY
-ssize_t strscpy(char *, const char *, size_t);
+ssize_t sized_strscpy(char *, const char *, size_t);
 #endif
 
+#define __strscpy0(dst, src, ...)	sized_strscpy(dst, src, sizeof(dst))
+
+#define __strscpy1(dst, src, size)	sized_strscpy(dst, src, size)
+
+#define strscpy(dst, src, ...)	\
+	CONCATENATE(__strscpy, COUNT_ARGS(__VA_ARGS__))(dst, src, __VA_ARGS__)
+
 /* Wraps calls to strscpy()/memset(), no arch specific code required */
 ssize_t strscpy_pad(char *dest, const char *src, size_t count);
 
diff --git a/lib/string.c b/lib/string.c
index 6891d15ce991..2869895a1180 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -104,7 +104,7 @@  EXPORT_SYMBOL(strncpy);
 #endif
 
 #ifndef __HAVE_ARCH_STRSCPY
-ssize_t strscpy(char *dest, const char *src, size_t count)
+ssize_t sized_strscpy(char *dest, const char *src, size_t count)
 {
 	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
 	size_t max = count;
@@ -170,7 +170,7 @@  ssize_t strscpy(char *dest, const char *src, size_t count)
 
 	return -E2BIG;
 }
-EXPORT_SYMBOL(strscpy);
+EXPORT_SYMBOL(sized_strscpy);
 #endif
 
 /**