[v3,2/4] vsscanf(): Integer overflow is a conversion failure

Message ID 20230610204044.3653-3-demi@invisiblethingslab.com
State New
Headers
Series Make sscanf() stricter |

Commit Message

Demi Marie Obenour June 10, 2023, 8:40 p.m. UTC
  sscanf() and friends currently ignore integer overflow, but this is a
bad idea.  It is much better to detect integer overflow errors and
consider this a conversion failure.

This implements Linus's suggestion of using '!' to treat integer
overflow as wrapping.  It does _not_ allow wrapping of unsigned
conversions by default, though, as in at least some cases accepting
negative numbers is _not_ intended.

Suggested-By: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
---
 lib/vsprintf.c | 90 ++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 69 insertions(+), 21 deletions(-)
  

Comments

Rasmus Villemoes June 12, 2023, 10:53 a.m. UTC | #1
On 10/06/2023 22.40, Demi Marie Obenour wrote:
> sscanf() and friends currently ignore integer overflow, but this is a
> bad idea.  It is much better to detect integer overflow errors and
> consider this a conversion failure.

Perhaps. And maybe I even agree. But not like this:

>  	while (*fmt) {
>  		/* skip any white space in format */
> @@ -3464,6 +3474,9 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
>  			break;
>  		++fmt;
>  
> +		allow_overflow = *fmt == '!';
> +		fmt += (int)allow_overflow;
> +

You can't do that. Or, at least, you won't be able to actually use %!d
anywhere, because the compiler will yell at you:

lib/vsprintf.c: In function ‘foobar’:
lib/vsprintf.c:3727:26: error: unknown conversion type character ‘!’ in
format [-Werror=format=]
 3727 |  ret = sscanf("12345", "%!d", &val);
      |                          ^

So NAK.

Also, when you make significant changes to the sscanf implementation,
I'd expect the diffstat for the patch series to contain lib/test_scanf.c.

Rasmus
  

Patch

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 40f560959b169b4c4ac6154d658cfe76cfd0c5a6..9e53355c35b1d6260631868228ede1d178fe3325 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -59,7 +59,7 @@ 
 bool no_hash_pointers __ro_after_init;
 EXPORT_SYMBOL_GPL(no_hash_pointers);
 
-static noinline unsigned long long simple_strntoull(const char *startp, size_t max_chars, char **endp, unsigned int base)
+static noinline unsigned long long simple_strntoull(const char *startp, size_t max_chars, char **endp, unsigned int base, bool *overflow)
 {
 	const char *cp;
 	unsigned long long result = 0ULL;
@@ -70,11 +70,13 @@  static noinline unsigned long long simple_strntoull(const char *startp, size_t m
 	prefix_chars = cp - startp;
 	if (prefix_chars < max_chars) {
 		rv = _parse_integer_limit(cp, base, &result, max_chars - prefix_chars);
-		/* FIXME */
+		*overflow = !!(rv & KSTRTOX_OVERFLOW);
 		cp += (rv & ~KSTRTOX_OVERFLOW);
 	} else {
 		/* Field too short for prefix + digit, skip over without converting */
 		cp = startp + max_chars;
+		/* Treat this as a conversion failure */
+		*overflow = true;
 	}
 
 	if (endp)
@@ -94,7 +96,9 @@  static noinline unsigned long long simple_strntoull(const char *startp, size_t m
 noinline
 unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
 {
-	return simple_strntoull(cp, INT_MAX, endp, base);
+	bool _placeholder;
+
+	return simple_strntoull(cp, INT_MAX, endp, base, &_placeholder);
 }
 EXPORT_SYMBOL(simple_strtoull);
 
@@ -130,18 +134,22 @@  long simple_strtol(const char *cp, char **endp, unsigned int base)
 EXPORT_SYMBOL(simple_strtol);
 
 static long long simple_strntoll(const char *cp, size_t max_chars, char **endp,
-				 unsigned int base)
+				 unsigned int base, bool *overflow)
 {
+	unsigned long long minand;
+	bool negate;
+
 	/*
 	 * simple_strntoull() safely handles receiving max_chars==0 in the
 	 * case cp[0] == '-' && max_chars == 1.
 	 * If max_chars == 0 we can drop through and pass it to simple_strntoull()
 	 * and the content of *cp is irrelevant.
 	 */
-	if (*cp == '-' && max_chars > 0)
-		return -simple_strntoull(cp + 1, max_chars - 1, endp, base);
-
-	return simple_strntoull(cp, max_chars, endp, base);
+	negate = *cp == '-' && max_chars > 0;
+	minand = simple_strntoull(cp + negate, max_chars - negate, endp, base, overflow);
+	if (minand > (unsigned long long)LONG_MAX + negate)
+		*overflow = true;
+	return negate ? -minand : minand;
 }
 
 /**
@@ -154,7 +162,9 @@  static long long simple_strntoll(const char *cp, size_t max_chars, char **endp,
  */
 long long simple_strtoll(const char *cp, char **endp, unsigned int base)
 {
-	return simple_strntoll(cp, INT_MAX, endp, base);
+	bool _placeholder;
+
+	return simple_strntoll(cp, INT_MAX, endp, base, &_placeholder);
 }
 EXPORT_SYMBOL(simple_strtoll);
 
@@ -3441,7 +3451,7 @@  int vsscanf(const char *buf, const char *fmt, va_list args)
 		unsigned long long u;
 	} val;
 	s16 field_width;
-	bool is_sign;
+	bool is_sign, overflow, allow_overflow;
 
 	while (*fmt) {
 		/* skip any white space in format */
@@ -3464,6 +3474,9 @@  int vsscanf(const char *buf, const char *fmt, va_list args)
 			break;
 		++fmt;
 
+		allow_overflow = *fmt == '!';
+		fmt += (int)allow_overflow;
+
 		/* skip this conversion.
 		 * advance both strings to next white space
 		 */
@@ -3649,45 +3662,80 @@  int vsscanf(const char *buf, const char *fmt, va_list args)
 		if (is_sign)
 			val.s = simple_strntoll(str,
 						field_width >= 0 ? field_width : INT_MAX,
-						&next, base);
+						&next, base, &overflow);
 		else
 			val.u = simple_strntoull(str,
 						 field_width >= 0 ? field_width : INT_MAX,
-						 &next, base);
+						 &next, base, &overflow);
+		if (unlikely(overflow) && !allow_overflow)
+			break;
 
 		switch (qualifier) {
 		case 'H':	/* that's 'hh' in format */
-			if (is_sign)
+			if (is_sign) {
+				if (unlikely(val.s < SCHAR_MIN || val.s > SCHAR_MAX) &&
+				    !allow_overflow)
+					return num;
 				*va_arg(args, signed char *) = val.s;
-			else
+			} else {
+				if (unlikely(val.u > UCHAR_MAX) && !allow_overflow)
+					return num;
 				*va_arg(args, unsigned char *) = val.u;
+			}
 			break;
 		case 'h':
-			if (is_sign)
+			if (is_sign) {
+				if (unlikely(val.s < SHRT_MIN || val.s > SHRT_MAX) &&
+				    !allow_overflow)
+					return num;
 				*va_arg(args, short *) = val.s;
-			else
+			} else {
+				if (unlikely(val.u > USHRT_MAX) && !allow_overflow)
+					return num;
 				*va_arg(args, unsigned short *) = val.u;
+			}
 			break;
 		case 'l':
-			if (is_sign)
+			if (is_sign) {
+				if (unlikely(val.s < LONG_MIN || val.s > LONG_MAX) &&
+				    !allow_overflow)
+					return num;
 				*va_arg(args, long *) = val.s;
-			else
+			} else {
+				if (unlikely(val.u > ULONG_MAX) && !allow_overflow)
+					return num;
 				*va_arg(args, unsigned long *) = val.u;
+			}
 			break;
 		case 'L':
+			/* No overflow check needed */
 			if (is_sign)
 				*va_arg(args, long long *) = val.s;
 			else
 				*va_arg(args, unsigned long long *) = val.u;
 			break;
 		case 'z':
-			*va_arg(args, size_t *) = val.u;
+			if (is_sign) {
+				if (unlikely(val.s < SSIZE_MIN || val.s > SSIZE_MAX))
+					return num;
+				*va_arg(args, ssize_t *) = val.s;
+			} else {
+				if (unlikely(val.u > SIZE_MAX) && !allow_overflow)
+					return num;
+				*va_arg(args, size_t *) = val.u;
+			}
 			break;
 		default:
-			if (is_sign)
+			if (is_sign) {
+				if (unlikely(val.s < INT_MIN || val.s > INT_MAX) &&
+				    !allow_overflow)
+					return num;
 				*va_arg(args, int *) = val.s;
-			else
+			} else {
+				if (unlikely(val.u > UINT_MAX) && !allow_overflow)
+					return num;
 				*va_arg(args, unsigned int *) = val.u;
+			}
 			break;
 		}
 		num++;