[9/9] analyzer: implement kf_strcat [PR105899]

Message ID 20230824143903.3161185-10-dmalcolm@redhat.com
State Unresolved
Headers
Series analyzer: strlen, strcpy, and strcat [PR105899] |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

David Malcolm Aug. 24, 2023, 2:39 p.m. UTC
  gcc/analyzer/ChangeLog:
	PR analyzer/105899
	* call-details.cc
	(call_details::check_for_null_terminated_string_arg): Split into
	overloads, one taking just an arg_idx, the other a new
	"include_terminator" param.
	* call-details.h: Likewise.
	* kf.cc (class kf_strcat): New.
	(kf_strcpy::impl_call_pre): Update for change to
	check_for_null_terminated_string_arg.
	(register_known_functions): Register kf_strcat.
	* region-model.cc
	(region_model::check_for_null_terminated_string_arg): Split into
	overloads, one taking just an arg_idx, the other a new
	"include_terminator" param.  When returning an svalue, handle
	"include_terminator" being false by subtracting one.
	* region-model.h
	(region_model::check_for_null_terminated_string_arg): Split into
	overloads, one taking just an arg_idx, the other a new
	"include_terminator" param.

gcc/ChangeLog:
	PR analyzer/105899
	* doc/invoke.texi (Static Analyzer Options): Add "strcat" to the
	list of functions known to the analyzer.

gcc/testsuite/ChangeLog:
	PR analyzer/105899
	* gcc.dg/analyzer/strcat-1.c: New test.
---
 gcc/analyzer/call-details.cc             |  12 +-
 gcc/analyzer/call-details.h              |   5 +-
 gcc/analyzer/kf.cc                       |  72 ++++++++++--
 gcc/analyzer/region-model.cc             |  63 +++++++++--
 gcc/analyzer/region-model.h              |   6 +-
 gcc/doc/invoke.texi                      |   1 +
 gcc/testsuite/gcc.dg/analyzer/strcat-1.c | 136 +++++++++++++++++++++++
 7 files changed, 275 insertions(+), 20 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/strcat-1.c
  

Patch

diff --git a/gcc/analyzer/call-details.cc b/gcc/analyzer/call-details.cc
index 8f5b28ce6c26..ce1f859c9996 100644
--- a/gcc/analyzer/call-details.cc
+++ b/gcc/analyzer/call-details.cc
@@ -386,13 +386,23 @@  call_details::lookup_function_attribute (const char *attr_name) const
   return lookup_attribute (attr_name, TYPE_ATTRIBUTES (allocfntype));
 }
 
+void
+call_details::check_for_null_terminated_string_arg (unsigned arg_idx) const
+{
+  check_for_null_terminated_string_arg (arg_idx, false, nullptr);
+}
+
 const svalue *
 call_details::
 check_for_null_terminated_string_arg (unsigned arg_idx,
+				      bool include_terminator,
 				      const svalue **out_sval) const
 {
   region_model *model = get_model ();
-  return model->check_for_null_terminated_string_arg (*this, arg_idx, out_sval);
+  return model->check_for_null_terminated_string_arg (*this,
+						      arg_idx,
+						      include_terminator,
+						      out_sval);
 }
 
 } // namespace ana
diff --git a/gcc/analyzer/call-details.h b/gcc/analyzer/call-details.h
index 58b5ccd2acde..ae528e4ab116 100644
--- a/gcc/analyzer/call-details.h
+++ b/gcc/analyzer/call-details.h
@@ -72,9 +72,12 @@  public:
 
   tree lookup_function_attribute (const char *attr_name) const;
 
+  void
+  check_for_null_terminated_string_arg (unsigned arg_idx) const;
   const svalue *
   check_for_null_terminated_string_arg (unsigned arg_idx,
-					const svalue **out_sval = nullptr) const;
+					bool include_terminator,
+					const svalue **out_sval) const;
 
 private:
   const gcall *m_call;
diff --git a/gcc/analyzer/kf.cc b/gcc/analyzer/kf.cc
index 3eddbe200387..36d9d10bb013 100644
--- a/gcc/analyzer/kf.cc
+++ b/gcc/analyzer/kf.cc
@@ -1106,6 +1106,61 @@  public:
   /* Currently a no-op.  */
 };
 
+/* Handler for "strcat" and "__builtin_strcat_chk".  */
+
+class kf_strcat : public known_function
+{
+public:
+  kf_strcat (unsigned int num_args) : m_num_args (num_args) {}
+  bool matches_call_types_p (const call_details &cd) const final override
+  {
+    return (cd.num_args () == m_num_args
+	    && cd.arg_is_pointer_p (0)
+	    && cd.arg_is_pointer_p (1));
+  }
+
+  void impl_call_pre (const call_details &cd) const final override
+  {
+    region_model *model = cd.get_model ();
+    region_model_manager *mgr = cd.get_manager ();
+
+    const svalue *dest_sval = cd.get_arg_svalue (0);
+    const region *dest_reg = model->deref_rvalue (dest_sval, cd.get_arg_tree (0),
+						  cd.get_ctxt ());
+
+    const svalue *dst_strlen_sval
+      = cd.check_for_null_terminated_string_arg (0, false, nullptr);
+    if (!dst_strlen_sval)
+      {
+	if (cd.get_ctxt ())
+	  cd.get_ctxt ()->terminate_path ();
+	return;
+      }
+
+    const svalue *bytes_to_copy;
+    const svalue *num_src_bytes_read_sval
+      = cd.check_for_null_terminated_string_arg (1, true, &bytes_to_copy);
+    if (!num_src_bytes_read_sval)
+      {
+	if (cd.get_ctxt ())
+	  cd.get_ctxt ()->terminate_path ();
+	return;
+      }
+
+    cd.maybe_set_lhs (dest_sval);
+
+    const region *offset_reg
+      = mgr->get_offset_region (dest_reg, NULL_TREE, dst_strlen_sval);
+    model->write_bytes (offset_reg,
+			num_src_bytes_read_sval,
+			bytes_to_copy,
+			cd.get_ctxt ());
+  }
+
+private:
+  unsigned int m_num_args;
+};
+
 /* Handler for "strcpy" and "__builtin_strcpy_chk".  */
 
 class kf_strcpy : public known_function
@@ -1139,7 +1194,7 @@  kf_strcpy::impl_call_pre (const call_details &cd) const
 
   const svalue *bytes_to_copy;
   if (const svalue *num_bytes_read_sval
-	= cd.check_for_null_terminated_string_arg (1, &bytes_to_copy))
+      = cd.check_for_null_terminated_string_arg (1, true, &bytes_to_copy))
     {
       model->write_bytes (dest_reg, num_bytes_read_sval, bytes_to_copy, ctxt);
     }
@@ -1188,16 +1243,10 @@  public:
   }
   void impl_call_pre (const call_details &cd) const final override
   {
-    if (const svalue *bytes_read = cd.check_for_null_terminated_string_arg (0))
-      if (bytes_read->get_kind () != SK_UNKNOWN)
+    if (const svalue *strlen_sval
+	  = cd.check_for_null_terminated_string_arg (0, false, nullptr))
+      if (strlen_sval->get_kind () != SK_UNKNOWN)
 	{
-	  region_model_manager *mgr = cd.get_manager ();
-	  /* strlen is (bytes_read - 1).  */
-	  const svalue *one = mgr->get_or_create_int_cst (size_type_node, 1);
-	  const svalue *strlen_sval = mgr->get_or_create_binop (size_type_node,
-								MINUS_EXPR,
-								bytes_read,
-								one);
 	  cd.maybe_set_lhs (strlen_sval);
 	  return;
 	}
@@ -1415,6 +1464,8 @@  register_known_functions (known_function_manager &kfm)
     kfm.add (BUILT_IN_SPRINTF, make_unique<kf_sprintf> ());
     kfm.add (BUILT_IN_STACK_RESTORE, make_unique<kf_stack_restore> ());
     kfm.add (BUILT_IN_STACK_SAVE, make_unique<kf_stack_save> ());
+    kfm.add (BUILT_IN_STRCAT, make_unique<kf_strcat> (2));
+    kfm.add (BUILT_IN_STRCAT_CHK, make_unique<kf_strcat> (3));
     kfm.add (BUILT_IN_STRCHR, make_unique<kf_strchr> ());
     kfm.add (BUILT_IN_STRCPY, make_unique<kf_strcpy> (2));
     kfm.add (BUILT_IN_STRCPY_CHK, make_unique<kf_strcpy> (3));
@@ -1429,6 +1480,7 @@  register_known_functions (known_function_manager &kfm)
   /* Known builtins and C standard library functions.  */
   {
     kfm.add ("memset", make_unique<kf_memset> ());
+    kfm.add ("strcat", make_unique<kf_strcat> (2));
     kfm.add ("strdup", make_unique<kf_strdup> ());
     kfm.add ("strndup", make_unique<kf_strndup> ());
   }
diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 025b555d7b97..02c073c15bcc 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -3679,9 +3679,41 @@  region_model::scan_for_null_terminator (const region *reg,
    - the buffer pointed to has any uninitalized bytes before any 0-terminator
    - any of the reads aren't within the bounds of the underlying base region
 
-   Otherwise, return a svalue for the number of bytes read (strlen + 1),
-   and, if OUT_SVAL is non-NULL, write to *OUT_SVAL with an svalue
-   representing the content of the buffer up to and including the terminator.
+   Otherwise, return a svalue for strlen of the buffer (*not* including
+   the null terminator).
+
+   TODO: we should also complain if:
+   - the pointer is NULL (or could be).  */
+
+void
+region_model::check_for_null_terminated_string_arg (const call_details &cd,
+						    unsigned arg_idx)
+{
+  check_for_null_terminated_string_arg (cd,
+					arg_idx,
+					false, /* include_terminator */
+					nullptr); // out_sval
+}
+
+
+/* Check that argument ARG_IDX (0-based) to the call described by CD
+   is a pointer to a valid null-terminated string.
+
+   Simulate scanning through the buffer, reading until we find a 0 byte
+   (equivalent to calling strlen).
+
+   Complain and return NULL if:
+   - the buffer pointed to isn't null-terminated
+   - the buffer pointed to has any uninitalized bytes before any 0-terminator
+   - any of the reads aren't within the bounds of the underlying base region
+
+   Otherwise, return a svalue.  This will be the number of bytes read
+   (including the null terminator) if INCLUDE_TERMINATOR is true, or strlen
+   of the buffer (not including the null terminator) if it is false.
+
+   Also, when returning an svalue, if OUT_SVAL is non-NULL, write to
+   *OUT_SVAL with an svalue representing the content of the buffer up to
+   and including the terminator.
 
    TODO: we should also complain if:
    - the pointer is NULL (or could be).  */
@@ -3689,6 +3721,7 @@  region_model::scan_for_null_terminator (const region *reg,
 const svalue *
 region_model::check_for_null_terminated_string_arg (const call_details &cd,
 						    unsigned arg_idx,
+						    bool include_terminator,
 						    const svalue **out_sval)
 {
   class null_terminator_check_event : public custom_event
@@ -3786,10 +3819,26 @@  region_model::check_for_null_terminated_string_arg (const call_details &cd,
   const region *buf_reg
     = deref_rvalue (arg_sval, cd.get_arg_tree (arg_idx), &my_ctxt);
 
-  return scan_for_null_terminator (buf_reg,
-				   cd.get_arg_tree (arg_idx),
-				   out_sval,
-				   &my_ctxt);
+  if (const svalue *num_bytes_read_sval
+      = scan_for_null_terminator (buf_reg,
+				  cd.get_arg_tree (arg_idx),
+				  out_sval,
+				  &my_ctxt))
+    {
+      if (include_terminator)
+	return num_bytes_read_sval;
+      else
+	{
+	  /* strlen is (bytes_read - 1).  */
+	  const svalue *one = m_mgr->get_or_create_int_cst (size_type_node, 1);
+	  return m_mgr->get_or_create_binop (size_type_node,
+					     MINUS_EXPR,
+					     num_bytes_read_sval,
+					     one);
+	}
+    }
+  else
+    return nullptr;
 }
 
 /* Remove all bindings overlapping REG within the store.  */
diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h
index b1c705e22c28..40259625fb06 100644
--- a/gcc/analyzer/region-model.h
+++ b/gcc/analyzer/region-model.h
@@ -519,10 +519,14 @@  class region_model
 			       const svalue *sval_hint,
 			       region_model_context *ctxt) const;
 
+  void
+  check_for_null_terminated_string_arg (const call_details &cd,
+					unsigned idx);
   const svalue *
   check_for_null_terminated_string_arg (const call_details &cd,
 					unsigned idx,
-					const svalue **out_sval = nullptr);
+					bool include_terminator,
+					const svalue **out_sval);
 
 private:
   const region *get_lvalue_1 (path_var pv, region_model_context *ctxt) const;
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index ef3f40989860..209d6b0ce4d3 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -11109,6 +11109,7 @@  and of the following functions:
 @item @code{siglongjmp}
 @item @code{signal}
 @item @code{sigsetjmp}
+@item @code{strcat}
 @item @code{strchr}
 @item @code{strlen}
 @end itemize
diff --git a/gcc/testsuite/gcc.dg/analyzer/strcat-1.c b/gcc/testsuite/gcc.dg/analyzer/strcat-1.c
new file mode 100644
index 000000000000..e3b698ae73d3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/strcat-1.c
@@ -0,0 +1,136 @@ 
+/* See e.g. https://en.cppreference.com/w/c/string/byte/strcat */
+
+#include "analyzer-decls.h"
+
+char *strcat (char *dest, const char *src);
+#define NULL ((void *)0)
+
+char *
+test_passthrough (char *dest, const char *src)
+{
+  return strcat (dest, src);
+}
+
+char *
+test_null_dest (const char *src)
+{
+  return strcat (NULL, src); /* { dg-warning "use of NULL where non-null expected" } */
+}
+
+char *
+test_null_src (char *dest)
+{
+  return strcat (dest, NULL); /* { dg-warning "use of NULL where non-null expected" } */
+}
+
+char *
+test_uninit_dest (const char *src)
+{
+  char dest[10];
+  return strcat (dest, src); /* { dg-warning "use of uninitialized value 'dest\\\[0\\\]'" } */
+}
+
+char *
+test_uninit_src (char *dest)
+{
+  const char src[10];
+  return strcat (dest, src); /* { dg-warning "use of uninitialized value 'src\\\[0\\\]'" } */
+}
+
+char *
+test_dest_not_terminated (char *src)
+{
+  char dest[3] = "foo";
+  return strcat (dest, src); /* { dg-warning "stack-based buffer over-read" } */
+  /* { dg-message "while looking for null terminator for argument 1 \\('&dest'\\) of 'strcat'" "" { target *-*-* } .-1 } */
+}
+
+char *
+test_src_not_terminated (char *dest)
+{
+  const char src[3] = "foo";
+  return strcat (dest, src); /* { dg-warning "stack-based buffer over-read" } */
+  /* { dg-message "while looking for null terminator for argument 2 \\('&src'\\) of 'strcat'" "" { target *-*-* } .-1 } */
+}
+
+char * __attribute__((noinline))
+call_strcat (char *dest, const char *src)
+{
+  return strcat (dest, src);
+}
+
+void
+test_concrete_valid_static_size (void)
+{
+  char buf[16];
+  char *p1 = __builtin_strcpy (buf, "abc");
+  char *p2 = call_strcat (buf, "def");
+  __analyzer_eval (p1 == buf); /* { dg-warning "TRUE" } */
+  __analyzer_eval (p2 == buf); /* { dg-warning "TRUE" } */
+  __analyzer_eval (buf[0] == 'a'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (buf[1] == 'b'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (buf[2] == 'c'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (buf[3] == 'd'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (buf[4] == 'e'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (buf[5] == 'f'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (buf[6] == '\0'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (__builtin_strlen (buf) == 6);  /* { dg-warning "TRUE" } */
+}
+
+void
+test_concrete_valid_static_size_2 (void)
+{
+  char buf[16];
+  char *p1 = __builtin_strcpy (buf, "abc");
+  char *p2 = call_strcat (buf, "def");
+  char *p3 = call_strcat (buf, "ghi");
+  __analyzer_eval (p1 == buf); /* { dg-warning "TRUE" } */
+  __analyzer_eval (p2 == buf); /* { dg-warning "TRUE" } */
+  __analyzer_eval (p3 == buf); /* { dg-warning "TRUE" } */
+  __analyzer_eval (buf[0] == 'a'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (buf[1] == 'b'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (buf[2] == 'c'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (buf[3] == 'd'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (buf[4] == 'e'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (buf[5] == 'f'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (buf[6] == 'g'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (buf[7] == 'h'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (buf[8] == 'i'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (buf[9] == '\0'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (__builtin_strlen (buf) == 9);  /* { dg-warning "TRUE" } */
+  __analyzer_eval (__builtin_strlen (buf + 1) == 8);  /* { dg-warning "TRUE" } */
+  __analyzer_eval (__builtin_strlen (buf + 2) == 7);  /* { dg-warning "TRUE" } */
+  __analyzer_eval (__builtin_strlen (buf + 3) == 6);  /* { dg-warning "TRUE" } */
+  __analyzer_eval (__builtin_strlen (buf + 4) == 5);  /* { dg-warning "TRUE" } */
+  __analyzer_eval (__builtin_strlen (buf + 5) == 4);  /* { dg-warning "TRUE" } */
+  __analyzer_eval (__builtin_strlen (buf + 6) == 3);  /* { dg-warning "TRUE" } */
+  __analyzer_eval (__builtin_strlen (buf + 7) == 2);  /* { dg-warning "TRUE" } */
+  __analyzer_eval (__builtin_strlen (buf + 8) == 1);  /* { dg-warning "TRUE" } */
+  __analyzer_eval (__builtin_strlen (buf + 9) == 0);  /* { dg-warning "TRUE" } */
+}
+
+char * __attribute__((noinline))
+call_strcat_invalid (char *dest, const char *src)
+{
+  return strcat (dest, src); /* { dg-warning "stack-based buffer overflow" } */
+}
+
+void
+test_concrete_invalid_static_size (void)
+{
+  char buf[3];
+  buf[0] = '\0';
+  call_strcat_invalid (buf, "abc");
+}
+
+void
+test_concrete_symbolic (const char *suffix)
+{
+  char buf[10];
+  buf[0] = '\0';
+  call_strcat (buf, suffix);
+}
+
+/* TODO:
+     - "The behavior is undefined if the strings overlap."
+*/