[pushed] analyzer: fix taint false +ve due to overzealous state purging [PR112977]

Message ID 20240124151615.1538286-1-dmalcolm@redhat.com
State Unresolved
Headers
Series [pushed] analyzer: fix taint false +ve due to overzealous state purging [PR112977] |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

David Malcolm Jan. 24, 2024, 3:16 p.m. UTC
  Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Successful run of analyzer integration tests on x86_64-pc-linux-gnu.
Pushed to trunk as r14-8391-ge503f9aca91926.

gcc/analyzer/ChangeLog:
	PR analyzer/112977
	* engine.cc (impl_region_model_context::on_liveness_change): Pass
	m_ext_state to sm_state_map::on_liveness_change.
	* program-state.cc (sm_state_map::on_svalue_leak): Guard removal
	of map entry based on can_purge_p.
	(sm_state_map::on_liveness_change): Add ext_state param.  Add
	workaround for bad interaction between state purging and
	alt-inherited sm-state.
	* program-state.h (sm_state_map::on_liveness_change): Add
	ext_state param.
	* sm-taint.cc
	(taint_state_machine::has_alt_get_inherited_state_p): New.
	(taint_state_machine::can_purge_p): Return false for "has_lb" and
	"has_ub".
	* sm.h (state_machine::has_alt_get_inherited_state_p): New vfunc.

gcc/testsuite/ChangeLog:
	PR analyzer/112977
	* gcc.dg/plugin/plugin.exp: Add taint-pr112977.c.
	* gcc.dg/plugin/taint-pr112977.c: New test.

Signed-off-by: David Malcolm <dmalcolm@redhat.com>
---
 gcc/analyzer/engine.cc                       |  2 +-
 gcc/analyzer/program-state.cc                | 65 +++++++++++++++++++-
 gcc/analyzer/program-state.h                 |  1 +
 gcc/analyzer/sm-taint.cc                     |  9 +++
 gcc/analyzer/sm.h                            |  6 ++
 gcc/testsuite/gcc.dg/plugin/plugin.exp       |  3 +-
 gcc/testsuite/gcc.dg/plugin/taint-pr112977.c | 44 +++++++++++++
 7 files changed, 126 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/plugin/taint-pr112977.c
  

Patch

diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc
index fde8412bc15..44ff20cf9af 100644
--- a/gcc/analyzer/engine.cc
+++ b/gcc/analyzer/engine.cc
@@ -179,7 +179,7 @@  on_liveness_change (const svalue_set &live_svalues,
 		    const region_model *model)
 {
   for (sm_state_map *smap : m_new_state->m_checker_states)
-    smap->on_liveness_change (live_svalues, model, this);
+    smap->on_liveness_change (live_svalues, model, m_ext_state, this);
 }
 
 void
diff --git a/gcc/analyzer/program-state.cc b/gcc/analyzer/program-state.cc
index 888f2a9c40b..55dd6ca7166 100644
--- a/gcc/analyzer/program-state.cc
+++ b/gcc/analyzer/program-state.cc
@@ -560,9 +560,10 @@  sm_state_map::on_svalue_leak (const svalue *sval,
 {
   if (state_machine::state_t state = get_state (sval, ctxt->m_ext_state))
     {
-      if (!m_sm.can_purge_p (state))
+      if (m_sm.can_purge_p (state))
+	m_map.remove (sval);
+      else
 	ctxt->on_state_leak (m_sm, sval, state);
-      m_map.remove (sval);
     }
 }
 
@@ -572,6 +573,7 @@  sm_state_map::on_svalue_leak (const svalue *sval,
 void
 sm_state_map::on_liveness_change (const svalue_set &live_svalues,
 				  const region_model *model,
+				  const extrinsic_state &ext_state,
 				  impl_region_model_context *ctxt)
 {
   svalue_set svals_to_unset;
@@ -605,9 +607,68 @@  sm_state_map::on_liveness_change (const svalue_set &live_svalues,
       ctxt->on_state_leak (m_sm, sval, e.m_state);
     }
 
+  sm_state_map old_sm_map = *this;
+
   for (svalue_set::iterator iter = svals_to_unset.begin ();
        iter != svals_to_unset.end (); ++iter)
     m_map.remove (*iter);
+
+  /* For state machines like "taint" where states can be
+     alt-inherited from other svalues, ensure that state purging doesn't
+     make us lose sm-state.
+
+     Consider e.g.:
+
+     make_tainted(foo);
+     if (foo.field > 128)
+       return;
+     arr[foo.field].f1 = v1;
+
+     where the last line is:
+
+     (A): _t1 = foo.field;
+     (B): _t2 = _t1 * sizeof(arr[0]);
+     (C): [arr + _t2].f1 = val;
+
+     At (A), foo is 'tainted' and foo.field is 'has_ub'.
+     After (B), foo.field's value (in _t1) is no longer directly
+     within LIVE_SVALUES, so with state purging enabled, we would
+     erroneously purge the "has_ub" state from the svalue.
+
+     Given that _t2's value's state comes from _t1's value's state,
+     we need to preserve that information.
+
+     Hence for all svalues that have had their explicit sm-state unset,
+     determine if doing so has changed their effective state, and if so,
+     explicitly set their state.
+
+     For example, in the above, unsetting the "has_ub" for _t1's value means
+     that _t2's effective state changes from "has_ub" (alt-inherited
+     from _t1's value) to "tainted" (inherited from "foo"'s value).
+
+     For such cases, preserve the effective state by explicitly setting the
+     new state.  In the above example, this means explicitly setting the
+     state of _t2's value to the state ("has_ub") it was previously
+     alt-inheriting from _t1's value.  */
+  if (m_sm.has_alt_get_inherited_state_p ())
+    {
+      auto_vec<const svalue *> svalues_needing_state;
+      for (auto unset_sval : svals_to_unset)
+	{
+	  const state_machine::state_t old_state
+	    = old_sm_map.get_state (unset_sval, ext_state);
+	  const state_machine::state_t new_state
+	    = get_state (unset_sval, ext_state);
+	  if (new_state != old_state)
+	    svalues_needing_state.safe_push (unset_sval);
+	}
+      for (auto sval : svalues_needing_state)
+	{
+	  const state_machine::state_t old_state
+	    = old_sm_map.get_state (sval, ext_state);
+	  impl_set_state (sval, old_state, nullptr, ext_state);
+	}
+    }
 }
 
 /* Purge state from SVAL (in response to a call to an unknown function).  */
diff --git a/gcc/analyzer/program-state.h b/gcc/analyzer/program-state.h
index 50bf2ab277a..69bf931f070 100644
--- a/gcc/analyzer/program-state.h
+++ b/gcc/analyzer/program-state.h
@@ -155,6 +155,7 @@  public:
 		       impl_region_model_context *ctxt);
   void on_liveness_change (const svalue_set &live_svalues,
 			   const region_model *model,
+			   const extrinsic_state &ext_state,
 			   impl_region_model_context *ctxt);
 
   void on_unknown_change (const svalue *sval,
diff --git a/gcc/analyzer/sm-taint.cc b/gcc/analyzer/sm-taint.cc
index dc4b078c411..bbf683f82ef 100644
--- a/gcc/analyzer/sm-taint.cc
+++ b/gcc/analyzer/sm-taint.cc
@@ -86,6 +86,12 @@  public:
 				   const extrinsic_state &ext_state)
     const final override;
 
+  bool
+  has_alt_get_inherited_state_p () const final override
+  {
+    return true;
+  }
+
   bool on_stmt (sm_context *sm_ctxt,
 		const supernode *node,
 		const gimple *stmt) const final override;
@@ -1199,6 +1205,9 @@  taint_state_machine::on_bounded_ranges (sm_context *sm_ctxt,
 bool
 taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const
 {
+  if (s == m_has_lb || s == m_has_ub)
+    return false;
+
   return true;
 }
 
diff --git a/gcc/analyzer/sm.h b/gcc/analyzer/sm.h
index e7f634f4245..a58f5dbc395 100644
--- a/gcc/analyzer/sm.h
+++ b/gcc/analyzer/sm.h
@@ -78,6 +78,12 @@  public:
     return NULL;
   }
 
+  virtual bool
+  has_alt_get_inherited_state_p () const
+  {
+    return false;
+  }
+
   virtual state_machine::state_t get_default_state (const svalue *) const
   {
     return m_start;
diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp b/gcc/testsuite/gcc.dg/plugin/plugin.exp
index a5a72daac1a..8141cc2aa46 100644
--- a/gcc/testsuite/gcc.dg/plugin/plugin.exp
+++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp
@@ -170,7 +170,8 @@  set plugin_test_list [list \
 	  taint-pr112850-precise.c \
 	  taint-pr112850-too-complex.c \
 	  taint-pr112850-unsanitized.c \
-	  taint-pr112927.c } \
+	  taint-pr112927.c \
+	  taint-pr112977.c } \
     { analyzer_cpython_plugin.c \
 	  cpython-plugin-test-no-Python-h.c \
 	  cpython-plugin-test-PyList_Append.c \
diff --git a/gcc/testsuite/gcc.dg/plugin/taint-pr112977.c b/gcc/testsuite/gcc.dg/plugin/taint-pr112977.c
new file mode 100644
index 00000000000..a9beb00b88d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/taint-pr112977.c
@@ -0,0 +1,44 @@ 
+/* Reduced from false positive in Linux kernel in
+   drivers/scsi/aacraid/aachba.c.  */
+
+/* { dg-do compile } */
+/* { dg-options "-fanalyzer" } */
+/* { dg-require-effective-target analyzer } */
+
+typedef unsigned char u8;
+typedef unsigned int u32;
+
+extern unsigned long
+copy_from_user(void* to, const void* from, unsigned long n);
+
+struct fsa_dev_info
+{
+  u8 valid;
+  u8 deleted;
+};
+struct aac_dev
+{
+  int maximum_num_containers;
+  struct fsa_dev_info* fsa_dev;
+};
+struct aac_delete_disk
+{
+  u32 disknum;
+  u32 cnum;
+};
+int
+force_delete_disk(struct aac_dev* dev, void* arg)
+{
+  struct aac_delete_disk dd;
+  struct fsa_dev_info* fsa_dev_ptr;
+  fsa_dev_ptr = dev->fsa_dev;
+  if (!fsa_dev_ptr)
+    return -16;
+  if (copy_from_user(&dd, arg, sizeof(struct aac_delete_disk)))
+    return -14;
+  if (dd.cnum >= dev->maximum_num_containers)
+    return -22;
+  fsa_dev_ptr[dd.cnum].deleted = 1;
+  fsa_dev_ptr[dd.cnum].valid = 0; /* { dg-bogus "use of attacker-controlled value as offset without upper-bounds checking" } */
+  return 0;
+}