[pushed] analyzer: add sarif properties for bounds checking diagnostics

Message ID 20231216212347.3443252-1-dmalcolm@redhat.com
State Unresolved
Headers
Series [pushed] analyzer: add sarif properties for bounds checking diagnostics |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

David Malcolm Dec. 16, 2023, 9:23 p.m. UTC
As a follow-up to r14-6057-g12b67d1e13b3cf, add SARIF property bags
for -Wanalyzer-out-of-bounds, to help with debugging these warnings.
This was very helpful with PR analyzer/112792.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Successful run of analyzer integration tests on x86_64-pc-linux-gnu.
Pushed to trunk as r14-6635-g7abc7aae564e63.

gcc/analyzer/ChangeLog:
	* analyzer.cc: Include "tree-pretty-print.h" and
	"diagnostic-event-id.h".
	(tree_to_json): New.
	(diagnostic_event_id_to_json): New.
	(bit_offset_to_json): New.
	(byte_offset_to_json): New.
	* analyzer.h (tree_to_json): New decl.
	(diagnostic_event_id_to_json): New decl.
	(bit_offset_to_json): New decl.
	(byte_offset_to_json): New decl.
	* bounds-checking.cc: Include "diagnostic-format-sarif.h".
	(out_of_bounds::maybe_add_sarif_properties): New.
	(concrete_out_of_bounds::maybe_add_sarif_properties): New.
	(concrete_past_the_end::maybe_add_sarif_properties): New.
	(symbolic_past_the_end::maybe_add_sarif_properties): New.
	* region-model.cc (region_to_value_map::to_json): New.
	(region_model::to_json): New.
	* region-model.h (region_to_value_map::to_json): New decl.
	(region_model::to_json): New decl.
	* store.cc (bit_range::to_json): New.
	(byte_range::to_json): New.
	* store.h (bit_range::to_json): New decl.
	(byte_range::to_json): New decl.

Signed-off-by: David Malcolm <dmalcolm@redhat.com>
---
 gcc/analyzer/analyzer.cc        | 59 +++++++++++++++++++++++++++++++++
 gcc/analyzer/analyzer.h         | 12 +++++++
 gcc/analyzer/bounds-checking.cc | 58 ++++++++++++++++++++++++++++++++
 gcc/analyzer/region-model.cc    | 41 +++++++++++++++++++++++
 gcc/analyzer/region-model.h     |  4 +++
 gcc/analyzer/store.cc           | 30 +++++++++++++++++
 gcc/analyzer/store.h            |  4 +++
 7 files changed, 208 insertions(+)
  

Patch

diff --git a/gcc/analyzer/analyzer.cc b/gcc/analyzer/analyzer.cc
index 9d4bc788f316..c9d725424e2e 100644
--- a/gcc/analyzer/analyzer.cc
+++ b/gcc/analyzer/analyzer.cc
@@ -29,6 +29,8 @@  along with GCC; see the file COPYING3.  If not see
 #include "diagnostic.h"
 #include "intl.h"
 #include "analyzer/analyzer.h"
+#include "tree-pretty-print.h"
+#include "diagnostic-event-id.h"
 
 #if ENABLE_ANALYZER
 
@@ -216,6 +218,63 @@  get_diagnostic_tree_for_gassign (const gassign *assign_stmt)
   return get_diagnostic_tree_for_gassign_1 (assign_stmt, &visited);
 }
 
+/* Generate a JSON value for NODE, which can be NULL_TREE.
+   This is intended for debugging the analyzer rather than serialization and
+   thus is a string (or null, for NULL_TREE).  */
+
+json::value *
+tree_to_json (tree node)
+{
+  if (!node)
+    return new json::literal (json::JSON_NULL);
+
+  pretty_printer pp;
+  dump_generic_node (&pp, node, 0, TDF_VOPS|TDF_MEMSYMS, false);
+  return new json::string (pp_formatted_text (&pp));
+}
+
+/* Generate a JSON value for EVENT_ID.
+   This is intended for debugging the analyzer rather than serialization and
+   thus is a string matching those seen in event messages (or null,
+   for unknown).  */
+
+json::value *
+diagnostic_event_id_to_json (const diagnostic_event_id_t &event_id)
+{
+  if (event_id.known_p ())
+    {
+      pretty_printer pp;
+      pp_printf (&pp, "%@", &event_id);
+      return new json::string (pp_formatted_text (&pp));
+    }
+  else
+    return new json::literal (json::JSON_NULL);
+}
+
+/* Generate a JSON value for OFFSET, a bit offset.
+   This is intended for debugging the analyzer rather than serialization and
+   thus is a string.  */
+
+json::value *
+bit_offset_to_json (const bit_offset_t &offset)
+{
+  pretty_printer pp;
+  pp_wide_int_large (&pp, offset, SIGNED);
+  return new json::string (pp_formatted_text (&pp));
+}
+
+/* Generate a JSON value for OFFSET, a byte offset.
+   This is intended for debugging the analyzer rather than serialization and
+   thus is a string.  */
+
+json::value *
+byte_offset_to_json (const byte_offset_t &offset)
+{
+  pretty_printer pp;
+  pp_wide_int_large (&pp, offset, SIGNED);
+  return new json::string (pp_formatted_text (&pp));
+}
+
 } // namespace ana
 
 /* Helper function for checkers.  Is the CALL to the given function name,
diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h
index 3115f878573a..7d3984839560 100644
--- a/gcc/analyzer/analyzer.h
+++ b/gcc/analyzer/analyzer.h
@@ -415,6 +415,18 @@  extern void log_stashed_constants (logger *logger);
 
 extern FILE *get_or_create_any_logfile ();
 
+extern json::value *
+tree_to_json (tree node);
+
+extern json::value *
+diagnostic_event_id_to_json (const diagnostic_event_id_t &);
+
+extern json::value *
+bit_offset_to_json (const bit_offset_t &offset);
+
+extern json::value *
+byte_offset_to_json (const byte_offset_t &offset);
+
 } // namespace ana
 
 extern bool is_special_named_call_p (const gcall *call, const char *funcname,
diff --git a/gcc/analyzer/bounds-checking.cc b/gcc/analyzer/bounds-checking.cc
index 7cbfe91515f8..551d9796f799 100644
--- a/gcc/analyzer/bounds-checking.cc
+++ b/gcc/analyzer/bounds-checking.cc
@@ -31,6 +31,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "gimple-iterator.h"
 #include "diagnostic-core.h"
 #include "diagnostic-diagram.h"
+#include "diagnostic-format-sarif.h"
 #include "analyzer/analyzer.h"
 #include "analyzer/analyzer-logging.h"
 #include "analyzer/region-model.h"
@@ -110,6 +111,23 @@  public:
 							  *this));
   }
 
+  void maybe_add_sarif_properties (sarif_object &result_obj)
+    const override
+  {
+    sarif_property_bag &props = result_obj.get_or_create_properties ();
+#define PROPERTY_PREFIX "gcc/analyzer/out_of_bounds/"
+    props.set_string (PROPERTY_PREFIX "dir",
+		      get_dir () == DIR_READ ? "read" : "write");
+    props.set (PROPERTY_PREFIX "model", m_model.to_json ());
+    props.set (PROPERTY_PREFIX "region", m_reg->to_json ());
+    props.set (PROPERTY_PREFIX "diag_arg", tree_to_json (m_diag_arg));
+    if (m_sval_hint)
+      props.set (PROPERTY_PREFIX "sval_hint", m_sval_hint->to_json ());
+    props.set (PROPERTY_PREFIX "region_creation_event_id",
+	       diagnostic_event_id_to_json (m_region_creation_event_id));
+#undef PROPERTY_PREFIX
+  }
+
   virtual enum access_direction get_dir () const = 0;
 
 protected:
@@ -220,6 +238,21 @@  public:
 	    && m_out_of_bounds_bits == other.m_out_of_bounds_bits);
   }
 
+  void maybe_add_sarif_properties (sarif_object &result_obj)
+    const override
+  {
+    out_of_bounds::maybe_add_sarif_properties (result_obj);
+    sarif_property_bag &props = result_obj.get_or_create_properties ();
+#define PROPERTY_PREFIX "gcc/analyzer/concrete_out_of_bounds/"
+    props.set (PROPERTY_PREFIX "out_of_bounds_bits",
+	       m_out_of_bounds_bits.to_json ());
+    byte_range out_of_bounds_bytes (0, 0);
+    if (get_out_of_bounds_bytes (&out_of_bounds_bytes))
+      props.set (PROPERTY_PREFIX "out_of_bounds_bytes",
+		 out_of_bounds_bytes.to_json ());
+#undef PROPERTY_PREFIX
+  }
+
   bool get_out_of_bounds_bytes (byte_range *out) const
   {
     return m_out_of_bounds_bits.as_byte_range (out);
@@ -271,6 +304,19 @@  public:
 							  *this));
   }
 
+  void maybe_add_sarif_properties (sarif_object &result_obj)
+    const final override
+  {
+    concrete_out_of_bounds::maybe_add_sarif_properties (result_obj);
+    sarif_property_bag &props = result_obj.get_or_create_properties ();
+#define PROPERTY_PREFIX "gcc/analyzer/concrete_past_the_end/"
+    props.set (PROPERTY_PREFIX "bit_bound",
+	       tree_to_json (m_bit_bound));
+    props.set (PROPERTY_PREFIX "byte_bound",
+	       tree_to_json (m_byte_bound));
+#undef PROPERTY_PREFIX
+  }
+
 protected:
   tree m_bit_bound;
   tree m_byte_bound;
@@ -862,6 +908,18 @@  public:
 	    && pending_diagnostic::same_tree_p (m_capacity, other.m_capacity));
   }
 
+  void maybe_add_sarif_properties (sarif_object &result_obj)
+    const final override
+  {
+    out_of_bounds::maybe_add_sarif_properties (result_obj);
+    sarif_property_bag &props = result_obj.get_or_create_properties ();
+#define PROPERTY_PREFIX "gcc/analyzer/symbolic_past_the_end/"
+    props.set (PROPERTY_PREFIX "offset", tree_to_json (m_offset));
+    props.set (PROPERTY_PREFIX "num_bytes", tree_to_json (m_num_bytes));
+    props.set (PROPERTY_PREFIX "capacity", tree_to_json (m_capacity));
+#undef PROPERTY_PREFIX
+  }
+
 protected:
   tree m_offset;
   tree m_num_bytes;
diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 2315751870d3..62da029213d2 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -211,6 +211,31 @@  region_to_value_map::dump (bool simple) const
   pp_flush (&pp);
 }
 
+/* Generate a JSON value for this region_to_value_map.
+   This is intended for debugging the analyzer rather than
+   serialization.  */
+
+json::object *
+region_to_value_map::to_json () const
+{
+  json::object *map_obj = new json::object ();
+
+  auto_vec<const region *> regs;
+  for (iterator iter = begin (); iter != end (); ++iter)
+    regs.safe_push ((*iter).first);
+  regs.qsort (region::cmp_ptr_ptr);
+
+  unsigned i;
+  const region *reg;
+  FOR_EACH_VEC_ELT (regs, i, reg)
+    {
+      label_text reg_desc = reg->get_desc ();
+      const svalue *sval = *get (reg);
+      map_obj->set (reg_desc.get (), sval->to_json ());
+    }
+
+  return map_obj;
+}
 
 /* Attempt to merge THIS with OTHER, writing the result
    to OUT.
@@ -429,6 +454,22 @@  region_model::debug () const
   dump (true);
 }
 
+/* Generate a JSON value for this region_model.
+   This is intended for debugging the analyzer rather than
+   serialization.  */
+
+json::object *
+region_model::to_json () const
+{
+  json::object *model_obj = new json::object ();
+  model_obj->set ("store", m_store.to_json ());
+  model_obj->set ("constraints", m_constraints->to_json ());
+  if (m_current_frame)
+    model_obj->set ("current_frame", m_current_frame->to_json ());
+  model_obj->set ("dynamic_extents", m_dynamic_extents.to_json ());
+  return model_obj;
+}
+
 /* Assert that this object is valid.  */
 
 void
diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h
index 2e15924fddb8..88f08809a2ee 100644
--- a/gcc/analyzer/region-model.h
+++ b/gcc/analyzer/region-model.h
@@ -175,6 +175,8 @@  public:
   void dump_to_pp (pretty_printer *pp, bool simple, bool multiline) const;
   void dump (bool simple) const;
 
+  json::object *to_json () const;
+
   bool can_merge_with_p (const region_to_value_map &other,
 			 region_to_value_map *out) const;
 
@@ -278,6 +280,8 @@  class region_model
 
   void debug () const;
 
+  json::object *to_json () const;
+
   void validate () const;
 
   void canonicalize ();
diff --git a/gcc/analyzer/store.cc b/gcc/analyzer/store.cc
index 32cb1d968b8a..18dac7a67b0f 100644
--- a/gcc/analyzer/store.cc
+++ b/gcc/analyzer/store.cc
@@ -235,6 +235,21 @@  bit_range::dump () const
   pp_flush (&pp);
 }
 
+/* Generate a JSON value for this bit_range.
+   This is intended for debugging the analyzer rather
+   than serialization.  */
+
+json::object *
+bit_range::to_json () const
+{
+  json::object *obj = new json::object ();
+  obj->set ("start_bit_offset",
+	    bit_offset_to_json (m_start_bit_offset));
+  obj->set ("size_in_bits",
+	    bit_offset_to_json (m_size_in_bits));
+  return obj;
+}
+
 /* If OTHER is a subset of this, return true and, if OUT is
    non-null, write to *OUT the relative range of OTHER within this.
    Otherwise return false.  */
@@ -484,6 +499,21 @@  byte_range::dump () const
   pp_flush (&pp);
 }
 
+/* Generate a JSON value for this byte_range.
+   This is intended for debugging the analyzer rather
+   than serialization.  */
+
+json::object *
+byte_range::to_json () const
+{
+  json::object *obj = new json::object ();
+  obj->set ("start_byte_offset",
+	    byte_offset_to_json (m_start_byte_offset));
+  obj->set ("size_in_bytes",
+	    byte_offset_to_json (m_size_in_bytes));
+  return obj;
+}
+
 /* If OTHER is a subset of this, return true and write
    to *OUT the relative range of OTHER within this.
    Otherwise return false.  */
diff --git a/gcc/analyzer/store.h b/gcc/analyzer/store.h
index da5c8b6ffaec..69dfa426e35a 100644
--- a/gcc/analyzer/store.h
+++ b/gcc/analyzer/store.h
@@ -237,6 +237,8 @@  struct bit_range
   void dump_to_pp (pretty_printer *pp) const;
   void dump () const;
 
+  json::object *to_json () const;
+
   bool empty_p () const
   {
     return m_size_in_bits == 0;
@@ -311,6 +313,8 @@  struct byte_range
   void dump_to_pp (pretty_printer *pp) const;
   void dump () const;
 
+  json::object *to_json () const;
+
   bool empty_p () const
   {
     return m_size_in_bytes == 0;