analyzer: stash values for CPython plugin [PR107646]

Message ID CANGHATU5pzXTNAgpsdua6M8d-YNtH7Jx=K9USKcT994vwmSw7Q@mail.gmail.com
State Not Applicable
Headers
Series analyzer: stash values for CPython plugin [PR107646] |

Checks

Context Check Description
snail/gcc-patch-check fail Git am fail log

Commit Message

Eric Feng Aug. 1, 2023, 1:52 p.m. UTC
  Hi all,

This patch adds a hook to the end of ana::on_finish_translation_unit
which calls relevant stashing-related callbacks registered during plugin
initialization. This feature is used to stash named types and global
variables for a CPython analyzer plugin [PR107646].

Bootstrapped and tested on aarch64-unknown-linux-gnu. Does it look okay?

---

gcc/analyzer/ChangeLog:

        * analyzer-language.cc (run_callbacks): New function.
        (on_finish_translation_unit): New function.
        * analyzer-language.h (GCC_ANALYZER_LANGUAGE_H): New include.
        (class translation_unit): New vfuncs.

gcc/c/ChangeLog:

        * c-parser.cc: New functions.

gcc/testsuite/ChangeLog:

        * gcc.dg/plugin/analyzer_cpython_plugin.c: New test.

Signed-off-by: Eric Feng <ef2648@columbia.edu>
---
 gcc/analyzer/analyzer-language.cc             |  22 ++
 gcc/analyzer/analyzer-language.h              |   9 +
 gcc/c/c-parser.cc                             |  26 ++
 .../gcc.dg/plugin/analyzer_cpython_plugin.c   | 224 ++++++++++++++++++
 4 files changed, 281 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
  

Comments

David Malcolm Aug. 1, 2023, 5:02 p.m. UTC | #1
On Tue, 2023-08-01 at 09:52 -0400, Eric Feng wrote:
> Hi all,
> 
> This patch adds a hook to the end of ana::on_finish_translation_unit
> which calls relevant stashing-related callbacks registered during
> plugin
> initialization. This feature is used to stash named types and global
> variables for a CPython analyzer plugin [PR107646].
> 
> Bootstrapped and tested on aarch64-unknown-linux-gnu. Does it look
> okay?

Hi Eric, thanks for the patch.

The patch touches the C frontend, so those parts would need approval
from the C FE maintainers/reviewers; I've CCed them.

Overall, I like the patch, but it's not ready for trunk yet; various
comments inline below...

> 
> ---
> 
> gcc/analyzer/ChangeLog:

You could add: PR analyzer/107646 to these ChangeLog entries; have a
look at how other ChangeLog entries refer to such bugzilla entries.

> 
>         * analyzer-language.cc (run_callbacks): New function.
>         (on_finish_translation_unit): New function.
>         * analyzer-language.h (GCC_ANALYZER_LANGUAGE_H): New include.
>         (class translation_unit): New vfuncs.
> 
> gcc/c/ChangeLog:
> 
>         * c-parser.cc: New functions.

I think this ChangeLog entry needs more detail.
> 
> gcc/testsuite/ChangeLog:
> 
>         * gcc.dg/plugin/analyzer_cpython_plugin.c: New test.
> 
> Signed-off-by: Eric Feng <ef2648@columbia.edu>
> ---
>  gcc/analyzer/analyzer-language.cc             |  22 ++
>  gcc/analyzer/analyzer-language.h              |   9 +
>  gcc/c/c-parser.cc                             |  26 ++
>  .../gcc.dg/plugin/analyzer_cpython_plugin.c   | 224
> ++++++++++++++++++
>  4 files changed, 281 insertions(+)
>  create mode 100644
> gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> 
> diff --git a/gcc/analyzer/analyzer-language.cc
> b/gcc/analyzer/analyzer-language.cc
> index 2c8910906ee..fc41b9c17b8 100644
> --- a/gcc/analyzer/analyzer-language.cc
> +++ b/gcc/analyzer/analyzer-language.cc
> @@ -35,6 +35,26 @@ static GTY (()) hash_map <tree, tree>
> *analyzer_stashed_constants;
>  #if ENABLE_ANALYZER
> 
>  namespace ana {
> +static vec<finish_translation_unit_callback>
> +    *finish_translation_unit_callbacks;
> +
> +void
> +register_finish_translation_unit_callback (
> +    finish_translation_unit_callback callback)
> +{
> +  if (!finish_translation_unit_callbacks)
> +    vec_alloc (finish_translation_unit_callbacks, 1);
> +  finish_translation_unit_callbacks->safe_push (callback);
> +}
> +
> +void
> +run_callbacks (logger *logger, const translation_unit &tu)

This function could be "static" since it's not needed outside of
analyzer-language.cc

> +{
> +  for (auto const &cb : finish_translation_unit_callbacks)
> +    {
> +      cb (logger, tu);
> +    }
> +}
> 
>  /* Call into TU to try to find a value for NAME.
>     If found, stash its value within analyzer_stashed_constants.  */
> @@ -102,6 +122,8 @@ on_finish_translation_unit (const
> translation_unit &tu)
>      the_logger.set_logger (new logger (logfile, 0, 0,
>          *global_dc->printer));
>    stash_named_constants (the_logger.get_logger (), tu);
> +
> +  run_callbacks (the_logger.get_logger (), tu);
>  }
> 
>  /* Lookup NAME in the named constants stashed when the frontend TU
> finished.
> diff --git a/gcc/analyzer/analyzer-language.h
> b/gcc/analyzer/analyzer-language.h
> index 00f85aba041..8deea52d627 100644
> --- a/gcc/analyzer/analyzer-language.h
> +++ b/gcc/analyzer/analyzer-language.h
> @@ -21,6 +21,8 @@ along with GCC; see the file COPYING3.  If not see
>  #ifndef GCC_ANALYZER_LANGUAGE_H
>  #define GCC_ANALYZER_LANGUAGE_H
> 
> +#include "analyzer/analyzer-logging.h"
> +
>  #if ENABLE_ANALYZER
> 
>  namespace ana {
> @@ -35,8 +37,15 @@ class translation_unit
>       have been seen).  If it is defined and an integer (e.g. either
> as a
>       macro or enum), return the INTEGER_CST value, otherwise return
> NULL.  */
>    virtual tree lookup_constant_by_id (tree id) const = 0;
> +  virtual tree lookup_type_by_id (tree id) const = 0;
> +  virtual tree lookup_global_var_by_id (tree id) const = 0;
>  };
> 
> +typedef void (*finish_translation_unit_callback)
> +   (logger *, const translation_unit &);
> +void register_finish_translation_unit_callback (
> +    finish_translation_unit_callback callback);
> +
>  /* Analyzer hook for frontends to call at the end of the TU.  */
> 
>  void on_finish_translation_unit (const translation_unit &tu);
> diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
> index 80920b31f83..f0ee55e416b 100644
> --- a/gcc/c/c-parser.cc
> +++ b/gcc/c/c-parser.cc
> @@ -1695,6 +1695,32 @@ public:
>      return NULL_TREE;
>    }
> 
> +  tree
> +  lookup_type_by_id (tree id) const final override
> +  {
> +    if (tree type_decl = lookup_name (id))
> +      {
> + if (TREE_CODE (type_decl) == TYPE_DECL)
> + {
> + tree record_type = TREE_TYPE (type_decl);
> + if (TREE_CODE (record_type) == RECORD_TYPE)
> + return record_type;
> + }

It looks like something's wrong with the indentation here, but the idea
seems OK to me (but needs C FE reviewer approval).

> +      }
> +
> +    return NULL_TREE;
> +  }
> +
> +  tree
> +  lookup_global_var_by_id (tree id) const final override
> +  {
> +    if (tree var_decl = lookup_name (id))
> +      if (TREE_CODE (var_decl) == VAR_DECL)
> + return var_decl;

Likewise, is this indentation correct?

> +
> +    return NULL_TREE;
> +  }
> +
>  private:
>    /* Attempt to get an INTEGER_CST from MACRO.
>       Only handle the simplest cases: where MACRO's definition is a
> single
> diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> new file mode 100644
> index 00000000000..285da102edb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> @@ -0,0 +1,224 @@
> +/* -fanalyzer plugin for CPython extension modules  */
> +/* { dg-options "-g" } */
> +
> +#define INCLUDE_MEMORY
> +#include "gcc-plugin.h"
> +#include "config.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "tree.h"
> +#include "function.h"
> +#include "basic-block.h"
> +#include "gimple.h"
> +#include "gimple-iterator.h"
> +#include "diagnostic-core.h"
> +#include "graphviz.h"
> +#include "options.h"
> +#include "cgraph.h"
> +#include "tree-dfa.h"
> +#include "stringpool.h"
> +#include "convert.h"
> +#include "target.h"
> +#include "fold-const.h"
> +#include "tree-pretty-print.h"
> +#include "diagnostic-color.h"
> +#include "diagnostic-metadata.h"
> +#include "tristate.h"
> +#include "bitmap.h"
> +#include "selftest.h"
> +#include "function.h"
> +#include "json.h"
> +#include "analyzer/analyzer.h"
> +#include "analyzer/analyzer-language.h"
> +#include "analyzer/analyzer-logging.h"
> +#include "ordered-hash-map.h"
> +#include "options.h"
> +#include "cgraph.h"
> +#include "cfg.h"
> +#include "digraph.h"
> +#include "analyzer/supergraph.h"
> +#include "sbitmap.h"
> +#include "analyzer/call-string.h"
> +#include "analyzer/program-point.h"
> +#include "analyzer/store.h"
> +#include "analyzer/region-model.h"
> +#include "analyzer/call-details.h"
> +#include "analyzer/call-info.h"
> +#include "make-unique.h"
> +
> +int plugin_is_GPL_compatible;
> +
> +#if ENABLE_ANALYZER
> +static GTY (()) hash_map<tree, tree> *analyzer_stashed_types;
> +static GTY (()) hash_map<tree, tree> *analyzer_stashed_globals;
> +
> +namespace ana
> +{
> +static tree pyobj_record = NULL_TREE;
> +static tree varobj_record = NULL_TREE;
> +static tree pylistobj_record = NULL_TREE;
> +static tree pylongobj_record = NULL_TREE;
> +static tree pylongtype_vardecl = NULL_TREE;
> +static tree pylisttype_vardecl = NULL_TREE;
> +
> +tree
> +get_field_by_name (tree type, const char *name)
> +{
> +  for (tree field = TYPE_FIELDS (type); field; field = TREE_CHAIN
> (field))
> +    {
> +      if (TREE_CODE (field) == FIELD_DECL)
> +        {
> +          const char *field_name = IDENTIFIER_POINTER (DECL_NAME
> (field));
> +          if (strcmp (field_name, name) == 0)
> +            return field;
> +        }
> +    }
> +  return NULL_TREE;
> +}
> +
> +static void
> +maybe_stash_named_type (logger *logger, const translation_unit &tu,
> +                        const char *name)
> +{
> +  LOG_FUNC_1 (logger, "name: %qs", name);
> +  if (!analyzer_stashed_types)
> +    analyzer_stashed_types = hash_map<tree, tree>::create_ggc ();
> +
> +  tree id = get_identifier (name);
> +  if (tree t = tu.lookup_type_by_id (id))
> +    {
> +      gcc_assert (TREE_CODE (t) == RECORD_TYPE);
> +      analyzer_stashed_types->put (id, t);
> +      if (logger)
> +        logger->log ("found %qs: %qE", name, t);
> +    }
> +  else
> +    {
> +      if (logger)
> +        logger->log ("%qs: not found", name);
> +    }
> +}
> +
> +static void
> +maybe_stash_global_var (logger *logger, const translation_unit &tu,
> +                        const char *name)
> +{
> +  LOG_FUNC_1 (logger, "name: %qs", name);
> +  if (!analyzer_stashed_globals)
> +    analyzer_stashed_globals = hash_map<tree, tree>::create_ggc ();
> +
> +  tree id = get_identifier (name);
> +  if (tree t = tu.lookup_global_var_by_id (id))
> +    {
> +      gcc_assert (TREE_CODE (t) == VAR_DECL);
> +      analyzer_stashed_globals->put (id, t);
> +      if (logger)
> +        logger->log ("found %qs: %qE", name, t);
> +    }
> +  else
> +    {
> +      if (logger)
> +        logger->log ("%qs: not found", name);
> +    }
> +}
> +
> +static void
> +stash_named_types (logger *logger, const translation_unit &tu)
> +{
> +  LOG_SCOPE (logger);
> +
> +  maybe_stash_named_type (logger, tu, "PyObject");
> +  maybe_stash_named_type (logger, tu, "PyListObject");
> +  maybe_stash_named_type (logger, tu, "PyVarObject");
> +  maybe_stash_named_type (logger, tu, "PyLongObject");
> +}
> +
> +static void
> +stash_global_vars (logger *logger, const translation_unit &tu)
> +{
> +  LOG_SCOPE (logger);
> +
> +  maybe_stash_global_var (logger, tu, "PyLong_Type");
> +  maybe_stash_global_var (logger, tu, "PyList_Type");
> +}
> +
> +tree
> +get_stashed_type_by_name (const char *name)
> +{
> +  if (!analyzer_stashed_types)
> +    return NULL_TREE;
> +  tree id = get_identifier (name);
> +  if (tree *slot = analyzer_stashed_types->get (id))
> +    {
> +      gcc_assert (TREE_CODE (*slot) == RECORD_TYPE);
> +      return *slot;
> +    }
> +  return NULL_TREE;
> +}
> +
> +tree
> +get_stashed_global_var_by_name (const char *name)
> +{
> +  if (!analyzer_stashed_globals)
> +    return NULL_TREE;
> +  tree id = get_identifier (name);
> +  if (tree *slot = analyzer_stashed_globals->get (id))
> +    {
> +      gcc_assert (TREE_CODE (*slot) == VAR_DECL);
> +      return *slot;
> +    }
> +  return NULL_TREE;
> +}
> +
> +static void
> +init_py_structs ()
> +{
> +  if (pyobj_record == NULL_TREE)
> +      pyobj_record = get_stashed_type_by_name ("PyObject");

This function is called once as the plugin starts up, so I don't think
we need the various comparisons against NULL_TREE here.  Or am I
missing something?

> +  if (varobj_record == NULL_TREE)
> +    varobj_record = get_stashed_type_by_name ("PyVarObject");
> +  if (pylistobj_record == NULL_TREE)
> +    pylistobj_record = get_stashed_type_by_name ("PyListObject");
> +  if (pylongobj_record == NULL_TREE)
> +    pylongobj_record = get_stashed_type_by_name ("PyLongObject");
> +  if (pylongtype_vardecl == NULL_TREE)
> +    pylongtype_vardecl = get_stashed_global_var_by_name
> ("PyLong_Type");
> +  if (pylisttype_vardecl == NULL_TREE)
> +    pylisttype_vardecl = get_stashed_global_var_by_name
> ("PyList_Type");
> +}
> +
> +static void
> +cpython_analyzer_init_cb (void *gcc_data, void * /*user_data */)
> +{
> +  ana::plugin_analyzer_init_iface *iface
> +      = (ana::plugin_analyzer_init_iface *)gcc_data;
> +  LOG_SCOPE (iface->get_logger ());
> +  if (1)
> +    inform (input_location, "got here: cpython_analyzer_init_cb");

We don't want that debugging "inform", or, at least, not on by default.

> +
> +  init_py_structs ();
> +  if (pyobj_record == NULL_TREE)
> +    return;

This conditional doesn't make much sense as-is, but presumably you're
going to use it to eventually conditionalize the rest of the plugin
startup, right?

> +}
> +} // namespace ana
> +
> +#endif /* #if ENABLE_ANALYZER */
> +
> +int
> +plugin_init (struct plugin_name_args *plugin_info,
> +             struct plugin_gcc_version *version)
> +{
> +#if ENABLE_ANALYZER
> +  const char *plugin_name = plugin_info->base_name;
> +  if (1)
> +    inform (input_location, "got here; %qs", plugin_name);

Another debugging "inform" that we don't want on by default when
running the tests.

> +  ana::register_finish_translation_unit_callback
> (&stash_named_types);
> +  ana::register_finish_translation_unit_callback
> (&stash_global_vars);
> +  register_callback (plugin_info->base_name, PLUGIN_ANALYZER_INIT,
> +                     ana::cpython_analyzer_init_cb,
> +                     NULL); /* void *user_data */
> +#else
> +  sorry_no_analyzer ();
> +#endif
> +  return 0;
> +}
> \ No newline at end of file

The patch doesn't touch plugin.exp, so this plugin won't actually get
compiled yet as part of the testsuite; I think we need a change to
plugin.exp.  You could add a minimal nonempty source file to be
compiled with the plugin (to test the "gracefully handling non-CPython
code" case).

Hope this is constructive; sorry if it comes across as nitpicky in
places (patch review tends to)

Dave
  
Eric Feng Aug. 2, 2023, 4:27 p.m. UTC | #2
Hi Dave,

Thank you for the feedback! I've incorporated the changes and sent a
revised version of the patch.

On Tue, Aug 1, 2023 at 1:02 PM David Malcolm <dmalcolm@redhat.com> wrote:
>
> On Tue, 2023-08-01 at 09:52 -0400, Eric Feng wrote:
> > Hi all,
> >
> > This patch adds a hook to the end of ana::on_finish_translation_unit
> > which calls relevant stashing-related callbacks registered during
> > plugin
> > initialization. This feature is used to stash named types and global
> > variables for a CPython analyzer plugin [PR107646].
> >
> > Bootstrapped and tested on aarch64-unknown-linux-gnu. Does it look
> > okay?
>
> Hi Eric, thanks for the patch.
>
> The patch touches the C frontend, so those parts would need approval
> from the C FE maintainers/reviewers; I've CCed them.
>
> Overall, I like the patch, but it's not ready for trunk yet; various
> comments inline below...
>
> >
> > ---
> >
> > gcc/analyzer/ChangeLog:
>
> You could add: PR analyzer/107646 to these ChangeLog entries; have a
> look at how other ChangeLog entries refer to such bugzilla entries.
>
> >
> >         * analyzer-language.cc (run_callbacks): New function.
> >         (on_finish_translation_unit): New function.
> >         * analyzer-language.h (GCC_ANALYZER_LANGUAGE_H): New include.
> >         (class translation_unit): New vfuncs.
> >
> > gcc/c/ChangeLog:
> >
> >         * c-parser.cc: New functions.
>
> I think this ChangeLog entry needs more detail.
Added in revised version of the patch.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.dg/plugin/analyzer_cpython_plugin.c: New test.
> >
> > Signed-off-by: Eric Feng <ef2648@columbia.edu>
> > ---
> >  gcc/analyzer/analyzer-language.cc             |  22 ++
> >  gcc/analyzer/analyzer-language.h              |   9 +
> >  gcc/c/c-parser.cc                             |  26 ++
> >  .../gcc.dg/plugin/analyzer_cpython_plugin.c   | 224
> > ++++++++++++++++++
> >  4 files changed, 281 insertions(+)
> >  create mode 100644
> > gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> >
> > diff --git a/gcc/analyzer/analyzer-language.cc
> > b/gcc/analyzer/analyzer-language.cc
> > index 2c8910906ee..fc41b9c17b8 100644
> > --- a/gcc/analyzer/analyzer-language.cc
> > +++ b/gcc/analyzer/analyzer-language.cc
> > @@ -35,6 +35,26 @@ static GTY (()) hash_map <tree, tree>
> > *analyzer_stashed_constants;
> >  #if ENABLE_ANALYZER
> >
> >  namespace ana {
> > +static vec<finish_translation_unit_callback>
> > +    *finish_translation_unit_callbacks;
> > +
> > +void
> > +register_finish_translation_unit_callback (
> > +    finish_translation_unit_callback callback)
> > +{
> > +  if (!finish_translation_unit_callbacks)
> > +    vec_alloc (finish_translation_unit_callbacks, 1);
> > +  finish_translation_unit_callbacks->safe_push (callback);
> > +}
> > +
> > +void
> > +run_callbacks (logger *logger, const translation_unit &tu)
>
> This function could be "static" since it's not needed outside of
> analyzer-language.cc
>
> > +{
> > +  for (auto const &cb : finish_translation_unit_callbacks)
> > +    {
> > +      cb (logger, tu);
> > +    }
> > +}
> >
> >  /* Call into TU to try to find a value for NAME.
> >     If found, stash its value within analyzer_stashed_constants.  */
> > @@ -102,6 +122,8 @@ on_finish_translation_unit (const
> > translation_unit &tu)
> >      the_logger.set_logger (new logger (logfile, 0, 0,
> >          *global_dc->printer));
> >    stash_named_constants (the_logger.get_logger (), tu);
> > +
> > +  run_callbacks (the_logger.get_logger (), tu);
> >  }
> >
> >  /* Lookup NAME in the named constants stashed when the frontend TU
> > finished.
> > diff --git a/gcc/analyzer/analyzer-language.h
> > b/gcc/analyzer/analyzer-language.h
> > index 00f85aba041..8deea52d627 100644
> > --- a/gcc/analyzer/analyzer-language.h
> > +++ b/gcc/analyzer/analyzer-language.h
> > @@ -21,6 +21,8 @@ along with GCC; see the file COPYING3.  If not see
> >  #ifndef GCC_ANALYZER_LANGUAGE_H
> >  #define GCC_ANALYZER_LANGUAGE_H
> >
> > +#include "analyzer/analyzer-logging.h"
> > +
> >  #if ENABLE_ANALYZER
> >
> >  namespace ana {
> > @@ -35,8 +37,15 @@ class translation_unit
> >       have been seen).  If it is defined and an integer (e.g. either
> > as a
> >       macro or enum), return the INTEGER_CST value, otherwise return
> > NULL.  */
> >    virtual tree lookup_constant_by_id (tree id) const = 0;
> > +  virtual tree lookup_type_by_id (tree id) const = 0;
> > +  virtual tree lookup_global_var_by_id (tree id) const = 0;
> >  };
> >
> > +typedef void (*finish_translation_unit_callback)
> > +   (logger *, const translation_unit &);
> > +void register_finish_translation_unit_callback (
> > +    finish_translation_unit_callback callback);
> > +
> >  /* Analyzer hook for frontends to call at the end of the TU.  */
> >
> >  void on_finish_translation_unit (const translation_unit &tu);
> > diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
> > index 80920b31f83..f0ee55e416b 100644
> > --- a/gcc/c/c-parser.cc
> > +++ b/gcc/c/c-parser.cc
> > @@ -1695,6 +1695,32 @@ public:
> >      return NULL_TREE;
> >    }
> >
> > +  tree
> > +  lookup_type_by_id (tree id) const final override
> > +  {
> > +    if (tree type_decl = lookup_name (id))
> > +      {
> > + if (TREE_CODE (type_decl) == TYPE_DECL)
> > + {
> > + tree record_type = TREE_TYPE (type_decl);
> > + if (TREE_CODE (record_type) == RECORD_TYPE)
> > + return record_type;
> > + }
>
> It looks like something's wrong with the indentation here, but the idea
> seems OK to me (but needs C FE reviewer approval).
>
> > +      }
> > +
> > +    return NULL_TREE;
> > +  }
> > +
> > +  tree
> > +  lookup_global_var_by_id (tree id) const final override
> > +  {
> > +    if (tree var_decl = lookup_name (id))
> > +      if (TREE_CODE (var_decl) == VAR_DECL)
> > + return var_decl;
>
> Likewise, is this indentation correct?
Sorry about that; looks like my formatter messed this up and neither I
nor check_GNU_style noticed! Should be fixed in the revised patch.
>
> > +
> > +    return NULL_TREE;
> > +  }
> > +
> >  private:
> >    /* Attempt to get an INTEGER_CST from MACRO.
> >       Only handle the simplest cases: where MACRO's definition is a
> > single
> > diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> > b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> > new file mode 100644
> > index 00000000000..285da102edb
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> > @@ -0,0 +1,224 @@
> > +/* -fanalyzer plugin for CPython extension modules  */
> > +/* { dg-options "-g" } */
> > +
> > +#define INCLUDE_MEMORY
> > +#include "gcc-plugin.h"
> > +#include "config.h"
> > +#include "system.h"
> > +#include "coretypes.h"
> > +#include "tree.h"
> > +#include "function.h"
> > +#include "basic-block.h"
> > +#include "gimple.h"
> > +#include "gimple-iterator.h"
> > +#include "diagnostic-core.h"
> > +#include "graphviz.h"
> > +#include "options.h"
> > +#include "cgraph.h"
> > +#include "tree-dfa.h"
> > +#include "stringpool.h"
> > +#include "convert.h"
> > +#include "target.h"
> > +#include "fold-const.h"
> > +#include "tree-pretty-print.h"
> > +#include "diagnostic-color.h"
> > +#include "diagnostic-metadata.h"
> > +#include "tristate.h"
> > +#include "bitmap.h"
> > +#include "selftest.h"
> > +#include "function.h"
> > +#include "json.h"
> > +#include "analyzer/analyzer.h"
> > +#include "analyzer/analyzer-language.h"
> > +#include "analyzer/analyzer-logging.h"
> > +#include "ordered-hash-map.h"
> > +#include "options.h"
> > +#include "cgraph.h"
> > +#include "cfg.h"
> > +#include "digraph.h"
> > +#include "analyzer/supergraph.h"
> > +#include "sbitmap.h"
> > +#include "analyzer/call-string.h"
> > +#include "analyzer/program-point.h"
> > +#include "analyzer/store.h"
> > +#include "analyzer/region-model.h"
> > +#include "analyzer/call-details.h"
> > +#include "analyzer/call-info.h"
> > +#include "make-unique.h"
> > +
> > +int plugin_is_GPL_compatible;
> > +
> > +#if ENABLE_ANALYZER
> > +static GTY (()) hash_map<tree, tree> *analyzer_stashed_types;
> > +static GTY (()) hash_map<tree, tree> *analyzer_stashed_globals;
> > +
> > +namespace ana
> > +{
> > +static tree pyobj_record = NULL_TREE;
> > +static tree varobj_record = NULL_TREE;
> > +static tree pylistobj_record = NULL_TREE;
> > +static tree pylongobj_record = NULL_TREE;
> > +static tree pylongtype_vardecl = NULL_TREE;
> > +static tree pylisttype_vardecl = NULL_TREE;
> > +
> > +tree
> > +get_field_by_name (tree type, const char *name)
> > +{
> > +  for (tree field = TYPE_FIELDS (type); field; field = TREE_CHAIN
> > (field))
> > +    {
> > +      if (TREE_CODE (field) == FIELD_DECL)
> > +        {
> > +          const char *field_name = IDENTIFIER_POINTER (DECL_NAME
> > (field));
> > +          if (strcmp (field_name, name) == 0)
> > +            return field;
> > +        }
> > +    }
> > +  return NULL_TREE;
> > +}
> > +
> > +static void
> > +maybe_stash_named_type (logger *logger, const translation_unit &tu,
> > +                        const char *name)
> > +{
> > +  LOG_FUNC_1 (logger, "name: %qs", name);
> > +  if (!analyzer_stashed_types)
> > +    analyzer_stashed_types = hash_map<tree, tree>::create_ggc ();
> > +
> > +  tree id = get_identifier (name);
> > +  if (tree t = tu.lookup_type_by_id (id))
> > +    {
> > +      gcc_assert (TREE_CODE (t) == RECORD_TYPE);
> > +      analyzer_stashed_types->put (id, t);
> > +      if (logger)
> > +        logger->log ("found %qs: %qE", name, t);
> > +    }
> > +  else
> > +    {
> > +      if (logger)
> > +        logger->log ("%qs: not found", name);
> > +    }
> > +}
> > +
> > +static void
> > +maybe_stash_global_var (logger *logger, const translation_unit &tu,
> > +                        const char *name)
> > +{
> > +  LOG_FUNC_1 (logger, "name: %qs", name);
> > +  if (!analyzer_stashed_globals)
> > +    analyzer_stashed_globals = hash_map<tree, tree>::create_ggc ();
> > +
> > +  tree id = get_identifier (name);
> > +  if (tree t = tu.lookup_global_var_by_id (id))
> > +    {
> > +      gcc_assert (TREE_CODE (t) == VAR_DECL);
> > +      analyzer_stashed_globals->put (id, t);
> > +      if (logger)
> > +        logger->log ("found %qs: %qE", name, t);
> > +    }
> > +  else
> > +    {
> > +      if (logger)
> > +        logger->log ("%qs: not found", name);
> > +    }
> > +}
> > +
> > +static void
> > +stash_named_types (logger *logger, const translation_unit &tu)
> > +{
> > +  LOG_SCOPE (logger);
> > +
> > +  maybe_stash_named_type (logger, tu, "PyObject");
> > +  maybe_stash_named_type (logger, tu, "PyListObject");
> > +  maybe_stash_named_type (logger, tu, "PyVarObject");
> > +  maybe_stash_named_type (logger, tu, "PyLongObject");
> > +}
> > +
> > +static void
> > +stash_global_vars (logger *logger, const translation_unit &tu)
> > +{
> > +  LOG_SCOPE (logger);
> > +
> > +  maybe_stash_global_var (logger, tu, "PyLong_Type");
> > +  maybe_stash_global_var (logger, tu, "PyList_Type");
> > +}
> > +
> > +tree
> > +get_stashed_type_by_name (const char *name)
> > +{
> > +  if (!analyzer_stashed_types)
> > +    return NULL_TREE;
> > +  tree id = get_identifier (name);
> > +  if (tree *slot = analyzer_stashed_types->get (id))
> > +    {
> > +      gcc_assert (TREE_CODE (*slot) == RECORD_TYPE);
> > +      return *slot;
> > +    }
> > +  return NULL_TREE;
> > +}
> > +
> > +tree
> > +get_stashed_global_var_by_name (const char *name)
> > +{
> > +  if (!analyzer_stashed_globals)
> > +    return NULL_TREE;
> > +  tree id = get_identifier (name);
> > +  if (tree *slot = analyzer_stashed_globals->get (id))
> > +    {
> > +      gcc_assert (TREE_CODE (*slot) == VAR_DECL);
> > +      return *slot;
> > +    }
> > +  return NULL_TREE;
> > +}
> > +
> > +static void
> > +init_py_structs ()
> > +{
> > +  if (pyobj_record == NULL_TREE)
> > +      pyobj_record = get_stashed_type_by_name ("PyObject");
>
> This function is called once as the plugin starts up, so I don't think
> we need the various comparisons against NULL_TREE here.  Or am I
> missing something?
No, you are right — thank you for mentioning that!
>
> > +  if (varobj_record == NULL_TREE)
> > +    varobj_record = get_stashed_type_by_name ("PyVarObject");
> > +  if (pylistobj_record == NULL_TREE)
> > +    pylistobj_record = get_stashed_type_by_name ("PyListObject");
> > +  if (pylongobj_record == NULL_TREE)
> > +    pylongobj_record = get_stashed_type_by_name ("PyLongObject");
> > +  if (pylongtype_vardecl == NULL_TREE)
> > +    pylongtype_vardecl = get_stashed_global_var_by_name
> > ("PyLong_Type");
> > +  if (pylisttype_vardecl == NULL_TREE)
> > +    pylisttype_vardecl = get_stashed_global_var_by_name
> > ("PyList_Type");
> > +}
> > +
> > +static void
> > +cpython_analyzer_init_cb (void *gcc_data, void * /*user_data */)
> > +{
> > +  ana::plugin_analyzer_init_iface *iface
> > +      = (ana::plugin_analyzer_init_iface *)gcc_data;
> > +  LOG_SCOPE (iface->get_logger ());
> > +  if (1)
> > +    inform (input_location, "got here: cpython_analyzer_init_cb");
>
> We don't want that debugging "inform", or, at least, not on by default.
Fixed.
>
> > +
> > +  init_py_structs ();
> > +  if (pyobj_record == NULL_TREE)
> > +    return;
>
> This conditional doesn't make much sense as-is, but presumably you're
> going to use it to eventually conditionalize the rest of the plugin
> startup, right?
Yes, that's correct.
>
> > +}
> > +} // namespace ana
> > +
> > +#endif /* #if ENABLE_ANALYZER */
> > +
> > +int
> > +plugin_init (struct plugin_name_args *plugin_info,
> > +             struct plugin_gcc_version *version)
> > +{
> > +#if ENABLE_ANALYZER
> > +  const char *plugin_name = plugin_info->base_name;
> > +  if (1)
> > +    inform (input_location, "got here; %qs", plugin_name);
>
> Another debugging "inform" that we don't want on by default when
> running the tests.
>
> > +  ana::register_finish_translation_unit_callback
> > (&stash_named_types);
> > +  ana::register_finish_translation_unit_callback
> > (&stash_global_vars);
> > +  register_callback (plugin_info->base_name, PLUGIN_ANALYZER_INIT,
> > +                     ana::cpython_analyzer_init_cb,
> > +                     NULL); /* void *user_data */
> > +#else
> > +  sorry_no_analyzer ();
> > +#endif
> > +  return 0;
> > +}
> > \ No newline at end of file
>
> The patch doesn't touch plugin.exp, so this plugin won't actually get
> compiled yet as part of the testsuite; I think we need a change to
> plugin.exp.  You could add a minimal nonempty source file to be
> compiled with the plugin (to test the "gracefully handling non-CPython
> code" case).
Changes made in the revised patch. I have also since added a "sorry"
diagnostic to be emitted in the case of handling non-CPython code, as
I believe it may be helpful for users. When Python/C API definitions
are not found, the user will receive the following message: 'Python/C
API' definitions not found. Please ensure to '#include <Python.h>'.
Exiting. Let me know if you disagree with this addition.
>
> Hope this is constructive; sorry if it comes across as nitpicky in
> places (patch review tends to)
Not at all! Thanks for the feedback. Let me know what you think of the
revised patch.
>
> Dave
>

Best,
Eric
  

Patch

diff --git a/gcc/analyzer/analyzer-language.cc
b/gcc/analyzer/analyzer-language.cc
index 2c8910906ee..fc41b9c17b8 100644
--- a/gcc/analyzer/analyzer-language.cc
+++ b/gcc/analyzer/analyzer-language.cc
@@ -35,6 +35,26 @@  static GTY (()) hash_map <tree, tree>
*analyzer_stashed_constants;
 #if ENABLE_ANALYZER

 namespace ana {
+static vec<finish_translation_unit_callback>
+    *finish_translation_unit_callbacks;
+
+void
+register_finish_translation_unit_callback (
+    finish_translation_unit_callback callback)
+{
+  if (!finish_translation_unit_callbacks)
+    vec_alloc (finish_translation_unit_callbacks, 1);
+  finish_translation_unit_callbacks->safe_push (callback);
+}
+
+void
+run_callbacks (logger *logger, const translation_unit &tu)
+{
+  for (auto const &cb : finish_translation_unit_callbacks)
+    {
+      cb (logger, tu);
+    }
+}

 /* Call into TU to try to find a value for NAME.
    If found, stash its value within analyzer_stashed_constants.  */
@@ -102,6 +122,8 @@  on_finish_translation_unit (const translation_unit &tu)
     the_logger.set_logger (new logger (logfile, 0, 0,
         *global_dc->printer));
   stash_named_constants (the_logger.get_logger (), tu);
+
+  run_callbacks (the_logger.get_logger (), tu);
 }

 /* Lookup NAME in the named constants stashed when the frontend TU finished.
diff --git a/gcc/analyzer/analyzer-language.h b/gcc/analyzer/analyzer-language.h
index 00f85aba041..8deea52d627 100644
--- a/gcc/analyzer/analyzer-language.h
+++ b/gcc/analyzer/analyzer-language.h
@@ -21,6 +21,8 @@  along with GCC; see the file COPYING3.  If not see
 #ifndef GCC_ANALYZER_LANGUAGE_H
 #define GCC_ANALYZER_LANGUAGE_H

+#include "analyzer/analyzer-logging.h"
+
 #if ENABLE_ANALYZER

 namespace ana {
@@ -35,8 +37,15 @@  class translation_unit
      have been seen).  If it is defined and an integer (e.g. either as a
      macro or enum), return the INTEGER_CST value, otherwise return NULL.  */
   virtual tree lookup_constant_by_id (tree id) const = 0;
+  virtual tree lookup_type_by_id (tree id) const = 0;
+  virtual tree lookup_global_var_by_id (tree id) const = 0;
 };

+typedef void (*finish_translation_unit_callback)
+   (logger *, const translation_unit &);
+void register_finish_translation_unit_callback (
+    finish_translation_unit_callback callback);
+
 /* Analyzer hook for frontends to call at the end of the TU.  */

 void on_finish_translation_unit (const translation_unit &tu);
diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 80920b31f83..f0ee55e416b 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -1695,6 +1695,32 @@  public:
     return NULL_TREE;
   }

+  tree
+  lookup_type_by_id (tree id) const final override
+  {
+    if (tree type_decl = lookup_name (id))
+      {
+ if (TREE_CODE (type_decl) == TYPE_DECL)
+ {
+ tree record_type = TREE_TYPE (type_decl);
+ if (TREE_CODE (record_type) == RECORD_TYPE)
+ return record_type;
+ }
+      }
+
+    return NULL_TREE;
+  }
+
+  tree
+  lookup_global_var_by_id (tree id) const final override
+  {
+    if (tree var_decl = lookup_name (id))
+      if (TREE_CODE (var_decl) == VAR_DECL)
+ return var_decl;
+
+    return NULL_TREE;
+  }
+
 private:
   /* Attempt to get an INTEGER_CST from MACRO.
      Only handle the simplest cases: where MACRO's definition is a single
diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
new file mode 100644
index 00000000000..285da102edb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
@@ -0,0 +1,224 @@ 
+/* -fanalyzer plugin for CPython extension modules  */
+/* { dg-options "-g" } */
+
+#define INCLUDE_MEMORY
+#include "gcc-plugin.h"
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "function.h"
+#include "basic-block.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "diagnostic-core.h"
+#include "graphviz.h"
+#include "options.h"
+#include "cgraph.h"
+#include "tree-dfa.h"
+#include "stringpool.h"
+#include "convert.h"
+#include "target.h"
+#include "fold-const.h"
+#include "tree-pretty-print.h"
+#include "diagnostic-color.h"
+#include "diagnostic-metadata.h"
+#include "tristate.h"
+#include "bitmap.h"
+#include "selftest.h"
+#include "function.h"
+#include "json.h"
+#include "analyzer/analyzer.h"
+#include "analyzer/analyzer-language.h"
+#include "analyzer/analyzer-logging.h"
+#include "ordered-hash-map.h"
+#include "options.h"
+#include "cgraph.h"
+#include "cfg.h"
+#include "digraph.h"
+#include "analyzer/supergraph.h"
+#include "sbitmap.h"
+#include "analyzer/call-string.h"
+#include "analyzer/program-point.h"
+#include "analyzer/store.h"
+#include "analyzer/region-model.h"
+#include "analyzer/call-details.h"
+#include "analyzer/call-info.h"
+#include "make-unique.h"
+
+int plugin_is_GPL_compatible;
+
+#if ENABLE_ANALYZER
+static GTY (()) hash_map<tree, tree> *analyzer_stashed_types;
+static GTY (()) hash_map<tree, tree> *analyzer_stashed_globals;
+
+namespace ana
+{
+static tree pyobj_record = NULL_TREE;
+static tree varobj_record = NULL_TREE;
+static tree pylistobj_record = NULL_TREE;
+static tree pylongobj_record = NULL_TREE;
+static tree pylongtype_vardecl = NULL_TREE;
+static tree pylisttype_vardecl = NULL_TREE;
+
+tree
+get_field_by_name (tree type, const char *name)
+{
+  for (tree field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+    {
+      if (TREE_CODE (field) == FIELD_DECL)
+        {
+          const char *field_name = IDENTIFIER_POINTER (DECL_NAME (field));
+          if (strcmp (field_name, name) == 0)
+            return field;
+        }
+    }
+  return NULL_TREE;
+}
+
+static void
+maybe_stash_named_type (logger *logger, const translation_unit &tu,
+                        const char *name)
+{
+  LOG_FUNC_1 (logger, "name: %qs", name);
+  if (!analyzer_stashed_types)
+    analyzer_stashed_types = hash_map<tree, tree>::create_ggc ();
+
+  tree id = get_identifier (name);
+  if (tree t = tu.lookup_type_by_id (id))
+    {
+      gcc_assert (TREE_CODE (t) == RECORD_TYPE);
+      analyzer_stashed_types->put (id, t);
+      if (logger)
+        logger->log ("found %qs: %qE", name, t);
+    }
+  else
+    {
+      if (logger)
+        logger->log ("%qs: not found", name);
+    }
+}
+
+static void
+maybe_stash_global_var (logger *logger, const translation_unit &tu,
+                        const char *name)
+{
+  LOG_FUNC_1 (logger, "name: %qs", name);
+  if (!analyzer_stashed_globals)
+    analyzer_stashed_globals = hash_map<tree, tree>::create_ggc ();
+
+  tree id = get_identifier (name);
+  if (tree t = tu.lookup_global_var_by_id (id))
+    {
+      gcc_assert (TREE_CODE (t) == VAR_DECL);
+      analyzer_stashed_globals->put (id, t);
+      if (logger)
+        logger->log ("found %qs: %qE", name, t);
+    }
+  else
+    {
+      if (logger)
+        logger->log ("%qs: not found", name);
+    }
+}
+
+static void
+stash_named_types (logger *logger, const translation_unit &tu)
+{
+  LOG_SCOPE (logger);
+
+  maybe_stash_named_type (logger, tu, "PyObject");
+  maybe_stash_named_type (logger, tu, "PyListObject");
+  maybe_stash_named_type (logger, tu, "PyVarObject");
+  maybe_stash_named_type (logger, tu, "PyLongObject");
+}
+
+static void
+stash_global_vars (logger *logger, const translation_unit &tu)
+{
+  LOG_SCOPE (logger);
+
+  maybe_stash_global_var (logger, tu, "PyLong_Type");
+  maybe_stash_global_var (logger, tu, "PyList_Type");
+}
+
+tree
+get_stashed_type_by_name (const char *name)
+{
+  if (!analyzer_stashed_types)
+    return NULL_TREE;
+  tree id = get_identifier (name);
+  if (tree *slot = analyzer_stashed_types->get (id))
+    {
+      gcc_assert (TREE_CODE (*slot) == RECORD_TYPE);
+      return *slot;
+    }
+  return NULL_TREE;
+}
+
+tree
+get_stashed_global_var_by_name (const char *name)
+{
+  if (!analyzer_stashed_globals)
+    return NULL_TREE;
+  tree id = get_identifier (name);
+  if (tree *slot = analyzer_stashed_globals->get (id))
+    {
+      gcc_assert (TREE_CODE (*slot) == VAR_DECL);
+      return *slot;
+    }
+  return NULL_TREE;
+}
+
+static void
+init_py_structs ()
+{
+  if (pyobj_record == NULL_TREE)
+      pyobj_record = get_stashed_type_by_name ("PyObject");
+  if (varobj_record == NULL_TREE)
+    varobj_record = get_stashed_type_by_name ("PyVarObject");
+  if (pylistobj_record == NULL_TREE)
+    pylistobj_record = get_stashed_type_by_name ("PyListObject");
+  if (pylongobj_record == NULL_TREE)
+    pylongobj_record = get_stashed_type_by_name ("PyLongObject");
+  if (pylongtype_vardecl == NULL_TREE)
+    pylongtype_vardecl = get_stashed_global_var_by_name ("PyLong_Type");
+  if (pylisttype_vardecl == NULL_TREE)
+    pylisttype_vardecl = get_stashed_global_var_by_name ("PyList_Type");
+}
+
+static void
+cpython_analyzer_init_cb (void *gcc_data, void * /*user_data */)
+{
+  ana::plugin_analyzer_init_iface *iface
+      = (ana::plugin_analyzer_init_iface *)gcc_data;
+  LOG_SCOPE (iface->get_logger ());
+  if (1)
+    inform (input_location, "got here: cpython_analyzer_init_cb");
+
+  init_py_structs ();
+  if (pyobj_record == NULL_TREE)
+    return;
+}
+} // namespace ana
+
+#endif /* #if ENABLE_ANALYZER */
+
+int
+plugin_init (struct plugin_name_args *plugin_info,
+             struct plugin_gcc_version *version)
+{
+#if ENABLE_ANALYZER
+  const char *plugin_name = plugin_info->base_name;
+  if (1)
+    inform (input_location, "got here; %qs", plugin_name);
+  ana::register_finish_translation_unit_callback (&stash_named_types);
+  ana::register_finish_translation_unit_callback (&stash_global_vars);
+  register_callback (plugin_info->base_name, PLUGIN_ANALYZER_INIT,
+                     ana::cpython_analyzer_init_cb,
+                     NULL); /* void *user_data */
+#else
+  sorry_no_analyzer ();
+#endif
+  return 0;
+}
\ No newline at end of file