[v2] analyzer: stash values for CPython plugin [PR107646]
Checks
Commit Message
Revised:
-- Fix indentation problems
-- Add more detail to Changelog
-- Add new test on handling non-CPython code case
-- Turn off debugging inform by default
-- Make on_finish_translation_unit() static
-- Remove superfluous null checks in init_py_structs()
Changes have been bootstrapped and tested against trunk on aarch64-unknown-linux-gnu.
---
This patch adds a hook to the end of ana::on_finish_translation_unit
which calls relevant stashing-related callbacks registered during plugin
initialization. This feature is used to stash named types and global
variables for a CPython analyzer plugin [PR107646].
gcc/analyzer/ChangeLog:
PR analyzer/107646
* analyzer-language.cc (run_callbacks): New function.
(on_finish_translation_unit): New function.
* analyzer-language.h (GCC_ANALYZER_LANGUAGE_H): New include.
(class translation_unit): New vfuncs.
gcc/c/ChangeLog:
PR analyzer/107646
* c-parser.cc: New functions on stashing values for the
analyzer.
gcc/testsuite/ChangeLog:
PR analyzer/107646
* gcc.dg/plugin/plugin.exp: Add new plugin and test.
* gcc.dg/plugin/analyzer_cpython_plugin.c: New plugin.
* gcc.dg/plugin/cpython-plugin-test-1.c: New test.
Signed-off-by: Eric Feng <ef2648@columbia.edu>
---
gcc/analyzer/analyzer-language.cc | 22 ++
gcc/analyzer/analyzer-language.h | 9 +
gcc/c/c-parser.cc | 26 ++
.../gcc.dg/plugin/analyzer_cpython_plugin.c | 230 ++++++++++++++++++
.../gcc.dg/plugin/cpython-plugin-test-1.c | 8 +
gcc/testsuite/gcc.dg/plugin/plugin.exp | 2 +
6 files changed, 297 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
create mode 100644 gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-1.c
Comments
On Wed, 2023-08-02 at 12:20 -0400, Eric Feng wrote:
Hi Eric, thanks for the updated patch.
Overall, looks good to me, although I'd drop the "Exited." from the
"sorry" message (and thus from the dg-message directive), since the
compiler is not exiting, it's just the particular plugin that's giving
up (but let's not hold up the patch with a "bikeshed" discussion on the
precise wording).
If Joseph or Marek approves the C parts of the patch, this will be OK
to push to trunk.
Dave
> Revised:
> -- Fix indentation problems
> -- Add more detail to Changelog
> -- Add new test on handling non-CPython code case
> -- Turn off debugging inform by default
> -- Make on_finish_translation_unit() static
> -- Remove superfluous null checks in init_py_structs()
>
> Changes have been bootstrapped and tested against trunk on aarch64-
> unknown-linux-gnu.
>
> ---
> This patch adds a hook to the end of ana::on_finish_translation_unit
> which calls relevant stashing-related callbacks registered during
> plugin
> initialization. This feature is used to stash named types and global
> variables for a CPython analyzer plugin [PR107646].
>
> gcc/analyzer/ChangeLog:
> PR analyzer/107646
> * analyzer-language.cc (run_callbacks): New function.
> (on_finish_translation_unit): New function.
> * analyzer-language.h (GCC_ANALYZER_LANGUAGE_H): New include.
> (class translation_unit): New vfuncs.
>
> gcc/c/ChangeLog:
> PR analyzer/107646
> * c-parser.cc: New functions on stashing values for the
> analyzer.
>
> gcc/testsuite/ChangeLog:
> PR analyzer/107646
> * gcc.dg/plugin/plugin.exp: Add new plugin and test.
> * gcc.dg/plugin/analyzer_cpython_plugin.c: New plugin.
> * gcc.dg/plugin/cpython-plugin-test-1.c: New test.
>
> Signed-off-by: Eric Feng <ef2648@columbia.edu>
> ---
> gcc/analyzer/analyzer-language.cc | 22 ++
> gcc/analyzer/analyzer-language.h | 9 +
> gcc/c/c-parser.cc | 26 ++
> .../gcc.dg/plugin/analyzer_cpython_plugin.c | 230
> ++++++++++++++++++
> .../gcc.dg/plugin/cpython-plugin-test-1.c | 8 +
> gcc/testsuite/gcc.dg/plugin/plugin.exp | 2 +
> 6 files changed, 297 insertions(+)
> create mode 100644
> gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> create mode 100644 gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-
> 1.c
>
> diff --git a/gcc/analyzer/analyzer-language.cc
> b/gcc/analyzer/analyzer-language.cc
> index 2c8910906ee..85400288a93 100644
> --- a/gcc/analyzer/analyzer-language.cc
> +++ b/gcc/analyzer/analyzer-language.cc
> @@ -35,6 +35,26 @@ static GTY (()) hash_map <tree, tree>
> *analyzer_stashed_constants;
> #if ENABLE_ANALYZER
>
> namespace ana {
> +static vec<finish_translation_unit_callback>
> + *finish_translation_unit_callbacks;
> +
> +void
> +register_finish_translation_unit_callback (
> + finish_translation_unit_callback callback)
> +{
> + if (!finish_translation_unit_callbacks)
> + vec_alloc (finish_translation_unit_callbacks, 1);
> + finish_translation_unit_callbacks->safe_push (callback);
> +}
> +
> +static void
> +run_callbacks (logger *logger, const translation_unit &tu)
> +{
> + for (auto const &cb : finish_translation_unit_callbacks)
> + {
> + cb (logger, tu);
> + }
> +}
>
> /* Call into TU to try to find a value for NAME.
> If found, stash its value within analyzer_stashed_constants. */
> @@ -102,6 +122,8 @@ on_finish_translation_unit (const
> translation_unit &tu)
> the_logger.set_logger (new logger (logfile, 0, 0,
> *global_dc->printer));
> stash_named_constants (the_logger.get_logger (), tu);
> +
> + run_callbacks (the_logger.get_logger (), tu);
> }
>
> /* Lookup NAME in the named constants stashed when the frontend TU
> finished.
> diff --git a/gcc/analyzer/analyzer-language.h
> b/gcc/analyzer/analyzer-language.h
> index 00f85aba041..8deea52d627 100644
> --- a/gcc/analyzer/analyzer-language.h
> +++ b/gcc/analyzer/analyzer-language.h
> @@ -21,6 +21,8 @@ along with GCC; see the file COPYING3. If not see
> #ifndef GCC_ANALYZER_LANGUAGE_H
> #define GCC_ANALYZER_LANGUAGE_H
>
> +#include "analyzer/analyzer-logging.h"
> +
> #if ENABLE_ANALYZER
>
> namespace ana {
> @@ -35,8 +37,15 @@ class translation_unit
> have been seen). If it is defined and an integer (e.g. either
> as a
> macro or enum), return the INTEGER_CST value, otherwise return
> NULL. */
> virtual tree lookup_constant_by_id (tree id) const = 0;
> + virtual tree lookup_type_by_id (tree id) const = 0;
> + virtual tree lookup_global_var_by_id (tree id) const = 0;
> };
>
> +typedef void (*finish_translation_unit_callback)
> + (logger *, const translation_unit &);
> +void register_finish_translation_unit_callback (
> + finish_translation_unit_callback callback);
> +
> /* Analyzer hook for frontends to call at the end of the TU. */
>
> void on_finish_translation_unit (const translation_unit &tu);
> diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
> index cf82b0306d1..617111b0f0a 100644
> --- a/gcc/c/c-parser.cc
> +++ b/gcc/c/c-parser.cc
> @@ -1695,6 +1695,32 @@ public:
> return NULL_TREE;
> }
>
> + tree
> + lookup_type_by_id (tree id) const final override
> + {
> + if (tree type_decl = lookup_name (id))
> + {
> + if (TREE_CODE (type_decl) == TYPE_DECL)
> + {
> + tree record_type = TREE_TYPE (type_decl);
> + if (TREE_CODE (record_type) == RECORD_TYPE)
> + return record_type;
> + }
> + }
> +
> + return NULL_TREE;
> + }
> +
> + tree
> + lookup_global_var_by_id (tree id) const final override
> + {
> + if (tree var_decl = lookup_name (id))
> + if (TREE_CODE (var_decl) == VAR_DECL)
> + return var_decl;
> +
> + return NULL_TREE;
> + }
> +
> private:
> /* Attempt to get an INTEGER_CST from MACRO.
> Only handle the simplest cases: where MACRO's definition is a
> single
> diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> new file mode 100644
> index 00000000000..72375cfd879
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> @@ -0,0 +1,230 @@
> +/* -fanalyzer plugin for CPython extension modules */
> +/* { dg-options "-g" } */
> +
> +#define INCLUDE_MEMORY
> +#include "gcc-plugin.h"
> +#include "config.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "tree.h"
> +#include "function.h"
> +#include "basic-block.h"
> +#include "gimple.h"
> +#include "gimple-iterator.h"
> +#include "diagnostic-core.h"
> +#include "graphviz.h"
> +#include "options.h"
> +#include "cgraph.h"
> +#include "tree-dfa.h"
> +#include "stringpool.h"
> +#include "convert.h"
> +#include "target.h"
> +#include "fold-const.h"
> +#include "tree-pretty-print.h"
> +#include "diagnostic-color.h"
> +#include "diagnostic-metadata.h"
> +#include "tristate.h"
> +#include "bitmap.h"
> +#include "selftest.h"
> +#include "function.h"
> +#include "json.h"
> +#include "analyzer/analyzer.h"
> +#include "analyzer/analyzer-language.h"
> +#include "analyzer/analyzer-logging.h"
> +#include "ordered-hash-map.h"
> +#include "options.h"
> +#include "cgraph.h"
> +#include "cfg.h"
> +#include "digraph.h"
> +#include "analyzer/supergraph.h"
> +#include "sbitmap.h"
> +#include "analyzer/call-string.h"
> +#include "analyzer/program-point.h"
> +#include "analyzer/store.h"
> +#include "analyzer/region-model.h"
> +#include "analyzer/call-details.h"
> +#include "analyzer/call-info.h"
> +#include "make-unique.h"
> +
> +int plugin_is_GPL_compatible;
> +
> +#if ENABLE_ANALYZER
> +static GTY (()) hash_map<tree, tree> *analyzer_stashed_types;
> +static GTY (()) hash_map<tree, tree> *analyzer_stashed_globals;
> +
> +namespace ana
> +{
> +static tree pyobj_record = NULL_TREE;
> +static tree varobj_record = NULL_TREE;
> +static tree pylistobj_record = NULL_TREE;
> +static tree pylongobj_record = NULL_TREE;
> +static tree pylongtype_vardecl = NULL_TREE;
> +static tree pylisttype_vardecl = NULL_TREE;
> +
> +static tree
> +get_field_by_name (tree type, const char *name)
> +{
> + for (tree field = TYPE_FIELDS (type); field; field = TREE_CHAIN
> (field))
> + {
> + if (TREE_CODE (field) == FIELD_DECL)
> + {
> + const char *field_name = IDENTIFIER_POINTER (DECL_NAME
> (field));
> + if (strcmp (field_name, name) == 0)
> + return field;
> + }
> + }
> + return NULL_TREE;
> +}
> +
> +static void
> +maybe_stash_named_type (logger *logger, const translation_unit &tu,
> + const char *name)
> +{
> + LOG_FUNC_1 (logger, "name: %qs", name);
> + if (!analyzer_stashed_types)
> + analyzer_stashed_types = hash_map<tree, tree>::create_ggc ();
> +
> + tree id = get_identifier (name);
> + if (tree t = tu.lookup_type_by_id (id))
> + {
> + gcc_assert (TREE_CODE (t) == RECORD_TYPE);
> + analyzer_stashed_types->put (id, t);
> + if (logger)
> + logger->log ("found %qs: %qE", name, t);
> + }
> + else
> + {
> + if (logger)
> + logger->log ("%qs: not found", name);
> + }
> +}
> +
> +static void
> +maybe_stash_global_var (logger *logger, const translation_unit &tu,
> + const char *name)
> +{
> + LOG_FUNC_1 (logger, "name: %qs", name);
> + if (!analyzer_stashed_globals)
> + analyzer_stashed_globals = hash_map<tree, tree>::create_ggc ();
> +
> + tree id = get_identifier (name);
> + if (tree t = tu.lookup_global_var_by_id (id))
> + {
> + gcc_assert (TREE_CODE (t) == VAR_DECL);
> + analyzer_stashed_globals->put (id, t);
> + if (logger)
> + logger->log ("found %qs: %qE", name, t);
> + }
> + else
> + {
> + if (logger)
> + logger->log ("%qs: not found", name);
> + }
> +}
> +
> +static void
> +stash_named_types (logger *logger, const translation_unit &tu)
> +{
> + LOG_SCOPE (logger);
> +
> + maybe_stash_named_type (logger, tu, "PyObject");
> + maybe_stash_named_type (logger, tu, "PyListObject");
> + maybe_stash_named_type (logger, tu, "PyVarObject");
> + maybe_stash_named_type (logger, tu, "PyLongObject");
> +}
> +
> +static void
> +stash_global_vars (logger *logger, const translation_unit &tu)
> +{
> + LOG_SCOPE (logger);
> +
> + maybe_stash_global_var (logger, tu, "PyLong_Type");
> + maybe_stash_global_var (logger, tu, "PyList_Type");
> +}
> +
> +static tree
> +get_stashed_type_by_name (const char *name)
> +{
> + if (!analyzer_stashed_types)
> + return NULL_TREE;
> + tree id = get_identifier (name);
> + if (tree *slot = analyzer_stashed_types->get (id))
> + {
> + gcc_assert (TREE_CODE (*slot) == RECORD_TYPE);
> + return *slot;
> + }
> + return NULL_TREE;
> +}
> +
> +static tree
> +get_stashed_global_var_by_name (const char *name)
> +{
> + if (!analyzer_stashed_globals)
> + return NULL_TREE;
> + tree id = get_identifier (name);
> + if (tree *slot = analyzer_stashed_globals->get (id))
> + {
> + gcc_assert (TREE_CODE (*slot) == VAR_DECL);
> + return *slot;
> + }
> + return NULL_TREE;
> +}
> +
> +static void
> +init_py_structs ()
> +{
> + pyobj_record = get_stashed_type_by_name ("PyObject");
> + varobj_record = get_stashed_type_by_name ("PyVarObject");
> + pylistobj_record = get_stashed_type_by_name ("PyListObject");
> + pylongobj_record = get_stashed_type_by_name ("PyLongObject");
> + pylongtype_vardecl = get_stashed_global_var_by_name
> ("PyLong_Type");
> + pylisttype_vardecl = get_stashed_global_var_by_name
> ("PyList_Type");
> +}
> +
> +void
> +sorry_no_cpython_plugin ()
> +{
> + sorry ("%qs definitions not found."
> + " Please ensure to %qs. Exiting.)",
> + "Python/C API", "#include <Python.h>");
> +}
> +
> +static void
> +cpython_analyzer_init_cb (void *gcc_data, void * /*user_data */)
> +{
> + ana::plugin_analyzer_init_iface *iface
> + = (ana::plugin_analyzer_init_iface *)gcc_data;
> + LOG_SCOPE (iface->get_logger ());
> + if (0)
> + inform (input_location, "got here: cpython_analyzer_init_cb");
> +
> + init_py_structs ();
> +
> + if (pyobj_record == NULL_TREE)
> + {
> + sorry_no_cpython_plugin ();
> + return;
> + }
> +}
> +} // namespace ana
> +
> +#endif /* #if ENABLE_ANALYZER */
> +
> +int
> +plugin_init (struct plugin_name_args *plugin_info,
> + struct plugin_gcc_version *version)
> +{
> +#if ENABLE_ANALYZER
> + const char *plugin_name = plugin_info->base_name;
> + if (0)
> + inform (input_location, "got here; %qs", plugin_name);
> + ana::register_finish_translation_unit_callback
> (&stash_named_types);
> + ana::register_finish_translation_unit_callback
> (&stash_global_vars);
> + register_callback (plugin_info->base_name, PLUGIN_ANALYZER_INIT,
> + ana::cpython_analyzer_init_cb,
> + NULL); /* void *user_data */
> +#else
> + sorry_no_analyzer ();
> +#endif
> + return 0;
> +}
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-1.c
> b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-1.c
> new file mode 100644
> index 00000000000..30f17355711
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-1.c
> @@ -0,0 +1,8 @@
> +/* { dg-do compile } */
> +/* { dg-options "-fanalyzer" } */
> +/* { dg-require-effective-target analyzer } */
> +/* { dg-message "'Python/C API' definitions not found. Please ensure
> to '#include <Python.h>'. Exiting." "" { target *-*-* } 0 } */
> +
> +void test_no_python_plugin ()
> +{
> +}
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp
> b/gcc/testsuite/gcc.dg/plugin/plugin.exp
> index 60723a20eda..09c45394b1f 100644
> --- a/gcc/testsuite/gcc.dg/plugin/plugin.exp
> +++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp
> @@ -160,6 +160,8 @@ set plugin_test_list [list \
> taint-CVE-2011-0521-5-fixed.c \
> taint-CVE-2011-0521-6.c \
> taint-antipatterns-1.c } \
> + { analyzer_cpython_plugin.c \
> + cpython-plugin-test-1.c } \
> ]
>
> foreach plugin_test $plugin_test_list {
On Wed, Aug 02, 2023 at 12:59:28PM -0400, David Malcolm wrote:
> On Wed, 2023-08-02 at 12:20 -0400, Eric Feng wrote:
>
> Hi Eric, thanks for the updated patch.
>
> Overall, looks good to me, although I'd drop the "Exited." from the
> "sorry" message (and thus from the dg-message directive), since the
> compiler is not exiting, it's just the particular plugin that's giving
> up (but let's not hold up the patch with a "bikeshed" discussion on the
> precise wording).
>
> If Joseph or Marek approves the C parts of the patch, this will be OK
> to push to trunk.
[...]
> > index cf82b0306d1..617111b0f0a 100644
> > --- a/gcc/c/c-parser.cc
> > +++ b/gcc/c/c-parser.cc
> > @@ -1695,6 +1695,32 @@ public:
> > return NULL_TREE;
> > }
> >
> > + tree
> > + lookup_type_by_id (tree id) const final override
> > + {
> > + if (tree type_decl = lookup_name (id))
> > + {
> > + if (TREE_CODE (type_decl) == TYPE_DECL)
> > + {
> > + tree record_type = TREE_TYPE (type_decl);
> > + if (TREE_CODE (record_type) == RECORD_TYPE)
> > + return record_type;
> > + }
> > + }
I'd drop this set of { }, like below. OK with that adjusted, thanks.
> > +
> > + return NULL_TREE;
> > + }
> > +
> > + tree
> > + lookup_global_var_by_id (tree id) const final override
> > + {
> > + if (tree var_decl = lookup_name (id))
> > + if (TREE_CODE (var_decl) == VAR_DECL)
> > + return var_decl;
> > +
> > + return NULL_TREE;
> > + }
> > +
> > private:
> > /* Attempt to get an INTEGER_CST from MACRO.
> > Only handle the simplest cases: where MACRO's definition is a
Marek
On Wed, Aug 2, 2023 at 1:20 PM Marek Polacek <polacek@redhat.com> wrote:
>
> On Wed, Aug 02, 2023 at 12:59:28PM -0400, David Malcolm wrote:
> > On Wed, 2023-08-02 at 12:20 -0400, Eric Feng wrote:
> >
> > Hi Eric, thanks for the updated patch.
> >
> > Overall, looks good to me, although I'd drop the "Exited." from the
> > "sorry" message (and thus from the dg-message directive), since the
> > compiler is not exiting, it's just the particular plugin that's giving
> > up (but let's not hold up the patch with a "bikeshed" discussion on the
> > precise wording).
> >
> > If Joseph or Marek approves the C parts of the patch, this will be OK
> > to push to trunk.
>
Sounds good. Revised.
>
> > > index cf82b0306d1..617111b0f0a 100644
> > > --- a/gcc/c/c-parser.cc
> > > +++ b/gcc/c/c-parser.cc
> > > @@ -1695,6 +1695,32 @@ public:
> > > return NULL_TREE;
> > > }
> > >
> > > + tree
> > > + lookup_type_by_id (tree id) const final override
> > > + {
> > > + if (tree type_decl = lookup_name (id))
> > > + {
> > > + if (TREE_CODE (type_decl) == TYPE_DECL)
> > > + {
> > > + tree record_type = TREE_TYPE (type_decl);
> > > + if (TREE_CODE (record_type) == RECORD_TYPE)
> > > + return record_type;
> > > + }
> > > + }
>
> I'd drop this set of { }, like below. OK with that adjusted, thanks.
Sounds good — fixed.
>
> > > +
> > > + return NULL_TREE;
> > > + }
> > > +
> > > + tree
> > > + lookup_global_var_by_id (tree id) const final override
> > > + {
> > > + if (tree var_decl = lookup_name (id))
> > > + if (TREE_CODE (var_decl) == VAR_DECL)
> > > + return var_decl;
> > > +
> > > + return NULL_TREE;
> > > + }
> > > +
> > > private:
> > > /* Attempt to get an INTEGER_CST from MACRO.
> > > Only handle the simplest cases: where MACRO's definition is a
>
> Marek
>
Thank you, everyone. I've submitted a new patch with the described
changes. As I do not yet have write access, could someone please help
me commit it? Otherwise, please let me know if I should request write
access first (the GettingStarted page suggested requesting someone
commit the patch for the first few patches before requesting write
access).
Best,
Eric
On Wed, 2023-08-02 at 14:46 -0400, Eric Feng wrote:
> On Wed, Aug 2, 2023 at 1:20 PM Marek Polacek <polacek@redhat.com>
> wrote:
> >
> > On Wed, Aug 02, 2023 at 12:59:28PM -0400, David Malcolm wrote:
> > > On Wed, 2023-08-02 at 12:20 -0400, Eric Feng wrote:
> > >
[Dropping Joseph and Marek from the CC]
[...snip...]
>
>
> Thank you, everyone. I've submitted a new patch with the described
> changes.
Thanks.
> As I do not yet have write access, could someone please help
> me commit it?
I've pushed the v3 trunk to patch, as r14-2933-gfafe2d18f791c6; you can
see it at [1], so you're now officially a GCC contributor,
congratulation!
FWIW I had to do a little whitespace fixing on the ChangeLog entries
before the server-side hooks.commit-extra-checker would pass, as they
were indented with spaces, rather than tabs, so it complained thusly:
remote: *** The following commit was rejected by your hooks.commit-extra-checker script (status: 1)
remote: *** commit: 0a4a2dc7dad1dfe22be0b48fe0d8c50d216c8349
remote: *** ChangeLog format failed:
remote: *** ERR: line should start with a tab: " PR analyzer/107646"
remote: *** ERR: line should start with a tab: " * analyzer-language.cc (run_callbacks): New function."
remote: *** ERR: line should start with a tab: " (on_finish_translation_unit): New function."
remote: *** ERR: line should start with a tab: " * analyzer-language.h (GCC_ANALYZER_LANGUAGE_H): New include."
remote: *** ERR: line should start with a tab: " (class translation_unit): New vfuncs."
remote: *** ERR: line should start with a tab: " PR analyzer/107646"
remote: *** ERR: line should start with a tab: " * c-parser.cc: New functions on stashing values for the"
remote: *** ERR: line should start with a tab: " analyzer."
remote: *** ERR: line should start with a tab: " PR analyzer/107646"
remote: *** ERR: line should start with a tab: " * gcc.dg/plugin/plugin.exp: Add new plugin and test."
remote: *** ERR: line should start with a tab: " * gcc.dg/plugin/analyzer_cpython_plugin.c: New plugin."
remote: *** ERR: line should start with a tab: " * gcc.dg/plugin/cpython-plugin-test-1.c: New test."
remote: *** ERR: PR 107646 in subject but not in changelog: "analyzer: stash values for CPython plugin [PR107646]"
remote: ***
remote: *** Please see: https://gcc.gnu.org/codingconventions.html#ChangeLogs
remote: ***
remote: error: hook declined to update refs/heads/master
To git+ssh://gcc.gnu.org/git/gcc.git
! [remote rejected] master -> master (hook declined)
error: failed to push some refs to 'git+ssh://dmalcolm@gcc.gnu.org/git/gcc.git'
...but this was a trivial fix. You can test that patches are properly
formatted by running:
./contrib/gcc-changelog/git_check_commit.py HEAD
locally.
> Otherwise, please let me know if I should request write
> access first (the GettingStarted page suggested requesting someone
> commit the patch for the first few patches before requesting write
> access).
Please go ahead and request write access now; we should have done this
in the "community bonding" phase of GSoC; sorry for not catching this.
Thanks again for the patch. How's the followup work? Are you close to
being able to post one or more of the simpler known_function
subclasses?
Dave
[1]
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=fafe2d18f791c6b97b49af7c84b1b5703681c3af
On Wed, Aug 2, 2023 at 5:09 PM David Malcolm <dmalcolm@redhat.com> wrote:
>
> On Wed, 2023-08-02 at 14:46 -0400, Eric Feng wrote:
> > On Wed, Aug 2, 2023 at 1:20 PM Marek Polacek <polacek@redhat.com>
> > wrote:
> > >
> > > On Wed, Aug 02, 2023 at 12:59:28PM -0400, David Malcolm wrote:
> > > > On Wed, 2023-08-02 at 12:20 -0400, Eric Feng wrote:
> > > >
>
> [Dropping Joseph and Marek from the CC]
>
> [...snip...]
>
> >
> >
> > Thank you, everyone. I've submitted a new patch with the described
> > changes.
>
> Thanks.
>
> > As I do not yet have write access, could someone please help
> > me commit it?
>
> I've pushed the v3 trunk to patch, as r14-2933-gfafe2d18f791c6; you can
> see it at [1], so you're now officially a GCC contributor,
> congratulation!
>
> FWIW I had to do a little whitespace fixing on the ChangeLog entries
> before the server-side hooks.commit-extra-checker would pass, as they
> were indented with spaces, rather than tabs, so it complained thusly:
>
> remote: *** The following commit was rejected by your hooks.commit-extra-checker script (status: 1)
> remote: *** commit: 0a4a2dc7dad1dfe22be0b48fe0d8c50d216c8349
> remote: *** ChangeLog format failed:
> remote: *** ERR: line should start with a tab: " PR analyzer/107646"
> remote: *** ERR: line should start with a tab: " * analyzer-language.cc (run_callbacks): New function."
> remote: *** ERR: line should start with a tab: " (on_finish_translation_unit): New function."
> remote: *** ERR: line should start with a tab: " * analyzer-language.h (GCC_ANALYZER_LANGUAGE_H): New include."
> remote: *** ERR: line should start with a tab: " (class translation_unit): New vfuncs."
> remote: *** ERR: line should start with a tab: " PR analyzer/107646"
> remote: *** ERR: line should start with a tab: " * c-parser.cc: New functions on stashing values for the"
> remote: *** ERR: line should start with a tab: " analyzer."
> remote: *** ERR: line should start with a tab: " PR analyzer/107646"
> remote: *** ERR: line should start with a tab: " * gcc.dg/plugin/plugin.exp: Add new plugin and test."
> remote: *** ERR: line should start with a tab: " * gcc.dg/plugin/analyzer_cpython_plugin.c: New plugin."
> remote: *** ERR: line should start with a tab: " * gcc.dg/plugin/cpython-plugin-test-1.c: New test."
> remote: *** ERR: PR 107646 in subject but not in changelog: "analyzer: stash values for CPython plugin [PR107646]"
> remote: ***
> remote: *** Please see: https://gcc.gnu.org/codingconventions.html#ChangeLogs
> remote: ***
> remote: error: hook declined to update refs/heads/master
> To git+ssh://gcc.gnu.org/git/gcc.git
> ! [remote rejected] master -> master (hook declined)
> error: failed to push some refs to 'git+ssh://dmalcolm@gcc.gnu.org/git/gcc.git'
>
> ...but this was a trivial fix. You can test that patches are properly
> formatted by running:
>
> ./contrib/gcc-changelog/git_check_commit.py HEAD
>
> locally.
Sorry about that — will do. Thanks!
>
>
> > Otherwise, please let me know if I should request write
> > access first (the GettingStarted page suggested requesting someone
> > commit the patch for the first few patches before requesting write
> > access).
>
> Please go ahead and request write access now; we should have done this
> in the "community bonding" phase of GSoC; sorry for not catching this.
Sounds good.
>
> Thanks again for the patch. How's the followup work? Are you close to
> being able to post one or more of the simpler known_function
> subclasses?
Yes, I will submit another patch for review very soon. Thank you for
helping me push this one!
Best,
Eric
>
> Dave
>
> [1]
> https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=fafe2d18f791c6b97b49af7c84b1b5703681c3af
>
On Thu, 2023-08-03 at 11:28 -0400, Eric Feng wrote:
> On Wed, Aug 2, 2023 at 5:09 PM David Malcolm <dmalcolm@redhat.com>
> wrote:
> >
> > On Wed, 2023-08-02 at 14:46 -0400, Eric Feng wrote:
> >
[...snip...]
> >
> > > Otherwise, please let me know if I should request write
> > > access first (the GettingStarted page suggested requesting
> > > someone
> > > commit the patch for the first few patches before requesting
> > > write
> > > access).
> >
> > Please go ahead and request write access now; we should have done
> > this
> > in the "community bonding" phase of GSoC; sorry for not catching
> > this.
> Sounds good.
FWIW once you have an @gcc.gnu.org account, I'd like to set you as the
"assignee" of PR107646 in bugzilla.
[...snip...]
Dave
@@ -35,6 +35,26 @@ static GTY (()) hash_map <tree, tree> *analyzer_stashed_constants;
#if ENABLE_ANALYZER
namespace ana {
+static vec<finish_translation_unit_callback>
+ *finish_translation_unit_callbacks;
+
+void
+register_finish_translation_unit_callback (
+ finish_translation_unit_callback callback)
+{
+ if (!finish_translation_unit_callbacks)
+ vec_alloc (finish_translation_unit_callbacks, 1);
+ finish_translation_unit_callbacks->safe_push (callback);
+}
+
+static void
+run_callbacks (logger *logger, const translation_unit &tu)
+{
+ for (auto const &cb : finish_translation_unit_callbacks)
+ {
+ cb (logger, tu);
+ }
+}
/* Call into TU to try to find a value for NAME.
If found, stash its value within analyzer_stashed_constants. */
@@ -102,6 +122,8 @@ on_finish_translation_unit (const translation_unit &tu)
the_logger.set_logger (new logger (logfile, 0, 0,
*global_dc->printer));
stash_named_constants (the_logger.get_logger (), tu);
+
+ run_callbacks (the_logger.get_logger (), tu);
}
/* Lookup NAME in the named constants stashed when the frontend TU finished.
@@ -21,6 +21,8 @@ along with GCC; see the file COPYING3. If not see
#ifndef GCC_ANALYZER_LANGUAGE_H
#define GCC_ANALYZER_LANGUAGE_H
+#include "analyzer/analyzer-logging.h"
+
#if ENABLE_ANALYZER
namespace ana {
@@ -35,8 +37,15 @@ class translation_unit
have been seen). If it is defined and an integer (e.g. either as a
macro or enum), return the INTEGER_CST value, otherwise return NULL. */
virtual tree lookup_constant_by_id (tree id) const = 0;
+ virtual tree lookup_type_by_id (tree id) const = 0;
+ virtual tree lookup_global_var_by_id (tree id) const = 0;
};
+typedef void (*finish_translation_unit_callback)
+ (logger *, const translation_unit &);
+void register_finish_translation_unit_callback (
+ finish_translation_unit_callback callback);
+
/* Analyzer hook for frontends to call at the end of the TU. */
void on_finish_translation_unit (const translation_unit &tu);
@@ -1695,6 +1695,32 @@ public:
return NULL_TREE;
}
+ tree
+ lookup_type_by_id (tree id) const final override
+ {
+ if (tree type_decl = lookup_name (id))
+ {
+ if (TREE_CODE (type_decl) == TYPE_DECL)
+ {
+ tree record_type = TREE_TYPE (type_decl);
+ if (TREE_CODE (record_type) == RECORD_TYPE)
+ return record_type;
+ }
+ }
+
+ return NULL_TREE;
+ }
+
+ tree
+ lookup_global_var_by_id (tree id) const final override
+ {
+ if (tree var_decl = lookup_name (id))
+ if (TREE_CODE (var_decl) == VAR_DECL)
+ return var_decl;
+
+ return NULL_TREE;
+ }
+
private:
/* Attempt to get an INTEGER_CST from MACRO.
Only handle the simplest cases: where MACRO's definition is a single
new file mode 100644
@@ -0,0 +1,230 @@
+/* -fanalyzer plugin for CPython extension modules */
+/* { dg-options "-g" } */
+
+#define INCLUDE_MEMORY
+#include "gcc-plugin.h"
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "function.h"
+#include "basic-block.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "diagnostic-core.h"
+#include "graphviz.h"
+#include "options.h"
+#include "cgraph.h"
+#include "tree-dfa.h"
+#include "stringpool.h"
+#include "convert.h"
+#include "target.h"
+#include "fold-const.h"
+#include "tree-pretty-print.h"
+#include "diagnostic-color.h"
+#include "diagnostic-metadata.h"
+#include "tristate.h"
+#include "bitmap.h"
+#include "selftest.h"
+#include "function.h"
+#include "json.h"
+#include "analyzer/analyzer.h"
+#include "analyzer/analyzer-language.h"
+#include "analyzer/analyzer-logging.h"
+#include "ordered-hash-map.h"
+#include "options.h"
+#include "cgraph.h"
+#include "cfg.h"
+#include "digraph.h"
+#include "analyzer/supergraph.h"
+#include "sbitmap.h"
+#include "analyzer/call-string.h"
+#include "analyzer/program-point.h"
+#include "analyzer/store.h"
+#include "analyzer/region-model.h"
+#include "analyzer/call-details.h"
+#include "analyzer/call-info.h"
+#include "make-unique.h"
+
+int plugin_is_GPL_compatible;
+
+#if ENABLE_ANALYZER
+static GTY (()) hash_map<tree, tree> *analyzer_stashed_types;
+static GTY (()) hash_map<tree, tree> *analyzer_stashed_globals;
+
+namespace ana
+{
+static tree pyobj_record = NULL_TREE;
+static tree varobj_record = NULL_TREE;
+static tree pylistobj_record = NULL_TREE;
+static tree pylongobj_record = NULL_TREE;
+static tree pylongtype_vardecl = NULL_TREE;
+static tree pylisttype_vardecl = NULL_TREE;
+
+static tree
+get_field_by_name (tree type, const char *name)
+{
+ for (tree field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ {
+ if (TREE_CODE (field) == FIELD_DECL)
+ {
+ const char *field_name = IDENTIFIER_POINTER (DECL_NAME (field));
+ if (strcmp (field_name, name) == 0)
+ return field;
+ }
+ }
+ return NULL_TREE;
+}
+
+static void
+maybe_stash_named_type (logger *logger, const translation_unit &tu,
+ const char *name)
+{
+ LOG_FUNC_1 (logger, "name: %qs", name);
+ if (!analyzer_stashed_types)
+ analyzer_stashed_types = hash_map<tree, tree>::create_ggc ();
+
+ tree id = get_identifier (name);
+ if (tree t = tu.lookup_type_by_id (id))
+ {
+ gcc_assert (TREE_CODE (t) == RECORD_TYPE);
+ analyzer_stashed_types->put (id, t);
+ if (logger)
+ logger->log ("found %qs: %qE", name, t);
+ }
+ else
+ {
+ if (logger)
+ logger->log ("%qs: not found", name);
+ }
+}
+
+static void
+maybe_stash_global_var (logger *logger, const translation_unit &tu,
+ const char *name)
+{
+ LOG_FUNC_1 (logger, "name: %qs", name);
+ if (!analyzer_stashed_globals)
+ analyzer_stashed_globals = hash_map<tree, tree>::create_ggc ();
+
+ tree id = get_identifier (name);
+ if (tree t = tu.lookup_global_var_by_id (id))
+ {
+ gcc_assert (TREE_CODE (t) == VAR_DECL);
+ analyzer_stashed_globals->put (id, t);
+ if (logger)
+ logger->log ("found %qs: %qE", name, t);
+ }
+ else
+ {
+ if (logger)
+ logger->log ("%qs: not found", name);
+ }
+}
+
+static void
+stash_named_types (logger *logger, const translation_unit &tu)
+{
+ LOG_SCOPE (logger);
+
+ maybe_stash_named_type (logger, tu, "PyObject");
+ maybe_stash_named_type (logger, tu, "PyListObject");
+ maybe_stash_named_type (logger, tu, "PyVarObject");
+ maybe_stash_named_type (logger, tu, "PyLongObject");
+}
+
+static void
+stash_global_vars (logger *logger, const translation_unit &tu)
+{
+ LOG_SCOPE (logger);
+
+ maybe_stash_global_var (logger, tu, "PyLong_Type");
+ maybe_stash_global_var (logger, tu, "PyList_Type");
+}
+
+static tree
+get_stashed_type_by_name (const char *name)
+{
+ if (!analyzer_stashed_types)
+ return NULL_TREE;
+ tree id = get_identifier (name);
+ if (tree *slot = analyzer_stashed_types->get (id))
+ {
+ gcc_assert (TREE_CODE (*slot) == RECORD_TYPE);
+ return *slot;
+ }
+ return NULL_TREE;
+}
+
+static tree
+get_stashed_global_var_by_name (const char *name)
+{
+ if (!analyzer_stashed_globals)
+ return NULL_TREE;
+ tree id = get_identifier (name);
+ if (tree *slot = analyzer_stashed_globals->get (id))
+ {
+ gcc_assert (TREE_CODE (*slot) == VAR_DECL);
+ return *slot;
+ }
+ return NULL_TREE;
+}
+
+static void
+init_py_structs ()
+{
+ pyobj_record = get_stashed_type_by_name ("PyObject");
+ varobj_record = get_stashed_type_by_name ("PyVarObject");
+ pylistobj_record = get_stashed_type_by_name ("PyListObject");
+ pylongobj_record = get_stashed_type_by_name ("PyLongObject");
+ pylongtype_vardecl = get_stashed_global_var_by_name ("PyLong_Type");
+ pylisttype_vardecl = get_stashed_global_var_by_name ("PyList_Type");
+}
+
+void
+sorry_no_cpython_plugin ()
+{
+ sorry ("%qs definitions not found."
+ " Please ensure to %qs. Exiting.)",
+ "Python/C API", "#include <Python.h>");
+}
+
+static void
+cpython_analyzer_init_cb (void *gcc_data, void * /*user_data */)
+{
+ ana::plugin_analyzer_init_iface *iface
+ = (ana::plugin_analyzer_init_iface *)gcc_data;
+ LOG_SCOPE (iface->get_logger ());
+ if (0)
+ inform (input_location, "got here: cpython_analyzer_init_cb");
+
+ init_py_structs ();
+
+ if (pyobj_record == NULL_TREE)
+ {
+ sorry_no_cpython_plugin ();
+ return;
+ }
+}
+} // namespace ana
+
+#endif /* #if ENABLE_ANALYZER */
+
+int
+plugin_init (struct plugin_name_args *plugin_info,
+ struct plugin_gcc_version *version)
+{
+#if ENABLE_ANALYZER
+ const char *plugin_name = plugin_info->base_name;
+ if (0)
+ inform (input_location, "got here; %qs", plugin_name);
+ ana::register_finish_translation_unit_callback (&stash_named_types);
+ ana::register_finish_translation_unit_callback (&stash_global_vars);
+ register_callback (plugin_info->base_name, PLUGIN_ANALYZER_INIT,
+ ana::cpython_analyzer_init_cb,
+ NULL); /* void *user_data */
+#else
+ sorry_no_analyzer ();
+#endif
+ return 0;
+}
\ No newline at end of file
new file mode 100644
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-fanalyzer" } */
+/* { dg-require-effective-target analyzer } */
+/* { dg-message "'Python/C API' definitions not found. Please ensure to '#include <Python.h>'. Exiting." "" { target *-*-* } 0 } */
+
+void test_no_python_plugin ()
+{
+}
\ No newline at end of file
@@ -160,6 +160,8 @@ set plugin_test_list [list \
taint-CVE-2011-0521-5-fixed.c \
taint-CVE-2011-0521-6.c \
taint-antipatterns-1.c } \
+ { analyzer_cpython_plugin.c \
+ cpython-plugin-test-1.c } \
]
foreach plugin_test $plugin_test_list {