On Wed, 2023-08-09 at 15:22 -0400, Eric Feng wrote:
> Thank you for your help in getting dg-require-python-h working! I can
> confirm that the FAILs are related to differences between the --
> cflags
> affecting the gimple seen by the analyzer. For this reason, I have
> changed it to --includes for now.
Sounds good.
Eventually we'll probably want to support --cflags, but given that
every distribution probably has its own set of flags, it's a recipe for
an unpleasantly large test matrix, so just using --includes is a good
compromise.
> To be sure, I tested on Python 3.8 as
> well and it works as expected. I have also addressed the following
> comments on the WIP patch as you described.
>
> -- Update Changelog entry to list new functions being simulated.
> -- Update region_model::get_or_create_region_for_heap_alloc leading
> comment.
> -- Change register_alloc to update_state_machine.
> -- Change move_ptr_sval_non_null to transition_ptr_sval_non_null.
> -- Static helper functions for:
> -- Initializing ob_refcnt field.
> -- Setting ob_type field.
> -- Getting ob_base field.
> -- Initializing heap allocated region for PyObjects.
> -- Incrementing a field by one.
> -- Change arg_is_long_p to arg_is_integral_p.
> -- Extract common failure scenario for reusability.
>
> The initial WIP patch using
>
> /* { dg-options "-fanalyzer -I/usr/include/python3.9" }. */
>
> have been bootstrapped and regtested on aarch64-unknown-linux-gnu.
> Since
> we did not change any core logic in the revision and the only changes
> within the analyzer core are changing variable names, is it OK for
> trunk. In the mean time, the revised patch is currently going through
> bootstrap and regtest process.
Thanks for the updated patch.
Unfortunately I just pushed a largish analyzer patch (r14-3114-
g73da34a538ddc2) which may well conflict with your patch, so please
rebase to beyond that.
Sorry about this.
In particular note that there's no longer a default assignment to the
LHS at a call-site in region_model::on_call_pre; known_function
subclasses are now responsible for assigning to the LHS of the
callsite. But I suspect that all the known_function subclasses in the
cpython plugin already do that.
Some nits inline below...
[...snip...]
> Some concessions were made to
> simplify the analysis process when comparing kf_PyList_Append with
> the
> real implementation. In particular, PyList_Append performs some
> optimization internally to try and avoid calls to realloc if
> possible. For simplicity, we assume that realloc is called every
> time.
> Also, we grow the size by just 1 (to ensure enough space for adding a
> new element) rather than abide by the heuristics that the actual
> implementation
> follows.
Might be worth capturing these notes as comments in the source (for
class kf_PyList_Append), rather than just within the commit message.
[...snip...]
>
> diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-
> model.cc
> index e92b3f7b074..c338f045d92 100644
> --- a/gcc/analyzer/region-model.cc
> +++ b/gcc/analyzer/region-model.cc
> @@ -5127,11 +5127,16 @@ region_model::check_dynamic_size_for_floats
> (const svalue *size_in_bytes,
> Use CTXT to complain about tainted sizes.
>
> Reuse an existing heap_allocated_region if it's not being
> referenced by
> - this region_model; otherwise create a new one. */
> + this region_model; otherwise create a new one.
> +
> + Optionally (update_state_machine) transitions the pointer
> pointing to the
> + heap_allocated_region from start to assumed non-null. */
>
> const region *
> region_model::get_or_create_region_for_heap_alloc (const svalue
> *size_in_bytes,
> -
> region_model_context *ctxt)
> + region_model_context *ctxt,
> + bool update_state_machine,
> + const call_details *cd)
> {
> /* Determine which regions are referenced in this region_model, so
> that
> we can reuse an existing heap_allocated_region if it's not in
> use on
> @@ -5153,6 +5158,17 @@
> region_model::get_or_create_region_for_heap_alloc (const svalue
> *size_in_bytes,
> if (size_in_bytes)
> if (compat_types_p (size_in_bytes->get_type (), size_type_node))
> set_dynamic_extents (reg, size_in_bytes, ctxt);
> +
> + if (update_state_machine && cd)
> + {
> + const svalue *ptr_sval = nullptr;
> + if (cd->get_lhs_type ())
> + ptr_sval = m_mgr->get_ptr_svalue (cd->get_lhs_type (), reg);
> + else
> + ptr_sval = m_mgr->get_ptr_svalue (NULL_TREE, reg);
> + transition_ptr_sval_non_null (ctxt,
> ptr_sval);
This if/else is redundant: the "else" is only reached if cd-
>get_lhs_type () is null, in which case you pass in NULL_TREE, so it
works the same either way. Or am I missing something?
Also, it looks like something weird's happening with indentation in
this hunk.
> + }
> +
> return reg;
> }
>
> diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-
> model.h
> index 0cf38714c96..16c80a238bc 100644
> --- a/gcc/analyzer/region-model.h
> +++ b/gcc/analyzer/region-model.h
> @@ -387,9 +387,9 @@ class region_model
> region_model_context *ctxt,
> rejected_constraint **out);
>
> - const region *
> - get_or_create_region_for_heap_alloc (const svalue *size_in_bytes,
> - region_model_context *ctxt);
> + const region *get_or_create_region_for_heap_alloc (
> + const svalue *size_in_bytes, region_model_context *ctxt,
> + bool update_state_machine = false, const call_details *cd =
> nullptr);
Nit: non-standard indentation here.
> const region *create_region_for_alloca (const svalue
> *size_in_bytes,
> region_model_context
> *ctxt);
> void get_referenced_base_regions (auto_bitmap &out_ids) const;
> @@ -476,6 +476,10 @@ class region_model
> const svalue *old_ptr_sval,
> const svalue *new_ptr_sval);
>
> + /* Implemented in sm-malloc.cc. */
> + void transition_ptr_sval_non_null (region_model_context *ctxt,
> + const svalue *new_ptr_sval);
Nit: non-standard indentation here.
> +
> /* Implemented in sm-taint.cc. */
> void mark_as_tainted (const svalue *sval,
> region_model_context *ctxt);
> diff --git a/gcc/analyzer/sm-malloc.cc b/gcc/analyzer/sm-malloc.cc
> index a8c63eb1ce8..bb8d83e4605 100644
> --- a/gcc/analyzer/sm-malloc.cc
> +++ b/gcc/analyzer/sm-malloc.cc
> @@ -434,6 +434,10 @@ public:
> const svalue *new_ptr_sval,
> const extrinsic_state &ext_state) const;
>
> + void transition_ptr_sval_non_null (region_model *model,
> sm_state_map *smap,
> + const svalue *new_ptr_sval,
> + const extrinsic_state &ext_state) const;
Nit: non-standard indentation here.
> +
> standard_deallocator_set m_free;
> standard_deallocator_set m_scalar_delete;
> standard_deallocator_set m_vector_delete;
> @@ -2504,6 +2508,16 @@ on_realloc_with_move (region_model *model,
> NULL, ext_state);
> }
>
> +/* Hook for get_or_create_region_for_heap_alloc for the case when
> we want
> + ptr_sval to mark a newly created region as assumed non null on
> malloc SM. */
> +void
> +malloc_state_machine::transition_ptr_sval_non_null (
> + region_model *model, sm_state_map *smap, const svalue
> *new_ptr_sval,
> + const extrinsic_state &ext_state) const
Nit: non-standard indentation here.
> +{
> + smap->set_state (model, new_ptr_sval, m_free.m_nonnull, NULL,
> ext_state);
> +}
>
[...]
> diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> index 9ecc42d4465..42c8aff101e 100644
> --- a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> +++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
[...]
> @@ -76,6 +78,703 @@ get_field_by_name (tree type, const char *name)
> return NULL_TREE;
> }
>
> +static const svalue *
> +get_sizeof_pyobjptr (region_model_manager *mgr)
> +{
> + tree size_tree = TYPE_SIZE_UNIT (pyobj_ptr_tree);
> + const svalue *sizeof_sval = mgr->get_or_create_constant_svalue
> (size_tree);
> + return sizeof_sval;
> +}
> +
> +static bool
> +arg_is_integral_p(const call_details &cd, unsigned idx)
> +{
> + return INTEGRAL_TYPE_P(cd.get_arg_type(idx));
> +}
We already have a call_details::arg_is_pointer_p, so perhaps this
should be a call_details::arg_is_integral_p?
> +
> +static void
> +init_ob_refcnt_field (region_model_manager *mgr, region_model
> *model,
> + const region *ob_base_region, tree
> pyobj_record,
> + const call_details &cd)
> +{
> + tree ob_refcnt_tree = get_field_by_name (pyobj_record,
> "ob_refcnt");
> + const region *ob_refcnt_region
> + = mgr->get_field_region (ob_base_region, ob_refcnt_tree);
> + const svalue *refcnt_one_sval
> + = mgr->get_or_create_int_cst (size_type_node, 1);
> + model->set_value (ob_refcnt_region, refcnt_one_sval, cd.get_ctxt
> ());
> +}
Please add a leading comment to this function, something like:
/* Update MODEL to set OB_BASE_REGION's ob_refcnt to 1. */
> +
> +static void
> +set_ob_type_field (region_model_manager *mgr, region_model *model,
> + const region *ob_base_region, tree pyobj_record,
> + tree pytype_var_decl_ptr, const call_details &cd)
> +{
> + const region *pylist_type_region
> + = mgr->get_region_for_global (pytype_var_decl_ptr);
> + tree pytype_var_decl_ptr_type
> + = build_pointer_type (TREE_TYPE (pytype_var_decl_ptr));
> + const svalue *pylist_type_ptr_sval
> + = mgr->get_ptr_svalue (pytype_var_decl_ptr_type,
> pylist_type_region);
> + tree ob_type_field = get_field_by_name (pyobj_record, "ob_type");
> + const region *ob_type_region
> + = mgr->get_field_region (ob_base_region, ob_type_field);
> + model->set_value (ob_type_region, pylist_type_ptr_sval,
> cd.get_ctxt ());
> +}
Likewise, this needs a leading comment, something like:
/* Update MODEL to set OB_BASE_REGION's ob_type to point to
PYTYPE_VAR_DECL_PTR. */
> +
> +static const region *
> +get_ob_base_region (region_model_manager *mgr, region_model *model,
> + const region *new_object_region, tree
> object_record,
> + const svalue *pyobj_svalue, const call_details
> &cd)
> +{
> + tree ob_base_tree = get_field_by_name (object_record, "ob_base");
> + const region *ob_base_region
> + = mgr->get_field_region (new_object_region, ob_base_tree);
> + model->set_value (ob_base_region, pyobj_svalue, cd.get_ctxt ());
> + return ob_base_region;
> +}
Likewise, needs a leading comment. It isn't clear to me what the
intent of this function is. I see it used from
kf_PyLong_FromLong::impl_call_post's outcome handler, where it seems to
be used to set the ob_base_region to an unknown value.
> +
> +static const region *
> +init_pyobject_region (region_model_manager *mgr, region_model
> *model,
> + const svalue *object_svalue, const
> call_details &cd)
> +{
> + /* TODO: switch to actual tp_basic_size */
> + const svalue *tp_basicsize_sval = mgr-
> >get_or_create_unknown_svalue (NULL);
> + const region *pyobject_region = model-
> >get_or_create_region_for_heap_alloc (
> + tp_basicsize_sval, cd.get_ctxt (), true, &cd);
> + model->set_value (pyobject_region, object_svalue, cd.get_ctxt ());
> + return pyobject_region;
> +}
Likewise needs a leading comment, and the exact intent isn't quite
clear to me. I believe that everywhere you're calling it, you're
passing in an unknown svalue for "object_svalue".
[...]
> +class kf_PyList_Append : public known_function
> +{
> +public:
> + bool
> + matches_call_types_p (const call_details &cd) const final override
> + {
> + return (cd.num_args () == 2); // TODO: more checks here
Probably:
&& cd.arg_is_pointer_p (0)
&& cd.arg_is_pointer_p (1)
> + }
> + void impl_call_pre (const call_details &cd) const final override;
> + void impl_call_post (const call_details &cd) const final override;
> +};
> +
[...snip kf_PyList_Append implementation...]
I confess that my eyes started glazing over at the kf_PyList_Append
implementation. Given that this is just within the test plugin, I'll
defer to you on this.
> +class kf_PyList_New : public known_function
> +{
> +public:
> + bool
> + matches_call_types_p (const call_details &cd) const final override
> + {
> + return (cd.num_args () == 1 && arg_is_integral_p (cd, 0));
> + }
> + void impl_call_post (const call_details &cd) const final override;
> +};
> +
> +void
> +kf_PyList_New::impl_call_post (const call_details &cd) const
> +{
> + class success : public call_info
> + {
> + public:
> + success (const call_details &cd) : call_info (cd) {}
> +
> + label_text
> + get_desc (bool can_colorize) const final override
> + {
> + return make_label_text (can_colorize, "when %qE succeeds",
> + get_fndecl ());
> + }
> +
> + bool
> + update_model (region_model *model, const exploded_edge *,
> + region_model_context *ctxt) const final override
> + {
> + const call_details cd (get_call_details (model, ctxt));
> + region_model_manager *mgr = cd.get_manager ();
> +
> + const svalue *pyobj_svalue
> + = mgr->get_or_create_unknown_svalue (pyobj_record);
> + const svalue *varobj_svalue
> + = mgr->get_or_create_unknown_svalue (varobj_record);
> + const svalue *pylist_svalue
> + = mgr->get_or_create_unknown_svalue (pylistobj_record);
> +
> + const svalue *size_sval = cd.get_arg_svalue (0);
> +
> + const region *pylist_region
> + = init_pyobject_region (mgr, model, pylist_svalue, cd);
> +
> + /*
> + typedef struct
> + {
> + PyObject_VAR_HEAD
> + PyObject **ob_item;
> + Py_ssize_t allocated;
> + } PyListObject;
> + */
> + tree varobj_field = get_field_by_name (pylistobj_record,
> "ob_base");
> + const region *varobj_region
> + = mgr->get_field_region (pylist_region, varobj_field);
> + model->set_value (varobj_region, varobj_svalue, cd.get_ctxt
> ());
> +
> + tree ob_item_field = get_field_by_name (pylistobj_record,
> "ob_item");
> + const region *ob_item_region
> + = mgr->get_field_region (pylist_region, ob_item_field);
> +
> + const svalue *zero_sval = mgr->get_or_create_int_cst
> (size_type_node, 0);
> + const svalue *casted_size_sval
> + = mgr->get_or_create_cast (size_type_node, size_sval);
> + const svalue *size_cond_sval = mgr->get_or_create_binop (
> + size_type_node, LE_EXPR, casted_size_sval, zero_sval);
> +
> + // if size <= 0, ob_item = NULL
> +
> + if (tree_int_cst_equal (size_cond_sval->maybe_get_constant (),
> + integer_one_node))
> + {
I think the way I'd do this is to bifurcate on the <= 0 versus > 0
case, and add the constraints to the model, as this ought to better
handle non-constant values for the size.
But this is good enough for now.
> + const svalue *null_sval
> + = mgr->get_or_create_null_ptr (pyobj_ptr_ptr);
> + model->set_value (ob_item_region, null_sval, cd.get_ctxt
> ());
> + }
> + else // calloc
> + {
> + const svalue *sizeof_sval = mgr->get_or_create_cast (
> + size_sval->get_type (), get_sizeof_pyobjptr (mgr));
> + const svalue *prod_sval = mgr->get_or_create_binop (
> + size_type_node, MULT_EXPR, sizeof_sval, size_sval);
> + const region *ob_item_sized_region
> + = model->get_or_create_region_for_heap_alloc
> (prod_sval,
> +
> cd.get_ctxt ());
> + model->zero_fill_region (ob_item_sized_region);
> + const svalue *ob_item_ptr_sval
> + = mgr->get_ptr_svalue (pyobj_ptr_ptr,
> ob_item_sized_region);
> + const svalue *ob_item_unmergeable
> + = mgr->get_or_create_unmergeable (ob_item_ptr_sval);
> + model->set_value (ob_item_region, ob_item_unmergeable,
> + cd.get_ctxt ());
> + }
> +
> + /*
> + typedef struct {
> + PyObject ob_base;
> + Py_ssize_t ob_size; // Number of items in variable part
> + } PyVarObject;
> + */
> + const region *ob_base_region = get_ob_base_region (
> + mgr, model, varobj_region, varobj_record, pyobj_svalue,
> cd);
> +
> + tree ob_size_tree = get_field_by_name (varobj_record,
> "ob_size");
> + const region *ob_size_region
> + = mgr->get_field_region (varobj_region, ob_size_tree);
> + model->set_value (ob_size_region, size_sval, cd.get_ctxt ());
> +
> + /*
> + typedef struct _object {
> + _PyObject_HEAD_EXTRA
> + Py_ssize_t ob_refcnt;
> + PyTypeObject *ob_type;
> + } PyObject;
> + */
> +
> + // Initialize ob_refcnt field to 1.
> + init_ob_refcnt_field(mgr, model, ob_base_region, pyobj_record,
> cd);
> +
> + // Get pointer svalue for PyList_Type then assign it to
> ob_type field.
> + set_ob_type_field(mgr, model, ob_base_region, pyobj_record,
> pylisttype_vardecl, cd);
> +
> + if (cd.get_lhs_type ())
> + {
> + const svalue *ptr_sval
> + = mgr->get_ptr_svalue (cd.get_lhs_type (),
> pylist_region);
> + cd.maybe_set_lhs (ptr_sval);
> + }
> + return true;
> + }
> + };
> +
> + if (cd.get_ctxt ())
> + {
> + cd.get_ctxt ()->bifurcate (make_unique<pyobj_init_fail> (cd));
> + cd.get_ctxt ()->bifurcate (make_unique<success> (cd));
> + cd.get_ctxt ()->terminate_path ();
> + }
> +}
> +
> +class kf_PyLong_FromLong : public known_function
> +{
> +public:
> + bool
> + matches_call_types_p (const call_details &cd) const final override
> + {
> + return (cd.num_args () == 1 && arg_is_integral_p (cd, 0));
> + }
> + void impl_call_post (const call_details &cd) const final override;
> +};
> +
> +void
> +kf_PyLong_FromLong::impl_call_post (const call_details &cd) const
> +{
> + class success : public call_info
> + {
> + public:
> + success (const call_details &cd) : call_info (cd) {}
> +
> + label_text
> + get_desc (bool can_colorize) const final override
> + {
> + return make_label_text (can_colorize, "when %qE succeeds",
> + get_fndecl ());
> + }
If you subclass from success_call_info then you get an equivalent
get_desc implementation "for free".
> +
> + bool
> + update_model (region_model *model, const exploded_edge *,
> + region_model_context *ctxt) const final override
> + {
Could add a comment here that we *don't* attempt to simulate the
special-casing that CPython does for values -5 to 256 (see
https://docs.python.org/3/c-api/long.html#c.PyLong_FromLong ).
> + const call_details cd (get_call_details (model, ctxt));
> + region_model_manager *mgr = cd.get_manager ();
> +
> + const svalue *pyobj_svalue
> + = mgr->get_or_create_unknown_svalue (pyobj_record);
> + const svalue *pylongobj_sval
> + = mgr->get_or_create_unknown_svalue (pylongobj_record);
> +
> + const region *pylong_region
> + = init_pyobject_region (mgr, model, pylongobj_sval, cd);
> +
> + // Create a region for the base PyObject within the
> PyLongObject.
> + const region *ob_base_region = get_ob_base_region (
> + mgr, model, pylong_region, pylongobj_record, pyobj_svalue,
> cd);
> +
> + // Initialize ob_refcnt field to 1.
> + init_ob_refcnt_field(mgr, model, ob_base_region, pyobj_record,
> cd);
> +
> + // Get pointer svalue for PyLong_Type then assign it to
> ob_type field.
> + set_ob_type_field(mgr, model, ob_base_region, pyobj_record,
> pylongtype_vardecl, cd);
> +
> + // Set the PyLongObject value.
> + tree ob_digit_field = get_field_by_name (pylongobj_record,
> "ob_digit");
> + const region *ob_digit_region
> + = mgr->get_field_region (pylong_region, ob_digit_field);
> + const svalue *ob_digit_sval = cd.get_arg_svalue (0);
> + model->set_value (ob_digit_region, ob_digit_sval, cd.get_ctxt
> ());
> +
> + if (cd.get_lhs_type ())
> + {
> + const svalue *ptr_sval
> + = mgr->get_ptr_svalue (cd.get_lhs_type (),
> pylong_region);
> + cd.maybe_set_lhs (ptr_sval);
> + }
> + return true;
> + }
> + };
> +
> + if (cd.get_ctxt ())
> + {
> + cd.get_ctxt ()->bifurcate (make_unique<pyobj_init_fail> (cd));
> + cd.get_ctxt ()->bifurcate (make_unique<success> (cd));
> + cd.get_ctxt ()->terminate_path ();
> + }
> +}
[...]
> diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-2.c
> b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-2.c
> new file mode 100644
> index 00000000000..19b5c17428a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-2.c
> @@ -0,0 +1,78 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target analyzer } */
> +/* { dg-options "-fanalyzer" } */
> +/* { dg-require-python-h "" } */
> +
> +
> +#define PY_SSIZE_T_CLEAN
> +#include <Python.h>
> +#include "../analyzer/analyzer-decls.h"
> +
> +PyObject *
> +test_PyList_New (Py_ssize_t len)
> +{
> + PyObject *obj = PyList_New (len);
> + if (obj)
> + {
> + __analyzer_eval (obj->ob_refcnt == 1); /* { dg-warning "TRUE" }
> */
> + __analyzer_eval (PyList_CheckExact (obj)); /* { dg-warning
> "TRUE" } */
> + }
> + else
> + __analyzer_dump_path (); /* { dg-message "path" } */
> + return obj;
> +}
There's lots of scope for extra test coverage here.
For example, for all these test_FOO_New cases, consider a variant with
"void" return and no "return obj;" at the end. The analyzer ought to
report a leak when we fall off the end of these functions.
Similarly, it's good to try both:
- symbolic values for arguments (like you have here)
- constant values (for example, what happens with NULL for pointer
params?)
FWIW I like to organize test coverage for specific known functions into
test cases named after the function, so perhaps we could have:
cpython-plugin-test-PyList_Append.c
cpython-plugin-test-PyList_New.c
cpython-plugin-test-PyLong_New.c
but that's up to you.
All this can be left to a follow-up, though.
> +
> +PyObject *
> +test_PyLong_New (long n)
> +{
> + PyObject *obj = PyLong_FromLong (n);
> + if (obj)
> + {
> + __analyzer_eval (obj->ob_refcnt == 1); /* { dg-warning "TRUE" }
> */
> + __analyzer_eval (PyLong_CheckExact (obj)); /* { dg-warning
> "TRUE" } */
> + }
> + else
> + __analyzer_dump_path (); /* { dg-message "path" } */
> + return obj;
> +}
> +
> +PyObject *
> +test_PyListAppend (long n)
> +{
> + PyObject *item = PyLong_FromLong (n);
> + PyObject *list = PyList_New (0);
> + PyList_Append(list, item);
> + return list; /* { dg-warning "leak of 'item'" } */
> +}
> +
> +PyObject *
> +test_PyListAppend_2 (long n)
> +{
> + PyObject *item = PyLong_FromLong (n);
> + if (!item)
> + return NULL;
> +
> + __analyzer_eval (item->ob_refcnt == 1); /* { dg-warning "TRUE" }
> */
> + PyObject *list = PyList_New (n);
> + if (!list)
> + {
> + Py_DECREF(item);
> + return NULL;
> + }
> +
> + __analyzer_eval (list->ob_refcnt == 1); /* { dg-warning "TRUE" }
> */
> +
> + if (PyList_Append (list, item) < 0)
> + __analyzer_eval (item->ob_refcnt == 1); /* { dg-warning "TRUE" }
> */
> + else
> + __analyzer_eval (item->ob_refcnt == 2); /* { dg-warning "TRUE" }
> */
> + return list; /* { dg-warning "leak of 'item'" } */
> +}
> +
> +
> +PyObject *
> +test_PyListAppend_3 (PyObject *item, PyObject *list)
> +{
> + PyList_Append (list, item);
> + return list;
> +}
> \ No newline at end of file
[...]
Overall, I think that assuming the rebase is trivial then with the nits
fixed, this is good for trunk. As noted above there are some issues
with the known_function implementations in the plugin, but that's a
minor detail that doesn't impact anyone else, so let's not perfect be
the enemy of the good.
Hope the above makes sense; thanks again for the patch.
Dave
@@ -5127,11 +5127,16 @@ region_model::check_dynamic_size_for_floats (const svalue *size_in_bytes,
Use CTXT to complain about tainted sizes.
Reuse an existing heap_allocated_region if it's not being referenced by
- this region_model; otherwise create a new one. */
+ this region_model; otherwise create a new one.
+
+ Optionally (update_state_machine) transitions the pointer pointing to the
+ heap_allocated_region from start to assumed non-null. */
const region *
region_model::get_or_create_region_for_heap_alloc (const svalue *size_in_bytes,
- region_model_context *ctxt)
+ region_model_context *ctxt,
+ bool update_state_machine,
+ const call_details *cd)
{
/* Determine which regions are referenced in this region_model, so that
we can reuse an existing heap_allocated_region if it's not in use on
@@ -5153,6 +5158,17 @@ region_model::get_or_create_region_for_heap_alloc (const svalue *size_in_bytes,
if (size_in_bytes)
if (compat_types_p (size_in_bytes->get_type (), size_type_node))
set_dynamic_extents (reg, size_in_bytes, ctxt);
+
+ if (update_state_machine && cd)
+ {
+ const svalue *ptr_sval = nullptr;
+ if (cd->get_lhs_type ())
+ ptr_sval = m_mgr->get_ptr_svalue (cd->get_lhs_type (), reg);
+ else
+ ptr_sval = m_mgr->get_ptr_svalue (NULL_TREE, reg);
+ transition_ptr_sval_non_null (ctxt, ptr_sval);
+ }
+
return reg;
}
@@ -387,9 +387,9 @@ class region_model
region_model_context *ctxt,
rejected_constraint **out);
- const region *
- get_or_create_region_for_heap_alloc (const svalue *size_in_bytes,
- region_model_context *ctxt);
+ const region *get_or_create_region_for_heap_alloc (
+ const svalue *size_in_bytes, region_model_context *ctxt,
+ bool update_state_machine = false, const call_details *cd = nullptr);
const region *create_region_for_alloca (const svalue *size_in_bytes,
region_model_context *ctxt);
void get_referenced_base_regions (auto_bitmap &out_ids) const;
@@ -476,6 +476,10 @@ class region_model
const svalue *old_ptr_sval,
const svalue *new_ptr_sval);
+ /* Implemented in sm-malloc.cc. */
+ void transition_ptr_sval_non_null (region_model_context *ctxt,
+ const svalue *new_ptr_sval);
+
/* Implemented in sm-taint.cc. */
void mark_as_tainted (const svalue *sval,
region_model_context *ctxt);
@@ -434,6 +434,10 @@ public:
const svalue *new_ptr_sval,
const extrinsic_state &ext_state) const;
+ void transition_ptr_sval_non_null (region_model *model, sm_state_map *smap,
+ const svalue *new_ptr_sval,
+ const extrinsic_state &ext_state) const;
+
standard_deallocator_set m_free;
standard_deallocator_set m_scalar_delete;
standard_deallocator_set m_vector_delete;
@@ -2504,6 +2508,16 @@ on_realloc_with_move (region_model *model,
NULL, ext_state);
}
+/* Hook for get_or_create_region_for_heap_alloc for the case when we want
+ ptr_sval to mark a newly created region as assumed non null on malloc SM. */
+void
+malloc_state_machine::transition_ptr_sval_non_null (
+ region_model *model, sm_state_map *smap, const svalue *new_ptr_sval,
+ const extrinsic_state &ext_state) const
+{
+ smap->set_state (model, new_ptr_sval, m_free.m_nonnull, NULL, ext_state);
+}
+
} // anonymous namespace
/* Internal interface to this file. */
@@ -2548,6 +2562,32 @@ region_model::on_realloc_with_move (const call_details &cd,
*ext_state);
}
+/* Moves ptr_sval from start to assumed non-null, for use by
+ region_model::get_or_create_region_for_heap_alloc. */
+void
+region_model::transition_ptr_sval_non_null (region_model_context *ctxt,
+const svalue *ptr_sval)
+{
+ if (!ctxt)
+ return;
+ const extrinsic_state *ext_state = ctxt->get_ext_state ();
+ if (!ext_state)
+ return;
+
+ sm_state_map *smap;
+ const state_machine *sm;
+ unsigned sm_idx;
+ if (!ctxt->get_malloc_map (&smap, &sm, &sm_idx))
+ return;
+
+ gcc_assert (smap);
+ gcc_assert (sm);
+
+ const malloc_state_machine &malloc_sm = (const malloc_state_machine &)*sm;
+
+ malloc_sm.transition_ptr_sval_non_null (this, smap, ptr_sval, *ext_state);
+}
+
} // namespace ana
#endif /* #if ENABLE_ANALYZER */
@@ -55,6 +55,8 @@ static GTY (()) hash_map<tree, tree> *analyzer_stashed_globals;
namespace ana
{
static tree pyobj_record = NULL_TREE;
+static tree pyobj_ptr_tree = NULL_TREE;
+static tree pyobj_ptr_ptr = NULL_TREE;
static tree varobj_record = NULL_TREE;
static tree pylistobj_record = NULL_TREE;
static tree pylongobj_record = NULL_TREE;
@@ -76,6 +78,703 @@ get_field_by_name (tree type, const char *name)
return NULL_TREE;
}
+static const svalue *
+get_sizeof_pyobjptr (region_model_manager *mgr)
+{
+ tree size_tree = TYPE_SIZE_UNIT (pyobj_ptr_tree);
+ const svalue *sizeof_sval = mgr->get_or_create_constant_svalue (size_tree);
+ return sizeof_sval;
+}
+
+static bool
+arg_is_integral_p(const call_details &cd, unsigned idx)
+{
+ return INTEGRAL_TYPE_P(cd.get_arg_type(idx));
+}
+
+static void
+init_ob_refcnt_field (region_model_manager *mgr, region_model *model,
+ const region *ob_base_region, tree pyobj_record,
+ const call_details &cd)
+{
+ tree ob_refcnt_tree = get_field_by_name (pyobj_record, "ob_refcnt");
+ const region *ob_refcnt_region
+ = mgr->get_field_region (ob_base_region, ob_refcnt_tree);
+ const svalue *refcnt_one_sval
+ = mgr->get_or_create_int_cst (size_type_node, 1);
+ model->set_value (ob_refcnt_region, refcnt_one_sval, cd.get_ctxt ());
+}
+
+static void
+set_ob_type_field (region_model_manager *mgr, region_model *model,
+ const region *ob_base_region, tree pyobj_record,
+ tree pytype_var_decl_ptr, const call_details &cd)
+{
+ const region *pylist_type_region
+ = mgr->get_region_for_global (pytype_var_decl_ptr);
+ tree pytype_var_decl_ptr_type
+ = build_pointer_type (TREE_TYPE (pytype_var_decl_ptr));
+ const svalue *pylist_type_ptr_sval
+ = mgr->get_ptr_svalue (pytype_var_decl_ptr_type, pylist_type_region);
+ tree ob_type_field = get_field_by_name (pyobj_record, "ob_type");
+ const region *ob_type_region
+ = mgr->get_field_region (ob_base_region, ob_type_field);
+ model->set_value (ob_type_region, pylist_type_ptr_sval, cd.get_ctxt ());
+}
+
+static const region *
+get_ob_base_region (region_model_manager *mgr, region_model *model,
+ const region *new_object_region, tree object_record,
+ const svalue *pyobj_svalue, const call_details &cd)
+{
+ tree ob_base_tree = get_field_by_name (object_record, "ob_base");
+ const region *ob_base_region
+ = mgr->get_field_region (new_object_region, ob_base_tree);
+ model->set_value (ob_base_region, pyobj_svalue, cd.get_ctxt ());
+ return ob_base_region;
+}
+
+static const region *
+init_pyobject_region (region_model_manager *mgr, region_model *model,
+ const svalue *object_svalue, const call_details &cd)
+{
+ /* TODO: switch to actual tp_basic_size */
+ const svalue *tp_basicsize_sval = mgr->get_or_create_unknown_svalue (NULL);
+ const region *pyobject_region = model->get_or_create_region_for_heap_alloc (
+ tp_basicsize_sval, cd.get_ctxt (), true, &cd);
+ model->set_value (pyobject_region, object_svalue, cd.get_ctxt ());
+ return pyobject_region;
+}
+
+static void
+inc_field_val (region_model_manager *mgr, region_model *model,
+ const region *field_region, const tree type_node,
+ const call_details &cd, const svalue **old_sval = nullptr,
+ const svalue **new_sval = nullptr)
+{
+ const svalue *tmp_old_sval
+ = model->get_store_value (field_region, cd.get_ctxt ());
+ const svalue *one_sval = mgr->get_or_create_int_cst (type_node, 1);
+ const svalue *tmp_new_sval = mgr->get_or_create_binop (
+ type_node, PLUS_EXPR, tmp_old_sval, one_sval);
+
+ model->set_value (field_region, tmp_new_sval, cd.get_ctxt ());
+
+ if (old_sval)
+ *old_sval = tmp_old_sval;
+
+ if (new_sval)
+ *new_sval = tmp_new_sval;
+}
+
+class pyobj_init_fail : public failed_call_info
+{
+public:
+ pyobj_init_fail (const call_details &cd) : failed_call_info (cd) {}
+
+ bool
+ update_model (region_model *model, const exploded_edge *,
+ region_model_context *ctxt) const final override
+ {
+ /* Return NULL; everything else is unchanged. */
+ const call_details cd (get_call_details (model, ctxt));
+ region_model_manager *mgr = cd.get_manager ();
+ if (cd.get_lhs_type ())
+ {
+ const svalue *zero
+ = mgr->get_or_create_int_cst (cd.get_lhs_type (), 0);
+ model->set_value (cd.get_lhs_region (), zero, cd.get_ctxt ());
+ }
+ return true;
+ }
+};
+
+class kf_PyList_Append : public known_function
+{
+public:
+ bool
+ matches_call_types_p (const call_details &cd) const final override
+ {
+ return (cd.num_args () == 2); // TODO: more checks here
+ }
+ void impl_call_pre (const call_details &cd) const final override;
+ void impl_call_post (const call_details &cd) const final override;
+};
+
+void
+kf_PyList_Append::impl_call_pre (const call_details &cd) const
+{
+ region_model_manager *mgr = cd.get_manager ();
+ region_model *model = cd.get_model ();
+
+ const svalue *pylist_sval = cd.get_arg_svalue (0);
+ const region *pylist_reg
+ = model->deref_rvalue (pylist_sval, cd.get_arg_tree (0), cd.get_ctxt ());
+
+ const svalue *newitem_sval = cd.get_arg_svalue (1);
+ const region *newitem_reg
+ = model->deref_rvalue (pylist_sval, cd.get_arg_tree (0), cd.get_ctxt ());
+
+ // Skip checks if unknown etc
+ if (pylist_sval->get_kind () != SK_REGION
+ && pylist_sval->get_kind () != SK_CONSTANT)
+ return;
+
+ // PyList_Check
+ tree ob_type_field = get_field_by_name (pyobj_record, "ob_type");
+ const region *ob_type_region
+ = mgr->get_field_region (pylist_reg, ob_type_field);
+ const svalue *stored_sval
+ = model->get_store_value (ob_type_region, cd.get_ctxt ());
+ const region *pylist_type_region
+ = mgr->get_region_for_global (pylisttype_vardecl);
+ tree pylisttype_vardecl_ptr
+ = build_pointer_type (TREE_TYPE (pylisttype_vardecl));
+ const svalue *pylist_type_ptr
+ = mgr->get_ptr_svalue (pylisttype_vardecl_ptr, pylist_type_region);
+
+ if (stored_sval != pylist_type_ptr)
+ {
+ // TODO: emit diagnostic -Wanalyzer-type-error
+ cd.get_ctxt ()->terminate_path ();
+ return;
+ }
+
+ // Check that new_item is not null.
+ {
+ const svalue *null_ptr
+ = mgr->get_or_create_int_cst (newitem_sval->get_type (), 0);
+ if (!model->add_constraint (newitem_sval, NE_EXPR, null_ptr,
+ cd.get_ctxt ()))
+ {
+ // TODO: emit diagnostic here
+ cd.get_ctxt ()->terminate_path ();
+ return;
+ }
+ }
+}
+
+void
+kf_PyList_Append::impl_call_post (const call_details &cd) const
+{
+ /* Three custom subclasses of custom_edge_info, for handling the various
+ outcomes of "realloc". */
+
+ /* Concrete custom_edge_info: a realloc call that fails, returning NULL.
+ */
+ class realloc_failure : public failed_call_info
+ {
+ public:
+ realloc_failure (const call_details &cd) : failed_call_info (cd) {}
+
+ bool
+ update_model (region_model *model, const exploded_edge *,
+ region_model_context *ctxt) const final override
+ {
+ const call_details cd (get_call_details (model, ctxt));
+ region_model_manager *mgr = cd.get_manager ();
+
+ const svalue *pylist_sval = cd.get_arg_svalue (0);
+ const region *pylist_reg = model->deref_rvalue (
+ pylist_sval, cd.get_arg_tree (0), cd.get_ctxt ());
+
+ /* Identify ob_item field and set it to NULL. */
+ tree ob_item_field = get_field_by_name (pylistobj_record, "ob_item");
+ const region *ob_item_reg
+ = mgr->get_field_region (pylist_reg, ob_item_field);
+ const svalue *old_ptr_sval
+ = model->get_store_value (ob_item_reg, cd.get_ctxt ());
+
+ if (const region_svalue *old_reg
+ = old_ptr_sval->dyn_cast_region_svalue ())
+ {
+ const region *freed_reg = old_reg->get_pointee ();
+ model->unbind_region_and_descendents (freed_reg, POISON_KIND_FREED);
+ model->unset_dynamic_extents (freed_reg);
+ }
+
+ const svalue *null_sval = mgr->get_or_create_null_ptr (pyobj_ptr_ptr);
+ model->set_value (ob_item_reg, null_sval, cd.get_ctxt ());
+
+ if (cd.get_lhs_type ())
+ {
+ const svalue *neg_one
+ = mgr->get_or_create_int_cst (cd.get_lhs_type (), -1);
+ cd.maybe_set_lhs(neg_one);
+ }
+ return true;
+ }
+ };
+
+ class realloc_success_no_move : public call_info
+ {
+ public:
+ realloc_success_no_move (const call_details &cd) : call_info (cd) {}
+
+ label_text
+ get_desc (bool can_colorize) const final override
+ {
+ return make_label_text (
+ can_colorize, "when %qE succeeds, without moving underlying buffer",
+ get_fndecl ());
+ }
+
+ bool
+ update_model (region_model *model, const exploded_edge *,
+ region_model_context *ctxt) const final override
+ {
+ const call_details cd (get_call_details (model, ctxt));
+ region_model_manager *mgr = cd.get_manager ();
+
+ const svalue *pylist_sval = cd.get_arg_svalue (0);
+ const region *pylist_reg = model->deref_rvalue (
+ pylist_sval, cd.get_arg_tree (0), cd.get_ctxt ());
+
+ const svalue *newitem_sval = cd.get_arg_svalue (1);
+ const region *newitem_reg = model->deref_rvalue (
+ newitem_sval, cd.get_arg_tree (1), cd.get_ctxt ());
+
+ tree ob_size_field = get_field_by_name (varobj_record, "ob_size");
+ const region *ob_size_region
+ = mgr->get_field_region (pylist_reg, ob_size_field);
+ const svalue *ob_size_sval = nullptr;
+ const svalue *new_size_sval = nullptr;
+ inc_field_val (mgr, model, ob_size_region, integer_type_node, cd,
+ &ob_size_sval, &new_size_sval);
+
+ const svalue *sizeof_sval = mgr->get_or_create_cast (
+ ob_size_sval->get_type (), get_sizeof_pyobjptr (mgr));
+ const svalue *num_allocated_bytes = mgr->get_or_create_binop (
+ size_type_node, MULT_EXPR, sizeof_sval, new_size_sval);
+
+ tree ob_item_field = get_field_by_name (pylistobj_record, "ob_item");
+ const region *ob_item_region
+ = mgr->get_field_region (pylist_reg, ob_item_field);
+ const svalue *ob_item_ptr_sval
+ = model->get_store_value (ob_item_region, cd.get_ctxt ());
+
+ /* We can only grow in place with a non-NULL pointer and no unknown
+ */
+ {
+ const svalue *null_ptr = mgr->get_or_create_null_ptr (pyobj_ptr_ptr);
+ if (!model->add_constraint (ob_item_ptr_sval, NE_EXPR, null_ptr,
+ cd.get_ctxt ()))
+ {
+ return false;
+ }
+ }
+
+ const unmergeable_svalue *underlying_svalue
+ = ob_item_ptr_sval->dyn_cast_unmergeable_svalue ();
+ const svalue *target_svalue = nullptr;
+ const region_svalue *target_region_svalue = nullptr;
+
+ if (underlying_svalue)
+ {
+ target_svalue = underlying_svalue->get_arg ();
+ if (target_svalue->get_kind () != SK_REGION)
+ {
+ return false;
+ }
+ }
+ else
+ {
+ if (ob_item_ptr_sval->get_kind () != SK_REGION)
+ {
+ return false;
+ }
+ target_svalue = ob_item_ptr_sval;
+ }
+
+ target_region_svalue = target_svalue->dyn_cast_region_svalue ();
+ const region *curr_reg = target_region_svalue->get_pointee ();
+
+ if (compat_types_p (num_allocated_bytes->get_type (), size_type_node))
+ model->set_dynamic_extents (curr_reg, num_allocated_bytes, ctxt);
+
+ model->set_value (ob_size_region, new_size_sval, ctxt);
+
+ const svalue *offset_sval = mgr->get_or_create_binop (
+ size_type_node, MULT_EXPR, sizeof_sval, ob_size_sval);
+ const region *element_region
+ = mgr->get_offset_region (curr_reg, pyobj_ptr_ptr, offset_sval);
+ model->set_value (element_region, newitem_sval, cd.get_ctxt ());
+
+ // Increment ob_refcnt of appended item.
+ tree ob_refcnt_tree = get_field_by_name (pyobj_record, "ob_refcnt");
+ const region *ob_refcnt_region
+ = mgr->get_field_region (newitem_reg, ob_refcnt_tree);
+ inc_field_val (mgr, model, ob_refcnt_region, size_type_node, cd);
+
+ if (cd.get_lhs_type ())
+ {
+ const svalue *zero
+ = mgr->get_or_create_int_cst (cd.get_lhs_type (), 0);
+ cd.maybe_set_lhs(zero);
+ }
+ return true;
+ }
+ };
+
+ class realloc_success_move : public call_info
+ {
+ public:
+ realloc_success_move (const call_details &cd) : call_info (cd) {}
+
+ label_text
+ get_desc (bool can_colorize) const final override
+ {
+ return make_label_text (can_colorize, "when %qE succeeds, moving buffer",
+ get_fndecl ());
+ }
+
+ bool
+ update_model (region_model *model, const exploded_edge *,
+ region_model_context *ctxt) const final override
+ {
+ const call_details cd (get_call_details (model, ctxt));
+ region_model_manager *mgr = cd.get_manager ();
+ const svalue *pylist_sval = cd.get_arg_svalue (0);
+ const region *pylist_reg = model->deref_rvalue (
+ pylist_sval, cd.get_arg_tree (0), cd.get_ctxt ());
+
+ const svalue *newitem_sval = cd.get_arg_svalue (1);
+ const region *newitem_reg = model->deref_rvalue (
+ newitem_sval, cd.get_arg_tree (1), cd.get_ctxt ());
+
+ tree ob_size_field = get_field_by_name (varobj_record, "ob_size");
+ const region *ob_size_region
+ = mgr->get_field_region (pylist_reg, ob_size_field);
+ const svalue *old_ob_size_sval = nullptr;
+ const svalue *new_ob_size_sval = nullptr;
+ inc_field_val (mgr, model, ob_size_region, integer_type_node, cd,
+ &old_ob_size_sval, &new_ob_size_sval);
+
+ const svalue *sizeof_sval = mgr->get_or_create_cast (
+ old_ob_size_sval->get_type (), get_sizeof_pyobjptr (mgr));
+ const svalue *new_size_sval = mgr->get_or_create_binop (
+ size_type_node, MULT_EXPR, sizeof_sval, new_ob_size_sval);
+
+ tree ob_item_field = get_field_by_name (pylistobj_record, "ob_item");
+ const region *ob_item_reg
+ = mgr->get_field_region (pylist_reg, ob_item_field);
+ const svalue *old_ptr_sval
+ = model->get_store_value (ob_item_reg, cd.get_ctxt ());
+
+ /* Create the new region. */
+ const region *new_reg = model->get_or_create_region_for_heap_alloc (
+ new_size_sval, cd.get_ctxt ());
+ const svalue *new_ptr_sval
+ = mgr->get_ptr_svalue (pyobj_ptr_ptr, new_reg);
+ if (!model->add_constraint (new_ptr_sval, NE_EXPR, old_ptr_sval,
+ cd.get_ctxt ()))
+ return false;
+
+ if (const region_svalue *old_reg
+ = old_ptr_sval->dyn_cast_region_svalue ())
+ {
+ const region *freed_reg = old_reg->get_pointee ();
+ const svalue *old_size_sval = model->get_dynamic_extents (freed_reg);
+ if (old_size_sval)
+ {
+ const svalue *copied_size_sval
+ = get_copied_size (model, old_size_sval, new_size_sval);
+ const region *copied_old_reg = mgr->get_sized_region (
+ freed_reg, pyobj_ptr_ptr, copied_size_sval);
+ const svalue *buffer_content_sval
+ = model->get_store_value (copied_old_reg, cd.get_ctxt ());
+ const region *copied_new_reg = mgr->get_sized_region (
+ new_reg, pyobj_ptr_ptr, copied_size_sval);
+ model->set_value (copied_new_reg, buffer_content_sval,
+ cd.get_ctxt ());
+ }
+ else
+ {
+ model->mark_region_as_unknown (freed_reg, cd.get_uncertainty ());
+ }
+
+ model->unbind_region_and_descendents (freed_reg, POISON_KIND_FREED);
+ model->unset_dynamic_extents (freed_reg);
+ }
+
+ const svalue *null_ptr = mgr->get_or_create_null_ptr (pyobj_ptr_ptr);
+ if (!model->add_constraint (new_ptr_sval, NE_EXPR, null_ptr,
+ cd.get_ctxt ()))
+ return false;
+
+ model->set_value (ob_size_region, new_ob_size_sval, ctxt);
+ model->set_value (ob_item_reg, new_ptr_sval, cd.get_ctxt ());
+
+ const svalue *offset_sval = mgr->get_or_create_binop (
+ size_type_node, MULT_EXPR, sizeof_sval, old_ob_size_sval);
+ const region *element_region
+ = mgr->get_offset_region (new_reg, pyobj_ptr_ptr, offset_sval);
+ model->set_value (element_region, newitem_sval, cd.get_ctxt ());
+
+ // Increment ob_refcnt of appended item.
+ tree ob_refcnt_tree = get_field_by_name (pyobj_record, "ob_refcnt");
+ const region *ob_refcnt_region
+ = mgr->get_field_region (newitem_reg, ob_refcnt_tree);
+ inc_field_val (mgr, model, ob_refcnt_region, size_type_node, cd);
+
+ if (cd.get_lhs_type ())
+ {
+ const svalue *zero
+ = mgr->get_or_create_int_cst (cd.get_lhs_type (), 0);
+ cd.maybe_set_lhs(zero);
+ }
+ return true;
+ }
+
+ private:
+ /* Return the lesser of OLD_SIZE_SVAL and NEW_SIZE_SVAL.
+ If unknown, OLD_SIZE_SVAL is returned. */
+ const svalue *
+ get_copied_size (region_model *model, const svalue *old_size_sval,
+ const svalue *new_size_sval) const
+ {
+ tristate res
+ = model->eval_condition (old_size_sval, GT_EXPR, new_size_sval);
+ switch (res.get_value ())
+ {
+ case tristate::TS_TRUE:
+ return new_size_sval;
+ case tristate::TS_FALSE:
+ case tristate::TS_UNKNOWN:
+ return old_size_sval;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ };
+
+ /* Body of kf_PyList_Append::impl_call_post. */
+ if (cd.get_ctxt ())
+ {
+ cd.get_ctxt ()->bifurcate (make_unique<realloc_failure> (cd));
+ cd.get_ctxt ()->bifurcate (make_unique<realloc_success_no_move> (cd));
+ cd.get_ctxt ()->bifurcate (make_unique<realloc_success_move> (cd));
+ cd.get_ctxt ()->terminate_path ();
+ }
+}
+
+class kf_PyList_New : public known_function
+{
+public:
+ bool
+ matches_call_types_p (const call_details &cd) const final override
+ {
+ return (cd.num_args () == 1 && arg_is_integral_p (cd, 0));
+ }
+ void impl_call_post (const call_details &cd) const final override;
+};
+
+void
+kf_PyList_New::impl_call_post (const call_details &cd) const
+{
+ class success : public call_info
+ {
+ public:
+ success (const call_details &cd) : call_info (cd) {}
+
+ label_text
+ get_desc (bool can_colorize) const final override
+ {
+ return make_label_text (can_colorize, "when %qE succeeds",
+ get_fndecl ());
+ }
+
+ bool
+ update_model (region_model *model, const exploded_edge *,
+ region_model_context *ctxt) const final override
+ {
+ const call_details cd (get_call_details (model, ctxt));
+ region_model_manager *mgr = cd.get_manager ();
+
+ const svalue *pyobj_svalue
+ = mgr->get_or_create_unknown_svalue (pyobj_record);
+ const svalue *varobj_svalue
+ = mgr->get_or_create_unknown_svalue (varobj_record);
+ const svalue *pylist_svalue
+ = mgr->get_or_create_unknown_svalue (pylistobj_record);
+
+ const svalue *size_sval = cd.get_arg_svalue (0);
+
+ const region *pylist_region
+ = init_pyobject_region (mgr, model, pylist_svalue, cd);
+
+ /*
+ typedef struct
+ {
+ PyObject_VAR_HEAD
+ PyObject **ob_item;
+ Py_ssize_t allocated;
+ } PyListObject;
+ */
+ tree varobj_field = get_field_by_name (pylistobj_record, "ob_base");
+ const region *varobj_region
+ = mgr->get_field_region (pylist_region, varobj_field);
+ model->set_value (varobj_region, varobj_svalue, cd.get_ctxt ());
+
+ tree ob_item_field = get_field_by_name (pylistobj_record, "ob_item");
+ const region *ob_item_region
+ = mgr->get_field_region (pylist_region, ob_item_field);
+
+ const svalue *zero_sval = mgr->get_or_create_int_cst (size_type_node, 0);
+ const svalue *casted_size_sval
+ = mgr->get_or_create_cast (size_type_node, size_sval);
+ const svalue *size_cond_sval = mgr->get_or_create_binop (
+ size_type_node, LE_EXPR, casted_size_sval, zero_sval);
+
+ // if size <= 0, ob_item = NULL
+
+ if (tree_int_cst_equal (size_cond_sval->maybe_get_constant (),
+ integer_one_node))
+ {
+ const svalue *null_sval
+ = mgr->get_or_create_null_ptr (pyobj_ptr_ptr);
+ model->set_value (ob_item_region, null_sval, cd.get_ctxt ());
+ }
+ else // calloc
+ {
+ const svalue *sizeof_sval = mgr->get_or_create_cast (
+ size_sval->get_type (), get_sizeof_pyobjptr (mgr));
+ const svalue *prod_sval = mgr->get_or_create_binop (
+ size_type_node, MULT_EXPR, sizeof_sval, size_sval);
+ const region *ob_item_sized_region
+ = model->get_or_create_region_for_heap_alloc (prod_sval,
+ cd.get_ctxt ());
+ model->zero_fill_region (ob_item_sized_region);
+ const svalue *ob_item_ptr_sval
+ = mgr->get_ptr_svalue (pyobj_ptr_ptr, ob_item_sized_region);
+ const svalue *ob_item_unmergeable
+ = mgr->get_or_create_unmergeable (ob_item_ptr_sval);
+ model->set_value (ob_item_region, ob_item_unmergeable,
+ cd.get_ctxt ());
+ }
+
+ /*
+ typedef struct {
+ PyObject ob_base;
+ Py_ssize_t ob_size; // Number of items in variable part
+ } PyVarObject;
+ */
+ const region *ob_base_region = get_ob_base_region (
+ mgr, model, varobj_region, varobj_record, pyobj_svalue, cd);
+
+ tree ob_size_tree = get_field_by_name (varobj_record, "ob_size");
+ const region *ob_size_region
+ = mgr->get_field_region (varobj_region, ob_size_tree);
+ model->set_value (ob_size_region, size_sval, cd.get_ctxt ());
+
+ /*
+ typedef struct _object {
+ _PyObject_HEAD_EXTRA
+ Py_ssize_t ob_refcnt;
+ PyTypeObject *ob_type;
+ } PyObject;
+ */
+
+ // Initialize ob_refcnt field to 1.
+ init_ob_refcnt_field(mgr, model, ob_base_region, pyobj_record, cd);
+
+ // Get pointer svalue for PyList_Type then assign it to ob_type field.
+ set_ob_type_field(mgr, model, ob_base_region, pyobj_record, pylisttype_vardecl, cd);
+
+ if (cd.get_lhs_type ())
+ {
+ const svalue *ptr_sval
+ = mgr->get_ptr_svalue (cd.get_lhs_type (), pylist_region);
+ cd.maybe_set_lhs (ptr_sval);
+ }
+ return true;
+ }
+ };
+
+ if (cd.get_ctxt ())
+ {
+ cd.get_ctxt ()->bifurcate (make_unique<pyobj_init_fail> (cd));
+ cd.get_ctxt ()->bifurcate (make_unique<success> (cd));
+ cd.get_ctxt ()->terminate_path ();
+ }
+}
+
+class kf_PyLong_FromLong : public known_function
+{
+public:
+ bool
+ matches_call_types_p (const call_details &cd) const final override
+ {
+ return (cd.num_args () == 1 && arg_is_integral_p (cd, 0));
+ }
+ void impl_call_post (const call_details &cd) const final override;
+};
+
+void
+kf_PyLong_FromLong::impl_call_post (const call_details &cd) const
+{
+ class success : public call_info
+ {
+ public:
+ success (const call_details &cd) : call_info (cd) {}
+
+ label_text
+ get_desc (bool can_colorize) const final override
+ {
+ return make_label_text (can_colorize, "when %qE succeeds",
+ get_fndecl ());
+ }
+
+ bool
+ update_model (region_model *model, const exploded_edge *,
+ region_model_context *ctxt) const final override
+ {
+ const call_details cd (get_call_details (model, ctxt));
+ region_model_manager *mgr = cd.get_manager ();
+
+ const svalue *pyobj_svalue
+ = mgr->get_or_create_unknown_svalue (pyobj_record);
+ const svalue *pylongobj_sval
+ = mgr->get_or_create_unknown_svalue (pylongobj_record);
+
+ const region *pylong_region
+ = init_pyobject_region (mgr, model, pylongobj_sval, cd);
+
+ // Create a region for the base PyObject within the PyLongObject.
+ const region *ob_base_region = get_ob_base_region (
+ mgr, model, pylong_region, pylongobj_record, pyobj_svalue, cd);
+
+ // Initialize ob_refcnt field to 1.
+ init_ob_refcnt_field(mgr, model, ob_base_region, pyobj_record, cd);
+
+ // Get pointer svalue for PyLong_Type then assign it to ob_type field.
+ set_ob_type_field(mgr, model, ob_base_region, pyobj_record, pylongtype_vardecl, cd);
+
+ // Set the PyLongObject value.
+ tree ob_digit_field = get_field_by_name (pylongobj_record, "ob_digit");
+ const region *ob_digit_region
+ = mgr->get_field_region (pylong_region, ob_digit_field);
+ const svalue *ob_digit_sval = cd.get_arg_svalue (0);
+ model->set_value (ob_digit_region, ob_digit_sval, cd.get_ctxt ());
+
+ if (cd.get_lhs_type ())
+ {
+ const svalue *ptr_sval
+ = mgr->get_ptr_svalue (cd.get_lhs_type (), pylong_region);
+ cd.maybe_set_lhs (ptr_sval);
+ }
+ return true;
+ }
+ };
+
+ if (cd.get_ctxt ())
+ {
+ cd.get_ctxt ()->bifurcate (make_unique<pyobj_init_fail> (cd));
+ cd.get_ctxt ()->bifurcate (make_unique<success> (cd));
+ cd.get_ctxt ()->terminate_path ();
+ }
+}
+
static void
maybe_stash_named_type (logger *logger, const translation_unit &tu,
const char *name)
@@ -179,6 +878,12 @@ init_py_structs ()
pylongobj_record = get_stashed_type_by_name ("PyLongObject");
pylongtype_vardecl = get_stashed_global_var_by_name ("PyLong_Type");
pylisttype_vardecl = get_stashed_global_var_by_name ("PyList_Type");
+
+ if (pyobj_record)
+ {
+ pyobj_ptr_tree = build_pointer_type (pyobj_record);
+ pyobj_ptr_ptr = build_pointer_type (pyobj_ptr_tree);
+ }
}
void
@@ -205,6 +910,12 @@ cpython_analyzer_init_cb (void *gcc_data, void * /*user_data */)
sorry_no_cpython_plugin ();
return;
}
+
+ iface->register_known_function ("PyList_Append",
+ make_unique<kf_PyList_Append> ());
+ iface->register_known_function ("PyList_New", make_unique<kf_PyList_New> ());
+ iface->register_known_function ("PyLong_FromLong",
+ make_unique<kf_PyLong_FromLong> ());
}
} // namespace ana
new file mode 100644
@@ -0,0 +1,78 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target analyzer } */
+/* { dg-options "-fanalyzer" } */
+/* { dg-require-python-h "" } */
+
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include "../analyzer/analyzer-decls.h"
+
+PyObject *
+test_PyList_New (Py_ssize_t len)
+{
+ PyObject *obj = PyList_New (len);
+ if (obj)
+ {
+ __analyzer_eval (obj->ob_refcnt == 1); /* { dg-warning "TRUE" } */
+ __analyzer_eval (PyList_CheckExact (obj)); /* { dg-warning "TRUE" } */
+ }
+ else
+ __analyzer_dump_path (); /* { dg-message "path" } */
+ return obj;
+}
+
+PyObject *
+test_PyLong_New (long n)
+{
+ PyObject *obj = PyLong_FromLong (n);
+ if (obj)
+ {
+ __analyzer_eval (obj->ob_refcnt == 1); /* { dg-warning "TRUE" } */
+ __analyzer_eval (PyLong_CheckExact (obj)); /* { dg-warning "TRUE" } */
+ }
+ else
+ __analyzer_dump_path (); /* { dg-message "path" } */
+ return obj;
+}
+
+PyObject *
+test_PyListAppend (long n)
+{
+ PyObject *item = PyLong_FromLong (n);
+ PyObject *list = PyList_New (0);
+ PyList_Append(list, item);
+ return list; /* { dg-warning "leak of 'item'" } */
+}
+
+PyObject *
+test_PyListAppend_2 (long n)
+{
+ PyObject *item = PyLong_FromLong (n);
+ if (!item)
+ return NULL;
+
+ __analyzer_eval (item->ob_refcnt == 1); /* { dg-warning "TRUE" } */
+ PyObject *list = PyList_New (n);
+ if (!list)
+ {
+ Py_DECREF(item);
+ return NULL;
+ }
+
+ __analyzer_eval (list->ob_refcnt == 1); /* { dg-warning "TRUE" } */
+
+ if (PyList_Append (list, item) < 0)
+ __analyzer_eval (item->ob_refcnt == 1); /* { dg-warning "TRUE" } */
+ else
+ __analyzer_eval (item->ob_refcnt == 2); /* { dg-warning "TRUE" } */
+ return list; /* { dg-warning "leak of 'item'" } */
+}
+
+
+PyObject *
+test_PyListAppend_3 (PyObject *item, PyObject *list)
+{
+ PyList_Append (list, item);
+ return list;
+}
\ No newline at end of file
@@ -161,7 +161,8 @@ set plugin_test_list [list \
taint-CVE-2011-0521-6.c \
taint-antipatterns-1.c } \
{ analyzer_cpython_plugin.c \
- cpython-plugin-test-1.c } \
+ cpython-plugin-test-1.c \
+ cpython-plugin-test-2.c } \
]
foreach plugin_test $plugin_test_list {
@@ -12559,3 +12559,28 @@ proc check_effective_target_const_volatile_readonly_section { } {
}
return 1
}
+
+# Appends necessary Python flags to extra-tool-flags if Python.h is supported.
+# Otherwise, modifies dg-do-what.
+proc dg-require-python-h { args } {
+ upvar dg-extra-tool-flags extra-tool-flags
+
+ verbose "ENTER dg-require-python-h" 2
+
+ set result [remote_exec host "python3-config --includes"]
+ set status [lindex $result 0]
+ if { $status == 0 } {
+ set python_flags [lindex $result 1]
+ } else {
+ verbose "Python.h not supported" 2
+ upvar dg-do-what dg-do-what
+ set dg-do-what [list [lindex ${dg-do-what} 0] "N" "P"]
+ return
+ }
+
+ verbose "Python flags are: $python_flags" 2
+
+ verbose "Before appending, extra-tool-flags: ${extra-tool-flags}" 3
+ eval lappend extra-tool-flags $python_flags
+ verbose "After appending, extra-tool-flags: ${extra-tool-flags}" 3
+}