[COMMITTED] Convert nonzero mask in irange to wide_int.

Message ID 20221004073530.1461390-1-aldyh@redhat.com
State New, archived
Series [COMMITTED] Convert nonzero mask in irange to wide_int.

Commit Message

Aldy Hernandez Oct. 4, 2022, 7:35 a.m. UTC
  The reason the nonzero mask was kept in a tree was basically inertia,
as everything in irange is a tree.  However, there's no need to keep
it in a tree, as the conversions to and from wide ints are very
annoying.  That, plus special-casing NULL masks to mean -1, is prone
to error.

I have not only rewritten all the uses to assume a wide int, but
have corrected a few places where we weren't propagating the masks, or
rather pessimizing them to -1.  This will become more important in
upcoming patches where we make better use of the masks.

Performance testing shows only a trivial improvement in VRP, as things
like irange::contains_p() are still tied to a tree.  Ughh, can't wait
for trees in iranges to go away.
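
In essence, the representational change boils down to this (a
simplified sketch reusing names from the patch below; the class name
here is made up for illustration):

  class irange_sketch
  {
    // Old representation: a tree pointer, where NULL implicitly meant
    // "all bits possibly nonzero" (-1) and had to be special cased.
    //   tree m_nonzero_mask;

    // New representation: a wide_int that is always meaningful.
    wide_int m_nonzero_mask;

    void set_default_mask (tree min)
    {
      // A freshly set range starts with a conservative all-ones mask
      // at the type's precision, instead of a NULL tree.
      m_nonzero_mask = wi::shwi (-1, TYPE_PRECISION (TREE_TYPE (min)));
    }
  };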

gcc/ChangeLog:

	* value-range-storage.cc (irange_storage_slot::set_irange): Remove
	special case.
	* value-range.cc (irange::irange_set): Adjust for nonzero mask
	being a wide int.
	(irange::irange_set_anti_range): Same.
	(irange::set): Same.
	(irange::verify_range): Same.
	(irange::legacy_equal_p): Same.
	(irange::operator==): Same.
	(irange::contains_p): Same.
	(irange::legacy_intersect): Same.
	(irange::legacy_union): Same.
	(irange::irange_single_pair_union): Call union_nonzero_bits.
	(irange::irange_union): Same.
	(irange::irange_intersect): Call intersect_nonzero_bits.
	(irange::intersect): Adjust for nonzero mask being a wide int.
	(irange::invert): Same.
	(irange::set_nonzero_bits): Same.
	(irange::get_nonzero_bits_from_range): New.
	(irange::set_range_from_nonzero_bits): New.
	(irange::get_nonzero_bits): Adjust for nonzero mask being a wide
	int.
	(irange::intersect_nonzero_bits): Same.
	(irange::union_nonzero_bits): Same.
	(range_tests_nonzero_bits): Remove test.
	* value-range.h (irange::varying_compatible_p): Adjust for nonzero
	mask being a wide int.
	(gt_ggc_mx): Same.
	(gt_pch_nx): Same.
	(irange::set_undefined): Same.
	(irange::set_varying): Same.
	(irange::normalize_kind): Same.
---
 gcc/value-range-storage.cc |   6 +-
 gcc/value-range.cc         | 270 ++++++++++++++++---------------------
 gcc/value-range.h          |  25 ++--
 3 files changed, 130 insertions(+), 171 deletions(-)
  

Comments

Richard Biener Oct. 4, 2022, 7:55 a.m. UTC | #1
On 04.10.2022 at 09:36, Aldy Hernandez via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
> 
> The reason the nonzero mask was kept in a tree was basically inertia,
> as everything in irange is a tree.  However, there's no need to keep
> it in a tree, as the conversions to and from wide ints are very
> annoying.  That, plus special casing NULL masks to be -1 is prone
> to error.
> 
> I have not only rewritten all the uses to assume a wide int, but
> have corrected a few places where we weren't propagating the masks, or
> rather pessimizing them to -1.  This will become more important in
> upcoming patches where we make better use of the masks.
> 
> Performance testing shows a trivial improvement in VRP, as things like
> irange::contains_p() are tied to a tree.  Ughh, can't wait for trees in
> iranges to go away.

You want trailing wide int storage though.  A wide_int is quite large.

> gcc/ChangeLog:
> 
>    * value-range-storage.cc (irange_storage_slot::set_irange): Remove
>    special case.
>    * value-range.cc (irange::irange_set): Adjust for nonzero mask
>    being a wide int.
>    (irange::irange_set_anti_range): Same.
>    (irange::set): Same.
>    (irange::verify_range): Same.
>    (irange::legacy_equal_p): Same.
>    (irange::operator==): Same.
>    (irange::contains_p): Same.
>    (irange::legacy_intersect): Same.
>    (irange::legacy_union): Same.
>    (irange::irange_single_pair_union): Call union_nonzero_bits.
>    (irange::irange_union): Same.
>    (irange::irange_intersect): Call intersect_nonzero_bits.
>    (irange::intersect): Adjust for nonzero mask being a wide int.
>    (irange::invert): Same.
>    (irange::set_nonzero_bits): Same.
>    (irange::get_nonzero_bits_from_range): New.
>    (irange::set_range_from_nonzero_bits): New.
>    (irange::get_nonzero_bits): Adjust for nonzero mask being a wide
>    int.
>    (irange::intersect_nonzero_bits): Same.
>    (irange::union_nonzero_bits): Same.
>    (range_tests_nonzero_bits): Remove test.
>    * value-range.h (irange::varying_compatible_p): Adjust for nonzero
>    mask being a wide int.
>    (gt_ggc_mx): Same.
>    (gt_pch_nx): Same.
>    (irange::set_undefined): Same.
>    (irange::set_varying): Same.
>    (irange::normalize_kind): Same.
> ---
> gcc/value-range-storage.cc |   6 +-
> gcc/value-range.cc         | 270 ++++++++++++++++---------------------
> gcc/value-range.h          |  25 ++--
> 3 files changed, 130 insertions(+), 171 deletions(-)
> 
> diff --git a/gcc/value-range-storage.cc b/gcc/value-range-storage.cc
> index de7575ed48d..6e054622830 100644
> --- a/gcc/value-range-storage.cc
> +++ b/gcc/value-range-storage.cc
> @@ -150,11 +150,7 @@ irange_storage_slot::set_irange (const irange &r)
> {
>   gcc_checking_assert (fits_p (r));
> 
> -  // Avoid calling unsupported get_nonzero_bits on legacy.
> -  if (r.legacy_mode_p ())
> -    m_ints[0] = -1;
> -  else
> -    m_ints[0] = r.get_nonzero_bits ();
> +  m_ints[0] = r.get_nonzero_bits ();
> 
>   unsigned pairs = r.num_pairs ();
>   for (unsigned i = 0; i < pairs; ++i)
> diff --git a/gcc/value-range.cc b/gcc/value-range.cc
> index 6e196574de9..afb26a40083 100644
> --- a/gcc/value-range.cc
> +++ b/gcc/value-range.cc
> @@ -940,7 +940,7 @@ irange::irange_set (tree min, tree max)
>   m_base[1] = max;
>   m_num_ranges = 1;
>   m_kind = VR_RANGE;
> -  m_nonzero_mask = NULL;
> +  m_nonzero_mask = wi::shwi (-1, TYPE_PRECISION (TREE_TYPE (min)));
>   normalize_kind ();
> 
>   if (flag_checking)
> @@ -1014,7 +1014,7 @@ irange::irange_set_anti_range (tree min, tree max)
>     }
> 
>   m_kind = VR_RANGE;
> -  m_nonzero_mask = NULL;
> +  m_nonzero_mask = wi::shwi (-1, TYPE_PRECISION (TREE_TYPE (min)));
>   normalize_kind ();
> 
>   if (flag_checking)
> @@ -1071,7 +1071,7 @@ irange::set (tree min, tree max, value_range_kind kind)
>       m_base[0] = min;
>       m_base[1] = max;
>       m_num_ranges = 1;
> -      m_nonzero_mask = NULL;
> +      m_nonzero_mask = wi::shwi (-1, TYPE_PRECISION (TREE_TYPE (min)));
>       return;
>     }
> 
> @@ -1121,7 +1121,7 @@ irange::set (tree min, tree max, value_range_kind kind)
>   m_base[0] = min;
>   m_base[1] = max;
>   m_num_ranges = 1;
> -  m_nonzero_mask = NULL;
> +  m_nonzero_mask = wi::shwi (-1, TYPE_PRECISION (TREE_TYPE (min)));
>   normalize_kind ();
>   if (flag_checking)
>     verify_range ();
> @@ -1136,14 +1136,11 @@ irange::verify_range ()
>   if (m_kind == VR_UNDEFINED)
>     {
>       gcc_checking_assert (m_num_ranges == 0);
> -      gcc_checking_assert (!m_nonzero_mask);
>       return;
>     }
> -  if (m_nonzero_mask)
> -    gcc_checking_assert (wi::to_wide (m_nonzero_mask) != -1);
>   if (m_kind == VR_VARYING)
>     {
> -      gcc_checking_assert (!m_nonzero_mask);
> +      gcc_checking_assert (m_nonzero_mask == -1);
>       gcc_checking_assert (m_num_ranges == 1);
>       gcc_checking_assert (varying_compatible_p ());
>       return;
> @@ -1238,7 +1235,7 @@ irange::legacy_equal_p (const irange &other) const
>                   other.tree_lower_bound (0))
>      && vrp_operand_equal_p (tree_upper_bound (0),
>                  other.tree_upper_bound (0))
> -      && vrp_operand_equal_p (m_nonzero_mask, other.m_nonzero_mask));
> +      && get_nonzero_bits () == other.get_nonzero_bits ());
> }
> 
> bool
> @@ -1273,7 +1270,7 @@ irange::operator== (const irange &other) const
>      || !operand_equal_p (ub, ub_other, 0))
>    return false;
>     }
> -  return vrp_operand_equal_p (m_nonzero_mask, other.m_nonzero_mask);
> +  return get_nonzero_bits () == other.get_nonzero_bits ();
> }
> 
> /* Return TRUE if this is a symbolic range.  */
> @@ -1416,10 +1413,11 @@ irange::contains_p (tree cst) const
> 
>   gcc_checking_assert (TREE_CODE (cst) == INTEGER_CST);
> 
> -  if (m_nonzero_mask)
> +  // See if we can exclude CST based on the nonzero bits.
> +  if (m_nonzero_mask != -1)
>     {
>       wide_int cstw = wi::to_wide (cst);
> -      if (cstw != 0 && wi::bit_and (wi::to_wide (m_nonzero_mask), cstw) == 0)
> +      if (cstw != 0 && wi::bit_and (m_nonzero_mask, cstw) == 0)
>    return false;
>     }
> 
> @@ -1878,9 +1876,6 @@ irange::legacy_intersect (irange *vr0, const irange *vr1)
>   intersect_ranges (&vr0kind, &vr0min, &vr0max,
>            vr1->kind (), vr1->min (), vr1->max ());
> 
> -  // Pessimize nonzero masks, as we don't support them.
> -  m_nonzero_mask = NULL;
> -
>   /* Make sure to canonicalize the result though as the inversion of a
>      VR_RANGE can still be a VR_RANGE.  */
>   if (vr0kind == VR_UNDEFINED)
> @@ -2202,9 +2197,6 @@ irange::legacy_union (irange *vr0, const irange *vr1)
>   union_ranges (&vr0kind, &vr0min, &vr0max,
>        vr1->kind (), vr1->min (), vr1->max ());
> 
> -  // Pessimize nonzero masks, as we don't support them.
> -  m_nonzero_mask = NULL;
> -
>   if (vr0kind == VR_UNDEFINED)
>     vr0->set_undefined ();
>   else if (vr0kind == VR_VARYING)
> @@ -2310,8 +2302,6 @@ irange::legacy_verbose_intersect (const irange *other)
> 
> // Perform an efficient union with R when both ranges have only a single pair.
> // Excluded are VARYING and UNDEFINED ranges.
> -//
> -// NOTE: It is the caller's responsibility to set the nonzero mask.
> 
> bool
> irange::irange_single_pair_union (const irange &r)
> @@ -2325,7 +2315,7 @@ irange::irange_single_pair_union (const irange &r)
>     {
>       // If current upper bound is new upper bound, we're done.
>       if (wi::le_p (wi::to_wide (r.m_base[1]), wi::to_wide (m_base[1]), sign))
> -    return false;
> +    return union_nonzero_bits (r);
>       // Otherwise R has the new upper bound.
>       // Check for overlap/touching ranges, or single target range.
>       if (m_max_ranges == 1
> @@ -2338,8 +2328,7 @@ irange::irange_single_pair_union (const irange &r)
>      m_base[3] = r.m_base[1];
>      m_num_ranges = 2;
>    }
> -      if (varying_compatible_p ())
> -    m_kind = VR_VARYING;
> +      union_nonzero_bits (r);
>       return true;
>     }
> 
> @@ -2353,11 +2342,7 @@ irange::irange_single_pair_union (const irange &r)
>   // Check for overlapping ranges, or target limited to a single range.
>   else if (m_max_ranges == 1
>       || wi::to_widest (r.m_base[1]) + 1 >= wi::to_widest (lb))
> -    {
> -      // This has the new upper bound, just check for varying.
> -      if (varying_compatible_p ())
> -      m_kind = VR_VARYING;
> -    }
> +    ;
>   else
>     {
>       // Left with 2 pairs.
> @@ -2366,8 +2351,7 @@ irange::irange_single_pair_union (const irange &r)
>       m_base[3] = m_base[1];
>       m_base[1] = r.m_base[1];
>     }
> -  if (varying_compatible_p ())
> -    m_kind = VR_VARYING;
> +  union_nonzero_bits (r);
>   return true;
> }
> 
> @@ -2398,27 +2382,13 @@ irange::irange_union (const irange &r)
>       return true;
>     }
> 
> -  // Save the nonzero mask in case the set operations below clobber it.
> -  bool ret_nz = union_nonzero_bits (r);
> -  tree saved_nz = m_nonzero_mask;
> -
> -  // The union_nonzero_bits may have turned things into a varying.
> -  if (varying_p ())
> -    return true;
> -
>   // Special case one range union one range.
>   if (m_num_ranges == 1 && r.m_num_ranges == 1)
> -    {
> -      bool ret = irange_single_pair_union (r);
> -      set_nonzero_bits (saved_nz);
> -      if (flag_checking)
> -    verify_range ();
> -      return ret || ret_nz;
> -    }
> +    return irange_single_pair_union (r);
> 
>   // If this ranges fully contains R, then we need do nothing.
>   if (irange_contains_p (r))
> -    return ret_nz;
> +    return union_nonzero_bits (r);
> 
>   // Do not worry about merging and such by reserving twice as many
>   // pairs as needed, and then simply sort the 2 ranges into this
> @@ -2506,11 +2476,7 @@ irange::irange_union (const irange &r)
>   m_num_ranges = i / 2;
> 
>   m_kind = VR_RANGE;
> -  m_nonzero_mask = saved_nz;
> -  normalize_kind ();
> -
> -  if (flag_checking)
> -    verify_range ();
> +  union_nonzero_bits (r);
>   return true;
> }
> 
> @@ -2576,21 +2542,11 @@ irange::irange_intersect (const irange &r)
>       set_undefined ();
>       return true;
>     }
> -
> -  // Save the nonzero mask in case the set operations below clobber it.
> -  bool ret_nz = intersect_nonzero_bits (r);
> -  tree saved_nz = m_nonzero_mask;
> -
>   if (r.varying_p ())
> -    return ret_nz;
> -
> +    return false;
>   if (varying_p ())
>     {
>       operator= (r);
> -      if (saved_nz)
> -    set_nonzero_bits (saved_nz);
> -      if (flag_checking)
> -    verify_range ();
>       return true;
>     }
> 
> @@ -2600,13 +2556,13 @@ irange::irange_intersect (const irange &r)
>       if (undefined_p ())
>    return true;
> 
> -      set_nonzero_bits (saved_nz);
> -      return res || saved_nz;
> +      res |= intersect_nonzero_bits (r);
> +      return res;
>     }
> 
>   // If R fully contains this, then intersection will change nothing.
>   if (r.irange_contains_p (*this))
> -    return ret_nz;
> +    return intersect_nonzero_bits (r);
> 
>   signop sign = TYPE_SIGN (TREE_TYPE(m_base[0]));
>   unsigned bld_pair = 0;
> @@ -2675,15 +2631,14 @@ irange::irange_intersect (const irange &r)
>   // At the exit of this loop, it is one of 2 things:
>   // ran out of r1, or r2, but either means we are done.
>   m_num_ranges = bld_pair;
> +  if (m_num_ranges == 0)
> +    {
> +      set_undefined ();
> +      return true;
> +    }
> 
>   m_kind = VR_RANGE;
> -  if (!undefined_p ())
> -    m_nonzero_mask = saved_nz;
> -  normalize_kind ();
> -
> -  if (flag_checking)
> -    verify_range ();
> -
> +  intersect_nonzero_bits (r);
>   return true;
> }
> 
> @@ -2749,10 +2704,15 @@ irange::intersect (const wide_int& lb, const wide_int& ub)
>     }
> 
>   m_num_ranges = bld_index;
> +  if (m_num_ranges == 0)
> +    {
> +      set_undefined ();
> +      return true;
> +    }
> 
>   m_kind = VR_RANGE;
> -  normalize_kind ();
> -
> +  // No need to call normalize_kind(), as the caller will do this
> +  // while intersecting the nonzero mask.
>   if (flag_checking)
>     verify_range ();
>   return true;
> @@ -2801,7 +2761,6 @@ irange::invert ()
>     }
> 
>   gcc_checking_assert (!undefined_p () && !varying_p ());
> -  m_nonzero_mask = NULL;
> 
>   // We always need one more set of bounds to represent an inverse, so
>   // if we're at the limit, we can't properly represent things.
> @@ -2822,6 +2781,7 @@ irange::invert ()
>   signop sign = TYPE_SIGN (ttype);
>   wide_int type_min = wi::min_value (prec, sign);
>   wide_int type_max = wi::max_value (prec, sign);
> +  m_nonzero_mask = wi::shwi (-1, prec);
>   if (m_num_ranges == m_max_ranges
>       && lower_bound () != type_min
>       && upper_bound () != type_max)
> @@ -2896,127 +2856,140 @@ irange::invert ()
>     verify_range ();
> }
> 
> -void
> -irange::set_nonzero_bits (tree mask)
> +// Return the nonzero bits inherent in the range.
> +
> +wide_int
> +irange::get_nonzero_bits_from_range () const
> {
> -  gcc_checking_assert (!undefined_p ());
> +  // For legacy symbolics.
> +  if (!constant_p ())
> +    return wi::shwi (-1, TYPE_PRECISION (type ()));
> 
> -  if (!mask)
> +  wide_int min = lower_bound ();
> +  wide_int max = upper_bound ();
> +  wide_int xorv = min ^ max;
> +  if (xorv != 0)
>     {
> -      if (m_nonzero_mask)
> -    {
> -      m_nonzero_mask = NULL;
> -      // Clearing the mask may have turned a range into VARYING.
> -      normalize_kind ();
> -    }
> -      return;
> +      unsigned prec = TYPE_PRECISION (type ());
> +      xorv = wi::mask (prec - wi::clz (xorv), false, prec);
>     }
> -  m_nonzero_mask = mask;
> -  // Setting the mask may have turned a VARYING into a range.
> -  if (m_kind == VR_VARYING)
> -    m_kind = VR_RANGE;
> -
> -  if (flag_checking)
> -    verify_range ();
> +  return min | xorv;
> }
> 
> -void
> -irange::set_nonzero_bits (const wide_int_ref &bits)
> +// If the nonzero mask can be trivially converted to a range, do
> +// so and return TRUE.
> +
> +bool
> +irange::set_range_from_nonzero_bits ()
> {
>   gcc_checking_assert (!undefined_p ());
> +  unsigned popcount = wi::popcount (m_nonzero_mask);
> 
> -  if (bits == -1)
> -    {
> -      set_nonzero_bits (NULL);
> -      return;
> -    }
>   // If we have only one bit set in the mask, we can figure out the
>   // range immediately.
> -  if (wi::popcount (bits) == 1)
> +  if (popcount == 1)
>     {
>       // Make sure we don't pessimize the range.
> -      tree tbits = wide_int_to_tree (type (), bits);
> -      if (!contains_p (tbits))
> -    {
> -      set_nonzero_bits (tbits);
> -      return;
> -    }
> +      if (!contains_p (wide_int_to_tree (type (), m_nonzero_mask)))
> +    return false;
> 
>       bool has_zero = contains_p (build_zero_cst (type ()));
> +      wide_int bits = m_nonzero_mask;
>       set (type (), bits, bits);
> +      m_nonzero_mask = bits;
>       if (has_zero)
>    {
>      int_range<2> zero;
>      zero.set_zero (type ());
>      union_ (zero);
>    }
> +      return true;
>     }
> -  set_nonzero_bits (wide_int_to_tree (type (), bits));
> +  return false;
> }
> 
> -wide_int
> -irange::get_nonzero_bits () const
> +void
> +irange::set_nonzero_bits (const wide_int_ref &bits)
> {
>   gcc_checking_assert (!undefined_p ());
> +  unsigned prec = TYPE_PRECISION (type ());
> +  gcc_checking_assert (prec == bits.get_precision ());
> 
> -  // In case anyone in the legacy world queries us.
> -  if (!constant_p ())
> -    {
> -      if (m_nonzero_mask)
> -    return wi::to_wide (m_nonzero_mask);
> -      return wi::shwi (-1, TYPE_PRECISION (type ()));
> -    }
> +  // Drop VARYINGs with a nonzero mask to a plain range.
> +  if (m_kind == VR_VARYING && bits != -1)
> +    m_kind = VR_RANGE;
> 
> -  // Calculate the nonzero bits inherent in the range.
> -  wide_int min = lower_bound ();
> -  wide_int max = upper_bound ();
> -  wide_int xorv = min ^ max;
> -  if (xorv != 0)
> -    {
> -      unsigned prec = TYPE_PRECISION (type ());
> -      xorv = wi::mask (prec - wi::clz (xorv), false, prec);
> -    }
> -  wide_int mask = min | xorv;
> +  m_nonzero_mask = wide_int::from (bits, prec, TYPE_SIGN (type ()));
> +  if (set_range_from_nonzero_bits ())
> +    return;
> 
> -  // Return the nonzero bits augmented by the range.
> -  if (m_nonzero_mask)
> -    return mask & wi::to_wide (m_nonzero_mask);
> +  normalize_kind ();
> +  if (flag_checking)
> +    verify_range ();
> +}
> 
> -  return mask;
> +// Return the nonzero bitmask.  This will return the nonzero bits plus
> +// the nonzero bits inherent in the range.
> +
> +wide_int
> +irange::get_nonzero_bits () const
> +{
> +  gcc_checking_assert (!undefined_p ());
> +  // The nonzero mask inherent in the range is calculated on-demand.
> +  // For example, [0,255] does not have a 0xff nonzero mask by default
> +  // (unless manually set).  This saves us considerable time, because
> +  // setting it at creation incurs a large penalty for irange::set.
> +  // At the time of writing there was a 5% slowdown in VRP if we kept
> +  // the mask precisely up to date at all times.  Instead, we default
> +  // to -1 and set it when explicitly requested.  However, this
> +  // function will always return the correct mask.
> +  return m_nonzero_mask & get_nonzero_bits_from_range ();
> }
> 
> -// Intersect the nonzero bits in R into THIS.
> +// Intersect the nonzero bits in R into THIS and normalize the range.
> +// Return TRUE if the intersection changed anything.
> 
> bool
> irange::intersect_nonzero_bits (const irange &r)
> {
>   gcc_checking_assert (!undefined_p () && !r.undefined_p ());
> 
> -  if (m_nonzero_mask || r.m_nonzero_mask)
> +  bool changed = false;
> +  if (m_nonzero_mask != r.m_nonzero_mask)
>     {
> -      wide_int nz = wi::bit_and (get_nonzero_bits (),
> -                 r.get_nonzero_bits ());
> -      set_nonzero_bits (nz);
> -      return true;
> +      m_nonzero_mask = get_nonzero_bits () & r.get_nonzero_bits ();
> +      if (set_range_from_nonzero_bits ())
> +    return true;
> +      changed = true;
>     }
> -  return false;
> +  normalize_kind ();
> +  if (flag_checking)
> +    verify_range ();
> +  return changed;
> }
> 
> -// Union the nonzero bits in R into THIS.
> +// Union the nonzero bits in R into THIS and normalize the range.
> +// Return TRUE if the union changed anything.
> 
> bool
> irange::union_nonzero_bits (const irange &r)
> {
>   gcc_checking_assert (!undefined_p () && !r.undefined_p ());
> 
> -  if (m_nonzero_mask || r.m_nonzero_mask)
> +  bool changed = false;
> +  if (m_nonzero_mask != r.m_nonzero_mask)
>     {
> -      wide_int nz = wi::bit_or (get_nonzero_bits (),
> -                r.get_nonzero_bits ());
> -      set_nonzero_bits (nz);
> -      return true;
> +      m_nonzero_mask = get_nonzero_bits () | r.get_nonzero_bits ();
> +      // No need to call set_range_from_nonzero_bits, because we'll
> +      // never narrow the range.  Besides, it would cause endless
> +      // recursion because of the union_ in
> +      // set_range_from_nonzero_bits.
> +      changed = true;
>     }
> -  return false;
> +  normalize_kind ();
> +  if (flag_checking)
> +    verify_range ();
> +  return changed;
> }
> 
> void
> @@ -3605,13 +3578,6 @@ range_tests_nonzero_bits ()
>   r0.union_ (r1);
>   ASSERT_TRUE (r0.get_nonzero_bits () == 0xff);
> 
> -  // Union where the mask of nonzero bits is implicit from the range.
> -  r0.set_varying (integer_type_node);
> -  r0.set_nonzero_bits (0xf00);
> -  r1.set_zero (integer_type_node); // nonzero mask is implicitly 0
> -  r0.union_ (r1);
> -  ASSERT_TRUE (r0.get_nonzero_bits () == 0xf00);
> -
>   // Intersect of nonzero bits.
>   r0.set (INT (0), INT (255));
>   r0.set_nonzero_bits (0xfe);
> diff --git a/gcc/value-range.h b/gcc/value-range.h
> index 556e31aece1..d1663620444 100644
> --- a/gcc/value-range.h
> +++ b/gcc/value-range.h
> @@ -207,14 +207,15 @@ private:
> 
>   void irange_set_1bit_anti_range (tree, tree);
>   bool varying_compatible_p () const;
> -  void set_nonzero_bits (tree mask);
>   bool intersect_nonzero_bits (const irange &r);
>   bool union_nonzero_bits (const irange &r);
> +  wide_int get_nonzero_bits_from_range () const;
> +  bool set_range_from_nonzero_bits ();
> 
>   bool intersect (const wide_int& lb, const wide_int& ub);
>   unsigned char m_num_ranges;
>   unsigned char m_max_ranges;
> -  tree m_nonzero_mask;
> +  wide_int m_nonzero_mask;
>   tree *m_base;
> };
> 
> @@ -682,10 +683,11 @@ irange::varying_compatible_p () const
>   if (INTEGRAL_TYPE_P (t))
>     return (wi::to_wide (l) == wi::min_value (prec, sign)
>        && wi::to_wide (u) == wi::max_value (prec, sign)
> -        && !m_nonzero_mask);
> +        && m_nonzero_mask == -1);
>   if (POINTER_TYPE_P (t))
>     return (wi::to_wide (l) == 0
> -        && wi::to_wide (u) == wi::max_value (prec, sign));
> +        && wi::to_wide (u) == wi::max_value (prec, sign)
> +        && m_nonzero_mask == -1);
>   return true;
> }
> 
> @@ -752,8 +754,6 @@ gt_ggc_mx (irange *x)
>       gt_ggc_mx (x->m_base[i * 2]);
>       gt_ggc_mx (x->m_base[i * 2 + 1]);
>     }
> -  if (x->m_nonzero_mask)
> -    gt_ggc_mx (x->m_nonzero_mask);
> }
> 
> inline void
> @@ -764,8 +764,6 @@ gt_pch_nx (irange *x)
>       gt_pch_nx (x->m_base[i * 2]);
>       gt_pch_nx (x->m_base[i * 2 + 1]);
>     }
> -  if (x->m_nonzero_mask)
> -    gt_pch_nx (x->m_nonzero_mask);
> }
> 
> inline void
> @@ -776,8 +774,6 @@ gt_pch_nx (irange *x, gt_pointer_operator op, void *cookie)
>       op (&x->m_base[i * 2], NULL, cookie);
>       op (&x->m_base[i * 2 + 1], NULL, cookie);
>     }
> -  if (x->m_nonzero_mask)
> -    op (&x->m_nonzero_mask, NULL, cookie);
> }
> 
> template<unsigned N>
> @@ -872,7 +868,6 @@ irange::set_undefined ()
> {
>   m_kind = VR_UNDEFINED;
>   m_num_ranges = 0;
> -  m_nonzero_mask = NULL;
> }
> 
> inline void
> @@ -880,7 +875,11 @@ irange::set_varying (tree type)
> {
>   m_kind = VR_VARYING;
>   m_num_ranges = 1;
> -  m_nonzero_mask = NULL;
> +
> +  if (type == error_mark_node)
> +    m_nonzero_mask = wi::shwi (-1, 1);
> +  else
> +    m_nonzero_mask = wi::shwi (-1, TYPE_PRECISION (type));
> 
>   if (INTEGRAL_TYPE_P (type))
>     {
> @@ -1002,8 +1001,6 @@ irange::normalize_kind ()
>    m_kind = VR_VARYING;
>       else if (m_kind == VR_ANTI_RANGE)
>    set_undefined ();
> -      else
> -    gcc_unreachable ();
>     }
> }
> 
> -- 
> 2.37.1
>
  
Aldy Hernandez Oct. 4, 2022, 11:28 a.m. UTC | #2
On Tue, Oct 4, 2022 at 9:55 AM Richard Biener
<richard.guenther@gmail.com> wrote:
>
>
> Am 04.10.2022 um 09:36 schrieb Aldy Hernandez via Gcc-patches <gcc-patches@gcc.gnu.org>:
> >
> > The reason the nonzero mask was kept in a tree was basically inertia,
> > as everything in irange is a tree.  However, there's no need to keep
> > it in a tree, as the conversions to and from wide ints are very
> > annoying.  That, plus special casing NULL masks to be -1 is prone
> > to error.
> >
> > I have not only rewritten all the uses to assume a wide int, but
> > have corrected a few places where we weren't propagating the masks, or
> > rather pessimizing them to -1.  This will become more important in
> > upcoming patches where we make better use of the masks.
> >
> > Performance testing shows a trivial improvement in VRP, as things like
> > irange::contains_p() are tied to a tree.  Ughh, can't wait for trees in
> > iranges to go away.
>
> You want trailing wide int storage though.  A wide_int is quite large.

Absolutely, this is only for short-term storage.  Any time we need
long-term storage, say global ranges in SSA_NAME_RANGE_INFO, we go
through vrange_storage, which will stream things in a more
memory-efficient manner.  For irange, vrange_storage will stream all
the sub-ranges, plus the nonzero bitmask (which is the first entry in
such storage), as trailing_wide_ints.

See irange_storage_slot to see how it lives in GC memory.

Aldy
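
For a rough picture of what "trailing" storage means in the exchange
above, here is a conceptual sketch with assumed member names (not
GCC's actual trailing_wide_ints): instead of every object embedding a
fixed-capacity wide_int, a storage slot keeps a small header followed
by only as many elements as the stored value needs.

  #include <cstddef>

  // Conceptual sketch only; layout and names are assumptions.
  struct trailing_int_sketch
  {
    unsigned short precision;   // precision in bits
    unsigned short len;         // number of significant elements
    long long elements[1];      // really LEN elements, allocated inline

    // Bytes needed for a value with LEN significant elements.
    static std::size_t bytes_needed (unsigned len)
    {
      return offsetof (trailing_int_sketch, elements)
             + len * sizeof (long long);
    }
  };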
  
Aldy Hernandez Oct. 4, 2022, 12:13 p.m. UTC | #3
On Tue, Oct 4, 2022, 13:28 Aldy Hernandez <aldyh@redhat.com> wrote:

> On Tue, Oct 4, 2022 at 9:55 AM Richard Biener
> <richard.guenther@gmail.com> wrote:
> >
> >
> > Am 04.10.2022 um 09:36 schrieb Aldy Hernandez via Gcc-patches <
> gcc-patches@gcc.gnu.org>:
> > >
> > > The reason the nonzero mask was kept in a tree was basically inertia,
> > > as everything in irange is a tree.  However, there's no need to keep
> > > it in a tree, as the conversions to and from wide ints are very
> > > annoying.  That, plus special casing NULL masks to be -1 is prone
> > > to error.
> > >
> > > I have not only rewritten all the uses to assume a wide int, but
> > > have corrected a few places where we weren't propagating the masks, or
> > > rather pessimizing them to -1.  This will become more important in
> > > upcoming patches where we make better use of the masks.
> > >
> > > Performance testing shows a trivial improvement in VRP, as things like
> > > irange::contains_p() are tied to a tree.  Ughh, can't wait for trees in
> > > iranges to go away.
> >
> > You want trailing wide int storage though.  A wide_int is quite large.
>
> Absolutely, this is only for short term storage.  Any time we need
> long term storage, say global ranges in SSA_NAME_RANGE_INFO, we go
> through vrange_storage which will stream things in a more memory
> efficient manner.  For irange, vrange_storage will stream all the
> sub-ranges, including the nonzero bitmask which is the first entry in
> such storage, as trailing_wide_ints.
>
> See irange_storage_slot to see how it lives in GC memory.
>

That being said, the ranger's internal cache uses iranges, albeit with a
squished down number of subranges (the minimum amount to represent the
range).  So each cache entry will now be bigger by the difference between
one tree and one wide int.

I wonder if we should change the cache to use vrange_storage. If not now,
then when we convert all the subranges to wide ints.

Of course, the memory pressure of the cache is not nearly as problematic as
SSA_NAME_RANGE_INFO. The cache only stores names it cares about.

Aldy
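
For a rough feel of the size difference being discussed, a small
illustration follows.  The 8-element capacity is an assumption; the
real wide_int capacity depends on the target's widest integer mode.

  #include <cstdio>

  // Illustration only: a tree member is a single pointer, while a
  // wide_int embeds a fixed-capacity array of HOST_WIDE_INTs plus
  // bookkeeping fields.  The capacity of 8 is a ballpark assumption.
  struct mask_as_tree { void *mask; };
  struct mask_as_wide { long long val[8]; unsigned len; unsigned precision; };

  int main ()
  {
    std::printf ("tree-style member:     %zu bytes\n", sizeof (mask_as_tree));
    std::printf ("wide_int-style member: %zu bytes\n", sizeof (mask_as_wide));
    return 0;
  }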
  
Andrew MacLeod Oct. 4, 2022, 1:27 p.m. UTC | #4
On 10/4/22 08:13, Aldy Hernandez via Gcc-patches wrote:
> On Tue, Oct 4, 2022, 13:28 Aldy Hernandez <aldyh@redhat.com> wrote:
>
>> On Tue, Oct 4, 2022 at 9:55 AM Richard Biener
>> <richard.guenther@gmail.com> wrote:
>>>
>>> Am 04.10.2022 um 09:36 schrieb Aldy Hernandez via Gcc-patches <
>> gcc-patches@gcc.gnu.org>:
>>>> The reason the nonzero mask was kept in a tree was basically inertia,
>>>> as everything in irange is a tree.  However, there's no need to keep
>>>> it in a tree, as the conversions to and from wide ints are very
>>>> annoying.  That, plus special casing NULL masks to be -1 is prone
>>>> to error.
>>>>
>>>> I have not only rewritten all the uses to assume a wide int, but
>>>> have corrected a few places where we weren't propagating the masks, or
>>>> rather pessimizing them to -1.  This will become more important in
>>>> upcoming patches where we make better use of the masks.
>>>>
>>>> Performance testing shows a trivial improvement in VRP, as things like
>>>> irange::contains_p() are tied to a tree.  Ughh, can't wait for trees in
>>>> iranges to go away.
>>> You want trailing wide int storage though.  A wide_int is quite large.
>> Absolutely, this is only for short term storage.  Any time we need
>> long term storage, say global ranges in SSA_NAME_RANGE_INFO, we go
>> through vrange_storage which will stream things in a more memory
>> efficient manner.  For irange, vrange_storage will stream all the
>> sub-ranges, including the nonzero bitmask which is the first entry in
>> such storage, as trailing_wide_ints.
>>
>> See irange_storage_slot to see how it lives in GC memory.
>>
> That being said, the ranger's internal cache uses iranges, albeit with a
> squished down number of subranges (the minimum amount to represent the
> range).  So each cache entry will now be bigger by the difference between
> one tree and one wide int.
>
> I wonder if we should change the cache to use vrange_storage. If not now,
> then when we convert all the subranges to wide ints.
>
> Of course, the memory pressure of the cache is not nearly as problematic as
> SSA_NAME_RANGE_INFO. The cache only stores names it cares about.

Ranger's cache can be a memory bottleneck in pathological cases.
Certainly not as bad as it used to be, but I'm sure it can still be
problematic.  It's supposed to be a memory-efficient representation
because of that.  The cache can have an entry for any live ssa-name
(which means all of them at some point in the IL) multiplied by a factor
involving the number of dominator blocks and outgoing edges ranges are
calculated on.  So while SSA_NAME_RANGE_INFO is a linear thing, the
cache lies somewhere between a logarithmic and exponential factor based
on the CFG size.

If you are growing the common case of 1 to 2 endpoints to more than
double the size (and most of the time it would not be needed), that
would not be very appealing :-P  If we have any wide ints, they would
need to be a memory-efficient version.  The cache uses an
irange_allocator, which is supposed to provide memory-efficient
objects, hence why it trims the number of ranges down to only what is
needed.  It seems like a trailing wide_int might be in order based on
that.

Andrew


PS. This will be more problematic if you eventually introduce a
known_ones wide_int.  I thought the mask tracking was/could be
something simple like a HOST_WIDE_INT; then you only track masks in
types up to the size of a HOST_WIDE_INT, and storage and masking are
all trivial without going through a wide_int.  Is that not so/possible?
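
A minimal sketch of that simpler scheme (hypothetical names, not GCC
code; masks are only tracked for types no wider than HOST_WIDE_INT,
everything else falls back to -1):

  struct hwi_mask_sketch
  {
    long long mask;   // stands in for HOST_WIDE_INT
    bool tracked;     // false for types wider than HOST_WIDE_INT

    void set_nonzero_bits (long long bits, unsigned type_precision)
    {
      if (type_precision <= sizeof (long long) * 8)
        {
          mask = bits;
          tracked = true;
        }
      else
        tracked = false;   // too wide: give up
    }

    long long get_nonzero_bits () const
    {
      // -1 means every bit may be nonzero.
      return tracked ? mask : -1;
    }
  };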




> Aldy
>
  
Aldy Hernandez Oct. 4, 2022, 2:30 p.m. UTC | #5
On Tue, Oct 4, 2022 at 3:27 PM Andrew MacLeod <amacleod@redhat.com> wrote:
>
>
> On 10/4/22 08:13, Aldy Hernandez via Gcc-patches wrote:
> > On Tue, Oct 4, 2022, 13:28 Aldy Hernandez <aldyh@redhat.com> wrote:
> >
> >> On Tue, Oct 4, 2022 at 9:55 AM Richard Biener
> >> <richard.guenther@gmail.com> wrote:
> >>>
> >>> Am 04.10.2022 um 09:36 schrieb Aldy Hernandez via Gcc-patches <
> >> gcc-patches@gcc.gnu.org>:
> >>>> The reason the nonzero mask was kept in a tree was basically inertia,
> >>>> as everything in irange is a tree.  However, there's no need to keep
> >>>> it in a tree, as the conversions to and from wide ints are very
> >>>> annoying.  That, plus special casing NULL masks to be -1 is prone
> >>>> to error.
> >>>>
> >>>> I have not only rewritten all the uses to assume a wide int, but
> >>>> have corrected a few places where we weren't propagating the masks, or
> >>>> rather pessimizing them to -1.  This will become more important in
> >>>> upcoming patches where we make better use of the masks.
> >>>>
> >>>> Performance testing shows a trivial improvement in VRP, as things like
> >>>> irange::contains_p() are tied to a tree.  Ughh, can't wait for trees in
> >>>> iranges to go away.
> >>> You want trailing wide int storage though.  A wide_int is quite large.
> >> Absolutely, this is only for short term storage.  Any time we need
> >> long term storage, say global ranges in SSA_NAME_RANGE_INFO, we go
> >> through vrange_storage which will stream things in a more memory
> >> efficient manner.  For irange, vrange_storage will stream all the
> >> sub-ranges, including the nonzero bitmask which is the first entry in
> >> such storage, as trailing_wide_ints.
> >>
> >> See irange_storage_slot to see how it lives in GC memory.
> >>
> > That being said, the ranger's internal cache uses iranges, albeit with a
> > squished down number of subranges (the minimum amount to represent the
> > range).  So each cache entry will now be bigger by the difference between
> > one tree and one wide int.
> >
> > I wonder if we should change the cache to use vrange_storage. If not now,
> > then when we convert all the subranges to wide ints.
> >
> > Of course, the memory pressure of the cache is not nearly as problematic as
> > SSA_NAME_RANGE_INFO. The cache only stores names it cares about.
>
> Rangers cache can be a memory bottleneck in pathological cases..
> Certainly not as bad as it use to be, but I'm sure it can still be
> problematic.    Its suppose to be a memory efficient representation
> because of that.  The cache can have an entry for any live ssa-name
> (which means all of them at some point in the IL) multiplied by a factor
> involving the number of dominator blocks and outgoing edges ranges are
> calculated on.   So while SSA_NAME_RANGE_INFO is a linear thing, the
> cache lies somewhere between a logarithmic and exponential factor based
> on the CFG size.

Hmmm, perhaps the ultimate goal here should be to convert the cache to
use vrange_storage, which uses trailing wide ints for all of the end
points plus the masks (known_ones included for the next release).

>
> if you are growing the common cases of 1 to 2 endpoints to more than
> double in size (and most of the time not be needed), that would not be
> very appealing :-P  If we have any wide-ints, they would need to be a
> memory efficient version.   The Cache uses an irange_allocator, which is
> suppose to provide a memory efficient objects.. hence why it trims the
> number of ranges down to only what is needed.  It seems like a trailing
> wide-Int might be in order based on that..
>
> Andrew
>
>
> PS. which will be more problematic if you eventually introduce a
> known_ones wide_int.    I thought the mask tracking was/could be
> something simple like  HOST_WIDE_INT..  then you only tracks masks in
> types up to the size of a HOST_WIDE_INT.  then storage and masking is
> all trivial without going thru a wide_int.    Is that not so/possible?

That's certainly easy and cheaper to do.  The hard part was fixing all
the places where we weren't keeping the masks up to date, and that's
done (sans any bugs ;-)).

Can we get consensus here on only tracking masks for type sizes less
than HOST_WIDE_INT?  I'd hate to do all the work only to realize we
need to track 512 bit masks on a 32-bit host cross :-).

Aldy
  
Richard Biener Oct. 4, 2022, 2:34 p.m. UTC | #6
> Am 04.10.2022 um 16:30 schrieb Aldy Hernandez <aldyh@redhat.com>:
> 
> On Tue, Oct 4, 2022 at 3:27 PM Andrew MacLeod <amacleod@redhat.com> wrote:
>> 
>> 
>>> On 10/4/22 08:13, Aldy Hernandez via Gcc-patches wrote:
>>>> On Tue, Oct 4, 2022, 13:28 Aldy Hernandez <aldyh@redhat.com> wrote:
>>> 
>>>> On Tue, Oct 4, 2022 at 9:55 AM Richard Biener
>>>> <richard.guenther@gmail.com> wrote:
>>>>> 
>>>>> Am 04.10.2022 um 09:36 schrieb Aldy Hernandez via Gcc-patches <
>>>> gcc-patches@gcc.gnu.org>:
>>>>>> The reason the nonzero mask was kept in a tree was basically inertia,
>>>>>> as everything in irange is a tree.  However, there's no need to keep
>>>>>> it in a tree, as the conversions to and from wide ints are very
>>>>>> annoying.  That, plus special casing NULL masks to be -1 is prone
>>>>>> to error.
>>>>>> 
>>>>>> I have not only rewritten all the uses to assume a wide int, but
>>>>>> have corrected a few places where we weren't propagating the masks, or
>>>>>> rather pessimizing them to -1.  This will become more important in
>>>>>> upcoming patches where we make better use of the masks.
>>>>>> 
>>>>>> Performance testing shows a trivial improvement in VRP, as things like
>>>>>> irange::contains_p() are tied to a tree.  Ughh, can't wait for trees in
>>>>>> iranges to go away.
>>>>> You want trailing wide int storage though.  A wide_int is quite large.
>>>> Absolutely, this is only for short term storage.  Any time we need
>>>> long term storage, say global ranges in SSA_NAME_RANGE_INFO, we go
>>>> through vrange_storage which will stream things in a more memory
>>>> efficient manner.  For irange, vrange_storage will stream all the
>>>> sub-ranges, including the nonzero bitmask which is the first entry in
>>>> such storage, as trailing_wide_ints.
>>>> 
>>>> See irange_storage_slot to see how it lives in GC memory.
>>>> 
>>> That being said, the ranger's internal cache uses iranges, albeit with a
>>> squished down number of subranges (the minimum amount to represent the
>>> range).  So each cache entry will now be bigger by the difference between
>>> one tree and one wide int.
>>> 
>>> I wonder if we should change the cache to use vrange_storage. If not now,
>>> then when we convert all the subranges to wide ints.
>>> 
>>> Of course, the memory pressure of the cache is not nearly as problematic as
>>> SSA_NAME_RANGE_INFO. The cache only stores names it cares about.
>> 
>> Rangers cache can be a memory bottleneck in pathological cases..
>> Certainly not as bad as it use to be, but I'm sure it can still be
>> problematic.    Its suppose to be a memory efficient representation
>> because of that.  The cache can have an entry for any live ssa-name
>> (which means all of them at some point in the IL) multiplied by a factor
>> involving the number of dominator blocks and outgoing edges ranges are
>> calculated on.   So while SSA_NAME_RANGE_INFO is a linear thing, the
>> cache lies somewhere between a logarithmic and exponential factor based
>> on the CFG size.
> 
> Hmmm, perhaps the ultimate goal here should be to convert the cache to
> use vrange_storage, which uses trailing wide ints for all of the end
> points plus the masks (known_ones included for the next release).
> 
>> 
>> if you are growing the common cases of 1 to 2 endpoints to more than
>> double in size (and most of the time not be needed), that would not be
>> very appealing :-P  If we have any wide-ints, they would need to be a
>> memory efficient version.   The Cache uses an irange_allocator, which is
>> suppose to provide a memory efficient objects.. hence why it trims the
>> number of ranges down to only what is needed.  It seems like a trailing
>> wide-Int might be in order based on that..
>> 
>> Andrew
>> 
>> 
>> PS. which will be more problematic if you eventually introduce a
>> known_ones wide_int.    I thought the mask tracking was/could be
>> something simple like  HOST_WIDE_INT..  then you only tracks masks in
>> types up to the size of a HOST_WIDE_INT.  then storage and masking is
>> all trivial without going thru a wide_int.    Is that not so/possible?
> 
> That's certainly easy and cheaper to do.  The hard part was fixing all
> the places where we weren't keeping the masks up to date, and that's
> done (sans any bugs ;-)).
> 
> Can we get consensus here on only tracking masks for type sizes less
> than HOST_WIDE_INT?  I'd hate to do all the work only to realize we
> need to track 512 bit masks on a 32-bit host cross :-).

64 bits are not enough; 128 might be.  But there's trailing wide int storage, so I don't see the point in restricting ourselves?

> Aldy
>
  
Aldy Hernandez Oct. 4, 2022, 3:14 p.m. UTC | #7
On Tue, Oct 4, 2022 at 4:34 PM Richard Biener
<richard.guenther@gmail.com> wrote:
>
>
>
> > Am 04.10.2022 um 16:30 schrieb Aldy Hernandez <aldyh@redhat.com>:
> >
> > On Tue, Oct 4, 2022 at 3:27 PM Andrew MacLeod <amacleod@redhat.com> wrote:
> >>
> >>
> >>> On 10/4/22 08:13, Aldy Hernandez via Gcc-patches wrote:
> >>>> On Tue, Oct 4, 2022, 13:28 Aldy Hernandez <aldyh@redhat.com> wrote:
> >>>
> >>>> On Tue, Oct 4, 2022 at 9:55 AM Richard Biener
> >>>> <richard.guenther@gmail.com> wrote:
> >>>>>
> >>>>> Am 04.10.2022 um 09:36 schrieb Aldy Hernandez via Gcc-patches <
> >>>> gcc-patches@gcc.gnu.org>:
> >>>>>> The reason the nonzero mask was kept in a tree was basically inertia,
> >>>>>> as everything in irange is a tree.  However, there's no need to keep
> >>>>>> it in a tree, as the conversions to and from wide ints are very
> >>>>>> annoying.  That, plus special casing NULL masks to be -1 is prone
> >>>>>> to error.
> >>>>>>
> >>>>>> I have not only rewritten all the uses to assume a wide int, but
> >>>>>> have corrected a few places where we weren't propagating the masks, or
> >>>>>> rather pessimizing them to -1.  This will become more important in
> >>>>>> upcoming patches where we make better use of the masks.
> >>>>>>
> >>>>>> Performance testing shows a trivial improvement in VRP, as things like
> >>>>>> irange::contains_p() are tied to a tree.  Ughh, can't wait for trees in
> >>>>>> iranges to go away.
> >>>>> You want trailing wide int storage though.  A wide_int is quite large.
> >>>> Absolutely, this is only for short term storage.  Any time we need
> >>>> long term storage, say global ranges in SSA_NAME_RANGE_INFO, we go
> >>>> through vrange_storage which will stream things in a more memory
> >>>> efficient manner.  For irange, vrange_storage will stream all the
> >>>> sub-ranges, including the nonzero bitmask which is the first entry in
> >>>> such storage, as trailing_wide_ints.
> >>>>
> >>>> See irange_storage_slot to see how it lives in GC memory.
> >>>>
> >>> That being said, the ranger's internal cache uses iranges, albeit with a
> >>> squished down number of subranges (the minimum amount to represent the
> >>> range).  So each cache entry will now be bigger by the difference between
> >>> one tree and one wide int.
> >>>
> >>> I wonder if we should change the cache to use vrange_storage. If not now,
> >>> then when we convert all the subranges to wide ints.
> >>>
> >>> Of course, the memory pressure of the cache is not nearly as problematic as
> >>> SSA_NAME_RANGE_INFO. The cache only stores names it cares about.
> >>
> >> Rangers cache can be a memory bottleneck in pathological cases..
> >> Certainly not as bad as it use to be, but I'm sure it can still be
> >> problematic.    Its suppose to be a memory efficient representation
> >> because of that.  The cache can have an entry for any live ssa-name
> >> (which means all of them at some point in the IL) multiplied by a factor
> >> involving the number of dominator blocks and outgoing edges ranges are
> >> calculated on.   So while SSA_NAME_RANGE_INFO is a linear thing, the
> >> cache lies somewhere between a logarithmic and exponential factor based
> >> on the CFG size.
> >
> > Hmmm, perhaps the ultimate goal here should be to convert the cache to
> > use vrange_storage, which uses trailing wide ints for all of the end
> > points plus the masks (known_ones included for the next release).
> >
> >>
> >> if you are growing the common cases of 1 to 2 endpoints to more than
> >> double in size (and most of the time not be needed), that would not be
> >> very appealing :-P  If we have any wide-ints, they would need to be a
> >> memory efficient version.   The Cache uses an irange_allocator, which is
> >> suppose to provide a memory efficient objects.. hence why it trims the
> >> number of ranges down to only what is needed.  It seems like a trailing
> >> wide-Int might be in order based on that..
> >>
> >> Andrew
> >>
> >>
> >> PS. which will be more problematic if you eventually introduce a
> >> known_ones wide_int.    I thought the mask tracking was/could be
> >> something simple like  HOST_WIDE_INT..  then you only tracks masks in
> >> types up to the size of a HOST_WIDE_INT.  then storage and masking is
> >> all trivial without going thru a wide_int.    Is that not so/possible?
> >
> > That's certainly easy and cheaper to do.  The hard part was fixing all
> > the places where we weren't keeping the masks up to date, and that's
> > done (sans any bugs ;-)).
> >
> > Can we get consensus here on only tracking masks for type sizes less
> > than HOST_WIDE_INT?  I'd hate to do all the work only to realize we
> > need to track 512 bit masks on a 32-bit host cross :-).
>
> 64bits are not enough, 128 might be.  But there’s trailing wide int storage so I don’t see the point in restricting ourselves?

Fair enough.  Perhaps we should bite the bullet and convert the cache
to vrange_storage, which is all set up for streaming iranges with
trailing_wide_ints.  No changes should be necessary for irange, since
we never have more than 3-4 live at any one time.  It's the cache that
needs twiddling.

Aldy
  
Andrew MacLeod Oct. 4, 2022, 3:42 p.m. UTC | #8
On 10/4/22 11:14, Aldy Hernandez wrote:
> On Tue, Oct 4, 2022 at 4:34 PM Richard Biener
> <richard.guenther@gmail.com> wrote:
>>
>>
>>> Am 04.10.2022 um 16:30 schrieb Aldy Hernandez <aldyh@redhat.com>:
>>>
>>> On Tue, Oct 4, 2022 at 3:27 PM Andrew MacLeod <amacleod@redhat.com> wrote:
>>>>
>>>>> On 10/4/22 08:13, Aldy Hernandez via Gcc-patches wrote:
>>>>>> On Tue, Oct 4, 2022, 13:28 Aldy Hernandez <aldyh@redhat.com> wrote:
>>>>>> On Tue, Oct 4, 2022 at 9:55 AM Richard Biener
>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>> Am 04.10.2022 um 09:36 schrieb Aldy Hernandez via Gcc-patches <
>>>>>> gcc-patches@gcc.gnu.org>:
>>>>>>>> The reason the nonzero mask was kept in a tree was basically inertia,
>>>>>>>> as everything in irange is a tree.  However, there's no need to keep
>>>>>>>> it in a tree, as the conversions to and from wide ints are very
>>>>>>>> annoying.  That, plus special casing NULL masks to be -1 is prone
>>>>>>>> to error.
>>>>>>>>
>>>>>>>> I have not only rewritten all the uses to assume a wide int, but
>>>>>>>> have corrected a few places where we weren't propagating the masks, or
>>>>>>>> rather pessimizing them to -1.  This will become more important in
>>>>>>>> upcoming patches where we make better use of the masks.
>>>>>>>>
>>>>>>>> Performance testing shows a trivial improvement in VRP, as things like
>>>>>>>> irange::contains_p() are tied to a tree.  Ughh, can't wait for trees in
>>>>>>>> iranges to go away.
>>>>>>> You want trailing wide int storage though.  A wide_int is quite large.
>>>>>> Absolutely, this is only for short term storage.  Any time we need
>>>>>> long term storage, say global ranges in SSA_NAME_RANGE_INFO, we go
>>>>>> through vrange_storage which will stream things in a more memory
>>>>>> efficient manner.  For irange, vrange_storage will stream all the
>>>>>> sub-ranges, including the nonzero bitmask which is the first entry in
>>>>>> such storage, as trailing_wide_ints.
>>>>>>
>>>>>> See irange_storage_slot to see how it lives in GC memory.
>>>>>>
>>>>> That being said, the ranger's internal cache uses iranges, albeit with a
>>>>> squished down number of subranges (the minimum amount to represent the
>>>>> range).  So each cache entry will now be bigger by the difference between
>>>>> one tree and one wide int.
>>>>>
>>>>> I wonder if we should change the cache to use vrange_storage. If not now,
>>>>> then when we convert all the subranges to wide ints.
>>>>>
>>>>> Of course, the memory pressure of the cache is not nearly as problematic as
>>>>> SSA_NAME_RANGE_INFO. The cache only stores names it cares about.
>>>> Rangers cache can be a memory bottleneck in pathological cases..
>>>> Certainly not as bad as it use to be, but I'm sure it can still be
>>>> problematic.    Its suppose to be a memory efficient representation
>>>> because of that.  The cache can have an entry for any live ssa-name
>>>> (which means all of them at some point in the IL) multiplied by a factor
>>>> involving the number of dominator blocks and outgoing edges ranges are
>>>> calculated on.   So while SSA_NAME_RANGE_INFO is a linear thing, the
>>>> cache lies somewhere between a logarithmic and exponential factor based
>>>> on the CFG size.
>>> Hmmm, perhaps the ultimate goal here should be to convert the cache to
>>> use vrange_storage, which uses trailing wide ints for all of the end
>>> points plus the masks (known_ones included for the next release).
>>>
>>>> if you are growing the common cases of 1 to 2 endpoints to more than
>>>> double in size (and most of the time not be needed), that would not be
>>>> very appealing :-P  If we have any wide-ints, they would need to be a
>>>> memory efficient version.   The Cache uses an irange_allocator, which is
>>>> suppose to provide a memory efficient objects.. hence why it trims the
>>>> number of ranges down to only what is needed.  It seems like a trailing
>>>> wide-Int might be in order based on that..
>>>>
>>>> Andrew
>>>>
>>>>
>>>> PS. which will be more problematic if you eventually introduce a
>>>> known_ones wide_int.    I thought the mask tracking was/could be
>>>> something simple like  HOST_WIDE_INT..  then you only tracks masks in
>>>> types up to the size of a HOST_WIDE_INT.  then storage and masking is
>>>> all trivial without going thru a wide_int.    Is that not so/possible?
>>> That's certainly easy and cheaper to do.  The hard part was fixing all
>>> the places where we weren't keeping the masks up to date, and that's
>>> done (sans any bugs ;-)).
>>>
>>> Can we get consensus here on only tracking masks for type sizes less
>>> than HOST_WIDE_INT?  I'd hate to do all the work only to realize we
>>> need to track 512 bit masks on a 32-bit host cross :-).
>> 64bits are not enough, 128 might be.  But there’s trailing wide int storage so I don’t see the point in restricting ourselves?
> Fair enough.  Perhaps we should bite the bullet and convert the cache
> to vrange_storage which is all set up for streaming irange's with
> trailing_wide_ints.  No changes should be necessary for irange, since
> we never have more than 3-4 live at any one time.  It's the cache that
> needs twiddling.
>
Wouldn't it be irange_allocator that needs twiddling?  Its purpose in
life is to allocate iranges for memory storage; the cache is just a
client, as is ranger's global cache, etc.  That was the intention of
irange_allocator: to isolate clients from having to worry about memory
storage issues, wasn't it?

Or is that problematic?


Andrew
  
Aldy Hernandez Oct. 5, 2022, 10:14 a.m. UTC | #9
On Tue, Oct 4, 2022 at 5:42 PM Andrew MacLeod <amacleod@redhat.com> wrote:
>
>
> On 10/4/22 11:14, Aldy Hernandez wrote:
> > On Tue, Oct 4, 2022 at 4:34 PM Richard Biener
> > <richard.guenther@gmail.com> wrote:
> >>
> >>
> >>> Am 04.10.2022 um 16:30 schrieb Aldy Hernandez <aldyh@redhat.com>:
> >>>
> >>> On Tue, Oct 4, 2022 at 3:27 PM Andrew MacLeod <amacleod@redhat.com> wrote:
> >>>>
> >>>>> On 10/4/22 08:13, Aldy Hernandez via Gcc-patches wrote:
> >>>>>> On Tue, Oct 4, 2022, 13:28 Aldy Hernandez <aldyh@redhat.com> wrote:
> >>>>>> On Tue, Oct 4, 2022 at 9:55 AM Richard Biener
> >>>>>> <richard.guenther@gmail.com> wrote:
> >>>>>>> Am 04.10.2022 um 09:36 schrieb Aldy Hernandez via Gcc-patches <
> >>>>>> gcc-patches@gcc.gnu.org>:
> >>>>>>>> The reason the nonzero mask was kept in a tree was basically inertia,
> >>>>>>>> as everything in irange is a tree.  However, there's no need to keep
> >>>>>>>> it in a tree, as the conversions to and from wide ints are very
> >>>>>>>> annoying.  That, plus special casing NULL masks to be -1 is prone
> >>>>>>>> to error.
> >>>>>>>>
> >>>>>>>> I have not only rewritten all the uses to assume a wide int, but
> >>>>>>>> have corrected a few places where we weren't propagating the masks, or
> >>>>>>>> rather pessimizing them to -1.  This will become more important in
> >>>>>>>> upcoming patches where we make better use of the masks.
> >>>>>>>>
> >>>>>>>> Performance testing shows a trivial improvement in VRP, as things like
> >>>>>>>> irange::contains_p() are tied to a tree.  Ughh, can't wait for trees in
> >>>>>>>> iranges to go away.
> >>>>>>> You want trailing wide int storage though.  A wide_int is quite large.
> >>>>>> Absolutely, this is only for short term storage.  Any time we need
> >>>>>> long term storage, say global ranges in SSA_NAME_RANGE_INFO, we go
> >>>>>> through vrange_storage which will stream things in a more memory
> >>>>>> efficient manner.  For irange, vrange_storage will stream all the
> >>>>>> sub-ranges, including the nonzero bitmask which is the first entry in
> >>>>>> such storage, as trailing_wide_ints.
> >>>>>>
> >>>>>> See irange_storage_slot to see how it lives in GC memory.
> >>>>>>
> >>>>> That being said, the ranger's internal cache uses iranges, albeit with a
> >>>>> squished down number of subranges (the minimum amount to represent the
> >>>>> range).  So each cache entry will now be bigger by the difference between
> >>>>> one tree and one wide int.
> >>>>>
> >>>>> I wonder if we should change the cache to use vrange_storage. If not now,
> >>>>> then when we convert all the subranges to wide ints.
> >>>>>
> >>>>> Of course, the memory pressure of the cache is not nearly as problematic as
> >>>>> SSA_NAME_RANGE_INFO. The cache only stores names it cares about.
> >>>> Rangers cache can be a memory bottleneck in pathological cases..
> >>>> Certainly not as bad as it use to be, but I'm sure it can still be
> >>>> problematic.    Its suppose to be a memory efficient representation
> >>>> because of that.  The cache can have an entry for any live ssa-name
> >>>> (which means all of them at some point in the IL) multiplied by a factor
> >>>> involving the number of dominator blocks and outgoing edges ranges are
> >>>> calculated on.   So while SSA_NAME_RANGE_INFO is a linear thing, the
> >>>> cache lies somewhere between a logarithmic and exponential factor based
> >>>> on the CFG size.
> >>> Hmmm, perhaps the ultimate goal here should be to convert the cache to
> >>> use vrange_storage, which uses trailing wide ints for all of the end
> >>> points plus the masks (known_ones included for the next release).
> >>>
> >>>> if you are growing the common cases of 1 to 2 endpoints to more than
> >>>> double in size (and most of the time not be needed), that would not be
> >>>> very appealing :-P  If we have any wide-ints, they would need to be a
> >>>> memory efficient version.   The Cache uses an irange_allocator, which is
> >>>> suppose to provide a memory efficient objects.. hence why it trims the
> >>>> number of ranges down to only what is needed.  It seems like a trailing
> >>>> wide-Int might be in order based on that..
> >>>>
> >>>> Andrew
> >>>>
> >>>>
> >>>> PS. which will be more problematic if you eventually introduce a
> >>>> known_ones wide_int.    I thought the mask tracking was/could be
> >>>> something simple like  HOST_WIDE_INT..  then you only tracks masks in
> >>>> types up to the size of a HOST_WIDE_INT.  then storage and masking is
> >>>> all trivial without going thru a wide_int.    Is that not so/possible?
> >>> That's certainly easy and cheaper to do.  The hard part was fixing all
> >>> the places where we weren't keeping the masks up to date, and that's
> >>> done (sans any bugs ;-)).
> >>>
> >>> Can we get consensus here on only tracking masks for type sizes less
> >>> than HOST_WIDE_INT?  I'd hate to do all the work only to realize we
> >>> need to track 512 bit masks on a 32-bit host cross :-).
> >> 64bits are not enough, 128 might be.  But there’s trailing wide int storage so I don’t see the point in restricting ourselves?
> > Fair enough.  Perhaps we should bite the bullet and convert the cache
> > to vrange_storage, which is all set up for streaming iranges with
> > trailing_wide_ints.  No changes should be necessary for irange, since
> > we never have more than 3-4 live at any one time.  It's the cache that
> > needs twiddling.
> >
> Wouldn't it be irange_allocator that needs twiddling?  Its purpose in life
> is to allocate iranges for memory storage; the cache is just a
> client, as is ranger's global cache, etc.  Wasn't that the intention of
> irange_allocator, to isolate clients from having to worry about memory
> storage issues?

Hmmm, we currently have two storage mechanisms, vrange_allocator and
vrange_storage.  The first is used for the cache and the other for
global storage (SSA_NAME_RANGE_INFO).  The vrange_allocator returns a
plain irange (trees and all), but with the minimum size needed to
represent the range.  It's seamless to use because it returns an
irange that can be used with the same API we're used to.

On the other hand, vrange_storage returns an opaque pointer (i.e. not
a vrange) and has an API used explicitly to set and get the vrange in
storage:

class vrange_storage
{
public:
  vrange_storage (vrange_allocator *alloc) : m_alloc (alloc) { }
  void *alloc_slot (const vrange &r);
  void free (void *slot) { m_alloc->free (slot); }
  void get_vrange (const void *slot, vrange &r, tree type);
  void set_vrange (void *slot, const vrange &r);
  static bool fits_p (const void *slot, const vrange &r);
};

It also stores a range in the minimum size needed, but is more
memory-efficient because it stores the sub-ranges as
trailing_wide_ints.  However, it doesn't return a convenient irange we
can store into directly.  You must go through get_vrange() and
set_vrange().  I probably went this route because that's how the old
SSA_NAME_RANGE_INFO stuff worked... it was an API to convert an irange
to something streamable.  But in retrospect, we should probably have
merged everything into vrange_allocator.
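
For illustration, a cache client would go through something like the
following (purely a hypothetical sketch built from the class
declaration above; the function, variable names, and the exact
alloc_slot semantics are my assumptions):

void
cache_entry_sketch (vrange_storage &storage, const irange &r,
		    const irange &some_other_range, tree type)
{
  // Stream R into an opaque slot.
  void *slot = storage.alloc_slot (r);

  // Later, copy it back out through the explicit API...
  int_range_max tmp;
  storage.get_vrange (slot, tmp, type);

  // ...update it, and copy it back in, re-allocating if it no longer fits.
  tmp.union_ (some_other_range);
  if (vrange_storage::fits_p (slot, tmp))
    storage.set_vrange (slot, tmp);
  else
    {
      storage.free (slot);
      slot = storage.alloc_slot (tmp);
    }
}

whereas with the vrange_allocator the cache just assigns to and from
the returned irange.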

My initial thought was to convert the ranger's cache to use
vrange_storage, but I must admit that vrange_storage is not as elegant
as just getting an irange/vrange that can be stored directly.

I think a clean solution would be to modify the vrange_allocator
(currently used by the cache) to have an irange_storage of sorts that
inherits from vrange (rather than from irange, since irange carries a
tree storage pointer and other cruft).  Then we can have irange_storage
define the minimal set of things needed to make the cache client work
(set_undefined, set_varying, operator=).  Every other method would be
gcc_unreachable.  This way, the cache can continue to work as before
and we can have two iranges: one on trees (irange) and one with
trailing_wide_ints for storage (irange_storage).  And we'd provide a
way to copy back and forth seamlessly.
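
Roughly this shape (only a sketch of the idea; the member layout,
MAX_INTS, and the read() helper are made-up placeholders):

class irange_storage : public vrange
{
public:
  // The minimal API the cache needs.
  void set_undefined () override;
  void set_varying (tree type) override;
  irange_storage &operator= (const irange &);  // Copy an irange in.
  void read (irange &r, tree type) const;      // Copy it back out.
  // Every other vrange method would become a gcc_unreachable stub, e.g.:
  void accept (const vrange_visitor &) const override { gcc_unreachable (); }
private:
  // Endpoints of each sub-range plus the nonzero mask, stored as
  // trailing wide ints instead of trees.
  trailing_wide_ints<MAX_INTS> m_ints;
};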

Long term we should have one allocator, for both the cache (with
obstacks) and global storage (with GC).  I'm torn between converting
the cache to vrange_storage, since it already uses trailing wide ints,
and implementing an irange_storage for vrange_allocator that can be
transparently copied to and from the actual irange.

However... I don't think I have the stomach to overhaul the allocators
this late in the release.  For this release I may opt to put the
nonzero mask back in a tree, but have it always be set.  The NULL ==
-1 shortcut was very error prone.  The rest of my fixes in this patch
still apply, as they keep better track of the masks, which we need.

Thoughts?
Aldy


  
Aldy Hernandez Oct. 7, 2022, 9:23 a.m. UTC | #10
On Wed, Oct 5, 2022 at 12:14 PM Aldy Hernandez <aldyh@redhat.com> wrote:

> However... I don't think I have the stomach to overhaul the allocators
> this late in the release.  For this release I may opt to put the
> nonzero mask back in a tree, but have it always be set.  The NULL ==
> -1 shortcut was very error prone.  The rest of my fixes in this patch
> still apply, as they keep better track of the masks, which we need.

Here is the patch reverting the nonzero mask to trees.

Unfortunately, having the mask always set caused a 10% regression in
VRP, so that's a no-go.  I've gone back to keeping a NULL mask by
default that semantically means -1.  It's not as bad as I thought,
since the code is much cleaner now.  It's a shame, but a 10%
regression in VRP plus a 1.5% regression in overall compilation time
is unacceptable.  On the plus side, this is temporary, as we're moving
entirely to wide ints next release (with appropriate cache/allocator
changes).
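
So get_nonzero_bits() keeps synthesizing the -1 instead of storing it,
along these lines (a simplified sketch of the convention, not the
exact committed code):

  // A NULL mask is shorthand for "all bits may be set".
  if (!m_nonzero_mask)
    return wi::shwi (-1, TYPE_PRECISION (type ()));
  return wi::to_wide (m_nonzero_mask);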

I will commit after a final round of tests finishes.

Thanks.
Aldy
  

Patch

diff --git a/gcc/value-range-storage.cc b/gcc/value-range-storage.cc
index de7575ed48d..6e054622830 100644
--- a/gcc/value-range-storage.cc
+++ b/gcc/value-range-storage.cc
@@ -150,11 +150,7 @@  irange_storage_slot::set_irange (const irange &r)
 {
   gcc_checking_assert (fits_p (r));
 
-  // Avoid calling unsupported get_nonzero_bits on legacy.
-  if (r.legacy_mode_p ())
-    m_ints[0] = -1;
-  else
-    m_ints[0] = r.get_nonzero_bits ();
+  m_ints[0] = r.get_nonzero_bits ();
 
   unsigned pairs = r.num_pairs ();
   for (unsigned i = 0; i < pairs; ++i)
diff --git a/gcc/value-range.cc b/gcc/value-range.cc
index 6e196574de9..afb26a40083 100644
--- a/gcc/value-range.cc
+++ b/gcc/value-range.cc
@@ -940,7 +940,7 @@  irange::irange_set (tree min, tree max)
   m_base[1] = max;
   m_num_ranges = 1;
   m_kind = VR_RANGE;
-  m_nonzero_mask = NULL;
+  m_nonzero_mask = wi::shwi (-1, TYPE_PRECISION (TREE_TYPE (min)));
   normalize_kind ();
 
   if (flag_checking)
@@ -1014,7 +1014,7 @@  irange::irange_set_anti_range (tree min, tree max)
     }
 
   m_kind = VR_RANGE;
-  m_nonzero_mask = NULL;
+  m_nonzero_mask = wi::shwi (-1, TYPE_PRECISION (TREE_TYPE (min)));
   normalize_kind ();
 
   if (flag_checking)
@@ -1071,7 +1071,7 @@  irange::set (tree min, tree max, value_range_kind kind)
       m_base[0] = min;
       m_base[1] = max;
       m_num_ranges = 1;
-      m_nonzero_mask = NULL;
+      m_nonzero_mask = wi::shwi (-1, TYPE_PRECISION (TREE_TYPE (min)));
       return;
     }
 
@@ -1121,7 +1121,7 @@  irange::set (tree min, tree max, value_range_kind kind)
   m_base[0] = min;
   m_base[1] = max;
   m_num_ranges = 1;
-  m_nonzero_mask = NULL;
+  m_nonzero_mask = wi::shwi (-1, TYPE_PRECISION (TREE_TYPE (min)));
   normalize_kind ();
   if (flag_checking)
     verify_range ();
@@ -1136,14 +1136,11 @@  irange::verify_range ()
   if (m_kind == VR_UNDEFINED)
     {
       gcc_checking_assert (m_num_ranges == 0);
-      gcc_checking_assert (!m_nonzero_mask);
       return;
     }
-  if (m_nonzero_mask)
-    gcc_checking_assert (wi::to_wide (m_nonzero_mask) != -1);
   if (m_kind == VR_VARYING)
     {
-      gcc_checking_assert (!m_nonzero_mask);
+      gcc_checking_assert (m_nonzero_mask == -1);
       gcc_checking_assert (m_num_ranges == 1);
       gcc_checking_assert (varying_compatible_p ());
       return;
@@ -1238,7 +1235,7 @@  irange::legacy_equal_p (const irange &other) const
 			       other.tree_lower_bound (0))
 	  && vrp_operand_equal_p (tree_upper_bound (0),
 				  other.tree_upper_bound (0))
-	  && vrp_operand_equal_p (m_nonzero_mask, other.m_nonzero_mask));
+	  && get_nonzero_bits () == other.get_nonzero_bits ());
 }
 
 bool
@@ -1273,7 +1270,7 @@  irange::operator== (const irange &other) const
 	  || !operand_equal_p (ub, ub_other, 0))
 	return false;
     }
-  return vrp_operand_equal_p (m_nonzero_mask, other.m_nonzero_mask);
+  return get_nonzero_bits () == other.get_nonzero_bits ();
 }
 
 /* Return TRUE if this is a symbolic range.  */
@@ -1416,10 +1413,11 @@  irange::contains_p (tree cst) const
 
   gcc_checking_assert (TREE_CODE (cst) == INTEGER_CST);
 
-  if (m_nonzero_mask)
+  // See if we can exclude CST based on the nonzero bits.
+  if (m_nonzero_mask != -1)
     {
       wide_int cstw = wi::to_wide (cst);
-      if (cstw != 0 && wi::bit_and (wi::to_wide (m_nonzero_mask), cstw) == 0)
+      if (cstw != 0 && wi::bit_and (m_nonzero_mask, cstw) == 0)
 	return false;
     }
 
@@ -1878,9 +1876,6 @@  irange::legacy_intersect (irange *vr0, const irange *vr1)
   intersect_ranges (&vr0kind, &vr0min, &vr0max,
 		    vr1->kind (), vr1->min (), vr1->max ());
 
-  // Pessimize nonzero masks, as we don't support them.
-  m_nonzero_mask = NULL;
-
   /* Make sure to canonicalize the result though as the inversion of a
      VR_RANGE can still be a VR_RANGE.  */
   if (vr0kind == VR_UNDEFINED)
@@ -2202,9 +2197,6 @@  irange::legacy_union (irange *vr0, const irange *vr1)
   union_ranges (&vr0kind, &vr0min, &vr0max,
 		vr1->kind (), vr1->min (), vr1->max ());
 
-  // Pessimize nonzero masks, as we don't support them.
-  m_nonzero_mask = NULL;
-
   if (vr0kind == VR_UNDEFINED)
     vr0->set_undefined ();
   else if (vr0kind == VR_VARYING)
@@ -2310,8 +2302,6 @@  irange::legacy_verbose_intersect (const irange *other)
 
 // Perform an efficient union with R when both ranges have only a single pair.
 // Excluded are VARYING and UNDEFINED ranges.
-//
-// NOTE: It is the caller's responsibility to set the nonzero mask.
 
 bool
 irange::irange_single_pair_union (const irange &r)
@@ -2325,7 +2315,7 @@  irange::irange_single_pair_union (const irange &r)
     {
       // If current upper bound is new upper bound, we're done.
       if (wi::le_p (wi::to_wide (r.m_base[1]), wi::to_wide (m_base[1]), sign))
-	return false;
+	return union_nonzero_bits (r);
       // Otherwise R has the new upper bound.
       // Check for overlap/touching ranges, or single target range.
       if (m_max_ranges == 1
@@ -2338,8 +2328,7 @@  irange::irange_single_pair_union (const irange &r)
 	  m_base[3] = r.m_base[1];
 	  m_num_ranges = 2;
 	}
-      if (varying_compatible_p ())
-	m_kind = VR_VARYING;
+      union_nonzero_bits (r);
       return true;
     }
 
@@ -2353,11 +2342,7 @@  irange::irange_single_pair_union (const irange &r)
   // Check for overlapping ranges, or target limited to a single range.
   else if (m_max_ranges == 1
 	   || wi::to_widest (r.m_base[1]) + 1 >= wi::to_widest (lb))
-    {
-      // This has the new upper bound, just check for varying.
-      if (varying_compatible_p ())
-	  m_kind = VR_VARYING;
-    }
+    ;
   else
     {
       // Left with 2 pairs.
@@ -2366,8 +2351,7 @@  irange::irange_single_pair_union (const irange &r)
       m_base[3] = m_base[1];
       m_base[1] = r.m_base[1];
     }
-  if (varying_compatible_p ())
-    m_kind = VR_VARYING;
+  union_nonzero_bits (r);
   return true;
 }
 
@@ -2398,27 +2382,13 @@  irange::irange_union (const irange &r)
       return true;
     }
 
-  // Save the nonzero mask in case the set operations below clobber it.
-  bool ret_nz = union_nonzero_bits (r);
-  tree saved_nz = m_nonzero_mask;
-
-  // The union_nonzero_bits may have turned things into a varying.
-  if (varying_p ())
-    return true;
-
   // Special case one range union one range.
   if (m_num_ranges == 1 && r.m_num_ranges == 1)
-    {
-      bool ret = irange_single_pair_union (r);
-      set_nonzero_bits (saved_nz);
-      if (flag_checking)
-	verify_range ();
-      return ret || ret_nz;
-    }
+    return irange_single_pair_union (r);
 
   // If this ranges fully contains R, then we need do nothing.
   if (irange_contains_p (r))
-    return ret_nz;
+    return union_nonzero_bits (r);
 
   // Do not worry about merging and such by reserving twice as many
   // pairs as needed, and then simply sort the 2 ranges into this
@@ -2506,11 +2476,7 @@  irange::irange_union (const irange &r)
   m_num_ranges = i / 2;
 
   m_kind = VR_RANGE;
-  m_nonzero_mask = saved_nz;
-  normalize_kind ();
-
-  if (flag_checking)
-    verify_range ();
+  union_nonzero_bits (r);
   return true;
 }
 
@@ -2576,21 +2542,11 @@  irange::irange_intersect (const irange &r)
       set_undefined ();
       return true;
     }
-
-  // Save the nonzero mask in case the set operations below clobber it.
-  bool ret_nz = intersect_nonzero_bits (r);
-  tree saved_nz = m_nonzero_mask;
-
   if (r.varying_p ())
-    return ret_nz;
-
+    return false;
   if (varying_p ())
     {
       operator= (r);
-      if (saved_nz)
-	set_nonzero_bits (saved_nz);
-      if (flag_checking)
-	verify_range ();
       return true;
     }
 
@@ -2600,13 +2556,13 @@  irange::irange_intersect (const irange &r)
       if (undefined_p ())
 	return true;
 
-      set_nonzero_bits (saved_nz);
-      return res || saved_nz;
+      res |= intersect_nonzero_bits (r);
+      return res;
     }
 
   // If R fully contains this, then intersection will change nothing.
   if (r.irange_contains_p (*this))
-    return ret_nz;
+    return intersect_nonzero_bits (r);
 
   signop sign = TYPE_SIGN (TREE_TYPE(m_base[0]));
   unsigned bld_pair = 0;
@@ -2675,15 +2631,14 @@  irange::irange_intersect (const irange &r)
   // At the exit of this loop, it is one of 2 things:
   // ran out of r1, or r2, but either means we are done.
   m_num_ranges = bld_pair;
+  if (m_num_ranges == 0)
+    {
+      set_undefined ();
+      return true;
+    }
 
   m_kind = VR_RANGE;
-  if (!undefined_p ())
-    m_nonzero_mask = saved_nz;
-  normalize_kind ();
-
-  if (flag_checking)
-    verify_range ();
-
+  intersect_nonzero_bits (r);
   return true;
 }
 
@@ -2749,10 +2704,15 @@  irange::intersect (const wide_int& lb, const wide_int& ub)
     }
 
   m_num_ranges = bld_index;
+  if (m_num_ranges == 0)
+    {
+      set_undefined ();
+      return true;
+    }
 
   m_kind = VR_RANGE;
-  normalize_kind ();
-
+  // No need to call normalize_kind(), as the caller will do this
+  // while intersecting the nonzero mask.
   if (flag_checking)
     verify_range ();
   return true;
@@ -2801,7 +2761,6 @@  irange::invert ()
     }
 
   gcc_checking_assert (!undefined_p () && !varying_p ());
-  m_nonzero_mask = NULL;
 
   // We always need one more set of bounds to represent an inverse, so
   // if we're at the limit, we can't properly represent things.
@@ -2822,6 +2781,7 @@  irange::invert ()
   signop sign = TYPE_SIGN (ttype);
   wide_int type_min = wi::min_value (prec, sign);
   wide_int type_max = wi::max_value (prec, sign);
+  m_nonzero_mask = wi::shwi (-1, prec);
   if (m_num_ranges == m_max_ranges
       && lower_bound () != type_min
       && upper_bound () != type_max)
@@ -2896,127 +2856,140 @@  irange::invert ()
     verify_range ();
 }
 
-void
-irange::set_nonzero_bits (tree mask)
+// Return the nonzero bits inherent in the range.
+
+wide_int
+irange::get_nonzero_bits_from_range () const
 {
-  gcc_checking_assert (!undefined_p ());
+  // For legacy symbolics.
+  if (!constant_p ())
+    return wi::shwi (-1, TYPE_PRECISION (type ()));
 
-  if (!mask)
+  wide_int min = lower_bound ();
+  wide_int max = upper_bound ();
+  wide_int xorv = min ^ max;
+  if (xorv != 0)
     {
-      if (m_nonzero_mask)
-	{
-	  m_nonzero_mask = NULL;
-	  // Clearing the mask may have turned a range into VARYING.
-	  normalize_kind ();
-	}
-      return;
+      unsigned prec = TYPE_PRECISION (type ());
+      xorv = wi::mask (prec - wi::clz (xorv), false, prec);
     }
-  m_nonzero_mask = mask;
-  // Setting the mask may have turned a VARYING into a range.
-  if (m_kind == VR_VARYING)
-    m_kind = VR_RANGE;
-
-  if (flag_checking)
-    verify_range ();
+  return min | xorv;
 }
 
-void
-irange::set_nonzero_bits (const wide_int_ref &bits)
+// If the nonzero mask can be trivially converted to a range, do
+// so and return TRUE.
+
+bool
+irange::set_range_from_nonzero_bits ()
 {
   gcc_checking_assert (!undefined_p ());
+  unsigned popcount = wi::popcount (m_nonzero_mask);
 
-  if (bits == -1)
-    {
-      set_nonzero_bits (NULL);
-      return;
-    }
   // If we have only one bit set in the mask, we can figure out the
   // range immediately.
-  if (wi::popcount (bits) == 1)
+  if (popcount == 1)
     {
       // Make sure we don't pessimize the range.
-      tree tbits = wide_int_to_tree (type (), bits);
-      if (!contains_p (tbits))
-	{
-	  set_nonzero_bits (tbits);
-	  return;
-	}
+      if (!contains_p (wide_int_to_tree (type (), m_nonzero_mask)))
+	return false;
 
       bool has_zero = contains_p (build_zero_cst (type ()));
+      wide_int bits = m_nonzero_mask;
       set (type (), bits, bits);
+      m_nonzero_mask = bits;
       if (has_zero)
 	{
 	  int_range<2> zero;
 	  zero.set_zero (type ());
 	  union_ (zero);
 	}
+      return true;
     }
-  set_nonzero_bits (wide_int_to_tree (type (), bits));
+  return false;
 }
 
-wide_int
-irange::get_nonzero_bits () const
+void
+irange::set_nonzero_bits (const wide_int_ref &bits)
 {
   gcc_checking_assert (!undefined_p ());
+  unsigned prec = TYPE_PRECISION (type ());
+  gcc_checking_assert (prec == bits.get_precision ());
 
-  // In case anyone in the legacy world queries us.
-  if (!constant_p ())
-    {
-      if (m_nonzero_mask)
-	return wi::to_wide (m_nonzero_mask);
-      return wi::shwi (-1, TYPE_PRECISION (type ()));
-    }
+  // Drop VARYINGs with a nonzero mask to a plain range.
+  if (m_kind == VR_VARYING && bits != -1)
+    m_kind = VR_RANGE;
 
-  // Calculate the nonzero bits inherent in the range.
-  wide_int min = lower_bound ();
-  wide_int max = upper_bound ();
-  wide_int xorv = min ^ max;
-  if (xorv != 0)
-    {
-      unsigned prec = TYPE_PRECISION (type ());
-      xorv = wi::mask (prec - wi::clz (xorv), false, prec);
-    }
-  wide_int mask = min | xorv;
+  m_nonzero_mask = wide_int::from (bits, prec, TYPE_SIGN (type ()));
+  if (set_range_from_nonzero_bits ())
+    return;
 
-  // Return the nonzero bits augmented by the range.
-  if (m_nonzero_mask)
-    return mask & wi::to_wide (m_nonzero_mask);
+  normalize_kind ();
+  if (flag_checking)
+    verify_range ();
+}
 
-  return mask;
+// Return the nonzero bitmask.  This will return the nonzero bits plus
+// the nonzero bits inherent in the range.
+
+wide_int
+irange::get_nonzero_bits () const
+{
+  gcc_checking_assert (!undefined_p ());
+  // The nonzero mask inherent in the range is calculated on-demand.
+  // For example, [0,255] does not have a 0xff nonzero mask by default
+  // (unless manually set).  This saves us considerable time, because
+  // setting it at creation incurs a large penalty for irange::set.
+  // At the time of writing there was a 5% slowdown in VRP if we kept
+  // the mask precisely up to date at all times.  Instead, we default
+  // to -1 and set it when explicitly requested.  However, this
+  // function will always return the correct mask.
+  return m_nonzero_mask & get_nonzero_bits_from_range ();
 }
 
-// Intersect the nonzero bits in R into THIS.
+// Intersect the nonzero bits in R into THIS and normalize the range.
+// Return TRUE if the intersection changed anything.
 
 bool
 irange::intersect_nonzero_bits (const irange &r)
 {
   gcc_checking_assert (!undefined_p () && !r.undefined_p ());
 
-  if (m_nonzero_mask || r.m_nonzero_mask)
+  bool changed = false;
+  if (m_nonzero_mask != r.m_nonzero_mask)
     {
-      wide_int nz = wi::bit_and (get_nonzero_bits (),
-				 r.get_nonzero_bits ());
-      set_nonzero_bits (nz);
-      return true;
+      m_nonzero_mask = get_nonzero_bits () & r.get_nonzero_bits ();
+      if (set_range_from_nonzero_bits ())
+	return true;
+      changed = true;
     }
-  return false;
+  normalize_kind ();
+  if (flag_checking)
+    verify_range ();
+  return changed;
 }
 
-// Union the nonzero bits in R into THIS.
+// Union the nonzero bits in R into THIS and normalize the range.
+// Return TRUE if the union changed anything.
 
 bool
 irange::union_nonzero_bits (const irange &r)
 {
   gcc_checking_assert (!undefined_p () && !r.undefined_p ());
 
-  if (m_nonzero_mask || r.m_nonzero_mask)
+  bool changed = false;
+  if (m_nonzero_mask != r.m_nonzero_mask)
     {
-      wide_int nz = wi::bit_or (get_nonzero_bits (),
-				r.get_nonzero_bits ());
-      set_nonzero_bits (nz);
-      return true;
+      m_nonzero_mask = get_nonzero_bits () | r.get_nonzero_bits ();
+      // No need to call set_range_from_nonzero_bits, because we'll
+      // never narrow the range.  Besides, it would cause endless
+      // recursion because of the union_ in
+      // set_range_from_nonzero_bits.
+      changed = true;
     }
-  return false;
+  normalize_kind ();
+  if (flag_checking)
+    verify_range ();
+  return changed;
 }
 
 void
@@ -3605,13 +3578,6 @@  range_tests_nonzero_bits ()
   r0.union_ (r1);
   ASSERT_TRUE (r0.get_nonzero_bits () == 0xff);
 
-  // Union where the mask of nonzero bits is implicit from the range.
-  r0.set_varying (integer_type_node);
-  r0.set_nonzero_bits (0xf00);
-  r1.set_zero (integer_type_node); // nonzero mask is implicitly 0
-  r0.union_ (r1);
-  ASSERT_TRUE (r0.get_nonzero_bits () == 0xf00);
-
   // Intersect of nonzero bits.
   r0.set (INT (0), INT (255));
   r0.set_nonzero_bits (0xfe);
diff --git a/gcc/value-range.h b/gcc/value-range.h
index 556e31aece1..d1663620444 100644
--- a/gcc/value-range.h
+++ b/gcc/value-range.h
@@ -207,14 +207,15 @@  private:
 
   void irange_set_1bit_anti_range (tree, tree);
   bool varying_compatible_p () const;
-  void set_nonzero_bits (tree mask);
   bool intersect_nonzero_bits (const irange &r);
   bool union_nonzero_bits (const irange &r);
+  wide_int get_nonzero_bits_from_range () const;
+  bool set_range_from_nonzero_bits ();
 
   bool intersect (const wide_int& lb, const wide_int& ub);
   unsigned char m_num_ranges;
   unsigned char m_max_ranges;
-  tree m_nonzero_mask;
+  wide_int m_nonzero_mask;
   tree *m_base;
 };
 
@@ -682,10 +683,11 @@  irange::varying_compatible_p () const
   if (INTEGRAL_TYPE_P (t))
     return (wi::to_wide (l) == wi::min_value (prec, sign)
 	    && wi::to_wide (u) == wi::max_value (prec, sign)
-	    && !m_nonzero_mask);
+	    && m_nonzero_mask == -1);
   if (POINTER_TYPE_P (t))
     return (wi::to_wide (l) == 0
-	    && wi::to_wide (u) == wi::max_value (prec, sign));
+	    && wi::to_wide (u) == wi::max_value (prec, sign)
+	    && m_nonzero_mask == -1);
   return true;
 }
 
@@ -752,8 +754,6 @@  gt_ggc_mx (irange *x)
       gt_ggc_mx (x->m_base[i * 2]);
       gt_ggc_mx (x->m_base[i * 2 + 1]);
     }
-  if (x->m_nonzero_mask)
-    gt_ggc_mx (x->m_nonzero_mask);
 }
 
 inline void
@@ -764,8 +764,6 @@  gt_pch_nx (irange *x)
       gt_pch_nx (x->m_base[i * 2]);
       gt_pch_nx (x->m_base[i * 2 + 1]);
     }
-  if (x->m_nonzero_mask)
-    gt_pch_nx (x->m_nonzero_mask);
 }
 
 inline void
@@ -776,8 +774,6 @@  gt_pch_nx (irange *x, gt_pointer_operator op, void *cookie)
       op (&x->m_base[i * 2], NULL, cookie);
       op (&x->m_base[i * 2 + 1], NULL, cookie);
     }
-  if (x->m_nonzero_mask)
-    op (&x->m_nonzero_mask, NULL, cookie);
 }
 
 template<unsigned N>
@@ -872,7 +868,6 @@  irange::set_undefined ()
 {
   m_kind = VR_UNDEFINED;
   m_num_ranges = 0;
-  m_nonzero_mask = NULL;
 }
 
 inline void
@@ -880,7 +875,11 @@  irange::set_varying (tree type)
 {
   m_kind = VR_VARYING;
   m_num_ranges = 1;
-  m_nonzero_mask = NULL;
+
+  if (type == error_mark_node)
+    m_nonzero_mask = wi::shwi (-1, 1);
+  else
+    m_nonzero_mask = wi::shwi (-1, TYPE_PRECISION (type));
 
   if (INTEGRAL_TYPE_P (type))
     {
@@ -1002,8 +1001,6 @@  irange::normalize_kind ()
 	m_kind = VR_VARYING;
       else if (m_kind == VR_ANTI_RANGE)
 	set_undefined ();
-      else
-	gcc_unreachable ();
     }
 }