new file mode 100644
@@ -0,0 +1,39 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s { int i : 31; };
+
+#define N 32
+#define ELT0 {0}
+#define ELT1 {1}
+#define ELT2 {2}
+#define ELT3 {3}
+#define RES 48
+struct s A[N]
+ = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
+
+int f(struct s *ptr, unsigned n) {
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ res += ptr[i].i;
+ return res;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ if (f(&A[0], N) != RES)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
new file mode 100644
@@ -0,0 +1,42 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s {
+ unsigned i : 31;
+ char a : 4;
+};
+
+#define N 32
+#define ELT0 {0x7FFFFFFFUL, 0}
+#define ELT1 {0x7FFFFFFFUL, 1}
+#define ELT2 {0x7FFFFFFFUL, 2}
+#define ELT3 {0x7FFFFFFFUL, 3}
+#define RES 48
+struct s A[N]
+ = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
+
+int f(struct s *ptr, unsigned n) {
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ res += ptr[i].a;
+ return res;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ if (f(&A[0], N) != RES)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
new file mode 100644
@@ -0,0 +1,44 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+#include <stdbool.h>
+
+extern void abort(void);
+
+typedef struct {
+ int c;
+ int b;
+ bool a : 1;
+} struct_t;
+
+#define N 16
+#define ELT_F { 0xFFFFFFFF, 0xFFFFFFFF, 0 }
+#define ELT_T { 0xFFFFFFFF, 0xFFFFFFFF, 1 }
+
+struct_t vect_false[N] = { ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F,
+ ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F };
+struct_t vect_true[N] = { ELT_F, ELT_F, ELT_T, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F,
+ ELT_F, ELT_F, ELT_T, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F };
+int main (void)
+{
+  check_vect ();
+  unsigned ret = 0;
+ for (unsigned i = 0; i < N; i++)
+ {
+ ret |= vect_false[i].a;
+ }
+ if (ret)
+ abort ();
+
+ for (unsigned i = 0; i < N; i++)
+ {
+ ret |= vect_true[i].a;
+ }
+ if (!ret)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s {
+ unsigned i : 31;
+ char x : 2;
+ char a : 4;
+};
+
+#define N 32
+#define ELT0 {0x7FFFFFFFUL, 3, 0}
+#define ELT1 {0x7FFFFFFFUL, 3, 1}
+#define ELT2 {0x7FFFFFFFUL, 3, 2}
+#define ELT3 {0x7FFFFFFFUL, 3, 3}
+#define RES 48
+struct s A[N]
+ = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
+
+int f(struct s *ptr, unsigned n) {
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ res += ptr[i].a;
+ return res;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ if (f(&A[0], N) != RES)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
@@ -1137,7 +1137,23 @@ dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
gcc_assert (base != NULL_TREE);
poly_int64 pbytepos;
- if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
+  /* If we are dealing with a bit-field reference then PBITPOS may not be
+     a multiple of BITS_PER_UNIT.  In that case truncate it to a whole
+     number of bytes: set PBYTEPOS to 0 if PBITPOS is smaller than a byte,
+     and to PBITPOS / BITS_PER_UNIT rounded down otherwise (e.g. a PBITPOS
+     of 31 yields a PBYTEPOS of 3).  */
+ if (TREE_CODE (ref) == COMPONENT_REF
+ && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
+ {
+ if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
+ {
+ if (known_lt (pbitpos, BITS_PER_UNIT))
+ pbytepos = 0;
+ else
+ can_div_trunc_p (pbitpos, BITS_PER_UNIT, &pbytepos);
+ }
+ }
+ else if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
return opt_result::failure_at (stmt,
"failed: bit offset alignment.\n");
@@ -4016,7 +4016,19 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
if (reversep)
return false;
- poly_int64 pbytepos = exact_div (pbitpos, BITS_PER_UNIT);
+  /* If we are dealing with a bit-field reference then PBITPOS may not be
+     a multiple of BITS_PER_UNIT.  In that case truncate it to a whole
+     number of bytes: set PBYTEPOS to 0 if PBITPOS is smaller than a byte,
+     and to PBITPOS / BITS_PER_UNIT rounded down otherwise (e.g. a PBITPOS
+     of 31 yields a PBYTEPOS of 3).  */
+ poly_int64 pbytepos;
+ if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
+ {
+ if (known_lt (pbitpos, BITS_PER_UNIT))
+ pbytepos = 0;
+ else
+ can_div_trunc_p (pbitpos, BITS_PER_UNIT, &pbytepos);
+ }
if (TREE_CODE (base) == MEM_REF)
{
@@ -4296,7 +4307,8 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
}
if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
- && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
+ && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1))
+ && !DR_IS_READ (dr))
{
free_data_ref (dr);
return opt_result::failure_at (stmt,
@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
+#include "gimplify-me.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
@@ -1828,6 +1829,177 @@ vect_recog_widen_sum_pattern (vec_info *vinfo,
return pattern_stmt;
}
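+
+/* Function vect_recog_bit_field_decl
+
+   Try to detect a read of a bit-field, i.e. a COMPONENT_REF of a
+   DECL_BIT_FIELD, and replace it with an equivalent BIT_FIELD_REF of the
+   containing record, positioned at the field's bit offset and sized to
+   the field's precision, for the bit_field_ref pattern below to lower
+   further.  */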
+static gimple *
+vect_recog_bit_field_decl (vec_info * /* vinfo */, stmt_vec_info last_stmt_info,
+ tree *type_out)
+{
+ gassign *decl_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
+
+ if (!decl_stmt)
+ return NULL;
+
+ data_reference *dr = STMT_VINFO_DATA_REF (last_stmt_info);
+ if (!dr)
+ return NULL;
+
+ if (TREE_CODE (DR_REF (dr)) != COMPONENT_REF
+ || !DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1))
+ || !DR_IS_READ (dr))
+ return NULL;
+
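+  /* Rewrite the access as a BIT_FIELD_REF of the containing record,
+     taking the width from the field's precision and the position from
+     its combined byte and bit offsets.  */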
+ tree type = TREE_TYPE (gimple_get_lhs (decl_stmt));
+ tree record = TREE_OPERAND (DR_REF (dr), 0);
+ tree decl_field = TREE_OPERAND (DR_REF (dr), 1);
+  /* BIT_FIELD_REF takes its position operand in bits; DECL_FIELD_OFFSET
+     is in bytes, so combine it with DECL_FIELD_BIT_OFFSET using
+     bit_position.  */
+  tree offset = bit_position (decl_field);
+  tree bf_ref = fold_build3 (BIT_FIELD_REF, type,
+			     record,
+			     build_int_cst (bitsizetype,
+					    TYPE_PRECISION (type)),
+			     offset);
+ gimple *pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL), bf_ref);
+
+ *type_out = STMT_VINFO_VECTYPE (last_stmt_info);
+
+ vect_pattern_detected ("bit_field_decl pattern", last_stmt_info->stmt);
+
+ return pattern_stmt;
+}
+
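+/* Function vect_recog_bit_field_ref
+
+   Lower a conversion of a BIT_FIELD_REF, as created by
+   vect_recog_bit_field_decl above, into a load of the bytes containing
+   the field followed by a right shift and a bit mask, all of which the
+   vectorizer can handle.  For a 4-bit field at bit offset 32, for
+   instance, this produces an 8-bit load, a shift by 32 % 8 == 0 and a
+   mask with 0xf.  */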
+static gimple *
+vect_recog_bit_field_ref (vec_info *vinfo, stmt_vec_info last_stmt_info,
+ tree *type_out)
+{
+ gassign *nop_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
+ if (!nop_stmt)
+ return NULL;
+
+ if (gimple_assign_rhs_code (nop_stmt) != NOP_EXPR)
+ return NULL;
+
+ if (TREE_CODE (gimple_assign_rhs1 (nop_stmt)) != SSA_NAME)
+ return NULL;
+
+ gassign *bf_stmt
+ = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (gimple_assign_rhs1 (nop_stmt)));
+
+ if (!bf_stmt)
+ return NULL;
+
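+  /* The conversion may consume a BIT_FIELD_REF directly or the pattern
+     statement the bit_field_decl pattern created for one.  */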
+ stmt_vec_info bf_stmt_info = vinfo->lookup_stmt (bf_stmt);
+ if (gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF
+ && (!bf_stmt_info || !STMT_VINFO_IN_PATTERN_P (bf_stmt_info)))
+ return NULL;
+
+ if (gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
+ {
+ if (!STMT_VINFO_RELATED_STMT (bf_stmt_info))
+ return NULL;
+ bf_stmt
+ = dyn_cast <gassign *> (STMT_VINFO_RELATED_STMT (bf_stmt_info)->stmt);
+ }
+
+ if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
+ return NULL;
+
+  /* ??? Why is rhs1 of BF_STMT still a BIT_FIELD_REF at this point?  */
+ tree bf_ref = gimple_assign_rhs1 (bf_stmt);
+
+  tree size = TREE_OPERAND (bf_ref, 1);
+  tree offset = TREE_OPERAND (bf_ref, 2);
+
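+  /* Round the field's precision up to a whole number of bytes.  If the
+     precision is already byte-sized the access needs no lowering.  */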
+ tree bf_type = TREE_TYPE (bf_ref);
+ unsigned HOST_WIDE_INT bf_precision = TYPE_PRECISION (bf_type);
+ unsigned HOST_WIDE_INT load_size
+ = CEIL (bf_precision, BITS_PER_UNIT) * BITS_PER_UNIT;
+
+ if (bf_precision == load_size)
+ return NULL;
+
+ tree load_type = build_nonstandard_integer_type (load_size, 1);
+ tree vectype = get_vectype_for_scalar_type (vinfo, load_type);
+ tree lhs = vect_recog_temp_ssa_var (load_type, NULL);
+
+ data_reference *dr = STMT_VINFO_DATA_REF (bf_stmt_info);
+  /* TODO: rather than using the DR_REF here we would like to reconstruct
+     the desired byte-aligned load, instead of relying on the vectorizer
+     to vectorize this as a normal load.  An earlier attempt to do so,
+     however, made the vectorizer think it needed to vectorize the address
+     computation too.  */
+ gimple *pattern_stmt = gimple_build_assign (lhs, DR_REF (dr));
+ gimple *load_stmt = pattern_stmt;
+
+ tree ret_type = TREE_TYPE (gimple_get_lhs (nop_stmt));
+ if (!useless_type_conversion_p (TREE_TYPE (lhs), ret_type))
+ {
+ append_pattern_def_seq (vinfo, bf_stmt_info, pattern_stmt,
+ vectype);
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (ret_type, NULL),
+ NOP_EXPR, lhs);
+ lhs = gimple_get_lhs (pattern_stmt);
+ vectype = STMT_VINFO_VECTYPE (last_stmt_info);
+ }
+ vect_mark_pattern_stmts (vinfo, bf_stmt_info, pattern_stmt, vectype);
+
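+  /* Transfer the data reference to the byte-aligned load so the
+     vectorizer analyses the load rather than the original access.  */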
+ stmt_vec_info load_stmt_info = vinfo->lookup_stmt (load_stmt);
+ vinfo->move_dr (load_stmt_info, bf_stmt_info);
+
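+  /* The field sits at bit OFFSET_I % LOAD_SIZE of the loaded value; shift
+     it down to bit zero and mask off the bits above its precision.  */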
+ unsigned HOST_WIDE_INT offset_i = tree_to_uhwi (offset);
+ unsigned HOST_WIDE_INT shift_n = offset_i % load_size;
+
+ if (shift_n)
+ {
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL),
+ RSHIFT_EXPR, lhs,
+ build_int_cst (integer_type_node, shift_n));
+ append_pattern_def_seq (vinfo, last_stmt_info, pattern_stmt);
+ lhs = gimple_get_lhs (pattern_stmt);
+ }
+
+ unsigned HOST_WIDE_INT mask_i = tree_to_uhwi (size);
+  tree mask = build_int_cst (TREE_TYPE (lhs),
+			     (HOST_WIDE_INT_1U << mask_i) - 1);
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL),
+ BIT_AND_EXPR, lhs, mask);
+
+ *type_out = STMT_VINFO_VECTYPE (last_stmt_info);
+ vect_pattern_detected ("bit_field_ref pattern", last_stmt_info->stmt);
+
+ return pattern_stmt;
+}
+
/* Recognize cases in which an operation is performed in one type WTYPE
but could be done more efficiently in a narrower type NTYPE. For example,
if we have:
@@ -5623,6 +5775,8 @@ struct vect_recog_func
   taken which means usually the more complex one needs to precede the
   less complex ones (widen_sum only after dot_prod or sad for example). */
static vect_recog_func vect_vect_recog_func_ptrs[] = {
+ { vect_recog_bit_field_decl, "bit_field_decl" },
+  { vect_recog_bit_field_ref, "bit_field_ref" },
{ vect_recog_over_widening_pattern, "over_widening" },
/* Must come after over_widening, which narrows the shift as much as
possible beforehand. */
@@ -4933,19 +4933,6 @@ vectorizable_conversion (vec_info *vinfo,
&& SCALAR_FLOAT_TYPE_P (rhs_type))))
return false;
- if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
- && ((INTEGRAL_TYPE_P (lhs_type)
- && !type_has_mode_precision_p (lhs_type))
- || (INTEGRAL_TYPE_P (rhs_type)
- && !type_has_mode_precision_p (rhs_type))))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "type conversion to/from bit-precision unsupported."
- "\n");
- return false;
- }
-
if (op_type == binary_op)
{
gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR