RISC-V: Postpone full available optimization [VSETVL PASS]
Checks
Commit Message
Fix VSETVL BUG that AVL is polluted
.L15:
li a3,9
lui a4,%hi(s)
sw a3,%lo(j)(t2)
sh a5,%lo(s)(a4) <--a4 is hold the address of s
beq t0,zero,.L42
sw t5,8(t4)
vsetvli zero,a4,e8,m8,ta,ma <<--- a4 as avl
Actually, this vsetvl is redundant.
The root cause we include full available optimization in LCM local data computation.
full available optimization should be after LCM computation.
PR target/112929
PR target/112988
gcc/ChangeLog:
* config/riscv/riscv-vsetvl.cc (pre_vsetvl::compute_lcm_local_properties): Remove full available.
(pre_vsetvl::pre_global_vsetvl_info): Add full available optimization.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/vsetvl/pr112929.c: New test.
* gcc.target/riscv/rvv/vsetvl/pr112988.c: New test.
---
gcc/config/riscv/riscv-vsetvl.cc | 14 +++-
.../gcc.target/riscv/rvv/vsetvl/pr112929.c | 58 ++++++++++++++++
.../gcc.target/riscv/rvv/vsetvl/pr112988.c | 69 +++++++++++++++++++
3 files changed, 139 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112929.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112988.c
Comments
Hi Juzhe,
in general looks OK to me.
Just a question for understanding:
> - if (header_info.valid_p ()
> - && (anticipated_exp_p (header_info) || block_info.full_available))
Why is full_available true if we cannot use it?
> +/* { dg-do compile } */
It would be nice if we could make this a run test as well.
Regards
Robin
> I don”t choose to run since I didn”t have issue run on my local
> simulator no matter qemu or spike.
Yes it was flaky. That's kind of expected with the out-of-bounds
writes we did. They can depend on runtime environment and other
factors. Of course it's a bit counterintuitive to add a (before)
passing test but, with the proper comment, if it ever FAILed at some
point in the future we'd have a point of reference.
Regards
Robin
> Do you mean add some comments in tests?
I meant add it as a run test as well and comment that the test
has caused out-of-bounds writes before and passed by the time of
adding it (or so) and is kept regardless.
Regards
Robin
@@ -2723,8 +2723,7 @@ pre_vsetvl::compute_lcm_local_properties ()
vsetvl_info &header_info = block_info.get_entry_info ();
vsetvl_info &footer_info = block_info.get_exit_info ();
- if (header_info.valid_p ()
- && (anticipated_exp_p (header_info) || block_info.full_available))
+ if (header_info.valid_p () && anticipated_exp_p (header_info))
bitmap_set_bit (m_antloc[bb_index],
get_expr_index (m_exprs, header_info));
@@ -3224,6 +3223,17 @@ pre_vsetvl::pre_global_vsetvl_info ()
info.set_delete ();
}
+ /* Remove vsetvl infos if all precessors are available to the block. */
+ for (const bb_info *bb : crtl->ssa->bbs ())
+ {
+ vsetvl_block_info &block_info = get_block_info (bb);
+ if (block_info.empty_p () || !block_info.full_available)
+ continue;
+
+ vsetvl_info &info = block_info.get_entry_info ();
+ info.set_delete ();
+ }
+
for (const bb_info *bb : crtl->ssa->bbs ())
{
vsetvl_block_info &block_info = get_block_info (bb);
new file mode 100644
@@ -0,0 +1,58 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+int printf(char *, ...);
+int a, l, i, p, q, t, n, o;
+int *volatile c;
+static int j;
+static struct pack_1_struct d;
+long e;
+char m = 5;
+short s;
+
+#pragma pack(1)
+struct pack_1_struct {
+ long c;
+ int d;
+ int e;
+ int f;
+ int g;
+ int h;
+ int i;
+} h, r = {1}, *f = &h, *volatile g;
+
+void add_em_up(int count, ...) {
+ __builtin_va_list ap;
+ __builtin_va_start(ap, count);
+ __builtin_va_end(ap);
+}
+
+int main() {
+ int u;
+ j = 0;
+
+ for (; j < 9; ++j) {
+ u = ++t ? a : 0;
+ if (u) {
+ int *v = &d.d;
+ *v = g || e;
+ *c = 0;
+ *f = h;
+ }
+ s = l && c;
+ o = i;
+ d.f || (p = 0);
+ q |= n;
+ }
+
+ r = *f;
+
+ add_em_up(1, 1);
+
+ printf("%d\n", m);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
+/* { dg-final { scan-assembler-times {vsetvli\tzero,\s*[a-x0-9]+,\s*e8,\s*m8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,\s*32} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
new file mode 100644
@@ -0,0 +1,69 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+int a = 0;
+int p, q, r, x = 230;
+short d;
+int e[256];
+static struct f w;
+int *c = &r;
+
+short y(short z) {
+ return z * d;
+}
+
+#pragma pack(1)
+struct f {
+ int g;
+ short h;
+ int j;
+ char k;
+ char l;
+ long m;
+ long n;
+ int o;
+} s = {1}, v, t, *u = &v, *b = &s;
+
+void add_em_up(int count, ...) {
+ __builtin_va_list ap;
+ __builtin_va_start(ap, count);
+ __builtin_va_end(ap);
+}
+
+int main() {
+ int i = 0;
+ for (; i < 256; i++)
+ e[i] = i;
+
+ p = 0;
+ for (; p <= 0; p++) {
+ *c = 4;
+ *u = t;
+ x |= y(6 >= q);
+ }
+
+ *b = w;
+
+ add_em_up(1, 1);
+
+ if (a != 0)
+ return 1;
+ if (q != 0)
+ return 2;
+ if (p != 1)
+ return 3;
+ if (r != 4)
+ return 4;
+ if (x != 0xE6)
+ return 5;
+ if (d != 0)
+ return 6;
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli\tzero,\s*4,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\tzero,\s*[a-x0-9]+,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,\s*32} 1 } } */