Don't try bswap + rotate when TYPE_PRECISION(n->type) > n->range.
Checks
Commit Message
For the testcase in the PR, we have
br64 = br;
br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
n->n: 0x3000000200.
n->range: 32.
n->type: uint64.
The original code assumes n->range is same as TYPE PRECISION(n->type),
and tries to rotate the mask from 0x300000200 -> 0x20300 which is
incorrect. The patch fixed this bug by not trying bswap + rotate when
TYPE_PRECISION(n->type) is not equal to n->range.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?
gcc/ChangeLog:
PR tree-optimization/110067
* gimple-ssa-store-merging.cc (find_bswap_or_nop): Don't try
bswap + rotate when TYPE_PRECISION(n->type) > n->range.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr110067.c: New test.
---
gcc/gimple-ssa-store-merging.cc | 3 +
gcc/testsuite/gcc.target/i386/pr110067.c | 77 ++++++++++++++++++++++++
2 files changed, 80 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/i386/pr110067.c
Comments
On Thu, Jun 1, 2023 at 9:51 AM liuhongt via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> For the testcase in the PR, we have
>
> br64 = br;
> br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
>
> n->n: 0x3000000200.
> n->range: 32.
> n->type: uint64.
>
> The original code assumes n->range is same as TYPE PRECISION(n->type),
> and tries to rotate the mask from 0x300000200 -> 0x20300 which is
> incorrect. The patch fixed this bug by not trying bswap + rotate when
> TYPE_PRECISION(n->type) is not equal to n->range.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
OK.
> gcc/ChangeLog:
>
> PR tree-optimization/110067
> * gimple-ssa-store-merging.cc (find_bswap_or_nop): Don't try
> bswap + rotate when TYPE_PRECISION(n->type) > n->range.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr110067.c: New test.
> ---
> gcc/gimple-ssa-store-merging.cc | 3 +
> gcc/testsuite/gcc.target/i386/pr110067.c | 77 ++++++++++++++++++++++++
> 2 files changed, 80 insertions(+)
> create mode 100644 gcc/testsuite/gcc.target/i386/pr110067.c
>
> diff --git a/gcc/gimple-ssa-store-merging.cc b/gcc/gimple-ssa-store-merging.cc
> index 9cb574fa315..401496a9231 100644
> --- a/gcc/gimple-ssa-store-merging.cc
> +++ b/gcc/gimple-ssa-store-merging.cc
> @@ -1029,6 +1029,9 @@ find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap,
> /* TODO, handle cast64_to_32 and big/litte_endian memory
> source when rsize < range. */
> if (n->range == orig_range
> + /* There're case like 0x300000200 for uint32->uint64 cast,
> + Don't hanlde this. */
> + && n->range == TYPE_PRECISION (n->type)
> && ((orig_range == 32
> && optab_handler (rotl_optab, SImode) != CODE_FOR_nothing)
> || (orig_range == 64
> diff --git a/gcc/testsuite/gcc.target/i386/pr110067.c b/gcc/testsuite/gcc.target/i386/pr110067.c
> new file mode 100644
> index 00000000000..c4208811628
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr110067.c
> @@ -0,0 +1,77 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fno-strict-aliasing" } */
> +
> +#include <stdint.h>
> +#define force_inline __inline__ __attribute__ ((__always_inline__))
> +
> +__attribute__((noipa))
> +static void
> +fetch_pixel_no_alpha_32_bug (void *out)
> +{
> + uint32_t *ret = out;
> + *ret = 0xff499baf;
> +}
> +
> +static force_inline uint32_t
> +bilinear_interpolation_local (uint32_t tl, uint32_t tr,
> + uint32_t bl, uint32_t br,
> + int distx, int disty)
> +{
> + uint64_t distxy, distxiy, distixy, distixiy;
> + uint64_t tl64, tr64, bl64, br64;
> + uint64_t f, r;
> +
> + distx <<= 1;
> + disty <<= 1;
> +
> + distxy = distx * disty;
> + distxiy = distx * (256 - disty);
> + distixy = (256 - distx) * disty;
> + distixiy = (256 - distx) * (256 - disty);
> +
> + /* Alpha and Blue */
> + tl64 = tl & 0xff0000ff;
> + tr64 = tr & 0xff0000ff;
> + bl64 = bl & 0xff0000ff;
> + br64 = br & 0xff0000ff;
> +
> + f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
> + r = f & 0x0000ff0000ff0000ull;
> +
> + /* Red and Green */
> + tl64 = tl;
> + tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
> +
> + tr64 = tr;
> + tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
> +
> + bl64 = bl;
> + bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
> +
> + br64 = br;
> + br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
> +
> + f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
> + r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
> +
> + return (uint32_t)(r >> 16);
> +}
> +
> +__attribute__((noipa))
> +static void
> +bits_image_fetch_pixel_bilinear_32_bug (void *out)
> +{
> + uint32_t br;
> + uint32_t *ret = out;
> +
> + fetch_pixel_no_alpha_32_bug (&br);
> + *ret = bilinear_interpolation_local (0, 0, 0, br, 0x41, 0x42);
> +}
> +
> +int main() {
> + uint32_t r;
> + bits_image_fetch_pixel_bilinear_32_bug (&r);
> + if (r != 0x4213282d)
> + __builtin_abort ();
> + return 0;
> +}
> --
> 2.39.1.388.g2fc9e9ca3c
>
@@ -1029,6 +1029,9 @@ find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap,
/* TODO, handle cast64_to_32 and big/litte_endian memory
source when rsize < range. */
if (n->range == orig_range
+ /* There're case like 0x300000200 for uint32->uint64 cast,
+ Don't hanlde this. */
+ && n->range == TYPE_PRECISION (n->type)
&& ((orig_range == 32
&& optab_handler (rotl_optab, SImode) != CODE_FOR_nothing)
|| (orig_range == 64
new file mode 100644
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing" } */
+
+#include <stdint.h>
+#define force_inline __inline__ __attribute__ ((__always_inline__))
+
+__attribute__((noipa))
+static void
+fetch_pixel_no_alpha_32_bug (void *out)
+{
+ uint32_t *ret = out;
+ *ret = 0xff499baf;
+}
+
+static force_inline uint32_t
+bilinear_interpolation_local (uint32_t tl, uint32_t tr,
+ uint32_t bl, uint32_t br,
+ int distx, int disty)
+{
+ uint64_t distxy, distxiy, distixy, distixiy;
+ uint64_t tl64, tr64, bl64, br64;
+ uint64_t f, r;
+
+ distx <<= 1;
+ disty <<= 1;
+
+ distxy = distx * disty;
+ distxiy = distx * (256 - disty);
+ distixy = (256 - distx) * disty;
+ distixiy = (256 - distx) * (256 - disty);
+
+ /* Alpha and Blue */
+ tl64 = tl & 0xff0000ff;
+ tr64 = tr & 0xff0000ff;
+ bl64 = bl & 0xff0000ff;
+ br64 = br & 0xff0000ff;
+
+ f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
+ r = f & 0x0000ff0000ff0000ull;
+
+ /* Red and Green */
+ tl64 = tl;
+ tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
+
+ tr64 = tr;
+ tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
+
+ bl64 = bl;
+ bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
+
+ br64 = br;
+ br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
+
+ f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
+ r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
+
+ return (uint32_t)(r >> 16);
+}
+
+__attribute__((noipa))
+static void
+bits_image_fetch_pixel_bilinear_32_bug (void *out)
+{
+ uint32_t br;
+ uint32_t *ret = out;
+
+ fetch_pixel_no_alpha_32_bug (&br);
+ *ret = bilinear_interpolation_local (0, 0, 0, br, 0x41, 0x42);
+}
+
+int main() {
+ uint32_t r;
+ bits_image_fetch_pixel_bilinear_32_bug (&r);
+ if (r != 0x4213282d)
+ __builtin_abort ();
+ return 0;
+}