middle-end Recognize more conditional comparisons idioms.

Message ID patch-16259-tamar@arm.com
State New, archived
Headers
Series middle-end Recognize more conditional comparisons idioms. |

Commit Message

Tamar Christina Sept. 23, 2022, 9:18 a.m. UTC
  Hi All,

GCC currently recognizes some of these for signed but not unsigned types.
It also has trouble dealing with casts in between because these are handled
by the fold machinery.

This moves the pattern detection to match.pd instead.

We fold e.g.:

uint32_t min1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
  uint32_t result;
  uint32_t m = (a >= b) - 1;
  result = (c & m) | (d & ~m);
  return result;
}

into a >= b ? c : d for all integral types.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* match.pd: New cond select pattern.

gcc/testsuite/ChangeLog:

	* gcc.dg/select_cond_1.c: New test.
	* gcc.dg/select_cond_2.c: New test.
	* gcc.dg/select_cond_3.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/match.pd b/gcc/match.pd
index 39da61bf117a6eb2924fc8a6473fb37ddadd60e9..7b8f50410acfd0afafc5606e972cfc4e125d3a5d 100644




--
diff --git a/gcc/match.pd b/gcc/match.pd
index 39da61bf117a6eb2924fc8a6473fb37ddadd60e9..7b8f50410acfd0afafc5606e972cfc4e125d3a5d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3577,6 +3577,25 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (cond (le @0 integer_zerop@1) (negate@2 @0) integer_zerop@1)
   (max @2 @1))
 
+/* (a & ((c `op` d) - 1)) | (b & ~((c `op` d) - 1)) ->  c `op` d ? a : b.  */
+(for op (simple_comparison)
+ (simplify
+  (bit_xor:c
+   (convert2? @0)
+   (bit_and:c
+    (convert2? (bit_xor:c @1 @0))
+    (convert3? (negate (convert? (op@4 @2 @3))))))
+  /* Alternative form, where some canonicalization were not done due to the
+     arguments being signed.  */
+  (if (INTEGRAL_TYPE_P (type) && tree_zero_one_valued_p (@4))
+   (convert:type (cond @4 @1 @0))))
+ (simplify
+  (bit_ior:c
+   (mult:c @0 (convert (convert2? (op@4 @2 @3))))
+   (bit_and:c @1 (convert (plus:c integer_minus_onep (convert (op@4 @2 @3))))))
+  (if (INTEGRAL_TYPE_P (type) && tree_zero_one_valued_p (@4))
+   (cond @4 @0 @1))))
+
 /* Simplifications of shift and rotates.  */
 
 (for rotate (lrotate rrotate)
diff --git a/gcc/testsuite/gcc.dg/select_cond_1.c b/gcc/testsuite/gcc.dg/select_cond_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..9eb9959baafe5fffeec24e4e3ae656f8fcfe943c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/select_cond_1.c
@@ -0,0 +1,97 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */
+
+#include <stdint.h>
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t max1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a <= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min2_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a > b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t max2_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a < b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min3_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a == b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t max3_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a != b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+/* { dg-final { scan-tree-dump-times {_[0-9]+ \? c_[0-9]+\(D\) : d_[0-9]+\(D\)} 6 "optimized" } } */
+
+extern void abort ();
+
+int main () {
+
+  if (min1_32u (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (max1_32u (3, 5, 7 , 8) != 8)
+    abort ();
+
+  if (min1_32u (5, 3, 7 , 8) != 8)
+    abort ();
+
+  if (max1_32u (5, 3, 7 , 8) != 7)
+    abort ();
+
+  if (min2_32u (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (max2_32u (3, 5, 7 , 8) != 8)
+    abort ();
+
+  if (min2_32u (5, 3, 7 , 8) != 8)
+    abort ();
+
+  if (max2_32u (5, 3, 7 , 8) != 7)
+    abort ();
+
+  if (min3_32u (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (max3_32u (3, 5, 7 , 8) != 8)
+    abort ();
+
+  if (min3_32u (5, 3, 7 , 8) != 7)
+    abort ();
+
+  if (max3_32u (5, 3, 7 , 8) != 8)
+    abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/select_cond_2.c b/gcc/testsuite/gcc.dg/select_cond_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..623b2272ee7b4b00130d5e9fb8c781dbc5d4189e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/select_cond_2.c
@@ -0,0 +1,99 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */
+
+#include <stdint.h>
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u(uint32_t a, uint32_t b) {
+  uint32_t result;
+  uint32_t m = (a >= b) - 1;
+  result = (a & m) | (b & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t max1_32u(uint32_t a, uint32_t b) {
+  uint32_t result;
+  uint32_t m = (a <= b) - 1;
+  result = (a & m) | (b & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min2_32u(uint32_t a, uint32_t b) {
+  uint32_t result;
+  uint32_t m = (a > b) - 1;
+  result = (a & m) | (b & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t max2_32u(uint32_t a, uint32_t b) {
+  uint32_t result;
+  uint32_t m = (a < b) - 1;
+  result = (a & m) | (b & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min3_32u(uint32_t a, uint32_t b) {
+  uint32_t result;
+  uint32_t m = (a == b) - 1;
+  result = (a & m) | (b & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t max3_32u(uint32_t a, uint32_t b) {
+  uint32_t result;
+  uint32_t m = (a != b) - 1;
+  result = (a & m) | (b & ~m);
+  return result;
+}
+
+/* { dg-final { scan-tree-dump-not {_[0-9]+ \? c_[0-9]+\(D\) : d_[0-9]+\(D\)} "optimized" } } */
+/* { dg-final { scan-tree-dump-times {MIN_EXPR} 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times {MAX_EXPR} 2 "optimized" } } */
+
+extern void abort ();
+
+int main () {
+
+  if (min1_32u (7 , 8) != 7)
+    abort ();
+
+  if (max1_32u (7 , 8) != 8)
+    abort ();
+
+  if (min1_32u (7 , 8) != 7)
+    abort ();
+
+  if (max1_32u (7, 8) != 8)
+    abort ();
+
+  if (min2_32u (7 , 8) != 7)
+    abort ();
+
+  if (max2_32u (7 , 8) != 8)
+    abort ();
+
+  if (min2_32u (7 , 8) != 7)
+    abort ();
+
+  if (max2_32u (7 , 8) != 8)
+    abort ();
+
+  if (min3_32u (7 , 8) != 7)
+    abort ();
+
+  if (max3_32u (7 , 8) != 8)
+    abort ();
+
+  if (min3_32u (7 , 8) != 7)
+    abort ();
+
+  if (max3_32u (7 , 8) != 8)
+    abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/select_cond_3.c b/gcc/testsuite/gcc.dg/select_cond_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..754e73b121ce360564481b1fa7e1dd4fb27642a3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/select_cond_3.c
@@ -0,0 +1,101 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */
+
+#include <stdint.h>
+
+__attribute__((noipa, noinline))
+int32_t min1_32u1(int32_t a, int32_t b, int32_t c, int32_t d) {
+  uint32_t result;
+  uint32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u2(uint32_t a, uint32_t b, int32_t c, int32_t d) {
+  uint32_t result;
+  uint32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u3(uint32_t a, uint32_t b, uint32_t c, int32_t d) {
+  uint32_t result;
+  uint32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u4(uint32_t a, uint32_t b, int32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u5(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u6(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  int32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u7(uint8_t a, uint16_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  int32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u8(uint32_t a, uint64_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  int32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+/* { dg-final { scan-tree-dump-times {_[0-9]+ \? c[^ ]+ : d[^ ]+;} 8 "optimized" } } */
+
+extern void abort ();
+
+int main () {
+
+  if (min1_32u1 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (min1_32u2 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (min1_32u3 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (min1_32u4 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (min1_32u5 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (min1_32u6 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (min1_32u7 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (min1_32u8 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  return 0;
+}
  

Comments

Richard Biener Sept. 26, 2022, 10:13 a.m. UTC | #1
On Fri, 23 Sep 2022, Tamar Christina wrote:

> Hi All,
> 
> GCC currently recognizes some of these for signed but not unsigned types.
> It also has trouble dealing with casts in between because these are handled
> by the fold machinery.
> 
> This moves the pattern detection to match.pd instead.

where's the other copy and is it possible to remove it with this patch?

> We fold e.g.:
> 
> uint32_t min1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
>   uint32_t result;
>   uint32_t m = (a >= b) - 1;
>   result = (c & m) | (d & ~m);
>   return result;
> }
> 
> into a >= b ? c : d for all integral types.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	* match.pd: New cond select pattern.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.dg/select_cond_1.c: New test.
> 	* gcc.dg/select_cond_2.c: New test.
> 	* gcc.dg/select_cond_3.c: New test.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 39da61bf117a6eb2924fc8a6473fb37ddadd60e9..7b8f50410acfd0afafc5606e972cfc4e125d3a5d 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3577,6 +3577,25 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>    (cond (le @0 integer_zerop@1) (negate@2 @0) integer_zerop@1)
>    (max @2 @1))
>  
> +/* (a & ((c `op` d) - 1)) | (b & ~((c `op` d) - 1)) ->  c `op` d ? a : b.  */
> +(for op (simple_comparison)
> + (simplify
> +  (bit_xor:c
> +   (convert2? @0)
> +   (bit_and:c
> +    (convert2? (bit_xor:c @1 @0))
> +    (convert3? (negate (convert? (op@4 @2 @3))))))

I have a hard time matching this up with the comment above.  It looks like

  (T2)@0 ^ ((T2)(@1 ^ @0) & (T3)(-(T)(@2 <=> @3))

I suppose we've canonicalized (a & x) | (b & ~x) somehow.

It also looks like we move coversions inside a negate but not outside
or into a bit_and?  Can we do a better job here to avoid the explosions
in the number of patterns?

> +  /* Alternative form, where some canonicalization were not done due to the
> +     arguments being signed.  */

these two comment lines belong ...

> +  (if (INTEGRAL_TYPE_P (type) && tree_zero_one_valued_p (@4))
> +   (convert:type (cond @4 @1 @0))))

... here?

> + (simplify
> +  (bit_ior:c
> +   (mult:c @0 (convert (convert2? (op@4 @2 @3))))
> +   (bit_and:c @1 (convert (plus:c integer_minus_onep (convert (op@4 @2 @3))))))

can you add a comment with what you match here as well?  You are using
(op@4 @2 @3) twice, the point of the @4 capture is that the second
occurance could be just @4.  I wonder how we arrived at the 
multiplication here?  That seems somewhat premature :/

Thanks,
Richard.

> +  (if (INTEGRAL_TYPE_P (type) && tree_zero_one_valued_p (@4))
> +   (cond @4 @0 @1))))
> +
>  /* Simplifications of shift and rotates.  */
>  
>  (for rotate (lrotate rrotate)
> diff --git a/gcc/testsuite/gcc.dg/select_cond_1.c b/gcc/testsuite/gcc.dg/select_cond_1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..9eb9959baafe5fffeec24e4e3ae656f8fcfe943c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/select_cond_1.c
> @@ -0,0 +1,97 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */
> +
> +#include <stdint.h>
> +
> +__attribute__((noipa, noinline))
> +uint32_t min1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
> +  uint32_t result;
> +  uint32_t m = (a >= b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t max1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
> +  uint32_t result;
> +  uint32_t m = (a <= b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t min2_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
> +  uint32_t result;
> +  uint32_t m = (a > b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t max2_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
> +  uint32_t result;
> +  uint32_t m = (a < b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t min3_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
> +  uint32_t result;
> +  uint32_t m = (a == b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t max3_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
> +  uint32_t result;
> +  uint32_t m = (a != b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +/* { dg-final { scan-tree-dump-times {_[0-9]+ \? c_[0-9]+\(D\) : d_[0-9]+\(D\)} 6 "optimized" } } */
> +
> +extern void abort ();
> +
> +int main () {
> +
> +  if (min1_32u (3, 5, 7 , 8) != 7)
> +    abort ();
> +
> +  if (max1_32u (3, 5, 7 , 8) != 8)
> +    abort ();
> +
> +  if (min1_32u (5, 3, 7 , 8) != 8)
> +    abort ();
> +
> +  if (max1_32u (5, 3, 7 , 8) != 7)
> +    abort ();
> +
> +  if (min2_32u (3, 5, 7 , 8) != 7)
> +    abort ();
> +
> +  if (max2_32u (3, 5, 7 , 8) != 8)
> +    abort ();
> +
> +  if (min2_32u (5, 3, 7 , 8) != 8)
> +    abort ();
> +
> +  if (max2_32u (5, 3, 7 , 8) != 7)
> +    abort ();
> +
> +  if (min3_32u (3, 5, 7 , 8) != 7)
> +    abort ();
> +
> +  if (max3_32u (3, 5, 7 , 8) != 8)
> +    abort ();
> +
> +  if (min3_32u (5, 3, 7 , 8) != 7)
> +    abort ();
> +
> +  if (max3_32u (5, 3, 7 , 8) != 8)
> +    abort ();
> +
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.dg/select_cond_2.c b/gcc/testsuite/gcc.dg/select_cond_2.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..623b2272ee7b4b00130d5e9fb8c781dbc5d4189e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/select_cond_2.c
> @@ -0,0 +1,99 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */
> +
> +#include <stdint.h>
> +
> +__attribute__((noipa, noinline))
> +uint32_t min1_32u(uint32_t a, uint32_t b) {
> +  uint32_t result;
> +  uint32_t m = (a >= b) - 1;
> +  result = (a & m) | (b & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t max1_32u(uint32_t a, uint32_t b) {
> +  uint32_t result;
> +  uint32_t m = (a <= b) - 1;
> +  result = (a & m) | (b & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t min2_32u(uint32_t a, uint32_t b) {
> +  uint32_t result;
> +  uint32_t m = (a > b) - 1;
> +  result = (a & m) | (b & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t max2_32u(uint32_t a, uint32_t b) {
> +  uint32_t result;
> +  uint32_t m = (a < b) - 1;
> +  result = (a & m) | (b & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t min3_32u(uint32_t a, uint32_t b) {
> +  uint32_t result;
> +  uint32_t m = (a == b) - 1;
> +  result = (a & m) | (b & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t max3_32u(uint32_t a, uint32_t b) {
> +  uint32_t result;
> +  uint32_t m = (a != b) - 1;
> +  result = (a & m) | (b & ~m);
> +  return result;
> +}
> +
> +/* { dg-final { scan-tree-dump-not {_[0-9]+ \? c_[0-9]+\(D\) : d_[0-9]+\(D\)} "optimized" } } */
> +/* { dg-final { scan-tree-dump-times {MIN_EXPR} 2 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times {MAX_EXPR} 2 "optimized" } } */
> +
> +extern void abort ();
> +
> +int main () {
> +
> +  if (min1_32u (7 , 8) != 7)
> +    abort ();
> +
> +  if (max1_32u (7 , 8) != 8)
> +    abort ();
> +
> +  if (min1_32u (7 , 8) != 7)
> +    abort ();
> +
> +  if (max1_32u (7, 8) != 8)
> +    abort ();
> +
> +  if (min2_32u (7 , 8) != 7)
> +    abort ();
> +
> +  if (max2_32u (7 , 8) != 8)
> +    abort ();
> +
> +  if (min2_32u (7 , 8) != 7)
> +    abort ();
> +
> +  if (max2_32u (7 , 8) != 8)
> +    abort ();
> +
> +  if (min3_32u (7 , 8) != 7)
> +    abort ();
> +
> +  if (max3_32u (7 , 8) != 8)
> +    abort ();
> +
> +  if (min3_32u (7 , 8) != 7)
> +    abort ();
> +
> +  if (max3_32u (7 , 8) != 8)
> +    abort ();
> +
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.dg/select_cond_3.c b/gcc/testsuite/gcc.dg/select_cond_3.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..754e73b121ce360564481b1fa7e1dd4fb27642a3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/select_cond_3.c
> @@ -0,0 +1,101 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */
> +
> +#include <stdint.h>
> +
> +__attribute__((noipa, noinline))
> +int32_t min1_32u1(int32_t a, int32_t b, int32_t c, int32_t d) {
> +  uint32_t result;
> +  uint32_t m = (a >= b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t min1_32u2(uint32_t a, uint32_t b, int32_t c, int32_t d) {
> +  uint32_t result;
> +  uint32_t m = (a >= b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t min1_32u3(uint32_t a, uint32_t b, uint32_t c, int32_t d) {
> +  uint32_t result;
> +  uint32_t m = (a >= b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t min1_32u4(uint32_t a, uint32_t b, int32_t c, uint32_t d) {
> +  uint32_t result;
> +  uint32_t m = (a >= b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t min1_32u5(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
> +  uint32_t result;
> +  uint32_t m = (a >= b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t min1_32u6(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
> +  uint32_t result;
> +  int32_t m = (a >= b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t min1_32u7(uint8_t a, uint16_t b, uint32_t c, uint32_t d) {
> +  uint32_t result;
> +  int32_t m = (a >= b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t min1_32u8(uint32_t a, uint64_t b, uint32_t c, uint32_t d) {
> +  uint32_t result;
> +  int32_t m = (a >= b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +/* { dg-final { scan-tree-dump-times {_[0-9]+ \? c[^ ]+ : d[^ ]+;} 8 "optimized" } } */
> +
> +extern void abort ();
> +
> +int main () {
> +
> +  if (min1_32u1 (3, 5, 7 , 8) != 7)
> +    abort ();
> +
> +  if (min1_32u2 (3, 5, 7 , 8) != 7)
> +    abort ();
> +
> +  if (min1_32u3 (3, 5, 7 , 8) != 7)
> +    abort ();
> +
> +  if (min1_32u4 (3, 5, 7 , 8) != 7)
> +    abort ();
> +
> +  if (min1_32u5 (3, 5, 7 , 8) != 7)
> +    abort ();
> +
> +  if (min1_32u6 (3, 5, 7 , 8) != 7)
> +    abort ();
> +
> +  if (min1_32u7 (3, 5, 7 , 8) != 7)
> +    abort ();
> +
> +  if (min1_32u8 (3, 5, 7 , 8) != 7)
> +    abort ();
> +
> +  return 0;
> +}
> 
> 
> 
> 
>
  
Tamar Christina Oct. 31, 2022, 11:47 a.m. UTC | #2
> > This moves the pattern detection to match.pd instead.
> 
> where's the other copy and is it possible to remove it with this patch?
> 

It looks like it's spread over various passes. Starting with forwardprop.

> 
> > + (simplify
> > +  (bit_ior:c
> > +   (mult:c @0 (convert (convert2? (op@4 @2 @3))))
> > +   (bit_and:c @1 (convert (plus:c integer_minus_onep (convert (op@4
> > + @2 @3))))))
> 
> can you add a comment with what you match here as well?  You are using
> (op@4 @2 @3) twice, the point of the @4 capture is that the second
> occurance could be just @4.  I wonder how we arrived at the multiplication
> here?  That seems somewhat premature :/

That's being done by forwardprop.

To remove the various converts I broke down the operations into several
Smaller steps that on their own are sound optimizations.  When taken all
together it reduced the versions with casts down to the final optimized version.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* match.pd: New cond select pattern.
	(inverted_simple_comparison): New.

gcc/testsuite/ChangeLog:

	* gcc.dg/select_cond_1.c: New test.
	* gcc.dg/select_cond_2.c: New test.
	* gcc.dg/select_cond_3.c: New test.

--- inline copy of patch ---

diff --git a/gcc/match.pd b/gcc/match.pd
index 45f8941396ffd843f1bb23ab638c3b8c6d0d54b6..84d931c5d559ef1a605ea1121fbe655fcf870195 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -51,8 +51,9 @@ along with GCC; see the file COPYING3.  If not see
   unge ungt ne eq unlt unle ordered   unordered ge   gt   le   lt   ltgt uneq)
 (define_operator_list swapped_tcc_comparison
   gt   ge   eq ne le   lt   unordered ordered   ungt unge unlt unle uneq ltgt)
-(define_operator_list simple_comparison         lt   le   eq ne ge   gt)
-(define_operator_list swapped_simple_comparison gt   ge   eq ne le   lt)
+(define_operator_list simple_comparison          lt   le   eq ne ge   gt)
+(define_operator_list inverted_simple_comparison ge   gt   ne eq lt   le)
+(define_operator_list swapped_simple_comparison  gt   ge   eq ne le   lt)
 
 #include "cfn-operators.pd"
 
@@ -2081,7 +2082,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (if (!canonicalize_math_p ())
  (for cmp (gt lt ge le)
   (simplify
-   (mult (convert (cmp @0 @1)) @2)
+   (mult:c (convert (cmp @0 @1)) @2)
    (cond (cmp @0 @1) @2 { build_zero_cst (type); }))))
 
 /* For integral types with undefined overflow and C != 0 fold
@@ -3551,6 +3552,63 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (cond (le @0 integer_zerop@1) (negate@2 @0) integer_zerop@1)
   (max @2 @1))
 
+/* (a & ((c `cmp` d) - 1)) | (b & ~((c `cmp` d) - 1)) ->  c `cmp` d ? a : b.
+   by matching the canonicanized xor form.  This relies on: 0 ^ a == a and
+   (a ^ b) ^ a == b.  So (((a ^ b) & -((unsigned T)(c `cmp` d))) ^ b) into
+   c `cmp` d ? a : b.  */
+(for cmp (simple_comparison)
+     icmp (inverted_simple_comparison)
+ (simplify
+  (bit_xor:c
+   @0
+   (bit_and:c
+    (bit_xor:c @1 @0)
+    (negate (cmp@4 @2 @3))))
+  (if (INTEGRAL_TYPE_P (type) && tree_zero_one_valued_p (@4))
+   (convert:type (cond @4 @1 @0))))
+ /* ((unsigned T)(((signed T) (a `cmp` b)) + -1)) -> a `icmp` b ? -1 : 0.  */
+ (simplify
+  (convert (plus:c integer_minus_onep (convert2?@1 (cmp@2 @3 @4))))
+  (if (tree_zero_one_valued_p (@2)
+       && INTEGRAL_TYPE_P (type)
+       && TYPE_UNSIGNED (type)
+       && !TYPE_UNSIGNED (TREE_TYPE (@1))
+       && useless_type_conversion_p (unsigned_type_for (TREE_TYPE (@1)), type))
+   (cond (icmp @3 @4) { build_all_ones_cst (type); }
+		      { build_zero_cst (type); })))
+ /* ((-(unsigned T)(a `cmp` b)) & c) -> a `cmp` b ? c : 0.  */
+ (simplify
+  (bit_and:c
+   (convert2?@5 (negate@4 (convert (cmp@0 @1 @2)))) @3)
+  (if (tree_zero_one_valued_p (@0)
+       && INTEGRAL_TYPE_P (type)
+       && TYPE_UNSIGNED (TREE_TYPE (@4))
+       && useless_type_conversion_p (unsigned_type_for (TREE_TYPE (@5)),
+							TREE_TYPE (@4)))
+   (cond (cmp @1 @2) @3 { build_zero_cst (type); })))
+ /* (a `cmp` b) ? d : (a `icmp` b) ? c : e -> a `cmp` b ? d : c.  */
+ (simplify
+  (cond (cmp@0 @1 @2) @4
+   (cond (icmp @1 @2) @3 @5))
+  (cond @0 @4 @3))
+
+ /* (a `cmp` b) ? ((a `icmp` b) ? <dead> : c) : d -> a `cmp` b ? c : d.  */
+ (simplify
+  (cond (cmp@0 @1 @2)
+   (cond (icmp @1 @2) @3 @4) @5)
+  (cond @0 @4 @5)))
+
+(for op (bit_ior bit_and bit_xor)
+ /* (a `op` (b ? 0 : c) -> b ? a `op` 0 : a `op` c
+    and (a `op` (b ? c : 0) -> b ? a `op` c : a `op` 0
+    we only fold 0 because one operand is guaranteed to simplify.  */
+ (simplify
+  (op:c (cond @0 zerop@3 @1) @2)
+  (cond @0 (op @3 @2) (op @1 @2)))
+ (simplify
+  (op:c (cond @0 @1 zerop@3) @2)
+  (cond @0 (op @1 @2) (op @3 @2))))
+
 /* Simplifications of shift and rotates.  */
 
 (for rotate (lrotate rrotate)
diff --git a/gcc/testsuite/gcc.dg/select_cond_1.c b/gcc/testsuite/gcc.dg/select_cond_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..9eb9959baafe5fffeec24e4e3ae656f8fcfe943c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/select_cond_1.c
@@ -0,0 +1,97 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */
+
+#include <stdint.h>
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t max1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a <= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min2_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a > b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t max2_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a < b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min3_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a == b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t max3_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a != b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+/* { dg-final { scan-tree-dump-times {_[0-9]+ \? c_[0-9]+\(D\) : d_[0-9]+\(D\)} 6 "optimized" } } */
+
+extern void abort ();
+
+int main () {
+
+  if (min1_32u (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (max1_32u (3, 5, 7 , 8) != 8)
+    abort ();
+
+  if (min1_32u (5, 3, 7 , 8) != 8)
+    abort ();
+
+  if (max1_32u (5, 3, 7 , 8) != 7)
+    abort ();
+
+  if (min2_32u (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (max2_32u (3, 5, 7 , 8) != 8)
+    abort ();
+
+  if (min2_32u (5, 3, 7 , 8) != 8)
+    abort ();
+
+  if (max2_32u (5, 3, 7 , 8) != 7)
+    abort ();
+
+  if (min3_32u (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (max3_32u (3, 5, 7 , 8) != 8)
+    abort ();
+
+  if (min3_32u (5, 3, 7 , 8) != 7)
+    abort ();
+
+  if (max3_32u (5, 3, 7 , 8) != 8)
+    abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/select_cond_2.c b/gcc/testsuite/gcc.dg/select_cond_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..623b2272ee7b4b00130d5e9fb8c781dbc5d4189e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/select_cond_2.c
@@ -0,0 +1,99 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */
+
+#include <stdint.h>
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u(uint32_t a, uint32_t b) {
+  uint32_t result;
+  uint32_t m = (a >= b) - 1;
+  result = (a & m) | (b & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t max1_32u(uint32_t a, uint32_t b) {
+  uint32_t result;
+  uint32_t m = (a <= b) - 1;
+  result = (a & m) | (b & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min2_32u(uint32_t a, uint32_t b) {
+  uint32_t result;
+  uint32_t m = (a > b) - 1;
+  result = (a & m) | (b & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t max2_32u(uint32_t a, uint32_t b) {
+  uint32_t result;
+  uint32_t m = (a < b) - 1;
+  result = (a & m) | (b & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min3_32u(uint32_t a, uint32_t b) {
+  uint32_t result;
+  uint32_t m = (a == b) - 1;
+  result = (a & m) | (b & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t max3_32u(uint32_t a, uint32_t b) {
+  uint32_t result;
+  uint32_t m = (a != b) - 1;
+  result = (a & m) | (b & ~m);
+  return result;
+}
+
+/* { dg-final { scan-tree-dump-not {_[0-9]+ \? c_[0-9]+\(D\) : d_[0-9]+\(D\)} "optimized" } } */
+/* { dg-final { scan-tree-dump-times {MIN_EXPR} 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times {MAX_EXPR} 2 "optimized" } } */
+
+extern void abort ();
+
+int main () {
+
+  if (min1_32u (7 , 8) != 7)
+    abort ();
+
+  if (max1_32u (7 , 8) != 8)
+    abort ();
+
+  if (min1_32u (7 , 8) != 7)
+    abort ();
+
+  if (max1_32u (7, 8) != 8)
+    abort ();
+
+  if (min2_32u (7 , 8) != 7)
+    abort ();
+
+  if (max2_32u (7 , 8) != 8)
+    abort ();
+
+  if (min2_32u (7 , 8) != 7)
+    abort ();
+
+  if (max2_32u (7 , 8) != 8)
+    abort ();
+
+  if (min3_32u (7 , 8) != 7)
+    abort ();
+
+  if (max3_32u (7 , 8) != 8)
+    abort ();
+
+  if (min3_32u (7 , 8) != 7)
+    abort ();
+
+  if (max3_32u (7 , 8) != 8)
+    abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/select_cond_3.c b/gcc/testsuite/gcc.dg/select_cond_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..80087d4a0bd37066dbd44335aa3d1bf13fe3fa95
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/select_cond_3.c
@@ -0,0 +1,101 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */
+
+#include <stdint.h>
+
+__attribute__((noipa, noinline))
+int32_t min1_32u1(int32_t a, int32_t b, int32_t c, int32_t d) {
+  uint32_t result;
+  uint32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u2(uint32_t a, uint32_t b, int32_t c, int32_t d) {
+  uint32_t result;
+  uint32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u3(uint32_t a, uint32_t b, uint32_t c, int32_t d) {
+  uint32_t result;
+  uint32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u4(uint32_t a, uint32_t b, int32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u5(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u6(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  int32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u7(uint8_t a, uint16_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  int32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u8(uint32_t a, uint64_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  int32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+/* { dg-final { scan-tree-dump-times {_[0-9]+ \? [^ ]+ : [^ ]+;} 8 "optimized" } } */
+
+extern void abort ();
+
+int main () {
+
+  if (min1_32u1 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (min1_32u2 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (min1_32u3 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (min1_32u4 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (min1_32u5 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (min1_32u6 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (min1_32u7 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (min1_32u8 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  return 0;
+}
  
Richard Biener Nov. 7, 2022, 1:17 p.m. UTC | #3
On Mon, 31 Oct 2022, Tamar Christina wrote:

> > > This moves the pattern detection to match.pd instead.
> > 
> > where's the other copy and is it possible to remove it with this patch?
> > 
> 
> It looks like it's spread over various passes. Starting with forwardprop.

Can you be more specific?  If forwprop contains special pattern matching
for these please extend that instead.

> > 
> > > + (simplify
> > > +  (bit_ior:c
> > > +   (mult:c @0 (convert (convert2? (op@4 @2 @3))))
> > > +   (bit_and:c @1 (convert (plus:c integer_minus_onep (convert (op@4
> > > + @2 @3))))))
> > 
> > can you add a comment with what you match here as well?  You are using
> > (op@4 @2 @3) twice, the point of the @4 capture is that the second
> > occurance could be just @4.  I wonder how we arrived at the multiplication
> > here?  That seems somewhat premature :/
> 
> That's being done by forwardprop.

That's maybe the pass where the multiplication comes from but it's likely
some match.pd pattern that triggers instead?

> To remove the various converts I broke down the operations into several
> Smaller steps that on their own are sound optimizations.  When taken all
> together it reduced the versions with casts down to the final optimized version.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	* match.pd: New cond select pattern.
> 	(inverted_simple_comparison): New.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.dg/select_cond_1.c: New test.
> 	* gcc.dg/select_cond_2.c: New test.
> 	* gcc.dg/select_cond_3.c: New test.
> 
> --- inline copy of patch ---
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 45f8941396ffd843f1bb23ab638c3b8c6d0d54b6..84d931c5d559ef1a605ea1121fbe655fcf870195 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -51,8 +51,9 @@ along with GCC; see the file COPYING3.  If not see
>    unge ungt ne eq unlt unle ordered   unordered ge   gt   le   lt   ltgt uneq)
>  (define_operator_list swapped_tcc_comparison
>    gt   ge   eq ne le   lt   unordered ordered   ungt unge unlt unle uneq ltgt)
> -(define_operator_list simple_comparison         lt   le   eq ne ge   gt)
> -(define_operator_list swapped_simple_comparison gt   ge   eq ne le   lt)
> +(define_operator_list simple_comparison          lt   le   eq ne ge   gt)
> +(define_operator_list inverted_simple_comparison ge   gt   ne eq lt   le)
> +(define_operator_list swapped_simple_comparison  gt   ge   eq ne le   lt)
>  
>  #include "cfn-operators.pd"
>  
> @@ -2081,7 +2082,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  (if (!canonicalize_math_p ())
>   (for cmp (gt lt ge le)
>    (simplify
> -   (mult (convert (cmp @0 @1)) @2)
> +   (mult:c (convert (cmp @0 @1)) @2)
>     (cond (cmp @0 @1) @2 { build_zero_cst (type); }))))

That looks OK.

>  /* For integral types with undefined overflow and C != 0 fold
> @@ -3551,6 +3552,63 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>    (cond (le @0 integer_zerop@1) (negate@2 @0) integer_zerop@1)
>    (max @2 @1))
>  
> +/* (a & ((c `cmp` d) - 1)) | (b & ~((c `cmp` d) - 1)) ->  c `cmp` d ? a : b.
> +   by matching the canonicanized xor form.  This relies on: 0 ^ a == a and
> +   (a ^ b) ^ a == b.  So (((a ^ b) & -((unsigned T)(c `cmp` d))) ^ b) into
> +   c `cmp` d ? a : b.  */
> +(for cmp (simple_comparison)
> +     icmp (inverted_simple_comparison)
> + (simplify
> +  (bit_xor:c
> +   @0
> +   (bit_and:c
> +    (bit_xor:c @1 @0)
> +    (negate (cmp@4 @2 @3))))
> +  (if (INTEGRAL_TYPE_P (type) && tree_zero_one_valued_p (@4))
> +   (convert:type (cond @4 @1 @0))))
> + /* ((unsigned T)(((signed T) (a `cmp` b)) + -1)) -> a `icmp` b ? -1 : 0.  */
> + (simplify
> +  (convert (plus:c integer_minus_onep (convert2?@1 (cmp@2 @3 @4))))
> +  (if (tree_zero_one_valued_p (@2)
> +       && INTEGRAL_TYPE_P (type)
> +       && TYPE_UNSIGNED (type)
> +       && !TYPE_UNSIGNED (TREE_TYPE (@1))
> +       && useless_type_conversion_p (unsigned_type_for (TREE_TYPE (@1)), type))
> +   (cond (icmp @3 @4) { build_all_ones_cst (type); }
> +		      { build_zero_cst (type); })))
> + /* ((-(unsigned T)(a `cmp` b)) & c) -> a `cmp` b ? c : 0.  */
> + (simplify
> +  (bit_and:c
> +   (convert2?@5 (negate@4 (convert (cmp@0 @1 @2)))) @3)
> +  (if (tree_zero_one_valued_p (@0)
> +       && INTEGRAL_TYPE_P (type)
> +       && TYPE_UNSIGNED (TREE_TYPE (@4))
> +       && useless_type_conversion_p (unsigned_type_for (TREE_TYPE (@5)),
> +							TREE_TYPE (@4)))
> +   (cond (cmp @1 @2) @3 { build_zero_cst (type); })))

Can you split out the above three patterns?

> + /* (a `cmp` b) ? d : (a `icmp` b) ? c : e -> a `cmp` b ? d : c.  */
> + (simplify
> +  (cond (cmp@0 @1 @2) @4
> +   (cond (icmp @1 @2) @3 @5))
> +  (cond @0 @4 @3))
> +
> + /* (a `cmp` b) ? ((a `icmp` b) ? <dead> : c) : d -> a `cmp` b ? c : d.  */
> + (simplify
> +  (cond (cmp@0 @1 @2)
> +   (cond (icmp @1 @2) @3 @4) @5)
> +  (cond @0 @4 @5)))

Can you put the two above next to the set of related

(for cnd (cond vec_cond)
 /* A ? B : (A ? X : C) -> A ? B : C.  */
 (simplify
  (cnd @0 (cnd @0 @1 @2) @3)
  (cnd @0 @1 @3))
 (simplify
  (cnd @0 @1 (cnd @0 @2 @3))
  (cnd @0 @1 @3))
...

please?  I wonder if these aren't already handled by

 /* A ? B : (!A ? C : X) -> A ? B : C.  */
 /* ???  This matches embedded conditions open-coded because genmatch
    would generate matching code for conditions in separate stmts only.
    The following is still important to merge then and else arm cases
    from if-conversion.  */
 (simplify
  (cnd @0 @1 (cnd @2 @3 @4))
  (if (inverse_conditions_p (@0, @2))
   (cnd @0 @1 @3)))
 (simplify
  (cnd @0 (cnd @1 @2 @3) @4)
  (if (inverse_conditions_p (@0, @1))
   (cnd @0 @3 @4)))

more specifically if not inverse_conditions_p should be enhanced to
follow the SSA def chain here?

> +(for op (bit_ior bit_and bit_xor)
> + /* (a `op` (b ? 0 : c) -> b ? a `op` 0 : a `op` c
> +    and (a `op` (b ? c : 0) -> b ? a `op` c : a `op` 0
> +    we only fold 0 because one operand is guaranteed to simplify.  */
> + (simplify
> +  (op:c (cond @0 zerop@3 @1) @2)
> +  (cond @0 (op @3 @2) (op @1 @2)))
> + (simplify
> +  (op:c (cond @0 @1 zerop@3) @2)
> +  (cond @0 (op @1 @2) (op @3 @2))))
> +

Not sure if I said it before but we have 
fold_binary_op_with_conditional_arg doing this on GENERIC so the above
should be #if GIMPLE and possibly next to

#if GIMPLE
/* From fold_binary_op_with_conditional_arg handle the case of
   rewriting (a ? b : c) > d to a ? (b > d) : (c > d) when the
   compares simplify.  */
(for cmp (simple_comparison)
 (simplify
  (cmp:c (cond @0 @1 @2) @3)
  /* Do not move possibly trapping operations into the conditional as this
     pessimizes code and causes gimplification issues when applied late.  
*/
  (if (!FLOAT_TYPE_P (TREE_TYPE (@3))
       || !operation_could_trap_p (cmp, true, false, @3))
   (cond @0 (cmp! @1 @3) (cmp! @2 @3)))))
#endif

covering fold_binary_op_with_conditional_arg in a more general way
would be nice but it's hard (or rather expensive) to express in
match.pd terms since you have to enumerate each and every supported
outer operation.

Thanks,
Richard.

>  /* Simplifications of shift and rotates.  */
>  
>  (for rotate (lrotate rrotate)
> diff --git a/gcc/testsuite/gcc.dg/select_cond_1.c b/gcc/testsuite/gcc.dg/select_cond_1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..9eb9959baafe5fffeec24e4e3ae656f8fcfe943c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/select_cond_1.c
> @@ -0,0 +1,97 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */
> +
> +#include <stdint.h>
> +
> +__attribute__((noipa, noinline))
> +uint32_t min1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
> +  uint32_t result;
> +  uint32_t m = (a >= b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t max1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
> +  uint32_t result;
> +  uint32_t m = (a <= b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t min2_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
> +  uint32_t result;
> +  uint32_t m = (a > b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t max2_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
> +  uint32_t result;
> +  uint32_t m = (a < b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t min3_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
> +  uint32_t result;
> +  uint32_t m = (a == b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t max3_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
> +  uint32_t result;
> +  uint32_t m = (a != b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +/* { dg-final { scan-tree-dump-times {_[0-9]+ \? c_[0-9]+\(D\) : d_[0-9]+\(D\)} 6 "optimized" } } */
> +
> +extern void abort ();
> +
> +int main () {
> +
> +  if (min1_32u (3, 5, 7 , 8) != 7)
> +    abort ();
> +
> +  if (max1_32u (3, 5, 7 , 8) != 8)
> +    abort ();
> +
> +  if (min1_32u (5, 3, 7 , 8) != 8)
> +    abort ();
> +
> +  if (max1_32u (5, 3, 7 , 8) != 7)
> +    abort ();
> +
> +  if (min2_32u (3, 5, 7 , 8) != 7)
> +    abort ();
> +
> +  if (max2_32u (3, 5, 7 , 8) != 8)
> +    abort ();
> +
> +  if (min2_32u (5, 3, 7 , 8) != 8)
> +    abort ();
> +
> +  if (max2_32u (5, 3, 7 , 8) != 7)
> +    abort ();
> +
> +  if (min3_32u (3, 5, 7 , 8) != 7)
> +    abort ();
> +
> +  if (max3_32u (3, 5, 7 , 8) != 8)
> +    abort ();
> +
> +  if (min3_32u (5, 3, 7 , 8) != 7)
> +    abort ();
> +
> +  if (max3_32u (5, 3, 7 , 8) != 8)
> +    abort ();
> +
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.dg/select_cond_2.c b/gcc/testsuite/gcc.dg/select_cond_2.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..623b2272ee7b4b00130d5e9fb8c781dbc5d4189e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/select_cond_2.c
> @@ -0,0 +1,99 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */
> +
> +#include <stdint.h>
> +
> +__attribute__((noipa, noinline))
> +uint32_t min1_32u(uint32_t a, uint32_t b) {
> +  uint32_t result;
> +  uint32_t m = (a >= b) - 1;
> +  result = (a & m) | (b & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t max1_32u(uint32_t a, uint32_t b) {
> +  uint32_t result;
> +  uint32_t m = (a <= b) - 1;
> +  result = (a & m) | (b & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t min2_32u(uint32_t a, uint32_t b) {
> +  uint32_t result;
> +  uint32_t m = (a > b) - 1;
> +  result = (a & m) | (b & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t max2_32u(uint32_t a, uint32_t b) {
> +  uint32_t result;
> +  uint32_t m = (a < b) - 1;
> +  result = (a & m) | (b & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t min3_32u(uint32_t a, uint32_t b) {
> +  uint32_t result;
> +  uint32_t m = (a == b) - 1;
> +  result = (a & m) | (b & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t max3_32u(uint32_t a, uint32_t b) {
> +  uint32_t result;
> +  uint32_t m = (a != b) - 1;
> +  result = (a & m) | (b & ~m);
> +  return result;
> +}
> +
> +/* { dg-final { scan-tree-dump-not {_[0-9]+ \? c_[0-9]+\(D\) : d_[0-9]+\(D\)} "optimized" } } */
> +/* { dg-final { scan-tree-dump-times {MIN_EXPR} 2 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times {MAX_EXPR} 2 "optimized" } } */
> +
> +extern void abort ();
> +
> +int main () {
> +
> +  if (min1_32u (7 , 8) != 7)
> +    abort ();
> +
> +  if (max1_32u (7 , 8) != 8)
> +    abort ();
> +
> +  if (min1_32u (7 , 8) != 7)
> +    abort ();
> +
> +  if (max1_32u (7, 8) != 8)
> +    abort ();
> +
> +  if (min2_32u (7 , 8) != 7)
> +    abort ();
> +
> +  if (max2_32u (7 , 8) != 8)
> +    abort ();
> +
> +  if (min2_32u (7 , 8) != 7)
> +    abort ();
> +
> +  if (max2_32u (7 , 8) != 8)
> +    abort ();
> +
> +  if (min3_32u (7 , 8) != 7)
> +    abort ();
> +
> +  if (max3_32u (7 , 8) != 8)
> +    abort ();
> +
> +  if (min3_32u (7 , 8) != 7)
> +    abort ();
> +
> +  if (max3_32u (7 , 8) != 8)
> +    abort ();
> +
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.dg/select_cond_3.c b/gcc/testsuite/gcc.dg/select_cond_3.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..80087d4a0bd37066dbd44335aa3d1bf13fe3fa95
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/select_cond_3.c
> @@ -0,0 +1,101 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */
> +
> +#include <stdint.h>
> +
> +__attribute__((noipa, noinline))
> +int32_t min1_32u1(int32_t a, int32_t b, int32_t c, int32_t d) {
> +  uint32_t result;
> +  uint32_t m = (a >= b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t min1_32u2(uint32_t a, uint32_t b, int32_t c, int32_t d) {
> +  uint32_t result;
> +  uint32_t m = (a >= b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t min1_32u3(uint32_t a, uint32_t b, uint32_t c, int32_t d) {
> +  uint32_t result;
> +  uint32_t m = (a >= b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t min1_32u4(uint32_t a, uint32_t b, int32_t c, uint32_t d) {
> +  uint32_t result;
> +  uint32_t m = (a >= b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t min1_32u5(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
> +  uint32_t result;
> +  uint32_t m = (a >= b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t min1_32u6(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
> +  uint32_t result;
> +  int32_t m = (a >= b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t min1_32u7(uint8_t a, uint16_t b, uint32_t c, uint32_t d) {
> +  uint32_t result;
> +  int32_t m = (a >= b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +__attribute__((noipa, noinline))
> +uint32_t min1_32u8(uint32_t a, uint64_t b, uint32_t c, uint32_t d) {
> +  uint32_t result;
> +  int32_t m = (a >= b) - 1;
> +  result = (c & m) | (d & ~m);
> +  return result;
> +}
> +
> +/* { dg-final { scan-tree-dump-times {_[0-9]+ \? [^ ]+ : [^ ]+;} 8 "optimized" } } */
> +
> +extern void abort ();
> +
> +int main () {
> +
> +  if (min1_32u1 (3, 5, 7 , 8) != 7)
> +    abort ();
> +
> +  if (min1_32u2 (3, 5, 7 , 8) != 7)
> +    abort ();
> +
> +  if (min1_32u3 (3, 5, 7 , 8) != 7)
> +    abort ();
> +
> +  if (min1_32u4 (3, 5, 7 , 8) != 7)
> +    abort ();
> +
> +  if (min1_32u5 (3, 5, 7 , 8) != 7)
> +    abort ();
> +
> +  if (min1_32u6 (3, 5, 7 , 8) != 7)
> +    abort ();
> +
> +  if (min1_32u7 (3, 5, 7 , 8) != 7)
> +    abort ();
> +
> +  if (min1_32u8 (3, 5, 7 , 8) != 7)
> +    abort ();
> +
> +  return 0;
> +}
>
  

Patch

--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3577,6 +3577,25 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (cond (le @0 integer_zerop@1) (negate@2 @0) integer_zerop@1)
   (max @2 @1))
 
+/* (a & ((c `op` d) - 1)) | (b & ~((c `op` d) - 1)) ->  c `op` d ? a : b.  */
+(for op (simple_comparison)
+ (simplify
+  (bit_xor:c
+   (convert2? @0)
+   (bit_and:c
+    (convert2? (bit_xor:c @1 @0))
+    (convert3? (negate (convert? (op@4 @2 @3))))))
+  /* Alternative form, where some canonicalization were not done due to the
+     arguments being signed.  */
+  (if (INTEGRAL_TYPE_P (type) && tree_zero_one_valued_p (@4))
+   (convert:type (cond @4 @1 @0))))
+ (simplify
+  (bit_ior:c
+   (mult:c @0 (convert (convert2? (op@4 @2 @3))))
+   (bit_and:c @1 (convert (plus:c integer_minus_onep (convert (op@4 @2 @3))))))
+  (if (INTEGRAL_TYPE_P (type) && tree_zero_one_valued_p (@4))
+   (cond @4 @0 @1))))
+
 /* Simplifications of shift and rotates.  */
 
 (for rotate (lrotate rrotate)
diff --git a/gcc/testsuite/gcc.dg/select_cond_1.c b/gcc/testsuite/gcc.dg/select_cond_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..9eb9959baafe5fffeec24e4e3ae656f8fcfe943c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/select_cond_1.c
@@ -0,0 +1,97 @@ 
+/* { dg-do run } */
+/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */
+
+#include <stdint.h>
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t max1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a <= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min2_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a > b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t max2_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a < b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min3_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a == b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t max3_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a != b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+/* { dg-final { scan-tree-dump-times {_[0-9]+ \? c_[0-9]+\(D\) : d_[0-9]+\(D\)} 6 "optimized" } } */
+
+extern void abort ();
+
+int main () {
+
+  if (min1_32u (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (max1_32u (3, 5, 7 , 8) != 8)
+    abort ();
+
+  if (min1_32u (5, 3, 7 , 8) != 8)
+    abort ();
+
+  if (max1_32u (5, 3, 7 , 8) != 7)
+    abort ();
+
+  if (min2_32u (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (max2_32u (3, 5, 7 , 8) != 8)
+    abort ();
+
+  if (min2_32u (5, 3, 7 , 8) != 8)
+    abort ();
+
+  if (max2_32u (5, 3, 7 , 8) != 7)
+    abort ();
+
+  if (min3_32u (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (max3_32u (3, 5, 7 , 8) != 8)
+    abort ();
+
+  if (min3_32u (5, 3, 7 , 8) != 7)
+    abort ();
+
+  if (max3_32u (5, 3, 7 , 8) != 8)
+    abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/select_cond_2.c b/gcc/testsuite/gcc.dg/select_cond_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..623b2272ee7b4b00130d5e9fb8c781dbc5d4189e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/select_cond_2.c
@@ -0,0 +1,99 @@ 
+/* { dg-do run } */
+/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */
+
+#include <stdint.h>
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u(uint32_t a, uint32_t b) {
+  uint32_t result;
+  uint32_t m = (a >= b) - 1;
+  result = (a & m) | (b & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t max1_32u(uint32_t a, uint32_t b) {
+  uint32_t result;
+  uint32_t m = (a <= b) - 1;
+  result = (a & m) | (b & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min2_32u(uint32_t a, uint32_t b) {
+  uint32_t result;
+  uint32_t m = (a > b) - 1;
+  result = (a & m) | (b & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t max2_32u(uint32_t a, uint32_t b) {
+  uint32_t result;
+  uint32_t m = (a < b) - 1;
+  result = (a & m) | (b & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min3_32u(uint32_t a, uint32_t b) {
+  uint32_t result;
+  uint32_t m = (a == b) - 1;
+  result = (a & m) | (b & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t max3_32u(uint32_t a, uint32_t b) {
+  uint32_t result;
+  uint32_t m = (a != b) - 1;
+  result = (a & m) | (b & ~m);
+  return result;
+}
+
+/* { dg-final { scan-tree-dump-not {_[0-9]+ \? c_[0-9]+\(D\) : d_[0-9]+\(D\)} "optimized" } } */
+/* { dg-final { scan-tree-dump-times {MIN_EXPR} 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times {MAX_EXPR} 2 "optimized" } } */
+
+extern void abort ();
+
+int main () {
+
+  if (min1_32u (7 , 8) != 7)
+    abort ();
+
+  if (max1_32u (7 , 8) != 8)
+    abort ();
+
+  if (min1_32u (7 , 8) != 7)
+    abort ();
+
+  if (max1_32u (7, 8) != 8)
+    abort ();
+
+  if (min2_32u (7 , 8) != 7)
+    abort ();
+
+  if (max2_32u (7 , 8) != 8)
+    abort ();
+
+  if (min2_32u (7 , 8) != 7)
+    abort ();
+
+  if (max2_32u (7 , 8) != 8)
+    abort ();
+
+  if (min3_32u (7 , 8) != 7)
+    abort ();
+
+  if (max3_32u (7 , 8) != 8)
+    abort ();
+
+  if (min3_32u (7 , 8) != 7)
+    abort ();
+
+  if (max3_32u (7 , 8) != 8)
+    abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/select_cond_3.c b/gcc/testsuite/gcc.dg/select_cond_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..754e73b121ce360564481b1fa7e1dd4fb27642a3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/select_cond_3.c
@@ -0,0 +1,101 @@ 
+/* { dg-do run } */
+/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */
+
+#include <stdint.h>
+
+__attribute__((noipa, noinline))
+int32_t min1_32u1(int32_t a, int32_t b, int32_t c, int32_t d) {
+  uint32_t result;
+  uint32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u2(uint32_t a, uint32_t b, int32_t c, int32_t d) {
+  uint32_t result;
+  uint32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u3(uint32_t a, uint32_t b, uint32_t c, int32_t d) {
+  uint32_t result;
+  uint32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u4(uint32_t a, uint32_t b, int32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u5(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  uint32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u6(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  int32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u7(uint8_t a, uint16_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  int32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+__attribute__((noipa, noinline))
+uint32_t min1_32u8(uint32_t a, uint64_t b, uint32_t c, uint32_t d) {
+  uint32_t result;
+  int32_t m = (a >= b) - 1;
+  result = (c & m) | (d & ~m);
+  return result;
+}
+
+/* { dg-final { scan-tree-dump-times {_[0-9]+ \? c[^ ]+ : d[^ ]+;} 8 "optimized" } } */
+
+extern void abort ();
+
+int main () {
+
+  if (min1_32u1 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (min1_32u2 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (min1_32u3 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (min1_32u4 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (min1_32u5 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (min1_32u6 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (min1_32u7 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  if (min1_32u8 (3, 5, 7 , 8) != 7)
+    abort ();
+
+  return 0;
+}