[v2,3/4] LoongArch: Add fscaleb.{s, d} instructions as ldexp{sf, df}3
Checks
Commit Message
This allows optimizing __builtin_ldexp{,f} and __builtin_scalbn{,f} with
-fno-math-errno.
IMODE is added because we can't hard-code SI for operand 2: the fscaleb.d
instruction always takes the high half of both source registers into
account. See my_ldexp_long in the test case.
gcc/ChangeLog:
* config/loongarch/loongarch.md (UNSPEC_FSCALEB): New unspec.
(type): Add fscaleb.
(IMODE): New mode attr.
(ldexp<mode>3): New instruction template.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/fscaleb.c: New test.
---
gcc/config/loongarch/loongarch.md | 26 ++++++++++-
gcc/testsuite/gcc.target/loongarch/fscaleb.c | 48 ++++++++++++++++++++
2 files changed, 72 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/fscaleb.c
Comments
在 2022/11/9 下午9:53, Xi Ruoyao 写道:
> This allows optimizing __builtin_ldexp{,f} and __builtin_scalbn{,f} with
> -fno-math-errno.
>
> IMODE is added because we can't hard code SI for operand 2: fscaleb.d
> instruction always take the high half of both source registers into
> account. See my_ldexp_long in the test case.
>
> gcc/ChangeLog:
>
> * config/loongarch/loongarch.md (UNSPEC_FSCALEB): New unspec.
> (type): Add fscaleb.
> (IMODE): New mode attr.
> (ldexp<mode>3): New instruction template.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/loongarch/fscaleb.c: New test.
> ---
> gcc/config/loongarch/loongarch.md | 26 ++++++++++-
> gcc/testsuite/gcc.target/loongarch/fscaleb.c | 48 ++++++++++++++++++++
> 2 files changed, 72 insertions(+), 2 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/loongarch/fscaleb.c
>
> diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
> index eb127c346a3..c141c9adde2 100644
> --- a/gcc/config/loongarch/loongarch.md
> +++ b/gcc/config/loongarch/loongarch.md
> @@ -41,6 +41,7 @@ (define_c_enum "unspec" [
> UNSPEC_FTINT
> UNSPEC_FTINTRM
> UNSPEC_FTINTRP
> + UNSPEC_FSCALEB
>
> ;; Override return address for exception handling.
> UNSPEC_EH_RETURN
> @@ -220,6 +221,7 @@ (define_attr "qword_mode" "no,yes"
> ;; fcmp floating point compare
> ;; fcopysign floating point copysign
> ;; fcvt floating point convert
> +;; fscaleb floating point scale
> ;; fsqrt floating point square root
> ;; frsqrt floating point reciprocal square root
> ;; multi multiword sequence (or user asm statements)
> @@ -231,8 +233,8 @@ (define_attr "type"
> "unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
> prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
> shift,slt,signext,clz,trap,imul,idiv,move,
> - fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fsqrt,
> - frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
> + fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fscaleb,
> + fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
> (cond [(eq_attr "jirl" "!unset") (const_string "call")
> (eq_attr "got" "load") (const_string "load")
>
> @@ -418,6 +420,10 @@ (define_mode_attr UNITMODE [(SF "SF") (DF "DF")])
> ;; the controlling mode.
> (define_mode_attr HALFMODE [(DF "SI") (DI "SI") (TF "DI")])
>
> +;; This attribute gives the integer mode that has the same size of a
> +;; floating-point mode.
> +(define_mode_attr IMODE [(SF "SI") (DF "DI")])
> +
> ;; This code iterator allows signed and unsigned widening multiplications
> ;; to use the same template.
> (define_code_iterator any_extend [sign_extend zero_extend])
> @@ -1014,7 +1020,23 @@ (define_insn "copysign<mode>3"
> "fcopysign.<fmt>\t%0,%1,%2"
> [(set_attr "type" "fcopysign")
> (set_attr "mode" "<UNITMODE>")])
> +
> +;;
> +;; ....................
> +;;
> +;; FLOATING POINT SCALE
> +;;
> +;; ....................
>
> +(define_insn "ldexp<mode>3"
> + [(set (match_operand:ANYF 0 "register_operand" "=f")
> + (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")
> + (match_operand:<IMODE> 2 "register_operand" "f")]
> + UNSPEC_FSCALEB))]
> + "TARGET_HARD_FLOAT"
> + "fscaleb.<fmt>\t%0,%1,%2"
> + [(set_attr "type" "fscaleb")
> + (set_attr "mode" "<UNITMODE>")])
>
> ;;
> ;; ...................
> diff --git a/gcc/testsuite/gcc.target/loongarch/fscaleb.c b/gcc/testsuite/gcc.target/loongarch/fscaleb.c
> new file mode 100644
> index 00000000000..f18470fbb8f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/loongarch/fscaleb.c
> @@ -0,0 +1,48 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno" } */
> +/* { dg-final { scan-assembler-times "fscaleb\\.s" 3 } } */
> +/* { dg-final { scan-assembler-times "fscaleb\\.d" 4 } } */
> +/* { dg-final { scan-assembler-times "slli\\.w" 1 } } */
> +
> +double
> +my_scalbln (double a, long b)
> +{
> + return __builtin_scalbln (a, b);
> +}
> +
> +double
> +my_scalbn (double a, int b)
> +{
> + return __builtin_scalbn (a, b);
> +}
> +
> +
> +float
> +my_scalblnf (float a, long b)
> +{
> + return __builtin_scalblnf (a, b);
> +}
> +
> +float
> +my_scalbnf (float a, int b)
> +{
> + return __builtin_scalbnf (a, b);
> +}
> +
>
I think the tests for these four builtins (scalbln, scalblnf, scalbn and
scalbnf) should be guarded by the __FLT_RADIX__ macro.
These functions should be tested only if __FLT_RADIX__ has a value of 2.
On Sat, 2022-11-12 at 11:54 +0800, Lulu Cheng wrote:
>
> 在 2022/11/9 下午9:53, Xi Ruoyao 写道:
> > This allows optimizing __builtin_ldexp{,f} and __builtin_scalbn{,f}
> > with
> > -fno-math-errno.
> >
> > IMODE is added because we can't hard code SI for operand 2:
> > fscaleb.d
> > instruction always take the high half of both source registers into
> > account. See my_ldexp_long in the test case.
> >
> > gcc/ChangeLog:
> >
> > * config/loongarch/loongarch.md (UNSPEC_FSCALEB): New
> > unspec.
> > (type): Add fscaleb.
> > (IMODE): New mode attr.
> > (ldexp<mode>3): New instruction template.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/loongarch/fscaleb.c: New test.
> > ---
> > gcc/config/loongarch/loongarch.md | 26 ++++++++++-
> > gcc/testsuite/gcc.target/loongarch/fscaleb.c | 48
> > ++++++++++++++++++++
> > 2 files changed, 72 insertions(+), 2 deletions(-)
> > create mode 100644 gcc/testsuite/gcc.target/loongarch/fscaleb.c
> >
> > diff --git a/gcc/config/loongarch/loongarch.md
> > b/gcc/config/loongarch/loongarch.md
> > index eb127c346a3..c141c9adde2 100644
> > --- a/gcc/config/loongarch/loongarch.md
> > +++ b/gcc/config/loongarch/loongarch.md
> > @@ -41,6 +41,7 @@ (define_c_enum "unspec" [
> > UNSPEC_FTINT
> > UNSPEC_FTINTRM
> > UNSPEC_FTINTRP
> > + UNSPEC_FSCALEB
> >
> > ;; Override return address for exception handling.
> > UNSPEC_EH_RETURN
> > @@ -220,6 +221,7 @@ (define_attr "qword_mode" "no,yes"
> > ;; fcmp floating point compare
> > ;; fcopysign floating point copysign
> > ;; fcvt floating point convert
> > +;; fscaleb floating point scale
> > ;; fsqrt floating point square root
> > ;; frsqrt floating point reciprocal square root
> > ;; multi multiword sequence (or user asm statements)
> > @@ -231,8 +233,8 @@ (define_attr "type"
> >
> > "unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxs
> > tore,
> > prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
> > shift,slt,signext,clz,trap,imul,idiv,move,
> > -
> > fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fsqrt
> > ,
> > - frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
> > +
> > fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fscal
> > eb,
> > + fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
> > (cond [(eq_attr "jirl" "!unset") (const_string "call")
> > (eq_attr "got" "load") (const_string "load")
> >
> > @@ -418,6 +420,10 @@ (define_mode_attr UNITMODE [(SF "SF") (DF
> > "DF")])
> > ;; the controlling mode.
> > (define_mode_attr HALFMODE [(DF "SI") (DI "SI") (TF "DI")])
> >
> > +;; This attribute gives the integer mode that has the same size of
> > a
> > +;; floating-point mode.
> > +(define_mode_attr IMODE [(SF "SI") (DF "DI")])
> > +
> > ;; This code iterator allows signed and unsigned widening
> > multiplications
> > ;; to use the same template.
> > (define_code_iterator any_extend [sign_extend zero_extend])
> > @@ -1014,7 +1020,23 @@ (define_insn "copysign<mode>3"
> > "fcopysign.<fmt>\t%0,%1,%2"
> > [(set_attr "type" "fcopysign")
> > (set_attr "mode" "<UNITMODE>")])
> > +
> > +;;
> > +;; ....................
> > +;;
> > +;; FLOATING POINT SCALE
> > +;;
> > +;; ....................
> >
> > +(define_insn "ldexp<mode>3"
> > + [(set (match_operand:ANYF 0 "register_operand" "=f")
> > + (unspec:ANYF [(match_operand:ANYF 1 "register_operand"
> > "f")
> > + (match_operand:<IMODE> 2 "register_operand"
> > "f")]
> > + UNSPEC_FSCALEB))]
> > + "TARGET_HARD_FLOAT"
> > + "fscaleb.<fmt>\t%0,%1,%2"
> > + [(set_attr "type" "fscaleb")
> > + (set_attr "mode" "<UNITMODE>")])
> >
> > ;;
> > ;; ...................
> > diff --git a/gcc/testsuite/gcc.target/loongarch/fscaleb.c
> > b/gcc/testsuite/gcc.target/loongarch/fscaleb.c
> > new file mode 100644
> > index 00000000000..f18470fbb8f
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/loongarch/fscaleb.c
> > @@ -0,0 +1,48 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno" }
> > */
> > +/* { dg-final { scan-assembler-times "fscaleb\\.s" 3 } } */
> > +/* { dg-final { scan-assembler-times "fscaleb\\.d" 4 } } */
> > +/* { dg-final { scan-assembler-times "slli\\.w" 1 } } */
> > +
> > +double
> > +my_scalbln (double a, long b)
> > +{
> > + return __builtin_scalbln (a, b);
> > +}
> > +
> > +double
> > +my_scalbn (double a, int b)
> > +{
> > + return __builtin_scalbn (a, b);
> > +}
> > +
> > +
> > +float
> > +my_scalblnf (float a, long b)
> > +{
> > + return __builtin_scalblnf (a, b);
> > +}
> > +
> > +float
> > +my_scalbnf (float a, int b)
> > +{
> > + return __builtin_scalbnf (a, b);
> > +}
> > +
> >
> I think scalbln/scalblnf/scalbn/scalbnf these four builtin test
> function
> with the macro __FLT_RADIX__ control.
>
> These functions are tested only if the macro __FLT_RADIX__ has a value
> of 2.
LoongArch does not use RESET_FLOAT_FORMAT on SFmode, so __FLT_RADIX__ is
always 2.
在 2022/11/12 下午12:40, Xi Ruoyao 写道:
> On Sat, 2022-11-12 at 11:54 +0800, Lulu Cheng wrote:
>> 在 2022/11/9 下午9:53, Xi Ruoyao 写道:
>>> This allows optimizing __builtin_ldexp{,f} and __builtin_scalbn{,f}
>>> with
>>> -fno-math-errno.
>>>
>>> IMODE is added because we can't hard code SI for operand 2:
>>> fscaleb.d
>>> instruction always take the high half of both source registers into
>>> account. See my_ldexp_long in the test case.
>>>
>>> gcc/ChangeLog:
>>>
>>> * config/loongarch/loongarch.md (UNSPEC_FSCALEB): New
>>> unspec.
>>> (type): Add fscaleb.
>>> (IMODE): New mode attr.
>>> (ldexp<mode>3): New instruction template.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> * gcc.target/loongarch/fscaleb.c: New test.
>>> ---
>>> gcc/config/loongarch/loongarch.md | 26 ++++++++++-
>>> gcc/testsuite/gcc.target/loongarch/fscaleb.c | 48
>>> ++++++++++++++++++++
>>> 2 files changed, 72 insertions(+), 2 deletions(-)
>>> create mode 100644 gcc/testsuite/gcc.target/loongarch/fscaleb.c
>>>
>>> diff --git a/gcc/config/loongarch/loongarch.md
>>> b/gcc/config/loongarch/loongarch.md
>>> index eb127c346a3..c141c9adde2 100644
>>> --- a/gcc/config/loongarch/loongarch.md
>>> +++ b/gcc/config/loongarch/loongarch.md
>>> @@ -41,6 +41,7 @@ (define_c_enum "unspec" [
>>> UNSPEC_FTINT
>>> UNSPEC_FTINTRM
>>> UNSPEC_FTINTRP
>>> + UNSPEC_FSCALEB
>>>
>>> ;; Override return address for exception handling.
>>> UNSPEC_EH_RETURN
>>> @@ -220,6 +221,7 @@ (define_attr "qword_mode" "no,yes"
>>> ;; fcmp floating point compare
>>> ;; fcopysign floating point copysign
>>> ;; fcvt floating point convert
>>> +;; fscaleb floating point scale
>>> ;; fsqrt floating point square root
>>> ;; frsqrt floating point reciprocal square root
>>> ;; multi multiword sequence (or user asm statements)
>>> @@ -231,8 +233,8 @@ (define_attr "type"
>>>
>>> "unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxs
>>> tore,
>>> prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
>>> shift,slt,signext,clz,trap,imul,idiv,move,
>>> -
>>> fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fsqrt
>>> ,
>>> - frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
>>> +
>>> fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fscal
>>> eb,
>>> + fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
>>> (cond [(eq_attr "jirl" "!unset") (const_string "call")
>>> (eq_attr "got" "load") (const_string "load")
>>>
>>> @@ -418,6 +420,10 @@ (define_mode_attr UNITMODE [(SF "SF") (DF
>>> "DF")])
>>> ;; the controlling mode.
>>> (define_mode_attr HALFMODE [(DF "SI") (DI "SI") (TF "DI")])
>>>
>>> +;; This attribute gives the integer mode that has the same size of
>>> a
>>> +;; floating-point mode.
>>> +(define_mode_attr IMODE [(SF "SI") (DF "DI")])
>>> +
>>> ;; This code iterator allows signed and unsigned widening
>>> multiplications
>>> ;; to use the same template.
>>> (define_code_iterator any_extend [sign_extend zero_extend])
>>> @@ -1014,7 +1020,23 @@ (define_insn "copysign<mode>3"
>>> "fcopysign.<fmt>\t%0,%1,%2"
>>> [(set_attr "type" "fcopysign")
>>> (set_attr "mode" "<UNITMODE>")])
>>> +
>>> +;;
>>> +;; ....................
>>> +;;
>>> +;; FLOATING POINT SCALE
>>> +;;
>>> +;; ....................
>>>
>>> +(define_insn "ldexp<mode>3"
>>> + [(set (match_operand:ANYF 0 "register_operand" "=f")
>>> + (unspec:ANYF [(match_operand:ANYF 1 "register_operand"
>>> "f")
>>> + (match_operand:<IMODE> 2 "register_operand"
>>> "f")]
>>> + UNSPEC_FSCALEB))]
>>> + "TARGET_HARD_FLOAT"
>>> + "fscaleb.<fmt>\t%0,%1,%2"
>>> + [(set_attr "type" "fscaleb")
>>> + (set_attr "mode" "<UNITMODE>")])
>>>
>>> ;;
>>> ;; ...................
>>> diff --git a/gcc/testsuite/gcc.target/loongarch/fscaleb.c
>>> b/gcc/testsuite/gcc.target/loongarch/fscaleb.c
>>> new file mode 100644
>>> index 00000000000..f18470fbb8f
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/loongarch/fscaleb.c
>>> @@ -0,0 +1,48 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno" }
>>> */
>>> +/* { dg-final { scan-assembler-times "fscaleb\\.s" 3 } } */
>>> +/* { dg-final { scan-assembler-times "fscaleb\\.d" 4 } } */
>>> +/* { dg-final { scan-assembler-times "slli\\.w" 1 } } */
>>> +
>>> +double
>>> +my_scalbln (double a, long b)
>>> +{
>>> + return __builtin_scalbln (a, b);
>>> +}
>>> +
>>> +double
>>> +my_scalbn (double a, int b)
>>> +{
>>> + return __builtin_scalbn (a, b);
>>> +}
>>> +
>>> +
>>> +float
>>> +my_scalblnf (float a, long b)
>>> +{
>>> + return __builtin_scalblnf (a, b);
>>> +}
>>> +
>>> +float
>>> +my_scalbnf (float a, int b)
>>> +{
>>> + return __builtin_scalbnf (a, b);
>>> +}
>>> +
>>>
>> I think scalbln/scalblnf/scalbn/scalbnf these four builtin test
>> function
>> with the macro __FLT_RADIX__ control.
>>
>> These functions are tested only if the macro __FLT_RADIX__ has a value
>> of 2.
> LoongArch does not use RESET_FLOAT_FORMAT on SFmode, so __FLT_RADIX__ is
> always 2.
OK, I have no more questions.
@@ -41,6 +41,7 @@ (define_c_enum "unspec" [
UNSPEC_FTINT
UNSPEC_FTINTRM
UNSPEC_FTINTRP
+ UNSPEC_FSCALEB
;; Override return address for exception handling.
UNSPEC_EH_RETURN
@@ -220,6 +221,7 @@ (define_attr "qword_mode" "no,yes"
;; fcmp floating point compare
;; fcopysign floating point copysign
;; fcvt floating point convert
+;; fscaleb floating point scale
;; fsqrt floating point square root
;; frsqrt floating point reciprocal square root
;; multi multiword sequence (or user asm statements)
@@ -231,8 +233,8 @@ (define_attr "type"
"unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
shift,slt,signext,clz,trap,imul,idiv,move,
- fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fsqrt,
- frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
+ fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fscaleb,
+ fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
(cond [(eq_attr "jirl" "!unset") (const_string "call")
(eq_attr "got" "load") (const_string "load")
@@ -418,6 +420,10 @@ (define_mode_attr UNITMODE [(SF "SF") (DF "DF")])
;; the controlling mode.
(define_mode_attr HALFMODE [(DF "SI") (DI "SI") (TF "DI")])
+;; This attribute gives the integer mode that has the same size as a
+;; floating-point mode.
+(define_mode_attr IMODE [(SF "SI") (DF "DI")])
+
;; This code iterator allows signed and unsigned widening multiplications
;; to use the same template.
(define_code_iterator any_extend [sign_extend zero_extend])
@@ -1014,7 +1020,23 @@ (define_insn "copysign<mode>3"
"fcopysign.<fmt>\t%0,%1,%2"
[(set_attr "type" "fcopysign")
(set_attr "mode" "<UNITMODE>")])
+
+;;
+;; ....................
+;;
+;; FLOATING POINT SCALE
+;;
+;; ....................
+(define_insn "ldexp<mode>3"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")
+ (match_operand:<IMODE> 2 "register_operand" "f")]
+ UNSPEC_FSCALEB))]
+ "TARGET_HARD_FLOAT"
+ "fscaleb.<fmt>\t%0,%1,%2"
+ [(set_attr "type" "fscaleb")
+ (set_attr "mode" "<UNITMODE>")])
;;
;; ...................
new file mode 100644
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno" } */
+/* { dg-final { scan-assembler-times "fscaleb\\.s" 3 } } */
+/* { dg-final { scan-assembler-times "fscaleb\\.d" 4 } } */
+/* { dg-final { scan-assembler-times "slli\\.w" 1 } } */
+
+double
+my_scalbln (double a, long b)
+{
+ return __builtin_scalbln (a, b);
+}
+
+double
+my_scalbn (double a, int b)
+{
+ return __builtin_scalbn (a, b);
+}
+
+double
+my_ldexp (double a, int b)
+{
+ return __builtin_ldexp (a, b);
+}
+
+float
+my_scalblnf (float a, long b)
+{
+ return __builtin_scalblnf (a, b);
+}
+
+float
+my_scalbnf (float a, int b)
+{
+ return __builtin_scalbnf (a, b);
+}
+
+float
+my_ldexpf (float a, int b)
+{
+ return __builtin_ldexpf (a, b);
+}
+
+/* b is converted to int by the call, so it must be sign-extended to 64 bits for fscaleb.d.  */
+double
+my_ldexp_long (double a, long b)
+{
+ return __builtin_ldexp (a, b);
+}