[V3] LoongArch: Provide kernel fpu functions

Message ID 20230306095934.609589-1-chenhuacai@loongson.cn
State New
Headers
Series [V3] LoongArch: Provide kernel fpu functions |

Commit Message

Huacai Chen March 6, 2023, 9:59 a.m. UTC
  Provide kernel_fpu_begin()/kernel_fpu_end() to allow the kernel itself
to use fpu. They can be used by some other kernel components, e.g., the
AMDGPU graphic driver for DCN.

Reported-by: WANG Xuerui <kernel@xen0n.name>
Tested-by: WANG Xuerui <kernel@xen0n.name>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
V2: Use non-GPL exports and update commit messages.
V3: Add spaces for coding style.

 arch/loongarch/include/asm/fpu.h |  3 +++
 arch/loongarch/kernel/Makefile   |  2 +-
 arch/loongarch/kernel/kfpu.c     | 41 ++++++++++++++++++++++++++++++++
 3 files changed, 45 insertions(+), 1 deletion(-)
 create mode 100644 arch/loongarch/kernel/kfpu.c
  

Comments

maobibo March 6, 2023, 12:03 p.m. UTC | #1
在 2023/3/6 17:59, Huacai Chen 写道:
> Provide kernel_fpu_begin()/kernel_fpu_end() to allow the kernel itself
> to use fpu. They can be used by some other kernel components, e.g., the
> AMDGPU graphic driver for DCN.
Since the kernel is compiled with -msoft-float, I guess the hardware FPU is
not used in the kernel at present :). However, it is worth a try.
> 
> Reported-by: WANG Xuerui <kernel@xen0n.name>
> Tested-by: WANG Xuerui <kernel@xen0n.name>
> Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
> ---
> V2: Use non-GPL exports and update commit messages.
> V3: Add spaces for coding style.
> 
>  arch/loongarch/include/asm/fpu.h |  3 +++
>  arch/loongarch/kernel/Makefile   |  2 +-
>  arch/loongarch/kernel/kfpu.c     | 41 ++++++++++++++++++++++++++++++++
>  3 files changed, 45 insertions(+), 1 deletion(-)
>  create mode 100644 arch/loongarch/kernel/kfpu.c
> 
> diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h
> index 358b254d9c1d..192f8e35d912 100644
> --- a/arch/loongarch/include/asm/fpu.h
> +++ b/arch/loongarch/include/asm/fpu.h
> @@ -21,6 +21,9 @@
>  
>  struct sigcontext;
>  
> +extern void kernel_fpu_begin(void);
> +extern void kernel_fpu_end(void);
> +
>  extern void _init_fpu(unsigned int);
>  extern void _save_fp(struct loongarch_fpu *);
>  extern void _restore_fp(struct loongarch_fpu *);
> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> index 78d4e3384305..9a72d91cd104 100644
> --- a/arch/loongarch/kernel/Makefile
> +++ b/arch/loongarch/kernel/Makefile
> @@ -13,7 +13,7 @@ obj-y		+= head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
>  obj-$(CONFIG_ACPI)		+= acpi.o
>  obj-$(CONFIG_EFI) 		+= efi.o
>  
> -obj-$(CONFIG_CPU_HAS_FPU)	+= fpu.o
> +obj-$(CONFIG_CPU_HAS_FPU)	+= fpu.o kfpu.o
>  
>  obj-$(CONFIG_ARCH_STRICT_ALIGN)	+= unaligned.o
>  
> diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c
> new file mode 100644
> index 000000000000..cd2a18fecdcc
> --- /dev/null
> +++ b/arch/loongarch/kernel/kfpu.c
> @@ -0,0 +1,41 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2023 Loongson Technology Corporation Limited
> + */
> +
> +#include <linux/cpu.h>
> +#include <linux/init.h>
> +#include <asm/fpu.h>
> +#include <asm/smp.h>
> +
> +static DEFINE_PER_CPU(bool, in_kernel_fpu);
> +
> +void kernel_fpu_begin(void)
> +{
> +	if (this_cpu_read(in_kernel_fpu))
> +		return;
> +
> +	preempt_disable();
> +	this_cpu_write(in_kernel_fpu, true);
> +
> +	if (!is_fpu_owner())
> +		enable_fpu();
> +	else
> +		_save_fp(&current->thread.fpu);
Do we need to initialize fcsr rather than using whatever fcsr value was
left behind by another process? There may be FPU exceptions enabled by
other tasks.

Regards
Bibo,mao
> +}
> +EXPORT_SYMBOL(kernel_fpu_begin);
> +
> +void kernel_fpu_end(void)
> +{
> +	if (!this_cpu_read(in_kernel_fpu))
> +		return;
> +
> +	if (!is_fpu_owner())
> +		disable_fpu();
> +	else
> +		_restore_fp(&current->thread.fpu);
> +
> +	this_cpu_write(in_kernel_fpu, false);
> +	preempt_enable();
> +}
> +EXPORT_SYMBOL(kernel_fpu_end);
  
Xi Ruoyao March 6, 2023, 12:09 p.m. UTC | #2
On Mon, 2023-03-06 at 20:03 +0800, maobibo wrote:
> 在 2023/3/6 17:59, Huacai Chen 写道:
> > Provide kernel_fpu_begin()/kernel_fpu_end() to allow the kernel itself
> > to use fpu. They can be used by some other kernel components, e.g., the
> > AMDGPU graphic driver for DCN.
> Since kernel is compiled with -msoft-float, I guess hw fpu will not be
> used in kernel by present:). However it is deserved to try.

See the draft AMD DCN support patch:
https://github.com/loongson/linux/commit/0ee299095c963938a7626c3121a8feef32251301
  
maobibo March 6, 2023, 12:20 p.m. UTC | #3
在 2023/3/6 20:09, Xi Ruoyao 写道:
> On Mon, 2023-03-06 at 20:03 +0800, maobibo wrote:
>> 在 2023/3/6 17:59, Huacai Chen 写道:
>>> Provide kernel_fpu_begin()/kernel_fpu_end() to allow the kernel itself
>>> to use fpu. They can be used by some other kernel components, e.g., the
>>> AMDGPU graphic driver for DCN.
>> Since kernel is compiled with -msoft-float, I guess hw fpu will not be
>> used in kernel by present:). However it is deserved to try.
> 
> See the draft AMD DCN support patch:
> https://github.com/loongson/linux/commit/0ee299095c963938a7626c3121a8feef32251301
Got it, thanks for pointing it out, good job:)

Regards
Bibo,Mao
>
  
WANG Xuerui March 6, 2023, 12:35 p.m. UTC | #4
Hi,

On 2023/3/6 20:03, maobibo wrote:
> 
> 
> 在 2023/3/6 17:59, Huacai Chen 写道:
>> Provide kernel_fpu_begin()/kernel_fpu_end() to allow the kernel itself
>> to use fpu. They can be used by some other kernel components, e.g., the
>> AMDGPU graphic driver for DCN.
> Since kernel is compiled with -msoft-float, I guess hw fpu will not be
> used in kernel by present:). However it is deserved to try.

This has been explained by Ruoyao, but we'd need such support anyway 
when we have LSX/LASX support mainlined in the future and want to 
accelerate various algorithms with those instructions. Maybe at that 
time you'll want to port some of those too ;-)
  
Huacai Chen March 6, 2023, 12:49 p.m. UTC | #5
On Mon, Mar 6, 2023 at 8:03 PM maobibo <maobibo@loongson.cn> wrote:
>
>
>
> 在 2023/3/6 17:59, Huacai Chen 写道:
> > Provide kernel_fpu_begin()/kernel_fpu_end() to allow the kernel itself
> > to use fpu. They can be used by some other kernel components, e.g., the
> > AMDGPU graphic driver for DCN.
> Since kernel is compiled with -msoft-float, I guess hw fpu will not be
> used in kernel by present:). However it is deserved to try.
> >
> > Reported-by: WANG Xuerui <kernel@xen0n.name>
> > Tested-by: WANG Xuerui <kernel@xen0n.name>
> > Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
> > ---
> > V2: Use non-GPL exports and update commit messages.
> > V3: Add spaces for coding style.
> >
> >  arch/loongarch/include/asm/fpu.h |  3 +++
> >  arch/loongarch/kernel/Makefile   |  2 +-
> >  arch/loongarch/kernel/kfpu.c     | 41 ++++++++++++++++++++++++++++++++
> >  3 files changed, 45 insertions(+), 1 deletion(-)
> >  create mode 100644 arch/loongarch/kernel/kfpu.c
> >
> > diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h
> > index 358b254d9c1d..192f8e35d912 100644
> > --- a/arch/loongarch/include/asm/fpu.h
> > +++ b/arch/loongarch/include/asm/fpu.h
> > @@ -21,6 +21,9 @@
> >
> >  struct sigcontext;
> >
> > +extern void kernel_fpu_begin(void);
> > +extern void kernel_fpu_end(void);
> > +
> >  extern void _init_fpu(unsigned int);
> >  extern void _save_fp(struct loongarch_fpu *);
> >  extern void _restore_fp(struct loongarch_fpu *);
> > diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> > index 78d4e3384305..9a72d91cd104 100644
> > --- a/arch/loongarch/kernel/Makefile
> > +++ b/arch/loongarch/kernel/Makefile
> > @@ -13,7 +13,7 @@ obj-y               += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
> >  obj-$(CONFIG_ACPI)           += acpi.o
> >  obj-$(CONFIG_EFI)            += efi.o
> >
> > -obj-$(CONFIG_CPU_HAS_FPU)    += fpu.o
> > +obj-$(CONFIG_CPU_HAS_FPU)    += fpu.o kfpu.o
> >
> >  obj-$(CONFIG_ARCH_STRICT_ALIGN)      += unaligned.o
> >
> > diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c
> > new file mode 100644
> > index 000000000000..cd2a18fecdcc
> > --- /dev/null
> > +++ b/arch/loongarch/kernel/kfpu.c
> > @@ -0,0 +1,41 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright (C) 2023 Loongson Technology Corporation Limited
> > + */
> > +
> > +#include <linux/cpu.h>
> > +#include <linux/init.h>
> > +#include <asm/fpu.h>
> > +#include <asm/smp.h>
> > +
> > +static DEFINE_PER_CPU(bool, in_kernel_fpu);
> > +
> > +void kernel_fpu_begin(void)
> > +{
> > +     if (this_cpu_read(in_kernel_fpu))
> > +             return;
> > +
> > +     preempt_disable();
> > +     this_cpu_write(in_kernel_fpu, true);
> > +
> > +     if (!is_fpu_owner())
> > +             enable_fpu();
> > +     else
> > +             _save_fp(&current->thread.fpu);
> Do we need initialize fcsr rather than using random fcsr value
> of other processes? There may be fpu exception enabled by
> other tasks.
Hmm, I think initializing fcsr to 0 is better here.

Huacai
>
> Regards
> Bibo,mao
> > +}
> > +EXPORT_SYMBOL(kernel_fpu_begin);
> > +
> > +void kernel_fpu_end(void)
> > +{
> > +     if (!this_cpu_read(in_kernel_fpu))
> > +             return;
> > +
> > +     if (!is_fpu_owner())
> > +             disable_fpu();
> > +     else
> > +             _restore_fp(&current->thread.fpu);
> > +
> > +     this_cpu_write(in_kernel_fpu, false);
> > +     preempt_enable();
> > +}
> > +EXPORT_SYMBOL(kernel_fpu_end);
>
>
  
Xi Ruoyao March 6, 2023, 1 p.m. UTC | #6
On Mon, 2023-03-06 at 20:49 +0800, Huacai Chen wrote:
> > > +     if (!is_fpu_owner())
> > > +             enable_fpu();
> > > +     else
> > > +             _save_fp(&current->thread.fpu);
> > Do we need initialize fcsr rather than using random fcsr value
> > of other processes? There may be fpu exception enabled by
> > other tasks.
> Emm, I think initialize fcsr to 0 is better here.

I guess it's necessary: if we use a "dirty" FCSR0 with some exceptions
enabled (esp. the inexact exception, which most developers expect to be
disabled), we may end up with an oops caused by a kernel FPE...
  
David Laight March 6, 2023, 4:57 p.m. UTC | #7
From: Huacai Chen
> Sent: 06 March 2023 10:00
> 
> Provide kernel_fpu_begin()/kernel_fpu_end() to allow the kernel itself
> to use fpu. They can be used by some other kernel components, e.g., the
> AMDGPU graphic driver for DCN.
> 
...
> +void kernel_fpu_end(void)
> +{
> +	if (!this_cpu_read(in_kernel_fpu))
> +		return;

Wouldn't it be better for that to be a counter?
Not sure what anyone else does.

> +
> +	if (!is_fpu_owner())
> +		disable_fpu();
> +	else
> +		_restore_fp(&current->thread.fpu);

Does that actually do the restore?
You really don't need to do it until 'return to user'.
That will speed up a subsequent kernel_fpu_begin().

In fact, couldn't kernel_fpu_end() just be preempt_enable()?

	David

> +
> +	this_cpu_write(in_kernel_fpu, false);
> +	preempt_enable();
> +}
> +EXPORT_SYMBOL(kernel_fpu_end);
> --
> 2.39.1

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
  
David Laight March 6, 2023, 5:16 p.m. UTC | #8
From: Huacai Chen
> Sent: 06 March 2023 10:00
> 
> Provide kernel_fpu_begin()/kernel_fpu_end() to allow the kernel itself
> to use fpu. They can be used by some other kernel components, e.g., the
> AMDGPU graphic driver for DCN.
> 
...
> diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c
> new file mode 100644
> index 000000000000..cd2a18fecdcc
> --- /dev/null
> +++ b/arch/loongarch/kernel/kfpu.c
> @@ -0,0 +1,41 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2023 Loongson Technology Corporation Limited
> + */
> +
> +#include <linux/cpu.h>
> +#include <linux/init.h>
> +#include <asm/fpu.h>
> +#include <asm/smp.h>
> +
> +static DEFINE_PER_CPU(bool, in_kernel_fpu);
> +
> +void kernel_fpu_begin(void)
> +{
> +	if (this_cpu_read(in_kernel_fpu))
> +		return;

Isn't this check entirely broken?
It absolutely needs to be inside the preempt_disable().
If there are nested requests then fpu use is disabled by the first
kernel_fpu_end() call.

> +
> +	preempt_disable();
> +	this_cpu_write(in_kernel_fpu, true);
> +
> +	if (!is_fpu_owner())
> +		enable_fpu();
> +	else
> +		_save_fp(&current->thread.fpu);
> +}

More interestingly, unless the kernel is doing the kind of
'lazy fpu switch' that x86 used to do (not sure it still does in Linux)
where the fpu registers can contain values for a different process
isn't it actually enough for kernel_fpu_begin() to just be:

	preempt_disable();
	if (current->fpu_regs_live)
		__save_fp(current);
	preempt_enable();

and for kernel_fpu_end() to basically be a nop.

Then rely on the 'return to user' path to pick up the
live fpu registers from the save area.

After all, you pretty much don't want to load the fpu regs
every time a process wakes up and goes back to sleep without
returning to user.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
  
Huacai Chen March 7, 2023, 1:44 a.m. UTC | #9
Hi, David,

On Tue, Mar 7, 2023 at 1:17 AM David Laight <David.Laight@aculab.com> wrote:
>
> From: Huacai Chen
> > Sent: 06 March 2023 10:00
> >
> > Provide kernel_fpu_begin()/kernel_fpu_end() to allow the kernel itself
> > to use fpu. They can be used by some other kernel components, e.g., the
> > AMDGPU graphic driver for DCN.
> >
> ...
> > diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c
> > new file mode 100644
> > index 000000000000..cd2a18fecdcc
> > --- /dev/null
> > +++ b/arch/loongarch/kernel/kfpu.c
> > @@ -0,0 +1,41 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright (C) 2023 Loongson Technology Corporation Limited
> > + */
> > +
> > +#include <linux/cpu.h>
> > +#include <linux/init.h>
> > +#include <asm/fpu.h>
> > +#include <asm/smp.h>
> > +
> > +static DEFINE_PER_CPU(bool, in_kernel_fpu);
> > +
> > +void kernel_fpu_begin(void)
> > +{
> > +     if (this_cpu_read(in_kernel_fpu))
> > +             return;
>
> Isn't this check entirely broken?
> It absolutely needs to be inside the preempt_disable().
Yes, you are right, this check should be after preempt_disable().

> If there are nested requests then fpu use is disabled by the first
> kernel_fpu_end() call.
This check should be changed to WARN_ON() as x86 does since nested
requests are unexpected use cases.

>
> > +
> > +     preempt_disable();
> > +     this_cpu_write(in_kernel_fpu, true);
> > +
> > +     if (!is_fpu_owner())
> > +             enable_fpu();
> > +     else
> > +             _save_fp(&current->thread.fpu);
> > +}
>
> More interestingly, unless the kernel is doing the kind of
> 'lazy fpu switch' that x86 used to do (not sure it still does in Linux)
> where the fpu registers can contain values for a different process
> isn't it actually enough for kernel_fpu_begin() to just be:
>
>         preempt_disable();
>         if (current->fpu_regs_live)
I think this condition is the same as is_fpu_owner(). Moreover,
LoongArch doesn't allow an FPU-disabled exception to occur in the kernel,
so we should make sure the FPU is enabled in kernel_fpu_begin().

>                 __save_fp(current);
>         preempt_enable();
>
> and for kernel_fpu_end() to basically be a nop.
>
> Then rely on the 'return to user' path to pick up the
> live fpu registers from the save area.
>
> After all, you pretty much don't want to load the fpu regs
> every time a process wakes up and goes back to sleep without
> returning to user.
I think this is not a common case, so it isn't worth modifying many
files to optimize it for now.

Huacai
>
>         David
>
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)
>
  

Patch

diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h
index 358b254d9c1d..192f8e35d912 100644
--- a/arch/loongarch/include/asm/fpu.h
+++ b/arch/loongarch/include/asm/fpu.h
@@ -21,6 +21,9 @@ 
 
 struct sigcontext;
 
+extern void kernel_fpu_begin(void);
+extern void kernel_fpu_end(void);
+
 extern void _init_fpu(unsigned int);
 extern void _save_fp(struct loongarch_fpu *);
 extern void _restore_fp(struct loongarch_fpu *);
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 78d4e3384305..9a72d91cd104 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -13,7 +13,7 @@  obj-y		+= head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
 obj-$(CONFIG_ACPI)		+= acpi.o
 obj-$(CONFIG_EFI) 		+= efi.o
 
-obj-$(CONFIG_CPU_HAS_FPU)	+= fpu.o
+obj-$(CONFIG_CPU_HAS_FPU)	+= fpu.o kfpu.o
 
 obj-$(CONFIG_ARCH_STRICT_ALIGN)	+= unaligned.o
 
diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c
new file mode 100644
index 000000000000..cd2a18fecdcc
--- /dev/null
+++ b/arch/loongarch/kernel/kfpu.c
@@ -0,0 +1,41 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <asm/fpu.h>
+#include <asm/smp.h>
+
+static DEFINE_PER_CPU(bool, in_kernel_fpu);
+
+void kernel_fpu_begin(void)
+{
+	if (this_cpu_read(in_kernel_fpu))
+		return;
+
+	preempt_disable();
+	this_cpu_write(in_kernel_fpu, true);
+
+	if (!is_fpu_owner())
+		enable_fpu();
+	else
+		_save_fp(&current->thread.fpu);
+}
+EXPORT_SYMBOL(kernel_fpu_begin);
+
+void kernel_fpu_end(void)
+{
+	if (!this_cpu_read(in_kernel_fpu))
+		return;
+
+	if (!is_fpu_owner())
+		disable_fpu();
+	else
+		_restore_fp(&current->thread.fpu);
+
+	this_cpu_write(in_kernel_fpu, false);
+	preempt_enable();
+}
+EXPORT_SYMBOL(kernel_fpu_end);