LoongArch: Add unaligned access support

Message ID 20221016133418.2122777-1-chenhuacai@loongson.cn
State New
Headers
Series LoongArch: Add unaligned access support |

Commit Message

Huacai Chen Oct. 16, 2022, 1:34 p.m. UTC
  Loongson-2 series (Loongson-2K500, Loongson-2K1000) don't support
unaligned access in hardware, while for Loongson-3 series (Loongson-3A5000,
Loongson-3C5000) hardware support for unaligned access is configurable.
This patch adds unaligned access emulation for those LoongArch
processors without hardware support.

Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/Kconfig            |   2 +
 arch/loongarch/include/asm/inst.h |  14 ++
 arch/loongarch/kernel/Makefile    |   3 +-
 arch/loongarch/kernel/traps.c     |  27 ++
 arch/loongarch/kernel/unaligned.c | 401 ++++++++++++++++++++++++++++++
 arch/loongarch/lib/Makefile       |   2 +-
 arch/loongarch/lib/unaligned.S    |  93 +++++++
 7 files changed, 540 insertions(+), 2 deletions(-)
 create mode 100644 arch/loongarch/kernel/unaligned.c
 create mode 100644 arch/loongarch/lib/unaligned.S
  

Comments

Xi Ruoyao Oct. 16, 2022, 3:04 p.m. UTC | #1
On Sun, 2022-10-16 at 21:34 +0800, Huacai Chen wrote:

> Loongson-2 series (Loongson-2K500, Loongson-2K1000)

"2K1000LA"? "2K1000" is puzzling because of a name conflict with the
MIPS-based model.

/* snip */

> +static inline unsigned long read_fpr(unsigned int fd)
> +{
> +#define READ_FPR(fd, __value)          \
> +{                                      \

Unnecessary curly brace pair.

> +       __asm__ __volatile__(           \
> +       "movfr2gr.d\t%0, $f%1\n\t"      \
> +       : "=r"(__value) : "i"(fd));     \
> +}

I'm not sure if this is a correct use of "i" constraint.  Maybe we
should just concatenate the string?

"movfr2gr.d\t%0, $f" #fd "\n\t"

> +
> +       unsigned long __value;
> +
> +       switch (fd) {

I don't like this "very long" switch statement, but it seems we have no
way to make it better...

> +       case 0:
> +               READ_FPR(0, __value);
> +               break;
> +       case 1:
> +               READ_FPR(1, __value);
> +               break;
> +       case 2:
> +               READ_FPR(2, __value);
> +               break;
  
Huacai Chen Oct. 17, 2022, 12:16 a.m. UTC | #2
Hi, Ruoyao,

On Sun, Oct 16, 2022 at 11:05 PM Xi Ruoyao <xry111@xry111.site> wrote:
>
> On Sun, 2022-10-16 at 21:34 +0800, Huacai Chen wrote:
>
> > Loongson-2 series (Loongson-2K500, Loongson-2K1000)
>
> "2K1000LA"? "2K1000" is puzzling because of a name conflict with the
> MIPS-based model.
Technically this is correct, both MIPS-based and LoongArch-based
Loongson-2K1000 have no hardware support.

>
> /* snip */
>
> > +static inline unsigned long read_fpr(unsigned int fd)
> > +{
> > +#define READ_FPR(fd, __value)          \
> > +{                                      \
>
> Unnecessary curly brace pair.
OK, thanks.

>
> > +       __asm__ __volatile__(           \
> > +       "movfr2gr.d\t%0, $f%1\n\t"      \
> > +       : "=r"(__value) : "i"(fd));     \
> > +}
>
> I'm not sure if this is a correct use of "i" constraint.  Maybe we
> should just concatenate the string?
OK, thanks.

Huacai
>
> "movfr2gr.d\t%0, $f" #fd "\n\t"
>
> > +
> > +       unsigned long __value;
> > +
> > +       switch (fd) {
>
> I don't like this "very long" switch statement, but it seems we have no
> way to make it better...
>
> > +       case 0:
> > +               READ_FPR(0, __value);
> > +               break;
> > +       case 1:
> > +               READ_FPR(1, __value);
> > +               break;
> > +       case 2:
> > +               READ_FPR(2, __value);
> > +               break;
>
> --
> Xi Ruoyao <xry111@xry111.site>
> School of Aerospace Science and Technology, Xidian University
  
WANG Xuerui Oct. 17, 2022, 5:49 a.m. UTC | #3
On 2022/10/17 08:16, Huacai Chen wrote:
> Hi, Ruoyao,
> 
> On Sun, Oct 16, 2022 at 11:05 PM Xi Ruoyao <xry111@xry111.site> wrote:
>>
>> On Sun, 2022-10-16 at 21:34 +0800, Huacai Chen wrote:
>>
>>> Loongson-2 series (Loongson-2K500, Loongson-2K1000)
>>
>> "2K1000LA"? "2K1000" is puzzling because of a name conflict with the
>> MIPS-based model.
> Technically this is correct, both MIPS-based and LoongArch-based
> Loongson-2K1000 have no hardware support.

Unfortunately, no, there is no "LoongArch-based" 2K1000. The Loongson 
2K1000 is a MIPS processor, and will always be; the official model name 
for the "LoongArch-based 2K1000" is 2K1000LA which is obviously distinct.
  
Arnd Bergmann Oct. 17, 2022, 7:12 a.m. UTC | #4
On Sun, Oct 16, 2022, at 3:34 PM, Huacai Chen wrote:
> Loongson-2 series (Loongson-2K500, Loongson-2K1000) don't support
> unaligned access in hardware, while Loongson-3 series (Loongson-3A5000,
> Loongson-3C5000) are configurable whether support unaligned access in
> hardware. This patch add unaligned access emulation for those LoongArch
> processors without hardware support.
>
> Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>

What does the Loongarch ELF ABI say about this? On most architectures,
C compilers are not allowed to produce unaligned accesses for standard
compliant source code, the only way you'd get this is when casting
an unaligned (e.g. char*) pointer to another type with higher alignment
requirement.

> +/* sysctl hooks */
> +int unaligned_enabled __read_mostly = 1;	/* Enabled by default */
> +int no_unaligned_warning __read_mostly = 1;	/* Only 1 warning by default */

The comment says 'sysctl', the implementation has a debugfs interface.

> +#ifdef CONFIG_DEBUG_FS
> +static int __init debugfs_unaligned(void)
> +{
> +	struct dentry *d;
> +
> +	d = debugfs_create_dir("loongarch", NULL);
> +	if (!d)
> +		return -ENOMEM;
> +
> +	debugfs_create_u32("unaligned_instructions_user",
> +				S_IRUGO, d, &unaligned_instructions_user);
> +	debugfs_create_u32("unaligned_instructions_kernel",
> +				S_IRUGO, d, &unaligned_instructions_kernel);
> +
> +	return 0;
> +}
> +arch_initcall(debugfs_unaligned);
> +#endif

The debugfs interface does not sound like a good way to do this.
Overall, my feeling is that for a new architecture we should not
introduce this at all but instead provide a way to diagnose and
fix user space, since we do not have to keep compatibility with
broken binaries that worked in the past.

If the ELF ABI actually allows compilers to produce unaligned
accesses for correct code, there should at least be a more generic
way of enabling this that follows what other architectures do.
We are already somewhat inconsistent there between architectures,
but I don't think anything else uses debugfs here.

     Arnd
  
Huacai Chen Oct. 17, 2022, 7:31 a.m. UTC | #5
Hi, Arnd,

On Mon, Oct 17, 2022 at 3:12 PM Arnd Bergmann <arnd@arndb.de> wrote:
>
> On Sun, Oct 16, 2022, at 3:34 PM, Huacai Chen wrote:
> > Loongson-2 series (Loongson-2K500, Loongson-2K1000) don't support
> > unaligned access in hardware, while Loongson-3 series (Loongson-3A5000,
> > Loongson-3C5000) are configurable whether support unaligned access in
> > hardware. This patch add unaligned access emulation for those LoongArch
> > processors without hardware support.
> >
> > Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
>
> What does the Loongarch ELF ABI say about this? On most architectures,
> C compilers are not allowed to produce unaligned accesses for standard
> compliant source code, the only way you'd get this is when casting
> a an unaligned (e.g. char*) pointer to another type with higher alignment
> requirement.
Some unaligned accesses are observed from the kernel network stack, it
seems related to whether the packet aligns to IP header or MAC header.
And, gcc has a -mstrict-align parameter, if without this, there are
unaligned instructions.

>
> > +/* sysctl hooks */
> > +int unaligned_enabled __read_mostly = 1;     /* Enabled by default */
> > +int no_unaligned_warning __read_mostly = 1;  /* Only 1 warning by default */
>
> The comment says 'sysctl', the implementation has a debugfs interface.
Originally "enabled", "warning" and "counters" are all debugfs
interfaces, then you told me to use sysctl. Now in this version
"enabled" and "warning" are converted to sysctl, but there are no
existing "counters" sysctl.

Huacai

>
> > +#ifdef CONFIG_DEBUG_FS
> > +static int __init debugfs_unaligned(void)
> > +{
> > +     struct dentry *d;
> > +
> > +     d = debugfs_create_dir("loongarch", NULL);
> > +     if (!d)
> > +             return -ENOMEM;
> > +
> > +     debugfs_create_u32("unaligned_instructions_user",
> > +                             S_IRUGO, d, &unaligned_instructions_user);
> > +     debugfs_create_u32("unaligned_instructions_kernel",
> > +                             S_IRUGO, d, &unaligned_instructions_kernel);
> > +
> > +     return 0;
> > +}
> > +arch_initcall(debugfs_unaligned);
> > +#endif
>
> The debugfs interface does not sound like a good way to do this.
> Overall, my feeling is that for a new architecture we should not
> introduce this at all but instead provide a way to diagnose and
> fix user space, since we do not have to keep compatibility with
> broken binaries that worked in the past.
>
> If the ELF ABI actually allows compilers to produce unaligned
> accesses for correct code, there should at least be a more generic
> way of enabling this that follows what other architectures do.
> We are already somewhat inconsistent there between architectures,
> but I don't think anything else uses debugfs here.
>
>      Arnd
  
Arnd Bergmann Oct. 17, 2022, 7:38 a.m. UTC | #6
On Mon, Oct 17, 2022, at 9:31 AM, Huacai Chen wrote:
> Hi, Arnd,
>
> On Mon, Oct 17, 2022 at 3:12 PM Arnd Bergmann <arnd@arndb.de> wrote:
>>
>> On Sun, Oct 16, 2022, at 3:34 PM, Huacai Chen wrote:
>> > Loongson-2 series (Loongson-2K500, Loongson-2K1000) don't support
>> > unaligned access in hardware, while Loongson-3 series (Loongson-3A5000,
>> > Loongson-3C5000) are configurable whether support unaligned access in
>> > hardware. This patch add unaligned access emulation for those LoongArch
>> > processors without hardware support.
>> >
>> > Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
>>
>> What does the Loongarch ELF ABI say about this? On most architectures,
>> C compilers are not allowed to produce unaligned accesses for standard
>> compliant source code, the only way you'd get this is when casting
>> a an unaligned (e.g. char*) pointer to another type with higher alignment
>> requirement.
> Some unaligned accesses are observed from the kernel network stack, it
> seems related to whether the packet aligns to IP header or MAC header.

This is usually a bug in the device driver. It's a fairly common bug
since the network driver has to ensure the alignment is correct, but
it's usually fixable, and fixing it results in better performance on
machines that support unaligned access as well.

Which driver did you observe this with?

> And, gcc has a -mstrict-align parameter, if without this, there are
> unaligned instructions.

Does this default to strict or non-strict mode? Usually gcc does not
allow to turn this off on architectures that have no hardware support
for unaligned access.

>> > +/* sysctl hooks */
>> > +int unaligned_enabled __read_mostly = 1;     /* Enabled by default */
>> > +int no_unaligned_warning __read_mostly = 1;  /* Only 1 warning by default */
>>
>> The comment says 'sysctl', the implementation has a debugfs interface.
> Originally "enabled", "warning" and "counters" are all debugfs
> interfaces, then you told me to use sysctl. Now in this version
> "enabled" and "warning" are converted to sysctl, but there are no
> existing "counters" sysctl.

I don't see the sysctl interface in the patch, what am I missing?

      Arnd
  
WANG Xuerui Oct. 17, 2022, 8:05 a.m. UTC | #7
Hi,

Just my 2c...

On 2022/10/17 15:38, Arnd Bergmann wrote:
> On Mon, Oct 17, 2022, at 9:31 AM, Huacai Chen wrote:
>> Hi, Arnd,
>>
>> On Mon, Oct 17, 2022 at 3:12 PM Arnd Bergmann <arnd@arndb.de> wrote:
>>>
>>> On Sun, Oct 16, 2022, at 3:34 PM, Huacai Chen wrote:
>>>> Loongson-2 series (Loongson-2K500, Loongson-2K1000) don't support
>>>> unaligned access in hardware, while Loongson-3 series (Loongson-3A5000,
>>>> Loongson-3C5000) are configurable whether support unaligned access in
>>>> hardware. This patch add unaligned access emulation for those LoongArch
>>>> processors without hardware support.
>>>>
>>>> Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
>>>
>>> What does the Loongarch ELF ABI say about this? On most architectures,
>>> C compilers are not allowed to produce unaligned accesses for standard
>>> compliant source code, the only way you'd get this is when casting
>>> a an unaligned (e.g. char*) pointer to another type with higher alignment
>>> requirement.
>> Some unaligned accesses are observed from the kernel network stack, it
>> seems related to whether the packet aligns to IP header or MAC header.
> 
> This is usually a bug in the device driver. It's a fairly common bug
> since the network driver has to ensure the alignment is correct, but
> it's usually fixable, and fixing it results in better performance on
> machines that support unaligned access as well.
> 
> Which driver did you observe this with?

I agree with Arnd that it's probably better to fix the drivers. Having 
the debug feature would help, but in the end it's still the drivers that 
should get the fix. For example I have previously fixed one such 
unaligned access in iwlwifi when I was tinkering with a Loongson 3A4000, 
it was pretty easy to spot with the right perf tools.

> 
>> And, gcc has a -mstrict-align parameter, if without this, there are
>> unaligned instructions.
> 
> Does this default to strict or non-strict mode? Usually gcc does not
> allow to turn this off on architectures that have no hardware support
> for unaligned access.

The LoongArch gcc behavior is tunable via the "-m[no-]strict-align" 
command-line flag, and I believe gcc defaults to producing the 
"non-strict" code, most likely because the most popular LoongArch model 
(the 3A5000) supports efficient unaligned accesses. Also there's always 
the possibility that code compiled for and tested on e.g. 3A5000 will 
get run on the less capable models, so it's arguably desirable to not 
let those just fail.

Yes it's vendors' responsibility to actually test their code/solution 
and observe the failure early, but things happen and I'm actually not 
sure if not doing the emulation will benefit the users at this point...

> 
>>>> +/* sysctl hooks */
>>>> +int unaligned_enabled __read_mostly = 1;     /* Enabled by default */
>>>> +int no_unaligned_warning __read_mostly = 1;  /* Only 1 warning by default */
>>>
>>> The comment says 'sysctl', the implementation has a debugfs interface.
>> Originally "enabled", "warning" and "counters" are all debugfs
>> interfaces, then you told me to use sysctl. Now in this version
>> "enabled" and "warning" are converted to sysctl, but there are no
>> existing "counters" sysctl.
> 
> I don't see the sysctl interface in the patch, what am I missing?

FYI they are chosen by the Kconfig options and live in kernel/sysctl.c. 
And I believe the debugfs interface (the counters) is inspired by the 
original mips code. Pretty niche use case but can be handy at times...
  
Xi Ruoyao Oct. 17, 2022, 8:13 a.m. UTC | #8
On Mon, 2022-10-17 at 09:38 +0200, Arnd Bergmann wrote:

> > Some unaligned accesses are observed from the kernel network stack, it
> > seems related to whether the packet aligns to IP header or MAC header.
> 
> This is usually a bug in the device driver. It's a fairly common bug
> since the network driver has to ensure the alignment is correct, but
> it's usually fixable, and fixing it results in better performance on
> machines that support unaligned access as well.

Or, maybe a GCC bug is causing -mstrict-align not implemented correctly.

> > And, gcc has a -mstrict-align parameter, if without this, there are
> > unaligned instructions.
> 
> Does this default to strict or non-strict mode? Usually gcc does not
> allow to turn this off on architectures that have no hardware support
> for unaligned access.

On LoongArch the unaligned access support is optional.  An
implementation is allowed to implement it or not.  The software can
determine if it's supported by a CPUCFG instruction.

I think -march=la264 will turn off strict align, but it's not added into
GCC yet.

The GCC default is -mno-strict-align.  I expressed my concern about this
decision when I reviewed the GCC port, but at last they just kept the
decision.  But the kernel already sets -mstrict-align in CFLAGS anyway.
  
Arnd Bergmann Oct. 17, 2022, 8:22 a.m. UTC | #9
On Mon, Oct 17, 2022, at 10:05 AM, WANG Xuerui wrote:
> On 2022/10/17 15:38, Arnd Bergmann wrote:
>> On Mon, Oct 17, 2022, at 9:31 AM, Huacai Chen wrote:
>>>>> +/* sysctl hooks */
>>>>> +int unaligned_enabled __read_mostly = 1;     /* Enabled by default */
>>>>> +int no_unaligned_warning __read_mostly = 1;  /* Only 1 warning by default */
>>>>
>>>> The comment says 'sysctl', the implementation has a debugfs interface.
>>> Originally "enabled", "warning" and "counters" are all debugfs
>>> interfaces, then you told me to use sysctl. Now in this version
>>> "enabled" and "warning" are converted to sysctl, but there are no
>>> existing "counters" sysctl.
>> 
>> I don't see the sysctl interface in the patch, what am I missing?
>
> FYI they are chosen by the Kconfig options and live in kernel/sysctl.c. 

Got it, that's what I was looking for, I had completely forgotten
about how we got here.

> And I believe the debugfs interface (the counters) is inspired by the 
> original mips code. Pretty niche use case but can be handy at times...

Right, I see what it does now, and I agree that this is not a problem.
A tracepoint is probably an even better way to handle this flexibly,
but since it's not a stable interface either way, this can be optimized
later on.

       Arnd
  

Patch

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 0a6ef613124c..a8dc58e8162a 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -122,6 +122,8 @@  config LOONGARCH
 	select RTC_LIB
 	select SMP
 	select SPARSE_IRQ
+	select SYSCTL_ARCH_UNALIGN_ALLOW
+	select SYSCTL_ARCH_UNALIGN_NO_WARN
 	select SYSCTL_EXCEPTION_TRACE
 	select SWIOTLB
 	select TRACE_IRQFLAGS_SUPPORT
diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index fce1843ceebb..e96b5345f389 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -76,6 +76,10 @@  enum reg2i12_op {
 	ldbu_op		= 0xa8,
 	ldhu_op		= 0xa9,
 	ldwu_op		= 0xaa,
+	flds_op		= 0xac,
+	fsts_op		= 0xad,
+	fldd_op		= 0xae,
+	fstd_op		= 0xaf,
 };
 
 enum reg2i14_op {
@@ -146,6 +150,10 @@  enum reg3_op {
 	ldxbu_op	= 0x7040,
 	ldxhu_op	= 0x7048,
 	ldxwu_op	= 0x7050,
+	fldxs_op	= 0x7060,
+	fldxd_op	= 0x7068,
+	fstxs_op	= 0x7070,
+	fstxd_op	= 0x7078,
 	amswapw_op	= 0x70c0,
 	amswapd_op	= 0x70c1,
 	amaddw_op	= 0x70c2,
@@ -566,4 +574,10 @@  static inline void emit_##NAME(union loongarch_instruction *insn,	\
 
 DEF_EMIT_REG3SA2_FORMAT(alsld, alsld_op)
 
+struct pt_regs;
+
+unsigned long unaligned_read(void *addr, void *value, unsigned long n, bool sign);
+unsigned long unaligned_write(void *addr, unsigned long value, unsigned long n);
+void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc);
+
 #endif /* _ASM_INST_H */
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 42be564278fa..2ad2555b53ea 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -7,7 +7,8 @@  extra-y		:= vmlinux.lds
 
 obj-y		+= head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
 		   traps.o irq.o idle.o process.o dma.o mem.o io.o reset.o switch.o \
-		   elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o
+		   elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o \
+		   unaligned.o
 
 obj-$(CONFIG_ACPI)		+= acpi.o
 obj-$(CONFIG_EFI) 		+= efi.o
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index 1a4dce84ebc6..7ea62faeeadb 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -368,13 +368,40 @@  asmlinkage void noinstr do_ade(struct pt_regs *regs)
 	irqentry_exit(regs, state);
 }
 
+/* sysctl hooks (selected via SYSCTL_ARCH_UNALIGN_ALLOW / SYSCTL_ARCH_UNALIGN_NO_WARN) */
+int unaligned_enabled __read_mostly = 1;	/* Emulation enabled by default */
+int no_unaligned_warning __read_mostly = 1;	/* Register dump suppressed by default */
+
 asmlinkage void noinstr do_ale(struct pt_regs *regs)
 {
+	unsigned int *pc;
 	irqentry_state_t state = irqentry_enter(regs);
 
+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr);
+
+	/*
+	 * Did we catch a fault trying to load an instruction?
+	 *
+	 * Only data accesses are handed to the emulator.  The fault is
+	 * reported with SIGBUS instead when the bad address equals the
+	 * exception PC, when a user task does not have TIF_FIXADE set, or
+	 * when unaligned_enabled is 0.
+	 */
+	if (regs->csr_badvaddr == regs->csr_era)
+		goto sigbus;
+	if (user_mode(regs) && !test_thread_flag(TIF_FIXADE))
+		goto sigbus;
+	if (!unaligned_enabled)
+		goto sigbus;
+	if (!no_unaligned_warning)	/* dump registers unless suppressed */
+		show_registers(regs);
+
+	pc = (unsigned int *)exception_era(regs);	/* presumably the faulting instruction's address */
+
+	emulate_load_store_insn(regs, (void __user *)regs->csr_badvaddr, pc);
+
+	goto out;	/* emulator did its own signalling; skip the SIGBUS path */
+
+sigbus:
 	die_if_kernel("Kernel ale access", regs);
 	force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr);
 
+out:
 	irqentry_exit(regs, state);
 }
 
diff --git a/arch/loongarch/kernel/unaligned.c b/arch/loongarch/kernel/unaligned.c
new file mode 100644
index 000000000000..5aeb36d8a8db
--- /dev/null
+++ b/arch/loongarch/kernel/unaligned.c
@@ -0,0 +1,401 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Handle unaligned accesses by emulation.
+ *
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ *
+ */
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/debugfs.h>
+#include <linux/perf_event.h>
+
+#include <asm/asm.h>
+#include <asm/branch.h>
+#include <asm/fpu.h>
+#include <asm/inst.h>
+
+#include "access-helper.h"
+
+#ifdef CONFIG_DEBUG_FS
+static u32 unaligned_instructions_user;
+static u32 unaligned_instructions_kernel;
+#endif
+
+/*
+ * read_fpr() - return the raw 64-bit contents of floating-point register $f<fd>.
+ * @fd: register number, 0-31; any other value panics.
+ *
+ * movfr2gr.d names its source register in the instruction text, so a
+ * run-time index has to be dispatched through a switch with one asm
+ * statement per register.
+ *
+ * NOTE(review): this reads the live hardware register.  Presumably the
+ * caller has to make sure the current task's FP context is actually loaded
+ * in the FPU at this point -- confirm.
+ */
+static inline unsigned long read_fpr(unsigned int fd)
+{
+	unsigned long value;
+
+/*
+ * The register number is pasted into the mnemonic with '#' instead of being
+ * passed as an "i" operand, so the emitted text does not depend on how the
+ * compiler prints an immediate operand.
+ */
+#define READ_FPR_CASE(idx)				\
+	case idx:					\
+		__asm__ __volatile__(			\
+		"movfr2gr.d\t%0, $f" #idx "\n\t"	\
+		: "=r"(value));				\
+		break
+
+	switch (fd) {
+	READ_FPR_CASE(0);
+	READ_FPR_CASE(1);
+	READ_FPR_CASE(2);
+	READ_FPR_CASE(3);
+	READ_FPR_CASE(4);
+	READ_FPR_CASE(5);
+	READ_FPR_CASE(6);
+	READ_FPR_CASE(7);
+	READ_FPR_CASE(8);
+	READ_FPR_CASE(9);
+	READ_FPR_CASE(10);
+	READ_FPR_CASE(11);
+	READ_FPR_CASE(12);
+	READ_FPR_CASE(13);
+	READ_FPR_CASE(14);
+	READ_FPR_CASE(15);
+	READ_FPR_CASE(16);
+	READ_FPR_CASE(17);
+	READ_FPR_CASE(18);
+	READ_FPR_CASE(19);
+	READ_FPR_CASE(20);
+	READ_FPR_CASE(21);
+	READ_FPR_CASE(22);
+	READ_FPR_CASE(23);
+	READ_FPR_CASE(24);
+	READ_FPR_CASE(25);
+	READ_FPR_CASE(26);
+	READ_FPR_CASE(27);
+	READ_FPR_CASE(28);
+	READ_FPR_CASE(29);
+	READ_FPR_CASE(30);
+	READ_FPR_CASE(31);
+	default:
+		/* fd is unsigned, so print it with %u */
+		panic("unexpected fd '%u'", fd);
+	}
+#undef READ_FPR_CASE
+	return value;
+}
+
+/*
+ * write_fpr() - load a raw 64-bit value into floating-point register $f<fd>.
+ * @fd:    register number, 0-31; any other value panics.
+ * @value: bit pattern to place in the register.
+ *
+ * movgr2fr.d names its destination register in the instruction text, so a
+ * run-time index has to be dispatched through a switch with one asm
+ * statement per register.
+ *
+ * NOTE(review): this writes the live hardware register.  Presumably the
+ * caller has to make sure the current task's FP context is actually loaded
+ * in the FPU at this point -- confirm.
+ */
+static inline void write_fpr(unsigned int fd, unsigned long value)
+{
+/*
+ * The register number is pasted into the mnemonic with '#' instead of being
+ * passed as an "i" operand, so the emitted text does not depend on how the
+ * compiler prints an immediate operand.
+ */
+#define WRITE_FPR_CASE(idx)				\
+	case idx:					\
+		__asm__ __volatile__(			\
+		"movgr2fr.d $f" #idx ", %0\n\t"		\
+		:: "r"(value));				\
+		break
+
+	switch (fd) {
+	WRITE_FPR_CASE(0);
+	WRITE_FPR_CASE(1);
+	WRITE_FPR_CASE(2);
+	WRITE_FPR_CASE(3);
+	WRITE_FPR_CASE(4);
+	WRITE_FPR_CASE(5);
+	WRITE_FPR_CASE(6);
+	WRITE_FPR_CASE(7);
+	WRITE_FPR_CASE(8);
+	WRITE_FPR_CASE(9);
+	WRITE_FPR_CASE(10);
+	WRITE_FPR_CASE(11);
+	WRITE_FPR_CASE(12);
+	WRITE_FPR_CASE(13);
+	WRITE_FPR_CASE(14);
+	WRITE_FPR_CASE(15);
+	WRITE_FPR_CASE(16);
+	WRITE_FPR_CASE(17);
+	WRITE_FPR_CASE(18);
+	WRITE_FPR_CASE(19);
+	WRITE_FPR_CASE(20);
+	WRITE_FPR_CASE(21);
+	WRITE_FPR_CASE(22);
+	WRITE_FPR_CASE(23);
+	WRITE_FPR_CASE(24);
+	WRITE_FPR_CASE(25);
+	WRITE_FPR_CASE(26);
+	WRITE_FPR_CASE(27);
+	WRITE_FPR_CASE(28);
+	WRITE_FPR_CASE(29);
+	WRITE_FPR_CASE(30);
+	WRITE_FPR_CASE(31);
+	default:
+		/* fd is unsigned, so print it with %u */
+		panic("unexpected fd '%u'", fd);
+	}
+#undef WRITE_FPR_CASE
+}
+
+/*
+ * emulate_load_store_insn() - emulate the load/store that raised an
+ * address-alignment exception.
+ * @regs: trapped register frame; the destination GPR is updated here for
+ *        integer loads and the source GPR is read from here for stores.
+ * @addr: the unaligned data address being accessed.
+ * @pc:   address of the faulting instruction.
+ *
+ * Recognised instructions are the 2-, 4- and 8-byte integer loads/stores and
+ * the single/double floating-point loads/stores, matched through the
+ * reg2i12, reg2i14 (ldptr/stptr) and reg3 (indexed) opcode fields.  The
+ * access is carried out bytewise by unaligned_read()/unaligned_write(), and
+ * on success compute_return_era() is called.
+ *
+ * If the bytewise access reports an error, an exception-table fixup is tried
+ * first; failing that the kernel dies or the task receives SIGSEGV.  An
+ * unrecognised instruction, or a user-mode address rejected by access_ok(),
+ * results in die/SIGBUS.
+ *
+ * NOTE(review): the FP paths go straight to the hardware registers via
+ * read_fpr()/write_fpr().  Presumably that is only correct while the current
+ * task's FP context is loaded in the FPU -- confirm.
+ */
+void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc)
+{
+	bool user = user_mode(regs);
+	bool fp = false;	/* operand is an FP register, not a GPR */
+	bool sign = false;	/* load sign-extends its most significant byte */
+	bool store = false;	/* instruction writes memory */
+	unsigned int rd;
+	unsigned long res;	/* the helpers return unsigned long, keep full width */
+	unsigned long size = 0;
+	unsigned long origpc;
+	unsigned long origra;
+	unsigned long value = 0;
+	union loongarch_instruction insn;
+
+	origpc = (unsigned long)pc;
+	origra = regs->regs[1];
+
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
+
+	/*
+	 * This load never faults.
+	 */
+	__get_inst(&insn.word, pc, user);
+	if (user && !access_ok(addr, 8))
+		goto sigbus;
+
+	/*
+	 * Decode: classify the instruction into (size, sign, store, fp).  The
+	 * chain is evaluated in the same order as the opcode tests always were.
+	 */
+	if (insn.reg2i12_format.opcode == ldd_op ||
+		insn.reg2i14_format.opcode == ldptrd_op ||
+		insn.reg3_format.opcode == ldxd_op) {
+		size = 8;
+		sign = true;
+	} else if (insn.reg2i12_format.opcode == ldw_op ||
+		insn.reg2i14_format.opcode == ldptrw_op ||
+		insn.reg3_format.opcode == ldxw_op) {
+		size = 4;
+		sign = true;
+	} else if (insn.reg2i12_format.opcode == ldwu_op ||
+		insn.reg3_format.opcode == ldxwu_op) {
+		size = 4;
+	} else if (insn.reg2i12_format.opcode == ldh_op ||
+		insn.reg3_format.opcode == ldxh_op) {
+		size = 2;
+		sign = true;
+	} else if (insn.reg2i12_format.opcode == ldhu_op ||
+		insn.reg3_format.opcode == ldxhu_op) {
+		size = 2;
+	} else if (insn.reg2i12_format.opcode == std_op ||
+		insn.reg2i14_format.opcode == stptrd_op ||
+		insn.reg3_format.opcode == stxd_op) {
+		size = 8;
+		store = true;
+	} else if (insn.reg2i12_format.opcode == stw_op ||
+		insn.reg2i14_format.opcode == stptrw_op ||
+		insn.reg3_format.opcode == stxw_op) {
+		size = 4;
+		store = true;
+	} else if (insn.reg2i12_format.opcode == sth_op ||
+		insn.reg3_format.opcode == stxh_op) {
+		size = 2;
+		store = true;
+	} else if (insn.reg2i12_format.opcode == fldd_op ||
+		insn.reg3_format.opcode == fldxd_op) {
+		size = 8;
+		sign = true;
+		fp = true;
+	} else if (insn.reg2i12_format.opcode == flds_op ||
+		insn.reg3_format.opcode == fldxs_op) {
+		size = 4;
+		sign = true;
+		fp = true;
+	} else if (insn.reg2i12_format.opcode == fstd_op ||
+		insn.reg3_format.opcode == fstxd_op) {
+		size = 8;
+		store = true;
+		fp = true;
+	} else if (insn.reg2i12_format.opcode == fsts_op ||
+		insn.reg3_format.opcode == fstxs_op) {
+		size = 4;
+		store = true;
+		fp = true;
+	} else {
+		goto sigbus;
+	}
+
+	/* The rd field is read through reg2i12_format for every encoding. */
+	rd = insn.reg2i12_format.rd;
+
+	if (store) {
+		value = fp ? read_fpr(rd) : regs->regs[rd];
+		res = unaligned_write(addr, value, size);
+		if (res)
+			goto fault;
+	} else {
+		res = unaligned_read(addr, &value, size, sign);
+		if (res)
+			goto fault;
+		/* Commit the destination only after the whole read succeeded. */
+		if (fp)
+			write_fpr(rd, value);
+		else
+			regs->regs[rd] = value;
+	}
+
+#ifdef CONFIG_DEBUG_FS
+	if (user)
+		unaligned_instructions_user++;
+	else
+		unaligned_instructions_kernel++;
+#endif
+
+	compute_return_era(regs);
+	return;
+
+fault:
+	/* roll back jump/branch */
+	regs->csr_era = origpc;
+	regs->regs[1] = origra;
+	/* Did we have an exception handler installed? */
+	if (fixup_exception(regs))
+		return;
+
+	die_if_kernel("Unhandled kernel unaligned access", regs);
+	force_sig(SIGSEGV);
+
+	return;
+
+sigbus:
+	die_if_kernel("Unhandled kernel unaligned access", regs);
+	force_sig(SIGBUS);
+
+	return;
+}
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * debugfs_unaligned() - expose the emulation counters under debugfs.
+ *
+ * Creates a "loongarch" directory at the debugfs root with two read-only
+ * u32 files, one for user-mode and one for kernel-mode emulated
+ * instructions.
+ *
+ * debugfs_create_dir() reports failure with an ERR_PTR, never NULL, and the
+ * other debugfs_create_*() helpers accept such a parent and do nothing, so
+ * the result is deliberately not checked.  Always returns 0.
+ */
+static int __init debugfs_unaligned(void)
+{
+	struct dentry *d;
+
+	d = debugfs_create_dir("loongarch", NULL);
+
+	debugfs_create_u32("unaligned_instructions_user",
+				0444, d, &unaligned_instructions_user);
+	debugfs_create_u32("unaligned_instructions_kernel",
+				0444, d, &unaligned_instructions_kernel);
+
+	return 0;
+}
+arch_initcall(debugfs_unaligned);
+#endif
diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
index e36635fccb69..867895530340 100644
--- a/arch/loongarch/lib/Makefile
+++ b/arch/loongarch/lib/Makefile
@@ -3,4 +3,4 @@ 
 # Makefile for LoongArch-specific library files.
 #
 
-lib-y	+= delay.o clear_user.o copy_user.o dump_tlb.o
+lib-y	+= delay.o clear_user.o copy_user.o dump_tlb.o unaligned.o
diff --git a/arch/loongarch/lib/unaligned.S b/arch/loongarch/lib/unaligned.S
new file mode 100644
index 000000000000..03210cb5a18d
--- /dev/null
+++ b/arch/loongarch/lib/unaligned.S
@@ -0,0 +1,93 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/errno.h>
+#include <asm/export.h>
+#include <asm/regdef.h>
+
+.macro fixup_ex from, to, fix	/* from, to: numeric local labels; fix: nonzero to also emit the stub */
+.if \fix
+	.section .fixup, "ax"
+\to:	li.w	a0, -EFAULT	/* fixup stub: return -EFAULT to the caller of the routine */
+	jr	ra
+	.previous
+.endif
+	.section __ex_table, "a"
+	PTR	\from\()b, \to\()b	/* table entry pairing label "from" with stub label "to" */
+	.previous
+.endm
+
+/*
+ * unsigned long unaligned_read(void *addr, void *value, unsigned long n, bool sign)
+ *
+ * Assemble an n-byte value from memory at addr one byte at a time and store
+ * the 64-bit result to *value.  The byte at the highest address becomes the
+ * most significant byte; it is loaded sign-extended when sign is nonzero,
+ * all other bytes are loaded zero-extended.
+ *
+ * Returns 0 on success, or -EFAULT if n is 0 or if one of the accesses at
+ * labels 1, 2 or 4 faults (they have exception-table entries).
+ *
+ * a0: addr
+ * a1: value
+ * a2: n
+ * a3: sign
+ */
+SYM_FUNC_START(unaligned_read)
+	beqz	a2, 5f			/* n == 0: nothing to read, return -EFAULT */
+
+	li.w	t1, 8
+	li.w	t2, 0			/* t2 accumulates the result */
+
+	addi.d	t0, a2, -1		/* t0 = n - 1 */
+	mul.d	t1, t0, t1		/* t1 = (n - 1) * 8: shift for the top byte */
+	add.d 	a0, a0, t0		/* start at the highest-addressed byte */
+
+	beq	a3, zero, 2f		/* unsigned read: skip the sign-extending load */
+1:	ld.b	t3, a0, 0		/* top byte, sign-extended */
+	b	3f
+
+2:	ld.bu	t3, a0, 0		/* zero-extended byte */
+3:	sll.d	t3, t3, t1		/* move the byte into position */
+	or	t2, t2, t3
+	addi.d	t1, t1, -8
+	addi.d	a0, a0, -1
+	addi.d	a2, a2, -1
+	bgt	a2, zero, 2b		/* remaining bytes always take the ld.bu path */
+4:	st.d	t2, a1, 0		/* store the assembled 64-bit value to *value */
+
+	move	a0, a2			/* a2 has counted down to 0: return 0 */
+	jr	ra
+
+5:	li.w    a0, -EFAULT
+	jr	ra
+
+	fixup_ex 1, 6, 1		/* emits stub 6, shared by the entries below */
+	fixup_ex 2, 6, 0
+	fixup_ex 4, 6, 0
+SYM_FUNC_END(unaligned_read)
+
+/*
+ * unsigned long unaligned_write(void *addr, unsigned long value, unsigned long n)
+ *
+ * Store the low n bytes of value to memory at addr one byte at a time,
+ * least significant byte first at the lowest address.
+ *
+ * Returns 0 on success, or -EFAULT if n is 0 or if the byte store at label 2
+ * faults (it has an exception-table entry).
+ *
+ * a0: addr
+ * a1: value
+ * a2: n
+ */
+SYM_FUNC_START(unaligned_write)
+	beqz	a2, 3f			/* n == 0: nothing to write, return -EFAULT */
+
+	li.w	t0, 0			/* t0 = bit offset of the next byte in value */
+1:	srl.d	t1, a1, t0		/* bring that byte down to bits 7:0 */
+2:	st.b	t1, a0, 0
+	addi.d	t0, t0, 8
+	addi.d	a2, a2, -1
+	addi.d	a0, a0, 1
+	bgt	a2, zero, 1b
+
+	move	a0, a2			/* a2 has counted down to 0: return 0 */
+	jr	ra
+
+3:	li.w    a0, -EFAULT
+	jr	ra
+
+	fixup_ex 2, 4, 1		/* fault at label 2 continues at stub 4 */
+SYM_FUNC_END(unaligned_write)