RISC-V: Optimize bitops with Zbb extension
Commit Message
This patch leverages the alternative mechanism to dynamically optimize
bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When
Zbb ext is not supported by the runtime CPU, legacy implementation is
used. If Zbb is supported, then the optimized variants will be selected
via alternative patching.
The legacy bitops support is taken from the generic C implementation as
fallback.
If the parameter is a build-time constant, we leverage compiler builtin to
calculate the result directly, this approach is inspired by x86 bitops
implementation.
EFI stub runs before the kernel, so alternative mechanism should not be
used there, this patch introduces a macro EFI_NO_ALTERNATIVE for this
purpose.
Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
arch/riscv/include/asm/bitops.h | 266 +++++++++++++++++++++++++-
drivers/firmware/efi/libstub/Makefile | 2 +-
2 files changed, 264 insertions(+), 4 deletions(-)
Comments
On Sun, 6 Aug 2023 at 04:39, Xiao Wang <xiao.w.wang@intel.com> wrote:
>
> This patch leverages the alternative mechanism to dynamically optimize
> bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When
> Zbb ext is not supported by the runtime CPU, legacy implementation is
> used. If Zbb is supported, then the optimized variants will be selected
> via alternative patching.
>
> The legacy bitops support is taken from the generic C implementation as
> fallback.
>
> If the parameter is a build-time constant, we leverage compiler builtin to
> calculate the result directly, this approach is inspired by x86 bitops
> implementation.
>
> EFI stub runs before the kernel, so alternative mechanism should not be
> used there, this patch introduces a macro EFI_NO_ALTERNATIVE for this
> purpose.
>
Why? The unpatched sequences work fine, no?
> Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> ---
> arch/riscv/include/asm/bitops.h | 266 +++++++++++++++++++++++++-
> drivers/firmware/efi/libstub/Makefile | 2 +-
> 2 files changed, 264 insertions(+), 4 deletions(-)
>
> diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
> index 3540b690944b..f727f6489cd5 100644
> --- a/arch/riscv/include/asm/bitops.h
> +++ b/arch/riscv/include/asm/bitops.h
> @@ -15,13 +15,273 @@
> #include <asm/barrier.h>
> #include <asm/bitsperlong.h>
>
> +#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(EFI_NO_ALTERNATIVE)
> #include <asm-generic/bitops/__ffs.h>
> -#include <asm-generic/bitops/ffz.h>
> -#include <asm-generic/bitops/fls.h>
> #include <asm-generic/bitops/__fls.h>
> +#include <asm-generic/bitops/ffs.h>
> +#include <asm-generic/bitops/fls.h>
> +
> +#else
> +#include <asm/alternative-macros.h>
> +#include <asm/hwcap.h>
> +
> +#if (BITS_PER_LONG == 64)
> +#define CTZW "ctzw "
> +#define CLZW "clzw "
> +#elif (BITS_PER_LONG == 32)
> +#define CTZW "ctz "
> +#define CLZW "clz "
> +#else
> +#error "Unexpected BITS_PER_LONG"
> +#endif
> +
> +static __always_inline unsigned long variable__ffs(unsigned long word)
> +{
> + int num;
> +
> + asm_volatile_goto(
> + ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
> + : : : : legacy);
> +
> + asm volatile (
> + ".option push\n"
> + ".option arch,+zbb\n"
> + "ctz %0, %1\n"
> + ".option pop\n"
> + : "=r" (word) : "r" (word) :);
> +
> + return word;
> +
> +legacy:
> + num = 0;
> +#if BITS_PER_LONG == 64
> + if ((word & 0xffffffff) == 0) {
> + num += 32;
> + word >>= 32;
> + }
> +#endif
> + if ((word & 0xffff) == 0) {
> + num += 16;
> + word >>= 16;
> + }
> + if ((word & 0xff) == 0) {
> + num += 8;
> + word >>= 8;
> + }
> + if ((word & 0xf) == 0) {
> + num += 4;
> + word >>= 4;
> + }
> + if ((word & 0x3) == 0) {
> + num += 2;
> + word >>= 2;
> + }
> + if ((word & 0x1) == 0)
> + num += 1;
> + return num;
> +}
> +
> +/**
> + * __ffs - find first set bit in a long word
> + * @word: The word to search
> + *
> + * Undefined if no set bit exists, so code should check against 0 first.
> + */
> +#define __ffs(word) \
> + (__builtin_constant_p(word) ? \
> + (unsigned long)__builtin_ctzl(word) : \
> + variable__ffs(word))
> +
> +static __always_inline unsigned long variable__fls(unsigned long word)
> +{
> + int num;
> +
> + asm_volatile_goto(
> + ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
> + : : : : legacy);
> +
> + asm volatile (
> + ".option push\n"
> + ".option arch,+zbb\n"
> + "clz %0, %1\n"
> + ".option pop\n"
> + : "=r" (word) : "r" (word) :);
> +
> + return BITS_PER_LONG - 1 - word;
> +
> +legacy:
> + num = BITS_PER_LONG - 1;
> +#if BITS_PER_LONG == 64
> + if (!(word & (~0ul << 32))) {
> + num -= 32;
> + word <<= 32;
> + }
> +#endif
> + if (!(word & (~0ul << (BITS_PER_LONG-16)))) {
> + num -= 16;
> + word <<= 16;
> + }
> + if (!(word & (~0ul << (BITS_PER_LONG-8)))) {
> + num -= 8;
> + word <<= 8;
> + }
> + if (!(word & (~0ul << (BITS_PER_LONG-4)))) {
> + num -= 4;
> + word <<= 4;
> + }
> + if (!(word & (~0ul << (BITS_PER_LONG-2)))) {
> + num -= 2;
> + word <<= 2;
> + }
> + if (!(word & (~0ul << (BITS_PER_LONG-1))))
> + num -= 1;
> + return num;
> +}
> +
> +/**
> + * __fls - find last set bit in a long word
> + * @word: the word to search
> + *
> + * Undefined if no set bit exists, so code should check against 0 first.
> + */
> +#define __fls(word) \
> + (__builtin_constant_p(word) ? \
> + (unsigned long)(BITS_PER_LONG - 1 - __builtin_clzl(word)) : \
> + variable__fls(word))
> +
> +static __always_inline int variable_ffs(int x)
> +{
> + int r;
> +
> + asm_volatile_goto(
> + ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
> + : : : : legacy);
> +
> + asm volatile (
> + ".option push\n"
> + ".option arch,+zbb\n"
> + "bnez %1, 1f\n"
> + "li %0, 0\n"
> + "j 2f\n"
> + "1:\n"
> + CTZW "%0, %1\n"
> + "addi %0, %0, 1\n"
> + "2:\n"
> + ".option pop\n"
> + : "=r" (r) : "r" (x) :);
> +
> + return r;
> +
> +legacy:
> + r = 1;
> + if (!x)
> + return 0;
> + if (!(x & 0xffff)) {
> + x >>= 16;
> + r += 16;
> + }
> + if (!(x & 0xff)) {
> + x >>= 8;
> + r += 8;
> + }
> + if (!(x & 0xf)) {
> + x >>= 4;
> + r += 4;
> + }
> + if (!(x & 3)) {
> + x >>= 2;
> + r += 2;
> + }
> + if (!(x & 1)) {
> + x >>= 1;
> + r += 1;
> + }
> + return r;
> +}
> +
> +/**
> + * ffs - find first set bit in a word
> + * @x: the word to search
> + *
> + * This is defined the same way as the libc and compiler builtin ffs routines.
> + *
> + * ffs(value) returns 0 if value is 0 or the position of the first set bit if
> + * value is nonzero. The first (least significant) bit is at position 1.
> + */
> +#define ffs(x) (__builtin_constant_p(x) ? __builtin_ffs(x) : variable_ffs(x))
> +
> +static __always_inline int variable_fls(unsigned int x)
> +{
> + int r;
> +
> + asm_volatile_goto(
> + ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
> + : : : : legacy);
> +
> + asm volatile (
> + ".option push\n"
> + ".option arch,+zbb\n"
> + "bnez %1, 1f\n"
> + "li %0, 0\n"
> + "j 2f\n"
> + "1:\n"
> + CLZW "%0, %1\n"
> + "neg %0, %0\n"
> + "addi %0, %0, 32\n"
> + "2:\n"
> + ".option pop\n"
> + : "=r" (r) : "r" (x) :);
> +
> + return r;
> +
> +legacy:
> + r = 32;
> + if (!x)
> + return 0;
> + if (!(x & 0xffff0000u)) {
> + x <<= 16;
> + r -= 16;
> + }
> + if (!(x & 0xff000000u)) {
> + x <<= 8;
> + r -= 8;
> + }
> + if (!(x & 0xf0000000u)) {
> + x <<= 4;
> + r -= 4;
> + }
> + if (!(x & 0xc0000000u)) {
> + x <<= 2;
> + r -= 2;
> + }
> + if (!(x & 0x80000000u)) {
> + x <<= 1;
> + r -= 1;
> + }
> + return r;
> +}
> +
> +/**
> + * fls - find last set bit in a word
> + * @x: the word to search
> + *
> + * This is defined in a similar way as ffs, but returns the position of the most
> + * significant set bit.
> + *
> + * fls(value) returns 0 if value is 0 or the position of the last set bit if
> + * value is nonzero. The last (most significant) bit is at position 32.
> + */
> +#define fls(x) \
> + (__builtin_constant_p(x) ? \
> + (int)(((x) != 0) ? \
> + (sizeof(unsigned int) * 8 - __builtin_clz(x)) : 0) : \
> + variable_fls(x))
> +
> +#endif
> +
> +#include <asm-generic/bitops/ffz.h>
> #include <asm-generic/bitops/fls64.h>
> #include <asm-generic/bitops/sched.h>
> -#include <asm-generic/bitops/ffs.h>
>
> #include <asm-generic/bitops/hweight.h>
>
> diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
> index 16d64a34d1e1..b0f8c495c10f 100644
> --- a/drivers/firmware/efi/libstub/Makefile
> +++ b/drivers/firmware/efi/libstub/Makefile
> @@ -28,7 +28,7 @@ cflags-$(CONFIG_ARM) += -DEFI_HAVE_STRLEN -DEFI_HAVE_STRNLEN \
> -DEFI_HAVE_MEMCHR -DEFI_HAVE_STRRCHR \
> -DEFI_HAVE_STRCMP -fno-builtin -fpic \
> $(call cc-option,-mno-single-pic-base)
> -cflags-$(CONFIG_RISCV) += -fpic
> +cflags-$(CONFIG_RISCV) += -fpic -DEFI_NO_ALTERNATIVE
> cflags-$(CONFIG_LOONGARCH) += -fpie
>
> cflags-$(CONFIG_EFI_PARAMS_FROM_FDT) += -I$(srctree)/scripts/dtc/libfdt
> --
> 2.25.1
>
Hi,
> -----Original Message-----
> From: Ard Biesheuvel <ardb@kernel.org>
> Sent: Sunday, August 6, 2023 5:39 PM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: paul.walmsley@sifive.com; palmer@dabbelt.com;
> aou@eecs.berkeley.edu; anup@brainfault.org; Li, Haicheng
> <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> efi@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
>
> On Sun, 6 Aug 2023 at 04:39, Xiao Wang <xiao.w.wang@intel.com> wrote:
> >
> > This patch leverages the alternative mechanism to dynamically optimize
> > bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When
> > Zbb ext is not supported by the runtime CPU, legacy implementation is
> > used. If Zbb is supported, then the optimized variants will be selected
> > via alternative patching.
> >
> > The legacy bitops support is taken from the generic C implementation as
> > fallback.
> >
> > If the parameter is a build-time constant, we leverage compiler builtin to
> > calculate the result directly, this approach is inspired by x86 bitops
> > implementation.
> >
> > EFI stub runs before the kernel, so alternative mechanism should not be
> > used there, this patch introduces a macro EFI_NO_ALTERNATIVE for this
> > purpose.
> >
>
> Why? The unpatched sequences work fine, no?
It works. But there would be build warning: orphan section `.init.alternative' from `./drivers/firmware/efi/libstub/gop.stub.o' being placed in section `.init.alternative'. Besides, w/o this MACRO, the optimized variant would never be used at runtime, so this patch choose to disable alternative.
BRs,
Xiao
>
>
> > Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> > ---
> > arch/riscv/include/asm/bitops.h | 266 +++++++++++++++++++++++++-
> > drivers/firmware/efi/libstub/Makefile | 2 +-
> > 2 files changed, 264 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/riscv/include/asm/bitops.h
> b/arch/riscv/include/asm/bitops.h
> > index 3540b690944b..f727f6489cd5 100644
> > --- a/arch/riscv/include/asm/bitops.h
> > +++ b/arch/riscv/include/asm/bitops.h
> > @@ -15,13 +15,273 @@
> > #include <asm/barrier.h>
> > #include <asm/bitsperlong.h>
> >
> > +#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(EFI_NO_ALTERNATIVE)
> > #include <asm-generic/bitops/__ffs.h>
> > -#include <asm-generic/bitops/ffz.h>
> > -#include <asm-generic/bitops/fls.h>
> > #include <asm-generic/bitops/__fls.h>
> > +#include <asm-generic/bitops/ffs.h>
> > +#include <asm-generic/bitops/fls.h>
> > +
> > +#else
> > +#include <asm/alternative-macros.h>
> > +#include <asm/hwcap.h>
> > +
> > +#if (BITS_PER_LONG == 64)
> > +#define CTZW "ctzw "
> > +#define CLZW "clzw "
> > +#elif (BITS_PER_LONG == 32)
> > +#define CTZW "ctz "
> > +#define CLZW "clz "
> > +#else
> > +#error "Unexpected BITS_PER_LONG"
> > +#endif
[...]
On Sun, 27 Aug 2023 at 11:26, Wang, Xiao W <xiao.w.wang@intel.com> wrote:
>
> Hi,
>
> A gentle ping.
> Any other comments or suggestions for this patch? Or maybe we would review it in the Linux 6.7 development cycle?
>
This is going to be 6.7 material at the earliest in any case.
I am fine with the change as far as the EFI code is concerned, but I'd
suggest dropping EFI_ from the macro name, as it could be #define'd
for other reasons too.
>
> > -----Original Message-----
> > From: Wang, Xiao W
> > Sent: Sunday, August 6, 2023 6:24 PM
> > To: Ard Biesheuvel <ardb@kernel.org>
> > Cc: paul.walmsley@sifive.com; palmer@dabbelt.com;
> > aou@eecs.berkeley.edu; anup@brainfault.org; Li, Haicheng
> > <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> > efi@vger.kernel.org; linux-kernel@vger.kernel.org
> > Subject: RE: [PATCH] RISC-V: Optimize bitops with Zbb extension
> >
> > Hi,
> >
> > > -----Original Message-----
> > > From: Ard Biesheuvel <ardb@kernel.org>
> > > Sent: Sunday, August 6, 2023 5:39 PM
> > > To: Wang, Xiao W <xiao.w.wang@intel.com>
> > > Cc: paul.walmsley@sifive.com; palmer@dabbelt.com;
> > > aou@eecs.berkeley.edu; anup@brainfault.org; Li, Haicheng
> > > <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> > > efi@vger.kernel.org; linux-kernel@vger.kernel.org
> > > Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> > >
> > > On Sun, 6 Aug 2023 at 04:39, Xiao Wang <xiao.w.wang@intel.com> wrote:
> > > >
> > > > This patch leverages the alternative mechanism to dynamically optimize
> > > > bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When
> > > > Zbb ext is not supported by the runtime CPU, legacy implementation is
> > > > used. If Zbb is supported, then the optimized variants will be selected
> > > > via alternative patching.
> > > >
> > > > The legacy bitops support is taken from the generic C implementation as
> > > > fallback.
> > > >
> > > > If the parameter is a build-time constant, we leverage compiler builtin to
> > > > calculate the result directly, this approach is inspired by x86 bitops
> > > > implementation.
> > > >
> > > > EFI stub runs before the kernel, so alternative mechanism should not be
> > > > used there, this patch introduces a macro EFI_NO_ALTERNATIVE for this
> > > > purpose.
> > > >
> > >
> > > Why? The unpatched sequences work fine, no?
> >
> > It works. But there would be build warning: orphan section `.init.alternative'
> > from `./drivers/firmware/efi/libstub/gop.stub.o' being placed in section
> > `.init.alternative'. Besides, w/o this MACRO, the optimized variant would
> > never be used at runtime, so this patch choose to disable alternative.
> >
> > BRs,
> > Xiao
> >
> > >
> > >
> > > > Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> > > > ---
> > > > arch/riscv/include/asm/bitops.h | 266
> > +++++++++++++++++++++++++-
> > > > drivers/firmware/efi/libstub/Makefile | 2 +-
> > > > 2 files changed, 264 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/arch/riscv/include/asm/bitops.h
> > > b/arch/riscv/include/asm/bitops.h
> > > > index 3540b690944b..f727f6489cd5 100644
> > > > --- a/arch/riscv/include/asm/bitops.h
> > > > +++ b/arch/riscv/include/asm/bitops.h
> > > > @@ -15,13 +15,273 @@
> > > > #include <asm/barrier.h>
> > > > #include <asm/bitsperlong.h>
> > > >
> > > > +#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(EFI_NO_ALTERNATIVE)
> > > > #include <asm-generic/bitops/__ffs.h>
> > > > -#include <asm-generic/bitops/ffz.h>
> > > > -#include <asm-generic/bitops/fls.h>
> > > > #include <asm-generic/bitops/__fls.h>
> > > > +#include <asm-generic/bitops/ffs.h>
> > > > +#include <asm-generic/bitops/fls.h>
> > > > +
> > > > +#else
> > > > +#include <asm/alternative-macros.h>
> > > > +#include <asm/hwcap.h>
> > > > +
> > > > +#if (BITS_PER_LONG == 64)
> > > > +#define CTZW "ctzw "
> > > > +#define CLZW "clzw "
> > > > +#elif (BITS_PER_LONG == 32)
> > > > +#define CTZW "ctz "
> > > > +#define CLZW "clz "
> > > > +#else
> > > > +#error "Unexpected BITS_PER_LONG"
> > > > +#endif
> > [...]
@@ -15,13 +15,273 @@
#include <asm/barrier.h>
#include <asm/bitsperlong.h>
+#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(EFI_NO_ALTERNATIVE)
#include <asm-generic/bitops/__ffs.h>
-#include <asm-generic/bitops/ffz.h>
-#include <asm-generic/bitops/fls.h>
#include <asm-generic/bitops/__fls.h>
+#include <asm-generic/bitops/ffs.h>
+#include <asm-generic/bitops/fls.h>
+
+#else
+#include <asm/alternative-macros.h>
+#include <asm/hwcap.h>
+
+#if (BITS_PER_LONG == 64)
+#define CTZW "ctzw "
+#define CLZW "clzw "
+#elif (BITS_PER_LONG == 32)
+#define CTZW "ctz "
+#define CLZW "clz "
+#else
+#error "Unexpected BITS_PER_LONG"
+#endif
+
+static __always_inline unsigned long variable__ffs(unsigned long word)
+{
+ int num;
+
+ asm_volatile_goto(
+ ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
+ : : : : legacy);
+
+ asm volatile (
+ ".option push\n"
+ ".option arch,+zbb\n"
+ "ctz %0, %1\n"
+ ".option pop\n"
+ : "=r" (word) : "r" (word) :);
+
+ return word;
+
+legacy:
+ num = 0;
+#if BITS_PER_LONG == 64
+ if ((word & 0xffffffff) == 0) {
+ num += 32;
+ word >>= 32;
+ }
+#endif
+ if ((word & 0xffff) == 0) {
+ num += 16;
+ word >>= 16;
+ }
+ if ((word & 0xff) == 0) {
+ num += 8;
+ word >>= 8;
+ }
+ if ((word & 0xf) == 0) {
+ num += 4;
+ word >>= 4;
+ }
+ if ((word & 0x3) == 0) {
+ num += 2;
+ word >>= 2;
+ }
+ if ((word & 0x1) == 0)
+ num += 1;
+ return num;
+}
+
+/**
+ * __ffs - find first set bit in a long word
+ * @word: The word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+#define __ffs(word) \
+ (__builtin_constant_p(word) ? \
+ (unsigned long)__builtin_ctzl(word) : \
+ variable__ffs(word))
+
+static __always_inline unsigned long variable__fls(unsigned long word)
+{
+ int num;
+
+ asm_volatile_goto(
+ ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
+ : : : : legacy);
+
+ asm volatile (
+ ".option push\n"
+ ".option arch,+zbb\n"
+ "clz %0, %1\n"
+ ".option pop\n"
+ : "=r" (word) : "r" (word) :);
+
+ return BITS_PER_LONG - 1 - word;
+
+legacy:
+ num = BITS_PER_LONG - 1;
+#if BITS_PER_LONG == 64
+ if (!(word & (~0ul << 32))) {
+ num -= 32;
+ word <<= 32;
+ }
+#endif
+ if (!(word & (~0ul << (BITS_PER_LONG-16)))) {
+ num -= 16;
+ word <<= 16;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-8)))) {
+ num -= 8;
+ word <<= 8;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-4)))) {
+ num -= 4;
+ word <<= 4;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-2)))) {
+ num -= 2;
+ word <<= 2;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-1))))
+ num -= 1;
+ return num;
+}
+
+/**
+ * __fls - find last set bit in a long word
+ * @word: the word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+#define __fls(word) \
+ (__builtin_constant_p(word) ? \
+ (unsigned long)(BITS_PER_LONG - 1 - __builtin_clzl(word)) : \
+ variable__fls(word))
+
+static __always_inline int variable_ffs(int x)
+{
+ int r;
+
+ asm_volatile_goto(
+ ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
+ : : : : legacy);
+
+ asm volatile (
+ ".option push\n"
+ ".option arch,+zbb\n"
+ "bnez %1, 1f\n"
+ "li %0, 0\n"
+ "j 2f\n"
+ "1:\n"
+ CTZW "%0, %1\n"
+ "addi %0, %0, 1\n"
+ "2:\n"
+ ".option pop\n"
+ : "=r" (r) : "r" (x) :);
+
+ return r;
+
+legacy:
+ r = 1;
+ if (!x)
+ return 0;
+ if (!(x & 0xffff)) {
+ x >>= 16;
+ r += 16;
+ }
+ if (!(x & 0xff)) {
+ x >>= 8;
+ r += 8;
+ }
+ if (!(x & 0xf)) {
+ x >>= 4;
+ r += 4;
+ }
+ if (!(x & 3)) {
+ x >>= 2;
+ r += 2;
+ }
+ if (!(x & 1)) {
+ x >>= 1;
+ r += 1;
+ }
+ return r;
+}
+
+/**
+ * ffs - find first set bit in a word
+ * @x: the word to search
+ *
+ * This is defined the same way as the libc and compiler builtin ffs routines.
+ *
+ * ffs(value) returns 0 if value is 0 or the position of the first set bit if
+ * value is nonzero. The first (least significant) bit is at position 1.
+ */
+#define ffs(x) (__builtin_constant_p(x) ? __builtin_ffs(x) : variable_ffs(x))
+
+static __always_inline int variable_fls(unsigned int x)
+{
+ int r;
+
+ asm_volatile_goto(
+ ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
+ : : : : legacy);
+
+ asm volatile (
+ ".option push\n"
+ ".option arch,+zbb\n"
+ "bnez %1, 1f\n"
+ "li %0, 0\n"
+ "j 2f\n"
+ "1:\n"
+ CLZW "%0, %1\n"
+ "neg %0, %0\n"
+ "addi %0, %0, 32\n"
+ "2:\n"
+ ".option pop\n"
+ : "=r" (r) : "r" (x) :);
+
+ return r;
+
+legacy:
+ r = 32;
+ if (!x)
+ return 0;
+ if (!(x & 0xffff0000u)) {
+ x <<= 16;
+ r -= 16;
+ }
+ if (!(x & 0xff000000u)) {
+ x <<= 8;
+ r -= 8;
+ }
+ if (!(x & 0xf0000000u)) {
+ x <<= 4;
+ r -= 4;
+ }
+ if (!(x & 0xc0000000u)) {
+ x <<= 2;
+ r -= 2;
+ }
+ if (!(x & 0x80000000u)) {
+ x <<= 1;
+ r -= 1;
+ }
+ return r;
+}
+
+/**
+ * fls - find last set bit in a word
+ * @x: the word to search
+ *
+ * This is defined in a similar way as ffs, but returns the position of the most
+ * significant set bit.
+ *
+ * fls(value) returns 0 if value is 0 or the position of the last set bit if
+ * value is nonzero. The last (most significant) bit is at position 32.
+ */
+#define fls(x) \
+ (__builtin_constant_p(x) ? \
+ (int)(((x) != 0) ? \
+ (sizeof(unsigned int) * 8 - __builtin_clz(x)) : 0) : \
+ variable_fls(x))
+
+#endif
+
+#include <asm-generic/bitops/ffz.h>
#include <asm-generic/bitops/fls64.h>
#include <asm-generic/bitops/sched.h>
-#include <asm-generic/bitops/ffs.h>
#include <asm-generic/bitops/hweight.h>
@@ -28,7 +28,7 @@ cflags-$(CONFIG_ARM) += -DEFI_HAVE_STRLEN -DEFI_HAVE_STRNLEN \
-DEFI_HAVE_MEMCHR -DEFI_HAVE_STRRCHR \
-DEFI_HAVE_STRCMP -fno-builtin -fpic \
$(call cc-option,-mno-single-pic-base)
-cflags-$(CONFIG_RISCV) += -fpic
+cflags-$(CONFIG_RISCV) += -fpic -DEFI_NO_ALTERNATIVE
cflags-$(CONFIG_LOONGARCH) += -fpie
cflags-$(CONFIG_EFI_PARAMS_FROM_FDT) += -I$(srctree)/scripts/dtc/libfdt