[2/3] bitmap: improve small_const case for find_next() functions

Message ID 20221027043810.350460-3-yury.norov@gmail.com
State New
Headers
Series bitmap: optimize small_const path for |

Commit Message

Yury Norov Oct. 27, 2022, 4:38 a.m. UTC
  find_next_bit() and friends use small_const_nbits() to detect possibility
of compile-time optimization. It works well for nbits up to BITS_PER_LONG,
i.e. for 1-word bitmaps. When nbits belongs to 2nd, 3rd or any other word,
small_const_nbits() returns false.

But when both offset and size are within the same word boundary, we still
can make a small_const optimization because the whole business is just
fetching a single word, and masking head and tail bits if needed.

This patch adds a new small_const_nbits_off() macro doing that. It replaces
small_const_nbits() in find_next_bit() functions.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 include/asm-generic/bitsperlong.h | 12 +++++++++
 include/linux/find.h              | 45 ++++++++++++-------------------
 2 files changed, 29 insertions(+), 28 deletions(-)
  

Patch

diff --git a/include/asm-generic/bitsperlong.h b/include/asm-generic/bitsperlong.h
index 1023e2a4bd37..c294ff798154 100644
--- a/include/asm-generic/bitsperlong.h
+++ b/include/asm-generic/bitsperlong.h
@@ -35,4 +35,16 @@ 
 #define small_const_nbits(nbits) \
 	(__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0)
 
+/*
+ * small_const_nbits_off(nbits, off) is true precisely when it is known at
+ * compile-time that all bits in range [off, nbits) belong to the same word.
+ * Bitmaps of size 0 are very rare, and a compile-time-known-size 0 is most
+ * likely a sign of error. They will be handled correctly by the bit/bitmap
+ * APIs using the out-of-line functions, so that the inline implementations
+ * can unconditionally dereference the pointer(s).
+ */
+#define small_const_nbits_off(nbits, off) \
+	(__builtin_constant_p(nbits) && __builtin_constant_p(off) && (nbits) > 0 && \
+	 (nbits) > (off) && (off) / BITS_PER_LONG == ((nbits) - 1) / BITS_PER_LONG)
+
 #endif /* __ASM_GENERIC_BITS_PER_LONG */
diff --git a/include/linux/find.h b/include/linux/find.h
index db2f2851601d..df5c4d1adf4c 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -7,6 +7,7 @@ 
 #endif
 
 #include <linux/bitops.h>
+#include <linux/math.h>
 
 unsigned long _find_next_bit(const unsigned long *addr1, unsigned long nbits,
 				unsigned long start);
@@ -49,14 +50,11 @@  static __always_inline
 unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
 			    unsigned long offset)
 {
-	if (small_const_nbits(size)) {
-		unsigned long val;
+	if (small_const_nbits_off(size, offset)) {
+		unsigned long val = addr[offset/BITS_PER_LONG] &
+				    GENMASK((size - 1) % BITS_PER_LONG, offset % BITS_PER_LONG);
 
-		if (unlikely(offset >= size))
-			return size;
-
-		val = *addr & GENMASK(size - 1, offset);
-		return val ? __ffs(val) : size;
+		return val ? round_down(offset, BITS_PER_LONG) + __ffs(val) : size;
 	}
 
 	return _find_next_bit(addr, size, offset);
@@ -79,14 +77,11 @@  unsigned long find_next_and_bit(const unsigned long *addr1,
 		const unsigned long *addr2, unsigned long size,
 		unsigned long offset)
 {
-	if (small_const_nbits(size)) {
-		unsigned long val;
+	if (small_const_nbits_off(size, offset)) {
+		unsigned long val = addr1[offset/BITS_PER_LONG] & addr2[offset/BITS_PER_LONG] &
+				    GENMASK((size - 1) % BITS_PER_LONG, offset % BITS_PER_LONG);
 
-		if (unlikely(offset >= size))
-			return size;
-
-		val = *addr1 & *addr2 & GENMASK(size - 1, offset);
-		return val ? __ffs(val) : size;
+		return val ? round_down(offset, BITS_PER_LONG) + __ffs(val) : size;
 	}
 
 	return _find_next_and_bit(addr1, addr2, size, offset);
@@ -110,14 +105,11 @@  unsigned long find_next_andnot_bit(const unsigned long *addr1,
 		const unsigned long *addr2, unsigned long size,
 		unsigned long offset)
 {
-	if (small_const_nbits(size)) {
-		unsigned long val;
+	if (small_const_nbits_off(size, offset)) {
+		unsigned long val = addr1[offset/BITS_PER_LONG] & ~addr2[offset/BITS_PER_LONG] &
+				    GENMASK((size - 1) % BITS_PER_LONG, offset % BITS_PER_LONG);
 
-		if (unlikely(offset >= size))
-			return size;
-
-		val = *addr1 & ~*addr2 & GENMASK(size - 1, offset);
-		return val ? __ffs(val) : size;
+		return val ? round_down(offset, BITS_PER_LONG) + __ffs(val) : size;
 	}
 
 	return _find_next_andnot_bit(addr1, addr2, size, offset);
@@ -138,14 +130,11 @@  static __always_inline
 unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
 				 unsigned long offset)
 {
-	if (small_const_nbits(size)) {
-		unsigned long val;
+	if (small_const_nbits_off(size, offset)) {
+		unsigned long val = addr[offset/BITS_PER_LONG] |
+				    ~GENMASK((size - 1) % BITS_PER_LONG, offset % BITS_PER_LONG);
 
-		if (unlikely(offset >= size))
-			return size;
-
-		val = *addr | ~GENMASK(size - 1, offset);
-		return val == ~0UL ? size : ffz(val);
+		return val == ~0UL ? size : round_down(offset, BITS_PER_LONG) + ffz(val);
 	}
 
 	return _find_next_zero_bit(addr, size, offset);