[2/5] eytzinger: Promote to include/linux/

Message ID 20240126220655.395093-2-kent.overstreet@linux.dev
State New
Headers
Series [1/5] mean and variance: Promote to lib/math |

Commit Message

Kent Overstreet Jan. 26, 2024, 10:06 p.m. UTC
  eytzinger trees are a faster alternative to binary search. They're a bit
more expensive to setup, but lookups perform much better assuming the
tree isn't entirely in cache.

Binary search is a worst case scenario for branch prediction and
prefetching, but eytzinger trees have children adjacent in memory and
thus we can prefetch before knowing the result of a comparison.

An eytzinger tree is a binary tree laid out in an array, with the same
geometry as the usual binary heap construction, but used as a search
tree instead.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/bset.c                         |   2 +-
 fs/bcachefs/journal_seq_blacklist.c        |   6 +-
 fs/bcachefs/replicas.c                     |  17 ++-
 fs/bcachefs/replicas.h                     |   3 +-
 fs/bcachefs/super-io.h                     |   2 +-
 fs/bcachefs/util.c                         | 145 +--------------------
 fs/bcachefs/util.h                         |   4 -
 {fs/bcachefs => include/linux}/eytzinger.h |  56 ++++----
 lib/sort.c                                 |  85 ++++++++++++
 9 files changed, 136 insertions(+), 184 deletions(-)
 rename {fs/bcachefs => include/linux}/eytzinger.h (78%)
  

Comments

Darrick J. Wong Jan. 27, 2024, 2:17 a.m. UTC | #1
On Fri, Jan 26, 2024 at 05:06:52PM -0500, Kent Overstreet wrote:
> eytzinger trees are a faster alternative to binary search. They're a bit
> more expensive to setup, but lookups perform much better assuming the
> tree isn't entirely in cache.
> 
> Binary search is a worst case scenario for branch prediction and
> prefetching, but eytzinger trees have children adjacent in memory and
> thus we can prefetch before knowing the result of a comparison.
> 
> An eytzinger tree is a binary tree laid out in an array, with the same
> geometry as the usual binary heap construction, but used as a search
> tree instead.
> 
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

This looks more or less like what I remember of building heaps and
squinting at my horrible handwritten notes about eytzinger trees from
back in the day.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D

> ---
>  fs/bcachefs/bset.c                         |   2 +-
>  fs/bcachefs/journal_seq_blacklist.c        |   6 +-
>  fs/bcachefs/replicas.c                     |  17 ++-
>  fs/bcachefs/replicas.h                     |   3 +-
>  fs/bcachefs/super-io.h                     |   2 +-
>  fs/bcachefs/util.c                         | 145 +--------------------
>  fs/bcachefs/util.h                         |   4 -
>  {fs/bcachefs => include/linux}/eytzinger.h |  56 ++++----
>  lib/sort.c                                 |  85 ++++++++++++
>  9 files changed, 136 insertions(+), 184 deletions(-)
>  rename {fs/bcachefs => include/linux}/eytzinger.h (78%)
> 
> diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c
> index 3fd1085b6c61..1d77aa55d641 100644
> --- a/fs/bcachefs/bset.c
> +++ b/fs/bcachefs/bset.c
> @@ -9,12 +9,12 @@
>  #include "bcachefs.h"
>  #include "btree_cache.h"
>  #include "bset.h"
> -#include "eytzinger.h"
>  #include "trace.h"
>  #include "util.h"
>  
>  #include <asm/unaligned.h>
>  #include <linux/console.h>
> +#include <linux/eytzinger.h>
>  #include <linux/random.h>
>  #include <linux/prefetch.h>
>  
> diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c
> index 0200e299cfbb..024c9b1b323f 100644
> --- a/fs/bcachefs/journal_seq_blacklist.c
> +++ b/fs/bcachefs/journal_seq_blacklist.c
> @@ -2,10 +2,11 @@
>  
>  #include "bcachefs.h"
>  #include "btree_iter.h"
> -#include "eytzinger.h"
>  #include "journal_seq_blacklist.h"
>  #include "super-io.h"
>  
> +#include <linux/eytzinger.h>
> +
>  /*
>   * journal_seq_blacklist machinery:
>   *
> @@ -119,8 +120,7 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
>  	return ret ?: bch2_blacklist_table_initialize(c);
>  }
>  
> -static int journal_seq_blacklist_table_cmp(const void *_l,
> -					   const void *_r, size_t size)
> +static int journal_seq_blacklist_table_cmp(const void *_l, const void *_r)
>  {
>  	const struct journal_seq_blacklist_table_entry *l = _l;
>  	const struct journal_seq_blacklist_table_entry *r = _r;
> diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
> index cc2672c12031..75fdce373f76 100644
> --- a/fs/bcachefs/replicas.c
> +++ b/fs/bcachefs/replicas.c
> @@ -6,12 +6,15 @@
>  #include "replicas.h"
>  #include "super-io.h"
>  
> +#include <linux/sort.h>
> +
>  static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
>  					    struct bch_replicas_cpu *);
>  
>  /* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */
> -static int bch2_memcmp(const void *l, const void *r, size_t size)
> +static int bch2_memcmp(const void *l, const void *r,  const void *priv)
>  {
> +	size_t size = (size_t) priv;
>  	return memcmp(l, r, size);
>  }
>  
> @@ -39,7 +42,8 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e)
>  
>  static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
>  {
> -	eytzinger0_sort(r->entries, r->nr, r->entry_size, bch2_memcmp, NULL);
> +	eytzinger0_sort_r(r->entries, r->nr, r->entry_size,
> +			  bch2_memcmp, NULL, (void *)(size_t)r->entry_size);
>  }
>  
>  static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
> @@ -824,10 +828,11 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
>  {
>  	unsigned i;
>  
> -	sort_cmp_size(cpu_r->entries,
> -		      cpu_r->nr,
> -		      cpu_r->entry_size,
> -		      bch2_memcmp, NULL);
> +	sort_r(cpu_r->entries,
> +	       cpu_r->nr,
> +	       cpu_r->entry_size,
> +	       bch2_memcmp, NULL,
> +	       (void *)(size_t)cpu_r->entry_size);
>  
>  	for (i = 0; i < cpu_r->nr; i++) {
>  		struct bch_replicas_entry_v1 *e =
> diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h
> index 654a4b26d3a3..983cce782ac2 100644
> --- a/fs/bcachefs/replicas.h
> +++ b/fs/bcachefs/replicas.h
> @@ -3,9 +3,10 @@
>  #define _BCACHEFS_REPLICAS_H
>  
>  #include "bkey.h"
> -#include "eytzinger.h"
>  #include "replicas_types.h"
>  
> +#include <linux/eytzinger.h>
> +
>  void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *);
>  void bch2_replicas_entry_to_text(struct printbuf *,
>  				 struct bch_replicas_entry_v1 *);
> diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h
> index 95e80e06316b..f37620919e11 100644
> --- a/fs/bcachefs/super-io.h
> +++ b/fs/bcachefs/super-io.h
> @@ -3,12 +3,12 @@
>  #define _BCACHEFS_SUPER_IO_H
>  
>  #include "extents.h"
> -#include "eytzinger.h"
>  #include "super_types.h"
>  #include "super.h"
>  #include "sb-members.h"
>  
>  #include <asm/byteorder.h>
> +#include <linux/eytzinger.h>
>  
>  static inline bool bch2_version_compatible(u16 version)
>  {
> diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
> index d7ea95abb9df..c7cf9c6fcf9a 100644
> --- a/fs/bcachefs/util.c
> +++ b/fs/bcachefs/util.c
> @@ -11,6 +11,7 @@
>  #include <linux/console.h>
>  #include <linux/ctype.h>
>  #include <linux/debugfs.h>
> +#include <linux/eytzinger.h>
>  #include <linux/freezer.h>
>  #include <linux/kthread.h>
>  #include <linux/log2.h>
> @@ -24,7 +25,6 @@
>  #include <linux/sched/clock.h>
>  #include <linux/mean_and_variance.h>
>  
> -#include "eytzinger.h"
>  #include "util.h"
>  
>  static const char si_units[] = "?kMGTPEZY";
> @@ -863,149 +863,6 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
>  	}
>  }
>  
> -static int alignment_ok(const void *base, size_t align)
> -{
> -	return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
> -		((unsigned long)base & (align - 1)) == 0;
> -}
> -
> -static void u32_swap(void *a, void *b, size_t size)
> -{
> -	u32 t = *(u32 *)a;
> -	*(u32 *)a = *(u32 *)b;
> -	*(u32 *)b = t;
> -}
> -
> -static void u64_swap(void *a, void *b, size_t size)
> -{
> -	u64 t = *(u64 *)a;
> -	*(u64 *)a = *(u64 *)b;
> -	*(u64 *)b = t;
> -}
> -
> -static void generic_swap(void *a, void *b, size_t size)
> -{
> -	char t;
> -
> -	do {
> -		t = *(char *)a;
> -		*(char *)a++ = *(char *)b;
> -		*(char *)b++ = t;
> -	} while (--size > 0);
> -}
> -
> -static inline int do_cmp(void *base, size_t n, size_t size,
> -			 int (*cmp_func)(const void *, const void *, size_t),
> -			 size_t l, size_t r)
> -{
> -	return cmp_func(base + inorder_to_eytzinger0(l, n) * size,
> -			base + inorder_to_eytzinger0(r, n) * size,
> -			size);
> -}
> -
> -static inline void do_swap(void *base, size_t n, size_t size,
> -			   void (*swap_func)(void *, void *, size_t),
> -			   size_t l, size_t r)
> -{
> -	swap_func(base + inorder_to_eytzinger0(l, n) * size,
> -		  base + inorder_to_eytzinger0(r, n) * size,
> -		  size);
> -}
> -
> -void eytzinger0_sort(void *base, size_t n, size_t size,
> -		     int (*cmp_func)(const void *, const void *, size_t),
> -		     void (*swap_func)(void *, void *, size_t))
> -{
> -	int i, c, r;
> -
> -	if (!swap_func) {
> -		if (size == 4 && alignment_ok(base, 4))
> -			swap_func = u32_swap;
> -		else if (size == 8 && alignment_ok(base, 8))
> -			swap_func = u64_swap;
> -		else
> -			swap_func = generic_swap;
> -	}
> -
> -	/* heapify */
> -	for (i = n / 2 - 1; i >= 0; --i) {
> -		for (r = i; r * 2 + 1 < n; r = c) {
> -			c = r * 2 + 1;
> -
> -			if (c + 1 < n &&
> -			    do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
> -				c++;
> -
> -			if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
> -				break;
> -
> -			do_swap(base, n, size, swap_func, r, c);
> -		}
> -	}
> -
> -	/* sort */
> -	for (i = n - 1; i > 0; --i) {
> -		do_swap(base, n, size, swap_func, 0, i);
> -
> -		for (r = 0; r * 2 + 1 < i; r = c) {
> -			c = r * 2 + 1;
> -
> -			if (c + 1 < i &&
> -			    do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
> -				c++;
> -
> -			if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
> -				break;
> -
> -			do_swap(base, n, size, swap_func, r, c);
> -		}
> -	}
> -}
> -
> -void sort_cmp_size(void *base, size_t num, size_t size,
> -	  int (*cmp_func)(const void *, const void *, size_t),
> -	  void (*swap_func)(void *, void *, size_t size))
> -{
> -	/* pre-scale counters for performance */
> -	int i = (num/2 - 1) * size, n = num * size, c, r;
> -
> -	if (!swap_func) {
> -		if (size == 4 && alignment_ok(base, 4))
> -			swap_func = u32_swap;
> -		else if (size == 8 && alignment_ok(base, 8))
> -			swap_func = u64_swap;
> -		else
> -			swap_func = generic_swap;
> -	}
> -
> -	/* heapify */
> -	for ( ; i >= 0; i -= size) {
> -		for (r = i; r * 2 + size < n; r  = c) {
> -			c = r * 2 + size;
> -			if (c < n - size &&
> -			    cmp_func(base + c, base + c + size, size) < 0)
> -				c += size;
> -			if (cmp_func(base + r, base + c, size) >= 0)
> -				break;
> -			swap_func(base + r, base + c, size);
> -		}
> -	}
> -
> -	/* sort */
> -	for (i = n - size; i > 0; i -= size) {
> -		swap_func(base, base + i, size);
> -		for (r = 0; r * 2 + size < i; r = c) {
> -			c = r * 2 + size;
> -			if (c < i - size &&
> -			    cmp_func(base + c, base + c + size, size) < 0)
> -				c += size;
> -			if (cmp_func(base + r, base + c, size) >= 0)
> -				break;
> -			swap_func(base + r, base + c, size);
> -		}
> -	}
> -}
> -
>  static void mempool_free_vp(void *element, void *pool_data)
>  {
>  	size_t size = (size_t) pool_data;
> diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
> index 0059481995ef..c3b11c3d24ea 100644
> --- a/fs/bcachefs/util.h
> +++ b/fs/bcachefs/util.h
> @@ -737,10 +737,6 @@ static inline void memset_u64s_tail(void *s, int c, unsigned bytes)
>  	memset(s + bytes, c, rem);
>  }
>  
> -void sort_cmp_size(void *base, size_t num, size_t size,
> -	  int (*cmp_func)(const void *, const void *, size_t),
> -	  void (*swap_func)(void *, void *, size_t));
> -
>  /* just the memmove, doesn't update @_nr */
>  #define __array_insert_item(_array, _nr, _pos)				\
>  	memmove(&(_array)[(_pos) + 1],					\
> diff --git a/fs/bcachefs/eytzinger.h b/include/linux/eytzinger.h
> similarity index 78%
> rename from fs/bcachefs/eytzinger.h
> rename to include/linux/eytzinger.h
> index b04750dbf870..9565a5c26cd5 100644
> --- a/fs/bcachefs/eytzinger.h
> +++ b/include/linux/eytzinger.h
> @@ -1,27 +1,37 @@
>  /* SPDX-License-Identifier: GPL-2.0 */
> -#ifndef _EYTZINGER_H
> -#define _EYTZINGER_H
> +#ifndef _LINUX_EYTZINGER_H
> +#define _LINUX_EYTZINGER_H
>  
>  #include <linux/bitops.h>
>  #include <linux/log2.h>
>  
> -#include "util.h"
> +#ifdef EYTZINGER_DEBUG
> +#define EYTZINGER_BUG_ON(cond)		BUG_ON(cond)
> +#else
> +#define EYTZINGER_BUG_ON(cond)
> +#endif
>  
>  /*
>   * Traversal for trees in eytzinger layout - a full binary tree layed out in an
> - * array
> - */
> -
> -/*
> - * One based indexing version:
> + * array.
>   *
> - * With one based indexing each level of the tree starts at a power of two -
> - * good for cacheline alignment:
> + * Consider using an eytzinger tree any time you would otherwise be doing binary
> + * search over an array. Binary search is a worst case scenario for branch
> + * prediction and prefetching, but in an eytzinger tree every node's children
> + * are adjacent in memory, thus we can prefetch children before knowing the
> + * result of the comparison, assuming multiple nodes fit on a cacheline.
> + *
> + * Two variants are provided, for one based indexing and zero based indexing.
> + *
> + * Zero based indexing is more convenient, but one based indexing has better
> + * alignment and thus better performance because each new level of the tree
> + * starts at a power of two, and thus if element 0 was cacheline aligned, each
> + * new level will be as well.
>   */
>  
>  static inline unsigned eytzinger1_child(unsigned i, unsigned child)
>  {
> -	EBUG_ON(child > 1);
> +	EYTZINGER_BUG_ON(child > 1);
>  
>  	return (i << 1) + child;
>  }
> @@ -58,7 +68,7 @@ static inline unsigned eytzinger1_last(unsigned size)
>  
>  static inline unsigned eytzinger1_next(unsigned i, unsigned size)
>  {
> -	EBUG_ON(i > size);
> +	EYTZINGER_BUG_ON(i > size);
>  
>  	if (eytzinger1_right_child(i) <= size) {
>  		i = eytzinger1_right_child(i);
> @@ -74,7 +84,7 @@ static inline unsigned eytzinger1_next(unsigned i, unsigned size)
>  
>  static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
>  {
> -	EBUG_ON(i > size);
> +	EYTZINGER_BUG_ON(i > size);
>  
>  	if (eytzinger1_left_child(i) <= size) {
>  		i = eytzinger1_left_child(i) + 1;
> @@ -101,7 +111,7 @@ static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
>  	unsigned shift = __fls(size) - b;
>  	int s;
>  
> -	EBUG_ON(!i || i > size);
> +	EYTZINGER_BUG_ON(!i || i > size);
>  
>  	i  ^= 1U << b;
>  	i <<= 1;
> @@ -126,7 +136,7 @@ static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size,
>  	unsigned shift;
>  	int s;
>  
> -	EBUG_ON(!i || i > size);
> +	EYTZINGER_BUG_ON(!i || i > size);
>  
>  	/*
>  	 * sign bit trick:
> @@ -164,7 +174,7 @@ static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size)
>  
>  static inline unsigned eytzinger0_child(unsigned i, unsigned child)
>  {
> -	EBUG_ON(child > 1);
> +	EYTZINGER_BUG_ON(child > 1);
>  
>  	return (i << 1) + 1 + child;
>  }
> @@ -231,11 +241,9 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
>  	     (_i) != -1;				\
>  	     (_i) = eytzinger0_next((_i), (_size)))
>  
> -typedef int (*eytzinger_cmp_fn)(const void *l, const void *r, size_t size);
> -
>  /* return greatest node <= @search, or -1 if not found */
>  static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
> -					 eytzinger_cmp_fn cmp, const void *search)
> +					 cmp_func_t cmp, const void *search)
>  {
>  	unsigned i, n = 0;
>  
> @@ -244,7 +252,7 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
>  
>  	do {
>  		i = n;
> -		n = eytzinger0_child(i, cmp(search, base + i * size, size) >= 0);
> +		n = eytzinger0_child(i, cmp(search, base + i * size) >= 0);
>  	} while (n < nr);
>  
>  	if (n & 1) {
> @@ -274,8 +282,8 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
>  	_i;								\
>  })
>  
> -void eytzinger0_sort(void *, size_t, size_t,
> -		    int (*cmp_func)(const void *, const void *, size_t),
> -		    void (*swap_func)(void *, void *, size_t));
> +void eytzinger0_sort_r(void *, size_t, size_t,
> +		       cmp_r_func_t, swap_r_func_t, const void *);
> +void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t);
>  
> -#endif /* _EYTZINGER_H */
> +#endif /* _LINUX_EYTZINGER_H */
> diff --git a/lib/sort.c b/lib/sort.c
> index b399bf10d675..3dfa83d86bbb 100644
> --- a/lib/sort.c
> +++ b/lib/sort.c
> @@ -290,3 +290,88 @@ void sort(void *base, size_t num, size_t size,
>  	return sort_r(base, num, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
>  }
>  EXPORT_SYMBOL(sort);
> +
> +#include <linux/eytzinger.h>
> +
> +static inline int eytzinger0_do_cmp(void *base, size_t n, size_t size,
> +			 cmp_r_func_t cmp_func, const void *priv,
> +			 size_t l, size_t r)
> +{
> +	return do_cmp(base + inorder_to_eytzinger0(l, n) * size,
> +		      base + inorder_to_eytzinger0(r, n) * size,
> +		      cmp_func, priv);
> +}
> +
> +static inline void eytzinger0_do_swap(void *base, size_t n, size_t size,
> +			   swap_r_func_t swap_func, const void *priv,
> +			   size_t l, size_t r)
> +{
> +	do_swap(base + inorder_to_eytzinger0(l, n) * size,
> +		base + inorder_to_eytzinger0(r, n) * size,
> +		size, swap_func, priv);
> +}
> +
> +void eytzinger0_sort_r(void *base, size_t n, size_t size,
> +		       cmp_r_func_t cmp_func,
> +		       swap_r_func_t swap_func,
> +		       const void *priv)
> +{
> +	int i, c, r;
> +
> +	if (!swap_func) {
> +		if (is_aligned(base, size, 8))
> +			swap_func = SWAP_WORDS_64;
> +		else if (is_aligned(base, size, 4))
> +			swap_func = SWAP_WORDS_32;
> +		else
> +			swap_func = SWAP_BYTES;
> +	}
> +
> +	/* heapify */
> +	for (i = n / 2 - 1; i >= 0; --i) {
> +		for (r = i; r * 2 + 1 < n; r = c) {
> +			c = r * 2 + 1;
> +
> +			if (c + 1 < n &&
> +			    eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
> +				c++;
> +
> +			if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
> +				break;
> +
> +			eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
> +		}
> +	}
> +
> +	/* sort */
> +	for (i = n - 1; i > 0; --i) {
> +		eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i);
> +
> +		for (r = 0; r * 2 + 1 < i; r = c) {
> +			c = r * 2 + 1;
> +
> +			if (c + 1 < i &&
> +			    eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
> +				c++;
> +
> +			if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
> +				break;
> +
> +			eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
> +		}
> +	}
> +}
> +EXPORT_SYMBOL_GPL(eytzinger0_sort_r);
> +
> +void eytzinger0_sort(void *base, size_t n, size_t size,
> +		     cmp_func_t cmp_func,
> +		     swap_func_t swap_func)
> +{
> +	struct wrapper w = {
> +		.cmp  = cmp_func,
> +		.swap = swap_func,
> +	};
> +
> +	return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
> +}
> +EXPORT_SYMBOL_GPL(eytzinger0_sort);
> -- 
> 2.43.0
> 
>
  

Patch

diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c
index 3fd1085b6c61..1d77aa55d641 100644
--- a/fs/bcachefs/bset.c
+++ b/fs/bcachefs/bset.c
@@ -9,12 +9,12 @@ 
 #include "bcachefs.h"
 #include "btree_cache.h"
 #include "bset.h"
-#include "eytzinger.h"
 #include "trace.h"
 #include "util.h"
 
 #include <asm/unaligned.h>
 #include <linux/console.h>
+#include <linux/eytzinger.h>
 #include <linux/random.h>
 #include <linux/prefetch.h>
 
diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c
index 0200e299cfbb..024c9b1b323f 100644
--- a/fs/bcachefs/journal_seq_blacklist.c
+++ b/fs/bcachefs/journal_seq_blacklist.c
@@ -2,10 +2,11 @@ 
 
 #include "bcachefs.h"
 #include "btree_iter.h"
-#include "eytzinger.h"
 #include "journal_seq_blacklist.h"
 #include "super-io.h"
 
+#include <linux/eytzinger.h>
+
 /*
  * journal_seq_blacklist machinery:
  *
@@ -119,8 +120,7 @@  int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
 	return ret ?: bch2_blacklist_table_initialize(c);
 }
 
-static int journal_seq_blacklist_table_cmp(const void *_l,
-					   const void *_r, size_t size)
+static int journal_seq_blacklist_table_cmp(const void *_l, const void *_r)
 {
 	const struct journal_seq_blacklist_table_entry *l = _l;
 	const struct journal_seq_blacklist_table_entry *r = _r;
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index cc2672c12031..75fdce373f76 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -6,12 +6,15 @@ 
 #include "replicas.h"
 #include "super-io.h"
 
+#include <linux/sort.h>
+
 static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
 					    struct bch_replicas_cpu *);
 
 /* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */
-static int bch2_memcmp(const void *l, const void *r, size_t size)
+static int bch2_memcmp(const void *l, const void *r,  const void *priv)
 {
+	size_t size = (size_t) priv;
 	return memcmp(l, r, size);
 }
 
@@ -39,7 +42,8 @@  void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e)
 
 static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
 {
-	eytzinger0_sort(r->entries, r->nr, r->entry_size, bch2_memcmp, NULL);
+	eytzinger0_sort_r(r->entries, r->nr, r->entry_size,
+			  bch2_memcmp, NULL, (void *)(size_t)r->entry_size);
 }
 
 static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
@@ -824,10 +828,11 @@  static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
 {
 	unsigned i;
 
-	sort_cmp_size(cpu_r->entries,
-		      cpu_r->nr,
-		      cpu_r->entry_size,
-		      bch2_memcmp, NULL);
+	sort_r(cpu_r->entries,
+	       cpu_r->nr,
+	       cpu_r->entry_size,
+	       bch2_memcmp, NULL,
+	       (void *)(size_t)cpu_r->entry_size);
 
 	for (i = 0; i < cpu_r->nr; i++) {
 		struct bch_replicas_entry_v1 *e =
diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h
index 654a4b26d3a3..983cce782ac2 100644
--- a/fs/bcachefs/replicas.h
+++ b/fs/bcachefs/replicas.h
@@ -3,9 +3,10 @@ 
 #define _BCACHEFS_REPLICAS_H
 
 #include "bkey.h"
-#include "eytzinger.h"
 #include "replicas_types.h"
 
+#include <linux/eytzinger.h>
+
 void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *);
 void bch2_replicas_entry_to_text(struct printbuf *,
 				 struct bch_replicas_entry_v1 *);
diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h
index 95e80e06316b..f37620919e11 100644
--- a/fs/bcachefs/super-io.h
+++ b/fs/bcachefs/super-io.h
@@ -3,12 +3,12 @@ 
 #define _BCACHEFS_SUPER_IO_H
 
 #include "extents.h"
-#include "eytzinger.h"
 #include "super_types.h"
 #include "super.h"
 #include "sb-members.h"
 
 #include <asm/byteorder.h>
+#include <linux/eytzinger.h>
 
 static inline bool bch2_version_compatible(u16 version)
 {
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index d7ea95abb9df..c7cf9c6fcf9a 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -11,6 +11,7 @@ 
 #include <linux/console.h>
 #include <linux/ctype.h>
 #include <linux/debugfs.h>
+#include <linux/eytzinger.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/log2.h>
@@ -24,7 +25,6 @@ 
 #include <linux/sched/clock.h>
 #include <linux/mean_and_variance.h>
 
-#include "eytzinger.h"
 #include "util.h"
 
 static const char si_units[] = "?kMGTPEZY";
@@ -863,149 +863,6 @@  void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
 	}
 }
 
-static int alignment_ok(const void *base, size_t align)
-{
-	return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
-		((unsigned long)base & (align - 1)) == 0;
-}
-
-static void u32_swap(void *a, void *b, size_t size)
-{
-	u32 t = *(u32 *)a;
-	*(u32 *)a = *(u32 *)b;
-	*(u32 *)b = t;
-}
-
-static void u64_swap(void *a, void *b, size_t size)
-{
-	u64 t = *(u64 *)a;
-	*(u64 *)a = *(u64 *)b;
-	*(u64 *)b = t;
-}
-
-static void generic_swap(void *a, void *b, size_t size)
-{
-	char t;
-
-	do {
-		t = *(char *)a;
-		*(char *)a++ = *(char *)b;
-		*(char *)b++ = t;
-	} while (--size > 0);
-}
-
-static inline int do_cmp(void *base, size_t n, size_t size,
-			 int (*cmp_func)(const void *, const void *, size_t),
-			 size_t l, size_t r)
-{
-	return cmp_func(base + inorder_to_eytzinger0(l, n) * size,
-			base + inorder_to_eytzinger0(r, n) * size,
-			size);
-}
-
-static inline void do_swap(void *base, size_t n, size_t size,
-			   void (*swap_func)(void *, void *, size_t),
-			   size_t l, size_t r)
-{
-	swap_func(base + inorder_to_eytzinger0(l, n) * size,
-		  base + inorder_to_eytzinger0(r, n) * size,
-		  size);
-}
-
-void eytzinger0_sort(void *base, size_t n, size_t size,
-		     int (*cmp_func)(const void *, const void *, size_t),
-		     void (*swap_func)(void *, void *, size_t))
-{
-	int i, c, r;
-
-	if (!swap_func) {
-		if (size == 4 && alignment_ok(base, 4))
-			swap_func = u32_swap;
-		else if (size == 8 && alignment_ok(base, 8))
-			swap_func = u64_swap;
-		else
-			swap_func = generic_swap;
-	}
-
-	/* heapify */
-	for (i = n / 2 - 1; i >= 0; --i) {
-		for (r = i; r * 2 + 1 < n; r = c) {
-			c = r * 2 + 1;
-
-			if (c + 1 < n &&
-			    do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
-				c++;
-
-			if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
-				break;
-
-			do_swap(base, n, size, swap_func, r, c);
-		}
-	}
-
-	/* sort */
-	for (i = n - 1; i > 0; --i) {
-		do_swap(base, n, size, swap_func, 0, i);
-
-		for (r = 0; r * 2 + 1 < i; r = c) {
-			c = r * 2 + 1;
-
-			if (c + 1 < i &&
-			    do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
-				c++;
-
-			if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
-				break;
-
-			do_swap(base, n, size, swap_func, r, c);
-		}
-	}
-}
-
-void sort_cmp_size(void *base, size_t num, size_t size,
-	  int (*cmp_func)(const void *, const void *, size_t),
-	  void (*swap_func)(void *, void *, size_t size))
-{
-	/* pre-scale counters for performance */
-	int i = (num/2 - 1) * size, n = num * size, c, r;
-
-	if (!swap_func) {
-		if (size == 4 && alignment_ok(base, 4))
-			swap_func = u32_swap;
-		else if (size == 8 && alignment_ok(base, 8))
-			swap_func = u64_swap;
-		else
-			swap_func = generic_swap;
-	}
-
-	/* heapify */
-	for ( ; i >= 0; i -= size) {
-		for (r = i; r * 2 + size < n; r  = c) {
-			c = r * 2 + size;
-			if (c < n - size &&
-			    cmp_func(base + c, base + c + size, size) < 0)
-				c += size;
-			if (cmp_func(base + r, base + c, size) >= 0)
-				break;
-			swap_func(base + r, base + c, size);
-		}
-	}
-
-	/* sort */
-	for (i = n - size; i > 0; i -= size) {
-		swap_func(base, base + i, size);
-		for (r = 0; r * 2 + size < i; r = c) {
-			c = r * 2 + size;
-			if (c < i - size &&
-			    cmp_func(base + c, base + c + size, size) < 0)
-				c += size;
-			if (cmp_func(base + r, base + c, size) >= 0)
-				break;
-			swap_func(base + r, base + c, size);
-		}
-	}
-}
-
 static void mempool_free_vp(void *element, void *pool_data)
 {
 	size_t size = (size_t) pool_data;
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 0059481995ef..c3b11c3d24ea 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -737,10 +737,6 @@  static inline void memset_u64s_tail(void *s, int c, unsigned bytes)
 	memset(s + bytes, c, rem);
 }
 
-void sort_cmp_size(void *base, size_t num, size_t size,
-	  int (*cmp_func)(const void *, const void *, size_t),
-	  void (*swap_func)(void *, void *, size_t));
-
 /* just the memmove, doesn't update @_nr */
 #define __array_insert_item(_array, _nr, _pos)				\
 	memmove(&(_array)[(_pos) + 1],					\
diff --git a/fs/bcachefs/eytzinger.h b/include/linux/eytzinger.h
similarity index 78%
rename from fs/bcachefs/eytzinger.h
rename to include/linux/eytzinger.h
index b04750dbf870..9565a5c26cd5 100644
--- a/fs/bcachefs/eytzinger.h
+++ b/include/linux/eytzinger.h
@@ -1,27 +1,37 @@ 
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _EYTZINGER_H
-#define _EYTZINGER_H
+#ifndef _LINUX_EYTZINGER_H
+#define _LINUX_EYTZINGER_H
 
 #include <linux/bitops.h>
 #include <linux/log2.h>
 
-#include "util.h"
+#ifdef EYTZINGER_DEBUG
+#define EYTZINGER_BUG_ON(cond)		BUG_ON(cond)
+#else
+#define EYTZINGER_BUG_ON(cond)
+#endif
 
 /*
  * Traversal for trees in eytzinger layout - a full binary tree layed out in an
- * array
- */
-
-/*
- * One based indexing version:
+ * array.
  *
- * With one based indexing each level of the tree starts at a power of two -
- * good for cacheline alignment:
+ * Consider using an eytzinger tree any time you would otherwise be doing binary
+ * search over an array. Binary search is a worst case scenario for branch
+ * prediction and prefetching, but in an eytzinger tree every node's children
+ * are adjacent in memory, thus we can prefetch children before knowing the
+ * result of the comparison, assuming multiple nodes fit on a cacheline.
+ *
+ * Two variants are provided, for one based indexing and zero based indexing.
+ *
+ * Zero based indexing is more convenient, but one based indexing has better
+ * alignment and thus better performance because each new level of the tree
+ * starts at a power of two, and thus if element 0 was cacheline aligned, each
+ * new level will be as well.
  */
 
 static inline unsigned eytzinger1_child(unsigned i, unsigned child)
 {
-	EBUG_ON(child > 1);
+	EYTZINGER_BUG_ON(child > 1);
 
 	return (i << 1) + child;
 }
@@ -58,7 +68,7 @@  static inline unsigned eytzinger1_last(unsigned size)
 
 static inline unsigned eytzinger1_next(unsigned i, unsigned size)
 {
-	EBUG_ON(i > size);
+	EYTZINGER_BUG_ON(i > size);
 
 	if (eytzinger1_right_child(i) <= size) {
 		i = eytzinger1_right_child(i);
@@ -74,7 +84,7 @@  static inline unsigned eytzinger1_next(unsigned i, unsigned size)
 
 static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
 {
-	EBUG_ON(i > size);
+	EYTZINGER_BUG_ON(i > size);
 
 	if (eytzinger1_left_child(i) <= size) {
 		i = eytzinger1_left_child(i) + 1;
@@ -101,7 +111,7 @@  static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
 	unsigned shift = __fls(size) - b;
 	int s;
 
-	EBUG_ON(!i || i > size);
+	EYTZINGER_BUG_ON(!i || i > size);
 
 	i  ^= 1U << b;
 	i <<= 1;
@@ -126,7 +136,7 @@  static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size,
 	unsigned shift;
 	int s;
 
-	EBUG_ON(!i || i > size);
+	EYTZINGER_BUG_ON(!i || i > size);
 
 	/*
 	 * sign bit trick:
@@ -164,7 +174,7 @@  static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size)
 
 static inline unsigned eytzinger0_child(unsigned i, unsigned child)
 {
-	EBUG_ON(child > 1);
+	EYTZINGER_BUG_ON(child > 1);
 
 	return (i << 1) + 1 + child;
 }
@@ -231,11 +241,9 @@  static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
 	     (_i) != -1;				\
 	     (_i) = eytzinger0_next((_i), (_size)))
 
-typedef int (*eytzinger_cmp_fn)(const void *l, const void *r, size_t size);
-
 /* return greatest node <= @search, or -1 if not found */
 static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
-					 eytzinger_cmp_fn cmp, const void *search)
+					 cmp_func_t cmp, const void *search)
 {
 	unsigned i, n = 0;
 
@@ -244,7 +252,7 @@  static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
 
 	do {
 		i = n;
-		n = eytzinger0_child(i, cmp(search, base + i * size, size) >= 0);
+		n = eytzinger0_child(i, cmp(search, base + i * size) >= 0);
 	} while (n < nr);
 
 	if (n & 1) {
@@ -274,8 +282,8 @@  static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
 	_i;								\
 })
 
-void eytzinger0_sort(void *, size_t, size_t,
-		    int (*cmp_func)(const void *, const void *, size_t),
-		    void (*swap_func)(void *, void *, size_t));
+void eytzinger0_sort_r(void *, size_t, size_t,
+		       cmp_r_func_t, swap_r_func_t, const void *);
+void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t);
 
-#endif /* _EYTZINGER_H */
+#endif /* _LINUX_EYTZINGER_H */
diff --git a/lib/sort.c b/lib/sort.c
index b399bf10d675..3dfa83d86bbb 100644
--- a/lib/sort.c
+++ b/lib/sort.c
@@ -290,3 +290,88 @@  void sort(void *base, size_t num, size_t size,
 	return sort_r(base, num, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
 }
 EXPORT_SYMBOL(sort);
+
+#include <linux/eytzinger.h>
+
+static inline int eytzinger0_do_cmp(void *base, size_t n, size_t size,
+			 cmp_r_func_t cmp_func, const void *priv,
+			 size_t l, size_t r)
+{
+	return do_cmp(base + inorder_to_eytzinger0(l, n) * size,
+		      base + inorder_to_eytzinger0(r, n) * size,
+		      cmp_func, priv);
+}
+
+static inline void eytzinger0_do_swap(void *base, size_t n, size_t size,
+			   swap_r_func_t swap_func, const void *priv,
+			   size_t l, size_t r)
+{
+	do_swap(base + inorder_to_eytzinger0(l, n) * size,
+		base + inorder_to_eytzinger0(r, n) * size,
+		size, swap_func, priv);
+}
+
+void eytzinger0_sort_r(void *base, size_t n, size_t size,
+		       cmp_r_func_t cmp_func,
+		       swap_r_func_t swap_func,
+		       const void *priv)
+{
+	int i, c, r;
+
+	if (!swap_func) {
+		if (is_aligned(base, size, 8))
+			swap_func = SWAP_WORDS_64;
+		else if (is_aligned(base, size, 4))
+			swap_func = SWAP_WORDS_32;
+		else
+			swap_func = SWAP_BYTES;
+	}
+
+	/* heapify */
+	for (i = n / 2 - 1; i >= 0; --i) {
+		for (r = i; r * 2 + 1 < n; r = c) {
+			c = r * 2 + 1;
+
+			if (c + 1 < n &&
+			    eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
+				c++;
+
+			if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
+				break;
+
+			eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
+		}
+	}
+
+	/* sort */
+	for (i = n - 1; i > 0; --i) {
+		eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i);
+
+		for (r = 0; r * 2 + 1 < i; r = c) {
+			c = r * 2 + 1;
+
+			if (c + 1 < i &&
+			    eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
+				c++;
+
+			if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
+				break;
+
+			eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(eytzinger0_sort_r);
+
+void eytzinger0_sort(void *base, size_t n, size_t size,
+		     cmp_func_t cmp_func,
+		     swap_func_t swap_func)
+{
+	struct wrapper w = {
+		.cmp  = cmp_func,
+		.swap = swap_func,
+	};
+
+	return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
+}
+EXPORT_SYMBOL_GPL(eytzinger0_sort);