[v9,05/11] btrfs: lookup physical address from stripe extent

Message ID 20230914-raid-stripe-tree-v9-5-15d423829637@wdc.com
State New
Headers
Series btrfs: introduce RAID stripe tree |

Commit Message

Johannes Thumshirn Sept. 14, 2023, 4:07 p.m. UTC
  Lookup the physical address from the raid stripe tree when a read on an
RAID volume formatted with the raid stripe tree was attempted.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/btrfs/raid-stripe-tree.c | 130 ++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/raid-stripe-tree.h |  11 ++++
 fs/btrfs/volumes.c          |  37 ++++++++++---
 3 files changed, 169 insertions(+), 9 deletions(-)
  

Comments

David Sterba Sept. 14, 2023, 5:57 p.m. UTC | #1
On Thu, Sep 14, 2023 at 09:07:00AM -0700, Johannes Thumshirn wrote:
> Lookup the physical address from the raid stripe tree when a read on an
> RAID volume formatted with the raid stripe tree was attempted.
> 
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>  fs/btrfs/raid-stripe-tree.c | 130 ++++++++++++++++++++++++++++++++++++++++++++
>  fs/btrfs/raid-stripe-tree.h |  11 ++++
>  fs/btrfs/volumes.c          |  37 ++++++++++---
>  3 files changed, 169 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c
> index 517bc08803f1..697a6e1fd255 100644
> --- a/fs/btrfs/raid-stripe-tree.c
> +++ b/fs/btrfs/raid-stripe-tree.c
> @@ -303,3 +303,133 @@ int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
>  
>  	return ret;
>  }
> +
> +int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
> +				 u64 logical, u64 *length, u64 map_type,
> +				 u32 stripe_index,
> +				 struct btrfs_io_stripe *stripe)
> +{
> +	struct btrfs_root *stripe_root = fs_info->stripe_root;
> +	struct btrfs_stripe_extent *stripe_extent;
> +	struct btrfs_key stripe_key;
> +	struct btrfs_key found_key;
> +	struct btrfs_path *path;
> +	struct extent_buffer *leaf;
> +	const u64 end = logical + *length;
> +	int num_stripes;
> +	u8 encoding;
> +	u64 offset;
> +	u64 found_logical;
> +	u64 found_length;
> +	u64 found_end;
> +	int slot;
> +	int ret;
> +	int i;
> +
> +	stripe_key.objectid = logical;
> +	stripe_key.type = BTRFS_RAID_STRIPE_KEY;
> +	stripe_key.offset = 0;
> +
> +	path = btrfs_alloc_path();
> +	if (!path)
> +		return -ENOMEM;
> +
> +	ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0);
> +	if (ret < 0)
> +		goto free_path;
> +	if (ret) {
> +		if (path->slots[0] != 0)
> +			path->slots[0]--;
> +	}
> +
> +
> +	while (1) {
> +		leaf = path->nodes[0];
> +		slot = path->slots[0];
> +
> +		btrfs_item_key_to_cpu(leaf, &found_key, slot);
> +		found_logical = found_key.objectid;
> +		found_length = found_key.offset;
> +		found_end = found_logical + found_length;
> +
> +		if (found_logical > end) {
> +			ret = -ENOENT;
> +			goto out;
> +		}
> +
> +		if (in_range(logical, found_logical, found_length))
> +			break;
> +
> +		ret = btrfs_next_item(stripe_root, path);
> +		if (ret)
> +			goto out;
> +	}
> +
> +	offset = logical - found_logical;
> +
> +	/*
> +	 * If we have a logically contiguous, but physically noncontinuous
> +	 * range, we need to split the bio. Record the length after which we
> +	 * must split the bio.
> +	 */
> +	if (end > found_end)
> +		*length -= end - found_end;
> +
> +	num_stripes = btrfs_num_raid_stripes(btrfs_item_size(leaf, slot));
> +	stripe_extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
> +	encoding = btrfs_stripe_extent_encoding(leaf, stripe_extent);
> +
> +	if (encoding != btrfs_bg_flags_to_raid_index(map_type)) {
> +		ret = -EUCLEAN;
> +		btrfs_handle_fs_error(fs_info, ret,
> +				      "on-disk stripe encoding %d doesn't match RAID index %d",
> +				      encoding,
> +				      btrfs_bg_flags_to_raid_index(map_type));
> +		goto out;
> +	}
> +
> +	for (i = 0; i < num_stripes; i++) {
> +		struct btrfs_raid_stride *stride = &stripe_extent->strides[i];
> +		u64 devid = btrfs_raid_stride_devid(leaf, stride);
> +		u64 len = btrfs_raid_stride_length(leaf, stride);
> +		u64 physical = btrfs_raid_stride_physical(leaf, stride);
> +
> +		if (offset >= len) {
> +			offset -= len;
> +
> +			if (offset >= BTRFS_STRIPE_LEN)
> +				continue;
> +		}
> +
> +		if (devid != stripe->dev->devid)
> +			continue;
> +
> +		if ((map_type & BTRFS_BLOCK_GROUP_DUP) && stripe_index != i)
> +			continue;
> +
> +		stripe->physical = physical + offset;
> +
> +		ret = 0;
> +		goto free_path;
> +	}
> +
> +	/*
> +	 * If we're here, we haven't found the requested devid in the stripe.
> +	 */
> +	ret = -ENOENT;
> +out:
> +	if (ret > 0)
> +		ret = -ENOENT;
> +	if (ret && ret != -EIO) {
> +		if (IS_ENABLED(CONFIG_BTRFS_DEBUG))
> +			btrfs_print_tree(leaf, 1);
> +		btrfs_err(fs_info,
> +			  "cannot find raid-stripe for logical [%llu, %llu] devid %llu, profile %s",
> +			  logical, logical + *length, stripe->dev->devid,
> +			  btrfs_bg_type_to_raid_name(map_type));
> +	}
> +free_path:
> +	btrfs_free_path(path);
> +
> +	return ret;
> +}
> diff --git a/fs/btrfs/raid-stripe-tree.h b/fs/btrfs/raid-stripe-tree.h
> index b3a127c997c8..5d9629a815c1 100644
> --- a/fs/btrfs/raid-stripe-tree.h
> +++ b/fs/btrfs/raid-stripe-tree.h
> @@ -13,6 +13,10 @@ struct btrfs_trans_handle;
>  
>  int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start,
>  			     u64 length);
> +int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
> +				 u64 logical, u64 *length, u64 map_type,
> +				 u32 stripe_index,
> +				 struct btrfs_io_stripe *stripe);
>  int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
>  			     struct btrfs_ordered_extent *ordered_extent);
>  
> @@ -33,4 +37,11 @@ static inline bool btrfs_need_stripe_tree_update(struct btrfs_fs_info *fs_info,
>  
>  	return false;
>  }
> +
> +static inline int btrfs_num_raid_stripes(u32 item_size)
> +{
> +	return (item_size - offsetof(struct btrfs_stripe_extent, strides)) /
> +		sizeof(struct btrfs_raid_stride);
> +}
> +
>  #endif
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index c2bac87912c7..2326dbcf85f6 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -35,6 +35,7 @@
>  #include "relocation.h"
>  #include "scrub.h"
>  #include "super.h"
> +#include "raid-stripe-tree.h"
>  
>  #define BTRFS_BLOCK_GROUP_STRIPE_MASK	(BTRFS_BLOCK_GROUP_RAID0 | \
>  					 BTRFS_BLOCK_GROUP_RAID10 | \
> @@ -6309,12 +6310,22 @@ static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op,
>  	return U64_MAX;
>  }
>  
> -static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *map,
> -			  u32 stripe_index, u64 stripe_offset, u32 stripe_nr)
> +static int set_io_stripe(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
> +		      u64 logical, u64 *length, struct btrfs_io_stripe *dst,
> +		      struct map_lookup *map, u32 stripe_index,
> +		      u64 stripe_offset, u64 stripe_nr)
>  {
>  	dst->dev = map->stripes[stripe_index].dev;
> +
> +	if (op == BTRFS_MAP_READ &&
> +	    btrfs_need_stripe_tree_update(fs_info, map->type))
> +		return btrfs_get_raid_extent_offset(fs_info, logical, length,
> +						    map->type, stripe_index,
> +						    dst);
> +
>  	dst->physical = map->stripes[stripe_index].physical +
>  			stripe_offset + btrfs_stripe_nr_to_offset(stripe_nr);
> +	return 0;
>  }
>  
>  /*
> @@ -6531,11 +6542,11 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
>  	 */
>  	if (smap && num_alloc_stripes == 1 &&
>  	    !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)) {
> -		set_io_stripe(smap, map, stripe_index, stripe_offset, stripe_nr);
> +		ret = set_io_stripe(fs_info, op, logical, length, smap, map,
> +				    stripe_index, stripe_offset, stripe_nr);

>  		if (mirror_num_ret)
>  			*mirror_num_ret = mirror_num;
>  		*bioc_ret = NULL;
> -		ret = 0;
>  		goto out;
>  	}
>  
> @@ -6566,21 +6577,29 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
>  		bioc->full_stripe_logical = em->start +
>  			btrfs_stripe_nr_to_offset(stripe_nr * data_stripes);
>  		for (i = 0; i < num_stripes; i++)
> -			set_io_stripe(&bioc->stripes[i], map,
> -				      (i + stripe_nr) % num_stripes,
> -				      stripe_offset, stripe_nr);
> +			ret = set_io_stripe(fs_info, op, logical, length,
> +					    &bioc->stripes[i], map,
> +					    (i + stripe_nr) % num_stripes,
> +					    stripe_offset, stripe_nr);

You've added error value but it's not checked and the whole for-loop
continues, is that intentional?

>  	} else {
>  		/*
>  		 * For all other non-RAID56 profiles, just copy the target
>  		 * stripe into the bioc.
>  		 */
>  		for (i = 0; i < num_stripes; i++) {
> -			set_io_stripe(&bioc->stripes[i], map, stripe_index,
> -				      stripe_offset, stripe_nr);
> +			ret = set_io_stripe(fs_info, op, logical, length,
> +					    &bioc->stripes[i], map, stripe_index,
> +					    stripe_offset, stripe_nr);

Same here.

>  			stripe_index++;
>  		}
>  	}
>  
> +	if (ret) {
> +		*bioc_ret = NULL;
> +		btrfs_put_bioc(bioc);
> +		goto out;
> +	}

This handles ret != 0 but after the whole loop is done.

> +
>  	if (op != BTRFS_MAP_READ)
>  		max_errors = btrfs_chunk_max_errors(map);
>  
> 
> -- 
> 2.41.0
  

Patch

diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c
index 517bc08803f1..697a6e1fd255 100644
--- a/fs/btrfs/raid-stripe-tree.c
+++ b/fs/btrfs/raid-stripe-tree.c
@@ -303,3 +303,133 @@  int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
 
 	return ret;
 }
+
+int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
+				 u64 logical, u64 *length, u64 map_type,
+				 u32 stripe_index,
+				 struct btrfs_io_stripe *stripe)
+{
+	struct btrfs_root *stripe_root = fs_info->stripe_root;
+	struct btrfs_stripe_extent *stripe_extent;
+	struct btrfs_key stripe_key;
+	struct btrfs_key found_key;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	const u64 end = logical + *length;
+	int num_stripes;
+	u8 encoding;
+	u64 offset;
+	u64 found_logical;
+	u64 found_length;
+	u64 found_end;
+	int slot;
+	int ret;
+	int i;
+
+	stripe_key.objectid = logical;
+	stripe_key.type = BTRFS_RAID_STRIPE_KEY;
+	stripe_key.offset = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0);
+	if (ret < 0)
+		goto free_path;
+	if (ret) {
+		if (path->slots[0] != 0)
+			path->slots[0]--;
+	}
+
+
+	while (1) {
+		leaf = path->nodes[0];
+		slot = path->slots[0];
+
+		btrfs_item_key_to_cpu(leaf, &found_key, slot);
+		found_logical = found_key.objectid;
+		found_length = found_key.offset;
+		found_end = found_logical + found_length;
+
+		if (found_logical > end) {
+			ret = -ENOENT;
+			goto out;
+		}
+
+		if (in_range(logical, found_logical, found_length))
+			break;
+
+		ret = btrfs_next_item(stripe_root, path);
+		if (ret)
+			goto out;
+	}
+
+	offset = logical - found_logical;
+
+	/*
+	 * If we have a logically contiguous, but physically noncontinuous
+	 * range, we need to split the bio. Record the length after which we
+	 * must split the bio.
+	 */
+	if (end > found_end)
+		*length -= end - found_end;
+
+	num_stripes = btrfs_num_raid_stripes(btrfs_item_size(leaf, slot));
+	stripe_extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
+	encoding = btrfs_stripe_extent_encoding(leaf, stripe_extent);
+
+	if (encoding != btrfs_bg_flags_to_raid_index(map_type)) {
+		ret = -EUCLEAN;
+		btrfs_handle_fs_error(fs_info, ret,
+				      "on-disk stripe encoding %d doesn't match RAID index %d",
+				      encoding,
+				      btrfs_bg_flags_to_raid_index(map_type));
+		goto out;
+	}
+
+	for (i = 0; i < num_stripes; i++) {
+		struct btrfs_raid_stride *stride = &stripe_extent->strides[i];
+		u64 devid = btrfs_raid_stride_devid(leaf, stride);
+		u64 len = btrfs_raid_stride_length(leaf, stride);
+		u64 physical = btrfs_raid_stride_physical(leaf, stride);
+
+		if (offset >= len) {
+			offset -= len;
+
+			if (offset >= BTRFS_STRIPE_LEN)
+				continue;
+		}
+
+		if (devid != stripe->dev->devid)
+			continue;
+
+		if ((map_type & BTRFS_BLOCK_GROUP_DUP) && stripe_index != i)
+			continue;
+
+		stripe->physical = physical + offset;
+
+		ret = 0;
+		goto free_path;
+	}
+
+	/*
+	 * If we're here, we haven't found the requested devid in the stripe.
+	 */
+	ret = -ENOENT;
+out:
+	if (ret > 0)
+		ret = -ENOENT;
+	if (ret && ret != -EIO) {
+		if (IS_ENABLED(CONFIG_BTRFS_DEBUG))
+			btrfs_print_tree(leaf, 1);
+		btrfs_err(fs_info,
+			  "cannot find raid-stripe for logical [%llu, %llu] devid %llu, profile %s",
+			  logical, logical + *length, stripe->dev->devid,
+			  btrfs_bg_type_to_raid_name(map_type));
+	}
+free_path:
+	btrfs_free_path(path);
+
+	return ret;
+}
diff --git a/fs/btrfs/raid-stripe-tree.h b/fs/btrfs/raid-stripe-tree.h
index b3a127c997c8..5d9629a815c1 100644
--- a/fs/btrfs/raid-stripe-tree.h
+++ b/fs/btrfs/raid-stripe-tree.h
@@ -13,6 +13,10 @@  struct btrfs_trans_handle;
 
 int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start,
 			     u64 length);
+int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
+				 u64 logical, u64 *length, u64 map_type,
+				 u32 stripe_index,
+				 struct btrfs_io_stripe *stripe);
 int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
 			     struct btrfs_ordered_extent *ordered_extent);
 
@@ -33,4 +37,11 @@  static inline bool btrfs_need_stripe_tree_update(struct btrfs_fs_info *fs_info,
 
 	return false;
 }
+
+static inline int btrfs_num_raid_stripes(u32 item_size)
+{
+	return (item_size - offsetof(struct btrfs_stripe_extent, strides)) /
+		sizeof(struct btrfs_raid_stride);
+}
+
 #endif
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c2bac87912c7..2326dbcf85f6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -35,6 +35,7 @@ 
 #include "relocation.h"
 #include "scrub.h"
 #include "super.h"
+#include "raid-stripe-tree.h"
 
 #define BTRFS_BLOCK_GROUP_STRIPE_MASK	(BTRFS_BLOCK_GROUP_RAID0 | \
 					 BTRFS_BLOCK_GROUP_RAID10 | \
@@ -6309,12 +6310,22 @@  static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op,
 	return U64_MAX;
 }
 
-static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *map,
-			  u32 stripe_index, u64 stripe_offset, u32 stripe_nr)
+static int set_io_stripe(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
+		      u64 logical, u64 *length, struct btrfs_io_stripe *dst,
+		      struct map_lookup *map, u32 stripe_index,
+		      u64 stripe_offset, u64 stripe_nr)
 {
 	dst->dev = map->stripes[stripe_index].dev;
+
+	if (op == BTRFS_MAP_READ &&
+	    btrfs_need_stripe_tree_update(fs_info, map->type))
+		return btrfs_get_raid_extent_offset(fs_info, logical, length,
+						    map->type, stripe_index,
+						    dst);
+
 	dst->physical = map->stripes[stripe_index].physical +
 			stripe_offset + btrfs_stripe_nr_to_offset(stripe_nr);
+	return 0;
 }
 
 /*
@@ -6531,11 +6542,11 @@  int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 	 */
 	if (smap && num_alloc_stripes == 1 &&
 	    !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)) {
-		set_io_stripe(smap, map, stripe_index, stripe_offset, stripe_nr);
+		ret = set_io_stripe(fs_info, op, logical, length, smap, map,
+				    stripe_index, stripe_offset, stripe_nr);
 		if (mirror_num_ret)
 			*mirror_num_ret = mirror_num;
 		*bioc_ret = NULL;
-		ret = 0;
 		goto out;
 	}
 
@@ -6566,21 +6577,29 @@  int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 		bioc->full_stripe_logical = em->start +
 			btrfs_stripe_nr_to_offset(stripe_nr * data_stripes);
 		for (i = 0; i < num_stripes; i++)
-			set_io_stripe(&bioc->stripes[i], map,
-				      (i + stripe_nr) % num_stripes,
-				      stripe_offset, stripe_nr);
+			ret = set_io_stripe(fs_info, op, logical, length,
+					    &bioc->stripes[i], map,
+					    (i + stripe_nr) % num_stripes,
+					    stripe_offset, stripe_nr);
 	} else {
 		/*
 		 * For all other non-RAID56 profiles, just copy the target
 		 * stripe into the bioc.
 		 */
 		for (i = 0; i < num_stripes; i++) {
-			set_io_stripe(&bioc->stripes[i], map, stripe_index,
-				      stripe_offset, stripe_nr);
+			ret = set_io_stripe(fs_info, op, logical, length,
+					    &bioc->stripes[i], map, stripe_index,
+					    stripe_offset, stripe_nr);
 			stripe_index++;
 		}
 	}
 
+	if (ret) {
+		*bioc_ret = NULL;
+		btrfs_put_bioc(bioc);
+		goto out;
+	}
+
 	if (op != BTRFS_MAP_READ)
 		max_errors = btrfs_chunk_max_errors(map);