[v4,3/4] nvme: add csi, ms and nuse to sysfs

Message ID 20231207123624.29959-4-dwagner@suse.de
State New
Headers
Series nvme: add csi, ms and nuse to sysfs |

Commit Message

Daniel Wagner Dec. 7, 2023, 12:36 p.m. UTC
  libnvme is using the sysfs for enumarating the nvme resources. Though
there are few missing attritbutes in the sysfs. For these libnvme issues
commands during discovering.

As the kernel already knows all these attributes and we would like to
avoid libnvme to issue commands all the time, expose these missing
attributes.

The nuse value is updated on request because the nuse is a volatile
value. Since any user can read the sysfs attribute, a very simple rate
limit is added (update once every 5 seconds). A more sophisticated
update strategy can be added later if there is actually a need for it.

Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
 drivers/nvme/host/core.c  |  4 +-
 drivers/nvme/host/nvme.h  |  6 +++
 drivers/nvme/host/sysfs.c | 89 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 98 insertions(+), 1 deletion(-)
  

Comments

Daniel Wagner Dec. 7, 2023, 12:42 p.m. UTC | #1
On Thu, Dec 07, 2023 at 01:36:23PM +0100, Daniel Wagner wrote:
> +static int ns_head_update_nuse(struct nvme_ns_head *head)
> +{
> +	struct nvme_id_ns *id;
> +	struct nvme_ns *ns;
> +	int srcu_idx, ret = -EWOULDBLOCK;
> +
> +	/* Avoid issuing commands too often by rate limiting the update */
> +	if (!__ratelimit(&head->rs_nuse))
> +		return 0;
> +
> +	pr_info("%s: head %p\n", __func__, head);

Forgot to remove this debug print.

> +static int ns_update_nuse(struct nvme_ns *ns)
> +{
> +	struct nvme_id_ns *id;
> +	int ret;
> +
> +	/* Avoid issuing commands too often by rate limiting the update. */
> +	if (!__ratelimit(&ns->head->rs_nuse))
> +		return 0;
> +
> +	pr_info("%s: ns %p\n", __func__, ns);

ditto
  
Christoph Hellwig Dec. 7, 2023, 3:31 p.m. UTC | #2
Hi Daniel,

this looks generally good to me.  A few comments, all but one are
cosmetic.

> @@ -1676,9 +1678,9 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,

This an now take the ns_head instead of the ns.

> @@ -1776,11 +1778,11 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id)

This can take a ctrl and ns_head instead of the ns.

> @@ -1835,8 +1837,8 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)

This as well after updating nvme_ns_has_pi to take the ns_head.

> @@ -1898,7 +1900,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
>  		struct nvme_ns *ns, struct nvme_id_ns *id)

This as well after fixing up nvme_lba_to_sect to take the ns_head.

> @@ -2052,7 +2055,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,

This one as well.

> @@ -128,7 +129,7 @@ static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
>  				   sizeof(struct nvme_zone_descriptor);
>  
>  	nr_zones = min_t(unsigned int, nr_zones,
> -			 get_capacity(ns->disk) >> ilog2(ns->zsze));
> +			 get_capacity(ns->head->disk) >> ilog2(ns->head->zsze));

This doesn't work, as the head disk doesn't exist for !multipath setups.

> @@ -162,7 +163,7 @@ static int nvme_zone_parse_entry(struct nvme_ns *ns,

This could do with ctrl + ns_head now
  
Christoph Hellwig Dec. 7, 2023, 3:33 p.m. UTC | #3
With the extra debugging removed:

Reviewed-by: Christoph Hellwig <hch@lst.de>
  
Keith Busch Dec. 7, 2023, 4:44 p.m. UTC | #4
On Thu, Dec 07, 2023 at 01:36:23PM +0100, Daniel Wagner wrote:
> @@ -3418,6 +3419,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
>  	head->ns_id = info->nsid;
>  	head->ids = info->ids;
>  	head->shared = info->is_shared;
> +	ratelimit_state_init(&head->rs_nuse, 5 * HZ, 1);
>  	kref_init(&head->ref);

I think we need to add:

	ratelimit_set_flags(&head->rs_nuse, RATELIMIT_MSG_ON_RELEASE);	

So that we don't get periodic messages like:

	[   60.469730] ns_head_update_nuse: 39 callbacks suppressed
	[  159.532901] ns_head_update_nuse: 1999 callbacks suppressed
  
Daniel Wagner Dec. 8, 2023, 9:10 a.m. UTC | #5
Hi Christoph,

On Thu, Dec 07, 2023 at 04:31:38PM +0100, Christoph Hellwig wrote:
> this looks generally good to me.  A few comments, all but one are
> cosmetic.
> 
> > @@ -1676,9 +1678,9 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,
> 
> This an now take the ns_head instead of the ns.

Okay

> > @@ -1776,11 +1778,11 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id)
> 
> This can take a ctrl and ns_head instead of the ns.

Okay

> > @@ -1835,8 +1837,8 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
> 
> This as well after updating nvme_ns_has_pi to take the ns_head.

Okay

> > @@ -1898,7 +1900,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
> >  		struct nvme_ns *ns, struct nvme_id_ns *id)
> 
> This as well after fixing up nvme_lba_to_sect to take the ns_head.

Okay

> 
> > @@ -2052,7 +2055,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
> 
> This one as well.

When trying to refactor this function I run into a bit of trouble with
dependencies. Basically, we would need to pass through nvme_ctrl,
request_queue, gendisk and nvme_ns_head and still need nvme_ns in
nvme_update_zone_info:

nvme_update_ns_info_block
  blk_mq_freeze_queue(ns->disk->queue)
  nvme_set_chuck_sector(ns->queue)
  nvme_update_zone_info()
    set_bit(NVME_NS_FORCE_RO, ns->flags)
    blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);

I suggest we don't refactor this part now. At least it I don't see an
good way to avoid passing down nvme_ns as we still need this data
structure in deeper down the call chain.

> > @@ -128,7 +129,7 @@ static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
> >  				   sizeof(struct nvme_zone_descriptor);
> >  
> >  	nr_zones = min_t(unsigned int, nr_zones,
> > -			 get_capacity(ns->disk) >> ilog2(ns->zsze));
> > +			 get_capacity(ns->head->disk) >> ilog2(ns->head->zsze));
> 
> This doesn't work, as the head disk doesn't exist for !multipath
> setups.

I was a bit trigger happy here. Changed it back to 'get_capacity(ns->disk)'.

> > @@ -162,7 +163,7 @@ static int nvme_zone_parse_entry(struct nvme_ns *ns,
> 
> This could do with ctrl + ns_head now

Okay.

Thanks,
Daniel
  
Daniel Wagner Dec. 8, 2023, 9:10 a.m. UTC | #6
On Thu, Dec 07, 2023 at 09:44:58AM -0700, Keith Busch wrote:
> On Thu, Dec 07, 2023 at 01:36:23PM +0100, Daniel Wagner wrote:
> > @@ -3418,6 +3419,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
> >  	head->ns_id = info->nsid;
> >  	head->ids = info->ids;
> >  	head->shared = info->is_shared;
> > +	ratelimit_state_init(&head->rs_nuse, 5 * HZ, 1);
> >  	kref_init(&head->ref);
> 
> I think we need to add:
> 
> 	ratelimit_set_flags(&head->rs_nuse, RATELIMIT_MSG_ON_RELEASE);	
> 
> So that we don't get periodic messages like:
> 
> 	[   60.469730] ns_head_update_nuse: 39 callbacks suppressed
> 	[  159.532901] ns_head_update_nuse: 1999 callbacks suppressed

Good idea. I'll add this.
  

Patch

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c3270818fa0d..82c6faf424d6 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1454,7 +1454,7 @@  static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl,
 	return status;
 }
 
-static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
+int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 			struct nvme_id_ns **id)
 {
 	struct nvme_command c = { };
@@ -2056,6 +2056,7 @@  static int nvme_update_ns_info_block(struct nvme_ns *ns,
 	blk_mq_freeze_queue(ns->disk->queue);
 	lbaf = nvme_lbaf_index(id->flbas);
 	ns->head->lba_shift = id->lbaf[lbaf].ds;
+	ns->head->nuse = le64_to_cpu(id->nuse);
 	nvme_set_queue_limits(ns->ctrl, ns->queue);
 
 	ret = nvme_configure_metadata(ns, id);
@@ -3418,6 +3419,7 @@  static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
 	head->ns_id = info->nsid;
 	head->ids = info->ids;
 	head->shared = info->is_shared;
+	ratelimit_state_init(&head->rs_nuse, 5 * HZ, 1);
 	kref_init(&head->ref);
 
 	if (head->ids.csi) {
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 32ec7ca30d7c..2b31641a97b9 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -16,6 +16,7 @@ 
 #include <linux/rcupdate.h>
 #include <linux/wait.h>
 #include <linux/t10-pi.h>
+#include <linux/ratelimit_types.h>
 
 #include <trace/events/block.h>
 
@@ -456,6 +457,7 @@  struct nvme_ns_head {
 	u16			pi_size;
 	u16			sgs;
 	u32			sws;
+	u64			nuse;
 	u8			pi_type;
 	u8			guard_type;
 #ifdef CONFIG_BLK_DEV_ZONED
@@ -463,6 +465,8 @@  struct nvme_ns_head {
 #endif
 	unsigned long		features;
 
+	struct ratelimit_state	rs_nuse;
+
 	struct cdev		cdev;
 	struct device		cdev_device;
 
@@ -867,6 +871,8 @@  int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd,
 		unsigned int issue_flags);
 int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
 		unsigned int issue_flags);
+int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
+		struct nvme_id_ns **id);
 int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
 
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index d682d0a667a0..79e362e0ac47 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -114,12 +114,101 @@  static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(nsid);
 
+static ssize_t csi_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	return sysfs_emit(buf, "%d\n", dev_to_ns_head(dev)->ids.csi);
+}
+static DEVICE_ATTR_RO(csi);
+
+static ssize_t metadata_bytes_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%d\n", dev_to_ns_head(dev)->ms);
+}
+static DEVICE_ATTR_RO(metadata_bytes);
+
+static int ns_head_update_nuse(struct nvme_ns_head *head)
+{
+	struct nvme_id_ns *id;
+	struct nvme_ns *ns;
+	int srcu_idx, ret = -EWOULDBLOCK;
+
+	/* Avoid issuing commands too often by rate limiting the update */
+	if (!__ratelimit(&head->rs_nuse))
+		return 0;
+
+	pr_info("%s: head %p\n", __func__, head);
+
+	srcu_idx = srcu_read_lock(&head->srcu);
+	ns = nvme_find_path(head);
+	if (!ns)
+		goto out_unlock;
+
+	ret = nvme_identify_ns(ns->ctrl, head->ns_id, &id);
+	if (ret)
+		goto out_unlock;
+
+	head->nuse = le64_to_cpu(id->nuse);
+	kfree(id);
+
+out_unlock:
+	srcu_read_unlock(&head->srcu, srcu_idx);
+	return ret;
+}
+
+static int ns_update_nuse(struct nvme_ns *ns)
+{
+	struct nvme_id_ns *id;
+	int ret;
+
+	/* Avoid issuing commands too often by rate limiting the update. */
+	if (!__ratelimit(&ns->head->rs_nuse))
+		return 0;
+
+	pr_info("%s: ns %p\n", __func__, ns);
+
+	ret = nvme_identify_ns(ns->ctrl, ns->head->ns_id, &id);
+	if (ret)
+		goto out_free_id;
+
+	ns->head->nuse = le64_to_cpu(id->nuse);
+
+out_free_id:
+	kfree(id);
+
+	return ret;
+}
+
+static ssize_t nuse_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct nvme_ns_head *head = dev_to_ns_head(dev);
+	struct gendisk *disk = dev_to_disk(dev);
+	struct block_device *bdev = disk->part0;
+	int ret;
+
+	if (IS_ENABLED(CONFIG_NVME_MULTIPATH) &&
+	    bdev->bd_disk->fops == &nvme_ns_head_ops)
+		ret = ns_head_update_nuse(head);
+	else
+		ret = ns_update_nuse(bdev->bd_disk->private_data);
+	if (ret)
+		return ret;
+
+	return sysfs_emit(buf, "%llu\n", head->nuse);
+}
+static DEVICE_ATTR_RO(nuse);
+
 static struct attribute *nvme_ns_attrs[] = {
 	&dev_attr_wwid.attr,
 	&dev_attr_uuid.attr,
 	&dev_attr_nguid.attr,
 	&dev_attr_eui.attr,
+	&dev_attr_csi.attr,
 	&dev_attr_nsid.attr,
+	&dev_attr_metadata_bytes.attr,
+	&dev_attr_nuse.attr,
 #ifdef CONFIG_NVME_MULTIPATH
 	&dev_attr_ana_grpid.attr,
 	&dev_attr_ana_state.attr,