[v4,3/4] nvme: add csi, ms and nuse to sysfs
Commit Message
libnvme is using the sysfs for enumarating the nvme resources. Though
there are few missing attritbutes in the sysfs. For these libnvme issues
commands during discovering.
As the kernel already knows all these attributes and we would like to
avoid libnvme to issue commands all the time, expose these missing
attributes.
The nuse value is updated on request because the nuse is a volatile
value. Since any user can read the sysfs attribute, a very simple rate
limit is added (update once every 5 seconds). A more sophisticated
update strategy can be added later if there is actually a need for it.
Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
drivers/nvme/host/core.c | 4 +-
drivers/nvme/host/nvme.h | 6 +++
drivers/nvme/host/sysfs.c | 89 +++++++++++++++++++++++++++++++++++++++
3 files changed, 98 insertions(+), 1 deletion(-)
Comments
On Thu, Dec 07, 2023 at 01:36:23PM +0100, Daniel Wagner wrote:
> +static int ns_head_update_nuse(struct nvme_ns_head *head)
> +{
> + struct nvme_id_ns *id;
> + struct nvme_ns *ns;
> + int srcu_idx, ret = -EWOULDBLOCK;
> +
> + /* Avoid issuing commands too often by rate limiting the update */
> + if (!__ratelimit(&head->rs_nuse))
> + return 0;
> +
> + pr_info("%s: head %p\n", __func__, head);
Forgot to remove this debug print.
> +static int ns_update_nuse(struct nvme_ns *ns)
> +{
> + struct nvme_id_ns *id;
> + int ret;
> +
> + /* Avoid issuing commands too often by rate limiting the update. */
> + if (!__ratelimit(&ns->head->rs_nuse))
> + return 0;
> +
> + pr_info("%s: ns %p\n", __func__, ns);
ditto
Hi Daniel,
this looks generally good to me. A few comments, all but one are
cosmetic.
> @@ -1676,9 +1678,9 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,
This an now take the ns_head instead of the ns.
> @@ -1776,11 +1778,11 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id)
This can take a ctrl and ns_head instead of the ns.
> @@ -1835,8 +1837,8 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
This as well after updating nvme_ns_has_pi to take the ns_head.
> @@ -1898,7 +1900,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
> struct nvme_ns *ns, struct nvme_id_ns *id)
This as well after fixing up nvme_lba_to_sect to take the ns_head.
> @@ -2052,7 +2055,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
This one as well.
> @@ -128,7 +129,7 @@ static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
> sizeof(struct nvme_zone_descriptor);
>
> nr_zones = min_t(unsigned int, nr_zones,
> - get_capacity(ns->disk) >> ilog2(ns->zsze));
> + get_capacity(ns->head->disk) >> ilog2(ns->head->zsze));
This doesn't work, as the head disk doesn't exist for !multipath setups.
> @@ -162,7 +163,7 @@ static int nvme_zone_parse_entry(struct nvme_ns *ns,
This could do with ctrl + ns_head now
With the extra debugging removed:
Reviewed-by: Christoph Hellwig <hch@lst.de>
On Thu, Dec 07, 2023 at 01:36:23PM +0100, Daniel Wagner wrote:
> @@ -3418,6 +3419,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
> head->ns_id = info->nsid;
> head->ids = info->ids;
> head->shared = info->is_shared;
> + ratelimit_state_init(&head->rs_nuse, 5 * HZ, 1);
> kref_init(&head->ref);
I think we need to add:
ratelimit_set_flags(&head->rs_nuse, RATELIMIT_MSG_ON_RELEASE);
So that we don't get periodic messages like:
[ 60.469730] ns_head_update_nuse: 39 callbacks suppressed
[ 159.532901] ns_head_update_nuse: 1999 callbacks suppressed
Hi Christoph,
On Thu, Dec 07, 2023 at 04:31:38PM +0100, Christoph Hellwig wrote:
> this looks generally good to me. A few comments, all but one are
> cosmetic.
>
> > @@ -1676,9 +1678,9 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,
>
> This an now take the ns_head instead of the ns.
Okay
> > @@ -1776,11 +1778,11 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id)
>
> This can take a ctrl and ns_head instead of the ns.
Okay
> > @@ -1835,8 +1837,8 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
>
> This as well after updating nvme_ns_has_pi to take the ns_head.
Okay
> > @@ -1898,7 +1900,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
> > struct nvme_ns *ns, struct nvme_id_ns *id)
>
> This as well after fixing up nvme_lba_to_sect to take the ns_head.
Okay
>
> > @@ -2052,7 +2055,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
>
> This one as well.
When trying to refactor this function I run into a bit of trouble with
dependencies. Basically, we would need to pass through nvme_ctrl,
request_queue, gendisk and nvme_ns_head and still need nvme_ns in
nvme_update_zone_info:
nvme_update_ns_info_block
blk_mq_freeze_queue(ns->disk->queue)
nvme_set_chuck_sector(ns->queue)
nvme_update_zone_info()
set_bit(NVME_NS_FORCE_RO, ns->flags)
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
I suggest we don't refactor this part now. At least it I don't see an
good way to avoid passing down nvme_ns as we still need this data
structure in deeper down the call chain.
> > @@ -128,7 +129,7 @@ static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
> > sizeof(struct nvme_zone_descriptor);
> >
> > nr_zones = min_t(unsigned int, nr_zones,
> > - get_capacity(ns->disk) >> ilog2(ns->zsze));
> > + get_capacity(ns->head->disk) >> ilog2(ns->head->zsze));
>
> This doesn't work, as the head disk doesn't exist for !multipath
> setups.
I was a bit trigger happy here. Changed it back to 'get_capacity(ns->disk)'.
> > @@ -162,7 +163,7 @@ static int nvme_zone_parse_entry(struct nvme_ns *ns,
>
> This could do with ctrl + ns_head now
Okay.
Thanks,
Daniel
On Thu, Dec 07, 2023 at 09:44:58AM -0700, Keith Busch wrote:
> On Thu, Dec 07, 2023 at 01:36:23PM +0100, Daniel Wagner wrote:
> > @@ -3418,6 +3419,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
> > head->ns_id = info->nsid;
> > head->ids = info->ids;
> > head->shared = info->is_shared;
> > + ratelimit_state_init(&head->rs_nuse, 5 * HZ, 1);
> > kref_init(&head->ref);
>
> I think we need to add:
>
> ratelimit_set_flags(&head->rs_nuse, RATELIMIT_MSG_ON_RELEASE);
>
> So that we don't get periodic messages like:
>
> [ 60.469730] ns_head_update_nuse: 39 callbacks suppressed
> [ 159.532901] ns_head_update_nuse: 1999 callbacks suppressed
Good idea. I'll add this.
@@ -1454,7 +1454,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl,
return status;
}
-static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
+int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
struct nvme_id_ns **id)
{
struct nvme_command c = { };
@@ -2056,6 +2056,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
blk_mq_freeze_queue(ns->disk->queue);
lbaf = nvme_lbaf_index(id->flbas);
ns->head->lba_shift = id->lbaf[lbaf].ds;
+ ns->head->nuse = le64_to_cpu(id->nuse);
nvme_set_queue_limits(ns->ctrl, ns->queue);
ret = nvme_configure_metadata(ns, id);
@@ -3418,6 +3419,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
head->ns_id = info->nsid;
head->ids = info->ids;
head->shared = info->is_shared;
+ ratelimit_state_init(&head->rs_nuse, 5 * HZ, 1);
kref_init(&head->ref);
if (head->ids.csi) {
@@ -16,6 +16,7 @@
#include <linux/rcupdate.h>
#include <linux/wait.h>
#include <linux/t10-pi.h>
+#include <linux/ratelimit_types.h>
#include <trace/events/block.h>
@@ -456,6 +457,7 @@ struct nvme_ns_head {
u16 pi_size;
u16 sgs;
u32 sws;
+ u64 nuse;
u8 pi_type;
u8 guard_type;
#ifdef CONFIG_BLK_DEV_ZONED
@@ -463,6 +465,8 @@ struct nvme_ns_head {
#endif
unsigned long features;
+ struct ratelimit_state rs_nuse;
+
struct cdev cdev;
struct device cdev_device;
@@ -867,6 +871,8 @@ int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd,
unsigned int issue_flags);
int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
unsigned int issue_flags);
+int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
+ struct nvme_id_ns **id);
int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo);
int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
@@ -114,12 +114,101 @@ static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR_RO(nsid);
+static ssize_t csi_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%d\n", dev_to_ns_head(dev)->ids.csi);
+}
+static DEVICE_ATTR_RO(csi);
+
+static ssize_t metadata_bytes_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", dev_to_ns_head(dev)->ms);
+}
+static DEVICE_ATTR_RO(metadata_bytes);
+
+static int ns_head_update_nuse(struct nvme_ns_head *head)
+{
+ struct nvme_id_ns *id;
+ struct nvme_ns *ns;
+ int srcu_idx, ret = -EWOULDBLOCK;
+
+ /* Avoid issuing commands too often by rate limiting the update */
+ if (!__ratelimit(&head->rs_nuse))
+ return 0;
+
+ pr_info("%s: head %p\n", __func__, head);
+
+ srcu_idx = srcu_read_lock(&head->srcu);
+ ns = nvme_find_path(head);
+ if (!ns)
+ goto out_unlock;
+
+ ret = nvme_identify_ns(ns->ctrl, head->ns_id, &id);
+ if (ret)
+ goto out_unlock;
+
+ head->nuse = le64_to_cpu(id->nuse);
+ kfree(id);
+
+out_unlock:
+ srcu_read_unlock(&head->srcu, srcu_idx);
+ return ret;
+}
+
+static int ns_update_nuse(struct nvme_ns *ns)
+{
+ struct nvme_id_ns *id;
+ int ret;
+
+ /* Avoid issuing commands too often by rate limiting the update. */
+ if (!__ratelimit(&ns->head->rs_nuse))
+ return 0;
+
+ pr_info("%s: ns %p\n", __func__, ns);
+
+ ret = nvme_identify_ns(ns->ctrl, ns->head->ns_id, &id);
+ if (ret)
+ goto out_free_id;
+
+ ns->head->nuse = le64_to_cpu(id->nuse);
+
+out_free_id:
+ kfree(id);
+
+ return ret;
+}
+
+static ssize_t nuse_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_ns_head *head = dev_to_ns_head(dev);
+ struct gendisk *disk = dev_to_disk(dev);
+ struct block_device *bdev = disk->part0;
+ int ret;
+
+ if (IS_ENABLED(CONFIG_NVME_MULTIPATH) &&
+ bdev->bd_disk->fops == &nvme_ns_head_ops)
+ ret = ns_head_update_nuse(head);
+ else
+ ret = ns_update_nuse(bdev->bd_disk->private_data);
+ if (ret)
+ return ret;
+
+ return sysfs_emit(buf, "%llu\n", head->nuse);
+}
+static DEVICE_ATTR_RO(nuse);
+
static struct attribute *nvme_ns_attrs[] = {
&dev_attr_wwid.attr,
&dev_attr_uuid.attr,
&dev_attr_nguid.attr,
&dev_attr_eui.attr,
+ &dev_attr_csi.attr,
&dev_attr_nsid.attr,
+ &dev_attr_metadata_bytes.attr,
+ &dev_attr_nuse.attr,
#ifdef CONFIG_NVME_MULTIPATH
&dev_attr_ana_grpid.attr,
&dev_attr_ana_state.attr,