[v3] block: ublk: enable zoned storage support
Commit Message
From: Andreas Hindborg <a.hindborg@samsung.com>
Add zoned storage support to ublk: report_zones and operations:
- REQ_OP_ZONE_OPEN
- REQ_OP_ZONE_CLOSE
- REQ_OP_ZONE_FINISH
- REQ_OP_ZONE_RESET
This allows implementation of zoned storage devices in user space. An
example user space implementation based on ubdsrv is available [1].
[1] https://github.com/metaspace/ubdsrv/commit/2c60b9ffdfb7cb44c5cce0272bb52229ea6c95d4
Signed-off-by: Andreas Hindborg <a.hindborg@samsung.com>
---
Changes since v2:
- Allow reporting of multiple zones in a single request
- Move zoned functions to separate translation unit
- Do not overload REQ_OP_DRV_IN for zone report
- Reserve space in ublk_param_zoned for future parameters
- Do not overload (struct request).__sector and .__data_len
- Remove unnecessary conditional compilation
- Remove goto statements in ublk_report_zones
v1: https://lore.kernel.org/linux-block/20230224125950.214779-1-nmi@metaspace.dk/
v2: https://lore.kernel.org/all/20230224200502.391570-1-nmi@metaspace.dk/
drivers/block/Kconfig | 4 +
drivers/block/Makefile | 1 +
drivers/block/ublk_drv-zoned.c | 142 +++++++++++++++++++++++++++++++++
drivers/block/ublk_drv.c | 113 ++++++++++++++------------
drivers/block/ublk_drv.h | 69 ++++++++++++++++
include/uapi/linux/ublk_cmd.h | 37 +++++++--
6 files changed, 308 insertions(+), 58 deletions(-)
create mode 100644 drivers/block/ublk_drv-zoned.c
create mode 100644 drivers/block/ublk_drv.h
base-commit: eeac8ede17557680855031c6f305ece2378af326
Comments
On Thu, Mar 16, 2023 at 03:55:38PM +0100, Andreas Hindborg wrote:
> From: Andreas Hindborg <a.hindborg@samsung.com>
>
> Add zoned storage support to ublk: report_zones and operations:
> - REQ_OP_ZONE_OPEN
> - REQ_OP_ZONE_CLOSE
> - REQ_OP_ZONE_FINISH
> - REQ_OP_ZONE_RESET
Without ZONE_APPEND support this is incomplete and broken.
Hi Andreas,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on eeac8ede17557680855031c6f305ece2378af326]
url: https://github.com/intel-lab-lkp/linux/commits/Andreas-Hindborg/block-ublk-enable-zoned-storage-support/20230316-225725
base: eeac8ede17557680855031c6f305ece2378af326
patch link: https://lore.kernel.org/r/20230316145539.300523-1-nmi%40metaspace.dk
patch subject: [PATCH v3] block: ublk: enable zoned storage support
config: sh-allmodconfig (https://download.01.org/0day-ci/archive/20230317/202303170805.9OMzCcn7-lkp@intel.com/config)
compiler: sh4-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/723d5c2508e09f127226d38c698d3e4e6cff83f1
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Andreas-Hindborg/block-ublk-enable-zoned-storage-support/20230316-225725
git checkout 723d5c2508e09f127226d38c698d3e4e6cff83f1
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=sh olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=sh SHELL=/bin/bash drivers/block/
If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303170805.9OMzCcn7-lkp@intel.com/
All error/warnings (new ones prefixed by >>):
drivers/block/ublk_drv-zoned.c: In function 'ublk_alloc_report_buffer':
>> drivers/block/ublk_drv-zoned.c:50:23: error: implicit declaration of function '__vmalloc'; did you mean '__kmalloc'? [-Werror=implicit-function-declaration]
50 | buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
| ^~~~~~~~~
| __kmalloc
>> drivers/block/ublk_drv-zoned.c:50:21: warning: assignment to 'void *' from 'int' makes pointer from integer without a cast [-Wint-conversion]
50 | buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
| ^
cc1: some warnings being treated as errors
vim +50 drivers/block/ublk_drv-zoned.c
31
32 // Based on virtblk_alloc_report_buffer
33 static void *ublk_alloc_report_buffer(struct ublk_device *ublk,
34 unsigned int nr_zones,
35 unsigned int zone_sectors, size_t *buflen)
36 {
37 struct request_queue *q = ublk->ub_disk->queue;
38 size_t bufsize;
39 void *buf;
40
41 nr_zones = min_t(unsigned int, nr_zones,
42 get_capacity(ublk->ub_disk) >> ilog2(zone_sectors));
43
44 bufsize = nr_zones * sizeof(struct blk_zone);
45 bufsize =
46 min_t(size_t, bufsize, queue_max_hw_sectors(q) << SECTOR_SHIFT);
47 bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
48
49 while (bufsize >= sizeof(struct blk_zone)) {
> 50 buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
51 if (buf) {
52 *buflen = bufsize;
53 return buf;
54 }
55 bufsize >>= 1;
56 }
57
58 bufsize = 0;
59 return NULL;
60 }
61
Hi Andreas,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on eeac8ede17557680855031c6f305ece2378af326]
url: https://github.com/intel-lab-lkp/linux/commits/Andreas-Hindborg/block-ublk-enable-zoned-storage-support/20230316-225725
base: eeac8ede17557680855031c6f305ece2378af326
patch link: https://lore.kernel.org/r/20230316145539.300523-1-nmi%40metaspace.dk
patch subject: [PATCH v3] block: ublk: enable zoned storage support
config: s390-allmodconfig (https://download.01.org/0day-ci/archive/20230317/202303170851.rY3VPud2-lkp@intel.com/config)
compiler: s390-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/723d5c2508e09f127226d38c698d3e4e6cff83f1
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Andreas-Hindborg/block-ublk-enable-zoned-storage-support/20230316-225725
git checkout 723d5c2508e09f127226d38c698d3e4e6cff83f1
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=s390 olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=s390 SHELL=/bin/bash
If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303170851.rY3VPud2-lkp@intel.com/
All errors (new ones prefixed by >>, old ones prefixed by <<):
ERROR: modpost: "devm_ioremap_resource" [drivers/dma/qcom/hdma.ko] undefined!
ERROR: modpost: "devm_platform_ioremap_resource" [drivers/dma/fsl-edma.ko] undefined!
ERROR: modpost: "devm_platform_ioremap_resource" [drivers/dma/idma64.ko] undefined!
ERROR: modpost: "iounmap" [drivers/tty/ipwireless/ipwireless.ko] undefined!
ERROR: modpost: "ioremap" [drivers/tty/ipwireless/ipwireless.ko] undefined!
ERROR: modpost: "devm_platform_ioremap_resource" [drivers/char/xillybus/xillybus_of.ko] undefined!
>> ERROR: modpost: "ublk_set_nr_zones" [drivers/block/ublk_drv.ko] undefined!
>> ERROR: modpost: "ublk_dev_param_zoned_apply" [drivers/block/ublk_drv.ko] undefined!
>> ERROR: modpost: "ublk_revalidate_disk_zones" [drivers/block/ublk_drv.ko] undefined!
>> ERROR: modpost: "ublk_report_zones" [drivers/block/ublk_drv.ko] undefined!
WARNING: modpost: suppressed 19 unresolved symbol warnings because there were too many)
Niklas Cassel <Niklas.Cassel@wdc.com> writes:
> On Thu, Mar 16, 2023 at 03:55:38PM +0100, Andreas Hindborg wrote:
>> From: Andreas Hindborg <a.hindborg@samsung.com>
>
> Hello Andreas,
>
>
> I think that this patch is starting to look very nice!
Thanks!
>
>
<snip>
>> +
>> +int ublk_report_zones(struct gendisk *disk, sector_t sector,
>> + unsigned int nr_zones, report_zones_cb cb, void *data)
>> +{
>> + unsigned int done_zones = 0;
>> + struct ublk_device *ub = disk->private_data;
>> + unsigned int zone_size_sectors = disk->queue->limits.chunk_sectors;
>> + unsigned int first_zone = sector >> ilog2(zone_size_sectors);
>> + struct blk_zone *buffer;
>> + size_t buffer_length;
>> + unsigned int max_zones_per_request;
>
> Nit: I would sort the variables differently.
>
> Perhaps:
>> + struct ublk_device *ub = disk->private_data;
>> + unsigned int zone_size_sectors = disk->queue->limits.chunk_sectors;
>> + unsigned int first_zone = sector >> ilog2(zone_size_sectors);
>> + unsigned int done_zones = 0;
>> + unsigned int max_zones_per_request;
>> + struct blk_zone *buffer;
>> + size_t buffer_length;
>
Can I ask what the reasoning behind this is? I think the way you
propose looks better, but are there any rules one can follow for this?
Best regards
Andreas
> -----Original Message-----
> From: Andreas Hindborg (Samsung) <nmi@metaspace.dk>
> Sent: Wednesday, 28 June 2023 13.52
> To: Niklas Cassel <Niklas.Cassel@wdc.com>
> Cc: linux-block@vger.kernel.org; Hans Holmberg <Hans.Holmberg@wdc.com>;
> Matias Bjørling <Matias.Bjorling@wdc.com>; Minwoo Im
> <minwoo.im.dev@gmail.com>; Damien Le Moal
> <damien.lemoal@opensource.wdc.com>; Jens Axboe <axboe@kernel.dk>;
> Ming Lei <ming.lei@redhat.com>; open list <linux-kernel@vger.kernel.org>
> Subject: Re: [PATCH v3] block: ublk: enable zoned storage support
>
>
> Niklas Cassel <Niklas.Cassel@wdc.com> writes:
>
> > On Thu, Mar 16, 2023 at 03:55:38PM +0100, Andreas Hindborg wrote:
> >> From: Andreas Hindborg <a.hindborg@samsung.com>
> >
> > Hello Andreas,
> >
> >
> > I think that this patch is starting to look very nice!
>
> Thanks!
>
> >
> >
> <snip>
> >> +
> >> +int ublk_report_zones(struct gendisk *disk, sector_t sector,
> >> + unsigned int nr_zones, report_zones_cb cb, void *data) {
> >> + unsigned int done_zones = 0;
> >> + struct ublk_device *ub = disk->private_data;
> >> + unsigned int zone_size_sectors = disk->queue->limits.chunk_sectors;
> >> + unsigned int first_zone = sector >> ilog2(zone_size_sectors);
> >> + struct blk_zone *buffer;
> >> + size_t buffer_length;
> >> + unsigned int max_zones_per_request;
> >
> > Nit: I would sort the variables differently.
> >
> > Perhaps:
> >> + struct ublk_device *ub = disk->private_data;
> >> + unsigned int zone_size_sectors = disk->queue->limits.chunk_sectors;
> >> + unsigned int first_zone = sector >> ilog2(zone_size_sectors);
> >> + unsigned int done_zones = 0;
> >> + unsigned int max_zones_per_request;
> >> + struct blk_zone *buffer;
> >> + size_t buffer_length;
> >
>
> Can I ask what the reasoning behind this is? I think the way you propose looks
> better, but are there any rules one can follow for this?
There aren't many hard rules to my knowledge. One rule of thumb, which I use, is to group variables of the same data type. Another one: I personally prefer having the complex data structures first, followed by the simple data types at the end. However, it isn't that easy, as I usually also take the code flow of the function into account, such that the data structures that are used together are also declared together and follow the flow of the code (if it is getting too complex, it's probably because the function needs to be split). Finally, it always helps to keep things as simple as possible.
Other suggestions might be to look at existing code and get a feel for how others structure it. I've found Code Complete by Steve McConnell a classic on maintaining good code style. Obviously, each piece of advice in the book should be weighed against how it's typically done in the kernel.
>
> Best regards
> Andreas
@@ -385,6 +385,10 @@ config BLK_DEV_UBLK
can handle batch more effectively, but task_work_add() isn't exported
for module, so ublk has to be built to kernel.
+config BLK_DEV_UBLK_ZONED
+ def_bool y
+ depends on BLK_DEV_UBLK && BLK_DEV_ZONED
+
source "drivers/block/rnbd/Kconfig"
endif # BLK_DEV
@@ -38,5 +38,6 @@ obj-$(CONFIG_BLK_DEV_RNBD) += rnbd/
obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk/
obj-$(CONFIG_BLK_DEV_UBLK) += ublk_drv.o
+obj-$(CONFIG_BLK_DEV_UBLK_ZONED) += ublk_drv-zoned.o
swim_mod-y := swim.o swim_asm.o
new file mode 100644
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2023 Andreas Hindborg <a.hindborg@samsung.com>
+ */
+#include <linux/blkzoned.h>
+#include <linux/ublk_cmd.h>
+#include <linux/vmalloc.h>
+#include "ublk_drv.h"
+
+/*
+ * Derive the number of zones of a zoned ublk device from its basic
+ * parameters and store it in the gendisk.
+ *
+ * Does nothing unless the device was created with UBLK_F_ZONED and a
+ * non-zero zone size (basic.chunk_sectors) has been supplied.
+ */
+void ublk_set_nr_zones(struct ublk_device *ub)
+{
+	const struct ublk_param_basic *p = &ub->params.basic;
+
+	if (!(ub->dev_info.flags & UBLK_F_ZONED) || !p->chunk_sectors)
+		return;
+
+	/*
+	 * Shift rather than divide: the zone report code in this file already
+	 * assumes a power-of-two zone size (it converts sectors to zones with
+	 * ilog2()), and a plain '/' on a 64-bit dividend does not link on
+	 * 32-bit architectures.
+	 * NOTE(review): dev_sectors is presumed to be a 64-bit field - confirm
+	 * against struct ublk_param_basic.
+	 */
+	ub->ub_disk->nr_zones = p->dev_sectors >> ilog2(p->chunk_sectors);
+}
+
+/*
+ * Apply the zoned parameter block (open/active zone limits) to the disk.
+ * Devices created without UBLK_F_ZONED are left untouched.
+ */
+void ublk_dev_param_zoned_apply(struct ublk_device *ub)
+{
+	const struct ublk_param_zoned *zoned = &ub->params.zoned;
+
+	if (!(ub->dev_info.flags & UBLK_F_ZONED))
+		return;
+
+	disk_set_max_active_zones(ub->ub_disk, zoned->max_active_zones);
+	disk_set_max_open_zones(ub->ub_disk, zoned->max_open_zones);
+}
+
+/*
+ * Revalidate the zone layout of @disk.
+ *
+ * Forwards to blk_revalidate_disk_zones() with NULL as its second argument
+ * and hands the result back unchanged.
+ */
+int ublk_revalidate_disk_zones(struct gendisk *disk)
+{
+	int ret;
+
+	ret = blk_revalidate_disk_zones(disk, NULL);
+	return ret;
+}
+
+/*
+ * Allocate a buffer to receive zone descriptors from the ublk server.
+ * Based on virtblk_alloc_report_buffer.
+ *
+ * The wanted size is nr_zones descriptors, capped by the number of zones
+ * that fit in the disk capacity and by the queue's max_hw_sectors and
+ * max_segments limits. If the allocation fails the size is halved and
+ * retried until not even one struct blk_zone fits.
+ *
+ * On success the buffer is returned and *buflen holds its size in bytes;
+ * the caller releases it with kvfree(). On failure NULL is returned and
+ * *buflen is set to 0.
+ */
+static void *ublk_alloc_report_buffer(struct ublk_device *ublk,
+				      unsigned int nr_zones,
+				      unsigned int zone_sectors, size_t *buflen)
+{
+	struct request_queue *q = ublk->ub_disk->queue;
+	size_t bufsize;
+	void *buf;
+
+	nr_zones = min_t(unsigned int, nr_zones,
+			 get_capacity(ublk->ub_disk) >> ilog2(zone_sectors));
+
+	bufsize = nr_zones * sizeof(struct blk_zone);
+	bufsize =
+		min_t(size_t, bufsize, queue_max_hw_sectors(q) << SECTOR_SHIFT);
+	bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
+
+	while (bufsize >= sizeof(struct blk_zone)) {
+		/* __GFP_NORETRY: fail fast so we can fall back to a smaller size */
+		buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
+		if (buf) {
+			*buflen = bufsize;
+			return buf;
+		}
+		bufsize >>= 1;
+	}
+
+	/* Report the failure through the out parameter, not the local copy */
+	*buflen = 0;
+	return NULL;
+}
+
+/*
+ * report_zones handler for zoned ublk devices.
+ *
+ * Fetches up to @nr_zones zone descriptors starting at @sector from the
+ * ublk server and feeds each of them to @cb together with @data. The report
+ * is split into as many REQ_OP_DRV_IN requests as needed to fit the report
+ * buffer; each request carries UBLK_IO_OP_REPORT_ZONES, the start sector and
+ * the sector span to report in its ublk_rq_data.
+ *
+ * A zero-length descriptor in the returned data ends the report early; it is
+ * a terminator and is not passed to @cb.
+ *
+ * Returns the number of zones reported, -EOPNOTSUPP if the device is not
+ * zoned, -ENOMEM if no report buffer could be allocated, or the error from
+ * request allocation, mapping, execution or @cb. The report buffer is
+ * released on every exit path.
+ */
+int ublk_report_zones(struct gendisk *disk, sector_t sector,
+		      unsigned int nr_zones, report_zones_cb cb, void *data)
+{
+	struct ublk_device *ub = disk->private_data;
+	unsigned int zone_size_sectors = disk->queue->limits.chunk_sectors;
+	unsigned int first_zone = sector >> ilog2(zone_size_sectors);
+	unsigned int done_zones = 0;
+	unsigned int max_zones_per_request;
+	struct blk_zone *buffer;
+	size_t buffer_length;
+	int ret;
+
+	if (!(ub->dev_info.flags & UBLK_F_ZONED))
+		return -EOPNOTSUPP;
+
+	nr_zones = min_t(unsigned int, ub->ub_disk->nr_zones - first_zone,
+			 nr_zones);
+
+	buffer = ublk_alloc_report_buffer(ub, nr_zones, zone_size_sectors,
+					  &buffer_length);
+	if (!buffer)
+		return -ENOMEM;
+
+	max_zones_per_request = buffer_length / sizeof(struct blk_zone);
+
+	while (done_zones < nr_zones) {
+		unsigned int remaining_zones = nr_zones - done_zones;
+		unsigned int zones_in_request = min_t(
+			unsigned int, remaining_zones, max_zones_per_request);
+		struct ublk_rq_data *pdu;
+		struct request *req;
+		blk_status_t status;
+
+		/* Entries the server does not fill in must read as zero length */
+		memset(buffer, 0, buffer_length);
+
+		req = blk_mq_alloc_request(disk->queue, REQ_OP_DRV_IN, 0);
+		if (IS_ERR(req)) {
+			ret = PTR_ERR(req);
+			goto out;
+		}
+
+		pdu = blk_mq_rq_to_pdu(req);
+		pdu->operation = UBLK_IO_OP_REPORT_ZONES;
+		pdu->sector = sector;
+		/* Never ask for more zones than the buffer can hold */
+		pdu->nr_sectors = zones_in_request * zone_size_sectors;
+
+		ret = blk_rq_map_kern(disk->queue, req, buffer, buffer_length,
+				      GFP_KERNEL);
+		if (ret) {
+			blk_mq_free_request(req);
+			goto out;
+		}
+
+		status = blk_execute_rq(req, 0);
+		ret = blk_status_to_errno(status);
+		blk_mq_free_request(req);
+		if (ret)
+			goto out;
+
+		for (unsigned int i = 0; i < zones_in_request; i++) {
+			struct blk_zone *zone = buffer + i;
+
+			/* A zero length zone means don't ask for more zones */
+			if (!zone->len) {
+				ret = done_zones;
+				goto out;
+			}
+
+			/*
+			 * The callback index counts zones across the whole
+			 * report, not within the current request.
+			 */
+			ret = cb(zone, done_zones, data);
+			if (ret)
+				goto out;
+
+			done_zones++;
+			sector += zone_size_sectors;
+		}
+	}
+
+	ret = done_zones;
+out:
+	kvfree(buffer);
+	return ret;
+}
@@ -44,6 +44,7 @@
#include <linux/task_work.h>
#include <linux/namei.h>
#include <uapi/linux/ublk_cmd.h>
+#include "ublk_drv.h"
#define UBLK_MINORS (1U << MINORBITS)
@@ -53,16 +54,13 @@
| UBLK_F_NEED_GET_DATA \
| UBLK_F_USER_RECOVERY \
| UBLK_F_USER_RECOVERY_REISSUE \
- | UBLK_F_UNPRIVILEGED_DEV)
+ | UBLK_F_UNPRIVILEGED_DEV \
+ | UBLK_F_ZONED)
/* All UBLK_PARAM_TYPE_* should be included here */
-#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | \
- UBLK_PARAM_TYPE_DISCARD | UBLK_PARAM_TYPE_DEVT)
-
-struct ublk_rq_data {
- struct llist_node node;
- struct callback_head work;
-};
+#define UBLK_PARAM_TYPE_ALL \
+ (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD | \
+ UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED)
struct ublk_uring_cmd_pdu {
struct ublk_queue *ubq;
@@ -135,45 +133,6 @@ struct ublk_queue {
#define UBLK_DAEMON_MONITOR_PERIOD (5 * HZ)
-struct ublk_device {
- struct gendisk *ub_disk;
-
- char *__queues;
-
- unsigned int queue_size;
- struct ublksrv_ctrl_dev_info dev_info;
-
- struct blk_mq_tag_set tag_set;
-
- struct cdev cdev;
- struct device cdev_dev;
-
-#define UB_STATE_OPEN 0
-#define UB_STATE_USED 1
-#define UB_STATE_DELETED 2
- unsigned long state;
- int ub_number;
-
- struct mutex mutex;
-
- spinlock_t mm_lock;
- struct mm_struct *mm;
-
- struct ublk_params params;
-
- struct completion completion;
- unsigned int nr_queues_ready;
- unsigned int nr_privileged_daemon;
-
- /*
- * Our ubq->daemon may be killed without any notification, so
- * monitor each queue's daemon periodically
- */
- struct delayed_work monitor_work;
- struct work_struct quiesce_work;
- struct work_struct stop_work;
-};
-
/* header of ublk_params */
struct ublk_params_header {
__u32 len;
@@ -225,6 +184,9 @@ static void ublk_dev_param_basic_apply(struct ublk_device *ub)
set_disk_ro(ub->ub_disk, true);
set_capacity(ub->ub_disk, p->dev_sectors);
+
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
+ ublk_set_nr_zones(ub);
}
static void ublk_dev_param_discard_apply(struct ublk_device *ub)
@@ -285,6 +247,9 @@ static int ublk_apply_params(struct ublk_device *ub)
if (ub->params.types & UBLK_PARAM_TYPE_DISCARD)
ublk_dev_param_discard_apply(ub);
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && (ub->params.types & UBLK_PARAM_TYPE_ZONED))
+ ublk_dev_param_zoned_apply(ub);
+
return 0;
}
@@ -420,9 +385,10 @@ static int ublk_open(struct block_device *bdev, fmode_t mode)
}
static const struct block_device_operations ub_fops = {
- .owner = THIS_MODULE,
- .open = ublk_open,
- .free_disk = ublk_free_disk,
+ .owner = THIS_MODULE,
+ .open = ublk_open,
+ .free_disk = ublk_free_disk,
+ .report_zones = ublk_report_zones,
};
#define UBLK_MAX_PIN_PAGES 32
@@ -558,7 +524,7 @@ static int ublk_unmap_io(const struct ublk_queue *ubq,
{
const unsigned int rq_bytes = blk_rq_bytes(req);
- if (req_op(req) == REQ_OP_READ && ublk_rq_has_data(req)) {
+ if ((req_op(req) == REQ_OP_READ || req_op(req) == REQ_OP_DRV_IN) && ublk_rq_has_data(req)) {
struct ublk_map_data data = {
.ubq = ubq,
.rq = req,
@@ -607,6 +573,7 @@ static blk_status_t ublk_setup_iod(struct ublk_queue *ubq, struct request *req)
{
struct ublksrv_io_desc *iod = ublk_get_iod(ubq, req->tag);
struct ublk_io *io = &ubq->ios[req->tag];
+ struct ublk_rq_data *pdu = blk_mq_rq_to_pdu(req);
u32 ublk_op;
switch (req_op(req)) {
@@ -625,6 +592,35 @@ static blk_status_t ublk_setup_iod(struct ublk_queue *ubq, struct request *req)
case REQ_OP_WRITE_ZEROES:
ublk_op = UBLK_IO_OP_WRITE_ZEROES;
break;
+ case REQ_OP_ZONE_OPEN:
+ ublk_op = UBLK_IO_OP_ZONE_OPEN;
+ break;
+ case REQ_OP_ZONE_CLOSE:
+ ublk_op = UBLK_IO_OP_ZONE_CLOSE;
+ break;
+ case REQ_OP_ZONE_FINISH:
+ ublk_op = UBLK_IO_OP_ZONE_FINISH;
+ break;
+ case REQ_OP_ZONE_RESET:
+ ublk_op = UBLK_IO_OP_ZONE_RESET;
+ break;
+ case REQ_OP_DRV_IN:
+ case REQ_OP_DRV_OUT:
+ ublk_op = pdu->operation;
+ switch (ublk_op) {
+ case UBLK_IO_OP_REPORT_ZONES:
+ iod->op_flags = ublk_op | ublk_req_build_flags(req);
+ iod->nr_sectors = pdu->nr_sectors;
+ iod->start_sector = pdu->sector;
+ iod->addr = io->addr;
+ return BLK_STS_OK;
+ default:
+ return BLK_STS_IOERR;
+ }
+ case REQ_OP_ZONE_APPEND:
+ case REQ_OP_ZONE_RESET_ALL:
+ /* We do not support zone append or reset_all yet */
+ fallthrough;
default:
return BLK_STS_IOERR;
}
@@ -671,7 +667,8 @@ static void ublk_complete_rq(struct request *req)
*
* Both the two needn't unmap.
*/
- if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE) {
+ if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE &&
+ req_op(req) != REQ_OP_DRV_IN) {
blk_mq_end_request(req, BLK_STS_OK);
return;
}
@@ -1601,6 +1598,15 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
if (ub->nr_privileged_daemon != ub->nr_queues_ready)
set_bit(GD_SUPPRESS_PART_SCAN, &disk->state);
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
+ ub->dev_info.flags & UBLK_F_ZONED) {
+ disk_set_zoned(disk, BLK_ZONED_HM);
+ blk_queue_required_elevator_features(disk->queue, ELEVATOR_F_ZBD_SEQ_WRITE);
+ ret = ublk_revalidate_disk_zones(disk);
+ if (ret)
+ goto out_put_disk;
+ }
+
get_device(&ub->cdev_dev);
ret = add_disk(disk);
if (ret) {
@@ -1746,6 +1752,9 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
if (!IS_BUILTIN(CONFIG_BLK_DEV_UBLK))
ub->dev_info.flags |= UBLK_F_URING_CMD_COMP_IN_TASK;
+ if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED))
+ ub->dev_info.flags &= ~UBLK_F_ZONED;
+
/* We are not ready to support zero copy */
ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;
new file mode 100644
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _UBLK_DRV_H
+#define _UBLK_DRV_H
+
+#include <uapi/linux/ublk_cmd.h>
+#include <linux/blk-mq.h>
+#include <linux/cdev.h>
+
+/*
+ * Per-device state of a ublk block device.
+ *
+ * Lives in this header so that both ublk_drv.c and ublk_drv-zoned.c can
+ * access it; the zoned code reads ub_disk, dev_info.flags and params.
+ */
+struct ublk_device {
+ struct gendisk *ub_disk;
+
+ char *__queues;
+
+ unsigned int queue_size;
+ struct ublksrv_ctrl_dev_info dev_info;
+
+ struct blk_mq_tag_set tag_set;
+
+ struct cdev cdev;
+ struct device cdev_dev;
+
+/* Bit numbers used in 'state' below */
+#define UB_STATE_OPEN 0
+#define UB_STATE_USED 1
+#define UB_STATE_DELETED 2
+ unsigned long state;
+ int ub_number;
+
+ struct mutex mutex;
+
+ spinlock_t mm_lock;
+ struct mm_struct *mm;
+
+ /* Device parameters; includes the basic and zoned blocks */
+ struct ublk_params params;
+
+ struct completion completion;
+ unsigned int nr_queues_ready;
+ unsigned int nr_privileged_daemon;
+
+ /*
+ * Our ubq->daemon may be killed without any notification, so
+ * monitor each queue's daemon periodically
+ */
+ struct delayed_work monitor_work;
+ struct work_struct quiesce_work;
+ struct work_struct stop_work;
+};
+
+/*
+ * Per-request driver data, obtained with blk_mq_rq_to_pdu().
+ *
+ * operation, sector and nr_sectors describe a driver-private request:
+ * ublk_report_zones() fills them in for its REQ_OP_DRV_IN requests, and
+ * ublk_setup_iod() copies them into the io descriptor for REQ_OP_DRV_IN
+ * and REQ_OP_DRV_OUT requests.
+ */
+struct ublk_rq_data {
+ struct llist_node node;
+ struct callback_head work;
+ enum ublk_op operation; /* ublk operation to put in the io descriptor */
+ __u64 sector; /* start sector of the operation */
+ __u32 nr_sectors; /* number of sectors covered by the operation */
+};
+
+void ublk_set_nr_zones(struct ublk_device *ub);
+void ublk_dev_param_zoned_apply(struct ublk_device *ub);
+int ublk_revalidate_disk_zones(struct gendisk *disk);
+
+#ifdef CONFIG_BLK_DEV_UBLK_ZONED
+int ublk_report_zones(struct gendisk *disk, sector_t sector,
+ unsigned int nr_zones, report_zones_cb cb,
+ void *data);
+#else
+#define ublk_report_zones NULL
+#endif
+
+#endif
@@ -102,6 +102,11 @@
*/
#define UBLK_F_UNPRIVILEGED_DEV (1UL << 5)
+/*
+ * Enable zoned device support
+ */
+#define UBLK_F_ZONED (1ULL << 6)
+
/* device state */
#define UBLK_S_DEV_DEAD 0
#define UBLK_S_DEV_LIVE 1
@@ -155,12 +160,24 @@ struct ublksrv_ctrl_dev_info {
__u64 reserved2;
};
-#define UBLK_IO_OP_READ 0
-#define UBLK_IO_OP_WRITE 1
-#define UBLK_IO_OP_FLUSH 2
-#define UBLK_IO_OP_DISCARD 3
-#define UBLK_IO_OP_WRITE_SAME 4
-#define UBLK_IO_OP_WRITE_ZEROES 5
+/*
+ * Operation codes placed in the io descriptor's op_flags.
+ *
+ * Values 0-5 keep the numbering of the former UBLK_IO_OP_* defines.
+ * Values 10-15 are the zone management operations. UBLK_IO_OP_ZONE_APPEND
+ * is only reserved here: the driver still fails REQ_OP_ZONE_APPEND requests.
+ * The __UBLK_IO_OP_DRV_* entries bracket the ranges for operations issued
+ * as REQ_OP_DRV_IN / REQ_OP_DRV_OUT requests; each *_END value is also the
+ * next range's start. UBLK_IO_OP_REPORT_ZONES is the first driver-in
+ * operation.
+ */
+enum ublk_op {
+ UBLK_IO_OP_READ = 0,
+ UBLK_IO_OP_WRITE = 1,
+ UBLK_IO_OP_FLUSH = 2,
+ UBLK_IO_OP_DISCARD = 3,
+ UBLK_IO_OP_WRITE_SAME = 4,
+ UBLK_IO_OP_WRITE_ZEROES = 5,
+ UBLK_IO_OP_ZONE_OPEN = 10,
+ UBLK_IO_OP_ZONE_CLOSE = 11,
+ UBLK_IO_OP_ZONE_FINISH = 12,
+ UBLK_IO_OP_ZONE_APPEND = 13,
+ UBLK_IO_OP_ZONE_RESET = 15,
+ __UBLK_IO_OP_DRV_IN_START = 32,
+ UBLK_IO_OP_REPORT_ZONES = __UBLK_IO_OP_DRV_IN_START,
+ __UBLK_IO_OP_DRV_IN_END = 96,
+ __UBLK_IO_OP_DRV_OUT_START = __UBLK_IO_OP_DRV_IN_END,
+ __UBLK_IO_OP_DRV_OUT_END = 160,
+};
#define UBLK_IO_F_FAILFAST_DEV (1U << 8)
#define UBLK_IO_F_FAILFAST_TRANSPORT (1U << 9)
@@ -257,6 +274,12 @@ struct ublk_param_devt {
__u32 disk_minor;
};
+/*
+ * Zoned device parameters, selected with UBLK_PARAM_TYPE_ZONED.
+ * The driver applies the two limits to the disk with
+ * disk_set_max_open_zones() and disk_set_max_active_zones().
+ */
+struct ublk_param_zoned {
+ __u32 max_open_zones;
+ __u32 max_active_zones;
+ __u8 reserved[24]; /* space reserved for future parameters */
+};
+
struct ublk_params {
/*
* Total length of parameters, userspace has to set 'len' for both
@@ -268,11 +291,13 @@ struct ublk_params {
#define UBLK_PARAM_TYPE_BASIC (1 << 0)
#define UBLK_PARAM_TYPE_DISCARD (1 << 1)
#define UBLK_PARAM_TYPE_DEVT (1 << 2)
+#define UBLK_PARAM_TYPE_ZONED (1 << 3)
__u32 types; /* types of parameter included */
struct ublk_param_basic basic;
struct ublk_param_discard discard;
struct ublk_param_devt devt;
+ struct ublk_param_zoned zoned;
};
#endif