[1/2] xen-blkback: Implement diskseq checks

Message ID 20230601214823.1701-2-demi@invisiblethingslab.com
State New
Headers
Series xen/blkback: support delete-on-close block devices |

Commit Message

Demi Marie Obenour June 1, 2023, 9:48 p.m. UTC
  This allows specifying a disk sequence number in XenStore.  If it does
not match the disk sequence number of the underlying device, the device
will not be exported and a warning will be logged.  Userspace can use
this to eliminate race conditions due to major/minor number reuse.
Old kernels do not support the new syntax, but a later patch will allow
userspace to discover that the new syntax is supported.

Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
---
 drivers/block/xen-blkback/xenbus.c | 112 +++++++++++++++++++++++------
 1 file changed, 89 insertions(+), 23 deletions(-)
  

Comments

Christoph Hellwig June 7, 2023, 7:44 a.m. UTC | #1
On Thu, Jun 01, 2023 at 05:48:22PM -0400, Demi Marie Obenour wrote:
> +	if (diskseq) {
> +		struct gendisk *disk = bdev->bd_disk;
> +
> +		if (unlikely(disk == NULL)) {
> +			pr_err("%s: device %08x has no gendisk\n",
> +			       __func__, vbd->pdevice);
> +			xen_vbd_free(vbd);
> +			return -EFAULT;
> +		}

bdev->bd_disk is never NULL.

> +	diskseq_str = xenbus_read(XBT_NIL, dev->nodename, "diskseq", &diskseq_len);

Please avoid the overly long line.

> +	if (IS_ERR(diskseq_str)) {
> +		int err = PTR_ERR(diskseq_str);
> +		diskseq_str = NULL;
> +
> +		/*
> +		 * If this does not exist, it means legacy userspace that does not

.. even more so in comments.

> +		 * support diskseq.
> +		 */
> +		if (unlikely(!XENBUS_EXIST_ERR(err))) {
> +			xenbus_dev_fatal(dev, err, "reading diskseq");
> +			return;
> +		}
> +		diskseq = 0;
> +	} else if (diskseq_len <= 0) {
> +		xenbus_dev_fatal(dev, -EFAULT, "diskseq must not be empty");
> +		goto fail;
> +	} else if (diskseq_len > 16) {

No need for a else after a return.

> +		xenbus_dev_fatal(dev, -ERANGE, "diskseq too long: got %d but limit is 16",
> +				 diskseq_len);
> +		goto fail;
> +	} else if (diskseq_str[0] == '0') {
> +		xenbus_dev_fatal(dev, -ERANGE, "diskseq must not start with '0'");
> +		goto fail;
> +	} else {
> +		char *diskseq_end;
> +		diskseq = simple_strtoull(diskseq_str, &diskseq_end, 16);
> +		if (diskseq_end != diskseq_str + diskseq_len) {
> +			xenbus_dev_fatal(dev, -EINVAL, "invalid diskseq");
> +			goto fail;
> +		}
> +		kfree(diskseq_str);
> +		diskseq_str = NULL;
> +	}

And I suspect the code will be a lot easier to follow if you move
the diskseq validation into a separate helper.
  
Demi Marie Obenour June 7, 2023, 4 p.m. UTC | #2
On Wed, Jun 07, 2023 at 12:44:22AM -0700, Christoph Hellwig wrote:
> On Thu, Jun 01, 2023 at 05:48:22PM -0400, Demi Marie Obenour wrote:
> > +	if (diskseq) {
> > +		struct gendisk *disk = bdev->bd_disk;
> > +
> > +		if (unlikely(disk == NULL)) {
> > +			pr_err("%s: device %08x has no gendisk\n",
> > +			       __func__, vbd->pdevice);
> > +			xen_vbd_free(vbd);
> > +			return -EFAULT;
> > +		}
> 
> bdev->bd_disk is never NULL.

Fixed in v3.

> > +	diskseq_str = xenbus_read(XBT_NIL, dev->nodename, "diskseq", &diskseq_len);
> 
> Please avoid the overly long line.

Fixed in v3.

> > +	if (IS_ERR(diskseq_str)) {
> > +		int err = PTR_ERR(diskseq_str);
> > +		diskseq_str = NULL;
> > +
> > +		/*
> > +		 * If this does not exist, it means legacy userspace that does not
> 
> .. even more so in comments.

Fixed in v3.

> > +		 * support diskseq.
> > +		 */
> > +		if (unlikely(!XENBUS_EXIST_ERR(err))) {
> > +			xenbus_dev_fatal(dev, err, "reading diskseq");
> > +			return;
> > +		}
> > +		diskseq = 0;
> > +	} else if (diskseq_len <= 0) {
> > +		xenbus_dev_fatal(dev, -EFAULT, "diskseq must not be empty");
> > +		goto fail;
> > +	} else if (diskseq_len > 16) {
> 
> No need for a else after a return.

Fixed in v3.

> > +		xenbus_dev_fatal(dev, -ERANGE, "diskseq too long: got %d but limit is 16",
> > +				 diskseq_len);
> > +		goto fail;
> > +	} else if (diskseq_str[0] == '0') {
> > +		xenbus_dev_fatal(dev, -ERANGE, "diskseq must not start with '0'");
> > +		goto fail;
> > +	} else {
> > +		char *diskseq_end;
> > +		diskseq = simple_strtoull(diskseq_str, &diskseq_end, 16);
> > +		if (diskseq_end != diskseq_str + diskseq_len) {
> > +			xenbus_dev_fatal(dev, -EINVAL, "invalid diskseq");
> > +			goto fail;
> > +		}
> > +		kfree(diskseq_str);
> > +		diskseq_str = NULL;
> > +	}
> 
> And I suspect the code will be a lot easier to follow if you move
> the diskseq validation into a separate helper.

Fixed in v3.
  

Patch

diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 4807af1d58059394d7a992335dabaf2bc3901721..9c3eb148fbd802c74e626c3d7bcd69dcb09bd921 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -24,6 +24,7 @@  struct backend_info {
 	struct xenbus_watch	backend_watch;
 	unsigned		major;
 	unsigned		minor;
+	unsigned long long	diskseq;
 	char			*mode;
 };
 
@@ -479,7 +480,7 @@  static void xen_vbd_free(struct xen_vbd *vbd)
 
 static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
 			  unsigned major, unsigned minor, int readonly,
-			  int cdrom)
+			  bool cdrom, u64 diskseq)
 {
 	struct xen_vbd *vbd;
 	struct block_device *bdev;
@@ -507,6 +508,26 @@  static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
 		xen_vbd_free(vbd);
 		return -ENOENT;
 	}
+
+	if (diskseq) {
+		struct gendisk *disk = bdev->bd_disk;
+
+		if (unlikely(disk == NULL)) {
+			pr_err("%s: device %08x has no gendisk\n",
+			       __func__, vbd->pdevice);
+			xen_vbd_free(vbd);
+			return -EFAULT;
+		}
+
+		if (unlikely(disk->diskseq != diskseq)) {
+			pr_warn("%s: device %08x has incorrect sequence "
+				"number 0x%llx (expected 0x%llx)\n",
+				__func__, vbd->pdevice, disk->diskseq, diskseq);
+			xen_vbd_free(vbd);
+			return -ENODEV;
+		}
+	}
+
 	vbd->size = vbd_sz(vbd);
 
 	if (cdrom || disk_to_cdi(vbd->bdev->bd_disk))
@@ -707,6 +728,9 @@  static void backend_changed(struct xenbus_watch *watch,
 	int cdrom = 0;
 	unsigned long handle;
 	char *device_type;
+	char *diskseq_str = NULL;
+	int diskseq_len;
+	unsigned long long diskseq;
 
 	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
 
@@ -725,10 +749,46 @@  static void backend_changed(struct xenbus_watch *watch,
 		return;
 	}
 
-	if (be->major | be->minor) {
-		if (be->major != major || be->minor != minor)
-			pr_warn("changing physical device (from %x:%x to %x:%x) not supported.\n",
-				be->major, be->minor, major, minor);
+	diskseq_str = xenbus_read(XBT_NIL, dev->nodename, "diskseq", &diskseq_len);
+	if (IS_ERR(diskseq_str)) {
+		int err = PTR_ERR(diskseq_str);
+		diskseq_str = NULL;
+
+		/*
+		 * If this does not exist, it means legacy userspace that does not
+		 * support diskseq.
+		 */
+		if (unlikely(!XENBUS_EXIST_ERR(err))) {
+			xenbus_dev_fatal(dev, err, "reading diskseq");
+			return;
+		}
+		diskseq = 0;
+	} else if (diskseq_len <= 0) {
+		xenbus_dev_fatal(dev, -EFAULT, "diskseq must not be empty");
+		goto fail;
+	} else if (diskseq_len > 16) {
+		xenbus_dev_fatal(dev, -ERANGE, "diskseq too long: got %d but limit is 16",
+				 diskseq_len);
+		goto fail;
+	} else if (diskseq_str[0] == '0') {
+		xenbus_dev_fatal(dev, -ERANGE, "diskseq must not start with '0'");
+		goto fail;
+	} else {
+		char *diskseq_end;
+		diskseq = simple_strtoull(diskseq_str, &diskseq_end, 16);
+		if (diskseq_end != diskseq_str + diskseq_len) {
+			xenbus_dev_fatal(dev, -EINVAL, "invalid diskseq");
+			goto fail;
+		}
+		kfree(diskseq_str);
+		diskseq_str = NULL;
+	}
+
+	if (be->major | be->minor | be->diskseq) {
+		if (be->major != major || be->minor != minor || be->diskseq != diskseq)
+			pr_warn("changing physical device (from %x:%x:%llx to %x:%x:%llx)"
+				" not supported.\n",
+				be->major, be->minor, be->diskseq, major, minor, diskseq);
 		return;
 	}
 
@@ -756,29 +816,35 @@  static void backend_changed(struct xenbus_watch *watch,
 
 	be->major = major;
 	be->minor = minor;
+	be->diskseq = diskseq;
 
 	err = xen_vbd_create(be->blkif, handle, major, minor,
-			     !strchr(be->mode, 'w'), cdrom);
-
-	if (err)
-		xenbus_dev_fatal(dev, err, "creating vbd structure");
-	else {
-		err = xenvbd_sysfs_addif(dev);
-		if (err) {
-			xen_vbd_free(&be->blkif->vbd);
-			xenbus_dev_fatal(dev, err, "creating sysfs entries");
-		}
-	}
+			     !strchr(be->mode, 'w'), cdrom, diskseq);
 
 	if (err) {
-		kfree(be->mode);
-		be->mode = NULL;
-		be->major = 0;
-		be->minor = 0;
-	} else {
-		/* We're potentially connected now */
-		xen_update_blkif_status(be->blkif);
+		xenbus_dev_fatal(dev, err, "creating vbd structure");
+		goto fail;
 	}
+
+	err = xenvbd_sysfs_addif(dev);
+	if (err) {
+		xenbus_dev_fatal(dev, err, "creating sysfs entries");
+		goto free_vbd;
+	}
+
+	/* We're potentially connected now */
+	xen_update_blkif_status(be->blkif);
+	return;
+
+free_vbd:
+	xen_vbd_free(&be->blkif->vbd);
+fail:
+	kfree(diskseq_str);
+	kfree(be->mode);
+	be->mode = NULL;
+	be->major = 0;
+	be->minor = 0;
+	be->diskseq = 0;
 }
 
 /*