[v3] vp_vdpa: harden the logic of set status
Commit Message
From: Longpeng <longpeng2@huawei.com>
1. We should not set status to 0 when invoking vp_vdpa_set_status(),
trigger a warning in that case.
2. The driver MUST wait for a read of device_status to return 0 before
reinitializing the device. But we also don't want to keep us in an
infinite loop forever, so wait for 5s if we try to reset the device.
Signed-off-by: Longpeng <longpeng2@huawei.com>
---
Changes v3->v2:
- move VP_VDPA_RESET_TIMEOUT_US near the other macros. [Stefano]
- refer v1.2 in comments. [Stefano]
- s/keep/keeping/ [Jason]
- use readx_poll_timeout. [Jason]
Changes v1->v2:
- use WARN_ON instead of BUG_ON. [Stefano]
- use "warning + failed" instead of "infinite loop". [Jason, Stefano]
- use usleep_range instead of msleep (checkpatch). [Longpeng]
---
drivers/vdpa/virtio_pci/vp_vdpa.c | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
Comments
On Wed, Jan 04, 2023 at 12:25:19PM +0800, Longpeng(Mike) wrote:
> From: Longpeng <longpeng2@huawei.com>
>
> 1. We should not set status to 0 when invoking vp_vdpa_set_status(),
> trigger a warning in that case.
>
> 2. The driver MUST wait for a read of device_status to return 0 before
> reinitializing the device. But we also don't want to keep us in an
> infinite loop forever, so wait for 5s if we try to reset the device.
>
> Signed-off-by: Longpeng <longpeng2@huawei.com>
> ---
> Changes v3->v2:
> - move VP_VDPA_RESET_TIMEOUT_US near the other macros. [Stefano]
> - refer v1.2 in comments. [Stefano]
> - s/keep/keeping/ [Jason]
> - use readx_poll_timeout. [Jason]
>
> Changes v1->v2:
> - use WARN_ON instead of BUG_ON. [Stefano]
> - use "warning + failed" instead of "infinite loop". [Jason, Stefano]
> - use usleep_range instead of msleep (checkpatch). [Longpeng]
>
> ---
> drivers/vdpa/virtio_pci/vp_vdpa.c | 22 +++++++++++++++++++++-
> 1 file changed, 21 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c
> index d448db0c4de3..3fc496aea456 100644
> --- a/drivers/vdpa/virtio_pci/vp_vdpa.c
> +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c
> @@ -10,6 +10,7 @@
>
> #include <linux/interrupt.h>
> #include <linux/module.h>
> +#include <linux/iopoll.h>
> #include <linux/pci.h>
> #include <linux/vdpa.h>
> #include <linux/virtio.h>
> @@ -22,6 +23,7 @@
> #define VP_VDPA_QUEUE_MAX 256
> #define VP_VDPA_DRIVER_NAME "vp_vdpa"
> #define VP_VDPA_NAME_SIZE 256
> +#define VP_VDPA_RESET_TIMEOUT_US 5000000 /* 5s */
>
> struct vp_vring {
> void __iomem *notify;
> @@ -214,6 +216,9 @@ static void vp_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
> struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa);
> u8 s = vp_vdpa_get_status(vdpa);
>
> + /* We should never be setting status to 0. */
> + WARN_ON(status == 0);
> +
> if (status & VIRTIO_CONFIG_S_DRIVER_OK &&
> !(s & VIRTIO_CONFIG_S_DRIVER_OK)) {
> vp_vdpa_request_irq(vp_vdpa);
Isn't this user-triggerable? What prevents that?
> @@ -226,10 +231,25 @@ static int vp_vdpa_reset(struct vdpa_device *vdpa)
> {
> struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
> struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa);
> - u8 s = vp_vdpa_get_status(vdpa);
> + u8 tmp, s = vp_vdpa_get_status(vdpa);
> + int ret;
>
> vp_modern_set_status(mdev, 0);
>
> + /*
> + * As the virtio v1.1/v1.2 spec (4.1.4.3.2) says: After writing 0 to
> + * device_status, the driver MUST wait for a read of device_status
> + * to return 0 before reinitializing the device.
> + * To avoid keeping us here forever, we only wait for 5 seconds.
> + */
> + ret = readx_poll_timeout(vp_ioread8, &mdev->common->device_status, tmp,
> + tmp == 0, 1000, VP_VDPA_RESET_TIMEOUT_US);
> + if (ret) {
> + dev_err(&mdev->pci_dev->dev,
> + "vp_vdpa: fail to reset device, %d\n", ret);
> + return ret;
> + }
> +
> if (s & VIRTIO_CONFIG_S_DRIVER_OK)
> vp_vdpa_free_irq(vp_vdpa);
Do all callers actually check return status of reset?
If not they will happily reinitialize the device and violate the spec.
> --
> 2.23.0
On Wed, Jan 4, 2023 at 2:50 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Wed, Jan 04, 2023 at 12:25:19PM +0800, Longpeng(Mike) wrote:
> > From: Longpeng <longpeng2@huawei.com>
> >
> > 1. We should not set status to 0 when invoking vp_vdpa_set_status(),
> > trigger a warning in that case.
> >
> > 2. The driver MUST wait for a read of device_status to return 0 before
> > reinitializing the device. But we also don't want to keep us in an
> > infinite loop forever, so wait for 5s if we try to reset the device.
> >
> > Signed-off-by: Longpeng <longpeng2@huawei.com>
> > ---
> > Changes v3->v2:
> > - move VP_VDPA_RESET_TIMEOUT_US near the other macros. [Stefano]
> > - refer v1.2 in comments. [Stefano]
> > - s/keep/keeping/ [Jason]
> > - use readx_poll_timeout. [Jason]
> >
> > Changes v1->v2:
> > - use WARN_ON instead of BUG_ON. [Stefano]
> > - use "warning + failed" instead of "infinite loop". [Jason, Stefano]
> > - use usleep_range instead of msleep (checkpatch). [Longpeng]
> >
> > ---
> > drivers/vdpa/virtio_pci/vp_vdpa.c | 22 +++++++++++++++++++++-
> > 1 file changed, 21 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c
> > index d448db0c4de3..3fc496aea456 100644
> > --- a/drivers/vdpa/virtio_pci/vp_vdpa.c
> > +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c
> > @@ -10,6 +10,7 @@
> >
> > #include <linux/interrupt.h>
> > #include <linux/module.h>
> > +#include <linux/iopoll.h>
> > #include <linux/pci.h>
> > #include <linux/vdpa.h>
> > #include <linux/virtio.h>
> > @@ -22,6 +23,7 @@
> > #define VP_VDPA_QUEUE_MAX 256
> > #define VP_VDPA_DRIVER_NAME "vp_vdpa"
> > #define VP_VDPA_NAME_SIZE 256
> > +#define VP_VDPA_RESET_TIMEOUT_US 5000000 /* 5s */
> >
> > struct vp_vring {
> > void __iomem *notify;
> > @@ -214,6 +216,9 @@ static void vp_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
> > struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa);
> > u8 s = vp_vdpa_get_status(vdpa);
> >
> > + /* We should never be setting status to 0. */
> > + WARN_ON(status == 0);
> > +
> > if (status & VIRTIO_CONFIG_S_DRIVER_OK &&
> > !(s & VIRTIO_CONFIG_S_DRIVER_OK)) {
> > vp_vdpa_request_irq(vp_vdpa);
>
> Isn't this user-triggerable? What prevents that?
I think it's this in vhost_vdpa_set_status()
if (status == 0) {
ret = vdpa_reset(vdpa);
if (ret)
return ret;
The reset was factored out to dedicated config ops.
>
> > @@ -226,10 +231,25 @@ static int vp_vdpa_reset(struct vdpa_device *vdpa)
> > {
> > struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
> > struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa);
> > - u8 s = vp_vdpa_get_status(vdpa);
> > + u8 tmp, s = vp_vdpa_get_status(vdpa);
> > + int ret;
> >
> > vp_modern_set_status(mdev, 0);
> >
> > + /*
> > + * As the virtio v1.1/v1.2 spec (4.1.4.3.2) says: After writing 0 to
> > + * device_status, the driver MUST wait for a read of device_status
> > + * to return 0 before reinitializing the device.
> > + * To avoid keeping us here forever, we only wait for 5 seconds.
> > + */
5 second might not be sufficient see the discussion about sleep
instead of poll for cvq[1]
> > + ret = readx_poll_timeout(vp_ioread8, &mdev->common->device_status, tmp,
> > + tmp == 0, 1000, VP_VDPA_RESET_TIMEOUT_US);
> > + if (ret) {
> > + dev_err(&mdev->pci_dev->dev,
> > + "vp_vdpa: fail to reset device, %d\n", ret);
> > + return ret;
> > + }
> > +
> > if (s & VIRTIO_CONFIG_S_DRIVER_OK)
> > vp_vdpa_free_irq(vp_vdpa);
>
> Do all callers actually check return status of reset?
> If not they will happily reinitialize the device and violate the spec.
Can we simply:
1) start with a very long timeout 1minutes etc
2) break the device when timeout?
Thanks
[1] https://www.spinics.net/lists/netdev/msg869953.html
>
>
>
> > --
> > 2.23.0
>
@@ -10,6 +10,7 @@
#include <linux/interrupt.h>
#include <linux/module.h>
+#include <linux/iopoll.h>
#include <linux/pci.h>
#include <linux/vdpa.h>
#include <linux/virtio.h>
@@ -22,6 +23,7 @@
#define VP_VDPA_QUEUE_MAX 256
#define VP_VDPA_DRIVER_NAME "vp_vdpa"
#define VP_VDPA_NAME_SIZE 256
+#define VP_VDPA_RESET_TIMEOUT_US 5000000 /* 5s */
struct vp_vring {
void __iomem *notify;
@@ -214,6 +216,9 @@ static void vp_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa);
u8 s = vp_vdpa_get_status(vdpa);
+ /* We should never be setting status to 0. */
+ WARN_ON(status == 0);
+
if (status & VIRTIO_CONFIG_S_DRIVER_OK &&
!(s & VIRTIO_CONFIG_S_DRIVER_OK)) {
vp_vdpa_request_irq(vp_vdpa);
@@ -226,10 +231,25 @@ static int vp_vdpa_reset(struct vdpa_device *vdpa)
{
struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa);
- u8 s = vp_vdpa_get_status(vdpa);
+ u8 tmp, s = vp_vdpa_get_status(vdpa);
+ int ret;
vp_modern_set_status(mdev, 0);
+ /*
+ * As the virtio v1.1/v1.2 spec (4.1.4.3.2) says: After writing 0 to
+ * device_status, the driver MUST wait for a read of device_status
+ * to return 0 before reinitializing the device.
+ * To avoid keeping us here forever, we only wait for 5 seconds.
+ */
+ ret = readx_poll_timeout(vp_ioread8, &mdev->common->device_status, tmp,
+ tmp == 0, 1000, VP_VDPA_RESET_TIMEOUT_US);
+ if (ret) {
+ dev_err(&mdev->pci_dev->dev,
+ "vp_vdpa: fail to reset device, %d\n", ret);
+ return ret;
+ }
+
if (s & VIRTIO_CONFIG_S_DRIVER_OK)
vp_vdpa_free_irq(vp_vdpa);