[V3,1/2] PCI/DOE: Remove the pci_doe_flush_mb() call

Message ID 20221128040338.1936529-2-ira.weiny@intel.com
State New
Headers
Series PCI/DOE: Remove asynchronous task support |

Commit Message

Ira Weiny Nov. 28, 2022, 4:03 a.m. UTC
  From: Ira Weiny <ira.weiny@intel.com>

pci_doe_flush_mb() does not work and is currently unused.

It does not work because each struct doe_mb is managed as part of the
PCI device.  They can't go away as long as the PCI device exists.
pci_doe_flush_mb() was set up to flush the workqueue and prevent any
further submissions to the mailboxes when the PCI device goes away.
Unfortunately, this was fundamentally flawed.  There was no guarantee
that a struct doe_mb remained after pci_doe_flush_mb() returned.
Therefore, the doe_mb state could be invalid when those threads waiting
on the workqueue were flushed.

Fortunately the current code is safe because all callers make a
synchronous call to pci_doe_submit_task() and maintain a reference on
the PCI device.  Therefore pci_doe_flush_mb() is effectively unused.

Rather than attempt to fix pci_doe_flush_mb() just remove the dead code
around pci_doe_flush_mb().

Cc: Lukas Wunner <lukas@wunner.de>
Cc: Jonathan Cameron <Jonathan.Cameron@Huawei.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>

---
Changes from V2:
	Lukas
		Clarify commit message.
	Jonathan
		Add comment for changed poll interval.
---
 drivers/pci/doe.c | 49 +++++------------------------------------------
 1 file changed, 5 insertions(+), 44 deletions(-)
  

Comments

Li, Ming Nov. 28, 2022, 5:51 a.m. UTC | #1
On 11/28/2022 12:03 PM, ira.weiny@intel.com wrote:
> From: Ira Weiny <ira.weiny@intel.com>
> 
> pci_doe_flush_mb() does not work and is currently unused.
> 
> It does not work because each struct doe_mb is managed as part of the
> PCI device.  They can't go away as long as the PCI device exists.
> pci_doe_flush_mb() was set up to flush the workqueue and prevent any
> further submissions to the mailboxes when the PCI device goes away.
> Unfortunately, this was fundamentally flawed.  There was no guarantee
> that a struct doe_mb remained after pci_doe_flush_mb() returned.
> Therefore, the doe_mb state could be invalid when those threads waiting
> on the workqueue were flushed.
> 
> Fortunately the current code is safe because all callers make a
> synchronous call to pci_doe_submit_task() and maintain a reference on
> the PCI device.  Therefore pci_doe_flush_mb() is effectively unused.
> 
> Rather than attempt to fix pci_doe_flush_mb() just remove the dead code
> around pci_doe_flush_mb().
> 
> Cc: Lukas Wunner <lukas@wunner.de>
> Cc: Jonathan Cameron <Jonathan.Cameron@Huawei.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>

Some comments inline.

> 
> ---
> Changes from V2:
> 	Lukas
> 		Clarify commit message.
> 	Jonathan
> 		Add comment for changed poll interval.

...

>  
> -static int pci_doe_wait(struct pci_doe_mb *doe_mb, unsigned long timeout)
> -{
> -	if (wait_event_timeout(doe_mb->wq,
> -			       test_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags),
> -			       timeout))
> -		return -EIO;
> -	return 0;
> -}
> -
>  static void pci_doe_write_ctrl(struct pci_doe_mb *doe_mb, u32 val)
>  {
>  	struct pci_dev *pdev = doe_mb->pdev;
> @@ -82,12 +73,9 @@ static int pci_doe_abort(struct pci_doe_mb *doe_mb)
>  	pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_ABORT);
>  
>  	do {
> -		int rc;
>  		u32 val;
>  
> -		rc = pci_doe_wait(doe_mb, PCI_DOE_POLL_INTERVAL);
> -		if (rc)
> -			return rc;
> +		msleep_interruptible(PCI_DOE_POLL_INTERVAL_MSECS);
>  		pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);

Looks like we don't need msleep_interruptible() here; can we use msleep() directly?

>  
>  		/* Abort success! */
> @@ -278,11 +266,7 @@ static void doe_statemachine_work(struct work_struct *work)
>  			signal_task_abort(task, -EIO);
>  			return;
>  		}
> -		rc = pci_doe_wait(doe_mb, PCI_DOE_POLL_INTERVAL);
> -		if (rc) {
> -			signal_task_abort(task, rc);
> -			return;
> -		}
> +		msleep_interruptible(PCI_DOE_POLL_INTERVAL_MSECS);
>  		goto retry_resp;
>  	}

I guess you use msleep_interruptible() here so that the current task can be aborted when a signal arrives.
If so, the code should call signal_task_abort() and return when msleep_interruptible() is interrupted by a signal.

Thanks
Ming

>  
> @@ -383,21 +367,6 @@ static void pci_doe_destroy_workqueue(void *mb)
>  	destroy_workqueue(doe_mb->work_queue);
>  }
>  
> -static void pci_doe_flush_mb(void *mb)
> -{
> -	struct pci_doe_mb *doe_mb = mb;
> -
> -	/* Stop all pending work items from starting */
> -	set_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags);
> -
> -	/* Cancel an in progress work item, if necessary */
> -	set_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags);
> -	wake_up(&doe_mb->wq);
> -
> -	/* Flush all work items */
> -	flush_workqueue(doe_mb->work_queue);
> -}
> -
>  /**
>   * pcim_doe_create_mb() - Create a DOE mailbox object
>   *
> @@ -450,14 +419,6 @@ struct pci_doe_mb *pcim_doe_create_mb(struct pci_dev *pdev, u16 cap_offset)
>  		return ERR_PTR(rc);
>  	}
>  
> -	/*
> -	 * The state machine and the mailbox should be in sync now;
> -	 * Set up mailbox flush prior to using the mailbox to query protocols.
> -	 */
> -	rc = devm_add_action_or_reset(dev, pci_doe_flush_mb, doe_mb);
> -	if (rc)
> -		return ERR_PTR(rc);
> -
>  	rc = pci_doe_cache_protocols(doe_mb);
>  	if (rc) {
>  		pci_err(pdev, "[%x] failed to cache protocols : %d\n",
  
Ira Weiny Nov. 28, 2022, 5:42 p.m. UTC | #2
On Mon, Nov 28, 2022 at 01:51:24PM +0800, Li, Ming wrote:
> 
> On 11/28/2022 12:03 PM, ira.weiny@intel.com wrote:
> > From: Ira Weiny <ira.weiny@intel.com>
> > 
> > pci_doe_flush_mb() does not work and is currently unused.
> > 
> > It does not work because each struct doe_mb is managed as part of the
> > PCI device.  They can't go away as long as the PCI device exists.
> > pci_doe_flush_mb() was set up to flush the workqueue and prevent any
> > further submissions to the mailboxes when the PCI device goes away.
> > Unfortunately, this was fundamentally flawed.  There was no guarantee
> > that a struct doe_mb remained after pci_doe_flush_mb() returned.
> > Therefore, the doe_mb state could be invalid when those threads waiting
> > on the workqueue were flushed.
> > 
> > Fortunately the current code is safe because all callers make a
> > synchronous call to pci_doe_submit_task() and maintain a reference on
> > the PCI device.  Therefore pci_doe_flush_mb() is effectively unused.
> > 
> > Rather than attempt to fix pci_doe_flush_mb() just remove the dead code
> > around pci_doe_flush_mb().
> > 
> > Cc: Lukas Wunner <lukas@wunner.de>
> > Cc: Jonathan Cameron <Jonathan.Cameron@Huawei.com>
> > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> Some comments inline.
> 
> > 
> > ---
> > Changes from V2:
> > 	Lukas
> > 		Clarify commit message.
> > 	Jonathan
> > 		Add comment for changed poll interval.
> 
> ...
> 
> >  
> > -static int pci_doe_wait(struct pci_doe_mb *doe_mb, unsigned long timeout)
> > -{
> > -	if (wait_event_timeout(doe_mb->wq,
> > -			       test_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags),
> > -			       timeout))
> > -		return -EIO;
> > -	return 0;
> > -}
> > -
> >  static void pci_doe_write_ctrl(struct pci_doe_mb *doe_mb, u32 val)
> >  {
> >  	struct pci_dev *pdev = doe_mb->pdev;
> > @@ -82,12 +73,9 @@ static int pci_doe_abort(struct pci_doe_mb *doe_mb)
> >  	pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_ABORT);
> >  
> >  	do {
> > -		int rc;
> >  		u32 val;
> >  
> > -		rc = pci_doe_wait(doe_mb, PCI_DOE_POLL_INTERVAL);
> > -		if (rc)
> > -			return rc;
> > +		msleep_interruptible(PCI_DOE_POLL_INTERVAL_MSECS);
> >  		pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
> 
> Looks like we don't have to use msleep_interruptible() here, can use msleep() directly?

I don't know.  I think your suggestion below holds here too.  Unfortunately
that could mask the signal received in the case below.  But I think that is
going to be rare enough we could ignore it.

> 
> >  
> >  		/* Abort success! */
> > @@ -278,11 +266,7 @@ static void doe_statemachine_work(struct work_struct *work)
> >  			signal_task_abort(task, -EIO);
> >  			return;
> >  		}
> > -		rc = pci_doe_wait(doe_mb, PCI_DOE_POLL_INTERVAL);
> > -		if (rc) {
> > -			signal_task_abort(task, rc);
> > -			return;
> > -		}
> > +		msleep_interruptible(PCI_DOE_POLL_INTERVAL_MSECS);
> >  		goto retry_resp;
> >  	}
> 
> I guess that you use msleep_interruptible() here for aborting current task when signals come.
> So there should be signal_task_abort() and return when msleep_interruptible() receives a signal.

Yes this makes much more sense.  Thanks for looking!
Ira

> 
> Thanks
> Ming
> 
> >  
> > @@ -383,21 +367,6 @@ static void pci_doe_destroy_workqueue(void *mb)
> >  	destroy_workqueue(doe_mb->work_queue);
> >  }
> >  
> > -static void pci_doe_flush_mb(void *mb)
> > -{
> > -	struct pci_doe_mb *doe_mb = mb;
> > -
> > -	/* Stop all pending work items from starting */
> > -	set_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags);
> > -
> > -	/* Cancel an in progress work item, if necessary */
> > -	set_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags);
> > -	wake_up(&doe_mb->wq);
> > -
> > -	/* Flush all work items */
> > -	flush_workqueue(doe_mb->work_queue);
> > -}
> > -
> >  /**
> >   * pcim_doe_create_mb() - Create a DOE mailbox object
> >   *
> > @@ -450,14 +419,6 @@ struct pci_doe_mb *pcim_doe_create_mb(struct pci_dev *pdev, u16 cap_offset)
> >  		return ERR_PTR(rc);
> >  	}
> >  
> > -	/*
> > -	 * The state machine and the mailbox should be in sync now;
> > -	 * Set up mailbox flush prior to using the mailbox to query protocols.
> > -	 */
> > -	rc = devm_add_action_or_reset(dev, pci_doe_flush_mb, doe_mb);
> > -	if (rc)
> > -		return ERR_PTR(rc);
> > -
> >  	rc = pci_doe_cache_protocols(doe_mb);
> >  	if (rc) {
> >  		pci_err(pdev, "[%x] failed to cache protocols : %d\n",
>
  
Alison Schofield Nov. 28, 2022, 5:51 p.m. UTC | #3
On Sun, Nov 27, 2022 at 08:03:37PM -0800, Ira Weiny wrote:
> From: Ira Weiny <ira.weiny@intel.com>
> 
> pci_doe_flush_mb() does not work and is currently unused.
> 
> It does not work because each struct doe_mb is managed as part of the
> PCI device.  They can't go away as long as the PCI device exists.
> pci_doe_flush_mb() was set up to flush the workqueue and prevent any
> further submissions to the mailboxes when the PCI device goes away.
> Unfortunately, this was fundamentally flawed.  There was no guarantee
> that a struct doe_mb remained after pci_doe_flush_mb() returned.
> Therefore, the doe_mb state could be invalid when those threads waiting
> on the workqueue were flushed.
> 
> Fortunately the current code is safe because all callers make a
> synchronous call to pci_doe_submit_task() and maintain a reference on
> the PCI device.  Therefore pci_doe_flush_mb() is effectively unused.
> 
> Rather than attempt to fix pci_doe_flush_mb() just remove the dead code
> around pci_doe_flush_mb().

The commit subject says "Remove ...." and the commit log only
talks about removing code, yet an msleep_interruptible() call is added.
Can the subject and log make that clearer?

> 
> Cc: Lukas Wunner <lukas@wunner.de>
> Cc: Jonathan Cameron <Jonathan.Cameron@Huawei.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> ---
> Changes from V2:
> 	Lukas
> 		Clarify commit message.
> 	Jonathan
> 		Add comment for changed poll interval.
> ---
>  drivers/pci/doe.c | 49 +++++------------------------------------------
>  1 file changed, 5 insertions(+), 44 deletions(-)
> 
> diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
> index e402f05068a5..685e7d26c7eb 100644
> --- a/drivers/pci/doe.c
> +++ b/drivers/pci/doe.c
> @@ -24,10 +24,10 @@
>  
>  /* Timeout of 1 second from 6.30.2 Operation, PCI Spec r6.0 */
>  #define PCI_DOE_TIMEOUT HZ
> -#define PCI_DOE_POLL_INTERVAL	(PCI_DOE_TIMEOUT / 128)
> +/* Interval to poll mailbox status */
> +#define PCI_DOE_POLL_INTERVAL_MSECS	8
>  
> -#define PCI_DOE_FLAG_CANCEL	0
> -#define PCI_DOE_FLAG_DEAD	1
> +#define PCI_DOE_FLAG_DEAD	0
>  
>  /**
>   * struct pci_doe_mb - State for a single DOE mailbox
> @@ -53,15 +53,6 @@ struct pci_doe_mb {
>  	unsigned long flags;
>  };
>  
> -static int pci_doe_wait(struct pci_doe_mb *doe_mb, unsigned long timeout)
> -{
> -	if (wait_event_timeout(doe_mb->wq,
> -			       test_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags),
> -			       timeout))
> -		return -EIO;
> -	return 0;
> -}
> -
>  static void pci_doe_write_ctrl(struct pci_doe_mb *doe_mb, u32 val)
>  {
>  	struct pci_dev *pdev = doe_mb->pdev;
> @@ -82,12 +73,9 @@ static int pci_doe_abort(struct pci_doe_mb *doe_mb)
>  	pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_ABORT);
>  
>  	do {
> -		int rc;
>  		u32 val;
>  
> -		rc = pci_doe_wait(doe_mb, PCI_DOE_POLL_INTERVAL);
> -		if (rc)
> -			return rc;
> +		msleep_interruptible(PCI_DOE_POLL_INTERVAL_MSECS);
>  		pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
>  
>  		/* Abort success! */
> @@ -278,11 +266,7 @@ static void doe_statemachine_work(struct work_struct *work)
>  			signal_task_abort(task, -EIO);
>  			return;
>  		}
> -		rc = pci_doe_wait(doe_mb, PCI_DOE_POLL_INTERVAL);
> -		if (rc) {
> -			signal_task_abort(task, rc);
> -			return;
> -		}
> +		msleep_interruptible(PCI_DOE_POLL_INTERVAL_MSECS);
>  		goto retry_resp;
>  	}
>  
> @@ -383,21 +367,6 @@ static void pci_doe_destroy_workqueue(void *mb)
>  	destroy_workqueue(doe_mb->work_queue);
>  }
>  
> -static void pci_doe_flush_mb(void *mb)
> -{
> -	struct pci_doe_mb *doe_mb = mb;
> -
> -	/* Stop all pending work items from starting */
> -	set_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags);
> -
> -	/* Cancel an in progress work item, if necessary */
> -	set_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags);
> -	wake_up(&doe_mb->wq);
> -
> -	/* Flush all work items */
> -	flush_workqueue(doe_mb->work_queue);
> -}
> -
>  /**
>   * pcim_doe_create_mb() - Create a DOE mailbox object
>   *
> @@ -450,14 +419,6 @@ struct pci_doe_mb *pcim_doe_create_mb(struct pci_dev *pdev, u16 cap_offset)
>  		return ERR_PTR(rc);
>  	}
>  
> -	/*
> -	 * The state machine and the mailbox should be in sync now;
> -	 * Set up mailbox flush prior to using the mailbox to query protocols.
> -	 */
> -	rc = devm_add_action_or_reset(dev, pci_doe_flush_mb, doe_mb);
> -	if (rc)
> -		return ERR_PTR(rc);
> -
>  	rc = pci_doe_cache_protocols(doe_mb);
>  	if (rc) {
>  		pci_err(pdev, "[%x] failed to cache protocols : %d\n",
> -- 
> 2.37.2
>
  
Ira Weiny Nov. 28, 2022, 7:41 p.m. UTC | #4
On Mon, Nov 28, 2022 at 09:51:47AM -0800, Alison Schofield wrote:
> On Sun, Nov 27, 2022 at 08:03:37PM -0800, Ira Weiny wrote:
> > From: Ira Weiny <ira.weiny@intel.com>
> > 
> > pci_doe_flush_mb() does not work and is currently unused.
> > 
> > It does not work because each struct doe_mb is managed as part of the
> > PCI device.  They can't go away as long as the PCI device exists.
> > pci_doe_flush_mb() was set up to flush the workqueue and prevent any
> > further submissions to the mailboxes when the PCI device goes away.
> > Unfortunately, this was fundamentally flawed.  There was no guarantee
> > that a struct doe_mb remained after pci_doe_flush_mb() returned.
> > Therefore, the doe_mb state could be invalid when those threads waiting
> > on the workqueue were flushed.
> > 
> > Fortunately the current code is safe because all callers make a
> > synchronous call to pci_doe_submit_task() and maintain a reference on
> > the PCI device.  Therefore pci_doe_flush_mb() is effectively unused.
> > 
> > Rather than attempt to fix pci_doe_flush_mb() just remove the dead code
> > around pci_doe_flush_mb().
> 
> The commit message says "Remove ...." and the commit log only
> talks about removing code, yet an msleep() is added.
> Can those be clearer?

:-/

I'm struggling a bit with this comment.  The patch's focus is on removing an
unneeded and effectively unused function.  (It is called, but not in a way that
would cause any running task to be aborted.)  The replacement of pci_doe_wait()
with msleep_interruptible() is a side effect, and I don't think it belongs in
the one-liner.

I suppose something like this might work?

PCI/DOE: Replace broken task cancellation with msleep

I guess that makes some sense.  Combined with Ming's suggestion we are still
allowing task cancellation but from a signal rather than PCI device removal.

Ira

> 
> > 
> > Cc: Lukas Wunner <lukas@wunner.de>
> > Cc: Jonathan Cameron <Jonathan.Cameron@Huawei.com>
> > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> > 
> > ---
> > Changes from V2:
> > 	Lukas
> > 		Clarify commit message.
> > 	Jonathan
> > 		Add comment for changed poll interval.
> > ---
> >  drivers/pci/doe.c | 49 +++++------------------------------------------
> >  1 file changed, 5 insertions(+), 44 deletions(-)
> > 
> > diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
> > index e402f05068a5..685e7d26c7eb 100644
> > --- a/drivers/pci/doe.c
> > +++ b/drivers/pci/doe.c
> > @@ -24,10 +24,10 @@
> >  
> >  /* Timeout of 1 second from 6.30.2 Operation, PCI Spec r6.0 */
> >  #define PCI_DOE_TIMEOUT HZ
> > -#define PCI_DOE_POLL_INTERVAL	(PCI_DOE_TIMEOUT / 128)
> > +/* Interval to poll mailbox status */
> > +#define PCI_DOE_POLL_INTERVAL_MSECS	8
> >  
> > -#define PCI_DOE_FLAG_CANCEL	0
> > -#define PCI_DOE_FLAG_DEAD	1
> > +#define PCI_DOE_FLAG_DEAD	0
> >  
> >  /**
> >   * struct pci_doe_mb - State for a single DOE mailbox
> > @@ -53,15 +53,6 @@ struct pci_doe_mb {
> >  	unsigned long flags;
> >  };
> >  
> > -static int pci_doe_wait(struct pci_doe_mb *doe_mb, unsigned long timeout)
> > -{
> > -	if (wait_event_timeout(doe_mb->wq,
> > -			       test_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags),
> > -			       timeout))
> > -		return -EIO;
> > -	return 0;
> > -}
> > -
> >  static void pci_doe_write_ctrl(struct pci_doe_mb *doe_mb, u32 val)
> >  {
> >  	struct pci_dev *pdev = doe_mb->pdev;
> > @@ -82,12 +73,9 @@ static int pci_doe_abort(struct pci_doe_mb *doe_mb)
> >  	pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_ABORT);
> >  
> >  	do {
> > -		int rc;
> >  		u32 val;
> >  
> > -		rc = pci_doe_wait(doe_mb, PCI_DOE_POLL_INTERVAL);
> > -		if (rc)
> > -			return rc;
> > +		msleep_interruptible(PCI_DOE_POLL_INTERVAL_MSECS);
> >  		pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
> >  
> >  		/* Abort success! */
> > @@ -278,11 +266,7 @@ static void doe_statemachine_work(struct work_struct *work)
> >  			signal_task_abort(task, -EIO);
> >  			return;
> >  		}
> > -		rc = pci_doe_wait(doe_mb, PCI_DOE_POLL_INTERVAL);
> > -		if (rc) {
> > -			signal_task_abort(task, rc);
> > -			return;
> > -		}
> > +		msleep_interruptible(PCI_DOE_POLL_INTERVAL_MSECS);
> >  		goto retry_resp;
> >  	}
> >  
> > @@ -383,21 +367,6 @@ static void pci_doe_destroy_workqueue(void *mb)
> >  	destroy_workqueue(doe_mb->work_queue);
> >  }
> >  
> > -static void pci_doe_flush_mb(void *mb)
> > -{
> > -	struct pci_doe_mb *doe_mb = mb;
> > -
> > -	/* Stop all pending work items from starting */
> > -	set_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags);
> > -
> > -	/* Cancel an in progress work item, if necessary */
> > -	set_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags);
> > -	wake_up(&doe_mb->wq);
> > -
> > -	/* Flush all work items */
> > -	flush_workqueue(doe_mb->work_queue);
> > -}
> > -
> >  /**
> >   * pcim_doe_create_mb() - Create a DOE mailbox object
> >   *
> > @@ -450,14 +419,6 @@ struct pci_doe_mb *pcim_doe_create_mb(struct pci_dev *pdev, u16 cap_offset)
> >  		return ERR_PTR(rc);
> >  	}
> >  
> > -	/*
> > -	 * The state machine and the mailbox should be in sync now;
> > -	 * Set up mailbox flush prior to using the mailbox to query protocols.
> > -	 */
> > -	rc = devm_add_action_or_reset(dev, pci_doe_flush_mb, doe_mb);
> > -	if (rc)
> > -		return ERR_PTR(rc);
> > -
> >  	rc = pci_doe_cache_protocols(doe_mb);
> >  	if (rc) {
> >  		pci_err(pdev, "[%x] failed to cache protocols : %d\n",
> > -- 
> > 2.37.2
> >
  

Patch

diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
index e402f05068a5..685e7d26c7eb 100644
--- a/drivers/pci/doe.c
+++ b/drivers/pci/doe.c
@@ -24,10 +24,10 @@ 
 
 /* Timeout of 1 second from 6.30.2 Operation, PCI Spec r6.0 */
 #define PCI_DOE_TIMEOUT HZ
-#define PCI_DOE_POLL_INTERVAL	(PCI_DOE_TIMEOUT / 128)
+/* Interval to poll mailbox status */
+#define PCI_DOE_POLL_INTERVAL_MSECS	8
 
-#define PCI_DOE_FLAG_CANCEL	0
-#define PCI_DOE_FLAG_DEAD	1
+#define PCI_DOE_FLAG_DEAD	0
 
 /**
  * struct pci_doe_mb - State for a single DOE mailbox
@@ -53,15 +53,6 @@  struct pci_doe_mb {
 	unsigned long flags;
 };
 
-static int pci_doe_wait(struct pci_doe_mb *doe_mb, unsigned long timeout)
-{
-	if (wait_event_timeout(doe_mb->wq,
-			       test_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags),
-			       timeout))
-		return -EIO;
-	return 0;
-}
-
 static void pci_doe_write_ctrl(struct pci_doe_mb *doe_mb, u32 val)
 {
 	struct pci_dev *pdev = doe_mb->pdev;
@@ -82,12 +73,9 @@  static int pci_doe_abort(struct pci_doe_mb *doe_mb)
 	pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_ABORT);
 
 	do {
-		int rc;
 		u32 val;
 
-		rc = pci_doe_wait(doe_mb, PCI_DOE_POLL_INTERVAL);
-		if (rc)
-			return rc;
+		msleep_interruptible(PCI_DOE_POLL_INTERVAL_MSECS);
 		pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
 
 		/* Abort success! */
@@ -278,11 +266,7 @@  static void doe_statemachine_work(struct work_struct *work)
 			signal_task_abort(task, -EIO);
 			return;
 		}
-		rc = pci_doe_wait(doe_mb, PCI_DOE_POLL_INTERVAL);
-		if (rc) {
-			signal_task_abort(task, rc);
-			return;
-		}
+		msleep_interruptible(PCI_DOE_POLL_INTERVAL_MSECS);
 		goto retry_resp;
 	}
 
@@ -383,21 +367,6 @@  static void pci_doe_destroy_workqueue(void *mb)
 	destroy_workqueue(doe_mb->work_queue);
 }
 
-static void pci_doe_flush_mb(void *mb)
-{
-	struct pci_doe_mb *doe_mb = mb;
-
-	/* Stop all pending work items from starting */
-	set_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags);
-
-	/* Cancel an in progress work item, if necessary */
-	set_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags);
-	wake_up(&doe_mb->wq);
-
-	/* Flush all work items */
-	flush_workqueue(doe_mb->work_queue);
-}
-
 /**
  * pcim_doe_create_mb() - Create a DOE mailbox object
  *
@@ -450,14 +419,6 @@  struct pci_doe_mb *pcim_doe_create_mb(struct pci_dev *pdev, u16 cap_offset)
 		return ERR_PTR(rc);
 	}
 
-	/*
-	 * The state machine and the mailbox should be in sync now;
-	 * Set up mailbox flush prior to using the mailbox to query protocols.
-	 */
-	rc = devm_add_action_or_reset(dev, pci_doe_flush_mb, doe_mb);
-	if (rc)
-		return ERR_PTR(rc);
-
 	rc = pci_doe_cache_protocols(doe_mb);
 	if (rc) {
 		pci_err(pdev, "[%x] failed to cache protocols : %d\n",