PCI/doe: Fix work struct declaration

Message ID 20221115011943.1051039-1-ira.weiny@intel.com
State New
Headers
Series PCI/doe: Fix work struct declaration

Commit Message

Ira Weiny Nov. 15, 2022, 1:19 a.m. UTC
  From: Ira Weiny <ira.weiny@intel.com>

The callers of pci_doe_submit_task() allocate the pci_doe_task on the
stack.  This causes the work structure to be allocated on the stack
without pci_doe_submit_task() knowing.  Work item initialization needs
to be done with either INIT_WORK_ONSTACK() or INIT_WORK() depending on
how the work item is allocated.

Jonathan suggested creating DOE task allocation macros such as
DECLARE_CDAT_DOE_TASK_ONSTACK().[1]  The issue with this is that the work
function is not known to the callers, yet it must be initialized correctly.

A follow-up suggestion was to have an internal 'pci_doe_work' item
allocated by pci_doe_submit_task().[2]  This requires an allocation, which
could restrict the contexts in which tasks can be used.

Compromise with an intermediate step: initialize the task struct with
a new call, pci_doe_init_task(), which must be called prior to
pci_doe_submit_task().

[1] https://lore.kernel.org/linux-cxl/20221014151045.24781-1-Jonathan.Cameron@huawei.com/T/#m88a7f50dcce52f30c8bf5c3dcc06fa9843b54a2d
[2] https://lore.kernel.org/linux-cxl/20221014151045.24781-1-Jonathan.Cameron@huawei.com/T/#m63c636c5135f304480370924f4d03c00357be667

Cc: Bjorn Helgaas <helgaas@kernel.org>
Reported-by: Gregory Price <gregory.price@memverge.com>
Reported-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Suggested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 drivers/cxl/core/pci.c  |  2 ++
 drivers/pci/doe.c       | 14 ++++++++++++--
 include/linux/pci-doe.h |  8 +++++---
 3 files changed, 19 insertions(+), 5 deletions(-)


base-commit: 30a0b95b1335e12efef89dd78518ed3e4a71a763
  

Comments

Jonathan Cameron Nov. 15, 2022, 11:13 a.m. UTC | #1
On Mon, 14 Nov 2022 17:19:43 -0800
ira.weiny@intel.com wrote:

> From: Ira Weiny <ira.weiny@intel.com>
> 
> The callers of pci_doe_submit_task() allocate the pci_doe_task on the
> stack.  This causes the work structure to be allocated on the stack
> without pci_doe_submit_task() knowing.  Work item initialization needs
> to be done with either INIT_WORK_ONSTACK() or INIT_WORK() depending on
> how the work item is allocated.
> 
> Jonathan suggested creating doe task allocation macros such as
> DECLARE_CDAT_DOE_TASK_ONSTACK().[1]  The issue with this is the work
> function is not known to the callers and must be initialized correctly.
> 
> A follow up suggestion was to have an internal 'pci_doe_work' item
> allocated by pci_doe_submit_task().[2]  This requires an allocation which
> could restrict the context where tasks are used.
> 
> Compromise with an intermediate step to initialize the task struct with
> a new call pci_doe_init_task() which must be called prior to submit
> task.
> 
> [1] https://lore.kernel.org/linux-cxl/20221014151045.24781-1-Jonathan.Cameron@huawei.com/T/#m88a7f50dcce52f30c8bf5c3dcc06fa9843b54a2d
> [2] https://lore.kernel.org/linux-cxl/20221014151045.24781-1-Jonathan.Cameron@huawei.com/T/#m63c636c5135f304480370924f4d03c00357be667
> 
> Cc: Bjorn Helgaas <helgaas@kernel.org>
> Reported-by: Gregory Price <gregory.price@memverge.com>
> Reported-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Suggested-by: Dan Williams <dan.j.williams@intel.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Looks like a good solution to me.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>


> ---
>  drivers/cxl/core/pci.c  |  2 ++
>  drivers/pci/doe.c       | 14 ++++++++++++--
>  include/linux/pci-doe.h |  8 +++++---
>  3 files changed, 19 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index 9240df53ed87..a19c1fa0e2f4 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -525,6 +525,7 @@ static int cxl_cdat_get_length(struct device *dev,
>  	DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(0), t);
>  	int rc;
>  
> +	pci_doe_init_task(cdat_doe, &t.task, true);
>  	rc = pci_doe_submit_task(cdat_doe, &t.task);
>  	if (rc < 0) {
>  		dev_err(dev, "DOE submit failed: %d", rc);
> @@ -554,6 +555,7 @@ static int cxl_cdat_read_table(struct device *dev,
>  		u32 *entry;
>  		int rc;
>  
> +		pci_doe_init_task(cdat_doe, &t.task, true);
>  		rc = pci_doe_submit_task(cdat_doe, &t.task);
>  		if (rc < 0) {
>  			dev_err(dev, "DOE submit failed: %d", rc);
> diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
> index e402f05068a5..cabeae4ae955 100644
> --- a/drivers/pci/doe.c
> +++ b/drivers/pci/doe.c
> @@ -319,6 +319,7 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
>  	};
>  	int rc;
>  
> +	pci_doe_init_task(doe_mb, &task, true);
>  	rc = pci_doe_submit_task(doe_mb, &task);
>  	if (rc < 0)
>  		return rc;
> @@ -495,6 +496,14 @@ bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type)
>  }
>  EXPORT_SYMBOL_GPL(pci_doe_supports_prot);
>  
> +void pci_doe_init_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task,
> +		       bool onstack)
> +{
> +	task->doe_mb = doe_mb;
> +	__INIT_WORK(&task->work, doe_statemachine_work, onstack);
> +}
> +EXPORT_SYMBOL_GPL(pci_doe_init_task);
> +
>  /**
>   * pci_doe_submit_task() - Submit a task to be processed by the state machine
>   *
> @@ -517,6 +526,9 @@ int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
>  	if (!pci_doe_supports_prot(doe_mb, task->prot.vid, task->prot.type))
>  		return -EINVAL;
>  
> +	if (WARN_ON_ONCE(task->work.func != doe_statemachine_work))
> +		return -EINVAL;
> +
>  	/*
>  	 * DOE requests must be a whole number of DW and the response needs to
>  	 * be big enough for at least 1 DW
> @@ -528,8 +540,6 @@ int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
>  	if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags))
>  		return -EIO;
>  
> -	task->doe_mb = doe_mb;
> -	INIT_WORK(&task->work, doe_statemachine_work);
>  	queue_work(doe_mb->work_queue, &task->work);
>  	return 0;
>  }
> diff --git a/include/linux/pci-doe.h b/include/linux/pci-doe.h
> index ed9b4df792b8..457fc0e53d64 100644
> --- a/include/linux/pci-doe.h
> +++ b/include/linux/pci-doe.h
> @@ -31,8 +31,8 @@ struct pci_doe_mb;
>   * @rv: Return value.  Length of received response or error (bytes)
>   * @complete: Called when task is complete
>   * @private: Private data for the consumer
> - * @work: Used internally by the mailbox
> - * @doe_mb: Used internally by the mailbox
> + * @work: Used internally by the mailbox [see pci_doe_init_task()]
> + * @doe_mb: Used internally by the mailbox [see pci_doe_init_task()]
>   *
>   * The payload sizes and rv are specified in bytes with the following
>   * restrictions concerning the protocol.
> @@ -53,7 +53,7 @@ struct pci_doe_task {
>  	void (*complete)(struct pci_doe_task *task);
>  	void *private;
>  
> -	/* No need for the user to initialize these fields */
> +	/* Call pci_doe_init_task() for these */
>  	struct work_struct work;
>  	struct pci_doe_mb *doe_mb;
>  };
> @@ -72,6 +72,8 @@ struct pci_doe_task {
>  
>  struct pci_doe_mb *pcim_doe_create_mb(struct pci_dev *pdev, u16 cap_offset);
>  bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type);
> +void pci_doe_init_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task,
> +		       bool onstack);
>  int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task);
>  
>  #endif
> 
> base-commit: 30a0b95b1335e12efef89dd78518ed3e4a71a763
  
Bjorn Helgaas Nov. 15, 2022, 7:44 p.m. UTC | #2
Hi Ira,

Can you fix the subject to follow capitalization convention (use "git
log --oneline") and say something more specific than "fix struct"?

On Mon, Nov 14, 2022 at 05:19:43PM -0800, ira.weiny@intel.com wrote:
> From: Ira Weiny <ira.weiny@intel.com>
> 
> The callers of pci_doe_submit_task() allocate the pci_doe_task on the
> stack.  This causes the work structure to be allocated on the stack
> without pci_doe_submit_task() knowing.  Work item initialization needs
> to be done with either INIT_WORK_ONSTACK() or INIT_WORK() depending on
> how the work item is allocated.
> 
> Jonathan suggested creating doe task allocation macros such as
> DECLARE_CDAT_DOE_TASK_ONSTACK().[1]  The issue with this is the work
> function is not known to the callers and must be initialized correctly.
> 
> A follow up suggestion was to have an internal 'pci_doe_work' item
> allocated by pci_doe_submit_task().[2]  This requires an allocation which
> could restrict the context where tasks are used.
> 
> Compromise with an intermediate step to initialize the task struct with
> a new call pci_doe_init_task() which must be called prior to submit
> task.

I'm not really a fan of passing a parameter to say "this struct is on
the stack" because that seems kind of error-prone and I don't know
what the consequence of getting it wrong would be.  Sounds like it
*could* be some memory corruption or reading garbage data that would
be hard to debug.

Do we have cases today where pci_doe_submit_task() can't do the
kzalloc() as in your patch at [3]?  If the current use cases allow a
kzalloc(), why not do that now and defer this until it becomes an
issue?

Bjorn

> [1] https://lore.kernel.org/linux-cxl/20221014151045.24781-1-Jonathan.Cameron@huawei.com/T/#m88a7f50dcce52f30c8bf5c3dcc06fa9843b54a2d
> [2] https://lore.kernel.org/linux-cxl/20221014151045.24781-1-Jonathan.Cameron@huawei.com/T/#m63c636c5135f304480370924f4d03c00357be667

[3] https://lore.kernel.org/linux-cxl/Y2AnKB88ALYm9c5L@iweiny-desk3/

> Cc: Bjorn Helgaas <helgaas@kernel.org>
> Reported-by: Gregory Price <gregory.price@memverge.com>
> Reported-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Suggested-by: Dan Williams <dan.j.williams@intel.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> ---
>  drivers/cxl/core/pci.c  |  2 ++
>  drivers/pci/doe.c       | 14 ++++++++++++--
>  include/linux/pci-doe.h |  8 +++++---
>  3 files changed, 19 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index 9240df53ed87..a19c1fa0e2f4 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -525,6 +525,7 @@ static int cxl_cdat_get_length(struct device *dev,
>  	DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(0), t);
>  	int rc;
>  
> +	pci_doe_init_task(cdat_doe, &t.task, true);
>  	rc = pci_doe_submit_task(cdat_doe, &t.task);
>  	if (rc < 0) {
>  		dev_err(dev, "DOE submit failed: %d", rc);
> @@ -554,6 +555,7 @@ static int cxl_cdat_read_table(struct device *dev,
>  		u32 *entry;
>  		int rc;
>  
> +		pci_doe_init_task(cdat_doe, &t.task, true);
>  		rc = pci_doe_submit_task(cdat_doe, &t.task);
>  		if (rc < 0) {
>  			dev_err(dev, "DOE submit failed: %d", rc);
> diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
> index e402f05068a5..cabeae4ae955 100644
> --- a/drivers/pci/doe.c
> +++ b/drivers/pci/doe.c
> @@ -319,6 +319,7 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
>  	};
>  	int rc;
>  
> +	pci_doe_init_task(doe_mb, &task, true);
>  	rc = pci_doe_submit_task(doe_mb, &task);
>  	if (rc < 0)
>  		return rc;
> @@ -495,6 +496,14 @@ bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type)
>  }
>  EXPORT_SYMBOL_GPL(pci_doe_supports_prot);
>  
> +void pci_doe_init_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task,
> +		       bool onstack)
> +{
> +	task->doe_mb = doe_mb;
> +	__INIT_WORK(&task->work, doe_statemachine_work, onstack);
> +}
> +EXPORT_SYMBOL_GPL(pci_doe_init_task);
> +
>  /**
>   * pci_doe_submit_task() - Submit a task to be processed by the state machine
>   *
> @@ -517,6 +526,9 @@ int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
>  	if (!pci_doe_supports_prot(doe_mb, task->prot.vid, task->prot.type))
>  		return -EINVAL;
>  
> +	if (WARN_ON_ONCE(task->work.func != doe_statemachine_work))
> +		return -EINVAL;
> +
>  	/*
>  	 * DOE requests must be a whole number of DW and the response needs to
>  	 * be big enough for at least 1 DW
> @@ -528,8 +540,6 @@ int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
>  	if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags))
>  		return -EIO;
>  
> -	task->doe_mb = doe_mb;
> -	INIT_WORK(&task->work, doe_statemachine_work);
>  	queue_work(doe_mb->work_queue, &task->work);
>  	return 0;
>  }
> diff --git a/include/linux/pci-doe.h b/include/linux/pci-doe.h
> index ed9b4df792b8..457fc0e53d64 100644
> --- a/include/linux/pci-doe.h
> +++ b/include/linux/pci-doe.h
> @@ -31,8 +31,8 @@ struct pci_doe_mb;
>   * @rv: Return value.  Length of received response or error (bytes)
>   * @complete: Called when task is complete
>   * @private: Private data for the consumer
> - * @work: Used internally by the mailbox
> - * @doe_mb: Used internally by the mailbox
> + * @work: Used internally by the mailbox [see pci_doe_init_task()]
> + * @doe_mb: Used internally by the mailbox [see pci_doe_init_task()]
>   *
>   * The payload sizes and rv are specified in bytes with the following
>   * restrictions concerning the protocol.
> @@ -53,7 +53,7 @@ struct pci_doe_task {
>  	void (*complete)(struct pci_doe_task *task);
>  	void *private;
>  
> -	/* No need for the user to initialize these fields */
> +	/* Call pci_doe_init_task() for these */
>  	struct work_struct work;
>  	struct pci_doe_mb *doe_mb;
>  };
> @@ -72,6 +72,8 @@ struct pci_doe_task {
>  
>  struct pci_doe_mb *pcim_doe_create_mb(struct pci_dev *pdev, u16 cap_offset);
>  bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type);
> +void pci_doe_init_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task,
> +		       bool onstack);
>  int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task);
>  
>  #endif
> 
> base-commit: 30a0b95b1335e12efef89dd78518ed3e4a71a763
> -- 
> 2.37.2
>
  
Ira Weiny Nov. 15, 2022, 8:18 p.m. UTC | #3
On Tue, Nov 15, 2022 at 01:44:24PM -0600, Bjorn Helgaas wrote:
> Hi Ira,
> 
> Can you fix the subject to follow capitalization convention (use "git
> log --oneline")
>

My apologies.  I should have capitalized DOE.

>
> and say something more specific than "fix struct"?

How about?

PCI/DOE: Fix initialization of work struct in pci_doe_task

Thanks for the review,
Ira

> 
> On Mon, Nov 14, 2022 at 05:19:43PM -0800, ira.weiny@intel.com wrote:
> > From: Ira Weiny <ira.weiny@intel.com>
> > 
> > The callers of pci_doe_submit_task() allocate the pci_doe_task on the
> > stack.  This causes the work structure to be allocated on the stack
> > without pci_doe_submit_task() knowing.  Work item initialization needs
> > to be done with either INIT_WORK_ONSTACK() or INIT_WORK() depending on
> > how the work item is allocated.
> > 
> > Jonathan suggested creating doe task allocation macros such as
> > DECLARE_CDAT_DOE_TASK_ONSTACK().[1]  The issue with this is the work
> > function is not known to the callers and must be initialized correctly.
> > 
> > A follow up suggestion was to have an internal 'pci_doe_work' item
> > allocated by pci_doe_submit_task().[2]  This requires an allocation which
> > could restrict the context where tasks are used.
> > 
> > Compromise with an intermediate step to initialize the task struct with
> > a new call pci_doe_init_task() which must be called prior to submit
> > task.
> 
> I'm not really a fan of passing a parameter to say "this struct is on
> the stack" because that seems kind of error-prone and I don't know
> what the consequence of getting it wrong would be.  Sounds like it
> *could* be some memory corruption or reading garbage data that would
> be hard to debug.
> 
> Do we have cases today where pci_doe_submit_task() can't do the
> kzalloc() as in your patch at [3]?  If the current use cases allow a
> kzalloc(), why not do that now and defer this until it becomes an
> issue?
> 
> Bjorn
> 
> > [1] https://lore.kernel.org/linux-cxl/20221014151045.24781-1-Jonathan.Cameron@huawei.com/T/#m88a7f50dcce52f30c8bf5c3dcc06fa9843b54a2d
> > [2] https://lore.kernel.org/linux-cxl/20221014151045.24781-1-Jonathan.Cameron@huawei.com/T/#m63c636c5135f304480370924f4d03c00357be667
> 
> [3] https://lore.kernel.org/linux-cxl/Y2AnKB88ALYm9c5L@iweiny-desk3/
> 
> > Cc: Bjorn Helgaas <helgaas@kernel.org>
> > Reported-by: Gregory Price <gregory.price@memverge.com>
> > Reported-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > Suggested-by: Dan Williams <dan.j.williams@intel.com>
> > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> > ---
> >  drivers/cxl/core/pci.c  |  2 ++
> >  drivers/pci/doe.c       | 14 ++++++++++++--
> >  include/linux/pci-doe.h |  8 +++++---
> >  3 files changed, 19 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> > index 9240df53ed87..a19c1fa0e2f4 100644
> > --- a/drivers/cxl/core/pci.c
> > +++ b/drivers/cxl/core/pci.c
> > @@ -525,6 +525,7 @@ static int cxl_cdat_get_length(struct device *dev,
> >  	DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(0), t);
> >  	int rc;
> >  
> > +	pci_doe_init_task(cdat_doe, &t.task, true);
> >  	rc = pci_doe_submit_task(cdat_doe, &t.task);
> >  	if (rc < 0) {
> >  		dev_err(dev, "DOE submit failed: %d", rc);
> > @@ -554,6 +555,7 @@ static int cxl_cdat_read_table(struct device *dev,
> >  		u32 *entry;
> >  		int rc;
> >  
> > +		pci_doe_init_task(cdat_doe, &t.task, true);
> >  		rc = pci_doe_submit_task(cdat_doe, &t.task);
> >  		if (rc < 0) {
> >  			dev_err(dev, "DOE submit failed: %d", rc);
> > diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
> > index e402f05068a5..cabeae4ae955 100644
> > --- a/drivers/pci/doe.c
> > +++ b/drivers/pci/doe.c
> > @@ -319,6 +319,7 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
> >  	};
> >  	int rc;
> >  
> > +	pci_doe_init_task(doe_mb, &task, true);
> >  	rc = pci_doe_submit_task(doe_mb, &task);
> >  	if (rc < 0)
> >  		return rc;
> > @@ -495,6 +496,14 @@ bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type)
> >  }
> >  EXPORT_SYMBOL_GPL(pci_doe_supports_prot);
> >  
> > +void pci_doe_init_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task,
> > +		       bool onstack)
> > +{
> > +	task->doe_mb = doe_mb;
> > +	__INIT_WORK(&task->work, doe_statemachine_work, onstack);
> > +}
> > +EXPORT_SYMBOL_GPL(pci_doe_init_task);
> > +
> >  /**
> >   * pci_doe_submit_task() - Submit a task to be processed by the state machine
> >   *
> > @@ -517,6 +526,9 @@ int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
> >  	if (!pci_doe_supports_prot(doe_mb, task->prot.vid, task->prot.type))
> >  		return -EINVAL;
> >  
> > +	if (WARN_ON_ONCE(task->work.func != doe_statemachine_work))
> > +		return -EINVAL;
> > +
> >  	/*
> >  	 * DOE requests must be a whole number of DW and the response needs to
> >  	 * be big enough for at least 1 DW
> > @@ -528,8 +540,6 @@ int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
> >  	if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags))
> >  		return -EIO;
> >  
> > -	task->doe_mb = doe_mb;
> > -	INIT_WORK(&task->work, doe_statemachine_work);
> >  	queue_work(doe_mb->work_queue, &task->work);
> >  	return 0;
> >  }
> > diff --git a/include/linux/pci-doe.h b/include/linux/pci-doe.h
> > index ed9b4df792b8..457fc0e53d64 100644
> > --- a/include/linux/pci-doe.h
> > +++ b/include/linux/pci-doe.h
> > @@ -31,8 +31,8 @@ struct pci_doe_mb;
> >   * @rv: Return value.  Length of received response or error (bytes)
> >   * @complete: Called when task is complete
> >   * @private: Private data for the consumer
> > - * @work: Used internally by the mailbox
> > - * @doe_mb: Used internally by the mailbox
> > + * @work: Used internally by the mailbox [see pci_doe_init_task()]
> > + * @doe_mb: Used internally by the mailbox [see pci_doe_init_task()]
> >   *
> >   * The payload sizes and rv are specified in bytes with the following
> >   * restrictions concerning the protocol.
> > @@ -53,7 +53,7 @@ struct pci_doe_task {
> >  	void (*complete)(struct pci_doe_task *task);
> >  	void *private;
> >  
> > -	/* No need for the user to initialize these fields */
> > +	/* Call pci_doe_init_task() for these */
> >  	struct work_struct work;
> >  	struct pci_doe_mb *doe_mb;
> >  };
> > @@ -72,6 +72,8 @@ struct pci_doe_task {
> >  
> >  struct pci_doe_mb *pcim_doe_create_mb(struct pci_dev *pdev, u16 cap_offset);
> >  bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type);
> > +void pci_doe_init_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task,
> > +		       bool onstack);
> >  int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task);
> >  
> >  #endif
> > 
> > base-commit: 30a0b95b1335e12efef89dd78518ed3e4a71a763
> > -- 
> > 2.37.2
> >
  
Bjorn Helgaas Nov. 15, 2022, 8:41 p.m. UTC | #4
On Tue, Nov 15, 2022 at 12:18:38PM -0800, Ira Weiny wrote:
> On Tue, Nov 15, 2022 at 01:44:24PM -0600, Bjorn Helgaas wrote:
> > and say something more specific than "fix struct"?
> 
> How about?
> 
> PCI/DOE: Fix initialization of work struct in pci_doe_task

The importance of this has to do with whether something is on the
stack, so I think something about that would be useful.

I'm afraid this subject line bike-shedding has made you overlook my
other questions below ...

> > On Mon, Nov 14, 2022 at 05:19:43PM -0800, ira.weiny@intel.com wrote:
> > > From: Ira Weiny <ira.weiny@intel.com>
> > > 
> > > The callers of pci_doe_submit_task() allocate the pci_doe_task on the
> > > stack.  This causes the work structure to be allocated on the stack
> > > without pci_doe_submit_task() knowing.  Work item initialization needs
> > > to be done with either INIT_WORK_ONSTACK() or INIT_WORK() depending on
> > > how the work item is allocated.
> > > 
> > > Jonathan suggested creating doe task allocation macros such as
> > > DECLARE_CDAT_DOE_TASK_ONSTACK().[1]  The issue with this is the work
> > > function is not known to the callers and must be initialized correctly.
> > > 
> > > A follow up suggestion was to have an internal 'pci_doe_work' item
> > > allocated by pci_doe_submit_task().[2]  This requires an allocation which
> > > could restrict the context where tasks are used.
> > > 
> > > Compromise with an intermediate step to initialize the task struct with
> > > a new call pci_doe_init_task() which must be called prior to submit
> > > task.
> > 
> > I'm not really a fan of passing a parameter to say "this struct is on
> > the stack" because that seems kind of error-prone and I don't know
> > what the consequence of getting it wrong would be.  Sounds like it
> > *could* be some memory corruption or reading garbage data that would
> > be hard to debug.
> > 
> > Do we have cases today where pci_doe_submit_task() can't do the
> > kzalloc() as in your patch at [3]?  If the current use cases allow a
> > kzalloc(), why not do that now and defer this until it becomes an
> > issue?
> > 
> > Bjorn
> > 
> > > [1] https://lore.kernel.org/linux-cxl/20221014151045.24781-1-Jonathan.Cameron@huawei.com/T/#m88a7f50dcce52f30c8bf5c3dcc06fa9843b54a2d
> > > [2] https://lore.kernel.org/linux-cxl/20221014151045.24781-1-Jonathan.Cameron@huawei.com/T/#m63c636c5135f304480370924f4d03c00357be667
> > 
> > [3] https://lore.kernel.org/linux-cxl/Y2AnKB88ALYm9c5L@iweiny-desk3/
> > 
> > > Cc: Bjorn Helgaas <helgaas@kernel.org>
> > > Reported-by: Gregory Price <gregory.price@memverge.com>
> > > Reported-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > > Suggested-by: Dan Williams <dan.j.williams@intel.com>
> > > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> > > ---
> > >  drivers/cxl/core/pci.c  |  2 ++
> > >  drivers/pci/doe.c       | 14 ++++++++++++--
> > >  include/linux/pci-doe.h |  8 +++++---
> > >  3 files changed, 19 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> > > index 9240df53ed87..a19c1fa0e2f4 100644
> > > --- a/drivers/cxl/core/pci.c
> > > +++ b/drivers/cxl/core/pci.c
> > > @@ -525,6 +525,7 @@ static int cxl_cdat_get_length(struct device *dev,
> > >  	DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(0), t);
> > >  	int rc;
> > >  
> > > +	pci_doe_init_task(cdat_doe, &t.task, true);
> > >  	rc = pci_doe_submit_task(cdat_doe, &t.task);
> > >  	if (rc < 0) {
> > >  		dev_err(dev, "DOE submit failed: %d", rc);
> > > @@ -554,6 +555,7 @@ static int cxl_cdat_read_table(struct device *dev,
> > >  		u32 *entry;
> > >  		int rc;
> > >  
> > > +		pci_doe_init_task(cdat_doe, &t.task, true);
> > >  		rc = pci_doe_submit_task(cdat_doe, &t.task);
> > >  		if (rc < 0) {
> > >  			dev_err(dev, "DOE submit failed: %d", rc);
> > > diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
> > > index e402f05068a5..cabeae4ae955 100644
> > > --- a/drivers/pci/doe.c
> > > +++ b/drivers/pci/doe.c
> > > @@ -319,6 +319,7 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
> > >  	};
> > >  	int rc;
> > >  
> > > +	pci_doe_init_task(doe_mb, &task, true);
> > >  	rc = pci_doe_submit_task(doe_mb, &task);
> > >  	if (rc < 0)
> > >  		return rc;
> > > @@ -495,6 +496,14 @@ bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type)
> > >  }
> > >  EXPORT_SYMBOL_GPL(pci_doe_supports_prot);
> > >  
> > > +void pci_doe_init_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task,
> > > +		       bool onstack)
> > > +{
> > > +	task->doe_mb = doe_mb;
> > > +	__INIT_WORK(&task->work, doe_statemachine_work, onstack);
> > > +}
> > > +EXPORT_SYMBOL_GPL(pci_doe_init_task);
> > > +
> > >  /**
> > >   * pci_doe_submit_task() - Submit a task to be processed by the state machine
> > >   *
> > > @@ -517,6 +526,9 @@ int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
> > >  	if (!pci_doe_supports_prot(doe_mb, task->prot.vid, task->prot.type))
> > >  		return -EINVAL;
> > >  
> > > +	if (WARN_ON_ONCE(task->work.func != doe_statemachine_work))
> > > +		return -EINVAL;
> > > +
> > >  	/*
> > >  	 * DOE requests must be a whole number of DW and the response needs to
> > >  	 * be big enough for at least 1 DW
> > > @@ -528,8 +540,6 @@ int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
> > >  	if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags))
> > >  		return -EIO;
> > >  
> > > -	task->doe_mb = doe_mb;
> > > -	INIT_WORK(&task->work, doe_statemachine_work);
> > >  	queue_work(doe_mb->work_queue, &task->work);
> > >  	return 0;
> > >  }
> > > diff --git a/include/linux/pci-doe.h b/include/linux/pci-doe.h
> > > index ed9b4df792b8..457fc0e53d64 100644
> > > --- a/include/linux/pci-doe.h
> > > +++ b/include/linux/pci-doe.h
> > > @@ -31,8 +31,8 @@ struct pci_doe_mb;
> > >   * @rv: Return value.  Length of received response or error (bytes)
> > >   * @complete: Called when task is complete
> > >   * @private: Private data for the consumer
> > > - * @work: Used internally by the mailbox
> > > - * @doe_mb: Used internally by the mailbox
> > > + * @work: Used internally by the mailbox [see pci_doe_init_task()]
> > > + * @doe_mb: Used internally by the mailbox [see pci_doe_init_task()]
> > >   *
> > >   * The payload sizes and rv are specified in bytes with the following
> > >   * restrictions concerning the protocol.
> > > @@ -53,7 +53,7 @@ struct pci_doe_task {
> > >  	void (*complete)(struct pci_doe_task *task);
> > >  	void *private;
> > >  
> > > -	/* No need for the user to initialize these fields */
> > > +	/* Call pci_doe_init_task() for these */
> > >  	struct work_struct work;
> > >  	struct pci_doe_mb *doe_mb;
> > >  };
> > > @@ -72,6 +72,8 @@ struct pci_doe_task {
> > >  
> > >  struct pci_doe_mb *pcim_doe_create_mb(struct pci_dev *pdev, u16 cap_offset);
> > >  bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type);
> > > +void pci_doe_init_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task,
> > > +		       bool onstack);
> > >  int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task);
> > >  
> > >  #endif
> > > 
> > > base-commit: 30a0b95b1335e12efef89dd78518ed3e4a71a763
> > > -- 
> > > 2.37.2
> > >
  
Ira Weiny Nov. 15, 2022, 8:54 p.m. UTC | #5
On Tue, Nov 15, 2022 at 02:41:35PM -0600, Bjorn Helgaas wrote:
> On Tue, Nov 15, 2022 at 12:18:38PM -0800, Ira Weiny wrote:
> > On Tue, Nov 15, 2022 at 01:44:24PM -0600, Bjorn Helgaas wrote:
> > > and say something more specific than "fix struct"?
> > 
> > How about?
> > 
> > PCI/DOE: Fix initialization of work struct in pci_doe_task
> 
> The importance of this has to do with whether something is on the
> stack, so I think something about that would be useful.
> 
> I'm afraid this subject line bike-shedding has made you overlook my
> other questions below ...

Oh...  I see now.

> 
> > > On Mon, Nov 14, 2022 at 05:19:43PM -0800, ira.weiny@intel.com wrote:
> > > > From: Ira Weiny <ira.weiny@intel.com>
> > > > 
> > > > The callers of pci_doe_submit_task() allocate the pci_doe_task on the
> > > > stack.  This causes the work structure to be allocated on the stack
> > > > without pci_doe_submit_task() knowing.  Work item initialization needs
> > > > to be done with either INIT_WORK_ONSTACK() or INIT_WORK() depending on
> > > > how the work item is allocated.
> > > > 
> > > > Jonathan suggested creating doe task allocation macros such as
> > > > DECLARE_CDAT_DOE_TASK_ONSTACK().[1]  The issue with this is the work
> > > > function is not known to the callers and must be initialized correctly.
> > > > 
> > > > A follow up suggestion was to have an internal 'pci_doe_work' item
> > > > allocated by pci_doe_submit_task().[2]  This requires an allocation which
> > > > could restrict the context where tasks are used.
> > > > 
> > > > Compromise with an intermediate step to initialize the task struct with
> > > > a new call pci_doe_init_task() which must be called prior to submit
> > > > task.
> > > 
> > > I'm not really a fan of passing a parameter to say "this struct is on
> > > the stack" because that seems kind of error-prone and I don't know
> > > what the consequence of getting it wrong would be.  Sounds like it
> > > *could* be some memory corruption or reading garbage data that would
> > > be hard to debug.
> > > 
> > > Do we have cases today where pci_doe_submit_task() can't do the
> > > kzalloc() as in your patch at [3]?

No.

> > > If the current use cases allow a
> > > kzalloc(), why not do that now and defer this until it becomes an
> > > issue?

I do like pci_doe_submit_task() handling this as an internal detail.  I'm happy
with that if you are.

I was just concerned about the restriction of context.  Dan suggested this
instead of passing a gfp parameter.

If you are happy with my original patch I will submit it instead.  (With a
better one liner.)

Thanks again for the review,
Ira

> > > 
> > > Bjorn
> > > 
> > > > [1] https://lore.kernel.org/linux-cxl/20221014151045.24781-1-Jonathan.Cameron@huawei.com/T/#m88a7f50dcce52f30c8bf5c3dcc06fa9843b54a2d
> > > > [2] https://lore.kernel.org/linux-cxl/20221014151045.24781-1-Jonathan.Cameron@huawei.com/T/#m63c636c5135f304480370924f4d03c00357be667
> > > 
> > > [3] https://lore.kernel.org/linux-cxl/Y2AnKB88ALYm9c5L@iweiny-desk3/
> > > 
> > > > Cc: Bjorn Helgaas <helgaas@kernel.org>
> > > > Reported-by: Gregory Price <gregory.price@memverge.com>
> > > > Reported-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > > > Suggested-by: Dan Williams <dan.j.williams@intel.com>
> > > > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> > > > ---
> > > >  drivers/cxl/core/pci.c  |  2 ++
> > > >  drivers/pci/doe.c       | 14 ++++++++++++--
> > > >  include/linux/pci-doe.h |  8 +++++---
> > > >  3 files changed, 19 insertions(+), 5 deletions(-)
> > > > 
> > > > diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> > > > index 9240df53ed87..a19c1fa0e2f4 100644
> > > > --- a/drivers/cxl/core/pci.c
> > > > +++ b/drivers/cxl/core/pci.c
> > > > @@ -525,6 +525,7 @@ static int cxl_cdat_get_length(struct device *dev,
> > > >  	DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(0), t);
> > > >  	int rc;
> > > >  
> > > > +	pci_doe_init_task(cdat_doe, &t.task, true);
> > > >  	rc = pci_doe_submit_task(cdat_doe, &t.task);
> > > >  	if (rc < 0) {
> > > >  		dev_err(dev, "DOE submit failed: %d", rc);
> > > > @@ -554,6 +555,7 @@ static int cxl_cdat_read_table(struct device *dev,
> > > >  		u32 *entry;
> > > >  		int rc;
> > > >  
> > > > +		pci_doe_init_task(cdat_doe, &t.task, true);
> > > >  		rc = pci_doe_submit_task(cdat_doe, &t.task);
> > > >  		if (rc < 0) {
> > > >  			dev_err(dev, "DOE submit failed: %d", rc);
> > > > diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
> > > > index e402f05068a5..cabeae4ae955 100644
> > > > --- a/drivers/pci/doe.c
> > > > +++ b/drivers/pci/doe.c
> > > > @@ -319,6 +319,7 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
> > > >  	};
> > > >  	int rc;
> > > >  
> > > > +	pci_doe_init_task(doe_mb, &task, true);
> > > >  	rc = pci_doe_submit_task(doe_mb, &task);
> > > >  	if (rc < 0)
> > > >  		return rc;
> > > > @@ -495,6 +496,14 @@ bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type)
> > > >  }
> > > >  EXPORT_SYMBOL_GPL(pci_doe_supports_prot);
> > > >  
> > > > +void pci_doe_init_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task,
> > > > +		       bool onstack)
> > > > +{
> > > > +	task->doe_mb = doe_mb;
> > > > +	__INIT_WORK(&task->work, doe_statemachine_work, onstack);
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(pci_doe_init_task);
> > > > +
> > > >  /**
> > > >   * pci_doe_submit_task() - Submit a task to be processed by the state machine
> > > >   *
> > > > @@ -517,6 +526,9 @@ int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
> > > >  	if (!pci_doe_supports_prot(doe_mb, task->prot.vid, task->prot.type))
> > > >  		return -EINVAL;
> > > >  
> > > > +	if (WARN_ON_ONCE(task->work.func != doe_statemachine_work))
> > > > +		return -EINVAL;
> > > > +
> > > >  	/*
> > > >  	 * DOE requests must be a whole number of DW and the response needs to
> > > >  	 * be big enough for at least 1 DW
> > > > @@ -528,8 +540,6 @@ int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
> > > >  	if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags))
> > > >  		return -EIO;
> > > >  
> > > > -	task->doe_mb = doe_mb;
> > > > -	INIT_WORK(&task->work, doe_statemachine_work);
> > > >  	queue_work(doe_mb->work_queue, &task->work);
> > > >  	return 0;
> > > >  }
> > > > diff --git a/include/linux/pci-doe.h b/include/linux/pci-doe.h
> > > > index ed9b4df792b8..457fc0e53d64 100644
> > > > --- a/include/linux/pci-doe.h
> > > > +++ b/include/linux/pci-doe.h
> > > > @@ -31,8 +31,8 @@ struct pci_doe_mb;
> > > >   * @rv: Return value.  Length of received response or error (bytes)
> > > >   * @complete: Called when task is complete
> > > >   * @private: Private data for the consumer
> > > > - * @work: Used internally by the mailbox
> > > > - * @doe_mb: Used internally by the mailbox
> > > > + * @work: Used internally by the mailbox [see pci_doe_init_task()]
> > > > + * @doe_mb: Used internally by the mailbox [see pci_doe_init_task()]
> > > >   *
> > > >   * The payload sizes and rv are specified in bytes with the following
> > > >   * restrictions concerning the protocol.
> > > > @@ -53,7 +53,7 @@ struct pci_doe_task {
> > > >  	void (*complete)(struct pci_doe_task *task);
> > > >  	void *private;
> > > >  
> > > > -	/* No need for the user to initialize these fields */
> > > > +	/* Call pci_doe_init_task() for these */
> > > >  	struct work_struct work;
> > > >  	struct pci_doe_mb *doe_mb;
> > > >  };
> > > > @@ -72,6 +72,8 @@ struct pci_doe_task {
> > > >  
> > > >  struct pci_doe_mb *pcim_doe_create_mb(struct pci_dev *pdev, u16 cap_offset);
> > > >  bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type);
> > > > +void pci_doe_init_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task,
> > > > +		       bool onstack);
> > > >  int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task);
> > > >  
> > > >  #endif
> > > > 
> > > > base-commit: 30a0b95b1335e12efef89dd78518ed3e4a71a763
> > > > -- 
> > > > 2.37.2
> > > >
  
Bjorn Helgaas Nov. 15, 2022, 10:12 p.m. UTC | #6
On Tue, Nov 15, 2022 at 12:54:39PM -0800, Ira Weiny wrote:
> On Tue, Nov 15, 2022 at 02:41:35PM -0600, Bjorn Helgaas wrote:
> > On Tue, Nov 15, 2022 at 12:18:38PM -0800, Ira Weiny wrote:
> > > On Tue, Nov 15, 2022 at 01:44:24PM -0600, Bjorn Helgaas wrote:
> > > > On Mon, Nov 14, 2022 at 05:19:43PM -0800, ira.weiny@intel.com wrote:
> > > > > From: Ira Weiny <ira.weiny@intel.com>
> > > > > 
> > > > > The callers of pci_doe_submit_task() allocate the
> > > > > pci_doe_task on the stack.  This causes the work structure
> > > > > to be allocated on the stack without pci_doe_submit_task()
> > > > > knowing.  Work item initialization needs to be done with
> > > > > either INIT_WORK_ONSTACK() or INIT_WORK() depending on how
> > > > > the work item is allocated.
> > > > > 
> > > > > Jonathan suggested creating doe task allocation macros such
> > > > > as DECLARE_CDAT_DOE_TASK_ONSTACK().[1]  The issue with this
> > > > > is the work function is not known to the callers and must be
> > > > > initialized correctly.
> > > > > 
> > > > > A follow up suggestion was to have an internal
> > > > > 'pci_doe_work' item allocated by pci_doe_submit_task().[2]
> > > > > This requires an allocation which could restrict the context
> > > > > where tasks are used.
> > > > > 
> > > > > Compromise with an intermediate step to initialize the task
> > > > > struct with a new call pci_doe_init_task() which must be
> > > > > called prior to submit task.
> > > > 
> > > > I'm not really a fan of passing a parameter to say "this struct is on
> > > > the stack" because that seems kind of error-prone and I don't know
> > > > what the consequence of getting it wrong would be.  Sounds like it
> > > > *could* be some memory corruption or reading garbage data that would
> > > > be hard to debug.
> > > > 
> > > > Do we have cases today where pci_doe_submit_task() can't do the
> > > > kzalloc() as in your patch at [3]?
> 
> No.
> 
> > > > If the current use cases allow a
> > > > kzalloc(), why not do that now and defer this until it becomes an
> > > > issue?
> 
> I do like pci_doe_submit_task() handling this as an internal detail.
> I'm happy with that if you are.
> 
> I was just concerned about the restriction of context.  Dan
> suggested this instead of passing a gfp parameter.
> 
> If you are happy with my original patch I will submit it instead.
> (With a better one liner.)

I don't know what's coming as far as pci_doe_submit_task() callers.
If there's some imminent caller that will require atomic context, I
guess we could solve it now.  But DOE doesn't really seem like an
atomic context thing to begin with, so maybe we could postpone dealing
with it.

That patch in [3] is more complicated than I expected, but I admit I
haven't looked closely.

Bjorn

> > > > > [1] https://lore.kernel.org/linux-cxl/20221014151045.24781-1-Jonathan.Cameron@huawei.com/T/#m88a7f50dcce52f30c8bf5c3dcc06fa9843b54a2d
> > > > > [2] https://lore.kernel.org/linux-cxl/20221014151045.24781-1-Jonathan.Cameron@huawei.com/T/#m63c636c5135f304480370924f4d03c00357be667
> > > > 
> > > > [3] https://lore.kernel.org/linux-cxl/Y2AnKB88ALYm9c5L@iweiny-desk3/
  
Lukas Wunner Nov. 16, 2022, 10:09 a.m. UTC | #7
On Mon, Nov 14, 2022 at 05:19:43PM -0800, ira.weiny@intel.com wrote:
> From: Ira Weiny <ira.weiny@intel.com>
> 
> The callers of pci_doe_submit_task() allocate the pci_doe_task on the
> stack.  This causes the work structure to be allocated on the stack
> without pci_doe_submit_task() knowing.  Work item initialization needs
> to be done with either INIT_WORK_ONSTACK() or INIT_WORK() depending on
> how the work item is allocated.
> 
> Jonathan suggested creating doe task allocation macros such as
> DECLARE_CDAT_DOE_TASK_ONSTACK().[1]  The issue with this is the work
> function is not known to the callers and must be initialized correctly.
> 
> A follow up suggestion was to have an internal 'pci_doe_work' item
> allocated by pci_doe_submit_task().[2]  This requires an allocation which
> could restrict the context where tasks are used.
> 
> Compromise with an intermediate step to initialize the task struct with
> a new call pci_doe_init_task() which must be called prior to submit
> task.
> 
> [1] https://lore.kernel.org/linux-cxl/20221014151045.24781-1-Jonathan.Cameron@huawei.com/T/#m88a7f50dcce52f30c8bf5c3dcc06fa9843b54a2d
> [2] https://lore.kernel.org/linux-cxl/20221014151045.24781-1-Jonathan.Cameron@huawei.com/T/#m63c636c5135f304480370924f4d03c00357be667

We have object_is_on_stack(), included from <linux/sched/task_stack.h>.

So you could just autosense in pci_doe_submit_task() whether
pci_doe_task is on the stack and call the appropriate INIT_WORK
variant.

Any reason not to do that?

Thanks,

Lukas
  
Bjorn Helgaas Nov. 16, 2022, 6:20 p.m. UTC | #8
On Wed, Nov 16, 2022 at 11:09:39AM +0100, Lukas Wunner wrote:
> On Mon, Nov 14, 2022 at 05:19:43PM -0800, ira.weiny@intel.com wrote:
> > From: Ira Weiny <ira.weiny@intel.com>
> > 
> > The callers of pci_doe_submit_task() allocate the pci_doe_task on the
> > stack.  This causes the work structure to be allocated on the stack
> > without pci_doe_submit_task() knowing.  Work item initialization needs
> > to be done with either INIT_WORK_ONSTACK() or INIT_WORK() depending on
> > how the work item is allocated.
> > 
> > Jonathan suggested creating doe task allocation macros such as
> > DECLARE_CDAT_DOE_TASK_ONSTACK().[1]  The issue with this is the work
> > function is not known to the callers and must be initialized correctly.
> > 
> > A follow up suggestion was to have an internal 'pci_doe_work' item
> > allocated by pci_doe_submit_task().[2]  This requires an allocation which
> > could restrict the context where tasks are used.
> > 
> > Compromise with an intermediate step to initialize the task struct with
> > a new call pci_doe_init_task() which must be called prior to submit
> > task.
> > 
> > [1] https://lore.kernel.org/linux-cxl/20221014151045.24781-1-Jonathan.Cameron@huawei.com/T/#m88a7f50dcce52f30c8bf5c3dcc06fa9843b54a2d
> > [2] https://lore.kernel.org/linux-cxl/20221014151045.24781-1-Jonathan.Cameron@huawei.com/T/#m63c636c5135f304480370924f4d03c00357be667
> 
> We have object_is_on_stack(), included from <linux/sched/task_stack.h>.
> 
> So you could just autosense in pci_doe_submit_task() whether
> pci_doe_task is on the stack and call the appropriate INIT_WORK
> variant.

Nifty, I had no idea object_is_on_stack() existed, thank you!

I wonder if there's an opportunity to use object_is_on_stack()
somewhere in the INIT_WORK() path to find usage mistakes.

Adding it in pci_doe_submit_task() would add some complexity, so I'm
not sure whether it's worth adding it unless we actually have uses for
both cases.

Bjorn
  
Ira Weiny Nov. 16, 2022, 8:57 p.m. UTC | #9
On Wed, Nov 16, 2022 at 12:20:37PM -0600, Bjorn Helgaas wrote:
> On Wed, Nov 16, 2022 at 11:09:39AM +0100, Lukas Wunner wrote:
> > On Mon, Nov 14, 2022 at 05:19:43PM -0800, ira.weiny@intel.com wrote:
> > > From: Ira Weiny <ira.weiny@intel.com>
> > > 
> > > The callers of pci_doe_submit_task() allocate the pci_doe_task on the
> > > stack.  This causes the work structure to be allocated on the stack
> > > without pci_doe_submit_task() knowing.  Work item initialization needs
> > > to be done with either INIT_WORK_ONSTACK() or INIT_WORK() depending on
> > > how the work item is allocated.
> > > 
> > > Jonathan suggested creating doe task allocation macros such as
> > > DECLARE_CDAT_DOE_TASK_ONSTACK().[1]  The issue with this is the work
> > > function is not known to the callers and must be initialized correctly.
> > > 
> > > A follow up suggestion was to have an internal 'pci_doe_work' item
> > > allocated by pci_doe_submit_task().[2]  This requires an allocation which
> > > could restrict the context where tasks are used.
> > > 
> > > Compromise with an intermediate step to initialize the task struct with
> > > a new call pci_doe_init_task() which must be called prior to submit
> > > task.
> > > 
> > > [1] https://lore.kernel.org/linux-cxl/20221014151045.24781-1-Jonathan.Cameron@huawei.com/T/#m88a7f50dcce52f30c8bf5c3dcc06fa9843b54a2d
> > > [2] https://lore.kernel.org/linux-cxl/20221014151045.24781-1-Jonathan.Cameron@huawei.com/T/#m63c636c5135f304480370924f4d03c00357be667
> > 
> > We have object_is_on_stack(), included from <linux/sched/task_stack.h>.
> > 
> > So you could just autosense in pci_doe_submit_task() whether
> > pci_doe_task is on the stack and call the appropriate INIT_WORK
> > variant.
> 
> Nifty, I had no idea object_is_on_stack() existed, thank you!

Indeed!  Neither did I!  Thanks!

> 
> I wonder if there's an opportunity to use object_is_on_stack()
> somewhere in the INIT_WORK() path to find usage mistakes.

I'm thinking we could make INIT_WORK do the right thing all the time.  Not sure
what the overhead of object_is_on_stack() is.

> 
> Adding it in pci_doe_submit_task() would add some complexity, so I'm
> not sure whether it's worth adding it unless we actually have uses for
> both cases.

I think if we don't do something, we have to document that
pci_doe_submit_task() only works with tasks on the stack.

I would rather just make pci_doe_submit_task() correct and not complicate the
callers.  object_is_on_stack() can't be enough overhead to be worried about in
this call path, can it?

Actually, after writing all that, I wonder if we can't push the use of
object_is_on_stack() into the debug code?  Something like below (completely
untested)?  I think this could be pushed even further down, but I'd like to get
opinions before attempting a change which will have a wider blast radius.

Ira


diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index a0143dd24430..4cc50b554a29 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -199,7 +199,7 @@ struct execute_work {
        struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, TIMER_DEFERRABLE)
 
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
-extern void __init_work(struct work_struct *work, int onstack);
+extern void __init_work(struct work_struct *work);
 extern void destroy_work_on_stack(struct work_struct *work);
 extern void destroy_delayed_work_on_stack(struct delayed_work *work);
 static inline unsigned int work_static(struct work_struct *work)
@@ -207,7 +207,7 @@ static inline unsigned int work_static(struct work_struct *work)
        return *work_data_bits(work) & WORK_STRUCT_STATIC;
 }
 #else
-static inline void __init_work(struct work_struct *work, int onstack) { }
+static inline void __init_work(struct work_struct *work) { }
 static inline void destroy_work_on_stack(struct work_struct *work) { }
 static inline void destroy_delayed_work_on_stack(struct delayed_work *work) { }
 static inline unsigned int work_static(struct work_struct *work) { return 0; }
@@ -221,20 +221,20 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
  * to generate better code.
  */
 #ifdef CONFIG_LOCKDEP
-#define __INIT_WORK(_work, _func, _onstack)                            \
+#define __INIT_WORK(_work, _func)                                      \
        do {                                                            \
                static struct lock_class_key __key;                     \
                                                                        \
-               __init_work((_work), _onstack);                         \
+               __init_work(_work);                                     \
                (_work)->data = (atomic_long_t) WORK_DATA_INIT();       \
                lockdep_init_map(&(_work)->lockdep_map, "(work_completion)"#_work, &__key, 0); \
                INIT_LIST_HEAD(&(_work)->entry);                        \
                (_work)->func = (_func);                                \
        } while (0)
 #else
-#define __INIT_WORK(_work, _func, _onstack)                            \
+#define __INIT_WORK(_work, _func)                                      \
        do {                                                            \
-               __init_work((_work), _onstack);                         \
+               __init_work(_work);                                     \
                (_work)->data = (atomic_long_t) WORK_DATA_INIT();       \
                INIT_LIST_HEAD(&(_work)->entry);                        \
                (_work)->func = (_func);                                \
@@ -242,10 +242,10 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
 #endif
 
 #define INIT_WORK(_work, _func)                                                \
-       __INIT_WORK((_work), (_func), 0)
+       __INIT_WORK((_work), (_func))
 
 #define INIT_WORK_ONSTACK(_work, _func)                                        \
-       __INIT_WORK((_work), (_func), 1)
+       __INIT_WORK((_work), (_func))
 
 #define __INIT_DELAYED_WORK(_work, _func, _tflags)                     \
        do {                                                            \
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 7cd5f5e7e0a1..7d87300cfbc6 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -515,9 +515,9 @@ static inline void debug_work_deactivate(struct work_struct *work)
        debug_object_deactivate(work, &work_debug_descr);
 }
 
-void __init_work(struct work_struct *work, int onstack)
+void __init_work(struct work_struct *work)
 {
-       if (onstack)
+       if (object_is_on_stack(work))
                debug_object_init_on_stack(work, &work_debug_descr);
        else
                debug_object_init(work, &work_debug_descr);
  
Dan Williams Nov. 16, 2022, 9:10 p.m. UTC | #10
Ira Weiny wrote:
> On Wed, Nov 16, 2022 at 12:20:37PM -0600, Bjorn Helgaas wrote:
> > On Wed, Nov 16, 2022 at 11:09:39AM +0100, Lukas Wunner wrote:
> > > On Mon, Nov 14, 2022 at 05:19:43PM -0800, ira.weiny@intel.com wrote:
> > > > From: Ira Weiny <ira.weiny@intel.com>
> > > > 
> > > > The callers of pci_doe_submit_task() allocate the pci_doe_task on the
> > > > stack.  This causes the work structure to be allocated on the stack
> > > > without pci_doe_submit_task() knowing.  Work item initialization needs
> > > > to be done with either INIT_WORK_ONSTACK() or INIT_WORK() depending on
> > > > how the work item is allocated.
> > > > 
> > > > Jonathan suggested creating doe task allocation macros such as
> > > > DECLARE_CDAT_DOE_TASK_ONSTACK().[1]  The issue with this is the work
> > > > function is not known to the callers and must be initialized correctly.
> > > > 
> > > > A follow up suggestion was to have an internal 'pci_doe_work' item
> > > > allocated by pci_doe_submit_task().[2]  This requires an allocation which
> > > > could restrict the context where tasks are used.
> > > > 
> > > > Compromise with an intermediate step to initialize the task struct with
> > > > a new call pci_doe_init_task() which must be called prior to submit
> > > > task.
> > > > 
> > > > [1] https://lore.kernel.org/linux-cxl/20221014151045.24781-1-Jonathan.Cameron@huawei.com/T/#m88a7f50dcce52f30c8bf5c3dcc06fa9843b54a2d
> > > > [2] https://lore.kernel.org/linux-cxl/20221014151045.24781-1-Jonathan.Cameron@huawei.com/T/#m63c636c5135f304480370924f4d03c00357be667
> > > 
> > > We have object_is_on_stack(), included from <linux/sched/task_stack.h>.
> > > 
> > > So you could just autosense in pci_doe_submit_task() whether
> > > pci_doe_task is on the stack and call the appropriate INIT_WORK
> > > variant.
> > 
> > Nifty, I had no idea object_is_on_stack() existed, thank you!
> 
> Indeed!  Neither did I!  thanks!
> 
> > 
> > I wonder if there's an opportunity to use object_is_on_stack()
> > somewhere in the INIT_WORK() path to find usage mistakes.
> 
> I'm thinking we could make INIT_WORK do the right thing all the time.  Not sure
> what the overhead of object_is_on_stack() is.
> 
> > 
> > Adding it in pci_doe_submit_task() would add some complexity, so I'm
> > not sure whether it's worth adding it unless we actually have uses for
> > both cases.
> 
> I think if we don't do something we have to document that
> pci_doe_submit_task() only works with tasks on the stack.
> 
> I would rather just make pci_doe_submit_task() correct and not complicate the
> callers.  object_is_on_stack() can't be enough overhead to be worried about in
> this call path can it?
> 
> Actually after writing all that I wonder if we can't push the use of
> object_is_on_stack() into the debug code?  Something like below (completely
> untested)?  I think this could be pushed even further down but I'd like to get
> opinions before attempting a change which will have a wider blast radius.

This looks reasonable, but I would do it after and independently of
introducing the autosensing version of pci_doe_submit_task(). Then you
can pursue this line of thinking and come back to simplify
pci_doe_submit_task() if it indeed moves forward.
  

Patch

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 9240df53ed87..a19c1fa0e2f4 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -525,6 +525,7 @@  static int cxl_cdat_get_length(struct device *dev,
 	DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(0), t);
 	int rc;
 
+	pci_doe_init_task(cdat_doe, &t.task, true);
 	rc = pci_doe_submit_task(cdat_doe, &t.task);
 	if (rc < 0) {
 		dev_err(dev, "DOE submit failed: %d", rc);
@@ -554,6 +555,7 @@  static int cxl_cdat_read_table(struct device *dev,
 		u32 *entry;
 		int rc;
 
+		pci_doe_init_task(cdat_doe, &t.task, true);
 		rc = pci_doe_submit_task(cdat_doe, &t.task);
 		if (rc < 0) {
 			dev_err(dev, "DOE submit failed: %d", rc);
diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
index e402f05068a5..cabeae4ae955 100644
--- a/drivers/pci/doe.c
+++ b/drivers/pci/doe.c
@@ -319,6 +319,7 @@  static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
 	};
 	int rc;
 
+	pci_doe_init_task(doe_mb, &task, true);
 	rc = pci_doe_submit_task(doe_mb, &task);
 	if (rc < 0)
 		return rc;
@@ -495,6 +496,14 @@  bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type)
 }
 EXPORT_SYMBOL_GPL(pci_doe_supports_prot);
 
+void pci_doe_init_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task,
+		       bool onstack)
+{
+	task->doe_mb = doe_mb;
+	__INIT_WORK(&task->work, doe_statemachine_work, onstack);
+}
+EXPORT_SYMBOL_GPL(pci_doe_init_task);
+
 /**
  * pci_doe_submit_task() - Submit a task to be processed by the state machine
  *
@@ -517,6 +526,9 @@  int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
 	if (!pci_doe_supports_prot(doe_mb, task->prot.vid, task->prot.type))
 		return -EINVAL;
 
+	if (WARN_ON_ONCE(task->work.func != doe_statemachine_work))
+		return -EINVAL;
+
 	/*
 	 * DOE requests must be a whole number of DW and the response needs to
 	 * be big enough for at least 1 DW
@@ -528,8 +540,6 @@  int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
 	if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags))
 		return -EIO;
 
-	task->doe_mb = doe_mb;
-	INIT_WORK(&task->work, doe_statemachine_work);
 	queue_work(doe_mb->work_queue, &task->work);
 	return 0;
 }
diff --git a/include/linux/pci-doe.h b/include/linux/pci-doe.h
index ed9b4df792b8..457fc0e53d64 100644
--- a/include/linux/pci-doe.h
+++ b/include/linux/pci-doe.h
@@ -31,8 +31,8 @@  struct pci_doe_mb;
  * @rv: Return value.  Length of received response or error (bytes)
  * @complete: Called when task is complete
  * @private: Private data for the consumer
- * @work: Used internally by the mailbox
- * @doe_mb: Used internally by the mailbox
+ * @work: Used internally by the mailbox [see pci_doe_init_task()]
+ * @doe_mb: Used internally by the mailbox [see pci_doe_init_task()]
  *
  * The payload sizes and rv are specified in bytes with the following
  * restrictions concerning the protocol.
@@ -53,7 +53,7 @@  struct pci_doe_task {
 	void (*complete)(struct pci_doe_task *task);
 	void *private;
 
-	/* No need for the user to initialize these fields */
+	/* Call pci_doe_init_task() for these */
 	struct work_struct work;
 	struct pci_doe_mb *doe_mb;
 };
@@ -72,6 +72,8 @@  struct pci_doe_task {
 
 struct pci_doe_mb *pcim_doe_create_mb(struct pci_dev *pdev, u16 cap_offset);
 bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type);
+void pci_doe_init_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task,
+		       bool onstack);
 int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task);
 
 #endif