[v3,15/16] nvmet-fc: avoid deadlock on delete association path

Message ID 20231218153105.12717-16-dwagner@suse.de
State New
Headers
Series enable nvmet-fc for blktests |

Commit Message

Daniel Wagner Dec. 18, 2023, 3:31 p.m. UTC
  When deleting an association the shutdown path is deadlocking because we
try to flush the nvmet_wq nested. Avoid this by deadlock by deferring
the put work into its own work item.

Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
 drivers/nvme/target/fc.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)
  

Comments

kernel test robot Dec. 19, 2023, 6 a.m. UTC | #1
Hi Daniel,

kernel test robot noticed the following build errors:

[auto build test ERROR on linux-nvme/nvme-6.8]
[also build test ERROR on linus/master v6.7-rc6 next-20231218]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Daniel-Wagner/nvmet-fc-remove-unnecessary-bracket/20231219-003117
base:   git://git.infradead.org/nvme.git nvme-6.8
patch link:    https://lore.kernel.org/r/20231218153105.12717-16-dwagner%40suse.de
patch subject: [PATCH v3 15/16] nvmet-fc: avoid deadlock on delete association path
config: x86_64-rhel-8.3-rust (https://download.01.org/0day-ci/archive/20231219/202312191347.7X0VJgnV-lkp@intel.com/config)
compiler: clang version 16.0.4 (https://github.com/llvm/llvm-project.git ae42196bc493ffe877a7e3dff8be32035dea4d07)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231219/202312191347.7X0VJgnV-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202312191347.7X0VJgnV-lkp@intel.com/

All errors (new ones prefixed by >>):

>> drivers/nvme/target/fc.c:253:2: error: call to undeclared function 'nvmet_fc_tgtport_put'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
           nvmet_fc_tgtport_put(tgtport);
           ^
>> drivers/nvme/target/fc.c:259:13: error: static declaration of 'nvmet_fc_tgtport_put' follows non-static declaration
   static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport);
               ^
   drivers/nvme/target/fc.c:253:2: note: previous implicit declaration is here
           nvmet_fc_tgtport_put(tgtport);
           ^
   2 errors generated.


vim +/nvmet_fc_tgtport_put +253 drivers/nvme/target/fc.c

   244	
   245	
   246	static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work);
   247	static void nvmet_fc_fcp_rqst_op_defer_work(struct work_struct *work);
   248	static void nvmet_fc_put_tgtport_work(struct work_struct *work)
   249	{
   250		struct nvmet_fc_tgtport *tgtport =
   251			container_of(work, struct nvmet_fc_tgtport, put_work);
   252	
 > 253		nvmet_fc_tgtport_put(tgtport);
   254	}
   255	static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc);
   256	static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc);
   257	static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue);
   258	static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue);
 > 259	static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport);
   260	static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport);
   261	static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
   262						struct nvmet_fc_fcp_iod *fod);
   263	static void nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc);
   264	static void nvmet_fc_xmt_ls_rsp(struct nvmet_fc_tgtport *tgtport,
   265					struct nvmet_fc_ls_iod *iod);
   266	
   267
  
kernel test robot Dec. 19, 2023, 8:29 a.m. UTC | #2
Hi Daniel,

kernel test robot noticed the following build errors:

[auto build test ERROR on linux-nvme/nvme-6.8]
[also build test ERROR on linus/master v6.7-rc6 next-20231219]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Daniel-Wagner/nvmet-fc-remove-unnecessary-bracket/20231219-003117
base:   git://git.infradead.org/nvme.git nvme-6.8
patch link:    https://lore.kernel.org/r/20231218153105.12717-16-dwagner%40suse.de
patch subject: [PATCH v3 15/16] nvmet-fc: avoid deadlock on delete association path
config: loongarch-defconfig (https://download.01.org/0day-ci/archive/20231219/202312191634.ASof5mm8-lkp@intel.com/config)
compiler: loongarch64-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231219/202312191634.ASof5mm8-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202312191634.ASof5mm8-lkp@intel.com/

All error/warnings (new ones prefixed by >>):

   drivers/nvme/target/fc.c: In function 'nvmet_fc_put_tgtport_work':
>> drivers/nvme/target/fc.c:253:9: error: implicit declaration of function 'nvmet_fc_tgtport_put'; did you mean 'nvmet_ctrl_put'? [-Werror=implicit-function-declaration]
     253 |         nvmet_fc_tgtport_put(tgtport);
         |         ^~~~~~~~~~~~~~~~~~~~
         |         nvmet_ctrl_put
   drivers/nvme/target/fc.c: At top level:
>> drivers/nvme/target/fc.c:259:13: warning: conflicting types for 'nvmet_fc_tgtport_put'; have 'void(struct nvmet_fc_tgtport *)'
     259 | static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport);
         |             ^~~~~~~~~~~~~~~~~~~~
   drivers/nvme/target/fc.c:259:13: error: static declaration of 'nvmet_fc_tgtport_put' follows non-static declaration
   drivers/nvme/target/fc.c:253:9: note: previous implicit declaration of 'nvmet_fc_tgtport_put' with type 'void(struct nvmet_fc_tgtport *)'
     253 |         nvmet_fc_tgtport_put(tgtport);
         |         ^~~~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors


vim +253 drivers/nvme/target/fc.c

   244	
   245	
   246	static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work);
   247	static void nvmet_fc_fcp_rqst_op_defer_work(struct work_struct *work);
   248	static void nvmet_fc_put_tgtport_work(struct work_struct *work)
   249	{
   250		struct nvmet_fc_tgtport *tgtport =
   251			container_of(work, struct nvmet_fc_tgtport, put_work);
   252	
 > 253		nvmet_fc_tgtport_put(tgtport);
   254	}
   255	static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc);
   256	static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc);
   257	static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue);
   258	static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue);
 > 259	static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport);
   260	static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport);
   261	static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
   262						struct nvmet_fc_fcp_iod *fod);
   263	static void nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc);
   264	static void nvmet_fc_xmt_ls_rsp(struct nvmet_fc_tgtport *tgtport,
   265					struct nvmet_fc_ls_iod *iod);
   266	
   267
  
kernel test robot Dec. 19, 2023, 11:17 a.m. UTC | #3
Hi Daniel,

kernel test robot noticed the following build warnings:

[auto build test WARNING on linux-nvme/nvme-6.8]
[also build test WARNING on linus/master v6.7-rc6 next-20231219]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Daniel-Wagner/nvmet-fc-remove-unnecessary-bracket/20231219-003117
base:   git://git.infradead.org/nvme.git nvme-6.8
patch link:    https://lore.kernel.org/r/20231218153105.12717-16-dwagner%40suse.de
patch subject: [PATCH v3 15/16] nvmet-fc: avoid deadlock on delete association path
config: i386-randconfig-013-20231219 (https://download.01.org/0day-ci/archive/20231219/202312191845.lwrs1kbh-lkp@intel.com/config)
compiler: gcc-7 (Ubuntu 7.5.0-6ubuntu2) 7.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231219/202312191845.lwrs1kbh-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202312191845.lwrs1kbh-lkp@intel.com/

All warnings (new ones prefixed by >>):

   drivers/nvme/target/fc.c: In function 'nvmet_fc_put_tgtport_work':
   drivers/nvme/target/fc.c:253:2: error: implicit declaration of function 'nvmet_fc_tgtport_put'; did you mean 'nvmet_ctrl_put'? [-Werror=implicit-function-declaration]
     nvmet_fc_tgtport_put(tgtport);
     ^~~~~~~~~~~~~~~~~~~~
     nvmet_ctrl_put
   drivers/nvme/target/fc.c: At top level:
>> drivers/nvme/target/fc.c:259:13: warning: conflicting types for 'nvmet_fc_tgtport_put'
    static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport);
                ^~~~~~~~~~~~~~~~~~~~
   drivers/nvme/target/fc.c:259:13: error: static declaration of 'nvmet_fc_tgtport_put' follows non-static declaration
   drivers/nvme/target/fc.c:253:2: note: previous implicit declaration of 'nvmet_fc_tgtport_put' was here
     nvmet_fc_tgtport_put(tgtport);
     ^~~~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors


vim +/nvmet_fc_tgtport_put +259 drivers/nvme/target/fc.c

c53432030d8642 James Smart   2016-12-02  244  
c53432030d8642 James Smart   2016-12-02  245  
c53432030d8642 James Smart   2016-12-02  246  static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work);
9d625f7792875e James Smart   2018-02-28  247  static void nvmet_fc_fcp_rqst_op_defer_work(struct work_struct *work);
20d5f3b830ab45 Daniel Wagner 2023-12-18  248  static void nvmet_fc_put_tgtport_work(struct work_struct *work)
20d5f3b830ab45 Daniel Wagner 2023-12-18  249  {
20d5f3b830ab45 Daniel Wagner 2023-12-18  250  	struct nvmet_fc_tgtport *tgtport =
20d5f3b830ab45 Daniel Wagner 2023-12-18  251  		container_of(work, struct nvmet_fc_tgtport, put_work);
20d5f3b830ab45 Daniel Wagner 2023-12-18  252  
20d5f3b830ab45 Daniel Wagner 2023-12-18  253  	nvmet_fc_tgtport_put(tgtport);
20d5f3b830ab45 Daniel Wagner 2023-12-18  254  }
c53432030d8642 James Smart   2016-12-02  255  static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc);
c53432030d8642 James Smart   2016-12-02  256  static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc);
c53432030d8642 James Smart   2016-12-02  257  static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue);
c53432030d8642 James Smart   2016-12-02  258  static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue);
c53432030d8642 James Smart   2016-12-02 @259  static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport);
c53432030d8642 James Smart   2016-12-02  260  static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport);
0fb228d30b8d72 James Smart   2017-08-01  261  static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
0fb228d30b8d72 James Smart   2017-08-01  262  					struct nvmet_fc_fcp_iod *fod);
a96d4bd867129e James Smart   2017-10-27  263  static void nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc);
47bf3241064498 James Smart   2020-03-31  264  static void nvmet_fc_xmt_ls_rsp(struct nvmet_fc_tgtport *tgtport,
47bf3241064498 James Smart   2020-03-31  265  				struct nvmet_fc_ls_iod *iod);
c53432030d8642 James Smart   2016-12-02  266  
c53432030d8642 James Smart   2016-12-02  267
  
Hannes Reinecke Dec. 19, 2023, 11:43 a.m. UTC | #4
On 12/18/23 16:31, Daniel Wagner wrote:
> When deleting an association the shutdown path is deadlocking because we
> try to flush the nvmet_wq nested. Avoid this by deadlock by deferring
> the put work into its own work item.
> 
Maybe deleting the first 'by' ?

> Signed-off-by: Daniel Wagner <dwagner@suse.de>
> ---
>   drivers/nvme/target/fc.c | 14 ++++++++++++--
>   1 file changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
> index 23d8779dc221..30ba4ede333f 100644
> --- a/drivers/nvme/target/fc.c
> +++ b/drivers/nvme/target/fc.c
> @@ -111,6 +111,8 @@ struct nvmet_fc_tgtport {
>   	struct nvmet_fc_port_entry	*pe;
>   	struct kref			ref;
>   	u32				max_sg_cnt;
> +
> +	struct work_struct		put_work;
>   };
>   
>   struct nvmet_fc_port_entry {
> @@ -243,6 +245,13 @@ static LIST_HEAD(nvmet_fc_portentry_list);
>   
>   static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work);
>   static void nvmet_fc_fcp_rqst_op_defer_work(struct work_struct *work);
> +static void nvmet_fc_put_tgtport_work(struct work_struct *work)
> +{
> +	struct nvmet_fc_tgtport *tgtport =
> +		container_of(work, struct nvmet_fc_tgtport, put_work);
> +
> +	nvmet_fc_tgtport_put(tgtport);
> +}
>   static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc);
>   static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc);
>   static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue);
> @@ -359,7 +368,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop)
>   
>   	if (!lsop->req_queued) {
>   		spin_unlock_irqrestore(&tgtport->lock, flags);
> -		nvmet_fc_tgtport_put(tgtport);
> +		queue_work(nvmet_wq, &tgtport->put_work);
>   		return;
>   	}
>   
> @@ -373,7 +382,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop)
>   				  (lsreq->rqstlen + lsreq->rsplen),
>   				  DMA_BIDIRECTIONAL);
>   
> -	nvmet_fc_tgtport_put(tgtport);
> +	queue_work(nvmet_wq, &tgtport->put_work);
>   }
>   
>   static int
> @@ -1402,6 +1411,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo,
>   	kref_init(&newrec->ref);
>   	ida_init(&newrec->assoc_cnt);
>   	newrec->max_sg_cnt = template->max_sgl_segments;
> +	INIT_WORK(&newrec->put_work, nvmet_fc_put_tgtport_work);
>   
>   	ret = nvmet_fc_alloc_ls_iodlist(newrec);
>   	if (ret) {
Otherwise:

Reviewed-by: Hannes Reinecke <hare@suse.de>

Cheers,

Hannes
  

Patch

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 23d8779dc221..30ba4ede333f 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -111,6 +111,8 @@  struct nvmet_fc_tgtport {
 	struct nvmet_fc_port_entry	*pe;
 	struct kref			ref;
 	u32				max_sg_cnt;
+
+	struct work_struct		put_work;
 };
 
 struct nvmet_fc_port_entry {
@@ -243,6 +245,13 @@  static LIST_HEAD(nvmet_fc_portentry_list);
 
 static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work);
 static void nvmet_fc_fcp_rqst_op_defer_work(struct work_struct *work);
+static void nvmet_fc_put_tgtport_work(struct work_struct *work)
+{
+	struct nvmet_fc_tgtport *tgtport =
+		container_of(work, struct nvmet_fc_tgtport, put_work);
+
+	nvmet_fc_tgtport_put(tgtport);
+}
 static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc);
 static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc);
 static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue);
@@ -359,7 +368,7 @@  __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop)
 
 	if (!lsop->req_queued) {
 		spin_unlock_irqrestore(&tgtport->lock, flags);
-		nvmet_fc_tgtport_put(tgtport);
+		queue_work(nvmet_wq, &tgtport->put_work);
 		return;
 	}
 
@@ -373,7 +382,7 @@  __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop)
 				  (lsreq->rqstlen + lsreq->rsplen),
 				  DMA_BIDIRECTIONAL);
 
-	nvmet_fc_tgtport_put(tgtport);
+	queue_work(nvmet_wq, &tgtport->put_work);
 }
 
 static int
@@ -1402,6 +1411,7 @@  nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo,
 	kref_init(&newrec->ref);
 	ida_init(&newrec->assoc_cnt);
 	newrec->max_sg_cnt = template->max_sgl_segments;
+	INIT_WORK(&newrec->put_work, nvmet_fc_put_tgtport_work);
 
 	ret = nvmet_fc_alloc_ls_iodlist(newrec);
 	if (ret) {