NFSv4.1: handle memory allocation failure in nfs4_schedule_state_manager()

Message ID 20221111161033.899541-1-chenxiaosong2@huawei.com
State New
Headers
Series NFSv4.1: handle memory allocation failure in nfs4_schedule_state_manager() |

Commit Message

ChenXiaoSong Nov. 11, 2022, 4:10 p.m. UTC
  If memory allocation fails in nfs4_schedule_state_manager() when mounting
NFSv4.1/NFSv4.2, nfs4_run_state_manager() will not be called, and the current
construction state will never be marked as ready or failed, so
nfs_wait_client_init_complete() will wait forever, as shown below:

  syscall(mount)
  ...
    nfs4_init_client
      nfs4_discover_server_trunking
        nfs41_discover_server_trunking
          nfs4_schedule_state_manager
            kthread_run /* nfs4_run_state_manager() will not be called */
              kthread_create
                kthread_create_on_node
                  __kthread_create_on_node
                    create = kmalloc() = NULL
                    return ERR_PTR(-ENOMEM)
          nfs_wait_client_init_complete /* wait forever */

Fix this by checking the return value of nfs4_schedule_state_manager(), which
indicates whether the kernel thread was created successfully.

Signed-off-by: ChenXiaoSong <chenxiaosong2@huawei.com>
---
 fs/nfs/nfs4_fs.h   |  2 +-
 fs/nfs/nfs4state.c | 15 ++++++++++-----
 2 files changed, 11 insertions(+), 6 deletions(-)
  

Comments

kernel test robot Nov. 11, 2022, 5:23 p.m. UTC | #1
Hi ChenXiaoSong,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on trondmy-nfs/linux-next]
[also build test WARNING on linus/master v6.1-rc4 next-20221111]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/ChenXiaoSong/NFSv4-1-handle-memory-allocation-failure-in-nfs4_schedule_state_manager/20221111-231029
base:   git://git.linux-nfs.org/projects/trondmy/linux-nfs.git linux-next
patch link:    https://lore.kernel.org/r/20221111161033.899541-1-chenxiaosong2%40huawei.com
patch subject: [PATCH] NFSv4.1: handle memory allocation failure in nfs4_schedule_state_manager()
config: powerpc-allmodconfig
compiler: powerpc-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/8c841c9d48729579480bc452fdceff3dfdbf31c4
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review ChenXiaoSong/NFSv4-1-handle-memory-allocation-failure-in-nfs4_schedule_state_manager/20221111-231029
        git checkout 8c841c9d48729579480bc452fdceff3dfdbf31c4
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=powerpc SHELL=/bin/bash fs/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   In file included from include/linux/kernel.h:29,
                    from fs/nfs/nfs4state.c:41:
   fs/nfs/nfs4state.c: In function 'nfs4_schedule_state_manager':
>> include/linux/kern_levels.h:5:25: warning: format '%ld' expects argument of type 'long int', but argument 3 has type 'int' [-Wformat=]
       5 | #define KERN_SOH        "\001"          /* ASCII Start Of Header */
         |                         ^~~~~~
   include/linux/printk.h:429:25: note: in definition of macro 'printk_index_wrap'
     429 |                 _p_func(_fmt, ##__VA_ARGS__);                           \
         |                         ^~~~
   fs/nfs/nfs4state.c:1234:17: note: in expansion of macro 'printk'
    1234 |                 printk(KERN_ERR "%s: kthread_run: %ld\n",
         |                 ^~~~~~
   include/linux/kern_levels.h:11:25: note: in expansion of macro 'KERN_SOH'
      11 | #define KERN_ERR        KERN_SOH "3"    /* error conditions */
         |                         ^~~~~~~~
   fs/nfs/nfs4state.c:1234:24: note: in expansion of macro 'KERN_ERR'
    1234 |                 printk(KERN_ERR "%s: kthread_run: %ld\n",
         |                        ^~~~~~~~


vim +5 include/linux/kern_levels.h

314ba3520e513a Joe Perches 2012-07-30  4  
04d2c8c83d0e3a Joe Perches 2012-07-30 @5  #define KERN_SOH	"\001"		/* ASCII Start Of Header */
04d2c8c83d0e3a Joe Perches 2012-07-30  6  #define KERN_SOH_ASCII	'\001'
04d2c8c83d0e3a Joe Perches 2012-07-30  7
  
kernel test robot Nov. 11, 2022, 6:03 p.m. UTC | #2
Hi ChenXiaoSong,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on trondmy-nfs/linux-next]
[also build test WARNING on linus/master v6.1-rc4 next-20221111]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/ChenXiaoSong/NFSv4-1-handle-memory-allocation-failure-in-nfs4_schedule_state_manager/20221111-231029
base:   git://git.linux-nfs.org/projects/trondmy/linux-nfs.git linux-next
patch link:    https://lore.kernel.org/r/20221111161033.899541-1-chenxiaosong2%40huawei.com
patch subject: [PATCH] NFSv4.1: handle memory allocation failure in nfs4_schedule_state_manager()
config: arm-netwinder_defconfig
compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 463da45892e2d2a262277b91b96f5f8c05dc25d0)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install arm cross compiling tool for clang build
        # apt-get install binutils-arm-linux-gnueabi
        # https://github.com/intel-lab-lkp/linux/commit/8c841c9d48729579480bc452fdceff3dfdbf31c4
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review ChenXiaoSong/NFSv4-1-handle-memory-allocation-failure-in-nfs4_schedule_state_manager/20221111-231029
        git checkout 8c841c9d48729579480bc452fdceff3dfdbf31c4
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash fs/nfs/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> fs/nfs/nfs4state.c:1235:14: warning: format specifies type 'long' but the argument has type 'int' [-Wformat]
                           __func__, ret);
                                     ^~~
   include/linux/printk.h:457:60: note: expanded from macro 'printk'
   #define printk(fmt, ...) printk_index_wrap(_printk, fmt, ##__VA_ARGS__)
                                                       ~~~    ^~~~~~~~~~~
   include/linux/printk.h:429:19: note: expanded from macro 'printk_index_wrap'
                   _p_func(_fmt, ##__VA_ARGS__);                           \
                           ~~~~    ^~~~~~~~~~~
   1 warning generated.


vim +1235 fs/nfs/nfs4state.c

  1201	
  1202	/*
  1203	 * Schedule the nfs_client asynchronous state management routine
  1204	 */
  1205	int nfs4_schedule_state_manager(struct nfs_client *clp)
  1206	{
  1207		struct task_struct *task;
  1208		char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
  1209		struct rpc_clnt *cl = clp->cl_rpcclient;
  1210		int ret = 0;
  1211	
  1212		while (cl != cl->cl_parent)
  1213			cl = cl->cl_parent;
  1214	
  1215		set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
  1216		if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) {
  1217			wake_up_var(&clp->cl_state);
  1218			goto out;
  1219		}
  1220		set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
  1221		__module_get(THIS_MODULE);
  1222		refcount_inc(&clp->cl_count);
  1223	
  1224		/* The rcu_read_lock() is not strictly necessary, as the state
  1225		 * manager is the only thread that ever changes the rpc_xprt
  1226		 * after it's initialized.  At this point, we're single threaded. */
  1227		rcu_read_lock();
  1228		snprintf(buf, sizeof(buf), "%s-manager",
  1229				rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
  1230		rcu_read_unlock();
  1231		task = kthread_run(nfs4_run_state_manager, clp, "%s", buf);
  1232		if (IS_ERR(task)) {
  1233			ret = PTR_ERR(task);
  1234			printk(KERN_ERR "%s: kthread_run: %ld\n",
> 1235				__func__, ret);
  1236			nfs4_clear_state_manager_bit(clp);
  1237			clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
  1238			nfs_put_client(clp);
  1239			module_put(THIS_MODULE);
  1240		}
  1241	out:
  1242		return ret;
  1243	}
  1244
  

Patch

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index cfef738d765e..74c6d1504010 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -502,7 +502,7 @@  extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *, struct nfs4_stat
 extern void nfs4_schedule_lease_recovery(struct nfs_client *);
 extern int nfs4_wait_clnt_recover(struct nfs_client *clp);
 extern int nfs4_client_recover_expired_lease(struct nfs_client *clp);
-extern void nfs4_schedule_state_manager(struct nfs_client *);
+extern int nfs4_schedule_state_manager(struct nfs_client *);
 extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp);
 extern int nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
 extern int nfs4_schedule_migration_recovery(const struct nfs_server *);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index a2d2d5d1b088..127027f777c8 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -376,8 +376,9 @@  int nfs41_discover_server_trunking(struct nfs_client *clp,
 		else
 			set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
 	}
-	nfs4_schedule_state_manager(clp);
-	status = nfs_wait_client_init_complete(clp);
+	status = nfs4_schedule_state_manager(clp);
+	if (!status)
+		status = nfs_wait_client_init_complete(clp);
 	if (status < 0)
 		nfs_put_client(clp);
 	return status;
@@ -1201,11 +1202,12 @@  static void nfs4_clear_state_manager_bit(struct nfs_client *clp)
 /*
  * Schedule the nfs_client asynchronous state management routine
  */
-void nfs4_schedule_state_manager(struct nfs_client *clp)
+int nfs4_schedule_state_manager(struct nfs_client *clp)
 {
 	struct task_struct *task;
 	char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
 	struct rpc_clnt *cl = clp->cl_rpcclient;
+	int ret = 0;
 
 	while (cl != cl->cl_parent)
 		cl = cl->cl_parent;
@@ -1213,7 +1215,7 @@  void nfs4_schedule_state_manager(struct nfs_client *clp)
 	set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
 	if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) {
 		wake_up_var(&clp->cl_state);
-		return;
+		goto out;
 	}
 	set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
 	__module_get(THIS_MODULE);
@@ -1228,13 +1230,16 @@  void nfs4_schedule_state_manager(struct nfs_client *clp)
 	rcu_read_unlock();
 	task = kthread_run(nfs4_run_state_manager, clp, "%s", buf);
 	if (IS_ERR(task)) {
+		ret = PTR_ERR(task);
 		printk(KERN_ERR "%s: kthread_run: %ld\n",
-			__func__, PTR_ERR(task));
+			__func__, ret);
 		nfs4_clear_state_manager_bit(clp);
 		clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
 		nfs_put_client(clp);
 		module_put(THIS_MODULE);
 	}
+out:
+	return ret;
 }
 
 /*