[v8] f2fs: support errors=remount-ro|continue|panic mountoption

Message ID 20230408123153.954480-1-chao@kernel.org
State New
Headers
Series [v8] f2fs: support errors=remount-ro|continue|panic mountoption |

Commit Message

Chao Yu April 8, 2023, 12:31 p.m. UTC
  This patch supports errors=remount-ro|continue|panic mount option,
by default it uses "remount-ro" mode.

Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
---
v8:
- record stop reason synchronously if it's from ioctl shutdown interface
- fix typo in doc and commit message.
- use f2fs_hw_is_readonly() instead of bdev_read_only()
 Documentation/filesystems/f2fs.rst |   5 ++
 fs/f2fs/checkpoint.c               |   7 +-
 fs/f2fs/f2fs.h                     |  20 ++++-
 fs/f2fs/file.c                     |   5 --
 fs/f2fs/gc.c                       |   2 +-
 fs/f2fs/super.c                    | 134 ++++++++++++++++++++++++++---
 6 files changed, 149 insertions(+), 24 deletions(-)
  

Comments

Chao Yu April 19, 2023, 3:41 a.m. UTC | #1
Ping,

Jaegeuk, do you still suffer any issue w/ this version?

Thanks,

On 2023/4/8 20:31, Chao Yu wrote:
> This patch supports errors=remount-ro|continue|panic mount option,
> by default it uses "remount-ro" mode.
> 
> Signed-off-by: Chao Yu <chao@kernel.org>
> Signed-off-by: Yangtao Li <frank.li@vivo.com>
> ---
> v8:
> - record stop reason synchronously if it's from ioctl shutdown interface
> - fix typo in doc and commit message.
> - use f2fs_hw_is_readonly() instead of bdev_read_only()
>   Documentation/filesystems/f2fs.rst |   5 ++
>   fs/f2fs/checkpoint.c               |   7 +-
>   fs/f2fs/f2fs.h                     |  20 ++++-
>   fs/f2fs/file.c                     |   5 --
>   fs/f2fs/gc.c                       |   2 +-
>   fs/f2fs/super.c                    | 134 ++++++++++++++++++++++++++---
>   6 files changed, 149 insertions(+), 24 deletions(-)
> 
> diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
> index 2055e72871fe..ba4c02fad647 100644
> --- a/Documentation/filesystems/f2fs.rst
> +++ b/Documentation/filesystems/f2fs.rst
> @@ -351,6 +351,11 @@ age_extent_cache	 Enable an age extent cache based on rb-tree. It records
>   			 data block update frequency of the extent per inode, in
>   			 order to provide better temperature hints for data block
>   			 allocation.
> +errors=%s		 Specify f2fs behavior on critical errors. This supports modes:
> +			 "panic", "continue" and "remount-ro", respectively, trigger
> +			 panic immediately, continue without doing anything, and remount
> +			 the partition in read-only mode. By default it uses "remount-ro"
> +			 mode.
>   ======================== ============================================================
>   
>   Debugfs Entries
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 73ae4e85e70d..e6b266bb9ce0 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -30,12 +30,9 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
>   						unsigned char reason)
>   {
>   	f2fs_build_fault_attr(sbi, 0, 0);
> -	set_ckpt_flags(sbi, CP_ERROR_FLAG);
> -	if (!end_io) {
> +	if (!end_io)
>   		f2fs_flush_merged_writes(sbi);
> -
> -		f2fs_handle_stop(sbi, reason);
> -	}
> +	f2fs_handle_critical_error(sbi, reason, end_io);
>   }
>   
>   /*
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index ac4942f8e83e..7d9c5a65f974 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -162,6 +162,7 @@ struct f2fs_mount_info {
>   	int fs_mode;			/* fs mode: LFS or ADAPTIVE */
>   	int bggc_mode;			/* bggc mode: off, on or sync */
>   	int memory_mode;		/* memory mode */
> +	int errors;			/* errors parameter */
>   	int discard_unit;		/*
>   					 * discard command's offset/size should
>   					 * be aligned to this unit: block,
> @@ -1370,6 +1371,12 @@ enum {
>   	MEMORY_MODE_LOW,	/* memory mode for low memry devices */
>   };
>   
> +enum errors_option {
> +	MOUNT_ERRORS_READONLY,	/* remount fs ro on errors */
> +	MOUNT_ERRORS_CONTINUE,	/* continue on errors */
> +	MOUNT_ERRORS_PANIC,	/* panic on errors */
> +};
> +
>   static inline int f2fs_test_bit(unsigned int nr, char *addr);
>   static inline void f2fs_set_bit(unsigned int nr, char *addr);
>   static inline void f2fs_clear_bit(unsigned int nr, char *addr);
> @@ -1786,8 +1793,14 @@ struct f2fs_sb_info {
>   
>   	struct workqueue_struct *post_read_wq;	/* post read workqueue */
>   
> -	unsigned char errors[MAX_F2FS_ERRORS];	/* error flags */
> -	spinlock_t error_lock;			/* protect errors array */
> +	/*
> +	 * If we are in irq context, let's update error information into
> +	 * on-disk superblock in the work.
> +	 */
> +	struct work_struct s_error_work;
> +	unsigned char errors[MAX_F2FS_ERRORS];		/* error flags */
> +	unsigned char stop_reason[MAX_STOP_REASON];	/* stop reason */
> +	spinlock_t error_lock;			/* protect errors/stop_reason array */
>   	bool error_dirty;			/* errors of sb is dirty */
>   
>   	struct kmem_cache *inline_xattr_slab;	/* inline xattr entry */
> @@ -3522,8 +3535,9 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
>   int f2fs_quota_sync(struct super_block *sb, int type);
>   loff_t max_file_blocks(struct inode *inode);
>   void f2fs_quota_off_umount(struct super_block *sb);
> -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason);
>   void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag);
> +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
> +							bool irq_context);
>   void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
>   int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
>   int f2fs_sync_fs(struct super_block *sb, int sync);
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 14e9a20e68df..10743e864d57 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -2221,7 +2221,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>   				ret = 0;
>   				f2fs_stop_checkpoint(sbi, false,
>   						STOP_CP_REASON_SHUTDOWN);
> -				set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>   				trace_f2fs_shutdown(sbi, in, ret);
>   			}
>   			return ret;
> @@ -2234,7 +2233,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>   		if (ret)
>   			goto out;
>   		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>   		thaw_bdev(sb->s_bdev);
>   		break;
>   	case F2FS_GOING_DOWN_METASYNC:
> @@ -2243,16 +2241,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>   		if (ret)
>   			goto out;
>   		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>   		break;
>   	case F2FS_GOING_DOWN_NOSYNC:
>   		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>   		break;
>   	case F2FS_GOING_DOWN_METAFLUSH:
>   		f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
>   		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>   		break;
>   	case F2FS_GOING_DOWN_NEED_FSCK:
>   		set_sbi_flag(sbi, SBI_NEED_FSCK);
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index f1d0dd9c5a6c..6dc2bc710ba8 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -59,7 +59,7 @@ static int gc_thread_func(void *data)
>   		if (gc_th->gc_wake)
>   			gc_th->gc_wake = false;
>   
> -		if (try_to_freeze()) {
> +		if (try_to_freeze() || f2fs_readonly(sbi->sb)) {
>   			stat_other_skip_bggc_count(sbi);
>   			continue;
>   		}
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 23574894e170..1573bf123197 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -164,6 +164,7 @@ enum {
>   	Opt_discard_unit,
>   	Opt_memory_mode,
>   	Opt_age_extent_cache,
> +	Opt_errors,
>   	Opt_err,
>   };
>   
> @@ -243,6 +244,7 @@ static match_table_t f2fs_tokens = {
>   	{Opt_discard_unit, "discard_unit=%s"},
>   	{Opt_memory_mode, "memory=%s"},
>   	{Opt_age_extent_cache, "age_extent_cache"},
> +	{Opt_errors, "errors=%s"},
>   	{Opt_err, NULL},
>   };
>   
> @@ -1268,6 +1270,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>   		case Opt_age_extent_cache:
>   			set_opt(sbi, AGE_EXTENT_CACHE);
>   			break;
> +		case Opt_errors:
> +			name = match_strdup(&args[0]);
> +			if (!name)
> +				return -ENOMEM;
> +			if (!strcmp(name, "remount-ro")) {
> +				F2FS_OPTION(sbi).errors =
> +						MOUNT_ERRORS_READONLY;
> +			} else if (!strcmp(name, "continue")) {
> +				F2FS_OPTION(sbi).errors =
> +						MOUNT_ERRORS_CONTINUE;
> +			} else if (!strcmp(name, "panic")) {
> +				F2FS_OPTION(sbi).errors =
> +						MOUNT_ERRORS_PANIC;
> +			} else {
> +				kfree(name);
> +				return -EINVAL;
> +			}
> +			kfree(name);
> +			break;
>   		default:
>   			f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
>   				 p);
> @@ -1623,6 +1644,9 @@ static void f2fs_put_super(struct super_block *sb)
>   	f2fs_destroy_node_manager(sbi);
>   	f2fs_destroy_segment_manager(sbi);
>   
> +	/* flush s_error_work before sbi destroy */
> +	flush_work(&sbi->s_error_work);
> +
>   	f2fs_destroy_post_read_wq(sbi);
>   
>   	kvfree(sbi->ckpt);
> @@ -2053,6 +2077,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
>   	else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW)
>   		seq_printf(seq, ",memory=%s", "low");
>   
> +	if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
> +		seq_printf(seq, ",errors=%s", "remount-ro");
> +	else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE)
> +		seq_printf(seq, ",errors=%s", "continue");
> +	else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC)
> +		seq_printf(seq, ",errors=%s", "panic");
> +
>   	return 0;
>   }
>   
> @@ -2081,6 +2112,7 @@ static void default_options(struct f2fs_sb_info *sbi)
>   	}
>   	F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
>   	F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL;
> +	F2FS_OPTION(sbi).errors = MOUNT_ERRORS_READONLY;
>   
>   	sbi->sb->s_flags &= ~SB_INLINECRYPT;
>   
> @@ -2282,6 +2314,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>   	if (err)
>   		goto restore_opts;
>   
> +	/* flush outstanding errors before changing fs state */
> +	flush_work(&sbi->s_error_work);
> +
>   	/*
>   	 * Previous and new state of filesystem is RO,
>   	 * so skip checking GC and FLUSH_MERGE conditions.
> @@ -3931,45 +3966,60 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
>   	return err;
>   }
>   
> -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason)
> +static void save_stop_reason(struct f2fs_sb_info *sbi, unsigned char reason)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&sbi->error_lock, flags);
> +	if (sbi->stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
> +		sbi->stop_reason[reason]++;
> +	spin_unlock_irqrestore(&sbi->error_lock, flags);
> +}
> +
> +static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
>   {
>   	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
> +	unsigned long flags;
>   	int err;
>   
>   	f2fs_down_write(&sbi->sb_lock);
>   
> -	if (raw_super->s_stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
> -		raw_super->s_stop_reason[reason]++;
> +	spin_lock_irqsave(&sbi->error_lock, flags);
> +	memcpy(raw_super->s_stop_reason, sbi->stop_reason, MAX_STOP_REASON);
> +	spin_unlock_irqrestore(&sbi->error_lock, flags);
>   
>   	err = f2fs_commit_super(sbi, false);
> -	if (err)
> -		f2fs_err(sbi, "f2fs_commit_super fails to record reason:%u err:%d",
> -								reason, err);
> +
>   	f2fs_up_write(&sbi->sb_lock);
> +	if (err)
> +		f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err);
>   }
>   
>   void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
>   {
> -	spin_lock(&sbi->error_lock);
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&sbi->error_lock, flags);
>   	if (!test_bit(flag, (unsigned long *)sbi->errors)) {
>   		set_bit(flag, (unsigned long *)sbi->errors);
>   		sbi->error_dirty = true;
>   	}
> -	spin_unlock(&sbi->error_lock);
> +	spin_unlock_irqrestore(&sbi->error_lock, flags);
>   }
>   
>   static bool f2fs_update_errors(struct f2fs_sb_info *sbi)
>   {
> +	unsigned long flags;
>   	bool need_update = false;
>   
> -	spin_lock(&sbi->error_lock);
> +	spin_lock_irqsave(&sbi->error_lock, flags);
>   	if (sbi->error_dirty) {
>   		memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors,
>   							MAX_F2FS_ERRORS);
>   		sbi->error_dirty = false;
>   		need_update = true;
>   	}
> -	spin_unlock(&sbi->error_lock);
> +	spin_unlock_irqrestore(&sbi->error_lock, flags);
>   
>   	return need_update;
>   }
> @@ -3993,6 +4043,66 @@ void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error)
>   	f2fs_up_write(&sbi->sb_lock);
>   }
>   
> +static bool system_going_down(void)
> +{
> +	return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
> +		|| system_state == SYSTEM_RESTART;
> +}
> +
> +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
> +							bool irq_context)
> +{
> +	struct super_block *sb = sbi->sb;
> +	bool shutdown = reason == STOP_CP_REASON_SHUTDOWN;
> +	bool continue_fs = !shutdown &&
> +			F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE;
> +
> +	set_ckpt_flags(sbi, CP_ERROR_FLAG);
> +
> +	if (!f2fs_hw_is_readonly(sbi)) {
> +		save_stop_reason(sbi, reason);
> +
> +		if (irq_context && !shutdown)
> +			schedule_work(&sbi->s_error_work);
> +		else
> +			f2fs_record_stop_reason(sbi);
> +	}
> +
> +	/*
> +	 * We force ERRORS_RO behavior when system is rebooting. Otherwise we
> +	 * could panic during 'reboot -f' as the underlying device got already
> +	 * disabled.
> +	 */
> +	if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC &&
> +				!shutdown && !system_going_down() &&
> +				!is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))
> +		panic("F2FS-fs (device %s): panic forced after error\n",
> +							sb->s_id);
> +
> +	if (shutdown)
> +		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> +
> +	/* continue filesystem operators if errors=continue */
> +	if (continue_fs || f2fs_readonly(sb))
> +		return;
> +
> +	f2fs_warn(sbi, "Remounting filesystem read-only");
> +	/*
> +	 * Make sure updated value of ->s_mount_flags will be visible before
> +	 * ->s_flags update
> +	 */
> +	smp_wmb();
> +	sb->s_flags |= SB_RDONLY;
> +}
> +
> +static void f2fs_record_error_work(struct work_struct *work)
> +{
> +	struct f2fs_sb_info *sbi = container_of(work,
> +					struct f2fs_sb_info, s_error_work);
> +
> +	f2fs_record_stop_reason(sbi);
> +}
> +
>   static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
>   {
>   	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
> @@ -4223,7 +4333,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>   	sb->s_fs_info = sbi;
>   	sbi->raw_super = raw_super;
>   
> +	INIT_WORK(&sbi->s_error_work, f2fs_record_error_work);
>   	memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS);
> +	memcpy(sbi->stop_reason, raw_super->s_stop_reason, MAX_STOP_REASON);
>   
>   	/* precompute checksum seed for metadata */
>   	if (f2fs_sb_has_inode_chksum(sbi))
> @@ -4620,6 +4732,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>   	f2fs_destroy_segment_manager(sbi);
>   stop_ckpt_thread:
>   	f2fs_stop_ckpt_thread(sbi);
> +	/* flush s_error_work before sbi destroy */
> +	flush_work(&sbi->s_error_work);
>   	f2fs_destroy_post_read_wq(sbi);
>   free_devices:
>   	destroy_device_list(sbi);
  
Jaegeuk Kim April 19, 2023, 8:19 p.m. UTC | #2
On 04/08, Chao Yu wrote:
> This patch supports errors=remount-ro|continue|panic mount option,
> by default it uses "remount-ro" mode.
> 
> Signed-off-by: Chao Yu <chao@kernel.org>
> Signed-off-by: Yangtao Li <frank.li@vivo.com>
> ---
> v8:
> - record stop reason synchronously if it's from ioctl shutdown interface
> - fix typo in doc and commit message.
> - use f2fs_hw_is_readonly() instead of bdev_read_only()
>  Documentation/filesystems/f2fs.rst |   5 ++
>  fs/f2fs/checkpoint.c               |   7 +-
>  fs/f2fs/f2fs.h                     |  20 ++++-
>  fs/f2fs/file.c                     |   5 --
>  fs/f2fs/gc.c                       |   2 +-
>  fs/f2fs/super.c                    | 134 ++++++++++++++++++++++++++---
>  6 files changed, 149 insertions(+), 24 deletions(-)
> 
> diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
> index 2055e72871fe..ba4c02fad647 100644
> --- a/Documentation/filesystems/f2fs.rst
> +++ b/Documentation/filesystems/f2fs.rst
> @@ -351,6 +351,11 @@ age_extent_cache	 Enable an age extent cache based on rb-tree. It records
>  			 data block update frequency of the extent per inode, in
>  			 order to provide better temperature hints for data block
>  			 allocation.
> +errors=%s		 Specify f2fs behavior on critical errors. This supports modes:
> +			 "panic", "continue" and "remount-ro", respectively, trigger
> +			 panic immediately, continue without doing anything, and remount
> +			 the partition in read-only mode. By default it uses "remount-ro"
> +			 mode.
>  ======================== ============================================================
>  
>  Debugfs Entries
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 73ae4e85e70d..e6b266bb9ce0 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -30,12 +30,9 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
>  						unsigned char reason)
>  {
>  	f2fs_build_fault_attr(sbi, 0, 0);
> -	set_ckpt_flags(sbi, CP_ERROR_FLAG);
> -	if (!end_io) {
> +	if (!end_io)
>  		f2fs_flush_merged_writes(sbi);
> -
> -		f2fs_handle_stop(sbi, reason);
> -	}
> +	f2fs_handle_critical_error(sbi, reason, end_io);
>  }
>  
>  /*
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index ac4942f8e83e..7d9c5a65f974 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -162,6 +162,7 @@ struct f2fs_mount_info {
>  	int fs_mode;			/* fs mode: LFS or ADAPTIVE */
>  	int bggc_mode;			/* bggc mode: off, on or sync */
>  	int memory_mode;		/* memory mode */
> +	int errors;			/* errors parameter */
>  	int discard_unit;		/*
>  					 * discard command's offset/size should
>  					 * be aligned to this unit: block,
> @@ -1370,6 +1371,12 @@ enum {
>  	MEMORY_MODE_LOW,	/* memory mode for low memry devices */
>  };
>  
> +enum errors_option {
> +	MOUNT_ERRORS_READONLY,	/* remount fs ro on errors */
> +	MOUNT_ERRORS_CONTINUE,	/* continue on errors */
> +	MOUNT_ERRORS_PANIC,	/* panic on errors */
> +};
> +
>  static inline int f2fs_test_bit(unsigned int nr, char *addr);
>  static inline void f2fs_set_bit(unsigned int nr, char *addr);
>  static inline void f2fs_clear_bit(unsigned int nr, char *addr);
> @@ -1786,8 +1793,14 @@ struct f2fs_sb_info {
>  
>  	struct workqueue_struct *post_read_wq;	/* post read workqueue */
>  
> -	unsigned char errors[MAX_F2FS_ERRORS];	/* error flags */
> -	spinlock_t error_lock;			/* protect errors array */
> +	/*
> +	 * If we are in irq context, let's update error information into
> +	 * on-disk superblock in the work.
> +	 */
> +	struct work_struct s_error_work;
> +	unsigned char errors[MAX_F2FS_ERRORS];		/* error flags */
> +	unsigned char stop_reason[MAX_STOP_REASON];	/* stop reason */
> +	spinlock_t error_lock;			/* protect errors/stop_reason array */
>  	bool error_dirty;			/* errors of sb is dirty */
>  
>  	struct kmem_cache *inline_xattr_slab;	/* inline xattr entry */
> @@ -3522,8 +3535,9 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
>  int f2fs_quota_sync(struct super_block *sb, int type);
>  loff_t max_file_blocks(struct inode *inode);
>  void f2fs_quota_off_umount(struct super_block *sb);
> -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason);
>  void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag);
> +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
> +							bool irq_context);
>  void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
>  int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
>  int f2fs_sync_fs(struct super_block *sb, int sync);
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 14e9a20e68df..10743e864d57 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -2221,7 +2221,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>  				ret = 0;
>  				f2fs_stop_checkpoint(sbi, false,
>  						STOP_CP_REASON_SHUTDOWN);
> -				set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>  				trace_f2fs_shutdown(sbi, in, ret);
>  			}
>  			return ret;
> @@ -2234,7 +2233,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>  		if (ret)
>  			goto out;
>  		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>  		thaw_bdev(sb->s_bdev);
>  		break;
>  	case F2FS_GOING_DOWN_METASYNC:
> @@ -2243,16 +2241,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>  		if (ret)
>  			goto out;
>  		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>  		break;
>  	case F2FS_GOING_DOWN_NOSYNC:
>  		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>  		break;
>  	case F2FS_GOING_DOWN_METAFLUSH:
>  		f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
>  		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>  		break;
>  	case F2FS_GOING_DOWN_NEED_FSCK:
>  		set_sbi_flag(sbi, SBI_NEED_FSCK);
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index f1d0dd9c5a6c..6dc2bc710ba8 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -59,7 +59,7 @@ static int gc_thread_func(void *data)
>  		if (gc_th->gc_wake)
>  			gc_th->gc_wake = false;
>  
> -		if (try_to_freeze()) {
> +		if (try_to_freeze() || f2fs_readonly(sbi->sb)) {
>  			stat_other_skip_bggc_count(sbi);
>  			continue;
>  		}
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 23574894e170..1573bf123197 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -164,6 +164,7 @@ enum {
>  	Opt_discard_unit,
>  	Opt_memory_mode,
>  	Opt_age_extent_cache,
> +	Opt_errors,
>  	Opt_err,
>  };
>  
> @@ -243,6 +244,7 @@ static match_table_t f2fs_tokens = {
>  	{Opt_discard_unit, "discard_unit=%s"},
>  	{Opt_memory_mode, "memory=%s"},
>  	{Opt_age_extent_cache, "age_extent_cache"},
> +	{Opt_errors, "errors=%s"},
>  	{Opt_err, NULL},
>  };
>  
> @@ -1268,6 +1270,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>  		case Opt_age_extent_cache:
>  			set_opt(sbi, AGE_EXTENT_CACHE);
>  			break;
> +		case Opt_errors:
> +			name = match_strdup(&args[0]);
> +			if (!name)
> +				return -ENOMEM;
> +			if (!strcmp(name, "remount-ro")) {
> +				F2FS_OPTION(sbi).errors =
> +						MOUNT_ERRORS_READONLY;
> +			} else if (!strcmp(name, "continue")) {
> +				F2FS_OPTION(sbi).errors =
> +						MOUNT_ERRORS_CONTINUE;
> +			} else if (!strcmp(name, "panic")) {
> +				F2FS_OPTION(sbi).errors =
> +						MOUNT_ERRORS_PANIC;
> +			} else {
> +				kfree(name);
> +				return -EINVAL;
> +			}
> +			kfree(name);
> +			break;
>  		default:
>  			f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
>  				 p);
> @@ -1623,6 +1644,9 @@ static void f2fs_put_super(struct super_block *sb)
>  	f2fs_destroy_node_manager(sbi);
>  	f2fs_destroy_segment_manager(sbi);
>  
> +	/* flush s_error_work before sbi destroy */
> +	flush_work(&sbi->s_error_work);
> +
>  	f2fs_destroy_post_read_wq(sbi);
>  
>  	kvfree(sbi->ckpt);
> @@ -2053,6 +2077,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
>  	else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW)
>  		seq_printf(seq, ",memory=%s", "low");
>  
> +	if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
> +		seq_printf(seq, ",errors=%s", "remount-ro");
> +	else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE)
> +		seq_printf(seq, ",errors=%s", "continue");
> +	else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC)
> +		seq_printf(seq, ",errors=%s", "panic");
> +
>  	return 0;
>  }
>  
> @@ -2081,6 +2112,7 @@ static void default_options(struct f2fs_sb_info *sbi)
>  	}
>  	F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
>  	F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL;
> +	F2FS_OPTION(sbi).errors = MOUNT_ERRORS_READONLY;

Was the default MOUNT_ERRORS_CONTINUE?

>  
>  	sbi->sb->s_flags &= ~SB_INLINECRYPT;
>  
> @@ -2282,6 +2314,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>  	if (err)
>  		goto restore_opts;
>  
> +	/* flush outstanding errors before changing fs state */
> +	flush_work(&sbi->s_error_work);
> +
>  	/*
>  	 * Previous and new state of filesystem is RO,
>  	 * so skip checking GC and FLUSH_MERGE conditions.
> @@ -3931,45 +3966,60 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
>  	return err;
>  }
>  
> -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason)
> +static void save_stop_reason(struct f2fs_sb_info *sbi, unsigned char reason)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&sbi->error_lock, flags);
> +	if (sbi->stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
> +		sbi->stop_reason[reason]++;
> +	spin_unlock_irqrestore(&sbi->error_lock, flags);
> +}
> +
> +static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
>  {
>  	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
> +	unsigned long flags;
>  	int err;
>  
>  	f2fs_down_write(&sbi->sb_lock);
>  
> -	if (raw_super->s_stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
> -		raw_super->s_stop_reason[reason]++;
> +	spin_lock_irqsave(&sbi->error_lock, flags);
> +	memcpy(raw_super->s_stop_reason, sbi->stop_reason, MAX_STOP_REASON);
> +	spin_unlock_irqrestore(&sbi->error_lock, flags);
>  
>  	err = f2fs_commit_super(sbi, false);
> -	if (err)
> -		f2fs_err(sbi, "f2fs_commit_super fails to record reason:%u err:%d",
> -								reason, err);
> +
>  	f2fs_up_write(&sbi->sb_lock);
> +	if (err)
> +		f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err);
>  }
>  
>  void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
>  {
> -	spin_lock(&sbi->error_lock);
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&sbi->error_lock, flags);
>  	if (!test_bit(flag, (unsigned long *)sbi->errors)) {
>  		set_bit(flag, (unsigned long *)sbi->errors);
>  		sbi->error_dirty = true;
>  	}
> -	spin_unlock(&sbi->error_lock);
> +	spin_unlock_irqrestore(&sbi->error_lock, flags);
>  }
>  
>  static bool f2fs_update_errors(struct f2fs_sb_info *sbi)
>  {
> +	unsigned long flags;
>  	bool need_update = false;
>  
> -	spin_lock(&sbi->error_lock);
> +	spin_lock_irqsave(&sbi->error_lock, flags);
>  	if (sbi->error_dirty) {
>  		memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors,
>  							MAX_F2FS_ERRORS);
>  		sbi->error_dirty = false;
>  		need_update = true;
>  	}
> -	spin_unlock(&sbi->error_lock);
> +	spin_unlock_irqrestore(&sbi->error_lock, flags);
>  
>  	return need_update;
>  }
> @@ -3993,6 +4043,66 @@ void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error)
>  	f2fs_up_write(&sbi->sb_lock);
>  }
>  
> +static bool system_going_down(void)
> +{
> +	return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
> +		|| system_state == SYSTEM_RESTART;
> +}
> +
> +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
> +							bool irq_context)
> +{
> +	struct super_block *sb = sbi->sb;
> +	bool shutdown = reason == STOP_CP_REASON_SHUTDOWN;
> +	bool continue_fs = !shutdown &&
> +			F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE;
> +
> +	set_ckpt_flags(sbi, CP_ERROR_FLAG);
> +
> +	if (!f2fs_hw_is_readonly(sbi)) {
> +		save_stop_reason(sbi, reason);
> +
> +		if (irq_context && !shutdown)
> +			schedule_work(&sbi->s_error_work);
> +		else
> +			f2fs_record_stop_reason(sbi);
> +	}
> +
> +	/*
> +	 * We force ERRORS_RO behavior when system is rebooting. Otherwise we
> +	 * could panic during 'reboot -f' as the underlying device got already
> +	 * disabled.
> +	 */
> +	if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC &&
> +				!shutdown && !system_going_down() &&
> +				!is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))
> +		panic("F2FS-fs (device %s): panic forced after error\n",
> +							sb->s_id);
> +
> +	if (shutdown)
> +		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> +
> +	/* continue filesystem operators if errors=continue */
> +	if (continue_fs || f2fs_readonly(sb))
> +		return;
> +
> +	f2fs_warn(sbi, "Remounting filesystem read-only");
> +	/*
> +	 * Make sure updated value of ->s_mount_flags will be visible before
> +	 * ->s_flags update
> +	 */
> +	smp_wmb();
> +	sb->s_flags |= SB_RDONLY;
> +}
> +
> +static void f2fs_record_error_work(struct work_struct *work)
> +{
> +	struct f2fs_sb_info *sbi = container_of(work,
> +					struct f2fs_sb_info, s_error_work);
> +
> +	f2fs_record_stop_reason(sbi);
> +}
> +
>  static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
>  {
>  	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
> @@ -4223,7 +4333,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>  	sb->s_fs_info = sbi;
>  	sbi->raw_super = raw_super;
>  
> +	INIT_WORK(&sbi->s_error_work, f2fs_record_error_work);
>  	memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS);
> +	memcpy(sbi->stop_reason, raw_super->s_stop_reason, MAX_STOP_REASON);
>  
>  	/* precompute checksum seed for metadata */
>  	if (f2fs_sb_has_inode_chksum(sbi))
> @@ -4620,6 +4732,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>  	f2fs_destroy_segment_manager(sbi);
>  stop_ckpt_thread:
>  	f2fs_stop_ckpt_thread(sbi);
> +	/* flush s_error_work before sbi destroy */
> +	flush_work(&sbi->s_error_work);
>  	f2fs_destroy_post_read_wq(sbi);
>  free_devices:
>  	destroy_device_list(sbi);
> -- 
> 2.36.1
  
Chao Yu April 20, 2023, 8:33 a.m. UTC | #3
On 2023/4/20 4:19, Jaegeuk Kim wrote:
> On 04/08, Chao Yu wrote:
>> This patch supports errors=remount-ro|continue|panic mount option,
>> by default it uses "remount-ro" mode.
>>
>> Signed-off-by: Chao Yu <chao@kernel.org>
>> Signed-off-by: Yangtao Li <frank.li@vivo.com>
>> ---
>> v8:
>> - record stop reason synchronously if it's from ioctl shutdown interface
>> - fix typo in doc and commit message.
>> - use f2fs_hw_is_readonly() instead of bdev_read_only()
>>   Documentation/filesystems/f2fs.rst |   5 ++
>>   fs/f2fs/checkpoint.c               |   7 +-
>>   fs/f2fs/f2fs.h                     |  20 ++++-
>>   fs/f2fs/file.c                     |   5 --
>>   fs/f2fs/gc.c                       |   2 +-
>>   fs/f2fs/super.c                    | 134 ++++++++++++++++++++++++++---
>>   6 files changed, 149 insertions(+), 24 deletions(-)
>>
>> diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
>> index 2055e72871fe..ba4c02fad647 100644
>> --- a/Documentation/filesystems/f2fs.rst
>> +++ b/Documentation/filesystems/f2fs.rst
>> @@ -351,6 +351,11 @@ age_extent_cache	 Enable an age extent cache based on rb-tree. It records
>>   			 data block update frequency of the extent per inode, in
>>   			 order to provide better temperature hints for data block
>>   			 allocation.
>> +errors=%s		 Specify f2fs behavior on critical errors. This supports modes:
>> +			 "panic", "continue" and "remount-ro", respectively, trigger
>> +			 panic immediately, continue without doing anything, and remount
>> +			 the partition in read-only mode. By default it uses "remount-ro"
>> +			 mode.
>>   ======================== ============================================================
>>   
>>   Debugfs Entries
>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>> index 73ae4e85e70d..e6b266bb9ce0 100644
>> --- a/fs/f2fs/checkpoint.c
>> +++ b/fs/f2fs/checkpoint.c
>> @@ -30,12 +30,9 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
>>   						unsigned char reason)
>>   {
>>   	f2fs_build_fault_attr(sbi, 0, 0);
>> -	set_ckpt_flags(sbi, CP_ERROR_FLAG);
>> -	if (!end_io) {
>> +	if (!end_io)
>>   		f2fs_flush_merged_writes(sbi);
>> -
>> -		f2fs_handle_stop(sbi, reason);
>> -	}
>> +	f2fs_handle_critical_error(sbi, reason, end_io);
>>   }
>>   
>>   /*
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> index ac4942f8e83e..7d9c5a65f974 100644
>> --- a/fs/f2fs/f2fs.h
>> +++ b/fs/f2fs/f2fs.h
>> @@ -162,6 +162,7 @@ struct f2fs_mount_info {
>>   	int fs_mode;			/* fs mode: LFS or ADAPTIVE */
>>   	int bggc_mode;			/* bggc mode: off, on or sync */
>>   	int memory_mode;		/* memory mode */
>> +	int errors;			/* errors parameter */
>>   	int discard_unit;		/*
>>   					 * discard command's offset/size should
>>   					 * be aligned to this unit: block,
>> @@ -1370,6 +1371,12 @@ enum {
>>   	MEMORY_MODE_LOW,	/* memory mode for low memry devices */
>>   };
>>   
>> +enum errors_option {
>> +	MOUNT_ERRORS_READONLY,	/* remount fs ro on errors */
>> +	MOUNT_ERRORS_CONTINUE,	/* continue on errors */
>> +	MOUNT_ERRORS_PANIC,	/* panic on errors */
>> +};
>> +
>>   static inline int f2fs_test_bit(unsigned int nr, char *addr);
>>   static inline void f2fs_set_bit(unsigned int nr, char *addr);
>>   static inline void f2fs_clear_bit(unsigned int nr, char *addr);
>> @@ -1786,8 +1793,14 @@ struct f2fs_sb_info {
>>   
>>   	struct workqueue_struct *post_read_wq;	/* post read workqueue */
>>   
>> -	unsigned char errors[MAX_F2FS_ERRORS];	/* error flags */
>> -	spinlock_t error_lock;			/* protect errors array */
>> +	/*
>> +	 * If we are in irq context, let's update error information into
>> +	 * on-disk superblock in the work.
>> +	 */
>> +	struct work_struct s_error_work;
>> +	unsigned char errors[MAX_F2FS_ERRORS];		/* error flags */
>> +	unsigned char stop_reason[MAX_STOP_REASON];	/* stop reason */
>> +	spinlock_t error_lock;			/* protect errors/stop_reason array */
>>   	bool error_dirty;			/* errors of sb is dirty */
>>   
>>   	struct kmem_cache *inline_xattr_slab;	/* inline xattr entry */
>> @@ -3522,8 +3535,9 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
>>   int f2fs_quota_sync(struct super_block *sb, int type);
>>   loff_t max_file_blocks(struct inode *inode);
>>   void f2fs_quota_off_umount(struct super_block *sb);
>> -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason);
>>   void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag);
>> +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
>> +							bool irq_context);
>>   void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
>>   int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
>>   int f2fs_sync_fs(struct super_block *sb, int sync);
>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>> index 14e9a20e68df..10743e864d57 100644
>> --- a/fs/f2fs/file.c
>> +++ b/fs/f2fs/file.c
>> @@ -2221,7 +2221,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>>   				ret = 0;
>>   				f2fs_stop_checkpoint(sbi, false,
>>   						STOP_CP_REASON_SHUTDOWN);
>> -				set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>   				trace_f2fs_shutdown(sbi, in, ret);
>>   			}
>>   			return ret;
>> @@ -2234,7 +2233,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>>   		if (ret)
>>   			goto out;
>>   		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>   		thaw_bdev(sb->s_bdev);
>>   		break;
>>   	case F2FS_GOING_DOWN_METASYNC:
>> @@ -2243,16 +2241,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>>   		if (ret)
>>   			goto out;
>>   		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>   		break;
>>   	case F2FS_GOING_DOWN_NOSYNC:
>>   		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>   		break;
>>   	case F2FS_GOING_DOWN_METAFLUSH:
>>   		f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
>>   		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>   		break;
>>   	case F2FS_GOING_DOWN_NEED_FSCK:
>>   		set_sbi_flag(sbi, SBI_NEED_FSCK);
>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>> index f1d0dd9c5a6c..6dc2bc710ba8 100644
>> --- a/fs/f2fs/gc.c
>> +++ b/fs/f2fs/gc.c
>> @@ -59,7 +59,7 @@ static int gc_thread_func(void *data)
>>   		if (gc_th->gc_wake)
>>   			gc_th->gc_wake = false;
>>   
>> -		if (try_to_freeze()) {
>> +		if (try_to_freeze() || f2fs_readonly(sbi->sb)) {
>>   			stat_other_skip_bggc_count(sbi);
>>   			continue;
>>   		}
>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>> index 23574894e170..1573bf123197 100644
>> --- a/fs/f2fs/super.c
>> +++ b/fs/f2fs/super.c
>> @@ -164,6 +164,7 @@ enum {
>>   	Opt_discard_unit,
>>   	Opt_memory_mode,
>>   	Opt_age_extent_cache,
>> +	Opt_errors,
>>   	Opt_err,
>>   };
>>   
>> @@ -243,6 +244,7 @@ static match_table_t f2fs_tokens = {
>>   	{Opt_discard_unit, "discard_unit=%s"},
>>   	{Opt_memory_mode, "memory=%s"},
>>   	{Opt_age_extent_cache, "age_extent_cache"},
>> +	{Opt_errors, "errors=%s"},
>>   	{Opt_err, NULL},
>>   };
>>   
>> @@ -1268,6 +1270,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>>   		case Opt_age_extent_cache:
>>   			set_opt(sbi, AGE_EXTENT_CACHE);
>>   			break;
>> +		case Opt_errors:
>> +			name = match_strdup(&args[0]);
>> +			if (!name)
>> +				return -ENOMEM;
>> +			if (!strcmp(name, "remount-ro")) {
>> +				F2FS_OPTION(sbi).errors =
>> +						MOUNT_ERRORS_READONLY;
>> +			} else if (!strcmp(name, "continue")) {
>> +				F2FS_OPTION(sbi).errors =
>> +						MOUNT_ERRORS_CONTINUE;
>> +			} else if (!strcmp(name, "panic")) {
>> +				F2FS_OPTION(sbi).errors =
>> +						MOUNT_ERRORS_PANIC;
>> +			} else {
>> +				kfree(name);
>> +				return -EINVAL;
>> +			}
>> +			kfree(name);
>> +			break;
>>   		default:
>>   			f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
>>   				 p);
>> @@ -1623,6 +1644,9 @@ static void f2fs_put_super(struct super_block *sb)
>>   	f2fs_destroy_node_manager(sbi);
>>   	f2fs_destroy_segment_manager(sbi);
>>   
>> +	/* flush s_error_work before sbi destroy */
>> +	flush_work(&sbi->s_error_work);
>> +
>>   	f2fs_destroy_post_read_wq(sbi);
>>   
>>   	kvfree(sbi->ckpt);
>> @@ -2053,6 +2077,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
>>   	else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW)
>>   		seq_printf(seq, ",memory=%s", "low");
>>   
>> +	if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
>> +		seq_printf(seq, ",errors=%s", "remount-ro");
>> +	else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE)
>> +		seq_printf(seq, ",errors=%s", "continue");
>> +	else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC)
>> +		seq_printf(seq, ",errors=%s", "panic");
>> +
>>   	return 0;
>>   }
>>   
>> @@ -2081,6 +2112,7 @@ static void default_options(struct f2fs_sb_info *sbi)
>>   	}
>>   	F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
>>   	F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL;
>> +	F2FS_OPTION(sbi).errors = MOUNT_ERRORS_READONLY;
> 
> Was the default MOUNT_ERRORS_CONTINUE?

Okay, let me revise in v9.

Please note that main difference between continue mode and readonly mode once
cp_error is set:

		continue mode		readonly mode
access ops:	Normal			Normal
modify ops:	-EIO			-EROFS
mount option	rw			ro

Thanks,

> 
>>   
>>   	sbi->sb->s_flags &= ~SB_INLINECRYPT;
>>   
>> @@ -2282,6 +2314,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>>   	if (err)
>>   		goto restore_opts;
>>   
>> +	/* flush outstanding errors before changing fs state */
>> +	flush_work(&sbi->s_error_work);
>> +
>>   	/*
>>   	 * Previous and new state of filesystem is RO,
>>   	 * so skip checking GC and FLUSH_MERGE conditions.
>> @@ -3931,45 +3966,60 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
>>   	return err;
>>   }
>>   
>> -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason)
>> +static void save_stop_reason(struct f2fs_sb_info *sbi, unsigned char reason)
>> +{
>> +	unsigned long flags;
>> +
>> +	spin_lock_irqsave(&sbi->error_lock, flags);
>> +	if (sbi->stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
>> +		sbi->stop_reason[reason]++;
>> +	spin_unlock_irqrestore(&sbi->error_lock, flags);
>> +}
>> +
>> +static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
>>   {
>>   	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
>> +	unsigned long flags;
>>   	int err;
>>   
>>   	f2fs_down_write(&sbi->sb_lock);
>>   
>> -	if (raw_super->s_stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
>> -		raw_super->s_stop_reason[reason]++;
>> +	spin_lock_irqsave(&sbi->error_lock, flags);
>> +	memcpy(raw_super->s_stop_reason, sbi->stop_reason, MAX_STOP_REASON);
>> +	spin_unlock_irqrestore(&sbi->error_lock, flags);
>>   
>>   	err = f2fs_commit_super(sbi, false);
>> -	if (err)
>> -		f2fs_err(sbi, "f2fs_commit_super fails to record reason:%u err:%d",
>> -								reason, err);
>> +
>>   	f2fs_up_write(&sbi->sb_lock);
>> +	if (err)
>> +		f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err);
>>   }
>>   
>>   void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
>>   {
>> -	spin_lock(&sbi->error_lock);
>> +	unsigned long flags;
>> +
>> +	spin_lock_irqsave(&sbi->error_lock, flags);
>>   	if (!test_bit(flag, (unsigned long *)sbi->errors)) {
>>   		set_bit(flag, (unsigned long *)sbi->errors);
>>   		sbi->error_dirty = true;
>>   	}
>> -	spin_unlock(&sbi->error_lock);
>> +	spin_unlock_irqrestore(&sbi->error_lock, flags);
>>   }
>>   
>>   static bool f2fs_update_errors(struct f2fs_sb_info *sbi)
>>   {
>> +	unsigned long flags;
>>   	bool need_update = false;
>>   
>> -	spin_lock(&sbi->error_lock);
>> +	spin_lock_irqsave(&sbi->error_lock, flags);
>>   	if (sbi->error_dirty) {
>>   		memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors,
>>   							MAX_F2FS_ERRORS);
>>   		sbi->error_dirty = false;
>>   		need_update = true;
>>   	}
>> -	spin_unlock(&sbi->error_lock);
>> +	spin_unlock_irqrestore(&sbi->error_lock, flags);
>>   
>>   	return need_update;
>>   }
>> @@ -3993,6 +4043,66 @@ void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error)
>>   	f2fs_up_write(&sbi->sb_lock);
>>   }
>>   
>> +static bool system_going_down(void)
>> +{
>> +	return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
>> +		|| system_state == SYSTEM_RESTART;
>> +}
>> +
>> +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
>> +							bool irq_context)
>> +{
>> +	struct super_block *sb = sbi->sb;
>> +	bool shutdown = reason == STOP_CP_REASON_SHUTDOWN;
>> +	bool continue_fs = !shutdown &&
>> +			F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE;
>> +
>> +	set_ckpt_flags(sbi, CP_ERROR_FLAG);
>> +
>> +	if (!f2fs_hw_is_readonly(sbi)) {
>> +		save_stop_reason(sbi, reason);
>> +
>> +		if (irq_context && !shutdown)
>> +			schedule_work(&sbi->s_error_work);
>> +		else
>> +			f2fs_record_stop_reason(sbi);
>> +	}
>> +
>> +	/*
>> +	 * We force ERRORS_RO behavior when system is rebooting. Otherwise we
>> +	 * could panic during 'reboot -f' as the underlying device got already
>> +	 * disabled.
>> +	 */
>> +	if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC &&
>> +				!shutdown && !system_going_down() &&
>> +				!is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))
>> +		panic("F2FS-fs (device %s): panic forced after error\n",
>> +							sb->s_id);
>> +
>> +	if (shutdown)
>> +		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>> +
>> +	/* continue filesystem operators if errors=continue */
>> +	if (continue_fs || f2fs_readonly(sb))
>> +		return;
>> +
>> +	f2fs_warn(sbi, "Remounting filesystem read-only");
>> +	/*
>> +	 * Make sure updated value of ->s_mount_flags will be visible before
>> +	 * ->s_flags update
>> +	 */
>> +	smp_wmb();
>> +	sb->s_flags |= SB_RDONLY;
>> +}
>> +
>> +static void f2fs_record_error_work(struct work_struct *work)
>> +{
>> +	struct f2fs_sb_info *sbi = container_of(work,
>> +					struct f2fs_sb_info, s_error_work);
>> +
>> +	f2fs_record_stop_reason(sbi);
>> +}
>> +
>>   static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
>>   {
>>   	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
>> @@ -4223,7 +4333,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>   	sb->s_fs_info = sbi;
>>   	sbi->raw_super = raw_super;
>>   
>> +	INIT_WORK(&sbi->s_error_work, f2fs_record_error_work);
>>   	memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS);
>> +	memcpy(sbi->stop_reason, raw_super->s_stop_reason, MAX_STOP_REASON);
>>   
>>   	/* precompute checksum seed for metadata */
>>   	if (f2fs_sb_has_inode_chksum(sbi))
>> @@ -4620,6 +4732,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>   	f2fs_destroy_segment_manager(sbi);
>>   stop_ckpt_thread:
>>   	f2fs_stop_ckpt_thread(sbi);
>> +	/* flush s_error_work before sbi destroy */
>> +	flush_work(&sbi->s_error_work);
>>   	f2fs_destroy_post_read_wq(sbi);
>>   free_devices:
>>   	destroy_device_list(sbi);
>> -- 
>> 2.36.1
  
Jaegeuk Kim April 20, 2023, 4:34 p.m. UTC | #4
On 04/20, Chao Yu wrote:
> On 2023/4/20 4:19, Jaegeuk Kim wrote:
> > On 04/08, Chao Yu wrote:
> > > This patch supports errors=remount-ro|continue|panic mount option,
> > > by default it uses "remount-ro" mode.
> > > 
> > > Signed-off-by: Chao Yu <chao@kernel.org>
> > > Signed-off-by: Yangtao Li <frank.li@vivo.com>
> > > ---
> > > v8:
> > > - record stop reason synchronously if it's from ioctl shutdown interface
> > > - fix typo in doc and commit message.
> > > - use f2fs_hw_is_readonly() instead of bdev_read_only()
> > >   Documentation/filesystems/f2fs.rst |   5 ++
> > >   fs/f2fs/checkpoint.c               |   7 +-
> > >   fs/f2fs/f2fs.h                     |  20 ++++-
> > >   fs/f2fs/file.c                     |   5 --
> > >   fs/f2fs/gc.c                       |   2 +-
> > >   fs/f2fs/super.c                    | 134 ++++++++++++++++++++++++++---
> > >   6 files changed, 149 insertions(+), 24 deletions(-)
> > > 
> > > diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
> > > index 2055e72871fe..ba4c02fad647 100644
> > > --- a/Documentation/filesystems/f2fs.rst
> > > +++ b/Documentation/filesystems/f2fs.rst
> > > @@ -351,6 +351,11 @@ age_extent_cache	 Enable an age extent cache based on rb-tree. It records
> > >   			 data block update frequency of the extent per inode, in
> > >   			 order to provide better temperature hints for data block
> > >   			 allocation.
> > > +errors=%s		 Specify f2fs behavior on critical errors. This supports modes:
> > > +			 "panic", "continue" and "remount-ro", respectively, trigger
> > > +			 panic immediately, continue without doing anything, and remount
> > > +			 the partition in read-only mode. By default it uses "remount-ro"
> > > +			 mode.
> > >   ======================== ============================================================
> > >   Debugfs Entries
> > > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > > index 73ae4e85e70d..e6b266bb9ce0 100644
> > > --- a/fs/f2fs/checkpoint.c
> > > +++ b/fs/f2fs/checkpoint.c
> > > @@ -30,12 +30,9 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
> > >   						unsigned char reason)
> > >   {
> > >   	f2fs_build_fault_attr(sbi, 0, 0);
> > > -	set_ckpt_flags(sbi, CP_ERROR_FLAG);
> > > -	if (!end_io) {
> > > +	if (!end_io)
> > >   		f2fs_flush_merged_writes(sbi);
> > > -
> > > -		f2fs_handle_stop(sbi, reason);
> > > -	}
> > > +	f2fs_handle_critical_error(sbi, reason, end_io);
> > >   }
> > >   /*
> > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > index ac4942f8e83e..7d9c5a65f974 100644
> > > --- a/fs/f2fs/f2fs.h
> > > +++ b/fs/f2fs/f2fs.h
> > > @@ -162,6 +162,7 @@ struct f2fs_mount_info {
> > >   	int fs_mode;			/* fs mode: LFS or ADAPTIVE */
> > >   	int bggc_mode;			/* bggc mode: off, on or sync */
> > >   	int memory_mode;		/* memory mode */
> > > +	int errors;			/* errors parameter */
> > >   	int discard_unit;		/*
> > >   					 * discard command's offset/size should
> > >   					 * be aligned to this unit: block,
> > > @@ -1370,6 +1371,12 @@ enum {
> > >   	MEMORY_MODE_LOW,	/* memory mode for low memry devices */
> > >   };
> > > +enum errors_option {
> > > +	MOUNT_ERRORS_READONLY,	/* remount fs ro on errors */
> > > +	MOUNT_ERRORS_CONTINUE,	/* continue on errors */
> > > +	MOUNT_ERRORS_PANIC,	/* panic on errors */
> > > +};
> > > +
> > >   static inline int f2fs_test_bit(unsigned int nr, char *addr);
> > >   static inline void f2fs_set_bit(unsigned int nr, char *addr);
> > >   static inline void f2fs_clear_bit(unsigned int nr, char *addr);
> > > @@ -1786,8 +1793,14 @@ struct f2fs_sb_info {
> > >   	struct workqueue_struct *post_read_wq;	/* post read workqueue */
> > > -	unsigned char errors[MAX_F2FS_ERRORS];	/* error flags */
> > > -	spinlock_t error_lock;			/* protect errors array */
> > > +	/*
> > > +	 * If we are in irq context, let's update error information into
> > > +	 * on-disk superblock in the work.
> > > +	 */
> > > +	struct work_struct s_error_work;
> > > +	unsigned char errors[MAX_F2FS_ERRORS];		/* error flags */
> > > +	unsigned char stop_reason[MAX_STOP_REASON];	/* stop reason */
> > > +	spinlock_t error_lock;			/* protect errors/stop_reason array */
> > >   	bool error_dirty;			/* errors of sb is dirty */
> > >   	struct kmem_cache *inline_xattr_slab;	/* inline xattr entry */
> > > @@ -3522,8 +3535,9 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
> > >   int f2fs_quota_sync(struct super_block *sb, int type);
> > >   loff_t max_file_blocks(struct inode *inode);
> > >   void f2fs_quota_off_umount(struct super_block *sb);
> > > -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason);
> > >   void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag);
> > > +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
> > > +							bool irq_context);
> > >   void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
> > >   int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
> > >   int f2fs_sync_fs(struct super_block *sb, int sync);
> > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > > index 14e9a20e68df..10743e864d57 100644
> > > --- a/fs/f2fs/file.c
> > > +++ b/fs/f2fs/file.c
> > > @@ -2221,7 +2221,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
> > >   				ret = 0;
> > >   				f2fs_stop_checkpoint(sbi, false,
> > >   						STOP_CP_REASON_SHUTDOWN);
> > > -				set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > >   				trace_f2fs_shutdown(sbi, in, ret);
> > >   			}
> > >   			return ret;
> > > @@ -2234,7 +2233,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
> > >   		if (ret)
> > >   			goto out;
> > >   		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> > > -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > >   		thaw_bdev(sb->s_bdev);
> > >   		break;
> > >   	case F2FS_GOING_DOWN_METASYNC:
> > > @@ -2243,16 +2241,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
> > >   		if (ret)
> > >   			goto out;
> > >   		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> > > -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > >   		break;
> > >   	case F2FS_GOING_DOWN_NOSYNC:
> > >   		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> > > -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > >   		break;
> > >   	case F2FS_GOING_DOWN_METAFLUSH:
> > >   		f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
> > >   		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> > > -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > >   		break;
> > >   	case F2FS_GOING_DOWN_NEED_FSCK:
> > >   		set_sbi_flag(sbi, SBI_NEED_FSCK);
> > > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> > > index f1d0dd9c5a6c..6dc2bc710ba8 100644
> > > --- a/fs/f2fs/gc.c
> > > +++ b/fs/f2fs/gc.c
> > > @@ -59,7 +59,7 @@ static int gc_thread_func(void *data)
> > >   		if (gc_th->gc_wake)
> > >   			gc_th->gc_wake = false;
> > > -		if (try_to_freeze()) {
> > > +		if (try_to_freeze() || f2fs_readonly(sbi->sb)) {
> > >   			stat_other_skip_bggc_count(sbi);
> > >   			continue;
> > >   		}
> > > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > > index 23574894e170..1573bf123197 100644
> > > --- a/fs/f2fs/super.c
> > > +++ b/fs/f2fs/super.c
> > > @@ -164,6 +164,7 @@ enum {
> > >   	Opt_discard_unit,
> > >   	Opt_memory_mode,
> > >   	Opt_age_extent_cache,
> > > +	Opt_errors,
> > >   	Opt_err,
> > >   };
> > > @@ -243,6 +244,7 @@ static match_table_t f2fs_tokens = {
> > >   	{Opt_discard_unit, "discard_unit=%s"},
> > >   	{Opt_memory_mode, "memory=%s"},
> > >   	{Opt_age_extent_cache, "age_extent_cache"},
> > > +	{Opt_errors, "errors=%s"},
> > >   	{Opt_err, NULL},
> > >   };
> > > @@ -1268,6 +1270,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
> > >   		case Opt_age_extent_cache:
> > >   			set_opt(sbi, AGE_EXTENT_CACHE);
> > >   			break;
> > > +		case Opt_errors:
> > > +			name = match_strdup(&args[0]);
> > > +			if (!name)
> > > +				return -ENOMEM;
> > > +			if (!strcmp(name, "remount-ro")) {
> > > +				F2FS_OPTION(sbi).errors =
> > > +						MOUNT_ERRORS_READONLY;
> > > +			} else if (!strcmp(name, "continue")) {
> > > +				F2FS_OPTION(sbi).errors =
> > > +						MOUNT_ERRORS_CONTINUE;
> > > +			} else if (!strcmp(name, "panic")) {
> > > +				F2FS_OPTION(sbi).errors =
> > > +						MOUNT_ERRORS_PANIC;
> > > +			} else {
> > > +				kfree(name);
> > > +				return -EINVAL;
> > > +			}
> > > +			kfree(name);
> > > +			break;
> > >   		default:
> > >   			f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
> > >   				 p);
> > > @@ -1623,6 +1644,9 @@ static void f2fs_put_super(struct super_block *sb)
> > >   	f2fs_destroy_node_manager(sbi);
> > >   	f2fs_destroy_segment_manager(sbi);
> > > +	/* flush s_error_work before sbi destroy */
> > > +	flush_work(&sbi->s_error_work);
> > > +
> > >   	f2fs_destroy_post_read_wq(sbi);
> > >   	kvfree(sbi->ckpt);
> > > @@ -2053,6 +2077,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
> > >   	else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW)
> > >   		seq_printf(seq, ",memory=%s", "low");
> > > +	if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
> > > +		seq_printf(seq, ",errors=%s", "remount-ro");
> > > +	else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE)
> > > +		seq_printf(seq, ",errors=%s", "continue");
> > > +	else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC)
> > > +		seq_printf(seq, ",errors=%s", "panic");
> > > +
> > >   	return 0;
> > >   }
> > > @@ -2081,6 +2112,7 @@ static void default_options(struct f2fs_sb_info *sbi)
> > >   	}
> > >   	F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
> > >   	F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL;
> > > +	F2FS_OPTION(sbi).errors = MOUNT_ERRORS_READONLY;
> > 
> > Was the default MOUNT_ERRORS_CONTINUE?
> 
> Okay, let me revise in v9.
> 
> Please note that main difference between continue mode and readonly mode once
> cp_error is set:
> 
> 		continue mode		readonly mode
> access ops:	Normal			Normal
> modify ops:	-EIO			-EROFS
> mount option	rw			ro

Could you please add the difference of all the modes in patch description?

I think it'd be good to add:
1. access ops
2. syscall errors
3. mount option
4. pending data writes
5. pending node writes
6. pending meta writes


> 
> Thanks,
> 
> > 
> > >   	sbi->sb->s_flags &= ~SB_INLINECRYPT;
> > > @@ -2282,6 +2314,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
> > >   	if (err)
> > >   		goto restore_opts;
> > > +	/* flush outstanding errors before changing fs state */
> > > +	flush_work(&sbi->s_error_work);
> > > +
> > >   	/*
> > >   	 * Previous and new state of filesystem is RO,
> > >   	 * so skip checking GC and FLUSH_MERGE conditions.
> > > @@ -3931,45 +3966,60 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
> > >   	return err;
> > >   }
> > > -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason)
> > > +static void save_stop_reason(struct f2fs_sb_info *sbi, unsigned char reason)
> > > +{
> > > +	unsigned long flags;
> > > +
> > > +	spin_lock_irqsave(&sbi->error_lock, flags);
> > > +	if (sbi->stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
> > > +		sbi->stop_reason[reason]++;
> > > +	spin_unlock_irqrestore(&sbi->error_lock, flags);
> > > +}
> > > +
> > > +static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
> > >   {
> > >   	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
> > > +	unsigned long flags;
> > >   	int err;
> > >   	f2fs_down_write(&sbi->sb_lock);
> > > -	if (raw_super->s_stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
> > > -		raw_super->s_stop_reason[reason]++;
> > > +	spin_lock_irqsave(&sbi->error_lock, flags);
> > > +	memcpy(raw_super->s_stop_reason, sbi->stop_reason, MAX_STOP_REASON);
> > > +	spin_unlock_irqrestore(&sbi->error_lock, flags);
> > >   	err = f2fs_commit_super(sbi, false);
> > > -	if (err)
> > > -		f2fs_err(sbi, "f2fs_commit_super fails to record reason:%u err:%d",
> > > -								reason, err);
> > > +
> > >   	f2fs_up_write(&sbi->sb_lock);
> > > +	if (err)
> > > +		f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err);
> > >   }
> > >   void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
> > >   {
> > > -	spin_lock(&sbi->error_lock);
> > > +	unsigned long flags;
> > > +
> > > +	spin_lock_irqsave(&sbi->error_lock, flags);
> > >   	if (!test_bit(flag, (unsigned long *)sbi->errors)) {
> > >   		set_bit(flag, (unsigned long *)sbi->errors);
> > >   		sbi->error_dirty = true;
> > >   	}
> > > -	spin_unlock(&sbi->error_lock);
> > > +	spin_unlock_irqrestore(&sbi->error_lock, flags);
> > >   }
> > >   static bool f2fs_update_errors(struct f2fs_sb_info *sbi)
> > >   {
> > > +	unsigned long flags;
> > >   	bool need_update = false;
> > > -	spin_lock(&sbi->error_lock);
> > > +	spin_lock_irqsave(&sbi->error_lock, flags);
> > >   	if (sbi->error_dirty) {
> > >   		memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors,
> > >   							MAX_F2FS_ERRORS);
> > >   		sbi->error_dirty = false;
> > >   		need_update = true;
> > >   	}
> > > -	spin_unlock(&sbi->error_lock);
> > > +	spin_unlock_irqrestore(&sbi->error_lock, flags);
> > >   	return need_update;
> > >   }
> > > @@ -3993,6 +4043,66 @@ void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error)
> > >   	f2fs_up_write(&sbi->sb_lock);
> > >   }
> > > +static bool system_going_down(void)
> > > +{
> > > +	return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
> > > +		|| system_state == SYSTEM_RESTART;
> > > +}
> > > +
> > > +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
> > > +							bool irq_context)
> > > +{
> > > +	struct super_block *sb = sbi->sb;
> > > +	bool shutdown = reason == STOP_CP_REASON_SHUTDOWN;
> > > +	bool continue_fs = !shutdown &&
> > > +			F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE;
> > > +
> > > +	set_ckpt_flags(sbi, CP_ERROR_FLAG);
> > > +
> > > +	if (!f2fs_hw_is_readonly(sbi)) {
> > > +		save_stop_reason(sbi, reason);
> > > +
> > > +		if (irq_context && !shutdown)
> > > +			schedule_work(&sbi->s_error_work);
> > > +		else
> > > +			f2fs_record_stop_reason(sbi);
> > > +	}
> > > +
> > > +	/*
> > > +	 * We force ERRORS_RO behavior when system is rebooting. Otherwise we
> > > +	 * could panic during 'reboot -f' as the underlying device got already
> > > +	 * disabled.
> > > +	 */
> > > +	if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC &&
> > > +				!shutdown && !system_going_down() &&
> > > +				!is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))
> > > +		panic("F2FS-fs (device %s): panic forced after error\n",
> > > +							sb->s_id);
> > > +
> > > +	if (shutdown)
> > > +		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > > +
> > > +	/* continue filesystem operators if errors=continue */
> > > +	if (continue_fs || f2fs_readonly(sb))
> > > +		return;
> > > +
> > > +	f2fs_warn(sbi, "Remounting filesystem read-only");
> > > +	/*
> > > +	 * Make sure updated value of ->s_mount_flags will be visible before
> > > +	 * ->s_flags update
> > > +	 */
> > > +	smp_wmb();
> > > +	sb->s_flags |= SB_RDONLY;
> > > +}
> > > +
> > > +static void f2fs_record_error_work(struct work_struct *work)
> > > +{
> > > +	struct f2fs_sb_info *sbi = container_of(work,
> > > +					struct f2fs_sb_info, s_error_work);
> > > +
> > > +	f2fs_record_stop_reason(sbi);
> > > +}
> > > +
> > >   static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
> > >   {
> > >   	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
> > > @@ -4223,7 +4333,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> > >   	sb->s_fs_info = sbi;
> > >   	sbi->raw_super = raw_super;
> > > +	INIT_WORK(&sbi->s_error_work, f2fs_record_error_work);
> > >   	memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS);
> > > +	memcpy(sbi->stop_reason, raw_super->s_stop_reason, MAX_STOP_REASON);
> > >   	/* precompute checksum seed for metadata */
> > >   	if (f2fs_sb_has_inode_chksum(sbi))
> > > @@ -4620,6 +4732,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> > >   	f2fs_destroy_segment_manager(sbi);
> > >   stop_ckpt_thread:
> > >   	f2fs_stop_ckpt_thread(sbi);
> > > +	/* flush s_error_work before sbi destroy */
> > > +	flush_work(&sbi->s_error_work);
> > >   	f2fs_destroy_post_read_wq(sbi);
> > >   free_devices:
> > >   	destroy_device_list(sbi);
> > > -- 
> > > 2.36.1
  
Chao Yu April 21, 2023, 1:56 a.m. UTC | #5
On 2023/4/21 0:34, Jaegeuk Kim wrote:
> On 04/20, Chao Yu wrote:
>> On 2023/4/20 4:19, Jaegeuk Kim wrote:
>>> On 04/08, Chao Yu wrote:
>>>> This patch supports errors=remount-ro|continue|panic mount option,
>>>> by default it uses "remount-ro" mode.
>>>>
>>>> Signed-off-by: Chao Yu <chao@kernel.org>
>>>> Signed-off-by: Yangtao Li <frank.li@vivo.com>
>>>> ---
>>>> v8:
>>>> - record stop reason synchronously if it's from ioctl shutdown interface
>>>> - fix typo in doc and commit message.
>>>> - use f2fs_hw_is_readonly() instead of bdev_read_only()
>>>>    Documentation/filesystems/f2fs.rst |   5 ++
>>>>    fs/f2fs/checkpoint.c               |   7 +-
>>>>    fs/f2fs/f2fs.h                     |  20 ++++-
>>>>    fs/f2fs/file.c                     |   5 --
>>>>    fs/f2fs/gc.c                       |   2 +-
>>>>    fs/f2fs/super.c                    | 134 ++++++++++++++++++++++++++---
>>>>    6 files changed, 149 insertions(+), 24 deletions(-)
>>>>
>>>> diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
>>>> index 2055e72871fe..ba4c02fad647 100644
>>>> --- a/Documentation/filesystems/f2fs.rst
>>>> +++ b/Documentation/filesystems/f2fs.rst
>>>> @@ -351,6 +351,11 @@ age_extent_cache	 Enable an age extent cache based on rb-tree. It records
>>>>    			 data block update frequency of the extent per inode, in
>>>>    			 order to provide better temperature hints for data block
>>>>    			 allocation.
>>>> +errors=%s		 Specify f2fs behavior on critical errors. This supports modes:
>>>> +			 "panic", "continue" and "remount-ro", respectively, trigger
>>>> +			 panic immediately, continue without doing anything, and remount
>>>> +			 the partition in read-only mode. By default it uses "remount-ro"
>>>> +			 mode.
>>>>    ======================== ============================================================
>>>>    Debugfs Entries
>>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>>> index 73ae4e85e70d..e6b266bb9ce0 100644
>>>> --- a/fs/f2fs/checkpoint.c
>>>> +++ b/fs/f2fs/checkpoint.c
>>>> @@ -30,12 +30,9 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
>>>>    						unsigned char reason)
>>>>    {
>>>>    	f2fs_build_fault_attr(sbi, 0, 0);
>>>> -	set_ckpt_flags(sbi, CP_ERROR_FLAG);
>>>> -	if (!end_io) {
>>>> +	if (!end_io)
>>>>    		f2fs_flush_merged_writes(sbi);
>>>> -
>>>> -		f2fs_handle_stop(sbi, reason);
>>>> -	}
>>>> +	f2fs_handle_critical_error(sbi, reason, end_io);
>>>>    }
>>>>    /*
>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>> index ac4942f8e83e..7d9c5a65f974 100644
>>>> --- a/fs/f2fs/f2fs.h
>>>> +++ b/fs/f2fs/f2fs.h
>>>> @@ -162,6 +162,7 @@ struct f2fs_mount_info {
>>>>    	int fs_mode;			/* fs mode: LFS or ADAPTIVE */
>>>>    	int bggc_mode;			/* bggc mode: off, on or sync */
>>>>    	int memory_mode;		/* memory mode */
>>>> +	int errors;			/* errors parameter */
>>>>    	int discard_unit;		/*
>>>>    					 * discard command's offset/size should
>>>>    					 * be aligned to this unit: block,
>>>> @@ -1370,6 +1371,12 @@ enum {
>>>>    	MEMORY_MODE_LOW,	/* memory mode for low memry devices */
>>>>    };
>>>> +enum errors_option {
>>>> +	MOUNT_ERRORS_READONLY,	/* remount fs ro on errors */
>>>> +	MOUNT_ERRORS_CONTINUE,	/* continue on errors */
>>>> +	MOUNT_ERRORS_PANIC,	/* panic on errors */
>>>> +};
>>>> +
>>>>    static inline int f2fs_test_bit(unsigned int nr, char *addr);
>>>>    static inline void f2fs_set_bit(unsigned int nr, char *addr);
>>>>    static inline void f2fs_clear_bit(unsigned int nr, char *addr);
>>>> @@ -1786,8 +1793,14 @@ struct f2fs_sb_info {
>>>>    	struct workqueue_struct *post_read_wq;	/* post read workqueue */
>>>> -	unsigned char errors[MAX_F2FS_ERRORS];	/* error flags */
>>>> -	spinlock_t error_lock;			/* protect errors array */
>>>> +	/*
>>>> +	 * If we are in irq context, let's update error information into
>>>> +	 * on-disk superblock in the work.
>>>> +	 */
>>>> +	struct work_struct s_error_work;
>>>> +	unsigned char errors[MAX_F2FS_ERRORS];		/* error flags */
>>>> +	unsigned char stop_reason[MAX_STOP_REASON];	/* stop reason */
>>>> +	spinlock_t error_lock;			/* protect errors/stop_reason array */
>>>>    	bool error_dirty;			/* errors of sb is dirty */
>>>>    	struct kmem_cache *inline_xattr_slab;	/* inline xattr entry */
>>>> @@ -3522,8 +3535,9 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
>>>>    int f2fs_quota_sync(struct super_block *sb, int type);
>>>>    loff_t max_file_blocks(struct inode *inode);
>>>>    void f2fs_quota_off_umount(struct super_block *sb);
>>>> -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason);
>>>>    void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag);
>>>> +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
>>>> +							bool irq_context);
>>>>    void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
>>>>    int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
>>>>    int f2fs_sync_fs(struct super_block *sb, int sync);
>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>> index 14e9a20e68df..10743e864d57 100644
>>>> --- a/fs/f2fs/file.c
>>>> +++ b/fs/f2fs/file.c
>>>> @@ -2221,7 +2221,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>>>>    				ret = 0;
>>>>    				f2fs_stop_checkpoint(sbi, false,
>>>>    						STOP_CP_REASON_SHUTDOWN);
>>>> -				set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>>    				trace_f2fs_shutdown(sbi, in, ret);
>>>>    			}
>>>>    			return ret;
>>>> @@ -2234,7 +2233,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>>>>    		if (ret)
>>>>    			goto out;
>>>>    		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>>>> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>>    		thaw_bdev(sb->s_bdev);
>>>>    		break;
>>>>    	case F2FS_GOING_DOWN_METASYNC:
>>>> @@ -2243,16 +2241,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>>>>    		if (ret)
>>>>    			goto out;
>>>>    		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>>>> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>>    		break;
>>>>    	case F2FS_GOING_DOWN_NOSYNC:
>>>>    		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>>>> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>>    		break;
>>>>    	case F2FS_GOING_DOWN_METAFLUSH:
>>>>    		f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
>>>>    		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>>>> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>>    		break;
>>>>    	case F2FS_GOING_DOWN_NEED_FSCK:
>>>>    		set_sbi_flag(sbi, SBI_NEED_FSCK);
>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>> index f1d0dd9c5a6c..6dc2bc710ba8 100644
>>>> --- a/fs/f2fs/gc.c
>>>> +++ b/fs/f2fs/gc.c
>>>> @@ -59,7 +59,7 @@ static int gc_thread_func(void *data)
>>>>    		if (gc_th->gc_wake)
>>>>    			gc_th->gc_wake = false;
>>>> -		if (try_to_freeze()) {
>>>> +		if (try_to_freeze() || f2fs_readonly(sbi->sb)) {
>>>>    			stat_other_skip_bggc_count(sbi);
>>>>    			continue;
>>>>    		}
>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>> index 23574894e170..1573bf123197 100644
>>>> --- a/fs/f2fs/super.c
>>>> +++ b/fs/f2fs/super.c
>>>> @@ -164,6 +164,7 @@ enum {
>>>>    	Opt_discard_unit,
>>>>    	Opt_memory_mode,
>>>>    	Opt_age_extent_cache,
>>>> +	Opt_errors,
>>>>    	Opt_err,
>>>>    };
>>>> @@ -243,6 +244,7 @@ static match_table_t f2fs_tokens = {
>>>>    	{Opt_discard_unit, "discard_unit=%s"},
>>>>    	{Opt_memory_mode, "memory=%s"},
>>>>    	{Opt_age_extent_cache, "age_extent_cache"},
>>>> +	{Opt_errors, "errors=%s"},
>>>>    	{Opt_err, NULL},
>>>>    };
>>>> @@ -1268,6 +1270,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>>>>    		case Opt_age_extent_cache:
>>>>    			set_opt(sbi, AGE_EXTENT_CACHE);
>>>>    			break;
>>>> +		case Opt_errors:
>>>> +			name = match_strdup(&args[0]);
>>>> +			if (!name)
>>>> +				return -ENOMEM;
>>>> +			if (!strcmp(name, "remount-ro")) {
>>>> +				F2FS_OPTION(sbi).errors =
>>>> +						MOUNT_ERRORS_READONLY;
>>>> +			} else if (!strcmp(name, "continue")) {
>>>> +				F2FS_OPTION(sbi).errors =
>>>> +						MOUNT_ERRORS_CONTINUE;
>>>> +			} else if (!strcmp(name, "panic")) {
>>>> +				F2FS_OPTION(sbi).errors =
>>>> +						MOUNT_ERRORS_PANIC;
>>>> +			} else {
>>>> +				kfree(name);
>>>> +				return -EINVAL;
>>>> +			}
>>>> +			kfree(name);
>>>> +			break;
>>>>    		default:
>>>>    			f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
>>>>    				 p);
>>>> @@ -1623,6 +1644,9 @@ static void f2fs_put_super(struct super_block *sb)
>>>>    	f2fs_destroy_node_manager(sbi);
>>>>    	f2fs_destroy_segment_manager(sbi);
>>>> +	/* flush s_error_work before sbi destroy */
>>>> +	flush_work(&sbi->s_error_work);
>>>> +
>>>>    	f2fs_destroy_post_read_wq(sbi);
>>>>    	kvfree(sbi->ckpt);
>>>> @@ -2053,6 +2077,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
>>>>    	else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW)
>>>>    		seq_printf(seq, ",memory=%s", "low");
>>>> +	if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
>>>> +		seq_printf(seq, ",errors=%s", "remount-ro");
>>>> +	else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE)
>>>> +		seq_printf(seq, ",errors=%s", "continue");
>>>> +	else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC)
>>>> +		seq_printf(seq, ",errors=%s", "panic");
>>>> +
>>>>    	return 0;
>>>>    }
>>>> @@ -2081,6 +2112,7 @@ static void default_options(struct f2fs_sb_info *sbi)
>>>>    	}
>>>>    	F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
>>>>    	F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL;
>>>> +	F2FS_OPTION(sbi).errors = MOUNT_ERRORS_READONLY;
>>>
>>> Was the default MOUNT_ERRORS_CONTINUE?
>>
>> Okay, let me revise in v9.
>>
>> Please note that main difference between continue mode and readonly mode once
>> cp_error is set:
>>
>> 		continue mode		readonly mode
>> access ops:	Normal			Normal
>> modify ops:	-EIO			-EROFS
>> mount option	rw			ro
> 
> Could you please add the difference of all the modes in patch description?
> 
> I think it'd be good to add:
> 1. access ops
> 2. syscall errors
> 3. mount option
> 4. pending data writes
> 5. pending node writes
> 6. pending meta writes

Sure, I'd like to add below description into f2fs.rst, any comments?

			 			continue	remount-ro	panic
			 access ops		normal		noraml		N/A
			 syscall errors		-EIO		-EROFS		N/A
			 mount option		rw		ro		N/A
			 pending dir write	keep		keep		N/A
			 pending nondir write	drop		drop		N/A
			 pending node write	drop		drop		N/A
			 pending meta write	keep		keep		N/A

Thanks,

> 
> 
>>
>> Thanks,
>>
>>>
>>>>    	sbi->sb->s_flags &= ~SB_INLINECRYPT;
>>>> @@ -2282,6 +2314,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>>>>    	if (err)
>>>>    		goto restore_opts;
>>>> +	/* flush outstanding errors before changing fs state */
>>>> +	flush_work(&sbi->s_error_work);
>>>> +
>>>>    	/*
>>>>    	 * Previous and new state of filesystem is RO,
>>>>    	 * so skip checking GC and FLUSH_MERGE conditions.
>>>> @@ -3931,45 +3966,60 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
>>>>    	return err;
>>>>    }
>>>> -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason)
>>>> +static void save_stop_reason(struct f2fs_sb_info *sbi, unsigned char reason)
>>>> +{
>>>> +	unsigned long flags;
>>>> +
>>>> +	spin_lock_irqsave(&sbi->error_lock, flags);
>>>> +	if (sbi->stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
>>>> +		sbi->stop_reason[reason]++;
>>>> +	spin_unlock_irqrestore(&sbi->error_lock, flags);
>>>> +}
>>>> +
>>>> +static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
>>>>    {
>>>>    	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
>>>> +	unsigned long flags;
>>>>    	int err;
>>>>    	f2fs_down_write(&sbi->sb_lock);
>>>> -	if (raw_super->s_stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
>>>> -		raw_super->s_stop_reason[reason]++;
>>>> +	spin_lock_irqsave(&sbi->error_lock, flags);
>>>> +	memcpy(raw_super->s_stop_reason, sbi->stop_reason, MAX_STOP_REASON);
>>>> +	spin_unlock_irqrestore(&sbi->error_lock, flags);
>>>>    	err = f2fs_commit_super(sbi, false);
>>>> -	if (err)
>>>> -		f2fs_err(sbi, "f2fs_commit_super fails to record reason:%u err:%d",
>>>> -								reason, err);
>>>> +
>>>>    	f2fs_up_write(&sbi->sb_lock);
>>>> +	if (err)
>>>> +		f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err);
>>>>    }
>>>>    void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
>>>>    {
>>>> -	spin_lock(&sbi->error_lock);
>>>> +	unsigned long flags;
>>>> +
>>>> +	spin_lock_irqsave(&sbi->error_lock, flags);
>>>>    	if (!test_bit(flag, (unsigned long *)sbi->errors)) {
>>>>    		set_bit(flag, (unsigned long *)sbi->errors);
>>>>    		sbi->error_dirty = true;
>>>>    	}
>>>> -	spin_unlock(&sbi->error_lock);
>>>> +	spin_unlock_irqrestore(&sbi->error_lock, flags);
>>>>    }
>>>>    static bool f2fs_update_errors(struct f2fs_sb_info *sbi)
>>>>    {
>>>> +	unsigned long flags;
>>>>    	bool need_update = false;
>>>> -	spin_lock(&sbi->error_lock);
>>>> +	spin_lock_irqsave(&sbi->error_lock, flags);
>>>>    	if (sbi->error_dirty) {
>>>>    		memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors,
>>>>    							MAX_F2FS_ERRORS);
>>>>    		sbi->error_dirty = false;
>>>>    		need_update = true;
>>>>    	}
>>>> -	spin_unlock(&sbi->error_lock);
>>>> +	spin_unlock_irqrestore(&sbi->error_lock, flags);
>>>>    	return need_update;
>>>>    }
>>>> @@ -3993,6 +4043,66 @@ void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error)
>>>>    	f2fs_up_write(&sbi->sb_lock);
>>>>    }
>>>> +static bool system_going_down(void)
>>>> +{
>>>> +	return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
>>>> +		|| system_state == SYSTEM_RESTART;
>>>> +}
>>>> +
>>>> +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
>>>> +							bool irq_context)
>>>> +{
>>>> +	struct super_block *sb = sbi->sb;
>>>> +	bool shutdown = reason == STOP_CP_REASON_SHUTDOWN;
>>>> +	bool continue_fs = !shutdown &&
>>>> +			F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE;
>>>> +
>>>> +	set_ckpt_flags(sbi, CP_ERROR_FLAG);
>>>> +
>>>> +	if (!f2fs_hw_is_readonly(sbi)) {
>>>> +		save_stop_reason(sbi, reason);
>>>> +
>>>> +		if (irq_context && !shutdown)
>>>> +			schedule_work(&sbi->s_error_work);
>>>> +		else
>>>> +			f2fs_record_stop_reason(sbi);
>>>> +	}
>>>> +
>>>> +	/*
>>>> +	 * We force ERRORS_RO behavior when system is rebooting. Otherwise we
>>>> +	 * could panic during 'reboot -f' as the underlying device got already
>>>> +	 * disabled.
>>>> +	 */
>>>> +	if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC &&
>>>> +				!shutdown && !system_going_down() &&
>>>> +				!is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))
>>>> +		panic("F2FS-fs (device %s): panic forced after error\n",
>>>> +							sb->s_id);
>>>> +
>>>> +	if (shutdown)
>>>> +		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>> +
>>>> +	/* continue filesystem operators if errors=continue */
>>>> +	if (continue_fs || f2fs_readonly(sb))
>>>> +		return;
>>>> +
>>>> +	f2fs_warn(sbi, "Remounting filesystem read-only");
>>>> +	/*
>>>> +	 * Make sure updated value of ->s_mount_flags will be visible before
>>>> +	 * ->s_flags update
>>>> +	 */
>>>> +	smp_wmb();
>>>> +	sb->s_flags |= SB_RDONLY;
>>>> +}
>>>> +
>>>> +static void f2fs_record_error_work(struct work_struct *work)
>>>> +{
>>>> +	struct f2fs_sb_info *sbi = container_of(work,
>>>> +					struct f2fs_sb_info, s_error_work);
>>>> +
>>>> +	f2fs_record_stop_reason(sbi);
>>>> +}
>>>> +
>>>>    static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
>>>>    {
>>>>    	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
>>>> @@ -4223,7 +4333,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>    	sb->s_fs_info = sbi;
>>>>    	sbi->raw_super = raw_super;
>>>> +	INIT_WORK(&sbi->s_error_work, f2fs_record_error_work);
>>>>    	memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS);
>>>> +	memcpy(sbi->stop_reason, raw_super->s_stop_reason, MAX_STOP_REASON);
>>>>    	/* precompute checksum seed for metadata */
>>>>    	if (f2fs_sb_has_inode_chksum(sbi))
>>>> @@ -4620,6 +4732,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>    	f2fs_destroy_segment_manager(sbi);
>>>>    stop_ckpt_thread:
>>>>    	f2fs_stop_ckpt_thread(sbi);
>>>> +	/* flush s_error_work before sbi destroy */
>>>> +	flush_work(&sbi->s_error_work);
>>>>    	f2fs_destroy_post_read_wq(sbi);
>>>>    free_devices:
>>>>    	destroy_device_list(sbi);
>>>> -- 
>>>> 2.36.1
  
Jaegeuk Kim April 21, 2023, 7:31 p.m. UTC | #6
On 04/21, Chao Yu wrote:
> On 2023/4/21 0:34, Jaegeuk Kim wrote:
> > On 04/20, Chao Yu wrote:
> > > On 2023/4/20 4:19, Jaegeuk Kim wrote:
> > > > On 04/08, Chao Yu wrote:
> > > > > This patch supports errors=remount-ro|continue|panic mount option,
> > > > > by default it uses "remount-ro" mode.
> > > > > 
> > > > > Signed-off-by: Chao Yu <chao@kernel.org>
> > > > > Signed-off-by: Yangtao Li <frank.li@vivo.com>
> > > > > ---
> > > > > v8:
> > > > > - record stop reason synchronously if it's from ioctl shutdown interface
> > > > > - fix typo in doc and commit message.
> > > > > - use f2fs_hw_is_readonly() instead of bdev_read_only()
> > > > >    Documentation/filesystems/f2fs.rst |   5 ++
> > > > >    fs/f2fs/checkpoint.c               |   7 +-
> > > > >    fs/f2fs/f2fs.h                     |  20 ++++-
> > > > >    fs/f2fs/file.c                     |   5 --
> > > > >    fs/f2fs/gc.c                       |   2 +-
> > > > >    fs/f2fs/super.c                    | 134 ++++++++++++++++++++++++++---
> > > > >    6 files changed, 149 insertions(+), 24 deletions(-)
> > > > > 
> > > > > diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
> > > > > index 2055e72871fe..ba4c02fad647 100644
> > > > > --- a/Documentation/filesystems/f2fs.rst
> > > > > +++ b/Documentation/filesystems/f2fs.rst
> > > > > @@ -351,6 +351,11 @@ age_extent_cache	 Enable an age extent cache based on rb-tree. It records
> > > > >    			 data block update frequency of the extent per inode, in
> > > > >    			 order to provide better temperature hints for data block
> > > > >    			 allocation.
> > > > > +errors=%s		 Specify f2fs behavior on critical errors. This supports modes:
> > > > > +			 "panic", "continue" and "remount-ro", respectively, trigger
> > > > > +			 panic immediately, continue without doing anything, and remount
> > > > > +			 the partition in read-only mode. By default it uses "remount-ro"
> > > > > +			 mode.
> > > > >    ======================== ============================================================
> > > > >    Debugfs Entries
> > > > > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > > > > index 73ae4e85e70d..e6b266bb9ce0 100644
> > > > > --- a/fs/f2fs/checkpoint.c
> > > > > +++ b/fs/f2fs/checkpoint.c
> > > > > @@ -30,12 +30,9 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
> > > > >    						unsigned char reason)
> > > > >    {
> > > > >    	f2fs_build_fault_attr(sbi, 0, 0);
> > > > > -	set_ckpt_flags(sbi, CP_ERROR_FLAG);
> > > > > -	if (!end_io) {
> > > > > +	if (!end_io)
> > > > >    		f2fs_flush_merged_writes(sbi);
> > > > > -
> > > > > -		f2fs_handle_stop(sbi, reason);
> > > > > -	}
> > > > > +	f2fs_handle_critical_error(sbi, reason, end_io);
> > > > >    }
> > > > >    /*
> > > > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > > > index ac4942f8e83e..7d9c5a65f974 100644
> > > > > --- a/fs/f2fs/f2fs.h
> > > > > +++ b/fs/f2fs/f2fs.h
> > > > > @@ -162,6 +162,7 @@ struct f2fs_mount_info {
> > > > >    	int fs_mode;			/* fs mode: LFS or ADAPTIVE */
> > > > >    	int bggc_mode;			/* bggc mode: off, on or sync */
> > > > >    	int memory_mode;		/* memory mode */
> > > > > +	int errors;			/* errors parameter */
> > > > >    	int discard_unit;		/*
> > > > >    					 * discard command's offset/size should
> > > > >    					 * be aligned to this unit: block,
> > > > > @@ -1370,6 +1371,12 @@ enum {
> > > > >    	MEMORY_MODE_LOW,	/* memory mode for low memry devices */
> > > > >    };
> > > > > +enum errors_option {
> > > > > +	MOUNT_ERRORS_READONLY,	/* remount fs ro on errors */
> > > > > +	MOUNT_ERRORS_CONTINUE,	/* continue on errors */
> > > > > +	MOUNT_ERRORS_PANIC,	/* panic on errors */
> > > > > +};
> > > > > +
> > > > >    static inline int f2fs_test_bit(unsigned int nr, char *addr);
> > > > >    static inline void f2fs_set_bit(unsigned int nr, char *addr);
> > > > >    static inline void f2fs_clear_bit(unsigned int nr, char *addr);
> > > > > @@ -1786,8 +1793,14 @@ struct f2fs_sb_info {
> > > > >    	struct workqueue_struct *post_read_wq;	/* post read workqueue */
> > > > > -	unsigned char errors[MAX_F2FS_ERRORS];	/* error flags */
> > > > > -	spinlock_t error_lock;			/* protect errors array */
> > > > > +	/*
> > > > > +	 * If we are in irq context, let's update error information into
> > > > > +	 * on-disk superblock in the work.
> > > > > +	 */
> > > > > +	struct work_struct s_error_work;
> > > > > +	unsigned char errors[MAX_F2FS_ERRORS];		/* error flags */
> > > > > +	unsigned char stop_reason[MAX_STOP_REASON];	/* stop reason */
> > > > > +	spinlock_t error_lock;			/* protect errors/stop_reason array */
> > > > >    	bool error_dirty;			/* errors of sb is dirty */
> > > > >    	struct kmem_cache *inline_xattr_slab;	/* inline xattr entry */
> > > > > @@ -3522,8 +3535,9 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
> > > > >    int f2fs_quota_sync(struct super_block *sb, int type);
> > > > >    loff_t max_file_blocks(struct inode *inode);
> > > > >    void f2fs_quota_off_umount(struct super_block *sb);
> > > > > -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason);
> > > > >    void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag);
> > > > > +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
> > > > > +							bool irq_context);
> > > > >    void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
> > > > >    int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
> > > > >    int f2fs_sync_fs(struct super_block *sb, int sync);
> > > > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > > > > index 14e9a20e68df..10743e864d57 100644
> > > > > --- a/fs/f2fs/file.c
> > > > > +++ b/fs/f2fs/file.c
> > > > > @@ -2221,7 +2221,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
> > > > >    				ret = 0;
> > > > >    				f2fs_stop_checkpoint(sbi, false,
> > > > >    						STOP_CP_REASON_SHUTDOWN);
> > > > > -				set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > > > >    				trace_f2fs_shutdown(sbi, in, ret);
> > > > >    			}
> > > > >    			return ret;
> > > > > @@ -2234,7 +2233,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
> > > > >    		if (ret)
> > > > >    			goto out;
> > > > >    		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> > > > > -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > > > >    		thaw_bdev(sb->s_bdev);
> > > > >    		break;
> > > > >    	case F2FS_GOING_DOWN_METASYNC:
> > > > > @@ -2243,16 +2241,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
> > > > >    		if (ret)
> > > > >    			goto out;
> > > > >    		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> > > > > -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > > > >    		break;
> > > > >    	case F2FS_GOING_DOWN_NOSYNC:
> > > > >    		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> > > > > -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > > > >    		break;
> > > > >    	case F2FS_GOING_DOWN_METAFLUSH:
> > > > >    		f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
> > > > >    		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> > > > > -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > > > >    		break;
> > > > >    	case F2FS_GOING_DOWN_NEED_FSCK:
> > > > >    		set_sbi_flag(sbi, SBI_NEED_FSCK);
> > > > > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> > > > > index f1d0dd9c5a6c..6dc2bc710ba8 100644
> > > > > --- a/fs/f2fs/gc.c
> > > > > +++ b/fs/f2fs/gc.c
> > > > > @@ -59,7 +59,7 @@ static int gc_thread_func(void *data)
> > > > >    		if (gc_th->gc_wake)
> > > > >    			gc_th->gc_wake = false;
> > > > > -		if (try_to_freeze()) {
> > > > > +		if (try_to_freeze() || f2fs_readonly(sbi->sb)) {
> > > > >    			stat_other_skip_bggc_count(sbi);
> > > > >    			continue;
> > > > >    		}
> > > > > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > > > > index 23574894e170..1573bf123197 100644
> > > > > --- a/fs/f2fs/super.c
> > > > > +++ b/fs/f2fs/super.c
> > > > > @@ -164,6 +164,7 @@ enum {
> > > > >    	Opt_discard_unit,
> > > > >    	Opt_memory_mode,
> > > > >    	Opt_age_extent_cache,
> > > > > +	Opt_errors,
> > > > >    	Opt_err,
> > > > >    };
> > > > > @@ -243,6 +244,7 @@ static match_table_t f2fs_tokens = {
> > > > >    	{Opt_discard_unit, "discard_unit=%s"},
> > > > >    	{Opt_memory_mode, "memory=%s"},
> > > > >    	{Opt_age_extent_cache, "age_extent_cache"},
> > > > > +	{Opt_errors, "errors=%s"},
> > > > >    	{Opt_err, NULL},
> > > > >    };
> > > > > @@ -1268,6 +1270,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
> > > > >    		case Opt_age_extent_cache:
> > > > >    			set_opt(sbi, AGE_EXTENT_CACHE);
> > > > >    			break;
> > > > > +		case Opt_errors:
> > > > > +			name = match_strdup(&args[0]);
> > > > > +			if (!name)
> > > > > +				return -ENOMEM;
> > > > > +			if (!strcmp(name, "remount-ro")) {
> > > > > +				F2FS_OPTION(sbi).errors =
> > > > > +						MOUNT_ERRORS_READONLY;
> > > > > +			} else if (!strcmp(name, "continue")) {
> > > > > +				F2FS_OPTION(sbi).errors =
> > > > > +						MOUNT_ERRORS_CONTINUE;
> > > > > +			} else if (!strcmp(name, "panic")) {
> > > > > +				F2FS_OPTION(sbi).errors =
> > > > > +						MOUNT_ERRORS_PANIC;
> > > > > +			} else {
> > > > > +				kfree(name);
> > > > > +				return -EINVAL;
> > > > > +			}
> > > > > +			kfree(name);
> > > > > +			break;
> > > > >    		default:
> > > > >    			f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
> > > > >    				 p);
> > > > > @@ -1623,6 +1644,9 @@ static void f2fs_put_super(struct super_block *sb)
> > > > >    	f2fs_destroy_node_manager(sbi);
> > > > >    	f2fs_destroy_segment_manager(sbi);
> > > > > +	/* flush s_error_work before sbi destroy */
> > > > > +	flush_work(&sbi->s_error_work);
> > > > > +
> > > > >    	f2fs_destroy_post_read_wq(sbi);
> > > > >    	kvfree(sbi->ckpt);
> > > > > @@ -2053,6 +2077,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
> > > > >    	else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW)
> > > > >    		seq_printf(seq, ",memory=%s", "low");
> > > > > +	if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
> > > > > +		seq_printf(seq, ",errors=%s", "remount-ro");
> > > > > +	else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE)
> > > > > +		seq_printf(seq, ",errors=%s", "continue");
> > > > > +	else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC)
> > > > > +		seq_printf(seq, ",errors=%s", "panic");
> > > > > +
> > > > >    	return 0;
> > > > >    }
> > > > > @@ -2081,6 +2112,7 @@ static void default_options(struct f2fs_sb_info *sbi)
> > > > >    	}
> > > > >    	F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
> > > > >    	F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL;
> > > > > +	F2FS_OPTION(sbi).errors = MOUNT_ERRORS_READONLY;
> > > > 
> > > > Was the default MOUNT_ERRORS_CONTINUE?
> > > 
> > > Okay, let me revise in v9.
> > > 
> > > Please note that main difference between continue mode and readonly mode once
> > > cp_error is set:
> > > 
> > > 		continue mode		readonly mode
> > > access ops:	Normal			Normal
> > > modify ops:	-EIO			-EROFS
> > > mount option	rw			ro
> > 
> > Could you please add the difference of all the modes in patch description?
> > 
> > I think it'd be good to add:
> > 1. access ops
> > 2. syscall errors
> > 3. mount option
> > 4. pending data writes
> > 5. pending node writes
> > 6. pending meta writes
> 
> Sure, I'd like to add below description into f2fs.rst, any comments?
> 
> 			 			continue	remount-ro	panic
> 			 access ops		normal		noraml		N/A
> 			 syscall errors		-EIO		-EROFS		N/A
> 			 mount option		rw		ro		N/A

So the only difference is the above two, as in any cases, we won't flush pending
pages at all? What about keeping nondir/node pages in the remount-ro case
differently?

> 			 pending dir write	keep		keep		N/A
> 			 pending nondir write	drop		drop		N/A
> 			 pending node write	drop		drop		N/A
> 			 pending meta write	keep		keep		N/A
> 
> Thanks,
> 
> > 
> > 
> > > 
> > > Thanks,
> > > 
> > > > 
> > > > >    	sbi->sb->s_flags &= ~SB_INLINECRYPT;
> > > > > @@ -2282,6 +2314,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
> > > > >    	if (err)
> > > > >    		goto restore_opts;
> > > > > +	/* flush outstanding errors before changing fs state */
> > > > > +	flush_work(&sbi->s_error_work);
> > > > > +
> > > > >    	/*
> > > > >    	 * Previous and new state of filesystem is RO,
> > > > >    	 * so skip checking GC and FLUSH_MERGE conditions.
> > > > > @@ -3931,45 +3966,60 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
> > > > >    	return err;
> > > > >    }
> > > > > -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason)
> > > > > +static void save_stop_reason(struct f2fs_sb_info *sbi, unsigned char reason)
> > > > > +{
> > > > > +	unsigned long flags;
> > > > > +
> > > > > +	spin_lock_irqsave(&sbi->error_lock, flags);
> > > > > +	if (sbi->stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
> > > > > +		sbi->stop_reason[reason]++;
> > > > > +	spin_unlock_irqrestore(&sbi->error_lock, flags);
> > > > > +}
> > > > > +
> > > > > +static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
> > > > >    {
> > > > >    	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
> > > > > +	unsigned long flags;
> > > > >    	int err;
> > > > >    	f2fs_down_write(&sbi->sb_lock);
> > > > > -	if (raw_super->s_stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
> > > > > -		raw_super->s_stop_reason[reason]++;
> > > > > +	spin_lock_irqsave(&sbi->error_lock, flags);
> > > > > +	memcpy(raw_super->s_stop_reason, sbi->stop_reason, MAX_STOP_REASON);
> > > > > +	spin_unlock_irqrestore(&sbi->error_lock, flags);
> > > > >    	err = f2fs_commit_super(sbi, false);
> > > > > -	if (err)
> > > > > -		f2fs_err(sbi, "f2fs_commit_super fails to record reason:%u err:%d",
> > > > > -								reason, err);
> > > > > +
> > > > >    	f2fs_up_write(&sbi->sb_lock);
> > > > > +	if (err)
> > > > > +		f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err);
> > > > >    }
> > > > >    void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
> > > > >    {
> > > > > -	spin_lock(&sbi->error_lock);
> > > > > +	unsigned long flags;
> > > > > +
> > > > > +	spin_lock_irqsave(&sbi->error_lock, flags);
> > > > >    	if (!test_bit(flag, (unsigned long *)sbi->errors)) {
> > > > >    		set_bit(flag, (unsigned long *)sbi->errors);
> > > > >    		sbi->error_dirty = true;
> > > > >    	}
> > > > > -	spin_unlock(&sbi->error_lock);
> > > > > +	spin_unlock_irqrestore(&sbi->error_lock, flags);
> > > > >    }
> > > > >    static bool f2fs_update_errors(struct f2fs_sb_info *sbi)
> > > > >    {
> > > > > +	unsigned long flags;
> > > > >    	bool need_update = false;
> > > > > -	spin_lock(&sbi->error_lock);
> > > > > +	spin_lock_irqsave(&sbi->error_lock, flags);
> > > > >    	if (sbi->error_dirty) {
> > > > >    		memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors,
> > > > >    							MAX_F2FS_ERRORS);
> > > > >    		sbi->error_dirty = false;
> > > > >    		need_update = true;
> > > > >    	}
> > > > > -	spin_unlock(&sbi->error_lock);
> > > > > +	spin_unlock_irqrestore(&sbi->error_lock, flags);
> > > > >    	return need_update;
> > > > >    }
> > > > > @@ -3993,6 +4043,66 @@ void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error)
> > > > >    	f2fs_up_write(&sbi->sb_lock);
> > > > >    }
> > > > > +static bool system_going_down(void)
> > > > > +{
> > > > > +	return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
> > > > > +		|| system_state == SYSTEM_RESTART;
> > > > > +}
> > > > > +
> > > > > +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
> > > > > +							bool irq_context)
> > > > > +{
> > > > > +	struct super_block *sb = sbi->sb;
> > > > > +	bool shutdown = reason == STOP_CP_REASON_SHUTDOWN;
> > > > > +	bool continue_fs = !shutdown &&
> > > > > +			F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE;
> > > > > +
> > > > > +	set_ckpt_flags(sbi, CP_ERROR_FLAG);
> > > > > +
> > > > > +	if (!f2fs_hw_is_readonly(sbi)) {
> > > > > +		save_stop_reason(sbi, reason);
> > > > > +
> > > > > +		if (irq_context && !shutdown)
> > > > > +			schedule_work(&sbi->s_error_work);
> > > > > +		else
> > > > > +			f2fs_record_stop_reason(sbi);
> > > > > +	}
> > > > > +
> > > > > +	/*
> > > > > +	 * We force ERRORS_RO behavior when system is rebooting. Otherwise we
> > > > > +	 * could panic during 'reboot -f' as the underlying device got already
> > > > > +	 * disabled.
> > > > > +	 */
> > > > > +	if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC &&
> > > > > +				!shutdown && !system_going_down() &&
> > > > > +				!is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))
> > > > > +		panic("F2FS-fs (device %s): panic forced after error\n",
> > > > > +							sb->s_id);
> > > > > +
> > > > > +	if (shutdown)
> > > > > +		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > > > > +
> > > > > +	/* continue filesystem operators if errors=continue */
> > > > > +	if (continue_fs || f2fs_readonly(sb))
> > > > > +		return;
> > > > > +
> > > > > +	f2fs_warn(sbi, "Remounting filesystem read-only");
> > > > > +	/*
> > > > > +	 * Make sure updated value of ->s_mount_flags will be visible before
> > > > > +	 * ->s_flags update
> > > > > +	 */
> > > > > +	smp_wmb();
> > > > > +	sb->s_flags |= SB_RDONLY;
> > > > > +}
> > > > > +
> > > > > +static void f2fs_record_error_work(struct work_struct *work)
> > > > > +{
> > > > > +	struct f2fs_sb_info *sbi = container_of(work,
> > > > > +					struct f2fs_sb_info, s_error_work);
> > > > > +
> > > > > +	f2fs_record_stop_reason(sbi);
> > > > > +}
> > > > > +
> > > > >    static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
> > > > >    {
> > > > >    	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
> > > > > @@ -4223,7 +4333,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> > > > >    	sb->s_fs_info = sbi;
> > > > >    	sbi->raw_super = raw_super;
> > > > > +	INIT_WORK(&sbi->s_error_work, f2fs_record_error_work);
> > > > >    	memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS);
> > > > > +	memcpy(sbi->stop_reason, raw_super->s_stop_reason, MAX_STOP_REASON);
> > > > >    	/* precompute checksum seed for metadata */
> > > > >    	if (f2fs_sb_has_inode_chksum(sbi))
> > > > > @@ -4620,6 +4732,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> > > > >    	f2fs_destroy_segment_manager(sbi);
> > > > >    stop_ckpt_thread:
> > > > >    	f2fs_stop_ckpt_thread(sbi);
> > > > > +	/* flush s_error_work before sbi destroy */
> > > > > +	flush_work(&sbi->s_error_work);
> > > > >    	f2fs_destroy_post_read_wq(sbi);
> > > > >    free_devices:
> > > > >    	destroy_device_list(sbi);
> > > > > -- 
> > > > > 2.36.1
  
Chao Yu April 22, 2023, 12:08 p.m. UTC | #7
On 2023/4/22 3:31, Jaegeuk Kim wrote:
> On 04/21, Chao Yu wrote:
>> On 2023/4/21 0:34, Jaegeuk Kim wrote:
>>> On 04/20, Chao Yu wrote:
>>>> On 2023/4/20 4:19, Jaegeuk Kim wrote:
>>>>> On 04/08, Chao Yu wrote:
>>>>>> This patch supports errors=remount-ro|continue|panic mount option,
>>>>>> by default it uses "remount-ro" mode.
>>>>>>
>>>>>> Signed-off-by: Chao Yu <chao@kernel.org>
>>>>>> Signed-off-by: Yangtao Li <frank.li@vivo.com>
>>>>>> ---
>>>>>> v8:
>>>>>> - record stop reason synchronously if it's from ioctl shutdown interface
>>>>>> - fix typo in doc and commit message.
>>>>>> - use f2fs_hw_is_readonly() instead of bdev_read_only()
>>>>>>     Documentation/filesystems/f2fs.rst |   5 ++
>>>>>>     fs/f2fs/checkpoint.c               |   7 +-
>>>>>>     fs/f2fs/f2fs.h                     |  20 ++++-
>>>>>>     fs/f2fs/file.c                     |   5 --
>>>>>>     fs/f2fs/gc.c                       |   2 +-
>>>>>>     fs/f2fs/super.c                    | 134 ++++++++++++++++++++++++++---
>>>>>>     6 files changed, 149 insertions(+), 24 deletions(-)
>>>>>>
>>>>>> diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
>>>>>> index 2055e72871fe..ba4c02fad647 100644
>>>>>> --- a/Documentation/filesystems/f2fs.rst
>>>>>> +++ b/Documentation/filesystems/f2fs.rst
>>>>>> @@ -351,6 +351,11 @@ age_extent_cache	 Enable an age extent cache based on rb-tree. It records
>>>>>>     			 data block update frequency of the extent per inode, in
>>>>>>     			 order to provide better temperature hints for data block
>>>>>>     			 allocation.
>>>>>> +errors=%s		 Specify f2fs behavior on critical errors. This supports modes:
>>>>>> +			 "panic", "continue" and "remount-ro", respectively, trigger
>>>>>> +			 panic immediately, continue without doing anything, and remount
>>>>>> +			 the partition in read-only mode. By default it uses "remount-ro"
>>>>>> +			 mode.
>>>>>>     ======================== ============================================================
>>>>>>     Debugfs Entries
>>>>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>>>>> index 73ae4e85e70d..e6b266bb9ce0 100644
>>>>>> --- a/fs/f2fs/checkpoint.c
>>>>>> +++ b/fs/f2fs/checkpoint.c
>>>>>> @@ -30,12 +30,9 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
>>>>>>     						unsigned char reason)
>>>>>>     {
>>>>>>     	f2fs_build_fault_attr(sbi, 0, 0);
>>>>>> -	set_ckpt_flags(sbi, CP_ERROR_FLAG);
>>>>>> -	if (!end_io) {
>>>>>> +	if (!end_io)
>>>>>>     		f2fs_flush_merged_writes(sbi);
>>>>>> -
>>>>>> -		f2fs_handle_stop(sbi, reason);
>>>>>> -	}
>>>>>> +	f2fs_handle_critical_error(sbi, reason, end_io);
>>>>>>     }
>>>>>>     /*
>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>> index ac4942f8e83e..7d9c5a65f974 100644
>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>> @@ -162,6 +162,7 @@ struct f2fs_mount_info {
>>>>>>     	int fs_mode;			/* fs mode: LFS or ADAPTIVE */
>>>>>>     	int bggc_mode;			/* bggc mode: off, on or sync */
>>>>>>     	int memory_mode;		/* memory mode */
>>>>>> +	int errors;			/* errors parameter */
>>>>>>     	int discard_unit;		/*
>>>>>>     					 * discard command's offset/size should
>>>>>>     					 * be aligned to this unit: block,
>>>>>> @@ -1370,6 +1371,12 @@ enum {
>>>>>>     	MEMORY_MODE_LOW,	/* memory mode for low memry devices */
>>>>>>     };
>>>>>> +enum errors_option {
>>>>>> +	MOUNT_ERRORS_READONLY,	/* remount fs ro on errors */
>>>>>> +	MOUNT_ERRORS_CONTINUE,	/* continue on errors */
>>>>>> +	MOUNT_ERRORS_PANIC,	/* panic on errors */
>>>>>> +};
>>>>>> +
>>>>>>     static inline int f2fs_test_bit(unsigned int nr, char *addr);
>>>>>>     static inline void f2fs_set_bit(unsigned int nr, char *addr);
>>>>>>     static inline void f2fs_clear_bit(unsigned int nr, char *addr);
>>>>>> @@ -1786,8 +1793,14 @@ struct f2fs_sb_info {
>>>>>>     	struct workqueue_struct *post_read_wq;	/* post read workqueue */
>>>>>> -	unsigned char errors[MAX_F2FS_ERRORS];	/* error flags */
>>>>>> -	spinlock_t error_lock;			/* protect errors array */
>>>>>> +	/*
>>>>>> +	 * If we are in irq context, let's update error information into
>>>>>> +	 * on-disk superblock in the work.
>>>>>> +	 */
>>>>>> +	struct work_struct s_error_work;
>>>>>> +	unsigned char errors[MAX_F2FS_ERRORS];		/* error flags */
>>>>>> +	unsigned char stop_reason[MAX_STOP_REASON];	/* stop reason */
>>>>>> +	spinlock_t error_lock;			/* protect errors/stop_reason array */
>>>>>>     	bool error_dirty;			/* errors of sb is dirty */
>>>>>>     	struct kmem_cache *inline_xattr_slab;	/* inline xattr entry */
>>>>>> @@ -3522,8 +3535,9 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
>>>>>>     int f2fs_quota_sync(struct super_block *sb, int type);
>>>>>>     loff_t max_file_blocks(struct inode *inode);
>>>>>>     void f2fs_quota_off_umount(struct super_block *sb);
>>>>>> -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason);
>>>>>>     void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag);
>>>>>> +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
>>>>>> +							bool irq_context);
>>>>>>     void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
>>>>>>     int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
>>>>>>     int f2fs_sync_fs(struct super_block *sb, int sync);
>>>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>>>> index 14e9a20e68df..10743e864d57 100644
>>>>>> --- a/fs/f2fs/file.c
>>>>>> +++ b/fs/f2fs/file.c
>>>>>> @@ -2221,7 +2221,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>>>>>>     				ret = 0;
>>>>>>     				f2fs_stop_checkpoint(sbi, false,
>>>>>>     						STOP_CP_REASON_SHUTDOWN);
>>>>>> -				set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>>>>     				trace_f2fs_shutdown(sbi, in, ret);
>>>>>>     			}
>>>>>>     			return ret;
>>>>>> @@ -2234,7 +2233,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>>>>>>     		if (ret)
>>>>>>     			goto out;
>>>>>>     		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>>>>>> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>>>>     		thaw_bdev(sb->s_bdev);
>>>>>>     		break;
>>>>>>     	case F2FS_GOING_DOWN_METASYNC:
>>>>>> @@ -2243,16 +2241,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>>>>>>     		if (ret)
>>>>>>     			goto out;
>>>>>>     		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>>>>>> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>>>>     		break;
>>>>>>     	case F2FS_GOING_DOWN_NOSYNC:
>>>>>>     		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>>>>>> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>>>>     		break;
>>>>>>     	case F2FS_GOING_DOWN_METAFLUSH:
>>>>>>     		f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
>>>>>>     		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>>>>>> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>>>>     		break;
>>>>>>     	case F2FS_GOING_DOWN_NEED_FSCK:
>>>>>>     		set_sbi_flag(sbi, SBI_NEED_FSCK);
>>>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>>>> index f1d0dd9c5a6c..6dc2bc710ba8 100644
>>>>>> --- a/fs/f2fs/gc.c
>>>>>> +++ b/fs/f2fs/gc.c
>>>>>> @@ -59,7 +59,7 @@ static int gc_thread_func(void *data)
>>>>>>     		if (gc_th->gc_wake)
>>>>>>     			gc_th->gc_wake = false;
>>>>>> -		if (try_to_freeze()) {
>>>>>> +		if (try_to_freeze() || f2fs_readonly(sbi->sb)) {
>>>>>>     			stat_other_skip_bggc_count(sbi);
>>>>>>     			continue;
>>>>>>     		}
>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>>> index 23574894e170..1573bf123197 100644
>>>>>> --- a/fs/f2fs/super.c
>>>>>> +++ b/fs/f2fs/super.c
>>>>>> @@ -164,6 +164,7 @@ enum {
>>>>>>     	Opt_discard_unit,
>>>>>>     	Opt_memory_mode,
>>>>>>     	Opt_age_extent_cache,
>>>>>> +	Opt_errors,
>>>>>>     	Opt_err,
>>>>>>     };
>>>>>> @@ -243,6 +244,7 @@ static match_table_t f2fs_tokens = {
>>>>>>     	{Opt_discard_unit, "discard_unit=%s"},
>>>>>>     	{Opt_memory_mode, "memory=%s"},
>>>>>>     	{Opt_age_extent_cache, "age_extent_cache"},
>>>>>> +	{Opt_errors, "errors=%s"},
>>>>>>     	{Opt_err, NULL},
>>>>>>     };
>>>>>> @@ -1268,6 +1270,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>>>>>>     		case Opt_age_extent_cache:
>>>>>>     			set_opt(sbi, AGE_EXTENT_CACHE);
>>>>>>     			break;
>>>>>> +		case Opt_errors:
>>>>>> +			name = match_strdup(&args[0]);
>>>>>> +			if (!name)
>>>>>> +				return -ENOMEM;
>>>>>> +			if (!strcmp(name, "remount-ro")) {
>>>>>> +				F2FS_OPTION(sbi).errors =
>>>>>> +						MOUNT_ERRORS_READONLY;
>>>>>> +			} else if (!strcmp(name, "continue")) {
>>>>>> +				F2FS_OPTION(sbi).errors =
>>>>>> +						MOUNT_ERRORS_CONTINUE;
>>>>>> +			} else if (!strcmp(name, "panic")) {
>>>>>> +				F2FS_OPTION(sbi).errors =
>>>>>> +						MOUNT_ERRORS_PANIC;
>>>>>> +			} else {
>>>>>> +				kfree(name);
>>>>>> +				return -EINVAL;
>>>>>> +			}
>>>>>> +			kfree(name);
>>>>>> +			break;
>>>>>>     		default:
>>>>>>     			f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
>>>>>>     				 p);
>>>>>> @@ -1623,6 +1644,9 @@ static void f2fs_put_super(struct super_block *sb)
>>>>>>     	f2fs_destroy_node_manager(sbi);
>>>>>>     	f2fs_destroy_segment_manager(sbi);
>>>>>> +	/* flush s_error_work before sbi destroy */
>>>>>> +	flush_work(&sbi->s_error_work);
>>>>>> +
>>>>>>     	f2fs_destroy_post_read_wq(sbi);
>>>>>>     	kvfree(sbi->ckpt);
>>>>>> @@ -2053,6 +2077,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
>>>>>>     	else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW)
>>>>>>     		seq_printf(seq, ",memory=%s", "low");
>>>>>> +	if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
>>>>>> +		seq_printf(seq, ",errors=%s", "remount-ro");
>>>>>> +	else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE)
>>>>>> +		seq_printf(seq, ",errors=%s", "continue");
>>>>>> +	else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC)
>>>>>> +		seq_printf(seq, ",errors=%s", "panic");
>>>>>> +
>>>>>>     	return 0;
>>>>>>     }
>>>>>> @@ -2081,6 +2112,7 @@ static void default_options(struct f2fs_sb_info *sbi)
>>>>>>     	}
>>>>>>     	F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
>>>>>>     	F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL;
>>>>>> +	F2FS_OPTION(sbi).errors = MOUNT_ERRORS_READONLY;
>>>>>
>>>>> Was the default MOUNT_ERRORS_CONTINUE?
>>>>
>>>> Okay, let me revise in v9.
>>>>
>>>> Please note that main difference between continue mode and readonly mode once
>>>> cp_error is set:
>>>>
>>>> 		continue mode		readonly mode
>>>> access ops:	Normal			Normal
>>>> modify ops:	-EIO			-EROFS
>>>> mount option	rw			ro
>>>
>>> Could you please add the difference of all the modes in patch description?
>>>
>>> I think it'd be good to add:
>>> 1. access ops
>>> 2. syscall errors
>>> 3. mount option
>>> 4. pending data writes
>>> 5. pending node writes
>>> 6. pending meta writes
>>
>> Sure, I'd like to add below description into f2fs.rst, any comments?
>>
>> 			 			continue	remount-ro	panic
>> 			 access ops		normal		noraml		N/A
>> 			 syscall errors		-EIO		-EROFS		N/A
>> 			 mount option		rw		ro		N/A
> 
> So the only difference is the above two, as in any cases, we won't flush pending
> pages at all? What about keeping nondir/node pages in the remount-ro case
> differently?

I revised v10 w/ below diff.

---
  Documentation/filesystems/f2fs.rst | 8 ++++++++
  fs/f2fs/data.c                     | 4 ++++
  fs/f2fs/node.c                     | 3 +++
  3 files changed, 15 insertions(+)

diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index ce0963ba255b..79670e182250 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -356,6 +356,14 @@ errors=%s		 Specify f2fs behavior on critical errors. This supports modes:
  			 panic immediately, continue without doing anything, and remount
  			 the partition in read-only mode. By default it uses "continue"
  			 mode.
+			 			continue	remount-ro	panic
+			 access ops		normal		noraml		N/A
+			 syscall errors		-EIO		-EROFS		N/A
+			 mount option		rw		ro		N/A
+			 pending dir write	keep		keep		N/A
+			 pending non-dir write	drop		keep		N/A
+			 pending node write	drop		keep		N/A
+			 pending meta write	keep		keep		N/A
  ======================== ============================================================

  Debugfs Entries
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7165b1202f53..f26eac327d6e 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2807,6 +2807,10 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
  		if (S_ISDIR(inode->i_mode) &&
  				!is_sbi_flag_set(sbi, SBI_IS_CLOSE))
  			goto redirty_out;
+
+		/* keep data pages in remount-ro mode */
+		if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
+			goto redirty_out;
  		goto out;
  	}

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index bd1dad523796..834c6f099c95 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1596,6 +1596,9 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
  	trace_f2fs_writepage(page, NODE);

  	if (unlikely(f2fs_cp_error(sbi))) {
+		/* keep node pages in remount-ro mode */
+		if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
+			goto redirty_out;
  		ClearPageUptodate(page);
  		dec_page_count(sbi, F2FS_DIRTY_NODES);
  		unlock_page(page);
  

Patch

diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index 2055e72871fe..ba4c02fad647 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -351,6 +351,11 @@  age_extent_cache	 Enable an age extent cache based on rb-tree. It records
 			 data block update frequency of the extent per inode, in
 			 order to provide better temperature hints for data block
 			 allocation.
+errors=%s		 Specify f2fs behavior on critical errors. This supports modes:
+			 "panic", "continue" and "remount-ro", respectively, trigger
+			 panic immediately, continue without doing anything, and remount
+			 the partition in read-only mode. By default it uses "remount-ro"
+			 mode.
 ======================== ============================================================
 
 Debugfs Entries
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 73ae4e85e70d..e6b266bb9ce0 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -30,12 +30,9 @@  void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
 						unsigned char reason)
 {
 	f2fs_build_fault_attr(sbi, 0, 0);
-	set_ckpt_flags(sbi, CP_ERROR_FLAG);
-	if (!end_io) {
+	if (!end_io)
 		f2fs_flush_merged_writes(sbi);
-
-		f2fs_handle_stop(sbi, reason);
-	}
+	f2fs_handle_critical_error(sbi, reason, end_io);
 }
 
 /*
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ac4942f8e83e..7d9c5a65f974 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -162,6 +162,7 @@  struct f2fs_mount_info {
 	int fs_mode;			/* fs mode: LFS or ADAPTIVE */
 	int bggc_mode;			/* bggc mode: off, on or sync */
 	int memory_mode;		/* memory mode */
+	int errors;			/* errors parameter */
 	int discard_unit;		/*
 					 * discard command's offset/size should
 					 * be aligned to this unit: block,
@@ -1370,6 +1371,12 @@  enum {
 	MEMORY_MODE_LOW,	/* memory mode for low memry devices */
 };
 
+enum errors_option {
+	MOUNT_ERRORS_READONLY,	/* remount fs ro on errors */
+	MOUNT_ERRORS_CONTINUE,	/* continue on errors */
+	MOUNT_ERRORS_PANIC,	/* panic on errors */
+};
+
 static inline int f2fs_test_bit(unsigned int nr, char *addr);
 static inline void f2fs_set_bit(unsigned int nr, char *addr);
 static inline void f2fs_clear_bit(unsigned int nr, char *addr);
@@ -1786,8 +1793,14 @@  struct f2fs_sb_info {
 
 	struct workqueue_struct *post_read_wq;	/* post read workqueue */
 
-	unsigned char errors[MAX_F2FS_ERRORS];	/* error flags */
-	spinlock_t error_lock;			/* protect errors array */
+	/*
+	 * If we are in irq context, let's update error information into
+	 * on-disk superblock in the work.
+	 */
+	struct work_struct s_error_work;
+	unsigned char errors[MAX_F2FS_ERRORS];		/* error flags */
+	unsigned char stop_reason[MAX_STOP_REASON];	/* stop reason */
+	spinlock_t error_lock;			/* protect errors/stop_reason array */
 	bool error_dirty;			/* errors of sb is dirty */
 
 	struct kmem_cache *inline_xattr_slab;	/* inline xattr entry */
@@ -3522,8 +3535,9 @@  int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
 int f2fs_quota_sync(struct super_block *sb, int type);
 loff_t max_file_blocks(struct inode *inode);
 void f2fs_quota_off_umount(struct super_block *sb);
-void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason);
 void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag);
+void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
+							bool irq_context);
 void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
 int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
 int f2fs_sync_fs(struct super_block *sb, int sync);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 14e9a20e68df..10743e864d57 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2221,7 +2221,6 @@  static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
 				ret = 0;
 				f2fs_stop_checkpoint(sbi, false,
 						STOP_CP_REASON_SHUTDOWN);
-				set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
 				trace_f2fs_shutdown(sbi, in, ret);
 			}
 			return ret;
@@ -2234,7 +2233,6 @@  static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
 		if (ret)
 			goto out;
 		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
-		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
 		thaw_bdev(sb->s_bdev);
 		break;
 	case F2FS_GOING_DOWN_METASYNC:
@@ -2243,16 +2241,13 @@  static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
 		if (ret)
 			goto out;
 		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
-		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
 		break;
 	case F2FS_GOING_DOWN_NOSYNC:
 		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
-		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
 		break;
 	case F2FS_GOING_DOWN_METAFLUSH:
 		f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
 		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
-		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
 		break;
 	case F2FS_GOING_DOWN_NEED_FSCK:
 		set_sbi_flag(sbi, SBI_NEED_FSCK);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index f1d0dd9c5a6c..6dc2bc710ba8 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -59,7 +59,7 @@  static int gc_thread_func(void *data)
 		if (gc_th->gc_wake)
 			gc_th->gc_wake = false;
 
-		if (try_to_freeze()) {
+		if (try_to_freeze() || f2fs_readonly(sbi->sb)) {
 			stat_other_skip_bggc_count(sbi);
 			continue;
 		}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 23574894e170..1573bf123197 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -164,6 +164,7 @@  enum {
 	Opt_discard_unit,
 	Opt_memory_mode,
 	Opt_age_extent_cache,
+	Opt_errors,
 	Opt_err,
 };
 
@@ -243,6 +244,7 @@  static match_table_t f2fs_tokens = {
 	{Opt_discard_unit, "discard_unit=%s"},
 	{Opt_memory_mode, "memory=%s"},
 	{Opt_age_extent_cache, "age_extent_cache"},
+	{Opt_errors, "errors=%s"},
 	{Opt_err, NULL},
 };
 
@@ -1268,6 +1270,25 @@  static int parse_options(struct super_block *sb, char *options, bool is_remount)
 		case Opt_age_extent_cache:
 			set_opt(sbi, AGE_EXTENT_CACHE);
 			break;
+		case Opt_errors:
+			name = match_strdup(&args[0]);
+			if (!name)
+				return -ENOMEM;
+			if (!strcmp(name, "remount-ro")) {
+				F2FS_OPTION(sbi).errors =
+						MOUNT_ERRORS_READONLY;
+			} else if (!strcmp(name, "continue")) {
+				F2FS_OPTION(sbi).errors =
+						MOUNT_ERRORS_CONTINUE;
+			} else if (!strcmp(name, "panic")) {
+				F2FS_OPTION(sbi).errors =
+						MOUNT_ERRORS_PANIC;
+			} else {
+				kfree(name);
+				return -EINVAL;
+			}
+			kfree(name);
+			break;
 		default:
 			f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
 				 p);
@@ -1623,6 +1644,9 @@  static void f2fs_put_super(struct super_block *sb)
 	f2fs_destroy_node_manager(sbi);
 	f2fs_destroy_segment_manager(sbi);
 
+	/* flush s_error_work before sbi destroy */
+	flush_work(&sbi->s_error_work);
+
 	f2fs_destroy_post_read_wq(sbi);
 
 	kvfree(sbi->ckpt);
@@ -2053,6 +2077,13 @@  static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 	else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW)
 		seq_printf(seq, ",memory=%s", "low");
 
+	if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
+		seq_printf(seq, ",errors=%s", "remount-ro");
+	else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE)
+		seq_printf(seq, ",errors=%s", "continue");
+	else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC)
+		seq_printf(seq, ",errors=%s", "panic");
+
 	return 0;
 }
 
@@ -2081,6 +2112,7 @@  static void default_options(struct f2fs_sb_info *sbi)
 	}
 	F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
 	F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL;
+	F2FS_OPTION(sbi).errors = MOUNT_ERRORS_READONLY;
 
 	sbi->sb->s_flags &= ~SB_INLINECRYPT;
 
@@ -2282,6 +2314,9 @@  static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	if (err)
 		goto restore_opts;
 
+	/* flush outstanding errors before changing fs state */
+	flush_work(&sbi->s_error_work);
+
 	/*
 	 * Previous and new state of filesystem is RO,
 	 * so skip checking GC and FLUSH_MERGE conditions.
@@ -3931,45 +3966,60 @@  int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
 	return err;
 }
 
-void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason)
+static void save_stop_reason(struct f2fs_sb_info *sbi, unsigned char reason)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&sbi->error_lock, flags);
+	if (sbi->stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
+		sbi->stop_reason[reason]++;
+	spin_unlock_irqrestore(&sbi->error_lock, flags);
+}
+
+static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
+	unsigned long flags;
 	int err;
 
 	f2fs_down_write(&sbi->sb_lock);
 
-	if (raw_super->s_stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
-		raw_super->s_stop_reason[reason]++;
+	spin_lock_irqsave(&sbi->error_lock, flags);
+	memcpy(raw_super->s_stop_reason, sbi->stop_reason, MAX_STOP_REASON);
+	spin_unlock_irqrestore(&sbi->error_lock, flags);
 
 	err = f2fs_commit_super(sbi, false);
-	if (err)
-		f2fs_err(sbi, "f2fs_commit_super fails to record reason:%u err:%d",
-								reason, err);
+
 	f2fs_up_write(&sbi->sb_lock);
+	if (err)
+		f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err);
 }
 
 void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
 {
-	spin_lock(&sbi->error_lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&sbi->error_lock, flags);
 	if (!test_bit(flag, (unsigned long *)sbi->errors)) {
 		set_bit(flag, (unsigned long *)sbi->errors);
 		sbi->error_dirty = true;
 	}
-	spin_unlock(&sbi->error_lock);
+	spin_unlock_irqrestore(&sbi->error_lock, flags);
 }
 
 static bool f2fs_update_errors(struct f2fs_sb_info *sbi)
 {
+	unsigned long flags;
 	bool need_update = false;
 
-	spin_lock(&sbi->error_lock);
+	spin_lock_irqsave(&sbi->error_lock, flags);
 	if (sbi->error_dirty) {
 		memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors,
 							MAX_F2FS_ERRORS);
 		sbi->error_dirty = false;
 		need_update = true;
 	}
-	spin_unlock(&sbi->error_lock);
+	spin_unlock_irqrestore(&sbi->error_lock, flags);
 
 	return need_update;
 }
@@ -3993,6 +4043,66 @@  void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error)
 	f2fs_up_write(&sbi->sb_lock);
 }
 
+static bool system_going_down(void)
+{
+	return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
+		|| system_state == SYSTEM_RESTART;
+}
+
+void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
+							bool irq_context)
+{
+	struct super_block *sb = sbi->sb;
+	bool shutdown = reason == STOP_CP_REASON_SHUTDOWN;
+	bool continue_fs = !shutdown &&
+			F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE;
+
+	set_ckpt_flags(sbi, CP_ERROR_FLAG);
+
+	if (!f2fs_hw_is_readonly(sbi)) {
+		save_stop_reason(sbi, reason);
+
+		if (irq_context && !shutdown)
+			schedule_work(&sbi->s_error_work);
+		else
+			f2fs_record_stop_reason(sbi);
+	}
+
+	/*
+	 * We force ERRORS_RO behavior when system is rebooting. Otherwise we
+	 * could panic during 'reboot -f' as the underlying device got already
+	 * disabled.
+	 */
+	if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC &&
+				!shutdown && !system_going_down() &&
+				!is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))
+		panic("F2FS-fs (device %s): panic forced after error\n",
+							sb->s_id);
+
+	if (shutdown)
+		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
+
+	/* continue filesystem operators if errors=continue */
+	if (continue_fs || f2fs_readonly(sb))
+		return;
+
+	f2fs_warn(sbi, "Remounting filesystem read-only");
+	/*
+	 * Make sure updated value of ->s_mount_flags will be visible before
+	 * ->s_flags update
+	 */
+	smp_wmb();
+	sb->s_flags |= SB_RDONLY;
+}
+
+static void f2fs_record_error_work(struct work_struct *work)
+{
+	struct f2fs_sb_info *sbi = container_of(work,
+					struct f2fs_sb_info, s_error_work);
+
+	f2fs_record_stop_reason(sbi);
+}
+
 static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
@@ -4223,7 +4333,9 @@  static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_fs_info = sbi;
 	sbi->raw_super = raw_super;
 
+	INIT_WORK(&sbi->s_error_work, f2fs_record_error_work);
 	memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS);
+	memcpy(sbi->stop_reason, raw_super->s_stop_reason, MAX_STOP_REASON);
 
 	/* precompute checksum seed for metadata */
 	if (f2fs_sb_has_inode_chksum(sbi))
@@ -4620,6 +4732,8 @@  static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	f2fs_destroy_segment_manager(sbi);
 stop_ckpt_thread:
 	f2fs_stop_ckpt_thread(sbi);
+	/* flush s_error_work before sbi destroy */
+	flush_work(&sbi->s_error_work);
 	f2fs_destroy_post_read_wq(sbi);
 free_devices:
 	destroy_device_list(sbi);