[v2] f2fs: introduce discard_cpuset mount opt
Commit Message
It makes the discard process run faster on a more powerful CPU, or not.
And if bind it to a specific cpu, it is possible to have more cache
locality.
Signed-off-by: Yangtao Li <frank.li@vivo.com>
---
v2:
-fix kernel test robot warn
Documentation/filesystems/f2fs.rst | 2 ++
fs/f2fs/f2fs.h | 1 +
fs/f2fs/segment.c | 8 ++++++-
fs/f2fs/super.c | 36 ++++++++++++++++++++++++++++++
kernel/kthread.c | 1 +
5 files changed, 47 insertions(+), 1 deletion(-)
Comments
On 3/1/23 21:51, Yangtao Li wrote:
> diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
> index 2055e72871fe..dc005f3b784a 100644
> --- a/Documentation/filesystems/f2fs.rst
> +++ b/Documentation/filesystems/f2fs.rst
> @@ -351,6 +351,8 @@ age_extent_cache Enable an age extent cache based on rb-tree. It records
> data block update frequency of the extent per inode, in
> order to provide better temperature hints for data block
> allocation.
> +discard_cpuset=%u Set the cpumask of dicard thread, it makes the discard
of discard
> + process run faster on a more powerful CPU, or not.
> ======================== ============================================================
This seems very counterintuitive.
a. GC and discard operations are mostly I/O-bound.
b. Both run when the storage is idle unless background conditions are
not met and are forced to run synchronously (i.e., foreground).
Setting the cpumask to run on the most efficient CPU core makes more
sense in my books (especially considering Android's I/O
characteristics).
Please provide data/elaborations for:
i. Why just the discard thread?
ii. Does setting the discard thread to big cores provide meaningful
and practical improvements?
iii. Is it enough to justify an explicit mask over HMP scheduler's own
heuristic?
iv. Is the additional power consumption for setting the mask to more
power hungry cores justified?
f2fs mount options are already pretty convoluted, and unfortunately
neither your commit message nor the code itself seem to justify its
addition imho.
Thanks,
On Thu, Mar 2, 2023 at 2:52 PM Yangtao Li via Linux-f2fs-devel
<linux-f2fs-devel@lists.sourceforge.net> wrote:
>
> It makes the discard process run faster on a more powerful CPU, or not.
> And if bind it to a specific cpu, it is possible to have more cache
> locality.
>
> Signed-off-by: Yangtao Li <frank.li@vivo.com>
> ---
> v2:
> -fix kernel test robot warn
> Documentation/filesystems/f2fs.rst | 2 ++
> fs/f2fs/f2fs.h | 1 +
> fs/f2fs/segment.c | 8 ++++++-
> fs/f2fs/super.c | 36 ++++++++++++++++++++++++++++++
> kernel/kthread.c | 1 +
> 5 files changed, 47 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
> index 2055e72871fe..dc005f3b784a 100644
> --- a/Documentation/filesystems/f2fs.rst
> +++ b/Documentation/filesystems/f2fs.rst
> @@ -351,6 +351,8 @@ age_extent_cache Enable an age extent cache based on rb-tree. It records
> data block update frequency of the extent per inode, in
> order to provide better temperature hints for data block
> allocation.
> +discard_cpuset=%u Set the cpumask of dicard thread, it makes the discard
> + process run faster on a more powerful CPU, or not.
> ======================== ============================================================
>
> Debugfs Entries
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index b0ab2062038a..62ce02a87d33 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -183,6 +183,7 @@ struct f2fs_mount_info {
> int compress_mode; /* compression mode */
> unsigned char extensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */
> unsigned char noextensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */
> + struct cpumask discard_cpumask; /* discard thread cpumask */
> };
>
> #define F2FS_FEATURE_ENCRYPT 0x0001
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 227e25836173..2648c564833e 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -2054,11 +2054,17 @@ int f2fs_start_discard_thread(struct f2fs_sb_info *sbi)
> if (!f2fs_realtime_discard_enable(sbi))
> return 0;
>
> - dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
> + dcc->f2fs_issue_discard = kthread_create(issue_discard_thread, sbi,
> "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
> if (IS_ERR(dcc->f2fs_issue_discard)) {
> err = PTR_ERR(dcc->f2fs_issue_discard);
> dcc->f2fs_issue_discard = NULL;
> + } else {
> + if (!cpumask_empty(&F2FS_OPTION(sbi).discard_cpumask)) {
> + kthread_bind_mask(dcc->f2fs_issue_discard,
> + &F2FS_OPTION(sbi).discard_cpumask);
> + }
> + wake_up_process(dcc->f2fs_issue_discard);
> }
>
> return err;
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index fbaaabbcd6de..8ecbe3595f34 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -164,6 +164,7 @@ enum {
> Opt_discard_unit,
> Opt_memory_mode,
> Opt_age_extent_cache,
> + Opt_discard_cpuset,
> Opt_err,
> };
>
> @@ -243,6 +244,7 @@ static match_table_t f2fs_tokens = {
> {Opt_discard_unit, "discard_unit=%s"},
> {Opt_memory_mode, "memory=%s"},
> {Opt_age_extent_cache, "age_extent_cache"},
> + {Opt_discard_cpuset, "discard_cpuset=%u"},
> {Opt_err, NULL},
> };
>
> @@ -1256,6 +1258,22 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
> case Opt_age_extent_cache:
> set_opt(sbi, AGE_EXTENT_CACHE);
> break;
> + case Opt_discard_cpuset:
> + if (!f2fs_hw_support_discard(sbi)) {
> + f2fs_warn(sbi, "device does not support discard");
> + break;
> + }
> +
> + if (args->from && match_int(args, &arg))
> + return -EINVAL;
> +
> + if (!cpu_possible(arg)) {
> + f2fs_err(sbi, "invalid cpu%d for discard_cpuset", arg);
> + return -EINVAL;
> + }
> +
> + cpumask_set_cpu(arg, &F2FS_OPTION(sbi).discard_cpumask);
> + break;
> default:
> f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
> p);
> @@ -1358,6 +1376,14 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
> f2fs_err(sbi, "Allow to mount readonly mode only");
> return -EROFS;
> }
> +
> + if (!cpumask_empty(&F2FS_OPTION(sbi).discard_cpumask) &&
> + !cpumask_intersects(cpu_online_mask,
> + &F2FS_OPTION(sbi).discard_cpumask)) {
> + f2fs_err(sbi, "Must include one online CPU for discard_cpuset");
> + return -EINVAL;
> + }
> +
> return 0;
> }
>
> @@ -1884,6 +1910,7 @@ static inline void f2fs_show_compress_options(struct seq_file *seq,
> static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
> {
> struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);
> + unsigned int cpu;
>
> if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC)
> seq_printf(seq, ",background_gc=%s", "sync");
> @@ -1909,6 +1936,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
> seq_printf(seq, ",discard_unit=%s", "segment");
> else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
> seq_printf(seq, ",discard_unit=%s", "section");
> + for_each_cpu(cpu, &F2FS_OPTION(sbi).discard_cpumask)
> + seq_printf(seq, ",discard_cpuset=%u", cpu);
> } else {
> seq_puts(seq, ",nodiscard");
> }
> @@ -2340,6 +2369,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
> goto restore_opts;
> }
>
> + if (!cpumask_equal(&org_mount_opt.discard_cpumask,
> + &F2FS_OPTION(sbi).discard_cpumask)) {
> + err = -EINVAL;
> + f2fs_warn(sbi, "switch discard_cpuset option is not allowed");
> + goto restore_opts;
> + }
> +
> if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
> err = -EINVAL;
> f2fs_warn(sbi, "disabling checkpoint not compatible with read-only");
> diff --git a/kernel/kthread.c b/kernel/kthread.c
> index 7e6751b29101..8ddc2cd1b27e 100644
> --- a/kernel/kthread.c
> +++ b/kernel/kthread.c
> @@ -541,6 +541,7 @@ void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask)
> {
> __kthread_bind_mask(p, mask, TASK_UNINTERRUPTIBLE);
> }
> +EXPORT_SYMBOL_GPL(kthread_bind_mask);
>
> /**
> * kthread_bind - bind a just-created kthread to a cpu.
This change to kernel/kthread.c should be made in a separate commit.
> --
> 2.25.1
>
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
@@ -351,6 +351,8 @@ age_extent_cache Enable an age extent cache based on rb-tree. It records
data block update frequency of the extent per inode, in
order to provide better temperature hints for data block
allocation.
+discard_cpuset=%u Set the cpumask of dicard thread, it makes the discard
+ process run faster on a more powerful CPU, or not.
======================== ============================================================
Debugfs Entries
@@ -183,6 +183,7 @@ struct f2fs_mount_info {
int compress_mode; /* compression mode */
unsigned char extensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */
unsigned char noextensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */
+ struct cpumask discard_cpumask; /* discard thread cpumask */
};
#define F2FS_FEATURE_ENCRYPT 0x0001
@@ -2054,11 +2054,17 @@ int f2fs_start_discard_thread(struct f2fs_sb_info *sbi)
if (!f2fs_realtime_discard_enable(sbi))
return 0;
- dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
+ dcc->f2fs_issue_discard = kthread_create(issue_discard_thread, sbi,
"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(dcc->f2fs_issue_discard)) {
err = PTR_ERR(dcc->f2fs_issue_discard);
dcc->f2fs_issue_discard = NULL;
+ } else {
+ if (!cpumask_empty(&F2FS_OPTION(sbi).discard_cpumask)) {
+ kthread_bind_mask(dcc->f2fs_issue_discard,
+ &F2FS_OPTION(sbi).discard_cpumask);
+ }
+ wake_up_process(dcc->f2fs_issue_discard);
}
return err;
@@ -164,6 +164,7 @@ enum {
Opt_discard_unit,
Opt_memory_mode,
Opt_age_extent_cache,
+ Opt_discard_cpuset,
Opt_err,
};
@@ -243,6 +244,7 @@ static match_table_t f2fs_tokens = {
{Opt_discard_unit, "discard_unit=%s"},
{Opt_memory_mode, "memory=%s"},
{Opt_age_extent_cache, "age_extent_cache"},
+ {Opt_discard_cpuset, "discard_cpuset=%u"},
{Opt_err, NULL},
};
@@ -1256,6 +1258,22 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
case Opt_age_extent_cache:
set_opt(sbi, AGE_EXTENT_CACHE);
break;
+ case Opt_discard_cpuset:
+ if (!f2fs_hw_support_discard(sbi)) {
+ f2fs_warn(sbi, "device does not support discard");
+ break;
+ }
+
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+
+ if (!cpu_possible(arg)) {
+ f2fs_err(sbi, "invalid cpu%d for discard_cpuset", arg);
+ return -EINVAL;
+ }
+
+ cpumask_set_cpu(arg, &F2FS_OPTION(sbi).discard_cpumask);
+ break;
default:
f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
p);
@@ -1358,6 +1376,14 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
f2fs_err(sbi, "Allow to mount readonly mode only");
return -EROFS;
}
+
+ if (!cpumask_empty(&F2FS_OPTION(sbi).discard_cpumask) &&
+ !cpumask_intersects(cpu_online_mask,
+ &F2FS_OPTION(sbi).discard_cpumask)) {
+ f2fs_err(sbi, "Must include one online CPU for discard_cpuset");
+ return -EINVAL;
+ }
+
return 0;
}
@@ -1884,6 +1910,7 @@ static inline void f2fs_show_compress_options(struct seq_file *seq,
static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
{
struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);
+ unsigned int cpu;
if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC)
seq_printf(seq, ",background_gc=%s", "sync");
@@ -1909,6 +1936,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_printf(seq, ",discard_unit=%s", "segment");
else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
seq_printf(seq, ",discard_unit=%s", "section");
+ for_each_cpu(cpu, &F2FS_OPTION(sbi).discard_cpumask)
+ seq_printf(seq, ",discard_cpuset=%u", cpu);
} else {
seq_puts(seq, ",nodiscard");
}
@@ -2340,6 +2369,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
+ if (!cpumask_equal(&org_mount_opt.discard_cpumask,
+ &F2FS_OPTION(sbi).discard_cpumask)) {
+ err = -EINVAL;
+ f2fs_warn(sbi, "switch discard_cpuset option is not allowed");
+ goto restore_opts;
+ }
+
if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
err = -EINVAL;
f2fs_warn(sbi, "disabling checkpoint not compatible with read-only");
@@ -541,6 +541,7 @@ void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask)
{
__kthread_bind_mask(p, mask, TASK_UNINTERRUPTIBLE);
}
+EXPORT_SYMBOL_GPL(kthread_bind_mask);
/**
* kthread_bind - bind a just-created kthread to a cpu.