GNU/Weeb Mailing List <[email protected]>
 help / color / mirror / Atom feed
From: Ammar Faizi <[email protected]>
To: Chris Mason <[email protected]>, Josef Bacik <[email protected]>,
	David Sterba <[email protected]>, Tejun Heo <[email protected]>
Cc: Ammar Faizi <[email protected]>,
	Lai Jiangshan <[email protected]>,
	Filipe Manana <[email protected]>,
	Linux Btrfs Mailing List <[email protected]>,
	Linux Kernel Mailing List <[email protected]>,
	Linux Fsdevel Mailing List <[email protected]>,
	GNU/Weeb Mailing List <[email protected]>
Subject: [RFC PATCH v1 4/6] btrfs: Add wq_cpu_set=%s mount option
Date: Sun, 26 Feb 2023 23:02:57 +0700	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

Btrfs workqueues can slow down sensitive user tasks because they may use
any online CPU to perform heavy workloads on an SMP system. Add a mount
option to restrict the Btrfs workqueues to a set of CPUs. This helps
prevent sensitive user tasks from being preempted by heavy Btrfs
workqueue workloads.

The value of this option is similar to the taskset bitmask, except that
the comma separator is replaced with a dot, because the mount option
parser already uses commas to separate mount options.

Signed-off-by: Ammar Faizi <[email protected]>
---
 fs/btrfs/async-thread.c | 51 +++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/async-thread.h |  1 +
 fs/btrfs/disk-io.c      |  3 ++-
 fs/btrfs/fs.h           |  3 +++
 fs/btrfs/super.c        | 44 +++++++++++++++++++++++++++++++++++
 5 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index aac240430efe1316..445c055304574653 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -11,6 +11,7 @@
 #include <linux/freezer.h>
 #include "async-thread.h"
 #include "ctree.h"
+#include "messages.h"
 
 enum {
 	WORK_DONE_BIT,
@@ -339,3 +340,53 @@ void btrfs_flush_workqueue(struct btrfs_workqueue *wq)
 {
 	flush_workqueue(wq->normal_wq);
 }
+
+static int apply_wq_cpu_set_notice(struct btrfs_fs_info *info,
+				   struct workqueue_struct *wq,
+				   const char *wq_name)
+{
+	const char *mask_str = info->wq_cpu_set->mask_str;
+	int ret;
+
+	ret = set_workqueue_cpumask(wq, info->wq_cpu_set->mask);
+	if (ret) {
+		btrfs_err(info, "failed to set cpu mask for %s wq: %d", wq_name,
+			  ret);
+		return ret;
+	}
+
+	btrfs_info(info, "set cpu mask for %s wq to %s", wq_name, mask_str);
+	return 0;
+}
+
+#define apply_wq_cpu_set(INFO, WQ) \
+	apply_wq_cpu_set_notice(INFO, (INFO)->WQ, # WQ)
+
+#define btrfs_apply_wq_cpu_set(INFO, WQ) \
+	apply_wq_cpu_set_notice(INFO, (INFO)->WQ->normal_wq, # WQ)
+
+
+void btrfs_apply_workqueue_cpu_set(struct btrfs_fs_info *fs_info)
+{
+	if (!btrfs_test_opt(fs_info, WQ_CPU_SET))
+		return;
+
+	btrfs_apply_wq_cpu_set(fs_info, workers);
+	btrfs_apply_wq_cpu_set(fs_info, hipri_workers);
+	btrfs_apply_wq_cpu_set(fs_info, delalloc_workers);
+	btrfs_apply_wq_cpu_set(fs_info, flush_workers);
+	btrfs_apply_wq_cpu_set(fs_info, caching_workers);
+	btrfs_apply_wq_cpu_set(fs_info, fixup_workers);
+	apply_wq_cpu_set(fs_info, endio_workers);
+	apply_wq_cpu_set(fs_info, endio_meta_workers);
+	apply_wq_cpu_set(fs_info, rmw_workers);
+	btrfs_apply_wq_cpu_set(fs_info, endio_write_workers);
+	apply_wq_cpu_set(fs_info, compressed_write_workers);
+	btrfs_apply_wq_cpu_set(fs_info, endio_freespace_worker);
+	btrfs_apply_wq_cpu_set(fs_info, delayed_workers);
+	btrfs_apply_wq_cpu_set(fs_info, qgroup_rescan_workers);
+	apply_wq_cpu_set(fs_info, discard_ctl.discard_workers);
+}
+
+#undef apply_wq_cpu_set
+#undef btrfs_apply_wq_cpu_set
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 6e2596ddae1002ab..2b8a76fa75ef9e69 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -41,5 +41,6 @@ struct btrfs_fs_info * __pure btrfs_work_owner(const struct btrfs_work *work);
 struct btrfs_fs_info * __pure btrfs_workqueue_owner(const struct btrfs_workqueue *wq);
 bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq);
 void btrfs_flush_workqueue(struct btrfs_workqueue *wq);
+void btrfs_apply_workqueue_cpu_set(struct btrfs_fs_info *fs_info);
 
 #endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b53f0e30ce2b3bbb..1bb1db461a30fa71 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1498,6 +1498,7 @@ static void free_global_roots(struct btrfs_fs_info *fs_info)
 
 void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
 {
+	btrfs_destroy_cpu_set(fs_info->wq_cpu_set);
 	percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
 	percpu_counter_destroy(&fs_info->delalloc_bytes);
 	percpu_counter_destroy(&fs_info->ordered_bytes);
@@ -2231,7 +2232,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
 	      fs_info->discard_ctl.discard_workers)) {
 		return -ENOMEM;
 	}
-
+	btrfs_apply_workqueue_cpu_set(fs_info);
 	return 0;
 }
 
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index cbad856df197ccfd..a8bd1414b2520ea4 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -177,6 +177,7 @@ enum {
 	BTRFS_MOUNT_IGNOREBADROOTS		= (1UL << 29),
 	BTRFS_MOUNT_IGNOREDATACSUMS		= (1UL << 30),
 	BTRFS_MOUNT_NODISCARD			= (1UL << 31),
+	BTRFS_MOUNT_WQ_CPU_SET			= (1ULL << 32),
 };
 
 /*
@@ -807,6 +808,8 @@ struct btrfs_fs_info {
 	spinlock_t eb_leak_lock;
 	struct list_head allocated_ebs;
 #endif
+
+	struct btrfs_cpu_set *wq_cpu_set;
 };
 
 static inline void btrfs_set_last_root_drop_gen(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 581845bc206ad28b..3e061ec977b014d1 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -139,6 +139,7 @@ enum {
 #ifdef CONFIG_BTRFS_FS_REF_VERIFY
 	Opt_ref_verify,
 #endif
+	Opt_wq_cpu_set,
 	Opt_err,
 };
 
@@ -213,6 +214,7 @@ static const match_table_t tokens = {
 #ifdef CONFIG_BTRFS_FS_REF_VERIFY
 	{Opt_ref_verify, "ref_verify"},
 #endif
+	{Opt_wq_cpu_set, "wq_cpu_set=%s"},
 	{Opt_err, NULL},
 };
 
@@ -298,6 +300,23 @@ static int parse_rescue_options(struct btrfs_fs_info *info, const char *options)
 	return ret;
 }
 
+static int parse_wq_cpu_set(struct btrfs_fs_info *info, const char *mask_str)
+{
+	struct btrfs_cpu_set *cpu_set;
+	int ret;
+
+	ret = btrfs_parse_cpu_set(&cpu_set, mask_str);
+	if (ret) {
+		btrfs_err(info, "failed to parse wq_cpu_set: %d", ret);
+		return ret;
+	}
+
+	info->wq_cpu_set = cpu_set;
+	btrfs_info(info, "using wq_cpu_set=%s", mask_str);
+	btrfs_set_opt(info->mount_opt, WQ_CPU_SET);
+	return 0;
+}
+
 /*
  * Regular mount options parser.  Everything that is needed only when
  * reading in a new superblock is parsed here.
@@ -803,6 +822,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 			btrfs_set_opt(info->mount_opt, REF_VERIFY);
 			break;
 #endif
+		case Opt_wq_cpu_set:
+			ret = parse_wq_cpu_set(info, args[0].from);
+			if (ret < 0)
+				goto out;
+			break;
 		case Opt_err:
 			btrfs_err(info, "unrecognized mount option '%s'", p);
 			ret = -EINVAL;
@@ -1319,6 +1343,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
 #endif
 	if (btrfs_test_opt(info, REF_VERIFY))
 		seq_puts(seq, ",ref_verify");
+	if (btrfs_test_opt(info, WQ_CPU_SET))
+		seq_printf(seq, ",wq_cpu_set=%s", info->wq_cpu_set->mask_str);
 	seq_printf(seq, ",subvolid=%llu",
 		  BTRFS_I(d_inode(dentry))->root->root_key.objectid);
 	subvol_name = btrfs_get_subvol_name_from_objectid(info,
@@ -1686,6 +1712,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 	u64 old_max_inline = fs_info->max_inline;
 	u32 old_thread_pool_size = fs_info->thread_pool_size;
 	u32 old_metadata_ratio = fs_info->metadata_ratio;
+	struct btrfs_cpu_set *old_wq_cpu_set = fs_info->wq_cpu_set;
 	int ret;
 
 	sync_filesystem(sb);
@@ -1838,6 +1865,14 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 		set_bit(BTRFS_FS_OPEN, &fs_info->flags);
 	}
 out:
+	/*
+	 * wq_cpu_set was changed: free the old set and apply the new one.
+	 */
+	if (fs_info->wq_cpu_set != old_wq_cpu_set) {
+		btrfs_destroy_cpu_set(old_wq_cpu_set);
+		btrfs_apply_workqueue_cpu_set(fs_info);
+	}
+
 	/*
 	 * We need to set SB_I_VERSION here otherwise it'll get cleared by VFS,
 	 * since the absence of the flag means it can be toggled off by remount.
@@ -1852,6 +1887,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 	return 0;
 
 restore:
+	/*
+	 * The remount operation changed the wq_cpu_set, but we hit an error:
+	 * destroy the new value and roll back to the previous one.
+	 */
+	if (fs_info->wq_cpu_set != old_wq_cpu_set) {
+		btrfs_destroy_cpu_set(fs_info->wq_cpu_set);
+		fs_info->wq_cpu_set = old_wq_cpu_set;
+	}
+
 	/* We've hit an error - don't reset SB_RDONLY */
 	if (sb_rdonly(sb))
 		old_flags |= SB_RDONLY;
-- 
Ammar Faizi


  parent reply	other threads:[~2023-02-26 16:03 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-26 16:02 [RFC PATCH v1 0/6] Introducing `wq_cpu_set` mount option for btrfs Ammar Faizi
2023-02-26 16:02 ` [RFC PATCH v1 1/6] workqueue: Add set_workqueue_cpumask() helper function Ammar Faizi
2023-02-26 16:02 ` [RFC PATCH v1 2/6] btrfs: Change `mount_opt` type in `struct btrfs_fs_info` to `u64` Ammar Faizi
2023-02-26 16:02 ` [RFC PATCH v1 3/6] btrfs: Create btrfs CPU set struct and helpers Ammar Faizi
2023-02-26 16:02 ` Ammar Faizi [this message]
2023-02-26 16:02 ` [RFC PATCH v1 5/6] btrfs: Adjust the default thread pool size when `wq_cpu_set` option is used Ammar Faizi
2023-02-26 16:02 ` [RFC PATCH v1 6/6] btrfs: Add `BTRFS_DEFAULT_MAX_THREAD_POOL_SIZE` macro Ammar Faizi
2023-02-26 17:01 ` [RFC PATCH v1 0/6] Introducing `wq_cpu_set` mount option for btrfs Tejun Heo
2023-02-26 18:26   ` Ammar Faizi
2023-02-26 18:29     ` Ammar Faizi
2023-02-27 10:18 ` Qu Wenruo
2023-02-27 13:42   ` Ammar Faizi
2023-02-27 23:49     ` Qu Wenruo
2023-02-27 11:02 ` Filipe Manana
     [not found]   ` <[email protected]>
2023-02-27 13:45     ` Ammar Faizi
2023-02-27 16:24       ` Roman Mamedov
2023-02-27 22:17 ` Dave Chinner
2023-02-28  8:01   ` Ammar Faizi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox