From: Christoph Hellwig <[email protected]>
To: Jens Axboe <[email protected]>
Cc: Christian Brauner <[email protected]>,
Keith Busch <[email protected]>, Sagi Grimberg <[email protected]>,
Kanchan Joshi <[email protected]>,
Hui Qi <[email protected]>,
Nitesh Shetty <[email protected]>, Jan Kara <[email protected]>,
Pavel Begunkov <[email protected]>,
[email protected], [email protected],
[email protected], [email protected],
[email protected]
Subject: [PATCH 15/15] RFC: block: allow write streams on partitions
Date: Tue, 19 Nov 2024 13:16:29 +0100 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
By default assign all write streams to partition 1, and add a hacky
sysfs file that distributes them equally across all partitions.
This is implemented by storing the number of per-partition write
streams in struct block_device, as well as the offset to the global
ones, and then remapping the write streams in the I/O submission
path.
The sysfs interface is hacky and undocumented; better suggestions are
welcome from actual users of write streams on partitions.
Signed-off-by: Christoph Hellwig <[email protected]>
---
block/bdev.c | 9 +++++++
block/blk-core.c | 2 ++
block/genhd.c | 52 +++++++++++++++++++++++++++++++++++++++
block/partitions/core.c | 6 +++--
include/linux/blk_types.h | 7 ++++++
include/linux/blkdev.h | 2 +-
6 files changed, 75 insertions(+), 3 deletions(-)
diff --git a/block/bdev.c b/block/bdev.c
index c23245f1fdfe..f3549a8cdb3f 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -440,6 +440,15 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
return NULL;
}
bdev->bd_disk = disk;
+
+ /*
+ * Assign all write streams to the first partition by default.
+ */
+ if (partno == 1) {
+ bdev->bd_part_write_stream_start = 0;
+ bdev->bd_part_write_streams = bdev_max_write_streams(bdev);
+ }
+
return bdev;
}
diff --git a/block/blk-core.c b/block/blk-core.c
index 666efe8fa202..9654937f9b2d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -574,6 +574,8 @@ static int blk_partition_remap(struct bio *bio)
return -EIO;
if (bio_sectors(bio)) {
bio->bi_iter.bi_sector += p->bd_start_sect;
+ if (bio->bi_write_stream)
+ bio->bi_write_stream += p->bd_part_write_stream_start;
trace_block_bio_remap(bio, p->bd_dev,
bio->bi_iter.bi_sector -
p->bd_start_sect);
diff --git a/block/genhd.c b/block/genhd.c
index 79230c109fca..3156c70522b6 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1070,6 +1070,54 @@ static ssize_t partscan_show(struct device *dev,
return sysfs_emit(buf, "%u\n", disk_has_partscan(dev_to_disk(dev)));
}
+/*
+ * Read handler paired with disk_distribute_write_streams_store().  It
+ * always emits "0"; whether something more useful, such as the
+ * per-partition ranges, should be shown here is still an open question.
+ */
+static ssize_t disk_distribute_write_streams_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	ssize_t len = sysfs_emit(buf, "0\n");
+
+	return len;
+}
+
+/*
+ * Split the disk's write streams evenly across its partitions.
+ *
+ * Any write triggers the redistribution; the written value in @buf is not
+ * parsed.  Each partition found in disk->part_tbl gets
+ * max_write_streams / nr_partitions streams, laid out back to back
+ * starting at offset 0.
+ *
+ * Returns @count on success (including when there are no partitions, in
+ * which case nothing is changed), -EINVAL if the queue limits report no
+ * write streams, or -EBUSY if the whole-disk block device has openers.
+ */
+static ssize_t disk_distribute_write_streams_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+	struct block_device *bdev = disk->part0, *part;
+	unsigned short total_write_streams =
+		disk->queue->limits.max_write_streams;
+	unsigned short part_write_streams, part_write_stream_start = 0;
+	unsigned long nr_partitions = 0, idx;
+	int error = 0;
+
+	if (!total_write_streams)
+		return -EINVAL;
+
+	mutex_lock(&disk->open_mutex);
+	if (atomic_read(&bdev->bd_openers)) {
+		error = -EBUSY;
+		goto out_unlock;
+	}
+
+	/*
+	 * Start at index 1: index 0 is presumably the whole-disk device
+	 * (disk->part0), which must not take part in the split.
+	 */
+	xa_for_each_start(&disk->part_tbl, idx, part, 1)
+		nr_partitions++;
+	if (!nr_partitions)
+		goto out_unlock;
+
+	/* Integer division: any remainder streams are left unassigned. */
+	part_write_streams = total_write_streams / nr_partitions;
+	xa_for_each_start(&disk->part_tbl, idx, part, 1) {
+		part->bd_part_write_streams = part_write_streams;
+		part->bd_part_write_stream_start = part_write_stream_start;
+		/*
+		 * Log before advancing the offset so the message reports
+		 * where this partition's range starts rather than where it
+		 * ends.  The table index is reported as-is since partitions
+		 * are numbered from 1 (NOTE(review): confirm the part_tbl
+		 * index is the partition number).
+		 */
+		dev_info(dev,
+			"assigning %u write streams at %u to partition %lu\n",
+			part_write_streams, part_write_stream_start, idx);
+		part_write_stream_start += part_write_streams;
+	}
+out_unlock:
+	mutex_unlock(&disk->open_mutex);
+	if (error)
+		return error;
+	return count;
+}
+
static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
@@ -1084,6 +1132,9 @@ static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
static DEVICE_ATTR(partscan, 0444, partscan_show, NULL);
+static DEVICE_ATTR(distribute_write_streams, 0644,
+ disk_distribute_write_streams_show,
+ disk_distribute_write_streams_store);
#ifdef CONFIG_FAIL_MAKE_REQUEST
ssize_t part_fail_show(struct device *dev,
@@ -1135,6 +1186,7 @@ static struct attribute *disk_attrs[] = {
&dev_attr_events_poll_msecs.attr,
&dev_attr_diskseq.attr,
&dev_attr_partscan.attr,
+ &dev_attr_distribute_write_streams.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
&dev_attr_fail.attr,
#endif
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 815ed33caa1b..a27dbb5589ce 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -245,8 +245,10 @@ static const struct attribute_group *part_attr_groups[] = {
static void part_release(struct device *dev)
{
- put_disk(dev_to_bdev(dev)->bd_disk);
- bdev_drop(dev_to_bdev(dev));
+ struct block_device *part = dev_to_bdev(dev);
+
+ put_disk(part->bd_disk);
+ bdev_drop(part);
}
static int part_uevent(const struct device *dev, struct kobj_uevent_env *env)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 4ca3449ce9c9..02a3d58e814f 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -74,6 +74,13 @@ struct block_device {
#ifdef CONFIG_SECURITY
void *bd_security;
#endif
+
+ /*
+ * Allow assigning write streams to partitions.
+ */
+ unsigned short bd_part_write_streams;
+ unsigned short bd_part_write_stream_start;
+
/*
* keep this out-of-line as it's both big and not needed in the fast
* path
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9fda66530d9a..bb0921e642fb 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1242,7 +1242,7 @@ static inline unsigned int bdev_max_segments(struct block_device *bdev)
static inline unsigned short bdev_max_write_streams(struct block_device *bdev)
{
if (bdev_is_partition(bdev))
- return 0;
+ return bdev->bd_part_write_streams;
return bdev_limits(bdev)->max_write_streams;
}
--
2.45.2
prev parent reply other threads:[~2024-11-19 12:17 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-19 12:16 support block layer write streams and FDP Christoph Hellwig
2024-11-19 12:16 ` [PATCH 01/15] fs: add write stream information to statx Christoph Hellwig
2024-11-19 12:16 ` [PATCH 02/15] fs: add a write stream field to the kiocb Christoph Hellwig
2024-11-19 12:16 ` [PATCH 03/15] io_uring: enable passing a per-io write stream Christoph Hellwig
2024-11-19 12:16 ` [PATCH 04/15] block: don't bother checking the data direction for merges Christoph Hellwig
2024-11-19 12:16 ` [PATCH 05/15] block: req->bio is always set in the merge code Christoph Hellwig
2024-11-19 12:16 ` [PATCH 06/15] block: add a bi_write_stream field Christoph Hellwig
2024-11-19 12:16 ` [PATCH 07/15] block: introduce max_write_streams queue limit Christoph Hellwig
2024-11-19 12:16 ` [PATCH 08/15] block: introduce a write_stream_granularity " Christoph Hellwig
2024-11-19 12:16 ` [PATCH 09/15] block: expose write streams for block device nodes Christoph Hellwig
2024-11-19 12:16 ` [PATCH 10/15] nvme: store the endurance group id in struct nvme_ns_head Christoph Hellwig
2024-11-19 12:16 ` [PATCH 11/15] nvme: pass a void pointer to nvme_get/set_features for the result Christoph Hellwig
2024-11-19 12:16 ` [PATCH 12/15] nvme: add a nvme_get_log_lsi helper Christoph Hellwig
2024-11-19 12:16 ` [PATCH 13/15] nvme.h: add FDP definitions Christoph Hellwig
2024-11-19 12:16 ` [PATCH 14/15] nvme: enable FDP support Christoph Hellwig
2024-11-19 18:17 ` Keith Busch
2024-11-19 18:24 ` Christoph Hellwig
2024-11-19 22:49 ` Keith Busch
2024-11-20 6:03 ` Christoph Hellwig
2024-11-19 12:16 ` Christoph Hellwig [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox