* [PATCH v3 0/2] block: enable per-cpu bio cache by default
@ 2025-11-14 9:21 Fengnan Chang
2025-11-14 9:21 ` [PATCH v3 1/2] block: use bio_alloc_bioset for passthru IO " Fengnan Chang
From: Fengnan Chang @ 2025-11-14 9:21 UTC
To: axboe, viro, brauner, jack, asml.silence, willy, djwong, hch,
ritesh.list, linux-fsdevel, io-uring, linux-xfs, linux-ext4,
linux-block, ming.lei, linux-nvme
Cc: Fengnan Chang
Until now, the per-cpu bio cache was only used for io_uring on raw block
devices; filesystems can also use it to improve performance.
After the discussion in [1], we think it's better to enable the per-cpu
bio cache by default.
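As a rough sketch of the behavioral change (condensed from the diffs in
this series, with error handling and details elided): previously only
callers that set IOCB_ALLOC_CACHE, i.e. io_uring on raw block devices,
got cached bios; with this series, bio_alloc_bioset() opts in by itself
whenever the bioset has a cache and the bio is small enough:

    /* Before: opt-in by the caller (condensed from block/fops.c) */
    if (iocb->ki_flags & IOCB_ALLOC_CACHE)
            opf |= REQ_ALLOC_CACHE;
    bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
                           &blkdev_dio_pool);

    /* After: bio_alloc_bioset() decides internally (condensed from
     * block/bio.c); callers no longer set any flag. */
    if (bs->cache && nr_vecs <= BIO_INLINE_VECS)
            opf |= REQ_ALLOC_CACHE;     /* try the per-cpu cache first */
    else
            opf &= ~REQ_ALLOC_CACHE;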
v3:
fix some build warnings.
v2:
enable per-cpu bio cache for passthru IO by default.
v1:
https://lore.kernel.org/linux-fsdevel/CAPFOzZs5mJ9Ts+TYkhioO8aAYfzevcgw7O3hjexFNb_tM+kEZA@mail.gmail.com/
[1] https://lore.kernel.org/linux-fsdevel/c4bc7c33-b1e1-47d1-9d22-b189c86c6c7d@gmail.com/
Fengnan Chang (2):
block: use bio_alloc_bioset for passthru IO by default
block: enable per-cpu bio cache by default
block/bio.c | 26 ++++++-----
block/blk-map.c | 90 ++++++++++++++++-----------------------
block/fops.c | 4 --
drivers/nvme/host/ioctl.c | 2 +-
include/linux/fs.h | 3 --
io_uring/rw.c | 1 -
6 files changed, 49 insertions(+), 77 deletions(-)
base-commit: 4a0c9b3391999818e2c5b93719699b255be1f682
--
2.39.5 (Apple Git-154)
* [PATCH v3 1/2] block: use bio_alloc_bioset for passthru IO by default
2025-11-14 9:21 [PATCH v3 0/2] block: enable per-cpu bio cache by default Fengnan Chang
@ 2025-11-14 9:21 ` Fengnan Chang
2025-11-14 9:21 ` [PATCH v3 2/2] block: enable per-cpu bio cache " Fengnan Chang
From: Fengnan Chang @ 2025-11-14 9:21 UTC
To: axboe, viro, brauner, jack, asml.silence, willy, djwong, hch,
ritesh.list, linux-fsdevel, io-uring, linux-xfs, linux-ext4,
linux-block, ming.lei, linux-nvme
Cc: Fengnan Chang
Use bio_alloc_bioset() for passthru IO by default, so that the bio cache
can later be enabled for both irq and polled passthru IO.
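In rough before/after terms (a condensed sketch of the diff below, not
the complete logic), the allocation/free pairing for passthru bios
changes from a bare kmalloc-backed bio to a bioset-backed one:

    /* Before: kmalloc-backed bio, freed with bio_uninit() + kfree() */
    bio = bio_kmalloc(nr_vecs, gfp_mask);
    bio_init_inline(bio, bdev, nr_vecs, req_op(rq));
    ...
    bio_uninit(bio);
    kfree(bio);

    /* After: always allocate from fs_bio_set and free with bio_put(),
     * so patch 2 can route these bios through the per-cpu cache. */
    bio = bio_alloc_bioset(bdev, nr_vecs, rq->cmd_flags, gfp_mask,
                           &fs_bio_set);
    ...
    bio_put(bio);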
Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
---
block/blk-map.c | 90 ++++++++++++++++-----------------------
drivers/nvme/host/ioctl.c | 2 +-
2 files changed, 37 insertions(+), 55 deletions(-)
diff --git a/block/blk-map.c b/block/blk-map.c
index 60faf036fb6e..9e45cb142d85 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -37,6 +37,25 @@ static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
return bmd;
}
+static inline void blk_mq_map_bio_put(struct bio *bio)
+{
+ bio_put(bio);
+}
+
+static struct bio *blk_rq_map_bio_alloc(struct request *rq,
+ unsigned int nr_vecs, gfp_t gfp_mask)
+{
+ struct block_device *bdev = rq->q->disk ? rq->q->disk->part0 : NULL;
+ struct bio *bio;
+
+ bio = bio_alloc_bioset(bdev, nr_vecs, rq->cmd_flags, gfp_mask,
+ &fs_bio_set);
+ if (!bio)
+ return NULL;
+
+ return bio;
+}
+
/**
* bio_copy_from_iter - copy all pages from iov_iter to bio
* @bio: The &struct bio which describes the I/O as destination
@@ -154,10 +173,9 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE));
ret = -ENOMEM;
- bio = bio_kmalloc(nr_pages, gfp_mask);
+ bio = blk_rq_map_bio_alloc(rq, nr_pages, gfp_mask);
if (!bio)
goto out_bmd;
- bio_init_inline(bio, NULL, nr_pages, req_op(rq));
if (map_data) {
nr_pages = 1U << map_data->page_order;
@@ -233,43 +251,12 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
cleanup:
if (!map_data)
bio_free_pages(bio);
- bio_uninit(bio);
- kfree(bio);
+ blk_mq_map_bio_put(bio);
out_bmd:
kfree(bmd);
return ret;
}
-static void blk_mq_map_bio_put(struct bio *bio)
-{
- if (bio->bi_opf & REQ_ALLOC_CACHE) {
- bio_put(bio);
- } else {
- bio_uninit(bio);
- kfree(bio);
- }
-}
-
-static struct bio *blk_rq_map_bio_alloc(struct request *rq,
- unsigned int nr_vecs, gfp_t gfp_mask)
-{
- struct block_device *bdev = rq->q->disk ? rq->q->disk->part0 : NULL;
- struct bio *bio;
-
- if (rq->cmd_flags & REQ_ALLOC_CACHE && (nr_vecs <= BIO_INLINE_VECS)) {
- bio = bio_alloc_bioset(bdev, nr_vecs, rq->cmd_flags, gfp_mask,
- &fs_bio_set);
- if (!bio)
- return NULL;
- } else {
- bio = bio_kmalloc(nr_vecs, gfp_mask);
- if (!bio)
- return NULL;
- bio_init_inline(bio, bdev, nr_vecs, req_op(rq));
- }
- return bio;
-}
-
static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
gfp_t gfp_mask)
{
@@ -318,25 +305,23 @@ static void bio_invalidate_vmalloc_pages(struct bio *bio)
static void bio_map_kern_endio(struct bio *bio)
{
bio_invalidate_vmalloc_pages(bio);
- bio_uninit(bio);
- kfree(bio);
+ blk_mq_map_bio_put(bio);
}
-static struct bio *bio_map_kern(void *data, unsigned int len, enum req_op op,
+static struct bio *bio_map_kern(struct request *rq, void *data, unsigned int len,
gfp_t gfp_mask)
{
unsigned int nr_vecs = bio_add_max_vecs(data, len);
struct bio *bio;
- bio = bio_kmalloc(nr_vecs, gfp_mask);
+ bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask);
if (!bio)
return ERR_PTR(-ENOMEM);
- bio_init_inline(bio, NULL, nr_vecs, op);
+
if (is_vmalloc_addr(data)) {
bio->bi_private = data;
if (!bio_add_vmalloc(bio, data, len)) {
- bio_uninit(bio);
- kfree(bio);
+ blk_mq_map_bio_put(bio);
return ERR_PTR(-EINVAL);
}
} else {
@@ -349,8 +334,7 @@ static struct bio *bio_map_kern(void *data, unsigned int len, enum req_op op,
static void bio_copy_kern_endio(struct bio *bio)
{
bio_free_pages(bio);
- bio_uninit(bio);
- kfree(bio);
+ blk_mq_map_bio_put(bio);
}
static void bio_copy_kern_endio_read(struct bio *bio)
@@ -369,6 +353,7 @@ static void bio_copy_kern_endio_read(struct bio *bio)
/**
* bio_copy_kern - copy kernel address into bio
+ * @rq: request to fill
* @data: pointer to buffer to copy
* @len: length in bytes
* @op: bio/request operation
@@ -377,9 +362,10 @@ static void bio_copy_kern_endio_read(struct bio *bio)
* copy the kernel address into a bio suitable for io to a block
* device. Returns an error pointer in case of error.
*/
-static struct bio *bio_copy_kern(void *data, unsigned int len, enum req_op op,
+static struct bio *bio_copy_kern(struct request *rq, void *data, unsigned int len,
gfp_t gfp_mask)
{
+ enum req_op op = req_op(rq);
unsigned long kaddr = (unsigned long)data;
unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
unsigned long start = kaddr >> PAGE_SHIFT;
@@ -394,10 +380,9 @@ static struct bio *bio_copy_kern(void *data, unsigned int len, enum req_op op,
return ERR_PTR(-EINVAL);
nr_pages = end - start;
- bio = bio_kmalloc(nr_pages, gfp_mask);
+ bio = blk_rq_map_bio_alloc(rq, nr_pages, gfp_mask);
if (!bio)
return ERR_PTR(-ENOMEM);
- bio_init_inline(bio, NULL, nr_pages, op);
while (len) {
struct page *page;
@@ -431,8 +416,7 @@ static struct bio *bio_copy_kern(void *data, unsigned int len, enum req_op op,
cleanup:
bio_free_pages(bio);
- bio_uninit(bio);
- kfree(bio);
+ blk_mq_map_bio_put(bio);
return ERR_PTR(-ENOMEM);
}
@@ -676,18 +660,16 @@ int blk_rq_map_kern(struct request *rq, void *kbuf, unsigned int len,
return -EINVAL;
if (!blk_rq_aligned(rq->q, addr, len) || object_is_on_stack(kbuf))
- bio = bio_copy_kern(kbuf, len, req_op(rq), gfp_mask);
+ bio = bio_copy_kern(rq, kbuf, len, gfp_mask);
else
- bio = bio_map_kern(kbuf, len, req_op(rq), gfp_mask);
+ bio = bio_map_kern(rq, kbuf, len, gfp_mask);
if (IS_ERR(bio))
return PTR_ERR(bio);
ret = blk_rq_append_bio(rq, bio);
- if (unlikely(ret)) {
- bio_uninit(bio);
- kfree(bio);
- }
+ if (unlikely(ret))
+ blk_mq_map_bio_put(bio);
return ret;
}
EXPORT_SYMBOL(blk_rq_map_kern);
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index c212fa952c0f..9c0d7b1618ce 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -446,7 +446,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct iov_iter iter;
struct iov_iter *map_iter = NULL;
struct request *req;
- blk_opf_t rq_flags = REQ_ALLOC_CACHE;
+ blk_opf_t rq_flags = 0;
blk_mq_req_flags_t blk_flags = 0;
int ret;
--
2.39.5 (Apple Git-154)
* [PATCH v3 2/2] block: enable per-cpu bio cache by default
2025-11-14 9:21 [PATCH v3 0/2] block: enable per-cpu bio cache by default Fengnan Chang
2025-11-14 9:21 ` [PATCH v3 1/2] block: use bio_alloc_bioset for passthru IO " Fengnan Chang
@ 2025-11-14 9:21 ` Fengnan Chang
2025-12-03 8:31 ` [PATCH v3 0/2] " changfengnan
2025-12-03 14:53 ` Jens Axboe
From: Fengnan Chang @ 2025-11-14 9:21 UTC
To: axboe, viro, brauner, jack, asml.silence, willy, djwong, hch,
ritesh.list, linux-fsdevel, io-uring, linux-xfs, linux-ext4,
linux-block, ming.lei, linux-nvme
Cc: Fengnan Chang
Since commit 12e4e8c7ab59 ("io_uring/rw: enable bio caches for IRQ rw"),
bio_put() is safe in both task and irq context, and bio_alloc_bioset()
is safe in task context (nothing calls it from irq context), so we can
enable the per-cpu bio cache by default.
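The irq-context case is the interesting one: with the cache on by
default, a bio allocated in task context may be freed from a driver's
completion handler. A minimal sketch of that pattern (my_endio is a
hypothetical completion callback, not code from this series):

    /* Completion handler, typically invoked in irq context. */
    static void my_endio(struct bio *bio)
    {
            /* Safe since commit 12e4e8c7ab59: bio_put() detects irq
             * context and recycles the bio via the irq-safe side of
             * the per-cpu cache rather than the task-only free list. */
            bio_put(bio);
    }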
Benchmarked with t/io_uring and ext4+nvme:
taskset -c 6 /root/fio/t/io_uring -p0 -d128 -b4096 -s1 -c1 -F1 -B1 -R1
-X1 -n1 -P1 /mnt/testfile
Base IOPS is 562K, patched IOPS is 574K. The CPU usage of
bio_alloc_bioset() drops from 1.42% to 1.22%.
The worst case is allocating a bio on CPU A but freeing it on CPU B;
still using t/io_uring with ext4+nvme, base IOPS is 648K and patched
IOPS is 647K.
Also tested ext4/xfs with fio using libaio/sync/io_uring on null_blk
and nvme: no obvious performance regression.
Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
---
block/bio.c | 26 ++++++++++++--------------
block/fops.c | 4 ----
include/linux/fs.h | 3 ---
io_uring/rw.c | 1 -
4 files changed, 12 insertions(+), 22 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index b3a79285c278..64a1599a5930 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -516,20 +516,18 @@ struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0))
return NULL;
- if (opf & REQ_ALLOC_CACHE) {
- if (bs->cache && nr_vecs <= BIO_INLINE_VECS) {
- bio = bio_alloc_percpu_cache(bdev, nr_vecs, opf,
- gfp_mask, bs);
- if (bio)
- return bio;
- /*
- * No cached bio available, bio returned below marked with
- * REQ_ALLOC_CACHE to particpate in per-cpu alloc cache.
- */
- } else {
- opf &= ~REQ_ALLOC_CACHE;
- }
- }
+ if (bs->cache && nr_vecs <= BIO_INLINE_VECS) {
+ opf |= REQ_ALLOC_CACHE;
+ bio = bio_alloc_percpu_cache(bdev, nr_vecs, opf,
+ gfp_mask, bs);
+ if (bio)
+ return bio;
+ /*
+ * No cached bio available, bio returned below marked with
+ * REQ_ALLOC_CACHE to participate in per-cpu alloc cache.
+ */
+ } else
+ opf &= ~REQ_ALLOC_CACHE;
/*
* submit_bio_noacct() converts recursion to iteration; this means if
diff --git a/block/fops.c b/block/fops.c
index 5e3db9fead77..7ef2848244b1 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -184,8 +184,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
loff_t pos = iocb->ki_pos;
int ret = 0;
- if (iocb->ki_flags & IOCB_ALLOC_CACHE)
- opf |= REQ_ALLOC_CACHE;
bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
&blkdev_dio_pool);
dio = container_of(bio, struct blkdev_dio, bio);
@@ -333,8 +331,6 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
loff_t pos = iocb->ki_pos;
int ret = 0;
- if (iocb->ki_flags & IOCB_ALLOC_CACHE)
- opf |= REQ_ALLOC_CACHE;
bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
&blkdev_dio_pool);
dio = container_of(bio, struct blkdev_dio, bio);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c895146c1444..1be899ac8b5a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -365,8 +365,6 @@ struct readahead_control;
/* iocb->ki_waitq is valid */
#define IOCB_WAITQ (1 << 19)
#define IOCB_NOIO (1 << 20)
-/* can use bio alloc cache */
-#define IOCB_ALLOC_CACHE (1 << 21)
/*
* IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the
* iocb completion can be passed back to the owner for execution from a safe
@@ -399,7 +397,6 @@ struct readahead_control;
{ IOCB_WRITE, "WRITE" }, \
{ IOCB_WAITQ, "WAITQ" }, \
{ IOCB_NOIO, "NOIO" }, \
- { IOCB_ALLOC_CACHE, "ALLOC_CACHE" }, \
{ IOCB_DIO_CALLER_COMP, "CALLER_COMP" }, \
{ IOCB_AIO_RW, "AIO_RW" }, \
{ IOCB_HAS_METADATA, "AIO_HAS_METADATA" }
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 5b2241a5813c..c0c59eb358a8 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -862,7 +862,6 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
ret = kiocb_set_rw_flags(kiocb, rw->flags, rw_type);
if (unlikely(ret))
return ret;
- kiocb->ki_flags |= IOCB_ALLOC_CACHE;
/*
* If the file is marked O_NONBLOCK, still allow retry for it if it
--
2.39.5 (Apple Git-154)
* Re: [PATCH v3 0/2] block: enable per-cpu bio cache by default
2025-11-14 9:21 [PATCH v3 0/2] block: enable per-cpu bio cache by default Fengnan Chang
2025-11-14 9:21 ` [PATCH v3 1/2] block: use bio_alloc_bioset for passthru IO " Fengnan Chang
2025-11-14 9:21 ` [PATCH v3 2/2] block: enable per-cpu bio cache " Fengnan Chang
@ 2025-12-03 8:31 ` changfengnan
2025-12-03 14:53 ` Jens Axboe
From: changfengnan @ 2025-12-03 8:31 UTC
To: axboe, viro, brauner, jack, asml.silence, willy, djwong, hch,
ritesh.list, linux-fsdevel, io-uring, linux-xfs, linux-ext4,
linux-block, ming.lei, linux-nvme
Ping
> From: "Fengnan Chang"<changfengnan@bytedance.com>
> Date: Fri, Nov 14, 2025, 17:22
> Subject: [PATCH v3 0/2] block: enable per-cpu bio cache by default
> To: <axboe@kernel.dk>, <viro@zeniv.linux.org.uk>, <brauner@kernel.org>, <jack@suse.cz>, <asml.silence@gmail.com>, <willy@infradead.org>, <djwong@kernel.org>, <hch@infradead.org>, <ritesh.list@gmail.com>, <linux-fsdevel@vger.kernel.org>, <io-uring@vger.kernel.org>, <linux-xfs@vger.kernel.org>, <linux-ext4@vger.kernel.org>, <linux-block@vger.kernel.org>, <ming.lei@redhat.com>, <linux-nvme@lists.infradead.org>
> Cc: "Fengnan Chang"<changfengnan@bytedance.com>
> Until now, the per-cpu bio cache was only used for io_uring on raw block
> devices; filesystems can also use it to improve performance.
> After the discussion in [1], we think it's better to enable the per-cpu
> bio cache by default.
>
> v3:
> fix some build warnings.
>
> v2:
> enable per-cpu bio cache for passthru IO by default.
>
> v1:
> https://lore.kernel.org/linux-fsdevel/CAPFOzZs5mJ9Ts+TYkhioO8aAYfzevcgw7O3hjexFNb_tM+kEZA@mail.gmail.com/
>
> [1] https://lore.kernel.org/linux-fsdevel/c4bc7c33-b1e1-47d1-9d22-b189c86c6c7d@gmail.com/
>
>
> Fengnan Chang (2):
> block: use bio_alloc_bioset for passthru IO by default
> block: enable per-cpu bio cache by default
>
> block/bio.c | 26 ++++++-----
> block/blk-map.c | 90 ++++++++++++++++-----------------------
> block/fops.c | 4 --
> drivers/nvme/host/ioctl.c | 2 +-
> include/linux/fs.h | 3 --
> io_uring/rw.c | 1 -
> 6 files changed, 49 insertions(+), 77 deletions(-)
>
>
> base-commit: 4a0c9b3391999818e2c5b93719699b255be1f682
> --
> 2.39.5 (Apple Git-154)
>
* Re: [PATCH v3 0/2] block: enable per-cpu bio cache by default
2025-11-14 9:21 [PATCH v3 0/2] block: enable per-cpu bio cache by default Fengnan Chang
2025-12-03 8:31 ` [PATCH v3 0/2] " changfengnan
@ 2025-12-03 14:53 ` Jens Axboe
From: Jens Axboe @ 2025-12-03 14:53 UTC
To: viro, brauner, jack, asml.silence, willy, djwong, hch,
ritesh.list, linux-fsdevel, io-uring, linux-xfs, linux-ext4,
linux-block, ming.lei, linux-nvme, Fengnan Chang
On Fri, 14 Nov 2025 17:21:47 +0800, Fengnan Chang wrote:
> Until now, the per-cpu bio cache was only used for io_uring on raw block
> devices; filesystems can also use it to improve performance.
> After the discussion in [1], we think it's better to enable the per-cpu
> bio cache by default.
>
> v3:
> fix some build warnings.
>
> [...]
Applied, thanks!
[1/2] block: use bio_alloc_bioset for passthru IO by default
commit: a3ed57376382a72838c5a7bb4705bc6c8b8faf21
[2/2] block: enable per-cpu bio cache by default
commit: de4590e1f1838345dfd5c93eda01bcff8890607f
Best regards,
--
Jens Axboe