From: Jens Axboe <[email protected]>
To: Pavel Begunkov <[email protected]>, [email protected]
Subject: Re: [PATCH 1/6] io_uring: expand main struct io_kiocb flags to 64-bits
Date: Tue, 6 Feb 2024 19:18:47 -0700
Message-ID: <[email protected]>
In-Reply-To: <[email protected]>
On 2/6/24 5:43 PM, Pavel Begunkov wrote:
> On 2/6/24 16:22, Jens Axboe wrote:
>> We're out of space here, and none of the flags are easily reclaimable.
>> Bump it to 64-bits and re-arrange the struct a bit to avoid gaps.
>>
>> Add a specific bitwise type for the request flags, io_req_flags_t.
>> This will help catch cases where this value is cast to a smaller type
>> like unsigned int, which would truncate the upper flag bits.
>>
>> No functional changes intended in this patch.
>>
>> Signed-off-by: Jens Axboe <[email protected]>
>> ---
>> include/linux/io_uring_types.h | 87 ++++++++++++++++++---------------
>> include/trace/events/io_uring.h | 14 +++---
>> io_uring/filetable.h | 2 +-
>> io_uring/io_uring.c | 9 ++--
>> 4 files changed, 60 insertions(+), 52 deletions(-)
>>
>> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
>> index 854ad67a5f70..5ac18b05d4ee 100644
>> --- a/include/linux/io_uring_types.h
>> +++ b/include/linux/io_uring_types.h
>> @@ -428,7 +428,7 @@ struct io_tw_state {
>> bool locked;
>> };
>> -enum {
>> +enum io_req_flags {
>> REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT,
>> REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT,
>> REQ_F_LINK_BIT = IOSQE_IO_LINK_BIT,
>> @@ -468,70 +468,73 @@ enum {
>> __REQ_F_LAST_BIT,
>> };
>> +typedef enum io_req_flags __bitwise io_req_flags_t;
>> +#define IO_REQ_FLAG(bitno) ((__force io_req_flags_t) BIT_ULL((bitno)))
>> +
>> enum {
>> /* ctx owns file */
>> - REQ_F_FIXED_FILE = BIT(REQ_F_FIXED_FILE_BIT),
>> + REQ_F_FIXED_FILE = IO_REQ_FLAG(REQ_F_FIXED_FILE_BIT),
>> /* drain existing IO first */
>> - REQ_F_IO_DRAIN = BIT(REQ_F_IO_DRAIN_BIT),
>> + REQ_F_IO_DRAIN = IO_REQ_FLAG(REQ_F_IO_DRAIN_BIT),
>> /* linked sqes */
>> - REQ_F_LINK = BIT(REQ_F_LINK_BIT),
>> + REQ_F_LINK = IO_REQ_FLAG(REQ_F_LINK_BIT),
>> /* doesn't sever on completion < 0 */
>> - REQ_F_HARDLINK = BIT(REQ_F_HARDLINK_BIT),
>> + REQ_F_HARDLINK = IO_REQ_FLAG(REQ_F_HARDLINK_BIT),
>> /* IOSQE_ASYNC */
>> - REQ_F_FORCE_ASYNC = BIT(REQ_F_FORCE_ASYNC_BIT),
>> + REQ_F_FORCE_ASYNC = IO_REQ_FLAG(REQ_F_FORCE_ASYNC_BIT),
>> /* IOSQE_BUFFER_SELECT */
>> - REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT),
>> + REQ_F_BUFFER_SELECT = IO_REQ_FLAG(REQ_F_BUFFER_SELECT_BIT),
>> /* IOSQE_CQE_SKIP_SUCCESS */
>> - REQ_F_CQE_SKIP = BIT(REQ_F_CQE_SKIP_BIT),
>> + REQ_F_CQE_SKIP = IO_REQ_FLAG(REQ_F_CQE_SKIP_BIT),
>> /* fail rest of links */
>> - REQ_F_FAIL = BIT(REQ_F_FAIL_BIT),
>> + REQ_F_FAIL = IO_REQ_FLAG(REQ_F_FAIL_BIT),
>> /* on inflight list, should be cancelled and waited on exit reliably */
>> - REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT),
>> + REQ_F_INFLIGHT = IO_REQ_FLAG(REQ_F_INFLIGHT_BIT),
>> /* read/write uses file position */
>> - REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT),
>> + REQ_F_CUR_POS = IO_REQ_FLAG(REQ_F_CUR_POS_BIT),
>> /* must not punt to workers */
>> - REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT),
>> + REQ_F_NOWAIT = IO_REQ_FLAG(REQ_F_NOWAIT_BIT),
>> /* has or had linked timeout */
>> - REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT),
>> + REQ_F_LINK_TIMEOUT = IO_REQ_FLAG(REQ_F_LINK_TIMEOUT_BIT),
>> /* needs cleanup */
>> - REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT),
>> + REQ_F_NEED_CLEANUP = IO_REQ_FLAG(REQ_F_NEED_CLEANUP_BIT),
>> /* already went through poll handler */
>> - REQ_F_POLLED = BIT(REQ_F_POLLED_BIT),
>> + REQ_F_POLLED = IO_REQ_FLAG(REQ_F_POLLED_BIT),
>> /* buffer already selected */
>> - REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT),
>> + REQ_F_BUFFER_SELECTED = IO_REQ_FLAG(REQ_F_BUFFER_SELECTED_BIT),
>> /* buffer selected from ring, needs commit */
>> - REQ_F_BUFFER_RING = BIT(REQ_F_BUFFER_RING_BIT),
>> + REQ_F_BUFFER_RING = IO_REQ_FLAG(REQ_F_BUFFER_RING_BIT),
>> /* caller should reissue async */
>> - REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT),
>> + REQ_F_REISSUE = IO_REQ_FLAG(REQ_F_REISSUE_BIT),
>> /* supports async reads/writes */
>> - REQ_F_SUPPORT_NOWAIT = BIT(REQ_F_SUPPORT_NOWAIT_BIT),
>> + REQ_F_SUPPORT_NOWAIT = IO_REQ_FLAG(REQ_F_SUPPORT_NOWAIT_BIT),
>> /* regular file */
>> - REQ_F_ISREG = BIT(REQ_F_ISREG_BIT),
>> + REQ_F_ISREG = IO_REQ_FLAG(REQ_F_ISREG_BIT),
>> /* has creds assigned */
>> - REQ_F_CREDS = BIT(REQ_F_CREDS_BIT),
>> + REQ_F_CREDS = IO_REQ_FLAG(REQ_F_CREDS_BIT),
>> /* skip refcounting if not set */
>> - REQ_F_REFCOUNT = BIT(REQ_F_REFCOUNT_BIT),
>> + REQ_F_REFCOUNT = IO_REQ_FLAG(REQ_F_REFCOUNT_BIT),
>> /* there is a linked timeout that has to be armed */
>> - REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT),
>> + REQ_F_ARM_LTIMEOUT = IO_REQ_FLAG(REQ_F_ARM_LTIMEOUT_BIT),
>> /* ->async_data allocated */
>> - REQ_F_ASYNC_DATA = BIT(REQ_F_ASYNC_DATA_BIT),
>> + REQ_F_ASYNC_DATA = IO_REQ_FLAG(REQ_F_ASYNC_DATA_BIT),
>> /* don't post CQEs while failing linked requests */
>> - REQ_F_SKIP_LINK_CQES = BIT(REQ_F_SKIP_LINK_CQES_BIT),
>> + REQ_F_SKIP_LINK_CQES = IO_REQ_FLAG(REQ_F_SKIP_LINK_CQES_BIT),
>> /* single poll may be active */
>> - REQ_F_SINGLE_POLL = BIT(REQ_F_SINGLE_POLL_BIT),
>> + REQ_F_SINGLE_POLL = IO_REQ_FLAG(REQ_F_SINGLE_POLL_BIT),
>> /* double poll may be active */
>> - REQ_F_DOUBLE_POLL = BIT(REQ_F_DOUBLE_POLL_BIT),
>> + REQ_F_DOUBLE_POLL = IO_REQ_FLAG(REQ_F_DOUBLE_POLL_BIT),
>> /* request has already done partial IO */
>> - REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT),
>> + REQ_F_PARTIAL_IO = IO_REQ_FLAG(REQ_F_PARTIAL_IO_BIT),
>> /* fast poll multishot mode */
>> - REQ_F_APOLL_MULTISHOT = BIT(REQ_F_APOLL_MULTISHOT_BIT),
>> + REQ_F_APOLL_MULTISHOT = IO_REQ_FLAG(REQ_F_APOLL_MULTISHOT_BIT),
>> /* recvmsg special flag, clear EPOLLIN */
>> - REQ_F_CLEAR_POLLIN = BIT(REQ_F_CLEAR_POLLIN_BIT),
>> + REQ_F_CLEAR_POLLIN = IO_REQ_FLAG(REQ_F_CLEAR_POLLIN_BIT),
>> /* hashed into ->cancel_hash_locked, protected by ->uring_lock */
>> - REQ_F_HASH_LOCKED = BIT(REQ_F_HASH_LOCKED_BIT),
>> + REQ_F_HASH_LOCKED = IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT),
>> /* don't use lazy poll wake for this request */
>> - REQ_F_POLL_NO_LAZY = BIT(REQ_F_POLL_NO_LAZY_BIT),
>> + REQ_F_POLL_NO_LAZY = IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT),
>> };
>> typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
>> @@ -592,15 +595,14 @@ struct io_kiocb {
>> * and after selection it points to the buffer ID itself.
>> */
>> u16 buf_index;
>> - unsigned int flags;
>> - struct io_cqe cqe;
>
> With the current layout the min number of cache lines we touch per
> request is 2 (including the op specific 64B); that includes setting
> up the cqe at init and using it for completion. Moving cqe down
> makes it 3.
>
>> + atomic_t refs;
>
> We're pulling up refs, which is not touched at all in the hot
> path. Even if there's a hole, I'd argue it's better to leave it
> at the end.
>
>> +
>> + io_req_flags_t flags;
>> struct io_ring_ctx *ctx;
>> struct task_struct *task;
>> - struct io_rsrc_node *rsrc_node;
>
> It's used in hot paths (registered buffers/files); it'd be
> unfortunate to move it to the next cache line.
Yep I did feel a bit bad about that one... Let me take another stab at
it.
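
FWIW, for eyeballing the layout fallout I've been running pahole on the
object file (this assumes a build with debug info enabled), e.g.:

	$ pahole -C io_kiocb io_uring/io_uring.o

which annotates the holes and cache line boundaries directly, so it's
easy to verify where each field lands after re-arranging.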
>> -
>> union {
>> /* store used ubuf, so we can prevent reloading */
>> struct io_mapped_ubuf *imu;
>> @@ -615,18 +617,23 @@ struct io_kiocb {
>> struct io_buffer_list *buf_list;
>> };
>> + /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
>> + struct hlist_node hash_node;
>> +
>
> And we're pulling hash_node into the hottest cache line, even though
> it's only used when we arm and remove a poll. So it's mostly for
> networking; sends wouldn't use it much, and multishots wouldn't
> normally touch it.
>
> As for ideas how to find space:
> 1) iopoll_completed can be converted to flags2
That's a good idea, but it won't immediately find any space as it'd
just leave a hole anyway. Would be good to note in there though, you
never know when it'll need re-arranging again.
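
Maybe just a comment next to the field for now, something like this
(untested sketch, wording is mine):

	/*
	 * Note: if we run out of request flag bits again, this can be
	 * converted into a second flags field.
	 */
	bool				iopoll_completed;

so the idea doesn't get lost.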
> 2) REQ_F_{SINGLE,DOUBLE}_POLL is a weird duplication. It can
> probably be combined into one flag, or removed altogether.
> Again, sends are usually not that poll heavy, and the hot
> path for recv is multishot.
Normal receive is also a hot path, even if multishot should be preferred
in general. Ditto for non-socket but still pollable files, e.g. doing a
read.
> 3) we can probably move req->task down and replace it with
>
> get_task() {
> if (req->ctx->flags & DEFER_TASKRUN)
> task = ctx->submitter_task;
> else
> task = req->task;
> }
Assuming ctx->flags is hot, which it generally would be, that's not a
bad idea at all.
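
Something like this, presumably (untested sketch, io_req_task() being a
made-up name):

	static inline struct task_struct *io_req_task(struct io_kiocb *req)
	{
		if (req->ctx->flags & IORING_SETUP_DEFER_TASKRUN)
			return req->ctx->submitter_task;
		return req->task;
	}

with callers switched over from dereferencing req->task directly.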
I'll do another loop over this one.
--
Jens Axboe