* [PATCHSET 0/2] Add support for IORING_RECVSEND_POLL_FIRST
@ 2022-04-27 1:54 Jens Axboe
2022-04-27 1:54 ` [PATCH 1/2] io_uring: check IOPOLL/ioprio support upfront Jens Axboe
` (3 more replies)
0 siblings, 4 replies; 8+ messages in thread
From: Jens Axboe @ 2022-04-27 1:54 UTC (permalink / raw)
To: io-uring
Hi,
I had a re-think on the flags2 addition [1] that was posted earlier
today, and I don't really like the fact that flags2 then can't work
with ioprio for read/write etc. We might also want to extend the
ioprio field for other types of IO in the future.
So rather than do that, do a simpler approach and just add an io_uring
specific flag set for send/recv and friends. This then allows setting
IORING_RECVSEND_POLL_FIRST in sqe->addr2 for those, and if set, io_uring
will arm poll first rather than attempt a send/recv operation.
[1] https://lore.kernel.org/io-uring/[email protected]/
--
Jens Axboe
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 1/2] io_uring: check IOPOLL/ioprio support upfront
2022-04-27 1:54 [PATCHSET 0/2] Add support for IORING_RECVSEND_POLL_FIRST Jens Axboe
@ 2022-04-27 1:54 ` Jens Axboe
2022-04-27 1:54 ` [PATCH 2/2] io_uring: add POLL_FIRST support for send/sendmsg and recv/recvmsg Jens Axboe
` (2 subsequent siblings)
3 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2022-04-27 1:54 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe
Don't punt this check to the op prep handlers, add the support to
io_op_defs and we can check them while setting up the request.
This reduces the text size by 500 bytes on aarch64, and makes this less
fragile by having the check in one spot and needing opcodes to opt in
to IOPOLL or ioprio support.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 148 ++++++++++++++++++--------------------------------
1 file changed, 52 insertions(+), 96 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index f1a9595ba4c2..39325e469738 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1028,6 +1028,10 @@ struct io_op_def {
unsigned not_supported : 1;
/* skip auditing */
unsigned audit_skip : 1;
+ /* supports ioprio */
+ unsigned ioprio : 1;
+ /* supports iopoll */
+ unsigned iopoll : 1;
/* size of async data needed, if any */
unsigned short async_size;
};
@@ -1042,6 +1046,8 @@ static const struct io_op_def io_op_defs[] = {
.needs_async_setup = 1,
.plug = 1,
.audit_skip = 1,
+ .ioprio = 1,
+ .iopoll = 1,
.async_size = sizeof(struct io_async_rw),
},
[IORING_OP_WRITEV] = {
@@ -1052,6 +1058,8 @@ static const struct io_op_def io_op_defs[] = {
.needs_async_setup = 1,
.plug = 1,
.audit_skip = 1,
+ .ioprio = 1,
+ .iopoll = 1,
.async_size = sizeof(struct io_async_rw),
},
[IORING_OP_FSYNC] = {
@@ -1064,6 +1072,8 @@ static const struct io_op_def io_op_defs[] = {
.pollin = 1,
.plug = 1,
.audit_skip = 1,
+ .ioprio = 1,
+ .iopoll = 1,
.async_size = sizeof(struct io_async_rw),
},
[IORING_OP_WRITE_FIXED] = {
@@ -1073,6 +1083,8 @@ static const struct io_op_def io_op_defs[] = {
.pollout = 1,
.plug = 1,
.audit_skip = 1,
+ .ioprio = 1,
+ .iopoll = 1,
.async_size = sizeof(struct io_async_rw),
},
[IORING_OP_POLL_ADD] = {
@@ -1137,6 +1149,7 @@ static const struct io_op_def io_op_defs[] = {
[IORING_OP_CLOSE] = {},
[IORING_OP_FILES_UPDATE] = {
.audit_skip = 1,
+ .iopoll = 1,
},
[IORING_OP_STATX] = {
.audit_skip = 1,
@@ -1148,6 +1161,8 @@ static const struct io_op_def io_op_defs[] = {
.buffer_select = 1,
.plug = 1,
.audit_skip = 1,
+ .ioprio = 1,
+ .iopoll = 1,
.async_size = sizeof(struct io_async_rw),
},
[IORING_OP_WRITE] = {
@@ -1157,6 +1172,8 @@ static const struct io_op_def io_op_defs[] = {
.pollout = 1,
.plug = 1,
.audit_skip = 1,
+ .ioprio = 1,
+ .iopoll = 1,
.async_size = sizeof(struct io_async_rw),
},
[IORING_OP_FADVISE] = {
@@ -1191,9 +1208,11 @@ static const struct io_op_def io_op_defs[] = {
},
[IORING_OP_PROVIDE_BUFFERS] = {
.audit_skip = 1,
+ .iopoll = 1,
},
[IORING_OP_REMOVE_BUFFERS] = {
.audit_skip = 1,
+ .iopoll = 1,
},
[IORING_OP_TEE] = {
.needs_file = 1,
@@ -1211,6 +1230,7 @@ static const struct io_op_def io_op_defs[] = {
[IORING_OP_LINKAT] = {},
[IORING_OP_MSG_RING] = {
.needs_file = 1,
+ .iopoll = 1,
},
};
@@ -4139,9 +4159,7 @@ static int io_renameat_prep(struct io_kiocb *req,
struct io_rename *ren = &req->rename;
const char __user *oldf, *newf;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+ if (sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
@@ -4190,10 +4208,7 @@ static int io_unlinkat_prep(struct io_kiocb *req,
struct io_unlink *un = &req->unlink;
const char __user *fname;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index ||
- sqe->splice_fd_in)
+ if (sqe->off || sqe->len || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
@@ -4239,10 +4254,7 @@ static int io_mkdirat_prep(struct io_kiocb *req,
struct io_mkdir *mkd = &req->mkdir;
const char __user *fname;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->off || sqe->rw_flags || sqe->buf_index ||
- sqe->splice_fd_in)
+ if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
@@ -4282,10 +4294,7 @@ static int io_symlinkat_prep(struct io_kiocb *req,
struct io_symlink *sl = &req->symlink;
const char __user *oldpath, *newpath;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->len || sqe->rw_flags || sqe->buf_index ||
- sqe->splice_fd_in)
+ if (sqe->len || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
@@ -4331,9 +4340,7 @@ static int io_linkat_prep(struct io_kiocb *req,
struct io_hardlink *lnk = &req->hardlink;
const char __user *oldf, *newf;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
+ if (sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
@@ -4380,9 +4387,7 @@ static int io_shutdown_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_NET)
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (unlikely(sqe->ioprio || sqe->off || sqe->addr || sqe->rw_flags ||
+ if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
sqe->buf_index || sqe->splice_fd_in))
return -EINVAL;
@@ -4422,9 +4427,6 @@ static int __io_splice_prep(struct io_kiocb *req,
struct io_splice *sp = &req->splice;
unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
-
sp->len = READ_ONCE(sqe->len);
sp->flags = READ_ONCE(sqe->splice_flags);
if (unlikely(sp->flags & ~valid_flags))
@@ -4523,11 +4525,6 @@ static int io_splice(struct io_kiocb *req, unsigned int issue_flags)
*/
static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
{
- struct io_ring_ctx *ctx = req->ctx;
-
- if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
-
__io_req_complete(req, issue_flags, 0, 0);
return 0;
}
@@ -4535,8 +4532,8 @@ static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
static int io_msg_ring_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
- if (unlikely(sqe->addr || sqe->ioprio || sqe->rw_flags ||
- sqe->splice_fd_in || sqe->buf_index || sqe->personality))
+ if (unlikely(sqe->addr || sqe->rw_flags || sqe->splice_fd_in ||
+ sqe->buf_index || sqe->personality))
return -EINVAL;
req->msg.user_data = READ_ONCE(sqe->off);
@@ -4577,12 +4574,7 @@ static int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- struct io_ring_ctx *ctx = req->ctx;
-
- if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
- sqe->splice_fd_in))
+ if (unlikely(sqe->addr || sqe->buf_index || sqe->splice_fd_in))
return -EINVAL;
req->sync.flags = READ_ONCE(sqe->fsync_flags);
@@ -4615,10 +4607,7 @@ static int io_fsync(struct io_kiocb *req, unsigned int issue_flags)
static int io_fallocate_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
- if (sqe->ioprio || sqe->buf_index || sqe->rw_flags ||
- sqe->splice_fd_in)
- return -EINVAL;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ if (sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
return -EINVAL;
req->sync.off = READ_ONCE(sqe->off);
@@ -4649,9 +4638,7 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
const char __user *fname;
int ret;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (unlikely(sqe->ioprio || sqe->buf_index))
+ if (unlikely(sqe->buf_index))
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
@@ -4783,7 +4770,7 @@ static int io_remove_buffers_prep(struct io_kiocb *req,
struct io_provide_buf *p = &req->pbuf;
u64 tmp;
- if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
+ if (sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
sqe->splice_fd_in)
return -EINVAL;
@@ -4850,7 +4837,7 @@ static int io_provide_buffers_prep(struct io_kiocb *req,
struct io_provide_buf *p = &req->pbuf;
u64 tmp;
- if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
+ if (sqe->rw_flags || sqe->splice_fd_in)
return -EINVAL;
tmp = READ_ONCE(sqe->fd);
@@ -4980,9 +4967,7 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_EPOLL)
- if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
- return -EINVAL;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ if (sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
req->epoll.epfd = READ_ONCE(sqe->fd);
@@ -5026,9 +5011,7 @@ static int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags)
static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
- if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->splice_fd_in)
- return -EINVAL;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ if (sqe->buf_index || sqe->off || sqe->splice_fd_in)
return -EINVAL;
req->madvise.addr = READ_ONCE(sqe->addr);
@@ -5061,9 +5044,7 @@ static int io_madvise(struct io_kiocb *req, unsigned int issue_flags)
static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- if (sqe->ioprio || sqe->buf_index || sqe->addr || sqe->splice_fd_in)
- return -EINVAL;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ if (sqe->buf_index || sqe->addr || sqe->splice_fd_in)
return -EINVAL;
req->fadvise.offset = READ_ONCE(sqe->off);
@@ -5099,9 +5080,7 @@ static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
const char __user *path;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+ if (sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (req->flags & REQ_F_FIXED_FILE)
return -EBADF;
@@ -5146,10 +5125,7 @@ static int io_statx(struct io_kiocb *req, unsigned int issue_flags)
static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
- sqe->rw_flags || sqe->buf_index)
+ if (sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index)
return -EINVAL;
if (req->flags & REQ_F_FIXED_FILE)
return -EBADF;
@@ -5215,12 +5191,7 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
static int io_sfr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- struct io_ring_ctx *ctx = req->ctx;
-
- if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
- sqe->splice_fd_in))
+ if (unlikely(sqe->addr || sqe->buf_index || sqe->splice_fd_in))
return -EINVAL;
req->sync.off = READ_ONCE(sqe->off);
@@ -5298,8 +5269,6 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_sr_msg *sr = &req->sr_msg;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
if (unlikely(sqe->addr2 || sqe->file_index))
return -EINVAL;
@@ -5533,8 +5502,6 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_sr_msg *sr = &req->sr_msg;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
if (unlikely(sqe->addr2 || sqe->file_index))
return -EINVAL;
@@ -5692,9 +5659,7 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_accept *accept = &req->accept;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->len || sqe->buf_index)
+ if (sqe->len || sqe->buf_index)
return -EINVAL;
accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -5760,10 +5725,7 @@ static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_connect *conn = &req->connect;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags ||
- sqe->splice_fd_in)
+ if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
return -EINVAL;
conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -6446,9 +6408,7 @@ static int io_poll_update_prep(struct io_kiocb *req,
struct io_poll_update *upd = &req->poll_update;
u32 flags;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+ if (sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
flags = READ_ONCE(sqe->len);
if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
@@ -6478,9 +6438,7 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
struct io_poll_iocb *poll = &req->poll;
u32 flags;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->addr)
+ if (sqe->buf_index || sqe->off || sqe->addr)
return -EINVAL;
flags = READ_ONCE(sqe->len);
if (flags & ~IORING_POLL_ADD_MULTI)
@@ -6687,11 +6645,9 @@ static int io_timeout_remove_prep(struct io_kiocb *req,
{
struct io_timeout_rem *tr = &req->timeout_rem;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
return -EINVAL;
- if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->splice_fd_in)
+ if (sqe->buf_index || sqe->len || sqe->splice_fd_in)
return -EINVAL;
tr->ltimeout = false;
@@ -6761,10 +6717,7 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
unsigned flags;
u32 off = READ_ONCE(sqe->off);
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->buf_index || sqe->len != 1 ||
- sqe->splice_fd_in)
+ if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in)
return -EINVAL;
if (off && is_timeout_link)
return -EINVAL;
@@ -6946,11 +6899,9 @@ static int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd)
static int io_async_cancel_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
if (unlikely(req->flags & REQ_F_BUFFER_SELECT))
return -EINVAL;
- if (sqe->ioprio || sqe->off || sqe->len || sqe->splice_fd_in)
+ if (sqe->off || sqe->len || sqe->splice_fd_in)
return -EINVAL;
req->cancel.addr = READ_ONCE(sqe->addr);
@@ -7036,7 +6987,7 @@ static int io_rsrc_update_prep(struct io_kiocb *req,
{
if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
return -EINVAL;
- if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
+ if (sqe->rw_flags || sqe->splice_fd_in)
return -EINVAL;
req->rsrc_update.offset = READ_ONCE(sqe->off);
@@ -7849,6 +7800,11 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
}
}
+ if (!io_op_defs[opcode].ioprio && sqe->ioprio)
+ return -EINVAL;
+ if (!io_op_defs[opcode].iopoll && (ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+
if (io_op_defs[opcode].needs_file) {
struct io_submit_state *state = &ctx->submit_state;
--
2.35.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/2] io_uring: add POLL_FIRST support for send/sendmsg and recv/recvmsg
2022-04-27 1:54 [PATCHSET 0/2] Add support for IORING_RECVSEND_POLL_FIRST Jens Axboe
2022-04-27 1:54 ` [PATCH 1/2] io_uring: check IOPOLL/ioprio support upfront Jens Axboe
@ 2022-04-27 1:54 ` Jens Axboe
2022-04-27 3:57 ` [PATCHSET 0/2] Add support for IORING_RECVSEND_POLL_FIRST Hao Xu
2022-04-29 18:31 ` Hao Xu
3 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2022-04-27 1:54 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe
If IORING_RECVSEND_POLL_FIRST is set for recv/recvmsg or send/sendmsg,
then we arm poll first rather than attempt a receive or send upfront.
This can be useful if we expect there to be no data (or space) available
for the request, as we can then avoid wasting time on the initial
issue attempt.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 27 +++++++++++++++++++++++++--
include/uapi/linux/io_uring.h | 10 ++++++++++
2 files changed, 35 insertions(+), 2 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 39325e469738..a14bd5f55028 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -637,6 +637,7 @@ struct io_sr_msg {
int bgid;
size_t len;
size_t done_io;
+ unsigned int flags;
};
struct io_open {
@@ -5269,11 +5270,14 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_sr_msg *sr = &req->sr_msg;
- if (unlikely(sqe->addr2 || sqe->file_index))
+ if (unlikely(sqe->file_index))
return -EINVAL;
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
+ sr->flags = READ_ONCE(sqe->addr2);
+ if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
+ return -EINVAL;
sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
if (sr->msg_flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
@@ -5308,6 +5312,10 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
kmsg = &iomsg;
}
+ if (!(req->flags & REQ_F_POLLED) &&
+ (sr->flags & IORING_RECVSEND_POLL_FIRST))
+ return io_setup_async_msg(req, kmsg);
+
flags = req->sr_msg.msg_flags;
if (issue_flags & IO_URING_F_NONBLOCK)
flags |= MSG_DONTWAIT;
@@ -5350,6 +5358,10 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
int min_ret = 0;
int ret;
+ if (!(req->flags & REQ_F_POLLED) &&
+ (sr->flags & IORING_RECVSEND_POLL_FIRST))
+ return -EAGAIN;
+
sock = sock_from_file(req->file);
if (unlikely(!sock))
return -ENOTSOCK;
@@ -5502,11 +5514,14 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_sr_msg *sr = &req->sr_msg;
- if (unlikely(sqe->addr2 || sqe->file_index))
+ if (unlikely(sqe->file_index))
return -EINVAL;
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
+ sr->flags = READ_ONCE(sqe->addr2);
+ if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
+ return -EINVAL;
sr->bgid = READ_ONCE(sqe->buf_group);
sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
if (sr->msg_flags & MSG_DONTWAIT)
@@ -5543,6 +5558,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
kmsg = &iomsg;
}
+ if (!(req->flags & REQ_F_POLLED) &&
+ (sr->flags & IORING_RECVSEND_POLL_FIRST))
+ return io_setup_async_msg(req, kmsg);
+
if (req->flags & REQ_F_BUFFER_SELECT) {
kbuf = io_recv_buffer_select(req, issue_flags);
if (IS_ERR(kbuf))
@@ -5600,6 +5619,10 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
int ret, min_ret = 0;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+ if (!(req->flags & REQ_F_POLLED) &&
+ (sr->flags & IORING_RECVSEND_POLL_FIRST))
+ return -EAGAIN;
+
sock = sock_from_file(req->file);
if (unlikely(!sock))
return -ENOTSOCK;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index fad63564678a..51f972ecaba0 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -213,6 +213,16 @@ enum {
#define IORING_ASYNC_CANCEL_FD (1U << 1)
#define IORING_ASYNC_CANCEL_ANY (1U << 2)
+/*
+ * send/sendmsg and recv/recvmsg flags (sqe->addr2)
+ *
+ * IORING_RECVSEND_POLL_FIRST If set, instead of first attempting to send
+ * or receive and arm poll if that yields an
+ * -EAGAIN result, arm poll upfront and skip
+ * the initial transfer attempt.
+ */
+#define IORING_RECVSEND_POLL_FIRST (1U << 1)
+
/*
* IO completion data structure (Completion Queue Entry)
*/
--
2.35.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCHSET 0/2] Add support for IORING_RECVSEND_POLL_FIRST
2022-04-27 1:54 [PATCHSET 0/2] Add support for IORING_RECVSEND_POLL_FIRST Jens Axboe
2022-04-27 1:54 ` [PATCH 1/2] io_uring: check IOPOLL/ioprio support upfront Jens Axboe
2022-04-27 1:54 ` [PATCH 2/2] io_uring: add POLL_FIRST support for send/sendmsg and recv/recvmsg Jens Axboe
@ 2022-04-27 3:57 ` Hao Xu
2022-04-29 18:31 ` Hao Xu
3 siblings, 0 replies; 8+ messages in thread
From: Hao Xu @ 2022-04-27 3:57 UTC (permalink / raw)
To: Jens Axboe, io-uring
在 4/27/22 9:54 AM, Jens Axboe 写道:
> Hi,
>
> I had a re-think on the flags2 addition [1] that was posted earlier
> today, and I don't really like the fact that flags2 then can't work
> with ioprio for read/write etc. We might also want to extend the
> ioprio field for other types of IO in the future.
>
> So rather than do that, do a simpler approach and just add an io_uring
> specific flag set for send/recv and friends. This then allow setting
> IORING_RECVSEND_POLL_FIRST in sqe->addr2 for those, and if set, io_uring
> will arm poll first rather than attempt a send/recv operation.
>
> [1] https://lore.kernel.org/io-uring/[email protected]/
>
Looks good to me,
Reviewed-by: Hao Xu <[email protected]>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCHSET 0/2] Add support for IORING_RECVSEND_POLL_FIRST
2022-04-27 1:54 [PATCHSET 0/2] Add support for IORING_RECVSEND_POLL_FIRST Jens Axboe
` (2 preceding siblings ...)
2022-04-27 3:57 ` [PATCHSET 0/2] Add support for IORING_RECVSEND_POLL_FIRST Hao Xu
@ 2022-04-29 18:31 ` Hao Xu
2022-04-29 18:40 ` Hao Xu
2022-04-29 18:44 ` Jens Axboe
3 siblings, 2 replies; 8+ messages in thread
From: Hao Xu @ 2022-04-29 18:31 UTC (permalink / raw)
To: Jens Axboe, io-uring
On 4/27/22 09:54, Jens Axboe wrote:
> Hi,
>
> I had a re-think on the flags2 addition [1] that was posted earlier
> today, and I don't really like the fact that flags2 then can't work
> with ioprio for read/write etc. We might also want to extend the
> ioprio field for other types of IO in the future.
>
> So rather than do that, do a simpler approach and just add an io_uring
> specific flag set for send/recv and friends. This then allow setting
> IORING_RECVSEND_POLL_FIRST in sqe->addr2 for those, and if set, io_uring
> will arm poll first rather than attempt a send/recv operation.
>
> [1]
> https://lore.kernel.org/io-uring/[email protected]/
>
Hi Jens,
Could we use something like the high bits of sqe->fd to store general flags2
since I saw the number of open FDs can be about (1<<20) at most.
Though I'm not sure if we can assume the limitation of fd won't change
in the future..
Regards,
Hao
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCHSET 0/2] Add support for IORING_RECVSEND_POLL_FIRST
2022-04-29 18:31 ` Hao Xu
@ 2022-04-29 18:40 ` Hao Xu
2022-04-29 18:44 ` Jens Axboe
2022-04-29 18:44 ` Jens Axboe
1 sibling, 1 reply; 8+ messages in thread
From: Hao Xu @ 2022-04-29 18:40 UTC (permalink / raw)
To: Jens Axboe, io-uring
On 4/30/22 02:31, Hao Xu wrote:
> On 4/27/22 09:54, Jens Axboe wrote:
>> Hi,
>>
>> I had a re-think on the flags2 addition [1] that was posted earlier
>> today, and I don't really like the fact that flags2 then can't work
>> with ioprio for read/write etc. We might also want to extend the
>> ioprio field for other types of IO in the future.
>>
>> So rather than do that, do a simpler approach and just add an io_uring
>> specific flag set for send/recv and friends. This then allow setting
>> IORING_RECVSEND_POLL_FIRST in sqe->addr2 for those, and if set, io_uring
>> will arm poll first rather than attempt a send/recv operation.
>>
>> [1]
>> https://lore.kernel.org/io-uring/[email protected]/
>>
>
> Hi Jens,
> Could we use something like the high bits of sqe->fd to store general
> flags2
> since I saw the number of open FDs can be about (1<<20) at most.
oops, sorry my bad, (1<<20) is just a default value..
> Though I'm not sure if we can assume the limitation of fd won't change
> in the future..
>
> Regards,
> Hao
>
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCHSET 0/2] Add support for IORING_RECVSEND_POLL_FIRST
2022-04-29 18:31 ` Hao Xu
2022-04-29 18:40 ` Hao Xu
@ 2022-04-29 18:44 ` Jens Axboe
1 sibling, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2022-04-29 18:44 UTC (permalink / raw)
To: Hao Xu, io-uring
On 4/29/22 12:31 PM, Hao Xu wrote:
> On 4/27/22 09:54, Jens Axboe wrote:
>> Hi,
>>
>> I had a re-think on the flags2 addition [1] that was posted earlier
>> today, and I don't really like the fact that flags2 then can't work
>> with ioprio for read/write etc. We might also want to extend the
>> ioprio field for other types of IO in the future.
>>
>> So rather than do that, do a simpler approach and just add an io_uring
>> specific flag set for send/recv and friends. This then allow setting
>> IORING_RECVSEND_POLL_FIRST in sqe->addr2 for those, and if set, io_uring
>> will arm poll first rather than attempt a send/recv operation.
>>
>> [1] https://lore.kernel.org/io-uring/[email protected]/
>>
>
> Hi Jens,
> Could we use something like the high bits of sqe->fd to store general
> flags2 since I saw the number of open FDs can be about (1<<20) at
> most. Though I'm not sure if we can assume the limitation of fd won't
> change in the future..
I think that's a bit iffy, it's pretty universally true (at least on
*NIX) that an fd is a signed int. So I'd be hesitant to do that. I
don't mind the flags here, generally we have potentially 3 per request:
- SQE specific ones, these are the IOSQE_* flags and are meant to be
generally applicable to all/most commands. io_uring internal, have no
meaning outside of io_uring.
- Some requests are layered on top of existing functionality, the
recv/recvmsg is a good example. They have their own sets of flags. We
generally use the sqe->rw_flags space for those.
- Lastly, io_uring modifiers to specific requests. That's what this
patch adds. They don't make sense to the lower layers, but they are
specific to this request type for io_uring.
The 3rd type is put in sqe->rw_flags for io_uring specific opcodes, but
for commands that already have flags in the 2nd category, we have to put
them somewhere else. Not a big deal imho, at least as long as the
request type has space in the sqe for it. They generally do, they did in
this case.
--
Jens Axboe
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCHSET 0/2] Add support for IORING_RECVSEND_POLL_FIRST
2022-04-29 18:40 ` Hao Xu
@ 2022-04-29 18:44 ` Jens Axboe
0 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2022-04-29 18:44 UTC (permalink / raw)
To: Hao Xu, io-uring
On 4/29/22 12:40 PM, Hao Xu wrote:
>
>
> On 4/30/22 02:31, Hao Xu wrote:
>> On 4/27/22 09:54, Jens Axboe wrote:
>>> Hi,
>>>
>>> I had a re-think on the flags2 addition [1] that was posted earlier
>>> today, and I don't really like the fact that flags2 then can't work
>>> with ioprio for read/write etc. We might also want to extend the
>>> ioprio field for other types of IO in the future.
>>>
>>> So rather than do that, do a simpler approach and just add an io_uring
>>> specific flag set for send/recv and friends. This then allow setting
>>> IORING_RECVSEND_POLL_FIRST in sqe->addr2 for those, and if set, io_uring
>>> will arm poll first rather than attempt a send/recv operation.
>>>
>>> [1] https://lore.kernel.org/io-uring/[email protected]/
>>>
>>
>> Hi Jens,
>> Could we use something like the high bits of sqe->fd to store general flags2
>> since I saw the number of open FDs can be about (1<<20) at most.
>
> oops, sorry my bad, (1<<20) is just a default value..
Indeed, you can certainly go higher and people do.
--
Jens Axboe
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2022-04-29 18:44 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-27 1:54 [PATCHSET 0/2] Add support for IORING_RECVSEND_POLL_FIRST Jens Axboe
2022-04-27 1:54 ` [PATCH 1/2] io_uring: check IOPOLL/ioprio support upfront Jens Axboe
2022-04-27 1:54 ` [PATCH 2/2] io_uring: add POLL_FIRST support for send/sendmsg and recv/recvmsg Jens Axboe
2022-04-27 3:57 ` [PATCHSET 0/2] Add support for IORING_RECVSEND_POLL_FIRST Hao Xu
2022-04-29 18:31 ` Hao Xu
2022-04-29 18:40 ` Hao Xu
2022-04-29 18:44 ` Jens Axboe
2022-04-29 18:44 ` Jens Axboe
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox