* [PATCH v4 0/2] io_uring: add EPOLLEXCLUSIVE flag for POLL_ADD operation
  From: Jiufei Xue @ 2020-06-17  9:53 UTC
  To: io-uring; +Cc: axboe, joseph.qi

Applications can use this flag to avoid accept thundering herd type
behavior.

Jiufei Xue (2):
  io_uring: change the poll type to be 32-bits
  io_uring: use EPOLLEXCLUSIVE flag to avoid thundering herd type
    behavior
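For illustration, a minimal sketch of what the series enables at the SQE
level. This is a hypothetical application snippet, not part of the series:
prep_exclusive_accept_poll() is an invented helper name, it relies on
EPOLLEXCLUSIVE from <sys/epoll.h> and on the poll32_events field added by
patch 1/2 below, and it ignores the big-endian word swap described there.

#include <string.h>
#include <sys/epoll.h>
#include <linux/io_uring.h>

/* Hypothetical helper: arm a poll on a listening socket so that only one
 * waiter is woken per incoming connection instead of all of them. */
static void prep_exclusive_accept_poll(struct io_uring_sqe *sqe, int listen_fd)
{
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_POLL_ADD;
	sqe->fd = listen_fd;
	/* 32-bit poll mask introduced by patch 1/2 */
	sqe->poll32_events = EPOLLIN | EPOLLEXCLUSIVE;
}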
* [PATCH v4 1/2] io_uring: change the poll type to be 32-bits
  From: Jiufei Xue @ 2020-06-17  9:53 UTC
  To: io-uring; +Cc: axboe, joseph.qi

Poll events should be 32 bits to cover EPOLLEXCLUSIVE.

Explicitly word-swap the poll32_events for big endian to make sure the
ABI is not changed. We call this feature IORING_FEAT_POLL_32BITS;
applications that want to use EPOLLEXCLUSIVE should check the feature
bit first.

Signed-off-by: Jiufei Xue <[email protected]>
---
 fs/io_uring.c                 | 13 +++++++++----
 include/uapi/linux/io_uring.h |  4 +++-
 tools/io_uring/liburing.h     |  6 +++++-
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 155f3d8..fe935cf 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4543,7 +4543,7 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
 static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_poll_iocb *poll = &req->poll;
-	u16 events;
+	u32 events;
 
 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
@@ -4552,7 +4552,10 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
 	if (!poll->file)
 		return -EBADF;
 
-	events = READ_ONCE(sqe->poll_events);
+	events = READ_ONCE(sqe->poll32_events);
+#ifdef __BIG_ENDIAN
+	events = swahw32(events);
+#endif
 	poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
 
 	get_task_struct(current);
@@ -7865,7 +7868,8 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 
 	p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
 			IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
-			IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL;
+			IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
+			IORING_FEAT_POLL_32BITS;
 
 	if (copy_to_user(params, p, sizeof(*p))) {
 		ret = -EFAULT;
@@ -8154,7 +8158,8 @@ static int __init io_uring_init(void)
 	BUILD_BUG_SQE_ELEM(28, /* compat */   int, rw_flags);
 	BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags);
 	BUILD_BUG_SQE_ELEM(28, __u32, fsync_flags);
-	BUILD_BUG_SQE_ELEM(28, __u16, poll_events);
+	BUILD_BUG_SQE_ELEM(28, /* compat */ __u16, poll_events);
+	BUILD_BUG_SQE_ELEM(28, __u32, poll32_events);
 	BUILD_BUG_SQE_ELEM(28, __u32, sync_range_flags);
 	BUILD_BUG_SQE_ELEM(28, __u32, msg_flags);
 	BUILD_BUG_SQE_ELEM(28, __u32, timeout_flags);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 92c2269..8d03396 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -31,7 +31,8 @@ struct io_uring_sqe {
 	union {
 		__kernel_rwf_t	rw_flags;
 		__u32		fsync_flags;
-		__u16		poll_events;
+		__u16		poll_events;	/* compatibility */
+		__u32		poll32_events;	/* word-reversed for BE */
 		__u32		sync_range_flags;
 		__u32		msg_flags;
 		__u32		timeout_flags;
@@ -248,6 +249,7 @@ struct io_uring_params {
#define IORING_FEAT_RW_CUR_POS		(1U << 3)
#define IORING_FEAT_CUR_PERSONALITY	(1U << 4)
#define IORING_FEAT_FAST_POLL		(1U << 5)
+#define IORING_FEAT_POLL_32BITS 	(1U << 6)
 
 /*
  * io_uring_register(2) opcodes and arguments
diff --git a/tools/io_uring/liburing.h b/tools/io_uring/liburing.h
index 5f305c8..28a837b 100644
--- a/tools/io_uring/liburing.h
+++ b/tools/io_uring/liburing.h
@@ -10,6 +10,7 @@
 #include <string.h>
 #include "../../include/uapi/linux/io_uring.h"
 #include <inttypes.h>
+#include <linux/swab.h>
 #include "barrier.h"
 
 /*
@@ -145,11 +146,14 @@ static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
 }
 
 static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd,
-					  short poll_mask)
+					  unsigned poll_mask)
 {
 	memset(sqe, 0, sizeof(*sqe));
 	sqe->opcode = IORING_OP_POLL_ADD;
 	sqe->fd = fd;
+#if __BYTE_ORDER == __BIG_ENDIAN
+	poll_mask = __swahw32(poll_mask);
+#endif
 	sqe->poll_events = poll_mask;
 }
 
-- 
1.8.3.1
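As context for the feature bit mentioned in the commit message, a minimal
userspace sketch (not part of the patch) of how an application might detect
IORING_FEAT_POLL_32BITS before relying on a 32-bit poll mask.
setup_ring_with_32bit_poll() is a hypothetical name; the snippet assumes a
uapi header that already carries this patch and that __NR_io_uring_setup is
exposed via <sys/syscall.h>; error handling is trimmed.

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

static int setup_ring_with_32bit_poll(unsigned entries)
{
	struct io_uring_params p;
	int ring_fd;

	memset(&p, 0, sizeof(p));
	ring_fd = syscall(__NR_io_uring_setup, entries, &p);
	if (ring_fd < 0)
		return -1;

	/* Only use poll32_events/EPOLLEXCLUSIVE when the kernel
	 * advertises 32-bit poll masks. */
	if (!(p.features & IORING_FEAT_POLL_32BITS)) {
		close(ring_fd);
		return -1;
	}
	return ring_fd;
}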
* Re: [PATCH v4 1/2] io_uring: change the poll type to be 32-bits
  From: Jens Axboe @ 2020-06-17 15:15 UTC
  To: Jiufei Xue, io-uring; +Cc: joseph.qi

On 6/17/20 3:53 AM, Jiufei Xue wrote:
> Poll events should be 32 bits to cover EPOLLEXCLUSIVE.
> 
> Explicitly word-swap the poll32_events for big endian to make sure the
> ABI is not changed. We call this feature IORING_FEAT_POLL_32BITS;
> applications that want to use EPOLLEXCLUSIVE should check the feature
> bit first.
> 
> Signed-off-by: Jiufei Xue <[email protected]>

[ ... ]

> +++ b/tools/io_uring/liburing.h
> @@ -145,11 +146,14 @@ static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
>  }
>  
>  static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd,
> -					  short poll_mask)
> +					  unsigned poll_mask)
>  {
>  	memset(sqe, 0, sizeof(*sqe));
>  	sqe->opcode = IORING_OP_POLL_ADD;
>  	sqe->fd = fd;
> +#if __BYTE_ORDER == __BIG_ENDIAN
> +	poll_mask = __swahw32(poll_mask);
> +#endif
>  	sqe->poll_events = poll_mask;

This looks good to me now, but this one need not use the __ version, it
should just use the regular one as that's the one defined in the non-uapi
header. But I'll just make that change, won't functionally do anything.

-- 
Jens Axboe
* [PATCH v4 2/2] io_uring: use EPOLLEXCLUSIVE flag to avoid thundering herd type behavior
  From: Jiufei Xue @ 2020-06-17  9:53 UTC
  To: io-uring; +Cc: axboe, joseph.qi

Applications can pass this flag in to avoid accept thundering herd.

Signed-off-by: Jiufei Xue <[email protected]>
---
 fs/io_uring.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index fe935cf..f156eba 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4225,7 +4225,11 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
 
 	pt->error = 0;
 	poll->head = head;
-	add_wait_queue(head, &poll->wait);
+
+	if (poll->events & EPOLLEXCLUSIVE)
+		add_wait_queue_exclusive(head, &poll->wait);
+	else
+		add_wait_queue(head, &poll->wait);
 }
 
 static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
@@ -4556,7 +4560,8 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
 #ifdef __BIG_ENDIAN
 	events = swahw32(events);
 #endif
-	poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
+	poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP |
+				(events & EPOLLEXCLUSIVE);
 
 	get_task_struct(current);
 	req->task = current;
-- 
1.8.3.1
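To make the intended accept pattern concrete, a hedged per-worker sketch of
what this patch enables. It uses liburing-style helpers (io_uring_get_sqe(),
io_uring_submit(), io_uring_wait_cqe(), io_uring_cqe_seen()) together with
the io_uring_prep_poll_add() that patch 1/2 widens to an unsigned mask;
worker_loop() and handle_connection() are hypothetical names and the
per-worker ring setup is assumed elsewhere. Each worker polls the shared
listening socket with EPOLLEXCLUSIVE, so a new connection wakes only one
of them.

#include <sys/epoll.h>
#include <sys/socket.h>
#include <liburing.h>

void handle_connection(int fd);	/* assumed to exist elsewhere */

static void worker_loop(struct io_uring *ring, int listen_fd)
{
	for (;;) {
		struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
		struct io_uring_cqe *cqe;
		int conn_fd;

		/* POLL_ADD is one-shot: re-arm before each wait. */
		io_uring_prep_poll_add(sqe, listen_fd, EPOLLIN | EPOLLEXCLUSIVE);
		io_uring_submit(ring);

		if (io_uring_wait_cqe(ring, &cqe))
			break;
		io_uring_cqe_seen(ring, cqe);

		conn_fd = accept(listen_fd, NULL, NULL);
		if (conn_fd >= 0)
			handle_connection(conn_fd);
	}
}

Compared with plain EPOLLIN polling from every worker, only one waiter is
woken per incoming connection, which is the thundering herd behavior this
series avoids.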
end of thread, other threads:[~2020-06-17 15:15 UTC | newest]

Thread overview: 4+ messages
2020-06-17  9:53 [PATCH v4 0/2] io_uring: add EPOLLEXCLUSIVE flag for POLL_ADD operation Jiufei Xue
2020-06-17  9:53 ` [PATCH v4 1/2] io_uring: change the poll type to be 32-bits Jiufei Xue
2020-06-17 15:15   ` Jens Axboe
2020-06-17  9:53 ` [PATCH v4 2/2] io_uring: use EPOLLEXCLUSIVE flag to avoid thundering herd type behavior Jiufei Xue