* [PATCH v4 0/2] io_uring: add EPOLLEXCLUSIVE flag for POLL_ADD operation
@ 2020-06-17 9:53 Jiufei Xue
2020-06-17 9:53 ` [PATCH v4 1/2] io_uring: change the poll type to be 32-bits Jiufei Xue
2020-06-17 9:53 ` [PATCH v4 2/2] io_uring: use EPOLLEXCLUSIVE flag to avoid thundering herd type behavior Jiufei Xue
0 siblings, 2 replies; 4+ messages in thread
From: Jiufei Xue @ 2020-06-17 9:53 UTC (permalink / raw)
To: io-uring; +Cc: axboe, joseph.qi
Applications can set this flag on a POLL_ADD request to avoid
thundering-herd behavior, e.g. when several pollers wait on the same
listening socket for accept().
Jiufei Xue (2):
io_uring: change the poll type to be 32-bits
io_uring: use EPOLLEXCLUSIVE flag to avoid thundering herd type behavior
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v4 1/2] io_uring: change the poll type to be 32-bits
2020-06-17 9:53 [PATCH v4 0/2] io_uring: add EPOLLEXCLUSIVE flag for POLL_ADD operation Jiufei Xue
@ 2020-06-17 9:53 ` Jiufei Xue
2020-06-17 15:15 ` Jens Axboe
2020-06-17 9:53 ` [PATCH v4 2/2] io_uring: use EPOLLEXCLUSIVE flag to avoid thundering herd type behavior Jiufei Xue
1 sibling, 1 reply; 4+ messages in thread
From: Jiufei Xue @ 2020-06-17 9:53 UTC (permalink / raw)
To: io-uring; +Cc: axboe, joseph.qi
Poll events should be 32 bits wide to cover EPOLLEXCLUSIVE.
Explicitly word-swap poll32_events on big-endian machines to make sure the
ABI is not changed. We call this feature IORING_FEAT_POLL_32BITS;
applications that want to use EPOLLEXCLUSIVE should check the feature bit
first.
Signed-off-by: Jiufei Xue <[email protected]>
---
fs/io_uring.c | 13 +++++++++----
include/uapi/linux/io_uring.h | 4 +++-
tools/io_uring/liburing.h | 6 +++++-
3 files changed, 17 insertions(+), 6 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 155f3d8..fe935cf 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4543,7 +4543,7 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_poll_iocb *poll = &req->poll;
- u16 events;
+ u32 events;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
@@ -4552,7 +4552,10 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
if (!poll->file)
return -EBADF;
- events = READ_ONCE(sqe->poll_events);
+ events = READ_ONCE(sqe->poll32_events);
+#ifdef __BIG_ENDIAN
+ events = swahw32(events);
+#endif
poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
get_task_struct(current);
@@ -7865,7 +7868,8 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
- IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL;
+ IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
+ IORING_FEAT_POLL_32BITS;
if (copy_to_user(params, p, sizeof(*p))) {
ret = -EFAULT;
@@ -8154,7 +8158,8 @@ static int __init io_uring_init(void)
BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags);
BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags);
BUILD_BUG_SQE_ELEM(28, __u32, fsync_flags);
- BUILD_BUG_SQE_ELEM(28, __u16, poll_events);
+ BUILD_BUG_SQE_ELEM(28, /* compat */ __u16, poll_events);
+ BUILD_BUG_SQE_ELEM(28, __u32, poll32_events);
BUILD_BUG_SQE_ELEM(28, __u32, sync_range_flags);
BUILD_BUG_SQE_ELEM(28, __u32, msg_flags);
BUILD_BUG_SQE_ELEM(28, __u32, timeout_flags);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 92c2269..8d03396 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -31,7 +31,8 @@ struct io_uring_sqe {
union {
__kernel_rwf_t rw_flags;
__u32 fsync_flags;
- __u16 poll_events;
+ __u16 poll_events; /* compatibility */
+ __u32 poll32_events; /* word-reversed for BE */
__u32 sync_range_flags;
__u32 msg_flags;
__u32 timeout_flags;
@@ -248,6 +249,7 @@ struct io_uring_params {
#define IORING_FEAT_RW_CUR_POS (1U << 3)
#define IORING_FEAT_CUR_PERSONALITY (1U << 4)
#define IORING_FEAT_FAST_POLL (1U << 5)
+#define IORING_FEAT_POLL_32BITS (1U << 6)
/*
* io_uring_register(2) opcodes and arguments
diff --git a/tools/io_uring/liburing.h b/tools/io_uring/liburing.h
index 5f305c8..28a837b 100644
--- a/tools/io_uring/liburing.h
+++ b/tools/io_uring/liburing.h
@@ -10,6 +10,7 @@
#include <string.h>
#include "../../include/uapi/linux/io_uring.h"
#include <inttypes.h>
+#include <linux/swab.h>
#include "barrier.h"
/*
@@ -145,11 +146,14 @@ static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
}
static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd,
- short poll_mask)
+ unsigned poll_mask)
{
memset(sqe, 0, sizeof(*sqe));
sqe->opcode = IORING_OP_POLL_ADD;
sqe->fd = fd;
+#if __BYTE_ORDER == __BIG_ENDIAN
+ poll_mask = __swahw32(poll_mask);
+#endif
sqe->poll_events = poll_mask;
}
--
1.8.3.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH v4 2/2] io_uring: use EPOLLEXCLUSIVE flag to avoid thundering herd type behavior
2020-06-17 9:53 [PATCH v4 0/2] io_uring: add EPOLLEXCLUSIVE flag for POLL_ADD operation Jiufei Xue
2020-06-17 9:53 ` [PATCH v4 1/2] io_uring: change the poll type to be 32-bits Jiufei Xue
@ 2020-06-17 9:53 ` Jiufei Xue
1 sibling, 0 replies; 4+ messages in thread
From: Jiufei Xue @ 2020-06-17 9:53 UTC (permalink / raw)
To: io-uring; +Cc: axboe, joseph.qi
Applications can pass this flag in to avoid thundering-herd wakeups on accept.
Signed-off-by: Jiufei Xue <[email protected]>
---
fs/io_uring.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index fe935cf..f156eba 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4225,7 +4225,11 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
pt->error = 0;
poll->head = head;
- add_wait_queue(head, &poll->wait);
+
+ if (poll->events & EPOLLEXCLUSIVE)
+ add_wait_queue_exclusive(head, &poll->wait);
+ else
+ add_wait_queue(head, &poll->wait);
}
static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
@@ -4556,7 +4560,8 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
#ifdef __BIG_ENDIAN
events = swahw32(events);
#endif
- poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
+ poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP |
+ (events & EPOLLEXCLUSIVE);
get_task_struct(current);
req->task = current;
--
1.8.3.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH v4 1/2] io_uring: change the poll type to be 32-bits
2020-06-17 9:53 ` [PATCH v4 1/2] io_uring: change the poll type to be 32-bits Jiufei Xue
@ 2020-06-17 15:15 ` Jens Axboe
0 siblings, 0 replies; 4+ messages in thread
From: Jens Axboe @ 2020-06-17 15:15 UTC (permalink / raw)
To: Jiufei Xue, io-uring; +Cc: joseph.qi
On 6/17/20 3:53 AM, Jiufei Xue wrote:
> poll events should be 32-bits to cover EPOLLEXCLUSIVE.
>
> Explicit word-swap the poll32_events for big endian to make sure the ABI
> is not changed. We call this feature IORING_FEAT_POLL_32BITS,
> applications who want to use EPOLLEXCLUSIVE should check the feature bit
> first.
>
> Signed-off-by: Jiufei Xue <[email protected]>
> ---
> fs/io_uring.c | 13 +++++++++----
> include/uapi/linux/io_uring.h | 4 +++-
> tools/io_uring/liburing.h | 6 +++++-
> 3 files changed, 17 insertions(+), 6 deletions(-)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 155f3d8..fe935cf 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -4543,7 +4543,7 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
> static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> {
> struct io_poll_iocb *poll = &req->poll;
> - u16 events;
> + u32 events;
>
> if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
> return -EINVAL;
> @@ -4552,7 +4552,10 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
> if (!poll->file)
> return -EBADF;
>
> - events = READ_ONCE(sqe->poll_events);
> + events = READ_ONCE(sqe->poll32_events);
> +#ifdef __BIG_ENDIAN
> + events = swahw32(events);
> +#endif
> poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
>
> get_task_struct(current);
> @@ -7865,7 +7868,8 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
>
> p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
> IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
> - IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL;
> + IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
> + IORING_FEAT_POLL_32BITS;
>
> if (copy_to_user(params, p, sizeof(*p))) {
> ret = -EFAULT;
> @@ -8154,7 +8158,8 @@ static int __init io_uring_init(void)
> BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags);
> BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags);
> BUILD_BUG_SQE_ELEM(28, __u32, fsync_flags);
> - BUILD_BUG_SQE_ELEM(28, __u16, poll_events);
> + BUILD_BUG_SQE_ELEM(28, /* compat */ __u16, poll_events);
> + BUILD_BUG_SQE_ELEM(28, __u32, poll32_events);
> BUILD_BUG_SQE_ELEM(28, __u32, sync_range_flags);
> BUILD_BUG_SQE_ELEM(28, __u32, msg_flags);
> BUILD_BUG_SQE_ELEM(28, __u32, timeout_flags);
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index 92c2269..8d03396 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -31,7 +31,8 @@ struct io_uring_sqe {
> union {
> __kernel_rwf_t rw_flags;
> __u32 fsync_flags;
> - __u16 poll_events;
> + __u16 poll_events; /* compatibility */
> + __u32 poll32_events; /* word-reversed for BE */
> __u32 sync_range_flags;
> __u32 msg_flags;
> __u32 timeout_flags;
> @@ -248,6 +249,7 @@ struct io_uring_params {
> #define IORING_FEAT_RW_CUR_POS (1U << 3)
> #define IORING_FEAT_CUR_PERSONALITY (1U << 4)
> #define IORING_FEAT_FAST_POLL (1U << 5)
> +#define IORING_FEAT_POLL_32BITS (1U << 6)
>
> /*
> * io_uring_register(2) opcodes and arguments
> diff --git a/tools/io_uring/liburing.h b/tools/io_uring/liburing.h
> index 5f305c8..28a837b 100644
> --- a/tools/io_uring/liburing.h
> +++ b/tools/io_uring/liburing.h
> @@ -10,6 +10,7 @@
> #include <string.h>
> #include "../../include/uapi/linux/io_uring.h"
> #include <inttypes.h>
> +#include <linux/swab.h>
> #include "barrier.h"
>
> /*
> @@ -145,11 +146,14 @@ static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
> }
>
> static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd,
> - short poll_mask)
> + unsigned poll_mask)
> {
> memset(sqe, 0, sizeof(*sqe));
> sqe->opcode = IORING_OP_POLL_ADD;
> sqe->fd = fd;
> +#if __BYTE_ORDER == __BIG_ENDIAN
> + poll_mask = __swahw32(poll_mask);
> +#endif
> sqe->poll_events = poll_mask;
This looks good to me now, but this one need not use the __ version, it
should just use the regular one as that's the one defined in the
non-uapi header. But I'll just make that change, won't functionally do
anything.
--
Jens Axboe
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2020-06-17 15:15 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-17 9:53 [PATCH v4 0/2] io_uring: add EPOLLEXCLUSIVE flag for POLL_ADD operation Jiufei Xue
2020-06-17 9:53 ` [PATCH v4 1/2] io_uring: change the poll type to be 32-bits Jiufei Xue
2020-06-17 15:15 ` Jens Axboe
2020-06-17 9:53 ` [PATCH v4 2/2] io_uring: use EPOLLEXCLUSIVE flag to avoid thundering herd type behavior Jiufei Xue
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox