From: Jens Axboe <[email protected]>
To: Xiaoguang Wang <[email protected]>
Cc: [email protected], [email protected]
Subject: Re: [PATCH] io_uring: export cq overflow status to userspace
Date: Wed, 8 Jul 2020 09:41:44 -0600 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
On 7/8/20 9:39 AM, Xiaoguang Wang wrote:
> hi,
>
>> On 7/7/20 11:29 PM, Xiaoguang Wang wrote:
>>> I modify above test program a bit:
>>> #include <errno.h>
>>> #include <stdio.h>
>>> #include <unistd.h>
>>> #include <stdlib.h>
>>> #include <string.h>
>>> #include <fcntl.h>
>>> #include <assert.h>
>>>
>>> #include "liburing.h"
>>>
>>> static void test_cq_overflow(struct io_uring *ring)
>>> {
>>> struct io_uring_cqe *cqe;
>>> struct io_uring_sqe *sqe;
>>> int issued = 0;
>>> int ret = 0;
>>> int i;
>>>
>>> for (i = 0; i < 33; i++) {
>>> sqe = io_uring_get_sqe(ring);
>>> if (!sqe) {
>>> fprintf(stderr, "get sqe failed\n");
>>> break;;
>>> }
>>> ret = io_uring_submit(ring);
>>> if (ret <= 0) {
>>> if (ret != -EBUSY)
>>> fprintf(stderr, "sqe submit failed: %d\n", ret);
>>> break;
>>> }
>>> issued++;
>>> }
>>>
>>> printf("issued requests: %d\n", issued);
>>>
>>> while (issued) {
>>> ret = io_uring_peek_cqe(ring, &cqe);
>>> if (ret) {
>>> if (ret != -EAGAIN) {
>>> fprintf(stderr, "peek completion failed: %s\n",
>>> strerror(ret));
>>> break;
>>> }
>>> printf("left requets: %d %d\n", issued, IO_URING_READ_ONCE(*ring->sq.kflags));
>>> continue;
>>> }
>>> io_uring_cqe_seen(ring, cqe);
>>> issued--;
>>> printf("left requets: %d\n", issued);
>>> }
>>> }
>>>
>>> int main(int argc, char *argv[])
>>> {
>>> int ret;
>>> struct io_uring ring;
>>>
>>> ret = io_uring_queue_init(16, &ring, 0);
>>> if (ret) {
>>> fprintf(stderr, "ring setup failed: %d\n", ret);
>>> return 1;
>>> }
>>>
>>> test_cq_overflow(&ring);
>>> return 0;
>>> }
>>>
>>> Though with your patches applied, we still can not peek the last cqe.
>>> This test program will only issue 33 sqes, so it won't get EBUSY error.
>>
>> How about we make this even simpler, then - make the
>> IORING_SQ_CQ_OVERFLOW actually track the state, rather than when we fail
>> on submission. The liburing change would be the same, the kernel side
>> would then look like the below.
>>
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index 4c9a494c9f9f..01981926cdf4 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -1342,6 +1342,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
>> if (cqe) {
>> clear_bit(0, &ctx->sq_check_overflow);
>> clear_bit(0, &ctx->cq_check_overflow);
>> + ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
>> }
>> spin_unlock_irqrestore(&ctx->completion_lock, flags);
>> io_cqring_ev_posted(ctx);
>> @@ -1379,6 +1380,7 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
>> if (list_empty(&ctx->cq_overflow_list)) {
>> set_bit(0, &ctx->sq_check_overflow);
>> set_bit(0, &ctx->cq_check_overflow);
>> + ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
>> }
>> req->flags |= REQ_F_OVERFLOW;
>> refcount_inc(&req->refs);
>> @@ -6375,9 +6377,9 @@ static int io_sq_thread(void *data)
>> }
>>
>> /* Tell userspace we may need a wakeup call */
>> + spin_lock_irq(&ctx->completion_lock);
>> ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP;
>> - /* make sure to read SQ tail after writing flags */
>> - smp_mb();
>> + spin_unlock_irq(&ctx->completion_lock);
>>
>> to_submit = io_sqring_entries(ctx);
>> if (!to_submit || ret == -EBUSY) {
>> @@ -6400,7 +6402,9 @@ static int io_sq_thread(void *data)
>> }
>> finish_wait(&ctx->sqo_wait, &wait);
>>
>> + spin_lock_irq(&ctx->completion_lock);
>> ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
>> + spin_unlock_irq(&ctx->completion_lock);
>> }
>>
>> mutex_lock(&ctx->uring_lock);
>> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
>> index 8d033961cb78..91953b543125 100644
>> --- a/include/uapi/linux/io_uring.h
>> +++ b/include/uapi/linux/io_uring.h
>> @@ -198,6 +198,7 @@ struct io_sqring_offsets {
>> * sq_ring->flags
>> */
>> #define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */
>> +#define IORING_SQ_CQ_OVERFLOW (1U << 1) /* app needs to enter kernel */
>>
>> struct io_cqring_offsets {
>> __u32 head;
>>
> Looks good, I think it'll work now.
> I'll test and send patches soon, thanks.
One missing bit clear, and I corrected that last comment. Just base on
this one, thanks!
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4c9a494c9f9f..0b6a4b2d7e76 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1342,6 +1342,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
if (cqe) {
clear_bit(0, &ctx->sq_check_overflow);
clear_bit(0, &ctx->cq_check_overflow);
+ ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
}
spin_unlock_irqrestore(&ctx->completion_lock, flags);
io_cqring_ev_posted(ctx);
@@ -1379,6 +1380,7 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
if (list_empty(&ctx->cq_overflow_list)) {
set_bit(0, &ctx->sq_check_overflow);
set_bit(0, &ctx->cq_check_overflow);
+ ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
}
req->flags |= REQ_F_OVERFLOW;
refcount_inc(&req->refs);
@@ -6375,9 +6377,9 @@ static int io_sq_thread(void *data)
}
/* Tell userspace we may need a wakeup call */
+ spin_lock_irq(&ctx->completion_lock);
ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP;
- /* make sure to read SQ tail after writing flags */
- smp_mb();
+ spin_unlock_irq(&ctx->completion_lock);
to_submit = io_sqring_entries(ctx);
if (!to_submit || ret == -EBUSY) {
@@ -6400,7 +6402,9 @@ static int io_sq_thread(void *data)
}
finish_wait(&ctx->sqo_wait, &wait);
+ spin_lock_irq(&ctx->completion_lock);
ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
+ spin_unlock_irq(&ctx->completion_lock);
}
mutex_lock(&ctx->uring_lock);
@@ -7810,6 +7814,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
if (list_empty(&ctx->cq_overflow_list)) {
clear_bit(0, &ctx->sq_check_overflow);
clear_bit(0, &ctx->cq_check_overflow);
+ ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
}
spin_unlock_irq(&ctx->completion_lock);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 8d033961cb78..d65fde732518 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -198,6 +198,7 @@ struct io_sqring_offsets {
* sq_ring->flags
*/
#define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */
+#define IORING_SQ_CQ_OVERFLOW (1U << 1) /* CQ ring is overflown */
struct io_cqring_offsets {
__u32 head;
--
Jens Axboe
next prev parent reply other threads:[~2020-07-08 15:43 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-07 13:24 [PATCH] io_uring: export cq overflow status to userspace Xiaoguang Wang
2020-07-07 14:28 ` Jens Axboe
2020-07-07 16:21 ` Jens Axboe
2020-07-07 16:25 ` Pavel Begunkov
2020-07-07 16:30 ` Jens Axboe
2020-07-07 16:36 ` Xiaoguang Wang
2020-07-07 17:23 ` Jens Axboe
2020-07-08 3:25 ` Xiaoguang Wang
2020-07-08 3:46 ` Jens Axboe
2020-07-08 5:29 ` Xiaoguang Wang
2020-07-08 15:29 ` Jens Axboe
2020-07-08 15:39 ` Xiaoguang Wang
2020-07-08 15:41 ` Jens Axboe [this message]
2020-07-08 16:51 ` Xiaoguang Wang
2020-07-08 21:33 ` Jens Axboe
2020-07-09 0:52 ` Xiaoguang Wang
2020-07-07 16:29 ` Xiaoguang Wang
2020-07-07 16:30 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox