From: Stefan Metzmacher <[email protected]>
To: Pavel Begunkov <[email protected]>,
io-uring <[email protected]>, Jens Axboe <[email protected]>
Cc: Jakub Kicinski <[email protected]>, netdev <[email protected]>,
Dylan Yudaken <[email protected]>
Subject: IORING_SEND_NOTIF_REPORT_USAGE (was Re: IORING_CQE_F_COPIED)
Date: Thu, 20 Oct 2022 12:04:53 +0200 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
Hi Pavel,
> Yep, sth like that should do, but let's guard against
> spurious net_zcopy_put() just in case.
>
> used = false;
> copied = false;
>
> callback(skb, success, ubuf) {
> if (skb)
> used = true;
> if (!success)
> copied = true;
> }
> complete() {
> if (!used || copied)
> set_flag(IORING_CQE_F_COPIED);
> }
>
>> And __io_notif_complete_tw still needs:
>>
>> if (!nd->zero_copied)
>> notif->cqe.flags |= IORING_CQE_F_COPIED;
>
> Which can be shoved in a custom callback
>
Ok, got the idea.
> I'm more concerned about future changes around it, but there won't
> be extra ifs.
>
> #define COMMON_FLAGS (RECVSEND_FIRST_POLL|...)
> #define ALL_FLAGS (COMMON_FLAGS|RECVSEND_PROBE)
>
> if (flags & ~COMMON_FLAGS) {
> if (flags & ~ALL_FLAGS)
> return err;
> if (flags & RECVSEND_PROBE)
> set_callback(notif);
> }
So far I came up with an IORING_SEND_NOTIF_REPORT_USAGE opt-in flag,
and the reporting is done in cqe.res with IORING_NOTIF_USAGE_ZC_USED (0x00000001)
and/or IORING_NOTIF_USAGE_ZC_COPIED (0x80000000). So the caller is also
able to notice that some parts were able to use zero copy, while other
fragments were copied.
I haven't tested it yet, but I want to post it early...
What do you think?
metze
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index ab7458033ee3..751fc4eff8d1 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -296,10 +296,28 @@ enum io_uring_op {
*
* IORING_RECVSEND_FIXED_BUF Use registered buffers, the index is stored in
* the buf_index field.
+ *
+ * IORING_SEND_NOTIF_REPORT_USAGE
+ * If set, SEND[MSG]_ZC reports
+ * the zerocopy usage in cqe.res
+ * for the IORING_CQE_F_NOTIF cqe.
+ * IORING_NOTIF_USAGE_ZC_USED if zero copy was used
+ * (at least partially).
+ * IORING_NOTIF_USAGE_ZC_COPIED if data was copied
+ * (at least partially).
*/
#define IORING_RECVSEND_POLL_FIRST (1U << 0)
#define IORING_RECV_MULTISHOT (1U << 1)
#define IORING_RECVSEND_FIXED_BUF (1U << 2)
+#define IORING_SEND_NOTIF_REPORT_USAGE (1U << 3)
+
+/*
+ * cqe.res for IORING_CQE_F_NOTIF if
+ * IORING_SEND_NOTIF_REPORT_USAGE was requested
+ */
+#define IORING_NOTIF_USAGE_ZC_USED (1U << 0)
+#define IORING_NOTIF_USAGE_ZC_COPIED (1U << 31)
+
/*
* accept flags stored in sqe->ioprio
diff --git a/io_uring/net.c b/io_uring/net.c
index 735eec545115..a79d7d349e19 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -946,9 +946,11 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
zc->flags = READ_ONCE(sqe->ioprio);
if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST |
- IORING_RECVSEND_FIXED_BUF))
+ IORING_RECVSEND_FIXED_BUF |
+ IORING_SEND_NOTIF_REPORT_USAGE))
return -EINVAL;
- notif = zc->notif = io_alloc_notif(ctx);
+ notif = zc->notif = io_alloc_notif(ctx,
+ zc->flags & IORING_SEND_NOTIF_REPORT_USAGE);
if (!notif)
return -ENOMEM;
notif->cqe.user_data = req->cqe.user_data;
diff --git a/io_uring/notif.c b/io_uring/notif.c
index e37c6569d82e..3844e3c8ad7e 100644
--- a/io_uring/notif.c
+++ b/io_uring/notif.c
@@ -3,13 +3,14 @@
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
+#include <linux/errqueue.h>
#include <linux/io_uring.h>
#include "io_uring.h"
#include "notif.h"
#include "rsrc.h"
-static void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked)
+static inline void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked)
{
struct io_notif_data *nd = io_notif_to_data(notif);
struct io_ring_ctx *ctx = notif->ctx;
@@ -21,20 +22,46 @@ static void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked)
io_req_task_complete(notif, locked);
}
-static void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
- struct ubuf_info *uarg,
- bool success)
+static inline void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
+ struct ubuf_info *uarg,
+ bool success)
{
struct io_notif_data *nd = container_of(uarg, struct io_notif_data, uarg);
struct io_kiocb *notif = cmd_to_io_kiocb(nd);
if (refcount_dec_and_test(&uarg->refcnt)) {
- notif->io_task_work.func = __io_notif_complete_tw;
io_req_task_work_add(notif);
}
}
-struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
+static void __io_notif_complete_tw_report_usage(struct io_kiocb *notif, bool *locked)
+{
+ struct io_notif_data *nd = io_notif_to_data(notif);
+
+ if (likely(nd->zc_used))
+ notif->cqe.res |= IORING_NOTIF_USAGE_ZC_USED;
+
+ if (unlikely(nd->zc_copied))
+ notif->cqe.res |= IORING_NOTIF_USAGE_ZC_COPIED;
+
+ __io_notif_complete_tw(notif, locked);
+}
+
+static void io_uring_tx_zerocopy_callback_report_usage(struct sk_buff *skb,
+ struct ubuf_info *uarg,
+ bool success)
+{
+ struct io_notif_data *nd = container_of(uarg, struct io_notif_data, uarg);
+
+ if (success && !nd->zc_used && skb)
+ nd->zc_used = true;
+ else if (unlikely(!success && !nd->zc_copied))
+ nd->zc_copied = true;
+
+ io_uring_tx_zerocopy_callback(skb, uarg, success);
+}
+
+struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx, bool report_usage)
__must_hold(&ctx->uring_lock)
{
struct io_kiocb *notif;
@@ -54,7 +81,14 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
nd = io_notif_to_data(notif);
nd->account_pages = 0;
nd->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
- nd->uarg.callback = io_uring_tx_zerocopy_callback;
+ if (report_usage) {
+ nd->zc_used = nd->zc_copied = false;
+ nd->uarg.callback = io_uring_tx_zerocopy_callback_report_usage;
+ notif->io_task_work.func = __io_notif_complete_tw_report_usage;
+ } else {
+ nd->uarg.callback = io_uring_tx_zerocopy_callback;
+ notif->io_task_work.func = __io_notif_complete_tw;
+ }
refcount_set(&nd->uarg.refcnt, 1);
return notif;
}
@@ -66,7 +100,6 @@ void io_notif_flush(struct io_kiocb *notif)
/* drop slot's master ref */
if (refcount_dec_and_test(&nd->uarg.refcnt)) {
- notif->io_task_work.func = __io_notif_complete_tw;
io_req_task_work_add(notif);
}
}
diff --git a/io_uring/notif.h b/io_uring/notif.h
index 5b4d710c8ca5..5ac7a2745e52 100644
--- a/io_uring/notif.h
+++ b/io_uring/notif.h
@@ -13,10 +13,12 @@ struct io_notif_data {
struct file *file;
struct ubuf_info uarg;
unsigned long account_pages;
+ bool zc_used;
+ bool zc_copied;
};
void io_notif_flush(struct io_kiocb *notif);
-struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx);
+struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx, bool report_usage);
static inline struct io_notif_data *io_notif_to_data(struct io_kiocb *notif)
{
next prev parent reply other threads:[~2022-10-20 10:05 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-14 11:06 IORING_CQE_F_COPIED Stefan Metzmacher
2022-10-17 16:46 ` IORING_CQE_F_COPIED Pavel Begunkov
2022-10-18 8:43 ` IORING_CQE_F_COPIED Stefan Metzmacher
2022-10-19 15:06 ` IORING_CQE_F_COPIED Pavel Begunkov
2022-10-19 16:12 ` IORING_CQE_F_COPIED Stefan Metzmacher
2022-10-20 2:24 ` IORING_CQE_F_COPIED Pavel Begunkov
2022-10-20 10:04 ` Stefan Metzmacher [this message]
2022-10-20 13:46 ` IORING_SEND_NOTIF_REPORT_USAGE (was Re: IORING_CQE_F_COPIED) Pavel Begunkov
2022-10-20 14:51 ` Stefan Metzmacher
2022-10-20 15:31 ` Pavel Begunkov
2022-10-21 9:36 ` Stefan Metzmacher
2022-10-21 11:09 ` Pavel Begunkov
2022-10-21 14:03 ` Stefan Metzmacher
2022-10-27 8:47 ` Stefan Metzmacher
2022-10-27 10:51 ` Pavel Begunkov
2022-10-20 10:10 ` IORING_SEND_NOTIF_USER_DATA " Stefan Metzmacher
2022-10-20 15:37 ` Pavel Begunkov
2022-10-21 8:32 ` Stefan Metzmacher
2022-10-21 9:27 ` Pavel Begunkov
2022-10-21 9:45 ` Stefan Metzmacher
2022-10-21 11:20 ` Pavel Begunkov
2022-10-21 12:10 ` Stefan Metzmacher
2022-10-21 10:15 ` Stefan Metzmacher
2022-10-21 11:26 ` Pavel Begunkov
2022-10-21 12:38 ` Stefan Metzmacher
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox