public inbox for [email protected]
 help / color / mirror / Atom feed
From: Pavel Begunkov <[email protected]>
To: [email protected], [email protected],
	[email protected]
Cc: Jakub Kicinski <[email protected]>,
	Jonathan Lemon <[email protected]>,
	"David S . Miller" <[email protected]>,
	Willem de Bruijn <[email protected]>,
	Eric Dumazet <[email protected]>,
	David Ahern <[email protected]>, Jens Axboe <[email protected]>,
	Pavel Begunkov <[email protected]>
Subject: [RFC v2 10/19] io_uring: add send notifiers registration
Date: Tue, 21 Dec 2021 15:35:32 +0000	[thread overview]
Message-ID: <4cfeb88dc07fcdff2c0c864c031bb32b61439674.1640029579.git.asml.silence@gmail.com> (raw)
In-Reply-To: <[email protected]>

Add IORING_REGISTER_TX_CTX and IORING_UNREGISTER_TX_CTX. Transmission
(i.e. send) context will serve be used to notify the userspace when
fixed buffers used for zerocopy sends are released by the kernel.

Notification of a single tx context lives in generations, where each
generation posts one CQE with ->user_data equal to the specified tag and
->res is a generation number starting from 0. All requests issued
against a ctx will get attached to the current generation of
notifications. Then, the userspace will be able to request to flush the
notification allowing it to post a CQE when all buffers of all requests
attached to it are released by the kernel. It'll also switch the
generation to a new one with a sequence number incremented by one.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c                 | 72 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |  7 ++++
 2 files changed, 79 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 59380e3454ad..a01f91e70fa5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -94,6 +94,8 @@
 #define IORING_MAX_CQ_ENTRIES	(2 * IORING_MAX_ENTRIES)
 #define IORING_SQPOLL_CAP_ENTRIES_VALUE 8
 
+#define IORING_MAX_TX_NOTIFIERS	(1U << 10)
+
 /* only define max */
 #define IORING_MAX_FIXED_FILES	(1U << 15)
 #define IORING_MAX_RESTRICTIONS	(IORING_RESTRICTION_LAST + \
@@ -326,6 +328,15 @@ struct io_submit_state {
 	struct blk_plug		plug;
 };
 
+struct io_tx_notifier {
+};
+
+struct io_tx_ctx {
+	struct io_tx_notifier	*notifier;
+	u64			tag;
+	u32			seq;
+};
+
 struct io_ring_ctx {
 	/* const or read-mostly hot data */
 	struct {
@@ -373,6 +384,8 @@ struct io_ring_ctx {
 		unsigned		nr_user_files;
 		unsigned		nr_user_bufs;
 		struct io_mapped_ubuf	**user_bufs;
+		struct io_tx_ctx	*tx_ctxs;
+		unsigned		nr_tx_ctxs;
 
 		struct io_submit_state	submit_state;
 		struct list_head	timeout_list;
@@ -9199,6 +9212,55 @@ static int io_buffer_validate(struct iovec *iov)
 	return 0;
 }
 
+static int io_sqe_tx_ctx_unregister(struct io_ring_ctx *ctx)
+{
+	if (!ctx->nr_tx_ctxs)
+		return -ENXIO;
+
+	kvfree(ctx->tx_ctxs);
+	ctx->tx_ctxs = NULL;
+	ctx->nr_tx_ctxs = 0;
+	return 0;
+}
+
+static int io_sqe_tx_ctx_register(struct io_ring_ctx *ctx,
+				  void __user *arg, unsigned int nr_args)
+{
+	struct io_uring_tx_ctx_register __user *tx_args = arg;
+	struct io_uring_tx_ctx_register tx_arg;
+	unsigned i;
+	int ret;
+
+	if (ctx->nr_tx_ctxs)
+		return -EBUSY;
+	if (!nr_args)
+		return -EINVAL;
+	if (nr_args > IORING_MAX_TX_NOTIFIERS)
+		return -EMFILE;
+
+	ctx->tx_ctxs = kvcalloc(nr_args, sizeof(ctx->tx_ctxs[0]),
+				GFP_KERNEL_ACCOUNT);
+	if (!ctx->tx_ctxs)
+		return -ENOMEM;
+
+	for (i = 0; i < nr_args; i++, ctx->nr_tx_ctxs++) {
+		struct io_tx_ctx *tx_ctx = &ctx->tx_ctxs[i];
+
+		if (copy_from_user(&tx_arg, &tx_args[i], sizeof(tx_arg))) {
+			ret = -EFAULT;
+			goto out_fput;
+		}
+		tx_ctx->tag = tx_arg.tag;
+	}
+	return 0;
+
+out_fput:
+	kvfree(ctx->tx_ctxs);
+	ctx->tx_ctxs = NULL;
+	ctx->nr_tx_ctxs = 0;
+	return ret;
+}
+
 static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 				   unsigned int nr_args, u64 __user *tags)
 {
@@ -9429,6 +9491,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 #endif
 	WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
 
+	io_sqe_tx_ctx_unregister(ctx);
 	io_mem_free(ctx->rings);
 	io_mem_free(ctx->sq_sqes);
 
@@ -11104,6 +11167,15 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			break;
 		ret = io_register_iowq_max_workers(ctx, arg);
 		break;
+	case IORING_REGISTER_TX_CTX:
+		ret = io_sqe_tx_ctx_register(ctx, arg, nr_args);
+		break;
+	case IORING_UNREGISTER_TX_CTX:
+		ret = -EINVAL;
+		if (arg || nr_args)
+			break;
+		ret = io_sqe_tx_ctx_unregister(ctx);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 787f491f0d2a..f2e8d18e40e0 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -325,6 +325,9 @@ enum {
 	/* set/get max number of io-wq workers */
 	IORING_REGISTER_IOWQ_MAX_WORKERS	= 19,
 
+	IORING_REGISTER_TX_CTX			= 20,
+	IORING_UNREGISTER_TX_CTX		= 21,
+
 	/* this goes last */
 	IORING_REGISTER_LAST
 };
@@ -365,6 +368,10 @@ struct io_uring_rsrc_update2 {
 	__u32 resv2;
 };
 
+struct io_uring_tx_ctx_register {
+	__u64 tag;
+};
+
 /* Skip updating fd indexes set to this value in the fd table */
 #define IORING_REGISTER_FILES_SKIP	(-2)
 
-- 
2.34.1


  parent reply	other threads:[~2021-12-21 15:36 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-21 15:35 [RFC v2 00/19] io_uring zerocopy tx Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 01/19] skbuff: add SKBFL_DONT_ORPHAN flag Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 02/19] skbuff: pass a struct ubuf_info in msghdr Pavel Begunkov
2022-01-11 13:51   ` Hao Xu
2022-01-11 15:50     ` Pavel Begunkov
2022-01-12  3:39       ` Hao Xu
2022-01-12 16:53         ` Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 03/19] net: add zerocopy_sg_from_iter for bvec Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 04/19] net: optimise page get/free for bvec zc Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 05/19] net: don't track pfmemalloc for zc registered mem Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 06/19] ipv4/udp: add support msgdr::msg_ubuf Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 07/19] ipv6/udp: " Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 08/19] ipv4: avoid partial copy for zc Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 09/19] ipv6: " Pavel Begunkov
2021-12-21 15:35 ` Pavel Begunkov [this message]
2021-12-21 15:35 ` [RFC v2 11/19] io_uring: infrastructure for send zc notifications Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 12/19] io_uring: wire send zc request type Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 13/19] io_uring: add an option to flush zc notifications Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 14/19] io_uring: opcode independent fixed buf import Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 15/19] io_uring: sendzc with fixed buffers Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 16/19] io_uring: cache struct ubuf_info Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 17/19] io_uring: unclog ctx refs waiting with zc notifiers Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 18/19] io_uring: task_work for notification delivery Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 19/19] io_uring: optimise task referencing by notifiers Pavel Begunkov
2021-12-21 15:43 ` [RFC v2 00/19] io_uring zerocopy tx Pavel Begunkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4cfeb88dc07fcdff2c0c864c031bb32b61439674.1640029579.git.asml.silence@gmail.com \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox