public inbox for [email protected]
 help / color / mirror / Atom feed
From: Jens Axboe <[email protected]>
To: [email protected]
Cc: Jens Axboe <[email protected]>
Subject: [PATCH 08/17] io_uring/net: add iovec recycling
Date: Wed, 20 Mar 2024 16:55:23 -0600	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

Right now the io_async_msghdr is recycled to avoid the overhead of
allocating+freeing it for every request. But the iovec is not included,
hence that will be allocated and freed for each transfer regardless.
This commit enables recyling of the iovec between io_async_msghdr
recycles. This avoids alloc+free for each one if we use it, and on top
of that, it extends the cache hot nature of msg to the iovec as well.

Also enables KASAN for the iovec entries, so that reuse can be detected
even while they are in the cache.

The io_async_msghdr also shrinks from 376 -> 288 bytes, an 88 byte
saving (or ~23% smaller), as the fast_iovec entry is dropped from 8
entries to a single entry. There's no point keeping a big fast iovec
entry, if we're not allocating and freeing new iovecs all the time.

Signed-off-by: Jens Axboe <[email protected]>
---
 io_uring/net.c | 133 ++++++++++++++++++++++++++++++++-----------------
 io_uring/net.h |  13 ++---
 2 files changed, 93 insertions(+), 53 deletions(-)

diff --git a/io_uring/net.c b/io_uring/net.c
index 6b45311dcc08..20d6427f4250 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -115,15 +115,33 @@ static bool io_net_retry(struct socket *sock, int flags)
 	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
 }
 
+static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
+{
+	if (kmsg->free_iov) {
+		kfree(kmsg->free_iov);
+		kmsg->free_iov_nr = 0;
+		kmsg->free_iov = NULL;
+	}
+}
+
 static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_async_msghdr *hdr = req->async_data;
+	struct iovec *iov;
 
-	if (!req_has_async_data(req) || issue_flags & IO_URING_F_UNLOCKED)
+	if (!req_has_async_data(req))
+		return;
+	/* can't recycle, ensure we free the iovec if we have one */
+	if (issue_flags & IO_URING_F_UNLOCKED) {
+		io_netmsg_iovec_free(hdr);
 		return;
+	}
 
 	/* Let normal cleanup path reap it if we fail adding to the cache */
+	iov = hdr->free_iov;
 	if (io_alloc_cache_put(&req->ctx->netmsg_cache, &hdr->cache)) {
+		if (iov)
+			kasan_mempool_poison_object(iov);
 		req->async_data = NULL;
 		req->flags &= ~REQ_F_ASYNC_DATA;
 	}
@@ -138,7 +156,11 @@ static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
 	entry = io_alloc_cache_get(&ctx->netmsg_cache);
 	if (entry) {
 		hdr = container_of(entry, struct io_async_msghdr, cache);
-		hdr->free_iov = NULL;
+		if (hdr->free_iov) {
+			kasan_mempool_unpoison_object(hdr->free_iov,
+				hdr->free_iov_nr * sizeof(struct iovec));
+			req->flags |= REQ_F_NEED_CLEANUP;
+		}
 		req->flags |= REQ_F_ASYNC_DATA;
 		req->async_data = hdr;
 		return hdr;
@@ -146,12 +168,27 @@ static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
 
 	if (!io_alloc_async_data(req)) {
 		hdr = req->async_data;
+		hdr->free_iov_nr = 0;
 		hdr->free_iov = NULL;
 		return hdr;
 	}
 	return NULL;
 }
 
+/* assign new iovec to kmsg, if we need to */
+static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
+			     struct iovec *iov)
+{
+	if (iov) {
+		req->flags |= REQ_F_NEED_CLEANUP;
+		kmsg->free_iov_nr = kmsg->msg.msg_iter.nr_segs;
+		if (kmsg->free_iov)
+			kfree(kmsg->free_iov);
+		kmsg->free_iov = iov;
+	}
+	return 0;
+}
+
 #ifdef CONFIG_COMPAT
 static int io_compat_msg_copy_hdr(struct io_kiocb *req,
 				  struct io_async_msghdr *iomsg,
@@ -159,7 +196,16 @@ static int io_compat_msg_copy_hdr(struct io_kiocb *req,
 {
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 	struct compat_iovec __user *uiov;
-	int ret;
+	struct iovec *iov;
+	int ret, nr_segs;
+
+	if (iomsg->free_iov) {
+		nr_segs = iomsg->free_iov_nr;
+		iov = iomsg->free_iov;
+	} else {
+		iov = &iomsg->fast_iov;
+		nr_segs = 1;
+	}
 
 	if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
 		return -EFAULT;
@@ -168,9 +214,9 @@ static int io_compat_msg_copy_hdr(struct io_kiocb *req,
 	if (req->flags & REQ_F_BUFFER_SELECT) {
 		compat_ssize_t clen;
 
-		iomsg->free_iov = NULL;
 		if (msg->msg_iovlen == 0) {
-			sr->len = 0;
+			sr->len = iov->iov_len = 0;
+			iov->iov_base = NULL;
 		} else if (msg->msg_iovlen > 1) {
 			return -EINVAL;
 		} else {
@@ -186,14 +232,12 @@ static int io_compat_msg_copy_hdr(struct io_kiocb *req,
 		return 0;
 	}
 
-	iomsg->free_iov = iomsg->fast_iov;
 	ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen,
-				UIO_FASTIOV, &iomsg->free_iov,
-				&iomsg->msg.msg_iter, true);
+				nr_segs, &iov, &iomsg->msg.msg_iter, true);
 	if (unlikely(ret < 0))
 		return ret;
 
-	return 0;
+	return io_net_vec_assign(req, iomsg, iov);
 }
 #endif
 
@@ -201,7 +245,16 @@ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
 			   struct user_msghdr *msg, int ddir)
 {
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
-	int ret;
+	struct iovec *iov;
+	int ret, nr_segs;
+
+	if (iomsg->free_iov) {
+		nr_segs = iomsg->free_iov_nr;
+		iov = iomsg->free_iov;
+	} else {
+		iov = &iomsg->fast_iov;
+		nr_segs = 1;
+	}
 
 	if (!user_access_begin(sr->umsg, sizeof(*sr->umsg)))
 		return -EFAULT;
@@ -217,9 +270,8 @@ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
 
 	if (req->flags & REQ_F_BUFFER_SELECT) {
 		if (msg->msg_iovlen == 0) {
-			sr->len = iomsg->fast_iov[0].iov_len = 0;
-			iomsg->fast_iov[0].iov_base = NULL;
-			iomsg->free_iov = NULL;
+			sr->len = iov->iov_len = 0;
+			iov->iov_base = NULL;
 		} else if (msg->msg_iovlen > 1) {
 			ret = -EINVAL;
 			goto ua_end;
@@ -227,10 +279,9 @@ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
 			/* we only need the length for provided buffers */
 			if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t)))
 				goto ua_end;
-			unsafe_get_user(iomsg->fast_iov[0].iov_len,
-					&msg->msg_iov[0].iov_len, ua_end);
-			sr->len = iomsg->fast_iov[0].iov_len;
-			iomsg->free_iov = NULL;
+			unsafe_get_user(iov->iov_len, &msg->msg_iov[0].iov_len,
+					ua_end);
+			sr->len = iov->iov_len;
 		}
 		ret = 0;
 ua_end:
@@ -239,13 +290,12 @@ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
 	}
 
 	user_access_end();
-	iomsg->free_iov = iomsg->fast_iov;
-	ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, UIO_FASTIOV,
-				&iomsg->free_iov, &iomsg->msg.msg_iter, false);
+	ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, nr_segs,
+				&iov, &iomsg->msg.msg_iter, false);
 	if (unlikely(ret < 0))
 		return ret;
 
-	return 0;
+	return io_net_vec_assign(req, iomsg, iov);
 }
 
 static int io_sendmsg_copy_hdr(struct io_kiocb *req,
@@ -285,7 +335,7 @@ void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
 {
 	struct io_async_msghdr *io = req->async_data;
 
-	kfree(io->free_iov);
+	io_netmsg_iovec_free(io);
 }
 
 static int io_send_setup(struct io_kiocb *req)
@@ -369,9 +419,6 @@ static void io_req_msg_cleanup(struct io_kiocb *req,
 			       unsigned int issue_flags)
 {
 	req->flags &= ~REQ_F_NEED_CLEANUP;
-	/* fast path, check for non-NULL to avoid function call */
-	if (kmsg->free_iov)
-		kfree(kmsg->free_iov);
 	io_netmsg_recycle(req, issue_flags);
 }
 
@@ -622,11 +669,6 @@ static inline void io_recv_prep_retry(struct io_kiocb *req,
 {
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 
-	if (kmsg->free_iov) {
-		kfree(kmsg->free_iov);
-		kmsg->free_iov = NULL;
-	}
-
 	req->flags &= ~REQ_F_BL_EMPTY;
 	sr->done_io = 0;
 	sr->len = 0; /* get from the provided buffer */
@@ -942,14 +984,10 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
 void io_send_zc_cleanup(struct io_kiocb *req)
 {
 	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
-	struct io_async_msghdr *io;
+	struct io_async_msghdr *io = req->async_data;
 
-	if (req_has_async_data(req)) {
-		io = req->async_data;
-		/* might be ->fast_iov if *msg_copy_hdr failed */
-		if (io->free_iov != io->fast_iov)
-			kfree(io->free_iov);
-	}
+	if (req_has_async_data(req))
+		io_netmsg_iovec_free(io);
 	if (zc->notif) {
 		io_notif_flush(zc->notif);
 		zc->notif = NULL;
@@ -1171,8 +1209,7 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
 	 */
 	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
 		io_notif_flush(zc->notif);
-		io_netmsg_recycle(req, issue_flags);
-		req->flags &= ~REQ_F_NEED_CLEANUP;
+		io_req_msg_cleanup(req, kmsg, 0);
 	}
 	io_req_set_res(req, ret, IORING_CQE_F_MORE);
 	return IOU_OK;
@@ -1221,13 +1258,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
 			ret = -EINTR;
 		req_set_fail(req);
 	}
-	/* fast path, check for non-NULL to avoid function call */
-	if (kmsg->free_iov) {
-		kfree(kmsg->free_iov);
-		kmsg->free_iov = NULL;
-	}
 
-	io_netmsg_recycle(req, issue_flags);
 	if (ret >= 0)
 		ret += sr->done_io;
 	else if (sr->done_io)
@@ -1239,7 +1270,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
 	 */
 	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
 		io_notif_flush(sr->notif);
-		req->flags &= ~REQ_F_NEED_CLEANUP;
+		io_req_msg_cleanup(req, kmsg, 0);
 	}
 	io_req_set_res(req, ret, IORING_CQE_F_MORE);
 	return IOU_OK;
@@ -1483,6 +1514,14 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags)
 
 void io_netmsg_cache_free(struct io_cache_entry *entry)
 {
-	kfree(container_of(entry, struct io_async_msghdr, cache));
+	struct io_async_msghdr *kmsg;
+
+	kmsg = container_of(entry, struct io_async_msghdr, cache);
+	if (kmsg->free_iov) {
+		kasan_mempool_unpoison_object(kmsg->free_iov,
+				kmsg->free_iov_nr * sizeof(struct iovec));
+		io_netmsg_iovec_free(kmsg);
+	}
+	kfree(kmsg);
 }
 #endif
diff --git a/io_uring/net.h b/io_uring/net.h
index f99ebb9dc0bb..0aef1c992aee 100644
--- a/io_uring/net.h
+++ b/io_uring/net.h
@@ -8,17 +8,18 @@
 struct io_async_msghdr {
 #if defined(CONFIG_NET)
 	union {
-		struct iovec		fast_iov[UIO_FASTIOV];
+		struct iovec			fast_iov;
 		struct {
-			struct iovec	fast_iov_one;
-			__kernel_size_t	controllen;
-			int		namelen;
-			__kernel_size_t	payloadlen;
+			struct io_cache_entry	cache;
+			/* entry size of ->free_iov, if valid */
+			int			free_iov_nr;
 		};
-		struct io_cache_entry	cache;
 	};
 	/* points to an allocated iov, if NULL we use fast_iov instead */
 	struct iovec			*free_iov;
+	__kernel_size_t			controllen;
+	__kernel_size_t			payloadlen;
+	int				namelen;
 	struct sockaddr __user		*uaddr;
 	struct msghdr			msg;
 	struct sockaddr_storage		addr;
-- 
2.43.0


  parent reply	other threads:[~2024-03-20 22:58 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-20 22:55 [PATCHSET v2 0/17] Improve async state handling Jens Axboe
2024-03-20 22:55 ` [PATCH 01/17] io_uring/net: switch io_send() and io_send_zc() to using io_async_msghdr Jens Axboe
2024-04-06 20:58   ` Pavel Begunkov
2024-04-07 21:47     ` Jens Axboe
2024-03-20 22:55 ` [PATCH 02/17] io_uring/net: switch io_recv() " Jens Axboe
2024-03-20 22:55 ` [PATCH 03/17] io_uring/net: unify cleanup handling Jens Axboe
2024-03-20 22:55 ` [PATCH 04/17] io_uring/net: always setup an io_async_msghdr Jens Axboe
2024-03-20 22:55 ` [PATCH 05/17] io_uring/net: get rid of ->prep_async() for receive side Jens Axboe
2024-03-20 22:55 ` [PATCH 06/17] io_uring/net: get rid of ->prep_async() for send side Jens Axboe
2024-03-20 22:55 ` [PATCH 07/17] io_uring: kill io_msg_alloc_async_prep() Jens Axboe
2024-03-20 22:55 ` Jens Axboe [this message]
2024-03-20 22:55 ` [PATCH 09/17] io_uring/net: drop 'kmsg' parameter from io_req_msg_cleanup() Jens Axboe
2024-03-20 22:55 ` [PATCH 10/17] io_uring/rw: always setup io_async_rw for read/write requests Jens Axboe
2024-03-25 12:03   ` Anuj gupta
2024-03-25 14:54     ` Jens Axboe
2024-03-20 22:55 ` [PATCH 11/17] io_uring: get rid of struct io_rw_state Jens Axboe
2024-03-20 22:55 ` [PATCH 12/17] io_uring/rw: add iovec recycling Jens Axboe
2024-03-20 22:55 ` [PATCH 13/17] io_uring/net: move connect to always using async data Jens Axboe
2024-03-20 22:55 ` [PATCH 14/17] io_uring/uring_cmd: switch to always allocating " Jens Axboe
2024-03-20 22:55 ` [PATCH 15/17] io_uring/uring_cmd: defer SQE copying until we need it Jens Axboe
2024-03-25 12:41   ` Anuj gupta
2024-03-25 14:55     ` Jens Axboe
2024-03-20 22:55 ` [PATCH 16/17] io_uring: drop ->prep_async() Jens Axboe
2024-04-06 20:54   ` Pavel Begunkov
2024-04-07 21:46     ` Jens Axboe
2024-03-20 22:55 ` [PATCH 17/17] io_uring/alloc_cache: switch to array based caching Jens Axboe
2024-03-21 15:59   ` Gabriel Krisman Bertazi
2024-03-21 16:38     ` Jens Axboe
2024-03-21 17:20       ` Gabriel Krisman Bertazi
2024-03-21 17:22         ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox