public inbox for [email protected]
 help / color / mirror / Atom feed
From: Pavel Begunkov <[email protected]>
To: [email protected]
Cc: Jens Axboe <[email protected]>, [email protected]
Subject: [PATCH 9/9] io_uring/net: zerocopy sendmsg
Date: Wed, 21 Sep 2022 12:17:54 +0100	[thread overview]
Message-ID: <6aabc4bdfc0ec78df6ec9328137e394af9d4e7ef.1663668091.git.asml.silence@gmail.com> (raw)
In-Reply-To: <[email protected]>

Add a zerocopy version of sendmsg.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 include/uapi/linux/io_uring.h |  1 +
 io_uring/net.c                | 92 +++++++++++++++++++++++++++++++++--
 io_uring/net.h                |  1 +
 io_uring/opdef.c              | 19 ++++++++
 4 files changed, 108 insertions(+), 5 deletions(-)

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 972b179bc07a..92f29d9505a6 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -213,6 +213,7 @@ enum io_uring_op {
 	IORING_OP_SOCKET,
 	IORING_OP_URING_CMD,
 	IORING_OP_SEND_ZC,
+	IORING_OP_SENDMSG_ZC,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
diff --git a/io_uring/net.c b/io_uring/net.c
index 4bcc2675001f..d086f397d4d0 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -909,7 +909,12 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
 void io_send_zc_cleanup(struct io_kiocb *req)
 {
 	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
+	struct io_async_msghdr *io;
 
+	if (req_has_async_data(req)) {
+		io = req->async_data;
+		kfree(io->free_iov);
+	}
 	zc->notif->flags |= REQ_F_CQE_SKIP;
 	io_notif_flush(zc->notif);
 	zc->notif = NULL;
@@ -921,8 +926,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	struct io_ring_ctx *ctx = req->ctx;
 	struct io_kiocb *notif;
 
-	if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3) ||
-	    READ_ONCE(sqe->__pad3[0]))
+	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
 		return -EINVAL;
 	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
 	if (req->flags & REQ_F_CQE_SKIP)
@@ -941,6 +945,19 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		req->imu = READ_ONCE(ctx->user_bufs[idx]);
 		io_req_set_rsrc_node(req, ctx, 0);
 	}
+
+	if (req->opcode == IORING_OP_SEND_ZC) {
+		if (READ_ONCE(sqe->__pad3[0]))
+			return -EINVAL;
+		zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+		zc->addr_len = READ_ONCE(sqe->addr_len);
+	} else {
+		if (unlikely(sqe->addr2 || sqe->file_index))
+			return -EINVAL;
+		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
+			return -EINVAL;
+	}
+
 	notif = zc->notif = io_alloc_notif(ctx);
 	if (!notif)
 		return -ENOMEM;
@@ -955,8 +972,6 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (zc->msg_flags & MSG_DONTWAIT)
 		req->flags |= REQ_F_NOWAIT;
 
-	zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
-	zc->addr_len = READ_ONCE(sqe->addr_len);
 	zc->done_io = 0;
 
 #ifdef CONFIG_COMPAT
@@ -1118,6 +1133,73 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
 	return IOU_OK;
 }
 
+int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+	struct io_async_msghdr iomsg, *kmsg;
+	struct socket *sock;
+	unsigned flags, cflags;
+	int ret, min_ret = 0;
+
+	sock = sock_from_file(req->file);
+	if (unlikely(!sock))
+		return -ENOTSOCK;
+
+	if (req_has_async_data(req)) {
+		kmsg = req->async_data;
+	} else {
+		ret = io_sendmsg_copy_hdr(req, &iomsg);
+		if (ret)
+			return ret;
+		kmsg = &iomsg;
+	}
+
+	if (!(req->flags & REQ_F_POLLED) &&
+	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
+		return io_setup_async_msg(req, kmsg, issue_flags);
+
+	flags = sr->msg_flags | MSG_ZEROCOPY;
+	if (issue_flags & IO_URING_F_NONBLOCK)
+		flags |= MSG_DONTWAIT;
+	if (flags & MSG_WAITALL)
+		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
+
+	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
+	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
+	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
+
+	if (unlikely(ret < min_ret)) {
+		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
+			return io_setup_async_msg(req, kmsg, issue_flags);
+
+		if (ret > 0 && io_net_retry(sock, flags)) {
+			sr->done_io += ret;
+			req->flags |= REQ_F_PARTIAL_IO;
+			return io_setup_async_msg(req, kmsg, issue_flags);
+		}
+		if (ret < 0 && !sr->done_io)
+			sr->notif->flags |= REQ_F_CQE_SKIP;
+		if (ret == -ERESTARTSYS)
+			ret = -EINTR;
+		req_set_fail(req);
+	}
+	/* fast path, check for non-NULL to avoid function call */
+	if (kmsg->free_iov)
+		kfree(kmsg->free_iov);
+
+	io_netmsg_recycle(req, issue_flags);
+	if (ret >= 0)
+		ret += sr->done_io;
+	else if (sr->done_io)
+		ret = sr->done_io;
+
+	io_notif_flush(sr->notif);
+	req->flags &= ~REQ_F_NEED_CLEANUP;
+	cflags = ret >= 0 ? IORING_CQE_F_MORE : 0;
+	io_req_set_res(req, ret, cflags);
+	return IOU_OK;
+}
+
 void io_sendrecv_fail(struct io_kiocb *req)
 {
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
@@ -1127,7 +1209,7 @@ void io_sendrecv_fail(struct io_kiocb *req)
 	if (req->flags & REQ_F_PARTIAL_IO)
 		res = sr->done_io;
 	if ((req->flags & REQ_F_NEED_CLEANUP) &&
-	    req->opcode == IORING_OP_SEND_ZC) {
+	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC)) {
 		/* preserve notification for partial I/O */
 		if (res < 0)
 			sr->notif->flags |= REQ_F_CQE_SKIP;
diff --git a/io_uring/net.h b/io_uring/net.h
index 45558e2b0a83..5ffa11bf5d2e 100644
--- a/io_uring/net.h
+++ b/io_uring/net.h
@@ -57,6 +57,7 @@ int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 int io_connect(struct io_kiocb *req, unsigned int issue_flags);
 
 int io_send_zc(struct io_kiocb *req, unsigned int issue_flags);
+int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags);
 int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 void io_send_zc_cleanup(struct io_kiocb *req);
 
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index e9fd940c2ee1..3c930fef255b 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -503,6 +503,25 @@ const struct io_op_def io_op_defs[] = {
 		.fail			= io_sendrecv_fail,
 #else
 		.prep			= io_eopnotsupp_prep,
+#endif
+	},
+	[IORING_OP_SENDMSG_ZC] = {
+		.name			= "SENDMSG_ZC",
+		.needs_file		= 1,
+		.unbound_nonreg_file	= 1,
+		.pollout		= 1,
+		.audit_skip		= 1,
+		.ioprio			= 1,
+		.manual_alloc		= 1,
+#if defined(CONFIG_NET)
+		.async_size		= sizeof(struct io_async_msghdr),
+		.prep			= io_send_zc_prep,
+		.issue			= io_sendmsg_zc,
+		.prep_async		= io_sendmsg_prep_async,
+		.cleanup		= io_send_zc_cleanup,
+		.fail			= io_sendrecv_fail,
+#else
+		.prep			= io_eopnotsupp_prep,
 #endif
 	},
 };
-- 
2.37.2


  parent reply	other threads:[~2022-09-21 11:21 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-21 11:17 [PATCH for-next 0/9] mix of 6.1 net patches Pavel Begunkov
2022-09-21 11:17 ` [PATCH 1/9] io_uring: add custom opcode hooks on fail Pavel Begunkov
2022-09-21 11:17 ` [PATCH 2/9] io_uring/rw: don't lose partial IO result " Pavel Begunkov
2022-09-21 11:17 ` [PATCH 3/9] io_uring/net: don't lose partial send/recv " Pavel Begunkov
2022-09-21 11:17 ` [PATCH 4/9] io_uring/net: don't lose partial send_zc " Pavel Begunkov
2022-09-21 11:17 ` [PATCH 5/9] io_uring/net: refactor io_setup_async_addr Pavel Begunkov
2022-09-21 11:17 ` [PATCH 6/9] io_uring/net: support non-zerocopy sendto Pavel Begunkov
2022-09-21 11:17 ` [PATCH 7/9] io_uring/net: rename io_sendzc() Pavel Begunkov
2022-09-21 11:17 ` [PATCH 8/9] io_uring/net: combine fail handlers Pavel Begunkov
2022-09-21 11:17 ` Pavel Begunkov [this message]
2022-09-21 16:37 ` [PATCH for-next 0/9] mix of 6.1 net patches Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6aabc4bdfc0ec78df6ec9328137e394af9d4e7ef.1663668091.git.asml.silence@gmail.com \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox