public inbox for [email protected]
 help / color / mirror / Atom feed
* [PATCH 1/2] io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly
  2022-03-23 15:39 [PATCHSET 0/2] Fix MSG_WAITALL for IORING_OP_RECV/RECVMSG Jens Axboe
@ 2022-03-23 15:39 ` Jens Axboe
  2022-03-23 20:13   ` Pavel Begunkov
  0 siblings, 1 reply; 8+ messages in thread
From: Jens Axboe @ 2022-03-23 15:39 UTC (permalink / raw)
  To: io-uring; +Cc: constantine.gavrilov, Jens Axboe, stable

We currently don't attempt to get the full asked for length even if
MSG_WAITALL is set, if we get a partial receive. If we do see a partial
receive, then just note how many bytes we did and return -EAGAIN to
get it retried.

The iov is advanced appropriately for the vector based case, and we
manually bump the buffer and remainder for the non-vector case.

Cc: [email protected]
Reported-by: Constantine Gavrilov <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io_uring.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index f41d91ce1fd0..2cd67b4ff924 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -612,6 +612,7 @@ struct io_sr_msg {
 	int				msg_flags;
 	int				bgid;
 	size_t				len;
+	size_t				done_io;
 };
 
 struct io_open {
@@ -5417,12 +5418,14 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (req->ctx->compat)
 		sr->msg_flags |= MSG_CMSG_COMPAT;
 #endif
+	sr->done_io = 0;
 	return 0;
 }
 
 static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_async_msghdr iomsg, *kmsg;
+	struct io_sr_msg *sr = &req->sr_msg;
 	struct socket *sock;
 	struct io_buffer *kbuf;
 	unsigned flags;
@@ -5465,6 +5468,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
 			return io_setup_async_msg(req, kmsg);
 		if (ret == -ERESTARTSYS)
 			ret = -EINTR;
+		if (ret > 0 && flags & MSG_WAITALL) {
+			sr->done_io += ret;
+			return io_setup_async_msg(req, kmsg);
+		}
 		req_set_fail(req);
 	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
 		req_set_fail(req);
@@ -5474,6 +5481,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
 	if (kmsg->free_iov)
 		kfree(kmsg->free_iov);
 	req->flags &= ~REQ_F_NEED_CLEANUP;
+	if (ret >= 0)
+		ret += sr->done_io;
+	else if (sr->done_io)
+		ret = sr->done_io;
 	__io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
 	return 0;
 }
@@ -5524,12 +5535,22 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
 			return -EAGAIN;
 		if (ret == -ERESTARTSYS)
 			ret = -EINTR;
+		if (ret > 0 && flags & MSG_WAITALL) {
+			sr->len -= ret;
+			sr->buf += ret;
+			sr->done_io += ret;
+			return -EAGAIN;
+		}
 		req_set_fail(req);
 	} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
 out_free:
 		req_set_fail(req);
 	}
 
+	if (ret >= 0)
+		ret += sr->done_io;
+	else if (sr->done_io)
+		ret = sr->done_io;
 	__io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
 	return 0;
 }
-- 
2.35.1



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly
  2022-03-23 15:39 ` [PATCH 1/2] io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly Jens Axboe
@ 2022-03-23 20:13   ` Pavel Begunkov
  2022-03-23 20:15     ` Jens Axboe
  2022-03-23 20:45     ` Constantine Gavrilov
  0 siblings, 2 replies; 8+ messages in thread
From: Pavel Begunkov @ 2022-03-23 20:13 UTC (permalink / raw)
  To: Jens Axboe, io-uring; +Cc: constantine.gavrilov, stable

On 3/23/22 15:39, Jens Axboe wrote:
> We currently don't attempt to get the full asked for length even if
> MSG_WAITALL is set, if we get a partial receive. If we do see a partial
> receive, then just note how many bytes we did and return -EAGAIN to
> get it retried.
> 
> The iov is advanced appropriately for the vector based case, and we
> manually bump the buffer and remainder for the non-vector case.

How datagrams work with MSG_WAITALL? I highly doubt it coalesces 2+
packets to satisfy the length requirement (e.g. because it may move
the address back into the userspace). I'm mainly afraid about
breaking io_uring users who are using the flag just to fail links
when there is not enough data in a packet.

-- 
Pavel Begunkov


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly
  2022-03-23 20:13   ` Pavel Begunkov
@ 2022-03-23 20:15     ` Jens Axboe
  2022-03-23 20:45     ` Constantine Gavrilov
  1 sibling, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2022-03-23 20:15 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring; +Cc: constantine.gavrilov, stable

On 3/23/22 2:13 PM, Pavel Begunkov wrote:
> On 3/23/22 15:39, Jens Axboe wrote:
>> We currently don't attempt to get the full asked for length even if
>> MSG_WAITALL is set, if we get a partial receive. If we do see a partial
>> receive, then just note how many bytes we did and return -EAGAIN to
>> get it retried.
>>
>> The iov is advanced appropriately for the vector based case, and we
>> manually bump the buffer and remainder for the non-vector case.
> 
> How datagrams work with MSG_WAITALL? I highly doubt it coalesces 2+
> packets to satisfy the length requirement (e.g. because it may move
> the address back into the userspace). I'm mainly afraid about
> breaking io_uring users who are using the flag just to fail links
> when there is not enough data in a packet.

Yes was thinking that too, nothing is final yet.. Need to write a
SOCK_STREAM test case.

-- 
Jens Axboe



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly
  2022-03-23 20:13   ` Pavel Begunkov
  2022-03-23 20:15     ` Jens Axboe
@ 2022-03-23 20:45     ` Constantine Gavrilov
  2022-03-23 20:52       ` Pavel Begunkov
  1 sibling, 1 reply; 8+ messages in thread
From: Constantine Gavrilov @ 2022-03-23 20:45 UTC (permalink / raw)
  To: Pavel Begunkov; +Cc: Jens Axboe, io-uring, stable

On Wed, Mar 23, 2022 at 10:14 PM Pavel Begunkov <[email protected]> wrote:
>
> On 3/23/22 15:39, Jens Axboe wrote:
> > We currently don't attempt to get the full asked for length even if
> > MSG_WAITALL is set, if we get a partial receive. If we do see a partial
> > receive, then just note how many bytes we did and return -EAGAIN to
> > get it retried.
> >
> > The iov is advanced appropriately for the vector based case, and we
> > manually bump the buffer and remainder for the non-vector case.
>
> How datagrams work with MSG_WAITALL? I highly doubt it coalesces 2+
> packets to satisfy the length requirement (e.g. because it may move
> the address back into the userspace). I'm mainly afraid about
> breaking io_uring users who are using the flag just to fail links
> when there is not enough data in a packet.
>
> --
> Pavel Begunkov

Pavel:

Datagrams have message boundaries and the MSG_WAITALL flag does not
make sense there. I believe it is ignored by receive code on daragram
sockets. MSG_WAITALL makes sends only on stream sockets, like TCP. The
manual page says "This flag has  no  effect  for datagram sockets.".

-- 
----------------------------------------
Constantine Gavrilov
Storage Architect
Master Inventor
Tel-Aviv IBM Storage Lab
1 Azrieli Center, Tel-Aviv
----------------------------------------


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly
  2022-03-23 20:45     ` Constantine Gavrilov
@ 2022-03-23 20:52       ` Pavel Begunkov
  0 siblings, 0 replies; 8+ messages in thread
From: Pavel Begunkov @ 2022-03-23 20:52 UTC (permalink / raw)
  To: Constantine Gavrilov; +Cc: Jens Axboe, io-uring, stable

On 3/23/22 20:45, Constantine Gavrilov wrote:
> On Wed, Mar 23, 2022 at 10:14 PM Pavel Begunkov <[email protected]> wrote:
>>
>> On 3/23/22 15:39, Jens Axboe wrote:
>>> We currently don't attempt to get the full asked for length even if
>>> MSG_WAITALL is set, if we get a partial receive. If we do see a partial
>>> receive, then just note how many bytes we did and return -EAGAIN to
>>> get it retried.
>>>
>>> The iov is advanced appropriately for the vector based case, and we
>>> manually bump the buffer and remainder for the non-vector case.
>>
>> How datagrams work with MSG_WAITALL? I highly doubt it coalesces 2+
>> packets to satisfy the length requirement (e.g. because it may move
>> the address back into the userspace). I'm mainly afraid about
>> breaking io_uring users who are using the flag just to fail links
>> when there is not enough data in a packet.
>>
>> --
>> Pavel Begunkov
> 
> Pavel:
> 
> Datagrams have message boundaries and the MSG_WAITALL flag does not
> make sense there. I believe it is ignored by receive code on daragram
> sockets. MSG_WAITALL makes sends only on stream sockets, like TCP. The
> manual page says "This flag has  no  effect  for datagram sockets.".

Missed the line this in mans, thanks, and it's exactly as expected.
The problem is on the io_uring side where with the patch it might
blindly do a second call into the network stack consuming 2+ packets.

-- 
Pavel Begunkov


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCHSET v2 0/2] Fix MSG_WAITALL for IORING_OP_RECV/RECVMSG
@ 2022-03-23 22:41 Jens Axboe
  2022-03-23 22:41 ` [PATCH 1/2] io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly Jens Axboe
  2022-03-23 22:41 ` [PATCH 2/2] io_uring: add flag for disabling provided buffer recycling Jens Axboe
  0 siblings, 2 replies; 8+ messages in thread
From: Jens Axboe @ 2022-03-23 22:41 UTC (permalink / raw)
  To: io-uring; +Cc: constantine.gavrilov

Hi,

If we get a partial receive, we don't retry even if MSG_WAITALL is set.
Ensure that we retry for the remainder in that case.

The ordering of patches may look a bit odd here, but it's done this way
to make it easier to handle for the stable backport.

v2:
- Only do it for SOCK_STREAM/SOCK_SEQPACKET

-- 
Jens Axboe




^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/2] io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly
  2022-03-23 22:41 [PATCHSET v2 0/2] Fix MSG_WAITALL for IORING_OP_RECV/RECVMSG Jens Axboe
@ 2022-03-23 22:41 ` Jens Axboe
  2022-03-23 22:41 ` [PATCH 2/2] io_uring: add flag for disabling provided buffer recycling Jens Axboe
  1 sibling, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2022-03-23 22:41 UTC (permalink / raw)
  To: io-uring; +Cc: constantine.gavrilov, Jens Axboe, stable

We currently don't attempt to get the full asked for length even if
MSG_WAITALL is set, if we get a partial receive. If we do see a partial
receive, then just note how many bytes we did and return -EAGAIN to
get it retried.

The iov is advanced appropriately for the vector based case, and we
manually bump the buffer and remainder for the non-vector case.

Cc: [email protected]
Reported-by: Constantine Gavrilov <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io_uring.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index f41d91ce1fd0..a70de170aea1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -612,6 +612,7 @@ struct io_sr_msg {
 	int				msg_flags;
 	int				bgid;
 	size_t				len;
+	size_t				done_io;
 };
 
 struct io_open {
@@ -5417,12 +5418,21 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (req->ctx->compat)
 		sr->msg_flags |= MSG_CMSG_COMPAT;
 #endif
+	sr->done_io = 0;
 	return 0;
 }
 
+static bool io_net_retry(struct socket *sock, int flags)
+{
+	if (!(flags & MSG_WAITALL))
+		return false;
+	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
+}
+
 static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_async_msghdr iomsg, *kmsg;
+	struct io_sr_msg *sr = &req->sr_msg;
 	struct socket *sock;
 	struct io_buffer *kbuf;
 	unsigned flags;
@@ -5465,6 +5475,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
 			return io_setup_async_msg(req, kmsg);
 		if (ret == -ERESTARTSYS)
 			ret = -EINTR;
+		if (ret > 0 && io_net_retry(sock, flags)) {
+			sr->done_io += ret;
+			return io_setup_async_msg(req, kmsg);
+		}
 		req_set_fail(req);
 	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
 		req_set_fail(req);
@@ -5474,6 +5488,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
 	if (kmsg->free_iov)
 		kfree(kmsg->free_iov);
 	req->flags &= ~REQ_F_NEED_CLEANUP;
+	if (ret >= 0)
+		ret += sr->done_io;
+	else if (sr->done_io)
+		ret = sr->done_io;
 	__io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
 	return 0;
 }
@@ -5524,12 +5542,22 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
 			return -EAGAIN;
 		if (ret == -ERESTARTSYS)
 			ret = -EINTR;
+		if (ret > 0 && io_net_retry(sock, flags)) {
+			sr->len -= ret;
+			sr->buf += ret;
+			sr->done_io += ret;
+			return -EAGAIN;
+		}
 		req_set_fail(req);
 	} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
 out_free:
 		req_set_fail(req);
 	}
 
+	if (ret >= 0)
+		ret += sr->done_io;
+	else if (sr->done_io)
+		ret = sr->done_io;
 	__io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
 	return 0;
 }
-- 
2.35.1



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/2] io_uring: add flag for disabling provided buffer recycling
  2022-03-23 22:41 [PATCHSET v2 0/2] Fix MSG_WAITALL for IORING_OP_RECV/RECVMSG Jens Axboe
  2022-03-23 22:41 ` [PATCH 1/2] io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly Jens Axboe
@ 2022-03-23 22:41 ` Jens Axboe
  1 sibling, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2022-03-23 22:41 UTC (permalink / raw)
  To: io-uring; +Cc: constantine.gavrilov, Jens Axboe

If we need to continue doing this IO, then we don't want a potentially
selected buffer recycled. Add a flag for that.

Set this for recv/recvmsg if they do partial IO.

Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io_uring.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index a70de170aea1..88556e654c5a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -783,6 +783,7 @@ enum {
 	REQ_F_SKIP_LINK_CQES_BIT,
 	REQ_F_SINGLE_POLL_BIT,
 	REQ_F_DOUBLE_POLL_BIT,
+	REQ_F_PARTIAL_IO_BIT,
 	/* keep async read/write and isreg together and in order */
 	REQ_F_SUPPORT_NOWAIT_BIT,
 	REQ_F_ISREG_BIT,
@@ -845,6 +846,8 @@ enum {
 	REQ_F_SINGLE_POLL	= BIT(REQ_F_SINGLE_POLL_BIT),
 	/* double poll may active */
 	REQ_F_DOUBLE_POLL	= BIT(REQ_F_DOUBLE_POLL_BIT),
+	/* request has already done partial IO */
+	REQ_F_PARTIAL_IO	= BIT(REQ_F_PARTIAL_IO_BIT),
 };
 
 struct async_poll {
@@ -1392,6 +1395,9 @@ static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
 
 	if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
 		return;
+	/* don't recycle if we already did IO to this buffer */
+	if (req->flags & REQ_F_PARTIAL_IO)
+		return;
 
 	if (issue_flags & IO_URING_F_UNLOCKED)
 		mutex_lock(&ctx->uring_lock);
@@ -5477,6 +5483,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
 			ret = -EINTR;
 		if (ret > 0 && io_net_retry(sock, flags)) {
 			sr->done_io += ret;
+			req->flags |= REQ_F_PARTIAL_IO;
 			return io_setup_async_msg(req, kmsg);
 		}
 		req_set_fail(req);
@@ -5546,6 +5553,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
 			sr->len -= ret;
 			sr->buf += ret;
 			sr->done_io += ret;
+			req->flags |= REQ_F_PARTIAL_IO;
 			return -EAGAIN;
 		}
 		req_set_fail(req);
-- 
2.35.1



^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-03-23 22:41 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-03-23 22:41 [PATCHSET v2 0/2] Fix MSG_WAITALL for IORING_OP_RECV/RECVMSG Jens Axboe
2022-03-23 22:41 ` [PATCH 1/2] io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly Jens Axboe
2022-03-23 22:41 ` [PATCH 2/2] io_uring: add flag for disabling provided buffer recycling Jens Axboe
  -- strict thread matches above, loose matches on Subject: below --
2022-03-23 15:39 [PATCHSET 0/2] Fix MSG_WAITALL for IORING_OP_RECV/RECVMSG Jens Axboe
2022-03-23 15:39 ` [PATCH 1/2] io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly Jens Axboe
2022-03-23 20:13   ` Pavel Begunkov
2022-03-23 20:15     ` Jens Axboe
2022-03-23 20:45     ` Constantine Gavrilov
2022-03-23 20:52       ` Pavel Begunkov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox