public inbox for [email protected]
 help / color / mirror / Atom feed
* [PATCH 5/5] io_uring: add IORING_OP_WAITID support
  2023-07-11 20:43 [PATCHSET 0/5] Add io_uring support for waitid Jens Axboe
@ 2023-07-11 20:43 ` Jens Axboe
  2023-07-11 21:11   ` Arnd Bergmann
  0 siblings, 1 reply; 21+ messages in thread
From: Jens Axboe @ 2023-07-11 20:43 UTC (permalink / raw)
  To: io-uring, linux-kernel; +Cc: brauner, arnd, Jens Axboe

This adds support for a fully async version of waitid(2). If an event
isn't immediately available, the request waits for a callback to
trigger a retry.

The format of the sqe is as follows:

sqe->len		The 'which', the idtype being queried/waited for.
sqe->fd			The 'pid' (or id) being waited for.
sqe->file_index		The 'options' being set.
sqe->addr2		A pointer to siginfo_t, if any, being filled in.

buf_index, addr3, and waitid_flags are reserved/unused for now.
waitid_flags will be used for options for this request type. One
interesting use case may be to add multi-shot support, so that the
request stays armed and posts a notification every time a monitored
process state change occurs.

Note that this does not support rusage, on Arnd's recommendation.

See the waitid(2) man page for details on the arguments.

Signed-off-by: Jens Axboe <[email protected]>
---
 include/linux/io_uring_types.h |   2 +
 include/uapi/linux/io_uring.h  |   2 +
 io_uring/Makefile              |   2 +-
 io_uring/cancel.c              |   5 +
 io_uring/io_uring.c            |   3 +
 io_uring/opdef.c               |   9 ++
 io_uring/waitid.c              | 271 +++++++++++++++++++++++++++++++++
 io_uring/waitid.h              |  15 ++
 8 files changed, 308 insertions(+), 1 deletion(-)
 create mode 100644 io_uring/waitid.c
 create mode 100644 io_uring/waitid.h

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index a7f03d8d879f..598553877fc2 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -276,6 +276,8 @@ struct io_ring_ctx {
 	struct hlist_head	futex_list;
 	struct io_alloc_cache	futex_cache;
 
+	struct hlist_head	waitid_list;
+
 	const struct cred	*sq_creds;	/* cred used for __io_sq_thread() */
 	struct io_sq_data	*sq_data;	/* if using sq thread polling */
 
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 420f38675769..8fca2cffc343 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -66,6 +66,7 @@ struct io_uring_sqe {
 		__u32		msg_ring_flags;
 		__u32		uring_cmd_flags;
 		__u32		futex_flags;
+		__u32		waitid_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	/* pack this to avoid bogus arm OABI complaints */
@@ -239,6 +240,7 @@ enum io_uring_op {
 	IORING_OP_FUTEX_WAIT,
 	IORING_OP_FUTEX_WAKE,
 	IORING_OP_FUTEX_WAITV,
+	IORING_OP_WAITID,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
diff --git a/io_uring/Makefile b/io_uring/Makefile
index 2e4779bc550c..e5be47e4fc3b 100644
--- a/io_uring/Makefile
+++ b/io_uring/Makefile
@@ -8,6 +8,6 @@ obj-$(CONFIG_IO_URING)		+= io_uring.o xattr.o nop.o fs.o splice.o \
 					statx.o net.o msg_ring.o timeout.o \
 					sqpoll.o fdinfo.o tctx.o poll.o \
 					cancel.o kbuf.o rsrc.o rw.o opdef.o \
-					notif.o
+					notif.o waitid.o
 obj-$(CONFIG_IO_WQ)		+= io-wq.o
 obj-$(CONFIG_FUTEX)		+= futex.o
diff --git a/io_uring/cancel.c b/io_uring/cancel.c
index 3dba8ccb1cd8..a01f3f41012b 100644
--- a/io_uring/cancel.c
+++ b/io_uring/cancel.c
@@ -16,6 +16,7 @@
 #include "poll.h"
 #include "timeout.h"
 #include "futex.h"
+#include "waitid.h"
 #include "cancel.h"
 
 struct io_cancel {
@@ -124,6 +125,10 @@ int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
 	if (ret != -ENOENT)
 		return ret;
 
+	ret = io_waitid_cancel(ctx, cd, issue_flags);
+	if (ret != -ENOENT)
+		return ret;
+
 	spin_lock(&ctx->completion_lock);
 	if (!(cd->flags & IORING_ASYNC_CANCEL_FD))
 		ret = io_timeout_cancel(ctx, cd);
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 67ff148bc394..6d99d51b84e6 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -93,6 +93,7 @@
 #include "net.h"
 #include "notif.h"
 #include "futex.h"
+#include "waitid.h"
 
 #include "timeout.h"
 #include "poll.h"
@@ -336,6 +337,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	ctx->submit_state.free_list.next = NULL;
 	INIT_WQ_LIST(&ctx->locked_free_list);
 	INIT_HLIST_HEAD(&ctx->futex_list);
+	INIT_HLIST_HEAD(&ctx->waitid_list);
 	INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
 	INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
 	return ctx;
@@ -3259,6 +3261,7 @@ static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 	mutex_lock(&ctx->uring_lock);
 	ret |= io_poll_remove_all(ctx, task, cancel_all);
 	ret |= io_futex_remove_all(ctx, task, cancel_all);
+	ret |= io_waitid_remove_all(ctx, task, cancel_all);
 	mutex_unlock(&ctx->uring_lock);
 	ret |= io_kill_timeouts(ctx, task, cancel_all);
 	if (task)
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 2034acfe10d0..2fbdf6a6c24a 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -34,6 +34,7 @@
 #include "cancel.h"
 #include "rw.h"
 #include "futex.h"
+#include "waitid.h"
 
 static int io_no_issue(struct io_kiocb *req, unsigned int issue_flags)
 {
@@ -453,6 +454,10 @@ const struct io_issue_def io_issue_defs[] = {
 		.prep			= io_eopnotsupp_prep,
 #endif
 	},
+	[IORING_OP_WAITID] = {
+		.prep			= io_waitid_prep,
+		.issue			= io_waitid,
+	},
 };
 
 const struct io_cold_def io_cold_defs[] = {
@@ -681,6 +686,10 @@ const struct io_cold_def io_cold_defs[] = {
 	[IORING_OP_FUTEX_WAITV] = {
 		.name			= "FUTEX_WAITV",
 	},
+	[IORING_OP_WAITID] = {
+		.name			= "WAITID",
+		.async_size		= sizeof(struct io_waitid_async),
+	},
 };
 
 const char *io_uring_get_opcode(u8 opcode)
diff --git a/io_uring/waitid.c b/io_uring/waitid.c
new file mode 100644
index 000000000000..8d6ac22113dd
--- /dev/null
+++ b/io_uring/waitid.c
@@ -0,0 +1,271 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for async notification of waitid
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/io_uring.h>
+
+#include <uapi/linux/io_uring.h>
+
+#include "io_uring.h"
+#include "cancel.h"
+#include "waitid.h"
+#include "../kernel/exit.h"
+
+struct io_waitid {
+	struct file *file;
+	int which;			/* idtype, read from sqe->len */
+	pid_t upid;			/* id being waited for, read from sqe->fd */
+	int options;			/* wait options, read from sqe->file_index */
+	struct wait_queue_head *head;	/* waitqueue we are queued on, NULL if none */
+	struct siginfo __user *infop;	/* optional user siginfo, from sqe->addr2 */
+	struct waitid_info info;	/* handed to kernel_waitid_prepare(); copied to infop */
+};
+
+static void io_waitid_free(struct io_kiocb *req)
+{
+	struct io_waitid_async *iwa = req->async_data;
+
+	put_pid(iwa->wo.wo_pid);
+	kfree(req->async_data);
+	req->async_data = NULL;
+	req->flags &= ~REQ_F_ASYNC_DATA;
+}
+
+/* Copy the wait result to iw->infop, if set, then free the async data. */
+static int io_waitid_finish(struct io_kiocb *req, int ret)
+{
+	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+	int signo = 0;
+
+	if (ret > 0) {
+		signo = SIGCHLD;
+		ret = 0;
+	}
+	if (!iw->infop)
+		goto done;
+	if (!user_write_access_begin(iw->infop, sizeof(*iw->infop))) {
+		ret = -EFAULT;
+		goto done;
+	}
+	unsafe_put_user(signo, &iw->infop->si_signo, Efault);
+	unsafe_put_user(0, &iw->infop->si_errno, Efault);
+	unsafe_put_user(iw->info.cause, &iw->infop->si_code, Efault);
+	unsafe_put_user(iw->info.pid, &iw->infop->si_pid, Efault);
+	unsafe_put_user(iw->info.uid, &iw->infop->si_uid, Efault);
+	unsafe_put_user(iw->info.status, &iw->infop->si_status, Efault);
+	user_write_access_end();
+done:
+	io_waitid_free(req);
+	return ret;
+Efault:
+	user_write_access_end();	/* only close the window we opened */
+	ret = -EFAULT;
+	goto done;
+}
+
+static void io_waitid_complete(struct io_kiocb *req, int ret)
+{
+	struct io_tw_state ts = { .locked = true };
+
+	lockdep_assert_held(&req->ctx->uring_lock);
+
+	/*
+	 * Did cancel find it meanwhile?
+	 */
+	if (hlist_unhashed(&req->hash_node))
+		return;
+
+	hlist_del_init(&req->hash_node);
+
+	ret = io_waitid_finish(req, ret);
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_set_res(req, ret, 0);
+	io_req_task_complete(req, &ts);
+}
+
+static bool __io_waitid_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req)
+{
+	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+	struct wait_queue_head *head;
+
+	head = READ_ONCE(iw->head);
+	if (head) {
+		struct io_waitid_async *iwa = req->async_data;
+
+		spin_lock_irq(&head->lock);
+		list_del_init(&iwa->wo.child_wait.entry);
+		iw->head = NULL;
+		spin_unlock_irq(&head->lock);
+		io_waitid_complete(req, -ECANCELED);
+		return true;
+	}
+
+	return false;
+}
+
+int io_waitid_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
+		     unsigned int issue_flags)
+{
+	struct hlist_node *tmp;
+	struct io_kiocb *req;
+	int nr = 0;
+
+	if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_FD_FIXED))
+		return -ENOENT;
+
+	io_ring_submit_lock(ctx, issue_flags);
+	hlist_for_each_entry_safe(req, tmp, &ctx->waitid_list, hash_node) {
+		if (req->cqe.user_data != cd->data &&
+		    !(cd->flags & IORING_ASYNC_CANCEL_ANY))
+			continue;
+		if (__io_waitid_cancel(ctx, req))
+			nr++;
+		if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
+			break;
+	}
+	io_ring_submit_unlock(ctx, issue_flags);
+
+	if (nr)
+		return nr;
+
+	return -ENOENT;
+}
+
+bool io_waitid_remove_all(struct io_ring_ctx *ctx, struct task_struct *task,
+			  bool cancel_all)
+{
+	struct hlist_node *tmp;
+	struct io_kiocb *req;
+	bool found = false;
+
+	lockdep_assert_held(&ctx->uring_lock);
+
+	hlist_for_each_entry_safe(req, tmp, &ctx->waitid_list, hash_node) {
+		if (!io_match_task_safe(req, task, cancel_all))
+			continue;
+		__io_waitid_cancel(ctx, req);
+		found = true;
+	}
+
+	return found;
+}
+
+static void io_waitid_cb(struct io_kiocb *req, struct io_tw_state *ts)
+{
+	struct io_waitid_async *iwa = req->async_data;
+	struct io_ring_ctx *ctx = req->ctx;
+	int ret;
+
+	/*
+	 * If we get -ERESTARTSYS here, we need to re-arm and check again
+	 * to ensure we get another callback. If the retry works, then we can
+	 * just remove ourselves from the waitqueue again and finish the
+	 * request.
+	 */
+	ret = __do_wait(&iwa->wo);
+	if (unlikely(ret == -ERESTARTSYS)) {
+		struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+
+		io_tw_lock(ctx, ts);
+		iw->head = &current->signal->wait_chldexit;
+		add_wait_queue(iw->head, &iwa->wo.child_wait);
+		ret = __do_wait(&iwa->wo);
+		if (ret == -ERESTARTSYS)
+			return;
+
+		remove_wait_queue(iw->head, &iwa->wo.child_wait);
+		iw->head = NULL;
+	}
+
+	io_tw_lock(ctx, ts);
+	io_waitid_complete(req, ret);
+}
+
+/* Waitqueue callback: on a matching child event, dequeue and punt to task_work. */
+static int io_waitid_wait(struct wait_queue_entry *wait, unsigned mode,
+			  int sync, void *key)
+{
+	struct wait_opts *wo = container_of(wait, struct wait_opts, child_wait);
+	struct io_waitid_async *iwa = container_of(wo, struct io_waitid_async, wo);
+	struct io_kiocb *req = iwa->req;
+	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+	struct task_struct *p = key;
+
+	if (!pid_child_should_wake(wo, p))
+		return 0;
+	iw->head = NULL;
+	list_del_init(&wait->entry);
+	req->io_task_work.func = io_waitid_cb;
+	io_req_task_work_add(req);	/* last: req/iwa may be freed once queued */
+	return 1;
+}
+
+int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+
+	if (sqe->addr || sqe->buf_index || sqe->addr3 || sqe->waitid_flags)
+		return -EINVAL;
+
+	iw->which = READ_ONCE(sqe->len);
+	iw->options = READ_ONCE(sqe->file_index);
+	iw->upid = READ_ONCE(sqe->fd);
+	iw->infop = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+	iw->head = NULL;
+	return 0;
+}
+
+int io_waitid(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_waitid_async *iwa;
+	unsigned int f_flags = 0;
+	int ret;
+
+	if (io_alloc_async_data(req))
+		return -ENOMEM;
+
+	iwa = req->async_data;
+	iwa->req = req;
+
+	ret = kernel_waitid_prepare(&iwa->wo, iw->which, iw->upid, &iw->info,
+					iw->options, NULL, &f_flags);
+	if (ret)
+		goto done;
+
+	/*
+	 * Arm our callback and add us to the waitqueue, in case no events
+	 * are available.
+	 */
+	init_waitqueue_func_entry(&iwa->wo.child_wait, io_waitid_wait);
+	iwa->wo.child_wait.private = req->task;
+	iw->head = &current->signal->wait_chldexit;
+	add_wait_queue(iw->head, &iwa->wo.child_wait);
+
+	io_ring_submit_lock(ctx, issue_flags);
+	hlist_add_head(&req->hash_node, &ctx->waitid_list);
+
+	ret = __do_wait(&iwa->wo);
+	if (ret == -ERESTARTSYS) {
+		io_ring_submit_unlock(ctx, issue_flags);
+		return IOU_ISSUE_SKIP_COMPLETE;
+	}
+
+	hlist_del_init(&req->hash_node);
+	remove_wait_queue(iw->head, &iwa->wo.child_wait);
+	iw->head = NULL;
+	ret = io_waitid_finish(req, ret);
+
+	io_ring_submit_unlock(ctx, issue_flags);
+done:
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_set_res(req, ret, 0);
+	return IOU_OK;
+}
diff --git a/io_uring/waitid.h b/io_uring/waitid.h
new file mode 100644
index 000000000000..956a8adafe8c
--- /dev/null
+++ b/io_uring/waitid.h
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "../kernel/exit.h"
+
+struct io_waitid_async {
+	struct io_kiocb *req;	/* owning request, set in io_waitid() */
+	struct wait_opts wo;	/* wait state; wo.child_wait is the waitqueue entry */
+};
+
+int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_waitid(struct io_kiocb *req, unsigned int issue_flags);
+int io_waitid_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
+		     unsigned int issue_flags);
+bool io_waitid_remove_all(struct io_ring_ctx *ctx, struct task_struct *task,
+			  bool cancel_all);
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH 5/5] io_uring: add IORING_OP_WAITID support
  2023-07-11 20:43 ` [PATCH 5/5] io_uring: add IORING_OP_WAITID support Jens Axboe
@ 2023-07-11 21:11   ` Arnd Bergmann
  2023-07-11 21:22     ` Jens Axboe
  0 siblings, 1 reply; 21+ messages in thread
From: Arnd Bergmann @ 2023-07-11 21:11 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel; +Cc: Christian Brauner

On Tue, Jul 11, 2023, at 22:43, Jens Axboe wrote:
> This adds support for an async version of waitid(2), in a fully async
> version. If an event isn't immediately available, wait for a callback
> to trigger a retry.
>
> The format of the sqe is as follows:
>
> sqe->len		The 'which', the idtype being queried/waited for.
> sqe->fd			The 'pid' (or id) being waited for.
> sqe->file_index		The 'options' being set.
> sqe->addr2		A pointer to siginfo_t, if any, being filled in.
>
> buf_index, add3, and waitid_flags are reserved/unused for now.
> waitid_flags will be used for options for this request type. One
> interesting use case may be to add multi-shot support, so that the
> request stays armed and posts a notification every time a monitored
> process state change occurs.
>
> Note that this does not support rusage, on Arnd's recommendation.
>
> See the waitid(2) man page for details on the arguments.
>
> Signed-off-by: Jens Axboe <[email protected]>

Does this require argument conversion for compat tasks?

Even without the rusage argument, I think the siginfo
remains incompatible with 32-bit tasks, unfortunately.

     Arnd

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 5/5] io_uring: add IORING_OP_WAITID support
  2023-07-11 21:11   ` Arnd Bergmann
@ 2023-07-11 21:22     ` Jens Axboe
  2023-07-11 22:18       ` Jens Axboe
  0 siblings, 1 reply; 21+ messages in thread
From: Jens Axboe @ 2023-07-11 21:22 UTC (permalink / raw)
  To: Arnd Bergmann, io-uring, linux-kernel; +Cc: Christian Brauner

On 7/11/23 3:11 PM, Arnd Bergmann wrote:
> On Tue, Jul 11, 2023, at 22:43, Jens Axboe wrote:
>> This adds support for an async version of waitid(2), in a fully async
>> version. If an event isn't immediately available, wait for a callback
>> to trigger a retry.
>>
>> The format of the sqe is as follows:
>>
>> sqe->len		The 'which', the idtype being queried/waited for.
>> sqe->fd			The 'pid' (or id) being waited for.
>> sqe->file_index		The 'options' being set.
>> sqe->addr2		A pointer to siginfo_t, if any, being filled in.
>>
>> buf_index, add3, and waitid_flags are reserved/unused for now.
>> waitid_flags will be used for options for this request type. One
>> interesting use case may be to add multi-shot support, so that the
>> request stays armed and posts a notification every time a monitored
>> process state change occurs.
>>
>> Note that this does not support rusage, on Arnd's recommendation.
>>
>> See the waitid(2) man page for details on the arguments.
>>
>> Signed-off-by: Jens Axboe <[email protected]>
> 
> Does this require argument conversion for compat tasks?
> 
> Even without the rusage argument, I think the siginfo
> remains incompatible with 32-bit tasks, unfortunately.

Hmm yes good point, if compat_siginfo and siginfo are different, then it
does need handling for that. Would be a trivial addition, I'll make that
change. Thanks Arnd!

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 5/5] io_uring: add IORING_OP_WAITID support
  2023-07-11 21:22     ` Jens Axboe
@ 2023-07-11 22:18       ` Jens Axboe
  2023-07-14 15:47         ` Christian Brauner
  0 siblings, 1 reply; 21+ messages in thread
From: Jens Axboe @ 2023-07-11 22:18 UTC (permalink / raw)
  To: Arnd Bergmann, io-uring, linux-kernel; +Cc: Christian Brauner

On 7/11/23 3:22 PM, Jens Axboe wrote:
> On 7/11/23 3:11?PM, Arnd Bergmann wrote:
>> On Tue, Jul 11, 2023, at 22:43, Jens Axboe wrote:
>>> This adds support for an async version of waitid(2), in a fully async
>>> version. If an event isn't immediately available, wait for a callback
>>> to trigger a retry.
>>>
>>> The format of the sqe is as follows:
>>>
>>> sqe->len		The 'which', the idtype being queried/waited for.
>>> sqe->fd			The 'pid' (or id) being waited for.
>>> sqe->file_index		The 'options' being set.
>>> sqe->addr2		A pointer to siginfo_t, if any, being filled in.
>>>
>>> buf_index, add3, and waitid_flags are reserved/unused for now.
>>> waitid_flags will be used for options for this request type. One
>>> interesting use case may be to add multi-shot support, so that the
>>> request stays armed and posts a notification every time a monitored
>>> process state change occurs.
>>>
>>> Note that this does not support rusage, on Arnd's recommendation.
>>>
>>> See the waitid(2) man page for details on the arguments.
>>>
>>> Signed-off-by: Jens Axboe <[email protected]>
>>
>> Does this require argument conversion for compat tasks?
>>
>> Even without the rusage argument, I think the siginfo
>> remains incompatible with 32-bit tasks, unfortunately.
> 
> Hmm yes good point, if compat_siginfo and siginfo are different, then it
> does need handling for that. Would be a trivial addition, I'll make that
> change. Thanks Arnd!

Should be fixed in the current version:

https://git.kernel.dk/cgit/linux/commit/?h=io_uring-waitid&id=08f3dc9b7cedbd20c0f215f25c9a7814c6c601cc

-- 
Jens Axboe



^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 5/5] io_uring: add IORING_OP_WAITID support
  2023-07-11 22:18       ` Jens Axboe
@ 2023-07-14 15:47         ` Christian Brauner
  2023-07-14 18:33           ` Arnd Bergmann
  0 siblings, 1 reply; 21+ messages in thread
From: Christian Brauner @ 2023-07-14 15:47 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Arnd Bergmann, io-uring, linux-kernel

On Tue, Jul 11, 2023 at 04:18:13PM -0600, Jens Axboe wrote:
> On 7/11/23 3:22 PM, Jens Axboe wrote:
> > On 7/11/23 3:11?PM, Arnd Bergmann wrote:
> >> On Tue, Jul 11, 2023, at 22:43, Jens Axboe wrote:
> >>> This adds support for an async version of waitid(2), in a fully async
> >>> version. If an event isn't immediately available, wait for a callback
> >>> to trigger a retry.
> >>>
> >>> The format of the sqe is as follows:
> >>>
> >>> sqe->len		The 'which', the idtype being queried/waited for.
> >>> sqe->fd			The 'pid' (or id) being waited for.
> >>> sqe->file_index		The 'options' being set.
> >>> sqe->addr2		A pointer to siginfo_t, if any, being filled in.
> >>>
> >>> buf_index, add3, and waitid_flags are reserved/unused for now.
> >>> waitid_flags will be used for options for this request type. One
> >>> interesting use case may be to add multi-shot support, so that the
> >>> request stays armed and posts a notification every time a monitored
> >>> process state change occurs.
> >>>
> >>> Note that this does not support rusage, on Arnd's recommendation.
> >>>
> >>> See the waitid(2) man page for details on the arguments.
> >>>
> >>> Signed-off-by: Jens Axboe <[email protected]>
> >>
> >> Does this require argument conversion for compat tasks?
> >>
> >> Even without the rusage argument, I think the siginfo
> >> remains incompatible with 32-bit tasks, unfortunately.
> > 
> > Hmm yes good point, if compat_siginfo and siginfo are different, then it
> > does need handling for that. Would be a trivial addition, I'll make that
> > change. Thanks Arnd!
> 
> Should be fixed in the current version:
> 
> https://git.kernel.dk/cgit/linux/commit/?h=io_uring-waitid&id=08f3dc9b7cedbd20c0f215f25c9a7814c6c601cc

In kernel/signal.c in pidfd_send_signal() we have
copy_siginfo_from_user_any() it seems that a similar version
copy_siginfo_to_user_any() might be something to consider. We do have
copy_siginfo_to_user32() and copy_siginfo_to_user(). But I may lack
context why this wouldn't work here.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 5/5] io_uring: add IORING_OP_WAITID support
  2023-07-14 15:47         ` Christian Brauner
@ 2023-07-14 18:33           ` Arnd Bergmann
  2023-07-14 20:14             ` Jens Axboe
  0 siblings, 1 reply; 21+ messages in thread
From: Arnd Bergmann @ 2023-07-14 18:33 UTC (permalink / raw)
  To: Christian Brauner, Jens Axboe; +Cc: io-uring, linux-kernel

On Fri, Jul 14, 2023, at 17:47, Christian Brauner wrote:
> On Tue, Jul 11, 2023 at 04:18:13PM -0600, Jens Axboe wrote:
>> On 7/11/23 3:22 PM, Jens Axboe wrote:
>> > On 7/11/23 3:11?PM, Arnd Bergmann wrote:

>> >> Does this require argument conversion for compat tasks?
>> >>
>> >> Even without the rusage argument, I think the siginfo
>> >> remains incompatible with 32-bit tasks, unfortunately.
>> > 
>> > Hmm yes good point, if compat_siginfo and siginfo are different, then it
>> > does need handling for that. Would be a trivial addition, I'll make that
>> > change. Thanks Arnd!
>> 
>> Should be fixed in the current version:
>> 
>> https://git.kernel.dk/cgit/linux/commit/?h=io_uring-waitid&id=08f3dc9b7cedbd20c0f215f25c9a7814c6c601cc
>
> In kernel/signal.c in pidfd_send_signal() we have
> copy_siginfo_from_user_any() it seems that a similar version
> copy_siginfo_to_user_any() might be something to consider. We do have
> copy_siginfo_to_user32() and copy_siginfo_to_user(). But I may lack
> context why this wouldn't work here.

We could add a copy_siginfo_to_user_any(), but I think open-coding
it is easier here, since the in_compat_syscall() check does not
work inside of the io_uring kernel thread, it has to be
"if (req->ctx->compat)" in order to match the wordsize of the task
that started the request.

Using copy_siginfo_to_user32() and copy_siginfo_to_user() is
probably a good idea though, it's often faster and less
error-prone than writing each member separately.

      Arnd

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 5/5] io_uring: add IORING_OP_WAITID support
  2023-07-14 18:33           ` Arnd Bergmann
@ 2023-07-14 20:14             ` Jens Axboe
  2023-07-15  7:12               ` Arnd Bergmann
  0 siblings, 1 reply; 21+ messages in thread
From: Jens Axboe @ 2023-07-14 20:14 UTC (permalink / raw)
  To: Arnd Bergmann, Christian Brauner; +Cc: io-uring, linux-kernel

On 7/14/23 12:33 PM, Arnd Bergmann wrote:
> On Fri, Jul 14, 2023, at 17:47, Christian Brauner wrote:
>> On Tue, Jul 11, 2023 at 04:18:13PM -0600, Jens Axboe wrote:
>>> On 7/11/23 3:22?PM, Jens Axboe wrote:
>>>> On 7/11/23 3:11?PM, Arnd Bergmann wrote:
> 
>>>>> Does this require argument conversion for compat tasks?
>>>>>
>>>>> Even without the rusage argument, I think the siginfo
>>>>> remains incompatible with 32-bit tasks, unfortunately.
>>>>
>>>> Hmm yes good point, if compat_siginfo and siginfo are different, then it
>>>> does need handling for that. Would be a trivial addition, I'll make that
>>>> change. Thanks Arnd!
>>>
>>> Should be fixed in the current version:
>>>
>>> https://git.kernel.dk/cgit/linux/commit/?h=io_uring-waitid&id=08f3dc9b7cedbd20c0f215f25c9a7814c6c601cc
>>
>> In kernel/signal.c in pidfd_send_signal() we have
>> copy_siginfo_from_user_any() it seems that a similar version
>> copy_siginfo_to_user_any() might be something to consider. We do have
>> copy_siginfo_to_user32() and copy_siginfo_to_user(). But I may lack
>> context why this wouldn't work here.
> 
> We could add a copy_siginfo_to_user_any(), but I think open-coding
> it is easier here, since the in_compat_syscall() check does not
> work inside of the io_uring kernel thread, it has to be
> "if (req->ctx->compat)" in order to match the wordsize of the task
> that started the request.

Yeah, unifying this stuff did cross my mind when adding another one.
Which I think could still be done, you'd just need to pass in a 'compat'
parameter similar to how it's done for iovec importing.

But if it's ok with everybody I'd rather do that as a cleanup post this.

> Using copy_siginfo_to_user32() and copy_siginfo_to_user() is
> probably a good idea though, it's often faster and less
> error-prone than writing each member separately.

I was just pattern matching on the other use cases. I'll take a look at
the siginfo copy helpers, thanks!

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 5/5] io_uring: add IORING_OP_WAITID support
  2023-07-14 20:14             ` Jens Axboe
@ 2023-07-15  7:12               ` Arnd Bergmann
  2023-07-15 14:06                 ` Jens Axboe
  0 siblings, 1 reply; 21+ messages in thread
From: Arnd Bergmann @ 2023-07-15  7:12 UTC (permalink / raw)
  To: Jens Axboe, Christian Brauner; +Cc: io-uring, linux-kernel

On Fri, Jul 14, 2023, at 22:14, Jens Axboe wrote:
> On 7/14/23 12:33?PM, Arnd Bergmann wrote:
>> On Fri, Jul 14, 2023, at 17:47, Christian Brauner wrote:
>>> On Tue, Jul 11, 2023 at 04:18:13PM -0600, Jens Axboe wrote:
>>>>>> Does this require argument conversion for compat tasks?
>>>>>>
>>>>>> Even without the rusage argument, I think the siginfo
>>>>>> remains incompatible with 32-bit tasks, unfortunately.
>>>>>
>>>>> Hmm yes good point, if compat_siginfo and siginfo are different, then it
>>>>> does need handling for that. Would be a trivial addition, I'll make that
>>>>> change. Thanks Arnd!
>>>>
>>>> Should be fixed in the current version:
>>>>
>>>> https://git.kernel.dk/cgit/linux/commit/?h=io_uring-waitid&id=08f3dc9b7cedbd20c0f215f25c9a7814c6c601cc
>>>
>>> In kernel/signal.c in pidfd_send_signal() we have
>>> copy_siginfo_from_user_any() it seems that a similar version
>>> copy_siginfo_to_user_any() might be something to consider. We do have
>>> copy_siginfo_to_user32() and copy_siginfo_to_user(). But I may lack
>>> context why this wouldn't work here.
>> 
>> We could add a copy_siginfo_to_user_any(), but I think open-coding
>> it is easier here, since the in_compat_syscall() check does not
>> work inside of the io_uring kernel thread, it has to be
>> "if (req->ctx->compat)" in order to match the wordsize of the task
>> that started the request.
>
> Yeah, unifying this stuff did cross my mind when adding another one.
> Which I think could still be done, you'd just need to pass in a 'compat'
> parameter similar to how it's done for iovec importing.
>
> But if it's ok with everybody I'd rather do that as a cleanup post this.

Sure, keeping that separate seem best.

Looking at what copy_siginfo_from_user_any() actually does, I don't
even think it's worth adapting copy_siginfo_to_user_any() for io_uring,
since it's already just a trivial wrapper, and adding another
argument would add more complexity overall than it saves.

      Arnd

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 5/5] io_uring: add IORING_OP_WAITID support
  2023-07-15  7:12               ` Arnd Bergmann
@ 2023-07-15 14:06                 ` Jens Axboe
  2023-07-15 14:34                   ` Jens Axboe
  2023-07-15 20:23                   ` Jens Axboe
  0 siblings, 2 replies; 21+ messages in thread
From: Jens Axboe @ 2023-07-15 14:06 UTC (permalink / raw)
  To: Arnd Bergmann, Christian Brauner; +Cc: io-uring, linux-kernel

On 7/15/23 1:12 AM, Arnd Bergmann wrote:
> On Fri, Jul 14, 2023, at 22:14, Jens Axboe wrote:
>> On 7/14/23 12:33?PM, Arnd Bergmann wrote:
>>> On Fri, Jul 14, 2023, at 17:47, Christian Brauner wrote:
>>>> On Tue, Jul 11, 2023 at 04:18:13PM -0600, Jens Axboe wrote:
>>>>>>> Does this require argument conversion for compat tasks?
>>>>>>>
>>>>>>> Even without the rusage argument, I think the siginfo
>>>>>>> remains incompatible with 32-bit tasks, unfortunately.
>>>>>>
>>>>>> Hmm yes good point, if compat_siginfo and siginfo are different, then it
>>>>>> does need handling for that. Would be a trivial addition, I'll make that
>>>>>> change. Thanks Arnd!
>>>>>
>>>>> Should be fixed in the current version:
>>>>>
>>>>> https://git.kernel.dk/cgit/linux/commit/?h=io_uring-waitid&id=08f3dc9b7cedbd20c0f215f25c9a7814c6c601cc
>>>>
>>>> In kernel/signal.c in pidfd_send_signal() we have
>>>> copy_siginfo_from_user_any() it seems that a similar version
>>>> copy_siginfo_to_user_any() might be something to consider. We do have
>>>> copy_siginfo_to_user32() and copy_siginfo_to_user(). But I may lack
>>>> context why this wouldn't work here.
>>>
>>> We could add a copy_siginfo_to_user_any(), but I think open-coding
>>> it is easier here, since the in_compat_syscall() check does not
>>> work inside of the io_uring kernel thread, it has to be
>>> "if (req->ctx->compat)" in order to match the wordsize of the task
>>> that started the request.
>>
>> Yeah, unifying this stuff did cross my mind when adding another one.
>> Which I think could still be done, you'd just need to pass in a 'compat'
>> parameter similar to how it's done for iovec importing.
>>
>> But if it's ok with everybody I'd rather do that as a cleanup post this.
> 
> Sure, keeping that separate seem best.
> 
> Looking at what copy_siginfo_from_user_any() actually does, I don't
> even think it's worth adapting copy_siginfo_to_user_any() for io_uring,
> since it's already just a trivial wrapper, and adding another
> argument would add more complexity overall than it saves.

Yeah, took a look too this morning, and not sure there's much to reduce
here that would make it cleaner. I'm going to send out a v2 with this
unchanged, holler if people disagree.

-- 
Jens Axboe



^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 5/5] io_uring: add IORING_OP_WAITID support
  2023-07-15 14:06                 ` Jens Axboe
@ 2023-07-15 14:34                   ` Jens Axboe
  2023-07-15 20:23                   ` Jens Axboe
  1 sibling, 0 replies; 21+ messages in thread
From: Jens Axboe @ 2023-07-15 14:34 UTC (permalink / raw)
  To: Arnd Bergmann, Christian Brauner; +Cc: io-uring, linux-kernel

On 7/15/23 8:06 AM, Jens Axboe wrote:
> On 7/15/23 1:12 AM, Arnd Bergmann wrote:
>> On Fri, Jul 14, 2023, at 22:14, Jens Axboe wrote:
>>> On 7/14/23 12:33 PM, Arnd Bergmann wrote:
>>>> On Fri, Jul 14, 2023, at 17:47, Christian Brauner wrote:
>>>>> On Tue, Jul 11, 2023 at 04:18:13PM -0600, Jens Axboe wrote:
>>>>>>>> Does this require argument conversion for compat tasks?
>>>>>>>>
>>>>>>>> Even without the rusage argument, I think the siginfo
>>>>>>>> remains incompatible with 32-bit tasks, unfortunately.
>>>>>>>
>>>>>>> Hmm yes good point, if compat_siginfo and siginfo are different, then it
>>>>>>> does need handling for that. Would be a trivial addition, I'll make that
>>>>>>> change. Thanks Arnd!
>>>>>>
>>>>>> Should be fixed in the current version:
>>>>>>
>>>>>> https://git.kernel.dk/cgit/linux/commit/?h=io_uring-waitid&id=08f3dc9b7cedbd20c0f215f25c9a7814c6c601cc
>>>>>
>>>>> In kernel/signal.c in pidfd_send_signal() we have
>>>>> copy_siginfo_from_user_any() it seems that a similar version
>>>>> copy_siginfo_to_user_any() might be something to consider. We do have
>>>>> copy_siginfo_to_user32() and copy_siginfo_to_user(). But I may lack
>>>>> context why this wouldn't work here.
>>>>
>>>> We could add a copy_siginfo_to_user_any(), but I think open-coding
>>>> it is easier here, since the in_compat_syscall() check does not
>>>> work inside of the io_uring kernel thread, it has to be
>>>> "if (req->ctx->compat)" in order to match the wordsize of the task
>>>> that started the request.
>>>
>>> Yeah, unifying this stuff did cross my mind when adding another one.
>>> Which I think could still be done, you'd just need to pass in a 'compat'
>>> parameter similar to how it's done for iovec importing.
>>>
>>> But if it's ok with everybody I'd rather do that as a cleanup post this.
>>
>> Sure, keeping that separate seem best.
>>
>> Looking at what copy_siginfo_from_user_any() actually does, I don't
>> even think it's worth adapting copy_siginfo_to_user_any() for io_uring,
>> since it's already just a trivial wrapper, and adding another
>> argument would add more complexity overall than it saves.
> 
> Yeah, took a look too this morning, and not sure there's much to reduce
> here that would make it cleaner. I'm going to send out a v2 with this
> unchanged, holler if people disagree.

Looking over changes, none have been made so far. So I guess a v2 can
wait a bit. The branch was rebased to add Christian's acked-bys for some
of the patches, and because the branch it was based on (io_uring-futex)
got rebased to accommodate PeterZ's changes.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 5/5] io_uring: add IORING_OP_WAITID support
  2023-07-15 14:06                 ` Jens Axboe
  2023-07-15 14:34                   ` Jens Axboe
@ 2023-07-15 20:23                   ` Jens Axboe
  1 sibling, 0 replies; 21+ messages in thread
From: Jens Axboe @ 2023-07-15 20:23 UTC (permalink / raw)
  To: Arnd Bergmann, Christian Brauner; +Cc: io-uring, linux-kernel

On 7/15/23 8:06 AM, Jens Axboe wrote:
> On 7/15/23 1:12 AM, Arnd Bergmann wrote:
>> On Fri, Jul 14, 2023, at 22:14, Jens Axboe wrote:
>>> On 7/14/23 12:33 PM, Arnd Bergmann wrote:
>>>> On Fri, Jul 14, 2023, at 17:47, Christian Brauner wrote:
>>>>> On Tue, Jul 11, 2023 at 04:18:13PM -0600, Jens Axboe wrote:
>>>>>>>> Does this require argument conversion for compat tasks?
>>>>>>>>
>>>>>>>> Even without the rusage argument, I think the siginfo
>>>>>>>> remains incompatible with 32-bit tasks, unfortunately.
>>>>>>>
>>>>>>> Hmm yes good point, if compat_siginfo and siginfo are different, then it
>>>>>>> does need handling for that. Would be a trivial addition, I'll make that
>>>>>>> change. Thanks Arnd!
>>>>>>
>>>>>> Should be fixed in the current version:
>>>>>>
>>>>>> https://git.kernel.dk/cgit/linux/commit/?h=io_uring-waitid&id=08f3dc9b7cedbd20c0f215f25c9a7814c6c601cc
>>>>>
>>>>> In kernel/signal.c in pidfd_send_signal() we have
>>>>> copy_siginfo_from_user_any() it seems that a similar version
>>>>> copy_siginfo_to_user_any() might be something to consider. We do have
>>>>> copy_siginfo_to_user32() and copy_siginfo_to_user(). But I may lack
>>>>> context why this wouldn't work here.
>>>>
>>>> We could add a copy_siginfo_to_user_any(), but I think open-coding
>>>> it is easier here, since the in_compat_syscall() check does not
>>>> work inside of the io_uring kernel thread, it has to be
>>>> "if (req->ctx->compat)" in order to match the wordsize of the task
>>>> that started the request.
>>>
>>> Yeah, unifying this stuff did cross my mind when adding another one.
>>> Which I think could still be done, you'd just need to pass in a 'compat'
>>> parameter similar to how it's done for iovec importing.
>>>
>>> But if it's ok with everybody I'd rather do that as a cleanup post this.
>>
>> Sure, keeping that separate seem best.
>>
>> Looking at what copy_siginfo_from_user_any() actually does, I don't
>> even think it's worth adapting copy_siginfo_to_user_any() for io_uring,
>> since it's already just a trivial wrapper, and adding another
>> argument would add more complexity overall than it saves.
> 
> Yeah, took a look too this morning, and not sure there's much to reduce
> here that would make it cleaner. I'm going to send out a v2 with this
> unchanged, holler if people disagree.

One thing we could do is the below, but honestly not sure it's worth the
hassle?


diff --git a/io_uring/waitid.c b/io_uring/waitid.c
index 14ffa07e161a..6de1041c4784 100644
--- a/io_uring/waitid.c
+++ b/io_uring/waitid.c
@@ -43,6 +43,8 @@ static bool io_waitid_compat_copy_si(struct io_waitid *iw, int signo)
 	bool ret;
 
 	infop = (struct compat_siginfo __user *) iw->infop;
+	if (!infop)
+		return true;
 
 	if (!user_write_access_begin(infop, sizeof(*infop)))
 		return false;
@@ -66,32 +68,13 @@ static bool io_waitid_compat_copy_si(struct io_waitid *iw, int signo)
 static bool io_waitid_copy_si(struct io_kiocb *req, int signo)
 {
 	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
-	bool ret;
-
-	if (!iw->infop)
-		return true;
 
 #ifdef CONFIG_COMPAT
 	if (req->ctx->compat)
 		return io_waitid_compat_copy_si(iw, signo);
 #endif
 
-	if (!user_write_access_begin(iw->infop, sizeof(*iw->infop)))
-		return false;
-
-	unsafe_put_user(signo, &iw->infop->si_signo, Efault);
-	unsafe_put_user(0, &iw->infop->si_errno, Efault);
-	unsafe_put_user(iw->info.cause, &iw->infop->si_code, Efault);
-	unsafe_put_user(iw->info.pid, &iw->infop->si_pid, Efault);
-	unsafe_put_user(iw->info.uid, &iw->infop->si_uid, Efault);
-	unsafe_put_user(iw->info.status, &iw->infop->si_status, Efault);
-	ret = true;
-done:
-	user_write_access_end();
-	return ret;
-Efault:
-	ret = false;
-	goto done;
+	return siginfo_put_user(iw->infop, &iw->info, signo);
 }
 
 static int io_waitid_finish(struct io_kiocb *req, int ret)
diff --git a/kernel/exit.c b/kernel/exit.c
index 1c9d1cbadcd0..e3a0b6699a23 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1723,6 +1723,28 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
 	return ret;
 }
 
+bool siginfo_put_user(struct siginfo __user *infop, struct waitid_info *wi,
+		      int signo)
+{
+	if (!infop)
+		return true;
+
+	if (!user_write_access_begin(infop, sizeof(*infop)))
+		return false;
+
+	unsafe_put_user(signo, &infop->si_signo, Efault);
+	unsafe_put_user(0, &infop->si_errno, Efault);
+	unsafe_put_user(wi->cause, &infop->si_code, Efault);
+	unsafe_put_user(wi->pid, &infop->si_pid, Efault);
+	unsafe_put_user(wi->uid, &infop->si_uid, Efault);
+	unsafe_put_user(wi->status, &infop->si_status, Efault);
+	user_write_access_end();
+	return true;
+Efault:
+	user_write_access_end();
+	return false;
+}
+
 SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
 		infop, int, options, struct rusage __user *, ru)
 {
@@ -1737,23 +1759,9 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
 		if (ru && copy_to_user(ru, &r, sizeof(struct rusage)))
 			return -EFAULT;
 	}
-	if (!infop)
-		return err;
-
-	if (!user_write_access_begin(infop, sizeof(*infop)))
+	if (!siginfo_put_user(infop, &info, signo))
 		return -EFAULT;
-
-	unsafe_put_user(signo, &infop->si_signo, Efault);
-	unsafe_put_user(0, &infop->si_errno, Efault);
-	unsafe_put_user(info.cause, &infop->si_code, Efault);
-	unsafe_put_user(info.pid, &infop->si_pid, Efault);
-	unsafe_put_user(info.uid, &infop->si_uid, Efault);
-	unsafe_put_user(info.status, &infop->si_status, Efault);
-	user_write_access_end();
 	return err;
-Efault:
-	user_write_access_end();
-	return -EFAULT;
 }
 
 long kernel_wait4(pid_t upid, int __user *stat_addr, int options,
diff --git a/kernel/exit.h b/kernel/exit.h
index f10207ba1341..b7e0e32133fa 100644
--- a/kernel/exit.h
+++ b/kernel/exit.h
@@ -27,4 +27,6 @@ long __do_wait(struct wait_opts *wo);
 int kernel_waitid_prepare(struct wait_opts *wo, int which, pid_t upid,
 			  struct waitid_info *infop, int options,
 			  struct rusage *ru, unsigned int *f_flags);
+bool siginfo_put_user(struct siginfo __user *infop, struct waitid_info *wi,
+		      int signo);
 #endif

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCHSET v2] Add io_uring support for waitid
@ 2023-08-02 23:14 Jens Axboe
  2023-08-02 23:14 ` [PATCH 1/5] exit: abtract out should_wake helper for child_wait_callback() Jens Axboe
                   ` (4 more replies)
  0 siblings, 5 replies; 21+ messages in thread
From: Jens Axboe @ 2023-08-02 23:14 UTC (permalink / raw)
  To: io-uring, linux-kernel; +Cc: brauner, arnd

Hi,

This adds support for IORING_OP_WAITID, which is an async variant of
the waitid(2) syscall. Rather than have a parent need to block waiting
on a child task state change, it can now simply get an async notification
when the requested state change has occurred.

Patches 1..4 are purely prep patches, and should not have functional
changes. They split out parts of do_wait() into __do_wait(), so that
the prepare-to-wait and sleep parts are contained within do_wait().

Patch 5 adds io_uring support.

I wrote a few basic tests for this, which can be found in the
'waitid' branch of liburing:

https://git.kernel.dk/cgit/liburing/log/?h=waitid

Also spun a custom kernel for someone to test it, and no issues reported
so far.

 include/linux/io_uring_types.h |   2 +
 include/uapi/linux/io_uring.h  |   2 +
 io_uring/Makefile              |   2 +-
 io_uring/cancel.c              |   5 +
 io_uring/io_uring.c            |   3 +
 io_uring/opdef.c               |   9 +
 io_uring/waitid.c              | 313 +++++++++++++++++++++++++++++++++
 io_uring/waitid.h              |  15 ++
 kernel/exit.c                  | 132 +++++++-------
 kernel/exit.h                  |  30 ++++
 10 files changed, 453 insertions(+), 60 deletions(-)

The code can also be found here:

https://git.kernel.dk/cgit/linux/log/?h=io_uring-waitid

Changes since v1:
- Rebase on io_uring-futex again, as that saw changes.
- Add compat handling of siginfo (Arnd)
- Add reviewed-by to patches 1+2
- Minor cleanups

-- 
Jens Axboe



^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 1/5] exit: abtract out should_wake helper for child_wait_callback()
  2023-08-02 23:14 [PATCHSET v2] Add io_uring support for waitid Jens Axboe
@ 2023-08-02 23:14 ` Jens Axboe
  2023-08-02 23:14 ` [PATCH 2/5] exit: move core of do_wait() into helper Jens Axboe
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 21+ messages in thread
From: Jens Axboe @ 2023-08-02 23:14 UTC (permalink / raw)
  To: io-uring, linux-kernel; +Cc: brauner, arnd, Jens Axboe

Abstract out the helper that decides if we should wake up following
a wake_up() callback on our internal waitqueue.

No functional changes intended in this patch.

Acked-by: Christian Brauner <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
 kernel/exit.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/kernel/exit.c b/kernel/exit.c
index edb50b4c9972..2809dad69492 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1520,6 +1520,17 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
 	return 0;
 }
 
+static bool pid_child_should_wake(struct wait_opts *wo, struct task_struct *p)
+{
+	if (!eligible_pid(wo, p))
+		return false;
+
+	if ((wo->wo_flags & __WNOTHREAD) && wo->child_wait.private != p->parent)
+		return false;
+
+	return true;
+}
+
 static int child_wait_callback(wait_queue_entry_t *wait, unsigned mode,
 				int sync, void *key)
 {
@@ -1527,13 +1538,10 @@ static int child_wait_callback(wait_queue_entry_t *wait, unsigned mode,
 						child_wait);
 	struct task_struct *p = key;
 
-	if (!eligible_pid(wo, p))
-		return 0;
+	if (pid_child_should_wake(wo, p))
+		return default_wake_function(wait, mode, sync, key);
 
-	if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent)
-		return 0;
-
-	return default_wake_function(wait, mode, sync, key);
+	return 0;
 }
 
 void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 2/5] exit: move core of do_wait() into helper
  2023-08-02 23:14 [PATCHSET v2] Add io_uring support for waitid Jens Axboe
  2023-08-02 23:14 ` [PATCH 1/5] exit: abtract out should_wake helper for child_wait_callback() Jens Axboe
@ 2023-08-02 23:14 ` Jens Axboe
  2023-08-02 23:14 ` [PATCH 3/5] exit: add kernel_waitid_prepare() helper Jens Axboe
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 21+ messages in thread
From: Jens Axboe @ 2023-08-02 23:14 UTC (permalink / raw)
  To: io-uring, linux-kernel; +Cc: brauner, arnd, Jens Axboe

Rather than have a maze of gotos, put the actual logic in __do_wait()
and have do_wait() loop deal with waitqueue setup/teardown and whether
to call __do_wait() again.

No functional changes intended in this patch.

Acked-by: Christian Brauner <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
 kernel/exit.c | 49 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 19 deletions(-)

diff --git a/kernel/exit.c b/kernel/exit.c
index 2809dad69492..d8fb124cc038 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1590,16 +1590,10 @@ static int do_wait_pid(struct wait_opts *wo)
 	return 0;
 }
 
-static long do_wait(struct wait_opts *wo)
+static long __do_wait(struct wait_opts *wo)
 {
-	int retval;
-
-	trace_sched_process_wait(wo->wo_pid);
+	long retval;
 
-	init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);
-	wo->child_wait.private = current;
-	add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
-repeat:
 	/*
 	 * If there is nothing that can match our criteria, just get out.
 	 * We will clear ->notask_error to zero if we see any child that
@@ -1617,18 +1611,18 @@ static long do_wait(struct wait_opts *wo)
 	if (wo->wo_type == PIDTYPE_PID) {
 		retval = do_wait_pid(wo);
 		if (retval)
-			goto end;
+			return retval;
 	} else {
 		struct task_struct *tsk = current;
 
 		do {
 			retval = do_wait_thread(wo, tsk);
 			if (retval)
-				goto end;
+				return retval;
 
 			retval = ptrace_do_wait(wo, tsk);
 			if (retval)
-				goto end;
+				return retval;
 
 			if (wo->wo_flags & __WNOTHREAD)
 				break;
@@ -1638,14 +1632,31 @@ static long do_wait(struct wait_opts *wo)
 
 notask:
 	retval = wo->notask_error;
-	if (!retval && !(wo->wo_flags & WNOHANG)) {
-		retval = -ERESTARTSYS;
-		if (!signal_pending(current)) {
-			schedule();
-			goto repeat;
-		}
-	}
-end:
+	if (!retval && !(wo->wo_flags & WNOHANG))
+		return -ERESTARTSYS;
+
+	return retval;
+}
+
+static long do_wait(struct wait_opts *wo)
+{
+	int retval;
+
+	trace_sched_process_wait(wo->wo_pid);
+
+	init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);
+	wo->child_wait.private = current;
+	add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
+
+	do {
+		retval = __do_wait(wo);
+		if (retval != -ERESTARTSYS)
+			break;
+		if (signal_pending(current))
+			break;
+		schedule();
+	} while (1);
+
 	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
 	return retval;
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 3/5] exit: add kernel_waitid_prepare() helper
  2023-08-02 23:14 [PATCHSET v2] Add io_uring support for waitid Jens Axboe
  2023-08-02 23:14 ` [PATCH 1/5] exit: abtract out should_wake helper for child_wait_callback() Jens Axboe
  2023-08-02 23:14 ` [PATCH 2/5] exit: move core of do_wait() into helper Jens Axboe
@ 2023-08-02 23:14 ` Jens Axboe
  2023-08-02 23:14 ` [PATCH 4/5] exit: add internal include file with helpers Jens Axboe
  2023-08-02 23:14 ` [PATCH 5/5] io_uring: add IORING_OP_WAITID support Jens Axboe
  4 siblings, 0 replies; 21+ messages in thread
From: Jens Axboe @ 2023-08-02 23:14 UTC (permalink / raw)
  To: io-uring, linux-kernel; +Cc: brauner, arnd, Jens Axboe

Move the setup logic out of kernel_waitid(), and into a separate helper.

No functional changes intended in this patch.

Signed-off-by: Jens Axboe <[email protected]>
---
 kernel/exit.c | 41 +++++++++++++++++++++++++++--------------
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/kernel/exit.c b/kernel/exit.c
index d8fb124cc038..8934c91a9fe1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1662,14 +1662,12 @@ static long do_wait(struct wait_opts *wo)
 	return retval;
 }
 
-static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
-			  int options, struct rusage *ru)
+static int kernel_waitid_prepare(struct wait_opts *wo, int which, pid_t upid,
+				 struct waitid_info *infop, int options,
+				 struct rusage *ru, unsigned int *f_flags)
 {
-	struct wait_opts wo;
 	struct pid *pid = NULL;
 	enum pid_type type;
-	long ret;
-	unsigned int f_flags = 0;
 
 	if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED|
 			__WNOTHREAD|__WCLONE|__WALL))
@@ -1703,7 +1701,7 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
 		if (upid < 0)
 			return -EINVAL;
 
-		pid = pidfd_get_pid(upid, &f_flags);
+		pid = pidfd_get_pid(upid, f_flags);
 		if (IS_ERR(pid))
 			return PTR_ERR(pid);
 
@@ -1712,19 +1710,34 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
 		return -EINVAL;
 	}
 
-	wo.wo_type	= type;
-	wo.wo_pid	= pid;
-	wo.wo_flags	= options;
-	wo.wo_info	= infop;
-	wo.wo_rusage	= ru;
-	if (f_flags & O_NONBLOCK)
-		wo.wo_flags |= WNOHANG;
+	wo->wo_type	= type;
+	wo->wo_pid	= pid;
+	wo->wo_flags	= options;
+	wo->wo_info	= infop;
+	wo->wo_rusage	= ru;
+	if (*f_flags & O_NONBLOCK)
+		wo->wo_flags |= WNOHANG;
+
+	return 0;
+}
+
+static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
+			  int options, struct rusage *ru)
+{
+	struct wait_opts wo;
+	long ret;
+	unsigned int f_flags = 0;
+
+	ret = kernel_waitid_prepare(&wo, which, upid, infop, options, ru,
+					&f_flags);
+	if (ret)
+		return ret;
 
 	ret = do_wait(&wo);
 	if (!ret && !(options & WNOHANG) && (f_flags & O_NONBLOCK))
 		ret = -EAGAIN;
 
-	put_pid(pid);
+	put_pid(wo.wo_pid);
 	return ret;
 }
 
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 4/5] exit: add internal include file with helpers
  2023-08-02 23:14 [PATCHSET v2] Add io_uring support for waitid Jens Axboe
                   ` (2 preceding siblings ...)
  2023-08-02 23:14 ` [PATCH 3/5] exit: add kernel_waitid_prepare() helper Jens Axboe
@ 2023-08-02 23:14 ` Jens Axboe
  2023-08-02 23:14 ` [PATCH 5/5] io_uring: add IORING_OP_WAITID support Jens Axboe
  4 siblings, 0 replies; 21+ messages in thread
From: Jens Axboe @ 2023-08-02 23:14 UTC (permalink / raw)
  To: io-uring, linux-kernel; +Cc: brauner, arnd, Jens Axboe

Move struct wait_opts and waitid_info into kernel/exit.h, and include
function declarations for the recently added helpers. Make them
non-static as well.

This is in preparation for adding a waitid operation through io_uring.
With the abstracted helpers, this is now possible.

Signed-off-by: Jens Axboe <[email protected]>
---
 kernel/exit.c | 32 +++++++-------------------------
 kernel/exit.h | 30 ++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 25 deletions(-)
 create mode 100644 kernel/exit.h

diff --git a/kernel/exit.c b/kernel/exit.c
index 8934c91a9fe1..1c9d1cbadcd0 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -74,6 +74,8 @@
 #include <asm/unistd.h>
 #include <asm/mmu_context.h>
 
+#include "exit.h"
+
 /*
  * The default value should be high enough to not crash a system that randomly
  * crashes its kernel from time to time, but low enough to at least not permit
@@ -1037,26 +1039,6 @@ SYSCALL_DEFINE1(exit_group, int, error_code)
 	return 0;
 }
 
-struct waitid_info {
-	pid_t pid;
-	uid_t uid;
-	int status;
-	int cause;
-};
-
-struct wait_opts {
-	enum pid_type		wo_type;
-	int			wo_flags;
-	struct pid		*wo_pid;
-
-	struct waitid_info	*wo_info;
-	int			wo_stat;
-	struct rusage		*wo_rusage;
-
-	wait_queue_entry_t		child_wait;
-	int			notask_error;
-};
-
 static int eligible_pid(struct wait_opts *wo, struct task_struct *p)
 {
 	return	wo->wo_type == PIDTYPE_MAX ||
@@ -1520,7 +1502,7 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
 	return 0;
 }
 
-static bool pid_child_should_wake(struct wait_opts *wo, struct task_struct *p)
+bool pid_child_should_wake(struct wait_opts *wo, struct task_struct *p)
 {
 	if (!eligible_pid(wo, p))
 		return false;
@@ -1590,7 +1572,7 @@ static int do_wait_pid(struct wait_opts *wo)
 	return 0;
 }
 
-static long __do_wait(struct wait_opts *wo)
+long __do_wait(struct wait_opts *wo)
 {
 	long retval;
 
@@ -1662,9 +1644,9 @@ static long do_wait(struct wait_opts *wo)
 	return retval;
 }
 
-static int kernel_waitid_prepare(struct wait_opts *wo, int which, pid_t upid,
-				 struct waitid_info *infop, int options,
-				 struct rusage *ru, unsigned int *f_flags)
+int kernel_waitid_prepare(struct wait_opts *wo, int which, pid_t upid,
+			  struct waitid_info *infop, int options,
+			  struct rusage *ru, unsigned int *f_flags)
 {
 	struct pid *pid = NULL;
 	enum pid_type type;
diff --git a/kernel/exit.h b/kernel/exit.h
new file mode 100644
index 000000000000..f10207ba1341
--- /dev/null
+++ b/kernel/exit.h
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef LINUX_WAITID_H
+#define LINUX_WAITID_H
+
+struct waitid_info {
+	pid_t pid;
+	uid_t uid;
+	int status;
+	int cause;
+};
+
+struct wait_opts {
+	enum pid_type		wo_type;
+	int			wo_flags;
+	struct pid		*wo_pid;
+
+	struct waitid_info	*wo_info;
+	int			wo_stat;
+	struct rusage		*wo_rusage;
+
+	wait_queue_entry_t		child_wait;
+	int			notask_error;
+};
+
+bool pid_child_should_wake(struct wait_opts *wo, struct task_struct *p);
+long __do_wait(struct wait_opts *wo);
+int kernel_waitid_prepare(struct wait_opts *wo, int which, pid_t upid,
+			  struct waitid_info *infop, int options,
+			  struct rusage *ru, unsigned int *f_flags);
+#endif
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 5/5] io_uring: add IORING_OP_WAITID support
  2023-08-02 23:14 [PATCHSET v2] Add io_uring support for waitid Jens Axboe
                   ` (3 preceding siblings ...)
  2023-08-02 23:14 ` [PATCH 4/5] exit: add internal include file with helpers Jens Axboe
@ 2023-08-02 23:14 ` Jens Axboe
  2023-08-09 11:27   ` Christian Brauner
  4 siblings, 1 reply; 21+ messages in thread
From: Jens Axboe @ 2023-08-02 23:14 UTC (permalink / raw)
  To: io-uring, linux-kernel; +Cc: brauner, arnd, Jens Axboe

This adds support for a fully async version of waitid(2). If an event
isn't immediately available, wait for a callback to trigger a retry.

The format of the sqe is as follows:

sqe->len		The 'which', the idtype being queried/waited for.
sqe->fd			The 'pid' (or id) being waited for.
sqe->file_index		The 'options' being set.
sqe->addr2		A pointer to siginfo_t, if any, being filled in.

buf_index, addr3, and waitid_flags are reserved/unused for now.
waitid_flags will be used for options for this request type. One
interesting use case may be to add multi-shot support, so that the
request stays armed and posts a notification every time a monitored
process state change occurs.

Note that this does not support rusage, on Arnd's recommendation.

See the waitid(2) man page for details on the arguments.

Signed-off-by: Jens Axboe <[email protected]>
---
 include/linux/io_uring_types.h |   2 +
 include/uapi/linux/io_uring.h  |   2 +
 io_uring/Makefile              |   2 +-
 io_uring/cancel.c              |   5 +
 io_uring/io_uring.c            |   3 +
 io_uring/opdef.c               |   9 +
 io_uring/waitid.c              | 313 +++++++++++++++++++++++++++++++++
 io_uring/waitid.h              |  15 ++
 8 files changed, 350 insertions(+), 1 deletion(-)
 create mode 100644 io_uring/waitid.c
 create mode 100644 io_uring/waitid.h

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index a7f03d8d879f..598553877fc2 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -276,6 +276,8 @@ struct io_ring_ctx {
 	struct hlist_head	futex_list;
 	struct io_alloc_cache	futex_cache;
 
+	struct hlist_head	waitid_list;
+
 	const struct cred	*sq_creds;	/* cred used for __io_sq_thread() */
 	struct io_sq_data	*sq_data;	/* if using sq thread polling */
 
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 420f38675769..8fca2cffc343 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -66,6 +66,7 @@ struct io_uring_sqe {
 		__u32		msg_ring_flags;
 		__u32		uring_cmd_flags;
 		__u32		futex_flags;
+		__u32		waitid_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	/* pack this to avoid bogus arm OABI complaints */
@@ -239,6 +240,7 @@ enum io_uring_op {
 	IORING_OP_FUTEX_WAIT,
 	IORING_OP_FUTEX_WAKE,
 	IORING_OP_FUTEX_WAITV,
+	IORING_OP_WAITID,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
diff --git a/io_uring/Makefile b/io_uring/Makefile
index 2e4779bc550c..e5be47e4fc3b 100644
--- a/io_uring/Makefile
+++ b/io_uring/Makefile
@@ -8,6 +8,6 @@ obj-$(CONFIG_IO_URING)		+= io_uring.o xattr.o nop.o fs.o splice.o \
 					statx.o net.o msg_ring.o timeout.o \
 					sqpoll.o fdinfo.o tctx.o poll.o \
 					cancel.o kbuf.o rsrc.o rw.o opdef.o \
-					notif.o
+					notif.o waitid.o
 obj-$(CONFIG_IO_WQ)		+= io-wq.o
 obj-$(CONFIG_FUTEX)		+= futex.o
diff --git a/io_uring/cancel.c b/io_uring/cancel.c
index 3dba8ccb1cd8..a01f3f41012b 100644
--- a/io_uring/cancel.c
+++ b/io_uring/cancel.c
@@ -16,6 +16,7 @@
 #include "poll.h"
 #include "timeout.h"
 #include "futex.h"
+#include "waitid.h"
 #include "cancel.h"
 
 struct io_cancel {
@@ -124,6 +125,10 @@ int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
 	if (ret != -ENOENT)
 		return ret;
 
+	ret = io_waitid_cancel(ctx, cd, issue_flags);
+	if (ret != -ENOENT)
+		return ret;
+
 	spin_lock(&ctx->completion_lock);
 	if (!(cd->flags & IORING_ASYNC_CANCEL_FD))
 		ret = io_timeout_cancel(ctx, cd);
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index e52cbdcb29b8..94147e0835bf 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -93,6 +93,7 @@
 #include "net.h"
 #include "notif.h"
 #include "futex.h"
+#include "waitid.h"
 
 #include "timeout.h"
 #include "poll.h"
@@ -352,6 +353,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	ctx->submit_state.free_list.next = NULL;
 	INIT_WQ_LIST(&ctx->locked_free_list);
 	INIT_HLIST_HEAD(&ctx->futex_list);
+	INIT_HLIST_HEAD(&ctx->waitid_list);
 	INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
 	INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
 	return ctx;
@@ -3286,6 +3288,7 @@ static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 	mutex_lock(&ctx->uring_lock);
 	ret |= io_poll_remove_all(ctx, task, cancel_all);
 	ret |= io_futex_remove_all(ctx, task, cancel_all);
+	ret |= io_waitid_remove_all(ctx, task, cancel_all);
 	mutex_unlock(&ctx->uring_lock);
 	ret |= io_kill_timeouts(ctx, task, cancel_all);
 	if (task)
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index b9e1e12cac9c..1c5cfa9d7b31 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -34,6 +34,7 @@
 #include "cancel.h"
 #include "rw.h"
 #include "futex.h"
+#include "waitid.h"
 
 static int io_no_issue(struct io_kiocb *req, unsigned int issue_flags)
 {
@@ -453,6 +454,10 @@ const struct io_issue_def io_issue_defs[] = {
 		.prep			= io_eopnotsupp_prep,
 #endif
 	},
+	[IORING_OP_WAITID] = {
+		.prep			= io_waitid_prep,
+		.issue			= io_waitid,
+	},
 };
 
 const struct io_cold_def io_cold_defs[] = {
@@ -681,6 +686,10 @@ const struct io_cold_def io_cold_defs[] = {
 	[IORING_OP_FUTEX_WAITV] = {
 		.name			= "FUTEX_WAITV",
 	},
+	[IORING_OP_WAITID] = {
+		.name			= "WAITID",
+		.async_size		= sizeof(struct io_waitid_async),
+	},
 };
 
 const char *io_uring_get_opcode(u8 opcode)
diff --git a/io_uring/waitid.c b/io_uring/waitid.c
new file mode 100644
index 000000000000..14ffa07e161a
--- /dev/null
+++ b/io_uring/waitid.c
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for async notification of waitid
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/compat.h>
+#include <linux/io_uring.h>
+
+#include <uapi/linux/io_uring.h>
+
+#include "io_uring.h"
+#include "cancel.h"
+#include "waitid.h"
+#include "../kernel/exit.h"
+
+struct io_waitid {
+	struct file *file;
+	int which;
+	pid_t upid;
+	int options;
+	struct wait_queue_head *head;
+	struct siginfo __user *infop;
+	struct waitid_info info;
+};
+
+static void io_waitid_free(struct io_kiocb *req)
+{
+	struct io_waitid_async *iwa = req->async_data;
+
+	put_pid(iwa->wo.wo_pid);
+	kfree(req->async_data);
+	req->async_data = NULL;
+	req->flags &= ~REQ_F_ASYNC_DATA;
+}
+
+#ifdef CONFIG_COMPAT
+static bool io_waitid_compat_copy_si(struct io_waitid *iw, int signo)
+{
+	struct compat_siginfo __user *infop;
+	bool ret;
+
+	infop = (struct compat_siginfo __user *) iw->infop;
+
+	if (!user_write_access_begin(infop, sizeof(*infop)))
+		return false;
+
+	unsafe_put_user(signo, &infop->si_signo, Efault);
+	unsafe_put_user(0, &infop->si_errno, Efault);
+	unsafe_put_user(iw->info.cause, &infop->si_code, Efault);
+	unsafe_put_user(iw->info.pid, &infop->si_pid, Efault);
+	unsafe_put_user(iw->info.uid, &infop->si_uid, Efault);
+	unsafe_put_user(iw->info.status, &infop->si_status, Efault);
+	ret = true;
+done:
+	user_write_access_end();
+	return ret;
+Efault:
+	ret = false;
+	goto done;
+}
+#endif
+
+static bool io_waitid_copy_si(struct io_kiocb *req, int signo)
+{
+	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+	bool ret;
+
+	if (!iw->infop)
+		return true;
+
+#ifdef CONFIG_COMPAT
+	if (req->ctx->compat)
+		return io_waitid_compat_copy_si(iw, signo);
+#endif
+
+	if (!user_write_access_begin(iw->infop, sizeof(*iw->infop)))
+		return false;
+
+	unsafe_put_user(signo, &iw->infop->si_signo, Efault);
+	unsafe_put_user(0, &iw->infop->si_errno, Efault);
+	unsafe_put_user(iw->info.cause, &iw->infop->si_code, Efault);
+	unsafe_put_user(iw->info.pid, &iw->infop->si_pid, Efault);
+	unsafe_put_user(iw->info.uid, &iw->infop->si_uid, Efault);
+	unsafe_put_user(iw->info.status, &iw->infop->si_status, Efault);
+	ret = true;
+done:
+	user_write_access_end();
+	return ret;
+Efault:
+	ret = false;
+	goto done;
+}
+
+static int io_waitid_finish(struct io_kiocb *req, int ret)
+{
+	int signo = 0;
+
+	if (ret > 0) {
+		signo = SIGCHLD;
+		ret = 0;
+	}
+
+	if (!io_waitid_copy_si(req, signo))
+		ret = -EFAULT;
+	io_waitid_free(req);
+	return ret;
+}
+
+static void io_waitid_complete(struct io_kiocb *req, int ret)
+{
+	struct io_tw_state ts = { .locked = true };
+
+	lockdep_assert_held(&req->ctx->uring_lock);
+
+	/*
+	 * Did cancel find it meanwhile?
+	 */
+	if (hlist_unhashed(&req->hash_node))
+		return;
+
+	hlist_del_init(&req->hash_node);
+
+	ret = io_waitid_finish(req, ret);
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_set_res(req, ret, 0);
+	io_req_task_complete(req, &ts);
+}
+
+static bool __io_waitid_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req)
+{
+	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+	struct wait_queue_head *head;
+
+	head = READ_ONCE(iw->head);
+	if (head) {
+		struct io_waitid_async *iwa = req->async_data;
+
+		spin_lock_irq(&head->lock);
+		list_del_init(&iwa->wo.child_wait.entry);
+		iw->head = NULL;
+		spin_unlock_irq(&head->lock);
+		io_waitid_complete(req, -ECANCELED);
+		return true;
+	}
+
+	return false;
+}
+
+int io_waitid_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
+		     unsigned int issue_flags)
+{
+	struct hlist_node *tmp;
+	struct io_kiocb *req;
+	int nr = 0;
+
+	if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_FD_FIXED))
+		return -ENOENT;
+
+	io_ring_submit_lock(ctx, issue_flags);
+	hlist_for_each_entry_safe(req, tmp, &ctx->waitid_list, hash_node) {
+		if (req->cqe.user_data != cd->data &&
+		    !(cd->flags & IORING_ASYNC_CANCEL_ANY))
+			continue;
+		if (__io_waitid_cancel(ctx, req))
+			nr++;
+		if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
+			break;
+	}
+	io_ring_submit_unlock(ctx, issue_flags);
+
+	if (nr)
+		return nr;
+
+	return -ENOENT;
+}
+
+bool io_waitid_remove_all(struct io_ring_ctx *ctx, struct task_struct *task,
+			  bool cancel_all)
+{
+	struct hlist_node *tmp;
+	struct io_kiocb *req;
+	bool found = false;
+
+	lockdep_assert_held(&ctx->uring_lock);
+
+	hlist_for_each_entry_safe(req, tmp, &ctx->waitid_list, hash_node) {
+		if (!io_match_task_safe(req, task, cancel_all))
+			continue;
+		__io_waitid_cancel(ctx, req);
+		found = true;
+	}
+
+	return found;
+}
+
+static void io_waitid_cb(struct io_kiocb *req, struct io_tw_state *ts)
+{
+	struct io_waitid_async *iwa = req->async_data;
+	struct io_ring_ctx *ctx = req->ctx;
+	int ret;
+
+	/*
+	 * If we get -ERESTARTSYS here, we need to re-arm and check again
+	 * to ensure we get another callback. If the retry works, then we can
+	 * just remove ourselves from the waitqueue again and finish the
+	 * request.
+	 */
+	ret = __do_wait(&iwa->wo);
+	if (unlikely(ret == -ERESTARTSYS)) {
+		struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+
+		io_tw_lock(ctx, ts);
+		iw->head = &current->signal->wait_chldexit;
+		add_wait_queue(iw->head, &iwa->wo.child_wait);
+		ret = __do_wait(&iwa->wo);
+		if (ret == -ERESTARTSYS)
+			return;
+
+		remove_wait_queue(iw->head, &iwa->wo.child_wait);
+		iw->head = NULL;
+	}
+
+	io_tw_lock(ctx, ts);
+	io_waitid_complete(req, ret);
+}
+
+static int io_waitid_wait(struct wait_queue_entry *wait, unsigned mode,
+			  int sync, void *key)
+{
+	struct wait_opts *wo = container_of(wait, struct wait_opts, child_wait);
+	struct io_waitid_async *iwa = container_of(wo, struct io_waitid_async, wo);
+	struct io_kiocb *req = iwa->req;
+	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+	struct task_struct *p = key;
+
+	if (!pid_child_should_wake(wo, p))
+		return 0;
+
+	req->io_task_work.func = io_waitid_cb;
+	io_req_task_work_add(req);
+	iw->head = NULL;
+	list_del_init(&wait->entry);
+	return 1;
+}
+
+int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+
+	if (sqe->addr || sqe->buf_index || sqe->addr3 || sqe->waitid_flags)
+		return -EINVAL;
+
+	iw->which = READ_ONCE(sqe->len);
+	iw->options = READ_ONCE(sqe->file_index);
+	iw->upid = READ_ONCE(sqe->fd);
+	iw->infop = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+	iw->head = NULL;
+	return 0;
+}
+
+int io_waitid(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_waitid_async *iwa;
+	unsigned int f_flags = 0;
+	int ret;
+
+	if (io_alloc_async_data(req))
+		return -ENOMEM;
+
+	iwa = req->async_data;
+	iwa->req = req;
+
+	ret = kernel_waitid_prepare(&iwa->wo, iw->which, iw->upid, &iw->info,
+					iw->options, NULL, &f_flags);
+	if (ret)
+		goto done;
+
+	/*
+	 * Arm our callback and add us to the waitqueue, in case no events
+	 * are available.
+	 */
+	init_waitqueue_func_entry(&iwa->wo.child_wait, io_waitid_wait);
+	iwa->wo.child_wait.private = req->task;
+	iw->head = &current->signal->wait_chldexit;
+	add_wait_queue(iw->head, &iwa->wo.child_wait);
+
+	io_ring_submit_lock(ctx, issue_flags);
+	hlist_add_head(&req->hash_node, &ctx->waitid_list);
+
+	ret = __do_wait(&iwa->wo);
+	if (ret == -ERESTARTSYS) {
+		io_ring_submit_unlock(ctx, issue_flags);
+		return IOU_ISSUE_SKIP_COMPLETE;
+	}
+
+	hlist_del_init(&req->hash_node);
+	remove_wait_queue(iw->head, &iwa->wo.child_wait);
+	iw->head = NULL;
+	ret = io_waitid_finish(req, ret);
+
+	io_ring_submit_unlock(ctx, issue_flags);
+done:
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_set_res(req, ret, 0);
+	return IOU_OK;
+}
diff --git a/io_uring/waitid.h b/io_uring/waitid.h
new file mode 100644
index 000000000000..956a8adafe8c
--- /dev/null
+++ b/io_uring/waitid.h
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "../kernel/exit.h"
+
+struct io_waitid_async {
+	struct io_kiocb *req;
+	struct wait_opts wo;
+};
+
+int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_waitid(struct io_kiocb *req, unsigned int issue_flags);
+int io_waitid_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
+		     unsigned int issue_flags);
+bool io_waitid_remove_all(struct io_ring_ctx *ctx, struct task_struct *task,
+			  bool cancel_all);
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH 5/5] io_uring: add IORING_OP_WAITID support
  2023-08-02 23:14 ` [PATCH 5/5] io_uring: add IORING_OP_WAITID support Jens Axboe
@ 2023-08-09 11:27   ` Christian Brauner
  2023-08-09 15:11     ` Jens Axboe
  0 siblings, 1 reply; 21+ messages in thread
From: Christian Brauner @ 2023-08-09 11:27 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring, linux-kernel, arnd

On Wed, Aug 02, 2023 at 05:14:42PM -0600, Jens Axboe wrote:
> This adds support for an async version of waitid(2), in a fully async
> version. If an event isn't immediately available, wait for a callback
> to trigger a retry.
> 
> The format of the sqe is as follows:
> 
> sqe->len		The 'which', the idtype being queried/waited for.
> sqe->fd			The 'pid' (or id) being waited for.
> sqe->file_index		The 'options' being set.
> sqe->addr2		A pointer to siginfo_t, if any, being filled in.
> 
> buf_index, addr3, and waitid_flags are reserved/unused for now.
> waitid_flags will be used for options for this request type. One
> interesting use case may be to add multi-shot support, so that the
> request stays armed and posts a notification every time a monitored
> process state change occurs.
> 
> Note that this does not support rusage, on Arnd's recommendation.
> 
> See the waitid(2) man page for details on the arguments.
> 
> Signed-off-by: Jens Axboe <[email protected]>
> ---
>  include/linux/io_uring_types.h |   2 +
>  include/uapi/linux/io_uring.h  |   2 +
>  io_uring/Makefile              |   2 +-
>  io_uring/cancel.c              |   5 +
>  io_uring/io_uring.c            |   3 +
>  io_uring/opdef.c               |   9 +
>  io_uring/waitid.c              | 313 +++++++++++++++++++++++++++++++++
>  io_uring/waitid.h              |  15 ++
>  8 files changed, 350 insertions(+), 1 deletion(-)
>  create mode 100644 io_uring/waitid.c
>  create mode 100644 io_uring/waitid.h
> 
> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
> index a7f03d8d879f..598553877fc2 100644
> --- a/include/linux/io_uring_types.h
> +++ b/include/linux/io_uring_types.h
> @@ -276,6 +276,8 @@ struct io_ring_ctx {
>  	struct hlist_head	futex_list;
>  	struct io_alloc_cache	futex_cache;
>  
> +	struct hlist_head	waitid_list;
> +
>  	const struct cred	*sq_creds;	/* cred used for __io_sq_thread() */
>  	struct io_sq_data	*sq_data;	/* if using sq thread polling */
>  
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index 420f38675769..8fca2cffc343 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -66,6 +66,7 @@ struct io_uring_sqe {
>  		__u32		msg_ring_flags;
>  		__u32		uring_cmd_flags;
>  		__u32		futex_flags;
> +		__u32		waitid_flags;
>  	};
>  	__u64	user_data;	/* data to be passed back at completion time */
>  	/* pack this to avoid bogus arm OABI complaints */
> @@ -239,6 +240,7 @@ enum io_uring_op {
>  	IORING_OP_FUTEX_WAIT,
>  	IORING_OP_FUTEX_WAKE,
>  	IORING_OP_FUTEX_WAITV,
> +	IORING_OP_WAITID,
>  
>  	/* this goes last, obviously */
>  	IORING_OP_LAST,
> diff --git a/io_uring/Makefile b/io_uring/Makefile
> index 2e4779bc550c..e5be47e4fc3b 100644
> --- a/io_uring/Makefile
> +++ b/io_uring/Makefile
> @@ -8,6 +8,6 @@ obj-$(CONFIG_IO_URING)		+= io_uring.o xattr.o nop.o fs.o splice.o \
>  					statx.o net.o msg_ring.o timeout.o \
>  					sqpoll.o fdinfo.o tctx.o poll.o \
>  					cancel.o kbuf.o rsrc.o rw.o opdef.o \
> -					notif.o
> +					notif.o waitid.o
>  obj-$(CONFIG_IO_WQ)		+= io-wq.o
>  obj-$(CONFIG_FUTEX)		+= futex.o
> diff --git a/io_uring/cancel.c b/io_uring/cancel.c
> index 3dba8ccb1cd8..a01f3f41012b 100644
> --- a/io_uring/cancel.c
> +++ b/io_uring/cancel.c
> @@ -16,6 +16,7 @@
>  #include "poll.h"
>  #include "timeout.h"
>  #include "futex.h"
> +#include "waitid.h"
>  #include "cancel.h"
>  
>  struct io_cancel {
> @@ -124,6 +125,10 @@ int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
>  	if (ret != -ENOENT)
>  		return ret;
>  
> +	ret = io_waitid_cancel(ctx, cd, issue_flags);
> +	if (ret != -ENOENT)
> +		return ret;
> +
>  	spin_lock(&ctx->completion_lock);
>  	if (!(cd->flags & IORING_ASYNC_CANCEL_FD))
>  		ret = io_timeout_cancel(ctx, cd);
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index e52cbdcb29b8..94147e0835bf 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -93,6 +93,7 @@
>  #include "net.h"
>  #include "notif.h"
>  #include "futex.h"
> +#include "waitid.h"
>  
>  #include "timeout.h"
>  #include "poll.h"
> @@ -352,6 +353,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
>  	ctx->submit_state.free_list.next = NULL;
>  	INIT_WQ_LIST(&ctx->locked_free_list);
>  	INIT_HLIST_HEAD(&ctx->futex_list);
> +	INIT_HLIST_HEAD(&ctx->waitid_list);
>  	INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
>  	INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
>  	return ctx;
> @@ -3286,6 +3288,7 @@ static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
>  	mutex_lock(&ctx->uring_lock);
>  	ret |= io_poll_remove_all(ctx, task, cancel_all);
>  	ret |= io_futex_remove_all(ctx, task, cancel_all);
> +	ret |= io_waitid_remove_all(ctx, task, cancel_all);
>  	mutex_unlock(&ctx->uring_lock);
>  	ret |= io_kill_timeouts(ctx, task, cancel_all);
>  	if (task)
> diff --git a/io_uring/opdef.c b/io_uring/opdef.c
> index b9e1e12cac9c..1c5cfa9d7b31 100644
> --- a/io_uring/opdef.c
> +++ b/io_uring/opdef.c
> @@ -34,6 +34,7 @@
>  #include "cancel.h"
>  #include "rw.h"
>  #include "futex.h"
> +#include "waitid.h"
>  
>  static int io_no_issue(struct io_kiocb *req, unsigned int issue_flags)
>  {
> @@ -453,6 +454,10 @@ const struct io_issue_def io_issue_defs[] = {
>  		.prep			= io_eopnotsupp_prep,
>  #endif
>  	},
> +	[IORING_OP_WAITID] = {
> +		.prep			= io_waitid_prep,
> +		.issue			= io_waitid,
> +	},
>  };
>  
>  const struct io_cold_def io_cold_defs[] = {
> @@ -681,6 +686,10 @@ const struct io_cold_def io_cold_defs[] = {
>  	[IORING_OP_FUTEX_WAITV] = {
>  		.name			= "FUTEX_WAITV",
>  	},
> +	[IORING_OP_WAITID] = {
> +		.name			= "WAITID",
> +		.async_size		= sizeof(struct io_waitid_async),
> +	},
>  };
>  
>  const char *io_uring_get_opcode(u8 opcode)
> diff --git a/io_uring/waitid.c b/io_uring/waitid.c
> new file mode 100644
> index 000000000000..14ffa07e161a
> --- /dev/null
> +++ b/io_uring/waitid.c
> @@ -0,0 +1,313 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Support for async notification of waitid
> + */
> +#include <linux/kernel.h>
> +#include <linux/errno.h>
> +#include <linux/fs.h>
> +#include <linux/file.h>
> +#include <linux/compat.h>
> +#include <linux/io_uring.h>
> +
> +#include <uapi/linux/io_uring.h>
> +
> +#include "io_uring.h"
> +#include "cancel.h"
> +#include "waitid.h"
> +#include "../kernel/exit.h"
> +
> +struct io_waitid {
> +	struct file *file;
> +	int which;
> +	pid_t upid;
> +	int options;
> +	struct wait_queue_head *head;
> +	struct siginfo __user *infop;
> +	struct waitid_info info;
> +};
> +
> +static void io_waitid_free(struct io_kiocb *req)
> +{
> +	struct io_waitid_async *iwa = req->async_data;
> +
> +	put_pid(iwa->wo.wo_pid);
> +	kfree(req->async_data);
> +	req->async_data = NULL;
> +	req->flags &= ~REQ_F_ASYNC_DATA;
> +}
> +
> +#ifdef CONFIG_COMPAT
> +static bool io_waitid_compat_copy_si(struct io_waitid *iw, int signo)
> +{
> +	struct compat_siginfo __user *infop;
> +	bool ret;
> +
> +	infop = (struct compat_siginfo __user *) iw->infop;
> +
> +	if (!user_write_access_begin(infop, sizeof(*infop)))
> +		return false;
> +
> +	unsafe_put_user(signo, &infop->si_signo, Efault);
> +	unsafe_put_user(0, &infop->si_errno, Efault);
> +	unsafe_put_user(iw->info.cause, &infop->si_code, Efault);
> +	unsafe_put_user(iw->info.pid, &infop->si_pid, Efault);
> +	unsafe_put_user(iw->info.uid, &infop->si_uid, Efault);
> +	unsafe_put_user(iw->info.status, &infop->si_status, Efault);
> +	ret = true;
> +done:
> +	user_write_access_end();
> +	return ret;
> +Efault:
> +	ret = false;
> +	goto done;
> +}
> +#endif
> +
> +static bool io_waitid_copy_si(struct io_kiocb *req, int signo)
> +{
> +	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
> +	bool ret;
> +
> +	if (!iw->infop)
> +		return true;
> +
> +#ifdef CONFIG_COMPAT
> +	if (req->ctx->compat)
> +		return io_waitid_compat_copy_si(iw, signo);
> +#endif
> +
> +	if (!user_write_access_begin(iw->infop, sizeof(*iw->infop)))
> +		return false;
> +
> +	unsafe_put_user(signo, &iw->infop->si_signo, Efault);
> +	unsafe_put_user(0, &iw->infop->si_errno, Efault);
> +	unsafe_put_user(iw->info.cause, &iw->infop->si_code, Efault);
> +	unsafe_put_user(iw->info.pid, &iw->infop->si_pid, Efault);
> +	unsafe_put_user(iw->info.uid, &iw->infop->si_uid, Efault);
> +	unsafe_put_user(iw->info.status, &iw->infop->si_status, Efault);
> +	ret = true;
> +done:
> +	user_write_access_end();
> +	return ret;
> +Efault:
> +	ret = false;
> +	goto done;
> +}
> +
> +static int io_waitid_finish(struct io_kiocb *req, int ret)
> +{
> +	int signo = 0;
> +
> +	if (ret > 0) {
> +		signo = SIGCHLD;
> +		ret = 0;
> +	}
> +
> +	if (!io_waitid_copy_si(req, signo))
> +		ret = -EFAULT;
> +	io_waitid_free(req);
> +	return ret;
> +}
> +
> +static void io_waitid_complete(struct io_kiocb *req, int ret)
> +{
> +	struct io_tw_state ts = { .locked = true };
> +
> +	lockdep_assert_held(&req->ctx->uring_lock);
> +
> +	/*
> +	 * Did cancel find it meanwhile?
> +	 */
> +	if (hlist_unhashed(&req->hash_node))
> +		return;
> +
> +	hlist_del_init(&req->hash_node);
> +
> +	ret = io_waitid_finish(req, ret);
> +	if (ret < 0)
> +		req_set_fail(req);
> +	io_req_set_res(req, ret, 0);
> +	io_req_task_complete(req, &ts);
> +}
> +
> +static bool __io_waitid_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req)
> +{
> +	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
> +	struct wait_queue_head *head;
> +
> +	head = READ_ONCE(iw->head);
> +	if (head) {
> +		struct io_waitid_async *iwa = req->async_data;
> +
> +		spin_lock_irq(&head->lock);
> +		list_del_init(&iwa->wo.child_wait.entry);
> +		iw->head = NULL;
> +		spin_unlock_irq(&head->lock);
> +		io_waitid_complete(req, -ECANCELED);
> +		return true;
> +	}
> +
> +	return false;
> +}
> +
> +int io_waitid_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
> +		     unsigned int issue_flags)
> +{
> +	struct hlist_node *tmp;
> +	struct io_kiocb *req;
> +	int nr = 0;
> +
> +	if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_FD_FIXED))
> +		return -ENOENT;
> +
> +	io_ring_submit_lock(ctx, issue_flags);
> +	hlist_for_each_entry_safe(req, tmp, &ctx->waitid_list, hash_node) {
> +		if (req->cqe.user_data != cd->data &&
> +		    !(cd->flags & IORING_ASYNC_CANCEL_ANY))
> +			continue;
> +		if (__io_waitid_cancel(ctx, req))
> +			nr++;
> +		if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
> +			break;
> +	}
> +	io_ring_submit_unlock(ctx, issue_flags);
> +
> +	if (nr)
> +		return nr;
> +
> +	return -ENOENT;
> +}
> +
> +bool io_waitid_remove_all(struct io_ring_ctx *ctx, struct task_struct *task,
> +			  bool cancel_all)
> +{
> +	struct hlist_node *tmp;
> +	struct io_kiocb *req;
> +	bool found = false;
> +
> +	lockdep_assert_held(&ctx->uring_lock);
> +
> +	hlist_for_each_entry_safe(req, tmp, &ctx->waitid_list, hash_node) {
> +		if (!io_match_task_safe(req, task, cancel_all))
> +			continue;
> +		__io_waitid_cancel(ctx, req);
> +		found = true;
> +	}
> +
> +	return found;
> +}
> +
> +static void io_waitid_cb(struct io_kiocb *req, struct io_tw_state *ts)
> +{
> +	struct io_waitid_async *iwa = req->async_data;
> +	struct io_ring_ctx *ctx = req->ctx;
> +	int ret;
> +
> +	/*
> +	 * If we get -ERESTARTSYS here, we need to re-arm and check again
> +	 * to ensure we get another callback. If the retry works, then we can
> +	 * just remove ourselves from the waitqueue again and finish the
> +	 * request.
> +	 */
> +	ret = __do_wait(&iwa->wo);
> +	if (unlikely(ret == -ERESTARTSYS)) {
> +		struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
> +
> +		io_tw_lock(ctx, ts);
> +		iw->head = &current->signal->wait_chldexit;
> +		add_wait_queue(iw->head, &iwa->wo.child_wait);
> +		ret = __do_wait(&iwa->wo);
> +		if (ret == -ERESTARTSYS)
> +			return;
> +
> +		remove_wait_queue(iw->head, &iwa->wo.child_wait);
> +		iw->head = NULL;
> +	}
> +
> +	io_tw_lock(ctx, ts);
> +	io_waitid_complete(req, ret);
> +}
> +
> +static int io_waitid_wait(struct wait_queue_entry *wait, unsigned mode,
> +			  int sync, void *key)
> +{
> +	struct wait_opts *wo = container_of(wait, struct wait_opts, child_wait);
> +	struct io_waitid_async *iwa = container_of(wo, struct io_waitid_async, wo);
> +	struct io_kiocb *req = iwa->req;
> +	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
> +	struct task_struct *p = key;
> +
> +	if (!pid_child_should_wake(wo, p))
> +		return 0;
> +
> +	req->io_task_work.func = io_waitid_cb;
> +	io_req_task_work_add(req);
> +	iw->head = NULL;
> +	list_del_init(&wait->entry);
> +	return 1;
> +}
> +
> +int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> +{
> +	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
> +
> +	if (sqe->addr || sqe->buf_index || sqe->addr3 || sqe->waitid_flags)
> +		return -EINVAL;
> +
> +	iw->which = READ_ONCE(sqe->len);
> +	iw->options = READ_ONCE(sqe->file_index);
> +	iw->upid = READ_ONCE(sqe->fd);
> +	iw->infop = u64_to_user_ptr(READ_ONCE(sqe->addr2));
> +	iw->head = NULL;
> +	return 0;
> +}
> +
> +int io_waitid(struct io_kiocb *req, unsigned int issue_flags)
> +{
> +	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
> +	struct io_ring_ctx *ctx = req->ctx;
> +	struct io_waitid_async *iwa;
> +	unsigned int f_flags = 0;
> +	int ret;
> +
> +	if (io_alloc_async_data(req))
> +		return -ENOMEM;
> +
> +	iwa = req->async_data;
> +	iwa->req = req;
> +
> +	ret = kernel_waitid_prepare(&iwa->wo, iw->which, iw->upid, &iw->info,
> +					iw->options, NULL, &f_flags);

It seems you're not really using @f_flags at all so I'd just not bother
exposing it in kernel_waitid_prepare(). I think the following (untested)
will let you avoid all this:

diff --git a/kernel/exit.c b/kernel/exit.c
index 1c9d1cbadcd0..33eba306aa51 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1646,10 +1646,11 @@ static long do_wait(struct wait_opts *wo)

 int kernel_waitid_prepare(struct wait_opts *wo, int which, pid_t upid,
                          struct waitid_info *infop, int options,
-                         struct rusage *ru, unsigned int *f_flags)
+                         struct rusage *ru)
 {
        struct pid *pid = NULL;
        enum pid_type type;
+       unsigned int f_flags = 0;

        if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED|
                        __WNOTHREAD|__WCLONE|__WALL))
@@ -1708,16 +1709,16 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
 {
        struct wait_opts wo;
        long ret;
-       unsigned int f_flags = 0;

-       ret = kernel_waitid_prepare(&wo, which, upid, infop, options, ru,
-                                       &f_flags);
+       ret = kernel_waitid_prepare(&wo, which, upid, infop, options, ru);
        if (ret)
                return ret;

        ret = do_wait(&wo);
-       if (!ret && !(options & WNOHANG) && (f_flags & O_NONBLOCK))
+       if (!ret && !(options & WNOHANG) && (wo.wo_flags & WNOHANG)) {
+               WARN_ON_ONCE(which != P_PIDFD);
                ret = -EAGAIN;
+       }

        put_pid(wo.wo_pid);
        return ret;
diff --git a/kernel/exit.h b/kernel/exit.h
index f10207ba1341..278faa26a653 100644
--- a/kernel/exit.h
+++ b/kernel/exit.h
@@ -26,5 +26,5 @@ bool pid_child_should_wake(struct wait_opts *wo, struct task_struct *p);
 long __do_wait(struct wait_opts *wo);
 int kernel_waitid_prepare(struct wait_opts *wo, int which, pid_t upid,
                          struct waitid_info *infop, int options,
-                         struct rusage *ru, unsigned int *f_flags);
+                         struct rusage *ru);
 #endif

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH 5/5] io_uring: add IORING_OP_WAITID support
  2023-08-09 11:27   ` Christian Brauner
@ 2023-08-09 15:11     ` Jens Axboe
  0 siblings, 0 replies; 21+ messages in thread
From: Jens Axboe @ 2023-08-09 15:11 UTC (permalink / raw)
  To: Christian Brauner; +Cc: io-uring, linux-kernel, arnd

On 8/9/23 5:27 AM, Christian Brauner wrote:
>> +int io_waitid(struct io_kiocb *req, unsigned int issue_flags)
>> +{
>> +	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
>> +	struct io_ring_ctx *ctx = req->ctx;
>> +	struct io_waitid_async *iwa;
>> +	unsigned int f_flags = 0;
>> +	int ret;
>> +
>> +	if (io_alloc_async_data(req))
>> +		return -ENOMEM;
>> +
>> +	iwa = req->async_data;
>> +	iwa->req = req;
>> +
>> +	ret = kernel_waitid_prepare(&iwa->wo, iw->which, iw->upid, &iw->info,
>> +					iw->options, NULL, &f_flags);
> 
> It seems you're not really using @f_flags at all so I'd just not bother
> exposing it in kernel_waitid_prepare(). I think the following (untested)
> will let you avoid all this:

That's a good idea, I'll update it to get rid of the f_flags. Thanks!

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 5/5] io_uring: add IORING_OP_WAITID support
  2023-08-11 14:16 [PATCHSET v3 0/5] Add io_uring support for waitid Jens Axboe
@ 2023-08-11 14:16 ` Jens Axboe
  0 siblings, 0 replies; 21+ messages in thread
From: Jens Axboe @ 2023-08-11 14:16 UTC (permalink / raw)
  To: io-uring, linux-kernel; +Cc: brauner, arnd, Jens Axboe

This adds support for a fully async version of waitid(2). If an event
isn't immediately available, the request waits for a callback to
trigger a retry.

The format of the sqe is as follows:

sqe->len		The 'which', the idtype being queried/waited for.
sqe->fd			The 'pid' (or id) being waited for.
sqe->file_index		The 'options' being set.
sqe->addr2		A pointer to siginfo_t, if any, being filled in.

buf_index, addr3, and waitid_flags are reserved/unused for now.
waitid_flags will be used for options for this request type. One
interesting use case may be to add multi-shot support, so that the
request stays armed and posts a notification every time a monitored
process state change occurs.

Note that this does not support rusage, on Arnd's recommendation.

See the waitid(2) man page for details on the arguments.

Signed-off-by: Jens Axboe <[email protected]>
---
 include/linux/io_uring_types.h |   2 +
 include/uapi/linux/io_uring.h  |   2 +
 io_uring/Makefile              |   2 +-
 io_uring/cancel.c              |   5 +
 io_uring/io_uring.c            |   3 +
 io_uring/opdef.c               |   9 +
 io_uring/waitid.c              | 312 +++++++++++++++++++++++++++++++++
 io_uring/waitid.h              |  15 ++
 8 files changed, 349 insertions(+), 1 deletion(-)
 create mode 100644 io_uring/waitid.c
 create mode 100644 io_uring/waitid.h

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index a7f03d8d879f..598553877fc2 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -276,6 +276,8 @@ struct io_ring_ctx {
 	struct hlist_head	futex_list;
 	struct io_alloc_cache	futex_cache;
 
+	struct hlist_head	waitid_list;
+
 	const struct cred	*sq_creds;	/* cred used for __io_sq_thread() */
 	struct io_sq_data	*sq_data;	/* if using sq thread polling */
 
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 3dd3d7557531..ef42a8203bdd 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -66,6 +66,7 @@ struct io_uring_sqe {
 		__u32		msg_ring_flags;
 		__u32		uring_cmd_flags;
 		__u32		futex_flags;
+		__u32		waitid_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	/* pack this to avoid bogus arm OABI complaints */
@@ -239,6 +240,7 @@ enum io_uring_op {
 	IORING_OP_FUTEX_WAIT,
 	IORING_OP_FUTEX_WAKE,
 	IORING_OP_FUTEX_WAITV,
+	IORING_OP_WAITID,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
diff --git a/io_uring/Makefile b/io_uring/Makefile
index 2e4779bc550c..e5be47e4fc3b 100644
--- a/io_uring/Makefile
+++ b/io_uring/Makefile
@@ -8,6 +8,6 @@ obj-$(CONFIG_IO_URING)		+= io_uring.o xattr.o nop.o fs.o splice.o \
 					statx.o net.o msg_ring.o timeout.o \
 					sqpoll.o fdinfo.o tctx.o poll.o \
 					cancel.o kbuf.o rsrc.o rw.o opdef.o \
-					notif.o
+					notif.o waitid.o
 obj-$(CONFIG_IO_WQ)		+= io-wq.o
 obj-$(CONFIG_FUTEX)		+= futex.o
diff --git a/io_uring/cancel.c b/io_uring/cancel.c
index 3dba8ccb1cd8..a01f3f41012b 100644
--- a/io_uring/cancel.c
+++ b/io_uring/cancel.c
@@ -16,6 +16,7 @@
 #include "poll.h"
 #include "timeout.h"
 #include "futex.h"
+#include "waitid.h"
 #include "cancel.h"
 
 struct io_cancel {
@@ -124,6 +125,10 @@ int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
 	if (ret != -ENOENT)
 		return ret;
 
+	ret = io_waitid_cancel(ctx, cd, issue_flags);
+	if (ret != -ENOENT)
+		return ret;
+
 	spin_lock(&ctx->completion_lock);
 	if (!(cd->flags & IORING_ASYNC_CANCEL_FD))
 		ret = io_timeout_cancel(ctx, cd);
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index f1dbbe51f867..d33d9d91f94c 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -93,6 +93,7 @@
 #include "net.h"
 #include "notif.h"
 #include "futex.h"
+#include "waitid.h"
 
 #include "timeout.h"
 #include "poll.h"
@@ -335,6 +336,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	ctx->submit_state.free_list.next = NULL;
 	INIT_WQ_LIST(&ctx->locked_free_list);
 	INIT_HLIST_HEAD(&ctx->futex_list);
+	INIT_HLIST_HEAD(&ctx->waitid_list);
 	INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
 	INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
 	return ctx;
@@ -3277,6 +3279,7 @@ static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 	mutex_lock(&ctx->uring_lock);
 	ret |= io_poll_remove_all(ctx, task, cancel_all);
 	ret |= io_futex_remove_all(ctx, task, cancel_all);
+	ret |= io_waitid_remove_all(ctx, task, cancel_all);
 	mutex_unlock(&ctx->uring_lock);
 	ret |= io_kill_timeouts(ctx, task, cancel_all);
 	if (task)
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index b9e1e12cac9c..1c5cfa9d7b31 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -34,6 +34,7 @@
 #include "cancel.h"
 #include "rw.h"
 #include "futex.h"
+#include "waitid.h"
 
 static int io_no_issue(struct io_kiocb *req, unsigned int issue_flags)
 {
@@ -453,6 +454,10 @@ const struct io_issue_def io_issue_defs[] = {
 		.prep			= io_eopnotsupp_prep,
 #endif
 	},
+	[IORING_OP_WAITID] = {
+		.prep			= io_waitid_prep,
+		.issue			= io_waitid,
+	},
 };
 
 const struct io_cold_def io_cold_defs[] = {
@@ -681,6 +686,10 @@ const struct io_cold_def io_cold_defs[] = {
 	[IORING_OP_FUTEX_WAITV] = {
 		.name			= "FUTEX_WAITV",
 	},
+	[IORING_OP_WAITID] = {
+		.name			= "WAITID",
+		.async_size		= sizeof(struct io_waitid_async),
+	},
 };
 
 const char *io_uring_get_opcode(u8 opcode)
diff --git a/io_uring/waitid.c b/io_uring/waitid.c
new file mode 100644
index 000000000000..29c8467e3242
--- /dev/null
+++ b/io_uring/waitid.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for async notification of waitid
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/compat.h>
+#include <linux/io_uring.h>
+
+#include <uapi/linux/io_uring.h>
+
+#include "io_uring.h"
+#include "cancel.h"
+#include "waitid.h"
+#include "../kernel/exit.h"
+
+struct io_waitid {
+	struct file *file;
+	int which;
+	pid_t upid;
+	int options;
+	struct wait_queue_head *head;
+	struct siginfo __user *infop;
+	struct waitid_info info;
+};
+
+static void io_waitid_free(struct io_kiocb *req)
+{
+	struct io_waitid_async *iwa = req->async_data;
+
+	put_pid(iwa->wo.wo_pid);
+	kfree(req->async_data);
+	req->async_data = NULL;
+	req->flags &= ~REQ_F_ASYNC_DATA;
+}
+
+#ifdef CONFIG_COMPAT
+static bool io_waitid_compat_copy_si(struct io_waitid *iw, int signo)
+{
+	struct compat_siginfo __user *infop;
+	bool ret;
+
+	infop = (struct compat_siginfo __user *) iw->infop;
+
+	if (!user_write_access_begin(infop, sizeof(*infop)))
+		return false;
+
+	unsafe_put_user(signo, &infop->si_signo, Efault);
+	unsafe_put_user(0, &infop->si_errno, Efault);
+	unsafe_put_user(iw->info.cause, &infop->si_code, Efault);
+	unsafe_put_user(iw->info.pid, &infop->si_pid, Efault);
+	unsafe_put_user(iw->info.uid, &infop->si_uid, Efault);
+	unsafe_put_user(iw->info.status, &infop->si_status, Efault);
+	ret = true;
+done:
+	user_write_access_end();
+	return ret;
+Efault:
+	ret = false;
+	goto done;
+}
+#endif
+
+static bool io_waitid_copy_si(struct io_kiocb *req, int signo)
+{
+	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+	bool ret;
+
+	if (!iw->infop)
+		return true;
+
+#ifdef CONFIG_COMPAT
+	if (req->ctx->compat)
+		return io_waitid_compat_copy_si(iw, signo);
+#endif
+
+	if (!user_write_access_begin(iw->infop, sizeof(*iw->infop)))
+		return false;
+
+	unsafe_put_user(signo, &iw->infop->si_signo, Efault);
+	unsafe_put_user(0, &iw->infop->si_errno, Efault);
+	unsafe_put_user(iw->info.cause, &iw->infop->si_code, Efault);
+	unsafe_put_user(iw->info.pid, &iw->infop->si_pid, Efault);
+	unsafe_put_user(iw->info.uid, &iw->infop->si_uid, Efault);
+	unsafe_put_user(iw->info.status, &iw->infop->si_status, Efault);
+	ret = true;
+done:
+	user_write_access_end();
+	return ret;
+Efault:
+	ret = false;
+	goto done;
+}
+
+static int io_waitid_finish(struct io_kiocb *req, int ret)
+{
+	int signo = 0;
+
+	if (ret > 0) {
+		signo = SIGCHLD;
+		ret = 0;
+	}
+
+	if (!io_waitid_copy_si(req, signo))
+		ret = -EFAULT;
+	io_waitid_free(req);
+	return ret;
+}
+
+static void io_waitid_complete(struct io_kiocb *req, int ret)
+{
+	struct io_tw_state ts = { .locked = true };
+
+	lockdep_assert_held(&req->ctx->uring_lock);
+
+	/*
+	 * Did cancel find it meanwhile?
+	 */
+	if (hlist_unhashed(&req->hash_node))
+		return;
+
+	hlist_del_init(&req->hash_node);
+
+	ret = io_waitid_finish(req, ret);
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_set_res(req, ret, 0);
+	io_req_task_complete(req, &ts);
+}
+
+static bool __io_waitid_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req)
+{
+	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+	struct wait_queue_head *head;
+
+	head = READ_ONCE(iw->head);
+	if (head) {
+		struct io_waitid_async *iwa = req->async_data;
+
+		spin_lock_irq(&head->lock);
+		list_del_init(&iwa->wo.child_wait.entry);
+		iw->head = NULL;
+		spin_unlock_irq(&head->lock);
+		io_waitid_complete(req, -ECANCELED);
+		return true;
+	}
+
+	return false;
+}
+
+int io_waitid_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
+		     unsigned int issue_flags)
+{
+	struct hlist_node *tmp;
+	struct io_kiocb *req;
+	int nr = 0;
+
+	if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_FD_FIXED))
+		return -ENOENT;
+
+	io_ring_submit_lock(ctx, issue_flags);
+	hlist_for_each_entry_safe(req, tmp, &ctx->waitid_list, hash_node) {
+		if (req->cqe.user_data != cd->data &&
+		    !(cd->flags & IORING_ASYNC_CANCEL_ANY))
+			continue;
+		if (__io_waitid_cancel(ctx, req))
+			nr++;
+		if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
+			break;
+	}
+	io_ring_submit_unlock(ctx, issue_flags);
+
+	if (nr)
+		return nr;
+
+	return -ENOENT;
+}
+
+bool io_waitid_remove_all(struct io_ring_ctx *ctx, struct task_struct *task,
+			  bool cancel_all)
+{
+	struct hlist_node *tmp;
+	struct io_kiocb *req;
+	bool found = false;
+
+	lockdep_assert_held(&ctx->uring_lock);
+
+	hlist_for_each_entry_safe(req, tmp, &ctx->waitid_list, hash_node) {
+		if (!io_match_task_safe(req, task, cancel_all))
+			continue;
+		__io_waitid_cancel(ctx, req);
+		found = true;
+	}
+
+	return found;
+}
+
+static void io_waitid_cb(struct io_kiocb *req, struct io_tw_state *ts)
+{
+	struct io_waitid_async *iwa = req->async_data;
+	struct io_ring_ctx *ctx = req->ctx;
+	int ret;
+
+	/*
+	 * If we get -ERESTARTSYS here, we need to re-arm and check again
+	 * to ensure we get another callback. If the retry works, then we can
+	 * just remove ourselves from the waitqueue again and finish the
+	 * request.
+	 */
+	ret = __do_wait(&iwa->wo);
+	if (unlikely(ret == -ERESTARTSYS)) {
+		struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+
+		io_tw_lock(ctx, ts);
+		iw->head = &current->signal->wait_chldexit;
+		add_wait_queue(iw->head, &iwa->wo.child_wait);
+		ret = __do_wait(&iwa->wo);
+		if (ret == -ERESTARTSYS)
+			return;
+
+		remove_wait_queue(iw->head, &iwa->wo.child_wait);
+		iw->head = NULL;
+	}
+
+	io_tw_lock(ctx, ts);
+	io_waitid_complete(req, ret);
+}
+
+static int io_waitid_wait(struct wait_queue_entry *wait, unsigned mode,
+			  int sync, void *key)
+{
+	struct wait_opts *wo = container_of(wait, struct wait_opts, child_wait);
+	struct io_waitid_async *iwa = container_of(wo, struct io_waitid_async, wo);
+	struct io_kiocb *req = iwa->req;
+	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+	struct task_struct *p = key;
+
+	if (!pid_child_should_wake(wo, p))
+		return 0;
+
+	req->io_task_work.func = io_waitid_cb;
+	io_req_task_work_add(req);
+	iw->head = NULL;
+	list_del_init(&wait->entry);
+	return 1;
+}
+
+int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+
+	if (sqe->addr || sqe->buf_index || sqe->addr3 || sqe->waitid_flags)
+		return -EINVAL;
+
+	iw->which = READ_ONCE(sqe->len);
+	iw->options = READ_ONCE(sqe->file_index);
+	iw->upid = READ_ONCE(sqe->fd);
+	iw->infop = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+	iw->head = NULL;
+	return 0;
+}
+
+int io_waitid(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_waitid_async *iwa;
+	int ret;
+
+	if (io_alloc_async_data(req))
+		return -ENOMEM;
+
+	iwa = req->async_data;
+	iwa->req = req;
+
+	ret = kernel_waitid_prepare(&iwa->wo, iw->which, iw->upid, &iw->info,
+					iw->options, NULL);
+	if (ret)
+		goto done;
+
+	/*
+	 * Arm our callback and add us to the waitqueue, in case no events
+	 * are available.
+	 */
+	init_waitqueue_func_entry(&iwa->wo.child_wait, io_waitid_wait);
+	iwa->wo.child_wait.private = req->task;
+	iw->head = &current->signal->wait_chldexit;
+	add_wait_queue(iw->head, &iwa->wo.child_wait);
+
+	io_ring_submit_lock(ctx, issue_flags);
+	hlist_add_head(&req->hash_node, &ctx->waitid_list);
+
+	ret = __do_wait(&iwa->wo);
+	if (ret == -ERESTARTSYS) {
+		io_ring_submit_unlock(ctx, issue_flags);
+		return IOU_ISSUE_SKIP_COMPLETE;
+	}
+
+	hlist_del_init(&req->hash_node);
+	remove_wait_queue(iw->head, &iwa->wo.child_wait);
+	iw->head = NULL;
+	ret = io_waitid_finish(req, ret);
+
+	io_ring_submit_unlock(ctx, issue_flags);
+done:
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_set_res(req, ret, 0);
+	return IOU_OK;
+}
diff --git a/io_uring/waitid.h b/io_uring/waitid.h
new file mode 100644
index 000000000000..956a8adafe8c
--- /dev/null
+++ b/io_uring/waitid.h
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "../kernel/exit.h"
+
+struct io_waitid_async {
+	struct io_kiocb *req;
+	struct wait_opts wo;
+};
+
+int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_waitid(struct io_kiocb *req, unsigned int issue_flags);
+int io_waitid_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
+		     unsigned int issue_flags);
+bool io_waitid_remove_all(struct io_ring_ctx *ctx, struct task_struct *task,
+			  bool cancel_all);
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 5/5] io_uring: add IORING_OP_WAITID support
  2023-09-09 15:11 [PATCHSET v4 0/5] Add io_uring support for waitid Jens Axboe
@ 2023-09-09 15:11 ` Jens Axboe
  0 siblings, 0 replies; 21+ messages in thread
From: Jens Axboe @ 2023-09-09 15:11 UTC (permalink / raw)
  To: io-uring, linux-kernel; +Cc: brauner, arnd, asml.silence, Jens Axboe

This adds support for a fully async version of waitid(2). If an event
isn't immediately available, the request waits for a callback to trigger
a retry.

The format of the sqe is as follows:

sqe->len		The 'which', the idtype being queried/waited for.
sqe->fd			The 'pid' (or id) being waited for.
sqe->file_index		The 'options' being set.
sqe->addr2		A pointer to siginfo_t, if any, being filled in.

buf_index, addr3, and waitid_flags are reserved/unused for now.
waitid_flags will be used for options for this request type. One
interesting use case may be to add multi-shot support, so that the
request stays armed and posts a notification every time a monitored
process state change occurs.

Note that this does not support rusage, on Arnd's recommendation.

See the waitid(2) man page for details on the arguments.

Signed-off-by: Jens Axboe <[email protected]>
---
 include/linux/io_uring_types.h |   2 +
 include/uapi/linux/io_uring.h  |   2 +
 io_uring/Makefile              |   3 +-
 io_uring/cancel.c              |   5 +
 io_uring/io_uring.c            |   3 +
 io_uring/opdef.c               |  10 +-
 io_uring/waitid.c              | 372 +++++++++++++++++++++++++++++++++
 io_uring/waitid.h              |  15 ++
 8 files changed, 410 insertions(+), 2 deletions(-)
 create mode 100644 io_uring/waitid.c
 create mode 100644 io_uring/waitid.h

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 13d19b9be9f4..fe1c5d4ec56c 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -313,6 +313,8 @@ struct io_ring_ctx {
 	struct list_head	cq_overflow_list;
 	struct io_hash_table	cancel_table;
 
+	struct hlist_head	waitid_list;
+
 	const struct cred	*sq_creds;	/* cred used for __io_sq_thread() */
 	struct io_sq_data	*sq_data;	/* if using sq thread polling */
 
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 8e61f8b7c2ce..90d5c7ceaedb 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -65,6 +65,7 @@ struct io_uring_sqe {
 		__u32		xattr_flags;
 		__u32		msg_ring_flags;
 		__u32		uring_cmd_flags;
+		__u32		waitid_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	/* pack this to avoid bogus arm OABI complaints */
@@ -240,6 +241,7 @@ enum io_uring_op {
 	IORING_OP_URING_CMD,
 	IORING_OP_SEND_ZC,
 	IORING_OP_SENDMSG_ZC,
+	IORING_OP_WAITID,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
diff --git a/io_uring/Makefile b/io_uring/Makefile
index 8cc8e5387a75..7bd64e442567 100644
--- a/io_uring/Makefile
+++ b/io_uring/Makefile
@@ -7,5 +7,6 @@ obj-$(CONFIG_IO_URING)		+= io_uring.o xattr.o nop.o fs.o splice.o \
 					openclose.o uring_cmd.o epoll.o \
 					statx.o net.o msg_ring.o timeout.o \
 					sqpoll.o fdinfo.o tctx.o poll.o \
-					cancel.o kbuf.o rsrc.o rw.o opdef.o notif.o
+					cancel.o kbuf.o rsrc.o rw.o opdef.o \
+					notif.o waitid.o
 obj-$(CONFIG_IO_WQ)		+= io-wq.o
diff --git a/io_uring/cancel.c b/io_uring/cancel.c
index 7b23607cf4af..eb77a51c5a79 100644
--- a/io_uring/cancel.c
+++ b/io_uring/cancel.c
@@ -15,6 +15,7 @@
 #include "tctx.h"
 #include "poll.h"
 #include "timeout.h"
+#include "waitid.h"
 #include "cancel.h"
 
 struct io_cancel {
@@ -119,6 +120,10 @@ int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
 	if (ret != -ENOENT)
 		return ret;
 
+	ret = io_waitid_cancel(ctx, cd, issue_flags);
+	if (ret != -ENOENT)
+		return ret;
+
 	spin_lock(&ctx->completion_lock);
 	if (!(cd->flags & IORING_ASYNC_CANCEL_FD))
 		ret = io_timeout_cancel(ctx, cd);
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 783ed0fff71b..2dff4772bf14 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -92,6 +92,7 @@
 #include "cancel.h"
 #include "net.h"
 #include "notif.h"
+#include "waitid.h"
 
 #include "timeout.h"
 #include "poll.h"
@@ -348,6 +349,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->tctx_list);
 	ctx->submit_state.free_list.next = NULL;
 	INIT_WQ_LIST(&ctx->locked_free_list);
+	INIT_HLIST_HEAD(&ctx->waitid_list);
 	INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
 	INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
 	return ctx;
@@ -3303,6 +3305,7 @@ static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 	ret |= io_cancel_defer_files(ctx, task, cancel_all);
 	mutex_lock(&ctx->uring_lock);
 	ret |= io_poll_remove_all(ctx, task, cancel_all);
+	ret |= io_waitid_remove_all(ctx, task, cancel_all);
 	mutex_unlock(&ctx->uring_lock);
 	ret |= io_kill_timeouts(ctx, task, cancel_all);
 	if (task)
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 3b9c6489b8b6..84e55b325d21 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -33,6 +33,7 @@
 #include "poll.h"
 #include "cancel.h"
 #include "rw.h"
+#include "waitid.h"
 
 static int io_no_issue(struct io_kiocb *req, unsigned int issue_flags)
 {
@@ -428,9 +429,12 @@ const struct io_issue_def io_issue_defs[] = {
 		.prep			= io_eopnotsupp_prep,
 #endif
 	},
+	[IORING_OP_WAITID] = {
+		.prep			= io_waitid_prep,
+		.issue			= io_waitid,
+	},
 };
 
-
 const struct io_cold_def io_cold_defs[] = {
 	[IORING_OP_NOP] = {
 		.name			= "NOP",
@@ -648,6 +652,10 @@ const struct io_cold_def io_cold_defs[] = {
 		.fail			= io_sendrecv_fail,
 #endif
 	},
+	[IORING_OP_WAITID] = {
+		.name			= "WAITID",
+		.async_size		= sizeof(struct io_waitid_async),
+	},
 };
 
 const char *io_uring_get_opcode(u8 opcode)
diff --git a/io_uring/waitid.c b/io_uring/waitid.c
new file mode 100644
index 000000000000..6f851978606d
--- /dev/null
+++ b/io_uring/waitid.c
@@ -0,0 +1,372 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for async notification of waitid
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/compat.h>
+#include <linux/io_uring.h>
+
+#include <uapi/linux/io_uring.h>
+
+#include "io_uring.h"
+#include "cancel.h"
+#include "waitid.h"
+#include "../kernel/exit.h"
+
+static void io_waitid_cb(struct io_kiocb *req, struct io_tw_state *ts);
+
+#define IO_WAITID_CANCEL_FLAG	BIT(31)
+#define IO_WAITID_REF_MASK	GENMASK(30, 0)
+
+struct io_waitid {
+	struct file *file;
+	int which;
+	pid_t upid;
+	int options;
+	atomic_t refs;
+	struct wait_queue_head *head;
+	struct siginfo __user *infop;
+	struct waitid_info info;
+};
+
+static void io_waitid_free(struct io_kiocb *req)
+{
+	struct io_waitid_async *iwa = req->async_data;
+
+	put_pid(iwa->wo.wo_pid);
+	kfree(req->async_data);
+	req->async_data = NULL;
+	req->flags &= ~REQ_F_ASYNC_DATA;
+}
+
+#ifdef CONFIG_COMPAT
+static bool io_waitid_compat_copy_si(struct io_waitid *iw, int signo)
+{
+	struct compat_siginfo __user *infop;
+	bool ret;
+
+	infop = (struct compat_siginfo __user *) iw->infop;
+
+	if (!user_write_access_begin(infop, sizeof(*infop)))
+		return false;
+
+	unsafe_put_user(signo, &infop->si_signo, Efault);
+	unsafe_put_user(0, &infop->si_errno, Efault);
+	unsafe_put_user(iw->info.cause, &infop->si_code, Efault);
+	unsafe_put_user(iw->info.pid, &infop->si_pid, Efault);
+	unsafe_put_user(iw->info.uid, &infop->si_uid, Efault);
+	unsafe_put_user(iw->info.status, &infop->si_status, Efault);
+	ret = true;
+done:
+	user_write_access_end();
+	return ret;
+Efault:
+	ret = false;
+	goto done;
+}
+#endif
+
+static bool io_waitid_copy_si(struct io_kiocb *req, int signo)
+{
+	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+	bool ret;
+
+	if (!iw->infop)
+		return true;
+
+#ifdef CONFIG_COMPAT
+	if (req->ctx->compat)
+		return io_waitid_compat_copy_si(iw, signo);
+#endif
+
+	if (!user_write_access_begin(iw->infop, sizeof(*iw->infop)))
+		return false;
+
+	unsafe_put_user(signo, &iw->infop->si_signo, Efault);
+	unsafe_put_user(0, &iw->infop->si_errno, Efault);
+	unsafe_put_user(iw->info.cause, &iw->infop->si_code, Efault);
+	unsafe_put_user(iw->info.pid, &iw->infop->si_pid, Efault);
+	unsafe_put_user(iw->info.uid, &iw->infop->si_uid, Efault);
+	unsafe_put_user(iw->info.status, &iw->infop->si_status, Efault);
+	ret = true;
+done:
+	user_write_access_end();
+	return ret;
+Efault:
+	ret = false;
+	goto done;
+}
+
+static int io_waitid_finish(struct io_kiocb *req, int ret)
+{
+	int signo = 0;
+
+	if (ret > 0) {
+		signo = SIGCHLD;
+		ret = 0;
+	}
+
+	if (!io_waitid_copy_si(req, signo))
+		ret = -EFAULT;
+	io_waitid_free(req);
+	return ret;
+}
+
+static void io_waitid_complete(struct io_kiocb *req, int ret)
+{
+	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+	struct io_tw_state ts = { .locked = true };
+
+	/* anyone completing better be holding a reference */
+	WARN_ON_ONCE(!(atomic_read(&iw->refs) & IO_WAITID_REF_MASK));
+
+	lockdep_assert_held(&req->ctx->uring_lock);
+
+	/*
+	 * Did cancel find it meanwhile?
+	 */
+	if (hlist_unhashed(&req->hash_node))
+		return;
+
+	hlist_del_init(&req->hash_node);
+
+	ret = io_waitid_finish(req, ret);
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_set_res(req, ret, 0);
+	io_req_task_complete(req, &ts);
+}
+
+static bool __io_waitid_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req)
+{
+	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+	struct io_waitid_async *iwa = req->async_data;
+
+	/*
+	 * Mark us canceled regardless of ownership. This will prevent a
+	 * potential retry from a spurious wakeup.
+	 */
+	atomic_or(IO_WAITID_CANCEL_FLAG, &iw->refs);
+
+	/* claim ownership */
+	if (atomic_fetch_inc(&iw->refs) & IO_WAITID_REF_MASK)
+		return false;
+
+	spin_lock_irq(&iw->head->lock);
+	list_del_init(&iwa->wo.child_wait.entry);
+	spin_unlock_irq(&iw->head->lock);
+	io_waitid_complete(req, -ECANCELED);
+	return true;
+}
+
+int io_waitid_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
+		     unsigned int issue_flags)
+{
+	struct hlist_node *tmp;
+	struct io_kiocb *req;
+	int nr = 0;
+
+	if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_FD_FIXED))
+		return -ENOENT;
+
+	io_ring_submit_lock(ctx, issue_flags);
+	hlist_for_each_entry_safe(req, tmp, &ctx->waitid_list, hash_node) {
+		if (req->cqe.user_data != cd->data &&
+		    !(cd->flags & IORING_ASYNC_CANCEL_ANY))
+			continue;
+		if (__io_waitid_cancel(ctx, req))
+			nr++;
+		if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
+			break;
+	}
+	io_ring_submit_unlock(ctx, issue_flags);
+
+	if (nr)
+		return nr;
+
+	return -ENOENT;
+}
+
+bool io_waitid_remove_all(struct io_ring_ctx *ctx, struct task_struct *task,
+			  bool cancel_all)
+{
+	struct hlist_node *tmp;
+	struct io_kiocb *req;
+	bool found = false;
+
+	lockdep_assert_held(&ctx->uring_lock);
+
+	hlist_for_each_entry_safe(req, tmp, &ctx->waitid_list, hash_node) {
+		if (!io_match_task_safe(req, task, cancel_all))
+			continue;
+		__io_waitid_cancel(ctx, req);
+		found = true;
+	}
+
+	return found;
+}
+
+static inline bool io_waitid_drop_issue_ref(struct io_kiocb *req)
+{
+	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+	struct io_waitid_async *iwa = req->async_data;
+
+	if (!atomic_sub_return(1, &iw->refs))
+		return false;
+
+	/*
+	 * Wakeup triggered, racing with us. It was prevented from
+	 * completing because of that, queue up the tw to do that.
+	 */
+	req->io_task_work.func = io_waitid_cb;
+	io_req_task_work_add(req);
+	remove_wait_queue(iw->head, &iwa->wo.child_wait);
+	return true;
+}
+
+static void io_waitid_cb(struct io_kiocb *req, struct io_tw_state *ts)
+{
+	struct io_waitid_async *iwa = req->async_data;
+	struct io_ring_ctx *ctx = req->ctx;
+	int ret;
+
+	io_tw_lock(ctx, ts);
+
+	ret = __do_wait(&iwa->wo);
+
+	/*
+	 * If we get -ERESTARTSYS here, we need to re-arm and check again
+	 * to ensure we get another callback. If the retry works, then we can
+	 * just remove ourselves from the waitqueue again and finish the
+	 * request.
+	 */
+	if (unlikely(ret == -ERESTARTSYS)) {
+		struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+
+		/* Don't retry if cancel found it meanwhile */
+		ret = -ECANCELED;
+		if (!(atomic_read(&iw->refs) & IO_WAITID_CANCEL_FLAG)) {
+			iw->head = &current->signal->wait_chldexit;
+			add_wait_queue(iw->head, &iwa->wo.child_wait);
+			ret = __do_wait(&iwa->wo);
+			if (ret == -ERESTARTSYS) {
+				/* retry armed, drop our ref */
+				io_waitid_drop_issue_ref(req);
+				return;
+			}
+
+			remove_wait_queue(iw->head, &iwa->wo.child_wait);
+		}
+	}
+
+	io_waitid_complete(req, ret);
+}
+
+static int io_waitid_wait(struct wait_queue_entry *wait, unsigned mode,
+			  int sync, void *key)
+{
+	struct wait_opts *wo = container_of(wait, struct wait_opts, child_wait);
+	struct io_waitid_async *iwa = container_of(wo, struct io_waitid_async, wo);
+	struct io_kiocb *req = iwa->req;
+	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+	struct task_struct *p = key;
+
+	if (!pid_child_should_wake(wo, p))
+		return 0;
+
+	/* cancel is in progress */
+	if (atomic_fetch_inc(&iw->refs) & IO_WAITID_REF_MASK)
+		return 1;
+
+	req->io_task_work.func = io_waitid_cb;
+	io_req_task_work_add(req);
+	list_del_init(&wait->entry);
+	return 1;
+}
+
+int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+
+	if (sqe->addr || sqe->buf_index || sqe->addr3 || sqe->waitid_flags)
+		return -EINVAL;
+
+	iw->which = READ_ONCE(sqe->len);
+	iw->upid = READ_ONCE(sqe->fd);
+	iw->options = READ_ONCE(sqe->file_index);
+	iw->infop = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+	return 0;
+}
+
+int io_waitid(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_waitid_async *iwa;
+	int ret;
+
+	if (io_alloc_async_data(req))
+		return -ENOMEM;
+
+	iwa = req->async_data;
+	iwa->req = req;
+
+	ret = kernel_waitid_prepare(&iwa->wo, iw->which, iw->upid, &iw->info,
+					iw->options, NULL);
+	if (ret)
+		goto done;
+
+	/*
+	 * Mark the request as busy upfront, in case we're racing with the
+	 * wakeup. If we are, then we'll notice when we drop this initial
+	 * reference again after arming.
+	 */
+	atomic_set(&iw->refs, 1);
+
+	/*
+	 * Cancel must hold the ctx lock, so cancelation cannot find us
+	 * before a) we are on the list, and b) the lock has been dropped.
+	 * Until then, we only need to worry about racing with the wakeup
+	 * callback.
+	 */
+	io_ring_submit_lock(ctx, issue_flags);
+	hlist_add_head(&req->hash_node, &ctx->waitid_list);
+
+	init_waitqueue_func_entry(&iwa->wo.child_wait, io_waitid_wait);
+	iwa->wo.child_wait.private = req->task;
+	iw->head = &current->signal->wait_chldexit;
+	add_wait_queue(iw->head, &iwa->wo.child_wait);
+
+	ret = __do_wait(&iwa->wo);
+	if (ret == -ERESTARTSYS) {
+		/*
+		 * Nobody else grabbed a reference, it'll complete when we get
+		 * a waitqueue callback, or if someone cancels it.
+		 */
+		if (!io_waitid_drop_issue_ref(req)) {
+			io_ring_submit_unlock(ctx, issue_flags);
+			return IOU_ISSUE_SKIP_COMPLETE;
+		}
+
+		/*
+		 * Wakeup triggered, racing with us. It was prevented from
+		 * completing because of that, queue up the tw to do that.
+		 */
+		io_ring_submit_unlock(ctx, issue_flags);
+		return IOU_ISSUE_SKIP_COMPLETE;
+	}
+
+	hlist_del_init(&req->hash_node);
+	remove_wait_queue(iw->head, &iwa->wo.child_wait);
+	ret = io_waitid_finish(req, ret);
+
+	io_ring_submit_unlock(ctx, issue_flags);
+done:
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_set_res(req, ret, 0);
+	return IOU_OK;
+}
diff --git a/io_uring/waitid.h b/io_uring/waitid.h
new file mode 100644
index 000000000000..956a8adafe8c
--- /dev/null
+++ b/io_uring/waitid.h
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "../kernel/exit.h"
+
+struct io_waitid_async {
+	struct io_kiocb *req;
+	struct wait_opts wo;
+};
+
+int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_waitid(struct io_kiocb *req, unsigned int issue_flags);
+int io_waitid_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
+		     unsigned int issue_flags);
+bool io_waitid_remove_all(struct io_ring_ctx *ctx, struct task_struct *task,
+			  bool cancel_all);
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2023-09-09 16:25 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-02 23:14 [PATCHSET v2] Add io_uring support for waitid Jens Axboe
2023-08-02 23:14 ` [PATCH 1/5] exit: abstract out should_wake helper for child_wait_callback() Jens Axboe
2023-08-02 23:14 ` [PATCH 2/5] exit: move core of do_wait() into helper Jens Axboe
2023-08-02 23:14 ` [PATCH 3/5] exit: add kernel_waitid_prepare() helper Jens Axboe
2023-08-02 23:14 ` [PATCH 4/5] exit: add internal include file with helpers Jens Axboe
2023-08-02 23:14 ` [PATCH 5/5] io_uring: add IORING_OP_WAITID support Jens Axboe
2023-08-09 11:27   ` Christian Brauner
2023-08-09 15:11     ` Jens Axboe
  -- strict thread matches above, loose matches on Subject: below --
2023-09-09 15:11 [PATCHSET v4 0/5] Add io_uring support for waitid Jens Axboe
2023-09-09 15:11 ` [PATCH 5/5] io_uring: add IORING_OP_WAITID support Jens Axboe
2023-08-11 14:16 [PATCHSET v3 0/5] Add io_uring support for waitid Jens Axboe
2023-08-11 14:16 ` [PATCH 5/5] io_uring: add IORING_OP_WAITID support Jens Axboe
2023-07-11 20:43 [PATCHSET 0/5] Add io_uring support for waitid Jens Axboe
2023-07-11 20:43 ` [PATCH 5/5] io_uring: add IORING_OP_WAITID support Jens Axboe
2023-07-11 21:11   ` Arnd Bergmann
2023-07-11 21:22     ` Jens Axboe
2023-07-11 22:18       ` Jens Axboe
2023-07-14 15:47         ` Christian Brauner
2023-07-14 18:33           ` Arnd Bergmann
2023-07-14 20:14             ` Jens Axboe
2023-07-15  7:12               ` Arnd Bergmann
2023-07-15 14:06                 ` Jens Axboe
2023-07-15 14:34                   ` Jens Axboe
2023-07-15 20:23                   ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox