public inbox for [email protected]
 help / color / mirror / Atom feed
* [PATCHSET 0/4] Add support for shared io-wq backends
@ 2020-01-23 23:16 Jens Axboe
  2020-01-23 23:16 ` [PATCH 1/4] io-wq: make the io_wq ref counted Jens Axboe
                   ` (6 more replies)
  0 siblings, 7 replies; 36+ messages in thread
From: Jens Axboe @ 2020-01-23 23:16 UTC (permalink / raw)
  To: io-uring

Sometimes an applications wants to use multiple smaller rings, because
it's more efficient than sharing a ring. The downside of that is that
we'll create the io-wq backend separately for all of them, while they
would be perfectly happy just sharing that.

This patchset adds support for that. io_uring_params grows an 'id' field,
which denotes an identifier for the async backend. If an application
wants to utilize sharing, it'll simply grab the id from the first ring
created, and pass it in to the next one and set IORING_SETUP_SHARED. This
allows efficient sharing of backend resources, while allowing multiple
rings in the application or library.

Not a huge fan of the IORING_SETUP_SHARED name, we should probably make
that better (I'm taking suggestions).

-- 
Jens Axboe



^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 1/4] io-wq: make the io_wq ref counted
  2020-01-23 23:16 [PATCHSET 0/4] Add support for shared io-wq backends Jens Axboe
@ 2020-01-23 23:16 ` Jens Axboe
  2020-01-23 23:16 ` [PATCH 2/4] io-wq: add 'id' to io_wq Jens Axboe
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 36+ messages in thread
From: Jens Axboe @ 2020-01-23 23:16 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

In preparation for sharing an io-wq across different users, add a
reference count that manages destruction of it.

Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io-wq.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 4d902c19ee5f..54e270ae12ab 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -113,6 +113,8 @@ struct io_wq {
 	struct mm_struct *mm;
 	refcount_t refs;
 	struct completion done;
+
+	refcount_t use_refs;
 };
 
 static bool io_worker_get(struct io_worker *worker)
@@ -1073,6 +1075,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 			ret = -ENOMEM;
 			goto err;
 		}
+		refcount_set(&wq->use_refs, 1);
 		reinit_completion(&wq->done);
 		return wq;
 	}
@@ -1093,7 +1096,7 @@ static bool io_wq_worker_wake(struct io_worker *worker, void *data)
 	return false;
 }
 
-void io_wq_destroy(struct io_wq *wq)
+static void __io_wq_destroy(struct io_wq *wq)
 {
 	int node;
 
@@ -1113,3 +1116,9 @@ void io_wq_destroy(struct io_wq *wq)
 	kfree(wq->wqes);
 	kfree(wq);
 }
+
+void io_wq_destroy(struct io_wq *wq)
+{
+	if (refcount_dec_and_test(&wq->use_refs))
+		__io_wq_destroy(wq);
+}
-- 
2.25.0


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 2/4] io-wq: add 'id' to io_wq
  2020-01-23 23:16 [PATCHSET 0/4] Add support for shared io-wq backends Jens Axboe
  2020-01-23 23:16 ` [PATCH 1/4] io-wq: make the io_wq ref counted Jens Axboe
@ 2020-01-23 23:16 ` Jens Axboe
  2020-01-23 23:16 ` [PATCH 3/4] io-wq: allow lookup of existing io_wq with given id Jens Axboe
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 36+ messages in thread
From: Jens Axboe @ 2020-01-23 23:16 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

Add each io_wq to a global list, and assign an id for each of them. This
is in preparation for attaching to an existing io_wq, rather than
creating a new one.

Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io-wq.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 54e270ae12ab..8cb0cff5f15c 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -16,9 +16,14 @@
 #include <linux/slab.h>
 #include <linux/kthread.h>
 #include <linux/rculist_nulls.h>
+#include <linux/idr.h>
 
 #include "io-wq.h"
 
+static LIST_HEAD(wq_list);
+static DEFINE_MUTEX(wq_lock);
+static DEFINE_IDA(wq_ida);
+
 #define WORKER_IDLE_TIMEOUT	(5 * HZ)
 
 enum {
@@ -115,6 +120,8 @@ struct io_wq {
 	struct completion done;
 
 	refcount_t use_refs;
+	struct list_head wq_list;
+	unsigned int id;
 };
 
 static bool io_worker_get(struct io_worker *worker)
@@ -1019,7 +1026,7 @@ void io_wq_flush(struct io_wq *wq)
 
 struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 {
-	int ret = -ENOMEM, node;
+	int ret = -ENOMEM, node, id;
 	struct io_wq *wq;
 
 	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
@@ -1076,6 +1083,16 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 			goto err;
 		}
 		refcount_set(&wq->use_refs, 1);
+
+		id = ida_simple_get(&wq_ida, 1, INT_MAX, GFP_KERNEL);
+		if (id == -ENOSPC) {
+			ret = -ENOMEM;
+			goto err;
+		}
+		mutex_lock(&wq_lock);
+		wq->id = id;
+		list_add(&wq->wq_list, &wq_list);
+		mutex_unlock(&wq_lock);
 		reinit_completion(&wq->done);
 		return wq;
 	}
@@ -1119,6 +1136,12 @@ static void __io_wq_destroy(struct io_wq *wq)
 
 void io_wq_destroy(struct io_wq *wq)
 {
-	if (refcount_dec_and_test(&wq->use_refs))
+	if (refcount_dec_and_test(&wq->use_refs)) {
+		mutex_lock(&wq_lock);
+		ida_simple_remove(&wq_ida, wq->id);
+		list_del(&wq->wq_list);
+		mutex_unlock(&wq_lock);
+
 		__io_wq_destroy(wq);
+	}
 }
-- 
2.25.0


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 3/4] io-wq: allow lookup of existing io_wq with given id
  2020-01-23 23:16 [PATCHSET 0/4] Add support for shared io-wq backends Jens Axboe
  2020-01-23 23:16 ` [PATCH 1/4] io-wq: make the io_wq ref counted Jens Axboe
  2020-01-23 23:16 ` [PATCH 2/4] io-wq: add 'id' to io_wq Jens Axboe
@ 2020-01-23 23:16 ` Jens Axboe
  2020-01-24  9:54   ` Stefan Metzmacher
  2020-01-23 23:16 ` [PATCH 4/4] io_uring: add support for sharing kernel io-wq workqueue Jens Axboe
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 36+ messages in thread
From: Jens Axboe @ 2020-01-23 23:16 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

If the id and user/creds match, return an existing io_wq if we can safely
grab a reference to it.

Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io-wq.c | 42 +++++++++++++++++++++++++++++++++++++++++-
 fs/io-wq.h |  3 +++
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 8cb0cff5f15c..a9985856033d 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -1024,7 +1024,7 @@ void io_wq_flush(struct io_wq *wq)
 	}
 }
 
-struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
+static struct io_wq *__io_wq_create(unsigned bounded, struct io_wq_data *data)
 {
 	int ret = -ENOMEM, node, id;
 	struct io_wq *wq;
@@ -1107,6 +1107,41 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 	return ERR_PTR(ret);
 }
 
+struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
+{
+	return __io_wq_create(bounded, data);
+}
+
+/*
+ * Find and return io_wq with given id and grab a reference to it.
+ */
+struct io_wq *io_wq_create_id(unsigned bounded, struct io_wq_data *data,
+			      unsigned int id)
+{
+	struct io_wq *wq, *ret = NULL;
+
+	mutex_lock(&wq_lock);
+	list_for_each_entry(wq, &wq_list, wq_list) {
+		if (id != wq->id)
+			continue;
+		if (data->creds != wq->creds || data->user != wq->user)
+			continue;
+		if (data->get_work != wq->get_work ||
+		    data->put_work != wq->put_work)
+			continue;
+		if (!refcount_inc_not_zero(&wq->use_refs))
+			continue;
+		ret = wq;
+		break;
+	}
+	mutex_unlock(&wq_lock);
+
+	if (!ret)
+		ret = io_wq_create(bounded, data);
+
+	return ret;
+}
+
 static bool io_wq_worker_wake(struct io_worker *worker, void *data)
 {
 	wake_up_process(worker->task);
@@ -1145,3 +1180,8 @@ void io_wq_destroy(struct io_wq *wq)
 		__io_wq_destroy(wq);
 	}
 }
+
+unsigned int io_wq_id(struct io_wq *wq)
+{
+	return wq->id;
+}
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 1cd039af8813..7abe2c56b535 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -98,7 +98,10 @@ struct io_wq_data {
 };
 
 struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
+struct io_wq *io_wq_create_id(unsigned bounded, struct io_wq_data *data,
+				unsigned int id);
 void io_wq_destroy(struct io_wq *wq);
+unsigned int io_wq_id(struct io_wq *wq);
 
 void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
 void io_wq_enqueue_hashed(struct io_wq *wq, struct io_wq_work *work, void *val);
-- 
2.25.0


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 4/4] io_uring: add support for sharing kernel io-wq workqueue
  2020-01-23 23:16 [PATCHSET 0/4] Add support for shared io-wq backends Jens Axboe
                   ` (2 preceding siblings ...)
  2020-01-23 23:16 ` [PATCH 3/4] io-wq: allow lookup of existing io_wq with given id Jens Axboe
@ 2020-01-23 23:16 ` Jens Axboe
  2020-01-24  9:51 ` [PATCHSET 0/4] Add support for shared io-wq backends Stefan Metzmacher
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 36+ messages in thread
From: Jens Axboe @ 2020-01-23 23:16 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

An id field is added to io_uring_params, which always returns the ID of
the io-wq backend that is associated with an io_uring context. If an 'id'
is provided and IORING_SETUP_SHARED is set in the creation flags, then
we attempt to attach to an existing io-wq instead of setting up a new one.

This allows creation of "sibling" io_urings, where we prefer to keep the
SQ/CQ private, but want to share the async backend to minimize the amount
of overhead associated with having multiple rings that belong to the same
backend.

Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io_uring.c                 | 9 +++++++--
 include/uapi/linux/io_uring.h | 4 +++-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 25f29ef81698..857cdf3f5ff6 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5796,13 +5796,18 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
 
 	/* Do QD, or 4 * CPUS, whatever is smallest */
 	concurrency = min(ctx->sq_entries, 4 * num_online_cpus());
-	ctx->io_wq = io_wq_create(concurrency, &data);
+
+	if (ctx->flags & IORING_SETUP_SHARED)
+		ctx->io_wq = io_wq_create_id(concurrency, &data, p->id);
+	else
+		ctx->io_wq = io_wq_create(concurrency, &data);
 	if (IS_ERR(ctx->io_wq)) {
 		ret = PTR_ERR(ctx->io_wq);
 		ctx->io_wq = NULL;
 		goto err;
 	}
 
+	p->id = io_wq_id(ctx->io_wq);
 	return 0;
 err:
 	io_finish_async(ctx);
@@ -6626,7 +6631,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
 
 	if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
 			IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
-			IORING_SETUP_CLAMP))
+			IORING_SETUP_CLAMP | IORING_SETUP_SHARED))
 		return -EINVAL;
 
 	ret = io_uring_create(entries, &p);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index cffa6fd33827..c74681d30e92 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -75,6 +75,7 @@ enum {
 #define IORING_SETUP_SQ_AFF	(1U << 2)	/* sq_thread_cpu is valid */
 #define IORING_SETUP_CQSIZE	(1U << 3)	/* app defines CQ size */
 #define IORING_SETUP_CLAMP	(1U << 4)	/* clamp SQ/CQ ring sizes */
+#define IORING_SETUP_SHARED	(1U << 5)	/* share workqueue */
 
 enum {
 	IORING_OP_NOP,
@@ -184,7 +185,8 @@ struct io_uring_params {
 	__u32 sq_thread_cpu;
 	__u32 sq_thread_idle;
 	__u32 features;
-	__u32 resv[4];
+	__u32 id;
+	__u32 resv[3];
 	struct io_sqring_offsets sq_off;
 	struct io_cqring_offsets cq_off;
 };
-- 
2.25.0


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-23 23:16 [PATCHSET 0/4] Add support for shared io-wq backends Jens Axboe
                   ` (3 preceding siblings ...)
  2020-01-23 23:16 ` [PATCH 4/4] io_uring: add support for sharing kernel io-wq workqueue Jens Axboe
@ 2020-01-24  9:51 ` Stefan Metzmacher
  2020-01-24 16:43   ` Jens Axboe
  2020-01-24 20:34 ` Pavel Begunkov
  2020-01-26  1:51 ` Daurnimator
  6 siblings, 1 reply; 36+ messages in thread
From: Stefan Metzmacher @ 2020-01-24  9:51 UTC (permalink / raw)
  To: Jens Axboe, io-uring


[-- Attachment #1.1: Type: text/plain, Size: 1033 bytes --]

Hi Jens,

> Sometimes an applications wants to use multiple smaller rings, because
> it's more efficient than sharing a ring. The downside of that is that
> we'll create the io-wq backend separately for all of them, while they
> would be perfectly happy just sharing that.
> 
> This patchset adds support for that. io_uring_params grows an 'id' field,
> which denotes an identifier for the async backend. If an application
> wants to utilize sharing, it'll simply grab the id from the first ring
> created, and pass it in to the next one and set IORING_SETUP_SHARED. This
> allows efficient sharing of backend resources, while allowing multiple
> rings in the application or library.

But still all rings need to use the same creds, correct?

> Not a huge fan of the IORING_SETUP_SHARED name, we should probably make
> that better (I'm taking suggestions).

The flag is supposed to be used for the new ring that attached to
the existing workqueue, correct?

What about IORING_ATTACH_TO_WORKQUEUE?

metze



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 3/4] io-wq: allow lookup of existing io_wq with given id
  2020-01-23 23:16 ` [PATCH 3/4] io-wq: allow lookup of existing io_wq with given id Jens Axboe
@ 2020-01-24  9:54   ` Stefan Metzmacher
  2020-01-24 16:41     ` Jens Axboe
  0 siblings, 1 reply; 36+ messages in thread
From: Stefan Metzmacher @ 2020-01-24  9:54 UTC (permalink / raw)
  To: Jens Axboe, io-uring


[-- Attachment #1.1: Type: text/plain, Size: 803 bytes --]

> +/*
> + * Find and return io_wq with given id and grab a reference to it.
> + */
> +struct io_wq *io_wq_create_id(unsigned bounded, struct io_wq_data *data,
> +			      unsigned int id)
> +{
> +	struct io_wq *wq, *ret = NULL;
> +
> +	mutex_lock(&wq_lock);
> +	list_for_each_entry(wq, &wq_list, wq_list) {
> +		if (id != wq->id)
> +			continue;
> +		if (data->creds != wq->creds || data->user != wq->user)
> +			continue;
> +		if (data->get_work != wq->get_work ||
> +		    data->put_work != wq->put_work)
> +			continue;
> +		if (!refcount_inc_not_zero(&wq->use_refs))
> +			continue;
> +		ret = wq;
> +		break;
> +	}
> +	mutex_unlock(&wq_lock);

Isn't there a more efficient ida_find function in order to avoid
the loop, which won't really scale in the long run.

metze


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 3/4] io-wq: allow lookup of existing io_wq with given id
  2020-01-24  9:54   ` Stefan Metzmacher
@ 2020-01-24 16:41     ` Jens Axboe
  0 siblings, 0 replies; 36+ messages in thread
From: Jens Axboe @ 2020-01-24 16:41 UTC (permalink / raw)
  To: Stefan Metzmacher, io-uring

On 1/24/20 2:54 AM, Stefan Metzmacher wrote:
>> +/*
>> + * Find and return io_wq with given id and grab a reference to it.
>> + */
>> +struct io_wq *io_wq_create_id(unsigned bounded, struct io_wq_data *data,
>> +			      unsigned int id)
>> +{
>> +	struct io_wq *wq, *ret = NULL;
>> +
>> +	mutex_lock(&wq_lock);
>> +	list_for_each_entry(wq, &wq_list, wq_list) {
>> +		if (id != wq->id)
>> +			continue;
>> +		if (data->creds != wq->creds || data->user != wq->user)
>> +			continue;
>> +		if (data->get_work != wq->get_work ||
>> +		    data->put_work != wq->put_work)
>> +			continue;
>> +		if (!refcount_inc_not_zero(&wq->use_refs))
>> +			continue;
>> +		ret = wq;
>> +		break;
>> +	}
>> +	mutex_unlock(&wq_lock);
> 
> Isn't there a more efficient ida_find function in order to avoid
> the loop, which won't really scale in the long run.

Yeah that would be a lot better - I initially just used a sequence
for this, but since I'm now using ida, might as well use it for
lookup as well. I'll make the change.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-24  9:51 ` [PATCHSET 0/4] Add support for shared io-wq backends Stefan Metzmacher
@ 2020-01-24 16:43   ` Jens Axboe
  2020-01-24 19:14     ` Stefan Metzmacher
  0 siblings, 1 reply; 36+ messages in thread
From: Jens Axboe @ 2020-01-24 16:43 UTC (permalink / raw)
  To: Stefan Metzmacher, io-uring

On 1/24/20 2:51 AM, Stefan Metzmacher wrote:
> Hi Jens,
> 
>> Sometimes an applications wants to use multiple smaller rings, because
>> it's more efficient than sharing a ring. The downside of that is that
>> we'll create the io-wq backend separately for all of them, while they
>> would be perfectly happy just sharing that.
>>
>> This patchset adds support for that. io_uring_params grows an 'id' field,
>> which denotes an identifier for the async backend. If an application
>> wants to utilize sharing, it'll simply grab the id from the first ring
>> created, and pass it in to the next one and set IORING_SETUP_SHARED. This
>> allows efficient sharing of backend resources, while allowing multiple
>> rings in the application or library.
> 
> But still all rings need to use the same creds, correct?

Right

>> Not a huge fan of the IORING_SETUP_SHARED name, we should probably make
>> that better (I'm taking suggestions).
> 
> The flag is supposed to be used for the new ring that attached to
> the existing workqueue, correct?

Yes - any ring setup will return an id in the io_uring_params, attaching
to an existing one is done by setting the flag and putting the id number
you want to attach to in there.

> What about IORING_ATTACH_TO_WORKQUEUE?

Trying to avoid something that is too tied to the internals, but workqueue
(or wq) is probably generic enough that it can be used.
IORING_SETUP_ATTACH_WQ?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-24 16:43   ` Jens Axboe
@ 2020-01-24 19:14     ` Stefan Metzmacher
  2020-01-24 21:37       ` Jens Axboe
  0 siblings, 1 reply; 36+ messages in thread
From: Stefan Metzmacher @ 2020-01-24 19:14 UTC (permalink / raw)
  To: Jens Axboe, io-uring


[-- Attachment #1.1: Type: text/plain, Size: 199 bytes --]

Hi Jens,

> Trying to avoid something that is too tied to the internals, but workqueue
> (or wq) is probably generic enough that it can be used.
> IORING_SETUP_ATTACH_WQ?

Ok :-)

metze



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-23 23:16 [PATCHSET 0/4] Add support for shared io-wq backends Jens Axboe
                   ` (4 preceding siblings ...)
  2020-01-24  9:51 ` [PATCHSET 0/4] Add support for shared io-wq backends Stefan Metzmacher
@ 2020-01-24 20:34 ` Pavel Begunkov
  2020-01-24 21:38   ` Jens Axboe
  2020-01-26  1:51 ` Daurnimator
  6 siblings, 1 reply; 36+ messages in thread
From: Pavel Begunkov @ 2020-01-24 20:34 UTC (permalink / raw)
  To: Jens Axboe, io-uring


[-- Attachment #1.1: Type: text/plain, Size: 1135 bytes --]

On 24/01/2020 02:16, Jens Axboe wrote:
> Sometimes an applications wants to use multiple smaller rings, because
> it's more efficient than sharing a ring. The downside of that is that
> we'll create the io-wq backend separately for all of them, while they
> would be perfectly happy just sharing that.
> 
> This patchset adds support for that. io_uring_params grows an 'id' field,
> which denotes an identifier for the async backend. If an application
> wants to utilize sharing, it'll simply grab the id from the first ring
> created, and pass it in to the next one and set IORING_SETUP_SHARED. This
> allows efficient sharing of backend resources, while allowing multiple
> rings in the application or library.
> 
> Not a huge fan of the IORING_SETUP_SHARED name, we should probably make
> that better (I'm taking suggestions).
> 

Took a look at the latest version (b942f31ee0 at io_uring-vfs-shared-wq).
There is an outdated commit message for the last patch mentioning renamed
IORING_SETUP_SHARED, but the code looks good to me.

Reviewed-by: Pavel Begunkov <[email protected]>

-- 
Pavel Begunkov


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 4/4] io_uring: add support for sharing kernel io-wq workqueue
  2020-01-24 21:31 [PATCHSET v2 " Jens Axboe
@ 2020-01-24 21:31 ` Jens Axboe
  2020-01-25  7:45   ` Stefan Metzmacher
  0 siblings, 1 reply; 36+ messages in thread
From: Jens Axboe @ 2020-01-24 21:31 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

An id field is added to io_uring_params, which always returns the ID of
the io-wq backend that is associated with an io_uring context. If an 'id'
is provided and IORING_SETUP_SHARED is set in the creation flags, then
we attempt to attach to an existing io-wq instead of setting up a new one.

This allows creation of "sibling" io_urings, where we prefer to keep the
SQ/CQ private, but want to share the async backend to minimize the amount
of overhead associated with having multiple rings that belong to the same
backend.

Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io_uring.c                 | 20 +++++++++++++++++---
 include/uapi/linux/io_uring.h |  4 +++-
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 9f73586dcfb8..3dad12906db3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5673,7 +5673,7 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
 {
 	struct io_wq_data data;
 	unsigned concurrency;
-	int ret;
+	int ret, id;
 
 	init_waitqueue_head(&ctx->sqo_wait);
 	mmgrab(current->mm);
@@ -5724,13 +5724,23 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
 
 	/* Do QD, or 4 * CPUS, whatever is smallest */
 	concurrency = min(ctx->sq_entries, 4 * num_online_cpus());
-	ctx->io_wq = io_wq_create(concurrency, &data);
+
+	id = 0;
+	if (ctx->flags & IORING_SETUP_ATTACH_WQ) {
+		id = p->id;
+		if (!id) {
+			ret = -EINVAL;
+			goto err;
+		}
+	}
+	ctx->io_wq = io_wq_create_id(concurrency, &data, id);
 	if (IS_ERR(ctx->io_wq)) {
 		ret = PTR_ERR(ctx->io_wq);
 		ctx->io_wq = NULL;
 		goto err;
 	}
 
+	p->id = io_wq_id(ctx->io_wq);
 	return 0;
 err:
 	io_finish_async(ctx);
@@ -6554,7 +6564,11 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
 
 	if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
 			IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
-			IORING_SETUP_CLAMP))
+			IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ))
+		return -EINVAL;
+
+	/* id isn't valid without ATTACH_WQ being set */
+	if (!(p.flags & IORING_SETUP_ATTACH_WQ) && p.id)
 		return -EINVAL;
 
 	ret = io_uring_create(entries, &p);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 57d05cc5e271..f66e53c74a3d 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -75,6 +75,7 @@ enum {
 #define IORING_SETUP_SQ_AFF	(1U << 2)	/* sq_thread_cpu is valid */
 #define IORING_SETUP_CQSIZE	(1U << 3)	/* app defines CQ size */
 #define IORING_SETUP_CLAMP	(1U << 4)	/* clamp SQ/CQ ring sizes */
+#define IORING_SETUP_ATTACH_WQ	(1U << 5)	/* attach to existing wq */
 
 enum {
 	IORING_OP_NOP,
@@ -183,7 +184,8 @@ struct io_uring_params {
 	__u32 sq_thread_cpu;
 	__u32 sq_thread_idle;
 	__u32 features;
-	__u32 resv[4];
+	__u32 id;
+	__u32 resv[3];
 	struct io_sqring_offsets sq_off;
 	struct io_cqring_offsets cq_off;
 };
-- 
2.25.0


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-24 19:14     ` Stefan Metzmacher
@ 2020-01-24 21:37       ` Jens Axboe
  0 siblings, 0 replies; 36+ messages in thread
From: Jens Axboe @ 2020-01-24 21:37 UTC (permalink / raw)
  To: Stefan Metzmacher, io-uring

On 1/24/20 12:14 PM, Stefan Metzmacher wrote:
> Hi Jens,
> 
>> Trying to avoid something that is too tied to the internals, but workqueue
>> (or wq) is probably generic enough that it can be used.
>> IORING_SETUP_ATTACH_WQ?
> 
> Ok :-)

It has been done!

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-24 20:34 ` Pavel Begunkov
@ 2020-01-24 21:38   ` Jens Axboe
  0 siblings, 0 replies; 36+ messages in thread
From: Jens Axboe @ 2020-01-24 21:38 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 1/24/20 1:34 PM, Pavel Begunkov wrote:
> On 24/01/2020 02:16, Jens Axboe wrote:
>> Sometimes an applications wants to use multiple smaller rings, because
>> it's more efficient than sharing a ring. The downside of that is that
>> we'll create the io-wq backend separately for all of them, while they
>> would be perfectly happy just sharing that.
>>
>> This patchset adds support for that. io_uring_params grows an 'id' field,
>> which denotes an identifier for the async backend. If an application
>> wants to utilize sharing, it'll simply grab the id from the first ring
>> created, and pass it in to the next one and set IORING_SETUP_SHARED. This
>> allows efficient sharing of backend resources, while allowing multiple
>> rings in the application or library.
>>
>> Not a huge fan of the IORING_SETUP_SHARED name, we should probably make
>> that better (I'm taking suggestions).
>>
> 
> Took a look at the latest version (b942f31ee0 at io_uring-vfs-shared-wq).
> There is an outdated commit message for the last patch mentioning renamed
> IORING_SETUP_SHARED, but the code looks good to me.
> 
> Reviewed-by: Pavel Begunkov <[email protected]>

Thanks for the review, I'll add your reviewed-by and fix up that commit
message too.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 4/4] io_uring: add support for sharing kernel io-wq workqueue
  2020-01-24 21:31 ` [PATCH 4/4] io_uring: add support for sharing kernel io-wq workqueue Jens Axboe
@ 2020-01-25  7:45   ` Stefan Metzmacher
  2020-01-25 16:44     ` Jens Axboe
  0 siblings, 1 reply; 36+ messages in thread
From: Stefan Metzmacher @ 2020-01-25  7:45 UTC (permalink / raw)
  To: Jens Axboe, io-uring


[-- Attachment #1.1: Type: text/plain, Size: 1167 bytes --]

Am 24.01.20 um 22:31 schrieb Jens Axboe:
> An id field is added to io_uring_params, which always returns the ID of
> the io-wq backend that is associated with an io_uring context. If an 'id'
> is provided and IORING_SETUP_SHARED is set in the creation flags, then
> we attempt to attach to an existing io-wq instead of setting up a new one.

Use the new name here too.

>  	ret = io_uring_create(entries, &p);
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index 57d05cc5e271..f66e53c74a3d 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -75,6 +75,7 @@ enum {
>  #define IORING_SETUP_SQ_AFF	(1U << 2)	/* sq_thread_cpu is valid */
>  #define IORING_SETUP_CQSIZE	(1U << 3)	/* app defines CQ size */
>  #define IORING_SETUP_CLAMP	(1U << 4)	/* clamp SQ/CQ ring sizes */
> +#define IORING_SETUP_ATTACH_WQ	(1U << 5)	/* attach to existing wq */
>  
>  enum {
>  	IORING_OP_NOP,
> @@ -183,7 +184,8 @@ struct io_uring_params {
>  	__u32 sq_thread_cpu;
>  	__u32 sq_thread_idle;
>  	__u32 features;
> -	__u32 resv[4];
> +	__u32 id;

I think this should be wq_id;

metze



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 4/4] io_uring: add support for sharing kernel io-wq workqueue
  2020-01-25  7:45   ` Stefan Metzmacher
@ 2020-01-25 16:44     ` Jens Axboe
  0 siblings, 0 replies; 36+ messages in thread
From: Jens Axboe @ 2020-01-25 16:44 UTC (permalink / raw)
  To: Stefan Metzmacher, io-uring

On 1/25/20 12:45 AM, Stefan Metzmacher wrote:
> Am 24.01.20 um 22:31 schrieb Jens Axboe:
>> An id field is added to io_uring_params, which always returns the ID of
>> the io-wq backend that is associated with an io_uring context. If an 'id'
>> is provided and IORING_SETUP_SHARED is set in the creation flags, then
>> we attempt to attach to an existing io-wq instead of setting up a new one.
> 
> Use the new name here too.

Already fixed that up yesterday.

>>  	ret = io_uring_create(entries, &p);
>> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
>> index 57d05cc5e271..f66e53c74a3d 100644
>> --- a/include/uapi/linux/io_uring.h
>> +++ b/include/uapi/linux/io_uring.h
>> @@ -75,6 +75,7 @@ enum {
>>  #define IORING_SETUP_SQ_AFF	(1U << 2)	/* sq_thread_cpu is valid */
>>  #define IORING_SETUP_CQSIZE	(1U << 3)	/* app defines CQ size */
>>  #define IORING_SETUP_CLAMP	(1U << 4)	/* clamp SQ/CQ ring sizes */
>> +#define IORING_SETUP_ATTACH_WQ	(1U << 5)	/* attach to existing wq */
>>  
>>  enum {
>>  	IORING_OP_NOP,
>> @@ -183,7 +184,8 @@ struct io_uring_params {
>>  	__u32 sq_thread_cpu;
>>  	__u32 sq_thread_idle;
>>  	__u32 features;
>> -	__u32 resv[4];
>> +	__u32 id;
> 
> I think this should be wq_id;

Yeah, probably clearer, I'll make that change, thanks.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-23 23:16 [PATCHSET 0/4] Add support for shared io-wq backends Jens Axboe
                   ` (5 preceding siblings ...)
  2020-01-24 20:34 ` Pavel Begunkov
@ 2020-01-26  1:51 ` Daurnimator
  2020-01-26 15:11   ` Pavel Begunkov
  6 siblings, 1 reply; 36+ messages in thread
From: Daurnimator @ 2020-01-26  1:51 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring

On Fri, 24 Jan 2020 at 10:16, Jens Axboe <[email protected]> wrote:
>
> Sometimes an applications wants to use multiple smaller rings, because
> it's more efficient than sharing a ring. The downside of that is that
> we'll create the io-wq backend separately for all of them, while they
> would be perfectly happy just sharing that.
>
> This patchset adds support for that. io_uring_params grows an 'id' field,
> which denotes an identifier for the async backend. If an application
> wants to utilize sharing, it'll simply grab the id from the first ring
> created, and pass it in to the next one and set IORING_SETUP_SHARED. This
> allows efficient sharing of backend resources, while allowing multiple
> rings in the application or library.
>
> Not a huge fan of the IORING_SETUP_SHARED name, we should probably make
> that better (I'm taking suggestions).

I don't love the idea of some new type of magic user<>kernel
identifier. It would be nice if the id itself was e.g. a file
descriptor

What if when creating an io_uring you could pass in an existing
io_uring file descriptor, and the new one would share the io-wq
backend?

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-26  1:51 ` Daurnimator
@ 2020-01-26 15:11   ` Pavel Begunkov
  2020-01-26 17:00     ` Jens Axboe
  0 siblings, 1 reply; 36+ messages in thread
From: Pavel Begunkov @ 2020-01-26 15:11 UTC (permalink / raw)
  To: Daurnimator, Jens Axboe; +Cc: io-uring

On 1/26/2020 4:51 AM, Daurnimator wrote:
> On Fri, 24 Jan 2020 at 10:16, Jens Axboe <[email protected]> wrote:
> 
> I don't love the idea of some new type of magic user<>kernel
> identifier. It would be nice if the id itself was e.g. a file
> descriptor
> 
> What if when creating an io_uring you could pass in an existing
> io_uring file descriptor, and the new one would share the io-wq
> backend?
> 
Good idea! It can solve potential problems with jails, isolation, etc in
the future.

May we need having other shared resources and want fine-grained control
over them at some moment? It can prove helpful for the BPF plans.
E.g.

io_uring_setup(share_io-wq=ring_fd1,
               share_fds=ring_fd2,
               share_ebpf=ring_fd3, ...);

If so, it's better to have more flexible API. E.g. as follows or a
pointer to a struct with @size field.

struct io_shared_resource {
    int type;
    int fd;
};

struct io_uring_params {
    ...
    struct io_shared_resource shared[];
};

params = {
    ...
    .shared = {{ATTACH_IO_WQ, fd1}, ..., SANTINEL_ENTRY};
};

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-26 15:11   ` Pavel Begunkov
@ 2020-01-26 17:00     ` Jens Axboe
  2020-01-27 13:29       ` Pavel Begunkov
  0 siblings, 1 reply; 36+ messages in thread
From: Jens Axboe @ 2020-01-26 17:00 UTC (permalink / raw)
  To: Pavel Begunkov, Daurnimator; +Cc: io-uring

On 1/26/20 8:11 AM, Pavel Begunkov wrote:
> On 1/26/2020 4:51 AM, Daurnimator wrote:
>> On Fri, 24 Jan 2020 at 10:16, Jens Axboe <[email protected]> wrote:
>>
>> I don't love the idea of some new type of magic user<>kernel
>> identifier. It would be nice if the id itself was e.g. a file
>> descriptor
>>
>> What if when creating an io_uring you could pass in an existing
>> io_uring file descriptor, and the new one would share the io-wq
>> backend?
>>
> Good idea! It can solve potential problems with jails, isolation, etc in
> the future.
> 
> May we need having other shared resources and want fine-grained control
> over them at some moment? It can prove helpful for the BPF plans.
> E.g.
> 
> io_uring_setup(share_io-wq=ring_fd1,
>                share_fds=ring_fd2,
>                share_ebpf=ring_fd3, ...);
> 
> If so, it's better to have more flexible API. E.g. as follows or a
> pointer to a struct with @size field.
> 
> struct io_shared_resource {
>     int type;
>     int fd;
> };
> 
> struct io_uring_params {
>     ...
>     struct io_shared_resource shared[];
> };
> 
> params = {
>     ...
>     .shared = {{ATTACH_IO_WQ, fd1}, ..., SANTINEL_ENTRY};
> };

I'm fine with changing/extending the sharing API, please send a
patch!

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-26 17:00     ` Jens Axboe
@ 2020-01-27 13:29       ` Pavel Begunkov
  2020-01-27 13:39         ` Jens Axboe
  0 siblings, 1 reply; 36+ messages in thread
From: Pavel Begunkov @ 2020-01-27 13:29 UTC (permalink / raw)
  To: Jens Axboe, Daurnimator; +Cc: io-uring

On 1/26/2020 8:00 PM, Jens Axboe wrote:
> On 1/26/20 8:11 AM, Pavel Begunkov wrote:
>> On 1/26/2020 4:51 AM, Daurnimator wrote:
>>> On Fri, 24 Jan 2020 at 10:16, Jens Axboe <[email protected]> wrote:
>>>
>>> I don't love the idea of some new type of magic user<>kernel
>>> identifier. It would be nice if the id itself was e.g. a file
>>> descriptor
>>>
>>> What if when creating an io_uring you could pass in an existing
>>> io_uring file descriptor, and the new one would share the io-wq
>>> backend?
>>>
>> Good idea! It can solve potential problems with jails, isolation, etc in
>> the future.
>>
>> May we need having other shared resources and want fine-grained control
>> over them at some moment? It can prove helpful for the BPF plans.
>> E.g.
>>
>> io_uring_setup(share_io-wq=ring_fd1,
>>                share_fds=ring_fd2,
>>                share_ebpf=ring_fd3, ...);
>>
>> If so, it's better to have more flexible API. E.g. as follows or a
>> pointer to a struct with @size field.
>>
>> struct io_shared_resource {
>>     int type;
>>     int fd;
>> };
>>
>> struct io_uring_params {
>>     ...
>>     struct io_shared_resource shared[];
>> };
>>
>> params = {
>>     ...
>>     .shared = {{ATTACH_IO_WQ, fd1}, ..., SANTINEL_ENTRY};
>> };
> 
> I'm fine with changing/extending the sharing API, please send a
> patch!
> 

Ok. I can't promise it'll play handy for sharing. Though, you'll be out
of space in struct io_uring_params soon anyway.

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-27 13:29       ` Pavel Begunkov
@ 2020-01-27 13:39         ` Jens Axboe
  2020-01-27 14:07           ` Pavel Begunkov
  0 siblings, 1 reply; 36+ messages in thread
From: Jens Axboe @ 2020-01-27 13:39 UTC (permalink / raw)
  To: Pavel Begunkov, Daurnimator; +Cc: io-uring

On 1/27/20 6:29 AM, Pavel Begunkov wrote:
> On 1/26/2020 8:00 PM, Jens Axboe wrote:
>> On 1/26/20 8:11 AM, Pavel Begunkov wrote:
>>> On 1/26/2020 4:51 AM, Daurnimator wrote:
>>>> On Fri, 24 Jan 2020 at 10:16, Jens Axboe <[email protected]> wrote:
>>>>
>>>> I don't love the idea of some new type of magic user<>kernel
>>>> identifier. It would be nice if the id itself was e.g. a file
>>>> descriptor
>>>>
>>>> What if when creating an io_uring you could pass in an existing
>>>> io_uring file descriptor, and the new one would share the io-wq
>>>> backend?
>>>>
>>> Good idea! It can solve potential problems with jails, isolation, etc in
>>> the future.
>>>
>>> May we need having other shared resources and want fine-grained control
>>> over them at some moment? It can prove helpful for the BPF plans.
>>> E.g.
>>>
>>> io_uring_setup(share_io-wq=ring_fd1,
>>>                share_fds=ring_fd2,
>>>                share_ebpf=ring_fd3, ...);
>>>
>>> If so, it's better to have more flexible API. E.g. as follows or a
>>> pointer to a struct with @size field.
>>>
>>> struct io_shared_resource {
>>>     int type;
>>>     int fd;
>>> };
>>>
>>> struct io_uring_params {
>>>     ...
>>>     struct io_shared_resource shared[];
>>> };
>>>
>>> params = {
>>>     ...
>>>     .shared = {{ATTACH_IO_WQ, fd1}, ..., SANTINEL_ENTRY};
>>> };
>>
>> I'm fine with changing/extending the sharing API, please send a
>> patch!
>>
> 
> Ok. I can't promise it'll play handy for sharing. Though, you'll be out
> of space in struct io_uring_params soon anyway.

I'm going to keep what we have for now, as I'm really not imagining a
lot more sharing - what else would we share? So let's not over-design
anything.


-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-27 13:39         ` Jens Axboe
@ 2020-01-27 14:07           ` Pavel Begunkov
  2020-01-27 19:39             ` Pavel Begunkov
  2020-01-27 20:33             ` Jens Axboe
  0 siblings, 2 replies; 36+ messages in thread
From: Pavel Begunkov @ 2020-01-27 14:07 UTC (permalink / raw)
  To: Jens Axboe, Daurnimator; +Cc: io-uring

On 1/27/2020 4:39 PM, Jens Axboe wrote:
> On 1/27/20 6:29 AM, Pavel Begunkov wrote:
>> On 1/26/2020 8:00 PM, Jens Axboe wrote:
>>> On 1/26/20 8:11 AM, Pavel Begunkov wrote:
>>>> On 1/26/2020 4:51 AM, Daurnimator wrote:
>>>>> On Fri, 24 Jan 2020 at 10:16, Jens Axboe <[email protected]> wrote:
>> Ok. I can't promise it'll play handy for sharing. Though, you'll be out
>> of space in struct io_uring_params soon anyway.
> 
> I'm going to keep what we have for now, as I'm really not imagining a
> lot more sharing - what else would we share? So let's not over-design
> anything.
> 
Fair enough. I prefer a ptr to an extendable struct, that will take the
last u64, when needed.

However, it's still better to share through file descriptors. It's just
not secure enough the way it's now.

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-27 14:07           ` Pavel Begunkov
@ 2020-01-27 19:39             ` Pavel Begunkov
  2020-01-27 19:45               ` Jens Axboe
  2020-01-27 20:33             ` Jens Axboe
  1 sibling, 1 reply; 36+ messages in thread
From: Pavel Begunkov @ 2020-01-27 19:39 UTC (permalink / raw)
  To: Jens Axboe, Daurnimator; +Cc: io-uring


[-- Attachment #1.1: Type: text/plain, Size: 989 bytes --]

On 27/01/2020 17:07, Pavel Begunkov wrote:
> On 1/27/2020 4:39 PM, Jens Axboe wrote:
>> On 1/27/20 6:29 AM, Pavel Begunkov wrote:
>>> On 1/26/2020 8:00 PM, Jens Axboe wrote:
>>>> On 1/26/20 8:11 AM, Pavel Begunkov wrote:
>>>>> On 1/26/2020 4:51 AM, Daurnimator wrote:
>>>>>> On Fri, 24 Jan 2020 at 10:16, Jens Axboe <[email protected]> wrote:
>>> Ok. I can't promise it'll play handy for sharing. Though, you'll be out
>>> of space in struct io_uring_params soon anyway.
>>
>> I'm going to keep what we have for now, as I'm really not imagining a
>> lot more sharing - what else would we share? So let's not over-design
>> anything.
>>
> Fair enough. I prefer a ptr to an extendable struct, that will take the
> last u64, when needed.
> 
> However, it's still better to share through file descriptors. It's just
> not secure enough the way it's now.
> 

I'll send a patch with fd-approach shortly, just if you want to squeeze them
into 5.6-rc

-- 
Pavel Begunkov


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-27 19:39             ` Pavel Begunkov
@ 2020-01-27 19:45               ` Jens Axboe
  0 siblings, 0 replies; 36+ messages in thread
From: Jens Axboe @ 2020-01-27 19:45 UTC (permalink / raw)
  To: Pavel Begunkov, Daurnimator; +Cc: io-uring

On 1/27/20 12:39 PM, Pavel Begunkov wrote:
> On 27/01/2020 17:07, Pavel Begunkov wrote:
>> On 1/27/2020 4:39 PM, Jens Axboe wrote:
>>> On 1/27/20 6:29 AM, Pavel Begunkov wrote:
>>>> On 1/26/2020 8:00 PM, Jens Axboe wrote:
>>>>> On 1/26/20 8:11 AM, Pavel Begunkov wrote:
>>>>>> On 1/26/2020 4:51 AM, Daurnimator wrote:
>>>>>>> On Fri, 24 Jan 2020 at 10:16, Jens Axboe <[email protected]> wrote:
>>>> Ok. I can't promise it'll play handy for sharing. Though, you'll be out
>>>> of space in struct io_uring_params soon anyway.
>>>
>>> I'm going to keep what we have for now, as I'm really not imagining a
>>> lot more sharing - what else would we share? So let's not over-design
>>> anything.
>>>
>> Fair enough. I prefer a ptr to an extendable struct, that will take the
>> last u64, when needed.
>>
>> However, it's still better to share through file descriptors. It's just
>> not secure enough the way it's now.
>>
> 
> I'll send a patch with fd-approach shortly, just if you want to squeeze them
> into 5.6-rc

We'll see how it pans out, I'll ship what I have now and we'll still have
plenty of time to adjust if we deem that a better path.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-27 14:07           ` Pavel Begunkov
  2020-01-27 19:39             ` Pavel Begunkov
@ 2020-01-27 20:33             ` Jens Axboe
  2020-01-27 21:45               ` Pavel Begunkov
  1 sibling, 1 reply; 36+ messages in thread
From: Jens Axboe @ 2020-01-27 20:33 UTC (permalink / raw)
  To: Pavel Begunkov, Daurnimator; +Cc: io-uring

On 1/27/20 7:07 AM, Pavel Begunkov wrote:
> On 1/27/2020 4:39 PM, Jens Axboe wrote:
>> On 1/27/20 6:29 AM, Pavel Begunkov wrote:
>>> On 1/26/2020 8:00 PM, Jens Axboe wrote:
>>>> On 1/26/20 8:11 AM, Pavel Begunkov wrote:
>>>>> On 1/26/2020 4:51 AM, Daurnimator wrote:
>>>>>> On Fri, 24 Jan 2020 at 10:16, Jens Axboe <[email protected]> wrote:
>>> Ok. I can't promise it'll play handy for sharing. Though, you'll be out
>>> of space in struct io_uring_params soon anyway.
>>
>> I'm going to keep what we have for now, as I'm really not imagining a
>> lot more sharing - what else would we share? So let's not over-design
>> anything.
>>
> Fair enough. I prefer a ptr to an extendable struct, that will take the
> last u64, when needed.
> 
> However, it's still better to share through file descriptors. It's just
> not secure enough the way it's now.

Is the file descriptor value really a good choice? We just had some
confusion on ring sharing across forks. Not sure using an fd value
is a sane "key" to use across processes.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-27 20:33             ` Jens Axboe
@ 2020-01-27 21:45               ` Pavel Begunkov
  2020-01-27 22:40                 ` Jens Axboe
  0 siblings, 1 reply; 36+ messages in thread
From: Pavel Begunkov @ 2020-01-27 21:45 UTC (permalink / raw)
  To: Jens Axboe, Daurnimator; +Cc: io-uring


[-- Attachment #1.1: Type: text/plain, Size: 1924 bytes --]

On 27/01/2020 23:33, Jens Axboe wrote:
> On 1/27/20 7:07 AM, Pavel Begunkov wrote:
>> On 1/27/2020 4:39 PM, Jens Axboe wrote:
>>> On 1/27/20 6:29 AM, Pavel Begunkov wrote:
>>>> On 1/26/2020 8:00 PM, Jens Axboe wrote:
>>>>> On 1/26/20 8:11 AM, Pavel Begunkov wrote:
>>>>>> On 1/26/2020 4:51 AM, Daurnimator wrote:
>>>>>>> On Fri, 24 Jan 2020 at 10:16, Jens Axboe <[email protected]> wrote:
>>>> Ok. I can't promise it'll play handy for sharing. Though, you'll be out
>>>> of space in struct io_uring_params soon anyway.
>>>
>>> I'm going to keep what we have for now, as I'm really not imagining a
>>> lot more sharing - what else would we share? So let's not over-design
>>> anything.
>>>
>> Fair enough. I prefer a ptr to an extendable struct, that will take the
>> last u64, when needed.
>>
>> However, it's still better to share through file descriptors. It's just
>> not secure enough the way it's now.
> 
> Is the file descriptor value really a good choice? We just had some
> confusion on ring sharing across forks. Not sure using an fd value
> is a sane "key" to use across processes.
> 
As I see it, the problem with @mm is that uring is dead-bound to it. For
example, a process can create and send uring (e.g. via socket), and then be
killed. And that basically means
1. @mm of the process is locked just because of the sent uring instance.
2. a process may have an io_uring, which bound to @mm of another process, even
though the layouts may be completely different.

File descriptors are different here, because io_uring doesn't know about them,
They are controlled by the userspace (send, dup, fork, etc), and don't sabotage
all isolation work done in the kernel. A dire example here is stealing io-wq
from within a container, which is trivial with global self-made id. I would love
to hear, if I am mistaken somewhere.

Is there some better option?

-- 
Pavel Begunkov


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-27 21:45               ` Pavel Begunkov
@ 2020-01-27 22:40                 ` Jens Axboe
  2020-01-27 23:00                   ` Jens Axboe
  0 siblings, 1 reply; 36+ messages in thread
From: Jens Axboe @ 2020-01-27 22:40 UTC (permalink / raw)
  To: Pavel Begunkov, Daurnimator; +Cc: io-uring

On 1/27/20 2:45 PM, Pavel Begunkov wrote:
> On 27/01/2020 23:33, Jens Axboe wrote:
>> On 1/27/20 7:07 AM, Pavel Begunkov wrote:
>>> On 1/27/2020 4:39 PM, Jens Axboe wrote:
>>>> On 1/27/20 6:29 AM, Pavel Begunkov wrote:
>>>>> On 1/26/2020 8:00 PM, Jens Axboe wrote:
>>>>>> On 1/26/20 8:11 AM, Pavel Begunkov wrote:
>>>>>>> On 1/26/2020 4:51 AM, Daurnimator wrote:
>>>>>>>> On Fri, 24 Jan 2020 at 10:16, Jens Axboe <[email protected]> wrote:
>>>>> Ok. I can't promise it'll play handy for sharing. Though, you'll be out
>>>>> of space in struct io_uring_params soon anyway.
>>>>
>>>> I'm going to keep what we have for now, as I'm really not imagining a
>>>> lot more sharing - what else would we share? So let's not over-design
>>>> anything.
>>>>
>>> Fair enough. I prefer a ptr to an extendable struct, that will take the
>>> last u64, when needed.
>>>
>>> However, it's still better to share through file descriptors. It's just
>>> not secure enough the way it's now.
>>
>> Is the file descriptor value really a good choice? We just had some
>> confusion on ring sharing across forks. Not sure using an fd value
>> is a sane "key" to use across processes.
>>
> As I see it, the problem with @mm is that uring is dead-bound to it.
> For example, a process can create and send uring (e.g. via socket),
> and then be killed. And that basically means
> 1. @mm of the process is locked just because of the sent uring
> instance.
> 2. a process may have an io_uring, which bound to @mm of another
> process, even though the layouts may be completely different.
> 
> File descriptors are different here, because io_uring doesn't know
> about them, They are controlled by the userspace (send, dup, fork,
> etc), and don't sabotage all isolation work done in the kernel. A dire
> example here is stealing io-wq from within a container, which is
> trivial with global self-made id. I would love to hear, if I am
> mistaken somewhere.
> 
> Is there some better option?

OK, so how about this:

- We use the 'fd' as the lookup key. This makes it easy since we can
  just check if it's a io_uring instance or not, we don't need to do any
  tracking on the side. It also means that the application asking for
  sharing must already have some relationship to the process that
  created the ring.

- mm/creds must be transferred through the work item. Any SQE done on
  behalf of io_uring_enter() directly already has that, if punted we
  must pass the creds and mm. This means we break the static setup of
  io_wq->mm/creds. It also means that we probably have to add that to
  io_wq_work, which kind of sucks, but...

I think with that we have a decent setup, that's also safe. I've dropped
the sharing patches for now, from the 5.6 tree.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-27 22:40                 ` Jens Axboe
@ 2020-01-27 23:00                   ` Jens Axboe
  2020-01-27 23:17                     ` Pavel Begunkov
  0 siblings, 1 reply; 36+ messages in thread
From: Jens Axboe @ 2020-01-27 23:00 UTC (permalink / raw)
  To: Pavel Begunkov, Daurnimator; +Cc: io-uring

On 1/27/20 3:40 PM, Jens Axboe wrote:
> On 1/27/20 2:45 PM, Pavel Begunkov wrote:
>> On 27/01/2020 23:33, Jens Axboe wrote:
>>> On 1/27/20 7:07 AM, Pavel Begunkov wrote:
>>>> On 1/27/2020 4:39 PM, Jens Axboe wrote:
>>>>> On 1/27/20 6:29 AM, Pavel Begunkov wrote:
>>>>>> On 1/26/2020 8:00 PM, Jens Axboe wrote:
>>>>>>> On 1/26/20 8:11 AM, Pavel Begunkov wrote:
>>>>>>>> On 1/26/2020 4:51 AM, Daurnimator wrote:
>>>>>>>>> On Fri, 24 Jan 2020 at 10:16, Jens Axboe <[email protected]> wrote:
>>>>>> Ok. I can't promise it'll play handy for sharing. Though, you'll be out
>>>>>> of space in struct io_uring_params soon anyway.
>>>>>
>>>>> I'm going to keep what we have for now, as I'm really not imagining a
>>>>> lot more sharing - what else would we share? So let's not over-design
>>>>> anything.
>>>>>
>>>> Fair enough. I prefer a ptr to an extendable struct, that will take the
>>>> last u64, when needed.
>>>>
>>>> However, it's still better to share through file descriptors. It's just
>>>> not secure enough the way it's now.
>>>
>>> Is the file descriptor value really a good choice? We just had some
>>> confusion on ring sharing across forks. Not sure using an fd value
>>> is a sane "key" to use across processes.
>>>
>> As I see it, the problem with @mm is that uring is dead-bound to it.
>> For example, a process can create and send uring (e.g. via socket),
>> and then be killed. And that basically means
>> 1. @mm of the process is locked just because of the sent uring
>> instance.
>> 2. a process may have an io_uring, which bound to @mm of another
>> process, even though the layouts may be completely different.
>>
>> File descriptors are different here, because io_uring doesn't know
>> about them, They are controlled by the userspace (send, dup, fork,
>> etc), and don't sabotage all isolation work done in the kernel. A dire
>> example here is stealing io-wq from within a container, which is
>> trivial with global self-made id. I would love to hear, if I am
>> mistaken somewhere.
>>
>> Is there some better option?
> 
> OK, so how about this:
> 
> - We use the 'fd' as the lookup key. This makes it easy since we can
>   just check if it's a io_uring instance or not, we don't need to do any
>   tracking on the side. It also means that the application asking for
>   sharing must already have some relationship to the process that
>   created the ring.
> 
> - mm/creds must be transferred through the work item. Any SQE done on
>   behalf of io_uring_enter() directly already has that, if punted we
>   must pass the creds and mm. This means we break the static setup of
>   io_wq->mm/creds. It also means that we probably have to add that to
>   io_wq_work, which kind of sucks, but...

It'd fix Stefan's worry too.

> I think with that we have a decent setup, that's also safe. I've dropped
> the sharing patches for now, from the 5.6 tree.

So one concern might be SQPOLL, it'll have to use the ctx creds and mm
as usual. I guess that is ok.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-27 23:00                   ` Jens Axboe
@ 2020-01-27 23:17                     ` Pavel Begunkov
  2020-01-27 23:23                       ` Jens Axboe
  0 siblings, 1 reply; 36+ messages in thread
From: Pavel Begunkov @ 2020-01-27 23:17 UTC (permalink / raw)
  To: Jens Axboe, Daurnimator; +Cc: io-uring


[-- Attachment #1.1: Type: text/plain, Size: 3398 bytes --]

On 28/01/2020 02:00, Jens Axboe wrote:
> On 1/27/20 3:40 PM, Jens Axboe wrote:
>> On 1/27/20 2:45 PM, Pavel Begunkov wrote:
>>> On 27/01/2020 23:33, Jens Axboe wrote:
>>>> On 1/27/20 7:07 AM, Pavel Begunkov wrote:
>>>>> On 1/27/2020 4:39 PM, Jens Axboe wrote:
>>>>>> On 1/27/20 6:29 AM, Pavel Begunkov wrote:
>>>>>>> On 1/26/2020 8:00 PM, Jens Axboe wrote:
>>>>>>>> On 1/26/20 8:11 AM, Pavel Begunkov wrote:
>>>>>>>>> On 1/26/2020 4:51 AM, Daurnimator wrote:
>>>>>>>>>> On Fri, 24 Jan 2020 at 10:16, Jens Axboe <[email protected]> wrote:
>>>>>>> Ok. I can't promise it'll play handy for sharing. Though, you'll be out
>>>>>>> of space in struct io_uring_params soon anyway.
>>>>>>
>>>>>> I'm going to keep what we have for now, as I'm really not imagining a
>>>>>> lot more sharing - what else would we share? So let's not over-design
>>>>>> anything.
>>>>>>
>>>>> Fair enough. I prefer a ptr to an extendable struct, that will take the
>>>>> last u64, when needed.
>>>>>
>>>>> However, it's still better to share through file descriptors. It's just
>>>>> not secure enough the way it's now.
>>>>
>>>> Is the file descriptor value really a good choice? We just had some
>>>> confusion on ring sharing across forks. Not sure using an fd value
>>>> is a sane "key" to use across processes.
>>>>
>>> As I see it, the problem with @mm is that uring is dead-bound to it.
>>> For example, a process can create and send uring (e.g. via socket),
>>> and then be killed. And that basically means
>>> 1. @mm of the process is locked just because of the sent uring
>>> instance.
>>> 2. a process may have an io_uring, which bound to @mm of another
>>> process, even though the layouts may be completely different.
>>>
>>> File descriptors are different here, because io_uring doesn't know
>>> about them, They are controlled by the userspace (send, dup, fork,
>>> etc), and don't sabotage all isolation work done in the kernel. A dire
>>> example here is stealing io-wq from within a container, which is
>>> trivial with global self-made id. I would love to hear, if I am
>>> mistaken somewhere.
>>>
>>> Is there some better option?
>>
>> OK, so how about this:
>>
>> - We use the 'fd' as the lookup key. This makes it easy since we can
>>   just check if it's a io_uring instance or not, we don't need to do any
>>   tracking on the side. It also means that the application asking for
>>   sharing must already have some relationship to the process that
>>   created the ring.

Yeah, that's exactly the point.

>>
>> - mm/creds must be transferred through the work item. Any SQE done on
>>   behalf of io_uring_enter() directly already has that, if punted we
>>   must pass the creds and mm. This means we break the static setup of
>>   io_wq->mm/creds. It also means that we probably have to add that to
>>   io_wq_work, which kind of sucks, but...

ehh, juggling mm's... But don't have anything nicer myself.

> It'd fix Stefan's worry too.
> 
>> I think with that we have a decent setup, that's also safe. I've dropped
>> the sharing patches for now, from the 5.6 tree.
> 
> So one concern might be SQPOLL, it'll have to use the ctx creds and mm
> as usual. I guess that is ok.
> 

OK. I'll send the patches for the first part now, and take a look at the second
one a bit latter if isn't done until then.


-- 
Pavel Begunkov


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-27 23:17                     ` Pavel Begunkov
@ 2020-01-27 23:23                       ` Jens Axboe
  2020-01-27 23:25                         ` Pavel Begunkov
  0 siblings, 1 reply; 36+ messages in thread
From: Jens Axboe @ 2020-01-27 23:23 UTC (permalink / raw)
  To: Pavel Begunkov, Daurnimator; +Cc: io-uring

On 1/27/20 4:17 PM, Pavel Begunkov wrote:
> On 28/01/2020 02:00, Jens Axboe wrote:
>> On 1/27/20 3:40 PM, Jens Axboe wrote:
>>> On 1/27/20 2:45 PM, Pavel Begunkov wrote:
>>>> On 27/01/2020 23:33, Jens Axboe wrote:
>>>>> On 1/27/20 7:07 AM, Pavel Begunkov wrote:
>>>>>> On 1/27/2020 4:39 PM, Jens Axboe wrote:
>>>>>>> On 1/27/20 6:29 AM, Pavel Begunkov wrote:
>>>>>>>> On 1/26/2020 8:00 PM, Jens Axboe wrote:
>>>>>>>>> On 1/26/20 8:11 AM, Pavel Begunkov wrote:
>>>>>>>>>> On 1/26/2020 4:51 AM, Daurnimator wrote:
>>>>>>>>>>> On Fri, 24 Jan 2020 at 10:16, Jens Axboe <[email protected]> wrote:
>>>>>>>> Ok. I can't promise it'll play handy for sharing. Though, you'll be out
>>>>>>>> of space in struct io_uring_params soon anyway.
>>>>>>>
>>>>>>> I'm going to keep what we have for now, as I'm really not imagining a
>>>>>>> lot more sharing - what else would we share? So let's not over-design
>>>>>>> anything.
>>>>>>>
>>>>>> Fair enough. I prefer a ptr to an extendable struct, that will take the
>>>>>> last u64, when needed.
>>>>>>
>>>>>> However, it's still better to share through file descriptors. It's just
>>>>>> not secure enough the way it's now.
>>>>>
>>>>> Is the file descriptor value really a good choice? We just had some
>>>>> confusion on ring sharing across forks. Not sure using an fd value
>>>>> is a sane "key" to use across processes.
>>>>>
>>>> As I see it, the problem with @mm is that uring is dead-bound to it.
>>>> For example, a process can create and send uring (e.g. via socket),
>>>> and then be killed. And that basically means
>>>> 1. @mm of the process is locked just because of the sent uring
>>>> instance.
>>>> 2. a process may have an io_uring, which bound to @mm of another
>>>> process, even though the layouts may be completely different.
>>>>
>>>> File descriptors are different here, because io_uring doesn't know
>>>> about them, They are controlled by the userspace (send, dup, fork,
>>>> etc), and don't sabotage all isolation work done in the kernel. A dire
>>>> example here is stealing io-wq from within a container, which is
>>>> trivial with global self-made id. I would love to hear, if I am
>>>> mistaken somewhere.
>>>>
>>>> Is there some better option?
>>>
>>> OK, so how about this:
>>>
>>> - We use the 'fd' as the lookup key. This makes it easy since we can
>>>   just check if it's a io_uring instance or not, we don't need to do any
>>>   tracking on the side. It also means that the application asking for
>>>   sharing must already have some relationship to the process that
>>>   created the ring.
> 
> Yeah, that's exactly the point.
> 
>>>
>>> - mm/creds must be transferred through the work item. Any SQE done on
>>>   behalf of io_uring_enter() directly already has that, if punted we
>>>   must pass the creds and mm. This means we break the static setup of
>>>   io_wq->mm/creds. It also means that we probably have to add that to
>>>   io_wq_work, which kind of sucks, but...
> 
> ehh, juggling mm's... But don't have anything nicer myself.

We already do juggle mm's, this is no different. A worker potentially
retain the mm across works if they are the same.

>> It'd fix Stefan's worry too.
>>
>>> I think with that we have a decent setup, that's also safe. I've dropped
>>> the sharing patches for now, from the 5.6 tree.
>>
>> So one concern might be SQPOLL, it'll have to use the ctx creds and mm
>> as usual. I guess that is ok.
>>
> 
> OK. I'll send the patches for the first part now, and take a look at
> the second one a bit latter if isn't done until then.

Hang on a second, I'm doing the mm and creds bits right now. I'll push
that to a branch, if you want to do the actual fd stuff on top of that,
that would be great.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-27 23:23                       ` Jens Axboe
@ 2020-01-27 23:25                         ` Pavel Begunkov
  2020-01-27 23:38                           ` Jens Axboe
  0 siblings, 1 reply; 36+ messages in thread
From: Pavel Begunkov @ 2020-01-27 23:25 UTC (permalink / raw)
  To: Jens Axboe, Daurnimator; +Cc: io-uring


[-- Attachment #1.1: Type: text/plain, Size: 3981 bytes --]

On 28/01/2020 02:23, Jens Axboe wrote:
> On 1/27/20 4:17 PM, Pavel Begunkov wrote:
>> On 28/01/2020 02:00, Jens Axboe wrote:
>>> On 1/27/20 3:40 PM, Jens Axboe wrote:
>>>> On 1/27/20 2:45 PM, Pavel Begunkov wrote:
>>>>> On 27/01/2020 23:33, Jens Axboe wrote:
>>>>>> On 1/27/20 7:07 AM, Pavel Begunkov wrote:
>>>>>>> On 1/27/2020 4:39 PM, Jens Axboe wrote:
>>>>>>>> On 1/27/20 6:29 AM, Pavel Begunkov wrote:
>>>>>>>>> On 1/26/2020 8:00 PM, Jens Axboe wrote:
>>>>>>>>>> On 1/26/20 8:11 AM, Pavel Begunkov wrote:
>>>>>>>>>>> On 1/26/2020 4:51 AM, Daurnimator wrote:
>>>>>>>>>>>> On Fri, 24 Jan 2020 at 10:16, Jens Axboe <[email protected]> wrote:
>>>>>>>>> Ok. I can't promise it'll play handy for sharing. Though, you'll be out
>>>>>>>>> of space in struct io_uring_params soon anyway.
>>>>>>>>
>>>>>>>> I'm going to keep what we have for now, as I'm really not imagining a
>>>>>>>> lot more sharing - what else would we share? So let's not over-design
>>>>>>>> anything.
>>>>>>>>
>>>>>>> Fair enough. I prefer a ptr to an extendable struct, that will take the
>>>>>>> last u64, when needed.
>>>>>>>
>>>>>>> However, it's still better to share through file descriptors. It's just
>>>>>>> not secure enough the way it's now.
>>>>>>
>>>>>> Is the file descriptor value really a good choice? We just had some
>>>>>> confusion on ring sharing across forks. Not sure using an fd value
>>>>>> is a sane "key" to use across processes.
>>>>>>
>>>>> As I see it, the problem with @mm is that uring is dead-bound to it.
>>>>> For example, a process can create and send uring (e.g. via socket),
>>>>> and then be killed. And that basically means
>>>>> 1. @mm of the process is locked just because of the sent uring
>>>>> instance.
>>>>> 2. a process may have an io_uring, which bound to @mm of another
>>>>> process, even though the layouts may be completely different.
>>>>>
>>>>> File descriptors are different here, because io_uring doesn't know
>>>>> about them, They are controlled by the userspace (send, dup, fork,
>>>>> etc), and don't sabotage all isolation work done in the kernel. A dire
>>>>> example here is stealing io-wq from within a container, which is
>>>>> trivial with global self-made id. I would love to hear, if I am
>>>>> mistaken somewhere.
>>>>>
>>>>> Is there some better option?
>>>>
>>>> OK, so how about this:
>>>>
>>>> - We use the 'fd' as the lookup key. This makes it easy since we can
>>>>   just check if it's a io_uring instance or not, we don't need to do any
>>>>   tracking on the side. It also means that the application asking for
>>>>   sharing must already have some relationship to the process that
>>>>   created the ring.
>>
>> Yeah, that's exactly the point.
>>
>>>>
>>>> - mm/creds must be transferred through the work item. Any SQE done on
>>>>   behalf of io_uring_enter() directly already has that, if punted we
>>>>   must pass the creds and mm. This means we break the static setup of
>>>>   io_wq->mm/creds. It also means that we probably have to add that to
>>>>   io_wq_work, which kind of sucks, but...
>>
>> ehh, juggling mm's... But don't have anything nicer myself.
> 
> We already do juggle mm's, this is no different. A worker potentially
> retain the mm across works if they are the same.
> 
>>> It'd fix Stefan's worry too.
>>>
>>>> I think with that we have a decent setup, that's also safe. I've dropped
>>>> the sharing patches for now, from the 5.6 tree.
>>>
>>> So one concern might be SQPOLL, it'll have to use the ctx creds and mm
>>> as usual. I guess that is ok.
>>>
>>
>> OK. I'll send the patches for the first part now, and take a look at
>> the second one a bit latter if isn't done until then.
> 
> Hang on a second, I'm doing the mm and creds bits right now. I'll push
> that to a branch, if you want to do the actual fd stuff on top of that,
> that would be great.
> 
Sure, should be trivially mergeable.

-- 
Pavel Begunkov


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-27 23:25                         ` Pavel Begunkov
@ 2020-01-27 23:38                           ` Jens Axboe
  2020-01-28 10:01                             ` Stefan Metzmacher
  0 siblings, 1 reply; 36+ messages in thread
From: Jens Axboe @ 2020-01-27 23:38 UTC (permalink / raw)
  To: Pavel Begunkov, Daurnimator; +Cc: io-uring

On 1/27/20 4:25 PM, Pavel Begunkov wrote:
> On 28/01/2020 02:23, Jens Axboe wrote:
>> On 1/27/20 4:17 PM, Pavel Begunkov wrote:
>>> On 28/01/2020 02:00, Jens Axboe wrote:
>>>> On 1/27/20 3:40 PM, Jens Axboe wrote:
>>>>> On 1/27/20 2:45 PM, Pavel Begunkov wrote:
>>>>>> On 27/01/2020 23:33, Jens Axboe wrote:
>>>>>>> On 1/27/20 7:07 AM, Pavel Begunkov wrote:
>>>>>>>> On 1/27/2020 4:39 PM, Jens Axboe wrote:
>>>>>>>>> On 1/27/20 6:29 AM, Pavel Begunkov wrote:
>>>>>>>>>> On 1/26/2020 8:00 PM, Jens Axboe wrote:
>>>>>>>>>>> On 1/26/20 8:11 AM, Pavel Begunkov wrote:
>>>>>>>>>>>> On 1/26/2020 4:51 AM, Daurnimator wrote:
>>>>>>>>>>>>> On Fri, 24 Jan 2020 at 10:16, Jens Axboe <[email protected]> wrote:
>>>>>>>>>> Ok. I can't promise it'll play handy for sharing. Though, you'll be out
>>>>>>>>>> of space in struct io_uring_params soon anyway.
>>>>>>>>>
>>>>>>>>> I'm going to keep what we have for now, as I'm really not imagining a
>>>>>>>>> lot more sharing - what else would we share? So let's not over-design
>>>>>>>>> anything.
>>>>>>>>>
>>>>>>>> Fair enough. I prefer a ptr to an extendable struct, that will take the
>>>>>>>> last u64, when needed.
>>>>>>>>
>>>>>>>> However, it's still better to share through file descriptors. It's just
>>>>>>>> not secure enough the way it's now.
>>>>>>>
>>>>>>> Is the file descriptor value really a good choice? We just had some
>>>>>>> confusion on ring sharing across forks. Not sure using an fd value
>>>>>>> is a sane "key" to use across processes.
>>>>>>>
>>>>>> As I see it, the problem with @mm is that uring is dead-bound to it.
>>>>>> For example, a process can create and send uring (e.g. via socket),
>>>>>> and then be killed. And that basically means
>>>>>> 1. @mm of the process is locked just because of the sent uring
>>>>>> instance.
>>>>>> 2. a process may have an io_uring, which bound to @mm of another
>>>>>> process, even though the layouts may be completely different.
>>>>>>
>>>>>> File descriptors are different here, because io_uring doesn't know
>>>>>> about them, They are controlled by the userspace (send, dup, fork,
>>>>>> etc), and don't sabotage all isolation work done in the kernel. A dire
>>>>>> example here is stealing io-wq from within a container, which is
>>>>>> trivial with global self-made id. I would love to hear, if I am
>>>>>> mistaken somewhere.
>>>>>>
>>>>>> Is there some better option?
>>>>>
>>>>> OK, so how about this:
>>>>>
>>>>> - We use the 'fd' as the lookup key. This makes it easy since we can
>>>>>   just check if it's a io_uring instance or not, we don't need to do any
>>>>>   tracking on the side. It also means that the application asking for
>>>>>   sharing must already have some relationship to the process that
>>>>>   created the ring.
>>>
>>> Yeah, that's exactly the point.
>>>
>>>>>
>>>>> - mm/creds must be transferred through the work item. Any SQE done on
>>>>>   behalf of io_uring_enter() directly already has that, if punted we
>>>>>   must pass the creds and mm. This means we break the static setup of
>>>>>   io_wq->mm/creds. It also means that we probably have to add that to
>>>>>   io_wq_work, which kind of sucks, but...
>>>
>>> ehh, juggling mm's... But don't have anything nicer myself.
>>
>> We already do juggle mm's, this is no different. A worker potentially
>> retain the mm across works if they are the same.
>>
>>>> It'd fix Stefan's worry too.
>>>>
>>>>> I think with that we have a decent setup, that's also safe. I've dropped
>>>>> the sharing patches for now, from the 5.6 tree.
>>>>
>>>> So one concern might be SQPOLL, it'll have to use the ctx creds and mm
>>>> as usual. I guess that is ok.
>>>>
>>>
>>> OK. I'll send the patches for the first part now, and take a look at
>>> the second one a bit latter if isn't done until then.
>>
>> Hang on a second, I'm doing the mm and creds bits right now. I'll push
>> that to a branch, if you want to do the actual fd stuff on top of that,
>> that would be great.
>>
> Sure, should be trivially mergeable.

https://git.kernel.dk/cgit/linux-block/log/?h=for-5.6/io_uring-vfs-wq

Top patch there is the mm/creds passing. I kind of like it even if it
means we're growing io_wq_worker (and subsequently io_kiocb) by 16
bytes, as it means we can be more flexible. This solves it for this use
case, but also the case that Stefan was worried about.

If you respin the last patch I had but using the fd instead, then I
think we're in business.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-27 23:38                           ` Jens Axboe
@ 2020-01-28 10:01                             ` Stefan Metzmacher
  2020-01-28 10:30                               ` Pavel Begunkov
  0 siblings, 1 reply; 36+ messages in thread
From: Stefan Metzmacher @ 2020-01-28 10:01 UTC (permalink / raw)
  To: Jens Axboe, Pavel Begunkov, Daurnimator; +Cc: io-uring

Am 28.01.20 um 00:38 schrieb Jens Axboe:
> On 1/27/20 4:25 PM, Pavel Begunkov wrote:
>> On 28/01/2020 02:23, Jens Axboe wrote:
>>> On 1/27/20 4:17 PM, Pavel Begunkov wrote:
>>>> On 28/01/2020 02:00, Jens Axboe wrote:
>>>>> On 1/27/20 3:40 PM, Jens Axboe wrote:
>>>>>> On 1/27/20 2:45 PM, Pavel Begunkov wrote:
>>>>>>> On 27/01/2020 23:33, Jens Axboe wrote:
>>>>>>>> On 1/27/20 7:07 AM, Pavel Begunkov wrote:
>>>>>>>>> On 1/27/2020 4:39 PM, Jens Axboe wrote:
>>>>>>>>>> On 1/27/20 6:29 AM, Pavel Begunkov wrote:
>>>>>>>>>>> On 1/26/2020 8:00 PM, Jens Axboe wrote:
>>>>>>>>>>>> On 1/26/20 8:11 AM, Pavel Begunkov wrote:
>>>>>>>>>>>>> On 1/26/2020 4:51 AM, Daurnimator wrote:
>>>>>>>>>>>>>> On Fri, 24 Jan 2020 at 10:16, Jens Axboe <[email protected]> wrote:
>>>>>>>>>>> Ok. I can't promise it'll play handy for sharing. Though, you'll be out
>>>>>>>>>>> of space in struct io_uring_params soon anyway.
>>>>>>>>>>
>>>>>>>>>> I'm going to keep what we have for now, as I'm really not imagining a
>>>>>>>>>> lot more sharing - what else would we share? So let's not over-design
>>>>>>>>>> anything.
>>>>>>>>>>
>>>>>>>>> Fair enough. I prefer a ptr to an extendable struct, that will take the
>>>>>>>>> last u64, when needed.
>>>>>>>>>
>>>>>>>>> However, it's still better to share through file descriptors. It's just
>>>>>>>>> not secure enough the way it's now.
>>>>>>>>
>>>>>>>> Is the file descriptor value really a good choice? We just had some
>>>>>>>> confusion on ring sharing across forks. Not sure using an fd value
>>>>>>>> is a sane "key" to use across processes.
>>>>>>>>
>>>>>>> As I see it, the problem with @mm is that uring is dead-bound to it.
>>>>>>> For example, a process can create and send uring (e.g. via socket),
>>>>>>> and then be killed. And that basically means
>>>>>>> 1. @mm of the process is locked just because of the sent uring
>>>>>>> instance.
>>>>>>> 2. a process may have an io_uring, which bound to @mm of another
>>>>>>> process, even though the layouts may be completely different.
>>>>>>>
>>>>>>> File descriptors are different here, because io_uring doesn't know
>>>>>>> about them, They are controlled by the userspace (send, dup, fork,
>>>>>>> etc), and don't sabotage all isolation work done in the kernel. A dire
>>>>>>> example here is stealing io-wq from within a container, which is
>>>>>>> trivial with global self-made id. I would love to hear, if I am
>>>>>>> mistaken somewhere.
>>>>>>>
>>>>>>> Is there some better option?
>>>>>>
>>>>>> OK, so how about this:
>>>>>>
>>>>>> - We use the 'fd' as the lookup key. This makes it easy since we can
>>>>>>   just check if it's a io_uring instance or not, we don't need to do any
>>>>>>   tracking on the side. It also means that the application asking for
>>>>>>   sharing must already have some relationship to the process that
>>>>>>   created the ring.
>>>>
>>>> Yeah, that's exactly the point.
>>>>
>>>>>>
>>>>>> - mm/creds must be transferred through the work item. Any SQE done on
>>>>>>   behalf of io_uring_enter() directly already has that, if punted we
>>>>>>   must pass the creds and mm. This means we break the static setup of
>>>>>>   io_wq->mm/creds. It also means that we probably have to add that to
>>>>>>   io_wq_work, which kind of sucks, but...
>>>>
>>>> ehh, juggling mm's... But don't have anything nicer myself.
>>>
>>> We already do juggle mm's, this is no different. A worker potentially
>>> retain the mm across works if they are the same.
>>>
>>>>> It'd fix Stefan's worry too.
>>>>>
>>>>>> I think with that we have a decent setup, that's also safe. I've dropped
>>>>>> the sharing patches for now, from the 5.6 tree.
>>>>>
>>>>> So one concern might be SQPOLL, it'll have to use the ctx creds and mm
>>>>> as usual. I guess that is ok.
>>>>>
>>>>
>>>> OK. I'll send the patches for the first part now, and take a look at
>>>> the second one a bit latter if isn't done until then.
>>>
>>> Hang on a second, I'm doing the mm and creds bits right now. I'll push
>>> that to a branch, if you want to do the actual fd stuff on top of that,
>>> that would be great.
>>>
>> Sure, should be trivially mergeable.
> 
> https://git.kernel.dk/cgit/linux-block/log/?h=for-5.6/io_uring-vfs-wq
> 
> Top patch there is the mm/creds passing. I kind of like it even if it
> means we're growing io_wq_worker (and subsequently io_kiocb) by 16
> bytes, as it means we can be more flexible. This solves it for this use
> case, but also the case that Stefan was worried about.

Ok, that means that ctx->creds is only used in the IORING_SETUP_SQPOLL
case and there it's used for all requests as get_current_cred() is the
same as ctx->creds from within io_sq_thread(), correct?

And in all other cases get_current_cred() is used at io_uring_enter() time.

That's good in order to make the behavior consistent again and prevents
potential random security problems.

BTW: you need to revert/drop 44d282796f81eb1debc1d7cb53245b4cb3214cb5
in that branch. Or just rebase on v5.5 final?

Thanks!
metze

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-28 10:01                             ` Stefan Metzmacher
@ 2020-01-28 10:30                               ` Pavel Begunkov
  2020-01-28 10:35                                 ` Stefan Metzmacher
  0 siblings, 1 reply; 36+ messages in thread
From: Pavel Begunkov @ 2020-01-28 10:30 UTC (permalink / raw)
  To: Stefan Metzmacher, Jens Axboe, Daurnimator; +Cc: io-uring

On 1/28/2020 1:01 PM, Stefan Metzmacher wrote:
>>>>>>> OK, so how about this:
>>>>>>>
>>>>>>> - We use the 'fd' as the lookup key. This makes it easy since we can
>>>>>>>   just check if it's a io_uring instance or not, we don't need to do any
>>>>>>>   tracking on the side. It also means that the application asking for
>>>>>>>   sharing must already have some relationship to the process that
>>>>>>>   created the ring.
>>>>>>>
>>>>>>> - mm/creds must be transferred through the work item. Any SQE done on
>>>>>>>   behalf of io_uring_enter() directly already has that, if punted we
>>>>>>>   must pass the creds and mm. This means we break the static setup of
>>>>>>>   io_wq->mm/creds. It also means that we probably have to add that to
>>>>>>>   io_wq_work, which kind of sucks, but...
>>>>>> It'd fix Stefan's worry too.
>>>>>>
>>>>>>> I think with that we have a decent setup, that's also safe. I've dropped
>>>>>>> the sharing patches for now, from the 5.6 tree.
>>>>>>
>>>>>> So one concern might be SQPOLL, it'll have to use the ctx creds and mm
>>>>>> as usual. I guess that is ok.
>>
>> https://git.kernel.dk/cgit/linux-block/log/?h=for-5.6/io_uring-vfs-wq
>>
>> Top patch there is the mm/creds passing. I kind of like it even if it
>> means we're growing io_wq_worker (and subsequently io_kiocb) by 16
>> bytes, as it means we can be more flexible. This solves it for this use
>> case, but also the case that Stefan was worried about.
> 
> Ok, that means that ctx->creds is only used in the IORING_SETUP_SQPOLL
> case and there it's used for all requests as get_current_cred() is the
> same as ctx->creds from within io_sq_thread(), correct?

Right

> And in all other cases get_current_cred() is used at io_uring_enter() time.

Exactly

> That's good in order to make the behavior consistent again and prevents
> potential random security problems.

BTW, there also can be problems with registered resources. E.g. for
buffers io_uring can get_user_pages() of one process, and then use the
pages from another process by passing a buffer index. This is not as
bad, however.

> 
> BTW: you need to revert/drop 44d282796f81eb1debc1d7cb53245b4cb3214cb5
> in that branch. Or just rebase on v5.5 final?
> 
> Thanks!
> metze
> 

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-28 10:30                               ` Pavel Begunkov
@ 2020-01-28 10:35                                 ` Stefan Metzmacher
  2020-01-28 10:51                                   ` Pavel Begunkov
  0 siblings, 1 reply; 36+ messages in thread
From: Stefan Metzmacher @ 2020-01-28 10:35 UTC (permalink / raw)
  To: Pavel Begunkov, Jens Axboe, Daurnimator; +Cc: io-uring

Hi Pavel,

>> That's good in order to make the behavior consistent again and prevents
>> potential random security problems.
> 
> BTW, there also can be problems with registered resources. E.g. for
> buffers io_uring can get_user_pages() of one process, and then use the
> pages from another process by passing a buffer index. This is not as
> bad, however.

Yes, but that can only happen by intention, right?
And not randomly depending on the cache state. If the
application has confidential data in the registered buffers, files
then it should not share the ring fd with untrusted processes.

metze


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCHSET 0/4] Add support for shared io-wq backends
  2020-01-28 10:35                                 ` Stefan Metzmacher
@ 2020-01-28 10:51                                   ` Pavel Begunkov
  0 siblings, 0 replies; 36+ messages in thread
From: Pavel Begunkov @ 2020-01-28 10:51 UTC (permalink / raw)
  To: Stefan Metzmacher, Jens Axboe, Daurnimator; +Cc: io-uring

On 1/28/2020 1:35 PM, Stefan Metzmacher wrote:
> Hi Pavel,
> 
>>> That's good in order to make the behavior consistent again and prevents
>>> potential random security problems.
>>
>> BTW, there also can be problems with registered resources. E.g. for
>> buffers io_uring can get_user_pages() of one process, and then use the
>> pages from another process by passing a buffer index. This is not as
>> bad, however.
> 
> Yes, but that can only happen by intention, right?
> And not randomly depending on the cache state. If the
> application has confidential data in the registered buffers, files
> then it should not share the ring fd with untrusted processes.

Indeed, that's why it's a mild problem. Though, that's better to be said
explicitly, so users know the pitfall. Worth putting into the manual, I
think.

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2020-01-28 10:51 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2020-01-23 23:16 [PATCHSET 0/4] Add support for shared io-wq backends Jens Axboe
2020-01-23 23:16 ` [PATCH 1/4] io-wq: make the io_wq ref counted Jens Axboe
2020-01-23 23:16 ` [PATCH 2/4] io-wq: add 'id' to io_wq Jens Axboe
2020-01-23 23:16 ` [PATCH 3/4] io-wq: allow lookup of existing io_wq with given id Jens Axboe
2020-01-24  9:54   ` Stefan Metzmacher
2020-01-24 16:41     ` Jens Axboe
2020-01-23 23:16 ` [PATCH 4/4] io_uring: add support for sharing kernel io-wq workqueue Jens Axboe
2020-01-24  9:51 ` [PATCHSET 0/4] Add support for shared io-wq backends Stefan Metzmacher
2020-01-24 16:43   ` Jens Axboe
2020-01-24 19:14     ` Stefan Metzmacher
2020-01-24 21:37       ` Jens Axboe
2020-01-24 20:34 ` Pavel Begunkov
2020-01-24 21:38   ` Jens Axboe
2020-01-26  1:51 ` Daurnimator
2020-01-26 15:11   ` Pavel Begunkov
2020-01-26 17:00     ` Jens Axboe
2020-01-27 13:29       ` Pavel Begunkov
2020-01-27 13:39         ` Jens Axboe
2020-01-27 14:07           ` Pavel Begunkov
2020-01-27 19:39             ` Pavel Begunkov
2020-01-27 19:45               ` Jens Axboe
2020-01-27 20:33             ` Jens Axboe
2020-01-27 21:45               ` Pavel Begunkov
2020-01-27 22:40                 ` Jens Axboe
2020-01-27 23:00                   ` Jens Axboe
2020-01-27 23:17                     ` Pavel Begunkov
2020-01-27 23:23                       ` Jens Axboe
2020-01-27 23:25                         ` Pavel Begunkov
2020-01-27 23:38                           ` Jens Axboe
2020-01-28 10:01                             ` Stefan Metzmacher
2020-01-28 10:30                               ` Pavel Begunkov
2020-01-28 10:35                                 ` Stefan Metzmacher
2020-01-28 10:51                                   ` Pavel Begunkov
  -- strict thread matches above, loose matches on Subject: below --
2020-01-24 21:31 [PATCHSET v2 " Jens Axboe
2020-01-24 21:31 ` [PATCH 4/4] io_uring: add support for sharing kernel io-wq workqueue Jens Axboe
2020-01-25  7:45   ` Stefan Metzmacher
2020-01-25 16:44     ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox