public inbox for [email protected]
 help / color / mirror / Atom feed
* [PATCH] io_uring: add support for probing opcodes
@ 2020-01-16 17:23 Jens Axboe
  2020-01-16 22:35 ` Stefan Metzmacher
  0 siblings, 1 reply; 3+ messages in thread
From: Jens Axboe @ 2020-01-16 17:23 UTC (permalink / raw)
  To: io-uring; +Cc: 李通洲

The application currently has no way of knowing if a given opcode is
supported or not without having to try and issue one and see if we get
-EINVAL or not. And even this approach is fraught with peril, as maybe
we're getting -EINVAL due to some fields being missing, or maybe it's
just not that easy to issue that particular command without doing some
other leg work in terms of setup first.

This adds IORING_REGISTER_PROBE, which fills in a structure with info
on what is supported or not. This will work even with sparse opcode
fields, which may happen in the future or even today if someone
backports specific features to older kernels.

Signed-off-by: Jens Axboe <[email protected]>

---

diff --git a/fs/io_uring.c b/fs/io_uring.c
index ee14a0fcd59f..b073bf944423 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6554,6 +6554,42 @@ SYSCALL_DEFINE2(io_uring_setup, u32, entries,
 	return io_uring_setup(entries, params);
 }
 
+static int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
+{
+	struct io_uring_probe *p;
+	size_t size;
+	int i, ret;
+
+	size = struct_size(p, ops, nr_args);
+	if (size == SIZE_MAX)
+		return -EOVERFLOW;
+	p = kzalloc(size, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	ret = -EFAULT;
+	if (copy_from_user(p, arg, size))
+		goto out;
+	ret = -EINVAL;
+	if (memchr_inv(p, 0, size))
+		goto out;
+
+	p->last_op = IORING_OP_LAST - 1;
+	/* stock kernel isn't sparse, so everything is supported */
+	for (i = 0; i < nr_args; i++) {
+		p->ops[i].op = i;
+		p->ops[i].flags = IO_URING_OP_SUPPORTED;
+	}
+	p->ops_len = i;
+
+	ret = 0;
+	if (copy_to_user(arg, p, size))
+		ret = -EFAULT;
+out:
+	kfree(p);
+	return ret;
+}
+
 static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			       void __user *arg, unsigned nr_args)
 	__releases(ctx->uring_lock)
@@ -6570,7 +6606,8 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 		return -ENXIO;
 
 	if (opcode != IORING_UNREGISTER_FILES &&
-	    opcode != IORING_REGISTER_FILES_UPDATE) {
+	    opcode != IORING_REGISTER_FILES_UPDATE &&
+	    opcode != IORING_REGISTER_PROBE) {
 		percpu_ref_kill(&ctx->refs);
 
 		/*
@@ -6632,6 +6669,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			break;
 		ret = io_eventfd_unregister(ctx);
 		break;
+	case IORING_REGISTER_PROBE:
+		ret = -EINVAL;
+		if (!arg)
+			break;
+		ret = io_probe(ctx, arg, nr_args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -6639,7 +6682,8 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 
 
 	if (opcode != IORING_UNREGISTER_FILES &&
-	    opcode != IORING_REGISTER_FILES_UPDATE) {
+	    opcode != IORING_REGISTER_FILES_UPDATE &&
+	    opcode != IORING_REGISTER_PROBE) {
 		/* bring the ctx back to life */
 		percpu_ref_reinit(&ctx->refs);
 out:
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index fea7da182851..955fd477e530 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -194,6 +194,7 @@ struct io_uring_params {
 #define IORING_UNREGISTER_EVENTFD	5
 #define IORING_REGISTER_FILES_UPDATE	6
 #define IORING_REGISTER_EVENTFD_ASYNC	7
+#define IORING_REGISTER_PROBE		8
 
 struct io_uring_files_update {
 	__u32 offset;
@@ -201,4 +202,21 @@ struct io_uring_files_update {
 	__aligned_u64 /* __s32 * */ fds;
 };
 
+#define IO_URING_OP_SUPPORTED	(1U << 0)
+
+struct io_uring_probe_op {
+	__u8 op;
+	__u8 resv;
+	__u16 flags;	/* IO_URING_OP_* flags */
+	__u32 resv2;
+};
+
+struct io_uring_probe {
+	__u8 last_op;	/* last opcode supported */
+	__u8 ops_len;	/* length of ops[] array below */
+	__u16 resv;
+	__u32 resv2[3];
+	struct io_uring_probe_op ops[0];
+};
+
 #endif

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] io_uring: add support for probing opcodes
  2020-01-16 17:23 [PATCH] io_uring: add support for probing opcodes Jens Axboe
@ 2020-01-16 22:35 ` Stefan Metzmacher
  2020-01-16 22:36   ` Jens Axboe
  0 siblings, 1 reply; 3+ messages in thread
From: Stefan Metzmacher @ 2020-01-16 22:35 UTC (permalink / raw)
  To: Jens Axboe, io-uring; +Cc: 李通洲


[-- Attachment #1.1: Type: text/plain, Size: 1312 bytes --]

Am 16.01.20 um 18:23 schrieb Jens Axboe:
> The application currently has no way of knowing if a given opcode is
> supported or not without having to try and issue one and see if we get
> -EINVAL or not. And even this approach is fraught with peril, as maybe
> we're getting -EINVAL due to some fields being missing, or maybe it's
> just not that easy to issue that particular command without doing some
> other leg work in terms of setup first.
> 
> This adds IORING_REGISTER_PROBE, which fills in a structure with info
> on what is supported or not. This will work even with sparse opcode
> fields, which may happen in the future or even today if someone
> backports specific features to older kernels.

That's funny I was just thinking about exactly that topic before
I opened the io-uring mail folder:-)

That will make it much easier to write a portable
vfs backend for samba that doesn't depend on the kernel
features at build time.

> +	p->last_op = IORING_OP_LAST - 1;
> +	/* stock kernel isn't sparse, so everything is supported */
> +	for (i = 0; i < nr_args; i++) {
> +		p->ops[i].op = i;

Shouldn't there be an if (i <= p->last_op) before we pretend to support
an opcode? Or we need to truncate nr_args

> +		p->ops[i].flags = IO_URING_OP_SUPPORTED;
> +	}

metze



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] io_uring: add support for probing opcodes
  2020-01-16 22:35 ` Stefan Metzmacher
@ 2020-01-16 22:36   ` Jens Axboe
  0 siblings, 0 replies; 3+ messages in thread
From: Jens Axboe @ 2020-01-16 22:36 UTC (permalink / raw)
  To: Stefan Metzmacher, io-uring; +Cc: 李通洲

On 1/16/20 3:35 PM, Stefan Metzmacher wrote:
> Am 16.01.20 um 18:23 schrieb Jens Axboe:
>> The application currently has no way of knowing if a given opcode is
>> supported or not without having to try and issue one and see if we get
>> -EINVAL or not. And even this approach is fraught with peril, as maybe
>> we're getting -EINVAL due to some fields being missing, or maybe it's
>> just not that easy to issue that particular command without doing some
>> other leg work in terms of setup first.
>>
>> This adds IORING_REGISTER_PROBE, which fills in a structure with info
>> on what is supported or not. This will work even with sparse opcode
>> fields, which may happen in the future or even today if someone
>> backports specific features to older kernels.
> 
> That's funny I was just thinking about exactly that topic before
> I opened the io-uring mail folder:-)
> 
> That will make it much easier to write a portable
> vfs backend for samba that doesn't depend on the kernel
> features at build time.
> 
>> +	p->last_op = IORING_OP_LAST - 1;
>> +	/* stock kernel isn't sparse, so everything is supported */
>> +	for (i = 0; i < nr_args; i++) {
>> +		p->ops[i].op = i;
> 
> Shouldn't there be an if (i <= p->last_op) before we pretend to support
> an opcode? Or we need to truncate nr_args

Yeah, I made some edits, just didn't post v2 yet. Below is the current
one:


diff --git a/fs/io_uring.c b/fs/io_uring.c
index ee14a0fcd59f..b20587bda5d4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -561,6 +561,8 @@ struct io_op_def {
 	unsigned		hash_reg_file : 1;
 	/* unbound wq insertion if file is a non-regular file */
 	unsigned		unbound_nonreg_file : 1;
+	/* opcode is not supported by this kernel */
+	unsigned		not_supported : 1;
 };
 
 static const struct io_op_def io_op_defs[] = {
@@ -6554,6 +6556,45 @@ SYSCALL_DEFINE2(io_uring_setup, u32, entries,
 	return io_uring_setup(entries, params);
 }
 
+static int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
+{
+	struct io_uring_probe *p;
+	size_t size;
+	int i, ret;
+
+	size = struct_size(p, ops, nr_args);
+	if (size == SIZE_MAX)
+		return -EOVERFLOW;
+	p = kzalloc(size, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	ret = -EFAULT;
+	if (copy_from_user(p, arg, size))
+		goto out;
+	ret = -EINVAL;
+	if (memchr_inv(p, 0, size))
+		goto out;
+
+	p->last_op = IORING_OP_LAST - 1;
+	if (nr_args > IORING_OP_LAST)
+		nr_args = IORING_OP_LAST;
+
+	for (i = 0; i < nr_args; i++) {
+		p->ops[i].op = i;
+		if (!io_op_defs[i].not_supported)
+			p->ops[i].flags = IO_URING_OP_SUPPORTED;
+	}
+	p->ops_len = i;
+
+	ret = 0;
+	if (copy_to_user(arg, p, size))
+		ret = -EFAULT;
+out:
+	kfree(p);
+	return ret;
+}
+
 static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			       void __user *arg, unsigned nr_args)
 	__releases(ctx->uring_lock)
@@ -6570,7 +6611,8 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 		return -ENXIO;
 
 	if (opcode != IORING_UNREGISTER_FILES &&
-	    opcode != IORING_REGISTER_FILES_UPDATE) {
+	    opcode != IORING_REGISTER_FILES_UPDATE &&
+	    opcode != IORING_REGISTER_PROBE) {
 		percpu_ref_kill(&ctx->refs);
 
 		/*
@@ -6632,6 +6674,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			break;
 		ret = io_eventfd_unregister(ctx);
 		break;
+	case IORING_REGISTER_PROBE:
+		ret = -EINVAL;
+		if (!arg || nr_args > 256)
+			break;
+		ret = io_probe(ctx, arg, nr_args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -6639,7 +6687,8 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 
 
 	if (opcode != IORING_UNREGISTER_FILES &&
-	    opcode != IORING_REGISTER_FILES_UPDATE) {
+	    opcode != IORING_REGISTER_FILES_UPDATE &&
+	    opcode != IORING_REGISTER_PROBE) {
 		/* bring the ctx back to life */
 		percpu_ref_reinit(&ctx->refs);
 out:
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index fea7da182851..955fd477e530 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -194,6 +194,7 @@ struct io_uring_params {
 #define IORING_UNREGISTER_EVENTFD	5
 #define IORING_REGISTER_FILES_UPDATE	6
 #define IORING_REGISTER_EVENTFD_ASYNC	7
+#define IORING_REGISTER_PROBE		8
 
 struct io_uring_files_update {
 	__u32 offset;
@@ -201,4 +202,21 @@ struct io_uring_files_update {
 	__aligned_u64 /* __s32 * */ fds;
 };
 
+#define IO_URING_OP_SUPPORTED	(1U << 0)
+
+struct io_uring_probe_op {
+	__u8 op;
+	__u8 resv;
+	__u16 flags;	/* IO_URING_OP_* flags */
+	__u32 resv2;
+};
+
+struct io_uring_probe {
+	__u8 last_op;	/* last opcode supported */
+	__u8 ops_len;	/* length of ops[] array below */
+	__u16 resv;
+	__u32 resv2[3];
+	struct io_uring_probe_op ops[0];
+};
+
 #endif

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-01-16 22:36 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-16 17:23 [PATCH] io_uring: add support for probing opcodes Jens Axboe
2020-01-16 22:35 ` Stefan Metzmacher
2020-01-16 22:36   ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox