From: Jens Axboe <[email protected]>
To: Stefan Metzmacher <[email protected]>, io-uring <[email protected]>
Cc: 李通洲 <[email protected]>
Subject: Re: [PATCH] io_uring: add support for probing opcodes
Date: Thu, 16 Jan 2020 15:36:29 -0700 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
On 1/16/20 3:35 PM, Stefan Metzmacher wrote:
> Am 16.01.20 um 18:23 schrieb Jens Axboe:
>> The application currently has no way of knowing if a given opcode is
>> supported or not without having to try and issue one and see if we get
>> -EINVAL or not. And even this approach is fraught with peril, as maybe
>> we're getting -EINVAL due to some fields being missing, or maybe it's
>> just not that easy to issue that particular command without doing some
>> other leg work in terms of setup first.
>>
>> This adds IORING_REGISTER_PROBE, which fills in a structure with info
>> on what it supported or not. This will work even with sparse opcode
>> fields, which may happen in the future or even today if someone
>> backports specific features to older kernels.
>
> That's funny I was just thinking about exactly that topic before
> I opened the io-uring mail folder:-)
>
> That's will make it much easier to write a portable
> vfs backend for samba that doesn't depend on the kernel
> features at build time.
>
>> + p->last_op = IORING_OP_LAST - 1;
>> + /* stock kernel isn't sparse, so everything is supported */
>> + for (i = 0; i < nr_args; i++) {
>> + p->ops[i].op = i;
>
> Shouldn't there be an if (i <= p->last_op) before we pretent to support
> an opcode? Or we need to truncate nr_args
Yeah, I made some edits, just didn't post v2 yet. Below is the current
one:
diff --git a/fs/io_uring.c b/fs/io_uring.c
index ee14a0fcd59f..b20587bda5d4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -561,6 +561,8 @@ struct io_op_def {
unsigned hash_reg_file : 1;
/* unbound wq insertion if file is a non-regular file */
unsigned unbound_nonreg_file : 1;
+ /* opcode is not supported by this kernel */
+ unsigned not_supported : 1;
};
static const struct io_op_def io_op_defs[] = {
@@ -6554,6 +6556,45 @@ SYSCALL_DEFINE2(io_uring_setup, u32, entries,
return io_uring_setup(entries, params);
}
+static int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
+{
+ struct io_uring_probe *p;
+ size_t size;
+ int i, ret;
+
+ size = struct_size(p, ops, nr_args);
+ if (size == SIZE_MAX)
+ return -EOVERFLOW;
+ p = kzalloc(size, GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ ret = -EFAULT;
+ if (copy_from_user(p, arg, size))
+ goto out;
+ ret = -EINVAL;
+ if (memchr_inv(p, 0, size))
+ goto out;
+
+ p->last_op = IORING_OP_LAST - 1;
+ if (nr_args > IORING_OP_LAST)
+ nr_args = IORING_OP_LAST;
+
+ for (i = 0; i < nr_args; i++) {
+ p->ops[i].op = i;
+ if (!io_op_defs[i].not_supported)
+ p->ops[i].flags = IO_URING_OP_SUPPORTED;
+ }
+ p->ops_len = i;
+
+ ret = 0;
+ if (copy_to_user(arg, p, size))
+ ret = -EFAULT;
+out:
+ kfree(p);
+ return ret;
+}
+
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
void __user *arg, unsigned nr_args)
__releases(ctx->uring_lock)
@@ -6570,7 +6611,8 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
return -ENXIO;
if (opcode != IORING_UNREGISTER_FILES &&
- opcode != IORING_REGISTER_FILES_UPDATE) {
+ opcode != IORING_REGISTER_FILES_UPDATE &&
+ opcode != IORING_REGISTER_PROBE) {
percpu_ref_kill(&ctx->refs);
/*
@@ -6632,6 +6674,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
break;
ret = io_eventfd_unregister(ctx);
break;
+ case IORING_REGISTER_PROBE:
+ ret = -EINVAL;
+ if (!arg || nr_args > 256)
+ break;
+ ret = io_probe(ctx, arg, nr_args);
+ break;
default:
ret = -EINVAL;
break;
@@ -6639,7 +6687,8 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
if (opcode != IORING_UNREGISTER_FILES &&
- opcode != IORING_REGISTER_FILES_UPDATE) {
+ opcode != IORING_REGISTER_FILES_UPDATE &&
+ opcode != IORING_REGISTER_PROBE) {
/* bring the ctx back to life */
percpu_ref_reinit(&ctx->refs);
out:
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index fea7da182851..955fd477e530 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -194,6 +194,7 @@ struct io_uring_params {
#define IORING_UNREGISTER_EVENTFD 5
#define IORING_REGISTER_FILES_UPDATE 6
#define IORING_REGISTER_EVENTFD_ASYNC 7
+#define IORING_REGISTER_PROBE 8
struct io_uring_files_update {
__u32 offset;
@@ -201,4 +202,21 @@ struct io_uring_files_update {
__aligned_u64 /* __s32 * */ fds;
};
+#define IO_URING_OP_SUPPORTED (1U << 0)
+
+struct io_uring_probe_op {
+ __u8 op;
+ __u8 resv;
+ __u16 flags; /* IO_URING_OP_* flags */
+ __u32 resv2;
+};
+
+struct io_uring_probe {
+ __u8 last_op; /* last opcode supported */
+ __u8 ops_len; /* length of ops[] array below */
+ __u16 resv;
+ __u32 resv2[3];
+ struct io_uring_probe_op ops[0];
+};
+
#endif
--
Jens Axboe
prev parent reply other threads:[~2020-01-16 22:36 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-01-16 17:23 [PATCH] io_uring: add support for probing opcodes Jens Axboe
2020-01-16 22:35 ` Stefan Metzmacher
2020-01-16 22:36 ` Jens Axboe [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox