public inbox for [email protected]
 help / color / mirror / Atom feed
From: Jens Axboe <[email protected]>
To: Avi Kivity <[email protected]>, [email protected]
Subject: Re: memory access op ideas
Date: Sat, 23 Apr 2022 12:02:51 -0600	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

On 4/23/22 11:32 AM, Jens Axboe wrote:
>> I guess copy_to_user saves us from having to consider endianness.
> 
> I was considering that too, definitely something that should be
> investigated. Making it a 1/2/4/8 switch and using put_user() is
> probably a better idea. Easy enough to benchmark.

FWIW, this is the current version. Some quick benchmarking doesn't show
any difference between copy_to_user and put_user, but that may depend on
the arch as well (using aarch64). But we might as well use put_user() and
combine it with the length check, so we explicitly only support 1/2/4/8
sizes.


diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2052a796436c..3b94cb4b67ed 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -586,6 +586,14 @@ struct io_socket {
 	unsigned long			nofile;
 };
 
+struct io_mem {
+	struct file			*file;
+	u64				value;	/* from sqe->off: immediate value */
+	void __user			*dest;	/* from sqe->addr: user destination */
+	u32				len;	/* from sqe->len: store size in bytes */
+	u32				flags;	/* from sqe->memcpy_flags */
+};
+
 struct io_sync {
 	struct file			*file;
 	loff_t				len;
@@ -962,6 +970,7 @@ struct io_kiocb {
 		struct io_msg		msg;
 		struct io_xattr		xattr;
 		struct io_socket	sock;
+		struct io_mem		mem;
 	};
 
 	u8				opcode;
@@ -1231,16 +1240,19 @@ static const struct io_op_def io_op_defs[] = {
 		.needs_file		= 1,
 	},
 	[IORING_OP_FSETXATTR] = {
-		.needs_file = 1
+		.needs_file		= 1,
 	},
 	[IORING_OP_SETXATTR] = {},
 	[IORING_OP_FGETXATTR] = {
-		.needs_file = 1
+		.needs_file		= 1,
 	},
 	[IORING_OP_GETXATTR] = {},
 	[IORING_OP_SOCKET] = {
 		.audit_skip		= 1,
 	},
+	[IORING_OP_MEMCPY] = {
+		.audit_skip		= 1,
+	},
 };
 
 /* requests with any of those set should undergo io_disarm_next() */
@@ -5527,6 +5539,71 @@ static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags)
 	return 0;
 }
 
+/* Prep IORING_OP_MEMCPY: pull value/dest/len/flags from the SQE, validate. */
+static int io_memcpy_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_mem *mem = &req->mem;
+
+	if (unlikely(sqe->ioprio || sqe->buf_index || sqe->splice_fd_in))
+		return -EINVAL;
+
+	mem->value = READ_ONCE(sqe->off);
+	mem->dest = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	mem->len = READ_ONCE(sqe->len);
+	/* only 1/2/4/8 byte stores exist: nonzero power of two, at most 8 */
+	if (!mem->len || mem->len > sizeof(u64) || (mem->len & (mem->len - 1)))
+		return -EINVAL;
+	mem->flags = READ_ONCE(sqe->memcpy_flags);
+	if (mem->flags & ~IORING_MEMCPY_IMM)
+		return -EINVAL;
+	/* only supports immediate mode for now */
+	if (!(mem->flags & IORING_MEMCPY_IMM))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Issue IORING_OP_MEMCPY: store the immediate value, truncated to 'len'
+ * bytes, to the user destination with one put_user() of matching width.
+ * The request completes with 'len' on success, -EFAULT if the store
+ * faults, or -EINVAL for a width other than 1/2/4/8. Always returns 0;
+ * the result is delivered through io_req_complete().
+ */
+static int io_memcpy(struct io_kiocb *req)
+{
+	struct io_mem *mem = &req->mem;
+	int ret = mem->len;
+
+	/* casts keep the __user annotation of mem->dest for sparse */
+	switch (mem->len) {
+	case 1:
+		if (put_user((u8) mem->value, (u8 __user *) mem->dest))
+			ret = -EFAULT;
+		break;
+	case 2:
+		if (put_user((u16) mem->value, (u16 __user *) mem->dest))
+			ret = -EFAULT;
+		break;
+	case 4:
+		if (put_user((u32) mem->value, (u32 __user *) mem->dest))
+			ret = -EFAULT;
+		break;
+	case 8:
+		if (put_user(mem->value, (u64 __user *) mem->dest))
+			ret = -EFAULT;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_complete(req, ret);
+	return 0;
+}
+
 #if defined(CONFIG_NET)
 static bool io_net_retry(struct socket *sock, int flags)
 {
@@ -7494,6 +7571,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return io_getxattr_prep(req, sqe);
 	case IORING_OP_SOCKET:
 		return io_socket_prep(req, sqe);
+	case IORING_OP_MEMCPY:
+		return io_memcpy_prep(req, sqe);
 	}
 
 	printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -7815,6 +7894,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 	case IORING_OP_SOCKET:
 		ret = io_socket(req, issue_flags);
 		break;
+	case IORING_OP_MEMCPY:
+		ret = io_memcpy(req);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 5fb52bf32435..9e69d70a3b5b 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -46,6 +46,7 @@ struct io_uring_sqe {
 		__u32		unlink_flags;
 		__u32		hardlink_flags;
 		__u32		xattr_flags;
+		__u32		memcpy_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	/* pack this to avoid bogus arm OABI complaints */
@@ -152,6 +153,7 @@ enum {
 	IORING_OP_FGETXATTR,
 	IORING_OP_GETXATTR,
 	IORING_OP_SOCKET,
+	IORING_OP_MEMCPY,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
@@ -206,6 +208,14 @@ enum {
 #define IORING_ASYNC_CANCEL_FD	(1U << 1)
 #define IORING_ASYNC_CANCEL_ANY	(1U << 2)
 
+/*
+ * IORING_OP_MEMCPY flags. Only immediate mode is implemented for now.
+ *
+ * IORING_MEMCPY_IMM		Immediate copy. 'off' contains an immediate
+ *				value. If not set, 'off' is a source address.
+ */
+#define IORING_MEMCPY_IMM	(1U << 0)
+
 /*
  * IO completion data structure (Completion Queue Entry)
  */

-- 
Jens Axboe


  reply	other threads:[~2022-04-23 18:02 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-13 10:33 memory access op ideas Avi Kivity
2022-04-22 12:52 ` Hao Xu
2022-04-22 13:24   ` Hao Xu
2022-04-22 13:38   ` Jens Axboe
2022-04-23  7:19     ` Hao Xu
2022-04-23 16:14   ` Avi Kivity
2022-04-22 14:50 ` Jens Axboe
2022-04-22 15:03   ` Jens Axboe
2022-04-23 16:30     ` Avi Kivity
2022-04-23 17:32       ` Jens Axboe
2022-04-23 18:02         ` Jens Axboe [this message]
2022-04-23 18:11           ` Jens Axboe
2022-04-22 20:03   ` Walker, Benjamin
2022-04-23 10:19     ` Pavel Begunkov
2022-04-23 13:20     ` Jens Axboe
2022-04-23 16:23   ` Avi Kivity
2022-04-23 17:30     ` Jens Axboe
2022-04-24 13:04       ` Avi Kivity
2022-04-24 13:30         ` Jens Axboe
2022-04-24 14:56           ` Avi Kivity
2022-04-25  0:45             ` Jens Axboe
2022-04-25 18:05               ` Walker, Benjamin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox