From: Dominique Martinet <[email protected]>
To: Alexander Viro <[email protected]>,
Christian Brauner <[email protected]>,
Jens Axboe <[email protected]>,
Pavel Begunkov <[email protected]>,
Stefan Roesch <[email protected]>
Cc: Clay Harris <[email protected]>,
Dave Chinner <[email protected]>,
[email protected], [email protected],
[email protected],
Dominique Martinet <[email protected]>
Subject: [PATCH v2 6/6] RFC: io_uring getdents: test returning an EOF flag in CQE
Date: Wed, 10 May 2023 19:52:54 +0900 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
This turns out to be very slightly faster than an extra call to
getdents, but in practice it doesn't seem to be such an improvement as
the trailing getdents will return almost immediately be absorbed by the
scheduling noise in a find-like context (my ""server"" is too noisy to
get proper benchmarks out, but results look slightly better with this in
async mode, and almost identical in the NOWAIT path)
If the user is waiting the end of a single directory though it might be
worth it, so including the patch for comments.
(in particular I'm not really happy that the flag has become in-out for
vfs_getdents, especially when the getdents64 syscall does not use it,
but I don't see much other way around it)
If this approach is acceptable/wanted then this patch will be split down
further (at least dir_context/vfs_getdents, kernfs, libfs, uring in four
separate commits)
Signed-off-by: Dominique Martinet <[email protected]>
---
fs/internal.h | 2 +-
fs/kernfs/dir.c | 1 +
fs/libfs.c | 9 ++++++---
fs/readdir.c | 10 ++++++----
include/linux/fs.h | 2 ++
include/uapi/linux/io_uring.h | 2 ++
io_uring/fs.c | 8 ++++++--
7 files changed, 24 insertions(+), 10 deletions(-)
diff --git a/fs/internal.h b/fs/internal.h
index 0264b001d99a..0b1552c7a870 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -267,4 +267,4 @@ void mnt_idmap_put(struct mnt_idmap *idmap);
struct linux_dirent64;
int vfs_getdents(struct file *file, struct linux_dirent64 __user *dirent,
- unsigned int count, unsigned long flags);
+ unsigned int count, unsigned long *flags);
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 5a5b3e7881bf..53a6b4804c34 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -1860,6 +1860,7 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
up_read(&root->kernfs_rwsem);
file->private_data = NULL;
ctx->pos = INT_MAX;
+ ctx->flags |= DIR_CONTEXT_F_EOD;
return 0;
}
diff --git a/fs/libfs.c b/fs/libfs.c
index a3c7e42d90a7..b2a95dadffbd 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -208,10 +208,12 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
p = &next->d_child;
}
spin_lock(&dentry->d_lock);
- if (next)
+ if (next) {
list_move_tail(&cursor->d_child, &next->d_child);
- else
+ } else {
list_del_init(&cursor->d_child);
+ ctx->flags |= DIR_CONTEXT_F_EOD;
+ }
spin_unlock(&dentry->d_lock);
dput(next);
@@ -1347,7 +1349,8 @@ static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence)
static int empty_dir_readdir(struct file *file, struct dir_context *ctx)
{
- dir_emit_dots(file, ctx);
+ if (dir_emit_dots(file, ctx))
+ ctx->flags |= DIR_CONTEXT_F_EOD;
return 0;
}
diff --git a/fs/readdir.c b/fs/readdir.c
index 1311b89d75e1..be75a2154b4f 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -358,14 +358,14 @@ static bool filldir64(struct dir_context *ctx, const char *name, int namlen,
* @file : pointer to file struct of directory
* @dirent : pointer to user directory structure
* @count : size of buffer
- * @flags : additional dir_context flags
+ * @flags : pointer to additional dir_context flags
*/
int vfs_getdents(struct file *file, struct linux_dirent64 __user *dirent,
- unsigned int count, unsigned long flags)
+ unsigned int count, unsigned long *flags)
{
struct getdents_callback64 buf = {
.ctx.actor = filldir64,
- .ctx.flags = flags,
+ .ctx.flags = flags ? *flags : 0,
.count = count,
.current_dir = dirent
};
@@ -384,6 +384,8 @@ int vfs_getdents(struct file *file, struct linux_dirent64 __user *dirent,
else
error = count - buf.count;
}
+ if (flags)
+ *flags = buf.ctx.flags;
return error;
}
@@ -397,7 +399,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd,
if (!f.file)
return -EBADF;
- error = vfs_getdents(f.file, dirent, count, 0);
+ error = vfs_getdents(f.file, dirent, count, NULL);
fdput_pos(f);
return error;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f7de2b5ca38e..d1e31bccfb4f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1723,8 +1723,10 @@ struct dir_context {
* flags for dir_context flags
* DIR_CONTEXT_F_NOWAIT: Request non-blocking iterate
* (requires file->f_mode & FMODE_NOWAIT)
+ * DIR_CONTEXT_F_EOD: Signal directory has been fully iterated, set by the fs
*/
#define DIR_CONTEXT_F_NOWAIT 0x1
+#define DIR_CONTEXT_F_EOD 0x2
/*
* These flags let !MMU mmap() govern direct device mapping vs immediate
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 35d0de18d893..35877132027e 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -381,11 +381,13 @@ struct io_uring_cqe {
* IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket recv
* IORING_CQE_F_NOTIF Set for notification CQEs. Can be used to distinct
* them from sends.
+ * IORING_CQE_F_EOF If set, file or directory has reached end of file.
*/
#define IORING_CQE_F_BUFFER (1U << 0)
#define IORING_CQE_F_MORE (1U << 1)
#define IORING_CQE_F_SOCK_NONEMPTY (1U << 2)
#define IORING_CQE_F_NOTIF (1U << 3)
+#define IORING_CQE_F_EOF (1U << 4)
enum {
IORING_CQE_BUFFER_SHIFT = 16,
diff --git a/io_uring/fs.c b/io_uring/fs.c
index b15ec81c1ed2..f6222b0148ef 100644
--- a/io_uring/fs.c
+++ b/io_uring/fs.c
@@ -322,6 +322,7 @@ int io_getdents(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_getdents *gd = io_kiocb_to_cmd(req, struct io_getdents);
unsigned long getdents_flags = 0;
+ u32 cqe_flags = 0;
int ret;
if (issue_flags & IO_URING_F_NONBLOCK) {
@@ -338,13 +339,16 @@ int io_getdents(struct io_kiocb *req, unsigned int issue_flags)
goto out;
}
- ret = vfs_getdents(req->file, gd->dirent, gd->count, getdents_flags);
+ ret = vfs_getdents(req->file, gd->dirent, gd->count, &getdents_flags);
out:
if (ret == -EAGAIN &&
(issue_flags & IO_URING_F_NONBLOCK))
return -EAGAIN;
- io_req_set_res(req, ret, 0);
+ if (getdents_flags & DIR_CONTEXT_F_EOD)
+ cqe_flags |= IORING_CQE_F_EOF;
+
+ io_req_set_res(req, ret, cqe_flags);
return 0;
}
--
2.39.2
next prev parent reply other threads:[~2023-05-10 10:54 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-05-10 10:52 [PATCH v2 0/6] io_uring: add getdents support, take 2 Dominique Martinet
2023-05-10 10:52 ` [PATCH v2 1/6] fs: split off vfs_getdents function of getdents64 syscall Dominique Martinet
2023-05-23 15:39 ` Christian Brauner
2023-05-23 21:13 ` Dominique Martinet
2023-05-10 10:52 ` [PATCH v2 2/6] vfs_getdents/struct dir_context: add flags field Dominique Martinet
2023-05-10 10:52 ` [PATCH v2 3/6] io_uring: add support for getdents Dominique Martinet
2023-05-10 10:52 ` [PATCH v2 4/6] kernfs: implement readdir FMODE_NOWAIT Dominique Martinet
2023-05-11 10:55 ` Dan Carpenter
2023-05-11 11:03 ` Dominique Martinet
2023-05-10 10:52 ` [PATCH v2 5/6] libfs: set FMODE_NOWAIT on dir open Dominique Martinet
2023-05-10 10:52 ` Dominique Martinet [this message]
2023-05-23 14:30 ` [PATCH v2 6/6] RFC: io_uring getdents: test returning an EOF flag in CQE Christian Brauner
2023-05-23 21:05 ` Dominique Martinet
2023-05-24 13:52 ` [PATCH v2 1/6] fs: split off vfs_getdents function of getdents64 syscall Christian Brauner
2023-05-24 21:36 ` Dominique Martinet
2023-05-25 9:22 ` Christian Brauner
2023-05-25 11:00 ` Dominique Martinet
2023-05-25 12:33 ` Christian Brauner
2023-07-11 8:17 ` Hao Xu
2023-07-11 8:24 ` Dominique Martinet
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230422-uring-getdents-v2-6-2db1e37dc55e@codewreck.org \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox