From: Lennert Buytenhek <[email protected]>
To: David Laight <[email protected]>,
Al Viro <[email protected]>
Cc: Jens Axboe <[email protected]>,
[email protected], [email protected],
[email protected], [email protected]
Subject: Re: [RFC PATCH] io_uring: add support for IORING_OP_GETDENTS64
Date: Fri, 29 Jan 2021 01:07:10 +0200 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
On Sun, Jan 24, 2021 at 10:21:38PM +0000, David Laight wrote:
> > One open question is whether IORING_OP_GETDENTS64 should be more like
> > pread(2) and allow passing in a starting offset to read from the
> > directory from. (This would require some more surgery in fs/readdir.c.)
>
> Since directories are seekable this ought to work.
> Modulo horrid issues with 32bit file offsets.
The incremental patch below does this. (It doesn't apply cleanly on
top of v1 of the IORING_OP_GETDENTS patch as I have other changes in
my tree -- I'm including it just to illustrate the changes that would
make this work.)
This change seems to work, and makes IORING_OP_GETDENTS take an
explicitly specified directory offset (instead of using the file's
->f_pos), making it more like pread(2), and I like the change from
a conceptual point of view, but it's a bit ugly around
iterate_dir_use_ctx_pos(). Any thoughts on how to do this more
cleanly (without breaking iterate_dir() semantics)?
> You'd need to return the final offset to allow another
> read to continue from the end position.
We can use the ->d_off value as returned in the last struct
linux_dirent64 as the directory offset to continue reading from
with the next IORING_OP_GETDENTS call, illustrated by the patch
to uringfind.c at the bottom.
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 13dd29f8ebb3..0f9707ed9294 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -576,6 +576,7 @@ struct io_getdents {
struct file *file;
struct linux_dirent64 __user *dirent;
unsigned int count;
+ loff_t pos;
};
struct io_completion {
@@ -4584,9 +4585,10 @@ static int io_getdents_prep(struct io_kiocb *req,
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (sqe->ioprio || sqe->off || sqe->rw_flags || sqe->buf_index)
+ if (sqe->ioprio || sqe->rw_flags || sqe->buf_index)
return -EINVAL;
+ getdents->pos = READ_ONCE(sqe->off);
getdents->dirent = u64_to_user_ptr(READ_ONCE(sqe->addr));
getdents->count = READ_ONCE(sqe->len);
return 0;
@@ -4601,7 +4603,8 @@ static int io_getdents(struct io_kiocb *req, bool force_nonblock)
if (force_nonblock)
return -EAGAIN;
- ret = vfs_getdents(req->file, getdents->dirent, getdents->count);
+ ret = vfs_getdents(req->file, getdents->dirent, getdents->count,
+ &getdents->pos);
if (ret < 0) {
if (ret == -ERESTARTSYS)
ret = -EINTR;
diff --git a/fs/readdir.c b/fs/readdir.c
index f52167c1eb61..d6bd78f6350a 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -37,7 +37,7 @@
} while (0)
-int iterate_dir(struct file *file, struct dir_context *ctx)
+int iterate_dir_use_ctx_pos(struct file *file, struct dir_context *ctx)
{
struct inode *inode = file_inode(file);
bool shared = false;
@@ -60,12 +60,10 @@ int iterate_dir(struct file *file, struct dir_context *ctx)
res = -ENOENT;
if (!IS_DEADDIR(inode)) {
- ctx->pos = file->f_pos;
if (shared)
res = file->f_op->iterate_shared(file, ctx);
else
res = file->f_op->iterate(file, ctx);
- file->f_pos = ctx->pos;
fsnotify_access(file);
file_accessed(file);
}
@@ -76,6 +74,17 @@ int iterate_dir(struct file *file, struct dir_context *ctx)
out:
return res;
}
+
+int iterate_dir(struct file *file, struct dir_context *ctx)
+{
+ int res;
+
+ ctx->pos = file->f_pos;
+ res = iterate_dir_use_ctx_pos(file, ctx);
+ file->f_pos = ctx->pos;
+
+ return res;
+}
EXPORT_SYMBOL(iterate_dir);
/*
@@ -349,16 +358,18 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
}
int vfs_getdents(struct file *file, struct linux_dirent64 __user *dirent,
- unsigned int count)
+ unsigned int count, loff_t *pos)
{
struct getdents_callback64 buf = {
.ctx.actor = filldir64,
+ .ctx.pos = *pos,
.count = count,
.current_dir = dirent
};
int error;
- error = iterate_dir(file, &buf.ctx);
+ error = iterate_dir_use_ctx_pos(file, &buf.ctx);
+ *pos = buf.ctx.pos;
if (error >= 0)
error = buf.error;
if (buf.prev_reclen) {
@@ -384,7 +395,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd,
if (!f.file)
return -EBADF;
- error = vfs_getdents(f.file, dirent, count);
+ error = vfs_getdents(f.file, dirent, count, &f.file->f_pos);
fdput_pos(f);
return error;
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 114885d3f6c4..4d9d96163f92 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3107,11 +3107,12 @@ const char *simple_get_link(struct dentry *, struct inode *,
struct delayed_call *);
extern const struct inode_operations simple_symlink_inode_operations;
+extern int iterate_dir_use_ctx_pos(struct file *, struct dir_context *);
extern int iterate_dir(struct file *, struct dir_context *);
struct linux_dirent64;
int vfs_getdents(struct file *file, struct linux_dirent64 __user *dirent,
- unsigned int count);
+ unsigned int count, loff_t *pos);
int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
int flags);
Corresponding uringfind.c change:
diff --git a/uringfind.c b/uringfind.c
index 4282296..e140388 100644
--- a/uringfind.c
+++ b/uringfind.c
@@ -22,9 +22,10 @@ struct linux_dirent64 {
};
static inline void io_uring_prep_getdents(struct io_uring_sqe *sqe, int fd,
- void *buf, unsigned int count)
+ void *buf, unsigned int count,
+ uint64_t off)
{
- io_uring_prep_rw(IORING_OP_GETDENTS, sqe, fd, buf, count, 0);
+ io_uring_prep_rw(IORING_OP_GETDENTS, sqe, fd, buf, count, off);
}
@@ -38,6 +39,7 @@ struct dir {
struct dir *parent;
int fd;
+ uint64_t off;
uint8_t buf[16384];
char name[0];
};
@@ -131,7 +133,8 @@ static void schedule_readdir(struct dir *dir)
struct io_uring_sqe *sqe;
sqe = get_sqe();
- io_uring_prep_getdents(sqe, dir->fd, dir->buf, sizeof(dir->buf));
+ io_uring_prep_getdents(sqe, dir->fd, dir->buf, sizeof(dir->buf),
+ dir->off);
io_uring_sqe_set_data(sqe, dir);
}
@@ -145,6 +148,7 @@ static void opendir_completion(struct dir *dir, int ret)
}
dir->fd = ret;
+ dir->off = 0;
schedule_readdir(dir);
}
@@ -179,6 +183,7 @@ static void readdir_completion(struct dir *dir, int ret)
schedule_opendir(dir, dent->d_name);
}
+ dir->off = dent->d_off;
bufp += dent->d_reclen;
}
next prev parent reply other threads:[~2021-01-28 23:08 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-01-23 11:41 [RFC PATCH] io_uring: add support for IORING_OP_GETDENTS64 Lennert Buytenhek
2021-01-23 17:37 ` Jens Axboe
2021-01-23 18:16 ` Lennert Buytenhek
2021-01-23 18:22 ` Jens Axboe
2021-01-23 23:27 ` Matthew Wilcox
2021-01-23 23:33 ` Jens Axboe
2021-01-28 22:30 ` Lennert Buytenhek
2021-01-23 23:50 ` Andres Freund
2021-01-23 23:56 ` Andres Freund
2021-01-24 1:59 ` Al Viro
2021-01-24 2:17 ` Andres Freund
2021-01-24 22:21 ` David Laight
2021-01-28 23:07 ` Lennert Buytenhek [this message]
2021-01-29 5:37 ` Lennert Buytenhek
2021-01-29 9:41 ` David Laight
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox