public inbox for [email protected]
 help / color / mirror / Atom feed
From: Christian Brauner <[email protected]>
To: Dominique Martinet <[email protected]>
Cc: Alexander Viro <[email protected]>,
	Jens Axboe <[email protected]>,
	Pavel Begunkov <[email protected]>,
	Stefan Roesch <[email protected]>, Clay Harris <[email protected]>,
	Dave Chinner <[email protected]>,
	[email protected], [email protected],
	[email protected]
Subject: Re: [PATCH v2 6/6] RFC: io_uring getdents: test returning an EOF flag in CQE
Date: Tue, 23 May 2023 16:30:14 +0200	[thread overview]
Message-ID: <20230523-abgleichen-rotieren-37fdb6fb9ef3@brauner> (raw)
In-Reply-To: <[email protected]>

On Wed, May 10, 2023 at 07:52:54PM +0900, Dominique Martinet wrote:
> This turns out to be very slightly faster than an extra call to
> getdents, but in practice it doesn't seem to be such an improvement as
> the trailing getdents will return almost immediately be absorbed by the
> scheduling noise in a find-like context (my ""server"" is too noisy to
> get proper benchmarks out, but results look slightly better with this in
> async mode, and almost identical in the NOWAIT path)
> 
> If the user is waiting the end of a single directory though it might be
> worth it, so including the patch for comments.
> (in particular I'm not really happy that the flag has become in-out for
> vfs_getdents, especially when the getdents64 syscall does not use it,
> but I don't see much other way around it)
> 
> If this approach is acceptable/wanted then this patch will be split down
> further (at least dir_context/vfs_getdents, kernfs, libfs, uring in four
> separate commits)
> 
> Signed-off-by: Dominique Martinet <[email protected]>
> ---
>  fs/internal.h                 |  2 +-
>  fs/kernfs/dir.c               |  1 +
>  fs/libfs.c                    |  9 ++++++---
>  fs/readdir.c                  | 10 ++++++----
>  include/linux/fs.h            |  2 ++
>  include/uapi/linux/io_uring.h |  2 ++
>  io_uring/fs.c                 |  8 ++++++--
>  7 files changed, 24 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/internal.h b/fs/internal.h
> index 0264b001d99a..0b1552c7a870 100644
> --- a/fs/internal.h
> +++ b/fs/internal.h
> @@ -267,4 +267,4 @@ void mnt_idmap_put(struct mnt_idmap *idmap);
>  struct linux_dirent64;
>  
>  int vfs_getdents(struct file *file, struct linux_dirent64 __user *dirent,
> -		 unsigned int count, unsigned long flags);
> +		 unsigned int count, unsigned long *flags);
> diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> index 5a5b3e7881bf..53a6b4804c34 100644
> --- a/fs/kernfs/dir.c
> +++ b/fs/kernfs/dir.c
> @@ -1860,6 +1860,7 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
>  	up_read(&root->kernfs_rwsem);
>  	file->private_data = NULL;
>  	ctx->pos = INT_MAX;
> +	ctx->flags |= DIR_CONTEXT_F_EOD;
>  	return 0;
>  }
>  
> diff --git a/fs/libfs.c b/fs/libfs.c
> index a3c7e42d90a7..b2a95dadffbd 100644
> --- a/fs/libfs.c
> +++ b/fs/libfs.c
> @@ -208,10 +208,12 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
>  		p = &next->d_child;
>  	}
>  	spin_lock(&dentry->d_lock);
> -	if (next)
> +	if (next) {
>  		list_move_tail(&cursor->d_child, &next->d_child);
> -	else
> +	} else {
>  		list_del_init(&cursor->d_child);
> +		ctx->flags |= DIR_CONTEXT_F_EOD;
> +	}
>  	spin_unlock(&dentry->d_lock);
>  	dput(next);
>  
> @@ -1347,7 +1349,8 @@ static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence)
>  
>  static int empty_dir_readdir(struct file *file, struct dir_context *ctx)
>  {
> -	dir_emit_dots(file, ctx);
> +	if (dir_emit_dots(file, ctx))
> +		ctx->flags |= DIR_CONTEXT_F_EOD;
>  	return 0;
>  }
>  
> diff --git a/fs/readdir.c b/fs/readdir.c
> index 1311b89d75e1..be75a2154b4f 100644
> --- a/fs/readdir.c
> +++ b/fs/readdir.c
> @@ -358,14 +358,14 @@ static bool filldir64(struct dir_context *ctx, const char *name, int namlen,
>   * @file    : pointer to file struct of directory
>   * @dirent  : pointer to user directory structure
>   * @count   : size of buffer
> - * @flags   : additional dir_context flags
> + * @flags   : pointer to additional dir_context flags
>   */
>  int vfs_getdents(struct file *file, struct linux_dirent64 __user *dirent,
> -		 unsigned int count, unsigned long flags)
> +		 unsigned int count, unsigned long *flags)
>  {
>  	struct getdents_callback64 buf = {
>  		.ctx.actor = filldir64,
> -		.ctx.flags = flags,
> +		.ctx.flags = flags ? *flags : 0,
>  		.count = count,
>  		.current_dir = dirent
>  	};
> @@ -384,6 +384,8 @@ int vfs_getdents(struct file *file, struct linux_dirent64 __user *dirent,
>  		else
>  			error = count - buf.count;
>  	}
> +	if (flags)
> +		*flags = buf.ctx.flags;
>  	return error;
>  }
>  
> @@ -397,7 +399,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd,
>  	if (!f.file)
>  		return -EBADF;
>  
> -	error = vfs_getdents(f.file, dirent, count, 0);
> +	error = vfs_getdents(f.file, dirent, count, NULL);
>  
>  	fdput_pos(f);
>  	return error;
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index f7de2b5ca38e..d1e31bccfb4f 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1723,8 +1723,10 @@ struct dir_context {
>   * flags for dir_context flags
>   * DIR_CONTEXT_F_NOWAIT: Request non-blocking iterate
>   *                       (requires file->f_mode & FMODE_NOWAIT)
> + * DIR_CONTEXT_F_EOD: Signal directory has been fully iterated, set by the fs
>   */
>  #define DIR_CONTEXT_F_NOWAIT	0x1
> +#define DIR_CONTEXT_F_EOD	0x2
>  
>  /*
>   * These flags let !MMU mmap() govern direct device mapping vs immediate
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index 35d0de18d893..35877132027e 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -381,11 +381,13 @@ struct io_uring_cqe {
>   * IORING_CQE_F_SOCK_NONEMPTY	If set, more data to read after socket recv
>   * IORING_CQE_F_NOTIF	Set for notification CQEs. Can be used to distinct
>   * 			them from sends.
> + * IORING_CQE_F_EOF     If set, file or directory has reached end of file.
>   */
>  #define IORING_CQE_F_BUFFER		(1U << 0)
>  #define IORING_CQE_F_MORE		(1U << 1)
>  #define IORING_CQE_F_SOCK_NONEMPTY	(1U << 2)
>  #define IORING_CQE_F_NOTIF		(1U << 3)
> +#define IORING_CQE_F_EOF		(1U << 4)
>  
>  enum {
>  	IORING_CQE_BUFFER_SHIFT		= 16,
> diff --git a/io_uring/fs.c b/io_uring/fs.c
> index b15ec81c1ed2..f6222b0148ef 100644
> --- a/io_uring/fs.c
> +++ b/io_uring/fs.c
> @@ -322,6 +322,7 @@ int io_getdents(struct io_kiocb *req, unsigned int issue_flags)
>  {
>  	struct io_getdents *gd = io_kiocb_to_cmd(req, struct io_getdents);
>  	unsigned long getdents_flags = 0;
> +	u32 cqe_flags = 0;
>  	int ret;
>  
>  	if (issue_flags & IO_URING_F_NONBLOCK) {
> @@ -338,13 +339,16 @@ int io_getdents(struct io_kiocb *req, unsigned int issue_flags)
>  			goto out;
>  	}
>  
> -	ret = vfs_getdents(req->file, gd->dirent, gd->count, getdents_flags);
> +	ret = vfs_getdents(req->file, gd->dirent, gd->count, &getdents_flags);

I don't understand how synchronization and updating of f_pos works here.
For example, what happens if a concurrent seek happens on the fd while
io_uring is using vfs_getdents which calls into iterate_dir() and
updates f_pos?

  reply	other threads:[~2023-05-23 14:30 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-10 10:52 [PATCH v2 0/6] io_uring: add getdents support, take 2 Dominique Martinet
2023-05-10 10:52 ` [PATCH v2 1/6] fs: split off vfs_getdents function of getdents64 syscall Dominique Martinet
2023-05-23 15:39   ` Christian Brauner
2023-05-23 21:13     ` Dominique Martinet
2023-05-10 10:52 ` [PATCH v2 2/6] vfs_getdents/struct dir_context: add flags field Dominique Martinet
2023-05-10 10:52 ` [PATCH v2 3/6] io_uring: add support for getdents Dominique Martinet
2023-05-10 10:52 ` [PATCH v2 4/6] kernfs: implement readdir FMODE_NOWAIT Dominique Martinet
2023-05-11 10:55   ` Dan Carpenter
2023-05-11 11:03     ` Dominique Martinet
2023-05-10 10:52 ` [PATCH v2 5/6] libfs: set FMODE_NOWAIT on dir open Dominique Martinet
2023-05-10 10:52 ` [PATCH v2 6/6] RFC: io_uring getdents: test returning an EOF flag in CQE Dominique Martinet
2023-05-23 14:30   ` Christian Brauner [this message]
2023-05-23 21:05     ` Dominique Martinet
2023-05-24 13:52       ` [PATCH v2 1/6] fs: split off vfs_getdents function of getdents64 syscall Christian Brauner
2023-05-24 21:36         ` Dominique Martinet
2023-05-25  9:22           ` Christian Brauner
2023-05-25 11:00             ` Dominique Martinet
2023-05-25 12:33               ` Christian Brauner
2023-07-11  8:17               ` Hao Xu
2023-07-11  8:24                 ` Dominique Martinet

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230523-abgleichen-rotieren-37fdb6fb9ef3@brauner \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox