public inbox for [email protected]
 help / color / mirror / Atom feed
From: Kanchan Joshi <[email protected]>
To: Christoph Hellwig <[email protected]>
Cc: Pankaj Raghav <[email protected]>,
	Jens Axboe <[email protected]>,
	[email protected], [email protected],
	Pavel Begunkov <[email protected]>,
	Ming Lei <[email protected]>,
	Luis Chamberlain <[email protected]>, Stefan Roesch <[email protected]>,
	[email protected], Kanchan Joshi <[email protected]>
Subject: Re: [PATCH v3 1/5] fs,io_uring: add infrastructure for uring-cmd
Date: Wed, 4 May 2022 08:12:44 -0700	[thread overview]
Message-ID: <CA+1E3rKe6G8UC9Pzkm4Wbu50X=TT5tise8g6umduhj1eTbN0+w@mail.gmail.com> (raw)
In-Reply-To: <[email protected]>

On Tue, May 3, 2022 at 1:52 PM Christoph Hellwig <[email protected]> wrote:
>
> This mostly looks good, but a few comments an an (untested) patch
> to fix these complaints below:
>
> > +struct io_uring_cmd {
> > +     struct file     *file;
> > +     void            *cmd;
> > +     /* for irq-completion - if driver requires doing stuff in task-context*/
> > +     void (*driver_cb)(struct io_uring_cmd *cmd);
>
> I'd rename this task_Work_cb or similar, as driver is not very
> meaningful.
>
> > +     u32             flags;
> > +     u32             cmd_op;
> > +     u32             unused;
> > +     u8              pdu[28]; /* available inline for free use */
>
> The unused field here is not only unused, but also messed up the
> alignment so that any pointer in pdu will not be properly aligned
> on 64-bit platforms and tjus might cause crashes on some platforms.

Indeed, thanks. Since we are going to remove that, we can give that
space to pdu for now i.e. pdu[32].This will help in removing "packed"
attribute in nvme pdu (which is 32 bytes without packing).

> For now we can just remove it, but I really want to get rid of the pdu
> thing that causes these problems, but I will not hold the series off for
> that and plan to look into that later.
> Also a lot of the fields here were using spaces instead of tabs for
> indentation.
>
> >       union {
> >               __u64   addr;   /* pointer to buffer or iovecs */
> >               __u64   splice_off_in;
> > +             __u16   cmd_len;
> >       };
>
> This field isn't actually used and can be removed.
>
> >       __u32   len;            /* buffer size or number of iovecs */
> >       union {
> > @@ -61,7 +63,10 @@ struct io_uring_sqe {
> >               __s32   splice_fd_in;
> >               __u32   file_index;
> >       };
> > -     __u64   addr3;
> > +     union {
> > +             __u64   addr3;
> > +             __u64   cmd;
> > +     };
> >       __u64   __pad2[1];
>
> The way how the tail of the structure is handled here is a mess.  I
> think something like a union of two structs for the small and large
> case would be much better, so that cmd can be the actual array for
> the data that can be used directly and without the nasty cast (that
> also casted away the constness):
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index b774e6eac5384..fe24d606ca306 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -4544,7 +4544,7 @@ static int io_getxattr_prep(struct io_kiocb *req,
>         if (ret)
>                 return ret;
>
> -       path = u64_to_user_ptr(READ_ONCE(sqe->addr3));
> +       path = u64_to_user_ptr(READ_ONCE(sqe->small.addr3));
>
>         ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL);
>         if (IS_ERR(ix->filename)) {
> @@ -4645,7 +4645,7 @@ static int io_setxattr_prep(struct io_kiocb *req,
>         if (ret)
>                 return ret;
>
> -       path = u64_to_user_ptr(READ_ONCE(sqe->addr3));
> +       path = u64_to_user_ptr(READ_ONCE(sqe->small.addr3));
>
>         ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL);
>         if (IS_ERR(ix->filename)) {
> @@ -4909,15 +4909,15 @@ static int io_linkat(struct io_kiocb *req, unsigned int issue_flags)
>
>  static void io_uring_cmd_work(struct io_kiocb *req, bool *locked)
>  {
> -       req->uring_cmd.driver_cb(&req->uring_cmd);
> +       req->uring_cmd.task_work_cb(&req->uring_cmd);
>  }
>
>  void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
> -                       void (*driver_cb)(struct io_uring_cmd *))
> +                       void (*task_work_cb)(struct io_uring_cmd *))
>  {
>         struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);
>
> -       req->uring_cmd.driver_cb = driver_cb;
> +       req->uring_cmd.task_work_cb = task_work_cb;
>         req->io_task_work.func = io_uring_cmd_work;
>         io_req_task_work_add(req, !!(req->ctx->flags & IORING_SETUP_SQPOLL));
>  }
> @@ -4949,7 +4949,7 @@ static int io_uring_cmd_prep(struct io_kiocb *req,
>                 return -EOPNOTSUPP;
>         if (!(req->ctx->flags & IORING_SETUP_CQE32))
>                 return -EOPNOTSUPP;
> -       ioucmd->cmd = (void *) &sqe->cmd;
> +       ioucmd->cmd = sqe->big.cmd;
>         ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
>         ioucmd->flags = 0;
>         return 0;
> @@ -12735,7 +12735,7 @@ static int __init io_uring_init(void)
>         BUILD_BUG_SQE_ELEM(42, __u16,  personality);
>         BUILD_BUG_SQE_ELEM(44, __s32,  splice_fd_in);
>         BUILD_BUG_SQE_ELEM(44, __u32,  file_index);
> -       BUILD_BUG_SQE_ELEM(48, __u64,  addr3);
> +       BUILD_BUG_SQE_ELEM(48, __u64,  small.addr3);
>
>         BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=
>                      sizeof(struct io_uring_rsrc_update));
> diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
> index a4ff4696cbead..7274884859910 100644
> --- a/include/linux/io_uring.h
> +++ b/include/linux/io_uring.h
> @@ -13,20 +13,19 @@ enum io_uring_cmd_flags {
>  };
>
>  struct io_uring_cmd {
> -       struct file     *file;
> -       void            *cmd;
> -       /* for irq-completion - if driver requires doing stuff in task-context*/
> -       void (*driver_cb)(struct io_uring_cmd *cmd);
> -       u32             flags;
> -       u32             cmd_op;
> -       u32             unused;
> +       struct file     *file;
> +       const u8        *cmd;
> +       /* callback to defer completions to task context */
> +       void (*task_work_cb)(struct io_uring_cmd *cmd);
> +       u32             flags;
> +       u32             cmd_op;
>         u8              pdu[28]; /* available inline for free use */
>  };
>
>  #if defined(CONFIG_IO_URING)
>  void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2);
>  void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
> -                       void (*driver_cb)(struct io_uring_cmd *));
> +                       void (*task_work_cb)(struct io_uring_cmd *));
>  struct sock *io_uring_get_socket(struct file *file);
>  void __io_uring_cancel(bool cancel_all);
>  void __io_uring_free(struct task_struct *tsk);
> @@ -56,7 +55,7 @@ static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
>  {
>  }
>  static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
> -                       void (*driver_cb)(struct io_uring_cmd *))
> +                       void (*task_work_cb)(struct io_uring_cmd *))
>  {
>  }
>  static inline struct sock *io_uring_get_socket(struct file *file)
> diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
> index 630982b3c34c5..b1f64c2412935 100644
> --- a/include/trace/events/io_uring.h
> +++ b/include/trace/events/io_uring.h
> @@ -539,8 +539,8 @@ TRACE_EVENT(io_uring_req_failed,
>                 __entry->buf_index      = sqe->buf_index;
>                 __entry->personality    = sqe->personality;
>                 __entry->file_index     = sqe->file_index;
> -               __entry->pad1           = sqe->__pad2[0];
> -               __entry->addr3          = sqe->addr3;
> +               __entry->addr3          = sqe->small.addr3;
> +               __entry->pad1           = sqe->small.__pad2[0];
>                 __entry->error          = error;
>         ),
>
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index c081511119bfa..bd4221184f594 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -27,7 +27,6 @@ struct io_uring_sqe {
>         union {
>                 __u64   addr;   /* pointer to buffer or iovecs */
>                 __u64   splice_off_in;
> -               __u16   cmd_len;
>         };
>         __u32   len;            /* buffer size or number of iovecs */
>         union {
> @@ -64,16 +63,19 @@ struct io_uring_sqe {
>                 __u32   file_index;
>         };
>         union {
> -               __u64   addr3;
> -               __u64   cmd;
> +               struct {
> +                       __u64   addr3;
> +                       __u64   __pad2[1];
> +               } small;

Thinking if this can cause any issue for existing users of addr3, i.e.
in the userspace side? Since it needs to access this field with
small.addr3.
Jens - is xattr infra already frozen?

> +               /*
> +                * If the ring is initialized with IORING_SETUP_SQE128, then
> +                * this field is used for 80 bytes of arbitrary command data.
> +                */
> +               struct {
> +                       __u8    cmd[0];
> +               } big;
>         };
> -       __u64   __pad2[1];
> -
> -       /*
> -        * If the ring is initialized with IORING_SETUP_SQE128, then this field
> -        * contains 64-bytes of padding, doubling the size of the SQE.
> -        */
> -       __u64   __big_sqe_pad[0];
>  };
>
>  enum {



-- 
Joshi


  reply	other threads:[~2022-05-04  8:13 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CGME20220503184911eucas1p1beb172219537d78fcaf2a1417f532cf7@eucas1p1.samsung.com>
2022-05-03 18:48 ` [PATCH v3 0/5] io_uring passthough for nvme Pankaj Raghav
     [not found]   ` <CGME20220503184912eucas1p1bb0e3d36c06cfde8436df3a45e67bd32@eucas1p1.samsung.com>
2022-05-03 18:48     ` [PATCH v3 1/5] fs,io_uring: add infrastructure for uring-cmd Pankaj Raghav
2022-05-03 20:52       ` Christoph Hellwig
2022-05-04 15:12         ` Kanchan Joshi [this message]
2022-05-04 15:48           ` Kanchan Joshi
2022-05-03 21:03       ` Jens Axboe
     [not found]   ` <CGME20220503184913eucas1p156abb6e2273c8dabc22e87ec8b218a5c@eucas1p1.samsung.com>
2022-05-03 18:48     ` [PATCH v3 2/5] block: wire-up support for passthrough plugging Pankaj Raghav
2022-05-03 20:53       ` Christoph Hellwig
     [not found]   ` <CGME20220503184914eucas1p1d9df18afe3234c0698a66cdb9c664ddc@eucas1p1.samsung.com>
2022-05-03 18:48     ` [PATCH v3 3/5] nvme: refactor nvme_submit_user_cmd() Pankaj Raghav
     [not found]   ` <CGME20220503184915eucas1p2ae04772900c24ef0b23fd8bedead20ae@eucas1p2.samsung.com>
2022-05-03 18:48     ` [PATCH v3 4/5] nvme: wire-up uring-cmd support for io-passthru on char-device Pankaj Raghav
2022-05-03 20:55       ` Christoph Hellwig
     [not found]   ` <CGME20220503184916eucas1p266cbb3ffc1622b292bf59b5eccec9933@eucas1p2.samsung.com>
2022-05-03 18:48     ` [PATCH v3 5/5] nvme: add vectored-io support for uring-cmd Pankaj Raghav
2022-05-03 20:56       ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CA+1E3rKe6G8UC9Pzkm4Wbu50X=TT5tise8g6umduhj1eTbN0+w@mail.gmail.com' \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox