From: Eric Dumazet <[email protected]>
To: Stefan Roesch <[email protected]>, [email protected]
Cc: [email protected], [email protected], [email protected],
[email protected], [email protected]
Subject: Re: [RFC PATCH v2 1/2] io_uring: add napi busy polling support
Date: Mon, 7 Nov 2022 10:33:41 -0800 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
On 11/7/22 09:52, Stefan Roesch wrote:
> This adds the napi busy polling support in io_uring.c. It adds a new
> napi_list to the io_ring_ctx structure. This list contains the list of
> napi_id's that are currently enabled for busy polling. The list is
> synchronized by the new napi_lock spin lock. The current default napi
> busy polling time is stored in napi_busy_poll_to. If napi busy polling
> is not enabled, the value is 0.
>
> The busy poll timeout is also stored as part of the io_wait_queue. This
> is necessary because, for sq polling, the poll interval needs to be adjusted
> and the napi callback allows passing in only one value.
>
> Testing has shown that the round-trip times are reduced to 38us from
> 55us by enabling napi busy polling with a busy poll timeout of 100us.
>
> Signed-off-by: Stefan Roesch <[email protected]>
> Suggested-by: Olivier Langlois <[email protected]>
> ---
> include/linux/io_uring_types.h | 6 +
> io_uring/io_uring.c | 240 +++++++++++++++++++++++++++++++++
> io_uring/napi.h | 22 +++
> io_uring/poll.c | 3 +
> io_uring/sqpoll.c | 9 ++
> 5 files changed, 280 insertions(+)
> create mode 100644 io_uring/napi.h
>
> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
> index f5b687a787a3..84b446b0d215 100644
> --- a/include/linux/io_uring_types.h
> +++ b/include/linux/io_uring_types.h
> @@ -270,6 +270,12 @@ struct io_ring_ctx {
> struct xarray personalities;
> u32 pers_next;
>
> +#ifdef CONFIG_NET_RX_BUSY_POLL
> + struct list_head napi_list; /* track busy poll napi_id */
> + spinlock_t napi_lock; /* napi_list lock */
> + unsigned int napi_busy_poll_to; /* napi busy poll default timeout */
> +#endif
> +
> struct {
> /*
> * We cache a range of free CQEs we can use, once exhausted it
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index ac8c488e3077..b02bba4ebcbf 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -90,6 +90,7 @@
> #include "rsrc.h"
> #include "cancel.h"
> #include "net.h"
> +#include "napi.h"
> #include "notif.h"
>
> #include "timeout.h"
> @@ -327,6 +328,13 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
> INIT_WQ_LIST(&ctx->locked_free_list);
> INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
> INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
> +
> +#ifdef CONFIG_NET_RX_BUSY_POLL
> + INIT_LIST_HEAD(&ctx->napi_list);
> + spin_lock_init(&ctx->napi_lock);
> + ctx->napi_busy_poll_to = READ_ONCE(sysctl_net_busy_poll);
> +#endif
> +
> return ctx;
> err:
> kfree(ctx->dummy_ubuf);
> @@ -2303,6 +2311,10 @@ struct io_wait_queue {
> struct io_ring_ctx *ctx;
> unsigned cq_tail;
> unsigned nr_timeouts;
> +
> +#ifdef CONFIG_NET_RX_BUSY_POLL
> + unsigned int busy_poll_to;
> +#endif
> };
>
> static inline bool io_has_work(struct io_ring_ctx *ctx)
> @@ -2376,6 +2388,198 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
> return 1;
> }
>
> +#ifdef CONFIG_NET_RX_BUSY_POLL
> +#define NAPI_TIMEOUT (60 * SEC_CONVERSION)
> +
> +struct io_napi_entry {
> + struct list_head list;
> + unsigned int napi_id;
> + unsigned long timeout;
> +};
> +
> +static bool io_napi_busy_loop_on(struct io_ring_ctx *ctx)
> +{
> + return READ_ONCE(ctx->napi_busy_poll_to);
> +}
> +
> +/*
> + * io_napi_add() - Add napi id to the busy poll list
> + * @file: file pointer for socket
> + * @ctx: io-uring context
> + *
> + * Add the napi id of the socket to the napi busy poll list.
> + */
> +void io_napi_add(struct file *file, struct io_ring_ctx *ctx)
> +{
> + unsigned int napi_id;
> + struct socket *sock;
> + struct sock *sk;
> + struct io_napi_entry *ne;
> +
> + if (!io_napi_busy_loop_on(ctx))
> + return;
> +
> + sock = sock_from_file(file);
> + if (!sock)
> + return;
> +
> + sk = sock->sk;
> + if (!sk)
> + return;
> +
> + napi_id = READ_ONCE(sk->sk_napi_id);
> +
> + /* Non-NAPI IDs can be rejected */
> + if (napi_id < MIN_NAPI_ID)
> + return;
> +
> + spin_lock(&ctx->napi_lock);
> + list_for_each_entry(ne, &ctx->napi_list, list) {
> + if (ne->napi_id == napi_id) {
> + ne->timeout = jiffies + NAPI_TIMEOUT;
> + goto out;
> + }
This list could become very big if you do not remove stale napi_ids from it.
Device reconfiguration does not recycle napi_ids; it creates new ones.
> + }
> +
> + ne = kmalloc(sizeof(*ne), GFP_NOWAIT);
> + if (!ne)
> + goto out;
> +
> + ne->napi_id = napi_id;
> + ne->timeout = jiffies + NAPI_TIMEOUT;
> + list_add_tail(&ne->list, &ctx->napi_list);
> +
> +out:
> + spin_unlock(&ctx->napi_lock);
> +}
> +
> +
next prev parent reply other threads:[~2022-11-07 18:33 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-11-07 17:52 [RFC PATCH v2 0/2] io_uring: add napi busy polling support Stefan Roesch
2022-11-07 17:52 ` [RFC PATCH v2 1/2] " Stefan Roesch
2022-11-07 18:33 ` Eric Dumazet [this message]
2022-11-07 19:08 ` Stefan Roesch
2022-11-09 0:56 ` Jakub Kicinski
2022-11-10 23:36 ` Stefan Roesch
2022-11-11 1:35 ` Jakub Kicinski
2022-11-07 17:52 ` [RFC PATCH v2 2/2] io_uring: add api to set napi busy poll timeout Stefan Roesch
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox