public inbox for [email protected]
From: Hao Xu <[email protected]>
To: Olivier Langlois <[email protected]>,
	Jens Axboe <[email protected]>,
	[email protected]
Subject: Re: napi_busy_poll
Date: Wed, 16 Feb 2022 20:14:25 +0800	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

On 2022/2/16 at 2:05 AM, Olivier Langlois wrote:
> On Tue, 2022-02-15 at 03:37 -0500, Olivier Langlois wrote:
>>
>> That being said, I have not been able to make it work yet. For some
>> unknown reason, no valid napi_id is extracted from the sockets added
>> to the context, so the net_busy_poll function is never called.
>>
>> I find that very strange since, prior to using io_uring, my code was
>> using epoll and busy polling was working fine with my application
>> sockets. Something is escaping my comprehension. I must be tired and
>> this will become obvious...
>>
> The napi_id values associated with my sockets appear to be in the range
> 0 < napi_id < MIN_NAPI_ID
> 
> from busy_loop.h:
> /*		0 - Reserved to indicate value not set
>  *     1..NR_CPUS - Reserved for sender_cpu
>  *  NR_CPUS+1..~0 - Region available for NAPI IDs
>  */
> #define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1))
> 
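One quick way to check what the kernel actually recorded for a socket is
the SO_INCOMING_NAPI_ID socket option (Linux 4.12+), which returns the
same sk->sk_napi_id field the patch below reads. A minimal, untested
sketch, assuming a kernel built with CONFIG_NET_RX_BUSY_POLL:

#include <stdio.h>
#include <sys/socket.h>

#ifndef SO_INCOMING_NAPI_ID
#define SO_INCOMING_NAPI_ID 56  /* asm-generic value on most arches */
#endif

/* Print the NAPI ID recorded for a connected socket.  A value below
 * MIN_NAPI_ID (NR_CPUS + 1) means no NAPI instance has been associated
 * with the socket yet, e.g. because nothing has been received on it.
 */
static void print_napi_id(int fd)
{
        unsigned int napi_id = 0;
        socklen_t len = sizeof(napi_id);

        if (getsockopt(fd, SOL_SOCKET, SO_INCOMING_NAPI_ID,
                       &napi_id, &len) < 0) {
                perror("getsockopt(SO_INCOMING_NAPI_ID)");
                return;
        }
        printf("fd %d: napi_id %u\n", fd, napi_id);
}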
> I have found this:
> https://lwn.net/Articles/619862/
> 
> hinting that busy_poll may be incompatible with RPS
> (Documentation/networking/scaling.rst), which I may have discovered
> *AFTER* my epoll -> io_uring transition (I don't recall the exact
> sequence of my learning process).
> 
> With my current knowledge, it makes little sense to me why busy polling
> would not be possible with RPS. Also, what exactly a NAPI device is
> remains quite nebulous to me... Looking into the Intel igb driver code,
> it seems like one NAPI device is created for each interrupt vector/Rx
> ring of the device.
> 
> Bottom line, it seems like I have fallen into a new rabbit hole. It
> may take me a day or two to figure it all out... you are welcome to
> enlighten me if you know a thing or two about these topics... I am
> kinda lost right now...
> 
Hi Olivier,
I've written something to express my idea; it would be great if you
could try it.
It's totally untested and only does polling in the sqthread; it won't
be hard to expand it to cqring_wait. My original idea was to poll all
the napi devices, but that may not be efficient, so for a given
request we just do napi polling for its one napi device.
There is still one problem: when to delete the polled NAPIs.
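One possible direction, purely a hypothetical sketch on top of the patch
below, would be to reap stale entries periodically. It assumes an extra
`unsigned long timeout' field in struct napi_entry, refreshed to
jiffies + NAPI_ENTRY_TIMEOUT on every io_add_napi() hit, where
NAPI_ENTRY_TIMEOUT is a made-up value:

static void io_reap_napi_entries(struct io_ring_ctx *ctx)
{
        struct napi_entry *ne, *n;

        /* Free entries whose timeout has passed; the list would also
         * need locking once it is touched from more than one context.
         */
        list_for_each_entry_safe(ne, n, &ctx->napi_list, list) {
                if (time_after(jiffies, ne->timeout)) {
                        list_del(&ne->list);
                        kfree(ne);
                }
        }
}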

Regards,
Hao

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 538f90bd0508..2e32d5fe0641 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -63,6 +63,7 @@
  #include <net/sock.h>
  #include <net/af_unix.h>
  #include <net/scm.h>
+#include <net/busy_poll.h>
  #include <linux/anon_inodes.h>
  #include <linux/sched/mm.h>
  #include <linux/uaccess.h>
@@ -443,6 +444,7 @@ struct io_ring_ctx {
                 spinlock_t                      rsrc_ref_lock;
         };

+       struct list_head                napi_list;
         /* Keep this last, we don't need it for the fast path */
         struct {
                 #if defined(CONFIG_UNIX)
@@ -1457,6 +1459,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
         INIT_WQ_LIST(&ctx->locked_free_list);
         INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
         INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
+       INIT_LIST_HEAD(&ctx->napi_list);
         return ctx;
  err:
         kfree(ctx->dummy_ubuf);
@@ -5419,6 +5422,71 @@ IO_NETOP_FN(send);
  IO_NETOP_FN(recv);
  #endif /* CONFIG_NET */

+#ifdef CONFIG_NET_RX_BUSY_POLL
+struct napi_entry {
+       struct list_head        list;
+       unsigned int            napi_id;
+};
+
+static void io_add_napi(struct file *file, struct io_ring_ctx *ctx)
+{
+       unsigned int napi_id;
+       struct socket *sock;
+       struct sock *sk;
+       struct napi_entry *ne;
+
+       if (!net_busy_loop_on())
+               return;
+
+       sock = sock_from_file(file);
+       if (!sock)
+               return;
+
+       sk = sock->sk;
+       if (!sk)
+               return;
+
+       napi_id = READ_ONCE(sk->sk_napi_id);
+       if (napi_id < MIN_NAPI_ID)
+               return;
+
+       list_for_each_entry(ne, &ctx->napi_list, list) {
+               if (ne->napi_id == napi_id)
+                       return;
+       }
+
+       ne = kmalloc(sizeof(*ne), GFP_KERNEL);
+       if (!ne)
+               return;
+
+       ne->napi_id = napi_id;
+       list_add_tail(&ne->list, &ctx->napi_list);
+}
+
+static void io_napi_busy_loop(struct io_ring_ctx *ctx)
+{
+       struct napi_entry *ne;
+
+       if (list_empty(&ctx->napi_list) || !net_busy_loop_on())
+               return;
+
+       list_for_each_entry(ne, &ctx->napi_list, list)
+               napi_busy_loop(ne->napi_id, NULL, NULL, false, BUSY_POLL_BUDGET);
+}
+#else
+
+static inline void io_add_napi(struct file *file, struct io_ring_ctx *ctx)
+{
+       return;
+}
+
+static inline void io_napi_busy_loop(struct io_ring_ctx *ctx)
+{
+       return;
+}
+#endif /* CONFIG_NET_RX_BUSY_POLL */
+
+
  struct io_poll_table {
         struct poll_table_struct pt;
         struct io_kiocb *req;
@@ -5583,6 +5651,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
         struct io_ring_ctx *ctx = req->ctx;
         int ret;

+       io_add_napi(req->file, req->ctx);
         ret = io_poll_check_events(req);
         if (ret > 0)
                 return;
@@ -5608,6 +5677,7 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
         struct io_ring_ctx *ctx = req->ctx;
         int ret;

+       io_add_napi(req->file, req->ctx);
         ret = io_poll_check_events(req);
         if (ret > 0)
                 return;
@@ -7544,6 +7614,9 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
                         wake_up(&ctx->sqo_sq_wait);
                 if (creds)
                         revert_creds(creds);
+#ifdef CONFIG_NET_RX_BUSY_POLL
+               io_napi_busy_loop(ctx);
+#endif
         }

         return ret;
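
For completeness: this sketch only polls from the sqthread, so the ring
has to be created with IORING_SETUP_SQPOLL, and net_busy_loop_on() only
returns true once net.core.busy_poll is set to a non-zero value (e.g.
sysctl -w net.core.busy_poll=50). A rough, untested liburing setup for
trying it out:

#include <liburing.h>

/* Untested usage sketch: an SQPOLL ring whose sqthread would end up
 * calling io_napi_busy_loop() from __io_sq_thread().
 */
static int setup_sqpoll_ring(struct io_uring *ring)
{
        struct io_uring_params p = { };

        p.flags = IORING_SETUP_SQPOLL;
        p.sq_thread_idle = 2000;        /* ms before the sqthread sleeps */
        return io_uring_queue_init_params(256, ring, &p);
}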



Thread overview: 23+ messages
2022-02-08 14:58 napi_busy_poll Olivier Langlois
2022-02-08 17:05 ` napi_busy_poll Jens Axboe
2022-02-09  3:34   ` napi_busy_poll Hao Xu
2022-02-12 19:51     ` napi_busy_poll Olivier Langlois
2022-02-13 18:47       ` napi_busy_poll Jens Axboe
2022-02-14 17:13       ` napi_busy_poll Hao Xu
2022-02-15  8:37         ` napi_busy_poll Olivier Langlois
2022-02-15 18:05           ` napi_busy_poll Olivier Langlois
2022-02-16  3:12             ` napi_busy_poll Hao Xu
2022-02-16 19:19               ` napi_busy_poll Olivier Langlois
2022-02-16 12:14             ` Hao Xu [this message]
2022-02-17 20:28               ` napi_busy_poll Olivier Langlois
2022-02-18  8:06                 ` napi_busy_poll Hao Xu
2022-02-19  7:14                   ` napi_busy_poll Olivier Langlois
2022-02-21  4:52                     ` napi_busy_poll Hao Xu
2022-02-17 23:18               ` napi_busy_poll Olivier Langlois
2022-02-17 23:25                 ` napi_busy_poll Jens Axboe
2022-02-18  7:21                 ` napi_busy_poll Hao Xu
2022-02-18  5:05               ` napi_busy_poll Olivier Langlois
2022-02-18  7:41                 ` napi_busy_poll Hao Xu
2022-02-19  7:02                   ` napi_busy_poll Olivier Langlois
2022-02-21  5:03                     ` napi_busy_poll Hao Xu
2022-02-25  4:42                       ` napi_busy_poll Olivier Langlois
