* [PATCHSET for-next] Add alloc cache for sendmsg/recvmsg
@ 2022-07-07 23:23 Jens Axboe
2022-07-07 23:23 ` [PATCH 1/4] io_uring: move apoll cache to poll.c Jens Axboe
` (3 more replies)
0 siblings, 4 replies; 8+ messages in thread
From: Jens Axboe @ 2022-07-07 23:23 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence
Hi,
This abstracts out the alloc cache we have for apoll entries, and extends
it to be usable for recv/sendmsg as well. During that abstraction, impose
an upper limit for cached entries as well.
This yields a 4-5% performance increase running netbench using
sendmsg/recvmsg rather than plain send and recv.
Post 5.20, I suspect we can get rid of using io_async_msghdr for single
vector sendmsg/recvmsg, which will make this less relevant. But as this
work isn't done yet, and the support for e.g. ITER_UBUF isn't upstream yet
either, this can help fill the gap.
--
Jens Axboe
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 1/4] io_uring: move apoll cache to poll.c
2022-07-07 23:23 [PATCHSET for-next] Add alloc cache for sendmsg/recvmsg Jens Axboe
@ 2022-07-07 23:23 ` Jens Axboe
2022-07-07 23:23 ` [PATCH 2/4] io_uring: add abstraction around apoll cache Jens Axboe
` (2 subsequent siblings)
3 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2022-07-07 23:23 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, Jens Axboe
This is where it's used, move the flush handler in there.
Signed-off-by: Jens Axboe <[email protected]>
---
io_uring/io_uring.c | 12 ------------
io_uring/poll.c | 12 ++++++++++++
io_uring/poll.h | 2 ++
3 files changed, 14 insertions(+), 12 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index caf979cd4327..4d1ce58b015e 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2445,18 +2445,6 @@ static void io_req_caches_free(struct io_ring_ctx *ctx)
mutex_unlock(&ctx->uring_lock);
}
-static void io_flush_apoll_cache(struct io_ring_ctx *ctx)
-{
- struct async_poll *apoll;
-
- while (!list_empty(&ctx->apoll_cache)) {
- apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
- poll.wait.entry);
- list_del(&apoll->poll.wait.entry);
- kfree(apoll);
- }
-}
-
static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
{
io_sq_thread_finish(ctx);
diff --git a/io_uring/poll.c b/io_uring/poll.c
index 57747d92bba4..f0fe209490d8 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -958,3 +958,15 @@ int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
io_req_set_res(req, ret, 0);
return IOU_OK;
}
+
+void io_flush_apoll_cache(struct io_ring_ctx *ctx)
+{
+ struct async_poll *apoll;
+
+ while (!list_empty(&ctx->apoll_cache)) {
+ apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
+ poll.wait.entry);
+ list_del(&apoll->poll.wait.entry);
+ kfree(apoll);
+ }
+}
diff --git a/io_uring/poll.h b/io_uring/poll.h
index c40673d7da01..95f192c7babb 100644
--- a/io_uring/poll.h
+++ b/io_uring/poll.h
@@ -30,3 +30,5 @@ int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags);
bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
bool cancel_all);
+
+void io_flush_apoll_cache(struct io_ring_ctx *ctx);
--
2.35.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/4] io_uring: add abstraction around apoll cache
2022-07-07 23:23 [PATCHSET for-next] Add alloc cache for sendmsg/recvmsg Jens Axboe
2022-07-07 23:23 ` [PATCH 1/4] io_uring: move apoll cache to poll.c Jens Axboe
@ 2022-07-07 23:23 ` Jens Axboe
2022-07-07 23:23 ` [PATCH 3/4] io_uring: impose max limit on " Jens Axboe
2022-07-07 23:23 ` [PATCH 4/4] io_uring: add netmsg cache Jens Axboe
3 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2022-07-07 23:23 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, Jens Axboe
In preparation for adding limits, and one more user, abstract out the
core bits of the allocation+free cache.
Signed-off-by: Jens Axboe <[email protected]>
---
include/linux/io_uring_types.h | 6 +++++-
io_uring/alloc_cache.h | 4 ++++
io_uring/io_uring.c | 7 ++++---
io_uring/poll.c | 16 ++++++++--------
io_uring/poll.h | 5 ++++-
5 files changed, 25 insertions(+), 13 deletions(-)
create mode 100644 io_uring/alloc_cache.h
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 26ef11e978d4..b548da03b563 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -158,6 +158,10 @@ struct io_ev_fd {
struct rcu_head rcu;
};
+struct io_alloc_cache {
+ struct hlist_head list;
+};
+
struct io_ring_ctx {
/* const or read-mostly hot data */
struct {
@@ -216,7 +220,7 @@ struct io_ring_ctx {
struct io_hash_table cancel_table_locked;
struct list_head cq_overflow_list;
- struct list_head apoll_cache;
+ struct io_alloc_cache apoll_cache;
struct xarray personalities;
u32 pers_next;
} ____cacheline_aligned_in_smp;
diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h
new file mode 100644
index 000000000000..49ac6ae237ef
--- /dev/null
+++ b/io_uring/alloc_cache.h
@@ -0,0 +1,4 @@
+static inline void io_alloc_cache_init(struct io_alloc_cache *cache)
+{
+ INIT_HLIST_HEAD(&cache->list);
+}
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 4d1ce58b015e..3b9033c401bf 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -92,6 +92,7 @@
#include "timeout.h"
#include "poll.h"
+#include "alloc_cache.h"
#define IORING_MAX_ENTRIES 32768
#define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES)
@@ -295,7 +296,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
INIT_LIST_HEAD(&ctx->sqd_list);
INIT_LIST_HEAD(&ctx->cq_overflow_list);
INIT_LIST_HEAD(&ctx->io_buffers_cache);
- INIT_LIST_HEAD(&ctx->apoll_cache);
+ io_alloc_cache_init(&ctx->apoll_cache);
init_completion(&ctx->ref_comp);
xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
mutex_init(&ctx->uring_lock);
@@ -1180,8 +1181,8 @@ void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node)
if (apoll->double_poll)
kfree(apoll->double_poll);
- list_add(&apoll->poll.wait.entry,
- &ctx->apoll_cache);
+ hlist_add_head(&apoll->cache_list,
+ &ctx->apoll_cache.list);
req->flags &= ~REQ_F_POLLED;
}
if (req->flags & IO_REQ_LINK_FLAGS)
diff --git a/io_uring/poll.c b/io_uring/poll.c
index f0fe209490d8..f3aae3cc6501 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -589,10 +589,10 @@ static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req,
apoll = req->apoll;
kfree(apoll->double_poll);
} else if (!(issue_flags & IO_URING_F_UNLOCKED) &&
- !list_empty(&ctx->apoll_cache)) {
- apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
- poll.wait.entry);
- list_del_init(&apoll->poll.wait.entry);
+ !hlist_empty(&ctx->apoll_cache.list)) {
+ apoll = hlist_entry(ctx->apoll_cache.list.first,
+ struct async_poll, cache_list);
+ hlist_del(&apoll->cache_list);
} else {
apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
if (unlikely(!apoll))
@@ -963,10 +963,10 @@ void io_flush_apoll_cache(struct io_ring_ctx *ctx)
{
struct async_poll *apoll;
- while (!list_empty(&ctx->apoll_cache)) {
- apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
- poll.wait.entry);
- list_del(&apoll->poll.wait.entry);
+ while (!hlist_empty(&ctx->apoll_cache.list)) {
+ apoll = hlist_entry(ctx->apoll_cache.list.first,
+ struct async_poll, cache_list);
+ hlist_del(&apoll->cache_list);
kfree(apoll);
}
}
diff --git a/io_uring/poll.h b/io_uring/poll.h
index 95f192c7babb..cb528f8ef203 100644
--- a/io_uring/poll.h
+++ b/io_uring/poll.h
@@ -14,7 +14,10 @@ struct io_poll {
};
struct async_poll {
- struct io_poll poll;
+ union {
+ struct io_poll poll;
+ struct hlist_node cache_list;
+ };
struct io_poll *double_poll;
};
--
2.35.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 3/4] io_uring: impose max limit on apoll cache
2022-07-07 23:23 [PATCHSET for-next] Add alloc cache for sendmsg/recvmsg Jens Axboe
2022-07-07 23:23 ` [PATCH 1/4] io_uring: move apoll cache to poll.c Jens Axboe
2022-07-07 23:23 ` [PATCH 2/4] io_uring: add abstraction around apoll cache Jens Axboe
@ 2022-07-07 23:23 ` Jens Axboe
2022-07-07 23:23 ` [PATCH 4/4] io_uring: add netmsg cache Jens Axboe
3 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2022-07-07 23:23 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, Jens Axboe
Caches like this tend to grow to the peak size, and then never get any
smaller. Impose a max limit on the size, to prevent it from growing too
big.
A somewhat randomly chosen 512 is the max size we'll allow the cache
to get. If a batch of frees comes in and would bring it over that, we
simply start kfree'ing the surplus.
Signed-off-by: Jens Axboe <[email protected]>
---
include/linux/io_uring_types.h | 1 +
io_uring/alloc_cache.h | 15 +++++++++++++++
io_uring/io_uring.c | 8 ++++++--
io_uring/poll.c | 2 ++
4 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index b548da03b563..bf8f95332eda 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -160,6 +160,7 @@ struct io_ev_fd {
struct io_alloc_cache {
struct hlist_head list;
+ unsigned int nr_cached;
};
struct io_ring_ctx {
diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h
index 49ac6ae237ef..0e64030f1ae0 100644
--- a/io_uring/alloc_cache.h
+++ b/io_uring/alloc_cache.h
@@ -1,4 +1,19 @@
+/*
+ * Don't allow the cache to grow beyond this size.
+ */
+#define IO_ALLOC_CACHE_MAX 512
+
+static inline bool io_alloc_cache_store(struct io_alloc_cache *cache)
+{
+ if (cache->nr_cached < IO_ALLOC_CACHE_MAX) {
+ cache->nr_cached++;
+ return true;
+ }
+ return false;
+}
+
static inline void io_alloc_cache_init(struct io_alloc_cache *cache)
{
INIT_HLIST_HEAD(&cache->list);
+ cache->nr_cached = 0;
}
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 3b9033c401bf..b5098773d924 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1181,8 +1181,12 @@ void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node)
if (apoll->double_poll)
kfree(apoll->double_poll);
- hlist_add_head(&apoll->cache_list,
- &ctx->apoll_cache.list);
+ if (io_alloc_cache_store(&ctx->apoll_cache)) {
+ hlist_add_head(&apoll->cache_list,
+ &ctx->apoll_cache.list);
+ } else {
+ kfree(apoll);
+ }
req->flags &= ~REQ_F_POLLED;
}
if (req->flags & IO_REQ_LINK_FLAGS)
diff --git a/io_uring/poll.c b/io_uring/poll.c
index f3aae3cc6501..cc49160975cb 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -593,6 +593,7 @@ static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req,
apoll = hlist_entry(ctx->apoll_cache.list.first,
struct async_poll, cache_list);
hlist_del(&apoll->cache_list);
+ ctx->apoll_cache.nr_cached--;
} else {
apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
if (unlikely(!apoll))
@@ -969,4 +970,5 @@ void io_flush_apoll_cache(struct io_ring_ctx *ctx)
hlist_del(&apoll->cache_list);
kfree(apoll);
}
+ ctx->apoll_cache.nr_cached = 0;
}
--
2.35.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 4/4] io_uring: add netmsg cache
2022-07-07 23:23 [PATCHSET for-next] Add alloc cache for sendmsg/recvmsg Jens Axboe
` (2 preceding siblings ...)
2022-07-07 23:23 ` [PATCH 3/4] io_uring: impose max limit on " Jens Axboe
@ 2022-07-07 23:23 ` Jens Axboe
2022-07-08 7:33 ` Dylan Yudaken
3 siblings, 1 reply; 8+ messages in thread
From: Jens Axboe @ 2022-07-07 23:23 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, Jens Axboe
For recvmsg/sendmsg, if they don't complete inline, we currently need
to allocate a struct io_async_msghdr for each request. This is a
somewhat large struct.
Hook up sendmsg/recvmsg to use the io_alloc_cache. This reduces the
alloc + free overhead considerably, yielding 4-5% of extra performance
running netbench.
Signed-off-by: Jens Axboe <[email protected]>
---
include/linux/io_uring_types.h | 6 ++-
io_uring/io_uring.c | 3 ++
io_uring/net.c | 73 +++++++++++++++++++++++++++++-----
io_uring/net.h | 11 ++++-
4 files changed, 81 insertions(+), 12 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index bf8f95332eda..d54b8b7e0746 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -222,8 +222,7 @@ struct io_ring_ctx {
struct io_hash_table cancel_table_locked;
struct list_head cq_overflow_list;
struct io_alloc_cache apoll_cache;
- struct xarray personalities;
- u32 pers_next;
+ struct io_alloc_cache netmsg_cache;
} ____cacheline_aligned_in_smp;
/* IRQ completion list, under ->completion_lock */
@@ -241,6 +240,9 @@ struct io_ring_ctx {
unsigned int file_alloc_start;
unsigned int file_alloc_end;
+ struct xarray personalities;
+ u32 pers_next;
+
struct {
/*
* We cache a range of free CQEs we can use, once exhausted it
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index b5098773d924..32110c5b4059 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -89,6 +89,7 @@
#include "kbuf.h"
#include "rsrc.h"
#include "cancel.h"
+#include "net.h"
#include "timeout.h"
#include "poll.h"
@@ -297,6 +298,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
INIT_LIST_HEAD(&ctx->cq_overflow_list);
INIT_LIST_HEAD(&ctx->io_buffers_cache);
io_alloc_cache_init(&ctx->apoll_cache);
+ io_alloc_cache_init(&ctx->netmsg_cache);
init_completion(&ctx->ref_comp);
xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
mutex_init(&ctx->uring_lock);
@@ -2473,6 +2475,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
__io_cqring_overflow_flush(ctx, true);
io_eventfd_unregister(ctx);
io_flush_apoll_cache(ctx);
+ io_flush_netmsg_cache(ctx);
mutex_unlock(&ctx->uring_lock);
io_destroy_buffers(ctx);
if (ctx->sq_creds)
diff --git a/io_uring/net.c b/io_uring/net.c
index 6679069eeef1..ba7e94ff287c 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -12,6 +12,7 @@
#include "io_uring.h"
#include "kbuf.h"
+#include "alloc_cache.h"
#include "net.h"
#if defined(CONFIG_NET)
@@ -97,18 +98,57 @@ static bool io_net_retry(struct socket *sock, int flags)
return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}
+static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_async_msghdr *hdr = req->async_data;
+
+ if (!hdr || issue_flags & IO_URING_F_UNLOCKED)
+ return;
+
+ if (io_alloc_cache_store(&req->ctx->netmsg_cache)) {
+ hlist_add_head(&hdr->cache_list, &req->ctx->netmsg_cache.list);
+ req->async_data = NULL;
+ req->flags &= ~REQ_F_ASYNC_DATA;
+ }
+}
+
+static struct io_async_msghdr *io_recvmsg_alloc_async(struct io_kiocb *req,
+ unsigned int issue_flags)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+
+ if (!(issue_flags & IO_URING_F_UNLOCKED) &&
+ !hlist_empty(&ctx->netmsg_cache.list)) {
+ struct io_async_msghdr *hdr;
+
+ hdr = hlist_entry(ctx->netmsg_cache.list.first,
+ struct io_async_msghdr, cache_list);
+ ctx->netmsg_cache.nr_cached--;
+ hlist_del(&hdr->cache_list);
+ req->flags |= REQ_F_ASYNC_DATA;
+ req->async_data = hdr;
+ return hdr;
+ }
+
+ if (!io_alloc_async_data(req))
+ return req->async_data;
+
+ return NULL;
+}
+
static int io_setup_async_msg(struct io_kiocb *req,
- struct io_async_msghdr *kmsg)
+ struct io_async_msghdr *kmsg,
+ unsigned int issue_flags)
{
struct io_async_msghdr *async_msg = req->async_data;
if (async_msg)
return -EAGAIN;
- if (io_alloc_async_data(req)) {
+ async_msg = io_recvmsg_alloc_async(req, issue_flags);
+ if (!async_msg) {
kfree(kmsg->free_iov);
return -ENOMEM;
}
- async_msg = req->async_data;
req->flags |= REQ_F_NEED_CLEANUP;
memcpy(async_msg, kmsg, sizeof(*kmsg));
async_msg->msg.msg_name = &async_msg->addr;
@@ -195,7 +235,7 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
if (!(req->flags & REQ_F_POLLED) &&
(sr->flags & IORING_RECVSEND_POLL_FIRST))
- return io_setup_async_msg(req, kmsg);
+ return io_setup_async_msg(req, kmsg, issue_flags);
flags = sr->msg_flags;
if (issue_flags & IO_URING_F_NONBLOCK)
@@ -207,13 +247,13 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
if (ret < min_ret) {
if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
- return io_setup_async_msg(req, kmsg);
+ return io_setup_async_msg(req, kmsg, issue_flags);
if (ret == -ERESTARTSYS)
ret = -EINTR;
if (ret > 0 && io_net_retry(sock, flags)) {
sr->done_io += ret;
req->flags |= REQ_F_PARTIAL_IO;
- return io_setup_async_msg(req, kmsg);
+ return io_setup_async_msg(req, kmsg, issue_flags);
}
req_set_fail(req);
}
@@ -221,6 +261,7 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
if (kmsg->free_iov)
kfree(kmsg->free_iov);
req->flags &= ~REQ_F_NEED_CLEANUP;
+ io_netmsg_recycle(req, issue_flags);
if (ret >= 0)
ret += sr->done_io;
else if (sr->done_io)
@@ -495,7 +536,7 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
if (!(req->flags & REQ_F_POLLED) &&
(sr->flags & IORING_RECVSEND_POLL_FIRST))
- return io_setup_async_msg(req, kmsg);
+ return io_setup_async_msg(req, kmsg, issue_flags);
if (io_do_buffer_select(req)) {
void __user *buf;
@@ -519,13 +560,13 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg->uaddr, flags);
if (ret < min_ret) {
if (ret == -EAGAIN && force_nonblock)
- return io_setup_async_msg(req, kmsg);
+ return io_setup_async_msg(req, kmsg, issue_flags);
if (ret == -ERESTARTSYS)
ret = -EINTR;
if (ret > 0 && io_net_retry(sock, flags)) {
sr->done_io += ret;
req->flags |= REQ_F_PARTIAL_IO;
- return io_setup_async_msg(req, kmsg);
+ return io_setup_async_msg(req, kmsg, issue_flags);
}
req_set_fail(req);
} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
@@ -535,6 +576,7 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
/* fast path, check for non-NULL to avoid function call */
if (kmsg->free_iov)
kfree(kmsg->free_iov);
+ io_netmsg_recycle(req, issue_flags);
req->flags &= ~REQ_F_NEED_CLEANUP;
if (ret > 0)
ret += sr->done_io;
@@ -848,4 +890,17 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags)
io_req_set_res(req, ret, 0);
return IOU_OK;
}
+
+void io_flush_netmsg_cache(struct io_ring_ctx *ctx)
+{
+ while (!hlist_empty(&ctx->netmsg_cache.list)) {
+ struct io_async_msghdr *hdr;
+
+ hdr = hlist_entry(ctx->netmsg_cache.list.first,
+ struct io_async_msghdr, cache_list);
+ hlist_del(&hdr->cache_list);
+ kfree(hdr);
+ }
+ ctx->netmsg_cache.nr_cached = 0;
+}
#endif
diff --git a/io_uring/net.h b/io_uring/net.h
index 81d71d164770..576efb602c7f 100644
--- a/io_uring/net.h
+++ b/io_uring/net.h
@@ -5,7 +5,10 @@
#if defined(CONFIG_NET)
struct io_async_msghdr {
- struct iovec fast_iov[UIO_FASTIOV];
+ union {
+ struct iovec fast_iov[UIO_FASTIOV];
+ struct hlist_node cache_list;
+ };
/* points to an allocated iov, if NULL we use fast_iov instead */
struct iovec *free_iov;
struct sockaddr __user *uaddr;
@@ -40,4 +43,10 @@ int io_socket(struct io_kiocb *req, unsigned int issue_flags);
int io_connect_prep_async(struct io_kiocb *req);
int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_connect(struct io_kiocb *req, unsigned int issue_flags);
+
+void io_flush_netmsg_cache(struct io_ring_ctx *ctx);
+#else
+static inline void io_flush_netmsg_cache(struct io_ring_ctx *ctx)
+{
+}
#endif
--
2.35.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 4/4] io_uring: add netmsg cache
2022-07-07 23:23 ` [PATCH 4/4] io_uring: add netmsg cache Jens Axboe
@ 2022-07-08 7:33 ` Dylan Yudaken
2022-07-08 12:47 ` Jens Axboe
0 siblings, 1 reply; 8+ messages in thread
From: Dylan Yudaken @ 2022-07-08 7:33 UTC (permalink / raw)
To: [email protected], [email protected]; +Cc: [email protected]
On Thu, 2022-07-07 at 17:23 -0600, Jens Axboe wrote:
> For recvmsg/sendmsg, if they don't complete inline, we currently need
> to allocate a struct io_async_msghdr for each request. This is a
> somewhat large struct.
>
> Hook up sendmsg/recvmsg to use the io_alloc_cache. This reduces the
> alloc + free overhead considerably, yielding 4-5% of extra
> performance
> running netbench.
>
> Signed-off-by: Jens Axboe <[email protected]>
> ---
> include/linux/io_uring_types.h | 6 ++-
> io_uring/io_uring.c | 3 ++
> io_uring/net.c | 73 +++++++++++++++++++++++++++++---
> --
> io_uring/net.h | 11 ++++-
> 4 files changed, 81 insertions(+), 12 deletions(-)
>
> diff --git a/include/linux/io_uring_types.h
> b/include/linux/io_uring_types.h
> index bf8f95332eda..d54b8b7e0746 100644
> --- a/include/linux/io_uring_types.h
> +++ b/include/linux/io_uring_types.h
> @@ -222,8 +222,7 @@ struct io_ring_ctx {
> struct io_hash_table cancel_table_locked;
> struct list_head cq_overflow_list;
> struct io_alloc_cache apoll_cache;
> - struct xarray personalities;
> - u32 pers_next;
> + struct io_alloc_cache netmsg_cache;
> } ____cacheline_aligned_in_smp;
>
> /* IRQ completion list, under ->completion_lock */
> @@ -241,6 +240,9 @@ struct io_ring_ctx {
> unsigned int file_alloc_start;
> unsigned int file_alloc_end;
>
> + struct xarray personalities;
> + u32 pers_next;
> +
> struct {
> /*
> * We cache a range of free CQEs we can use, once
> exhausted it
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index b5098773d924..32110c5b4059 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -89,6 +89,7 @@
> #include "kbuf.h"
> #include "rsrc.h"
> #include "cancel.h"
> +#include "net.h"
>
> #include "timeout.h"
> #include "poll.h"
> @@ -297,6 +298,7 @@ static __cold struct io_ring_ctx
> *io_ring_ctx_alloc(struct io_uring_params *p)
> INIT_LIST_HEAD(&ctx->cq_overflow_list);
> INIT_LIST_HEAD(&ctx->io_buffers_cache);
> io_alloc_cache_init(&ctx->apoll_cache);
> + io_alloc_cache_init(&ctx->netmsg_cache);
> init_completion(&ctx->ref_comp);
> xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
> mutex_init(&ctx->uring_lock);
> @@ -2473,6 +2475,7 @@ static __cold void io_ring_ctx_free(struct
> io_ring_ctx *ctx)
> __io_cqring_overflow_flush(ctx, true);
> io_eventfd_unregister(ctx);
> io_flush_apoll_cache(ctx);
> + io_flush_netmsg_cache(ctx);
> mutex_unlock(&ctx->uring_lock);
> io_destroy_buffers(ctx);
> if (ctx->sq_creds)
> diff --git a/io_uring/net.c b/io_uring/net.c
> index 6679069eeef1..ba7e94ff287c 100644
> --- a/io_uring/net.c
> +++ b/io_uring/net.c
> @@ -12,6 +12,7 @@
>
> #include "io_uring.h"
> #include "kbuf.h"
> +#include "alloc_cache.h"
> #include "net.h"
>
> #if defined(CONFIG_NET)
> @@ -97,18 +98,57 @@ static bool io_net_retry(struct socket *sock, int
> flags)
> return sock->type == SOCK_STREAM || sock->type ==
> SOCK_SEQPACKET;
> }
>
> +static void io_netmsg_recycle(struct io_kiocb *req, unsigned int
> issue_flags)
> +{
> + struct io_async_msghdr *hdr = req->async_data;
> +
> + if (!hdr || issue_flags & IO_URING_F_UNLOCKED)
> + return;
> +
> + if (io_alloc_cache_store(&req->ctx->netmsg_cache)) {
> + hlist_add_head(&hdr->cache_list, &req->ctx-
> >netmsg_cache.list);
can io_alloc_cache_store just do the store?
would be nicer to have cache::list be generally unused outside of the
cache code.
> + req->async_data = NULL;
> + req->flags &= ~REQ_F_ASYNC_DATA;
> + }
> +}
> +
> +static struct io_async_msghdr *io_recvmsg_alloc_async(struct
> io_kiocb *req,
> + unsigned int
> issue_flags)
> +{
> + struct io_ring_ctx *ctx = req->ctx;
> +
> + if (!(issue_flags & IO_URING_F_UNLOCKED) &&
> + !hlist_empty(&ctx->netmsg_cache.list)) {
> + struct io_async_msghdr *hdr;
> +
> + hdr = hlist_entry(ctx->netmsg_cache.list.first,
> + struct io_async_msghdr,
> cache_list);
> + ctx->netmsg_cache.nr_cached--;
> + hlist_del(&hdr->cache_list);
ditto here. I think all the hlist stuff and the nr_cached manipulation
can be wrapped up
> + req->flags |= REQ_F_ASYNC_DATA;
> + req->async_data = hdr;
> + return hdr;
> + }
> +
> + if (!io_alloc_async_data(req))
> + return req->async_data;
> +
> + return NULL;
> +}
> +
> static int io_setup_async_msg(struct io_kiocb *req,
> - struct io_async_msghdr *kmsg)
> + struct io_async_msghdr *kmsg,
> + unsigned int issue_flags)
> {
> struct io_async_msghdr *async_msg = req->async_data;
>
> if (async_msg)
> return -EAGAIN;
> - if (io_alloc_async_data(req)) {
> + async_msg = io_recvmsg_alloc_async(req, issue_flags);
> + if (!async_msg) {
> kfree(kmsg->free_iov);
> return -ENOMEM;
> }
> - async_msg = req->async_data;
> req->flags |= REQ_F_NEED_CLEANUP;
> memcpy(async_msg, kmsg, sizeof(*kmsg));
> async_msg->msg.msg_name = &async_msg->addr;
> @@ -195,7 +235,7 @@ int io_sendmsg(struct io_kiocb *req, unsigned int
> issue_flags)
>
> if (!(req->flags & REQ_F_POLLED) &&
> (sr->flags & IORING_RECVSEND_POLL_FIRST))
> - return io_setup_async_msg(req, kmsg);
> + return io_setup_async_msg(req, kmsg, issue_flags);
>
> flags = sr->msg_flags;
> if (issue_flags & IO_URING_F_NONBLOCK)
> @@ -207,13 +247,13 @@ int io_sendmsg(struct io_kiocb *req, unsigned
> int issue_flags)
>
> if (ret < min_ret) {
> if (ret == -EAGAIN && (issue_flags &
> IO_URING_F_NONBLOCK))
> - return io_setup_async_msg(req, kmsg);
> + return io_setup_async_msg(req, kmsg,
> issue_flags);
> if (ret == -ERESTARTSYS)
> ret = -EINTR;
> if (ret > 0 && io_net_retry(sock, flags)) {
> sr->done_io += ret;
> req->flags |= REQ_F_PARTIAL_IO;
> - return io_setup_async_msg(req, kmsg);
> + return io_setup_async_msg(req, kmsg,
> issue_flags);
> }
> req_set_fail(req);
> }
> @@ -221,6 +261,7 @@ int io_sendmsg(struct io_kiocb *req, unsigned int
> issue_flags)
> if (kmsg->free_iov)
> kfree(kmsg->free_iov);
> req->flags &= ~REQ_F_NEED_CLEANUP;
> + io_netmsg_recycle(req, issue_flags);
> if (ret >= 0)
> ret += sr->done_io;
> else if (sr->done_io)
> @@ -495,7 +536,7 @@ int io_recvmsg(struct io_kiocb *req, unsigned int
> issue_flags)
>
> if (!(req->flags & REQ_F_POLLED) &&
> (sr->flags & IORING_RECVSEND_POLL_FIRST))
> - return io_setup_async_msg(req, kmsg);
> + return io_setup_async_msg(req, kmsg, issue_flags);
>
> if (io_do_buffer_select(req)) {
> void __user *buf;
> @@ -519,13 +560,13 @@ int io_recvmsg(struct io_kiocb *req, unsigned
> int issue_flags)
> ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg-
> >uaddr, flags);
> if (ret < min_ret) {
> if (ret == -EAGAIN && force_nonblock)
> - return io_setup_async_msg(req, kmsg);
> + return io_setup_async_msg(req, kmsg,
> issue_flags);
> if (ret == -ERESTARTSYS)
> ret = -EINTR;
> if (ret > 0 && io_net_retry(sock, flags)) {
> sr->done_io += ret;
> req->flags |= REQ_F_PARTIAL_IO;
> - return io_setup_async_msg(req, kmsg);
> + return io_setup_async_msg(req, kmsg,
> issue_flags);
> }
> req_set_fail(req);
> } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags &
> (MSG_TRUNC | MSG_CTRUNC))) {
> @@ -535,6 +576,7 @@ int io_recvmsg(struct io_kiocb *req, unsigned int
> issue_flags)
> /* fast path, check for non-NULL to avoid function call */
> if (kmsg->free_iov)
> kfree(kmsg->free_iov);
> + io_netmsg_recycle(req, issue_flags);
> req->flags &= ~REQ_F_NEED_CLEANUP;
> if (ret > 0)
> ret += sr->done_io;
> @@ -848,4 +890,17 @@ int io_connect(struct io_kiocb *req, unsigned
> int issue_flags)
> io_req_set_res(req, ret, 0);
> return IOU_OK;
> }
> +
> +void io_flush_netmsg_cache(struct io_ring_ctx *ctx)
> +{
> + while (!hlist_empty(&ctx->netmsg_cache.list)) {
> + struct io_async_msghdr *hdr;
> +
> + hdr = hlist_entry(ctx->netmsg_cache.list.first,
> + struct io_async_msghdr,
> cache_list);
> + hlist_del(&hdr->cache_list);
> + kfree(hdr);
> + }
> + ctx->netmsg_cache.nr_cached = 0;
> +}
again - could be put somewhere common. I assume there will not be much
more cleanup than simple kfree
> #endif
> diff --git a/io_uring/net.h b/io_uring/net.h
> index 81d71d164770..576efb602c7f 100644
> --- a/io_uring/net.h
> +++ b/io_uring/net.h
> @@ -5,7 +5,10 @@
>
> #if defined(CONFIG_NET)
> struct io_async_msghdr {
> - struct iovec fast_iov[UIO_FASTIOV];
> + union {
> + struct iovec fast_iov[UIO_FASTIOV];
> + struct hlist_node cache_list;
> + };
> /* points to an allocated iov, if NULL we use fast_iov
> instead */
> struct iovec *free_iov;
> struct sockaddr __user *uaddr;
> @@ -40,4 +43,10 @@ int io_socket(struct io_kiocb *req, unsigned int
> issue_flags);
> int io_connect_prep_async(struct io_kiocb *req);
> int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe
> *sqe);
> int io_connect(struct io_kiocb *req, unsigned int issue_flags);
> +
> +void io_flush_netmsg_cache(struct io_ring_ctx *ctx);
> +#else
> +static inline void io_flush_netmsg_cache(struct io_ring_ctx *ctx)
> +{
> +}
> #endif
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 4/4] io_uring: add netmsg cache
2022-07-08 7:33 ` Dylan Yudaken
@ 2022-07-08 12:47 ` Jens Axboe
0 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2022-07-08 12:47 UTC (permalink / raw)
To: Dylan Yudaken, [email protected]; +Cc: [email protected]
On 7/8/22 1:33 AM, Dylan Yudaken wrote:
> On Thu, 2022-07-07 at 17:23 -0600, Jens Axboe wrote:
>> For recvmsg/sendmsg, if they don't complete inline, we currently need
>> to allocate a struct io_async_msghdr for each request. This is a
>> somewhat large struct.
>>
>> Hook up sendmsg/recvmsg to use the io_alloc_cache. This reduces the
>> alloc + free overhead considerably, yielding 4-5% of extra
>> performance
>> running netbench.
>>
>> Signed-off-by: Jens Axboe <[email protected]>
>> ---
>> include/linux/io_uring_types.h | 6 ++-
>> io_uring/io_uring.c | 3 ++
>> io_uring/net.c | 73 +++++++++++++++++++++++++++++---
>> --
>> io_uring/net.h | 11 ++++-
>> 4 files changed, 81 insertions(+), 12 deletions(-)
>>
>> diff --git a/include/linux/io_uring_types.h
>> b/include/linux/io_uring_types.h
>> index bf8f95332eda..d54b8b7e0746 100644
>> --- a/include/linux/io_uring_types.h
>> +++ b/include/linux/io_uring_types.h
>> @@ -222,8 +222,7 @@ struct io_ring_ctx {
>> struct io_hash_table cancel_table_locked;
>> struct list_head cq_overflow_list;
>> struct io_alloc_cache apoll_cache;
>> - struct xarray personalities;
>> - u32 pers_next;
>> + struct io_alloc_cache netmsg_cache;
>> } ____cacheline_aligned_in_smp;
>>
>> /* IRQ completion list, under ->completion_lock */
>> @@ -241,6 +240,9 @@ struct io_ring_ctx {
>> unsigned int file_alloc_start;
>> unsigned int file_alloc_end;
>>
>> + struct xarray personalities;
>> + u32 pers_next;
>> +
>> struct {
>> /*
>> * We cache a range of free CQEs we can use, once
>> exhausted it
>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>> index b5098773d924..32110c5b4059 100644
>> --- a/io_uring/io_uring.c
>> +++ b/io_uring/io_uring.c
>> @@ -89,6 +89,7 @@
>> #include "kbuf.h"
>> #include "rsrc.h"
>> #include "cancel.h"
>> +#include "net.h"
>>
>> #include "timeout.h"
>> #include "poll.h"
>> @@ -297,6 +298,7 @@ static __cold struct io_ring_ctx
>> *io_ring_ctx_alloc(struct io_uring_params *p)
>> INIT_LIST_HEAD(&ctx->cq_overflow_list);
>> INIT_LIST_HEAD(&ctx->io_buffers_cache);
>> io_alloc_cache_init(&ctx->apoll_cache);
>> + io_alloc_cache_init(&ctx->netmsg_cache);
>> init_completion(&ctx->ref_comp);
>> xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
>> mutex_init(&ctx->uring_lock);
>> @@ -2473,6 +2475,7 @@ static __cold void io_ring_ctx_free(struct
>> io_ring_ctx *ctx)
>> __io_cqring_overflow_flush(ctx, true);
>> io_eventfd_unregister(ctx);
>> io_flush_apoll_cache(ctx);
>> + io_flush_netmsg_cache(ctx);
>> mutex_unlock(&ctx->uring_lock);
>> io_destroy_buffers(ctx);
>> if (ctx->sq_creds)
>> diff --git a/io_uring/net.c b/io_uring/net.c
>> index 6679069eeef1..ba7e94ff287c 100644
>> --- a/io_uring/net.c
>> +++ b/io_uring/net.c
>> @@ -12,6 +12,7 @@
>>
>> #include "io_uring.h"
>> #include "kbuf.h"
>> +#include "alloc_cache.h"
>> #include "net.h"
>>
>> #if defined(CONFIG_NET)
>> @@ -97,18 +98,57 @@ static bool io_net_retry(struct socket *sock, int
>> flags)
>> return sock->type == SOCK_STREAM || sock->type ==
>> SOCK_SEQPACKET;
>> }
>>
>> +static void io_netmsg_recycle(struct io_kiocb *req, unsigned int
>> issue_flags)
>> +{
>> + struct io_async_msghdr *hdr = req->async_data;
>> +
>> + if (!hdr || issue_flags & IO_URING_F_UNLOCKED)
>> + return;
>> +
>> + if (io_alloc_cache_store(&req->ctx->netmsg_cache)) {
>> + hlist_add_head(&hdr->cache_list, &req->ctx-
>>> netmsg_cache.list);
>
> can io_alloc_cache_store just do the store?
> would be nicer to have cache::list be generally unused outside of the
> cache code.
We could do that if we just make the hlist_node be inside a struct.
Would probably allow cleaning up the get-entry etc too, let me give that
a whirl.
--
Jens Axboe
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 3/4] io_uring: impose max limit on apoll cache
2022-07-08 13:30 [PATCHSET v2 for-next] Add alloc cache for sendmsg/recvmsg Jens Axboe
@ 2022-07-08 13:30 ` Jens Axboe
0 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2022-07-08 13:30 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, dylany, Jens Axboe
Caches like this tend to grow to the peak size, and then never get any
smaller. Impose a max limit on the size, to prevent it from growing too
big.
A somewhat randomly chosen 512 is the max size we'll allow the cache
to get. If a batch of frees comes in and would bring it over that, we
simply start kfree'ing the surplus.
Signed-off-by: Jens Axboe <[email protected]>
---
include/linux/io_uring_types.h | 1 +
io_uring/alloc_cache.h | 16 ++++++++++++++--
io_uring/io_uring.c | 3 ++-
3 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index b548da03b563..bf8f95332eda 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -160,6 +160,7 @@ struct io_ev_fd {
struct io_alloc_cache {
struct hlist_head list;
+ unsigned int nr_cached;
};
struct io_ring_ctx {
diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h
index 98f2374c37c7..729793ae9712 100644
--- a/io_uring/alloc_cache.h
+++ b/io_uring/alloc_cache.h
@@ -1,14 +1,24 @@
#ifndef IOU_ALLOC_CACHE_H
#define IOU_ALLOC_CACHE_H
+/*
+ * Don't allow the cache to grow beyond this size.
+ */
+#define IO_ALLOC_CACHE_MAX 512
+
struct io_cache_entry {
struct hlist_node node;
};
-static inline void io_alloc_cache_put(struct io_alloc_cache *cache,
+static inline bool io_alloc_cache_put(struct io_alloc_cache *cache,
struct io_cache_entry *entry)
{
- hlist_add_head(&entry->node, &cache->list);
+ if (cache->nr_cached < IO_ALLOC_CACHE_MAX) {
+ cache->nr_cached++;
+ hlist_add_head(&entry->node, &cache->list);
+ return true;
+ }
+ return false;
}
static inline struct io_cache_entry *io_alloc_cache_get(struct io_alloc_cache *cache)
@@ -26,6 +36,7 @@ static inline struct io_cache_entry *io_alloc_cache_get(struct io_alloc_cache *c
static inline void io_alloc_cache_init(struct io_alloc_cache *cache)
{
INIT_HLIST_HEAD(&cache->list);
+ cache->nr_cached = 0;
}
static inline void io_alloc_cache_free(struct io_alloc_cache *cache,
@@ -37,5 +48,6 @@ static inline void io_alloc_cache_free(struct io_alloc_cache *cache,
hlist_del(node);
free(container_of(node, struct io_cache_entry, node));
}
+ cache->nr_cached = 0;
}
#endif
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index a360a3d390c6..c9c23e459766 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1181,7 +1181,8 @@ void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node)
if (apoll->double_poll)
kfree(apoll->double_poll);
- io_alloc_cache_put(&ctx->apoll_cache, &apoll->cache);
+ if (!io_alloc_cache_put(&ctx->apoll_cache, &apoll->cache))
+ kfree(apoll);
req->flags &= ~REQ_F_POLLED;
}
if (req->flags & IO_REQ_LINK_FLAGS)
--
2.35.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2022-07-08 13:30 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-07-07 23:23 [PATCHSET for-next] Add alloc cache for sendmsg/recvmsg Jens Axboe
2022-07-07 23:23 ` [PATCH 1/4] io_uring: move apoll cache to poll.c Jens Axboe
2022-07-07 23:23 ` [PATCH 2/4] io_uring: add abstraction around apoll cache Jens Axboe
2022-07-07 23:23 ` [PATCH 3/4] io_uring: impose max limit on " Jens Axboe
2022-07-07 23:23 ` [PATCH 4/4] io_uring: add netmsg cache Jens Axboe
2022-07-08 7:33 ` Dylan Yudaken
2022-07-08 12:47 ` Jens Axboe
-- strict thread matches above, loose matches on Subject: below --
2022-07-08 13:30 [PATCHSET v2 for-next] Add alloc cache for sendmsg/recvmsg Jens Axboe
2022-07-08 13:30 ` [PATCH 3/4] io_uring: impose max limit on apoll cache Jens Axboe
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox