From: Miklos Szeredi <[email protected]>
To: Jens Axboe <[email protected]>
Cc: [email protected]
Subject: Re: io_uring_prep_openat_direct() and link/drain
Date: Fri, 1 Apr 2022 18:02:40 +0200 [thread overview]
Message-ID: <CAJfpeguq1bBDa9-gbk6tutME1kH4SdHvkUdLGKzfdmhpCtCt6g@mail.gmail.com> (raw)
In-Reply-To: <[email protected]>
[-- Attachment #1: Type: text/plain, Size: 738 bytes --]
On Fri, 1 Apr 2022 at 17:36, Jens Axboe <[email protected]> wrote:
> I take it you're continually reusing those slots?
Yes.
> If you have a test
> case that'd be ideal. Agree that it sounds like we just need an
> appropriate breather to allow fput/task_work to run. Or it could be the
> deferral free of the fixed slot.
Adding a breather could make the worst case latency be large. I think
doing the fput synchronously would be better in general.
I test this on a VM with 8G of memory and run the following:
./forkbomb 14 &
# wait till 16k processes are forked
for i in `seq 1 100`; do ./procreads u; done
You can compare performance with plain reads (./procreads p), the
other tests don't work on public kernels.
Thanks,
Miklos
[-- Attachment #2: procreads.c --]
[-- Type: text/x-csrc, Size: 6663 bytes --]
#define _GNU_SOURCE
#include <stdio.h>
#include <fcntl.h>
#include <string.h>
#include <stdlib.h>
#include <dirent.h>
#include <unistd.h>
#include <err.h>
#include "liburing.h"
/*
 * Fail-fast wrappers: each evaluates _expr exactly once, terminates the
 * program via err()/errx() if the result signals failure, and otherwise
 * yields the result as the value of the (GNU statement-)expression.
 */
/* Failure is a negative result holding -errno (used here for io_uring_* calls). */
#define CHECK_NEGERR(_expr) \
({ typeof(_expr) _ret = (_expr); if (_ret < 0) { errno = -_ret; err(1, #_expr); } _ret; })
/* Failure is a NULL pointer; errno is not consulted, so errx() is used. */
#define CHECK_NULL(_expr) \
({ typeof(_expr) _ret = (_expr); if (_ret == NULL) { errx(1, #_expr " returned NULL"); } _ret; })
/* Failure is -1 with errno already set by the callee (classic syscall convention). */
#define CHECK_ERR(_expr) \
({ typeof(_expr) _ret = (_expr); if (_ret == -1) { err(1, #_expr); } _ret; })
/*
 * One name/value request.  The same structure is handed to getvalues() and
 * filled in by the readfiles_*() helpers in this file.
 * NOTE(review): the layout presumably has to match what syscall 451 expects
 * on the author's kernel; do not reorder fields without confirming.
 */
struct name_val {
char *name; /* in: path of the file to read, relative to the directory fd */
struct iovec value_in; /* in: caller-supplied buffer and its capacity */
struct iovec value_out; /* out: where the value landed and how many bytes */
uint32_t error; /* out: 0 on success, otherwise a positive errno value */
uint32_t reserved;
};
/* Set by main() when a second argument is given; makes print_val() print successful reads too. */
static bool debug;
/* Files read from each numeric /proc directory. */
static const char *proc_list[] = { "stat", "status", "cmdline", "cgroup" };
/* Number of entries in proc_list. */
#define proc_num (sizeof(proc_list)/sizeof(proc_list[0]))
/* Number of process directories handled per batch (main() sizes its arrays as proc_num * batch). */
#define batch 10
/*
 * Invoke raw system call number 451 with the given arguments and return its
 * result narrowed to int (-1 with errno set on failure, as for any syscall()).
 *
 * NOTE(review): 451 is presumably an experimental "getvalues" call that only
 * exists on the author's patched kernel -- the accompanying mail says the
 * tests using it do not work on public kernels.  Confirm before running it
 * elsewhere, since the number may be assigned to a different call.
 */
int getvalues(int dfd, const char *path, struct name_val *vec, size_t num,
	      unsigned int flags)
{
	long rc = syscall(451, dfd, path, vec, num, flags);

	return rc;
}
static void print_val(const char *name, struct name_val *nv)
{
const char *s = nv->value_out.iov_base;
size_t len = nv->value_out.iov_len;
const size_t prmax = 40;
int prlen = len < prmax ? len : prmax;
const char *cont = len < prmax ? "" : "...";
if (nv->error)
printf("/proc/%s/%s = ERROR %s (%i)\n",
name, nv->name, strerror(nv->error), nv->error);
else if (debug)
printf("/proc/%s/%s = \"%.*s\"%s (len=%zi)\n",
name, nv->name, prlen, s, cont, len);
}
/*
 * Report the result of one batch.
 *
 * name: directory component passed through to print_val().
 * vec:  the requests of the batch.
 * num:  how many requests were issued.
 * ret:  negative errno on failure, otherwise how many entries were completed.
 *
 * A negative ret produces a warning only; a short count produces a warning
 * and then the completed entries are printed.
 */
static void print_values(const char *name, struct name_val *vec, size_t num,
			 ssize_t ret)
{
	ssize_t i;

	if (ret < 0) {
		errno = -ret;
		warn("getvalues failed");
		return;
	}

	if ((size_t) ret < num)
		warnx("%zd values read out of %zu", ret, num);
	for (i = 0; i < ret; i++)
		print_val(name, &vec[i]);
}
/*
 * Open `path` relative to `dfd` and read it with a single read() call.
 *
 * Returns the number of bytes read, or a negative errno value.  A read that
 * fills the buffer completely is reported as -EOVERFLOW, because the file
 * may hold more data than fits.
 */
static ssize_t readfile_plain(int dfd, const char *path, char *buf, size_t size)
{
	ssize_t nread;
	int fd = openat(dfd, path, O_RDONLY);

	if (fd == -1)
		return -errno;

	nread = read(fd, buf, size);
	/* Capture errno before close() gets a chance to overwrite it. */
	if (nread == -1)
		nread = -errno;
	else if ((size_t) nread == size)
		nread = -EOVERFLOW;

	close(fd);
	return nread;
}
static int readfiles_plain(int dfd, const char *path, struct name_val *vec,
size_t num, int mode)
{
struct name_val *nv;
ssize_t ret;
size_t i;
if (path[0])
dfd = CHECK_ERR(openat(dfd, path, O_PATH));
for (i = 0; i < num; i++) {
nv = &vec[i];
if (mode) {
CHECK_ERR(getvalues(dfd, "", nv, 1, mode == 2));
} else {
ret = readfile_plain(dfd, nv->name,
nv->value_in.iov_base,
nv->value_in.iov_len);
if (ret < 0) {
nv->error = -ret;
} else {
nv->error = 0;
nv->value_out.iov_base = nv->value_in.iov_base;
nv->value_out.iov_len = ret;
}
}
}
if (path[0])
close(dfd);
return num;
}
static int readfiles_uring(struct io_uring *ring, int dfd, const char *path,
struct name_val *vec, size_t num)
{
struct io_uring_sqe *sqe;
struct io_uring_cqe *cqe;
size_t slot;
int ret, i;
static int seq = 1;
struct name_val *nv;
if (path[0])
dfd = CHECK_ERR(openat(dfd, path, O_PATH));
for (slot = 0; slot < num; slot++) {
nv = &vec[slot];
sqe = io_uring_get_sqe(ring);
io_uring_prep_openat_direct(sqe, dfd, nv->name, O_RDONLY, 0,
slot);
sqe->flags = IOSQE_IO_LINK | IOSQE_CQE_SKIP_SUCCESS;
sqe->user_data = seq + slot * 2;
sqe = io_uring_get_sqe(ring);
io_uring_prep_read(sqe, slot, nv->value_in.iov_base,
nv->value_in.iov_len, 0);
sqe->flags = IOSQE_FIXED_FILE;
sqe->user_data = seq + slot * 2 + 1;
}
ret = CHECK_NEGERR(io_uring_submit_and_wait(ring, num));
ret /= 2;
for (i = 0; i < ret; i++) {
CHECK_NEGERR(io_uring_wait_cqe(ring, &cqe));
slot = (cqe->user_data - seq) / 2;
nv = &vec[slot];
if (cqe->res < 0) {
nv->error = -cqe->res;
} else if ((size_t) cqe->res < nv->value_in.iov_len) {
nv->error = 0;
nv->value_out.iov_base = nv->value_in.iov_base;
nv->value_out.iov_len = cqe->res;
} else {
nv->error = EOVERFLOW;
}
io_uring_cqe_seen(ring, cqe);
}
seq += 2 * num;
if (path[0])
close(dfd);
return ret;
}
/*
 * Advance the directory stream to the next entry whose name starts with a
 * digit from '1' to '9' and return that name, or NULL once the stream is
 * exhausted.  The returned pointer refers to readdir()'s entry and is only
 * valid until the next call on the same stream.
 */
static const char *next_name(DIR *dp)
{
	for (;;) {
		struct dirent *entry = readdir(dp);
		char first;

		if (entry == NULL)
			return NULL;

		first = entry->d_name[0];
		if (first >= '1' && first <= '9')
			return entry->d_name;
	}
}
/*
 * Prepare the next batch of requests from the /proc directory stream.
 *
 * dp:    directory stream positioned somewhere in /proc.
 * vec:   request array; each vec[i].name must be NULL or a pointer obtained
 *        from malloc(), because the previous name is released with free().
 * num:   capacity of vec for this batch.
 * namep: receives the directory component for the batch ("" when the names
 *        in vec already contain the process directory).
 *
 * Returns the number of entries prepared; 0 means the directory is exhausted.
 * Each process directory contributes proc_num consecutive entries named
 * "<pid>/<file>".
 */
static size_t next_batch(DIR *dp, struct name_val *vec, size_t num,
			 const char **namep)
{
	enum { name_size = 128 };
	const char *name = NULL;
	size_t i;
	int len;

	if (batch == 1) {
		name = next_name(dp);
		if (!name)
			return 0;
		*namep = name;
		return 1;
	}

	*namep = "";
	for (i = 0; i < num; i++) {
		/*
		 * Fetch a new process directory every proc_num entries.  The
		 * name stays usable for the entries in between because
		 * readdir() is not called again until the next fetch.
		 */
		if (i % proc_num == 0) {
			name = next_name(dp);
			if (name == NULL)
				break;
		}

		free(vec[i].name);
		vec[i].name = CHECK_NULL(malloc(name_size));
		len = snprintf(vec[i].name, name_size, "%s/%s",
			       name, proc_list[i % proc_num]);
		if (len < 0 || len >= name_size)
			errx(1, "path too long: %s/%s",
			     name, proc_list[i % proc_num]);
	}
	return i;
}
/*
 * Run the io_uring variant over the whole directory stream.
 *
 * Sets up a ring with 2 * num submission entries, registers a file table of
 * proc_num * batch slots that all start out as -1, then reads and prints
 * batch after batch until next_batch() reports no more entries.
 */
static void test_uring(DIR *dp, struct name_val *vec, size_t num)
{
	struct io_uring ring;
	int slots[proc_num * batch];
	const size_t nslots = sizeof(slots) / sizeof(slots[0]);
	size_t k;

	for (k = 0; k < nslots; k++)
		slots[k] = -1;

	CHECK_NEGERR(io_uring_queue_init(num * 2, &ring, 0));
	CHECK_NEGERR(io_uring_register_files(&ring, slots, nslots));

	for (;;) {
		const char *name;
		ssize_t done;

		num = next_batch(dp, vec, num, &name);
		if (num == 0)
			break;
		done = readfiles_uring(&ring, dirfd(dp), name, vec, num);
		print_values(name, vec, num, done);
	}

	io_uring_queue_exit(&ring);
}
/*
 * Run the one-call-per-file variant over the whole directory stream:
 * fetch a batch, process it with readfiles_plain() in the given mode, print
 * the results, and repeat until next_batch() reports no more entries.
 */
static void test_plain(DIR *dp, struct name_val *vec, size_t num, int mode)
{
	for (;;) {
		const char *name;
		ssize_t done;

		num = next_batch(dp, vec, num, &name);
		if (num == 0)
			break;
		done = readfiles_plain(dirfd(dp), name, vec, num, mode);
		print_values(name, vec, num, done);
	}
}
/*
 * Run the batched getvalues() variant over the whole directory stream:
 * each batch is handed to a single getvalues() call, with `rf` passed as the
 * flags argument, and the outcome is printed.
 */
static void test_values(DIR *dp, struct name_val *vec, size_t num, bool rf)
{
	for (;;) {
		const char *name;
		ssize_t done;

		num = next_batch(dp, vec, num, &name);
		if (num == 0)
			break;
		done = getvalues(dirfd(dp), name, vec, num, rf);
		print_values(name, vec, num, done);
	}
}
/*
 * Usage: procreads [mode [debug]]
 *
 * mode is selected by the first character of the first argument:
 *   p  plain openat/read/close per file (default)
 *   r  one getvalues() call per file, flags 0
 *   s  one getvalues() call per file, flags 1
 *   u  io_uring linked open + read
 *   v  one getvalues() call per batch, flags 0
 *   z  one getvalues() call per batch, flags 1
 *   w  like v, but only the first entry supplies a buffer, sized for the
 *      whole batch
 * Any second argument enables printing of successfully read values.
 */
int main(int argc, char *argv[])
{
	const size_t num = proc_num * batch;
	char buf[num][4096];
	struct name_val vec[num];
	DIR *dp;
	size_t i;
	char type = 'p';

	if (argc > 1)
		type = argv[1][0];
	if (argc > 2)
		debug = true;

	/*
	 * vec is a variable-length array and cannot take an initializer.
	 * Zero it explicitly: next_batch() passes every vec[i].name to free()
	 * before assigning it, so the pointers must start out as NULL.
	 */
	memset(vec, 0, sizeof(vec));
	for (i = 0; i < num; i++) {
		vec[i].value_in.iov_base = (type != 'w' || !i) ? buf[i] : NULL;
		vec[i].value_in.iov_len = sizeof(buf[i]);
	}

	dp = CHECK_NULL(opendir("/proc"));
	switch (type) {
	case 'p':
		test_plain(dp, vec, num, 0);
		break;
	case 'r':
		test_plain(dp, vec, num, 1);
		break;
	case 's':
		test_plain(dp, vec, num, 2);
		break;
	case 'u':
		test_uring(dp, vec, num);
		break;
	case 'w':
		vec[0].value_in.iov_len = sizeof(buf[0]) * num;
		/* fallthrough */
	case 'v':
	case 'z':
		test_values(dp, vec, num, type == 'z');
		break;
	default:
		/* Previously an unknown mode silently did nothing. */
		errx(1, "unknown test type '%c' (expected one of p, r, s, u, v, w, z)",
		     type);
	}
	closedir(dp);

	for (i = 0; i < num; i++)
		free(vec[i].name);
	return 0;
}
[-- Attachment #3: forkbomb.c --]
[-- Type: text/x-csrc, Size: 511 bytes --]
#include <unistd.h>
#include <stdio.h>
#include <err.h>
#include <pthread.h>
#include <stdlib.h>
/*
 * Thread body for the optional (currently compiled-out) thread spawning in
 * main(): sleeps for 1000 seconds and returns NULL.  The argument is unused;
 * it is named because omitting a parameter name in a definition is only
 * valid from C23 on.
 */
static void *run(void *arg)
{
	(void) arg;
	sleep(1000);
	return NULL;
}
/*
 * Usage: forkbomb <levels>
 *
 * Calls fork() once per level; both parent and child continue the loop, so
 * the number of processes doubles with every level (2^levels in total, e.g.
 * 14 levels for roughly 16k processes).  Every process then sleeps for 1000
 * seconds so that it stays visible in /proc.
 */
int main(int argc, char *argv[])
{
	long maxlevel, level;
	char *end;
	pid_t pid;
#if 0
	pthread_t thr;
	int i;
#endif

	if (argc < 2)
		errx(1, "usage: forkbomb <levels>");

	maxlevel = strtol(argv[1], &end, 10);
	if (end == argv[1] || *end != '\0' || maxlevel < 0)
		errx(1, "invalid level count: %s", argv[1]);

	for (level = 0; level < maxlevel; level++) {
		pid = fork();
		if (pid == -1)
			err(1, "fork");
		fprintf(stderr, ".");
#if 0
		if (pid == 0) {
			for (i = 0; i < 4; i++)
				pthread_create(&thr, NULL, run, NULL);
		}
#endif
	}
	sleep(1000);
	return 0;
}
next prev parent reply other threads:[~2022-04-01 16:28 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-03-29 13:20 io_uring_prep_openat_direct() and link/drain Miklos Szeredi
2022-03-29 16:08 ` Jens Axboe
2022-03-29 17:04 ` Jens Axboe
2022-03-29 18:21 ` Miklos Szeredi
2022-03-29 18:26 ` Jens Axboe
2022-03-29 18:31 ` Miklos Szeredi
2022-03-29 18:40 ` Jens Axboe
2022-03-29 19:30 ` Miklos Szeredi
2022-03-29 20:03 ` Jens Axboe
2022-03-30 8:18 ` Miklos Szeredi
2022-03-30 12:35 ` Jens Axboe
2022-03-30 12:43 ` Miklos Szeredi
2022-03-30 12:48 ` Jens Axboe
2022-03-30 12:51 ` Miklos Szeredi
2022-03-30 14:58 ` Miklos Szeredi
2022-03-30 15:05 ` Jens Axboe
2022-03-30 15:12 ` Miklos Szeredi
2022-03-30 15:17 ` Jens Axboe
2022-03-30 15:53 ` Jens Axboe
2022-03-30 17:49 ` Jens Axboe
2022-04-01 8:40 ` Miklos Szeredi
2022-04-01 15:36 ` Jens Axboe
2022-04-01 16:02 ` Miklos Szeredi [this message]
2022-04-01 16:21 ` Jens Axboe
2022-04-02 1:17 ` Jens Axboe
2022-04-05 7:45 ` Miklos Szeredi
2022-04-05 14:44 ` Jens Axboe
2022-04-21 12:31 ` Miklos Szeredi
2022-04-21 12:34 ` Jens Axboe
2022-04-21 12:39 ` Miklos Szeredi
2022-04-21 12:41 ` Jens Axboe
2022-04-21 13:10 ` Miklos Szeredi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CAJfpeguq1bBDa9-gbk6tutME1kH4SdHvkUdLGKzfdmhpCtCt6g@mail.gmail.com \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox