public inbox for [email protected]
 help / color / mirror / Atom feed
* io_uring's openat doesn't work with large (2G+) files
@ 2020-04-08 14:51 Dmitry Kadashev
  2020-04-08 15:19 ` Jens Axboe
  0 siblings, 1 reply; 13+ messages in thread
From: Dmitry Kadashev @ 2020-04-08 14:51 UTC (permalink / raw)
  To: io-uring

[-- Attachment #1: Type: text/plain, Size: 1226 bytes --]

Hi,

io_uring's openat seems to produce FDs that are incompatible with
large files (>2GB). If a file (smaller than 2GB) is opened using
io_uring's openat then writes -- both using io_uring and just sync
pwrite() -- past that threshold fail with EFBIG. If such a file is
opened with sync openat, then both io_uring's writes and sync writes
succeed. And if the file is larger than 2GB then io_uring's openat
fails right away, while the sync one works.

Kernel versions: 5.6.0-rc2, 5.6.0.

A couple of reproducers are attached: one demonstrates a successful open
followed by failed writes, and the other demonstrates a failing open (each
in comparison with the sync calls).

The output of the former one for example:

*** sync openat
openat succeeded
sync write at offset 0
write succeeded
sync write at offset 4294967296
write succeeded

*** sync openat
openat succeeded
io_uring write at offset 0
write succeeded
io_uring write at offset 4294967296
write succeeded

*** io_uring openat
openat succeeded
sync write at offset 0
write succeeded
sync write at offset 4294967296
write failed: File too large

*** io_uring openat
openat succeeded
io_uring write at offset 0
write succeeded
io_uring write at offset 4294967296
write failed: File too large

-- 
Dmitry

[-- Attachment #2: test-io_uring-write-large-offset.c --]
[-- Type: text/x-csrc, Size: 3061 bytes --]

#include <liburing.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/resource.h>
#include <unistd.h>

/* Ring size handed to io_uring_queue_init() in main(). */
static const int RSIZE = 2;
/* Open flags shared by the sync and io_uring openat paths. */
static const int OPEN_FLAGS = O_CREAT | O_RDWR;
/* Mode for newly created files: owner may read and write. */
static const mode_t OPEN_MODE = S_IWUSR | S_IRUSR;

/*
 * Print a printf-style message to stderr, then abort() the process.
 *
 * The expansion intentionally carries no trailing semicolon: the caller's
 * own `;` completes the do/while(0), so `DIE(...);` is exactly one
 * statement and remains valid as the body of an unbraced if/else.
 */
#define DIE(...) do {\
		fprintf(stderr, __VA_ARGS__);\
		abort();\
	} while (0)

/*
 * Write a small fixed test buffer to fd at the given offset and report the
 * outcome on stderr.
 *
 * ring:   io_uring instance; only used when sync is zero.
 * sync:   non-zero to write with pwrite(), zero to submit the write
 *         through io_uring and wait for its completion.
 * fd:     descriptor to write to.
 * offset: absolute file offset of the write.
 *
 * Nothing is returned; every failure is reported on stderr.
 */
void do_write(struct io_uring *ring, int sync, int fd, off_t offset)
{
	/* off_t has no printf length modifier of its own, so widen it explicitly. */
	fprintf(stderr, "%s write at offset %lld\n", sync ? "sync": "io_uring", (long long)offset);
	char buf[] = "some test write buf";
	int res;
	if (sync) {
		ssize_t written = pwrite(fd, buf, sizeof(buf), offset);
		/* Fold the result into the same "negative errno" form a CQE uses. */
		res = written < 0 ? -errno : (int)written;
	}
	else {
		struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
		if (!sqe) {
			fprintf(stderr, "failed to get sqe\n");
			return;
		}
		io_uring_prep_write(sqe, fd, buf, sizeof(buf), offset);
		int ret = io_uring_submit(ring);
		if (ret < 0) {
			fprintf(stderr, "failed to submit write: %s\n", strerror(-ret));
			return;
		}

		struct io_uring_cqe *cqe = NULL;
		ret = io_uring_wait_cqe(ring, &cqe);
		/* cqe is only filled in when the wait succeeds; check before touching it. */
		if (ret < 0) {
			fprintf(stderr, "wait_cqe failed: %s\n", strerror(-ret));
			return;
		}
		res = cqe->res;
		io_uring_cqe_seen(ring, cqe);
	}
	if (res < 0) {
		fprintf(stderr, "write failed: %s\n", strerror(-res));
	}
	else {
		fprintf(stderr, "write succeeded\n");
	}
}

/*
 * Run one open-then-write scenario and log each step to stderr.
 *
 * ring:       io_uring instance used for the io_uring variants.
 * sync_open:  non-zero to open with openat(), zero to open via io_uring.
 * sync_write: forwarded to do_write() to select the write method.
 * dfd:        directory descriptor that fn is resolved against.
 * fn:         file name to open (created if missing, per OPEN_FLAGS).
 *
 * After a successful open, writes are attempted at offset 0 and at
 * 1ull << 32 (4 GiB), then the descriptor is closed.
 */
void test_open_write(struct io_uring *ring, int sync_open, int sync_write, int dfd, const char* fn)
{
	fprintf(stderr, "\n*** %s openat\n", sync_open ? "sync" : "io_uring");
	int fd = -1;
	if (sync_open) {
		fd = openat(dfd, fn, OPEN_FLAGS, OPEN_MODE);
		if (fd < 0) {
			/* Use the same "negative errno" convention as a CQE result. */
			fd = -errno;
		}
	}
	else {
		struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
		if (!sqe) {
			fprintf(stderr, "failed to get sqe\n");
			return;
		}
		io_uring_prep_openat(sqe, dfd, fn, OPEN_FLAGS, OPEN_MODE);
		int ret = io_uring_submit(ring);
		if (ret < 0) {
			fprintf(stderr, "failed to submit openat: %s\n", strerror(-ret));
			return;
		}

		struct io_uring_cqe *cqe = NULL;
		ret = io_uring_wait_cqe(ring, &cqe);
		/* cqe is only filled in when the wait succeeds; check before touching it. */
		if (ret < 0) {
			fprintf(stderr, "wait_cqe failed: %s\n", strerror(-ret));
			return;
		}
		fd = cqe->res;
		io_uring_cqe_seen(ring, cqe);
	}
	if (fd < 0) {
		fprintf(stderr, "openat failed: %s\n", strerror(-fd));
	}
	else {
		fprintf(stderr, "openat succeeded\n");
		do_write(ring, sync_write, fd, 0);
		do_write(ring, sync_write, fd, 1ull << 32);
		close(fd);
	}
}

/*
 * Entry point: opens /tmp, sets up the ring, then runs every combination of
 * sync/io_uring open with sync/io_uring write, each against its own file.
 */
int main()
{
	/* One row per scenario, in execution order. */
	static const struct {
		int sync_open;
		int sync_write;
		const char *name;
	} scenarios[] = {
		{ 1, 1, "io_uring_openat_write_test1" },
		{ 1, 0, "io_uring_openat_write_test2" },
		{ 0, 1, "io_uring_openat_write_test3" },
		{ 0, 0, "io_uring_openat_write_test4" },
	};
	struct io_uring ring;
	int dir_fd;
	int rc;

	dir_fd = open("/tmp", O_RDONLY | O_DIRECTORY);
	if (dir_fd < 0) {
		DIE("open /tmp: %s\n", strerror(errno));
	}

	rc = io_uring_queue_init(RSIZE, &ring, 0);
	if (rc < 0) {
		DIE("failed to init io_uring: %s\n", strerror(-rc));
	}

	for (size_t i = 0; i < sizeof(scenarios) / sizeof(scenarios[0]); i++) {
		test_open_write(&ring, scenarios[i].sync_open,
				scenarios[i].sync_write, dir_fd, scenarios[i].name);
	}

	io_uring_queue_exit(&ring);
	close(dir_fd);
	return 0;
}

[-- Attachment #3: test-io_uring-openat-large-file.c --]
[-- Type: text/x-csrc, Size: 2255 bytes --]

#include <liburing.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/resource.h>
#include <unistd.h>

/*
 * Print a printf-style message to stderr, then abort() the process.
 *
 * The expansion intentionally carries no trailing semicolon: the caller's
 * own `;` completes the do/while(0), so `DIE(...);` is exactly one
 * statement and remains valid as the body of an unbraced if/else.
 */
#define DIE(...) do {\
		fprintf(stderr, __VA_ARGS__);\
		abort();\
	} while (0)

/* Ring size handed to io_uring_queue_init() in main(). */
static const int RSIZE = 2;
/* Open flags shared by the sync and io_uring openat paths. */
static const int OPEN_FLAGS = O_CREAT | O_RDWR;
/* Mode for newly created files: owner may read and write. */
static const mode_t OPEN_MODE = S_IWUSR | S_IRUSR;

/*
 * Open fn relative to dfd with a plain openat() call, report success or
 * failure on stderr, and close the descriptor again if one was obtained.
 */
void open_sync(int dfd, const char* fn)
{
	int opened = openat(dfd, fn, OPEN_FLAGS, OPEN_MODE);

	if (opened >= 0) {
		fprintf(stderr, "sync open succeeded\n");
		close(opened);
		return;
	}

	fprintf(stderr, "sync open failed: %s\n", strerror(errno));
}

/*
 * Open fn relative to dfd by submitting an openat request through io_uring,
 * wait for its completion, and report the outcome on stderr. A descriptor
 * returned by a successful open is closed before returning.
 *
 * ring: io_uring instance to submit on.
 * dfd:  directory descriptor that fn is resolved against.
 * fn:   file name to open.
 */
void open_io_uring(struct io_uring *ring, int dfd, const char* fn)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	if (!sqe) {
		fprintf(stderr, "failed to get sqe\n");
		return;
	}
	io_uring_prep_openat(sqe, dfd, fn, OPEN_FLAGS, OPEN_MODE);
	int ret = io_uring_submit(ring);
	if (ret < 0) {
		fprintf(stderr, "failed to submit openat: %s\n", strerror(-ret));
		return;
	}

	struct io_uring_cqe *cqe = NULL;
	ret = io_uring_wait_cqe(ring, &cqe);
	/* cqe is only filled in when the wait succeeds; check before touching it. */
	if (ret < 0) {
		fprintf(stderr, "wait_cqe failed: %s\n", strerror(-ret));
		return;
	}

	/* The completion result is the new fd, or a negative errno value. */
	int fd = cqe->res;
	io_uring_cqe_seen(ring, cqe);
	if (fd < 0) {
		fprintf(stderr, "io_uring openat failed: %s\n", strerror(-fd));
	}
	else {
		fprintf(stderr, "io_uring openat succeeded\n");
		close(fd);
	}
}

/*
 * Create (or reuse) fn relative to dfd and write a short marker at offset
 * 1ull << 32 (4 GiB).
 *
 * Returns 0 on success, or a negative value when the open or the write
 * fails; the failing step is reported on stderr.
 */
int prepare_file(int dfd, const char* fn)
{
	const char marker[] = "foo";
	int status = 0;

	int fd = openat(dfd, fn, OPEN_FLAGS, OPEN_MODE);
	if (fd < 0) {
		fprintf(stderr, "prepare/open: %s\n", strerror(errno));
		return -1;
	}

	int written = pwrite(fd, marker, sizeof(marker), 1ull << 32);
	if (written < 0) {
		fprintf(stderr, "prepare/pwrite: %s\n", strerror(errno));
		status = written;
	}

	close(fd);
	return status;
}

/*
 * Entry point: opens /tmp, sets up the ring, prepares a file with data at
 * a 4 GiB offset, then tries to open it first with openat() and then
 * through io_uring.
 */
int main()
{
	const char *test_name = "io_uring_openat_test";
	struct io_uring ring;
	int dir_fd;
	int rc;

	dir_fd = open("/tmp", O_RDONLY | O_DIRECTORY);
	if (dir_fd < 0) {
		DIE("open /tmp: %s\n", strerror(errno));
	}

	rc = io_uring_queue_init(RSIZE, &ring, 0);
	if (rc < 0) {
		DIE("failed to init io_uring: %s\n", strerror(-rc));
	}

	/* Only compare the two open paths when the test file could be set up. */
	if (prepare_file(dir_fd, test_name) == 0) {
		open_sync(dir_fd, test_name);
		open_io_uring(&ring, dir_fd, test_name);
	}

	io_uring_queue_exit(&ring);
	close(dir_fd);
	return 0;
}

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2020-04-13 10:20 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-08 14:51 io_uring's openat doesn't work with large (2G+) files Dmitry Kadashev
2020-04-08 15:19 ` Jens Axboe
2020-04-08 15:30   ` Dmitry Kadashev
2020-04-08 15:36     ` Jens Axboe
2020-04-08 15:41       ` Dmitry Kadashev
2020-04-08 15:49         ` Jens Axboe
2020-04-08 16:12           ` Dmitry Kadashev
2020-04-08 16:26             ` Jens Axboe
2020-04-09  3:50               ` Dmitry Kadashev
2020-04-09 15:29                 ` Jens Axboe
2020-04-13  9:20                   ` Dmitry Kadashev
2020-04-13 10:09                     ` Pavel Begunkov
2020-04-13 10:19                       ` Dmitry Kadashev

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox