* [PATCH v2 1/4] fsstress: add IO_URING read and write operations
2020-08-09 6:30 [PATCH v2 0/4] fsstress,fsx: add io_uring test and do some fix Zorro Lang
@ 2020-08-09 6:30 ` Zorro Lang
2020-08-09 17:51 ` Jens Axboe
2020-08-09 6:30 ` [PATCH v2 2/4] fsstress: reduce the number of events when io_setup Zorro Lang
` (2 subsequent siblings)
3 siblings, 1 reply; 8+ messages in thread
From: Zorro Lang @ 2020-08-09 6:30 UTC (permalink / raw)
To: fstests; +Cc: io-uring, jmoyer
IO_URING is a new feature of the current Linux kernel. Add basic IO_URING
read/write operations to fsstress to cover this kind of IO testing.
Signed-off-by: Zorro Lang <[email protected]>
---
README | 4 +-
configure.ac | 1 +
include/builddefs.in | 1 +
ltp/Makefile | 5 ++
ltp/fsstress.c | 139 ++++++++++++++++++++++++++++++++++++++++-
m4/Makefile | 1 +
m4/package_liburing.m4 | 4 ++
7 files changed, 152 insertions(+), 3 deletions(-)
create mode 100644 m4/package_liburing.m4
diff --git a/README b/README
index d0e23fcd..ae0f804d 100644
--- a/README
+++ b/README
@@ -8,13 +8,13 @@ _______________________
sudo apt-get install xfslibs-dev uuid-dev libtool-bin \
e2fsprogs automake gcc libuuid1 quota attr libattr1-dev make \
libacl1-dev libaio-dev xfsprogs libgdbm-dev gawk fio dbench \
- uuid-runtime python sqlite3
+ uuid-runtime python sqlite3 liburing-dev
For Fedora, RHEL, or CentOS:
yum install acl attr automake bc dbench dump e2fsprogs fio \
gawk gcc indent libtool lvm2 make psmisc quota sed \
xfsdump xfsprogs \
libacl-devel libattr-devel libaio-devel libuuid-devel \
- xfsprogs-devel btrfs-progs-devel python sqlite
+ xfsprogs-devel btrfs-progs-devel python sqlite liburing-devel
(Older distributions may require xfsprogs-qa-devel as well.)
(Note that for RHEL and CentOS, you may need the EPEL repo.)
- run make
diff --git a/configure.ac b/configure.ac
index 4bb50b32..8922c47e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -61,6 +61,7 @@ AC_PACKAGE_NEED_ACLINIT_LIBACL
AC_PACKAGE_WANT_GDBM
AC_PACKAGE_WANT_AIO
+AC_PACKAGE_WANT_URING
AC_PACKAGE_WANT_DMAPI
AC_PACKAGE_WANT_LINUX_FIEMAP_H
AC_PACKAGE_WANT_FALLOCATE
diff --git a/include/builddefs.in b/include/builddefs.in
index e7894b1a..fded3230 100644
--- a/include/builddefs.in
+++ b/include/builddefs.in
@@ -61,6 +61,7 @@ RPM_VERSION = @rpm_version@
ENABLE_SHARED = @enable_shared@
HAVE_DB = @have_db@
HAVE_AIO = @have_aio@
+HAVE_URING = @have_uring@
HAVE_FALLOCATE = @have_fallocate@
HAVE_OPEN_BY_HANDLE_AT = @have_open_by_handle_at@
HAVE_DMAPI = @have_dmapi@
diff --git a/ltp/Makefile b/ltp/Makefile
index ebf40336..198d930f 100644
--- a/ltp/Makefile
+++ b/ltp/Makefile
@@ -24,6 +24,11 @@ LCFLAGS += -DAIO
LLDLIBS += -laio -lpthread
endif
+ifeq ($(HAVE_URING), true)
+LCFLAGS += -DURING
+LLDLIBS += -luring
+endif
+
ifeq ($(HAVE_LIBBTRFSUTIL), true)
LLDLIBS += -lbtrfsutil
endif
diff --git a/ltp/fsstress.c b/ltp/fsstress.c
index 709fdeec..a4188e1c 100644
--- a/ltp/fsstress.c
+++ b/ltp/fsstress.c
@@ -30,6 +30,11 @@
#include <libaio.h>
io_context_t io_ctx;
#endif
+#ifdef URING
+#include <liburing.h>
+#define URING_ENTRIES 1
+struct io_uring ring;
+#endif
#include <sys/syscall.h>
#include <sys/xattr.h>
@@ -139,6 +144,8 @@ typedef enum {
OP_TRUNCATE,
OP_UNLINK,
OP_UNRESVSP,
+ OP_URING_READ,
+ OP_URING_WRITE,
OP_WRITE,
OP_WRITEV,
OP_LAST
@@ -267,6 +274,8 @@ void sync_f(int, long);
void truncate_f(int, long);
void unlink_f(int, long);
void unresvsp_f(int, long);
+void uring_read_f(int, long);
+void uring_write_f(int, long);
void write_f(int, long);
void writev_f(int, long);
char *xattr_flag_to_string(int);
@@ -335,6 +344,8 @@ opdesc_t ops[] = {
{ OP_TRUNCATE, "truncate", truncate_f, 2, 1 },
{ OP_UNLINK, "unlink", unlink_f, 1, 1 },
{ OP_UNRESVSP, "unresvsp", unresvsp_f, 1, 1 },
+ { OP_URING_READ, "uring_read", uring_read_f, 1, 0 },
+ { OP_URING_WRITE, "uring_write", uring_write_f, 1, 1 },
{ OP_WRITE, "write", write_f, 4, 1 },
{ OP_WRITEV, "writev", writev_f, 4, 1 },
}, *ops_end;
@@ -692,6 +703,12 @@ int main(int argc, char **argv)
fprintf(stderr, "io_setup failed");
exit(1);
}
+#endif
+#ifdef URING
+ if (io_uring_queue_init(URING_ENTRIES, &ring, 0)) {
+ fprintf(stderr, "io_uring_queue_init failed\n");
+ exit(1);
+ }
#endif
for (i = 0; !loops || (i < loops); i++)
doproc();
@@ -701,7 +718,9 @@ int main(int argc, char **argv)
return 1;
}
#endif
-
+#ifdef URING
+ io_uring_queue_exit(&ring);
+#endif
cleanup_flist();
free(freq_table);
return 0;
@@ -2170,6 +2189,108 @@ do_aio_rw(int opno, long r, int flags)
}
#endif
+#ifdef URING
+void
+do_uring_rw(int opno, long r, int flags)
+{
+ char *buf;
+ int e;
+ pathname_t f;
+ int fd;
+ size_t len;
+ int64_t lr;
+ off64_t off;
+ struct stat64 stb;
+ int v;
+ char st[1024];
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+ struct iovec iovec;
+ int iswrite = (flags & (O_WRONLY | O_RDWR)) ? 1 : 0;
+
+ init_pathname(&f);
+ if (!get_fname(FT_REGFILE, r, &f, NULL, NULL, &v)) {
+ if (v)
+ printf("%d/%d: do_uring_rw - no filename\n", procid, opno);
+ goto uring_out3;
+ }
+ fd = open_path(&f, flags);
+ e = fd < 0 ? errno : 0;
+ check_cwd();
+ if (fd < 0) {
+ if (v)
+ printf("%d/%d: do_uring_rw - open %s failed %d\n",
+ procid, opno, f.path, e);
+ goto uring_out3;
+ }
+ if (fstat64(fd, &stb) < 0) {
+ if (v)
+ printf("%d/%d: do_uring_rw - fstat64 %s failed %d\n",
+ procid, opno, f.path, errno);
+ goto uring_out2;
+ }
+ inode_info(st, sizeof(st), &stb, v);
+ if (!iswrite && stb.st_size == 0) {
+ if (v)
+ printf("%d/%d: do_uring_rw - %s%s zero size\n", procid, opno,
+ f.path, st);
+ goto uring_out2;
+ }
+ sqe = io_uring_get_sqe(&ring);
+ if (!sqe) {
+ if (v)
+ printf("%d/%d: do_uring_rw - io_uring_get_sqe failed\n",
+ procid, opno);
+ goto uring_out2;
+ }
+ lr = ((int64_t)random() << 32) + random();
+ len = (random() % FILELEN_MAX) + 1;
+ buf = malloc(len);
+ if (!buf) {
+ if (v)
+ printf("%d/%d: do_uring_rw - malloc failed\n",
+ procid, opno);
+ goto uring_out2;
+ }
+ iovec.iov_base = buf;
+ iovec.iov_len = len;
+ if (iswrite) {
+ off = (off64_t)(lr % MIN(stb.st_size + (1024 * 1024), MAXFSIZE));
+ off %= maxfsize;
+ memset(buf, nameseq & 0xff, len);
+ io_uring_prep_writev(sqe, fd, &iovec, 1, off);
+ } else {
+ off = (off64_t)(lr % stb.st_size);
+ io_uring_prep_readv(sqe, fd, &iovec, 1, off);
+ }
+
+ if ((e = io_uring_submit(&ring)) != 1) {
+ if (v)
+ printf("%d/%d: %s - io_uring_submit failed %d\n", procid, opno,
+ iswrite ? "uring_write" : "uring_read", e);
+ goto uring_out1;
+ }
+ if ((e = io_uring_wait_cqe(&ring, &cqe)) < 0) {
+ if (v)
+ printf("%d/%d: %s - io_uring_wait_cqe failed %d\n", procid, opno,
+ iswrite ? "uring_write" : "uring_read", e);
+ goto uring_out1;
+ }
+ if (v)
+ printf("%d/%d: %s %s%s [%lld, %d(res=%d)] %d\n",
+ procid, opno, iswrite ? "uring_write" : "uring_read",
+ f.path, st, (long long)off, (int)len, cqe->res, e);
+ io_uring_cqe_seen(&ring, cqe);
+
+ uring_out1:
+ free(buf);
+ uring_out2:
+ close(fd);
+ uring_out3:
+ free_pathname(&f);
+}
+#endif
+
void
aread_f(int opno, long r)
{
@@ -5044,6 +5165,22 @@ unresvsp_f(int opno, long r)
close(fd);
}
+void
+uring_read_f(int opno, long r)
+{
+#ifdef URING
+ do_uring_rw(opno, r, O_RDONLY);
+#endif
+}
+
+void
+uring_write_f(int opno, long r)
+{
+#ifdef URING
+ do_uring_rw(opno, r, O_WRONLY);
+#endif
+}
+
void
write_f(int opno, long r)
{
diff --git a/m4/Makefile b/m4/Makefile
index 7fbff822..0352534d 100644
--- a/m4/Makefile
+++ b/m4/Makefile
@@ -14,6 +14,7 @@ LSRCFILES = \
package_dmapidev.m4 \
package_globals.m4 \
package_libcdev.m4 \
+ package_liburing.m4 \
package_ncurses.m4 \
package_pthread.m4 \
package_ssldev.m4 \
diff --git a/m4/package_liburing.m4 b/m4/package_liburing.m4
new file mode 100644
index 00000000..c92cc02a
--- /dev/null
+++ b/m4/package_liburing.m4
@@ -0,0 +1,4 @@
+AC_DEFUN([AC_PACKAGE_WANT_URING],
+ [ AC_CHECK_HEADERS(liburing.h, [ have_uring=true ], [ have_uring=false ])
+ AC_SUBST(have_uring)
+ ])
--
2.20.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH v2 1/4] fsstress: add IO_URING read and write operations
2020-08-09 6:30 ` [PATCH v2 1/4] fsstress: add IO_URING read and write operations Zorro Lang
@ 2020-08-09 17:51 ` Jens Axboe
2020-08-22 18:14 ` Zorro Lang
0 siblings, 1 reply; 8+ messages in thread
From: Jens Axboe @ 2020-08-09 17:51 UTC (permalink / raw)
To: Zorro Lang, fstests; +Cc: io-uring, jmoyer
On 8/9/20 12:30 AM, Zorro Lang wrote:
> @@ -2170,6 +2189,108 @@ do_aio_rw(int opno, long r, int flags)
> }
> #endif
>
> +#ifdef URING
> +void
> +do_uring_rw(int opno, long r, int flags)
> +{
> + char *buf;
> + int e;
> + pathname_t f;
> + int fd;
> + size_t len;
> + int64_t lr;
> + off64_t off;
> + struct stat64 stb;
> + int v;
> + char st[1024];
> + struct io_uring_sqe *sqe;
> + struct io_uring_cqe *cqe;
> + struct iovec iovec;
> + int iswrite = (flags & (O_WRONLY | O_RDWR)) ? 1 : 0;
> +
> + init_pathname(&f);
> + if (!get_fname(FT_REGFILE, r, &f, NULL, NULL, &v)) {
> + if (v)
> + printf("%d/%d: do_uring_rw - no filename\n", procid, opno);
> + goto uring_out3;
> + }
> + fd = open_path(&f, flags);
> + e = fd < 0 ? errno : 0;
> + check_cwd();
> + if (fd < 0) {
> + if (v)
> + printf("%d/%d: do_uring_rw - open %s failed %d\n",
> + procid, opno, f.path, e);
> + goto uring_out3;
> + }
> + if (fstat64(fd, &stb) < 0) {
> + if (v)
> + printf("%d/%d: do_uring_rw - fstat64 %s failed %d\n",
> + procid, opno, f.path, errno);
> + goto uring_out2;
> + }
> + inode_info(st, sizeof(st), &stb, v);
> + if (!iswrite && stb.st_size == 0) {
> + if (v)
> + printf("%d/%d: do_uring_rw - %s%s zero size\n", procid, opno,
> + f.path, st);
> + goto uring_out2;
> + }
> + sqe = io_uring_get_sqe(&ring);
> + if (!sqe) {
> + if (v)
> + printf("%d/%d: do_uring_rw - io_uring_get_sqe failed\n",
> + procid, opno);
> + goto uring_out2;
> + }
> + lr = ((int64_t)random() << 32) + random();
> + len = (random() % FILELEN_MAX) + 1;
> + buf = malloc(len);
> + if (!buf) {
> + if (v)
> + printf("%d/%d: do_uring_rw - malloc failed\n",
> + procid, opno);
> + goto uring_out2;
> + }
> + iovec.iov_base = buf;
> + iovec.iov_len = len;
> + if (iswrite) {
> + off = (off64_t)(lr % MIN(stb.st_size + (1024 * 1024), MAXFSIZE));
> + off %= maxfsize;
> + memset(buf, nameseq & 0xff, len);
> + io_uring_prep_writev(sqe, fd, &iovec, 1, off);
> + } else {
> + off = (off64_t)(lr % stb.st_size);
> + io_uring_prep_readv(sqe, fd, &iovec, 1, off);
> + }
> +
> + if ((e = io_uring_submit(&ring)) != 1) {
> + if (v)
> + printf("%d/%d: %s - io_uring_submit failed %d\n", procid, opno,
> + iswrite ? "uring_write" : "uring_read", e);
> + goto uring_out1;
> + }
> + if ((e = io_uring_wait_cqe(&ring, &cqe)) < 0) {
> + if (v)
> + printf("%d/%d: %s - io_uring_wait_cqe failed %d\n", procid, opno,
> + iswrite ? "uring_write" : "uring_read", e);
> + goto uring_out1;
> + }
You could use io_uring_submit_and_wait() here, that'll save a system
call for sync IO. Same comment goes for 4/4.
Apart from that, looks pretty straight forward to me.
--
Jens Axboe
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2 1/4] fsstress: add IO_URING read and write operations
2020-08-09 17:51 ` Jens Axboe
@ 2020-08-22 18:14 ` Zorro Lang
2020-08-22 18:05 ` Jens Axboe
0 siblings, 1 reply; 8+ messages in thread
From: Zorro Lang @ 2020-08-22 18:14 UTC (permalink / raw)
To: Jens Axboe; +Cc: fstests, io-uring, jmoyer
On Sun, Aug 09, 2020 at 11:51:45AM -0600, Jens Axboe wrote:
> On 8/9/20 12:30 AM, Zorro Lang wrote:
> > @@ -2170,6 +2189,108 @@ do_aio_rw(int opno, long r, int flags)
> > }
> > #endif
> >
> > +#ifdef URING
> > +void
> > +do_uring_rw(int opno, long r, int flags)
> > +{
> > + char *buf;
> > + int e;
> > + pathname_t f;
> > + int fd;
> > + size_t len;
> > + int64_t lr;
> > + off64_t off;
> > + struct stat64 stb;
> > + int v;
> > + char st[1024];
> > + struct io_uring_sqe *sqe;
> > + struct io_uring_cqe *cqe;
> > + struct iovec iovec;
> > + int iswrite = (flags & (O_WRONLY | O_RDWR)) ? 1 : 0;
> > +
> > + init_pathname(&f);
> > + if (!get_fname(FT_REGFILE, r, &f, NULL, NULL, &v)) {
> > + if (v)
> > + printf("%d/%d: do_uring_rw - no filename\n", procid, opno);
> > + goto uring_out3;
> > + }
> > + fd = open_path(&f, flags);
> > + e = fd < 0 ? errno : 0;
> > + check_cwd();
> > + if (fd < 0) {
> > + if (v)
> > + printf("%d/%d: do_uring_rw - open %s failed %d\n",
> > + procid, opno, f.path, e);
> > + goto uring_out3;
> > + }
> > + if (fstat64(fd, &stb) < 0) {
> > + if (v)
> > + printf("%d/%d: do_uring_rw - fstat64 %s failed %d\n",
> > + procid, opno, f.path, errno);
> > + goto uring_out2;
> > + }
> > + inode_info(st, sizeof(st), &stb, v);
> > + if (!iswrite && stb.st_size == 0) {
> > + if (v)
> > + printf("%d/%d: do_uring_rw - %s%s zero size\n", procid, opno,
> > + f.path, st);
> > + goto uring_out2;
> > + }
> > + sqe = io_uring_get_sqe(&ring);
> > + if (!sqe) {
> > + if (v)
> > + printf("%d/%d: do_uring_rw - io_uring_get_sqe failed\n",
> > + procid, opno);
> > + goto uring_out2;
> > + }
> > + lr = ((int64_t)random() << 32) + random();
> > + len = (random() % FILELEN_MAX) + 1;
> > + buf = malloc(len);
> > + if (!buf) {
> > + if (v)
> > + printf("%d/%d: do_uring_rw - malloc failed\n",
> > + procid, opno);
> > + goto uring_out2;
> > + }
> > + iovec.iov_base = buf;
> > + iovec.iov_len = len;
> > + if (iswrite) {
> > + off = (off64_t)(lr % MIN(stb.st_size + (1024 * 1024), MAXFSIZE));
> > + off %= maxfsize;
> > + memset(buf, nameseq & 0xff, len);
> > + io_uring_prep_writev(sqe, fd, &iovec, 1, off);
> > + } else {
> > + off = (off64_t)(lr % stb.st_size);
> > + io_uring_prep_readv(sqe, fd, &iovec, 1, off);
> > + }
> > +
> > + if ((e = io_uring_submit(&ring)) != 1) {
> > + if (v)
> > + printf("%d/%d: %s - io_uring_submit failed %d\n", procid, opno,
> > + iswrite ? "uring_write" : "uring_read", e);
> > + goto uring_out1;
> > + }
> > + if ((e = io_uring_wait_cqe(&ring, &cqe)) < 0) {
> > + if (v)
> > + printf("%d/%d: %s - io_uring_wait_cqe failed %d\n", procid, opno,
> > + iswrite ? "uring_write" : "uring_read", e);
> > + goto uring_out1;
> > + }
>
> You could use io_uring_submit_and_wait() here, that'll save a system
> call for sync IO. Same comment goes for 4/4.
Hi Jens,
Sorry, I don't think I've learned enough about io_uring yet. Why can
io_uring_submit_and_wait() save a system call? Is it the same as
io_uring_submit(), except for the wait_nr argument? The io_uring_wait_cqe()
and io_uring_cqe_seen() calls are still needed, right?
Thanks,
Zorro
>
> Apart from that, looks pretty straight forward to me.
>
> --
> Jens Axboe
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2 1/4] fsstress: add IO_URING read and write operations
2020-08-22 18:14 ` Zorro Lang
@ 2020-08-22 18:05 ` Jens Axboe
0 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2020-08-22 18:05 UTC (permalink / raw)
To: fstests, io-uring, jmoyer
On 8/22/20 12:14 PM, Zorro Lang wrote:
>>> + if ((e = io_uring_submit(&ring)) != 1) {
>>> + if (v)
>>> + printf("%d/%d: %s - io_uring_submit failed %d\n", procid, opno,
>>> + iswrite ? "uring_write" : "uring_read", e);
>>> + goto uring_out1;
>>> + }
>>> + if ((e = io_uring_wait_cqe(&ring, &cqe)) < 0) {
>>> + if (v)
>>> + printf("%d/%d: %s - io_uring_wait_cqe failed %d\n", procid, opno,
>>> + iswrite ? "uring_write" : "uring_read", e);
>>> + goto uring_out1;
>>> + }
>>
>> You could use io_uring_submit_and_wait() here, that'll save a system
>> call for sync IO. Same comment goes for 4/4.
>
> Hi Jens,
>
> Sorry, I don't think I've learned enough about io_uring yet. Why can
> io_uring_submit_and_wait() save a system call? Is it the same as
> io_uring_submit(), except for the wait_nr argument? The io_uring_wait_cqe()
> and io_uring_cqe_seen() calls are still needed, right?
If you just call io_uring_submit(), it'll enter the kernel and submit
that IO. Then right after that you're saying "I want to wait for
completion of a request", which is then another system call. If you do
io_uring_submit_and_wait() you're entering the kernel with the intent of
"submit my request(s), and wait for N requests" hence only doing a
single system call even though it's an async interface.
Nothing else changes, io_uring_wait_cqe() will not enter the kernel if a
cqe is available in the ring already.
--
Jens Axboe
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v2 2/4] fsstress: reduce the number of events when io_setup
2020-08-09 6:30 [PATCH v2 0/4] fsstress,fsx: add io_uring test and do some fix Zorro Lang
2020-08-09 6:30 ` [PATCH v2 1/4] fsstress: add IO_URING read and write operations Zorro Lang
@ 2020-08-09 6:30 ` Zorro Lang
2020-08-09 6:30 ` [PATCH v2 3/4] fsstress: fix memory leak in do_aio_rw Zorro Lang
2020-08-09 6:30 ` [PATCH v2 4/4] fsx: add IO_URING test Zorro Lang
3 siblings, 0 replies; 8+ messages in thread
From: Zorro Lang @ 2020-08-09 6:30 UTC (permalink / raw)
To: fstests; +Cc: io-uring, jmoyer
The original number (128) of aio events passed to io_setup is too big.
Trying to run lots of fsstress processes (e.g. -p 1000) always hits an
io_setup EAGAIN error, because nr_events exceeds the limit of available
events. Each fsstress process only does one libaio read/write operation
at a time, so reduce the number of aio events to 1 to allow more
fsstress processes to run the AIO test.
Signed-off-by: Zorro Lang <[email protected]>
---
ltp/fsstress.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/ltp/fsstress.c b/ltp/fsstress.c
index a4188e1c..0e7be6bb 100644
--- a/ltp/fsstress.c
+++ b/ltp/fsstress.c
@@ -28,6 +28,7 @@
#endif
#ifdef AIO
#include <libaio.h>
+#define AIO_ENTRIES 1
io_context_t io_ctx;
#endif
#ifdef URING
@@ -699,8 +700,8 @@ int main(int argc, char **argv)
}
procid = i;
#ifdef AIO
- if (io_setup(128, &io_ctx) != 0) {
- fprintf(stderr, "io_setup failed");
+ if (io_setup(AIO_ENTRIES, &io_ctx) != 0) {
+ fprintf(stderr, "io_setup failed\n");
exit(1);
}
#endif
--
2.20.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v2 3/4] fsstress: fix memory leak in do_aio_rw
2020-08-09 6:30 [PATCH v2 0/4] fsstress,fsx: add io_uring test and do some fix Zorro Lang
2020-08-09 6:30 ` [PATCH v2 1/4] fsstress: add IO_URING read and write operations Zorro Lang
2020-08-09 6:30 ` [PATCH v2 2/4] fsstress: reduce the number of events when io_setup Zorro Lang
@ 2020-08-09 6:30 ` Zorro Lang
2020-08-09 6:30 ` [PATCH v2 4/4] fsx: add IO_URING test Zorro Lang
3 siblings, 0 replies; 8+ messages in thread
From: Zorro Lang @ 2020-08-09 6:30 UTC (permalink / raw)
To: fstests; +Cc: io-uring, jmoyer
If io_submit or io_getevents fails, do_aio_rw() doesn't free "buf",
which causes a memory leak.
Signed-off-by: Zorro Lang <[email protected]>
---
ltp/fsstress.c | 31 ++++++++++++++++---------------
1 file changed, 16 insertions(+), 15 deletions(-)
diff --git a/ltp/fsstress.c b/ltp/fsstress.c
index 0e7be6bb..4b255bf6 100644
--- a/ltp/fsstress.c
+++ b/ltp/fsstress.c
@@ -2099,8 +2099,7 @@ do_aio_rw(int opno, long r, int flags)
if (!get_fname(FT_REGFILE, r, &f, NULL, NULL, &v)) {
if (v)
printf("%d/%d: do_aio_rw - no filename\n", procid, opno);
- free_pathname(&f);
- return;
+ goto aio_out3;
}
fd = open_path(&f, flags|O_DIRECT);
e = fd < 0 ? errno : 0;
@@ -2109,16 +2108,13 @@ do_aio_rw(int opno, long r, int flags)
if (v)
printf("%d/%d: do_aio_rw - open %s failed %d\n",
procid, opno, f.path, e);
- free_pathname(&f);
- return;
+ goto aio_out3;
}
if (fstat64(fd, &stb) < 0) {
if (v)
printf("%d/%d: do_aio_rw - fstat64 %s failed %d\n",
procid, opno, f.path, errno);
- free_pathname(&f);
- close(fd);
- return;
+ goto aio_out2;
}
inode_info(st, sizeof(st), &stb, v);
if (!iswrite && stb.st_size == 0) {
@@ -2150,6 +2146,12 @@ do_aio_rw(int opno, long r, int flags)
else if (len > diob.d_maxiosz)
len = diob.d_maxiosz;
buf = memalign(diob.d_mem, len);
+ if (!buf) {
+ if (v)
+ printf("%d/%d: do_aio_rw - memalign failed\n",
+ procid, opno);
+ goto aio_out2;
+ }
if (iswrite) {
off = (off64_t)(lr % MIN(stb.st_size + (1024 * 1024), MAXFSIZE));
@@ -2166,27 +2168,26 @@ do_aio_rw(int opno, long r, int flags)
if (v)
printf("%d/%d: %s - io_submit failed %d\n",
procid, opno, iswrite ? "awrite" : "aread", e);
- free_pathname(&f);
- close(fd);
- return;
+ goto aio_out1;
}
if ((e = io_getevents(io_ctx, 1, 1, &event, NULL)) != 1) {
if (v)
printf("%d/%d: %s - io_getevents failed %d\n",
procid, opno, iswrite ? "awrite" : "aread", e);
- free_pathname(&f);
- close(fd);
- return;
+ goto aio_out1;
}
e = event.res != len ? event.res2 : 0;
- free(buf);
if (v)
printf("%d/%d: %s %s%s [%lld,%d] %d\n",
procid, opno, iswrite ? "awrite" : "aread",
f.path, st, (long long)off, (int)len, e);
- free_pathname(&f);
+ aio_out1:
+ free(buf);
+ aio_out2:
close(fd);
+ aio_out3:
+ free_pathname(&f);
}
#endif
--
2.20.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v2 4/4] fsx: add IO_URING test
2020-08-09 6:30 [PATCH v2 0/4] fsstress,fsx: add io_uring test and do some fix Zorro Lang
` (2 preceding siblings ...)
2020-08-09 6:30 ` [PATCH v2 3/4] fsstress: fix memory leak in do_aio_rw Zorro Lang
@ 2020-08-09 6:30 ` Zorro Lang
3 siblings, 0 replies; 8+ messages in thread
From: Zorro Lang @ 2020-08-09 6:30 UTC (permalink / raw)
To: fstests; +Cc: io-uring, jmoyer
Add a new IO_URING test to fsx; use the -U option to enable it.
Signed-off-by: Zorro Lang <[email protected]>
---
ltp/fsx.c | 158 +++++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 144 insertions(+), 14 deletions(-)
diff --git a/ltp/fsx.c b/ltp/fsx.c
index 7c76655a..7475bf9d 100644
--- a/ltp/fsx.c
+++ b/ltp/fsx.c
@@ -34,6 +34,9 @@
#ifdef AIO
#include <libaio.h>
#endif
+#ifdef URING
+#include <liburing.h>
+#endif
#include <sys/syscall.h>
#ifndef MAP_FILE
@@ -176,21 +179,17 @@ int integrity = 0; /* -i flag */
int fsxgoodfd = 0;
int o_direct; /* -Z */
int aio = 0;
+int uring = 0;
int mark_nr = 0;
int page_size;
int page_mask;
int mmap_mask;
-#ifdef AIO
-int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
+int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
#define READ 0
#define WRITE 1
-#define fsxread(a,b,c,d) aio_rw(READ, a,b,c,d)
-#define fsxwrite(a,b,c,d) aio_rw(WRITE, a,b,c,d)
-#else
-#define fsxread(a,b,c,d) read(a,b,c)
-#define fsxwrite(a,b,c,d) write(a,b,c)
-#endif
+#define fsxread(a,b,c,d) fsx_rw(READ, a,b,c,d)
+#define fsxwrite(a,b,c,d) fsx_rw(WRITE, a,b,c,d)
const char *replayops = NULL;
const char *recordops = NULL;
@@ -2242,7 +2241,7 @@ void
usage(void)
{
fprintf(stdout, "usage: %s",
- "fsx [-dknqxABEFJLOWZ] [-b opnum] [-c Prob] [-g filldata] [-i logdev] [-j logid] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] fname\n\
+ "fsx [-dknqxBEFJLOWZ][-A|-U] [-b opnum] [-c Prob] [-g filldata] [-i logdev] [-j logid] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] fname\n\
-b opnum: beginning operation number (default 1)\n\
-c P: 1 in P chance of file close+open at each op (default infinity)\n\
-d: debug output for all operations\n\
@@ -2265,7 +2264,10 @@ usage(void)
-y synchronize changes to a file\n"
#ifdef AIO
-" -A: Use the AIO system calls\n"
+" -A: Use the AIO system calls, -A excludes -U\n"
+#endif
+#ifdef URING
+" -U: Use the IO_URING system calls, -U excludes -A\n"
#endif
" -D startingop: debug output starting at specified operation\n"
#ifdef HAVE_LINUX_FALLOC_H
@@ -2425,13 +2427,131 @@ out_error:
errno = -ret;
return -1;
}
+#endif
+
+#ifdef URING
+struct io_uring ring;
+#define URING_ENTRIES 1024
+int
+uring_setup()
+{
+ int ret;
+
+ ret = io_uring_queue_init(URING_ENTRIES, &ring, 0);
+ if (ret != 0) {
+ fprintf(stderr, "uring_setup: io_uring_queue_init failed: %s\n",
+ strerror(ret));
+ return -1;
+ }
+ return 0;
+}
-int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+int
+__uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
{
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+ struct iovec iovec;
int ret;
+ int res, res2 = 0;
+ char *p = buf;
+ unsigned l = len;
+ unsigned o = offset;
+
+
+ /*
+ * Because io_uring tries non-blocking IOs (especially for reads),
+ * 'normal' short reads happen all the time. To avoid failing on such
+ * a short read, loop the read/write (especially read) of the data.
+ */
+ uring_loop:
+ sqe = io_uring_get_sqe(&ring);
+ if (!sqe) {
+ fprintf(stderr, "uring_rw: io_uring_get_sqe failed: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ iovec.iov_base = p;
+ iovec.iov_len = l;
+ if (rw == READ) {
+ io_uring_prep_readv(sqe, fd, &iovec, 1, o);
+ } else {
+ io_uring_prep_writev(sqe, fd, &iovec, 1, o);
+ }
+
+ ret = io_uring_submit(&ring);
+ if (ret != 1) {
+ fprintf(stderr, "errcode=%d\n", -ret);
+ fprintf(stderr, "uring %s: io_uring_submit failed: %s\n",
+ rw == READ ? "read":"write", strerror(-ret));
+ goto uring_error;
+ }
+
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ if (ret < 0) {
+ if (ret == 0)
+ fprintf(stderr, "uring %s: no events available\n",
+ rw == READ ? "read":"write");
+ else {
+ fprintf(stderr, "errcode=%d\n", -ret);
+ fprintf(stderr, "uring %s: io_uring_wait_cqe failed: %s\n",
+ rw == READ ? "read":"write", strerror(-ret));
+ }
+ goto uring_error;
+ }
+ res = cqe->res;
+ io_uring_cqe_seen(&ring, cqe);
+
+ res2 += res;
+ if (len != res2) {
+ if (res > 0) {
+ o += res;
+ l -= res;
+ p += res;
+ if (l > 0)
+ goto uring_loop;
+ } else if (res < 0) {
+ ret = res;
+ fprintf(stderr, "errcode=%d\n", -ret);
+ fprintf(stderr, "uring %s: io_uring failed: %s\n",
+ rw == READ ? "read":"write", strerror(-ret));
+ goto uring_error;
+ } else {
+ fprintf(stderr, "uring %s bad io length: %d instead of %u\n",
+ rw == READ ? "read":"write", res2, len);
+ }
+ }
+ return res2;
+
+ uring_error:
+ /*
+ * The caller expects error return in traditional libc
+ * convention, i.e. -1 and the errno set to error.
+ */
+ errno = -ret;
+ return -1;
+}
+#endif
+
+int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+{
+ int ret = -1;
if (aio) {
+#ifdef AIO
ret = __aio_rw(rw, fd, buf, len, offset);
+#elif
+ fprintf(stderr, "io_rw: need AIO support!\n");
+ exit(111);
+#endif
+ } else if (uring) {
+#ifdef URING
+ ret = __uring_rw(rw, fd, buf, len, offset);
+#elif
+ fprintf(stderr, "io_rw: need IO_URING support!\n");
+ exit(111);
+#endif
} else {
if (rw == READ)
ret = read(fd, buf, len);
@@ -2441,8 +2561,6 @@ int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
return ret;
}
-#endif
-
#define test_fallocate(mode) __test_fallocate(mode, #mode)
int
@@ -2496,7 +2614,7 @@ main(int argc, char **argv)
setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
while ((ch = getopt_long(argc, argv,
- "b:c:dfg:i:j:kl:m:no:p:qr:s:t:w:xyABD:EFJKHzCILN:OP:RS:WXZ",
+ "b:c:dfg:i:j:kl:m:no:p:qr:s:t:w:xyABD:EFJKHzCILN:OP:RS:UWXZ",
longopts, NULL)) != EOF)
switch (ch) {
case 'b':
@@ -2604,6 +2722,9 @@ main(int argc, char **argv)
case 'A':
aio = 1;
break;
+ case 'U':
+ uring = 1;
+ break;
case 'D':
debugstart = getnum(optarg, &endp);
if (debugstart < 1)
@@ -2694,6 +2815,11 @@ main(int argc, char **argv)
if (argc != 1)
usage();
+ if (aio && uring) {
+ fprintf(stderr, "-A and -U shouldn't be used together\n");
+ usage();
+ }
+
if (integrity && !dirpath) {
fprintf(stderr, "option -i <logdev> requires -P <dirpath>\n");
usage();
@@ -2784,6 +2910,10 @@ main(int argc, char **argv)
if (aio)
aio_setup();
#endif
+#ifdef URING
+ if (uring)
+ uring_setup();
+#endif
if (!(o_flags & O_TRUNC)) {
off_t ret;
--
2.20.1
^ permalink raw reply related [flat|nested] 8+ messages in thread