* [PATCH 01/12] block: read-ahead submission should imply no-wait as well
From: Jens Axboe @ 2020-05-24 19:21 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe
As read-ahead is opportunistic, don't block for request allocation.
Signed-off-by: Jens Axboe <[email protected]>
---
include/linux/blk_types.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index ccb895f911b1..c296463c15eb 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -374,7 +374,8 @@ enum req_flag_bits {
#define REQ_INTEGRITY (1ULL << __REQ_INTEGRITY)
#define REQ_FUA (1ULL << __REQ_FUA)
#define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH)
-#define REQ_RAHEAD (1ULL << __REQ_RAHEAD)
+#define REQ_RAHEAD \
+ ((1ULL << __REQ_RAHEAD) | (1ULL << __REQ_NOWAIT))
#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND)
#define REQ_NOWAIT (1ULL << __REQ_NOWAIT)
#define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT)
--
2.26.2
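
To make the effect concrete, here is a stand-alone sketch (not kernel
code; the bit positions are mocked up rather than taken from
blk_types.h) showing that any path testing REQ_NOWAIT now also fires
for read-ahead submissions:

#include <assert.h>

/* mock bit positions, for illustration only */
enum req_flag_bits_mock { __REQ_RAHEAD = 19, __REQ_NOWAIT = 21 };

#define REQ_NOWAIT	(1ULL << __REQ_NOWAIT)
/* read-ahead now implies no-wait, as in the patch above */
#define REQ_RAHEAD	((1ULL << __REQ_RAHEAD) | (1ULL << __REQ_NOWAIT))

int main(void)
{
	unsigned long long bi_opf = REQ_RAHEAD;	/* flags of a read-ahead bio */

	/* request allocation paths that test REQ_NOWAIT bail instead of blocking */
	assert(bi_opf & REQ_NOWAIT);
	return 0;
}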
* [PATCH 02/12] mm: allow read-ahead with IOCB_NOWAIT set
From: Jens Axboe @ 2020-05-24 19:21 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe
Read-ahead shouldn't block, so allow it to be done even if
IOCB_NOWAIT is set in the kiocb.
Signed-off-by: Jens Axboe <[email protected]>
---
mm/filemap.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 23a051a7ef0f..80747f1377d5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2031,8 +2031,6 @@ static ssize_t generic_file_buffered_read(struct kiocb *iocb,
page = find_get_page(mapping, index);
if (!page) {
- if (iocb->ki_flags & IOCB_NOWAIT)
- goto would_block;
page_cache_sync_readahead(mapping,
ra, filp,
index, last_index - index);
--
2.26.2
* [PATCH 03/12] mm: abstract out wake_page_match() from wake_page_function()
From: Jens Axboe @ 2020-05-24 19:21 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe
No functional changes in this patch, just in preparation for allowing
more callers.
Signed-off-by: Jens Axboe <[email protected]>
---
include/linux/pagemap.h | 37 +++++++++++++++++++++++++++++++++++++
mm/filemap.c | 35 ++++-------------------------------
2 files changed, 41 insertions(+), 31 deletions(-)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a8f7bd8ea1c6..53d980f2208d 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -456,6 +456,43 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
return pgoff;
}
+/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */
+struct wait_page_key {
+ struct page *page;
+ int bit_nr;
+ int page_match;
+};
+
+struct wait_page_queue {
+ struct page *page;
+ int bit_nr;
+ wait_queue_entry_t wait;
+};
+
+static inline int wake_page_match(struct wait_page_queue *wait_page,
+ struct wait_page_key *key)
+{
+ if (wait_page->page != key->page)
+ return 0;
+ key->page_match = 1;
+
+ if (wait_page->bit_nr != key->bit_nr)
+ return 0;
+
+ /*
+ * Stop walking if it's locked.
+ * Is this safe if put_and_wait_on_page_locked() is in use?
+ * Yes: the waker must hold a reference to this page, and if PG_locked
+ * has now already been set by another task, that task must also hold
+ * a reference to the *same usage* of this page; so there is no need
+ * to walk on to wake even the put_and_wait_on_page_locked() callers.
+ */
+ if (test_bit(key->bit_nr, &key->page->flags))
+ return -1;
+
+ return 1;
+}
+
extern void __lock_page(struct page *page);
extern int __lock_page_killable(struct page *page);
extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
diff --git a/mm/filemap.c b/mm/filemap.c
index 80747f1377d5..e891b5bee8fd 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -990,43 +990,16 @@ void __init pagecache_init(void)
page_writeback_init();
}
-/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */
-struct wait_page_key {
- struct page *page;
- int bit_nr;
- int page_match;
-};
-
-struct wait_page_queue {
- struct page *page;
- int bit_nr;
- wait_queue_entry_t wait;
-};
-
static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
{
struct wait_page_key *key = arg;
struct wait_page_queue *wait_page
= container_of(wait, struct wait_page_queue, wait);
+ int ret;
- if (wait_page->page != key->page)
- return 0;
- key->page_match = 1;
-
- if (wait_page->bit_nr != key->bit_nr)
- return 0;
-
- /*
- * Stop walking if it's locked.
- * Is this safe if put_and_wait_on_page_locked() is in use?
- * Yes: the waker must hold a reference to this page, and if PG_locked
- * has now already been set by another task, that task must also hold
- * a reference to the *same usage* of this page; so there is no need
- * to walk on to wake even the put_and_wait_on_page_locked() callers.
- */
- if (test_bit(key->bit_nr, &key->page->flags))
- return -1;
-
+ ret = wake_page_match(wait_page, key);
+ if (ret != 1)
+ return ret;
return autoremove_wake_function(wait, mode, sync, key);
}
--
2.26.2
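
With wake_page_match() public, callers outside mm/filemap.c can build
their own wake functions on top of it. A minimal hypothetical sketch
(my_wake_fn and the deferred action are placeholders), following the
same return convention as wake_page_function(): 0 keeps walking the
wait queue, -1 stops the walk, and 1 means this entry matched and was
handled.

static int my_wake_fn(struct wait_queue_entry *wait, unsigned mode,
		      int sync, void *arg)
{
	struct wait_page_queue *wpq
		= container_of(wait, struct wait_page_queue, wait);
	struct wait_page_key *key = arg;
	int ret;

	ret = wake_page_match(wpq, key);
	if (ret != 1)
		return ret;	/* wrong page/bit, or the bit is set again */

	list_del_init(&wait->entry);
	/* ... queue the deferred work that retries the operation ... */
	return 1;
}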
* [PATCH 04/12] mm: add support for async page locking
From: Jens Axboe @ 2020-05-24 19:21 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe
Normally waiting for a page to become unlocked, or locking the page,
requires waiting for IO to complete. Add support for lock_page_async()
and wait_on_page_locked_async(), which are callback based instead. This
allows a caller to get notified when a page becomes unlocked, rather
than wait for it.
We add a new iocb field, ki_waitq (in a union with the iopoll cookie),
to pass in the necessary data for this to happen, with struct
wait_page_queue serving as the interface between the caller and the core.
Signed-off-by: Jens Axboe <[email protected]>
---
include/linux/fs.h | 7 ++++++-
include/linux/pagemap.h | 9 +++++++++
mm/filemap.c | 41 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 56 insertions(+), 1 deletion(-)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7e84d823c6a8..5a5434ff7543 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -314,6 +314,8 @@ enum rw_hint {
#define IOCB_SYNC (1 << 5)
#define IOCB_WRITE (1 << 6)
#define IOCB_NOWAIT (1 << 7)
+/* iocb->ki_waitq is valid */
+#define IOCB_WAITQ (1 << 8)
struct kiocb {
struct file *ki_filp;
@@ -327,7 +329,10 @@ struct kiocb {
int ki_flags;
u16 ki_hint;
u16 ki_ioprio; /* See linux/ioprio.h */
- unsigned int ki_cookie; /* for ->iopoll */
+ union {
+ unsigned int ki_cookie; /* for ->iopoll */
+ struct wait_page_queue *ki_waitq; /* for async buffered IO */
+ };
randomized_struct_fields_end
};
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 53d980f2208d..d3e63c9c61ae 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -495,6 +495,7 @@ static inline int wake_page_match(struct wait_page_queue *wait_page,
extern void __lock_page(struct page *page);
extern int __lock_page_killable(struct page *page);
+extern int __lock_page_async(struct page *page, struct wait_page_queue *wait);
extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
unsigned int flags);
extern void unlock_page(struct page *page);
@@ -531,6 +532,14 @@ static inline int lock_page_killable(struct page *page)
return 0;
}
+static inline int lock_page_async(struct page *page,
+ struct wait_page_queue *wait)
+{
+ if (!trylock_page(page))
+ return __lock_page_async(page, wait);
+ return 0;
+}
+
/*
* lock_page_or_retry - Lock the page, unless this would block and the
* caller indicated that it can handle a retry.
diff --git a/mm/filemap.c b/mm/filemap.c
index e891b5bee8fd..c746541b1d49 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1183,6 +1183,42 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr)
}
EXPORT_SYMBOL(wait_on_page_bit_killable);
+static int __wait_on_page_locked_async(struct page *page,
+ struct wait_page_queue *wait, bool set)
+{
+ struct wait_queue_head *q = page_waitqueue(page);
+ int ret = 0;
+
+ wait->page = page;
+ wait->bit_nr = PG_locked;
+
+ spin_lock_irq(&q->lock);
+ if (set)
+ ret = !trylock_page(page);
+ else
+ ret = PageLocked(page);
+ if (ret) {
+ __add_wait_queue_entry_tail(q, &wait->wait);
+ SetPageWaiters(page);
+ if (set)
+ ret = !trylock_page(page);
+ else
+ ret = PageLocked(page);
+ /*
+ * If we were successful now, we know we're still on the
+ * waitqueue as we're still under the lock. This means it's
+ * safe to remove and return success, we know the callback
+ * isn't going to trigger.
+ */
+ if (!ret)
+ __remove_wait_queue(q, &wait->wait);
+ else
+ ret = -EIOCBQUEUED;
+ }
+ spin_unlock_irq(&q->lock);
+ return ret;
+}
+
/**
* put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
* @page: The page to wait for.
@@ -1345,6 +1381,11 @@ int __lock_page_killable(struct page *__page)
}
EXPORT_SYMBOL_GPL(__lock_page_killable);
+int __lock_page_async(struct page *page, struct wait_page_queue *wait)
+{
+ return __wait_on_page_locked_async(page, wait, true);
+}
+
/*
* Return values:
* 1 - page is locked; mmap_sem is still held.
--
2.26.2
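
A hypothetical caller-side sketch of the new contract: lock_page_async()
either takes the lock right away and returns 0, or queues the wait entry
and returns -EIOCBQUEUED, in which case the wake callback attached to
the wait_page_queue re-drives the operation once the page unlocks.

static int lock_for_read(struct kiocb *iocb, struct page *page)
{
	if (iocb->ki_flags & IOCB_WAITQ)
		/* 0 on success, -EIOCBQUEUED if now queued on the page */
		return lock_page_async(page, iocb->ki_waitq);

	/* fall back to the usual sleeping lock */
	return lock_page_killable(page);
}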
* [PATCH 05/12] mm: support async buffered reads in generic_file_buffered_read()
From: Jens Axboe @ 2020-05-24 19:21 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe
Use the async page locking infrastructure if IOCB_WAITQ is set in the
passed-in iocb. The caller must expect an -EIOCBQUEUED return value,
which means that IO has been started but is not yet done. This is
similar to how O_DIRECT signals the same condition. Once the caller
receives the IO-completion callback, it must retry the operation.
Signed-off-by: Jens Axboe <[email protected]>
---
mm/filemap.c | 33 ++++++++++++++++++++++++++-------
1 file changed, 26 insertions(+), 7 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index c746541b1d49..18022de7dc33 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1219,6 +1219,14 @@ static int __wait_on_page_locked_async(struct page *page,
return ret;
}
+static int wait_on_page_locked_async(struct page *page,
+ struct wait_page_queue *wait)
+{
+ if (!PageLocked(page))
+ return 0;
+ return __wait_on_page_locked_async(compound_head(page), wait, false);
+}
+
/**
* put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
* @page: The page to wait for.
@@ -2058,17 +2066,25 @@ static ssize_t generic_file_buffered_read(struct kiocb *iocb,
index, last_index - index);
}
if (!PageUptodate(page)) {
- if (iocb->ki_flags & IOCB_NOWAIT) {
- put_page(page);
- goto would_block;
- }
-
/*
* See comment in do_read_cache_page on why
* wait_on_page_locked is used to avoid unnecessarily
* serialisations and why it's safe.
*/
- error = wait_on_page_locked_killable(page);
+			if (iocb->ki_flags & IOCB_WAITQ) {
+				if (written) {
+					put_page(page);
+					goto out;
+				}
+				error = wait_on_page_locked_async(page,
+						iocb->ki_waitq);
+			} else {
+				if (iocb->ki_flags & IOCB_NOWAIT) {
+					put_page(page);
+					goto would_block;
+				}
+				error = wait_on_page_locked_killable(page);
+			}
if (unlikely(error))
goto readpage_error;
if (PageUptodate(page))
@@ -2156,7 +2172,10 @@ static ssize_t generic_file_buffered_read(struct kiocb *iocb,
page_not_up_to_date:
/* Get exclusive access to the page ... */
- error = lock_page_killable(page);
+ if (iocb->ki_flags & IOCB_WAITQ)
+ error = lock_page_async(page, iocb->ki_waitq);
+ else
+ error = lock_page_killable(page);
if (unlikely(error))
goto readpage_error;
--
2.26.2
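
From the submitter's side the contract looks roughly like this
(hypothetical helper, not part of the patch): on -EIOCBQUEUED the wait
entry is armed, and the same kiocb must be resubmitted from the
ki_waitq callback once the page unlocks.

static ssize_t issue_buffered_read(struct file *file, struct kiocb *iocb,
				   struct iov_iter *iter)
{
	ssize_t ret;

	/* iocb has IOCB_WAITQ set and iocb->ki_waitq initialized */
	ret = call_read_iter(file, iocb, iter);
	/*
	 * On -EIOCBQUEUED, don't touch iocb or iter again here; the
	 * wake callback fires when the page unlocks, and the caller
	 * retries the read from there.
	 */
	return ret;
}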
* [PATCH 06/12] fs: add FMODE_BUF_RASYNC
From: Jens Axboe @ 2020-05-24 19:22 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe
If set, this indicates that the file system supports IOCB_WAITQ for
buffered reads.
Signed-off-by: Jens Axboe <[email protected]>
---
include/linux/fs.h | 3 +++
1 file changed, 3 insertions(+)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5a5434ff7543..f7b1eb765c6e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -175,6 +175,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
/* File does not contribute to nr_files count */
#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000)
+/* File supports async buffered reads */
+#define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000)
+
/*
* Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
* that indicates that they should check the contents of the iovec are
--
2.26.2
* [PATCH 07/12] ext4: flag as supporting buffered async reads
From: Jens Axboe @ 2020-05-24 19:22 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe
Signed-off-by: Jens Axboe <[email protected]>
---
fs/ext4/file.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 0d624250a62b..9f7d9bf427b4 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -826,7 +826,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
return ret;
}
- filp->f_mode |= FMODE_NOWAIT;
+ filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
return dquot_file_open(inode, filp);
}
--
2.26.2
* [PATCH 08/12] block: flag block devices as supporting IOCB_WAITQ
From: Jens Axboe @ 2020-05-24 19:22 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe
Signed-off-by: Jens Axboe <[email protected]>
---
fs/block_dev.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 86e2a7134513..ec8dccc81b65 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1851,7 +1851,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
*/
filp->f_flags |= O_LARGEFILE;
- filp->f_mode |= FMODE_NOWAIT;
+ filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
if (filp->f_flags & O_NDELAY)
filp->f_mode |= FMODE_NDELAY;
--
2.26.2
* [PATCH 09/12] xfs: flag files as supporting buffered async reads
From: Jens Axboe @ 2020-05-24 19:22 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe
XFS uses generic_file_read_iter(), which already supports this.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/xfs/xfs_file.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 4b8bdecc3863..97f44fbf17f2 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1080,7 +1080,7 @@ xfs_file_open(
return -EFBIG;
if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
return -EIO;
- file->f_mode |= FMODE_NOWAIT;
+ file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
return 0;
}
--
2.26.2
* [PATCH 10/12] btrfs: flag files as supporting buffered async reads
From: Jens Axboe @ 2020-05-24 19:22 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe
btrfs uses generic_file_read_iter(), which already supports this.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/btrfs/file.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 719e68ab552c..c933b6a1b4a8 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3480,7 +3480,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
static int btrfs_file_open(struct inode *inode, struct file *filp)
{
- filp->f_mode |= FMODE_NOWAIT;
+ filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
return generic_file_open(inode, filp);
}
--
2.26.2
* [PATCH 11/12] mm: add kiocb_wait_page_queue_init() helper
From: Jens Axboe @ 2020-05-24 19:22 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe
kiocb_wait_page_queue_init() checks whether the file supports
IOCB_WAITQ, and initializes the values we need. The caller passes in
the 'data' pointer, if any, and the wake callback function to be used.
Signed-off-by: Jens Axboe <[email protected]>
---
include/linux/pagemap.h | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index d3e63c9c61ae..8b65420410ee 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -493,6 +493,27 @@ static inline int wake_page_match(struct wait_page_queue *wait_page,
return 1;
}
+static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb,
+ struct wait_page_queue *wait,
+ wait_queue_func_t func,
+ void *data)
+{
+ /* Can't support async wakeup with polled IO */
+ if (kiocb->ki_flags & IOCB_HIPRI)
+ return -EINVAL;
+ if (kiocb->ki_filp->f_mode & FMODE_BUF_RASYNC) {
+ wait->wait.func = func;
+ wait->wait.private = data;
+ wait->wait.flags = 0;
+ INIT_LIST_HEAD(&wait->wait.entry);
+ kiocb->ki_flags |= IOCB_WAITQ;
+ kiocb->ki_waitq = wait;
+ return 0;
+ }
+
+ return -EOPNOTSUPP;
+}
+
extern void __lock_page(struct page *page);
extern int __lock_page_killable(struct page *page);
extern int __lock_page_async(struct page *page, struct wait_page_queue *wait);
--
2.26.2
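
A hypothetical usage sketch, reusing my_wake_fn from the earlier
wake_page_match() example, with 'req' standing in for whatever per-IO
state the callback needs:

static bool arm_async_retry(struct kiocb *kiocb, struct wait_page_queue *wpq,
			    void *req)
{
	/*
	 * Fails with -EINVAL for polled IO and -EOPNOTSUPP if the file
	 * didn't set FMODE_BUF_RASYNC; on success IOCB_WAITQ is set
	 * and kiocb->ki_waitq points at wpq.
	 */
	return kiocb_wait_page_queue_init(kiocb, wpq, my_wake_fn, req) == 0;
}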
* [PATCH 12/12] io_uring: support true async buffered reads, if file provides it
From: Jens Axboe @ 2020-05-24 19:22 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe
If the file is flagged with FMODE_BUF_RASYNC, then we don't have to punt
the buffered read to an io-wq worker. Instead we can rely on page
unlocking callbacks to support retry based async IO. This is a lot more
efficient than doing async thread offload.
The retry is done similarly to how we handle poll based retry. From
the unlock callback, we simply queue the retry to a task_work based
handler.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 112 insertions(+)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e95481c552ff..23073857239c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -498,6 +498,8 @@ struct io_async_rw {
struct iovec *iov;
ssize_t nr_segs;
ssize_t size;
+ struct wait_page_queue wpq;
+ struct callback_head task_work;
};
struct io_async_ctx {
@@ -2568,6 +2570,112 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
return 0;
}
+static void io_async_buf_cancel(struct callback_head *cb)
+{
+ struct io_async_rw *rw;
+ struct io_ring_ctx *ctx;
+ struct io_kiocb *req;
+
+ rw = container_of(cb, struct io_async_rw, task_work);
+ req = rw->wpq.wait.private;
+ ctx = req->ctx;
+
+ spin_lock_irq(&ctx->completion_lock);
+ io_cqring_fill_event(req, -ECANCELED);
+ io_commit_cqring(ctx);
+ spin_unlock_irq(&ctx->completion_lock);
+
+ io_cqring_ev_posted(ctx);
+ req_set_fail_links(req);
+ io_double_put_req(req);
+}
+
+static void io_async_buf_retry(struct callback_head *cb)
+{
+ struct io_async_rw *rw;
+ struct io_ring_ctx *ctx;
+ struct io_kiocb *req;
+
+ rw = container_of(cb, struct io_async_rw, task_work);
+ req = rw->wpq.wait.private;
+ ctx = req->ctx;
+
+ __set_current_state(TASK_RUNNING);
+ mutex_lock(&ctx->uring_lock);
+ __io_queue_sqe(req, NULL);
+ mutex_unlock(&ctx->uring_lock);
+}
+
+static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
+ int sync, void *arg)
+{
+ struct wait_page_queue *wpq;
+ struct io_kiocb *req = wait->private;
+ struct io_async_rw *rw = &req->io->rw;
+ struct wait_page_key *key = arg;
+ struct task_struct *tsk;
+ int ret;
+
+ wpq = container_of(wait, struct wait_page_queue, wait);
+
+ ret = wake_page_match(wpq, key);
+ if (ret != 1)
+ return ret;
+
+ list_del_init(&wait->entry);
+
+ init_task_work(&rw->task_work, io_async_buf_retry);
+ /* submit ref gets dropped, acquire a new one */
+ refcount_inc(&req->refs);
+ tsk = req->task;
+ ret = task_work_add(tsk, &rw->task_work, true);
+ if (unlikely(ret)) {
+ /* queue just for cancelation */
+ init_task_work(&rw->task_work, io_async_buf_cancel);
+ tsk = io_wq_get_task(req->ctx->io_wq);
+ task_work_add(tsk, &rw->task_work, true);
+ }
+ wake_up_process(tsk);
+ return 1;
+}
+
+static bool io_rw_should_retry(struct io_kiocb *req)
+{
+ struct kiocb *kiocb = &req->rw.kiocb;
+ int ret;
+
+ /* never retry for NOWAIT, we just complete with -EAGAIN */
+ if (req->flags & REQ_F_NOWAIT)
+ return false;
+
+ /* already tried, or we're doing O_DIRECT */
+ if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_WAITQ))
+ return false;
+ /*
+ * just use poll if we can, and don't attempt if the fs doesn't
+ * support callback based unlocks
+ */
+ if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC))
+ return false;
+
+ /*
+ * If request type doesn't require req->io to defer in general,
+ * we need to allocate it here
+ */
+ if (!req->io && __io_alloc_async_ctx(req))
+ return false;
+
+ ret = kiocb_wait_page_queue_init(kiocb, &req->io->rw.wpq,
+ io_async_buf_func, req);
+ if (!ret) {
+ get_task_struct(current);
+ req->task = current;
+ return true;
+ }
+
+ return false;
+}
+
static int io_read(struct io_kiocb *req, bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
@@ -2601,6 +2709,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
if (!ret) {
ssize_t ret2;
+retry:
if (req->file->f_op->read_iter)
ret2 = call_read_iter(req->file, kiocb, &iter);
else
@@ -2619,6 +2728,9 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
if (!(req->flags & REQ_F_NOWAIT) &&
!file_can_poll(req->file))
req->flags |= REQ_F_MUST_PUNT;
+ if (io_rw_should_retry(req))
+ goto retry;
+ kiocb->ki_flags &= ~IOCB_WAITQ;
return -EAGAIN;
}
}
--
2.26.2
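
From userspace, nothing changes except performance. A sketch of the
kind of workload that benefits, assuming a kernel with this series and
liburing installed ('testfile' is a placeholder): a plain buffered
read, no O_DIRECT, on a file system that sets FMODE_BUF_RASYNC now
completes without io-wq thread offload.

#include <fcntl.h>
#include <stdio.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	char buf[4096];
	int fd;

	if (io_uring_queue_init(8, &ring, 0) < 0)
		return 1;
	fd = open("testfile", O_RDONLY);	/* buffered, not O_DIRECT */
	if (fd < 0)
		return 1;

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, fd, buf, sizeof(buf), 0);
	io_uring_submit(&ring);

	if (io_uring_wait_cqe(&ring, &cqe) == 0) {
		printf("read returned %d\n", cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return 0;
}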