public inbox for [email protected]
 help / color / mirror / Atom feed
* [RFC PATCH] fs/lock: increase the filp's reference for Posix-style locks
@ 2022-11-07  9:52 xiubli
  2022-11-07 10:33 ` Jeff Layton
  0 siblings, 1 reply; 5+ messages in thread
From: xiubli @ 2022-11-07  9:52 UTC (permalink / raw)
  To: viro, jlayton, chuck.lever
  Cc: axboe, asml.silence, linux-kernel, linux-fsdevel, io-uring,
	ceph-devel, mchangir, idryomov, lhenriques, gfarnum, Xiubo Li

From: Xiubo Li <[email protected]>

When closing the file descripters in parallel in multiple threads,
who are sharing the same file descripters, the filp_close() will
remove all the Posix-style locks. But if two threads both calling
the filp_close() it may race and cause use-after-free crash:

 PID: 327771   TASK: ffff952aa1db3180  CPU: 8    COMMAND: "db2fmp"
  #0 [ffff95202f33b960] machine_kexec at ffffffff890662f4
  #1 [ffff95202f33b9c0] __crash_kexec at ffffffff89122b82
  #2 [ffff95202f33ba90] crash_kexec at ffffffff89122c70
  #3 [ffff95202f33baa8] oops_end at ffffffff89791798
  #4 [ffff95202f33bad0] no_context at ffffffff89075d14
  #5 [ffff95202f33bb20] __bad_area_nosemaphore at ffffffff89075fe2
  #6 [ffff95202f33bb70] bad_area_nosemaphore at ffffffff89076104
  #7 [ffff95202f33bb80] __do_page_fault at ffffffff89794750
  #8 [ffff95202f33bbf0] do_page_fault at ffffffff89794975
  #9 [ffff95202f33bc20] page_fault at ffffffff89790778
     [exception RIP: ceph_fl_release_lock+20]
     RIP: ffffffffc08247a4  RSP: ffff95202f33bcd0  RFLAGS: 00010286
     RAX: ffff952d4ebd8a00  RBX: 0000000000000000  RCX: dead000000000200
     RDX: ffff95202f33bd60  RSI: ffff95202f33bd60  RDI: ffff9526b6ac5b00
     RBP: ffff95202f33bce0   R8: ffff9526b6ac5b18   R9: ffffffffc083c368
     R10: 0000000000001109  R11: 0000000000000000  R12: ffff95202f33bd60
     R13: ffff9526b6ac5b00  R14: 0000000000000000  R15: 0000000000000000
     ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
 #10 [ffff95202f33bce8] locks_release_private at ffffffff892ab3d7
 #11 [ffff95202f33bd00] locks_free_lock at ffffffff892ac34d
 #12 [ffff95202f33bd18] locks_dispose_list at ffffffff892ac44b
 #13 [ffff95202f33bd40] __posix_lock_file at ffffffff892acdfa
 #14 [ffff95202f33bda8] posix_lock_file at ffffffff892ad146
 #15 [ffff95202f33bdb8] ceph_lock at ffffffffc0824e8a [ceph]
 #16 [ffff95202f33bdf8] vfs_lock_file at ffffffff892ad185
 #17 [ffff95202f33be08] locks_remove_posix at ffffffff892ad239
 #18 [ffff95202f33bee0] locks_remove_posix at ffffffff892ad2a0
 #19 [ffff95202f33bef0] filp_close at ffffffff8924baa6
 #20 [ffff95202f33bf18] __close_fd at ffffffff8926f89c
 #21 [ffff95202f33bf40] sys_close at ffffffff8924d503
 #22 [ffff95202f33bf50] system_call_fastpath at ffffffff89799f92
     RIP: 00007f806ec446ab  RSP: 00007f80517f0d90  RFLAGS: 00010206
     RAX: 0000000000000003  RBX: 00007f8030001a20  RCX: 00007f80300386b0
     RDX: 00007f806ef0d880  RSI: 0000000000000001  RDI: 0000000000000006
     RBP: 00007f806ef0e3c0   R8: 00007f80517fa700   R9: 0000000000000000
     R10: 0000000000000000  R11: 0000000000000206  R12: 0000000000000000
     R13: 00007f80300035b0  R14: 00007f80517f1104  R15: 000000000000006c
     ORIG_RAX: 0000000000000003  CS: 0033  SS: 002b

We need to make sure that the filp in the file_lock shouldn't be
release when any file_lock is still referring to it.

For the Posix-style locks, whose owner will be the thread ids, we
will increase the filp's reference.

URL: https://tracker.ceph.com/issues/57986
Signed-off-by: Xiubo Li <[email protected]>
---
 drivers/android/binder.c |  2 +-
 fs/file.c                | 15 ++++++++++-----
 fs/locks.c               | 18 +++++++++++++++---
 include/linux/fs.h       | 14 ++++++++++++++
 io_uring/openclose.c     |  3 ++-
 5 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 880224ec6abb..03692564d940 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -1924,7 +1924,7 @@ static void binder_deferred_fd_close(int fd)
 	if (twcb->file) {
 		// pin it until binder_do_fd_close(); see comments there
 		get_file(twcb->file);
-		filp_close(twcb->file, current->files);
+		filp_close(twcb->file, file_lock_make_thread_owner(current->files));
 		task_work_add(current, &twcb->twork, TWA_RESUME);
 	} else {
 		kfree(twcb);
diff --git a/fs/file.c b/fs/file.c
index 5f9c802a5d8d..39ad8e74a8d9 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -417,6 +417,7 @@ static struct fdtable *close_files(struct files_struct * files)
 	 * files structure.
 	 */
 	struct fdtable *fdt = rcu_dereference_raw(files->fdt);
+	fl_owner_t owner = file_lock_make_thread_owner(files);
 	unsigned int i, j = 0;
 
 	for (;;) {
@@ -429,7 +430,7 @@ static struct fdtable *close_files(struct files_struct * files)
 			if (set & 1) {
 				struct file * file = xchg(&fdt->fd[i], NULL);
 				if (file) {
-					filp_close(file, files);
+					filp_close(file, owner);
 					cond_resched();
 				}
 			}
@@ -653,6 +654,7 @@ static struct file *pick_file(struct files_struct *files, unsigned fd)
 int close_fd(unsigned fd)
 {
 	struct files_struct *files = current->files;
+	fl_owner_t owner = file_lock_make_thread_owner(files);
 	struct file *file;
 
 	spin_lock(&files->file_lock);
@@ -661,7 +663,7 @@ int close_fd(unsigned fd)
 	if (!file)
 		return -EBADF;
 
-	return filp_close(file, files);
+	return filp_close(file, owner);
 }
 EXPORT_SYMBOL(close_fd); /* for ksys_close() */
 
@@ -695,6 +697,7 @@ static inline void __range_cloexec(struct files_struct *cur_fds,
 static inline void __range_close(struct files_struct *cur_fds, unsigned int fd,
 				 unsigned int max_fd)
 {
+	fl_owner_t owner = file_lock_make_thread_owner(cur_fds);
 	unsigned n;
 
 	rcu_read_lock();
@@ -711,7 +714,7 @@ static inline void __range_close(struct files_struct *cur_fds, unsigned int fd,
 
 		if (file) {
 			/* found a valid file to close */
-			filp_close(file, cur_fds);
+			filp_close(file, owner);
 			cond_resched();
 		}
 	}
@@ -816,6 +819,7 @@ struct file *close_fd_get_file(unsigned int fd)
 
 void do_close_on_exec(struct files_struct *files)
 {
+	fl_owner_t owner = file_lock_make_thread_owner(files);
 	unsigned i;
 	struct fdtable *fdt;
 
@@ -841,7 +845,7 @@ void do_close_on_exec(struct files_struct *files)
 			rcu_assign_pointer(fdt->fd[fd], NULL);
 			__put_unused_fd(files, fd);
 			spin_unlock(&files->file_lock);
-			filp_close(file, files);
+			filp_close(file, owner);
 			cond_resched();
 			spin_lock(&files->file_lock);
 		}
@@ -1080,6 +1084,7 @@ static int do_dup2(struct files_struct *files,
 	struct file *file, unsigned fd, unsigned flags)
 __releases(&files->file_lock)
 {
+	fl_owner_t owner = file_lock_make_thread_owner(files);
 	struct file *tofree;
 	struct fdtable *fdt;
 
@@ -1111,7 +1116,7 @@ __releases(&files->file_lock)
 	spin_unlock(&files->file_lock);
 
 	if (tofree)
-		filp_close(tofree, files);
+		filp_close(tofree, owner);
 
 	return fd;
 
diff --git a/fs/locks.c b/fs/locks.c
index 607f94a0e789..e8b67f87e0ee 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -331,6 +331,8 @@ EXPORT_SYMBOL_GPL(locks_owner_has_blockers);
 /* Free a lock which is not in use. */
 void locks_free_lock(struct file_lock *fl)
 {
+	if (fl->fl_file && file_lock_is_thread_owner(fl->fl_owner))
+		fput(fl->fl_file);
 	locks_release_private(fl);
 	kmem_cache_free(filelock_cache, fl);
 }
@@ -384,7 +386,10 @@ void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
 
 	locks_copy_conflock(new, fl);
 
-	new->fl_file = fl->fl_file;
+	if (file_lock_is_thread_owner(new->fl_owner))
+		new->fl_file = get_file(fl->fl_file);
+	else
+		new->fl_file = fl->fl_file;
 	new->fl_ops = fl->fl_ops;
 
 	if (fl->fl_ops) {
@@ -488,13 +493,14 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
 	} else
 		fl->fl_end = OFFSET_MAX;
 
-	fl->fl_owner = current->files;
+	fl->fl_owner = file_lock_make_thread_owner(current->files);
 	fl->fl_pid = current->tgid;
-	fl->fl_file = filp;
+	fl->fl_file = get_file(filp);
 	fl->fl_flags = FL_POSIX;
 	fl->fl_ops = NULL;
 	fl->fl_lmops = NULL;
 
+
 	return assign_type(fl, l->l_type);
 }
 
@@ -2243,6 +2249,7 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock *flock)
 
 		fl->fl_flags |= FL_OFDLCK;
 		fl->fl_owner = filp;
+		fput(filp);
 	}
 
 	error = vfs_test_lock(filp, fl);
@@ -2376,6 +2383,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 		cmd = F_SETLK;
 		file_lock->fl_flags |= FL_OFDLCK;
 		file_lock->fl_owner = filp;
+		fput(filp);
 		break;
 	case F_OFD_SETLKW:
 		error = -EINVAL;
@@ -2385,6 +2393,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 		cmd = F_SETLKW;
 		file_lock->fl_flags |= FL_OFDLCK;
 		file_lock->fl_owner = filp;
+		fput(filp);
 		fallthrough;
 	case F_SETLKW:
 		file_lock->fl_flags |= FL_SLEEP;
@@ -2450,6 +2459,7 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 *flock)
 		cmd = F_GETLK64;
 		fl->fl_flags |= FL_OFDLCK;
 		fl->fl_owner = filp;
+		fput(filp);
 	}
 
 	error = vfs_test_lock(filp, fl);
@@ -2499,6 +2509,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 		cmd = F_SETLK64;
 		file_lock->fl_flags |= FL_OFDLCK;
 		file_lock->fl_owner = filp;
+		fput(filp);
 		break;
 	case F_OFD_SETLKW:
 		error = -EINVAL;
@@ -2508,6 +2519,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 		cmd = F_SETLKW64;
 		file_lock->fl_flags |= FL_OFDLCK;
 		file_lock->fl_owner = filp;
+		fput(filp);
 		fallthrough;
 	case F_SETLKW64:
 		file_lock->fl_flags |= FL_SLEEP;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e654435f1651..d7d81962a863 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1028,6 +1028,20 @@ static inline struct file *get_file(struct file *f)
 /* legacy typedef, should eventually be removed */
 typedef void *fl_owner_t;
 
+/*
+ * Set the last significant bit to 1 to mark that
+ * we have get a reference of the fl->fl_file.
+ */
+static inline fl_owner_t file_lock_make_thread_owner(fl_owner_t owner)
+{
+	return (fl_owner_t)((unsigned long)owner | 1UL);
+}
+
+static inline bool file_lock_is_thread_owner(fl_owner_t owner)
+{
+	return ((unsigned long)owner & 1UL);
+}
+
 struct file_lock;
 
 struct file_lock_operations {
diff --git a/io_uring/openclose.c b/io_uring/openclose.c
index 67178e4bb282..5a12cdf7f8d0 100644
--- a/io_uring/openclose.c
+++ b/io_uring/openclose.c
@@ -212,6 +212,7 @@ int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 int io_close(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct files_struct *files = current->files;
+	fl_owner_t owner = file_lock_make_thread_owner(files);
 	struct io_close *close = io_kiocb_to_cmd(req, struct io_close);
 	struct fdtable *fdt;
 	struct file *file;
@@ -247,7 +248,7 @@ int io_close(struct io_kiocb *req, unsigned int issue_flags)
 		goto err;
 
 	/* No ->flush() or already async, safely close from here */
-	ret = filp_close(file, current->files);
+	ret = filp_close(file, owner);
 err:
 	if (ret < 0)
 		req_set_fail(req);
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-11-07 12:45 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-11-07  9:52 [RFC PATCH] fs/lock: increase the filp's reference for Posix-style locks xiubli
2022-11-07 10:33 ` Jeff Layton
2022-11-07 12:03   ` Xiubo Li
2022-11-07 12:29     ` Jeff Layton
2022-11-07 12:44       ` Xiubo Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox