* [PATCH mm-unstable v1 1/3] mm/nommu: factor out check for NOMMU shared mappings into is_nommu_shared_mapping()
2023-01-02 16:08 [PATCH mm-unstable v1 0/3] mm/nommu: don't use VM_MAYSHARE for MAP_PRIVATE mappings David Hildenbrand
@ 2023-01-02 16:08 ` David Hildenbrand
2023-01-02 16:08 ` [PATCH mm-unstable v1 2/3] mm/nommu: don't use VM_MAYSHARE for MAP_PRIVATE mappings David Hildenbrand
2023-01-02 16:08 ` [PATCH mm-unstable v1 3/3] drivers/misc/open-dice: don't touch VM_MAYSHARE David Hildenbrand
2 siblings, 0 replies; 4+ messages in thread
From: David Hildenbrand @ 2023-01-02 16:08 UTC (permalink / raw)
To: linux-kernel
Cc: linux-mm, linux-fsdevel, io-uring, David Hildenbrand,
Andrew Morton, Arnd Bergmann, Greg Kroah-Hartman, Nicolas Pitre,
Jens Axboe, Pavel Begunkov
We want to stop using VM_MAYSHARE in private mappings to pave the way
for clarifying the semantics of VM_MAYSHARE vs. VM_SHARED and reduce
the confusion. While CONFIG_MMU uses VM_MAYSHARE to represent MAP_SHARED,
!CONFIG_MMU also sets VM_MAYSHARE for selected R/O private file mappings
that are an effective overlay of a file mapping.
Let's factor out all relevant VM_MAYSHARE checks in !CONFIG_MMU code into
is_nommu_shared_mapping() first.
Note that whenever VM_SHARED is set, VM_MAYSHARE must be set as well
(unless there is a serious BUG). So there is not need to test for VM_SHARED
manually.
No functional change intended.
Signed-off-by: David Hildenbrand <[email protected]>
---
drivers/char/mem.c | 2 +-
fs/cramfs/inode.c | 2 +-
fs/proc/task_nommu.c | 2 +-
fs/ramfs/file-nommu.c | 2 +-
fs/romfs/mmap-nommu.c | 2 +-
include/linux/mm.h | 15 +++++++++++++++
io_uring/io_uring.c | 2 +-
mm/nommu.c | 11 ++++++-----
8 files changed, 27 insertions(+), 11 deletions(-)
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 83bf2a4dcb57..ffb101d349f0 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -343,7 +343,7 @@ static unsigned zero_mmap_capabilities(struct file *file)
/* can't do an in-place private mapping if there's no MMU */
static inline int private_mapping_ok(struct vm_area_struct *vma)
{
- return vma->vm_flags & VM_MAYSHARE;
+ return is_nommu_shared_mapping(vma->vm_flags);
}
#else
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 61ccf7722fc3..50e4e060db68 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -437,7 +437,7 @@ static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma)
static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma)
{
- return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -ENOSYS;
+ return is_nommu_shared_mapping(vma->vm_flags) ? 0 : -ENOSYS;
}
static unsigned long cramfs_physmem_get_unmapped_area(struct file *file,
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 2fd06f52b6a4..0ec35072a8e5 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -38,7 +38,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
}
if (atomic_read(&mm->mm_count) > 1 ||
- vma->vm_flags & VM_MAYSHARE) {
+ is_nommu_shared_mapping(vma->vm_flags)) {
sbytes += size;
} else {
bytes += size;
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index cb240eac5036..cd4537692751 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -264,7 +264,7 @@ static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
*/
static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma)
{
- if (!(vma->vm_flags & (VM_SHARED | VM_MAYSHARE)))
+ if (!is_nommu_shared_mapping(vma->vm_flags))
return -ENOSYS;
file_accessed(file);
diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c
index 2c4a23113fb5..4578dc45e50a 100644
--- a/fs/romfs/mmap-nommu.c
+++ b/fs/romfs/mmap-nommu.c
@@ -63,7 +63,7 @@ static unsigned long romfs_get_unmapped_area(struct file *file,
*/
static int romfs_mmap(struct file *file, struct vm_area_struct *vma)
{
- return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -ENOSYS;
+ return is_nommu_shared_mapping(vma->vm_flags) ? 0 : -ENOSYS;
}
static unsigned romfs_mmap_capabilities(struct file *file)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c37f9330f14e..28568a92e5df 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1347,6 +1347,21 @@ static inline bool is_cow_mapping(vm_flags_t flags)
return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
}
+#ifndef CONFIG_MMU
+static inline bool is_nommu_shared_mapping(vm_flags_t flags)
+{
+ /*
+ * NOMMU shared mappings are ordinary MAP_SHARED mappings and selected
+ * R/O MAP_PRIVATE file mappings that are an effective R/O overlay of
+ * a file mapping. R/O MAP_PRIVATE mappings might still modify
+ * underlying memory if ptrace is active, so this is only possible if
+ * ptrace does not apply. Note that there is no mprotect() to upgrade
+ * write permissions later.
+ */
+ return flags & VM_MAYSHARE;
+}
+#endif
+
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
#define SECTION_IN_PAGE_FLAGS
#endif
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index ff2bbac1a10f..ae7d2a9eb63f 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3194,7 +3194,7 @@ static __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
static int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
{
- return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -EINVAL;
+ return is_nommu_shared_mapping(vma->vm_flags) ? 0 : -EINVAL;
}
static unsigned int io_uring_nommu_mmap_capabilities(struct file *file)
diff --git a/mm/nommu.c b/mm/nommu.c
index 214c70e1d059..6c4bdc07a776 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -958,9 +958,10 @@ static int do_mmap_private(struct vm_area_struct *vma,
*/
if (capabilities & NOMMU_MAP_DIRECT) {
ret = call_mmap(vma->vm_file, vma);
+ /* shouldn't return success if we're not sharing */
+ if (WARN_ON_ONCE(!is_nommu_shared_mapping(vma->vm_flags)))
+ ret = -ENOSYS;
if (ret == 0) {
- /* shouldn't return success if we're not sharing */
- BUG_ON(!(vma->vm_flags & VM_MAYSHARE));
vma->vm_region->vm_top = vma->vm_region->vm_end;
return 0;
}
@@ -1106,7 +1107,7 @@ unsigned long do_mmap(struct file *file,
* these cases, sharing is handled in the driver or filesystem rather
* than here
*/
- if (vm_flags & VM_MAYSHARE) {
+ if (is_nommu_shared_mapping(vm_flags)) {
struct vm_region *pregion;
unsigned long pglen, rpglen, pgend, rpgend, start;
@@ -1116,7 +1117,7 @@ unsigned long do_mmap(struct file *file,
for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) {
pregion = rb_entry(rb, struct vm_region, vm_rb);
- if (!(pregion->vm_flags & VM_MAYSHARE))
+ if (!is_nommu_shared_mapping(pregion->vm_flags))
continue;
/* search for overlapping mappings on the same file */
@@ -1597,7 +1598,7 @@ static unsigned long do_mremap(unsigned long addr,
if (vma->vm_end != vma->vm_start + old_len)
return (unsigned long) -EFAULT;
- if (vma->vm_flags & VM_MAYSHARE)
+ if (is_nommu_shared_mapping(vma->vm_flags))
return (unsigned long) -EPERM;
if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start)
--
2.39.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH mm-unstable v1 2/3] mm/nommu: don't use VM_MAYSHARE for MAP_PRIVATE mappings
2023-01-02 16:08 [PATCH mm-unstable v1 0/3] mm/nommu: don't use VM_MAYSHARE for MAP_PRIVATE mappings David Hildenbrand
2023-01-02 16:08 ` [PATCH mm-unstable v1 1/3] mm/nommu: factor out check for NOMMU shared mappings into is_nommu_shared_mapping() David Hildenbrand
@ 2023-01-02 16:08 ` David Hildenbrand
2023-01-02 16:08 ` [PATCH mm-unstable v1 3/3] drivers/misc/open-dice: don't touch VM_MAYSHARE David Hildenbrand
2 siblings, 0 replies; 4+ messages in thread
From: David Hildenbrand @ 2023-01-02 16:08 UTC (permalink / raw)
To: linux-kernel
Cc: linux-mm, linux-fsdevel, io-uring, David Hildenbrand,
Andrew Morton, Arnd Bergmann, Greg Kroah-Hartman, Nicolas Pitre,
Jens Axboe, Pavel Begunkov
Let's stop using VM_MAYSHARE for MAP_PRIVATE mappings and use VM_MAYOVERLAY
instead. Rewrite determine_vm_flags() to make the whole logic easier to
digest, and to cleanly separate MAP_PRIVATE vs. MAP_SHARED.
No functional change intended.
Signed-off-by: David Hildenbrand <[email protected]>
---
include/linux/mm.h | 7 ++++++-
mm/nommu.c | 51 +++++++++++++++++++++++++++-------------------
2 files changed, 36 insertions(+), 22 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 28568a92e5df..510f2e7cccdb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -276,7 +276,12 @@ extern unsigned int kobjsize(const void *objp);
#define VM_MAYSHARE 0x00000080
#define VM_GROWSDOWN 0x00000100 /* general info on the segment */
+#ifdef CONFIG_MMU
#define VM_UFFD_MISSING 0x00000200 /* missing pages tracking */
+#else /* CONFIG_MMU */
+#define VM_MAYOVERLAY 0x00000200 /* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */
+#define VM_UFFD_MISSING 0
+#endif /* CONFIG_MMU */
#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */
#define VM_UFFD_WP 0x00001000 /* wrprotect pages tracking */
@@ -1358,7 +1363,7 @@ static inline bool is_nommu_shared_mapping(vm_flags_t flags)
* ptrace does not apply. Note that there is no mprotect() to upgrade
* write permissions later.
*/
- return flags & VM_MAYSHARE;
+ return flags & (VM_MAYSHARE | VM_MAYOVERLAY);
}
#endif
diff --git a/mm/nommu.c b/mm/nommu.c
index 6c4bdc07a776..5c628c868648 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -892,29 +892,36 @@ static unsigned long determine_vm_flags(struct file *file,
unsigned long vm_flags;
vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(flags);
- /* vm_flags |= mm->def_flags; */
- if (!(capabilities & NOMMU_MAP_DIRECT)) {
- /* attempt to share read-only copies of mapped file chunks */
+ if (!file) {
+ /*
+ * MAP_ANONYMOUS. MAP_SHARED is mapped to MAP_PRIVATE, because
+ * there is no fork().
+ */
vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
- if (file && !(prot & PROT_WRITE))
- vm_flags |= VM_MAYSHARE;
+ } else if (flags & MAP_PRIVATE) {
+ /* MAP_PRIVATE file mapping */
+ if (capabilities & NOMMU_MAP_DIRECT)
+ vm_flags |= (capabilities & NOMMU_VMFLAGS);
+ else
+ vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+
+ if (!(prot & PROT_WRITE) && !current->ptrace)
+ /*
+ * R/O private file mapping which cannot be used to
+ * modify memory, especially also not via active ptrace
+ * (e.g., set breakpoints) or later by upgrading
+ * permissions (no mprotect()). We can try overlaying
+ * the file mapping, which will work e.g., on chardevs,
+ * ramfs/tmpfs/shmfs and romfs/cramf.
+ */
+ vm_flags |= VM_MAYOVERLAY;
} else {
- /* overlay a shareable mapping on the backing device or inode
- * if possible - used for chardevs, ramfs/tmpfs/shmfs and
- * romfs/cramfs */
- vm_flags |= VM_MAYSHARE | (capabilities & NOMMU_VMFLAGS);
- if (flags & MAP_SHARED)
- vm_flags |= VM_SHARED;
+ /* MAP_SHARED file mapping: NOMMU_MAP_DIRECT is set. */
+ vm_flags |= VM_SHARED | VM_MAYSHARE |
+ (capabilities & NOMMU_VMFLAGS);
}
- /* refuse to let anyone share private mappings with this process if
- * it's being traced - otherwise breakpoints set in it may interfere
- * with another untraced process
- */
- if ((flags & MAP_PRIVATE) && current->ptrace)
- vm_flags &= ~VM_MAYSHARE;
-
return vm_flags;
}
@@ -952,9 +959,11 @@ static int do_mmap_private(struct vm_area_struct *vma,
void *base;
int ret, order;
- /* invoke the file's mapping function so that it can keep track of
- * shared mappings on devices or memory
- * - VM_MAYSHARE will be set if it may attempt to share
+ /*
+ * Invoke the file's mapping function so that it can keep track of
+ * shared mappings on devices or memory. VM_MAYOVERLAY will be set if
+ * it may attempt to share, which will make is_nommu_shared_mapping()
+ * happy.
*/
if (capabilities & NOMMU_MAP_DIRECT) {
ret = call_mmap(vma->vm_file, vma);
--
2.39.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH mm-unstable v1 3/3] drivers/misc/open-dice: don't touch VM_MAYSHARE
2023-01-02 16:08 [PATCH mm-unstable v1 0/3] mm/nommu: don't use VM_MAYSHARE for MAP_PRIVATE mappings David Hildenbrand
2023-01-02 16:08 ` [PATCH mm-unstable v1 1/3] mm/nommu: factor out check for NOMMU shared mappings into is_nommu_shared_mapping() David Hildenbrand
2023-01-02 16:08 ` [PATCH mm-unstable v1 2/3] mm/nommu: don't use VM_MAYSHARE for MAP_PRIVATE mappings David Hildenbrand
@ 2023-01-02 16:08 ` David Hildenbrand
2 siblings, 0 replies; 4+ messages in thread
From: David Hildenbrand @ 2023-01-02 16:08 UTC (permalink / raw)
To: linux-kernel
Cc: linux-mm, linux-fsdevel, io-uring, David Hildenbrand,
Andrew Morton, Arnd Bergmann, Greg Kroah-Hartman, Nicolas Pitre,
Jens Axboe, Pavel Begunkov
A MAP_SHARED mapping always has VM_MAYSHARE set, and writable
(VM_MAYWRITE) MAP_SHARED mappings have VM_SHARED set as well. To
identify a MAP_SHARED mapping, it's sufficient to look at VM_MAYSHARE.
We cannot have VM_MAYSHARE|VM_WRITE mappings without having VM_SHARED
set. Consequently, current code will never actually end up clearing
VM_MAYSHARE and that code is confusing, because nobody is supposed to
mess with VM_MAYWRITE.
Let's clean it up and restructure the code. No functional change intended.
Cc: Arnd Bergmann <[email protected]>
Cc: Greg Kroah-Hartman <[email protected]>
Signed-off-by: David Hildenbrand <[email protected]>
---
drivers/misc/open-dice.c | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/drivers/misc/open-dice.c b/drivers/misc/open-dice.c
index c61be3404c6f..9dda47b3fd70 100644
--- a/drivers/misc/open-dice.c
+++ b/drivers/misc/open-dice.c
@@ -90,15 +90,13 @@ static int open_dice_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct open_dice_drvdata *drvdata = to_open_dice_drvdata(filp);
- /* Do not allow userspace to modify the underlying data. */
- if ((vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_SHARED))
- return -EPERM;
-
- /* Ensure userspace cannot acquire VM_WRITE + VM_SHARED later. */
- if (vma->vm_flags & VM_WRITE)
- vma->vm_flags &= ~VM_MAYSHARE;
- else if (vma->vm_flags & VM_SHARED)
+ if (vma->vm_flags & VM_MAYSHARE) {
+ /* Do not allow userspace to modify the underlying data. */
+ if (vma->vm_flags & VM_WRITE)
+ return -EPERM;
+ /* Ensure userspace cannot acquire VM_WRITE later. */
vma->vm_flags &= ~VM_MAYWRITE;
+ }
/* Create write-combine mapping so all clients observe a wipe. */
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
--
2.39.0
^ permalink raw reply related [flat|nested] 4+ messages in thread