* [PATCH v3] mm/ksm: introduce ksm_force for each process @ 2022-05-07 5:47 cgel.zte 2022-05-07 17:59 ` Andrew Morton 0 siblings, 1 reply; 13+ messages in thread From: cgel.zte @ 2022-05-07 5:47 UTC (permalink / raw) To: akpm Cc: keescook, linux-kernel, linux-fsdevel, linux-mm, xu xin, Yang Yang, Ran Xiaokai, wangyong, Yunkai Zhang From: xu xin <[email protected]> To use KSM, we must explicitly call madvise() in application code, which means installed apps on OS needs to be uninstall and source code needs to be modified. It is inconvenient. In order to change this situation, We add a new proc 'ksm_force' under /proc/<pid>/ to support turning on/off KSM scanning of a process's mm dynamically. If ksm_force is set as 1, force all anonymous and 'qualified' vma of this mm to be involved in KSM scanning without explicitly calling madvise to make vma MADV_MERGEABLE. But It is effctive only when the klob of '/sys/kernel/mm/ksm/run' is set as 1. If ksm_enale is set as 0, cancel the feature of ksm_force of this process and unmerge those merged pages which is not madvised as MERGEABLE of this process, but leave MERGEABLE areas merged. Signed-off-by: xu xin <[email protected]> Reviewed-by: Yang Yang <[email protected]> Reviewed-by: Ran Xiaokai <[email protected]> Reviewed-by: wangyong <[email protected]> Reviewed-by: Yunkai Zhang <[email protected]> --- v3: - fix compile error of mm/ksm.c v2: - fix a spelling error in commit log. - remove a redundant condition check in ksm_force_write(). 
--- fs/proc/base.c | 99 ++++++++++++++++++++++++++++++++++++++++ include/linux/mm_types.h | 9 ++++ mm/ksm.c | 32 ++++++++++++- 3 files changed, 138 insertions(+), 2 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 8dfa36a99c74..3115ffa4c9fb 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -96,6 +96,7 @@ #include <linux/time_namespace.h> #include <linux/resctrl.h> #include <linux/cn_proc.h> +#include <linux/ksm.h> #include <trace/events/oom.h> #include "internal.h" #include "fd.h" @@ -3168,6 +3169,102 @@ static int proc_pid_ksm_merging_pages(struct seq_file *m, struct pid_namespace * return 0; } + +static ssize_t ksm_force_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct task_struct *task; + struct mm_struct *mm; + char buffer[PROC_NUMBUF]; + ssize_t len; + int ret; + + task = get_proc_task(file_inode(file)); + if (!task) + return -ESRCH; + + mm = get_task_mm(task); + ret = 0; + if (mm) { + len = snprintf(buffer, sizeof(buffer), "%d\n", mm->ksm_force); + ret = simple_read_from_buffer(buf, count, ppos, buffer, len); + mmput(mm); + } + + return ret; +} + +static ssize_t ksm_force_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task; + struct mm_struct *mm; + char buffer[PROC_NUMBUF]; + int force; + int err = 0; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out_return; + } + + err = kstrtoint(strstrip(buffer), 0, &force); + + if (err) + goto out_return; + if (force != 0 && force != 1) { + err = -EINVAL; + goto out_return; + } + + task = get_proc_task(file_inode(file)); + if (!task) { + err = -ESRCH; + goto out_return; + } + + mm = get_task_mm(task); + if (!mm) + goto out_put_task; + + if (mm->ksm_force != force) { + if (mmap_write_lock_killable(mm)) { + err = -EINTR; + goto out_mmput; + } + + if (force == 0) + mm->ksm_force = force; + else 
{ + /* + * Force anonymous pages of this mm to be involved in KSM merging + * without explicitly calling madvise. + */ + if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) + err = __ksm_enter(mm); + if (!err) + mm->ksm_force = force; + } + + mmap_write_unlock(mm); + } + +out_mmput: + mmput(mm); +out_put_task: + put_task_struct(task); +out_return: + return err < 0 ? err : count; +} + +static const struct file_operations proc_pid_ksm_force_operations = { + .read = ksm_force_read, + .write = ksm_force_write, + .llseek = generic_file_llseek, +}; #endif /* CONFIG_KSM */ #ifdef CONFIG_STACKLEAK_METRICS @@ -3303,6 +3400,7 @@ static const struct pid_entry tgid_base_stuff[] = { #endif #ifdef CONFIG_KSM ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), + REG("ksm_force", S_IRUSR|S_IWUSR, proc_pid_ksm_force_operations), #endif }; @@ -3639,6 +3737,7 @@ static const struct pid_entry tid_base_stuff[] = { #endif #ifdef CONFIG_KSM ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), + REG("ksm_force", S_IRUSR|S_IWUSR, proc_pid_ksm_force_operations), #endif }; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index b34ff2cdbc4f..1b1592c2f5cf 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -661,6 +661,15 @@ struct mm_struct { * merging. */ unsigned long ksm_merging_pages; + /* + * If true, force anonymous pages of this mm to be involved in KSM + * merging without explicitly calling madvise. It is effctive only + * when the klob of '/sys/kernel/mm/ksm/run' is set as 1. If false, + * cancel the feature of ksm_force of this process and unmerge + * those merged pages which is not madvised as MERGEABLE of this + * process, but leave MERGEABLE areas merged. 
+ */ + bool ksm_force; #endif } __randomize_layout; diff --git a/mm/ksm.c b/mm/ksm.c index 38360285497a..c9f672dcc72e 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -334,6 +334,34 @@ static void __init ksm_slab_free(void) mm_slot_cache = NULL; } +/* Check if vma is qualified for ksmd scanning */ +static bool ksm_vma_check(struct vm_area_struct *vma) +{ + unsigned long vm_flags = vma->vm_flags; + + if (!(vma->vm_flags & VM_MERGEABLE) && !(vma->vm_mm->ksm_force)) + return false; + + if (vm_flags & (VM_SHARED | VM_MAYSHARE | + VM_PFNMAP | VM_IO | VM_DONTEXPAND | + VM_HUGETLB | VM_MIXEDMAP)) + return false; /* just ignore this vma*/ + + if (vma_is_dax(vma)) + return false; + +#ifdef VM_SAO + if (vm_flags & VM_SAO) + return false; +#endif +#ifdef VM_SPARC_ADI + if (vm_flags & VM_SPARC_ADI) + return false; +#endif + + return true; +} + static __always_inline bool is_stable_node_chain(struct stable_node *chain) { return chain->rmap_hlist_len == STABLE_NODE_CHAIN; @@ -523,7 +551,7 @@ static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm, if (ksm_test_exit(mm)) return NULL; vma = vma_lookup(mm, addr); - if (!vma || !(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma) + if (!vma || !ksm_vma_check(vma) || !vma->anon_vma) return NULL; return vma; } @@ -2297,7 +2325,7 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page) vma = find_vma(mm, ksm_scan.address); for (; vma; vma = vma->vm_next) { - if (!(vma->vm_flags & VM_MERGEABLE)) + if (!ksm_vma_check(vma)) continue; if (ksm_scan.address < vma->vm_start) ksm_scan.address = vma->vm_start; -- 2.25.1 ^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v3] mm/ksm: introduce ksm_force for each process 2022-05-07 5:47 [PATCH v3] mm/ksm: introduce ksm_force for each process cgel.zte @ 2022-05-07 17:59 ` Andrew Morton 2022-05-08 9:14 ` [PATCH v4] " cgel.zte 2022-05-08 9:27 ` [PATCH v5] " cgel.zte 0 siblings, 2 replies; 13+ messages in thread From: Andrew Morton @ 2022-05-07 17:59 UTC (permalink / raw) To: cgel.zte Cc: keescook, linux-kernel, linux-fsdevel, linux-mm, xu xin, Yang Yang, Ran Xiaokai, wangyong, Yunkai Zhang On Sat, 7 May 2022 05:47:02 +0000 [email protected] wrote: > To use KSM, we must explicitly call madvise() in application code, > which means installed apps on OS needs to be uninstall and source > code needs to be modified. It is inconvenient. > > In order to change this situation, We add a new proc 'ksm_force' > under /proc/<pid>/ to support turning on/off KSM scanning of a > process's mm dynamically. > > If ksm_force is set as 1, force all anonymous and 'qualified' vma > of this mm to be involved in KSM scanning without explicitly > calling madvise to make vma MADV_MERGEABLE. But It is effctive only > when the klob of '/sys/kernel/mm/ksm/run' is set as 1. > > If ksm_enale is set as 0, cancel the feature of ksm_force of this > process and unmerge those merged pages which is not madvised as > MERGEABLE of this process, but leave MERGEABLE areas merged. > There are quite a lot of typos here. Please proof-read it. > fs/proc/base.c | 99 ++++++++++++++++++++++++++++++++++++++++ > include/linux/mm_types.h | 9 ++++ > mm/ksm.c | 32 ++++++++++++- And please update the appropriate places under Documentation/ - all user-facing interfaces should be well documented. ^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v4] mm/ksm: introduce ksm_force for each process 2022-05-07 17:59 ` Andrew Morton @ 2022-05-08 9:14 ` cgel.zte 2022-05-08 9:27 ` [PATCH v5] " cgel.zte 1 sibling, 0 replies; 13+ messages in thread From: cgel.zte @ 2022-05-08 9:14 UTC (permalink / raw) To: akpm Cc: cgel.zte, keescook, linux-fsdevel, linux-kernel, linux-mm, ran.xiaokai, wang.yong12, xu.xin16, yang.yang29, zhang.yunkai From: xu xin <[email protected]> To use KSM, we have to explicitly call madvise() in application code, which means installed apps on the OS need to be uninstalled and their source code needs to be modified. It is inconvenient. In order to change this situation, we add a new proc file ksm_force under /proc/<pid>/ to support turning on/off KSM scanning of a process's mm dynamically. If ksm_force is set to 1, force all anonymous and 'qualified' VMAs of this mm to be involved in KSM scanning without explicitly calling madvise to mark VMA as MADV_MERGEABLE. But it is effective only when the knob of /sys/kernel/mm/ksm/run is set to 1. If ksm_force is set to 0, cancel the feature of ksm_force of this process and unmerge those merged pages belonging to VMAs which is not madvised as MADV_MERGEABLE of this process, but leave MADV_MERGEABLE areas merged. Signed-off-by: xu xin <[email protected]> Reviewed-by: Yang Yang <[email protected]> Reviewed-by: Ran Xiaokai <[email protected]> Reviewed-by: wangyong <[email protected]> Reviewed-by: Yunkai Zhang <[email protected]> --- v4: - fix typos in commit log - add interface descriptions under Documentation/ v3: - fix compile error of mm/ksm.c v2: - fix a spelling error in commit log. - remove a redundant condition check in ksm_force_write(). 
--- Documentation/admin-guide/mm/ksm.rst | 20 +++++- Documentation/filesystems/proc.rst | 17 +++++ fs/proc/base.c | 99 ++++++++++++++++++++++++++++ include/linux/mm_types.h | 9 +++ mm/ksm.c | 32 ++++++++- 5 files changed, 174 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/mm/ksm.rst b/Documentation/admin-guide/mm/ksm.rst index b244f0202a03..e42cffa42463 100644 --- a/Documentation/admin-guide/mm/ksm.rst +++ b/Documentation/admin-guide/mm/ksm.rst @@ -32,7 +32,7 @@ are swapped back in: ksmd must rediscover their identity and merge again). Controlling KSM with madvise ============================ -KSM only operates on those areas of address space which an application +KSM can operates on those areas of address space which an application has advised to be likely candidates for merging, by using the madvise(2) system call:: @@ -70,6 +70,24 @@ Applications should be considerate in their use of MADV_MERGEABLE, restricting its use to areas likely to benefit. KSM's scans may use a lot of processing power: some installations will disable KSM for that reason. +Controlling KSM with procfs +=========================== + +KSM can also operate on anonymous areas of address space of those processes's +knob ``/proc/<pid>/ksm_force`` is on, even if app codes doesn't call madvise() +explicitly to advise specific areas as MADV_MERGEABLE. + +You can set ksm_force to 1 to force all anonymous and qualified VMAs of +this process to be involved in KSM scanning. But It is effective only when the +klob of ``/sys/kernel/mm/ksm/run`` is set as 1. + e.g. ``echo 1 > /proc/<pid>/ksm_force`` + +You can also set ksm_force to 0 to cancel that force feature of this process +and unmerge those merged pages which belongs to those VMAs not marked as +MADV_MERGEABLE of this process. But that will leave those pages belonging to +VMAs marked as MADV_MERGEABLE merged. + e.g. ``echo 0 > /proc/<pid>/ksm_force`` + .. 
_ksm_sysfs: KSM daemon sysfs interface diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 061744c436d9..414977885e6e 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -47,6 +47,7 @@ fixes/update part 1.1 Stefani Seibold <[email protected]> June 9 2009 3.10 /proc/<pid>/timerslack_ns - Task timerslack value 3.11 /proc/<pid>/patch_state - Livepatch patch operation state 3.12 /proc/<pid>/arch_status - Task architecture specific information + 3.13 /proc/<pid>/ksm_force - Setting of mandatory involvement in KSM 4 Configuring procfs 4.1 Mount options @@ -2176,6 +2177,22 @@ AVX512_elapsed_ms the task is unlikely an AVX512 user, but depends on the workload and the scheduling scenario, it also could be a false negative mentioned above. +3.11 /proc/<pid>/ksm_force - Setting of mandatory involvement in KSM +----------------------------------------------------------------------- +When CONFIG_KSM is enabled, this file can be used to specify if this +process's anonymous memory can be involved in KSM scanning without app codes +explicitly calling madvise to mark memory address as MADV_MERGEABLE. + +If writing 1 to this file, the kernel will force all anonymous and qualified +memory to be involved in KSM scanning without explicitly calling madvise to +mark memory address as MADV_MERGEABLE. But that is effective only when the +klob of '/sys/kernel/mm/ksm/run' is set as 1. + +If writing 0 to this file, the mandatory KSM feature of this process's will +be cancelled and unmerge those merged pages which belongs to those areas not +marked as MADV_MERGEABLE of this process, but leave those pages belonging to +areas marked as MADV_MERGEABLE merged. 
+ Chapter 4: Configuring procfs ============================= diff --git a/fs/proc/base.c b/fs/proc/base.c index 8dfa36a99c74..3115ffa4c9fb 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -96,6 +96,7 @@ #include <linux/time_namespace.h> #include <linux/resctrl.h> #include <linux/cn_proc.h> +#include <linux/ksm.h> #include <trace/events/oom.h> #include "internal.h" #include "fd.h" @@ -3168,6 +3169,102 @@ static int proc_pid_ksm_merging_pages(struct seq_file *m, struct pid_namespace * return 0; } + +static ssize_t ksm_force_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct task_struct *task; + struct mm_struct *mm; + char buffer[PROC_NUMBUF]; + ssize_t len; + int ret; + + task = get_proc_task(file_inode(file)); + if (!task) + return -ESRCH; + + mm = get_task_mm(task); + ret = 0; + if (mm) { + len = snprintf(buffer, sizeof(buffer), "%d\n", mm->ksm_force); + ret = simple_read_from_buffer(buf, count, ppos, buffer, len); + mmput(mm); + } + + return ret; +} + +static ssize_t ksm_force_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task; + struct mm_struct *mm; + char buffer[PROC_NUMBUF]; + int force; + int err = 0; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out_return; + } + + err = kstrtoint(strstrip(buffer), 0, &force); + + if (err) + goto out_return; + if (force != 0 && force != 1) { + err = -EINVAL; + goto out_return; + } + + task = get_proc_task(file_inode(file)); + if (!task) { + err = -ESRCH; + goto out_return; + } + + mm = get_task_mm(task); + if (!mm) + goto out_put_task; + + if (mm->ksm_force != force) { + if (mmap_write_lock_killable(mm)) { + err = -EINTR; + goto out_mmput; + } + + if (force == 0) + mm->ksm_force = force; + else { + /* + * Force anonymous pages of this mm to be involved in KSM merging + * without explicitly calling madvise. 
+ */ + if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) + err = __ksm_enter(mm); + if (!err) + mm->ksm_force = force; + } + + mmap_write_unlock(mm); + } + +out_mmput: + mmput(mm); +out_put_task: + put_task_struct(task); +out_return: + return err < 0 ? err : count; +} + +static const struct file_operations proc_pid_ksm_force_operations = { + .read = ksm_force_read, + .write = ksm_force_write, + .llseek = generic_file_llseek, +}; #endif /* CONFIG_KSM */ #ifdef CONFIG_STACKLEAK_METRICS @@ -3303,6 +3400,7 @@ static const struct pid_entry tgid_base_stuff[] = { #endif #ifdef CONFIG_KSM ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), + REG("ksm_force", S_IRUSR|S_IWUSR, proc_pid_ksm_force_operations), #endif }; @@ -3639,6 +3737,7 @@ static const struct pid_entry tid_base_stuff[] = { #endif #ifdef CONFIG_KSM ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), + REG("ksm_force", S_IRUSR|S_IWUSR, proc_pid_ksm_force_operations), #endif }; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index b34ff2cdbc4f..1b1592c2f5cf 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -661,6 +661,15 @@ struct mm_struct { * merging. */ unsigned long ksm_merging_pages; + /* + * If true, force anonymous pages of this mm to be involved in KSM + * merging without explicitly calling madvise. It is effctive only + * when the klob of '/sys/kernel/mm/ksm/run' is set as 1. If false, + * cancel the feature of ksm_force of this process and unmerge + * those merged pages which is not madvised as MERGEABLE of this + * process, but leave MERGEABLE areas merged. 
+ */ + bool ksm_force; #endif } __randomize_layout; diff --git a/mm/ksm.c b/mm/ksm.c index 38360285497a..c9f672dcc72e 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -334,6 +334,34 @@ static void __init ksm_slab_free(void) mm_slot_cache = NULL; } +/* Check if vma is qualified for ksmd scanning */ +static bool ksm_vma_check(struct vm_area_struct *vma) +{ + unsigned long vm_flags = vma->vm_flags; + + if (!(vma->vm_flags & VM_MERGEABLE) && !(vma->vm_mm->ksm_force)) + return false; + + if (vm_flags & (VM_SHARED | VM_MAYSHARE | + VM_PFNMAP | VM_IO | VM_DONTEXPAND | + VM_HUGETLB | VM_MIXEDMAP)) + return false; /* just ignore this vma*/ + + if (vma_is_dax(vma)) + return false; + +#ifdef VM_SAO + if (vm_flags & VM_SAO) + return false; +#endif +#ifdef VM_SPARC_ADI + if (vm_flags & VM_SPARC_ADI) + return false; +#endif + + return true; +} + static __always_inline bool is_stable_node_chain(struct stable_node *chain) { return chain->rmap_hlist_len == STABLE_NODE_CHAIN; @@ -523,7 +551,7 @@ static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm, if (ksm_test_exit(mm)) return NULL; vma = vma_lookup(mm, addr); - if (!vma || !(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma) + if (!vma || !ksm_vma_check(vma) || !vma->anon_vma) return NULL; return vma; } @@ -2297,7 +2325,7 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page) vma = find_vma(mm, ksm_scan.address); for (; vma; vma = vma->vm_next) { - if (!(vma->vm_flags & VM_MERGEABLE)) + if (!ksm_vma_check(vma)) continue; if (ksm_scan.address < vma->vm_start) ksm_scan.address = vma->vm_start; -- 2.25.1 ^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v5] mm/ksm: introduce ksm_force for each process 2022-05-07 17:59 ` Andrew Morton 2022-05-08 9:14 ` [PATCH v4] " cgel.zte @ 2022-05-08 9:27 ` cgel.zte 2022-05-08 18:03 ` Matthew Wilcox 2022-05-10 20:10 ` Ammar Faizi 1 sibling, 2 replies; 13+ messages in thread From: cgel.zte @ 2022-05-08 9:27 UTC (permalink / raw) To: akpm Cc: cgel.zte, keescook, linux-fsdevel, linux-kernel, linux-mm, ran.xiaokai, wang.yong12, xu.xin16, yang.yang29, zhang.yunkai From: xu xin <[email protected]> To use KSM, we have to explicitly call madvise() in application code, which means installed apps on the OS need to be uninstalled and their source code needs to be modified. It is inconvenient. In order to change this situation, we add a new proc file ksm_force under /proc/<pid>/ to support turning on/off KSM scanning of a process's mm dynamically. If ksm_force is set to 1, force all anonymous and 'qualified' VMAs of this mm to be involved in KSM scanning without explicitly calling madvise to mark VMA as MADV_MERGEABLE. But it is effective only when the knob of /sys/kernel/mm/ksm/run is set to 1. If ksm_force is set to 0, cancel the feature of ksm_force of this process and unmerge those merged pages belonging to VMAs which is not madvised as MADV_MERGEABLE of this process, but leave MADV_MERGEABLE areas merged. Signed-off-by: xu xin <[email protected]> Reviewed-by: Yang Yang <[email protected]> Reviewed-by: Ran Xiaokai <[email protected]> Reviewed-by: wangyong <[email protected]> Reviewed-by: Yunkai Zhang <[email protected]> --- v5: - fix typos in Documentation/filesystems/proc.rst v4: - fix typos in commit log - add interface descriptions under Documentation/ v3: - fix compile error of mm/ksm.c v2: - fix a spelling error in commit log. - remove a redundant condition check in ksm_force_write(). 
--- Documentation/admin-guide/mm/ksm.rst | 20 +++++- Documentation/filesystems/proc.rst | 17 +++++ fs/proc/base.c | 99 ++++++++++++++++++++++++++++ include/linux/mm_types.h | 9 +++ mm/ksm.c | 32 ++++++++- 5 files changed, 174 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/mm/ksm.rst b/Documentation/admin-guide/mm/ksm.rst index b244f0202a03..e42cffa42463 100644 --- a/Documentation/admin-guide/mm/ksm.rst +++ b/Documentation/admin-guide/mm/ksm.rst @@ -32,7 +32,7 @@ are swapped back in: ksmd must rediscover their identity and merge again). Controlling KSM with madvise ============================ -KSM only operates on those areas of address space which an application +KSM can operates on those areas of address space which an application has advised to be likely candidates for merging, by using the madvise(2) system call:: @@ -70,6 +70,24 @@ Applications should be considerate in their use of MADV_MERGEABLE, restricting its use to areas likely to benefit. KSM's scans may use a lot of processing power: some installations will disable KSM for that reason. +Controlling KSM with procfs +=========================== + +KSM can also operate on anonymous areas of address space of those processes's +knob ``/proc/<pid>/ksm_force`` is on, even if app codes doesn't call madvise() +explicitly to advise specific areas as MADV_MERGEABLE. + +You can set ksm_force to 1 to force all anonymous and qualified VMAs of +this process to be involved in KSM scanning. But It is effective only when the +klob of ``/sys/kernel/mm/ksm/run`` is set as 1. + e.g. ``echo 1 > /proc/<pid>/ksm_force`` + +You can also set ksm_force to 0 to cancel that force feature of this process +and unmerge those merged pages which belongs to those VMAs not marked as +MADV_MERGEABLE of this process. But that will leave those pages belonging to +VMAs marked as MADV_MERGEABLE merged. + e.g. ``echo 0 > /proc/<pid>/ksm_force`` + .. 
_ksm_sysfs: KSM daemon sysfs interface diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 061744c436d9..8f959697ae1e 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -47,6 +47,7 @@ fixes/update part 1.1 Stefani Seibold <[email protected]> June 9 2009 3.10 /proc/<pid>/timerslack_ns - Task timerslack value 3.11 /proc/<pid>/patch_state - Livepatch patch operation state 3.12 /proc/<pid>/arch_status - Task architecture specific information + 3.13 /proc/<pid>/ksm_force - Setting of mandatory involvement in KSM 4 Configuring procfs 4.1 Mount options @@ -2176,6 +2177,22 @@ AVX512_elapsed_ms the task is unlikely an AVX512 user, but depends on the workload and the scheduling scenario, it also could be a false negative mentioned above. +3.13 /proc/<pid>/ksm_force - Setting of mandatory involvement in KSM +----------------------------------------------------------------------- +When CONFIG_KSM is enabled, this file can be used to specify if this +process's anonymous memory can be involved in KSM scanning without app codes +explicitly calling madvise to mark memory address as MADV_MERGEABLE. + +If writing 1 to this file, the kernel will force all anonymous and qualified +memory to be involved in KSM scanning without explicitly calling madvise to +mark memory address as MADV_MERGEABLE. But that is effective only when the +klob of '/sys/kernel/mm/ksm/run' is set as 1. + +If writing 0 to this file, the mandatory KSM feature of this process's will +be cancelled and unmerge those merged pages which belongs to those areas not +marked as MADV_MERGEABLE of this process, but leave those pages belonging to +areas marked as MADV_MERGEABLE merged. 
+ Chapter 4: Configuring procfs ============================= diff --git a/fs/proc/base.c b/fs/proc/base.c index 8dfa36a99c74..3115ffa4c9fb 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -96,6 +96,7 @@ #include <linux/time_namespace.h> #include <linux/resctrl.h> #include <linux/cn_proc.h> +#include <linux/ksm.h> #include <trace/events/oom.h> #include "internal.h" #include "fd.h" @@ -3168,6 +3169,102 @@ static int proc_pid_ksm_merging_pages(struct seq_file *m, struct pid_namespace * return 0; } + +static ssize_t ksm_force_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct task_struct *task; + struct mm_struct *mm; + char buffer[PROC_NUMBUF]; + ssize_t len; + int ret; + + task = get_proc_task(file_inode(file)); + if (!task) + return -ESRCH; + + mm = get_task_mm(task); + ret = 0; + if (mm) { + len = snprintf(buffer, sizeof(buffer), "%d\n", mm->ksm_force); + ret = simple_read_from_buffer(buf, count, ppos, buffer, len); + mmput(mm); + } + + return ret; +} + +static ssize_t ksm_force_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task; + struct mm_struct *mm; + char buffer[PROC_NUMBUF]; + int force; + int err = 0; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out_return; + } + + err = kstrtoint(strstrip(buffer), 0, &force); + + if (err) + goto out_return; + if (force != 0 && force != 1) { + err = -EINVAL; + goto out_return; + } + + task = get_proc_task(file_inode(file)); + if (!task) { + err = -ESRCH; + goto out_return; + } + + mm = get_task_mm(task); + if (!mm) + goto out_put_task; + + if (mm->ksm_force != force) { + if (mmap_write_lock_killable(mm)) { + err = -EINTR; + goto out_mmput; + } + + if (force == 0) + mm->ksm_force = force; + else { + /* + * Force anonymous pages of this mm to be involved in KSM merging + * without explicitly calling madvise. 
+ */ + if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) + err = __ksm_enter(mm); + if (!err) + mm->ksm_force = force; + } + + mmap_write_unlock(mm); + } + +out_mmput: + mmput(mm); +out_put_task: + put_task_struct(task); +out_return: + return err < 0 ? err : count; +} + +static const struct file_operations proc_pid_ksm_force_operations = { + .read = ksm_force_read, + .write = ksm_force_write, + .llseek = generic_file_llseek, +}; #endif /* CONFIG_KSM */ #ifdef CONFIG_STACKLEAK_METRICS @@ -3303,6 +3400,7 @@ static const struct pid_entry tgid_base_stuff[] = { #endif #ifdef CONFIG_KSM ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), + REG("ksm_force", S_IRUSR|S_IWUSR, proc_pid_ksm_force_operations), #endif }; @@ -3639,6 +3737,7 @@ static const struct pid_entry tid_base_stuff[] = { #endif #ifdef CONFIG_KSM ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), + REG("ksm_force", S_IRUSR|S_IWUSR, proc_pid_ksm_force_operations), #endif }; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index b34ff2cdbc4f..1b1592c2f5cf 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -661,6 +661,15 @@ struct mm_struct { * merging. */ unsigned long ksm_merging_pages; + /* + * If true, force anonymous pages of this mm to be involved in KSM + * merging without explicitly calling madvise. It is effctive only + * when the klob of '/sys/kernel/mm/ksm/run' is set as 1. If false, + * cancel the feature of ksm_force of this process and unmerge + * those merged pages which is not madvised as MERGEABLE of this + * process, but leave MERGEABLE areas merged. 
+ */ + bool ksm_force; #endif } __randomize_layout; diff --git a/mm/ksm.c b/mm/ksm.c index 38360285497a..c9f672dcc72e 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -334,6 +334,34 @@ static void __init ksm_slab_free(void) mm_slot_cache = NULL; } +/* Check if vma is qualified for ksmd scanning */ +static bool ksm_vma_check(struct vm_area_struct *vma) +{ + unsigned long vm_flags = vma->vm_flags; + + if (!(vma->vm_flags & VM_MERGEABLE) && !(vma->vm_mm->ksm_force)) + return false; + + if (vm_flags & (VM_SHARED | VM_MAYSHARE | + VM_PFNMAP | VM_IO | VM_DONTEXPAND | + VM_HUGETLB | VM_MIXEDMAP)) + return false; /* just ignore this vma*/ + + if (vma_is_dax(vma)) + return false; + +#ifdef VM_SAO + if (vm_flags & VM_SAO) + return false; +#endif +#ifdef VM_SPARC_ADI + if (vm_flags & VM_SPARC_ADI) + return false; +#endif + + return true; +} + static __always_inline bool is_stable_node_chain(struct stable_node *chain) { return chain->rmap_hlist_len == STABLE_NODE_CHAIN; @@ -523,7 +551,7 @@ static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm, if (ksm_test_exit(mm)) return NULL; vma = vma_lookup(mm, addr); - if (!vma || !(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma) + if (!vma || !ksm_vma_check(vma) || !vma->anon_vma) return NULL; return vma; } @@ -2297,7 +2325,7 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page) vma = find_vma(mm, ksm_scan.address); for (; vma; vma = vma->vm_next) { - if (!(vma->vm_flags & VM_MERGEABLE)) + if (!ksm_vma_check(vma)) continue; if (ksm_scan.address < vma->vm_start) ksm_scan.address = vma->vm_start; -- 2.25.1 ^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v5] mm/ksm: introduce ksm_force for each process 2022-05-08 9:27 ` [PATCH v5] " cgel.zte @ 2022-05-08 18:03 ` Matthew Wilcox 2022-05-09 6:57 ` CGEL 2022-05-10 20:10 ` Ammar Faizi 1 sibling, 1 reply; 13+ messages in thread From: Matthew Wilcox @ 2022-05-08 18:03 UTC (permalink / raw) To: cgel.zte Cc: akpm, keescook, linux-fsdevel, linux-kernel, linux-mm, ran.xiaokai, wang.yong12, xu.xin16, yang.yang29, zhang.yunkai On Sun, May 08, 2022 at 09:27:10AM +0000, [email protected] wrote: > If ksm_force is set to 0, cancel the feature of ksm_force of this > process and unmerge those merged pages belonging to VMAs which is not > madvised as MADV_MERGEABLE of this process, but leave MADV_MERGEABLE > areas merged. Is that actually a useful feature? Otherwise, we could simply turn on/off the existing MMF_VM_MERGEABLE flag instead of introducing this new bool. > +Controlling KSM with procfs > +=========================== > + > +KSM can also operate on anonymous areas of address space of those processes's > +knob ``/proc/<pid>/ksm_force`` is on, even if app codes doesn't call madvise() > +explicitly to advise specific areas as MADV_MERGEABLE. > + > +You can set ksm_force to 1 to force all anonymous and qualified VMAs of > +this process to be involved in KSM scanning. But It is effective only when the > +klob of ``/sys/kernel/mm/ksm/run`` is set as 1. I think that last sentence doesn't really add any value. > + memset(buffer, 0, sizeof(buffer)); > + if (count > sizeof(buffer) - 1) > + count = sizeof(buffer) - 1; > + if (copy_from_user(buffer, buf, count)) { > + err = -EFAULT; > + goto out_return; This feels a bit unnecessary. Just 'return -EFAULT' here. 
> + } > + > + err = kstrtoint(strstrip(buffer), 0, &force); > + > + if (err) > + goto out_return; 'return err' > + if (force != 0 && force != 1) { > + err = -EINVAL; > + goto out_return; 'return -EINVAL' > + } > + > + task = get_proc_task(file_inode(file)); > + if (!task) { > + err = -ESRCH; > + goto out_return; 'return -ESRCH' ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v5] mm/ksm: introduce ksm_force for each process 2022-05-08 18:03 ` Matthew Wilcox @ 2022-05-09 6:57 ` CGEL 2022-05-09 15:40 ` Matthew Wilcox 0 siblings, 1 reply; 13+ messages in thread From: CGEL @ 2022-05-09 6:57 UTC (permalink / raw) To: Matthew Wilcox Cc: akpm, keescook, linux-fsdevel, linux-kernel, linux-mm, ran.xiaokai, wang.yong12, xu.xin16, yang.yang29, zhang.yunkai On Sun, May 08, 2022 at 07:03:36PM +0100, Matthew Wilcox wrote: > On Sun, May 08, 2022 at 09:27:10AM +0000, [email protected] wrote: > > If ksm_force is set to 0, cancel the feature of ksm_force of this > > process and unmerge those merged pages belonging to VMAs which is not > > madvised as MADV_MERGEABLE of this process, but leave MADV_MERGEABLE > > areas merged. > > Is that actually a useful feature? Otherwise, we could simply turn > on/off the existing MMF_VM_MERGEABLE flag instead of introducing this > new bool. > I think this will be very useful for those apps which are very likely to produce identical pages in memory but whose users and operators are not willing to modify the source code for any reason. Besides, simply turning on/off the existing MMF_VM_MERGEABLE flag may not be feasible because madvise will also turn on the MMF_VM_MERGEABLE flag. I think the following suggestions are good, and I will resend a patch. > > +Controlling KSM with procfs > > +=========================== > > + > > +KSM can also operate on anonymous areas of address space of those processes's > > +knob ``/proc/<pid>/ksm_force`` is on, even if app codes doesn't call madvise() > > +explicitly to advise specific areas as MADV_MERGEABLE. > > + > > +You can set ksm_force to 1 to force all anonymous and qualified VMAs of > > +this process to be involved in KSM scanning. But It is effective only when the > > +klob of ``/sys/kernel/mm/ksm/run`` is set as 1. > > I think that last sentence doesn't really add any value. 
> > > + memset(buffer, 0, sizeof(buffer)); > > + if (count > sizeof(buffer) - 1) > > + count = sizeof(buffer) - 1; > > + if (copy_from_user(buffer, buf, count)) { > > + err = -EFAULT; > > + goto out_return; > > This feels a bit unnecessary. Just 'return -EFAULT' here. > > > + } > > + > > + err = kstrtoint(strstrip(buffer), 0, &force); > > + > > + if (err) > > + goto out_return; > > 'return err' > > > + if (force != 0 && force != 1) { > > + err = -EINVAL; > > + goto out_return; > > 'return -EINVAL' > > > + } > > + > > + task = get_proc_task(file_inode(file)); > > + if (!task) { > > + err = -ESRCH; > > + goto out_return; > > 'return -ESRCH' ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v5] mm/ksm: introduce ksm_force for each process 2022-05-09 6:57 ` CGEL @ 2022-05-09 15:40 ` Matthew Wilcox 2022-05-10 2:23 ` CGEL 0 siblings, 1 reply; 13+ messages in thread From: Matthew Wilcox @ 2022-05-09 15:40 UTC (permalink / raw) To: CGEL Cc: akpm, keescook, linux-fsdevel, linux-kernel, linux-mm, ran.xiaokai, wang.yong12, xu.xin16, yang.yang29, zhang.yunkai On Mon, May 09, 2022 at 06:57:33AM +0000, CGEL wrote: > On Sun, May 08, 2022 at 07:03:36PM +0100, Matthew Wilcox wrote: > > On Sun, May 08, 2022 at 09:27:10AM +0000, [email protected] wrote: > > > If ksm_force is set to 0, cancel the feature of ksm_force of this > > > process and unmerge those merged pages belonging to VMAs which is not > > > madvised as MADV_MERGEABLE of this process, but leave MADV_MERGEABLE > > > areas merged. > > > > Is that actually a useful feature? Otherwise, we could simply turn > > on/off the existing MMF_VM_MERGEABLE flag instead of introducing this > > new bool. > > > I think this will be very useful for those apps which are very likely to > cause Same Pages in memory and users and operators are not willing to > modified the source codes for any reasons. No, you misunderstand. Is it useful to have the "force KSM off" functionality? ie code which has been modified to allow KSM, but then overridden by an admin? > Besides, simply turning of/off the existing MMF_VM_MERGEABLE flag may be > not feasible because madvise will also turn on the MMF_VM_MERGEABLE > flag. > > I think the following suggestions is good, and I will resend a patch. > > > +Controlling KSM with procfs > > > +=========================== > > > + > > > +KSM can also operate on anonymous areas of address space of those processes's > > > +knob ``/proc/<pid>/ksm_force`` is on, even if app codes doesn't call madvise() > > > +explicitly to advise specific areas as MADV_MERGEABLE. 
> > > + > > > +You can set ksm_force to 1 to force all anonymous and qualified VMAs of > > > +this process to be involved in KSM scanning. But It is effective only when the > > > +klob of ``/sys/kernel/mm/ksm/run`` is set as 1. > > > > I think that last sentence doesn't really add any value. > > > > > + memset(buffer, 0, sizeof(buffer)); > > > + if (count > sizeof(buffer) - 1) > > > + count = sizeof(buffer) - 1; > > > + if (copy_from_user(buffer, buf, count)) { > > > + err = -EFAULT; > > > + goto out_return; > > > > This feels a bit unnecessary. Just 'return -EFAULT' here. > > > > > + } > > > + > > > + err = kstrtoint(strstrip(buffer), 0, &force); > > > + > > > + if (err) > > > + goto out_return; > > > > 'return err' > > > > > + if (force != 0 && force != 1) { > > > + err = -EINVAL; > > > + goto out_return; > > > > 'return -EINVAL' > > > > > + } > > > + > > > + task = get_proc_task(file_inode(file)); > > > + if (!task) { > > > + err = -ESRCH; > > > + goto out_return; > > > > 'return -ESRCH' ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v5] mm/ksm: introduce ksm_force for each process 2022-05-09 15:40 ` Matthew Wilcox @ 2022-05-10 2:23 ` CGEL 0 siblings, 0 replies; 13+ messages in thread From: CGEL @ 2022-05-10 2:23 UTC (permalink / raw) To: Matthew Wilcox Cc: akpm, keescook, linux-fsdevel, linux-kernel, linux-mm, ran.xiaokai, wang.yong12, xu.xin16, yang.yang29, zhang.yunkai On Mon, May 09, 2022 at 04:40:50PM +0100, Matthew Wilcox wrote: > On Mon, May 09, 2022 at 06:57:33AM +0000, CGEL wrote: > > On Sun, May 08, 2022 at 07:03:36PM +0100, Matthew Wilcox wrote: > > > On Sun, May 08, 2022 at 09:27:10AM +0000, [email protected] wrote: > > > > If ksm_force is set to 0, cancel the feature of ksm_force of this > > > > process and unmerge those merged pages belonging to VMAs which is not > > > > madvised as MADV_MERGEABLE of this process, but leave MADV_MERGEABLE > > > > areas merged. > > > > > > Is that actually a useful feature? Otherwise, we could simply turn > > > on/off the existing MMF_VM_MERGEABLE flag instead of introducing this > > > new bool. > > > > > I think this will be very useful for those apps which are very likely to > > cause Same Pages in memory and users and operators are not willing to > > modified the source codes for any reasons. > > No, you misunderstand. Is it useful to have the "force KSM off" > functionality? ie code which has been modified to allow KSM, but > then overridden by an admin? > Oh, I see what you mean. It should be mentioned that "force KSM off" is not implemented in the current patch. In this patch, setting ksm_force to 0 just restores the system to the default state (the state before patching). > > Besides, simply turning of/off the existing MMF_VM_MERGEABLE flag may be > > not feasible because madvise will also turn on the MMF_VM_MERGEABLE > > flag. > > > > I think the following suggestions is good, and I will resend a patch.
> > > > +Controlling KSM with procfs > > > > +=========================== > > > > + > > > > +KSM can also operate on anonymous areas of address space of those processes's > > > > +knob ``/proc/<pid>/ksm_force`` is on, even if app codes doesn't call madvise() > > > > +explicitly to advise specific areas as MADV_MERGEABLE. > > > > + > > > > +You can set ksm_force to 1 to force all anonymous and qualified VMAs of > > > > +this process to be involved in KSM scanning. But It is effective only when the > > > > +klob of ``/sys/kernel/mm/ksm/run`` is set as 1. > > > > > > I think that last sentence doesn't really add any value. > > > > > > > + memset(buffer, 0, sizeof(buffer)); > > > > + if (count > sizeof(buffer) - 1) > > > > + count = sizeof(buffer) - 1; > > > > + if (copy_from_user(buffer, buf, count)) { > > > > + err = -EFAULT; > > > > + goto out_return; > > > > > > This feels a bit unnecessary. Just 'return -EFAULT' here. > > > > > > > + } > > > > + > > > > + err = kstrtoint(strstrip(buffer), 0, &force); > > > > + > > > > + if (err) > > > > + goto out_return; > > > > > > 'return err' > > > > > > > + if (force != 0 && force != 1) { > > > > + err = -EINVAL; > > > > + goto out_return; > > > > > > 'return -EINVAL' > > > > > > > + } > > > > + > > > > + task = get_proc_task(file_inode(file)); > > > > + if (!task) { > > > > + err = -ESRCH; > > > > + goto out_return; > > > > > > 'return -ESRCH' ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v5] mm/ksm: introduce ksm_force for each process 2022-05-08 9:27 ` [PATCH v5] " cgel.zte 2022-05-08 18:03 ` Matthew Wilcox @ 2022-05-10 20:10 ` Ammar Faizi 2022-05-10 20:30 ` Andrew Morton 1 sibling, 1 reply; 13+ messages in thread From: Ammar Faizi @ 2022-05-10 20:10 UTC (permalink / raw) To: cgel.zte Cc: Andrew Morton, Kees Cook, Matthew Wilcox, Yang Yang, Ran Xiaokai, Yunkai Zhang, xu xin, wangyong, Linux MM Mailing List, Linux fsdevel Mailing List, Linux Kernel Mailing List On 5/8/22 4:27 PM, [email protected] wrote: > +static ssize_t ksm_force_write(struct file *file, const char __user *buf, > + size_t count, loff_t *ppos) > +{ > + struct task_struct *task; > + struct mm_struct *mm; > + char buffer[PROC_NUMBUF]; > + int force; > + int err = 0; > + > + memset(buffer, 0, sizeof(buffer)); > + if (count > sizeof(buffer) - 1) > + count = sizeof(buffer) - 1; > + if (copy_from_user(buffer, buf, count)) { > + err = -EFAULT; > + goto out_return; > + } This one looks like over-zeroing to me. You don't need to zero all elements in the array. You're going to overwrite it with `copy_from_user()` anyway. Just zero the last potentially useful element by using @count as the index. It can be like this: ``` char buffer[PROC_NUMBUF]; if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; buffer[count] = '\0'; ``` -- Ammar Faizi ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v5] mm/ksm: introduce ksm_force for each process 2022-05-10 20:10 ` Ammar Faizi @ 2022-05-10 20:30 ` Andrew Morton 2022-05-11 7:58 ` Ammar Faizi 0 siblings, 1 reply; 13+ messages in thread From: Andrew Morton @ 2022-05-10 20:30 UTC (permalink / raw) To: Ammar Faizi Cc: cgel.zte, Kees Cook, Matthew Wilcox, Yang Yang, Ran Xiaokai, Yunkai Zhang, xu xin, wangyong, Linux MM Mailing List, Linux fsdevel Mailing List, Linux Kernel Mailing List On Wed, 11 May 2022 03:10:31 +0700 Ammar Faizi <[email protected]> wrote: > On 5/8/22 4:27 PM, [email protected] wrote: > > +static ssize_t ksm_force_write(struct file *file, const char __user *buf, > > + size_t count, loff_t *ppos) > > +{ > > + struct task_struct *task; > > + struct mm_struct *mm; > > + char buffer[PROC_NUMBUF]; > > + int force; > > + int err = 0; > > + > > + memset(buffer, 0, sizeof(buffer)); > > + if (count > sizeof(buffer) - 1) > > + count = sizeof(buffer) - 1; > > + if (copy_from_user(buffer, buf, count)) { > > + err = -EFAULT; > > + goto out_return; > > + } > > This one looks like over-zeroing to me. You don't need to zero > all elements in the array. You're going to overwrite it with > `copy_from_user()` anyway. > > Just zero the last potentially useful element by using @count > as the index. It can be like this: > > ``` > char buffer[PROC_NUMBUF]; > > if (count > sizeof(buffer) - 1) > count = sizeof(buffer) - 1; > if (copy_from_user(buffer, buf, count)) > return -EFAULT; > buffer[count] = '\0'; > ``` Use strncpy_from_user()? Can this code use proc_dointvec_minmax() or similar? ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v5] mm/ksm: introduce ksm_force for each process 2022-05-10 20:30 ` Andrew Morton @ 2022-05-11 7:58 ` Ammar Faizi 2022-05-12 7:03 ` [PATCH v7] " cgel.zte 0 siblings, 1 reply; 13+ messages in thread From: Ammar Faizi @ 2022-05-11 7:58 UTC (permalink / raw) To: Andrew Morton Cc: cgel.zte, Kees Cook, Matthew Wilcox, Yang Yang, Ran Xiaokai, Yunkai Zhang, xu xin, wangyong, Linux MM Mailing List, Linux fsdevel Mailing List, Linux Kernel Mailing List On 5/11/22 3:30 AM, Andrew Morton wrote: > On Wed, 11 May 2022 03:10:31 +0700 Ammar Faizi <[email protected]> wrote: > >> On 5/8/22 4:27 PM, [email protected] wrote: >>> +static ssize_t ksm_force_write(struct file *file, const char __user *buf, >>> + size_t count, loff_t *ppos) >>> +{ >>> + struct task_struct *task; >>> + struct mm_struct *mm; >>> + char buffer[PROC_NUMBUF]; >>> + int force; >>> + int err = 0; >>> + >>> + memset(buffer, 0, sizeof(buffer)); >>> + if (count > sizeof(buffer) - 1) >>> + count = sizeof(buffer) - 1; >>> + if (copy_from_user(buffer, buf, count)) { >>> + err = -EFAULT; >>> + goto out_return; >>> + } >> >> This one looks like over-zeroing to me. You don't need to zero >> all elements in the array. You're going to overwrite it with >> `copy_from_user()` anyway. >> >> Just zero the last potentially useful element by using @count >> as the index. It can be like this: >> >> ``` >> char buffer[PROC_NUMBUF]; >> >> if (count > sizeof(buffer) - 1) >> count = sizeof(buffer) - 1; >> if (copy_from_user(buffer, buf, count)) >> return -EFAULT; >> buffer[count] = '\0'; >> ``` > > Use strncpy_from_user()? Sounds better. > Can this code use proc_dointvec_minmax() or similar? Not familiar with that API at all. Leaving it to other participants... -- Ammar Faizi ^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v7] mm/ksm: introduce ksm_force for each process 2022-05-11 7:58 ` Ammar Faizi @ 2022-05-12 7:03 ` cgel.zte 2022-05-12 20:41 ` Andrew Morton 0 siblings, 1 reply; 13+ messages in thread From: cgel.zte @ 2022-05-12 7:03 UTC (permalink / raw) To: ammarfaizi2, akpm Cc: cgel.zte, linux-fsdevel, linux-kernel, linux-mm, ran.xiaokai, wang.yong12, willy, xu.xin16, yang.yang29, zhang.yunkai From: xu xin <[email protected]> To use KSM, we have to explicitly call madvise() in application code, which means installed apps on OS need to be uninstalled and source code needs to be modified. It is inconvenient. In order to change this situation, we add a new proc file ksm_force under /proc/<pid>/ to support turning on/off KSM scanning of a process's mm dynamically. If ksm_force is set to 1, force all anonymous and 'qualified' VMAs of this mm to be involved in KSM scanning without explicitly calling madvise to mark VMA as MADV_MERGEABLE. But it is effective only when the knob of /sys/kernel/mm/ksm/run is set as 1. If ksm_force is set to 0, cancel the feature of ksm_force of this process and unmerge those merged pages belonging to VMAs which are not madvised as MADV_MERGEABLE of this process, but leave MADV_MERGEABLE areas merged. Signed-off-by: xu xin <[email protected]> Reviewed-by: Yang Yang <[email protected]> Reviewed-by: Ran Xiaokai <[email protected]> Reviewed-by: wangyong <[email protected]> Reviewed-by: Yunkai Zhang <[email protected]> Suggested-by: Matthew Wilcox <[email protected]> Suggested-by: Ammar Faizi <[email protected]> --- v7: - remove over-zeroing in ksm_force_write() and use strncpy_from_user instead of copy_from_user.
v6: - modify the way of "return" - remove unnecessary words in Documentation/admin-guide/mm/ksm.rst - add additional notes to "set 0 to ksm_force" in Documentation/../ksm.rst and Documentation/../proc.rst v5: - fix typos in Documentation/filesystem/proc.rst v4: - fix typos in commit log - add interface descriptions under Documentation/ v3: - fix compile error of mm/ksm.c v2: - fix a spelling error in commit log. - remove a redundant condition check in ksm_force_write(). --- Documentation/admin-guide/mm/ksm.rst | 19 +++++- Documentation/filesystems/proc.rst | 17 +++++ fs/proc/base.c | 97 ++++++++++++++++++++++++++++ include/linux/mm_types.h | 9 +++ mm/ksm.c | 32 ++++++++- 5 files changed, 171 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/mm/ksm.rst b/Documentation/admin-guide/mm/ksm.rst index b244f0202a03..8cabc2504005 100644 --- a/Documentation/admin-guide/mm/ksm.rst +++ b/Documentation/admin-guide/mm/ksm.rst @@ -32,7 +32,7 @@ are swapped back in: ksmd must rediscover their identity and merge again). Controlling KSM with madvise ============================ -KSM only operates on those areas of address space which an application +KSM can operates on those areas of address space which an application has advised to be likely candidates for merging, by using the madvise(2) system call:: @@ -70,6 +70,23 @@ Applications should be considerate in their use of MADV_MERGEABLE, restricting its use to areas likely to benefit. KSM's scans may use a lot of processing power: some installations will disable KSM for that reason. +Controlling KSM with procfs +=========================== + +KSM can also operate on anonymous areas of address space of those processes's +knob ``/proc/<pid>/ksm_force`` is on, even if app codes doesn't call madvise() +explicitly to advise specific areas as MADV_MERGEABLE. + +You can set ksm_force to 1 to force all anonymous and qualified VMAs of +this process to be involved in KSM scanning. + e.g. 
``echo 1 > /proc/<pid>/ksm_force`` + +You can also set ksm_force to 0 to cancel that force feature of this process +and unmerge those merged pages which belongs to those VMAs not marked as +MADV_MERGEABLE of this process. But that still leave those pages belonging to +VMAs marked as MADV_MERGEABLE merged (fallback to the default state). + e.g. ``echo 0 > /proc/<pid>/ksm_force`` + .. _ksm_sysfs: KSM daemon sysfs interface diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 061744c436d9..8890b8b457a4 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -47,6 +47,7 @@ fixes/update part 1.1 Stefani Seibold <[email protected]> June 9 2009 3.10 /proc/<pid>/timerslack_ns - Task timerslack value 3.11 /proc/<pid>/patch_state - Livepatch patch operation state 3.12 /proc/<pid>/arch_status - Task architecture specific information + 3.13 /proc/<pid>/ksm_force - Setting of mandatory involvement in KSM 4 Configuring procfs 4.1 Mount options @@ -2176,6 +2177,22 @@ AVX512_elapsed_ms the task is unlikely an AVX512 user, but depends on the workload and the scheduling scenario, it also could be a false negative mentioned above. +3.13 /proc/<pid>/ksm_force - Setting of mandatory involvement in KSM +----------------------------------------------------------------------- +When CONFIG_KSM is enabled, this file can be used to specify if this +process's anonymous memory can be involved in KSM scanning without app codes +explicitly calling madvise to mark memory address as MADV_MERGEABLE. + +If writing 1 to this file, the kernel will force all anonymous and qualified +memory to be involved in KSM scanning without explicitly calling madvise to +mark memory address as MADV_MERGEABLE. But that is effective only when the +klob of '/sys/kernel/mm/ksm/run' is set as 1. 
+ +If writing 0 to this file, the mandatory KSM feature of this process's will +be cancelled and unmerge those merged pages which belongs to those areas not +marked as MADV_MERGEABLE of this process, but leave those pages belonging to +areas marked as MADV_MERGEABLE merged (fallback to the default state). + Chapter 4: Configuring procfs ============================= diff --git a/fs/proc/base.c b/fs/proc/base.c index 8dfa36a99c74..99ab0e8cdcbc 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -96,6 +96,7 @@ #include <linux/time_namespace.h> #include <linux/resctrl.h> #include <linux/cn_proc.h> +#include <linux/ksm.h> #include <trace/events/oom.h> #include "internal.h" #include "fd.h" @@ -3168,6 +3169,100 @@ static int proc_pid_ksm_merging_pages(struct seq_file *m, struct pid_namespace * return 0; } + +static ssize_t ksm_force_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct task_struct *task; + struct mm_struct *mm; + char buffer[PROC_NUMBUF]; + ssize_t len; + int ret; + + task = get_proc_task(file_inode(file)); + if (!task) + return -ESRCH; + + mm = get_task_mm(task); + ret = 0; + if (mm) { + len = snprintf(buffer, sizeof(buffer), "%d\n", mm->ksm_force); + ret = simple_read_from_buffer(buf, count, ppos, buffer, len); + mmput(mm); + } + + return ret; +} + +static ssize_t ksm_force_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task; + struct mm_struct *mm; + char buffer[PROC_NUMBUF]; + int force; + int err = 0; + int str_len; + + if (count > sizeof(buffer) - 1) { + count = sizeof(buffer) - 1; + } + + str_len = strncpy_from_user(buffer, buf, count); + if (str_len < 0) + return -EFAULT; + buffer[str_len] = '\0'; + + err = kstrtoint(strstrip(buffer), 0, &force); + if (err) + return err; + + if (force != 0 && force != 1) + return -EINVAL; + + task = get_proc_task(file_inode(file)); + if (!task) + return -ESRCH; + + mm = get_task_mm(task); + if (!mm) + goto out_put_task; + + if 
(mm->ksm_force != force) { + if (mmap_write_lock_killable(mm)) { + err = -EINTR; + goto out_mmput; + } + + if (force == 0) + mm->ksm_force = force; + else { + /* + * Force anonymous pages of this mm to be involved in KSM merging + * without explicitly calling madvise. + */ + if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) + err = __ksm_enter(mm); + if (!err) + mm->ksm_force = force; + } + + mmap_write_unlock(mm); + } + +out_mmput: + mmput(mm); +out_put_task: + put_task_struct(task); + + return err < 0 ? err : count; +} + +static const struct file_operations proc_pid_ksm_force_operations = { + .read = ksm_force_read, + .write = ksm_force_write, + .llseek = generic_file_llseek, +}; #endif /* CONFIG_KSM */ #ifdef CONFIG_STACKLEAK_METRICS @@ -3303,6 +3398,7 @@ static const struct pid_entry tgid_base_stuff[] = { #endif #ifdef CONFIG_KSM ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), + REG("ksm_force", S_IRUSR|S_IWUSR, proc_pid_ksm_force_operations), #endif }; @@ -3639,6 +3735,7 @@ static const struct pid_entry tid_base_stuff[] = { #endif #ifdef CONFIG_KSM ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), + REG("ksm_force", S_IRUSR|S_IWUSR, proc_pid_ksm_force_operations), #endif }; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index b34ff2cdbc4f..1b1592c2f5cf 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -661,6 +661,15 @@ struct mm_struct { * merging. */ unsigned long ksm_merging_pages; + /* + * If true, force anonymous pages of this mm to be involved in KSM + * merging without explicitly calling madvise. It is effctive only + * when the klob of '/sys/kernel/mm/ksm/run' is set as 1. If false, + * cancel the feature of ksm_force of this process and unmerge + * those merged pages which is not madvised as MERGEABLE of this + * process, but leave MERGEABLE areas merged. 
+ */ + bool ksm_force; #endif } __randomize_layout; diff --git a/mm/ksm.c b/mm/ksm.c index 38360285497a..c9f672dcc72e 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -334,6 +334,34 @@ static void __init ksm_slab_free(void) mm_slot_cache = NULL; } +/* Check if vma is qualified for ksmd scanning */ +static bool ksm_vma_check(struct vm_area_struct *vma) +{ + unsigned long vm_flags = vma->vm_flags; + + if (!(vma->vm_flags & VM_MERGEABLE) && !(vma->vm_mm->ksm_force)) + return false; + + if (vm_flags & (VM_SHARED | VM_MAYSHARE | + VM_PFNMAP | VM_IO | VM_DONTEXPAND | + VM_HUGETLB | VM_MIXEDMAP)) + return false; /* just ignore this vma*/ + + if (vma_is_dax(vma)) + return false; + +#ifdef VM_SAO + if (vm_flags & VM_SAO) + return false; +#endif +#ifdef VM_SPARC_ADI + if (vm_flags & VM_SPARC_ADI) + return false; +#endif + + return true; +} + static __always_inline bool is_stable_node_chain(struct stable_node *chain) { return chain->rmap_hlist_len == STABLE_NODE_CHAIN; @@ -523,7 +551,7 @@ static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm, if (ksm_test_exit(mm)) return NULL; vma = vma_lookup(mm, addr); - if (!vma || !(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma) + if (!vma || !ksm_vma_check(vma) || !vma->anon_vma) return NULL; return vma; } @@ -2297,7 +2325,7 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page) vma = find_vma(mm, ksm_scan.address); for (; vma; vma = vma->vm_next) { - if (!(vma->vm_flags & VM_MERGEABLE)) + if (!ksm_vma_check(vma)) continue; if (ksm_scan.address < vma->vm_start) ksm_scan.address = vma->vm_start; -- 2.25.1 ^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v7] mm/ksm: introduce ksm_force for each process 2022-05-12 7:03 ` [PATCH v7] " cgel.zte @ 2022-05-12 20:41 ` Andrew Morton 0 siblings, 0 replies; 13+ messages in thread From: Andrew Morton @ 2022-05-12 20:41 UTC (permalink / raw) To: cgel.zte Cc: ammarfaizi2, linux-fsdevel, linux-kernel, linux-mm, ran.xiaokai, wang.yong12, willy, xu.xin16, yang.yang29, zhang.yunkai On Thu, 12 May 2022 07:03:47 +0000 [email protected] wrote: > From: xu xin <[email protected]> > > To use KSM, we have to explicitly call madvise() in application code, > which means installed apps on OS needs to be uninstall and source code > needs to be modified. It is inconvenient. > > In order to change this situation, We add a new proc file ksm_force > under /proc/<pid>/ to support turning on/off KSM scanning of a > process's mm dynamically. > > If ksm_force is set to 1, force all anonymous and 'qualified' VMAs > of this mm to be involved in KSM scanning without explicitly calling > madvise to mark VMA as MADV_MERGEABLE. But It is effective only when > the klob of /sys/kernel/mm/ksm/run is set as 1. > > If ksm_force is set to 0, cancel the feature of ksm_force of this > process and unmerge those merged pages belonging to VMAs which is not > madvised as MADV_MERGEABLE of this process, but leave MADV_MERGEABLE > areas merged. It certainly seems like a useful feature. > Signed-off-by: xu xin <[email protected]> > Reviewed-by: Yang Yang <[email protected]> > Reviewed-by: Ran Xiaokai <[email protected]> > Reviewed-by: wangyong <[email protected]> > Reviewed-by: Yunkai Zhang <[email protected]> > Suggested-by: Matthew Wilcox <[email protected]> > Suggested-by: Ammar Faizi <[email protected]> This patch doesn't have your Signed-off-by:. It should, because you were on the delivery path. This is described in Documentation/process/submitting-patches.rst, "Developer's Certificate of Origin". I'll queue it for some testing but please do resend with that tag. 
> +/* Check if vma is qualified for ksmd scanning */ > +static bool ksm_vma_check(struct vm_area_struct *vma) I have trouble with "check" names, because the name doesn't convey what is being checked, nor does the name convey whether it's checking for truth or for falsity. I suggest that "vma_scannable" is a more informative name. It doesn't need the "ksm_" prefix as this is a static file-local function. See, with the name "vma_scannable", that comment which you added is barely needed. --- a/mm/ksm.c~mm-ksm-introduce-ksm_force-for-each-process-fix +++ a/mm/ksm.c @@ -335,7 +335,7 @@ static void __init ksm_slab_free(void) } /* Check if vma is qualified for ksmd scanning */ -static bool ksm_vma_check(struct vm_area_struct *vma) +static bool vma_scannable(struct vm_area_struct *vma) { unsigned long vm_flags = vma->vm_flags; @@ -551,7 +551,7 @@ static struct vm_area_struct *find_merge if (ksm_test_exit(mm)) return NULL; vma = vma_lookup(mm, addr); - if (!vma || !ksm_vma_check(vma) || !vma->anon_vma) + if (!vma || !vma_scannable(vma) || !vma->anon_vma) return NULL; return vma; } @@ -2328,7 +2328,7 @@ next_mm: goto no_vmas; for_each_vma(vmi, vma) { - if (!ksm_vma_check(vma)) + if (!vma_scannable(vma)) continue; if (ksm_scan.address < vma->vm_start) ksm_scan.address = vma->vm_start; _ ^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2022-05-12 20:41 UTC | newest] Thread overview: 13+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2022-05-07 5:47 [PATCH v3] mm/ksm: introduce ksm_force for each process cgel.zte 2022-05-07 17:59 ` Andrew Morton 2022-05-08 9:14 ` [PATCH v4] " cgel.zte 2022-05-08 9:27 ` [PATCH v5] " cgel.zte 2022-05-08 18:03 ` Matthew Wilcox 2022-05-09 6:57 ` CGEL 2022-05-09 15:40 ` Matthew Wilcox 2022-05-10 2:23 ` CGEL 2022-05-10 20:10 ` Ammar Faizi 2022-05-10 20:30 ` Andrew Morton 2022-05-11 7:58 ` Ammar Faizi 2022-05-12 7:03 ` [PATCH v7] " cgel.zte 2022-05-12 20:41 ` Andrew Morton
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox