[v2] prctl: Get private anonymous memory region name
Commit Message
From: Rong Tao <rongtao@cestc.cn>
Commit 9a10064f5625 ("mm: add a field to store names for private anonymous
memory") added the PR_SET_VMA option and PR_SET_VMA_ANON_NAME to the prctl
system call. A matching PR_GET_VMA interface should be provided as well,
because a userspace program usually wants to know which VMA name it has
configured for an anonymous region.
Userspace can set the name for a region of memory by calling:
prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, start, len, (unsigned long)name);
Then, Userspace can get the name of a memory region by calling:
char buf[80];
prctl(PR_GET_VMA, PR_GET_VMA_ANON_NAME, start, buf, 0);
Changes for prctl(2) manual page (in the options section):
PR_GET_VMA
Gets an attribute specified in arg2 for the virtual memory area
at the address specified in arg3. arg4 points to a buffer in the
caller's address space that receives the value of the attribute.
arg5 is currently unused.
Currently, arg2 must be one of:
PR_GET_VMA_ANON_NAME
Get the name of an anonymous virtual memory area. arg4 should
point to a buffer in the user's program that is at least 80
bytes long; with a smaller buffer the prctl call may fail
because the copy to userspace fails (unless you know the
length of the name that was set through
PR_SET_VMA_ANON_NAME).
This feature is available only if the kernel is built with
the CONFIG_ANON_VMA_NAME option enabled.
Signed-off-by: Rong Tao <rongtao@cestc.cn>
---
v2: Simplify code implementation.
v1: https://lore.kernel.org/all/tencent_977CBF8E8CA6234A1B740A35655D5D7EAA0A@qq.com/
---
include/linux/mm.h | 7 +++++++
include/uapi/linux/prctl.h | 3 +++
kernel/sys.c | 39 ++++++++++++++++++++++++++++++++++++++
mm/madvise.c | 15 +++++++++++++++
4 files changed, 64 insertions(+)
Comments
On 11/26, Rong Tao wrote:
>
> then the PR_GET_VMA interface should be provided accordingly,
> which is necessary, as the userspace program usually wants to know what
> VMA name it has configured for the anonymous page.
I don't really understand the use-case for PR_GET_VMA ...
But the patch looks reasonable and correct to me.
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
On 26.11.23 14:56, Oleg Nesterov wrote:
> On 11/26, Rong Tao wrote:
>>
>> then the PR_GET_VMA interface should be provided accordingly,
>> which is necessary, as the userspace program usually wants to know what
>> VMA name it has configured for the anonymous page.
>
> I don't really understand the use-case for PR_GET_VMA ...
>
Can't we simply read "/proc/PID/maps" and just have that information
from there?
Also, I don't understand the exact use case, that should be clarified --
especially, why the existing way is insufficient.
On 11/27/23 22:52, David Hildenbrand wrote:
> On 26.11.23 14:56, Oleg Nesterov wrote:
>> On 11/26, Rong Tao wrote:
>>>
>>> then the PR_GET_VMA interface should be provided accordingly,
>>> which is necessary, as the userspace program usually wants to know what
>>> VMA name it has configured for the anonymous page.
>>
>> I don't really understand the use-case for PR_GET_VMA ...
>>
>
> Can't we simply read "/proc/PID/maps" and just have that information
> from there?
Thank you, David.
The relationship between PR_GET_VMA and /proc/PID/maps is like the
relationship between PR_GET_NAME and /proc/PID/comm. Obviously, both
methods can obtain the corresponding name. However, with prctl(2) the
name can be obtained directly from code, while reading proc is not as
convenient or efficient. Moreover, reading proc is better suited to a
bash command line than to C code.
>
> Also, I don't understand the exact use case, that should be clarified
> -- especially, why the existing way is insufficient.
>
As for the use case: I am developing a user-mode patch tool that needs
to map the patch file into the target process (using ptrace(2) and
pread/pwrite on "/proc/self/mem"). I initially used shared file
mappings:
00400000-00401000 r--p 00000000 08:00 241933181
/home/sda/git-repos/upatch/tests/hello/hello
00401000-00402000 r-xp 00001000 08:00 241933181
/home/sda/git-repos/upatch/tests/hello/hello
00402000-00403000 r--p 00002000 08:00 241933181
/home/sda/git-repos/upatch/tests/hello/hello
00403000-00404000 r--p 00002000 08:00 241933181
/home/sda/git-repos/upatch/tests/hello/hello
00404000-00405000 rw-p 00003000 08:00 241933181
/home/sda/git-repos/upatch/tests/hello/hello
01136000-01157000 rw-p 00000000 00:00 0 [heap]
7f21472c0000-7f21472c2000 rw-p 00000000 00:00 0
7f21472c2000-7f21472e8000 r--p 00000000 103:03 3705
/usr/lib64/libc.so.6
7f21472e8000-7f2147448000 r-xp 00026000 103:03 3705
/usr/lib64/libc.so.6
7f2147448000-7f2147496000 r--p 00186000 103:03 3705
/usr/lib64/libc.so.6
7f2147496000-7f214749a000 r--p 001d3000 103:03 3705
/usr/lib64/libc.so.6
7f214749a000-7f214749c000 rw-p 001d7000 103:03 3705
/usr/lib64/libc.so.6
7f214749c000-7f21474a6000 rw-p 00000000 00:00 0
7f21474be000-7f21474bf000 rwxs 00000000 00:27 7794
/tmp/upatch/62984/map_files/patch-FKSYTp <<
7f21474bf000-7f21474c0000 rwxs 00000000 00:27 7793
/tmp/upatch/62984/map_files/patch-KFaQNU <<
7f21474c0000-7f21474c1000 r--p 00000000 103:03 3702
/usr/lib64/ld-linux-x86-64.so.2
7f21474c1000-7f21474e8000 r-xp 00001000 103:03 3702
/usr/lib64/ld-linux-x86-64.so.2
7f21474e8000-7f21474f2000 r--p 00028000 103:03 3702
/usr/lib64/ld-linux-x86-64.so.2
7f21474f2000-7f21474f4000 r--p 00031000 103:03 3702
/usr/lib64/ld-linux-x86-64.so.2
7f21474f4000-7f21474f6000 rw-p 00033000 103:03 3702
/usr/lib64/ld-linux-x86-64.so.2
7ffec158b000-7ffec15ad000 rw-p 00000000 00:00 0 [stack]
7ffec15cf000-7ffec15d3000 r--p 00000000 00:00 0 [vvar]
7ffec15d3000-7ffec15d5000 r-xp 00000000 00:00 0 [vdso]
ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]
However, this is obviously not the best approach; I want to use
anonymous pages instead:
7f21474be000-7f21474bf000 rwxp 00000000 00:27 7794 [anon:patch1]
7f21474bf000-7f21474c0000 rwxp 00000000 00:27 7793 [anon:patch2]
I hope to use the address to directly obtain the VMA name, which is
"patch1". This is very convenient inside the program, because it avoids
parsing /proc/PID/maps in the source code.
Thanks again.
Rong Tao
@@ -4106,6 +4106,8 @@ static inline int seal_check_write(int seals, struct vm_area_struct *vma)
}
#ifdef CONFIG_ANON_VMA_NAME
+/*
+ * Look up the anonymous-memory name of the VMA found for @start in @mm.
+ * A reference is taken on the returned name; the caller drops it with
+ * anon_vma_name_put(). Returns NULL when no VMA is found.
+ * NOTE(review): the only caller, prctl_get_vma(), holds the mmap read lock
+ * around this call - presumably required; confirm against anon_vma_name().
+ */
+struct anon_vma_name *madvise_get_anon_name(struct mm_struct *mm,
+ unsigned long start);
int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
unsigned long len_in,
struct anon_vma_name *anon_name);
@@ -4115,6 +4117,11 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
unsigned long len_in, struct anon_vma_name *anon_name) {
return 0;
}
+/* Stub for kernels built without CONFIG_ANON_VMA_NAME: there is no name. */
+static inline struct anon_vma_name *
+madvise_get_anon_name(struct mm_struct *mm, unsigned long start)
+{
+	return NULL;
+}
#endif
#ifdef CONFIG_UNACCEPTED_MEMORY
@@ -291,6 +291,9 @@ struct prctl_mm_map {
#define PR_SET_VMA 0x53564d41
# define PR_SET_VMA_ANON_NAME 0
+/*
+ * PR_GET_VMA: prctl() option that reads back a VMA attribute; arg2 selects
+ * the attribute. PR_GET_VMA_ANON_NAME copies the anonymous-memory name of
+ * the VMA at arg3 into the user buffer passed in arg4.
+ */
+#define PR_GET_VMA 0x53564d42
+# define PR_GET_VMA_ANON_NAME 0
+
#define PR_GET_AUXV 0x41555856
#define PR_SET_MEMORY_MERGE 67
@@ -2359,12 +2359,48 @@ static int prctl_set_vma(unsigned long opt, unsigned long addr,
return error;
}
+/*
+ * prctl_get_vma() - handle prctl(PR_GET_VMA, opt, addr, buf, arg).
+ * @opt:  attribute selector; only PR_GET_VMA_ANON_NAME is handled.
+ * @addr: address used to look up the VMA in the current process.
+ * @buf:  userspace buffer that receives the NUL-terminated name.
+ * @arg:  unused.
+ *
+ * Returns 0 on success, -EFAULT when no name is found for @addr or the copy
+ * to userspace fails, and -EINVAL for an unknown @opt.
+ */
+static int prctl_get_vma(unsigned long opt, unsigned long addr,
+			 unsigned long buf, unsigned long arg)
+{
+	struct mm_struct *mm = current->mm;
+	char __user *u_buf = (char __user *)buf;
+	struct anon_vma_name *anon_name;
+	int error = 0;
+
+	switch (opt) {
+	case PR_GET_VMA_ANON_NAME:
+		/*
+		 * Only the lookup needs the mmap lock. The name is returned
+		 * with a reference held, so it stays valid after unlocking.
+		 */
+		mmap_read_lock(mm);
+		anon_name = madvise_get_anon_name(mm, addr);
+		mmap_read_unlock(mm);
+
+		if (!anon_name) {
+			error = -EFAULT;
+			break;
+		}
+
+		/*
+		 * copy_to_user() may fault on @buf, and the fault handler
+		 * takes the mmap lock itself; copying with the lock already
+		 * held could deadlock against a waiting writer, so the copy
+		 * is done after the lock has been dropped.
+		 */
+		if (copy_to_user(u_buf, anon_name->name,
+				 strlen(anon_name->name) + 1))
+			error = -EFAULT;
+
+		anon_vma_name_put(anon_name);
+		break;
+	default:
+		error = -EINVAL;
+	}
+	return error;
+}
+
#else /* CONFIG_ANON_VMA_NAME */
static int prctl_set_vma(unsigned long opt, unsigned long start,
unsigned long size, unsigned long arg)
{
return -EINVAL;
}
+
+/* CONFIG_ANON_VMA_NAME is disabled: PR_GET_VMA is rejected with -EINVAL. */
+static int prctl_get_vma(unsigned long opt, unsigned long addr,
+			 unsigned long buf, unsigned long arg)
+{
+	return -EINVAL;
+}
#endif /* CONFIG_ANON_VMA_NAME */
static inline unsigned long get_current_mdwe(void)
@@ -2712,6 +2748,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_SET_VMA:
error = prctl_set_vma(arg2, arg3, arg4, arg5);
break;
+ case PR_GET_VMA:
+ error = prctl_get_vma(arg2, arg3, arg4, arg5);
+ break;
case PR_GET_AUXV:
if (arg4 || arg5)
return -EINVAL;
@@ -1287,6 +1287,21 @@ static int madvise_vma_anon_name(struct vm_area_struct *vma,
return error;
}
+/*
+ * Look up the anonymous-memory name of the VMA that contains @start.
+ *
+ * Returns NULL when no VMA in @mm contains @start; otherwise returns
+ * whatever anon_vma_name() reports for that VMA, after taking a reference
+ * on it with anon_vma_name_get(). The caller releases the reference with
+ * anon_vma_name_put(). The caller in prctl_get_vma() holds the mmap read
+ * lock across this call.
+ */
+struct anon_vma_name *madvise_get_anon_name(struct mm_struct *mm,
+					    unsigned long start)
+{
+	struct vm_area_struct *vma;
+	struct anon_vma_name *anon_name;
+
+	/*
+	 * find_vma() returns the first VMA that ends above @start, which may
+	 * begin after @start. Reject that case so an unmapped address never
+	 * reports the name of a neighbouring VMA.
+	 */
+	vma = find_vma(mm, start);
+	if (!vma || vma->vm_start > start)
+		return NULL;
+
+	anon_name = anon_vma_name(vma);
+	anon_vma_name_get(anon_name);
+
+	return anon_name;
+}
+
int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
unsigned long len_in, struct anon_vma_name *anon_name)
{