[v2,1/2] exec: add PR_HIDE_SELF_EXE prctl
Commit Message
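This patch adds a new pair of prctl options, PR_SET_HIDE_SELF_EXE and
PR_GET_HIDE_SELF_EXE, which allow a process to hide its own /proc/*/exe
file. Once PR_SET_HIDE_SELF_EXE has been used, every access to
/proc/*/exe for the calling process fails with ENOENT. The flag is
cleared again when the process successfully calls execve().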
This is useful for preventing issues like CVE-2019-5736, where an
attacker can gain host root access by overwriting the binary
in OCI runtimes through file-descriptor mishandling in containers.
The current fix for CVE-2019-5736 is to create a read-only copy or
a bind-mount of the current executable, and then re-exec the current
process. With the new prctl, the read-only copy or bind-mount copy is
not needed anymore.
While map_files/ might also contain symlinks to files on the host, the
permission checks in proc_map_files_get_link() are already sufficient.
Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
---
fs/exec.c | 1 +
fs/proc/base.c | 8 +++++---
include/linux/sched.h | 5 +++++
include/uapi/linux/prctl.h | 3 +++
kernel/sys.c | 9 +++++++++
tools/include/uapi/linux/prctl.h | 3 +++
6 files changed, 26 insertions(+), 3 deletions(-)
v1: https://lkml.org/lkml/2023/1/4/334
Differences from v1:
- added more information to the commit message explaining why map_files
does not require the same protection.
- changed the test to verify PR_HIDE_SELF_EXE cannot be unset after
a fork.
Comments
On Thu, Jan 19, 2023 at 06:07:17PM +0100, Giuseppe Scrivano wrote:
> This patch adds a new prctl called PR_HIDE_SELF_EXE which allows
> processes to hide their own /proc/*/exe file. When this prctl is
> used, every access to /proc/*/exe for the calling process will
> fail with ENOENT.
>
> This is useful for preventing issues like CVE-2019-5736, where an
> attacker can gain host root access by overwriting the binary
> in OCI runtimes through file-descriptor mishandling in containers.
>
> The current fix for CVE-2019-5736 is to create a read-only copy or
> a bind-mount of the current executable, and then re-exec the current
> process. With the new prctl, the read-only copy or bind-mount copy is
> not needed anymore.
>
> While map_files/ also might contain symlinks to files in host,
> proc_map_files_get_link() permissions checks are already sufficient.
>
> Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
> ---
> fs/exec.c | 1 +
> fs/proc/base.c | 8 +++++---
> include/linux/sched.h | 5 +++++
> include/uapi/linux/prctl.h | 3 +++
> kernel/sys.c | 9 +++++++++
> tools/include/uapi/linux/prctl.h | 3 +++
> 6 files changed, 26 insertions(+), 3 deletions(-)
>
> v1: https://lkml.org/lkml/2023/1/4/334
>
> Differences from v1:
>
> - amended more information in the commit message wrt map_files not
> requiring the same protection.
> - changed the test to verify PR_HIDE_SELF_EXE cannot be unset after
> a fork.
>
> diff --git a/fs/exec.c b/fs/exec.c
> index ab913243a367..5a5dd964c3a3 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1855,6 +1855,7 @@ static int bprm_execve(struct linux_binprm *bprm,
> /* execve succeeded */
> current->fs->in_exec = 0;
> current->in_execve = 0;
> + task_clear_hide_self_exe(current);
[snip]
> rseq_execve(current);
> acct_update_integrals(current);
> task_numa_free(current, false);
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index 9e479d7d202b..959968e2da0d 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -1723,19 +1723,21 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
> {
> struct task_struct *task;
> struct file *exe_file;
> + long hide_self_exe;
>
> task = get_proc_task(d_inode(dentry));
> if (!task)
> return -ENOENT;
> exe_file = get_task_exe_file(task);
> + hide_self_exe = task_hide_self_exe(task);
Perhaps I am missing something, but where is task_clear_hide_self_exe()
and task_hide_self_exe() defined?
Brian
Brian Masney <bmasney@redhat.com> writes:
> On Thu, Jan 19, 2023 at 06:07:17PM +0100, Giuseppe Scrivano wrote:
>> This patch adds a new prctl called PR_HIDE_SELF_EXE which allows
>> processes to hide their own /proc/*/exe file. When this prctl is
>> used, every access to /proc/*/exe for the calling process will
>> fail with ENOENT.
>>
>> This is useful for preventing issues like CVE-2019-5736, where an
>> attacker can gain host root access by overwriting the binary
>> in OCI runtimes through file-descriptor mishandling in containers.
>>
>> The current fix for CVE-2019-5736 is to create a read-only copy or
>> a bind-mount of the current executable, and then re-exec the current
>> process. With the new prctl, the read-only copy or bind-mount copy is
>> not needed anymore.
>>
>> While map_files/ also might contain symlinks to files in host,
>> proc_map_files_get_link() permissions checks are already sufficient.
>>
>> Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
>> ---
>> fs/exec.c | 1 +
>> fs/proc/base.c | 8 +++++---
>> include/linux/sched.h | 5 +++++
>> include/uapi/linux/prctl.h | 3 +++
>> kernel/sys.c | 9 +++++++++
>> tools/include/uapi/linux/prctl.h | 3 +++
>> 6 files changed, 26 insertions(+), 3 deletions(-)
>>
>> v1: https://lkml.org/lkml/2023/1/4/334
>>
>> Differences from v1:
>>
>> - amended more information in the commit message wrt map_files not
>> requiring the same protection.
>> - changed the test to verify PR_HIDE_SELF_EXE cannot be unset after
>> a fork.
>>
>> diff --git a/fs/exec.c b/fs/exec.c
>> index ab913243a367..5a5dd964c3a3 100644
>> --- a/fs/exec.c
>> +++ b/fs/exec.c
>> @@ -1855,6 +1855,7 @@ static int bprm_execve(struct linux_binprm *bprm,
>> /* execve succeeded */
>> current->fs->in_exec = 0;
>> current->in_execve = 0;
>> + task_clear_hide_self_exe(current);
>
> [snip]
>
>> rseq_execve(current);
>> acct_update_integrals(current);
>> task_numa_free(current, false);
>> diff --git a/fs/proc/base.c b/fs/proc/base.c
>> index 9e479d7d202b..959968e2da0d 100644
>> --- a/fs/proc/base.c
>> +++ b/fs/proc/base.c
>> @@ -1723,19 +1723,21 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
>> {
>> struct task_struct *task;
>> struct file *exe_file;
>> + long hide_self_exe;
>>
>> task = get_proc_task(d_inode(dentry));
>> if (!task)
>> return -ENOENT;
>> exe_file = get_task_exe_file(task);
>> + hide_self_exe = task_hide_self_exe(task);
>
> Perhaps I am missing something, but where is task_clear_hide_self_exe()
> and task_hide_self_exe() defined?
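They are generated in include/linux/sched.h by the following macro
invocations, which define task_hide_self_exe(), task_set_hide_self_exe()
and task_clear_hide_self_exe():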
TASK_PFA_TEST(HIDE_SELF_EXE, hide_self_exe)
TASK_PFA_SET(HIDE_SELF_EXE, hide_self_exe)
TASK_PFA_CLEAR(HIDE_SELF_EXE, hide_self_exe)
Regards,
Giuseppe
@@ -1855,6 +1855,7 @@ static int bprm_execve(struct linux_binprm *bprm,
/* execve succeeded */
current->fs->in_exec = 0;
current->in_execve = 0;
+ task_clear_hide_self_exe(current);
rseq_execve(current);
acct_update_integrals(current);
task_numa_free(current, false);
@@ -1723,19 +1723,20 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
{
struct task_struct *task;
struct file *exe_file;
task = get_proc_task(d_inode(dentry));
if (!task)
return -ENOENT;
- exe_file = get_task_exe_file(task);
+ /* Hidden: take no exe_file reference, so nothing needs fput(). */
+ exe_file = task_hide_self_exe(task) ? NULL : get_task_exe_file(task);
put_task_struct(task);
if (exe_file) {
*exe_path = exe_file->f_path;
path_get(&exe_file->f_path);
fput(exe_file);
return 0;
- } else
- return -ENOENT;
+ }
+ return -ENOENT;
}
static const char *proc_pid_get_link(struct dentry *dentry,
@@ -1790,6 +1790,7 @@ static __always_inline bool is_percpu_thread(void)
#define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */
#define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */
#define PFA_SPEC_SSB_NOEXEC 7 /* Speculative Store Bypass clear on execve() */
+#define PFA_HIDE_SELF_EXE 8 /* Hide /proc/self/exe for the process */
#define TASK_PFA_TEST(name, func) \
static inline bool task_##func(struct task_struct *p) \
@@ -1832,6 +1833,10 @@ TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable)
TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
+TASK_PFA_TEST(HIDE_SELF_EXE, hide_self_exe)
+TASK_PFA_SET(HIDE_SELF_EXE, hide_self_exe)
+TASK_PFA_CLEAR(HIDE_SELF_EXE, hide_self_exe)
+
static inline void
current_restore_flags(unsigned long orig_flags, unsigned long flags)
{
@@ -284,4 +284,7 @@ struct prctl_mm_map {
#define PR_SET_VMA 0x53564d41
# define PR_SET_VMA_ANON_NAME 0
+#define PR_SET_HIDE_SELF_EXE 65
+#define PR_GET_HIDE_SELF_EXE 66
+
#endif /* _LINUX_PRCTL_H */
@@ -2626,6 +2626,15 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_SET_VMA:
error = prctl_set_vma(arg2, arg3, arg4, arg5);
break;
+ case PR_SET_HIDE_SELF_EXE:
+ if (arg2 != 1 || arg3 || arg4 || arg5)
+ return -EINVAL;
+ task_set_hide_self_exe(current);
+ break;
+ case PR_GET_HIDE_SELF_EXE:
+ if (arg2 || arg3 || arg4 || arg5)
+ return -EINVAL;
+ return task_hide_self_exe(current) ? 1 : 0;
default:
error = -EINVAL;
break;
@@ -284,4 +284,7 @@ struct prctl_mm_map {
#define PR_SET_VMA 0x53564d41
# define PR_SET_VMA_ANON_NAME 0
+#define PR_SET_HIDE_SELF_EXE 65
+#define PR_GET_HIDE_SELF_EXE 66
+
#endif /* _LINUX_PRCTL_H */