[v2,2/2] pidfd: change pidfd_send_signal() to respect PIDFD_THREAD

Message ID 20240209130650.GA8048@redhat.com
State New
Headers
Series [v2,1/2] signal: add the "int si_code" arg to prepare_kill_siginfo() |

Commit Message

Oleg Nesterov Feb. 9, 2024, 1:06 p.m. UTC
  Turn kill_pid_info() into kill_pid_info_type(), this allows to pass any
pid_type to group_send_sig_info(), despite its name it should work fine
even if type = PIDTYPE_PID.

Change pidfd_send_signal() to use PIDTYPE_PID or PIDTYPE_TGID depending
on PIDFD_THREAD.

While at it kill another TODO comment in pidfd_show_fdinfo(). As Christian
expains fdinfo reports f_flags, userspace can already detect PIDFD_THREAD.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/fork.c   |  2 --
 kernel/signal.c | 38 ++++++++++++++++++++++----------------
 2 files changed, 22 insertions(+), 18 deletions(-)
  

Comments

Christian Brauner Feb. 9, 2024, 3:11 p.m. UTC | #1
On Fri, Feb 09, 2024 at 02:06:50PM +0100, Oleg Nesterov wrote:
> Turn kill_pid_info() into kill_pid_info_type(), this allows to pass any
> pid_type to group_send_sig_info(), despite its name it should work fine
> even if type = PIDTYPE_PID.
> 
> Change pidfd_send_signal() to use PIDTYPE_PID or PIDTYPE_TGID depending
> on PIDFD_THREAD.
> 
> While at it kill another TODO comment in pidfd_show_fdinfo(). As Christian
> expains fdinfo reports f_flags, userspace can already detect PIDFD_THREAD.
> 
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>
> ---

Looks good to me,
Reviewed-by: Christian Brauner <brauner@kernel.org>
  
Christian Brauner Feb. 9, 2024, 3:15 p.m. UTC | #2
On Fri, Feb 09, 2024 at 02:06:50PM +0100, Oleg Nesterov wrote:
> Turn kill_pid_info() into kill_pid_info_type(), this allows to pass any
> pid_type to group_send_sig_info(), despite its name it should work fine
> even if type = PIDTYPE_PID.
> 
> Change pidfd_send_signal() to use PIDTYPE_PID or PIDTYPE_TGID depending
> on PIDFD_THREAD.
> 
> While at it kill another TODO comment in pidfd_show_fdinfo(). As Christian
> expains fdinfo reports f_flags, userspace can already detect PIDFD_THREAD.
> 
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>
> ---

How do you feel about the following (untested...) addition?
I've played with PIDFD_SIGNAL_PROCESS_GROUP as well but that code is
fairly new to me so I would need some more time.

From a473512ed8de2e864961f7009e2f20ce4e7a0778 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Fri, 9 Feb 2024 15:49:45 +0100
Subject: [PATCH] [RFC] pidfd: allow to override signal scope in
 pidfd_send_signal()

Right now we determine the scope of the signal based on the type of
pidfd. There are use-cases where it's useful to override the scope of
the signal. For example in [1]. Add flags to determine the scope of the
signal:

(1) PIDFD_SIGNAL_THREAD: send signal to specific thread
(2) PIDFD_SIGNAL_THREAD_GROUP: send signal to thread-group

I've put off PIDFD_SIGNAL_PROCESS_GROUP for now since I need to stare at
the code a bit longer how this would work.

Link: https://github.com/systemd/systemd/issues/31093 [1]
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/uapi/linux/pidfd.h |  4 ++++
 kernel/signal.c            | 35 ++++++++++++++++++++++++++++-------
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
index 2e6461459877..757ed5a668c6 100644
--- a/include/uapi/linux/pidfd.h
+++ b/include/uapi/linux/pidfd.h
@@ -10,4 +10,8 @@
 #define PIDFD_NONBLOCK	O_NONBLOCK
 #define PIDFD_THREAD	O_EXCL
 
+/* Flags for pidfd_send_signal(). */
+#define PIDFD_SIGNAL_THREAD		(1UL << 0)
+#define PIDFD_SIGNAL_THREAD_GROUP	(1UL << 1)
+
 #endif /* _UAPI_LINUX_PIDFD_H */
diff --git a/kernel/signal.c b/kernel/signal.c
index 9578ce17d85d..1d6586964099 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3872,6 +3872,9 @@ static struct pid *pidfd_to_pid(const struct file *file)
 	return tgid_pidfd_to_pid(file);
 }
 
+#define PIDFD_SEND_SIGNAL_FLAGS \
+	(PIDFD_SIGNAL_THREAD | PIDFD_SIGNAL_THREAD_GROUP)
+
 /**
  * sys_pidfd_send_signal - Signal a process through a pidfd
  * @pidfd:  file descriptor of the process
@@ -3889,14 +3892,19 @@ static struct pid *pidfd_to_pid(const struct file *file)
 SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
 		siginfo_t __user *, info, unsigned int, flags)
 {
-	int ret;
+	int ret, si_code;
 	struct fd f;
 	struct pid *pid;
 	kernel_siginfo_t kinfo;
 	bool thread;
+	enum pid_type si_scope;
 
 	/* Enforce flags be set to 0 until we add an extension. */
-	if (flags)
+	if (flags & ~PIDFD_SEND_SIGNAL_FLAGS)
+		return -EINVAL;
+
+	/* Ensure that only a single signal scope determining flag is set. */
+	if (hweight32(flags & PIDFD_SEND_SIGNAL_FLAGS) > 1)
 		return -EINVAL;
 
 	f = fdget(pidfd);
@@ -3914,7 +3922,22 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
 	if (!access_pidfd_pidns(pid))
 		goto err;
 
-	thread = f.file->f_flags & PIDFD_THREAD;
+	switch (flags) {
+	case 0:
+		/* Infer scope from the type of pidfd. */
+		thread = (f.file->f_flags & PIDFD_THREAD);
+		si_scope = thread ? PIDTYPE_PID : PIDTYPE_TGID;
+		si_code = thread ? SI_TKILL : SI_USER;
+		break;
+	case PIDFD_SIGNAL_THREAD:
+		si_scope = PIDTYPE_PID;
+		si_code = SI_TKILL;
+		break;
+	case PIDFD_SIGNAL_THREAD_GROUP:
+		si_scope = PIDTYPE_TGID;
+		si_code = SI_USER;
+		break;
+	}
 
 	if (info) {
 		ret = copy_siginfo_from_user_any(&kinfo, info);
@@ -3931,12 +3954,10 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
 		    (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL))
 			goto err;
 	} else {
-		prepare_kill_siginfo(sig, &kinfo,
-				     thread ? SI_TKILL : SI_USER);
+		prepare_kill_siginfo(sig, &kinfo, si_code);
 	}
 
-	ret = kill_pid_info_type(sig, &kinfo, pid,
-				 thread ? PIDTYPE_PID : PIDTYPE_TGID);
+	ret = kill_pid_info_type(sig, &kinfo, pid, si_scope);
 err:
 	fdput(f);
 	return ret;
  
Oleg Nesterov Feb. 9, 2024, 3:43 p.m. UTC | #3
On 02/09, Christian Brauner wrote:
>
> How do you feel about the following (untested...) addition?

LGTM, but let me read this patch once again tomorrow, I have
a headache today.

> I've played with PIDFD_SIGNAL_PROCESS_GROUP as well but that code is
> fairly new to me so I would need some more time.

Heh, I was going to send another email to discuss this ;)

Should be simple, but may be need some simple preparations.

Especially if we also want PIDFD_SIGNAL_SESSION_GROUP.

So the question: do you think we also want PIDFD_SIGNAL_SESSION_GROUP?

Oleg.
  
Christian Brauner Feb. 9, 2024, 3:49 p.m. UTC | #4
On Fri, Feb 09, 2024 at 04:43:05PM +0100, Oleg Nesterov wrote:
> On 02/09, Christian Brauner wrote:
> >
> > How do you feel about the following (untested...) addition?
> 
> LGTM, but let me read this patch once again tomorrow, I have
> a headache today.

Bah, feel better!

> 
> > I've played with PIDFD_SIGNAL_PROCESS_GROUP as well but that code is
> > fairly new to me so I would need some more time.
> 
> Heh, I was going to send another email to discuss this ;)
> 
> Should be simple, but may be need some simple preparations.
> 
> Especially if we also want PIDFD_SIGNAL_SESSION_GROUP.
> 
> So the question: do you think we also want PIDFD_SIGNAL_SESSION_GROUP?

Thought about this as well and my feeling is to wait until someone asks
for it. Right now, we have a reason to add PIDFD_SIGNAL_PROCESS_GROUP
because of Andy's use-case. If someone has a use-case for session groups
then yes. Otherwise I'd just not bother?
  
Oleg Nesterov Feb. 9, 2024, 3:56 p.m. UTC | #5
On 02/09, Christian Brauner wrote:
>
> On Fri, Feb 09, 2024 at 04:43:05PM +0100, Oleg Nesterov wrote:
> >
> > So the question: do you think we also want PIDFD_SIGNAL_SESSION_GROUP?
>
> Thought about this as well and my feeling is to wait until someone asks
> for it. Right now, we have a reason to add PIDFD_SIGNAL_PROCESS_GROUP
> because of Andy's use-case. If someone has a use-case for session groups
> then yes. Otherwise I'd just not bother?

OK, agreed.

and I forgot to mention, if you want to add PIDFD_SIGNAL_PRGP you can
look at __kill_pgrp_info().

Oleg.
  
Tycho Andersen Feb. 9, 2024, 7:08 p.m. UTC | #6
On Fri, Feb 09, 2024 at 02:06:50PM +0100, Oleg Nesterov wrote:
> Turn kill_pid_info() into kill_pid_info_type(), this allows to pass any
> pid_type to group_send_sig_info(), despite its name it should work fine
> even if type = PIDTYPE_PID.
> 
> Change pidfd_send_signal() to use PIDTYPE_PID or PIDTYPE_TGID depending
> on PIDFD_THREAD.
> 
> While at it kill another TODO comment in pidfd_show_fdinfo(). As Christian
> expains fdinfo reports f_flags, userspace can already detect PIDFD_THREAD.
> 
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>

Reviewed-by: Tycho Andersen <tandersen@netflix.com>
  
Christian Brauner Feb. 10, 2024, 10:23 a.m. UTC | #7
> and I forgot to mention, if you want to add PIDFD_SIGNAL_PRGP you can
> look at __kill_pgrp_info().

Yeah, I did that and there's a semantical twist in the old kill(2)
system call that made me think:

(1) kill(-1234) => kill process group with id 1234
(2) kill(0)     => kill process group of @current

which implementation wise is indicated by

__kill_pgrp_info(..., pid ? find_vpid(-pid) ? task_pgrp(current))

We're obviously not going to implement (2) as that doesn't really make a
sense for pidfd_send_signal().

But (1) is also wrong for pidfd_send_signal(). If we'd ever implement
(1) it should be via pidfd_open(1234, PIDFD_PROCESS_GROUP).

So if PIDFD_PROCESS_GROUP is set then we want to send a signal to the
process group that @pidfd is in. Unless there's reasons we can't do
this. I tried to draft this and what I have is a totally uncompiled
draft.

I was unsure how best to cleanly express how to take the process group
from the struct pid of that @pidfd. So I modeled it after how we do it
for PIDTYPE_TGID.

From 8d886b07cc1b17cc6dd3a9ebf19c51212282b6f5 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Fri, 9 Feb 2024 15:49:45 +0100
Subject: [PATCH] [RFC] pidfd: allow to override signal scope in
 pidfd_send_signal()

Right now we determine the scope of the signal based on the type of
pidfd. There are use-cases where it's useful to override the scope of
the signal. For example in [1]. Add flags to determine the scope of the
signal:

(1) PIDFD_SIGNAL_THREAD: send signal to specific thread reference by @pidfd
(2) PIDFD_SIGNAL_THREAD_GROUP: send signal to thread-group of @pidfd
(2) PIDFD_SIGNAL_PROCESS_GROUP: send signal to process-group of @pidfd

There's a semantical quirk in the old kill(2) system call that made me think:

(1) kill(-1234) => kill process group with id 1234
(2) kill(0)     => kill process group of @current

as indicated by

__kill_pgrp_info(..., pid ? find_vpid(-pid) ? task_pgrp(current))

We're obviously not going to implement (2) as that doesn't really make a
lot of sense for pidfd_send_signal().

But (1) is also wrong for pidfd_send_signal(). If we'd ever implement
(1) it should be via pidfd_open(..., PIDFD_PROCESS_GROUP).

Link: https://github.com/systemd/systemd/issues/31093 [1]
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/uapi/linux/pidfd.h |  5 +++++
 kernel/signal.c            | 46 ++++++++++++++++++++++++++++++++++----
 2 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
index 2e6461459877..72ec000a97cd 100644
--- a/include/uapi/linux/pidfd.h
+++ b/include/uapi/linux/pidfd.h
@@ -10,4 +10,9 @@
 #define PIDFD_NONBLOCK	O_NONBLOCK
 #define PIDFD_THREAD	O_EXCL
 
+/* Flags for pidfd_send_signal(). */
+#define PIDFD_SIGNAL_THREAD		(1UL << 0)
+#define PIDFD_SIGNAL_THREAD_GROUP	(1UL << 1)
+#define PIDFD_SIGNAL_PROCESS_GROUP	(1UL << 2)
+
 #endif /* _UAPI_LINUX_PIDFD_H */
diff --git a/kernel/signal.c b/kernel/signal.c
index 8b8169623850..f0f9a5a822b4 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3873,6 +3873,23 @@ static struct pid *pidfd_to_pid(const struct file *file)
 	return tgid_pidfd_to_pid(file);
 }
 
+static int kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pid)
+{
+	struct task_struct *p;
+	int ret = -ESRCH;
+
+	read_lock(&tasklist_lock);
+	p = pid_task(pid, PIDTYPE_PID);
+	if (p)
+		ret = __kill_pgrp_info(sig, info, task_pgrp(p));
+	read_unlock(&tasklist_lock);
+	return ret;
+}
+
+#define PIDFD_SEND_SIGNAL_FLAGS                            \
+	(PIDFD_SIGNAL_THREAD | PIDFD_SIGNAL_THREAD_GROUP | \
+	 PIDFD_SIGNAL_PROCESS_GROUP)
+
 /**
  * sys_pidfd_send_signal - Signal a process through a pidfd
  * @pidfd:  file descriptor of the process
@@ -3897,7 +3914,11 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
 	enum pid_type type;
 
 	/* Enforce flags be set to 0 until we add an extension. */
-	if (flags)
+	if (flags & ~PIDFD_SEND_SIGNAL_FLAGS)
+		return -EINVAL;
+
+	/* Ensure that only a single signal scope determining flag is set. */
+	if (hweight32(flags & PIDFD_SEND_SIGNAL_FLAGS) > 1)
 		return -EINVAL;
 
 	f = fdget(pidfd);
@@ -3915,10 +3936,24 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
 	if (!access_pidfd_pidns(pid))
 		goto err;
 
-	if (f.file->f_flags & PIDFD_THREAD)
+	switch (flags) {
+	case 0:
+		/* Infer scope from the type of pidfd. */
+		if (f.file->f_flags & PIDFD_THREAD)
+			type = PIDTYPE_PID;
+		else
+			type = PIDTYPE_TGID;
+		break;
+	case PIDFD_SIGNAL_THREAD:
 		type = PIDTYPE_PID;
-	else
+		break;
+	case PIDFD_SIGNAL_THREAD_GROUP:
 		type = PIDTYPE_TGID;
+		break;
+	case PIDFD_SIGNAL_PROCESS_GROUP:
+		type = PIDTYPE_PGID;
+		break;
+	}
 
 	if (info) {
 		ret = copy_siginfo_from_user_any(&kinfo, info);
@@ -3938,7 +3973,10 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
 		prepare_kill_siginfo(sig, &kinfo, type);
 	}
 
-	ret = kill_pid_info_type(sig, &kinfo, pid, type);
+	if (type == PIDFD_SIGNAL_PROCESS_GROUP)
+		ret = kill_pgrp_info(sig, &kinfo, pid);
+	else
+		ret = kill_pid_info_type(sig, &kinfo, pid, type);
 err:
 	fdput(f);
 	return ret;
  
Oleg Nesterov Feb. 10, 2024, 12:30 p.m. UTC | #8
Christian,

Thanks again! the last 2 commits in vfs.pidfd look good to me.

As for this patch, I am not sure I understand your concerns, and I
have another concern, please see below.

For the moment, please forget about PIDFD_THREAD.

On 02/10, Christian Brauner wrote:
>
> (1) kill(-1234) => kill process group with id 1234
> (2) kill(0)     => kill process group of @current
>
> which implementation wise is indicated by
>
> __kill_pgrp_info(..., pid ? find_vpid(-pid) ? task_pgrp(current))
>
> We're obviously not going to implement (2) as that doesn't really make a
> sense for pidfd_send_signal().

Sure,

> But (1) is also wrong for pidfd_send_signal(). If we'd ever implement
> (1) it should be via pidfd_open(1234, PIDFD_PROCESS_GROUP).

Why do you think we need another flag for open() ?

To me it looks fine if we allow to send the signal to pgrp if
flags & PIDFD_SIGNAL_PROCESS_GROUP.

And pidfd_send_signal() can just do

	if (PIDFD_SIGNAL_THREAD_GROUP)
		ret = __kill_pgrp_info(sig, kinfo, pid);
	else
		ret = kill_pid_info_type(...);
		
(yes, yes, this needs tasklist, just a pseudo code to simpliy)

Now lets recall about PIDFD_THREAD.

If the target task is a group leader - there is no difference.

If it is not a leader - then __kill_pgrp_info() will always return
-ESRCH, do_each_pid_task(PIDTYPE_PGID) won't find any task.

And personally I think this is all we need.

------------------------------------------------------------------------------
But if you want to make PIDFD_SIGNAL_THREAD_GROUP work even if the
target task is not a leader, then yes, we need something like

	task_pgrp(pid_task(pid, PIDTYPE_PID))

like you did in the new kill_pgrp_info() helper in this patch.

I won't argue, but do you think this makes a lot of sense?

Oleg.
  
Oleg Nesterov Feb. 10, 2024, 12:47 p.m. UTC | #9
On 02/10, Oleg Nesterov wrote:
>
> On 02/10, Christian Brauner wrote:
> >
> > (1) kill(-1234) => kill process group with id 1234
> > (2) kill(0)     => kill process group of @current
> >
> > which implementation wise is indicated by
> >
> > __kill_pgrp_info(..., pid ? find_vpid(-pid) ? task_pgrp(current))
> >
> > We're obviously not going to implement (2) as that doesn't really make a
> > sense for pidfd_send_signal().
>
> Sure,
>
> > But (1) is also wrong for pidfd_send_signal(). If we'd ever implement
> > (1) it should be via pidfd_open(1234, PIDFD_PROCESS_GROUP).
>
> Why do you think we need another flag for open() ?
>
> To me it looks fine if we allow to send the signal to pgrp if
> flags & PIDFD_SIGNAL_PROCESS_GROUP.
>
> And pidfd_send_signal() can just do
>
> 	if (PIDFD_SIGNAL_THREAD_GROUP)
> 		ret = __kill_pgrp_info(sig, kinfo, pid);
> 	else
> 		ret = kill_pid_info_type(...);
>
> (yes, yes, this needs tasklist, just a pseudo code to simpliy)
>
> Now lets recall about PIDFD_THREAD.
>
> If the target task is a group leader - there is no difference.
>
> If it is not a leader - then __kill_pgrp_info() will always return
> -ESRCH, do_each_pid_task(PIDTYPE_PGID) won't find any task.

To clarify, __kill_pgrp_info() should send the signal to pgrp
identified by @pid, so it will return ESRCH if the target didn't
do setpgid/etc.

> And personally I think this is all we need.

Yes. I don't think we should send a signal to task_pgrp(target).

And this matches sys_kill(). I mean,

	pidfd = pidfd_open(1234);
	pidfd_send_signal(pidfd, PIDFD_PROCESS_GROUP);

should act as kill(-1234).

> ------------------------------------------------------------------------------
> But if you want to make PIDFD_SIGNAL_THREAD_GROUP work even if the
> target task is not a leader, then yes, we need something like
>
> 	task_pgrp(pid_task(pid, PIDTYPE_PID))
>
> like you did in the new kill_pgrp_info() helper in this patch.
>
> I won't argue, but do you think this makes a lot of sense?
>
> Oleg.
  
Christian Brauner Feb. 10, 2024, 12:54 p.m. UTC | #10
On Sat, Feb 10, 2024 at 01:30:33PM +0100, Oleg Nesterov wrote:
> Christian,
> 
> Thanks again! the last 2 commits in vfs.pidfd look good to me.
> 
> As for this patch, I am not sure I understand your concerns, and I
> have another concern, please see below.
> 
> For the moment, please forget about PIDFD_THREAD.
> 
> On 02/10, Christian Brauner wrote:
> >
> > (1) kill(-1234) => kill process group with id 1234
> > (2) kill(0)     => kill process group of @current
> >
> > which implementation wise is indicated by
> >
> > __kill_pgrp_info(..., pid ? find_vpid(-pid) ? task_pgrp(current))
> >
> > We're obviously not going to implement (2) as that doesn't really make a
> > sense for pidfd_send_signal().
> 
> Sure,
> 
> > But (1) is also wrong for pidfd_send_signal(). If we'd ever implement
> > (1) it should be via pidfd_open(1234, PIDFD_PROCESS_GROUP).
> 
> Why do you think we need another flag for open() ?

We don't need one. We could if we wanted to was my point. But let's
ignore that for now.

> 
> To me it looks fine if we allow to send the signal to pgrp if
> flags & PIDFD_SIGNAL_PROCESS_GROUP.

Yes, that's what I want too, I just wonder about the semantics.

> 
> And pidfd_send_signal() can just do
> 
> 	if (PIDFD_SIGNAL_THREAD_GROUP)
> 		ret = __kill_pgrp_info(sig, kinfo, pid);
> 	else
> 		ret = kill_pid_info_type(...);
> 		
> (yes, yes, this needs tasklist, just a pseudo code to simpliy)
> 
> Now lets recall about PIDFD_THREAD.
> 
> If the target task is a group leader - there is no difference.
> 
> If it is not a leader - then __kill_pgrp_info() will always return
> -ESRCH, do_each_pid_task(PIDTYPE_PGID) won't find any task.
> 
> And personally I think this is all we need.
> 
> ------------------------------------------------------------------------------
> But if you want to make PIDFD_SIGNAL_THREAD_GROUP work even if the
> target task is not a leader, then yes, we need something like
> 
> 	task_pgrp(pid_task(pid, PIDTYPE_PID))
> 
> like you did in the new kill_pgrp_info() helper in this patch.
> 
> I won't argue, but do you think this makes a lot of sense?

The question is what is more useful for userspace when they do:
pidfd_send_signal(1234, PIDFD_SEND_PROCESS_GROUP)?

(1) They either mean to signal a process group that is headed by 1234.
(2) Or they want to signal a process group of which 1234 is a member or
    the leader.

From a usability perspective (1) is a lot more restrictive because it
requires @pidfd to refer to a process group leader. Whereas (2) doesn't
require userspace to hold a @pidfd to a process group leader. It is
enough to just hold a @pidfd. In other words, (2) has wider scope.

And intuitively that is what I had thought is more useful. But by that
logic PIDFD_SEND_THREAD_GROUP would have to signal to the thread-group
that @pidfd is in regardless of whether @pidfd is actually a
thread-group leader.

Which is also what you're pointing out, afaict.
  
Oleg Nesterov Feb. 10, 2024, 1:15 p.m. UTC | #11
On 02/10, Christian Brauner wrote:
>
> The question is what is more useful for userspace when they do:
> pidfd_send_signal(1234, PIDFD_SEND_PROCESS_GROUP)?
>
> (1) They either mean to signal a process group that is headed by 1234.

Yes, this is what I had in mind, see also another email from me.
Simple, clear, and matches kill(-1234).

> (2) Or they want to signal a process group of which 1234 is a member or
>     the leader.

Somehow I didn't even consider this option when I thought about
PIDFD_SIGNAL_PGRP...

> From a usability perspective (1) is a lot more restrictive because it
> requires @pidfd to refer to a process group leader.

Yes, but to be honest (2) doesn't fit my head. Probably simply because
I always had (1) in mind...

But I won't argue if you think that (2) has useful usecases.

Oleg.
  
Christian Brauner Feb. 10, 2024, 2:26 p.m. UTC | #12
On Sat, Feb 10, 2024 at 02:15:18PM +0100, Oleg Nesterov wrote:
> On 02/10, Christian Brauner wrote:
> >
> > The question is what is more useful for userspace when they do:
> > pidfd_send_signal(1234, PIDFD_SEND_PROCESS_GROUP)?
> >
> > (1) They either mean to signal a process group that is headed by 1234.
> 
> Yes, this is what I had in mind, see also another email from me.
> Simple, clear, and matches kill(-1234).

I went for a walk and kept thinking about this and I agree with you.
It will require that 1234 will be a process group leader but I think
that this is ok to require that. The implementation becomes a lot
simpler of course.

From 6a9d6a6bd8b77cba210b9c28f0e6e047c7956e9f Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Fri, 9 Feb 2024 15:49:45 +0100
Subject: [PATCH v2] [RFC] pidfd: allow to override signal scope in
 pidfd_send_signal()

Right now we determine the scope of the signal based on the type of
pidfd. There are use-cases where it's useful to override the scope of
the signal. For example in [1]. Add flags to determine the scope of the
signal:

(1) PIDFD_SIGNAL_THREAD: send signal to specific thread referenced by @pidfd
(2) PIDFD_SIGNAL_THREAD_GROUP: send signal to thread-group headed by @pidfd
(2) PIDFD_SIGNAL_PROCESS_GROUP: send signal to process-group headed by @pidfd

Link: https://github.com/systemd/systemd/issues/31093 [1]
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/uapi/linux/pidfd.h |  5 +++++
 kernel/signal.c            | 44 +++++++++++++++++++++++++++++++-------
 2 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
index 2e6461459877..72ec000a97cd 100644
--- a/include/uapi/linux/pidfd.h
+++ b/include/uapi/linux/pidfd.h
@@ -10,4 +10,9 @@
 #define PIDFD_NONBLOCK	O_NONBLOCK
 #define PIDFD_THREAD	O_EXCL
 
+/* Flags for pidfd_send_signal(). */
+#define PIDFD_SIGNAL_THREAD		(1UL << 0)
+#define PIDFD_SIGNAL_THREAD_GROUP	(1UL << 1)
+#define PIDFD_SIGNAL_PROCESS_GROUP	(1UL << 2)
+
 #endif /* _UAPI_LINUX_PIDFD_H */
diff --git a/kernel/signal.c b/kernel/signal.c
index 8b8169623850..cee7cd27ec88 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1905,16 +1905,19 @@ int send_sig_fault_trapno(int sig, int code, void __user *addr, int trapno,
 	return send_sig_info(info.si_signo, &info, t);
 }
 
-int kill_pgrp(struct pid *pid, int sig, int priv)
+static int kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp)
 {
 	int ret;
-
 	read_lock(&tasklist_lock);
-	ret = __kill_pgrp_info(sig, __si_special(priv), pid);
+	ret = __kill_pgrp_info(sig, info, pgrp);
 	read_unlock(&tasklist_lock);
-
 	return ret;
 }
+
+int kill_pgrp(struct pid *pid, int sig, int priv)
+{
+	return kill_pgrp_info(sig, __si_special(priv), pid);
+}
 EXPORT_SYMBOL(kill_pgrp);
 
 int kill_pid(struct pid *pid, int sig, int priv)
@@ -3873,6 +3876,10 @@ static struct pid *pidfd_to_pid(const struct file *file)
 	return tgid_pidfd_to_pid(file);
 }
 
+#define PIDFD_SEND_SIGNAL_FLAGS                            \
+	(PIDFD_SIGNAL_THREAD | PIDFD_SIGNAL_THREAD_GROUP | \
+	 PIDFD_SIGNAL_PROCESS_GROUP)
+
 /**
  * sys_pidfd_send_signal - Signal a process through a pidfd
  * @pidfd:  file descriptor of the process
@@ -3897,7 +3904,11 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
 	enum pid_type type;
 
 	/* Enforce flags be set to 0 until we add an extension. */
-	if (flags)
+	if (flags & ~PIDFD_SEND_SIGNAL_FLAGS)
+		return -EINVAL;
+
+	/* Ensure that only a single signal scope determining flag is set. */
+	if (hweight32(flags & PIDFD_SEND_SIGNAL_FLAGS) > 1)
 		return -EINVAL;
 
 	f = fdget(pidfd);
@@ -3915,10 +3926,24 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
 	if (!access_pidfd_pidns(pid))
 		goto err;
 
-	if (f.file->f_flags & PIDFD_THREAD)
+	switch (flags) {
+	case 0:
+		/* Infer scope from the type of pidfd. */
+		if (f.file->f_flags & PIDFD_THREAD)
+			type = PIDTYPE_PID;
+		else
+			type = PIDTYPE_TGID;
+		break;
+	case PIDFD_SIGNAL_THREAD:
 		type = PIDTYPE_PID;
-	else
+		break;
+	case PIDFD_SIGNAL_THREAD_GROUP:
 		type = PIDTYPE_TGID;
+		break;
+	case PIDFD_SIGNAL_PROCESS_GROUP:
+		type = PIDTYPE_PGID;
+		break;
+	}
 
 	if (info) {
 		ret = copy_siginfo_from_user_any(&kinfo, info);
@@ -3938,7 +3963,10 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
 		prepare_kill_siginfo(sig, &kinfo, type);
 	}
 
-	ret = kill_pid_info_type(sig, &kinfo, pid, type);
+	if (type == PIDFD_SIGNAL_PROCESS_GROUP)
+		ret = kill_pgrp_info(sig, &kinfo, pid);
+	else
+		ret = kill_pid_info_type(sig, &kinfo, pid, type);
 err:
 	fdput(f);
 	return ret;
  
Oleg Nesterov Feb. 10, 2024, 4:51 p.m. UTC | #13
On 02/10, Christian Brauner wrote:
>
> On Sat, Feb 10, 2024 at 02:15:18PM +0100, Oleg Nesterov wrote:
> > On 02/10, Christian Brauner wrote:
> > >
> > > The question is what is more useful for userspace when they do:
> > > pidfd_send_signal(1234, PIDFD_SEND_PROCESS_GROUP)?
> > >
> > > (1) They either mean to signal a process group that is headed by 1234.
> >
> > Yes, this is what I had in mind, see also another email from me.
> > Simple, clear, and matches kill(-1234).
>
> I went for a walk and kept thinking about this and I agree with you.
> It will require that 1234 will be a process group leader but I think
> that this is ok to require that.

Yes... but I am starting to understand why you mentioned the new
open PIDFD_PROCESS_GROUP flag... perhaps we can do something like
this later, but this needs more thinking.

> +	if (type == PIDFD_SIGNAL_PROCESS_GROUP)
> +		ret = kill_pgrp_info(sig, &kinfo, pid);

I guess you meant

	if (type == PIDTYPE_PGID)

other than that,

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
  
Christian Brauner Feb. 10, 2024, 5:22 p.m. UTC | #14
On Sat, Feb 10, 2024 at 05:51:33PM +0100, Oleg Nesterov wrote:
> On 02/10, Christian Brauner wrote:
> >
> > On Sat, Feb 10, 2024 at 02:15:18PM +0100, Oleg Nesterov wrote:
> > > On 02/10, Christian Brauner wrote:
> > > >
> > > > The question is what is more useful for userspace when they do:
> > > > pidfd_send_signal(1234, PIDFD_SEND_PROCESS_GROUP)?
> > > >
> > > > (1) They either mean to signal a process group that is headed by 1234.
> > >
> > > Yes, this is what I had in mind, see also another email from me.
> > > Simple, clear, and matches kill(-1234).
> >
> > I went for a walk and kept thinking about this and I agree with you.
> > It will require that 1234 will be a process group leader but I think
> > that this is ok to require that.
> 
> Yes... but I am starting to understand why you mentioned the new
> open PIDFD_PROCESS_GROUP flag... perhaps we can do something like
> this later, but this needs more thinking.
> 
> > +	if (type == PIDFD_SIGNAL_PROCESS_GROUP)
> > +		ret = kill_pgrp_info(sig, &kinfo, pid);
> 
> I guess you meant
> 
> 	if (type == PIDTYPE_PGID)
> 
> other than that,

Bahaa, yes of course.

> 
> Reviewed-by: Oleg Nesterov <oleg@redhat.com>

Thanks!
  
Oleg Nesterov Feb. 14, 2024, 12:36 p.m. UTC | #15
On 02/10, Oleg Nesterov wrote:
>
> On 02/10, Christian Brauner wrote:
> >
> > +	if (type == PIDFD_SIGNAL_PROCESS_GROUP)
> > +		ret = kill_pgrp_info(sig, &kinfo, pid);
>
> I guess you meant
>
> 	if (type == PIDTYPE_PGID)
>
> other than that,
>
> Reviewed-by: Oleg Nesterov <oleg@redhat.com>

Yes, but there is another thing I hadn't thought of...

sys_pidfd_send_signal() does

	/* Only allow sending arbitrary signals to yourself. */
	ret = -EPERM;
	if ((task_pid(current) != pid) &&
	    (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL))
		goto err;

and I am not sure that task_pid(current) == pid should allow
the "arbitrary signals" if PIDFD_SIGNAL_PROCESS_GROUP.

Perhaps

	/* Only allow sending arbitrary signals to yourself. */
	ret = -EPERM;
	if ((task_pid(current) != pid || type == PIDTYPE_PGID) &&
	    (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)
		goto err;

?

Oleg.
  
Christian Brauner Feb. 16, 2024, 12:28 p.m. UTC | #16
On Wed, Feb 14, 2024 at 01:36:56PM +0100, Oleg Nesterov wrote:
> On 02/10, Oleg Nesterov wrote:
> >
> > On 02/10, Christian Brauner wrote:
> > >
> > > +	if (type == PIDFD_SIGNAL_PROCESS_GROUP)
> > > +		ret = kill_pgrp_info(sig, &kinfo, pid);
> >
> > I guess you meant
> >
> > 	if (type == PIDTYPE_PGID)
> >
> > other than that,
> >
> > Reviewed-by: Oleg Nesterov <oleg@redhat.com>
> 
> Yes, but there is another thing I hadn't thought of...
> 
> sys_pidfd_send_signal() does
> 
> 	/* Only allow sending arbitrary signals to yourself. */
> 	ret = -EPERM;
> 	if ((task_pid(current) != pid) &&
> 	    (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL))
> 		goto err;
> 
> and I am not sure that task_pid(current) == pid should allow
> the "arbitrary signals" if PIDFD_SIGNAL_PROCESS_GROUP.
> 
> Perhaps
> 
> 	/* Only allow sending arbitrary signals to yourself. */
> 	ret = -EPERM;
> 	if ((task_pid(current) != pid || type == PIDTYPE_PGID) &&
> 	    (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)
> 		goto err;

Honestly, we should probably just do:

if (kinfo->si_code != SI_USER)
        goto err

and be done with it. If we get regressions reports about this then it's
easy to fix that up. But I find that unlikely. So why not try to get
away with something much simpler. What do you think?
  
Oleg Nesterov Feb. 16, 2024, 1:06 p.m. UTC | #17
On 02/16, Christian Brauner wrote:
>
> On Wed, Feb 14, 2024 at 01:36:56PM +0100, Oleg Nesterov wrote:
> >
> > and I am not sure that task_pid(current) == pid should allow
> > the "arbitrary signals" if PIDFD_SIGNAL_PROCESS_GROUP.
> >
> > Perhaps
> >
> > 	/* Only allow sending arbitrary signals to yourself. */
> > 	ret = -EPERM;
> > 	if ((task_pid(current) != pid || type == PIDTYPE_PGID) &&
> > 	    (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)
> > 		goto err;
>
> Honestly, we should probably just do:
>
> if (kinfo->si_code != SI_USER)
>         goto err

Hmm. This doesn't look right. The purpose of the current check is to
forbid SI_TKILL and si_code >= 0, and SI_USER == 0.

SI_USER means that the target can trust the values of si_pid/si_uid
in siginfo.

> +		if (kinfo.si_code != SI_USER)
>  			goto err;

See above...

Oleg.
  
Christian Brauner Feb. 16, 2024, 2:46 p.m. UTC | #18
> SI_USER means that the target can trust the values of si_pid/si_uid
> in siginfo.

Bah, what an annoying nonsense. I see that this can be used to emulate
stuff like SI_TIMER and SI_ASYNCIO. But I very much doubt the value of
e.g., emulating SI_DETHREAD. Maybe I'm missing something very obvious.
In any case, thanks for keeping me honest:

So wouldn't be better of just writing this as?

if ((task_pid(current) != pid || type > PIDTYPE_TGID) &&
    (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL))
        goto err;

So that we don't have to repeat the same exercise if we extend this to
anything above PIDTYPE_PGID?
  
Oleg Nesterov Feb. 16, 2024, 6:12 p.m. UTC | #19
On 02/16, Christian Brauner wrote:
>
> > SI_USER means that the target can trust the values of si_pid/si_uid
> > in siginfo.
>
> Bah, what an annoying nonsense. I see that this can be used to emulate
> stuff like SI_TIMER and SI_ASYNCIO. But I very much doubt the value of
> e.g., emulating SI_DETHREAD. Maybe I'm missing something very obvious.

I don't understand...

SI_USER/SI_TKILL means that the signal comes from the userspace (kill/etc),
but siginfo was filled by the kernel so the receiver can trust it.

> So wouldn't be better of just writing this as?
>
> if ((task_pid(current) != pid || type > PIDTYPE_TGID) &&
>     (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL))
>         goto err;
>
> So that we don't have to repeat the same exercise if we extend this to
> anything above PIDTYPE_PGID?

Heh ;)

I swear, this is how I wrote it originally, but then for some reason I
thought it would raise the questions, so I changed it to check PIDTYPE_PGID.

IOW, sure, I agree.

Oleg.
  
Christian Brauner Feb. 20, 2024, 8:34 a.m. UTC | #20
On Fri, Feb 16, 2024 at 07:12:14PM +0100, Oleg Nesterov wrote:
> On 02/16, Christian Brauner wrote:
> >
> > > SI_USER means that the target can trust the values of si_pid/si_uid
> > > in siginfo.
> >
> > Bah, what an annoying nonsense. I see that this can be used to emulate
> > stuff like SI_TIMER and SI_ASYNCIO. But I very much doubt the value of
> > e.g., emulating SI_DETHREAD. Maybe I'm missing something very obvious.
> 
> I don't understand...

My question was what the purpose of being able to to set si_code to
e.g., SI_DETHREAD is and then to send a signal to yourself? Because it
looks like that's what rt_{tg}sigqueueinfo() and pidfd_send_signal()
allows the caller to do. I'm just trying to understand use-cases for
this.
  
Oleg Nesterov Feb. 20, 2024, 9:02 a.m. UTC | #21
On 02/20, Christian Brauner wrote:
>
> On Fri, Feb 16, 2024 at 07:12:14PM +0100, Oleg Nesterov wrote:
> > On 02/16, Christian Brauner wrote:
> > >
> > > > SI_USER means that the target can trust the values of si_pid/si_uid
> > > > in siginfo.
> > >
> > > Bah, what an annoying nonsense. I see that this can be used to emulate
> > > stuff like SI_TIMER and SI_ASYNCIO. But I very much doubt the value of
> > > e.g., emulating SI_DETHREAD. Maybe I'm missing something very obvious.
> >
> > I don't understand...
>
> My question was what the purpose of being able to to set si_code to
> e.g., SI_DETHREAD is and then to send a signal to yourself? Because it
> looks like that's what rt_{tg}sigqueueinfo() and pidfd_send_signal()
> allows the caller to do. I'm just trying to understand use-cases for
> this.

Ah. IIRC criu uses this hack to restore the pending (arbitrary) signals
collected at dump time.

I was a bit surprise sys_pidfd_send_signal() allows this hack too, I don't
think that criu uses pidfd at restore time, but I do not know.

Oleg.
  
Christian Brauner Feb. 20, 2024, 9:22 a.m. UTC | #22
On Tue, Feb 20, 2024 at 10:02:56AM +0100, Oleg Nesterov wrote:
> On 02/20, Christian Brauner wrote:
> >
> > On Fri, Feb 16, 2024 at 07:12:14PM +0100, Oleg Nesterov wrote:
> > > On 02/16, Christian Brauner wrote:
> > > >
> > > > > SI_USER means that the target can trust the values of si_pid/si_uid
> > > > > in siginfo.
> > > >
> > > > Bah, what an annoying nonsense. I see that this can be used to emulate
> > > > stuff like SI_TIMER and SI_ASYNCIO. But I very much doubt the value of
> > > > e.g., emulating SI_DETHREAD. Maybe I'm missing something very obvious.
> > >
> > > I don't understand...
> >
> > My question was what the purpose of being able to to set si_code to
> > e.g., SI_DETHREAD is and then to send a signal to yourself? Because it
> > looks like that's what rt_{tg}sigqueueinfo() and pidfd_send_signal()
> > allows the caller to do. I'm just trying to understand use-cases for
> > this.
> 
> Ah. IIRC criu uses this hack to restore the pending (arbitrary) signals
> collected at dump time.
> 
> I was a bit surprise sys_pidfd_send_signal() allows this hack too, I don't

I think that we simply mirrored the restrictions in the other system
calls.

> think that criu uses pidfd at restore time, but I do not know.

Hm, I just checked and it doesn't use pidfd_send_signal(). It uses
pidfds but only for pid reuse detection for RPC clients.

So right now si_code is blocked for >= 0 and for SI_TKILL. If we were to
simply ensure that si_code can't be < 0 then this amounts to effectively
blocking @info from being filled in by userspace at all. Because 0 is a
valid value.

So could we just _try_ and either ignore the @info argument completely
or consistenly report EINVAL when @info is non-NULL and see if anyone
reports a regression? If there ever is a valid use-case then we can just
add a flag argument PIDFD_SIGNAL_INFO to indicate that @info should be
taken into account.

So something like the completely untested?

diff --git a/kernel/signal.c b/kernel/signal.c
index cf6539a6b1cb..2cca42175480 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3849,22 +3849,6 @@ static bool access_pidfd_pidns(struct pid *pid)
        return true;
 }

-static int copy_siginfo_from_user_any(kernel_siginfo_t *kinfo,
-               siginfo_t __user *info)
-{
-#ifdef CONFIG_COMPAT
-       /*
-        * Avoid hooking up compat syscalls and instead handle necessary
-        * conversions here. Note, this is a stop-gap measure and should not be
-        * considered a generic solution.
-        */
-       if (in_compat_syscall())
-               return copy_siginfo_from_user32(
-                       kinfo, (struct compat_siginfo __user *)info);
-#endif
-       return copy_siginfo_from_user(kinfo, info);
-}
-
 static struct pid *pidfd_to_pid(const struct file *file)
 {
        struct pid *pid;
@@ -3911,6 +3895,10 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
        if (hweight32(flags & PIDFD_SEND_SIGNAL_FLAGS) > 1)
                return -EINVAL;

+       /* Currently unused. */
+       if (info)
+               return -EINVAL;
+
        f = fdget(pidfd);
        if (!f.file)
                return -EBADF;
@@ -3945,23 +3933,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
                break;
        }

-       if (info) {
-               ret = copy_siginfo_from_user_any(&kinfo, info);
-               if (unlikely(ret))
-                       goto err;
-
-               ret = -EINVAL;
-               if (unlikely(sig != kinfo.si_signo))
-                       goto err;
-
-               /* Only allow sending arbitrary signals to yourself. */
-               ret = -EPERM;
-               if ((task_pid(current) != pid) &&
-                   (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL))
-                       goto err;
-       } else {
-               prepare_kill_siginfo(sig, &kinfo, type);
-       }
+       prepare_kill_siginfo(sig, &kinfo, type);

        if (type == PIDTYPE_PGID)
                ret = kill_pgrp_info(sig, &kinfo, pid);
  
Oleg Nesterov Feb. 20, 2024, 11 a.m. UTC | #23
On 02/20, Christian Brauner wrote:
>
> On Tue, Feb 20, 2024 at 10:02:56AM +0100, Oleg Nesterov wrote:
> >
> > Ah. IIRC criu uses this hack to restore the pending (arbitrary) signals
> > collected at dump time.
> >
> > I was a bit surprise sys_pidfd_send_signal() allows this hack too, I don't
>
> I think that we simply mirrored the restrictions in the other system
> calls.
>
> > think that criu uses pidfd at restore time, but I do not know.
>
> Hm, I just checked and it doesn't use pidfd_send_signal(). It uses
> pidfds but only for pid reuse detection for RPC clients.

But perhaps something else already uses pidfd_send_signal() with info != NULL
or with info->si_code == SI_USER, we can't know. Please see below.

> So right now si_code is blocked for >= 0 and for SI_TKILL. If we were to
> simply ensure that si_code can't be < 0 then this amounts to effectively
> blocking @info from being filled in by userspace at all. Because 0 is a
> valid value.

I'am afraid I misunderstand you again... 0 == SI_USER is not a valid value
when siginfo != NULL.

Perhaps we can kill the "task_pid(current) != pid" check and just return
EPERM if "kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL", I don't think
anobody needs pidfd_send_send_signal() to signal yourself. See below.

> +       /* Currently unused. */
> +       if (info)
> +               return -EINVAL;

Well, to me this looks like the unnecessary restriction... And why?

But whatever we do,

> -               /* Only allow sending arbitrary signals to yourself. */
> -               ret = -EPERM;
> -               if ((task_pid(current) != pid) &&
> -                   (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL))
> -                       goto err;

Can I suggest to fix this check in your tree (add type > PIDTYPE_TGID as
we discussed) first, then do other changes on top?

This way we can revert the next change(s) if we get regressions reports
without re-introducing the security problem.

Oleg.
  
Christian Brauner Feb. 20, 2024, 12:59 p.m. UTC | #24
On Tue, Feb 20, 2024 at 12:00:12PM +0100, Oleg Nesterov wrote:
> On 02/20, Christian Brauner wrote:
> >
> > On Tue, Feb 20, 2024 at 10:02:56AM +0100, Oleg Nesterov wrote:
> > >
> > > Ah. IIRC criu uses this hack to restore the pending (arbitrary) signals
> > > collected at dump time.
> > >
> > > I was a bit surprise sys_pidfd_send_signal() allows this hack too, I don't
> >
> > I think that we simply mirrored the restrictions in the other system
> > calls.
> >
> > > think that criu uses pidfd at restore time, but I do not know.
> >
> > Hm, I just checked and it doesn't use pidfd_send_signal(). It uses
> > pidfds but only for pid reuse detection for RPC clients.
> 
> But perhaps something else already uses pidfd_send_signal() with info != NULL
> or with info->si_code == SI_USER, we can't know. Please see below.
> 
> > So right now si_code is blocked for >= 0 and for SI_TKILL. If we were to
> > simply ensure that si_code can't be < 0 then this amounts to effectively
> > blocking @info from being filled in by userspace at all. Because 0 is a
> > valid value.
> 
> I'am afraid I misunderstand you again... 0 == SI_USER is not a valid value
> when siginfo != NULL.

Yes, I know. We're on the same page. I would just have preferred to
restrict remulating si_code completely because we don't know whether
that's actually used for pidfd_send_signal(). The point I was trying to
make is that si_code has no value that means "unset" so restricting
si_code further means always rejecting @info when it's passed.

> 
> Perhaps we can kill the "task_pid(current) != pid" check and just return
> EPERM if "kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL", I don't think
> anobody needs pidfd_send_send_signal() to signal yourself. See below.

Yeah.

> 
> > +       /* Currently unused. */
> > +       if (info)
> > +               return -EINVAL;
> 
> Well, to me this looks like the unnecessary restriction... And why?

Because right now we aren't sure that it's used and we aren't sure what
use-cases are there.

> 
> But whatever we do,
> 
> > -               /* Only allow sending arbitrary signals to yourself. */
> > -               ret = -EPERM;
> > -               if ((task_pid(current) != pid) &&
> > -                   (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL))
> > -                       goto err;
> 
> Can I suggest to fix this check in your tree (add type > PIDTYPE_TGID as
> we discussed) first, then do other changes on top?

Yes, absolutely. That was always the plan. See appended patch I put on top.
I put you as author since you did spot this. Let me know if you don't
want that.
  
Oleg Nesterov Feb. 20, 2024, 4:22 p.m. UTC | #25
On 02/20, Christian Brauner wrote:
>
> On Tue, Feb 20, 2024 at 12:00:12PM +0100, Oleg Nesterov wrote:
> >
> > Perhaps we can kill the "task_pid(current) != pid" check and just return
> > EPERM if "kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL", I don't think
> > anobody needs pidfd_send_send_signal() to signal yourself. See below.
>
> Yeah.

You have my ack in advance

> > > +       /* Currently unused. */
> > > +       if (info)
> > > +               return -EINVAL;
> >
> > Well, to me this looks like the unnecessary restriction... And why?
>
> Because right now we aren't sure that it's used

Yes, but...

> and we aren't sure what use-cases are there.

the same use-cases as for rt_sigqueueinfo() ?

Christian, I won't really argue but I still disagree.

Let me first repeat once again, I do not know what people do with pidfd
and pidfd_send_signal() in particular, so I won't be surprised if this
change won't cause any regression report.

But at the same time, I can easily imagine the following scenario: a
userspace programmer tries to use pidfd_send_signal(info != NULL), gets
-EINVAL, decides it can't/shouldn't work, and switches to sigqueueinfo()
without any report to lkml.

> Yes, absolutely. That was always the plan. See appended patch I put on top.
> I put you as author since you did spot this. Let me know if you don't
> want that.

Ah. Thanks Christian. I am fine either way, whatever is more convenient
for you.

But just in case, I won't mind at all if you simply fold this minor fix
into your PIDFD_SEND_PROCESS_GROUP patch, I certainly don't care about
the "From" tag ;)

A really, really minor/cosmetic nit below, feel free to ignore:

> -		if ((task_pid(current) != pid) &&
> +		if (((task_pid(current) != pid) || type > PIDTYPE_TGID) &&

we can remove the unnecessary parens around "task_pid(current) != pid"
or add the extra parens aroung "type > PIDTYPE_TGID".

I mean, the 1st operand of "&&" is

	(task_pid(current) != pid) || type > PIDTYPE_TGID

and this looks a bit inconsistent to me.

Oleg.
  
Christian Brauner Feb. 21, 2024, 7:42 a.m. UTC | #26
On Tue, Feb 20, 2024 at 05:22:02PM +0100, Oleg Nesterov wrote:
> On 02/20, Christian Brauner wrote:
> >
> > On Tue, Feb 20, 2024 at 12:00:12PM +0100, Oleg Nesterov wrote:
> > >
> > > Perhaps we can kill the "task_pid(current) != pid" check and just return
> > > EPERM if "kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL", I don't think
> > > anobody needs pidfd_send_send_signal() to signal yourself. See below.
> >
> > Yeah.
> 
> You have my ack in advance
> 
> > > > +       /* Currently unused. */
> > > > +       if (info)
> > > > +               return -EINVAL;
> > >
> > > Well, to me this looks like the unnecessary restriction... And why?
> >
> > Because right now we aren't sure that it's used
> 
> Yes, but...
> 
> > and we aren't sure what use-cases are there.
> 
> the same use-cases as for rt_sigqueueinfo() ?

Specifically for pidfd_send_signal() I mean. To me it seems very
unlikely that anyone would be opening a pidfd to itself and then use
pidfd_send_signal() when they could entirely avoid this - race free - by
simply using *sigqueueinfo(). But I may be wrong of course.

> 
> Christian, I won't really argue but I still disagree.
> 
> Let me first repeat once again, I do not know what people do with pidfd
> and pidfd_send_signal() in particular, so I won't be surprised if this
> change won't cause any regression report.

Fwiw, that's fine as long as we'd fix it up.

> 
> But at the same time, I can easily imagine the following scenario: a
> userspace programmer tries to use pidfd_send_signal(info != NULL), gets
> -EINVAL, decides it can't/shouldn't work, and switches to sigqueueinfo()
> without any report to lkml.
> 
> > Yes, absolutely. That was always the plan. See appended patch I put on top.
> > I put you as author since you did spot this. Let me know if you don't
> > want that.
> 
> Ah. Thanks Christian. I am fine either way, whatever is more convenient
> for you.
> 
> But just in case, I won't mind at all if you simply fold this minor fix
> into your PIDFD_SEND_PROCESS_GROUP patch, I certainly don't care about
> the "From" tag ;)
> 
> A really, really minor/cosmetic nit below, feel free to ignore:
> 
> > -		if ((task_pid(current) != pid) &&
> > +		if (((task_pid(current) != pid) || type > PIDTYPE_TGID) &&
> 
> we can remove the unnecessary parens around "task_pid(current) != pid"
> or add the extra parens aroung "type > PIDTYPE_TGID".
> 
> I mean, the 1st operand of "&&" is
> 
> 	(task_pid(current) != pid) || type > PIDTYPE_TGID
> 
> and this looks a bit inconsistent to me.

Ok, will do.
  
Oleg Nesterov Feb. 21, 2024, 12:55 p.m. UTC | #27
On 02/21, Christian Brauner wrote:
>
> On Tue, Feb 20, 2024 at 05:22:02PM +0100, Oleg Nesterov wrote:
> >
> > > > > +       /* Currently unused. */
> > > > > +       if (info)
> > > > > +               return -EINVAL;
> > > >
> > > > Well, to me this looks like the unnecessary restriction... And why?
> > >
> > > Because right now we aren't sure that it's used
> >
> > Yes, but...
> >
> > > and we aren't sure what use-cases are there.
> >
> > the same use-cases as for rt_sigqueueinfo() ?
>
> Specifically for pidfd_send_signal() I mean. To me it seems very
> unlikely that anyone would be opening a pidfd to itself

Ah, with this, I do agree. And that is why (I think) we can remove
the "task_pid(current) != pid" check in the "info != NULL" branch.

Oleg.
  
Christian Brauner Feb. 21, 2024, 1:35 p.m. UTC | #28
On Wed, Feb 21, 2024 at 01:55:26PM +0100, Oleg Nesterov wrote:
> On 02/21, Christian Brauner wrote:
> >
> > On Tue, Feb 20, 2024 at 05:22:02PM +0100, Oleg Nesterov wrote:
> > >
> > > > > > +       /* Currently unused. */
> > > > > > +       if (info)
> > > > > > +               return -EINVAL;
> > > > >
> > > > > Well, to me this looks like the unnecessary restriction... And why?
> > > >
> > > > Because right now we aren't sure that it's used
> > >
> > > Yes, but...
> > >
> > > > and we aren't sure what use-cases are there.
> > >
> > > the same use-cases as for rt_sigqueueinfo() ?
> >
> > Specifically for pidfd_send_signal() I mean. To me it seems very
> > unlikely that anyone would be opening a pidfd to itself
> 
> Ah, with this, I do agree. And that is why (I think) we can remove
> the "task_pid(current) != pid" check in the "info != NULL" branch.

Ok, so let's try that. :)
  

Patch

diff --git a/kernel/fork.c b/kernel/fork.c
index cd61ca87d0e6..47b565598063 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2051,8 +2051,6 @@  static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
 
 	seq_put_decimal_ll(m, "Pid:\t", nr);
 
-	/* TODO: report PIDFD_THREAD */
-
 #ifdef CONFIG_PID_NS
 	seq_put_decimal_ll(m, "\nNSpid:\t", nr);
 	if (nr > 0) {
diff --git a/kernel/signal.c b/kernel/signal.c
index a8199fda0d61..9578ce17d85d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -47,6 +47,7 @@ 
 #include <linux/cgroup.h>
 #include <linux/audit.h>
 #include <linux/sysctl.h>
+#include <uapi/linux/pidfd.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/signal.h>
@@ -1436,7 +1437,8 @@  void lockdep_assert_task_sighand_held(struct task_struct *task)
 #endif
 
 /*
- * send signal info to all the members of a group
+ * send signal info to all the members of a thread group or to the
+ * individual thread if type == PIDTYPE_PID.
  */
 int group_send_sig_info(int sig, struct kernel_siginfo *info,
 			struct task_struct *p, enum pid_type type)
@@ -1478,7 +1480,8 @@  int __kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp)
 	return ret;
 }
 
-int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid)
+static int kill_pid_info_type(int sig, struct kernel_siginfo *info,
+				struct pid *pid, enum pid_type type)
 {
 	int error = -ESRCH;
 	struct task_struct *p;
@@ -1487,11 +1490,10 @@  int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid)
 		rcu_read_lock();
 		p = pid_task(pid, PIDTYPE_PID);
 		if (p)
-			error = group_send_sig_info(sig, info, p, PIDTYPE_TGID);
+			error = group_send_sig_info(sig, info, p, type);
 		rcu_read_unlock();
 		if (likely(!p || error != -ESRCH))
 			return error;
-
 		/*
 		 * The task was unhashed in between, try again.  If it
 		 * is dead, pid_task() will return NULL, if we race with
@@ -1500,6 +1502,11 @@  int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid)
 	}
 }
 
+int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid)
+{
+	return kill_pid_info_type(sig, info, pid, PIDTYPE_TGID);
+}
+
 static int kill_proc_info(int sig, struct kernel_siginfo *info, pid_t pid)
 {
 	int error;
@@ -3872,14 +3879,10 @@  static struct pid *pidfd_to_pid(const struct file *file)
  * @info:   signal info
  * @flags:  future flags
  *
- * The syscall currently only signals via PIDTYPE_PID which covers
- * kill(<positive-pid>, <signal>. It does not signal threads or process
- * groups.
- * In order to extend the syscall to threads and process groups the @flags
- * argument should be used. In essence, the @flags argument will determine
- * what is signaled and not the file descriptor itself. Put in other words,
- * grouping is a property of the flags argument not a property of the file
- * descriptor.
+ * Send the signal to the thread group or to the individual thread depending
+ * on PIDFD_THREAD.
+ * In the future extension to @flags may be used to override the default scope
+ * of @pidfd.
  *
  * Return: 0 on success, negative errno on failure
  */
@@ -3890,6 +3893,7 @@  SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
 	struct fd f;
 	struct pid *pid;
 	kernel_siginfo_t kinfo;
+	bool thread;
 
 	/* Enforce flags be set to 0 until we add an extension. */
 	if (flags)
@@ -3910,6 +3914,8 @@  SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
 	if (!access_pidfd_pidns(pid))
 		goto err;
 
+	thread = f.file->f_flags & PIDFD_THREAD;
+
 	if (info) {
 		ret = copy_siginfo_from_user_any(&kinfo, info);
 		if (unlikely(ret))
@@ -3925,12 +3931,12 @@  SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
 		    (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL))
 			goto err;
 	} else {
-		prepare_kill_siginfo(sig, &kinfo, SI_USER);
+		prepare_kill_siginfo(sig, &kinfo,
+				     thread ? SI_TKILL : SI_USER);
 	}
 
-	/* TODO: respect PIDFD_THREAD */
-	ret = kill_pid_info(sig, &kinfo, pid);
-
+	ret = kill_pid_info_type(sig, &kinfo, pid,
+				 thread ? PIDTYPE_PID : PIDTYPE_TGID);
 err:
 	fdput(f);
 	return ret;