[5/7] rust: file: add `Kuid` wrapper

Message ID 20231129-alice-file-v1-5-f81afe8c7261@google.com
State New
Headers
Series File abstractions needed by Rust Binder |

Commit Message

Alice Ryhl Nov. 29, 2023, 1:12 p.m. UTC
  Adds a wrapper around `kuid_t` called `Kuid`. This allows us to define
various operations on kuids such as equality and current_euid. It also
lets us provide conversions from kuid into userspace values.

Rust Binder needs these operations because it needs to compare kuids for
equality, and it needs to tell userspace about the pid and uid of
incoming transactions.

Signed-off-by: Alice Ryhl <aliceryhl@google.com>
---
 rust/bindings/bindings_helper.h |  1 +
 rust/helpers.c                  | 45 ++++++++++++++++++++++++++
 rust/kernel/cred.rs             |  5 +--
 rust/kernel/task.rs             | 71 ++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 119 insertions(+), 3 deletions(-)
  

Comments

Christian Brauner Nov. 29, 2023, 4:28 p.m. UTC | #1
On Wed, Nov 29, 2023 at 01:12:17PM +0000, Alice Ryhl wrote:
> Adds a wrapper around `kuid_t` called `Kuid`. This allows us to define
> various operations on kuids such as equality and current_euid. It also
> lets us provide conversions from kuid into userspace values.
> 
> Rust Binder needs these operations because it needs to compare kuids for
> equality, and it needs to tell userspace about the pid and uid of
> incoming transactions.
> 
> Signed-off-by: Alice Ryhl <aliceryhl@google.com>
> ---
>  rust/bindings/bindings_helper.h |  1 +
>  rust/helpers.c                  | 45 ++++++++++++++++++++++++++
>  rust/kernel/cred.rs             |  5 +--
>  rust/kernel/task.rs             | 71 ++++++++++++++++++++++++++++++++++++++++-
>  4 files changed, 119 insertions(+), 3 deletions(-)
> 
> diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
> index 81b13a953eae..700f01840188 100644
> --- a/rust/bindings/bindings_helper.h
> +++ b/rust/bindings/bindings_helper.h
> @@ -11,6 +11,7 @@
>  #include <linux/errname.h>
>  #include <linux/file.h>
>  #include <linux/fs.h>
> +#include <linux/pid_namespace.h>
>  #include <linux/security.h>
>  #include <linux/slab.h>
>  #include <linux/refcount.h>
> diff --git a/rust/helpers.c b/rust/helpers.c
> index fd633d9db79a..58e3a9dff349 100644
> --- a/rust/helpers.c
> +++ b/rust/helpers.c
> @@ -142,6 +142,51 @@ void rust_helper_put_task_struct(struct task_struct *t)
>  }
>  EXPORT_SYMBOL_GPL(rust_helper_put_task_struct);
>  
> +kuid_t rust_helper_task_uid(struct task_struct *task)
> +{
> +	return task_uid(task);
> +}
> +EXPORT_SYMBOL_GPL(rust_helper_task_uid);
> +
> +kuid_t rust_helper_task_euid(struct task_struct *task)
> +{
> +	return task_euid(task);
> +}
> +EXPORT_SYMBOL_GPL(rust_helper_task_euid);
> +
> +#ifndef CONFIG_USER_NS
> +uid_t rust_helper_from_kuid(struct user_namespace *to, kuid_t uid)
> +{
> +	return from_kuid(to, uid);
> +}
> +EXPORT_SYMBOL_GPL(rust_helper_from_kuid);
> +#endif /* CONFIG_USER_NS */
> +
> +bool rust_helper_uid_eq(kuid_t left, kuid_t right)
> +{
> +	return uid_eq(left, right);
> +}
> +EXPORT_SYMBOL_GPL(rust_helper_uid_eq);
> +
> +kuid_t rust_helper_current_euid(void)
> +{
> +	return current_euid();
> +}
> +EXPORT_SYMBOL_GPL(rust_helper_current_euid);
> +
> +struct user_namespace *rust_helper_current_user_ns(void)
> +{
> +	return current_user_ns();
> +}
> +EXPORT_SYMBOL_GPL(rust_helper_current_user_ns);
> +
> +pid_t rust_helper_task_tgid_nr_ns(struct task_struct *tsk,
> +				  struct pid_namespace *ns)
> +{
> +	return task_tgid_nr_ns(tsk, ns);
> +}
> +EXPORT_SYMBOL_GPL(rust_helper_task_tgid_nr_ns);

I'm a bit puzzled by all these rust_helper_*() calls. Can you explain
why they are needed? Because they are/can be static inlines and that
somehow doesn't work?

> +
>  struct kunit *rust_helper_kunit_get_current_test(void)
>  {
>  	return kunit_get_current_test();
> diff --git a/rust/kernel/cred.rs b/rust/kernel/cred.rs
> index 3794937b5294..fbc749788bfa 100644
> --- a/rust/kernel/cred.rs
> +++ b/rust/kernel/cred.rs
> @@ -8,6 +8,7 @@
>  
>  use crate::{
>      bindings,
> +    task::Kuid,
>      types::{AlwaysRefCounted, Opaque},
>  };
>  
> @@ -52,9 +53,9 @@ pub fn get_secid(&self) -> u32 {
>      }
>  
>      /// Returns the effective UID of the given credential.
> -    pub fn euid(&self) -> bindings::kuid_t {
> +    pub fn euid(&self) -> Kuid {
>          // SAFETY: By the type invariant, we know that `self.0` is valid.
> -        unsafe { (*self.0.get()).euid }
> +        Kuid::from_raw(unsafe { (*self.0.get()).euid })
>      }
>  }
>  
> diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs
> index b2299bc7ac1f..1a27b968a907 100644
> --- a/rust/kernel/task.rs
> +++ b/rust/kernel/task.rs
> @@ -5,7 +5,12 @@
>  //! C header: [`include/linux/sched.h`](../../../../include/linux/sched.h).
>  
>  use crate::{bindings, types::Opaque};
> -use core::{marker::PhantomData, ops::Deref, ptr};
> +use core::{
> +    cmp::{Eq, PartialEq},
> +    marker::PhantomData,
> +    ops::Deref,
> +    ptr,
> +};
>  
>  /// Returns the currently running task.
>  #[macro_export]
> @@ -78,6 +83,12 @@ unsafe impl Sync for Task {}
>  /// The type of process identifiers (PIDs).
>  type Pid = bindings::pid_t;
>  
> +/// The type of user identifiers (UIDs).
> +#[derive(Copy, Clone)]
> +pub struct Kuid {
> +    kuid: bindings::kuid_t,
> +}
> +
>  impl Task {
>      /// Returns a task reference for the currently executing task/thread.
>      ///
> @@ -132,12 +143,34 @@ pub fn pid(&self) -> Pid {
>          unsafe { *ptr::addr_of!((*self.0.get()).pid) }
>      }
>  
> +    /// Returns the UID of the given task.
> +    pub fn uid(&self) -> Kuid {
> +        // SAFETY: By the type invariant, we know that `self.0` is valid.
> +        Kuid::from_raw(unsafe { bindings::task_uid(self.0.get()) })
> +    }
> +
> +    /// Returns the effective UID of the given task.
> +    pub fn euid(&self) -> Kuid {
> +        // SAFETY: By the type invariant, we know that `self.0` is valid.
> +        Kuid::from_raw(unsafe { bindings::task_euid(self.0.get()) })
> +    }
> +
>      /// Determines whether the given task has pending signals.
>      pub fn signal_pending(&self) -> bool {
>          // SAFETY: By the type invariant, we know that `self.0` is valid.
>          unsafe { bindings::signal_pending(self.0.get()) != 0 }
>      }
>  
> +    /// Returns the given task's pid in the current pid namespace.
> +    pub fn pid_in_current_ns(&self) -> Pid {
> +        // SAFETY: We know that `self.0.get()` is valid by the type invariant. The rest is just FFI
> +        // calls.
> +        unsafe {
> +            let namespace = bindings::task_active_pid_ns(bindings::get_current());
> +            bindings::task_tgid_nr_ns(self.0.get(), namespace)
> +        }
> +    }
> +
>      /// Wakes up the task.
>      pub fn wake_up(&self) {
>          // SAFETY: By the type invariant, we know that `self.0.get()` is non-null and valid.
> @@ -147,6 +180,42 @@ pub fn wake_up(&self) {
>      }
>  }
>  
> +impl Kuid {
> +    /// Get the current euid.
> +    pub fn current_euid() -> Kuid {
> +        // SAFETY: Just an FFI call.
> +        Self {
> +            kuid: unsafe { bindings::current_euid() },
> +        }
> +    }
> +
> +    /// Create a `Kuid` given the raw C type.
> +    pub fn from_raw(kuid: bindings::kuid_t) -> Self {
> +        Self { kuid }
> +    }
> +
> +    /// Turn this kuid into the raw C type.
> +    pub fn into_raw(self) -> bindings::kuid_t {
> +        self.kuid
> +    }
> +
> +    /// Converts this kernel UID into a UID that userspace understands. Uses the namespace of the
> +    /// current task.
> +    pub fn into_uid_in_current_ns(self) -> bindings::uid_t {

Hm, I wouldn't special-case this. Just expose from_kuid() and let it
take a namespace argument, no? You don't need to provide bindings for
namespaces ofc.

> +        // SAFETY: Just an FFI call.
> +        unsafe { bindings::from_kuid(bindings::current_user_ns(), self.kuid) }
> +    }
> +}
> +
> +impl PartialEq for Kuid {
> +    fn eq(&self, other: &Kuid) -> bool {
> +        // SAFETY: Just an FFI call.
> +        unsafe { bindings::uid_eq(self.kuid, other.kuid) }
> +    }
> +}
> +
> +impl Eq for Kuid {}

Do you need that?

> +
>  // SAFETY: The type invariants guarantee that `Task` is always ref-counted.
>  unsafe impl crate::types::AlwaysRefCounted for Task {
>      fn inc_ref(&self) {
> 
> -- 
> 2.43.0.rc1.413.gea7ed67945-goog
>
  
Peter Zijlstra Nov. 29, 2023, 4:48 p.m. UTC | #2
On Wed, Nov 29, 2023 at 05:28:27PM +0100, Christian Brauner wrote:

> > +pid_t rust_helper_task_tgid_nr_ns(struct task_struct *tsk,
> > +				  struct pid_namespace *ns)
> > +{
> > +	return task_tgid_nr_ns(tsk, ns);
> > +}
> > +EXPORT_SYMBOL_GPL(rust_helper_task_tgid_nr_ns);
> 
> I'm a bit puzzled by all these rust_helper_*() calls. Can you explain
> why they are needed? Because they are/can be static inlines and that
> somehow doesn't work?

Correct, because Rust can only talk to C ABI, it cannot use C headers.
Bindgen would need to translate the full C headers into valid Rust for
that to work.

I really think the Rust peoples should spend more effort on that,
because you are quite right, all this wrappery is tedious at best.
  
Alice Ryhl Nov. 30, 2023, 9:36 a.m. UTC | #3
Christian Brauner <brauner@kernel.org> writes:
> I'm a bit puzzled by all these rust_helper_*() calls. Can you explain
> why they are needed? Because they are/can be static inlines and that
> somehow doesn't work?

Yes, it's because the methods are inline. Rust can only call C methods
that are actually exported by the C code.

>> +    /// Converts this kernel UID into a UID that userspace understands. Uses the namespace of the
>> +    /// current task.
>> +    pub fn into_uid_in_current_ns(self) -> bindings::uid_t {
> 
> Hm, I wouldn't special-case this. Just expose from_kuid() and let it
> take a namespace argument, no? You don't need to provide bindings for
> namespaces ofc.

To make `from_kuid` safe, I would need to wrap the namespace type too. I
could do that, but it would be more code than this method because I need
another wrapper struct and so on.

Personally I would prefer to special-case it until someone needs the
non-special-case. Then, they can delete this method when they introduce
the non-special-case.

But I'll do it if you think I should.

>> +impl PartialEq for Kuid {
>> +    fn eq(&self, other: &Kuid) -> bool {
>> +        // SAFETY: Just an FFI call.
>> +        unsafe { bindings::uid_eq(self.kuid, other.kuid) }
>> +    }
>> +}
>> +
>> +impl Eq for Kuid {}
> 
> Do you need that?

Yes. This is the code that tells the compiler what `==` means for the
`Kuid` type. Binder uses it here:

https://github.com/Darksonn/linux/blob/dca45e6c7848e024709b165a306cdbe88e5b086a/drivers/android/context.rs#L174

Alice
  
Peter Zijlstra Nov. 30, 2023, 10:36 a.m. UTC | #4
On Wed, Nov 29, 2023 at 01:12:17PM +0000, Alice Ryhl wrote:

> diff --git a/rust/helpers.c b/rust/helpers.c
> index fd633d9db79a..58e3a9dff349 100644
> --- a/rust/helpers.c
> +++ b/rust/helpers.c
> @@ -142,6 +142,51 @@ void rust_helper_put_task_struct(struct task_struct *t)
>  }
>  EXPORT_SYMBOL_GPL(rust_helper_put_task_struct);
>  
> +kuid_t rust_helper_task_uid(struct task_struct *task)
> +{
> +	return task_uid(task);
> +}
> +EXPORT_SYMBOL_GPL(rust_helper_task_uid);
> +
> +kuid_t rust_helper_task_euid(struct task_struct *task)
> +{
> +	return task_euid(task);
> +}
> +EXPORT_SYMBOL_GPL(rust_helper_task_euid);

Aren't these like ideal speculation gadgets? And shouldn't we avoid
functions like this for exactly that reason?
  
Christian Brauner Nov. 30, 2023, 10:52 a.m. UTC | #5
On Thu, Nov 30, 2023 at 09:36:03AM +0000, Alice Ryhl wrote:
> Christian Brauner <brauner@kernel.org> writes:
> > I'm a bit puzzled by all these rust_helper_*() calls. Can you explain
> > why they are needed? Because they are/can be static inlines and that
> > somehow doesn't work?
> 
> Yes, it's because the methods are inline. Rust can only call C methods
> that are actually exported by the C code.
> 
> >> +    /// Converts this kernel UID into a UID that userspace understands. Uses the namespace of the
> >> +    /// current task.
> >> +    pub fn into_uid_in_current_ns(self) -> bindings::uid_t {
> > 
> > Hm, I wouldn't special-case this. Just expose from_kuid() and let it
> > take a namespace argument, no? You don't need to provide bindings for
> > namespaces ofc.
> 
> To make `from_kuid` safe, I would need to wrap the namespace type too. I
> could do that, but it would be more code than this method because I need
> another wrapper struct and so on.
> 
> Personally I would prefer to special-case it until someone needs the
> non-special-case. Then, they can delete this method when they introduce
> the non-special-case.
> 
> But I'll do it if you think I should.

No, don't start wrapping namespaces as well. You already do parts of LSM
as well.

> 
> >> +impl PartialEq for Kuid {
> >> +    fn eq(&self, other: &Kuid) -> bool {
> >> +        // SAFETY: Just an FFI call.
> >> +        unsafe { bindings::uid_eq(self.kuid, other.kuid) }
> >> +    }
> >> +}
> >> +
> >> +impl Eq for Kuid {}
> > 
> > Do you need that?
> 
> Yes. This is the code that tells the compiler what `==` means for the
> `Kuid` type. Binder uses it here:

Ok, thanks.
  
Benno Lossin Nov. 30, 2023, 4:48 p.m. UTC | #6
On 11/29/23 14:12, Alice Ryhl wrote:
> +    /// Returns the given task's pid in the current pid namespace.
> +    pub fn pid_in_current_ns(&self) -> Pid {
> +        // SAFETY: We know that `self.0.get()` is valid by the type invariant. The rest is just FFI
> +        // calls.
> +        unsafe {
> +            let namespace = bindings::task_active_pid_ns(bindings::get_current());
> +            bindings::task_tgid_nr_ns(self.0.get(), namespace)
> +        }

I would split this into two `unsafe` blocks.

> +    }
> +
>      /// Wakes up the task.
>      pub fn wake_up(&self) {
>          // SAFETY: By the type invariant, we know that `self.0.get()` is non-null and valid.
> @@ -147,6 +180,42 @@ pub fn wake_up(&self) {
>      }
>  }
> 
> +impl Kuid {
> +    /// Get the current euid.
> +    pub fn current_euid() -> Kuid {
> +        // SAFETY: Just an FFI call.
> +        Self {
> +            kuid: unsafe { bindings::current_euid() },
> +        }

Would expect a call to `from_raw` here instead of `Self {}`.

> +    }
> +
> +    /// Create a `Kuid` given the raw C type.
> +    pub fn from_raw(kuid: bindings::kuid_t) -> Self {
> +        Self { kuid }
> +    }

Is there a reason that this is named `from_raw` and not just a normal
`From` impl? AFAICT any `bindings::kuid_t` is a valid `Kuid`.

> +
> +    /// Turn this kuid into the raw C type.
> +    pub fn into_raw(self) -> bindings::kuid_t {
> +        self.kuid
> +    }
> +
> +    /// Converts this kernel UID into a UID that userspace understands. Uses the namespace of the
> +    /// current task.

Why not:

    /// Converts this kernel UID into a userspace UID.
    ///
    /// Uses the namespace of the current task.
  
Kent Overstreet Dec. 6, 2023, 7:59 p.m. UTC | #7
On Thu, Nov 30, 2023 at 01:46:36PM +0100, Christian Brauner wrote:
> On Wed, Nov 29, 2023 at 05:48:15PM +0100, Peter Zijlstra wrote:
> > On Wed, Nov 29, 2023 at 05:28:27PM +0100, Christian Brauner wrote:
> > 
> > > > +pid_t rust_helper_task_tgid_nr_ns(struct task_struct *tsk,
> > > > +				  struct pid_namespace *ns)
> > > > +{
> > > > +	return task_tgid_nr_ns(tsk, ns);
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(rust_helper_task_tgid_nr_ns);
> > > 
> > > I'm a bit puzzled by all these rust_helper_*() calls. Can you explain
> > > why they are needed? Because they are/can be static inlines and that
> > > somehow doesn't work?
> > 
> > Correct, because Rust can only talk to C ABI, it cannot use C headers.
> > Bindgen would need to translate the full C headers into valid Rust for
> > that to work.
> > 
> > I really think the Rust peoples should spend more effort on that,
> > because you are quite right, all this wrappery is tedious at best.

I suspect even if the manpower existed to go that route we'd end up
regretting it, because then the Rust compiler would need to be able to
handle _all_ the craziness a modern C compiler knows how to do -
preprocessor magic/devilry isn't even the worst of it, it gets even
worse when you start to consider things like bitfields and all the crazy
__attributes__(()) people have invented.

Swift went that route, but they have Apple funding them, and I doubt
even they would want anything to do with Linux kernel C.

IOW: yes, the extra friction from not being able to do full C -> Rust
translation is annoying now, but probably a good thing in the long run.

> The problem is that we end up with a long list of explicit exports that
> also are all really weirdly named like rust_helper_*(). I wouldn't even
> complain if it they were somehow auto-generated but as you say that
> might be out of scope.

I think we'd need help from the C side to auto generate them - what we
really want is for them to be inline, not static inline, but of course
that has never really worked for functions used across a single C file.
But maybe C compiler people are smarter these days?

Just a keyword to to tell the C compiler "take this static inline and
generate a compiled version in this .c file" would be all we need.

I could see it being handy for other things, too: as Linus has been
saying, we tend to inline too much code these days, and part of the
reason for that is we make a function inline because of the _one_
fastpath that needs it, but there's 3 more slowpaths that don't. 

And right now we don't have any sane way of having a function be
available with both inlined and outlined versions, besides the same kind
of manual wrappers the Rust people are doing here... so we should
probably just fix that.
  
Kent Overstreet Dec. 6, 2023, 8:02 p.m. UTC | #8
On Thu, Nov 30, 2023 at 11:36:35AM +0100, Peter Zijlstra wrote:
> On Wed, Nov 29, 2023 at 01:12:17PM +0000, Alice Ryhl wrote:
> 
> > diff --git a/rust/helpers.c b/rust/helpers.c
> > index fd633d9db79a..58e3a9dff349 100644
> > --- a/rust/helpers.c
> > +++ b/rust/helpers.c
> > @@ -142,6 +142,51 @@ void rust_helper_put_task_struct(struct task_struct *t)
> >  }
> >  EXPORT_SYMBOL_GPL(rust_helper_put_task_struct);
> >  
> > +kuid_t rust_helper_task_uid(struct task_struct *task)
> > +{
> > +	return task_uid(task);
> > +}
> > +EXPORT_SYMBOL_GPL(rust_helper_task_uid);
> > +
> > +kuid_t rust_helper_task_euid(struct task_struct *task)
> > +{
> > +	return task_euid(task);
> > +}
> > +EXPORT_SYMBOL_GPL(rust_helper_task_euid);
> 
> Aren't these like ideal speculation gadgets? And shouldn't we avoid
> functions like this for exactly that reason?

I think asking the Rust people to care about that is probably putting
too many constraints on them, unless you actually have an idea for
something better to do...

(loudly giving the CPU manufacturers the middle finger for making _all_
of us deal with this bullshit)
  
Greg KH Dec. 7, 2023, 7:18 a.m. UTC | #9
On Wed, Dec 06, 2023 at 03:02:24PM -0500, Kent Overstreet wrote:
> On Thu, Nov 30, 2023 at 11:36:35AM +0100, Peter Zijlstra wrote:
> > On Wed, Nov 29, 2023 at 01:12:17PM +0000, Alice Ryhl wrote:
> > 
> > > diff --git a/rust/helpers.c b/rust/helpers.c
> > > index fd633d9db79a..58e3a9dff349 100644
> > > --- a/rust/helpers.c
> > > +++ b/rust/helpers.c
> > > @@ -142,6 +142,51 @@ void rust_helper_put_task_struct(struct task_struct *t)
> > >  }
> > >  EXPORT_SYMBOL_GPL(rust_helper_put_task_struct);
> > >  
> > > +kuid_t rust_helper_task_uid(struct task_struct *task)
> > > +{
> > > +	return task_uid(task);
> > > +}
> > > +EXPORT_SYMBOL_GPL(rust_helper_task_uid);
> > > +
> > > +kuid_t rust_helper_task_euid(struct task_struct *task)
> > > +{
> > > +	return task_euid(task);
> > > +}
> > > +EXPORT_SYMBOL_GPL(rust_helper_task_euid);
> > 
> > Aren't these like ideal speculation gadgets? And shouldn't we avoid
> > functions like this for exactly that reason?
> 
> I think asking the Rust people to care about that is probably putting
> too many constraints on them, unless you actually have an idea for
> something better to do...

It's not a constraint, it is a "we can not do this as it is buggy
because cpus are broken and we need to protect users from those bugs."

If we were to accept this type of code, then the people who are going
"it's safer to write kernel code in Rust" would be "pleasantly
surprised" when it turns out that their systems are actually more
insecure.

Hint, when "known broken" code is found in code review, it can not just
be ignored.

thanks,

greg k-h
  
Kent Overstreet Dec. 7, 2023, 7:46 a.m. UTC | #10
On Thu, Dec 07, 2023 at 08:18:37AM +0100, Greg Kroah-Hartman wrote:
> On Wed, Dec 06, 2023 at 03:02:24PM -0500, Kent Overstreet wrote:
> > On Thu, Nov 30, 2023 at 11:36:35AM +0100, Peter Zijlstra wrote:
> > > On Wed, Nov 29, 2023 at 01:12:17PM +0000, Alice Ryhl wrote:
> > > 
> > > > diff --git a/rust/helpers.c b/rust/helpers.c
> > > > index fd633d9db79a..58e3a9dff349 100644
> > > > --- a/rust/helpers.c
> > > > +++ b/rust/helpers.c
> > > > @@ -142,6 +142,51 @@ void rust_helper_put_task_struct(struct task_struct *t)
> > > >  }
> > > >  EXPORT_SYMBOL_GPL(rust_helper_put_task_struct);
> > > >  
> > > > +kuid_t rust_helper_task_uid(struct task_struct *task)
> > > > +{
> > > > +	return task_uid(task);
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(rust_helper_task_uid);
> > > > +
> > > > +kuid_t rust_helper_task_euid(struct task_struct *task)
> > > > +{
> > > > +	return task_euid(task);
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(rust_helper_task_euid);
> > > 
> > > Aren't these like ideal speculation gadgets? And shouldn't we avoid
> > > functions like this for exactly that reason?
> > 
> > I think asking the Rust people to care about that is probably putting
> > too many constraints on them, unless you actually have an idea for
> > something better to do...
> 
> It's not a constraint, it is a "we can not do this as it is buggy
> because cpus are broken and we need to protect users from those bugs."
> 
> If we were to accept this type of code, then the people who are going
> "it's safer to write kernel code in Rust" would be "pleasantly
> surprised" when it turns out that their systems are actually more
> insecure.
> 
> Hint, when "known broken" code is found in code review, it can not just
> be ignored.

We're talking about a CPU bug, not a Rust bug, and maybe try a nm
--size-sort and see what you find before throwing stones at them...
  
comex Dec. 8, 2023, 5:28 a.m. UTC | #11
> On Nov 30, 2023, at 4:46 AM, Christian Brauner <brauner@kernel.org> wrote:
> 
> I wouldn't even
> complain if it they were somehow auto-generated but as you say that
> might be out of scope.

FYI, rust-bindgen got an experimental feature of this nature earlier this year:

https://github.com/rust-lang/rust-bindgen/pull/2335

Though apparently it has significant limitations meriting it the “experimental” title.

Regarding the issue of wrappers not being inlined, it's possible to get LLVM to optimize C and Rust code together into an object file, with the help of a compatible Clang and LLD:

@ rustc -O --emit llvm-bc a.rs                                         
@ clang --target=x86_64-unknown-linux-gnu -O2 -c -emit-llvm -o b.bc b.c
@ ld.lld -r -o c.o a.bc b.bc

Basically LTO but within the scope of a single object file.  This would be redundant in cases where kernel-wide LTO is enabled.

Using this approach might slow down compilation a bit due to needing to pass the LLVM bitcode between multiple commands, but probably not very much.

Just chiming in as someone not involved in Rust for Linux but familiar with these tools.  Perhaps this has been considered before and rejected for some reason; I wouldn’t know.
  
Miguel Ojeda Dec. 8, 2023, 4:19 p.m. UTC | #12
On Fri, Dec 8, 2023 at 6:28 AM comex <comexk@gmail.com> wrote:
>
> Regarding the issue of wrappers not being inlined, it's possible to get LLVM to optimize C and Rust code together into an object file, with the help of a compatible Clang and LLD:
>
> @ rustc -O --emit llvm-bc a.rs
> @ clang --target=x86_64-unknown-linux-gnu -O2 -c -emit-llvm -o b.bc b.c
> @ ld.lld -r -o c.o a.bc b.bc
>
> Basically LTO but within the scope of a single object file.  This would be redundant in cases where kernel-wide LTO is enabled.
>
> Using this approach might slow down compilation a bit due to needing to pass the LLVM bitcode between multiple commands, but probably not very much.
>
> Just chiming in as someone not involved in Rust for Linux but familiar with these tools.  Perhaps this has been considered before and rejected for some reason; I wouldn’t know.

Thanks comex for chiming in, much appreciated.

Yeah, this is what we have been calling the "local-LTO hack" and it
was one of the possibilities we were considering for non-LTO kernel
builds for performance reasons originally. I don't recall who
originally suggested it in one of our meetings (Gary or Björn
perhaps).

If LLVM folks think LLVM-wise nothing will break, then we are happy to
go ahead with that (since it also solves the performance side), but it
would be nice to know if it will always be OK to build like that, i.e.
I think Andreas actually tried it and it seemed to work and boot, but
the worry is whether there is something subtle that could have bad
codegen in the future.

(We will also need to worry about GCC.)

Cheers,
Miguel
  
Peter Zijlstra Dec. 8, 2023, 4:26 p.m. UTC | #13
On Wed, Dec 06, 2023 at 02:59:11PM -0500, Kent Overstreet wrote:

> I suspect even if the manpower existed to go that route we'd end up
> regretting it, because then the Rust compiler would need to be able to
> handle _all_ the craziness a modern C compiler knows how to do -
> preprocessor magic/devilry isn't even the worst of it, it gets even
> worse when you start to consider things like bitfields and all the crazy
> __attributes__(()) people have invented.

Dude, clang can already handle all of that. Both rust and clang are
build on top of llvm, they generate the same IR, you can simply feed a
string into libclang and get IR out of it, which you can splice into
your rust generated IR.
  
Nick Desaulniers Dec. 8, 2023, 5:08 p.m. UTC | #14
On Fri, Dec 8, 2023 at 8:19 AM Miguel Ojeda
<miguel.ojeda.sandonis@gmail.com> wrote:
>
> On Fri, Dec 8, 2023 at 6:28 AM comex <comexk@gmail.com> wrote:
> >
> > Regarding the issue of wrappers not being inlined, it's possible to get LLVM to optimize C and Rust code together into an object file, with the help of a compatible Clang and LLD:
> >
> > @ rustc -O --emit llvm-bc a.rs
> > @ clang --target=x86_64-unknown-linux-gnu -O2 -c -emit-llvm -o b.bc b.c
> > @ ld.lld -r -o c.o a.bc b.bc
> >
> > Basically LTO but within the scope of a single object file.  This would be redundant in cases where kernel-wide LTO is enabled.
> >
> > Using this approach might slow down compilation a bit due to needing to pass the LLVM bitcode between multiple commands, but probably not very much.
> >
> > Just chiming in as someone not involved in Rust for Linux but familiar with these tools.  Perhaps this has been considered before and rejected for some reason; I wouldn’t know.
>
> Thanks comex for chiming in, much appreciated.
>
> Yeah, this is what we have been calling the "local-LTO hack" and it
> was one of the possibilities we were considering for non-LTO kernel
> builds for performance reasons originally. I don't recall who
> originally suggested it in one of our meetings (Gary or Björn
> perhaps).
>
> If LLVM folks think LLVM-wise nothing will break, then we are happy to

On paper, nothing comes to mind.  No promises though.

From a build system perspective, I'd rather just point users towards
LTO if they have this concern.  We support full and thin lto.  This
proposal would add a third variant for just rust drivers.  Each
variation on LTO has a maintenance cost and each have had their own
distinct fun bugs in the past.  Not sure an additional variant is
worth the maintenance cost, even if it's technically feasible.

> go ahead with that (since it also solves the performance side), but it
> would be nice to know if it will always be OK to build like that, i.e.
> I think Andreas actually tried it and it seemed to work and boot, but
> the worry is whether there is something subtle that could have bad
> codegen in the future.
>
> (We will also need to worry about GCC.)
>
> Cheers,
> Miguel
  
Miguel Ojeda Dec. 8, 2023, 5:37 p.m. UTC | #15
On Fri, Dec 8, 2023 at 6:09 PM Nick Desaulniers <ndesaulniers@google.com> wrote:
>
> On paper, nothing comes to mind.  No promises though.

Thanks Nick -- that is useful nevertheless.

> From a build system perspective, I'd rather just point users towards
> LTO if they have this concern.  We support full and thin lto.  This
> proposal would add a third variant for just rust drivers.  Each
> variation on LTO has a maintenance cost and each have had their own
> distinct fun bugs in the past.  Not sure an additional variant is
> worth the maintenance cost, even if it's technically feasible.

I was thinking it would be something always done for Rust object
files: under a normal "no LTO" build, the Rust object files would
always get the cross-language inlining done and therefore no extra
dimension in the matrix. Would that help?

I think it is worth at least considering, given there is also a
non-trivial amount of performance to gain if we always do it, e.g.
Andreas wanted it for non-LTO kernel for this reason.

Cheers,
Miguel
  
Boqun Feng Dec. 8, 2023, 5:43 p.m. UTC | #16
On Fri, Dec 08, 2023 at 09:08:47AM -0800, Nick Desaulniers wrote:
> On Fri, Dec 8, 2023 at 8:19 AM Miguel Ojeda
> <miguel.ojeda.sandonis@gmail.com> wrote:
> >
> > On Fri, Dec 8, 2023 at 6:28 AM comex <comexk@gmail.com> wrote:
> > >
> > > Regarding the issue of wrappers not being inlined, it's possible to get LLVM to optimize C and Rust code together into an object file, with the help of a compatible Clang and LLD:
> > >
> > > @ rustc -O --emit llvm-bc a.rs
> > > @ clang --target=x86_64-unknown-linux-gnu -O2 -c -emit-llvm -o b.bc b.c
> > > @ ld.lld -r -o c.o a.bc b.bc
> > >
> > > Basically LTO but within the scope of a single object file.  This would be redundant in cases where kernel-wide LTO is enabled.
> > >
> > > Using this approach might slow down compilation a bit due to needing to pass the LLVM bitcode between multiple commands, but probably not very much.
> > >
> > > Just chiming in as someone not involved in Rust for Linux but familiar with these tools.  Perhaps this has been considered before and rejected for some reason; I wouldn’t know.
> >
> > Thanks comex for chiming in, much appreciated.
> >
> > Yeah, this is what we have been calling the "local-LTO hack" and it
> > was one of the possibilities we were considering for non-LTO kernel
> > builds for performance reasons originally. I don't recall who
> > originally suggested it in one of our meetings (Gary or Björn
> > perhaps).
> >
> > If LLVM folks think LLVM-wise nothing will break, then we are happy to
> 
> On paper, nothing comes to mind.  No promises though.
> 
> From a build system perspective, I'd rather just point users towards
> LTO if they have this concern.  We support full and thin lto.  This
> proposal would add a third variant for just rust drivers.  Each
> variation on LTO has a maintenance cost and each have had their own
> distinct fun bugs in the past.  Not sure an additional variant is
> worth the maintenance cost, even if it's technically feasible.
> 

Actually, the "LTO" in "local-LTO" may be misleading ;-) The problem we
want to resolve here is letting Rust code call small C functions (or
macros) without exporting the symbols. To me, it's really just "static
linking" a library (right now it's rust/helpers.o) contains small C
functions and macros used by Rust into a Rust driver kmodule, the "LTO"
part can be optional: let the linker make the call.

Regards,
Boqun

> > go ahead with that (since it also solves the performance side), but it
> > would be nice to know if it will always be OK to build like that, i.e.
> > I think Andreas actually tried it and it seemed to work and boot, but
> > the worry is whether there is something subtle that could have bad
> > codegen in the future.
> >
> > (We will also need to worry about GCC.)
> >
> > Cheers,
> > Miguel
> 
> 
> 
> -- 
> Thanks,
> ~Nick Desaulniers
  
Kent Overstreet Dec. 8, 2023, 7:58 p.m. UTC | #17
On Fri, Dec 08, 2023 at 05:26:16PM +0100, Peter Zijlstra wrote:
> On Wed, Dec 06, 2023 at 02:59:11PM -0500, Kent Overstreet wrote:
> 
> > I suspect even if the manpower existed to go that route we'd end up
> > regretting it, because then the Rust compiler would need to be able to
> > handle _all_ the craziness a modern C compiler knows how to do -
> > preprocessor magic/devilry isn't even the worst of it, it gets even
> > worse when you start to consider things like bitfields and all the crazy
> > __attributes__(()) people have invented.
> 
> Dude, clang can already handle all of that. Both rust and clang are
> build on top of llvm, they generate the same IR, you can simply feed a
> string into libclang and get IR out of it, which you can splice into
> your rust generated IR.

If only it were that simple :)

This is struct definitions we're talking about, not code, so what you
want isn't even IR, what you're generating is a memory layout for a
type, linked in with all your other type information.

And people critize Linux for being a giant monorepo that makes no
considerations for making our code reusable in other contexts; clang and
LLVM are no different. But that's not really the issue because you're
going to need a huge chunk of clang to even parse this stuff, what you
really want is a way to invoke clang and dump _type information_ in a
standardized, easy to consume way. What you want is actually more akin
to the debug info that's generated today.

So... yeah, sure, lovely if it existed, but not the world we live in :)

(As an aside, I've actually got an outstanding bug filed with rustc
because it needs to be able to handle types that are marked both packed
and aligned... if anyone in this thread _does_ know some rust compiler
folks, we need that for bcachefs on disk format types).
  
Matthew Wilcox Dec. 8, 2023, 8:43 p.m. UTC | #18
On Fri, Dec 08, 2023 at 09:08:47AM -0800, Nick Desaulniers wrote:
> From a build system perspective, I'd rather just point users towards
> LTO if they have this concern.  We support full and thin lto.  This
> proposal would add a third variant for just rust drivers.  Each
> variation on LTO has a maintenance cost and each have had their own
> distinct fun bugs in the past.  Not sure an additional variant is
> worth the maintenance cost, even if it's technically feasible.

If we're allowed to talk about ideal solutions ... I hate putting
code in header files.  I'd rather be able to put, eg:

__force_inline int put_page_testzero(struct page *page)
{
	VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
	return page_ref_dec_and_test(page);
}

__force_inline int folio_put_testzero(struct folio *folio)
{
	return put_page_testzero(&folio->page);
}

__force_inline void folio_put(struct folio *folio)
{
	if (folio_put_testzero(folio))
		__folio_put(folio);
}

into a .c file and have both C and Rust inline folio_put(),
folio_put_testzero(), put_page_testzero(), VM_BUG_ON_PAGE() and
page_ref_dec_and_test(), but not even attempt to inline __folio_put()
(because We Know Better, and have determined that is the point at
which to stop).
  
comex Dec. 9, 2023, 7:24 a.m. UTC | #19
On Dec 8, 2023, at 8:19 AM, Miguel Ojeda <miguel.ojeda.sandonis@gmail.com> wrote:
> 
> If LLVM folks think LLVM-wise nothing will break, then we are happy to
> go ahead with that (since it also solves the performance side), but it
> would be nice to know if it will always be OK to build like that, i.e.
> I think Andreas actually tried it and it seemed to work and boot, but
> the worry is whether there is something subtle that could have bad
> codegen in the future.

One potential issue is incompatibility between the LLVM versions used by rustc, Clang, and LLD.  At minimum, whichever tool is reading bitcode (LLD in my example) should have an LLVM version >= that of the tools producing bitcode, since newer LLVM versions can read older bitcode but not vice versa.  But ideally the tools would all just be linked against the same copy of LLVM.

If you’re getting your tools from a distro, then that may already be true for you.  But if you’re using upstream rustc binaries, those are built against a custom branch of LLVM, which is based on upstream release versions but adds a handful of patches [1]; by policy, those patches can include cherry-picks of miscompilation fixes that are upstream but haven’t made it into a release yet [2].  Upstream rustc binaries are accompanied by a copy of LLD linked against the same LLVM library, named rust-lld, but there’s no corresponding copy of Clang [3].  I’d say that agreement between rustc and LLD is the most important thing, but it would be nice if they'd make a matching Clang available through rustup.

[1] https://github.com/llvm/llvm-project/compare/release/17.x...rust-lang:llvm-project:rustc/17.0-2023-09-19
[2] https://rustc-dev-guide.rust-lang.org/backend/updating-llvm.html#bugfix-updates
[3] https://github.com/rust-lang/rust/issues/56371
  

Patch

diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index 81b13a953eae..700f01840188 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -11,6 +11,7 @@ 
 #include <linux/errname.h>
 #include <linux/file.h>
 #include <linux/fs.h>
+#include <linux/pid_namespace.h>
 #include <linux/security.h>
 #include <linux/slab.h>
 #include <linux/refcount.h>
diff --git a/rust/helpers.c b/rust/helpers.c
index fd633d9db79a..58e3a9dff349 100644
--- a/rust/helpers.c
+++ b/rust/helpers.c
@@ -142,6 +142,51 @@  void rust_helper_put_task_struct(struct task_struct *t)
 }
 EXPORT_SYMBOL_GPL(rust_helper_put_task_struct);
 
+kuid_t rust_helper_task_uid(struct task_struct *task)
+{
+	return task_uid(task);
+}
+EXPORT_SYMBOL_GPL(rust_helper_task_uid);
+
+kuid_t rust_helper_task_euid(struct task_struct *task)
+{
+	return task_euid(task);
+}
+EXPORT_SYMBOL_GPL(rust_helper_task_euid);
+
+#ifndef CONFIG_USER_NS
+uid_t rust_helper_from_kuid(struct user_namespace *to, kuid_t uid)
+{
+	return from_kuid(to, uid);
+}
+EXPORT_SYMBOL_GPL(rust_helper_from_kuid);
+#endif /* CONFIG_USER_NS */
+
+bool rust_helper_uid_eq(kuid_t left, kuid_t right)
+{
+	return uid_eq(left, right);
+}
+EXPORT_SYMBOL_GPL(rust_helper_uid_eq);
+
+kuid_t rust_helper_current_euid(void)
+{
+	return current_euid();
+}
+EXPORT_SYMBOL_GPL(rust_helper_current_euid);
+
+struct user_namespace *rust_helper_current_user_ns(void)
+{
+	return current_user_ns();
+}
+EXPORT_SYMBOL_GPL(rust_helper_current_user_ns);
+
+pid_t rust_helper_task_tgid_nr_ns(struct task_struct *tsk,
+				  struct pid_namespace *ns)
+{
+	return task_tgid_nr_ns(tsk, ns);
+}
+EXPORT_SYMBOL_GPL(rust_helper_task_tgid_nr_ns);
+
 struct kunit *rust_helper_kunit_get_current_test(void)
 {
 	return kunit_get_current_test();
diff --git a/rust/kernel/cred.rs b/rust/kernel/cred.rs
index 3794937b5294..fbc749788bfa 100644
--- a/rust/kernel/cred.rs
+++ b/rust/kernel/cred.rs
@@ -8,6 +8,7 @@ 
 
 use crate::{
     bindings,
+    task::Kuid,
     types::{AlwaysRefCounted, Opaque},
 };
 
@@ -52,9 +53,9 @@  pub fn get_secid(&self) -> u32 {
     }
 
     /// Returns the effective UID of the given credential.
-    pub fn euid(&self) -> bindings::kuid_t {
+    pub fn euid(&self) -> Kuid {
         // SAFETY: By the type invariant, we know that `self.0` is valid.
-        unsafe { (*self.0.get()).euid }
+        Kuid::from_raw(unsafe { (*self.0.get()).euid })
     }
 }
 
diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs
index b2299bc7ac1f..1a27b968a907 100644
--- a/rust/kernel/task.rs
+++ b/rust/kernel/task.rs
@@ -5,7 +5,12 @@ 
 //! C header: [`include/linux/sched.h`](../../../../include/linux/sched.h).
 
 use crate::{bindings, types::Opaque};
-use core::{marker::PhantomData, ops::Deref, ptr};
+use core::{
+    cmp::{Eq, PartialEq},
+    marker::PhantomData,
+    ops::Deref,
+    ptr,
+};
 
 /// Returns the currently running task.
 #[macro_export]
@@ -78,6 +83,12 @@  unsafe impl Sync for Task {}
 /// The type of process identifiers (PIDs).
 type Pid = bindings::pid_t;
 
+/// The type of user identifiers (UIDs).
+#[derive(Copy, Clone)]
+pub struct Kuid {
+    kuid: bindings::kuid_t,
+}
+
 impl Task {
     /// Returns a task reference for the currently executing task/thread.
     ///
@@ -132,12 +143,34 @@  pub fn pid(&self) -> Pid {
         unsafe { *ptr::addr_of!((*self.0.get()).pid) }
     }
 
+    /// Returns the UID of the given task.
+    pub fn uid(&self) -> Kuid {
+        // SAFETY: By the type invariant, we know that `self.0` is valid.
+        Kuid::from_raw(unsafe { bindings::task_uid(self.0.get()) })
+    }
+
+    /// Returns the effective UID of the given task.
+    pub fn euid(&self) -> Kuid {
+        // SAFETY: By the type invariant, we know that `self.0` is valid.
+        Kuid::from_raw(unsafe { bindings::task_euid(self.0.get()) })
+    }
+
     /// Determines whether the given task has pending signals.
     pub fn signal_pending(&self) -> bool {
         // SAFETY: By the type invariant, we know that `self.0` is valid.
         unsafe { bindings::signal_pending(self.0.get()) != 0 }
     }
 
+    /// Returns the given task's pid in the current pid namespace.
+    pub fn pid_in_current_ns(&self) -> Pid {
+        // SAFETY: We know that `self.0.get()` is valid by the type invariant. The rest is just FFI
+        // calls.
+        unsafe {
+            let namespace = bindings::task_active_pid_ns(bindings::get_current());
+            bindings::task_tgid_nr_ns(self.0.get(), namespace)
+        }
+    }
+
     /// Wakes up the task.
     pub fn wake_up(&self) {
         // SAFETY: By the type invariant, we know that `self.0.get()` is non-null and valid.
@@ -147,6 +180,42 @@  pub fn wake_up(&self) {
     }
 }
 
+impl Kuid {
+    /// Get the current euid.
+    pub fn current_euid() -> Kuid {
+        // SAFETY: Just an FFI call.
+        Self {
+            kuid: unsafe { bindings::current_euid() },
+        }
+    }
+
+    /// Create a `Kuid` given the raw C type.
+    pub fn from_raw(kuid: bindings::kuid_t) -> Self {
+        Self { kuid }
+    }
+
+    /// Turn this kuid into the raw C type.
+    pub fn into_raw(self) -> bindings::kuid_t {
+        self.kuid
+    }
+
+    /// Converts this kernel UID into a UID that userspace understands. Uses the namespace of the
+    /// current task.
+    pub fn into_uid_in_current_ns(self) -> bindings::uid_t {
+        // SAFETY: Just an FFI call.
+        unsafe { bindings::from_kuid(bindings::current_user_ns(), self.kuid) }
+    }
+}
+
+impl PartialEq for Kuid {
+    fn eq(&self, other: &Kuid) -> bool {
+        // SAFETY: Just an FFI call.
+        unsafe { bindings::uid_eq(self.kuid, other.kuid) }
+    }
+}
+
+impl Eq for Kuid {}
+
 // SAFETY: The type invariants guarantee that `Task` is always ref-counted.
 unsafe impl crate::types::AlwaysRefCounted for Task {
     fn inc_ref(&self) {