diff mbox series

audit: check syscall bitmap on entry to avoid extra work

Message ID	20230523181624.19932-1-ivan@cloudflare.com
State	New
Headers	Received-SPF: pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 2620:137:e000::1:20 as permitted sender) client-ip=2620:137:e000::1:20; From: Ivan Babrou <ivan@cloudflare.com> To: audit@vger.kernel.org Cc: linux-kernel@vger.kernel.org, kernel-team@cloudflare.com, Paul Moore <paul@paul-moore.com>, Eric Paris <eparis@redhat.com>, Ivan Babrou <ivan@cloudflare.com> Subject: [PATCH] audit: check syscall bitmap on entry to avoid extra work Date: Tue, 23 May 2023 11:16:24 -0700 Message-Id: <20230523181624.19932-1-ivan@cloudflare.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	audit: check syscall bitmap on entry to avoid extra work \| audit: check syscall bitmap on entry to avoid extra work

Commit Message

Ivan Babrou May 23, 2023, 6:16 p.m. UTC

  Currently audit subsystem arms itself as long as there are rules present,
which means that on every syscall exit all rules are evaluated, even
if they don't match the syscall to begin with. For setups where
there are no rules that can match any syscall, this means that
the CPU price needs to be paid when it's not necessary.

This patch introduces a bitmap for syscalls that is maintained
when rules are inserted and removed. For every syscall we maintain
a bit indicating whether it needs to be audited at all, which is then
checked at syscall entry. If the are no rules matching a syscall,
extra cost of checking all the rules is avoided.

Consider the following set of 10 audit rules as a benchmark:

    -a always,exit -F arch=b64 -S unlinkat,linkat,renameat,openat,renameat2 -F perm=wa -F dir=/tmp/audit-bench/0 -F key=BENCH0
    -a always,exit -F arch=b64 -S unlinkat,linkat,renameat,openat,renameat2 -F perm=wa -F dir=/tmp/audit-bench/1 -F key=BENCH1
    -a always,exit -F arch=b64 -S unlinkat,linkat,renameat,openat,renameat2 -F perm=wa -F dir=/tmp/audit-bench/2 -F key=BENCH2
    -a always,exit -F arch=b64 -S unlinkat,linkat,renameat,openat,renameat2 -F perm=wa -F dir=/tmp/audit-bench/3 -F key=BENCH3
    -a always,exit -F arch=b64 -S unlinkat,linkat,renameat,openat,renameat2 -F perm=wa -F dir=/tmp/audit-bench/4 -F key=BENCH4
    -a always,exit -F arch=b64 -S unlinkat,linkat,renameat,openat,renameat2 -F perm=wa -F dir=/tmp/audit-bench/5 -F key=BENCH5
    -a always,exit -F arch=b64 -S unlinkat,linkat,renameat,openat,renameat2 -F perm=wa -F dir=/tmp/audit-bench/6 -F key=BENCH6
    -a always,exit -F arch=b64 -S unlinkat,linkat,renameat,openat,renameat2 -F perm=wa -F dir=/tmp/audit-bench/7 -F key=BENCH7
    -a always,exit -F arch=b64 -S unlinkat,linkat,renameat,openat,renameat2 -F perm=wa -F dir=/tmp/audit-bench/8 -F key=BENCH8
    -a always,exit -F arch=b64 -S unlinkat,linkat,renameat,openat,renameat2 -F perm=wa -F dir=/tmp/audit-bench/9 -F key=BENCH9

We can use the following benchmark to run unrelated syscalls:

    #include <sys/stat.h>
    #include <unistd.h>
    #include <stdio.h>

    #define GETPID_COUNT 100 * 1000
    #define STAT_COUNT 100 * 1000

    pid_t bench_getpid()
    {
        pid_t pid;

        for (int i = 0; i < GETPID_COUNT; i++)
        {
            pid = getpid();
        }

        return pid;
    }

    struct stat bench_stat()
    {
        struct stat statbuf;

        for (int i = 0; i < STAT_COUNT; i++)
        {
            stat("/etc/passwd", &statbuf);
        }

        return statbuf;
    }

    int main()
    {
        pid_t pid = bench_getpid();
        struct stat statbuf = bench_stat();

        printf("pid = %d, size = %d\n", pid, statbuf.st_size);
    }

Here we run 100k `getpid()` calls and 100k `stat()` calls, which are not
covered by any of the audit rules installed on the system.

When running without any rules present, but with auditd running, flamegraphs
show ~5% of CPU time spent in audit_* code. If we install the rules mentioned
above, this number jumps to ~24%. With this patch applied, the number is once
again down to 5%, which is what one would expect.

There's extra cost of maintaining the bitmap when rules are changed,
but it's negligible compared to CPU savings from cheaper syscalls.

Signed-off-by: Ivan Babrou <ivan@cloudflare.com>
---
 include/linux/audit.h | 21 +++++++++++++++++++++
 kernel/auditfilter.c  | 32 ++++++++++++++++++++++++++++----
 kernel/auditsc.c      | 27 +++++++++++----------------
 3 files changed, 60 insertions(+), 20 deletions(-)

Comments

Paul Moore May 23, 2023, 7:59 p.m. UTC | #1

On Tue, May 23, 2023 at 2:16 PM Ivan Babrou <ivan@cloudflare.com> wrote:
> Currently audit subsystem arms itself as long as there are rules present,
> which means that on every syscall exit all rules are evaluated, even
> if they don't match the syscall to begin with. For setups where
> there are no rules that can match any syscall, this means that
> the CPU price needs to be paid when it's not necessary.
>
> This patch introduces a bitmap for syscalls that is maintained
> when rules are inserted and removed. For every syscall we maintain
> a bit indicating whether it needs to be audited at all, which is then
> checked at syscall entry. If the are no rules matching a syscall,
> extra cost of checking all the rules is avoided.
>
> Consider the following set of 10 audit rules as a benchmark:
>
>     -a always,exit -F arch=b64 -S unlinkat,linkat,renameat,openat,renameat2 -F perm=wa -F dir=/tmp/audit-bench/0 -F key=BENCH0
>     -a always,exit -F arch=b64 -S unlinkat,linkat,renameat,openat,renameat2 -F perm=wa -F dir=/tmp/audit-bench/1 -F key=BENCH1
>     -a always,exit -F arch=b64 -S unlinkat,linkat,renameat,openat,renameat2 -F perm=wa -F dir=/tmp/audit-bench/2 -F key=BENCH2
>     -a always,exit -F arch=b64 -S unlinkat,linkat,renameat,openat,renameat2 -F perm=wa -F dir=/tmp/audit-bench/3 -F key=BENCH3
>     -a always,exit -F arch=b64 -S unlinkat,linkat,renameat,openat,renameat2 -F perm=wa -F dir=/tmp/audit-bench/4 -F key=BENCH4
>     -a always,exit -F arch=b64 -S unlinkat,linkat,renameat,openat,renameat2 -F perm=wa -F dir=/tmp/audit-bench/5 -F key=BENCH5
>     -a always,exit -F arch=b64 -S unlinkat,linkat,renameat,openat,renameat2 -F perm=wa -F dir=/tmp/audit-bench/6 -F key=BENCH6
>     -a always,exit -F arch=b64 -S unlinkat,linkat,renameat,openat,renameat2 -F perm=wa -F dir=/tmp/audit-bench/7 -F key=BENCH7
>     -a always,exit -F arch=b64 -S unlinkat,linkat,renameat,openat,renameat2 -F perm=wa -F dir=/tmp/audit-bench/8 -F key=BENCH8
>     -a always,exit -F arch=b64 -S unlinkat,linkat,renameat,openat,renameat2 -F perm=wa -F dir=/tmp/audit-bench/9 -F key=BENCH9
>
> We can use the following benchmark to run unrelated syscalls:
>
>     #include <sys/stat.h>
>     #include <unistd.h>
>     #include <stdio.h>
>
>     #define GETPID_COUNT 100 * 1000
>     #define STAT_COUNT 100 * 1000
>
>     pid_t bench_getpid()
>     {
>         pid_t pid;
>
>         for (int i = 0; i < GETPID_COUNT; i++)
>         {
>             pid = getpid();
>         }
>
>         return pid;
>     }
>
>     struct stat bench_stat()
>     {
>         struct stat statbuf;
>
>         for (int i = 0; i < STAT_COUNT; i++)
>         {
>             stat("/etc/passwd", &statbuf);
>         }
>
>         return statbuf;
>     }
>
>     int main()
>     {
>         pid_t pid = bench_getpid();
>         struct stat statbuf = bench_stat();
>
>         printf("pid = %d, size = %d\n", pid, statbuf.st_size);
>     }
>
> Here we run 100k `getpid()` calls and 100k `stat()` calls, which are not
> covered by any of the audit rules installed on the system.
>
> When running without any rules present, but with auditd running, flamegraphs
> show ~5% of CPU time spent in audit_* code. If we install the rules mentioned
> above, this number jumps to ~24%. With this patch applied, the number is once
> again down to 5%, which is what one would expect.

Before seriously considering something like this, I would really like
to see some time put into profiling the original overhead and some
designs on how that could be improved.  Without that, patches like
this look like drive-by band-aids which have already caused enough
headaches for audit maintenance.

> There's extra cost of maintaining the bitmap when rules are changed,
> but it's negligible compared to CPU savings from cheaper syscalls.
>
> Signed-off-by: Ivan Babrou <ivan@cloudflare.com>
> ---
>  include/linux/audit.h | 21 +++++++++++++++++++++
>  kernel/auditfilter.c  | 32 ++++++++++++++++++++++++++++----
>  kernel/auditsc.c      | 27 +++++++++++----------------
>  3 files changed, 60 insertions(+), 20 deletions(-)

Ivan Babrou May 23, 2023, 9:50 p.m. UTC | #2

On Tue, May 23, 2023 at 12:59 PM Paul Moore <paul@paul-moore.com> wrote:
> Before seriously considering something like this, I would really like
> to see some time put into profiling the original overhead and some
> designs on how that could be improved.  Without that, patches like
> this look like drive-by band-aids which have already caused enough
> headaches for audit maintenance.

Hello Paul,

Could you elaborate on what exactly you would like to see added? It's
not clear to me what is missing.

There's a benchmark in the commit description with the numbers
attached, which quantifies and explains the existing overhead. In my
experience, people on Linux mailing lists frown upon external links to
images, but if it helps to visualize the effects, I have some
flamegraphs for the benchmark from the commit message:

* 10 rules, before:
https://r2.ivan.computer/audit-syscall-bitmap/flamegraph-before.svg?s=audit
* 10 rules, with patch applied:
https://r2.ivan.computer/audit-syscall-bitmap/flamegraph-after.svg?s=audit

Here's a couple extra:

* 0 rules, auditd running:
https://r2.ivan.computer/audit-syscall-bitmap/flamegraph-after.svg?s=audit
* 11 rules (extra rule matching the running syscalls):
https://r2.ivan.computer/audit-syscall-bitmap/flamegraph-after-match.svg?s=audit

The bitmap design mirrors what tracepoints implement for syscall entry/exit:

* https://elixir.bootlin.com/linux/v6.4-rc1/source/kernel/trace/trace_syscalls.c#L585

I am happy to consider a different design if you have one in mind.

Paul Moore May 24, 2023, 2:03 a.m. UTC | #3

On Tue, May 23, 2023 at 5:50 PM Ivan Babrou <ivan@cloudflare.com> wrote:
> On Tue, May 23, 2023 at 12:59 PM Paul Moore <paul@paul-moore.com> wrote:
> > Before seriously considering something like this, I would really like
> > to see some time put into profiling the original overhead and some
> > designs on how that could be improved.  Without that, patches like
> > this look like drive-by band-aids which have already caused enough
> > headaches for audit maintenance.
>
> Hello Paul,
>
> Could you elaborate on what exactly you would like to see added? It's
> not clear to me what is missing.

I should have been more clear, let me try again ...

From my perspective, this patch adds code and complexity to deal with
the performance impact of auditing.  In some cases that is the right
thing to do, but I would much rather see a more in-depth analysis of
where the audit hot spots are in this benchmark, and some thoughts on
how we might improve that.  In other words, don't just add additional
processing to bypass (slower, more involved) processing; look at the
processing that is currently being done and see if you can find a way
to make it faster.  It will likely take longer, but the results will
be much more useful.

Does that make sense?

Ivan Babrou May 24, 2023, 6:04 p.m. UTC | #4

On Tue, May 23, 2023 at 7:03 PM Paul Moore <paul@paul-moore.com> wrote:
> > Could you elaborate on what exactly you would like to see added? It's
> > not clear to me what is missing.
>
> I should have been more clear, let me try again ...
>
> From my perspective, this patch adds code and complexity to deal with
> the performance impact of auditing.  In some cases that is the right
> thing to do, but I would much rather see a more in-depth analysis of
> where the audit hot spots are in this benchmark, and some thoughts on
> how we might improve that.  In other words, don't just add additional
> processing to bypass (slower, more involved) processing; look at the
> processing that is currently being done and see if you can find a way
> to make it faster.  It will likely take longer, but the results will
> be much more useful.

The fastest way to do something is to not do it to begin with. The
next best thing I could think of is checking a trivial condition (int
equality + bit check) to bypass the work in the hot path, which is
what this patch does. I'm not even adding a new condition for this,
I'm using the existing context->dummy. It is also 100% transparent for
end users, which get the benefits as long as they don't have any rules
that target all syscalls.

It's not very useful to see futex() and stat() syscalls being audited
when we have no rules that target those. The processing of rules in
exit is already fast for a reasonable set of rules, but we don't have
to do it to begin with. List iteration is not going to be faster than
a bit check. For VFS related things we also have to collect paths
accessed during processing and it's just not useful when we know that
there is no way these are going to be used.

We started with a ruleset that was matching all syscalls and this was
very expensive. We reduced it to targeting specific syscalls, which
made it a lot cheaper, but it's still a very noticeable fraction of
overall CPU usage (the benchmark in the commit is the evidence of
that). Not enabling auditing on syscall entry is the next logical step
in making audit cheap enough to not feel guilty about using it in the
first place.

Paul Moore May 25, 2023, 2:15 a.m. UTC | #5

On Wed, May 24, 2023 at 2:05 PM Ivan Babrou <ivan@cloudflare.com> wrote:
>
> On Tue, May 23, 2023 at 7:03 PM Paul Moore <paul@paul-moore.com> wrote:
> > > Could you elaborate on what exactly you would like to see added? It's
> > > not clear to me what is missing.
> >
> > I should have been more clear, let me try again ...
> >
> > From my perspective, this patch adds code and complexity to deal with
> > the performance impact of auditing.  In some cases that is the right
> > thing to do, but I would much rather see a more in-depth analysis of
> > where the audit hot spots are in this benchmark, and some thoughts on
> > how we might improve that.  In other words, don't just add additional
> > processing to bypass (slower, more involved) processing; look at the
> > processing that is currently being done and see if you can find a way
> > to make it faster.  It will likely take longer, but the results will
> > be much more useful.
>
> The fastest way to do something is to not do it to begin with.

While you are not wrong, I believe you and I are focusing on different
things.  From my perspective, you appear primarily concerned with
improving performance by reducing the overhead of auditing.  I too am
interested in reducing the audit overhead, but I also place a very
high value on maintainable code, perhaps more than performance simply
because the current audit code quality is so very poor.
Unfortunately, the patch you posted appears to me as yet another
bolt-on performance tweak that doesn't make an attempt at analyzing
the current hot spots of syscall auditing, and ideally offering
solutions.  Perhaps ultimately this approach is the only sane thing
that can be done, but I'd like to see some analysis first of the
syscall auditing path.

Ignat Korchagin June 2, 2023, 11:08 a.m. UTC | #6

On Thu, May 25, 2023 at 3:15 AM Paul Moore <paul@paul-moore.com> wrote:
>
> On Wed, May 24, 2023 at 2:05 PM Ivan Babrou <ivan@cloudflare.com> wrote:
> >
> > On Tue, May 23, 2023 at 7:03 PM Paul Moore <paul@paul-moore.com> wrote:
> > > > Could you elaborate on what exactly you would like to see added? It's
> > > > not clear to me what is missing.
> > >
> > > I should have been more clear, let me try again ...
> > >
> > > From my perspective, this patch adds code and complexity to deal with
> > > the performance impact of auditing.  In some cases that is the right
> > > thing to do, but I would much rather see a more in-depth analysis of
> > > where the audit hot spots are in this benchmark, and some thoughts on
> > > how we might improve that.  In other words, don't just add additional
> > > processing to bypass (slower, more involved) processing; look at the
> > > processing that is currently being done and see if you can find a way
> > > to make it faster.  It will likely take longer, but the results will
> > > be much more useful.
> >
> > The fastest way to do something is to not do it to begin with.
>
> While you are not wrong, I believe you and I are focusing on different
> things.  From my perspective, you appear primarily concerned with
> improving performance by reducing the overhead of auditing.  I too am
> interested in reducing the audit overhead, but I also place a very
> high value on maintainable code, perhaps more than performance simply
> because the current audit code quality is so very poor.
> Unfortunately, the patch you posted appears to me as yet another
> bolt-on performance tweak that doesn't make an attempt at analyzing
> the current hot spots of syscall auditing, and ideally offering
> solutions.  Perhaps ultimately this approach is the only sane thing
> that can be done, but I'd like to see some analysis first of the
> syscall auditing path.

Ivan is out of office, but I would like to keep the discussion going.
We do understand your position and we're actually doing a project
right now to investigate audit performance better and this patch is
only the first thing which came up. Perhaps, we would have some other
improvements in the future.

But the way I see it - the audit subsystem performance and the way how
that subsystem plugs into the rest of the kernel code are two somewhat
independent things with the patch proposed here addressing the latter
(with full understanding that the former might be improved as well). I
think the fundamental issue we're trying to address here is that the
audit subsystem plugs into the kernel in a way, which is different
from conceptually similar systems, like netfilter, LSM,
kprobes/tracepoints etc, thus potentially breaking user expectations.
For example, when I use netfilter or LSMs - I see it as a system of
hooks: my code/rules/inspection plugs into some hooks of interest and
I fully understand that the kernel codepaths, where these hooks are
located, might be affected due to additional processing attached. Thus
I can adjust my code/rules/inspection to plug into only the minimum
required hooks to get the visibility or the effect I need without too
much affecting system performance.

So I (and most of us here really) had the same expectation from audit:
we thought that when you have a rule, which "monitors" a certain
system call, only that system call performance would be affected. IOW
we thought that system calls are a kind of a hook for audit rules and
it was a surprise for us to learn that when having a rule, which
monitors execve() for example, we can impact other unrelated system
calls in totally different workloads. I think this behaviour
fundamentally limits the ability of the audit users to optimise their
rules to monitor/collect only the minimum required information,
because as long as they need to monitor at least one system call -
they effectively monitor all of them. I do strongly suspect that most
Linux users would have a similar (mis-)expectation from the audit
subsystem, hence I think this patch will align the audit subsystem
better with the expectations from users.

Ignat

Paul Moore June 2, 2023, 2:56 p.m. UTC | #7

On Fri, Jun 2, 2023 at 7:08 AM Ignat Korchagin <ignat@cloudflare.com> wrote:
> On Thu, May 25, 2023 at 3:15 AM Paul Moore <paul@paul-moore.com> wrote:
> > On Wed, May 24, 2023 at 2:05 PM Ivan Babrou <ivan@cloudflare.com> wrote:
> > > On Tue, May 23, 2023 at 7:03 PM Paul Moore <paul@paul-moore.com> wrote:
> > > > > Could you elaborate on what exactly you would like to see added? It's
> > > > > not clear to me what is missing.
> > > >
> > > > I should have been more clear, let me try again ...
> > > >
> > > > From my perspective, this patch adds code and complexity to deal with
> > > > the performance impact of auditing.  In some cases that is the right
> > > > thing to do, but I would much rather see a more in-depth analysis of
> > > > where the audit hot spots are in this benchmark, and some thoughts on
> > > > how we might improve that.  In other words, don't just add additional
> > > > processing to bypass (slower, more involved) processing; look at the
> > > > processing that is currently being done and see if you can find a way
> > > > to make it faster.  It will likely take longer, but the results will
> > > > be much more useful.
> > >
> > > The fastest way to do something is to not do it to begin with.
> >
> > While you are not wrong, I believe you and I are focusing on different
> > things.  From my perspective, you appear primarily concerned with
> > improving performance by reducing the overhead of auditing.  I too am
> > interested in reducing the audit overhead, but I also place a very
> > high value on maintainable code, perhaps more than performance simply
> > because the current audit code quality is so very poor.
> > Unfortunately, the patch you posted appears to me as yet another
> > bolt-on performance tweak that doesn't make an attempt at analyzing
> > the current hot spots of syscall auditing, and ideally offering
> > solutions.  Perhaps ultimately this approach is the only sane thing
> > that can be done, but I'd like to see some analysis first of the
> > syscall auditing path.
>
> Ivan is out of office, but I would like to keep the discussion going.
> We do understand your position and we're actually doing a project
> right now to investigate audit performance better ...

That's great, thank you!

> But the way I see it - the audit subsystem performance and the way how
> that subsystem plugs into the rest of the kernel code are two somewhat
> independent things with the patch proposed here addressing the latter
> (with full understanding that the former might be improved as well) ...

You've done a good job explaining the reasoning and motivations behind
the patch submitted, that is good, but I'm not seeing any recognition
or understanding about the perspective I shared with you earlier.  The
performance of audit in general does need to be improved, I don't
think anyone disagrees with that, but my argument is that we need to
focus on changes which not only reduce the processing overhead, but
*also* reduce the complexity of the code as well.

Ignat Korchagin June 2, 2023, 3:15 p.m. UTC | #8

On Fri, Jun 2, 2023 at 3:56 PM Paul Moore <paul@paul-moore.com> wrote:
>
> On Fri, Jun 2, 2023 at 7:08 AM Ignat Korchagin <ignat@cloudflare.com> wrote:
> > On Thu, May 25, 2023 at 3:15 AM Paul Moore <paul@paul-moore.com> wrote:
> > > On Wed, May 24, 2023 at 2:05 PM Ivan Babrou <ivan@cloudflare.com> wrote:
> > > > On Tue, May 23, 2023 at 7:03 PM Paul Moore <paul@paul-moore.com> wrote:
> > > > > > Could you elaborate on what exactly you would like to see added? It's
> > > > > > not clear to me what is missing.
> > > > >
> > > > > I should have been more clear, let me try again ...
> > > > >
> > > > > From my perspective, this patch adds code and complexity to deal with
> > > > > the performance impact of auditing.  In some cases that is the right
> > > > > thing to do, but I would much rather see a more in-depth analysis of
> > > > > where the audit hot spots are in this benchmark, and some thoughts on
> > > > > how we might improve that.  In other words, don't just add additional
> > > > > processing to bypass (slower, more involved) processing; look at the
> > > > > processing that is currently being done and see if you can find a way
> > > > > to make it faster.  It will likely take longer, but the results will
> > > > > be much more useful.
> > > >
> > > > The fastest way to do something is to not do it to begin with.
> > >
> > > While you are not wrong, I believe you and I are focusing on different
> > > things.  From my perspective, you appear primarily concerned with
> > > improving performance by reducing the overhead of auditing.  I too am
> > > interested in reducing the audit overhead, but I also place a very
> > > high value on maintainable code, perhaps more than performance simply
> > > because the current audit code quality is so very poor.
> > > Unfortunately, the patch you posted appears to me as yet another
> > > bolt-on performance tweak that doesn't make an attempt at analyzing
> > > the current hot spots of syscall auditing, and ideally offering
> > > solutions.  Perhaps ultimately this approach is the only sane thing
> > > that can be done, but I'd like to see some analysis first of the
> > > syscall auditing path.
> >
> > Ivan is out of office, but I would like to keep the discussion going.
> > We do understand your position and we're actually doing a project
> > right now to investigate audit performance better ...
>
> That's great, thank you!
>
> > But the way I see it - the audit subsystem performance and the way how
> > that subsystem plugs into the rest of the kernel code are two somewhat
> > independent things with the patch proposed here addressing the latter
> > (with full understanding that the former might be improved as well) ...
>
> You've done a good job explaining the reasoning and motivations behind
> the patch submitted, that is good, but I'm not seeing any recognition
> or understanding about the perspective I shared with you earlier.  The
> performance of audit in general does need to be improved, I don't
> think anyone disagrees with that, but my argument is that we need to
> focus on changes which not only reduce the processing overhead, but
> *also* reduce the complexity of the code as well.

Ah, sorry. You're right - I paid too much attention to performance and
didn't quite read your concern about code complexity. But still, I
think that code complexity improvements fall onto the implementation
of the audit subsystem itself vs what we try to accomplish here is to
improve the way how that subsystem plugs into the rest of the kernel.

Ignat

diff mbox series

Patch

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 31086a72e32a..e99428052321 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -9,6 +9,7 @@ 
 #ifndef _LINUX_AUDIT_H_
 #define _LINUX_AUDIT_H_
 
+#include <linux/bitmap.h>
 #include <linux/sched.h>
 #include <linux/ptrace.h>
 #include <linux/audit_arch.h>
@@ -399,6 +400,22 @@  static inline void audit_ptrace(struct task_struct *t)
 		__audit_ptrace(t);
 }
 
+static inline int audit_in_mask(const struct audit_krule *rule, unsigned long val)
+{
+	int word, bit;
+
+	if (val > 0xffffffff)
+		return false;
+
+	word = AUDIT_WORD(val);
+	if (word >= AUDIT_BITMASK_SIZE)
+		return false;
+
+	bit = AUDIT_BIT(val);
+
+	return rule->mask[word] & bit;
+}
+
 				/* Private API (for audit.c only) */
 extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
 extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode);
@@ -573,6 +590,10 @@  static inline void audit_log_nfcfg(const char *name, u8 af,
 
 extern int audit_n_rules;
 extern int audit_signals;
+
+extern int audit_n_syscall_rules;
+extern int audit_syscall_rules[NR_syscalls];
+extern DECLARE_BITMAP(audit_syscalls_bitmap, NR_syscalls);
 #else /* CONFIG_AUDITSYSCALL */
 static inline int audit_alloc(struct task_struct *task)
 {
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 42d99896e7a6..3c7ea1a3faf1 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -943,7 +943,7 @@  static inline int audit_add_rule(struct audit_entry *entry)
 	struct list_head *list;
 	int err = 0;
 #ifdef CONFIG_AUDITSYSCALL
-	int dont_count = 0;
+	int syscall_nr, dont_count = 0;
 
 	/* If any of these, don't count towards total */
 	switch(entry->rule.listnr) {
@@ -1007,9 +1007,21 @@  static inline int audit_add_rule(struct audit_entry *entry)
 		list_add_tail_rcu(&entry->list, list);
 	}
 #ifdef CONFIG_AUDITSYSCALL
-	if (!dont_count)
+	if (!dont_count) {
 		audit_n_rules++;
 
+		if (entry->rule.listnr == AUDIT_FILTER_EXIT) {
+			audit_n_syscall_rules++;
+
+			for (syscall_nr = 0; syscall_nr < NR_syscalls; syscall_nr++) {
+				if (!audit_in_mask(&entry->rule, syscall_nr))
+					continue;
+				if (++audit_syscall_rules[syscall_nr] == 1)
+					set_bit(syscall_nr, audit_syscalls_bitmap);
+			}
+		}
+	}
+
 	if (!audit_match_signal(entry))
 		audit_signals++;
 #endif
@@ -1026,7 +1038,7 @@  int audit_del_rule(struct audit_entry *entry)
 	struct list_head *list;
 	int ret = 0;
 #ifdef CONFIG_AUDITSYSCALL
-	int dont_count = 0;
+	int syscall_nr, dont_count = 0;
 
 	/* If any of these, don't count towards total */
 	switch(entry->rule.listnr) {
@@ -1054,9 +1066,21 @@  int audit_del_rule(struct audit_entry *entry)
 		audit_remove_mark_rule(&e->rule);
 
 #ifdef CONFIG_AUDITSYSCALL
-	if (!dont_count)
+	if (!dont_count) {
 		audit_n_rules--;
 
+		if (entry->rule.listnr == AUDIT_FILTER_EXIT) {
+			audit_n_syscall_rules--;
+
+			for (syscall_nr = 0; syscall_nr < NR_syscalls; syscall_nr++) {
+				if (!audit_in_mask(&entry->rule, syscall_nr))
+					continue;
+				if (--audit_syscall_rules[syscall_nr] == 0)
+					clear_bit(syscall_nr, audit_syscalls_bitmap);
+			}
+		}
+	}
+
 	if (!audit_match_signal(entry))
 		audit_signals--;
 #endif
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index addeed3df15d..eb8296474bb2 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -86,6 +86,15 @@  int audit_n_rules;
 /* determines whether we collect data for signals sent */
 int audit_signals;
 
+/* number of syscall related audit rules */
+int audit_n_syscall_rules;
+
+/* number of rules per syscall */
+int audit_syscall_rules[NR_syscalls];
+
+/* bitmap for checking whether a syscall is audited */
+DECLARE_BITMAP(audit_syscalls_bitmap, NR_syscalls);
+
 struct audit_aux_data {
 	struct audit_aux_data	*next;
 	int			type;
@@ -790,22 +799,6 @@  static enum audit_state audit_filter_task(struct task_struct *tsk, char **key)
 	return AUDIT_STATE_BUILD;
 }
 
-static int audit_in_mask(const struct audit_krule *rule, unsigned long val)
-{
-	int word, bit;
-
-	if (val > 0xffffffff)
-		return false;
-
-	word = AUDIT_WORD(val);
-	if (word >= AUDIT_BITMASK_SIZE)
-		return false;
-
-	bit = AUDIT_BIT(val);
-
-	return rule->mask[word] & bit;
-}
-
 /**
  * __audit_filter_op - common filter helper for operations (syscall/uring/etc)
  * @tsk: associated task
@@ -2025,6 +2018,8 @@  void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2,
 		return;
 
 	context->dummy = !audit_n_rules;
+	if (!context->dummy && audit_n_syscall_rules == audit_n_rules)
+		context->dummy = !test_bit(major, audit_syscalls_bitmap);
 	if (!context->dummy && state == AUDIT_STATE_BUILD) {
 		context->prio = 0;
 		if (auditd_test_task(current))