[v4] perf bench sched pipe: Add -G/--cgroups option

Message ID 20231017202342.1353124-1-namhyung@kernel.org
State New
Headers
Series [v4] perf bench sched pipe: Add -G/--cgroups option |

Commit Message

Namhyung Kim Oct. 17, 2023, 8:23 p.m. UTC
  The -G/--cgroups option is to put sender and receiver in different
cgroups in order to measure cgroup context switch overheads.

Users need to make sure the cgroups exist and are accessible.  The following
example shows the effect of this change.  Please don't forget taskset
before the perf bench to measure cgroup switches properly.  Otherwise
each task would run on a different CPU and generate cgroup switches
regardless of this change.

  # perf stat -e context-switches,cgroup-switches \
  > taskset -c 0 perf bench sched pipe -l 10000 > /dev/null

   Performance counter stats for 'taskset -c 0 perf bench sched pipe -l 10000':

              20,001      context-switches
                   2      cgroup-switches

         0.053449651 seconds time elapsed

         0.011286000 seconds user
         0.041869000 seconds sys

  # perf stat -e context-switches,cgroup-switches \
  > taskset -c 0 perf bench sched pipe -l 10000 -G AAA,BBB > /dev/null

   Performance counter stats for 'taskset -c 0 perf bench sched pipe -l 10000 -G AAA,BBB':

              20,001      context-switches
              20,001      cgroup-switches

         0.052768627 seconds time elapsed

         0.006284000 seconds user
         0.046266000 seconds sys

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
* update hint for non-root usage
* initialize cgroup_failed properly
* describe taskset usage in the commit log

 tools/perf/Documentation/perf-bench.txt |  19 ++++
 tools/perf/bench/sched-pipe.c           | 132 +++++++++++++++++++++++-
 2 files changed, 147 insertions(+), 4 deletions(-)
  

Comments

Arnaldo Carvalho de Melo Oct. 20, 2023, 8:32 p.m. UTC | #1
Em Tue, Oct 17, 2023 at 01:23:42PM -0700, Namhyung Kim escreveu:
> The -G/--cgroups option is to put sender and receiver in different
> cgroups in order to measure cgroup context switch overheads.
<SNIP>
>   > taskset -c 0 perf bench sched pipe -l 10000 -G AAA,BBB > /dev/null
 
>    Performance counter stats for 'taskset -c 0 perf bench sched pipe -l 10000 -G AAA,BBB':
> 
>               20,001      context-switches
>               20,001      cgroup-switches

Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>

[root@five ~]# perf stat -e context-switches,cgroup-switches taskset -c 0 perf bench sched pipe -l 10000
# Running 'sched/pipe' benchmark:
# Executed 10000 pipe operations between two processes

     Total time: 0.063 [sec]

       6.367500 usecs/op
         157047 ops/sec

 Performance counter stats for 'taskset -c 0 perf bench sched pipe -l 10000':

            20,004      context-switches
                 4      cgroup-switches

       0.084191842 seconds time elapsed

       0.008332000 seconds user
       0.067193000 seconds sys


[root@five ~]# perf stat -e context-switches,cgroup-switches taskset -c 0 perf bench sched pipe -l 10000 -G AAA,BBB
# Running 'sched/pipe' benchmark:
no access to cgroup /sys/fs/cgroup/BBB
Failed to open cgroup file in BBB
 Hint: create the cgroup first, like 'mkdir /sys/fs/cgroup/BBB'
no access to cgroup /sys/fs/cgroup/AAA
Failed to open cgroup file in AAA
 Hint: create the cgroup first, like 'mkdir /sys/fs/cgroup/AAA'

 Performance counter stats for 'taskset -c 0 perf bench sched pipe -l 10000 -G AAA,BBB':

                 2      context-switches
                 1      cgroup-switches

       0.010356533 seconds time elapsed

       0.003209000 seconds user
       0.007122000 seconds sys


[root@five ~]# vim /tmp/bla
[root@five ~]#
[root@five ~]# mkdir /sys/fs/cgroup/BBB /sys/fs/cgroup/AAA
[root@five ~]# perf stat -e context-switches,cgroup-switches taskset -c 0 perf bench sched pipe -l 10000 -G AAA,BBB
# Running 'sched/pipe' benchmark:
# Executed 10000 pipe operations between two processes

     Total time: 0.079 [sec]

       7.987800 usecs/op
         125190 ops/sec

 Performance counter stats for 'taskset -c 0 perf bench sched pipe -l 10000 -G AAA,BBB':

            20,008      context-switches
            20,007      cgroup-switches

       0.089361029 seconds time elapsed

       0.009004000 seconds user
       0.065376000 seconds sys


[root@five ~]#
  
Namhyung Kim Oct. 20, 2023, 9:13 p.m. UTC | #2
On Fri, Oct 20, 2023 at 1:32 PM Arnaldo Carvalho de Melo
<acme@kernel.org> wrote:
>
> Em Tue, Oct 17, 2023 at 01:23:42PM -0700, Namhyung Kim escreveu:
> > The -G/--cgroups option is to put sender and receiver in different
> > cgroups in order to measure cgroup context switch overheads.
> <SNIP>
> >   > taskset -c 0 perf bench sched pipe -l 10000 -G AAA,BBB > /dev/null
>
> >    Performance counter stats for 'taskset -c 0 perf bench sched pipe -l 10000 -G AAA,BBB':
> >
> >               20,001      context-switches
> >               20,001      cgroup-switches
>
> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>

Thanks a lot Arnaldo!

Ingo, can I keep your Acked-by here? (forgot to add in v4, sorry!)

Thanks,
Namhyung

>
> [root@five ~]# perf stat -e context-switches,cgroup-switches taskset -c 0 perf bench sched pipe -l 10000
> # Running 'sched/pipe' benchmark:
> # Executed 10000 pipe operations between two processes
>
>      Total time: 0.063 [sec]
>
>        6.367500 usecs/op
>          157047 ops/sec
>
>  Performance counter stats for 'taskset -c 0 perf bench sched pipe -l 10000':
>
>             20,004      context-switches
>                  4      cgroup-switches
>
>        0.084191842 seconds time elapsed
>
>        0.008332000 seconds user
>        0.067193000 seconds sys
>
>
> [root@five ~]# perf stat -e context-switches,cgroup-switches taskset -c 0 perf bench sched pipe -l 10000 -G AAA,BBB
> # Running 'sched/pipe' benchmark:
> no access to cgroup /sys/fs/cgroup/BBB
> Failed to open cgroup file in BBB
>  Hint: create the cgroup first, like 'mkdir /sys/fs/cgroup/BBB'
> no access to cgroup /sys/fs/cgroup/AAA
> Failed to open cgroup file in AAA
>  Hint: create the cgroup first, like 'mkdir /sys/fs/cgroup/AAA'
>
>  Performance counter stats for 'taskset -c 0 perf bench sched pipe -l 10000 -G AAA,BBB':
>
>                  2      context-switches
>                  1      cgroup-switches
>
>        0.010356533 seconds time elapsed
>
>        0.003209000 seconds user
>        0.007122000 seconds sys
>
>
> [root@five ~]# vim /tmp/bla
> [root@five ~]#
> [root@five ~]# mkdir /sys/fs/cgroup/BBB /sys/fs/cgroup/AAA
> [root@five ~]# perf stat -e context-switches,cgroup-switches taskset -c 0 perf bench sched pipe -l 10000 -G AAA,BBB
> # Running 'sched/pipe' benchmark:
> # Executed 10000 pipe operations between two processes
>
>      Total time: 0.079 [sec]
>
>        7.987800 usecs/op
>          125190 ops/sec
>
>  Performance counter stats for 'taskset -c 0 perf bench sched pipe -l 10000 -G AAA,BBB':
>
>             20,008      context-switches
>             20,007      cgroup-switches
>
>        0.089361029 seconds time elapsed
>
>        0.009004000 seconds user
>        0.065376000 seconds sys
>
>
> [root@five ~]#
  
Namhyung Kim Oct. 26, 2023, 5:11 p.m. UTC | #3
On Tue, 17 Oct 2023 13:23:42 -0700, Namhyung Kim wrote:
> The -G/--cgroups option is to put sender and receiver in different
> cgroups in order to measure cgroup context switch overheads.
> 
> Users need to make sure the cgroups exist and are accessible.  The following
> example shows the effect of this change.  Please don't forget taskset
> before the perf bench to measure cgroup switches properly.  Otherwise
> each task would run on a different CPU and generate cgroup switches
> regardless of this change.
> 
> [...]

Applied to perf-tools-next, thanks!
  

Patch

diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index ca5789625cd2..8331bd28b10e 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -124,6 +124,14 @@  Options of *pipe*
 --loop=::
 Specify number of loops.
 
+-G::
+--cgroups=::
+Names of cgroups for sender and receiver, separated by a comma.
+This is useful to check cgroup context switching overhead.
+Note that perf doesn't create nor delete the cgroups, so users should
+make sure that the cgroups exist and are accessible before use.
+
+
 Example of *pipe*
 ^^^^^^^^^^^^^^^^^
 
@@ -141,6 +149,17 @@  Example of *pipe*
         Total time:0.016 sec
                 16.948000 usecs/op
                 59004 ops/sec
+
+% perf bench sched pipe -G AAA,BBB
+(executing 1000000 pipe operations between cgroups)
+# Running 'sched/pipe' benchmark:
+# Executed 1000000 pipe operations between two processes
+
+     Total time: 6.886 [sec]
+
+       6.886208 usecs/op
+         145217 ops/sec
+
 ---------------------
 
 SUITES FOR 'syscall'
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index a960e7a93aec..3af6d3c55aba 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -10,7 +10,9 @@ 
  * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
  */
 #include <subcmd/parse-options.h>
+#include <api/fs/fs.h>
 #include "bench.h"
+#include "util/cgroup.h"
 
 #include <unistd.h>
 #include <stdio.h>
@@ -19,6 +21,7 @@ 
 #include <sys/wait.h>
 #include <string.h>
 #include <errno.h>
+#include <fcntl.h>
 #include <assert.h>
 #include <sys/time.h>
 #include <sys/types.h>
@@ -31,6 +34,7 @@  struct thread_data {
 	int			nr;
 	int			pipe_read;
 	int			pipe_write;
+	bool			cgroup_failed;
 	pthread_t		pthread;
 };
 
@@ -40,9 +44,48 @@  static	int			loops = LOOPS_DEFAULT;
 /* Use processes by default: */
 static bool			threaded;
 
+static char			*cgrp_names[2];
+static struct cgroup		*cgrps[2];
+
+static int parse_two_cgroups(const struct option *opt __maybe_unused,
+			     const char *str, int unset __maybe_unused)
+{
+	char *p = strdup(str);
+	char *q;
+	int ret = -1;
+
+	if (p == NULL) {
+		fprintf(stderr, "memory allocation failure\n");
+		return -1;
+	}
+
+	q = strchr(p, ',');
+	if (q == NULL) {
+		fprintf(stderr, "it should have two cgroup names: %s\n", p);
+		goto out;
+	}
+	*q = '\0';
+
+	cgrp_names[0] = strdup(p);
+	cgrp_names[1] = strdup(q + 1);
+
+	if (cgrp_names[0] == NULL || cgrp_names[1] == NULL) {
+		fprintf(stderr, "memory allocation failure\n");
+		goto out;
+	}
+	ret = 0;
+
+out:
+	free(p);
+	return ret;
+}
+
 static const struct option options[] = {
 	OPT_INTEGER('l', "loop",	&loops,		"Specify number of loops"),
 	OPT_BOOLEAN('T', "threaded",	&threaded,	"Specify threads/process based task setup"),
+	OPT_CALLBACK('G', "cgroups", NULL, "SEND,RECV",
+		     "Put sender and receivers in given cgroups",
+		     parse_two_cgroups),
 	OPT_END()
 };
 
@@ -51,12 +94,89 @@  static const char * const bench_sched_pipe_usage[] = {
 	NULL
 };
 
+static int enter_cgroup(int nr)
+{
+	char buf[32];
+	int fd, len, ret;
+	int saved_errno;
+	struct cgroup *cgrp;
+	pid_t pid;
+
+	if (cgrp_names[nr] == NULL)
+		return 0;
+
+	if (cgrps[nr] == NULL) {
+		cgrps[nr] = cgroup__new(cgrp_names[nr], /*do_open=*/true);
+		if (cgrps[nr] == NULL)
+			goto err;
+	}
+	cgrp = cgrps[nr];
+
+	if (threaded)
+		pid = syscall(__NR_gettid);
+	else
+		pid = getpid();
+
+	snprintf(buf, sizeof(buf), "%d\n", pid);
+	len = strlen(buf);
+
+	/* try cgroup v2 interface first */
+	if (threaded)
+		fd = openat(cgrp->fd, "cgroup.threads", O_WRONLY);
+	else
+		fd = openat(cgrp->fd, "cgroup.procs", O_WRONLY);
+
+	/* try cgroup v1 if failed */
+	if (fd < 0 && errno == ENOENT)
+		fd = openat(cgrp->fd, "tasks", O_WRONLY);
+
+	if (fd < 0)
+		goto err;
+
+	ret = write(fd, buf, len);
+	close(fd);
+
+	if (ret != len) {
+		printf("Cannot enter to cgroup: %s\n", cgrp->name);
+		return -1;
+	}
+	return 0;
+
+err:
+	saved_errno = errno;
+	printf("Failed to open cgroup file in %s\n", cgrp_names[nr]);
+
+	if (saved_errno == ENOENT) {
+		char mnt[PATH_MAX];
+
+		if (cgroupfs_find_mountpoint(mnt, sizeof(mnt), "perf_event") == 0)
+			printf(" Hint: create the cgroup first, like 'mkdir %s/%s'\n",
+			       mnt, cgrp_names[nr]);
+	} else if (saved_errno == EACCES && geteuid() > 0) {
+		printf(" Hint: try to run as root\n");
+	}
+
+	return -1;
+}
+
+static void exit_cgroup(int nr)
+{
+	cgroup__put(cgrps[nr]);
+	free(cgrp_names[nr]);
+}
+
 static void *worker_thread(void *__tdata)
 {
 	struct thread_data *td = __tdata;
 	int m = 0, i;
 	int ret;
 
+	ret = enter_cgroup(td->nr);
+	if (ret < 0) {
+		td->cgroup_failed = true;
+		return NULL;
+	}
+
 	for (i = 0; i < loops; i++) {
 		if (!td->nr) {
 			ret = read(td->pipe_read, &m, sizeof(int));
@@ -76,7 +196,8 @@  static void *worker_thread(void *__tdata)
 
 int bench_sched_pipe(int argc, const char **argv)
 {
-	struct thread_data threads[2], *td;
+	struct thread_data threads[2] = {};
+	struct thread_data *td;
 	int pipe_1[2], pipe_2[2];
 	struct timeval start, stop, diff;
 	unsigned long long result_usec = 0;
@@ -112,9 +233,7 @@  int bench_sched_pipe(int argc, const char **argv)
 		}
 	}
 
-
 	if (threaded) {
-
 		for (t = 0; t < nr_threads; t++) {
 			td = threads + t;
 
@@ -128,7 +247,6 @@  int bench_sched_pipe(int argc, const char **argv)
 			ret = pthread_join(td->pthread, NULL);
 			BUG_ON(ret);
 		}
-
 	} else {
 		pid = fork();
 		assert(pid >= 0);
@@ -147,6 +265,12 @@  int bench_sched_pipe(int argc, const char **argv)
 	gettimeofday(&stop, NULL);
 	timersub(&stop, &start, &diff);
 
+	exit_cgroup(0);
+	exit_cgroup(1);
+
+	if (threads[0].cgroup_failed || threads[1].cgroup_failed)
+		return 0;
+
 	switch (bench_format) {
 	case BENCH_FORMAT_DEFAULT:
 		printf("# Executed %d pipe operations between two %s\n\n",