[v1,2/6] perf trace: Ignore thread hashing in summary

Message ID 20240214063708.972376-3-irogers@google.com
State New
Series Thread memory improvements and fixes

Commit Message

Ian Rogers Feb. 14, 2024, 6:37 a.m. UTC
  Commit 91e467bc568f ("perf machine: Use hashtable for machine
threads") made the iteration of thread tids unordered. The perf trace
--summary output therefore sorts and prints each hash bucket
separately, rather than all threads globally. Change this behavior by
turning all threads into a list, sorting the list by number of trace
events and then by tid, and finally printing the list. This also means
the rbtree in threads is no longer accessed outside of machine.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/builtin-trace.c  | 41 +++++++++++++++++++++----------------
 tools/perf/util/rb_resort.h |  5 -----
 2 files changed, 23 insertions(+), 23 deletions(-)
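
For reference, a minimal sketch of the thread_list helpers this change
builds on (introduced earlier in this series); the signatures are inferred
from the diff below, so treat the exact layout as an assumption:

```c
#include <linux/list.h>

struct machine;
struct thread;

/* One node per thread, as consumed by list_entry() in the diff below. */
struct thread_list {
	struct list_head	 list;
	struct thread		*thread;
};

/*
 * Append every thread from every hash bucket of 'machine' to 'list';
 * returns 0 on success.
 */
int machine__thread_list(struct machine *machine, struct list_head *list);

/* Free the list nodes and drop the thread references they hold. */
void thread_list__delete(struct list_head *list);
```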
  

Comments

Arnaldo Carvalho de Melo Feb. 14, 2024, 5:25 p.m. UTC | #1
On Tue, Feb 13, 2024 at 10:37:04PM -0800, Ian Rogers wrote:
> Commit 91e467bc568f ("perf machine: Use hashtable for machine
> threads") made the iteration of thread tids unordered. The perf trace
> --summary output therefore sorts and prints each hash bucket
> separately, rather than all threads globally. Change this behavior by
> turning all threads into a list, sorting the list by number of trace
> events and then by tid, and finally printing the list. This also means
> the rbtree in threads is no longer accessed outside of machine.

Can you please provide a refresh of the output that is changed by your patch?

- Arnaldo
 
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/builtin-trace.c  | 41 +++++++++++++++++++++----------------
>  tools/perf/util/rb_resort.h |  5 -----
>  2 files changed, 23 insertions(+), 23 deletions(-)
> 
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index 109b8e64fe69..90eaff8c0f6e 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -74,6 +74,7 @@
>  #include <linux/err.h>
>  #include <linux/filter.h>
>  #include <linux/kernel.h>
> +#include <linux/list_sort.h>
>  #include <linux/random.h>
>  #include <linux/stringify.h>
>  #include <linux/time64.h>
> @@ -4312,34 +4313,38 @@ static unsigned long thread__nr_events(struct thread_trace *ttrace)
>  	return ttrace ? ttrace->nr_events : 0;
>  }
>  
> -DEFINE_RESORT_RB(threads,
> -		(thread__nr_events(thread__priv(a->thread)) <
> -		 thread__nr_events(thread__priv(b->thread))),
> -	struct thread *thread;
> -)
> +static int trace_nr_events_cmp(void *priv __maybe_unused,
> +			       const struct list_head *la,
> +			       const struct list_head *lb)
>  {
> -	entry->thread = rb_entry(nd, struct thread_rb_node, rb_node)->thread;
> +	struct thread_list *a = list_entry(la, struct thread_list, list);
> +	struct thread_list *b = list_entry(lb, struct thread_list, list);
> +	unsigned long a_nr_events = thread__nr_events(thread__priv(a->thread));
> +	unsigned long b_nr_events = thread__nr_events(thread__priv(b->thread));
> +
> +	if (a_nr_events != b_nr_events)
> +		return a_nr_events < b_nr_events ? -1 : 1;
> +
> +	/* Identical number of events, place smaller tids first. */
> +	return thread__tid(a->thread) < thread__tid(b->thread)
> +		? -1
> +		: (thread__tid(a->thread) > thread__tid(b->thread) ? 1 : 0);
>  }
>  
>  static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
>  {
>  	size_t printed = trace__fprintf_threads_header(fp);
> -	struct rb_node *nd;
> -	int i;
> -
> -	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
> -		DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
> +	LIST_HEAD(threads);
>  
> -		if (threads == NULL) {
> -			fprintf(fp, "%s", "Error sorting output by nr_events!\n");
> -			return 0;
> -		}
> +	if (machine__thread_list(trace->host, &threads) == 0) {
> +		struct thread_list *pos;
>  
> -		resort_rb__for_each_entry(nd, threads)
> -			printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
> +		list_sort(NULL, &threads, trace_nr_events_cmp);
>  
> -		resort_rb__delete(threads);
> +		list_for_each_entry(pos, &threads, list)
> +			printed += trace__fprintf_thread(fp, pos->thread, trace);
>  	}
> +	thread_list__delete(&threads);
>  	return printed;
>  }
>  
> diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
> index 376e86cb4c3c..d927a0d25052 100644
> --- a/tools/perf/util/rb_resort.h
> +++ b/tools/perf/util/rb_resort.h
> @@ -143,9 +143,4 @@ struct __name##_sorted *__name = __name##_sorted__new
>  	DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries.rb_root,		\
>  				  __ilist->rblist.nr_entries)
>  
> -/* For 'struct machine->threads' */
> -#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket)    \
> - DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries.rb_root, \
> -			   __machine->threads[hash_bucket].nr)
> -
>  #endif /* _PERF_RESORT_RB_H_ */
> -- 
> 2.43.0.687.g38aa6559b0-goog
  
Arnaldo Carvalho de Melo Feb. 16, 2024, 2:57 p.m. UTC | #2
On Wed, Feb 14, 2024 at 01:36:46PM -0800, Ian Rogers wrote:
> On Wed, Feb 14, 2024 at 1:15 PM Ian Rogers <irogers@google.com> wrote:
> > On Wed, Feb 14, 2024 at 10:27 AM Ian Rogers <irogers@google.com> wrote:
> > > On Wed, Feb 14, 2024 at 9:25 AM Arnaldo Carvalho de Melo
> > > <acme@kernel.org> wrote:
> > > > On Tue, Feb 13, 2024 at 10:37:04PM -0800, Ian Rogers wrote:
> > > > > Commit 91e467bc568f ("perf machine: Use hashtable for machine
> > > > > threads") made the iteration of thread tids unordered. The perf trace
> > > > > --summary output therefore sorts and prints each hash bucket
> > > > > separately, rather than all threads globally. Change this behavior by
> > > > > turning all threads into a list, sorting the list by number of trace
> > > > > events and then by tid, and finally printing the list. This also means
> > > > > the rbtree in threads is no longer accessed outside of machine.

> > > > Can you please provide a refresh of the output that is changed by your patch?
> > >
> > > Hmm.. looks like perf trace record is broken and doesn't produce
> > > output with newer perf versions. It works on 6.5, so a bisect is necessary.
> >
> > Bisect result:
> > ```
> > 9925495d96efc14d885ba66c5696f664fe0e663c is the first bad commit
> > commit 9925495d96efc14d885ba66c5696f664fe0e663c
> > Author: Ian Rogers <irogers@google.com>
> > Date:   Thu Sep 14 14:19:45 2023 -0700
> >
> >    perf build: Default BUILD_BPF_SKEL, warn/disable for missing deps
> > ...
> > https://lore.kernel.org/r/20230914211948.814999-3-irogers@google.com
> > ```
> >
> > Now to do the bisect with BUILD_BPF_SKEL=1 on each make.
> 
> This looks better (how could I be at fault :-) ):
> ```
> 1836480429d173c01664a633b61e525b13d41a2a is the first bad commit
> commit 1836480429d173c01664a633b61e525b13d41a2a
> Author: Arnaldo Carvalho de Melo <acme@redhat.com>
> Date:   Wed Aug 16 13:53:26 2023 -0300
> 
>    perf bpf_skel augmented_raw_syscalls: Cap the socklen parameter
> using &= sizeof(saddr)
> ...
>    Cc: Adrian Hunter <adrian.hunter@intel.com>
>    Cc: Ian Rogers <irogers@google.com>
>    Cc: Jiri Olsa <jolsa@kernel.org>
>    Cc: Namhyung Kim <namhyung@kernel.org>
>    Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
> ```
> No LKML link.

So simple... ;-\

I've reproduced your steps and got to the same cset while testing on a
recent distro kernel (6.6.13-200.fc39.x86_64), scratching my head now
and trying to figure this out.

Wonder if the problem would appear when running on an older kernel.

Will try to add a perf test shell entry with a simple:

root@number:~# perf trace record sleep 0.001 && perf script | head | wc -l
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.034 MB perf.data ]
0
root@number:~#

Has to be 10 :-)

Thanks,

- Arnaldo
  
Namhyung Kim Feb. 27, 2024, 6:55 a.m. UTC | #3
On Tue, Feb 13, 2024 at 10:37 PM Ian Rogers <irogers@google.com> wrote:
>
> Commit 91e467bc568f ("perf machine: Use hashtable for machine
> threads") made the iteration of thread tids unordered. The perf trace
> --summary output therefore sorts and prints each hash bucket
> separately, rather than all threads globally. Change this behavior by
> turning all threads into a list, sorting the list by number of trace
> events and then by tid, and finally printing the list. This also means
> the rbtree in threads is no longer accessed outside of machine.
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/builtin-trace.c  | 41 +++++++++++++++++++++----------------
>  tools/perf/util/rb_resort.h |  5 -----
>  2 files changed, 23 insertions(+), 23 deletions(-)
>
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index 109b8e64fe69..90eaff8c0f6e 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -74,6 +74,7 @@
>  #include <linux/err.h>
>  #include <linux/filter.h>
>  #include <linux/kernel.h>
> +#include <linux/list_sort.h>
>  #include <linux/random.h>
>  #include <linux/stringify.h>
>  #include <linux/time64.h>
> @@ -4312,34 +4313,38 @@ static unsigned long thread__nr_events(struct thread_trace *ttrace)
>         return ttrace ? ttrace->nr_events : 0;
>  }
>
> -DEFINE_RESORT_RB(threads,
> -               (thread__nr_events(thread__priv(a->thread)) <
> -                thread__nr_events(thread__priv(b->thread))),
> -       struct thread *thread;
> -)
> +static int trace_nr_events_cmp(void *priv __maybe_unused,
> +                              const struct list_head *la,
> +                              const struct list_head *lb)
>  {
> -       entry->thread = rb_entry(nd, struct thread_rb_node, rb_node)->thread;
> +       struct thread_list *a = list_entry(la, struct thread_list, list);
> +       struct thread_list *b = list_entry(lb, struct thread_list, list);
> +       unsigned long a_nr_events = thread__nr_events(thread__priv(a->thread));
> +       unsigned long b_nr_events = thread__nr_events(thread__priv(b->thread));
> +
> +       if (a_nr_events != b_nr_events)
> +               return a_nr_events < b_nr_events ? -1 : 1;
> +
> +       /* Identical number of events, place smaller tids first. */
> +       return thread__tid(a->thread) < thread__tid(b->thread)
> +               ? -1
> +               : (thread__tid(a->thread) > thread__tid(b->thread) ? 1 : 0);

I'm not sure if there can be a case where two different threads in the
hash table have the same tid.  If not, the last case can be simplified.
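
A minimal sketch of that simplification, assuming tids really are unique
within the table (an assumption on my side, not something the posted
patch relies on):

```c
static int trace_nr_events_cmp(void *priv __maybe_unused,
			       const struct list_head *la,
			       const struct list_head *lb)
{
	struct thread_list *a = list_entry(la, struct thread_list, list);
	struct thread_list *b = list_entry(lb, struct thread_list, list);
	unsigned long a_nr_events = thread__nr_events(thread__priv(a->thread));
	unsigned long b_nr_events = thread__nr_events(thread__priv(b->thread));

	if (a_nr_events != b_nr_events)
		return a_nr_events < b_nr_events ? -1 : 1;

	/* tids assumed unique, so an equal-tid case never happens. */
	return thread__tid(a->thread) < thread__tid(b->thread) ? -1 : 1;
}
```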


>  }
>
>  static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
>  {
>         size_t printed = trace__fprintf_threads_header(fp);
> -       struct rb_node *nd;
> -       int i;
> -
> -       for (i = 0; i < THREADS__TABLE_SIZE; i++) {
> -               DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
> +       LIST_HEAD(threads);
>
> -               if (threads == NULL) {
> -                       fprintf(fp, "%s", "Error sorting output by nr_events!\n");
> -                       return 0;
> -               }
> +       if (machine__thread_list(trace->host, &threads) == 0) {
> +               struct thread_list *pos;
>
> -               resort_rb__for_each_entry(nd, threads)
> -                       printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
> +               list_sort(NULL, &threads, trace_nr_events_cmp);

Same concern; it'd be nice if we could use an array instead.
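
Roughly what the array version could look like, with a hypothetical
machine__thread_array() helper standing in for whatever would actually
collect the threads (reference counting elided), just to illustrate the
idea:

```c
static int thread_nr_events_cmp(const void *va, const void *vb)
{
	struct thread *a = *(struct thread * const *)va;
	struct thread *b = *(struct thread * const *)vb;
	unsigned long a_nr_events = thread__nr_events(thread__priv(a));
	unsigned long b_nr_events = thread__nr_events(thread__priv(b));

	if (a_nr_events != b_nr_events)
		return a_nr_events < b_nr_events ? -1 : 1;
	/* Tie-break on tid, smaller first. */
	return thread__tid(a) - thread__tid(b);
}

static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
{
	size_t printed = trace__fprintf_threads_header(fp);
	struct thread **threads;
	size_t i, nr = 0;

	/* Hypothetical helper: malloc()ed snapshot of all threads. */
	threads = machine__thread_array(trace->host, &nr);
	if (threads == NULL)
		return printed;

	qsort(threads, nr, sizeof(*threads), thread_nr_events_cmp);
	for (i = 0; i < nr; i++)
		printed += trace__fprintf_thread(fp, threads[i], trace);
	free(threads);
	return printed;
}
```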

Thanks,
Namhyung


>
> -               resort_rb__delete(threads);
> +               list_for_each_entry(pos, &threads, list)
> +                       printed += trace__fprintf_thread(fp, pos->thread, trace);
>         }
> +       thread_list__delete(&threads);
>         return printed;
>  }
>
> diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
> index 376e86cb4c3c..d927a0d25052 100644
> --- a/tools/perf/util/rb_resort.h
> +++ b/tools/perf/util/rb_resort.h
> @@ -143,9 +143,4 @@ struct __name##_sorted *__name = __name##_sorted__new
>         DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries.rb_root,             \
>                                   __ilist->rblist.nr_entries)
>
> -/* For 'struct machine->threads' */
> -#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket)    \
> - DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries.rb_root, \
> -                          __machine->threads[hash_bucket].nr)
> -
>  #endif /* _PERF_RESORT_RB_H_ */
> --
> 2.43.0.687.g38aa6559b0-goog
>
  

Patch

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 109b8e64fe69..90eaff8c0f6e 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -74,6 +74,7 @@ 
 #include <linux/err.h>
 #include <linux/filter.h>
 #include <linux/kernel.h>
+#include <linux/list_sort.h>
 #include <linux/random.h>
 #include <linux/stringify.h>
 #include <linux/time64.h>
@@ -4312,34 +4313,38 @@  static unsigned long thread__nr_events(struct thread_trace *ttrace)
 	return ttrace ? ttrace->nr_events : 0;
 }
 
-DEFINE_RESORT_RB(threads,
-		(thread__nr_events(thread__priv(a->thread)) <
-		 thread__nr_events(thread__priv(b->thread))),
-	struct thread *thread;
-)
+static int trace_nr_events_cmp(void *priv __maybe_unused,
+			       const struct list_head *la,
+			       const struct list_head *lb)
 {
-	entry->thread = rb_entry(nd, struct thread_rb_node, rb_node)->thread;
+	struct thread_list *a = list_entry(la, struct thread_list, list);
+	struct thread_list *b = list_entry(lb, struct thread_list, list);
+	unsigned long a_nr_events = thread__nr_events(thread__priv(a->thread));
+	unsigned long b_nr_events = thread__nr_events(thread__priv(b->thread));
+
+	if (a_nr_events != b_nr_events)
+		return a_nr_events < b_nr_events ? -1 : 1;
+
+	/* Identical number of events, place smaller tids first. */
+	return thread__tid(a->thread) < thread__tid(b->thread)
+		? -1
+		: (thread__tid(a->thread) > thread__tid(b->thread) ? 1 : 0);
 }
 
 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
 {
 	size_t printed = trace__fprintf_threads_header(fp);
-	struct rb_node *nd;
-	int i;
-
-	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
-		DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
+	LIST_HEAD(threads);
 
-		if (threads == NULL) {
-			fprintf(fp, "%s", "Error sorting output by nr_events!\n");
-			return 0;
-		}
+	if (machine__thread_list(trace->host, &threads) == 0) {
+		struct thread_list *pos;
 
-		resort_rb__for_each_entry(nd, threads)
-			printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
+		list_sort(NULL, &threads, trace_nr_events_cmp);
 
-		resort_rb__delete(threads);
+		list_for_each_entry(pos, &threads, list)
+			printed += trace__fprintf_thread(fp, pos->thread, trace);
 	}
+	thread_list__delete(&threads);
 	return printed;
 }
 
diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
index 376e86cb4c3c..d927a0d25052 100644
--- a/tools/perf/util/rb_resort.h
+++ b/tools/perf/util/rb_resort.h
@@ -143,9 +143,4 @@  struct __name##_sorted *__name = __name##_sorted__new
 	DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries.rb_root,		\
 				  __ilist->rblist.nr_entries)
 
-/* For 'struct machine->threads' */
-#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket)    \
- DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries.rb_root, \
-			   __machine->threads[hash_bucket].nr)
-
 #endif /* _PERF_RESORT_RB_H_ */