[v5,08/50] perf record: Be lazier in allocating lost samples buffer

Message ID 20231127220902.1315692-9-irogers@google.com
State New
Headers
Series Improvements to memory use |

Commit Message

Ian Rogers Nov. 27, 2023, 10:08 p.m. UTC
  Wait until a lost sample occurs to allocate the lost samples buffer,
since the buffer often isn't necessary. This saves a 64kb allocation and
5.3kb of peak memory consumption.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/builtin-record.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)
  

Comments

Namhyung Kim Nov. 30, 2023, 2:09 a.m. UTC | #1
On Mon, Nov 27, 2023 at 2:09 PM Ian Rogers <irogers@google.com> wrote:
>
> Wait until a lost sample occurs to allocate the lost samples buffer,
> often the buffer isn't necessary. This saves a 64kb allocation and
> 5.3kb of peak memory consumption.
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/builtin-record.c | 29 +++++++++++++++++++----------
>  1 file changed, 19 insertions(+), 10 deletions(-)
>
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 9b4f3805ca92..b6c8c1371b39 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -1924,21 +1924,13 @@ static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
>  static void record__read_lost_samples(struct record *rec)
>  {
>         struct perf_session *session = rec->session;
> -       struct perf_record_lost_samples *lost;
> +       struct perf_record_lost_samples *lost = NULL;
>         struct evsel *evsel;
>
>         /* there was an error during record__open */
>         if (session->evlist == NULL)
>                 return;
>
> -       lost = zalloc(PERF_SAMPLE_MAX_SIZE);

To minimize the allocation size, this can be
sizeof(*lost) + session->machines.host.id_hdr_size
instead of PERF_SAMPLE_MAX_SIZE.

Thanks,
Namhyung


> -       if (lost == NULL) {
> -               pr_debug("Memory allocation failed\n");
> -               return;
> -       }
> -
> -       lost->header.type = PERF_RECORD_LOST_SAMPLES;
> -
>         evlist__for_each_entry(session->evlist, evsel) {
>                 struct xyarray *xy = evsel->core.sample_id;
>                 u64 lost_count;
> @@ -1961,6 +1953,14 @@ static void record__read_lost_samples(struct record *rec)
>                                 }
>
>                                 if (count.lost) {
> +                                       if (!lost) {
> +                                               lost = zalloc(PERF_SAMPLE_MAX_SIZE);
> +                                               if (!lost) {
> +                                                       pr_debug("Memory allocation failed\n");
> +                                                       return;
> +                                               }
> +                                               lost->header.type = PERF_RECORD_LOST_SAMPLES;
> +                                       }
>                                         __record__save_lost_samples(rec, evsel, lost,
>                                                                     x, y, count.lost, 0);
>                                 }
> @@ -1968,9 +1968,18 @@ static void record__read_lost_samples(struct record *rec)
>                 }
>
>                 lost_count = perf_bpf_filter__lost_count(evsel);
> -               if (lost_count)
> +               if (lost_count) {
> +                       if (!lost) {
> +                               lost = zalloc(PERF_SAMPLE_MAX_SIZE);
> +                               if (!lost) {
> +                                       pr_debug("Memory allocation failed\n");
> +                                       return;
> +                               }
> +                               lost->header.type = PERF_RECORD_LOST_SAMPLES;
> +                       }
>                         __record__save_lost_samples(rec, evsel, lost, 0, 0, lost_count,
>                                                     PERF_RECORD_MISC_LOST_SAMPLES_BPF);
> +               }
>         }
>  out:
>         free(lost);
> --
> 2.43.0.rc1.413.gea7ed67945-goog
>
  
Ian Rogers Nov. 30, 2023, 6:29 p.m. UTC | #2
On Wed, Nov 29, 2023 at 6:09 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Mon, Nov 27, 2023 at 2:09 PM Ian Rogers <irogers@google.com> wrote:
> >
> > Wait until a lost sample occurs to allocate the lost samples buffer,
> > often the buffer isn't necessary. This saves a 64kb allocation and
> > 5.3kb of peak memory consumption.
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> >  tools/perf/builtin-record.c | 29 +++++++++++++++++++----------
> >  1 file changed, 19 insertions(+), 10 deletions(-)
> >
> > diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> > index 9b4f3805ca92..b6c8c1371b39 100644
> > --- a/tools/perf/builtin-record.c
> > +++ b/tools/perf/builtin-record.c
> > @@ -1924,21 +1924,13 @@ static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
> >  static void record__read_lost_samples(struct record *rec)
> >  {
> >         struct perf_session *session = rec->session;
> > -       struct perf_record_lost_samples *lost;
> > +       struct perf_record_lost_samples *lost = NULL;
> >         struct evsel *evsel;
> >
> >         /* there was an error during record__open */
> >         if (session->evlist == NULL)
> >                 return;
> >
> > -       lost = zalloc(PERF_SAMPLE_MAX_SIZE);
>
> To minimize the allocation size, this can be
> sizeof(*lost) + session->machines.host.id_hdr_size
> instead of PERF_SAMPLE_MAX_SIZE.

Sounds good, but it should probably be a follow-up. The current size is
PERF_SAMPLE_MAX_SIZE.

Thanks,
Ian

> Thanks,
> Namhyung
>
>
> > -       if (lost == NULL) {
> > -               pr_debug("Memory allocation failed\n");
> > -               return;
> > -       }
> > -
> > -       lost->header.type = PERF_RECORD_LOST_SAMPLES;
> > -
> >         evlist__for_each_entry(session->evlist, evsel) {
> >                 struct xyarray *xy = evsel->core.sample_id;
> >                 u64 lost_count;
> > @@ -1961,6 +1953,14 @@ static void record__read_lost_samples(struct record *rec)
> >                                 }
> >
> >                                 if (count.lost) {
> > +                                       if (!lost) {
> > +                                               lost = zalloc(PERF_SAMPLE_MAX_SIZE);
> > +                                               if (!lost) {
> > +                                                       pr_debug("Memory allocation failed\n");
> > +                                                       return;
> > +                                               }
> > +                                               lost->header.type = PERF_RECORD_LOST_SAMPLES;
> > +                                       }
> >                                         __record__save_lost_samples(rec, evsel, lost,
> >                                                                     x, y, count.lost, 0);
> >                                 }
> > @@ -1968,9 +1968,18 @@ static void record__read_lost_samples(struct record *rec)
> >                 }
> >
> >                 lost_count = perf_bpf_filter__lost_count(evsel);
> > -               if (lost_count)
> > +               if (lost_count) {
> > +                       if (!lost) {
> > +                               lost = zalloc(PERF_SAMPLE_MAX_SIZE);
> > +                               if (!lost) {
> > +                                       pr_debug("Memory allocation failed\n");
> > +                                       return;
> > +                               }
> > +                               lost->header.type = PERF_RECORD_LOST_SAMPLES;
> > +                       }
> >                         __record__save_lost_samples(rec, evsel, lost, 0, 0, lost_count,
> >                                                     PERF_RECORD_MISC_LOST_SAMPLES_BPF);
> > +               }
> >         }
> >  out:
> >         free(lost);
> > --
> > 2.43.0.rc1.413.gea7ed67945-goog
> >
  
Namhyung Kim Dec. 2, 2023, 11:56 p.m. UTC | #3
On Thu, Nov 30, 2023 at 10:29 AM Ian Rogers <irogers@google.com> wrote:
>
> On Wed, Nov 29, 2023 at 6:09 PM Namhyung Kim <namhyung@kernel.org> wrote:
> >
> > On Mon, Nov 27, 2023 at 2:09 PM Ian Rogers <irogers@google.com> wrote:
> > >
> > > Wait until a lost sample occurs to allocate the lost samples buffer,
> > > often the buffer isn't necessary. This saves a 64kb allocation and
> > > 5.3kb of peak memory consumption.
> > >
> > > Signed-off-by: Ian Rogers <irogers@google.com>
> > > ---
> > >  tools/perf/builtin-record.c | 29 +++++++++++++++++++----------
> > >  1 file changed, 19 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> > > index 9b4f3805ca92..b6c8c1371b39 100644
> > > --- a/tools/perf/builtin-record.c
> > > +++ b/tools/perf/builtin-record.c
> > > @@ -1924,21 +1924,13 @@ static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
> > >  static void record__read_lost_samples(struct record *rec)
> > >  {
> > >         struct perf_session *session = rec->session;
> > > -       struct perf_record_lost_samples *lost;
> > > +       struct perf_record_lost_samples *lost = NULL;
> > >         struct evsel *evsel;
> > >
> > >         /* there was an error during record__open */
> > >         if (session->evlist == NULL)
> > >                 return;
> > >
> > > -       lost = zalloc(PERF_SAMPLE_MAX_SIZE);
> >
> > To minimize the allocation size, this can be
> > sizeof(*lost) + session->machines.host.id_hdr_size
> > instead of PERF_SAMPLE_MAX_SIZE.
>
> Sounds good, should probably be a follow up. The current size is
> PERF_SAMPLE_MAX_SIZE.

Yep, I'm ok with having it as a follow-up.

Acked-by: Namhyung Kim <namhyung@kernel.org>

Thanks,
Namhyung
  
Arnaldo Carvalho de Melo Dec. 5, 2023, 3:54 p.m. UTC | #4
Em Sat, Dec 02, 2023 at 03:56:48PM -0800, Namhyung Kim escreveu:
> On Thu, Nov 30, 2023 at 10:29 AM Ian Rogers <irogers@google.com> wrote:
> > On Wed, Nov 29, 2023 at 6:09 PM Namhyung Kim <namhyung@kernel.org> wrote:
> > > To minimize the allocation size, this can be
> > > sizeof(*lost) + session->machines.host.id_hdr_size
> > > instead of PERF_SAMPLE_MAX_SIZE.
> >
> > Sounds good, should probably be a follow up. The current size is
> > PERF_SAMPLE_MAX_SIZE.
> 
> Yep, I'm ok with having it as a follow-up.
> 
> Acked-by: Namhyung Kim <namhyung@kernel.org>

Thanks, applied to perf-tools-next.

- Arnaldo
  

Patch

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 9b4f3805ca92..b6c8c1371b39 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1924,21 +1924,13 @@  static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
 static void record__read_lost_samples(struct record *rec)
 {
 	struct perf_session *session = rec->session;
-	struct perf_record_lost_samples *lost;
+	struct perf_record_lost_samples *lost = NULL;
 	struct evsel *evsel;
 
 	/* there was an error during record__open */
 	if (session->evlist == NULL)
 		return;
 
-	lost = zalloc(PERF_SAMPLE_MAX_SIZE);
-	if (lost == NULL) {
-		pr_debug("Memory allocation failed\n");
-		return;
-	}
-
-	lost->header.type = PERF_RECORD_LOST_SAMPLES;
-
 	evlist__for_each_entry(session->evlist, evsel) {
 		struct xyarray *xy = evsel->core.sample_id;
 		u64 lost_count;
@@ -1961,6 +1953,14 @@  static void record__read_lost_samples(struct record *rec)
 				}
 
 				if (count.lost) {
+					if (!lost) {
+						lost = zalloc(PERF_SAMPLE_MAX_SIZE);
+						if (!lost) {
+							pr_debug("Memory allocation failed\n");
+							return;
+						}
+						lost->header.type = PERF_RECORD_LOST_SAMPLES;
+					}
 					__record__save_lost_samples(rec, evsel, lost,
 								    x, y, count.lost, 0);
 				}
@@ -1968,9 +1968,18 @@  static void record__read_lost_samples(struct record *rec)
 		}
 
 		lost_count = perf_bpf_filter__lost_count(evsel);
-		if (lost_count)
+		if (lost_count) {
+			if (!lost) {
+				lost = zalloc(PERF_SAMPLE_MAX_SIZE);
+				if (!lost) {
+					pr_debug("Memory allocation failed\n");
+					return;
+				}
+				lost->header.type = PERF_RECORD_LOST_SAMPLES;
+			}
 			__record__save_lost_samples(rec, evsel, lost, 0, 0, lost_count,
 						    PERF_RECORD_MISC_LOST_SAMPLES_BPF);
+		}
 	}
 out:
 	free(lost);