[v4,06/53] tools api fs: Switch filename__read_str to use io.h

Message ID 20231102175735.2272696-7-irogers@google.com
State New
Headers
Series Improvements to memory use |

Commit Message

Ian Rogers Nov. 2, 2023, 5:56 p.m. UTC
  filename__read_str has its own string reading code that allocates
memory before reading into it. The memory allocated is sized at BUFSIZ
that is 8kb. Most strings are short and so most of this 8kb is
wasted.

Refactor io__getline so that the newline character can be configurable
and ignored in the case of filename__read_str.

Code like build_caches_for_cpu in perf's header.c will read many
strings and hold them in a data structure, in this case multiple
strings per cache level per CPU. Using io.h's io__getline avoids the
wasted memory as strings are temporarily read into a buffer on the
stack before being copied to a buffer that grows 128 bytes at a time
and is never sized larger than the string.

For a 16 hyperthread system the memory consumption of "perf record
true" is reduced by 180kb, primarily through saving memory when
reading the cache information.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/lib/api/fs/fs.c | 56 +++++++++++--------------------------------
 tools/lib/api/io.h    |  9 +++++--
 2 files changed, 21 insertions(+), 44 deletions(-)
  

Comments

Namhyung Kim Nov. 6, 2023, 3:53 a.m. UTC | #1
On Thu, Nov 2, 2023 at 10:58 AM Ian Rogers <irogers@google.com> wrote:
>
> filename__read_str has its own string reading code that allocates
> memory before reading into it. The memory allocated is sized at BUFSIZ
> that is 8kb. Most strings are short and so most of this 8kb is
> wasted.
>
> Refactor io__getline so that the newline character can be configurable
> and ignored in the case of filename__read_str.
>
> Code like build_caches_for_cpu in perf's header.c will read many
> strings and hold them in a data structure, in this case multiple
> strings per cache level per CPU. Using io.h's io__getline avoids the
> wasted memory as strings are temporarily read into a buffer on the
> stack before being copied to a buffer that grows 128 bytes at a time
> and is never sized larger than the string.
>
> For a 16 hyperthread system the memory consumption of "perf record
> true" is reduced by 180kb, primarily through saving memory when
> reading the cache information.
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---

[SNIP]
> diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h
> index a77b74c5fb65..50d33e14fb56 100644
> --- a/tools/lib/api/io.h
> +++ b/tools/lib/api/io.h
> @@ -141,7 +141,7 @@ static inline int io__get_dec(struct io *io, __u64 *dec)
>  }
>
>  /* Read up to and including the first newline following the pattern of getline. */

You may want to update the comment as well.

> -static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_len_out)
> +static inline ssize_t io__getline_nl(struct io *io, char **line_out, size_t *line_len_out, int nl)

How about io__getdelim() similar to POSIX?

Thanks,
Namhyung


>  {
>         char buf[128];
>         int buf_pos = 0;
> @@ -151,7 +151,7 @@ static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_l
>
>         /* TODO: reuse previously allocated memory. */
>         free(*line_out);
> -       while (ch != '\n') {
> +       while (ch != nl) {
>                 ch = io__get_char(io);
>
>                 if (ch < 0)
> @@ -184,4 +184,9 @@ static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_l
>         return -ENOMEM;
>  }
>
> +static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_len_out)
> +{
> +       return io__getline_nl(io, line_out, line_len_out, /*nl=*/'\n');
> +}
> +
>  #endif /* __API_IO__ */
> --
> 2.42.0.869.gea05f2083d-goog
>
  
Ian Rogers Nov. 27, 2023, 8:26 p.m. UTC | #2
On Sun, Nov 5, 2023 at 7:53 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Thu, Nov 2, 2023 at 10:58 AM Ian Rogers <irogers@google.com> wrote:
> >
> > filename__read_str has its own string reading code that allocates
> > memory before reading into it. The memory allocated is sized at BUFSIZ
> > that is 8kb. Most strings are short and so most of this 8kb is
> > wasted.
> >
> > Refactor io__getline so that the newline character can be configurable
> > and ignored in the case of filename__read_str.
> >
> > Code like build_caches_for_cpu in perf's header.c will read many
> > strings and hold them in a data structure, in this case multiple
> > strings per cache level per CPU. Using io.h's io__getline avoids the
> > wasted memory as strings are temporarily read into a buffer on the
> > stack before being copied to a buffer that grows 128 bytes at a time
> > and is never sized larger than the string.
> >
> > For a 16 hyperthread system the memory consumption of "perf record
> > true" is reduced by 180kb, primarily through saving memory when
> > reading the cache information.
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
>
> [SNIP]
> > diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h
> > index a77b74c5fb65..50d33e14fb56 100644
> > --- a/tools/lib/api/io.h
> > +++ b/tools/lib/api/io.h
> > @@ -141,7 +141,7 @@ static inline int io__get_dec(struct io *io, __u64 *dec)
> >  }
> >
> >  /* Read up to and including the first newline following the pattern of getline. */
>
> You may want to update the comment as well.
>
> > -static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_len_out)
> > +static inline ssize_t io__getline_nl(struct io *io, char **line_out, size_t *line_len_out, int nl)
>
> How about io__getdelim() similar to POSIX?

Thanks done for v5.

Ian

> Thanks,
> Namhyung
>
>
> >  {
> >         char buf[128];
> >         int buf_pos = 0;
> > @@ -151,7 +151,7 @@ static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_l
> >
> >         /* TODO: reuse previously allocated memory. */
> >         free(*line_out);
> > -       while (ch != '\n') {
> > +       while (ch != nl) {
> >                 ch = io__get_char(io);
> >
> >                 if (ch < 0)
> > @@ -184,4 +184,9 @@ static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_l
> >         return -ENOMEM;
> >  }
> >
> > +static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_len_out)
> > +{
> > +       return io__getline_nl(io, line_out, line_len_out, /*nl=*/'\n');
> > +}
> > +
> >  #endif /* __API_IO__ */
> > --
> > 2.42.0.869.gea05f2083d-goog
> >
>
  

Patch

diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 5cb0eeec2c8a..496812b5f1d2 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -16,6 +16,7 @@ 
 #include <sys/mount.h>
 
 #include "fs.h"
+#include "../io.h"
 #include "debug-internal.h"
 
 #define _STR(x) #x
@@ -344,53 +345,24 @@  int filename__read_ull(const char *filename, unsigned long long *value)
 	return filename__read_ull_base(filename, value, 0);
 }
 
-#define STRERR_BUFSIZE  128     /* For the buffer size of strerror_r */
-
 int filename__read_str(const char *filename, char **buf, size_t *sizep)
 {
-	size_t size = 0, alloc_size = 0;
-	void *bf = NULL, *nbf;
-	int fd, n, err = 0;
-	char sbuf[STRERR_BUFSIZE];
+	struct io io;
+	char bf[128];
+	int err;
 
-	fd = open(filename, O_RDONLY);
-	if (fd < 0)
+	io.fd = open(filename, O_RDONLY);
+	if (io.fd < 0)
 		return -errno;
-
-	do {
-		if (size == alloc_size) {
-			alloc_size += BUFSIZ;
-			nbf = realloc(bf, alloc_size);
-			if (!nbf) {
-				err = -ENOMEM;
-				break;
-			}
-
-			bf = nbf;
-		}
-
-		n = read(fd, bf + size, alloc_size - size);
-		if (n < 0) {
-			if (size) {
-				pr_warn("read failed %d: %s\n", errno,
-					strerror_r(errno, sbuf, sizeof(sbuf)));
-				err = 0;
-			} else
-				err = -errno;
-
-			break;
-		}
-
-		size += n;
-	} while (n > 0);
-
-	if (!err) {
-		*sizep = size;
-		*buf   = bf;
+	io__init(&io, io.fd, bf, sizeof(bf));
+	*buf = NULL;
+	err = io__getline_nl(&io, buf, sizep, /*nl=*/-1);
+	if (err < 0) {
+		free(*buf);
+		*buf = NULL;
 	} else
-		free(bf);
-
-	close(fd);
+		err = 0;
+	close(io.fd);
 	return err;
 }
 
diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h
index a77b74c5fb65..50d33e14fb56 100644
--- a/tools/lib/api/io.h
+++ b/tools/lib/api/io.h
@@ -141,7 +141,7 @@  static inline int io__get_dec(struct io *io, __u64 *dec)
 }
 
 /* Read up to and including the first newline following the pattern of getline. */
-static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_len_out)
+static inline ssize_t io__getline_nl(struct io *io, char **line_out, size_t *line_len_out, int nl)
 {
 	char buf[128];
 	int buf_pos = 0;
@@ -151,7 +151,7 @@  static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_l
 
 	/* TODO: reuse previously allocated memory. */
 	free(*line_out);
-	while (ch != '\n') {
+	while (ch != nl) {
 		ch = io__get_char(io);
 
 		if (ch < 0)
@@ -184,4 +184,9 @@  static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_l
 	return -ENOMEM;
 }
 
+static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_len_out)
+{
+	return io__getline_nl(io, line_out, line_len_out, /*nl=*/'\n');
+}
+
 #endif /* __API_IO__ */