[v1,2/6] tools lib api: Add io_dir, an allocation-free readdir alternative

Message ID 20231207050433.1426834-3-irogers@google.com
State New
Headers
Series Add io_dir to avoid memory overhead from opendir |

Commit Message

Ian Rogers Dec. 7, 2023, 5:04 a.m. UTC
  glibc's opendir allocates a minimum of 32kb. When called recursively
for a directory tree, the memory consumption can add up - nearly 300kb
during perf start-up when processing modules. Add a stack-allocated
variant of readdir sized a little more than 1kb.

As getdents64 may be missing from libc, add support using syscall.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/lib/api/Makefile     |  2 +-
 tools/lib/api/io_dir.h     | 84 ++++++++++++++++++++++++++++++++++++++
 tools/perf/Makefile.config |  4 ++
 3 files changed, 89 insertions(+), 1 deletion(-)
 create mode 100644 tools/lib/api/io_dir.h
  

Comments

Namhyung Kim Dec. 11, 2023, 11:24 p.m. UTC | #1
On Wed, Dec 6, 2023 at 9:04 PM Ian Rogers <irogers@google.com> wrote:
>
> glibc's opendir allocates a minimum of 32kb, when called recursively
> for a directory tree the memory consumption can add up - nearly 300kb
> during perf start-up when processing modules. Add a stack allocated
> variant of readdir sized a little more than 1kb.
>
> As getdents64 may be missing from libc, add support using syscall.

Unfortunately, my Alpine build fails with:

In file included from util/machine.c:2:
/build/libapi/include/api/io_dir.h:17:23: error: conflicting types for
'getdents'; have 'ssize_t(int,  void *, size_t)' {aka 'long int(int,
void *, long unsigned int)'}
   17 | static inline ssize_t getdents64(int fd, void *dirp, size_t count)
      |                       ^~~~~~~~~~
/usr/include/dirent.h:52:5: note: previous declaration of 'getdents'
with type 'int(int,  struct dirent *, size_t)' {aka 'int(int,  struct
dirent *, long unsigned int)'}
   52 | int getdents(int, struct dirent *, size_t);
      |     ^~~~~~~~

Thanks,
Namhyung

>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/lib/api/Makefile     |  2 +-
>  tools/lib/api/io_dir.h     | 84 ++++++++++++++++++++++++++++++++++++++
>  tools/perf/Makefile.config |  4 ++
>  3 files changed, 89 insertions(+), 1 deletion(-)
>  create mode 100644 tools/lib/api/io_dir.h
>
> diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
> index 044860ac1ed1..186aa407de8c 100644
> --- a/tools/lib/api/Makefile
> +++ b/tools/lib/api/Makefile
> @@ -99,7 +99,7 @@ install_lib: $(LIBFILE)
>                 $(call do_install_mkdir,$(libdir_SQ)); \
>                 cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ)
>
> -HDRS := cpu.h debug.h io.h
> +HDRS := cpu.h debug.h io.h io_dir.h
>  FD_HDRS := fd/array.h
>  FS_HDRS := fs/fs.h fs/tracing_path.h
>  INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/api
> diff --git a/tools/lib/api/io_dir.h b/tools/lib/api/io_dir.h
> new file mode 100644
> index 000000000000..9b702497e05c
> --- /dev/null
> +++ b/tools/lib/api/io_dir.h
> @@ -0,0 +1,84 @@
> +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
> +/*
> + * Lightweight directory reading library.
> + */
> +#ifndef __API_IO_DIR__
> +#define __API_IO_DIR__
> +
> +#include <dirent.h>
> +#include <fcntl.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <sys/stat.h>
> +
> +#ifndef HAVE_GETDENTS64
> +#include <sys/syscall.h>
> +
> +static inline ssize_t getdents64(int fd, void *dirp, size_t count)
> +{
> +       return syscall(SYS_getdents64, fd, dirp, count);
> +}
> +#endif
> +
> +struct io_dirent64 {
> +       ino64_t        d_ino;    /* 64-bit inode number */
> +       off64_t        d_off;    /* 64-bit offset to next structure */
> +       unsigned short d_reclen; /* Size of this dirent */
> +       unsigned char  d_type;   /* File type */
> +       char           d_name[NAME_MAX + 1]; /* Filename (null-terminated) */
> +};
> +
> +struct io_dir {
> +       int dirfd;
> +       ssize_t available_bytes;
> +       struct io_dirent64 *next;
> +       struct io_dirent64 buff[4];
> +};
> +
> +static inline void io_dir__init(struct io_dir *iod, int dirfd)
> +{
> +       iod->dirfd = dirfd;
> +       iod->available_bytes = 0;
> +}
> +
> +static inline void io_dir__rewinddir(struct io_dir *iod)
> +{
> +       lseek(iod->dirfd, 0, SEEK_SET);
> +       iod->available_bytes = 0;
> +}
> +
> +static inline struct io_dirent64 *io_dir__readdir(struct io_dir *iod)
> +{
> +       struct io_dirent64 *entry;
> +
> +       if (iod->available_bytes <= 0) {
> +               ssize_t rc = getdents64(iod->dirfd, iod->buff, sizeof(iod->buff));
> +
> +               if (rc <= 0)
> +                       return NULL;
> +               iod->available_bytes = rc;
> +               iod->next = iod->buff;
> +       }
> +       entry = iod->next;
> +       iod->next = (struct io_dirent64 *)((char *)entry + entry->d_reclen);
> +       iod->available_bytes -= entry->d_reclen;
> +       return entry;
> +}
> +
> +static inline bool io_dir__is_dir(const struct io_dir *iod, struct io_dirent64 *dent)
> +{
> +       if (dent->d_type == DT_UNKNOWN) {
> +               struct stat st;
> +
> +               if (fstatat(iod->dirfd, dent->d_name, &st, /*flags=*/0))
> +                       return false;
> +
> +               if (S_ISDIR(st.st_mode)) {
> +                       dent->d_type = DT_DIR;
> +                       return true;
> +               }
> +       }
> +       return dent->d_type == DT_DIR;
> +}
> +
> +#endif
> diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
> index aa55850fbc21..1cef1ab4ddb7 100644
> --- a/tools/perf/Makefile.config
> +++ b/tools/perf/Makefile.config
> @@ -357,6 +357,10 @@ ifeq ($(feature-stackprotector-all), 1)
>    CORE_CFLAGS += -fstack-protector-all
>  endif
>
> +ifeq ($(feature-getdents64), 1)
> +  CFLAGS += -DHAVE_GETDENTS64
> +endif
> +
>  ifeq ($(DEBUG),0)
>    ifeq ($(feature-fortify-source), 1)
>      CORE_CFLAGS += -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2
> --
> 2.43.0.rc2.451.g8631bc7472-goog
>
  
Ian Rogers Dec. 11, 2023, 11:54 p.m. UTC | #2
On Mon, Dec 11, 2023 at 3:25 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Wed, Dec 6, 2023 at 9:04 PM Ian Rogers <irogers@google.com> wrote:
> >
> > glibc's opendir allocates a minimum of 32kb, when called recursively
> > for a directory tree the memory consumption can add up - nearly 300kb
> > during perf start-up when processing modules. Add a stack allocated
> > variant of readdir sized a little more than 1kb.
> >
> > As getdents64 may be missing from libc, add support using syscall.
>
> Unfortunately my alpine build has:
>
> In file included from util/machine.c:2:
> /build/libapi/include/api/io_dir.h:17:23: error: conflicting types for
> 'getdents'; have 'ssize_t(int,  void *, size_t)' {aka 'long int(int,
> void *, long unsigned int)'}
>    17 | static inline ssize_t getdents64(int fd, void *dirp, size_t count)
>       |                       ^~~~~~~~~~
> /usr/include/dirent.h:52:5: note: previous declaration of 'getdents'
> with type 'int(int,  struct dirent *, size_t)' {aka 'int(int,  struct
> dirent *, long unsigned int)'}
>    52 | int getdents(int, struct dirent *, size_t);
>       |     ^~~~~~~~

Presumably there is a "#define getdents64 getdents" somewhere. Could we stop
caring about this version of Alpine Linux?

Thanks,
Ian

> Thanks,
> Namhyung
>
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> >  tools/lib/api/Makefile     |  2 +-
> >  tools/lib/api/io_dir.h     | 84 ++++++++++++++++++++++++++++++++++++++
> >  tools/perf/Makefile.config |  4 ++
> >  3 files changed, 89 insertions(+), 1 deletion(-)
> >  create mode 100644 tools/lib/api/io_dir.h
> >
> > diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
> > index 044860ac1ed1..186aa407de8c 100644
> > --- a/tools/lib/api/Makefile
> > +++ b/tools/lib/api/Makefile
> > @@ -99,7 +99,7 @@ install_lib: $(LIBFILE)
> >                 $(call do_install_mkdir,$(libdir_SQ)); \
> >                 cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ)
> >
> > -HDRS := cpu.h debug.h io.h
> > +HDRS := cpu.h debug.h io.h io_dir.h
> >  FD_HDRS := fd/array.h
> >  FS_HDRS := fs/fs.h fs/tracing_path.h
> >  INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/api
> > diff --git a/tools/lib/api/io_dir.h b/tools/lib/api/io_dir.h
> > new file mode 100644
> > index 000000000000..9b702497e05c
> > --- /dev/null
> > +++ b/tools/lib/api/io_dir.h
> > @@ -0,0 +1,84 @@
> > +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
> > +/*
> > + * Lightweight directory reading library.
> > + */
> > +#ifndef __API_IO_DIR__
> > +#define __API_IO_DIR__
> > +
> > +#include <dirent.h>
> > +#include <fcntl.h>
> > +#include <stdlib.h>
> > +#include <unistd.h>
> > +#include <sys/stat.h>
> > +
> > +#ifndef HAVE_GETDENTS64
> > +#include <sys/syscall.h>
> > +
> > +static inline ssize_t getdents64(int fd, void *dirp, size_t count)
> > +{
> > +       return syscall(SYS_getdents64, fd, dirp, count);
> > +}
> > +#endif
> > +
> > +struct io_dirent64 {
> > +       ino64_t        d_ino;    /* 64-bit inode number */
> > +       off64_t        d_off;    /* 64-bit offset to next structure */
> > +       unsigned short d_reclen; /* Size of this dirent */
> > +       unsigned char  d_type;   /* File type */
> > +       char           d_name[NAME_MAX + 1]; /* Filename (null-terminated) */
> > +};
> > +
> > +struct io_dir {
> > +       int dirfd;
> > +       ssize_t available_bytes;
> > +       struct io_dirent64 *next;
> > +       struct io_dirent64 buff[4];
> > +};
> > +
> > +static inline void io_dir__init(struct io_dir *iod, int dirfd)
> > +{
> > +       iod->dirfd = dirfd;
> > +       iod->available_bytes = 0;
> > +}
> > +
> > +static inline void io_dir__rewinddir(struct io_dir *iod)
> > +{
> > +       lseek(iod->dirfd, 0, SEEK_SET);
> > +       iod->available_bytes = 0;
> > +}
> > +
> > +static inline struct io_dirent64 *io_dir__readdir(struct io_dir *iod)
> > +{
> > +       struct io_dirent64 *entry;
> > +
> > +       if (iod->available_bytes <= 0) {
> > +               ssize_t rc = getdents64(iod->dirfd, iod->buff, sizeof(iod->buff));
> > +
> > +               if (rc <= 0)
> > +                       return NULL;
> > +               iod->available_bytes = rc;
> > +               iod->next = iod->buff;
> > +       }
> > +       entry = iod->next;
> > +       iod->next = (struct io_dirent64 *)((char *)entry + entry->d_reclen);
> > +       iod->available_bytes -= entry->d_reclen;
> > +       return entry;
> > +}
> > +
> > +static inline bool io_dir__is_dir(const struct io_dir *iod, struct io_dirent64 *dent)
> > +{
> > +       if (dent->d_type == DT_UNKNOWN) {
> > +               struct stat st;
> > +
> > +               if (fstatat(iod->dirfd, dent->d_name, &st, /*flags=*/0))
> > +                       return false;
> > +
> > +               if (S_ISDIR(st.st_mode)) {
> > +                       dent->d_type = DT_DIR;
> > +                       return true;
> > +               }
> > +       }
> > +       return dent->d_type == DT_DIR;
> > +}
> > +
> > +#endif
> > diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
> > index aa55850fbc21..1cef1ab4ddb7 100644
> > --- a/tools/perf/Makefile.config
> > +++ b/tools/perf/Makefile.config
> > @@ -357,6 +357,10 @@ ifeq ($(feature-stackprotector-all), 1)
> >    CORE_CFLAGS += -fstack-protector-all
> >  endif
> >
> > +ifeq ($(feature-getdents64), 1)
> > +  CFLAGS += -DHAVE_GETDENTS64
> > +endif
> > +
> >  ifeq ($(DEBUG),0)
> >    ifeq ($(feature-fortify-source), 1)
> >      CORE_CFLAGS += -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2
> > --
> > 2.43.0.rc2.451.g8631bc7472-goog
> >
  
Namhyung Kim Dec. 13, 2023, 1:33 a.m. UTC | #3
On Mon, Dec 11, 2023 at 3:54 PM Ian Rogers <irogers@google.com> wrote:
>
> On Mon, Dec 11, 2023 at 3:25 PM Namhyung Kim <namhyung@kernel.org> wrote:
> >
> > On Wed, Dec 6, 2023 at 9:04 PM Ian Rogers <irogers@google.com> wrote:
> > >
> > > glibc's opendir allocates a minimum of 32kb, when called recursively
> > > for a directory tree the memory consumption can add up - nearly 300kb
> > > during perf start-up when processing modules. Add a stack allocated
> > > variant of readdir sized a little more than 1kb.
> > >
> > > As getdents64 may be missing from libc, add support using syscall.
> >
> > Unfortunately my alpine build has:
> >
> > In file included from util/machine.c:2:
> > /build/libapi/include/api/io_dir.h:17:23: error: conflicting types for
> > 'getdents'; have 'ssize_t(int,  void *, size_t)' {aka 'long int(int,
> > void *, long unsigned int)'}
> >    17 | static inline ssize_t getdents64(int fd, void *dirp, size_t count)
> >       |                       ^~~~~~~~~~
> > /usr/include/dirent.h:52:5: note: previous declaration of 'getdents'
> > with type 'int(int,  struct dirent *, size_t)' {aka 'int(int,  struct
> > dirent *, long unsigned int)'}
> >    52 | int getdents(int, struct dirent *, size_t);
> >       |     ^~~~~~~~
>
> Presumably there is a #define getdents64 getdents .. Could we stop
> caring about this version of Alpine linux?

Right, there's a #define:

https://git.musl-libc.org/cgit/musl/tree/include/dirent.h#n68

But I'm not sure ignoring Alpine Linux is a good idea.
Maybe we can add an #undef right before the fallback definition?

Thanks,
Namhyung
  

Patch

diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
index 044860ac1ed1..186aa407de8c 100644
--- a/tools/lib/api/Makefile
+++ b/tools/lib/api/Makefile
@@ -99,7 +99,7 @@  install_lib: $(LIBFILE)
 		$(call do_install_mkdir,$(libdir_SQ)); \
 		cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ)
 
-HDRS := cpu.h debug.h io.h
+HDRS := cpu.h debug.h io.h io_dir.h
 FD_HDRS := fd/array.h
 FS_HDRS := fs/fs.h fs/tracing_path.h
 INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/api
diff --git a/tools/lib/api/io_dir.h b/tools/lib/api/io_dir.h
new file mode 100644
index 000000000000..9b702497e05c
--- /dev/null
+++ b/tools/lib/api/io_dir.h
@@ -0,0 +1,84 @@ 
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/*
+ * Lightweight directory reading library.
+ */
+#ifndef __API_IO_DIR__
+#define __API_IO_DIR__
+
+#include <dirent.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#ifndef HAVE_GETDENTS64
+#include <sys/syscall.h>
+
+static inline ssize_t getdents64(int fd, void *dirp, size_t count)
+{
+	return syscall(SYS_getdents64, fd, dirp, count);
+}
+#endif
+
+struct io_dirent64 {
+	ino64_t        d_ino;    /* 64-bit inode number */
+	off64_t        d_off;    /* 64-bit offset to next structure */
+	unsigned short d_reclen; /* Size of this dirent */
+	unsigned char  d_type;   /* File type */
+	char           d_name[NAME_MAX + 1]; /* Filename (null-terminated) */
+};
+
+struct io_dir {
+	int dirfd;
+	ssize_t available_bytes;
+	struct io_dirent64 *next;
+	struct io_dirent64 buff[4];
+};
+
+static inline void io_dir__init(struct io_dir *iod, int dirfd)
+{
+	iod->dirfd = dirfd;
+	iod->available_bytes = 0;
+}
+
+static inline void io_dir__rewinddir(struct io_dir *iod)
+{
+	lseek(iod->dirfd, 0, SEEK_SET);
+	iod->available_bytes = 0;
+}
+
+static inline struct io_dirent64 *io_dir__readdir(struct io_dir *iod)
+{
+	struct io_dirent64 *entry;
+
+	if (iod->available_bytes <= 0) {
+		ssize_t rc = getdents64(iod->dirfd, iod->buff, sizeof(iod->buff));
+
+		if (rc <= 0)
+			return NULL;
+		iod->available_bytes = rc;
+		iod->next = iod->buff;
+	}
+	entry = iod->next;
+	iod->next = (struct io_dirent64 *)((char *)entry + entry->d_reclen);
+	iod->available_bytes -= entry->d_reclen;
+	return entry;
+}
+
+static inline bool io_dir__is_dir(const struct io_dir *iod, struct io_dirent64 *dent)
+{
+	if (dent->d_type == DT_UNKNOWN) {
+		struct stat st;
+
+		if (fstatat(iod->dirfd, dent->d_name, &st, /*flags=*/0))
+			return false;
+
+		if (S_ISDIR(st.st_mode)) {
+			dent->d_type = DT_DIR;
+			return true;
+		}
+	}
+	return dent->d_type == DT_DIR;
+}
+
+#endif
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index aa55850fbc21..1cef1ab4ddb7 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -357,6 +357,10 @@  ifeq ($(feature-stackprotector-all), 1)
   CORE_CFLAGS += -fstack-protector-all
 endif
 
+ifeq ($(feature-getdents64), 1)
+  CFLAGS += -DHAVE_GETDENTS64
+endif
+
 ifeq ($(DEBUG),0)
   ifeq ($(feature-fortify-source), 1)
     CORE_CFLAGS += -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2