libstdc++: use copy_file_range, improve sendfile in filesystem::copy_file

Message ID CAFqe=zJtMq0f00sm_Hasn9pVZPGWD12hN99FHnGM0BKCgi+DYA@mail.gmail.com
State Unresolved
Headers
Series libstdc++: use copy_file_range, improve sendfile in filesystem::copy_file |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Jannik Glückert March 6, 2023, 11:45 p.m. UTC
  The current copy_file implementation is suboptimal. It only uses
sendfile for files smaller than 2GB, falling back to a userspace copy,
and does not support copy_file_range at all.
copy_file_range is becoming increasingly important with the
adoption of reflinks in filesystems.

I am pretty sure I got some of the formatting wrong; feel free to tear it apart.
I don't know whether sendfile has the same semantics on Linux as it does
on Solaris; if someone could test with a big file, that would be great.
Otherwise, this should not regress. The implementation will fall back
to sendfile / userspace copy if copy_file_range is not available for
the target paths.

The copy implementations for sendfile and copy_file_range were put
into separate functions, and the calling code was simplified to the
point where you can basically just copy-paste it to add a new
implementation, should new interesting syscalls pop up.

Best
Jannik
  

Patch

From 72b7ad044246e496d90b5f241f59bd0b69e214fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jannik=20Gl=C3=BCckert?= <jannik.glueckert@gmail.com>
Date: Mon, 6 Mar 2023 23:11:41 +0100
Subject: [PATCH 2/2] libstdc++: use copy_file_range

copy_file_range is a recent-ish syscall for copying files. It is similar
to sendfile but allows filesystem-specific optimizations. Common examples are:
Reflinks: BTRFS, XFS, ZFS (does not implement the syscall yet)
Server-side copy: NFS, SMB

If copy_file_range is not available for the given files, fall back to
sendfile / userspace copy.

libstdc++-v3/ChangeLog:

	* acinclude.m4 (_GLIBCXX_USE_COPY_FILE_RANGE): Define.
	* config.h.in: Regenerate.
	* configure: Regenerate.
	* src/filesystem/ops-common.h: Use copy_file_range in
	std::filesystem::copy_file.
---
 libstdc++-v3/acinclude.m4                | 20 ++++++++
 libstdc++-v3/config.h.in                 |  3 ++
 libstdc++-v3/configure                   | 62 ++++++++++++++++++++++++
 libstdc++-v3/src/filesystem/ops-common.h | 34 +++++++++++++
 4 files changed, 119 insertions(+)

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index 5136c0571e8..ca09e1d22db 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -4581,6 +4581,7 @@  dnl  _GLIBCXX_USE_UTIMENSAT
 dnl  _GLIBCXX_USE_ST_MTIM
 dnl  _GLIBCXX_USE_FCHMOD
 dnl  _GLIBCXX_USE_FCHMODAT
+dnl  _GLIBCXX_USE_COPY_FILE_RANGE
 dnl  _GLIBCXX_USE_SENDFILE
 dnl  HAVE_LINK
 dnl  HAVE_READLINK
@@ -4718,6 +4719,25 @@  dnl
   if test $glibcxx_cv_fchmodat = yes; then
     AC_DEFINE(_GLIBCXX_USE_FCHMODAT, 1, [Define if fchmodat is available in <sys/stat.h>.])
   fi
+dnl Check for copy_file_range. Only linux targets are probed; all others get no.
+  AC_CACHE_CHECK([for copy_file_range that can copy files],
+    glibcxx_cv_copy_file_range, [dnl
+    case "${target_os}" in
+      linux*)
+	GCC_TRY_COMPILE_OR_LINK(
+	  [#include <unistd.h>],
+	  [copy_file_range(1, NULL, 2, NULL, 1, 0);],
+	  [glibcxx_cv_copy_file_range=yes],
+	  [glibcxx_cv_copy_file_range=no])
+	;;
+      *)
+	glibcxx_cv_copy_file_range=no
+	;;
+    esac
+  ])
+  if test $glibcxx_cv_copy_file_range = yes; then
+    AC_DEFINE(_GLIBCXX_USE_COPY_FILE_RANGE, 1, [Define if copy_file_range is available in <unistd.h>.])
+  fi
 dnl
   AC_CACHE_CHECK([for sendfile that can copy files],
     glibcxx_cv_sendfile, [dnl
diff --git a/libstdc++-v3/src/filesystem/ops-common.h b/libstdc++-v3/src/filesystem/ops-common.h
index d8afc6a4d64..0491dc8d811 100644
--- a/libstdc++-v3/src/filesystem/ops-common.h
+++ b/libstdc++-v3/src/filesystem/ops-common.h
@@ -49,6 +49,9 @@ 
 #ifdef NEED_DO_COPY_FILE
 # include <filesystem>
 # include <ext/stdio_filebuf.h>
+# ifdef _GLIBCXX_USE_COPY_FILE_RANGE
+#  include <unistd.h> // copy_file_range
+# endif
 # ifdef _GLIBCXX_USE_SENDFILE
 #  include <sys/sendfile.h> // sendfile
 # endif
@@ -358,6 +361,24 @@  _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM
   }
 
 #ifdef NEED_DO_COPY_FILE
+#ifdef _GLIBCXX_USE_COPY_FILE_RANGE
+  // Copy LENGTH bytes from the start of FD_IN to FD_OUT's current position
+  // using copy_file_range(2).  Returns false, with errno set, if a call fails.
+  bool
+  copy_file_copy_file_range(int fd_in, int fd_out, size_t length) noexcept
+  {
+    size_t bytes_left = length;
+    off64_t offset = 0; // the syscall wrapper takes off64_t*, not off_t*
+    ssize_t bytes_copied;
+    do {
+      bytes_copied = ::copy_file_range(fd_in, &offset, fd_out, nullptr, bytes_left, 0);
+      if (bytes_copied < 0)
+        return false;
+      bytes_left -= bytes_copied;
+    } while (bytes_left > 0 && bytes_copied > 0); // 0 copied means EOF on fd_in
+    return true;
+  }
+#endif
 #if defined _GLIBCXX_USE_SENDFILE && ! defined _GLIBCXX_FILESYSTEM_IS_WINDOWS
   bool
   copy_file_sendfile(int fd_in, int fd_out, size_t length) noexcept
@@ -518,6 +539,19 @@  _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM
 
     bool has_copied = false;
 
+#ifdef _GLIBCXX_USE_COPY_FILE_RANGE
+    // Errors that only mean copy_file_range cannot handle these files (ENOSYS:
+    // no kernel support or seccomp; EINVAL: unsupported file type) fall through.
+    if (!has_copied)
+      has_copied = copy_file_copy_file_range(in.fd, out.fd, from_st->st_size);
+    if (!has_copied && errno != EFBIG && errno != EOPNOTSUPP && errno != EXDEV
+        && errno != EOVERFLOW && errno != ENOSYS && errno != EINVAL)
+      {
+        ec.assign(errno, std::generic_category());
+        return false;
+      }
+#endif
+
 #if defined _GLIBCXX_USE_SENDFILE && ! defined _GLIBCXX_FILESYSTEM_IS_WINDOWS
     if (!has_copied)
       has_copied = copy_file_sendfile(in.fd, out.fd, from_st->st_size);
-- 
2.39.2