libstdc++: Avoid vector casts while still avoiding PR90424

Message ID 6348100.iIbC2pHGDl@minbar
State Accepted
Headers
Series libstdc++: Avoid vector casts while still avoiding PR90424 |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Matthias Kretz June 6, 2023, 12:29 p.m. UTC
  This is the first part of a fix for the failure of the new simd test on
non-VSX POWER targets. There are more casts to unavailable vectors of 64-bit
element types to be rewritten.

OK for master and backports?

Tested on x86_64-pc-linux-gnu and powerpc64le-linux-gnu

------ >8 -------

Signed-off-by: Matthias Kretz <m.kretz@gsi.de>

libstdc++-v3/ChangeLog:

	PR libstdc++/109822
	* include/experimental/bits/simd_builtin.h (_S_store): Rewrite
	to avoid casts to other vector types. Implement store as
	succession of power-of-2 sized memcpy to avoid PR90424.
---
 .../include/experimental/bits/simd_builtin.h  | 40 +++++++------------
 1 file changed, 15 insertions(+), 25 deletions(-)


--
──────────────────────────────────────────────────────────────────────────
 Dr. Matthias Kretz                           https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research               https://gsi.de
 stdₓ::simd
──────────────────────────────────────────────────────────────────────────
  

Comments

Jonathan Wakely June 6, 2023, 1:04 p.m. UTC | #1
On Tue, 6 Jun 2023 at 13:34, Matthias Kretz via Libstdc++ <
libstdc++@gcc.gnu.org> wrote:

> This is the first part of a fix for the failure of the new simd test on
> non-VSX POWER targets. There are more casts to unavailable vectors of 64-bit
> element types to be rewritten.
>
> OK for master and backports?
>

 OK for trunk and branches


> Tested on x86_64-pc-linux-gnu and powerpc64le-linux-gnu
>
> ------ >8 -------
>
> Signed-off-by: Matthias Kretz <m.kretz@gsi.de>
>
> libstdc++-v3/ChangeLog:
>
>         PR libstdc++/109822
>         * include/experimental/bits/simd_builtin.h (_S_store): Rewrite
>         to avoid casts to other vector types. Implement store as
>         succession of power-of-2 sized memcpy to avoid PR90424.
> ---
>  .../include/experimental/bits/simd_builtin.h  | 40 +++++++------------
>  1 file changed, 15 insertions(+), 25 deletions(-)
>
>
> --
> ──────────────────────────────────────────────────────────────────────────
>  Dr. Matthias Kretz                           https://mattkretz.github.io
>  GSI Helmholtz Centre for Heavy Ion Research               https://gsi.de
>  stdₓ::simd
> ──────────────────────────────────────────────────────────────────────────
  

Patch

diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index 64ef6efaf8c..6ccc2fcec9c 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -1295,6 +1295,18 @@  _S_load(const void* __p)
 
   // }}}
   // _S_store {{{
+  template <size_t _Bytes>
+    _GLIBCXX_SIMD_INTRINSIC static void
+    _S_memcpy(char* __dst, const char* __src)
+    {
+      if constexpr (_Bytes > 0)
+	{
+	  constexpr size_t _Ns = std::__bit_floor(_Bytes);
+	  __builtin_memcpy(__dst, __src, _Ns);
+	  _S_memcpy<_Bytes - _Ns>(__dst + _Ns, __src + _Ns);
+	}
+    }
+
   template <size_t _ReqBytes = 0, typename _TV>
     _GLIBCXX_SIMD_INTRINSIC static void
     _S_store(_TV __x, void* __addr)
@@ -1302,33 +1314,11 @@  _S_store(_TV __x, void* __addr)
       constexpr size_t _Bytes = _ReqBytes == 0 ? sizeof(__x) : _ReqBytes;
       static_assert(sizeof(__x) >= _Bytes);
 
+#if !defined __clang__ && _GLIBCXX_SIMD_WORKAROUND_PR90424
       if constexpr (__is_vector_type_v<_TV>)
-	{
-	  using _Tp = typename _VectorTraits<_TV>::value_type;
-	  constexpr size_t _Np = _Bytes / sizeof(_Tp);
-	  static_assert(_Np * sizeof(_Tp) == _Bytes);
-
-#ifdef _GLIBCXX_SIMD_WORKAROUND_PR90424
-	  using _Up = conditional_t<
-	    (is_integral_v<_Tp> || _Bytes < 4),
-	    conditional_t<(sizeof(__x) > sizeof(long long)), long long, _Tp>,
-	    float>;
-	  const auto __v = __vector_bitcast<_Up>(__x);
-#else // _GLIBCXX_SIMD_WORKAROUND_PR90424
-	  const __vector_type_t<_Tp, _Np> __v = __x;
-#endif // _GLIBCXX_SIMD_WORKAROUND_PR90424
-
-	  if constexpr ((_Bytes & (_Bytes - 1)) != 0)
-	    {
-	      constexpr size_t _MoreBytes = std::__bit_ceil(_Bytes);
-	      alignas(decltype(__v)) char __tmp[_MoreBytes];
-	      __builtin_memcpy(__tmp, &__v, _MoreBytes);
-	      __builtin_memcpy(__addr, __tmp, _Bytes);
-	    }
-	  else
-	    __builtin_memcpy(__addr, &__v, _Bytes);
-	}
+	_S_memcpy<_Bytes>(reinterpret_cast<char*>(__addr), reinterpret_cast<const char*>(&__x));
       else
+#endif // _GLIBCXX_SIMD_WORKAROUND_PR90424
 	__builtin_memcpy(__addr, &__x, _Bytes);
     }