PR libstdc++/112682 More efficient std::basic_string move
Checks
Commit Message
A few places in bits/basic_string.h use `traits_type::copy` to copy
`__str.length() + 1` bytes.
Despite the knowledge that `__str.length()` is not greater than 15 the
compiler emits (and sometimes inlines) a `memcpy` call. That results
in a quite big set of instructions https://godbolt.org/z/j35MMfxzq
Replacing `__str.length() + 1` with `_S_local_capacity + 1` explicitly
forces the compiler to copy the whole `__str._M_local_buf`. As a
result the assembly becomes almost 5 times shorter and without any
function calls or multiple conditional jumps
https://godbolt.org/z/bfq8bxra9
This patch always copies `_S_local_capacity + 1` if working with
`std::char_traits<char>`.
PR libstdc++/112682:
* include/bits/basic_string.h: Optimize string moves.
P.S.: still not sure that this optimization is not an UB or fine for
libstdc++. However, the assembly looks much better with it.
@@ -188,6 +188,23 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
: basic_string(__svw._M_sv.data(), __svw._M_sv.size(), __a) { }
#endif
+ _GLIBCXX17_CONSTEXPR
+ static bool
+ _S_permit_copying_indeterminate() noexcept
+ {
+ // Copying compile-time known _S_local_capacity + 1 bytes is much more
+ // efficient than copying runtime known __str.length() + 1. This
+ // function returns true, if such initialization is permitted even if
+ // the right side has indeterminate values.
+ //
+ // [dcl.init] permits initializing with indeterminate value of unsigned
+ // narrow character type.
+ //
+ // Library users should not specialize char_traits<char> so this is
+ // not observable for user.
+ return is_same<traits_type, char_traits<char> >::value;
+ }
+
// Use empty-base optimization: http://www.cantrip.org/emptyopt.html
struct _Alloc_hider : allocator_type // TODO check __is_final
{
@@ -672,8 +689,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
{
if (__str._M_is_local())
{
- traits_type::copy(_M_local_buf, __str._M_local_buf,
- __str.length() + 1);
+ size_type __copy_count = _S_local_capacity + 1;
+ if _GLIBCXX17_CONSTEXPR (!_S_permit_copying_indeterminate())
+ __copy_count = __str.length() + 1;
+ traits_type::copy(_M_local_buf, __str._M_local_buf, __copy_count);
}
else
{
@@ -711,8 +730,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
{
if (__str._M_is_local())
{
- traits_type::copy(_M_local_buf, __str._M_local_buf,
- __str.length() + 1);
+ size_type __copy_count = _S_local_capacity + 1;
+ if _GLIBCXX17_CONSTEXPR (!_S_permit_copying_indeterminate())
+ __copy_count = __str.length() + 1;
+ traits_type::copy(_M_local_buf, __str._M_local_buf, __copy_count);
_M_length(__str.length());
__str._M_set_length(0);
}