libstdc++: Refactor implementation of operator+ for std::string

Message ID 20220905183011.43874-1-whh8b@obs.cr
State New, archived
Headers
Series libstdc++: Refactor implementation of operator+ for std::string |

Commit Message

Will Hawkins Sept. 5, 2022, 6:30 p.m. UTC
  Based on Jonathan's work, here is a patch for the implementation of operator+
on std::string that makes sure we always use the best allocation strategy.

I have attempted to learn from all the feedback that I got on a previous
submission -- I hope I did the right thing.

Passes abi and conformance testing on x86-64 trunk.

Sincerely,
Will

-- >8 --

Create a single function that performs one-allocation string concatenation
that can be used by various different version of operator+. 

libstdc++-v3/ChangeLog:

	* include/bits/basic_string.h:
	Add common function that performs single-allocation string
	concatenation. (__str_cat)
	Use __str_cat to perform optimized operator+, where relevant.
	* include/bits/basic_string.tcc::
	Remove single-allocation implementation of operator+.

Signed-off-by: Will Hawkins <whh8b@obs.cr>
---
 libstdc++-v3/include/bits/basic_string.h   | 66 ++++++++++++++++------
 libstdc++-v3/include/bits/basic_string.tcc | 41 --------------
 2 files changed, 49 insertions(+), 58 deletions(-)
  

Comments

François Dumont Sept. 8, 2022, 5:50 p.m. UTC | #1
On 05/09/22 20:30, Will Hawkins wrote:
> Based on Jonathan's work, here is a patch for the implementation of operator+
> on std::string that makes sure we always use the best allocation strategy.
>
> I have attempted to learn from all the feedback that I got on a previous
> submission -- I hope I did the right thing.
>
> Passes abi and conformance testing on x86-64 trunk.
>
> Sincerely,
> Will
>
> -- >8 --
>
> Create a single function that performs one-allocation string concatenation
> that can be used by various different version of operator+.
>
> libstdc++-v3/ChangeLog:
>
> 	* include/bits/basic_string.h:
> 	Add common function that performs single-allocation string
> 	concatenation. (__str_cat)
> 	Use __str_cat to perform optimized operator+, where relevant.
> 	* include/bits/basic_string.tcc::
> 	Remove single-allocation implementation of operator+.
>
> Signed-off-by: Will Hawkins <whh8b@obs.cr>
> ---
>   libstdc++-v3/include/bits/basic_string.h   | 66 ++++++++++++++++------
>   libstdc++-v3/include/bits/basic_string.tcc | 41 --------------
>   2 files changed, 49 insertions(+), 58 deletions(-)
>
> diff --git a/libstdc++-v3/include/bits/basic_string.h b/libstdc++-v3/include/bits/basic_string.h
> index 0df64ea98ca..4078651fadb 100644
> --- a/libstdc++-v3/include/bits/basic_string.h
> +++ b/libstdc++-v3/include/bits/basic_string.h
> @@ -3481,6 +3481,24 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
>   _GLIBCXX_END_NAMESPACE_CXX11
>   #endif
>   
> +  template<typename _Str>
> +    _GLIBCXX20_CONSTEXPR
> +    inline _Str
> +    __str_concat(typename _Str::value_type const* __lhs,
> +		 typename _Str::size_type __lhs_len,
> +		 typename _Str::value_type const* __rhs,
> +		 typename _Str::size_type __rhs_len,
> +		 typename _Str::allocator_type const& __a)
> +    {
> +      typedef typename _Str::allocator_type allocator_type;
> +      typedef __gnu_cxx::__alloc_traits<allocator_type> _Alloc_traits;
> +      _Str __str(_Alloc_traits::_S_select_on_copy(__a));
> +      __str.reserve(__lhs_len + __rhs_len);
> +      __str.append(__lhs, __lhs_len);
> +      __str.append(__rhs, __rhs_len);
> +      return __str;
> +    }
> +
>     // operator+
>     /**
>      *  @brief  Concatenate two strings.
> @@ -3490,13 +3508,14 @@ _GLIBCXX_END_NAMESPACE_CXX11
>      */
>     template<typename _CharT, typename _Traits, typename _Alloc>
>       _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
> -    basic_string<_CharT, _Traits, _Alloc>
> +    inline basic_string<_CharT, _Traits, _Alloc>
>       operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs,
>   	      const basic_string<_CharT, _Traits, _Alloc>& __rhs)
>       {
> -      basic_string<_CharT, _Traits, _Alloc> __str(__lhs);
> -      __str.append(__rhs);
> -      return __str;
> +      typedef basic_string<_CharT, _Traits, _Alloc> _Str;
> +      return std::__str_concat<_Str>(__lhs.c_str(), __lhs.size(),
> +				     __rhs.c_str(), __rhs.size(),

You should use data() rather than c_str() here and all other operators.

It is currently the same but is more accurate in your context. Maybe one 
day it will make a difference.

> +				     __lhs.get_allocator());
>       }
>   
>     /**
> @@ -3507,9 +3526,16 @@ _GLIBCXX_END_NAMESPACE_CXX11
>      */
>     template<typename _CharT, typename _Traits, typename _Alloc>
>       _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
> -    basic_string<_CharT,_Traits,_Alloc>
> +    inline basic_string<_CharT,_Traits,_Alloc>

Why inlining ?

I guess it is done this way to limit code bloat.

>       operator+(const _CharT* __lhs,
> -	      const basic_string<_CharT,_Traits,_Alloc>& __rhs);
> +	      const basic_string<_CharT,_Traits,_Alloc>& __rhs)
> +    {
> +      __glibcxx_requires_string(__lhs);
> +      typedef basic_string<_CharT, _Traits, _Alloc> _Str;
> +      return std::__str_concat<_Str>(__lhs, _Traits::length(__lhs),
> +				     __rhs.c_str(), __rhs.size(),
> +				     __rhs.get_allocator());
> +    }
>   
>     /**
>      *  @brief  Concatenate character and string.
> @@ -3519,8 +3545,14 @@ _GLIBCXX_END_NAMESPACE_CXX11
>      */
>     template<typename _CharT, typename _Traits, typename _Alloc>
>       _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
> -    basic_string<_CharT,_Traits,_Alloc>
> -    operator+(_CharT __lhs, const basic_string<_CharT,_Traits,_Alloc>& __rhs);
> +    inline basic_string<_CharT,_Traits,_Alloc>
> +    operator+(_CharT __lhs, const basic_string<_CharT,_Traits,_Alloc>& __rhs)
> +    {
> +      typedef basic_string<_CharT, _Traits, _Alloc> _Str;
> +      return std::__str_concat<_Str>(__builtin_addressof(__lhs), 1,
> +				     __rhs.c_str(), __rhs.size(),
> +				     __rhs.get_allocator());
> +    }
>   
>     /**
>      *  @brief  Concatenate string and C string.
> @@ -3534,11 +3566,12 @@ _GLIBCXX_END_NAMESPACE_CXX11
>       operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs,
>   	      const _CharT* __rhs)
>       {
> -      basic_string<_CharT, _Traits, _Alloc> __str(__lhs);
> -      __str.append(__rhs);
> -      return __str;
> +      __glibcxx_requires_string(__rhs);
> +      typedef basic_string<_CharT, _Traits, _Alloc> _Str;
> +      return std::__str_concat<_Str>(__lhs.c_str(), __lhs.size(),
> +				     __rhs, _Traits::length(__rhs),
> +				     __lhs.get_allocator());
>       }
> -
>     /**
>      *  @brief  Concatenate string and character.
>      *  @param __lhs  First string.
> @@ -3550,11 +3583,10 @@ _GLIBCXX_END_NAMESPACE_CXX11
>       inline basic_string<_CharT, _Traits, _Alloc>
>       operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs, _CharT __rhs)
>       {
> -      typedef basic_string<_CharT, _Traits, _Alloc>	__string_type;
> -      typedef typename __string_type::size_type		__size_type;
> -      __string_type __str(__lhs);
> -      __str.append(__size_type(1), __rhs);
> -      return __str;
> +      typedef basic_string<_CharT, _Traits, _Alloc> _Str;
> +      return std::__str_concat<_Str>(__lhs.c_str(), __lhs.size(),
> +				     __builtin_addressof(__rhs), 1,
> +				     __lhs.get_allocator());
>       }
>   
>   #if __cplusplus >= 201103L
> diff --git a/libstdc++-v3/include/bits/basic_string.tcc b/libstdc++-v3/include/bits/basic_string.tcc
> index 4563c61429a..07a94d36757 100644
> --- a/libstdc++-v3/include/bits/basic_string.tcc
> +++ b/libstdc++-v3/include/bits/basic_string.tcc
> @@ -599,47 +599,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>   #else
>   # define _GLIBCXX_STRING_CONSTEXPR
>   #endif
> -
> -  template<typename _CharT, typename _Traits, typename _Alloc>
> -    _GLIBCXX20_CONSTEXPR
> -    basic_string<_CharT, _Traits, _Alloc>
> -    operator+(const _CharT* __lhs,
> -	      const basic_string<_CharT, _Traits, _Alloc>& __rhs)
> -    {
> -      __glibcxx_requires_string(__lhs);
> -      typedef basic_string<_CharT, _Traits, _Alloc> __string_type;
> -      typedef typename __string_type::size_type	  __size_type;
> -      typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template
> -	rebind<_CharT>::other _Char_alloc_type;
> -      typedef __gnu_cxx::__alloc_traits<_Char_alloc_type> _Alloc_traits;
> -      const __size_type __len = _Traits::length(__lhs);
> -      __string_type __str(_Alloc_traits::_S_select_on_copy(
> -          __rhs.get_allocator()));
> -      __str.reserve(__len + __rhs.size());
> -      __str.append(__lhs, __len);
> -      __str.append(__rhs);
> -      return __str;
> -    }
> -
> -  template<typename _CharT, typename _Traits, typename _Alloc>
> -    _GLIBCXX20_CONSTEXPR
> -    basic_string<_CharT, _Traits, _Alloc>
> -    operator+(_CharT __lhs, const basic_string<_CharT, _Traits, _Alloc>& __rhs)
> -    {
> -      typedef basic_string<_CharT, _Traits, _Alloc> __string_type;
> -      typedef typename __string_type::size_type	  __size_type;
> -      typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template
> -	rebind<_CharT>::other _Char_alloc_type;
> -      typedef __gnu_cxx::__alloc_traits<_Char_alloc_type> _Alloc_traits;
> -      __string_type __str(_Alloc_traits::_S_select_on_copy(
> -          __rhs.get_allocator()));
> -      const __size_type __len = __rhs.size();
> -      __str.reserve(__len + 1);
> -      __str.append(__size_type(1), __lhs);
> -      __str.append(__rhs);
> -      return __str;
> -    }
> -
>     template<typename _CharT, typename _Traits, typename _Alloc>
>       _GLIBCXX_STRING_CONSTEXPR
>       typename basic_string<_CharT, _Traits, _Alloc>::size_type
  
Jonathan Wakely Sept. 8, 2022, 6:05 p.m. UTC | #2
On Thu, 8 Sep 2022, 18:51 François Dumont via Libstdc++, <
libstdc++@gcc.gnu.org> wrote:

> On 05/09/22 20:30, Will Hawkins wrote:
> > Based on Jonathan's work, here is a patch for the implementation of
> operator+
> > on std::string that makes sure we always use the best allocation
> strategy.
> >
> > I have attempted to learn from all the feedback that I got on a previous
> > submission -- I hope I did the right thing.
> >
> > Passes abi and conformance testing on x86-64 trunk.
> >
> > Sincerely,
> > Will
> >
> > -- >8 --
> >
> > Create a single function that performs one-allocation string
> concatenation
> > that can be used by various different version of operator+.
> >
> > libstdc++-v3/ChangeLog:
> >
> >       * include/bits/basic_string.h:
> >       Add common function that performs single-allocation string
> >       concatenation. (__str_cat)
> >       Use __str_cat to perform optimized operator+, where relevant.
> >       * include/bits/basic_string.tcc::
> >       Remove single-allocation implementation of operator+.
> >
> > Signed-off-by: Will Hawkins <whh8b@obs.cr>
> > ---
> >   libstdc++-v3/include/bits/basic_string.h   | 66 ++++++++++++++++------
> >   libstdc++-v3/include/bits/basic_string.tcc | 41 --------------
> >   2 files changed, 49 insertions(+), 58 deletions(-)
> >
> > diff --git a/libstdc++-v3/include/bits/basic_string.h
> b/libstdc++-v3/include/bits/basic_string.h
> > index 0df64ea98ca..4078651fadb 100644
> > --- a/libstdc++-v3/include/bits/basic_string.h
> > +++ b/libstdc++-v3/include/bits/basic_string.h
> > @@ -3481,6 +3481,24 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
> >   _GLIBCXX_END_NAMESPACE_CXX11
> >   #endif
> >
> > +  template<typename _Str>
> > +    _GLIBCXX20_CONSTEXPR
> > +    inline _Str
> > +    __str_concat(typename _Str::value_type const* __lhs,
> > +              typename _Str::size_type __lhs_len,
> > +              typename _Str::value_type const* __rhs,
> > +              typename _Str::size_type __rhs_len,
> > +              typename _Str::allocator_type const& __a)
> > +    {
> > +      typedef typename _Str::allocator_type allocator_type;
> > +      typedef __gnu_cxx::__alloc_traits<allocator_type> _Alloc_traits;
> > +      _Str __str(_Alloc_traits::_S_select_on_copy(__a));
> > +      __str.reserve(__lhs_len + __rhs_len);
> > +      __str.append(__lhs, __lhs_len);
> > +      __str.append(__rhs, __rhs_len);
> > +      return __str;
> > +    }
> > +
> >     // operator+
> >     /**
> >      *  @brief  Concatenate two strings.
> > @@ -3490,13 +3508,14 @@ _GLIBCXX_END_NAMESPACE_CXX11
> >      */
> >     template<typename _CharT, typename _Traits, typename _Alloc>
> >       _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
> > -    basic_string<_CharT, _Traits, _Alloc>
> > +    inline basic_string<_CharT, _Traits, _Alloc>
> >       operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs,
> >             const basic_string<_CharT, _Traits, _Alloc>& __rhs)
> >       {
> > -      basic_string<_CharT, _Traits, _Alloc> __str(__lhs);
> > -      __str.append(__rhs);
> > -      return __str;
> > +      typedef basic_string<_CharT, _Traits, _Alloc> _Str;
> > +      return std::__str_concat<_Str>(__lhs.c_str(), __lhs.size(),
> > +                                  __rhs.c_str(), __rhs.size(),
>
> You should use data() rather than c_str() here and all other operators.
>
> It is currently the same but is more accurate in your context. Maybe one
> day it will make a difference.
>


I don't think so, that would be a major breaking change, for no benefit. I
think it's safe to assume they will always stay equivalent now.



> > +                                  __lhs.get_allocator());
> >       }
> >
> >     /**
> > @@ -3507,9 +3526,16 @@ _GLIBCXX_END_NAMESPACE_CXX11
> >      */
> >     template<typename _CharT, typename _Traits, typename _Alloc>
> >       _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
> > -    basic_string<_CharT,_Traits,_Alloc>
> > +    inline basic_string<_CharT,_Traits,_Alloc>
>
> Why inlining ?
>
> I guess it is done this way to limit code bloat.
>
> >       operator+(const _CharT* __lhs,
> > -           const basic_string<_CharT,_Traits,_Alloc>& __rhs);
> > +           const basic_string<_CharT,_Traits,_Alloc>& __rhs)
> > +    {
> > +      __glibcxx_requires_string(__lhs);
> > +      typedef basic_string<_CharT, _Traits, _Alloc> _Str;
> > +      return std::__str_concat<_Str>(__lhs, _Traits::length(__lhs),
> > +                                  __rhs.c_str(), __rhs.size(),
> > +                                  __rhs.get_allocator());
> > +    }
> >
> >     /**
> >      *  @brief  Concatenate character and string.
> > @@ -3519,8 +3545,14 @@ _GLIBCXX_END_NAMESPACE_CXX11
> >      */
> >     template<typename _CharT, typename _Traits, typename _Alloc>
> >       _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
> > -    basic_string<_CharT,_Traits,_Alloc>
> > -    operator+(_CharT __lhs, const basic_string<_CharT,_Traits,_Alloc>&
> __rhs);
> > +    inline basic_string<_CharT,_Traits,_Alloc>
> > +    operator+(_CharT __lhs, const basic_string<_CharT,_Traits,_Alloc>&
> __rhs)
> > +    {
> > +      typedef basic_string<_CharT, _Traits, _Alloc> _Str;
> > +      return std::__str_concat<_Str>(__builtin_addressof(__lhs), 1,
> > +                                  __rhs.c_str(), __rhs.size(),
> > +                                  __rhs.get_allocator());
> > +    }
> >
> >     /**
> >      *  @brief  Concatenate string and C string.
> > @@ -3534,11 +3566,12 @@ _GLIBCXX_END_NAMESPACE_CXX11
> >       operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs,
> >             const _CharT* __rhs)
> >       {
> > -      basic_string<_CharT, _Traits, _Alloc> __str(__lhs);
> > -      __str.append(__rhs);
> > -      return __str;
> > +      __glibcxx_requires_string(__rhs);
> > +      typedef basic_string<_CharT, _Traits, _Alloc> _Str;
> > +      return std::__str_concat<_Str>(__lhs.c_str(), __lhs.size(),
> > +                                  __rhs, _Traits::length(__rhs),
> > +                                  __lhs.get_allocator());
> >       }
> > -
> >     /**
> >      *  @brief  Concatenate string and character.
> >      *  @param __lhs  First string.
> > @@ -3550,11 +3583,10 @@ _GLIBCXX_END_NAMESPACE_CXX11
> >       inline basic_string<_CharT, _Traits, _Alloc>
> >       operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs,
> _CharT __rhs)
> >       {
> > -      typedef basic_string<_CharT, _Traits, _Alloc>  __string_type;
> > -      typedef typename __string_type::size_type
> __size_type;
> > -      __string_type __str(__lhs);
> > -      __str.append(__size_type(1), __rhs);
> > -      return __str;
> > +      typedef basic_string<_CharT, _Traits, _Alloc> _Str;
> > +      return std::__str_concat<_Str>(__lhs.c_str(), __lhs.size(),
> > +                                  __builtin_addressof(__rhs), 1,
> > +                                  __lhs.get_allocator());
> >       }
> >
> >   #if __cplusplus >= 201103L
> > diff --git a/libstdc++-v3/include/bits/basic_string.tcc
> b/libstdc++-v3/include/bits/basic_string.tcc
> > index 4563c61429a..07a94d36757 100644
> > --- a/libstdc++-v3/include/bits/basic_string.tcc
> > +++ b/libstdc++-v3/include/bits/basic_string.tcc
> > @@ -599,47 +599,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >   #else
> >   # define _GLIBCXX_STRING_CONSTEXPR
> >   #endif
> > -
> > -  template<typename _CharT, typename _Traits, typename _Alloc>
> > -    _GLIBCXX20_CONSTEXPR
> > -    basic_string<_CharT, _Traits, _Alloc>
> > -    operator+(const _CharT* __lhs,
> > -           const basic_string<_CharT, _Traits, _Alloc>& __rhs)
> > -    {
> > -      __glibcxx_requires_string(__lhs);
> > -      typedef basic_string<_CharT, _Traits, _Alloc> __string_type;
> > -      typedef typename __string_type::size_type        __size_type;
> > -      typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template
> > -     rebind<_CharT>::other _Char_alloc_type;
> > -      typedef __gnu_cxx::__alloc_traits<_Char_alloc_type> _Alloc_traits;
> > -      const __size_type __len = _Traits::length(__lhs);
> > -      __string_type __str(_Alloc_traits::_S_select_on_copy(
> > -          __rhs.get_allocator()));
> > -      __str.reserve(__len + __rhs.size());
> > -      __str.append(__lhs, __len);
> > -      __str.append(__rhs);
> > -      return __str;
> > -    }
> > -
> > -  template<typename _CharT, typename _Traits, typename _Alloc>
> > -    _GLIBCXX20_CONSTEXPR
> > -    basic_string<_CharT, _Traits, _Alloc>
> > -    operator+(_CharT __lhs, const basic_string<_CharT, _Traits,
> _Alloc>& __rhs)
> > -    {
> > -      typedef basic_string<_CharT, _Traits, _Alloc> __string_type;
> > -      typedef typename __string_type::size_type        __size_type;
> > -      typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template
> > -     rebind<_CharT>::other _Char_alloc_type;
> > -      typedef __gnu_cxx::__alloc_traits<_Char_alloc_type> _Alloc_traits;
> > -      __string_type __str(_Alloc_traits::_S_select_on_copy(
> > -          __rhs.get_allocator()));
> > -      const __size_type __len = __rhs.size();
> > -      __str.reserve(__len + 1);
> > -      __str.append(__size_type(1), __lhs);
> > -      __str.append(__rhs);
> > -      return __str;
> > -    }
> > -
> >     template<typename _CharT, typename _Traits, typename _Alloc>
> >       _GLIBCXX_STRING_CONSTEXPR
> >       typename basic_string<_CharT, _Traits, _Alloc>::size_type
>
>
>
  
Will Hawkins Sept. 9, 2022, 1:53 p.m. UTC | #3
On Thu, Sep 8, 2022 at 2:05 PM Jonathan Wakely <jwakely.gcc@gmail.com> wrote:
>
>
>
> On Thu, 8 Sep 2022, 18:51 François Dumont via Libstdc++, <libstdc++@gcc.gnu.org> wrote:
>>
>> On 05/09/22 20:30, Will Hawkins wrote:
>> > Based on Jonathan's work, here is a patch for the implementation of operator+
>> > on std::string that makes sure we always use the best allocation strategy.
>> >
>> > I have attempted to learn from all the feedback that I got on a previous
>> > submission -- I hope I did the right thing.
>> >
>> > Passes abi and conformance testing on x86-64 trunk.
>> >
>> > Sincerely,
>> > Will
>> >
>> > -- >8 --
>> >
>> > Create a single function that performs one-allocation string concatenation
>> > that can be used by various different version of operator+.
>> >
>> > libstdc++-v3/ChangeLog:
>> >
>> >       * include/bits/basic_string.h:
>> >       Add common function that performs single-allocation string
>> >       concatenation. (__str_cat)
>> >       Use __str_cat to perform optimized operator+, where relevant.
>> >       * include/bits/basic_string.tcc::
>> >       Remove single-allocation implementation of operator+.
>> >
>> > Signed-off-by: Will Hawkins <whh8b@obs.cr>
>> > ---
>> >   libstdc++-v3/include/bits/basic_string.h   | 66 ++++++++++++++++------
>> >   libstdc++-v3/include/bits/basic_string.tcc | 41 --------------
>> >   2 files changed, 49 insertions(+), 58 deletions(-)
>> >
>> > diff --git a/libstdc++-v3/include/bits/basic_string.h b/libstdc++-v3/include/bits/basic_string.h
>> > index 0df64ea98ca..4078651fadb 100644
>> > --- a/libstdc++-v3/include/bits/basic_string.h
>> > +++ b/libstdc++-v3/include/bits/basic_string.h
>> > @@ -3481,6 +3481,24 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
>> >   _GLIBCXX_END_NAMESPACE_CXX11
>> >   #endif
>> >
>> > +  template<typename _Str>
>> > +    _GLIBCXX20_CONSTEXPR
>> > +    inline _Str
>> > +    __str_concat(typename _Str::value_type const* __lhs,
>> > +              typename _Str::size_type __lhs_len,
>> > +              typename _Str::value_type const* __rhs,
>> > +              typename _Str::size_type __rhs_len,
>> > +              typename _Str::allocator_type const& __a)
>> > +    {
>> > +      typedef typename _Str::allocator_type allocator_type;
>> > +      typedef __gnu_cxx::__alloc_traits<allocator_type> _Alloc_traits;
>> > +      _Str __str(_Alloc_traits::_S_select_on_copy(__a));
>> > +      __str.reserve(__lhs_len + __rhs_len);
>> > +      __str.append(__lhs, __lhs_len);
>> > +      __str.append(__rhs, __rhs_len);
>> > +      return __str;
>> > +    }
>> > +
>> >     // operator+
>> >     /**
>> >      *  @brief  Concatenate two strings.
>> > @@ -3490,13 +3508,14 @@ _GLIBCXX_END_NAMESPACE_CXX11
>> >      */
>> >     template<typename _CharT, typename _Traits, typename _Alloc>
>> >       _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
>> > -    basic_string<_CharT, _Traits, _Alloc>
>> > +    inline basic_string<_CharT, _Traits, _Alloc>
>> >       operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs,
>> >             const basic_string<_CharT, _Traits, _Alloc>& __rhs)
>> >       {
>> > -      basic_string<_CharT, _Traits, _Alloc> __str(__lhs);
>> > -      __str.append(__rhs);
>> > -      return __str;
>> > +      typedef basic_string<_CharT, _Traits, _Alloc> _Str;
>> > +      return std::__str_concat<_Str>(__lhs.c_str(), __lhs.size(),
>> > +                                  __rhs.c_str(), __rhs.size(),
>>
>> You should use data() rather than c_str() here and all other operators.
>>
>> It is currently the same but is more accurate in your context. Maybe one
>> day it will make a difference.
>
>
>
> I don't think so, that would be a major breaking change, for no benefit. I think it's safe to assume they will always stay equivalent now.

Happy to make any changes to the patch that the group thinks are necessary!

Will


>
>
>>
>> > +                                  __lhs.get_allocator());
>> >       }
>> >
>> >     /**
>> > @@ -3507,9 +3526,16 @@ _GLIBCXX_END_NAMESPACE_CXX11
>> >      */
>> >     template<typename _CharT, typename _Traits, typename _Alloc>
>> >       _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
>> > -    basic_string<_CharT,_Traits,_Alloc>
>> > +    inline basic_string<_CharT,_Traits,_Alloc>
>>
>> Why inlining ?
>>
>> I guess it is done this way to limit code bloat.
>>
>> >       operator+(const _CharT* __lhs,
>> > -           const basic_string<_CharT,_Traits,_Alloc>& __rhs);
>> > +           const basic_string<_CharT,_Traits,_Alloc>& __rhs)
>> > +    {
>> > +      __glibcxx_requires_string(__lhs);
>> > +      typedef basic_string<_CharT, _Traits, _Alloc> _Str;
>> > +      return std::__str_concat<_Str>(__lhs, _Traits::length(__lhs),
>> > +                                  __rhs.c_str(), __rhs.size(),
>> > +                                  __rhs.get_allocator());
>> > +    }
>> >
>> >     /**
>> >      *  @brief  Concatenate character and string.
>> > @@ -3519,8 +3545,14 @@ _GLIBCXX_END_NAMESPACE_CXX11
>> >      */
>> >     template<typename _CharT, typename _Traits, typename _Alloc>
>> >       _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
>> > -    basic_string<_CharT,_Traits,_Alloc>
>> > -    operator+(_CharT __lhs, const basic_string<_CharT,_Traits,_Alloc>& __rhs);
>> > +    inline basic_string<_CharT,_Traits,_Alloc>
>> > +    operator+(_CharT __lhs, const basic_string<_CharT,_Traits,_Alloc>& __rhs)
>> > +    {
>> > +      typedef basic_string<_CharT, _Traits, _Alloc> _Str;
>> > +      return std::__str_concat<_Str>(__builtin_addressof(__lhs), 1,
>> > +                                  __rhs.c_str(), __rhs.size(),
>> > +                                  __rhs.get_allocator());
>> > +    }
>> >
>> >     /**
>> >      *  @brief  Concatenate string and C string.
>> > @@ -3534,11 +3566,12 @@ _GLIBCXX_END_NAMESPACE_CXX11
>> >       operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs,
>> >             const _CharT* __rhs)
>> >       {
>> > -      basic_string<_CharT, _Traits, _Alloc> __str(__lhs);
>> > -      __str.append(__rhs);
>> > -      return __str;
>> > +      __glibcxx_requires_string(__rhs);
>> > +      typedef basic_string<_CharT, _Traits, _Alloc> _Str;
>> > +      return std::__str_concat<_Str>(__lhs.c_str(), __lhs.size(),
>> > +                                  __rhs, _Traits::length(__rhs),
>> > +                                  __lhs.get_allocator());
>> >       }
>> > -
>> >     /**
>> >      *  @brief  Concatenate string and character.
>> >      *  @param __lhs  First string.
>> > @@ -3550,11 +3583,10 @@ _GLIBCXX_END_NAMESPACE_CXX11
>> >       inline basic_string<_CharT, _Traits, _Alloc>
>> >       operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs, _CharT __rhs)
>> >       {
>> > -      typedef basic_string<_CharT, _Traits, _Alloc>  __string_type;
>> > -      typedef typename __string_type::size_type              __size_type;
>> > -      __string_type __str(__lhs);
>> > -      __str.append(__size_type(1), __rhs);
>> > -      return __str;
>> > +      typedef basic_string<_CharT, _Traits, _Alloc> _Str;
>> > +      return std::__str_concat<_Str>(__lhs.c_str(), __lhs.size(),
>> > +                                  __builtin_addressof(__rhs), 1,
>> > +                                  __lhs.get_allocator());
>> >       }
>> >
>> >   #if __cplusplus >= 201103L
>> > diff --git a/libstdc++-v3/include/bits/basic_string.tcc b/libstdc++-v3/include/bits/basic_string.tcc
>> > index 4563c61429a..07a94d36757 100644
>> > --- a/libstdc++-v3/include/bits/basic_string.tcc
>> > +++ b/libstdc++-v3/include/bits/basic_string.tcc
>> > @@ -599,47 +599,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> >   #else
>> >   # define _GLIBCXX_STRING_CONSTEXPR
>> >   #endif
>> > -
>> > -  template<typename _CharT, typename _Traits, typename _Alloc>
>> > -    _GLIBCXX20_CONSTEXPR
>> > -    basic_string<_CharT, _Traits, _Alloc>
>> > -    operator+(const _CharT* __lhs,
>> > -           const basic_string<_CharT, _Traits, _Alloc>& __rhs)
>> > -    {
>> > -      __glibcxx_requires_string(__lhs);
>> > -      typedef basic_string<_CharT, _Traits, _Alloc> __string_type;
>> > -      typedef typename __string_type::size_type        __size_type;
>> > -      typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template
>> > -     rebind<_CharT>::other _Char_alloc_type;
>> > -      typedef __gnu_cxx::__alloc_traits<_Char_alloc_type> _Alloc_traits;
>> > -      const __size_type __len = _Traits::length(__lhs);
>> > -      __string_type __str(_Alloc_traits::_S_select_on_copy(
>> > -          __rhs.get_allocator()));
>> > -      __str.reserve(__len + __rhs.size());
>> > -      __str.append(__lhs, __len);
>> > -      __str.append(__rhs);
>> > -      return __str;
>> > -    }
>> > -
>> > -  template<typename _CharT, typename _Traits, typename _Alloc>
>> > -    _GLIBCXX20_CONSTEXPR
>> > -    basic_string<_CharT, _Traits, _Alloc>
>> > -    operator+(_CharT __lhs, const basic_string<_CharT, _Traits, _Alloc>& __rhs)
>> > -    {
>> > -      typedef basic_string<_CharT, _Traits, _Alloc> __string_type;
>> > -      typedef typename __string_type::size_type        __size_type;
>> > -      typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template
>> > -     rebind<_CharT>::other _Char_alloc_type;
>> > -      typedef __gnu_cxx::__alloc_traits<_Char_alloc_type> _Alloc_traits;
>> > -      __string_type __str(_Alloc_traits::_S_select_on_copy(
>> > -          __rhs.get_allocator()));
>> > -      const __size_type __len = __rhs.size();
>> > -      __str.reserve(__len + 1);
>> > -      __str.append(__size_type(1), __lhs);
>> > -      __str.append(__rhs);
>> > -      return __str;
>> > -    }
>> > -
>> >     template<typename _CharT, typename _Traits, typename _Alloc>
>> >       _GLIBCXX_STRING_CONSTEXPR
>> >       typename basic_string<_CharT, _Traits, _Alloc>::size_type
>>
>>
  
Jonathan Wakely Sept. 9, 2022, 8:28 p.m. UTC | #4
On Thu, 8 Sept 2022 at 18:51, François Dumont via Libstdc++
<libstdc++@gcc.gnu.org> wrote:
>
> On 05/09/22 20:30, Will Hawkins wrote:
> > Based on Jonathan's work, here is a patch for the implementation of operator+
> > on std::string that makes sure we always use the best allocation strategy.
> >
> > I have attempted to learn from all the feedback that I got on a previous
> > submission -- I hope I did the right thing.
> >
> > Passes abi and conformance testing on x86-64 trunk.
> >
> > Sincerely,
> > Will
> >
> > -- >8 --
> >
> > Create a single function that performs one-allocation string concatenation
> > that can be used by various different version of operator+.
> >
> > libstdc++-v3/ChangeLog:
> >
> >       * include/bits/basic_string.h:
> >       Add common function that performs single-allocation string
> >       concatenation. (__str_cat)
> >       Use __str_cat to perform optimized operator+, where relevant.
> >       * include/bits/basic_string.tcc::
> >       Remove single-allocation implementation of operator+.
> >
> > Signed-off-by: Will Hawkins <whh8b@obs.cr>
> > ---
> >   libstdc++-v3/include/bits/basic_string.h   | 66 ++++++++++++++++------
> >   libstdc++-v3/include/bits/basic_string.tcc | 41 --------------
> >   2 files changed, 49 insertions(+), 58 deletions(-)
> >
> > diff --git a/libstdc++-v3/include/bits/basic_string.h b/libstdc++-v3/include/bits/basic_string.h
> > index 0df64ea98ca..4078651fadb 100644
> > --- a/libstdc++-v3/include/bits/basic_string.h
> > +++ b/libstdc++-v3/include/bits/basic_string.h
> > @@ -3481,6 +3481,24 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
> >   _GLIBCXX_END_NAMESPACE_CXX11
> >   #endif
> >
> > +  template<typename _Str>
> > +    _GLIBCXX20_CONSTEXPR
> > +    inline _Str
> > +    __str_concat(typename _Str::value_type const* __lhs,
> > +              typename _Str::size_type __lhs_len,
> > +              typename _Str::value_type const* __rhs,
> > +              typename _Str::size_type __rhs_len,
> > +              typename _Str::allocator_type const& __a)
> > +    {
> > +      typedef typename _Str::allocator_type allocator_type;
> > +      typedef __gnu_cxx::__alloc_traits<allocator_type> _Alloc_traits;
> > +      _Str __str(_Alloc_traits::_S_select_on_copy(__a));
> > +      __str.reserve(__lhs_len + __rhs_len);
> > +      __str.append(__lhs, __lhs_len);
> > +      __str.append(__rhs, __rhs_len);
> > +      return __str;
> > +    }
> > +
> >     // operator+
> >     /**
> >      *  @brief  Concatenate two strings.
> > @@ -3490,13 +3508,14 @@ _GLIBCXX_END_NAMESPACE_CXX11
> >      */
> >     template<typename _CharT, typename _Traits, typename _Alloc>
> >       _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
> > -    basic_string<_CharT, _Traits, _Alloc>
> > +    inline basic_string<_CharT, _Traits, _Alloc>
> >       operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs,
> >             const basic_string<_CharT, _Traits, _Alloc>& __rhs)
> >       {
> > -      basic_string<_CharT, _Traits, _Alloc> __str(__lhs);
> > -      __str.append(__rhs);
> > -      return __str;
> > +      typedef basic_string<_CharT, _Traits, _Alloc> _Str;
> > +      return std::__str_concat<_Str>(__lhs.c_str(), __lhs.size(),
> > +                                  __rhs.c_str(), __rhs.size(),
>
> You should use data() rather than c_str() here and all other operators.
>
> It is currently the same but is more accurate in your context. Maybe one
> day it will make a difference.

As I said, it will never make a difference, so there's no technical
reason to change it. I suppose data() is a little more expressive
here, in that we only care about the characters, not the null
terminator that c_str() implies (even though data() has the null
terminator too, as it's the same pointer returned).

>
> > +                                  __lhs.get_allocator());
> >       }
> >
> >     /**
> > @@ -3507,9 +3526,16 @@ _GLIBCXX_END_NAMESPACE_CXX11
> >      */
> >     template<typename _CharT, typename _Traits, typename _Alloc>
> >       _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
> > -    basic_string<_CharT,_Traits,_Alloc>
> > +    inline basic_string<_CharT,_Traits,_Alloc>
>
> Why inlining ?

Because it's a one line function that just calls another function.
That's an ideal candidate for being inline.
  

Patch

diff --git a/libstdc++-v3/include/bits/basic_string.h b/libstdc++-v3/include/bits/basic_string.h
index 0df64ea98ca..4078651fadb 100644
--- a/libstdc++-v3/include/bits/basic_string.h
+++ b/libstdc++-v3/include/bits/basic_string.h
@@ -3481,6 +3481,24 @@  _GLIBCXX_BEGIN_NAMESPACE_CXX11
 _GLIBCXX_END_NAMESPACE_CXX11
 #endif
 
+  template<typename _Str>
+    _GLIBCXX20_CONSTEXPR
+    inline _Str
+    __str_concat(typename _Str::value_type const* __lhs,
+		 typename _Str::size_type __lhs_len,
+		 typename _Str::value_type const* __rhs,
+		 typename _Str::size_type __rhs_len,
+		 typename _Str::allocator_type const& __a)
+    {
+      typedef typename _Str::allocator_type allocator_type;
+      typedef __gnu_cxx::__alloc_traits<allocator_type> _Alloc_traits;
+      _Str __str(_Alloc_traits::_S_select_on_copy(__a));
+      __str.reserve(__lhs_len + __rhs_len);
+      __str.append(__lhs, __lhs_len);
+      __str.append(__rhs, __rhs_len);
+      return __str;
+    }
+
   // operator+
   /**
    *  @brief  Concatenate two strings.
@@ -3490,13 +3508,14 @@  _GLIBCXX_END_NAMESPACE_CXX11
    */
   template<typename _CharT, typename _Traits, typename _Alloc>
     _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
-    basic_string<_CharT, _Traits, _Alloc>
+    inline basic_string<_CharT, _Traits, _Alloc>
     operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs,
 	      const basic_string<_CharT, _Traits, _Alloc>& __rhs)
     {
-      basic_string<_CharT, _Traits, _Alloc> __str(__lhs);
-      __str.append(__rhs);
-      return __str;
+      typedef basic_string<_CharT, _Traits, _Alloc> _Str;
+      return std::__str_concat<_Str>(__lhs.c_str(), __lhs.size(),
+				     __rhs.c_str(), __rhs.size(),
+				     __lhs.get_allocator());
     }
 
   /**
@@ -3507,9 +3526,16 @@  _GLIBCXX_END_NAMESPACE_CXX11
    */
   template<typename _CharT, typename _Traits, typename _Alloc>
     _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
-    basic_string<_CharT,_Traits,_Alloc>
+    inline basic_string<_CharT,_Traits,_Alloc>
     operator+(const _CharT* __lhs,
-	      const basic_string<_CharT,_Traits,_Alloc>& __rhs);
+	      const basic_string<_CharT,_Traits,_Alloc>& __rhs)
+    {
+      __glibcxx_requires_string(__lhs);
+      typedef basic_string<_CharT, _Traits, _Alloc> _Str;
+      return std::__str_concat<_Str>(__lhs, _Traits::length(__lhs),
+				     __rhs.c_str(), __rhs.size(),
+				     __rhs.get_allocator());
+    }
 
   /**
    *  @brief  Concatenate character and string.
@@ -3519,8 +3545,14 @@  _GLIBCXX_END_NAMESPACE_CXX11
    */
   template<typename _CharT, typename _Traits, typename _Alloc>
     _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
-    basic_string<_CharT,_Traits,_Alloc>
-    operator+(_CharT __lhs, const basic_string<_CharT,_Traits,_Alloc>& __rhs);
+    inline basic_string<_CharT,_Traits,_Alloc>
+    operator+(_CharT __lhs, const basic_string<_CharT,_Traits,_Alloc>& __rhs)
+    {
+      typedef basic_string<_CharT, _Traits, _Alloc> _Str;
+      return std::__str_concat<_Str>(__builtin_addressof(__lhs), 1,
+				     __rhs.c_str(), __rhs.size(),
+				     __rhs.get_allocator());
+    }
 
   /**
    *  @brief  Concatenate string and C string.
@@ -3534,11 +3566,12 @@  _GLIBCXX_END_NAMESPACE_CXX11
     operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs,
 	      const _CharT* __rhs)
     {
-      basic_string<_CharT, _Traits, _Alloc> __str(__lhs);
-      __str.append(__rhs);
-      return __str;
+      __glibcxx_requires_string(__rhs);
+      typedef basic_string<_CharT, _Traits, _Alloc> _Str;
+      return std::__str_concat<_Str>(__lhs.c_str(), __lhs.size(),
+				     __rhs, _Traits::length(__rhs),
+				     __lhs.get_allocator());
     }
-
   /**
    *  @brief  Concatenate string and character.
    *  @param __lhs  First string.
@@ -3550,11 +3583,10 @@  _GLIBCXX_END_NAMESPACE_CXX11
     inline basic_string<_CharT, _Traits, _Alloc>
     operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs, _CharT __rhs)
     {
-      typedef basic_string<_CharT, _Traits, _Alloc>	__string_type;
-      typedef typename __string_type::size_type		__size_type;
-      __string_type __str(__lhs);
-      __str.append(__size_type(1), __rhs);
-      return __str;
+      typedef basic_string<_CharT, _Traits, _Alloc> _Str;
+      return std::__str_concat<_Str>(__lhs.c_str(), __lhs.size(),
+				     __builtin_addressof(__rhs), 1,
+				     __lhs.get_allocator());
     }
 
 #if __cplusplus >= 201103L
diff --git a/libstdc++-v3/include/bits/basic_string.tcc b/libstdc++-v3/include/bits/basic_string.tcc
index 4563c61429a..07a94d36757 100644
--- a/libstdc++-v3/include/bits/basic_string.tcc
+++ b/libstdc++-v3/include/bits/basic_string.tcc
@@ -599,47 +599,6 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #else
 # define _GLIBCXX_STRING_CONSTEXPR
 #endif
-
-  template<typename _CharT, typename _Traits, typename _Alloc>
-    _GLIBCXX20_CONSTEXPR
-    basic_string<_CharT, _Traits, _Alloc>
-    operator+(const _CharT* __lhs,
-	      const basic_string<_CharT, _Traits, _Alloc>& __rhs)
-    {
-      __glibcxx_requires_string(__lhs);
-      typedef basic_string<_CharT, _Traits, _Alloc> __string_type;
-      typedef typename __string_type::size_type	  __size_type;
-      typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template
-	rebind<_CharT>::other _Char_alloc_type;
-      typedef __gnu_cxx::__alloc_traits<_Char_alloc_type> _Alloc_traits;
-      const __size_type __len = _Traits::length(__lhs);
-      __string_type __str(_Alloc_traits::_S_select_on_copy(
-          __rhs.get_allocator()));
-      __str.reserve(__len + __rhs.size());
-      __str.append(__lhs, __len);
-      __str.append(__rhs);
-      return __str;
-    }
-
-  template<typename _CharT, typename _Traits, typename _Alloc>
-    _GLIBCXX20_CONSTEXPR
-    basic_string<_CharT, _Traits, _Alloc>
-    operator+(_CharT __lhs, const basic_string<_CharT, _Traits, _Alloc>& __rhs)
-    {
-      typedef basic_string<_CharT, _Traits, _Alloc> __string_type;
-      typedef typename __string_type::size_type	  __size_type;
-      typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template
-	rebind<_CharT>::other _Char_alloc_type;
-      typedef __gnu_cxx::__alloc_traits<_Char_alloc_type> _Alloc_traits;
-      __string_type __str(_Alloc_traits::_S_select_on_copy(
-          __rhs.get_allocator()));
-      const __size_type __len = __rhs.size();
-      __str.reserve(__len + 1);
-      __str.append(__size_type(1), __lhs);
-      __str.append(__rhs);
-      return __str;
-    }
-
   template<typename _CharT, typename _Traits, typename _Alloc>
     _GLIBCXX_STRING_CONSTEXPR
     typename basic_string<_CharT, _Traits, _Alloc>::size_type