readelf: support zstd compressed debug sections [PR 29640]

Message ID 20221001062057.681440-1-maskray@google.com
State Accepted, archived
Headers
Series readelf: support zstd compressed debug sections [PR 29640] |

Checks

Context Check Description
snail/binutils-gdb-check success Github commit url

Commit Message

Fangrui Song Oct. 1, 2022, 6:20 a.m. UTC
  ---
 binutils/Makefile.am |   6 +--
 binutils/Makefile.in |   6 +--
 binutils/readelf.c   | 112 +++++++++++++++++++++++++++----------------
 3 files changed, 78 insertions(+), 46 deletions(-)
  

Comments

Fangrui Song Oct. 6, 2022, 2:20 a.m. UTC | #1
On 2022-09-30, Fangrui Song wrote:
>---
> binutils/Makefile.am |   6 +--
> binutils/Makefile.in |   6 +--
> binutils/readelf.c   | 112 +++++++++++++++++++++++++++----------------
> 3 files changed, 78 insertions(+), 46 deletions(-)

ping:)
  
Martin Liška Oct. 13, 2022, 11:38 a.m. UTC | #2
On 10/1/22 08:20, Fangrui Song wrote:
> ---
>  binutils/Makefile.am |   6 +--
>  binutils/Makefile.in |   6 +--
>  binutils/readelf.c   | 112 +++++++++++++++++++++++++++----------------
>  3 files changed, 78 insertions(+), 46 deletions(-)
> 
> diff --git a/binutils/Makefile.am b/binutils/Makefile.am
> index 751fbacce12..b249af721ae 100644
> --- a/binutils/Makefile.am
> +++ b/binutils/Makefile.am
> @@ -54,8 +54,8 @@ DEBUGINFOD_LIBS = @DEBUGINFOD_LIBS@
>  WARN_CFLAGS = @WARN_CFLAGS@
>  WARN_CFLAGS_FOR_BUILD = @WARN_CFLAGS_FOR_BUILD@
>  NO_WERROR = @NO_WERROR@
> -AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC)
> -AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC)
> +AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC) $(ZSTD_CFLAGS)
> +AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC) $(ZSTD_CFLAGS)
>  LIBICONV = @LIBICONV@
>  
>  # these two are almost the same program
> @@ -256,7 +256,7 @@ objcopy_SOURCES = objcopy.c not-strip.c rename.c $(WRITE_DEBUG_SRCS) $(BULIBS)
>  strings_SOURCES = strings.c $(BULIBS)
>  
>  readelf_SOURCES = readelf.c version.c unwind-ia64.c dwarf.c demanguse.c $(ELFLIBS)
> -readelf_LDADD   = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
> +readelf_LDADD   = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(ZSTD_LIBS) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
>  
>  elfedit_SOURCES = elfedit.c version.c $(ELFLIBS)
>  elfedit_LDADD = $(LIBINTL) $(LIBIBERTY)
> diff --git a/binutils/Makefile.in b/binutils/Makefile.in
> index 6de4e239408..7d9039e0f2f 100644
> --- a/binutils/Makefile.in
> +++ b/binutils/Makefile.in
> @@ -651,8 +651,8 @@ am__skipyacc =
>  # case both are empty.
>  ZLIB = @zlibdir@ -lz
>  ZLIBINC = @zlibinc@
> -AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC)
> -AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC)
> +AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC) $(ZSTD_CFLAGS)
> +AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC) $(ZSTD_CFLAGS)
>  
>  # these two are almost the same program
>  AR_PROG = ar
> @@ -790,7 +790,7 @@ size_SOURCES = size.c $(BULIBS)
>  objcopy_SOURCES = objcopy.c not-strip.c rename.c $(WRITE_DEBUG_SRCS) $(BULIBS)
>  strings_SOURCES = strings.c $(BULIBS)
>  readelf_SOURCES = readelf.c version.c unwind-ia64.c dwarf.c demanguse.c $(ELFLIBS)
> -readelf_LDADD = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
> +readelf_LDADD = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(ZSTD_LIBS) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
>  elfedit_SOURCES = elfedit.c version.c $(ELFLIBS)
>  elfedit_LDADD = $(LIBINTL) $(LIBIBERTY)
>  strip_new_SOURCES = objcopy.c is-strip.c rename.c $(WRITE_DEBUG_SRCS) $(BULIBS)
> diff --git a/binutils/readelf.c b/binutils/readelf.c
> index 351571c8abb..04cda213f33 100644
> --- a/binutils/readelf.c
> +++ b/binutils/readelf.c
> @@ -44,6 +44,9 @@
>  #include <assert.h>
>  #include <time.h>
>  #include <zlib.h>
> +#ifdef HAVE_ZSTD
> +#include <zstd.h>
> +#endif
>  #include <wchar.h>
>  
>  #if defined HAVE_MSGPACK
> @@ -15170,48 +15173,56 @@ get_section_contents (Elf_Internal_Shdr * section, Filedata * filedata)
>                               _("section contents"));
>  }
>  
> -/* Uncompresses a section that was compressed using zlib, in place.  */
> +/* Uncompresses a section that was compressed using zlib/zstd, in place.  */
>  
>  static bool
> -uncompress_section_contents (unsigned char **buffer,
> -			     uint64_t uncompressed_size,
> -			     uint64_t *size)
> +uncompress_section_contents (bool is_zstd, unsigned char **buffer,
> +			     uint64_t uncompressed_size, uint64_t *size)
>  {
>    uint64_t compressed_size = *size;
>    unsigned char *compressed_buffer = *buffer;
> -  unsigned char *uncompressed_buffer;
> +  unsigned char *uncompressed_buffer = xmalloc (uncompressed_size);
>    z_stream strm;
>    int rc;
>  
> -  /* It is possible the section consists of several compressed
> -     buffers concatenated together, so we uncompress in a loop.  */
> -  /* PR 18313: The state field in the z_stream structure is supposed
> -     to be invisible to the user (ie us), but some compilers will
> -     still complain about it being used without initialisation.  So
> -     we first zero the entire z_stream structure and then set the fields
> -     that we need.  */
> -  memset (& strm, 0, sizeof strm);
> -  strm.avail_in = compressed_size;
> -  strm.next_in = (Bytef *) compressed_buffer;
> -  strm.avail_out = uncompressed_size;
> -  uncompressed_buffer = (unsigned char *) xmalloc (uncompressed_size);
> +  if (is_zstd)
> +    {
> +#ifdef HAVE_ZSTD
> +      size_t ret = ZSTD_decompress (uncompressed_buffer, uncompressed_size,
> +				    compressed_buffer, compressed_size);
> +      if (ZSTD_isError (ret))
> +	goto fail;
> +#endif
> +    }
> +  else
> +    {
> +      /* It is possible the section consists of several compressed
> +	 buffers concatenated together, so we uncompress in a loop.  */
> +      /* PR 18313: The state field in the z_stream structure is supposed
> +	 to be invisible to the user (ie us), but some compilers will
> +	 still complain about it being used without initialisation.  So
> +	 we first zero the entire z_stream structure and then set the fields
> +	 that we need.  */
> +      memset (&strm, 0, sizeof strm);
> +      strm.avail_in = compressed_size;
> +      strm.next_in = (Bytef *)compressed_buffer;
> +      strm.avail_out = uncompressed_size;
>  
> -  rc = inflateInit (& strm);
> -  while (strm.avail_in > 0)
> -    {
> -      if (rc != Z_OK)
> -        break;
> -      strm.next_out = ((Bytef *) uncompressed_buffer
> -                       + (uncompressed_size - strm.avail_out));
> -      rc = inflate (&strm, Z_FINISH);
> -      if (rc != Z_STREAM_END)
> -        break;
> -      rc = inflateReset (& strm);
> +      rc = inflateInit (&strm);
> +      while (strm.avail_in > 0)
> +	{
> +	  if (rc != Z_OK)
> +	    break;
> +	  strm.next_out = ((Bytef *)uncompressed_buffer
> +			   + (uncompressed_size - strm.avail_out));
> +	  rc = inflate (&strm, Z_FINISH);
> +	  if (rc != Z_STREAM_END)
> +	    break;
> +	  rc = inflateReset (&strm);
> +	}
> +      if (inflateEnd (&strm) != Z_OK || rc != Z_OK || strm.avail_out != 0)
> +	goto fail;
>      }
> -  if (inflateEnd (& strm) != Z_OK
> -      || rc != Z_OK
> -      || strm.avail_out != 0)
> -    goto fail;
>  
>    *buffer = uncompressed_buffer;
>    *size = uncompressed_size;
> @@ -15254,6 +15265,7 @@ dump_section_as_strings (Elf_Internal_Shdr * section, Filedata * filedata)
>      {
>        uint64_t new_size = num_bytes;
>        uint64_t uncompressed_size = 0;
> +      bool is_zstd = false;
>  
>        if ((section->sh_flags & SHF_COMPRESSED) != 0)
>  	{
> @@ -15266,7 +15278,13 @@ dump_section_as_strings (Elf_Internal_Shdr * section, Filedata * filedata)
>  	       by get_compression_header.  */
>  	    goto error_out;
>  
> -	  if (chdr.ch_type != ELFCOMPRESS_ZLIB)
> +	  if (chdr.ch_type == ELFCOMPRESS_ZLIB)
> +	    ;
> +#ifdef HAVE_ZSTD
> +	  else if (chdr.ch_type == ELFCOMPRESS_ZSTD)
> +	    is_zstd = true;
> +#endif
> +	  else
>  	    {
>  	      warn (_("section '%s' has unsupported compress type: %d\n"),
>  		    printable_section_name (filedata, section), chdr.ch_type);
> @@ -15295,8 +15313,8 @@ dump_section_as_strings (Elf_Internal_Shdr * section, Filedata * filedata)
>  
>        if (uncompressed_size)
>  	{
> -	  if (uncompress_section_contents (& start,
> -					   uncompressed_size, & new_size))
> +	  if (uncompress_section_contents (is_zstd, &start, uncompressed_size,
> +					   &new_size))
>  	    num_bytes = new_size;
>  	  else
>  	    {
> @@ -15470,6 +15488,7 @@ dump_section_as_bytes (Elf_Internal_Shdr *section,
>      {
>        uint64_t new_size = section_size;
>        uint64_t uncompressed_size = 0;
> +      bool is_zstd = false;
>  
>        if ((section->sh_flags & SHF_COMPRESSED) != 0)
>  	{
> @@ -15482,7 +15501,13 @@ dump_section_as_bytes (Elf_Internal_Shdr *section,
>  	       by get_compression_header.  */
>  	    goto error_out;
>  
> -	  if (chdr.ch_type != ELFCOMPRESS_ZLIB)
> +	  if (chdr.ch_type == ELFCOMPRESS_ZLIB)
> +	    ;
> +#ifdef HAVE_ZSTD
> +	  else if (chdr.ch_type == ELFCOMPRESS_ZSTD)
> +	    is_zstd = true;
> +#endif
> +	  else
>  	    {
>  	      warn (_("section '%s' has unsupported compress type: %d\n"),
>  		    printable_section_name (filedata, section), chdr.ch_type);
> @@ -15511,8 +15536,8 @@ dump_section_as_bytes (Elf_Internal_Shdr *section,
>  
>        if (uncompressed_size)
>  	{
> -	  if (uncompress_section_contents (& start, uncompressed_size,
> -					   & new_size))
> +	  if (uncompress_section_contents (is_zstd, &start, uncompressed_size,
> +					   &new_size))
>  	    {
>  	      section_size = new_size;
>  	    }
> @@ -15848,6 +15873,7 @@ load_specific_debug_section (enum dwarf_section_display_enum  debug,
>        unsigned char *start = section->start;
>        uint64_t size = sec->sh_size;
>        uint64_t uncompressed_size = 0;
> +      bool is_zstd = false;
>  
>        if ((sec->sh_flags & SHF_COMPRESSED) != 0)
>  	{
> @@ -15869,7 +15895,13 @@ load_specific_debug_section (enum dwarf_section_display_enum  debug,
>  	       by get_compression_header.  */
>  	    return false;
>  
> -	  if (chdr.ch_type != ELFCOMPRESS_ZLIB)
> +	  if (chdr.ch_type == ELFCOMPRESS_ZLIB)
> +	    ;
> +#ifdef HAVE_ZSTD
> +	  else if (chdr.ch_type == ELFCOMPRESS_ZSTD)
> +	    is_zstd = true;
> +#endif
> +	  else
>  	    {
>  	      warn (_("section '%s' has unsupported compress type: %d\n"),
>  		    section->name, chdr.ch_type);
> @@ -15898,7 +15930,7 @@ load_specific_debug_section (enum dwarf_section_display_enum  debug,
>  
>        if (uncompressed_size)
>  	{
> -	  if (uncompress_section_contents (&start, uncompressed_size,
> +	  if (uncompress_section_contents (is_zstd, &start, uncompressed_size,
>  					   &size))
>  	    {
>  	      /* Free the compressed buffer, update the section buffer

Hi.

I noticed the following issue (dunno if caused by objcopy or the patches readelf) with this patch:

$ echo 'int main() {return 0;}' | gcc -O2 -g -c -o a.o -x c -
$ /home/marxin/Programming/binutils/objdir/ld/../binutils/objcopy -O elf32-x86-64 a.o a-x32.o --compress-debug-sections=zstd
$ /home/marxin/Programming/binutils/objdir/binutils/readelf -wi a-x32.o >/dev/null
readelf: Error: Unable to decompress section .debug_info
readelf: Error: No comp units in .debug_info section ?

while zlib is fine:

$ /home/marxin/Programming/binutils/objdir/ld/../binutils/objcopy -O elf32-x86-64 a.o a-x32.o --compress-debug-sections=zlib
$ /home/marxin/Programming/binutils/objdir/binutils/readelf -wi a-x32.o >/dev/null

Can you please take a look?

Cheers,
Martin
  
Fangrui Song Oct. 14, 2022, 3:35 a.m. UTC | #3
On 2022-10-13, Martin Liška wrote:
>On 10/1/22 08:20, Fangrui Song wrote:
>> ---
>>  binutils/Makefile.am |   6 +--
>>  binutils/Makefile.in |   6 +--
>>  binutils/readelf.c   | 112 +++++++++++++++++++++++++++----------------
>>  3 files changed, 78 insertions(+), 46 deletions(-)
>>
>> diff --git a/binutils/Makefile.am b/binutils/Makefile.am
>> index 751fbacce12..b249af721ae 100644
>> --- a/binutils/Makefile.am
>> +++ b/binutils/Makefile.am
>> @@ -54,8 +54,8 @@ DEBUGINFOD_LIBS = @DEBUGINFOD_LIBS@
>>  WARN_CFLAGS = @WARN_CFLAGS@
>>  WARN_CFLAGS_FOR_BUILD = @WARN_CFLAGS_FOR_BUILD@
>>  NO_WERROR = @NO_WERROR@
>> -AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC)
>> -AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC)
>> +AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC) $(ZSTD_CFLAGS)
>> +AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC) $(ZSTD_CFLAGS)
>>  LIBICONV = @LIBICONV@
>>
>>  # these two are almost the same program
>> @@ -256,7 +256,7 @@ objcopy_SOURCES = objcopy.c not-strip.c rename.c $(WRITE_DEBUG_SRCS) $(BULIBS)
>>  strings_SOURCES = strings.c $(BULIBS)
>>
>>  readelf_SOURCES = readelf.c version.c unwind-ia64.c dwarf.c demanguse.c $(ELFLIBS)
>> -readelf_LDADD   = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
>> +readelf_LDADD   = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(ZSTD_LIBS) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
>>
>>  elfedit_SOURCES = elfedit.c version.c $(ELFLIBS)
>>  elfedit_LDADD = $(LIBINTL) $(LIBIBERTY)
>> diff --git a/binutils/Makefile.in b/binutils/Makefile.in
>> index 6de4e239408..7d9039e0f2f 100644
>> --- a/binutils/Makefile.in
>> +++ b/binutils/Makefile.in
>> @@ -651,8 +651,8 @@ am__skipyacc =
>>  # case both are empty.
>>  ZLIB = @zlibdir@ -lz
>>  ZLIBINC = @zlibinc@
>> -AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC)
>> -AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC)
>> +AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC) $(ZSTD_CFLAGS)
>> +AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC) $(ZSTD_CFLAGS)
>>
>>  # these two are almost the same program
>>  AR_PROG = ar
>> @@ -790,7 +790,7 @@ size_SOURCES = size.c $(BULIBS)
>>  objcopy_SOURCES = objcopy.c not-strip.c rename.c $(WRITE_DEBUG_SRCS) $(BULIBS)
>>  strings_SOURCES = strings.c $(BULIBS)
>>  readelf_SOURCES = readelf.c version.c unwind-ia64.c dwarf.c demanguse.c $(ELFLIBS)
>> -readelf_LDADD = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
>> +readelf_LDADD = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(ZSTD_LIBS) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
>>  elfedit_SOURCES = elfedit.c version.c $(ELFLIBS)
>>  elfedit_LDADD = $(LIBINTL) $(LIBIBERTY)
>>  strip_new_SOURCES = objcopy.c is-strip.c rename.c $(WRITE_DEBUG_SRCS) $(BULIBS)
>> diff --git a/binutils/readelf.c b/binutils/readelf.c
>> index 351571c8abb..04cda213f33 100644
>> --- a/binutils/readelf.c
>> +++ b/binutils/readelf.c
>> @@ -44,6 +44,9 @@
>>  #include <assert.h>
>>  #include <time.h>
>>  #include <zlib.h>
>> +#ifdef HAVE_ZSTD
>> +#include <zstd.h>
>> +#endif
>>  #include <wchar.h>
>>
>>  #if defined HAVE_MSGPACK
>> @@ -15170,48 +15173,56 @@ get_section_contents (Elf_Internal_Shdr * section, Filedata * filedata)
>>                               _("section contents"));
>>  }
>>
>> -/* Uncompresses a section that was compressed using zlib, in place.  */
>> +/* Uncompresses a section that was compressed using zlib/zstd, in place.  */
>>
>>  static bool
>> -uncompress_section_contents (unsigned char **buffer,
>> -			     uint64_t uncompressed_size,
>> -			     uint64_t *size)
>> +uncompress_section_contents (bool is_zstd, unsigned char **buffer,
>> +			     uint64_t uncompressed_size, uint64_t *size)
>>  {
>>    uint64_t compressed_size = *size;
>>    unsigned char *compressed_buffer = *buffer;
>> -  unsigned char *uncompressed_buffer;
>> +  unsigned char *uncompressed_buffer = xmalloc (uncompressed_size);
>>    z_stream strm;
>>    int rc;
>>
>> -  /* It is possible the section consists of several compressed
>> -     buffers concatenated together, so we uncompress in a loop.  */
>> -  /* PR 18313: The state field in the z_stream structure is supposed
>> -     to be invisible to the user (ie us), but some compilers will
>> -     still complain about it being used without initialisation.  So
>> -     we first zero the entire z_stream structure and then set the fields
>> -     that we need.  */
>> -  memset (& strm, 0, sizeof strm);
>> -  strm.avail_in = compressed_size;
>> -  strm.next_in = (Bytef *) compressed_buffer;
>> -  strm.avail_out = uncompressed_size;
>> -  uncompressed_buffer = (unsigned char *) xmalloc (uncompressed_size);
>> +  if (is_zstd)
>> +    {
>> +#ifdef HAVE_ZSTD
>> +      size_t ret = ZSTD_decompress (uncompressed_buffer, uncompressed_size,
>> +				    compressed_buffer, compressed_size);
>> +      if (ZSTD_isError (ret))
>> +	goto fail;
>> +#endif
>> +    }
>> +  else
>> +    {
>> +      /* It is possible the section consists of several compressed
>> +	 buffers concatenated together, so we uncompress in a loop.  */
>> +      /* PR 18313: The state field in the z_stream structure is supposed
>> +	 to be invisible to the user (ie us), but some compilers will
>> +	 still complain about it being used without initialisation.  So
>> +	 we first zero the entire z_stream structure and then set the fields
>> +	 that we need.  */
>> +      memset (&strm, 0, sizeof strm);
>> +      strm.avail_in = compressed_size;
>> +      strm.next_in = (Bytef *)compressed_buffer;
>> +      strm.avail_out = uncompressed_size;
>>
>> -  rc = inflateInit (& strm);
>> -  while (strm.avail_in > 0)
>> -    {
>> -      if (rc != Z_OK)
>> -        break;
>> -      strm.next_out = ((Bytef *) uncompressed_buffer
>> -                       + (uncompressed_size - strm.avail_out));
>> -      rc = inflate (&strm, Z_FINISH);
>> -      if (rc != Z_STREAM_END)
>> -        break;
>> -      rc = inflateReset (& strm);
>> +      rc = inflateInit (&strm);
>> +      while (strm.avail_in > 0)
>> +	{
>> +	  if (rc != Z_OK)
>> +	    break;
>> +	  strm.next_out = ((Bytef *)uncompressed_buffer
>> +			   + (uncompressed_size - strm.avail_out));
>> +	  rc = inflate (&strm, Z_FINISH);
>> +	  if (rc != Z_STREAM_END)
>> +	    break;
>> +	  rc = inflateReset (&strm);
>> +	}
>> +      if (inflateEnd (&strm) != Z_OK || rc != Z_OK || strm.avail_out != 0)
>> +	goto fail;
>>      }
>> -  if (inflateEnd (& strm) != Z_OK
>> -      || rc != Z_OK
>> -      || strm.avail_out != 0)
>> -    goto fail;
>>
>>    *buffer = uncompressed_buffer;
>>    *size = uncompressed_size;
>> @@ -15254,6 +15265,7 @@ dump_section_as_strings (Elf_Internal_Shdr * section, Filedata * filedata)
>>      {
>>        uint64_t new_size = num_bytes;
>>        uint64_t uncompressed_size = 0;
>> +      bool is_zstd = false;
>>
>>        if ((section->sh_flags & SHF_COMPRESSED) != 0)
>>  	{
>> @@ -15266,7 +15278,13 @@ dump_section_as_strings (Elf_Internal_Shdr * section, Filedata * filedata)
>>  	       by get_compression_header.  */
>>  	    goto error_out;
>>
>> -	  if (chdr.ch_type != ELFCOMPRESS_ZLIB)
>> +	  if (chdr.ch_type == ELFCOMPRESS_ZLIB)
>> +	    ;
>> +#ifdef HAVE_ZSTD
>> +	  else if (chdr.ch_type == ELFCOMPRESS_ZSTD)
>> +	    is_zstd = true;
>> +#endif
>> +	  else
>>  	    {
>>  	      warn (_("section '%s' has unsupported compress type: %d\n"),
>>  		    printable_section_name (filedata, section), chdr.ch_type);
>> @@ -15295,8 +15313,8 @@ dump_section_as_strings (Elf_Internal_Shdr * section, Filedata * filedata)
>>
>>        if (uncompressed_size)
>>  	{
>> -	  if (uncompress_section_contents (& start,
>> -					   uncompressed_size, & new_size))
>> +	  if (uncompress_section_contents (is_zstd, &start, uncompressed_size,
>> +					   &new_size))
>>  	    num_bytes = new_size;
>>  	  else
>>  	    {
>> @@ -15470,6 +15488,7 @@ dump_section_as_bytes (Elf_Internal_Shdr *section,
>>      {
>>        uint64_t new_size = section_size;
>>        uint64_t uncompressed_size = 0;
>> +      bool is_zstd = false;
>>
>>        if ((section->sh_flags & SHF_COMPRESSED) != 0)
>>  	{
>> @@ -15482,7 +15501,13 @@ dump_section_as_bytes (Elf_Internal_Shdr *section,
>>  	       by get_compression_header.  */
>>  	    goto error_out;
>>
>> -	  if (chdr.ch_type != ELFCOMPRESS_ZLIB)
>> +	  if (chdr.ch_type == ELFCOMPRESS_ZLIB)
>> +	    ;
>> +#ifdef HAVE_ZSTD
>> +	  else if (chdr.ch_type == ELFCOMPRESS_ZSTD)
>> +	    is_zstd = true;
>> +#endif
>> +	  else
>>  	    {
>>  	      warn (_("section '%s' has unsupported compress type: %d\n"),
>>  		    printable_section_name (filedata, section), chdr.ch_type);
>> @@ -15511,8 +15536,8 @@ dump_section_as_bytes (Elf_Internal_Shdr *section,
>>
>>        if (uncompressed_size)
>>  	{
>> -	  if (uncompress_section_contents (& start, uncompressed_size,
>> -					   & new_size))
>> +	  if (uncompress_section_contents (is_zstd, &start, uncompressed_size,
>> +					   &new_size))
>>  	    {
>>  	      section_size = new_size;
>>  	    }
>> @@ -15848,6 +15873,7 @@ load_specific_debug_section (enum dwarf_section_display_enum  debug,
>>        unsigned char *start = section->start;
>>        uint64_t size = sec->sh_size;
>>        uint64_t uncompressed_size = 0;
>> +      bool is_zstd = false;
>>
>>        if ((sec->sh_flags & SHF_COMPRESSED) != 0)
>>  	{
>> @@ -15869,7 +15895,13 @@ load_specific_debug_section (enum dwarf_section_display_enum  debug,
>>  	       by get_compression_header.  */
>>  	    return false;
>>
>> -	  if (chdr.ch_type != ELFCOMPRESS_ZLIB)
>> +	  if (chdr.ch_type == ELFCOMPRESS_ZLIB)
>> +	    ;
>> +#ifdef HAVE_ZSTD
>> +	  else if (chdr.ch_type == ELFCOMPRESS_ZSTD)
>> +	    is_zstd = true;
>> +#endif
>> +	  else
>>  	    {
>>  	      warn (_("section '%s' has unsupported compress type: %d\n"),
>>  		    section->name, chdr.ch_type);
>> @@ -15898,7 +15930,7 @@ load_specific_debug_section (enum dwarf_section_display_enum  debug,
>>
>>        if (uncompressed_size)
>>  	{
>> -	  if (uncompress_section_contents (&start, uncompressed_size,
>> +	  if (uncompress_section_contents (is_zstd, &start, uncompressed_size,
>>  					   &size))
>>  	    {
>>  	      /* Free the compressed buffer, update the section buffer
>
>Hi.
>
>I noticed the following issue (dunno if caused by objcopy or the patches readelf) with this patch:
>
>$ echo 'int main() {return 0;}' | gcc -O2 -g -c -o a.o -x c -
>$ /home/marxin/Programming/binutils/objdir/ld/../binutils/objcopy -O elf32-x86-64 a.o a-x32.o --compress-debug-sections=zstd
>$ /home/marxin/Programming/binutils/objdir/binutils/readelf -wi a-x32.o >/dev/null
>readelf: Error: Unable to decompress section .debug_info
>readelf: Error: No comp units in .debug_info section ?
>
>while zlib is fine:
>
>$ /home/marxin/Programming/binutils/objdir/ld/../binutils/objcopy -O elf32-x86-64 a.o a-x32.o --compress-debug-sections=zlib
>$ /home/marxin/Programming/binutils/objdir/binutils/readelf -wi a-x32.o >/dev/null
>
>Can you please take a look?

objcopy -O elf32-x86-64 a.o a-x32.o --compress-debug-sections=zstd uses
ch_type=1 (ELFCOMPRESS_ZLIB), but I don't know where goes wrong.

Converting EI_CLASS looks very hacky to me.
  
Martin Liška Oct. 14, 2022, 7:47 a.m. UTC | #4
On 10/14/22 05:35, Fangrui Song wrote:
> On 2022-10-13, Martin Liška wrote:
>> On 10/1/22 08:20, Fangrui Song wrote:
>>> ---
>>>  binutils/Makefile.am |   6 +--
>>>  binutils/Makefile.in |   6 +--
>>>  binutils/readelf.c   | 112 +++++++++++++++++++++++++++----------------
>>>  3 files changed, 78 insertions(+), 46 deletions(-)
>>>
>>> diff --git a/binutils/Makefile.am b/binutils/Makefile.am
>>> index 751fbacce12..b249af721ae 100644
>>> --- a/binutils/Makefile.am
>>> +++ b/binutils/Makefile.am
>>> @@ -54,8 +54,8 @@ DEBUGINFOD_LIBS = @DEBUGINFOD_LIBS@
>>>  WARN_CFLAGS = @WARN_CFLAGS@
>>>  WARN_CFLAGS_FOR_BUILD = @WARN_CFLAGS_FOR_BUILD@
>>>  NO_WERROR = @NO_WERROR@
>>> -AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC)
>>> -AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC)
>>> +AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC) $(ZSTD_CFLAGS)
>>> +AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC) $(ZSTD_CFLAGS)
>>>  LIBICONV = @LIBICONV@
>>>
>>>  # these two are almost the same program
>>> @@ -256,7 +256,7 @@ objcopy_SOURCES = objcopy.c not-strip.c rename.c $(WRITE_DEBUG_SRCS) $(BULIBS)
>>>  strings_SOURCES = strings.c $(BULIBS)
>>>
>>>  readelf_SOURCES = readelf.c version.c unwind-ia64.c dwarf.c demanguse.c $(ELFLIBS)
>>> -readelf_LDADD   = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
>>> +readelf_LDADD   = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(ZSTD_LIBS) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
>>>
>>>  elfedit_SOURCES = elfedit.c version.c $(ELFLIBS)
>>>  elfedit_LDADD = $(LIBINTL) $(LIBIBERTY)
>>> diff --git a/binutils/Makefile.in b/binutils/Makefile.in
>>> index 6de4e239408..7d9039e0f2f 100644
>>> --- a/binutils/Makefile.in
>>> +++ b/binutils/Makefile.in
>>> @@ -651,8 +651,8 @@ am__skipyacc =
>>>  # case both are empty.
>>>  ZLIB = @zlibdir@ -lz
>>>  ZLIBINC = @zlibinc@
>>> -AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC)
>>> -AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC)
>>> +AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC) $(ZSTD_CFLAGS)
>>> +AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC) $(ZSTD_CFLAGS)
>>>
>>>  # these two are almost the same program
>>>  AR_PROG = ar
>>> @@ -790,7 +790,7 @@ size_SOURCES = size.c $(BULIBS)
>>>  objcopy_SOURCES = objcopy.c not-strip.c rename.c $(WRITE_DEBUG_SRCS) $(BULIBS)
>>>  strings_SOURCES = strings.c $(BULIBS)
>>>  readelf_SOURCES = readelf.c version.c unwind-ia64.c dwarf.c demanguse.c $(ELFLIBS)
>>> -readelf_LDADD = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
>>> +readelf_LDADD = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(ZSTD_LIBS) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
>>>  elfedit_SOURCES = elfedit.c version.c $(ELFLIBS)
>>>  elfedit_LDADD = $(LIBINTL) $(LIBIBERTY)
>>>  strip_new_SOURCES = objcopy.c is-strip.c rename.c $(WRITE_DEBUG_SRCS) $(BULIBS)
>>> diff --git a/binutils/readelf.c b/binutils/readelf.c
>>> index 351571c8abb..04cda213f33 100644
>>> --- a/binutils/readelf.c
>>> +++ b/binutils/readelf.c
>>> @@ -44,6 +44,9 @@
>>>  #include <assert.h>
>>>  #include <time.h>
>>>  #include <zlib.h>
>>> +#ifdef HAVE_ZSTD
>>> +#include <zstd.h>
>>> +#endif
>>>  #include <wchar.h>
>>>
>>>  #if defined HAVE_MSGPACK
>>> @@ -15170,48 +15173,56 @@ get_section_contents (Elf_Internal_Shdr * section, Filedata * filedata)
>>>                               _("section contents"));
>>>  }
>>>
>>> -/* Uncompresses a section that was compressed using zlib, in place.  */
>>> +/* Uncompresses a section that was compressed using zlib/zstd, in place.  */
>>>
>>>  static bool
>>> -uncompress_section_contents (unsigned char **buffer,
>>> -                 uint64_t uncompressed_size,
>>> -                 uint64_t *size)
>>> +uncompress_section_contents (bool is_zstd, unsigned char **buffer,
>>> +                 uint64_t uncompressed_size, uint64_t *size)
>>>  {
>>>    uint64_t compressed_size = *size;
>>>    unsigned char *compressed_buffer = *buffer;
>>> -  unsigned char *uncompressed_buffer;
>>> +  unsigned char *uncompressed_buffer = xmalloc (uncompressed_size);
>>>    z_stream strm;
>>>    int rc;
>>>
>>> -  /* It is possible the section consists of several compressed
>>> -     buffers concatenated together, so we uncompress in a loop.  */
>>> -  /* PR 18313: The state field in the z_stream structure is supposed
>>> -     to be invisible to the user (ie us), but some compilers will
>>> -     still complain about it being used without initialisation.  So
>>> -     we first zero the entire z_stream structure and then set the fields
>>> -     that we need.  */
>>> -  memset (& strm, 0, sizeof strm);
>>> -  strm.avail_in = compressed_size;
>>> -  strm.next_in = (Bytef *) compressed_buffer;
>>> -  strm.avail_out = uncompressed_size;
>>> -  uncompressed_buffer = (unsigned char *) xmalloc (uncompressed_size);
>>> +  if (is_zstd)
>>> +    {
>>> +#ifdef HAVE_ZSTD
>>> +      size_t ret = ZSTD_decompress (uncompressed_buffer, uncompressed_size,
>>> +                    compressed_buffer, compressed_size);
>>> +      if (ZSTD_isError (ret))
>>> +    goto fail;
>>> +#endif
>>> +    }
>>> +  else
>>> +    {
>>> +      /* It is possible the section consists of several compressed
>>> +     buffers concatenated together, so we uncompress in a loop.  */
>>> +      /* PR 18313: The state field in the z_stream structure is supposed
>>> +     to be invisible to the user (ie us), but some compilers will
>>> +     still complain about it being used without initialisation.  So
>>> +     we first zero the entire z_stream structure and then set the fields
>>> +     that we need.  */
>>> +      memset (&strm, 0, sizeof strm);
>>> +      strm.avail_in = compressed_size;
>>> +      strm.next_in = (Bytef *)compressed_buffer;
>>> +      strm.avail_out = uncompressed_size;
>>>
>>> -  rc = inflateInit (& strm);
>>> -  while (strm.avail_in > 0)
>>> -    {
>>> -      if (rc != Z_OK)
>>> -        break;
>>> -      strm.next_out = ((Bytef *) uncompressed_buffer
>>> -                       + (uncompressed_size - strm.avail_out));
>>> -      rc = inflate (&strm, Z_FINISH);
>>> -      if (rc != Z_STREAM_END)
>>> -        break;
>>> -      rc = inflateReset (& strm);
>>> +      rc = inflateInit (&strm);
>>> +      while (strm.avail_in > 0)
>>> +    {
>>> +      if (rc != Z_OK)
>>> +        break;
>>> +      strm.next_out = ((Bytef *)uncompressed_buffer
>>> +               + (uncompressed_size - strm.avail_out));
>>> +      rc = inflate (&strm, Z_FINISH);
>>> +      if (rc != Z_STREAM_END)
>>> +        break;
>>> +      rc = inflateReset (&strm);
>>> +    }
>>> +      if (inflateEnd (&strm) != Z_OK || rc != Z_OK || strm.avail_out != 0)
>>> +    goto fail;
>>>      }
>>> -  if (inflateEnd (& strm) != Z_OK
>>> -      || rc != Z_OK
>>> -      || strm.avail_out != 0)
>>> -    goto fail;
>>>
>>>    *buffer = uncompressed_buffer;
>>>    *size = uncompressed_size;
>>> @@ -15254,6 +15265,7 @@ dump_section_as_strings (Elf_Internal_Shdr * section, Filedata * filedata)
>>>      {
>>>        uint64_t new_size = num_bytes;
>>>        uint64_t uncompressed_size = 0;
>>> +      bool is_zstd = false;
>>>
>>>        if ((section->sh_flags & SHF_COMPRESSED) != 0)
>>>      {
>>> @@ -15266,7 +15278,13 @@ dump_section_as_strings (Elf_Internal_Shdr * section, Filedata * filedata)
>>>             by get_compression_header.  */
>>>          goto error_out;
>>>
>>> -      if (chdr.ch_type != ELFCOMPRESS_ZLIB)
>>> +      if (chdr.ch_type == ELFCOMPRESS_ZLIB)
>>> +        ;
>>> +#ifdef HAVE_ZSTD
>>> +      else if (chdr.ch_type == ELFCOMPRESS_ZSTD)
>>> +        is_zstd = true;
>>> +#endif
>>> +      else
>>>          {
>>>            warn (_("section '%s' has unsupported compress type: %d\n"),
>>>              printable_section_name (filedata, section), chdr.ch_type);
>>> @@ -15295,8 +15313,8 @@ dump_section_as_strings (Elf_Internal_Shdr * section, Filedata * filedata)
>>>
>>>        if (uncompressed_size)
>>>      {
>>> -      if (uncompress_section_contents (& start,
>>> -                       uncompressed_size, & new_size))
>>> +      if (uncompress_section_contents (is_zstd, &start, uncompressed_size,
>>> +                       &new_size))
>>>          num_bytes = new_size;
>>>        else
>>>          {
>>> @@ -15470,6 +15488,7 @@ dump_section_as_bytes (Elf_Internal_Shdr *section,
>>>      {
>>>        uint64_t new_size = section_size;
>>>        uint64_t uncompressed_size = 0;
>>> +      bool is_zstd = false;
>>>
>>>        if ((section->sh_flags & SHF_COMPRESSED) != 0)
>>>      {
>>> @@ -15482,7 +15501,13 @@ dump_section_as_bytes (Elf_Internal_Shdr *section,
>>>             by get_compression_header.  */
>>>          goto error_out;
>>>
>>> -      if (chdr.ch_type != ELFCOMPRESS_ZLIB)
>>> +      if (chdr.ch_type == ELFCOMPRESS_ZLIB)
>>> +        ;
>>> +#ifdef HAVE_ZSTD
>>> +      else if (chdr.ch_type == ELFCOMPRESS_ZSTD)
>>> +        is_zstd = true;
>>> +#endif
>>> +      else
>>>          {
>>>            warn (_("section '%s' has unsupported compress type: %d\n"),
>>>              printable_section_name (filedata, section), chdr.ch_type);
>>> @@ -15511,8 +15536,8 @@ dump_section_as_bytes (Elf_Internal_Shdr *section,
>>>
>>>        if (uncompressed_size)
>>>      {
>>> -      if (uncompress_section_contents (& start, uncompressed_size,
>>> -                       & new_size))
>>> +      if (uncompress_section_contents (is_zstd, &start, uncompressed_size,
>>> +                       &new_size))
>>>          {
>>>            section_size = new_size;
>>>          }
>>> @@ -15848,6 +15873,7 @@ load_specific_debug_section (enum dwarf_section_display_enum  debug,
>>>        unsigned char *start = section->start;
>>>        uint64_t size = sec->sh_size;
>>>        uint64_t uncompressed_size = 0;
>>> +      bool is_zstd = false;
>>>
>>>        if ((sec->sh_flags & SHF_COMPRESSED) != 0)
>>>      {
>>> @@ -15869,7 +15895,13 @@ load_specific_debug_section (enum dwarf_section_display_enum  debug,
>>>             by get_compression_header.  */
>>>          return false;
>>>
>>> -      if (chdr.ch_type != ELFCOMPRESS_ZLIB)
>>> +      if (chdr.ch_type == ELFCOMPRESS_ZLIB)
>>> +        ;
>>> +#ifdef HAVE_ZSTD
>>> +      else if (chdr.ch_type == ELFCOMPRESS_ZSTD)
>>> +        is_zstd = true;
>>> +#endif
>>> +      else
>>>          {
>>>            warn (_("section '%s' has unsupported compress type: %d\n"),
>>>              section->name, chdr.ch_type);
>>> @@ -15898,7 +15930,7 @@ load_specific_debug_section (enum dwarf_section_display_enum  debug,
>>>
>>>        if (uncompressed_size)
>>>      {
>>> -      if (uncompress_section_contents (&start, uncompressed_size,
>>> +      if (uncompress_section_contents (is_zstd, &start, uncompressed_size,
>>>                         &size))
>>>          {
>>>            /* Free the compressed buffer, update the section buffer
>>
>> Hi.
>>
>> I noticed the following issue (dunno if caused by objcopy or the patches readelf) with this patch:
>>
>> $ echo 'int main() {return 0;}' | gcc -O2 -g -c -o a.o -x c -
>> $ /home/marxin/Programming/binutils/objdir/ld/../binutils/objcopy -O elf32-x86-64 a.o a-x32.o --compress-debug-sections=zstd
>> $ /home/marxin/Programming/binutils/objdir/binutils/readelf -wi a-x32.o >/dev/null
>> readelf: Error: Unable to decompress section .debug_info
>> readelf: Error: No comp units in .debug_info section ?
>>
>> while zlib is fine:
>>
>> $ /home/marxin/Programming/binutils/objdir/ld/../binutils/objcopy -O elf32-x86-64 a.o a-x32.o --compress-debug-sections=zlib
>> $ /home/marxin/Programming/binutils/objdir/binutils/readelf -wi a-x32.o >/dev/null
>>
>> Can you please take a look?
> 
> objcopy -O elf32-x86-64 a.o a-x32.o --compress-debug-sections=zstd uses
> ch_type=1 (ELFCOMPRESS_ZLIB), but I don't know where goes wrong.
> 
> Converting EI_CLASS looks very hacky to me.

Ok, I've got it. It's unrelated to '-O elf32-x86-64', but to the fact we move from zlib-gabi compression of a.o
to zstd for the objcopy output file.

Simpler reproducer:

$ echo 'int main() {return 0;}' | gcc -O2 -g -c -o a.o -x c - -Wa,--compress-debug-sections=zlib
$ /home/marxin/Programming/binutils/objdir//binutils/objcopy a.o a2.o --compress-debug-sections=zstd
$ /home/marxin/Programming/binutils/objdir/binutils/readelf -wi a2.o
readelf: Error: Unable to decompress section .debug_info
readelf: Error: No comp units in .debug_info section ?

As seen the .debug_info section in a2.o claims to be compressed:

$ readelf -SW a2.o | grep C
  [ 5] .debug_info       PROGBITS        0000000000000000 000043 000050 00   C  0   0  1

But it's not as it's not beneficial and the following code in in bfd/compress.c is taken:

  /* If compression didn't make the section smaller, keep it uncompressed.  */
  if (compressed_size >= uncompressed_size)
    {
      memcpy (buffer, input_buffer, uncompressed_size);
      sec->compress_status = COMPRESS_SECTION_NONE;
    }

Apparently, we miss removal of the compression header :/ Moreover, the following is fine:
$ /home/marxin/Programming/binutils/objdir//binutils/objcopy a.o a2.o --compress-debug-sections=none
$ /home/marxin/Programming/binutils/objdir/binutils/readelf -wi a2.o >/dev/null

Lemme try preparing a fix.

Martin
  
Martin Liška Oct. 14, 2022, 8:26 a.m. UTC | #5
On 10/14/22 09:47, Martin Liška wrote:
> On 10/14/22 05:35, Fangrui Song wrote:
>> On 2022-10-13, Martin Liška wrote:
>>> On 10/1/22 08:20, Fangrui Song wrote:
>>>> ---
>>>>  binutils/Makefile.am |   6 +--
>>>>  binutils/Makefile.in |   6 +--
>>>>  binutils/readelf.c   | 112 +++++++++++++++++++++++++++----------------
>>>>  3 files changed, 78 insertions(+), 46 deletions(-)
>>>>
>>>> diff --git a/binutils/Makefile.am b/binutils/Makefile.am
>>>> index 751fbacce12..b249af721ae 100644
>>>> --- a/binutils/Makefile.am
>>>> +++ b/binutils/Makefile.am
>>>> @@ -54,8 +54,8 @@ DEBUGINFOD_LIBS = @DEBUGINFOD_LIBS@
>>>>  WARN_CFLAGS = @WARN_CFLAGS@
>>>>  WARN_CFLAGS_FOR_BUILD = @WARN_CFLAGS_FOR_BUILD@
>>>>  NO_WERROR = @NO_WERROR@
>>>> -AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC)
>>>> -AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC)
>>>> +AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC) $(ZSTD_CFLAGS)
>>>> +AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC) $(ZSTD_CFLAGS)
>>>>  LIBICONV = @LIBICONV@
>>>>
>>>>  # these two are almost the same program
>>>> @@ -256,7 +256,7 @@ objcopy_SOURCES = objcopy.c not-strip.c rename.c $(WRITE_DEBUG_SRCS) $(BULIBS)
>>>>  strings_SOURCES = strings.c $(BULIBS)
>>>>
>>>>  readelf_SOURCES = readelf.c version.c unwind-ia64.c dwarf.c demanguse.c $(ELFLIBS)
>>>> -readelf_LDADD   = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
>>>> +readelf_LDADD   = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(ZSTD_LIBS) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
>>>>
>>>>  elfedit_SOURCES = elfedit.c version.c $(ELFLIBS)
>>>>  elfedit_LDADD = $(LIBINTL) $(LIBIBERTY)
>>>> diff --git a/binutils/Makefile.in b/binutils/Makefile.in
>>>> index 6de4e239408..7d9039e0f2f 100644
>>>> --- a/binutils/Makefile.in
>>>> +++ b/binutils/Makefile.in
>>>> @@ -651,8 +651,8 @@ am__skipyacc =
>>>>  # case both are empty.
>>>>  ZLIB = @zlibdir@ -lz
>>>>  ZLIBINC = @zlibinc@
>>>> -AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC)
>>>> -AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC)
>>>> +AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC) $(ZSTD_CFLAGS)
>>>> +AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC) $(ZSTD_CFLAGS)
>>>>
>>>>  # these two are almost the same program
>>>>  AR_PROG = ar
>>>> @@ -790,7 +790,7 @@ size_SOURCES = size.c $(BULIBS)
>>>>  objcopy_SOURCES = objcopy.c not-strip.c rename.c $(WRITE_DEBUG_SRCS) $(BULIBS)
>>>>  strings_SOURCES = strings.c $(BULIBS)
>>>>  readelf_SOURCES = readelf.c version.c unwind-ia64.c dwarf.c demanguse.c $(ELFLIBS)
>>>> -readelf_LDADD = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
>>>> +readelf_LDADD = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(ZSTD_LIBS) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
>>>>  elfedit_SOURCES = elfedit.c version.c $(ELFLIBS)
>>>>  elfedit_LDADD = $(LIBINTL) $(LIBIBERTY)
>>>>  strip_new_SOURCES = objcopy.c is-strip.c rename.c $(WRITE_DEBUG_SRCS) $(BULIBS)
>>>> diff --git a/binutils/readelf.c b/binutils/readelf.c
>>>> index 351571c8abb..04cda213f33 100644
>>>> --- a/binutils/readelf.c
>>>> +++ b/binutils/readelf.c
>>>> @@ -44,6 +44,9 @@
>>>>  #include <assert.h>
>>>>  #include <time.h>
>>>>  #include <zlib.h>
>>>> +#ifdef HAVE_ZSTD
>>>> +#include <zstd.h>
>>>> +#endif
>>>>  #include <wchar.h>
>>>>
>>>>  #if defined HAVE_MSGPACK
>>>> @@ -15170,48 +15173,56 @@ get_section_contents (Elf_Internal_Shdr * section, Filedata * filedata)
>>>>                               _("section contents"));
>>>>  }
>>>>
>>>> -/* Uncompresses a section that was compressed using zlib, in place.  */
>>>> +/* Uncompresses a section that was compressed using zlib/zstd, in place.  */
>>>>
>>>>  static bool
>>>> -uncompress_section_contents (unsigned char **buffer,
>>>> -                 uint64_t uncompressed_size,
>>>> -                 uint64_t *size)
>>>> +uncompress_section_contents (bool is_zstd, unsigned char **buffer,
>>>> +                 uint64_t uncompressed_size, uint64_t *size)
>>>>  {
>>>>    uint64_t compressed_size = *size;
>>>>    unsigned char *compressed_buffer = *buffer;
>>>> -  unsigned char *uncompressed_buffer;
>>>> +  unsigned char *uncompressed_buffer = xmalloc (uncompressed_size);
>>>>    z_stream strm;
>>>>    int rc;
>>>>
>>>> -  /* It is possible the section consists of several compressed
>>>> -     buffers concatenated together, so we uncompress in a loop.  */
>>>> -  /* PR 18313: The state field in the z_stream structure is supposed
>>>> -     to be invisible to the user (ie us), but some compilers will
>>>> -     still complain about it being used without initialisation.  So
>>>> -     we first zero the entire z_stream structure and then set the fields
>>>> -     that we need.  */
>>>> -  memset (& strm, 0, sizeof strm);
>>>> -  strm.avail_in = compressed_size;
>>>> -  strm.next_in = (Bytef *) compressed_buffer;
>>>> -  strm.avail_out = uncompressed_size;
>>>> -  uncompressed_buffer = (unsigned char *) xmalloc (uncompressed_size);
>>>> +  if (is_zstd)
>>>> +    {
>>>> +#ifdef HAVE_ZSTD
>>>> +      size_t ret = ZSTD_decompress (uncompressed_buffer, uncompressed_size,
>>>> +                    compressed_buffer, compressed_size);
>>>> +      if (ZSTD_isError (ret))
>>>> +    goto fail;
>>>> +#endif
>>>> +    }
>>>> +  else
>>>> +    {
>>>> +      /* It is possible the section consists of several compressed
>>>> +     buffers concatenated together, so we uncompress in a loop.  */
>>>> +      /* PR 18313: The state field in the z_stream structure is supposed
>>>> +     to be invisible to the user (ie us), but some compilers will
>>>> +     still complain about it being used without initialisation.  So
>>>> +     we first zero the entire z_stream structure and then set the fields
>>>> +     that we need.  */
>>>> +      memset (&strm, 0, sizeof strm);
>>>> +      strm.avail_in = compressed_size;
>>>> +      strm.next_in = (Bytef *)compressed_buffer;
>>>> +      strm.avail_out = uncompressed_size;
>>>>
>>>> -  rc = inflateInit (& strm);
>>>> -  while (strm.avail_in > 0)
>>>> -    {
>>>> -      if (rc != Z_OK)
>>>> -        break;
>>>> -      strm.next_out = ((Bytef *) uncompressed_buffer
>>>> -                       + (uncompressed_size - strm.avail_out));
>>>> -      rc = inflate (&strm, Z_FINISH);
>>>> -      if (rc != Z_STREAM_END)
>>>> -        break;
>>>> -      rc = inflateReset (& strm);
>>>> +      rc = inflateInit (&strm);
>>>> +      while (strm.avail_in > 0)
>>>> +    {
>>>> +      if (rc != Z_OK)
>>>> +        break;
>>>> +      strm.next_out = ((Bytef *)uncompressed_buffer
>>>> +               + (uncompressed_size - strm.avail_out));
>>>> +      rc = inflate (&strm, Z_FINISH);
>>>> +      if (rc != Z_STREAM_END)
>>>> +        break;
>>>> +      rc = inflateReset (&strm);
>>>> +    }
>>>> +      if (inflateEnd (&strm) != Z_OK || rc != Z_OK || strm.avail_out != 0)
>>>> +    goto fail;
>>>>      }
>>>> -  if (inflateEnd (& strm) != Z_OK
>>>> -      || rc != Z_OK
>>>> -      || strm.avail_out != 0)
>>>> -    goto fail;
>>>>
>>>>    *buffer = uncompressed_buffer;
>>>>    *size = uncompressed_size;
>>>> @@ -15254,6 +15265,7 @@ dump_section_as_strings (Elf_Internal_Shdr * section, Filedata * filedata)
>>>>      {
>>>>        uint64_t new_size = num_bytes;
>>>>        uint64_t uncompressed_size = 0;
>>>> +      bool is_zstd = false;
>>>>
>>>>        if ((section->sh_flags & SHF_COMPRESSED) != 0)
>>>>      {
>>>> @@ -15266,7 +15278,13 @@ dump_section_as_strings (Elf_Internal_Shdr * section, Filedata * filedata)
>>>>             by get_compression_header.  */
>>>>          goto error_out;
>>>>
>>>> -      if (chdr.ch_type != ELFCOMPRESS_ZLIB)
>>>> +      if (chdr.ch_type == ELFCOMPRESS_ZLIB)
>>>> +        ;
>>>> +#ifdef HAVE_ZSTD
>>>> +      else if (chdr.ch_type == ELFCOMPRESS_ZSTD)
>>>> +        is_zstd = true;
>>>> +#endif
>>>> +      else
>>>>          {
>>>>            warn (_("section '%s' has unsupported compress type: %d\n"),
>>>>              printable_section_name (filedata, section), chdr.ch_type);
>>>> @@ -15295,8 +15313,8 @@ dump_section_as_strings (Elf_Internal_Shdr * section, Filedata * filedata)
>>>>
>>>>        if (uncompressed_size)
>>>>      {
>>>> -      if (uncompress_section_contents (& start,
>>>> -                       uncompressed_size, & new_size))
>>>> +      if (uncompress_section_contents (is_zstd, &start, uncompressed_size,
>>>> +                       &new_size))
>>>>          num_bytes = new_size;
>>>>        else
>>>>          {
>>>> @@ -15470,6 +15488,7 @@ dump_section_as_bytes (Elf_Internal_Shdr *section,
>>>>      {
>>>>        uint64_t new_size = section_size;
>>>>        uint64_t uncompressed_size = 0;
>>>> +      bool is_zstd = false;
>>>>
>>>>        if ((section->sh_flags & SHF_COMPRESSED) != 0)
>>>>      {
>>>> @@ -15482,7 +15501,13 @@ dump_section_as_bytes (Elf_Internal_Shdr *section,
>>>>             by get_compression_header.  */
>>>>          goto error_out;
>>>>
>>>> -      if (chdr.ch_type != ELFCOMPRESS_ZLIB)
>>>> +      if (chdr.ch_type == ELFCOMPRESS_ZLIB)
>>>> +        ;
>>>> +#ifdef HAVE_ZSTD
>>>> +      else if (chdr.ch_type == ELFCOMPRESS_ZSTD)
>>>> +        is_zstd = true;
>>>> +#endif
>>>> +      else
>>>>          {
>>>>            warn (_("section '%s' has unsupported compress type: %d\n"),
>>>>              printable_section_name (filedata, section), chdr.ch_type);
>>>> @@ -15511,8 +15536,8 @@ dump_section_as_bytes (Elf_Internal_Shdr *section,
>>>>
>>>>        if (uncompressed_size)
>>>>      {
>>>> -      if (uncompress_section_contents (& start, uncompressed_size,
>>>> -                       & new_size))
>>>> +      if (uncompress_section_contents (is_zstd, &start, uncompressed_size,
>>>> +                       &new_size))
>>>>          {
>>>>            section_size = new_size;
>>>>          }
>>>> @@ -15848,6 +15873,7 @@ load_specific_debug_section (enum dwarf_section_display_enum  debug,
>>>>        unsigned char *start = section->start;
>>>>        uint64_t size = sec->sh_size;
>>>>        uint64_t uncompressed_size = 0;
>>>> +      bool is_zstd = false;
>>>>
>>>>        if ((sec->sh_flags & SHF_COMPRESSED) != 0)
>>>>      {
>>>> @@ -15869,7 +15895,13 @@ load_specific_debug_section (enum dwarf_section_display_enum  debug,
>>>>             by get_compression_header.  */
>>>>          return false;
>>>>
>>>> -      if (chdr.ch_type != ELFCOMPRESS_ZLIB)
>>>> +      if (chdr.ch_type == ELFCOMPRESS_ZLIB)
>>>> +        ;
>>>> +#ifdef HAVE_ZSTD
>>>> +      else if (chdr.ch_type == ELFCOMPRESS_ZSTD)
>>>> +        is_zstd = true;
>>>> +#endif
>>>> +      else
>>>>          {
>>>>            warn (_("section '%s' has unsupported compress type: %d\n"),
>>>>              section->name, chdr.ch_type);
>>>> @@ -15898,7 +15930,7 @@ load_specific_debug_section (enum dwarf_section_display_enum  debug,
>>>>
>>>>        if (uncompressed_size)
>>>>      {
>>>> -      if (uncompress_section_contents (&start, uncompressed_size,
>>>> +      if (uncompress_section_contents (is_zstd, &start, uncompressed_size,
>>>>                         &size))
>>>>          {
>>>>            /* Free the compressed buffer, update the section buffer
>>>
>>> Hi.
>>>
>>> I noticed the following issue (dunno if caused by objcopy or the patches readelf) with this patch:
>>>
>>> $ echo 'int main() {return 0;}' | gcc -O2 -g -c -o a.o -x c -
>>> $ /home/marxin/Programming/binutils/objdir/ld/../binutils/objcopy -O elf32-x86-64 a.o a-x32.o --compress-debug-sections=zstd
>>> $ /home/marxin/Programming/binutils/objdir/binutils/readelf -wi a-x32.o >/dev/null
>>> readelf: Error: Unable to decompress section .debug_info
>>> readelf: Error: No comp units in .debug_info section ?
>>>
>>> while zlib is fine:
>>>
>>> $ /home/marxin/Programming/binutils/objdir/ld/../binutils/objcopy -O elf32-x86-64 a.o a-x32.o --compress-debug-sections=zlib
>>> $ /home/marxin/Programming/binutils/objdir/binutils/readelf -wi a-x32.o >/dev/null
>>>
>>> Can you please take a look?
>>
>> objcopy -O elf32-x86-64 a.o a-x32.o --compress-debug-sections=zstd uses
>> ch_type=1 (ELFCOMPRESS_ZLIB), but I don't know where goes wrong.
>>
>> Converting EI_CLASS looks very hacky to me.
> 
> Ok, I've got it. It's unrelated to '-O elf32-x86-64', but to the fact we move from zlib-gabi compression of a.o
> to zstd for the objcopy output file.
> 
> Simpler reproducer:
> 
> $ echo 'int main() {return 0;}' | gcc -O2 -g -c -o a.o -x c - -Wa,--compress-debug-sections=zlib
> $ /home/marxin/Programming/binutils/objdir//binutils/objcopy a.o a2.o --compress-debug-sections=zstd
> $ /home/marxin/Programming/binutils/objdir/binutils/readelf -wi a2.o
> readelf: Error: Unable to decompress section .debug_info
> readelf: Error: No comp units in .debug_info section ?
> 
> As seen the .debug_info section in a2.o claims to be compressed:
> 
> $ readelf -SW a2.o | grep C
>   [ 5] .debug_info       PROGBITS        0000000000000000 000043 000050 00   C  0   0  1
> 
> But it's not as it's not beneficial and the following code in in bfd/compress.c is taken:
> 
>   /* If compression didn't make the section smaller, keep it uncompressed.  */
>   if (compressed_size >= uncompressed_size)
>     {
>       memcpy (buffer, input_buffer, uncompressed_size);
>       sec->compress_status = COMPRESS_SECTION_NONE;
>     }
> 
> Apparently, we miss removal of the compression header :/ Moreover, the following is fine:
> $ /home/marxin/Programming/binutils/objdir//binutils/objcopy a.o a2.o --compress-debug-sections=none
> $ /home/marxin/Programming/binutils/objdir/binutils/readelf -wi a2.o >/dev/null
> 
> Lemme try preparing a fix.
> 
> Martin

There's a patch candidate that works for the simple example (all x86_64 ELF), but fails for:
objcopy -O elf32-x86-64 a.o a-x32.o --compress-debug-sections=zstd

likely due to bfd_update_compression_header where compression header is updated.

diff --git a/bfd/compress.c b/bfd/compress.c
index 364df14142b..262e2f5bed2 100644
--- a/bfd/compress.c
+++ b/bfd/compress.c
@@ -202,8 +202,10 @@ bfd_compress_section_contents (bfd *abfd, sec_ptr sec)
       compressed_size += new_header_size;
     }
 
-  /* If compression didn't make the section smaller, keep it uncompressed.  */
-  if (compressed_size >= uncompressed_size)
+  /* If compression didn't make the section smaller, keep it uncompressed.
+     Do it only if the original section is not compressed, otherwise we would
+     need to drop the section header.  */
+  if (!compressed && compressed_size >= uncompressed_size)
     {
       memcpy (buffer, input_buffer, uncompressed_size);
       sec->compress_status = COMPRESS_SECTION_NONE;

@Alan: Can you please help us?

Thanks,
Martin
  
Alan Modra Oct. 14, 2022, 11:28 a.m. UTC | #6
So we have a zlib-gabi .debug_info section that increases in size with
zstd, so much so that it's better to leave the section uncompressed.
Things go horribly wrong due to leaving compress_status as
COMPRESS_SECTION_NONE.  The section is read again off disk using the
uncompressed size.  So you get the zlib section again with some
garbage at the end.

Also, if the section is to be left uncompressed, the input
SHF_COMPRESSED flag needs to be reset otherwise it is copied to
output.

I'm not ready to commit this, just thought I'd post the results of a
bit of debugging.

diff --git a/bfd/compress.c b/bfd/compress.c
index 364df14142b..5eef38bd50a 100644
--- a/bfd/compress.c
+++ b/bfd/compress.c
@@ -206,15 +206,15 @@ bfd_compress_section_contents (bfd *abfd, sec_ptr sec)
   if (compressed_size >= uncompressed_size)
     {
       memcpy (buffer, input_buffer, uncompressed_size);
-      sec->compress_status = COMPRESS_SECTION_NONE;
+      elf_section_flags (sec) &= ~SHF_COMPRESSED;
     }
   else
     {
       sec->size = uncompressed_size;
       bfd_update_compression_header (abfd, buffer, sec);
       sec->size = compressed_size;
-      sec->compress_status = COMPRESS_SECTION_DONE;
     }
+  sec->compress_status = COMPRESS_SECTION_DONE;
   sec->contents = buffer;
   free (input_buffer);
   return uncompressed_size;
  
Alan Modra Oct. 14, 2022, 12:09 p.m. UTC | #7
On Fri, Oct 14, 2022 at 09:58:41PM +1030, Alan Modra wrote:
> So we have a zlib-gabi .debug_info section that increases in size with
> zstd, so much so that it's better to leave the section uncompressed.
> Things go horribly wrong due to leaving compress_status as
> COMPRESS_SECTION_NONE.  The section is read again off disk using the
> uncompressed size.  So you get the zlib section again with some
> garbage at the end.
> 
> Also, if the section is to be left uncompressed, the input
> SHF_COMPRESSED flag needs to be reset otherwise it is copied to
> output.
> 
> I'm not ready to commit this, just thought I'd post the results of a
> bit of debugging.

And if I'd run the testsuite before posting, I may have posted a
better patch..  Using COMPRESS_SECTION_DONE led to .debug -> .zdebug
renaming of sections, so it appears we want another compress_status
that says the final section contents are in sec->contents.

diff --git a/bfd/bfd-in2.h b/bfd/bfd-in2.h
index 25e1806e689..4f4658021c6 100644
--- a/bfd/bfd-in2.h
+++ b/bfd/bfd-in2.h
@@ -974,11 +974,12 @@ typedef struct bfd_section
   unsigned int gc_mark : 1;
 
   /* Section compression status.  */
-  unsigned int compress_status : 2;
+  unsigned int compress_status : 3;
 #define COMPRESS_SECTION_NONE    0
 #define COMPRESS_SECTION_DONE    1
 #define DECOMPRESS_SECTION_ZLIB  2
 #define DECOMPRESS_SECTION_ZSTD  3
+#define DECOMPRESS_SECTION_DONE  4
 
   /* The following flags are used by the ELF linker. */
 
diff --git a/bfd/compress.c b/bfd/compress.c
index 364df14142b..94bc5412a89 100644
--- a/bfd/compress.c
+++ b/bfd/compress.c
@@ -151,7 +151,7 @@ bfd_compress_section_contents (bfd *abfd, sec_ptr sec)
 	  free (input_buffer);
 	  bfd_set_section_alignment (sec, uncompressed_alignment_pow);
 	  sec->contents = buffer;
-	  sec->compress_status = COMPRESS_SECTION_DONE;
+	  sec->compress_status = DECOMPRESS_SECTION_DONE;
 	  sec->size = uncompressed_size;
 	  input_buffer = buffer;
 	}
@@ -206,7 +206,8 @@ bfd_compress_section_contents (bfd *abfd, sec_ptr sec)
   if (compressed_size >= uncompressed_size)
     {
       memcpy (buffer, input_buffer, uncompressed_size);
-      sec->compress_status = COMPRESS_SECTION_NONE;
+      elf_section_flags (sec) &= ~SHF_COMPRESSED;
+      sec->compress_status = DECOMPRESS_SECTION_DONE;
     }
   else
     {
@@ -361,6 +362,7 @@ bfd_get_full_section_contents (bfd *abfd, sec_ptr sec, bfd_byte **ptr)
       return true;
 
     case COMPRESS_SECTION_DONE:
+    case DECOMPRESS_SECTION_DONE:
       if (sec->contents == NULL)
 	return false;
       if (p == NULL)
diff --git a/bfd/section.c b/bfd/section.c
index 614570e976e..52cf7e042cd 100644
--- a/bfd/section.c
+++ b/bfd/section.c
@@ -389,11 +389,12 @@ CODE_FRAGMENT
 .  unsigned int gc_mark : 1;
 .
 .  {* Section compression status.  *}
-.  unsigned int compress_status : 2;
+.  unsigned int compress_status : 3;
 .#define COMPRESS_SECTION_NONE    0
 .#define COMPRESS_SECTION_DONE    1
 .#define DECOMPRESS_SECTION_ZLIB  2
 .#define DECOMPRESS_SECTION_ZSTD  3
+.#define DECOMPRESS_SECTION_DONE  4
 .
 .  {* The following flags are used by the ELF linker. *}
 .
  
Alan Modra Oct. 16, 2022, 4:42 a.m. UTC | #8
On Fri, Oct 14, 2022 at 10:39:17PM +1030, Alan Modra wrote:
> On Fri, Oct 14, 2022 at 09:58:41PM +1030, Alan Modra wrote:
> > So we have a zlib-gabi .debug_info section that increases in size with
> > zstd, so much so that it's better to leave the section uncompressed.
> > Things go horribly wrong due to leaving compress_status as
> > COMPRESS_SECTION_NONE.  The section is read again off disk using the
> > uncompressed size.  So you get the zlib section again with some
> > garbage at the end.
> > 
> > Also, if the section is to be left uncompressed, the input
> > SHF_COMPRESSED flag needs to be reset otherwise it is copied to
> > output.
> > 
> > I'm not ready to commit this, just thought I'd post the results of a
> > bit of debugging.
> 
> And if I'd run the testsuite before posting, I may have posted a
> better patch..  Using COMPRESS_SECTION_DONE led to .debug -> .zdebug
> renaming of sections, so it appears we want another compress_status
> that says the final section contents are in sec->contents.

Another compress_status isn't elegant.  I'm about to commit this:

	* bfd.c (bfd_convert_section_contents): Handle zstd.
	* compress.c (bfd_compress_section_contents): When section
	contents are uncompressed set SEC_IN_MEMORY flag,
	compress_status to COMRESS_SECTION_NONE, and clear
	SHF_COMPRESSED.  Set SEC_IN_MEMORY for compressed contents.
	(bfd_get_full_section_contents): Don't check section size
	against file size when SEC_IN_MEMORY.
	(bfd_cache_section_contents): Delete function.
	* elf32-arm.c (elf32_arm_get_synthetic_symtab): Expand
	bfd_cache_section_contents here.
	* bfd-in2.h: Regenerate.

diff --git a/bfd/bfd-in2.h b/bfd/bfd-in2.h
index 25e1806e689..534a46439fc 100644
--- a/bfd/bfd-in2.h
+++ b/bfd/bfd-in2.h
@@ -7964,9 +7964,6 @@ bfd_byte *bfd_simple_get_relocated_section_contents
 bool bfd_get_full_section_contents
    (bfd *abfd, asection *section, bfd_byte **ptr);
 
-void bfd_cache_section_contents
-   (asection *sec, void *contents);
-
 bool bfd_is_section_compressed_info
    (bfd *abfd, asection *section,
     int *compression_header_size_p,
diff --git a/bfd/bfd.c b/bfd/bfd.c
index 5f2033bee7a..4fca8250082 100644
--- a/bfd/bfd.c
+++ b/bfd/bfd.c
@@ -2801,14 +2801,14 @@ bfd_convert_section_contents (bfd *ibfd, sec_ptr isec, bfd *obfd,
   if (ohdr_size == sizeof (Elf32_External_Chdr))
     {
       Elf32_External_Chdr *echdr = (Elf32_External_Chdr *) contents;
-      bfd_put_32 (obfd, ELFCOMPRESS_ZLIB, &echdr->ch_type);
+      bfd_put_32 (obfd, chdr.ch_type, &echdr->ch_type);
       bfd_put_32 (obfd, chdr.ch_size, &echdr->ch_size);
       bfd_put_32 (obfd, chdr.ch_addralign, &echdr->ch_addralign);
     }
   else
     {
       Elf64_External_Chdr *echdr = (Elf64_External_Chdr *) contents;
-      bfd_put_32 (obfd, ELFCOMPRESS_ZLIB, &echdr->ch_type);
+      bfd_put_32 (obfd, chdr.ch_type, &echdr->ch_type);
       bfd_put_32 (obfd, 0, &echdr->ch_reserved);
       bfd_put_64 (obfd, chdr.ch_size, &echdr->ch_size);
       bfd_put_64 (obfd, chdr.ch_addralign, &echdr->ch_addralign);
diff --git a/bfd/compress.c b/bfd/compress.c
index 364df14142b..9608a0a6341 100644
--- a/bfd/compress.c
+++ b/bfd/compress.c
@@ -151,7 +151,8 @@ bfd_compress_section_contents (bfd *abfd, sec_ptr sec)
 	  free (input_buffer);
 	  bfd_set_section_alignment (sec, uncompressed_alignment_pow);
 	  sec->contents = buffer;
-	  sec->compress_status = COMPRESS_SECTION_DONE;
+	  sec->flags |= SEC_IN_MEMORY;
+	  sec->compress_status = COMPRESS_SECTION_NONE;
 	  sec->size = uncompressed_size;
 	  input_buffer = buffer;
 	}
@@ -206,6 +207,7 @@ bfd_compress_section_contents (bfd *abfd, sec_ptr sec)
   if (compressed_size >= uncompressed_size)
     {
       memcpy (buffer, input_buffer, uncompressed_size);
+      elf_section_flags (sec) &= ~SHF_COMPRESSED;
       sec->compress_status = COMPRESS_SECTION_NONE;
     }
   else
@@ -216,6 +218,7 @@ bfd_compress_section_contents (bfd *abfd, sec_ptr sec)
       sec->compress_status = COMPRESS_SECTION_DONE;
     }
   sec->contents = buffer;
+  sec->flags |= SEC_IN_MEMORY;
   free (input_buffer);
   return uncompressed_size;
 }
@@ -268,6 +271,7 @@ bfd_get_full_section_contents (bfd *abfd, sec_ptr sec, bfd_byte **ptr)
 	  ufile_ptr filesize = bfd_get_file_size (abfd);
 	  if (filesize > 0
 	      && filesize < sz
+	      && (bfd_section_flags (sec) & SEC_IN_MEMORY) == 0
 	      /* PR 24753: Linker created sections can be larger than
 		 the file size, eg if they are being used to hold stubs.  */
 	      && (bfd_section_flags (sec) & SEC_LINKER_CREATED) == 0
@@ -380,29 +384,6 @@ bfd_get_full_section_contents (bfd *abfd, sec_ptr sec, bfd_byte **ptr)
     }
 }
 
-/*
-FUNCTION
-	bfd_cache_section_contents
-
-SYNOPSIS
-	void bfd_cache_section_contents
-	  (asection *sec, void *contents);
-
-DESCRIPTION
-	Stash @var(contents) so any following reads of @var(sec) do
-	not need to decompress again.
-*/
-
-void
-bfd_cache_section_contents (asection *sec, void *contents)
-{
-  if (sec->compress_status == DECOMPRESS_SECTION_ZLIB
-      || sec->compress_status == DECOMPRESS_SECTION_ZSTD)
-    sec->compress_status = COMPRESS_SECTION_DONE;
-  sec->contents = contents;
-  sec->flags |= SEC_IN_MEMORY;
-}
-
 /*
 FUNCTION
 	bfd_is_section_compressed_info
diff --git a/bfd/elf32-arm.c b/bfd/elf32-arm.c
index 0852b38ae4c..ec18a8ab362 100644
--- a/bfd/elf32-arm.c
+++ b/bfd/elf32-arm.c
@@ -20020,9 +20020,11 @@ elf32_arm_get_synthetic_symtab (bfd *abfd,
   data = plt->contents;
   if (data == NULL)
     {
-      if (!bfd_get_full_section_contents (abfd, (asection *) plt, &data) || data == NULL)
+      if (!bfd_get_full_section_contents (abfd, plt, &data)
+	  || data == NULL)
 	return -1;
-      bfd_cache_section_contents ((asection *) plt, data);
+      plt->contents = data;
+      plt->flags |= SEC_IN_MEMORY;
     }
 
   count = relplt->size / hdr->sh_entsize;
  
Fangrui Song Oct. 16, 2022, 8:46 p.m. UTC | #9
On 2022-10-16, Alan Modra wrote:
>On Fri, Oct 14, 2022 at 10:39:17PM +1030, Alan Modra wrote:
>> On Fri, Oct 14, 2022 at 09:58:41PM +1030, Alan Modra wrote:
>> > So we have a zlib-gabi .debug_info section that increases in size with
>> > zstd, so much so that it's better to leave the section uncompressed.
>> > Things go horribly wrong due to leaving compress_status as
>> > COMPRESS_SECTION_NONE.  The section is read again off disk using the
>> > uncompressed size.  So you get the zlib section again with some
>> > garbage at the end.
>> >
>> > Also, if the section is to be left uncompressed, the input
>> > SHF_COMPRESSED flag needs to be reset otherwise it is copied to
>> > output.
>> >
>> > I'm not ready to commit this, just thought I'd post the results of a
>> > bit of debugging.
>>
>> And if I'd run the testsuite before posting, I may have posted a
>> better patch..  Using COMPRESS_SECTION_DONE led to .debug -> .zdebug
>> renaming of sections, so it appears we want another compress_status
>> that says the final section contents are in sec->contents.
>
>Another compress_status isn't elegant.  I'm about to commit this:
>
>	* bfd.c (bfd_convert_section_contents): Handle zstd.
>	* compress.c (bfd_compress_section_contents): When section
>	contents are uncompressed set SEC_IN_MEMORY flag,
>	compress_status to COMRESS_SECTION_NONE, and clear
>	SHF_COMPRESSED.  Set SEC_IN_MEMORY for compressed contents.
>	(bfd_get_full_section_contents): Don't check section size
>	against file size when SEC_IN_MEMORY.
>	(bfd_cache_section_contents): Delete function.
>	* elf32-arm.c (elf32_arm_get_synthetic_symtab): Expand
>	bfd_cache_section_contents here.
>	* bfd-in2.h: Regenerate.

Thanks. Commit 206e9791cb09459bf92603428370c16bfde282ac
fixed the issue.

~/Dev/binutils-gdb/out/debug/binutils/objcopy -O elf32-x86-64 a.o a-x32.o --compress-debug-sections=zstd
~/Dev/binutils-gdb/out/debug/binutils/readelf -wi a-x32.o  # good


Is this readelf patch ok for installing?
  
Martin Liška Oct. 17, 2022, 7:52 a.m. UTC | #10
On 10/16/22 06:42, Alan Modra wrote:
> |Another compress_status isn't elegant. I'm about to commit this:|

Thanks for help Alan!

Cheers,
Martin
  
Martin Liška Oct. 20, 2022, 8:04 a.m. UTC | #11
On 10/6/22 04:20, Fangrui Song via Binutils wrote:
> On 2022-09-30, Fangrui Song wrote:
>> ---
>> binutils/Makefile.am |   6 +--
>> binutils/Makefile.in |   6 +--
>> binutils/readelf.c   | 112 +++++++++++++++++++++++++++----------------
>> 3 files changed, 78 insertions(+), 46 deletions(-)
> 
> ping:)
  
Martin Liška Oct. 20, 2022, 8:05 a.m. UTC | #12
Alan, may I please ping this?

On 10/6/22 04:20, Fangrui Song via Binutils wrote:
> On 2022-09-30, Fangrui Song wrote:
>> ---
>> binutils/Makefile.am |   6 +--
>> binutils/Makefile.in |   6 +--
>> binutils/readelf.c   | 112 +++++++++++++++++++++++++++----------------
>> 3 files changed, 78 insertions(+), 46 deletions(-)
> 
> ping:)
  
Alan Modra Oct. 21, 2022, 9:16 a.m. UTC | #13
On Sun, Oct 16, 2022 at 01:46:52PM -0700, Fangrui Song wrote:
> Is this readelf patch ok for installing?

Yes.  Sorry for the delay in reviewing.
  

Patch

diff --git a/binutils/Makefile.am b/binutils/Makefile.am
index 751fbacce12..b249af721ae 100644
--- a/binutils/Makefile.am
+++ b/binutils/Makefile.am
@@ -54,8 +54,8 @@  DEBUGINFOD_LIBS = @DEBUGINFOD_LIBS@
 WARN_CFLAGS = @WARN_CFLAGS@
 WARN_CFLAGS_FOR_BUILD = @WARN_CFLAGS_FOR_BUILD@
 NO_WERROR = @NO_WERROR@
-AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC)
-AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC)
+AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC) $(ZSTD_CFLAGS)
+AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC) $(ZSTD_CFLAGS)
 LIBICONV = @LIBICONV@
 
 # these two are almost the same program
@@ -256,7 +256,7 @@  objcopy_SOURCES = objcopy.c not-strip.c rename.c $(WRITE_DEBUG_SRCS) $(BULIBS)
 strings_SOURCES = strings.c $(BULIBS)
 
 readelf_SOURCES = readelf.c version.c unwind-ia64.c dwarf.c demanguse.c $(ELFLIBS)
-readelf_LDADD   = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
+readelf_LDADD   = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(ZSTD_LIBS) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
 
 elfedit_SOURCES = elfedit.c version.c $(ELFLIBS)
 elfedit_LDADD = $(LIBINTL) $(LIBIBERTY)
diff --git a/binutils/Makefile.in b/binutils/Makefile.in
index 6de4e239408..7d9039e0f2f 100644
--- a/binutils/Makefile.in
+++ b/binutils/Makefile.in
@@ -651,8 +651,8 @@  am__skipyacc =
 # case both are empty.
 ZLIB = @zlibdir@ -lz
 ZLIBINC = @zlibinc@
-AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC)
-AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC)
+AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC) $(ZSTD_CFLAGS)
+AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC) $(ZSTD_CFLAGS)
 
 # these two are almost the same program
 AR_PROG = ar
@@ -790,7 +790,7 @@  size_SOURCES = size.c $(BULIBS)
 objcopy_SOURCES = objcopy.c not-strip.c rename.c $(WRITE_DEBUG_SRCS) $(BULIBS)
 strings_SOURCES = strings.c $(BULIBS)
 readelf_SOURCES = readelf.c version.c unwind-ia64.c dwarf.c demanguse.c $(ELFLIBS)
-readelf_LDADD = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
+readelf_LDADD = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(ZSTD_LIBS) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
 elfedit_SOURCES = elfedit.c version.c $(ELFLIBS)
 elfedit_LDADD = $(LIBINTL) $(LIBIBERTY)
 strip_new_SOURCES = objcopy.c is-strip.c rename.c $(WRITE_DEBUG_SRCS) $(BULIBS)
diff --git a/binutils/readelf.c b/binutils/readelf.c
index 351571c8abb..04cda213f33 100644
--- a/binutils/readelf.c
+++ b/binutils/readelf.c
@@ -44,6 +44,9 @@ 
 #include <assert.h>
 #include <time.h>
 #include <zlib.h>
+#ifdef HAVE_ZSTD
+#include <zstd.h>
+#endif
 #include <wchar.h>
 
 #if defined HAVE_MSGPACK
@@ -15170,48 +15173,56 @@  get_section_contents (Elf_Internal_Shdr * section, Filedata * filedata)
                              _("section contents"));
 }
 
-/* Uncompresses a section that was compressed using zlib, in place.  */
+/* Uncompresses a section that was compressed using zlib/zstd, in place.  */
 
 static bool
-uncompress_section_contents (unsigned char **buffer,
-			     uint64_t uncompressed_size,
-			     uint64_t *size)
+uncompress_section_contents (bool is_zstd, unsigned char **buffer,
+			     uint64_t uncompressed_size, uint64_t *size)
 {
   uint64_t compressed_size = *size;
   unsigned char *compressed_buffer = *buffer;
-  unsigned char *uncompressed_buffer;
+  unsigned char *uncompressed_buffer = xmalloc (uncompressed_size);
   z_stream strm;
   int rc;
 
-  /* It is possible the section consists of several compressed
-     buffers concatenated together, so we uncompress in a loop.  */
-  /* PR 18313: The state field in the z_stream structure is supposed
-     to be invisible to the user (ie us), but some compilers will
-     still complain about it being used without initialisation.  So
-     we first zero the entire z_stream structure and then set the fields
-     that we need.  */
-  memset (& strm, 0, sizeof strm);
-  strm.avail_in = compressed_size;
-  strm.next_in = (Bytef *) compressed_buffer;
-  strm.avail_out = uncompressed_size;
-  uncompressed_buffer = (unsigned char *) xmalloc (uncompressed_size);
+  if (is_zstd)
+    {
+#ifdef HAVE_ZSTD
+      size_t ret = ZSTD_decompress (uncompressed_buffer, uncompressed_size,
+				    compressed_buffer, compressed_size);
+      if (ZSTD_isError (ret))
+	goto fail;
+#endif
+    }
+  else
+    {
+      /* It is possible the section consists of several compressed
+	 buffers concatenated together, so we uncompress in a loop.  */
+      /* PR 18313: The state field in the z_stream structure is supposed
+	 to be invisible to the user (ie us), but some compilers will
+	 still complain about it being used without initialisation.  So
+	 we first zero the entire z_stream structure and then set the fields
+	 that we need.  */
+      memset (&strm, 0, sizeof strm);
+      strm.avail_in = compressed_size;
+      strm.next_in = (Bytef *)compressed_buffer;
+      strm.avail_out = uncompressed_size;
 
-  rc = inflateInit (& strm);
-  while (strm.avail_in > 0)
-    {
-      if (rc != Z_OK)
-        break;
-      strm.next_out = ((Bytef *) uncompressed_buffer
-                       + (uncompressed_size - strm.avail_out));
-      rc = inflate (&strm, Z_FINISH);
-      if (rc != Z_STREAM_END)
-        break;
-      rc = inflateReset (& strm);
+      rc = inflateInit (&strm);
+      while (strm.avail_in > 0)
+	{
+	  if (rc != Z_OK)
+	    break;
+	  strm.next_out = ((Bytef *)uncompressed_buffer
+			   + (uncompressed_size - strm.avail_out));
+	  rc = inflate (&strm, Z_FINISH);
+	  if (rc != Z_STREAM_END)
+	    break;
+	  rc = inflateReset (&strm);
+	}
+      if (inflateEnd (&strm) != Z_OK || rc != Z_OK || strm.avail_out != 0)
+	goto fail;
     }
-  if (inflateEnd (& strm) != Z_OK
-      || rc != Z_OK
-      || strm.avail_out != 0)
-    goto fail;
 
   *buffer = uncompressed_buffer;
   *size = uncompressed_size;
@@ -15254,6 +15265,7 @@  dump_section_as_strings (Elf_Internal_Shdr * section, Filedata * filedata)
     {
       uint64_t new_size = num_bytes;
       uint64_t uncompressed_size = 0;
+      bool is_zstd = false;
 
       if ((section->sh_flags & SHF_COMPRESSED) != 0)
 	{
@@ -15266,7 +15278,13 @@  dump_section_as_strings (Elf_Internal_Shdr * section, Filedata * filedata)
 	       by get_compression_header.  */
 	    goto error_out;
 
-	  if (chdr.ch_type != ELFCOMPRESS_ZLIB)
+	  if (chdr.ch_type == ELFCOMPRESS_ZLIB)
+	    ;
+#ifdef HAVE_ZSTD
+	  else if (chdr.ch_type == ELFCOMPRESS_ZSTD)
+	    is_zstd = true;
+#endif
+	  else
 	    {
 	      warn (_("section '%s' has unsupported compress type: %d\n"),
 		    printable_section_name (filedata, section), chdr.ch_type);
@@ -15295,8 +15313,8 @@  dump_section_as_strings (Elf_Internal_Shdr * section, Filedata * filedata)
 
       if (uncompressed_size)
 	{
-	  if (uncompress_section_contents (& start,
-					   uncompressed_size, & new_size))
+	  if (uncompress_section_contents (is_zstd, &start, uncompressed_size,
+					   &new_size))
 	    num_bytes = new_size;
 	  else
 	    {
@@ -15470,6 +15488,7 @@  dump_section_as_bytes (Elf_Internal_Shdr *section,
     {
       uint64_t new_size = section_size;
       uint64_t uncompressed_size = 0;
+      bool is_zstd = false;
 
       if ((section->sh_flags & SHF_COMPRESSED) != 0)
 	{
@@ -15482,7 +15501,13 @@  dump_section_as_bytes (Elf_Internal_Shdr *section,
 	       by get_compression_header.  */
 	    goto error_out;
 
-	  if (chdr.ch_type != ELFCOMPRESS_ZLIB)
+	  if (chdr.ch_type == ELFCOMPRESS_ZLIB)
+	    ;
+#ifdef HAVE_ZSTD
+	  else if (chdr.ch_type == ELFCOMPRESS_ZSTD)
+	    is_zstd = true;
+#endif
+	  else
 	    {
 	      warn (_("section '%s' has unsupported compress type: %d\n"),
 		    printable_section_name (filedata, section), chdr.ch_type);
@@ -15511,8 +15536,8 @@  dump_section_as_bytes (Elf_Internal_Shdr *section,
 
       if (uncompressed_size)
 	{
-	  if (uncompress_section_contents (& start, uncompressed_size,
-					   & new_size))
+	  if (uncompress_section_contents (is_zstd, &start, uncompressed_size,
+					   &new_size))
 	    {
 	      section_size = new_size;
 	    }
@@ -15848,6 +15873,7 @@  load_specific_debug_section (enum dwarf_section_display_enum  debug,
       unsigned char *start = section->start;
       uint64_t size = sec->sh_size;
       uint64_t uncompressed_size = 0;
+      bool is_zstd = false;
 
       if ((sec->sh_flags & SHF_COMPRESSED) != 0)
 	{
@@ -15869,7 +15895,13 @@  load_specific_debug_section (enum dwarf_section_display_enum  debug,
 	       by get_compression_header.  */
 	    return false;
 
-	  if (chdr.ch_type != ELFCOMPRESS_ZLIB)
+	  if (chdr.ch_type == ELFCOMPRESS_ZLIB)
+	    ;
+#ifdef HAVE_ZSTD
+	  else if (chdr.ch_type == ELFCOMPRESS_ZSTD)
+	    is_zstd = true;
+#endif
+	  else
 	    {
 	      warn (_("section '%s' has unsupported compress type: %d\n"),
 		    section->name, chdr.ch_type);
@@ -15898,7 +15930,7 @@  load_specific_debug_section (enum dwarf_section_display_enum  debug,
 
       if (uncompressed_size)
 	{
-	  if (uncompress_section_contents (&start, uncompressed_size,
+	  if (uncompress_section_contents (is_zstd, &start, uncompressed_size,
 					   &size))
 	    {
 	      /* Free the compressed buffer, update the section buffer