readelf: use fseeko64 or fseeko if possible

Message ID 20221115145717.64948-1-bwerl.dev@gmail.com
State Accepted
Headers
Series readelf: use fseeko64 or fseeko if possible |

Checks

Context Check Description
snail/binutils-gdb-check success Github commit url

Commit Message

Brett Werling Nov. 15, 2022, 2:57 p.m. UTC
  Changes readelf to make use first of fseeko64 and then fseeko,
depending on which of those is available. If neither is available,
reverts to the previous behavior of using fseek. In all cases, the
offset argument given is cast accoridngly.

This is necessary when building readelf for LLP64 systems, where a
long will only be 32 bits wide. If the elf file in question is >= 2 GiB,
that is greater than the max long value and therefore fseek will fail
indicating that the offset is negative. On such systems, making use of
fseeko64 or fseeko will result in the ability so seek past the 2 GiB
max long boundary.
---
 binutils/config.in |  6 ++++++
 binutils/configure |  2 +-
 binutils/readelf.c | 47 ++++++++++++++++++++++++++++------------------
 3 files changed, 36 insertions(+), 19 deletions(-)
  

Comments

Alan Modra Nov. 17, 2022, 7:02 a.m. UTC | #1
On Tue, Nov 15, 2022 at 08:57:17AM -0600, Brett Werling via Binutils wrote:
> Changes readelf to make use first of fseeko64 and then fseeko,
> depending on which of those is available. If neither is available,
> reverts to the previous behavior of using fseek. In all cases, the
> offset argument given is cast accoridngly.
> 
> This is necessary when building readelf for LLP64 systems, where a
> long will only be 32 bits wide. If the elf file in question is >= 2 GiB,
> that is greater than the max long value and therefore fseek will fail
> indicating that the offset is negative. On such systems, making use of
> fseeko64 or fseeko will result in the ability so seek past the 2 GiB
> max long boundary.
> ---
>  binutils/config.in |  6 ++++++
>  binutils/configure |  2 +-
>  binutils/readelf.c | 47 ++++++++++++++++++++++++++++------------------
>  3 files changed, 36 insertions(+), 19 deletions(-)
> 
> diff --git a/binutils/config.in b/binutils/config.in
> index 4d22a80971b..91fe00af777 100644
> --- a/binutils/config.in
> +++ b/binutils/config.in
> @@ -67,6 +67,12 @@
>  /* Define to 1 if you have the <fcntl.h> header file. */
>  #undef HAVE_FCNTL_H
>  
> +/* Define to 1 if you have the `fseeko' function. */
> +#undef HAVE_FSEEKO
> +
> +/* Define to 1 if you have the `fseeko64' function. */
> +#undef HAVE_FSEEKO64
> +
>  /* Define to 1 if you have the `getc_unlocked' function. */
>  #undef HAVE_GETC_UNLOCKED
>  
> diff --git a/binutils/configure b/binutils/configure
> index 6176d699e57..46519a31701 100755
> --- a/binutils/configure
> +++ b/binutils/configure
> @@ -13155,7 +13155,7 @@ $as_echo "#define HAVE_MMAP 1" >>confdefs.h
>  fi
>  rm -f conftest.mmap conftest.txt
>  
> -for ac_func in getc_unlocked mkdtemp mkstemp utimensat utimes
> +for ac_func in getc_unlocked mkdtemp mkstemp utimensat utimes fseeko fseeko64
>  do :
>    as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
>  ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
> diff --git a/binutils/readelf.c b/binutils/readelf.c
> index c8323539a21..3e5e4f29ff2 100644
> --- a/binutils/readelf.c
> +++ b/binutils/readelf.c
> @@ -177,6 +177,17 @@
>  #define offsetof(TYPE, MEMBER) ((size_t) &(((TYPE *) 0)->MEMBER))
>  #endif
>  
> +#if defined (HAVE_FSEEKO64)
> +#define FSEEK_FUNC fseeko64
> +#define FSEEK_OFF_TYPE off64_t
> +#elif defined (HAVE_FSEEKO)
> +#define FSEEK_FUNC fseeko
> +#define FSEEK_OFF_TYPE off_t
> +#else
> +#define FSEEK_FUNC fseek
> +#define FSEEK_OFF_TYPE long
> +#endif
> +
>  typedef struct elf_section_list
>  {
>    Elf_Internal_Shdr *        hdr;
> @@ -479,7 +490,7 @@ get_data (void *var,
>        return NULL;
>      }
>  
> -  if (fseek (filedata->handle, filedata->archive_file_offset + offset,
> +  if (FSEEK_FUNC (filedata->handle, (FSEEK_OFF_TYPE)filedata->archive_file_offset + offset,
>  	     SEEK_SET))
>      {

There shouldn't be a need for the FSEEK_OFF_TYPE cast.  Casts always
make me question the code, and indeed in this case you have a whole
lot of unsigned long variables and struct fields that will need to be
wider in order to support large archives.  Also, functions like
offset_from_vma will need changing.

>        if (reason)
> @@ -6262,8 +6273,8 @@ the .dynamic section is not the same as the dynamic segment\n"));
>  	  if (segment->p_offset >= filedata->file_size
>  	      || segment->p_filesz > filedata->file_size - segment->p_offset
>  	      || segment->p_filesz - 1 >= (size_t) -2
> -	      || fseek (filedata->handle,
> -			filedata->archive_file_offset + (long) segment->p_offset,
> +	      || FSEEK_FUNC (filedata->handle,
> +			(FSEEK_OFF_TYPE)filedata->archive_file_offset + (long) segment->p_offset,
>  			SEEK_SET))

and the (long) also should go.  I'm happy enough with the patch as-is,
except please remove all those FSEEK_OFF_TYPE casts.  Further fixes
can be done in followup patches.
  
Brett Werling Nov. 17, 2022, 2:09 p.m. UTC | #2
> There shouldn't be a need for the FSEEK_OFF_TYPE cast.  Casts always
> make me question the code, and indeed in this case you have a whole
> lot of unsigned long variables and struct fields that will need to be
> wider in order to support large archives.  Also, functions like
> offset_from_vma will need changing.
>
> >        if (reason)
> > @@ -6262,8 +6273,8 @@ the .dynamic section is not the same as the dynamic segment\n"));
> >         if (segment->p_offset >= filedata->file_size
> >             || segment->p_filesz > filedata->file_size - segment->p_offset
> >             || segment->p_filesz - 1 >= (size_t) -2
> > -           || fseek (filedata->handle,
> > -                     filedata->archive_file_offset + (long) segment->p_offset,
> > +           || FSEEK_FUNC (filedata->handle,
> > +                     (FSEEK_OFF_TYPE)filedata->archive_file_offset + (long) segment->p_offset,
> >                       SEEK_SET))
>
> and the (long) also should go.  I'm happy enough with the patch as-is,
> except please remove all those FSEEK_OFF_TYPE casts.  Further fixes
> can be done in followup patches.
>
> --
> Alan Modra
> Australia Development Lab, IBM

I tend to agree in that I don't particularly like the casts myself. To be
honest, I only had them in there to try and ensure the math between the
"offset" operands was being done on an off64_t or off_t and try to avoid
the chance of a long. I have no problem removing them and I assume the
resulting code will be functionally equivalent. New patch incoming
shortly.

If others watching this change feel strongly enough that this needs to be
part of a wider effort, please indicate so and I will let this patch lie.
I mostly wanted to avoid having to carry it forward locally in my
particular use case.

Brett
  

Patch

diff --git a/binutils/config.in b/binutils/config.in
index 4d22a80971b..91fe00af777 100644
--- a/binutils/config.in
+++ b/binutils/config.in
@@ -67,6 +67,12 @@ 
 /* Define to 1 if you have the <fcntl.h> header file. */
 #undef HAVE_FCNTL_H
 
+/* Define to 1 if you have the `fseeko' function. */
+#undef HAVE_FSEEKO
+
+/* Define to 1 if you have the `fseeko64' function. */
+#undef HAVE_FSEEKO64
+
 /* Define to 1 if you have the `getc_unlocked' function. */
 #undef HAVE_GETC_UNLOCKED
 
diff --git a/binutils/configure b/binutils/configure
index 6176d699e57..46519a31701 100755
--- a/binutils/configure
+++ b/binutils/configure
@@ -13155,7 +13155,7 @@  $as_echo "#define HAVE_MMAP 1" >>confdefs.h
 fi
 rm -f conftest.mmap conftest.txt
 
-for ac_func in getc_unlocked mkdtemp mkstemp utimensat utimes
+for ac_func in getc_unlocked mkdtemp mkstemp utimensat utimes fseeko fseeko64
 do :
   as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
 ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
diff --git a/binutils/readelf.c b/binutils/readelf.c
index c8323539a21..3e5e4f29ff2 100644
--- a/binutils/readelf.c
+++ b/binutils/readelf.c
@@ -177,6 +177,17 @@ 
 #define offsetof(TYPE, MEMBER) ((size_t) &(((TYPE *) 0)->MEMBER))
 #endif
 
+#if defined (HAVE_FSEEKO64)
+#define FSEEK_FUNC fseeko64
+#define FSEEK_OFF_TYPE off64_t
+#elif defined (HAVE_FSEEKO)
+#define FSEEK_FUNC fseeko
+#define FSEEK_OFF_TYPE off_t
+#else
+#define FSEEK_FUNC fseek
+#define FSEEK_OFF_TYPE long
+#endif
+
 typedef struct elf_section_list
 {
   Elf_Internal_Shdr *        hdr;
@@ -479,7 +490,7 @@  get_data (void *var,
       return NULL;
     }
 
-  if (fseek (filedata->handle, filedata->archive_file_offset + offset,
+  if (FSEEK_FUNC (filedata->handle, (FSEEK_OFF_TYPE)filedata->archive_file_offset + offset,
 	     SEEK_SET))
     {
       if (reason)
@@ -6262,8 +6273,8 @@  the .dynamic section is not the same as the dynamic segment\n"));
 	  if (segment->p_offset >= filedata->file_size
 	      || segment->p_filesz > filedata->file_size - segment->p_offset
 	      || segment->p_filesz - 1 >= (size_t) -2
-	      || fseek (filedata->handle,
-			filedata->archive_file_offset + (long) segment->p_offset,
+	      || FSEEK_FUNC (filedata->handle,
+			(FSEEK_OFF_TYPE)filedata->archive_file_offset + (long) segment->p_offset,
 			SEEK_SET))
 	    error (_("Unable to find program interpreter name\n"));
 	  else
@@ -11035,8 +11046,8 @@  get_num_dynamic_syms (Filedata * filedata)
 	  && filedata->file_header.e_ident[EI_CLASS] == ELFCLASS64)
 	hash_ent_size = 8;
 
-      if (fseek (filedata->handle,
-		 (filedata->archive_file_offset
+      if (FSEEK_FUNC (filedata->handle,
+		 ((FSEEK_OFF_TYPE)filedata->archive_file_offset
 		  + offset_from_vma (filedata, filedata->dynamic_info[DT_HASH],
 				     sizeof nb + sizeof nc)),
 		 SEEK_SET))
@@ -11088,8 +11099,8 @@  get_num_dynamic_syms (Filedata * filedata)
       uint64_t buckets_vma;
       unsigned long hn;
 
-      if (fseek (filedata->handle,
-		 (filedata->archive_file_offset
+      if (FSEEK_FUNC (filedata->handle,
+		 ((FSEEK_OFF_TYPE)filedata->archive_file_offset
 		  + offset_from_vma (filedata,
 				     filedata->dynamic_info_DT_GNU_HASH,
 				     sizeof nb)),
@@ -11114,8 +11125,8 @@  get_num_dynamic_syms (Filedata * filedata)
       else
 	buckets_vma += bitmaskwords * 8;
 
-      if (fseek (filedata->handle,
-		 (filedata->archive_file_offset
+      if (FSEEK_FUNC (filedata->handle,
+		 ((FSEEK_OFF_TYPE)filedata->archive_file_offset
 		  + offset_from_vma (filedata, buckets_vma, 4)),
 		 SEEK_SET))
 	{
@@ -11144,8 +11155,8 @@  get_num_dynamic_syms (Filedata * filedata)
 
       maxchain -= filedata->gnusymidx;
 
-      if (fseek (filedata->handle,
-		 (filedata->archive_file_offset
+      if (FSEEK_FUNC (filedata->handle,
+		 ((FSEEK_OFF_TYPE)filedata->archive_file_offset
 		  + offset_from_vma (filedata,
 				     buckets_vma + 4 * (filedata->ngnubuckets
 							+ maxchain),
@@ -11171,8 +11182,8 @@  get_num_dynamic_syms (Filedata * filedata)
 	}
       while ((byte_get (nb, 4) & 1) == 0);
 
-      if (fseek (filedata->handle,
-		 (filedata->archive_file_offset
+      if (FSEEK_FUNC (filedata->handle,
+		 ((FSEEK_OFF_TYPE)filedata->archive_file_offset
 		  + offset_from_vma (filedata, (buckets_vma
 						+ 4 * filedata->ngnubuckets),
 				     4)),
@@ -11190,8 +11201,8 @@  get_num_dynamic_syms (Filedata * filedata)
 
       if (filedata->dynamic_info_DT_MIPS_XHASH)
 	{
-	  if (fseek (filedata->handle,
-		     (filedata->archive_file_offset
+	  if (FSEEK_FUNC (filedata->handle,
+		     ((FSEEK_OFF_TYPE)filedata->archive_file_offset
 		      + offset_from_vma (filedata, (buckets_vma
 						    + 4 * (filedata->ngnubuckets
 							   + maxchain)), 4)),
@@ -22546,7 +22557,7 @@  process_archive (Filedata * filedata, bool is_thin_archive)
 	      ret = false;
 	    }
 
-	  if (fseek (filedata->handle, current_pos, SEEK_SET) != 0)
+	  if (FSEEK_FUNC (filedata->handle, (FSEEK_OFF_TYPE)current_pos, SEEK_SET) != 0)
 	    {
 	      error (_("%s: failed to seek back to start of object files "
 		       "in the archive\n"),
@@ -22573,7 +22584,7 @@  process_archive (Filedata * filedata, bool is_thin_archive)
       char * qualified_name;
 
       /* Read the next archive header.  */
-      if (fseek (filedata->handle, arch.next_arhdr_offset, SEEK_SET) != 0)
+      if (FSEEK_FUNC (filedata->handle, (FSEEK_OFF_TYPE)arch.next_arhdr_offset, SEEK_SET) != 0)
 	{
 	  error (_("%s: failed to seek to next archive header\n"),
 		 arch.file_name);
@@ -22683,7 +22694,7 @@  process_archive (Filedata * filedata, bool is_thin_archive)
 
 	  /* The nested archive file will have been opened and setup by
 	     get_archive_member_name.  */
-	  if (fseek (nested_arch.file, filedata->archive_file_offset,
+	  if (FSEEK_FUNC (nested_arch.file, (FSEEK_OFF_TYPE)filedata->archive_file_offset,
 		     SEEK_SET) != 0)
 	    {
 	      error (_("%s: failed to seek to archive member.\n"),