RISC-V: Fix out of range memory access of machine mode table

Message ID 20230619090548.1574008-1-pan2.li@intel.com
State Accepted
Headers
Series RISC-V: Fix out of range memory access of machine mode table |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Li, Pan2 via Gcc-patches June 19, 2023, 9:05 a.m. UTC
  From: Pan Li <pan2.li@intel.com>

We extend the machine mode from 8 to 16 bits already. But there still
one placing missing from the tree-streamer. It has one hard coded array
for the machine code like size 256.

In the lto pass, we memset the array by MAX_MACHINE_MODE count but the
value of the MAX_MACHINE_MODE will grow as more and more modes are added.
While the machine mode array in tree-streamer still leave 256 as is.

Then, when the MAX_MACHINE_MODE is greater than 256, the memset of
lto_output_init_mode_table will touch the memory out of range unexpected.

This patch would like to take the MAX_MACHINE_MODE as the size of the
array in tree-streamer, to make sure there is no potential unexpected
memory access in future.

Signed-off-by: Pan Li <pan2.li@intel.com>

gcc/ChangeLog:

	* lto-streamer-in.cc (lto_input_mode_table): Use
	MAX_MACHINE_MODE for memory allocation.
	* tree-streamer.cc: Use MAX_MACHINE_MODE for array size.
	* tree-streamer.h (streamer_mode_table): Ditto.
	(bp_pack_machine_mode): Ditto.
	(bp_unpack_machine_mode): Ditto.
---
 gcc/lto-streamer-in.cc | 3 ++-
 gcc/tree-streamer.cc   | 2 +-
 gcc/tree-streamer.h    | 7 ++++---
 3 files changed, 7 insertions(+), 5 deletions(-)
  

Comments

Richard Biener June 19, 2023, 9:15 a.m. UTC | #1
On Mon, 19 Jun 2023, pan2.li@intel.com wrote:

> From: Pan Li <pan2.li@intel.com>
> 
> We extend the machine mode from 8 to 16 bits already. But there still
> one placing missing from the tree-streamer. It has one hard coded array
> for the machine code like size 256.
> 
> In the lto pass, we memset the array by MAX_MACHINE_MODE count but the
> value of the MAX_MACHINE_MODE will grow as more and more modes are added.
> While the machine mode array in tree-streamer still leave 256 as is.
> 
> Then, when the MAX_MACHINE_MODE is greater than 256, the memset of
> lto_output_init_mode_table will touch the memory out of range unexpected.
> 
> This patch would like to take the MAX_MACHINE_MODE as the size of the
> array in tree-streamer, to make sure there is no potential unexpected
> memory access in future.

Please review more careful:

void
lto_input_mode_table (struct lto_file_decl_data *file_data)
{
...
  while ((m = bp_unpack_value (&bp, 8)) != VOIDmode)

reads 8 bits again.

          ibit = bp_unpack_value (&bp, 8);
          fbit = bp_unpack_value (&bp, 8);

likewise.

Also file_data->mode_table is indexed by the _host_ mode, so you
have to allocate enough space to fill in all streamed modes but
you are using the targets MAX_MACHINE_MODE here.  I think we
need to stream the hosts MAX_MACHINE_MODE.

Richard.


> Signed-off-by: Pan Li <pan2.li@intel.com>
> 
> gcc/ChangeLog:
> 
> 	* lto-streamer-in.cc (lto_input_mode_table): Use
> 	MAX_MACHINE_MODE for memory allocation.
> 	* tree-streamer.cc: Use MAX_MACHINE_MODE for array size.
> 	* tree-streamer.h (streamer_mode_table): Ditto.
> 	(bp_pack_machine_mode): Ditto.
> 	(bp_unpack_machine_mode): Ditto.
> ---
>  gcc/lto-streamer-in.cc | 3 ++-
>  gcc/tree-streamer.cc   | 2 +-
>  gcc/tree-streamer.h    | 7 ++++---
>  3 files changed, 7 insertions(+), 5 deletions(-)
> 
> diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc
> index 2cb83406db5..102b7e18526 100644
> --- a/gcc/lto-streamer-in.cc
> +++ b/gcc/lto-streamer-in.cc
> @@ -1985,7 +1985,8 @@ lto_input_mode_table (struct lto_file_decl_data *file_data)
>      internal_error ("cannot read LTO mode table from %s",
>  		    file_data->file_name);
>  
> -  unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (1 << 8);
> +  unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (
> +    MAX_MACHINE_MODE);
>    file_data->mode_table = table;
>    const struct lto_simple_header_with_strings *header
>      = (const struct lto_simple_header_with_strings *) data;
> diff --git a/gcc/tree-streamer.cc b/gcc/tree-streamer.cc
> index ed65a7692e3..a28ef9c7920 100644
> --- a/gcc/tree-streamer.cc
> +++ b/gcc/tree-streamer.cc
> @@ -35,7 +35,7 @@ along with GCC; see the file COPYING3.  If not see
>     During streaming in, we translate the on the disk mode using this
>     table.  For normal LTO it is set to identity, for ACCEL_COMPILER
>     depending on the mode_table content.  */
> -unsigned char streamer_mode_table[1 << 8];
> +unsigned char streamer_mode_table[MAX_MACHINE_MODE];
>  
>  /* Check that all the TS_* structures handled by the streamer_write_* and
>     streamer_read_* routines are exactly ALL the structures defined in
> diff --git a/gcc/tree-streamer.h b/gcc/tree-streamer.h
> index 170d61cf20b..be3a1938e76 100644
> --- a/gcc/tree-streamer.h
> +++ b/gcc/tree-streamer.h
> @@ -75,7 +75,7 @@ void streamer_write_tree_body (struct output_block *, tree);
>  void streamer_write_integer_cst (struct output_block *, tree);
>  
>  /* In tree-streamer.cc.  */
> -extern unsigned char streamer_mode_table[1 << 8];
> +extern unsigned char streamer_mode_table[MAX_MACHINE_MODE];
>  void streamer_check_handled_ts_structures (void);
>  bool streamer_tree_cache_insert (struct streamer_tree_cache_d *, tree,
>  				 hashval_t, unsigned *);
> @@ -108,7 +108,7 @@ inline void
>  bp_pack_machine_mode (struct bitpack_d *bp, machine_mode mode)
>  {
>    streamer_mode_table[mode] = 1;
> -  bp_pack_enum (bp, machine_mode, 1 << 8, mode);
> +  bp_pack_enum (bp, machine_mode, MAX_MACHINE_MODE, mode);
>  }
>  
>  inline machine_mode
> @@ -116,7 +116,8 @@ bp_unpack_machine_mode (struct bitpack_d *bp)
>  {
>    return (machine_mode)
>  	   ((class lto_input_block *)
> -	    bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode, 1 << 8)];
> +	    bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode,
> +						    MAX_MACHINE_MODE)];
>  }
>  
>  #endif  /* GCC_TREE_STREAMER_H  */
>
  
Jakub Jelinek June 19, 2023, 9:16 a.m. UTC | #2
On Mon, Jun 19, 2023 at 05:05:48PM +0800, pan2.li@intel.com wrote:
> --- a/gcc/lto-streamer-in.cc
> +++ b/gcc/lto-streamer-in.cc
> @@ -1985,7 +1985,8 @@ lto_input_mode_table (struct lto_file_decl_data *file_data)
>      internal_error ("cannot read LTO mode table from %s",
>  		    file_data->file_name);
>  
> -  unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (1 << 8);
> +  unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (
> +    MAX_MACHINE_MODE);

Incorrect formatting.  And, see my other mail, this is wrong.

> @@ -108,7 +108,7 @@ inline void
>  bp_pack_machine_mode (struct bitpack_d *bp, machine_mode mode)
>  {
>    streamer_mode_table[mode] = 1;
> -  bp_pack_enum (bp, machine_mode, 1 << 8, mode);
> +  bp_pack_enum (bp, machine_mode, MAX_MACHINE_MODE, mode);
>  }
>  
>  inline machine_mode
> @@ -116,7 +116,8 @@ bp_unpack_machine_mode (struct bitpack_d *bp)
>  {
>    return (machine_mode)
>  	   ((class lto_input_block *)
> -	    bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode, 1 << 8)];
> +	    bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode,
> +						    MAX_MACHINE_MODE)];
>  }

And these two are wrong as well.  The value passed to bp_pack_enum
has to match the one used on bp_unpack_enum.  But that is not the case
after your changes.  You stream out with the host MAX_MACHINE_MODE, and
stream in for normal LTO with the same value (ok), but for offloading
targets (nvptx, amdgcn) with a different MAX_MACHINE_MODE.  That will
immediate result in LTO streaming being out of sync and ICEs all around.
The reason for using 1 << 8 there was exactly to make it interoperable for
offloading.  What could be perhaps done is that you stream out the
host MAX_MACHINE_MODE value somewhere and stream it in inside of
lto_input_mode_table before you allocate the table.  But, that streamed
in host max_machine_mdoe has to be remembered somewhere and used e.g. in
bp_unpack_machine_mode instead of MAX_MACHINE_MODE.

	Jakub
  
Li, Pan2 via Gcc-patches June 19, 2023, 1:35 p.m. UTC | #3
Thanks Jakub for reviewing, sorry for misleading and will have a try for PATCH v3.

Pan

-----Original Message-----
From: Jakub Jelinek <jakub@redhat.com> 
Sent: Monday, June 19, 2023 5:17 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; rdapp.gcc@gmail.com; jeffreyalaw@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng@gmail.com; rguenther@suse.de
Subject: Re: [PATCH] RISC-V: Fix out of range memory access of machine mode table

On Mon, Jun 19, 2023 at 05:05:48PM +0800, pan2.li@intel.com wrote:
> --- a/gcc/lto-streamer-in.cc
> +++ b/gcc/lto-streamer-in.cc
> @@ -1985,7 +1985,8 @@ lto_input_mode_table (struct lto_file_decl_data *file_data)
>      internal_error ("cannot read LTO mode table from %s",
>  		    file_data->file_name);
>  
> -  unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (1 << 8);
> +  unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (
> +    MAX_MACHINE_MODE);

Incorrect formatting.  And, see my other mail, this is wrong.

> @@ -108,7 +108,7 @@ inline void
>  bp_pack_machine_mode (struct bitpack_d *bp, machine_mode mode)
>  {
>    streamer_mode_table[mode] = 1;
> -  bp_pack_enum (bp, machine_mode, 1 << 8, mode);
> +  bp_pack_enum (bp, machine_mode, MAX_MACHINE_MODE, mode);
>  }
>  
>  inline machine_mode
> @@ -116,7 +116,8 @@ bp_unpack_machine_mode (struct bitpack_d *bp)
>  {
>    return (machine_mode)
>  	   ((class lto_input_block *)
> -	    bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode, 1 << 8)];
> +	    bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode,
> +						    MAX_MACHINE_MODE)];
>  }

And these two are wrong as well.  The value passed to bp_pack_enum
has to match the one used on bp_unpack_enum.  But that is not the case
after your changes.  You stream out with the host MAX_MACHINE_MODE, and
stream in for normal LTO with the same value (ok), but for offloading
targets (nvptx, amdgcn) with a different MAX_MACHINE_MODE.  That will
immediate result in LTO streaming being out of sync and ICEs all around.
The reason for using 1 << 8 there was exactly to make it interoperable for
offloading.  What could be perhaps done is that you stream out the
host MAX_MACHINE_MODE value somewhere and stream it in inside of
lto_input_mode_table before you allocate the table.  But, that streamed
in host max_machine_mdoe has to be remembered somewhere and used e.g. in
bp_unpack_machine_mode instead of MAX_MACHINE_MODE.

	Jakub
  
Li, Pan2 via Gcc-patches June 20, 2023, 7:50 a.m. UTC | #4
Hi Jakub,

Thanks for reviewing but I am not quite sure if I fully understand how to fix this issue. Could you please help to enlighten me more about this ?

Currently for RISC-V, the memset has touched out of range memory already due to MAX_MACHINE_MODE > 256. And we may have below parts require adjusting.

1. streamer_mode_table.
2.  bp_unpack_machine_mode/bp_pack_machine_mode 
3.  bp_pack_value/bp_unpack_value in lto_write_mode_table.
4. unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (1 << 8) in lto_input_mode_table.

For 1. is safe to extend the size to MAX_MACHINE_MODE as the array only used as Boolean, aka streamer_mode_table[XXXmode] = 1.
For 2 & 3. Keep 1 << 8 as is, or stream out the host MAX_MACHINE_MODE value somewhere for underlying consuming?
For 4, one possible approach is that extend unsigned char to unsigned short, as well as 256 to MAX_MACHINE_MODE. Because it stores the actually machine mode in array.

Pan

-----Original Message-----
From: Li, Pan2 
Sent: Monday, June 19, 2023 9:36 PM
To: Jakub Jelinek <jakub@redhat.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; rdapp.gcc@gmail.com; jeffreyalaw@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng@gmail.com; rguenther@suse.de
Subject: RE: [PATCH] RISC-V: Fix out of range memory access of machine mode table

Thanks Jakub for reviewing, sorry for misleading and will have a try for PATCH v3.

Pan

-----Original Message-----
From: Jakub Jelinek <jakub@redhat.com> 
Sent: Monday, June 19, 2023 5:17 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; rdapp.gcc@gmail.com; jeffreyalaw@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng@gmail.com; rguenther@suse.de
Subject: Re: [PATCH] RISC-V: Fix out of range memory access of machine mode table

On Mon, Jun 19, 2023 at 05:05:48PM +0800, pan2.li@intel.com wrote:
> --- a/gcc/lto-streamer-in.cc
> +++ b/gcc/lto-streamer-in.cc
> @@ -1985,7 +1985,8 @@ lto_input_mode_table (struct lto_file_decl_data *file_data)
>      internal_error ("cannot read LTO mode table from %s",
>  		    file_data->file_name);
>  
> -  unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (1 << 8);
> +  unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (
> +    MAX_MACHINE_MODE);

Incorrect formatting.  And, see my other mail, this is wrong.

> @@ -108,7 +108,7 @@ inline void
>  bp_pack_machine_mode (struct bitpack_d *bp, machine_mode mode)
>  {
>    streamer_mode_table[mode] = 1;
> -  bp_pack_enum (bp, machine_mode, 1 << 8, mode);
> +  bp_pack_enum (bp, machine_mode, MAX_MACHINE_MODE, mode);
>  }
>  
>  inline machine_mode
> @@ -116,7 +116,8 @@ bp_unpack_machine_mode (struct bitpack_d *bp)
>  {
>    return (machine_mode)
>  	   ((class lto_input_block *)
> -	    bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode, 1 << 8)];
> +	    bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode,
> +						    MAX_MACHINE_MODE)];
>  }

And these two are wrong as well.  The value passed to bp_pack_enum
has to match the one used on bp_unpack_enum.  But that is not the case
after your changes.  You stream out with the host MAX_MACHINE_MODE, and
stream in for normal LTO with the same value (ok), but for offloading
targets (nvptx, amdgcn) with a different MAX_MACHINE_MODE.  That will
immediate result in LTO streaming being out of sync and ICEs all around.
The reason for using 1 << 8 there was exactly to make it interoperable for
offloading.  What could be perhaps done is that you stream out the
host MAX_MACHINE_MODE value somewhere and stream it in inside of
lto_input_mode_table before you allocate the table.  But, that streamed
in host max_machine_mdoe has to be remembered somewhere and used e.g. in
bp_unpack_machine_mode instead of MAX_MACHINE_MODE.

	Jakub
  
Jakub Jelinek June 20, 2023, 8:03 a.m. UTC | #5
On Tue, Jun 20, 2023 at 07:50:00AM +0000, Li, Pan2 wrote:
> Hi Jakub,
> 
> Thanks for reviewing but I am not quite sure if I fully understand how to fix this issue. Could you please help to enlighten me more about this ?
> 
> Currently for RISC-V, the memset has touched out of range memory already due to MAX_MACHINE_MODE > 256. And we may have below parts require adjusting.
> 
> 1. streamer_mode_table.
> 2.  bp_unpack_machine_mode/bp_pack_machine_mode 
> 3.  bp_pack_value/bp_unpack_value in lto_write_mode_table.
> 4. unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (1 << 8) in lto_input_mode_table.
> 
> For 1. is safe to extend the size to MAX_MACHINE_MODE as the array only used as Boolean, aka streamer_mode_table[XXXmode] = 1.

Because the array is used only during stream out, that is safe.

> For 2 & 3. Keep 1 << 8 as is, or stream out the host MAX_MACHINE_MODE value somewhere for underlying consuming?

You can't keep 1 << 8, otherwise you won't stream all the bits.
I think you want to use 1 << ceil_log2 (MAX_MACHINE_MODE) on the stream out
side, stream that ceil_log2 (MAX_MACHINE_MODE) value somewhere at the start
of the mode table, add some field next to mode_table in lto_input_block
which will contain that value (and make sure to initialize it to
ceil_log2 (MAX_MACHINE_MODE) in case mode table isn't streamed in and use
1 << ...->mode_bits in e.g. bp_unpack_machine_mode
Or for cases where 8 was used before use ceil_log2 (MAX_MACHINE_MODE)
or mode_bits.

> For 4, one possible approach is that extend unsigned char to unsigned short, as well as 256 to MAX_MACHINE_MODE. Because it stores the actually machine mode in array.

The 1 << 8 needs to be similarly 1 << ...->mode_bits or ...->num_modes (that
is also streamed out and in), it is sized by the host number of modes.
Whether it is unsigned char or unsigned short array depends on if we
want to support offloading targets with > 256 modes.  If yes, it needs to
be unsigned short, if not, we should add an assertion (e.g. on streaming
in the LTO table) that MAX_MACHINE_MODE <= 256.

	Jakub
  
Li, Pan2 via Gcc-patches June 20, 2023, 2:08 p.m. UTC | #6
Thanks Jakub for the explanation, I have a try like below patch but I am not quite sure it is expected, and where should I put the assertion.

> If yes, it needs to
> be unsigned short, if not, we should add an assertion (e.g. on streaming
> in the LTO table) that MAX_MACHINE_MODE <= 256.

diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc
index 2cb83406db5..93ef97ec5d3 100644
--- a/gcc/lto-streamer-in.cc
+++ b/gcc/lto-streamer-in.cc
@@ -1985,8 +1985,6 @@ lto_input_mode_table (struct lto_file_decl_data *file_data)
     internal_error ("cannot read LTO mode table from %s",
 		    file_data->file_name);
 
-  unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (1 << 8);
-  file_data->mode_table = table;
   const struct lto_simple_header_with_strings *header
     = (const struct lto_simple_header_with_strings *) data;
   int string_offset;
@@ -1994,6 +1992,9 @@ lto_input_mode_table (struct lto_file_decl_data *file_data)
   string_offset = sizeof (*header) + header->main_size;
 
   lto_input_block ib (data + sizeof (*header), header->main_size, NULL);
+  unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (
+    1 << ib.mode_bits);
+  file_data->mode_table = table;
   data_in = lto_data_in_create (file_data, data + string_offset,
 				header->string_size, vNULL);
   bitpack_d bp = streamer_read_bitpack (&ib);
@@ -2001,13 +2002,13 @@ lto_input_mode_table (struct lto_file_decl_data *file_data)
   table[VOIDmode] = VOIDmode;
   table[BLKmode] = BLKmode;
   unsigned int m;
-  while ((m = bp_unpack_value (&bp, 8)) != VOIDmode)
+  while ((m = bp_unpack_value (&bp, ib.mode_bits)) != VOIDmode)
     {
       enum mode_class mclass
 	= bp_unpack_enum (&bp, mode_class, MAX_MODE_CLASS);
       poly_uint16 size = bp_unpack_poly_value (&bp, 16);
       poly_uint16 prec = bp_unpack_poly_value (&bp, 16);
-      machine_mode inner = (machine_mode) bp_unpack_value (&bp, 8);
+      machine_mode inner = (machine_mode) bp_unpack_value (&bp, ib.mode_bits);
       poly_uint16 nunits = bp_unpack_poly_value (&bp, 16);
       unsigned int ibit = 0, fbit = 0;
       unsigned int real_fmt_len = 0;
@@ -2018,8 +2019,8 @@ lto_input_mode_table (struct lto_file_decl_data *file_data)
 	case MODE_UFRACT:
 	case MODE_ACCUM:
 	case MODE_UACCUM:
-	  ibit = bp_unpack_value (&bp, 8);
-	  fbit = bp_unpack_value (&bp, 8);
+	  ibit = bp_unpack_value (&bp, ib.mode_bits);
+	  fbit = bp_unpack_value (&bp, ib.mode_bits);
 	  break;
 	case MODE_FLOAT:
 	case MODE_DECIMAL_FLOAT:
diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h
index fc7133d07ba..f1d826d59e4 100644
--- a/gcc/lto-streamer.h
+++ b/gcc/lto-streamer.h
@@ -352,6 +352,8 @@ public:
 
   const char *data;
   const unsigned char *mode_table;
+  /* Indicates how many bits of one machine mode will have.  */
+  const unsigned int mode_bits = ceil_log2 (MAX_MACHINE_MODE) ;
   unsigned int p;
   unsigned int len;
 };
diff --git a/gcc/tree-streamer.cc b/gcc/tree-streamer.cc
index ed65a7692e3..a28ef9c7920 100644
--- a/gcc/tree-streamer.cc
+++ b/gcc/tree-streamer.cc
@@ -35,7 +35,7 @@ along with GCC; see the file COPYING3.  If not see
    During streaming in, we translate the on the disk mode using this
    table.  For normal LTO it is set to identity, for ACCEL_COMPILER
    depending on the mode_table content.  */
-unsigned char streamer_mode_table[1 << 8];
+unsigned char streamer_mode_table[MAX_MACHINE_MODE];
 
 /* Check that all the TS_* structures handled by the streamer_write_* and
    streamer_read_* routines are exactly ALL the structures defined in
diff --git a/gcc/tree-streamer.h b/gcc/tree-streamer.h
index 170d61cf20b..9aa248cd2f5 100644
--- a/gcc/tree-streamer.h
+++ b/gcc/tree-streamer.h
@@ -75,7 +75,7 @@ void streamer_write_tree_body (struct output_block *, tree);
 void streamer_write_integer_cst (struct output_block *, tree);
 
 /* In tree-streamer.cc.  */
-extern unsigned char streamer_mode_table[1 << 8];
+extern unsigned char streamer_mode_table[MAX_MACHINE_MODE];
 void streamer_check_handled_ts_structures (void);
 bool streamer_tree_cache_insert (struct streamer_tree_cache_d *, tree,
 				 hashval_t, unsigned *);
@@ -108,15 +108,18 @@ inline void
 bp_pack_machine_mode (struct bitpack_d *bp, machine_mode mode)
 {
   streamer_mode_table[mode] = 1;
-  bp_pack_enum (bp, machine_mode, 1 << 8, mode);
+  int last = 1 << ceil_log2 (MAX_MACHINE_MODE);
+
+  bp_pack_enum (bp, machine_mode, last, mode);
 }
 
 inline machine_mode
 bp_unpack_machine_mode (struct bitpack_d *bp)
 {
-  return (machine_mode)
-	   ((class lto_input_block *)
-	    bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode, 1 << 8)];
+  lto_input_block *input_block =  (class lto_input_block *)bp->stream;
+  int index = bp_unpack_enum (bp, machine_mode, input_block->mode_bits);
+
+  return (machine_mode)input_block->mode_table[index];
 }
 
 #endif  /* GCC_TREE_STREAMER_H  */

Pan

-----Original Message-----
From: Jakub Jelinek <jakub@redhat.com> 
Sent: Tuesday, June 20, 2023 4:04 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; rdapp.gcc@gmail.com; jeffreyalaw@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng@gmail.com; rguenther@suse.de
Subject: Re: [PATCH] RISC-V: Fix out of range memory access of machine mode table

On Tue, Jun 20, 2023 at 07:50:00AM +0000, Li, Pan2 wrote:
> Hi Jakub,
> 
> Thanks for reviewing but I am not quite sure if I fully understand how to fix this issue. Could you please help to enlighten me more about this ?
> 
> Currently for RISC-V, the memset has touched out of range memory already due to MAX_MACHINE_MODE > 256. And we may have below parts require adjusting.
> 
> 1. streamer_mode_table.
> 2.  bp_unpack_machine_mode/bp_pack_machine_mode 
> 3.  bp_pack_value/bp_unpack_value in lto_write_mode_table.
> 4. unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (1 << 8) in lto_input_mode_table.
> 
> For 1. is safe to extend the size to MAX_MACHINE_MODE as the array only used as Boolean, aka streamer_mode_table[XXXmode] = 1.

Because the array is used only during stream out, that is safe.

> For 2 & 3. Keep 1 << 8 as is, or stream out the host MAX_MACHINE_MODE value somewhere for underlying consuming?

You can't keep 1 << 8, otherwise you won't stream all the bits.
I think you want to use 1 << ceil_log2 (MAX_MACHINE_MODE) on the stream out
side, stream that ceil_log2 (MAX_MACHINE_MODE) value somewhere at the start
of the mode table, add some field next to mode_table in lto_input_block
which will contain that value (and make sure to initialize it to
ceil_log2 (MAX_MACHINE_MODE) in case mode table isn't streamed in and use
1 << ...->mode_bits in e.g. bp_unpack_machine_mode
Or for cases where 8 was used before use ceil_log2 (MAX_MACHINE_MODE)
or mode_bits.

> For 4, one possible approach is that extend unsigned char to unsigned short, as well as 256 to MAX_MACHINE_MODE. Because it stores the actually machine mode in array.

The 1 << 8 needs to be similarly 1 << ...->mode_bits or ...->num_modes (that
is also streamed out and in), it is sized by the host number of modes.
Whether it is unsigned char or unsigned short array depends on if we
want to support offloading targets with > 256 modes.  If yes, it needs to
be unsigned short, if not, we should add an assertion (e.g. on streaming
in the LTO table) that MAX_MACHINE_MODE <= 256.

	Jakub
  
Jakub Jelinek June 20, 2023, 3:25 p.m. UTC | #7
On Tue, Jun 20, 2023 at 02:08:07PM +0000, Li, Pan2 via Gcc-patches wrote:
> Thanks Jakub for the explanation, I have a try like below patch but I am not quite sure it is expected, and where should I put the assertion.
> 
> > If yes, it needs to
> > be unsigned short, if not, we should add an assertion (e.g. on streaming
> > in the LTO table) that MAX_MACHINE_MODE <= 256.
> 
> diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc
> index 2cb83406db5..93ef97ec5d3 100644
> --- a/gcc/lto-streamer-in.cc
> +++ b/gcc/lto-streamer-in.cc
> @@ -1985,8 +1985,6 @@ lto_input_mode_table (struct lto_file_decl_data *file_data)
>      internal_error ("cannot read LTO mode table from %s",
>  		    file_data->file_name);
>  
> -  unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (1 << 8);
> -  file_data->mode_table = table;
>    const struct lto_simple_header_with_strings *header
>      = (const struct lto_simple_header_with_strings *) data;
>    int string_offset;
> @@ -1994,6 +1992,9 @@ lto_input_mode_table (struct lto_file_decl_data *file_data)
>    string_offset = sizeof (*header) + header->main_size;
>  
>    lto_input_block ib (data + sizeof (*header), header->main_size, NULL);
> +  unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (
> +    1 << ib.mode_bits);
> +  file_data->mode_table = table;
>    data_in = lto_data_in_create (file_data, data + string_offset,
>  				header->string_size, vNULL);
>    bitpack_d bp = streamer_read_bitpack (&ib);

Your ib.mode_bits is again the same ceil_log2 (MAX_MACHINE_MODE) value.
You need to stream that value out in lto-streamer-out.cc as perhaps the
first thing in the bitpack and stream it back here, so some
   mode_bits = bp_unpack_value (&bp, 5);
or so (perhaps 4 would be enough if we only support up to 15 bits for mode).
I.e. tell the offloading compiler what value had the host compiler when
streaming LTO out.

Then move those 3 lines from the above after that.  I'd put it next to
mode_table, so file_data->mode_bits.

The 
  unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (
    1 << ib.mode_bits);
formatting is wrong, ( shouldn't if at all possible be the last character
on a line.
In this case,
> --- a/gcc/lto-streamer.h
> +++ b/gcc/lto-streamer.h
> @@ -352,6 +352,8 @@ public:
>  
>    const char *data;
>    const unsigned char *mode_table;
> +  /* Indicates how many bits of one machine mode will have.  */
> +  const unsigned int mode_bits = ceil_log2 (MAX_MACHINE_MODE) ;

As I said earlier, I'd put it elsewhere.  The formatting is wrong
(no space before semicolon) and please don't add NSDMIs in structures
which don't have them already.

>  inline machine_mode
>  bp_unpack_machine_mode (struct bitpack_d *bp)
>  {
> -  return (machine_mode)
> -	   ((class lto_input_block *)
> -	    bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode, 1 << 8)];
> +  lto_input_block *input_block =  (class lto_input_block *)bp->stream;

Wrong formatting again, there shouldn't be two consecutive spaces in there.  On the
other hand, there should be a space between *) and bp.

> +  int index = bp_unpack_enum (bp, machine_mode, input_block->mode_bits);
> +
> +  return (machine_mode)input_block->mode_table[index];

And here similarly.

	Jakub
  
Li, Pan2 via Gcc-patches June 21, 2023, 6:59 a.m. UTC | #8
Thanks Jakub for the useful comments, go thru the mail list and have a refinement version as below. But I not sure if I understand correct about adding new field named mode_bits in struct lto_file_decl_data, looks unnecessary up to a point.

Thanks again for your coaching with patient.

diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc
index 2cb83406db5..2a0720b4e6f 100644
--- a/gcc/lto-streamer-in.cc
+++ b/gcc/lto-streamer-in.cc
@@ -1985,8 +1985,6 @@ lto_input_mode_table (struct lto_file_decl_data *file_data)
     internal_error ("cannot read LTO mode table from %s",
 		    file_data->file_name);
 
-  unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (1 << 8);
-  file_data->mode_table = table;
   const struct lto_simple_header_with_strings *header
     = (const struct lto_simple_header_with_strings *) data;
   int string_offset;
@@ -1998,16 +1996,22 @@ lto_input_mode_table (struct lto_file_decl_data *file_data)
 				header->string_size, vNULL);
   bitpack_d bp = streamer_read_bitpack (&ib);
 
+  unsigned mode_bits = bp_unpack_value (&bp, 5);
+  unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (1 << mode_bits);
+
+  file_data->mode_table = table;
+  file_data->mode_bits = mode_bits;
+
   table[VOIDmode] = VOIDmode;
   table[BLKmode] = BLKmode;
   unsigned int m;
-  while ((m = bp_unpack_value (&bp, 8)) != VOIDmode)
+  while ((m = bp_unpack_value (&bp, mode_bits)) != VOIDmode)
     {
       enum mode_class mclass
 	= bp_unpack_enum (&bp, mode_class, MAX_MODE_CLASS);
       poly_uint16 size = bp_unpack_poly_value (&bp, 16);
       poly_uint16 prec = bp_unpack_poly_value (&bp, 16);
-      machine_mode inner = (machine_mode) bp_unpack_value (&bp, 8);
+      machine_mode inner = (machine_mode) bp_unpack_value (&bp, mode_bits);
       poly_uint16 nunits = bp_unpack_poly_value (&bp, 16);
       unsigned int ibit = 0, fbit = 0;
       unsigned int real_fmt_len = 0;
diff --git a/gcc/lto-streamer-out.cc b/gcc/lto-streamer-out.cc
index 5ab2eb4301e..77250ee2385 100644
--- a/gcc/lto-streamer-out.cc
+++ b/gcc/lto-streamer-out.cc
@@ -3196,6 +3196,11 @@ lto_write_mode_table (void)
 	if (inner_m != m)
 	  streamer_mode_table[(int) inner_m] = 1;
       }
+
+  /* Pack the mode_bits value within 5 bits (up to 31) of the beginning.  */
+  unsigned mode_bits = ceil_log2 (MAX_MACHINE_MODE);
+  bp_pack_value (&bp, mode_bits, 5);
+
   /* First stream modes that have GET_MODE_INNER (m) == m,
      so that we can refer to them afterwards.  */
   for (int pass = 0; pass < 2; pass++)
@@ -3205,11 +3210,11 @@ lto_write_mode_table (void)
 	  machine_mode m = (machine_mode) i;
 	  if ((GET_MODE_INNER (m) == m) ^ (pass == 0))
 	    continue;
-	  bp_pack_value (&bp, m, 8);
+	  bp_pack_value (&bp, m, mode_bits);
 	  bp_pack_enum (&bp, mode_class, MAX_MODE_CLASS, GET_MODE_CLASS (m));
 	  bp_pack_poly_value (&bp, GET_MODE_SIZE (m), 16);
 	  bp_pack_poly_value (&bp, GET_MODE_PRECISION (m), 16);
-	  bp_pack_value (&bp, GET_MODE_INNER (m), 8);
+	  bp_pack_value (&bp, GET_MODE_INNER (m), mode_bits);
 	  bp_pack_poly_value (&bp, GET_MODE_NUNITS (m), 16);
 	  switch (GET_MODE_CLASS (m))
 	    {
@@ -3229,7 +3234,7 @@ lto_write_mode_table (void)
 	    }
 	  bp_pack_string (ob, &bp, GET_MODE_NAME (m), true);
 	}
-  bp_pack_value (&bp, VOIDmode, 8);
+  bp_pack_value (&bp, VOIDmode, mode_bits);
 
   streamer_write_bitpack (&bp);
 
diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h
index fc7133d07ba..443f0cd616e 100644
--- a/gcc/lto-streamer.h
+++ b/gcc/lto-streamer.h
@@ -604,6 +604,8 @@ struct GTY(()) lto_file_decl_data
   int order_base;
 
   int unit_base;
+
+  unsigned mode_bits;
 };
 
 typedef struct lto_file_decl_data *lto_file_decl_data_ptr;
diff --git a/gcc/tree-streamer.cc b/gcc/tree-streamer.cc
index ed65a7692e3..a28ef9c7920 100644
--- a/gcc/tree-streamer.cc
+++ b/gcc/tree-streamer.cc
@@ -35,7 +35,7 @@ along with GCC; see the file COPYING3.  If not see
    During streaming in, we translate the on the disk mode using this
    table.  For normal LTO it is set to identity, for ACCEL_COMPILER
    depending on the mode_table content.  */
-unsigned char streamer_mode_table[1 << 8];
+unsigned char streamer_mode_table[MAX_MACHINE_MODE];
 
 /* Check that all the TS_* structures handled by the streamer_write_* and
    streamer_read_* routines are exactly ALL the structures defined in
diff --git a/gcc/tree-streamer.h b/gcc/tree-streamer.h
index 170d61cf20b..10718b03640 100644
--- a/gcc/tree-streamer.h
+++ b/gcc/tree-streamer.h
@@ -75,7 +75,7 @@ void streamer_write_tree_body (struct output_block *, tree);
 void streamer_write_integer_cst (struct output_block *, tree);
 
 /* In tree-streamer.cc.  */
-extern unsigned char streamer_mode_table[1 << 8];
+extern unsigned char streamer_mode_table[MAX_MACHINE_MODE];
 void streamer_check_handled_ts_structures (void);
 bool streamer_tree_cache_insert (struct streamer_tree_cache_d *, tree,
 				 hashval_t, unsigned *);
@@ -108,15 +108,19 @@ inline void
 bp_pack_machine_mode (struct bitpack_d *bp, machine_mode mode)
 {
   streamer_mode_table[mode] = 1;
-  bp_pack_enum (bp, machine_mode, 1 << 8, mode);
+  int last = 1 << ceil_log2 (MAX_MACHINE_MODE);
+
+  bp_pack_enum (bp, machine_mode, last, mode);
 }
 
 inline machine_mode
 bp_unpack_machine_mode (struct bitpack_d *bp)
 {
-  return (machine_mode)
-	   ((class lto_input_block *)
-	    bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode, 1 << 8)];
+  int last = 1 << ceil_log2 (MAX_MACHINE_MODE);
+  lto_input_block *input_block =  (class lto_input_block *) bp->stream;
+  int index = bp_unpack_enum (bp, machine_mode, last);
+
+  return (machine_mode) input_block->mode_table[index];
 }
 
 #endif  /* GCC_TREE_STREAMER_H  */

Pan

-----Original Message-----
From: Jakub Jelinek <jakub@redhat.com> 
Sent: Tuesday, June 20, 2023 11:26 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; rdapp.gcc@gmail.com; jeffreyalaw@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng@gmail.com; rguenther@suse.de
Subject: Re: [PATCH] RISC-V: Fix out of range memory access of machine mode table

On Tue, Jun 20, 2023 at 02:08:07PM +0000, Li, Pan2 via Gcc-patches wrote:
> Thanks Jakub for the explanation, I have a try like below patch but I am not quite sure it is expected, and where should I put the assertion.
> 
> > If yes, it needs to
> > be unsigned short, if not, we should add an assertion (e.g. on streaming
> > in the LTO table) that MAX_MACHINE_MODE <= 256.
> 
> diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc
> index 2cb83406db5..93ef97ec5d3 100644
> --- a/gcc/lto-streamer-in.cc
> +++ b/gcc/lto-streamer-in.cc
> @@ -1985,8 +1985,6 @@ lto_input_mode_table (struct lto_file_decl_data *file_data)
>      internal_error ("cannot read LTO mode table from %s",
>  		    file_data->file_name);
>  
> -  unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (1 << 8);
> -  file_data->mode_table = table;
>    const struct lto_simple_header_with_strings *header
>      = (const struct lto_simple_header_with_strings *) data;
>    int string_offset;
> @@ -1994,6 +1992,9 @@ lto_input_mode_table (struct lto_file_decl_data *file_data)
>    string_offset = sizeof (*header) + header->main_size;
>  
>    lto_input_block ib (data + sizeof (*header), header->main_size, NULL);
> +  unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (
> +    1 << ib.mode_bits);
> +  file_data->mode_table = table;
>    data_in = lto_data_in_create (file_data, data + string_offset,
>  				header->string_size, vNULL);
>    bitpack_d bp = streamer_read_bitpack (&ib);

Your ib.mode_bits is again the same ceil_log2 (MAX_MACHINE_MODE) value.
You need to stream that value out in lto-streamer-out.cc as perhaps the
first thing in the bitpack and stream it back here, so some
   mode_bits = bp_unpack_value (&bp, 5);
or so (perhaps 4 would be enough if we only support up to 15 bits for mode).
I.e. tell the offloading compiler what value had the host compiler when
streaming LTO out.

Then move those 3 lines from the above after that.  I'd put it next to
mode_table, so file_data->mode_bits.

The 
  unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (
    1 << ib.mode_bits);
formatting is wrong, ( shouldn't if at all possible be the last character
on a line.
In this case,
> --- a/gcc/lto-streamer.h
> +++ b/gcc/lto-streamer.h
> @@ -352,6 +352,8 @@ public:
>  
>    const char *data;
>    const unsigned char *mode_table;
> +  /* Indicates how many bits of one machine mode will have.  */
> +  const unsigned int mode_bits = ceil_log2 (MAX_MACHINE_MODE) ;

As I said earlier, I'd put it elsewhere.  The formatting is wrong
(no space before semicolon) and please don't add NSDMIs in structures
which don't have them already.

>  inline machine_mode
>  bp_unpack_machine_mode (struct bitpack_d *bp)
>  {
> -  return (machine_mode)
> -	   ((class lto_input_block *)
> -	    bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode, 1 << 8)];
> +  lto_input_block *input_block =  (class lto_input_block *)bp->stream;

Wrong formatting again, there shouldn't be two consecutive spaces in there.  On the
other hand, there should be a space between *) and bp.

> +  int index = bp_unpack_enum (bp, machine_mode, input_block->mode_bits);
> +
> +  return (machine_mode)input_block->mode_table[index];

And here similarly.

	Jakub
  
Jakub Jelinek June 21, 2023, 7:16 a.m. UTC | #9
On Wed, Jun 21, 2023 at 06:59:08AM +0000, Li, Pan2 wrote:
>  inline machine_mode
>  bp_unpack_machine_mode (struct bitpack_d *bp)
>  {
> -  return (machine_mode)
> -	   ((class lto_input_block *)
> -	    bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode, 1 << 8)];
> +  int last = 1 << ceil_log2 (MAX_MACHINE_MODE);
> +  lto_input_block *input_block =  (class lto_input_block *) bp->stream;

Still 2 spaces instead of 1 here, otherwise it LGTM, but the important
question is if you have actually tested it with offloading, because only
that will verify it works correctly.
See https://gcc.gnu.org/wiki/Offloading for details.

	Jakub
  
Li, Pan2 via Gcc-patches June 21, 2023, 7:23 a.m. UTC | #10
Thanks Jakub, will fix the format issue and send the V3 patch, as well as try to validate it for offloading.

Pan

-----Original Message-----
From: Jakub Jelinek <jakub@redhat.com> 
Sent: Wednesday, June 21, 2023 3:16 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; rdapp.gcc@gmail.com; jeffreyalaw@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng@gmail.com; rguenther@suse.de
Subject: Re: [PATCH] RISC-V: Fix out of range memory access of machine mode table

On Wed, Jun 21, 2023 at 06:59:08AM +0000, Li, Pan2 wrote:
>  inline machine_mode
>  bp_unpack_machine_mode (struct bitpack_d *bp)
>  {
> -  return (machine_mode)
> -	   ((class lto_input_block *)
> -	    bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode, 1 << 8)];
> +  int last = 1 << ceil_log2 (MAX_MACHINE_MODE);
> +  lto_input_block *input_block =  (class lto_input_block *) bp->stream;

Still 2 spaces instead of 1 here, otherwise it LGTM, but the important
question is if you have actually tested it with offloading, because only
that will verify it works correctly.
See https://gcc.gnu.org/wiki/Offloading for details.

	Jakub
  
Li, Pan2 via Gcc-patches June 22, 2023, 12:19 a.m. UTC | #11
Hi there,

I try to verify the offloading following below doc.

https://gcc.gnu.org/wiki/Offloading#How_to_build_an_offloading-enabled_GCC

with some steps:

1. Build nvptx-tools.
2. Symbol link nvptx-newlib to gcc source code.
3. Build the Nividia PTX accel compiler.
4. Build the host compiler with nvptx as offload target, but I don't have the GPU, then drop the --with-cuda-driver=xxx option.
5. Run command for building, aka ./nvptx-tools/usr/local/bin/gcc -O0 -fopenmp test.c -o test.elf.

The building complete successfully, but looks I cannot run it without GPU, and I am not very sure this is good enough for validation or not.

Pan

-----Original Message-----
From: Li, Pan2 
Sent: Wednesday, June 21, 2023 3:23 PM
To: Jakub Jelinek <jakub@redhat.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; rdapp.gcc@gmail.com; jeffreyalaw@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng@gmail.com; rguenther@suse.de
Subject: RE: [PATCH] RISC-V: Fix out of range memory access of machine mode table

Thanks Jakub, will fix the format issue and send the V3 patch, as well as try to validate it for offloading.

Pan

-----Original Message-----
From: Jakub Jelinek <jakub@redhat.com> 
Sent: Wednesday, June 21, 2023 3:16 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; rdapp.gcc@gmail.com; jeffreyalaw@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng@gmail.com; rguenther@suse.de
Subject: Re: [PATCH] RISC-V: Fix out of range memory access of machine mode table

On Wed, Jun 21, 2023 at 06:59:08AM +0000, Li, Pan2 wrote:
>  inline machine_mode
>  bp_unpack_machine_mode (struct bitpack_d *bp)
>  {
> -  return (machine_mode)
> -	   ((class lto_input_block *)
> -	    bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode, 1 << 8)];
> +  int last = 1 << ceil_log2 (MAX_MACHINE_MODE);
> +  lto_input_block *input_block =  (class lto_input_block *) bp->stream;

Still 2 spaces instead of 1 here, otherwise it LGTM, but the important
question is if you have actually tested it with offloading, because only
that will verify it works correctly.
See https://gcc.gnu.org/wiki/Offloading for details.

	Jakub
  
Jeff Law June 28, 2023, 6:37 p.m. UTC | #12
On 6/21/23 18:19, Li, Pan2 wrote:
> Hi there,
> 
> I try to verify the offloading following below doc.
> 
> https://gcc.gnu.org/wiki/Offloading#How_to_build_an_offloading-enabled_GCC
> 
> with some steps:
> 
> 1. Build nvptx-tools.
> 2. Symbol link nvptx-newlib to gcc source code.
> 3. Build the Nividia PTX accel compiler.
> 4. Build the host compiler with nvptx as offload target, but I don't have the GPU, then drop the --with-cuda-driver=xxx option.
> 5. Run command for building, aka ./nvptx-tools/usr/local/bin/gcc -O0 -fopenmp test.c -o test.elf.
> 
> The building complete successfully, but looks I cannot run it without GPU, and I am not very sure this is good enough for validation or not.
If you don't have a suitable GPU for offloading, you could instead just 
compare the offloaded binary before/after your change.  I would expect 
them to be 100% identical.

If we take that route for verification, I think the question turns into 
how to do that for the testsuite.  ie, I think Jakub wants to verify 
that check-target-libgomp still passes when offloading is enabled.  I 
don't think there's an easy way to capture the resulting binaries for 
comparison purposes.  But that's what I'd suggest given the lack of a 
suitable GPU for testing.  So you might need to hack up the libgomp 
testsuite's .exp files to capture the binaries.

Before going to those extremes, I would suggest verifying that you do in 
fact get identical binaries before/after your change on a simple 
offloading test.



jeff
  

Patch

diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc
index 2cb83406db5..102b7e18526 100644
--- a/gcc/lto-streamer-in.cc
+++ b/gcc/lto-streamer-in.cc
@@ -1985,7 +1985,8 @@  lto_input_mode_table (struct lto_file_decl_data *file_data)
     internal_error ("cannot read LTO mode table from %s",
 		    file_data->file_name);
 
-  unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (1 << 8);
+  unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (
+    MAX_MACHINE_MODE);
   file_data->mode_table = table;
   const struct lto_simple_header_with_strings *header
     = (const struct lto_simple_header_with_strings *) data;
diff --git a/gcc/tree-streamer.cc b/gcc/tree-streamer.cc
index ed65a7692e3..a28ef9c7920 100644
--- a/gcc/tree-streamer.cc
+++ b/gcc/tree-streamer.cc
@@ -35,7 +35,7 @@  along with GCC; see the file COPYING3.  If not see
    During streaming in, we translate the on the disk mode using this
    table.  For normal LTO it is set to identity, for ACCEL_COMPILER
    depending on the mode_table content.  */
-unsigned char streamer_mode_table[1 << 8];
+unsigned char streamer_mode_table[MAX_MACHINE_MODE];
 
 /* Check that all the TS_* structures handled by the streamer_write_* and
    streamer_read_* routines are exactly ALL the structures defined in
diff --git a/gcc/tree-streamer.h b/gcc/tree-streamer.h
index 170d61cf20b..be3a1938e76 100644
--- a/gcc/tree-streamer.h
+++ b/gcc/tree-streamer.h
@@ -75,7 +75,7 @@  void streamer_write_tree_body (struct output_block *, tree);
 void streamer_write_integer_cst (struct output_block *, tree);
 
 /* In tree-streamer.cc.  */
-extern unsigned char streamer_mode_table[1 << 8];
+extern unsigned char streamer_mode_table[MAX_MACHINE_MODE];
 void streamer_check_handled_ts_structures (void);
 bool streamer_tree_cache_insert (struct streamer_tree_cache_d *, tree,
 				 hashval_t, unsigned *);
@@ -108,7 +108,7 @@  inline void
 bp_pack_machine_mode (struct bitpack_d *bp, machine_mode mode)
 {
   streamer_mode_table[mode] = 1;
-  bp_pack_enum (bp, machine_mode, 1 << 8, mode);
+  bp_pack_enum (bp, machine_mode, MAX_MACHINE_MODE, mode);
 }
 
 inline machine_mode
@@ -116,7 +116,8 @@  bp_unpack_machine_mode (struct bitpack_d *bp)
 {
   return (machine_mode)
 	   ((class lto_input_block *)
-	    bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode, 1 << 8)];
+	    bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode,
+						    MAX_MACHINE_MODE)];
 }
 
 #endif  /* GCC_TREE_STREAMER_H  */